/**
 * Convert a file of UTF-8 to Windows-1252.
 *
 * Usage: utf2iso [-t n] [-d s] [files]
 *
 * Each input file (or standard input when no file or "-" is given) is
 * decoded as UTF-8 and written to standard output as Windows-1252.
 * Code points with no Windows-1252 byte are reported on standard error
 * and replaced by the default string; after too many of them the
 * program quits with status 1.
 */
#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* Returned by getUnicodePoint() for any malformed UTF-8 sequence. */
enum { REPLACEMENT_CHAR = 0xfffd };

/* Unmappable characters still tolerated; a negative value means no limit. */
static int tolerance = 100;

/* printf-style string written in place of an unmappable character. */
static const char * defaultValue = "?";

/*
 * Code points outside 0xa0..0xff that have a Windows-1252 byte in the
 * range 0x80..0x9f.  These are the same 27 pairs the original lookup
 * table encoded through offset arithmetic.
 */
static const struct {
    int codePoint;
    unsigned char byte;
} extras[] = {
    { 0x20ac, 0x80 }, { 0x201a, 0x82 }, { 0x0192, 0x83 },
    { 0x201e, 0x84 }, { 0x2026, 0x85 }, { 0x2020, 0x86 },
    { 0x2021, 0x87 }, { 0x02c6, 0x88 }, { 0x2030, 0x89 },
    { 0x0160, 0x8a }, { 0x2039, 0x8b }, { 0x0152, 0x8c },
    { 0x017d, 0x8e }, { 0x2018, 0x91 }, { 0x2019, 0x92 },
    { 0x201c, 0x93 }, { 0x201d, 0x94 }, { 0x2022, 0x95 },
    { 0x2013, 0x96 }, { 0x2014, 0x97 }, { 0x02dc, 0x98 },
    { 0x2122, 0x99 }, { 0x0161, 0x9a }, { 0x203a, 0x9b },
    { 0x0153, 0x9c }, { 0x017e, 0x9e }, { 0x0178, 0x9f }
};

/**
 * Read one UTF-8 encoded code point from infp.
 *
 * Returns the code point, EOF at end of input, or REPLACEMENT_CHAR when
 * the bytes read do not form a valid sequence: a stray continuation
 * byte, a lead byte of 0xf8 or above, a truncated sequence, an overlong
 * encoding, a surrogate, or a value above 0x10ffff.
 */
int getUnicodePoint( FILE * infp )
{
    int c;
    int answer;
    int minimum;
    int bytes;

    c = fgetc( infp );
    if ( EOF == c || c < 0x80 ) {
        return c;
    }

    /* The lead byte fixes how many continuation bytes must follow. */
    if ( 0xc0 == ( c & 0xe0 ) ) {
        bytes = 1;
        answer = c & 0x1f;
        minimum = 0x80;
    }
    else if ( 0xe0 == ( c & 0xf0 ) ) {
        bytes = 2;
        answer = c & 0x0f;
        minimum = 0x800;
    }
    else if ( 0xf0 == ( c & 0xf8 ) ) {
        bytes = 3;
        answer = c & 0x07;
        minimum = 0x10000;
    }
    else {
        return REPLACEMENT_CHAR;
    }

    while ( bytes-- > 0 ) {
        c = fgetc( infp );
        if ( EOF == c || 0x80 != ( c & 0xc0 ) ) {
            /* Not a continuation byte: leave it for the next call. */
            if ( EOF != c ) {
                ungetc( c, infp );
            }
            return REPLACEMENT_CHAR;
        }
        answer = ( answer << 6 ) | ( c & 0x3f );
    }

    if ( answer < minimum || answer > 0x10ffff
         || ( answer >= 0xd800 && answer <= 0xdfff ) ) {
        return REPLACEMENT_CHAR;
    }
    return answer;
}

/**
 * Report code point c as unmappable on stderr and print the default
 * string in its place.  Exits with status 1 once the tolerance has been
 * used up; a negative tolerance is never used up.
 */
void replace( int c )
{
    fprintf( stderr, "Unicode code point %d has no equivalent in ISO-8859-1.\n", c );
    if ( 0 == tolerance ) {
        fprintf( stderr, "Too many unmappable characters. Quitting.\n" );
        exit( 1 );
    }
    if ( tolerance > 0 ) {
        tolerance--;
    }
    /* defaultValue is either the built-in "?" or was vetted by
       isSafeDefaultFormat() when the -d option was parsed. */
    printf( defaultValue, c );
}

/**
 * Write code point c to stdout as one Windows-1252 byte, or hand it to
 * replace() when it has no such byte.
 */
void putUnicodePoint( int c )
{
    size_t i;

    /* ASCII and 0xa0..0xff keep their numeric value. */
    if ( ( c >= 0 && c < 0x80 ) || ( c >= 0xa0 && c <= 0xff ) ) {
        putchar( c );
        return;
    }
    for ( i = 0; i < sizeof extras / sizeof extras[0]; i++ ) {
        if ( extras[i].codePoint == c ) {
            putchar( extras[i].byte );
            return;
        }
    }
    replace( c );
}

/**
 * Convert every file named in argv[1] .. argv[argc - 1]; the name "-"
 * stands for standard input.  Files that cannot be opened are reported
 * on stderr and skipped.
 */
void doFiles( int argc, char **argv )
{
    FILE *infp;
    char *infile;
    int c;

    while ( --argc > 0 ) {
        infile = *++argv;
        if ( 0 == strcmp( infile, "-" ) ) {
            infp = stdin;
        }
        /* Must open input in binary mode so we can catch chars with
           high bit set. */
        else if ( ( infp = fopen( infile, "rb" ) ) == NULL ) {
            fprintf( stderr, "Unable to open input file \"%s\".\n", infile );
            continue;
        }
        while ( EOF != ( c = getUnicodePoint( infp ) ) ) {
            putUnicodePoint( c );
        }
        if ( stdin != infp ) {
            fclose( infp );
        }
    }
}

/* Print the command-line synopsis on stderr. */
static void usage( void )
{
    fprintf( stderr, "usage: utf2iso [-t n] [-d s] [files]\n" );
    fprintf( stderr, " where n is how many errors to tolerate\n" );
    fprintf( stderr, " and s is a default string" );
    fprintf( stderr, " for unmappable chars.\n" );
    fprintf( stderr, " s can contain something like %%d" );
    fprintf( stderr, " or be empty.\n" );
}

/*
 * Return 1 when fmt can safely be passed to printf() together with a
 * single int argument: apart from "%%" it may hold at most one
 * conversion, which must be one of d i o u x X c with optional flags,
 * width and precision (no '*', no length modifier).  Return 0 otherwise.
 */
static int isSafeDefaultFormat( const char * fmt )
{
    int conversions = 0;

    while ( '\0' != *fmt ) {
        if ( '%' != *fmt++ ) {
            continue;
        }
        if ( '%' == *fmt ) {
            fmt++;
            continue;
        }
        fmt += strspn( fmt, "-+ #0" );
        fmt += strspn( fmt, "0123456789" );
        if ( '.' == *fmt ) {
            fmt++;
            fmt += strspn( fmt, "0123456789" );
        }
        /* strchr() also matches the terminator, so test for it first. */
        if ( '\0' == *fmt || NULL == strchr( "diouxXc", *fmt ) ) {
            return 0;
        }
        fmt++;
        if ( ++conversions > 1 ) {
            return 0;
        }
    }
    return 1;
}

/**
 * Parse the -t and -d options, then convert the named files, or
 * standard input when none are named.
 */
int main( int argc, char **argv )
{
    /* optind was declared here by the original; optarg is declared the
       same way so both are visible even if <unistd.h> does not expose
       them in the current compilation mode. */
    extern int optind;
    extern char *optarg;
    int c;
    long value;
    char * end;
    char * oneName[2];

    while ( -1 != ( c = getopt( argc, argv, "t:d:" ) ) ) {
        switch ( c ) {
        case 't':
            fprintf( stderr, "Tolerance given as %s.\n", optarg );
            value = strtol( optarg, &end, 10 );
            if ( end == optarg || '\0' != *end ) {
                usage();
                exit( 1 );
            }
            if ( value < 0 ) {
                value = -1;         /* no limit */
            }
            else if ( value > INT_MAX ) {
                value = INT_MAX;
            }
            tolerance = (int) value;
            break;
        case 'd':
            if ( !isSafeDefaultFormat( optarg ) ) {
                fprintf( stderr, "Default string \"%s\" may contain at most one integer conversion such as %%d.\n", optarg );
                exit( 1 );
            }
            defaultValue = optarg;
            break;
        default:
            usage();
            exit( 1 );
        }
    }

    if ( optind >= argc ) {
        /* No file operands: read standard input. */
        oneName[0] = argv[0];
        oneName[1] = "-";
        doFiles( 2, oneName );
    }
    else {
        /* doFiles() skips element 0, so start one slot before the
           first operand. */
        doFiles( argc - optind + 1, argv + optind - 1 );
    }
    return 0;
}