/**
 * Convert a file of UTF-8 to ISO-8859-1.
 *
 * usage: utf2iso [-t n] [-d s] [files]
 *
 * Each input file (or standard input, for "-" or when no file is named) is
 * decoded one code point at a time.  Code points 0..255 are written to
 * standard output as single bytes; anything else is reported on standard
 * error and replaced by the default string.
 */
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/*
 * POSIX getopt(3) interface.  Declared explicitly (in addition to
 * <unistd.h>) so the file still builds when the compiler runs in a strict
 * ISO C mode in which <unistd.h> hides these names.
 */
extern char *optarg;
extern int optind;
int getopt( int argc, char * const argv[], const char *optstring );

/* Value returned by getUnicodePoint() for malformed input (U+FFFD). */
enum { REPLACEMENT_CHAR = 0xFFFD };

/* Unmappable characters still tolerated; a negative value means no limit. */
static int tolerance = 100;

/* printf-style template written in place of an unmappable character. */
static const char *defaultValue = "?";

/* Number of input files that could not be opened. */
static int openFailures = 0;

/**
 * Read one UTF-8 encoded code point from a stream.
 *
 * Sequences of up to six bytes (lead bytes 0xC0..0xFD) are decoded, as in
 * the original pre-RFC 3629 scheme; overlong encodings are not rejected.
 *
 * @param infp  stream to read from
 * @return the decoded code point (always >= 0), REPLACEMENT_CHAR when the
 *         input is malformed (invalid lead byte, bad or missing
 *         continuation byte), or EOF at end of input.
 */
int getUnicodePoint( FILE * infp )
{
    int c;
    int answer;
    int mask;
    int extra;   /* continuation bytes still expected */

    c = fgetc( infp );
    if ( EOF == c || 0 == ( c & 0x80 ) ) {
        return c;   /* end of input or plain ASCII */
    }

    /*
     * 0x80..0xBF are continuation bytes and cannot start a sequence;
     * 0xFE and 0xFF never appear in UTF-8.
     */
    if ( c < 0xC0 || c > 0xFD ) {
        return REPLACEMENT_CHAR;
    }

    /* Each 1 bit after the top one announces one continuation byte. */
    extra = 0;
    for ( mask = 0x40; c & mask; mask >>= 1 ) {
        extra++;
    }
    /* The bits below the terminating 0 bit are payload. */
    answer = c & ( mask - 1 );

    while ( extra-- > 0 ) {
        c = fgetc( infp );
        if ( EOF == c ) {
            return REPLACEMENT_CHAR;   /* sequence truncated by end of input */
        }
        if ( ( c & 0xC0 ) != 0x80 ) {
            /* Not a continuation byte: let the next call decode it. */
            ungetc( c, infp );
            return REPLACEMENT_CHAR;
        }
        answer = ( answer << 6 ) | ( c & 0x3f );
    }

    /* At most 31 payload bits, so the result can never collide with EOF. */
    assert( answer >= 0 );
    return answer;
}

/**
 * Report an unmappable code point and write the default string instead.
 *
 * Exits with status 1 once the error tolerance has been used up.
 *
 * @param c  the code point that has no ISO-8859-1 equivalent
 */
void replace( int c )
{
    fprintf( stderr,
             "Unicode code point %d has no equivalent in ISO-8859-1.\n", c );
    if ( 0 == tolerance ) {
        fprintf( stderr, "Too many unmappable characters. Quitting.\n" );
        exit( 1 );
    }
    /* A negative tolerance means "unlimited"; leave it untouched. */
    if ( tolerance > 0 ) {
        tolerance--;
    }
    /* defaultValue was validated in main() to use at most one int. */
    printf( defaultValue, c );
}

/**
 * Write a code point to standard output as one ISO-8859-1 byte, or hand
 * it to replace() when it lies outside 0..255.
 *
 * @param c  the code point to emit
 */
void putUnicodePoint( int c )
{
    if ( c < 0 || 255 < c ) {
        replace( c );
    } else {
        putchar( c );
    }
}

/**
 * Convert every named input file to standard output.
 *
 * @param argc  number of entries in argv, including the skipped argv[0]
 * @param argv  argv[1..argc-1] are file names; "-" selects standard input
 */
void doFiles( int argc, char **argv )
{
    int i;

    for ( i = 1; i < argc; i++ ) {
        const char *infile = argv[i];
        FILE *infp;
        int c;

        if ( 0 == strcmp( infile, "-" ) ) {
            infp = stdin;
        }
        /* Must open input in binary mode so we can catch chars with
           high bit set. */
        else if ( ( infp = fopen( infile, "rb" ) ) == NULL ) {
            fprintf( stderr, "Unable to open input file \"%s\".\n", infile );
            openFailures++;
            continue;
        }

        while ( EOF != ( c = getUnicodePoint( infp ) ) ) {
            putUnicodePoint( c );
        }

        if ( infp != stdin ) {
            fclose( infp );
        }
    }
}

/* Print the usage message on standard error and exit with status 1. */
static void usage( void )
{
    fprintf( stderr, "usage: utf2iso [-t n] [-d s] [files]\n" );
    fprintf( stderr, " where n is how many errors to tolerate\n" );
    fprintf( stderr, " and s is a default string" );
    fprintf( stderr, " for unmappable chars.\n" );
    fprintf( stderr, " s can contain something like %%d" );
    fprintf( stderr, " or be empty.\n" );
    exit( 1 );
}

/* Parse a decimal int; return 1 and store it in *out on success, else 0. */
static int parseTolerance( const char *text, int *out )
{
    char *end;
    long value;

    errno = 0;
    value = strtol( text, &end, 10 );
    if ( end == text || *end != '\0' || ERANGE == errno
         || value < INT_MIN || value > INT_MAX ) {
        return 0;
    }
    *out = (int) value;
    return 1;
}

/*
 * Return 1 when fmt can safely be passed to printf() with one int
 * argument: it may contain "%%" and at most one conversion out of
 * d, i, o, x, X, u, c with optional flags, width and precision.
 */
static int isSafeTemplate( const char *fmt )
{
    const char *p = fmt;
    int conversions = 0;

    while ( *p != '\0' ) {
        if ( *p++ != '%' ) {
            continue;
        }
        if ( '%' == *p ) {   /* literal percent sign */
            p++;
            continue;
        }
        p += strspn( p, "-+ #0" );           /* flags */
        p += strspn( p, "0123456789" );      /* field width */
        if ( '.' == *p ) {
            p++;
            p += strspn( p, "0123456789" );  /* precision */
        }
        /* strchr() also matches the terminator, so test for it first. */
        if ( '\0' == *p || NULL == strchr( "dioxXuc", *p ) ) {
            return 0;
        }
        p++;
        if ( ++conversions > 1 ) {
            return 0;
        }
    }
    return 1;
}

/**
 * Parse the options, then convert the named files (or standard input).
 *
 * @return 0 on success, EXIT_FAILURE if any input file could not be opened
 */
int main( int argc, char **argv )
{
    int c;
    char *stdinOnly[2];

    while ( -1 != ( c = getopt( argc, argv, "t:d:" ) ) ) {
        switch ( c ) {
        case 't':
            fprintf( stderr, "Tolerance given as %s.\n", optarg );
            if ( !parseTolerance( optarg, &tolerance ) ) {
                fprintf( stderr, "Invalid tolerance \"%s\".\n", optarg );
                usage();
            }
            break;
        case 'd':
            if ( !isSafeTemplate( optarg ) ) {
                fprintf( stderr,
                         "Default string \"%s\" may contain at most one"
                         " integer conversion such as %%d.\n", optarg );
                exit( 1 );
            }
            defaultValue = optarg;
            break;
        default:
            usage();
            break;
        }
    }

    if ( optind >= argc ) {
        /* No file operands: read standard input. */
        stdinOnly[0] = argv[0];
        stdinOnly[1] = "-";
        doFiles( 2, stdinOnly );
    } else {
        /* doFiles() skips its argv[0], so start one slot before the
           first file operand. */
        doFiles( argc - optind + 1, argv + optind - 1 );
    }

    return openFailures ? EXIT_FAILURE : 0;
}