/**
 * Convert from ISO-8859-1 to UTF-8.
 *
 * ISO-8859-1 (Latin-1) byte values coincide with the first 256 Unicode
 * code points, so every input byte is treated as a code point and
 * re-encoded as UTF-8 on standard output.
 *
 * Usage: iso2utf8 [file ...]
 *
 * With no arguments, or for an argument of "-", standard input is read.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/**
 * Put the unicode character out encoded in UTF-8.
 * This is the "utf" part of the name.
 *
 * Values up to 0x7f are written as a single byte, values up to 0x7ff as
 * two bytes, and larger values as three bytes.  The three-byte form can
 * only represent values up to 0xffff; larger values are not supported.
 *
 * @param theChar  the code point to write to standard output
 */
void putBytes( int theChar )
{
    /*
     * The two-byte form carries 11 payload bits (5 + 6), so anything
     * above 0x7ff needs three bytes.
     */
    if ( 0x7ff < theChar ) {
        /* 1110xxxx 10xxxxxx 10xxxxxx */
        putchar( 0xe0 | ( theChar >> 12 ) );
        putchar( 0x80 | ( ( theChar >> 6 ) & 0x3f ) );
        putchar( 0x80 | ( theChar & 0x3f ) );
    } else if ( 0x7f < theChar ) {
        /* 110xxxxx 10xxxxxx */
        putchar( 0xc0 | ( theChar >> 6 ) );
        putchar( 0x80 | ( theChar & 0x3f ) );
    } else {
        /* Plain ASCII passes through unchanged. */
        putchar( theChar );
    }
}

/**
 * Convert each named input to UTF-8 on standard output.
 *
 * The first element of argv is skipped (it is the program name).  An
 * argument of "-" selects standard input.  A file that cannot be opened
 * is reported on standard error and skipped; processing continues with
 * the next argument.
 *
 * @param argc  number of entries in argv, including the program name
 * @param argv  argument vector; argv[1] .. argv[argc - 1] are inputs
 */
void doFiles( int argc, char **argv )
{
    FILE *infp;
    char *infile;
    int c;

    while ( --argc > 0 ) {
        infile = *++argv;

        if ( 0 == strcmp( infile, "-" ) ) {
            infp = stdin;
        }
        /* Must open input in binary mode so we can catch chars with high bit set. */
        else if ( ( infp = fopen( infile, "rb" ) ) == NULL ) {
            fprintf( stderr, "Unable to open input file \"%s\".\n", infile );
            continue;
        }

        /* fgetc returns each byte as an unsigned char value (0..255). */
        while ( EOF != ( c = fgetc( infp ) ) ) {
            putBytes( c );
        }

        /* Release the stream we opened; stdin is left for the caller. */
        if ( infp != stdin ) {
            fclose( infp );
        }
    }
}

/**
 * Program entry point.
 *
 * When no file arguments are given, a two-element argument vector naming
 * "-" is synthesized so that doFiles reads standard input.
 *
 * @return EXIT_SUCCESS in all cases
 */
int main( int argc, char **argv )
{
    char *oneName[2];

    if ( argc < 2 ) {
        oneName[0] = argv[0];
        oneName[1] = "-";
        doFiles( 2, oneName );
    } else {
        doFiles( argc, argv );
    }

    return EXIT_SUCCESS;
}