/**
* Convert from ISO-8859-1 to UTF-8
* Note that ISO-8859-1 is Latin-1; its byte values 0x00-0xFF coincide with
* the first 256 Unicode code points, so each input byte is encoded directly.
*
* Usage: iso2utf8 [file ...]
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/**
* Write one Unicode code point to stdout encoded as UTF-8.
* This is the "utf" part of the name.
*
* theChar is the code point to emit:
*   up to 0x7f      -> one byte (passed to putchar unchanged)
*   up to 0x7ff     -> two bytes   110xxxxx 10xxxxxx
*   up to 0xffff    -> three bytes 1110xxxx 10xxxxxx 10xxxxxx
*   anything larger -> four bytes  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
*
* No range validation is done: surrogates and values beyond 0x10ffff are
* encoded as given, and negative values take the one-byte path.
*/
void putBytes( int theChar ) {
    if ( 0xffff < theChar ) { /* then we need four bytes */
        putchar( 0xf0 | ( theChar >> 18 ) );
        putchar( 0x80 | ( ( theChar >> 12 ) & 0x3f ) );
        putchar( 0x80 | ( ( theChar >> 6 ) & 0x3f ) );
        putchar( 0x80 | ( theChar & 0x3f ) );
    }
    else if ( 0x7ff < theChar ) { /* three bytes; two bytes only hold 11 bits */
        putchar( 0xe0 | ( theChar >> 12 ) );
        putchar( 0x80 | ( ( theChar >> 6 ) & 0x3f ) );
        putchar( 0x80 | ( theChar & 0x3f ) );
    }
    else if ( 0x7f < theChar ) { /* we only need two */
        putchar( 0xc0 | ( theChar >> 6 ) );
        putchar( 0x80 | ( theChar & 0x3f ) );
    }
    else { /* plain ASCII goes out as-is */
        putchar( theChar );
    }
}
/**
* Read every input named in argv[1] .. argv[argc-1] byte by byte and write
* each byte to stdout through putBytes().
*
* argv[0] is skipped. The name "-" means standard input. A file that cannot
* be opened is reported on stderr and skipped; processing continues with the
* next name. Each file opened here is closed before moving on; stdin is
* left open.
*/
void doFiles( int argc, char **argv ) {
    FILE *infp;
    char *infile;
    int c;
    while ( --argc > 0 ) {
        infile = *++argv;
        if ( 0 == strcmp( infile, "-" ) ) {
            infp = stdin;
        }
        /* Open in binary mode so every byte, including those with the high
           bit set, is read without text-mode translation. */
        else if ( ( infp = fopen( infile, "rb" ) ) == NULL ) {
            fprintf( stderr, "Unable to open input file \"%s\".\n", infile );
            continue;
        }
        while ( EOF != ( c = fgetc( infp ) ) ) {
            putBytes( c );
        }
        /* fgetc returns EOF for both end-of-file and failure; tell them apart. */
        if ( ferror( infp ) ) {
            fprintf( stderr, "Error reading input file \"%s\".\n", infile );
        }
        /* Release the handle so long argument lists do not exhaust open files. */
        if ( infp != stdin ) {
            fclose( infp );
        }
    }
}
/**
* Program entry point.
* With one or more arguments, hands the argument vector to doFiles().
* With none, builds a two-entry vector whose only input name is "-" so that
* doFiles() reads standard input. Always returns 0.
*/
int main( int argc, char **argv ) {
    char *stdinArgs[2];

    if ( argc >= 2 ) {
        doFiles( argc, argv );
        return 0;
    }

    /* No file names given: substitute a single "-" argument. */
    stdinArgs[0] = argv[0];
    stdinArgs[1] = "-";
    doFiles( 2, stdinArgs );
    return 0;
}