/**
* Convert from Macintosh OS Roman to UTF-8
*
* Usage: mac2utf [file ...]
*
* BUG: The Apple logo is mapped to a question mark.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Translation table indexed by Mac OS Roman byte value (0x00-0xFF); each
 * element holds the Unicode code point for that byte.  The table is
 * populated by main() before any input is read.
 */
static int unicode[256];
/**
 * Read one byte from infp and translate it to a Unicode code point.
 * This is the "mac" half of the program's name.
 *
 * The largest value stored in the table is 0xFB02 (the "fl" ligature),
 * so every result fits in 16 bits.
 *
 * Returns the code point looked up in unicode[], or EOF when fgetc()
 * reports EOF (end of input or a read error).
 */
int getNext( FILE *infp ) {
    int macByte = fgetc( infp );

    return ( macByte != EOF ) ? unicode[macByte] : EOF;
}
/**
 * Write one Unicode code point to stdout encoded as UTF-8.
 * This is the "utf" half of the program's name.
 *
 * Encoded length by range:
 *   U+0000  .. U+007F    1 byte   0xxxxxxx
 *   U+0080  .. U+07FF    2 bytes  110xxxxx 10xxxxxx
 *   U+0800  .. U+FFFF    3 bytes  1110xxxx 10xxxxxx 10xxxxxx
 *   U+10000 .. U+10FFFF  4 bytes  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 *
 * The two-byte form carries only 11 payload bits, so the switch to three
 * bytes must happen above 0x7ff; a larger limit lets the top payload bits
 * spill into the lead byte's marker bits and yields malformed output.
 *
 * theChar is not validated: the caller is expected to pass a code point in
 * the range 0 .. 0x10FFFF.  Values of 0x7f or below (including negative
 * ones) are handed to putchar() unchanged.
 */
void putBytes( int theChar ) {
    if ( theChar <= 0x7f ) {
        /* Plain ASCII: a single byte, identical to the code point. */
        putchar( theChar );
    }
    else if ( theChar <= 0x7ff ) {
        /* 5 bits in the lead byte, 6 bits in the continuation byte. */
        putchar( 0xc0 | ( theChar >> 6 ) );
        putchar( 0x80 | ( theChar & 0x3f ) );
    }
    else if ( theChar <= 0xffff ) {
        /* 4 bits in the lead byte, then two 6-bit continuation bytes. */
        putchar( 0xe0 | ( theChar >> 12 ) );
        putchar( 0x80 | ( ( theChar >> 6 ) & 0x3f ) );
        putchar( 0x80 | ( theChar & 0x3f ) );
    }
    else {
        /* 3 bits in the lead byte, then three 6-bit continuation bytes. */
        putchar( 0xf0 | ( theChar >> 18 ) );
        putchar( 0x80 | ( ( theChar >> 12 ) & 0x3f ) );
        putchar( 0x80 | ( ( theChar >> 6 ) & 0x3f ) );
        putchar( 0x80 | ( theChar & 0x3f ) );
    }
}
/**
 * Convert every input named on the command line, writing the result to
 * stdout.
 *
 * argc/argv follow the main() convention: argv[0] is skipped and each of
 * the remaining argc - 1 entries is treated as an input name.  The name
 * "-" selects stdin.  A file that cannot be opened is reported on stderr
 * and skipped; processing continues with the next name.
 *
 * Each file opened here is closed again once it has been converted, and a
 * read error (as opposed to a normal end of file) is reported on stderr.
 */
void doFiles( int argc, char **argv ) {
    FILE *infp;
    char *infile;
    int c;

    while ( --argc > 0 ) {
        infile = *++argv;
        if ( 0 == strcmp( infile, "-" ) ) {
            infp = stdin;
        }
        /* Open in binary mode so every byte, including those with the
           high bit set, reaches getNext() untranslated. */
        else if ( ( infp = fopen( infile, "rb" ) ) == NULL ) {
            fprintf( stderr, "Unable to open input file \"%s\".\n", infile );
            continue;
        }

        while ( ( c = getNext( infp ) ) != EOF ) {
            putBytes( c );
        }

        /* getNext() returns EOF for both end of input and read failure;
           ferror() tells the two apart. */
        if ( ferror( infp ) ) {
            fprintf( stderr, "Error reading input file \"%s\".\n", infile );
        }

        /* Release the stream we opened; stdin is not ours to close. */
        if ( infp != stdin ) {
            fclose( infp );
        }
    }
}
int main( int argc, char **argv ) {
char * oneName[2];
unicode[0x00] = 0x0000; /* NUL */
unicode[0x01] = 0x0001; /* SOH */
unicode[0x02] = 0x0002; /* STX */
unicode[0x03] = 0x0003; /* ETX */
unicode[0x04] = 0x0004; /* EOT */
unicode[0x05] = 0x0005; /* ENQ */
unicode[0x06] = 0x0006; /* ACK */
unicode[0x07] = 0x0007; /* BEL */
unicode[0x08] = 0x0008; /* BS */
unicode[0x09] = 0x0009; /* HT */
unicode[0x0a] = 0x000a; /* LF */
unicode[0x0b] = 0x000b; /* VT */
unicode[0x0c] = 0x000c; /* FF */
unicode[0x0d] = 0x000d; /* CR */
unicode[0x0e] = 0x000e; /* SO */
unicode[0x0f] = 0x000f; /* SI */
unicode[0x10] = 0x0010; /* DLE */
unicode[0x11] = 0x0011; /* DC1 */
unicode[0x12] = 0x0012; /* DC2 */
unicode[0x13] = 0x0013; /* DC3 */
unicode[0x14] = 0x0014; /* DC4 */
unicode[0x15] = 0x0015; /* NAK */
unicode[0x16] = 0x0016; /* SYN */
unicode[0x17] = 0x0017; /* ETB */
unicode[0x18] = 0x0018; /* CAN */
unicode[0x19] = 0x0019; /* EM */
unicode[0x1a] = 0x001a; /* SUB */
unicode[0x1b] = 0x001b; /* ESC */
unicode[0x1c] = 0x001c; /* FS */
unicode[0x1d] = 0x001d; /* GS */
unicode[0x1e] = 0x001e; /* RS */
unicode[0x1f] = 0x001f; /* US */
unicode[0x20] = 0x0020; /* SPACE */
unicode[0x21] = 0x0021; /* ! */
unicode[0x22] = 0x0022; /* " */
unicode[0x23] = 0x0023; /* # */
unicode[0x24] = 0x0024; /* $ */
unicode[0x25] = 0x0025; /* % */
unicode[0x26] = 0x0026; /* & */
unicode[0x27] = 0x0027; /* ' */
unicode[0x28] = 0x0028; /* ( */
unicode[0x29] = 0x0029; /* ) */
unicode[0x2A] = 0x002A; /* * */
unicode[0x2B] = 0x002B; /* + */
unicode[0x2C] = 0x002C; /* , */
unicode[0x2D] = 0x002D; /* - */
unicode[0x2E] = 0x002E; /* . */
unicode[0x2F] = 0x002F; /* / */
unicode[0x30] = 0x0030; /* 0 */
unicode[0x31] = 0x0031; /* 1 */
unicode[0x32] = 0x0032; /* 2 */
unicode[0x33] = 0x0033; /* 3 */
unicode[0x34] = 0x0034; /* 4 */
unicode[0x35] = 0x0035; /* 5 */
unicode[0x36] = 0x0036; /* 6 */
unicode[0x37] = 0x0037; /* 7 */
unicode[0x38] = 0x0038; /* 8 */
unicode[0x39] = 0x0039; /* 9 */
unicode[0x3A] = 0x003A; /* : */
unicode[0x3B] = 0x003B; /* ; */
unicode[0x3C] = 0x003C; /* < */
unicode[0x3D] = 0x003D; /* = */
unicode[0x3E] = 0x003E; /* > */
unicode[0x3F] = 0x003F; /* ? */
unicode[0x40] = 0x0040; /* @ */
unicode[0x41] = 0x0041; /* A */
unicode[0x42] = 0x0042; /* B */
unicode[0x43] = 0x0043; /* C */
unicode[0x44] = 0x0044; /* D */
unicode[0x45] = 0x0045; /* E */
unicode[0x46] = 0x0046; /* F */
unicode[0x47] = 0x0047; /* G */
unicode[0x48] = 0x0048; /* H */
unicode[0x49] = 0x0049; /* I */
unicode[0x4A] = 0x004A; /* J */
unicode[0x4B] = 0x004B; /* K */
unicode[0x4C] = 0x004C; /* L */
unicode[0x4D] = 0x004D; /* M */
unicode[0x4E] = 0x004E; /* N */
unicode[0x4F] = 0x004F; /* O */
unicode[0x50] = 0x0050; /* P */
unicode[0x51] = 0x0051; /* Q */
unicode[0x52] = 0x0052; /* R */
unicode[0x53] = 0x0053; /* S */
unicode[0x54] = 0x0054; /* T */
unicode[0x55] = 0x0055; /* U */
unicode[0x56] = 0x0056; /* V */
unicode[0x57] = 0x0057; /* W */
unicode[0x58] = 0x0058; /* X */
unicode[0x59] = 0x0059; /* Y */
unicode[0x5A] = 0x005A; /* Z */
unicode[0x5B] = 0x005B; /* [ */
unicode[0x5C] = 0x005C; /* \ */
unicode[0x5D] = 0x005D; /* ] */
unicode[0x5E] = 0x005E; /* ^ */
unicode[0x5F] = 0x005F; /* _ */
unicode[0x60] = 0x0060; /* ` */
unicode[0x61] = 0x0061; /* a */
unicode[0x62] = 0x0062; /* b */
unicode[0x63] = 0x0063; /* c */
unicode[0x64] = 0x0064; /* d */
unicode[0x65] = 0x0065; /* e */
unicode[0x66] = 0x0066; /* f */
unicode[0x67] = 0x0067; /* g */
unicode[0x68] = 0x0068; /* h */
unicode[0x69] = 0x0069; /* i */
unicode[0x6A] = 0x006A; /* j */
unicode[0x6B] = 0x006B; /* k */
unicode[0x6C] = 0x006C; /* l */
unicode[0x6D] = 0x006D; /* m */
unicode[0x6E] = 0x006E; /* n */
unicode[0x6F] = 0x006F; /* o */
unicode[0x70] = 0x0070; /* p */
unicode[0x71] = 0x0071; /* q */
unicode[0x72] = 0x0072; /* r */
unicode[0x73] = 0x0073; /* s */
unicode[0x74] = 0x0074; /* t */
unicode[0x75] = 0x0075; /* u */
unicode[0x76] = 0x0076; /* v */
unicode[0x77] = 0x0077; /* w */
unicode[0x78] = 0x0078; /* x */
unicode[0x79] = 0x0079; /* y */
unicode[0x7A] = 0x007A; /* z */
unicode[0x7B] = 0x007B; /* { */
unicode[0x7C] = 0x007C; /* | */
unicode[0x7D] = 0x007D; /* } */
unicode[0x7E] = 0x007E; /* ~ */
unicode[0x7F] = 0x007F; /* DEL */
unicode[0x80] = 0x00C4; /* Ao */
unicode[0x81] = 0x00C5; /* A.. */
unicode[0x82] = 0x00C7; /* C, */
unicode[0x83] = 0x00C9; /* E' */
unicode[0x84] = 0x00D1; /* N~ */
unicode[0x85] = 0x00D6; /* O.. */
unicode[0x86] = 0x00DC; /* U.. */
unicode[0x87] = 0x00E1; /* a' */
unicode[0x88] = 0x00E0; /* a` */
unicode[0x89] = 0x00E2; /* a^ */
unicode[0x8A] = 0x00E4; /* a.. */
unicode[0x8B] = 0x00E3; /* a~ */
unicode[0x8C] = 0x00E5; /* ao */
unicode[0x8D] = 0x00E7; /* c, */
unicode[0x8E] = 0x00E9; /* e' */
unicode[0x8F] = 0x00E8; /* e` */
unicode[0x90] = 0x00EA; /* e^ */
unicode[0x91] = 0x00EB; /* e.. */
unicode[0x92] = 0x00ED; /* i' */
unicode[0x93] = 0x00EC; /* i` */
unicode[0x94] = 0x00EE; /* i^ */
unicode[0x95] = 0x00EF; /* i~ */
unicode[0x96] = 0x00F1; /* n~ */
unicode[0x97] = 0x00F3; /* o' */
unicode[0x98] = 0x00F2; /* o` */
unicode[0x99] = 0x00F4; /* o^ */
unicode[0x9A] = 0x00F6; /* o.. */
unicode[0x9B] = 0x00F5; /* o~ */
unicode[0x9C] = 0x00FA; /* u' */
unicode[0x9D] = 0x00F9; /* u` */
unicode[0x9E] = 0x00FB; /* u^ */
unicode[0x9F] = 0x00FC; /* u.. */
unicode[0xA0] = 0x2020; /* DAGGER */
unicode[0xA1] = 0x00B0; /* DEGREE */
unicode[0xA2] = 0x00A2; /* CENT */
unicode[0xA3] = 0x00A3; /* POUND */
unicode[0xA4] = 0x00A7; /* SECTION */
unicode[0xA5] = 0x2022; /* BULLET */
unicode[0xA6] = 0x00B6; /* PILCROW */
unicode[0xA7] = 0x00DF; /* Ess set */
unicode[0xA8] = 0x00AE; /* REGISTERED */
unicode[0xA9] = 0x00A9; /* COPYRIGHT */
unicode[0xAA] = 0x2122; /* TRADE */
unicode[0xAB] = 0x00B4; /* ACUTE */
unicode[0xAC] = 0x00A8; /* DIAERESIS */
unicode[0xAD] = 0x2260; /* NOT EQUAL */
unicode[0xAE] = 0x00C6; /* AE */
unicode[0xAF] = 0x00D8; /* O/ */
unicode[0xB0] = 0x221E; /* INFINITY */
unicode[0xB1] = 0x00B1; /* PLUS-MINUS */
unicode[0xB2] = 0x2264; /* LESS-THAN OR EQUAL */
unicode[0xB3] = 0x2265; /* GREATER-THAN OR EQUAL */
unicode[0xB4] = 0x00A5; /* YEN */
unicode[0xB5] = 0x00B5; /* MICRO */
unicode[0xB6] = 0x2202; /* PARTIAL */
unicode[0xB7] = 0x2211; /* N-ARY SUM */
unicode[0xB8] = 0x220F; /* N-ARY PRODUCT */
unicode[0xB9] = 0x03C0; /* PI */
unicode[0xBA] = 0x222B; /* INTEGRAL */
unicode[0xBB] = 0x00AA; /* FEMININE */
unicode[0xBC] = 0x00BA; /* MASCULINE */
unicode[0xBD] = 0x03A9; /* Omega */
unicode[0xBE] = 0x00E6; /* ae */
unicode[0xBF] = 0x00F8; /* o/ */
unicode[0xC0] = 0x00BF; /* INVERTED ? */
unicode[0xC1] = 0x00A1; /* INVERTED ! */
unicode[0xC2] = 0x00AC; /* NOT */
unicode[0xC3] = 0x221A; /* SQUARE ROOT */
unicode[0xC4] = 0x0192; /* FUNCTION */
unicode[0xC5] = 0x2248; /* ALMOST */
unicode[0xC6] = 0x2206; /* DELTS */
unicode[0xC7] = 0x00AB; /* << */
unicode[0xC8] = 0x00BB; /* >> */
unicode[0xC9] = 0x2026; /* ELIPSIS */
unicode[0xCA] = 0x00A0; /* NONBREAKING SPACE */
unicode[0xCB] = 0x00C0; /* A` */
unicode[0xCC] = 0x00C3; /* A~ */
unicode[0xCD] = 0x00D5; /* O~ */
unicode[0xCE] = 0x0152; /* OE */
unicode[0xCF] = 0x0153; /* oe */
unicode[0xD0] = 0x2013; /* EN DASH */
unicode[0xD1] = 0x2014; /* EM DASH */
unicode[0xD2] = 0x201C; /* LEFT CURVED DOUBLE QUOTE */
unicode[0xD3] = 0x201D; /* RIGHT CURVED DOUBLE QUOTE */
unicode[0xD4] = 0x2018; /* LEFT CURVED SINGLE QUOTE */
unicode[0xD5] = 0x2019; /* RIGHT CURVED SINGLE QUOTE */
unicode[0xD6] = 0x00F7; /* DIVISION */
unicode[0xD7] = 0x25CA; /* LOZENGE */
unicode[0xD8] = 0x00FF; /* y.. */
unicode[0xD9] = 0x0178; /* Y.. */
unicode[0xDA] = 0x2044; /* FRACTION */
unicode[0xDB] = 0x20AC; /* EURO */
unicode[0xDC] = 0x2039; /* SINGLE SMALL < */
unicode[0xDD] = 0x203A; /* SINGLE SMALL > */
unicode[0xDE] = 0xFB01; /* fi */
unicode[0xDF] = 0xFB02; /* fl */
unicode[0xE0] = 0x2021; /* DOUBLE DAGGER */
unicode[0xE1] = 0x00B7; /* MIDDLE DOT */
unicode[0xE2] = 0x201A; /* SINGLE LOWER QUOTE */
unicode[0xE3] = 0x201E; /* DOUBLE LOWER QUOTE */
unicode[0xE4] = 0x2030; /* PER THOUSAND */
unicode[0xE5] = 0x00C2; /* A^ */
unicode[0xE6] = 0x00CA; /* E^ */
unicode[0xE7] = 0x00C1; /* A' */
unicode[0xE8] = 0x00CB; /* E.. */
unicode[0xE9] = 0x00C8; /* E` */
unicode[0xEA] = 0x00CD; /* I' */
unicode[0xEB] = 0x00CE; /* I^ */
unicode[0xEC] = 0x00CF; /* I.. */
unicode[0xED] = 0x00CC; /* I` */
unicode[0xEE] = 0x00D3; /* O' */
unicode[0xEF] = 0x00D4; /* O^ */
unicode[0xF0] = '?'; /* Apple Logo */
unicode[0xF1] = 0x00D2; /* O` */
unicode[0xF2] = 0x00DA; /* U' */
unicode[0xF3] = 0x00DB; /* U^ */
unicode[0xF4] = 0x00D9; /* U' */
unicode[0xF5] = 0x0131; /* SMALL DOTLESS I */
unicode[0xF6] = 0x02C6; /* MODIFIER LETTER CIRCUMFLEX ACCENT */
unicode[0xF7] = 0x02DC; /* SMALL TILDE */
unicode[0xF8] = 0x00AF; /* MACRON */
unicode[0xF9] = 0x02D8; /* BREVE */
unicode[0xFA] = 0x02D9; /* DOT ABOVE */
unicode[0xFB] = 0x02DA; /* RING ABOVE */
unicode[0xFC] = 0x00B8; /* CEDILLA */
unicode[0xFD] = 0x02DD; /* DOUBLE ACUTE ACCENT */
unicode[0xFE] = 0x02DB; /* OGONEK */
unicode[0xFF] = 0x02C7; /* CARON */
if ( argc < 2 ) {
oneName[0] = argv[0];
oneName[1] = "-";
doFiles( 2, oneName );
}
else {
doFiles( argc, argv );
}
return 0;
}