/**
 * Convert from Macintosh OS Roman to UTF-8
 *
 * Usage: mac2utf [file ...]
 *
 * Each named file is read as Mac OS Roman and written to standard output
 * as UTF-8.  With no arguments, or for an argument of "-", standard input
 * is read instead.
 *
 * NOTE: The Apple logo (0xF0) is mapped to U+F8FF, the private-use code
 * point Apple assigns to it.  It only displays with fonts that define
 * that code point.
 *
 * Define MAC2UTF_NO_MAIN when compiling to omit main(), e.g. to link the
 * conversion routines into a test harness.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/**
 * Unicode code points for Mac OS Roman bytes 0x80..0xFF, indexed by
 * (byte - 0x80).  Bytes 0x00..0x7F are plain ASCII and map to themselves,
 * so they need no table.  Every value fits in 16 bits.
 */
static const unsigned short macRomanHigh[128] = {
    0x00C4, /* 0x80 A WITH DIAERESIS */
    0x00C5, /* 0x81 A WITH RING ABOVE */
    0x00C7, /* 0x82 C WITH CEDILLA */
    0x00C9, /* 0x83 E WITH ACUTE */
    0x00D1, /* 0x84 N WITH TILDE */
    0x00D6, /* 0x85 O WITH DIAERESIS */
    0x00DC, /* 0x86 U WITH DIAERESIS */
    0x00E1, /* 0x87 a with acute */
    0x00E0, /* 0x88 a with grave */
    0x00E2, /* 0x89 a with circumflex */
    0x00E4, /* 0x8A a with diaeresis */
    0x00E3, /* 0x8B a with tilde */
    0x00E5, /* 0x8C a with ring above */
    0x00E7, /* 0x8D c with cedilla */
    0x00E9, /* 0x8E e with acute */
    0x00E8, /* 0x8F e with grave */
    0x00EA, /* 0x90 e with circumflex */
    0x00EB, /* 0x91 e with diaeresis */
    0x00ED, /* 0x92 i with acute */
    0x00EC, /* 0x93 i with grave */
    0x00EE, /* 0x94 i with circumflex */
    0x00EF, /* 0x95 i with diaeresis */
    0x00F1, /* 0x96 n with tilde */
    0x00F3, /* 0x97 o with acute */
    0x00F2, /* 0x98 o with grave */
    0x00F4, /* 0x99 o with circumflex */
    0x00F6, /* 0x9A o with diaeresis */
    0x00F5, /* 0x9B o with tilde */
    0x00FA, /* 0x9C u with acute */
    0x00F9, /* 0x9D u with grave */
    0x00FB, /* 0x9E u with circumflex */
    0x00FC, /* 0x9F u with diaeresis */
    0x2020, /* 0xA0 DAGGER */
    0x00B0, /* 0xA1 DEGREE SIGN */
    0x00A2, /* 0xA2 CENT SIGN */
    0x00A3, /* 0xA3 POUND SIGN */
    0x00A7, /* 0xA4 SECTION SIGN */
    0x2022, /* 0xA5 BULLET */
    0x00B6, /* 0xA6 PILCROW SIGN */
    0x00DF, /* 0xA7 SHARP S (ess-zet) */
    0x00AE, /* 0xA8 REGISTERED SIGN */
    0x00A9, /* 0xA9 COPYRIGHT SIGN */
    0x2122, /* 0xAA TRADE MARK SIGN */
    0x00B4, /* 0xAB ACUTE ACCENT */
    0x00A8, /* 0xAC DIAERESIS */
    0x2260, /* 0xAD NOT EQUAL TO */
    0x00C6, /* 0xAE AE LIGATURE */
    0x00D8, /* 0xAF O WITH STROKE */
    0x221E, /* 0xB0 INFINITY */
    0x00B1, /* 0xB1 PLUS-MINUS SIGN */
    0x2264, /* 0xB2 LESS-THAN OR EQUAL TO */
    0x2265, /* 0xB3 GREATER-THAN OR EQUAL TO */
    0x00A5, /* 0xB4 YEN SIGN */
    0x00B5, /* 0xB5 MICRO SIGN */
    0x2202, /* 0xB6 PARTIAL DIFFERENTIAL */
    0x2211, /* 0xB7 N-ARY SUMMATION */
    0x220F, /* 0xB8 N-ARY PRODUCT */
    0x03C0, /* 0xB9 GREEK SMALL LETTER PI */
    0x222B, /* 0xBA INTEGRAL */
    0x00AA, /* 0xBB FEMININE ORDINAL INDICATOR */
    0x00BA, /* 0xBC MASCULINE ORDINAL INDICATOR */
    0x03A9, /* 0xBD GREEK CAPITAL LETTER OMEGA */
    0x00E6, /* 0xBE ae ligature */
    0x00F8, /* 0xBF o with stroke */
    0x00BF, /* 0xC0 INVERTED QUESTION MARK */
    0x00A1, /* 0xC1 INVERTED EXCLAMATION MARK */
    0x00AC, /* 0xC2 NOT SIGN */
    0x221A, /* 0xC3 SQUARE ROOT */
    0x0192, /* 0xC4 f WITH HOOK (function) */
    0x2248, /* 0xC5 ALMOST EQUAL TO */
    0x2206, /* 0xC6 INCREMENT (delta) */
    0x00AB, /* 0xC7 LEFT-POINTING DOUBLE ANGLE QUOTATION MARK */
    0x00BB, /* 0xC8 RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK */
    0x2026, /* 0xC9 HORIZONTAL ELLIPSIS */
    0x00A0, /* 0xCA NO-BREAK SPACE */
    0x00C0, /* 0xCB A WITH GRAVE */
    0x00C3, /* 0xCC A WITH TILDE */
    0x00D5, /* 0xCD O WITH TILDE */
    0x0152, /* 0xCE OE LIGATURE */
    0x0153, /* 0xCF oe ligature */
    0x2013, /* 0xD0 EN DASH */
    0x2014, /* 0xD1 EM DASH */
    0x201C, /* 0xD2 LEFT DOUBLE QUOTATION MARK */
    0x201D, /* 0xD3 RIGHT DOUBLE QUOTATION MARK */
    0x2018, /* 0xD4 LEFT SINGLE QUOTATION MARK */
    0x2019, /* 0xD5 RIGHT SINGLE QUOTATION MARK */
    0x00F7, /* 0xD6 DIVISION SIGN */
    0x25CA, /* 0xD7 LOZENGE */
    0x00FF, /* 0xD8 y with diaeresis */
    0x0178, /* 0xD9 Y WITH DIAERESIS */
    0x2044, /* 0xDA FRACTION SLASH */
    0x20AC, /* 0xDB EURO SIGN */
    0x2039, /* 0xDC SINGLE LEFT-POINTING ANGLE QUOTATION MARK */
    0x203A, /* 0xDD SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */
    0xFB01, /* 0xDE fi ligature */
    0xFB02, /* 0xDF fl ligature */
    0x2021, /* 0xE0 DOUBLE DAGGER */
    0x00B7, /* 0xE1 MIDDLE DOT */
    0x201A, /* 0xE2 SINGLE LOW-9 QUOTATION MARK */
    0x201E, /* 0xE3 DOUBLE LOW-9 QUOTATION MARK */
    0x2030, /* 0xE4 PER MILLE SIGN */
    0x00C2, /* 0xE5 A WITH CIRCUMFLEX */
    0x00CA, /* 0xE6 E WITH CIRCUMFLEX */
    0x00C1, /* 0xE7 A WITH ACUTE */
    0x00CB, /* 0xE8 E WITH DIAERESIS */
    0x00C8, /* 0xE9 E WITH GRAVE */
    0x00CD, /* 0xEA I WITH ACUTE */
    0x00CE, /* 0xEB I WITH CIRCUMFLEX */
    0x00CF, /* 0xEC I WITH DIAERESIS */
    0x00CC, /* 0xED I WITH GRAVE */
    0x00D3, /* 0xEE O WITH ACUTE */
    0x00D4, /* 0xEF O WITH CIRCUMFLEX */
    0xF8FF, /* 0xF0 Apple logo (Apple private-use code point) */
    0x00D2, /* 0xF1 O WITH GRAVE */
    0x00DA, /* 0xF2 U WITH ACUTE */
    0x00DB, /* 0xF3 U WITH CIRCUMFLEX */
    0x00D9, /* 0xF4 U WITH GRAVE */
    0x0131, /* 0xF5 SMALL DOTLESS I */
    0x02C6, /* 0xF6 MODIFIER LETTER CIRCUMFLEX ACCENT */
    0x02DC, /* 0xF7 SMALL TILDE */
    0x00AF, /* 0xF8 MACRON */
    0x02D8, /* 0xF9 BREVE */
    0x02D9, /* 0xFA DOT ABOVE */
    0x02DA, /* 0xFB RING ABOVE */
    0x00B8, /* 0xFC CEDILLA */
    0x02DD, /* 0xFD DOUBLE ACUTE ACCENT */
    0x02DB, /* 0xFE OGONEK */
    0x02C7  /* 0xFF CARON */
};

/**
 * Map one Mac OS Roman byte to its Unicode code point.
 *
 * macByte must be in the range 0x00..0xFF; it is not validated.
 */
static int macRomanToUnicode( int macByte )
{
    if ( macByte < 0x80 ) {
        return macByte;
    }
    return macRomanHigh[macByte - 0x80];
}

/**
 * Encode a code point as UTF-8 into out, which must have room for 4 bytes.
 *
 * Returns the number of bytes stored (1 to 4).  The code point is expected
 * to be in 0..0x10FFFF and is not validated.  The ranges follow the UTF-8
 * definition: one byte below 0x80, two below 0x800, three below 0x10000.
 */
static int encodeUtf8( int codePoint, unsigned char out[4] )
{
    if ( codePoint < 0x80 ) {
        out[0] = (unsigned char) codePoint;
        return 1;
    }
    if ( codePoint < 0x800 ) {
        out[0] = (unsigned char) ( 0xc0 | ( codePoint >> 6 ) );
        out[1] = (unsigned char) ( 0x80 | ( codePoint & 0x3f ) );
        return 2;
    }
    if ( codePoint < 0x10000 ) {
        out[0] = (unsigned char) ( 0xe0 | ( codePoint >> 12 ) );
        out[1] = (unsigned char) ( 0x80 | ( ( codePoint >> 6 ) & 0x3f ) );
        out[2] = (unsigned char) ( 0x80 | ( codePoint & 0x3f ) );
        return 3;
    }
    out[0] = (unsigned char) ( 0xf0 | ( codePoint >> 18 ) );
    out[1] = (unsigned char) ( 0x80 | ( ( codePoint >> 12 ) & 0x3f ) );
    out[2] = (unsigned char) ( 0x80 | ( ( codePoint >> 6 ) & 0x3f ) );
    out[3] = (unsigned char) ( 0x80 | ( codePoint & 0x3f ) );
    return 4;
}

/**
 * Get the next byte and convert it to unicode.
 * This is the "mac" part of the name.
 *
 * Returns the Unicode code point for the next Mac OS Roman byte read
 * from infp, or EOF when fgetc() reports end of file or an error.
 * No returned code point collides with EOF: all are non-negative.
 */
int getNext( FILE *infp )
{
    int c = fgetc( infp );

    if ( EOF == c ) {
        return EOF;
    }
    return macRomanToUnicode( c );
}

/**
 * Put the unicode character out encoded in UTF-8.
 * This is the "utf" part of the name.
 *
 * The encoded bytes are written to standard output.
 */
void putBytes( int theChar )
{
    unsigned char bytes[4];
    int count = encodeUtf8( theChar, bytes );
    int i;

    for ( i = 0; i < count; i++ ) {
        putchar( bytes[i] );
    }
}

/**
 * Convert every file named in argv[1] .. argv[argc - 1].
 *
 * argv[0] is skipped, as in main().  A name of "-" means standard input.
 * A file that cannot be opened is reported on stderr and skipped; the
 * remaining files are still processed.
 */
void doFiles( int argc, char **argv )
{
    while ( --argc > 0 ) {
        const char *infile = *++argv;
        int isStdin = ( 0 == strcmp( infile, "-" ) );
        /* Must open input in binary mode so we can catch chars with
           high bit set. */
        FILE *infp = isStdin ? stdin : fopen( infile, "rb" );
        int c;

        if ( NULL == infp ) {
            fprintf( stderr, "Unable to open input file \"%s\".\n", infile );
            continue;
        }

        while ( ( c = getNext( infp ) ) != EOF ) {
            putBytes( c );
        }

        /* Release the handle we opened; stdin is not ours to close. */
        if ( !isStdin ) {
            fclose( infp );
        }
    }
}

#ifndef MAC2UTF_NO_MAIN
/**
 * Program entry point: convert the named files, or standard input when
 * no file is named.  Returns EXIT_FAILURE if writing the output failed.
 */
int main( int argc, char **argv )
{
    char *oneName[2];

    if ( argc < 2 ) {
        /* No files given: behave as if invoked with the single name "-". */
        oneName[0] = argv[0];
        oneName[1] = "-";
        doFiles( 2, oneName );
    } else {
        doFiles( argc, argv );
    }

    if ( fflush( stdout ) == EOF || ferror( stdout ) ) {
        fprintf( stderr, "Error writing to standard output.\n" );
        return EXIT_FAILURE;
    }
    return EXIT_SUCCESS;
}
#endif /* MAC2UTF_NO_MAIN */