Next Previous

Retired Document

Important: This sample code may not represent best practices for current development. The project may use deprecated symbols and illustrate technologies and techniques that are no longer recommended.

TableGenerator.c

/*

    File:       TableGenerator.c

    Contains:   Generates text encoding conversion tables for MFSLives.

    Written by: DTS

    Copyright:  Copyright (c) 2006 by Apple Computer, Inc., All Rights Reserved.

    Disclaimer: IMPORTANT:  This Apple software is supplied to you by Apple Computer, Inc.

                ("Apple") in consideration of your agreement to the following terms, and your

                use, installation, modification or redistribution of this Apple software

                constitutes acceptance of these terms.  If you do not agree with these terms,

                please do not use, install, modify or redistribute this Apple software.

                In consideration of your agreement to abide by the following terms, and subject

                to these terms, Apple grants you a personal, non-exclusive license, under Apple's

                copyrights in this original Apple software (the "Apple Software"), to use,

                reproduce, modify and redistribute the Apple Software, with or without

                modifications, in source and/or binary forms; provided that if you redistribute

                the Apple Software in its entirety and without modifications, you must retain

                this notice and the following text and disclaimers in all such redistributions of

                the Apple Software.  Neither the name, trademarks, service marks or logos of

                Apple Computer, Inc. may be used to endorse or promote products derived from the

                Apple Software without specific prior written permission from Apple.  Except as

                expressly stated in this notice, no other rights or licenses, express or implied,

                are granted by Apple herein, including but not limited to any patent rights that

                may be infringed by your derivative works or by other works in which the Apple

                Software may be incorporated.

                The Apple Software is provided by Apple on an "AS IS" basis.  APPLE MAKES NO

                WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED

                WARRANTIES OF NON-INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A PARTICULAR

                PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION ALONE OR IN

                COMBINATION WITH YOUR PRODUCTS.

                IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR

                CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE

                GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)

                ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION AND/OR DISTRIBUTION

                OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER UNDER THEORY OF CONTRACT, TORT

                (INCLUDING NEGLIGENCE), STRICT LIABILITY OR OTHERWISE, EVEN IF APPLE HAS BEEN

                ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

    Change History (most recent first):

$Log: TableGenerator.c,v $

Revision 1.1  2006/07/27 15:49:19  eskimo1

First checked in.

*/

/////////////////////////////////////////////////////////////////////

#include <CoreServices/CoreServices.h>

/////////////////////////////////////////////////////////////////////

#pragma mark ***** UTF-8

// Appends a rendering of the NUL-terminated byte string str, suitable for
// pasting between the quotes of a C string literal, to the NUL-terminated
// string already in buf.  At most bufSize bytes of buf are ever used,
// terminator included; output that does not fit is silently truncated, in
// the manner of strlcat.  If buf holds no terminator within bufSize bytes,
// nothing is appended.
//
// Bytes below 32 or at/above 127 are written as "\xNN" hex escapes, '"' and
// '\' are backslash-escaped, and every other byte is copied unchanged.  The
// caller supplies the surrounding quotes.
static void StrLCatHighBitPretty(char *buf, const uint8_t *str, size_t bufSize)
{
    const char *    terminator;
    size_t          used;
    size_t          strIndex;
    int             afterHexEscape;

    // Find the end of the existing text once, rather than re-scanning buf
    // for every character appended.

    terminator = memchr(buf, 0, bufSize);
    if (terminator == NULL) {
        return;
    }
    used = (size_t) (terminator - buf);

    afterHexEscape = 0;
    for (strIndex = 0; (str[strIndex] != 0) && (used < (bufSize - 1)); strIndex++) {
        uint8_t     ch;
        char        tmp[8];
        size_t      tmpLen;
        size_t      room;

        ch = str[strIndex];
        if ( (ch < 32) || (ch >= 127) ) {
            snprintf(tmp, sizeof(tmp), "\\x%02X", (unsigned int) ch);
            afterHexEscape = 1;
        } else {
            if ( (ch == '"') || (ch == '\\') ) {
                snprintf(tmp, sizeof(tmp), "\\%c", ch);
            } else if ( afterHexEscape
                        && (   ((ch >= '0') && (ch <= '9'))
                            || ((ch >= 'A') && (ch <= 'F'))
                            || ((ch >= 'a') && (ch <= 'f')) ) ) {
                // In C a \x escape absorbs every hex digit that follows it,
                // so close and reopen the literal ("") to keep this digit
                // from being read as part of the previous escape.
                snprintf(tmp, sizeof(tmp), "\"\"%c", ch);
            } else {
                snprintf(tmp, sizeof(tmp), "%c", ch);
            }
            afterHexEscape = 0;
        }

        // Copy as much of the rendering as fits, always leaving room for
        // the terminator.

        tmpLen = strlen(tmp);
        room = (bufSize - 1) - used;
        if (tmpLen > room) {
            tmpLen = room;
        }
        memcpy(buf + used, tmp, tmpLen);
        used += tmpLen;
        buf[used] = 0;
    }
}

static void __attribute__ ((unused)) PrintMacRomanToUTF8Table(int nf)

    UInt8   ch;

    CFIndex utf8CountMax;

    fprintf(stdout, "static const char * kMacRomanToUTF8[128] = {\n");

    utf8CountMax = 0;

    ch = 128;

    do {

        CFStringRef         str;

        UInt8               utf8Buffer[256];

        CFIndex             utf16Count;

        CFIndex             utf16Index;

        CFIndex             utf8Count;

        CFIndex             usedChars;

        CFMutableStringRef  canonStr;

        char                out[256];

        str = CFStringCreateWithBytes(NULL, &ch, sizeof(ch), kCFStringEncodingMacRoman, false);

        assert(str != NULL);

        canonStr = CFStringCreateMutableCopy(NULL, 0, str);

        assert(canonStr != NULL);

        switch (nf) {

            case 0:

                // do nothing

                break;

            case 1:

                CFStringNormalize(canonStr, kCFStringNormalizationFormD);

                break;

            case 2:

                CFStringNormalize(canonStr, kCFStringNormalizationFormC);

                break;

            default:

                assert(false);

                break;

        utf16Count = CFStringGetLength(canonStr);

        usedChars = CFStringGetBytes(canonStr, CFRangeMake(0, utf16Count), kCFStringEncodingUTF8, false, false, utf8Buffer, sizeof(utf8Buffer), &utf8Count);

        assert(usedChars == utf16Count);

        assert(utf8Count < (sizeof(utf8Buffer) - 1));

        utf8Buffer[utf8Count] = 0;

        if (utf8Count > utf8CountMax) {

            utf8CountMax = utf8Count;

        if ( (ch % 4) == 0 ) {

            fprintf(stdout, "    /* 0x%02X */ ", ch);

        strlcpy(out, "/*", sizeof(out));

        for (utf16Index = 0; utf16Index < utf16Count; utf16Index++) {

            char    tmp[16];

            snprintf(tmp, sizeof(tmp), " U+%04X", CFStringGetCharacterAtIndex(canonStr, utf16Index));

            strlcat(out, tmp, sizeof(out));

        strlcat(out, " */", sizeof(out));

        fprintf(stdout, "%-19s ", out);

        // fprintf(stdout, "%s ", out);

        strlcpy(out, "\"", sizeof(out));

        StrLCatHighBitPretty(out, utf8Buffer, sizeof(out));

        strlcat(out, "\"", sizeof(out));

        if (ch != 255) {

            strlcat(out, ", ", sizeof(out));

        fprintf(stdout, "%-16s", out);

        if ( (ch % 4) == 3) {

            fprintf(stdout, "\n");

        CFRelease(canonStr);

        CFRelease(str);

        ch += 1;

    } while (ch != 0);

    fprintf(stdout, "};\n");

    fprintf(stdout, "\nconst int kMacRomanToUTF8Expansion = %ld;\n", (long) utf8CountMax);

static void __attribute__ ((unused)) PrintMacRomanToUTF8TableSimple(void)

    UInt8   ch;

    fprintf(stdout, "static const char * kMacRomanToUTF8[128] = {\n");

    // The MFS core code optimises away the ASCII case (the bottom 128 characters),

    // so we only include information about the top 128 characters.

    ch = 128;

    do {

        CFStringRef         str;

        UInt8               utf8Buffer[16];

        CFIndex             utf8Count;

        CFIndex             usedChars;

        char                out[256];

        str = CFStringCreateWithBytes(NULL, &ch, sizeof(ch), kCFStringEncodingMacRoman, false);

        assert(str != NULL);

        assert(CFStringGetLength(str) == 1);

        usedChars = CFStringGetBytes(str, CFRangeMake(0, 1), kCFStringEncodingUTF8, false, false, utf8Buffer, sizeof(utf8Buffer), &utf8Count);

        assert(usedChars == 1);

        assert(utf8Count < (sizeof(utf8Buffer) - 1));

        utf8Buffer[utf8Count] = 0;

        if ( (ch % 4) == 0 ) {

            fprintf(stdout, "    /* 0x%02X */ ", ch);

        fprintf(stdout, "/* U+%04X */ ", CFStringGetCharacterAtIndex(str, 0));

        strlcpy(out, "\"", sizeof(out));

        StrLCatHighBitPretty(out, utf8Buffer, sizeof(out));

        strlcat(out, "\"", sizeof(out));

        if (ch != 255) {

            strlcat(out, ", ", sizeof(out));

        fprintf(stdout, "%-16s", out);

        if ( (ch % 4) == 3) {

            fprintf(stdout, "\n");

        CFRelease(str);

        ch += 1;

    } while (ch != 0);

    fprintf(stdout, "};\n");

// Everything ProcessStr records about one UTF-16 code unit.  Records are
// calloc'd, so every array starts out as an empty zero-terminated list.
typedef struct UTF8CharInfo {
    // The code unit encoded as UTF-8, NUL terminated.
    char    utf8[16];

    // The MacRoman character passed to ProcessStr the first time this code
    // unit was seen at the start of a string; 0 if that has not happened.
    uint8_t mfsEquivalent;

    // One byte per occurrence of this code unit after the start of a string:
    // the first code unit of that string.  Zero terminated.
    uint8_t validCombiners[16];

    // Filled in step with validCombiners: the MacRoman character passed to
    // ProcessStr for that same occurrence.  Zero terminated.
    uint8_t macRomanEquivalents[16];
} UTF8CharInfo;

// Stores byte in the first unused (zero) slot of list, a zero-terminated
// byte list occupying listSize bytes.  The final slot is never written, so
// the list always stays terminated; a full list asserts and is left as is.
static void AppendToByteList(uint8_t *list, size_t listSize, uint8_t byte)
{
    size_t  count;

    count = strlen( (const char *) list );
    assert( (count + 1) < listSize );
    if ( (count + 1) < listSize ) {
        list[count] = byte;
    }
}

// Records, in charMap, what each UTF-16 code unit of str contributes to the
// MacRoman character macRomanChar.
//
// charMap maps a one-code-unit CFString to a calloc'd UTF8CharInfo; a record
// is created the first time a code unit is seen.  This routine never frees
// the records it adds.
//
// For the first code unit of str, the record's mfsEquivalent is set to
// macRomanChar unless it already holds a non-zero value.  For every later
// code unit, the first code unit of str (which must be below 256) is
// appended to the record's validCombiners and macRomanChar is appended to
// its macRomanEquivalents.
static void ProcessStr(CFMutableDictionaryRef charMap, uint8_t macRomanChar, CFStringRef str)
{
    CFIndex     utf16Count;
    CFIndex     utf16Index;

    utf16Count = CFStringGetLength(str);
    for (utf16Index = 0; utf16Index < utf16Count; utf16Index++) {
        CFStringRef     charStr;
        UniChar         uch;
        UTF8CharInfo *  value;

        uch = CFStringGetCharacterAtIndex(str, utf16Index);

        charStr = CFStringCreateWithCharacters(NULL, &uch, 1);
        assert(charStr != NULL);

        // Records are never NULL, so a NULL result means "not seen yet".

        value = (UTF8CharInfo *) CFDictionaryGetValue(charMap, charStr);
        if (value == NULL) {
            CFIndex     usedChars;
            CFIndex     utf8Count;

            value = calloc(1, sizeof(*value));
            assert(value != NULL);

            // Offer one byte less than the field holds; because the record
            // was calloc'd, that guarantees the UTF-8 text is NUL terminated.

            usedChars = CFStringGetBytes(charStr, CFRangeMake(0, 1), kCFStringEncodingUTF8, false, false, (uint8_t *) value->utf8, sizeof(value->utf8) - 1, &utf8Count);
            assert(usedChars == 1);

            CFDictionaryAddValue(charMap, charStr, value);
        }
        assert(value != NULL);

        if (utf16Index == 0) {
            // The first MacRoman character recorded for a code unit wins;
            // later strings that start with the same code unit leave it alone.

            if (value->mfsEquivalent == 0) {
                value->mfsEquivalent = macRomanChar;
            }
        } else {
            UniChar     firstChar;

            firstChar = CFStringGetCharacterAtIndex(str, 0);
            assert(firstChar < 256);

            AppendToByteList(value->validCombiners,      sizeof(value->validCombiners),      (uint8_t) firstChar);
            AppendToByteList(value->macRomanEquivalents, sizeof(value->macRomanEquivalents), macRomanChar);
        }

        CFRelease(charStr);
    }
}

static CFComparisonResult Sorter(const void *val1, const void *val2, void *context)

    CFDictionaryRef charMap;

    UTF8CharInfo *  info1;

    UTF8CharInfo *  info2;

    charMap = (CFDictionaryRef) context;

    info1 = (UTF8CharInfo *) CFDictionaryGetValue(charMap, (CFStringRef) val1);

    assert(info1 != NULL);

    info2 = (UTF8CharInfo *) CFDictionaryGetValue(charMap, (CFStringRef) val2);

    assert(info2 != NULL);

    return strcmp(info1->utf8, info2->utf8);

static void __attribute__ ((unused)) PrintUTF8ToMacRomanTable(void)

    // This code works, but I decided I didn't need it!

    CFMutableDictionaryRef  charMap;

    uint8_t                 ch;

    CFStringRef             str;

    CFMutableStringRef      canonStr;

    CFIndex                 charCount;

    CFIndex                 charIndex;

    CFStringRef *           keys;

    CFArrayRef              keysArray;

    CFMutableArrayRef       keysArrayM;

    charMap = CFDictionaryCreateMutable(NULL, 0, &kCFTypeDictionaryKeyCallBacks, NULL);

    assert(charMap != NULL);

    ch = 1;                 // start at 1 because we don't need the null character in our output

    do {

        str = CFStringCreateWithBytes(NULL, &ch, sizeof(ch), kCFStringEncodingMacRoman, false);

        assert(str != NULL);

        canonStr = CFStringCreateMutableCopy(NULL, 0, str);

        assert(canonStr != NULL);

        CFStringNormalize(canonStr, kCFStringNormalizationFormD);

        ProcessStr(charMap, ch, str);

        ProcessStr(charMap, ch, canonStr);

        CFRelease(canonStr);

        CFRelease(str);

        ch += 1;

    } while (ch != 0);

    // CFShow(charMap);

    charCount = CFDictionaryGetCount(charMap);

    keys = (CFStringRef *) malloc(charCount * sizeof(*keys));

    assert(keys != NULL);

    CFDictionaryGetKeysAndValues(charMap, (const void **) keys, NULL);

    keysArray = CFArrayCreate(NULL, (const void **) keys, charCount, &kCFTypeArrayCallBacks);

    assert(keysArray != NULL);

    keysArrayM = CFArrayCreateMutableCopy(NULL, 0, keysArray);

    assert(keysArrayM != NULL);

    CFArraySortValues(keysArrayM, CFRangeMake(0, charCount), Sorter, charMap);

    for (charIndex = 0; charIndex < charCount; charIndex++) {

        UTF8CharInfo *  info;

        char            out[256];

        uint8_t         tmp[2];

        info = (UTF8CharInfo *) CFDictionaryGetValue( charMap, CFArrayGetValueAtIndex(keysArrayM, charIndex) );

        assert(info != NULL);

        if (strlen(info->utf8) == 0) {

            strlcpy(out, "NULL", sizeof(out));

        } else {

            strlcpy(out, "\"", sizeof(out));

            StrLCatHighBitPretty(out, (uint8_t *) info->utf8, sizeof(out));

            strlcat(out, "\"", sizeof(out));

        fprintf(stdout, "{ %s, ", out);

        tmp[0] = info->mfsEquivalent;

        tmp[1] = 0;

        strlcpy(out, "\"", sizeof(out));

        StrLCatHighBitPretty(out, tmp, sizeof(out));

        strlcat(out, "\"", sizeof(out));

        if ( (info->mfsEquivalent >= 32) && (info->mfsEquivalent < 127) && (info->mfsEquivalent != '\'') && (info->mfsEquivalent != '\\') ) {

            snprintf(out, sizeof(out), "'%c'", info->mfsEquivalent);

        } else {

            snprintf(out, sizeof(out), "0x%02X", info->mfsEquivalent);

        fprintf(stdout, "%s, ", out);

        if (strlen( (const char *) info->validCombiners ) == 0) {

            strlcpy(out, "NULL", sizeof(out));

        } else {

            strlcpy(out, "\"", sizeof(out));

            StrLCatHighBitPretty(out, info->validCombiners, sizeof(out));

            strlcat(out, "\"", sizeof(out));

        fprintf(stdout, "%s, ", out);

        if (strlen( (const char *) info->macRomanEquivalents ) == 0) {

            strlcpy(out, "NULL", sizeof(out));

        } else {

            strlcpy(out, "\"", sizeof(out));

            StrLCatHighBitPretty(out, info->macRomanEquivalents, sizeof(out));

            strlcat(out, "\"", sizeof(out));

        fprintf(stdout, "%s }", out);

        if (charIndex != (charCount - 1)) {

            fprintf(stdout, ",");

        fprintf(stdout, "\n");

#pragma mark ***** UTF-16

static void __attribute__ ((unused)) PrintMacRomanToUTF16Table(void)

    uint8_t     ch;

    fprintf(stdout, "static const uint16_t kMacRomanToUTF16[256] = {\n");

    ch = 0;

    do {

        CFStringRef         str;

        CFIndex             utf16Count;

        UniChar             uch;

        char                niceUch;

        str = CFStringCreateWithBytes(NULL, &ch, sizeof(ch), kCFStringEncodingMacRoman, false);

        assert(str != NULL);

        utf16Count = CFStringGetLength(str);

        assert(utf16Count == 1);

        uch = CFStringGetCharacterAtIndex(str, 0);

        if ( (ch % 8) == 0 ) {

            fprintf(stdout, "    /* 0x%02X */", ch);

        if ( (uch >= 32) && (uch < 127) ) {

            niceUch = (char) uch;

        } else {

            niceUch = ' ';

        fprintf(stdout, " /* %c */ 0x%04X", niceUch, uch);

        if (ch != 255) {

            fprintf(stdout, ",");

        if ( (ch % 8) == 7) {

            fprintf(stdout, "\n");

        CFRelease(str);

        ch += 1;

    } while (ch != 0);

    fprintf(stdout, "};\n");

// One entry of the UTF-16 to MacRoman table built by PrintUTF16ToMacRoman.
typedef struct UniCharInfo {
    uint16_t    utf16Char;          // the UTF-16 code unit; the sort key
    uint8_t     macRomanChar;       // the MacRoman character it came from
} UniCharInfo;

static int UniCharInfoSorter(const void *p1, const void *p2)

    int             result;

    UniCharInfo *   info1;

    UniCharInfo *   info2;

    info1 = (UniCharInfo *) p1;

    info2 = (UniCharInfo *) p2;

    if (info1->utf16Char < info2->utf16Char) {

        result = -1;

    } else if (info1->utf16Char > info2->utf16Char) {

        result =  1;

    } else {

        result =  0;

    return result;

static void __attribute__ ((unused)) PrintUTF16ToMacRoman(void)

    uint8_t         ch;

    CFStringRef     str;

    UniCharInfo     charMap[128];

    int             i;

    ch = 128;

    do {

        str = CFStringCreateWithBytes(NULL, &ch, sizeof(ch), kCFStringEncodingMacRoman, false);

        assert(str != NULL);

        assert(CFStringGetLength(str) == 1);

        charMap[ch - 128].utf16Char    = CFStringGetCharacterAtIndex(str, 0);

        charMap[ch - 128].macRomanChar = ch;

        CFRelease(str);

        ch += 1;

    } while (ch != 0);

    qsort(charMap, 128, sizeof(UniCharInfo), UniCharInfoSorter);

    fprintf(stdout, "struct UniCharInfo {\n");

    fprintf(stdout, "    uint16_t   utf16Char;\n");

    fprintf(stdout, "    uint8_t    macRomanChar;\n");

    fprintf(stdout, "};\n");

    fprintf(stdout, "\n");

    fprintf(stdout, "typedef struct UniCharInfo UniCharInfo;\n");

    fprintf(stdout, "static const UniCharInfo kUTF16ToMacRoman[128] = {\n");

    for (i = 0; i < 128; i++) {

        if ( (i % 8) == 0 ) {

            fprintf(stdout, "   ");

        fprintf(stdout, " {0x%04X, 0x%02X}", charMap[i].utf16Char, charMap[i].macRomanChar);

        if (i != 255) {

            fprintf(stdout, ",");

        if ( (i % 8) == 7) {

            fprintf(stdout, "\n");

    fprintf(stdout, "};\n");

#pragma mark ***** Case Folder

static void PrintMacRomanCaseFoldingTable(void)

    uint8_t             ch;

    CFStringRef         strs[256];

    int                 row;

    int                 col;

    CFComparisonResult  res;

    uint8_t             theMatch;

    ch = 0;

    do {

        strs[ch] = CFStringCreateWithBytes(NULL, &ch, sizeof(ch), kCFStringEncodingMacRoman, false);

        assert(strs[ch] != NULL);

        assert(CFStringGetLength(strs[ch]) == 1);

        ch += 1;

    } while (ch != 0);

    fprintf(stdout, "static const uint16_t kMacRomanToUpper[256] = {\n");

    for (row = 0; row < 256; row++) {

        theMatch = row;

        for (col = 0; col < row; col++) {

            res = CFStringCompare(strs[row], strs[col], kCFCompareCaseInsensitive);

            if (res == 0) {

                theMatch = col;

        if ( (row % 16) == 0 ) {

            fprintf(stdout, "    /* 0x%02X */", row);

        fprintf(stdout, " 0x%02X", theMatch);

        if (row != 255) {

            fprintf(stdout, ",");

        if ( (row % 16) == 15) {

            fprintf(stdout, "\n");

    fprintf(stdout, "};\n");

// Prints a one-line usage message to stderr.  The command name shown is the
// part of argv0 after its last '/', or all of argv0 if it contains none.
// A NULL argv0 (possible when the program is started with an empty argument
// vector) is replaced by the program's own name.
static void PrintUsage(const char *argv0)
{
    const char *    commandStr;

    if (argv0 == NULL) {
        argv0 = "TableGenerator";
    }

    commandStr = strrchr(argv0, '/');
    if (commandStr == NULL) {
        commandStr = argv0;
    } else {
        commandStr += 1;            // skip over the '/' itself
    }
    fprintf(stderr, "usage: %s\n", commandStr);
}

// Program entry point.  The program takes no arguments: run with none, it
// writes the generated tables to stdout, bracketed by explanatory comments,
// and returns EXIT_SUCCESS.  Run with anything else, it prints a usage
// message and returns EXIT_FAILURE.
int main(int argc, char **argv)
{
    int     retVal;

    if (argc != 1) {
        PrintUsage(argv[0]);
        retVal = EXIT_FAILURE;
    } else {
        fprintf(stdout, "// These tables were generated by the TableGenerator program (see \"TableGenerator.c\").\n");
        fprintf(stdout, "\n");

        PrintMacRomanToUTF8Table(1);            // 1 selects normalisation form D
        fprintf(stdout, "\n");

        PrintUTF16ToMacRoman();
        fprintf(stdout, "\n");

        PrintMacRomanCaseFoldingTable();
        fprintf(stdout, "\n");

        // The output is C source text, so make sure its last line is
        // terminated.

        fprintf(stdout, "// End of automatically generated tables.\n");

        retVal = EXIT_SUCCESS;
    }
    return retVal;
}

Next Previous