// Retired Document
// Important: This sample code may not represent best practices for current development. The project may use deprecated symbols and illustrate technologies and techniques that are no longer recommended.
// TableGenerator.c
/* |
File: TableGenerator.c |
Contains: Generates text encoding conversion tables for MFSLives. |
Written by: DTS |
Copyright: Copyright (c) 2006 by Apple Computer, Inc., All Rights Reserved. |
Disclaimer: IMPORTANT: This Apple software is supplied to you by Apple Computer, Inc. |
("Apple") in consideration of your agreement to the following terms, and your |
use, installation, modification or redistribution of this Apple software |
constitutes acceptance of these terms. If you do not agree with these terms, |
please do not use, install, modify or redistribute this Apple software. |
In consideration of your agreement to abide by the following terms, and subject |
to these terms, Apple grants you a personal, non-exclusive license, under Apple's |
copyrights in this original Apple software (the "Apple Software"), to use, |
reproduce, modify and redistribute the Apple Software, with or without |
modifications, in source and/or binary forms; provided that if you redistribute |
the Apple Software in its entirety and without modifications, you must retain |
this notice and the following text and disclaimers in all such redistributions of |
the Apple Software. Neither the name, trademarks, service marks or logos of |
Apple Computer, Inc. may be used to endorse or promote products derived from the |
Apple Software without specific prior written permission from Apple. Except as |
expressly stated in this notice, no other rights or licenses, express or implied, |
are granted by Apple herein, including but not limited to any patent rights that |
may be infringed by your derivative works or by other works in which the Apple |
Software may be incorporated. |
The Apple Software is provided by Apple on an "AS IS" basis. APPLE MAKES NO |
WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED |
WARRANTIES OF NON-INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION ALONE OR IN |
COMBINATION WITH YOUR PRODUCTS. |
IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR |
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE |
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION AND/OR DISTRIBUTION |
OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER UNDER THEORY OF CONTRACT, TORT |
(INCLUDING NEGLIGENCE), STRICT LIABILITY OR OTHERWISE, EVEN IF APPLE HAS BEEN |
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
Change History (most recent first): |
$Log: TableGenerator.c,v $ |
Revision 1.1 2006/07/27 15:49:19 eskimo1 |
First checked in. |
*/ |
///////////////////////////////////////////////////////////////////// |
#include <CoreServices/CoreServices.h> |
///////////////////////////////////////////////////////////////////// |
#pragma mark ***** UTF-8 |
// Appends to buf (a NUL-terminated string in a buffer of bufSize bytes) a
// rendering of the NUL-terminated byte string str that is safe to place
// between the double quotes of a C string literal:
//
// o bytes below 32 or at/above 127 become "\xNN" hex escapes
// o '"' and '\' are preceded by a backslash
// o everything else is copied verbatim
//
// A C hex escape consumes every hex digit that follows it, so a literal hex
// digit (0-9, A-F, a-f) that comes directly after a "\xNN" escape is also
// written as a hex escape; otherwise "\xCC" followed by 'A' would be compiled
// as the single escape "\xCCA".
//
// Truncation follows strlcat: at most bufSize - 1 bytes are ever used, the
// result is always NUL terminated, and buf is left untouched if it holds no
// NUL within its first bufSize bytes.
static void StrLCatHighBitPretty(char *buf, const uint8_t *str, size_t bufSize)
{
    char            piece[8];           // longest piece is "\xNN" plus its terminator
    size_t          used;
    int             afterHexEscape;

    // Locate the end of the existing contents.
    used = 0;
    while ( (used < bufSize) && (buf[used] != 0) ) {
        used += 1;
    }
    if (used == bufSize) {
        return;                         // not a terminated string (or no room at all)
    }

    afterHexEscape = 0;
    for ( ; *str != 0; str++) {
        unsigned int    c;
        int             isHexDigit;
        int             pieceLen;
        int             pieceIndex;

        c = *str;
        isHexDigit =   ( (c >= '0') && (c <= '9') )
                    || ( (c >= 'A') && (c <= 'F') )
                    || ( (c >= 'a') && (c <= 'f') );

        if ( (c < 32) || (c >= 127) || (afterHexEscape && isHexDigit) ) {
            pieceLen = snprintf(piece, sizeof(piece), "\\x%02X", c);
            afterHexEscape = 1;
        } else {
            if ( (c == '"') || (c == '\\') ) {
                pieceLen = snprintf(piece, sizeof(piece), "\\%c", (int) c);
            } else {
                pieceLen = snprintf(piece, sizeof(piece), "%c", (int) c);
            }
            afterHexEscape = 0;
        }

        // Copy as much of the piece as fits, always leaving room for the terminator.
        for (pieceIndex = 0; (pieceIndex < pieceLen) && ((used + 1) < bufSize); pieceIndex++) {
            buf[used] = piece[pieceIndex];
            used += 1;
        }
    }
    buf[used] = 0;
}
static void __attribute__ ((unused)) PrintMacRomanToUTF8Table(int nf) |
{ |
UInt8 ch; |
CFIndex utf8CountMax; |
fprintf(stdout, "static const char * kMacRomanToUTF8[128] = {\n"); |
utf8CountMax = 0; |
ch = 128; |
do { |
CFStringRef str; |
UInt8 utf8Buffer[256]; |
CFIndex utf16Count; |
CFIndex utf16Index; |
CFIndex utf8Count; |
CFIndex usedChars; |
CFMutableStringRef canonStr; |
char out[256]; |
str = CFStringCreateWithBytes(NULL, &ch, sizeof(ch), kCFStringEncodingMacRoman, false); |
assert(str != NULL); |
canonStr = CFStringCreateMutableCopy(NULL, 0, str); |
assert(canonStr != NULL); |
switch (nf) { |
case 0: |
// do nothing |
break; |
case 1: |
CFStringNormalize(canonStr, kCFStringNormalizationFormD); |
break; |
case 2: |
CFStringNormalize(canonStr, kCFStringNormalizationFormC); |
break; |
default: |
assert(false); |
break; |
} |
utf16Count = CFStringGetLength(canonStr); |
usedChars = CFStringGetBytes(canonStr, CFRangeMake(0, utf16Count), kCFStringEncodingUTF8, false, false, utf8Buffer, sizeof(utf8Buffer), &utf8Count); |
assert(usedChars == utf16Count); |
assert(utf8Count < (sizeof(utf8Buffer) - 1)); |
utf8Buffer[utf8Count] = 0; |
if (utf8Count > utf8CountMax) { |
utf8CountMax = utf8Count; |
} |
if ( (ch % 4) == 0 ) { |
fprintf(stdout, " /* 0x%02X */ ", ch); |
} |
strlcpy(out, "/*", sizeof(out)); |
for (utf16Index = 0; utf16Index < utf16Count; utf16Index++) { |
char tmp[16]; |
snprintf(tmp, sizeof(tmp), " U+%04X", CFStringGetCharacterAtIndex(canonStr, utf16Index)); |
strlcat(out, tmp, sizeof(out)); |
} |
strlcat(out, " */", sizeof(out)); |
fprintf(stdout, "%-19s ", out); |
// fprintf(stdout, "%s ", out); |
strlcpy(out, "\"", sizeof(out)); |
StrLCatHighBitPretty(out, utf8Buffer, sizeof(out)); |
strlcat(out, "\"", sizeof(out)); |
if (ch != 255) { |
strlcat(out, ", ", sizeof(out)); |
} |
fprintf(stdout, "%-16s", out); |
if ( (ch % 4) == 3) { |
fprintf(stdout, "\n"); |
} |
CFRelease(canonStr); |
CFRelease(str); |
ch += 1; |
} while (ch != 0); |
fprintf(stdout, "};\n"); |
fprintf(stdout, "\nconst int kMacRomanToUTF8Expansion = %ld;\n", (long) utf8CountMax); |
} |
static void __attribute__ ((unused)) PrintMacRomanToUTF8TableSimple(void) |
{ |
UInt8 ch; |
fprintf(stdout, "static const char * kMacRomanToUTF8[128] = {\n"); |
// The MFS core code optimises away the ASCII case (the bottom 128 characters), |
// so we only include information about the top 128 characters. |
ch = 128; |
do { |
CFStringRef str; |
UInt8 utf8Buffer[16]; |
CFIndex utf8Count; |
CFIndex usedChars; |
char out[256]; |
str = CFStringCreateWithBytes(NULL, &ch, sizeof(ch), kCFStringEncodingMacRoman, false); |
assert(str != NULL); |
assert(CFStringGetLength(str) == 1); |
usedChars = CFStringGetBytes(str, CFRangeMake(0, 1), kCFStringEncodingUTF8, false, false, utf8Buffer, sizeof(utf8Buffer), &utf8Count); |
assert(usedChars == 1); |
assert(utf8Count < (sizeof(utf8Buffer) - 1)); |
utf8Buffer[utf8Count] = 0; |
if ( (ch % 4) == 0 ) { |
fprintf(stdout, " /* 0x%02X */ ", ch); |
} |
fprintf(stdout, "/* U+%04X */ ", CFStringGetCharacterAtIndex(str, 0)); |
strlcpy(out, "\"", sizeof(out)); |
StrLCatHighBitPretty(out, utf8Buffer, sizeof(out)); |
strlcat(out, "\"", sizeof(out)); |
if (ch != 255) { |
strlcat(out, ", ", sizeof(out)); |
} |
fprintf(stdout, "%-16s", out); |
if ( (ch % 4) == 3) { |
fprintf(stdout, "\n"); |
} |
CFRelease(str); |
ch += 1; |
} while (ch != 0); |
fprintf(stdout, "};\n"); |
} |
// Per-character record built by ProcessStr and printed by
// PrintUTF8ToMacRomanTable.  The three byte arrays are used as
// NUL-terminated strings; records are calloc'd, so unused bytes are zero.
typedef struct UTF8CharInfo {
    char        utf8[16];                   // UTF-8 encoding of the character
    uint8_t     mfsEquivalent;              // MacRoman byte recorded when the character leads a string; 0 if none
    uint8_t     validCombiners[16];         // first characters of the strings in which this character appeared later
    uint8_t     macRomanEquivalents[16];    // MacRoman bytes of those strings, parallel to validCombiners
} UTF8CharInfo;
static void ProcessStr(CFMutableDictionaryRef charMap, uint8_t macRomanChar, CFStringRef str)
    // Records, in charMap, every UTF-16 code unit of str, which is one of
    // the Unicode forms of the MacRoman byte macRomanChar.
    //
    // charMap maps a one-code-unit CFString to a calloc'd UTF8CharInfo.
    // A record is created (with its utf8 field filled in) the first time
    // a code unit is seen.  The dictionary only stores the pointer; the
    // record's memory remains the caller's to free.
    //
    // o For the first code unit of str, mfsEquivalent is set to
    //   macRomanChar unless it has already been set.
    // o For any later code unit, the first code unit of str is appended
    //   to validCombiners and macRomanChar to macRomanEquivalents.
{
    CFIndex     utf16Count;
    CFIndex     utf16Index;

    utf16Count = CFStringGetLength(str);
    for (utf16Index = 0; utf16Index < utf16Count; utf16Index++) {
        CFStringRef     charStr;
        UniChar         uch;
        UTF8CharInfo *  value;

        uch = CFStringGetCharacterAtIndex(str, utf16Index);
        charStr = CFStringCreateWithCharacters(NULL, &uch, 1);
        assert(charStr != NULL);

        // Records are never NULL, so a NULL lookup result means "not present yet".

        value = (UTF8CharInfo *) CFDictionaryGetValue(charMap, charStr);
        if (value == NULL) {
            CFIndex     usedChars;
            CFIndex     utf8Count;

            value = calloc(1, sizeof(*value));
            assert(value != NULL);

            usedChars = CFStringGetBytes(charStr, CFRangeMake(0, 1), kCFStringEncodingUTF8, false, false, (uint8_t *) value->utf8, sizeof(value->utf8), &utf8Count);
            assert(usedChars == 1);
            assert(utf8Count != sizeof(value->utf8));        // no need to NULL terminate because we calloc'd

            CFDictionaryAddValue(charMap, charStr, value);
        }

        if (utf16Index == 0) {
            if (value->mfsEquivalent == 0) {
                value->mfsEquivalent = macRomanChar;
            } else {
                // assert(value->mfsEquivalent == macRomanChar);
            }
        } else {
            UniChar     leadChar;
            size_t      combinerCount;
            size_t      equivalentCount;

            leadChar = CFStringGetCharacterAtIndex(str, 0);
            assert(leadChar < 256);

            // Both arrays are read back as C strings, so the final byte
            // must stay zero; refuse to write past the second-to-last slot.

            combinerCount   = strlen( (const char *) value->validCombiners );
            equivalentCount = strlen( (const char *) value->macRomanEquivalents );
            assert(combinerCount   < (sizeof(value->validCombiners) - 1));
            assert(equivalentCount < (sizeof(value->macRomanEquivalents) - 1));

            value->validCombiners[combinerCount]        = (uint8_t) leadChar;
            value->macRomanEquivalents[equivalentCount] = macRomanChar;
        }

        CFRelease(charStr);
    }
}
static CFComparisonResult Sorter(const void *val1, const void *val2, void *context) |
{ |
CFDictionaryRef charMap; |
UTF8CharInfo * info1; |
UTF8CharInfo * info2; |
charMap = (CFDictionaryRef) context; |
info1 = (UTF8CharInfo *) CFDictionaryGetValue(charMap, (CFStringRef) val1); |
assert(info1 != NULL); |
info2 = (UTF8CharInfo *) CFDictionaryGetValue(charMap, (CFStringRef) val2); |
assert(info2 != NULL); |
return strcmp(info1->utf8, info2->utf8); |
} |
static void __attribute__ ((unused)) PrintUTF8ToMacRomanTable(void) |
// This code works, but I decided I didn't need it! |
{ |
CFMutableDictionaryRef charMap; |
uint8_t ch; |
CFStringRef str; |
CFMutableStringRef canonStr; |
CFIndex charCount; |
CFIndex charIndex; |
CFStringRef * keys; |
CFArrayRef keysArray; |
CFMutableArrayRef keysArrayM; |
charMap = CFDictionaryCreateMutable(NULL, 0, &kCFTypeDictionaryKeyCallBacks, NULL); |
assert(charMap != NULL); |
ch = 1; // start at 1 because we don't need the null character in our output |
do { |
str = CFStringCreateWithBytes(NULL, &ch, sizeof(ch), kCFStringEncodingMacRoman, false); |
assert(str != NULL); |
canonStr = CFStringCreateMutableCopy(NULL, 0, str); |
assert(canonStr != NULL); |
CFStringNormalize(canonStr, kCFStringNormalizationFormD); |
ProcessStr(charMap, ch, str); |
ProcessStr(charMap, ch, canonStr); |
CFRelease(canonStr); |
CFRelease(str); |
ch += 1; |
} while (ch != 0); |
// CFShow(charMap); |
charCount = CFDictionaryGetCount(charMap); |
keys = (CFStringRef *) malloc(charCount * sizeof(*keys)); |
assert(keys != NULL); |
CFDictionaryGetKeysAndValues(charMap, (const void **) keys, NULL); |
keysArray = CFArrayCreate(NULL, (const void **) keys, charCount, &kCFTypeArrayCallBacks); |
assert(keysArray != NULL); |
keysArrayM = CFArrayCreateMutableCopy(NULL, 0, keysArray); |
assert(keysArrayM != NULL); |
CFArraySortValues(keysArrayM, CFRangeMake(0, charCount), Sorter, charMap); |
for (charIndex = 0; charIndex < charCount; charIndex++) { |
UTF8CharInfo * info; |
char out[256]; |
uint8_t tmp[2]; |
info = (UTF8CharInfo *) CFDictionaryGetValue( charMap, CFArrayGetValueAtIndex(keysArrayM, charIndex) ); |
assert(info != NULL); |
if (strlen(info->utf8) == 0) { |
strlcpy(out, "NULL", sizeof(out)); |
} else { |
strlcpy(out, "\"", sizeof(out)); |
StrLCatHighBitPretty(out, (uint8_t *) info->utf8, sizeof(out)); |
strlcat(out, "\"", sizeof(out)); |
} |
fprintf(stdout, "{ %s, ", out); |
tmp[0] = info->mfsEquivalent; |
tmp[1] = 0; |
strlcpy(out, "\"", sizeof(out)); |
StrLCatHighBitPretty(out, tmp, sizeof(out)); |
strlcat(out, "\"", sizeof(out)); |
if ( (info->mfsEquivalent >= 32) && (info->mfsEquivalent < 127) && (info->mfsEquivalent != '\'') && (info->mfsEquivalent != '\\') ) { |
snprintf(out, sizeof(out), "'%c'", info->mfsEquivalent); |
} else { |
snprintf(out, sizeof(out), "0x%02X", info->mfsEquivalent); |
} |
fprintf(stdout, "%s, ", out); |
if (strlen( (const char *) info->validCombiners ) == 0) { |
strlcpy(out, "NULL", sizeof(out)); |
} else { |
strlcpy(out, "\"", sizeof(out)); |
StrLCatHighBitPretty(out, info->validCombiners, sizeof(out)); |
strlcat(out, "\"", sizeof(out)); |
} |
fprintf(stdout, "%s, ", out); |
if (strlen( (const char *) info->macRomanEquivalents ) == 0) { |
strlcpy(out, "NULL", sizeof(out)); |
} else { |
strlcpy(out, "\"", sizeof(out)); |
StrLCatHighBitPretty(out, info->macRomanEquivalents, sizeof(out)); |
strlcat(out, "\"", sizeof(out)); |
} |
fprintf(stdout, "%s }", out); |
if (charIndex != (charCount - 1)) { |
fprintf(stdout, ","); |
} |
fprintf(stdout, "\n"); |
} |
} |
#pragma mark ***** UTF-16 |
static void __attribute__ ((unused)) PrintMacRomanToUTF16Table(void) |
{ |
uint8_t ch; |
fprintf(stdout, "static const uint16_t kMacRomanToUTF16[256] = {\n"); |
ch = 0; |
do { |
CFStringRef str; |
CFIndex utf16Count; |
UniChar uch; |
char niceUch; |
str = CFStringCreateWithBytes(NULL, &ch, sizeof(ch), kCFStringEncodingMacRoman, false); |
assert(str != NULL); |
utf16Count = CFStringGetLength(str); |
assert(utf16Count == 1); |
uch = CFStringGetCharacterAtIndex(str, 0); |
if ( (ch % 8) == 0 ) { |
fprintf(stdout, " /* 0x%02X */", ch); |
} |
if ( (uch >= 32) && (uch < 127) ) { |
niceUch = (char) uch; |
} else { |
niceUch = ' '; |
} |
fprintf(stdout, " /* %c */ 0x%04X", niceUch, uch); |
if (ch != 255) { |
fprintf(stdout, ","); |
} |
if ( (ch % 8) == 7) { |
fprintf(stdout, "\n"); |
} |
CFRelease(str); |
ch += 1; |
} while (ch != 0); |
fprintf(stdout, "};\n"); |
} |
// One row of the UTF-16 to MacRoman lookup table: a UTF-16 code unit and
// the MacRoman byte it came from.
typedef struct UniCharInfo {
    uint16_t    utf16Char;
    uint8_t     macRomanChar;
} UniCharInfo;
static int UniCharInfoSorter(const void *p1, const void *p2) |
{ |
int result; |
UniCharInfo * info1; |
UniCharInfo * info2; |
info1 = (UniCharInfo *) p1; |
info2 = (UniCharInfo *) p2; |
if (info1->utf16Char < info2->utf16Char) { |
result = -1; |
} else if (info1->utf16Char > info2->utf16Char) { |
result = 1; |
} else { |
result = 0; |
} |
return result; |
} |
static void __attribute__ ((unused)) PrintUTF16ToMacRoman(void) |
{ |
uint8_t ch; |
CFStringRef str; |
UniCharInfo charMap[128]; |
int i; |
ch = 128; |
do { |
str = CFStringCreateWithBytes(NULL, &ch, sizeof(ch), kCFStringEncodingMacRoman, false); |
assert(str != NULL); |
assert(CFStringGetLength(str) == 1); |
charMap[ch - 128].utf16Char = CFStringGetCharacterAtIndex(str, 0); |
charMap[ch - 128].macRomanChar = ch; |
CFRelease(str); |
ch += 1; |
} while (ch != 0); |
qsort(charMap, 128, sizeof(UniCharInfo), UniCharInfoSorter); |
fprintf(stdout, "struct UniCharInfo {\n"); |
fprintf(stdout, " uint16_t utf16Char;\n"); |
fprintf(stdout, " uint8_t macRomanChar;\n"); |
fprintf(stdout, "};\n"); |
fprintf(stdout, "\n"); |
fprintf(stdout, "typedef struct UniCharInfo UniCharInfo;\n"); |
fprintf(stdout, "static const UniCharInfo kUTF16ToMacRoman[128] = {\n"); |
for (i = 0; i < 128; i++) { |
if ( (i % 8) == 0 ) { |
fprintf(stdout, " "); |
} |
fprintf(stdout, " {0x%04X, 0x%02X}", charMap[i].utf16Char, charMap[i].macRomanChar); |
if (i != 255) { |
fprintf(stdout, ","); |
} |
if ( (i % 8) == 7) { |
fprintf(stdout, "\n"); |
} |
} |
fprintf(stdout, "};\n"); |
} |
#pragma mark ***** Case Folder |
static void PrintMacRomanCaseFoldingTable(void) |
{ |
uint8_t ch; |
CFStringRef strs[256]; |
int row; |
int col; |
CFComparisonResult res; |
uint8_t theMatch; |
ch = 0; |
do { |
strs[ch] = CFStringCreateWithBytes(NULL, &ch, sizeof(ch), kCFStringEncodingMacRoman, false); |
assert(strs[ch] != NULL); |
assert(CFStringGetLength(strs[ch]) == 1); |
ch += 1; |
} while (ch != 0); |
fprintf(stdout, "static const uint16_t kMacRomanToUpper[256] = {\n"); |
for (row = 0; row < 256; row++) { |
theMatch = row; |
for (col = 0; col < row; col++) { |
res = CFStringCompare(strs[row], strs[col], kCFCompareCaseInsensitive); |
if (res == 0) { |
theMatch = col; |
} |
} |
if ( (row % 16) == 0 ) { |
fprintf(stdout, " /* 0x%02X */", row); |
} |
fprintf(stdout, " 0x%02X", theMatch); |
if (row != 255) { |
fprintf(stdout, ","); |
} |
if ( (row % 16) == 15) { |
fprintf(stdout, "\n"); |
} |
} |
fprintf(stdout, "};\n"); |
} |
static void PrintUsage(const char *argv0)
    // Prints a one-line usage message to stderr, naming the command by
    // the part of argv0 after its last '/' (or all of argv0 when it
    // contains no '/').
{
    const char *    lastSlash;
    const char *    commandStr;

    lastSlash  = strrchr(argv0, '/');
    commandStr = (lastSlash != NULL) ? (lastSlash + 1) : argv0;

    fprintf(stderr, "usage: %s\n", commandStr);
}
int main(int argc, char **argv)
    // Takes no arguments.  Writes the generated tables (the form D
    // MacRoman to UTF-8 table, the UTF-16 to MacRoman table and the
    // MacRoman case folding table) to stdout, bracketed by marker
    // comments.
    //
    // Returns EXIT_SUCCESS if the tables were written, or EXIT_FAILURE
    // after printing usage (any arguments supplied) or if writing to
    // stdout failed.
{
    int     retVal;

    if (argc != 1) {
        // argv[0] may be NULL when argc is 0; PrintUsage needs a real string.
        PrintUsage( ((argc > 0) && (argv[0] != NULL)) ? argv[0] : "TableGenerator" );
        retVal = EXIT_FAILURE;
    } else {
        fprintf(stdout, "// These tables were generated by the TableGenerator program (see \"TableGenerator.c\").\n");
        fprintf(stdout, "\n");

        PrintMacRomanToUTF8Table(1);
        fprintf(stdout, "\n");

        PrintUTF16ToMacRoman();
        fprintf(stdout, "\n");

        PrintMacRomanCaseFoldingTable();
        fprintf(stdout, "\n");

        // End the last line with a newline so the output is a well-formed
        // text file that can be included in C source as is.
        fprintf(stdout, "// End of automatically generated tables.\n");

        // A truncated table is worse than none, so report write failures.
        if ( (fflush(stdout) != 0) || ferror(stdout) ) {
            retVal = EXIT_FAILURE;
        } else {
            retVal = EXIT_SUCCESS;
        }
    }
    return retVal;
}
// Copyright © 2006 Apple Computer, Inc. All Rights Reserved. Terms of Use | Privacy Policy | Updated: 2006-11-09