|
|
1.1 ! root 1: /* ! 2: * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. ! 3: * ! 4: * @APPLE_LICENSE_HEADER_START@ ! 5: * ! 6: * The contents of this file constitute Original Code as defined in and ! 7: * are subject to the Apple Public Source License Version 1.1 (the ! 8: * "License"). You may not use this file except in compliance with the ! 9: * License. Please obtain a copy of the License at ! 10: * http://www.apple.com/publicsource and read it before using this file. ! 11: * ! 12: * This Original Code and all software distributed under the License are ! 13: * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER ! 14: * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, ! 15: * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, ! 16: * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the ! 17: * License for the specific language governing rights and limitations ! 18: * under the License. ! 19: * ! 20: * @APPLE_LICENSE_HEADER_END@ ! 21: */ ! 22: /* ! 23: File: UnicodeWrappers.c ! 24: ! 25: Contains: Wrapper routines for Unicode conversion and comparison. ! 26: ! 27: Version: HFS Plus 1.0 ! 28: ! 29: Written by: Mark Day ! 30: ! 31: Copyright: � 1996-1999 by Apple Computer, Inc., all rights reserved. ! 32: ! 33: File Ownership: ! 34: ! 35: DRI: Mark Day ! 36: ! 37: Other Contact: Don Brady ! 38: ! 39: Technology: xxx put technology here xxx ! 40: ! 41: Writers: ! 42: ! 43: (DSH) Deric Horn ! 44: (msd) Mark Day ! 45: (djb) Don Brady ! 46: ! 47: Change History (most recent first): ! 48: <MOSXS> 6/10/99 djb Add support for Euro Sign (0x20AC) to MacRoman/Unicode conversions. ! 49: <MOSXS> 2/09/99 djb Fix UnicodeToMacRoman to handle a terminating decomposed char. ! 50: <MOSXS> 1/22/99 djb Add more TARGET_OS_MAC conditionals to remove orphaned code. ! 51: <MOSXS> 7/6/98 djb Handle hi-bit Mac Roman characters in basic latin conversions (radar #2247519). ! 52: <MOSXS> 6/11/98 PPD Added a few special-case ASCII/Unicode mappings to cover installer's needs. ! 53: ! 54: <CS41> 1/28/98 msd Bug 2207446: When mangling a name, check to see if the Unicode ! 55: Converter is installed before we call it. ! 56: <CS40> 1/21/98 msd Bug 2206836: If a name contains a colon, change it to question ! 57: mark and mangle the name. ! 58: <CS39> 12/11/97 msd For Metrowerks and test tools, call the Get_xxx routines to get ! 59: the Unicode table addresses. ! 60: <CS38> 12/10/97 djb Radar #2005461, don't use fallback chars when converting to ! 61: Unicode, instead let the client (Catalog) retry with MacRoman. ! 62: <CS37> 12/2/97 DSH Conditionalize out some unicode related routines for DFA ! 63: <CS36> 11/26/97 djb Radar #2005461,2005688 don't swallow kTECPartialCharErr errors! ! 64: <CS35> 11/17/97 djb Name mangling was broken with decomposed Unicode. ! 65: <CS34> 11/16/97 djb Radar #2001928 - use kUnicodeCanonicalDecompVariant variant. ! 66: <CS33> 11/11/97 DSH Use Get_gLowerCaseTable for DiskFirstAid builds to avoid loading ! 67: in a branch to the table. ! 68: <CS32> 11/7/97 msd Replace FastSimpleCompareStrings with FastUnicodeCompare (which ! 69: handles ignorable Unicode characters). Remove the wrapper ! 70: routine, CompareUnicodeNames, and have its callers call ! 71: FastUnicodeCompare directly. ! 72: <CS31> 10/17/97 djb Change kUnicodeUseHFSPlusMapping to kUnicodeUseLatestMapping. ! 73: <CS30> 10/17/97 msd Fix some type casts for char pointers. ! 74: <CS29> 10/13/97 djb Add new SPIs for Finder View font (radar #1679073). ! 75: <CS28> 10/1/97 djb Preserve current heap zone in InitializeEncodingContext routine ! 76: (radar #1682686). ! 77: <CS27> 9/17/97 djb Handle kTECPartialCharErr errors in ConvertHFSNameToUnicode. ! 78: <CS26> 9/16/97 msd In MockConvertFromPStringToUnicode, use pragma unused instead of ! 79: commenting out unused parameter (so SC will compile it). ! 80: <CS25> 9/15/97 djb Fix MockConverters to do either 7-bit ascii or else mangle the ! 81: name (radar #1672388). Use 'p2u#' resource for bootstrapping ! 82: Unicode. Make sure InitializeEncodingContext uses System heap. ! 83: <CS24> 9/10/97 msd Make InitializeEncodingContext public. ! 84: <CS23> 9/7/97 djb Handle '�' char in BasicLatinUnicode converter. ! 85: <CS22> 9/4/97 djb Add logging to BasicLatinUnicodeToPascal. ! 86: <CS21> 8/26/97 djb Make FastSimpleCompareStrings faster. Add ! 87: BasicLatinUnicodeToPascal to make 7-bit ascii conversions ! 88: faster. ! 89: <CS20> 8/14/97 djb Add FastRelString here (to be next to the data tables). ! 90: <CS19> 7/21/97 djb LogEndTime now takes an error code. ! 91: <CS18> 7/18/97 msd Include LowMemPriv.h, Gestalt.h, TextUtils.h. ! 92: <CS17> 7/16/97 DSH FilesInternal.i renamed FileMgrInternal.i to avoid name ! 93: collision ! 94: <CS16> 7/8/97 DSH Loading PrecompiledHeaders from define passed in on C line ! 95: <CS15> 7/8/97 DSH InitializeUnicode changed its API ! 96: <CS14> 7/1/97 DSH SC, DFA complier, requires parameters in functions. #pragma'd ! 97: them out to eliminate C warnings. ! 98: <CS13> 6/30/97 msd Remove unused parameter warnings in FallbackProc by commenting ! 99: out unused parameter names. ! 100: <CS12> 6/26/97 DSH FallbackProc declare variables before useage for SC, ! 101: MockConverters no longer static for DFA. ! 102: <CS11> 6/25/97 msd In function InitStaticUnicodeConverter, the variable fsVars was ! 103: being used before being initialized. ! 104: <CS10> 6/24/97 DSH Runtime checks to call through CFM or static linked routines. ! 105: <CS9> 6/20/97 msd Re-introduce fix from <CS7>. Fix another missing cast. Remove a ! 106: spurious semicolon. ! 107: <CS8> 6/18/97 djb Add more ConversionContexts routines. Improved file mangling. ! 108: <CS7> 6/16/97 msd Add a missing cast in GetFileIDString. ! 109: <CS6> 6/13/97 djb Added support for long filenames. Switched to ! 110: ConvertUnicodeToHFSName, ConvertHFSNameToUnicode, and ! 111: CompareUnicodeNames. ! 112: <CS5> 6/4/97 djb Use system script instead of macRoman. ! 113: <CS4> 5/19/97 djb Add call to LockMappingTable so tables won't move! ! 114: <CS3> 5/9/97 djb Include HFSInstrumentation.h ! 115: <CS2> 5/7/97 djb Add summary traces. Add FastSimpleCompareStrings routine. ! 116: <CS1> 4/24/97 djb first checked in ! 117: <HFS5> 3/27/97 djb Add calls to real Unicode conversion routines. ! 118: <HFS4> 2/6/97 msd Add conditional code to use real Unicode comparison routines ! 119: (default to off). ! 120: <HFS3> 1/6/97 djb Fix HFSUnicodeCompare - the final comparison of length1 and ! 121: length2 was backwards. ! 122: <HFS2> 12/12/96 msd Use precompiled headers. ! 123: <HFS1> 12/12/96 msd first checked in ! 124: ! 125: */ ! 126: ! 127: #include "../../hfs_macos_defs.h" ! 128: #include "UCStringCompareData.h" ! 129: ! 130: #include "../headers/FileMgrInternal.h" ! 131: #include "../headers/HFSUnicodeWrappers.h" ! 132: ! 133: #include "ConvertUTF.h" ! 134: ! 135: enum { ! 136: kMinFileExtensionChars = 1, // does not include dot ! 137: kMaxFileExtensionChars = 5 // does not include dot ! 138: }; ! 139: ! 140: #define kASCIIPiSymbol 0xB9 ! 141: #define kASCIIMicroSign 0xB5 ! 142: #define kASCIIGreekDelta 0xC6 ! 143: ! 144: ! 145: #define Is7BitASCII(c) ( (c) >= 0x20 && (c) <= 0x7F ) ! 146: ! 147: #define IsSpecialASCIIChar(c) ( (c) == (UInt8) kASCIIMicroSign || (c) == (UInt8) kASCIIPiSymbol || (c) == (UInt8) kASCIIGreekDelta ) ! 148: ! 149: // Note: '�' has two Unicode representations 0x00B5 (micro sign) and 0x03BC (greek) ! 150: // '�' has two Unicode representations 0x2206 (increment) and 0x0394 (greek) ! 151: #define IsSpecialUnicodeChar(c) ( (c) == 0x00B5 || (c) == 0x03BC || (c) == 0x03C0 || (c) == 0x2206 || (c) == 0x0394 ) ! 152: ! 153: #define IsHexDigit(c) ( ((c) >= (UInt8) '0' && (c) <= (UInt8) '9') || ((c) >= (UInt8) 'A' && (c) <= (UInt8) 'F') ) ! 154: ! 155: ! 156: static void GetFilenameExtension( ItemCount length, ConstUniCharArrayPtr unicodeStr, Str15 extStr ); ! 157: ! 158: static void GetFileIDString( HFSCatalogNodeID fileID, Str15 fileIDStr ); ! 159: ! 160: static void AppendPascalString( ConstStr15Param src, Str31 dst ); ! 161: ! 162: static UInt32 HexStringToInteger( UInt32 length, const UInt8 *hexStr ); ! 163: ! 164: ! 165: ! 166: // ! 167: // Get filename extension (if any) as a pascal string ! 168: // ! 169: #if TARGET_API_MAC_OS8 ! 170: static void ! 171: GetFilenameExtension( ItemCount length, ConstUniCharArrayPtr unicodeStr, Str15 extStr ) ! 172: { ! 173: UInt32 i; ! 174: UniChar c; ! 175: UInt16 extChars; // number of extension characters (excluding the dot) ! 176: UInt16 maxExtChars; ! 177: Boolean foundExtension; ! 178: ! 179: ! 180: extStr[0] = (UInt8) 0; // assume there's no extension ! 181: ! 182: if ( length < 3 ) ! 183: return; // sorry, "x.y" is smallest possible extension ! 184: ! 185: if ( length < (kMaxFileExtensionChars + 2) ) ! 186: maxExtChars = length - 2; // we need at least one prefix character and dot ! 187: else ! 188: maxExtChars = kMaxFileExtensionChars; ! 189: ! 190: i = length; ! 191: extChars = 0; ! 192: foundExtension = false; ! 193: ! 194: while ( extChars <= maxExtChars ) ! 195: { ! 196: c = unicodeStr[--i]; ! 197: ! 198: if ( c == (UniChar) '.' ) // look for leading dot ! 199: { ! 200: if ( extChars > 0 ) // cannot end with a dot ! 201: foundExtension = true; ! 202: break; ! 203: } ! 204: ! 205: if ( Is7BitASCII(c) || IsSpecialUnicodeChar(c) ) ! 206: ++extChars; ! 207: else ! 208: break; ! 209: } ! 210: ! 211: // if we found one then copy it ! 212: if ( foundExtension ) ! 213: { ! 214: UInt8 *extStrPtr = extStr; ! 215: const UniChar *unicodeStrPtr = &unicodeStr[i]; // point to dot char ! 216: ! 217: *(extStrPtr++) = extChars + 1; // set length to extension chars plus dot ! 218: ! 219: for ( i = 0; i <= extChars; ++i ) ! 220: { ! 221: c = *(unicodeStrPtr++); ! 222: ! 223: // map any special characters ! 224: switch (c) ! 225: { ! 226: case 0x00B5: // micro sign ! 227: case 0x03BC: // greek mu ! 228: c = (UniChar) '�'; ! 229: break; ! 230: ! 231: case 0x03C0: // greek pi ! 232: c = (UniChar) '�'; ! 233: break; ! 234: ! 235: case 0x2206: // increment sign ! 236: case 0x0394: // greek capital delta ! 237: c = (UniChar) '�'; ! 238: break; ! 239: } ! 240: ! 241: *(extStrPtr++) = (UInt8) c; // copy/convert to ascii ! 242: } ! 243: } ! 244: ! 245: } // end GetFilenameExtension ! 246: #endif /* TARGET_API_MAC_OS8 */ ! 247: ! 248: ! 249: // ! 250: // Count filename extension characters (if any) ! 251: // ! 252: static UInt32 ! 253: CountFilenameExtensionChars( const unsigned char * filename, UInt32 length ) ! 254: { ! 255: UInt32 i; ! 256: UniChar c; ! 257: UInt32 extChars; // number of extension characters (excluding the dot) ! 258: UInt16 maxExtChars; ! 259: Boolean foundExtension; ! 260: ! 261: ! 262: if (length == kUndefinedStrLen) ! 263: length = strlen(filename); ! 264: ! 265: if ( length < 3 ) ! 266: return 0; // sorry, "x.y" is smallest possible extension ! 267: ! 268: if ( length < (kMaxFileExtensionChars + 2) ) ! 269: maxExtChars = length - 2; // we need at least on prefix character and dot ! 270: else ! 271: maxExtChars = kMaxFileExtensionChars; ! 272: ! 273: extChars = 0; // assume there's no extension ! 274: i = length - 1; // index to last ascii character ! 275: foundExtension = false; ! 276: ! 277: while ( extChars <= maxExtChars ) ! 278: { ! 279: c = filename[i--]; ! 280: ! 281: if ( c == (UInt8) '.' ) // look for leading dot ! 282: { ! 283: if ( extChars > 0 ) // cannot end with a dot ! 284: return (extChars); ! 285: ! 286: break; ! 287: } ! 288: ! 289: if ( Is7BitASCII(c) || IsSpecialASCIIChar(c) ) ! 290: ++extChars; ! 291: else ! 292: break; ! 293: } ! 294: ! 295: return 0; ! 296: ! 297: } // end CountFilenameExtensionChars ! 298: ! 299: ! 300: // ! 301: // Convert file ID into a hexidecimal string with no leading zeros ! 302: // ! 303: #if TARGET_API_MAC_OS8 ! 304: static void ! 305: GetFileIDString( HFSCatalogNodeID fileID, Str15 fileIDStr ) ! 306: { ! 307: SInt32 i, b; ! 308: static UInt8 *translate = (UInt8 *) "0123456789ABCDEF"; ! 309: UInt8 c; ! 310: ! 311: fileIDStr[1] = '#'; ! 312: ! 313: for ( i = 1, b = 28; b >= 0; b -= 4 ) ! 314: { ! 315: c = *(translate + ((fileID >> b) & 0x0000000F)); ! 316: ! 317: // if its not a leading zero add it to our string ! 318: if ( (c != (UInt8) '0') || (i > 1) || (b == 0) ) ! 319: fileIDStr[++i] = c; ! 320: } ! 321: ! 322: fileIDStr[0] = (UInt8) i; ! 323: ! 324: } // end GetFileIDString ! 325: #endif /* TARGET_API_MAC_OS8 */ ! 326: ! 327: ! 328: // ! 329: // Append a suffix to a pascal string ! 330: // ! 331: #if TARGET_API_MAC_OS8 ! 332: static void ! 333: AppendPascalString( ConstStr15Param src, Str31 dst ) ! 334: { ! 335: UInt32 i, j; ! 336: UInt32 srcLen; ! 337: ! 338: srcLen = StrLength(src); ! 339: ! 340: if ( (srcLen + StrLength(dst)) > 31 ) // safety net ! 341: return; ! 342: ! 343: i = dst[0] + 1; // get end of dst ! 344: ! 345: for (j = 1; j <= srcLen; ++j) ! 346: dst[i++] = src[j]; ! 347: ! 348: dst[0] += srcLen; ! 349: ! 350: } // end AppendPascalString ! 351: #endif /* TARGET_API_MAC_OS8 */ ! 352: ! 353: ! 354: HFSCatalogNodeID ! 355: GetEmbeddedFileID(const unsigned char * filename, UInt32 length, UInt32 *prefixLength) ! 356: { ! 357: short extChars; ! 358: short i; ! 359: UInt8 c; // current character in filename ! 360: ! 361: *prefixLength = 0; ! 362: ! 363: if ( filename == NULL ) ! 364: return 0; ! 365: ! 366: if (length == kUndefinedStrLen) ! 367: length = strlen(filename); ! 368: ! 369: if ( length < 4 ) ! 370: return 0; // too small to have a file ID ! 371: ! 372: if ( length >= 6 ) // big enough for a file ID (#10) and an extension (.x) ? ! 373: extChars = CountFilenameExtensionChars(filename, length); ! 374: else ! 375: extChars = 0; ! 376: ! 377: if ( extChars > 0 ) ! 378: length -= (extChars + 1); // skip dot plus extension characters ! 379: ! 380: // scan for file id digits... ! 381: for ( i = length - 1; i >= 0; --i) ! 382: { ! 383: c = filename[i]; ! 384: ! 385: if ( c == '#' ) // look for file ID marker ! 386: { ! 387: if ( (length - i) < 3 ) ! 388: break; // too small to be a file ID ! 389: ! 390: *prefixLength = i; ! 391: return HexStringToInteger(length - i - 1, &filename[i+1]); ! 392: } ! 393: ! 394: if ( !IsHexDigit(c) ) ! 395: break; // file ID string must have hex digits ! 396: } ! 397: ! 398: return 0; ! 399: ! 400: } // end GetEmbeddedFileID ! 401: ! 402: ! 403: //_______________________________________________________________________ ! 404: ! 405: static UInt32 ! 406: HexStringToInteger (UInt32 length, const UInt8 *hexStr) ! 407: { ! 408: UInt32 value; // decimal value represented by the string ! 409: short i; ! 410: UInt8 c; // next character in buffer ! 411: const UInt8 *p; // pointer to character string ! 412: ! 413: value = 0; ! 414: p = hexStr; ! 415: ! 416: for ( i = 0; i < length; ++i ) ! 417: { ! 418: c = *p++; ! 419: ! 420: if (c >= '0' && c <= '9') ! 421: { ! 422: value = value << 4; ! 423: value += (UInt32) c - (UInt32) '0'; ! 424: } ! 425: else if (c >= 'A' && c <= 'F') ! 426: { ! 427: value = value << 4; ! 428: value += 10 + ((unsigned int) c - (unsigned int) 'A'); ! 429: } ! 430: else ! 431: { ! 432: return 0; // oops, how did this character get in here? ! 433: } ! 434: } ! 435: ! 436: return value; ! 437: ! 438: } // end HexStringToInteger ! 439: ! 440: ! 441: //_______________________________________________________________________ ! 442: // ! 443: // Routine: FastRelString ! 444: // ! 445: // Output: returns -1 if str1 < str2 ! 446: // returns 1 if str1 > str2 ! 447: // return 0 if equal ! 448: // ! 449: //_______________________________________________________________________ ! 450: ! 451: extern unsigned short gCompareTable[]; ! 452: ! 453: SInt32 FastRelString( ConstStr255Param str1, ConstStr255Param str2 ) ! 454: { ! 455: UInt16* compareTable; ! 456: SInt32 bestGuess; ! 457: UInt8 length, length2; ! 458: UInt8 delta; ! 459: ! 460: delta = 0; ! 461: length = *(str1++); ! 462: length2 = *(str2++); ! 463: ! 464: if (length == length2) ! 465: bestGuess = 0; ! 466: else if (length < length2) ! 467: { ! 468: bestGuess = -1; ! 469: delta = length2 - length; ! 470: } ! 471: else ! 472: { ! 473: bestGuess = 1; ! 474: length = length2; ! 475: } ! 476: ! 477: compareTable = (UInt16*) gCompareTable; ! 478: ! 479: while (length--) ! 480: { ! 481: UInt8 aChar, bChar; ! 482: ! 483: aChar = *(str1++); ! 484: bChar = *(str2++); ! 485: ! 486: if (aChar != bChar) // If they don't match exacly, do case conversion ! 487: { ! 488: UInt16 aSortWord, bSortWord; ! 489: ! 490: aSortWord = compareTable[aChar]; ! 491: if (bChar == 0 && delta == 1) { ! 492: bChar = *(str2++); /* skip over embedded null */ ! 493: bestGuess = 0; ! 494: } ! 495: bSortWord = compareTable[bChar]; ! 496: ! 497: if (aSortWord > bSortWord) ! 498: return 1; ! 499: ! 500: if (aSortWord < bSortWord) ! 501: return -1; ! 502: } ! 503: ! 504: // If characters match exactly, then go on to next character immediately without ! 505: // doing any extra work. ! 506: } ! 507: ! 508: // if you got to here, then return bestGuess ! 509: return bestGuess; ! 510: } ! 511: ! 512: ! 513: ! 514: // ! 515: // FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering ! 516: // ! 517: // IF RESULT ! 518: // -------------------------- ! 519: // str1 < str2 => -1 ! 520: // str1 = str2 => 0 ! 521: // str1 > str2 => +1 ! 522: // ! 523: // The lower case table starts with 256 entries (one for each of the upper bytes ! 524: // of the original Unicode char). If that entry is zero, then all characters with ! 525: // that upper byte are already case folded. If the entry is non-zero, then it is ! 526: // the _index_ (not byte offset) of the start of the sub-table for the characters ! 527: // with that upper byte. All ignorable characters are folded to the value zero. ! 528: // ! 529: // In pseudocode: ! 530: // ! 531: // Let c = source Unicode character ! 532: // Let table[] = lower case table ! 533: // ! 534: // lower = table[highbyte(c)] ! 535: // if (lower == 0) ! 536: // lower = c ! 537: // else ! 538: // lower = table[lower+lowbyte(c)] ! 539: // ! 540: // if (lower == 0) ! 541: // ignore this character ! 542: // ! 543: // To handle ignorable characters, we now need a loop to find the next valid character. ! 544: // Also, we can't pre-compute the number of characters to compare; the string length might ! 545: // be larger than the number of non-ignorable characters. Further, we must be able to handle ! 546: // ignorable characters at any point in the string, including as the first or last characters. ! 547: // We use a zero value as a sentinel to detect both end-of-string and ignorable characters. ! 548: // Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename, ! 549: // the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is ! 550: // an invalid Unicode character). ! 551: // ! 552: // Pseudocode: ! 553: // ! 554: // while (1) { ! 555: // c1 = GetNextValidChar(str1) // returns zero if at end of string ! 556: // c2 = GetNextValidChar(str2) ! 557: // ! 558: // if (c1 != c2) break // found a difference ! 559: // ! 560: // if (c1 == 0) // reached end of string on both strings at once? ! 561: // return 0; // yes, so strings are equal ! 562: // } ! 563: // ! 564: // // When we get here, c1 != c2. So, we just need to determine which one is less. ! 565: // if (c1 < c2) ! 566: // return -1; ! 567: // else ! 568: // return 1; ! 569: // ! 570: ! 571: extern UInt16 gLowerCaseTable[]; ! 572: extern UInt16 gLatinCaseFold[]; ! 573: ! 574: SInt32 FastUnicodeCompare ( register ConstUniCharArrayPtr str1, register ItemCount length1, ! 575: register ConstUniCharArrayPtr str2, register ItemCount length2) ! 576: { ! 577: register UInt16 c1,c2; ! 578: register UInt16 temp; ! 579: register UInt16* lowerCaseTable; ! 580: ! 581: lowerCaseTable = (UInt16*) gLowerCaseTable; ! 582: ! 583: while (1) { ! 584: /* Set default values for c1, c2 in case there are no more valid chars */ ! 585: c1 = 0; ! 586: c2 = 0; ! 587: ! 588: /* Find next non-ignorable char from str1, or zero if no more */ ! 589: while (length1 && c1 == 0) { ! 590: c1 = *(str1++); ! 591: --length1; ! 592: /* check for basic latin first */ ! 593: if (c1 < 0x0100) { ! 594: c1 = gLatinCaseFold[c1]; ! 595: break; ! 596: } ! 597: /* case fold if neccessary */ ! 598: if ((temp = lowerCaseTable[c1>>8]) != 0) ! 599: c1 = lowerCaseTable[temp + (c1 & 0x00FF)]; ! 600: } ! 601: ! 602: ! 603: /* Find next non-ignorable char from str2, or zero if no more */ ! 604: while (length2 && c2 == 0) { ! 605: c2 = *(str2++); ! 606: --length2; ! 607: /* check for basic latin first */ ! 608: if (c2 < 0x0100) { ! 609: if ((c2 = gLatinCaseFold[c2]) != 0) ! 610: break; ! 611: else ! 612: continue; /* ignore this character */ ! 613: } ! 614: /* case fold if neccessary */ ! 615: if ((temp = lowerCaseTable[c2>>8]) != 0) ! 616: c2 = lowerCaseTable[temp + (c2 & 0x00FF)]; ! 617: } ! 618: ! 619: if (c1 != c2) // found a difference, so stop looping ! 620: break; ! 621: ! 622: if (c1 == 0) // did we reach the end of both strings at the same time? ! 623: return 0; // yes, so strings are equal ! 624: } ! 625: ! 626: if (c1 < c2) ! 627: return -1; ! 628: else ! 629: return 1; ! 630: } ! 631: ! 632: ! 633: OSErr ! 634: ConvertUTF8ToUnicode(ByteCount srcLen, const unsigned char* srcStr, ByteCount maxDstLen, ! 635: ByteCount *actualDstLen, UniCharArrayPtr dstStr) ! 636: { ! 637: ConversionResult result; ! 638: UTF8* sourceStart; ! 639: UTF8* sourceEnd; ! 640: UTF16* targetStart; ! 641: UTF16* targetEnd; ! 642: ! 643: sourceStart = (UTF8*) srcStr; ! 644: sourceEnd = sourceStart + srcLen; ! 645: targetStart = (UTF16*) dstStr; ! 646: targetEnd = targetStart + maxDstLen/2; ! 647: ! 648: result = ConvertUTF8toUTF16 (&sourceStart, sourceEnd, &targetStart, targetEnd); ! 649: ! 650: *actualDstLen = (targetStart - dstStr) * sizeof(UniChar); ! 651: ! 652: if (result == targetExhausted) ! 653: return kTECOutputBufferFullStatus; ! 654: else if (result == sourceExhausted) ! 655: return kTextMalformedInputErr; ! 656: ! 657: return noErr; ! 658: } ! 659: ! 660: ! 661: OSErr ! 662: ConvertUnicodeToUTF8(ByteCount srcLen, ConstUniCharArrayPtr srcStr, ByteCount maxDstLen, ! 663: ByteCount *actualDstLen, unsigned char* dstStr) ! 664: { ! 665: ConversionResult result; ! 666: UTF16* sourceStart; ! 667: UTF16* sourceEnd; ! 668: UTF8* targetStart; ! 669: UTF8* targetEnd; ! 670: ByteCount outputLength; ! 671: ! 672: sourceStart = (UTF16*) srcStr; ! 673: sourceEnd = (UTF16*) ((char*) srcStr + srcLen); ! 674: targetStart = (UTF8*) dstStr; ! 675: targetEnd = targetStart + maxDstLen; ! 676: ! 677: result = ConvertUTF16toUTF8 (&sourceStart, sourceEnd, &targetStart, targetEnd); ! 678: ! 679: *actualDstLen = outputLength = targetStart - dstStr; ! 680: ! 681: if (result == targetExhausted) ! 682: return kTECOutputBufferFullStatus; ! 683: else if (result == sourceExhausted) ! 684: return kTECPartialCharErr; ! 685: ! 686: if (outputLength >= maxDstLen) ! 687: return kTECOutputBufferFullStatus; ! 688: ! 689: dstStr[outputLength] = 0; /* also add null termination */ ! 690: ! 691: return noErr; ! 692: } ! 693:
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.