/* Source: bsd/hfs/hfscommon/Unicode/UnicodeWrappers.c */




/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 * 
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 * 
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 * 
 * @APPLE_LICENSE_HEADER_END@
 */
/*
	File:		UnicodeWrappers.c

	Contains:	Wrapper routines for Unicode conversion and comparison.

	Version:	HFS Plus 1.0

	Written by:	Mark Day

	Copyright:	© 1996-1999 by Apple Computer, Inc., all rights reserved.

	File Ownership:

		DRI:				Mark Day

		Other Contact:		Don Brady

		Technology:			xxx put technology here xxx

	Writers:

		(DSH)	Deric Horn
		(msd)	Mark Day
		(djb)	Don Brady

	Change History (most recent first):
	<MOSXS>	 6/10/99	djb		Add support for Euro Sign (0x20AC) to MacRoman/Unicode conversions.
	<MOSXS>	 2/09/99	djb		Fix UnicodeToMacRoman to handle a terminating decomposed char.
	<MOSXS>	 1/22/99	djb		Add more TARGET_OS_MAC conditionals to remove orphaned code.
	<MOSXS>	  7/6/98	djb		Handle hi-bit Mac Roman characters in basic latin conversions (radar #2247519).
	<MOSXS>	 6/11/98	PPD		Added a few special-case ASCII/Unicode mappings to cover installer's needs.

	  <CS41>	 1/28/98	msd		Bug 2207446: When mangling a name, check to see if the Unicode
									Converter is installed before we call it.
	  <CS40>	 1/21/98	msd		Bug 2206836: If a name contains a colon, change it to question
									mark and mangle the name.
	  <CS39>	12/11/97	msd		For Metrowerks and test tools, call the Get_xxx routines to get
									the Unicode table addresses.
	  <CS38>	12/10/97	djb		Radar #2005461, don't use fallback chars when converting to
									Unicode, instead let the client (Catalog) retry with MacRoman.
	  <CS37>	 12/2/97	DSH		Conditionalize out some unicode related routines for DFA
	  <CS36>	11/26/97	djb		Radar #2005461,2005688 don't swallow kTECPartialCharErr errors!
	  <CS35>	11/17/97	djb		Name mangling was broken with decomposed Unicode.
	  <CS34>	11/16/97	djb		Radar #2001928 - use kUnicodeCanonicalDecompVariant variant.
	  <CS33>	11/11/97	DSH		Use Get_gLowerCaseTable for DiskFirstAid builds to avoid loading
									in a branch to the table.
	  <CS32>	 11/7/97	msd		Replace FastSimpleCompareStrings with FastUnicodeCompare (which
									handles ignorable Unicode characters). Remove the wrapper
									routine, CompareUnicodeNames, and have its callers call
									FastUnicodeCompare directly.
	  <CS31>	10/17/97	djb		Change kUnicodeUseHFSPlusMapping to kUnicodeUseLatestMapping.
	  <CS30>	10/17/97	msd		Fix some type casts for char pointers.
	  <CS29>	10/13/97	djb		Add new SPIs for Finder View font (radar #1679073).
	  <CS28>	 10/1/97	djb		Preserve current heap zone in InitializeEncodingContext routine
									(radar #1682686).
	  <CS27>	 9/17/97	djb		Handle kTECPartialCharErr errors in ConvertHFSNameToUnicode.
	  <CS26>	 9/16/97	msd		In MockConvertFromPStringToUnicode, use pragma unused instead of
									commenting out unused parameter (so SC will compile it).
	  <CS25>	 9/15/97	djb		Fix MockConverters to do either 7-bit ascii or else mangle the
									name (radar #1672388). Use 'p2u#' resource for bootstrapping
									Unicode. Make sure InitializeEncodingContext uses System heap.
	  <CS24>	 9/10/97	msd		Make InitializeEncodingContext public.
	  <CS23>	  9/7/97	djb		Handle '�' char in BasicLatinUnicode converter.
	  <CS22>	  9/4/97	djb		Add logging to BasicLatinUnicodeToPascal.
	  <CS21>	 8/26/97	djb		Make FastSimpleCompareStrings faster. Add
									BasicLatinUnicodeToPascal to make 7-bit ascii conversions
									faster.
	  <CS20>	 8/14/97	djb		Add FastRelString here (to be next to the data tables).
	  <CS19>	 7/21/97	djb		LogEndTime now takes an error code.
	  <CS18>	 7/18/97	msd		Include LowMemPriv.h, Gestalt.h, TextUtils.h.
	  <CS17>	 7/16/97	DSH		FilesInternal.i renamed FileMgrInternal.i to avoid name
									collision
	  <CS16>	  7/8/97	DSH		Loading PrecompiledHeaders from define passed in on C line
	  <CS15>	  7/8/97	DSH		InitializeUnicode changed its API
	  <CS14>	  7/1/97	DSH		SC, DFA compiler, requires parameters in functions. #pragma'd
									them out to eliminate C warnings.
	  <CS13>	 6/30/97	msd		Remove unused parameter warnings in FallbackProc by commenting
									out unused parameter names.
	  <CS12>	 6/26/97	DSH		FallbackProc declare variables before usage for SC,
									MockConverters no longer static for DFA.
	  <CS11>	 6/25/97	msd		In function InitStaticUnicodeConverter, the variable fsVars was
									being used before being initialized.
	  <CS10>	 6/24/97	DSH		Runtime checks to call through CFM or static linked routines.
	   <CS9>	 6/20/97	msd		Re-introduce fix from <CS7>. Fix another missing cast. Remove a
									spurious semicolon.
	   <CS8>	 6/18/97	djb		Add more ConversionContexts routines. Improved file mangling.
	   <CS7>	 6/16/97	msd		Add a missing cast in GetFileIDString.
	   <CS6>	 6/13/97	djb		Added support for long filenames. Switched to
									ConvertUnicodeToHFSName, ConvertHFSNameToUnicode, and
									CompareUnicodeNames.
	   <CS5>	  6/4/97	djb		Use system script instead of macRoman.
	   <CS4>	 5/19/97	djb		Add call to LockMappingTable so tables won't move!
	   <CS3>	  5/9/97	djb		Include HFSInstrumentation.h
	   <CS2>	  5/7/97	djb		Add summary traces. Add FastSimpleCompareStrings routine.
	   <CS1>	 4/24/97	djb		first checked in
	  <HFS5>	 3/27/97	djb		Add calls to real Unicode conversion routines.
	  <HFS4>	  2/6/97	msd		Add conditional code to use real Unicode comparison routines
									(default to off).
	  <HFS3>	  1/6/97	djb		Fix HFSUnicodeCompare - the final comparison of length1 and
									length2 was backwards.
	  <HFS2>	12/12/96	msd		Use precompiled headers.
	  <HFS1>	12/12/96	msd		first checked in

*/

#include "../../hfs_macos_defs.h"
#include "UCStringCompareData.h"

#include "../headers/FileMgrInternal.h"
#include "../headers/HFSUnicodeWrappers.h"

#include "ConvertUTF.h"

// Bounds on the number of characters accepted in a filename extension.
enum {
	kMinFileExtensionChars = 1,		// does not include dot
	kMaxFileExtensionChars = 5		// does not include dot
};

// Single-byte codes for the three non-7-bit characters that are still
// accepted inside a filename extension (presumably MacRoman code points,
// given the MacRoman conversions mentioned in the change history -- confirm).
#define kASCIIPiSymbol				0xB9
#define kASCIIMicroSign				0xB5
#define kASCIIGreekDelta			0xC6


// True for values in the inclusive range 0x20..0x7F (space through DEL).
// The argument is evaluated twice, so pass a side-effect-free expression.
#define Is7BitASCII(c)				( (c) >= 0x20 && (c) <= 0x7F )

// True when the byte is one of the three special single-byte codes above.
#define	IsSpecialASCIIChar(c)		( (c) == (UInt8) kASCIIMicroSign || (c) == (UInt8) kASCIIPiSymbol || (c) == (UInt8) kASCIIGreekDelta )

// Note:	the micro sign has two Unicode representations: 0x00B5 (micro sign) and 0x03BC (greek mu)
//			the delta has two Unicode representations: 0x2206 (increment) and 0x0394 (greek capital delta)
//			0x03C0 is greek pi
#define	IsSpecialUnicodeChar(c)		( (c) == 0x00B5 || (c) == 0x03BC || (c) == 0x03C0 || (c) == 0x2206 || (c) == 0x0394 )

// True for '0'..'9' and upper-case 'A'..'F' only; lower-case hex digits are rejected.
#define IsHexDigit(c)				( ((c) >= (UInt8) '0' && (c) <= (UInt8) '9') || ((c) >= (UInt8) 'A' && (c) <= (UInt8) 'F') )


// Forward declarations for the file-local helpers.
//
// GetFilenameExtension, GetFileIDString and AppendPascalString are only
// defined when TARGET_API_MAC_OS8 is set, so their prototypes carry the same
// guard; otherwise other builds declare static functions that are never
// defined.
#if TARGET_API_MAC_OS8
static void	GetFilenameExtension( ItemCount length, ConstUniCharArrayPtr unicodeStr, Str15 extStr );

static void	GetFileIDString( HFSCatalogNodeID fileID, Str15 fileIDStr );

static void AppendPascalString( ConstStr15Param src, Str31 dst );
#endif /* TARGET_API_MAC_OS8 */

static UInt32 CountFilenameExtensionChars( const unsigned char * filename, UInt32 length );

static UInt32 HexStringToInteger( UInt32 length, const UInt8 *hexStr );



//
// Get filename extension (if any) as a pascal string
//
// Scans unicodeStr backwards from its last character looking for a '.' that
// is followed by 1..kMaxFileExtensionChars characters, each of which is
// either in the 0x20..0x7F range or one of the special micro/pi/delta
// characters.  At least one character must precede the dot.
//
//	length		number of UniChars in unicodeStr
//	unicodeStr	the name to examine
//	extStr		receives the extension, including its leading dot, as a
//				pascal string; the length byte is 0 when no extension is found
//
#if TARGET_API_MAC_OS8
static void
GetFilenameExtension( ItemCount length, ConstUniCharArrayPtr unicodeStr, Str15 extStr )
{
	UInt32	i;
	UniChar	c;
	UInt16	extChars;			// number of extension characters (excluding the dot)
	UInt16	maxExtChars;
	Boolean	foundExtension;


	extStr[0] = (UInt8) 0;		// assume there's no extension

	if ( length < 3 )
		return;					// sorry, "x.y" is smallest possible extension	
	
	if ( length < (kMaxFileExtensionChars + 2) )
		maxExtChars = length - 2;	// we need at least one prefix character and dot
	else
		maxExtChars = kMaxFileExtensionChars;

	i = length;
	extChars = 0;
	foundExtension = false;

	// At most maxExtChars + 1 characters are examined, so index 0 is never
	// reached and the prefix character is guaranteed to exist.
	while ( extChars <= maxExtChars )
	{
		c = unicodeStr[--i];

		if ( c == (UniChar) '.' )		// look for leading dot
		{
			if ( extChars > 0 )			// cannot end with a dot
				foundExtension = true;
			break;
		}

		if ( Is7BitASCII(c) || IsSpecialUnicodeChar(c) )
			++extChars;
		else
			break;
	}
	
	// if we found one then copy it
	if ( foundExtension )
	{
		UInt8 *extStrPtr = extStr;
		const UniChar *unicodeStrPtr = &unicodeStr[i];	// point to dot char
		
		*(extStrPtr++) = extChars + 1;		// set length to extension chars plus dot

		for ( i = 0; i <= extChars; ++i )
		{
			c = *(unicodeStrPtr++);
			
			// Map the special characters to their single-byte codes.  The
			// named constants are used instead of character literals so the
			// result does not depend on the encoding of this source file.
			switch (c)
			{
				case 0x00B5:			// micro sign
				case 0x03BC:			// greek mu
					c = (UniChar) kASCIIMicroSign;
					break;

				case 0x03C0:			// greek pi
					c = (UniChar) kASCIIPiSymbol;
					break;

				case 0x2206:			// increment sign
				case 0x0394:			// greek capital delta
					c = (UniChar) kASCIIGreekDelta;
					break;

				default:				// everything else is already a single byte
					break;
			}

			*(extStrPtr++) = (UInt8) c;		// copy/convert to ascii
		}
	}

} // end GetFilenameExtension
#endif /* TARGET_API_MAC_OS8 */


//
// Count filename extension characters (if any)
//
// Walks backwards from the last byte of filename looking for the '.' that
// introduces an extension.  Every extension byte must be in the 0x20..0x7F
// range or be one of the special micro/pi/delta codes, and at least one
// prefix byte must precede the dot.
//
//	filename	the name to examine
//	length		number of bytes in filename, or kUndefinedStrLen to have the
//				length taken with strlen
//
// Returns the number of extension characters (excluding the dot), or 0 when
// the name has no acceptable extension.
//
static UInt32
CountFilenameExtensionChars( const unsigned char * filename, UInt32	length )
{
	const unsigned char	*cursor;	// one past the next byte to examine
	UInt32				count;		// extension bytes accepted so far
	UInt32				limit;		// most extension bytes we will accept
	UInt8				ch;

	if (length == kUndefinedStrLen)
		length = strlen(filename);

	if ( length < 3 )
		return 0;					// "x.y" is the smallest name with an extension

	// leave room for at least one prefix character and the dot
	limit = ( length < (kMaxFileExtensionChars + 2) ) ? (length - 2) : kMaxFileExtensionChars;

	cursor = filename + length;

	for ( count = 0; count <= limit; ++count )
	{
		ch = *--cursor;

		if ( ch == (UInt8) '.' )
			return count;			// zero when the name merely ends with a dot

		if ( !Is7BitASCII(ch) && !IsSpecialASCIIChar(ch) )
			return 0;				// not a legal extension character
	}

	return 0;						// too many characters before any dot

} // end CountFilenameExtensionChars


//
// Convert file ID into a hexadecimal string with no leading zeros
//
//	fileID		the catalog node ID to format
//	fileIDStr	receives a pascal string consisting of '#' followed by the
//				upper-case hex digits of fileID; a fileID of zero yields "#0"
//
#if TARGET_API_MAC_OS8
static void
GetFileIDString( HFSCatalogNodeID fileID, Str15 fileIDStr )
{
	static const char hexDigits[] = "0123456789ABCDEF";
	UInt32	len;		// index of the last byte written (the '#' lives at 1)
	UInt32	nibble;
	SInt32	shift;

	fileIDStr[1] = '#';
	len = 1;

	// walk the eight nibbles from most to least significant
	for ( shift = 28; shift >= 0; shift -= 4 )
	{
		nibble = (fileID >> shift) & 0x0000000F;

		// skip leading zeros, but always emit the final nibble
		if ( nibble == 0 && len == 1 && shift != 0 )
			continue;

		fileIDStr[++len] = (UInt8) hexDigits[nibble];
	}

	fileIDStr[0] = (UInt8) len;

} // end GetFileIDString
#endif /* TARGET_API_MAC_OS8 */


//
// Append a suffix to a pascal string
//
//	src		pascal string to append
//	dst		pascal string that is extended in place
//
// Nothing is appended when the combined length would exceed 31 characters.
//
#if TARGET_API_MAC_OS8
static void
AppendPascalString( ConstStr15Param src, Str31 dst )
{
	UInt32	suffixLen;
	UInt32	k;
	UInt8	*tail;			// first free byte after dst's current contents

	suffixLen = StrLength(src);

	if ( (suffixLen + StrLength(dst)) > 31 )	// safety net
		return;

	tail = &dst[dst[0] + 1];

	for ( k = 0; k < suffixLen; ++k )
		tail[k] = src[k + 1];	// src[0] is the length byte, so data starts at 1

	dst[0] += suffixLen;

} // end AppendPascalString
#endif /* TARGET_API_MAC_OS8 */


//
// Extract a file ID embedded in a name of the form "prefix#HEX" or
// "prefix#HEX.ext".
//
//	filename		the name to examine (may be NULL)
//	length			number of bytes in filename, or kUndefinedStrLen to have
//					the length taken with strlen
//	prefixLength	set to the index of the '#' marker when one followed by at
//					least two hex digits is found, otherwise set to 0
//
// Returns the value parsed by HexStringToInteger from the digits after the
// '#', or 0 when the name carries no file ID.
//
HFSCatalogNodeID
GetEmbeddedFileID(const unsigned char * filename, UInt32 length, UInt32 *prefixLength)
{
	UInt32	extChars;
	UInt32	i;			// same width as length, so long names are never truncated
	UInt8	c;			// current character in filename

	*prefixLength = 0;

	if ( filename == NULL )
		return 0;

	if (length == kUndefinedStrLen)
		length = strlen((const char *) filename);

	if ( length < 4 )
		return 0;		// too small to have a file ID

	if ( length >= 6 )	// big enough for a file ID (#10) and an extension (.x) ?
		extChars = CountFilenameExtensionChars(filename, length);
	else
		extChars = 0;
	
	if ( extChars > 0 )
		length -= (extChars + 1);	// skip dot plus extension characters

	// scan backwards for file id digits...
	for ( i = length; i > 0; )
	{
		c = filename[--i];

		if ( c == '#' )		// look for file ID marker
		{
			if ( (length - i) < 3 )
				break;		// too small to be a file ID (need two hex digits)

			*prefixLength = i;
			return HexStringToInteger(length - i - 1, &filename[i+1]);
		}

		if ( !IsHexDigit(c) )
			break;			// file ID string must have hex digits	
	}

	return 0;

} // end GetEmbeddedFileID


//_______________________________________________________________________

//
// Convert a string of upper-case hexadecimal digits to an integer.
//
//	length	number of characters to convert
//	hexStr	the digits ('0'..'9', 'A'..'F'); no terminator is required
//
// Returns the converted value.  Returns 0 when a character is not an
// upper-case hex digit, or when the digits do not fit in 32 bits (previously
// the high digits were silently discarded).
//
static UInt32
HexStringToInteger (UInt32 length, const UInt8 *hexStr)
{
	UInt32		value;	// value represented by the digits seen so far
	UInt32		digit;	// numeric value of the current character
	UInt32		i;		// same width as length, so the comparison is unsigned on both sides
	UInt8		c;		// next character in buffer

	value = 0;

	for ( i = 0; i < length; ++i )
	{
		c = hexStr[i];

		if (c >= '0' && c <= '9')
		{
			digit = (UInt32) c - (UInt32) '0';
		}
		else if (c >= 'A' && c <= 'F')
		{
			digit = 10 + ((UInt32) c - (UInt32) 'A');
		}
		else
		{
			return 0;	// oops, how did this character get in here?
		}

		// The top nibble is about to be shifted out; if it is non-zero the
		// number cannot be represented, so report "no value".
		if ( (value & 0xF0000000) != 0 )
			return 0;

		value = (value << 4) | digit;
	}

	return value;

} // end HexStringToInteger


//_______________________________________________________________________
//
//	Routine:	FastRelString
//
//	Output:		returns -1 if str1 < str2
//				returns  1 if str1 > str2
//				return	 0 if equal
//
//_______________________________________________________________________

// Table of unsigned shorts defined in another translation unit
// (presumably the comparison data consulted by FastRelString -- confirm).
extern unsigned short gCompareTable[];