|
|
1.1 root 1: /*
2: Hatari - str.c
3:
1.1.1.6 root 4: This file is distributed under the GNU General Public License, version 2
5: or at your option any later version. Read the file gpl.txt for details.
1.1 root 6:
7: String functions.
8: */
1.1.1.3 root 9: const char Str_fileid[] = "Hatari str.c : " __DATE__ " " __TIME__;
1.1 root 10:
1.1.1.3 root 11: #include <stdio.h>
1.1 root 12: #include <ctype.h>
13: #include <stdbool.h>
1.1.1.7 root 14: #include <stdlib.h>
1.1.1.8 ! root 15: #include <locale.h>
1.1.1.3 root 16: #include <SDL_types.h>
17: #include "configuration.h"
1.1 root 18: #include "str.h"
19:
1.1.1.8 ! root 20: /* Used only by Str_Filename2TOSname() */
! 21: static void Str_HostToAtari(const char *source, char *dest, char replacementChar);
! 22:
1.1 root 23:
/**
 * Strip leading and trailing whitespace from a string, in place.
 *
 * The remaining text is shifted down so that it begins at 'buffer'
 * itself; the same pointer is returned for convenience.
 * A NULL argument is passed straight through as NULL.
 */
char *Str_Trim(char *buffer)
{
	int skip = 0;
	int remaining;

	if (!buffer)
		return NULL;

	remaining = strlen(buffer);

	/* count the whitespace characters at the front */
	while (skip < remaining && isspace((unsigned char)buffer[skip]))
		skip++;

	/* Move the text to the front.  Nothing is moved when there is no
	 * leading whitespace or when the string is whitespace only (the
	 * trailing scan below empties it in that case). */
	if (skip != 0 && skip != remaining)
	{
		remaining -= skip;
		memmove(buffer, &buffer[skip], remaining);
	}

	/* walk back over the whitespace at the end */
	while (remaining > 0 && isspace((unsigned char)buffer[remaining - 1]))
		remaining--;

	buffer[remaining] = '\0';

	return buffer;
}
60:
61:
/**
 * Upper-case every character of a string in place using toupper().
 * Returns the pointer that was passed in.
 */
char *Str_ToUpper(char *pString)
{
	char *cursor;

	for (cursor = pString; *cursor != '\0'; cursor++)
	{
		/* go through unsigned char so bytes >= 0x80 are safe for <ctype.h> */
		*cursor = toupper((unsigned char)*cursor);
	}
	return pString;
}
75:
76:
/**
 * Lower-case every character of a string in place using tolower().
 * Returns the pointer that was passed in.
 */
char *Str_ToLower(char *pString)
{
	int pos;

	for (pos = 0; pString[pos] != '\0'; pos++)
	{
		/* go through unsigned char so bytes >= 0x80 are safe for <ctype.h> */
		pString[pos] = tolower((unsigned char)pString[pos]);
	}
	return pString;
}
90:
91:
92: /**
1.1.1.4 root 93: * truncate string at first unprintable char (e.g. newline).
1.1 root 94: */
1.1.1.8 ! root 95: #if 0
char *Str_Trunc(char *pString)
{
	char *str;

	/* Cut the string at the first byte that isprint() rejects and
	 * return the pointer that was passed in.
	 *
	 * The byte is converted to unsigned char before the call: handing
	 * a negative char (or the huge unsigned int such a char converts
	 * to) to a <ctype.h> function is undefined behaviour. */
	for (str = pString; *str != '\0'; str++)
	{
		if (!isprint((unsigned char)*str))
		{
			*str = '\0';
			break;
		}
	}
	return pString;
}
1.1.1.8 ! root 111: #endif
1.1 root 112:
113: /**
114: * check if string is valid hex number.
115: */
1.1.1.8 ! root 116: #if 0
bool Str_IsHex(const char *str)
{
	const char *p;

	/* Only the part up to the first space (or the end of the string)
	 * is examined; an empty part therefore counts as valid.
	 *
	 * Each byte is converted to unsigned char before the call:
	 * handing a negative char (or the huge unsigned int such a char
	 * converts to) to a <ctype.h> function is undefined behaviour. */
	for (p = str; *p != '\0' && *p != ' '; p++)
	{
		if (!isxdigit((unsigned char)*p))
			return false;
	}
	return true;
}
1.1.1.8 ! root 128: #endif
1.1.1.5 root 129:
130: /**
131: * Convert potentially too long host filenames to 8.3 TOS filenames
132: * by truncating extension and part before it, replacing invalid
133: * GEMDOS file name characters with INVALID_CHAR + upcasing the result.
134: *
135: * Matching them from the host file system should first try exact
136: * case-insensitive match, and then with a pattern that takes into
137: * account the conversion done in here.
138: */
139: void Str_Filename2TOSname(const char *source, char *dst)
140: {
141: char *dot, *tmp, *src;
142: int len;
143:
144: src = strdup(source); /* dup so that it can be modified */
1.1.1.8 ! root 145:
! 146: /* convert host string encoding to AtariST character set */
! 147: Str_HostToAtari(source, src, INVALID_CHAR);
1.1.1.5 root 148: len = strlen(src);
149:
150: /* does filename have an extension? */
151: dot = strrchr(src, '.');
152: if (dot)
153: {
154: /* limit extension to 3 chars */
155: if (src + len - dot > 3)
156: dot[4] = '\0';
157:
158: /* if there are extra dots, convert them */
159: for (tmp = src; tmp < dot; tmp++)
160: if (*tmp == '.')
161: *tmp = INVALID_CHAR;
162:
1.1.1.6 root 163: /* limit part before extension to 8 chars */
164: if (dot - src > 8)
165: memmove(src + 8, dot, strlen(dot) + 1);
1.1.1.5 root 166: }
1.1.1.6 root 167: else if (len > 8)
168: src[8] = '\0';
169:
170: strcpy(dst, src);
1.1.1.5 root 171: free(src);
172:
1.1.1.6 root 173: /* upcase and replace rest of invalid characters */
1.1.1.5 root 174: for (tmp = dst; *tmp; tmp++)
175: {
1.1.1.8 ! root 176: /* invalid characters above 0x80 have already been replaced */
! 177: if (((unsigned char)*tmp) < 32 || *tmp == 127)
1.1.1.5 root 178: *tmp = INVALID_CHAR;
179: else
180: {
181: switch (*tmp)
182: {
183: case '*':
184: case '/':
185: case ':':
186: case '?':
187: case '\\':
188: case '{':
189: case '}':
190: *tmp = INVALID_CHAR;
1.1.1.6 root 191: break;
192: default:
1.1.1.8 ! root 193: if (((unsigned char)*tmp) < 128)
1.1.1.7 root 194: *tmp = toupper((unsigned char)*tmp);
1.1.1.5 root 195: }
196: }
197: }
198: }
1.1.1.7 root 199:
200:
1.1.1.8 ! root 201: /* ---------------------------------------------------------------------- */
! 202:
! 203: /* Implementation of character set conversions */
! 204:
/* Maps AtariST characters 0x80..0xFF to unicode code points
 * see http://www.unicode.org/Public/MAPPINGS/VENDORS/MISC/ATARIST.TXT
 *
 * Index = AtariST character code - 0x80.  The table is only ever read,
 * hence const.
 */
static const int mapAtariToUnicode[128] =
{
	/* 0x80 */ 0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7,
	/* 0x88 */ 0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5,
	/* 0x90 */ 0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9,
	/* 0x98 */ 0x00FF, 0x00D6, 0x00DC, 0x00A2, 0x00A3, 0x00A5, 0x00DF, 0x0192,
	/* 0xA0 */ 0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA,
	/* 0xA8 */ 0x00BF, 0x2310, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB,
	/* 0xB0 */ 0x00E3, 0x00F5, 0x00D8, 0x00F8, 0x0153, 0x0152, 0x00C0, 0x00C3,
	/* 0xB8 */ 0x00D5, 0x00A8, 0x00B4, 0x2020, 0x00B6, 0x00A9, 0x00AE, 0x2122,
	/* 0xC0 */ 0x0133, 0x0132, 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5,
	/* 0xC8 */ 0x05D6, 0x05D7, 0x05D8, 0x05D9, 0x05DB, 0x05DC, 0x05DE, 0x05E0,
	/* 0xD0 */ 0x05E1, 0x05E2, 0x05E4, 0x05E6, 0x05E7, 0x05E8, 0x05E9, 0x05EA,
	/* 0xD8 */ 0x05DF, 0x05DA, 0x05DD, 0x05E3, 0x05E5, 0x00A7, 0x2227, 0x221E,
	/* 0xE0 */ 0x03B1, 0x03B2, 0x0393, 0x03C0, 0x03A3, 0x03C3, 0x00B5, 0x03C4,
	/* 0xE8 */ 0x03A6, 0x0398, 0x03A9, 0x03B4, 0x222E, 0x03C6, 0x2208, 0x2229,
	/* 0xF0 */ 0x2261, 0x00B1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00F7, 0x2248,
	/* 0xF8 */ 0x00B0, 0x2219, 0x00B7, 0x221A, 0x207F, 0x00B2, 0x00B3, 0x00AF
};
! 227:
/* Set once initCharacterMappings() has filled mapUnicodeToAtari[]. */
static bool characterMappingsInitialized = false;

/* Reverse lookup for mapAtariToUnicode[]: a hash table indexed by the
 * lowest 9 bits of a unicode code point.  Each used slot holds the index
 * (0..127) of the matching mapAtariToUnicode[] entry, i.e. the AtariST
 * character code minus 0x80.  According to the original author, the low
 * 9 bits give a collision-free hash for the code points in that table.
 * Unused slots stay 0, so a lookup result must always be confirmed by
 * comparing mapAtariToUnicode[slot] with the code point searched for.
 */
static char mapUnicodeToAtari[512];
! 234:
! 235: /**
! 236: * This function initializes the mapUnicodeToAtari[] hashtable.
! 237: */
! 238: static void initCharacterMappings(void)
! 239: {
! 240: int i;
! 241: for (i = 0; i < 128; i++)
! 242: {
! 243: mapUnicodeToAtari[mapAtariToUnicode[i] & 511] = i;
! 244: }
! 245: characterMappingsInitialized = true;
! 246:
! 247: #if defined(WIN32) || defined(USE_LOCALE_CHARSET)
! 248: setlocale(LC_ALL, "");
! 249: #endif
! 250: }
! 251:
! 252: #if !(defined(WIN32) || defined(USE_LOCALE_CHARSET))
! 253: /**
! 254: * Convert a 0-terminated string in the AtariST character set to a 0-terminated
! 255: * UTF-8 encoded string. destLen is the number of available bytes in dest[].
! 256: * A single character of the AtariST charset can consume up to 3 bytes in UTF-8.
! 257: */
! 258: static void Str_AtariToUtf8(const char *source, char *dest, int destLen)
! 259: {
! 260: int c;
! 261: while (*source)
! 262: {
! 263: c = *source++ & 255;
! 264: if (c >= 128)
! 265: {
! 266: c = mapAtariToUnicode[c & 127];
! 267: }
! 268: if (c < 128 && destLen > 1)
! 269: {
! 270: *dest++ = c; /* 0xxxxxxx */
! 271: destLen--;
! 272: }
! 273: else if (c < 2048 && destLen > 2)
! 274: {
! 275: *dest++ = (c >> 6) | 192; /* 110xxxxx */
! 276: *dest++ = (c & 63) | 128; /* 10xxxxxx */
! 277: destLen -= 2;
! 278: }
! 279: else if (destLen > 3)
! 280: {
! 281: *dest++ = (c >> 12) | 224; /* 1110xxxx */
! 282: *dest++ = ((c >> 6) & 63) | 128; /* 10xxxxxx */
! 283: *dest++ = (c & 63) | 128; /* 10xxxxxx */
! 284: destLen -= 3;
! 285: }
! 286: }
! 287: *dest = 0;
! 288: }
! 289:
! 290: /**
! 291: * Convert a 0-terminated utf-8 encoded string to a 0-terminated string
! 292: * in the AtariST character set.
! 293: * replacementChar is inserted when there is no mapping.
! 294: */
! 295: static void Str_Utf8ToAtari(const char *source, char *dest, char replacementChar)
! 296: {
! 297: int c, c2, c3, i;
! 298: if (!characterMappingsInitialized) { initCharacterMappings(); }
! 299:
! 300: while (*source)
! 301: {
! 302: c = *source++ & 255;
! 303: if (c < 128) /* single-byte utf-8 code (0xxxxxxx) */
! 304: {
! 305: *dest++ = c;
! 306: }
! 307: else if (c < 192) /* invalid utf-8 encoding (10xxxxxx) */
! 308: {
! 309: *dest++ = replacementChar;
! 310: }
! 311: else /* multi-byte utf-8 code */
! 312: {
! 313: if (c < 224) /* 110xxxxx, 10xxxxxx */
! 314: {
! 315: c2 = *source++;
! 316: c = ((c & 31) << 6) | (c2 & 63);
! 317: }
! 318: else if (c < 240) /* 1110xxxx, 10xxxxxx, 10xxxxxx */
! 319: {
! 320: c2 = *source++;
! 321: c3 = *source++;
! 322: c = ((c & 15) << 12) | ((c2 & 63) << 6) | (c3 & 63);
! 323: }
! 324:
! 325: /* find AtariST character code for unicode code point c */
! 326: i = mapUnicodeToAtari[c & 511];
! 327: *dest++ = (mapAtariToUnicode[i] == c ? i + 128 : replacementChar);
! 328: }
! 329: }
! 330: *dest = 0;
! 331: }
! 332:
! 333: #else
! 334:
! 335: /**
! 336: * Convert a string from the AtariST character set into the host representation as
! 337: * defined by the current locale. Characters which do not exist in character set
! 338: * of the host as defined by the locale will be replaced by replacementChar.
! 339: */
! 340: static void Str_AtariToLocal(const char *source, char *dest, int destLen, char replacementChar)
! 341: {
! 342: int c, i;
! 343: if (!characterMappingsInitialized) { initCharacterMappings(); }
! 344:
! 345: while (*source && destLen > (int)MB_CUR_MAX)
! 346: {
! 347: c = *source++ & 255;
! 348: if (c >= 128)
! 349: c = mapAtariToUnicode[c & 127];
! 350: /* convert the unicode code point c to a character in the current locale */
! 351: i = wctomb(dest, c);
! 352: if (i < 0)
! 353: {
! 354: *dest = replacementChar;
! 355: i = 1;
! 356: }
! 357: dest += i;
! 358: destLen -= i;
! 359: }
! 360: *dest = 0;
! 361: }
! 362:
! 363: /**
! 364: * Convert a string from the character set defined by current host locale into the
! 365: * AtariST character set. Characters which do not exist in the AtariST character set
! 366: * will be replaced by replacementChar.
! 367: */
! 368: static void Str_LocalToAtari(const char *source, char *dest, char replacementChar)
! 369: {
! 370: int i;
! 371: wchar_t c;
! 372: if (!characterMappingsInitialized) { initCharacterMappings(); }
! 373:
! 374: while (*source)
! 375: {
! 376: /* convert a character from the current locale into an unicode code point */
! 377: i = mbtowc(&c, source, 4);
! 378: if (i < 0)
! 379: {
! 380: c = replacementChar;
! 381: i = 1;
! 382: }
! 383: source += i;
! 384: if (c >= 128)
! 385: {
! 386: /* find AtariST character code for unicode code point c */
! 387: i = mapUnicodeToAtari[c & 511];
! 388: c = (mapAtariToUnicode[i] == c ? i + 128 : replacementChar);
! 389: }
! 390: *dest++ = c;
! 391: }
! 392: *dest = 0;
! 393: }
! 394: #endif
! 395:
! 396:
! 397: void Str_AtariToHost(const char *source, char *dest, int destLen, char replacementChar)
! 398: {
! 399: if (!ConfigureParams.HardDisk.bFilenameConversion)
! 400: {
! 401: strncpy(dest, source, destLen);
! 402: if (destLen > 0)
! 403: dest[destLen-1]= '\0';
! 404: return;
! 405: }
! 406: #if defined(WIN32) || defined(USE_LOCALE_CHARSET)
! 407: Str_AtariToLocal(source, dest, destLen, replacementChar);
! 408: #else
! 409: Str_AtariToUtf8(source, dest, destLen);
! 410: #endif
! 411: }
! 412:
! 413: static void Str_HostToAtari(const char *source, char *dest, char replacementChar)
! 414: {
! 415: if (!ConfigureParams.HardDisk.bFilenameConversion)
! 416: {
! 417: strcpy(dest, source);
! 418: return;
! 419: }
! 420: #if defined(WIN32) || defined(USE_LOCALE_CHARSET)
! 421: Str_LocalToAtari(source, dest, replacementChar);
! 422: #else
! 423: Str_Utf8ToAtari(source, dest, replacementChar);
! 424: #endif
! 425: }
! 426:
! 427:
/* This table is needed to convert the UTF-8 representation of paths with
 * diacritical marks from the decomposed form (as returned by OSX) into the
 * precomposed form. Combining unicode characters are 0x0300..0x036F.
 *
 * Each row is: base letter, combining character, precomposed code point.
 * All precomposed code points are in the range 0xC0..0xFF; not every one
 * of them has an entry in mapAtariToUnicode[].  A single 0 terminates the
 * table.  The table is only ever read, hence const.
 */
static const int mapDecomposedPrecomposed[] =
{
	'A', 0x0300, 0xC0,
	'A', 0x0301, 0xC1,
	'A', 0x0302, 0xC2,
	'A', 0x0303, 0xC3,
	'A', 0x0308, 0xC4,
	'A', 0x030A, 0xC5,
	'C', 0x0327, 0xC7,
	'E', 0x0300, 0xC8,
	'E', 0x0301, 0xC9,
	'E', 0x0302, 0xCA,
	'E', 0x0308, 0xCB,
	'I', 0x0300, 0xCC,
	'I', 0x0301, 0xCD,
	'I', 0x0302, 0xCE,
	'I', 0x0308, 0xCF,
	'N', 0x0303, 0xD1,
	'O', 0x0300, 0xD2,
	'O', 0x0301, 0xD3,
	'O', 0x0302, 0xD4,
	'O', 0x0303, 0xD5,
	'O', 0x0308, 0xD6,
	'U', 0x0300, 0xD9,
	'U', 0x0301, 0xDA,
	'U', 0x0302, 0xDB,
	'U', 0x0308, 0xDC,
	'Y', 0x0301, 0xDD,
	'a', 0x0300, 0xE0,
	'a', 0x0301, 0xE1,
	'a', 0x0302, 0xE2,
	'a', 0x0303, 0xE3,
	'a', 0x0308, 0xE4,
	'a', 0x030A, 0xE5,
	'c', 0x0327, 0xE7,
	'e', 0x0300, 0xE8,
	'e', 0x0301, 0xE9,
	'e', 0x0302, 0xEA,
	'e', 0x0308, 0xEB,
	'i', 0x0300, 0xEC,
	'i', 0x0301, 0xED,
	'i', 0x0302, 0xEE,
	'i', 0x0308, 0xEF,
	'n', 0x0303, 0xF1,
	'o', 0x0300, 0xF2,
	'o', 0x0301, 0xF3,
	'o', 0x0302, 0xF4,
	'o', 0x0303, 0xF5,
	'o', 0x0308, 0xF6,
	'u', 0x0300, 0xF9,
	'u', 0x0301, 0xFA,
	'u', 0x0302, 0xFB,
	'u', 0x0308, 0xFC,
	'y', 0x0301, 0xFD,
	'y', 0x0308, 0xFF,
	0
};
! 491:
! 492: /**
! 493: * Convert decomposed unicode characters (sequence of a letter
! 494: * and a combining character) in an UTF-8 encoded string into
! 495: * the precomposed UTF-8 encoded form. Only characters which
! 496: * exist in the AtariST character set are converted.
! 497: * This is needed for OSX which returns filesystem paths in the
! 498: * decomposed form (NFD).
! 499: */
! 500: void Str_DecomposedToPrecomposedUtf8(const char *source, char *dest)
! 501: {
! 502: int c, c1, i;
! 503: while (*source)
! 504: {
! 505: c = *source++ & 255;
! 506: /* do we have a combining character behind the current character */
! 507: if ((source[0] & 0xFC) == 0xCC) /* 0x03XX is in UTF-8: 110011xx 10xxxxxx */
! 508: {
! 509: c1 = ((source[0] & 31) << 6) | (source[1] & 63);
! 510: for (i = 0; mapDecomposedPrecomposed[i]; i += 3)
! 511: {
! 512: if (mapDecomposedPrecomposed[i] == c && mapDecomposedPrecomposed[i + 1] == c1)
! 513: {
! 514: c = mapDecomposedPrecomposed[i + 2]; /* precomposed unicode code point */
! 515: *dest++ = 0xC0 | (c >> 6); /* UTF-8 first byte: 110xxxxx */
! 516: c = 0x80 + (c & 63); /* UTF-8 second byte: 10xxxxxx */
! 517: source += 2;
! 518: break;
! 519: }
! 520: }
! 521: }
! 522: *dest++ = c;
! 523: }
! 524: *dest = 0;
! 525: }
! 526:
! 527: /* ---------------------------------------------------------------------- */
! 528:
! 529:
! 530:
/**
 * Print an Hex/Ascii dump of Len bytes located at *p
 * Each line consists of Width bytes, printed as an hexa value and as a char
 * (non printable chars are replaced by a '.')
 * The Suffix string is added at the beginning of each line, followed by
 * the offset of the line's first byte as 6 hex digits.
 *
 * A shorter last line is padded with spaces so that the columns stay
 * aligned.  The dump is written straight to pFile, so there is no upper
 * limit for Width; a Width below 1 is treated as 1.  Nothing is printed
 * when Len is not positive.
 */
void Str_Dump_Hex_Ascii ( char *p , int Len , int Width , const char *Suffix , FILE *pFile )
{
	int offset = 0;

	if ( Width < 1 )
		Width = 1;			/* avoids a modulo/division by zero */

	while ( Len > 0 )
	{
		int count = ( Len < Width ) ? Len : Width;	/* bytes on this line */
		int i;

		fprintf ( pFile , "%s%6.6x: " , Suffix , (unsigned int)offset );

		/* hex column: 3 characters per byte, padded to the full width */
		for ( i = 0 ; i < count ; i++ )
			fprintf ( pFile , "%2.2x " , (unsigned int)(unsigned char)p[ i ] );
		for ( ; i < Width ; i++ )
			fputs ( "   " , pFile );

		fputc ( ' ' , pFile );		/* column separator */

		/* ascii column: 1 character per byte, padded to the full width */
		for ( i = 0 ; i < count ; i++ )
		{
			unsigned char c = p[ i ];
			if ( ( c < 0x20 ) || ( c >= 0x7f ) )
				c = '.';
			fputc ( c , pFile );
		}
		for ( ; i < Width ; i++ )
			fputc ( ' ' , pFile );

		fputc ( '\n' , pFile );

		p += count;
		offset += count;
		Len -= count;
	}
}
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.