|
|
1.1 root 1: /*
2: Hatari - str.c
3:
1.1.1.6 root 4: This file is distributed under the GNU General Public License, version 2
5: or at your option any later version. Read the file gpl.txt for details.
1.1 root 6:
7: String functions.
8: */
1.1.1.3 root 9: const char Str_fileid[] = "Hatari str.c : " __DATE__ " " __TIME__;
1.1 root 10:
1.1.1.3 root 11: #include <stdio.h>
1.1 root 12: #include <ctype.h>
13: #include <stdbool.h>
1.1.1.7 root 14: #include <stdlib.h>
1.1.1.8 root 15: #include <locale.h>
1.1.1.3 root 16: #include <SDL_types.h>
17: #include "configuration.h"
1.1 root 18: #include "str.h"
19:
1.1.1.8 root 20: /* Used only by Str_Filename2TOSname() */
21: static void Str_HostToAtari(const char *source, char *dest, char replacementChar);
22:
1.1 root 23:
/**
 * Strip leading and trailing whitespace from a string in place.
 * The remaining text is shifted to the front of the buffer, so the
 * returned pointer is always the address that was passed in.
 * A NULL argument is handed back as NULL.
 */
char *Str_Trim(char *buffer)
{
	int lead, len;

	if (!buffer)
		return NULL;

	len = strlen(buffer);

	/* count the whitespace characters at the start */
	lead = 0;
	while (lead < len && isspace((unsigned char)buffer[lead]))
		lead++;

	/* shift the text to the front (skipped for an all-blank string,
	 * which the loop below reduces to "" anyway) */
	if (lead > 0 && lead < len)
	{
		len -= lead;
		memmove(buffer, buffer + lead, len);
	}

	/* step back over the trailing whitespace and terminate there */
	while (len > 0 && isspace((unsigned char)buffer[len - 1]))
		len--;
	buffer[len] = '\0';

	return buffer;
}
60:
61:
/**
 * Upper-case every character of a NUL-terminated string in place,
 * using toupper(). Returns the pointer that was passed in.
 */
char *Str_ToUpper(char *pString)
{
	char *p;

	for (p = pString; *p != '\0'; p++)
		*p = toupper((unsigned char)*p);

	return pString;
}
75:
76:
/**
 * Lower-case every character of a NUL-terminated string in place,
 * using tolower(). Returns the pointer that was passed in.
 */
char *Str_ToLower(char *pString)
{
	char *p;

	for (p = pString; *p != '\0'; p++)
		*p = tolower((unsigned char)*p);

	return pString;
}
90:
/**
 * Allocate a buffer for a string of the given length. Running out of
 * memory is treated as fatal: an error is printed with perror() and the
 * program exits with status 1, so the function never returns NULL.
 *
 * The first byte and the byte at index len are set to NUL; the bytes in
 * between are left uninitialized.
 *
 * @len Length of the string (without the trailing NUL character)
 */
char *Str_Alloc(int len)
{
	char *buf;

	buf = malloc(len + 1);
	if (buf == NULL)
	{
		perror("string allocation failed");
		exit(1);
	}

	buf[len] = 0;
	buf[0] = 0;

	return buf;
}
! 112:
/**
 * strdup() wrapper that treats out-of-memory as fatal: on allocation
 * failure an error is printed with perror() and the program exits with
 * status 1. A NULL argument yields NULL. The returned copy is released
 * with free().
 */
char *Str_Dup(const char *str)
{
	char *copy = NULL;

	if (str != NULL)
	{
		copy = strdup(str);
		if (copy == NULL)
		{
			perror("string duplication failed");
			exit(1);
		}
	}

	return copy;
}
1.1 root 134:
135: /**
1.1.1.4 root 136: * truncate string at first unprintable char (e.g. newline).
1.1 root 137: */
1.1.1.8 root 138: #if 0
char *Str_Trunc(char *pString)
{
	char *str;

	/* Cut the string at the first byte that isprint() rejects and
	 * return the pointer that was passed in.
	 * The byte has to be converted through unsigned char: the <ctype.h>
	 * functions are only defined for EOF and unsigned char values, and
	 * a plain (unsigned) cast of a negative char yields a huge value.
	 */
	for (str = pString; *str != '\0'; str++)
	{
		if (!isprint((unsigned char)*str))
		{
			*str = '\0';
			break;
		}
	}
	return pString;
}
1.1.1.8 root 154: #endif
1.1 root 155:
156: /**
157: * check if string is valid hex number.
158: */
1.1.1.8 root 159: #if 0
bool Str_IsHex(const char *str)
{
	const char *p;

	/* Only the part up to the first space (or the end of the string)
	 * is examined; an empty part counts as valid.
	 * The byte has to be converted through unsigned char: the <ctype.h>
	 * functions are only defined for EOF and unsigned char values, and
	 * a plain (unsigned) cast of a negative char yields a huge value.
	 */
	for (p = str; *p != '\0' && *p != ' '; p++)
	{
		if (!isxdigit((unsigned char)*p))
			return false;
	}
	return true;
}
1.1.1.8 root 171: #endif
1.1.1.5 root 172:
173: /**
174: * Convert potentially too long host filenames to 8.3 TOS filenames
175: * by truncating extension and part before it, replacing invalid
176: * GEMDOS file name characters with INVALID_CHAR + upcasing the result.
177: *
178: * Matching them from the host file system should first try exact
179: * case-insensitive match, and then with a pattern that takes into
180: * account the conversion done in here.
181: */
182: void Str_Filename2TOSname(const char *source, char *dst)
183: {
184: char *dot, *tmp, *src;
185: int len;
186:
187: src = strdup(source); /* dup so that it can be modified */
1.1.1.8 root 188:
189: /* convert host string encoding to AtariST character set */
190: Str_HostToAtari(source, src, INVALID_CHAR);
1.1.1.5 root 191: len = strlen(src);
192:
193: /* does filename have an extension? */
194: dot = strrchr(src, '.');
195: if (dot)
196: {
197: /* limit extension to 3 chars */
198: if (src + len - dot > 3)
199: dot[4] = '\0';
200:
201: /* if there are extra dots, convert them */
202: for (tmp = src; tmp < dot; tmp++)
203: if (*tmp == '.')
204: *tmp = INVALID_CHAR;
205:
1.1.1.6 root 206: /* limit part before extension to 8 chars */
207: if (dot - src > 8)
208: memmove(src + 8, dot, strlen(dot) + 1);
1.1.1.5 root 209: }
1.1.1.6 root 210: else if (len > 8)
211: src[8] = '\0';
212:
213: strcpy(dst, src);
1.1.1.5 root 214: free(src);
215:
1.1.1.6 root 216: /* upcase and replace rest of invalid characters */
1.1.1.5 root 217: for (tmp = dst; *tmp; tmp++)
218: {
1.1.1.8 root 219: /* invalid characters above 0x80 have already been replaced */
220: if (((unsigned char)*tmp) < 32 || *tmp == 127)
1.1.1.5 root 221: *tmp = INVALID_CHAR;
222: else
223: {
224: switch (*tmp)
225: {
226: case '*':
227: case '/':
228: case ':':
229: case '?':
230: case '\\':
231: case '{':
232: case '}':
233: *tmp = INVALID_CHAR;
1.1.1.6 root 234: break;
235: default:
1.1.1.8 root 236: if (((unsigned char)*tmp) < 128)
1.1.1.7 root 237: *tmp = toupper((unsigned char)*tmp);
1.1.1.5 root 238: }
239: }
240: }
241: }
1.1.1.7 root 242:
243:
1.1.1.8 root 244: /* ---------------------------------------------------------------------- */
245:
246: /* Implementation of character set conversions */
247:
/* Maps AtariST characters 0x80..0xFF to unicode code points
 * see http://www.unicode.org/Public/MAPPINGS/VENDORS/MISC/ATARIST.TXT
 *
 * Index 0 corresponds to AtariST character 0x80, index 127 to 0xFF.
 * The table is only ever read, hence const.
 */
static const int mapAtariToUnicode[128] =
{
	0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7,
	0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5,
	0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9,
	0x00FF, 0x00D6, 0x00DC, 0x00A2, 0x00A3, 0x00A5, 0x00DF, 0x0192,
	0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA,
	0x00BF, 0x2310, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB,
	0x00E3, 0x00F5, 0x00D8, 0x00F8, 0x0153, 0x0152, 0x00C0, 0x00C3,
	0x00D5, 0x00A8, 0x00B4, 0x2020, 0x00B6, 0x00A9, 0x00AE, 0x2122,
	0x0133, 0x0132, 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5,
	0x05D6, 0x05D7, 0x05D8, 0x05D9, 0x05DB, 0x05DC, 0x05DE, 0x05E0,
	0x05E1, 0x05E2, 0x05E4, 0x05E6, 0x05E7, 0x05E8, 0x05E9, 0x05EA,
	0x05DF, 0x05DA, 0x05DD, 0x05E3, 0x05E5, 0x00A7, 0x2227, 0x221E,
	0x03B1, 0x03B2, 0x0393, 0x03C0, 0x03A3, 0x03C3, 0x00B5, 0x03C4,
	0x03A6, 0x0398, 0x03A9, 0x03B4, 0x222E, 0x03C6, 0x2208, 0x2229,
	0x2261, 0x00B1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00F7, 0x2248,
	0x00B0, 0x2219, 0x00B7, 0x221A, 0x207F, 0x00B2, 0x00B3, 0x00AF
};
270:
271: /* Hashtable which maps unicode code points to AtariST characters 0x80..0xFF.
272: * The last 9 bits of the unicode code point provide a hash function
273: * without collisions.
274: */
275: static char mapUnicodeToAtari[512];
276: static bool characterMappingsInitialized = false;
277:
278: /**
279: * This function initializes the mapUnicodeToAtari[] hashtable.
280: */
281: static void initCharacterMappings(void)
282: {
283: int i;
284: for (i = 0; i < 128; i++)
285: {
286: mapUnicodeToAtari[mapAtariToUnicode[i] & 511] = i;
287: }
288: characterMappingsInitialized = true;
289:
290: #if defined(WIN32) || defined(USE_LOCALE_CHARSET)
291: setlocale(LC_ALL, "");
292: #endif
293: }
294:
295: #if !(defined(WIN32) || defined(USE_LOCALE_CHARSET))
296: /**
297: * Convert a 0-terminated string in the AtariST character set to a 0-terminated
298: * UTF-8 encoded string. destLen is the number of available bytes in dest[].
299: * A single character of the AtariST charset can consume up to 3 bytes in UTF-8.
300: */
301: static void Str_AtariToUtf8(const char *source, char *dest, int destLen)
302: {
303: int c;
304: while (*source)
305: {
306: c = *source++ & 255;
307: if (c >= 128)
308: {
309: c = mapAtariToUnicode[c & 127];
310: }
311: if (c < 128 && destLen > 1)
312: {
313: *dest++ = c; /* 0xxxxxxx */
314: destLen--;
315: }
316: else if (c < 2048 && destLen > 2)
317: {
318: *dest++ = (c >> 6) | 192; /* 110xxxxx */
319: *dest++ = (c & 63) | 128; /* 10xxxxxx */
320: destLen -= 2;
321: }
322: else if (destLen > 3)
323: {
324: *dest++ = (c >> 12) | 224; /* 1110xxxx */
325: *dest++ = ((c >> 6) & 63) | 128; /* 10xxxxxx */
326: *dest++ = (c & 63) | 128; /* 10xxxxxx */
327: destLen -= 3;
328: }
329: }
330: *dest = 0;
331: }
332:
333: /**
334: * Convert a 0-terminated utf-8 encoded string to a 0-terminated string
335: * in the AtariST character set.
336: * replacementChar is inserted when there is no mapping.
337: */
338: static void Str_Utf8ToAtari(const char *source, char *dest, char replacementChar)
339: {
340: int c, c2, c3, i;
341: if (!characterMappingsInitialized) { initCharacterMappings(); }
342:
343: while (*source)
344: {
345: c = *source++ & 255;
346: if (c < 128) /* single-byte utf-8 code (0xxxxxxx) */
347: {
348: *dest++ = c;
349: }
350: else if (c < 192) /* invalid utf-8 encoding (10xxxxxx) */
351: {
352: *dest++ = replacementChar;
353: }
354: else /* multi-byte utf-8 code */
355: {
356: if (c < 224) /* 110xxxxx, 10xxxxxx */
357: {
358: c2 = *source++;
359: c = ((c & 31) << 6) | (c2 & 63);
360: }
361: else if (c < 240) /* 1110xxxx, 10xxxxxx, 10xxxxxx */
362: {
363: c2 = *source++;
364: c3 = *source++;
365: c = ((c & 15) << 12) | ((c2 & 63) << 6) | (c3 & 63);
366: }
367:
368: /* find AtariST character code for unicode code point c */
369: i = mapUnicodeToAtari[c & 511];
370: *dest++ = (mapAtariToUnicode[i] == c ? i + 128 : replacementChar);
371: }
372: }
373: *dest = 0;
374: }
375:
376: #else
377:
378: /**
379: * Convert a string from the AtariST character set into the host representation as
380: * defined by the current locale. Characters which do not exist in character set
381: * of the host as defined by the locale will be replaced by replacementChar.
382: */
383: static void Str_AtariToLocal(const char *source, char *dest, int destLen, char replacementChar)
384: {
385: int c, i;
386: if (!characterMappingsInitialized) { initCharacterMappings(); }
387:
388: while (*source && destLen > (int)MB_CUR_MAX)
389: {
390: c = *source++ & 255;
391: if (c >= 128)
392: c = mapAtariToUnicode[c & 127];
393: /* convert the unicode code point c to a character in the current locale */
394: i = wctomb(dest, c);
395: if (i < 0)
396: {
397: *dest = replacementChar;
398: i = 1;
399: }
400: dest += i;
401: destLen -= i;
402: }
403: *dest = 0;
404: }
405:
406: /**
407: * Convert a string from the character set defined by current host locale into the
408: * AtariST character set. Characters which do not exist in the AtariST character set
409: * will be replaced by replacementChar.
410: */
411: static void Str_LocalToAtari(const char *source, char *dest, char replacementChar)
412: {
413: int i;
414: wchar_t c;
415: if (!characterMappingsInitialized) { initCharacterMappings(); }
416:
417: while (*source)
418: {
419: /* convert a character from the current locale into an unicode code point */
420: i = mbtowc(&c, source, 4);
421: if (i < 0)
422: {
423: c = replacementChar;
424: i = 1;
425: }
426: source += i;
427: if (c >= 128)
428: {
429: /* find AtariST character code for unicode code point c */
430: i = mapUnicodeToAtari[c & 511];
431: c = (mapAtariToUnicode[i] == c ? i + 128 : replacementChar);
432: }
433: *dest++ = c;
434: }
435: *dest = 0;
436: }
437: #endif
438:
439:
440: void Str_AtariToHost(const char *source, char *dest, int destLen, char replacementChar)
441: {
442: if (!ConfigureParams.HardDisk.bFilenameConversion)
443: {
444: strncpy(dest, source, destLen);
445: if (destLen > 0)
446: dest[destLen-1]= '\0';
447: return;
448: }
449: #if defined(WIN32) || defined(USE_LOCALE_CHARSET)
450: Str_AtariToLocal(source, dest, destLen, replacementChar);
451: #else
452: Str_AtariToUtf8(source, dest, destLen);
453: #endif
454: }
455:
456: static void Str_HostToAtari(const char *source, char *dest, char replacementChar)
457: {
458: if (!ConfigureParams.HardDisk.bFilenameConversion)
459: {
460: strcpy(dest, source);
461: return;
462: }
463: #if defined(WIN32) || defined(USE_LOCALE_CHARSET)
464: Str_LocalToAtari(source, dest, replacementChar);
465: #else
466: Str_Utf8ToAtari(source, dest, replacementChar);
467: #endif
468: }
469:
470:
/* This table is needed to convert the UTF-8 representation of paths with
 * diacritical marks from the decomposed form (as returned by OSX) into the
 * precomposed form. Combining unicode characters are 0x0300..0x036F.
 *
 * Each row is: base letter, combining character, precomposed code point.
 * All precomposed code points listed here are in the range 0xC0..0xFF.
 * The list is terminated by a single 0. It is only ever read, hence const.
 */
static const int mapDecomposedPrecomposed[] =
{
	'A', 0x0300, 0xC0,
	'A', 0x0301, 0xC1,
	'A', 0x0302, 0xC2,
	'A', 0x0303, 0xC3,
	'A', 0x0308, 0xC4,
	'A', 0x030A, 0xC5,
	'C', 0x0327, 0xC7,
	'E', 0x0300, 0xC8,
	'E', 0x0301, 0xC9,
	'E', 0x0302, 0xCA,
	'E', 0x0308, 0xCB,
	'I', 0x0300, 0xCC,
	'I', 0x0301, 0xCD,
	'I', 0x0302, 0xCE,
	'I', 0x0308, 0xCF,
	'N', 0x0303, 0xD1,
	'O', 0x0300, 0xD2,
	'O', 0x0301, 0xD3,
	'O', 0x0302, 0xD4,
	'O', 0x0303, 0xD5,
	'O', 0x0308, 0xD6,
	'U', 0x0300, 0xD9,
	'U', 0x0301, 0xDA,
	'U', 0x0302, 0xDB,
	'U', 0x0308, 0xDC,
	'Y', 0x0301, 0xDD,
	'a', 0x0300, 0xE0,
	'a', 0x0301, 0xE1,
	'a', 0x0302, 0xE2,
	'a', 0x0303, 0xE3,
	'a', 0x0308, 0xE4,
	'a', 0x030A, 0xE5,
	'c', 0x0327, 0xE7,
	'e', 0x0300, 0xE8,
	'e', 0x0301, 0xE9,
	'e', 0x0302, 0xEA,
	'e', 0x0308, 0xEB,
	'i', 0x0300, 0xEC,
	'i', 0x0301, 0xED,
	'i', 0x0302, 0xEE,
	'i', 0x0308, 0xEF,
	'n', 0x0303, 0xF1,
	'o', 0x0300, 0xF2,
	'o', 0x0301, 0xF3,
	'o', 0x0302, 0xF4,
	'o', 0x0303, 0xF5,
	'o', 0x0308, 0xF6,
	'u', 0x0300, 0xF9,
	'u', 0x0301, 0xFA,
	'u', 0x0302, 0xFB,
	'u', 0x0308, 0xFC,
	'y', 0x0301, 0xFD,
	'y', 0x0308, 0xFF,
	0
};

/**
 * Convert decomposed unicode characters (sequence of a letter
 * and a combining character) in an UTF-8 encoded string into
 * the precomposed UTF-8 encoded form. Only the combinations listed
 * in mapDecomposedPrecomposed[] are converted, everything else is
 * copied unchanged.
 * This is needed for OSX which returns filesystem paths in the
 * decomposed form (NFD).
 *
 * @source 0-terminated UTF-8 input
 * @dest   output buffer; the result is never longer than the input,
 *         so strlen(source) + 1 bytes suffice
 */
void Str_DecomposedToPrecomposedUtf8(const char *source, char *dest)
{
	int c, c1, i;

	while (*source)
	{
		c = *source++ & 255;
		/* Do we have a combining character behind the current character?
		 * 0x03XX is in UTF-8: 110011xx 10xxxxxx. The second byte is
		 * checked as well, so that a sequence cut off by the end of the
		 * string is not combined and the pointer cannot be moved past
		 * the terminating NUL.
		 */
		if ((source[0] & 0xFC) == 0xCC && (source[1] & 0xC0) == 0x80)
		{
			c1 = ((source[0] & 31) << 6) | (source[1] & 63);
			for (i = 0; mapDecomposedPrecomposed[i]; i += 3)
			{
				if (mapDecomposedPrecomposed[i] == c && mapDecomposedPrecomposed[i + 1] == c1)
				{
					c = mapDecomposedPrecomposed[i + 2];  /* precomposed unicode code point */
					*dest++ = 0xC0 | (c >> 6);            /* UTF-8 first byte:  110xxxxx */
					c = 0x80 + (c & 63);                  /* UTF-8 second byte: 10xxxxxx */
					source += 2;                          /* skip the combining character */
					break;
				}
			}
		}
		*dest++ = c;
	}
	*dest = 0;
}
569:
570: /* ---------------------------------------------------------------------- */
571:
572:
573:
/**
 * Print an Hex/Ascii dump of Len bytes located at *p
 * Each line consists of Width bytes, printed as an hexa value and as a char
 * (non printable chars are replaced by a '.')
 * The Suffix string (despite its name) is printed at the beginning of
 * each line, followed by the offset of the line's first byte.
 *
 * @p      first byte to dump
 * @Len    number of bytes to dump; nothing is printed if it is <= 0
 * @Width  bytes per output line; values outside 1..200 are clamped to
 *         that range, because the line buffers hold at most 200 bytes
 * @Suffix text printed in front of every line
 * @pFile  stream the dump is written to
 */
void Str_Dump_Hex_Ascii ( char *p , int Len , int Width , const char *Suffix , FILE *pFile )
{
	enum { MAX_WIDTH = 200 };			/* max bytes per line */
	int	nb;
	char	buf_hex[ MAX_WIDTH*3 + 1 ];		/* "xx " per byte + NUL */
	char	buf_ascii[ MAX_WIDTH + 1 ];		/* one char per byte + NUL */
	char	*p_h;
	char	*p_a;
	unsigned char c;
	int	offset;

	/* A width of 0 would divide by zero below and a too large one
	 * would overflow the line buffers */
	if ( Width < 1 )
		Width = 1;
	else if ( Width > MAX_WIDTH )
		Width = MAX_WIDTH;

	nb = 0;
	offset = 0;
	p_h = buf_hex;
	p_a = buf_ascii;
	while ( Len > 0 )
	{
		c = *p++;
		snprintf ( p_h , 4 , "%2.2x " , c );
		p_h += 3;

		if ( ( c < 0x20 ) || ( c >= 0x7f ) )
			c = '.';
		*p_a++ = c;
		*p_a = '\0';

		Len--;
		nb++;
		/* flush when the line is full or the data is exhausted */
		if ( ( nb % Width == 0 ) || ( Len == 0 ) )
		{
			fprintf ( pFile , "%s%6.6x: %-*s   %-*s\n" , Suffix , (unsigned int)offset , Width*3 , buf_hex , Width , buf_ascii );
			offset = nb;
			p_h = buf_hex;
			p_a = buf_ascii;
		}
	}
}
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.