hatari/src/str.c - view

File: [HATARI the Atari ST Emulator] / hatari / src / str.c
Revision 1.1.1.9 (vendor branch): download - view: text, annotated - select for diffs
Tue Apr 9 08:58:58 2019 UTC (7 years, 1 month ago) by root
Branches: hatari, MAIN
CVS tags: hatari02210, hatari02200, HEAD

hatari 2.2.0

/* Hatari - str.c This file is distributed under the GNU General Public License, version 2 or at your option any later version. Read the file gpl.txt for details. String functions. */ const char Str_fileid[] = "Hatari str.c : " __DATE__ " " __TIME__; #include <stdio.h> #include <ctype.h> #include <stdbool.h> #include <stdlib.h> #include <locale.h> #include <SDL_types.h> #include "configuration.h" #include "str.h" /* Used only by Str_Filename2TOSname() */ static void Str_HostToAtari(const char *source, char *dest, char replacementChar); /** * Remove whitespace from beginning and end of a string. * Returns the trimmed string (string content is moved * so that it still starts from the same address) */ char *Str_Trim(char *buffer) { int i, linelen; if (buffer == NULL) return NULL; linelen = strlen(buffer); for (i = 0; i < linelen; i++) { if (!isspace((unsigned char)buffer[i])) break; } if (i > 0 && i < linelen) { linelen -= i; memmove(buffer, buffer + i, linelen); } for (i = linelen; i > 0; i--) { if (!isspace((unsigned char)buffer[i-1])) break; } buffer[i] = '\0'; return buffer; } /** * Convert a string to uppercase in place. */ char *Str_ToUpper(char *pString) { char *str = pString; while (*str) { *str = toupper((unsigned char)*str); str++; } return pString; } /** * Convert string to lowercase in place. */ char *Str_ToLower(char *pString) { char *str = pString; while (*str) { *str = tolower((unsigned char)*str); str++; } return pString; } /** * Allocate memory for a string and check for out-of memory (and exit the * program in that case, since there is likely nothing we can do if we even * can not allocate small strings anymore). * * @len Length of the string (without the trailing NUL character) */ char *Str_Alloc(int len) { char *newstr = malloc(len + 1); if (!newstr) { perror("string allocation failed"); exit(1); } newstr[0] = newstr[len] = 0; return newstr; } /** * This function is like strdup, but also checks for out-of memory and exits * the program in that case (there is likely nothing we can do if we even can * not allocate small strings anymore). */ char *Str_Dup(const char *str) { char *newstr; if (!str) return NULL; newstr = strdup(str); if (!newstr) { perror("string duplication failed"); exit(1); } return newstr; } /** * truncate string at first unprintable char (e.g. newline). */ #if 0 char *Str_Trunc(char *pString) { int i = 0; char *str = pString; while (str[i] != '\0') { if (!isprint((unsigned)str[i])) { str[i] = '\0'; break; } i++; } return pString; } #endif /** * check if string is valid hex number. */ #if 0 bool Str_IsHex(const char *str) { int i = 0; while (str[i] != '\0' && str[i] != ' ') { if (!isxdigit((unsigned)str[i])) return false; i++; } return true; } #endif /** * Convert potentially too long host filenames to 8.3 TOS filenames * by truncating extension and part before it, replacing invalid * GEMDOS file name characters with INVALID_CHAR + upcasing the result. * * Matching them from the host file system should first try exact * case-insensitive match, and then with a pattern that takes into * account the conversion done in here. */ void Str_Filename2TOSname(const char *source, char *dst) { char *dot, *tmp, *src; int len; src = strdup(source); /* dup so that it can be modified */ /* convert host string encoding to AtariST character set */ Str_HostToAtari(source, src, INVALID_CHAR); len = strlen(src); /* does filename have an extension? */ dot = strrchr(src, '.'); if (dot) { /* limit extension to 3 chars */ if (src + len - dot > 3) dot[4] = '\0'; /* if there are extra dots, convert them */ for (tmp = src; tmp < dot; tmp++) if (*tmp == '.') *tmp = INVALID_CHAR; /* limit part before extension to 8 chars */ if (dot - src > 8) memmove(src + 8, dot, strlen(dot) + 1); } else if (len > 8) src[8] = '\0'; strcpy(dst, src); free(src); /* upcase and replace rest of invalid characters */ for (tmp = dst; *tmp; tmp++) { /* invalid characters above 0x80 have already been replaced */ if (((unsigned char)*tmp) < 32 || *tmp == 127) *tmp = INVALID_CHAR; else { switch (*tmp) { case '*': case '/': case ':': case '?': case '\\': case '{': case '}': *tmp = INVALID_CHAR; break; default: if (((unsigned char)*tmp) < 128) *tmp = toupper((unsigned char)*tmp); } } } } /* ---------------------------------------------------------------------- */ /* Implementation of character set conversions */ /* Maps AtariST characters 0x80..0xFF to unicode code points * see http://www.unicode.org/Public/MAPPINGS/VENDORS/MISC/ATARIST.TXT */ static int mapAtariToUnicode[128] = { 0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7, 0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5, 0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9, 0x00FF, 0x00D6, 0x00DC, 0x00A2, 0x00A3, 0x00A5, 0x00DF, 0x0192, 0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA, 0x00BF, 0x2310, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB, 0x00E3, 0x00F5, 0x00D8, 0x00F8, 0x0153, 0x0152, 0x00C0, 0x00C3, 0x00D5, 0x00A8, 0x00B4, 0x2020, 0x00B6, 0x00A9, 0x00AE, 0x2122, 0x0133, 0x0132, 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, 0x05D8, 0x05D9, 0x05DB, 0x05DC, 0x05DE, 0x05E0, 0x05E1, 0x05E2, 0x05E4, 0x05E6, 0x05E7, 0x05E8, 0x05E9, 0x05EA, 0x05DF, 0x05DA, 0x05DD, 0x05E3, 0x05E5, 0x00A7, 0x2227, 0x221E, 0x03B1, 0x03B2, 0x0393, 0x03C0, 0x03A3, 0x03C3, 0x00B5, 0x03C4, 0x03A6, 0x0398, 0x03A9, 0x03B4, 0x222E, 0x03C6, 0x2208, 0x2229, 0x2261, 0x00B1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00F7, 0x2248, 0x00B0, 0x2219, 0x00B7, 0x221A, 0x207F, 0x00B2, 0x00B3, 0x00AF }; /* Hashtable which maps unicode code points to AtariST characters 0x80..0xFF. * The last 9 bits of the unicode code point provide a hash function * without collisions. */ static char mapUnicodeToAtari[512]; static bool characterMappingsInitialized = false; /** * This function initializes the mapUnicodeToAtari[] hashtable. */ static void initCharacterMappings(void) { int i; for (i = 0; i < 128; i++) { mapUnicodeToAtari[mapAtariToUnicode[i] & 511] = i; } characterMappingsInitialized = true; #if defined(WIN32) || defined(USE_LOCALE_CHARSET) setlocale(LC_ALL, ""); #endif } #if !(defined(WIN32) || defined(USE_LOCALE_CHARSET)) /** * Convert a 0-terminated string in the AtariST character set to a 0-terminated * UTF-8 encoded string. destLen is the number of available bytes in dest[]. * A single character of the AtariST charset can consume up to 3 bytes in UTF-8. */ static void Str_AtariToUtf8(const char *source, char *dest, int destLen) { int c; while (*source) { c = *source++ & 255; if (c >= 128) { c = mapAtariToUnicode[c & 127]; } if (c < 128 && destLen > 1) { *dest++ = c; /* 0xxxxxxx */ destLen--; } else if (c < 2048 && destLen > 2) { *dest++ = (c >> 6) | 192; /* 110xxxxx */ *dest++ = (c & 63) | 128; /* 10xxxxxx */ destLen -= 2; } else if (destLen > 3) { *dest++ = (c >> 12) | 224; /* 1110xxxx */ *dest++ = ((c >> 6) & 63) | 128; /* 10xxxxxx */ *dest++ = (c & 63) | 128; /* 10xxxxxx */ destLen -= 3; } } *dest = 0; } /** * Convert a 0-terminated utf-8 encoded string to a 0-terminated string * in the AtariST character set. * replacementChar is inserted when there is no mapping. */ static void Str_Utf8ToAtari(const char *source, char *dest, char replacementChar) { int c, c2, c3, i; if (!characterMappingsInitialized) { initCharacterMappings(); } while (*source) { c = *source++ & 255; if (c < 128) /* single-byte utf-8 code (0xxxxxxx) */ { *dest++ = c; } else if (c < 192) /* invalid utf-8 encoding (10xxxxxx) */ { *dest++ = replacementChar; } else /* multi-byte utf-8 code */ { if (c < 224) /* 110xxxxx, 10xxxxxx */ { c2 = *source++; c = ((c & 31) << 6) | (c2 & 63); } else if (c < 240) /* 1110xxxx, 10xxxxxx, 10xxxxxx */ { c2 = *source++; c3 = *source++; c = ((c & 15) << 12) | ((c2 & 63) << 6) | (c3 & 63); } /* find AtariST character code for unicode code point c */ i = mapUnicodeToAtari[c & 511]; *dest++ = (mapAtariToUnicode[i] == c ? i + 128 : replacementChar); } } *dest = 0; } #else /** * Convert a string from the AtariST character set into the host representation as * defined by the current locale. Characters which do not exist in character set * of the host as defined by the locale will be replaced by replacementChar. */ static void Str_AtariToLocal(const char *source, char *dest, int destLen, char replacementChar) { int c, i; if (!characterMappingsInitialized) { initCharacterMappings(); } while (*source && destLen > (int)MB_CUR_MAX) { c = *source++ & 255; if (c >= 128) c = mapAtariToUnicode[c & 127]; /* convert the unicode code point c to a character in the current locale */ i = wctomb(dest, c); if (i < 0) { *dest = replacementChar; i = 1; } dest += i; destLen -= i; } *dest = 0; } /** * Convert a string from the character set defined by current host locale into the * AtariST character set. Characters which do not exist in the AtariST character set * will be replaced by replacementChar. */ static void Str_LocalToAtari(const char *source, char *dest, char replacementChar) { int i; wchar_t c; if (!characterMappingsInitialized) { initCharacterMappings(); } while (*source) { /* convert a character from the current locale into an unicode code point */ i = mbtowc(&c, source, 4); if (i < 0) { c = replacementChar; i = 1; } source += i; if (c >= 128) { /* find AtariST character code for unicode code point c */ i = mapUnicodeToAtari[c & 511]; c = (mapAtariToUnicode[i] == c ? i + 128 : replacementChar); } *dest++ = c; } *dest = 0; } #endif void Str_AtariToHost(const char *source, char *dest, int destLen, char replacementChar) { if (!ConfigureParams.HardDisk.bFilenameConversion) { strncpy(dest, source, destLen); if (destLen > 0) dest[destLen-1]= '\0'; return; } #if defined(WIN32) || defined(USE_LOCALE_CHARSET) Str_AtariToLocal(source, dest, destLen, replacementChar); #else Str_AtariToUtf8(source, dest, destLen); #endif } static void Str_HostToAtari(const char *source, char *dest, char replacementChar) { if (!ConfigureParams.HardDisk.bFilenameConversion) { strcpy(dest, source); return; } #if defined(WIN32) || defined(USE_LOCALE_CHARSET) Str_LocalToAtari(source, dest, replacementChar); #else Str_Utf8ToAtari(source, dest, replacementChar); #endif } /* This table is needed to convert the UTF-8 representation of paths with * diacritical marks from the decomposed form (as returned by OSX) into the * precomposed form. Combining unicode characters are 0x0300..0x036F. * This table contains only those characters which are part of the AtariST * character set. */ static int mapDecomposedPrecomposed[] = { 'A', 0x0300, 0xC0, 'A', 0x0301, 0xC1, 'A', 0x0302, 0xC2, 'A', 0x0303, 0xC3, 'A', 0x0308, 0xC4, 'A', 0x030A, 0xC5, 'C', 0x0327, 0xC7, 'E', 0x0300, 0xC8, 'E', 0x0301, 0xC9, 'E', 0x0302, 0xCA, 'E', 0x0308, 0xCB, 'I', 0x0300, 0xCC, 'I', 0x0301, 0xCD, 'I', 0x0302, 0xCE, 'I', 0x0308, 0xCF, 'N', 0x0303, 0xD1, 'O', 0x0300, 0xD2, 'O', 0x0301, 0xD3, 'O', 0x0302, 0xD4, 'O', 0x0303, 0xD5, 'O', 0x0308, 0xD6, 'U', 0x0300, 0xD9, 'U', 0x0301, 0xDA, 'U', 0x0302, 0xDB, 'U', 0x0308, 0xDC, 'Y', 0x0301, 0xDD, 'a', 0x0300, 0xE0, 'a', 0x0301, 0xE1, 'a', 0x0302, 0xE2, 'a', 0x0303, 0xE3, 'a', 0x0308, 0xE4, 'a', 0x030A, 0xE5, 'c', 0x0327, 0xE7, 'e', 0x0300, 0xE8, 'e', 0x0301, 0xE9, 'e', 0x0302, 0xEA, 'e', 0x0308, 0xEB, 'i', 0x0300, 0xEC, 'i', 0x0301, 0xED, 'i', 0x0302, 0xEE, 'i', 0x0308, 0xEF, 'n', 0x0303, 0xF1, 'o', 0x0300, 0xF2, 'o', 0x0301, 0xF3, 'o', 0x0302, 0xF4, 'o', 0x0303, 0xF5, 'o', 0x0308, 0xF6, 'u', 0x0300, 0xF9, 'u', 0x0301, 0xFA, 'u', 0x0302, 0xFB, 'u', 0x0308, 0xFC, 'y', 0x0301, 0xFD, 'y', 0x0308, 0xFF, 0 }; /** * Convert decomposed unicode characters (sequence of a letter * and a combining character) in an UTF-8 encoded string into * the precomposed UTF-8 encoded form. Only characters which * exist in the AtariST character set are converted. * This is needed for OSX which returns filesystem paths in the * decomposed form (NFD). */ void Str_DecomposedToPrecomposedUtf8(const char *source, char *dest) { int c, c1, i; while (*source) { c = *source++ & 255; /* do we have a combining character behind the current character */ if ((source[0] & 0xFC) == 0xCC) /* 0x03XX is in UTF-8: 110011xx 10xxxxxx */ { c1 = ((source[0] & 31) << 6) | (source[1] & 63); for (i = 0; mapDecomposedPrecomposed[i]; i += 3) { if (mapDecomposedPrecomposed[i] == c && mapDecomposedPrecomposed[i + 1] == c1) { c = mapDecomposedPrecomposed[i + 2]; /* precomposed unicode code point */ *dest++ = 0xC0 | (c >> 6); /* UTF-8 first byte: 110xxxxx */ c = 0x80 + (c & 63); /* UTF-8 second byte: 10xxxxxx */ source += 2; break; } } } *dest++ = c; } *dest = 0; } /* ---------------------------------------------------------------------- */ /** * Print an Hex/Ascii dump of Len bytes located at *p * Each line consists of Width bytes, printed as an hexa value and as a char * (non printable chars are replaced by a '.') * The Suffix string is added at the beginning of each line. */ void Str_Dump_Hex_Ascii ( char *p , int Len , int Width , const char *Suffix , FILE *pFile ) { int nb; char buf_hex[ 200*3 ]; /* max for 200 bytes per line */ char buf_ascii[ 200 ]; char *p_h; char *p_a; unsigned char c; int offset; nb = 0; offset = 0; p_h = buf_hex; p_a = buf_ascii; while ( Len > 0 ) { c = *p++; sprintf ( p_h , "%2.2x " , c ); if ( ( c < 0x20 ) || ( c >= 0x7f ) ) c = '.'; sprintf ( p_a , "%c" , c ); p_h += 3; p_a += 1; Len--; nb++; if ( ( nb % Width == 0 ) || ( Len == 0 ) ) { fprintf ( pFile , "%s%6.6x: %-*s %-*s\n" , Suffix , offset , Width*3 , buf_hex , Width , buf_ascii ); offset = nb; p_h = buf_hex; p_a = buf_ascii; } } }

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.