|
|
1.1.1.2 ! root 1: /* ! 2: * charset.c ! 3: * ! 4: * Conversion tables and routines to support different character sets. ! 5: * The PGP internal format is latin-1. ! 6: */ ! 7: ! 8: #include <stdio.h> ! 9: #include "usuals.h" ! 10: #include "language.h" ! 11: #include "fileio.h" ! 12: ! 13: #ifndef NULL ! 14: #define NULL 0 ! 15: #endif ! 16: ! 17: #define UNK '?' ! 18: ! 19: static unsigned char ! 20: intern2ascii[] = { /* ISO 8859-1 Latin Alphabet 1 to US ASCII */ ! 21: UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK, ! 22: UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK, ! 23: 32, 33, 99, 35, 36, 89, 124, 80, 34, 67, 97, 34, 126, 45, 82, 95, ! 24: 111, UNK, 50, 51, 39, 117, 45, 45, 44, 49, 111, 34, UNK, UNK, UNK, 63, ! 25: 65, 65, 65, 65, 65, 65, 65, 67, 69, 69, 69, 69, 73, 73, 73, 73, ! 26: 68, 78, 79, 79, 79, 79, 79, 120, 79, 85, 85, 85, 85, 89, 84, 115, ! 27: 97, 97, 97, 97, 97, 97, 97, 99, 101, 101, 101, 101, 105, 105, 105, 105, ! 28: 100, 110, 111, 111, 111, 111, 111, 47, 111, 117, 117, 117, 117, 121, 116, 121 ! 29: }; ! 30: ! 31: static unsigned char ! 32: intern2cp850[] = { /* ISO 8859-1 Latin Alphabet 1 (Latin-1) to IBM Code Page 850 */ ! 33: 186, 205, 201, 187, 200, 188, 204, 185, 203, 202, 206, 223, 220, 219, 254, 242, ! 34: 179, 196, 218, 191, 192, 217, 195, 180, 194, 193, 197, 176, 177, 178, 213, 159, ! 35: 255, 173, 189, 156, 207, 190, 221, 245, 249, 184, 166, 174, 170, 240, 169, 238, ! 36: 248, 241, 253, 252, 239, 230, 244, 250, 247, 251, 167, 175, 172, 171, 243, 168, ! 37: 183, 181, 182, 199, 142, 143, 146, 128, 212, 144, 210, 211, 222, 214, 215, 216, ! 38: 209, 165, 227, 224, 226, 229, 153, 158, 157, 235, 233, 234, 154, 237, 232, 225, ! 39: 133, 160, 131, 198, 132, 134, 145, 135, 138, 130, 136, 137, 141, 161, 140, 139, ! 40: 208, 164, 149, 162, 147, 228, 148, 246, 155, 151, 163, 150, 129, 236, 231, 152 ! 41: }; ! 42: ! 43: static unsigned char ! 44: cp8502intern[] = { /* IBM Code Page 850 to Latin-1 */ ! 45: 199, 252, 233, 226, 228, 224, 229, 231, 234, 235, 232, 239, 238, 236, 196, 197, ! 46: 201, 230, 198, 244, 246, 242, 251, 249, 255, 214, 220, 248, 163, 216, 215, 159, ! 47: 225, 237, 243, 250, 241, 209, 170, 186, 191, 174, 172, 189, 188, 161, 171, 187, ! 48: 155, 156, 157, 144, 151, 193, 194, 192, 169, 135, 128, 131, 133, 162, 165, 147, ! 49: 148, 153, 152, 150, 145, 154, 227, 195, 132, 130, 137, 136, 134, 129, 138, 164, ! 50: 240, 208, 202, 203, 200, 158, 205, 206, 207, 149, 146, 141, 140, 166, 204, 139, ! 51: 211, 223, 212, 210, 245, 213, 181, 254, 222, 218, 219, 217, 253, 221, 175, 180, ! 52: 173, 177, 143, 190, 182, 167, 247, 184, 176, 168, 183, 185, 179, 178, 142, 160 ! 53: }; ! 54: ! 55: /* Russian language specific conversation section */ ! 56: /* Two point-to-point charset decode tables */ ! 57: ! 58: /* Decode single char from KOI8 to ALT-CODES, if present */ ! 59: static unsigned char intern2alt[] = { ! 60: 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, ! 61: 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, ! 62: 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, ! 63: 0xc8, 0xc9, 0xca, 0xff, 0xcc, 0xcd, 0xce, 0xcf, ! 64: 0xd0, 0xd1, 0xd2, 0xf1, 0xd4, 0xd5, 0xd6, 0xd7, ! 65: 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, ! 66: 0xd3, 0xf3, 0xf2, 0xf0, 0xf4, 0xf5, 0xf6, 0xf7, ! 67: 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xcb, ! 68: 0xee, 0xa0, 0xa1, 0xe6, 0xa4, 0xa5, 0xe4, 0xa3, ! 69: 0xe5, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, ! 70: 0xaf, 0xef, 0xe0, 0xe1, 0xe2, 0xe3, 0xa6, 0xa2, ! 71: 0xec, 0xeb, 0xa7, 0xe8, 0xed, 0xe9, 0xe7, 0xea, ! 72: 0x9e, 0x80, 0x81, 0x96, 0x84, 0x85, 0x94, 0x83, ! 73: 0x95, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, ! 74: 0x8f, 0x9f, 0x90, 0x91, 0x92, 0x93, 0x86, 0x82, ! 75: 0x9c, 0x9b, 0x87, 0x98, 0x9d, 0x99, 0x97, 0x9a ! 76: }; ! 77: ! 78: /* Decode single char from ALT-CODES, if present, to KOI8 */ ! 79: static unsigned char alt2intern[] = { ! 80: 0xe1, 0xe2, 0xf7, 0xe7, 0xe4, 0xe5, 0xf6, 0xfa, ! 81: 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, ! 82: 0xf2, 0xf3, 0xf4, 0xf5, 0xe6, 0xe8, 0xe3, 0xfe, ! 83: 0xfb, 0xfd, 0xff, 0xf9, 0xf8, 0xfc, 0xe0, 0xf1, ! 84: 0xc1, 0xc2, 0xd7, 0xc7, 0xc4, 0xc5, 0xd6, 0xda, ! 85: 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, ! 86: 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, ! 87: 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, ! 88: 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, ! 89: 0x98, 0x99, 0x9a, 0xbf, 0x9c, 0x9d, 0x9e, 0x9f, ! 90: 0xa0, 0xa1, 0xa2, 0xb0, 0xa4, 0xa5, 0xa6, 0xa7, ! 91: 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, ! 92: 0xd2, 0xd3, 0xd4, 0xd5, 0xc6, 0xc8, 0xc3, 0xde, ! 93: 0xdb, 0xdd, 0xdf, 0xd9, 0xd8, 0xdc, 0xc0, 0xd1, ! 94: 0xb3, 0xa3, 0xb2, 0xb1, 0xb4, 0xb5, 0xb6, 0xb7, ! 95: 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0x9b ! 96: }; ! 97: ! 98: /* End of Russian section */ ! 99: ! 100: /* ! 101: * Most Unixes has KOI8, and DOS has ALT_CODE ! 102: * If your Unix is non-standard, set CHARSET to "alt_codes" ! 103: * in config.txt ! 104: */ ! 105: ! 106: #ifndef DEFAULT_CSET ! 107: #define DEFAULT_CSET "noconv" ! 108: #endif ! 109: #ifndef DEFAULT_RU_CSET ! 110: #define DEFAULT_RU_CSET "noconv" ! 111: #endif ! 112: ! 113: ! 114: int CONVERSION = NO_CONV; /* None text file conversion at start time */ ! 115: ! 116: unsigned char *ext_c_ptr; ! 117: unsigned char *int_c_ptr; ! 118: ! 119: #ifdef MSDOS ! 120: char charset[64] = "cp850"; ! 121: #else ! 122: char charset[64] = ""; ! 123: #endif ! 124: ! 125: void ! 126: init_charset() ! 127: { ! 128: ext_c_ptr = NULL; /* NULL means latin1 or KOI8 (internal format) */ ! 129: int_c_ptr = NULL; ! 130: ! 131: if (charset[0] == '\0') ! 132: { /* use default character set for this system */ ! 133: if (strcmp(language, "ru") == 0) ! 134: strcpy(charset, DEFAULT_RU_CSET); ! 135: else ! 136: strcpy(charset, DEFAULT_CSET); ! 137: } ! 138: else ! 139: strlwr(charset); ! 140: ! 141: /* latin-1 and KOI8 are in internal format: no conversion needed */ ! 142: if (!strcmp(charset, "latin1") || !strcmp(charset, "koi8") || ! 143: !strcmp(charset, "noconv")) ! 144: return; ! 145: ! 146: if (!strcmp(charset, "alt_codes")) ! 147: { ext_c_ptr = intern2alt; ! 148: int_c_ptr = alt2intern; ! 149: } else if (!strcmp(charset, "cp850")) ! 150: { ext_c_ptr = intern2cp850; ! 151: int_c_ptr = cp8502intern; ! 152: } else if (!strcmp(charset, "ascii")) ! 153: { ext_c_ptr = intern2ascii; ! 154: } else ! 155: { ! 156: fprintf(stderr, PSTR("Unsupported character set: '%s'\n"), charset); ! 157: } ! 158: } ! 159: ! 160: char ! 161: EXT_C(char c) ! 162: { ! 163: if (c > '\0' || !ext_c_ptr) ! 164: return c; ! 165: return ext_c_ptr[c & 0x7f]; ! 166: } ! 167: ! 168: char ! 169: INT_C(char c) ! 170: { ! 171: if (c > '\0' || !int_c_ptr) ! 172: return c; ! 173: return int_c_ptr[c & 0x7f]; ! 174: } ! 175: ! 176: /* ! 177: * to_upper() and to_lower(), replacement for toupper() and tolower(), ! 178: * calling to_upper() on uppercase or to_lower on lowercase characters ! 179: * is handled correctly. ! 180: * ! 181: * XXX: should handle local characterset when 8-bit userID's are allowed ! 182: */ ! 183: int ! 184: to_upper(int c) ! 185: { ! 186: return (c >= 'a' && c <= 'z' ? c - ('a' - 'A') : c); ! 187: } ! 188: ! 189: int ! 190: to_lower(int c) ! 191: { ! 192: return (c >= 'A' && c <= 'Z' ? c + ('a' - 'A') : c); ! 193: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.