pgp/src/charset.c - annotate

Return to charset.c CVS log
Up to [PGP] / pgp / src
Annotation of pgp/src/charset.c, revision 1.1.1.5

1.1.1.5 ! root        1: /*
        !             2:  * charset.c
        !             3:  *
        !             4:  * Conversion tables and routines to support different character sets.
        !             5:  * The PGP internal format is latin-1.
        !             6:  *
        !             7:  * (c) Copyright 1990-1994 by Philip Zimmermann.  All rights reserved.
        !             8:  * The author assumes no liability for damages resulting from the use
        !             9:  * of this software, even if the damage results from defects in this
        !            10:  * software.  No warranty is expressed or implied.
        !            11:  *
        !            12:  * Code that has been incorporated into PGP from other sources was
        !            13:  * either originally published in the public domain or is used with
        !            14:  * permission from the various authors.
        !            15:  *
        !            16:  * PGP is available for free to the public under certain restrictions.
        !            17:  * See the PGP User's Guide (included in the release package) for
        !            18:  * important information about licensing, patent restrictions on
        !            19:  * certain algorithms, trademarks, copyrights, and export controls.
        !            20:  */
        !            21: 
        !            22: #include <stdio.h>
        !            23: #include <string.h>
        !            24: #include "usuals.h"
        !            25: #include "language.h"
        !            26: #include "charset.h"
        !            27: #include "system.h"
        !            28: 
        !            29: #ifndef NULL
        !            30: #define        NULL    0
        !            31: #endif
        !            32: 
        /* Replacement for Latin-1 characters that have no ASCII look-alike. */
        #define UNK    '?'

        /*
         * ISO 8859-1 (Latin-1) to US ASCII fallback table.
         *
         * Entry i is the ASCII stand-in for Latin-1 byte 0x80 + i; accented
         * letters collapse to their base letter and anything without a
         * reasonable substitute becomes UNK.
         */
        static unsigned char
        intern2ascii[] = {
                /* 0x80-0x8F */
                UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK,
                UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK,
                /* 0x90-0x9F */
                UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK,
                UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK,
                /* 0xA0-0xAF: punctuation and symbols */
                ' ', '!', 'c', '#', '$', 'Y', '|', 'P',
                '"', 'C', 'a', '"', '~', '-', 'R', '_',
                /* 0xB0-0xBF: punctuation and symbols */
                'o', UNK, '2', '3', '\'', 'u', '-', '-',
                ',', '1', 'o', '"', UNK, UNK, UNK, '?',
                /* 0xC0-0xCF: upper-case accented letters */
                'A', 'A', 'A', 'A', 'A', 'A', 'A', 'C',
                'E', 'E', 'E', 'E', 'I', 'I', 'I', 'I',
                /* 0xD0-0xDF */
                'D', 'N', 'O', 'O', 'O', 'O', 'O', 'x',
                'O', 'U', 'U', 'U', 'U', 'Y', 'T', 's',
                /* 0xE0-0xEF: lower-case accented letters */
                'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
                'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
                /* 0xF0-0xFF */
                'd', 'n', 'o', 'o', 'o', 'o', 'o', '/',
                'o', 'u', 'u', 'u', 'u', 'y', 't', 'y'
        };
        !            46: 
        /*
         * ISO 8859-1 (Latin-1) to IBM Code Page 850.
         *
         * Entry i is the CP850 code for Latin-1 byte 0x80 + i; the comment on
         * each row gives the Latin-1 byte of the row's first entry.
         */
        static unsigned char
        intern2cp850[] = {
                186, 205, 201, 187, 200, 188, 204, 185,         /* 0x80 */
                203, 202, 206, 223, 220, 219, 254, 242,         /* 0x88 */
                179, 196, 218, 191, 192, 217, 195, 180,         /* 0x90 */
                194, 193, 197, 176, 177, 178, 213, 159,         /* 0x98 */
                255, 173, 189, 156, 207, 190, 221, 245,         /* 0xA0 */
                249, 184, 166, 174, 170, 240, 169, 238,         /* 0xA8 */
                248, 241, 253, 252, 239, 230, 244, 250,         /* 0xB0 */
                247, 251, 167, 175, 172, 171, 243, 168,         /* 0xB8 */
                183, 181, 182, 199, 142, 143, 146, 128,         /* 0xC0 */
                212, 144, 210, 211, 222, 214, 215, 216,         /* 0xC8 */
                209, 165, 227, 224, 226, 229, 153, 158,         /* 0xD0 */
                157, 235, 233, 234, 154, 237, 232, 225,         /* 0xD8 */
                133, 160, 131, 198, 132, 134, 145, 135,         /* 0xE0 */
                138, 130, 136, 137, 141, 161, 140, 139,         /* 0xE8 */
                208, 164, 149, 162, 147, 228, 148, 246,         /* 0xF0 */
                155, 151, 163, 150, 129, 236, 231, 152          /* 0xF8 */
        };
        !            58: 
        /*
         * IBM Code Page 850 to ISO 8859-1 (Latin-1).
         *
         * Entry i is the Latin-1 code for CP850 byte 0x80 + i; the comment on
         * each row gives the CP850 byte of the row's first entry.
         */
        static unsigned char
        cp8502intern[] = {
                199, 252, 233, 226, 228, 224, 229, 231,         /* 0x80 */
                234, 235, 232, 239, 238, 236, 196, 197,         /* 0x88 */
                201, 230, 198, 244, 246, 242, 251, 249,         /* 0x90 */
                255, 214, 220, 248, 163, 216, 215, 159,         /* 0x98 */
                225, 237, 243, 250, 241, 209, 170, 186,         /* 0xA0 */
                191, 174, 172, 189, 188, 161, 171, 187,         /* 0xA8 */
                155, 156, 157, 144, 151, 193, 194, 192,         /* 0xB0 */
                169, 135, 128, 131, 133, 162, 165, 147,         /* 0xB8 */
                148, 153, 152, 150, 145, 154, 227, 195,         /* 0xC0 */
                132, 130, 137, 136, 134, 129, 138, 164,         /* 0xC8 */
                240, 208, 202, 203, 200, 158, 205, 206,         /* 0xD0 */
                207, 149, 146, 141, 140, 166, 204, 139,         /* 0xD8 */
                211, 223, 212, 210, 245, 213, 181, 254,         /* 0xE0 */
                222, 218, 219, 217, 253, 221, 175, 180,         /* 0xE8 */
                173, 177, 143, 190, 182, 167, 247, 184,         /* 0xF0 */
                176, 168, 183, 185, 179, 178, 142, 160          /* 0xF8 */
        };
        !            70: 
        !            71: /* Russian language specific conversion section */
        !            72: /* Two point-to-point charset decode tables     */
        !            73: /* produced by Andrew A. Chernov                */
        !            74: 
        /*
         * KOI8-R to ALT-CODES, one character at a time.
         *
         * Entry i is the ALT-CODES byte for KOI8-R byte 0x80 + i; the comment
         * on each row gives the KOI8-R byte of the row's first entry.
         */
        static unsigned char intern2alt[] = {
                0xc4, 0xb3, 0xda, 0xbf, 0xc0, 0xd9, 0xc3, 0xb4,     /* 0x80 */
                0xc2, 0xc1, 0xc5, 0xdf, 0xdc, 0xdb, 0xdd, 0xde,     /* 0x88 */
                0xb0, 0xb1, 0xb2, 0xf4, 0xfe, 0xf9, 0xfb, 0xf7,     /* 0x90 */
                0xf3, 0xf2, 0xff, 0xf5, 0xf8, 0xfd, 0xfa, 0xf6,     /* 0x98 */
                0xcd, 0xba, 0xd5, 0xf1, 0xd6, 0xc9, 0xb8, 0xb7,     /* 0xA0 */
                0xbb, 0xd4, 0xd3, 0xc8, 0xbe, 0xbd, 0xbc, 0xc6,     /* 0xA8 */
                0xc7, 0xcc, 0xb5, 0xf0, 0xb6, 0xb9, 0xd1, 0xd2,     /* 0xB0 */
                0xcb, 0xcf, 0xd0, 0xca, 0xd8, 0xd7, 0xce, 0xfc,     /* 0xB8 */
                0xee, 0xa0, 0xa1, 0xe6, 0xa4, 0xa5, 0xe4, 0xa3,     /* 0xC0 */
                0xe5, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae,     /* 0xC8 */
                0xaf, 0xef, 0xe0, 0xe1, 0xe2, 0xe3, 0xa6, 0xa2,     /* 0xD0 */
                0xec, 0xeb, 0xa7, 0xe8, 0xed, 0xe9, 0xe7, 0xea,     /* 0xD8 */
                0x9e, 0x80, 0x81, 0x96, 0x84, 0x85, 0x94, 0x83,     /* 0xE0 */
                0x95, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,     /* 0xE8 */
                0x8f, 0x9f, 0x90, 0x91, 0x92, 0x93, 0x86, 0x82,     /* 0xF0 */
                0x9c, 0x9b, 0x87, 0x98, 0x9d, 0x99, 0x97, 0x9a      /* 0xF8 */
        };
        !            94: 
        /*
         * ALT-CODES to KOI8-R, one character at a time.
         *
         * Entry i is the KOI8-R byte for ALT-CODES byte 0x80 + i; the comment
         * on each row gives the ALT-CODES byte of the row's first entry.
         */
        static unsigned char alt2intern[] = {
                0xe1, 0xe2, 0xf7, 0xe7, 0xe4, 0xe5, 0xf6, 0xfa,     /* 0x80 */
                0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0,     /* 0x88 */
                0xf2, 0xf3, 0xf4, 0xf5, 0xe6, 0xe8, 0xe3, 0xfe,     /* 0x90 */
                0xfb, 0xfd, 0xff, 0xf9, 0xf8, 0xfc, 0xe0, 0xf1,     /* 0x98 */
                0xc1, 0xc2, 0xd7, 0xc7, 0xc4, 0xc5, 0xd6, 0xda,     /* 0xA0 */
                0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,     /* 0xA8 */
                0x90, 0x91, 0x92, 0x81, 0x87, 0xb2, 0xb4, 0xa7,     /* 0xB0 */
                0xa6, 0xb5, 0xa1, 0xa8, 0xae, 0xad, 0xac, 0x83,     /* 0xB8 */
                0x84, 0x89, 0x88, 0x86, 0x80, 0x8a, 0xaf, 0xb0,     /* 0xC0 */
                0xab, 0xa5, 0xbb, 0xb8, 0xb1, 0xa0, 0xbe, 0xb9,     /* 0xC8 */
                0xba, 0xb6, 0xb7, 0xaa, 0xa9, 0xa2, 0xa4, 0xbd,     /* 0xD0 */
                0xbc, 0x85, 0x82, 0x8d, 0x8c, 0x8e, 0x8f, 0x8b,     /* 0xD8 */
                0xd2, 0xd3, 0xd4, 0xd5, 0xc6, 0xc8, 0xc3, 0xde,     /* 0xE0 */
                0xdb, 0xdd, 0xdf, 0xd9, 0xd8, 0xdc, 0xc0, 0xd1,     /* 0xE8 */
                0xb3, 0xa3, 0x99, 0x98, 0x93, 0x9b, 0x9f, 0x97,     /* 0xF0 */
                0x9c, 0x95, 0x9e, 0x96, 0xbf, 0x9d, 0x94, 0x9a      /* 0xF8 */
        };
        !           114: 
        !           115: /*
        !           116:  * Most Unix systems use KOI8, while DOS uses ALT_CODES.
        !           117:  * If your Unix is non-standard, set CHARSET to "alt_codes"
        !           118:  * in config.txt.
        !           119:  */
        !           120: 
        !           121: #ifndef        DEFAULT_CSET
        !           122: #define        DEFAULT_CSET    "noconv"
        !           123: #endif
        !           124: #ifndef        DEFAULT_RU_CSET
        !           125: #define        DEFAULT_RU_CSET "noconv"
        !           126: #endif
        !           127: 
        !           128: /* End of Russian section */
        !           129: 
        !           130: int CONVERSION = NO_CONV;      /* None text file conversion at start time */
        !           131: 
        !           132: unsigned char *ext_c_ptr;
        !           133: static unsigned char *int_c_ptr;
        !           134: 
        !           135: #ifdef MSDOS
        !           136: char charset[64] = "cp850";
        !           137: #else
        !           138: char charset[64] = "";
        !           139: #endif
        !           140: 
        !           141: void
        !           142: init_charset(void)
        !           143: {
        !           144:        ext_c_ptr = NULL;       /* NULL means latin1 or KOI8 (internal format) */
        !           145:        int_c_ptr = NULL;
        !           146: 
        !           147:        if (charset[0] == '\0') {
        !           148:                /* use default character set for this system */
        !           149:                if (strcmp(language, "ru") == 0)
        !           150:                        strcpy(charset, DEFAULT_RU_CSET);
        !           151:                else
        !           152:                        strcpy(charset, DEFAULT_CSET);
        !           153:        } else {
        !           154:                strlwr(charset);
        !           155:        }
        !           156: 
        !           157:        /* latin-1 and KOI8 are in internal format: no conversion needed */
        !           158:        if (!strcmp(charset, "latin1") || !strcmp(charset, "koi8") ||
        !           159:                !strcmp(charset, "noconv"))
        !           160:                return;
        !           161: 
        !           162:        if (!strcmp(charset, "alt_codes")) {
        !           163:                ext_c_ptr = intern2alt;
        !           164:                int_c_ptr = alt2intern;
        !           165:        } else if (!strcmp(charset, "cp850")) {
        !           166:                ext_c_ptr = intern2cp850;
        !           167:                int_c_ptr = cp8502intern;
        !           168:        } else if (!strcmp(charset, "ascii")) {
        !           169:                ext_c_ptr = intern2ascii;
        !           170:        } else {
        !           171:                fprintf(stderr, LANG("Unsupported character set: '%s'\n"), charset);
        !           172:        }
        !           173: }
        !           174: 
        !           175: char
        !           176: EXT_C(char c)
        !           177: {
        !           178:        if (c > '\0' || !ext_c_ptr)
        !           179:                return c;
        !           180:        return ext_c_ptr[c & 0x7f];
        !           181: }
        !           182: 
        !           183: char
        !           184: INT_C(char c)
        !           185: {
        !           186:        if (c > '\0' || !int_c_ptr)
        !           187:                return c;
        !           188:        return int_c_ptr[c & 0x7f];
        !           189: }
        !           190: 
        /*
         * to_upper() and to_lower() are replacements for toupper() and
         * tolower().  Unlike some historical library versions, they may be
         * called on any value: to_upper() leaves everything except 'a'..'z'
         * untouched, and to_lower() everything except 'A'..'Z'.
         *
         * XXX: should handle the local character set once 8-bit user IDs are
         * allowed.
         */
        int
        to_upper(int c)
        {
                if (c < 'a' || c > 'z')
                        return c;
                return c - ('a' - 'A');
        }
        !           203: 
        /*
         * to_lower - map 'A'..'Z' to 'a'..'z'; every other value, including
         * characters that are already lower case, is returned unchanged.
         */
        int
        to_lower(int c)
        {
                if (c < 'A' || c > 'Z')
                        return c;
                return c + ('a' - 'A');
        }
unix.superglobalmegacorp.com
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.