Annotation of pgp/src/charset.c, revision 1.1.1.1

1.1       root        1: /*
                      2:  * charset.c
                      3:  *
                      4:  * Conversion tables and routines to support different character sets.
                      5:  * The PGP internal format is latin-1.
                      6:  */
                      7: 
                      8: #include <stdio.h>
                      9: #include "usuals.h"
                     10: #include "language.h"
                     11: #include "fileio.h"
                     12: 
                     13: #ifndef NULL
                     14: #define        NULL    0
                     15: #endif
                     16: 
                     17: #define UNK    '?'
                     18: 
                     19: static unsigned char
                     20: intern2ascii[] = {  /* ISO 8859-1 Latin Alphabet 1 to US ASCII */
                     21: UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK,  
                     22: UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK, UNK,  
                     23:  32,  33,  99,  35,  36,  89, 124,  80,  34,  67,  97,  34, 126,  45,  82,  95,  
                     24: 111, UNK,  50,  51,  39, 117,  45,  45,  44,  49, 111,  34, UNK, UNK, UNK,  63,  
                     25:  65,  65,  65,  65,  65,  65,  65,  67,  69,  69,  69,  69,  73,  73,  73,  73,  
                     26:  68,  78,  79,  79,  79,  79,  79, 120,  79,  85,  85,  85,  85,  89,  84, 115,  
                     27:  97,  97,  97,  97,  97,  97,  97,  99, 101, 101, 101, 101, 105, 105, 105, 105,  
                     28: 100, 110, 111, 111, 111, 111, 111,  47, 111, 117, 117, 117, 117, 121, 116, 121 
                     29: };
                     30: 
                     31: static unsigned char
                     32: intern2cp850[] = {  /* ISO 8859-1 Latin Alphabet 1 (Latin-1) to IBM Code Page 850 */
                     33: 186, 205, 201, 187, 200, 188, 204, 185, 203, 202, 206, 223, 220, 219, 254, 242,
                     34: 179, 196, 218, 191, 192, 217, 195, 180, 194, 193, 197, 176, 177, 178, 213, 159,
                     35: 255, 173, 189, 156, 207, 190, 221, 245, 249, 184, 166, 174, 170, 240, 169, 238,
                     36: 248, 241, 253, 252, 239, 230, 244, 250, 247, 251, 167, 175, 172, 171, 243, 168,
                     37: 183, 181, 182, 199, 142, 143, 146, 128, 212, 144, 210, 211, 222, 214, 215, 216,
                     38: 209, 165, 227, 224, 226, 229, 153, 158, 157, 235, 233, 234, 154, 237, 232, 225,
                     39: 133, 160, 131, 198, 132, 134, 145, 135, 138, 130, 136, 137, 141, 161, 140, 139,
                     40: 208, 164, 149, 162, 147, 228, 148, 246, 155, 151, 163, 150, 129, 236, 231, 152
                     41: };
                     42: 
                     43: static unsigned char
                     44: cp8502intern[] = {  /* IBM Code Page 850 to Latin-1 */
                     45: 199, 252, 233, 226, 228, 224, 229, 231, 234, 235, 232, 239, 238, 236, 196, 197,
                     46: 201, 230, 198, 244, 246, 242, 251, 249, 255, 214, 220, 248, 163, 216, 215, 159,
                     47: 225, 237, 243, 250, 241, 209, 170, 186, 191, 174, 172, 189, 188, 161, 171, 187,
                     48: 155, 156, 157, 144, 151, 193, 194, 192, 169, 135, 128, 131, 133, 162, 165, 147,
                     49: 148, 153, 152, 150, 145, 154, 227, 195, 132, 130, 137, 136, 134, 129, 138, 164,
                     50: 240, 208, 202, 203, 200, 158, 205, 206, 207, 149, 146, 141, 140, 166, 204, 139,
                     51: 211, 223, 212, 210, 245, 213, 181, 254, 222, 218, 219, 217, 253, 221, 175, 180,
                     52: 173, 177, 143, 190, 182, 167, 247, 184, 176, 168, 183, 185, 179, 178, 142, 160
                     53: };
                     54: 
                     55: /* Russian language specific conversation section */
                     56: /* Two point-to-point charset decode tables */
                     57: 
                     58: /* Decode single char from KOI8 to ALT-CODES, if present */
                     59: static unsigned char intern2alt[] = {
                     60:        0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
                     61:        0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
                     62:        0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
                     63:        0xc8, 0xc9, 0xca, 0xff, 0xcc, 0xcd, 0xce, 0xcf,
                     64:        0xd0, 0xd1, 0xd2, 0xf1, 0xd4, 0xd5, 0xd6, 0xd7,
                     65:        0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
                     66:        0xd3, 0xf3, 0xf2, 0xf0, 0xf4, 0xf5, 0xf6, 0xf7,
                     67:        0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xcb,
                     68:        0xee, 0xa0, 0xa1, 0xe6, 0xa4, 0xa5, 0xe4, 0xa3,
                     69:        0xe5, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae,
                     70:        0xaf, 0xef, 0xe0, 0xe1, 0xe2, 0xe3, 0xa6, 0xa2,
                     71:        0xec, 0xeb, 0xa7, 0xe8, 0xed, 0xe9, 0xe7, 0xea,
                     72:        0x9e, 0x80, 0x81, 0x96, 0x84, 0x85, 0x94, 0x83,
                     73:        0x95, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
                     74:        0x8f, 0x9f, 0x90, 0x91, 0x92, 0x93, 0x86, 0x82,
                     75:        0x9c, 0x9b, 0x87, 0x98, 0x9d, 0x99, 0x97, 0x9a
                     76: };
                     77: 
                     78: /* Decode single char from ALT-CODES, if present, to KOI8 */
                     79: static unsigned char alt2intern[] = {
                     80:        0xe1, 0xe2, 0xf7, 0xe7, 0xe4, 0xe5, 0xf6, 0xfa,
                     81:        0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0,
                     82:        0xf2, 0xf3, 0xf4, 0xf5, 0xe6, 0xe8, 0xe3, 0xfe,
                     83:        0xfb, 0xfd, 0xff, 0xf9, 0xf8, 0xfc, 0xe0, 0xf1,
                     84:        0xc1, 0xc2, 0xd7, 0xc7, 0xc4, 0xc5, 0xd6, 0xda,
                     85:        0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
                     86:        0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
                     87:        0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
                     88:        0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
                     89:        0x98, 0x99, 0x9a, 0xbf, 0x9c, 0x9d, 0x9e, 0x9f,
                     90:        0xa0, 0xa1, 0xa2, 0xb0, 0xa4, 0xa5, 0xa6, 0xa7,
                     91:        0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
                     92:        0xd2, 0xd3, 0xd4, 0xd5, 0xc6, 0xc8, 0xc3, 0xde,
                     93:        0xdb, 0xdd, 0xdf, 0xd9, 0xd8, 0xdc, 0xc0, 0xd1,
                     94:        0xb3, 0xa3, 0xb2, 0xb1, 0xb4, 0xb5, 0xb6, 0xb7,
                     95:        0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0x9b
                     96: };
                     97: 
                     98: /* End of Russian section */
                     99: 
                    100: /*
                    101:  * Most Unixes has KOI8, and DOS has ALT_CODE
                    102:  * If your Unix is non-standard, set CHARSET to "alt_codes"
                    103:  * in config.txt
                    104:  */
                    105: 
                    106: #ifndef        DEFAULT_CSET
                    107: #define        DEFAULT_CSET    "noconv"
                    108: #endif
                    109: #ifndef        DEFAULT_RU_CSET
                    110: #define        DEFAULT_RU_CSET "noconv"
                    111: #endif
                    112: 
                    113: 
                    114: int CONVERSION = NO_CONV;      /* None text file conversion at start time */
                    115: 
                    116: unsigned char *ext_c_ptr;
                    117: unsigned char *int_c_ptr;
                    118: 
                    119: char charset[64] = "";
                    120: 
                    121: void
                    122: init_charset()
                    123: {
                    124:        ext_c_ptr = NULL;       /* NULL means latin1 or KOI8 (internal format) */
                    125:        int_c_ptr = NULL;
                    126: 
                    127:        if (charset[0] == '\0')
                    128:        {       /* use default character set for this system */
                    129:                if (strcmp(language, "ru") == 0)
                    130:                        strcpy(charset, DEFAULT_RU_CSET);
                    131:                else
                    132:                        strcpy(charset, DEFAULT_CSET);
                    133:        }
                    134:        else
                    135:                strlwr(charset);
                    136: 
                    137:        /* latin-1 and KOI8 are in internal format: no conversion needed */
                    138:        if (!strcmp(charset, "latin1") || !strcmp(charset, "koi8") ||
                    139:                !strcmp(charset, "noconv"))
                    140:                return;
                    141: 
                    142:        if (!strcmp(charset, "alt_codes"))
                    143:        {       ext_c_ptr = intern2alt;
                    144:                int_c_ptr = alt2intern;
                    145:        } else if (!strcmp(charset, "cp850"))
                    146:        {       ext_c_ptr = intern2cp850;
                    147:                int_c_ptr = cp8502intern;
                    148:        } else if (!strcmp(charset, "ascii"))
                    149:        {       ext_c_ptr = intern2ascii;
                    150:        } else
                    151:        {
                    152:                fprintf(stderr, PSTR("Unsupported character set: '%s'\n"), charset);
                    153:        }
                    154: }
                    155: 
                    156: char
                    157: EXT_C(char c)
                    158: {
                    159:        if (c > '\0' || !ext_c_ptr)
                    160:                return c;
                    161:        return ext_c_ptr[c & 0x7f];
                    162: }
                    163: 
                    164: char
                    165: INT_C(char c)
                    166: {
                    167:        if (c > '\0' || !int_c_ptr)
                    168:                return c;
                    169:        return int_c_ptr[c & 0x7f];
                    170: }

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.