|
|
1.1.1.3 ! root 1: /* ! 2: * language.c - Foreign language translation for PGP ! 3: * Finds foreign language "subtitles" for English phrases ! 4: * in external foriegn language text file. ! 5: */ ! 6: ! 7: #include <stdio.h> ! 8: #include <stdlib.h> ! 9: #include <string.h> ! 10: #include <ctype.h> ! 11: #include "usuals.h" ! 12: #ifndef LANGTOOL ! 13: #include "fileio.h" ! 14: #include "language.h" ! 15: #include "pgp.h" ! 16: #else ! 17: #define MAX_PATH 255 ! 18: boolean verbose; ! 19: long fsize(); ! 20: #endif ! 21: ! 22: char langfile[80] = "language.txt"; ! 23: #define LANG_INDEXFILE "language.idx" ! 24: ! 25: #define STRBUFSIZE 2048 ! 26: ! 27: char language[16] = "en"; /* The language code, defaults to English */ ! 28: static char *strbuf; ! 29: static char lang[16]; /* readstr sets this to the language id of the msg it last read */ ! 30: static int subtitles_available = 0; ! 31: static int line = 0; ! 32: static int errcount = 0; ! 33: /* subtitles_available is used to determine if we know whether the special ! 34: subtitles_file exists. subtitles_available has the following values: ! 35: 0 = first time thru, we don't yet know if subtitles_file exists. ! 36: 1 = we have already determined that subtitles_file exists. ! 37: -1 = we have already determined that subtitles_file does not exist. ! 38: */ ! 39: ! 40: static void error(char *); ! 41: ! 42: #define NEWLINE 0 ! 43: #define COMMENT 1 ! 44: #define INSTRING 2 ! 45: #define ESCAPE 3 ! 46: #define IDENT 4 ! 47: #define DONE 5 ! 48: #define ERROR 6 ! 49: #define ERR1 7 ! 50: ! 51: /* Look for and return a quoted string from the file. ! 52: * If nlabort is true, return failure if we find a blank line ! 53: * before we find the opening quote. ! 54: */ ! 55: static char * ! 56: readstr (FILE *f, char *buf, int nlabort) ! 57: { ! 58: int c, d; ! 59: char *p = buf; ! 60: int state = NEWLINE; ! 61: int i = 0; ! 62: ! 63: while ((c = getc(f)) != EOF) { ! 64: if (c == '\r') ! 65: continue; ! 
66: /* line numbers are only incremented when creating index file */ ! 67: if (line && c == '\n') ! 68: ++line; ! 69: switch (state) { ! 70: case NEWLINE: ! 71: switch(c) { ! 72: case '#': state = COMMENT; break; ! 73: case '"': state = INSTRING; break; ! 74: case '\n': ! 75: if (nlabort) { ! 76: *buf = '\0'; ! 77: return(buf); ! 78: } ! 79: default: ! 80: if (i == 0 && isalnum(c)) { ! 81: state = IDENT; ! 82: lang[i++] = c; ! 83: break; ! 84: } ! 85: if (!isspace(c)) { ! 86: error("syntax error\n"); ! 87: state = ERROR; ! 88: } ! 89: } ! 90: break; ! 91: case COMMENT: ! 92: if (c == '\n') ! 93: state = NEWLINE; ! 94: break; ! 95: case INSTRING: ! 96: switch(c) { ! 97: case '\\': state = ESCAPE; break; ! 98: case '"': state = DONE; break; ! 99: default: *p++ = c; ! 100: } ! 101: break; ! 102: case ESCAPE: ! 103: switch (c) { ! 104: case 'n': *p++ = '\n'; break; ! 105: case 'r': *p++ = '\r'; break; ! 106: case 't': *p++ = '\t'; break; ! 107: case 'e': *p++ = '\033'; break; ! 108: case 'a': *p++ = '\007'; break; ! 109: case '#': ! 110: case '"': ! 111: case '\\': *p++ = c; break; ! 112: case '\n': break; ! 113: case '0': ! 114: case '1': ! 115: case '2': ! 116: case '3': ! 117: case '4': ! 118: case '5': ! 119: case '6': ! 120: case '7': ! 121: /* ANSI C rules: up to 3 octal digits */ ! 122: d = c - '0'; ! 123: if ((c = getc(f)) >= '0' && c <= '7') { ! 124: d = (d<<3) + (c-'0'); ! 125: if ((c = getc(f)) >= '0' && c <= '7') ! 126: d = (d<<3) + (c-'0'); ! 127: else ! 128: ungetc(c, f); ! 129: } else { ! 130: ungetc(c, f); ! 131: } ! 132: *p++ = d; ! 133: break; ! 134: default: ! 135: error("illegal escape sequence: "); ! 136: fprintf(stderr, "'\\%c'\n", c); ! 137: break; ! 138: } ! 139: state = INSTRING; ! 140: break; ! 141: case IDENT: /* language identifier */ ! 142: if (c == ':') { ! 143: state = NEWLINE; ! 144: break; ! 145: } ! 146: if (c == '\n' && strncmp(lang, "No translation", 14) == 0) ! 147: { ! 148: i = 0; ! 149: state = NEWLINE; ! 150: break; ! 151: } ! 
152: lang[i++] = c; ! 153: if (i == 15 || !isalnum(c) && !isspace(c)) { ! 154: lang[i] = '\0'; ! 155: error("bad language identifier\n"); ! 156: state = ERROR; ! 157: i = 0; ! 158: } ! 159: break; ! 160: case DONE: ! 161: if (c == '\n') { ! 162: lang[i] = '\0'; ! 163: *p = '\0'; ! 164: return(buf); ! 165: } ! 166: if (!isspace(c)) { ! 167: error("extra characters after '\"'\n"); ! 168: state = ERROR; ! 169: } ! 170: break; ! 171: case ERROR: ! 172: if (c == '\n') ! 173: state = ERR1; ! 174: break; ! 175: case ERR1: ! 176: state = (c == '\n' ? NEWLINE : ERROR); ! 177: break; ! 178: } ! 179: } ! 180: if (state != NEWLINE) ! 181: error("unexpected EOF\n"); ! 182: return(NULL); ! 183: } ! 184: ! 185: ! 186: static struct indx_ent { ! 187: word32 crc; ! 188: long offset; ! 189: } *indx_tbl = NULL; ! 190: ! 191: static int max_msgs = 0; ! 192: static int nmsg = 0; ! 193: ! 194: static FILE *langf; ! 195: ! 196: static struct { ! 197: long lang_fsize; /* size of language.txt */ ! 198: char lang[16]; /* language identifier */ ! 199: int nmsg; /* number of messages */ ! 200: } indx_hdr; ! 201: ! 202: ! 203: static int make_indexfile(char *); ! 204: word32 crcupdate(byte, word32); ! 205: void init_crc(); ! 206: ! 207: /* ! 208: * uses 24-bit CRC function from armor.c ! 209: */ ! 210: static word32 ! 211: message_crc(char *s) ! 212: { ! 213: word32 crc = 0; ! 214: ! 215: while (*s) ! 216: crc = crcupdate(*s++, crc); ! 217: return(crc); ! 218: } ! 219: ! 220: /* ! 221: * lookup file offset in indx_tbl ! 222: */ ! 223: static long ! 224: lookup_offset(word32 crc) ! 225: { ! 226: int i; ! 227: ! 228: for (i = 0; i < nmsg; ++i) ! 229: if (indx_tbl[i].crc == crc) ! 230: return(indx_tbl[i].offset); ! 231: return(-1); ! 232: } ! 233: ! 234: ! 235: ! 236: #ifndef LANGTOOL ! 237: static void init_lang(); ! 238: ! 239: /* ! 240: * return foreign translation of s ! 241: */ ! 242: char * ! 243: PSTR (char *s) ! 244: { ! 245: long filepos; ! 246: ! 247: if (subtitles_available == 0) ! 
248: init_lang(); ! 249: if (subtitles_available < 0) ! 250: return(s); ! 251: ! 252: filepos = lookup_offset(message_crc(s)); ! 253: if (filepos == -1) { ! 254: return(s); ! 255: } else { ! 256: fseek(langf, filepos, SEEK_SET); ! 257: readstr(langf, strbuf, 1); ! 258: } ! 259: ! 260: if (strbuf[0] == '\0') ! 261: return(s); ! 262: ! 263: for (s = strbuf; *s; ++s) ! 264: *s = EXT_C(*s); ! 265: return(strbuf); ! 266: } ! 267: ! 268: /* ! 269: * initialize the index table: read it from language.idx or create ! 270: * a new one and write it to the index file. A new index file is ! 271: * created if the language set in config.pgp doesn't match the one ! 272: * in language.idx or if the size of language.txt has changed. ! 273: */ ! 274: static void ! 275: init_lang() ! 276: { ! 277: char indexfile[MAX_PATH]; ! 278: char subtitles_file[MAX_PATH]; ! 279: FILE *indexf; ! 280: ! 281: if (strcmp(language, "en") == 0) { ! 282: subtitles_available = -1; ! 283: return; /* use default messages */ ! 284: } ! 285: ! 286: buildfilename (subtitles_file, langfile); ! 287: if ((langf = fopen(subtitles_file, "rb")) == NULL) { ! 288: subtitles_available = -1; ! 289: return; ! 290: } ! 291: init_crc(); ! 292: if ((strbuf = (char *) malloc(STRBUFSIZE)) == NULL) { ! 293: fprintf(stderr, "Not enough memory for foreign subtitles\n"); ! 294: fclose(langf); ! 295: subtitles_available = -1; ! 296: return; ! 297: } ! 298: buildfilename(indexfile, LANG_INDEXFILE); ! 299: if ((indexf = fopen(indexfile, "rb")) != NULL) { ! 300: if (fread(&indx_hdr, sizeof(indx_hdr), 1, indexf) == 1 && ! 301: indx_hdr.lang_fsize == fsize(langf) && ! 302: strcmp(indx_hdr.lang, language) == 0) ! 303: { ! 304: nmsg = indx_hdr.nmsg; ! 305: indx_tbl = (struct indx_ent *) malloc(nmsg * sizeof(struct indx_ent)); ! 306: if (indx_tbl == NULL) { ! 307: fprintf(stderr, "Not enough memory for foreign subtitles\n"); ! 308: fclose(indexf); ! 309: fclose(langf); ! 310: subtitles_available = -1; ! 311: return; ! 312: } ! 
313: if (fread(indx_tbl, sizeof(struct indx_ent), nmsg, indexf) != nmsg) ! 314: { ! 315: free(indx_tbl); /* create a new one */ ! 316: indx_tbl = NULL; ! 317: } ! 318: } ! 319: fclose(indexf); ! 320: } ! 321: if (indx_tbl == NULL && make_indexfile(indexfile) < 0) { ! 322: fclose(langf); ! 323: subtitles_available = -1; ! 324: } else { ! 325: subtitles_available = 1; ! 326: } ! 327: } ! 328: #endif /* !LANGTOOL */ ! 329: ! 330: ! 331: /* ! 332: * build the index table in memory, and if indexfile is not NULL, ! 333: * write it to this file ! 334: */ ! 335: static int ! 336: make_indexfile(char *indexfile) ! 337: { ! 338: FILE *indexf; ! 339: long filepos; ! 340: int total_msgs = 0; ! 341: char *res; ! 342: ! 343: rewind(langf); ! 344: indx_hdr.lang_fsize = fsize(langf); ! 345: strncpy(indx_hdr.lang, language, 15); ! 346: init_crc(); ! 347: line = 1; ! 348: nmsg = 0; ! 349: while (readstr(langf, strbuf, 0)) { ! 350: if (nmsg == max_msgs) { ! 351: if (max_msgs) { ! 352: max_msgs *= 2; ! 353: indx_tbl = (struct indx_ent *) realloc(indx_tbl, max_msgs * ! 354: sizeof(struct indx_ent)); ! 355: } else { ! 356: max_msgs = 400; ! 357: indx_tbl = (struct indx_ent *) malloc(max_msgs * ! 358: sizeof(struct indx_ent)); ! 359: } ! 360: if (indx_tbl == NULL) { ! 361: fprintf(stderr, "Not enough memory for foreign subtitles\n"); ! 362: return(-1); ! 363: } ! 364: } ! 365: ++total_msgs; ! 366: indx_tbl[nmsg].crc = message_crc(strbuf); ! 367: if (lookup_offset(indx_tbl[nmsg].crc) != -1) ! 368: error("message CRC not unique.\n"); ! 369: do { ! 370: filepos = ftell(langf); ! 371: res = readstr (langf, strbuf, 1); /* Abort if find newline first */ ! 372: if (*language == '\0') /* use first language found */ ! 373: strcpy(language, lang); ! 374: } while (res && strbuf[0] != '\0' && strcmp(language, lang) != 0); ! 375: ! 376: if (res == NULL) ! 377: break; ! 378: if (strbuf[0] == '\0') /* No translation */ ! 379: continue; ! 380: ! 381: indx_tbl[nmsg].offset = filepos; ! 382: ++nmsg; ! 
383: do ! 384: res = readstr (langf, strbuf, 1); /* Abort if find newline first */ ! 385: while (res && strbuf[0] != '\0'); ! 386: } ! 387: line = 0; ! 388: indx_hdr.nmsg = nmsg; ! 389: if (verbose) ! 390: fprintf(stderr, "%s: %d messages, %d translations for language \"%s\"\n", ! 391: langfile, total_msgs, nmsg, language); ! 392: if (nmsg == 0) { ! 393: fprintf(stderr, "No translations available for language \"%s\"\n\n", ! 394: language); ! 395: return(-1); ! 396: } ! 397: ! 398: if (indexfile) { ! 399: if ((indexf = fopen(indexfile, "wb")) == NULL) ! 400: fprintf(stderr, "Cannot create %s\n", indexfile); ! 401: else { ! 402: fwrite(&indx_hdr, 1, sizeof(indx_hdr), indexf); ! 403: fwrite(indx_tbl, sizeof(struct indx_ent), nmsg, indexf); ! 404: if (ferror(indexf) || fclose(indexf)) ! 405: fprintf(stderr, "error writing %s\n", indexfile); ! 406: } ! 407: } ! 408: return(0); ! 409: } ! 410: ! 411: static void ! 412: error(char *s) ! 413: { ! 414: ++errcount; ! 415: if (langfile[0]) ! 416: fprintf(stderr, "%s:", langfile); ! 417: if (line) ! 418: fprintf(stderr, "%d:", line); ! 419: fprintf(stderr, " %s", s); ! 420: } ! 421: ! 422: #ifdef LANGTOOL ! 423: /* ! 424: * language string tool for manipulating language files ! 425: * link with CRC routines from armor.c ! 426: */ ! 427: ! 428: #define CMD_EXTRACT 1 ! 429: #define CMD_CHECK 2 ! 430: #define CMD_MERGE 3 ! 431: ! 432: extern char *optarg; ! 433: extern int optind; ! 434: ! 435: main(int argc, char **argv) ! 436: { ! 437: int opt, cmd = 0, rc = 0; ! 438: char *langIDs[16]; ! 439: char *outfile = NULL; ! 440: ! 441: init_crc(); ! 442: if ((strbuf = (char *) malloc(STRBUFSIZE)) == NULL) { ! 443: perror(argv[0]); ! 444: exit(1); ! 445: } ! 446: while ((opt = getopt(argc, argv, "cxmo:")) != EOF) { ! 447: switch (opt) { ! 448: case 'c': cmd = CMD_CHECK; break; ! 449: case 'x': cmd = CMD_EXTRACT; break; ! 450: case 'm': cmd = CMD_MERGE; break; ! 451: case 'o': outfile = optarg; break; ! 452: default: usage(); ! 453: } ! 
454: } ! 455: argc -= optind; argv += optind; ! 456: switch (cmd) { ! 457: case CMD_EXTRACT: ! 458: if (argc < 2) ! 459: usage(); ! 460: rc = extract(argv[0], outfile, &argv[1]); ! 461: break; ! 462: case CMD_MERGE: ! 463: if (argc < 2) ! 464: usage(); ! 465: rc = merge(argv[0], argv[1], outfile, argv[2]); ! 466: break; ! 467: case CMD_CHECK: ! 468: verbose = 1; ! 469: if (argc == 0) ! 470: checkfile("language.txt"); ! 471: else ! 472: while (--argc >= 0) ! 473: checkfile(*argv++); ! 474: break; ! 475: default: usage(); ! 476: } ! 477: exit(rc); ! 478: } ! 479: ! 480: usage() ! 481: { ! 482: fprintf(stderr, "usage: langtool -[x|c|m] [-o outputfile] ...\n\n\ ! 483: To extract one or more languages from a merged file:\n\ ! 484: langtool -x [-o outputfile] file langID...\n\n\ ! 485: To check a language file for syntax errors:\n\ ! 486: langtool -c file...\n\n\ ! 487: To merge language \"lang\" from lang_file with source_file:\n\ ! 488: langtool -m [-o outputfile] source_file lang_file [lang]\n"); ! 489: exit(1); ! 490: } ! 491: ! 492: merge(char *base_file, char *lang_file, char *outfile, char *langID) ! 493: { ! 494: FILE *fp, *outf; ! 495: long fpos = 0, filepos; ! 496: int newmsgs = 0; ! 497: ! 498: if ((langf = fopen(lang_file, "r")) == NULL) { ! 499: perror(lang_file); ! 500: return -1; ! 501: } ! 502: strcpy(langfile, lang_file); ! 503: if (langID) ! 504: strcpy(language, langID); ! 505: else ! 506: language[0] = '\0'; /* use first language found */ ! 507: ! 508: errcount = 0; ! 509: make_indexfile(NULL); ! 510: if (errcount) ! 511: return -1; ! 512: ! 513: langfile[0] = '\0'; /* don't print filename in error msgs */ ! 514: ! 515: if ((fp = fopen(base_file, "r")) == NULL) { ! 516: perror(base_file); ! 517: return -1; ! 518: } ! 519: if (outfile == NULL) ! 520: outf = stdout; ! 521: else { ! 522: if ((outf = fopen(outfile, "w")) == NULL) { ! 523: perror(outfile); ! 524: return(-1); ! 525: } ! 526: } ! 527: ! 528: while (readstr(fp, strbuf, 0)) { ! 
529: copypos(fp, outf, fpos); ! 530: fpos = ftell(fp); ! 531: ! 532: filepos = lookup_offset(message_crc(strbuf)); ! 533: if (filepos == -1) { ! 534: fprintf(outf, "No translation\n"); ! 535: ++newmsgs; ! 536: } else { ! 537: fseek(langf, filepos, SEEK_SET); ! 538: readstr(langf, strbuf, 1); ! 539: copypos(langf, outf, filepos); ! 540: } ! 541: ! 542: while (readstr(fp, strbuf, 1)) ! 543: if (*strbuf == '\0') ! 544: break; ! 545: } ! 546: copypos(fp, outf, fpos); ! 547: fflush(outf); ! 548: if (ferror(outf)) { ! 549: perror(outfile); ! 550: return -1; ! 551: } ! 552: if (newmsgs) ! 553: fprintf(stderr, "%d untranslated messages\n", newmsgs); ! 554: return errcount; ! 555: } ! 556: ! 557: extract(char *infile, char *outfile, char **langIDs) ! 558: { ! 559: FILE *fp, *outf; ! 560: long fpos = 0; ! 561: char **langID; ! 562: ! 563: if ((fp = fopen(infile, "r")) == NULL) { ! 564: perror(infile); ! 565: return -1; ! 566: } ! 567: if (outfile == NULL) { ! 568: outf = stdout; ! 569: } else { ! 570: if ((outf = fopen(outfile, "w")) == NULL) { ! 571: perror(outfile); ! 572: fclose(fp); ! 573: return(-1); ! 574: } ! 575: } ! 576: ! 577: while (readstr(fp, strbuf, 0)) { ! 578: copypos(fp, outf, fpos); ! 579: fpos = ftell(fp); ! 580: while (readstr(fp, strbuf, 1)) { ! 581: if (*strbuf == '\0') ! 582: break; ! 583: for (langID = langIDs; *langID; ++langID) { ! 584: if (strcmp(lang, *langID) == 0) ! 585: copypos(fp, outf, fpos); ! 586: } ! 587: fpos = ftell(fp); ! 588: } ! 589: } ! 590: copypos(fp, outf, fpos); ! 591: fflush(outf); ! 592: if (ferror(outf)) { ! 593: perror(outfile); ! 594: return -1; ! 595: } ! 596: return 0; ! 597: } ! 598: ! 599: checkfile(char *name) ! 600: { ! 601: if ((langf = fopen(name, "rb")) == NULL) { ! 602: perror(name); ! 603: return -1; ! 604: } ! 605: strcpy(langfile, name); ! 606: language[0] = '\0'; /* count messages for first language */ ! 607: errcount = 0; ! 608: make_indexfile(NULL); ! 609: fclose(langf); ! 610: return errcount; ! 611: } ! 

/*
 * Copy up to n characters from f to g.
 *
 * Reading continues even after a write error so that f ends up at the
 * same position as on success.  Returns 0 if n characters were read and
 * written (n <= 0 copies nothing and succeeds), -1 if f ended early or
 * a write to g failed.
 */
int
copyfile(FILE *f, FILE *g, long n)
{
	int c;
	int status = 0;

	while (n-- > 0) {
		if ((c = getc(f)) == EOF) {
			status = -1;
			break;
		}
		if (putc(c, g) == EOF)
			status = -1;
	}
	return status;
}

/*
 * Copy to g the part of f between offset pos and the current position
 * of f; f is read again from pos for that purpose.
 *
 * Returns 0 on success, -1 if f cannot be positioned or the copy is
 * incomplete.
 */
int
copypos(FILE *f, FILE *g, long pos)
{
	long end = ftell(f);

	if (end < 0 || fseek(f, pos, SEEK_SET) != 0)
		return -1;
	return copyfile(f, g, end - pos);
}

/*
 * Return the size of the open file f as reported by ftell() at its end,
 * or -1 if f cannot be positioned.  The current position of f is
 * restored before returning.
 */
long
fsize(FILE *f)
{
	long len, pos;

	pos = ftell(f);
	if (pos < 0 || fseek(f, 0L, SEEK_END) != 0)
		return -1L;
	len = ftell(f);
	if (fseek(f, pos, SEEK_SET) != 0)
		return -1L;
	return len;
}
#endif /* LANGTOOL */
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.