|
|
1.1 root 1: /*
2: * language.c - Foreign language translation for PGP
3: * Finds foreign language "subtitles" for English phrases
4: * in external foreign language text file.
5: */
6:
7: #include <stdio.h>
8: #include <stdlib.h>
9: #include <string.h>
10: #include <ctype.h>
11: #include "usuals.h"
12: #ifndef LANGTOOL
13: #include "fileio.h"
14: #include "language.h"
15: #include "pgp.h"
16: #else
17: #define MAX_PATH 255
18: boolean verbose;
19: long fsize();
20: #endif
21:
/* Name of the translation text file and of the index file built from it. */
#define LANG_INDEXFILE	"language.idx"
char langfile[80] = "language.txt";

/* Size of the buffer allocated for strbuf. */
#define STRBUFSIZE	2048

/* The language code, defaults to English */
char language[16] = "en";

/* Buffer that receives the strings read by readstr(). */
static char *strbuf;

/* readstr sets this to the language id of the msg it last read */
static char lang[16];

/*
 * subtitles_available is used to determine if we know whether the special
 * subtitles_file exists.  It has the following values:
 *	 0 = first time thru, we don't yet know if subtitles_file exists.
 *	 1 = we have already determined that subtitles_file exists.
 *	-1 = we have already determined that subtitles_file does not exist.
 */
static int subtitles_available = 0;

/*
 * Current line number in the language file.  It is only counted while
 * non-zero (i.e. while an index is being built); error() prints it when
 * it is non-zero.
 */
static int line = 0;

/* Number of problems reported through error(). */
static int errcount = 0;

static void error(char *);
41:
42: #define NEWLINE 0
43: #define COMMENT 1
44: #define INSTRING 2
45: #define ESCAPE 3
46: #define IDENT 4
47: #define DONE 5
48: #define ERROR 6
49: #define ERR1 7
50:
51: /* Look for and return a quoted string from the file.
52: * If nlabort is true, return failure if we find a blank line
53: * before we find the opening quote.
54: */
55: static char *
56: readstr (FILE *f, char *buf, int nlabort)
57: {
58: int c, d;
59: char *p = buf;
60: int state = NEWLINE;
61: int i = 0;
62:
63: while ((c = getc(f)) != EOF) {
64: if (c == '\r')
65: continue;
66: /* line numbers are only incremented when creating index file */
67: if (line && c == '\n')
68: ++line;
69: switch (state) {
70: case NEWLINE:
71: switch(c) {
72: case '#': state = COMMENT; break;
73: case '"': state = INSTRING; break;
74: case '\n':
75: if (nlabort) {
76: *buf = '\0';
77: return(buf);
78: }
79: default:
80: if (i == 0 && isalnum(c)) {
81: state = IDENT;
82: lang[i++] = c;
83: break;
84: }
85: if (!isspace(c)) {
86: error("syntax error\n");
87: state = ERROR;
88: }
89: }
90: break;
91: case COMMENT:
92: if (c == '\n')
93: state = NEWLINE;
94: break;
95: case INSTRING:
96: switch(c) {
97: case '\\': state = ESCAPE; break;
98: case '"': state = DONE; break;
99: default: *p++ = c;
100: }
101: break;
102: case ESCAPE:
103: switch (c) {
104: case 'n': *p++ = '\n'; break;
105: case 'r': *p++ = '\r'; break;
106: case 't': *p++ = '\t'; break;
107: case 'e': *p++ = '\033'; break;
108: case 'a': *p++ = '\007'; break;
109: case '#':
110: case '"':
111: case '\\': *p++ = c; break;
112: case '\n': break;
113: case '0':
114: case '1':
115: case '2':
116: case '3':
117: case '4':
118: case '5':
119: case '6':
120: case '7':
121: /* ANSI C rules: up to 3 octal digits */
122: d = c - '0';
123: if ((c = getc(f)) >= '0' && c <= '7') {
124: d = (d<<3) + (c-'0');
125: if ((c = getc(f)) >= '0' && c <= '7')
126: d = (d<<3) + (c-'0');
127: else
128: ungetc(c, f);
129: } else {
130: ungetc(c, f);
131: }
132: *p++ = d;
133: break;
134: default:
135: error("illegal escape sequence: ");
136: fprintf(stderr, "'\\%c'\n", c);
137: break;
138: }
139: state = INSTRING;
140: break;
141: case IDENT: /* language identifier */
142: if (c == ':') {
143: state = NEWLINE;
144: break;
145: }
146: if (c == '\n' && strncmp(lang, "No translation", 14) == 0)
147: {
148: i = 0;
149: state = NEWLINE;
150: break;
151: }
152: lang[i++] = c;
153: if (i == 15 || !isalnum(c) && !isspace(c)) {
154: lang[i] = '\0';
155: error("bad language identifier\n");
156: state = ERROR;
157: i = 0;
158: }
159: break;
160: case DONE:
161: if (c == '\n') {
162: lang[i] = '\0';
163: *p = '\0';
164: return(buf);
165: }
166: if (!isspace(c)) {
167: error("extra characters after '\"'\n");
168: state = ERROR;
169: }
170: break;
171: case ERROR:
172: if (c == '\n')
173: state = ERR1;
174: break;
175: case ERR1:
176: state = (c == '\n' ? NEWLINE : ERROR);
177: break;
178: }
179: }
180: if (state != NEWLINE)
181: error("unexpected EOF\n");
182: return(NULL);
183: }
184:
185:
186: static struct indx_ent {
187: word32 crc;
188: long offset;
189: } *indx_tbl = NULL;
190:
191: static int max_msgs = 0;
192: static int nmsg = 0;
193:
194: static FILE *langf;
195:
196: static struct {
197: long lang_fsize; /* size of language.txt */
198: char lang[16]; /* language identifier */
199: int nmsg; /* number of messages */
200: } indx_hdr;
201:
202:
203: static int make_indexfile(char *);
204: word32 crcupdate(byte, word32);
205: void init_crc();
206:
207: /*
208: * uses 24-bit CRC function from armor.c
209: */
210: static word32
211: message_crc(char *s)
212: {
213: word32 crc = 0;
214:
215: while (*s)
216: crc = crcupdate(*s++, crc);
217: return(crc);
218: }
219:
220: /*
221: * lookup file offset in indx_tbl
222: */
223: static long
224: lookup_offset(word32 crc)
225: {
226: int i;
227:
228: for (i = 0; i < nmsg; ++i)
229: if (indx_tbl[i].crc == crc)
230: return(indx_tbl[i].offset);
231: return(-1);
232: }
233:
234:
235:
236: #ifndef LANGTOOL
237: static void init_lang();
238:
239: /*
240: * return foreign translation of s
241: */
242: char *
243: PSTR (char *s)
244: {
245: long filepos;
246:
247: if (subtitles_available == 0)
248: init_lang();
249: if (subtitles_available < 0)
250: return(s);
251:
252: filepos = lookup_offset(message_crc(s));
253: if (filepos == -1) {
254: return(s);
255: } else {
256: fseek(langf, filepos, SEEK_SET);
257: readstr(langf, strbuf, 1);
258: }
259:
260: if (strbuf[0] == '\0')
261: return(s);
262:
263: for (s = strbuf; *s; ++s)
264: *s = EXT_C(*s);
265: return(strbuf);
266: }
267:
268: /*
269: * initialize the index table: read it from language.idx or create
270: * a new one and write it to the index file. A new index file is
271: * created if the language set in config.pgp doesn't match the one
272: * in language.idx or if the size of language.txt has changed.
273: */
274: static void
275: init_lang()
276: {
277: char indexfile[MAX_PATH];
278: char subtitles_file[MAX_PATH];
279: FILE *indexf;
280:
281: if (strcmp(language, "en") == 0) {
282: subtitles_available = -1;
283: return; /* use default messages */
284: }
285:
286: buildfilename (subtitles_file, langfile);
287: if ((langf = fopen(subtitles_file, "rb")) == NULL) {
288: subtitles_available = -1;
289: return;
290: }
291: init_crc();
292: if ((strbuf = (char *) malloc(STRBUFSIZE)) == NULL) {
293: fprintf(stderr, "Not enough memory for foreign subtitles\n");
294: fclose(langf);
295: subtitles_available = -1;
296: return;
297: }
298: buildfilename(indexfile, LANG_INDEXFILE);
299: if ((indexf = fopen(indexfile, "rb")) != NULL) {
300: if (fread(&indx_hdr, sizeof(indx_hdr), 1, indexf) == 1 &&
301: indx_hdr.lang_fsize == fsize(langf) &&
302: strcmp(indx_hdr.lang, language) == 0)
303: {
304: nmsg = indx_hdr.nmsg;
305: indx_tbl = (struct indx_ent *) malloc(nmsg * sizeof(struct indx_ent));
306: if (indx_tbl == NULL) {
307: fprintf(stderr, "Not enough memory for foreign subtitles\n");
308: fclose(indexf);
309: fclose(langf);
310: subtitles_available = -1;
311: return;
312: }
313: if (fread(indx_tbl, sizeof(struct indx_ent), nmsg, indexf) != nmsg)
314: {
315: free(indx_tbl); /* create a new one */
316: indx_tbl = NULL;
317: }
318: }
319: fclose(indexf);
320: }
321: if (indx_tbl == NULL && make_indexfile(indexfile) < 0) {
322: fclose(langf);
323: subtitles_available = -1;
324: } else {
325: subtitles_available = 1;
326: }
327: }
328: #endif /* !LANGTOOL */
329:
330:
331: /*
332: * build the index table in memory, and if indexfile is not NULL,
333: * write it to this file
334: */
335: static int
336: make_indexfile(char *indexfile)
337: {
338: FILE *indexf;
339: long filepos;
340: int total_msgs = 0;
341: char *res;
342:
343: rewind(langf);
344: indx_hdr.lang_fsize = fsize(langf);
345: strncpy(indx_hdr.lang, language, 15);
346: init_crc();
347: line = 1;
348: nmsg = 0;
349: while (readstr(langf, strbuf, 0)) {
350: if (nmsg == max_msgs) {
351: if (max_msgs) {
352: max_msgs *= 2;
353: indx_tbl = (struct indx_ent *) realloc(indx_tbl, max_msgs *
354: sizeof(struct indx_ent));
355: } else {
356: max_msgs = 400;
357: indx_tbl = (struct indx_ent *) malloc(max_msgs *
358: sizeof(struct indx_ent));
359: }
360: if (indx_tbl == NULL) {
361: fprintf(stderr, "Not enough memory for foreign subtitles\n");
362: return(-1);
363: }
364: }
365: ++total_msgs;
366: indx_tbl[nmsg].crc = message_crc(strbuf);
367: if (lookup_offset(indx_tbl[nmsg].crc) != -1)
368: error("message CRC not unique.\n");
369: do {
370: filepos = ftell(langf);
371: res = readstr (langf, strbuf, 1); /* Abort if find newline first */
372: if (*language == '\0') /* use first language found */
373: strcpy(language, lang);
374: } while (res && strbuf[0] != '\0' && strcmp(language, lang) != 0);
375:
376: if (res == NULL)
377: break;
378: if (strbuf[0] == '\0') /* No translation */
379: continue;
380:
381: indx_tbl[nmsg].offset = filepos;
382: ++nmsg;
383: do
384: res = readstr (langf, strbuf, 1); /* Abort if find newline first */
385: while (res && strbuf[0] != '\0');
386: }
387: line = 0;
388: indx_hdr.nmsg = nmsg;
389: if (verbose)
390: fprintf(stderr, "%s: %d messages, %d translations for language \"%s\"\n",
391: langfile, total_msgs, nmsg, language);
392: if (nmsg == 0) {
393: fprintf(stderr, "No translations available for language \"%s\"\n\n",
394: language);
395: return(-1);
396: }
397:
398: if (indexfile) {
399: if ((indexf = fopen(indexfile, "wb")) == NULL)
400: fprintf(stderr, "Cannot create %s\n", indexfile);
401: else {
402: fwrite(&indx_hdr, 1, sizeof(indx_hdr), indexf);
403: fwrite(indx_tbl, sizeof(struct indx_ent), nmsg, indexf);
404: if (ferror(indexf) || fclose(indexf))
405: fprintf(stderr, "error writing %s\n", indexfile);
406: }
407: }
408: return(0);
409: }
410:
411: static void
412: error(char *s)
413: {
414: ++errcount;
415: if (langfile[0])
416: fprintf(stderr, "%s:", langfile);
417: if (line)
418: fprintf(stderr, "%d:", line);
419: fprintf(stderr, " %s", s);
420: }
421:
422: #ifdef LANGTOOL
423: /*
424: * language string tool for manipulating language files
425: * link with CRC routines from armor.c
426: */
427:
428: #define CMD_EXTRACT 1
429: #define CMD_CHECK 2
430: #define CMD_MERGE 3
431:
432: extern char *optarg;
433: extern int optind;
434:
435: main(int argc, char **argv)
436: {
437: int opt, cmd = 0, rc = 0;
438: char *langIDs[16];
439: char *outfile = NULL;
440:
441: init_crc();
442: if ((strbuf = (char *) malloc(STRBUFSIZE)) == NULL) {
443: perror(argv[0]);
444: exit(1);
445: }
446: while ((opt = getopt(argc, argv, "cxmo:")) != EOF) {
447: switch (opt) {
448: case 'c': cmd = CMD_CHECK; break;
449: case 'x': cmd = CMD_EXTRACT; break;
450: case 'm': cmd = CMD_MERGE; break;
451: case 'o': outfile = optarg; break;
452: default: usage();
453: }
454: }
455: argc -= optind; argv += optind;
456: switch (cmd) {
457: case CMD_EXTRACT:
458: if (argc < 2)
459: usage();
460: rc = extract(argv[0], outfile, &argv[1]);
461: break;
462: case CMD_MERGE:
463: if (argc < 2)
464: usage();
465: rc = merge(argv[0], argv[1], outfile, argv[2]);
466: break;
467: case CMD_CHECK:
468: verbose = 1;
469: if (argc == 0)
470: checkfile("language.txt");
471: else
472: while (--argc >= 0)
473: checkfile(*argv++);
474: break;
475: default: usage();
476: }
477: exit(rc);
478: }
479:
/*
 * Print the command summary on stderr and exit with status 1.
 * This function does not return.
 */
int
usage(void)
{
	fprintf(stderr,
		"usage: langtool -[x|c|m] [-o outputfile] ...\n\n"
		"To extract one or more languages from a merged file:\n"
		"langtool -x [-o outputfile] file langID...\n\n"
		"To check a language file for syntax errors:\n"
		"langtool -c file...\n\n"
		"To merge language \"lang\" from lang_file with source_file:\n"
		"langtool -m [-o outputfile] source_file lang_file [lang]\n");
	exit(1);
	return(0);	/* NOTREACHED */
}
491:
492: merge(char *base_file, char *lang_file, char *outfile, char *langID)
493: {
494: FILE *fp, *outf;
495: long fpos = 0, filepos;
496: int newmsgs = 0;
497:
498: if ((langf = fopen(lang_file, "r")) == NULL) {
499: perror(lang_file);
500: return -1;
501: }
502: strcpy(langfile, lang_file);
503: if (langID)
504: strcpy(language, langID);
505: else
506: language[0] = '\0'; /* use first language found */
507:
508: errcount = 0;
509: make_indexfile(NULL);
510: if (errcount)
511: return -1;
512:
513: langfile[0] = '\0'; /* don't print filename in error msgs */
514:
515: if ((fp = fopen(base_file, "r")) == NULL) {
516: perror(base_file);
517: return -1;
518: }
519: if (outfile == NULL)
520: outf = stdout;
521: else {
522: if ((outf = fopen(outfile, "w")) == NULL) {
523: perror(outfile);
524: return(-1);
525: }
526: }
527:
528: while (readstr(fp, strbuf, 0)) {
529: copypos(fp, outf, fpos);
530: fpos = ftell(fp);
531:
532: filepos = lookup_offset(message_crc(strbuf));
533: if (filepos == -1) {
534: fprintf(outf, "No translation\n");
535: ++newmsgs;
536: } else {
537: fseek(langf, filepos, SEEK_SET);
538: readstr(langf, strbuf, 1);
539: copypos(langf, outf, filepos);
540: }
541:
542: while (readstr(fp, strbuf, 1))
543: if (*strbuf == '\0')
544: break;
545: }
546: copypos(fp, outf, fpos);
547: fflush(outf);
548: if (ferror(outf)) {
549: perror(outfile);
550: return -1;
551: }
552: if (newmsgs)
553: fprintf(stderr, "%d untranslated messages\n", newmsgs);
554: return errcount;
555: }
556:
557: extract(char *infile, char *outfile, char **langIDs)
558: {
559: FILE *fp, *outf;
560: long fpos = 0;
561: char **langID;
562:
563: if ((fp = fopen(infile, "r")) == NULL) {
564: perror(infile);
565: return -1;
566: }
567: if (outfile == NULL) {
568: outf = stdout;
569: } else {
570: if ((outf = fopen(outfile, "w")) == NULL) {
571: perror(outfile);
572: fclose(fp);
573: return(-1);
574: }
575: }
576:
577: while (readstr(fp, strbuf, 0)) {
578: copypos(fp, outf, fpos);
579: fpos = ftell(fp);
580: while (readstr(fp, strbuf, 1)) {
581: if (*strbuf == '\0')
582: break;
583: for (langID = langIDs; *langID; ++langID) {
584: if (strcmp(lang, *langID) == 0)
585: copypos(fp, outf, fpos);
586: }
587: fpos = ftell(fp);
588: }
589: }
590: copypos(fp, outf, fpos);
591: fflush(outf);
592: if (ferror(outf)) {
593: perror(outfile);
594: return -1;
595: }
596: return 0;
597: }
598:
599: checkfile(char *name)
600: {
601: if ((langf = fopen(name, "rb")) == NULL) {
602: perror(name);
603: return -1;
604: }
605: strcpy(langfile, name);
606: language[0] = '\0'; /* count messages for first language */
607: errcount = 0;
608: make_indexfile(NULL);
609: fclose(langf);
610: return errcount;
611: }
612:
int copyfile(FILE *, FILE *, long);	/* defined further down in this file */

/*
 * Copy to g the part of f that lies between position pos and the
 * current position of f.  On success f is back at the position it had
 * on entry.
 *
 * Returns 0 on success, -1 if f cannot be repositioned (nothing is
 * copied in that case).
 */
int
copypos(FILE *f, FILE *g, long pos)
{
	long size;

	size = ftell(f) - pos;
	if (fseek(f, pos, SEEK_SET) != 0)
		return -1;
	copyfile(f, g, size);
	return 0;
}
620:
/*
 * Copy up to n characters from f to g, stopping early at the end of f.
 * Nothing is read when n is zero or negative.
 *
 * Returns 0, or -1 if the error indicator of g is set afterwards.
 */
int
copyfile(FILE *f, FILE *g, long n)
{
	int c;

	while (--n >= 0 && (c = getc(f)) != EOF)
		putc(c, g);
	return ferror(g) ? -1 : 0;
}
628:
/*
 * Return the size of the open file f, measured by seeking to its end.
 * The current position of f is restored before returning.
 */
long
fsize(FILE *f)
{
	long here = ftell(f);
	long total;

	fseek(f, 0L, SEEK_END);
	total = ftell(f);
	fseek(f, here, SEEK_SET);
	return total;
}
639: #endif /* LANGTOOL */
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.