|
|
char *xxxvers = "\nDeroff Version 2.0 29 December 1979\n";


#include <stdio.h>
#include <stdlib.h>	/* exit, calloc, atoi */
#include <string.h>	/* strcmp, strlen */

/*
 * Deroff command -- strip troff, eqn, and tbl sequences from a file.
 *
 * Flags:
 *	-w		output one word per line rather than in the
 *			original format.
 *	-ms (or -m)	interpret -ms macros so that just sentences
 *			are output.
 *	-mm		do the same for -mm macros.
 *	-ml		interpret -ms macros and, in addition, get rid
 *			of lists.
 *
 * Deroff follows .so and .nx commands, removes the contents of macro
 * definitions, equations (both .EQ ... .EN and $...$), tbl command
 * sequences, and troff backslash constructions.
 *
 * All input is through the C macro; the most recently read character
 * is in c.
 */

/*
 * Forward declarations.  Every routine keeps the `int' return type it
 * gets from an implicit declaration, so these are compatible with
 * both old-style and prototype-style definitions further down.
 */
int skeqn(void);
FILE *opn(char *p);
int eof(void);
int getfname(void);
int fatal(char *s, char *p);
int work(void);
int regline(int macline, int minlen);
int putmac(char *s, int minlen);
int putwords(int macline);
int comline(void);
int macro(void);
int sdis(int a1, int a2);
int tbl(void);
int stbl(void);
int eqn(void);
int backsl(void);
char *copys(char *s);
int sce(void);

/*
 * C:  read the next character into c.  At end of file eof() supplies
 *     the character; an eqn left delimiter seen while filesp==files
 *     is replaced by whatever skeqn() returns.
 * C1: same as C but without the eqn delimiter check.
 */
#define C ( (c=getc(infile)) == EOF ? eof() : ((c==ldelim)&&(filesp==files) ? skeqn() : c) )
#define C1 ( (c=getc(infile)) == EOF ? eof() : c)

/* SKIP: consume input up to and including the next newline. */
#define SKIP while(C != '\n')

/*
 * SKIP_TO_COM: skip two newlines' worth of input, then keep reading
 * until a '.' that directly follows a newline is itself followed by a
 * character not greater than 'Z'.  Wrapped in do/while(0) so that it
 * behaves as one statement.
 */
#define SKIP_TO_COM do { SKIP; SKIP; pc=c; while(C != '.' || pc != '\n' || C > 'Z')pc=c; } while (0)

#define YES 1
#define NO 0
#define MS 0	/* values of mac */
#define MM 1
#define ONE 1	/* second argument of regline()/putmac() */
#define TWO 2

#define NOCHAR (-2)	/* "no delimiter": never equal to a value stored by getc */

/* character classes stored in chars[] */
#define SPECIAL 0
#define APOS 1
#define DIGIT 2
#define LETTER 3

int wordflag = NO;	/* -w given */
int msflag = NO;	/* any -m flag given */
int mac = MS;		/* MS or MM */
int disp = 0;		/* set by -ml */
int inmacro = NO;	/* set by macro(), bumped around regline() in comline() */
int intable = NO;	/* set by tbl(), cleared on .TE */

char chars[128];	/* SPECIAL, APOS, DIGIT, or LETTER */

char line[512];		/* current output line, filled by regline() */
char *lp;		/* position of the last character stored in line */

int c;			/* most recently read character */
int pc;			/* previous character, used by SKIP_TO_COM and stbl() */
int ldelim = NOCHAR;	/* eqn delimiters, set by eqn() */
int rdelim = NOCHAR;


int argc;		/* input file arguments not yet opened */
char **argv;

char fname[50];		/* file name read by getfname() */
FILE *files[15];	/* stack of open input files; .so pushes onto it */
FILE **filesp;		/* top of the stack */
FILE *infile;		/* file currently being read */
65:
66:
67:
68: main(ac, av)
69: int ac;
70: char **av;
71: {
72: register int i;
73: register char *p;
74: static char onechar[2] = "X";
75: FILE *opn();
76:
77: argc = ac - 1;
78: argv = av + 1;
79:
80: while(argc>0 && argv[0][0]=='-' && argv[0][1]!='\0')
81: {
82: for(p=argv[0]+1; *p; ++p) switch(*p)
83: {
84: case 'w':
85: wordflag = YES;
86: break;
87: case 'm':
88: msflag = YES;
89: if(*(p+1) == 'm'){
90: mac=MM;
91: p++;
92: }
93: else if(*(p+1) == 's')
94: p++;
95: else if(*(p+1) == 'l'){
96: disp=1;
97: p++;
98: }
99: break;
100: default:
101: onechar[0] = *p;
102: fatal("Invalid flag %s\n", onechar);
103: }
104: --argc;
105: ++argv;
106: }
107:
108: if(argc == 0)
109: infile = stdin;
110: else {
111: infile = opn(argv[0]);
112: --argc;
113: ++argv;
114: }
115:
116: files[0] = infile;
117: filesp = &files[0];
118:
119: for(i='a'; i<='z' ; ++i)
120: chars[i] = LETTER;
121: for(i='A'; i<='Z'; ++i)
122: chars[i] = LETTER;
123: for(i='0'; i<='9'; ++i)
124: chars[i] = DIGIT;
125: chars['\''] = APOS;
126: chars['&'] = APOS;
127:
128: work();
129: }
130:
131:
132:
133: skeqn()
134: {
135: while((c = getc(infile)) != rdelim)
136: if(c == EOF)
137: c = eof();
138: else if(c == '"')
139: while( (c = getc(infile)) != '"')
140: if(c == EOF)
141: c = eof();
142: else if(c == '\\')
143: if((c = getc(infile)) == EOF)
144: c = eof();
145: if(msflag)return(c='x');
146: return(c = ' ');
147: }
148:
149:
/*
 * Open the file named p for reading and return its stream.
 * The name "-" stands for standard input.  A file that cannot be
 * opened is reported through fatal().
 */
FILE *opn(char *p)
{
	FILE *fd;

	if (p[0] == '-' && p[1] == '\0')
		return stdin;

	fd = fopen(p, "r");
	if (fd == NULL)
		fatal("Cannot open file %s\n", p);

	return fd;
}
162:
163:
164:
165: eof()
166: {
167: if(infile != stdin)
168: fclose(infile);
169: if(filesp > files)
170: infile = *--filesp;
171: else if(argc > 0)
172: {
173: infile = opn(argv[0]);
174: --argc;
175: ++argv;
176: }
177: else
178: exit(0);
179:
180: return(C);
181: }
182:
183:
184:
185: getfname()
186: {
187: register char *p;
188: struct chain { struct chain *nextp; char *datap; } *chainblock;
189: register struct chain *q;
190: static struct chain *namechain = NULL;
191: char *copys();
192:
193: while(C == ' ') ;
194:
195: for(p = fname ; (*p=c)!= '\n' && c!=' ' && c!='\t' && c!='\\' ; ++p)
196: C;
197: *p = '\0';
198: while(c != '\n')
199: C;
200:
201: /* see if this name has already been used */
202:
203: for(q = namechain ; q; q = q->nextp)
204: if( ! strcmp(fname, q->datap))
205: {
206: fname[0] = '\0';
207: return;
208: }
209:
210: q = (struct chain *) calloc(1, sizeof(*chainblock));
211: q->nextp = namechain;
212: q->datap = copys(fname);
213: namechain = q;
214: }
215:
216:
217:
218:
/*
 * Print "Deroff: " followed by the message on standard error and exit
 * with status 1.  s is a printf-style format that may use p as its
 * single string argument.  Does not return.
 */
int fatal(char *s, char *p)
{
	FILE *err = stderr;

	fprintf(err, "Deroff: ");
	fprintf(err, s, p);
	exit(1);
}
226:
227: work()
228: {
229:
230: for( ;; )
231: {
232: if(C == '.' || c == '\'')
233: comline();
234: else
235: regline(NO,TWO);
236: }
237: }
238:
239:
240:
241:
242: regline(macline,const)
243: int macline;
244: int const;
245: {
246: line[0] = c;
247: lp = line;
248: for( ; ; )
249: {
250: if(c == '\\')
251: {
252: *lp = ' ';
253: backsl();
254: }
255: if(c == '\n') break;
256: if(intable && c=='T')
257: {
258: *++lp = C;
259: if(c=='{' || c=='}')
260: {
261: lp[-1] = ' ';
262: *lp = C;
263: }
264: }
265: else *++lp = C;
266: }
267:
268: *lp = '\0';
269:
270: if(line[0] != '\0')
271: if(wordflag)
272: putwords(macline);
273: else if(macline)
274: putmac(line,const);
275: else
276: puts(line);
277: }
278:
279:
280:
281:
282: putmac(s,const)
283: register char *s;
284: int const;
285: {
286: register char *t;
287:
288: while(*s)
289: {
290: while(*s==' ' || *s=='\t')
291: putchar(*s++);
292: for(t = s ; *t!=' ' && *t!='\t' && *t!='\0' ; ++t)
293: ;
294: if(*s == '\"')s++;
295: if(t>s+const && chars[ s[0] ]==LETTER && chars[ s[1] ]==LETTER)
296: while(s < t)
297: if(*s == '\"')s++;
298: else
299: putchar(*s++);
300: else
301: s = t;
302: }
303: putchar('\n');
304: }
305:
306:
307:
308: putwords(macline) /* break into words for -w option */
309: int macline;
310: {
311: register char *p, *p1;
312: int i, nlet;
313:
314:
315: for(p1 = line ; ;)
316: {
317: /* skip initial specials ampersands and apostrophes */
318: while( chars[*p1] < DIGIT)
319: if(*p1++ == '\0') return;
320: nlet = 0;
321: for(p = p1 ; (i=chars[*p]) != SPECIAL ; ++p)
322: if(i == LETTER) ++nlet;
323:
324: if( (!macline && nlet>1) /* MDM definition of word */
325: || (macline && nlet>2 && chars[ p1[0] ]==LETTER && chars[ p1[1] ]==LETTER) )
326: {
327: /* delete trailing ampersands and apostrophes */
328: while(p[-1]=='\'' || p[-1]=='&')
329: --p;
330: while(p1 < p) putchar(*p1++);
331: putchar('\n');
332: }
333: else
334: p1 = p;
335: }
336: }
337:
338:
339:
340: comline()
341: {
342: register int c1, c2;
343:
344: com:
345: while(C==' ' || c=='\t')
346: ;
347: comx:
348: if( (c1=c) == '\n')
349: return;
350: c2 = C;
351: if(c1=='.' && c2!='.')
352: inmacro = NO;
353: if(c2 == '\n')
354: return;
355:
356: if(c1=='E' && c2=='Q' && filesp==files)
357: eqn();
358: else if(c1=='T' && (c2=='S' || c2=='C' || c2=='&') && filesp==files){
359: if(msflag){ stbl(); }
360: else tbl(); }
361: else if(c1=='T' && c2=='E')
362: intable = NO;
363: else if(c1=='G' && c2 == 'R')
364: sdis('G','E');
365: else if(!inmacro && c1=='d' && c2=='e')
366: macro();
367: else if(!inmacro && c1=='i' && c2=='g')
368: macro();
369: else if(!inmacro && c1=='a' && c2 == 'm')
370: macro();
371: else if(c1=='s' && c2=='o')
372: {
373: getfname();
374: if( fname[0] )
375: infile = *++filesp = opn( fname );
376: }
377: else if(c1=='n' && c2=='x')
378: {
379: getfname();
380: if(fname[0] == '\0') exit(0);
381: if(infile != stdin)
382: fclose(infile);
383: infile = *filesp = opn(fname);
384: }
385: else if(c1=='h' && c2=='w')
386: { SKIP; }
387: else if(msflag && c1 == 'T' && c2 == 'L'){
388: SKIP_TO_COM;
389: goto comx; }
390: else if(msflag && c1=='N' && c2 == 'R')SKIP;
391: else if(msflag && c1 == 'A' && (c2 == 'U' || c2 == 'I')){
392: if(mac==MM)SKIP;
393: else {
394: SKIP_TO_COM;
395: goto comx; }
396: }
397: else if(msflag && c1 == 'F' && c2 == 'S'){
398: SKIP_TO_COM;
399: goto comx; }
400: else if(msflag && c1 == 'S' && c2 == 'H'){
401: SKIP_TO_COM;
402: goto comx; }
403: else if(msflag && c1 == 'N' && c2 == 'H'){
404: SKIP_TO_COM;
405: goto comx; }
406: else if(msflag && c1 == 'O' && c2 == 'K'){
407: SKIP_TO_COM;
408: goto comx; }
409: else if(msflag && c1 == 'N' && c2 == 'D')
410: SKIP;
411: else if(msflag && mac==MM && c1=='H' && (c2==' '||c2=='U'))
412: SKIP;
413: else if(msflag && mac==MM && c2=='L'){
414: if(disp || c1 == 'R')sdis('L','E');
415: else{
416: SKIP;
417: putchar('.');
418: }
419: }
420: else if(msflag && (c1 == 'D' || c1 == 'N' || c1 == 'K') && c2 == 'S')
421: { sdis(c1,'E'); } /* removed RS-RE */
422: else if(msflag && c1 == 'n' && c2 == 'f')
423: sdis('f','i');
424: else if(msflag && c1 == 'c' && c2 == 'e')
425: sce();
426: else
427: {
428: if(c1=='.' && c2=='.')
429: while(C == '.')
430: ;
431: ++inmacro;
432: if(c1 <= 'Z' && msflag)regline(YES,ONE);
433: else regline(YES,TWO);
434: --inmacro;
435: }
436: }
437:
438:
439:
440: macro()
441: {
442: if(msflag){
443: do { SKIP; }
444: while(C!='.' || C!='.' || C=='.'); /* look for .. */
445: if(c != '\n')SKIP;
446: return;
447: }
448: SKIP;
449: inmacro = YES;
450: }
451:
452:
453:
454:
455: sdis(a1,a2)
456: char a1,a2;
457: {
458: register int c1,c2;
459: register int eqnf;
460: eqnf=1;
461: SKIP;
462: while(1){
463: while(C != '.')SKIP;
464: if((c1=C) == '\n')continue;
465: if((c2=C) == '\n')continue;
466: if(c1==a1 && c2 == a2){
467: SKIP;
468: if(eqnf)putchar('.');
469: putchar('\n');
470: return;
471: }
472: else if(a1 == 'D' && c1 == 'E' && c2 == 'Q'){eqn(); eqnf=0;}
473: else SKIP;
474: }
475: }
476: tbl()
477: {
478: while(C != '.');
479: SKIP;
480: intable = YES;
481: }
482: stbl()
483: {
484: while(C != '.');
485: SKIP_TO_COM;
486: if(c != 'T' || C != 'E'){
487: SKIP;
488: pc=c;
489: while(C != '.' || pc != '\n' || C != 'T' || C != 'E')pc=c;
490: }
491: }
492:
493: eqn()
494: {
495: register int c1, c2;
496: register int dflg;
497: int last;
498:
499: last=0;
500: dflg = 1;
501: SKIP;
502:
503: for( ;;)
504: {
505: if(C == '.' || c == '\'')
506: {
507: while(C==' ' || c=='\t')
508: ;
509: if(c=='E' && C=='N')
510: {
511: SKIP;
512: if(msflag && dflg){
513: putchar('x');
514: putchar(' ');
515: if(last){putchar('.'); putchar(' '); }
516: }
517: return;
518: }
519: }
520: else if(c == 'd') /* look for delim */
521: {
522: if(C=='e' && C=='l')
523: if( C=='i' && C=='m')
524: {
525: while(C1 == ' ');
526: if((c1=c)=='\n' || (c2=C1)=='\n'
527: || (c1=='o' && c2=='f' && C1=='f') )
528: {
529: ldelim = NOCHAR;
530: rdelim = NOCHAR;
531: }
532: else {
533: ldelim = c1;
534: rdelim = c2;
535: }
536: }
537: dflg = 0;
538: }
539:
540: if(c != '\n') while(C != '\n'){ if(c == '.')last=1; else last=0; }
541: }
542: }
543:
544:
545:
546: backsl() /* skip over a complete backslash construction */
547: {
548: int bdelim;
549:
550: sw: switch(C)
551: {
552: case '"':
553: SKIP;
554: return;
555: case 's':
556: if(C == '\\') backsl();
557: else {
558: while(C>='0' && c<='9') ;
559: ungetc(c,infile);
560: c = '0';
561: }
562: --lp;
563: return;
564:
565: case 'f':
566: case 'n':
567: case '*':
568: if(C != '(')
569: return;
570:
571: case '(':
572: if(C != '\n') C;
573: return;
574:
575: case '$':
576: C; /* discard argument number */
577: return;
578:
579: case 'b':
580: case 'x':
581: case 'v':
582: case 'h':
583: case 'w':
584: case 'o':
585: case 'l':
586: case 'L':
587: if( (bdelim=C) == '\n')
588: return;
589: while(C!='\n' && c!=bdelim)
590: if(c == '\\') backsl();
591: return;
592:
593: case '\\':
594: if(inmacro)
595: goto sw;
596: default:
597: return;
598: }
599: }
600:
601:
602:
603:
/*
 * Return a freshly allocated copy of the NUL-terminated string s.
 * Running out of memory is reported through fatal(); the message now
 * ends in a newline like the other fatal() messages.
 */
char *copys(char *s)
{
	char *t, *t0;

	t0 = t = calloc(strlen(s) + 1, sizeof(*t));
	if (t0 == NULL)
		fatal("Cannot allocate memory\n", (char *) NULL);

	while ((*t++ = *s++) != '\0')
		;
	return t0;
}
616: sce(){
617: register char *ap;
618: register int n, i;
619: char a[10];
620: for(ap=a;C != '\n';ap++){
621: *ap = c;
622: if(ap == &a[9]){
623: SKIP;
624: ap=a;
625: break;
626: }
627: }
628: if(ap != a)n = atoi(a);
629: else n = 1;
630: for(i=0;i<n;){
631: if(C == '.'){
632: if(C == 'c'){
633: if(C == 'e'){
634: while(C == ' ');
635: if(c == '0')break;
636: else SKIP;
637: }
638: else SKIP;
639: }
640: else SKIP;
641: }
642: else {
643: SKIP;
644: i++;
645: }
646: }
647: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.