|
|
/* Version identification string for this source file ("deroff.c 1.7"). */
1.1 root 1: char *xxxvers = "@(#)deroff.c 1.7";
2:
3:
4: #include <stdio.h>
5:
6: /* Deroff command -- strip troff, eqn, and Tbl sequences from
7: a file. Accepts several flag arguments: -w causes output of one word per line
8: rather than in the original format.
9: -mm (or -ms) causes the corresponding macros to be interpreted
10: so that just sentences are output
11: -ml also gets rid of lists.
12: -i causes deroff to ignore .so and .nx commands.
13: Deroff follows .so and .nx commands, removes contents of macro
14: definitions, equations (both .EQ ... .EN and $...$),
15: Tbl command sequences, and Troff backslash constructions.
16:
17: All input is through the C macro; the most recently read character is in c.
18: */
19:
/*
 * C: read the next character from infile into the global c and yield it.
 *    At end of file eof() is called instead.  When the character equals
 *    ldelim and input is coming from the first entry of files[]
 *    (filesp==files), skeqn() is called.  Every newline read bumps linect.
 */
20: #define C ( (c=getc(infile)) == EOF ? eof() : ((c==ldelim)&&(filesp==files) ? skeqn() :( c == '\n'?(linect++,c):c) ))
/* C1: same as C but without the ldelim/skeqn() check. */
21: #define C1 ( (c=getc(infile)) == EOF ? eof() : (c == '\n' ? (linect++,c): c))
/*
 * SKIP / SKIP1: loop headers that read with C / C1 until a newline has
 * been read.  Written as "SKIP;" the loop body is the empty statement.
 */
22: #define SKIP while(C != '\n')
23: #define SKIP1 while(C1 != '\n')
/*
 * SKIP_TO_COM: read through two newlines, then keep reading until a '.'
 * whose preceding character (kept in pc) was a newline is followed by a
 * character that is not greater than 'Z'.  On exit c holds that character.
 * NOTE: expands to several statements, so it must be used inside braces.
 */
24: #define SKIP_TO_COM SKIP; SKIP; pc=c; while(C != '.' || pc != '\n' || C > 'Z')pc=c
25:
/* Boolean values used for the option and state flags. */
26: #define YES 1
27: #define NO 0
/* Values stored in the global mac (selected by -ms / -mm in main). */
28: #define MS 0
29: #define MM 1
/* Values passed as the second argument of regline() and on to putmac(). */
30: #define ONE 1
31: #define TWO 2
32:
/* Initial value of ldelim/rdelim; eqn() stores it again to turn delimiters off. */
33: #define NOCHAR -2
/*
 * Character classes stored in chars[].  The numeric order is relied upon:
 * putwords() skips every class that compares below DIGIT.
 */
34: #define SPECIAL 0
35: #define APOS 1
36: #define PUNCT 2
37: #define DIGIT 3
38: #define LETTER 4
39:
/* Newlines read so far by the C/C1 macros; opn() resets it to 0. */
40: int linect = 0;
/* Set by -n: opn() prints ".F name" and regline() prints ".N" line marks. */
41: int numflag = 0;
/* Set by -w: output goes through putwords(). */
42: int wordflag = NO;
/* Set by any -m option: macro requests are interpreted. */
43: int msflag = NO;
/* Set by -i: .so and .nx request lines are skipped. */
44: int iflag = NO;
/* Macro package, MM or MS (set from the -m argument). */
45: int mac = MM;
/* Set by -ml. */
46: int disp = 0;
/* Set by -p. */
47: int parag = 0;
/* Set by macro(), cleared and counted in comline(), consulted by backsl(). */
48: int inmacro = NO;
/* Set by tbl(); cleared by comline() on .TE; consulted by regline(). */
49: int intable = NO;
/* Set by skeqn() when msflag is on; cleared by work() and regline(). */
50: int eqnflag = 0;
51:
52: char chars[128]; /* SPECIAL, PUNCT, APOS, DIGIT, or LETTER */
53:
/* Output line under construction (regline(), inpic()); lp is regline()'s write position. */
54: char line[512];
55: char *lp;
56:
/* c: most recently read character.  pc: previous character, kept by SKIP_TO_COM and stbl(). */
57: int c;
58: int pc;
/* Current eqn delimiters as set by eqn(); NOCHAR when none are in effect. */
59: int ldelim = NOCHAR;
60: int rdelim = NOCHAR;
61:
62:
/* Copies of main()'s arguments, used by eof() to step to the next file operand. */
63: int argc;
64: char **argv;
65:
66: extern int optind;
67: extern char*optarg;
/* File name filled in by getfname(); an empty string means "nothing to open". */
68: char fname[50];
/* Stack of open input streams; filesp points at the current entry, infile is the stream being read. */
69: FILE *files[15];
70: FILE **filesp;
71: FILE *infile;
72:
73: main(ac, av)
74: int ac;
75: char **av;
76: {
77: register int i;
78: int errflg = 0;
79: register optchar;
80: FILE *opn();
81:
82: argc = ac;
83: argv = av;
84: while ((optchar = getopt(argc, argv, "winpm:")) != EOF) switch(optchar) {
85: case 'n':
86: numflag = 1;
87: break;
88: case 'w':
89: wordflag = YES;
90: break;
91: case 'm':
92: msflag = YES;
93: if (*optarg == 'm')
94: mac = MM;
95: else if (*optarg == 's')
96: mac = MS;
97: else if (*optarg == 'l')
98: disp = 1;
99: else errflg++;
100: break;
101: case 'p':
102: parag=YES;
103: break;
104: case 'i':
105: iflag = YES;
106: break;
107: case '?':
108: errflg++;
109: }
110: if (errflg)
111: fatal("usage: deroff [ -w ] [ -m (m s l) ] [ -i ] [ file ] ... \n", (char *) NULL);
112: if ( optind == argc )
113: infile = stdin;
114: else
115: infile = opn(argv[optind++]);
116: files[0] = infile;
117: filesp = &files[0];
118:
119: for(i='a'; i<='z' ; ++i)
120: chars[i] = LETTER;
121: for(i='A'; i<='Z'; ++i)
122: chars[i] = LETTER;
123: for(i='0'; i<='9'; ++i)
124: chars[i] = DIGIT;
125: chars['\''] = APOS;
126: chars['&'] = APOS;
127: chars['\b'] = APOS;
128: chars['.'] = PUNCT;
129: chars[','] = PUNCT;
130: chars[';'] = PUNCT;
131: chars['?'] = PUNCT;
132: chars[':'] = PUNCT;
133: work();
134: }
135: char *calloc();
136:
137:
138:
139:
140:
141:
142: skeqn()
143: {
144: while((c = getc(infile)) != rdelim)
145: if(c == '\n')linect++;
146: else if(c == '\\')
147: c = getc(infile);
148: else if(c == EOF)
149: c = eof();
150: else if(c == '"')
151: while( (c = getc(infile)) != '"')
152: if(c == '\n')linect++;
153: else if(c == EOF)
154: c = eof();
155: else if(c == '\\'){
156: if((c = getc(infile)) == EOF)
157: c = eof();
158: else if(c == '\n')linect++;
159: }
160: if(msflag)
161: eqnflag = 1;
162: return(c = ' ');
163: }
164:
165:
166: char *devnull = "/dev/null";
167: FILE *opn(p)
168: register char *p;
169: {
170: FILE *fd;
171:
172: again:
173: if( (fd = fopen(p, "r")) == NULL){
174: if(msflag || p==devnull)
175: fatal("Cannot open file %s - quitting\n", p);
176: else {
177: fprintf(stderr,"Deroff: Cannot open file %s - continuing\n",
178: p);
179: p = devnull;
180: goto again;
181: }
182: }
183: linect = 0;
184: if(numflag)printf(".F %s\n",p);
185:
186: return(fd);
187: }
188:
189:
190:
191: eof()
192: {
193: if(infile != stdin)
194: fclose(infile);
195: if(filesp > files)
196: infile = *--filesp;
197: else if(optind < argc)
198: {
199: infile = opn(argv[optind++]);
200: }
201: else
202: exit(0);
203:
204: return(C);
205: }
206:
207:
208:
209: getfname()
210: {
211: register char *p;
212: struct chain {
213: struct chain *nextp;
214: char *datap;
215: } *chainblock;
216: register struct chain *q;
217: static struct chain *namechain = NULL;
218: char *csskip = "/usr/lib/tmac/tmac.cs";
219: char *sskip = "/usr/lib/tmac/tmac.s";
220: char *copys();
221:
222: while(C == ' ') ;
223:
224: for(p = fname ; (*p=c)!= '\n' && c!=' ' && c!='\t' && c!='\\' ; ++p)
225: C;
226: *p = '\0';
227: while(c != '\n')
228: C;
229: if(!strcmp(fname, csskip) || !strcmp(fname, sskip)){
230: fname[0] = '\0';
231: return;
232: }
233: /* see if this name has already been used */
234:
235: for(q = namechain ; q; q = q->nextp)
236: if( ! strcmp(fname, q->datap))
237: {
238: fname[0] = '\0';
239: return;
240: }
241:
242: q = (struct chain *) calloc(1, sizeof(*chainblock));
243: q->nextp = namechain;
244: q->datap = copys(fname);
245: namechain = q;
246: }
247:
248:
249:
250:
/*
 * fatal -- report an unrecoverable error and terminate.
 *
 * Writes "Deroff: " followed by the message built from format s and the
 * single string argument p to stderr, then exits with status 1.
 */
fatal(s,p)
char *s, *p;
{
	fputs("Deroff: ", stderr);
	fprintf(stderr, s, p);
	exit(1);
}
258:
259: work()
260: {
261:
262: for( ;; )
263: {
264: eqnflag = 0;
265: if(C == '.' || c == '\'')
266: comline();
267: else
268: regline(NO,TWO);
269: }
270: }
271:
272:
273:
274:
275: regline(macline,const)
276: int macline;
277: int const;
278: {
279: line[0] = c;
280: lp = line;
281: for( ; ; )
282: {
283: if(c == '\\')
284: {
285: *lp = ' ';
286: backsl();
287: if ( c == '%') /* no blank for hyphenation char */
288: lp--;
289: }
290: if(c == '\n') break;
291: if(intable && c=='T')
292: {
293: *++lp = C;
294: if(c=='{' || c=='}')
295: {
296: lp[-1] = ' ';
297: *lp = C;
298: }
299: }
300: else {
301: if((msflag == 1) && (eqnflag == 1)){
302: eqnflag = 0;
303: *++lp = 'x';
304: }
305: *++lp = C;
306: }
307: }
308:
309: *lp = '\0';
310:
311: if(line[0] != '\0'){
312: if(wordflag)
313: putwords();
314: else if(macline)
315: putmac(line,const);
316: else
317: puts(line);
318: if(numflag &&(linect%10 == 0))printf(".%d\n",linect);
319: }
320: }
321:
322:
323:
324:
325: putmac(s,const)
326: register char *s;
327: int const;
328: {
329: register char *t;
330: register found;
331: int last;
332: found = 0;
333:
334: while(*s)
335: {
336: while(*s==' ' || *s=='\t')
337: putchar(*s++);
338: for(t = s ; *t!=' ' && *t!='\t' && *t!='\0' ; ++t)
339: ;
340: if(*s == '\"')s++;
341: if(t>s+const && chars[ s[0] ]==LETTER && chars[ s[1] ]==LETTER){
342: while(s < t)
343: if(*s == '\"')s++;
344: else
345: putchar(*s++);
346: last = *(t-1);
347: found++;
348: }
349: else if(found && chars[ s[0] ] == PUNCT && s[1] == '\0')
350: putchar(*s++);
351: else{
352: last = *(t-1);
353: s = t;
354: }
355: }
356: putchar('\n');
357: if(msflag && chars[last] == PUNCT){
358: printf(" %c\n",last);
359: }
360: }
361:
362:
363:
364: putwords() /* break into words for -w option */
365: {
366: register char *p, *p1;
367: int i, nlet;
368:
369:
370: for(p1 = line ; ;)
371: {
372: /* skip initial specials ampersands and apostrophes */
373: while( chars[*p1] < DIGIT)
374: if(*p1++ == '\0') return;
375: nlet = 0;
376: for(p = p1 ; (i=chars[*p]) != SPECIAL ; ++p)
377: if(i == LETTER) ++nlet;
378:
379: if(nlet>1) /* MDM definition of word */
380: {
381: /* delete trailing ampersands and apostrophes */
382: while(p[-1]=='\'' || p[-1]=='&'|| chars[ p[-1] ] == PUNCT)
383: --p;
384: while(p1 < p) putchar(*p1++);
385: putchar('\n');
386: }
387: else
388: p1 = p;
389: }
390: }
391:
392:
393: comline()
394: {
395: register int c1, c2;
396:
397: com:
398: while(C==' ' || c=='\t')
399: ;
400: comx:
401: if( (c1=c) == '\n')
402: return;
403: c2 = C;
404: if(c1=='.' && c2!='.')
405: inmacro = NO;
406: if(msflag && c1 == '['){
407: refer(c2);
408: return;
409: }
410: if(parag && mac==MM && c1 == 'P' && c2 == '\n'){
411: printf(".P\n");
412: return;
413: }
414: if(c2 == '\n')
415: return;
416:
417: if(c1 == '\\' && c2 == '\"')
418: SKIP;
419: else if(c1=='E' && c2=='Q' && filesp==files)
420: eqn();
421: else if(c1=='T' && (c2=='S' || c2=='C' || c2=='&') && filesp==files){
422: if(msflag){
423: stbl();
424: }
425: else tbl();
426: }
427: else if(c1=='T' && c2=='E')
428: intable = NO;
429: else if(!inmacro && c1=='d' && c2=='e')
430: macro();
431: else if(!inmacro && c1=='i' && c2=='g')
432: macro();
433: else if(!inmacro && c1=='a' && c2 == 'm')
434: macro();
435: else if(c1=='s' && c2=='o')
436: {
437: if(iflag)
438: SKIP;
439: else
440: {
441: getfname();
442: if( fname[0] ){
443: infile = *++filesp = opn( fname );
444: if(!infile)
445: infile = *--filesp;
446: }
447: }
448: }
449: else if(c1=='n' && c2=='x')
450: if(iflag)
451: SKIP;
452: else
453: {
454: getfname();
455: if(fname[0] == '\0') exit(0);
456: if(infile != stdin)
457: fclose(infile);
458: infile = *filesp = opn(fname);
459: }
460: else if(c1 == 't' && c2 == 'm')
461: SKIP;
462: else if(c1=='h' && c2=='w')
463: SKIP;
464: else if(msflag && c1 == 'T' && c2 == 'L'){
465: SKIP_TO_COM;
466: goto comx;
467: }
468: else if(msflag && c1=='N' && c2 == 'R')SKIP;
469: else if(parag && msflag && (c1 == 'P' || c1 == 'I' || c1 == 'L') && c2 == 'P'){
470: printf(".%c%c",c1,c2);
471: while(C != '\n')putchar(c);
472: putchar('\n');
473: }
474: else if(parag && mac==MM && c1 == 'P' && c2 == ' '){
475: printf(".%c%c",c1,c2);
476: while(C != '\n')putchar(c);
477: putchar('\n');
478: }
479: else if(msflag && c1 == 'A' && (c2 == 'U' || c2 == 'I')){
480: if(mac==MM)SKIP;
481: else {
482: SKIP_TO_COM;
483: goto comx;
484: }
485: }
486: else if(msflag && c1 == 'F' && c2 == 'S'){
487: SKIP_TO_COM;
488: goto comx;
489: }
490: else if(msflag && (c1 == 'S' || c1 == 'N') && c2 == 'H'){
491: if(parag){
492: printf(".%c%c",c1,c2);
493: while(C != '\n')putchar(c);
494: putchar(c);
495: putchar('!');
496: while(1){
497: while(C != '\n')putchar(c);
498: putchar('\n');
499: if(C == '.')goto com;
500: putchar('!');
501: putchar(c);
502: }
503: }
504: else {
505: SKIP_TO_COM;
506: goto comx;
507: }
508: }
509: else if(c1 == 'U' && c2 == 'X'){
510: if(wordflag)printf("UNIX\n");
511: else printf("UNIX ");
512: }
513: else if(msflag && c1 == 'O' && c2 == 'K'){
514: SKIP_TO_COM;
515: goto comx;
516: }
517: else if(msflag && c1 == 'N' && c2 == 'D')
518: SKIP;
519: else if(msflag && mac==MM && c1=='H' && (c2==' '||c2=='U')){
520: if(parag){
521: printf(".%c%c",c1,c2);
522: while(C != '\n')putchar(c);
523: putchar('\n');
524: }
525: else {
526: SKIP;
527: }
528: }
529: else if(msflag && mac==MM && c2=='L'){
530: if(disp || c1 == 'R')sdis('L','E');
531: else{
532: SKIP;
533: printf(" .");
534: }
535: }
536: else if(!msflag &&c1 == 'P' && c2 == 'S'){
537: inpic();
538: }
539: else if(msflag && (c1 == 'D' || c1 == 'N' || c1 == 'K'|| c1=='P') && c2 == 'S')
540: {
541: sdis(c1,'E');
542: } /* removed RS-RE */
543: else if(msflag && (c1 == 'K' && c2 == 'F'))
544: {
545: sdis(c1,'E');
546: }
547: else if(msflag && c1 == 'n' && c2 == 'f')
548: sdis('f','i');
549: else if(msflag && c1 == 'c' && c2 == 'e')
550: sce();
551: else
552: {
553: if(c1=='.' && c2=='.'){
554: if(msflag){
555: SKIP;
556: return;
557: }
558: while(C == '.')
559: ;
560: }
561: ++inmacro;
562: if(c1 <= 'Z' && msflag)regline(YES,ONE);
563: else {
564: if(wordflag)c1=C;
565: regline(YES,TWO);
566: }
567: --inmacro;
568: }
569: }
570:
571:
572:
573: macro()
574: {
575: if(msflag){
576: do {
577: SKIP1;
578: } while(C1!='.' || C1!='.' || C1=='.'); /* look for .. */
579: if(c != '\n')SKIP;
580: return;
581: }
582: SKIP;
583: inmacro = YES;
584: }
585:
586:
587:
588:
/*
 * sdis -- discard input up to the request that closes a block.
 *
 * a1,a2 are the two characters of the closing request to look for;
 * comline() calls this with ('L','E'), (c1,'E') and ('f','i').
 * All reading is done with C1/SKIP1, i.e. without eqn delimiter handling.
 * When the closing request is found, " ." is printed unless an .EQ was
 * processed inside the block (eqnf cleared), followed by a newline.
 */
589: sdis(a1,a2)
590: char a1,a2;
591: {
592: register int c1,c2;
593: register int eqnf;
594: int lct;
/* For a1 == 'P': skip blanks; if the argument starts with '<' only the rest of this line is dropped. */
595: if(a1 == 'P'){
596: if(C1 == ' ')
597: while(C1 == ' ');
598: if(c == '<'){
599: SKIP1;
600: return;
601: }
602: }
/* lct counts blocks opened inside this one (see the a1 == 'L' case below). */
603: lct = 0;
604: eqnf=1;
/* Finish the opening request line unless its newline has already been read. */
605: if(c != '\n')
606: SKIP1;
607: while(1){
/* Advance to a line whose first character is '.'; the guard avoids skipping a line after an empty one. */
608: while(C1 != '.')
609: if(c == '\n')continue;
610: else SKIP1;
/* c1,c2: the two characters of the request name. */
611: if((c1=C1) == '\n')continue;
612: if((c2=C1) == '\n'){
/* One-letter request: with a1 == 'f', P or H ends the skipped region. */
613: if(a1 == 'f' && (c1 == 'P' || c1 == 'H'))
614: return;
615: continue;
616: }
617: if(c1==a1 && c2 == a2){
618: SKIP1;
/* A closing request that belongs to an inner block only lowers the count. */
619: if(lct != 0){
620: lct--;
621: continue;
622: }
623: if(eqnf)printf(" .");
624: putchar('\n');
625: return;
626: }
/* While looking for LE, any request whose second character is 'L' opens an inner block. */
627: else if(a1 == 'L' && c2 == 'L'){
628: lct++;
629: SKIP1;
630: }
/* .EQ inside a block opened with 'D': let eqn() consume it and suppress the " ." marker. */
631: else if(a1 == 'D' && c1 == 'E' && c2 == 'Q'){
632: eqn();
633: eqnf=0;
634: }
/* With a1 == 'f': also stop at a request ending in 'P' (MS) or at HU (MM). */
635: else if(a1 == 'f'){
636: if((mac == MS && c2 == 'P') ||
637: (mac==MM && c1 == 'H' && c2 == 'U')){
638: SKIP1;
639: return;
640: }
641: SKIP1;
642: }
643: else SKIP1;
644: }
645: }
646: tbl()
647: {
648: while(C != '.');
649: SKIP;
650: intable = YES;
651: }
/*
 * stbl -- called for a table start request when msflag is on; discards
 * the table.
 *
 * Reads up to the next '.', then uses SKIP_TO_COM to reach the next
 * request line whose name starts with a character not greater than 'Z'.
 * Unless that request is TE, input is read on until a '.' that follows a
 * newline is itself followed by "TE".
 */
652: stbl()
653: {
654: while(C != '.');
655: SKIP_TO_COM;
656: if(c != 'T' || C != 'E'){
657: SKIP;
/* pc remembers the previous character so that '.' only counts at the start of a line. */
658: pc=c;
659: while(C != '.' || pc != '\n' || C != 'T' || C != 'E')pc=c;
660: }
661: }
662:
663: eqn()
664: {
665: register int c1, c2;
666: register int dflg;
667: char last;
668:
669: last=0;
670: dflg = 1;
671: SKIP;
672:
673: for( ;;)
674: {
675: if(C1 == '.' || c == '\'')
676: {
677: while(C1==' ' || c=='\t')
678: ;
679: if(c=='E' && C1=='N')
680: {
681: SKIP;
682: if(msflag && dflg){
683: putchar('x');
684: putchar(' ');
685: if(last){
686: putchar(last);
687: putchar('\n');
688: }
689: }
690: return;
691: }
692: }
693: else if(c == 'd') /* look for delim */
694: {
695: if(C1=='e' && C1=='l')
696: if( C1=='i' && C1=='m')
697: {
698: while(C1 == ' ');
699: if((c1=c)=='\n' || (c2=C1)=='\n'
700: || (c1=='o' && c2=='f' && C1=='f') )
701: {
702: ldelim = NOCHAR;
703: rdelim = NOCHAR;
704: }
705: else {
706: ldelim = c1;
707: rdelim = c2;
708: }
709: }
710: dflg = 0;
711: }
712:
713: if(c != '\n') while(C1 != '\n'){
714: if(chars[c] == PUNCT)last = c;
715: else if(c != ' ')last = 0;
716: }
717: }
718: }
719:
720:
/*
 * backsl -- called with a backslash just read; consumes the rest of the
 * escape sequence using C1 (no eqn delimiter handling).
 *
 * Side effects visible to the caller: c is left at the last character
 * consumed (or forced to '0' after a size change), and the global lp may
 * be moved back one position or have '-' stored through it.
 */
721: backsl() /* skip over a complete backslash construction */
722: {
723: int bdelim;
724:
725: sw:
726: switch(C1)
727: {
/* \" : the rest of the line is skipped */
728: case '"':
729: SKIP1;
730: return;
/*
 * \s : either another escape follows (handled recursively) or a run of
 * digits is consumed; the first non-digit is pushed back and c is set to
 * '0'.  lp is then moved back one position.
 */
731: case 's':
732: if(C1 == '\\') backsl();
733: else {
734: while(C1>='0' && c<='9') ;
735: ungetc(c,infile);
736: c = '0';
737: }
738: --lp;
739: return;
740:
/* \f, \n, \* : one more character is consumed; if it is '(' control falls into the two-character-name case below */
741: case 'f':
742: case 'n':
743: case '*':
744: if(C1 != '(')
745: return;
746:
/*
 * \( : two more characters are consumed (fewer if a newline intervenes).
 * With msflag set, the name "em" stores '-' through lp.
 */
747: case '(':
748: if(msflag){
749: if(C == 'e'){
750: if(C1 == 'm'){
751: *lp = '-';
752: return;
753: }
754: }
755: else if(c != '\n')C1;
756: return;
757: }
758: if(C1 != '\n') C1;
759: return;
760:
761: case '$':
762: C1; /* discard argument number */
763: return;
764:
/*
 * Escapes with a delimited argument: the character after the letter is
 * taken as the delimiter and input is consumed up to its next occurrence
 * or the end of the line; escapes inside the argument are handled
 * recursively.
 */
765: case 'b':
766: case 'x':
767: case 'v':
768: case 'h':
769: case 'w':
770: case 'o':
771: case 'l':
772: case 'L':
773: if( (bdelim=C1) == '\n')
774: return;
775: while(C1!='\n' && c!=bdelim)
776: if(c == '\\') backsl();
777: return;
778:
/* A doubled backslash while inmacro is set: process what follows as an escape; otherwise fall into default. */
779: case '\\':
780: if(inmacro)
781: goto sw;
782: default:
783: return;
784: }
785: }
786:
787:
788:
789:
/*
 * copys -- return a newly allocated copy of the NUL-terminated string s.
 *
 * The storage comes from calloc; if it cannot be obtained the program
 * stops through fatal().
 */
char *copys(s)
register char *s;
{
	register char *dup;
	register unsigned i;

	dup = calloc( (unsigned)(strlen(s)+1), sizeof(*dup) );
	if (dup == NULL)
		fatal("Cannot allocate memory", (char *) NULL);

	/* copy up to and including the terminating NUL */
	for (i = 0; (dup[i] = s[i]) != '\0'; i++)
		;
	return (dup);
}
802: sce(){
803: register char *ap;
804: register int n, i;
805: char a[10];
806: for(ap=a;C != '\n';ap++){
807: *ap = c;
808: if(ap == &a[9]){
809: SKIP;
810: ap=a;
811: break;
812: }
813: }
814: if(ap != a)n = atoi(a);
815: else n = 1;
816: for(i=0;i<n;){
817: if(C == '.'){
818: if(C == 'c'){
819: if(C == 'e'){
820: while(C == ' ');
821: if(c == '0'){
822: SKIP;
823: break;
824: }
825: else SKIP;
826: }
827: else SKIP;
828: }
829: else if(c == 'P' || C == 'P'){
830: if(c != '\n')SKIP;
831: break;
832: }
833: else if(c != '\n')SKIP;
834: }
835: else {
836: SKIP;
837: i++;
838: }
839: }
840: }
841: refer(c1)
842: {
843: register int c2;
844: if(c1 != '\n')
845: SKIP;
846: while(1){
847: if(C != '.')
848: SKIP;
849: else {
850: if(C != ']')
851: SKIP;
852: else {
853: while(C != '\n')
854: c2=c;
855: if(chars[c2] == PUNCT)printf(" %c",c2);
856: return;
857: }
858: }
859: }
860: }
861: inpic(){
862: register int c1;
863: register char *p1;
864: /* SKIP1;*/
865: while(C1 != '\n')
866: if(c == '<'){
867: SKIP1;
868: return;
869: }
870: p1 = line;
871: c = '\n';
872: while(1){
873: c1 = c;
874: if(C1 == '.' && c1 == '\n'){
875: if(C1 != 'P'){
876: if(c == '\n')continue;
877: else { SKIP1; c='\n'; continue;}
878: }
879: if(C1 != 'E'){
880: if(c == '\n')continue;
881: else { SKIP1; c='\n';continue; }
882: }
883: SKIP1;
884: return;
885: }
886: else if(c == '\"'){
887: while(C1 != '\"'){
888: if(c == '\\'){
889: if(C1 == '\"')continue;
890: ungetc(c,infile);
891: backsl();
892: }
893: else *p1++ = c;
894: }
895: *p1++ = ' ';
896: }
897: else if(c == '\n' && p1 != line){
898: *p1 = '\0';
899: if(wordflag)putwords(NO);
900: else {
901: puts(line);
902: putchar('\n');
903: }
904: p1 = line;
905: }
906: }
907: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.