|
|
1.1 root 1: /*
2: * Copyright (c) 1980 The Regents of the University of California.
3: * All rights reserved.
4: *
5: * Redistribution and use in source and binary forms are permitted
6: * provided that: (1) source distributions retain this entire copyright
7: * notice and comment, and (2) distributions including binaries display
8: * the following acknowledgement: ``This product includes software
9: * developed by the University of California, Berkeley and its contributors''
10: * in the documentation or other materials provided with the distribution
11: * and in all advertising materials mentioning features or use of this
12: * software. Neither the name of the University nor the names of its
13: * contributors may be used to endorse or promote products derived
14: * from this software without specific prior written permission.
15: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
16: * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
17: * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
18: */
19:
20: #ifndef lint
21: char copyright[] =
22: "@(#) Copyright (c) 1980 The Regents of the University of California.\n\
23: All rights reserved.\n";
24: #endif /* not lint */
25:
26: #ifndef lint
27: static char sccsid[] = "@(#)checknr.c 5.4 (Berkeley) 6/1/90";
28: #endif /* not lint */
29:
30: /*
31: * checknr: check an nroff/troff input file for matching macro calls.
32: * we also attempt to match size and font changes, but only the embedded
33: * kind. These must end in \s0 and \fP resp. Maybe more sophistication
34: * later but for now think of these restrictions as contributions to
35: * structured typesetting.
36: */
37: #include <stdio.h>
38: #include <ctype.h>
39:
/* Capacity limits for the fixed-size tables in this file. */
#define MAXSTK	100	/* entries the bracket stack can hold */
#define MAXBR	100	/* opener/closer pairs br[] can hold */
#define MAXCMDS	500	/* command names knowncmds[] can hold */

/*
 * One remembered opener.  Entries are pushed when an opening macro,
 * a size change or a font change is seen and popped when its closer
 * turns up.
 */
struct stkstr {
	int opno;	/* index into br[] of the opening bracket */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for a macro/request */
	int parm;	/* parm to size, font, etc */
	int lno;	/* input line number where the opener was seen */
};

struct stkstr stk[MAXSTK];	/* the stack itself */
int stktop;			/* index of the current top entry of stk[] */
54:
/*
 * Table of bracket pairs: the macro that opens a construct and the
 * macro that closes it.  The list ends at the first entry whose opbr
 * is null; the unused tail of the array is where -a stores extra pairs.
 *
 * SZ and FT name the first two slots, which also stand for the
 * embedded \s and \f escapes.
 */
#define SZ	0
#define FT	1

struct brstr {
	char *opbr;	/* opening macro name */
	char *clbr;	/* closing macro name */
} br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz", "sz" },		/* also \s */
	{ "ft", "ft" },		/* also \f */
	/* the -mm package */
	{ "AL", "LE" },
	{ "AS", "AE" },
	{ "BL", "LE" },
	{ "BS", "BE" },
	{ "DF", "DE" },
	{ "DL", "LE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ML", "LE" },
	{ "NS", "NE" },
	{ "RL", "LE" },
	{ "VL", "LE" },
	/* the -ms package */
	{ "AB", "AE" },
	{ "BD", "DE" },
	{ "CD", "DE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ID", "DE" },
	{ "KF", "KE" },
	{ "KS", "KE" },
	{ "LD", "DE" },
	{ "LG", "NL" },
	{ "QS", "QE" },
	{ "RS", "RE" },
	{ "SM", "NL" },
	{ "XA", "XE" },
	{ "XS", "XE" },
	/* The -me package */
	{ "(b", ")b" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(x", ")x" },
	{ "(z", ")z" },
	/* Things needed by preprocessors */
	{ "EQ", "EN" },
	{ "TS", "TE" },
	/* Refer */
	{ "[", "]" },
	/* end of list */
	{ 0, 0 }
};
112:
113: /*
114: * All commands known to nroff, plus macro packages.
115: * Used so we can complain about unrecognized commands.
116: */
117: char *knowncmds[MAXCMDS] = {
118: "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
119: "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
120: "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
121: "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
122: "AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B", "B1", "B2",
123: "BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
124: "D", "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
125: "EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
126: "FQ", "FS", "FV", "FX", "H", "HC", "HD", "HM", "HO", "HU", "I", "ID",
127: "IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
128: "LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
129: "MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
130: "P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R", "RA",
131: "RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S", "S0", "S2", "S3", "SA",
132: "SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
133: "TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
134: "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[", "[-", "[0",
135: "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>",
136: "][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd",
137: "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
138: "ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
139: "ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
140: "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
141: "ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
142: "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
143: "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
144: "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
145: "q", "r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
146: "sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
147: "ti", "tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp",
148: "yr", 0
149: };
150:
/* State of the input currently being read. */
char *cfilename;	/* name of the file being checked */
int lineno;		/* number of the line held in line[] */
char line[256];		/* text of the current input line */

/* Settings established by main() from the command line. */
int nfiles;		/* how many file operands were given */
int fflag;		/* -f: ignore \f */
int sflag;		/* -s: ignore \s */

/* Bookkeeping for the knowncmds table. */
int ncmds;		/* number of entries in use in knowncmds */
int slot;		/* where binsrch would have put a name it did not find */

char *malloc();
161:
162: main(argc, argv)
163: int argc;
164: char **argv;
165: {
166: FILE *f;
167: int i;
168: char *cp;
169: char b1[4];
170:
171: /* Figure out how many known commands there are */
172: while (knowncmds[ncmds])
173: ncmds++;
174: while (argc > 1 && argv[1][0] == '-') {
175: switch(argv[1][1]) {
176:
177: /* -a: add pairs of macros */
178: case 'a':
179: i = strlen(argv[1]) - 2;
180: if (i % 6 != 0)
181: usage();
182: /* look for empty macro slots */
183: for (i=0; br[i].opbr; i++)
184: ;
185: for (cp=argv[1]+3; cp[-1]; cp += 6) {
186: br[i].opbr = malloc(3);
187: strncpy(br[i].opbr, cp, 2);
188: br[i].clbr = malloc(3);
189: strncpy(br[i].clbr, cp+3, 2);
190: addmac(br[i].opbr); /* knows pairs are also known cmds */
191: addmac(br[i].clbr);
192: i++;
193: }
194: break;
195:
196: /* -c: add known commands */
197: case 'c':
198: i = strlen(argv[1]) - 2;
199: if (i % 3 != 0)
200: usage();
201: for (cp=argv[1]+3; cp[-1]; cp += 3) {
202: if (cp[2] && cp[2] != '.')
203: usage();
204: strncpy(b1, cp, 2);
205: addmac(b1);
206: }
207: break;
208:
209: /* -f: ignore font changes */
210: case 'f':
211: fflag = 1;
212: break;
213:
214: /* -s: ignore size changes */
215: case 's':
216: sflag = 1;
217: break;
218: default:
219: usage();
220: }
221: argc--; argv++;
222: }
223:
224: nfiles = argc - 1;
225:
226: if (nfiles > 0) {
227: for (i=1; i<argc; i++) {
228: cfilename = argv[i];
229: f = fopen(cfilename, "r");
230: if (f == NULL)
231: perror(cfilename);
232: else
233: process(f);
234: }
235: } else {
236: cfilename = "stdin";
237: process(stdin);
238: }
239: exit(0);
240: }
241:
/*
 * usage: print the command synopsis on the standard output and
 * terminate the program with exit status 1.  Does not return.
 */
int
usage()
{
	fputs("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n",
	    stdout);
	exit(1);
}
247:
248: process(f)
249: FILE *f;
250: {
251: register int i, n;
252: char mac[5]; /* The current macro or nroff command */
253: int pl;
254:
255: stktop = -1;
256: for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
257: if (line[0] == '.') {
258: /*
259: * find and isolate the macro/command name.
260: */
261: strncpy(mac, line+1, 4);
262: if (isspace(mac[0])) {
263: pe(lineno);
264: printf("Empty command\n");
265: } else if (isspace(mac[1])) {
266: mac[1] = 0;
267: } else if (isspace(mac[2])) {
268: mac[2] = 0;
269: } else if (mac[0] != '\\' || mac[1] != '\"') {
270: pe(lineno);
271: printf("Command too long\n");
272: }
273:
274: /*
275: * Is it a known command?
276: */
277: checkknown(mac);
278:
279: /*
280: * Should we add it?
281: */
282: if (eq(mac, "de"))
283: addcmd(line);
284:
285: chkcmd(line, mac);
286: }
287:
288: /*
289: * At this point we process the line looking
290: * for \s and \f.
291: */
292: for (i=0; line[i]; i++)
293: if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
294: if (!sflag && line[++i]=='s') {
295: pl = line[++i];
296: if (isdigit(pl)) {
297: n = pl - '0';
298: pl = ' ';
299: } else
300: n = 0;
301: while (isdigit(line[++i]))
302: n = 10 * n + line[i] - '0';
303: i--;
304: if (n == 0) {
305: if (stk[stktop].opno == SZ) {
306: stktop--;
307: } else {
308: pe(lineno);
309: printf("unmatched \\s0\n");
310: }
311: } else {
312: stk[++stktop].opno = SZ;
313: stk[stktop].pl = pl;
314: stk[stktop].parm = n;
315: stk[stktop].lno = lineno;
316: }
317: } else if (!fflag && line[i]=='f') {
318: n = line[++i];
319: if (n == 'P') {
320: if (stk[stktop].opno == FT) {
321: stktop--;
322: } else {
323: pe(lineno);
324: printf("unmatched \\fP\n");
325: }
326: } else {
327: stk[++stktop].opno = FT;
328: stk[stktop].pl = 1;
329: stk[stktop].parm = n;
330: stk[stktop].lno = lineno;
331: }
332: }
333: }
334: }
335: /*
336: * We've hit the end and look at all this stuff that hasn't been
337: * matched yet! Complain, complain.
338: */
339: for (i=stktop; i>=0; i--) {
340: complain(i);
341: }
342: }
343:
344: complain(i)
345: {
346: pe(stk[i].lno);
347: printf("Unmatched ");
348: prop(i);
349: printf("\n");
350: }
351:
352: prop(i)
353: {
354: if (stk[i].pl == 0)
355: printf(".%s", br[stk[i].opno].opbr);
356: else switch(stk[i].opno) {
357: case SZ:
358: printf("\\s%c%d", stk[i].pl, stk[i].parm);
359: break;
360: case FT:
361: printf("\\f%c", stk[i].parm);
362: break;
363: default:
364: printf("Bug: stk[%d].opno = %d = .%s, .%s",
365: i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
366: }
367: }
368:
369: chkcmd(line, mac)
370: char *line;
371: char *mac;
372: {
373: register int i, n;
374:
375: /*
376: * Check to see if it matches top of stack.
377: */
378: if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
379: stktop--; /* OK. Pop & forget */
380: else {
381: /* No. Maybe it's an opener */
382: for (i=0; br[i].opbr; i++) {
383: if (eq(mac, br[i].opbr)) {
384: /* Found. Push it. */
385: stktop++;
386: stk[stktop].opno = i;
387: stk[stktop].pl = 0;
388: stk[stktop].parm = 0;
389: stk[stktop].lno = lineno;
390: break;
391: }
392: /*
393: * Maybe it's an unmatched closer.
394: * NOTE: this depends on the fact
395: * that none of the closers can be
396: * openers too.
397: */
398: if (eq(mac, br[i].clbr)) {
399: nomatch(mac);
400: break;
401: }
402: }
403: }
404: }
405:
406: nomatch(mac)
407: char *mac;
408: {
409: register int i, j;
410:
411: /*
412: * Look for a match further down on stack
413: * If we find one, it suggests that the stuff in
414: * between is supposed to match itself.
415: */
416: for (j=stktop; j>=0; j--)
417: if (eq(mac,br[stk[j].opno].clbr)) {
418: /* Found. Make a good diagnostic. */
419: if (j == stktop-2) {
420: /*
421: * Check for special case \fx..\fR and don't
422: * complain.
423: */
424: if (stk[j+1].opno==FT && stk[j+1].parm!='R'
425: && stk[j+2].opno==FT && stk[j+2].parm=='R') {
426: stktop = j -1;
427: return;
428: }
429: /*
430: * We have two unmatched frobs. Chances are
431: * they were intended to match, so we mention
432: * them together.
433: */
434: pe(stk[j+1].lno);
435: prop(j+1);
436: printf(" does not match %d: ", stk[j+2].lno);
437: prop(j+2);
438: printf("\n");
439: } else for (i=j+1; i <= stktop; i++) {
440: complain(i);
441: }
442: stktop = j-1;
443: return;
444: }
445: /* Didn't find one. Throw this away. */
446: pe(lineno);
447: printf("Unmatched .%s\n", mac);
448: }
449:
/* eq: return 1 when s1 and s2 hold the same string, 0 otherwise */
int
eq(s1, s2)
char *s1, *s2;
{
	return !strcmp(s1, s2);
}
456:
457: /* print the first part of an error message, given the line number */
458: pe(lineno)
459: int lineno;
460: {
461: if (nfiles > 1)
462: printf("%s: ", cfilename);
463: printf("%d: ", lineno);
464: }
465:
466: checkknown(mac)
467: char *mac;
468: {
469:
470: if (eq(mac, "."))
471: return;
472: if (binsrch(mac) >= 0)
473: return;
474: if (mac[0] == '\\' && mac[1] == '"') /* comments */
475: return;
476:
477: pe(lineno);
478: printf("Unknown command: .%s\n", mac);
479: }
480:
481: /*
482: * We have a .de xx line in "line". Add xx to the list of known commands.
483: */
484: addcmd(line)
485: char *line;
486: {
487: char *mac;
488:
489: /* grab the macro being defined */
490: mac = line+4;
491: while (isspace(*mac))
492: mac++;
493: if (*mac == 0) {
494: pe(lineno);
495: printf("illegal define: %s\n", line);
496: return;
497: }
498: mac[2] = 0;
499: if (isspace(mac[1]) || mac[1] == '\\')
500: mac[1] = 0;
501: if (ncmds >= MAXCMDS) {
502: printf("Only %d known commands allowed\n", MAXCMDS);
503: exit(1);
504: }
505: addmac(mac);
506: }
507:
508: /*
509: * Add mac to the list. We should really have some kind of tree
510: * structure here but this is a quick-and-dirty job and I just don't
511: * have time to mess with it. (I wonder if this will come back to haunt
512: * me someday?) Anyway, I claim that .de is fairly rare in user
513: * nroff programs, and the register loop below is pretty fast.
514: */
515: addmac(mac)
516: char *mac;
517: {
518: register char **src, **dest, **loc;
519:
520: if (binsrch(mac) >= 0){ /* it's OK to redefine something */
521: #ifdef DEBUG
522: printf("binsrch(%s) -> already in table\n", mac);
523: #endif DEBUG
524: return;
525: }
526: /* binsrch sets slot as a side effect */
527: #ifdef DEBUG
528: printf("binsrch(%s) -> %d\n", mac, slot);
529: #endif
530: loc = &knowncmds[slot];
531: src = &knowncmds[ncmds-1];
532: dest = src+1;
533: while (dest > loc)
534: *dest-- = *src--;
535: *loc = malloc(3);
536: strcpy(*loc, mac);
537: ncmds++;
538: #ifdef DEBUG
539: printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
540: #endif
541: }
542:
543: /*
544: * Do a binary search in knowncmds for mac.
545: * If found, return the index. If not, return -1.
546: */
547: binsrch(mac)
548: char *mac;
549: {
550: register char *p; /* pointer to current cmd in list */
551: register int d; /* difference if any */
552: register int mid; /* mid point in binary search */
553: register int top, bot; /* boundaries of bin search, inclusive */
554:
555: top = ncmds-1;
556: bot = 0;
557: while (top >= bot) {
558: mid = (top+bot)/2;
559: p = knowncmds[mid];
560: d = p[0] - mac[0];
561: if (d == 0)
562: d = p[1] - mac[1];
563: if (d == 0)
564: return mid;
565: if (d < 0)
566: bot = mid + 1;
567: else
568: top = mid - 1;
569: }
570: slot = bot; /* place it would have gone */
571: return -1;
572: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.