|
|
1.1 root 1: #ifndef lint
2: static char sccsid[] = "@(#)checknr.c 4.6 (Berkeley) 9/24/83";
3: #endif
4: /*
5: * checknr: check an nroff/troff input file for matching macro calls.
6: * we also attempt to match size and font changes, but only the embedded
7: * kind. These must end in \s0 and \fP resp. Maybe more sophistication
8: * later but for now think of these restrictions as contributions to
9: * structured typesetting.
10: */
11: #include <stdio.h>
12: #include <ctype.h>
13:
14: #define MAXSTK 100 /* Number of entries in stk[], the matching stack */
15: #define MAXBR 100 /* Number of entries in br[], counting the 0, 0 pair that ends the table */
16: #define MAXCMDS 500 /* Number of entries in knowncmds[] */
17:
18: /*
19: * The stack on which we remember what we've seen so far.
20: */
/* One entry per opener (macro, \s or \f) that has not been closed yet. */
21: struct stkstr {
22: int opno; /* index into br[] of the opener; SZ for \s, FT for \f */
23: int pl; /* 0 for a macro opener; 1 for \f; for \s the character after it, or ' ' if that was a digit */
24: int parm; /* the size for \s, the font character for \f, 0 for a macro */
25: int lno; /* input line number on which the opener was seen */
26: } stk[MAXSTK];
/* index of the top entry of stk[]; process() resets it to -1 for each file */
27: int stktop;
28:
29: /*
30: * The kinds of opening and closing brackets.
31: */
/*
 * Each entry pairs an opening macro name with the macro that closes it.
 * The table ends at the first entry whose opbr is null; main() appends
 * the pairs given with -a after the last initialized entry.
 * The first two entries must stay at indices SZ and FT, which are also
 * used as the opno of embedded \s and \f changes.
 * The DS/DE and FS/FE pairs are listed under both -mm and -ms.
 */
32: struct brstr {
33: char *opbr;
34: char *clbr;
35: } br[MAXBR] = {
36: /* A few bare bones troff commands */
37: #define SZ 0
38: "sz", "sz", /* also \s */
39: #define FT 1
40: "ft", "ft", /* also \f */
41: /* the -mm package */
42: "AL", "LE",
43: "AS", "AE",
44: "BL", "LE",
45: "BS", "BE",
46: "DF", "DE",
47: "DL", "LE",
48: "DS", "DE",
49: "FS", "FE",
50: "ML", "LE",
51: "NS", "NE",
52: "RL", "LE",
53: "VL", "LE",
54: /* the -ms package */
55: "AB", "AE",
56: "BD", "DE",
57: "CD", "DE",
58: "DS", "DE",
59: "FS", "FE",
60: "ID", "DE",
61: "KF", "KE",
62: "KS", "KE",
63: "LD", "DE",
64: "LG", "NL",
65: "QS", "QE",
66: "RS", "RE",
67: "SM", "NL",
68: "XA", "XE",
69: "XS", "XE",
70: /* The -me package */
71: "(b", ")b",
72: "(c", ")c",
73: "(d", ")d",
74: "(f", ")f",
75: "(l", ")l",
76: "(q", ")q",
77: "(x", ")x",
78: "(z", ")z",
79: /* Things needed by preprocessors */
80: "EQ", "EN",
81: "TS", "TE",
82: /* Refer */
83: "[", "]",
84: 0, 0
85: };
86:
87: /*
88: * All commands known to nroff, plus macro packages.
89: * Used so we can complain about unrecognized commands.
90: */
/*
 * The entries must stay in ascending order of their first two
 * characters: binsrch() does a binary search over this array and
 * addmac() inserts new names at the position binsrch() reports.
 * The null pointer after the last name lets main() count the
 * initial entries into ncmds.
 */
91: char *knowncmds[MAXCMDS] = {
92: "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
93: "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
94: "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
95: "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
96: "AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B", "B1", "B2",
97: "BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
98: "D", "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
99: "EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
100: "FQ", "FS", "FV", "FX", "H", "HC", "HD", "HM", "HO", "HU", "I", "ID",
101: "IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
102: "LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
103: "MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
104: "P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R", "RA",
105: "RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S", "S0", "S2", "S3", "SA",
106: "SG", "SH", "SK", "SM", "SP", "SY", "TA", "TB", "TC", "TD", "TE", "TH",
107: "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL", "WC",
108: "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[", "[-", "[0", "[1",
109: "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>", "][",
110: "ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd", "bi",
111: "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs", "ct",
112: "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec", "ef",
113: "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo", "fp",
114: "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i", "ie",
115: "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln", "lo",
116: "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1", "n2",
117: "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx", "of",
118: "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps", "q",
119: "r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb", "sc",
120: "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th", "ti",
121: "tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp", "yr",
122: 0
123: };
124:
125: int lineno; /* current line number in input file; restarts at 1 for each file */
126: char line[256]; /* the current line, as read by fgets in process() */
127: char *cfilename; /* name of current file, or "stdin" */
128: int nfiles; /* number of files to process; pe() prints the file name only when > 1 */
129: int fflag; /* -f: ignore \f */
130: int sflag; /* -s: ignore \s */
131: int ncmds; /* number of entries in use in knowncmds */
132: int slot; /* where a name would be inserted in knowncmds; set by binsrch only when the name is NOT found */
133:
/* K&R-style declaration so that the int-by-default rule does not apply to malloc */
134: char *malloc();
135:
136: main(argc, argv)
137: int argc;
138: char **argv;
139: {
140: FILE *f;
141: int i;
142: char *cp;
143: char b1[4];
144:
145: /* Figure out how many known commands there are */
146: while (knowncmds[ncmds])
147: ncmds++;
148: while (argc > 1 && argv[1][0] == '-') {
149: switch(argv[1][1]) {
150:
151: /* -a: add pairs of macros */
152: case 'a':
153: i = strlen(argv[1]) - 2;
154: if (i % 6 != 0)
155: usage();
156: /* look for empty macro slots */
157: for (i=0; br[i].opbr; i++)
158: ;
159: for (cp=argv[1]+3; cp[-1]; cp += 6) {
160: br[i].opbr = malloc(3);
161: strncpy(br[i].opbr, cp, 2);
162: br[i].clbr = malloc(3);
163: strncpy(br[i].clbr, cp+3, 2);
164: addmac(br[i].opbr); /* knows pairs are also known cmds */
165: addmac(br[i].clbr);
166: i++;
167: }
168: break;
169:
170: /* -c: add known commands */
171: case 'c':
172: i = strlen(argv[1]) - 2;
173: if (i % 3 != 0)
174: usage();
175: for (cp=argv[1]+3; cp[-1]; cp += 3) {
176: if (cp[2] && cp[2] != '.')
177: usage();
178: strncpy(b1, cp, 2);
179: addmac(b1);
180: }
181: break;
182:
183: /* -f: ignore font changes */
184: case 'f':
185: fflag = 1;
186: break;
187:
188: /* -s: ignore size changes */
189: case 's':
190: sflag = 1;
191: break;
192: default:
193: usage();
194: }
195: argc--; argv++;
196: }
197:
198: nfiles = argc - 1;
199:
200: if (nfiles > 0) {
201: for (i=1; i<argc; i++) {
202: cfilename = argv[i];
203: f = fopen(cfilename, "r");
204: if (f == NULL)
205: perror(cfilename);
206: else
207: process(f);
208: }
209: } else {
210: cfilename = "stdin";
211: process(stdin);
212: }
213: exit(0);
214: }
215:
/*
 * usage: write the command synopsis on the standard output and
 * exit with status 1.  Never returns.
 */
int
usage()
{
	fputs("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n", stdout);
	exit(1);
}
221:
222: process(f)
223: FILE *f;
224: {
225: register int i, n;
226: char mac[5]; /* The current macro or nroff command */
227: int pl;
228:
229: stktop = -1;
230: for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
231: if (line[0] == '.') {
232: /*
233: * find and isolate the macro/command name.
234: */
235: strncpy(mac, line+1, 4);
236: if (isspace(mac[0])) {
237: pe(lineno);
238: printf("Empty command\n");
239: } else if (isspace(mac[1])) {
240: mac[1] = 0;
241: } else if (isspace(mac[2])) {
242: mac[2] = 0;
243: } else if (mac[0] != '\\' || mac[1] != '\"') {
244: pe(lineno);
245: printf("Command too long\n");
246: }
247:
248: /*
249: * Is it a known command?
250: */
251: checkknown(mac);
252:
253: /*
254: * Should we add it?
255: */
256: if (eq(mac, "de"))
257: addcmd(line);
258:
259: chkcmd(line, mac);
260: }
261:
262: /*
263: * At this point we process the line looking
264: * for \s and \f.
265: */
266: for (i=0; line[i]; i++)
267: if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
268: if (!sflag && line[++i]=='s') {
269: pl = line[++i];
270: if (isdigit(pl)) {
271: n = pl - '0';
272: pl = ' ';
273: } else
274: n = 0;
275: while (isdigit(line[++i]))
276: n = 10 * n + line[i] - '0';
277: i--;
278: if (n == 0) {
279: if (stk[stktop].opno == SZ) {
280: stktop--;
281: } else {
282: pe(lineno);
283: printf("unmatched \\s0\n");
284: }
285: } else {
286: stk[++stktop].opno = SZ;
287: stk[stktop].pl = pl;
288: stk[stktop].parm = n;
289: stk[stktop].lno = lineno;
290: }
291: } else if (!fflag && line[i]=='f') {
292: n = line[++i];
293: if (n == 'P') {
294: if (stk[stktop].opno == FT) {
295: stktop--;
296: } else {
297: pe(lineno);
298: printf("unmatched \\fP\n");
299: }
300: } else {
301: stk[++stktop].opno = FT;
302: stk[stktop].pl = 1;
303: stk[stktop].parm = n;
304: stk[stktop].lno = lineno;
305: }
306: }
307: }
308: }
309: /*
310: * We've hit the end and look at all this stuff that hasn't been
311: * matched yet! Complain, complain.
312: */
313: for (i=stktop; i>=0; i--) {
314: complain(i);
315: }
316: }
317:
318: complain(i)
319: {
320: pe(stk[i].lno);
321: printf("Unmatched ");
322: prop(i);
323: printf("\n");
324: }
325:
326: prop(i)
327: {
328: if (stk[i].pl == 0)
329: printf(".%s", br[stk[i].opno].opbr);
330: else switch(stk[i].opno) {
331: case SZ:
332: printf("\\s%c%d", stk[i].pl, stk[i].parm);
333: break;
334: case FT:
335: printf("\\f%c", stk[i].parm);
336: break;
337: default:
338: printf("Bug: stk[%d].opno = %d = .%s, .%s",
339: i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
340: }
341: }
342:
343: chkcmd(line, mac)
344: char *line;
345: char *mac;
346: {
347: register int i, n;
348:
349: /*
350: * Check to see if it matches top of stack.
351: */
352: if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
353: stktop--; /* OK. Pop & forget */
354: else {
355: /* No. Maybe it's an opener */
356: for (i=0; br[i].opbr; i++) {
357: if (eq(mac, br[i].opbr)) {
358: /* Found. Push it. */
359: stktop++;
360: stk[stktop].opno = i;
361: stk[stktop].pl = 0;
362: stk[stktop].parm = 0;
363: stk[stktop].lno = lineno;
364: break;
365: }
366: /*
367: * Maybe it's an unmatched closer.
368: * NOTE: this depends on the fact
369: * that none of the closers can be
370: * openers too.
371: */
372: if (eq(mac, br[i].clbr)) {
373: nomatch(mac);
374: break;
375: }
376: }
377: }
378: }
379:
380: nomatch(mac)
381: char *mac;
382: {
383: register int i, j;
384:
385: /*
386: * Look for a match further down on stack
387: * If we find one, it suggests that the stuff in
388: * between is supposed to match itself.
389: */
390: for (j=stktop; j>=0; j--)
391: if (eq(mac,br[stk[j].opno].clbr)) {
392: /* Found. Make a good diagnostic. */
393: if (j == stktop-2) {
394: /*
395: * Check for special case \fx..\fR and don't
396: * complain.
397: */
398: if (stk[j+1].opno==FT && stk[j+1].parm!='R'
399: && stk[j+2].opno==FT && stk[j+2].parm=='R') {
400: stktop = j -1;
401: return;
402: }
403: /*
404: * We have two unmatched frobs. Chances are
405: * they were intended to match, so we mention
406: * them together.
407: */
408: pe(stk[j+1].lno);
409: prop(j+1);
410: printf(" does not match %d: ", stk[j+2].lno);
411: prop(j+2);
412: printf("\n");
413: } else for (i=j+1; i <= stktop; i++) {
414: complain(i);
415: }
416: stktop = j-1;
417: return;
418: }
419: /* Didn't find one. Throw this away. */
420: pe(lineno);
421: printf("Unmatched .%s\n", mac);
422: }
423:
/*
 * eq: compare two NUL-terminated strings.
 * Returns 1 when they are identical, 0 otherwise.
 */
int
eq(s1, s2)
char *s1, *s2;
{
	return (strcmp(s1, s2) ? 0 : 1);
}
430:
431: /* print the first part of an error message, given the line number */
432: pe(lineno)
433: int lineno;
434: {
435: if (nfiles > 1)
436: printf("%s: ", cfilename);
437: printf("%d: ", lineno);
438: }
439:
440: checkknown(mac)
441: char *mac;
442: {
443:
444: if (eq(mac, "."))
445: return;
446: if (binsrch(mac) >= 0)
447: return;
448: if (mac[0] == '\\' && mac[1] == '"') /* comments */
449: return;
450:
451: pe(lineno);
452: printf("Unknown command: .%s\n", mac);
453: }
454:
455: /*
456: * We have a .de xx line in "line". Add xx to the list of known commands.
457: */
458: addcmd(line)
459: char *line;
460: {
461: char *mac;
462:
463: /* grab the macro being defined */
464: mac = line+4;
465: while (isspace(*mac))
466: mac++;
467: if (*mac == 0) {
468: pe(lineno);
469: printf("illegal define: %s\n", line);
470: return;
471: }
472: mac[2] = 0;
473: if (isspace(mac[1]) || mac[1] == '\\')
474: mac[1] = 0;
475: if (ncmds >= MAXCMDS) {
476: printf("Only %d known commands allowed\n", MAXCMDS);
477: exit(1);
478: }
479: addmac(mac);
480: }
481:
482: /*
483: * Add mac to the list. We should really have some kind of tree
484: * structure here but this is a quick-and-dirty job and I just don't
485: * have time to mess with it. (I wonder if this will come back to haunt
486: * me someday?) Anyway, I claim that .de is fairly rare in user
487: * nroff programs, and the register loop below is pretty fast.
488: */
489: addmac(mac)
490: char *mac;
491: {
492: register char **src, **dest, **loc;
493:
494: binsrch(mac); /* it's OK to redefine something */
495: /* binsrch sets slot as a side effect */
496: #ifdef DEBUG
497: printf("binsrch(%s) -> %d\n", mac, slot);
498: #endif
499: loc = &knowncmds[slot];
500: src = &knowncmds[ncmds-1];
501: dest = src+1;
502: while (dest > loc)
503: *dest-- = *src--;
504: *loc = malloc(3);
505: strcpy(*loc, mac);
506: ncmds++;
507: #ifdef DEBUG
508: printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
509: #endif
510: }
511:
512: /*
513: * Do a binary search in knowncmds for mac.
514: * If found, return the index. If not, return -1.
515: */
516: binsrch(mac)
517: char *mac;
518: {
519: register char *p; /* pointer to current cmd in list */
520: register int d; /* difference if any */
521: register int mid; /* mid point in binary search */
522: register int top, bot; /* boundaries of bin search, inclusive */
523:
524: top = ncmds-1;
525: bot = 0;
526: while (top >= bot) {
527: mid = (top+bot)/2;
528: p = knowncmds[mid];
529: d = p[0] - mac[0];
530: if (d == 0)
531: d = p[1] - mac[1];
532: if (d == 0)
533: return mid;
534: if (d < 0)
535: bot = mid + 1;
536: else
537: top = mid - 1;
538: }
539: slot = bot; /* place it would have gone */
540: return -1;
541: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.