|
|
1.1 root 1: /*
2: * Copyright (c) 1980 Regents of the University of California.
3: * All rights reserved. The Berkeley software License Agreement
4: * specifies the terms and conditions for redistribution.
5: */
6:
7: #ifndef lint
8: char copyright[] =
9: "@(#) Copyright (c) 1980 Regents of the University of California.\n\
10: All rights reserved.\n";
11: #endif not lint
12:
13: #ifndef lint
14: static char sccsid[] = "@(#)checknr.c 5.2 (Berkeley) 12/5/85";
15: #endif not lint
16:
17: /*
18: * checknr: check an nroff/troff input file for matching macro calls.
19: * we also attempt to match size and font changes, but only the embedded
20: * kind. These must end in \s0 and \fP resp. Maybe more sophistication
21: * later but for now think of these restrictions as contributions to
22: * structured typesetting.
23: */
24: #include <stdio.h>
25: #include <ctype.h>
26:
27: #define MAXSTK 100 /* Stack size */
28: #define MAXBR 100 /* Max number of bracket pairs known */
29: #define MAXCMDS 500 /* Max number of commands known */
30:
/*
 * One entry per opener (macro call, \s size change or \f font change)
 * that has been seen but not yet closed.
 */
struct stkstr {
	int opno;	/* index into br[] of the opening bracket */
	int pl;		/* '+', '-' or ' ' for \s; 1 for \f; 0 for a macro such as .ft */
	int parm;	/* argument given to the size or font change */
	int lno;	/* input line number on which the opener appeared */
};

struct stkstr stk[MAXSTK];	/* the stack of pending openers */
int stktop;			/* index of the top entry of stk[] */
41:
/*
 * Every opener/closer pair we know how to match.  The table ends with
 * a pair of null pointers; the zero-filled slots after it are where
 * the -a option appends user-supplied pairs.
 */
#define SZ	0	/* br[] index used for size changes (also \s) */
#define FT	1	/* br[] index used for font changes (also \f) */

struct brstr {
	char *opbr;	/* name of the opening macro */
	char *clbr;	/* name of the macro that closes it */
};

struct brstr br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz", "sz" },
	{ "ft", "ft" },
	/* the -mm package */
	{ "AL", "LE" },
	{ "AS", "AE" },
	{ "BL", "LE" },
	{ "BS", "BE" },
	{ "DF", "DE" },
	{ "DL", "LE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ML", "LE" },
	{ "NS", "NE" },
	{ "RL", "LE" },
	{ "VL", "LE" },
	/* the -ms package */
	{ "AB", "AE" },
	{ "BD", "DE" },
	{ "CD", "DE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ID", "DE" },
	{ "KF", "KE" },
	{ "KS", "KE" },
	{ "LD", "DE" },
	{ "LG", "NL" },
	{ "QS", "QE" },
	{ "RS", "RE" },
	{ "SM", "NL" },
	{ "XA", "XE" },
	{ "XS", "XE" },
	/* The -me package */
	{ "(b", ")b" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(x", ")x" },
	{ "(z", ")z" },
	/* Things needed by preprocessors */
	{ "EQ", "EN" },
	{ "TS", "TE" },
	/* Refer */
	{ "[", "]" },
	/* end of table */
	{ 0, 0 }
};
99:
100: /*
101: * All commands known to nroff, plus macro packages.
102: * Used so we can complain about unrecognized commands.
103: */
104: char *knowncmds[MAXCMDS] = {
105: "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
106: "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
107: "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
108: "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
109: "AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B", "B1", "B2",
110: "BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
111: "D", "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
112: "EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
113: "FQ", "FS", "FV", "FX", "H", "HC", "HD", "HM", "HO", "HU", "I", "ID",
114: "IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
115: "LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
116: "MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
117: "P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R", "RA",
118: "RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S", "S0", "S2", "S3", "SA",
119: "SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
120: "TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
121: "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[", "[-", "[0",
122: "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>",
123: "][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd",
124: "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
125: "ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
126: "ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
127: "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
128: "ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
129: "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
130: "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
131: "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
132: "q", "r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
133: "sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
134: "ti", "tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp",
135: "yr", 0
136: };
137:
/* Where we are in the input */
char *cfilename;	/* name of the file being read */
int lineno;		/* number of the line now held in line[] */
char line[256];		/* text of the current input line */

/* What the command line asked for */
int nfiles;		/* how many files were named */
int fflag;		/* set by -f: do not check \f */
int sflag;		/* set by -s: do not check \s */

/* Bookkeeping for the knowncmds table */
int ncmds;		/* number of entries in knowncmds */
int slot;		/* insertion point left behind by a failed binsrch */

char *malloc();
148:
149: main(argc, argv)
150: int argc;
151: char **argv;
152: {
153: FILE *f;
154: int i;
155: char *cp;
156: char b1[4];
157:
158: /* Figure out how many known commands there are */
159: while (knowncmds[ncmds])
160: ncmds++;
161: while (argc > 1 && argv[1][0] == '-') {
162: switch(argv[1][1]) {
163:
164: /* -a: add pairs of macros */
165: case 'a':
166: i = strlen(argv[1]) - 2;
167: if (i % 6 != 0)
168: usage();
169: /* look for empty macro slots */
170: for (i=0; br[i].opbr; i++)
171: ;
172: for (cp=argv[1]+3; cp[-1]; cp += 6) {
173: br[i].opbr = malloc(3);
174: strncpy(br[i].opbr, cp, 2);
175: br[i].clbr = malloc(3);
176: strncpy(br[i].clbr, cp+3, 2);
177: addmac(br[i].opbr); /* knows pairs are also known cmds */
178: addmac(br[i].clbr);
179: i++;
180: }
181: break;
182:
183: /* -c: add known commands */
184: case 'c':
185: i = strlen(argv[1]) - 2;
186: if (i % 3 != 0)
187: usage();
188: for (cp=argv[1]+3; cp[-1]; cp += 3) {
189: if (cp[2] && cp[2] != '.')
190: usage();
191: strncpy(b1, cp, 2);
192: addmac(b1);
193: }
194: break;
195:
196: /* -f: ignore font changes */
197: case 'f':
198: fflag = 1;
199: break;
200:
201: /* -s: ignore size changes */
202: case 's':
203: sflag = 1;
204: break;
205: default:
206: usage();
207: }
208: argc--; argv++;
209: }
210:
211: nfiles = argc - 1;
212:
213: if (nfiles > 0) {
214: for (i=1; i<argc; i++) {
215: cfilename = argv[i];
216: f = fopen(cfilename, "r");
217: if (f == NULL)
218: perror(cfilename);
219: else
220: process(f);
221: }
222: } else {
223: cfilename = "stdin";
224: process(stdin);
225: }
226: exit(0);
227: }
228:
/*
 * usage: print the one-line synopsis on standard output and quit
 * with exit status 1.
 */
usage()
{
	fputs("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n", stdout);
	exit(1);
}
234:
235: process(f)
236: FILE *f;
237: {
238: register int i, n;
239: char mac[5]; /* The current macro or nroff command */
240: int pl;
241:
242: stktop = -1;
243: for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
244: if (line[0] == '.') {
245: /*
246: * find and isolate the macro/command name.
247: */
248: strncpy(mac, line+1, 4);
249: if (isspace(mac[0])) {
250: pe(lineno);
251: printf("Empty command\n");
252: } else if (isspace(mac[1])) {
253: mac[1] = 0;
254: } else if (isspace(mac[2])) {
255: mac[2] = 0;
256: } else if (mac[0] != '\\' || mac[1] != '\"') {
257: pe(lineno);
258: printf("Command too long\n");
259: }
260:
261: /*
262: * Is it a known command?
263: */
264: checkknown(mac);
265:
266: /*
267: * Should we add it?
268: */
269: if (eq(mac, "de"))
270: addcmd(line);
271:
272: chkcmd(line, mac);
273: }
274:
275: /*
276: * At this point we process the line looking
277: * for \s and \f.
278: */
279: for (i=0; line[i]; i++)
280: if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
281: if (!sflag && line[++i]=='s') {
282: pl = line[++i];
283: if (isdigit(pl)) {
284: n = pl - '0';
285: pl = ' ';
286: } else
287: n = 0;
288: while (isdigit(line[++i]))
289: n = 10 * n + line[i] - '0';
290: i--;
291: if (n == 0) {
292: if (stk[stktop].opno == SZ) {
293: stktop--;
294: } else {
295: pe(lineno);
296: printf("unmatched \\s0\n");
297: }
298: } else {
299: stk[++stktop].opno = SZ;
300: stk[stktop].pl = pl;
301: stk[stktop].parm = n;
302: stk[stktop].lno = lineno;
303: }
304: } else if (!fflag && line[i]=='f') {
305: n = line[++i];
306: if (n == 'P') {
307: if (stk[stktop].opno == FT) {
308: stktop--;
309: } else {
310: pe(lineno);
311: printf("unmatched \\fP\n");
312: }
313: } else {
314: stk[++stktop].opno = FT;
315: stk[stktop].pl = 1;
316: stk[stktop].parm = n;
317: stk[stktop].lno = lineno;
318: }
319: }
320: }
321: }
322: /*
323: * We've hit the end and look at all this stuff that hasn't been
324: * matched yet! Complain, complain.
325: */
326: for (i=stktop; i>=0; i--) {
327: complain(i);
328: }
329: }
330:
331: complain(i)
332: {
333: pe(stk[i].lno);
334: printf("Unmatched ");
335: prop(i);
336: printf("\n");
337: }
338:
339: prop(i)
340: {
341: if (stk[i].pl == 0)
342: printf(".%s", br[stk[i].opno].opbr);
343: else switch(stk[i].opno) {
344: case SZ:
345: printf("\\s%c%d", stk[i].pl, stk[i].parm);
346: break;
347: case FT:
348: printf("\\f%c", stk[i].parm);
349: break;
350: default:
351: printf("Bug: stk[%d].opno = %d = .%s, .%s",
352: i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
353: }
354: }
355:
356: chkcmd(line, mac)
357: char *line;
358: char *mac;
359: {
360: register int i, n;
361:
362: /*
363: * Check to see if it matches top of stack.
364: */
365: if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
366: stktop--; /* OK. Pop & forget */
367: else {
368: /* No. Maybe it's an opener */
369: for (i=0; br[i].opbr; i++) {
370: if (eq(mac, br[i].opbr)) {
371: /* Found. Push it. */
372: stktop++;
373: stk[stktop].opno = i;
374: stk[stktop].pl = 0;
375: stk[stktop].parm = 0;
376: stk[stktop].lno = lineno;
377: break;
378: }
379: /*
380: * Maybe it's an unmatched closer.
381: * NOTE: this depends on the fact
382: * that none of the closers can be
383: * openers too.
384: */
385: if (eq(mac, br[i].clbr)) {
386: nomatch(mac);
387: break;
388: }
389: }
390: }
391: }
392:
393: nomatch(mac)
394: char *mac;
395: {
396: register int i, j;
397:
398: /*
399: * Look for a match further down on stack
400: * If we find one, it suggests that the stuff in
401: * between is supposed to match itself.
402: */
403: for (j=stktop; j>=0; j--)
404: if (eq(mac,br[stk[j].opno].clbr)) {
405: /* Found. Make a good diagnostic. */
406: if (j == stktop-2) {
407: /*
408: * Check for special case \fx..\fR and don't
409: * complain.
410: */
411: if (stk[j+1].opno==FT && stk[j+1].parm!='R'
412: && stk[j+2].opno==FT && stk[j+2].parm=='R') {
413: stktop = j -1;
414: return;
415: }
416: /*
417: * We have two unmatched frobs. Chances are
418: * they were intended to match, so we mention
419: * them together.
420: */
421: pe(stk[j+1].lno);
422: prop(j+1);
423: printf(" does not match %d: ", stk[j+2].lno);
424: prop(j+2);
425: printf("\n");
426: } else for (i=j+1; i <= stktop; i++) {
427: complain(i);
428: }
429: stktop = j-1;
430: return;
431: }
432: /* Didn't find one. Throw this away. */
433: pe(lineno);
434: printf("Unmatched .%s\n", mac);
435: }
436:
/* eq: return 1 when s1 and s2 hold the same string, 0 otherwise */
eq(s1, s2)
char *s1, *s2;
{
	return !strcmp(s1, s2);
}
443:
444: /* print the first part of an error message, given the line number */
445: pe(lineno)
446: int lineno;
447: {
448: if (nfiles > 1)
449: printf("%s: ", cfilename);
450: printf("%d: ", lineno);
451: }
452:
453: checkknown(mac)
454: char *mac;
455: {
456:
457: if (eq(mac, "."))
458: return;
459: if (binsrch(mac) >= 0)
460: return;
461: if (mac[0] == '\\' && mac[1] == '"') /* comments */
462: return;
463:
464: pe(lineno);
465: printf("Unknown command: .%s\n", mac);
466: }
467:
468: /*
469: * We have a .de xx line in "line". Add xx to the list of known commands.
470: */
471: addcmd(line)
472: char *line;
473: {
474: char *mac;
475:
476: /* grab the macro being defined */
477: mac = line+4;
478: while (isspace(*mac))
479: mac++;
480: if (*mac == 0) {
481: pe(lineno);
482: printf("illegal define: %s\n", line);
483: return;
484: }
485: mac[2] = 0;
486: if (isspace(mac[1]) || mac[1] == '\\')
487: mac[1] = 0;
488: if (ncmds >= MAXCMDS) {
489: printf("Only %d known commands allowed\n", MAXCMDS);
490: exit(1);
491: }
492: addmac(mac);
493: }
494:
495: /*
496: * Add mac to the list. We should really have some kind of tree
497: * structure here but this is a quick-and-dirty job and I just don't
498: * have time to mess with it. (I wonder if this will come back to haunt
499: * me someday?) Anyway, I claim that .de is fairly rare in user
500: * nroff programs, and the register loop below is pretty fast.
501: */
502: addmac(mac)
503: char *mac;
504: {
505: register char **src, **dest, **loc;
506:
507: if (binsrch(mac) >= 0){ /* it's OK to redefine something */
508: #ifdef DEBUG
509: printf("binsrch(%s) -> already in table\n", mac);
510: #endif DEBUG
511: return;
512: }
513: /* binsrch sets slot as a side effect */
514: #ifdef DEBUG
515: printf("binsrch(%s) -> %d\n", mac, slot);
516: #endif
517: loc = &knowncmds[slot];
518: src = &knowncmds[ncmds-1];
519: dest = src+1;
520: while (dest > loc)
521: *dest-- = *src--;
522: *loc = malloc(3);
523: strcpy(*loc, mac);
524: ncmds++;
525: #ifdef DEBUG
526: printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
527: #endif
528: }
529:
530: /*
531: * Do a binary search in knowncmds for mac.
532: * If found, return the index. If not, return -1.
533: */
534: binsrch(mac)
535: char *mac;
536: {
537: register char *p; /* pointer to current cmd in list */
538: register int d; /* difference if any */
539: register int mid; /* mid point in binary search */
540: register int top, bot; /* boundaries of bin search, inclusive */
541:
542: top = ncmds-1;
543: bot = 0;
544: while (top >= bot) {
545: mid = (top+bot)/2;
546: p = knowncmds[mid];
547: d = p[0] - mac[0];
548: if (d == 0)
549: d = p[1] - mac[1];
550: if (d == 0)
551: return mid;
552: if (d < 0)
553: bot = mid + 1;
554: else
555: top = mid - 1;
556: }
557: slot = bot; /* place it would have gone */
558: return -1;
559: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.