|
|
1.1 root 1: static char *sccsid = "@(#)checknr.c 4.2 (Berkeley) 10/12/80"; /* identification string: file name, version and date */
2: /*
3: * checknr: check an nroff/troff input file for matching macro calls.
4: * We also attempt to match size and font changes, but only the embedded
5: * kind. These must end in \s0 and \fP respectively. Maybe more
6: * sophistication later, but for now think of these restrictions as
7: * contributions to structured typesetting.
8: */
9: #include <stdio.h>
10: #include <ctype.h>
11:
12: #define MAXSTK 100 /* Stack size: number of elements in stk[] */
13: #define MAXBR 100 /* Max number of bracket pairs known: number of elements in br[] */
14: #define MAXCMDS 500 /* Max number of commands known: number of elements in knowncmds[] */
15:
/*
 * The stack on which we remember what we've seen so far.
 * One entry is pushed for each opener (macro call, \s or \f) and popped
 * again when its closer turns up.
 */
struct stkstr {
	int opno;	/* index into br[] of the opening bracket */
	int pl;		/* 0 for a dot command, 1 for \f; for \s the */
			/* character after \s ('+', '-', ...) or ' ' */
	int parm;	/* parm to size, font, etc */
	int lno;	/* line number the opener was seen on */
};

struct stkstr stk[MAXSTK];	/* the stack itself */
int stktop;			/* index of the top entry; process() resets it to -1 */
26:
/*
 * The kinds of opening and closing brackets.
 * Each br[] element names an opening command and the command that closes
 * it.  The table ends at the first element whose opbr is a null pointer;
 * main() appends pairs given with -a at that position.
 */
#define SZ 0	/* br[] index used for size changes (.sz and \s) */
#define FT 1	/* br[] index used for font changes (.ft and \f) */

struct brstr {
	char *opbr;	/* name of the opening command */
	char *clbr;	/* name of the matching closing command */
};

struct brstr br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz", "sz" },		/* also \s */
	{ "ft", "ft" },		/* also \f */
	/* the -ms package */
	{ "AB", "AE" },
	{ "RS", "RE" },
	{ "LG", "NL" },
	{ "SM", "NL" },
	{ "FS", "FE" },
	{ "DS", "DE" },
	{ "CD", "DE" },
	{ "LD", "DE" },
	{ "ID", "DE" },
	{ "KS", "KE" },
	{ "KF", "KE" },
	{ "QS", "QE" },
	/* Things needed by preprocessors */
	{ "TS", "TE" },
	{ "EQ", "EN" },
	/* The -me package */
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(b", ")b" },
	{ "(z", ")z" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(x", ")x" },
	/* end marker */
	{ 0, 0 }
};
66:
67: /*
 * All commands known to nroff, plus the ms and me macro packages.
 * Used so we can complain about unrecognized commands.
 *
 * The names are listed in ascending character order.  That order matters:
 * binsrch() does a binary search over this array, and addmac() inserts
 * new names at the position binsrch() reports.  The initializer ends with
 * a null pointer, which main() uses to count the initial entries into
 * ncmds.
 */
71: char *knowncmds[MAXCMDS] = {
72: "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l",
73: "(q", "(t", "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q",
74: ")t", ")x", ")z", "++", "+c", "1C", "1c", "2C", "2c", "@(",
75: "@)", "@C", "@D", "@F", "@I", "@M", "@c", "@e", "@f", "@h",
76: "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB", "AB", "AE",
77: "AE", "AI", "AI", "AT", "AU", "AU", "AX", "B", "B1", "B2",
78: "BD", "BG", "BT", "BX", "C1", "C2", "CD", "CM", "CT", "D",
79: "DA", "DE", "DF", "DS", "EG", "EM", "EN", "EQ", "EQ", "FA",
80: "FE", "FJ", "FK", "FL", "FN", "FO", "FQ", "FS", "FV", "FX",
81: "HO", "I", "ID", "IE", "IH", "IM", "IP", "IZ", "KD", "KE",
82: "KF", "KQ", "KS", "LB", "LD", "LG", "LP", "MC", "ME", "MF",
83: "MH", "MR", "ND", "NH", "NL", "NP", "OK", "PP", "PT", "PY",
84: "QE", "QP", "QS", "R", "RA", "RC", "RE", "RP", "RQ", "RS",
85: "RT", "S0", "S2", "S3", "SG", "SH", "SM", "SY", "TA", "TC",
86: "TD", "TE", "TH", "TL", "TL", "TM", "TQ", "TR", "TS", "TS",
87: "TX", "UL", "US", "UX", "WH", "XD", "XF", "XK", "XP", "[-",
88: "[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]-",
89: "]<", "]>", "][", "ab", "ac", "ad", "af", "am", "ar", "as",
90: "b", "ba", "bc", "bd", "bi", "bl", "bp", "bp", "br", "bx",
91: "c.", "c2", "cc", "ce", "cf", "ch", "cs", "ct", "cu", "da",
92: "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec", "ef",
93: "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl",
94: "fo", "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw",
95: "hx", "hy", "i", "ie", "if", "ig", "in", "ip", "it", "ix",
96: "lc", "lg", "li", "ll", "ll", "ln", "lo", "lp", "ls", "lt",
97: "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1", "n2", "na",
98: "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
99: "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po",
100: "po", "pp", "ps", "q", "r", "rb", "rd", "re", "re", "rm",
101: "rn", "ro", "rr", "rs", "rt", "sb", "sc", "sh", "sk", "so",
102: "sp", "ss", "st", "sv", "sz", "ta", "tc", "th", "ti", "tl",
103: "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "yr",
104: 0 /* end marker counted up to by main() */
105: };
106:
/* Where we are in the input; pe() uses these to prefix diagnostics. */
char *cfilename;	/* name of current file */
int nfiles;		/* number of files to process */
int lineno;		/* current line number in input file */
char line[256];		/* the current line */

/* Command-line switches. */
int fflag;		/* -f: ignore \f */
int sflag;		/* -s: ignore \s */

/* Bookkeeping for the knowncmds table. */
int ncmds;		/* size of knowncmds */
int slot;		/* slot in knowncmds found by binsrch */
115:
116: char *malloc(); /* hand-written declaration of the allocator that main() and addmac() call */
117:
118: main(argc, argv)
119: int argc;
120: char **argv;
121: {
122: FILE *f;
123: int i;
124: char *cp;
125: char b1[4];
126:
127: if (argc <= 1)
128: usage();
129: /* Figure out how many known commands there are */
130: while (knowncmds[ncmds])
131: ncmds++;
132: while (argc > 1 && argv[1][0] == '-') {
133: switch(argv[1][1]) {
134:
135: /* -a: add pairs of macros */
136: case 'a':
137: i = strlen(argv[1]) - 2;
138: if (i % 6 != 0)
139: usage();
140: /* look for empty macro slots */
141: for (i=0; br[i].opbr; i++)
142: ;
143: for (cp=argv[1]+3; cp[-1]; cp += 6) {
144: br[i].opbr = malloc(3);
145: strncpy(br[i].opbr, cp, 2);
146: br[i].clbr = malloc(3);
147: strncpy(br[i].clbr, cp+3, 2);
148: addmac(br[i].opbr); /* knows pairs are also known cmds */
149: addmac(br[i].clbr);
150: i++;
151: }
152: break;
153:
154: /* -c: add known commands */
155: case 'c':
156: i = strlen(argv[1]) - 2;
157: if (i % 3 != 0)
158: usage();
159: for (cp=argv[1]+3; cp[-1]; cp += 3) {
160: if (cp[2] && cp[2] != '.')
161: usage();
162: strncpy(b1, cp, 2);
163: addmac(b1);
164: }
165: break;
166:
167: /* -f: ignore font changes */
168: case 'f':
169: fflag = 1;
170: break;
171:
172: /* -s: ignore size changes */
173: case 's':
174: sflag = 1;
175: break;
176: default:
177: usage();
178: }
179: argc--; argv++;
180: }
181:
182: nfiles = argc - 1;
183:
184: if (nfiles > 0) {
185: for (i=1; i<argc; i++) {
186: cfilename = argv[i];
187: f = fopen(cfilename, "r");
188: if (f == NULL)
189: perror(cfilename);
190: else
191: process(f);
192: }
193: } else {
194: cfilename = "stdin";
195: process(stdin);
196: }
197: exit(0);
198: }
199:
/*
 * usage: print the command synopsis on standard output and exit with
 * status 1.
 */
int
usage()
{
	fputs("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n",
	    stdout);
	exit(1);
}
205:
206: process(f)
207: FILE *f;
208: {
209: register int i, n;
210: char mac[5]; /* The current macro or nroff command */
211: int pl;
212:
213: stktop = -1;
214: for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
215: if (line[0] == '.') {
216: /*
217: * find and isolate the macro/command name.
218: */
219: strncpy(mac, line+1, 4);
220: if (isspace(mac[0])) {
221: pe(lineno);
222: printf("Empty command\n");
223: } else if (isspace(mac[1])) {
224: mac[1] = 0;
225: } else if (isspace(mac[2])) {
226: mac[2] = 0;
227: } else if (mac[2] != '\\' || mac[3] != '\"') {
228: pe(lineno);
229: printf("Command too long\n");
230: }
231:
232: /*
233: * Is it a known command?
234: */
235: checkknown(mac);
236:
237: /*
238: * Should we add it?
239: */
240: if (eq(mac, "de"))
241: addcmd(line);
242:
243: chkcmd(line, mac);
244: }
245:
246: /*
247: * At this point we process the line looking
248: * for \s and \f.
249: */
250: for (i=0; line[i]; i++)
251: if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
252: if (!sflag && line[++i]=='s') {
253: pl = line[++i];
254: if (isdigit(pl)) {
255: n = pl - '0';
256: pl = ' ';
257: } else
258: n = 0;
259: while (isdigit(line[++i]))
260: n = 10 * n + line[i] - '0';
261: i--;
262: if (n == 0) {
263: if (stk[stktop].opno == SZ) {
264: stktop--;
265: } else {
266: pe(lineno);
267: printf("unmatched \\s0\n");
268: }
269: } else {
270: stk[++stktop].opno = SZ;
271: stk[stktop].pl = pl;
272: stk[stktop].parm = n;
273: stk[stktop].lno = lineno;
274: }
275: } else if (!fflag && line[i]=='f') {
276: n = line[++i];
277: if (n == 'P') {
278: if (stk[stktop].opno == FT) {
279: stktop--;
280: } else {
281: pe(lineno);
282: printf("unmatched \\fP\n");
283: }
284: } else {
285: stk[++stktop].opno = FT;
286: stk[stktop].pl = 1;
287: stk[stktop].parm = n;
288: stk[stktop].lno = lineno;
289: }
290: }
291: }
292: }
293: /*
294: * We've hit the end and look at all this stuff that hasn't been
295: * matched yet! Complain, complain.
296: */
297: for (i=stktop; i>=0; i--) {
298: complain(i);
299: }
300: }
301:
302: complain(i)
303: {
304: pe(stk[i].lno);
305: printf("Unmatched ");
306: prop(i);
307: printf("\n");
308: }
309:
310: prop(i)
311: {
312: if (stk[i].pl == 0)
313: printf(".%s", br[stk[i].opno].opbr);
314: else switch(stk[i].opno) {
315: case SZ:
316: printf("\\s%c%d", stk[i].pl, stk[i].parm);
317: break;
318: case FT:
319: printf("\\f%c", stk[i].parm);
320: break;
321: default:
322: printf("Bug: stk[%d].opno = %d = .%s, .%s",
323: i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
324: }
325: }
326:
327: chkcmd(line, mac)
328: char *line;
329: char *mac;
330: {
331: register int i, n;
332:
333: /*
334: * Check to see if it matches top of stack.
335: */
336: if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
337: stktop--; /* OK. Pop & forget */
338: else {
339: /* No. Maybe it's an opener */
340: for (i=0; br[i].opbr; i++) {
341: if (eq(mac, br[i].opbr)) {
342: /* Found. Push it. */
343: stktop++;
344: stk[stktop].opno = i;
345: stk[stktop].pl = 0;
346: stk[stktop].parm = 0;
347: stk[stktop].lno = lineno;
348: break;
349: }
350: /*
351: * Maybe it's an unmatched closer.
352: * NOTE: this depends on the fact
353: * that none of the closers can be
354: * openers too.
355: */
356: if (eq(mac, br[i].clbr)) {
357: nomatch(mac);
358: break;
359: }
360: }
361: }
362: }
363:
364: nomatch(mac)
365: char *mac;
366: {
367: register int i, j;
368:
369: /*
370: * Look for a match further down on stack
371: * If we find one, it suggests that the stuff in
372: * between is supposed to match itself.
373: */
374: for (j=stktop; j>=0; j--)
375: if (eq(mac,br[stk[j].opno].clbr)) {
376: /* Found. Make a good diagnostic. */
377: if (j == stktop-2) {
378: /*
379: * Check for special case \fx..\fR and don't
380: * complain.
381: */
382: if (stk[j+1].opno==FT && stk[j+1].parm!='R'
383: && stk[j+2].opno==FT && stk[j+2].parm=='R') {
384: stktop = j -1;
385: return;
386: }
387: /*
388: * We have two unmatched frobs. Chances are
389: * they were intended to match, so we mention
390: * them together.
391: */
392: pe(stk[j+1].lno);
393: prop(j+1);
394: printf(" does not match %d: ", stk[j+2].lno);
395: prop(j+2);
396: printf("\n");
397: } else for (i=j+1; i <= stktop; i++) {
398: complain(i);
399: }
400: stktop = j-1;
401: return;
402: }
403: /* Didn't find one. Throw this away. */
404: pe(lineno);
405: printf("Unmatched .%s\n", mac);
406: }
407:
/*
 * eq: are two strings equal?
 * Returns 1 when s1 and s2 hold the same characters up to and including
 * the terminating NUL, 0 otherwise.
 */
int
eq(s1, s2)
char *s1, *s2;
{
	while (*s1 == *s2) {
		if (*s1 == '\0')
			return 1;	/* reached both terminators together */
		s1++;
		s2++;
	}
	return 0;
}
414:
415: /* print the first part of an error message, given the line number */
416: pe(lineno)
417: int lineno;
418: {
419: if (nfiles > 1)
420: printf("%s: ", cfilename);
421: printf("%d: ", lineno);
422: }
423:
424: checkknown(mac)
425: char *mac;
426: {
427:
428: if (eq(mac, "."))
429: return;
430: if (binsrch(mac) >= 0)
431: return;
432:
433: pe(lineno);
434: printf("Unknown command: .%s\n", mac);
435: }
436:
437: /*
438: * We have a .de xx line in "line". Add xx to the list of known commands.
439: */
440: addcmd(line)
441: char *line;
442: {
443: char *mac;
444:
445: /* grab the macro being defined */
446: mac = line+4;
447: while (isspace(*mac))
448: mac++;
449: if (*mac == 0) {
450: pe(lineno);
451: printf("illegal define: %s\n", line);
452: return;
453: }
454: mac[2] = 0;
455: if (isspace(mac[1]) || mac[1] == '\\')
456: mac[1] = 0;
457: if (ncmds >= MAXCMDS) {
458: printf("Only %d known commands allowed\n", MAXCMDS);
459: exit(1);
460: }
461: addmac(mac);
462: }
463:
464: /*
465: * Add mac to the list. We should really have some kind of tree
466: * structure here but this is a quick-and-dirty job and I just don't
467: * have time to mess with it. (I wonder if this will come back to haunt
468: * me someday?) Anyway, I claim that .de is fairly rare in user
469: * nroff programs, and the register loop below is pretty fast.
470: */
471: addmac(mac)
472: char *mac;
473: {
474: register char **src, **dest, **loc;
475:
476: binsrch(mac); /* it's OK to redefine something */
477: /* binsrch sets slot as a side effect */
478: #ifdef DEBUG
479: printf("binsrch(%s) -> %d\n", mac, slot);
480: #endif
481: loc = &knowncmds[slot];
482: src = &knowncmds[ncmds-1];
483: dest = src+1;
484: while (dest > loc)
485: *dest-- = *src--;
486: *loc = malloc(3);
487: strcpy(*loc, mac);
488: ncmds++;
489: #ifdef DEBUG
490: printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
491: #endif
492: }
493:
494: /*
495: * Do a binary search in knowncmds for mac.
496: * If found, return the index. If not, return -1.
497: */
498: binsrch(mac)
499: char *mac;
500: {
501: register char *p; /* pointer to current cmd in list */
502: register int d; /* difference if any */
503: register int mid; /* mid point in binary search */
504: register int top, bot; /* boundaries of bin search, inclusive */
505:
506: top = ncmds-1;
507: bot = 0;
508: while (top >= bot) {
509: mid = (top+bot)/2;
510: p = knowncmds[mid];
511: d = p[0] - mac[0];
512: if (d == 0)
513: d = p[1] - mac[1];
514: if (d == 0)
515: return mid;
516: if (d < 0)
517: bot = mid + 1;
518: else
519: top = mid - 1;
520: }
521: slot = bot; /* place it would have gone */
522: return -1;
523: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.