|
|
static char *sccsid = "@(#)checknr.c 4.2 (Berkeley) 10/12/80";
/*
 * checknr: check an nroff/troff input file for matching macro calls.
 * We also attempt to match size and font changes, but only the embedded
 * kind.  These must end in \s0 and \fP resp.  Maybe more sophistication
 * later but for now think of these restrictions as contributions to
 * structured typesetting.
 *
 * Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx... [file ...]
 *
 * All diagnostics are written to standard output.  Running out of table
 * space (bracket stack, bracket pairs, known commands) or memory is fatal.
 */
#include <stdio.h>
#include <ctype.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>

#define MAXSTK	100	/* Stack size */
#define MAXBR	100	/* Max number of bracket pairs known */
#define MAXCMDS	500	/* Max number of commands known */

/*
 * The stack on which we remember what we've seen so far.
 */
struct stkstr {
	int opno;	/* number of opening bracket */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
	int parm;	/* parm to size, font, etc */
	int lno;	/* line number the thing came in in */
} stk[MAXSTK];
int stktop;		/* index of the top entry, -1 when the stack is empty */

/* Fixed slots in br[] that the \s and \f escapes share with .sz and .ft. */
#define SZ	0
#define FT	1

/*
 * The kinds of opening and closing brackets.
 * The table ends at the first entry whose opbr is a null pointer, so the
 * last slot must always stay empty.
 */
struct brstr {
	char *opbr;
	char *clbr;
} br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz", "sz" },	/* also \s */
	{ "ft", "ft" },	/* also \f */
	/* the -ms package */
	{ "AB", "AE" },
	{ "RS", "RE" },
	{ "LG", "NL" },
	{ "SM", "NL" },
	{ "FS", "FE" },
	{ "DS", "DE" },
	{ "CD", "DE" },
	{ "LD", "DE" },
	{ "ID", "DE" },
	{ "KS", "KE" },
	{ "KF", "KE" },
	{ "QS", "QE" },
	/* Things needed by preprocessors */
	{ "TS", "TE" },
	{ "EQ", "EN" },
	/* The -me package */
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(b", ")b" },
	{ "(z", ")z" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(x", ")x" },
	{ 0, 0 }
};

/*
 * All commands known to nroff, plus ms and me.
 * Used so we can complain about unrecognized commands.
 * The list must stay sorted on its first two characters because binsrch()
 * does a binary search over it.
 */
char *knowncmds[MAXCMDS] = {
"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l",
"(q", "(t", "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q",
")t", ")x", ")z", "++", "+c", "1C", "1c", "2C", "2c", "@(",
"@)", "@C", "@D", "@F", "@I", "@M", "@c", "@e", "@f", "@h",
"@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB", "AB", "AE",
"AE", "AI", "AI", "AT", "AU", "AU", "AX", "B", "B1", "B2",
"BD", "BG", "BT", "BX", "C1", "C2", "CD", "CM", "CT", "D",
"DA", "DE", "DF", "DS", "EG", "EM", "EN", "EQ", "EQ", "FA",
"FE", "FJ", "FK", "FL", "FN", "FO", "FQ", "FS", "FV", "FX",
"HO", "I", "ID", "IE", "IH", "IM", "IP", "IZ", "KD", "KE",
"KF", "KQ", "KS", "LB", "LD", "LG", "LP", "MC", "ME", "MF",
"MH", "MR", "ND", "NH", "NL", "NP", "OK", "PP", "PT", "PY",
"QE", "QP", "QS", "R", "RA", "RC", "RE", "RP", "RQ", "RS",
"RT", "S0", "S2", "S3", "SG", "SH", "SM", "SY", "TA", "TC",
"TD", "TE", "TH", "TL", "TL", "TM", "TQ", "TR", "TS", "TS",
"TX", "UL", "US", "UX", "WH", "XD", "XF", "XK", "XP", "[-",
"[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]-",
"]<", "]>", "][", "ab", "ac", "ad", "af", "am", "ar", "as",
"b", "ba", "bc", "bd", "bi", "bl", "bp", "bp", "br", "bx",
"c.", "c2", "cc", "ce", "cf", "ch", "cs", "ct", "cu", "da",
"de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec", "ef",
"eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl",
"fo", "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw",
"hx", "hy", "i", "ie", "if", "ig", "in", "ip", "it", "ix",
"lc", "lg", "li", "ll", "ll", "ln", "lo", "lp", "ls", "lt",
"m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1", "n2", "na",
"ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po",
"po", "pp", "ps", "q", "r", "rb", "rd", "re", "re", "rm",
"rn", "ro", "rr", "rs", "rt", "sb", "sc", "sh", "sk", "so",
"sp", "ss", "st", "sv", "sz", "ta", "tc", "th", "ti", "tl",
"tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "yr",
0
};

int	lineno;		/* current line number in input file */
char	line[256];	/* the current line */
char	*cfilename;	/* name of current file */
int	nfiles;		/* number of files to process */
int	fflag;		/* -f: ignore \f */
int	sflag;		/* -s: ignore \s */
int	ncmds;		/* size of knowncmds */
int	slot;		/* slot in knowncmds found by binsrch */

void	usage(void);
void	process(FILE *f);
void	complain(int i);
void	prop(int i);
void	chkcmd(char *line, const char *mac);
void	nomatch(const char *mac);
int	eq(const char *s1, const char *s2);
void	pe(int lineno);
void	checkknown(const char *mac);
void	addcmd(char *line);
void	addmac(const char *mac);
int	binsrch(const char *mac);

static char	*savename(const char *src);
static void	push(int opno, int pl, int parm);

/*
 * Parse the options, then check each named file (or standard input when
 * only options were given).  Always exits 0 once the inputs have been
 * processed; a bad option exits 1 through usage().
 */
int
main(int argc, char **argv)
{
	FILE *f;
	int i;
	char *cp;
	char b1[4];

	if (argc <= 1)
		usage();
	/* Figure out how many known commands there are */
	while (ncmds < MAXCMDS && knowncmds[ncmds])
		ncmds++;
	while (argc > 1 && argv[1][0] == '-') {
		switch (argv[1][1]) {

		/* -a: add pairs of macros, given as .xx.yy groups */
		case 'a':
			if ((strlen(argv[1]) - 2) % 6 != 0)
				usage();
			/* look for empty macro slots */
			for (i = 0; i < MAXBR && br[i].opbr; i++)
				;
			for (cp = argv[1] + 2; *cp; cp += 6) {
				/* keep one empty slot as the end marker */
				if (i >= MAXBR - 1) {
					printf("Only %d bracket pairs allowed\n",
					    MAXBR - 1);
					exit(1);
				}
				br[i].opbr = savename(cp + 1);
				br[i].clbr = savename(cp + 4);
				addmac(br[i].opbr);	/* knows pairs are also known cmds */
				addmac(br[i].clbr);
				i++;
			}
			break;

		/* -c: add known commands, given as .xx groups */
		case 'c':
			if ((strlen(argv[1]) - 2) % 3 != 0)
				usage();
			for (cp = argv[1] + 2; *cp; cp += 3) {
				if (cp[3] && cp[3] != '.')
					usage();
				b1[0] = cp[1];
				b1[1] = cp[2];
				b1[2] = '\0';
				addmac(b1);
			}
			break;

		/* -f: ignore font changes */
		case 'f':
			fflag = 1;
			break;

		/* -s: ignore size changes */
		case 's':
			sflag = 1;
			break;
		default:
			usage();
		}
		argc--; argv++;
	}

	nfiles = argc - 1;

	if (nfiles > 0) {
		for (i = 1; i < argc; i++) {
			cfilename = argv[i];
			f = fopen(cfilename, "r");
			if (f == NULL)
				perror(cfilename);
			else {
				process(f);
				fclose(f);
			}
		}
	} else {
		cfilename = "stdin";
		process(stdin);
	}
	return 0;
}

/* Print the usage message and exit with status 1. */
void
usage(void)
{
	printf("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n");
	exit(1);
}

/*
 * Return a freshly allocated copy of the first (at most) two characters
 * of src, always NUL terminated.  Exits if memory runs out.
 */
static char *
savename(const char *src)
{
	char *copy;

	copy = malloc(3);
	if (copy == NULL) {
		printf("Out of memory\n");
		exit(1);
	}
	copy[0] = src[0];
	copy[1] = src[0] ? src[1] : '\0';
	copy[2] = '\0';
	return copy;
}

/*
 * Push an opener onto the stack, remembering the current line number.
 * Exits rather than writing past the end of stk[].
 */
static void
push(int opno, int pl, int parm)
{
	if (stktop >= MAXSTK - 1) {
		pe(lineno);
		printf("Nesting too deep (more than %d unmatched items)\n",
		    MAXSTK);
		exit(1);
	}
	stktop++;
	stk[stktop].opno = opno;
	stk[stktop].pl = pl;
	stk[stktop].parm = parm;
	stk[stktop].lno = lineno;
}

/*
 * Check one input stream.  Each line starting with '.' is treated as a
 * command: its name is isolated, looked up, and matched against the
 * bracket stack.  Every line is then scanned for embedded \s and \f
 * escapes.  Whatever is still on the stack at end of input is reported.
 */
void
process(FILE *f)
{
	int i, n;
	char mac[5];	/* The current macro or nroff command */
	int pl;

	stktop = -1;
	for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
		if (line[0] == '.') {
			/*
			 * find and isolate the macro/command name.
			 * strncpy does not terminate when it copies all
			 * four characters, so do it by hand.
			 */
			strncpy(mac, line+1, 4);
			mac[4] = '\0';
			if (isspace((unsigned char)mac[0])) {
				pe(lineno);
				printf("Empty command\n");
			} else if (isspace((unsigned char)mac[1])) {
				mac[1] = 0;
			} else if (isspace((unsigned char)mac[2])) {
				mac[2] = 0;
			} else if (mac[2] != '\\' || mac[3] != '\"') {
				pe(lineno);
				printf("Command too long\n");
			} else {
				/* .xx\"comment: the name is just xx */
				mac[2] = 0;
			}

			/*
			 * Is it a known command?
			 */
			checkknown(mac);

			/*
			 * Should we add it?
			 */
			if (eq(mac, "de"))
				addcmd(line);

			chkcmd(line, mac);
		}

		/*
		 * At this point we process the line looking
		 * for \s and \f.
		 */
		for (i = 0; line[i]; i++) {
			/* only a backslash that is not itself escaped */
			if (line[i] != '\\' || (i > 0 && line[i-1] == '\\'))
				continue;
			if (line[i+1] == '\0')
				break;		/* nothing follows the backslash */
			/*
			 * Step onto the escape name regardless of the flags,
			 * so that -s does not also hide \f.
			 */
			i++;
			if (!sflag && line[i] == 's') {
				pl = (unsigned char)line[i+1];
				if (pl != '\0')
					i++;
				if (isdigit(pl)) {
					n = pl - '0';
					pl = ' ';
				} else
					n = 0;
				/* peek before advancing so we never pass the NUL */
				while (isdigit((unsigned char)line[i+1])) {
					i++;
					if (n <= (INT_MAX - 9) / 10)
						n = 10 * n + line[i] - '0';
				}
				if (n == 0) {
					if (stktop >= 0 && stk[stktop].opno == SZ) {
						stktop--;
					} else {
						pe(lineno);
						printf("unmatched \\s0\n");
					}
				} else {
					push(SZ, pl, n);
				}
			} else if (!fflag && line[i] == 'f') {
				n = (unsigned char)line[i+1];
				if (n != '\0')
					i++;
				if (n == 'P') {
					if (stktop >= 0 && stk[stktop].opno == FT) {
						stktop--;
					} else {
						pe(lineno);
						printf("unmatched \\fP\n");
					}
				} else {
					push(FT, 1, n);
				}
			}
		}
	}
	/*
	 * We've hit the end and look at all this stuff that hasn't been
	 * matched yet!  Complain, complain.
	 */
	for (i = stktop; i >= 0; i--) {
		complain(i);
	}
}

/* Report stack entry i as unmatched. */
void
complain(int i)
{
	pe(stk[i].lno);
	printf("Unmatched ");
	prop(i);
	printf("\n");
}

/*
 * Print stack entry i the way it appeared in the input: as a command
 * when pl is 0, otherwise as the \s or \f escape it came from.
 */
void
prop(int i)
{
	if (stk[i].pl == 0)
		printf(".%s", br[stk[i].opno].opbr);
	else switch (stk[i].opno) {
	case SZ:
		printf("\\s%c%d", stk[i].pl, stk[i].parm);
		break;
	case FT:
		printf("\\f%c", stk[i].parm);
		break;
	default:
		printf("Bug: stk[%d].opno = %d = .%s, .%s",
			i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
	}
}

/*
 * Match command mac against the bracket table: pop if it closes the top
 * of the stack, push if it is an opener, and hand it to nomatch() if it
 * is a closer that does not match the top.  line is not used.
 */
void
chkcmd(char *line, const char *mac)
{
	int i;

	(void)line;

	/*
	 * Check to see if it matches top of stack.
	 */
	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
		stktop--;	/* OK. Pop & forget */
	else {
		/* No. Maybe it's an opener */
		for (i = 0; br[i].opbr; i++) {
			if (eq(mac, br[i].opbr)) {
				/* Found. Push it. */
				push(i, 0, 0);
				break;
			}
			/*
			 * Maybe it's an unmatched closer.
			 * NOTE: this depends on the fact
			 * that none of the closers can be
			 * openers too.
			 */
			if (eq(mac, br[i].clbr)) {
				nomatch(mac);
				break;
			}
		}
	}
}

/*
 * mac is a closer that does not match the top of the stack.  Report
 * the mismatch and discard whatever it implicitly closes.
 */
void
nomatch(const char *mac)
{
	int i, j;

	/*
	 * Look for a match further down on stack
	 * If we find one, it suggests that the stuff in
	 * between is supposed to match itself.
	 */
	for (j = stktop; j >= 0; j--)
		if (eq(mac, br[stk[j].opno].clbr)) {
			/* Found. Make a good diagnostic. */
			if (j == stktop-2) {
				/*
				 * Check for special case \fx..\fR and don't
				 * complain.
				 */
				if (stk[j+1].opno==FT && stk[j+1].parm!='R'
				 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
					stktop = j - 1;
					return;
				}
				/*
				 * We have two unmatched frobs. Chances are
				 * they were intended to match, so we mention
				 * them together.
				 */
				pe(stk[j+1].lno);
				prop(j+1);
				printf(" does not match %d: ", stk[j+2].lno);
				prop(j+2);
				printf("\n");
			} else for (i = j+1; i <= stktop; i++) {
				complain(i);
			}
			stktop = j-1;
			return;
		}
	/* Didn't find one. Throw this away. */
	pe(lineno);
	printf("Unmatched .%s\n", mac);
}

/* eq: are two strings equal? */
int
eq(const char *s1, const char *s2)
{
	return (strcmp(s1, s2) == 0);
}

/*
 * Print the first part of an error message, given the line number.
 * The file name is included only when more than one file was named.
 */
void
pe(int lineno)
{
	if (nfiles > 1)
		printf("%s: ", cfilename);
	printf("%d: ", lineno);
}

/* Complain if mac is not in knowncmds; "." (from a ".." line) is exempt. */
void
checkknown(const char *mac)
{

	if (eq(mac, "."))
		return;
	if (binsrch(mac) >= 0)
		return;

	pe(lineno);
	printf("Unknown command: .%s\n", mac);
}

/*
 * We have a .de xx line in "line".  Add xx to the list of known commands.
 * The line is cut off in place right after the macro name.
 */
void
addcmd(char *line)
{
	char *mac;

	/*
	 * grab the macro being defined; insist on ".de" plus a separator
	 * so that line+4 cannot point past the end of the string.
	 */
	if (strlen(line) < 4 || !isspace((unsigned char)line[3])) {
		pe(lineno);
		printf("illegal define: %s\n", line);
		return;
	}
	mac = line+4;
	while (isspace((unsigned char)*mac))
		mac++;
	if (*mac == 0) {
		pe(lineno);
		printf("illegal define: %s\n", line);
		return;
	}
	/* mac[2] is only inside the string when mac[1] is not the NUL */
	if (mac[1] == 0 || isspace((unsigned char)mac[1]) || mac[1] == '\\')
		mac[1] = 0;
	else
		mac[2] = 0;
	addmac(mac);
}

/*
 * Add mac to the list.  We should really have some kind of tree
 * structure here but this is a quick-and-dirty job and I just don't
 * have time to mess with it.  (I wonder if this will come back to haunt
 * me someday?)  Anyway, I claim that .de is fairly rare in user
 * nroff programs, and the loop below is pretty fast.
 *
 * Only the first two characters of mac are stored.  A name that is
 * already present is left alone; exits when the table is full.
 */
void
addmac(const char *mac)
{
	char **dest, **loc;
	char *copy;

	/*
	 * It's OK to redefine something, but binsrch only sets slot when
	 * the name is absent, so a second copy must not be inserted.
	 */
	if (binsrch(mac) >= 0)
		return;
#ifdef DEBUG
	printf("binsrch(%s) -> %d\n", mac, slot);
#endif
	if (ncmds >= MAXCMDS) {
		printf("Only %d known commands allowed\n", MAXCMDS);
		exit(1);
	}
	copy = savename(mac);
	/* open a hole at slot by moving the tail up one place */
	loc = &knowncmds[slot];
	for (dest = &knowncmds[ncmds]; dest > loc; dest--)
		*dest = dest[-1];
	*loc = copy;
	ncmds++;
#ifdef DEBUG
	printf("after: %s at %d, %d cmds\n", knowncmds[slot], slot, ncmds);
#endif
}

/*
 * Do a binary search in knowncmds for mac, comparing only the first two
 * characters.  If found, return the index.  If not, return -1 and leave
 * in slot the index at which mac would have to be inserted.
 */
int
binsrch(const char *mac)
{
	const char *p;	/* pointer to current cmd in list */
	int d;		/* difference if any */
	int mid;	/* mid point in binary search */
	int top, bot;	/* boundaries of bin search, inclusive */

	top = ncmds-1;
	bot = 0;
	while (top >= bot) {
		mid = (top+bot)/2;
		p = knowncmds[mid];
		d = p[0] - mac[0];
		if (d == 0)
			d = p[1] - mac[1];
		if (d == 0)
			return mid;
		if (d < 0)
			bot = mid + 1;
		else
			top = mid - 1;
	}
	slot = bot;	/* place it would have gone */
	return -1;
}
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.