|
|
1.1 ! root 1: /* Jim Noble at Planning Research Corporation, June 1987. Fixes for */ ! 2: /* miscellaneous bugs found when reformatting state transititon code in */ ! 3: /* CKCPRO.W. */ ! 4: ! 5: char *wartv = "Wart Version 1A(005) Jan 1988"; ! 6: ! 7: /* W A R T */ ! 8: ! 9: /* ! 10: pre-process a lex-like file into a C program. ! 11: ! 12: Author:Jeff Damens, Columbia University Center for Computing Activites, 11/84. ! 13: Copyright (C) 1985, Trustees of Columbia University in the City of New York. ! 14: Permission is granted to any individual or institution to use, copy, or ! 15: redistribute this software so long as it is not sold for profit, provided this ! 16: copyright notice is retained. ! 17: ! 18: * input format is: ! 19: * lines to be copied | %state <state names...> ! 20: * %% ! 21: * <state> | <state,state,...> CHAR { actions } ! 22: * ... ! 23: * %% ! 24: */ ! 25: ! 26: #include "ckcdeb.h" /* Includes */ ! 27: #include <stdio.h> ! 28: #include <ctype.h> ! 29: ! 30: #define C_L 014 /* Formfeed */ ! 31: ! 32: #define SEP 1 /* Token types */ ! 33: #define LBRACK 2 ! 34: #define RBRACK 3 ! 35: #define WORD 4 ! 36: #define COMMA 5 ! 37: ! 38: /* Storage sizes */ ! 39: ! 40: #define MAXSTATES 50 /* max number of states */ ! 41: #define MAXWORD 50 /* max # of chars/word */ ! 42: #define SBYTES ((MAXSTATES+7)/8) /* # of bytes for state bitmask */ ! 43: ! 44: /* Name of wart function in generated program */ ! 45: ! 46: #ifndef FNAME ! 47: #define FNAME "wart" ! 48: #endif ! 49: ! 50: /* Structure for state information */ ! 51: ! 52: struct trans { CHAR states[SBYTES]; /* included states */ ! 53: int anyst; /* true if this good from any state */ ! 54: CHAR inchr; /* input character */ ! 55: int actno; /* associated action */ ! 56: struct trans *nxt; }; /* next transition */ ! 57: ! 58: typedef struct trans *Trans; ! 59: ! 60: char *malloc(); /* Returns pointer (not int) */ ! 61: ! 62: ! 63: /* Variables and tables */ ! 64: ! 65: int lines,nstates,nacts; ! 66: ! 67: char tokval[MAXWORD]; ! 68: ! 69: int tbl[MAXSTATES*128]; ! 70: ! 71: ! 72: ! 73: char *txt1 = "\n#define BEGIN state =\n\nint state = 0;\n\n"; ! 74: ! 75: char *fname = FNAME; /* function name goes here */ ! 76: ! 77: /* rest of program... */ ! 78: ! 79: char *txt2 = "()\n\ ! 80: {\n\ ! 81: int c,actno;\n\ ! 82: extern int tbl[];\n\ ! 83: while (1) {\n\ ! 84: c = input();\n\ ! 85: if ((actno = tbl[c + state*128]) != -1)\n\ ! 86: switch(actno) {\n"; ! 87: ! 88: /* this program's output goes here, followed by final text... */ ! 89: ! 90: char *txt3 = "\n }\n }\n\}\n\n"; ! 91: ! 92: ! 93: /* ! 94: * turn on the bit associated with the given state ! 95: * ! 96: */ ! 97: setstate(state,t) ! 98: int state; ! 99: Trans t; ! 100: { ! 101: int idx,msk; ! 102: idx = state/8; /* byte associated with state */ ! 103: msk = 0x80 >> (state % 8); /* bit mask for state */ ! 104: t->states[idx] |= msk; ! 105: } ! 106: ! 107: /* ! 108: * see if the state is involved in the transition ! 109: * ! 110: */ ! 111: ! 112: teststate(state,t) ! 113: int state; ! 114: Trans t; ! 115: { ! 116: int idx,msk; ! 117: idx = state/8; ! 118: msk = 0x80 >> (state % 8); ! 119: return(t->states[idx] & msk); ! 120: } ! 121: ! 122: ! 123: /* ! 124: * read input from here... ! 125: * ! 126: */ ! 127: ! 128: Trans ! 129: rdinput(infp,outfp) ! 130: FILE *infp,*outfp; ! 131: { ! 132: Trans x,rdrules(); ! 133: lines = 1; /* line counter */ ! 134: nstates = 0; /* no states */ ! 135: nacts = 0; /* no actions yet */ ! 136: fprintf(outfp,"\n%c* WARNING -- This C source program generated by ",'/'); ! 137: fprintf(outfp,"Wart preprocessor. */\n"); ! 138: fprintf(outfp,"%c* Do not edit this file; edit the Wart-format ",'/'); ! 139: fprintf(outfp,"source file instead, */\n"); ! 140: fprintf(outfp,"%c* and then run it through Wart to produce a new ",'/'); ! 141: fprintf(outfp,"C source file. */\n\n"); ! 142: fprintf(outfp,"%c* Wart Version Info: */\n",'/'); ! 143: fprintf(outfp,"char *wartv = \"%s\";\n\n",wartv); ! 144: ! 145: initial(infp,outfp); /* read state names, initial defs */ ! 146: prolog(outfp); /* write out our initial code */ ! 147: x = rdrules(infp,outfp); /* read rules */ ! 148: epilogue(outfp); /* write out epilogue code */ ! 149: return(x); ! 150: } ! 151: ! 152: ! 153: /* ! 154: * initial - read initial definitions and state names. Returns ! 155: * on EOF or %%. ! 156: * ! 157: */ ! 158: ! 159: initial(infp,outfp) ! 160: FILE *infp,*outfp; ! 161: { ! 162: int c; ! 163: char wordbuf[MAXWORD]; ! 164: while ((c = getc(infp)) != EOF) { ! 165: if (c == '%') { ! 166: rdword(infp,wordbuf); ! 167: if (strcmp(wordbuf,"states") == 0) ! 168: rdstates(infp,outfp); ! 169: else if (strcmp(wordbuf,"%") == 0) return; ! 170: else fprintf(outfp,"%%%s",wordbuf); ! 171: } ! 172: else putc(c,outfp); ! 173: if (c == '\n') lines++; ! 174: } ! 175: } ! 176: ! 177: /* ! 178: * boolean function to tell if the given character can be part of ! 179: * a word. ! 180: * ! 181: */ ! 182: isin(s,c) char *s; int c; { ! 183: for (; *s != '\0'; s++) ! 184: if (*s == c) return(1); ! 185: return(0); ! 186: } ! 187: isword(c) ! 188: int c; ! 189: { ! 190: static char special[] = ".%_-$@"; /* these are allowable */ ! 191: return(isalnum(c) || isin(special,c)); ! 192: } ! 193: ! 194: /* ! 195: * read the next word into the given buffer. ! 196: * ! 197: */ ! 198: rdword(fp,buf) ! 199: FILE *fp; ! 200: char *buf; ! 201: { ! 202: int len = 0,c; ! 203: while (isword(c = getc(fp)) && ++len < MAXWORD) *buf++ = c; ! 204: *buf++ = '\0'; /* tie off word */ ! 205: ungetc(c,fp); /* put break char back */ ! 206: } ! 207: ! 208: ! 209: /* ! 210: * read state names, up to a newline. ! 211: * ! 212: */ ! 213: ! 214: rdstates(fp,ofp) ! 215: FILE *fp,*ofp; ! 216: { ! 217: int c; ! 218: char wordbuf[MAXWORD]; ! 219: while ((c = getc(fp)) != EOF && c != '\n') ! 220: { ! 221: if (isspace(c) || c == C_L) continue; /* skip whitespace */ ! 222: ungetc(c,fp); /* put char back */ ! 223: rdword(fp,wordbuf); /* read the whole word */ ! 224: enter(wordbuf,++nstates); /* put into symbol tbl */ ! 225: fprintf(ofp,"#define %s %d\n",wordbuf,nstates); ! 226: } ! 227: lines++; ! 228: } ! 229: ! 230: /* ! 231: * allocate a new, empty transition node ! 232: * ! 233: */ ! 234: ! 235: Trans ! 236: newtrans() ! 237: { ! 238: Trans new; ! 239: int i; ! 240: new = (Trans) malloc(sizeof (struct trans)); ! 241: for (i=0; i<SBYTES; i++) new->states[i] = 0; ! 242: new->anyst = 0; ! 243: new->nxt = NULL; ! 244: return(new); ! 245: } ! 246: ! 247: ! 248: /* ! 249: * read all the rules. ! 250: * ! 251: */ ! 252: ! 253: Trans ! 254: rdrules(fp,out) ! 255: FILE *fp,*out; ! 256: { ! 257: Trans head,cur,prev; ! 258: int curtok,i; ! 259: head = cur = NULL; ! 260: while ((curtok = gettoken(fp)) != SEP) ! 261: ! 262: switch(curtok) { ! 263: case LBRACK: if (cur == NULL) cur = newtrans(); ! 264: else fatal("duplicate state list"); ! 265: statelist(fp,cur);/* set states */ ! 266: continue; /* prepare to read char */ ! 267: ! 268: case WORD: if (strlen(tokval) != 1) ! 269: fatal("multiple chars in state"); ! 270: if (cur == NULL) { ! 271: cur = newtrans(); ! 272: cur->anyst = 1; ! 273: } ! 274: cur->actno = ++nacts; ! 275: cur->inchr = tokval[0]; ! 276: if (head == NULL) head = cur; ! 277: else prev->nxt = cur; ! 278: prev = cur; ! 279: cur = NULL; ! 280: copyact(fp,out,nacts); ! 281: break; ! 282: default: fatal("bad input format"); ! 283: } ! 284: ! 285: return(head); ! 286: } ! 287: ! 288: ! 289: /* ! 290: * read a list of (comma-separated) states, set them in the ! 291: * given transition. ! 292: * ! 293: */ ! 294: statelist(fp,t) ! 295: FILE *fp; ! 296: Trans t; ! 297: { ! 298: int curtok,sval; ! 299: curtok = COMMA; ! 300: while (curtok != RBRACK) { ! 301: if (curtok != COMMA) fatal("missing comma"); ! 302: if ((curtok = gettoken(fp)) != WORD) fatal("missing state name"); ! 303: if ((sval = lkup(tokval)) == -1) { ! 304: fprintf(stderr,"state %s undefined\n",tokval); ! 305: fatal("undefined state"); ! 306: } ! 307: setstate(sval,t); ! 308: curtok = gettoken(fp); ! 309: } ! 310: } ! 311: ! 312: /* ! 313: * copy an action from the input to the output file ! 314: * ! 315: */ ! 316: copyact(inp,outp,actno) ! 317: FILE *inp,*outp; ! 318: int actno; ! 319: { ! 320: int c,bcnt; ! 321: fprintf(outp,"case %d:\n",actno); ! 322: while (c = getc(inp), (isspace(c) || c == C_L)) ! 323: if (c == '\n') lines++; ! 324: if (c == '{') { ! 325: bcnt = 1; ! 326: fputs(" {",outp); ! 327: while (bcnt > 0 && (c = getc(inp)) != EOF) { ! 328: if (c == '{') bcnt++; ! 329: else if (c == '}') bcnt--; ! 330: else if (c == '\n') lines++; ! 331: putc(c,outp); ! 332: } ! 333: if (bcnt > 0) fatal("action doesn't end"); ! 334: } ! 335: else { ! 336: while (c != '\n' && c != EOF) { ! 337: putc(c,outp); ! 338: c = getc(inp); ! 339: } ! 340: lines++; ! 341: } ! 342: fprintf(outp,"\n break;\n"); ! 343: } ! 344: ! 345: ! 346: /* ! 347: * find the action associated with a given character and state. ! 348: * returns -1 if one can't be found. ! 349: * ! 350: */ ! 351: faction(hd,state,chr) ! 352: Trans hd; ! 353: int state,chr; ! 354: { ! 355: while (hd != NULL) { ! 356: if (hd->anyst || teststate(state,hd)) ! 357: if (hd->inchr == '.' || hd->inchr == chr) return(hd->actno); ! 358: hd = hd->nxt; ! 359: } ! 360: return(-1); ! 361: } ! 362: ! 363: ! 364: /* ! 365: * empty the table... ! 366: * ! 367: */ ! 368: emptytbl() ! 369: { ! 370: int i; ! 371: for (i=0; i<nstates*128; i++) tbl[i] = -1; ! 372: } ! 373: ! 374: /* ! 375: * add the specified action to the output for the given state and chr. ! 376: * ! 377: */ ! 378: ! 379: addaction(act,state,chr) ! 380: int act,state,chr; ! 381: { ! 382: tbl[state*128 + chr] = act; ! 383: } ! 384: ! 385: writetbl(fp) ! 386: FILE *fp; ! 387: { ! 388: warray(fp,"tbl",tbl,128*(nstates+1)); ! 389: } ! 390: ! 391: ! 392: /* ! 393: * write an array to the output file, given its name and size. ! 394: * ! 395: */ ! 396: warray(fp,nam,cont,siz) ! 397: FILE *fp; ! 398: char *nam; ! 399: int cont[],siz; ! 400: { ! 401: int i; ! 402: fprintf(fp,"int %s[] = {\n",nam); ! 403: for (i = 0; i < siz; ) { ! 404: fprintf(fp,"%2d, ",cont[i]); ! 405: if ((++i % 16) == 0) putc('\n',fp); ! 406: } ! 407: fprintf(fp,"};\n"); ! 408: } ! 409: ! 410: main(argc,argv) ! 411: int argc; ! 412: char *argv[]; ! 413: { ! 414: Trans head; ! 415: int state,c; ! 416: FILE *infile,*outfile; ! 417: ! 418: if (argc > 1) { ! 419: if ((infile = fopen(argv[1],"r")) == NULL) { ! 420: fprintf(stderr,"Can't open %s\n",argv[1]); ! 421: fatal("unreadable input file"); } } ! 422: else infile = stdin; ! 423: ! 424: if (argc > 2) { ! 425: if ((outfile = fopen(argv[2],"w")) == NULL) { ! 426: fprintf(stderr,"Can't write to %s\n",argv[2]); ! 427: fatal("bad output file"); } } ! 428: else outfile = stdout; ! 429: ! 430: clrhash(); /* empty hash table */ ! 431: head = rdinput(infile,outfile); /* read input file */ ! 432: emptytbl(); /* empty our tables */ ! 433: for (state = 0; state <= nstates; state++) ! 434: for (c = 1; c < 128; c++) ! 435: addaction(faction(head,state,c),state,c); /* find actions, add to tbl */ ! 436: writetbl(outfile); ! 437: copyrest(infile,outfile); ! 438: printf("%d states, %d actions\n",nstates,nacts); ! 439: #ifdef undef ! 440: for (state = 1; state <= nstates; state ++) ! 441: for (c = 1; c < 128; c++) ! 442: if (tbl[state*128 + c] != -1) printf("state %d, chr %d, act %d\n", ! 443: state,c,tbl[state*128 + c]); ! 444: #endif ! 445: exit(GOOD_EXIT); ! 446: } ! 447: ! 448: ! 449: /* ! 450: * fatal error handler ! 451: * ! 452: */ ! 453: ! 454: fatal(msg) ! 455: char *msg; ! 456: { ! 457: fprintf(stderr,"error in line %d: %s\n",lines,msg); ! 458: exit(BAD_EXIT); ! 459: } ! 460: ! 461: prolog(outfp) ! 462: FILE *outfp; ! 463: { ! 464: int c; ! 465: while ((c = *txt1++) != '\0') putc(c,outfp); ! 466: while ((c = *fname++) != '\0') putc(c,outfp); ! 467: while ((c = *txt2++) != '\0') putc(c,outfp); ! 468: } ! 469: ! 470: epilogue(outfp) ! 471: FILE *outfp; ! 472: { ! 473: int c; ! 474: while ((c = *txt3++) != '\0') putc(c,outfp); ! 475: } ! 476: ! 477: copyrest(in,out) ! 478: FILE *in,*out; ! 479: { ! 480: int c; ! 481: while ((c = getc(in)) != EOF) putc(c,out); ! 482: } ! 483: ! 484: ! 485: /* ! 486: * gettoken - returns token type of next token, sets tokval ! 487: * to the string value of the token if appropriate. ! 488: * ! 489: */ ! 490: ! 491: gettoken(fp) ! 492: FILE *fp; ! 493: { ! 494: int c; ! 495: while (1) { /* loop if reading comments... */ ! 496: do { ! 497: c = getc(fp); ! 498: if (c == '\n') lines++; ! 499: } while ((isspace(c) || c == C_L)); /* skip whitespace */ ! 500: switch(c) { ! 501: case EOF: return(SEP); ! 502: case '%': if ((c = getc(fp)) == '%') return(SEP); ! 503: tokval[0] = '%'; ! 504: tokval[1] = c; ! 505: rdword(fp,tokval+2); ! 506: return(WORD); ! 507: case '<': return(LBRACK); ! 508: case '>': return(RBRACK); ! 509: case ',': return(COMMA); ! 510: case '/': if ((c = getc(fp)) == '*') { ! 511: rdcmnt(fp); /* skip over the comment */ ! 512: continue; } /* and keep looping */ ! 513: else { ! 514: ungetc(c,fp); /* put this back into input */ ! 515: c = '/'; } /* put character back, fall thru */ ! 516: ! 517: default: if (isword(c)) { ! 518: ungetc(c,fp); ! 519: rdword(fp,tokval); ! 520: return(WORD); ! 521: } ! 522: else fatal("Invalid character in input"); ! 523: } ! 524: } ! 525: } ! 526: ! 527: /* ! 528: * skip over a comment ! 529: * ! 530: */ ! 531: ! 532: rdcmnt(fp) ! 533: FILE *fp; ! 534: { ! 535: int c,star,prcnt; ! 536: prcnt = star = 0; /* no star seen yet */ ! 537: while (!((c = getc(fp)) == '/' && star)) { ! 538: if (c == EOF || (prcnt && c == '%')) fatal("Unterminated comment"); ! 539: prcnt = (c == '%'); ! 540: star = (c == '*'); ! 541: if (c == '\n') lines++; } ! 542: } ! 543: ! 544: ! 545: ! 546: /* ! 547: * symbol table management for wart ! 548: * ! 549: * entry points: ! 550: * clrhash - empty hash table. ! 551: * enter - enter a name into the symbol table ! 552: * lkup - find a name's value in the symbol table. ! 553: * ! 554: */ ! 555: ! 556: #define HASHSIZE 101 /* # of entries in hash table */ ! 557: ! 558: struct sym { char *name; /* symbol name */ ! 559: int val; /* value */ ! 560: struct sym *hnxt; } /* next on collision chain */ ! 561: *htab[HASHSIZE]; /* the hash table */ ! 562: ! 563: ! 564: /* ! 565: * empty the hash table before using it... ! 566: * ! 567: */ ! 568: clrhash() ! 569: { ! 570: int i; ! 571: for (i=0; i<HASHSIZE; i++) htab[i] = NULL; ! 572: } ! 573: ! 574: /* ! 575: * compute the value of the hash for a symbol ! 576: * ! 577: */ ! 578: hash(name) ! 579: char *name; ! 580: { ! 581: int sum; ! 582: for (sum = 0; *name != '\0'; name++) sum += (sum + *name); ! 583: sum %= HASHSIZE; /* take sum mod hashsize */ ! 584: if (sum < 0) sum += HASHSIZE; /* disallow negative hash value */ ! 585: return(sum); ! 586: } ! 587: ! 588: /* ! 589: * make a private copy of a string... ! 590: * ! 591: */ ! 592: char * ! 593: copy(s) ! 594: char *s; ! 595: { ! 596: char *new; ! 597: new = (char *) malloc(strlen(s) + 1); ! 598: strcpy(new,s); ! 599: return(new); ! 600: } ! 601: ! 602: ! 603: /* ! 604: * enter state name into the hash table ! 605: * ! 606: */ ! 607: enter(name,svalue) ! 608: char *name; ! 609: int svalue; ! 610: { ! 611: int h; ! 612: struct sym *cur; ! 613: if (lkup(name) != -1) { ! 614: fprintf(stderr,"state %s appears twice...\n"); ! 615: exit(BAD_EXIT); } ! 616: h = hash(name); ! 617: cur = (struct sym *)malloc(sizeof (struct sym)); ! 618: cur->name = copy(name); ! 619: cur->val = svalue; ! 620: cur->hnxt = htab[h]; ! 621: htab[h] = cur; ! 622: } ! 623: ! 624: /* ! 625: * find name in the symbol table, return its value. Returns -1 ! 626: * if not found. ! 627: * ! 628: */ ! 629: lkup(name) ! 630: char *name; ! 631: { ! 632: struct sym *cur; ! 633: for (cur = htab[hash(name)]; cur != NULL; cur = cur->hnxt) ! 634: if (strcmp(cur->name,name) == 0) return(cur->val); ! 635: return(-1); ! 636: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.