|
|
1.1 ! root 1: char *wartv = "Wart Version 1A(003) 27 May 85"; ! 2: ! 3: /* W A R T */ ! 4: ! 5: /* ! 6: pre-process a lex-like file into a C program. ! 7: ! 8: Author:Jeff Damens, Columbia University Center for Computing Activites, 11/84. ! 9: Copyright (C) 1985, Trustees of Columbia University in the City of New York. ! 10: Permission is granted to any individual or institution to use, copy, or ! 11: redistribute this software so long as it is not sold for profit, provided this ! 12: copyright notice is retained. ! 13: ! 14: * input format is: ! 15: * lines to be copied | %state <state names...> ! 16: * %% ! 17: * <state> | <state,state,...> CHAR { actions } ! 18: * ... ! 19: * %% ! 20: */ ! 21: ! 22: #include "ckcdeb.h" /* Includes */ ! 23: #include <stdio.h> ! 24: #include <ctype.h> ! 25: ! 26: #define C_L 014 /* Formfeed */ ! 27: ! 28: #define SEP 1 /* Token types */ ! 29: #define LBRACK 2 ! 30: #define RBRACK 3 ! 31: #define WORD 4 ! 32: #define COMMA 5 ! 33: ! 34: /* Storage sizes */ ! 35: ! 36: #define MAXSTATES 50 /* max number of states */ ! 37: #define MAXWORD 50 /* max # of chars/word */ ! 38: #define SBYTES ((MAXSTATES+7)/8) /* # of bytes for state bitmask */ ! 39: ! 40: /* Name of wart function in generated program */ ! 41: ! 42: #ifndef FNAME ! 43: #define FNAME "wart" ! 44: #endif ! 45: ! 46: /* Structure for state information */ ! 47: ! 48: struct trans { CHAR states[SBYTES]; /* included states */ ! 49: int anyst; /* true if this good from any state */ ! 50: CHAR inchr; /* input character */ ! 51: int actno; /* associated action */ ! 52: struct trans *nxt; }; /* next transition */ ! 53: ! 54: typedef struct trans *Trans; ! 55: ! 56: char *malloc(); /* Returns pointer (not int) */ ! 57: ! 58: ! 59: /* Variables and tables */ ! 60: ! 61: int lines,nstates,nacts; ! 62: ! 63: char tokval[MAXWORD]; ! 64: ! 65: int tbl[MAXSTATES*128]; ! 66: ! 67: ! 68: ! 69: char *txt1 = "\n#define BEGIN state =\n\nint state = 0;\n\n"; ! 70: ! 
71: char *fname = FNAME; /* function name goes here */ ! 72: ! 73: /* rest of program... */ ! 74: ! 75: char *txt2 = "()\n\ ! 76: {\n\ ! 77: int c,actno;\n\ ! 78: extern int tbl[];\n\ ! 79: while (1) {\n\ ! 80: c = input();\n\ ! 81: if ((actno = tbl[c + state*128]) != -1)\n\ ! 82: switch(actno) {\n"; ! 83: ! 84: /* this program's output goes here, followed by final text... */ ! 85: ! 86: char *txt3 = "\n }\n }\n}\n\n"; ! 87: ! 88: ! 89: /* ! 90: * turn on the bit associated with the given state ! 91: * ! 92: */ ! 93: setstate(state,t) ! 94: int state; ! 95: Trans t; ! 96: { ! 97: int idx,msk; ! 98: idx = state/8; /* byte associated with state */ ! 99: msk = 0x80 >> (state % 8); /* bit mask for state */ ! 100: t->states[idx] |= msk; ! 101: } ! 102: ! 103: /* ! 104: * see if the state is involved in the transition ! 105: * ! 106: */ ! 107: ! 108: teststate(state,t) ! 109: int state; ! 110: Trans t; ! 111: { ! 112: int idx,msk; ! 113: idx = state/8; ! 114: msk = 0x80 >> (state % 8); ! 115: return(t->states[idx] & msk); ! 116: } ! 117: ! 118: ! 119: /* ! 120: * read input from here... ! 121: * ! 122: */ ! 123: ! 124: Trans ! 125: rdinput(infp,outfp) ! 126: FILE *infp,*outfp; ! 127: { ! 128: Trans x,rdrules(); ! 129: lines = 1; /* line counter */ ! 130: nstates = 0; /* no states */ ! 131: nacts = 0; /* no actions yet */ ! 132: fprintf(outfp,"\n%c* WARNING -- This C source program generated by ",'/'); ! 133: fprintf(outfp,"Wart preprocessor. */\n"); ! 134: fprintf(outfp,"%c* Do not edit this file; edit the Wart-format ",'/'); ! 135: fprintf(outfp,"source file instead, */\n"); ! 136: fprintf(outfp,"%c* and then run it through Wart to produce a new ",'/'); ! 137: fprintf(outfp,"C source file. */\n\n"); ! 138: fprintf(outfp,"%c* Wart Version Info: */\n",'/'); ! 139: fprintf(outfp,"char *wartv = \"%s\";\n\n",wartv); ! 140: ! 141: initial(infp,outfp); /* read state names, initial defs */ ! 142: prolog(outfp); /* write out our initial code */ ! 
143: x = rdrules(infp,outfp); /* read rules */ ! 144: epilogue(outfp); /* write out epilogue code */ ! 145: return(x); ! 146: } ! 147: ! 148: ! 149: /* ! 150: * initial - read initial definitions and state names. Returns ! 151: * on EOF or %%. ! 152: * ! 153: */ ! 154: ! 155: initial(infp,outfp) ! 156: FILE *infp,*outfp; ! 157: { ! 158: int c; ! 159: char wordbuf[MAXWORD]; ! 160: while ((c = getc(infp)) != EOF) { ! 161: if (c == '%') { ! 162: rdword(infp,wordbuf); ! 163: if (strcmp(wordbuf,"states") == 0) ! 164: rdstates(infp,outfp); ! 165: else if (strcmp(wordbuf,"%") == 0) return; ! 166: else fprintf(outfp,"%%%s",wordbuf); ! 167: } ! 168: else putc(c,outfp); ! 169: if (c == '\n') lines++; ! 170: } ! 171: } ! 172: ! 173: /* ! 174: * boolean function to tell if the given character can be part of ! 175: * a word. ! 176: * ! 177: */ ! 178: isin(s,c) char *s; int c; { ! 179: for (; *s != '\0'; s++) ! 180: if (*s == c) return(1); ! 181: return(0); ! 182: } ! 183: isword(c) ! 184: int c; ! 185: { ! 186: static char special[] = ".%_-$@"; /* these are allowable */ ! 187: return(isalnum(c) || isin(special,c)); ! 188: } ! 189: ! 190: /* ! 191: * read the next word into the given buffer. ! 192: * ! 193: */ ! 194: rdword(fp,buf) ! 195: FILE *fp; ! 196: char *buf; ! 197: { ! 198: int len = 0,c; ! 199: while (isword(c = getc(fp)) && ++len < MAXWORD) *buf++ = c; ! 200: *buf++ = '\0'; /* tie off word */ ! 201: ungetc(c,fp); /* put break char back */ ! 202: } ! 203: ! 204: ! 205: /* ! 206: * read state names, up to a newline. ! 207: * ! 208: */ ! 209: ! 210: rdstates(fp,ofp) ! 211: FILE *fp,*ofp; ! 212: { ! 213: int c; ! 214: char wordbuf[MAXWORD]; ! 215: while ((c = getc(fp)) != EOF && c != '\n') ! 216: { ! 217: if (isspace(c) || c == C_L) continue; /* skip whitespace */ ! 218: ungetc(c,fp); /* put char back */ ! 219: rdword(fp,wordbuf); /* read the whole word */ ! 220: enter(wordbuf,++nstates); /* put into symbol tbl */ ! 221: fprintf(ofp,"#define %s %d\n",wordbuf,nstates); ! 
222: } ! 223: lines++; ! 224: } ! 225: ! 226: /* ! 227: * allocate a new, empty transition node ! 228: * ! 229: */ ! 230: ! 231: Trans ! 232: newtrans() ! 233: { ! 234: Trans new; ! 235: int i; ! 236: new = (Trans) malloc(sizeof (struct trans)); ! 237: for (i=0; i<SBYTES; i++) new->states[i] = 0; ! 238: new->anyst = 0; ! 239: new->nxt = NULL; ! 240: return(new); ! 241: } ! 242: ! 243: ! 244: /* ! 245: * read all the rules. ! 246: * ! 247: */ ! 248: ! 249: Trans ! 250: rdrules(fp,out) ! 251: FILE *fp,*out; ! 252: { ! 253: Trans head,cur,prev; ! 254: int curtok,i; ! 255: head = cur = NULL; ! 256: while ((curtok = gettoken(fp)) != SEP) ! 257: ! 258: switch(curtok) { ! 259: case LBRACK: if (cur == NULL) cur = newtrans(); ! 260: else fatal("duplicate state list"); ! 261: statelist(fp,cur);/* set states */ ! 262: continue; /* prepare to read char */ ! 263: ! 264: case WORD: if (strlen(tokval) != 1) ! 265: fatal("multiple chars in state"); ! 266: if (cur == NULL) { ! 267: cur = newtrans(); ! 268: cur->anyst = 1; ! 269: } ! 270: cur->actno = ++nacts; ! 271: cur->inchr = tokval[0]; ! 272: if (head == NULL) head = cur; ! 273: else prev->nxt = cur; ! 274: prev = cur; ! 275: cur = NULL; ! 276: copyact(fp,out,nacts); ! 277: break; ! 278: default: fatal("bad input format"); ! 279: } ! 280: ! 281: return(head); ! 282: } ! 283: ! 284: ! 285: /* ! 286: * read a list of (comma-separated) states, set them in the ! 287: * given transition. ! 288: * ! 289: */ ! 290: statelist(fp,t) ! 291: FILE *fp; ! 292: Trans t; ! 293: { ! 294: int curtok,sval; ! 295: curtok = COMMA; ! 296: while (curtok != RBRACK) { ! 297: if (curtok != COMMA) fatal("missing comma"); ! 298: if ((curtok = gettoken(fp)) != WORD) fatal("missing state name"); ! 299: if ((sval = lkup(tokval)) == -1) { ! 300: fprintf(stderr,"state %s undefined\n",tokval); ! 301: fatal("undefined state"); ! 302: } ! 303: setstate(sval,t); ! 304: curtok = gettoken(fp); ! 305: } ! 306: } ! 307: ! 308: /* ! 
309: * copy an action from the input to the output file ! 310: * ! 311: */ ! 312: copyact(inp,outp,actno) ! 313: FILE *inp,*outp; ! 314: int actno; ! 315: { ! 316: int c,bcnt; ! 317: fprintf(outp,"case %d:\n",actno); ! 318: while (((c = getc(inp)) != '\n') && (isspace(c) || c == C_L)); ! 319: if (c == '{') { ! 320: bcnt = 1; ! 321: putc(c,outp); ! 322: while (bcnt > 0 && (c = getc(inp)) != EOF) { ! 323: if (c == '{') bcnt++; ! 324: else if (c == '}') bcnt--; ! 325: else if (c == '\n') lines++; ! 326: putc(c,outp); ! 327: } ! 328: if (bcnt > 0) fatal("action doesn't end"); ! 329: } ! 330: else { ! 331: while (c != '\n' && c != EOF) { ! 332: putc(c,outp); ! 333: c = getc(inp); ! 334: } ! 335: lines++; ! 336: } ! 337: fprintf(outp,"\nbreak;\n"); ! 338: } ! 339: ! 340: ! 341: /* ! 342: * find the action associated with a given character and state. ! 343: * returns -1 if one can't be found. ! 344: * ! 345: */ ! 346: faction(hd,state,chr) ! 347: Trans hd; ! 348: int state,chr; ! 349: { ! 350: while (hd != NULL) { ! 351: if (hd->anyst || teststate(state,hd)) ! 352: if (hd->inchr == '.' || hd->inchr == chr) return(hd->actno); ! 353: hd = hd->nxt; ! 354: } ! 355: return(-1); ! 356: } ! 357: ! 358: ! 359: /* ! 360: * empty the table... ! 361: * ! 362: */ ! 363: emptytbl() ! 364: { ! 365: int i; ! 366: for (i=0; i<nstates*128; i++) tbl[i] = -1; ! 367: } ! 368: ! 369: /* ! 370: * add the specified action to the output for the given state and chr. ! 371: * ! 372: */ ! 373: ! 374: addaction(act,state,chr) ! 375: int act,state,chr; ! 376: { ! 377: tbl[state*128 + chr] = act; ! 378: } ! 379: ! 380: writetbl(fp) ! 381: FILE *fp; ! 382: { ! 383: warray(fp,"tbl",tbl,128*(nstates+1)); ! 384: } ! 385: ! 386: ! 387: /* ! 388: * write an array to the output file, given its name and size. ! 389: * ! 390: */ ! 391: warray(fp,nam,cont,siz) ! 392: FILE *fp; ! 393: char *nam; ! 394: int cont[],siz; ! 395: { ! 396: int i; ! 397: fprintf(fp,"int %s[] = {\n",nam); ! 
398: for (i = 0; i < siz; i++) { ! 399: fprintf(fp,"%d, ",cont[i]); ! 400: if ((i % 20) == 0) putc('\n',fp); ! 401: } ! 402: fprintf(fp,"};\n"); ! 403: } ! 404: ! 405: main(argc,argv) ! 406: int argc; ! 407: char *argv[]; ! 408: { ! 409: Trans head; ! 410: int state,c; ! 411: FILE *infile,*outfile; ! 412: ! 413: if (argc > 1) { ! 414: if ((infile = fopen(argv[1],"r")) == NULL) { ! 415: fprintf(stderr,"Can't open %s\n",argv[1]); ! 416: fatal("unreadable input file"); } } ! 417: else infile = stdin; ! 418: ! 419: if (argc > 2) { ! 420: if ((outfile = fopen(argv[2],"w")) == NULL) { ! 421: fprintf(stderr,"Can't write to %s\n",argv[2]); ! 422: fatal("bad output file"); } } ! 423: else outfile = stdout; ! 424: ! 425: clrhash(); /* empty hash table */ ! 426: head = rdinput(infile,outfile); /* read input file */ ! 427: emptytbl(); /* empty our tables */ ! 428: for (state = 0; state <= nstates; state++) ! 429: for (c = 1; c < 128; c++) ! 430: addaction(faction(head,state,c),state,c); /* find actions, add to tbl */ ! 431: writetbl(outfile); ! 432: copyrest(infile,outfile); ! 433: fprintf(stderr,"%d states, %d actions\n",nstates,nacts); ! 434: #ifdef undef ! 435: for (state = 1; state <= nstates; state ++) ! 436: for (c = 1; c < 128; c++) ! 437: if (tbl[state*128 + c] != -1) printf("state %d, chr %d, act %d\n", ! 438: state,c,tbl[state*128 + c]); ! 439: #endif ! 440: exit(GOOD_EXIT); ! 441: } ! 442: ! 443: ! 444: /* ! 445: * fatal error handler ! 446: * ! 447: */ ! 448: ! 449: fatal(msg) ! 450: char *msg; ! 451: { ! 452: fprintf(stderr,"error in line %d: %s\n",lines,msg); ! 453: exit(BAD_EXIT); ! 454: } ! 455: ! 456: prolog(outfp) ! 457: FILE *outfp; ! 458: { ! 459: int c; ! 460: while ((c = *txt1++) != '\0') putc(c,outfp); ! 461: while ((c = *fname++) != '\0') putc(c,outfp); ! 462: while ((c = *txt2++) != '\0') putc(c,outfp); ! 463: } ! 464: ! 465: epilogue(outfp) ! 466: FILE *outfp; ! 467: { ! 468: int c; ! 469: while ((c = *txt3++) != '\0') putc(c,outfp); ! 470: } ! 471: ! 
472: copyrest(in,out) ! 473: FILE *in,*out; ! 474: { ! 475: int c; ! 476: while ((c = getc(in)) != EOF) putc(c,out); ! 477: } ! 478: ! 479: ! 480: /* ! 481: * gettoken - returns token type of next token, sets tokval ! 482: * to the string value of the token if appropriate. ! 483: * ! 484: */ ! 485: ! 486: gettoken(fp) ! 487: FILE *fp; ! 488: { ! 489: int c; ! 490: while (1) { /* loop if reading comments... */ ! 491: do { ! 492: c = getc(fp); ! 493: if (c == '\n') lines++; ! 494: } while ((isspace(c) || c == C_L)); /* skip whitespace */ ! 495: switch(c) { ! 496: case EOF: return(SEP); ! 497: case '%': if ((c = getc(fp)) == '%') return(SEP); ! 498: tokval[0] = '%'; ! 499: tokval[1] = c; ! 500: rdword(fp,tokval+2); ! 501: return(WORD); ! 502: case '<': return(LBRACK); ! 503: case '>': return(RBRACK); ! 504: case ',': return(COMMA); ! 505: case '/': if ((c = getc(fp)) == '*') { ! 506: rdcmnt(fp); /* skip over the comment */ ! 507: continue; } /* and keep looping */ ! 508: else { ! 509: ungetc(c); /* put this back into input */ ! 510: c = '/'; } /* put character back, fall thru */ ! 511: ! 512: default: if (isword(c)) { ! 513: ungetc(c,fp); ! 514: rdword(fp,tokval); ! 515: return(WORD); ! 516: } ! 517: else fatal("Invalid character in input"); ! 518: } ! 519: } ! 520: } ! 521: ! 522: /* ! 523: * skip over a comment ! 524: * ! 525: */ ! 526: ! 527: rdcmnt(fp) ! 528: FILE *fp; ! 529: { ! 530: int c,star,prcnt; ! 531: prcnt = star = 0; /* no star seen yet */ ! 532: while (!((c = getc(fp)) == '/' && star)) { ! 533: if (c == EOF || (prcnt && c == '%')) fatal("Unterminated comment"); ! 534: prcnt = (c == '%'); ! 535: star = (c == '*'); ! 536: if (c == '\n') lines++; } ! 537: } ! 538: ! 539: ! 540: ! 541: /* ! 542: * symbol table management for wart ! 543: * ! 544: * entry points: ! 545: * clrhash - empty hash table. ! 546: * enter - enter a name into the symbol table ! 547: * lkup - find a name's value in the symbol table. ! 548: * ! 549: */ ! 550: ! 
#define HASHSIZE 101			/* # of entries in hash table */

/*
 * Exit status used when the symbol table code has to give up.
 * NOTE(review): presumably supplied by ckcdeb.h along with GOOD_EXIT;
 * this fallback only takes effect when it is not defined.
 */
#ifndef BAD_EXIT
#define BAD_EXIT 1
#endif

struct sym {
    char *name;				/* symbol name */
    int val;				/* value */
    struct sym *hnxt;			/* next on collision chain */
} *htab[HASHSIZE];			/* the hash table */


/*
 * clrhash - empty the hash table before using it.  Entries already in
 * the table are not freed.
 */
int
clrhash()
{
    int i;

    for (i=0; i<HASHSIZE; i++) htab[i] = NULL;
}

/*
 * hash - compute the value of the hash for a symbol.
 * Returns a bucket number in the range 0..HASHSIZE-1.
 */
int
hash(name)
char *name;
{
    unsigned int sum;

    /*
     * sum = 2*sum + character for each character.  Done in unsigned
     * arithmetic so that long names wrap around instead of overflowing
     * a signed int.
     */
    for (sum = 0; *name != '\0'; name++)
      sum += sum + (unsigned char)*name;
    return((int)(sum % HASHSIZE));
}

/*
 * copy - make a private copy of a string.  Returns newly allocated
 * storage holding the copy; exits with BAD_EXIT if none is available.
 */
char *
copy(s)
char *s;
{
    char *dup;

    /* malloc is declared elsewhere in this file as returning char * */
    dup = (char *) malloc(strlen(s) + 1);
    if (dup == NULL) {
	fprintf(stderr,"out of memory\n");
	exit(BAD_EXIT);
    }
    strcpy(dup,s);
    return(dup);
}


/*
 * lkup - find name in the symbol table, return its value.  Returns -1
 * if not found.
 */
int
lkup(name)
char *name;
{
    struct sym *cur;

    for (cur = htab[hash(name)]; cur != NULL; cur = cur->hnxt)
      if (strcmp(cur->name,name) == 0) return(cur->val);
    return(-1);
}

/*
 * enter - enter state name into the hash table with the given value.
 * The name is copied, so the caller's buffer may be reused.  Entering
 * a name that is already present is an error: a message is printed and
 * the program exits with BAD_EXIT.
 */
int
enter(name,svalue)
char *name;
int svalue;
{
    int h;
    struct sym *cur;

    if (lkup(name) != -1) {
	fprintf(stderr,"state %s appears twice...\n",name);
	exit(BAD_EXIT);
    }
    h = hash(name);
    cur = (struct sym *)malloc(sizeof (struct sym));
    if (cur == NULL) {
	fprintf(stderr,"out of memory\n");
	exit(BAD_EXIT);
    }
    cur->name = copy(name);
    cur->val = svalue;
    cur->hnxt = htab[h];		/* push onto front of the chain */
    htab[h] = cur;
}
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.