|
|
/* Jim Noble at Planning Research Corporation, June 1987.  Fixes for */
/* miscellaneous bugs found when reformatting state transition code in */
/* CKCPRO.W. */
4:
/* Version banner; rdinput() also writes it into the generated program. */
char *wartv = "Wart Version 1A(005) Jan 1988";
6:
7: /* W A R T */
8:
/*
 pre-process a lex-like file into a C program.

 Author: Jeff Damens, Columbia University Center for Computing Activities, 11/84.
 Copyright (C) 1985, Trustees of Columbia University in the City of New York.
 Permission is granted to any individual or institution to use, copy, or
 redistribute this software so long as it is not sold for profit, provided this
 copyright notice is retained.

 * input format is:
 * lines to be copied | %states <state names...>
 * %%
 * <state> | <state,state,...> CHAR { actions }
 * ...
 * %%
 */
25:
26: #include "ckcdeb.h" /* Includes */
27: #include <stdio.h>
28: #include <ctype.h>
29:
/* Formfeed (control-L); the scanner skips it like whitespace */
#define C_L 014

/* Token types returned by gettoken() */
#define SEP     1       /* "%%" section separator (also returned at EOF) */
#define LBRACK  2       /* '<' */
#define RBRACK  3       /* '>' */
#define WORD    4       /* a word; its text is left in tokval */
#define COMMA   5       /* ',' */

/* Storage sizes */
#define MAXSTATES 50                    /* max number of states */
#define MAXWORD   50                    /* max # of chars/word */
#define SBYTES    ((MAXSTATES+7)/8)     /* # of bytes for state bitmask */

/* Name of wart function in generated program (may be predefined) */
#ifndef FNAME
#define FNAME "wart"
#endif
49:
50: /* Structure for state information */
51:
52: struct trans { CHAR states[SBYTES]; /* included states */
53: int anyst; /* true if this good from any state */
54: CHAR inchr; /* input character */
55: int actno; /* associated action */
56: struct trans *nxt; }; /* next transition */
57:
58: typedef struct trans *Trans;
59:
60: char *malloc(); /* Returns pointer (not int) */
61:
62:
63: /* Variables and tables */
64:
65: int lines,nstates,nacts;
66:
67: char tokval[MAXWORD];
68:
69: int tbl[MAXSTATES*128];
70:
71:
72:
73: char *txt1 = "\n#define BEGIN state =\n\nint state = 0;\n\n";
74:
75: char *fname = FNAME; /* function name goes here */
76:
77: /* rest of program... */
78:
79: char *txt2 = "()\n\
80: {\n\
81: int c,actno;\n\
82: extern int tbl[];\n\
83: while (1) {\n\
84: c = input();\n\
85: if ((actno = tbl[c + state*128]) != -1)\n\
86: switch(actno) {\n";
87:
88: /* this program's output goes here, followed by final text... */
89:
/*
 * Closes the switch, the while loop and the function body opened by txt2.
 * The closing braces are written plainly: "\}" is not a defined C escape
 * sequence, so its meaning was left to the compiler.
 */
char *txt3 = "\n }\n }\n}\n\n";
91:
92:
93: /*
94: * turn on the bit associated with the given state
95: *
96: */
97: setstate(state,t)
98: int state;
99: Trans t;
100: {
101: int idx,msk;
102: idx = state/8; /* byte associated with state */
103: msk = 0x80 >> (state % 8); /* bit mask for state */
104: t->states[idx] |= msk;
105: }
106:
107: /*
108: * see if the state is involved in the transition
109: *
110: */
111:
112: teststate(state,t)
113: int state;
114: Trans t;
115: {
116: int idx,msk;
117: idx = state/8;
118: msk = 0x80 >> (state % 8);
119: return(t->states[idx] & msk);
120: }
121:
122:
123: /*
124: * read input from here...
125: *
126: */
127:
128: Trans
129: rdinput(infp,outfp)
130: FILE *infp,*outfp;
131: {
132: Trans x,rdrules();
133: lines = 1; /* line counter */
134: nstates = 0; /* no states */
135: nacts = 0; /* no actions yet */
136: fprintf(outfp,"\n%c* WARNING -- This C source program generated by ",'/');
137: fprintf(outfp,"Wart preprocessor. */\n");
138: fprintf(outfp,"%c* Do not edit this file; edit the Wart-format ",'/');
139: fprintf(outfp,"source file instead, */\n");
140: fprintf(outfp,"%c* and then run it through Wart to produce a new ",'/');
141: fprintf(outfp,"C source file. */\n\n");
142: fprintf(outfp,"%c* Wart Version Info: */\n",'/');
143: fprintf(outfp,"char *wartv = \"%s\";\n\n",wartv);
144:
145: initial(infp,outfp); /* read state names, initial defs */
146: prolog(outfp); /* write out our initial code */
147: x = rdrules(infp,outfp); /* read rules */
148: epilogue(outfp); /* write out epilogue code */
149: return(x);
150: }
151:
152:
153: /*
154: * initial - read initial definitions and state names. Returns
155: * on EOF or %%.
156: *
157: */
158:
159: initial(infp,outfp)
160: FILE *infp,*outfp;
161: {
162: int c;
163: char wordbuf[MAXWORD];
164: while ((c = getc(infp)) != EOF) {
165: if (c == '%') {
166: rdword(infp,wordbuf);
167: if (strcmp(wordbuf,"states") == 0)
168: rdstates(infp,outfp);
169: else if (strcmp(wordbuf,"%") == 0) return;
170: else fprintf(outfp,"%%%s",wordbuf);
171: }
172: else putc(c,outfp);
173: if (c == '\n') lines++;
174: }
175: }
176:
/*
 * isin - return 1 if character c occurs in the string s, else 0.
 */
int
isin(s,c)
char *s;
int c;
{
    while (*s != '\0') {
        if (*s++ == c) return(1);
    }
    return(0);
}

/*
 * isword - boolean function to tell if the given character can be part
 * of a word: alphanumerics plus a few special characters.
 */
int
isword(c)
int c;
{
    static char special[] = ".%_-$@";   /* these are allowable */

    if (isalnum(c)) return(1);
    return(isin(special,c));
}
193:
194: /*
195: * read the next word into the given buffer.
196: *
197: */
198: rdword(fp,buf)
199: FILE *fp;
200: char *buf;
201: {
202: int len = 0,c;
203: while (isword(c = getc(fp)) && ++len < MAXWORD) *buf++ = c;
204: *buf++ = '\0'; /* tie off word */
205: ungetc(c,fp); /* put break char back */
206: }
207:
208:
209: /*
210: * read state names, up to a newline.
211: *
212: */
213:
214: rdstates(fp,ofp)
215: FILE *fp,*ofp;
216: {
217: int c;
218: char wordbuf[MAXWORD];
219: while ((c = getc(fp)) != EOF && c != '\n')
220: {
221: if (isspace(c) || c == C_L) continue; /* skip whitespace */
222: ungetc(c,fp); /* put char back */
223: rdword(fp,wordbuf); /* read the whole word */
224: enter(wordbuf,++nstates); /* put into symbol tbl */
225: fprintf(ofp,"#define %s %d\n",wordbuf,nstates);
226: }
227: lines++;
228: }
229:
230: /*
231: * allocate a new, empty transition node
232: *
233: */
234:
235: Trans
236: newtrans()
237: {
238: Trans new;
239: int i;
240: new = (Trans) malloc(sizeof (struct trans));
241: for (i=0; i<SBYTES; i++) new->states[i] = 0;
242: new->anyst = 0;
243: new->nxt = NULL;
244: return(new);
245: }
246:
247:
248: /*
249: * read all the rules.
250: *
251: */
252:
253: Trans
254: rdrules(fp,out)
255: FILE *fp,*out;
256: {
257: Trans head,cur,prev;
258: int curtok,i;
259: head = cur = NULL;
260: while ((curtok = gettoken(fp)) != SEP)
261:
262: switch(curtok) {
263: case LBRACK: if (cur == NULL) cur = newtrans();
264: else fatal("duplicate state list");
265: statelist(fp,cur);/* set states */
266: continue; /* prepare to read char */
267:
268: case WORD: if (strlen(tokval) != 1)
269: fatal("multiple chars in state");
270: if (cur == NULL) {
271: cur = newtrans();
272: cur->anyst = 1;
273: }
274: cur->actno = ++nacts;
275: cur->inchr = tokval[0];
276: if (head == NULL) head = cur;
277: else prev->nxt = cur;
278: prev = cur;
279: cur = NULL;
280: copyact(fp,out,nacts);
281: break;
282: default: fatal("bad input format");
283: }
284:
285: return(head);
286: }
287:
288:
289: /*
290: * read a list of (comma-separated) states, set them in the
291: * given transition.
292: *
293: */
294: statelist(fp,t)
295: FILE *fp;
296: Trans t;
297: {
298: int curtok,sval;
299: curtok = COMMA;
300: while (curtok != RBRACK) {
301: if (curtok != COMMA) fatal("missing comma");
302: if ((curtok = gettoken(fp)) != WORD) fatal("missing state name");
303: if ((sval = lkup(tokval)) == -1) {
304: fprintf(stderr,"state %s undefined\n",tokval);
305: fatal("undefined state");
306: }
307: setstate(sval,t);
308: curtok = gettoken(fp);
309: }
310: }
311:
312: /*
313: * copy an action from the input to the output file
314: *
315: */
316: copyact(inp,outp,actno)
317: FILE *inp,*outp;
318: int actno;
319: {
320: int c,bcnt;
321: fprintf(outp,"case %d:\n",actno);
322: while (c = getc(inp), (isspace(c) || c == C_L))
323: if (c == '\n') lines++;
324: if (c == '{') {
325: bcnt = 1;
326: fputs(" {",outp);
327: while (bcnt > 0 && (c = getc(inp)) != EOF) {
328: if (c == '{') bcnt++;
329: else if (c == '}') bcnt--;
330: else if (c == '\n') lines++;
331: putc(c,outp);
332: }
333: if (bcnt > 0) fatal("action doesn't end");
334: }
335: else {
336: while (c != '\n' && c != EOF) {
337: putc(c,outp);
338: c = getc(inp);
339: }
340: lines++;
341: }
342: fprintf(outp,"\n break;\n");
343: }
344:
345:
346: /*
347: * find the action associated with a given character and state.
348: * returns -1 if one can't be found.
349: *
350: */
351: faction(hd,state,chr)
352: Trans hd;
353: int state,chr;
354: {
355: while (hd != NULL) {
356: if (hd->anyst || teststate(state,hd))
357: if (hd->inchr == '.' || hd->inchr == chr) return(hd->actno);
358: hd = hd->nxt;
359: }
360: return(-1);
361: }
362:
363:
364: /*
365: * empty the table...
366: *
367: */
368: emptytbl()
369: {
370: int i;
371: for (i=0; i<nstates*128; i++) tbl[i] = -1;
372: }
373:
374: /*
375: * add the specified action to the output for the given state and chr.
376: *
377: */
378:
379: addaction(act,state,chr)
380: int act,state,chr;
381: {
382: tbl[state*128 + chr] = act;
383: }
384:
385: writetbl(fp)
386: FILE *fp;
387: {
388: warray(fp,"tbl",tbl,128*(nstates+1));
389: }
390:
391:
/*
 * warray - write an initialised int array definition to the output file.
 *
 * fp   - output file
 * nam  - name to give the array
 * cont - the values
 * siz  - number of values
 *
 * Values are written sixteen to a line.
 */
int
warray(fp,nam,cont,siz)
FILE *fp;
char *nam;
int cont[],siz;
{
    int n;

    fprintf(fp,"int %s[] = {\n",nam);
    n = 0;
    while (n < siz) {
        fprintf(fp,"%2d, ",cont[n]);
        n++;
        if (n % 16 == 0) putc('\n',fp);         /* end of a full line */
    }
    fprintf(fp,"};\n");
}
409:
410: main(argc,argv)
411: int argc;
412: char *argv[];
413: {
414: Trans head;
415: int state,c;
416: FILE *infile,*outfile;
417:
418: if (argc > 1) {
419: if ((infile = fopen(argv[1],"r")) == NULL) {
420: fprintf(stderr,"Can't open %s\n",argv[1]);
421: fatal("unreadable input file"); } }
422: else infile = stdin;
423:
424: if (argc > 2) {
425: if ((outfile = fopen(argv[2],"w")) == NULL) {
426: fprintf(stderr,"Can't write to %s\n",argv[2]);
427: fatal("bad output file"); } }
428: else outfile = stdout;
429:
430: clrhash(); /* empty hash table */
431: head = rdinput(infile,outfile); /* read input file */
432: emptytbl(); /* empty our tables */
433: for (state = 0; state <= nstates; state++)
434: for (c = 1; c < 128; c++)
435: addaction(faction(head,state,c),state,c); /* find actions, add to tbl */
436: writetbl(outfile);
437: copyrest(infile,outfile);
438: printf("%d states, %d actions\n",nstates,nacts);
439: #ifdef undef
440: for (state = 1; state <= nstates; state ++)
441: for (c = 1; c < 128; c++)
442: if (tbl[state*128 + c] != -1) printf("state %d, chr %d, act %d\n",
443: state,c,tbl[state*128 + c]);
444: #endif
445: exit(GOOD_EXIT);
446: }
447:
448:
449: /*
450: * fatal error handler
451: *
452: */
453:
454: fatal(msg)
455: char *msg;
456: {
457: fprintf(stderr,"error in line %d: %s\n",lines,msg);
458: exit(BAD_EXIT);
459: }
460:
461: prolog(outfp)
462: FILE *outfp;
463: {
464: int c;
465: while ((c = *txt1++) != '\0') putc(c,outfp);
466: while ((c = *fname++) != '\0') putc(c,outfp);
467: while ((c = *txt2++) != '\0') putc(c,outfp);
468: }
469:
470: epilogue(outfp)
471: FILE *outfp;
472: {
473: int c;
474: while ((c = *txt3++) != '\0') putc(c,outfp);
475: }
476:
/*
 * copyrest - copy everything that remains on `in` to `out`, unchanged.
 */
int
copyrest(in,out)
FILE *in,*out;
{
    int ch;

    for (ch = getc(in); ch != EOF; ch = getc(in))
        putc(ch,out);
}
483:
484:
485: /*
486: * gettoken - returns token type of next token, sets tokval
487: * to the string value of the token if appropriate.
488: *
489: */
490:
491: gettoken(fp)
492: FILE *fp;
493: {
494: int c;
495: while (1) { /* loop if reading comments... */
496: do {
497: c = getc(fp);
498: if (c == '\n') lines++;
499: } while ((isspace(c) || c == C_L)); /* skip whitespace */
500: switch(c) {
501: case EOF: return(SEP);
502: case '%': if ((c = getc(fp)) == '%') return(SEP);
503: tokval[0] = '%';
504: tokval[1] = c;
505: rdword(fp,tokval+2);
506: return(WORD);
507: case '<': return(LBRACK);
508: case '>': return(RBRACK);
509: case ',': return(COMMA);
510: case '/': if ((c = getc(fp)) == '*') {
511: rdcmnt(fp); /* skip over the comment */
512: continue; } /* and keep looping */
513: else {
514: ungetc(c,fp); /* put this back into input */
515: c = '/'; } /* put character back, fall thru */
516:
517: default: if (isword(c)) {
518: ungetc(c,fp);
519: rdword(fp,tokval);
520: return(WORD);
521: }
522: else fatal("Invalid character in input");
523: }
524: }
525: }
526:
527: /*
528: * skip over a comment
529: *
530: */
531:
532: rdcmnt(fp)
533: FILE *fp;
534: {
535: int c,star,prcnt;
536: prcnt = star = 0; /* no star seen yet */
537: while (!((c = getc(fp)) == '/' && star)) {
538: if (c == EOF || (prcnt && c == '%')) fatal("Unterminated comment");
539: prcnt = (c == '%');
540: star = (c == '*');
541: if (c == '\n') lines++; }
542: }
543:
544:
545:
546: /*
547: * symbol table management for wart
548: *
549: * entry points:
550: * clrhash - empty hash table.
551: * enter - enter a name into the symbol table
552: * lkup - find a name's value in the symbol table.
553: *
554: */
555:
#define HASHSIZE 101            /* # of entries in hash table */

/* One symbol table entry: a state name and its number */
struct sym {
    char *name;                 /* symbol name */
    int val;                    /* value */
    struct sym *hnxt;           /* next on collision chain */
};

struct sym *htab[HASHSIZE];     /* the hash table */
562:
563:
564: /*
565: * empty the hash table before using it...
566: *
567: */
568: clrhash()
569: {
570: int i;
571: for (i=0; i<HASHSIZE; i++) htab[i] = NULL;
572: }
573:
574: /*
575: * compute the value of the hash for a symbol
576: *
577: */
578: hash(name)
579: char *name;
580: {
581: int sum;
582: for (sum = 0; *name != '\0'; name++) sum += (sum + *name);
583: sum %= HASHSIZE; /* take sum mod hashsize */
584: if (sum < 0) sum += HASHSIZE; /* disallow negative hash value */
585: return(sum);
586: }
587:
/*
 * copy - make a private, heap-allocated copy of a string.
 *
 * Exits through fatal() if memory cannot be obtained, so the result is
 * never NULL.
 */
char *
copy(s)
char *s;
{
    char *new;

    new = (char *) malloc(strlen(s) + 1);
    if (new == NULL) fatal("out of memory");
    strcpy(new,s);
    return(new);
}
601:
602:
603: /*
604: * enter state name into the hash table
605: *
606: */
607: enter(name,svalue)
608: char *name;
609: int svalue;
610: {
611: int h;
612: struct sym *cur;
613: if (lkup(name) != -1) {
614: fprintf(stderr,"state %s appears twice...\n");
615: exit(BAD_EXIT); }
616: h = hash(name);
617: cur = (struct sym *)malloc(sizeof (struct sym));
618: cur->name = copy(name);
619: cur->val = svalue;
620: cur->hnxt = htab[h];
621: htab[h] = cur;
622: }
623:
624: /*
625: * find name in the symbol table, return its value. Returns -1
626: * if not found.
627: *
628: */
629: lkup(name)
630: char *name;
631: {
632: struct sym *cur;
633: for (cur = htab[hash(name)]; cur != NULL; cur = cur->hnxt)
634: if (strcmp(cur->name,name) == 0) return(cur->val);
635: return(-1);
636: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.