|
|
/* Version banner; rdinput() also writes it into the generated program. */
char *wartv = "Wart Version 1A(003) 27 May 85";
2:
3: /* W A R T */
4:
5: /*
6: pre-process a lex-like file into a C program.
7:
8: Author: Jeff Damens, Columbia University Center for Computing Activities, 11/84.
9: Copyright (C) 1985, Trustees of Columbia University in the City of New York.
10: Permission is granted to any individual or institution to use, copy, or
11: redistribute this software so long as it is not sold for profit, provided this
12: copyright notice is retained.
13:
14: * input format is:
15: * lines to be copied | %states <state names...>
16: * %%
17: * <state> | <state,state,...> CHAR { actions }
18: * ...
19: * %%
20: */
21:
22: #include "ckcdeb.h" /* Includes */
23: #include <stdio.h>
24: #include <ctype.h>
25:
#define C_L 014                  /* Formfeed (control-L) */

/* Token types returned by gettoken() */

#define SEP    1                 /* "%%" separator, or end of file */
#define LBRACK 2                 /* '<' */
#define RBRACK 3                 /* '>' */
#define WORD   4                 /* a word; its text is left in tokval */
#define COMMA  5                 /* ',' */

/* Storage sizes */

#define MAXSTATES 50             /* max number of states */
#define MAXWORD   50             /* max # of chars/word, counting the NUL */
#define SBYTES ((MAXSTATES+7)/8) /* # of bytes for state bitmask */

/*
 * Name of wart function in generated program.  The build may define
 * FNAME beforehand to choose a different name.
 */

#ifndef FNAME
#define FNAME "wart"
#endif
45:
46: /* Structure for state information */
47:
48: struct trans { CHAR states[SBYTES]; /* included states */
49: int anyst; /* true if this good from any state */
50: CHAR inchr; /* input character */
51: int actno; /* associated action */
52: struct trans *nxt; }; /* next transition */
53:
54: typedef struct trans *Trans;
55:
56: char *malloc(); /* Returns pointer (not int) */
57:
58:
59: /* Variables and tables */
60:
61: int lines,nstates,nacts;
62:
63: char tokval[MAXWORD];
64:
65: int tbl[MAXSTATES*128];
66:
67:
68:
69: char *txt1 = "\n#define BEGIN state =\n\nint state = 0;\n\n";
70:
71: char *fname = FNAME; /* function name goes here */
72:
73: /* rest of program... */
74:
75: char *txt2 = "()\n\
76: {\n\
77: int c,actno;\n\
78: extern int tbl[];\n\
79: while (1) {\n\
80: c = input();\n\
81: if ((actno = tbl[c + state*128]) != -1)\n\
82: switch(actno) {\n";
83:
84: /* this program's output goes here, followed by final text... */
85:
86: char *txt3 = "\n }\n }\n}\n\n";
87:
88:
89: /*
90: * turn on the bit associated with the given state
91: *
92: */
93: setstate(state,t)
94: int state;
95: Trans t;
96: {
97: int idx,msk;
98: idx = state/8; /* byte associated with state */
99: msk = 0x80 >> (state % 8); /* bit mask for state */
100: t->states[idx] |= msk;
101: }
102:
103: /*
104: * see if the state is involved in the transition
105: *
106: */
107:
108: teststate(state,t)
109: int state;
110: Trans t;
111: {
112: int idx,msk;
113: idx = state/8;
114: msk = 0x80 >> (state % 8);
115: return(t->states[idx] & msk);
116: }
117:
118:
119: /*
120: * read input from here...
121: *
122: */
123:
124: Trans
125: rdinput(infp,outfp)
126: FILE *infp,*outfp;
127: {
128: Trans x,rdrules();
129: lines = 1; /* line counter */
130: nstates = 0; /* no states */
131: nacts = 0; /* no actions yet */
132: fprintf(outfp,"\n%c* WARNING -- This C source program generated by ",'/');
133: fprintf(outfp,"Wart preprocessor. */\n");
134: fprintf(outfp,"%c* Do not edit this file; edit the Wart-format ",'/');
135: fprintf(outfp,"source file instead, */\n");
136: fprintf(outfp,"%c* and then run it through Wart to produce a new ",'/');
137: fprintf(outfp,"C source file. */\n\n");
138: fprintf(outfp,"%c* Wart Version Info: */\n",'/');
139: fprintf(outfp,"char *wartv = \"%s\";\n\n",wartv);
140:
141: initial(infp,outfp); /* read state names, initial defs */
142: prolog(outfp); /* write out our initial code */
143: x = rdrules(infp,outfp); /* read rules */
144: epilogue(outfp); /* write out epilogue code */
145: return(x);
146: }
147:
148:
149: /*
150: * initial - read initial definitions and state names. Returns
151: * on EOF or %%.
152: *
153: */
154:
155: initial(infp,outfp)
156: FILE *infp,*outfp;
157: {
158: int c;
159: char wordbuf[MAXWORD];
160: while ((c = getc(infp)) != EOF) {
161: if (c == '%') {
162: rdword(infp,wordbuf);
163: if (strcmp(wordbuf,"states") == 0)
164: rdstates(infp,outfp);
165: else if (strcmp(wordbuf,"%") == 0) return;
166: else fprintf(outfp,"%%%s",wordbuf);
167: }
168: else putc(c,outfp);
169: if (c == '\n') lines++;
170: }
171: }
172:
/*
 * isin - tell whether character c occurs in the string s.
 * Returns 1 if it does, 0 if not.
 */
int
isin(s,c)
char *s;
int c;
{
    while (*s != '\0')
        if (*s++ == c) return(1);
    return(0);
}

/*
 * isword - boolean function to tell if the given character can be
 * part of a word: letters, digits, and the few specials listed below.
 */
int
isword(c)
int c;
{
    static char special[] = ".%_-$@";   /* these are allowable */
    if (isalnum(c)) return(1);
    return(isin(special,c));
}
189:
190: /*
191: * read the next word into the given buffer.
192: *
193: */
194: rdword(fp,buf)
195: FILE *fp;
196: char *buf;
197: {
198: int len = 0,c;
199: while (isword(c = getc(fp)) && ++len < MAXWORD) *buf++ = c;
200: *buf++ = '\0'; /* tie off word */
201: ungetc(c,fp); /* put break char back */
202: }
203:
204:
205: /*
206: * read state names, up to a newline.
207: *
208: */
209:
210: rdstates(fp,ofp)
211: FILE *fp,*ofp;
212: {
213: int c;
214: char wordbuf[MAXWORD];
215: while ((c = getc(fp)) != EOF && c != '\n')
216: {
217: if (isspace(c) || c == C_L) continue; /* skip whitespace */
218: ungetc(c,fp); /* put char back */
219: rdword(fp,wordbuf); /* read the whole word */
220: enter(wordbuf,++nstates); /* put into symbol tbl */
221: fprintf(ofp,"#define %s %d\n",wordbuf,nstates);
222: }
223: lines++;
224: }
225:
226: /*
227: * allocate a new, empty transition node
228: *
229: */
230:
231: Trans
232: newtrans()
233: {
234: Trans new;
235: int i;
236: new = (Trans) malloc(sizeof (struct trans));
237: for (i=0; i<SBYTES; i++) new->states[i] = 0;
238: new->anyst = 0;
239: new->nxt = NULL;
240: return(new);
241: }
242:
243:
244: /*
245: * read all the rules.
246: *
247: */
248:
249: Trans
250: rdrules(fp,out)
251: FILE *fp,*out;
252: {
253: Trans head,cur,prev;
254: int curtok,i;
255: head = cur = NULL;
256: while ((curtok = gettoken(fp)) != SEP)
257:
258: switch(curtok) {
259: case LBRACK: if (cur == NULL) cur = newtrans();
260: else fatal("duplicate state list");
261: statelist(fp,cur);/* set states */
262: continue; /* prepare to read char */
263:
264: case WORD: if (strlen(tokval) != 1)
265: fatal("multiple chars in state");
266: if (cur == NULL) {
267: cur = newtrans();
268: cur->anyst = 1;
269: }
270: cur->actno = ++nacts;
271: cur->inchr = tokval[0];
272: if (head == NULL) head = cur;
273: else prev->nxt = cur;
274: prev = cur;
275: cur = NULL;
276: copyact(fp,out,nacts);
277: break;
278: default: fatal("bad input format");
279: }
280:
281: return(head);
282: }
283:
284:
285: /*
286: * read a list of (comma-separated) states, set them in the
287: * given transition.
288: *
289: */
290: statelist(fp,t)
291: FILE *fp;
292: Trans t;
293: {
294: int curtok,sval;
295: curtok = COMMA;
296: while (curtok != RBRACK) {
297: if (curtok != COMMA) fatal("missing comma");
298: if ((curtok = gettoken(fp)) != WORD) fatal("missing state name");
299: if ((sval = lkup(tokval)) == -1) {
300: fprintf(stderr,"state %s undefined\n",tokval);
301: fatal("undefined state");
302: }
303: setstate(sval,t);
304: curtok = gettoken(fp);
305: }
306: }
307:
308: /*
309: * copy an action from the input to the output file
310: *
311: */
312: copyact(inp,outp,actno)
313: FILE *inp,*outp;
314: int actno;
315: {
316: int c,bcnt;
317: fprintf(outp,"case %d:\n",actno);
318: while (((c = getc(inp)) != '\n') && (isspace(c) || c == C_L));
319: if (c == '{') {
320: bcnt = 1;
321: putc(c,outp);
322: while (bcnt > 0 && (c = getc(inp)) != EOF) {
323: if (c == '{') bcnt++;
324: else if (c == '}') bcnt--;
325: else if (c == '\n') lines++;
326: putc(c,outp);
327: }
328: if (bcnt > 0) fatal("action doesn't end");
329: }
330: else {
331: while (c != '\n' && c != EOF) {
332: putc(c,outp);
333: c = getc(inp);
334: }
335: lines++;
336: }
337: fprintf(outp,"\nbreak;\n");
338: }
339:
340:
341: /*
342: * find the action associated with a given character and state.
343: * returns -1 if one can't be found.
344: *
345: */
346: faction(hd,state,chr)
347: Trans hd;
348: int state,chr;
349: {
350: while (hd != NULL) {
351: if (hd->anyst || teststate(state,hd))
352: if (hd->inchr == '.' || hd->inchr == chr) return(hd->actno);
353: hd = hd->nxt;
354: }
355: return(-1);
356: }
357:
358:
359: /*
360: * empty the table...
361: *
362: */
363: emptytbl()
364: {
365: int i;
366: for (i=0; i<nstates*128; i++) tbl[i] = -1;
367: }
368:
369: /*
370: * add the specified action to the output for the given state and chr.
371: *
372: */
373:
374: addaction(act,state,chr)
375: int act,state,chr;
376: {
377: tbl[state*128 + chr] = act;
378: }
379:
380: writetbl(fp)
381: FILE *fp;
382: {
383: warray(fp,"tbl",tbl,128*(nstates+1));
384: }
385:
386:
/*
 * Write an array to the output file as a C initializer, given its
 * name and size.
 *
 *   fp   - output stream
 *   nam  - name for the generated array
 *   cont - the values to write
 *   siz  - number of values
 *
 * Every value is followed by ", "; a newline follows the value at
 * index 0 and at every index that is a multiple of 20.
 */
int
warray(fp,nam,cont,siz)
FILE *fp;
char *nam;
int cont[],siz;
{
    int i;

    fprintf(fp,"int %s[] = {\n",nam);
    i = 0;
    while (i < siz) {
        fprintf(fp,"%d, ",cont[i]);
        if (i % 20 == 0) putc('\n',fp);
        i++;
    }
    fputs("};\n",fp);
}
404:
405: main(argc,argv)
406: int argc;
407: char *argv[];
408: {
409: Trans head;
410: int state,c;
411: FILE *infile,*outfile;
412:
413: if (argc > 1) {
414: if ((infile = fopen(argv[1],"r")) == NULL) {
415: fprintf(stderr,"Can't open %s\n",argv[1]);
416: fatal("unreadable input file"); } }
417: else infile = stdin;
418:
419: if (argc > 2) {
420: if ((outfile = fopen(argv[2],"w")) == NULL) {
421: fprintf(stderr,"Can't write to %s\n",argv[2]);
422: fatal("bad output file"); } }
423: else outfile = stdout;
424:
425: clrhash(); /* empty hash table */
426: head = rdinput(infile,outfile); /* read input file */
427: emptytbl(); /* empty our tables */
428: for (state = 0; state <= nstates; state++)
429: for (c = 1; c < 128; c++)
430: addaction(faction(head,state,c),state,c); /* find actions, add to tbl */
431: writetbl(outfile);
432: copyrest(infile,outfile);
433: fprintf(stderr,"%d states, %d actions\n",nstates,nacts);
434: #ifdef undef
435: for (state = 1; state <= nstates; state ++)
436: for (c = 1; c < 128; c++)
437: if (tbl[state*128 + c] != -1) printf("state %d, chr %d, act %d\n",
438: state,c,tbl[state*128 + c]);
439: #endif
440: exit(GOOD_EXIT);
441: }
442:
443:
444: /*
445: * fatal error handler
446: *
447: */
448:
449: fatal(msg)
450: char *msg;
451: {
452: fprintf(stderr,"error in line %d: %s\n",lines,msg);
453: exit(BAD_EXIT);
454: }
455:
456: prolog(outfp)
457: FILE *outfp;
458: {
459: int c;
460: while ((c = *txt1++) != '\0') putc(c,outfp);
461: while ((c = *fname++) != '\0') putc(c,outfp);
462: while ((c = *txt2++) != '\0') putc(c,outfp);
463: }
464:
465: epilogue(outfp)
466: FILE *outfp;
467: {
468: int c;
469: while ((c = *txt3++) != '\0') putc(c,outfp);
470: }
471:
/*
 * Copy everything that remains on the input stream to the output
 * stream, unchanged.
 *
 *   in  - stream to read until EOF
 *   out - stream to write
 */
int
copyrest(in,out)
FILE *in,*out;
{
    int ch;
    for (ch = getc(in); ch != EOF; ch = getc(in))
        putc(ch,out);
}
478:
479:
480: /*
481: * gettoken - returns token type of next token, sets tokval
482: * to the string value of the token if appropriate.
483: *
484: */
485:
486: gettoken(fp)
487: FILE *fp;
488: {
489: int c;
490: while (1) { /* loop if reading comments... */
491: do {
492: c = getc(fp);
493: if (c == '\n') lines++;
494: } while ((isspace(c) || c == C_L)); /* skip whitespace */
495: switch(c) {
496: case EOF: return(SEP);
497: case '%': if ((c = getc(fp)) == '%') return(SEP);
498: tokval[0] = '%';
499: tokval[1] = c;
500: rdword(fp,tokval+2);
501: return(WORD);
502: case '<': return(LBRACK);
503: case '>': return(RBRACK);
504: case ',': return(COMMA);
505: case '/': if ((c = getc(fp)) == '*') {
506: rdcmnt(fp); /* skip over the comment */
507: continue; } /* and keep looping */
508: else {
509: ungetc(c); /* put this back into input */
510: c = '/'; } /* put character back, fall thru */
511:
512: default: if (isword(c)) {
513: ungetc(c,fp);
514: rdword(fp,tokval);
515: return(WORD);
516: }
517: else fatal("Invalid character in input");
518: }
519: }
520: }
521:
522: /*
523: * skip over a comment
524: *
525: */
526:
527: rdcmnt(fp)
528: FILE *fp;
529: {
530: int c,star,prcnt;
531: prcnt = star = 0; /* no star seen yet */
532: while (!((c = getc(fp)) == '/' && star)) {
533: if (c == EOF || (prcnt && c == '%')) fatal("Unterminated comment");
534: prcnt = (c == '%');
535: star = (c == '*');
536: if (c == '\n') lines++; }
537: }
538:
539:
540:
/*
 * Symbol table management for wart.
 *
 * Entry points:
 *   clrhash - empty hash table.
 *   enter   - enter a name into the symbol table.
 *   lkup    - find a name's value in the symbol table.
 */

#define HASHSIZE 101                    /* # of entries in hash table */

/* One symbol: a name, its value, and a link for the collision chain */

struct sym {
    char *name;                         /* symbol name */
    int val;                            /* value */
    struct sym *hnxt;                   /* next on collision chain */
};

struct sym *htab[HASHSIZE];             /* the hash table */
557:
558:
559: /*
560: * empty the hash table before using it...
561: *
562: */
563: clrhash()
564: {
565: int i;
566: for (i=0; i<HASHSIZE; i++) htab[i] = NULL;
567: }
568:
569: /*
570: * compute the value of the hash for a symbol
571: *
572: */
573: hash(name)
574: char *name;
575: {
576: int sum;
577: for (sum = 0; *name != '\0'; name++) sum += (sum + *name);
578: sum %= HASHSIZE; /* take sum mod hashsize */
579: if (sum < 0) sum += HASHSIZE; /* disallow negative hash value */
580: return(sum);
581: }
582:
/*
 * Make a private copy of a string in newly allocated memory.
 * Running out of memory is a fatal error, so the result is never
 * NULL.
 *
 *   s - NUL-terminated string to copy
 *
 * Returns the copy.
 */
char *
copy(s)
char *s;
{
    char *new;
    new = (char *) malloc(strlen(s) + 1);
    if (new == NULL) fatal("out of memory");
    strcpy(new,s);
    return(new);
}
596:
597:
598: /*
599: * enter state name into the hash table
600: *
601: */
602: enter(name,svalue)
603: char *name;
604: int svalue;
605: {
606: int h;
607: struct sym *cur;
608: if (lkup(name) != -1) {
609: fprintf(stderr,"state %s appears twice...\n");
610: exit(BAD_EXIT); }
611: h = hash(name);
612: cur = (struct sym *)malloc(sizeof (struct sym));
613: cur->name = copy(name);
614: cur->val = svalue;
615: cur->hnxt = htab[h];
616: htab[h] = cur;
617: }
618:
619: /*
620: * find name in the symbol table, return its value. Returns -1
621: * if not found.
622: *
623: */
624: lkup(name)
625: char *name;
626: {
627: struct sym *cur;
628: for (cur = htab[hash(name)]; cur != NULL; cur = cur->hnxt)
629: if (strcmp(cur->name,name) == 0) return(cur->val);
630: return(-1);
631: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.