|
|
1.1 root 1: /* $Header: llscan.c,v 2.2 88/09/19 12:55:06 nhall Exp $ */
2: /* $Source: /var/home/tadl/src/argo/xebec/RCS/llscan.c,v $ */
3: /*
4: * ************************* NOTICE *******************************
5: * This code is in the public domain. It cannot be copyrighted.
6: * This scanner was originally written by Keith Thompson for the
7: * University of Wisconsin Crystal project.
8: * It was subsequently modified significantly by Nancy Hall at the
9: * University of Wisconsin for the ARGO project.
10: * ****************************************************************
11: */
12: #include "xebec.h"
13: #include "llparse.h"
14:
15: #include "main.h"
16: #include <stdio.h>
17: #include "procs.h"
18: #include "debug.h"
19:
/*
 * Character classes.  Each class is a distinct bit so that several
 * classes can be tested at once with a mask (e.g. LETTER | DIGITS).
 */
#define EOFILE	0x01	/* ends the token stream */
#define UNUSED	0x02	/* not legal outside of strings */
#define IGNORE	0x04	/* white space */
#define OPCHAR	0x08	/* punctuation */
#define DIGITS	0x10	/* 0-9 */
#define LETTER	0x20	/* A-Z, a-z and underscore */

/*
 * Class of every 7-bit ASCII character, indexed by character code.
 * Eight entries per row; the comment above each row names the characters
 * in order.
 *
 * NOTE(review): besides NUL, the only EOFILE entry is 0x14 (dc4, ^T),
 * not 0x04 (eot, ^D) -- confirm that this is intended.
 */
int chtype[128] = {
	/* 0x00: nul  soh  stx  etx  eot  enq  ack  bel  (^@ ^A ... ^G) */
	EOFILE, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
	/* 0x08: bs   ht   lf   vt   ff   cr   so   si   (^H ... ^O) */
	UNUSED, IGNORE, IGNORE, UNUSED, IGNORE, IGNORE, UNUSED, UNUSED,
	/* 0x10: dle  dc1  dc2  dc3  dc4  nak  syn  etb  (^P ... ^W) */
	UNUSED, UNUSED, UNUSED, UNUSED, EOFILE, UNUSED, UNUSED, UNUSED,
	/* 0x18: can  em   sub  esc  fs   gs   rs   us   (^X ^Y ^Z ^[ ^\ ^] ^^ ^_) */
	UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,

	/* 0x20: space !    "    #    $    %    &    '  */
	IGNORE, UNUSED, OPCHAR, UNUSED, OPCHAR, UNUSED, OPCHAR, OPCHAR,
	/* 0x28: (    )    *    +    ,    -    .    /  */
	OPCHAR, OPCHAR, OPCHAR, OPCHAR, OPCHAR, OPCHAR, OPCHAR, OPCHAR,
	/* 0x30: 0    1    2    3    4    5    6    7  */
	DIGITS, DIGITS, DIGITS, DIGITS, DIGITS, DIGITS, DIGITS, DIGITS,
	/* 0x38: 8    9    :    ;    <    =    >    ?  */
	DIGITS, DIGITS, OPCHAR, OPCHAR, OPCHAR, OPCHAR, OPCHAR, OPCHAR,

	/* 0x40: @    A    B    C    D    E    F    G  */
	UNUSED, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
	/* 0x48: H    I    J    K    L    M    N    O  */
	LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
	/* 0x50: P    Q    R    S    T    U    V    W  */
	LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
	/* 0x58: X    Y    Z    [    \    ]    ^    _  */
	LETTER, LETTER, LETTER, OPCHAR, UNUSED, OPCHAR, OPCHAR, LETTER,

	/* 0x60: `    a    b    c    d    e    f    g  */
	UNUSED, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
	/* 0x68: h    i    j    k    l    m    n    o  */
	LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
	/* 0x70: p    q    r    s    t    u    v    w  */
	LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
	/* 0x78: x    y    z    {    |    }    ~    del */
	LETTER, LETTER, LETTER, OPCHAR, UNUSED, OPCHAR, UNUSED, UNUSED
};
64:
65:
66: extern FILE *astringfile;
67: static char *buffptr;
68: static char buffer[2][LINELEN];
69: static int currentbuf = 1;
70:
71: #define addbuf(x) *buffptr++ = x
72:
73: static int ch = ' ';
74:
75: skip()
76: {
77: while((chtype[ch] == IGNORE) ) {
78: ch = getch();
79: }
80: }
81:
82: llaccept(t)
83: LLtoken *t;
84: {
85: switch(t->llstate) {
86: case NORMAL:
87: break;
88: case INSERT:
89: fprintf(stderr,"Insert %s\n", llstrings[t->llterm]);
90: break;
91: case DELETE:
92: fprintf(stderr,"Delete %s\n", llstrings[t->llterm]);
93: break;
94: }
95: }
96:
97: #define TVAL (t->llattrib)
98:
99:
100: dump_buffer()
101: {
102: register int i;
103: for(i=0; i<20; i++)
104: (void) fputc(buffer[currentbuf][i], stderr);
105: (void) fputc('\n', stderr);
106: (void) fflush(stderr);
107: }
108:
109: int iskey(c, buf)
110: char *c;
111: char **buf;
112: {
113: register int i;
114: static struct { char *key_word; int term_type; } keys[] = {
115: { "SAME", T_SAME },
116: { "DEFAULT", T_DEFAULT },
117: { "NULLACTION", T_NULLACTION },
118: { "STRUCT", T_STRUCT },
119: { "SYNONYM", T_SYNONYM },
120: { "TRANSITIONS", T_TRANSITIONS },
121: { "STATES", T_STATES },
122: { "EVENTS", T_EVENTS },
123: { "PCB", T_PCB },
124: { "INCLUDE", T_INCLUDE },
125: { "PROTOCOL", T_PROTOCOL },
126: { 0, 0},
127: };
128:
129: for (i = 0; keys[i].key_word ; i++) {
130: if( !strcmp(c, (*buf = keys[i].key_word) ) ) {
131: return ( keys[i].term_type );
132: }
133: }
134: *buf = (char *)0;
135: return(0);
136: }
137:
138: getstr(o,c)
139: /* c is the string delimiter
140: * allow the delimiter to be escaped
141: * the messy part: translate $ID to
142: * e->ev_union.ID
143: * where ID is an event with a non-zero obj_struc
144: * need we check for the field???
145: */
146: char o,c;
147: {
148: register int nested = 1;
149: register int allow_nesting = (o==c)?-1:1;
150:
151: IFDEBUG(S)
152: fprintf(stdout,"getstr: ch=%c, delimiters %c %c\n",
153: ch,o, c);
154: fprintf(stdout,"getstr: buffptr 0x%x, currentbuf 0x%x\n",
155: buffptr, currentbuf);
156: ENDDEBUG
157:
158: if( ch == c ) nested--;
159: while(nested) {
160: if(ch == '\0') {
161: fprintf(stderr,
162: "Eof inside of a string, delims= %c,%c, nesting %d",c,o, nested);
163: Exit(-1);
164: /* notreached */
165: } else if(ch == '$') {
166: /* might be an attribute */
167: IFDEBUG(S)
168: fprintf(stdout,"getstr: atttribute?\n");
169: ENDDEBUG
170:
171: /* assume it's an event */
172: /* addbuf is a macro so this isn't as bad as
173: * it looks
174: * add "e->ev_union."
175: */
176: if( (ch = getch()) == '$' ) {
177: addbuf('e'); addbuf('-'); addbuf('>');
178: addbuf('e'); addbuf('v'); addbuf('_');
179: addbuf('u'); addbuf('n'); addbuf('i');
180: addbuf('o'); addbuf('n');
181: addbuf('.');
182: AddCurrentEventName(& buffptr);
183: } else {
184: char *obufp = buffptr;
185:
186: do {
187: addbuf(ch);
188: ch = getch();
189: } while(chtype[ch] & LETTER);
190: addbuf('\0');
191: if( !strcmp(obufp, synonyms[PCB_SYN],
192: strlen(synonyms[PCB_SYN]) )) {
193: buffptr = obufp;
194: addbuf('p');
195: } else if( !strncmp(obufp, synonyms[EVENT_SYN],
196: strlen(synonyms[EVENT_SYN]))) {
197: buffptr = obufp;
198: addbuf('e');
199: } else {
200: fprintf(stderr, "Unknown synonym %s\n", obufp);
201: Exit(-1);
202: }
203: if(ch == '.') {
204: addbuf('-'); addbuf('>');
205: } else {
206: /* needs to be checked for nesting */
207: goto check;
208: }
209: }
210: /* end of attribute handling */
211: goto skip;
212: } else if(ch == '\\') {
213: /* possible escape - this is kludgy beyond belief:
214: * \ is used to escape open and closing delimiters
215: * and '$'
216: * otherwise it's passed through to be compiled by C
217: */
218: ch = getch();
219: if( (ch != o ) && (ch != c) && (ch != '$') ) {
220: /* may need to handle case where \ is last char in file... */
221: /* don't treat is as escape; not open or close so
222: * don't have to worry about nesting either
223: */
224: addbuf('\\');
225: }
226: }
227: addbuf(ch);
228: skip:
229: ch = getch();
230: check:
231: if( ch == o ) nested += allow_nesting;
232: else if( ch == c ) nested--;
233: if ( (buffptr - buffer[currentbuf]) > LINELEN) {
234: fprintf(stderr,
235: "%s too long.\n", (o=='{')?"Action":"Predicate"); /*}*/
236: fprintf(stderr,
237: "buffptr, currentbuf 0x%x, 0x%x\n",buffptr,currentbuf );
238: Exit(-1);
239: }
240: IFDEBUG(S)
241: fprintf(stdout,"loop in getstr: ch 0x%x,%c o=%c,c=%c nested=%d\n",
242: ch,ch,o,c,nested);
243: ENDDEBUG
244: }
245: addbuf(ch);
246: addbuf('\0');
247:
248: IFDEBUG(S)
249: fprintf(stdout,"exit getstr: got %s\n", buffer[currentbuf]);
250: fprintf(stdout,"exit getstr: buffptr 0x%x, currentbuf 0x%x\n",
251: buffptr, currentbuf);
252: ENDDEBUG
253: }
254:
255: getch()
256: {
257: char c;
258: extern FILE *infile;
259: extern int lineno;
260:
261: c = fgetc(infile) ;
262: if (c == '\n') lineno++;
263: if ((int)c == EOF) c = (char)0;
264: if (feof(infile)) c = (char) 0;
265: IFDEBUG(e)
266: fprintf(stdout, "getch: 0x%x\n", c);
267: (void) fputc( c, stdout);
268: fflush(stdout);
269: ENDDEBUG
270:
271: return c;
272: }
273:
274: llscan(t)
275: LLtoken *t;
276: {
277: char c;
278:
279: t->llstate = NORMAL;
280:
281: ++currentbuf;
282: currentbuf&=1;
283: again:
284: buffptr = &buffer[currentbuf][0];
285:
286: skip();
287:
288: switch(chtype[ch]) {
289:
290: case EOFILE:
291: t->llterm = T_ENDMARKER;
292: break;
293:
294: case UNUSED:
295: fprintf(stderr, "Illegal character in input - 0x%x ignored.", ch);
296: ch = getch();
297: goto again;
298:
299: case OPCHAR:
300:
301: switch(ch) {
302:
303: case '/':
304: /* possible comment : elide ; kludge */
305: IFDEBUG(S)
306: fprintf(stdout, "Comment ch=%c\n", ch);
307: ENDDEBUG
308: c = getch();
309: if (c != '*') {
310: fprintf(stderr,"Syntax error : character(0x%x) ignored", ch);
311: ch = c;
312: goto again;
313: } else {
314: register int state = 2, whatchar=0;
315: static int dfa[3][3] = {
316: /* done seen-star middle */
317: /* star */ { 0, 1, 1 },
318: /* / */ { 0, 0, 2 },
319: /* other */ { 0, 2, 2 }
320: };
321:
322: while( state ) {
323: if( (c = getch()) == (char)0)
324: break;
325: whatchar = (c=='*')?0:(c=='/'?1:2);
326: IFDEBUG(S)
327: fprintf(stdout,
328: "comment: whatchar = %d, c = 0x%x,%c, oldstate=%d",
329: whatchar, c,c, state);
330: ENDDEBUG
331: state = dfa[whatchar][state];
332: IFDEBUG(S)
333: fprintf(stdout, ", newstate=%d\n", state);
334: ENDDEBUG
335: }
336: if(state) {
337: fprintf(stderr,
338: "Syntax error: end of file inside a comment");
339: Exit(-1);
340: } else ch = getch();
341: }
342: IFDEBUG(S)
343: fprintf(stdout, "end of comment at 0x%x,%c\n",ch,ch);
344: ENDDEBUG
345: goto again;
346:
347:
348: case '*':
349: t->llterm = T_STAR;
350: break;
351:
352: case ',':
353: t->llterm = T_COMMA;
354: break;
355:
356: case ';':
357: t->llterm = T_SEMI;
358: break;
359:
360: case '<':
361: t->llterm = T_LANGLE;
362: break;
363:
364: case '=':
365: t->llterm = T_EQUAL;
366: break;
367:
368: case '[':
369: t->llterm = T_LBRACK;
370: break;
371:
372: case ']':
373: t->llterm = T_RBRACK;
374: break;
375:
376: #ifdef T_FSTRING
377: case '"':
378: t->llterm = T_FSTRING;
379: addbuf(ch);
380: ch = getch();
381: getstr('"', '"');
382: TVAL.FSTRING.address = stash(buffer[currentbuf]);
383: break;
384: #endif T_FSTRING
385:
386: case '(':
387: t->llterm = T_PREDICATE;
388: getstr(ch, ')' );
389: TVAL.PREDICATE.address = buffer[currentbuf];
390: break;
391:
392: case '{':
393: t->llterm = T_ACTION;
394: getstr(ch, '}');
395: TVAL.ACTION.address = buffer[currentbuf];
396: break;
397:
398: default:
399: fprintf(stderr,"Syntax error : character(0x%x) ignored", ch);
400: ch = getch();
401: goto again;
402:
403: }
404: ch = getch();
405: break;
406:
407: case LETTER:
408: do {
409: addbuf(ch);
410: ch = getch();
411: } while(chtype[ch] & (LETTER | DIGITS));
412:
413: addbuf('\0');
414:
415: t->llterm = iskey(buffer[currentbuf], &TVAL.ID.address);
416: if(!t->llterm) {
417: t->llterm = T_ID;
418: TVAL.ID.address = buffer[currentbuf];
419: }
420: IFDEBUG(S)
421: fprintf(stdout, "llscan: id or keyword 0x%x, %s\n",
422: TVAL.ID.address, TVAL.ID.address);
423: ENDDEBUG
424: break;
425:
426: default:
427: fprintf(stderr, "Snark in llscan: chtype=0x%x, ch=0x%x\n",
428: chtype[ch], ch);
429: }
430: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.