|
|
1.1 root 1: /* Copyright (c) 1982 Regents of the University of California */
2:
3: static char sccsid[] = "@(#)scanner.c 1.8 8/5/83";
4:
5: /*
6: * Debugger scanner.
7: */
8:
9: #include "defs.h"
10: #include "scanner.h"
11: #include "main.h"
12: #include "keywords.h"
13: #include "tree.h"
14: #include "symbols.h"
15: #include "names.h"
16: #include "y.tab.h"
17:
18: #ifndef public
19: typedef int Token;
20: #endif
21:
22: public String initfile = ".dbxinit";
23:
24: typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass;
25:
26: private Charclass class[256 + 1];
27: private Charclass *lexclass = class + 1;
28:
29: #define isdigit(c) (lexclass[c] == NUM)
30: #define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM)
31: #define ishexdigit(c) ( \
32: isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \
33: )
34:
35: #define MAXLINESIZE 1024
36:
37: private File in;
38: private Char linebuf[MAXLINESIZE];
39: private Char *curchar;
40:
41: #define MAXINCLDEPTH 10
42:
43: private struct {
44: File savefile;
45: Filename savefn;
46: int savelineno;
47: } inclinfo[MAXINCLDEPTH];
48:
49: private unsigned int curinclindex;
50:
51: private Token getident();
52: private Token getnum();
53: private Token getstring();
54: private Boolean eofinput();
55: private Char charcon();
56: private Char charlookup();
57:
58: private enterlexclass(class, s)
59: Charclass class;
60: String s;
61: {
62: register char *p;
63:
64: for (p = s; *p != '\0'; p++) {
65: lexclass[*p] = class;
66: }
67: }
68:
69: public scanner_init()
70: {
71: register Integer i;
72:
73: for (i = 0; i < 257; i++) {
74: class[i] = OTHER;
75: }
76: enterlexclass(WHITE, " \t");
77: enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz");
78: enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$");
79: enterlexclass(NUM, "0123456789");
80: in = stdin;
81: errfilename = nil;
82: errlineno = 0;
83: curchar = linebuf;
84: linebuf[0] = '\0';
85: }
86:
87: /*
88: * Read a single token.
89: *
90: * Input is line buffered.
91: *
92: * There are two "modes" of operation: one as in a compiler,
93: * and one for reading shell-like syntax.
94: */
95:
96: private Boolean shellmode;
97:
98: public Token yylex()
99: {
100: register int c;
101: register char *p;
102: register Token t;
103: String line;
104:
105: p = curchar;
106: if (*p == '\0') {
107: do {
108: if (isterm(in)) {
109: printf("(%s) ", cmdname);
110: fflush(stdout);
111: }
112: line = fgets(linebuf, MAXLINESIZE, in);
113: } while (line == nil and not eofinput());
114: if (line == nil) {
115: c = EOF;
116: } else {
117: p = linebuf;
118: while (lexclass[*p] == WHITE) {
119: p++;
120: }
121: shellmode = false;
122: }
123: } else {
124: while (lexclass[*p] == WHITE) {
125: p++;
126: }
127: }
128: curchar = p;
129: c = *p;
130: if (lexclass[c] == ALPHA) {
131: t = getident();
132: } else if (lexclass[c] == NUM) {
133: if (shellmode) {
134: t = getident();
135: } else {
136: t = getnum();
137: }
138: } else {
139: ++curchar;
140: switch (c) {
141: case '\n':
142: t = '\n';
143: if (errlineno != 0) {
144: errlineno++;
145: }
146: break;
147:
148: case '"':
149: case '\'':
150: t = getstring();
151: break;
152:
153: case '.':
154: if (shellmode) {
155: --curchar;
156: t = getident();
157: } else if (isdigit(*curchar)) {
158: --curchar;
159: t = getnum();
160: } else {
161: t = '.';
162: }
163: break;
164:
165: case '<':
166: if (not shellmode and *curchar == '<') {
167: ++curchar;
168: t = LFORMER;
169: } else {
170: t = '<';
171: }
172: break;
173:
174: case '>':
175: if (not shellmode and *curchar == '>') {
176: ++curchar;
177: t = RFORMER;
178: } else {
179: t = '>';
180: }
181: break;
182:
183: case '#':
184: if (*curchar == '^') {
185: ++curchar;
186: t = ABSTRACTION;
187: } else {
188: t = '#';
189: }
190: break;
191:
192: case '-':
193: if (shellmode) {
194: --curchar;
195: t = getident();
196: } else if (*curchar == '>') {
197: ++curchar;
198: t = ARROW;
199: } else {
200: t = '-';
201: }
202: break;
203:
204: case EOF:
205: t = 0;
206: break;
207:
208: default:
209: if (shellmode and index("!&*()[]", c) == nil) {
210: --curchar;
211: t = getident();
212: } else {
213: t = c;
214: }
215: break;
216: }
217: }
218: # ifdef LEXDEBUG
219: if (lexdebug) {
220: fprintf(stderr, "yylex returns ");
221: print_token(stderr, t);
222: fprintf(stderr, "\n");
223: }
224: # endif
225: return t;
226: }
227:
228: /*
229: * Parser error handling.
230: */
231:
232: public yyerror(s)
233: String s;
234: {
235: register Char *p, *tokenbegin, *tokenend;
236: register Integer len;
237:
238: if (streq(s, "syntax error")) {
239: beginerrmsg();
240: tokenend = curchar - 1;
241: tokenbegin = tokenend;
242: while (lexclass[*tokenbegin] != WHITE and tokenbegin > &linebuf[0]) {
243: --tokenbegin;
244: }
245: len = tokenend - tokenbegin + 1;
246: p = tokenbegin;
247: if (p > &linebuf[0]) {
248: while (lexclass[*p] == WHITE and p > &linebuf[0]) {
249: --p;
250: }
251: }
252: if (p == &linebuf[0]) {
253: fprintf(stderr, "unrecognized command \"%.*s\"", len, tokenbegin);
254: } else {
255: fprintf(stderr, "syntax error");
256: if (len != 0) {
257: fprintf(stderr, " on \"%.*s\"", len, tokenbegin);
258: }
259: }
260: enderrmsg();
261: } else {
262: error(s);
263: }
264: }
265:
266: /*
267: * Eat the current line.
268: */
269:
270: public gobble()
271: {
272: curchar = linebuf;
273: linebuf[0] = '\0';
274: }
275:
276: /*
277: * Scan an identifier and check to see if it's a keyword.
278: */
279:
280: private Token getident()
281: {
282: char buf[256];
283: register Char *p, *q;
284: register Token t;
285:
286: p = curchar;
287: q = buf;
288: if (shellmode) {
289: do {
290: *q++ = *p++;
291: } while (index(" \t\n!&<>*[]()", *p) == nil);
292: } else {
293: do {
294: *q++ = *p++;
295: } while (isalnum(*p));
296: }
297: curchar = p;
298: *q = '\0';
299: yylval.y_name = identname(buf, false);
300: if (not shellmode) {
301: t = findkeyword(yylval.y_name);
302: if (t == nil) {
303: t = NAME;
304: }
305: } else {
306: t = NAME;
307: }
308: return t;
309: }
310:
311: /*
312: * Scan a number.
313: */
314:
315: private Token getnum()
316: {
317: char buf[256];
318: register Char *p, *q;
319: register Token t;
320: Integer base;
321:
322: p = curchar;
323: q = buf;
324: if (*p == '0') {
325: if (*(p+1) == 'x') {
326: p += 2;
327: base = 16;
328: } else {
329: base = 8;
330: }
331: } else {
332: base = 10;
333: }
334: if (base == 16) {
335: do {
336: *q++ = *p++;
337: } while (ishexdigit(*p));
338: } else {
339: do {
340: *q++ = *p++;
341: } while (isdigit(*p));
342: }
343: if (*p == '.') {
344: do {
345: *q++ = *p++;
346: } while (isdigit(*p));
347: if (*p == 'e' or *p == 'E') {
348: p++;
349: if (*p == '+' or *p == '-' or isdigit(*p)) {
350: *q++ = 'e';
351: do {
352: *q++ = *p++;
353: } while (isdigit(*p));
354: }
355: }
356: *q = '\0';
357: yylval.y_real = atof(buf);
358: t = REAL;
359: } else {
360: *q = '\0';
361: switch (base) {
362: case 10:
363: yylval.y_int = atol(buf);
364: break;
365:
366: case 8:
367: yylval.y_int = octal(buf);
368: break;
369:
370: case 16:
371: yylval.y_int = hex(buf);
372: break;
373:
374: default:
375: badcaseval(base);
376: }
377: t = INT;
378: }
379: curchar = p;
380: return t;
381: }
382:
383: /*
384: * Convert a string of octal digits to an integer.
385: */
386:
387: private int octal(s)
388: String s;
389: {
390: register Char *p;
391: register Integer n;
392:
393: n = 0;
394: for (p = s; *p != '\0'; p++) {
395: n = 8*n + (*p - '0');
396: }
397: return n;
398: }
399:
400: /*
401: * Convert a string of hexadecimal digits to an integer.
402: */
403:
404: private int hex(s)
405: String s;
406: {
407: register Char *p;
408: register Integer n;
409:
410: n = 0;
411: for (p = s; *p != '\0'; p++) {
412: n *= 16;
413: if (*p >= 'a' and *p <= 'f') {
414: n += (*p - 'a' + 10);
415: } else if (*p >= 'A' and *p <= 'F') {
416: n += (*p - 'A' + 10);
417: } else {
418: n += (*p - '0');
419: }
420: }
421: return n;
422: }
423:
424: /*
425: * Scan a string.
426: */
427:
428: private Token getstring()
429: {
430: char buf[256];
431: register Char *p, *q;
432: Boolean endofstring;
433:
434: p = curchar;
435: q = buf;
436: endofstring = false;
437: while (not endofstring) {
438: if (*p == '\n' or *p == '\0') {
439: error("non-terminated string");
440: endofstring = true;
441: } else if (*p == '"' or *p == '\'') {
442: if (*(p+1) != *p) {
443: endofstring = true;
444: } else {
445: *q++ = *p;
446: }
447: } else {
448: curchar = p;
449: *q++ = charcon(p);
450: p = curchar;
451: }
452: p++;
453: }
454: curchar = p;
455: *q = '\0';
456: yylval.y_string = strdup(buf);
457: return STRING;
458: }
459:
460: /*
461: * Process a character constant.
462: * Watch out for backslashes.
463: */
464:
465: private Char charcon(p)
466: char *p;
467: {
468: char c, buf[10], *q;
469:
470: if (*p == '\\') {
471: ++p;
472: if (*p != '\\') {
473: q = buf;
474: do {
475: *q++ = *p++;
476: } while (*p != '\\' and *p != '\'' and *p != '\n' and *p != '\0');
477: *q = '\0';
478: if (isdigit(buf[0])) {
479: c = (Char) octal(buf);
480: } else {
481: c = charlookup(buf);
482: }
483: curchar = p - 1;
484: } else {
485: c = '\\';
486: }
487: } else {
488: c = *p;
489: }
490: return c;
491: }
492:
493: /*
494: * Do a lookup for a ASCII character name.
495: */
496:
497: private String ascii[] = {
498: "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
499: "BS", "HT", "NL", "VT", "NP", "CR", "SO", "SI",
500: "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
501: "CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US",
502: "SP", nil
503: };
504:
505: private char charlookup(s)
506: String s;
507: {
508: register int i;
509:
510: for (i = 0; ascii[i] != NULL; i++) {
511: if (streq(s, ascii[i])) {
512: return i;
513: }
514: }
515: if (streq(s, "DEL")) {
516: return 0177;
517: }
518: error("unknown ascii name \"%s\"", s);
519: return '?';
520: }
521:
522: /*
523: * Input file management routines.
524: */
525:
526: public setinput(filename)
527: Filename filename;
528: {
529: File f;
530:
531: f = fopen(filename, "r");
532: if (f == nil) {
533: error("can't open %s", filename);
534: } else {
535: if (curinclindex >= MAXINCLDEPTH) {
536: error("unreasonable input nesting on \"%s\"", filename);
537: }
538: inclinfo[curinclindex].savefile = in;
539: inclinfo[curinclindex].savefn = errfilename;
540: inclinfo[curinclindex].savelineno = errlineno;
541: curinclindex++;
542: in = f;
543: errfilename = filename;
544: errlineno = 1;
545: }
546: }
547:
548: private Boolean eofinput()
549: {
550: register Boolean b;
551:
552: if (curinclindex == 0) {
553: if (isterm(in)) {
554: putchar('\n');
555: clearerr(in);
556: b = false;
557: } else {
558: b = true;
559: }
560: } else {
561: fclose(in);
562: --curinclindex;
563: in = inclinfo[curinclindex].savefile;
564: errfilename = inclinfo[curinclindex].savefn;
565: errlineno = inclinfo[curinclindex].savelineno;
566: b = false;
567: }
568: return b;
569: }
570:
571: /*
572: * Pop the current input. Return whether successful.
573: */
574:
575: public Boolean popinput()
576: {
577: Boolean b;
578:
579: if (curinclindex == 0) {
580: b = false;
581: } else {
582: b = (Boolean) (not eofinput());
583: }
584: return b;
585: }
586:
587: /*
588: * Return whether we are currently reading from standard input.
589: */
590:
591: public Boolean isstdin()
592: {
593: return (Boolean) (in == stdin);
594: }
595:
596: /*
597: * Send the current line to the shell.
598: */
599:
600: public shellline()
601: {
602: register char *p;
603:
604: p = curchar;
605: while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) {
606: ++p;
607: }
608: shell(p);
609: if (*p == '\0' and isterm(in)) {
610: putchar('\n');
611: }
612: erecover();
613: }
614:
615: /*
616: * Read the rest of the current line in "shell mode".
617: */
618:
619: public beginshellmode()
620: {
621: shellmode = true;
622: }
623:
624: /*
625: * Print out a token for debugging.
626: */
627:
628: public print_token(f, t)
629: File f;
630: Token t;
631: {
632: if (t == '\n') {
633: fprintf(f, "char '\\n'");
634: } else if (t == EOF) {
635: fprintf(f, "EOF");
636: } else if (t < 256) {
637: fprintf(f, "char '%c'", t);
638: } else {
639: fprintf(f, "\"%s\"", keywdstring(t));
640: }
641: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.