|
|
1.1 root 1: /*
2: * Copyright (c) 1983 Regents of the University of California.
3: * All rights reserved. The Berkeley software License Agreement
4: * specifies the terms and conditions for redistribution.
5: */
6:
/* SCCS version string; left out when the file is processed with lint defined. */
#ifndef lint
static char sccsid[] = "@(#)scanner.c 5.1 (Berkeley) 5/31/85";
#endif /* not lint */

/* RCS header string for this file. */
static char rcsid[] = "$Header: scanner.c,v 1.5 84/12/26 10:42:05 linton Exp $";
12:
13: /*
14: * Debugger scanner.
15: */
16:
17: #include "defs.h"
18: #include "scanner.h"
19: #include "main.h"
20: #include "keywords.h"
21: #include "tree.h"
22: #include "symbols.h"
23: #include "names.h"
24: #include "y.tab.h"
25:
/*
 * Declarations that are only seen when the macro `public' is not defined.
 * NOTE(review): presumably this section is what the module exports to its
 * header (scanner.h) -- confirm against the build before changing it.
 */
#ifndef public
typedef int Token;	/* type of the token codes returned by yylex() */

/* Size in bytes of the scanner's line buffer (scanner_linebuf). */
#define MAXLINESIZE 10240

#endif
32:
33: public String initfile = ".dbxinit";
34:
35: typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass;
36:
37: private Charclass class[256 + 1];
38: private Charclass *lexclass = class + 1;
39:
40: #define isdigit(c) (lexclass[c] == NUM)
41: #define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM)
42: #define ishexdigit(c) ( \
43: isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \
44: )
45:
46: public boolean chkalias;
47: public char scanner_linebuf[MAXLINESIZE];
48:
49: private File in;
50: private char *curchar, *prevchar;
51:
52: #define MAXINCLDEPTH 10
53:
54: private struct {
55: File savefile;
56: Filename savefn;
57: int savelineno;
58: } inclinfo[MAXINCLDEPTH];
59:
60: private unsigned int curinclindex;
61:
62: private Token getident();
63: private Token getnum();
64: private Token getstring();
65: private Boolean eofinput();
66: private char charcon();
67:
68: private enterlexclass(class, s)
69: Charclass class;
70: String s;
71: {
72: register char *p;
73:
74: for (p = s; *p != '\0'; p++) {
75: lexclass[*p] = class;
76: }
77: }
78:
79: public scanner_init()
80: {
81: register Integer i;
82:
83: for (i = 0; i < 257; i++) {
84: class[i] = OTHER;
85: }
86: enterlexclass(WHITE, " \t");
87: enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz");
88: enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$");
89: enterlexclass(NUM, "0123456789");
90: in = stdin;
91: errfilename = nil;
92: errlineno = 0;
93: curchar = scanner_linebuf;
94: scanner_linebuf[0] = '\0';
95: chkalias = true;
96: }
97:
98: /*
99: * Read a single token.
100: *
101: * The input is line buffered. Tokens cannot cross line boundaries.
102: *
103: * There are two "modes" of operation: one as in a compiler,
104: * and one for reading shell-like syntax. In the first mode
105: * there is the additional choice of doing alias processing.
106: */
107:
108: private Boolean shellmode;
109:
110: public Token yylex()
111: {
112: register int c;
113: register char *p;
114: register Token t;
115: String line;
116: integer n;
117:
118: p = curchar;
119: if (*p == '\0') {
120: do {
121: if (isterm(in)) {
122: printf("(%s) ", cmdname);
123: }
124: fflush(stdout);
125: line = fgets(scanner_linebuf, MAXLINESIZE, in);
126: } while (line == nil and not eofinput());
127: if (line == nil) {
128: c = EOF;
129: } else {
130: p = scanner_linebuf;
131: while (lexclass[*p] == WHITE) {
132: p++;
133: }
134: shellmode = false;
135: }
136: chkalias = true;
137: } else {
138: while (lexclass[*p] == WHITE) {
139: p++;
140: }
141: }
142: curchar = p;
143: prevchar = curchar;
144: c = *p;
145: if (lexclass[c] == ALPHA) {
146: t = getident(chkalias);
147: } else if (lexclass[c] == NUM) {
148: if (shellmode) {
149: t = getident(chkalias);
150: } else {
151: t = getnum();
152: }
153: } else {
154: ++curchar;
155: switch (c) {
156: case '\n':
157: t = '\n';
158: if (errlineno != 0) {
159: errlineno++;
160: }
161: break;
162:
163: case '"':
164: case '\'':
165: t = getstring(c);
166: break;
167:
168: case '.':
169: if (shellmode) {
170: --curchar;
171: t = getident(chkalias);
172: } else if (isdigit(*curchar)) {
173: --curchar;
174: t = getnum();
175: } else {
176: t = '.';
177: }
178: break;
179:
180: case '-':
181: if (shellmode) {
182: --curchar;
183: t = getident(chkalias);
184: } else if (*curchar == '>') {
185: ++curchar;
186: t = ARROW;
187: } else {
188: t = '-';
189: }
190: break;
191:
192: case '#':
193: if (not isterm(in)) {
194: *p = '\0';
195: curchar = p;
196: t = '\n';
197: ++errlineno;
198: } else {
199: t = '#';
200: }
201: break;
202:
203: case '\\':
204: if (*(p+1) == '\n') {
205: n = MAXLINESIZE - (p - &scanner_linebuf[0]);
206: if (n > 1) {
207: if (fgets(p, n, in) == nil) {
208: t = 0;
209: } else {
210: curchar = p;
211: t = yylex();
212: }
213: } else {
214: t = '\\';
215: }
216: } else {
217: t = '\\';
218: }
219: break;
220:
221: case EOF:
222: t = 0;
223: break;
224:
225: default:
226: if (shellmode and index("!&*<>()[]", c) == nil) {
227: --curchar;
228: t = getident(chkalias);
229: } else {
230: t = c;
231: }
232: break;
233: }
234: }
235: chkalias = false;
236: # ifdef LEXDEBUG
237: if (lexdebug) {
238: fprintf(stderr, "yylex returns ");
239: print_token(stderr, t);
240: fprintf(stderr, "\n");
241: }
242: # endif
243: return t;
244: }
245:
246: /*
247: * Put the given string before the current character
248: * in the current line, thus inserting it into the input stream.
249: */
250:
251: public insertinput (s)
252: String s;
253: {
254: register char *p, *q;
255: int need, avail, shift;
256:
257: q = s;
258: need = strlen(q);
259: avail = curchar - &scanner_linebuf[0];
260: if (need <= avail) {
261: curchar = &scanner_linebuf[avail - need];
262: p = curchar;
263: while (*q != '\0') {
264: *p++ = *q++;
265: }
266: } else {
267: p = curchar;
268: while (*p != '\0') {
269: ++p;
270: }
271: shift = need - avail;
272: if (p + shift >= &scanner_linebuf[MAXLINESIZE]) {
273: error("alias expansion too large");
274: }
275: for (;;) {
276: *(p + shift) = *p;
277: if (p == curchar) {
278: break;
279: }
280: --p;
281: }
282: p = &scanner_linebuf[0];
283: while (*q != '\0') {
284: *p++ = *q++;
285: }
286: curchar = &scanner_linebuf[0];
287: }
288: }
289:
290: /*
291: * Get the actuals for a macro call.
292: */
293:
294: private String movetochar (str, c)
295: String str;
296: char c;
297: {
298: register char *p;
299:
300: while (*p != c) {
301: if (*p == '\0') {
302: error("missing ')' in macro call");
303: } else if (*p == ')') {
304: error("not enough parameters in macro call");
305: } else if (*p == ',') {
306: error("too many parameters in macro call");
307: }
308: ++p;
309: }
310: return p;
311: }
312:
313: private String *getactuals (n)
314: integer n;
315: {
316: String *a;
317: register char *p;
318: int i;
319:
320: a = newarr(String, n);
321: p = curchar;
322: while (*p != '(') {
323: if (lexclass[*p] != WHITE) {
324: error("missing actuals for macro");
325: }
326: ++p;
327: }
328: ++p;
329: for (i = 0; i < n - 1; i++) {
330: a[i] = p;
331: p = movetochar(p, ',');
332: *p = '\0';
333: ++p;
334: }
335: a[n-1] = p;
336: p = movetochar(p, ')');
337: *p = '\0';
338: curchar = p + 1;
339: return a;
340: }
341:
342: /*
343: * Do command macro expansion, assuming curchar points to the beginning
344: * of the actuals, and we are not in shell mode.
345: */
346:
347: private expand (pl, str)
348: List pl;
349: String str;
350: {
351: char buf[4096], namebuf[100];
352: register char *p, *q, *r;
353: String *actual;
354: Name n;
355: integer i;
356: boolean match;
357:
358: if (pl == nil) {
359: insertinput(str);
360: } else {
361: actual = getactuals(list_size(pl));
362: p = buf;
363: q = str;
364: while (*q != '\0') {
365: if (p >= &buf[4096]) {
366: error("alias expansion too large");
367: }
368: if (lexclass[*q] == ALPHA) {
369: r = namebuf;
370: do {
371: *r++ = *q++;
372: } while (isalnum(*q));
373: *r = '\0';
374: i = 0;
375: match = false;
376: foreach(Name, n, pl)
377: if (streq(ident(n), namebuf)) {
378: match = true;
379: break;
380: }
381: ++i;
382: endfor
383: if (match) {
384: r = actual[i];
385: } else {
386: r = namebuf;
387: }
388: while (*r != '\0') {
389: *p++ = *r++;
390: }
391: } else {
392: *p++ = *q++;
393: }
394: }
395: *p = '\0';
396: insertinput(buf);
397: }
398: }
399:
400: /*
401: * Parser error handling.
402: */
403:
404: public yyerror(s)
405: String s;
406: {
407: register char *p;
408: register integer start;
409:
410: if (streq(s, "syntax error")) {
411: beginerrmsg();
412: p = prevchar;
413: start = p - &scanner_linebuf[0];
414: if (p > &scanner_linebuf[0]) {
415: while (lexclass[*p] == WHITE and p > &scanner_linebuf[0]) {
416: --p;
417: }
418: }
419: fprintf(stderr, "%s", scanner_linebuf);
420: if (start != 0) {
421: fprintf(stderr, "%*c", start, ' ');
422: }
423: if (p == &scanner_linebuf[0]) {
424: fprintf(stderr, "^ unrecognized command");
425: } else {
426: fprintf(stderr, "^ syntax error");
427: }
428: enderrmsg();
429: } else {
430: error(s);
431: }
432: }
433:
434: /*
435: * Eat the current line.
436: */
437:
438: public gobble ()
439: {
440: curchar = scanner_linebuf;
441: scanner_linebuf[0] = '\0';
442: }
443:
444: /*
445: * Scan an identifier.
446: *
447: * If chkalias is true, check first to see if it's an alias.
448: * Otherwise, check to see if it's a keyword.
449: */
450:
451: private Token getident (chkalias)
452: boolean chkalias;
453: {
454: char buf[1024];
455: register char *p, *q;
456: register Token t;
457: List pl;
458: String str;
459:
460: p = curchar;
461: q = buf;
462: if (shellmode) {
463: do {
464: *q++ = *p++;
465: } while (index(" \t\n!&<>*[]()'\"", *p) == nil);
466: } else {
467: do {
468: *q++ = *p++;
469: } while (isalnum(*p));
470: }
471: curchar = p;
472: *q = '\0';
473: yylval.y_name = identname(buf, false);
474: if (chkalias) {
475: if (findalias(yylval.y_name, &pl, &str)) {
476: expand(pl, str);
477: while (lexclass[*curchar] == WHITE) {
478: ++curchar;
479: }
480: if (pl == nil) {
481: t = getident(false);
482: } else {
483: t = getident(true);
484: }
485: } else if (shellmode) {
486: t = NAME;
487: } else {
488: t = findkeyword(yylval.y_name, NAME);
489: }
490: } else if (shellmode) {
491: t = NAME;
492: } else {
493: t = findkeyword(yylval.y_name, NAME);
494: }
495: return t;
496: }
497:
498: /*
499: * Scan a number.
500: */
501:
502: private Token getnum()
503: {
504: char buf[1024];
505: register Char *p, *q;
506: register Token t;
507: Integer base;
508:
509: p = curchar;
510: q = buf;
511: if (*p == '0') {
512: if (*(p+1) == 'x') {
513: p += 2;
514: base = 16;
515: } else if (*(p+1) == 't') {
516: base = 10;
517: } else if (varIsSet("$hexin")) {
518: base = 16;
519: } else {
520: base = 8;
521: }
522: } else if (varIsSet("$hexin")) {
523: base = 16;
524: } else if (varIsSet("$octin")) {
525: base = 8;
526: } else {
527: base = 10;
528: }
529: if (base == 16) {
530: do {
531: *q++ = *p++;
532: } while (ishexdigit(*p));
533: } else {
534: do {
535: *q++ = *p++;
536: } while (isdigit(*p));
537: }
538: if (*p == '.') {
539: do {
540: *q++ = *p++;
541: } while (isdigit(*p));
542: if (*p == 'e' or *p == 'E') {
543: p++;
544: if (*p == '+' or *p == '-' or isdigit(*p)) {
545: *q++ = 'e';
546: do {
547: *q++ = *p++;
548: } while (isdigit(*p));
549: }
550: }
551: *q = '\0';
552: yylval.y_real = atof(buf);
553: t = REAL;
554: } else {
555: *q = '\0';
556: switch (base) {
557: case 10:
558: yylval.y_int = atol(buf);
559: break;
560:
561: case 8:
562: yylval.y_int = octal(buf);
563: break;
564:
565: case 16:
566: yylval.y_int = hex(buf);
567: break;
568:
569: default:
570: badcaseval(base);
571: }
572: t = INT;
573: }
574: curchar = p;
575: return t;
576: }
577:
578: /*
579: * Convert a string of octal digits to an integer.
580: */
581:
582: private int octal(s)
583: String s;
584: {
585: register Char *p;
586: register Integer n;
587:
588: n = 0;
589: for (p = s; *p != '\0'; p++) {
590: n = 8*n + (*p - '0');
591: }
592: return n;
593: }
594:
595: /*
596: * Convert a string of hexadecimal digits to an integer.
597: */
598:
599: private int hex(s)
600: String s;
601: {
602: register Char *p;
603: register Integer n;
604:
605: n = 0;
606: for (p = s; *p != '\0'; p++) {
607: n *= 16;
608: if (*p >= 'a' and *p <= 'f') {
609: n += (*p - 'a' + 10);
610: } else if (*p >= 'A' and *p <= 'F') {
611: n += (*p - 'A' + 10);
612: } else {
613: n += (*p - '0');
614: }
615: }
616: return n;
617: }
618:
619: /*
620: * Scan a string.
621: */
622:
623: private Token getstring (quote)
624: char quote;
625: {
626: register char *p, *q;
627: char buf[MAXLINESIZE];
628: boolean endofstring;
629: Token t;
630:
631: p = curchar;
632: q = buf;
633: endofstring = false;
634: while (not endofstring) {
635: if (*p == '\\' and *(p+1) == '\n') {
636: if (fgets(scanner_linebuf, MAXLINESIZE, in) == nil) {
637: error("non-terminated string");
638: }
639: p = &scanner_linebuf[0] - 1;
640: } else if (*p == '\n' or *p == '\0') {
641: error("non-terminated string");
642: endofstring = true;
643: } else if (*p == quote) {
644: endofstring = true;
645: } else {
646: curchar = p;
647: *q++ = charcon(p);
648: p = curchar;
649: }
650: p++;
651: }
652: curchar = p;
653: *q = '\0';
654: if (quote == '\'' and buf[1] == '\0') {
655: yylval.y_char = buf[0];
656: t = CHAR;
657: } else {
658: yylval.y_string = strdup(buf);
659: t = STRING;
660: }
661: return t;
662: }
663:
664: /*
665: * Process a character constant.
666: * Watch out for backslashes.
667: */
668:
669: private char charcon (s)
670: String s;
671: {
672: register char *p, *q;
673: char c, buf[10];
674:
675: p = s;
676: if (*p == '\\') {
677: ++p;
678: switch (*p) {
679: case '\\':
680: c = '\\';
681: break;
682:
683: case 'n':
684: c = '\n';
685: break;
686:
687: case 'r':
688: c = '\r';
689: break;
690:
691: case 't':
692: c = '\t';
693: break;
694:
695: case '\'':
696: case '"':
697: c = *p;
698: break;
699:
700: default:
701: if (isdigit(*p)) {
702: q = buf;
703: do {
704: *q++ = *p++;
705: } while (isdigit(*p));
706: *q = '\0';
707: c = (char) octal(buf);
708: }
709: --p;
710: break;
711: }
712: curchar = p;
713: } else {
714: c = *p;
715: }
716: return c;
717: }
718:
719: /*
720: * Input file management routines.
721: */
722:
723: public setinput(filename)
724: Filename filename;
725: {
726: File f;
727:
728: f = fopen(filename, "r");
729: if (f == nil) {
730: error("can't open %s", filename);
731: } else {
732: if (curinclindex >= MAXINCLDEPTH) {
733: error("unreasonable input nesting on \"%s\"", filename);
734: }
735: inclinfo[curinclindex].savefile = in;
736: inclinfo[curinclindex].savefn = errfilename;
737: inclinfo[curinclindex].savelineno = errlineno;
738: curinclindex++;
739: in = f;
740: errfilename = filename;
741: errlineno = 1;
742: }
743: }
744:
745: private Boolean eofinput()
746: {
747: register Boolean b;
748:
749: if (curinclindex == 0) {
750: if (isterm(in)) {
751: putchar('\n');
752: clearerr(in);
753: b = false;
754: } else {
755: b = true;
756: }
757: } else {
758: fclose(in);
759: --curinclindex;
760: in = inclinfo[curinclindex].savefile;
761: errfilename = inclinfo[curinclindex].savefn;
762: errlineno = inclinfo[curinclindex].savelineno;
763: b = false;
764: }
765: return b;
766: }
767:
768: /*
769: * Pop the current input. Return whether successful.
770: */
771:
772: public Boolean popinput()
773: {
774: Boolean b;
775:
776: if (curinclindex == 0) {
777: b = false;
778: } else {
779: b = (Boolean) (not eofinput());
780: }
781: return b;
782: }
783:
784: /*
785: * Return whether we are currently reading from standard input.
786: */
787:
788: public Boolean isstdin()
789: {
790: return (Boolean) (in == stdin);
791: }
792:
793: /*
794: * Send the current line to the shell.
795: */
796:
797: public shellline()
798: {
799: register char *p;
800:
801: p = curchar;
802: while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) {
803: ++p;
804: }
805: shell(p);
806: if (*p == '\0' and isterm(in)) {
807: putchar('\n');
808: }
809: erecover();
810: }
811:
812: /*
813: * Read the rest of the current line in "shell mode".
814: */
815:
816: public beginshellmode()
817: {
818: shellmode = true;
819: }
820:
821: /*
822: * Print out a token for debugging.
823: */
824:
825: public print_token(f, t)
826: File f;
827: Token t;
828: {
829: if (t == '\n') {
830: fprintf(f, "char '\\n'");
831: } else if (t == EOF) {
832: fprintf(f, "EOF");
833: } else if (t < 256) {
834: fprintf(f, "char '%c'", t);
835: } else {
836: fprintf(f, "\"%s\"", keywdstring(t));
837: }
838: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.