|
|
1.1 root 1: /*
2: * Copyright (c) 1983 The Regents of the University of California.
3: * All rights reserved.
4: *
5: * Redistribution and use in source and binary forms are permitted
6: * provided that: (1) source distributions retain this entire copyright
7: * notice and comment, and (2) distributions including binaries display
8: * the following acknowledgement: ``This product includes software
9: * developed by the University of California, Berkeley and its contributors''
10: * in the documentation or other materials provided with the distribution
11: * and in all advertising materials mentioning features or use of this
12: * software. Neither the name of the University nor the names of its
13: * contributors may be used to endorse or promote products derived
14: * from this software without specific prior written permission.
15: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
16: * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
17: * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
18: */
19:
20: #ifndef lint
21: static char sccsid[] = "@(#)scanner.c 5.3 (Berkeley) 6/1/90";
22: #endif /* not lint */
23:
24: /*
25: * Debugger scanner.
26: */
27:
28: #include "defs.h"
29: #include "scanner.h"
30: #include "main.h"
31: #include "keywords.h"
32: #include "tree.h"
33: #include "symbols.h"
34: #include "names.h"
35: #include "y.tab.h"
36:
37: #ifndef public
38: typedef int Token;
39:
40: #define MAXLINESIZE 10240
41:
42: #endif
43:
44: public String initfile = ".dbxinit";
45:
46: typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass;
47:
48: private Charclass class[256 + 1];
49: private Charclass *lexclass = class + 1;
50:
51: #define isdigit(c) (lexclass[c] == NUM)
52: #define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM)
53: #define ishexdigit(c) ( \
54: isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \
55: )
56:
57: public boolean chkalias;
58: public char scanner_linebuf[MAXLINESIZE];
59:
60: private File in;
61: private char *curchar, *prevchar;
62:
63: #define MAXINCLDEPTH 10
64:
65: private struct {
66: File savefile;
67: Filename savefn;
68: int savelineno;
69: } inclinfo[MAXINCLDEPTH];
70:
71: private unsigned int curinclindex;
72:
73: private Token getident();
74: private Token getnum();
75: private Token getstring();
76: private Boolean eofinput();
77: private char charcon();
78:
79: private enterlexclass(class, s)
80: Charclass class;
81: String s;
82: {
83: register char *p;
84:
85: for (p = s; *p != '\0'; p++) {
86: lexclass[*p] = class;
87: }
88: }
89:
90: public scanner_init()
91: {
92: register Integer i;
93:
94: for (i = 0; i < 257; i++) {
95: class[i] = OTHER;
96: }
97: enterlexclass(WHITE, " \t");
98: enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz");
99: enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$");
100: enterlexclass(NUM, "0123456789");
101: in = stdin;
102: errfilename = nil;
103: errlineno = 0;
104: curchar = scanner_linebuf;
105: scanner_linebuf[0] = '\0';
106: chkalias = true;
107: }
108:
109: /*
110: * Read a single token.
111: *
112: * The input is line buffered. Tokens cannot cross line boundaries.
113: *
114: * There are two "modes" of operation: one as in a compiler,
115: * and one for reading shell-like syntax. In the first mode
116: * there is the additional choice of doing alias processing.
117: */
118:
119: private Boolean shellmode;
120:
121: public Token yylex()
122: {
123: register int c;
124: register char *p;
125: register Token t;
126: String line;
127: integer n;
128:
129: p = curchar;
130: if (*p == '\0') {
131: do {
132: if (isterm(in)) {
133: printf("(%s) ", cmdname);
134: }
135: fflush(stdout);
136: line = fgets(scanner_linebuf, MAXLINESIZE, in);
137: } while (line == nil and not eofinput());
138: if (line == nil) {
139: c = EOF;
140: } else {
141: p = scanner_linebuf;
142: while (lexclass[*p] == WHITE) {
143: p++;
144: }
145: shellmode = false;
146: }
147: chkalias = true;
148: } else {
149: while (lexclass[*p] == WHITE) {
150: p++;
151: }
152: }
153: curchar = p;
154: prevchar = curchar;
155: c = *p;
156: if (lexclass[c] == ALPHA) {
157: t = getident(chkalias);
158: } else if (lexclass[c] == NUM) {
159: if (shellmode) {
160: t = getident(chkalias);
161: } else {
162: t = getnum();
163: }
164: } else {
165: ++curchar;
166: switch (c) {
167: case '\n':
168: t = '\n';
169: if (errlineno != 0) {
170: errlineno++;
171: }
172: break;
173:
174: case '"':
175: case '\'':
176: t = getstring(c);
177: break;
178:
179: case '.':
180: if (shellmode) {
181: --curchar;
182: t = getident(chkalias);
183: } else if (isdigit(*curchar)) {
184: --curchar;
185: t = getnum();
186: } else {
187: t = '.';
188: }
189: break;
190:
191: case '-':
192: if (shellmode) {
193: --curchar;
194: t = getident(chkalias);
195: } else if (*curchar == '>') {
196: ++curchar;
197: t = ARROW;
198: } else {
199: t = '-';
200: }
201: break;
202:
203: case '#':
204: if (not isterm(in)) {
205: *p = '\0';
206: curchar = p;
207: t = '\n';
208: ++errlineno;
209: } else {
210: t = '#';
211: }
212: break;
213:
214: case '\\':
215: if (*(p+1) == '\n') {
216: n = MAXLINESIZE - (p - &scanner_linebuf[0]);
217: if (n > 1) {
218: if (fgets(p, n, in) == nil) {
219: t = 0;
220: } else {
221: curchar = p;
222: t = yylex();
223: }
224: } else {
225: t = '\\';
226: }
227: } else {
228: t = '\\';
229: }
230: break;
231:
232: case EOF:
233: t = 0;
234: break;
235:
236: default:
237: if (shellmode and index("!&*<>()[]", c) == nil) {
238: --curchar;
239: t = getident(chkalias);
240: } else {
241: t = c;
242: }
243: break;
244: }
245: }
246: chkalias = false;
247: # ifdef LEXDEBUG
248: if (lexdebug) {
249: fprintf(stderr, "yylex returns ");
250: print_token(stderr, t);
251: fprintf(stderr, "\n");
252: }
253: # endif
254: return t;
255: }
256:
257: /*
258: * Put the given string before the current character
259: * in the current line, thus inserting it into the input stream.
260: */
261:
262: public insertinput (s)
263: String s;
264: {
265: register char *p, *q;
266: int need, avail, shift;
267:
268: q = s;
269: need = strlen(q);
270: avail = curchar - &scanner_linebuf[0];
271: if (need <= avail) {
272: curchar = &scanner_linebuf[avail - need];
273: p = curchar;
274: while (*q != '\0') {
275: *p++ = *q++;
276: }
277: } else {
278: p = curchar;
279: while (*p != '\0') {
280: ++p;
281: }
282: shift = need - avail;
283: if (p + shift >= &scanner_linebuf[MAXLINESIZE]) {
284: error("alias expansion too large");
285: }
286: for (;;) {
287: *(p + shift) = *p;
288: if (p == curchar) {
289: break;
290: }
291: --p;
292: }
293: p = &scanner_linebuf[0];
294: while (*q != '\0') {
295: *p++ = *q++;
296: }
297: curchar = &scanner_linebuf[0];
298: }
299: }
300:
301: /*
302: * Get the actuals for a macro call.
303: */
304:
305: private String movetochar (str, c)
306: String str;
307: char c;
308: {
309: register char *p;
310:
311: while (*p != c) {
312: if (*p == '\0') {
313: error("missing ')' in macro call");
314: } else if (*p == ')') {
315: error("not enough parameters in macro call");
316: } else if (*p == ',') {
317: error("too many parameters in macro call");
318: }
319: ++p;
320: }
321: return p;
322: }
323:
324: private String *getactuals (n)
325: integer n;
326: {
327: String *a;
328: register char *p;
329: int i;
330:
331: a = newarr(String, n);
332: p = curchar;
333: while (*p != '(') {
334: if (lexclass[*p] != WHITE) {
335: error("missing actuals for macro");
336: }
337: ++p;
338: }
339: ++p;
340: for (i = 0; i < n - 1; i++) {
341: a[i] = p;
342: p = movetochar(p, ',');
343: *p = '\0';
344: ++p;
345: }
346: a[n-1] = p;
347: p = movetochar(p, ')');
348: *p = '\0';
349: curchar = p + 1;
350: return a;
351: }
352:
353: /*
354: * Do command macro expansion, assuming curchar points to the beginning
355: * of the actuals, and we are not in shell mode.
356: */
357:
358: private expand (pl, str)
359: List pl;
360: String str;
361: {
362: char buf[4096], namebuf[100];
363: register char *p, *q, *r;
364: String *actual;
365: Name n;
366: integer i;
367: boolean match;
368:
369: if (pl == nil) {
370: insertinput(str);
371: } else {
372: actual = getactuals(list_size(pl));
373: p = buf;
374: q = str;
375: while (*q != '\0') {
376: if (p >= &buf[4096]) {
377: error("alias expansion too large");
378: }
379: if (lexclass[*q] == ALPHA) {
380: r = namebuf;
381: do {
382: *r++ = *q++;
383: } while (isalnum(*q));
384: *r = '\0';
385: i = 0;
386: match = false;
387: foreach(Name, n, pl)
388: if (streq(ident(n), namebuf)) {
389: match = true;
390: break;
391: }
392: ++i;
393: endfor
394: if (match) {
395: r = actual[i];
396: } else {
397: r = namebuf;
398: }
399: while (*r != '\0') {
400: *p++ = *r++;
401: }
402: } else {
403: *p++ = *q++;
404: }
405: }
406: *p = '\0';
407: insertinput(buf);
408: }
409: }
410:
411: /*
412: * Parser error handling.
413: */
414:
415: public yyerror(s)
416: String s;
417: {
418: register char *p;
419: register integer start;
420:
421: if (streq(s, "syntax error")) {
422: beginerrmsg();
423: p = prevchar;
424: start = p - &scanner_linebuf[0];
425: if (p > &scanner_linebuf[0]) {
426: while (lexclass[*p] == WHITE and p > &scanner_linebuf[0]) {
427: --p;
428: }
429: }
430: fprintf(stderr, "%s", scanner_linebuf);
431: if (start != 0) {
432: fprintf(stderr, "%*c", start, ' ');
433: }
434: if (p == &scanner_linebuf[0]) {
435: fprintf(stderr, "^ unrecognized command");
436: } else {
437: fprintf(stderr, "^ syntax error");
438: }
439: enderrmsg();
440: } else {
441: error(s);
442: }
443: }
444:
445: /*
446: * Eat the current line.
447: */
448:
449: public gobble ()
450: {
451: curchar = scanner_linebuf;
452: scanner_linebuf[0] = '\0';
453: }
454:
455: /*
456: * Scan an identifier.
457: *
458: * If chkalias is true, check first to see if it's an alias.
459: * Otherwise, check to see if it's a keyword.
460: */
461:
462: private Token getident (chkalias)
463: boolean chkalias;
464: {
465: char buf[1024];
466: register char *p, *q;
467: register Token t;
468: List pl;
469: String str;
470:
471: p = curchar;
472: q = buf;
473: if (shellmode) {
474: do {
475: *q++ = *p++;
476: } while (index(" \t\n!&<>*[]()'\"", *p) == nil);
477: } else {
478: do {
479: *q++ = *p++;
480: } while (isalnum(*p));
481: }
482: curchar = p;
483: *q = '\0';
484: yylval.y_name = identname(buf, false);
485: if (chkalias) {
486: if (findalias(yylval.y_name, &pl, &str)) {
487: expand(pl, str);
488: while (lexclass[*curchar] == WHITE) {
489: ++curchar;
490: }
491: if (pl == nil) {
492: t = getident(false);
493: } else {
494: t = getident(true);
495: }
496: } else if (shellmode) {
497: t = NAME;
498: } else {
499: t = findkeyword(yylval.y_name, NAME);
500: }
501: } else if (shellmode) {
502: t = NAME;
503: } else {
504: t = findkeyword(yylval.y_name, NAME);
505: }
506: return t;
507: }
508:
509: /*
510: * Scan a number.
511: */
512:
513: private Token getnum()
514: {
515: char buf[1024];
516: register Char *p, *q;
517: register Token t;
518: Integer base;
519:
520: p = curchar;
521: q = buf;
522: if (*p == '0') {
523: if (*(p+1) == 'x') {
524: p += 2;
525: base = 16;
526: } else if (*(p+1) == 't') {
527: base = 10;
528: } else if (varIsSet("$hexin")) {
529: base = 16;
530: } else {
531: base = 8;
532: }
533: } else if (varIsSet("$hexin")) {
534: base = 16;
535: } else if (varIsSet("$octin")) {
536: base = 8;
537: } else {
538: base = 10;
539: }
540: if (base == 16) {
541: do {
542: *q++ = *p++;
543: } while (ishexdigit(*p));
544: } else {
545: do {
546: *q++ = *p++;
547: } while (isdigit(*p));
548: }
549: if (*p == '.') {
550: do {
551: *q++ = *p++;
552: } while (isdigit(*p));
553: if (*p == 'e' or *p == 'E') {
554: p++;
555: if (*p == '+' or *p == '-' or isdigit(*p)) {
556: *q++ = 'e';
557: do {
558: *q++ = *p++;
559: } while (isdigit(*p));
560: }
561: }
562: *q = '\0';
563: yylval.y_real = atof(buf);
564: t = REAL;
565: } else {
566: *q = '\0';
567: switch (base) {
568: case 10:
569: yylval.y_int = atol(buf);
570: break;
571:
572: case 8:
573: yylval.y_int = octal(buf);
574: break;
575:
576: case 16:
577: yylval.y_int = hex(buf);
578: break;
579:
580: default:
581: badcaseval(base);
582: }
583: t = INT;
584: }
585: curchar = p;
586: return t;
587: }
588:
589: /*
590: * Convert a string of octal digits to an integer.
591: */
592:
593: private int octal(s)
594: String s;
595: {
596: register Char *p;
597: register Integer n;
598:
599: n = 0;
600: for (p = s; *p != '\0'; p++) {
601: n = 8*n + (*p - '0');
602: }
603: return n;
604: }
605:
606: /*
607: * Convert a string of hexadecimal digits to an integer.
608: */
609:
610: private int hex(s)
611: String s;
612: {
613: register Char *p;
614: register Integer n;
615:
616: n = 0;
617: for (p = s; *p != '\0'; p++) {
618: n *= 16;
619: if (*p >= 'a' and *p <= 'f') {
620: n += (*p - 'a' + 10);
621: } else if (*p >= 'A' and *p <= 'F') {
622: n += (*p - 'A' + 10);
623: } else {
624: n += (*p - '0');
625: }
626: }
627: return n;
628: }
629:
630: /*
631: * Scan a string.
632: */
633:
634: private Token getstring (quote)
635: char quote;
636: {
637: register char *p, *q;
638: char buf[MAXLINESIZE];
639: boolean endofstring;
640: Token t;
641:
642: p = curchar;
643: q = buf;
644: endofstring = false;
645: while (not endofstring) {
646: if (*p == '\\' and *(p+1) == '\n') {
647: if (fgets(scanner_linebuf, MAXLINESIZE, in) == nil) {
648: error("non-terminated string");
649: }
650: p = &scanner_linebuf[0] - 1;
651: } else if (*p == '\n' or *p == '\0') {
652: error("non-terminated string");
653: endofstring = true;
654: } else if (*p == quote) {
655: endofstring = true;
656: } else {
657: curchar = p;
658: *q++ = charcon(p);
659: p = curchar;
660: }
661: p++;
662: }
663: curchar = p;
664: *q = '\0';
665: if (quote == '\'' and buf[1] == '\0') {
666: yylval.y_char = buf[0];
667: t = CHAR;
668: } else {
669: yylval.y_string = strdup(buf);
670: t = STRING;
671: }
672: return t;
673: }
674:
675: /*
676: * Process a character constant.
677: * Watch out for backslashes.
678: */
679:
680: private char charcon (s)
681: String s;
682: {
683: register char *p, *q;
684: char c, buf[10];
685:
686: p = s;
687: if (*p == '\\') {
688: ++p;
689: switch (*p) {
690: case '\\':
691: c = '\\';
692: break;
693:
694: case 'n':
695: c = '\n';
696: break;
697:
698: case 'r':
699: c = '\r';
700: break;
701:
702: case 't':
703: c = '\t';
704: break;
705:
706: case '\'':
707: case '"':
708: c = *p;
709: break;
710:
711: default:
712: if (isdigit(*p)) {
713: q = buf;
714: do {
715: *q++ = *p++;
716: } while (isdigit(*p));
717: *q = '\0';
718: c = (char) octal(buf);
719: }
720: --p;
721: break;
722: }
723: curchar = p;
724: } else {
725: c = *p;
726: }
727: return c;
728: }
729:
730: /*
731: * Input file management routines.
732: */
733:
734: public setinput(filename)
735: Filename filename;
736: {
737: File f;
738:
739: f = fopen(filename, "r");
740: if (f == nil) {
741: error("can't open %s", filename);
742: } else {
743: if (curinclindex >= MAXINCLDEPTH) {
744: error("unreasonable input nesting on \"%s\"", filename);
745: }
746: inclinfo[curinclindex].savefile = in;
747: inclinfo[curinclindex].savefn = errfilename;
748: inclinfo[curinclindex].savelineno = errlineno;
749: curinclindex++;
750: in = f;
751: errfilename = filename;
752: errlineno = 1;
753: }
754: }
755:
756: private Boolean eofinput()
757: {
758: register Boolean b;
759:
760: if (curinclindex == 0) {
761: if (isterm(in)) {
762: putchar('\n');
763: clearerr(in);
764: b = false;
765: } else {
766: b = true;
767: }
768: } else {
769: fclose(in);
770: --curinclindex;
771: in = inclinfo[curinclindex].savefile;
772: errfilename = inclinfo[curinclindex].savefn;
773: errlineno = inclinfo[curinclindex].savelineno;
774: b = false;
775: }
776: return b;
777: }
778:
779: /*
780: * Pop the current input. Return whether successful.
781: */
782:
783: public Boolean popinput()
784: {
785: Boolean b;
786:
787: if (curinclindex == 0) {
788: b = false;
789: } else {
790: b = (Boolean) (not eofinput());
791: }
792: return b;
793: }
794:
795: /*
796: * Return whether we are currently reading from standard input.
797: */
798:
799: public Boolean isstdin()
800: {
801: return (Boolean) (in == stdin);
802: }
803:
804: /*
805: * Send the current line to the shell.
806: */
807:
808: public shellline()
809: {
810: register char *p;
811:
812: p = curchar;
813: while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) {
814: ++p;
815: }
816: shell(p);
817: if (*p == '\0' and isterm(in)) {
818: putchar('\n');
819: }
820: erecover();
821: }
822:
823: /*
824: * Read the rest of the current line in "shell mode".
825: */
826:
827: public beginshellmode()
828: {
829: shellmode = true;
830: }
831:
832: /*
833: * Print out a token for debugging.
834: */
835:
836: public print_token(f, t)
837: File f;
838: Token t;
839: {
840: if (t == '\n') {
841: fprintf(f, "char '\\n'");
842: } else if (t == EOF) {
843: fprintf(f, "EOF");
844: } else if (t < 256) {
845: fprintf(f, "char '%c'", t);
846: } else {
847: fprintf(f, "\"%s\"", keywdstring(t));
848: }
849: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.