|
|
1.1 root 1: /*
2: * Copyright (c) 1983 Regents of the University of California.
3: * All rights reserved.
4: *
5: * This code is derived from software contributed to Berkeley by
6: * Edward Wang at The University of California, Berkeley.
7: *
8: * Redistribution and use in source and binary forms are permitted provided
9: * that: (1) source distributions retain this entire copyright notice and
10: * comment, and (2) distributions including binaries display the following
11: * acknowledgement: ``This product includes software developed by the
12: * University of California, Berkeley and its contributors'' in the
13: * documentation or other materials provided with the distribution and in
14: * all advertising materials mentioning features or use of this software.
15: * Neither the name of the University nor the names of its contributors may
16: * be used to endorse or promote products derived from this software without
17: * specific prior written permission.
18: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
19: * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
20: * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
21: */
22:
#if !defined(lint)
/* SCCS "what" identification string; omitted when `lint' is defined. */
static char sccsid[] =
	"@(#)scanner.c 3.14 (Berkeley) 6/6/90";
#endif /* !lint */
26:
27: #include "value.h"
28: #include "token.h"
29: #include "context.h"
30: #include "string.h"
31:
32: s_getc()
33: {
34: register c;
35:
36: switch (cx.x_type) {
37: case X_FILE:
38: c = getc(cx.x_fp);
39: if (cx.x_bol && c != EOF) {
40: cx.x_bol = 0;
41: cx.x_lineno++;
42: }
43: if (c == '\n')
44: cx.x_bol = 1;
45: return c;
46: case X_BUF:
47: if (*cx.x_bufp != 0)
48: return *cx.x_bufp++ & 0xff;
49: else
50: return EOF;
51: }
52: /*NOTREACHED*/
53: }
54:
55: s_ungetc(c)
56: {
57: if (c == EOF)
58: return EOF;
59: switch (cx.x_type) {
60: case X_FILE:
61: cx.x_bol = 0;
62: return ungetc(c, cx.x_fp);
63: case X_BUF:
64: if (cx.x_bufp > cx.x_buf)
65: return *--cx.x_bufp = c;
66: else
67: return EOF;
68: }
69: /*NOTREACHED*/
70: }
71:
72: s_gettok()
73: {
74: char buf[100];
75: register char *p = buf;
76: register c;
77: register state = 0;
78:
79: loop:
80: c = s_getc();
81: switch (state) {
82: case 0:
83: switch (c) {
84: case ' ':
85: case '\t':
86: break;
87: case '\n':
88: case ';':
89: cx.x_token = T_EOL;
90: state = -1;
91: break;
92: case '#':
93: state = 1;
94: break;
95: case EOF:
96: cx.x_token = T_EOF;
97: state = -1;
98: break;
99: case 'a': case 'b': case 'c': case 'd': case 'e':
100: case 'f': case 'g': case 'h': case 'i': case 'j':
101: case 'k': case 'l': case 'm': case 'n': case 'o':
102: case 'p': case 'q': case 'r': case 's': case 't':
103: case 'u': case 'v': case 'w': case 'x': case 'y':
104: case 'z':
105: case 'A': case 'B': case 'C': case 'D': case 'E':
106: case 'F': case 'G': case 'H': case 'I': case 'J':
107: case 'K': case 'L': case 'M': case 'N': case 'O':
108: case 'P': case 'Q': case 'R': case 'S': case 'T':
109: case 'U': case 'V': case 'W': case 'X': case 'Y':
110: case 'Z':
111: case '_': case '.':
112: *p++ = c;
113: state = 2;
114: break;
115: case '"':
116: state = 3;
117: break;
118: case '\'':
119: state = 4;
120: break;
121: case '\\':
122: switch (c = s_gettok1()) {
123: case -1:
124: break;
125: case -2:
126: state = 0;
127: break;
128: default:
129: *p++ = c;
130: state = 2;
131: }
132: break;
133: case '0':
134: cx.x_val.v_num = 0;
135: state = 10;
136: break;
137: case '1': case '2': case '3': case '4':
138: case '5': case '6': case '7': case '8': case '9':
139: cx.x_val.v_num = c - '0';
140: state = 11;
141: break;
142: case '>':
143: state = 20;
144: break;
145: case '<':
146: state = 21;
147: break;
148: case '=':
149: state = 22;
150: break;
151: case '!':
152: state = 23;
153: break;
154: case '&':
155: state = 24;
156: break;
157: case '|':
158: state = 25;
159: break;
160: case '$':
161: state = 26;
162: break;
163: case '~':
164: cx.x_token = T_COMP;
165: state = -1;
166: break;
167: case '+':
168: cx.x_token = T_PLUS;
169: state = -1;
170: break;
171: case '-':
172: cx.x_token = T_MINUS;
173: state = -1;
174: break;
175: case '*':
176: cx.x_token = T_MUL;
177: state = -1;
178: break;
179: case '/':
180: cx.x_token = T_DIV;
181: state = -1;
182: break;
183: case '%':
184: cx.x_token = T_MOD;
185: state = -1;
186: break;
187: case '^':
188: cx.x_token = T_XOR;
189: state = -1;
190: break;
191: case '(':
192: cx.x_token = T_LP;
193: state = -1;
194: break;
195: case ')':
196: cx.x_token = T_RP;
197: state = -1;
198: break;
199: case ',':
200: cx.x_token = T_COMMA;
201: state = -1;
202: break;
203: case '?':
204: cx.x_token = T_QUEST;
205: state = -1;
206: break;
207: case ':':
208: cx.x_token = T_COLON;
209: state = -1;
210: break;
211: case '[':
212: cx.x_token = T_LB;
213: state = -1;
214: break;
215: case ']':
216: cx.x_token = T_RB;
217: state = -1;
218: break;
219: default:
220: cx.x_val.v_num = c;
221: cx.x_token = T_CHAR;
222: state = -1;
223: break;
224: }
225: break;
226: case 1: /* got # */
227: if (c == '\n' || c == EOF) {
228: (void) s_ungetc(c);
229: state = 0;
230: }
231: break;
232: case 2: /* unquoted string */
233: switch (c) {
234: case 'a': case 'b': case 'c': case 'd': case 'e':
235: case 'f': case 'g': case 'h': case 'i': case 'j':
236: case 'k': case 'l': case 'm': case 'n': case 'o':
237: case 'p': case 'q': case 'r': case 's': case 't':
238: case 'u': case 'v': case 'w': case 'x': case 'y':
239: case 'z':
240: case 'A': case 'B': case 'C': case 'D': case 'E':
241: case 'F': case 'G': case 'H': case 'I': case 'J':
242: case 'K': case 'L': case 'M': case 'N': case 'O':
243: case 'P': case 'Q': case 'R': case 'S': case 'T':
244: case 'U': case 'V': case 'W': case 'X': case 'Y':
245: case 'Z':
246: case '_': case '.':
247: case '0': case '1': case '2': case '3': case '4':
248: case '5': case '6': case '7': case '8': case '9':
249: if (p < buf + sizeof buf - 1)
250: *p++ = c;
251: break;
252: case '"':
253: state = 3;
254: break;
255: case '\'':
256: state = 4;
257: break;
258: case '\\':
259: switch (c = s_gettok1()) {
260: case -2:
261: (void) s_ungetc(' ');
262: case -1:
263: break;
264: default:
265: if (p < buf + sizeof buf - 1)
266: *p++ = c;
267: }
268: break;
269: default:
270: (void) s_ungetc(c);
271: case EOF:
272: *p = 0;
273: cx.x_token = T_STR;
274: switch (*buf) {
275: case 'i':
276: if (buf[1] == 'f' && buf[2] == 0)
277: cx.x_token = T_IF;
278: break;
279: case 't':
280: if (buf[1] == 'h' && buf[2] == 'e'
281: && buf[3] == 'n' && buf[4] == 0)
282: cx.x_token = T_THEN;
283: break;
284: case 'e':
285: if (buf[1] == 'n' && buf[2] == 'd'
286: && buf[3] == 'i' && buf[4] == 'f'
287: && buf[5] == 0)
288: cx.x_token = T_ENDIF;
289: else if (buf[1] == 'l' && buf[2] == 's')
290: if (buf[3] == 'i' && buf[4] == 'f'
291: && buf[5] == 0)
292: cx.x_token = T_ELSIF;
293: else if (buf[3] == 'e' && buf[4] == 0)
294: cx.x_token = T_ELSE;
295: break;
296: }
297: if (cx.x_token == T_STR
298: && (cx.x_val.v_str = str_cpy(buf)) == 0) {
299: p_memerror();
300: cx.x_token = T_EOF;
301: }
302: state = -1;
303: break;
304: }
305: break;
306: case 3: /* " quoted string */
307: switch (c) {
308: case '\n':
309: (void) s_ungetc(c);
310: case EOF:
311: case '"':
312: state = 2;
313: break;
314: case '\\':
315: switch (c = s_gettok1()) {
316: case -1:
317: case -2: /* newlines are invisible */
318: break;
319: default:
320: if (p < buf + sizeof buf - 1)
321: *p++ = c;
322: }
323: break;
324: default:
325: if (p < buf + sizeof buf - 1)
326: *p++ = c;
327: break;
328: }
329: break;
330: case 4: /* ' quoted string */
331: switch (c) {
332: case '\n':
333: (void) s_ungetc(c);
334: case EOF:
335: case '\'':
336: state = 2;
337: break;
338: case '\\':
339: switch (c = s_gettok1()) {
340: case -1:
341: case -2: /* newlines are invisible */
342: break;
343: default:
344: if (p < buf + sizeof buf - 1)
345: *p++ = c;
346: }
347: break;
348: default:
349: if (p < buf + sizeof buf - 1)
350: *p++ = c;
351: break;
352: }
353: break;
354: case 10: /* got 0 */
355: switch (c) {
356: case 'x':
357: case 'X':
358: cx.x_val.v_num = 0;
359: state = 12;
360: break;
361: case '0': case '1': case '2': case '3': case '4':
362: case '5': case '6': case '7':
363: cx.x_val.v_num = c - '0';
364: state = 13;
365: break;
366: case '8': case '9':
367: cx.x_val.v_num = c - '0';
368: state = 11;
369: break;
370: default:
371: (void) s_ungetc(c);
372: state = -1;
373: cx.x_token = T_NUM;
374: }
375: break;
376: case 11: /* decimal number */
377: switch (c) {
378: case '0': case '1': case '2': case '3': case '4':
379: case '5': case '6': case '7': case '8': case '9':
380: cx.x_val.v_num = cx.x_val.v_num * 10 + c - '0';
381: break;
382: default:
383: (void) s_ungetc(c);
384: state = -1;
385: cx.x_token = T_NUM;
386: }
387: break;
388: case 12: /* hex number */
389: switch (c) {
390: case '0': case '1': case '2': case '3': case '4':
391: case '5': case '6': case '7': case '8': case '9':
392: cx.x_val.v_num = cx.x_val.v_num * 16 + c - '0';
393: break;
394: case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
395: cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'a' + 10;
396: break;
397: case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
398: cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'A' + 10;
399: break;
400: default:
401: (void) s_ungetc(c);
402: state = -1;
403: cx.x_token = T_NUM;
404: }
405: break;
406: case 13: /* octal number */
407: switch (c) {
408: case '0': case '1': case '2': case '3': case '4':
409: case '5': case '6': case '7':
410: cx.x_val.v_num = cx.x_val.v_num * 8 + c - '0';
411: break;
412: default:
413: (void) s_ungetc(c);
414: state = -1;
415: cx.x_token = T_NUM;
416: }
417: break;
418: case 20: /* got > */
419: switch (c) {
420: case '=':
421: cx.x_token = T_GE;
422: state = -1;
423: break;
424: case '>':
425: cx.x_token = T_RS;
426: state = -1;
427: break;
428: default:
429: (void) s_ungetc(c);
430: cx.x_token = T_GT;
431: state = -1;
432: }
433: break;
434: case 21: /* got < */
435: switch (c) {
436: case '=':
437: cx.x_token = T_LE;
438: state = -1;
439: break;
440: case '<':
441: cx.x_token = T_LS;
442: state = -1;
443: break;
444: default:
445: (void) s_ungetc(c);
446: cx.x_token = T_LT;
447: state = -1;
448: }
449: break;
450: case 22: /* got = */
451: switch (c) {
452: case '=':
453: cx.x_token = T_EQ;
454: state = -1;
455: break;
456: default:
457: (void) s_ungetc(c);
458: cx.x_token = T_ASSIGN;
459: state = -1;
460: }
461: break;
462: case 23: /* got ! */
463: switch (c) {
464: case '=':
465: cx.x_token = T_NE;
466: state = -1;
467: break;
468: default:
469: (void) s_ungetc(c);
470: cx.x_token = T_NOT;
471: state = -1;
472: }
473: break;
474: case 24: /* got & */
475: switch (c) {
476: case '&':
477: cx.x_token = T_ANDAND;
478: state = -1;
479: break;
480: default:
481: (void) s_ungetc(c);
482: cx.x_token = T_AND;
483: state = -1;
484: }
485: break;
486: case 25: /* got | */
487: switch (c) {
488: case '|':
489: cx.x_token = T_OROR;
490: state = -1;
491: break;
492: default:
493: (void) s_ungetc(c);
494: cx.x_token = T_OR;
495: state = -1;
496: }
497: break;
498: case 26: /* got $ */
499: switch (c) {
500: case '?':
501: cx.x_token = T_DQ;
502: state = -1;
503: break;
504: default:
505: (void) s_ungetc(c);
506: cx.x_token = T_DOLLAR;
507: state = -1;
508: }
509: break;
510: default:
511: abort();
512: }
513: if (state >= 0)
514: goto loop;
515: return cx.x_token;
516: }
517:
/*
 * Decode the rest of a backslash escape; the caller has already
 * consumed the backslash itself.
 *
 * Returns:
 *	-1	end of input followed the backslash
 *	-2	a newline followed the backslash
 *	otherwise the value of the escape:
 *		\b \f \n \r \t	the corresponding control character
 *		\d, \dd, \ddd	the value of one to three octal digits;
 *				a character that ends the digits early
 *				is pushed back with s_ungetc()
 *		anything else	the character itself
 */
int
s_gettok1(void)
{
	register int c;
	register int n;

	c = s_getc();			/* got \ */
	switch (c) {
	case EOF:
		return -1;
	case '\n':
		return -2;
	case 'b':
		return '\b';
	case 'f':
		return '\f';
	case 'n':
		return '\n';
	case 'r':
		return '\r';
	case 't':
		return '\t';
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7':
		/* octal escape: handled below */
		break;
	default:
		return c;
	}
	n = c - '0';
	c = s_getc();			/* got \[0-7] */
	if (c < '0' || c > '7') {
		(void) s_ungetc(c);
		return n;
	}
	n = n * 8 + c - '0';
	c = s_getc();			/* got \[0-7][0-7] */
	if (c < '0' || c > '7') {
		(void) s_ungetc(c);
		return n;
	}
	return n * 8 + c - '0';
}
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.