|
|
1.1 root 1: /*
2: * Copyright (c) 1983 Regents of the University of California.
3: * All rights reserved.
4: *
5: * Redistribution and use in source and binary forms are permitted
6: * provided that the above copyright notice and this paragraph are
7: * duplicated in all such forms and that any documentation,
8: * advertising materials, and other materials related to such
9: * distribution and use acknowledge that the software was developed
10: * by the University of California, Berkeley. The name of the
11: * University may not be used to endorse or promote products derived
12: * from this software without specific prior written permission.
13: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
14: * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
15: * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
16: */
17:
18: #ifndef lint
19: static char sccsid[] = "@(#)scanner.c 3.11 (Berkeley) 6/29/88";
20: #endif /* not lint */
21:
22: #include <stdio.h>
23: #include "value.h"
24: #include "token.h"
25: #include "context.h"
26: #include "string.h"
27:
28: s_getc()
29: {
30: register c;
31:
32: switch (cx.x_type) {
33: case X_FILE:
34: c = getc(cx.x_fp);
35: if (cx.x_bol && c != EOF) {
36: cx.x_bol = 0;
37: cx.x_lineno++;
38: }
39: if (c == '\n')
40: cx.x_bol = 1;
41: return c;
42: case X_BUF:
43: if (*cx.x_bufp != 0)
44: return *cx.x_bufp++ & 0xff;
45: else
46: return EOF;
47: }
48: /*NOTREACHED*/
49: }
50:
51: s_ungetc(c)
52: {
53: if (c == EOF)
54: return EOF;
55: switch (cx.x_type) {
56: case X_FILE:
57: cx.x_bol = 0;
58: return ungetc(c, cx.x_fp);
59: case X_BUF:
60: if (cx.x_bufp > cx.x_buf)
61: return *--cx.x_bufp = c;
62: else
63: return EOF;
64: }
65: /*NOTREACHED*/
66: }
67:
68: s_gettok()
69: {
70: char buf[100];
71: register char *p = buf;
72: register c;
73: register state = 0;
74:
75: loop:
76: c = s_getc();
77: switch (state) {
78: case 0:
79: switch (c) {
80: case ' ':
81: case '\t':
82: break;
83: case '\n':
84: case ';':
85: cx.x_token = T_EOL;
86: state = -1;
87: break;
88: case '#':
89: state = 1;
90: break;
91: case EOF:
92: cx.x_token = T_EOF;
93: state = -1;
94: break;
95: case 'a': case 'b': case 'c': case 'd': case 'e':
96: case 'f': case 'g': case 'h': case 'i': case 'j':
97: case 'k': case 'l': case 'm': case 'n': case 'o':
98: case 'p': case 'q': case 'r': case 's': case 't':
99: case 'u': case 'v': case 'w': case 'x': case 'y':
100: case 'z':
101: case 'A': case 'B': case 'C': case 'D': case 'E':
102: case 'F': case 'G': case 'H': case 'I': case 'J':
103: case 'K': case 'L': case 'M': case 'N': case 'O':
104: case 'P': case 'Q': case 'R': case 'S': case 'T':
105: case 'U': case 'V': case 'W': case 'X': case 'Y':
106: case 'Z':
107: case '_': case '.':
108: *p++ = c;
109: state = 2;
110: break;
111: case '"':
112: state = 3;
113: break;
114: case '\'':
115: state = 4;
116: break;
117: case '\\':
118: switch (c = s_gettok1()) {
119: case -1:
120: break;
121: case -2:
122: state = 0;
123: break;
124: default:
125: *p++ = c;
126: state = 2;
127: }
128: break;
129: case '0':
130: cx.x_val.v_num = 0;
131: state = 10;
132: break;
133: case '1': case '2': case '3': case '4':
134: case '5': case '6': case '7': case '8': case '9':
135: cx.x_val.v_num = c - '0';
136: state = 11;
137: break;
138: case '>':
139: state = 20;
140: break;
141: case '<':
142: state = 21;
143: break;
144: case '=':
145: state = 22;
146: break;
147: case '!':
148: state = 23;
149: break;
150: case '&':
151: state = 24;
152: break;
153: case '|':
154: state = 25;
155: break;
156: case '$':
157: state = 26;
158: break;
159: case '~':
160: cx.x_token = T_COMP;
161: state = -1;
162: break;
163: case '+':
164: cx.x_token = T_PLUS;
165: state = -1;
166: break;
167: case '-':
168: cx.x_token = T_MINUS;
169: state = -1;
170: break;
171: case '*':
172: cx.x_token = T_MUL;
173: state = -1;
174: break;
175: case '/':
176: cx.x_token = T_DIV;
177: state = -1;
178: break;
179: case '%':
180: cx.x_token = T_MOD;
181: state = -1;
182: break;
183: case '^':
184: cx.x_token = T_XOR;
185: state = -1;
186: break;
187: case '(':
188: cx.x_token = T_LP;
189: state = -1;
190: break;
191: case ')':
192: cx.x_token = T_RP;
193: state = -1;
194: break;
195: case ',':
196: cx.x_token = T_COMMA;
197: state = -1;
198: break;
199: case '?':
200: cx.x_token = T_QUEST;
201: state = -1;
202: break;
203: case ':':
204: cx.x_token = T_COLON;
205: state = -1;
206: break;
207: case '[':
208: cx.x_token = T_LB;
209: state = -1;
210: break;
211: case ']':
212: cx.x_token = T_RB;
213: state = -1;
214: break;
215: default:
216: cx.x_val.v_num = c;
217: cx.x_token = T_CHAR;
218: state = -1;
219: break;
220: }
221: break;
222: case 1: /* got # */
223: if (c == '\n' || c == EOF) {
224: (void) s_ungetc(c);
225: state = 0;
226: }
227: break;
228: case 2: /* unquoted string */
229: switch (c) {
230: case 'a': case 'b': case 'c': case 'd': case 'e':
231: case 'f': case 'g': case 'h': case 'i': case 'j':
232: case 'k': case 'l': case 'm': case 'n': case 'o':
233: case 'p': case 'q': case 'r': case 's': case 't':
234: case 'u': case 'v': case 'w': case 'x': case 'y':
235: case 'z':
236: case 'A': case 'B': case 'C': case 'D': case 'E':
237: case 'F': case 'G': case 'H': case 'I': case 'J':
238: case 'K': case 'L': case 'M': case 'N': case 'O':
239: case 'P': case 'Q': case 'R': case 'S': case 'T':
240: case 'U': case 'V': case 'W': case 'X': case 'Y':
241: case 'Z':
242: case '_': case '.':
243: case '0': case '1': case '2': case '3': case '4':
244: case '5': case '6': case '7': case '8': case '9':
245: if (p < buf + sizeof buf - 1)
246: *p++ = c;
247: break;
248: case '"':
249: state = 3;
250: break;
251: case '\'':
252: state = 4;
253: break;
254: case '\\':
255: switch (c = s_gettok1()) {
256: case -2:
257: (void) s_ungetc(' ');
258: case -1:
259: break;
260: default:
261: if (p < buf + sizeof buf - 1)
262: *p++ = c;
263: }
264: break;
265: default:
266: (void) s_ungetc(c);
267: case EOF:
268: *p = 0;
269: cx.x_token = T_STR;
270: switch (*buf) {
271: case 'i':
272: if (buf[1] == 'f' && buf[2] == 0)
273: cx.x_token = T_IF;
274: break;
275: case 't':
276: if (buf[1] == 'h' && buf[2] == 'e'
277: && buf[3] == 'n' && buf[4] == 0)
278: cx.x_token = T_THEN;
279: break;
280: case 'e':
281: if (buf[1] == 'n' && buf[2] == 'd'
282: && buf[3] == 'i' && buf[4] == 'f'
283: && buf[5] == 0)
284: cx.x_token = T_ENDIF;
285: else if (buf[1] == 'l' && buf[2] == 's')
286: if (buf[3] == 'i' && buf[4] == 'f'
287: && buf[5] == 0)
288: cx.x_token = T_ELSIF;
289: else if (buf[3] == 'e' && buf[4] == 0)
290: cx.x_token = T_ELSE;
291: break;
292: }
293: if (cx.x_token == T_STR
294: && (cx.x_val.v_str = str_cpy(buf)) == 0) {
295: p_memerror();
296: cx.x_token = T_EOF;
297: }
298: state = -1;
299: break;
300: }
301: break;
302: case 3: /* " quoted string */
303: switch (c) {
304: case '\n':
305: (void) s_ungetc(c);
306: case EOF:
307: case '"':
308: state = 2;
309: break;
310: case '\\':
311: switch (c = s_gettok1()) {
312: case -1:
313: case -2: /* newlines are invisible */
314: break;
315: default:
316: if (p < buf + sizeof buf - 1)
317: *p++ = c;
318: }
319: break;
320: default:
321: if (p < buf + sizeof buf - 1)
322: *p++ = c;
323: break;
324: }
325: break;
326: case 4: /* ' quoted string */
327: switch (c) {
328: case '\n':
329: (void) s_ungetc(c);
330: case EOF:
331: case '\'':
332: state = 2;
333: break;
334: case '\\':
335: switch (c = s_gettok1()) {
336: case -1:
337: case -2: /* newlines are invisible */
338: break;
339: default:
340: if (p < buf + sizeof buf - 1)
341: *p++ = c;
342: }
343: break;
344: default:
345: if (p < buf + sizeof buf - 1)
346: *p++ = c;
347: break;
348: }
349: break;
350: case 10: /* got 0 */
351: switch (c) {
352: case 'x':
353: case 'X':
354: cx.x_val.v_num = 0;
355: state = 12;
356: break;
357: case '0': case '1': case '2': case '3': case '4':
358: case '5': case '6': case '7':
359: cx.x_val.v_num = c - '0';
360: state = 13;
361: break;
362: case '8': case '9':
363: cx.x_val.v_num = c - '0';
364: state = 11;
365: break;
366: default:
367: (void) s_ungetc(c);
368: state = -1;
369: cx.x_token = T_NUM;
370: }
371: break;
372: case 11: /* decimal number */
373: switch (c) {
374: case '0': case '1': case '2': case '3': case '4':
375: case '5': case '6': case '7': case '8': case '9':
376: cx.x_val.v_num = cx.x_val.v_num * 10 + c - '0';
377: break;
378: default:
379: (void) s_ungetc(c);
380: state = -1;
381: cx.x_token = T_NUM;
382: }
383: break;
384: case 12: /* hex number */
385: switch (c) {
386: case '0': case '1': case '2': case '3': case '4':
387: case '5': case '6': case '7': case '8': case '9':
388: cx.x_val.v_num = cx.x_val.v_num * 16 + c - '0';
389: break;
390: case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
391: cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'a' + 10;
392: break;
393: case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
394: cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'A' + 10;
395: break;
396: default:
397: (void) s_ungetc(c);
398: state = -1;
399: cx.x_token = T_NUM;
400: }
401: break;
402: case 13: /* octal number */
403: switch (c) {
404: case '0': case '1': case '2': case '3': case '4':
405: case '5': case '6': case '7':
406: cx.x_val.v_num = cx.x_val.v_num * 8 + c - '0';
407: break;
408: default:
409: (void) s_ungetc(c);
410: state = -1;
411: cx.x_token = T_NUM;
412: }
413: break;
414: case 20: /* got > */
415: switch (c) {
416: case '=':
417: cx.x_token = T_GE;
418: state = -1;
419: break;
420: case '>':
421: cx.x_token = T_RS;
422: state = -1;
423: break;
424: default:
425: (void) s_ungetc(c);
426: cx.x_token = T_GT;
427: state = -1;
428: }
429: break;
430: case 21: /* got < */
431: switch (c) {
432: case '=':
433: cx.x_token = T_LE;
434: state = -1;
435: break;
436: case '<':
437: cx.x_token = T_LS;
438: state = -1;
439: break;
440: default:
441: (void) s_ungetc(c);
442: cx.x_token = T_LT;
443: state = -1;
444: }
445: break;
446: case 22: /* got = */
447: switch (c) {
448: case '=':
449: cx.x_token = T_EQ;
450: state = -1;
451: break;
452: default:
453: (void) s_ungetc(c);
454: cx.x_token = T_ASSIGN;
455: state = -1;
456: }
457: break;
458: case 23: /* got ! */
459: switch (c) {
460: case '=':
461: cx.x_token = T_NE;
462: state = -1;
463: break;
464: default:
465: (void) s_ungetc(c);
466: cx.x_token = T_NOT;
467: state = -1;
468: }
469: break;
470: case 24: /* got & */
471: switch (c) {
472: case '&':
473: cx.x_token = T_ANDAND;
474: state = -1;
475: break;
476: default:
477: (void) s_ungetc(c);
478: cx.x_token = T_AND;
479: state = -1;
480: }
481: break;
482: case 25: /* got | */
483: switch (c) {
484: case '|':
485: cx.x_token = T_OROR;
486: state = -1;
487: break;
488: default:
489: (void) s_ungetc(c);
490: cx.x_token = T_OR;
491: state = -1;
492: }
493: break;
494: case 26: /* got $ */
495: switch (c) {
496: case '?':
497: cx.x_token = T_DQ;
498: state = -1;
499: break;
500: default:
501: (void) s_ungetc(c);
502: cx.x_token = T_DOLLAR;
503: state = -1;
504: }
505: break;
506: default:
507: abort();
508: }
509: if (state >= 0)
510: goto loop;
511: return cx.x_token;
512: }
513:
/*
 * Decode the rest of a backslash escape.  The caller has already read
 * the backslash; this reads what follows it with s_getc().
 *
 * Returns
 *	-1	if end of input follows the backslash,
 *	-2	if a newline follows the backslash,
 *	otherwise the character the escape stands for:
 *		\b \f \n \r \t	the usual control characters,
 *		\d, \dd, \ddd	the value of one to three octal digits,
 *		\c		any other character c stands for itself.
 *
 * The octal value is not range checked, so it can be as large as 0777.
 */
int
s_gettok1()
{
	register int c;
	register int n;
	register int ndigits;

	c = s_getc();			/* got \ */
	switch (c) {
	case EOF:
		return -1;
	case '\n':
		return -2;
	case 'b':
		return '\b';
	case 'f':
		return '\f';
	case 'n':
		return '\n';
	case 'r':
		return '\r';
	case 't':
		return '\t';
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7':
		break;
	default:
		return c;
	}

	/* Octal escape: the first digit is in hand, take up to two more. */
	n = c - '0';
	for (ndigits = 1; ndigits < 3; ndigits++) {
		c = s_getc();
		if (c < '0' || c > '7') {
			/* Not part of the escape; leave it for the caller. */
			(void) s_ungetc(c);
			break;
		}
		n = n * 8 + c - '0';
	}
	return n;
}
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.