|
|
1.1 root 1: /* C compiler: lexical analysis */
2:
3: #include "c.h"
4:
/*
 * Character-class bits. Each entry of map[] is a bitwise OR of these,
 * so a character can belong to several classes at once (e.g. LETTER|HEX).
 * Under an ANSI compiler they are enumeration constants; otherwise they
 * are plain macros with the same octal values.
 */
5: #ifdef __STDC__
6: enum { BLANK=01, NEWLINE=02, LETTER=04, DIGIT=010, HEX=020, OTHER=040 };
7: #else
8: #define BLANK 01
9: #define NEWLINE 02
10: #define LETTER 04
11: #define DIGIT 010
12: #define HEX 020
13: #define OTHER 040
14: #endif
/*
 * kind[] is built by textually including token.h with xx() defined to
 * emit only its sixth argument, giving one entry per xx() line in that
 * header.  (token.h is not visible here; presumably it lists one xx()
 * per token code -- confirm against the header.)
 */
15: char kind[] = { /* token kind, i.e., classification */
16: #define xx(a,b,c,d,e,f,g) f,
17: #include "token.h"
18: };
/* Lexer state shared with the rest of the compiler. */
19: Coordinate src; /* current source coordinate */
/*
 * t is an enum tokencode under ANSI C and a plain int otherwise.
 * It is not assigned anywhere in this part of the file; presumably it
 * holds the current token code and is set by gettok's callers -- confirm.
 */
20: #ifdef __STDC__
21: enum tokencode t;
22: #else
23: int t;
24: #endif
25: char *token; /* current token */
26: Symbol tsym; /* symbol table entry for current token */
27:
/*
 * Single static symbol reused for every constant: fcon, icon and the
 * literal-scanning code in gettok fill it in and hand out &tval, so each
 * new constant overwrites the previous one.
 */
28: static struct symbol tval; /* symbol for constants */
29:
30: static unsigned char map[256] = {
31: /* 000 nul */ 0,
32: /* 001 soh */ 0,
33: /* 002 stx */ 0,
34: /* 003 etx */ 0,
35: /* 004 eot */ 0,
36: /* 005 enq */ 0,
37: /* 006 ack */ 0,
38: /* 007 bel */ 0,
39: /* 010 bs */ 0,
40: /* 011 ht */ BLANK,
41: /* 012 nl */ NEWLINE,
42: /* 013 vt */ BLANK,
43: /* 014 ff */ BLANK,
44: /* 015 cr */ 0,
45: /* 016 so */ 0,
46: /* 017 si */ 0,
47: /* 020 dle */ 0,
48: /* 021 dc1 */ 0,
49: /* 022 dc2 */ 0,
50: /* 023 dc3 */ 0,
51: /* 024 dc4 */ 0,
52: /* 025 nak */ 0,
53: /* 026 syn */ 0,
54: /* 027 etb */ 0,
55: /* 030 can */ 0,
56: /* 031 em */ 0,
57: /* 032 sub */ 0,
58: /* 033 esc */ 0,
59: /* 034 fs */ 0,
60: /* 035 gs */ 0,
61: /* 036 rs */ 0,
62: /* 037 us */ 0,
63: /* 040 sp */ BLANK,
64: /* 041 ! */ OTHER,
65: /* 042 " */ OTHER,
66: /* 043 # */ OTHER,
67: /* 044 $ */ 0,
68: /* 045 % */ OTHER,
69: /* 046 & */ OTHER,
70: /* 047 ' */ OTHER,
71: /* 050 ( */ OTHER,
72: /* 051 ) */ OTHER,
73: /* 052 * */ OTHER,
74: /* 053 + */ OTHER,
75: /* 054 , */ OTHER,
76: /* 055 - */ OTHER,
77: /* 056 . */ OTHER,
78: /* 057 / */ OTHER,
79: /* 060 0 */ DIGIT,
80: /* 061 1 */ DIGIT,
81: /* 062 2 */ DIGIT,
82: /* 063 3 */ DIGIT,
83: /* 064 4 */ DIGIT,
84: /* 065 5 */ DIGIT,
85: /* 066 6 */ DIGIT,
86: /* 067 7 */ DIGIT,
87: /* 070 8 */ DIGIT,
88: /* 071 9 */ DIGIT,
89: /* 072 : */ OTHER,
90: /* 073 ; */ OTHER,
91: /* 074 < */ OTHER,
92: /* 075 = */ OTHER,
93: /* 076 > */ OTHER,
94: /* 077 ? */ OTHER,
95: /* 100 @ */ 0,
96: /* 101 A */ LETTER|HEX,
97: /* 102 B */ LETTER|HEX,
98: /* 103 C */ LETTER|HEX,
99: /* 104 D */ LETTER|HEX,
100: /* 105 E */ LETTER|HEX,
101: /* 106 F */ LETTER|HEX,
102: /* 107 G */ LETTER,
103: /* 110 H */ LETTER,
104: /* 111 I */ LETTER,
105: /* 112 J */ LETTER,
106: /* 113 K */ LETTER,
107: /* 114 L */ LETTER,
108: /* 115 M */ LETTER,
109: /* 116 N */ LETTER,
110: /* 117 O */ LETTER,
111: /* 120 P */ LETTER,
112: /* 121 Q */ LETTER,
113: /* 122 R */ LETTER,
114: /* 123 S */ LETTER,
115: /* 124 T */ LETTER,
116: /* 125 U */ LETTER,
117: /* 126 V */ LETTER,
118: /* 127 W */ LETTER,
119: /* 130 X */ LETTER,
120: /* 131 Y */ LETTER,
121: /* 132 Z */ LETTER,
122: /* 133 [ */ OTHER,
123: /* 134 \ */ OTHER,
124: /* 135 ] */ OTHER,
125: /* 136 ^ */ OTHER,
126: /* 137 _ */ LETTER,
127: /* 140 ` */ 0,
128: /* 141 a */ LETTER|HEX,
129: /* 142 b */ LETTER|HEX,
130: /* 143 c */ LETTER|HEX,
131: /* 144 d */ LETTER|HEX,
132: /* 145 e */ LETTER|HEX,
133: /* 146 f */ LETTER|HEX,
134: /* 147 g */ LETTER,
135: /* 150 h */ LETTER,
136: /* 151 i */ LETTER,
137: /* 152 j */ LETTER,
138: /* 153 k */ LETTER,
139: /* 154 l */ LETTER,
140: /* 155 m */ LETTER,
141: /* 156 n */ LETTER,
142: /* 157 o */ LETTER,
143: /* 160 p */ LETTER,
144: /* 161 q */ LETTER,
145: /* 162 r */ LETTER,
146: /* 163 s */ LETTER,
147: /* 164 t */ LETTER,
148: /* 165 u */ LETTER,
149: /* 166 v */ LETTER,
150: /* 167 w */ LETTER,
151: /* 170 x */ LETTER,
152: /* 171 y */ LETTER,
153: /* 172 z */ LETTER,
154: /* 173 { */ OTHER,
155: /* 174 | */ OTHER,
156: /* 175 } */ OTHER,
157: /* 176 ~ */ OTHER,
158: };
/*
 * If strtod is defined as a macro, supply a private ERANGE and errno
 * instead of including <errno.h>.  (Presumably the macro redirects strtod
 * to a replacement on hosts without a usable library version -- confirm
 * where the macro is defined.)
 */
159: #ifdef strtod
160: #define ERANGE 1
161: static int errno;
162: #else
163: #include <errno.h>
164: #endif
/*
 * Forward declarations of the file-local helpers.  dclproto is defined
 * outside this file; it appears to pair a declarator with a parameter
 * type list so that prototypes can be dropped for pre-ANSI compilers.
 */
165: dclproto(static int backslash,(int));
166: dclproto(static Symbol fcon,(void));
167: dclproto(static Symbol icon,(unsigned int, int));
168: dclproto(static void ppnumber,(char *));
169: /* backslash - get next character with \'s interpreted in q ... q */
170: static int backslash(q) {
171: int c;
172:
173: switch (*cp++) {
174: case 'a': return 7;
175: case 'b': return '\b';
176: case 'f': return '\f';
177: case 'n': return '\n';
178: case 'r': return '\r';
179: case 't': return '\t';
180: case 'v': return '\v';
181: case '\'': case '"': case '\\': case '\?': break;
182: case 'x': {
183: int overflow = 0;
184: if ((map[*cp]&(DIGIT|HEX)) == 0) {
185: if (*cp < ' ' || *cp == 0177)
186: error("ill-formed hexadecimal escape sequence\n");
187: else
188: error("ill-formed hexadecimal escape sequence `\\x%c'\n", *cp);
189: if (*cp != q)
190: cp++;
191: return 0;
192: }
193: for (c = 0; map[*cp]&(DIGIT|HEX); cp++) {
194: if (c&~((unsigned)-1 >> 4))
195: overflow++;
196: if (map[*cp]&DIGIT)
197: c = (c<<4) + *cp - '0';
198: else
199: c = (c<<4) + (*cp&~040) - 'A' + 10;
200: }
201: if (c&~0377 || overflow)
202: warning("overflow in hexadecimal escape sequence\n");
203: return c&0377;
204: }
205: case '0': case '1': case '2': case '3':
206: case '4': case '5': case '6': case '7':
207: c = *(cp-1) - '0';
208: if (*cp >= '0' && *cp <= '7') {
209: c = (c<<3) + *cp++ - '0';
210: if (*cp >= '0' && *cp <= '7')
211: c = (c<<3) + *cp++ - '0';
212: }
213: if (c&~0377)
214: warning("overflow in octal escape sequence\n");
215: return c&0377;
216: default:
217: if (cp[-1] < ' ' || cp[-1] >= 0177)
218: warning("unrecognized character escape sequence\n");
219: else
220: warning("unrecognized character escape sequence `\\%c'\n", cp[-1]);
221: }
222: return cp[-1];
223: }
224: /* fcon - scan for tail of a floating constant, return symbol */
225: static Symbol fcon() {
226: char *s = token;
227: int n = 0;
228:
229: while (s < (char *)cp)
230: n += *s++ - '0';
231: if (*cp == '.')
232: for (cp++; map[*cp]&DIGIT; cp++)
233: n += *cp - '0';
234: if (*cp == 'e' || *cp == 'E') {
235: if (*++cp == '-' || *cp == '+')
236: cp++;
237: if (map[*cp]&DIGIT)
238: do cp++; while (map[*cp]&DIGIT);
239: else
240: error("invalid floating constant `%S'\n", token, (char*)cp - token);
241: }
242: if (n == 0)
243: tval.u.c.v.d = 0.0;
244: else {
245: errno = 0;
246: tval.u.c.v.d = strtod(token, (char **)0);
247: if (errno == ERANGE)
248: warning("overflow in floating constant `%S'\n", token, (char*)cp - token);
249: }
250: if (*cp == 'f' || *cp == 'F') {
251: ++cp;
252: if (tval.u.c.v.d > FLT_MAX)
253: warning("overflow in floating constant `%S'\n", token, (char*)cp - token);
254: tval.type = floattype;
255: tval.u.c.v.f = tval.u.c.v.d;
256: } else if (*cp == 'l' || *cp == 'L') {
257: cp++;
258: tval.type = longdouble;
259: } else
260: tval.type = doubletype;
261: ppnumber("floating");
262: return &tval;
263: }
264:
265: /* getchr - return next significant character */
266: int getchr() {
267: while (*cp) {
268: while (map[*cp]&BLANK)
269: cp++;
270: if (!(map[*cp]&NEWLINE))
271: return *cp;
272: cp++;
273: nextline();
274: }
275: return EOI;
276: }
277:
278: /* gettok - return next token */
279: int gettok() {
280: while (*cp) {
281: register unsigned char *rcp = cp;
282: while (map[*rcp]&BLANK)
283: rcp++;
284: if (limit - rcp < MAXTOKEN) {
285: cp = rcp;
286: fillbuf();
287: rcp = cp;
288: }
289: src.file = file;
290: src.x = (char *)rcp - line;
291: src.y = lineno;
292: cp = rcp + 1;
293: switch (*rcp++) {
294: case '\n': case '\v': case '\r': case '\f':
295: nextline();
296: continue;
297: case '/': if (*rcp == '*') {
298: int c = 0;
299: for (rcp++; *rcp && (*rcp != '/' || c != '*'); )
300: if (map[*rcp]&NEWLINE) {
301: if (rcp < limit)
302: c = *rcp;
303: cp = rcp + 1;
304: nextline();
305: rcp = cp;
306: } else
307: c = *rcp++;
308: if (*rcp)
309: rcp++;
310: else
311: error("unclosed comment\n");
312: cp = rcp;
313: continue;
314: }
315: return '/';
316: case '.': if (rcp[0] == '.' && rcp[1] == '.')
317: return cp += 2, ELLIPSIS;
318: if ((map[*rcp]&DIGIT) == 0)
319: return '.';
320: if (limit - rcp < MAXLINE) {
321: cp = rcp - 1;
322: fillbuf();
323: rcp = ++cp;
324: }
325: assert(cp == rcp);
326: cp = rcp - 1;
327: token = (char *)cp;
328: tsym = fcon();
329: return FCON;
330: case '0': case '1': case '2': case '3': case '4':
331: case '5': case '6': case '7': case '8': case '9':
332: { unsigned int n = 0;
333: if (limit - rcp < MAXLINE) {
334: cp = rcp - 1;
335: fillbuf();
336: rcp = ++cp;
337: }
338: assert(cp == rcp);
339: token = (char *)rcp - 1;
340: if (*token == '0' && (*rcp == 'x' || *rcp == 'X')) {
341: int d, overflow = 0;
342: while (*++rcp) {
343: if (map[*rcp]&DIGIT)
344: d = *rcp - '0';
345: else if (*rcp >= 'a' && *rcp <= 'f')
346: d = *rcp - 'a' + 10;
347: else if (*rcp >= 'A' && *rcp <= 'F')
348: d = *rcp - 'A' + 10;
349: else
350: break;
351: if (n&~((unsigned)-1 >> 4))
352: overflow = 1;
353: else
354: n = (n<<4) + d;
355: }
356: if ((char *)rcp - token <= 2)
357: error("invalid hexadecimal constant `%S'\n", token, (char *)rcp - token);
358: cp = rcp;
359: tsym = icon(n, overflow);
360: return ICON;
361: } else if (*token == '0') {
362: int err = 0, overflow = 0;
363: for ( ; map[*rcp]&DIGIT; rcp++) {
364: if (*rcp == '8' || *rcp == '9')
365: err = 1;
366: if (n&~((unsigned)-1 >> 3))
367: overflow = 1;
368: else
369: n = (n<<3) + (unsigned)(*rcp - '0');
370: }
371: cp = rcp;
372: if (*rcp == '.' || *rcp == 'e' || *rcp == 'E') {
373: tsym = fcon();
374: return FCON;
375: }
376: if (err)
377: error("invalid octal constant `%S'\n", token, (char*)cp - token);
378: tsym = icon(n, overflow);
379: return ICON;
380: } else {
381: int overflow = 0;
382: for (n = *token - '0'; map[*rcp]&DIGIT; ) {
383: int d = *rcp++ - '0';
384: if (n > ((unsigned)UINT_MAX - d)/10)
385: overflow = 1;
386: else
387: n = 10*n + d;
388: }
389: cp = rcp;
390: if (*rcp == '.' || *rcp == 'e' || *rcp == 'E') {
391: tsym = fcon();
392: return FCON;
393: }
394: tsym = icon(n, overflow);
395: return ICON;
396: } }
397: case 'L': if (*rcp == '\'') {
398: int t = gettok();
399: assert(t == ICON);
400: src.x--;
401: tval.type = unsignedchar;
402: tval.u.c.v.uc = tval.u.c.v.i;
403: return t;
404: }
405: if (*rcp != '"')
406: goto id;
407: rcp++;
408: goto scon;
409: scon:
410: case '\'': case '"':
411: { static char cbuf[BUFSIZE+1];
412: char *s = cbuf;
413: int nbad = 0;
414: *s++ = *--rcp;
415: cp = rcp;
416: do {
417: cp++;
418: while (*cp && *cp != cbuf[0]) {
419: int c = *cp++;
420: if (map[c]&NEWLINE) {
421: if (cp <= limit)
422: break;
423: nextline();
424: continue;
425: }
426: if (c == '\\') {
427: if (map[*cp]&NEWLINE) {
428: if (cp < limit)
429: break;
430: cp++;
431: nextline();
432: }
433: if (limit - cp < MAXTOKEN)
434: fillbuf();
435: c = backslash(cbuf[0]);
436: } else if (map[c] == 0)
437: nbad++;
438: if (s < &cbuf[sizeof cbuf] - 2)
439: *s++ = c;
440: }
441: if (*cp == cbuf[0])
442: cp++;
443: else
444: error("missing %c\n", cbuf[0]);
445: } while (cbuf[0] == '"' && getchr() == '"');
446: *s++ = 0;
447: if (s >= &cbuf[sizeof cbuf])
448: error("%s literal too long\n",
449: cbuf[0] == '"' ? "string" : "character");
450: if (Aflag >= 2 && cbuf[0] == '"' && s - cbuf - 1 > 509)
451: warning("more than 509 characters in a string literal\n");
452: if (Aflag >= 2 && nbad)
453: warning("%s literal contains non-portable characters\n",
454: cbuf[0] == '"' ? "string" : "character");
455: token = cbuf;
456: tsym = &tval;
457: if (cbuf[0] == '"') {
458: tval.type = array(chartype, s - cbuf - 1, IR->structmetric.align);
459: tval.u.c.v.p = cbuf + 1;
460: return SCON;
461: } else {
462: if (s - cbuf > 3)
463: warning("excess characters in multibyte character literal `%S' ignored\n",
464: token, (char*)cp - token);
465: else if (s - cbuf <= 2)
466: error("missing '\n");
467: tval.type = inttype;
468: tval.u.c.v.i = cbuf[1];
469: return ICON;
470: } }
471: case '<':
472: if (*rcp == '=') return cp++, LEQ;
473: if (*rcp == '<') return cp++, LSHIFT;
474: return '<';
475: case '>':
476: if (*rcp == '=') return cp++, GEQ;
477: if (*rcp == '>') return cp++, RSHIFT;
478: return '>';
479: case '-':
480: if (*rcp == '>') return cp++, DEREF;
481: if (*rcp == '-') return cp++, DECR;
482: return '-';
483: case '=': return *rcp == '=' ? cp++, EQL : '=';
484: case '!': return *rcp == '=' ? cp++, NEQ : '!';
485: case '|': return *rcp == '|' ? cp++, OROR : '|';
486: case '&': return *rcp == '&' ? cp++, ANDAND : '&';
487: case '+': return *rcp == '+' ? cp++, INCR : '+';
488: case ';': case ',': case ':':
489: case '*': case '~': case '%': case '^': case '?':
490: case '[': case ']': case '{': case '}': case '(': case ')':
491: return rcp[-1];
492: #include "keywords.h"
493: id: if (limit - rcp < MAXLINE) {
494: cp = rcp - 1;
495: fillbuf();
496: rcp = ++cp;
497: }
498: assert(cp == rcp);
499: token = (char *)rcp - 1;
500: while (map[*rcp]&(DIGIT|LETTER))
501: rcp++;
502: token = stringn(token, (char *)rcp - token);
503: cp = rcp;
504: tsym = lookup(token, identifiers);
505: return ID;
506: default:
507: if ((map[cp[-1]]&BLANK) == 0)
508: if (cp[-1] < ' ' || cp[-1] >= 0177)
509: error("illegal character `\\0%o'\n", cp[-1]);
510: else
511: error("illegal character `%c'\n", cp[-1]);
512: }
513: }
514: return EOI;
515: }
516: /* icon - scan for tail of an integer constant n, return symbol */
517: static Symbol icon(n, overflow) unsigned n; {
518: int u = 0;
519:
520: if (*cp == 'u' || *cp == 'U')
521: u = *cp++;
522: if (*cp == 'l' || *cp == 'L')
523: *cp++;
524: if (u == 0 && *cp == 'u' || *cp == 'U')
525: u = *cp++;
526: if (overflow) {
527: warning("overflow in constant `%S'\n", token, (char*)cp - token);
528: n = INT_MAX;
529: }
530: if (u || n > (unsigned)INT_MAX) {
531: tval.type = unsignedtype;
532: tval.u.c.v.u = n;
533: } else {
534: tval.type = inttype;
535: tval.u.c.v.i = n;
536: }
537: ppnumber("integer");
538: return &tval;
539: }
540: static void ppnumber(kind) char *kind; {
541: unsigned char *rcp;
542:
543: for (rcp = cp--; (map[*cp]&(DIGIT|LETTER)) || *cp == '.'; cp++)
544: if ((cp[0] == 'E' || cp[0] == 'e')
545: && (cp[1] == '-' || cp[1] == '+'))
546: cp++;
547: if (cp > rcp)
548: error("`%S' is a preprocessing number but an invalid %s constant\n",
549: token, (char*)cp - token, kind);
550: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.