|
|
1.1 ! root 1: /* C compiler: lexical analysis */ ! 2: ! 3: #include "c.h" ! 4: ! 5: #ifdef __STDC__ ! 6: enum { BLANK=01, NEWLINE=02, LETTER=04, DIGIT=010, HEX=020, OTHER=040 }; ! 7: #else ! 8: #define BLANK 01 ! 9: #define NEWLINE 02 ! 10: #define LETTER 04 ! 11: #define DIGIT 010 ! 12: #define HEX 020 ! 13: #define OTHER 040 ! 14: #endif ! 15: char kind[] = { /* token kind, i.e., classification */ ! 16: #define xx(a,b,c,d,e,f,g) f, ! 17: #include "token.h" ! 18: }; ! 19: Coordinate src; /* current source coordinate */ ! 20: #ifdef __STDC__ ! 21: enum tokencode t; ! 22: #else ! 23: int t; ! 24: #endif ! 25: char *token; /* current token */ ! 26: Symbol tsym; /* symbol table entry for current token */ ! 27: ! 28: static struct symbol tval; /* symbol for constants */ ! 29: ! 30: static unsigned char map[256] = { ! 31: /* 000 nul */ 0, ! 32: /* 001 soh */ 0, ! 33: /* 002 stx */ 0, ! 34: /* 003 etx */ 0, ! 35: /* 004 eot */ 0, ! 36: /* 005 enq */ 0, ! 37: /* 006 ack */ 0, ! 38: /* 007 bel */ 0, ! 39: /* 010 bs */ 0, ! 40: /* 011 ht */ BLANK, ! 41: /* 012 nl */ NEWLINE, ! 42: /* 013 vt */ BLANK, ! 43: /* 014 ff */ BLANK, ! 44: /* 015 cr */ 0, ! 45: /* 016 so */ 0, ! 46: /* 017 si */ 0, ! 47: /* 020 dle */ 0, ! 48: /* 021 dc1 */ 0, ! 49: /* 022 dc2 */ 0, ! 50: /* 023 dc3 */ 0, ! 51: /* 024 dc4 */ 0, ! 52: /* 025 nak */ 0, ! 53: /* 026 syn */ 0, ! 54: /* 027 etb */ 0, ! 55: /* 030 can */ 0, ! 56: /* 031 em */ 0, ! 57: /* 032 sub */ 0, ! 58: /* 033 esc */ 0, ! 59: /* 034 fs */ 0, ! 60: /* 035 gs */ 0, ! 61: /* 036 rs */ 0, ! 62: /* 037 us */ 0, ! 63: /* 040 sp */ BLANK, ! 64: /* 041 ! */ OTHER, ! 65: /* 042 " */ OTHER, ! 66: /* 043 # */ OTHER, ! 67: /* 044 $ */ 0, ! 68: /* 045 % */ OTHER, ! 69: /* 046 & */ OTHER, ! 70: /* 047 ' */ OTHER, ! 71: /* 050 ( */ OTHER, ! 72: /* 051 ) */ OTHER, ! 73: /* 052 * */ OTHER, ! 74: /* 053 + */ OTHER, ! 75: /* 054 , */ OTHER, ! 76: /* 055 - */ OTHER, ! 77: /* 056 . */ OTHER, ! 78: /* 057 / */ OTHER, ! 79: /* 060 0 */ DIGIT, ! 80: /* 061 1 */ DIGIT, ! 81: /* 062 2 */ DIGIT, ! 82: /* 063 3 */ DIGIT, ! 83: /* 064 4 */ DIGIT, ! 84: /* 065 5 */ DIGIT, ! 85: /* 066 6 */ DIGIT, ! 86: /* 067 7 */ DIGIT, ! 87: /* 070 8 */ DIGIT, ! 88: /* 071 9 */ DIGIT, ! 89: /* 072 : */ OTHER, ! 90: /* 073 ; */ OTHER, ! 91: /* 074 < */ OTHER, ! 92: /* 075 = */ OTHER, ! 93: /* 076 > */ OTHER, ! 94: /* 077 ? */ OTHER, ! 95: /* 100 @ */ 0, ! 96: /* 101 A */ LETTER|HEX, ! 97: /* 102 B */ LETTER|HEX, ! 98: /* 103 C */ LETTER|HEX, ! 99: /* 104 D */ LETTER|HEX, ! 100: /* 105 E */ LETTER|HEX, ! 101: /* 106 F */ LETTER|HEX, ! 102: /* 107 G */ LETTER, ! 103: /* 110 H */ LETTER, ! 104: /* 111 I */ LETTER, ! 105: /* 112 J */ LETTER, ! 106: /* 113 K */ LETTER, ! 107: /* 114 L */ LETTER, ! 108: /* 115 M */ LETTER, ! 109: /* 116 N */ LETTER, ! 110: /* 117 O */ LETTER, ! 111: /* 120 P */ LETTER, ! 112: /* 121 Q */ LETTER, ! 113: /* 122 R */ LETTER, ! 114: /* 123 S */ LETTER, ! 115: /* 124 T */ LETTER, ! 116: /* 125 U */ LETTER, ! 117: /* 126 V */ LETTER, ! 118: /* 127 W */ LETTER, ! 119: /* 130 X */ LETTER, ! 120: /* 131 Y */ LETTER, ! 121: /* 132 Z */ LETTER, ! 122: /* 133 [ */ OTHER, ! 123: /* 134 \ */ OTHER, ! 124: /* 135 ] */ OTHER, ! 125: /* 136 ^ */ OTHER, ! 126: /* 137 _ */ LETTER, ! 127: /* 140 ` */ 0, ! 128: /* 141 a */ LETTER|HEX, ! 129: /* 142 b */ LETTER|HEX, ! 130: /* 143 c */ LETTER|HEX, ! 131: /* 144 d */ LETTER|HEX, ! 132: /* 145 e */ LETTER|HEX, ! 133: /* 146 f */ LETTER|HEX, ! 134: /* 147 g */ LETTER, ! 135: /* 150 h */ LETTER, ! 136: /* 151 i */ LETTER, ! 137: /* 152 j */ LETTER, ! 138: /* 153 k */ LETTER, ! 139: /* 154 l */ LETTER, ! 140: /* 155 m */ LETTER, ! 141: /* 156 n */ LETTER, ! 142: /* 157 o */ LETTER, ! 143: /* 160 p */ LETTER, ! 144: /* 161 q */ LETTER, ! 145: /* 162 r */ LETTER, ! 146: /* 163 s */ LETTER, ! 147: /* 164 t */ LETTER, ! 148: /* 165 u */ LETTER, ! 149: /* 166 v */ LETTER, ! 150: /* 167 w */ LETTER, ! 151: /* 170 x */ LETTER, ! 152: /* 171 y */ LETTER, ! 153: /* 172 z */ LETTER, ! 154: /* 173 { */ OTHER, ! 155: /* 174 | */ OTHER, ! 156: /* 175 } */ OTHER, ! 157: /* 176 ~ */ OTHER, ! 158: }; ! 159: #ifdef strtod ! 160: #define ERANGE 1 ! 161: static int errno; ! 162: #else ! 163: #include <errno.h> ! 164: #endif ! 165: dclproto(static int backslash,(int)); ! 166: dclproto(static Symbol fcon,(void)); ! 167: dclproto(static Symbol icon,(unsigned int, int)); ! 168: dclproto(static void ppnumber,(char *)); ! 169: /* backslash - get next character with \'s interpreted in q ... q */ ! 170: static int backslash(q) { ! 171: int c; ! 172: ! 173: switch (*cp++) { ! 174: case 'a': return 7; ! 175: case 'b': return '\b'; ! 176: case 'f': return '\f'; ! 177: case 'n': return '\n'; ! 178: case 'r': return '\r'; ! 179: case 't': return '\t'; ! 180: case 'v': return '\v'; ! 181: case '\'': case '"': case '\\': case '\?': break; ! 182: case 'x': { ! 183: int overflow = 0; ! 184: if ((map[*cp]&(DIGIT|HEX)) == 0) { ! 185: if (*cp < ' ' || *cp == 0177) ! 186: error("ill-formed hexadecimal escape sequence\n"); ! 187: else ! 188: error("ill-formed hexadecimal escape sequence `\\x%c'\n", *cp); ! 189: if (*cp != q) ! 190: cp++; ! 191: return 0; ! 192: } ! 193: for (c = 0; map[*cp]&(DIGIT|HEX); cp++) { ! 194: if (c&~((unsigned)-1 >> 4)) ! 195: overflow++; ! 196: if (map[*cp]&DIGIT) ! 197: c = (c<<4) + *cp - '0'; ! 198: else ! 199: c = (c<<4) + (*cp&~040) - 'A' + 10; ! 200: } ! 201: if (c&~0377 || overflow) ! 202: warning("overflow in hexadecimal escape sequence\n"); ! 203: return c&0377; ! 204: } ! 205: case '0': case '1': case '2': case '3': ! 206: case '4': case '5': case '6': case '7': ! 207: c = *(cp-1) - '0'; ! 208: if (*cp >= '0' && *cp <= '7') { ! 209: c = (c<<3) + *cp++ - '0'; ! 210: if (*cp >= '0' && *cp <= '7') ! 211: c = (c<<3) + *cp++ - '0'; ! 212: } ! 213: if (c&~0377) ! 214: warning("overflow in octal escape sequence\n"); ! 215: return c&0377; ! 216: default: ! 217: if (cp[-1] < ' ' || cp[-1] >= 0177) ! 218: warning("unrecognized character escape sequence\n"); ! 219: else ! 220: warning("unrecognized character escape sequence `\\%c'\n", cp[-1]); ! 221: } ! 222: return cp[-1]; ! 223: } ! 224: /* fcon - scan for tail of a floating constant, return symbol */ ! 225: static Symbol fcon() { ! 226: char *s = token; ! 227: int n = 0; ! 228: ! 229: while (s < (char *)cp) ! 230: n += *s++ - '0'; ! 231: if (*cp == '.') ! 232: for (cp++; map[*cp]&DIGIT; cp++) ! 233: n += *cp - '0'; ! 234: if (*cp == 'e' || *cp == 'E') { ! 235: if (*++cp == '-' || *cp == '+') ! 236: cp++; ! 237: if (map[*cp]&DIGIT) ! 238: do cp++; while (map[*cp]&DIGIT); ! 239: else ! 240: error("invalid floating constant `%S'\n", token, (char*)cp - token); ! 241: } ! 242: if (n == 0) ! 243: tval.u.c.v.d = 0.0; ! 244: else { ! 245: errno = 0; ! 246: tval.u.c.v.d = strtod(token, (char **)0); ! 247: if (errno == ERANGE) ! 248: warning("overflow in floating constant `%S'\n", token, (char*)cp - token); ! 249: } ! 250: if (*cp == 'f' || *cp == 'F') { ! 251: ++cp; ! 252: if (tval.u.c.v.d > FLT_MAX) ! 253: warning("overflow in floating constant `%S'\n", token, (char*)cp - token); ! 254: tval.type = floattype; ! 255: tval.u.c.v.f = tval.u.c.v.d; ! 256: } else if (*cp == 'l' || *cp == 'L') { ! 257: cp++; ! 258: tval.type = longdouble; ! 259: } else ! 260: tval.type = doubletype; ! 261: ppnumber("floating"); ! 262: return &tval; ! 263: } ! 264: ! 265: /* getchr - return next significant character */ ! 266: int getchr() { ! 267: while (*cp) { ! 268: while (map[*cp]&BLANK) ! 269: cp++; ! 270: if (!(map[*cp]&NEWLINE)) ! 271: return *cp; ! 272: cp++; ! 273: nextline(); ! 274: } ! 275: return EOI; ! 276: } ! 277: ! 278: /* gettok - return next token */ ! 279: int gettok() { ! 280: while (*cp) { ! 281: register unsigned char *rcp = cp; ! 282: while (map[*rcp]&BLANK) ! 283: rcp++; ! 284: if (limit - rcp < MAXTOKEN) { ! 285: cp = rcp; ! 286: fillbuf(); ! 287: rcp = cp; ! 288: } ! 289: src.file = file; ! 290: src.x = (char *)rcp - line; ! 291: src.y = lineno; ! 292: cp = rcp + 1; ! 293: switch (*rcp++) { ! 294: case '\n': case '\v': case '\r': case '\f': ! 295: nextline(); ! 296: continue; ! 297: case '/': if (*rcp == '*') { ! 298: int c = 0; ! 299: for (rcp++; *rcp && (*rcp != '/' || c != '*'); ) ! 300: if (map[*rcp]&NEWLINE) { ! 301: if (rcp < limit) ! 302: c = *rcp; ! 303: cp = rcp + 1; ! 304: nextline(); ! 305: rcp = cp; ! 306: } else ! 307: c = *rcp++; ! 308: if (*rcp) ! 309: rcp++; ! 310: else ! 311: error("unclosed comment\n"); ! 312: cp = rcp; ! 313: continue; ! 314: } ! 315: return '/'; ! 316: case '.': if (rcp[0] == '.' && rcp[1] == '.') ! 317: return cp += 2, ELLIPSIS; ! 318: if ((map[*rcp]&DIGIT) == 0) ! 319: return '.'; ! 320: if (limit - rcp < MAXLINE) { ! 321: cp = rcp - 1; ! 322: fillbuf(); ! 323: rcp = ++cp; ! 324: } ! 325: assert(cp == rcp); ! 326: cp = rcp - 1; ! 327: token = (char *)cp; ! 328: tsym = fcon(); ! 329: return FCON; ! 330: case '0': case '1': case '2': case '3': case '4': ! 331: case '5': case '6': case '7': case '8': case '9': ! 332: { unsigned int n = 0; ! 333: if (limit - rcp < MAXLINE) { ! 334: cp = rcp - 1; ! 335: fillbuf(); ! 336: rcp = ++cp; ! 337: } ! 338: assert(cp == rcp); ! 339: token = (char *)rcp - 1; ! 340: if (*token == '0' && (*rcp == 'x' || *rcp == 'X')) { ! 341: int d, overflow = 0; ! 342: while (*++rcp) { ! 343: if (map[*rcp]&DIGIT) ! 344: d = *rcp - '0'; ! 345: else if (*rcp >= 'a' && *rcp <= 'f') ! 346: d = *rcp - 'a' + 10; ! 347: else if (*rcp >= 'A' && *rcp <= 'F') ! 348: d = *rcp - 'A' + 10; ! 349: else ! 350: break; ! 351: if (n&~((unsigned)-1 >> 4)) ! 352: overflow = 1; ! 353: else ! 354: n = (n<<4) + d; ! 355: } ! 356: if ((char *)rcp - token <= 2) ! 357: error("invalid hexadecimal constant `%S'\n", token, (char *)rcp - token); ! 358: cp = rcp; ! 359: tsym = icon(n, overflow); ! 360: return ICON; ! 361: } else if (*token == '0') { ! 362: int err = 0, overflow = 0; ! 363: for ( ; map[*rcp]&DIGIT; rcp++) { ! 364: if (*rcp == '8' || *rcp == '9') ! 365: err = 1; ! 366: if (n&~((unsigned)-1 >> 3)) ! 367: overflow = 1; ! 368: else ! 369: n = (n<<3) + (unsigned)(*rcp - '0'); ! 370: } ! 371: cp = rcp; ! 372: if (*rcp == '.' || *rcp == 'e' || *rcp == 'E') { ! 373: tsym = fcon(); ! 374: return FCON; ! 375: } ! 376: if (err) ! 377: error("invalid octal constant `%S'\n", token, (char*)cp - token); ! 378: tsym = icon(n, overflow); ! 379: return ICON; ! 380: } else { ! 381: int overflow = 0; ! 382: for (n = *token - '0'; map[*rcp]&DIGIT; ) { ! 383: int d = *rcp++ - '0'; ! 384: if (n > ((unsigned)UINT_MAX - d)/10) ! 385: overflow = 1; ! 386: else ! 387: n = 10*n + d; ! 388: } ! 389: cp = rcp; ! 390: if (*rcp == '.' || *rcp == 'e' || *rcp == 'E') { ! 391: tsym = fcon(); ! 392: return FCON; ! 393: } ! 394: tsym = icon(n, overflow); ! 395: return ICON; ! 396: } } ! 397: case 'L': if (*rcp == '\'') { ! 398: int t = gettok(); ! 399: assert(t == ICON); ! 400: src.x--; ! 401: tval.type = unsignedchar; ! 402: tval.u.c.v.uc = tval.u.c.v.i; ! 403: return t; ! 404: } ! 405: if (*rcp != '"') ! 406: goto id; ! 407: rcp++; ! 408: goto scon; ! 409: scon: ! 410: case '\'': case '"': ! 411: { static char cbuf[BUFSIZE+1]; ! 412: char *s = cbuf; ! 413: int nbad = 0; ! 414: *s++ = *--rcp; ! 415: cp = rcp; ! 416: do { ! 417: cp++; ! 418: while (*cp && *cp != cbuf[0]) { ! 419: int c = *cp++; ! 420: if (map[c]&NEWLINE) { ! 421: if (cp <= limit) ! 422: break; ! 423: nextline(); ! 424: continue; ! 425: } ! 426: if (c == '\\') { ! 427: if (map[*cp]&NEWLINE) { ! 428: if (cp < limit) ! 429: break; ! 430: cp++; ! 431: nextline(); ! 432: } ! 433: if (limit - cp < MAXTOKEN) ! 434: fillbuf(); ! 435: c = backslash(cbuf[0]); ! 436: } else if (map[c] == 0) ! 437: nbad++; ! 438: if (s < &cbuf[sizeof cbuf] - 2) ! 439: *s++ = c; ! 440: } ! 441: if (*cp == cbuf[0]) ! 442: cp++; ! 443: else ! 444: error("missing %c\n", cbuf[0]); ! 445: } while (cbuf[0] == '"' && getchr() == '"'); ! 446: *s++ = 0; ! 447: if (s >= &cbuf[sizeof cbuf]) ! 448: error("%s literal too long\n", ! 449: cbuf[0] == '"' ? "string" : "character"); ! 450: if (Aflag >= 2 && cbuf[0] == '"' && s - cbuf - 1 > 509) ! 451: warning("more than 509 characters in a string literal\n"); ! 452: if (Aflag >= 2 && nbad) ! 453: warning("%s literal contains non-portable characters\n", ! 454: cbuf[0] == '"' ? "string" : "character"); ! 455: token = cbuf; ! 456: tsym = &tval; ! 457: if (cbuf[0] == '"') { ! 458: tval.type = array(chartype, s - cbuf - 1, IR->structmetric.align); ! 459: tval.u.c.v.p = cbuf + 1; ! 460: return SCON; ! 461: } else { ! 462: if (s - cbuf > 3) ! 463: warning("excess characters in multibyte character literal `%S' ignored\n", ! 464: token, (char*)cp - token); ! 465: else if (s - cbuf <= 2) ! 466: error("missing '\n"); ! 467: tval.type = inttype; ! 468: tval.u.c.v.i = cbuf[1]; ! 469: return ICON; ! 470: } } ! 471: case '<': ! 472: if (*rcp == '=') return cp++, LEQ; ! 473: if (*rcp == '<') return cp++, LSHIFT; ! 474: return '<'; ! 475: case '>': ! 476: if (*rcp == '=') return cp++, GEQ; ! 477: if (*rcp == '>') return cp++, RSHIFT; ! 478: return '>'; ! 479: case '-': ! 480: if (*rcp == '>') return cp++, DEREF; ! 481: if (*rcp == '-') return cp++, DECR; ! 482: return '-'; ! 483: case '=': return *rcp == '=' ? cp++, EQL : '='; ! 484: case '!': return *rcp == '=' ? cp++, NEQ : '!'; ! 485: case '|': return *rcp == '|' ? cp++, OROR : '|'; ! 486: case '&': return *rcp == '&' ? cp++, ANDAND : '&'; ! 487: case '+': return *rcp == '+' ? cp++, INCR : '+'; ! 488: case ';': case ',': case ':': ! 489: case '*': case '~': case '%': case '^': case '?': ! 490: case '[': case ']': case '{': case '}': case '(': case ')': ! 491: return rcp[-1]; ! 492: #include "keywords.h" ! 493: id: if (limit - rcp < MAXLINE) { ! 494: cp = rcp - 1; ! 495: fillbuf(); ! 496: rcp = ++cp; ! 497: } ! 498: assert(cp == rcp); ! 499: token = (char *)rcp - 1; ! 500: while (map[*rcp]&(DIGIT|LETTER)) ! 501: rcp++; ! 502: token = stringn(token, (char *)rcp - token); ! 503: cp = rcp; ! 504: tsym = lookup(token, identifiers); ! 505: return ID; ! 506: default: ! 507: if ((map[cp[-1]]&BLANK) == 0) ! 508: if (cp[-1] < ' ' || cp[-1] >= 0177) ! 509: error("illegal character `\\0%o'\n", cp[-1]); ! 510: else ! 511: error("illegal character `%c'\n", cp[-1]); ! 512: } ! 513: } ! 514: return EOI; ! 515: } ! 516: /* icon - scan for tail of an integer constant n, return symbol */ ! 517: static Symbol icon(n, overflow) unsigned n; { ! 518: int u = 0; ! 519: ! 520: if (*cp == 'u' || *cp == 'U') ! 521: u = *cp++; ! 522: if (*cp == 'l' || *cp == 'L') ! 523: *cp++; ! 524: if (u == 0 && *cp == 'u' || *cp == 'U') ! 525: u = *cp++; ! 526: if (overflow) { ! 527: warning("overflow in constant `%S'\n", token, (char*)cp - token); ! 528: n = INT_MAX; ! 529: } ! 530: if (u || n > (unsigned)INT_MAX) { ! 531: tval.type = unsignedtype; ! 532: tval.u.c.v.u = n; ! 533: } else { ! 534: tval.type = inttype; ! 535: tval.u.c.v.i = n; ! 536: } ! 537: ppnumber("integer"); ! 538: return &tval; ! 539: } ! 540: static void ppnumber(kind) char *kind; { ! 541: unsigned char *rcp; ! 542: ! 543: for (rcp = cp--; (map[*cp]&(DIGIT|LETTER)) || *cp == '.'; cp++) ! 544: if ((cp[0] == 'E' || cp[0] == 'e') ! 545: && (cp[1] == '-' || cp[1] == '+')) ! 546: cp++; ! 547: if (cp > rcp) ! 548: error("`%S' is a preprocessing number but an invalid %s constant\n", ! 549: token, (char*)cp - token, kind); ! 550: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.