|
|
1.1 ! root 1: /* ! 2: * The lexical analyzer. ! 3: */ ! 4: ! 5: #include "itran.h" ! 6: #include "token.h" ! 7: #include "lex.h" ! 8: #include "char.h" ! 9: #include "tree.h" ! 10: ! 11: int tline; ! 12: int tcol; ! 13: ! 14: /* ! 15: * yylex - find the next token in the input stream, and return its token ! 16: * type and value to the parser. ! 17: * ! 18: * Variables of interest: ! 19: * ! 20: * cc - character following last token. ! 21: * comflag - set if in a comment. ! 22: * nlflag - set if a newline was between the last token and the current token ! 23: * lastend - set if the last token was an ENDER. ! 24: * lastval - when a semicolon is inserted and returned, lastval gets the ! 25: * token value that would have been returned if the semicolon hadn't ! 26: * been inserted. ! 27: */ ! 28: ! 29: yylex() ! 30: { ! 31: register struct toktab *t; ! 32: register int c; ! 33: int nlflag; ! 34: int comflag; ! 35: static struct toktab *lasttok = NULL; ! 36: static nodeptr lastval; ! 37: static int lastend = 0; ! 38: static int eofflag = 0; ! 39: static int lastline = 0; ! 40: static int cc = '\n'; ! 41: extern struct toktab *getident(), *getnum(), *getstring(), *getop(); ! 42: ! 43: if (lasttok != NULL) { ! 44: /* ! 45: * A semicolon was inserted and returned on the last call to yylex, ! 46: * instead of going to the input, return lasttok and set the ! 47: * appropriate variables. ! 48: */ ! 49: yylval = lastval; ! 50: tline = LINE(lastval); ! 51: tcol = COL(lastval); ! 52: t = lasttok; ! 53: goto ret; ! 54: } ! 55: nlflag = 0; ! 56: comflag = 0; ! 57: loop: ! 58: c = cc; ! 59: /* ! 60: * Skip whitespace and comments. ! 61: */ ! 62: while (c != EOF && (comflag || c == COMMENT || isspace(c))) { ! 63: if (c == '\n') { ! 64: nlflag++; ! 65: comflag = 0; ! 66: } ! 67: else if (c == COMMENT) ! 68: comflag++; ! 69: c = NEXTCHAR; ! 70: } ! 71: /* ! 72: * A token is the next thing in the input. Record the last line number ! 73: * and set tline and tcol to the current line and column. ! 74: */ ! 75: lastline = tline; ! 76: tline = inline; ! 77: tcol = incol; ! 78: ! 79: if (c == EOF) { ! 80: /* ! 81: * End of file has been reached. Set eofflag, return T_EOF, and ! 82: * set cc to EOF so that any subsequent scans also return T_EOF. ! 83: */ ! 84: if (eofflag++) { ! 85: eofflag = 0; ! 86: cc = '\n'; ! 87: return (int) (yylval = 0); ! 88: } ! 89: cc = EOF; ! 90: t = T_EOF; ! 91: yylval = 0; ! 92: goto ret; ! 93: } ! 94: ! 95: /* ! 96: * Look at current input character to determine what class of token ! 97: * is next and take the appropriate action. Note that the various ! 98: * token gathering routines write a value into cc. ! 99: */ ! 100: c = ctran[c]; ! 101: if (isalpha(c)) { /* gather ident or reserved word */ ! 102: if ((t = getident(c, &cc)) == NULL) ! 103: goto loop; ! 104: } ! 105: else if (isdigit(c)) { /* gather numeric literal */ ! 106: if ((t = getnum(c, &cc)) == NULL) ! 107: goto loop; ! 108: } ! 109: else if (c == '"' || c == '\'') { /* gather string or cset literal */ ! 110: if ((t = getstring(c, &cc)) == NULL) ! 111: goto loop; ! 112: } ! 113: else { /* gather longest legal operator */ ! 114: if ((t = getop(c, &cc)) == NULL) ! 115: goto loop; ! 116: yylval = OPNODE(t->t_type); ! 117: } ! 118: if (nlflag && lastend && (t->t_flags & BEGINNER)) { ! 119: /* ! 120: * A newline was encountered between the current token and the last, ! 121: * the last token was an ENDER, and the current token is a BEGINNER. ! 122: * Return a semicolon and save the current token in lastval. ! 123: */ ! 124: lastval = yylval; ! 125: lasttok = t; ! 126: tline = lastline; ! 127: tcol = 0; ! 128: yylval = OPNODE(SEMICOL); ! 129: return (SEMICOL); ! 130: } ! 131: ret: ! 132: /* ! 133: * Clear lasttok, set lastend if the token being returned is an ! 134: * ENDER, and return the token. ! 135: */ ! 136: lasttok = 0; ! 137: lastend = t->t_flags & ENDER; ! 138: return (t->t_type); ! 139: } ! 140: ! 141: /* ! 142: * getident - gather an identifier beginning with ac. The character ! 143: * following identifier goes in cc. ! 144: */ ! 145: ! 146: struct toktab *getident(ac, cc) ! 147: char ac; ! 148: int *cc; ! 149: { ! 150: register c; ! 151: register char *p; ! 152: register struct toktab *t; ! 153: extern char *putident(); ! 154: extern struct toktab *findres(); ! 155: ! 156: c = ac; ! 157: p = sfree; ! 158: /* ! 159: * Copy characters into string space until a non-alphanumeric character ! 160: * is found. ! 161: */ ! 162: do { ! 163: if (p >= send) ! 164: syserr("out of string space"); ! 165: *p++ = c; ! 166: c = ctran[NEXTCHAR]; ! 167: } while (isalnum(c)); ! 168: if (p >= send) ! 169: syserr("out of string space"); ! 170: *p++ = 0; ! 171: *cc = c; ! 172: /* ! 173: * If the identifier is a reserved word, make a RESNODE for it and return ! 174: * the token value. Otherwise, install it with putident, make an ! 175: * IDNODE for it, and return. ! 176: */ ! 177: if ((t = findres()) != NULL) { ! 178: yylval = RESNODE(t->t_type); ! 179: return (t); ! 180: } ! 181: else { ! 182: yylval = IDNODE((int)putident(p-sfree)); ! 183: return (T_IDENT); ! 184: } ! 185: } ! 186: ! 187: /* ! 188: * findres - if the string just copied into the string space by getident ! 189: * is a reserved word, return a pointer to its entry in the token table. ! 190: * Return NULL if the string isn't a reserved word. ! 191: */ ! 192: ! 193: struct toktab *findres() ! 194: { ! 195: register struct toktab *t; ! 196: register char c, *p; ! 197: ! 198: p = sfree; ! 199: c = *p; ! 200: if (!islower(c)) ! 201: return (NULL); ! 202: /* ! 203: * Point t at first reserved word that starts with c (if any). ! 204: */ ! 205: if ((t = restab[c - '_']) == NULL) ! 206: return (NULL); ! 207: /* ! 208: * Search through reserved words, stopping when a match is found ! 209: * or when the current reserved word doesn't start with c. ! 210: */ ! 211: while (t->t_word[0] == c) { ! 212: if (strcmp(t->t_word, p) == 0) ! 213: return (t); ! 214: t++; ! 215: } ! 216: return (NULL); ! 217: } ! 218: ! 219: /* ! 220: * getnum - gather a numeric literal starting with ac and put the ! 221: * character following the literal into *cc. ! 222: */ ! 223: ! 224: struct toktab *getnum(ac, cc) ! 225: char ac; ! 226: int *cc; ! 227: { ! 228: register c; ! 229: register r; ! 230: register state; ! 231: char *p; ! 232: int realflag; ! 233: extern char *putident(); ! 234: ! 235: c = ac; ! 236: r = tonum(c); ! 237: p = sfree; ! 238: state = 0; ! 239: realflag = 0; ! 240: for (;;) { ! 241: if (p >= send) ! 242: syserr("out of string space"); ! 243: *p++ = c; ! 244: c = ctran[NEXTCHAR]; ! 245: switch (state) { ! 246: case 0: /* integer part */ ! 247: if (isdigit(c)) { r = r * 10 + tonum(c); continue; } ! 248: if (c == '.') { state = 1; realflag++; continue; } ! 249: if (tolower(c) == 'e') { state = 2; realflag++; continue; } ! 250: if (tolower(c) == 'r') { ! 251: state = 5; ! 252: if (r < 2 || r > 36) ! 253: err("invalid radix for integer literal", 0); ! 254: continue; ! 255: } ! 256: break; ! 257: case 1: /* fractional part */ ! 258: if (isdigit(c)) continue; ! 259: if (tolower(c) == 'e') { state = 2; continue; } ! 260: break; ! 261: case 2: /* optional exponent sign */ ! 262: if (c == '+' || c == '-') { state = 3; continue; } ! 263: case 3: /* first digit after e, e+, or e- */ ! 264: if (isdigit(c)) { state = 4; continue; } ! 265: err("invalid real literal", 0); ! 266: break; ! 267: case 4: /* remaining digits after e */ ! 268: if (isdigit(c)) continue; ! 269: break; ! 270: case 5: /* first digit after r */ ! 271: if ((isdigit(c) || isletter(c)) && tonum(c) < r) ! 272: { state = 6; continue; } ! 273: err("invalid integer literal", 0); ! 274: break; ! 275: case 6: /* remaining digits after r */ ! 276: if (isdigit(c) || isletter(c)) { ! 277: if (tonum(c) >= r) { /* illegal digit for radix r */ ! 278: err("invalid digit in integer literal", 0); ! 279: r = tonum('z'); /* prevent more messages */ ! 280: } ! 281: continue; ! 282: } ! 283: break; ! 284: } ! 285: break; ! 286: } ! 287: if (p >= send) ! 288: syserr("out of string space"); ! 289: *p++ = 0; ! 290: *cc = c; ! 291: if (realflag) { ! 292: yylval = REALNODE((int)putident(p-sfree)); ! 293: return (T_REAL); ! 294: } ! 295: yylval = INTNODE((int)putident(p-sfree)); ! 296: return (T_INT); ! 297: } ! 298: ! 299: /* ! 300: * getstring - gather a string literal starting with ac and place the ! 301: * character following the literal in *cc. ! 302: */ ! 303: ! 304: struct toktab *getstring(ac, cc) ! 305: char ac; ! 306: int *cc; ! 307: { ! 308: register c, sc; ! 309: register char *p; ! 310: char *lc; ! 311: extern char *putident(); ! 312: ! 313: sc = c = ac; ! 314: p = sfree; ! 315: lc = 0; ! 316: while ((c = NEXTCHAR) != sc && c != '\n' && c != EOF) { ! 317: contin: ! 318: if (c == '_') ! 319: lc = p; ! 320: else if (!isspace(c)) ! 321: lc = 0; ! 322: if (ctran[c] == ESCAPE) { ! 323: c = NEXTCHAR; ! 324: if (isoctal(c)) ! 325: c = octesc(c); ! 326: else if (ctran[c] == 'x') ! 327: c = hexesc(); ! 328: else if (ctran[c] == '^') ! 329: c = ctlesc(); ! 330: else ! 331: c = esctab[c]; ! 332: if (c == EOF) ! 333: goto noquote; ! 334: } ! 335: if (p >= send) ! 336: syserr("out of string space"); ! 337: *p++ = c; ! 338: } ! 339: if (p >= send) ! 340: syserr("out of string space"); ! 341: *p++ = 0; ! 342: if (c == sc) ! 343: *cc = ' '; ! 344: else { ! 345: if (c == '\n' && lc) { ! 346: p = lc; ! 347: while ((c = NEXTCHAR) != EOF && isspace(c)) ; ! 348: if (c != EOF) ! 349: goto contin; ! 350: } ! 351: noquote: ! 352: err("unclosed quote", 0); ! 353: *cc = c; ! 354: } ! 355: if (ac == '"') { /* a string literal */ ! 356: yylval = STRNODE((int)putident(p-sfree), p-sfree); ! 357: return (T_STRING); ! 358: } ! 359: else { /* a cset literal */ ! 360: yylval = CSETNODE((int)putident(p-sfree), p-sfree); ! 361: return (T_CSET); ! 362: } ! 363: } ! 364: ! 365: /* ! 366: * ctlesc - translate a control escape -- backslash followed by ! 367: * caret and one character. ! 368: */ ! 369: ! 370: ctlesc() ! 371: { ! 372: register c; ! 373: ! 374: c = NEXTCHAR; ! 375: if (c == EOF) ! 376: return (EOF); ! 377: return (c & 037); ! 378: } ! 379: ! 380: /* ! 381: * octesc - translate an octal escape -- backslash followed by ! 382: * one, two, or three octal digits. ! 383: */ ! 384: ! 385: octesc(ac) ! 386: char ac; ! 387: { ! 388: register c, nc, i; ! 389: ! 390: c = 0; ! 391: nc = ac; ! 392: i = 1; ! 393: do { ! 394: c = (c << 3) | (nc - '0'); ! 395: nc = NEXTCHAR; ! 396: if (nc == EOF) ! 397: return (EOF); ! 398: } while (isoctal(nc) && i++ < 3); ! 399: PUSHCHAR(nc); ! 400: return (c & 0377); ! 401: } ! 402: ! 403: /* ! 404: * hexesc - translate a hexadecimal escape -- backslash-x ! 405: * followed by one or two hexadecimal digits. ! 406: */ ! 407: ! 408: hexesc() ! 409: { ! 410: register c, nc, i; ! 411: ! 412: c = 0; ! 413: i = 0; ! 414: while (i++ < 2) { ! 415: nc = NEXTCHAR; ! 416: if (nc == EOF) ! 417: return (EOF); ! 418: if (nc >= 'a' && nc <= 'f') ! 419: nc -= 'a' - 10; ! 420: else if (nc >= 'A' && nc <= 'F') ! 421: nc -= 'A' - 10; ! 422: else if (isdigit(nc)) ! 423: nc -= '0'; ! 424: else { ! 425: PUSHCHAR(nc); ! 426: break; ! 427: } ! 428: c = (c << 4) | nc; ! 429: } ! 430: return (c); ! 431: } ! 432: ! 433: /* ! 434: * getop - find the longest legal operator and return a pointer ! 435: * to its entry in the token table. The tour describes the ! 436: * operator recognition process in detail. ! 437: */ ! 438: ! 439: struct toktab *getop(ac, cc) ! 440: char ac; ! 441: int *cc; ! 442: { ! 443: register struct optab *state; ! 444: register char c, i; ! 445: ! 446: state = state0; ! 447: c = ac; ! 448: for (;;) { ! 449: while ((i = state->o_input) && c != i) ! 450: state++; ! 451: switch (state->o_action) { ! 452: case A_GOTO: ! 453: state = (struct optab *) state->o_val; ! 454: c = ctran[NEXTCHAR]; ! 455: continue; ! 456: case A_ERROR: ! 457: err("invalid character", 0); ! 458: *cc = ' '; ! 459: return (NULL); ! 460: case A_RETURN: ! 461: *cc = c; ! 462: return (struct toktab *) (state->o_val); ! 463: case A_IMMRET: ! 464: *cc = ' '; ! 465: return (struct toktab *) (state->o_val); ! 466: } ! 467: } ! 468: } ! 469: ! 470: /* ! 471: * nextchar - return the next character in the input. ! 472: */ ! 473: ! 474: nextchar() ! 475: { ! 476: register char c; ! 477: ! 478: if (c = peekc) { ! 479: peekc = 0; ! 480: return (c); ! 481: } ! 482: c = getc(infile); ! 483: switch (c) { ! 484: case EOF: ! 485: inline = 0; ! 486: incol = 0; ! 487: break; ! 488: case '\n': ! 489: inline++; ! 490: incol = 0; ! 491: break; ! 492: case '\t': ! 493: incol = (incol | 7) + 1; ! 494: break; ! 495: case '\b': ! 496: if (incol) ! 497: incol--; ! 498: break; ! 499: default: ! 500: incol++; ! 501: } ! 502: return (c); ! 503: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.