|
|
1.1 ! root 1: /* Token-reader for Bison's input parser, ! 2: Copyright (C) 1984, 1986 Bob Corbett and Free Software Foundation, Inc. ! 3: ! 4: BISON is distributed in the hope that it will be useful, but WITHOUT ANY ! 5: WARRANTY. No author or distributor accepts responsibility to anyone ! 6: for the consequences of using it or for whether it serves any ! 7: particular purpose or works at all, unless he says so in writing. ! 8: Refer to the BISON General Public License for full details. ! 9: ! 10: Everyone is granted permission to copy, modify and redistribute BISON, ! 11: but only under the conditions described in the BISON General Public ! 12: License. A copy of this license is supposed to have been given to you ! 13: along with BISON so you can know your rights and responsibilities. It ! 14: should be in a file named COPYING. Among other things, the copyright ! 15: notice and this notice must be preserved on all copies. ! 16: ! 17: In other words, you are welcome to use, share and improve this program. ! 18: You are forbidden to forbid anyone else to use, share and improve ! 19: what you give them. Help stamp out software-hoarding! */ ! 20: ! 21: /* ! 22: lex() is the entry point. It is called from reader.c. ! 23: It returns one of the token-type codes defined in lex.h. ! 24: When an identifier is seen, the code IDENTIFIER is returned ! 25: and the name is looked up in the symbol table using symtab.c; ! 26: symval is set to a pointer to the entry found. */ ! 27: ! 28: #include <stdio.h> ! 29: #include <ctype.h> ! 30: #include "files.h" ! 31: #include "symtab.h" ! 32: #include "lex.h" ! 33: ! 34: ! 35: extern int lineno; ! 36: extern int translations; ! 37: ! 38: ! 39: char token_buffer[MAXTOKEN + 1]; ! 40: bucket *symval; ! 41: int numval; ! 42: ! 43: static int unlexed; /* these two describe a token to be reread */ ! 44: static bucket *unlexed_symval; /* by the next call to lex */ ! 45: ! 46: ! 47: ! 48: init_lex() ! 49: { ! 50: unlexed = -1; ! 51: } ! 52: ! 53: ! 54: ! 55: int ! 56: skip_white_space() ! 57: { ! 58: register int c; ! 59: register int inside; ! 60: ! 61: c = getc(finput); ! 62: ! 63: for (;;) ! 64: { ! 65: switch (c) ! 66: { ! 67: case '/': ! 68: c = getc(finput); ! 69: if (c != '*') ! 70: fatals("unexpected '/%c' found",c); ! 71: ! 72: c = getc(finput); ! 73: ! 74: inside = 1; ! 75: while (inside) ! 76: { ! 77: if (c == '*') ! 78: { ! 79: while (c == '*') ! 80: c = getc(finput); ! 81: ! 82: if (c == '/') ! 83: { ! 84: inside = 0; ! 85: c = getc(finput); ! 86: } ! 87: } ! 88: else if (c == '\n') ! 89: { ! 90: lineno++; ! 91: c = getc(finput); ! 92: } ! 93: else if (c == EOF) ! 94: fatal("unterminated comment"); ! 95: else ! 96: c = getc(finput); ! 97: } ! 98: ! 99: break; ! 100: ! 101: case '\n': ! 102: lineno++; ! 103: ! 104: case ' ': ! 105: case '\t': ! 106: case '\f': ! 107: c = getc(finput); ! 108: break; ! 109: ! 110: default: ! 111: return (c); ! 112: } ! 113: } ! 114: } ! 115: ! 116: ! 117: ! 118: unlex(token) ! 119: int token; ! 120: { ! 121: unlexed = token; ! 122: unlexed_symval = symval; ! 123: } ! 124: ! 125: ! 126: ! 127: int ! 128: lex() ! 129: { ! 130: register int c; ! 131: register char *p; ! 132: ! 133: if (unlexed >= 0) ! 134: { ! 135: symval = unlexed_symval; ! 136: c = unlexed; ! 137: unlexed = -1; ! 138: return (c); ! 139: } ! 140: ! 141: c = skip_white_space(); ! 142: ! 143: switch (c) ! 144: { ! 145: case EOF: ! 146: return (ENDFILE); ! 147: ! 148: case 'A': case 'B': case 'C': case 'D': case 'E': ! 149: case 'F': case 'G': case 'H': case 'I': case 'J': ! 150: case 'K': case 'L': case 'M': case 'N': case 'O': ! 151: case 'P': case 'Q': case 'R': case 'S': case 'T': ! 152: case 'U': case 'V': case 'W': case 'X': case 'Y': ! 153: case 'Z': ! 154: case 'a': case 'b': case 'c': case 'd': case 'e': ! 155: case 'f': case 'g': case 'h': case 'i': case 'j': ! 156: case 'k': case 'l': case 'm': case 'n': case 'o': ! 157: case 'p': case 'q': case 'r': case 's': case 't': ! 158: case 'u': case 'v': case 'w': case 'x': case 'y': ! 159: case 'z': ! 160: case '.': case '_': ! 161: p = token_buffer; ! 162: while (isalnum(c) || c == '_' || c == '.') ! 163: { ! 164: if (p < token_buffer + MAXTOKEN) ! 165: *p++ = c; ! 166: c = getc(finput); ! 167: } ! 168: ! 169: *p = 0; ! 170: ungetc(c, finput); ! 171: symval = getsym(token_buffer); ! 172: return (IDENTIFIER); ! 173: ! 174: case '0': case '1': case '2': case '3': case '4': ! 175: case '5': case '6': case '7': case '8': case '9': ! 176: { ! 177: numval = 0; ! 178: ! 179: while (isdigit(c)) ! 180: { ! 181: numval = numval*10 + c - '0'; ! 182: c = getc(finput); ! 183: } ! 184: ungetc(c, finput); ! 185: return (NUMBER); ! 186: } ! 187: ! 188: case '\'': ! 189: translations = -1; ! 190: ! 191: /* parse the literal token and compute character code in code */ ! 192: ! 193: c = getc(finput); ! 194: { ! 195: register int code = 0; ! 196: ! 197: if (c == '\\') ! 198: { ! 199: c = getc(finput); ! 200: ! 201: if (c <= '7' && c >= '0') ! 202: { ! 203: while (c <= '7' && c >= '0') ! 204: { ! 205: code = (code * 8) + (c - '0'); ! 206: c = getc(finput); ! 207: } ! 208: if (code >= 128 || code<0)/* JF this said if(c>=128) */ ! 209: fatals("malformatted literal token '\\%03o'",code); ! 210: } ! 211: else ! 212: { ! 213: if (c == 't') ! 214: code = '\t'; ! 215: else if (c == 'n') ! 216: code = '\n'; ! 217: else if (c == 'r') ! 218: code = '\r'; ! 219: else if (c == 'f') ! 220: code = '\f'; ! 221: else if (c == 'b') ! 222: code = '\b'; ! 223: else if (c == '\\') ! 224: code = '\\'; ! 225: else if (c == '\'') ! 226: code = '\''; ! 227: else if (c == '\"') /* JF this is a good idea */ ! 228: code = '\"'; ! 229: else fatals("invalid literal token '\\%c'",c); ! 230: c = getc(finput); ! 231: } ! 232: } ! 233: else ! 234: { ! 235: code = c; ! 236: c = getc(finput); ! 237: } ! 238: if (c != '\'') ! 239: fatal("multicharacter literal tokens NOT supported"); ! 240: ! 241: /* now fill token_buffer with the canonical name for this character ! 242: as a literal token. Do not use what the user typed, ! 243: so that '\012' and '\n' can be interchangeable. */ ! 244: ! 245: p = token_buffer; ! 246: *p++ = '\''; ! 247: if (code == '\\') ! 248: { ! 249: p = token_buffer + 1; ! 250: *p++ = '\\'; ! 251: *p++ = '\\'; ! 252: } ! 253: else if (code == '\'') ! 254: { ! 255: p = token_buffer + 1; ! 256: *p++ = '\\'; ! 257: *p++ = '\''; ! 258: } ! 259: else if (code >= 040 && code != 0177) ! 260: *p++ = code; ! 261: else if (code == '\t') ! 262: { ! 263: p = token_buffer + 1; ! 264: *p++ = '\\'; ! 265: *p++ = 't'; ! 266: } ! 267: else if (code == '\n') ! 268: { ! 269: p = token_buffer + 1; ! 270: *p++ = '\\'; ! 271: *p++ = 'n'; ! 272: } ! 273: else if (code == '\r') ! 274: { ! 275: p = token_buffer + 1; ! 276: *p++ = '\\'; ! 277: *p++ = 'r'; ! 278: } ! 279: else if (code == '\b') ! 280: { ! 281: p = token_buffer + 1; ! 282: *p++ = '\\'; ! 283: *p++ = 'b'; ! 284: } ! 285: else if (code == '\f') ! 286: { ! 287: p = token_buffer + 1; ! 288: *p++ = '\\'; ! 289: *p++ = 'f'; ! 290: } ! 291: else ! 292: { ! 293: *p++ = code / 0100 + '0'; ! 294: *p++ = ((code / 010) & 07) + '0'; ! 295: *p++ = (code & 07) + '0'; ! 296: } ! 297: *p++ = '\''; ! 298: *p = 0; ! 299: symval = getsym(token_buffer); ! 300: symval->class = STOKEN; ! 301: if (! symval->user_token_number) ! 302: symval->user_token_number = code; ! 303: return (IDENTIFIER); ! 304: } ! 305: ! 306: case ',': ! 307: return (COMMA); ! 308: ! 309: case ':': ! 310: return (COLON); ! 311: ! 312: case ';': ! 313: return (SEMICOLON); ! 314: ! 315: case '|': ! 316: return (BAR); ! 317: ! 318: case '{': ! 319: return (LEFT_CURLY); ! 320: ! 321: case '=': ! 322: do ! 323: { ! 324: c = getc(finput); ! 325: if (c == '\n') lineno++; ! 326: } ! 327: while(c==' ' || c=='\n' || c=='\t'); ! 328: ! 329: if (c == '{') ! 330: return(LEFT_CURLY); ! 331: else ! 332: { ! 333: ungetc(c, finput); ! 334: return(ILLEGAL); ! 335: } ! 336: ! 337: case '<': ! 338: p = token_buffer; ! 339: c = getc(finput); ! 340: while (c != '>') ! 341: { ! 342: if (c == '\n' || c == EOF) ! 343: fatal("unterminated type name"); ! 344: ! 345: if (p >= token_buffer + MAXTOKEN - 1) ! 346: fatals("type name too long (%d max)",MAXTOKEN-1); ! 347: ! 348: *p++ = c; ! 349: c = getc(finput); ! 350: } ! 351: *p = 0; ! 352: return (TYPENAME); ! 353: ! 354: ! 355: case '%': ! 356: return (parse_percent_token()); ! 357: ! 358: default: ! 359: return (ILLEGAL); ! 360: } ! 361: } ! 362: ! 363: ! 364: /* parse a token which starts with %. Assumes the % has already been read and discarded. */ ! 365: ! 366: int ! 367: parse_percent_token () ! 368: { ! 369: register int c; ! 370: register char *p; ! 371: ! 372: p = token_buffer; ! 373: c = getc(finput); ! 374: ! 375: switch (c) ! 376: { ! 377: case '%': ! 378: return (TWO_PERCENTS); ! 379: ! 380: case '{': ! 381: return (PERCENT_LEFT_CURLY); ! 382: ! 383: case '<': ! 384: return (LEFT); ! 385: ! 386: case '>': ! 387: return (RIGHT); ! 388: ! 389: case '2': ! 390: return (NONASSOC); ! 391: ! 392: case '0': ! 393: return (TOKEN); ! 394: ! 395: case '=': ! 396: return (PREC); ! 397: } ! 398: if (!isalpha(c)) ! 399: return (ILLEGAL); ! 400: ! 401: while (isalpha(c) || c == '_') ! 402: { ! 403: if (p < token_buffer + MAXTOKEN) ! 404: *p++ = c; ! 405: c = getc(finput); ! 406: } ! 407: ! 408: ungetc(c, finput); ! 409: ! 410: *p = 0; ! 411: ! 412: if (strcmp(token_buffer, "token") == 0 ! 413: || ! 414: strcmp(token_buffer, "term") == 0) ! 415: return (TOKEN); ! 416: else if (strcmp(token_buffer, "nterm") == 0) ! 417: return (NTERM); ! 418: else if (strcmp(token_buffer, "type") == 0) ! 419: return (TYPE); ! 420: else if (strcmp(token_buffer, "guard") == 0) ! 421: return (GUARD); ! 422: else if (strcmp(token_buffer, "union") == 0) ! 423: return (UNION); ! 424: else if (strcmp(token_buffer, "expect") == 0) ! 425: return (EXPECT); ! 426: else if (strcmp(token_buffer, "start") == 0) ! 427: return (START); ! 428: else if (strcmp(token_buffer, "left") == 0) ! 429: return (LEFT); ! 430: else if (strcmp(token_buffer, "right") == 0) ! 431: return (RIGHT); ! 432: else if (strcmp(token_buffer, "nonassoc") == 0 ! 433: || ! 434: strcmp(token_buffer, "binary") == 0) ! 435: return (NONASSOC); ! 436: else if (strcmp(token_buffer, "semantic_parser") == 0) ! 437: return (SEMANTIC_PARSER); ! 438: else if (strcmp(token_buffer, "pure_parser") == 0) ! 439: return (PURE_PARSER); ! 440: else if (strcmp(token_buffer, "prec") == 0) ! 441: return (PREC); ! 442: else return (ILLEGAL); ! 443: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.