|
|
1.1 ! root 1: /* ! 2: * Copyright (c) 1983 Regents of the University of California. ! 3: * All rights reserved. The Berkeley software License Agreement ! 4: * specifies the terms and conditions for redistribution. ! 5: */ ! 6: ! 7: #ifndef lint ! 8: static char sccsid[] = "@(#)scanner.c 5.1 (Berkeley) 5/31/85"; ! 9: #endif not lint ! 10: ! 11: static char rcsid[] = "$Header: scanner.c,v 1.5 84/12/26 10:42:05 linton Exp $"; ! 12: ! 13: /* ! 14: * Debugger scanner. ! 15: */ ! 16: ! 17: #include "defs.h" ! 18: #include "scanner.h" ! 19: #include "main.h" ! 20: #include "keywords.h" ! 21: #include "tree.h" ! 22: #include "symbols.h" ! 23: #include "names.h" ! 24: #include "y.tab.h" ! 25: ! 26: #ifndef public ! 27: typedef int Token; ! 28: ! 29: #define MAXLINESIZE 10240 ! 30: ! 31: #endif ! 32: ! 33: public String initfile = ".dbxinit"; ! 34: ! 35: typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass; ! 36: ! 37: private Charclass class[256 + 1]; ! 38: private Charclass *lexclass = class + 1; ! 39: ! 40: #define isdigit(c) (lexclass[c] == NUM) ! 41: #define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM) ! 42: #define ishexdigit(c) ( \ ! 43: isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \ ! 44: ) ! 45: ! 46: public boolean chkalias; ! 47: public char scanner_linebuf[MAXLINESIZE]; ! 48: ! 49: private File in; ! 50: private char *curchar, *prevchar; ! 51: ! 52: #define MAXINCLDEPTH 10 ! 53: ! 54: private struct { ! 55: File savefile; ! 56: Filename savefn; ! 57: int savelineno; ! 58: } inclinfo[MAXINCLDEPTH]; ! 59: ! 60: private unsigned int curinclindex; ! 61: ! 62: private Token getident(); ! 63: private Token getnum(); ! 64: private Token getstring(); ! 65: private Boolean eofinput(); ! 66: private char charcon(); ! 67: ! 68: private enterlexclass(class, s) ! 69: Charclass class; ! 70: String s; ! 71: { ! 72: register char *p; ! 73: ! 74: for (p = s; *p != '\0'; p++) { ! 75: lexclass[*p] = class; ! 76: } ! 77: } ! 78: ! 79: public scanner_init() ! 80: { ! 81: register Integer i; ! 82: ! 83: for (i = 0; i < 257; i++) { ! 84: class[i] = OTHER; ! 85: } ! 86: enterlexclass(WHITE, " \t"); ! 87: enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz"); ! 88: enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$"); ! 89: enterlexclass(NUM, "0123456789"); ! 90: in = stdin; ! 91: errfilename = nil; ! 92: errlineno = 0; ! 93: curchar = scanner_linebuf; ! 94: scanner_linebuf[0] = '\0'; ! 95: chkalias = true; ! 96: } ! 97: ! 98: /* ! 99: * Read a single token. ! 100: * ! 101: * The input is line buffered. Tokens cannot cross line boundaries. ! 102: * ! 103: * There are two "modes" of operation: one as in a compiler, ! 104: * and one for reading shell-like syntax. In the first mode ! 105: * there is the additional choice of doing alias processing. ! 106: */ ! 107: ! 108: private Boolean shellmode; ! 109: ! 110: public Token yylex() ! 111: { ! 112: register int c; ! 113: register char *p; ! 114: register Token t; ! 115: String line; ! 116: integer n; ! 117: ! 118: p = curchar; ! 119: if (*p == '\0') { ! 120: do { ! 121: if (isterm(in)) { ! 122: printf("(%s) ", cmdname); ! 123: } ! 124: fflush(stdout); ! 125: line = fgets(scanner_linebuf, MAXLINESIZE, in); ! 126: } while (line == nil and not eofinput()); ! 127: if (line == nil) { ! 128: c = EOF; ! 129: } else { ! 130: p = scanner_linebuf; ! 131: while (lexclass[*p] == WHITE) { ! 132: p++; ! 133: } ! 134: shellmode = false; ! 135: } ! 136: chkalias = true; ! 137: } else { ! 138: while (lexclass[*p] == WHITE) { ! 139: p++; ! 140: } ! 141: } ! 142: curchar = p; ! 143: prevchar = curchar; ! 144: c = *p; ! 145: if (lexclass[c] == ALPHA) { ! 146: t = getident(chkalias); ! 147: } else if (lexclass[c] == NUM) { ! 148: if (shellmode) { ! 149: t = getident(chkalias); ! 150: } else { ! 151: t = getnum(); ! 152: } ! 153: } else { ! 154: ++curchar; ! 155: switch (c) { ! 156: case '\n': ! 157: t = '\n'; ! 158: if (errlineno != 0) { ! 159: errlineno++; ! 160: } ! 161: break; ! 162: ! 163: case '"': ! 164: case '\'': ! 165: t = getstring(c); ! 166: break; ! 167: ! 168: case '.': ! 169: if (shellmode) { ! 170: --curchar; ! 171: t = getident(chkalias); ! 172: } else if (isdigit(*curchar)) { ! 173: --curchar; ! 174: t = getnum(); ! 175: } else { ! 176: t = '.'; ! 177: } ! 178: break; ! 179: ! 180: case '-': ! 181: if (shellmode) { ! 182: --curchar; ! 183: t = getident(chkalias); ! 184: } else if (*curchar == '>') { ! 185: ++curchar; ! 186: t = ARROW; ! 187: } else { ! 188: t = '-'; ! 189: } ! 190: break; ! 191: ! 192: case '#': ! 193: if (not isterm(in)) { ! 194: *p = '\0'; ! 195: curchar = p; ! 196: t = '\n'; ! 197: ++errlineno; ! 198: } else { ! 199: t = '#'; ! 200: } ! 201: break; ! 202: ! 203: case '\\': ! 204: if (*(p+1) == '\n') { ! 205: n = MAXLINESIZE - (p - &scanner_linebuf[0]); ! 206: if (n > 1) { ! 207: if (fgets(p, n, in) == nil) { ! 208: t = 0; ! 209: } else { ! 210: curchar = p; ! 211: t = yylex(); ! 212: } ! 213: } else { ! 214: t = '\\'; ! 215: } ! 216: } else { ! 217: t = '\\'; ! 218: } ! 219: break; ! 220: ! 221: case EOF: ! 222: t = 0; ! 223: break; ! 224: ! 225: default: ! 226: if (shellmode and index("!&*<>()[]", c) == nil) { ! 227: --curchar; ! 228: t = getident(chkalias); ! 229: } else { ! 230: t = c; ! 231: } ! 232: break; ! 233: } ! 234: } ! 235: chkalias = false; ! 236: # ifdef LEXDEBUG ! 237: if (lexdebug) { ! 238: fprintf(stderr, "yylex returns "); ! 239: print_token(stderr, t); ! 240: fprintf(stderr, "\n"); ! 241: } ! 242: # endif ! 243: return t; ! 244: } ! 245: ! 246: /* ! 247: * Put the given string before the current character ! 248: * in the current line, thus inserting it into the input stream. ! 249: */ ! 250: ! 251: public insertinput (s) ! 252: String s; ! 253: { ! 254: register char *p, *q; ! 255: int need, avail, shift; ! 256: ! 257: q = s; ! 258: need = strlen(q); ! 259: avail = curchar - &scanner_linebuf[0]; ! 260: if (need <= avail) { ! 261: curchar = &scanner_linebuf[avail - need]; ! 262: p = curchar; ! 263: while (*q != '\0') { ! 264: *p++ = *q++; ! 265: } ! 266: } else { ! 267: p = curchar; ! 268: while (*p != '\0') { ! 269: ++p; ! 270: } ! 271: shift = need - avail; ! 272: if (p + shift >= &scanner_linebuf[MAXLINESIZE]) { ! 273: error("alias expansion too large"); ! 274: } ! 275: for (;;) { ! 276: *(p + shift) = *p; ! 277: if (p == curchar) { ! 278: break; ! 279: } ! 280: --p; ! 281: } ! 282: p = &scanner_linebuf[0]; ! 283: while (*q != '\0') { ! 284: *p++ = *q++; ! 285: } ! 286: curchar = &scanner_linebuf[0]; ! 287: } ! 288: } ! 289: ! 290: /* ! 291: * Get the actuals for a macro call. ! 292: */ ! 293: ! 294: private String movetochar (str, c) ! 295: String str; ! 296: char c; ! 297: { ! 298: register char *p; ! 299: ! 300: while (*p != c) { ! 301: if (*p == '\0') { ! 302: error("missing ')' in macro call"); ! 303: } else if (*p == ')') { ! 304: error("not enough parameters in macro call"); ! 305: } else if (*p == ',') { ! 306: error("too many parameters in macro call"); ! 307: } ! 308: ++p; ! 309: } ! 310: return p; ! 311: } ! 312: ! 313: private String *getactuals (n) ! 314: integer n; ! 315: { ! 316: String *a; ! 317: register char *p; ! 318: int i; ! 319: ! 320: a = newarr(String, n); ! 321: p = curchar; ! 322: while (*p != '(') { ! 323: if (lexclass[*p] != WHITE) { ! 324: error("missing actuals for macro"); ! 325: } ! 326: ++p; ! 327: } ! 328: ++p; ! 329: for (i = 0; i < n - 1; i++) { ! 330: a[i] = p; ! 331: p = movetochar(p, ','); ! 332: *p = '\0'; ! 333: ++p; ! 334: } ! 335: a[n-1] = p; ! 336: p = movetochar(p, ')'); ! 337: *p = '\0'; ! 338: curchar = p + 1; ! 339: return a; ! 340: } ! 341: ! 342: /* ! 343: * Do command macro expansion, assuming curchar points to the beginning ! 344: * of the actuals, and we are not in shell mode. ! 345: */ ! 346: ! 347: private expand (pl, str) ! 348: List pl; ! 349: String str; ! 350: { ! 351: char buf[4096], namebuf[100]; ! 352: register char *p, *q, *r; ! 353: String *actual; ! 354: Name n; ! 355: integer i; ! 356: boolean match; ! 357: ! 358: if (pl == nil) { ! 359: insertinput(str); ! 360: } else { ! 361: actual = getactuals(list_size(pl)); ! 362: p = buf; ! 363: q = str; ! 364: while (*q != '\0') { ! 365: if (p >= &buf[4096]) { ! 366: error("alias expansion too large"); ! 367: } ! 368: if (lexclass[*q] == ALPHA) { ! 369: r = namebuf; ! 370: do { ! 371: *r++ = *q++; ! 372: } while (isalnum(*q)); ! 373: *r = '\0'; ! 374: i = 0; ! 375: match = false; ! 376: foreach(Name, n, pl) ! 377: if (streq(ident(n), namebuf)) { ! 378: match = true; ! 379: break; ! 380: } ! 381: ++i; ! 382: endfor ! 383: if (match) { ! 384: r = actual[i]; ! 385: } else { ! 386: r = namebuf; ! 387: } ! 388: while (*r != '\0') { ! 389: *p++ = *r++; ! 390: } ! 391: } else { ! 392: *p++ = *q++; ! 393: } ! 394: } ! 395: *p = '\0'; ! 396: insertinput(buf); ! 397: } ! 398: } ! 399: ! 400: /* ! 401: * Parser error handling. ! 402: */ ! 403: ! 404: public yyerror(s) ! 405: String s; ! 406: { ! 407: register char *p; ! 408: register integer start; ! 409: ! 410: if (streq(s, "syntax error")) { ! 411: beginerrmsg(); ! 412: p = prevchar; ! 413: start = p - &scanner_linebuf[0]; ! 414: if (p > &scanner_linebuf[0]) { ! 415: while (lexclass[*p] == WHITE and p > &scanner_linebuf[0]) { ! 416: --p; ! 417: } ! 418: } ! 419: fprintf(stderr, "%s", scanner_linebuf); ! 420: if (start != 0) { ! 421: fprintf(stderr, "%*c", start, ' '); ! 422: } ! 423: if (p == &scanner_linebuf[0]) { ! 424: fprintf(stderr, "^ unrecognized command"); ! 425: } else { ! 426: fprintf(stderr, "^ syntax error"); ! 427: } ! 428: enderrmsg(); ! 429: } else { ! 430: error(s); ! 431: } ! 432: } ! 433: ! 434: /* ! 435: * Eat the current line. ! 436: */ ! 437: ! 438: public gobble () ! 439: { ! 440: curchar = scanner_linebuf; ! 441: scanner_linebuf[0] = '\0'; ! 442: } ! 443: ! 444: /* ! 445: * Scan an identifier. ! 446: * ! 447: * If chkalias is true, check first to see if it's an alias. ! 448: * Otherwise, check to see if it's a keyword. ! 449: */ ! 450: ! 451: private Token getident (chkalias) ! 452: boolean chkalias; ! 453: { ! 454: char buf[1024]; ! 455: register char *p, *q; ! 456: register Token t; ! 457: List pl; ! 458: String str; ! 459: ! 460: p = curchar; ! 461: q = buf; ! 462: if (shellmode) { ! 463: do { ! 464: *q++ = *p++; ! 465: } while (index(" \t\n!&<>*[]()'\"", *p) == nil); ! 466: } else { ! 467: do { ! 468: *q++ = *p++; ! 469: } while (isalnum(*p)); ! 470: } ! 471: curchar = p; ! 472: *q = '\0'; ! 473: yylval.y_name = identname(buf, false); ! 474: if (chkalias) { ! 475: if (findalias(yylval.y_name, &pl, &str)) { ! 476: expand(pl, str); ! 477: while (lexclass[*curchar] == WHITE) { ! 478: ++curchar; ! 479: } ! 480: if (pl == nil) { ! 481: t = getident(false); ! 482: } else { ! 483: t = getident(true); ! 484: } ! 485: } else if (shellmode) { ! 486: t = NAME; ! 487: } else { ! 488: t = findkeyword(yylval.y_name, NAME); ! 489: } ! 490: } else if (shellmode) { ! 491: t = NAME; ! 492: } else { ! 493: t = findkeyword(yylval.y_name, NAME); ! 494: } ! 495: return t; ! 496: } ! 497: ! 498: /* ! 499: * Scan a number. ! 500: */ ! 501: ! 502: private Token getnum() ! 503: { ! 504: char buf[1024]; ! 505: register Char *p, *q; ! 506: register Token t; ! 507: Integer base; ! 508: ! 509: p = curchar; ! 510: q = buf; ! 511: if (*p == '0') { ! 512: if (*(p+1) == 'x') { ! 513: p += 2; ! 514: base = 16; ! 515: } else if (*(p+1) == 't') { ! 516: base = 10; ! 517: } else if (varIsSet("$hexin")) { ! 518: base = 16; ! 519: } else { ! 520: base = 8; ! 521: } ! 522: } else if (varIsSet("$hexin")) { ! 523: base = 16; ! 524: } else if (varIsSet("$octin")) { ! 525: base = 8; ! 526: } else { ! 527: base = 10; ! 528: } ! 529: if (base == 16) { ! 530: do { ! 531: *q++ = *p++; ! 532: } while (ishexdigit(*p)); ! 533: } else { ! 534: do { ! 535: *q++ = *p++; ! 536: } while (isdigit(*p)); ! 537: } ! 538: if (*p == '.') { ! 539: do { ! 540: *q++ = *p++; ! 541: } while (isdigit(*p)); ! 542: if (*p == 'e' or *p == 'E') { ! 543: p++; ! 544: if (*p == '+' or *p == '-' or isdigit(*p)) { ! 545: *q++ = 'e'; ! 546: do { ! 547: *q++ = *p++; ! 548: } while (isdigit(*p)); ! 549: } ! 550: } ! 551: *q = '\0'; ! 552: yylval.y_real = atof(buf); ! 553: t = REAL; ! 554: } else { ! 555: *q = '\0'; ! 556: switch (base) { ! 557: case 10: ! 558: yylval.y_int = atol(buf); ! 559: break; ! 560: ! 561: case 8: ! 562: yylval.y_int = octal(buf); ! 563: break; ! 564: ! 565: case 16: ! 566: yylval.y_int = hex(buf); ! 567: break; ! 568: ! 569: default: ! 570: badcaseval(base); ! 571: } ! 572: t = INT; ! 573: } ! 574: curchar = p; ! 575: return t; ! 576: } ! 577: ! 578: /* ! 579: * Convert a string of octal digits to an integer. ! 580: */ ! 581: ! 582: private int octal(s) ! 583: String s; ! 584: { ! 585: register Char *p; ! 586: register Integer n; ! 587: ! 588: n = 0; ! 589: for (p = s; *p != '\0'; p++) { ! 590: n = 8*n + (*p - '0'); ! 591: } ! 592: return n; ! 593: } ! 594: ! 595: /* ! 596: * Convert a string of hexadecimal digits to an integer. ! 597: */ ! 598: ! 599: private int hex(s) ! 600: String s; ! 601: { ! 602: register Char *p; ! 603: register Integer n; ! 604: ! 605: n = 0; ! 606: for (p = s; *p != '\0'; p++) { ! 607: n *= 16; ! 608: if (*p >= 'a' and *p <= 'f') { ! 609: n += (*p - 'a' + 10); ! 610: } else if (*p >= 'A' and *p <= 'F') { ! 611: n += (*p - 'A' + 10); ! 612: } else { ! 613: n += (*p - '0'); ! 614: } ! 615: } ! 616: return n; ! 617: } ! 618: ! 619: /* ! 620: * Scan a string. ! 621: */ ! 622: ! 623: private Token getstring (quote) ! 624: char quote; ! 625: { ! 626: register char *p, *q; ! 627: char buf[MAXLINESIZE]; ! 628: boolean endofstring; ! 629: Token t; ! 630: ! 631: p = curchar; ! 632: q = buf; ! 633: endofstring = false; ! 634: while (not endofstring) { ! 635: if (*p == '\\' and *(p+1) == '\n') { ! 636: if (fgets(scanner_linebuf, MAXLINESIZE, in) == nil) { ! 637: error("non-terminated string"); ! 638: } ! 639: p = &scanner_linebuf[0] - 1; ! 640: } else if (*p == '\n' or *p == '\0') { ! 641: error("non-terminated string"); ! 642: endofstring = true; ! 643: } else if (*p == quote) { ! 644: endofstring = true; ! 645: } else { ! 646: curchar = p; ! 647: *q++ = charcon(p); ! 648: p = curchar; ! 649: } ! 650: p++; ! 651: } ! 652: curchar = p; ! 653: *q = '\0'; ! 654: if (quote == '\'' and buf[1] == '\0') { ! 655: yylval.y_char = buf[0]; ! 656: t = CHAR; ! 657: } else { ! 658: yylval.y_string = strdup(buf); ! 659: t = STRING; ! 660: } ! 661: return t; ! 662: } ! 663: ! 664: /* ! 665: * Process a character constant. ! 666: * Watch out for backslashes. ! 667: */ ! 668: ! 669: private char charcon (s) ! 670: String s; ! 671: { ! 672: register char *p, *q; ! 673: char c, buf[10]; ! 674: ! 675: p = s; ! 676: if (*p == '\\') { ! 677: ++p; ! 678: switch (*p) { ! 679: case '\\': ! 680: c = '\\'; ! 681: break; ! 682: ! 683: case 'n': ! 684: c = '\n'; ! 685: break; ! 686: ! 687: case 'r': ! 688: c = '\r'; ! 689: break; ! 690: ! 691: case 't': ! 692: c = '\t'; ! 693: break; ! 694: ! 695: case '\'': ! 696: case '"': ! 697: c = *p; ! 698: break; ! 699: ! 700: default: ! 701: if (isdigit(*p)) { ! 702: q = buf; ! 703: do { ! 704: *q++ = *p++; ! 705: } while (isdigit(*p)); ! 706: *q = '\0'; ! 707: c = (char) octal(buf); ! 708: } ! 709: --p; ! 710: break; ! 711: } ! 712: curchar = p; ! 713: } else { ! 714: c = *p; ! 715: } ! 716: return c; ! 717: } ! 718: ! 719: /* ! 720: * Input file management routines. ! 721: */ ! 722: ! 723: public setinput(filename) ! 724: Filename filename; ! 725: { ! 726: File f; ! 727: ! 728: f = fopen(filename, "r"); ! 729: if (f == nil) { ! 730: error("can't open %s", filename); ! 731: } else { ! 732: if (curinclindex >= MAXINCLDEPTH) { ! 733: error("unreasonable input nesting on \"%s\"", filename); ! 734: } ! 735: inclinfo[curinclindex].savefile = in; ! 736: inclinfo[curinclindex].savefn = errfilename; ! 737: inclinfo[curinclindex].savelineno = errlineno; ! 738: curinclindex++; ! 739: in = f; ! 740: errfilename = filename; ! 741: errlineno = 1; ! 742: } ! 743: } ! 744: ! 745: private Boolean eofinput() ! 746: { ! 747: register Boolean b; ! 748: ! 749: if (curinclindex == 0) { ! 750: if (isterm(in)) { ! 751: putchar('\n'); ! 752: clearerr(in); ! 753: b = false; ! 754: } else { ! 755: b = true; ! 756: } ! 757: } else { ! 758: fclose(in); ! 759: --curinclindex; ! 760: in = inclinfo[curinclindex].savefile; ! 761: errfilename = inclinfo[curinclindex].savefn; ! 762: errlineno = inclinfo[curinclindex].savelineno; ! 763: b = false; ! 764: } ! 765: return b; ! 766: } ! 767: ! 768: /* ! 769: * Pop the current input. Return whether successful. ! 770: */ ! 771: ! 772: public Boolean popinput() ! 773: { ! 774: Boolean b; ! 775: ! 776: if (curinclindex == 0) { ! 777: b = false; ! 778: } else { ! 779: b = (Boolean) (not eofinput()); ! 780: } ! 781: return b; ! 782: } ! 783: ! 784: /* ! 785: * Return whether we are currently reading from standard input. ! 786: */ ! 787: ! 788: public Boolean isstdin() ! 789: { ! 790: return (Boolean) (in == stdin); ! 791: } ! 792: ! 793: /* ! 794: * Send the current line to the shell. ! 795: */ ! 796: ! 797: public shellline() ! 798: { ! 799: register char *p; ! 800: ! 801: p = curchar; ! 802: while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) { ! 803: ++p; ! 804: } ! 805: shell(p); ! 806: if (*p == '\0' and isterm(in)) { ! 807: putchar('\n'); ! 808: } ! 809: erecover(); ! 810: } ! 811: ! 812: /* ! 813: * Read the rest of the current line in "shell mode". ! 814: */ ! 815: ! 816: public beginshellmode() ! 817: { ! 818: shellmode = true; ! 819: } ! 820: ! 821: /* ! 822: * Print out a token for debugging. ! 823: */ ! 824: ! 825: public print_token(f, t) ! 826: File f; ! 827: Token t; ! 828: { ! 829: if (t == '\n') { ! 830: fprintf(f, "char '\\n'"); ! 831: } else if (t == EOF) { ! 832: fprintf(f, "EOF"); ! 833: } else if (t < 256) { ! 834: fprintf(f, "char '%c'", t); ! 835: } else { ! 836: fprintf(f, "\"%s\"", keywdstring(t)); ! 837: } ! 838: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.