|
|
1.1 ! root 1: /*ident "@(#)ctrans:src/lex.c 1.3.4.17" */ ! 2: /*************************************************************************** ! 3: ! 4: C++ source for cfront, the C++ compiler front-end ! 5: written in the computer science research center of Bell Labs ! 6: ! 7: Copyright (c) 1984 AT&T, Inc. All Rights Reserved ! 8: THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF AT&T, INC. ! 9: ! 10: lex.c: ! 11: lexical analyser based on pcc's and cpre's scanners ! 12: modified to handle classes: ! 13: new keywords: class ! 14: public ! 15: call ! 16: etc. ! 17: names are not entered in the symbol table by lex() ! 18: names can be of arbitrary length ! 19: error() is used to report errors ! 20: {} and () must match ! 21: numeric constants are not converted into internal representation ! 22: but stored as strings ! 23: ! 24: ****************************************************************************/ ! 25: ! 26: #include "cfront.h" ! 27: #include "yystype.h" ! 28: #include "size.h" ! 29: #include "tqueue.h" ! 30: ! 31: # define CCTRANS(x) x ! 32: ! 33: /* lexical actions */ ! 34: ! 35: #define A_ERR 0 /* illegal character */ ! 36: #define A_LET 1 /* saw a letter */ ! 37: #define A_DIG 2 /* saw a digit */ ! 38: #define A_1C 3 /* return a single character */ ! 39: #define A_STR 4 /* string */ ! 40: #define A_CC 5 /* character constant */ ! 41: #define A_BCD 6 /* GCOS BCD constant */ ! 42: #define A_SL 7 /* saw a / */ ! 43: #define A_DOT 8 /* saw a . */ ! 44: #define A_2C 9 /* possible two character symbol */ ! 45: #define A_WS 10 /* whitespace (not \n) */ ! 46: #define A_NL 11 /* \n */ ! 47: #define A_LC 12 /* { */ ! 48: #define A_RC 13 /* } */ ! 49: #define A_L 14 /* ( */ ! 50: #define A_R 15 /* ) */ ! 51: #define A_EOF 16 ! 52: #define A_ASS 17 ! 53: #define A_LT 18 ! 54: #define A_GT 19 /* > */ ! 55: #define A_ER 20 ! 56: #define A_OR 21 ! 57: #define A_AND 22 ! 58: #define A_MOD 23 ! 59: #define A_NOT 24 ! 60: #define A_MIN 25 ! 61: #define A_MUL 26 ! 62: #define A_PL 27 ! 63: #define A_COL 28 /* : */ ! 64: #define A_SHARP 29 /* # */ ! 65: ! 66: /* character classes */ ! 67: ! 68: # define LEXLET 01 ! 69: # define LEXDIG 02 ! 70: /* no LEXOCT because 8 and 9 used to be octal digits */ ! 71: # define LEXHEX 010 ! 72: # define LEXWS 020 ! 73: # define LEXDOT 040 ! 74: ! 75: const FIRSTCHUNK = 8*1024-8; ! 76: const BUFCHUNK = 4*1024-8; ! 77: ! 78: /* text buffer */ ! 79: static char inbuf[FIRSTCHUNK/*TBUFSZ*/]; ! 80: char* txtmax = &inbuf[FIRSTCHUNK/*TBUFSZ*/-1]; ! 81: char* txtstart = 0; ! 82: char* txtfree = 0; ! 83: ! 84: static struct buf* bufhead; ! 85: static buf* freebuf; ! 86: //static bufs; ! 87: ! 88: struct buf { ! 89: buf* next; ! 90: char chars[BUFCHUNK]; ! 91: // buf() { next=bufhead; bufhead=this; } ! 92: }; ! 93: ! 94: new_buf(char c) ! 95: { ! 96: //fprintf(stderr,"new_buf %d\n",bufs++); ! 97: buf* pbuf; ! 98: if (freebuf) { ! 99: pbuf = freebuf; ! 100: freebuf = freebuf->next; ! 101: } ! 102: else ! 103: pbuf = new buf; // allocate and register new chunk ! 104: pbuf->next = bufhead; ! 105: bufhead = pbuf; ! 106: ! 107: if (BUFCHUNK < txtmax-txtstart) error('l',"lexical token too long"); ! 108: ! 109: // copy current token: ! 110: char* p = txtstart; ! 111: txtstart = txtfree = &pbuf->chars[0]; ! 112: while (p<txtmax) *txtfree++ = *p++; ! 113: *txtfree++=c; ! 114: txtmax = &pbuf->chars[BUFCHUNK-1]; ! 115: return 0; ! 116: } ! 117: ! 118: ! 119: #define pch(c) ((txtmax<=txtfree)?new_buf(c):(*txtfree++=c)) ! 120: #define start_txt() txtstart = txtfree ! 121: #define del_txt() txtfree = txtstart ! 122: ! 123: /*static*/ char* file_name[MAXFILE*4]; // source file names ! 124: // file_name[0] == src_file_name ! 125: // file_name[0] == 0 means stdin ! 126: static short file_stack[MAXFILE]; // stack of file name indices ! 127: int curr_file; // current index in file_stack ! 128: // that is current #include nest level ! 129: ! 130: int linkage; // linkage is default C++ ! 131: // linkage==0 => C++ linkage ! 132: // linkage==1 => C linkage ! 133: const LINKMAX = 10; ! 134: static lvec[LINKMAX]; ! 135: int lcount; ! 136: ! 137: void set_linkage(char* p) ! 138: { ! 139: if (p==0 || *p == 0) { // resume previous linkage ! 140: if (lcount) linkage = lvec[--lcount]; ! 141: } ! 142: else { ! 143: if (LINKMAX<=++lcount) ! 144: error('l',"linkage directive nested too deep"); ! 145: if (strcmp(p,"C")==0) ! 146: lvec[lcount] = linkage = 1; ! 147: else if (strcmp(p,"C++")==0) ! 148: lvec[lcount] = linkage = 0; ! 149: else ! 150: error("%s linkage",p); ! 151: } ! 152: } ! 153: ! 154: class loc curloc; ! 155: FILE * out_file = stdout; ! 156: FILE * in_file = stdin; ! 157: Ptable ktbl; ! 158: int br_level = 0; /* number of unmatched ``(''s */ ! 159: int bl_level = 0; /* number of unmatched ``{''s */ ! 160: ! 161: # ifdef ibm ! 162: ! 163: # define CSMASK 0377 ! 164: # define CSSZ 256 ! 165: ! 166: # else ! 167: ! 168: # define CSMASK 0177 ! 169: # define CSSZ 128 ! 170: ! 171: # endif ! 172: ! 173: static short lxmask[CSSZ+1]; ! 174: ! 175: int saved = 0; /* putback character, avoid ungetchar */ ! 176: static int lxtitle(); ! 177: ! 178: overload rt; ! 179: inline YYSTYPE rt(char* x) { YYSTYPE y; y.s = x; return y; } ! 180: inline YYSTYPE rt(int x) { YYSTYPE y; y.t = x; return y; } ! 181: inline YYSTYPE rt(loc x) { YYSTYPE y; y.l = x; return y; } ! 182: inline YYSTYPE rt(void* x) { YYSTYPE y; y.pn = Pname(x); return y; } ! 183: ! 184: #define get(c) (c=getc(in_file)) ! 185: #define unget(c) ungetc(c,in_file) ! 186: ! 187: #define reti(a,b) { addtok(a, rt(b)); return a; } ! 188: #define retn(a,b) { addtok(a, rt((Pnode)b)); return a; } ! 189: #define rets(a,b) { addtok(a, rt(b)); return a; } ! 190: #define retl(a) { addtok(a, rt(curloc)); return a; } ! 191: ! 192: void ktbl_init() ! 193: /* ! 194: enter keywords into keyword table for use by lex() ! 195: and into keyword representation table used for output ! 196: */ ! 197: { ! 198: ktbl = new table(KTBLSIZE,0,0); ! 199: ! 200: new_key("asm",ASM,0); ! 201: new_key("auto",AUTO,TYPE); ! 202: new_key("break",LOC,BREAK); ! 203: new_key("case",LOC,CASE); ! 204: new_key("continue",LOC,CONTINUE); ! 205: new_key("char",CHAR,TYPE); ! 206: new_key("do",LOC,DO); ! 207: new_key("double",DOUBLE,TYPE); ! 208: new_key("default",LOC,DEFAULT); ! 209: new_key("enum",ENUM,0); ! 210: new_key("else",LOC,ELSE); ! 211: new_key("extern",EXTERN,TYPE); ! 212: new_key("float",FLOAT,TYPE); ! 213: new_key("for",LOC,FOR); ! 214: // new_key("fortran",FORTRAN,0); ! 215: new_key("goto",LOC,GOTO); ! 216: new_key("catch",CATCH,CATCH); ! 217: new_key("if",LOC,IF); ! 218: new_key("int",INT,TYPE); ! 219: new_key("long",LONG,TYPE); ! 220: new_key("return",LOC,RETURN); ! 221: new_key("register",REGISTER,TYPE); ! 222: new_key("static",STATIC,TYPE); ! 223: new_key("struct",STRUCT,AGGR); ! 224: new_key("sizeof",SIZEOF,0); ! 225: new_key("short",SHORT,TYPE); ! 226: new_key("switch",LOC,SWITCH); ! 227: new_key("template",TEMPLATE,TEMPLATE); ! 228: new_key("typedef",TYPEDEF,TYPE); ! 229: new_key("unsigned",UNSIGNED,TYPE); ! 230: new_key("union",UNION,AGGR); ! 231: new_key("void",VOID,TYPE); ! 232: new_key("while",LOC,WHILE); ! 233: ! 234: new_key("class",CLASS,AGGR); ! 235: new_key("const",CONST,TYPE); ! 236: new_key("delete",LOC,DELETE); ! 237: new_key("friend",FRIEND,TYPE); ! 238: new_key("inline",INLINE,TYPE); ! 239: new_key("new",NEW,0); ! 240: new_key("operator",OPERATOR,0); ! 241: new_key("overload",OVERLOAD,TYPE); ! 242: new_key("private",PRIVATE,PR); ! 243: new_key("protected",PROTECTED,PR); ! 244: new_key("public",PUBLIC,PR); ! 245: new_key("signed",SIGNED,TYPE); ! 246: new_key("this",THIS,0); ! 247: new_key("virtual",VIRTUAL,TYPE); ! 248: new_key("volatile",VOLATILE,TYPE); ! 249: #ifdef DK ! 250: new_key("or",OR,0); ! 251: new_key("cor",OROR,0); ! 252: new_key("and",AND,0); ! 253: new_key("cand",ANDAND,0); ! 254: new_key("xor",ER,0); ! 255: new_key("compl",COMPL,0); ! 256: #endif ! 257: } ! 258: ! 259: extern char* src_file_name; ! 260: extern char* line_format; ! 261: loc last_line; ! 262: ! 263: void loc::putline() ! 264: { ! 265: if (file==0 && line==0) return; ! 266: // fix, Nfile, not MAXFILE; Nfile == MAXFILE*4 ! 267: // if (0<=file && file<MAXFILE) { ! 268: if ( 0<=file && file <= Nfile ) { ! 269: char* f = file_name[file]; ! 270: if (f==0) f = (src_file_name) ? src_file_name : ""; ! 271: fprintf(out_file,line_format,line,f); ! 272: last_line = *this; ! 273: } ! 274: } ! 275: ! 276: void loc::put(FILE* p) ! 277: { ! 278: // fix, Nfile, not MAXFILE; Nfile == MAXFILE*4 ! 279: // if (0<=file && file<MAXFILE) { ! 280: if ( 0<=file && file <= Nfile ) { ! 281: char* f = file_name[file]; ! 282: if (f==0) f = (src_file_name) ? src_file_name : ""; ! 283: fprintf(p,"\"%s\", line %d: ",f,line); ! 284: } ! 285: } ! 286: ! 287: void lxenter(register char* s, short m) ! 288: /* enter a mask into lxmask */ ! 289: { ! 290: register c; ! 291: ! 292: while( c= *s++ ) lxmask[c+1] |= m; ! 293: ! 294: } ! 295: ! 296: ! 297: void lxget(register c, register m) ! 298: /* ! 299: put 'c' back then scan for members of character class 'm' ! 300: terminate the string read with \0 ! 301: txtfree points to the character position after that \0 ! 302: */ ! 303: { ! 304: pch(c); ! 305: while ( (get(c), lxmask[c+1]&m) ) pch(c); ! 306: unget(c); ! 307: pch('\0'); ! 308: } ! 309: ! 310: struct LXDOPE { ! 311: short lxch; /* the character */ ! 312: short lxact; /* the action to be performed */ ! 313: TOK lxtok; /* the token number to be returned */ ! 314: } lxdope[] = { ! 315: #ifdef apollo ! 316: '@', A_ERR, 0, /* illegal characters go here... */ ! 317: #else ! 318: '$', A_ERR, 0, /* illegal characters go here... */ ! 319: #endif ! 320: '_', A_LET, 0, /* letters point here */ ! 321: '0', A_DIG, 0, /* digits point here */ ! 322: ' ', A_WS, 0, /* whitespace goes here */ ! 323: '\n', A_NL, 0, ! 324: '"', A_STR, 0, /* character string */ ! 325: '\'', A_CC, 0, /* ASCII character constant */ ! 326: '`', A_BCD, 0, /* 'foreign' character constant, e.g. BCD */ ! 327: '(', A_L, LP, ! 328: ')', A_R, RP, ! 329: '{', A_LC, LC, ! 330: '}', A_RC, RC, ! 331: '[', A_1C, LB, ! 332: ']', A_1C, RB, ! 333: '*', A_MUL, MUL, ! 334: '?', A_1C, QUEST, ! 335: ':', A_COL, COLON, ! 336: '+', A_PL, PLUS, ! 337: '-', A_MIN, MINUS, ! 338: '/', A_SL, DIV, ! 339: '%', A_MOD, MOD, ! 340: '&', A_AND, AND, ! 341: '|', A_OR, OR, ! 342: '^', A_ER, ER, ! 343: '!', A_NOT, NOT, ! 344: '~', A_1C, COMPL, ! 345: ',', A_1C, CM, ! 346: ';', A_1C, SM, ! 347: '.', A_DOT, DOT, ! 348: '<', A_LT, LT, ! 349: '>', A_GT, GT, ! 350: '=', A_ASS, ASSIGN, ! 351: '#', A_SHARP, 0, ! 352: EOF, A_EOF, EOFTOK ! 353: }; ! 354: /* note: EOF is used as sentinel, so must be <=0 and last entry in table */ ! 355: ! 356: static struct LXDOPE *lxcp[CSSZ+1]; ! 357: ! 358: void lex_init() ! 359: { ! 360: register struct LXDOPE *p; ! 361: register i; ! 362: register char *cp; ! 363: /* set up character classes */ ! 364: ! 365: /* first clear lexmask */ ! 366: for(i=0; i<=CSSZ; i++) lxmask[i] = 0; ! 367: ! 368: #ifdef apollo ! 369: lxenter( "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_$", LEXLET ); ! 370: #else ! 371: lxenter( "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_", LEXLET ); ! 372: #endif ! 373: lxenter( "0123456789", LEXDIG ); ! 374: lxenter( "0123456789abcdefABCDEF", LEXHEX ); ! 375: /* \013 should become \v someday; \013 is OK for ASCII and EBCDIC */ ! 376: lxenter( " \t\r\b\f\013", LEXWS ); ! 377: lxmask['.'+1] |= LEXDOT; ! 378: ! 379: /* make lxcp point to appropriate lxdope entry for each character */ ! 380: ! 381: /* initialize error entries */ ! 382: ! 383: for( i= 0; i<=CSSZ; ++i ) lxcp[i] = lxdope; ! 384: ! 385: /* make unique entries */ ! 386: ! 387: for( p=lxdope; ; ++p ) { ! 388: lxcp[p->lxch+1] = p; ! 389: if( p->lxch < 0 ) break; ! 390: } ! 391: ! 392: /* handle letters, digits, and whitespace */ ! 393: /* by convention, first, second, and third places */ ! 394: ! 395: cp = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; ! 396: while( *cp ) lxcp[*cp++ + 1] = &lxdope[1]; ! 397: cp = "123456789"; ! 398: while( *cp ) lxcp[*cp++ + 1] = &lxdope[2]; ! 399: cp = "\t\b\r\f\013"; ! 400: while( *cp ) lxcp[*cp++ + 1] = &lxdope[3]; ! 401: ! 402: file_name[0] = src_file_name; ! 403: // curloc.file = 0; // spurious: curloc is a static ! 404: curloc.line = 1; ! 405: ! 406: ktbl_init(); ! 407: lex_clear(); ! 408: saved = lxtitle(); ! 409: } ! 410: ! 411: void lex_clear() ! 412: { ! 413: // delete extra buffers: ! 414: buf* p = bufhead; ! 415: bufhead = 0; ! 416: //if (p) { ! 417: //fprintf(stderr,"lex_clear\n"); ! 418: //bufs=0; ! 419: //} ! 420: while (p) { ! 421: buf* pp = p; ! 422: p = p->next; ! 423: pp->next = freebuf; ! 424: freebuf = pp; ! 425: } ! 426: ! 427: // re-set to static buffer: ! 428: txtstart = txtfree = inbuf; ! 429: txtmax = &inbuf[FIRSTCHUNK/*TBUFSZ*/-1]; ! 430: } ! 431: ! 432: int int_val(char hex) ! 433: { ! 434: switch (hex) { ! 435: case '0': case '1': case '2': case '3': case '4': ! 436: case '5': case '6': case '7': case '8': case '9': ! 437: return hex-'0'; ! 438: case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': ! 439: return hex-'a'+10; ! 440: case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': ! 441: return hex-'A'+10; ! 442: } ! 443: } ! 444: ! 445: void hex_to_oct() ! 446: /* ! 447: \x has been seen on input (in char const or string) and \ printed ! 448: read the following hexadecimal integer and replace it with an octal ! 449: */ ! 450: { ! 451: int i = 0; ! 452: int c; ! 453: get(c); ! 454: if (lxmask[c+1] & LEXHEX) { ! 455: i = int_val(c); ! 456: get(c); // try for two ! 457: if (lxmask[c+1] & LEXHEX) { ! 458: i = (i<<4) + int_val(c); ! 459: get(c); // try for three ! 460: if (lxmask[c+1] & LEXHEX) ! 461: i = (i<<4) + int_val(c); ! 462: else ! 463: unget(c); ! 464: } ! 465: else ! 466: unget(c); ! 467: } ! 468: else { ! 469: error("hexadecimal digitE after \\x"); ! 470: unget(c); ! 471: } ! 472: ! 473: // if (0377 < i) error('l',"hexadecimal constant too large"); ! 474: i &= 0377; ! 475: ! 476: pch(('0'+(i>>6))); ! 477: pch(('0'+((i&070)>>3))); ! 478: pch(('0'+(i&7))); ! 479: } ! 480: ! 481: ! 482: char * chconst() ! 483: /* ! 484: read a character constant into inbuf ! 485: */ ! 486: { ! 487: register c; ! 488: int nch = 0; ! 489: ! 490: pch('\''); ! 491: ! 492: for(;;) { ! 493: char* p; ! 494: char cc = 0; ! 495: ! 496: switch (get(c)) { ! 497: case '\'': ! 498: goto ex; ! 499: case EOF: ! 500: error("eof in char constant"); ! 501: goto ex; ! 502: case '\n': ! 503: error("newline in char constant"); ! 504: goto ex; ! 505: case '\\': ! 506: if (SZ_INT == nch++) error('l',"char constant too long"); ! 507: pch(c); ! 508: switch (get(c)){ ! 509: case '\n': ! 510: ++curloc.line; ! 511: default: ! 512: pch(c); ! 513: break; ! 514: case '4': case '5': case '6': case '7': // octal ! 515: p = txtfree; ! 516: cc = c-4; ! 517: case '0': case '1': case '2': case '3': ! 518: pch(c); ! 519: get(c); /* try for 2 */ ! 520: if( lxmask[c+1] & LEXDIG && c<'8'){ ! 521: pch(c); ! 522: get(c); /* try for 3 */ ! 523: if (lxmask[c+1] & LEXDIG && c<'8') { ! 524: if (cc) *p = cc; // zap high bit ! 525: pch(c); ! 526: } ! 527: else ! 528: unget(c); ! 529: } ! 530: else ! 531: unget(c); ! 532: break; ! 533: case 'x': // hexadecimal ! 534: hex_to_oct(); ! 535: break; ! 536: }; ! 537: break; ! 538: default: ! 539: if (SZ_INT == nch++) error('l',"char constant too long"); ! 540: pch(c); ! 541: } ! 542: } ! 543: ex: ! 544: pch('\''); ! 545: pch('\0'); ! 546: return txtstart; ! 547: } ! 548: ! 549: void lxcom() ! 550: /* process a "block comment" */ ! 551: { ! 552: register c; ! 553: ! 554: for(;;) ! 555: switch (get(c)) { ! 556: case EOF: ! 557: error('w',"eof in comment"); ! 558: return; ! 559: case '\n': ! 560: curloc.line++; ! 561: // Nline++; ! 562: break; ! 563: case '*': ! 564: if (get(c) == '/') return; ! 565: unget(c); ! 566: break; ! 567: case '/': ! 568: if (get(c) == '*') error('w',"``/*'' in comment"); ! 569: unget(c); ! 570: break; ! 571: } ! 572: } ! 573: ! 574: ! 575: void linecom() ! 576: // process a "line comment" ! 577: { ! 578: register c; ! 579: ! 580: for(;;) ! 581: switch (get(c)) { ! 582: case EOF: ! 583: error('w',"eof in comment"); ! 584: return; ! 585: case '\n': ! 586: curloc.line++; ! 587: // Nline++; ! 588: saved = lxtitle(); ! 589: return; ! 590: } ! 591: } ! 592: ! 593: char eat_whitespace() ! 594: { ! 595: ! 596: for(;;) { ! 597: register c = get(c); ! 598: lx: ! 599: ! 600: switch (c) { ! 601: case EOF: ! 602: error('w',"unexpected comment"); ! 603: return EOF; ! 604: case '/': ! 605: switch (get(c)) { ! 606: case '*': ! 607: lxcom(); ! 608: break; ! 609: case '/': ! 610: linecom(); ! 611: break; ! 612: default: ! 613: unget(c); ! 614: return '/'; ! 615: } ! 616: break; ! 617: case '\n': ! 618: ++curloc.line; ! 619: c = lxtitle(); ! 620: goto lx; ! 621: case ' ': ! 622: case '\t': ! 623: break; ! 624: default: ! 625: return c; ! 626: } ! 627: } ! 628: } ! 629: ! 630: void get_string() ! 631: { ! 632: int lxchar; ! 633: ! 634: for(;;) ! 635: switch (get(lxchar)) { ! 636: case '\\': ! 637: pch('\\'); ! 638: switch (get(lxchar)){ ! 639: case '\n': ! 640: ++curloc.line; ! 641: default: ! 642: pch(lxchar); ! 643: break; ! 644: case 'x': // hexadecimal ! 645: hex_to_oct(); ! 646: break; ! 647: }; ! 648: break; ! 649: case '"': ! 650: { char* p = txtstart; // eat_whitespace() moves txtstart ! 651: if ((lxchar = eat_whitespace()) == '"') { ! 652: // string catenation, break with ! 653: // newline to avoid merging characters ! 654: // (e.g. "\xAB" "C") ! 655: pch('\\'); ! 656: pch('\n'); ! 657: ! 658: continue; // eat '\"' and carry on ! 659: }; ! 660: ! 661: txtstart = p; ! 662: unget(lxchar); ! 663: pch(0); ! 664: return; ! 665: } ! 666: case '\n': ! 667: error("newline in string"); ! 668: pch(0); ! 669: return; ! 670: case EOF: ! 671: error("eof in string"); ! 672: pch(0); ! 673: return; ! 674: default: ! 675: pch(lxchar); ! 676: } ! 677: } ! 678: ! 679: TOK tlex() ! 680: { ! 681: TOK ret; ! 682: Pname n; ! 683: ! 684: // Ntoken++; ! 685: ! 686: for(;;) { ! 687: register lxchar; ! 688: register struct LXDOPE *p; ! 689: ! 690: start_txt(); ! 691: ! 692: if (saved) { ! 693: lxchar = saved; ! 694: saved = 0; ! 695: } ! 696: else ! 697: get(lxchar); ! 698: ! 699: if (lxchar+1 >= CSSZ ) ! 700: error( "illegal input character enountered: %d", lxchar ); ! 701: ! 702: switch( (p=lxcp[lxchar+1])->lxact ){ ! 703: ! 704: case A_1C: // eat up a single character, and return an opcode ! 705: reti(p->lxtok,p->lxtok); ! 706: ! 707: case A_EOF: ! 708: if (br_level || bl_level+lcount) ! 709: error("'%s' missing at end of input",(bl_level+lcount) ? "}" : ")"); ! 710: ! 711: reti(EOFTOK,0); ! 712: ! 713: case A_SHARP: ! 714: // cope with header file not ended with '\n' ! 715: unget('#'); ! 716: saved = lxtitle(); ! 717: continue; ! 718: ! 719: case A_ERR: ! 720: { if (' '<=lxchar && lxchar<='~') // ASCII printable ! 721: error("illegal character '%c' (ignored)",lxchar); ! 722: else ! 723: error("illegal character '0%o' (ignored)",lxchar); ! 724: continue; ! 725: } ! 726: case A_LET: // collect an identifier and check for keyword ! 727: { ! 728: char ll; ! 729: switch (ll = lxchar) { ! 730: // case 'l': ! 731: case 'L': ! 732: switch (get(lxchar)) { ! 733: case '\'': ! 734: error('s',"wide character constant"); ! 735: unget(lxchar); ! 736: continue; ! 737: case '"': ! 738: error('s',"wide character string"); ! 739: unget(lxchar); ! 740: continue; ! 741: } ! 742: unget(lxchar); ! 743: lxchar = ll; ! 744: } ! 745: } ! 746: lxget( lxchar, LEXLET|LEXDIG ); ! 747: ! 748: //error( 'd', "lex: bl_level: %d txtstart %s", bl_level, txtstart); ! 749: // local class ! 750: if (n = ktbl->look(txtstart,0)) { ! 751: TOK x; ! 752: del_txt(); ! 753: switch (x=n->base) { ! 754: case TNAME: ! 755: //('d',"lex tname %n",n); ! 756: if (bl_level > 1) { ! 757: Pname nn = ktbl->look(txtstart,LOCAL); ! 758: if ( nn ) { ! 759: n = nn; ! 760: //error( 'd', "lex: local class instance: %n", nn ); ! 761: } ! 762: } ! 763: retn(TNAME,n); ! 764: case LOC: ! 765: retl(n->syn_class); ! 766: case EXTERN: ! 767: if ((lxchar = eat_whitespace()) == '\"') { ! 768: // linkage directive ! 769: get_string(); ! 770: rets(LINKAGE,txtstart); ! 771: } ! 772: unget(lxchar); ! 773: reti(TYPE,EXTERN); ! 774: case CATCH: ! 775: case TEMPLATE: ! 776: error('s',"%k",n->syn_class); ! 777: continue; ! 778: default: ! 779: #ifdef DK ! 780: if (get(lxchar) == '=') ! 781: switch (x) { ! 782: case OR: reti(ASOP,ASOR); ! 783: case ER: reti(ASOP,ASER); ! 784: case AND: reti(ASOP,ASAND); ! 785: } ! 786: saved = lxchar; ! 787: ! 788: #endif ! 789: reti(n->syn_class,x); ! 790: } ! 791: } ! 792: else ! 793: // local class ! 794: if ( bl_level && ! 795: (n=ktbl->look(txtstart,LOCAL)) ) ! 796: { ! 797: //error( 'd', "lex2: local class instance: %n", n ); ! 798: retn(TNAME,n); ! 799: } ! 800: else ! 801: rets(ID,txtstart); ! 802: ! 803: case A_DIG: ! 804: ! 805: ret = ICON; ! 806: ! 807: if (lxchar=='0') { /* octal or hexadecimal number */ ! 808: pch('0'); ! 809: switch (get(lxchar)) { ! 810: case 'l': ! 811: case 'L': ! 812: pch('L'); ! 813: pch(0); ! 814: rets(ICON,txtstart); ! 815: case 'e': ! 816: case 'E': ! 817: // lxget(lxchar,LEXDIG); ! 818: // goto getfp; ! 819: goto getfp2; ! 820: case 'x': ! 821: case 'X': ! 822: lxget('X',LEXHEX); ! 823: if (txtfree-txtstart<4) // minimum "0Xd\0" ! 824: error("hexadecimal digitX after \"0x\""); ! 825: switch (get(lxchar)) { ! 826: case 'l': ! 827: case 'L': ! 828: txtfree--; ! 829: pch('L'); ! 830: pch(0); ! 831: break; ! 832: default: ! 833: saved = lxchar; ! 834: } ! 835: rets(ICON,txtstart); ! 836: case '8': ! 837: case '9': ! 838: {error("%c used as octal digit",lxchar);} ! 839: case '0': ! 840: case '1': ! 841: case '2': ! 842: case '3': ! 843: case '4': ! 844: case '5': ! 845: case '6': ! 846: case '7': ! 847: pch(lxchar); ! 848: ox: ! 849: switch (get(lxchar)) { ! 850: case '8': ! 851: case '9': ! 852: {error("%c used as octal digit",lxchar);} ! 853: case '0': ! 854: case '1': ! 855: case '2': ! 856: case '3': ! 857: case '4': ! 858: case '5': ! 859: case '6': ! 860: case '7': ! 861: pch(lxchar); ! 862: goto ox; ! 863: case 'l': ! 864: case 'L': ! 865: pch('L'); ! 866: pch(0); ! 867: break; ! 868: default: ! 869: pch(0); ! 870: saved = lxchar; ! 871: } ! 872: rets(ICON,txtstart); ! 873: case '.': ! 874: lxget('.',LEXDIG); ! 875: goto getfp; ! 876: default: ! 877: saved = lxchar; ! 878: reti(ZERO,0); ! 879: } ! 880: } ! 881: else ! 882: lxget(lxchar,LEXDIG); ! 883: ! 884: if (get(lxchar) == '.') { ! 885: txtfree--; ! 886: lxget('.', LEXDIG ); ! 887: getfp: ! 888: ret = FCON; ! 889: get(lxchar); ! 890: }; ! 891: ! 892: switch (lxchar) { ! 893: case 'f': ! 894: case 'F': ! 895: txtfree--; ! 896: pch('F'); ! 897: break; ! 898: case 'e': ! 899: case 'E': ! 900: txtfree--; ! 901: switch (get(lxchar)) { ! 902: case '-': ! 903: case '+': ! 904: pch('e'); ! 905: break; ! 906: default: ! 907: unget(lxchar); ! 908: lxchar = 'e'; ! 909: }; ! 910: getfp2: ! 911: lxget( lxchar, LEXDIG ); ! 912: ret = FCON; ! 913: break; ! 914: case 'u': ! 915: case 'U': ! 916: if (ret==FCON) error("%c suffix for floating constant",lxchar); ! 917: case 'l': ! 918: case 'L': ! 919: txtfree--; ! 920: pch(lxchar); ! 921: switch (get(lxchar)) { // ul, Lu, ets. ! 922: case 'l': ! 923: case 'L': ! 924: case 'u': ! 925: case 'U': ! 926: pch(lxchar); ! 927: break; ! 928: default: ! 929: saved = lxchar; ! 930: } ! 931: break; ! 932: default: ! 933: saved = lxchar; ! 934: }; ! 935: ! 936: pch(0); ! 937: rets(ret,txtstart); ! 938: ! 939: case A_DOT: ! 940: /* if (get(lxchar) == '.') { // look for ellipsis ! 941: if (get(lxchar) != '.') { ! 942: error("token .. ?"); ! 943: saved = lxchar; ! 944: } ! 945: reti(ELLIPSIS,0); ! 946: } ! 947: */ ! 948: switch (get(lxchar)) { ! 949: case '.': // look for ellipsis ! 950: if (get(lxchar) != '.') { ! 951: error("token .. ?"); ! 952: saved = lxchar; ! 953: } ! 954: reti(ELLIPSIS,0); ! 955: case '*': ! 956: reti (REFMUL,DOT); ! 957: } ! 958: ! 959: if (lxmask[lxchar+1] & LEXDIG){// look for floating constant ! 960: unget(lxchar); ! 961: lxget( '.', LEXDIG ); ! 962: goto getfp; ! 963: } ! 964: saved = lxchar; ! 965: reti(DOT,0); ! 966: ! 967: case A_STR: ! 968: /* save string constant in buffer */ ! 969: get_string(); ! 970: rets(STRING,txtstart); ! 971: ! 972: case A_CC: ! 973: /* character constant */ ! 974: rets(CCON,chconst()); ! 975: ! 976: case A_BCD: ! 977: { ! 978: register i; ! 979: int j; ! 980: ! 981: pch('`'); ! 982: ! 983: for (i=0; i<7; ++i) { ! 984: pch(get(j)); ! 985: if (j == '`' ) break; ! 986: } ! 987: pch(0); ! 988: if (6<i) ! 989: error('l',"bcd constant exceeds 6 characters" ); ! 990: rets(CCON,txtstart); ! 991: } ! 992: ! 993: case A_SL: /* / */ ! 994: switch (get(lxchar)) { ! 995: case '*': ! 996: lxcom(); ! 997: break; ! 998: case '/': ! 999: linecom(); ! 1000: break; ! 1001: case '=': ! 1002: reti(ASOP,ASDIV); ! 1003: default: ! 1004: saved = lxchar; ! 1005: reti(DIVOP,DIV); ! 1006: } ! 1007: ! 1008: case A_WS: ! 1009: continue; ! 1010: ! 1011: case A_NL: ! 1012: ++curloc.line; ! 1013: // Nline++; ! 1014: saved = lxtitle(); ! 1015: continue; ! 1016: ! 1017: case A_LC: ! 1018: #ifdef DK ! 1019: alc: ! 1020: #endif ! 1021: if (BLMAX <= bl_level++) { ! 1022: error('l',"blocks too deeply nested"); ! 1023: ext(3); ! 1024: } ! 1025: retl(LC); ! 1026: ! 1027: case A_RC: ! 1028: #ifdef DK ! 1029: arc: ! 1030: #endif ! 1031: if (lcount+bl_level-- <= 0) { ! 1032: error("unexpected '}'"); ! 1033: bl_level = 0; ! 1034: } ! 1035: retl(RC); ! 1036: ! 1037: case A_L: ! 1038: #ifdef DK ! 1039: if (get(lxchar) == ':') // (# is { ! 1040: goto alc; ! 1041: else ! 1042: saved = lxchar; ! 1043: #endif ! 1044: br_level++; ! 1045: reti(LP,0); ! 1046: ! 1047: case A_R: ! 1048: if (br_level-- <= 0) { ! 1049: error("unexpected ')'"); ! 1050: br_level = 0; ! 1051: } ! 1052: reti(RP,0); ! 1053: ! 1054: case A_ASS: ! 1055: switch (get(lxchar)) { ! 1056: case '=': ! 1057: reti(EQUOP,EQ); ! 1058: default: ! 1059: saved = lxchar; ! 1060: reti(ASSIGN,ASSIGN); ! 1061: } ! 1062: ! 1063: case A_COL: ! 1064: switch (get(lxchar)) { ! 1065: case ':': ! 1066: reti(MEM,0); ! 1067: case '=': ! 1068: error("':=' is not a c++ operator"); ! 1069: reti(ASSIGN,ASSIGN); ! 1070: #ifdef DK ! 1071: if (get(lxchar)==')') goto arc; // :) is } ! 1072: unget(lxchar); ! 1073: #endif ! 1074: default: ! 1075: saved = lxchar; ! 1076: reti(COLON,COLON); ! 1077: } ! 1078: case A_NOT: ! 1079: switch (get(lxchar)) { ! 1080: case '=': ! 1081: reti(EQUOP,NE); ! 1082: default: ! 1083: saved = lxchar; ! 1084: reti(NOT,NOT); ! 1085: } ! 1086: case A_GT: ! 1087: switch(get(lxchar)) { ! 1088: case '>': ! 1089: switch (get(lxchar)) { ! 1090: case '=': ! 1091: reti(ASOP,ASRS); ! 1092: break; ! 1093: default: ! 1094: saved = lxchar; ! 1095: reti(SHIFTOP,RS); ! 1096: } ! 1097: case '=': ! 1098: reti(RELOP,GE); ! 1099: default: ! 1100: saved = lxchar; ! 1101: reti(RELOP,GT); ! 1102: } ! 1103: case A_LT: ! 1104: switch (get(lxchar)) { ! 1105: case '<': ! 1106: switch (get(lxchar)) { ! 1107: case '=': ! 1108: reti(ASOP,ASLS); ! 1109: default: ! 1110: saved = lxchar; ! 1111: reti(SHIFTOP,LS); ! 1112: } ! 1113: case '=': ! 1114: reti(RELOP,LE); ! 1115: default: ! 1116: saved = lxchar; ! 1117: reti(RELOP,LT); ! 1118: } ! 1119: case A_AND: ! 1120: switch (get(lxchar)) { ! 1121: case '&': ! 1122: reti(ANDAND,ANDAND); ! 1123: case '=': ! 1124: reti(ASOP,ASAND); ! 1125: default: ! 1126: saved = lxchar; ! 1127: reti(AND,AND); ! 1128: } ! 1129: case A_OR: ! 1130: switch (get(lxchar)) { ! 1131: case '|': ! 1132: reti(OROR,OROR); ! 1133: case '=': ! 1134: reti(ASOP,ASOR); ! 1135: default: ! 1136: saved = lxchar; ! 1137: reti(OR,OR); ! 1138: } ! 1139: case A_ER: ! 1140: switch (get(lxchar)) { ! 1141: case '=': ! 1142: reti(ASOP,ASER); ! 1143: default: ! 1144: saved = lxchar; ! 1145: reti(ER,ER); ! 1146: } ! 1147: case A_PL: ! 1148: switch (get(lxchar)) { ! 1149: case '=': ! 1150: reti(ASOP,ASPLUS); ! 1151: case '+': ! 1152: reti(ICOP,INCR); ! 1153: default: ! 1154: saved = lxchar; ! 1155: reti(PLUS,PLUS); ! 1156: } ! 1157: case A_MIN: ! 1158: switch (get(lxchar)) { ! 1159: case '=': ! 1160: reti(ASOP,ASMINUS); ! 1161: case '-': ! 1162: reti(ICOP,DECR); ! 1163: case '>': ! 1164: if (get(lxchar) == '*') ! 1165: {reti(REFMUL,REF);} ! 1166: else ! 1167: saved = lxchar; ! 1168: reti(REF,REF); ! 1169: default: ! 1170: saved = lxchar; ! 1171: reti(MINUS,MINUS); ! 1172: } ! 1173: case A_MUL: ! 1174: switch (get(lxchar)) { ! 1175: case '=': ! 1176: reti(ASOP,ASMUL); ! 1177: case '/': ! 1178: error('w',"*/ not as end of comment"); ! 1179: default: ! 1180: saved = lxchar; ! 1181: reti(MUL,MUL); ! 1182: } ! 1183: case A_MOD: ! 1184: switch (get(lxchar)) { ! 1185: case '=': ! 1186: reti(ASOP,ASMOD); ! 1187: default: ! 1188: saved = lxchar; ! 1189: reti(DIVOP,MOD); ! 1190: } ! 1191: default: ! 1192: {error('i',"lex act==%d getc()->%d",p,lxchar);} ! 1193: ! 1194: } ! 1195: ! 1196: error('i',"lex, main switch"); ! 1197: } ! 1198: ! 1199: } ! 1200: ! 1201: int lxtitle() ! 1202: /* ! 1203: called after a newline; set linenumber and file name ! 1204: */ ! 1205: { ! 1206: register c; ! 1207: ! 1208: for(;;) ! 1209: switch ( get(c) ) { ! 1210: default: // e.g. not '\n', not '#' ! 1211: return c; ! 1212: case '\n': ! 1213: curloc.line++; ! 1214: // Nline++; ! 1215: ll: ! 1216: break; ! 1217: case '#': /* # lineno "filename" */ ! 1218: { int cl = curloc.line; ! 1219: curloc.line = 0; ! 1220: for(;;) ! 1221: switch (get(c)) { ! 1222: case '"': ! 1223: start_txt(); ! 1224: for(;;) ! 1225: switch (get(c)) { ! 1226: case '"': ! 1227: pch('\0'); ! 1228: ! 1229: while (get(c) != '\n') ; // skip to eol.. ignore anything more ! 1230: ! 1231: if (*txtstart) { // stack file name ! 1232: char* fn; ! 1233: if (curr_file == 0){ ! 1234: if (( fn = file_name[0]) ! 1235: && (strcmp(txtstart,fn)!=0)){ // 1st include ! 1236: if (MAXFILE*4<++Nfile) error('i',"fileN buffer overflow"); ! 1237: if (MAXFILE<++curr_file) error('i',"fileN stack overflow"); ! 1238: file_stack[curr_file] = Nfile; ! 1239: ! 1240: char* p1 = new char[txtfree-txtstart]; ! 1241: (void) strcpy(p1,txtstart); ! 1242: file_name[Nfile] = p1; ! 1243: // Nstr++; ! 1244: } ! 1245: else { //&& line is dummy #line "input.c" ! 1246: // ignore ! 1247: } ! 1248: //&& dead, dead, dead goto push; ! 1249: } ! 1250: else if ( (fn=file_name[file_stack[curr_file]]) ! 1251: && (strcmp(txtstart,fn)==0) ) { ! 1252: //new line, same file: ignore ! 1253: } ! 1254: else if ( (fn=file_name[file_stack[curr_file-1]]) ! 1255: && (strcmp(txtstart,fn)==0) ) { ! 1256: // previous file: pop ! 1257: curr_file--; ! 1258: } ! 1259: else { // new file name: push ! 1260: //&& push: ! 1261: if (MAXFILE*4<Nfile++) error('i',"fileN buffer overflow"); ! 1262: if (MAXFILE<curr_file++) error('i',"fileN stack overflow"); ! 1263: file_stack[curr_file] = Nfile; ! 1264: ! 1265: char* p = new char[txtfree-txtstart]; ! 1266: (void) strcpy(p,txtstart); ! 1267: file_name[Nfile] = p; ! 1268: // Nstr++; ! 1269: } ! 1270: } ! 1271: else { // no name .. back to the original .c file: "" ! 1272: curr_file = 0; ! 1273: } ! 1274: del_txt(); ! 1275: curloc.file = file_stack[curr_file]; ! 1276: goto ll; ! 1277: case '\n': ! 1278: error("unexpected end of line on '# line'"); ! 1279: default: ! 1280: pch(c); ! 1281: } ! 1282: case ' ': ! 1283: break; ! 1284: ! 1285: case '0': ! 1286: case '1': ! 1287: case '2': ! 1288: case '3': ! 1289: case '4': ! 1290: case '5': ! 1291: case '6': ! 1292: case '7': ! 1293: case '8': ! 1294: case '9': ! 1295: curloc.line = curloc.line*10+c-'0'; ! 1296: break; ! 1297: ! 1298: case 'l': // look for "#line ..." and then ignore "line" ! 1299: if (get(c)=='i' && get(c)=='n' && get(c)=='e') break; ! 1300: case '\n': ! 1301: curloc.putline(); ! 1302: goto ll; ! 1303: ! 1304: default: // detect #pragma ! 1305: // pass #rubbish through ! 1306: { char* p = txtstart+1; ! 1307: curloc.line = cl; ! 1308: pch('#'); ! 1309: pch(c); ! 1310: while (get(c) != '\n') pch(c); ! 1311: pch('\0'); ! 1312: if (strncmp(p,"pragma",6)==0) { ! 1313: p += 6; ! 1314: while (*p==' ' || *p=='\t') p++; ! 1315: if (strncmp(p,"linkage",7)==0) { ! 1316: if (bl_level) error("linkage pragma inside block"); ! 1317: p += 7; ! 1318: while (*p==' ' || *p=='\t') p++; ! 1319: set_linkage(p); ! 1320: } ! 1321: else ! 1322: fprintf(out_file,"\n%s\n",txtstart); ! 1323: } ! 1324: else ! 1325: fprintf(out_file,"\n%s\n",txtstart); ! 1326: ! 1327: // fprintf(out_file,"\n%s\n",txtstart); ! 1328: start_txt(); ! 1329: curloc.line++; ! 1330: // Nline++; ! 1331: goto ll; ! 1332: } ! 1333: } ! 1334: } ! 1335: } ! 1336: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.