|
|
1.1 ! root 1: /*ident "@(#)ctrans:src/lex.c 1.3.4.24" */ ! 2: /*************************************************************************** ! 3: ! 4: C++ source for cfront, the C++ compiler front-end ! 5: written in the computer science research center of Bell Labs ! 6: ! 7: Copyright (c) 1984 AT&T, Inc. All Rights Reserved ! 8: THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF AT&T, INC. ! 9: ! 10: lex.c: ! 11: lexical analyser based on pcc's and cpre's scanners ! 12: modified to handle classes: ! 13: new keywords: class ! 14: public ! 15: call ! 16: etc. ! 17: names are not entered in the symbol table by lex() ! 18: names can be of arbitrary length ! 19: error() is used to report errors ! 20: {} and () must match ! 21: numeric constants are not converted into internal representation ! 22: but stored as strings ! 23: ! 24: ****************************************************************************/ ! 25: ! 26: #include "cfront.h" ! 27: #include "yystype.h" ! 28: #include "size.h" ! 29: #include "tqueue.h" ! 30: ! 31: # define CCTRANS(x) x ! 32: ! 33: /* lexical actions */ ! 34: ! 35: #define A_ERR 0 /* illegal character */ ! 36: #define A_LET 1 /* saw a letter */ ! 37: #define A_DIG 2 /* saw a digit */ ! 38: #define A_1C 3 /* return a single character */ ! 39: #define A_STR 4 /* string */ ! 40: #define A_CC 5 /* character constant */ ! 41: #define A_BCD 6 /* GCOS BCD constant */ ! 42: #define A_SL 7 /* saw a / */ ! 43: #define A_DOT 8 /* saw a . */ ! 44: #define A_2C 9 /* possible two character symbol */ ! 45: #define A_WS 10 /* whitespace (not \n) */ ! 46: #define A_NL 11 /* \n */ ! 47: #define A_LC 12 /* { */ ! 48: #define A_RC 13 /* } */ ! 49: #define A_L 14 /* ( */ ! 50: #define A_R 15 /* ) */ ! 51: #define A_EOF 16 ! 52: #define A_ASS 17 ! 53: #define A_LT 18 ! 54: #define A_GT 19 /* > */ ! 55: #define A_ER 20 ! 56: #define A_OR 21 ! 57: #define A_AND 22 ! 58: #define A_MOD 23 ! 59: #define A_NOT 24 ! 60: #define A_MIN 25 ! 61: #define A_MUL 26 ! 62: #define A_PL 27 ! 63: #define A_COL 28 /* : */ ! 64: #define A_SHARP 29 /* # */ ! 65: ! 66: /* character classes */ ! 67: ! 68: # define LEXLET 01 ! 69: # define LEXDIG 02 ! 70: /* no LEXOCT because 8 and 9 used to be octal digits */ ! 71: # define LEXHEX 010 ! 72: # define LEXWS 020 ! 73: # define LEXDOT 040 ! 74: ! 75: const FIRSTCHUNK = 8*1024-8; ! 76: const BUFCHUNK = 4*1024-8; ! 77: ! 78: /* text buffer */ ! 79: static char inbuf[FIRSTCHUNK/*TBUFSZ*/]; ! 80: static char* txtmax = &inbuf[FIRSTCHUNK/*TBUFSZ*/-1]; ! 81: static char* txtstart = 0; ! 82: static char* txtfree = 0; ! 83: ! 84: static struct buf* bufhead; ! 85: static buf* freebuf; ! 86: //static bufs; ! 87: ! 88: struct buf { ! 89: buf* next; ! 90: char chars[BUFCHUNK]; ! 91: // buf() { next=bufhead; bufhead=this; } ! 92: }; ! 93: ! 94: new_buf(char c) ! 95: { ! 96: //fprintf(stderr,"new_buf %d\n",bufs++); ! 97: buf* pbuf; ! 98: if (freebuf) { ! 99: pbuf = freebuf; ! 100: freebuf = freebuf->next; ! 101: } ! 102: else ! 103: pbuf = new buf; // allocate and register new chunk ! 104: pbuf->next = bufhead; ! 105: bufhead = pbuf; ! 106: ! 107: if (BUFCHUNK < txtmax-txtstart) error('l',"lexical token too long"); ! 108: ! 109: // copy current token: ! 110: char* p = txtstart; ! 111: txtstart = txtfree = &pbuf->chars[0]; ! 112: while (p<txtmax) *txtfree++ = *p++; ! 113: *txtfree++=c; ! 114: txtmax = &pbuf->chars[BUFCHUNK-1]; ! 115: return 0; ! 116: } ! 117: ! 118: ! 119: #define pch(c) ((txtmax<=txtfree)?new_buf(c):(*txtfree++=c)) ! 120: #define start_txt() txtstart = txtfree ! 121: #define del_txt() txtfree = txtstart ! 122: ! 123: static int Nfile;// = 1; ! 124: static char* file_name[MAXFILE*4]; // source file names ! 125: // file_name[0] == src_file_name ! 126: // file_name[0] == 0 means stdin ! 127: static short file_stack[MAXFILE]; // stack of file name indices ! 128: static int tcurr_file; // current index in file_stack ! 129: // that is current #include nest level ! 130: ! 131: Linkage linkage = linkage_default; // linkage is default C++ ! 132: const LINKMAX = 10; ! 133: static Linkage lvec[LINKMAX] = { linkage_default }; ! 134: static int lcount = 0; ! 135: ! 136: void set_linkage(char* p) ! 137: { ! 138: if (p==0 || *p == 0) { // resume previous linkage ! 139: if (lcount>0) linkage = lvec[--lcount]; ! 140: } ! 141: else { ! 142: if (LINKMAX<=++lcount) { ! 143: error('l',"linkage directive nested too deep"); ! 144: --lcount; ! 145: } else if (strcmp(p,"C")==0) ! 146: lvec[lcount] = linkage = linkage_C; ! 147: else if (strcmp(p,"C++")==0) ! 148: lvec[lcount] = linkage = linkage_Cplusplus; ! 149: else { ! 150: error("%s linkage",p); ! 151: --lcount; ! 152: } ! 153: } ! 154: } ! 155: ! 156: static struct loc tloc; ! 157: FILE * out_file = stdout; ! 158: FILE * in_file = stdin; ! 159: Ptable ktbl; ! 160: static int p_level = 0; /* number of unmatched ``(''s */ ! 161: static int b_level = 0; /* number of unmatched ``{''s */ ! 162: ! 163: # ifdef ibm ! 164: ! 165: # define CSMASK 0377 ! 166: # define CSSZ 256 ! 167: ! 168: # else ! 169: ! 170: # define CSMASK 0177 ! 171: # define CSSZ 128 ! 172: ! 173: # endif ! 174: ! 175: static short lxmask[CSSZ+1]; ! 176: ! 177: int saved = 0; /* putback character, avoid ungetchar */ ! 178: static int lxtitle(); ! 179: ! 180: overload rt; ! 181: inline YYSTYPE rt(char* x) { YYSTYPE y; y.s = x; return y; } ! 182: inline YYSTYPE rt(int x) { YYSTYPE y; y.t = x; return y; } ! 183: inline YYSTYPE rt(loc x) { YYSTYPE y; y.l = x; return y; } ! 184: inline YYSTYPE rt(void* x) { YYSTYPE y; y.pn = Pname(x); return y; } ! 185: ! 186: #define get(c) (c=getc(in_file)) ! 187: #define unget(c) ungetc(c,in_file) ! 188: ! 189: #define reti(a,b) { addtok(a, rt(b), tloc); return a; } ! 190: #define retn(a,b) { addtok(a, rt((Pnode)b), tloc); return a; } ! 191: #define rets(a,b) { addtok(a, rt(b), tloc); return a; } ! 192: #define retl(a) { addtok(a, rt(tloc), tloc); return a; } ! 193: ! 194: void ktbl_init() ! 195: /* ! 196: enter keywords into keyword table for use by lex() ! 197: and into keyword representation table used for output ! 198: */ ! 199: { ! 200: ktbl = new table(KTBLSIZE,0,0); ! 201: ! 202: new_key("asm",ASM,0); ! 203: new_key("auto",AUTO,TYPE); ! 204: new_key("break",LOC,BREAK); ! 205: new_key("case",LOC,CASE); ! 206: new_key("continue",LOC,CONTINUE); ! 207: new_key("char",CHAR,TYPE); ! 208: new_key("do",LOC,DO); ! 209: new_key("double",DOUBLE,TYPE); ! 210: new_key("default",LOC,DEFAULT); ! 211: new_key("enum",ENUM,0); ! 212: new_key("else",LOC,ELSE); ! 213: new_key("extern",EXTERN,TYPE); ! 214: new_key("float",FLOAT,TYPE); ! 215: new_key("for",LOC,FOR); ! 216: // new_key("fortran",FORTRAN,0); ! 217: new_key("goto",LOC,GOTO); ! 218: new_key("catch",CATCH,CATCH); ! 219: new_key("try",TRY,TRY); ! 220: new_key("if",LOC,IF); ! 221: new_key("int",INT,TYPE); ! 222: new_key("long",LONG,TYPE); ! 223: new_key("return",LOC,RETURN); ! 224: new_key("register",REGISTER,TYPE); ! 225: new_key("static",STATIC,TYPE); ! 226: new_key("struct",STRUCT,AGGR); ! 227: new_key("sizeof",SIZEOF,0); ! 228: new_key("short",SHORT,TYPE); ! 229: new_key("switch",LOC,SWITCH); ! 230: new_key("template",TEMPLATE,TEMPLATE); ! 231: new_key("typedef",TYPEDEF,TYPE); ! 232: new_key("unsigned",UNSIGNED,TYPE); ! 233: new_key("union",UNION,AGGR); ! 234: new_key("void",VOID,TYPE); ! 235: new_key("while",LOC,WHILE); ! 236: ! 237: new_key("class",CLASS,AGGR); ! 238: new_key("const",CONST,TYPE); ! 239: new_key("delete",LOC,DELETE); ! 240: new_key("friend",FRIEND,TYPE); ! 241: new_key("inline",INLINE,TYPE); ! 242: new_key("new",NEW,0); ! 243: new_key("operator",OPERATOR,0); ! 244: new_key("overload",OVERLOAD,TYPE); ! 245: new_key("private",PRIVATE,PR); ! 246: new_key("protected",PROTECTED,PR); ! 247: new_key("public",PUBLIC,PR); ! 248: new_key("signed",SIGNED,TYPE); ! 249: new_key("this",THIS,0); ! 250: new_key("virtual",VIRTUAL,TYPE); ! 251: new_key("volatile",VOLATILE,TYPE); ! 252: } ! 253: ! 254: loc last_line; ! 255: loc noloc = { 0, 0 }; ! 256: ! 257: void loc::putline() ! 258: { ! 259: if (file==0 && line==0) return; ! 260: // fix, Nfile, not MAXFILE; Nfile == MAXFILE*4 ! 261: // if (0<=file && file<MAXFILE) { ! 262: if ( 0<=file && file <= Nfile ) { ! 263: char* f = file_name[file]; ! 264: if (f==0) f = src_file_name; ! 265: fprintf(out_file,line_format,line,f); ! 266: last_line = *this; ! 267: } ! 268: } ! 269: ! 270: void loc::put(FILE* p) ! 271: { ! 272: // fix, Nfile, not MAXFILE; Nfile == MAXFILE*4 ! 273: // if (0<=file && file<MAXFILE) { ! 274: if ( 0<=file && file <= Nfile ) { ! 275: char* f = file_name[file]; ! 276: if (f==0) f = src_file_name; ! 277: fprintf(p,"\"%s\", line %d: ",f,line); ! 278: } ! 279: } ! 280: ! 281: void lxenter(register char* s, short m) ! 282: /* enter a mask into lxmask */ ! 283: { ! 284: register c; ! 285: ! 286: while( c= *s++ ) lxmask[c+1] |= m; ! 287: ! 288: } ! 289: ! 290: ! 291: void lxget(register c, register m) ! 292: /* ! 293: put 'c' back then scan for members of character class 'm' ! 294: terminate the string read with \0 ! 295: txtfree points to the character position after that \0 ! 296: */ ! 297: { ! 298: pch(c); ! 299: while ( (get(c), lxmask[c+1]&m) ) pch(c); ! 300: unget(c); ! 301: pch('\0'); ! 302: } ! 303: ! 304: struct LXDOPE { ! 305: short lxch; /* the character */ ! 306: short lxact; /* the action to be performed */ ! 307: TOK lxtok; /* the token number to be returned */ ! 308: } lxdope[] = { ! 309: #ifdef apollo ! 310: '@', A_ERR, 0, /* illegal characters go here... */ ! 311: #else ! 312: '$', A_ERR, 0, /* illegal characters go here... */ ! 313: #endif ! 314: '_', A_LET, 0, /* letters point here */ ! 315: '0', A_DIG, 0, /* digits point here */ ! 316: ' ', A_WS, 0, /* whitespace goes here */ ! 317: '\n', A_NL, 0, ! 318: '"', A_STR, 0, /* character string */ ! 319: '\'', A_CC, 0, /* ASCII character constant */ ! 320: '`', A_BCD, 0, /* 'foreign' character constant, e.g. BCD */ ! 321: '(', A_L, LP, ! 322: ')', A_R, RP, ! 323: '{', A_LC, LC, ! 324: '}', A_RC, RC, ! 325: '[', A_1C, LB, ! 326: ']', A_1C, RB, ! 327: '*', A_MUL, MUL, ! 328: '?', A_1C, QUEST, ! 329: ':', A_COL, COLON, ! 330: '+', A_PL, PLUS, ! 331: '-', A_MIN, MINUS, ! 332: '/', A_SL, DIV, ! 333: '%', A_MOD, MOD, ! 334: '&', A_AND, AND, ! 335: '|', A_OR, OR, ! 336: '^', A_ER, ER, ! 337: '!', A_NOT, NOT, ! 338: '~', A_1C, COMPL, ! 339: ',', A_1C, CM, ! 340: ';', A_1C, SM, ! 341: '.', A_DOT, DOT, ! 342: '<', A_LT, LT, ! 343: '>', A_GT, GT, ! 344: '=', A_ASS, ASSIGN, ! 345: '#', A_SHARP, 0, ! 346: EOF, A_EOF, EOFTOK ! 347: }; ! 348: /* note: EOF is used as sentinel, so must be <=0 and last entry in table */ ! 349: ! 350: static struct LXDOPE *lxcp[CSSZ+1]; ! 351: ! 352: void lex_init() ! 353: { ! 354: register struct LXDOPE *p; ! 355: register i; ! 356: register char *cp; ! 357: /* set up character classes */ ! 358: ! 359: /* first clear lexmask */ ! 360: for(i=0; i<=CSSZ; i++) lxmask[i] = 0; ! 361: ! 362: #ifdef apollo ! 363: lxenter( "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_$", LEXLET ); ! 364: #else ! 365: lxenter( "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_", LEXLET ); ! 366: #endif ! 367: lxenter( "0123456789", LEXDIG ); ! 368: lxenter( "0123456789abcdefABCDEF", LEXHEX ); ! 369: /* \013 should become \v someday; \013 is OK for ASCII and EBCDIC */ ! 370: lxenter( " \t\r\b\f\013", LEXWS ); ! 371: lxmask['.'+1] |= LEXDOT; ! 372: ! 373: /* make lxcp point to appropriate lxdope entry for each character */ ! 374: ! 375: /* initialize error entries */ ! 376: ! 377: for( i= 0; i<=CSSZ; ++i ) lxcp[i] = lxdope; ! 378: ! 379: /* make unique entries */ ! 380: ! 381: for( p=lxdope; ; ++p ) { ! 382: lxcp[p->lxch+1] = p; ! 383: if( p->lxch < 0 ) break; ! 384: } ! 385: ! 386: /* handle letters, digits, and whitespace */ ! 387: /* by convention, first, second, and third places */ ! 388: ! 389: cp = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; ! 390: while( *cp ) lxcp[*cp++ + 1] = &lxdope[1]; ! 391: cp = "123456789"; ! 392: while( *cp ) lxcp[*cp++ + 1] = &lxdope[2]; ! 393: cp = "\t\b\r\f\013"; ! 394: while( *cp ) lxcp[*cp++ + 1] = &lxdope[3]; ! 395: ! 396: file_name[0] = src_file_name; ! 397: // set both curloc and tloc so curloc is valid at program startup ! 398: // curloc.file = tloc.file = 0; ! 399: curloc.line = tloc.line = 1; ! 400: ! 401: ktbl_init(); ! 402: lex_clear(); ! 403: saved = lxtitle(); ! 404: } ! 405: ! 406: void lex_clear() ! 407: { ! 408: // delete extra buffers: ! 409: buf* p = bufhead; ! 410: bufhead = 0; ! 411: //if (p) { ! 412: //fprintf(stderr,"lex_clear\n"); ! 413: //bufs=0; ! 414: //} ! 415: while (p) { ! 416: buf* pp = p; ! 417: p = p->next; ! 418: pp->next = freebuf; ! 419: freebuf = pp; ! 420: } ! 421: ! 422: // re-set to static buffer: ! 423: txtstart = txtfree = inbuf; ! 424: txtmax = &inbuf[FIRSTCHUNK/*TBUFSZ*/-1]; ! 425: } ! 426: ! 427: int int_val(char hex) ! 428: { ! 429: switch (hex) { ! 430: case '0': case '1': case '2': case '3': case '4': ! 431: case '5': case '6': case '7': case '8': case '9': ! 432: return hex-'0'; ! 433: case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': ! 434: return hex-'a'+10; ! 435: case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': ! 436: return hex-'A'+10; ! 437: } ! 438: } ! 439: ! 440: void hex_to_oct() ! 441: /* ! 442: \x has been seen on input (in char const or string) and \ printed ! 443: read the following hexadecimal integer and replace it with an octal ! 444: */ ! 445: { ! 446: int i = 0; ! 447: int c; ! 448: get(c); ! 449: if (lxmask[c+1] & LEXHEX) { ! 450: i = int_val(c); ! 451: get(c); // try for two ! 452: if (lxmask[c+1] & LEXHEX) { ! 453: i = (i<<4) + int_val(c); ! 454: get(c); // try for three ! 455: if (lxmask[c+1] & LEXHEX) ! 456: i = (i<<4) + int_val(c); ! 457: else ! 458: unget(c); ! 459: } ! 460: else ! 461: unget(c); ! 462: } ! 463: else { ! 464: error("hexadecimal digitE after \\x"); ! 465: unget(c); ! 466: } ! 467: ! 468: // if (0377 < i) error('l',"hexadecimal constant too large"); ! 469: i &= 0377; ! 470: ! 471: pch(('0'+(i>>6))); ! 472: pch(('0'+((i&070)>>3))); ! 473: pch(('0'+(i&7))); ! 474: } ! 475: ! 476: ! 477: char * chconst() ! 478: /* ! 479: read a character constant into inbuf ! 480: */ ! 481: { ! 482: register c; ! 483: int nch = 0; ! 484: ! 485: pch('\''); ! 486: ! 487: for(;;) { ! 488: char* p; ! 489: char cc = 0; ! 490: ! 491: switch (get(c)) { ! 492: case '\'': ! 493: goto ex; ! 494: case EOF: ! 495: error("eof in char constant"); ! 496: goto ex; ! 497: case '\n': ! 498: error("newline in char constant"); ! 499: goto ex; ! 500: case '\\': ! 501: if (SZ_INT == nch++) error('l',"char constant too long"); ! 502: pch(c); ! 503: switch (get(c)){ ! 504: case '\n': ! 505: ++tloc.line; ! 506: default: ! 507: pch(c); ! 508: break; ! 509: case '4': case '5': case '6': case '7': // octal ! 510: p = txtfree; ! 511: cc = c-4; ! 512: case '0': case '1': case '2': case '3': ! 513: pch(c); ! 514: get(c); /* try for 2 */ ! 515: if( lxmask[c+1] & LEXDIG && c<'8'){ ! 516: pch(c); ! 517: get(c); /* try for 3 */ ! 518: if (lxmask[c+1] & LEXDIG && c<'8') { ! 519: if (cc) *p = cc; // zap high bit ! 520: pch(c); ! 521: } ! 522: else ! 523: unget(c); ! 524: } ! 525: else ! 526: unget(c); ! 527: break; ! 528: case 'x': // hexadecimal ! 529: hex_to_oct(); ! 530: break; ! 531: }; ! 532: break; ! 533: default: ! 534: if (SZ_INT == nch++) error('l',"char constant too long"); ! 535: pch(c); ! 536: } ! 537: } ! 538: ex: ! 539: if(nch==0) ! 540: error("empty char constant"); ! 541: pch('\''); ! 542: pch('\0'); ! 543: return txtstart; ! 544: } ! 545: ! 546: void lxcom() ! 547: /* process a "block comment" */ ! 548: { ! 549: register c; ! 550: ! 551: for(;;) ! 552: switch (get(c)) { ! 553: case EOF: ! 554: error('w',"eof in comment"); ! 555: return; ! 556: case '\n': ! 557: tloc.line++; ! 558: // Nline++; ! 559: break; ! 560: case '*': ! 561: if (get(c) == '/') return; ! 562: unget(c); ! 563: break; ! 564: case '/': ! 565: if (get(c) == '*') error('w',"``/*'' in comment"); ! 566: unget(c); ! 567: break; ! 568: } ! 569: } ! 570: ! 571: ! 572: void linecom() ! 573: // process a "line comment" ! 574: { ! 575: register c; ! 576: ! 577: get(c); ! 578: #ifdef DBG ! 579: if ( c=='@' && get(c)=='!' ) { ! 580: while ( get(c) != '\n' && c != EOF ) pch(c); ! 581: pch('\0'); ! 582: process_debug_flags(txtstart); ! 583: del_txt(); ! 584: } ! 585: #endif ! 586: for(;;get(c)) ! 587: switch (c) { ! 588: case EOF: ! 589: error('w',"eof in comment"); ! 590: return; ! 591: case '\n': ! 592: tloc.line++; ! 593: // Nline++; ! 594: saved = lxtitle(); ! 595: return; ! 596: } ! 597: } ! 598: ! 599: char eat_whitespace() ! 600: { ! 601: ! 602: for(;;) { ! 603: register c = get(c); ! 604: lx: ! 605: ! 606: switch (c) { ! 607: case EOF: ! 608: error('w',"unexpected comment"); ! 609: return EOF; ! 610: case '/': ! 611: switch (get(c)) { ! 612: case '*': ! 613: lxcom(); ! 614: break; ! 615: case '/': ! 616: linecom(); ! 617: break; ! 618: default: ! 619: unget(c); ! 620: return '/'; ! 621: } ! 622: break; ! 623: case '\n': ! 624: ++tloc.line; ! 625: c = lxtitle(); ! 626: goto lx; ! 627: case ' ': ! 628: case '\t': ! 629: break; ! 630: default: ! 631: return c; ! 632: } ! 633: } ! 634: } ! 635: ! 636: void get_string() ! 637: { ! 638: int lxchar; ! 639: ! 640: for(;;) ! 641: switch (get(lxchar)) { ! 642: case '\\': ! 643: pch('\\'); ! 644: switch (get(lxchar)){ ! 645: case '\n': ! 646: ++tloc.line; ! 647: default: ! 648: pch(lxchar); ! 649: break; ! 650: case 'x': // hexadecimal ! 651: hex_to_oct(); ! 652: break; ! 653: }; ! 654: break; ! 655: case '"': ! 656: { char* p = txtstart; // eat_whitespace() moves txtstart ! 657: if ((lxchar = eat_whitespace()) == '"') { ! 658: // string catenation, break with ! 659: // newline to avoid merging characters ! 660: // (e.g. "\xAB" "C") ! 661: pch('\\'); ! 662: pch('\n'); ! 663: ! 664: continue; // eat '\"' and carry on ! 665: }; ! 666: ! 667: txtstart = p; ! 668: unget(lxchar); ! 669: pch(0); ! 670: return; ! 671: } ! 672: case '\n': ! 673: error("newline in string"); ! 674: pch(0); ! 675: return; ! 676: case EOF: ! 677: error("eof in string"); ! 678: pch(0); ! 679: return; ! 680: default: ! 681: pch(lxchar); ! 682: } ! 683: } ! 684: ! 685: TOK tlex() ! 686: { ! 687: TOK ret; ! 688: Pname n; ! 689: ! 690: // Ntoken++; ! 691: ! 692: for(;;) { ! 693: register lxchar; ! 694: register struct LXDOPE *p; ! 695: ! 696: start_txt(); ! 697: ! 698: if (saved) { ! 699: lxchar = saved; ! 700: saved = 0; ! 701: } ! 702: else ! 703: get(lxchar); ! 704: ! 705: if (lxchar+1 >= CSSZ ) ! 706: error( "illegal input character encountered: %d", lxchar ); ! 707: ! 708: switch( (p=lxcp[lxchar+1])->lxact ){ ! 709: ! 710: case A_1C: // eat up a single character, and return an opcode ! 711: reti(p->lxtok,p->lxtok); ! 712: ! 713: case A_EOF: ! 714: if (p_level || b_level+lcount) ! 715: error("'%s' missing at end of input",(b_level+lcount) ? "}" : ")"); ! 716: ! 717: reti(EOFTOK,0); ! 718: ! 719: case A_SHARP: ! 720: // cope with header file not ended with '\n' ! 721: unget('#'); ! 722: saved = lxtitle(); ! 723: continue; ! 724: ! 725: case A_ERR: ! 726: { if (' '<=lxchar && lxchar<='~') // ASCII printable ! 727: error("illegal character '%c' (ignored)",lxchar); ! 728: else ! 729: error("illegal character '0%o' (ignored)",lxchar); ! 730: continue; ! 731: } ! 732: case A_LET: // collect an identifier and check for keyword ! 733: { ! 734: char ll; ! 735: switch (ll = lxchar) { ! 736: // case 'l': ! 737: case 'L': ! 738: switch (get(lxchar)) { ! 739: case '\'': ! 740: error('s',"wide character constant"); ! 741: unget(lxchar); ! 742: continue; ! 743: case '"': ! 744: error('s',"wide character string"); ! 745: unget(lxchar); ! 746: continue; ! 747: } ! 748: unget(lxchar); ! 749: lxchar = ll; ! 750: } ! 751: } ! 752: lxget( lxchar, LEXLET|LEXDIG ); ! 753: ! 754: //error( 'd', "lex: b_level: %d txtstart %s", b_level, txtstart); ! 755: // local class ! 756: if (n = ktbl->look(txtstart,0)) { ! 757: TOK x; ! 758: del_txt(); ! 759: switch (x=n->base) { ! 760: case TNAME: ! 761: rets(ID,n->string); ! 762: //error('d',"lex tname %n",n); ! 763: //if (b_level > 1) { ! 764: // Pname nn = ktbl->look(txtstart,LOCAL); ! 765: // if ( nn ) { ! 766: // n = nn; ! 767: //error( 'd', "lex: local class instance: %n", nn ); ! 768: // } ! 769: //} ! 770: // retn(TNAME,n); ! 771: case LOC: ! 772: retl(n->syn_class); ! 773: case EXTERN: ! 774: if ((lxchar = eat_whitespace()) == '\"') { ! 775: // linkage directive ! 776: get_string(); ! 777: rets(LINKAGE,txtstart); ! 778: } ! 779: unget(lxchar); ! 780: reti(TYPE,EXTERN); ! 781: case CATCH: ! 782: case TEMPLATE: ! 783: error('s',"%k",n->syn_class); ! 784: continue; ! 785: case TRY: ! 786: { ! 787: static int warn_try; ! 788: if (!warn_try) { ! 789: Pname n = ktbl->look("try",0); ! 790: n->n_key = DEFAULT; ! 791: error('w',&tloc,"%k is a future reserved keyword",n->syn_class); ! 792: warn_try++; ! 793: } ! 794: rets(ID,n->string); ! 795: } ! 796: default: ! 797: reti(n->syn_class,x); ! 798: } ! 799: } ! 800: rets(ID,txtstart); ! 801: //else ! 802: // local class ! 803: //if ( b_level && (n=ktbl->look(txtstart,LOCAL)) ) { ! 804: //error( 'd', "lex2: local class instance: %n", n ); ! 805: // retn(TNAME,n); ! 806: //} else ! 807: // rets(ID,txtstart); ! 808: ! 809: case A_DIG: ! 810: ! 811: ret = ICON; ! 812: ! 813: if (lxchar=='0') { ! 814: int pkchar; ! 815: get(pkchar); ! 816: if(pkchar=='x' || pkchar=='X') { // hex ! 817: pch(lxchar); ! 818: lxget(pkchar,LEXHEX); ! 819: txtfree--; ! 820: if (txtfree-txtstart<3) // minimum "0Xd\0" ! 821: error("hex digitX after \"0x\""); ! 822: get(lxchar); ! 823: goto getsuffix; ! 824: } ! 825: unget(pkchar); ! 826: } ! 827: ! 828: lxget(lxchar,LEXDIG); ! 829: txtfree--; ! 830: ! 831: if (get(lxchar) == '.') { ! 832: getfp: ! 833: lxget('.', LEXDIG ); ! 834: txtfree--; ! 835: ret = FCON; ! 836: get(lxchar); ! 837: }; ! 838: ! 839: if (lxchar=='e' || lxchar=='E') { ! 840: pch(lxchar); ! 841: get(lxchar); ! 842: if(lxchar=='-' || lxchar=='+') { ! 843: pch(lxchar); ! 844: get(lxchar); ! 845: } ! 846: if (lxmask[lxchar+1] & LEXDIG) { ! 847: lxget( lxchar, LEXDIG ); ! 848: txtfree--; ! 849: get(lxchar); ! 850: } ! 851: else ! 852: error("missing exponent digits?"); ! 853: ret = FCON; ! 854: }; ! 855: ! 856: if(*txtstart=='0' && ret==ICON) { ! 857: char *bch = txtstart; ! 858: while (++bch <= txtfree) { ! 859: if(*bch=='8' || *bch=='9') ! 860: error("%c used as octal digit",*bch); ! 861: } ! 862: } ! 863: ! 864: getsuffix: ! 865: switch (lxchar) { ! 866: case 'f': ! 867: case 'F': ! 868: if (ret==ICON) ! 869: error("%c suffix for integer constant",lxchar); ! 870: else ! 871: pch(lxchar); ! 872: break; ! 873: case 'u': ! 874: case 'U': ! 875: if (ret==FCON) { ! 876: error("%c suffix for floating constant",lxchar); ! 877: break; ! 878: } ! 879: pch(lxchar); ! 880: switch(get(lxchar)) { ! 881: case 'l': ! 882: case 'L': ! 883: pch(lxchar); ! 884: break; ! 885: default: ! 886: saved=lxchar; ! 887: break; ! 888: } ! 889: break; ! 890: case 'l': ! 891: case 'L': ! 892: pch(lxchar); ! 893: if (ret==FCON) { ! 894: break; ! 895: } ! 896: switch(get(lxchar)) { ! 897: case 'u': ! 898: case 'U': ! 899: pch(lxchar); ! 900: break; ! 901: default: ! 902: saved=lxchar; ! 903: break; ! 904: } ! 905: break; ! 906: default: ! 907: saved = lxchar; ! 908: break; ! 909: }; ! 910: ! 911: if(*txtstart=='0' && txtfree-txtstart==1) ! 912: reti(ZERO,0); // plain zero ! 913: ! 914: pch(0); ! 915: rets(ret,txtstart); ! 916: ! 917: ! 918: case A_DOT: ! 919: switch (get(lxchar)) { ! 920: case '.': // look for ellipsis ! 921: if (get(lxchar) != '.') { ! 922: error("token .. ?"); ! 923: saved = lxchar; ! 924: } ! 925: reti(ELLIPSIS,0); ! 926: case '*': ! 927: reti (REFMUL,DOT); ! 928: } ! 929: ! 930: if (lxmask[lxchar+1] & LEXDIG){// look for floating constant ! 931: unget(lxchar); ! 932: goto getfp; ! 933: } ! 934: saved = lxchar; ! 935: reti(DOT,0); ! 936: ! 937: case A_STR: ! 938: /* save string constant in buffer */ ! 939: get_string(); ! 940: rets(STRING,txtstart); ! 941: ! 942: case A_CC: ! 943: /* character constant */ ! 944: rets(CCON,chconst()); ! 945: ! 946: case A_BCD: ! 947: { ! 948: register i; ! 949: int j; ! 950: ! 951: pch('`'); ! 952: ! 953: for (i=0; i<7; ++i) { ! 954: pch(get(j)); ! 955: if (j == '`' ) break; ! 956: } ! 957: pch(0); ! 958: if (6<i) ! 959: error('l',"bcd constant exceeds 6 characters" ); ! 960: rets(CCON,txtstart); ! 961: } ! 962: ! 963: case A_SL: /* / */ ! 964: switch (get(lxchar)) { ! 965: case '*': ! 966: lxcom(); ! 967: break; ! 968: case '/': ! 969: linecom(); ! 970: break; ! 971: case '=': ! 972: reti(ASOP,ASDIV); ! 973: default: ! 974: saved = lxchar; ! 975: reti(DIVOP,DIV); ! 976: } ! 977: ! 978: case A_WS: ! 979: continue; ! 980: ! 981: case A_NL: ! 982: ++tloc.line; ! 983: // Nline++; ! 984: saved = lxtitle(); ! 985: continue; ! 986: ! 987: case A_LC: ! 988: if (BLMAX <= b_level++) { ! 989: error('l',"blocks too deeply nested"); ! 990: ext(3); ! 991: } ! 992: retl(LC); ! 993: ! 994: case A_RC: ! 995: if (lcount+b_level-- <= 0) { ! 996: error("unexpected '}'"); ! 997: b_level = 0; ! 998: } ! 999: retl(RC); ! 1000: ! 1001: case A_L: ! 1002: p_level++; ! 1003: reti(LP,0); ! 1004: ! 1005: case A_R: ! 1006: if (p_level-- <= 0) { ! 1007: error("unexpected ')'"); ! 1008: p_level = 0; ! 1009: } ! 1010: reti(RP,0); ! 1011: ! 1012: case A_ASS: ! 1013: switch (get(lxchar)) { ! 1014: case '=': ! 1015: reti(EQUOP,EQ); ! 1016: default: ! 1017: saved = lxchar; ! 1018: reti(ASSIGN,ASSIGN); ! 1019: } ! 1020: ! 1021: case A_COL: ! 1022: switch (get(lxchar)) { ! 1023: case ':': ! 1024: reti(MEM,0); ! 1025: case '=': ! 1026: error("':=' is not a c++ operator"); ! 1027: reti(ASSIGN,ASSIGN); ! 1028: default: ! 1029: saved = lxchar; ! 1030: reti(COLON,COLON); ! 1031: } ! 1032: case A_NOT: ! 1033: switch (get(lxchar)) { ! 1034: case '=': ! 1035: reti(EQUOP,NE); ! 1036: default: ! 1037: saved = lxchar; ! 1038: reti(NOT,NOT); ! 1039: } ! 1040: case A_GT: ! 1041: switch(get(lxchar)) { ! 1042: case '>': ! 1043: switch (get(lxchar)) { ! 1044: case '=': ! 1045: reti(ASOP,ASRS); ! 1046: break; ! 1047: default: ! 1048: saved = lxchar; ! 1049: reti(SHIFTOP,RS); ! 1050: } ! 1051: case '=': ! 1052: reti(RELOP,GE); ! 1053: default: ! 1054: saved = lxchar; ! 1055: reti(RELOP,GT); ! 1056: } ! 1057: case A_LT: ! 1058: switch (get(lxchar)) { ! 1059: case '<': ! 1060: switch (get(lxchar)) { ! 1061: case '=': ! 1062: reti(ASOP,ASLS); ! 1063: default: ! 1064: saved = lxchar; ! 1065: reti(SHIFTOP,LS); ! 1066: } ! 1067: case '=': ! 1068: reti(RELOP,LE); ! 1069: default: ! 1070: saved = lxchar; ! 1071: reti(RELOP,LT); ! 1072: } ! 1073: case A_AND: ! 1074: switch (get(lxchar)) { ! 1075: case '&': ! 1076: reti(ANDAND,ANDAND); ! 1077: case '=': ! 1078: reti(ASOP,ASAND); ! 1079: default: ! 1080: saved = lxchar; ! 1081: reti(AND,AND); ! 1082: } ! 1083: case A_OR: ! 1084: switch (get(lxchar)) { ! 1085: case '|': ! 1086: reti(OROR,OROR); ! 1087: case '=': ! 1088: reti(ASOP,ASOR); ! 1089: default: ! 1090: saved = lxchar; ! 1091: reti(OR,OR); ! 1092: } ! 1093: case A_ER: ! 1094: switch (get(lxchar)) { ! 1095: case '=': ! 1096: reti(ASOP,ASER); ! 1097: default: ! 1098: saved = lxchar; ! 1099: reti(ER,ER); ! 1100: } ! 1101: case A_PL: ! 1102: switch (get(lxchar)) { ! 1103: case '=': ! 1104: reti(ASOP,ASPLUS); ! 1105: case '+': ! 1106: reti(ICOP,INCR); ! 1107: default: ! 1108: saved = lxchar; ! 1109: reti(PLUS,PLUS); ! 1110: } ! 1111: case A_MIN: ! 1112: switch (get(lxchar)) { ! 1113: case '=': ! 1114: reti(ASOP,ASMINUS); ! 1115: case '-': ! 1116: reti(ICOP,DECR); ! 1117: case '>': ! 1118: if (get(lxchar) == '*') ! 1119: {reti(REFMUL,REF);} ! 1120: else ! 1121: saved = lxchar; ! 1122: reti(REF,REF); ! 1123: default: ! 1124: saved = lxchar; ! 1125: reti(MINUS,MINUS); ! 1126: } ! 1127: case A_MUL: ! 1128: switch (get(lxchar)) { ! 1129: case '=': ! 1130: reti(ASOP,ASMUL); ! 1131: case '/': ! 1132: error('w',"*/ not as end of comment"); ! 1133: default: ! 1134: saved = lxchar; ! 1135: reti(MUL,MUL); ! 1136: } ! 1137: case A_MOD: ! 1138: switch (get(lxchar)) { ! 1139: case '=': ! 1140: reti(ASOP,ASMOD); ! 1141: default: ! 1142: saved = lxchar; ! 1143: reti(DIVOP,MOD); ! 1144: } ! 1145: default: ! 1146: {error('i',"lex act==%d getc()->%d",p,lxchar);} ! 1147: ! 1148: } ! 1149: ! 1150: error('i',"lex, main switch"); ! 1151: } ! 1152: ! 1153: } ! 1154: ! 1155: int lxtitle() ! 1156: /* ! 1157: called after a newline; set linenumber and file name ! 1158: */ ! 1159: { ! 1160: register c; ! 1161: ! 1162: for(;;) ! 1163: switch ( get(c) ) { ! 1164: default: // e.g. not '\n', not '#' ! 1165: return c; ! 1166: case '\n': ! 1167: tloc.line++; ! 1168: // Nline++; ! 1169: ll: ! 1170: break; ! 1171: case '#': /* # lineno "filename" */ ! 1172: { int cl = tloc.line; ! 1173: tloc.line = 0; ! 1174: for(;;) ! 1175: switch (get(c)) { ! 1176: case '"': ! 1177: start_txt(); ! 1178: for(;;) ! 1179: switch (get(c)) { ! 1180: case '"': ! 1181: pch('\0'); ! 1182: ! 1183: while (get(c) != '\n') ; // skip to eol.. ignore anything more ! 1184: ! 1185: if (*txtstart) { // stack file name ! 1186: char* fn; ! 1187: if (tcurr_file == 0){ ! 1188: if (( fn = file_name[0]) ! 1189: && (strcmp(txtstart,fn)!=0)){ // 1st include ! 1190: if (MAXFILE*4<++Nfile) error('i',"fileN buffer overflow"); ! 1191: if (MAXFILE<++tcurr_file) error('i',"fileN stack overflow"); ! 1192: file_stack[tcurr_file] = Nfile; ! 1193: ! 1194: char* p1 = new char[txtfree-txtstart]; ! 1195: (void) strcpy(p1,txtstart); ! 1196: file_name[Nfile] = p1; ! 1197: // Nstr++; ! 1198: } ! 1199: else { //&& line is dummy #line "input.c" ! 1200: // ignore ! 1201: } ! 1202: //&& dead, dead, dead goto push; ! 1203: } ! 1204: else if ( (fn=file_name[file_stack[tcurr_file]]) ! 1205: && (strcmp(txtstart,fn)==0) ) { ! 1206: //new line, same file: ignore ! 1207: } ! 1208: else if ( (fn=file_name[file_stack[tcurr_file-1]]) ! 1209: && (strcmp(txtstart,fn)==0) ) { ! 1210: // previous file: pop ! 1211: tcurr_file--; ! 1212: } ! 1213: else { // new file name: push ! 1214: //&& push: ! 1215: if (MAXFILE*4<Nfile++) error('i',"fileN buffer overflow"); ! 1216: if (MAXFILE<tcurr_file++) error('i',"fileN stack overflow"); ! 1217: file_stack[tcurr_file] = Nfile; ! 1218: char* p = new char[txtfree-txtstart]; ! 1219: (void) strcpy(p,txtstart); ! 1220: file_name[Nfile] = p; ! 1221: // Nstr++; ! 1222: } ! 1223: } ! 1224: else { // no name .. back to the original .c file: "" ! 1225: tcurr_file = 0; ! 1226: } ! 1227: del_txt(); ! 1228: tloc.file = file_stack[tcurr_file]; ! 1229: goto ll; ! 1230: case '\n': ! 1231: error("unexpected end of line on '# line'"); ! 1232: default: ! 1233: pch(c); ! 1234: } ! 1235: case ' ': ! 1236: break; ! 1237: ! 1238: case '0': ! 1239: case '1': ! 1240: case '2': ! 1241: case '3': ! 1242: case '4': ! 1243: case '5': ! 1244: case '6': ! 1245: case '7': ! 1246: case '8': ! 1247: case '9': ! 1248: tloc.line = tloc.line*10+c-'0'; ! 1249: break; ! 1250: ! 1251: case 'l': // look for "#line ..." and then ignore "line" ! 1252: if (get(c)=='i' && get(c)=='n' && get(c)=='e') break; ! 1253: case '\n': ! 1254: tloc.putline(); ! 1255: goto ll; ! 1256: ! 1257: default: // pass #rubbish through ! 1258: tloc.line = cl; ! 1259: pch('#'); ! 1260: pch(c); ! 1261: while (get(c) != '\n') pch(c); ! 1262: pch('\0'); ! 1263: fprintf(out_file,"\n%s\n",txtstart); ! 1264: start_txt(); ! 1265: tloc.line++; ! 1266: // Nline++; ! 1267: goto ll; ! 1268: } ! 1269: } ! 1270: } ! 1271: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.