|
|
1.1 ! root 1: /*ident "@(#)ctrans:src/lex.c 1.4" */ ! 2: /*************************************************************************** ! 3: ! 4: C++ source for cfront, the C++ compiler front-end ! 5: written in the computer science research center of Bell Labs ! 6: ! 7: Copyright (c) 1984 AT&T, Inc. All Rights Reserved ! 8: THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF AT&T, INC. ! 9: ! 10: lex.c: ! 11: lexical analyser based on pcc's and cpre's scanners ! 12: modified to handle classes: ! 13: new keywords: class ! 14: public ! 15: call ! 16: etc. ! 17: names are not entered in the symbol table by lex() ! 18: names can be of arbitrary length ! 19: error() is used to report errors ! 20: {} and () must match ! 21: numeric constants are not converted into internal representation ! 22: but stored as strings ! 23: ! 24: ****************************************************************************/ ! 25: ! 26: #include "cfront.h" ! 27: #include "yystype.h" ! 28: #include "size.h" ! 29: #include "tqueue.h" ! 30: #include "template.h" ! 31: ! 32: # define CCTRANS(x) x ! 33: #define copy_if_need_be(s) ((templp->in_progress || templp->parameters_in_progress) ? strdup(s) : (s)) ! 34: ! 35: /* lexical actions */ ! 36: ! 37: #define A_ERR 0 /* illegal character */ ! 38: #define A_LET 1 /* saw a letter */ ! 39: #define A_DIG 2 /* saw a digit */ ! 40: #define A_1C 3 /* return a single character */ ! 41: #define A_STR 4 /* string */ ! 42: #define A_CC 5 /* character constant */ ! 43: #define A_BCD 6 /* GCOS BCD constant */ ! 44: #define A_SL 7 /* saw a / */ ! 45: #define A_DOT 8 /* saw a . */ ! 46: #define A_2C 9 /* possible two character symbol */ ! 47: #define A_WS 10 /* whitespace (not \n) */ ! 48: #define A_NL 11 /* \n */ ! 49: #define A_LC 12 /* { */ ! 50: #define A_RC 13 /* } */ ! 51: #define A_L 14 /* ( */ ! 52: #define A_R 15 /* ) */ ! 53: #define A_EOF 16 ! 54: #define A_ASS 17 ! 55: #define A_LT 18 ! 56: #define A_GT 19 /* > */ ! 57: #define A_ER 20 ! 58: #define A_OR 21 ! 59: #define A_AND 22 ! 60: #define A_MOD 23 ! 61: #define A_NOT 24 ! 62: #define A_MIN 25 ! 63: #define A_MUL 26 ! 64: #define A_PL 27 ! 65: #define A_COL 28 /* : */ ! 66: #define A_SHARP 29 /* # */ ! 67: #define A_DOLL 30 /* $ */ ! 68: ! 69: /* character classes */ ! 70: ! 71: # define LEXLET 01 ! 72: # define LEXDIG 02 ! 73: /* no LEXOCT because 8 and 9 used to be octal digits */ ! 74: # define LEXHEX 010 ! 75: # define LEXWS 020 ! 76: # define LEXDOT 040 ! 77: ! 78: const FIRSTCHUNK = 8*1024-8; ! 79: const BUFCHUNK = 4*1024-8; ! 80: ! 81: /* text buffer */ ! 82: static char inbuf[FIRSTCHUNK/*TBUFSZ*/]; ! 83: static char* txtmax = &inbuf[FIRSTCHUNK/*TBUFSZ*/-1]; ! 84: static char* txtstart = 0; ! 85: static char* txtfree = 0; ! 86: ! 87: static struct buf* bufhead; ! 88: static buf* freebuf; ! 89: //static bufs; ! 90: ! 91: struct buf { ! 92: buf* next; ! 93: char chars[BUFCHUNK]; ! 94: // buf() { next=bufhead; bufhead=this; } ! 95: }; ! 96: ! 97: new_buf(char c) ! 98: { ! 99: //fprintf(stderr,"new_buf %d\n",bufs++); ! 100: buf* pbuf; ! 101: if (freebuf) { ! 102: pbuf = freebuf; ! 103: freebuf = freebuf->next; ! 104: } ! 105: else ! 106: pbuf = new buf; // allocate and register new chunk ! 107: pbuf->next = bufhead; ! 108: bufhead = pbuf; ! 109: ! 110: if (BUFCHUNK < txtmax-txtstart) error('l',"lexical token too long"); ! 111: ! 112: // copy current token: ! 113: char* p = txtstart; ! 114: txtstart = txtfree = &pbuf->chars[0]; ! 115: while (p<txtmax) *txtfree++ = *p++; ! 116: *txtfree++=c; ! 117: txtmax = &pbuf->chars[BUFCHUNK-1]; ! 118: return 0; ! 119: } ! 120: ! 121: ! 122: #define pch(c) ((txtmax<=txtfree)?new_buf(c):(*txtfree++=c)) ! 123: #define start_txt() txtstart = txtfree ! 124: #define del_txt() txtfree = txtstart ! 125: ! 126: static int Nfile;// = 1; ! 127: static char* file_name[MAXFILE*4]; // source file names ! 128: // file_name[0] == src_file_name ! 129: // file_name[0] == 0 means stdin ! 130: static short file_stack[MAXFILE]; // stack of file name indices ! 131: static int tcurr_file; // current index in file_stack ! 132: // that is current #include nest level ! 133: ! 134: Linkage linkage = linkage_default; // linkage is default C++ ! 135: const LINKMAX = 10; ! 136: static Linkage lvec[LINKMAX] = { linkage_default }; ! 137: static int lcount = 0; ! 138: ! 139: void set_linkage(char* p) ! 140: { ! 141: if (p==0 || *p == 0) { // resume previous linkage ! 142: if (lcount>0) linkage = lvec[--lcount]; ! 143: } ! 144: else { ! 145: if (LINKMAX<=++lcount) { ! 146: error('l',"linkage directive nested too deep"); ! 147: --lcount; ! 148: } else if (strcmp(p,"C")==0) ! 149: lvec[lcount] = linkage = linkage_C; ! 150: else if (strcmp(p,"C++")==0) ! 151: lvec[lcount] = linkage = linkage_Cplusplus; ! 152: else { ! 153: error("%s linkage",p); ! 154: --lcount; ! 155: } ! 156: } ! 157: } ! 158: ! 159: static struct loc tloc; ! 160: FILE * out_file = stdout; ! 161: FILE * in_file = stdin; ! 162: Ptable ktbl; ! 163: Ptable keyword_table; ! 164: ! 165: static int p_level = 0; /* number of unmatched ``(''s */ ! 166: static int b_level = 0; /* number of unmatched ``{''s */ ! 167: ! 168: # ifdef ibm ! 169: ! 170: # define CSMASK 0377 ! 171: # define CSSZ 256 ! 172: ! 173: # else ! 174: ! 175: # define CSMASK 0177 ! 176: # define CSSZ 128 ! 177: ! 178: # endif ! 179: ! 180: static short lxmask[CSSZ+1]; ! 181: ! 182: int saved = 0; /* putback character, avoid ungetchar */ ! 183: static int lxtitle(); ! 184: ! 185: // overload rt; ! 186: inline YYSTYPE rt(char* x) { YYSTYPE y; y.s = x; return y; } ! 187: inline YYSTYPE rt(int x) { YYSTYPE y; y.t = x; return y; } ! 188: inline YYSTYPE rt(loc x) { YYSTYPE y; y.l = x; return y; } ! 189: inline YYSTYPE rt(void* x) { YYSTYPE y; y.pn = Pname(x); return y; } ! 190: ! 191: #define get(c) (c=getc(in_file)) ! 192: #define unget(c) ungetc(c,in_file) ! 193: ! 194: #define reti(a,b) { addtok(a, rt(b), tloc); return a; } ! 195: #define retn(a,b) { addtok(a, rt((Pnode)b), tloc); return a; } ! 196: #define rets(a,b) { addtok(a, rt(b), tloc); return a; } ! 197: #define retl(a) { addtok(a, rt(tloc), tloc); return a; } ! 198: ! 199: // keys[] holds the external form for tokens with fixed representation ! 200: // illegal tokens and those with variable representation have 0 entries ! 201: char* keys[MAXTOK+1]; ! 202: ! 203: static void ! 204: new_key(char* s, TOK toknum, TOK yyclass) ! 205: /* ! 206: make "s" a new keyword with the representation (token) "toknum" ! 207: "yyclass" is the yacc token (for example new_key("int",INT,TYPE); ) ! 208: "yyclass==0" means yyclass=toknum; ! 209: */ ! 210: { ! 211: Pname n = new name(s); ! 212: ! 213: keys[(toknum==LOC)?yyclass:toknum] = s; ! 214: n = new name(s); ! 215: Pname nn = keyword_table->insert(n,0); ! 216: // if (Nold) error('i',"keyword %sD twice",s); ! 217: nn->base = toknum; ! 218: nn->syn_class = (yyclass) ? yyclass : toknum; ! 219: delete n; ! 220: } ! 221: ! 222: const int keyword_count = 67; ! 223: static void ! 224: ktbl_init() ! 225: /* ! 226: enter keywords into keyword table for use by lex() ! 227: and into keyword representation table used for output ! 228: ! 229: ktbl is only for types. We put nothing in it. ! 230: keyword_table is for user-defined reserved words ! 231: */ ! 232: { ! 233: ktbl = new table(KTBLSIZE,0,0); ! 234: keyword_table = new table(keyword_count,0,0); ! 235: ! 236: new_key("asm",ASM,0); ! 237: new_key("auto",AUTO,TYPE); ! 238: new_key("break",LOC,BREAK); ! 239: new_key("case",LOC,CASE); ! 240: new_key("continue",LOC,CONTINUE); ! 241: new_key("char",CHAR,TYPE); ! 242: new_key("do",LOC,DO); ! 243: new_key("double",DOUBLE,TYPE); ! 244: new_key("default",LOC,DEFAULT); ! 245: new_key("enum",ENUM,0); ! 246: new_key("else",LOC,ELSE); ! 247: new_key("extern",EXTERN,TYPE); ! 248: new_key("float",FLOAT,TYPE); ! 249: new_key("for",LOC,FOR); ! 250: new_key("goto",LOC,GOTO); ! 251: new_key("catch",CATCH,CATCH); ! 252: new_key("try",TRY,TRY); ! 253: new_key("if",LOC,IF); ! 254: new_key("int",INT,TYPE); ! 255: new_key("long",LONG,TYPE); ! 256: new_key("return",LOC,RETURN); ! 257: new_key("register",REGISTER,TYPE); ! 258: new_key("static",STATIC,TYPE); ! 259: new_key("struct",STRUCT,AGGR); ! 260: new_key("sizeof",SIZEOF,0); ! 261: new_key("short",SHORT,TYPE); ! 262: new_key("switch",LOC,SWITCH); ! 263: new_key("typedef",TYPEDEF,TYPE); ! 264: new_key("unsigned",UNSIGNED,TYPE); ! 265: new_key("union",UNION,AGGR); ! 266: new_key("void",VOID,TYPE); ! 267: new_key("while",LOC,WHILE); ! 268: ! 269: new_key("class",CLASS,AGGR); ! 270: new_key("const",CONST,TYPE); ! 271: new_key("delete",LOC,DELETE); ! 272: new_key("friend",FRIEND,TYPE); ! 273: new_key("inline",INLINE,TYPE); ! 274: new_key("new",NEW,0); ! 275: new_key("operator",OPERATOR,0); ! 276: new_key("overload",OVERLOAD,TYPE); ! 277: new_key("private",PRIVATE,PR); ! 278: new_key("protected",PROTECTED,PR); ! 279: new_key("public",PUBLIC,PR); ! 280: new_key("signed",SIGNED,TYPE); ! 281: new_key("template",TEMPLATE,0); ! 282: new_key("this",THIS,0); ! 283: new_key("virtual",VIRTUAL,TYPE); ! 284: new_key("volatile",VOLATILE,TYPE); ! 285: ! 286: new_key("__statement", STATEMENT, 0) ; ! 287: new_key("__expression", EXPRESSION, 0) ; ! 288: new_key("__template_test", TEMPLATE_TEST, 0) ; ! 289: } ! 290: ! 291: loc last_line; ! 292: loc noloc = { 0, 0 }; ! 293: ! 294: void loc::putline() ! 295: { ! 296: if (file==0 && line==0) return; ! 297: // fix, Nfile, not MAXFILE; Nfile == MAXFILE*4 ! 298: // if (0<=file && file<MAXFILE) { ! 299: if ( 0<=file && file <= Nfile ) { ! 300: char* f = file_name[file]; ! 301: if (f==0) f = src_file_name; ! 302: fprintf(out_file,line_format,line,f); ! 303: last_line = *this; ! 304: } ! 305: } ! 306: ! 307: void loc::put(FILE* p) ! 308: { ! 309: // fix, Nfile, not MAXFILE; Nfile == MAXFILE*4 ! 310: // if (0<=file && file<MAXFILE) { ! 311: if ( 0<=file && file <= Nfile ) { ! 312: char* f = file_name[file]; ! 313: if (f==0) f = src_file_name; ! 314: fprintf(p,"\"%s\", line %d: ",f,line); ! 315: } ! 316: } ! 317: ! 318: void lxenter(register char* s, short m) ! 319: /* enter a mask into lxmask */ ! 320: { ! 321: register c; ! 322: ! 323: while( c= *s++ ) lxmask[c+1] |= m; ! 324: ! 325: } ! 326: ! 327: ! 328: void lxget(register c, register m) ! 329: /* ! 330: put 'c' back then scan for members of character class 'm' ! 331: terminate the string read with \0 ! 332: txtfree points to the character position after that \0 ! 333: */ ! 334: { ! 335: pch(c); ! 336: while ( (get(c), lxmask[c+1]&m) ) pch(c); ! 337: unget(c); ! 338: pch('\0'); ! 339: } ! 340: ! 341: struct LXDOPE { ! 342: short lxch; /* the character */ ! 343: short lxact; /* the action to be performed */ ! 344: TOK lxtok; /* the token number to be returned */ ! 345: } lxdope[] = { ! 346: #ifdef apollo ! 347: '@', A_ERR, 0, /* illegal characters go here... */ ! 348: #else ! 349: '$', A_DOLL, 0, ! 350: // '$', A_ERR, 0, /* illegal characters go here... */ ! 351: #endif ! 352: '_', A_LET, 0, /* letters point here */ ! 353: '0', A_DIG, 0, /* digits point here */ ! 354: ' ', A_WS, 0, /* whitespace goes here */ ! 355: '\n', A_NL, 0, ! 356: '"', A_STR, 0, /* character string */ ! 357: '\'', A_CC, 0, /* ASCII character constant */ ! 358: '`', A_BCD, 0, /* 'foreign' character constant, e.g. BCD */ ! 359: '(', A_L, LP, ! 360: ')', A_R, RP, ! 361: '{', A_LC, LC, ! 362: '}', A_RC, RC, ! 363: '[', A_1C, LB, ! 364: ']', A_1C, RB, ! 365: '*', A_MUL, MUL, ! 366: '?', A_1C, QUEST, ! 367: ':', A_COL, COLON, ! 368: '+', A_PL, PLUS, ! 369: '-', A_MIN, MINUS, ! 370: '/', A_SL, DIV, ! 371: '%', A_MOD, MOD, ! 372: '&', A_AND, AND, ! 373: '|', A_OR, OR, ! 374: '^', A_ER, ER, ! 375: '!', A_NOT, NOT, ! 376: '~', A_1C, COMPL, ! 377: ',', A_1C, CM, ! 378: ';', A_1C, SM, ! 379: '.', A_DOT, DOT, ! 380: '<', A_LT, LT, ! 381: '>', A_GT, GT, ! 382: '=', A_ASS, ASSIGN, ! 383: '#', A_SHARP, 0, ! 384: EOF, A_EOF, EOFTOK ! 385: }; ! 386: /* note: EOF is used as sentinel, so must be <=0 and last entry in table */ ! 387: ! 388: static struct LXDOPE *lxcp[CSSZ+1]; ! 389: ! 390: void ! 391: lex_init() ! 392: { ! 393: register struct LXDOPE *p; ! 394: register i; ! 395: register char *cp; ! 396: /* set up character classes */ ! 397: ! 398: /* first clear lexmask */ ! 399: for(i=0; i<=CSSZ; i++) lxmask[i] = 0; ! 400: ! 401: #ifdef apollo ! 402: lxenter( "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_$", LEXLET ); ! 403: #else ! 404: lxenter( "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_", LEXLET ); ! 405: #endif ! 406: lxenter( "0123456789", LEXDIG ); ! 407: lxenter( "0123456789abcdefABCDEF", LEXHEX ); ! 408: /* \013 should become \v someday; \013 is OK for ASCII and EBCDIC */ ! 409: lxenter( " \t\r\b\f\013", LEXWS ); ! 410: lxmask['.'+1] |= LEXDOT; ! 411: ! 412: /* make lxcp point to appropriate lxdope entry for each character */ ! 413: ! 414: /* initialize error entries */ ! 415: ! 416: for( i= 0; i<=CSSZ; ++i ) lxcp[i] = lxdope; ! 417: ! 418: /* make unique entries */ ! 419: ! 420: for( p=lxdope; ; ++p ) { ! 421: lxcp[p->lxch+1] = p; ! 422: if( p->lxch < 0 ) break; ! 423: } ! 424: ! 425: /* handle letters, digits, and whitespace */ ! 426: /* by convention, first, second, and third places */ ! 427: ! 428: cp = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; ! 429: while( *cp ) lxcp[*cp++ + 1] = &lxdope[1]; ! 430: cp = "123456789"; ! 431: while( *cp ) lxcp[*cp++ + 1] = &lxdope[2]; ! 432: cp = "\t\b\r\f\013"; ! 433: while( *cp ) lxcp[*cp++ + 1] = &lxdope[3]; ! 434: ! 435: file_name[0] = src_file_name; ! 436: // set both curloc and tloc so curloc is valid at program startup ! 437: // curloc.file = tloc.file = 0; ! 438: curloc.line = tloc.line = 1; ! 439: ! 440: ktbl_init(); ! 441: lex_clear(); ! 442: saved = lxtitle(); ! 443: } ! 444: ! 445: void lex_clear() ! 446: { ! 447: // delete extra buffers: ! 448: buf* p = bufhead; ! 449: bufhead = 0; ! 450: //if (p) { ! 451: //fprintf(stderr,"lex_clear\n"); ! 452: //bufs=0; ! 453: //} ! 454: while (p) { ! 455: buf* pp = p; ! 456: p = p->next; ! 457: pp->next = freebuf; ! 458: freebuf = pp; ! 459: } ! 460: ! 461: // re-set to static buffer: ! 462: txtstart = txtfree = inbuf; ! 463: txtmax = &inbuf[FIRSTCHUNK/*TBUFSZ*/-1]; ! 464: } ! 465: ! 466: int int_val(char hex) ! 467: { ! 468: switch (hex) { ! 469: case '0': case '1': case '2': case '3': case '4': ! 470: case '5': case '6': case '7': case '8': case '9': ! 471: return hex-'0'; ! 472: case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': ! 473: return hex-'a'+10; ! 474: case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': ! 475: return hex-'A'+10; ! 476: } ! 477: } ! 478: ! 479: void hex_to_oct() ! 480: /* ! 481: \x has been seen on input (in char const or string) and \ printed ! 482: read the following hexadecimal integer and replace it with an octal ! 483: */ ! 484: { ! 485: int i = 0; ! 486: int c; ! 487: get(c); ! 488: if (lxmask[c+1] & LEXHEX) { ! 489: i = int_val(c); ! 490: get(c); // try for two ! 491: if (lxmask[c+1] & LEXHEX) { ! 492: i = (i<<4) + int_val(c); ! 493: get(c); // try for three ! 494: if (lxmask[c+1] & LEXHEX) ! 495: i = (i<<4) + int_val(c); ! 496: else ! 497: unget(c); ! 498: } ! 499: else ! 500: unget(c); ! 501: } ! 502: else { ! 503: error("hexadecimal digitE after \\x"); ! 504: unget(c); ! 505: } ! 506: ! 507: // if (0377 < i) error('l',"hexadecimal constant too large"); ! 508: i &= 0377; ! 509: ! 510: pch(('0'+(i>>6))); ! 511: pch(('0'+((i&070)>>3))); ! 512: pch(('0'+(i&7))); ! 513: } ! 514: ! 515: ! 516: char * chconst() ! 517: /* ! 518: read a character constant into inbuf ! 519: */ ! 520: { ! 521: register c; ! 522: int nch = 0; ! 523: ! 524: pch('\''); ! 525: ! 526: for(;;) { ! 527: char* p; ! 528: char cc = 0; ! 529: ! 530: switch (get(c)) { ! 531: case '\'': ! 532: goto ex; ! 533: case EOF: ! 534: error("eof in char constant"); ! 535: goto ex; ! 536: case '\n': ! 537: error("newline in char constant"); ! 538: goto ex; ! 539: case '\\': ! 540: if (SZ_INT == nch++) error('l',"char constant too long"); ! 541: pch(c); ! 542: switch (get(c)){ ! 543: case '\n': ! 544: ++tloc.line; ! 545: default: ! 546: pch(c); ! 547: break; ! 548: case '4': case '5': case '6': case '7': // octal ! 549: p = txtfree; ! 550: cc = c-4; ! 551: case '0': case '1': case '2': case '3': ! 552: pch(c); ! 553: get(c); /* try for 2 */ ! 554: if( lxmask[c+1] & LEXDIG && c<'8'){ ! 555: pch(c); ! 556: get(c); /* try for 3 */ ! 557: if (lxmask[c+1] & LEXDIG && c<'8') { ! 558: if (cc) *p = cc; // zap high bit ! 559: pch(c); ! 560: } ! 561: else ! 562: unget(c); ! 563: } ! 564: else ! 565: unget(c); ! 566: break; ! 567: case 'x': // hexadecimal ! 568: hex_to_oct(); ! 569: break; ! 570: }; ! 571: break; ! 572: default: ! 573: if (SZ_INT == nch++) error('l',"char constant too long"); ! 574: pch(c); ! 575: } ! 576: } ! 577: ex: ! 578: if(nch==0) ! 579: error("empty char constant"); ! 580: pch('\''); ! 581: pch('\0'); ! 582: return txtstart; ! 583: } ! 584: ! 585: void lxcom() ! 586: /* process a "block comment" */ ! 587: { ! 588: register c; ! 589: ! 590: for(;;) ! 591: switch (get(c)) { ! 592: case EOF: ! 593: error('w',"eof in comment"); ! 594: return; ! 595: case '\n': ! 596: tloc.line++; ! 597: // Nline++; ! 598: break; ! 599: case '*': ! 600: if (get(c) == '/') return; ! 601: unget(c); ! 602: break; ! 603: case '/': ! 604: if (get(c) == '*') error('w',"``/*'' in comment"); ! 605: unget(c); ! 606: break; ! 607: } ! 608: } ! 609: ! 610: ! 611: void linecom() ! 612: // process a "line comment" ! 613: { ! 614: register c; ! 615: ! 616: get(c); ! 617: #ifdef DBG ! 618: if ( c=='@' && get(c)=='!' ) { ! 619: while ( get(c) != '\n' && c != EOF ) pch(c); ! 620: pch('\0'); ! 621: process_debug_flags(txtstart); ! 622: del_txt(); ! 623: } ! 624: #endif ! 625: for(;;get(c)) ! 626: switch (c) { ! 627: case EOF: ! 628: error('w',"eof in comment"); ! 629: return; ! 630: case '\n': ! 631: tloc.line++; ! 632: // Nline++; ! 633: saved = lxtitle(); ! 634: return; ! 635: } ! 636: } ! 637: ! 638: char eat_whitespace() ! 639: { ! 640: ! 641: for(;;) { ! 642: register c = get(c); ! 643: lx: ! 644: ! 645: switch (c) { ! 646: case EOF: ! 647: error('w',"unexpected comment"); ! 648: return EOF; ! 649: case '/': ! 650: switch (get(c)) { ! 651: case '*': ! 652: lxcom(); ! 653: break; ! 654: case '/': ! 655: linecom(); ! 656: break; ! 657: default: ! 658: unget(c); ! 659: return '/'; ! 660: } ! 661: break; ! 662: case '\n': ! 663: ++tloc.line; ! 664: c = lxtitle(); ! 665: goto lx; ! 666: case ' ': ! 667: case '\t': ! 668: break; ! 669: default: ! 670: return c; ! 671: } ! 672: } ! 673: } ! 674: ! 675: void get_string() ! 676: { ! 677: int lxchar; ! 678: ! 679: for(;;) ! 680: switch (get(lxchar)) { ! 681: case '\\': ! 682: pch('\\'); ! 683: switch (get(lxchar)){ ! 684: case '\n': ! 685: ++tloc.line; ! 686: default: ! 687: pch(lxchar); ! 688: break; ! 689: case 'x': // hexadecimal ! 690: hex_to_oct(); ! 691: break; ! 692: }; ! 693: break; ! 694: case '"': ! 695: { char* p = txtstart; // eat_whitespace() moves txtstart ! 696: if ((lxchar = eat_whitespace()) == '"') { ! 697: // string catenation, break with ! 698: // newline to avoid merging characters ! 699: // (e.g. "\xAB" "C") ! 700: pch('\\'); ! 701: pch('\n'); ! 702: ! 703: continue; // eat '\"' and carry on ! 704: }; ! 705: ! 706: txtstart = p; ! 707: unget(lxchar); ! 708: pch(0); ! 709: return; ! 710: } ! 711: case '\n': ! 712: error("newline in string"); ! 713: pch(0); ! 714: return; ! 715: case EOF: ! 716: error("eof in string"); ! 717: pch(0); ! 718: return; ! 719: default: ! 720: pch(lxchar); ! 721: } ! 722: } ! 723: ! 724: TOK tlex() ! 725: { ! 726: TOK ret; ! 727: Pname n; ! 728: ! 729: // Ntoken++; ! 730: ! 731: for(;;) { ! 732: register lxchar; ! 733: register struct LXDOPE *p; ! 734: ! 735: start_txt(); ! 736: ! 737: if (saved) { ! 738: lxchar = saved; ! 739: saved = 0; ! 740: } ! 741: else ! 742: get(lxchar); ! 743: ! 744: if (lxchar+1 >= CSSZ ) ! 745: error( "illegal input character encountered: %d", lxchar ); ! 746: ! 747: switch( (p=lxcp[lxchar+1])->lxact ){ ! 748: ! 749: case A_1C: // eat up a single character, and return an opcode ! 750: reti(p->lxtok,p->lxtok); ! 751: ! 752: case A_EOF: ! 753: if (p_level || b_level+lcount) ! 754: error("'%s' missing at end of input",(b_level+lcount) ? "}" : ")"); ! 755: ! 756: reti(EOFTOK,0); ! 757: ! 758: case A_SHARP: ! 759: // cope with header file not ended with '\n' ! 760: unget('#'); ! 761: saved = lxtitle(); ! 762: continue; ! 763: ! 764: case A_ERR: ! 765: { if (' '<=lxchar && lxchar<='~') // ASCII printable ! 766: error("illegal character '%c' (ignored)",lxchar); ! 767: else ! 768: error("illegal character '0%o' (ignored)",lxchar); ! 769: continue; ! 770: } ! 771: ! 772: case A_DOLL: ! 773: { // lex a name of the for $id for template tree formals ! 774: Pname fn ; ! 775: lxget( lxchar, LEXLET|LEXDIG ) ; ! 776: ! 777: if (!templp->in_progress || !txtstart[1]) { ! 778: // no name string immediately follows, treat it ! 779: // like an illegal character ! 780: error("illegal character '0%o' (ignored)",lxchar); ! 781: continue; ! 782: } ! 783: ! 784: txtstart++ ; ! 785: if(fn=templ_compilation::tree_parameter(txtstart)) { ! 786: switch (fn->n_template_arg) { ! 787: case template_expr_tree_formal: ! 788: // retain the $ in the name ! 789: retn(ID, strdup(--txtstart)) ; ! 790: ! 791: case template_stmt_tree_formal: ! 792: retn(SM_PARAM, fn) ; ! 793: } ! 794: } ! 795: error("%s wasn't a statement or expression formal", txtstart); ! 796: rets(ID, copy_if_need_be(txtstart)); ! 797: } ! 798: ! 799: case A_LET: // collect an identifier and check for keyword ! 800: { ! 801: char ll; ! 802: switch (ll = lxchar) { ! 803: // case 'l': ! 804: case 'L': ! 805: switch (get(lxchar)) { ! 806: case '\'': ! 807: error('s',"wide character constant"); ! 808: unget(lxchar); ! 809: continue; ! 810: case '"': ! 811: error('s',"wide character string"); ! 812: unget(lxchar); ! 813: continue; ! 814: } ! 815: unget(lxchar); ! 816: lxchar = ll; ! 817: } ! 818: } ! 819: ! 820: lxget( lxchar, LEXLET|LEXDIG ); ! 821: /* look for a keyword or a global type */ ! 822: if ((n = keyword_table->look(txtstart,0)) /* keyword */ ! 823: || (n = ktbl->look(txtstart, 0))) /* local type */ ! 824: { ! 825: TOK x; ! 826: del_txt(); ! 827: switch (x=n->base) { ! 828: case TNAME: ! 829: rets(ID,n->string); ! 830: case LOC: ! 831: retl(n->syn_class); ! 832: case EXTERN: ! 833: if ((lxchar = eat_whitespace()) == '\"') { ! 834: // linkage directive ! 835: get_string(); ! 836: rets(LINKAGE,txtstart); ! 837: } ! 838: unget(lxchar); ! 839: reti(TYPE,EXTERN); ! 840: case CATCH: ! 841: // case TEMPLATE: ! 842: error('s',"%k",n->syn_class); ! 843: continue; ! 844: case TRY: ! 845: { ! 846: static int warn_try; ! 847: if (!warn_try) { ! 848: Pname n = keyword_table->look("try",0); ! 849: n->n_key = DEFAULT; ! 850: error('w',&tloc,"%k is a future reserved keyword",n->syn_class); ! 851: warn_try++; ! 852: } ! 853: rets(ID,n->string); ! 854: } ! 855: default: ! 856: reti(n->syn_class,x); ! 857: } ! 858: } ! 859: // rets(ID,txtstart); ! 860: rets(ID, copy_if_need_be(txtstart)) ; ! 861: ! 862: case A_DIG: ! 863: ! 864: ret = ICON; ! 865: ! 866: if (lxchar=='0') { ! 867: int pkchar; ! 868: get(pkchar); ! 869: if(pkchar=='x' || pkchar=='X') { // hex ! 870: pch(lxchar); ! 871: lxget(pkchar,LEXHEX); ! 872: txtfree--; ! 873: if (txtfree-txtstart<3) // minimum "0Xd\0" ! 874: error("hex digitX after \"0x\""); ! 875: get(lxchar); ! 876: goto getsuffix; ! 877: } ! 878: unget(pkchar); ! 879: } ! 880: ! 881: lxget(lxchar,LEXDIG); ! 882: txtfree--; ! 883: ! 884: if (get(lxchar) == '.') { ! 885: getfp: ! 886: lxget('.', LEXDIG ); ! 887: txtfree--; ! 888: ret = FCON; ! 889: get(lxchar); ! 890: }; ! 891: ! 892: if (lxchar=='e' || lxchar=='E') { ! 893: pch(lxchar); ! 894: get(lxchar); ! 895: if(lxchar=='-' || lxchar=='+') { ! 896: pch(lxchar); ! 897: get(lxchar); ! 898: } ! 899: if (lxmask[lxchar+1] & LEXDIG) { ! 900: lxget( lxchar, LEXDIG ); ! 901: txtfree--; ! 902: get(lxchar); ! 903: } ! 904: else ! 905: error("missing exponent digits?"); ! 906: ret = FCON; ! 907: }; ! 908: ! 909: if(*txtstart=='0' && ret==ICON) { ! 910: char *bch = txtstart; ! 911: while (++bch <= txtfree) { ! 912: if(*bch=='8' || *bch=='9') ! 913: error("%c used as octal digit",*bch); ! 914: } ! 915: } ! 916: ! 917: getsuffix: ! 918: switch (lxchar) { ! 919: case 'f': ! 920: case 'F': ! 921: if (ret==ICON) ! 922: error("%c suffix for integer constant",lxchar); ! 923: else ! 924: pch(lxchar); ! 925: break; ! 926: case 'u': ! 927: case 'U': ! 928: if (ret==FCON) { ! 929: error("%c suffix for floating constant",lxchar); ! 930: break; ! 931: } ! 932: pch(lxchar); ! 933: switch(get(lxchar)) { ! 934: case 'l': ! 935: case 'L': ! 936: pch(lxchar); ! 937: break; ! 938: default: ! 939: saved=lxchar; ! 940: break; ! 941: } ! 942: break; ! 943: case 'l': ! 944: case 'L': ! 945: pch(lxchar); ! 946: if (ret==FCON) { ! 947: break; ! 948: } ! 949: switch(get(lxchar)) { ! 950: case 'u': ! 951: case 'U': ! 952: pch(lxchar); ! 953: break; ! 954: default: ! 955: saved=lxchar; ! 956: break; ! 957: } ! 958: break; ! 959: default: ! 960: saved = lxchar; ! 961: break; ! 962: }; ! 963: ! 964: if(*txtstart=='0' && txtfree-txtstart==1) ! 965: reti(ZERO,0); // plain zero ! 966: ! 967: pch(0); ! 968: rets(ret,txtstart); ! 969: ! 970: ! 971: case A_DOT: ! 972: switch (get(lxchar)) { ! 973: case '.': // look for ellipsis ! 974: if (get(lxchar) != '.') { ! 975: error("token .. ?"); ! 976: saved = lxchar; ! 977: } ! 978: reti(ELLIPSIS,0); ! 979: case '*': ! 980: reti (REFMUL,DOT); ! 981: } ! 982: ! 983: if (lxmask[lxchar+1] & LEXDIG){// look for floating constant ! 984: unget(lxchar); ! 985: goto getfp; ! 986: } ! 987: saved = lxchar; ! 988: reti(DOT,0); ! 989: ! 990: case A_STR: ! 991: /* save string constant in buffer */ ! 992: get_string(); ! 993: rets(STRING,txtstart); ! 994: ! 995: case A_CC: ! 996: /* character constant */ ! 997: rets(CCON,chconst()); ! 998: ! 999: case A_BCD: ! 1000: { ! 1001: register i; ! 1002: int j; ! 1003: ! 1004: pch('`'); ! 1005: ! 1006: for (i=0; i<7; ++i) { ! 1007: pch(get(j)); ! 1008: if (j == '`' ) break; ! 1009: } ! 1010: pch(0); ! 1011: if (6<i) ! 1012: error('l',"bcd constant exceeds 6 characters" ); ! 1013: rets(CCON,txtstart); ! 1014: } ! 1015: ! 1016: case A_SL: /* / */ ! 1017: switch (get(lxchar)) { ! 1018: case '*': ! 1019: lxcom(); ! 1020: break; ! 1021: case '/': ! 1022: linecom(); ! 1023: break; ! 1024: case '=': ! 1025: reti(ASOP,ASDIV); ! 1026: default: ! 1027: saved = lxchar; ! 1028: reti(DIVOP,DIV); ! 1029: } ! 1030: ! 1031: case A_WS: ! 1032: continue; ! 1033: ! 1034: case A_NL: ! 1035: ++tloc.line; ! 1036: // Nline++; ! 1037: saved = lxtitle(); ! 1038: continue; ! 1039: ! 1040: case A_LC: ! 1041: if (BLMAX <= b_level++) { ! 1042: error('l',"blocks too deeply nested"); ! 1043: ext(3); ! 1044: } ! 1045: retl(LC); ! 1046: ! 1047: case A_RC: ! 1048: if (lcount+b_level-- <= 0) { ! 1049: error("unexpected '}'"); ! 1050: b_level = 0; ! 1051: } ! 1052: retl(RC); ! 1053: ! 1054: case A_L: ! 1055: p_level++; ! 1056: reti(LP,0); ! 1057: ! 1058: case A_R: ! 1059: if (p_level-- <= 0) { ! 1060: error("unexpected ')'"); ! 1061: p_level = 0; ! 1062: } ! 1063: reti(RP,0); ! 1064: ! 1065: case A_ASS: ! 1066: switch (get(lxchar)) { ! 1067: case '=': ! 1068: reti(EQUOP,EQ); ! 1069: default: ! 1070: saved = lxchar; ! 1071: reti(ASSIGN,ASSIGN); ! 1072: } ! 1073: ! 1074: case A_COL: ! 1075: switch (get(lxchar)) { ! 1076: case ':': ! 1077: reti(MEM,0); ! 1078: case '=': ! 1079: error("':=' is not a c++ operator"); ! 1080: reti(ASSIGN,ASSIGN); ! 1081: default: ! 1082: saved = lxchar; ! 1083: reti(COLON,COLON); ! 1084: } ! 1085: case A_NOT: ! 1086: switch (get(lxchar)) { ! 1087: case '=': ! 1088: reti(EQUOP,NE); ! 1089: default: ! 1090: saved = lxchar; ! 1091: reti(NOT,NOT); ! 1092: } ! 1093: case A_GT: ! 1094: switch(get(lxchar)) { ! 1095: case '>': ! 1096: switch (get(lxchar)) { ! 1097: case '=': ! 1098: reti(ASOP,ASRS); ! 1099: break; ! 1100: default: ! 1101: saved = lxchar; ! 1102: reti(SHIFTOP,RS); ! 1103: } ! 1104: case '=': ! 1105: reti(RELOP,GE); ! 1106: default: ! 1107: saved = lxchar; ! 1108: reti(GT,GT); ! 1109: } ! 1110: case A_LT: ! 1111: switch (get(lxchar)) { ! 1112: case '<': ! 1113: switch (get(lxchar)) { ! 1114: case '=': ! 1115: reti(ASOP,ASLS); ! 1116: default: ! 1117: saved = lxchar; ! 1118: reti(SHIFTOP,LS); ! 1119: } ! 1120: case '=': ! 1121: reti(RELOP,LE); ! 1122: default: ! 1123: saved = lxchar; ! 1124: reti(LT,LT); ! 1125: } ! 1126: case A_AND: ! 1127: switch (get(lxchar)) { ! 1128: case '&': ! 1129: reti(ANDAND,ANDAND); ! 1130: case '=': ! 1131: reti(ASOP,ASAND); ! 1132: default: ! 1133: saved = lxchar; ! 1134: reti(AND,AND); ! 1135: } ! 1136: case A_OR: ! 1137: switch (get(lxchar)) { ! 1138: case '|': ! 1139: reti(OROR,OROR); ! 1140: case '=': ! 1141: reti(ASOP,ASOR); ! 1142: default: ! 1143: saved = lxchar; ! 1144: reti(OR,OR); ! 1145: } ! 1146: case A_ER: ! 1147: switch (get(lxchar)) { ! 1148: case '=': ! 1149: reti(ASOP,ASER); ! 1150: default: ! 1151: saved = lxchar; ! 1152: reti(ER,ER); ! 1153: } ! 1154: case A_PL: ! 1155: switch (get(lxchar)) { ! 1156: case '=': ! 1157: reti(ASOP,ASPLUS); ! 1158: case '+': ! 1159: reti(ICOP,INCR); ! 1160: default: ! 1161: saved = lxchar; ! 1162: reti(PLUS,PLUS); ! 1163: } ! 1164: case A_MIN: ! 1165: switch (get(lxchar)) { ! 1166: case '=': ! 1167: reti(ASOP,ASMINUS); ! 1168: case '-': ! 1169: reti(ICOP,DECR); ! 1170: case '>': ! 1171: if (get(lxchar) == '*') ! 1172: {reti(REFMUL,REF);} ! 1173: else ! 1174: saved = lxchar; ! 1175: reti(REF,REF); ! 1176: default: ! 1177: saved = lxchar; ! 1178: reti(MINUS,MINUS); ! 1179: } ! 1180: case A_MUL: ! 1181: switch (get(lxchar)) { ! 1182: case '=': ! 1183: reti(ASOP,ASMUL); ! 1184: case '/': ! 1185: error('w',"*/ not as end of comment"); ! 1186: default: ! 1187: saved = lxchar; ! 1188: reti(MUL,MUL); ! 1189: } ! 1190: case A_MOD: ! 1191: switch (get(lxchar)) { ! 1192: case '=': ! 1193: reti(ASOP,ASMOD); ! 1194: default: ! 1195: saved = lxchar; ! 1196: reti(DIVOP,MOD); ! 1197: } ! 1198: default: ! 1199: {error('i',"lex act==%d getc()->%d",p,lxchar);} ! 1200: ! 1201: } ! 1202: ! 1203: error('i',"lex, main switch"); ! 1204: } ! 1205: ! 1206: } ! 1207: ! 1208: int lxtitle() ! 1209: /* ! 1210: called after a newline; set linenumber and file name ! 1211: */ ! 1212: { ! 1213: register c; ! 1214: ! 1215: for(;;) ! 1216: switch ( get(c) ) { ! 1217: default: // e.g. not '\n', not '#' ! 1218: return c; ! 1219: case '\n': ! 1220: tloc.line++; ! 1221: // Nline++; ! 1222: ll: ! 1223: break; ! 1224: case '#': /* # lineno "filename" */ ! 1225: { int cl = tloc.line; ! 1226: tloc.line = 0; ! 1227: for(;;) ! 1228: switch (get(c)) { ! 1229: case '"': ! 1230: start_txt(); ! 1231: for(;;) ! 1232: switch (get(c)) { ! 1233: case '"': ! 1234: pch('\0'); ! 1235: ! 1236: while (get(c) != '\n') ; // skip to eol.. ignore anything more ! 1237: ! 1238: if (*txtstart) { // stack file name ! 1239: char* fn; ! 1240: if (tcurr_file == 0){ ! 1241: if (( fn = file_name[0]) ! 1242: && (strcmp(txtstart,fn)!=0)){ // 1st include ! 1243: if (MAXFILE*4<++Nfile) error('i',"fileN buffer overflow"); ! 1244: if (MAXFILE<++tcurr_file) error('i',"fileN stack overflow"); ! 1245: file_stack[tcurr_file] = Nfile; ! 1246: ! 1247: char* p1 = new char[txtfree-txtstart]; ! 1248: (void) strcpy(p1,txtstart); ! 1249: file_name[Nfile] = p1; ! 1250: // Nstr++; ! 1251: } ! 1252: else { //&& line is dummy #line "input.c" ! 1253: // ignore ! 1254: } ! 1255: //&& dead, dead, dead goto push; ! 1256: } ! 1257: else if ( (fn=file_name[file_stack[tcurr_file]]) ! 1258: && (strcmp(txtstart,fn)==0) ) { ! 1259: //new line, same file: ignore ! 1260: } ! 1261: else if ( (fn=file_name[file_stack[tcurr_file-1]]) ! 1262: && (strcmp(txtstart,fn)==0) ) { ! 1263: // previous file: pop ! 1264: tcurr_file--; ! 1265: } ! 1266: else { // new file name: push ! 1267: //&& push: ! 1268: if (MAXFILE*4<Nfile++) error('i',"fileN buffer overflow"); ! 1269: if (MAXFILE<tcurr_file++) error('i',"fileN stack overflow"); ! 1270: file_stack[tcurr_file] = Nfile; ! 1271: char* p = new char[txtfree-txtstart]; ! 1272: (void) strcpy(p,txtstart); ! 1273: file_name[Nfile] = p; ! 1274: // Nstr++; ! 1275: } ! 1276: } ! 1277: else { // no name .. back to the original .c file: "" ! 1278: tcurr_file = 0; ! 1279: } ! 1280: del_txt(); ! 1281: tloc.file = file_stack[tcurr_file]; ! 1282: goto ll; ! 1283: case '\n': ! 1284: error("unexpected end of line on '# line'"); ! 1285: default: ! 1286: pch(c); ! 1287: } ! 1288: case ' ': ! 1289: break; ! 1290: ! 1291: case '0': ! 1292: case '1': ! 1293: case '2': ! 1294: case '3': ! 1295: case '4': ! 1296: case '5': ! 1297: case '6': ! 1298: case '7': ! 1299: case '8': ! 1300: case '9': ! 1301: tloc.line = tloc.line*10+c-'0'; ! 1302: break; ! 1303: ! 1304: case 'l': // look for "#line ..." and then ignore "line" ! 1305: if (get(c)=='i' && get(c)=='n' && get(c)=='e') break; ! 1306: case '\n': ! 1307: tloc.putline(); ! 1308: goto ll; ! 1309: ! 1310: default: // pass #rubbish through ! 1311: tloc.line = cl; ! 1312: pch('#'); ! 1313: pch(c); ! 1314: while (get(c) != '\n') pch(c); ! 1315: pch('\0'); ! 1316: fprintf(out_file,"\n%s\n",txtstart); ! 1317: start_txt(); ! 1318: tloc.line++; ! 1319: // Nline++; ! 1320: goto ll; ! 1321: } ! 1322: } ! 1323: } ! 1324: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.