|
|
1.1 ! root 1: /*ident "@(#)ctrans:src/lex.c 1.3.4.17" */ ! 2: /*************************************************************************** ! 3: ! 4: C++ source for cfront, the C++ compiler front-end ! 5: written in the computer science research center of Bell Labs ! 6: ! 7: Copyright (c) 1984 AT&T, Inc. All Rights Reserved ! 8: THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF AT&T, INC. ! 9: ! 10: lex.c: ! 11: lexical analyser based on pcc's and cpre's scanners ! 12: modified to handle classes: ! 13: new keywords: class ! 14: public ! 15: call ! 16: etc. ! 17: names are not entered in the symbol table by lex() ! 18: names can be of arbitrary length ! 19: error() is used to report errors ! 20: {} and () must match ! 21: numeric constants are not converted into internal representation ! 22: but stored as strings ! 23: ! 24: ****************************************************************************/ ! 25: ! 26: #include "cfront.h" ! 27: #include "yystype.h" ! 28: #include "size.h" ! 29: #include "tqueue.h" ! 30: #include "template.h" ! 31: ! 32: # define CCTRANS(x) x ! 33: ! 34: #define copy_if_need_be(s) ((templp->in_progress || templp->parameters_in_progress) ? strdup(s) : (s)) ! 35: ! 36: /* lexical actions */ ! 37: ! 38: #define A_ERR 0 /* illegal character */ ! 39: #define A_LET 1 /* saw a letter */ ! 40: #define A_DIG 2 /* saw a digit */ ! 41: #define A_1C 3 /* return a single character */ ! 42: #define A_STR 4 /* string */ ! 43: #define A_CC 5 /* character constant */ ! 44: #define A_BCD 6 /* GCOS BCD constant */ ! 45: #define A_SL 7 /* saw a / */ ! 46: #define A_DOT 8 /* saw a . */ ! 47: #define A_2C 9 /* possible two character symbol */ ! 48: #define A_WS 10 /* whitespace (not \n) */ ! 49: #define A_NL 11 /* \n */ ! 50: #define A_LC 12 /* { */ ! 51: #define A_RC 13 /* } */ ! 52: #define A_L 14 /* ( */ ! 53: #define A_R 15 /* ) */ ! 54: #define A_EOF 16 ! 55: #define A_ASS 17 ! 56: #define A_LT 18 ! 57: #define A_GT 19 /* > */ ! 58: #define A_ER 20 ! 59: #define A_OR 21 ! 60: #define A_AND 22 ! 61: #define A_MOD 23 ! 62: #define A_NOT 24 ! 63: #define A_MIN 25 ! 64: #define A_MUL 26 ! 65: #define A_PL 27 ! 66: #define A_COL 28 /* : */ ! 67: #define A_SHARP 29 /* # */ ! 68: #define A_DOLL 30 /* $ */ ! 69: ! 70: /* character classes */ ! 71: ! 72: # define LEXLET 01 ! 73: # define LEXDIG 02 ! 74: /* no LEXOCT because 8 and 9 used to be octal digits */ ! 75: # define LEXHEX 010 ! 76: # define LEXWS 020 ! 77: # define LEXDOT 040 ! 78: ! 79: const FIRSTCHUNK = 8*1024-8; ! 80: const BUFCHUNK = 4*1024-8; ! 81: ! 82: /* text buffer */ ! 83: static char inbuf[FIRSTCHUNK/*TBUFSZ*/]; ! 84: char* txtmax = &inbuf[FIRSTCHUNK/*TBUFSZ*/-1]; ! 85: char* txtstart = 0; ! 86: char* txtfree = 0; ! 87: ! 88: static struct buf* bufhead; ! 89: static buf* freebuf; ! 90: //static bufs; ! 91: ! 92: struct buf { ! 93: buf* next; ! 94: char chars[BUFCHUNK]; ! 95: // buf() { next=bufhead; bufhead=this; } ! 96: }; ! 97: ! 98: new_buf(char c) ! 99: { ! 100: //fprintf(stderr,"new_buf %d\n",bufs++); ! 101: buf* pbuf; ! 102: if (freebuf) { ! 103: pbuf = freebuf; ! 104: freebuf = freebuf->next; ! 105: } ! 106: else ! 107: pbuf = new buf; // allocate and register new chunk ! 108: pbuf->next = bufhead; ! 109: bufhead = pbuf; ! 110: ! 111: if (BUFCHUNK < txtmax-txtstart) error('l',"lexical token too long"); ! 112: ! 113: // copy current token: ! 114: char* p = txtstart; ! 115: txtstart = txtfree = &pbuf->chars[0]; ! 116: while (p<txtmax) *txtfree++ = *p++; ! 117: *txtfree++=c; ! 118: txtmax = &pbuf->chars[BUFCHUNK-1]; ! 119: return 0; ! 120: } ! 121: ! 122: ! 123: #define pch(c) ((txtmax<=txtfree)?new_buf(c):(*txtfree++=c)) ! 124: #define start_txt() txtstart = txtfree ! 125: #define del_txt() txtfree = txtstart ! 126: ! 127: /*static*/ char* file_name[MAXFILE*4]; // source file names ! 128: // file_name[0] == src_file_name ! 129: // file_name[0] == 0 means stdin ! 130: static short file_stack[MAXFILE]; // stack of file name indices ! 131: int curr_file; // current index in file_stack ! 132: // that is current #include nest level ! 133: ! 134: int linkage; // linkage is default C++ ! 135: // linkage==0 => C++ linkage ! 136: // linkage==1 => C linkage ! 137: const LINKMAX = 10; ! 138: static lvec[LINKMAX]; ! 139: int lcount; ! 140: ! 141: void pragma_set_linkage (char *p) ! 142: { ! 143: if (bl_level) error("linkage pragma inside block"); ! 144: set_linkage(p); ! 145: } ! 146: ! 147: ! 148: void set_linkage(char* p) ! 149: { ! 150: if (p==0 || *p == 0) { // resume previous linkage ! 151: if (lcount) linkage = lvec[--lcount]; ! 152: } ! 153: else { ! 154: if (LINKMAX<=++lcount) ! 155: error('l',"linkage directive nested too deep"); ! 156: if (strcmp(p,"C")==0) ! 157: lvec[lcount] = linkage = 1; ! 158: else if (strcmp(p,"C++")==0) ! 159: lvec[lcount] = linkage = 0; ! 160: else ! 161: error("%s linkage",p); ! 162: } ! 163: } ! 164: ! 165: class loc curloc; ! 166: FILE * out_file = stdout; ! 167: FILE * in_file = stdin; ! 168: Ptable ktbl; ! 169: Ptable keyword_table; ! 170: int br_level = 0; /* number of unmatched ``(''s */ ! 171: int bl_level = 0; /* number of unmatched ``{''s */ ! 172: ! 173: # ifdef ibm ! 174: ! 175: # define CSMASK 0377 ! 176: # define CSSZ 256 ! 177: ! 178: # else ! 179: ! 180: # define CSMASK 0177 ! 181: # define CSSZ 128 ! 182: ! 183: # endif ! 184: ! 185: static short lxmask[CSSZ+1]; ! 186: ! 187: int saved = 0; /* putback character, avoid ungetchar */ ! 188: static int lxtitle(); ! 189: ! 190: inline YYSTYPE rt(char* x) { YYSTYPE y; y.s = x; return y; } ! 191: inline YYSTYPE rt(int x) { YYSTYPE y; y.t = x; return y; } ! 192: inline YYSTYPE rt(loc x) { YYSTYPE y; y.l = x; return y; } ! 193: inline YYSTYPE rt(void* x) { YYSTYPE y; y.pn = Pname(x); return y; } ! 194: ! 195: #define get(c) (c=getc(in_file)) ! 196: #define unget(c) (ungetc(c,in_file)) ! 197: ! 198: #define reti(a,b) { addtok(a, rt(b)); return a; } ! 199: #define retn(a,b) { addtok(a, rt((Pnode)b)); return a; } ! 200: #define rets(a,b) { addtok(a, rt(b)); return a; } ! 201: #define retl(a) { addtok(a, rt(curloc)); return a; } ! 202: ! 203: void new_key(char* s, TOK toknum, TOK yyclass) ! 204: /* ! 205: make "s" a new keyword with the representation (token) "toknum" ! 206: "yyclass" is the yacc token (for example new_key("int",INT,TYPE); ) ! 207: "yyclass==0" means yyclass=toknum; ! 208: */ ! 209: { ! 210: Pname n = new name(s); ! 211: ! 212: keys[(toknum==LOC)?yyclass:toknum] = s; ! 213: n = new name(s); ! 214: Pname nn = keyword_table->insert(n,0); ! 215: if (Nold) error('i',"keyword %sD twice",s); ! 216: nn->base = toknum; ! 217: nn->syn_class = (yyclass) ? yyclass : toknum; ! 218: delete n; ! 219: } ! 220: ! 221: void ktbl_init() ! 222: /* ! 223: enter keywords into keyword table for use by lex() ! 224: and into keyword representation table used for output ! 225: */ ! 226: { ! 227: /* The ktbl is only for types. We put nothing in it. */ ! 228: ktbl = new table(KTBLSIZE,0,0); ! 229: ! 230: /* The keyword_table is for the reserved words */ ! 231: keyword_table = new table(67,0,0); ! 232: ! 233: new_key("asm",ASM,0); ! 234: new_key("auto",AUTO,TYPE); ! 235: new_key("break",LOC,BREAK); ! 236: new_key("case",LOC,CASE); ! 237: new_key("continue",LOC,CONTINUE); ! 238: new_key("char",CHAR,TYPE); ! 239: new_key("do",LOC,DO); ! 240: new_key("double",DOUBLE,TYPE); ! 241: new_key("default",LOC,DEFAULT); ! 242: new_key("enum",ENUM,0); ! 243: new_key("else",LOC,ELSE); ! 244: new_key("extern",EXTERN,TYPE); ! 245: new_key("float",FLOAT,TYPE); ! 246: new_key("for",LOC,FOR); ! 247: // new_key("fortran",FORTRAN,0); ! 248: new_key("goto",LOC,GOTO); ! 249: new_key("catch",CATCH,CATCH); ! 250: new_key("if",LOC,IF); ! 251: new_key("int",INT,TYPE); ! 252: new_key("long",LONG,TYPE); ! 253: new_key("return",LOC,RETURN); ! 254: new_key("register",REGISTER,TYPE); ! 255: new_key("static",STATIC,TYPE); ! 256: new_key("struct",STRUCT,AGGR); ! 257: new_key("sizeof",SIZEOF,0); ! 258: new_key("short",SHORT,TYPE); ! 259: new_key("switch",LOC,SWITCH); ! 260: new_key("typedef",TYPEDEF,TYPE); ! 261: new_key("unsigned",UNSIGNED,TYPE); ! 262: new_key("union",UNION,AGGR); ! 263: new_key("void",VOID,TYPE); ! 264: new_key("while",LOC,WHILE); ! 265: ! 266: new_key("class",CLASS,AGGR); ! 267: new_key("const",CONST,TYPE); ! 268: new_key("delete",LOC,DELETE); ! 269: new_key("friend",FRIEND,TYPE); ! 270: new_key("inline",INLINE,TYPE); ! 271: new_key("new",NEW,0); ! 272: new_key("operator",OPERATOR,0); ! 273: new_key("overload",OVERLOAD,TYPE); ! 274: new_key("private",PRIVATE,PR); ! 275: new_key("protected",PROTECTED,PR); ! 276: new_key("public",PUBLIC,PR); ! 277: new_key("signed",SIGNED,TYPE); ! 278: new_key("template",TEMPLATE,0); ! 279: new_key("this",THIS,0); ! 280: new_key("virtual",VIRTUAL,TYPE); ! 281: new_key("volatile",VOLATILE,TYPE); ! 282: ! 283: new_key("__statement", STATEMENT, 0) ; ! 284: new_key("__expression", EXPRESSION, 0) ; ! 285: new_key("__template_test", TEMPLATE_TEST, 0) ; ! 286: #ifdef DK ! 287: new_key("or",OR,0); ! 288: new_key("cor",OROR,0); ! 289: new_key("and",AND,0); ! 290: new_key("cand",ANDAND,0); ! 291: new_key("xor",ER,0); ! 292: new_key("compl",COMPL,0); ! 293: #endif ! 294: } ! 295: ! 296: extern char* src_file_name; ! 297: extern char* line_format; ! 298: loc last_line; ! 299: ! 300: void loc::putline() ! 301: { ! 302: if (file==0 && line==0) return; ! 303: // fix, Nfile, not MAXFILE; Nfile == MAXFILE*4 ! 304: // if (0<=file && file<MAXFILE) { ! 305: if ( 0<=file && file <= Nfile ) { ! 306: char* f = file_name[file]; ! 307: if (f==0) f = (src_file_name) ? src_file_name : ""; ! 308: fprintf(out_file,line_format,line,f); ! 309: last_line = *this; ! 310: } ! 311: } ! 312: ! 313: void loc::put(FILE* p) ! 314: { ! 315: // fix, Nfile, not MAXFILE; Nfile == MAXFILE*4 ! 316: // if (0<=file && file<MAXFILE) { ! 317: if ( 0<=file && file <= Nfile ) { ! 318: char* f = file_name[file]; ! 319: if (f==0) f = (src_file_name) ? src_file_name : ""; ! 320: fprintf(p,"\"%s\", line %d: ",f,line); ! 321: } ! 322: } ! 323: ! 324: void lxenter(register char* s, short m) ! 325: /* enter a mask into lxmask */ ! 326: { ! 327: register c; ! 328: ! 329: while( c= *s++ ) lxmask[c+1] |= m; ! 330: ! 331: } ! 332: ! 333: ! 334: void lxget(register c, register m) ! 335: /* ! 336: put 'c' back then scan for members of character class 'm' ! 337: terminate the string read with \0 ! 338: txtfree points to the character position after that \0 ! 339: */ ! 340: { ! 341: pch(c); ! 342: while ( (get(c), lxmask[c+1]&m) ) pch(c); ! 343: unget(c); ! 344: pch('\0'); ! 345: } ! 346: ! 347: struct LXDOPE { ! 348: short lxch; /* the character */ ! 349: short lxact; /* the action to be performed */ ! 350: TOK lxtok; /* the token number to be returned */ ! 351: } lxdope[] = { ! 352: #ifdef apollo ! 353: '@', A_ERR, 0, /* illegal characters go here... */ ! 354: #else ! 355: '$', A_DOLL, 0, ! 356: // '$', A_ERR, 0, /* illegal characters go here... */ ! 357: ! 358: #endif ! 359: '_', A_LET, 0, /* letters point here */ ! 360: '0', A_DIG, 0, /* digits point here */ ! 361: ' ', A_WS, 0, /* whitespace goes here */ ! 362: '\n', A_NL, 0, ! 363: '"', A_STR, 0, /* character string */ ! 364: '\'', A_CC, 0, /* ASCII character constant */ ! 365: '`', A_BCD, 0, /* 'foreign' character constant, e.g. BCD */ ! 366: '(', A_L, LP, ! 367: ')', A_R, RP, ! 368: '{', A_LC, LC, ! 369: '}', A_RC, RC, ! 370: ']', A_1C, RB, ! 371: '[', A_1C, LB, ! 372: '*', A_MUL, MUL, ! 373: '?', A_1C, QUEST, ! 374: ':', A_COL, COLON, ! 375: '+', A_PL, PLUS, ! 376: '-', A_MIN, MINUS, ! 377: '/', A_SL, DIV, ! 378: '%', A_MOD, MOD, ! 379: '&', A_AND, AND, ! 380: '|', A_OR, OR, ! 381: '^', A_ER, ER, ! 382: '!', A_NOT, NOT, ! 383: '~', A_1C, COMPL, ! 384: ',', A_1C, CM, ! 385: ';', A_1C, SM, ! 386: '.', A_DOT, DOT, ! 387: '<', A_LT, LT, ! 388: '>', A_GT, GT, ! 389: '=', A_ASS, ASSIGN, ! 390: '#', A_SHARP, 0, ! 391: EOF, A_EOF, EOFTOK ! 392: }; ! 393: /* note: EOF is used as sentinel, so must be <=0 and last entry in table */ ! 394: ! 395: static struct LXDOPE *lxcp[CSSZ+1]; ! 396: ! 397: void lex_init() ! 398: { ! 399: register struct LXDOPE *p; ! 400: register i; ! 401: register char *cp; ! 402: /* set up character classes */ ! 403: ! 404: /* first clear lexmask */ ! 405: for(i=0; i<=CSSZ; i++) lxmask[i] = 0; ! 406: ! 407: #ifdef apollo ! 408: lxenter( "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_$", LEXLET ); ! 409: #else ! 410: lxenter( "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_", LEXLET ); ! 411: #endif ! 412: lxenter( "0123456789", LEXDIG ); ! 413: lxenter( "0123456789abcdefABCDEF", LEXHEX ); ! 414: /* \013 should become \v someday; \013 is OK for ASCII and EBCDIC */ ! 415: lxenter( " \t\r\b\f\013", LEXWS ); ! 416: lxmask['.'+1] |= LEXDOT; ! 417: ! 418: /* make lxcp point to appropriate lxdope entry for each character */ ! 419: ! 420: /* initialize error entries */ ! 421: ! 422: for( i= 0; i<=CSSZ; ++i ) lxcp[i] = lxdope; ! 423: ! 424: /* make unique entries */ ! 425: ! 426: for( p=lxdope; ; ++p ) { ! 427: lxcp[p->lxch+1] = p; ! 428: if( p->lxch < 0 ) break; ! 429: } ! 430: ! 431: /* handle letters, digits, and whitespace */ ! 432: /* by convention, first, second, and third places */ ! 433: ! 434: cp = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; ! 435: while( *cp ) lxcp[*cp++ + 1] = &lxdope[1]; ! 436: cp = "123456789"; ! 437: while( *cp ) lxcp[*cp++ + 1] = &lxdope[2]; ! 438: cp = "\t\b\r\f\013"; ! 439: while( *cp ) lxcp[*cp++ + 1] = &lxdope[3]; ! 440: ! 441: file_name[0] = src_file_name; ! 442: // curloc.file = 0; // spurious: curloc is a static ! 443: curloc.line = 1; ! 444: ! 445: ktbl_init(); ! 446: lex_clear(); ! 447: saved = lxtitle(); ! 448: } ! 449: ! 450: void lex_clear() ! 451: { ! 452: // delete extra buffers: ! 453: buf* p = bufhead; ! 454: bufhead = 0; ! 455: //if (p) { ! 456: //fprintf(stderr,"lex_clear\n"); ! 457: //bufs=0; ! 458: //} ! 459: while (p) { ! 460: buf* pp = p; ! 461: p = p->next; ! 462: pp->next = freebuf; ! 463: freebuf = pp; ! 464: } ! 465: ! 466: // re-set to static buffer: ! 467: txtstart = txtfree = inbuf; ! 468: txtmax = &inbuf[FIRSTCHUNK/*TBUFSZ*/-1]; ! 469: } ! 470: ! 471: int int_val(char hex) ! 472: { ! 473: switch (hex) { ! 474: case '0': case '1': case '2': case '3': case '4': ! 475: case '5': case '6': case '7': case '8': case '9': ! 476: return hex-'0'; ! 477: case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': ! 478: return hex-'a'+10; ! 479: case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': ! 480: return hex-'A'+10; ! 481: } ! 482: } ! 483: ! 484: void hex_to_oct() ! 485: /* ! 486: \x has been seen on input (in char const or string) and \ printed ! 487: read the following hexadecimal integer and replace it with an octal ! 488: */ ! 489: { ! 490: int i = 0; ! 491: int c; ! 492: get(c); ! 493: if (lxmask[c+1] & LEXHEX) { ! 494: i = int_val(c); ! 495: get(c); // try for two ! 496: if (lxmask[c+1] & LEXHEX) { ! 497: i = (i<<4) + int_val(c); ! 498: get(c); // try for three ! 499: if (lxmask[c+1] & LEXHEX) ! 500: i = (i<<4) + int_val(c); ! 501: else ! 502: unget(c); ! 503: } ! 504: else ! 505: unget(c); ! 506: } ! 507: else { ! 508: error("hexadecimal digitE after \\x"); ! 509: unget(c); ! 510: } ! 511: ! 512: // if (0377 < i) error('l',"hexadecimal constant too large"); ! 513: i &= 0377; ! 514: ! 515: pch(('0'+(i>>6))); ! 516: pch(('0'+((i&070)>>3))); ! 517: pch(('0'+(i&7))); ! 518: } ! 519: ! 520: ! 521: char * chconst() ! 522: /* ! 523: read a character constant into inbuf ! 524: */ ! 525: { ! 526: register c; ! 527: int nch = 0; ! 528: ! 529: pch('\''); ! 530: ! 531: for(;;) { ! 532: char* p; ! 533: char cc = 0; ! 534: ! 535: switch (get(c)) { ! 536: case '\'': ! 537: goto ex; ! 538: case EOF: ! 539: error("eof in char constant"); ! 540: goto ex; ! 541: case '\n': ! 542: error("newline in char constant"); ! 543: goto ex; ! 544: case '\\': ! 545: if (SZ_INT == nch++) error('l',"char constant too long"); ! 546: pch(c); ! 547: switch (get(c)){ ! 548: case '\n': ! 549: ++curloc.line; ! 550: default: ! 551: pch(c); ! 552: break; ! 553: case '4': case '5': case '6': case '7': // octal ! 554: p = txtfree; ! 555: cc = c-4; ! 556: case '0': case '1': case '2': case '3': ! 557: pch(c); ! 558: get(c); /* try for 2 */ ! 559: if( lxmask[c+1] & LEXDIG && c<'8'){ ! 560: pch(c); ! 561: get(c); /* try for 3 */ ! 562: if (lxmask[c+1] & LEXDIG && c<'8') { ! 563: if (cc) *p = cc; // zap high bit ! 564: pch(c); ! 565: } ! 566: else ! 567: unget(c); ! 568: } ! 569: else ! 570: unget(c); ! 571: break; ! 572: case 'x': // hexadecimal ! 573: hex_to_oct(); ! 574: break; ! 575: }; ! 576: break; ! 577: default: ! 578: if (SZ_INT == nch++) error('l',"char constant too long"); ! 579: pch(c); ! 580: } ! 581: } ! 582: ex: ! 583: pch('\''); ! 584: pch('\0'); ! 585: return txtstart; ! 586: } ! 587: ! 588: void lxcom() ! 589: /* process a "block comment" */ ! 590: { ! 591: register c; ! 592: ! 593: for(;;) ! 594: switch (get(c)) { ! 595: case EOF: ! 596: error('w',"eof in comment"); ! 597: return; ! 598: case '\n': ! 599: curloc.line++; ! 600: // Nline++; ! 601: break; ! 602: case '*': ! 603: if (get(c) == '/') return; ! 604: unget(c); ! 605: break; ! 606: case '/': ! 607: if (get(c) == '*') error('w',"``/*'' in comment"); ! 608: unget(c); ! 609: break; ! 610: } ! 611: } ! 612: ! 613: ! 614: void linecom() ! 615: // process a "line comment" ! 616: { ! 617: register c; ! 618: ! 619: for(;;) ! 620: switch (get(c)) { ! 621: case EOF: ! 622: error('w',"eof in comment"); ! 623: return; ! 624: case '\n': ! 625: curloc.line++; ! 626: // Nline++; ! 627: saved = lxtitle(); ! 628: return; ! 629: } ! 630: } ! 631: ! 632: char eat_whitespace() ! 633: { ! 634: ! 635: for(;;) { ! 636: register c = get(c); ! 637: lx: ! 638: ! 639: switch (c) { ! 640: case EOF: ! 641: error('w',"unexpected comment"); ! 642: return EOF; ! 643: case '/': ! 644: switch (get(c)) { ! 645: case '*': ! 646: lxcom(); ! 647: break; ! 648: case '/': ! 649: linecom(); ! 650: break; ! 651: default: ! 652: unget(c); ! 653: return '/'; ! 654: } ! 655: break; ! 656: case '\n': ! 657: ++curloc.line; ! 658: c = lxtitle(); ! 659: goto lx; ! 660: case ' ': ! 661: case '\t': ! 662: break; ! 663: default: ! 664: return c; ! 665: } ! 666: } ! 667: } ! 668: ! 669: void get_string() ! 670: { ! 671: int lxchar; ! 672: ! 673: for(;;) ! 674: switch (get(lxchar)) { ! 675: case '\\': ! 676: pch('\\'); ! 677: switch (get(lxchar)){ ! 678: case '\n': ! 679: ++curloc.line; ! 680: default: ! 681: pch(lxchar); ! 682: break; ! 683: case 'x': // hexadecimal ! 684: hex_to_oct(); ! 685: break; ! 686: }; ! 687: break; ! 688: case '"': ! 689: { char* p = txtstart; // eat_whitespace() moves txtstart ! 690: if ((lxchar = eat_whitespace()) == '"') { ! 691: // string catenation, break with ! 692: // newline to avoid merging characters ! 693: // (e.g. "\xAB" "C") ! 694: pch('\\'); ! 695: pch('\n'); ! 696: ! 697: continue; // eat '\"' and carry on ! 698: }; ! 699: ! 700: txtstart = p; ! 701: unget(lxchar); ! 702: pch(0); ! 703: return; ! 704: } ! 705: case '\n': ! 706: error("newline in string"); ! 707: pch(0); ! 708: return; ! 709: case EOF: ! 710: error("eof in string"); ! 711: pch(0); ! 712: return; ! 713: default: ! 714: pch(lxchar); ! 715: } ! 716: } ! 717: ! 718: TOK tlex() ! 719: { ! 720: TOK ret; ! 721: Pname n; ! 722: ! 723: // Ntoken++; ! 724: ! 725: for(;;) { ! 726: register lxchar; ! 727: register struct LXDOPE *p; ! 728: ! 729: start_txt(); ! 730: ! 731: if (saved) { ! 732: lxchar = saved; ! 733: saved = 0; ! 734: } ! 735: else ! 736: get(lxchar); ! 737: ! 738: if (lxchar+1 >= CSSZ ) ! 739: error( "illegal input character enountered: %d", lxchar ); ! 740: ! 741: switch( (p=lxcp[lxchar+1])->lxact ){ ! 742: ! 743: case A_1C: // eat up a single character, and return an opcode ! 744: reti(p->lxtok,p->lxtok); ! 745: ! 746: case A_EOF: ! 747: if (br_level || bl_level+lcount) ! 748: error("'%s' missing at end of input",(bl_level+lcount) ? "}" : ")"); ! 749: ! 750: reti(EOFTOK,0); ! 751: ! 752: case A_SHARP: ! 753: // cope with header file not ended with '\n' ! 754: unget('#'); ! 755: saved = lxtitle(); ! 756: continue; ! 757: ! 758: case A_ERR: ! 759: ! 760: { if (' '<=lxchar && lxchar<='~') // ASCII printable ! 761: error("illegal character '%c' (ignored)",lxchar); ! 762: else ! 763: error("illegal character '0%o' (ignored)",lxchar); ! 764: continue; ! 765: } ! 766: ! 767: case A_DOLL: ! 768: { // lex a name of the for $id for template tree formals ! 769: ! 770: Pname fn ; ! 771: lxget( lxchar, LEXLET|LEXDIG ) ; ! 772: // TBD: make sure that the pragma is set ! 773: ! 774: if (!templp->in_progress || !txtstart[1]) { ! 775: // no name string immediately follows, treat it ! 776: // like an illegal character ! 777: error("illegal character '0%o' (ignored)",lxchar); ! 778: continue; ! 779: } ! 780: txtstart++ ; ! 781: if(fn=templ_compilation::tree_parameter(txtstart)){ ! 782: switch (fn->n_template_arg) { ! 783: case name::template_expr_tree_formal: ! 784: // retain the $ in the name ! 785: retn(ID, strdup(--txtstart)) ; ! 786: case name::template_stmt_tree_formal: ! 787: retn(SM_PARAM, fn) ; ! 788: } ! 789: } ! 790: error("%s wasn't a statement or expression formal", ! 791: txtstart) ; ! 792: rets(ID, copy_if_need_be(txtstart)) ; ! 793: } ! 794: ! 795: case A_LET: // collect an identifier and check for keyword ! 796: { ! 797: char ll; ! 798: switch (ll = lxchar) { ! 799: // case 'l': ! 800: case 'L': ! 801: switch (get(lxchar)) { ! 802: case '\'': ! 803: error('s',"wide character constant"); ! 804: unget(lxchar); ! 805: continue; ! 806: case '"': ! 807: error('s',"wide character string"); ! 808: unget(lxchar); ! 809: continue; ! 810: } ! 811: unget(lxchar); ! 812: lxchar = ll; ! 813: } ! 814: } ! 815: lxget( lxchar, LEXLET|LEXDIG ); ! 816: ! 817: //error( 'd', "lex: bl_level: %d txtstart %s", bl_level, txtstart); ! 818: // local class ! 819: /* look for a keyword or a global type */ ! 820: if ((n = keyword_table->look(txtstart,0)) /* keyword */ ! 821: || (n = ktbl->look(txtstart, 0))) { /* local type */ ! 822: TOK x; ! 823: del_txt(); ! 824: switch (x=n->base) { ! 825: case TNAME: ! 826: //('d',"lex tname %n",n); ! 827: if (bl_level > 1) { ! 828: Pname nn = ktbl->look(txtstart,LOCAL); ! 829: if ( nn ) { ! 830: n = nn; ! 831: //error( 'd', "lex: local class instance: %n", nn ); ! 832: } ! 833: } ! 834: retn(TNAME,n); ! 835: case LOC: ! 836: retl(n->syn_class); ! 837: case EXTERN: ! 838: if ((lxchar = eat_whitespace()) == '\"') { ! 839: // linkage directive ! 840: get_string(); ! 841: rets(LINKAGE,txtstart); ! 842: } ! 843: unget(lxchar); ! 844: reti(TYPE,EXTERN); ! 845: case CATCH: ! 846: /* ! 847: case TEMPLATE: ! 848: */ ! 849: error('s',"%k",n->syn_class); ! 850: continue; ! 851: default: ! 852: #ifdef DK ! 853: if (get(lxchar) == '=') ! 854: switch (x) { ! 855: case OR: reti(ASOP,ASOR); ! 856: case ER: reti(ASOP,ASER); ! 857: case AND: reti(ASOP,ASAND); ! 858: } ! 859: saved = lxchar; ! 860: ! 861: #endif ! 862: reti(n->syn_class,x); ! 863: } ! 864: } ! 865: else ! 866: // local class ! 867: if ( bl_level && ! 868: (n=ktbl->look(txtstart,LOCAL)) ) ! 869: { ! 870: //error( 'd', "lex2: local class instance: %n", n ); ! 871: retn(TNAME,n); ! 872: } ! 873: else ! 874: rets(ID, copy_if_need_be(txtstart)) ; ! 875: ! 876: ! 877: case A_DIG: ! 878: ! 879: ret = ICON; ! 880: ! 881: if (lxchar=='0') { /* octal or hexadecimal number */ ! 882: pch('0'); ! 883: switch (get(lxchar)) { ! 884: case 'l': ! 885: case 'L': ! 886: pch('L'); ! 887: pch(0); ! 888: rets(ICON,txtstart); ! 889: case 'e': ! 890: case 'E': ! 891: // lxget(lxchar,LEXDIG); ! 892: // goto getfp; ! 893: goto getfp2; ! 894: case 'x': ! 895: case 'X': ! 896: lxget('X',LEXHEX); ! 897: if (txtfree-txtstart<4) // minimum "0Xd\0" ! 898: error("hexadecimal digitX after \"0x\""); ! 899: switch (get(lxchar)) { ! 900: case 'l': ! 901: case 'L': ! 902: txtfree--; ! 903: pch('L'); ! 904: pch(0); ! 905: break; ! 906: default: ! 907: saved = lxchar; ! 908: } ! 909: rets(ICON,txtstart); ! 910: case '8': ! 911: case '9': ! 912: {error("%c used as octal digit",lxchar);} ! 913: case '0': ! 914: case '1': ! 915: case '2': ! 916: case '3': ! 917: case '4': ! 918: case '5': ! 919: case '6': ! 920: case '7': ! 921: pch(lxchar); ! 922: ox: ! 923: switch (get(lxchar)) { ! 924: case '8': ! 925: case '9': ! 926: {error("%c used as octal digit",lxchar);} ! 927: case '0': ! 928: case '1': ! 929: case '2': ! 930: case '3': ! 931: case '4': ! 932: case '5': ! 933: case '6': ! 934: case '7': ! 935: pch(lxchar); ! 936: goto ox; ! 937: case 'l': ! 938: case 'L': ! 939: pch('L'); ! 940: pch(0); ! 941: break; ! 942: default: ! 943: pch(0); ! 944: saved = lxchar; ! 945: } ! 946: rets(ICON,txtstart); ! 947: case '.': ! 948: lxget('.',LEXDIG); ! 949: goto getfp; ! 950: default: ! 951: saved = lxchar; ! 952: reti(ZERO,0); ! 953: } ! 954: } ! 955: else ! 956: lxget(lxchar,LEXDIG); ! 957: ! 958: if (get(lxchar) == '.') { ! 959: txtfree--; ! 960: lxget('.', LEXDIG ); ! 961: getfp: ! 962: ret = FCON; ! 963: get(lxchar); ! 964: }; ! 965: ! 966: switch (lxchar) { ! 967: case 'f': ! 968: case 'F': ! 969: txtfree--; ! 970: pch('F'); ! 971: break; ! 972: case 'e': ! 973: case 'E': ! 974: txtfree--; ! 975: switch (get(lxchar)) { ! 976: case '-': ! 977: case '+': ! 978: pch('e'); ! 979: break; ! 980: default: ! 981: unget(lxchar); ! 982: lxchar = 'e'; ! 983: }; ! 984: getfp2: ! 985: lxget( lxchar, LEXDIG ); ! 986: ret = FCON; ! 987: break; ! 988: case 'u': ! 989: case 'U': ! 990: if (ret==FCON) error("%c suffix for floating constant",lxchar); ! 991: case 'l': ! 992: case 'L': ! 993: txtfree--; ! 994: pch(lxchar); ! 995: switch (get(lxchar)) { // ul, Lu, ets. ! 996: case 'l': ! 997: case 'L': ! 998: case 'u': ! 999: case 'U': ! 1000: pch(lxchar); ! 1001: break; ! 1002: default: ! 1003: saved = lxchar; ! 1004: } ! 1005: break; ! 1006: default: ! 1007: saved = lxchar; ! 1008: }; ! 1009: ! 1010: pch(0); ! 1011: rets(ret,txtstart); ! 1012: ! 1013: case A_DOT: ! 1014: /* if (get(lxchar) == '.') { // look for ellipsis ! 1015: if (get(lxchar) != '.') { ! 1016: error("token .. ?"); ! 1017: saved = lxchar; ! 1018: } ! 1019: reti(ELLIPSIS,0); ! 1020: } ! 1021: */ ! 1022: switch (get(lxchar)) { ! 1023: case '.': // look for ellipsis ! 1024: if (get(lxchar) != '.') { ! 1025: error("token .. ?"); ! 1026: saved = lxchar; ! 1027: } ! 1028: reti(ELLIPSIS,0); ! 1029: case '*': ! 1030: reti (REFMUL,DOT); ! 1031: } ! 1032: ! 1033: if (lxmask[lxchar+1] & LEXDIG){// look for floating constant ! 1034: unget(lxchar); ! 1035: lxget( '.', LEXDIG ); ! 1036: goto getfp; ! 1037: } ! 1038: saved = lxchar; ! 1039: reti(DOT,0); ! 1040: ! 1041: case A_STR: ! 1042: /* save string constant in buffer */ ! 1043: get_string(); ! 1044: rets(STRING,txtstart); ! 1045: ! 1046: case A_CC: ! 1047: /* character constant */ ! 1048: rets(CCON,chconst()); ! 1049: ! 1050: case A_BCD: ! 1051: { ! 1052: register i; ! 1053: int j; ! 1054: ! 1055: pch('`'); ! 1056: ! 1057: for (i=0; i<7; ++i) { ! 1058: pch(get(j)); ! 1059: if (j == '`' ) break; ! 1060: } ! 1061: pch(0); ! 1062: if (6<i) ! 1063: error('l',"bcd constant exceeds 6 characters" ); ! 1064: rets(CCON,txtstart); ! 1065: } ! 1066: ! 1067: case A_SL: /* / */ ! 1068: switch (get(lxchar)) { ! 1069: case '*': ! 1070: lxcom(); ! 1071: break; ! 1072: case '/': ! 1073: linecom(); ! 1074: break; ! 1075: case '=': ! 1076: reti(ASOP,ASDIV); ! 1077: default: ! 1078: saved = lxchar; ! 1079: reti(DIVOP,DIV); ! 1080: } ! 1081: ! 1082: case A_WS: ! 1083: continue; ! 1084: ! 1085: case A_NL: ! 1086: ++curloc.line; ! 1087: // Nline++; ! 1088: saved = lxtitle(); ! 1089: continue; ! 1090: ! 1091: case A_LC: ! 1092: #ifdef DK ! 1093: alc: ! 1094: #endif ! 1095: if (BLMAX <= bl_level++) { ! 1096: error('l',"blocks too deeply nested"); ! 1097: ext(3); ! 1098: } ! 1099: retl(LC); ! 1100: ! 1101: case A_RC: ! 1102: #ifdef DK ! 1103: arc: ! 1104: #endif ! 1105: if (lcount+bl_level-- <= 0) { ! 1106: error("unexpected '}'"); ! 1107: bl_level = 0; ! 1108: } ! 1109: retl(RC); ! 1110: ! 1111: case A_L: ! 1112: #ifdef DK ! 1113: if (get(lxchar) == ':') // (# is { ! 1114: goto alc; ! 1115: else ! 1116: saved = lxchar; ! 1117: #endif ! 1118: br_level++; ! 1119: reti(LP,0); ! 1120: ! 1121: case A_R: ! 1122: if (br_level-- <= 0) { ! 1123: error("unexpected ')'"); ! 1124: br_level = 0; ! 1125: } ! 1126: reti(RP,0); ! 1127: ! 1128: case A_ASS: ! 1129: switch (get(lxchar)) { ! 1130: case '=': ! 1131: reti(EQUOP,EQ); ! 1132: default: ! 1133: saved = lxchar; ! 1134: reti(ASSIGN,ASSIGN); ! 1135: } ! 1136: ! 1137: case A_COL: ! 1138: switch (get(lxchar)) { ! 1139: case ':': ! 1140: reti(MEM,0); ! 1141: case '=': ! 1142: error("':=' is not a c++ operator"); ! 1143: reti(ASSIGN,ASSIGN); ! 1144: #ifdef DK ! 1145: if (get(lxchar)==')') goto arc; // :) is } ! 1146: unget(lxchar); ! 1147: #endif ! 1148: default: ! 1149: saved = lxchar; ! 1150: reti(COLON,COLON); ! 1151: } ! 1152: case A_NOT: ! 1153: switch (get(lxchar)) { ! 1154: case '=': ! 1155: reti(EQUOP,NE); ! 1156: default: ! 1157: saved = lxchar; ! 1158: reti(NOT,NOT); ! 1159: } ! 1160: case A_GT: ! 1161: switch(get(lxchar)) { ! 1162: case '>': ! 1163: switch (get(lxchar)) { ! 1164: case '=': ! 1165: reti(ASOP,ASRS); ! 1166: break; ! 1167: default: ! 1168: saved = lxchar; ! 1169: reti(SHIFTOP,RS); ! 1170: } ! 1171: case '=': ! 1172: reti(RELOP,GE); ! 1173: default: ! 1174: saved = lxchar; ! 1175: reti(GT,GT); ! 1176: } ! 1177: case A_LT: ! 1178: switch (get(lxchar)) { ! 1179: case '<': ! 1180: switch (get(lxchar)) { ! 1181: case '=': ! 1182: reti(ASOP,ASLS); ! 1183: default: ! 1184: saved = lxchar; ! 1185: reti(SHIFTOP,LS); ! 1186: } ! 1187: case '=': ! 1188: reti(RELOP,LE); ! 1189: default: ! 1190: saved = lxchar; ! 1191: reti(LT,LT); ! 1192: } ! 1193: case A_AND: ! 1194: switch (get(lxchar)) { ! 1195: case '&': ! 1196: reti(ANDAND,ANDAND); ! 1197: case '=': ! 1198: reti(ASOP,ASAND); ! 1199: default: ! 1200: saved = lxchar; ! 1201: reti(AND,AND); ! 1202: } ! 1203: case A_OR: ! 1204: switch (get(lxchar)) { ! 1205: case '|': ! 1206: reti(OROR,OROR); ! 1207: case '=': ! 1208: reti(ASOP,ASOR); ! 1209: default: ! 1210: saved = lxchar; ! 1211: reti(OR,OR); ! 1212: } ! 1213: case A_ER: ! 1214: switch (get(lxchar)) { ! 1215: case '=': ! 1216: reti(ASOP,ASER); ! 1217: default: ! 1218: saved = lxchar; ! 1219: reti(ER,ER); ! 1220: } ! 1221: case A_PL: ! 1222: switch (get(lxchar)) { ! 1223: case '=': ! 1224: reti(ASOP,ASPLUS); ! 1225: case '+': ! 1226: reti(ICOP,INCR); ! 1227: default: ! 1228: saved = lxchar; ! 1229: reti(PLUS,PLUS); ! 1230: } ! 1231: case A_MIN: ! 1232: switch (get(lxchar)) { ! 1233: case '=': ! 1234: reti(ASOP,ASMINUS); ! 1235: case '-': ! 1236: reti(ICOP,DECR); ! 1237: case '>': ! 1238: if (get(lxchar) == '*') ! 1239: {reti(REFMUL,REF);} ! 1240: else ! 1241: saved = lxchar; ! 1242: reti(REF,REF); ! 1243: default: ! 1244: saved = lxchar; ! 1245: reti(MINUS,MINUS); ! 1246: } ! 1247: case A_MUL: ! 1248: switch (get(lxchar)) { ! 1249: case '=': ! 1250: reti(ASOP,ASMUL); ! 1251: case '/': ! 1252: error('w',"*/ not as end of comment"); ! 1253: default: ! 1254: saved = lxchar; ! 1255: reti(MUL,MUL); ! 1256: } ! 1257: case A_MOD: ! 1258: switch (get(lxchar)) { ! 1259: case '=': ! 1260: reti(ASOP,ASMOD); ! 1261: default: ! 1262: saved = lxchar; ! 1263: reti(DIVOP,MOD); ! 1264: } ! 1265: default: ! 1266: {error('i',"lex act==%d getc()->%d",p,lxchar);} ! 1267: ! 1268: } ! 1269: ! 1270: error('i',"lex, main switch"); ! 1271: } ! 1272: ! 1273: } ! 1274: ! 1275: int lxtitle() ! 1276: /* ! 1277: called after a newline; set linenumber and file name ! 1278: */ ! 1279: { ! 1280: register c; ! 1281: ! 1282: for(;;) ! 1283: switch ( get(c) ) { ! 1284: default: // e.g. not '\n', not '#' ! 1285: return c; ! 1286: case '\n': ! 1287: curloc.line++; ! 1288: // Nline++; ! 1289: ll: ! 1290: break; ! 1291: case '#': /* # lineno "filename" */ ! 1292: { int cl = curloc.line; ! 1293: curloc.line = 0; ! 1294: for(;;) ! 1295: switch (get(c)) { ! 1296: case '"': ! 1297: start_txt(); ! 1298: for(;;) ! 1299: switch (get(c)) { ! 1300: case '"': ! 1301: pch('\0'); ! 1302: ! 1303: while (get(c) != '\n') ; // skip to eol.. ignore anything more ! 1304: ! 1305: if (*txtstart) { // stack file name ! 1306: char* fn; ! 1307: if (curr_file == 0){ ! 1308: if (( fn = file_name[0]) ! 1309: && (strcmp(txtstart,fn)!=0)){ // 1st include ! 1310: if (MAXFILE*4<++Nfile) error('i',"fileN buffer overflow"); ! 1311: if (MAXFILE<++curr_file) error('i',"fileN stack overflow"); ! 1312: file_stack[curr_file] = Nfile; ! 1313: ! 1314: char* p1 = new char[txtfree-txtstart]; ! 1315: (void) strcpy(p1,txtstart); ! 1316: file_name[Nfile] = p1; ! 1317: // Nstr++; ! 1318: } ! 1319: else { //&& line is dummy #line "input.c" ! 1320: // ignore ! 1321: } ! 1322: //&& dead, dead, dead goto push; ! 1323: } ! 1324: else if ( (fn=file_name[file_stack[curr_file]]) ! 1325: && (strcmp(txtstart,fn)==0) ) { ! 1326: //new line, same file: ignore ! 1327: } ! 1328: else if ( (fn=file_name[file_stack[curr_file-1]]) ! 1329: && (strcmp(txtstart,fn)==0) ) { ! 1330: // previous file: pop ! 1331: curr_file--; ! 1332: } ! 1333: else { // new file name: push ! 1334: //&& push: ! 1335: if (MAXFILE*4<Nfile++) error('i',"fileN buffer overflow"); ! 1336: if (MAXFILE<curr_file++) error('i',"fileN stack overflow"); ! 1337: file_stack[curr_file] = Nfile; ! 1338: ! 1339: char* p = new char[txtfree-txtstart]; ! 1340: (void) strcpy(p,txtstart); ! 1341: file_name[Nfile] = p; ! 1342: // Nstr++; ! 1343: } ! 1344: } ! 1345: else { // no name .. back to the original .c file: "" ! 1346: curr_file = 0; ! 1347: } ! 1348: del_txt(); ! 1349: curloc.file = file_stack[curr_file]; ! 1350: goto ll; ! 1351: case '\n': ! 1352: error("unexpected end of line on '# line'"); ! 1353: default: ! 1354: pch(c); ! 1355: } ! 1356: case ' ': ! 1357: break; ! 1358: ! 1359: case '0': ! 1360: case '1': ! 1361: case '2': ! 1362: case '3': ! 1363: case '4': ! 1364: case '5': ! 1365: case '6': ! 1366: case '7': ! 1367: case '8': ! 1368: case '9': ! 1369: curloc.line = curloc.line*10+c-'0'; ! 1370: break; ! 1371: ! 1372: case 'l': // look for "#line ..." and then ignore "line" ! 1373: if (get(c)=='i' && get(c)=='n' && get(c)=='e') break; ! 1374: case '\n': ! 1375: curloc.putline(); ! 1376: goto ll; ! 1377: ! 1378: default: // detect #pragma ! 1379: // pass #rubbish through ! 1380: { char* p = txtstart+1; ! 1381: curloc.line = cl; ! 1382: pch('#'); ! 1383: pch(c); ! 1384: while (get(c) != '\n') pch(c); ! 1385: pch('\0'); ! 1386: if (strncmp(p,"pragma",6)==0) { ! 1387: p += 6; ! 1388: while (*p==' ' || *p=='\t') p++; ! 1389: if (strncmp(p,"linkage",7)==0) { ! 1390: if (bl_level) error("linkage pragma inside block"); ! 1391: p += 7; ! 1392: while (*p==' ' || *p=='\t') p++; ! 1393: set_linkage(p); ! 1394: } ! 1395: else ! 1396: fprintf(out_file,"\n%s\n",txtstart); ! 1397: } ! 1398: else ! 1399: fprintf(out_file,"%s\n",txtstart); ! 1400: ! 1401: // fprintf(out_file,"\n%s\n",txtstart); ! 1402: start_txt(); ! 1403: curloc.line++; ! 1404: // Nline++; ! 1405: goto ll; ! 1406: } ! 1407: } ! 1408: } ! 1409: } ! 1410: } ! 1411: ! 1412: /* ODI notes - ! 1413: ! 1414: template classes ! 1415: ! 1416: separate ktbl from keywords to because it dosen't contain ! 1417: legitimate nodes. ! 1418: */ ! 1419:
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.