|
|
1.1 ! root 1: /*ident "@(#)ctrans:src/lex.c 1.5.1.1" */ ! 2: /*************************************************************************** ! 3: ! 4: C++ source for cfront, the C++ compiler front-end ! 5: written in the computer science research center of Bell Labs ! 6: ! 7: Copyright (c) 1984 AT&T, Inc. All Rights Reserved ! 8: THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF AT&T, INC. ! 9: ! 10: lex.c: ! 11: lexical analyser based on pcc's and cpre's scanners ! 12: modified to handle classes: ! 13: new keywords: class ! 14: public ! 15: call ! 16: etc. ! 17: names are not entered in the symbol table by lex() ! 18: names can be of arbitrary length ! 19: error() is used to report errors ! 20: {} and () must match ! 21: numeric constants are not converted into internal representation ! 22: but stored as strings ! 23: ! 24: ****************************************************************************/ ! 25: ! 26: #include "cfront.h" ! 27: #include "yystype.h" ! 28: #include "size.h" ! 29: #include "tqueue.h" ! 30: #include "template.h" ! 31: ! 32: # define CCTRANS(x) x ! 33: ! 34: #ifndef NULL ! 35: #define NULL 0 ! 36: #endif ! 37: ! 38: char* strdup(const char* s1) ! 39: /* string duplication ! 40: returns pointer to a new string which is the duplicate of string ! 41: pointed to by s1 ! 42: NULL is returned if new string can't be created ! 43: */ ! 44: { ! 45: char * s2; ! 46: ! 47: s2 = malloc((unsigned) strlen(s1)+1) ; ! 48: return(s2==NULL ? NULL : strcpy(s2,s1) ); ! 49: } ! 50: ! 51: #define copy_if_need_be(s) ((templp->in_progress || templp->parameters_in_progress) ? strdup(s) : (s)) ! 52: ! 53: /* lexical actions */ ! 54: ! 55: #define A_ERR 0 /* illegal character */ ! 56: #define A_LET 1 /* saw a letter */ ! 57: #define A_DIG 2 /* saw a digit */ ! 58: #define A_1C 3 /* return a single character */ ! 59: #define A_STR 4 /* string */ ! 60: #define A_CC 5 /* character constant */ ! 61: #define A_BCD 6 /* GCOS BCD constant */ ! 62: #define A_SL 7 /* saw a / */ ! 63: #define A_DOT 8 /* saw a . */ ! 64: #define A_2C 9 /* possible two character symbol */ ! 65: #define A_WS 10 /* whitespace (not \n) */ ! 66: #define A_NL 11 /* \n */ ! 67: #define A_LC 12 /* { */ ! 68: #define A_RC 13 /* } */ ! 69: #define A_L 14 /* ( */ ! 70: #define A_R 15 /* ) */ ! 71: #define A_EOF 16 ! 72: #define A_ASS 17 ! 73: #define A_LT 18 ! 74: #define A_GT 19 /* > */ ! 75: #define A_ER 20 ! 76: #define A_OR 21 ! 77: #define A_AND 22 ! 78: #define A_MOD 23 ! 79: #define A_NOT 24 ! 80: #define A_MIN 25 ! 81: #define A_MUL 26 ! 82: #define A_PL 27 ! 83: #define A_COL 28 /* : */ ! 84: #define A_SHARP 29 /* # */ ! 85: #define A_DOLL 30 /* $ */ ! 86: ! 87: /* character classes */ ! 88: ! 89: # define LEXLET 01 ! 90: # define LEXDIG 02 ! 91: /* no LEXOCT because 8 and 9 used to be octal digits */ ! 92: # define LEXHEX 010 ! 93: # define LEXWS 020 ! 94: # define LEXDOT 040 ! 95: ! 96: const FIRSTCHUNK = 8*1024-8; ! 97: const BUFCHUNK = 4*1024-8; ! 98: ! 99: /* text buffer */ ! 100: static char inbuf[FIRSTCHUNK/*TBUFSZ*/]; ! 101: static char* txtmax = &inbuf[FIRSTCHUNK/*TBUFSZ*/-1]; ! 102: static char* txtstart = 0; ! 103: static char* txtfree = 0; ! 104: ! 105: static struct buf* bufhead; ! 106: static buf* freebuf; ! 107: //static bufs; ! 108: ! 109: struct buf { ! 110: buf* next; ! 111: char chars[BUFCHUNK]; ! 112: // buf() { next=bufhead; bufhead=this; } ! 113: }; ! 114: ! 115: new_buf(char c) ! 116: { ! 117: //fprintf(stderr,"new_buf %d\n",bufs++); ! 118: buf* pbuf; ! 119: if (freebuf) { ! 120: pbuf = freebuf; ! 121: freebuf = freebuf->next; ! 122: } ! 123: else ! 124: pbuf = new buf; // allocate and register new chunk ! 125: pbuf->next = bufhead; ! 126: bufhead = pbuf; ! 127: ! 128: if (BUFCHUNK < txtmax-txtstart) error('l',"lexical token too long"); ! 129: ! 130: // copy current token: ! 131: char* p = txtstart; ! 132: txtstart = txtfree = &pbuf->chars[0]; ! 133: while (p<txtmax) *txtfree++ = *p++; ! 134: *txtfree++=c; ! 135: txtmax = &pbuf->chars[BUFCHUNK-1]; ! 136: return 0; ! 137: } ! 138: ! 139: ! 140: #define pch(c) ((txtmax<=txtfree)?new_buf(c):(*txtfree++=c)) ! 141: #define start_txt() txtstart = txtfree ! 142: #define del_txt() txtfree = txtstart ! 143: ! 144: static int Nfile;// = 1; ! 145: static char* file_name; // source file names ! 146: ! 147: Linkage linkage = linkage_default; // linkage is default C++ ! 148: const LINKMAX = 10; ! 149: static Linkage lvec[LINKMAX] = { linkage_default }; ! 150: static int lcount = 0; ! 151: ! 152: void set_linkage(char* p) ! 153: { ! 154: if (p==0 || *p == 0) { // resume previous linkage ! 155: if (lcount>0) linkage = lvec[--lcount]; ! 156: } ! 157: else { ! 158: if (LINKMAX<=++lcount) { ! 159: error('l',"linkage directive nested too deep"); ! 160: --lcount; ! 161: } else if (strcmp(p,"C")==0) ! 162: lvec[lcount] = linkage = linkage_C; ! 163: else if (strcmp(p,"C++")==0) ! 164: lvec[lcount] = linkage = linkage_Cplusplus; ! 165: else { ! 166: error("%s linkage",p); ! 167: --lcount; ! 168: } ! 169: } ! 170: } ! 171: ! 172: static struct loc tloc; ! 173: FILE * out_file = stdout; ! 174: FILE * in_file = stdin; ! 175: Ptable ktbl; ! 176: Ptable keyword_table; ! 177: ! 178: static int p_level = 0; /* number of unmatched ``(''s */ ! 179: static int b_level = 0; /* number of unmatched ``{''s */ ! 180: ! 181: # ifdef ibm ! 182: ! 183: # define CSMASK 0377 ! 184: # define CSSZ 256 ! 185: ! 186: # else ! 187: ! 188: # define CSMASK 0177 ! 189: # define CSSZ 128 ! 190: ! 191: # endif ! 192: ! 193: static short lxmask[CSSZ+1]; ! 194: ! 195: int saved = 0; /* putback character, avoid ungetchar */ ! 196: static int lxtitle(); ! 197: ! 198: // overload rt; ! 199: inline YYSTYPE rt(char* x) { YYSTYPE y; y.s = x; return y; } ! 200: inline YYSTYPE rt(int x) { YYSTYPE y; y.t = x; return y; } ! 201: inline YYSTYPE rt(loc x) { YYSTYPE y; y.l = x; return y; } ! 202: inline YYSTYPE rt(void* x) { YYSTYPE y; y.pn = Pname(x); return y; } ! 203: ! 204: #define get(c) (c=getc(in_file)) ! 205: #define unget(c) ungetc(c,in_file) ! 206: ! 207: #define reti(a,b) { addtok(a, rt(b), tloc); return a; } ! 208: #define retn(a,b) { addtok(a, rt((Pnode)b), tloc); return a; } ! 209: #define rets(a,b) { addtok(a, rt(b), tloc); return a; } ! 210: #define retl(a) { addtok(a, rt(tloc), tloc); return a; } ! 211: ! 212: // keys[] holds the external form for tokens with fixed representation ! 213: // illegal tokens and those with variable representation have 0 entries ! 214: char* keys[MAXTOK+1]; ! 215: ! 216: static void ! 217: new_key(char* s, TOK toknum, TOK yyclass) ! 218: /* ! 219: make "s" a new keyword with the representation (token) "toknum" ! 220: "yyclass" is the yacc token (for example new_key("int",INT,TYPE); ) ! 221: "yyclass==0" means yyclass=toknum; ! 222: */ ! 223: { ! 224: Pname n = new name(s); ! 225: ! 226: keys[(toknum==LOC)?yyclass:toknum] = s; ! 227: n = new name(s); ! 228: Pname nn = keyword_table->insert(n,0); ! 229: // if (Nold) error('i',"keyword %sD twice",s); ! 230: nn->base = toknum; ! 231: nn->syn_class = (yyclass) ? yyclass : toknum; ! 232: delete n; ! 233: } ! 234: ! 235: const int keyword_count = 67; ! 236: static void ! 237: ktbl_init() ! 238: /* ! 239: enter keywords into keyword table for use by lex() ! 240: and into keyword representation table used for output ! 241: ! 242: ktbl is only for types. We put nothing in it. ! 243: keyword_table is for user-defined reserved words ! 244: */ ! 245: { ! 246: ktbl = new table(KTBLSIZE,0,0); ! 247: keyword_table = new table(keyword_count,0,0); ! 248: ! 249: new_key("asm",ASM,0); ! 250: new_key("auto",AUTO,TYPE); ! 251: new_key("break",LOC,BREAK); ! 252: new_key("case",LOC,CASE); ! 253: new_key("continue",LOC,CONTINUE); ! 254: new_key("char",CHAR,TYPE); ! 255: new_key("do",LOC,DO); ! 256: new_key("double",DOUBLE,TYPE); ! 257: new_key("default",LOC,DEFAULT); ! 258: new_key("enum",ENUM,0); ! 259: new_key("else",LOC,ELSE); ! 260: new_key("extern",EXTERN,TYPE); ! 261: new_key("float",FLOAT,TYPE); ! 262: new_key("for",LOC,FOR); ! 263: new_key("goto",LOC,GOTO); ! 264: new_key("catch",CATCH,CATCH); ! 265: new_key("try",TRY,TRY); ! 266: new_key("if",LOC,IF); ! 267: new_key("int",INT,TYPE); ! 268: new_key("long",LONG,TYPE); ! 269: new_key("return",LOC,RETURN); ! 270: new_key("register",REGISTER,TYPE); ! 271: new_key("static",STATIC,TYPE); ! 272: new_key("struct",STRUCT,AGGR); ! 273: new_key("sizeof",SIZEOF,0); ! 274: new_key("short",SHORT,TYPE); ! 275: new_key("switch",LOC,SWITCH); ! 276: new_key("typedef",TYPEDEF,TYPE); ! 277: new_key("unsigned",UNSIGNED,TYPE); ! 278: new_key("union",UNION,AGGR); ! 279: new_key("void",VOID,TYPE); ! 280: new_key("while",LOC,WHILE); ! 281: ! 282: new_key("class",CLASS,AGGR); ! 283: new_key("const",CONST,TYPE); ! 284: new_key("delete",LOC,DELETE); ! 285: new_key("friend",FRIEND,TYPE); ! 286: new_key("inline",INLINE,TYPE); ! 287: new_key("new",NEW,0); ! 288: new_key("operator",OPERATOR,0); ! 289: new_key("overload",OVERLOAD,TYPE); ! 290: new_key("private",PRIVATE,PR); ! 291: new_key("protected",PROTECTED,PR); ! 292: new_key("public",PUBLIC,PR); ! 293: new_key("signed",SIGNED,TYPE); ! 294: new_key("template",TEMPLATE,0); ! 295: new_key("this",THIS,0); ! 296: new_key("virtual",VIRTUAL,TYPE); ! 297: new_key("volatile",VOLATILE,TYPE); ! 298: ! 299: new_key("__statement", STATEMENT, 0) ; ! 300: new_key("__expression", EXPRESSION, 0) ; ! 301: new_key("__template_test", TEMPLATE_TEST, 0) ; ! 302: } ! 303: ! 304: loc last_line; ! 305: loc noloc = { 0, 0 }; ! 306: ! 307: void loc::putline() ! 308: { ! 309: if (file==0 && line==0) return; ! 310: // fix, Nfile, not MAXFILE; Nfile == MAXFILE*4 ! 311: // if (0<=file && file<MAXFILE) { ! 312: if ( 0<=file && file <= Nfile ) { ! 313: char* f = file_name; ! 314: if (f==0) f = src_file_name; ! 315: fprintf(out_file,line_format,line,f); ! 316: last_line = *this; ! 317: } ! 318: } ! 319: ! 320: void loc::put(FILE* p) ! 321: { ! 322: // fix, Nfile, not MAXFILE; Nfile == MAXFILE*4 ! 323: // if (0<=file && file<MAXFILE) { ! 324: if ( 0<=file && file <= Nfile ) { ! 325: char* f = file_name; ! 326: if (f==0) f = src_file_name; ! 327: fprintf(p,"\"%s\", line %d: ",f,line); ! 328: } ! 329: } ! 330: ! 331: void lxenter(register char* s, short m) ! 332: /* enter a mask into lxmask */ ! 333: { ! 334: register c; ! 335: ! 336: while( c= *s++ ) lxmask[c+1] |= m; ! 337: ! 338: } ! 339: ! 340: ! 341: void lxget(register c, register m) ! 342: /* ! 343: put 'c' back then scan for members of character class 'm' ! 344: terminate the string read with \0 ! 345: txtfree points to the character position after that \0 ! 346: */ ! 347: { ! 348: pch(c); ! 349: while ( (get(c), lxmask[c+1]&m) ) pch(c); ! 350: unget(c); ! 351: pch('\0'); ! 352: } ! 353: ! 354: struct LXDOPE { ! 355: short lxch; /* the character */ ! 356: short lxact; /* the action to be performed */ ! 357: TOK lxtok; /* the token number to be returned */ ! 358: } lxdope[] = { ! 359: #ifdef apollo ! 360: '@', A_ERR, 0, /* illegal characters go here... */ ! 361: #else ! 362: '$', A_DOLL, 0, ! 363: // '$', A_ERR, 0, /* illegal characters go here... */ ! 364: #endif ! 365: '_', A_LET, 0, /* letters point here */ ! 366: '0', A_DIG, 0, /* digits point here */ ! 367: ' ', A_WS, 0, /* whitespace goes here */ ! 368: '\n', A_NL, 0, ! 369: '"', A_STR, 0, /* character string */ ! 370: '\'', A_CC, 0, /* ASCII character constant */ ! 371: '`', A_BCD, 0, /* 'foreign' character constant, e.g. BCD */ ! 372: '(', A_L, LP, ! 373: ')', A_R, RP, ! 374: '{', A_LC, LC, ! 375: '}', A_RC, RC, ! 376: '[', A_1C, LB, ! 377: ']', A_1C, RB, ! 378: '*', A_MUL, MUL, ! 379: '?', A_1C, QUEST, ! 380: ':', A_COL, COLON, ! 381: '+', A_PL, PLUS, ! 382: '-', A_MIN, MINUS, ! 383: '/', A_SL, DIV, ! 384: '%', A_MOD, MOD, ! 385: '&', A_AND, AND, ! 386: '|', A_OR, OR, ! 387: '^', A_ER, ER, ! 388: '!', A_NOT, NOT, ! 389: '~', A_1C, COMPL, ! 390: ',', A_1C, CM, ! 391: ';', A_1C, SM, ! 392: '.', A_DOT, DOT, ! 393: '<', A_LT, LT, ! 394: '>', A_GT, GT, ! 395: '=', A_ASS, ASSIGN, ! 396: '#', A_SHARP, 0, ! 397: EOF, A_EOF, EOFTOK ! 398: }; ! 399: /* note: EOF is used as sentinel, so must be <=0 and last entry in table */ ! 400: ! 401: static struct LXDOPE *lxcp[CSSZ+1]; ! 402: ! 403: void ! 404: lex_init() ! 405: { ! 406: register struct LXDOPE *p; ! 407: register i; ! 408: register char *cp; ! 409: /* set up character classes */ ! 410: ! 411: /* first clear lexmask */ ! 412: for(i=0; i<=CSSZ; i++) lxmask[i] = 0; ! 413: ! 414: #ifdef apollo ! 415: lxenter( "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_$", LEXLET ); ! 416: #else ! 417: lxenter( "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_", LEXLET ); ! 418: #endif ! 419: lxenter( "0123456789", LEXDIG ); ! 420: lxenter( "0123456789abcdefABCDEF", LEXHEX ); ! 421: /* \013 should become \v someday; \013 is OK for ASCII and EBCDIC */ ! 422: lxenter( " \t\r\b\f\013", LEXWS ); ! 423: lxmask['.'+1] |= LEXDOT; ! 424: ! 425: /* make lxcp point to appropriate lxdope entry for each character */ ! 426: ! 427: /* initialize error entries */ ! 428: ! 429: for( i= 0; i<=CSSZ; ++i ) lxcp[i] = lxdope; ! 430: ! 431: /* make unique entries */ ! 432: ! 433: for( p=lxdope; ; ++p ) { ! 434: lxcp[p->lxch+1] = p; ! 435: if( p->lxch < 0 ) break; ! 436: } ! 437: ! 438: /* handle letters, digits, and whitespace */ ! 439: /* by convention, first, second, and third places */ ! 440: ! 441: cp = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; ! 442: while( *cp ) lxcp[*cp++ + 1] = &lxdope[1]; ! 443: cp = "123456789"; ! 444: while( *cp ) lxcp[*cp++ + 1] = &lxdope[2]; ! 445: cp = "\t\b\r\f\013"; ! 446: while( *cp ) lxcp[*cp++ + 1] = &lxdope[3]; ! 447: ! 448: file_name = src_file_name; ! 449: // set both curloc and tloc so curloc is valid at program startup ! 450: // curloc.file = tloc.file = 0; ! 451: curloc.line = tloc.line = 1; ! 452: ! 453: ktbl_init(); ! 454: lex_clear(); ! 455: saved = lxtitle(); ! 456: } ! 457: ! 458: void lex_clear() ! 459: { ! 460: // delete extra buffers: ! 461: buf* p = bufhead; ! 462: bufhead = 0; ! 463: //if (p) { ! 464: //fprintf(stderr,"lex_clear\n"); ! 465: //bufs=0; ! 466: //} ! 467: while (p) { ! 468: buf* pp = p; ! 469: p = p->next; ! 470: pp->next = freebuf; ! 471: freebuf = pp; ! 472: } ! 473: ! 474: // re-set to static buffer: ! 475: txtstart = txtfree = inbuf; ! 476: txtmax = &inbuf[FIRSTCHUNK/*TBUFSZ*/-1]; ! 477: } ! 478: ! 479: int int_val(char hex) ! 480: { ! 481: switch (hex) { ! 482: case '0': case '1': case '2': case '3': case '4': ! 483: case '5': case '6': case '7': case '8': case '9': ! 484: return hex-'0'; ! 485: case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': ! 486: return hex-'a'+10; ! 487: case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': ! 488: return hex-'A'+10; ! 489: } ! 490: } ! 491: ! 492: void hex_to_oct() ! 493: /* ! 494: \x has been seen on input (in char const or string) and \ printed ! 495: read the following hexadecimal integer and replace it with an octal ! 496: */ ! 497: { ! 498: int i = 0; ! 499: int c; ! 500: get(c); ! 501: if (lxmask[c+1] & LEXHEX) { ! 502: i = int_val(c); ! 503: get(c); // try for two ! 504: if (lxmask[c+1] & LEXHEX) { ! 505: i = (i<<4) + int_val(c); ! 506: get(c); // try for three ! 507: if (lxmask[c+1] & LEXHEX) ! 508: i = (i<<4) + int_val(c); ! 509: else ! 510: unget(c); ! 511: } ! 512: else ! 513: unget(c); ! 514: } ! 515: else { ! 516: error("hexadecimal digitE after \\x"); ! 517: unget(c); ! 518: } ! 519: ! 520: // if (0377 < i) error('l',"hexadecimal constant too large"); ! 521: i &= 0377; ! 522: ! 523: pch(('0'+(i>>6))); ! 524: pch(('0'+((i&070)>>3))); ! 525: pch(('0'+(i&7))); ! 526: } ! 527: ! 528: ! 529: char * chconst() ! 530: /* ! 531: read a character constant into inbuf ! 532: */ ! 533: { ! 534: register c; ! 535: int nch = 0; ! 536: ! 537: pch('\''); ! 538: ! 539: for(;;) { ! 540: char* p; ! 541: char cc = 0; ! 542: ! 543: switch (get(c)) { ! 544: case '\'': ! 545: goto ex; ! 546: case EOF: ! 547: error("eof in char constant"); ! 548: goto ex; ! 549: case '\n': ! 550: error("newline in char constant"); ! 551: goto ex; ! 552: case '\\': ! 553: if (SZ_INT == nch++) error('l',"char constant too long"); ! 554: pch(c); ! 555: switch (get(c)){ ! 556: case '\n': ! 557: ++tloc.line; ! 558: default: ! 559: pch(c); ! 560: break; ! 561: case '4': case '5': case '6': case '7': // octal ! 562: p = txtfree; ! 563: cc = c-4; ! 564: case '0': case '1': case '2': case '3': ! 565: pch(c); ! 566: get(c); /* try for 2 */ ! 567: if( lxmask[c+1] & LEXDIG && c<'8'){ ! 568: pch(c); ! 569: get(c); /* try for 3 */ ! 570: if (lxmask[c+1] & LEXDIG && c<'8') { ! 571: if (cc) *p = cc; // zap high bit ! 572: pch(c); ! 573: } ! 574: else ! 575: unget(c); ! 576: } ! 577: else ! 578: unget(c); ! 579: break; ! 580: case 'x': // hexadecimal ! 581: hex_to_oct(); ! 582: break; ! 583: }; ! 584: break; ! 585: default: ! 586: if (SZ_INT == nch++) error('l',"char constant too long"); ! 587: pch(c); ! 588: } ! 589: } ! 590: ex: ! 591: if(nch==0) ! 592: error("empty char constant"); ! 593: pch('\''); ! 594: pch('\0'); ! 595: return txtstart; ! 596: } ! 597: ! 598: void lxcom() ! 599: /* process a "block comment" */ ! 600: { ! 601: register c; ! 602: ! 603: for(;;) ! 604: switch (get(c)) { ! 605: case EOF: ! 606: error('w',"eof in comment"); ! 607: return; ! 608: case '\n': ! 609: tloc.line++; ! 610: // Nline++; ! 611: break; ! 612: case '*': ! 613: if (get(c) == '/') return; ! 614: unget(c); ! 615: break; ! 616: case '/': ! 617: if (get(c) == '*') error('w',"``/*'' in comment"); ! 618: unget(c); ! 619: break; ! 620: } ! 621: } ! 622: ! 623: ! 624: void linecom() ! 625: // process a "line comment" ! 626: { ! 627: register c; ! 628: ! 629: get(c); ! 630: #ifdef DBG ! 631: if ( c=='@' && get(c)=='!' ) { ! 632: while ( get(c) != '\n' && c != EOF ) pch(c); ! 633: pch('\0'); ! 634: process_debug_flags(txtstart); ! 635: del_txt(); ! 636: } ! 637: #endif ! 638: for(;;get(c)) ! 639: switch (c) { ! 640: case EOF: ! 641: error('w',"eof in comment"); ! 642: return; ! 643: case '\n': ! 644: tloc.line++; ! 645: // Nline++; ! 646: saved = lxtitle(); ! 647: return; ! 648: } ! 649: } ! 650: ! 651: char eat_whitespace() ! 652: { ! 653: ! 654: for(;;) { ! 655: register c = get(c); ! 656: lx: ! 657: ! 658: switch (c) { ! 659: case EOF: ! 660: error('w',"unexpected comment"); ! 661: return EOF; ! 662: case '/': ! 663: switch (get(c)) { ! 664: case '*': ! 665: lxcom(); ! 666: break; ! 667: case '/': ! 668: linecom(); ! 669: break; ! 670: default: ! 671: unget(c); ! 672: return '/'; ! 673: } ! 674: break; ! 675: case '\n': ! 676: ++tloc.line; ! 677: c = lxtitle(); ! 678: goto lx; ! 679: case ' ': ! 680: case '\t': ! 681: break; ! 682: default: ! 683: return c; ! 684: } ! 685: } ! 686: } ! 687: ! 688: void get_string() ! 689: { ! 690: int lxchar; ! 691: ! 692: for(;;) ! 693: switch (get(lxchar)) { ! 694: case '\\': ! 695: pch('\\'); ! 696: switch (get(lxchar)){ ! 697: case '\n': ! 698: ++tloc.line; ! 699: default: ! 700: pch(lxchar); ! 701: break; ! 702: case 'x': // hexadecimal ! 703: hex_to_oct(); ! 704: break; ! 705: }; ! 706: break; ! 707: case '"': ! 708: { char* p = txtstart; // eat_whitespace() moves txtstart ! 709: if ((lxchar = eat_whitespace()) == '"') { ! 710: // string catenation, break with ! 711: // newline to avoid merging characters ! 712: // (e.g. "\xAB" "C") ! 713: pch('\\'); ! 714: pch('\n'); ! 715: ! 716: continue; // eat '\"' and carry on ! 717: }; ! 718: ! 719: txtstart = p; ! 720: unget(lxchar); ! 721: pch(0); ! 722: return; ! 723: } ! 724: case '\n': ! 725: error("newline in string"); ! 726: pch(0); ! 727: return; ! 728: case EOF: ! 729: error("eof in string"); ! 730: pch(0); ! 731: return; ! 732: default: ! 733: pch(lxchar); ! 734: } ! 735: } ! 736: ! 737: TOK tlex() ! 738: { ! 739: TOK ret; ! 740: Pname n; ! 741: ! 742: // Ntoken++; ! 743: ! 744: for(;;) { ! 745: register lxchar; ! 746: register struct LXDOPE *p; ! 747: ! 748: start_txt(); ! 749: ! 750: if (saved) { ! 751: lxchar = saved; ! 752: saved = 0; ! 753: } ! 754: else ! 755: get(lxchar); ! 756: ! 757: if (lxchar+1 >= CSSZ ) ! 758: error( "illegal input character encountered: %d", lxchar ); ! 759: ! 760: switch( (p=lxcp[lxchar+1])->lxact ){ ! 761: ! 762: case A_1C: // eat up a single character, and return an opcode ! 763: reti(p->lxtok,p->lxtok); ! 764: ! 765: case A_EOF: ! 766: if (p_level || b_level+lcount) ! 767: error("'%s' missing at end of input",(b_level+lcount) ? "}" : ")"); ! 768: ! 769: reti(EOFTOK,0); ! 770: ! 771: case A_SHARP: ! 772: // cope with header file not ended with '\n' ! 773: unget('#'); ! 774: saved = lxtitle(); ! 775: continue; ! 776: ! 777: case A_ERR: ! 778: { if (' '<=lxchar && lxchar<='~') // ASCII printable ! 779: error("illegal character '%c' (ignored)",lxchar); ! 780: else ! 781: error("illegal character '0%o' (ignored)",lxchar); ! 782: continue; ! 783: } ! 784: ! 785: case A_DOLL: ! 786: { // lex a name of the for $id for template tree formals ! 787: Pname fn ; ! 788: lxget( lxchar, LEXLET|LEXDIG ) ; ! 789: ! 790: if (!templp->in_progress || !txtstart[1]) { ! 791: // no name string immediately follows, treat it ! 792: // like an illegal character ! 793: error("illegal character '0%o' (ignored)",lxchar); ! 794: continue; ! 795: } ! 796: ! 797: txtstart++ ; ! 798: if(fn=templ_compilation::tree_parameter(txtstart)) { ! 799: switch (fn->n_template_arg) { ! 800: case template_expr_tree_formal: ! 801: // retain the $ in the name ! 802: retn(ID, strdup(--txtstart)) ; ! 803: ! 804: case template_stmt_tree_formal: ! 805: retn(SM_PARAM, fn) ; ! 806: } ! 807: } ! 808: error("%s wasn't a statement or expression formal", txtstart); ! 809: rets(ID, copy_if_need_be(txtstart)); ! 810: } ! 811: ! 812: case A_LET: // collect an identifier and check for keyword ! 813: { ! 814: char ll; ! 815: switch (ll = lxchar) { ! 816: // case 'l': ! 817: case 'L': ! 818: switch (get(lxchar)) { ! 819: case '\'': ! 820: error('s',"wide character constant"); ! 821: unget(lxchar); ! 822: continue; ! 823: case '"': ! 824: error('s',"wide character string"); ! 825: unget(lxchar); ! 826: continue; ! 827: } ! 828: unget(lxchar); ! 829: lxchar = ll; ! 830: } ! 831: } ! 832: ! 833: lxget( lxchar, LEXLET|LEXDIG ); ! 834: /* look for a keyword or a global type */ ! 835: if ((n = keyword_table->look(txtstart,0)) /* keyword */ ! 836: || (n = ktbl->look(txtstart, 0))) /* local type */ ! 837: { ! 838: TOK x; ! 839: del_txt(); ! 840: switch (x=n->base) { ! 841: case TNAME: ! 842: rets(ID,n->string); ! 843: case LOC: ! 844: retl(n->syn_class); ! 845: case EXTERN: ! 846: if ((lxchar = eat_whitespace()) == '\"') { ! 847: // linkage directive ! 848: get_string(); ! 849: rets(LINKAGE,txtstart); ! 850: } ! 851: unget(lxchar); ! 852: reti(TYPE,EXTERN); ! 853: case CATCH: ! 854: // case TEMPLATE: ! 855: error('s',"%k",n->syn_class); ! 856: continue; ! 857: case TRY: ! 858: { ! 859: static int warn_try; ! 860: if (!warn_try) { ! 861: Pname n = keyword_table->look("try",0); ! 862: n->n_key = DEFAULT; ! 863: error('w',&tloc,"%k is a future reserved keyword",n->syn_class); ! 864: warn_try++; ! 865: } ! 866: rets(ID,n->string); ! 867: } ! 868: default: ! 869: reti(n->syn_class,x); ! 870: } ! 871: } ! 872: // rets(ID,txtstart); ! 873: rets(ID, copy_if_need_be(txtstart)) ; ! 874: ! 875: case A_DIG: ! 876: ! 877: ret = ICON; ! 878: ! 879: if (lxchar=='0') { ! 880: int pkchar; ! 881: get(pkchar); ! 882: if(pkchar=='x' || pkchar=='X') { // hex ! 883: pch(lxchar); ! 884: lxget(pkchar,LEXHEX); ! 885: txtfree--; ! 886: if (txtfree-txtstart<3) // minimum "0Xd\0" ! 887: error("hex digitX after \"0x\""); ! 888: get(lxchar); ! 889: goto getsuffix; ! 890: } ! 891: unget(pkchar); ! 892: } ! 893: ! 894: lxget(lxchar,LEXDIG); ! 895: txtfree--; ! 896: ! 897: if (get(lxchar) == '.') { ! 898: getfp: ! 899: lxget('.', LEXDIG ); ! 900: txtfree--; ! 901: ret = FCON; ! 902: get(lxchar); ! 903: }; ! 904: ! 905: if (lxchar=='e' || lxchar=='E') { ! 906: pch(lxchar); ! 907: get(lxchar); ! 908: if(lxchar=='-' || lxchar=='+') { ! 909: pch(lxchar); ! 910: get(lxchar); ! 911: } ! 912: if (lxmask[lxchar+1] & LEXDIG) { ! 913: lxget( lxchar, LEXDIG ); ! 914: txtfree--; ! 915: get(lxchar); ! 916: } ! 917: else ! 918: error("missing exponent digits?"); ! 919: ret = FCON; ! 920: }; ! 921: ! 922: if(*txtstart=='0' && ret==ICON) { ! 923: char *bch = txtstart; ! 924: while (++bch <= txtfree) { ! 925: if(*bch=='8' || *bch=='9') ! 926: error("%c used as octal digit",*bch); ! 927: } ! 928: } ! 929: ! 930: getsuffix: ! 931: switch (lxchar) { ! 932: case 'f': ! 933: case 'F': ! 934: if (ret==ICON) ! 935: error("%c suffix for integer constant",lxchar); ! 936: else ! 937: pch(lxchar); ! 938: break; ! 939: case 'u': ! 940: case 'U': ! 941: if (ret==FCON) { ! 942: error("%c suffix for floating constant",lxchar); ! 943: break; ! 944: } ! 945: pch(lxchar); ! 946: switch(get(lxchar)) { ! 947: case 'l': ! 948: case 'L': ! 949: pch(lxchar); ! 950: break; ! 951: default: ! 952: saved=lxchar; ! 953: break; ! 954: } ! 955: break; ! 956: case 'l': ! 957: case 'L': ! 958: pch(lxchar); ! 959: if (ret==FCON) { ! 960: break; ! 961: } ! 962: switch(get(lxchar)) { ! 963: case 'u': ! 964: case 'U': ! 965: pch(lxchar); ! 966: break; ! 967: default: ! 968: saved=lxchar; ! 969: break; ! 970: } ! 971: break; ! 972: default: ! 973: saved = lxchar; ! 974: break; ! 975: }; ! 976: ! 977: if(*txtstart=='0' && txtfree-txtstart==1) ! 978: reti(ZERO,0); // plain zero ! 979: ! 980: pch(0); ! 981: rets(ret,txtstart); ! 982: ! 983: ! 984: case A_DOT: ! 985: switch (get(lxchar)) { ! 986: case '.': // look for ellipsis ! 987: if (get(lxchar) != '.') { ! 988: error("token .. ?"); ! 989: saved = lxchar; ! 990: } ! 991: reti(ELLIPSIS,0); ! 992: case '*': ! 993: reti (REFMUL,DOT); ! 994: } ! 995: ! 996: if (lxmask[lxchar+1] & LEXDIG){// look for floating constant ! 997: unget(lxchar); ! 998: goto getfp; ! 999: } ! 1000: saved = lxchar; ! 1001: reti(DOT,0); ! 1002: ! 1003: case A_STR: ! 1004: /* save string constant in buffer */ ! 1005: get_string(); ! 1006: rets(STRING,txtstart); ! 1007: ! 1008: case A_CC: ! 1009: /* character constant */ ! 1010: rets(CCON,chconst()); ! 1011: ! 1012: case A_BCD: ! 1013: { ! 1014: register i; ! 1015: int j; ! 1016: ! 1017: pch('`'); ! 1018: ! 1019: for (i=0; i<7; ++i) { ! 1020: pch(get(j)); ! 1021: if (j == '`' ) break; ! 1022: } ! 1023: pch(0); ! 1024: if (6<i) ! 1025: error('l',"bcd constant exceeds 6 characters" ); ! 1026: rets(CCON,txtstart); ! 1027: } ! 1028: ! 1029: case A_SL: /* / */ ! 1030: switch (get(lxchar)) { ! 1031: case '*': ! 1032: lxcom(); ! 1033: break; ! 1034: case '/': ! 1035: linecom(); ! 1036: break; ! 1037: case '=': ! 1038: reti(ASOP,ASDIV); ! 1039: default: ! 1040: saved = lxchar; ! 1041: reti(DIVOP,DIV); ! 1042: } ! 1043: ! 1044: case A_WS: ! 1045: continue; ! 1046: ! 1047: case A_NL: ! 1048: ++tloc.line; ! 1049: // Nline++; ! 1050: saved = lxtitle(); ! 1051: continue; ! 1052: ! 1053: case A_LC: ! 1054: if (BLMAX <= b_level++) { ! 1055: error('l',"blocks too deeply nested"); ! 1056: ext(3); ! 1057: } ! 1058: retl(LC); ! 1059: ! 1060: case A_RC: ! 1061: if (lcount+b_level-- <= 0) { ! 1062: error("unexpected '}'"); ! 1063: b_level = 0; ! 1064: } ! 1065: retl(RC); ! 1066: ! 1067: case A_L: ! 1068: p_level++; ! 1069: reti(LP,0); ! 1070: ! 1071: case A_R: ! 1072: if (p_level-- <= 0) { ! 1073: error("unexpected ')'"); ! 1074: p_level = 0; ! 1075: } ! 1076: reti(RP,0); ! 1077: ! 1078: case A_ASS: ! 1079: switch (get(lxchar)) { ! 1080: case '=': ! 1081: reti(EQUOP,EQ); ! 1082: default: ! 1083: saved = lxchar; ! 1084: reti(ASSIGN,ASSIGN); ! 1085: } ! 1086: ! 1087: case A_COL: ! 1088: switch (get(lxchar)) { ! 1089: case ':': ! 1090: reti(MEM,0); ! 1091: case '=': ! 1092: error("':=' is not a c++ operator"); ! 1093: reti(ASSIGN,ASSIGN); ! 1094: default: ! 1095: saved = lxchar; ! 1096: reti(COLON,COLON); ! 1097: } ! 1098: case A_NOT: ! 1099: switch (get(lxchar)) { ! 1100: case '=': ! 1101: reti(EQUOP,NE); ! 1102: default: ! 1103: saved = lxchar; ! 1104: reti(NOT,NOT); ! 1105: } ! 1106: case A_GT: ! 1107: switch(get(lxchar)) { ! 1108: case '>': ! 1109: switch (get(lxchar)) { ! 1110: case '=': ! 1111: reti(ASOP,ASRS); ! 1112: break; ! 1113: default: ! 1114: saved = lxchar; ! 1115: reti(SHIFTOP,RS); ! 1116: } ! 1117: case '=': ! 1118: reti(RELOP,GE); ! 1119: default: ! 1120: saved = lxchar; ! 1121: reti(GT,GT); ! 1122: } ! 1123: case A_LT: ! 1124: switch (get(lxchar)) { ! 1125: case '<': ! 1126: switch (get(lxchar)) { ! 1127: case '=': ! 1128: reti(ASOP,ASLS); ! 1129: default: ! 1130: saved = lxchar; ! 1131: reti(SHIFTOP,LS); ! 1132: } ! 1133: case '=': ! 1134: reti(RELOP,LE); ! 1135: default: ! 1136: saved = lxchar; ! 1137: reti(LT,LT); ! 1138: } ! 1139: case A_AND: ! 1140: switch (get(lxchar)) { ! 1141: case '&': ! 1142: reti(ANDAND,ANDAND); ! 1143: case '=': ! 1144: reti(ASOP,ASAND); ! 1145: default: ! 1146: saved = lxchar; ! 1147: reti(AND,AND); ! 1148: } ! 1149: case A_OR: ! 1150: switch (get(lxchar)) { ! 1151: case '|': ! 1152: reti(OROR,OROR); ! 1153: case '=': ! 1154: reti(ASOP,ASOR); ! 1155: default: ! 1156: saved = lxchar; ! 1157: reti(OR,OR); ! 1158: } ! 1159: case A_ER: ! 1160: switch (get(lxchar)) { ! 1161: case '=': ! 1162: reti(ASOP,ASER); ! 1163: default: ! 1164: saved = lxchar; ! 1165: reti(ER,ER); ! 1166: } ! 1167: case A_PL: ! 1168: switch (get(lxchar)) { ! 1169: case '=': ! 1170: reti(ASOP,ASPLUS); ! 1171: case '+': ! 1172: reti(ICOP,INCR); ! 1173: default: ! 1174: saved = lxchar; ! 1175: reti(PLUS,PLUS); ! 1176: } ! 1177: case A_MIN: ! 1178: switch (get(lxchar)) { ! 1179: case '=': ! 1180: reti(ASOP,ASMINUS); ! 1181: case '-': ! 1182: reti(ICOP,DECR); ! 1183: case '>': ! 1184: if (get(lxchar) == '*') ! 1185: {reti(REFMUL,REF);} ! 1186: else ! 1187: saved = lxchar; ! 1188: reti(REF,REF); ! 1189: default: ! 1190: saved = lxchar; ! 1191: reti(MINUS,MINUS); ! 1192: } ! 1193: case A_MUL: ! 1194: switch (get(lxchar)) { ! 1195: case '=': ! 1196: reti(ASOP,ASMUL); ! 1197: case '/': ! 1198: error('w',"*/ not as end of comment"); ! 1199: default: ! 1200: saved = lxchar; ! 1201: reti(MUL,MUL); ! 1202: } ! 1203: case A_MOD: ! 1204: switch (get(lxchar)) { ! 1205: case '=': ! 1206: reti(ASOP,ASMOD); ! 1207: default: ! 1208: saved = lxchar; ! 1209: reti(DIVOP,MOD); ! 1210: } ! 1211: default: ! 1212: {error('i',"lex act==%d getc()->%d",p,lxchar);} ! 1213: ! 1214: } ! 1215: ! 1216: error('i',"lex, main switch"); ! 1217: } ! 1218: ! 1219: } ! 1220: ! 1221: int lxtitle() ! 1222: /* ! 1223: called after a newline; set linenumber and file name ! 1224: */ ! 1225: { ! 1226: register c; ! 1227: ! 1228: for(;;) ! 1229: switch ( get(c) ) { ! 1230: default: // e.g. not '\n', not '#' ! 1231: return c; ! 1232: case '\n': ! 1233: tloc.line++; ! 1234: // Nline++; ! 1235: ll: ! 1236: break; ! 1237: case '#': /* # lineno "filename" */ ! 1238: { int cl = tloc.line; ! 1239: tloc.line = 0; ! 1240: for(;;) ! 1241: switch (get(c)) { ! 1242: case '"': ! 1243: start_txt(); ! 1244: for(;;) ! 1245: switch (get(c)) { ! 1246: case '"': ! 1247: pch('\0'); ! 1248: ! 1249: while (get(c) != '\n') ; // skip to eol.. ignore anything more ! 1250: ! 1251: if (*txtstart) { // stack file name ! 1252: if (curr_file == 0){ ! 1253: char* p1 = new char[txtfree-txtstart + 1]; ! 1254: (void) strcpy(p1,txtstart); ! 1255: file_name = p1; ! 1256: } ! 1257: else { //&& line is dummy #line "input.c" ! 1258: // ignore ! 1259: } ! 1260: del_txt(); ! 1261: goto ll; ! 1262: } ! 1263: case '\n': ! 1264: error("unexpected end of line on '# line'"); ! 1265: default: ! 1266: pch(c); ! 1267: } ! 1268: case ' ': ! 1269: break; ! 1270: ! 1271: case '0': ! 1272: case '1': ! 1273: case '2': ! 1274: case '3': ! 1275: case '4': ! 1276: case '5': ! 1277: case '6': ! 1278: case '7': ! 1279: case '8': ! 1280: case '9': ! 1281: tloc.line = tloc.line*10+c-'0'; ! 1282: break; ! 1283: ! 1284: case 'l': // look for "#line ..." and then ignore "line" ! 1285: if (get(c)=='i' && get(c)=='n' && get(c)=='e') break; ! 1286: case '\n': ! 1287: tloc.putline(); ! 1288: goto ll; ! 1289: ! 1290: default: // pass #rubbish through ! 1291: tloc.line = cl; ! 1292: pch('#'); ! 1293: pch(c); ! 1294: while (get(c) != '\n') pch(c); ! 1295: pch('\0'); ! 1296: fprintf(out_file,"\n%s\n",txtstart); ! 1297: start_txt(); ! 1298: tloc.line++; ! 1299: // Nline++; ! 1300: goto ll; ! 1301: } ! 1302: } ! 1303: } ! 1304: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.