|
|
1.1 ! root 1: /*ident "@(#)ctrans:src/lex.c 1.5" */ ! 2: /*************************************************************************** ! 3: ! 4: C++ source for cfront, the C++ compiler front-end ! 5: written in the computer science research center of Bell Labs ! 6: ! 7: Copyright (c) 1984 AT&T, Inc. All Rights Reserved ! 8: THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF AT&T, INC. ! 9: ! 10: lex.c: ! 11: lexical analyser based on pcc's and cpre's scanners ! 12: modified to handle classes: ! 13: new keywords: class ! 14: public ! 15: call ! 16: etc. ! 17: names are not entered in the symbol table by lex() ! 18: names can be of arbitrary length ! 19: error() is used to report errors ! 20: {} and () must match ! 21: numeric constants are not converted into internal representation ! 22: but stored as strings ! 23: ! 24: ****************************************************************************/ ! 25: ! 26: #include "cfront.h" ! 27: #include "yystype.h" ! 28: #include "size.h" ! 29: #include "tqueue.h" ! 30: #include "template.h" ! 31: ! 32: # define CCTRANS(x) x ! 33: ! 34: #ifndef NULL ! 35: #define NULL 0 ! 36: #endif ! 37: ! 38: char* strdup(const char* s1) ! 39: /* string duplication ! 40: returns pointer to a new string which is the duplicate of string ! 41: pointed to by s1 ! 42: NULL is returned if new string can't be created ! 43: */ ! 44: { ! 45: char * s2; ! 46: ! 47: s2 = malloc((unsigned) strlen(s1)+1) ; ! 48: return(s2==NULL ? NULL : strcpy(s2,s1) ); ! 49: } ! 50: ! 51: #define copy_if_need_be(s) ((templp->in_progress || templp->parameters_in_progress) ? strdup(s) : (s)) ! 52: ! 53: /* lexical actions */ ! 54: ! 55: #define A_ERR 0 /* illegal character */ ! 56: #define A_LET 1 /* saw a letter */ ! 57: #define A_DIG 2 /* saw a digit */ ! 58: #define A_1C 3 /* return a single character */ ! 59: #define A_STR 4 /* string */ ! 60: #define A_CC 5 /* character constant */ ! 61: #define A_BCD 6 /* GCOS BCD constant */ ! 62: #define A_SL 7 /* saw a / */ ! 63: #define A_DOT 8 /* saw a . */ ! 64: #define A_2C 9 /* possible two character symbol */ ! 65: #define A_WS 10 /* whitespace (not \n) */ ! 66: #define A_NL 11 /* \n */ ! 67: #define A_LC 12 /* { */ ! 68: #define A_RC 13 /* } */ ! 69: #define A_L 14 /* ( */ ! 70: #define A_R 15 /* ) */ ! 71: #define A_EOF 16 ! 72: #define A_ASS 17 ! 73: #define A_LT 18 ! 74: #define A_GT 19 /* > */ ! 75: #define A_ER 20 ! 76: #define A_OR 21 ! 77: #define A_AND 22 ! 78: #define A_MOD 23 ! 79: #define A_NOT 24 ! 80: #define A_MIN 25 ! 81: #define A_MUL 26 ! 82: #define A_PL 27 ! 83: #define A_COL 28 /* : */ ! 84: #define A_SHARP 29 /* # */ ! 85: #define A_DOLL 30 /* $ */ ! 86: ! 87: /* character classes */ ! 88: ! 89: # define LEXLET 01 ! 90: # define LEXDIG 02 ! 91: /* no LEXOCT because 8 and 9 used to be octal digits */ ! 92: # define LEXHEX 010 ! 93: # define LEXWS 020 ! 94: # define LEXDOT 040 ! 95: ! 96: const FIRSTCHUNK = 8*1024-8; ! 97: const BUFCHUNK = 4*1024-8; ! 98: ! 99: /* text buffer */ ! 100: static char inbuf[FIRSTCHUNK/*TBUFSZ*/]; ! 101: static char* txtmax = &inbuf[FIRSTCHUNK/*TBUFSZ*/-1]; ! 102: static char* txtstart = 0; ! 103: static char* txtfree = 0; ! 104: ! 105: static struct buf* bufhead; ! 106: static buf* freebuf; ! 107: //static bufs; ! 108: ! 109: struct buf { ! 110: buf* next; ! 111: char chars[BUFCHUNK]; ! 112: // buf() { next=bufhead; bufhead=this; } ! 113: }; ! 114: ! 115: new_buf(char c) ! 116: { ! 117: //fprintf(stderr,"new_buf %d\n",bufs++); ! 118: buf* pbuf; ! 119: if (freebuf) { ! 120: pbuf = freebuf; ! 121: freebuf = freebuf->next; ! 122: } ! 123: else ! 124: pbuf = new buf; // allocate and register new chunk ! 125: pbuf->next = bufhead; ! 126: bufhead = pbuf; ! 127: ! 128: if (BUFCHUNK < txtmax-txtstart) error('l',"lexical token too long"); ! 129: ! 130: // copy current token: ! 131: char* p = txtstart; ! 132: txtstart = txtfree = &pbuf->chars[0]; ! 133: while (p<txtmax) *txtfree++ = *p++; ! 134: *txtfree++=c; ! 135: txtmax = &pbuf->chars[BUFCHUNK-1]; ! 136: return 0; ! 137: } ! 138: ! 139: ! 140: #define pch(c) ((txtmax<=txtfree)?new_buf(c):(*txtfree++=c)) ! 141: #define start_txt() txtstart = txtfree ! 142: #define del_txt() txtfree = txtstart ! 143: ! 144: static int Nfile;// = 1; ! 145: static char* file_name[MAXFILE*4]; // source file names ! 146: // file_name[0] == src_file_name ! 147: // file_name[0] == 0 means stdin ! 148: static short file_stack[MAXFILE]; // stack of file name indices ! 149: static int tcurr_file; // current index in file_stack ! 150: // that is current #include nest level ! 151: ! 152: Linkage linkage = linkage_default; // linkage is default C++ ! 153: const LINKMAX = 10; ! 154: static Linkage lvec[LINKMAX] = { linkage_default }; ! 155: static int lcount = 0; ! 156: ! 157: void set_linkage(char* p) ! 158: { ! 159: if (p==0 || *p == 0) { // resume previous linkage ! 160: if (lcount>0) linkage = lvec[--lcount]; ! 161: } ! 162: else { ! 163: if (LINKMAX<=++lcount) { ! 164: error('l',"linkage directive nested too deep"); ! 165: --lcount; ! 166: } else if (strcmp(p,"C")==0) ! 167: lvec[lcount] = linkage = linkage_C; ! 168: else if (strcmp(p,"C++")==0) ! 169: lvec[lcount] = linkage = linkage_Cplusplus; ! 170: else { ! 171: error("%s linkage",p); ! 172: --lcount; ! 173: } ! 174: } ! 175: } ! 176: ! 177: static struct loc tloc; ! 178: FILE * out_file = stdout; ! 179: FILE * in_file = stdin; ! 180: Ptable ktbl; ! 181: Ptable keyword_table; ! 182: ! 183: static int p_level = 0; /* number of unmatched ``(''s */ ! 184: static int b_level = 0; /* number of unmatched ``{''s */ ! 185: ! 186: # ifdef ibm ! 187: ! 188: # define CSMASK 0377 ! 189: # define CSSZ 256 ! 190: ! 191: # else ! 192: ! 193: # define CSMASK 0177 ! 194: # define CSSZ 128 ! 195: ! 196: # endif ! 197: ! 198: static short lxmask[CSSZ+1]; ! 199: ! 200: int saved = 0; /* putback character, avoid ungetchar */ ! 201: static int lxtitle(); ! 202: ! 203: // overload rt; ! 204: inline YYSTYPE rt(char* x) { YYSTYPE y; y.s = x; return y; } ! 205: inline YYSTYPE rt(int x) { YYSTYPE y; y.t = x; return y; } ! 206: inline YYSTYPE rt(loc x) { YYSTYPE y; y.l = x; return y; } ! 207: inline YYSTYPE rt(void* x) { YYSTYPE y; y.pn = Pname(x); return y; } ! 208: ! 209: #define get(c) (c=getc(in_file)) ! 210: #define unget(c) ungetc(c,in_file) ! 211: ! 212: #define reti(a,b) { addtok(a, rt(b), tloc); return a; } ! 213: #define retn(a,b) { addtok(a, rt((Pnode)b), tloc); return a; } ! 214: #define rets(a,b) { addtok(a, rt(b), tloc); return a; } ! 215: #define retl(a) { addtok(a, rt(tloc), tloc); return a; } ! 216: ! 217: // keys[] holds the external form for tokens with fixed representation ! 218: // illegal tokens and those with variable representation have 0 entries ! 219: char* keys[MAXTOK+1]; ! 220: ! 221: static void ! 222: new_key(char* s, TOK toknum, TOK yyclass) ! 223: /* ! 224: make "s" a new keyword with the representation (token) "toknum" ! 225: "yyclass" is the yacc token (for example new_key("int",INT,TYPE); ) ! 226: "yyclass==0" means yyclass=toknum; ! 227: */ ! 228: { ! 229: Pname n = new name(s); ! 230: ! 231: keys[(toknum==LOC)?yyclass:toknum] = s; ! 232: n = new name(s); ! 233: Pname nn = keyword_table->insert(n,0); ! 234: // if (Nold) error('i',"keyword %sD twice",s); ! 235: nn->base = toknum; ! 236: nn->syn_class = (yyclass) ? yyclass : toknum; ! 237: delete n; ! 238: } ! 239: ! 240: const int keyword_count = 67; ! 241: static void ! 242: ktbl_init() ! 243: /* ! 244: enter keywords into keyword table for use by lex() ! 245: and into keyword representation table used for output ! 246: ! 247: ktbl is only for types. We put nothing in it. ! 248: keyword_table is for user-defined reserved words ! 249: */ ! 250: { ! 251: ktbl = new table(KTBLSIZE,0,0); ! 252: keyword_table = new table(keyword_count,0,0); ! 253: ! 254: new_key("asm",ASM,0); ! 255: new_key("auto",AUTO,TYPE); ! 256: new_key("break",LOC,BREAK); ! 257: new_key("case",LOC,CASE); ! 258: new_key("continue",LOC,CONTINUE); ! 259: new_key("char",CHAR,TYPE); ! 260: new_key("do",LOC,DO); ! 261: new_key("double",DOUBLE,TYPE); ! 262: new_key("default",LOC,DEFAULT); ! 263: new_key("enum",ENUM,0); ! 264: new_key("else",LOC,ELSE); ! 265: new_key("extern",EXTERN,TYPE); ! 266: new_key("float",FLOAT,TYPE); ! 267: new_key("for",LOC,FOR); ! 268: new_key("goto",LOC,GOTO); ! 269: new_key("catch",CATCH,CATCH); ! 270: new_key("try",TRY,TRY); ! 271: new_key("if",LOC,IF); ! 272: new_key("int",INT,TYPE); ! 273: new_key("long",LONG,TYPE); ! 274: new_key("return",LOC,RETURN); ! 275: new_key("register",REGISTER,TYPE); ! 276: new_key("static",STATIC,TYPE); ! 277: new_key("struct",STRUCT,AGGR); ! 278: new_key("sizeof",SIZEOF,0); ! 279: new_key("short",SHORT,TYPE); ! 280: new_key("switch",LOC,SWITCH); ! 281: new_key("typedef",TYPEDEF,TYPE); ! 282: new_key("unsigned",UNSIGNED,TYPE); ! 283: new_key("union",UNION,AGGR); ! 284: new_key("void",VOID,TYPE); ! 285: new_key("while",LOC,WHILE); ! 286: ! 287: new_key("class",CLASS,AGGR); ! 288: new_key("const",CONST,TYPE); ! 289: new_key("delete",LOC,DELETE); ! 290: new_key("friend",FRIEND,TYPE); ! 291: new_key("inline",INLINE,TYPE); ! 292: new_key("new",NEW,0); ! 293: new_key("operator",OPERATOR,0); ! 294: new_key("overload",OVERLOAD,TYPE); ! 295: new_key("private",PRIVATE,PR); ! 296: new_key("protected",PROTECTED,PR); ! 297: new_key("public",PUBLIC,PR); ! 298: new_key("signed",SIGNED,TYPE); ! 299: new_key("template",TEMPLATE,0); ! 300: new_key("this",THIS,0); ! 301: new_key("virtual",VIRTUAL,TYPE); ! 302: new_key("volatile",VOLATILE,TYPE); ! 303: ! 304: new_key("__statement", STATEMENT, 0) ; ! 305: new_key("__expression", EXPRESSION, 0) ; ! 306: new_key("__template_test", TEMPLATE_TEST, 0) ; ! 307: } ! 308: ! 309: loc last_line; ! 310: loc noloc = { 0, 0 }; ! 311: ! 312: void loc::putline() ! 313: { ! 314: if (file==0 && line==0) return; ! 315: // fix, Nfile, not MAXFILE; Nfile == MAXFILE*4 ! 316: // if (0<=file && file<MAXFILE) { ! 317: if ( 0<=file && file <= Nfile ) { ! 318: char* f = file_name[file]; ! 319: if (f==0) f = src_file_name; ! 320: fprintf(out_file,line_format,line,f); ! 321: last_line = *this; ! 322: } ! 323: } ! 324: ! 325: void loc::put(FILE* p) ! 326: { ! 327: // fix, Nfile, not MAXFILE; Nfile == MAXFILE*4 ! 328: // if (0<=file && file<MAXFILE) { ! 329: if ( 0<=file && file <= Nfile ) { ! 330: char* f = file_name[file]; ! 331: if (f==0) f = src_file_name; ! 332: fprintf(p,"\"%s\", line %d: ",f,line); ! 333: } ! 334: } ! 335: ! 336: void lxenter(register char* s, short m) ! 337: /* enter a mask into lxmask */ ! 338: { ! 339: register c; ! 340: ! 341: while( c= *s++ ) lxmask[c+1] |= m; ! 342: ! 343: } ! 344: ! 345: ! 346: void lxget(register c, register m) ! 347: /* ! 348: put 'c' back then scan for members of character class 'm' ! 349: terminate the string read with \0 ! 350: txtfree points to the character position after that \0 ! 351: */ ! 352: { ! 353: pch(c); ! 354: while ( (get(c), lxmask[c+1]&m) ) pch(c); ! 355: unget(c); ! 356: pch('\0'); ! 357: } ! 358: ! 359: struct LXDOPE { ! 360: short lxch; /* the character */ ! 361: short lxact; /* the action to be performed */ ! 362: TOK lxtok; /* the token number to be returned */ ! 363: } lxdope[] = { ! 364: #ifdef apollo ! 365: '@', A_ERR, 0, /* illegal characters go here... */ ! 366: #else ! 367: '$', A_DOLL, 0, ! 368: // '$', A_ERR, 0, /* illegal characters go here... */ ! 369: #endif ! 370: '_', A_LET, 0, /* letters point here */ ! 371: '0', A_DIG, 0, /* digits point here */ ! 372: ' ', A_WS, 0, /* whitespace goes here */ ! 373: '\n', A_NL, 0, ! 374: '"', A_STR, 0, /* character string */ ! 375: '\'', A_CC, 0, /* ASCII character constant */ ! 376: '`', A_BCD, 0, /* 'foreign' character constant, e.g. BCD */ ! 377: '(', A_L, LP, ! 378: ')', A_R, RP, ! 379: '{', A_LC, LC, ! 380: '}', A_RC, RC, ! 381: '[', A_1C, LB, ! 382: ']', A_1C, RB, ! 383: '*', A_MUL, MUL, ! 384: '?', A_1C, QUEST, ! 385: ':', A_COL, COLON, ! 386: '+', A_PL, PLUS, ! 387: '-', A_MIN, MINUS, ! 388: '/', A_SL, DIV, ! 389: '%', A_MOD, MOD, ! 390: '&', A_AND, AND, ! 391: '|', A_OR, OR, ! 392: '^', A_ER, ER, ! 393: '!', A_NOT, NOT, ! 394: '~', A_1C, COMPL, ! 395: ',', A_1C, CM, ! 396: ';', A_1C, SM, ! 397: '.', A_DOT, DOT, ! 398: '<', A_LT, LT, ! 399: '>', A_GT, GT, ! 400: '=', A_ASS, ASSIGN, ! 401: '#', A_SHARP, 0, ! 402: EOF, A_EOF, EOFTOK ! 403: }; ! 404: /* note: EOF is used as sentinel, so must be <=0 and last entry in table */ ! 405: ! 406: static struct LXDOPE *lxcp[CSSZ+1]; ! 407: ! 408: void ! 409: lex_init() ! 410: { ! 411: register struct LXDOPE *p; ! 412: register i; ! 413: register char *cp; ! 414: /* set up character classes */ ! 415: ! 416: /* first clear lexmask */ ! 417: for(i=0; i<=CSSZ; i++) lxmask[i] = 0; ! 418: ! 419: #ifdef apollo ! 420: lxenter( "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_$", LEXLET ); ! 421: #else ! 422: lxenter( "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_", LEXLET ); ! 423: #endif ! 424: lxenter( "0123456789", LEXDIG ); ! 425: lxenter( "0123456789abcdefABCDEF", LEXHEX ); ! 426: /* \013 should become \v someday; \013 is OK for ASCII and EBCDIC */ ! 427: lxenter( " \t\r\b\f\013", LEXWS ); ! 428: lxmask['.'+1] |= LEXDOT; ! 429: ! 430: /* make lxcp point to appropriate lxdope entry for each character */ ! 431: ! 432: /* initialize error entries */ ! 433: ! 434: for( i= 0; i<=CSSZ; ++i ) lxcp[i] = lxdope; ! 435: ! 436: /* make unique entries */ ! 437: ! 438: for( p=lxdope; ; ++p ) { ! 439: lxcp[p->lxch+1] = p; ! 440: if( p->lxch < 0 ) break; ! 441: } ! 442: ! 443: /* handle letters, digits, and whitespace */ ! 444: /* by convention, first, second, and third places */ ! 445: ! 446: cp = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; ! 447: while( *cp ) lxcp[*cp++ + 1] = &lxdope[1]; ! 448: cp = "123456789"; ! 449: while( *cp ) lxcp[*cp++ + 1] = &lxdope[2]; ! 450: cp = "\t\b\r\f\013"; ! 451: while( *cp ) lxcp[*cp++ + 1] = &lxdope[3]; ! 452: ! 453: file_name[0] = src_file_name; ! 454: // set both curloc and tloc so curloc is valid at program startup ! 455: // curloc.file = tloc.file = 0; ! 456: curloc.line = tloc.line = 1; ! 457: ! 458: ktbl_init(); ! 459: lex_clear(); ! 460: saved = lxtitle(); ! 461: } ! 462: ! 463: void lex_clear() ! 464: { ! 465: // delete extra buffers: ! 466: buf* p = bufhead; ! 467: bufhead = 0; ! 468: //if (p) { ! 469: //fprintf(stderr,"lex_clear\n"); ! 470: //bufs=0; ! 471: //} ! 472: while (p) { ! 473: buf* pp = p; ! 474: p = p->next; ! 475: pp->next = freebuf; ! 476: freebuf = pp; ! 477: } ! 478: ! 479: // re-set to static buffer: ! 480: txtstart = txtfree = inbuf; ! 481: txtmax = &inbuf[FIRSTCHUNK/*TBUFSZ*/-1]; ! 482: } ! 483: ! 484: int int_val(char hex) ! 485: { ! 486: switch (hex) { ! 487: case '0': case '1': case '2': case '3': case '4': ! 488: case '5': case '6': case '7': case '8': case '9': ! 489: return hex-'0'; ! 490: case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': ! 491: return hex-'a'+10; ! 492: case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': ! 493: return hex-'A'+10; ! 494: } ! 495: } ! 496: ! 497: void hex_to_oct() ! 498: /* ! 499: \x has been seen on input (in char const or string) and \ printed ! 500: read the following hexadecimal integer and replace it with an octal ! 501: */ ! 502: { ! 503: int i = 0; ! 504: int c; ! 505: get(c); ! 506: if (lxmask[c+1] & LEXHEX) { ! 507: i = int_val(c); ! 508: get(c); // try for two ! 509: if (lxmask[c+1] & LEXHEX) { ! 510: i = (i<<4) + int_val(c); ! 511: get(c); // try for three ! 512: if (lxmask[c+1] & LEXHEX) ! 513: i = (i<<4) + int_val(c); ! 514: else ! 515: unget(c); ! 516: } ! 517: else ! 518: unget(c); ! 519: } ! 520: else { ! 521: error("hexadecimal digitE after \\x"); ! 522: unget(c); ! 523: } ! 524: ! 525: // if (0377 < i) error('l',"hexadecimal constant too large"); ! 526: i &= 0377; ! 527: ! 528: pch(('0'+(i>>6))); ! 529: pch(('0'+((i&070)>>3))); ! 530: pch(('0'+(i&7))); ! 531: } ! 532: ! 533: ! 534: char * chconst() ! 535: /* ! 536: read a character constant into inbuf ! 537: */ ! 538: { ! 539: register c; ! 540: int nch = 0; ! 541: ! 542: pch('\''); ! 543: ! 544: for(;;) { ! 545: char* p; ! 546: char cc = 0; ! 547: ! 548: switch (get(c)) { ! 549: case '\'': ! 550: goto ex; ! 551: case EOF: ! 552: error("eof in char constant"); ! 553: goto ex; ! 554: case '\n': ! 555: error("newline in char constant"); ! 556: goto ex; ! 557: case '\\': ! 558: if (SZ_INT == nch++) error('l',"char constant too long"); ! 559: pch(c); ! 560: switch (get(c)){ ! 561: case '\n': ! 562: ++tloc.line; ! 563: default: ! 564: pch(c); ! 565: break; ! 566: case '4': case '5': case '6': case '7': // octal ! 567: p = txtfree; ! 568: cc = c-4; ! 569: case '0': case '1': case '2': case '3': ! 570: pch(c); ! 571: get(c); /* try for 2 */ ! 572: if( lxmask[c+1] & LEXDIG && c<'8'){ ! 573: pch(c); ! 574: get(c); /* try for 3 */ ! 575: if (lxmask[c+1] & LEXDIG && c<'8') { ! 576: if (cc) *p = cc; // zap high bit ! 577: pch(c); ! 578: } ! 579: else ! 580: unget(c); ! 581: } ! 582: else ! 583: unget(c); ! 584: break; ! 585: case 'x': // hexadecimal ! 586: hex_to_oct(); ! 587: break; ! 588: }; ! 589: break; ! 590: default: ! 591: if (SZ_INT == nch++) error('l',"char constant too long"); ! 592: pch(c); ! 593: } ! 594: } ! 595: ex: ! 596: if(nch==0) ! 597: error("empty char constant"); ! 598: pch('\''); ! 599: pch('\0'); ! 600: return txtstart; ! 601: } ! 602: ! 603: void lxcom() ! 604: /* process a "block comment" */ ! 605: { ! 606: register c; ! 607: ! 608: for(;;) ! 609: switch (get(c)) { ! 610: case EOF: ! 611: error('w',"eof in comment"); ! 612: return; ! 613: case '\n': ! 614: tloc.line++; ! 615: // Nline++; ! 616: break; ! 617: case '*': ! 618: if (get(c) == '/') return; ! 619: unget(c); ! 620: break; ! 621: case '/': ! 622: if (get(c) == '*') error('w',"``/*'' in comment"); ! 623: unget(c); ! 624: break; ! 625: } ! 626: } ! 627: ! 628: ! 629: void linecom() ! 630: // process a "line comment" ! 631: { ! 632: register c; ! 633: ! 634: get(c); ! 635: #ifdef DBG ! 636: if ( c=='@' && get(c)=='!' ) { ! 637: while ( get(c) != '\n' && c != EOF ) pch(c); ! 638: pch('\0'); ! 639: process_debug_flags(txtstart); ! 640: del_txt(); ! 641: } ! 642: #endif ! 643: for(;;get(c)) ! 644: switch (c) { ! 645: case EOF: ! 646: error('w',"eof in comment"); ! 647: return; ! 648: case '\n': ! 649: tloc.line++; ! 650: // Nline++; ! 651: saved = lxtitle(); ! 652: return; ! 653: } ! 654: } ! 655: ! 656: char eat_whitespace() ! 657: { ! 658: ! 659: for(;;) { ! 660: register c = get(c); ! 661: lx: ! 662: ! 663: switch (c) { ! 664: case EOF: ! 665: error('w',"unexpected comment"); ! 666: return EOF; ! 667: case '/': ! 668: switch (get(c)) { ! 669: case '*': ! 670: lxcom(); ! 671: break; ! 672: case '/': ! 673: linecom(); ! 674: break; ! 675: default: ! 676: unget(c); ! 677: return '/'; ! 678: } ! 679: break; ! 680: case '\n': ! 681: ++tloc.line; ! 682: c = lxtitle(); ! 683: goto lx; ! 684: case ' ': ! 685: case '\t': ! 686: break; ! 687: default: ! 688: return c; ! 689: } ! 690: } ! 691: } ! 692: ! 693: void get_string() ! 694: { ! 695: int lxchar; ! 696: ! 697: for(;;) ! 698: switch (get(lxchar)) { ! 699: case '\\': ! 700: pch('\\'); ! 701: switch (get(lxchar)){ ! 702: case '\n': ! 703: ++tloc.line; ! 704: default: ! 705: pch(lxchar); ! 706: break; ! 707: case 'x': // hexadecimal ! 708: hex_to_oct(); ! 709: break; ! 710: }; ! 711: break; ! 712: case '"': ! 713: { char* p = txtstart; // eat_whitespace() moves txtstart ! 714: if ((lxchar = eat_whitespace()) == '"') { ! 715: // string catenation, break with ! 716: // newline to avoid merging characters ! 717: // (e.g. "\xAB" "C") ! 718: pch('\\'); ! 719: pch('\n'); ! 720: ! 721: continue; // eat '\"' and carry on ! 722: }; ! 723: ! 724: txtstart = p; ! 725: unget(lxchar); ! 726: pch(0); ! 727: return; ! 728: } ! 729: case '\n': ! 730: error("newline in string"); ! 731: pch(0); ! 732: return; ! 733: case EOF: ! 734: error("eof in string"); ! 735: pch(0); ! 736: return; ! 737: default: ! 738: pch(lxchar); ! 739: } ! 740: } ! 741: ! 742: TOK tlex() ! 743: { ! 744: TOK ret; ! 745: Pname n; ! 746: ! 747: // Ntoken++; ! 748: ! 749: for(;;) { ! 750: register lxchar; ! 751: register struct LXDOPE *p; ! 752: ! 753: start_txt(); ! 754: ! 755: if (saved) { ! 756: lxchar = saved; ! 757: saved = 0; ! 758: } ! 759: else ! 760: get(lxchar); ! 761: ! 762: if (lxchar+1 >= CSSZ ) ! 763: error( "illegal input character encountered: %d", lxchar ); ! 764: ! 765: switch( (p=lxcp[lxchar+1])->lxact ){ ! 766: ! 767: case A_1C: // eat up a single character, and return an opcode ! 768: reti(p->lxtok,p->lxtok); ! 769: ! 770: case A_EOF: ! 771: if (p_level || b_level+lcount) ! 772: error("'%s' missing at end of input",(b_level+lcount) ? "}" : ")"); ! 773: ! 774: reti(EOFTOK,0); ! 775: ! 776: case A_SHARP: ! 777: // cope with header file not ended with '\n' ! 778: unget('#'); ! 779: saved = lxtitle(); ! 780: continue; ! 781: ! 782: case A_ERR: ! 783: { if (' '<=lxchar && lxchar<='~') // ASCII printable ! 784: error("illegal character '%c' (ignored)",lxchar); ! 785: else ! 786: error("illegal character '0%o' (ignored)",lxchar); ! 787: continue; ! 788: } ! 789: ! 790: case A_DOLL: ! 791: { // lex a name of the for $id for template tree formals ! 792: Pname fn ; ! 793: lxget( lxchar, LEXLET|LEXDIG ) ; ! 794: ! 795: if (!templp->in_progress || !txtstart[1]) { ! 796: // no name string immediately follows, treat it ! 797: // like an illegal character ! 798: error("illegal character '0%o' (ignored)",lxchar); ! 799: continue; ! 800: } ! 801: ! 802: txtstart++ ; ! 803: if(fn=templ_compilation::tree_parameter(txtstart)) { ! 804: switch (fn->n_template_arg) { ! 805: case template_expr_tree_formal: ! 806: // retain the $ in the name ! 807: retn(ID, strdup(--txtstart)) ; ! 808: ! 809: case template_stmt_tree_formal: ! 810: retn(SM_PARAM, fn) ; ! 811: } ! 812: } ! 813: error("%s wasn't a statement or expression formal", txtstart); ! 814: rets(ID, copy_if_need_be(txtstart)); ! 815: } ! 816: ! 817: case A_LET: // collect an identifier and check for keyword ! 818: { ! 819: char ll; ! 820: switch (ll = lxchar) { ! 821: // case 'l': ! 822: case 'L': ! 823: switch (get(lxchar)) { ! 824: case '\'': ! 825: error('s',"wide character constant"); ! 826: unget(lxchar); ! 827: continue; ! 828: case '"': ! 829: error('s',"wide character string"); ! 830: unget(lxchar); ! 831: continue; ! 832: } ! 833: unget(lxchar); ! 834: lxchar = ll; ! 835: } ! 836: } ! 837: ! 838: lxget( lxchar, LEXLET|LEXDIG ); ! 839: /* look for a keyword or a global type */ ! 840: if ((n = keyword_table->look(txtstart,0)) /* keyword */ ! 841: || (n = ktbl->look(txtstart, 0))) /* local type */ ! 842: { ! 843: TOK x; ! 844: del_txt(); ! 845: switch (x=n->base) { ! 846: case TNAME: ! 847: rets(ID,n->string); ! 848: case LOC: ! 849: retl(n->syn_class); ! 850: case EXTERN: ! 851: if ((lxchar = eat_whitespace()) == '\"') { ! 852: // linkage directive ! 853: get_string(); ! 854: rets(LINKAGE,txtstart); ! 855: } ! 856: unget(lxchar); ! 857: reti(TYPE,EXTERN); ! 858: case CATCH: ! 859: // case TEMPLATE: ! 860: error('s',"%k",n->syn_class); ! 861: continue; ! 862: case TRY: ! 863: { ! 864: static int warn_try; ! 865: if (!warn_try) { ! 866: Pname n = keyword_table->look("try",0); ! 867: n->n_key = DEFAULT; ! 868: error('w',&tloc,"%k is a future reserved keyword",n->syn_class); ! 869: warn_try++; ! 870: } ! 871: rets(ID,n->string); ! 872: } ! 873: default: ! 874: reti(n->syn_class,x); ! 875: } ! 876: } ! 877: // rets(ID,txtstart); ! 878: rets(ID, copy_if_need_be(txtstart)) ; ! 879: ! 880: case A_DIG: ! 881: ! 882: ret = ICON; ! 883: ! 884: if (lxchar=='0') { ! 885: int pkchar; ! 886: get(pkchar); ! 887: if(pkchar=='x' || pkchar=='X') { // hex ! 888: pch(lxchar); ! 889: lxget(pkchar,LEXHEX); ! 890: txtfree--; ! 891: if (txtfree-txtstart<3) // minimum "0Xd\0" ! 892: error("hex digitX after \"0x\""); ! 893: get(lxchar); ! 894: goto getsuffix; ! 895: } ! 896: unget(pkchar); ! 897: } ! 898: ! 899: lxget(lxchar,LEXDIG); ! 900: txtfree--; ! 901: ! 902: if (get(lxchar) == '.') { ! 903: getfp: ! 904: lxget('.', LEXDIG ); ! 905: txtfree--; ! 906: ret = FCON; ! 907: get(lxchar); ! 908: }; ! 909: ! 910: if (lxchar=='e' || lxchar=='E') { ! 911: pch(lxchar); ! 912: get(lxchar); ! 913: if(lxchar=='-' || lxchar=='+') { ! 914: pch(lxchar); ! 915: get(lxchar); ! 916: } ! 917: if (lxmask[lxchar+1] & LEXDIG) { ! 918: lxget( lxchar, LEXDIG ); ! 919: txtfree--; ! 920: get(lxchar); ! 921: } ! 922: else ! 923: error("missing exponent digits?"); ! 924: ret = FCON; ! 925: }; ! 926: ! 927: if(*txtstart=='0' && ret==ICON) { ! 928: char *bch = txtstart; ! 929: while (++bch <= txtfree) { ! 930: if(*bch=='8' || *bch=='9') ! 931: error("%c used as octal digit",*bch); ! 932: } ! 933: } ! 934: ! 935: getsuffix: ! 936: switch (lxchar) { ! 937: case 'f': ! 938: case 'F': ! 939: if (ret==ICON) ! 940: error("%c suffix for integer constant",lxchar); ! 941: else ! 942: pch(lxchar); ! 943: break; ! 944: case 'u': ! 945: case 'U': ! 946: if (ret==FCON) { ! 947: error("%c suffix for floating constant",lxchar); ! 948: break; ! 949: } ! 950: pch(lxchar); ! 951: switch(get(lxchar)) { ! 952: case 'l': ! 953: case 'L': ! 954: pch(lxchar); ! 955: break; ! 956: default: ! 957: saved=lxchar; ! 958: break; ! 959: } ! 960: break; ! 961: case 'l': ! 962: case 'L': ! 963: pch(lxchar); ! 964: if (ret==FCON) { ! 965: break; ! 966: } ! 967: switch(get(lxchar)) { ! 968: case 'u': ! 969: case 'U': ! 970: pch(lxchar); ! 971: break; ! 972: default: ! 973: saved=lxchar; ! 974: break; ! 975: } ! 976: break; ! 977: default: ! 978: saved = lxchar; ! 979: break; ! 980: }; ! 981: ! 982: if(*txtstart=='0' && txtfree-txtstart==1) ! 983: reti(ZERO,0); // plain zero ! 984: ! 985: pch(0); ! 986: rets(ret,txtstart); ! 987: ! 988: ! 989: case A_DOT: ! 990: switch (get(lxchar)) { ! 991: case '.': // look for ellipsis ! 992: if (get(lxchar) != '.') { ! 993: error("token .. ?"); ! 994: saved = lxchar; ! 995: } ! 996: reti(ELLIPSIS,0); ! 997: case '*': ! 998: reti (REFMUL,DOT); ! 999: } ! 1000: ! 1001: if (lxmask[lxchar+1] & LEXDIG){// look for floating constant ! 1002: unget(lxchar); ! 1003: goto getfp; ! 1004: } ! 1005: saved = lxchar; ! 1006: reti(DOT,0); ! 1007: ! 1008: case A_STR: ! 1009: /* save string constant in buffer */ ! 1010: get_string(); ! 1011: rets(STRING,txtstart); ! 1012: ! 1013: case A_CC: ! 1014: /* character constant */ ! 1015: rets(CCON,chconst()); ! 1016: ! 1017: case A_BCD: ! 1018: { ! 1019: register i; ! 1020: int j; ! 1021: ! 1022: pch('`'); ! 1023: ! 1024: for (i=0; i<7; ++i) { ! 1025: pch(get(j)); ! 1026: if (j == '`' ) break; ! 1027: } ! 1028: pch(0); ! 1029: if (6<i) ! 1030: error('l',"bcd constant exceeds 6 characters" ); ! 1031: rets(CCON,txtstart); ! 1032: } ! 1033: ! 1034: case A_SL: /* / */ ! 1035: switch (get(lxchar)) { ! 1036: case '*': ! 1037: lxcom(); ! 1038: break; ! 1039: case '/': ! 1040: linecom(); ! 1041: break; ! 1042: case '=': ! 1043: reti(ASOP,ASDIV); ! 1044: default: ! 1045: saved = lxchar; ! 1046: reti(DIVOP,DIV); ! 1047: } ! 1048: ! 1049: case A_WS: ! 1050: continue; ! 1051: ! 1052: case A_NL: ! 1053: ++tloc.line; ! 1054: // Nline++; ! 1055: saved = lxtitle(); ! 1056: continue; ! 1057: ! 1058: case A_LC: ! 1059: if (BLMAX <= b_level++) { ! 1060: error('l',"blocks too deeply nested"); ! 1061: ext(3); ! 1062: } ! 1063: retl(LC); ! 1064: ! 1065: case A_RC: ! 1066: if (lcount+b_level-- <= 0) { ! 1067: error("unexpected '}'"); ! 1068: b_level = 0; ! 1069: } ! 1070: retl(RC); ! 1071: ! 1072: case A_L: ! 1073: p_level++; ! 1074: reti(LP,0); ! 1075: ! 1076: case A_R: ! 1077: if (p_level-- <= 0) { ! 1078: error("unexpected ')'"); ! 1079: p_level = 0; ! 1080: } ! 1081: reti(RP,0); ! 1082: ! 1083: case A_ASS: ! 1084: switch (get(lxchar)) { ! 1085: case '=': ! 1086: reti(EQUOP,EQ); ! 1087: default: ! 1088: saved = lxchar; ! 1089: reti(ASSIGN,ASSIGN); ! 1090: } ! 1091: ! 1092: case A_COL: ! 1093: switch (get(lxchar)) { ! 1094: case ':': ! 1095: reti(MEM,0); ! 1096: case '=': ! 1097: error("':=' is not a c++ operator"); ! 1098: reti(ASSIGN,ASSIGN); ! 1099: default: ! 1100: saved = lxchar; ! 1101: reti(COLON,COLON); ! 1102: } ! 1103: case A_NOT: ! 1104: switch (get(lxchar)) { ! 1105: case '=': ! 1106: reti(EQUOP,NE); ! 1107: default: ! 1108: saved = lxchar; ! 1109: reti(NOT,NOT); ! 1110: } ! 1111: case A_GT: ! 1112: switch(get(lxchar)) { ! 1113: case '>': ! 1114: switch (get(lxchar)) { ! 1115: case '=': ! 1116: reti(ASOP,ASRS); ! 1117: break; ! 1118: default: ! 1119: saved = lxchar; ! 1120: reti(SHIFTOP,RS); ! 1121: } ! 1122: case '=': ! 1123: reti(RELOP,GE); ! 1124: default: ! 1125: saved = lxchar; ! 1126: reti(GT,GT); ! 1127: } ! 1128: case A_LT: ! 1129: switch (get(lxchar)) { ! 1130: case '<': ! 1131: switch (get(lxchar)) { ! 1132: case '=': ! 1133: reti(ASOP,ASLS); ! 1134: default: ! 1135: saved = lxchar; ! 1136: reti(SHIFTOP,LS); ! 1137: } ! 1138: case '=': ! 1139: reti(RELOP,LE); ! 1140: default: ! 1141: saved = lxchar; ! 1142: reti(LT,LT); ! 1143: } ! 1144: case A_AND: ! 1145: switch (get(lxchar)) { ! 1146: case '&': ! 1147: reti(ANDAND,ANDAND); ! 1148: case '=': ! 1149: reti(ASOP,ASAND); ! 1150: default: ! 1151: saved = lxchar; ! 1152: reti(AND,AND); ! 1153: } ! 1154: case A_OR: ! 1155: switch (get(lxchar)) { ! 1156: case '|': ! 1157: reti(OROR,OROR); ! 1158: case '=': ! 1159: reti(ASOP,ASOR); ! 1160: default: ! 1161: saved = lxchar; ! 1162: reti(OR,OR); ! 1163: } ! 1164: case A_ER: ! 1165: switch (get(lxchar)) { ! 1166: case '=': ! 1167: reti(ASOP,ASER); ! 1168: default: ! 1169: saved = lxchar; ! 1170: reti(ER,ER); ! 1171: } ! 1172: case A_PL: ! 1173: switch (get(lxchar)) { ! 1174: case '=': ! 1175: reti(ASOP,ASPLUS); ! 1176: case '+': ! 1177: reti(ICOP,INCR); ! 1178: default: ! 1179: saved = lxchar; ! 1180: reti(PLUS,PLUS); ! 1181: } ! 1182: case A_MIN: ! 1183: switch (get(lxchar)) { ! 1184: case '=': ! 1185: reti(ASOP,ASMINUS); ! 1186: case '-': ! 1187: reti(ICOP,DECR); ! 1188: case '>': ! 1189: if (get(lxchar) == '*') ! 1190: {reti(REFMUL,REF);} ! 1191: else ! 1192: saved = lxchar; ! 1193: reti(REF,REF); ! 1194: default: ! 1195: saved = lxchar; ! 1196: reti(MINUS,MINUS); ! 1197: } ! 1198: case A_MUL: ! 1199: switch (get(lxchar)) { ! 1200: case '=': ! 1201: reti(ASOP,ASMUL); ! 1202: case '/': ! 1203: error('w',"*/ not as end of comment"); ! 1204: default: ! 1205: saved = lxchar; ! 1206: reti(MUL,MUL); ! 1207: } ! 1208: case A_MOD: ! 1209: switch (get(lxchar)) { ! 1210: case '=': ! 1211: reti(ASOP,ASMOD); ! 1212: default: ! 1213: saved = lxchar; ! 1214: reti(DIVOP,MOD); ! 1215: } ! 1216: default: ! 1217: {error('i',"lex act==%d getc()->%d",p,lxchar);} ! 1218: ! 1219: } ! 1220: ! 1221: error('i',"lex, main switch"); ! 1222: } ! 1223: ! 1224: } ! 1225: ! 1226: int lxtitle() ! 1227: /* ! 1228: called after a newline; set linenumber and file name ! 1229: */ ! 1230: { ! 1231: register c; ! 1232: ! 1233: for(;;) ! 1234: switch ( get(c) ) { ! 1235: default: // e.g. not '\n', not '#' ! 1236: return c; ! 1237: case '\n': ! 1238: tloc.line++; ! 1239: // Nline++; ! 1240: ll: ! 1241: break; ! 1242: case '#': /* # lineno "filename" */ ! 1243: { int cl = tloc.line; ! 1244: tloc.line = 0; ! 1245: for(;;) ! 1246: switch (get(c)) { ! 1247: case '"': ! 1248: start_txt(); ! 1249: for(;;) ! 1250: switch (get(c)) { ! 1251: case '"': ! 1252: pch('\0'); ! 1253: ! 1254: while (get(c) != '\n') ; // skip to eol.. ignore anything more ! 1255: ! 1256: if (*txtstart) { // stack file name ! 1257: char* fn; ! 1258: if (tcurr_file == 0){ ! 1259: if (( fn = file_name[0]) ! 1260: && (strcmp(txtstart,fn)!=0)){ // 1st include ! 1261: if (MAXFILE*4<++Nfile) error('i',"fileN buffer overflow"); ! 1262: if (MAXFILE<++tcurr_file) error('i',"fileN stack overflow"); ! 1263: file_stack[tcurr_file] = Nfile; ! 1264: ! 1265: char* p1 = new char[txtfree-txtstart]; ! 1266: (void) strcpy(p1,txtstart); ! 1267: file_name[Nfile] = p1; ! 1268: // Nstr++; ! 1269: } ! 1270: else { //&& line is dummy #line "input.c" ! 1271: // ignore ! 1272: } ! 1273: //&& dead, dead, dead goto push; ! 1274: } ! 1275: else if ( (fn=file_name[file_stack[tcurr_file]]) ! 1276: && (strcmp(txtstart,fn)==0) ) { ! 1277: //new line, same file: ignore ! 1278: } ! 1279: else if ( (fn=file_name[file_stack[tcurr_file-1]]) ! 1280: && (strcmp(txtstart,fn)==0) ) { ! 1281: // previous file: pop ! 1282: tcurr_file--; ! 1283: } ! 1284: else { // new file name: push ! 1285: //&& push: ! 1286: if (MAXFILE*4<Nfile++) error('i',"fileN buffer overflow"); ! 1287: if (MAXFILE<tcurr_file++) error('i',"fileN stack overflow"); ! 1288: file_stack[tcurr_file] = Nfile; ! 1289: char* p = new char[txtfree-txtstart]; ! 1290: (void) strcpy(p,txtstart); ! 1291: file_name[Nfile] = p; ! 1292: // Nstr++; ! 1293: } ! 1294: } ! 1295: else { // no name .. back to the original .c file: "" ! 1296: tcurr_file = 0; ! 1297: } ! 1298: del_txt(); ! 1299: tloc.file = file_stack[tcurr_file]; ! 1300: goto ll; ! 1301: case '\n': ! 1302: error("unexpected end of line on '# line'"); ! 1303: default: ! 1304: pch(c); ! 1305: } ! 1306: case ' ': ! 1307: break; ! 1308: ! 1309: case '0': ! 1310: case '1': ! 1311: case '2': ! 1312: case '3': ! 1313: case '4': ! 1314: case '5': ! 1315: case '6': ! 1316: case '7': ! 1317: case '8': ! 1318: case '9': ! 1319: tloc.line = tloc.line*10+c-'0'; ! 1320: break; ! 1321: ! 1322: case 'l': // look for "#line ..." and then ignore "line" ! 1323: if (get(c)=='i' && get(c)=='n' && get(c)=='e') break; ! 1324: case '\n': ! 1325: tloc.putline(); ! 1326: goto ll; ! 1327: ! 1328: default: // pass #rubbish through ! 1329: tloc.line = cl; ! 1330: pch('#'); ! 1331: pch(c); ! 1332: while (get(c) != '\n') pch(c); ! 1333: pch('\0'); ! 1334: fprintf(out_file,"\n%s\n",txtstart); ! 1335: start_txt(); ! 1336: tloc.line++; ! 1337: // Nline++; ! 1338: goto ll; ! 1339: } ! 1340: } ! 1341: } ! 1342: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.