researchv10no/cmd/cfront/ocfront/lex.c - annotate

Return to lex.c CVS log
Up to [Research Unix] / researchv10no / cmd / cfront / ocfront
Annotation of researchv10no/cmd/cfront/ocfront/lex.c, revision 1.1.1.1

1.1       root        1: /*ident        "@(#)ctrans:src/lex.c   1.3.4.24" */
                      2: /***************************************************************************
                      3: 
                      4:        C++ source for cfront, the C++ compiler front-end
                      5:        written in the computer science research center of Bell Labs
                      6: 
                      7:        Copyright (c) 1984 AT&T, Inc. All Rights Reserved
                      8:        THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF AT&T, INC.
                      9: 
                     10: lex.c:
                     11:        lexical analyser based on pcc's and cpre's scanners
                     12:        modified to handle classes:
                     13:        new keywords:   class
                     14:                        public
                     15:                        call
                     16:                        etc.
                     17:        names are not entered in the symbol table by lex()
                     18:        names can be of arbitrary length
                     19:        error() is used to report errors
                     20:        {} and () must match
                     21:        numeric constants are not converted into internal representation
                     22:        but stored as strings
                     23: 
                     24: ****************************************************************************/
                     25: 
                     26: #include "cfront.h"
                     27: #include "yystype.h"
                     28: #include "size.h"
                     29: #include "tqueue.h"
                     30: 
                     31: # define  CCTRANS(x) x         /* expands to its argument unchanged */
                     32: 
                     33:        /* lexical actions: values stored in LXDOPE::lxact (see the lxdope table) */
                     34: 
                     35: #define A_ERR  0               /* illegal character */
                     36: #define A_LET  1               /* saw a letter */
                     37: #define A_DIG  2               /* saw a digit */
                     38: #define A_1C   3               /* return a single character */
                     39: #define A_STR  4               /* string */
                     40: #define A_CC   5               /* character constant */
                     41: #define A_BCD  6               /* GCOS BCD constant */
                     42: #define A_SL   7               /* saw a / */
                     43: #define A_DOT  8               /* saw a . */
                     44: #define A_2C   9               /* possible two character symbol */
                     45: #define A_WS   10              /* whitespace (not \n) */
                     46: #define A_NL   11              /* \n */
                     47: #define A_LC   12              /* { */
                     48: #define A_RC   13              /* } */
                     49: #define A_L    14              /* ( */
                     50: #define A_R    15              /* ) */
                     51: #define A_EOF  16              /* EOF (sentinel entry of lxdope) */
                     52: #define A_ASS  17              /* = */
                     53: #define A_LT   18              /* < */
                     54: #define A_GT   19              /* > */
                     55: #define A_ER   20              /* ^ */
                     56: #define A_OR   21              /* | */
                     57: #define A_AND  22              /* & */
                     58: #define A_MOD  23              /* % */
                     59: #define A_NOT  24              /* ! */
                     60: #define A_MIN  25              /* - */
                     61: #define A_MUL  26              /* '*' */
                     62: #define A_PL   27              /* + */
                     63: #define A_COL  28              /* : */
                     64: #define A_SHARP        29              /* # */
                     65: 
                     66:        /* character classes */
                     67: 
                     68: # define LEXLET 01             /* letters and '_' (see lex_init) */
                     69: # define LEXDIG 02             /* decimal digits 0-9 */
                     70: /* no LEXOCT because 8 and 9 used to be octal digits */
                     71: # define LEXHEX 010            /* 0-9, a-f, A-F */
                     72: # define LEXWS 020             /* blank, \t, \r, \b, \f, \013 */
                     73: # define LEXDOT 040            /* '.' */
                     74: 
                     75: const FIRSTCHUNK = 8*1024-8;   // size of the static text buffer inbuf
                     76: const BUFCHUNK = 4*1024-8;     // size of buf::chars in each overflow chunk
                     77: 
                     78:        /* text buffer: token text is collected in inbuf, then in chained buf chunks */
                     79: static char inbuf[FIRSTCHUNK/*TBUFSZ*/];
                     80: static char* txtmax = &inbuf[FIRSTCHUNK/*TBUFSZ*/-1];  // last position of the current chunk
                     81: static char* txtstart = 0;     // start of the text being collected (see start_txt)
                     82: static char* txtfree = 0;      // next free position (see pch)
                     83: 
                     84: static struct buf* bufhead;    // chunks handed out by new_buf(), newest first
                     85: static buf* freebuf;           // chunks recycled by lex_clear(), reused by new_buf()
                     86: //static bufs;
                     87: 
                     88: struct buf {                   // one overflow chunk of token text
                     89:        buf* next;              // link in either the bufhead or the freebuf list
                     90:        char chars[BUFCHUNK];
                     91: //     buf() { next=bufhead; bufhead=this; }
                     92: };
                     93: 
                     94: new_buf(char c)
                     95: {
                     96: //fprintf(stderr,"new_buf %d\n",bufs++);
                     97:        buf* pbuf;
                     98:        if (freebuf) {
                     99:                pbuf = freebuf;
                    100:                freebuf = freebuf->next;
                    101:        }
                    102:        else
                    103:                pbuf = new buf; // allocate and register new chunk
                    104:        pbuf->next = bufhead;
                    105:        bufhead = pbuf;
                    106: 
                    107:        if (BUFCHUNK < txtmax-txtstart) error('l',"lexical token too long");
                    108: 
                    109:        // copy current token:
                    110:        char* p = txtstart;
                    111:        txtstart = txtfree = &pbuf->chars[0];
                    112:        while (p<txtmax) *txtfree++ = *p++;
                    113:        *txtfree++=c;
                    114:        txtmax = &pbuf->chars[BUFCHUNK-1];
                    115:        return 0;
                    116: }
                    117: 
                    118: 
                    119: #define pch(c) ((txtmax<=txtfree)?new_buf(c):(*txtfree++=c))   /* append c; new_buf() when the chunk is full */
                    120: #define start_txt()    txtstart = txtfree      /* begin a new text at the free position */
                    121: #define del_txt()      txtfree = txtstart      /* discard the text collected since start_txt() */
                    122: 
                    123: static int Nfile;// = 1;
                    124: static char* file_name[MAXFILE*4];     // source file names
                    125:                                        // file_name[0] == src_file_name
                    126:                                        // file_name[0] == 0 means stdin
                    127: static short file_stack[MAXFILE];      // stack of file name indices
                    128: static int tcurr_file;                 // current index in file_stack
                    129:                                        // that is current #include nest level
                    130: 
                    131: Linkage linkage = linkage_default; // linkage is default C++   
                    132: const LINKMAX = 10;                // size of lvec; set_linkage() rejects deeper nesting
                    133: static Linkage lvec[LINKMAX] = { linkage_default };    // linkage stack used by set_linkage()
                    134: static int lcount = 0;             // index of the top of lvec
                    135: 
                    136: void set_linkage(char* p)
                    137: {
                    138:        if (p==0 || *p == 0) {  // resume previous linkage
                    139:                if (lcount>0) linkage = lvec[--lcount];
                    140:        }
                    141:        else {
                    142:                if (LINKMAX<=++lcount) {
                    143:                        error('l',"linkage directive nested too deep");
                    144:                        --lcount;
                    145:                } else if (strcmp(p,"C")==0)
                    146:                        lvec[lcount] = linkage = linkage_C;
                    147:                else if (strcmp(p,"C++")==0)
                    148:                        lvec[lcount] = linkage = linkage_Cplusplus;
                    149:                else {
                    150:                        error("%s linkage",p);
                    151:                        --lcount;
                    152:                }
                    153:        }
                    154: }
                    155: 
                    156: static struct loc tloc;                // location attached to tokens (see reti/retl); line counted by the scanner
                    157: FILE * out_file = stdout;
                    158: FILE * in_file = stdin;                // stream read by get()/unget()
                    159: Ptable ktbl;                           // keyword table built by ktbl_init()
                    160: static int p_level = 0;                /* number of unmatched ``(''s */
                    161: static int b_level = 0;                /* number of unmatched ``{''s */
                    162: 
                    163: # ifdef ibm
                    164: 
                    165: # define CSMASK 0377
                    166: # define CSSZ 256
                    167: 
                    168: # else
                    169: 
                    170: # define CSMASK 0177
                    171: # define CSSZ 128
                    172: 
                    173: # endif
                    174: 
                    175: static short lxmask[CSSZ+1];   /* LEX* class bits, indexed by character+1 so that EOF fits */
                    176: 
                    177: int saved = 0; /* putback character, avoid ungetchar */
                    178: static int lxtitle();
                    179: 
                    180: overload rt;
                    181: inline YYSTYPE rt(char* x) { YYSTYPE y; y.s  = x;        return y; }
                    182: inline YYSTYPE rt(int   x) { YYSTYPE y; y.t  = x;        return y; }
                    183: inline YYSTYPE rt(loc   x) { YYSTYPE y; y.l  = x;        return y; }
                    184: inline YYSTYPE rt(void* x) { YYSTYPE y; y.pn = Pname(x); return y; }
                    185: 
                    186: #define get(c)         (c=getc(in_file))       /* read next character of in_file into c; value is c */
                    187: #define unget(c)       ungetc(c,in_file)       /* push c back onto in_file */
                    188: 
                    189: #define reti(a,b)      { addtok(a, rt(b), tloc);          return a; }  /* addtok() token a with value b, then return a */
                    190: #define retn(a,b)      { addtok(a, rt((Pnode)b), tloc);   return a; }  /* same, b cast to Pnode */
                    191: #define rets(a,b)      { addtok(a, rt(b), tloc);          return a; }  /* same expansion as reti */
                    192: #define retl(a)                { addtok(a, rt(tloc), tloc);       return a; }  /* value is the location tloc */
                    193: 
                    194: void ktbl_init()
                    195: /*
                    196:        enter keywords into keyword table for use by lex()
                    197:        and into keyword representation table used for output
                    198:        creates ktbl, then makes one new_key() call per keyword; called from lex_init() */
                    199: {
                    200:        ktbl = new table(KTBLSIZE,0,0);
                    201: 
                    202:        new_key("asm",ASM,0);
                    203:        new_key("auto",AUTO,TYPE);
                    204:        new_key("break",LOC,BREAK);     // statement keywords: second argument LOC, keyword constant third
                    205:        new_key("case",LOC,CASE);
                    206:        new_key("continue",LOC,CONTINUE);
                    207:        new_key("char",CHAR,TYPE);
                    208:        new_key("do",LOC,DO);
                    209:        new_key("double",DOUBLE,TYPE);
                    210:        new_key("default",LOC,DEFAULT);
                    211:        new_key("enum",ENUM,0);
                    212:        new_key("else",LOC,ELSE);
                    213:        new_key("extern",EXTERN,TYPE);
                    214:        new_key("float",FLOAT,TYPE);
                    215:        new_key("for",LOC,FOR);
                    216: //     new_key("fortran",FORTRAN,0);
                    217:        new_key("goto",LOC,GOTO);
                    218:        new_key("catch",CATCH,CATCH);
                    219:        new_key("try",TRY,TRY);
                    220:        new_key("if",LOC,IF);
                    221:        new_key("int",INT,TYPE);
                    222:        new_key("long",LONG,TYPE);
                    223:        new_key("return",LOC,RETURN);
                    224:        new_key("register",REGISTER,TYPE);
                    225:        new_key("static",STATIC,TYPE);
                    226:        new_key("struct",STRUCT,AGGR);
                    227:        new_key("sizeof",SIZEOF,0);
                    228:        new_key("short",SHORT,TYPE);
                    229:        new_key("switch",LOC,SWITCH);
                    230:        new_key("template",TEMPLATE,TEMPLATE);
                    231:        new_key("typedef",TYPEDEF,TYPE);
                    232:        new_key("unsigned",UNSIGNED,TYPE);
                    233:        new_key("union",UNION,AGGR);
                    234:        new_key("void",VOID,TYPE);
                    235:        new_key("while",LOC,WHILE);
                    236: 
                    237:        new_key("class",CLASS,AGGR);    // keywords not shared with C start here
                    238:        new_key("const",CONST,TYPE);
                    239:        new_key("delete",LOC,DELETE);
                    240:        new_key("friend",FRIEND,TYPE);
                    241:        new_key("inline",INLINE,TYPE);
                    242:        new_key("new",NEW,0);
                    243:        new_key("operator",OPERATOR,0);
                    244:        new_key("overload",OVERLOAD,TYPE);
                    245:        new_key("private",PRIVATE,PR);
                    246:        new_key("protected",PROTECTED,PR);
                    247:        new_key("public",PUBLIC,PR);
                    248:        new_key("signed",SIGNED,TYPE);
                    249:        new_key("this",THIS,0);
                    250:        new_key("virtual",VIRTUAL,TYPE);
                    251:        new_key("volatile",VOLATILE,TYPE);
                    252: }
                    253: 
                    254: loc last_line;
                    255: loc noloc = { 0, 0 };
                    256: 
                    257: void loc::putline()
                    258: {
                    259:        if (file==0 && line==0) return;
                    260: //   fix, Nfile, not MAXFILE; Nfile == MAXFILE*4
                    261: //     if (0<=file && file<MAXFILE) {
                    262:        if ( 0<=file && file <= Nfile ) {
                    263:                char* f = file_name[file];
                    264:                if (f==0) f = src_file_name;
                    265:                fprintf(out_file,line_format,line,f);
                    266:                last_line = *this;
                    267:        }
                    268: }
                    269: 
                    270: void loc::put(FILE* p)
                    271: {
                    272: //    fix, Nfile, not MAXFILE; Nfile == MAXFILE*4
                    273: //     if (0<=file && file<MAXFILE) {
                    274:        if ( 0<=file && file <= Nfile ) {
                    275:                char* f = file_name[file];
                    276:                if (f==0) f = src_file_name;
                    277:                fprintf(p,"\"%s\", line %d: ",f,line);
                    278:        }
                    279: }
                    280: 
                    281: void lxenter(register char* s, short m)
                    282: /* enter a mask into lxmask */
                    283: {
                    284:        register c;
                    285: 
                    286:        while( c= *s++ ) lxmask[c+1] |= m;
                    287: 
                    288: }
                    289: 
                    290: 
                    291: void lxget(register c, register m)
                    292: /*
                    293:        put 'c' back then scan for members of character class 'm'
                    294:        terminate the string read with \0
                    295:        txtfree points to the character position after that \0
                    296: */
                    297: {
                    298:        pch(c);
                    299:        while ( (get(c), lxmask[c+1]&m) ) pch(c);
                    300:        unget(c);
                    301:        pch('\0');
                    302: }
                    303: 
                    304: struct LXDOPE {
                    305:        short lxch;     /* the character */
                    306:        short lxact;    /* the action to be performed */
                    307:        TOK   lxtok;    /* the token number to be returned */
                    308: } lxdope[] = {
                    309: #ifdef apollo
                    310:        '@',    A_ERR,  0,      /* illegal characters go here... */
                    311: #else
                    312:        '$',    A_ERR,  0,      /* illegal characters go here... */
                    313: #endif
                    314:        '_',    A_LET,  0,      /* letters point here */
                    315:        '0',    A_DIG,  0,      /* digits point here */
                    316:        ' ',    A_WS,   0,      /* whitespace goes here */
                    317:        '\n',   A_NL,   0,
                    318:        '"',    A_STR,  0,      /* character string */
                    319:        '\'',   A_CC,   0,      /* ASCII character constant */
                    320:        '`',    A_BCD,  0,      /* 'foreign' character constant, e.g. BCD */
                    321:        '(',    A_L,    LP,
                    322:        ')',    A_R,    RP,
                    323:        '{',    A_LC,   LC,
                    324:        '}',    A_RC,   RC,
                    325:        '[',    A_1C,   LB,
                    326:        ']',    A_1C,   RB,
                    327:        '*',    A_MUL,  MUL,
                    328:        '?',    A_1C,   QUEST,
                    329:        ':',    A_COL,  COLON,
                    330:        '+',    A_PL,   PLUS,
                    331:        '-',    A_MIN,  MINUS,
                    332:        '/',    A_SL,   DIV,
                    333:        '%',    A_MOD,  MOD,
                    334:        '&',    A_AND,  AND,
                    335:        '|',    A_OR,   OR,
                    336:        '^',    A_ER,   ER,
                    337:        '!',    A_NOT,  NOT,
                    338:        '~',    A_1C,   COMPL,
                    339:        ',',    A_1C,   CM,
                    340:        ';',    A_1C,   SM,
                    341:        '.',    A_DOT,  DOT,
                    342:        '<',    A_LT,   LT,
                    343:        '>',    A_GT,   GT,
                    344:        '=',    A_ASS,  ASSIGN,
                    345:        '#',    A_SHARP, 0,
                    346:        EOF,    A_EOF,  EOFTOK
                    347:        };
                    348: /* note: EOF is used as sentinel, so must be <=0 and last entry in table */
                    349: 
                    350: static struct LXDOPE *lxcp[CSSZ+1];
                    351: 
                    352: void lex_init()
                    353: {
                    354:        register struct LXDOPE *p;
                    355:        register i;
                    356:        register char *cp;
                    357:        /* set up character classes */
                    358: 
                    359:        /* first clear lexmask */
                    360:        for(i=0; i<=CSSZ; i++) lxmask[i] = 0;
                    361: 
                    362: #ifdef apollo
                    363:        lxenter( "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_$", LEXLET );
                    364: #else
                    365:        lxenter( "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_", LEXLET );
                    366: #endif
                    367:        lxenter( "0123456789", LEXDIG );
                    368:        lxenter( "0123456789abcdefABCDEF", LEXHEX );
                    369:                /* \013 should become \v someday; \013 is OK for ASCII and EBCDIC */
                    370:        lxenter( " \t\r\b\f\013", LEXWS );
                    371:        lxmask['.'+1] |= LEXDOT;
                    372: 
                    373:        /* make lxcp point to appropriate lxdope entry for each character */
                    374: 
                    375:        /* initialize error entries */
                    376: 
                    377:        for( i= 0; i<=CSSZ; ++i ) lxcp[i] = lxdope;
                    378: 
                    379:        /* make unique entries */
                    380: 
                    381:        for( p=lxdope; ; ++p ) {
                    382:                lxcp[p->lxch+1] = p;
                    383:                if( p->lxch < 0 ) break;
                    384:        }
                    385: 
                    386:        /* handle letters, digits, and whitespace */
                    387:        /* by convention, first, second, and third places */
                    388: 
                    389:        cp = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
                    390:        while( *cp ) lxcp[*cp++ + 1] = &lxdope[1];
                    391:        cp = "123456789";
                    392:        while( *cp ) lxcp[*cp++ + 1] = &lxdope[2];
                    393:        cp = "\t\b\r\f\013";
                    394:        while( *cp ) lxcp[*cp++ + 1] = &lxdope[3];
                    395: 
                    396:        file_name[0] = src_file_name;
                    397:        // set both curloc and tloc so curloc is valid at program startup
                    398: //     curloc.file = tloc.file = 0;
                    399:        curloc.line = tloc.line = 1;
                    400: 
                    401:        ktbl_init();
                    402:        lex_clear();
                    403:        saved = lxtitle();
                    404: }
                    405: 
                    406: void lex_clear()
                    407: {
                    408:        // delete extra buffers:
                    409:        buf* p = bufhead;       
                    410:        bufhead = 0;
                    411: //if (p) {
                    412: //fprintf(stderr,"lex_clear\n");
                    413: //bufs=0;
                    414: //}
                    415:        while (p) {
                    416:                buf* pp = p;
                    417:                p = p->next;
                    418:                pp->next = freebuf;
                    419:                freebuf = pp;
                    420:        }
                    421: 
                    422:        // re-set to static buffer:
                    423:        txtstart = txtfree = inbuf;
                    424:        txtmax = &inbuf[FIRSTCHUNK/*TBUFSZ*/-1];
                    425: }
                    426: 
                    int int_val(char hex)
                    /*
                            return the value (0..15) of the hexadecimal digit 'hex',
                            upper or lower case.
                            returns -1 if 'hex' is not a hexadecimal digit; previously
                            control fell off the end of the function in that case and
                            the result was undefined.
                    */
                    {
                            switch (hex) {
                            case '0': case '1': case '2': case '3': case '4':
                            case '5': case '6': case '7': case '8': case '9':
                                    return hex-'0';
                            case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
                                    return hex-'a'+10;
                            case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
                                    return hex-'A'+10;
                            default:
                                    return -1;
                            }
                    }
                    439: 
                    440: void hex_to_oct()
                    441: /*
                    442:        \x has been seen on input (in char const or string) and \ printed
                    443:        read the following hexadecimal integer and replace it with an octal
                    444: */
                    445: {
                    446:        int i = 0;
                    447:        int c;
                    448:        get(c);
                    449:        if (lxmask[c+1] & LEXHEX) {
                    450:                i = int_val(c);
                    451:                get(c);                                 // try for two
                    452:                if (lxmask[c+1] & LEXHEX) {
                    453:                        i = (i<<4) + int_val(c);
                    454:                        get(c);                         // try for three
                    455:                        if (lxmask[c+1] & LEXHEX)
                    456:                                i = (i<<4) + int_val(c);
                    457:                        else
                    458:                                unget(c);
                    459:                }
                    460:                else
                    461:                        unget(c);
                    462:        }
                    463:        else {
                    464:                error("hexadecimal digitE after \\x");
                    465:                unget(c);
                    466:        }
                    467: 
                    468: //     if (0377 < i) error('l',"hexadecimal constant too large");
                    469:        i &= 0377;
                    470: 
                    471:        pch(('0'+(i>>6)));
                    472:        pch(('0'+((i&070)>>3)));
                    473:        pch(('0'+(i&7)));
                    474: }
                    475: 
                    476: 
                    477: char * chconst()
                    478: /*
                    479:        read a character constant into inbuf
                    480: */
                    481: {
                    482:        register c;
                    483:        int nch = 0;
                    484: 
                    485:        pch('\'');
                    486: 
                    487:        for(;;) {
                    488:                char* p;
                    489:                char cc = 0;
                    490: 
                    491:                switch (get(c)) {
                    492:                case '\'':
                    493:                        goto ex;
                    494:                case EOF:
                    495:                        error("eof in char constant");
                    496:                        goto ex;
                    497:                case '\n':
                    498:                        error("newline in char constant");
                    499:                        goto ex;
                    500:                case '\\':
                    501:                        if (SZ_INT == nch++) error('l',"char constant too long");
                    502:                        pch(c);
                    503:                        switch (get(c)){
                    504:                        case '\n':
                    505:                                ++tloc.line;
                    506:                        default:
                    507:                                pch(c);
                    508:                                break;
                    509:                        case '4': case '5': case '6': case '7': // octal
                    510:                                p = txtfree;
                    511:                                cc = c-4;
                    512:                        case '0': case '1': case '2': case '3':
                    513:                                pch(c);
                    514:                                get(c);  /* try for 2 */
                    515:                                if( lxmask[c+1] & LEXDIG && c<'8'){
                    516:                                        pch(c);
                    517:                                        get(c);  /* try for 3 */
                    518:                                        if (lxmask[c+1] & LEXDIG && c<'8') {
                    519:                                                if (cc) *p = cc; // zap high bit
                    520:                                                pch(c);
                    521:                                        }
                    522:                                        else
                    523:                                                unget(c);
                    524:                                }
                    525:                                else
                    526:                                        unget(c);
                    527:                                break;
                    528:                        case 'x':       // hexadecimal
                    529:                                hex_to_oct();
                    530:                                break;
                    531:                        };
                    532:                        break;
                    533:                default:
                    534:                        if (SZ_INT == nch++) error('l',"char constant too long");
                    535:                        pch(c);
                    536:                }
                    537:        }
                    538: ex:
                    539:        if(nch==0)
                    540:                error("empty char constant");
                    541:        pch('\'');
                    542:        pch('\0');
                    543:        return txtstart;
                    544: }
                    545: 
                    546: void lxcom()
                    547: /* process a "block comment" */
                    548: {
                    549:        register c;
                    550: 
                    551:        for(;;)
                    552:        switch (get(c)) {
                    553:        case EOF:
                    554:                error('w',"eof in comment");
                    555:                return;
                    556:        case '\n':
                    557:                tloc.line++;
                    558: //             Nline++;
                    559:                break;
                    560:        case '*':
                    561:                if (get(c) == '/') return;
                    562:                unget(c);
                    563:                break;
                    564:        case '/':
                    565:                if (get(c) == '*') error('w',"``/*'' in comment");
                    566:                unget(c);
                    567:                break;
                    568:        }
                    569: }
                    570: 
                    571: 
                    572: void linecom()
                    573: // process a "line comment"
                         // Discards the rest of the current line.  The visible callers invoke
                         // it right after reading the two slashes that open the comment.
                         // On reaching the newline it increments tloc.line and stores the
                         // value returned by lxtitle() in the global `saved`; tlex() picks
                         // `saved` up as its next input character.
                         // At EOF it issues a warning and returns without touching `saved`.
                    574: {
                    575:        register c;
                    576: 
                    577:        get(c);         // first character of the comment text
                    578: #ifdef DBG
                                 // Debug builds only: a comment whose text starts with "@!" has
                                 // the remainder of its line collected with pch() and passed to
                                 // process_debug_flags(); del_txt() is called afterwards.
                                 // When the inner loop stops, c holds '\n' or EOF, so the scan
                                 // loop below finishes on its first pass.
                    579:        if ( c=='@' && get(c)=='!' ) {
                    580:                while ( get(c) != '\n' && c != EOF ) pch(c);
                    581:                pch('\0');
                    582:                process_debug_flags(txtstart);
                    583:                del_txt();
                    584:        }
                    585: #endif
                                 // examine the character already in c first, then keep reading
                    586:        for(;;get(c))
                    587:        switch (c) {
                    588:        case EOF:
                    589:                error('w',"eof in comment");
                    590:                return;
                    591:        case '\n':
                    592:                tloc.line++;
                    593: //             Nline++;
                                 // lxtitle() is defined elsewhere; the same call follows a
                                 // newline in tlex() and eat_whitespace()
                    594:                saved = lxtitle();
                    595:                return;
                    596:        }
                    597: }
                    598: 
                    599: char eat_whitespace()
                    600: {
                    601: 
                    602:        for(;;) {
                    603:                register c = get(c);
                    604:        lx:
                    605: 
                    606:                switch (c) {
                    607:                case EOF:
                    608:                        error('w',"unexpected comment");
                    609:                        return EOF;
                    610:                case '/':
                    611:                        switch (get(c)) {
                    612:                        case '*':
                    613:                                lxcom();
                    614:                                break;
                    615:                        case '/':
                    616:                                linecom();
                    617:                                break;
                    618:                        default:
                    619:                                unget(c);
                    620:                                return '/';
                    621:                        }
                    622:                        break;
                    623:                case '\n':
                    624:                        ++tloc.line;
                    625:                        c = lxtitle();
                    626:                        goto lx;
                    627:                case ' ':
                    628:                case '\t':
                    629:                        break;
                    630:                default:
                    631:                        return c;               
                    632:                }
                    633:        }       
                    634: }
                    635: 
                    636: void get_string()
                         // Collects the body of a string literal into the token text (via
                         // pch) and terminates it with a NUL.  The visible callers enter after
                         // the opening '"' has been read; neither quote is stored.
                         // Escapes are copied as backslash + character, except \x, which is
                         // handed to hex_to_oct().
                         // A closing '"' followed (after white space and comments) by another
                         // '"' continues the same string: adjacent literals are catenated.
                         // An unescaped newline or EOF inside the literal is reported with
                         // error(); the text gathered so far is still NUL-terminated.
                    637: {
                    638:        int lxchar;
                    639: 
                    640:        for(;;)
                    641:        switch (get(lxchar)) {
                    642:        case '\\':
                    643:                pch('\\');
                    644:                switch (get(lxchar)){
                    645:                case '\n':
                    646:                        ++tloc.line;    // escaped newline: count the line, then fall through and copy it
                    647:                default:
                    648:                        pch(lxchar);
                    649:                        break;
                    650:                case 'x':       // hexadecimal
                                         // hex_to_oct() is defined elsewhere; presumably it
                                         // re-emits the hex escape in octal form -- confirm
                    651:                        hex_to_oct();
                    652:                        break;
                    653:                };
                    654:                break;
                    655:        case '"':
                                 // closing quote: look past white space for another literal
                    656:         {       char* p = txtstart;     // eat_whitespace() moves txtstart
                    657:                 if ((lxchar = eat_whitespace()) == '"') {
                    658:                         // string catenation, break with
                    659:                         // newline to avoid merging characters
                    660:                         // (e.g. "\xAB" "C")
                    661:                         pch('\\');
                    662:                         pch('\n');
                    663: 
                    664:                         continue;       // eat '\"' and carry on
                    665:                 };
                    666: 
                                 // NOTE(review): p is written back only on this path; if
                                 // eat_whitespace() does move txtstart, confirm that the
                                 // catenation path above does not need the same restore.
                    667:                 txtstart = p;
                    668:                unget(lxchar);  // the look-ahead character belongs to the next token
                    669:                pch(0);
                    670:                return;
                    671:         }
                    672:        case '\n':
                    673:                error("newline in string");
                    674:                pch(0);
                    675:                return;
                    676:        case EOF:
                    677:                error("eof in string");
                    678:                pch(0);
                    679:                return;
                    680:        default:
                    681:                pch(lxchar);
                    682:        }
                    683: }
                    684: 
                    685: TOK tlex()
                    686: {
                    687:        TOK ret;
                    688:        Pname n;
                    689: 
                    690: //     Ntoken++;
                    691: 
                    692:        for(;;) {
                    693:                register lxchar;
                    694:                register struct LXDOPE *p;
                    695: 
                    696:                start_txt();
                    697: 
                    698:                if (saved) {
                    699:                        lxchar = saved;
                    700:                        saved = 0;
                    701:                }
                    702:                else
                    703:                        get(lxchar);
                    704: 
                    705:                if (lxchar+1 >= CSSZ )
                    706:                 error( "illegal input character encountered: %d", lxchar );
                    707: 
                    708:                switch( (p=lxcp[lxchar+1])->lxact ){
                    709: 
                    710:                case A_1C:      // eat up a single character, and return an opcode 
                    711:                        reti(p->lxtok,p->lxtok);
                    712: 
                    713:                case A_EOF:
                    714:                        if (p_level || b_level+lcount)
                    715:                                error("'%s' missing at end of input",(b_level+lcount) ? "}" : ")");
                    716:                
                    717:                        reti(EOFTOK,0);
                    718: 
                    719:                case A_SHARP:
                    720:                        // cope with header file not ended with '\n'
                    721:                        unget('#');
                    722:                        saved = lxtitle();
                    723:                        continue;
                    724: 
                    725:                case A_ERR:
                    726:                {       if (' '<=lxchar && lxchar<='~') // ASCII printable
                    727:                                error("illegal character '%c' (ignored)",lxchar);
                    728:                        else
                    729:                                error("illegal character '0%o' (ignored)",lxchar);
                    730:                        continue;
                    731:                }
                    732:                case A_LET:     // collect an identifier and check for keyword
                    733:                {
                    734:                        char ll;
                    735:                        switch (ll = lxchar) {
                    736:                //      case 'l':
                    737:                        case 'L':
                    738:                                switch (get(lxchar)) {
                    739:                                case '\'':
                    740:                                        error('s',"wide character constant");
                    741:                                        unget(lxchar);
                    742:                                        continue;
                    743:                                case '"':
                    744:                                        error('s',"wide character string");
                    745:                                        unget(lxchar);
                    746:                                        continue;
                    747:                                }
                    748:                                unget(lxchar);
                    749:                                lxchar = ll;
                    750:                        }
                    751:                }
                    752:                        lxget( lxchar, LEXLET|LEXDIG );
                    753: 
                    754: //error( 'd', "lex: b_level: %d txtstart %s", b_level, txtstart);
                    755: // local class
                    756:                        if (n = ktbl->look(txtstart,0)) {
                    757:                                TOK x;
                    758:                                del_txt();
                    759:                                switch (x=n->base) {
                    760:                                case TNAME:
                    761:                                        rets(ID,n->string);
                    762:                                //error('d',"lex tname %n",n);
                    763:                                //if (b_level > 1) {
                    764:                                //      Pname nn = ktbl->look(txtstart,LOCAL);
                    765:                                //      if ( nn ) {
                    766:                                //              n = nn;
                    767:                                //error( 'd', "lex: local class instance: %n", nn );
                    768:                                //      }
                    769:                                //}
                    770:                                //      retn(TNAME,n);
                    771:                                case LOC:
                    772:                                        retl(n->syn_class);
                    773:                                case EXTERN:
                    774:                                        if ((lxchar = eat_whitespace()) == '\"') {
                    775:                                                // linkage directive
                    776:                                                get_string();
                    777:                                                rets(LINKAGE,txtstart);
                    778:                                        }
                    779:                                        unget(lxchar);
                    780:                                        reti(TYPE,EXTERN);
                    781:                                case CATCH:
                    782:                                case TEMPLATE:
                    783:                                        error('s',"%k",n->syn_class);
                    784:                                        continue;
                    785:                                case TRY:
                    786:                                {
                    787:                                        static int warn_try;
                    788:                                        if (!warn_try) { 
                    789:                                                Pname n = ktbl->look("try",0);
                    790:                                                n->n_key = DEFAULT;
                    791:                                                error('w',&tloc,"%k is a future reserved keyword",n->syn_class);
                    792:                                                warn_try++;
                    793:                                        }
                    794:                                        rets(ID,n->string);
                    795:                                }
                    796:                                default:
                    797:                                        reti(n->syn_class,x);
                    798:                                }
                    799:                        }
                    800:                        rets(ID,txtstart);
                    801:                        //else
                    802:                        // local class
                    803:                        //if ( b_level && (n=ktbl->look(txtstart,LOCAL)) ) {
                    804:                        //error( 'd', "lex2: local class instance: %n", n );
                    805:                        //      retn(TNAME,n);
                    806:                        //} else
                    807:                        //      rets(ID,txtstart);
                    808: 
                    809:                case A_DIG:
                    810: 
                    811:                        ret = ICON;
                    812: 
                    813:                        if (lxchar=='0') {
                    814:                                int pkchar;
                    815:                                get(pkchar);
                    816:                                if(pkchar=='x' || pkchar=='X') {        // hex
                    817:                                        pch(lxchar);
                    818:                                        lxget(pkchar,LEXHEX);
                    819:                                        txtfree--;
                    820:                                        if (txtfree-txtstart<3) // minimum "0Xd\0"
                    821:                                                error("hex digitX after \"0x\"");
                    822:                                        get(lxchar);
                    823:                                        goto getsuffix;
                    824:                                }
                    825:                                unget(pkchar);
                    826:                        }
                    827: 
                    828:                        lxget(lxchar,LEXDIG);
                    829:                        txtfree--;
                    830: 
                    831:                        if (get(lxchar) == '.') {
                    832:                getfp:
                    833:                                lxget('.', LEXDIG );
                    834:                                txtfree--;
                    835:                                ret = FCON;
                    836:                                get(lxchar);
                    837:                        };
                    838:                
                    839:                        if (lxchar=='e' || lxchar=='E') {
                    840:                                pch(lxchar);
                    841:                                get(lxchar);
                    842:                                if(lxchar=='-' || lxchar=='+') {
                    843:                                        pch(lxchar);
                    844:                                        get(lxchar);
                    845:                                }
                    846:                                if (lxmask[lxchar+1] & LEXDIG) {
                    847:                                        lxget( lxchar, LEXDIG );
                    848:                                        txtfree--;
                    849:                                        get(lxchar);
                    850:                                }
                    851:                                else
                    852:                                        error("missing exponent digits?");
                    853:                                ret = FCON;
                    854:                        };
                    855: 
                    856:                        if(*txtstart=='0' && ret==ICON) {
                    857:                                char *bch = txtstart;
                    858:                                while (++bch <= txtfree) {
                    859:                                        if(*bch=='8' || *bch=='9')
                    860:                                                error("%c used as octal digit",*bch);
                    861:                                }
                    862:                        }
                    863: 
                    864:                getsuffix:
                    865:                        switch (lxchar) {
                    866:                        case 'f':
                    867:                        case 'F':
                    868:                                if (ret==ICON)
                    869:                                        error("%c suffix for integer constant",lxchar);
                    870:                                else
                    871:                                        pch(lxchar);
                    872:                                break;
                    873:                        case 'u':
                    874:                        case 'U':
                    875:                                if (ret==FCON) {
                    876:                                        error("%c suffix for floating constant",lxchar);
                    877:                                        break;
                    878:                                }
                    879:                                pch(lxchar);
                    880:                                switch(get(lxchar)) {
                    881:                                case 'l':
                    882:                                case 'L':
                    883:                                        pch(lxchar);
                    884:                                        break;
                    885:                                default:
                    886:                                        saved=lxchar;
                    887:                                        break;
                    888:                                }
                    889:                                break;
                    890:                        case 'l':
                    891:                        case 'L':
                    892:                                pch(lxchar);
                    893:                                if (ret==FCON) {
                    894:                                        break;
                    895:                                }
                    896:                                switch(get(lxchar)) {
                    897:                                case 'u':
                    898:                                case 'U':
                    899:                                        pch(lxchar);
                    900:                                        break;
                    901:                                default:
                    902:                                        saved=lxchar;
                    903:                                        break;
                    904:                                }
                    905:                                break;
                    906:                        default:
                    907:                                saved = lxchar;
                    908:                                break;
                    909:                        };
                    910: 
                    911:                        if(*txtstart=='0' && txtfree-txtstart==1)
                    912:                                reti(ZERO,0);   // plain zero
                    913: 
                    914:                        pch(0);
                    915:                        rets(ret,txtstart);
                    916: 
                    917: 
                    918:                case A_DOT:
                    919:                        switch (get(lxchar)) {
                    920:                        case '.':       // look for ellipsis
                    921:                                if (get(lxchar) != '.') {
                    922:                                        error("token .. ?");
                    923:                                        saved = lxchar;
                    924:                                }
                    925:                                reti(ELLIPSIS,0);
                    926:                        case '*':
                    927:                                reti (REFMUL,DOT);
                    928:                        }
                    929: 
                    930:                        if (lxmask[lxchar+1] & LEXDIG){// look for floating constant
                    931:                                unget(lxchar);
                    932:                                goto getfp;
                    933:                        }
                    934:                        saved = lxchar;
                    935:                        reti(DOT,0);
                    936: 
                    937:                case A_STR:
                    938:                        /* save string constant in buffer */
                    939:                        get_string();
                    940:                        rets(STRING,txtstart);
                    941: 
                    942:                case A_CC:
                    943:                        /* character constant */
                    944:                        rets(CCON,chconst());
                    945: 
                    946:                case A_BCD:
                    947:                        {
                    948:                                register i;
                    949:                                int j;
                    950:        
                    951:                                pch('`');
                    952:        
                    953:                                for (i=0; i<7; ++i) {
                    954:                                        pch(get(j));
                    955:                                        if (j == '`' ) break;
                    956:                                }
                    957:                                pch(0);
                    958:                                if (6<i)
                    959:                                        error('l',"bcd constant exceeds 6 characters" );
                    960:                                rets(CCON,txtstart);
                    961:                        }
                    962: 
                    963:                case A_SL:      /* / */
                    964:                        switch (get(lxchar))  {
                    965:                        case '*':
                    966:                                lxcom();
                    967:                                break;
                    968:                        case '/':
                    969:                                linecom();
                    970:                                break;
                    971:                        case '=':
                    972:                                reti(ASOP,ASDIV);
                    973:                        default:
                    974:                                saved = lxchar;
                    975:                                reti(DIVOP,DIV);
                    976:                        }
                    977: 
                    978:                case A_WS:
                    979:                        continue;
                    980: 
                    981:                case A_NL:
                    982:                        ++tloc.line;
                    983: //                     Nline++;
                    984:                        saved = lxtitle();
                    985:                        continue;
                    986: 
                    987:                case A_LC:
                    988:                        if (BLMAX <= b_level++) {
                    989:                                error('l',"blocks too deeply nested");
                    990:                                ext(3);
                    991:                        }
                    992:                        retl(LC);
                    993: 
                    994:                case A_RC:
                    995:                        if (lcount+b_level-- <= 0) {
                    996:                                error("unexpected '}'");
                    997:                                b_level = 0;
                    998:                        }
                    999:                        retl(RC);
                   1000: 
                   1001:                case A_L:
                   1002:                        p_level++;
                   1003:                        reti(LP,0);
                   1004: 
                   1005:                case A_R:
                   1006:                        if (p_level-- <= 0) {
                   1007:                                error("unexpected ')'");
                   1008:                                p_level = 0;
                   1009:                        }
                   1010:                        reti(RP,0);
                   1011: 
                   1012:                case A_ASS:
                   1013:                        switch (get(lxchar)) {
                   1014:                        case '=':
                   1015:                                reti(EQUOP,EQ);
                   1016:                        default:
                   1017:                                saved = lxchar;
                   1018:                                reti(ASSIGN,ASSIGN);
                   1019:                        }
                   1020: 
                   1021:                case A_COL:
                   1022:                        switch (get(lxchar)) {
                   1023:                        case ':':
                   1024:                                reti(MEM,0);
                   1025:                        case '=':
                   1026:                                error("':=' is not a c++ operator");
                   1027:                                reti(ASSIGN,ASSIGN);
                   1028:                        default:
                   1029:                                saved = lxchar;
                   1030:                                reti(COLON,COLON);
                   1031:                        }
                   1032:                case A_NOT:
                   1033:                        switch (get(lxchar)) {
                   1034:                        case '=':
                   1035:                                reti(EQUOP,NE);
                   1036:                        default:
                   1037:                                saved = lxchar;
                   1038:                                reti(NOT,NOT);
                   1039:                        }
                   1040:                case A_GT:
                   1041:                        switch(get(lxchar)) {
                   1042:                        case '>':
                   1043:                                switch (get(lxchar)) {
                   1044:                                case '=':
                   1045:                                        reti(ASOP,ASRS);
                   1046:                                        break;
                   1047:                                default:
                   1048:                                        saved = lxchar;
                   1049:                                        reti(SHIFTOP,RS);
                   1050:                                }
                   1051:                        case '=':
                   1052:                                reti(RELOP,GE);
                   1053:                        default:
                   1054:                                saved = lxchar;
                   1055:                                reti(RELOP,GT);
                   1056:                        }
                   1057:                case A_LT:
                   1058:                        switch (get(lxchar)) {
                   1059:                        case '<':
                   1060:                                switch (get(lxchar)) {
                   1061:                                case '=':
                   1062:                                        reti(ASOP,ASLS);
                   1063:                                default:
                   1064:                                        saved = lxchar;
                   1065:                                        reti(SHIFTOP,LS);
                   1066:                                }
                   1067:                        case '=':
                   1068:                                reti(RELOP,LE);
                   1069:                        default:
                   1070:                                saved = lxchar;
                   1071:                                reti(RELOP,LT);
                   1072:                        }
                   1073:                case A_AND:
                   1074:                        switch (get(lxchar)) {
                   1075:                        case '&':
                   1076:                                reti(ANDAND,ANDAND);
                   1077:                        case '=':
                   1078:                                reti(ASOP,ASAND);
                   1079:                        default:
                   1080:                                saved = lxchar;
                   1081:                                reti(AND,AND);
                   1082:                        }
                   1083:                case A_OR:
                   1084:                        switch (get(lxchar)) {
                   1085:                        case '|':
                   1086:                                reti(OROR,OROR);
                   1087:                        case '=':
                   1088:                                reti(ASOP,ASOR);
                   1089:                        default:
                   1090:                                saved = lxchar;
                   1091:                                reti(OR,OR);
                   1092:                        }
                   1093:                case A_ER:
                   1094:                        switch (get(lxchar)) {
                   1095:                        case '=':
                   1096:                                reti(ASOP,ASER);
                   1097:                        default:
                   1098:                                saved = lxchar;
                   1099:                                reti(ER,ER);
                   1100:                        }
                   1101:                case A_PL:
                   1102:                        switch (get(lxchar)) {
                   1103:                        case '=':
                   1104:                                reti(ASOP,ASPLUS);
                   1105:                        case '+':
                   1106:                                reti(ICOP,INCR);
                   1107:                        default:
                   1108:                                saved = lxchar;
                   1109:                                reti(PLUS,PLUS);
                   1110:                        }
                   1111:                case A_MIN:
                   1112:                        switch (get(lxchar)) {
                   1113:                        case '=':
                   1114:                                reti(ASOP,ASMINUS);
                   1115:                        case '-':
                   1116:                                reti(ICOP,DECR);
                   1117:                        case '>':
                   1118:                                if (get(lxchar) == '*')
                   1119:                                        {reti(REFMUL,REF);}
                   1120:                                else
                   1121:                                        saved = lxchar;
                   1122:                                reti(REF,REF);
                   1123:                        default:
                   1124:                                saved = lxchar;
                   1125:                                reti(MINUS,MINUS);
                   1126:                        }
                   1127:                case A_MUL:
                   1128:                        switch (get(lxchar)) {
                   1129:                        case '=':
                   1130:                                reti(ASOP,ASMUL);
                   1131:                        case '/':
                   1132:                                error('w',"*/ not as end of comment");
                   1133:                        default:
                   1134:                                saved = lxchar;
                   1135:                                reti(MUL,MUL);
                   1136:                        }
                   1137:                case A_MOD:
                   1138:                        switch (get(lxchar)) {
                   1139:                        case '=':
                   1140:                                reti(ASOP,ASMOD);
                   1141:                        default:
                   1142:                                saved = lxchar;
                   1143:                                reti(DIVOP,MOD);
                   1144:                        }
                   1145:                default:
                   1146:                        {error('i',"lex act==%d getc()->%d",p,lxchar);}
                   1147: 
                   1148:                }
                   1149: 
                   1150:                error('i',"lex, main switch");
                   1151:        }
                   1152: 
                   1153: }
                   1154: 
                   1155: int lxtitle()
                   1156: /*
                   1157:        called after a newline; set linenumber and file name
                   1158: */
                   1159: {
                   1160:        register c;
                   1161: 
                   1162:        for(;;)
                   1163:        switch ( get(c) ) {
                   1164:        default:                // e.g. not '\n', not '#'
                   1165:                return c;
                   1166:        case '\n':
                   1167:                tloc.line++;
                   1168: //             Nline++;
                   1169:        ll:
                   1170:                break;
                   1171:        case '#':       /* # lineno "filename" */
                   1172:        {       int cl = tloc.line;
                   1173:                tloc.line = 0;
                   1174:                for(;;) 
                   1175:                switch (get(c)) {
                   1176:                case '"':
                   1177:                        start_txt();
                   1178:                        for(;;)
                   1179:                        switch (get(c)) {
                   1180:                        case '"':
                   1181:                                pch('\0');
                   1182: 
                   1183:                                while (get(c) != '\n') ;  // skip to eol.. ignore anything more
                   1184: 
                   1185:                                if (*txtstart) {        // stack file name
                   1186:                                        char* fn;
                   1187:                                        if (tcurr_file == 0){
                   1188:                                                if (( fn = file_name[0]) 
                   1189:                                                        && (strcmp(txtstart,fn)!=0)){  // 1st include
                   1190:                                                        if (MAXFILE*4<++Nfile) error('i',"fileN buffer overflow");
                   1191:                                                        if (MAXFILE<++tcurr_file) error('i',"fileN stack overflow");
                   1192:                                                        file_stack[tcurr_file] = Nfile;
                   1193:                                                
                   1194:                                                        char* p1 = new char[txtfree-txtstart];
                   1195:                                                        (void) strcpy(p1,txtstart);
                   1196:                                                        file_name[Nfile] = p1;
                   1197:        //                                              Nstr++;
                   1198:                                                }
                   1199:                                                else { //&& line is dummy #line "input.c"
                   1200:                                                        // ignore
                   1201:                                                }
                   1202: //&& dead, dead, dead                  goto push;
                   1203:                                        }
                   1204:                                        else if ( (fn=file_name[file_stack[tcurr_file]])
                   1205:                                        && (strcmp(txtstart,fn)==0) ) {
                   1206:                                                //new line, same file: ignore
                   1207:                                        }
                   1208:                                        else if ( (fn=file_name[file_stack[tcurr_file-1]])
                   1209:                                        && (strcmp(txtstart,fn)==0) ) {
                   1210:                                                // previous file: pop
                   1211:                                                tcurr_file--;
                   1212:                                        }
                   1213:                                        else {  // new file name: push
                   1214: //&&                           push:
                   1215:                                                if (MAXFILE*4<Nfile++) error('i',"fileN buffer overflow");
                   1216:                                                if (MAXFILE<tcurr_file++) error('i',"fileN stack overflow");
                   1217:                                                file_stack[tcurr_file] = Nfile;
                   1218:                                                char* p = new char[txtfree-txtstart];
                   1219:                                                (void) strcpy(p,txtstart);
                   1220:                                                file_name[Nfile] = p;
                   1221: //                                             Nstr++;
                   1222:                                        }
                   1223:                                }
                   1224:                                else {  // no name .. back to the original .c file: ""
                   1225:                                        tcurr_file = 0;
                   1226:                                }
                   1227:                                del_txt();
                   1228:                                tloc.file = file_stack[tcurr_file];
                   1229:                                goto ll;
                   1230:                        case '\n':
                   1231:                                error("unexpected end of line on '# line'");
                   1232:                        default:
                   1233:                                pch(c);
                   1234:                        }
                   1235:                case ' ':
                   1236:                        break;
                   1237: 
                   1238:                case '0':
                   1239:                case '1':
                   1240:                case '2':
                   1241:                case '3':
                   1242:                case '4':
                   1243:                case '5':
                   1244:                case '6':
                   1245:                case '7':
                   1246:                case '8':
                   1247:                case '9':
                   1248:                        tloc.line = tloc.line*10+c-'0'; 
                   1249:                        break;
                   1250: 
                   1251:                case 'l':       // look for "#line ..." and then ignore "line"
                   1252:                        if (get(c)=='i' && get(c)=='n' && get(c)=='e') break;
                   1253:                case '\n':
                   1254:                         tloc.putline();
                   1255:                         goto ll;
                   1256: 
                   1257:                default: // pass #rubbish through
                   1258:                        tloc.line = cl;
                   1259:                        pch('#');
                   1260:                        pch(c);
                   1261:                        while (get(c) != '\n') pch(c);
                   1262:                        pch('\0');
                   1263:                        fprintf(out_file,"\n%s\n",txtstart);
                   1264:                        start_txt();
                   1265:                        tloc.line++;
                   1266: //                     Nline++;
                   1267:                        goto ll;
                   1268:                }
                   1269:        }
                   1270:        }
                   1271: }
unix.superglobalmegacorp.com
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.