43BSD/contrib/icon/tran/lex.c - annotate

Return to lex.c CVS log
Up to [CSRG BSD Unix] / 43BSD / contrib / icon / tran
Annotation of 43BSD/contrib/icon/tran/lex.c, revision 1.1

1.1     ! root        1: /*
        !             2:  * The lexical analyzer.
        !             3:  */
        !             4: 
        !             5: #include "itran.h"
        !             6: #include "token.h"
        !             7: #include "lex.h"
        !             8: #include "char.h"
        !             9: #include "tree.h"
        !            10: 
        /*
         * tline, tcol - line and column that yylex records for the token it is
         *  currently returning.
         */
        int tline, tcol;
        !            13: 
        !            14: /*
        !            15:  * yylex - find the next token in the input stream, and return its token
        !            16:  *  type and value to the parser.
        !            17:  *
        !            18:  * Variables of interest:
        !            19:  *
        !            20:  *  cc - character following last token.
        !            21:  *  comflag - set if in a comment.
        !            22:  *  nlflag - set if a newline was between the last token and the current token
        !            23:  *  lastend - set if the last token was an ENDER.
        !            24:  *  lastval - when a semicolon is inserted and returned, lastval gets the
        !            25:  *   token value that would have been returned if the semicolon hadn't
        !            26:  *   been inserted.
        !            27:  */
        !            28: 
        !            29: yylex()
        !            30:    {
        !            31:    register struct toktab *t;
        !            32:    register int c;
        !            33:    int nlflag;
        !            34:    int comflag;
        !            35:    static struct toktab *lasttok = NULL;
        !            36:    static nodeptr lastval;
        !            37:    static int lastend = 0;
        !            38:    static int eofflag = 0;
        !            39:    static int lastline = 0;
        !            40:    static int cc = '\n';
        !            41:    extern struct toktab *getident(), *getnum(), *getstring(), *getop();
        !            42: 
        !            43:    if (lasttok != NULL) {
        !            44:       /*
        !            45:        * A semicolon was inserted and returned on the last call to yylex,
        !            46:        *  instead of going to the input, return lasttok and set the
        !            47:        *  appropriate variables.
        !            48:        */
        !            49:       yylval = lastval;
        !            50:       tline = LINE(lastval);
        !            51:       tcol = COL(lastval);
        !            52:       t = lasttok;
        !            53:       goto ret;
        !            54:       }
        !            55:    nlflag = 0;
        !            56:    comflag = 0;
        !            57: loop:
        !            58:    c = cc;
        !            59:    /*
        !            60:     * Skip whitespace and comments.
        !            61:     */
        !            62:    while (c != EOF && (comflag || c == COMMENT || isspace(c))) {
        !            63:       if (c == '\n') {
        !            64:          nlflag++;
        !            65:          comflag = 0;
        !            66:          }
        !            67:       else if (c == COMMENT)
        !            68:          comflag++;
        !            69:       c = NEXTCHAR;
        !            70:       }
        !            71:    /*
        !            72:     * A token is the next thing in the input.  Record the last line number
        !            73:     *  and set tline and tcol to the current line and column.
        !            74:     */
        !            75:    lastline = tline;
        !            76:    tline = inline;
        !            77:    tcol = incol;
        !            78: 
        !            79:    if (c == EOF) {
        !            80:       /*
        !            81:        * End of file has been reached.  Set eofflag, return T_EOF, and
        !            82:        *  set cc to EOF so that any subsequent scans also return T_EOF.
        !            83:        */
        !            84:       if (eofflag++) {
        !            85:          eofflag = 0;
        !            86:          cc = '\n';
        !            87:          return (int) (yylval = 0);
        !            88:          }
        !            89:       cc = EOF;
        !            90:       t = T_EOF;
        !            91:       yylval = 0;
        !            92:       goto ret;
        !            93:       }
        !            94: 
        !            95:    /*
        !            96:     * Look at current input character to determine what class of token
        !            97:     *  is next and take the appropriate action.  Note that the various
        !            98:     *  token gathering routines write a value into cc.
        !            99:     */
        !           100:    c = ctran[c];
        !           101:    if (isalpha(c)) {                    /* gather ident or reserved word */
        !           102:       if ((t = getident(c, &cc)) == NULL)
        !           103:          goto loop;
        !           104:       }
        !           105:    else if (isdigit(c)) {               /* gather numeric literal */
        !           106:       if ((t = getnum(c, &cc)) == NULL)
        !           107:          goto loop;
        !           108:       }
        !           109:    else if (c == '"' || c == '\'') {    /* gather string or cset literal */
        !           110:       if ((t = getstring(c, &cc)) == NULL)
        !           111:          goto loop;
        !           112:       }
        !           113:    else {                      /* gather longest legal operator */
        !           114:       if ((t = getop(c, &cc)) == NULL)
        !           115:          goto loop;
        !           116:       yylval = OPNODE(t->t_type);
        !           117:       }
        !           118:    if (nlflag && lastend && (t->t_flags & BEGINNER)) {
        !           119:       /*
        !           120:        * A newline was encountered between the current token and the last,
        !           121:        *  the last token was an ENDER, and the current token is a BEGINNER.
        !           122:        *  Return a semicolon and save the current token in lastval.
        !           123:        */
        !           124:       lastval = yylval;
        !           125:       lasttok = t;
        !           126:       tline = lastline;
        !           127:       tcol = 0;
        !           128:       yylval = OPNODE(SEMICOL);
        !           129:       return (SEMICOL);
        !           130:       }
        !           131: ret:
        !           132:    /*
        !           133:     * Clear lasttok, set lastend if the token being returned is an
        !           134:     *  ENDER, and return the token.
        !           135:     */
        !           136:    lasttok = 0;
        !           137:    lastend = t->t_flags & ENDER;
        !           138:    return (t->t_type);
        !           139:    }
        !           140: 
        !           141: /*
        !           142:  * getident - gather an identifier beginning with ac.  The character
        !           143:  *  following identifier goes in cc.
        !           144:  */
        !           145: 
        !           146: struct toktab *getident(ac, cc)
        !           147: char ac;
        !           148: int *cc;
        !           149:    {
        !           150:    register c;
        !           151:    register char *p;
        !           152:    register struct toktab *t;
        !           153:    extern char *putident();
        !           154:    extern struct toktab *findres();
        !           155: 
        !           156:    c = ac;
        !           157:    p = sfree;
        !           158:    /*
        !           159:     * Copy characters into string space until a non-alphanumeric character
        !           160:     *  is found.
        !           161:     */
        !           162:    do {
        !           163:       if (p >= send)
        !           164:          syserr("out of string space");
        !           165:       *p++ = c;
        !           166:       c = ctran[NEXTCHAR];
        !           167:       } while (isalnum(c));
        !           168:    if (p >= send)
        !           169:       syserr("out of string space");
        !           170:    *p++ = 0;
        !           171:    *cc = c;
        !           172:    /*
        !           173:     * If the identifier is a reserved word, make a RESNODE for it and return
        !           174:     *  the token value.  Otherwise, install it with putident, make an
        !           175:     *  IDNODE for it, and return.
        !           176:     */
        !           177:    if ((t = findres()) != NULL) {
        !           178:       yylval = RESNODE(t->t_type);
        !           179:       return (t);
        !           180:       }
        !           181:    else {
        !           182:       yylval = IDNODE((int)putident(p-sfree));
        !           183:       return (T_IDENT);
        !           184:       }
        !           185:    }
        !           186: 
        !           187: /*
        !           188:  * findres - if the string just copied into the string space by getident
        !           189:  *  is a reserved word, return a pointer to its entry in the token table.
        !           190:  *  Return NULL if the string isn't a reserved word.
        !           191:  */
        !           192: 
        !           193: struct toktab *findres()
        !           194:    {
        !           195:    register struct toktab *t;
        !           196:    register char c, *p;
        !           197: 
        !           198:    p = sfree;
        !           199:    c = *p;
        !           200:    if (!islower(c))
        !           201:       return (NULL);
        !           202:    /*
        !           203:     * Point t at first reserved word that starts with c (if any).
        !           204:     */
        !           205:    if ((t = restab[c - '_']) == NULL)
        !           206:       return (NULL);
        !           207:    /*
        !           208:     * Search through reserved words, stopping when a match is found
        !           209:     *  or when the current reserved word doesn't start with c.
        !           210:     */
        !           211:    while (t->t_word[0] == c) {
        !           212:       if (strcmp(t->t_word, p) == 0)
        !           213:          return (t);
        !           214:       t++;
        !           215:       }
        !           216:    return (NULL);
        !           217:    }
        !           218: 
        !           219: /*
        !           220:  * getnum - gather a numeric literal starting with ac and put the
        !           221:  *  character following the literal into *cc.
        !           222:  */
        !           223: 
        !           224: struct toktab *getnum(ac, cc)
        !           225: char ac;
        !           226: int *cc;
        !           227:    {
        !           228:    register c;
        !           229:    register r;
        !           230:    register state;
        !           231:    char *p;
        !           232:    int realflag;
        !           233:    extern char *putident();
        !           234: 
        !           235:    c = ac;
        !           236:    r = tonum(c);
        !           237:    p = sfree;
        !           238:    state = 0;
        !           239:    realflag = 0;
        !           240:    for (;;) {
        !           241:       if (p >= send)
        !           242:          syserr("out of string space");
        !           243:       *p++ = c;
        !           244:       c = ctran[NEXTCHAR];
        !           245:       switch (state) {
        !           246:          case 0:               /* integer part */
        !           247:             if (isdigit(c))         { r = r * 10 + tonum(c); continue; }
        !           248:             if (c == '.')           { state = 1; realflag++; continue; }
        !           249:             if (tolower(c) == 'e')  { state = 2; realflag++; continue; }
        !           250:             if (tolower(c) == 'r')  {
        !           251:                state = 5;
        !           252:                if (r < 2 || r > 36)
        !           253:                   err("invalid radix for integer literal", 0);
        !           254:                continue;
        !           255:                }
        !           256:             break;
        !           257:          case 1:               /* fractional part */
        !           258:             if (isdigit(c))   continue;
        !           259:             if (tolower(c) == 'e')   { state = 2; continue; }
        !           260:             break;
        !           261:          case 2:               /* optional exponent sign */
        !           262:             if (c == '+' || c == '-') { state = 3; continue; }
        !           263:          case 3:               /* first digit after e, e+, or e- */
        !           264:             if (isdigit(c)) { state = 4; continue; }
        !           265:             err("invalid real literal", 0);
        !           266:             break;
        !           267:          case 4:               /* remaining digits after e */
        !           268:             if (isdigit(c))   continue;
        !           269:             break;
        !           270:          case 5:               /* first digit after r */
        !           271:             if ((isdigit(c) || isletter(c)) && tonum(c) < r)
        !           272:                { state = 6; continue; }
        !           273:             err("invalid integer literal", 0);
        !           274:             break;
        !           275:          case 6:               /* remaining digits after r */
        !           276:             if (isdigit(c) || isletter(c)) {
        !           277:                if (tonum(c) >= r) {    /* illegal digit for radix r */
        !           278:                   err("invalid digit in integer literal", 0);
        !           279:                   r = tonum('z');      /* prevent more messages */
        !           280:                   }
        !           281:                continue;
        !           282:                }
        !           283:             break;
        !           284:          }
        !           285:       break;
        !           286:       }
        !           287:    if (p >= send)
        !           288:       syserr("out of string space");
        !           289:    *p++ = 0;
        !           290:    *cc = c;
        !           291:    if (realflag) {
        !           292:       yylval = REALNODE((int)putident(p-sfree));
        !           293:       return (T_REAL);
        !           294:       }
        !           295:    yylval = INTNODE((int)putident(p-sfree));
        !           296:    return (T_INT);
        !           297:    }
        !           298: 
        !           299: /*
        !           300:  * getstring - gather a string literal starting with ac and place the
        !           301:  *  character following the literal in *cc.
        !           302:  */
        !           303: 
        !           304: struct toktab *getstring(ac, cc)
        !           305: char ac;
        !           306: int *cc;
        !           307:    {
        !           308:    register c, sc;
        !           309:    register char *p;
        !           310:    char *lc;
        !           311:    extern char *putident();
        !           312: 
        !           313:    sc = c = ac;
        !           314:    p = sfree;
        !           315:    lc = 0;
        !           316:    while ((c = NEXTCHAR) != sc && c != '\n' && c != EOF) {
        !           317:    contin:
        !           318:       if (c == '_')
        !           319:          lc = p;
        !           320:       else if (!isspace(c))
        !           321:          lc = 0;
        !           322:       if (ctran[c] == ESCAPE) {
        !           323:          c = NEXTCHAR;
        !           324:          if (isoctal(c))
        !           325:             c = octesc(c);
        !           326:          else if (ctran[c] == 'x')
        !           327:             c = hexesc();
        !           328:          else if (ctran[c] == '^')
        !           329:             c = ctlesc();
        !           330:          else
        !           331:             c = esctab[c];
        !           332:          if (c == EOF)
        !           333:             goto noquote;
        !           334:          }
        !           335:       if (p >= send)
        !           336:          syserr("out of string space");
        !           337:       *p++ = c;
        !           338:       }
        !           339:    if (p >= send)
        !           340:       syserr("out of string space");
        !           341:    *p++ = 0;
        !           342:    if (c == sc)
        !           343:       *cc = ' ';
        !           344:    else {
        !           345:       if (c == '\n' && lc) {
        !           346:          p = lc;
        !           347:          while ((c = NEXTCHAR) != EOF && isspace(c)) ;
        !           348:          if (c != EOF)
        !           349:             goto contin;
        !           350:          }
        !           351: noquote:
        !           352:       err("unclosed quote", 0);
        !           353:       *cc = c;
        !           354:       }
        !           355:    if (ac == '"') {    /* a string literal */
        !           356:       yylval = STRNODE((int)putident(p-sfree), p-sfree);
        !           357:       return (T_STRING);
        !           358:       }
        !           359:    else {              /* a cset literal */
        !           360:       yylval = CSETNODE((int)putident(p-sfree), p-sfree);
        !           361:       return (T_CSET);
        !           362:       }
        !           363:    }
        !           364: 
        !           365: /*
        !           366:  * ctlesc - translate a control escape -- backslash followed by
        !           367:  *  caret and one character.
        !           368:  */
        !           369: 
        !           370: ctlesc()
        !           371:    {
        !           372:    register c;
        !           373: 
        !           374:    c = NEXTCHAR;
        !           375:    if (c == EOF)
        !           376:       return (EOF);
        !           377:    return (c & 037);
        !           378:    }
        !           379: 
        !           380: /*
        !           381:  * octesc - translate an octal escape -- backslash followed by
        !           382:  *  one, two, or three octal digits.
        !           383:  */
        !           384: 
        !           385: octesc(ac)
        !           386: char ac;
        !           387:    {
        !           388:    register c, nc, i;
        !           389: 
        !           390:    c = 0;
        !           391:    nc = ac;
        !           392:    i = 1;
        !           393:    do {
        !           394:       c = (c << 3) | (nc - '0');
        !           395:       nc = NEXTCHAR;
        !           396:       if (nc == EOF)
        !           397:          return (EOF);
        !           398:       } while (isoctal(nc) && i++ < 3);
        !           399:    PUSHCHAR(nc);
        !           400:    return (c & 0377);
        !           401:    }
        !           402: 
        !           403: /*
        !           404:  * hexesc - translate a hexadecimal escape -- backslash-x
        !           405:  *  followed by one or two hexadecimal digits.
        !           406:  */
        !           407: 
        !           408: hexesc()
        !           409:    {
        !           410:    register c, nc, i;
        !           411: 
        !           412:    c = 0;
        !           413:    i = 0;
        !           414:    while (i++ < 2) {
        !           415:       nc = NEXTCHAR;
        !           416:       if (nc == EOF)
        !           417:          return (EOF);
        !           418:       if (nc >= 'a' && nc <= 'f')
        !           419:          nc -= 'a' - 10;
        !           420:       else if (nc >= 'A' && nc <= 'F')
        !           421:          nc -= 'A' - 10;
        !           422:       else if (isdigit(nc))
        !           423:          nc -= '0';
        !           424:       else {
        !           425:          PUSHCHAR(nc);
        !           426:          break;
        !           427:          }
        !           428:       c = (c << 4) | nc;
        !           429:       }
        !           430:    return (c);
        !           431:    }
        !           432: 
        !           433: /*
        !           434:  * getop - find the longest legal operator and return a pointer
        !           435:  *  to its entry in the token table.  The tour describes the
        !           436:  *  operator recognition process in detail.
        !           437:  */
        !           438: 
        !           439: struct toktab *getop(ac, cc)
        !           440: char ac;
        !           441: int *cc;
        !           442:    {
        !           443:    register struct optab *state;
        !           444:    register char c, i;
        !           445: 
        !           446:    state = state0;
        !           447:    c = ac;
        !           448:    for (;;) {
        !           449:       while ((i = state->o_input) && c != i)
        !           450:          state++;
        !           451:       switch (state->o_action) {
        !           452:          case A_GOTO:
        !           453:             state = (struct optab *) state->o_val;
        !           454:             c = ctran[NEXTCHAR];
        !           455:             continue;
        !           456:          case A_ERROR:
        !           457:             err("invalid character", 0);
        !           458:             *cc = ' ';
        !           459:             return (NULL);
        !           460:          case A_RETURN:
        !           461:             *cc = c;
        !           462:             return (struct toktab *) (state->o_val);
        !           463:          case A_IMMRET:
        !           464:             *cc = ' ';
        !           465:             return (struct toktab *) (state->o_val);
        !           466:          }
        !           467:       }
        !           468:    }
        !           469: 
        !           470: /*
        !           471:  * nextchar - return the next character in the input.
        !           472:  */
        !           473: 
        !           474: nextchar()
        !           475:    {
        !           476:    register char c;
        !           477: 
        !           478:    if (c = peekc) {
        !           479:       peekc = 0;
        !           480:       return (c);
        !           481:       }
        !           482:    c = getc(infile);
        !           483:    switch (c) {
        !           484:       case EOF:
        !           485:          inline = 0;
        !           486:          incol = 0;
        !           487:          break;
        !           488:       case '\n':
        !           489:          inline++;
        !           490:          incol = 0;
        !           491:          break;
        !           492:       case '\t':
        !           493:          incol = (incol | 7) + 1;
        !           494:          break;
        !           495:       case '\b':
        !           496:          if (incol)
        !           497:             incol--;
        !           498:          break;
        !           499:       default:
        !           500:          incol++;
        !           501:       }
        !           502:    return (c);
        !           503:    }
unix.superglobalmegacorp.com
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.