GNUtools/bison/lex.c - annotate

Return to lex.c CVS log
Up to [Apple XNU] / GNUtools / bison
Annotation of GNUtools/bison/lex.c, revision 1.1

1.1     ! root        1: /* Token-reader for Bison's input parser,
        !             2:    Copyright (C) 1984, 1986, 1989 Free Software Foundation, Inc.
        !             3: 
        !             4: This file is part of Bison, the GNU Compiler Compiler.
        !             5: 
        !             6: Bison is free software; you can redistribute it and/or modify
        !             7: it under the terms of the GNU General Public License as published by
        !             8: the Free Software Foundation; either version 2, or (at your option)
        !             9: any later version.
        !            10: 
        !            11: Bison is distributed in the hope that it will be useful,
        !            12: but WITHOUT ANY WARRANTY; without even the implied warranty of
        !            13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
        !            14: GNU General Public License for more details.
        !            15: 
        !            16: You should have received a copy of the GNU General Public License
        !            17: along with Bison; see the file COPYING.  If not, write to
        !            18: the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
        !            19: 
        !            20: 
        !            21: /* 
        !            22:    lex() is the entry point.  It is called from reader.c.
        !            23:    It returns one of the token-type codes defined in lex.h.
        !            24:    When an identifier is seen, the code IDENTIFIER is returned
        !            25:    and the name is looked up in the symbol table using symtab.c;
        !            26:    symval is set to a pointer to the entry found.  */
        !            27: 
        !            28: #include <stdio.h>
        !            29: #include <ctype.h>
        !            30: #include "system.h"
        !            31: #include "files.h"
        !            32: #include "symtab.h"
        !            33: #include "lex.h"
        !            34: #include "new.h"
        !            35: 
        !            36: 
/* Line counter for the input file, defined elsewhere.  The lexer
   increments it once for every newline it consumes.  */
extern int lineno;

/* Defined elsewhere.  lex() stores -1 here whenever it reads a
   character-literal token.  NOTE(review): the meaning of the value is
   owned by whoever defines this variable -- confirm there.  */
extern int translations;

/* Forward declaration; lex() calls this after consuming a `%'.  */
int parse_percent_token();

/* Error reporters defined elsewhere.  In this file fatals() is called
   with a printf-style format plus one argument, and fatal() with a
   plain message.  NOTE(review): both are presumed not to return --
   confirm against their definitions.  */
extern void fatals();
extern void fatal();

/* Buffer for storing the current token.  Allocated by init_lex() and
   enlarged on demand by grow_token_buffer().  */
char *token_buffer;

/* Allocated size of token_buffer, not including space for terminator.  */
static int maxtoken;

/* Symbol-table entry of the most recent identifier or character-literal
   token; set by lex() from the result of getsym().  */
bucket *symval;

/* Decimal value of the most recent NUMBER token read by lex().  */
int numval;

static int unlexed;            /* these two describe a token to be reread */
static bucket *unlexed_symval; /* by the next call to lex; unlexed is -1 when nothing is pending */
        !            56: 
        !            57: 
        !            58: void
        !            59: init_lex()
        !            60: {
        !            61:   maxtoken = 100;
        !            62:   token_buffer = NEW2 (maxtoken + 1, char);
        !            63:   unlexed = -1;
        !            64: }
        !            65: 
        !            66: 
        !            67: static char *
        !            68: grow_token_buffer (p)
        !            69:      char *p;
        !            70: {
        !            71:   int offset = p - token_buffer;
        !            72:   maxtoken *= 2;
        !            73:   token_buffer = (char *) xrealloc(token_buffer, maxtoken + 1);
        !            74:   return token_buffer + offset;
        !            75: }
        !            76: 
        !            77: 
        !            78: int
        !            79: skip_white_space()
        !            80: {
        !            81:   register int c;
        !            82:   register int inside;
        !            83: 
        !            84:   c = getc(finput);
        !            85: 
        !            86:   for (;;)
        !            87:     {
        !            88:       int cplus_comment;
        !            89: 
        !            90:       switch (c)
        !            91:        {
        !            92:        case '/':
        !            93:          c = getc(finput);
        !            94:          if (c != '*' && c != '/')
        !            95:            fatals("unexpected `/%c' found",c);
        !            96:          cplus_comment = (c == '/');
        !            97: 
        !            98:          c = getc(finput);
        !            99: 
        !           100:          inside = 1;
        !           101:          while (inside)
        !           102:            {
        !           103:              if (!cplus_comment && c == '*')
        !           104:                {
        !           105:                  while (c == '*')
        !           106:                    c = getc(finput);
        !           107: 
        !           108:                  if (c == '/')
        !           109:                    {
        !           110:                      inside = 0;
        !           111:                      c = getc(finput);
        !           112:                    }
        !           113:                }
        !           114:              else if (c == '\n')
        !           115:                {
        !           116:                  lineno++;
        !           117:                  if (cplus_comment)
        !           118:                    inside = 0;
        !           119:                  c = getc(finput);
        !           120:                }
        !           121:              else if (c == EOF)
        !           122:                fatal("unterminated comment");
        !           123:              else
        !           124:                c = getc(finput);
        !           125:            }
        !           126: 
        !           127:          break;
        !           128: 
        !           129:        case '\n':
        !           130:          lineno++;
        !           131: 
        !           132:        case ' ':
        !           133:        case '\t':
        !           134:        case '\f':
        !           135:          c = getc(finput);
        !           136:          break;
        !           137: 
        !           138:        default:
        !           139:          return (c);
        !           140:        }
        !           141:     }
        !           142: }
        !           143: 
        !           144: 
        !           145: void
        !           146: unlex(token)
        !           147: int token;
        !           148: {
        !           149:   unlexed = token;
        !           150:   unlexed_symval = symval;
        !           151: }
        !           152: 
        !           153: 
        !           154: 
        !           155: int
        !           156: lex()
        !           157: {
        !           158:   register int c;
        !           159:   register char *p;
        !           160: 
        !           161:   if (unlexed >= 0)
        !           162:     {
        !           163:       symval = unlexed_symval;
        !           164:       c = unlexed;
        !           165:       unlexed = -1;
        !           166:       return (c);
        !           167:     }
        !           168: 
        !           169:   c = skip_white_space();
        !           170: 
        !           171:   switch (c)
        !           172:     {
        !           173:     case EOF:
        !           174:       return (ENDFILE);
        !           175: 
        !           176:     case 'A':  case 'B':  case 'C':  case 'D':  case 'E':
        !           177:     case 'F':  case 'G':  case 'H':  case 'I':  case 'J':
        !           178:     case 'K':  case 'L':  case 'M':  case 'N':  case 'O':
        !           179:     case 'P':  case 'Q':  case 'R':  case 'S':  case 'T':
        !           180:     case 'U':  case 'V':  case 'W':  case 'X':  case 'Y':
        !           181:     case 'Z':
        !           182:     case 'a':  case 'b':  case 'c':  case 'd':  case 'e':
        !           183:     case 'f':  case 'g':  case 'h':  case 'i':  case 'j':
        !           184:     case 'k':  case 'l':  case 'm':  case 'n':  case 'o':
        !           185:     case 'p':  case 'q':  case 'r':  case 's':  case 't':
        !           186:     case 'u':  case 'v':  case 'w':  case 'x':  case 'y':
        !           187:     case 'z':
        !           188:     case '.':  case '_':
        !           189:       p = token_buffer;
        !           190:       while (isalnum(c) || c == '_' || c == '.')
        !           191:        {
        !           192:          if (p == token_buffer + maxtoken)
        !           193:            p = grow_token_buffer(p);
        !           194: 
        !           195:          *p++ = c;
        !           196:          c = getc(finput);
        !           197:        }
        !           198: 
        !           199:       *p = 0;
        !           200:       ungetc(c, finput);
        !           201:       symval = getsym(token_buffer);
        !           202:       return (IDENTIFIER);
        !           203: 
        !           204:     case '0':  case '1':  case '2':  case '3':  case '4':
        !           205:     case '5':  case '6':  case '7':  case '8':  case '9':
        !           206:       {
        !           207:        numval = 0;
        !           208: 
        !           209:        while (isdigit(c))
        !           210:          {
        !           211:            numval = numval*10 + c - '0';
        !           212:            c = getc(finput);
        !           213:          }
        !           214:        ungetc(c, finput);
        !           215:        return (NUMBER);
        !           216:       }
        !           217: 
        !           218:     case '\'':
        !           219:       translations = -1;
        !           220: 
        !           221:       /* parse the literal token and compute character code in  code  */
        !           222: 
        !           223:       c = getc(finput);
        !           224:       {
        !           225:        register int code = 0;
        !           226: 
        !           227:        if (c == '\\')
        !           228:          {
        !           229:            c = getc(finput);
        !           230: 
        !           231:            if (c <= '7' && c >= '0')
        !           232:              {
        !           233:                while (c <= '7' && c >= '0')
        !           234:                  {
        !           235:                    code = (code * 8) + (c - '0');
        !           236:                    c = getc(finput);
        !           237:                    if (code >= 256 || code < 0)
        !           238:                      fatals("malformatted literal token `\\%03o'", code);
        !           239:                  }
        !           240:              }
        !           241:            else
        !           242:              {
        !           243:                if (c == 't')
        !           244:                  code = '\t';
        !           245:                else if (c == 'n')
        !           246:                  code = '\n';
        !           247:                else if (c == 'a')
        !           248:                  code = '\007';
        !           249:                else if (c == 'r')
        !           250:                  code = '\r';
        !           251:                else if (c == 'f')
        !           252:                  code = '\f';
        !           253:                else if (c == 'b')
        !           254:                  code = '\b';
        !           255:                else if (c == 'v')
        !           256:                  code = 013;
        !           257:                else if (c == 'x')
        !           258:                  {
        !           259:                    c = getc(finput);
        !           260:                    while ((c <= '9' && c >= '0')
        !           261:                           || (c >= 'a' && c <= 'z')
        !           262:                           || (c >= 'A' && c <= 'Z'))
        !           263:                      {
        !           264:                        code *= 16;
        !           265:                        if (c <= '9' && c >= '0')
        !           266:                          code += c - '0';
        !           267:                        else if (c >= 'a' && c <= 'z')
        !           268:                          code += c - 'a' + 10;
        !           269:                        else if (c >= 'A' && c <= 'Z')
        !           270:                          code += c - 'A' + 10;
        !           271:                        if (code >= 256 || code<0)/* JF this said if(c>=128) */
        !           272:                          fatals("malformatted literal token `\\x%x'",code);
        !           273:                        c = getc(finput);
        !           274:                      }
        !           275:                    ungetc(c, finput);
        !           276:                  }
        !           277:                else if (c == '\\')
        !           278:                  code = '\\';
        !           279:                else if (c == '\'')
        !           280:                  code = '\'';
        !           281:                else if (c == '\"')     /* JF this is a good idea */
        !           282:                  code = '\"';
        !           283:                else
        !           284:                  {
        !           285:                    if (c >= 040 && c <= 0177)
        !           286:                      fatals ("unknown escape sequence `\\%c'", c);
        !           287:                    else
        !           288:                      fatals ("unknown escape sequence: `\\' followed by char code 0x%x", c);
        !           289:                  }
        !           290: 
        !           291:                c = getc(finput);
        !           292:              }
        !           293:          }
        !           294:        else
        !           295:          {
        !           296:            code = c;
        !           297:            c = getc(finput);
        !           298:          }
        !           299:        if (c != '\'')
        !           300:          fatal("multicharacter literal tokens not supported");
        !           301: 
        !           302:        /* now fill token_buffer with the canonical name for this character
        !           303:           as a literal token.  Do not use what the user typed,
        !           304:           so that '\012' and '\n' can be interchangeable.  */
        !           305: 
        !           306:        p = token_buffer;
        !           307:        *p++ = '\'';
        !           308:        if (code == '\\')
        !           309:          {
        !           310:            *p++ = '\\';
        !           311:            *p++ = '\\';
        !           312:          }
        !           313:        else if (code == '\'')
        !           314:          {
        !           315:            *p++ = '\\';
        !           316:            *p++ = '\'';
        !           317:          }
        !           318:        else if (code >= 040 && code != 0177)
        !           319:          *p++ = code;
        !           320:        else if (code == '\t')
        !           321:          {
        !           322:            *p++ = '\\';
        !           323:            *p++ = 't';
        !           324:          }
        !           325:        else if (code == '\n')
        !           326:          {
        !           327:            *p++ = '\\';
        !           328:            *p++ = 'n';
        !           329:          }
        !           330:        else if (code == '\r')
        !           331:          {
        !           332:            *p++ = '\\';
        !           333:            *p++ = 'r';
        !           334:          }
        !           335:        else if (code == '\v')
        !           336:          {
        !           337:            *p++ = '\\';
        !           338:            *p++ = 'v';
        !           339:          }
        !           340:        else if (code == '\b')
        !           341:          {
        !           342:            *p++ = '\\';
        !           343:            *p++ = 'b';
        !           344:          }
        !           345:        else if (code == '\f')
        !           346:          {
        !           347:            *p++ = '\\';
        !           348:            *p++ = 'f';
        !           349:          }
        !           350:         else
        !           351:          {
        !           352:            *p++ = code / 0100 + '0';
        !           353:            *p++ = ((code / 010) & 07) + '0';
        !           354:            *p++ = (code & 07) + '0';
        !           355:          }
        !           356:        *p++ = '\'';
        !           357:        *p = 0;
        !           358:        symval = getsym(token_buffer);
        !           359:        symval->class = STOKEN;
        !           360:        if (! symval->user_token_number)
        !           361:          symval->user_token_number = code;
        !           362:        return (IDENTIFIER);
        !           363:       }
        !           364: 
        !           365:     case ',':
        !           366:       return (COMMA);
        !           367: 
        !           368:     case ':':
        !           369:       return (COLON);
        !           370: 
        !           371:     case ';':
        !           372:       return (SEMICOLON);
        !           373: 
        !           374:     case '|':
        !           375:       return (BAR);
        !           376: 
        !           377:     case '{':
        !           378:       return (LEFT_CURLY);
        !           379: 
        !           380:     case '=':
        !           381:       do
        !           382:        {
        !           383:          c = getc(finput);
        !           384:          if (c == '\n') lineno++;
        !           385:        }
        !           386:       while(c==' ' || c=='\n' || c=='\t');
        !           387: 
        !           388:       if (c == '{')
        !           389:        return(LEFT_CURLY);
        !           390:       else
        !           391:        {
        !           392:          ungetc(c, finput);
        !           393:          return(ILLEGAL);
        !           394:        }
        !           395: 
        !           396:     case '<':
        !           397:       p = token_buffer;
        !           398:       c = getc(finput);
        !           399:       while (c != '>')
        !           400:        {
        !           401:          if (c == '\n' || c == EOF)
        !           402:            fatal("unterminated type name");
        !           403: 
        !           404:          if (p == token_buffer + maxtoken)
        !           405:            p = grow_token_buffer(p);
        !           406: 
        !           407:          *p++ = c;
        !           408:          c = getc(finput);
        !           409:        }
        !           410:       *p = 0;
        !           411:       return (TYPENAME);
        !           412:            
        !           413: 
        !           414:     case '%':
        !           415:       return (parse_percent_token());
        !           416: 
        !           417:     default:
        !           418:       return (ILLEGAL);
        !           419:     }
        !           420: }
        !           421: 
        !           422: 
        !           423: /* parse a token which starts with %.  Assumes the % has already been read and discarded.  */
        !           424: 
        !           425: int
        !           426: parse_percent_token ()
        !           427: {
        !           428:   register int c;
        !           429:   register char *p;
        !           430: 
        !           431:   p = token_buffer;
        !           432:   c = getc(finput);
        !           433: 
        !           434:   switch (c)
        !           435:     {
        !           436:     case '%':
        !           437:       return (TWO_PERCENTS);
        !           438: 
        !           439:     case '{':
        !           440:       return (PERCENT_LEFT_CURLY);
        !           441: 
        !           442:     case '<':
        !           443:       return (LEFT);
        !           444: 
        !           445:     case '>':
        !           446:       return (RIGHT);
        !           447: 
        !           448:     case '2':
        !           449:       return (NONASSOC);
        !           450: 
        !           451:     case '0':
        !           452:       return (TOKEN);
        !           453: 
        !           454:     case '=':
        !           455:       return (PREC);
        !           456:     }
        !           457:   if (!isalpha(c))
        !           458:     return (ILLEGAL);
        !           459: 
        !           460:   while (isalpha(c) || c == '_')
        !           461:     {
        !           462:       if (p == token_buffer + maxtoken)
        !           463:        p = grow_token_buffer(p);
        !           464: 
        !           465:       *p++ = c;
        !           466:       c = getc(finput);
        !           467:     }
        !           468: 
        !           469:   ungetc(c, finput);
        !           470: 
        !           471:   *p = 0;
        !           472: 
        !           473:   if (strcmp(token_buffer, "token") == 0
        !           474:       ||
        !           475:       strcmp(token_buffer, "term") == 0)
        !           476:     return (TOKEN);
        !           477:   else if (strcmp(token_buffer, "nterm") == 0)
        !           478:     return (NTERM);
        !           479:   else if (strcmp(token_buffer, "type") == 0)
        !           480:     return (TYPE);
        !           481:   else if (strcmp(token_buffer, "guard") == 0)
        !           482:     return (GUARD);
        !           483:   else if (strcmp(token_buffer, "union") == 0)
        !           484:     return (UNION);
        !           485:   else if (strcmp(token_buffer, "expect") == 0)
        !           486:     return (EXPECT);
        !           487:   else if (strcmp(token_buffer, "start") == 0)
        !           488:     return (START);
        !           489:   else if (strcmp(token_buffer, "left") == 0)
        !           490:     return (LEFT);
        !           491:   else if (strcmp(token_buffer, "right") == 0)
        !           492:     return (RIGHT);
        !           493:   else if (strcmp(token_buffer, "nonassoc") == 0
        !           494:           ||
        !           495:           strcmp(token_buffer, "binary") == 0)
        !           496:     return (NONASSOC);
        !           497:   else if (strcmp(token_buffer, "semantic_parser") == 0)
        !           498:     return (SEMANTIC_PARSER);
        !           499:   else if (strcmp(token_buffer, "pure_parser") == 0)
        !           500:     return (PURE_PARSER);
        !           501:   else if (strcmp(token_buffer, "prec") == 0)
        !           502:     return (PREC);
        !           503:   else return (ILLEGAL);
        !           504: }
unix.superglobalmegacorp.com
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.