researchv10dc/cmd/bison/lex.c - annotate

Return to lex.c CVS log
Up to [Research Unix] / researchv10dc / cmd / bison
Annotation of researchv10dc/cmd/bison/lex.c, revision 1.1

1.1     ! root        1: /* Token-reader for Bison's input parser,
        !             2:    Copyright (C) 1984, 1986 Bob Corbett and Free Software Foundation, Inc.
        !             3: 
        !             4: BISON is distributed in the hope that it will be useful, but WITHOUT ANY
        !             5: WARRANTY.  No author or distributor accepts responsibility to anyone
        !             6: for the consequences of using it or for whether it serves any
        !             7: particular purpose or works at all, unless he says so in writing.
        !             8: Refer to the BISON General Public License for full details.
        !             9: 
        !            10: Everyone is granted permission to copy, modify and redistribute BISON,
        !            11: but only under the conditions described in the BISON General Public
        !            12: License.  A copy of this license is supposed to have been given to you
        !            13: along with BISON so you can know your rights and responsibilities.  It
        !            14: should be in a file named COPYING.  Among other things, the copyright
        !            15: notice and this notice must be preserved on all copies.
        !            16: 
        !            17:  In other words, you are welcome to use, share and improve this program.
        !            18:  You are forbidden to forbid anyone else to use, share and improve
        !            19:  what you give them.   Help stamp out software-hoarding!  */
        !            20: 
        !            21: /* 
        !            22:    lex() is the entry point.  It is called from reader.c.
        !            23:    It returns one of the token-type codes defined in lex.h.
        !            24:    When an identifier is seen, the code IDENTIFIER is returned
        !            25:    and the name is looked up in the symbol table using symtab.c;
        !            26:    symval is set to a pointer to the entry found.  */
        !            27: 
        !            28: #include <stdio.h>
        !            29: #include <ctype.h>
        !            30: #include "files.h"
        !            31: #include "symtab.h"
        !            32: #include "lex.h"
        !            33: 
        !            34: 
        !            35: extern int lineno;
        !            36: extern int translations;
        !            37: 
        !            38: 
        !            39: char token_buffer[MAXTOKEN + 1];
        !            40: bucket *symval;
        !            41: int numval;
        !            42: 
        !            43: static int unlexed;            /* these two describe a token to be reread */
        !            44: static bucket *unlexed_symval; /* by the next call to lex */
        !            45: 
        !            46: 
        !            47: 
        !            48: init_lex()
        !            49: {
        !            50:   unlexed = -1;
        !            51: }
        !            52: 
        !            53: 
        !            54: 
        !            55: int
        !            56: skip_white_space()
        !            57: {
        !            58:   register int c;
        !            59:   register int inside;
        !            60: 
        !            61:   c = getc(finput);
        !            62: 
        !            63:   for (;;)
        !            64:     {
        !            65:       switch (c)
        !            66:        {
        !            67:        case '/':
        !            68:          c = getc(finput);
        !            69:          if (c != '*')
        !            70:            fatals("unexpected '/%c' found",c);
        !            71: 
        !            72:          c = getc(finput);
        !            73: 
        !            74:          inside = 1;
        !            75:          while (inside)
        !            76:            {
        !            77:              if (c == '*')
        !            78:                {
        !            79:                  while (c == '*')
        !            80:                    c = getc(finput);
        !            81: 
        !            82:                  if (c == '/')
        !            83:                    {
        !            84:                      inside = 0;
        !            85:                      c = getc(finput);
        !            86:                    }
        !            87:                }
        !            88:              else if (c == '\n')
        !            89:                {
        !            90:                  lineno++;
        !            91:                  c = getc(finput);
        !            92:                }
        !            93:              else if (c == EOF)
        !            94:                fatal("unterminated comment");
        !            95:              else
        !            96:                c = getc(finput);
        !            97:            }
        !            98: 
        !            99:          break;
        !           100: 
        !           101:        case '\n':
        !           102:          lineno++;
        !           103: 
        !           104:        case ' ':
        !           105:        case '\t':
        !           106:        case '\f':
        !           107:          c = getc(finput);
        !           108:          break;
        !           109: 
        !           110:        default:
        !           111:          return (c);
        !           112:        }
        !           113:     }
        !           114: }
        !           115: 
        !           116: 
        !           117: 
        !           118: unlex(token)
        !           119: int token;
        !           120: {
        !           121:   unlexed = token;
        !           122:   unlexed_symval = symval;
        !           123: }
        !           124: 
        !           125: 
        !           126: 
        !           127: int
        !           128: lex()
        !           129: {
        !           130:   register int c;
        !           131:   register char *p;
        !           132: 
        !           133:   if (unlexed >= 0)
        !           134:     {
        !           135:       symval = unlexed_symval;
        !           136:       c = unlexed;
        !           137:       unlexed = -1;
        !           138:       return (c);
        !           139:     }
        !           140: 
        !           141:   c = skip_white_space();
        !           142: 
        !           143:   switch (c)
        !           144:     {
        !           145:     case EOF:
        !           146:       return (ENDFILE);
        !           147: 
        !           148:     case 'A':  case 'B':  case 'C':  case 'D':  case 'E':
        !           149:     case 'F':  case 'G':  case 'H':  case 'I':  case 'J':
        !           150:     case 'K':  case 'L':  case 'M':  case 'N':  case 'O':
        !           151:     case 'P':  case 'Q':  case 'R':  case 'S':  case 'T':
        !           152:     case 'U':  case 'V':  case 'W':  case 'X':  case 'Y':
        !           153:     case 'Z':
        !           154:     case 'a':  case 'b':  case 'c':  case 'd':  case 'e':
        !           155:     case 'f':  case 'g':  case 'h':  case 'i':  case 'j':
        !           156:     case 'k':  case 'l':  case 'm':  case 'n':  case 'o':
        !           157:     case 'p':  case 'q':  case 'r':  case 's':  case 't':
        !           158:     case 'u':  case 'v':  case 'w':  case 'x':  case 'y':
        !           159:     case 'z':
        !           160:     case '.':  case '_':
        !           161:       p = token_buffer;
        !           162:       while (isalnum(c) || c == '_' || c == '.')
        !           163:        {
        !           164:          if (p < token_buffer + MAXTOKEN)
        !           165:            *p++ = c;
        !           166:          c = getc(finput);
        !           167:        }
        !           168: 
        !           169:       *p = 0;
        !           170:       ungetc(c, finput);
        !           171:       symval = getsym(token_buffer);
        !           172:       return (IDENTIFIER);
        !           173: 
        !           174:     case '0':  case '1':  case '2':  case '3':  case '4':
        !           175:     case '5':  case '6':  case '7':  case '8':  case '9':
        !           176:       {
        !           177:        numval = 0;
        !           178: 
        !           179:        while (isdigit(c))
        !           180:          {
        !           181:            numval = numval*10 + c - '0';
        !           182:            c = getc(finput);
        !           183:          }
        !           184:        ungetc(c, finput);
        !           185:        return (NUMBER);
        !           186:       }
        !           187: 
        !           188:     case '\'':
        !           189:       translations = -1;
        !           190: 
        !           191:       /* parse the literal token and compute character code in  code  */
        !           192: 
        !           193:       c = getc(finput);
        !           194:       {
        !           195:        register int code = 0;
        !           196: 
        !           197:        if (c == '\\')
        !           198:          {
        !           199:            c = getc(finput);
        !           200: 
        !           201:            if (c <= '7' && c >= '0')
        !           202:              {
        !           203:                while (c <= '7' && c >= '0')
        !           204:                  {
        !           205:                    code = (code * 8) + (c - '0');
        !           206:                    c = getc(finput);
        !           207:                  }
        !           208:                if (code >= 128 || code<0)/* JF this said if(c>=128) */
        !           209:                  fatals("malformatted literal token '\\%03o'",code);
        !           210:              }
        !           211:            else
        !           212:              {
        !           213:                if (c == 't')
        !           214:                  code = '\t';
        !           215:                else if (c == 'n')
        !           216:                  code = '\n';
        !           217:                else if (c == 'r')
        !           218:                  code = '\r';
        !           219:                else if (c == 'f')
        !           220:                  code = '\f';
        !           221:                else if (c == 'b')
        !           222:                  code = '\b';
        !           223:                else if (c == '\\')
        !           224:                  code = '\\';
        !           225:                else if (c == '\'')
        !           226:                  code = '\'';
        !           227:                else if (c == '\"')     /* JF this is a good idea */
        !           228:                  code = '\"';
        !           229:                else fatals("invalid literal token '\\%c'",c);
        !           230:                c = getc(finput);
        !           231:              }
        !           232:          }
        !           233:        else
        !           234:          {
        !           235:            code = c;
        !           236:            c = getc(finput);
        !           237:          }
        !           238:        if (c != '\'')
        !           239:          fatal("multicharacter literal tokens NOT supported");
        !           240: 
        !           241:        /* now fill token_buffer with the canonical name for this character
        !           242:           as a literal token.  Do not use what the user typed,
        !           243:           so that '\012' and '\n' can be interchangeable.  */
        !           244: 
        !           245:        p = token_buffer;
        !           246:        *p++ = '\'';
        !           247:        if (code == '\\')
        !           248:          {
        !           249:            p = token_buffer + 1;
        !           250:            *p++ = '\\';
        !           251:            *p++ = '\\';
        !           252:          }
        !           253:        else if (code == '\'')
        !           254:          {
        !           255:            p = token_buffer + 1;
        !           256:            *p++ = '\\';
        !           257:            *p++ = '\'';
        !           258:          }
        !           259:        else if (code >= 040 && code != 0177)
        !           260:          *p++ = code;
        !           261:        else if (code == '\t')
        !           262:          {
        !           263:            p = token_buffer + 1;
        !           264:            *p++ = '\\';
        !           265:            *p++ = 't';
        !           266:          }
        !           267:        else if (code == '\n')
        !           268:          {
        !           269:            p = token_buffer + 1;
        !           270:            *p++ = '\\';
        !           271:            *p++ = 'n';
        !           272:          }
        !           273:        else if (code == '\r')
        !           274:          {
        !           275:            p = token_buffer + 1;
        !           276:            *p++ = '\\';
        !           277:            *p++ = 'r';
        !           278:          }
        !           279:        else if (code == '\b')
        !           280:          {
        !           281:            p = token_buffer + 1;
        !           282:            *p++ = '\\';
        !           283:            *p++ = 'b';
        !           284:          }
        !           285:        else if (code == '\f')
        !           286:          {
        !           287:            p = token_buffer + 1;
        !           288:            *p++ = '\\';
        !           289:            *p++ = 'f';
        !           290:          }
        !           291:         else
        !           292:          {
        !           293:            *p++ = code / 0100 + '0';
        !           294:            *p++ = ((code / 010) & 07) + '0';
        !           295:            *p++ = (code & 07) + '0';
        !           296:          }
        !           297:        *p++ = '\'';
        !           298:        *p = 0;
        !           299:        symval = getsym(token_buffer);
        !           300:        symval->class = STOKEN;
        !           301:        if (! symval->user_token_number)
        !           302:          symval->user_token_number = code;
        !           303:        return (IDENTIFIER);
        !           304:       }
        !           305: 
        !           306:     case ',':
        !           307:       return (COMMA);
        !           308: 
        !           309:     case ':':
        !           310:       return (COLON);
        !           311: 
        !           312:     case ';':
        !           313:       return (SEMICOLON);
        !           314: 
        !           315:     case '|':
        !           316:       return (BAR);
        !           317: 
        !           318:     case '{':
        !           319:       return (LEFT_CURLY);
        !           320: 
        !           321:     case '=':
        !           322:       do
        !           323:        {
        !           324:          c = getc(finput);
        !           325:          if (c == '\n') lineno++;
        !           326:        }
        !           327:       while(c==' ' || c=='\n' || c=='\t');
        !           328: 
        !           329:       if (c == '{')
        !           330:        return(LEFT_CURLY);
        !           331:       else
        !           332:        {
        !           333:          ungetc(c, finput);
        !           334:          return(ILLEGAL);
        !           335:        }
        !           336: 
        !           337:     case '<':
        !           338:       p = token_buffer;
        !           339:       c = getc(finput);
        !           340:       while (c != '>')
        !           341:        {
        !           342:          if (c == '\n' || c == EOF)
        !           343:            fatal("unterminated type name");
        !           344: 
        !           345:          if (p >= token_buffer + MAXTOKEN - 1)
        !           346:            fatals("type name too long (%d max)",MAXTOKEN-1);
        !           347: 
        !           348:          *p++ = c;
        !           349:          c = getc(finput);
        !           350:        }
        !           351:       *p = 0;
        !           352:       return (TYPENAME);
        !           353:            
        !           354: 
        !           355:     case '%':
        !           356:       return (parse_percent_token());
        !           357: 
        !           358:     default:
        !           359:       return (ILLEGAL);
        !           360:     }
        !           361: }
        !           362: 
        !           363: 
        !           364: /* parse a token which starts with %.  Assumes the % has already been read and discarded.  */
        !           365: 
        !           366: int
        !           367: parse_percent_token ()
        !           368: {
        !           369:   register int c;
        !           370:   register char *p;
        !           371: 
        !           372:   p = token_buffer;
        !           373:   c = getc(finput);
        !           374: 
        !           375:   switch (c)
        !           376:     {
        !           377:     case '%':
        !           378:       return (TWO_PERCENTS);
        !           379: 
        !           380:     case '{':
        !           381:       return (PERCENT_LEFT_CURLY);
        !           382: 
        !           383:     case '<':
        !           384:       return (LEFT);
        !           385: 
        !           386:     case '>':
        !           387:       return (RIGHT);
        !           388: 
        !           389:     case '2':
        !           390:       return (NONASSOC);
        !           391: 
        !           392:     case '0':
        !           393:       return (TOKEN);
        !           394: 
        !           395:     case '=':
        !           396:       return (PREC);
        !           397:     }
        !           398:   if (!isalpha(c))
        !           399:     return (ILLEGAL);
        !           400: 
        !           401:   while (isalpha(c) || c == '_')
        !           402:     {
        !           403:       if (p < token_buffer + MAXTOKEN)
        !           404:        *p++ = c;
        !           405:       c = getc(finput);
        !           406:     }
        !           407: 
        !           408:   ungetc(c, finput);
        !           409: 
        !           410:   *p = 0;
        !           411: 
        !           412:   if (strcmp(token_buffer, "token") == 0
        !           413:       ||
        !           414:       strcmp(token_buffer, "term") == 0)
        !           415:     return (TOKEN);
        !           416:   else if (strcmp(token_buffer, "nterm") == 0)
        !           417:     return (NTERM);
        !           418:   else if (strcmp(token_buffer, "type") == 0)
        !           419:     return (TYPE);
        !           420:   else if (strcmp(token_buffer, "guard") == 0)
        !           421:     return (GUARD);
        !           422:   else if (strcmp(token_buffer, "union") == 0)
        !           423:     return (UNION);
        !           424:   else if (strcmp(token_buffer, "expect") == 0)
        !           425:     return (EXPECT);
        !           426:   else if (strcmp(token_buffer, "start") == 0)
        !           427:     return (START);
        !           428:   else if (strcmp(token_buffer, "left") == 0)
        !           429:     return (LEFT);
        !           430:   else if (strcmp(token_buffer, "right") == 0)
        !           431:     return (RIGHT);
        !           432:   else if (strcmp(token_buffer, "nonassoc") == 0
        !           433:           ||
        !           434:           strcmp(token_buffer, "binary") == 0)
        !           435:     return (NONASSOC);
        !           436:   else if (strcmp(token_buffer, "semantic_parser") == 0)
        !           437:     return (SEMANTIC_PARSER);
        !           438:   else if (strcmp(token_buffer, "pure_parser") == 0)
        !           439:     return (PURE_PARSER);
        !           440:   else if (strcmp(token_buffer, "prec") == 0)
        !           441:     return (PREC);
        !           442:   else return (ILLEGAL);
        !           443: }
unix.superglobalmegacorp.com
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.