42BSD/ucb/indent/lexi.c - annotate

Return to lexi.c CVS log
Up to [CSRG BSD Unix] / 42BSD / ucb / indent
Annotation of 42BSD/ucb/indent/lexi.c, revision 1.1.1.1

1.1       root        1: static char sccsid[] = "@(#)lexi.c     4.1     (Berkeley)      10/21/82"; /* version-identification string: file name, revision and date */
                      2: 
                      3: /*
                      4: 
                      5:                          Copyright (C) 1976
                      6:                                by the
                      7:                          Board of Trustees
                      8:                                of the
                      9:                        University of Illinois
                     10: 
                     11:                         All rights reserved
                     12: 
                     13: 
                     14: NAME:
                     15:        lexi
                     16: 
                     17: FUNCTION:
                     18:        This is the token scanner for indent
                     19: 
                     20: ALGORITHM:
                     21:        1) Strip off intervening blanks and/or tabs.
                     22:        2) If it is an alphanumeric token, move it to the token buffer "token".
                     23:           Check if it is a special reserved word that indent will want to
                     24:           know about.
                     25:        3) Non-alphanumeric tokens are handled with a big switch statement.  A
                     26:           flag is kept to remember if the last token was a "unary delimiter",
                     27:           which forces a following operator to be unary as opposed to binary.
                     28: 
                     29: PARAMETERS:
                     30:        None
                     31: 
                     32: RETURNS:
                     33:        An integer code indicating the type of token scanned.
                     34: 
                     35: GLOBALS:
                     36:        buf_ptr =
                     37:        had_eof
                     38:        last_u_d =      Set to true iff this token is a "unary delimiter"
                     39: 
                     40: CALLS:
                     41:        fill_buffer
                     42:        printf (lib)
                     43: 
                     44: CALLED BY:
                     45:        main
                     46: 
                     47: NOTES:
                     48:        Start of comment is passed back so that the comment can be scanned by
                     49:        pr_comment.
                     50: 
                     51:        Strings and character literals are returned just like identifiers.
                     52: 
                     53: HISTORY:
                     54:        initial coding  November 1976   D A Willcox of CAC
                     55:        1/7/77          D A Willcox of CAC      Fix to provide proper handling
                     56:                                                of "int a -1;"
                     57: 
                     58: */
                     59: 
                     60: /* Here we have the token scanner for indent.  It scans off one token and
                     61:    puts it in the global variable "token".  It returns a code, indicating the
                     62:    type of token scanned. */
                     63: 
                     64: #include "indent_globs.h";
                     65: #include "indent_codes.h";
                     66: 
                     67: 
                     68: 
                     69: #define alphanum 1
                     70: #define opchar 3
                     71: 
                     72: struct templ {
                     73:     char   *rwd;
                     74:     int     rwcode;
                     75: };
                     76: 
                     77: struct templ    specials[] =
                     78: {
                     79:     "switch", 1,
                     80:     "case", 2,
                     81:     "struct", 3,
                     82:     "default", 2,
                     83:     "int", 4,
                     84:     "char", 4,
                     85:     "float", 4,
                     86:     "double", 4,
                     87:     "long", 4,
                     88:     "short", 4,
                     89:     "typdef", 4,
                     90:     "unsigned", 4,
                     91:     "register", 4,
                     92:     "static", 4,
                     93:     "global", 4,
                     94:     "extern", 4,
                     95:     "if", 5,
                     96:     "while", 5,
                     97:     "for", 5,
                     98:     "else", 6,
                     99:     "do", 6,
                    100:     "sizeof", 0,
                    101:     0, 0
                    102: };
                    103: 
                    104: char    chartype[128] =
                    105: {                 /* class of each 7-bit ASCII code; lexi() indexes it with
                    106:                      (c & 0177): 1 = alphanum, 3 = opchar, 0 = anything else */
                    107:     0, 0, 0, 0, 0, 0, 0, 0,   /* 000-007: control characters */
                    108:     0, 0, 0, 0, 0, 0, 0, 0,   /* 010-017: control characters */
                    109:     0, 0, 0, 0, 0, 0, 0, 0,   /* 020-027: control characters */
                    110:     0, 0, 0, 0, 0, 0, 0, 0,   /* 030-037: control characters */
                    111:     0, 3, 0, 0, 0, 3, 3, 0,   /* 040-047: space ! " # $ % & ' */
                    112:     0, 0, 3, 3, 0, 3, 3, 3,   /* 050-057: ( ) * + , - . / */
                    113:     1, 1, 1, 1, 1, 1, 1, 1,   /* 060-067: digits 0-7 */
                    114:     1, 1, 0, 0, 3, 3, 3, 3,   /* 070-077: 8 9 : ; < = > ? */
                    115:     0, 1, 1, 1, 1, 1, 1, 1,   /* 100-107: @ A-G */
                    116:     1, 1, 1, 1, 1, 1, 1, 1,   /* 110-117: H-O */
                    117:     1, 1, 1, 1, 1, 1, 1, 1,   /* 120-127: P-W */
                    118:     1, 1, 1, 0, 0, 0, 3, 1,   /* 130-137: X Y Z [ \ ] ^ _ */
                    119:     0, 1, 1, 1, 1, 1, 1, 1,   /* 140-147: ` a-g */
                    120:     1, 1, 1, 1, 1, 1, 1, 1,   /* 150-157: h-o */
                    121:     1, 1, 1, 1, 1, 1, 1, 1,   /* 160-167: p-w */
                    122:     1, 1, 1, 0, 3, 0, 3, 0    /* 170-177: x y z { | } ~ DEL */
                    123: };
                    124: 
                    125: int     last_nl = true;
                    126:  /* true if the last thing scanned was a newline or form feed (and before the first token); lexi() copies it into col_1 and then clears it */
                    127: 
                    128: 
                    129: 
                    130: int     lexi () {
                    131:     register char  *tok;
                    132:  /* local pointer to next char in token */
                    133:     register int    i;
                    134:  /* local loop counter */
                    135:     register char  *j;
                    136:  /* used for searching thru list of reserved words */
                    137:     int     unary_delim;
                    138:  /* this is set to 1 if the current token forces a following operator to be
                    139:     unary */
                    140:     static int  last_code;
                    141:  /* the last token type returned */
                    142:     static int  l_struct;
                    143:  /* set to 1 if the last token was 'struct' */
                    144:     int     found_it;
                    145:     int     code;  /* internal code to be returned */
                    146:     char    qchar; /* the delimiter character for a string */
                    147: 
                    148:     tok = token;              /* point to start of place to save token */
                    149:     unary_delim = false;
                    150:     col_1 = last_nl;          /* tell world that this token started in column
                    151:                                  1 iff the last thing scanned was nl */
                    152:     last_nl = false;
                    153: 
                    154:     while (*buf_ptr == ' ' || *buf_ptr == '\t') {
                    155:     /* get rid of blanks */
                    156:        col_1 = false;         /* leading blanks imply token is not in column 1
                    157:                                  */
                    158:        if (++buf_ptr >= buf_end)
                    159:            fill_buffer ();
                    160:     }
                    161: 
                    162: /*----------------------------------------------------------*\ 
                    163: |    Scan an alphanumeric token
                    164: \*----------------------------------------------------------*/
                    165: 
                    166:     if (chartype[*buf_ptr & 0177] == alphanum) {
                    167:     /* we have a character or number */
                    168:        while (chartype[*buf_ptr & 0177] == alphanum) {
                    169:        /* copy it over */
                    170:            *tok++ = *buf_ptr++;
                    171:            if (buf_ptr >= buf_end)
                    172:                fill_buffer ();
                    173:        }
                    174: 
                    175:        *tok++ = '\0';
                    176: 
                    177:        if (l_struct) {        /* if last token was 'struct', then this token
                    178:                                  should be treated as a declaration */
                    179:            l_struct = false;
                    180:            last_code = ident;
                    181:            last_u_d = true;
                    182:            return (decl);
                    183:        }
                    184: 
                    185:        last_u_d = false;      /* operator after indentifier is binary */
                    186: 
                    187:        for (i = 0; specials[i].rwd != 0; ++i) {
                    188:        /* this loop will check if the token is a keyword.  if so, a following
                    189:           operator is unary */
                    190:            last_code = ident; /* remember that this is the code we will return
                    191:                                  */
                    192:            j = specials[i].rwd;
                    193:        /* point at ith reserved word */
                    194:            tok = token;       /* point at scanned toekn */
                    195:            found_it = true;   /* set to false if not found */
                    196:            do {
                    197:                if (*tok++ != *j) {
                    198:                    found_it = false;
                    199:                    break;
                    200:                }
                    201:            } while (*j++);
                    202: 
                    203:            if (found_it) {    /* we have a keyword */
                    204:                last_u_d = true;
                    205:                switch (specials[i].rwcode) {
                    206:                    case 1:    /* it is a switch */
                    207:                        return (swstmt);
                    208:                    case 2:    /* a case or default */
                    209:                        return (casestmt);
                    210: 
                    211:                    case 3:    /* a "struct" */
                    212:                        l_struct = true;
                    213:                    /* Next time around, we will want to know that we have had
                    214:                       a 'struct' */
                    215:                    case 4:    /* one of the declaration keywords */
                    216:                        if(p_l_follow) break;   /* inside parens: cast */
                    217:                        last_code = decl;
                    218:                        return (decl);
                    219: 
                    220:                    case 5:    /* if, while, for */
                    221:                        return (sp_paren);
                    222: 
                    223:                    case 6:    /* do, else */
                    224:                        return (sp_nparen);
                    225: 
                    226:                    default:   /* all others are treated like any other
                    227:                                  identifier */
                    228:                        return (ident);
                    229:                }              /* end of switch */
                    230:            }                  /* end of if (found_it) */
                    231: 
                    232:        }
                    233: 
                    234:        if (last_code == decl) /* if this is a declared variable, then
                    235:                                  following sign is unary */
                    236:            last_u_d = true;   /* will make "int a -1" work */
                    237:        last_code = ident;
                    238:        return (ident);        /* the ident is not in the list */
                    239:     }                         /* end of procesing for alpanum character */
                    240: 
                    241: 
                    242: 
                    243: /*----------------------------------------------------------*\ 
                    244: |   Scan a non-alphanumeric token
                    245: \*----------------------------------------------------------*/
                    246: 
                    247:     *tok++ = *buf_ptr;        /* if it is only a one-character token, it is
                    248:                                  moved here */
                    249:     *tok = '\0';
                    250:     if (++buf_ptr >= buf_end)
                    251:        fill_buffer ();
                    252: 
                    253:     switch (*token) {
                    254:        case '\n': 
                    255:            unary_delim = last_u_d;
                    256:            last_nl = true;    /* remember that we just had a newline */
                    257:            code = (had_eof ? 0 : newline);
                    258:        /* if data has been exausted, the newline is a dummy, and we should
                    259:           return code to stop */
                    260:            break;
                    261: 
                    262:        case '\'':             /* start of quoted character */
                    263:            qchar = '\'';      /* remember final delimiter */
                    264:            goto copy_lit;     /* and go to common literal code */
                    265: 
                    266:        case '"':              /* start of string */
                    267:            qchar = '"';
                    268: 
                    269:     copy_lit: 
                    270:            do {               /* copy the string */
                    271:                while (1) {    /* move one character or [/<char>]<char> */
                    272:                    if (*buf_ptr == '\n') {
                    273:                    /* check for unterminated literal */
                    274:                        printf ("%d: Unterminated literal\n", line_no);
                    275:                        goto stop_lit;
                    276:                    /* Don't copy any more */
                    277:                    }
                    278: 
                    279:                    *tok = *buf_ptr++;
                    280:                    if (buf_ptr >= buf_end)
                    281:                        fill_buffer ();
                    282:                    if (had_eof || ((tok - token) > (bufsize - 2))) {
                    283:                        printf ("Unterminated literal\n");
                    284:                        ++tok;
                    285:                        goto stop_lit;
                    286:                    /* get outof literal copying loop */
                    287:                    }
                    288: 
                    289:                    if (*tok == '\\') {
                    290:                    /* if escape, copy extra char */
                    291:                        if (*buf_ptr == '\n')
                    292:                               /* check for escaped newline */
                    293:                            ++line_no;
                    294:                        *(++tok) = *buf_ptr++;
                    295:                        ++tok; /* we must increment this again because we
                    296:                                  copied two chars */
                    297:                        if (buf_ptr >= buf_end)
                    298:                            fill_buffer ();
                    299:                    }
                    300:                    else
                    301:                        break; /* we copied one character */
                    302:                }              /* end of while (1) */
                    303:            } while (*tok++ != qchar);
                    304: 
                    305:     stop_lit: 
                    306:            code = ident;
                    307:            break;
                    308: 
                    309:        case ('('): 
                    310:        case ('['): 
                    311:            unary_delim = true;
                    312:            code = lparen;
                    313:            break;
                    314: 
                    315:        case (')'): 
                    316:        case (']'): 
                    317:            code = rparen;
                    318:            break;
                    319: 
                    320:        case '#': 
                    321:            unary_delim = last_u_d;
                    322:            code = preesc;
                    323:            break;
                    324: 
                    325:        case '?': 
                    326:            unary_delim = true;
                    327:            code = question;
                    328:            break;
                    329: 
                    330:        case (':'): 
                    331:            code = colon;
                    332:            unary_delim = true;
                    333:            break;
                    334: 
                    335:        case (';'): 
                    336:            unary_delim = true;
                    337:            code = semicolon;
                    338:            break;
                    339: 
                    340:        case ('{'): 
                    341:            unary_delim = true;
                    342:            code = lbrace;
                    343:            break;
                    344: 
                    345:        case ('}'): 
                    346:            unary_delim = true;
                    347:            code = rbrace;
                    348:            break;
                    349: 
                    350:        case 014:              /* a form feed */
                    351:            unary_delim = last_u_d;
                    352:            last_nl = true;    /* remember this so we can set 'col_1' right */
                    353:            code = form_feed;
                    354:            break;
                    355: 
                    356:        case (','): 
                    357:            unary_delim = true;
                    358:            code = comma;
                    359:            break;
                    360: 
                    361:        case '.': 
                    362:            unary_delim = false;
                    363:            code = period;
                    364:            break;
                    365: 
                    366:        case '-': 
                    367:        case '+':              /* check for -, +, --, ++ */
                    368:            code = (last_u_d ? unary_op : binary_op);
                    369:            unary_delim = true;
                    370: 
                    371:            if (*buf_ptr == token[0]) {
                    372:            /* check for doubled character */
                    373:                *tok++ = *buf_ptr++;
                    374:            /* buffer overflow will be checked at end of loop */
                    375:                if (last_code == ident || last_code == rparen) {
                    376:                    code = (last_u_d ? unary_op : postop);
                    377:                /* check for following ++ or -- */
                    378:                    unary_delim = false;
                    379:                }
                    380:            }
                    381:            else
                    382:                if (*buf_ptr == '>' || *buf_ptr == '=')
                    383:                               /* check for operator -> or += */
                    384:                    *tok++ = *buf_ptr++;
                    385:        /* buffer overflow will be checked at end of switch */
                    386: 
                    387:            break;
                    388: 
                    389:        case '=': 
                    390:            if (chartype[*buf_ptr] == opchar) {
                    391:            /* we have two char assignment */
                    392:                *tok++ = *buf_ptr;
                    393:            /* move second character */
                    394:                if (++buf_ptr >= buf_end)
                    395:                    fill_buffer ();
                    396:            }
                    397: 
                    398:            code = binary_op;
                    399:            unary_delim = true;
                    400:            if (token[1] != '<' && token[1] != '>')
                    401:                               /* check for possible 3 char operator */
                    402:                break;
                    403:        /* can drop thru!!! */
                    404: 
                    405:        case '>': 
                    406:        case '<': 
                    407:        case '!':              /* ops like <, <<, <=, !=, etc */
                    408:            if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
                    409:                *tok++ = *buf_ptr;
                    410:                if (++buf_ptr >= buf_end)
                    411:                    fill_buffer ();
                    412:            }
                    413: 
                    414:            if (*buf_ptr == '=')
                    415:                 *tok++ = *buf_ptr++;
                    416:            code = (last_u_d ? unary_op : binary_op);
                    417:            unary_delim = true;
                    418:            break;
                    419: 
                    420:        default: 
                    421:            if (token[0] == '/' && *buf_ptr == '*') {
                    422:            /* it is start of comment */
                    423:                *tok++ = '*';
                    424: 
                    425:                if (++buf_ptr >= buf_end)
                    426:                    fill_buffer ();
                    427: 
                    428:                code = comment;
                    429:                unary_delim = last_u_d;
                    430:                break;
                    431:            }
                    432: 
                    433:            while (*(tok - 1) == *buf_ptr || *buf_ptr=='=') {
                    434:            /* handle ||, &&, etc, and also things as in int *****i */
                    435:                *tok++ = *buf_ptr;
                    436:                if (++buf_ptr >= buf_end)
                    437:                    fill_buffer ();
                    438:            }
                    439: 
                    440: 
                    441:            code = (last_u_d ? unary_op : binary_op);
                    442:            unary_delim = true;
                    443: 
                    444: 
                    445:     }                         /* end of switch */
                    446: 
                    447:     if (code != newline) {
                    448:        l_struct = false;
                    449:        last_code = code;
                    450:     }
                    451: 
                    452:     if (buf_ptr >= buf_end)    /* check for input buffer empty */
                    453:        fill_buffer ();
                    454:     last_u_d = unary_delim;
                    455:     *tok = '\0';              /* null terminate the token */
                    456:     return (code);
                    457: };
unix.superglobalmegacorp.com
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.