43BSD/ucb/indent/lexi.c - annotate

Return to lexi.c CVS log
Up to [CSRG BSD Unix] / 43BSD / ucb / indent
Annotation of 43BSD/ucb/indent/lexi.c, revision 1.1.1.1

1.1       root        1: /*
                      2:  * Copyright (c) 1980 Regents of the University of California.
                      3:  * All rights reserved.  The Berkeley software License Agreement
                      4:  * specifies the terms and conditions for redistribution.
                      5:  */
                      6: 
                      7: #ifndef lint
                      8: static char sccsid[] = "@(#)lexi.c     5.4 (Berkeley) 9/10/85";
                      9: #endif not lint
                     10: 
                     11: /*-
                     12:  *
                     13:  *                       Copyright (C) 1976
                     14:  *                             by the
                     15:  *                       Board of Trustees
                     16:  *                             of the
                     17:  *                     University of Illinois
                     18:  *
                     19:  *                      All rights reserved
                     20:  *
                     21:  *
                     22:  * NAME:
                     23:  *     lexi
                     24:  *
                     25:  * FUNCTION:
                     26:  *     This is the token scanner for indent
                     27:  *
                     28:  * ALGORITHM:
                     29:  *     1) Strip off intervening blanks and/or tabs.
                     30:  *     2) If it is an alphanumeric token, move it to the token buffer "token".
                     31:  *        Check if it is a special reserved word that indent will want to
                     32:  *        know about.
                     33:  *     3) Non-alphanumeric tokens are handled with a big switch statement.  A
                     34:  *        flag is kept to remember if the last token was a "unary delimiter",
                     35:  *        which forces a following operator to be unary as opposed to binary.
                     36:  *
                     37:  * PARAMETERS:
                     38:  *     None
                     39:  *
                     40:  * RETURNS:
                     41:  *     An integer code indicating the type of token scanned.
                     42:  *
                     43:  * GLOBALS:
                     44:  *     buf_ptr =
                     45:  *     had_eof
                     46:  *     ps.last_u_d =   Set to true iff this token is a "unary delimiter"
                     47:  *
                     48:  * CALLS:
                     49:  *     fill_buffer
                     50:  *     printf (lib)
                     51:  *
                     52:  * CALLED BY:
                     53:  *     main
                     54:  *
                     55:  * NOTES:
                     56:  *     Start of comment is passed back so that the comment can be scanned by
                     57:  *     pr_comment.
                     58:  *
                     59:  *     Strings and character literals are returned just like identifiers.
                     60:  *
                     61:  * HISTORY:
                     62:  *     initial coding  November 1976   D A Willcox of CAC
                     63:  *     1/7/77          D A Willcox of CAC      Fix to provide proper handling
                     64:  *                                             of "int a -1;"
                     65:  *
                     66:  */
                     67: 
                     68: /*
                     69:  * Here we have the token scanner for indent.  It scans off one token and
                     70:  * puts it in the global variable "token".  It returns a code, indicating
                     71:  * the type of token scanned. 
                     72:  */
                     73: 
                     74: #include "indent_globs.h";
                     75: #include "indent_codes.h";
                     76: #include "ctype.h"
                     77: 
                     78: #define alphanum 1
                     79: #define opchar 3
                     80: 
                     /*
                      * One entry of the reserved-word table: the spelling of the word and
                      * the class code that lexi() switches on when the word is scanned.
                      */
                     struct templ {
                         char       *rwd;       /* keyword text; a null pointer ends the table */
                         int         rwcode;    /* keyword class, see the legend below */
                     };

                     /*
                      * Reserved words known to the scanner.  The rwcode values are the ones
                      * lexi() dispatches on:
                      *   0  no special handling (returned as an ordinary identifier)
                      *   1  switch
                      *   2  case, default
                      *   3  struct, union, enum
                      *   4  declaration keywords
                      *   5  if, while, for
                      *   6  else, do
                      *   7  sizeof
                      * The array has 100 slots so that addkey() can append further keywords
                      * after the built-in ones; the {0, 0} entry marks the end of the list.
                      */
                     struct templ specials[100] =
                     {
                         {"switch", 1},
                         {"case", 2},
                         {"break", 0},
                         {"struct", 3},
                         {"union", 3},
                         {"enum", 3},
                         {"default", 2},
                         {"int", 4},
                         {"char", 4},
                         {"float", 4},
                         {"double", 4},
                         {"long", 4},
                         {"short", 4},
                         {"typedef", 4},        /* was misspelled "typdef", so the real
                                                 * keyword was never recognized */
                         {"unsigned", 4},
                         {"register", 4},
                         {"static", 4},
                         {"global", 4},
                         {"extern", 4},
                         {"void", 4},
                         {"goto", 0},
                         {"return", 0},
                         {"if", 5},
                         {"while", 5},
                         {"for", 5},
                         {"else", 6},
                         {"do", 6},
                         {"sizeof", 7},
                         {0, 0}
                     };
                    118: 
                    /*
                     * Character classification table, indexed by a 7-bit ASCII code.
                     * An entry of 1 (alphanum) marks letters, digits and underscore, 3
                     * (opchar) marks operator characters, and 0 marks everything else.
                     * Each row below covers sixteen consecutive codes.
                     */
                    char        chartype[128] =
                    {
                        /* 0x00 - 0x0f: control characters */
                        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                        /* 0x10 - 0x1f: control characters */
                        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                        /* 0x20 - 0x2f: space and punctuation up to slash */
                        0, 3, 0, 0, 0, 3, 3, 0, 0, 0, 3, 3, 0, 3, 3, 3,
                        /* 0x30 - 0x3f: digits, then colon through question mark */
                        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3,
                        /* 0x40 - 0x4f: at-sign, then A through O */
                        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                        /* 0x50 - 0x5f: P through Z, brackets, caret, underscore */
                        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 1,
                        /* 0x60 - 0x6f: backquote, then a through o */
                        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                        /* 0x70 - 0x7f: p through z, braces, bar, tilde, DEL */
                        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 0, 3, 0
                    };
                    140: 
                    141: 
                    142: 
                    143: 
                    144: int 
                    145: lexi()
                    146: {
                    147:     register char *tok;                /* local pointer to next char in token */
                    148:     int         unary_delim;   /* this is set to 1 if the current token 
                    149:                                 *
                    150:                                 * forces a following operator to be unary */
                    151:     static int  last_code;     /* the last token type returned */
                    152:     static int  l_struct;      /* set to 1 if the last token was 'struct' */
                    153:     int         code;          /* internal code to be returned */
                    154:     char        qchar;         /* the delimiter character for a string */
                    155: 
                    156:     tok = token;               /* point to start of place to save token */
                    157:     unary_delim = false;
                    158:     ps.col_1 = ps.last_nl;     /* tell world that this token started in
                    159:                                 * column 1 iff the last thing scanned was
                    160:                                 * nl */
                    161:     ps.last_nl = false;
                    162: 
                    163:     while (*buf_ptr == ' ' || *buf_ptr == '\t') {      /* get rid of blanks */
                    164:        ps.col_1 = false;       /* leading blanks imply token is not in
                    165:                                 * column 1 */
                    166:        if (++buf_ptr >= buf_end)
                    167:            fill_buffer();
                    168:     }
                    169: 
                    170:     /* Scan an alphanumeric token.  Note that we must also handle
                    171:      * stuff like "1.0e+03" and "7e-6". */
                    172:     if (chartype[*buf_ptr & 0177] == alphanum) {       /* we have a character
                    173:                                                         * or number */
                    174:        register char *j;       /* used for searching thru list of 
                    175:                                 * reserved words */
                    176:        register struct templ *p;
                    177:        register int c;
                    178: 
                    179:        do {                    /* copy it over */
                    180:            *tok++ = *buf_ptr++;
                    181:            if (buf_ptr >= buf_end)
                    182:                fill_buffer();
                    183:        } while (chartype[c = *buf_ptr & 0177] == alphanum ||
                    184:                isdigit(token[0]) && (c == '+' || c == '-') &&
                    185:                (tok[-1] == 'e' || tok[-1] == 'E'));
                    186:        *tok++ = '\0';
                    187:        while (*buf_ptr == ' ' || *buf_ptr == '\t') {   /* get rid of blanks */
                    188:            if (++buf_ptr >= buf_end)
                    189:                fill_buffer();
                    190:        }
                    191:        ps.its_a_keyword = false;
                    192:        ps.sizeof_keyword = false;
                    193:        if (l_struct) {         /* if last token was 'struct', then this
                    194:                                 * token should be treated as a
                    195:                                 * declaration */
                    196:            l_struct = false;
                    197:            last_code = ident;
                    198:            ps.last_u_d = true;
                    199:            return (decl);
                    200:        }
                    201:        ps.last_u_d = false;    /* Operator after indentifier is binary */
                    202:        last_code = ident;      /* Remember that this is the code we will
                    203:                                 * return */
                    204: 
                    205:        /*
                    206:         * This loop will check if the token is a keyword. 
                    207:         */
                    208:        for (p = specials; (j = p->rwd) != 0; p++) {
                    209:            tok = token;        /* point at scanned token */
                    210:            if (*j++ != *tok++ || *j++ != *tok++)
                    211:                continue;       /* This test depends on the fact that
                    212:                                 * identifiers are always at least 1
                    213:                                 * character long (ie. the first two bytes
                    214:                                 * of the identifier are always
                    215:                                 * meaningful) */
                    216:            if (tok[-1] == 0)
                    217:                break;          /* If its a one-character identifier */
                    218:            while (*tok++ == *j)
                    219:                if (*j++ == 0)
                    220:                    goto found_keyword; /* I wish that C had a multi-level
                    221:                                         * break... */
                    222:        }
                    223:        if (p->rwd) {           /* we have a keyword */
                    224:     found_keyword:
                    225:            ps.its_a_keyword = true;
                    226:            ps.last_u_d = true;
                    227:            switch (p->rwcode) {
                    228:                case 1: /* it is a switch */
                    229:                    return (swstmt);
                    230:                case 2: /* a case or default */
                    231:                    return (casestmt);
                    232: 
                    233:                case 3: /* a "struct" */
                    234:                    if (ps.p_l_follow)
                    235:                        break;  /* inside parens: cast */
                    236:                    l_struct = true;
                    237: 
                    238:                    /*
                    239:                     * Next time around, we will want to know that we have
                    240:                     * had a 'struct' 
                    241:                     */
                    242:                case 4: /* one of the declaration keywords */
                    243:                    if (ps.p_l_follow) {
                    244:                        ps.cast_mask |= 1 << ps.p_l_follow;
                    245:                        break;  /* inside parens: cast */
                    246:                    }
                    247:                    last_code = decl;
                    248:                    return (decl);
                    249: 
                    250:                case 5: /* if, while, for */
                    251:                    return (sp_paren);
                    252: 
                    253:                case 6: /* do, else */
                    254:                    return (sp_nparen);
                    255: 
                    256:                case 7:
                    257:                    ps.sizeof_keyword = true;
                    258:                default:        /* all others are treated like any other
                    259:                                 * identifier */
                    260:                    return (ident);
                    261:            }                   /* end of switch */
                    262:        }                       /* end of if (found_it) */
                    263:        if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0
                    264:            && (buf_ptr[1] != ')' || buf_ptr[2] != ';')) {
                    265:            strncpy(ps.procname, token, sizeof ps.procname - 1);
                    266:            ps.in_parameter_declaration = 1;
                    267:        }
                    268: 
                    269:        /*
                    270:         * The following hack attempts to guess whether or not the current
                    271:         * token is in fact a declaration keyword -- one that has been
                    272:         * typedefd 
                    273:         */
                    274:        if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr))
                    275:            && !ps.p_l_follow
                    276:            && (ps.last_token == rparen || ps.last_token == semicolon ||
                    277:                ps.last_token == decl ||
                    278:                ps.last_token == lbrace || ps.last_token == rbrace)) {
                    279:            ps.its_a_keyword = true;
                    280:            ps.last_u_d = true;
                    281:            last_code = decl;
                    282:            return decl;
                    283:        }
                    284:        if (last_code == decl)  /* if this is a declared variable, then
                    285:                                 * following sign is unary */
                    286:            ps.last_u_d = true; /* will make "int a -1" work */
                    287:        last_code = ident;
                    288:        return (ident);         /* the ident is not in the list */
                    289:     }                          /* end of procesing for alpanum character */
                    290:     /* Scan a non-alphanumeric token */
                    291: 
                    292:     *tok++ = *buf_ptr;         /* if it is only a one-character token, it
                    293:                                 * is moved here */
                    294:     *tok = '\0';
                    295:     if (++buf_ptr >= buf_end)
                    296:        fill_buffer();
                    297: 
                    298:     switch (*token) {
                    299:        case '\n':
                    300:            unary_delim = ps.last_u_d;
                    301:            ps.last_nl = true;  /* remember that we just had a newline */
                    302:            code = (had_eof ? 0 : newline);
                    303: 
                    304:            /*
                    305:             * if data has been exausted, the newline is a dummy, and we
                    306:             * should return code to stop 
                    307:             */
                    308:            break;
                    309: 
                    310:        case '\'':              /* start of quoted character */
                    311:        case '"':               /* start of string */
                    312:            qchar = *token;
                    313:            if (troff) {
                    314:                tok[-1] = '`';
                    315:                if (qchar == '"')
                    316:                    *tok++ = '`';
                    317:                *tok++ = BACKSLASH;
                    318:                *tok++ = 'f';
                    319:                *tok++ = 'L';
                    320:            }
                    321:            do {                /* copy the string */
                    322:                while (1) {     /* move one character or [/<char>]<char> */
                    323:                    if (*buf_ptr == '\n') {
                    324:                        printf("%d: Unterminated literal\n", line_no);
                    325:                        goto stop_lit;
                    326:                    }
                    327:                    *tok = *buf_ptr++;
                    328:                    if (buf_ptr >= buf_end)
                    329:                        fill_buffer();
                    330:                    if (had_eof || ((tok - token) > (bufsize - 2))) {
                    331:                        printf("Unterminated literal\n");
                    332:                        ++tok;
                    333:                        goto stop_lit;
                    334:                        /* get outof literal copying loop */
                    335:                    }
                    336:                    if (*tok == BACKSLASH) {    /* if escape, copy extra
                    337:                                                 * char */
                    338:                        if (*buf_ptr == '\n')   /* check for escaped
                    339:                                                 * newline */
                    340:                            ++line_no;
                    341:                        if (troff) {
                    342:                            *++tok = BACKSLASH;
                    343:                            if (*buf_ptr == BACKSLASH)
                    344:                                *++tok = BACKSLASH;
                    345:                        }
                    346:                        *++tok = *buf_ptr++;
                    347:                        ++tok;  /* we must increment this again because we
                    348:                                 * copied two chars */
                    349:                        if (buf_ptr >= buf_end)
                    350:                            fill_buffer();
                    351:                    }
                    352:                    else
                    353:                        break;  /* we copied one character */
                    354:                }               /* end of while (1) */
                    355:            } while (*tok++ != qchar);
                    356:            if (troff) {
                    357:                tok[-1] = BACKSLASH;
                    358:                *tok++ = 'f';
                    359:                *tok++ = 'R';
                    360:                *tok++ = '\'';
                    361:                if (qchar == '"')
                    362:                    *tok++ = '\'';
                    363:            }
                    364:     stop_lit:
                    365:            code = ident;
                    366:            break;
                    367: 
                    368:        case ('('):
                    369:        case ('['):
                    370:            unary_delim = true;
                    371:            code = lparen;
                    372:            break;
                    373: 
                    374:        case (')'):
                    375:        case (']'):
                    376:            code = rparen;
                    377:            break;
                    378: 
                    379:        case '#':
                    380:            unary_delim = ps.last_u_d;
                    381:            code = preesc;
                    382:            break;
                    383: 
                    384:        case '?':
                    385:            unary_delim = true;
                    386:            code = question;
                    387:            break;
                    388: 
                    389:        case (':'):
                    390:            code = colon;
                    391:            unary_delim = true;
                    392:            break;
                    393: 
                    394:        case (';'):
                    395:            unary_delim = true;
                    396:            code = semicolon;
                    397:            break;
                    398: 
                    399:        case ('{'):
                    400:            unary_delim = true;
                    401: 
                    402:            /*
                    403:             * if (ps.in_or_st) ps.block_init = 1; 
                    404:             */
                    405:            code = ps.block_init ? lparen : lbrace;
                    406:            break;
                    407: 
                    408:        case ('}'):
                    409:            unary_delim = true;
                    410:            code = ps.block_init ? rparen : rbrace;
                    411:            break;
                    412: 
                    413:        case 014:               /* a form feed */
                    414:            unary_delim = ps.last_u_d;
                    415:            ps.last_nl = true;  /* remember this so we can set 'ps.col_1'
                    416:                                 * right */
                    417:            code = form_feed;
                    418:            break;
                    419: 
                    420:        case (','):
                    421:            unary_delim = true;
                    422:            code = comma;
                    423:            break;
                    424: 
                    425:        case '.':
                    426:            unary_delim = false;
                    427:            code = period;
                    428:            break;
                    429: 
                    430:        case '-':
                    431:        case '+':               /* check for -, +, --, ++ */
                    432:            code = (ps.last_u_d ? unary_op : binary_op);
                    433:            unary_delim = true;
                    434: 
                    435:            if (*buf_ptr == token[0]) {
                    436:                /* check for doubled character */
                    437:                *tok++ = *buf_ptr++;
                    438:                /* buffer overflow will be checked at end of loop */
                    439:                if (last_code == ident || last_code == rparen) {
                    440:                    code = (ps.last_u_d ? unary_op : postop);
                    441:                    /* check for following ++ or -- */
                    442:                    unary_delim = false;
                    443:                }
                    444:            }
                    445:            else if (*buf_ptr == '=')
                    446:                /* check for operator += */
                    447:                *tok++ = *buf_ptr++;
                    448:            else if (token[0] == '-' && *buf_ptr == '>') {
                    449:                /* check for operator -> */
                    450:                *tok++ = *buf_ptr++;
                    451:                if (!pointer_as_binop) {
                    452:                    code = unary_op;
                    453:                    unary_delim = false;
                    454:                    ps.want_blank = false;
                    455:                }
                    456:            }
                    457:            /* buffer overflow will be checked at end of switch */
                    458: 
                    459:            break;
                    460: 
                    461:        case '=':
                    462:            if (ps.in_or_st)
                    463:                ps.block_init = 1;
                    464:            if (chartype[*buf_ptr] == opchar) { /* we have two char
                    465:                                                 * assignment */
                    466:                tok[-1] = *buf_ptr++;
                    467:                if ((tok[-1] == '<' || tok[-1] == '>') && tok[-1] == *buf_ptr)
                    468:                    *tok++ = *buf_ptr++;
                    469:                *tok++ = '=';   /* Flip =+ to += */
                    470:                *tok = 0;
                    471:            }
                    472:            code = binary_op;
                    473:            unary_delim = true;
                    474:            break;
                    475:            /* can drop thru!!! */
                    476: 
                    477:        case '>':
                    478:        case '<':
                    479:        case '!':               /* ops like <, <<, <=, !=, etc */
                    480:            if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
                    481:                *tok++ = *buf_ptr;
                    482:                if (++buf_ptr >= buf_end)
                    483:                    fill_buffer();
                    484:            }
                    485:            if (*buf_ptr == '=')
                    486:                *tok++ = *buf_ptr++;
                    487:            code = (ps.last_u_d ? unary_op : binary_op);
                    488:            unary_delim = true;
                    489:            break;
                    490: 
                    491:        default:
                    492:            if (token[0] == '/' && *buf_ptr == '*') {
                    493:                /* it is start of comment */
                    494:                *tok++ = '*';
                    495: 
                    496:                if (++buf_ptr >= buf_end)
                    497:                    fill_buffer();
                    498: 
                    499:                code = comment;
                    500:                unary_delim = ps.last_u_d;
                    501:                break;
                    502:            }
                    503:            while (*(tok - 1) == *buf_ptr || *buf_ptr == '=') {
                    504:                /* handle ||, &&, etc, and also things as in int *****i */
                    505:                *tok++ = *buf_ptr;
                    506:                if (++buf_ptr >= buf_end)
                    507:                    fill_buffer();
                    508:            }
                    509:            code = (ps.last_u_d ? unary_op : binary_op);
                    510:            unary_delim = true;
                    511: 
                    512: 
                    513:     }                          /* end of switch */
                    514:     if (code != newline) {
                    515:        l_struct = false;
                    516:        last_code = code;
                    517:     }
                    518:     if (buf_ptr >= buf_end)    /* check for input buffer empty */
                    519:        fill_buffer();
                    520:     ps.last_u_d = unary_delim;
                    521:     *tok = '\0';               /* null terminate the token */
                    522:     return (code);
                    523: };
                    524: 
                    525: /* Add the given keyword to the keyword table, using val as the keyword type
                    526:    */
                    527: addkey (key, val)
                    528: char       *key;
                    529: {
                    530:     register struct templ *p = specials;
                    531:     while (p->rwd)
                    532:        if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
                    533:            return;
                    534:        else
                    535:            p++;
                    536:     if (p >= specials + sizeof specials / sizeof specials[0])
                    537:        return;                 /* For now, table overflows are silently
                    538:                                   ignored */
                    539:     p->rwd = key;
                    540:     p->rwcode = val;
                    541:     p[1].rwd = 0;
                    542:     p[1].rwcode = 0;
                    543:     return;
                    544: }
unix.superglobalmegacorp.com
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.