43BSDTahoe/ucb/indent/lexi.c - annotate

Return to lexi.c CVS log
Up to [CSRG BSD Unix] / 43BSDTahoe / ucb / indent
Annotation of 43BSDTahoe/ucb/indent/lexi.c, revision 1.1.1.1

1.1       root        1: /*
                      2:  * Copyright (c) 1980 Regents of the University of California.
                      3:  * Copyright (c) 1976 Board of Trustees of the University of Illinois.
                      4:  * All rights reserved.
                      5:  *
                      6:  * Redistribution and use in source and binary forms are permitted
                      7:  * provided that the above copyright notice and this paragraph are
                      8:  * duplicated in all such forms and that any documentation,
                      9:  * advertising materials, and other materials related to such
                     10:  * distribution and use acknowledge that the software was developed
                     11:  * by the University of California, Berkeley and the University
                     12:  * of Illinois, Urbana.  The name of either
                     13:  * University may not be used to endorse or promote products derived
                     14:  * from this software without specific prior written permission.
                     15:  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
                     16:  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
                     17:  * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
                     18:  */
                     19: 
                     20: #ifndef lint
                     21: static char sccsid[] = "@(#)lexi.c     5.8 (Berkeley) 6/29/88";
                     22: #endif /* not lint */
                     23: 
                     24: /*
                     25:  * NAME:
                     26:  *     lexi
                     27:  *
                     28:  * FUNCTION:
                     29:  *     This is the token scanner for indent
                     30:  *
                     31:  * ALGORITHM:
                     32:  *     1) Strip off intervening blanks and/or tabs.
                     33:  *     2) If it is an alphanumeric token, move it to the token buffer "token".
                     34:  *        Check if it is a special reserved word that indent will want to
                     35:  *        know about.
                     36:  *     3) Non-alphanumeric tokens are handled with a big switch statement.  A
                     37:  *        flag is kept to remember if the last token was a "unary delimiter",
                     38:  *        which forces a following operator to be unary as opposed to binary.
                     39:  *
                     40:  * PARAMETERS:
                     41:  *     None
                     42:  *
                     43:  * RETURNS:
                     44:  *     An integer code indicating the type of token scanned.
                     45:  *
                     46:  * GLOBALS:
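                     47:  *     buf_ptr =       Advanced past the characters of the token scanned
                     48:  *     had_eof =       Read only; a newline scanned after EOF returns 0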
                     49:  *     ps.last_u_d =   Set to true iff this token is a "unary delimiter"
                     50:  *
                     51:  * CALLS:
                     52:  *     fill_buffer
                     53:  *     printf (lib)
                     54:  *
                     55:  * CALLED BY:
                     56:  *     main
                     57:  *
                     58:  * NOTES:
                     59:  *     Start of comment is passed back so that the comment can be scanned by
                     60:  *     pr_comment.
                     61:  *
                     62:  *     Strings and character literals are returned just like identifiers.
                     63:  *
                     64:  * HISTORY:
                     65:  *     initial coding  November 1976   D A Willcox of CAC
                     66:  *     1/7/77          D A Willcox of CAC      Fix to provide proper handling
                     67:  *                                             of "int a -1;"
                     68:  *
                     69:  */
                     70: 
                     71: /*
                     72:  * Here we have the token scanner for indent.  It scans off one token and
                     73:  * puts it in the global variable "token".  It returns a code, indicating
                     74:  * the type of token scanned. 
                     75:  */
                     76: 
                     77: #include "indent_globs.h"
                     78: #include "indent_codes.h"
                     79: #include "ctype.h"
                     80: 
                     81: #define alphanum 1
                     82: #define opchar 3
                     83: 
                     84: struct templ {
                     85:     char       *rwd;
                     86:     int         rwcode;
                     87: };
                     88: 
                     89: struct templ specials[100] =
                     90: {
                     91:     "switch", 1,
                     92:     "case", 2,
                     93:     "break", 0,
                     94:     "struct", 3,
                     95:     "union", 3,
                     96:     "enum", 3,
                     97:     "default", 2,
                     98:     "int", 4,
                     99:     "char", 4,
                    100:     "float", 4,
                    101:     "double", 4,
                    102:     "long", 4,
                    103:     "short", 4,
                    104:     "typdef", 4,
                    105:     "unsigned", 4,
                    106:     "register", 4,
                    107:     "static", 4,
                    108:     "global", 4,
                    109:     "extern", 4,
                    110:     "void", 4,
                    111:     "goto", 0,
                    112:     "return", 0,
                    113:     "if", 5,
                    114:     "while", 5,
                    115:     "for", 5,
                    116:     "else", 6,
                    117:     "do", 6,
                    118:     "sizeof", 7,
                    119:     0, 0
                    120: };
                    121: 
                    122: char        chartype[128] =
                    123: {                              /* this is used to facilitate the decision
                    124:                                 * of what type (alphanumeric, operator)
                    125:                                 * each character is */
                    126:     0, 0, 0, 0, 0, 0, 0, 0,
                    127:     0, 0, 0, 0, 0, 0, 0, 0,
                    128:     0, 0, 0, 0, 0, 0, 0, 0,
                    129:     0, 0, 0, 0, 0, 0, 0, 0,
                    130:     0, 3, 0, 0, 1, 3, 3, 0,
                    131:     0, 0, 3, 3, 0, 3, 3, 3,
                    132:     1, 1, 1, 1, 1, 1, 1, 1,
                    133:     1, 1, 0, 0, 3, 3, 3, 3,
                    134:     0, 1, 1, 1, 1, 1, 1, 1,
                    135:     1, 1, 1, 1, 1, 1, 1, 1,
                    136:     1, 1, 1, 1, 1, 1, 1, 1,
                    137:     1, 1, 1, 0, 0, 0, 3, 1,
                    138:     0, 1, 1, 1, 1, 1, 1, 1,
                    139:     1, 1, 1, 1, 1, 1, 1, 1,
                    140:     1, 1, 1, 1, 1, 1, 1, 1,
                    141:     1, 1, 1, 0, 3, 0, 3, 0
                    142: };
                    143: 
                    144: 
                    145: 
                    146: 
                    147: int 
                    148: lexi()
                    149: {
                    150:     register char *tok;                /* local pointer to next char in token */
                    151:     int         unary_delim;   /* this is set to 1 if the current token 
                    152:                                 *
                    153:                                 * forces a following operator to be unary */
                    154:     static int  last_code;     /* the last token type returned */
                    155:     static int  l_struct;      /* set to 1 if the last token was 'struct' */
                    156:     int         code;          /* internal code to be returned */
                    157:     char        qchar;         /* the delimiter character for a string */
                    158: 
                    159:     tok = token;               /* point to start of place to save token */
                    160:     unary_delim = false;
                    161:     ps.col_1 = ps.last_nl;     /* tell world that this token started in
                    162:                                 * column 1 iff the last thing scanned was
                    163:                                 * nl */
                    164:     ps.last_nl = false;
                    165: 
                    166:     while (*buf_ptr == ' ' || *buf_ptr == '\t') {      /* get rid of blanks */
                    167:        ps.col_1 = false;       /* leading blanks imply token is not in
                    168:                                 * column 1 */
                    169:        if (++buf_ptr >= buf_end)
                    170:            fill_buffer();
                    171:     }
                    172: 
                    173:     /* Scan an alphanumeric token.  Note that we must also handle
                    174:      * stuff like "1.0e+03" and "7e-6". */
                    175:     if (chartype[*buf_ptr & 0177] == alphanum) {       /* we have a character
                    176:                                                         * or number */
                    177:        register char *j;       /* used for searching thru list of 
                    178:                                 * reserved words */
                    179:        register struct templ *p;
                    180:        register int c;
                    181: 
                    182:        do {                    /* copy it over */
                    183:            *tok++ = *buf_ptr++;
                    184:            if (buf_ptr >= buf_end)
                    185:                fill_buffer();
                    186:        } while (chartype[c = *buf_ptr & 0177] == alphanum ||
                    187:                isdigit(token[0]) && (c == '+' || c == '-') &&
                    188:                (tok[-1] == 'e' || tok[-1] == 'E'));
                    189:        *tok++ = '\0';
                    190:        while (*buf_ptr == ' ' || *buf_ptr == '\t') {   /* get rid of blanks */
                    191:            if (++buf_ptr >= buf_end)
                    192:                fill_buffer();
                    193:        }
                    194:        ps.its_a_keyword = false;
                    195:        ps.sizeof_keyword = false;
                    196:        if (l_struct) {         /* if last token was 'struct', then this
                    197:                                 * token should be treated as a
                    198:                                 * declaration */
                    199:            l_struct = false;
                    200:            last_code = ident;
                    201:            ps.last_u_d = true;
                    202:            return (decl);
                    203:        }
                    204:        ps.last_u_d = false;    /* Operator after indentifier is binary */
                    205:        last_code = ident;      /* Remember that this is the code we will
                    206:                                 * return */
                    207: 
                    208:        /*
                    209:         * This loop will check if the token is a keyword. 
                    210:         */
                    211:        for (p = specials; (j = p->rwd) != 0; p++) {
                    212:            tok = token;        /* point at scanned token */
                    213:            if (*j++ != *tok++ || *j++ != *tok++)
                    214:                continue;       /* This test depends on the fact that
                    215:                                 * identifiers are always at least 1
                    216:                                 * character long (ie. the first two bytes
                    217:                                 * of the identifier are always
                    218:                                 * meaningful) */
                    219:            if (tok[-1] == 0)
                    220:                break;          /* If its a one-character identifier */
                    221:            while (*tok++ == *j)
                    222:                if (*j++ == 0)
                    223:                    goto found_keyword; /* I wish that C had a multi-level
                    224:                                         * break... */
                    225:        }
                    226:        if (p->rwd) {           /* we have a keyword */
                    227:     found_keyword:
                    228:            ps.its_a_keyword = true;
                    229:            ps.last_u_d = true;
                    230:            switch (p->rwcode) {
                    231:                case 1: /* it is a switch */
                    232:                    return (swstmt);
                    233:                case 2: /* a case or default */
                    234:                    return (casestmt);
                    235: 
                    236:                case 3: /* a "struct" */
                    237:                    if (ps.p_l_follow)
                    238:                        break;  /* inside parens: cast */
                    239:                    l_struct = true;
                    240: 
                    241:                    /*
                    242:                     * Next time around, we will want to know that we have
                    243:                     * had a 'struct' 
                    244:                     */
                    245:                case 4: /* one of the declaration keywords */
                    246:                    if (ps.p_l_follow) {
                    247:                        ps.cast_mask |= 1 << ps.p_l_follow;
                    248:                        break;  /* inside parens: cast */
                    249:                    }
                    250:                    last_code = decl;
                    251:                    return (decl);
                    252: 
                    253:                case 5: /* if, while, for */
                    254:                    return (sp_paren);
                    255: 
                    256:                case 6: /* do, else */
                    257:                    return (sp_nparen);
                    258: 
                    259:                case 7:
                    260:                    ps.sizeof_keyword = true;
                    261:                default:        /* all others are treated like any other
                    262:                                 * identifier */
                    263:                    return (ident);
                    264:            }                   /* end of switch */
                    265:        }                       /* end of if (found_it) */
                    266:        if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0
                    267:            && (buf_ptr[1] != ')' || buf_ptr[2] != ';')) {
                    268:            strncpy(ps.procname, token, sizeof ps.procname - 1);
                    269:            ps.in_parameter_declaration = 1;
                    270:        }
                    271: 
                    272:        /*
                    273:         * The following hack attempts to guess whether or not the current
                    274:         * token is in fact a declaration keyword -- one that has been
                    275:         * typedefd 
                    276:         */
                    277:        if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr))
                    278:            && !ps.p_l_follow
                    279:            && (ps.last_token == rparen || ps.last_token == semicolon ||
                    280:                ps.last_token == decl ||
                    281:                ps.last_token == lbrace || ps.last_token == rbrace)) {
                    282:            ps.its_a_keyword = true;
                    283:            ps.last_u_d = true;
                    284:            last_code = decl;
                    285:            return decl;
                    286:        }
                    287:        if (last_code == decl)  /* if this is a declared variable, then
                    288:                                 * following sign is unary */
                    289:            ps.last_u_d = true; /* will make "int a -1" work */
                    290:        last_code = ident;
                    291:        return (ident);         /* the ident is not in the list */
                    292:     }                          /* end of procesing for alpanum character */
                    293:     /* Scan a non-alphanumeric token */
                    294: 
                    295:     *tok++ = *buf_ptr;         /* if it is only a one-character token, it
                    296:                                 * is moved here */
                    297:     *tok = '\0';
                    298:     if (++buf_ptr >= buf_end)
                    299:        fill_buffer();
                    300: 
                    301:     switch (*token) {
                    302:        case '\n':
                    303:            unary_delim = ps.last_u_d;
                    304:            ps.last_nl = true;  /* remember that we just had a newline */
                    305:            code = (had_eof ? 0 : newline);
                    306: 
                    307:            /*
                    308:             * if data has been exausted, the newline is a dummy, and we
                    309:             * should return code to stop 
                    310:             */
                    311:            break;
                    312: 
                    313:        case '\'':              /* start of quoted character */
                    314:        case '"':               /* start of string */
                    315:            qchar = *token;
                    316:            if (troff) {
                    317:                tok[-1] = '`';
                    318:                if (qchar == '"')
                    319:                    *tok++ = '`';
                    320:                *tok++ = BACKSLASH;
                    321:                *tok++ = 'f';
                    322:                *tok++ = 'L';
                    323:            }
                    324:            do {                /* copy the string */
                    325:                while (1) {     /* move one character or [/<char>]<char> */
                    326:                    if (*buf_ptr == '\n') {
                    327:                        printf("%d: Unterminated literal\n", line_no);
                    328:                        goto stop_lit;
                    329:                    }
                    330:                    *tok = *buf_ptr++;
                    331:                    if (buf_ptr >= buf_end)
                    332:                        fill_buffer();
                    333:                    if (had_eof || ((tok - token) > (bufsize - 2))) {
                    334:                        printf("Unterminated literal\n");
                    335:                        ++tok;
                    336:                        goto stop_lit;
                    337:                        /* get outof literal copying loop */
                    338:                    }
                    339:                    if (*tok == BACKSLASH) {    /* if escape, copy extra
                    340:                                                 * char */
                    341:                        if (*buf_ptr == '\n')   /* check for escaped
                    342:                                                 * newline */
                    343:                            ++line_no;
                    344:                        if (troff) {
                    345:                            *++tok = BACKSLASH;
                    346:                            if (*buf_ptr == BACKSLASH)
                    347:                                *++tok = BACKSLASH;
                    348:                        }
                    349:                        *++tok = *buf_ptr++;
                    350:                        ++tok;  /* we must increment this again because we
                    351:                                 * copied two chars */
                    352:                        if (buf_ptr >= buf_end)
                    353:                            fill_buffer();
                    354:                    }
                    355:                    else
                    356:                        break;  /* we copied one character */
                    357:                }               /* end of while (1) */
                    358:            } while (*tok++ != qchar);
                    359:            if (troff) {
                    360:                tok[-1] = BACKSLASH;
                    361:                *tok++ = 'f';
                    362:                *tok++ = 'R';
                    363:                *tok++ = '\'';
                    364:                if (qchar == '"')
                    365:                    *tok++ = '\'';
                    366:            }
                    367:     stop_lit:
                    368:            code = ident;
                    369:            break;
                    370: 
                    371:        case ('('):
                    372:        case ('['):
                    373:            unary_delim = true;
                    374:            code = lparen;
                    375:            break;
                    376: 
                    377:        case (')'):
                    378:        case (']'):
                    379:            code = rparen;
                    380:            break;
                    381: 
                    382:        case '#':
                    383:            unary_delim = ps.last_u_d;
                    384:            code = preesc;
                    385:            break;
                    386: 
                    387:        case '?':
                    388:            unary_delim = true;
                    389:            code = question;
                    390:            break;
                    391: 
                    392:        case (':'):
                    393:            code = colon;
                    394:            unary_delim = true;
                    395:            break;
                    396: 
                    397:        case (';'):
                    398:            unary_delim = true;
                    399:            code = semicolon;
                    400:            break;
                    401: 
                    402:        case ('{'):
                    403:            unary_delim = true;
                    404: 
                    405:            /*
                    406:             * if (ps.in_or_st) ps.block_init = 1; 
                    407:             */
                    408:            code = ps.block_init ? lparen : lbrace;
                    409:            break;
                    410: 
                    411:        case ('}'):
                    412:            unary_delim = true;
                    413:            code = ps.block_init ? rparen : rbrace;
                    414:            break;
                    415: 
                    416:        case 014:               /* a form feed */
                    417:            unary_delim = ps.last_u_d;
                    418:            ps.last_nl = true;  /* remember this so we can set 'ps.col_1'
                    419:                                 * right */
                    420:            code = form_feed;
                    421:            break;
                    422: 
                    423:        case (','):
                    424:            unary_delim = true;
                    425:            code = comma;
                    426:            break;
                    427: 
                    428:        case '.':
                    429:            unary_delim = false;
                    430:            code = period;
                    431:            break;
                    432: 
                    433:        case '-':
                    434:        case '+':               /* check for -, +, --, ++ */
                    435:            code = (ps.last_u_d ? unary_op : binary_op);
                    436:            unary_delim = true;
                    437: 
                    438:            if (*buf_ptr == token[0]) {
                    439:                /* check for doubled character */
                    440:                *tok++ = *buf_ptr++;
                    441:                /* buffer overflow will be checked at end of loop */
                    442:                if (last_code == ident || last_code == rparen) {
                    443:                    code = (ps.last_u_d ? unary_op : postop);
                    444:                    /* check for following ++ or -- */
                    445:                    unary_delim = false;
                    446:                }
                    447:            }
                    448:            else if (*buf_ptr == '=')
                    449:                /* check for operator += */
                    450:                *tok++ = *buf_ptr++;
                    451:            else if (token[0] == '-' && *buf_ptr == '>') {
                    452:                /* check for operator -> */
                    453:                *tok++ = *buf_ptr++;
                    454:                if (!pointer_as_binop) {
                    455:                    code = unary_op;
                    456:                    unary_delim = false;
                    457:                    ps.want_blank = false;
                    458:                }
                    459:            }
                    460:            /* buffer overflow will be checked at end of switch */
                    461: 
                    462:            break;
                    463: 
                    464:        case '=':
                    465:            if (ps.in_or_st)
                    466:                ps.block_init = 1;
                    467:            if (chartype[*buf_ptr] == opchar) { /* we have two char
                    468:                                                 * assignment */
                    469:                tok[-1] = *buf_ptr++;
                    470:                if ((tok[-1] == '<' || tok[-1] == '>') && tok[-1] == *buf_ptr)
                    471:                    *tok++ = *buf_ptr++;
                    472:                *tok++ = '=';   /* Flip =+ to += */
                    473:                *tok = 0;
                    474:            }
                    475:            code = binary_op;
                    476:            unary_delim = true;
                    477:            break;
                    478:            /* can drop thru!!! */
                    479: 
                    480:        case '>':
                    481:        case '<':
                    482:        case '!':               /* ops like <, <<, <=, !=, etc */
                    483:            if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
                    484:                *tok++ = *buf_ptr;
                    485:                if (++buf_ptr >= buf_end)
                    486:                    fill_buffer();
                    487:            }
                    488:            if (*buf_ptr == '=')
                    489:                *tok++ = *buf_ptr++;
                    490:            code = (ps.last_u_d ? unary_op : binary_op);
                    491:            unary_delim = true;
                    492:            break;
                    493: 
                    494:        default:
                    495:            if (token[0] == '/' && *buf_ptr == '*') {
                    496:                /* it is start of comment */
                    497:                *tok++ = '*';
                    498: 
                    499:                if (++buf_ptr >= buf_end)
                    500:                    fill_buffer();
                    501: 
                    502:                code = comment;
                    503:                unary_delim = ps.last_u_d;
                    504:                break;
                    505:            }
                    506:            while (*(tok - 1) == *buf_ptr || *buf_ptr == '=') {
                    507:                /* handle ||, &&, etc, and also things as in int *****i */
                    508:                *tok++ = *buf_ptr;
                    509:                if (++buf_ptr >= buf_end)
                    510:                    fill_buffer();
                    511:            }
                    512:            code = (ps.last_u_d ? unary_op : binary_op);
                    513:            unary_delim = true;
                    514: 
                    515: 
                    516:     }                          /* end of switch */
                    517:     if (code != newline) {
                    518:        l_struct = false;
                    519:        last_code = code;
                    520:     }
                    521:     if (buf_ptr >= buf_end)    /* check for input buffer empty */
                    522:        fill_buffer();
                    523:     ps.last_u_d = unary_delim;
                    524:     *tok = '\0';               /* null terminate the token */
                    525:     return (code);
                    526: };
                    527: 
                    528: /* Add the given keyword to the keyword table, using val as the keyword type
                    529:    */
                    530: addkey (key, val)
                    531: char       *key;
                    532: {
                    533:     register struct templ *p = specials;
                    534:     while (p->rwd)
                    535:        if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
                    536:            return;
                    537:        else
                    538:            p++;
                    539:     if (p >= specials + sizeof specials / sizeof specials[0])
                    540:        return;                 /* For now, table overflows are silently
                    541:                                   ignored */
                    542:     p->rwd = key;
                    543:     p->rwcode = val;
                    544:     p[1].rwd = 0;
                    545:     p[1].rwcode = 0;
                    546:     return;
                    547: }
unix.superglobalmegacorp.com
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.