43BSDReno/pgrm/indent/lexi.c - annotate

Return to lexi.c CVS log
Up to [CSRG BSD Unix] / 43BSDReno / pgrm / indent
Annotation of 43BSDReno/pgrm/indent/lexi.c, revision 1.1.1.1

1.1       root        1: /*
                      2:  * Copyright (c) 1985 Sun Microsystems, Inc.
                      3:  * Copyright (c) 1980 The Regents of the University of California.
                      4:  * Copyright (c) 1976 Board of Trustees of the University of Illinois.
                      5:  * All rights reserved.
                      6:  *
                      7:  * Redistribution and use in source and binary forms are permitted
                      8:  * provided that: (1) source distributions retain this entire copyright
                      9:  * notice and comment, and (2) distributions including binaries display
                     10:  * the following acknowledgement:  ``This product includes software
                     11:  * developed by the University of California, Berkeley and its contributors''
                     12:  * in the documentation or other materials provided with the distribution
                     13:  * and in all advertising materials mentioning features or use of this
                     14:  * software. Neither the name of the University nor the names of its
                     15:  * contributors may be used to endorse or promote products derived
                     16:  * from this software without specific prior written permission.
                     17:  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
                     18:  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
                     19:  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
                     20:  */
                     21: 
                     22: #ifndef lint
                     23: static char sccsid[] = "@(#)lexi.c     5.15 (Berkeley) 6/1/90";
                     24: #endif /* not lint */
                     25: 
                     26: /*
                     27:  * Here we have the token scanner for indent.  It scans off one token and puts
                     28:  * it in the global variable "token".  It returns a code, indicating the type
                     29:  * of token scanned.
                     30:  */
                     31: 
                     32: #include "indent_globs.h"
                     33: #include "indent_codes.h"
                     34: #include <ctype.h>
                     35: 
                     /*
                      * Character class codes stored in the chartype[] table.  In the visible
                      * code opchar is referenced only inside an "#ifdef undef" section of lexi().
                      */
                     36: #define alphanum 1     /* class of characters that can form identifiers and numbers */
                     37: #define opchar 3       /* class of operator characters */
                     38: 
                     /*
                      * One entry of the reserved-word table: the keyword text and the class
                      * code that lexi() switches on when the keyword is recognized.
                      */
                     39: struct templ {
                     40:     char       *rwd;          /* keyword spelling; 0 marks the end of the table */
                     41:     int         rwcode;       /* keyword class, see the switch in lexi() */
                     42: };
                     43: 
                     /*
                      * Reserved-word table searched by lexi() and extended by addkey().  The
                      * list ends at the first entry whose rwd is 0; the array is sized at 100 so
                      * that addkey() has room to append entries.
                      *
                      * rwcode classes, as dispatched in lexi():
                      *   1  switch                      -> swstmt
                      *   2  case, default               -> casestmt
                      *   3  struct, union, enum         -> decl (also arms l_struct)
                      *   4  declaration keywords        -> decl
                      *   5  if, while, for              -> sp_paren
                      *   6  else, do                    -> sp_nparen
                      *   7  sizeof                      -> ident, with ps.sizeof_keyword set
                      *   0  anything else               -> ident
                      *
                      * The keyword formerly spelled "typdef" is corrected to "typedef" so that
                      * the real C keyword is found by the table lookup.
                      */
                     44: struct templ specials[100] =
                     45: {
                     46:     "switch", 1,
                     47:     "case", 2,
                     48:     "break", 0,
                     49:     "struct", 3,
                     50:     "union", 3,
                     51:     "enum", 3,
                     52:     "default", 2,
                     53:     "int", 4,
                     54:     "char", 4,
                     55:     "float", 4,
                     56:     "double", 4,
                     57:     "long", 4,
                     58:     "short", 4,
                     59:     "typedef", 4,
                     60:     "unsigned", 4,
                     61:     "register", 4,
                     62:     "static", 4,
                     63:     "global", 4,
                     64:     "extern", 4,
                     65:     "void", 4,
                     66:     "goto", 0,
                     67:     "return", 0,
                     68:     "if", 5,
                     69:     "while", 5,
                     70:     "for", 5,
                     71:     "else", 6,
                     72:     "do", 6,
                     73:     "sizeof", 7,
                     74:     0, 0
                     75: };
                     76: 
                     /*
                      * Character classification table indexed by 7-bit character code.
                      * Entry values: 1 (alphanum), 3 (opchar), 0 (neither).  Note that the
                      * dollar sign and the underscore are classed as alphanum.  The table has
                      * only 128 entries, so it covers codes 0 through 127.
                      */
                     77: char        chartype[128] =
                     78: {                              /* this is used to facilitate the decision of
                     79:                                 * what type (alphanumeric, operator) each
                     80:                                 * character is */
                     81:     0, 0, 0, 0, 0, 0, 0, 0,   /* 000-007: control characters */
                     82:     0, 0, 0, 0, 0, 0, 0, 0,   /* 010-017: control characters */
                     83:     0, 0, 0, 0, 0, 0, 0, 0,   /* 020-027: control characters */
                     84:     0, 0, 0, 0, 0, 0, 0, 0,   /* 030-037: control characters */
                     85:     0, 3, 0, 0, 1, 3, 3, 0,   /* space ! dquote # $ % & squote */
                     86:     0, 0, 3, 3, 0, 3, 0, 3,   /* ( ) * + , - . slash */
                     87:     1, 1, 1, 1, 1, 1, 1, 1,   /* digits 0-7 */
                     88:     1, 1, 0, 0, 3, 3, 3, 3,   /* 8 9 : ; < = > ? */
                     89:     0, 1, 1, 1, 1, 1, 1, 1,   /* @ A-G */
                     90:     1, 1, 1, 1, 1, 1, 1, 1,   /* H-O */
                     91:     1, 1, 1, 1, 1, 1, 1, 1,   /* P-W */
                     92:     1, 1, 1, 0, 0, 0, 3, 1,   /* X Y Z [ backslash ] ^ _ */
                     93:     0, 1, 1, 1, 1, 1, 1, 1,   /* backquote a-g */
                     94:     1, 1, 1, 1, 1, 1, 1, 1,   /* h-o */
                     95:     1, 1, 1, 1, 1, 1, 1, 1,   /* p-w */
                     96:     1, 1, 1, 0, 3, 0, 3, 0    /* x y z { | } ~ DEL */
                     97: };
                     98: 
                     99: 
                    100: 
                    101: 
                    /*
                     * lexi -- scan the next token from the input buffer.
                     *
                     * Skips leading blanks and tabs at buf_ptr, copies one token into the
                     * token buffer (starting at s_token, with e_token advancing past it),
                     * NUL-terminates it, and returns a code for the kind of token found:
                     * ident, decl, swstmt, casestmt, sp_paren, sp_nparen, lparen, rparen,
                     * preesc, question, colon, semicolon, lbrace, rbrace, form_feed, comma,
                     * period, unary_op, binary_op, postop, comment, or newline (0 is
                     * returned in place of newline once had_eof is set).
                     *
                     * Besides consuming input it updates scanner state in ps: col_1,
                     * last_nl, last_u_d, its_a_keyword, sizeof_keyword and, on some paths,
                     * cast_mask, block_init, want_blank, procname and
                     * in_parameter_declaration.  It also sets rparen_count when it guesses
                     * a procedure name and increments line_no for an escaped newline inside
                     * a literal.  The statics last_code and l_struct carry context from one
                     * call to the next.
                     */
                    102: int
                    103: lexi()
                    104: {
                    105:     int         unary_delim;   /* this is set to 1 if the current token
                    106:                                 * 
                    107:                                 * forces a following operator to be unary */
                    108:     static int  last_code;     /* the last token type returned */
                    109:     static int  l_struct;      /* set to 1 if the last token was 'struct' */
                    110:     int         code;          /* internal code to be returned */
                    111:     char        qchar;         /* the delimiter character for a string */
                    112: 
                    113:     e_token = s_token;         /* point to start of place to save token */
                    114:     unary_delim = false;
                    115:     ps.col_1 = ps.last_nl;     /* tell world that this token started in
                    116:                                 * column 1 iff the last thing scanned was nl */
                    117:     ps.last_nl = false;
                    118: 
                    119:     while (*buf_ptr == ' ' || *buf_ptr == '\t') {      /* get rid of blanks */
                    120:        ps.col_1 = false;       /* leading blanks imply token is not in column
                    121:                                 * 1 */
                    122:        if (++buf_ptr >= buf_end)
                    123:            fill_buffer();
                    124:     }
                    125: 
                    126:     /* Scan an alphanumeric token */
                    127:     if (chartype[*buf_ptr] == alphanum || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
                    128:        /*
                    129:         * we have a character or number
                    130:         */
                    131:        register char *j;       /* used for searching thru list of
                    132:                                 * 
                    133:                                 * reserved words */
                    134:        register struct templ *p;
                    135: 
                                /*
                                 * Numeric constant: either hex with a 0x/0X prefix, or digits
                                 * with at most one dot and an optional e/E exponent that may
                                 * carry a sign.  A trailing L or l suffix is accepted after
                                 * either form.
                                 */
                    136:        if (isdigit(*buf_ptr) || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
                    137:            int         seendot = 0,
                    138:                        seenexp = 0;
                    139:            if (*buf_ptr == '0' &&
                    140:                    (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
                    141:                *e_token++ = *buf_ptr++;
                    142:                *e_token++ = *buf_ptr++;
                    143:                while (isxdigit(*buf_ptr)) {
                    144:                    CHECK_SIZE_TOKEN;
                    145:                    *e_token++ = *buf_ptr++;
                    146:                }
                    147:            }
                    148:            else
                    149:                while (1) {
                    150:                    if (*buf_ptr == '.')
                    151:                        if (seendot)
                    152:                            break;
                    153:                        else
                    154:                            seendot++;
                    155:                    CHECK_SIZE_TOKEN;
                    156:                    *e_token++ = *buf_ptr++;
                    157:                    if (!isdigit(*buf_ptr) && *buf_ptr != '.')
                    158:                        if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
                    159:                            break;
                    160:                        else {
                    161:                            seenexp++;
                    162:                            seendot++;
                    163:                            CHECK_SIZE_TOKEN;
                    164:                            *e_token++ = *buf_ptr++;
                    165:                            if (*buf_ptr == '+' || *buf_ptr == '-')
                    166:                                *e_token++ = *buf_ptr++;
                    167:                        }
                    168:                }
                    169:            if (*buf_ptr == 'L' || *buf_ptr == 'l')
                    170:                *e_token++ = *buf_ptr++;
                    171:        }
                    172:        else
                    173:            while (chartype[*buf_ptr] == alphanum) {    /* copy it over */
                    174:                CHECK_SIZE_TOKEN;
                    175:                *e_token++ = *buf_ptr++;
                    176:                if (buf_ptr >= buf_end)
                    177:                    fill_buffer();
                    178:            }
                    179:        *e_token++ = '\0';
                    180:        while (*buf_ptr == ' ' || *buf_ptr == '\t') {   /* get rid of blanks */
                    181:            if (++buf_ptr >= buf_end)
                    182:                fill_buffer();
                    183:        }
                    184:        ps.its_a_keyword = false;
                    185:        ps.sizeof_keyword = false;
                    186:        if (l_struct) {         /* if last token was 'struct', then this token
                    187:                                 * should be treated as a declaration */
                    188:            l_struct = false;
                    189:            last_code = ident;
                    190:            ps.last_u_d = true;
                    191:            return (decl);
                    192:        }
                    193:        ps.last_u_d = false;    /* Operator after identifier is binary */
                    194:        last_code = ident;      /* Remember that this is the code we will
                    195:                                 * return */
                    196: 
                    197:        /*
                    198:         * This loop will check if the token is a keyword.
                    199:         */
                    200:        for (p = specials; (j = p->rwd) != 0; p++) {
                                    /*
                                     * NOTE: the char pointer p declared on the next line shadows
                                     * the table pointer p inside this loop body only; the
                                     * p->rwd and p->rwcode uses after the loop refer to the
                                     * table pointer.
                                     */
                    201:            register char *p = s_token; /* point at scanned token */
                    202:            if (*j++ != *p++ || *j++ != *p++)
                    203:                continue;       /* This test depends on the fact that
                    204:                                 * identifiers are always at least 1 character
                    205:                                 * long (ie. the first two bytes of the
                    206:                                 * identifier are always meaningful) */
                    207:            if (p[-1] == 0)
                    208:                break;          /* If its a one-character identifier */
                    209:            while (*p++ == *j)
                    210:                if (*j++ == 0)
                    211:                    goto found_keyword; /* I wish that C had a multi-level
                    212:                                         * break... */
                    213:        }
                    214:        if (p->rwd) {           /* we have a keyword */
                    215:     found_keyword:
                    216:            ps.its_a_keyword = true;
                    217:            ps.last_u_d = true;
                    218:            switch (p->rwcode) {
                    219:            case 1:             /* it is a switch */
                    220:                return (swstmt);
                    221:            case 2:             /* a case or default */
                    222:                return (casestmt);
                    223: 
                    224:            case 3:             /* a "struct" */
                    225:                if (ps.p_l_follow)
                    226:                    break;      /* inside parens: cast */
                    227:                l_struct = true;
                    228: 
                    229:                /*
                    230:                 * Next time around, we will want to know that we have had a
                    231:                 * 'struct'
                    232:                 */
                                        /* FALLTHROUGH into case 4 */
                    233:            case 4:             /* one of the declaration keywords */
                    234:                if (ps.p_l_follow) {
                    235:                    ps.cast_mask |= 1 << ps.p_l_follow;
                    236:                    break;      /* inside parens: cast */
                    237:                }
                    238:                last_code = decl;
                    239:                return (decl);
                    240: 
                    241:            case 5:             /* if, while, for */
                    242:                return (sp_paren);
                    243: 
                    244:            case 6:             /* do, else */
                    245:                return (sp_nparen);
                    246: 
                    247:            case 7:
                    248:                ps.sizeof_keyword = true;
                                        /* FALLTHROUGH: sizeof is otherwise returned as ident */
                    249:            default:            /* all others are treated like any other
                    250:                                 * identifier */
                    251:                return (ident);
                    252:            }                   /* end of switch */
                    253:        }                       /* end of if (found_it) */
                                /*
                                 * An identifier directly followed by '(' while ps.tos <= 1 and
                                 * ps.ind_level == 0 is recorded as a procedure name, unless a
                                 * ')' immediately followed by ';' or ',' is found in the rest
                                 * of the buffer, in which case nothing is recorded.
                                 */
                    254:        if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
                    255:            register char *tp = buf_ptr;
                    256:            while (tp < buf_end)
                    257:                if (*tp++ == ')' && (*tp == ';' || *tp == ','))
                    258:                    goto not_proc;
                    259:            strncpy(ps.procname, token, sizeof ps.procname - 1);
                    260:            ps.in_parameter_declaration = 1;
                    261:            rparen_count = 1;
                    262:     not_proc:;
                    263:        }
                    264:        /*
                    265:         * The following hack attempts to guess whether or not the current
                    266:         * token is in fact a declaration keyword -- one that has been
                    267:         * typedefd
                    268:         */
                    269:        if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
                    270:                && !ps.p_l_follow
                    271:                && !ps.block_init
                    272:                && (ps.last_token == rparen || ps.last_token == semicolon ||
                    273:                    ps.last_token == decl ||
                    274:                    ps.last_token == lbrace || ps.last_token == rbrace)) {
                    275:            ps.its_a_keyword = true;
                    276:            ps.last_u_d = true;
                    277:            last_code = decl;
                    278:            return decl;
                    279:        }
                    280:        if (last_code == decl)  /* if this is a declared variable, then
                    281:                                 * following sign is unary */
                    282:            ps.last_u_d = true; /* will make "int a -1" work */
                    283:        last_code = ident;
                    284:        return (ident);         /* the ident is not in the list */
                    285:     }                          /* end of processing for alphanum character */
                    286: 
                    287:     /* Scan a non-alphanumeric token */
                    288: 
                    289:     *e_token++ = *buf_ptr;             /* if it is only a one-character token, it is
                    290:                                 * moved here */
                    291:     *e_token = '\0';
                    292:     if (++buf_ptr >= buf_end)
                    293:        fill_buffer();
                    294: 
                    295:     switch (*token) {
                    296:     case '\n':
                    297:        unary_delim = ps.last_u_d;
                    298:        ps.last_nl = true;      /* remember that we just had a newline */
                    299:        code = (had_eof ? 0 : newline);
                    300: 
                    301:        /*
                    302:         * if data has been exhausted, the newline is a dummy, and we should
                    303:         * return code to stop
                    304:         */
                    305:        break;
                    306: 
                    307:     case '\'':                 /* start of quoted character */
                    308:     case '"':                  /* start of string */
                    309:        qchar = *token;
                    310:        if (troff) {
                    311:            e_token[-1] = '`';
                    312:            if (qchar == '"')
                    313:                *e_token++ = '`';
                    314:            e_token = chfont(&bodyf, &stringf, e_token);
                    315:        }
                    316:        do {                    /* copy the string */
                    317:            while (1) {         /* move one character or [/<char>]<char> */
                    318:                if (*buf_ptr == '\n') {
                    319:                    printf("%d: Unterminated literal\n", line_no);
                    320:                    goto stop_lit;
                    321:                }
                    322:                CHECK_SIZE_TOKEN;       /* Only have to do this once in this loop,
                    323:                                         * since CHECK_SIZE guarantees that there
                    324:                                         * are at least 5 entries left */
                    325:                *e_token = *buf_ptr++;
                    326:                if (buf_ptr >= buf_end)
                    327:                    fill_buffer();
                    328:                if (*e_token == BACKSLASH) {    /* if escape, copy extra char */
                    329:                    if (*buf_ptr == '\n')       /* check for escaped newline */
                    330:                        ++line_no;
                    331:                    if (troff) {
                    332:                        *++e_token = BACKSLASH;
                    333:                        if (*buf_ptr == BACKSLASH)
                    334:                            *++e_token = BACKSLASH;
                    335:                    }
                    336:                    *++e_token = *buf_ptr++;
                    337:                    ++e_token;  /* we must increment this again because we
                    338:                                 * copied two chars */
                    339:                    if (buf_ptr >= buf_end)
                    340:                        fill_buffer();
                    341:                }
                    342:                else
                    343:                    break;      /* we copied one character */
                    344:            }                   /* end of while (1) */
                    345:        } while (*e_token++ != qchar);
                    346:        if (troff) {
                    347:            e_token = chfont(&stringf, &bodyf, e_token - 1);
                    348:            if (qchar == '"')
                    349:                *e_token++ = '\'';
                    350:        }
                    351: stop_lit:
                    352:        code = ident;
                    353:        break;
                    354: 
                    355:     case ('('):
                    356:     case ('['):
                    357:        unary_delim = true;
                    358:        code = lparen;
                    359:        break;
                    360: 
                    361:     case (')'):
                    362:     case (']'):
                    363:        code = rparen;
                    364:        break;
                    365: 
                    366:     case '#':
                    367:        unary_delim = ps.last_u_d;
                    368:        code = preesc;
                    369:        break;
                    370: 
                    371:     case '?':
                    372:        unary_delim = true;
                    373:        code = question;
                    374:        break;
                    375: 
                    376:     case (':'):
                    377:        code = colon;
                    378:        unary_delim = true;
                    379:        break;
                    380: 
                    381:     case (';'):
                    382:        unary_delim = true;
                    383:        code = semicolon;
                    384:        break;
                    385: 
                    386:     case ('{'):
                    387:        unary_delim = true;
                    388: 
                    389:        /*
                    390:         * if (ps.in_or_st) ps.block_init = 1;
                    391:         */
                    392:        /* ?    code = ps.block_init ? lparen : lbrace; */
                    393:        code = lbrace;
                    394:        break;
                    395: 
                    396:     case ('}'):
                    397:        unary_delim = true;
                    398:        /* ?    code = ps.block_init ? rparen : rbrace; */
                    399:        code = rbrace;
                    400:        break;
                    401: 
                    402:     case 014:                  /* a form feed */
                    403:        unary_delim = ps.last_u_d;
                    404:        ps.last_nl = true;      /* remember this so we can set 'ps.col_1'
                    405:                                 * right */
                    406:        code = form_feed;
                    407:        break;
                    408: 
                    409:     case (','):
                    410:        unary_delim = true;
                    411:        code = comma;
                    412:        break;
                    413: 
                    414:     case '.':
                    415:        unary_delim = false;
                    416:        code = period;
                    417:        break;
                    418: 
                    419:     case '-':
                    420:     case '+':                  /* check for -, +, --, ++ */
                    421:        code = (ps.last_u_d ? unary_op : binary_op);
                    422:        unary_delim = true;
                    423: 
                    424:        if (*buf_ptr == token[0]) {
                    425:            /* check for doubled character */
                    426:            *e_token++ = *buf_ptr++;
                    427:            /* buffer overflow will be checked at end of loop */
                    428:            if (last_code == ident || last_code == rparen) {
                    429:                code = (ps.last_u_d ? unary_op : postop);
                    430:                /* check for following ++ or -- */
                    431:                unary_delim = false;
                    432:            }
                    433:        }
                    434:        else if (*buf_ptr == '=')
                    435:            /* check for operator += */
                    436:            *e_token++ = *buf_ptr++;
                    437:        else if (*buf_ptr == '>') {
                    438:            /* check for operator -> */
                    439:            *e_token++ = *buf_ptr++;
                    440:            if (!pointer_as_binop) {
                    441:                unary_delim = false;
                    442:                code = unary_op;
                    443:                ps.want_blank = false;
                    444:            }
                    445:        }
                    446:        break;                  /* buffer overflow will be checked at end of
                    447:                                 * switch */
                    448: 
                    449:     case '=':
                    450:        if (ps.in_or_st)
                    451:            ps.block_init = 1;
                    452: #ifdef undef
                    453:        if (chartype[*buf_ptr] == opchar) {     /* we have two char assignment */
                    454:            e_token[-1] = *buf_ptr++;
                    455:            if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
                    456:                *e_token++ = *buf_ptr++;
                    457:            *e_token++ = '=';   /* Flip =+ to += */
                    458:            *e_token = 0;
                    459:        }
                    460: #else
                    461:        if (*buf_ptr == '=') {/* == */
                    462:            *e_token++ = '=';   /* append the second = of the == operator */
                    463:            buf_ptr++;
                    464:            *e_token = 0;
                    465:        }
                    466: #endif
                    467:        code = binary_op;
                    468:        unary_delim = true;
                    469:        break;
                    470:        /* NOTE: because of the break above, the = case does not fall through */
                    471: 
                    472:     case '>':
                    473:     case '<':
                    474:     case '!':                  /* ops like <, <<, <=, !=, etc */
                    475:        if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
                    476:            *e_token++ = *buf_ptr;
                    477:            if (++buf_ptr >= buf_end)
                    478:                fill_buffer();
                    479:        }
                    480:        if (*buf_ptr == '=')
                    481:            *e_token++ = *buf_ptr++;
                    482:        code = (ps.last_u_d ? unary_op : binary_op);
                    483:        unary_delim = true;
                    484:        break;
                    485: 
                    486:     default:
                    487:        if (token[0] == '/' && *buf_ptr == '*') {
                    488:            /* it is start of comment */
                    489:            *e_token++ = '*';
                    490: 
                    491:            if (++buf_ptr >= buf_end)
                    492:                fill_buffer();
                    493: 
                    494:            code = comment;
                    495:            unary_delim = ps.last_u_d;
                    496:            break;
                    497:        }
                    498:        while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
                    499:            /*
                    500:             * handle ||, &&, etc, and also things as in int *****i
                    501:             */
                    502:            *e_token++ = *buf_ptr;
                    503:            if (++buf_ptr >= buf_end)
                    504:                fill_buffer();
                    505:        }
                    506:        code = (ps.last_u_d ? unary_op : binary_op);
                    507:        unary_delim = true;
                    508: 
                    509: 
                    510:     }                          /* end of switch */
                             /*
                              * A newline leaves last_code and l_struct untouched, so that context
                              * carries over to the first token of the next line.
                              */
                    511:     if (code != newline) {
                    512:        l_struct = false;
                    513:        last_code = code;
                    514:     }
                    515:     if (buf_ptr >= buf_end)    /* check for input buffer empty */
                    516:        fill_buffer();
                    517:     ps.last_u_d = unary_delim;
                    518:     *e_token = '\0';           /* null terminate the token */
                    519:     return (code);
                    520: }
                    521: 
                    522: /*
                    523:  * Add the given keyword to the keyword table, using val as the keyword type
                          *
                          * key: keyword spelling.  Only the pointer is stored, not a copy, so
                          *      the string must remain valid for as long as the table is used.
                          * val: keyword class code (implicitly int), stored as rwcode; see the
                          *      switch in lexi() for the meaning of each class.
                          *
                          * If key is already in the table the existing entry is left unchanged.
                          * If the table is full the request is silently ignored.  "Full" means
                          * the terminating entry already occupies the last array slot: adding a
                          * keyword there would push the new terminator one element past the end
                          * of specials[], so the bound check reserves that slot.
                    524:  */
                    525: addkey(key, val)
                    526:     char       *key;
                    527: {
                    528:     register struct templ *p = specials;
                             /* walk to the terminating entry, bailing out on a duplicate */
                    529:     while (p->rwd)
                    530:        if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
                    531:            return;
                    532:        else
                    533:            p++;
                             /* p and p[1] are both written below, so p[1] must be in bounds */
                    534:     if (p >= specials + sizeof specials / sizeof specials[0] - 1)
                    535:        return;                 /* For now, table overflows are silently
                    536:                                 * ignored */
                    537:     p->rwd = key;
                    538:     p->rwcode = val;
                    539:     p[1].rwd = 0;
                    540:     p[1].rwcode = 0;
                    541:     return;
                    542: }
unix.superglobalmegacorp.com
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.