43BSDReno/pgrm/indent/lexi.c - annotate

Return to lexi.c CVS log
Up to [CSRG BSD Unix] / 43BSDReno / pgrm / indent
Annotation of 43BSDReno/pgrm/indent/lexi.c, revision 1.1

1.1     ! root        1: /*
        !             2:  * Copyright (c) 1985 Sun Microsystems, Inc.
        !             3:  * Copyright (c) 1980 The Regents of the University of California.
        !             4:  * Copyright (c) 1976 Board of Trustees of the University of Illinois.
        !             5:  * All rights reserved.
        !             6:  *
        !             7:  * Redistribution and use in source and binary forms are permitted
        !             8:  * provided that: (1) source distributions retain this entire copyright
        !             9:  * notice and comment, and (2) distributions including binaries display
        !            10:  * the following acknowledgement:  ``This product includes software
        !            11:  * developed by the University of California, Berkeley and its contributors''
        !            12:  * in the documentation or other materials provided with the distribution
        !            13:  * and in all advertising materials mentioning features or use of this
        !            14:  * software. Neither the name of the University nor the names of its
        !            15:  * contributors may be used to endorse or promote products derived
        !            16:  * from this software without specific prior written permission.
        !            17:  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
        !            18:  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
        !            19:  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
        !            20:  */
        !            21: 
        !            22: #ifndef lint
        !            23: static char sccsid[] = "@(#)lexi.c     5.15 (Berkeley) 6/1/90";
        !            24: #endif /* not lint */
        !            25: 
        !            26: /*
        !            27:  * Here we have the token scanner for indent.  It scans off one token and puts
        !            28:  * it in the global variable "token".  It returns a code, indicating the type
        !            29:  * of token scanned.
        !            30:  */
        !            31: 
#include "indent_globs.h"
#include "indent_codes.h"
#include <ctype.h>
#include <string.h>
        !            35: 
        !            36: #define alphanum 1
        !            37: #define opchar 3
        !            38: 
        !            39: struct templ {
        !            40:     char       *rwd;
        !            41:     int         rwcode;
        !            42: };
        !            43: 
        !            44: struct templ specials[100] =
        !            45: {
        !            46:     "switch", 1,
        !            47:     "case", 2,
        !            48:     "break", 0,
        !            49:     "struct", 3,
        !            50:     "union", 3,
        !            51:     "enum", 3,
        !            52:     "default", 2,
        !            53:     "int", 4,
        !            54:     "char", 4,
        !            55:     "float", 4,
        !            56:     "double", 4,
        !            57:     "long", 4,
        !            58:     "short", 4,
        !            59:     "typdef", 4,
        !            60:     "unsigned", 4,
        !            61:     "register", 4,
        !            62:     "static", 4,
        !            63:     "global", 4,
        !            64:     "extern", 4,
        !            65:     "void", 4,
        !            66:     "goto", 0,
        !            67:     "return", 0,
        !            68:     "if", 5,
        !            69:     "while", 5,
        !            70:     "for", 5,
        !            71:     "else", 6,
        !            72:     "do", 6,
        !            73:     "sizeof", 7,
        !            74:     0, 0
        !            75: };
        !            76: 
        !            77: char        chartype[128] =
        !            78: {                              /* this is used to facilitate the decision of
        !            79:                                 * what type (alphanumeric, operator) each
        !            80:                                 * character is */
        !            81:     0, 0, 0, 0, 0, 0, 0, 0,
        !            82:     0, 0, 0, 0, 0, 0, 0, 0,
        !            83:     0, 0, 0, 0, 0, 0, 0, 0,
        !            84:     0, 0, 0, 0, 0, 0, 0, 0,
        !            85:     0, 3, 0, 0, 1, 3, 3, 0,
        !            86:     0, 0, 3, 3, 0, 3, 0, 3,
        !            87:     1, 1, 1, 1, 1, 1, 1, 1,
        !            88:     1, 1, 0, 0, 3, 3, 3, 3,
        !            89:     0, 1, 1, 1, 1, 1, 1, 1,
        !            90:     1, 1, 1, 1, 1, 1, 1, 1,
        !            91:     1, 1, 1, 1, 1, 1, 1, 1,
        !            92:     1, 1, 1, 0, 0, 0, 3, 1,
        !            93:     0, 1, 1, 1, 1, 1, 1, 1,
        !            94:     1, 1, 1, 1, 1, 1, 1, 1,
        !            95:     1, 1, 1, 1, 1, 1, 1, 1,
        !            96:     1, 1, 1, 0, 3, 0, 3, 0
        !            97: };
        !            98: 
        !            99: 
        !           100: 
        !           101: 
        !           102: int
        !           103: lexi()
        !           104: {
        !           105:     int         unary_delim;   /* this is set to 1 if the current token
        !           106:                                 * 
        !           107:                                 * forces a following operator to be unary */
        !           108:     static int  last_code;     /* the last token type returned */
        !           109:     static int  l_struct;      /* set to 1 if the last token was 'struct' */
        !           110:     int         code;          /* internal code to be returned */
        !           111:     char        qchar;         /* the delimiter character for a string */
        !           112: 
        !           113:     e_token = s_token;         /* point to start of place to save token */
        !           114:     unary_delim = false;
        !           115:     ps.col_1 = ps.last_nl;     /* tell world that this token started in
        !           116:                                 * column 1 iff the last thing scanned was nl */
        !           117:     ps.last_nl = false;
        !           118: 
        !           119:     while (*buf_ptr == ' ' || *buf_ptr == '\t') {      /* get rid of blanks */
        !           120:        ps.col_1 = false;       /* leading blanks imply token is not in column
        !           121:                                 * 1 */
        !           122:        if (++buf_ptr >= buf_end)
        !           123:            fill_buffer();
        !           124:     }
        !           125: 
        !           126:     /* Scan an alphanumeric token */
        !           127:     if (chartype[*buf_ptr] == alphanum || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
        !           128:        /*
        !           129:         * we have a character or number
        !           130:         */
        !           131:        register char *j;       /* used for searching thru list of
        !           132:                                 * 
        !           133:                                 * reserved words */
        !           134:        register struct templ *p;
        !           135: 
        !           136:        if (isdigit(*buf_ptr) || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
        !           137:            int         seendot = 0,
        !           138:                        seenexp = 0;
        !           139:            if (*buf_ptr == '0' &&
        !           140:                    (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
        !           141:                *e_token++ = *buf_ptr++;
        !           142:                *e_token++ = *buf_ptr++;
        !           143:                while (isxdigit(*buf_ptr)) {
        !           144:                    CHECK_SIZE_TOKEN;
        !           145:                    *e_token++ = *buf_ptr++;
        !           146:                }
        !           147:            }
        !           148:            else
        !           149:                while (1) {
        !           150:                    if (*buf_ptr == '.')
        !           151:                        if (seendot)
        !           152:                            break;
        !           153:                        else
        !           154:                            seendot++;
        !           155:                    CHECK_SIZE_TOKEN;
        !           156:                    *e_token++ = *buf_ptr++;
        !           157:                    if (!isdigit(*buf_ptr) && *buf_ptr != '.')
        !           158:                        if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
        !           159:                            break;
        !           160:                        else {
        !           161:                            seenexp++;
        !           162:                            seendot++;
        !           163:                            CHECK_SIZE_TOKEN;
        !           164:                            *e_token++ = *buf_ptr++;
        !           165:                            if (*buf_ptr == '+' || *buf_ptr == '-')
        !           166:                                *e_token++ = *buf_ptr++;
        !           167:                        }
        !           168:                }
        !           169:            if (*buf_ptr == 'L' || *buf_ptr == 'l')
        !           170:                *e_token++ = *buf_ptr++;
        !           171:        }
        !           172:        else
        !           173:            while (chartype[*buf_ptr] == alphanum) {    /* copy it over */
        !           174:                CHECK_SIZE_TOKEN;
        !           175:                *e_token++ = *buf_ptr++;
        !           176:                if (buf_ptr >= buf_end)
        !           177:                    fill_buffer();
        !           178:            }
        !           179:        *e_token++ = '\0';
        !           180:        while (*buf_ptr == ' ' || *buf_ptr == '\t') {   /* get rid of blanks */
        !           181:            if (++buf_ptr >= buf_end)
        !           182:                fill_buffer();
        !           183:        }
        !           184:        ps.its_a_keyword = false;
        !           185:        ps.sizeof_keyword = false;
        !           186:        if (l_struct) {         /* if last token was 'struct', then this token
        !           187:                                 * should be treated as a declaration */
        !           188:            l_struct = false;
        !           189:            last_code = ident;
        !           190:            ps.last_u_d = true;
        !           191:            return (decl);
        !           192:        }
        !           193:        ps.last_u_d = false;    /* Operator after indentifier is binary */
        !           194:        last_code = ident;      /* Remember that this is the code we will
        !           195:                                 * return */
        !           196: 
        !           197:        /*
        !           198:         * This loop will check if the token is a keyword.
        !           199:         */
        !           200:        for (p = specials; (j = p->rwd) != 0; p++) {
        !           201:            register char *p = s_token; /* point at scanned token */
        !           202:            if (*j++ != *p++ || *j++ != *p++)
        !           203:                continue;       /* This test depends on the fact that
        !           204:                                 * identifiers are always at least 1 character
        !           205:                                 * long (ie. the first two bytes of the
        !           206:                                 * identifier are always meaningful) */
        !           207:            if (p[-1] == 0)
        !           208:                break;          /* If its a one-character identifier */
        !           209:            while (*p++ == *j)
        !           210:                if (*j++ == 0)
        !           211:                    goto found_keyword; /* I wish that C had a multi-level
        !           212:                                         * break... */
        !           213:        }
        !           214:        if (p->rwd) {           /* we have a keyword */
        !           215:     found_keyword:
        !           216:            ps.its_a_keyword = true;
        !           217:            ps.last_u_d = true;
        !           218:            switch (p->rwcode) {
        !           219:            case 1:             /* it is a switch */
        !           220:                return (swstmt);
        !           221:            case 2:             /* a case or default */
        !           222:                return (casestmt);
        !           223: 
        !           224:            case 3:             /* a "struct" */
        !           225:                if (ps.p_l_follow)
        !           226:                    break;      /* inside parens: cast */
        !           227:                l_struct = true;
        !           228: 
        !           229:                /*
        !           230:                 * Next time around, we will want to know that we have had a
        !           231:                 * 'struct'
        !           232:                 */
        !           233:            case 4:             /* one of the declaration keywords */
        !           234:                if (ps.p_l_follow) {
        !           235:                    ps.cast_mask |= 1 << ps.p_l_follow;
        !           236:                    break;      /* inside parens: cast */
        !           237:                }
        !           238:                last_code = decl;
        !           239:                return (decl);
        !           240: 
        !           241:            case 5:             /* if, while, for */
        !           242:                return (sp_paren);
        !           243: 
        !           244:            case 6:             /* do, else */
        !           245:                return (sp_nparen);
        !           246: 
        !           247:            case 7:
        !           248:                ps.sizeof_keyword = true;
        !           249:            default:            /* all others are treated like any other
        !           250:                                 * identifier */
        !           251:                return (ident);
        !           252:            }                   /* end of switch */
        !           253:        }                       /* end of if (found_it) */
        !           254:        if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
        !           255:            register char *tp = buf_ptr;
        !           256:            while (tp < buf_end)
        !           257:                if (*tp++ == ')' && (*tp == ';' || *tp == ','))
        !           258:                    goto not_proc;
        !           259:            strncpy(ps.procname, token, sizeof ps.procname - 1);
        !           260:            ps.in_parameter_declaration = 1;
        !           261:            rparen_count = 1;
        !           262:     not_proc:;
        !           263:        }
        !           264:        /*
        !           265:         * The following hack attempts to guess whether or not the current
        !           266:         * token is in fact a declaration keyword -- one that has been
        !           267:         * typedefd
        !           268:         */
        !           269:        if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
        !           270:                && !ps.p_l_follow
        !           271:                && !ps.block_init
        !           272:                && (ps.last_token == rparen || ps.last_token == semicolon ||
        !           273:                    ps.last_token == decl ||
        !           274:                    ps.last_token == lbrace || ps.last_token == rbrace)) {
        !           275:            ps.its_a_keyword = true;
        !           276:            ps.last_u_d = true;
        !           277:            last_code = decl;
        !           278:            return decl;
        !           279:        }
        !           280:        if (last_code == decl)  /* if this is a declared variable, then
        !           281:                                 * following sign is unary */
        !           282:            ps.last_u_d = true; /* will make "int a -1" work */
        !           283:        last_code = ident;
        !           284:        return (ident);         /* the ident is not in the list */
        !           285:     }                          /* end of procesing for alpanum character */
        !           286: 
        !           287:     /* Scan a non-alphanumeric token */
        !           288: 
        !           289:     *e_token++ = *buf_ptr;             /* if it is only a one-character token, it is
        !           290:                                 * moved here */
        !           291:     *e_token = '\0';
        !           292:     if (++buf_ptr >= buf_end)
        !           293:        fill_buffer();
        !           294: 
        !           295:     switch (*token) {
        !           296:     case '\n':
        !           297:        unary_delim = ps.last_u_d;
        !           298:        ps.last_nl = true;      /* remember that we just had a newline */
        !           299:        code = (had_eof ? 0 : newline);
        !           300: 
        !           301:        /*
        !           302:         * if data has been exausted, the newline is a dummy, and we should
        !           303:         * return code to stop
        !           304:         */
        !           305:        break;
        !           306: 
        !           307:     case '\'':                 /* start of quoted character */
        !           308:     case '"':                  /* start of string */
        !           309:        qchar = *token;
        !           310:        if (troff) {
        !           311:            e_token[-1] = '`';
        !           312:            if (qchar == '"')
        !           313:                *e_token++ = '`';
        !           314:            e_token = chfont(&bodyf, &stringf, e_token);
        !           315:        }
        !           316:        do {                    /* copy the string */
        !           317:            while (1) {         /* move one character or [/<char>]<char> */
        !           318:                if (*buf_ptr == '\n') {
        !           319:                    printf("%d: Unterminated literal\n", line_no);
        !           320:                    goto stop_lit;
        !           321:                }
        !           322:                CHECK_SIZE_TOKEN;       /* Only have to do this once in this loop,
        !           323:                                         * since CHECK_SIZE guarantees that there
        !           324:                                         * are at least 5 entries left */
        !           325:                *e_token = *buf_ptr++;
        !           326:                if (buf_ptr >= buf_end)
        !           327:                    fill_buffer();
        !           328:                if (*e_token == BACKSLASH) {    /* if escape, copy extra char */
        !           329:                    if (*buf_ptr == '\n')       /* check for escaped newline */
        !           330:                        ++line_no;
        !           331:                    if (troff) {
        !           332:                        *++e_token = BACKSLASH;
        !           333:                        if (*buf_ptr == BACKSLASH)
        !           334:                            *++e_token = BACKSLASH;
        !           335:                    }
        !           336:                    *++e_token = *buf_ptr++;
        !           337:                    ++e_token;  /* we must increment this again because we
        !           338:                                 * copied two chars */
        !           339:                    if (buf_ptr >= buf_end)
        !           340:                        fill_buffer();
        !           341:                }
        !           342:                else
        !           343:                    break;      /* we copied one character */
        !           344:            }                   /* end of while (1) */
        !           345:        } while (*e_token++ != qchar);
        !           346:        if (troff) {
        !           347:            e_token = chfont(&stringf, &bodyf, e_token - 1);
        !           348:            if (qchar == '"')
        !           349:                *e_token++ = '\'';
        !           350:        }
        !           351: stop_lit:
        !           352:        code = ident;
        !           353:        break;
        !           354: 
        !           355:     case ('('):
        !           356:     case ('['):
        !           357:        unary_delim = true;
        !           358:        code = lparen;
        !           359:        break;
        !           360: 
        !           361:     case (')'):
        !           362:     case (']'):
        !           363:        code = rparen;
        !           364:        break;
        !           365: 
        !           366:     case '#':
        !           367:        unary_delim = ps.last_u_d;
        !           368:        code = preesc;
        !           369:        break;
        !           370: 
        !           371:     case '?':
        !           372:        unary_delim = true;
        !           373:        code = question;
        !           374:        break;
        !           375: 
        !           376:     case (':'):
        !           377:        code = colon;
        !           378:        unary_delim = true;
        !           379:        break;
        !           380: 
        !           381:     case (';'):
        !           382:        unary_delim = true;
        !           383:        code = semicolon;
        !           384:        break;
        !           385: 
        !           386:     case ('{'):
        !           387:        unary_delim = true;
        !           388: 
        !           389:        /*
        !           390:         * if (ps.in_or_st) ps.block_init = 1;
        !           391:         */
        !           392:        /* ?    code = ps.block_init ? lparen : lbrace; */
        !           393:        code = lbrace;
        !           394:        break;
        !           395: 
        !           396:     case ('}'):
        !           397:        unary_delim = true;
        !           398:        /* ?    code = ps.block_init ? rparen : rbrace; */
        !           399:        code = rbrace;
        !           400:        break;
        !           401: 
        !           402:     case 014:                  /* a form feed */
        !           403:        unary_delim = ps.last_u_d;
        !           404:        ps.last_nl = true;      /* remember this so we can set 'ps.col_1'
        !           405:                                 * right */
        !           406:        code = form_feed;
        !           407:        break;
        !           408: 
        !           409:     case (','):
        !           410:        unary_delim = true;
        !           411:        code = comma;
        !           412:        break;
        !           413: 
        !           414:     case '.':
        !           415:        unary_delim = false;
        !           416:        code = period;
        !           417:        break;
        !           418: 
        !           419:     case '-':
        !           420:     case '+':                  /* check for -, +, --, ++ */
        !           421:        code = (ps.last_u_d ? unary_op : binary_op);
        !           422:        unary_delim = true;
        !           423: 
        !           424:        if (*buf_ptr == token[0]) {
        !           425:            /* check for doubled character */
        !           426:            *e_token++ = *buf_ptr++;
        !           427:            /* buffer overflow will be checked at end of loop */
        !           428:            if (last_code == ident || last_code == rparen) {
        !           429:                code = (ps.last_u_d ? unary_op : postop);
        !           430:                /* check for following ++ or -- */
        !           431:                unary_delim = false;
        !           432:            }
        !           433:        }
        !           434:        else if (*buf_ptr == '=')
        !           435:            /* check for operator += */
        !           436:            *e_token++ = *buf_ptr++;
        !           437:        else if (*buf_ptr == '>') {
        !           438:            /* check for operator -> */
        !           439:            *e_token++ = *buf_ptr++;
        !           440:            if (!pointer_as_binop) {
        !           441:                unary_delim = false;
        !           442:                code = unary_op;
        !           443:                ps.want_blank = false;
        !           444:            }
        !           445:        }
        !           446:        break;                  /* buffer overflow will be checked at end of
        !           447:                                 * switch */
        !           448: 
        !           449:     case '=':
        !           450:        if (ps.in_or_st)
        !           451:            ps.block_init = 1;
        !           452: #ifdef undef
        !           453:        if (chartype[*buf_ptr] == opchar) {     /* we have two char assignment */
        !           454:            e_token[-1] = *buf_ptr++;
        !           455:            if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
        !           456:                *e_token++ = *buf_ptr++;
        !           457:            *e_token++ = '=';   /* Flip =+ to += */
        !           458:            *e_token = 0;
        !           459:        }
        !           460: #else
        !           461:        if (*buf_ptr == '=') {/* == */
        !           462:            *e_token++ = '=';   /* Flip =+ to += */
        !           463:            buf_ptr++;
        !           464:            *e_token = 0;
        !           465:        }
        !           466: #endif
        !           467:        code = binary_op;
        !           468:        unary_delim = true;
        !           469:        break;
        !           470:        /* can drop thru!!! */
        !           471: 
        !           472:     case '>':
        !           473:     case '<':
        !           474:     case '!':                  /* ops like <, <<, <=, !=, etc */
        !           475:        if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
        !           476:            *e_token++ = *buf_ptr;
        !           477:            if (++buf_ptr >= buf_end)
        !           478:                fill_buffer();
        !           479:        }
        !           480:        if (*buf_ptr == '=')
        !           481:            *e_token++ = *buf_ptr++;
        !           482:        code = (ps.last_u_d ? unary_op : binary_op);
        !           483:        unary_delim = true;
        !           484:        break;
        !           485: 
        !           486:     default:
        !           487:        if (token[0] == '/' && *buf_ptr == '*') {
        !           488:            /* it is start of comment */
        !           489:            *e_token++ = '*';
        !           490: 
        !           491:            if (++buf_ptr >= buf_end)
        !           492:                fill_buffer();
        !           493: 
        !           494:            code = comment;
        !           495:            unary_delim = ps.last_u_d;
        !           496:            break;
        !           497:        }
        !           498:        while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
        !           499:            /*
        !           500:             * handle ||, &&, etc, and also things as in int *****i
        !           501:             */
        !           502:            *e_token++ = *buf_ptr;
        !           503:            if (++buf_ptr >= buf_end)
        !           504:                fill_buffer();
        !           505:        }
        !           506:        code = (ps.last_u_d ? unary_op : binary_op);
        !           507:        unary_delim = true;
        !           508: 
        !           509: 
        !           510:     }                          /* end of switch */
        !           511:     if (code != newline) {
        !           512:        l_struct = false;
        !           513:        last_code = code;
        !           514:     }
        !           515:     if (buf_ptr >= buf_end)    /* check for input buffer empty */
        !           516:        fill_buffer();
        !           517:     ps.last_u_d = unary_delim;
        !           518:     *e_token = '\0';           /* null terminate the token */
        !           519:     return (code);
        !           520: }
        !           521: 
        !           522: /*
        !           523:  * Add the given keyword to the keyword table, using val as the keyword type
        !           524:  */
        !           525: addkey(key, val)
        !           526:     char       *key;
        !           527: {
        !           528:     register struct templ *p = specials;
        !           529:     while (p->rwd)
        !           530:        if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
        !           531:            return;
        !           532:        else
        !           533:            p++;
        !           534:     if (p >= specials + sizeof specials / sizeof specials[0])
        !           535:        return;                 /* For now, table overflows are silently
        !           536:                                 * ignored */
        !           537:     p->rwd = key;
        !           538:     p->rwcode = val;
        !           539:     p[1].rwd = 0;
        !           540:     p[1].rwcode = 0;
        !           541:     return;
        !           542: }
unix.superglobalmegacorp.com
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.