43BSD/ucb/indent/lexi.c - annotate

Return to lexi.c CVS log
Up to [CSRG BSD Unix] / 43BSD / ucb / indent
Annotation of 43BSD/ucb/indent/lexi.c, revision 1.1

1.1     ! root        1: /*
        !             2:  * Copyright (c) 1980 Regents of the University of California.
        !             3:  * All rights reserved.  The Berkeley software License Agreement
        !             4:  * specifies the terms and conditions for redistribution.
        !             5:  */
        !             6: 
        !             7: #ifndef lint
        !             8: static char sccsid[] = "@(#)lexi.c     5.4 (Berkeley) 9/10/85";
        !             9: #endif not lint
        !            10: 
        !            11: /*-
        !            12:  *
        !            13:  *                       Copyright (C) 1976
        !            14:  *                             by the
        !            15:  *                       Board of Trustees
        !            16:  *                             of the
        !            17:  *                     University of Illinois
        !            18:  *
        !            19:  *                      All rights reserved
        !            20:  *
        !            21:  *
        !            22:  * NAME:
        !            23:  *     lexi
        !            24:  *
        !            25:  * FUNCTION:
        !            26:  *     This is the token scanner for indent
        !            27:  *
        !            28:  * ALGORITHM:
        !            29:  *     1) Strip off intervening blanks and/or tabs.
        !            30:  *     2) If it is an alphanumeric token, move it to the token buffer "token".
        !            31:  *        Check if it is a special reserved word that indent will want to
        !            32:  *        know about.
        !            33:  *     3) Non-alphanumeric tokens are handled with a big switch statement.  A
        !            34:  *        flag is kept to remember if the last token was a "unary delimiter",
        !            35:  *        which forces a following operator to be unary as opposed to binary.
        !            36:  *
        !            37:  * PARAMETERS:
        !            38:  *     None
        !            39:  *
        !            40:  * RETURNS:
        !            41:  *     An integer code indicating the type of token scanned.
        !            42:  *
        !            43:  * GLOBALS:
        !            44:  *     buf_ptr =
        !            45:  *     had_eof
        !            46:  *     ps.last_u_d =   Set to true iff this token is a "unary delimiter"
        !            47:  *
        !            48:  * CALLS:
        !            49:  *     fill_buffer
        !            50:  *     printf (lib)
        !            51:  *
        !            52:  * CALLED BY:
        !            53:  *     main
        !            54:  *
        !            55:  * NOTES:
        !            56:  *     Start of comment is passed back so that the comment can be scanned by
        !            57:  *     pr_comment.
        !            58:  *
        !            59:  *     Strings and character literals are returned just like identifiers.
        !            60:  *
        !            61:  * HISTORY:
        !            62:  *     initial coding  November 1976   D A Willcox of CAC
        !            63:  *     1/7/77          D A Willcox of CAC      Fix to provide proper handling
        !            64:  *                                             of "int a -1;"
        !            65:  *
        !            66:  */
        !            67: 
        !            68: /*
        !            69:  * Here we have the token scanner for indent.  It scans off one token and
        !            70:  * puts it in the global variable "token".  It returns a code, indicating
        !            71:  * the type of token scanned. 
        !            72:  */
        !            73: 
        !            74: #include "indent_globs.h";
        !            75: #include "indent_codes.h";
        !            76: #include "ctype.h"
        !            77: 
        !            78: #define alphanum 1
        !            79: #define opchar 3
        !            80: 
        !            81: struct templ {
        !            82:     char       *rwd;
        !            83:     int         rwcode;
        !            84: };
        !            85: 
        !            86: struct templ specials[100] =
        !            87: {
        !            88:     "switch", 1,
        !            89:     "case", 2,
        !            90:     "break", 0,
        !            91:     "struct", 3,
        !            92:     "union", 3,
        !            93:     "enum", 3,
        !            94:     "default", 2,
        !            95:     "int", 4,
        !            96:     "char", 4,
        !            97:     "float", 4,
        !            98:     "double", 4,
        !            99:     "long", 4,
        !           100:     "short", 4,
        !           101:     "typdef", 4,
        !           102:     "unsigned", 4,
        !           103:     "register", 4,
        !           104:     "static", 4,
        !           105:     "global", 4,
        !           106:     "extern", 4,
        !           107:     "void", 4,
        !           108:     "goto", 0,
        !           109:     "return", 0,
        !           110:     "if", 5,
        !           111:     "while", 5,
        !           112:     "for", 5,
        !           113:     "else", 6,
        !           114:     "do", 6,
        !           115:     "sizeof", 7,
        !           116:     0, 0
        !           117: };
        !           118: 
        !           119: char        chartype[128] =
        !           120: {                              /* this is used to facilitate the decision
        !           121:                                 * of what type (alphanumeric, operator)
        !           122:                                 * each character is */
        !           123:     0, 0, 0, 0, 0, 0, 0, 0,
        !           124:     0, 0, 0, 0, 0, 0, 0, 0,
        !           125:     0, 0, 0, 0, 0, 0, 0, 0,
        !           126:     0, 0, 0, 0, 0, 0, 0, 0,
        !           127:     0, 3, 0, 0, 0, 3, 3, 0,
        !           128:     0, 0, 3, 3, 0, 3, 3, 3,
        !           129:     1, 1, 1, 1, 1, 1, 1, 1,
        !           130:     1, 1, 0, 0, 3, 3, 3, 3,
        !           131:     0, 1, 1, 1, 1, 1, 1, 1,
        !           132:     1, 1, 1, 1, 1, 1, 1, 1,
        !           133:     1, 1, 1, 1, 1, 1, 1, 1,
        !           134:     1, 1, 1, 0, 0, 0, 3, 1,
        !           135:     0, 1, 1, 1, 1, 1, 1, 1,
        !           136:     1, 1, 1, 1, 1, 1, 1, 1,
        !           137:     1, 1, 1, 1, 1, 1, 1, 1,
        !           138:     1, 1, 1, 0, 3, 0, 3, 0
        !           139: };
        !           140: 
        !           141: 
        !           142: 
        !           143: 
        !           144: int 
        !           145: lexi()
        !           146: {
        !           147:     register char *tok;                /* local pointer to next char in token */
        !           148:     int         unary_delim;   /* this is set to 1 if the current token 
        !           149:                                 *
        !           150:                                 * forces a following operator to be unary */
        !           151:     static int  last_code;     /* the last token type returned */
        !           152:     static int  l_struct;      /* set to 1 if the last token was 'struct' */
        !           153:     int         code;          /* internal code to be returned */
        !           154:     char        qchar;         /* the delimiter character for a string */
        !           155: 
        !           156:     tok = token;               /* point to start of place to save token */
        !           157:     unary_delim = false;
        !           158:     ps.col_1 = ps.last_nl;     /* tell world that this token started in
        !           159:                                 * column 1 iff the last thing scanned was
        !           160:                                 * nl */
        !           161:     ps.last_nl = false;
        !           162: 
        !           163:     while (*buf_ptr == ' ' || *buf_ptr == '\t') {      /* get rid of blanks */
        !           164:        ps.col_1 = false;       /* leading blanks imply token is not in
        !           165:                                 * column 1 */
        !           166:        if (++buf_ptr >= buf_end)
        !           167:            fill_buffer();
        !           168:     }
        !           169: 
        !           170:     /* Scan an alphanumeric token.  Note that we must also handle
        !           171:      * stuff like "1.0e+03" and "7e-6". */
        !           172:     if (chartype[*buf_ptr & 0177] == alphanum) {       /* we have a character
        !           173:                                                         * or number */
        !           174:        register char *j;       /* used for searching thru list of 
        !           175:                                 * reserved words */
        !           176:        register struct templ *p;
        !           177:        register int c;
        !           178: 
        !           179:        do {                    /* copy it over */
        !           180:            *tok++ = *buf_ptr++;
        !           181:            if (buf_ptr >= buf_end)
        !           182:                fill_buffer();
        !           183:        } while (chartype[c = *buf_ptr & 0177] == alphanum ||
        !           184:                isdigit(token[0]) && (c == '+' || c == '-') &&
        !           185:                (tok[-1] == 'e' || tok[-1] == 'E'));
        !           186:        *tok++ = '\0';
        !           187:        while (*buf_ptr == ' ' || *buf_ptr == '\t') {   /* get rid of blanks */
        !           188:            if (++buf_ptr >= buf_end)
        !           189:                fill_buffer();
        !           190:        }
        !           191:        ps.its_a_keyword = false;
        !           192:        ps.sizeof_keyword = false;
        !           193:        if (l_struct) {         /* if last token was 'struct', then this
        !           194:                                 * token should be treated as a
        !           195:                                 * declaration */
        !           196:            l_struct = false;
        !           197:            last_code = ident;
        !           198:            ps.last_u_d = true;
        !           199:            return (decl);
        !           200:        }
        !           201:        ps.last_u_d = false;    /* Operator after indentifier is binary */
        !           202:        last_code = ident;      /* Remember that this is the code we will
        !           203:                                 * return */
        !           204: 
        !           205:        /*
        !           206:         * This loop will check if the token is a keyword. 
        !           207:         */
        !           208:        for (p = specials; (j = p->rwd) != 0; p++) {
        !           209:            tok = token;        /* point at scanned token */
        !           210:            if (*j++ != *tok++ || *j++ != *tok++)
        !           211:                continue;       /* This test depends on the fact that
        !           212:                                 * identifiers are always at least 1
        !           213:                                 * character long (ie. the first two bytes
        !           214:                                 * of the identifier are always
        !           215:                                 * meaningful) */
        !           216:            if (tok[-1] == 0)
        !           217:                break;          /* If its a one-character identifier */
        !           218:            while (*tok++ == *j)
        !           219:                if (*j++ == 0)
        !           220:                    goto found_keyword; /* I wish that C had a multi-level
        !           221:                                         * break... */
        !           222:        }
        !           223:        if (p->rwd) {           /* we have a keyword */
        !           224:     found_keyword:
        !           225:            ps.its_a_keyword = true;
        !           226:            ps.last_u_d = true;
        !           227:            switch (p->rwcode) {
        !           228:                case 1: /* it is a switch */
        !           229:                    return (swstmt);
        !           230:                case 2: /* a case or default */
        !           231:                    return (casestmt);
        !           232: 
        !           233:                case 3: /* a "struct" */
        !           234:                    if (ps.p_l_follow)
        !           235:                        break;  /* inside parens: cast */
        !           236:                    l_struct = true;
        !           237: 
        !           238:                    /*
        !           239:                     * Next time around, we will want to know that we have
        !           240:                     * had a 'struct' 
        !           241:                     */
        !           242:                case 4: /* one of the declaration keywords */
        !           243:                    if (ps.p_l_follow) {
        !           244:                        ps.cast_mask |= 1 << ps.p_l_follow;
        !           245:                        break;  /* inside parens: cast */
        !           246:                    }
        !           247:                    last_code = decl;
        !           248:                    return (decl);
        !           249: 
        !           250:                case 5: /* if, while, for */
        !           251:                    return (sp_paren);
        !           252: 
        !           253:                case 6: /* do, else */
        !           254:                    return (sp_nparen);
        !           255: 
        !           256:                case 7:
        !           257:                    ps.sizeof_keyword = true;
        !           258:                default:        /* all others are treated like any other
        !           259:                                 * identifier */
        !           260:                    return (ident);
        !           261:            }                   /* end of switch */
        !           262:        }                       /* end of if (found_it) */
        !           263:        if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0
        !           264:            && (buf_ptr[1] != ')' || buf_ptr[2] != ';')) {
        !           265:            strncpy(ps.procname, token, sizeof ps.procname - 1);
        !           266:            ps.in_parameter_declaration = 1;
        !           267:        }
        !           268: 
        !           269:        /*
        !           270:         * The following hack attempts to guess whether or not the current
        !           271:         * token is in fact a declaration keyword -- one that has been
        !           272:         * typedefd 
        !           273:         */
        !           274:        if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr))
        !           275:            && !ps.p_l_follow
        !           276:            && (ps.last_token == rparen || ps.last_token == semicolon ||
        !           277:                ps.last_token == decl ||
        !           278:                ps.last_token == lbrace || ps.last_token == rbrace)) {
        !           279:            ps.its_a_keyword = true;
        !           280:            ps.last_u_d = true;
        !           281:            last_code = decl;
        !           282:            return decl;
        !           283:        }
        !           284:        if (last_code == decl)  /* if this is a declared variable, then
        !           285:                                 * following sign is unary */
        !           286:            ps.last_u_d = true; /* will make "int a -1" work */
        !           287:        last_code = ident;
        !           288:        return (ident);         /* the ident is not in the list */
        !           289:     }                          /* end of procesing for alpanum character */
        !           290:     /* Scan a non-alphanumeric token */
        !           291: 
        !           292:     *tok++ = *buf_ptr;         /* if it is only a one-character token, it
        !           293:                                 * is moved here */
        !           294:     *tok = '\0';
        !           295:     if (++buf_ptr >= buf_end)
        !           296:        fill_buffer();
        !           297: 
        !           298:     switch (*token) {
        !           299:        case '\n':
        !           300:            unary_delim = ps.last_u_d;
        !           301:            ps.last_nl = true;  /* remember that we just had a newline */
        !           302:            code = (had_eof ? 0 : newline);
        !           303: 
        !           304:            /*
        !           305:             * if data has been exausted, the newline is a dummy, and we
        !           306:             * should return code to stop 
        !           307:             */
        !           308:            break;
        !           309: 
        !           310:        case '\'':              /* start of quoted character */
        !           311:        case '"':               /* start of string */
        !           312:            qchar = *token;
        !           313:            if (troff) {
        !           314:                tok[-1] = '`';
        !           315:                if (qchar == '"')
        !           316:                    *tok++ = '`';
        !           317:                *tok++ = BACKSLASH;
        !           318:                *tok++ = 'f';
        !           319:                *tok++ = 'L';
        !           320:            }
        !           321:            do {                /* copy the string */
        !           322:                while (1) {     /* move one character or [/<char>]<char> */
        !           323:                    if (*buf_ptr == '\n') {
        !           324:                        printf("%d: Unterminated literal\n", line_no);
        !           325:                        goto stop_lit;
        !           326:                    }
        !           327:                    *tok = *buf_ptr++;
        !           328:                    if (buf_ptr >= buf_end)
        !           329:                        fill_buffer();
        !           330:                    if (had_eof || ((tok - token) > (bufsize - 2))) {
        !           331:                        printf("Unterminated literal\n");
        !           332:                        ++tok;
        !           333:                        goto stop_lit;
        !           334:                        /* get outof literal copying loop */
        !           335:                    }
        !           336:                    if (*tok == BACKSLASH) {    /* if escape, copy extra
        !           337:                                                 * char */
        !           338:                        if (*buf_ptr == '\n')   /* check for escaped
        !           339:                                                 * newline */
        !           340:                            ++line_no;
        !           341:                        if (troff) {
        !           342:                            *++tok = BACKSLASH;
        !           343:                            if (*buf_ptr == BACKSLASH)
        !           344:                                *++tok = BACKSLASH;
        !           345:                        }
        !           346:                        *++tok = *buf_ptr++;
        !           347:                        ++tok;  /* we must increment this again because we
        !           348:                                 * copied two chars */
        !           349:                        if (buf_ptr >= buf_end)
        !           350:                            fill_buffer();
        !           351:                    }
        !           352:                    else
        !           353:                        break;  /* we copied one character */
        !           354:                }               /* end of while (1) */
        !           355:            } while (*tok++ != qchar);
        !           356:            if (troff) {
        !           357:                tok[-1] = BACKSLASH;
        !           358:                *tok++ = 'f';
        !           359:                *tok++ = 'R';
        !           360:                *tok++ = '\'';
        !           361:                if (qchar == '"')
        !           362:                    *tok++ = '\'';
        !           363:            }
        !           364:     stop_lit:
        !           365:            code = ident;
        !           366:            break;
        !           367: 
        !           368:        case ('('):
        !           369:        case ('['):
        !           370:            unary_delim = true;
        !           371:            code = lparen;
        !           372:            break;
        !           373: 
        !           374:        case (')'):
        !           375:        case (']'):
        !           376:            code = rparen;
        !           377:            break;
        !           378: 
        !           379:        case '#':
        !           380:            unary_delim = ps.last_u_d;
        !           381:            code = preesc;
        !           382:            break;
        !           383: 
        !           384:        case '?':
        !           385:            unary_delim = true;
        !           386:            code = question;
        !           387:            break;
        !           388: 
        !           389:        case (':'):
        !           390:            code = colon;
        !           391:            unary_delim = true;
        !           392:            break;
        !           393: 
        !           394:        case (';'):
        !           395:            unary_delim = true;
        !           396:            code = semicolon;
        !           397:            break;
        !           398: 
        !           399:        case ('{'):
        !           400:            unary_delim = true;
        !           401: 
        !           402:            /*
        !           403:             * if (ps.in_or_st) ps.block_init = 1; 
        !           404:             */
        !           405:            code = ps.block_init ? lparen : lbrace;
        !           406:            break;
        !           407: 
        !           408:        case ('}'):
        !           409:            unary_delim = true;
        !           410:            code = ps.block_init ? rparen : rbrace;
        !           411:            break;
        !           412: 
        !           413:        case 014:               /* a form feed */
        !           414:            unary_delim = ps.last_u_d;
        !           415:            ps.last_nl = true;  /* remember this so we can set 'ps.col_1'
        !           416:                                 * right */
        !           417:            code = form_feed;
        !           418:            break;
        !           419: 
        !           420:        case (','):
        !           421:            unary_delim = true;
        !           422:            code = comma;
        !           423:            break;
        !           424: 
        !           425:        case '.':
        !           426:            unary_delim = false;
        !           427:            code = period;
        !           428:            break;
        !           429: 
        !           430:        case '-':
        !           431:        case '+':               /* check for -, +, --, ++ */
        !           432:            code = (ps.last_u_d ? unary_op : binary_op);
        !           433:            unary_delim = true;
        !           434: 
        !           435:            if (*buf_ptr == token[0]) {
        !           436:                /* check for doubled character */
        !           437:                *tok++ = *buf_ptr++;
        !           438:                /* buffer overflow will be checked at end of loop */
        !           439:                if (last_code == ident || last_code == rparen) {
        !           440:                    code = (ps.last_u_d ? unary_op : postop);
        !           441:                    /* check for following ++ or -- */
        !           442:                    unary_delim = false;
        !           443:                }
        !           444:            }
        !           445:            else if (*buf_ptr == '=')
        !           446:                /* check for operator += */
        !           447:                *tok++ = *buf_ptr++;
        !           448:            else if (token[0] == '-' && *buf_ptr == '>') {
        !           449:                /* check for operator -> */
        !           450:                *tok++ = *buf_ptr++;
        !           451:                if (!pointer_as_binop) {
        !           452:                    code = unary_op;
        !           453:                    unary_delim = false;
        !           454:                    ps.want_blank = false;
        !           455:                }
        !           456:            }
        !           457:            /* buffer overflow will be checked at end of switch */
        !           458: 
        !           459:            break;
        !           460: 
        !           461:        case '=':
        !           462:            if (ps.in_or_st)
        !           463:                ps.block_init = 1;
        !           464:            if (chartype[*buf_ptr] == opchar) { /* we have two char
        !           465:                                                 * assignment */
        !           466:                tok[-1] = *buf_ptr++;
        !           467:                if ((tok[-1] == '<' || tok[-1] == '>') && tok[-1] == *buf_ptr)
        !           468:                    *tok++ = *buf_ptr++;
        !           469:                *tok++ = '=';   /* Flip =+ to += */
        !           470:                *tok = 0;
        !           471:            }
        !           472:            code = binary_op;
        !           473:            unary_delim = true;
        !           474:            break;
        !           475:            /* can drop thru!!! */
        !           476: 
        !           477:        case '>':
        !           478:        case '<':
        !           479:        case '!':               /* ops like <, <<, <=, !=, etc */
        !           480:            if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
        !           481:                *tok++ = *buf_ptr;
        !           482:                if (++buf_ptr >= buf_end)
        !           483:                    fill_buffer();
        !           484:            }
        !           485:            if (*buf_ptr == '=')
        !           486:                *tok++ = *buf_ptr++;
        !           487:            code = (ps.last_u_d ? unary_op : binary_op);
        !           488:            unary_delim = true;
        !           489:            break;
        !           490: 
        !           491:        default:
        !           492:            if (token[0] == '/' && *buf_ptr == '*') {
        !           493:                /* it is start of comment */
        !           494:                *tok++ = '*';
        !           495: 
        !           496:                if (++buf_ptr >= buf_end)
        !           497:                    fill_buffer();
        !           498: 
        !           499:                code = comment;
        !           500:                unary_delim = ps.last_u_d;
        !           501:                break;
        !           502:            }
        !           503:            while (*(tok - 1) == *buf_ptr || *buf_ptr == '=') {
        !           504:                /* handle ||, &&, etc, and also things as in int *****i */
        !           505:                *tok++ = *buf_ptr;
        !           506:                if (++buf_ptr >= buf_end)
        !           507:                    fill_buffer();
        !           508:            }
        !           509:            code = (ps.last_u_d ? unary_op : binary_op);
        !           510:            unary_delim = true;
        !           511: 
        !           512: 
        !           513:     }                          /* end of switch */
        !           514:     if (code != newline) {
        !           515:        l_struct = false;
        !           516:        last_code = code;
        !           517:     }
        !           518:     if (buf_ptr >= buf_end)    /* check for input buffer empty */
        !           519:        fill_buffer();
        !           520:     ps.last_u_d = unary_delim;
        !           521:     *tok = '\0';               /* null terminate the token */
        !           522:     return (code);
        !           523: };
        !           524: 
        !           525: /* Add the given keyword to the keyword table, using val as the keyword type
        !           526:    */
        !           527: addkey (key, val)
        !           528: char       *key;
        !           529: {
        !           530:     register struct templ *p = specials;
        !           531:     while (p->rwd)
        !           532:        if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
        !           533:            return;
        !           534:        else
        !           535:            p++;
        !           536:     if (p >= specials + sizeof specials / sizeof specials[0])
        !           537:        return;                 /* For now, table overflows are silently
        !           538:                                   ignored */
        !           539:     p->rwd = key;
        !           540:     p->rwcode = val;
        !           541:     p[1].rwd = 0;
        !           542:     p[1].rwcode = 0;
        !           543:     return;
        !           544: }
unix.superglobalmegacorp.com
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.