42BSD/ucb/indent/lexi.c - annotate

Return to lexi.c CVS log
Up to [CSRG BSD Unix] / 42BSD / ucb / indent
Annotation of 42BSD/ucb/indent/lexi.c, revision 1.1

/* SCCS identification string for this source file. */
static char sccsid[] =
    "@(#)lexi.c     4.1     (Berkeley)      10/21/82";
        !             2: 
        !             3: /*
        !             4: 
        !             5:                          Copyright (C) 1976
        !             6:                                by the
        !             7:                          Board of Trustees
        !             8:                                of the
        !             9:                        University of Illinois
        !            10: 
        !            11:                         All rights reserved
        !            12: 
        !            13: 
        !            14: NAME:
        !            15:        lexi
        !            16: 
        !            17: FUNCTION:
        !            18:        This is the token scanner for indent
        !            19: 
        !            20: ALGORITHM:
        !            21:        1) Strip off intervening blanks and/or tabs.
        !            22:        2) If it is an alphanumeric token, move it to the token buffer "token".
        !            23:           Check if it is a special reserved word that indent will want to
        !            24:           know about.
        !            25:        3) Non-alphanumeric tokens are handled with a big switch statement.  A
        !            26:           flag is kept to remember if the last token was a "unary delimiter",
        !            27:           which forces a following operator to be unary as opposed to binary.
        !            28: 
        !            29: PARAMETERS:
        !            30:        None
        !            31: 
        !            32: RETURNS:
        !            33:        An integer code indicating the type of token scanned.
        !            34: 
        !            35: GLOBALS:
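        !            36:        buf_ptr =       Advanced past every input character consumed
        !            37:        had_eof =       Tested only; when set, a newline is reported as code 0
        !            38:        last_u_d =      Set to true iff this token is a "unary delimiter"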
        !            39: 
        !            40: CALLS:
        !            41:        fill_buffer
        !            42:        printf (lib)
        !            43: 
        !            44: CALLED BY:
        !            45:        main
        !            46: 
        !            47: NOTES:
        !            48:        Start of comment is passed back so that the comment can be scanned by
        !            49:        pr_comment.
        !            50: 
        !            51:        Strings and character literals are returned just like identifiers.
        !            52: 
        !            53: HISTORY:
        !            54:        initial coding  November 1976   D A Willcox of CAC
        !            55:        1/7/77          D A Willcox of CAC      Fix to provide proper handling
        !            56:                                                of "int a -1;"
        !            57: 
        !            58: */
        !            59: 
        !            60: /* Here we have the token scanner for indent.  It scans off one token and
        !            61:    puts it in the global variable "token".  It returns a code, indicating the
        !            62:    type of token scanned. */
        !            63: 
        !            64: #include "indent_globs.h";
        !            65: #include "indent_codes.h";
        !            66: 
        !            67: 
        !            68: 
        !            69: #define alphanum 1
        !            70: #define opchar 3
        !            71: 
        !            72: struct templ {
        !            73:     char   *rwd;
        !            74:     int     rwcode;
        !            75: };
        !            76: 
        !            77: struct templ    specials[] =
        !            78: {
        !            79:     "switch", 1,
        !            80:     "case", 2,
        !            81:     "struct", 3,
        !            82:     "default", 2,
        !            83:     "int", 4,
        !            84:     "char", 4,
        !            85:     "float", 4,
        !            86:     "double", 4,
        !            87:     "long", 4,
        !            88:     "short", 4,
        !            89:     "typdef", 4,
        !            90:     "unsigned", 4,
        !            91:     "register", 4,
        !            92:     "static", 4,
        !            93:     "global", 4,
        !            94:     "extern", 4,
        !            95:     "if", 5,
        !            96:     "while", 5,
        !            97:     "for", 5,
        !            98:     "else", 6,
        !            99:     "do", 6,
        !           100:     "sizeof", 0,
        !           101:     0, 0
        !           102: };
        !           103: 
        !           104: char    chartype[128] =
        !           105: {                 /* this is used to facilitate the decision of what type
        !           106:                      (alphanumeric, operator) each character is */
        !           107:     0, 0, 0, 0, 0, 0, 0, 0,
        !           108:     0, 0, 0, 0, 0, 0, 0, 0,
        !           109:     0, 0, 0, 0, 0, 0, 0, 0,
        !           110:     0, 0, 0, 0, 0, 0, 0, 0,
        !           111:     0, 3, 0, 0, 0, 3, 3, 0,
        !           112:     0, 0, 3, 3, 0, 3, 3, 3,
        !           113:     1, 1, 1, 1, 1, 1, 1, 1,
        !           114:     1, 1, 0, 0, 3, 3, 3, 3,
        !           115:     0, 1, 1, 1, 1, 1, 1, 1,
        !           116:     1, 1, 1, 1, 1, 1, 1, 1,
        !           117:     1, 1, 1, 1, 1, 1, 1, 1,
        !           118:     1, 1, 1, 0, 0, 0, 3, 1,
        !           119:     0, 1, 1, 1, 1, 1, 1, 1,
        !           120:     1, 1, 1, 1, 1, 1, 1, 1,
        !           121:     1, 1, 1, 1, 1, 1, 1, 1,
        !           122:     1, 1, 1, 0, 3, 0, 3, 0
        !           123: };
        !           124: 
        !           125: int     last_nl = true;
        !           126:  /* this is true if the last thing scanned was a newline */
        !           127: 
        !           128: 
        !           129: 
        !           130: int     lexi () {
        !           131:     register char  *tok;
        !           132:  /* local pointer to next char in token */
        !           133:     register int    i;
        !           134:  /* local loop counter */
        !           135:     register char  *j;
        !           136:  /* used for searching thru list of reserved words */
        !           137:     int     unary_delim;
        !           138:  /* this is set to 1 if the current token forces a following operator to be
        !           139:     unary */
        !           140:     static int  last_code;
        !           141:  /* the last token type returned */
        !           142:     static int  l_struct;
        !           143:  /* set to 1 if the last token was 'struct' */
        !           144:     int     found_it;
        !           145:     int     code;  /* internal code to be returned */
        !           146:     char    qchar; /* the delimiter character for a string */
        !           147: 
        !           148:     tok = token;              /* point to start of place to save token */
        !           149:     unary_delim = false;
        !           150:     col_1 = last_nl;          /* tell world that this token started in column
        !           151:                                  1 iff the last thing scanned was nl */
        !           152:     last_nl = false;
        !           153: 
        !           154:     while (*buf_ptr == ' ' || *buf_ptr == '\t') {
        !           155:     /* get rid of blanks */
        !           156:        col_1 = false;         /* leading blanks imply token is not in column 1
        !           157:                                  */
        !           158:        if (++buf_ptr >= buf_end)
        !           159:            fill_buffer ();
        !           160:     }
        !           161: 
        !           162: /*----------------------------------------------------------*\ 
        !           163: |    Scan an alphanumeric token
        !           164: \*----------------------------------------------------------*/
        !           165: 
        !           166:     if (chartype[*buf_ptr & 0177] == alphanum) {
        !           167:     /* we have a character or number */
        !           168:        while (chartype[*buf_ptr & 0177] == alphanum) {
        !           169:        /* copy it over */
        !           170:            *tok++ = *buf_ptr++;
        !           171:            if (buf_ptr >= buf_end)
        !           172:                fill_buffer ();
        !           173:        }
        !           174: 
        !           175:        *tok++ = '\0';
        !           176: 
        !           177:        if (l_struct) {        /* if last token was 'struct', then this token
        !           178:                                  should be treated as a declaration */
        !           179:            l_struct = false;
        !           180:            last_code = ident;
        !           181:            last_u_d = true;
        !           182:            return (decl);
        !           183:        }
        !           184: 
        !           185:        last_u_d = false;      /* operator after indentifier is binary */
        !           186: 
        !           187:        for (i = 0; specials[i].rwd != 0; ++i) {
        !           188:        /* this loop will check if the token is a keyword.  if so, a following
        !           189:           operator is unary */
        !           190:            last_code = ident; /* remember that this is the code we will return
        !           191:                                  */
        !           192:            j = specials[i].rwd;
        !           193:        /* point at ith reserved word */
        !           194:            tok = token;       /* point at scanned toekn */
        !           195:            found_it = true;   /* set to false if not found */
        !           196:            do {
        !           197:                if (*tok++ != *j) {
        !           198:                    found_it = false;
        !           199:                    break;
        !           200:                }
        !           201:            } while (*j++);
        !           202: 
        !           203:            if (found_it) {    /* we have a keyword */
        !           204:                last_u_d = true;
        !           205:                switch (specials[i].rwcode) {
        !           206:                    case 1:    /* it is a switch */
        !           207:                        return (swstmt);
        !           208:                    case 2:    /* a case or default */
        !           209:                        return (casestmt);
        !           210: 
        !           211:                    case 3:    /* a "struct" */
        !           212:                        l_struct = true;
        !           213:                    /* Next time around, we will want to know that we have had
        !           214:                       a 'struct' */
        !           215:                    case 4:    /* one of the declaration keywords */
        !           216:                        if(p_l_follow) break;   /* inside parens: cast */
        !           217:                        last_code = decl;
        !           218:                        return (decl);
        !           219: 
        !           220:                    case 5:    /* if, while, for */
        !           221:                        return (sp_paren);
        !           222: 
        !           223:                    case 6:    /* do, else */
        !           224:                        return (sp_nparen);
        !           225: 
        !           226:                    default:   /* all others are treated like any other
        !           227:                                  identifier */
        !           228:                        return (ident);
        !           229:                }              /* end of switch */
        !           230:            }                  /* end of if (found_it) */
        !           231: 
        !           232:        }
        !           233: 
        !           234:        if (last_code == decl) /* if this is a declared variable, then
        !           235:                                  following sign is unary */
        !           236:            last_u_d = true;   /* will make "int a -1" work */
        !           237:        last_code = ident;
        !           238:        return (ident);        /* the ident is not in the list */
        !           239:     }                         /* end of procesing for alpanum character */
        !           240: 
        !           241: 
        !           242: 
        !           243: /*----------------------------------------------------------*\ 
        !           244: |   Scan a non-alphanumeric token
        !           245: \*----------------------------------------------------------*/
        !           246: 
        !           247:     *tok++ = *buf_ptr;        /* if it is only a one-character token, it is
        !           248:                                  moved here */
        !           249:     *tok = '\0';
        !           250:     if (++buf_ptr >= buf_end)
        !           251:        fill_buffer ();
        !           252: 
        !           253:     switch (*token) {
        !           254:        case '\n': 
        !           255:            unary_delim = last_u_d;
        !           256:            last_nl = true;    /* remember that we just had a newline */
        !           257:            code = (had_eof ? 0 : newline);
        !           258:        /* if data has been exausted, the newline is a dummy, and we should
        !           259:           return code to stop */
        !           260:            break;
        !           261: 
        !           262:        case '\'':             /* start of quoted character */
        !           263:            qchar = '\'';      /* remember final delimiter */
        !           264:            goto copy_lit;     /* and go to common literal code */
        !           265: 
        !           266:        case '"':              /* start of string */
        !           267:            qchar = '"';
        !           268: 
        !           269:     copy_lit: 
        !           270:            do {               /* copy the string */
        !           271:                while (1) {    /* move one character or [/<char>]<char> */
        !           272:                    if (*buf_ptr == '\n') {
        !           273:                    /* check for unterminated literal */
        !           274:                        printf ("%d: Unterminated literal\n", line_no);
        !           275:                        goto stop_lit;
        !           276:                    /* Don't copy any more */
        !           277:                    }
        !           278: 
        !           279:                    *tok = *buf_ptr++;
        !           280:                    if (buf_ptr >= buf_end)
        !           281:                        fill_buffer ();
        !           282:                    if (had_eof || ((tok - token) > (bufsize - 2))) {
        !           283:                        printf ("Unterminated literal\n");
        !           284:                        ++tok;
        !           285:                        goto stop_lit;
        !           286:                    /* get outof literal copying loop */
        !           287:                    }
        !           288: 
        !           289:                    if (*tok == '\\') {
        !           290:                    /* if escape, copy extra char */
        !           291:                        if (*buf_ptr == '\n')
        !           292:                               /* check for escaped newline */
        !           293:                            ++line_no;
        !           294:                        *(++tok) = *buf_ptr++;
        !           295:                        ++tok; /* we must increment this again because we
        !           296:                                  copied two chars */
        !           297:                        if (buf_ptr >= buf_end)
        !           298:                            fill_buffer ();
        !           299:                    }
        !           300:                    else
        !           301:                        break; /* we copied one character */
        !           302:                }              /* end of while (1) */
        !           303:            } while (*tok++ != qchar);
        !           304: 
        !           305:     stop_lit: 
        !           306:            code = ident;
        !           307:            break;
        !           308: 
        !           309:        case ('('): 
        !           310:        case ('['): 
        !           311:            unary_delim = true;
        !           312:            code = lparen;
        !           313:            break;
        !           314: 
        !           315:        case (')'): 
        !           316:        case (']'): 
        !           317:            code = rparen;
        !           318:            break;
        !           319: 
        !           320:        case '#': 
        !           321:            unary_delim = last_u_d;
        !           322:            code = preesc;
        !           323:            break;
        !           324: 
        !           325:        case '?': 
        !           326:            unary_delim = true;
        !           327:            code = question;
        !           328:            break;
        !           329: 
        !           330:        case (':'): 
        !           331:            code = colon;
        !           332:            unary_delim = true;
        !           333:            break;
        !           334: 
        !           335:        case (';'): 
        !           336:            unary_delim = true;
        !           337:            code = semicolon;
        !           338:            break;
        !           339: 
        !           340:        case ('{'): 
        !           341:            unary_delim = true;
        !           342:            code = lbrace;
        !           343:            break;
        !           344: 
        !           345:        case ('}'): 
        !           346:            unary_delim = true;
        !           347:            code = rbrace;
        !           348:            break;
        !           349: 
        !           350:        case 014:              /* a form feed */
        !           351:            unary_delim = last_u_d;
        !           352:            last_nl = true;    /* remember this so we can set 'col_1' right */
        !           353:            code = form_feed;
        !           354:            break;
        !           355: 
        !           356:        case (','): 
        !           357:            unary_delim = true;
        !           358:            code = comma;
        !           359:            break;
        !           360: 
        !           361:        case '.': 
        !           362:            unary_delim = false;
        !           363:            code = period;
        !           364:            break;
        !           365: 
        !           366:        case '-': 
        !           367:        case '+':              /* check for -, +, --, ++ */
        !           368:            code = (last_u_d ? unary_op : binary_op);
        !           369:            unary_delim = true;
        !           370: 
        !           371:            if (*buf_ptr == token[0]) {
        !           372:            /* check for doubled character */
        !           373:                *tok++ = *buf_ptr++;
        !           374:            /* buffer overflow will be checked at end of loop */
        !           375:                if (last_code == ident || last_code == rparen) {
        !           376:                    code = (last_u_d ? unary_op : postop);
        !           377:                /* check for following ++ or -- */
        !           378:                    unary_delim = false;
        !           379:                }
        !           380:            }
        !           381:            else
        !           382:                if (*buf_ptr == '>' || *buf_ptr == '=')
        !           383:                               /* check for operator -> or += */
        !           384:                    *tok++ = *buf_ptr++;
        !           385:        /* buffer overflow will be checked at end of switch */
        !           386: 
        !           387:            break;
        !           388: 
        !           389:        case '=': 
        !           390:            if (chartype[*buf_ptr] == opchar) {
        !           391:            /* we have two char assignment */
        !           392:                *tok++ = *buf_ptr;
        !           393:            /* move second character */
        !           394:                if (++buf_ptr >= buf_end)
        !           395:                    fill_buffer ();
        !           396:            }
        !           397: 
        !           398:            code = binary_op;
        !           399:            unary_delim = true;
        !           400:            if (token[1] != '<' && token[1] != '>')
        !           401:                               /* check for possible 3 char operator */
        !           402:                break;
        !           403:        /* can drop thru!!! */
        !           404: 
        !           405:        case '>': 
        !           406:        case '<': 
        !           407:        case '!':              /* ops like <, <<, <=, !=, etc */
        !           408:            if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
        !           409:                *tok++ = *buf_ptr;
        !           410:                if (++buf_ptr >= buf_end)
        !           411:                    fill_buffer ();
        !           412:            }
        !           413: 
        !           414:            if (*buf_ptr == '=')
        !           415:                 *tok++ = *buf_ptr++;
        !           416:            code = (last_u_d ? unary_op : binary_op);
        !           417:            unary_delim = true;
        !           418:            break;
        !           419: 
        !           420:        default: 
        !           421:            if (token[0] == '/' && *buf_ptr == '*') {
        !           422:            /* it is start of comment */
        !           423:                *tok++ = '*';
        !           424: 
        !           425:                if (++buf_ptr >= buf_end)
        !           426:                    fill_buffer ();
        !           427: 
        !           428:                code = comment;
        !           429:                unary_delim = last_u_d;
        !           430:                break;
        !           431:            }
        !           432: 
        !           433:            while (*(tok - 1) == *buf_ptr || *buf_ptr=='=') {
        !           434:            /* handle ||, &&, etc, and also things as in int *****i */
        !           435:                *tok++ = *buf_ptr;
        !           436:                if (++buf_ptr >= buf_end)
        !           437:                    fill_buffer ();
        !           438:            }
        !           439: 
        !           440: 
        !           441:            code = (last_u_d ? unary_op : binary_op);
        !           442:            unary_delim = true;
        !           443: 
        !           444: 
        !           445:     }                         /* end of switch */
        !           446: 
        !           447:     if (code != newline) {
        !           448:        l_struct = false;
        !           449:        last_code = code;
        !           450:     }
        !           451: 
        !           452:     if (buf_ptr >= buf_end)    /* check for input buffer empty */
        !           453:        fill_buffer ();
        !           454:     last_u_d = unary_delim;
        !           455:     *tok = '\0';              /* null terminate the token */
        !           456:     return (code);
        !           457: };
unix.superglobalmegacorp.com
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.