|
|
1.1 ! root 1: /* ! 2: * Copyright (c) 1985 Sun Microsystems, Inc. ! 3: * Copyright (c) 1980 The Regents of the University of California. ! 4: * Copyright (c) 1976 Board of Trustees of the University of Illinois. ! 5: * All rights reserved. ! 6: * ! 7: * Redistribution and use in source and binary forms are permitted ! 8: * provided that: (1) source distributions retain this entire copyright ! 9: * notice and comment, and (2) distributions including binaries display ! 10: * the following acknowledgement: ``This product includes software ! 11: * developed by the University of California, Berkeley and its contributors'' ! 12: * in the documentation or other materials provided with the distribution ! 13: * and in all advertising materials mentioning features or use of this ! 14: * software. Neither the name of the University nor the names of its ! 15: * contributors may be used to endorse or promote products derived ! 16: * from this software without specific prior written permission. ! 17: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR ! 18: * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED ! 19: * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. ! 20: */ ! 21: ! 22: #ifndef lint ! 23: static char sccsid[] = "@(#)lexi.c 5.15 (Berkeley) 6/1/90"; ! 24: #endif /* not lint */ ! 25: ! 26: /* ! 27: * Here we have the token scanner for indent. It scans off one token and puts ! 28: * it in the global variable "token". It returns a code, indicating the type ! 29: * of token scanned. ! 30: */ ! 31: ! 32: #include "indent_globs.h" ! 33: #include "indent_codes.h" ! 34: #include <ctype.h> ! 35: ! 36: #define alphanum 1 ! 37: #define opchar 3 ! 38: ! 39: struct templ { ! 40: char *rwd; ! 41: int rwcode; ! 42: }; ! 43: ! 44: struct templ specials[100] = ! 45: { ! 46: "switch", 1, ! 47: "case", 2, ! 48: "break", 0, ! 49: "struct", 3, ! 50: "union", 3, ! 51: "enum", 3, ! 52: "default", 2, ! 53: "int", 4, ! 54: "char", 4, ! 55: "float", 4, ! 56: "double", 4, ! 57: "long", 4, ! 58: "short", 4, ! 59: "typdef", 4, ! 60: "unsigned", 4, ! 61: "register", 4, ! 62: "static", 4, ! 63: "global", 4, ! 64: "extern", 4, ! 65: "void", 4, ! 66: "goto", 0, ! 67: "return", 0, ! 68: "if", 5, ! 69: "while", 5, ! 70: "for", 5, ! 71: "else", 6, ! 72: "do", 6, ! 73: "sizeof", 7, ! 74: 0, 0 ! 75: }; ! 76: ! 77: char chartype[128] = ! 78: { /* this is used to facilitate the decision of ! 79: * what type (alphanumeric, operator) each ! 80: * character is */ ! 81: 0, 0, 0, 0, 0, 0, 0, 0, ! 82: 0, 0, 0, 0, 0, 0, 0, 0, ! 83: 0, 0, 0, 0, 0, 0, 0, 0, ! 84: 0, 0, 0, 0, 0, 0, 0, 0, ! 85: 0, 3, 0, 0, 1, 3, 3, 0, ! 86: 0, 0, 3, 3, 0, 3, 0, 3, ! 87: 1, 1, 1, 1, 1, 1, 1, 1, ! 88: 1, 1, 0, 0, 3, 3, 3, 3, ! 89: 0, 1, 1, 1, 1, 1, 1, 1, ! 90: 1, 1, 1, 1, 1, 1, 1, 1, ! 91: 1, 1, 1, 1, 1, 1, 1, 1, ! 92: 1, 1, 1, 0, 0, 0, 3, 1, ! 93: 0, 1, 1, 1, 1, 1, 1, 1, ! 94: 1, 1, 1, 1, 1, 1, 1, 1, ! 95: 1, 1, 1, 1, 1, 1, 1, 1, ! 96: 1, 1, 1, 0, 3, 0, 3, 0 ! 97: }; ! 98: ! 99: ! 100: ! 101: ! 102: int ! 103: lexi() ! 104: { ! 105: int unary_delim; /* this is set to 1 if the current token ! 106: * ! 107: * forces a following operator to be unary */ ! 108: static int last_code; /* the last token type returned */ ! 109: static int l_struct; /* set to 1 if the last token was 'struct' */ ! 110: int code; /* internal code to be returned */ ! 111: char qchar; /* the delimiter character for a string */ ! 112: ! 113: e_token = s_token; /* point to start of place to save token */ ! 114: unary_delim = false; ! 115: ps.col_1 = ps.last_nl; /* tell world that this token started in ! 116: * column 1 iff the last thing scanned was nl */ ! 117: ps.last_nl = false; ! 118: ! 119: while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ ! 120: ps.col_1 = false; /* leading blanks imply token is not in column ! 121: * 1 */ ! 122: if (++buf_ptr >= buf_end) ! 123: fill_buffer(); ! 124: } ! 125: ! 126: /* Scan an alphanumeric token */ ! 127: if (chartype[*buf_ptr] == alphanum || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) { ! 128: /* ! 129: * we have a character or number ! 130: */ ! 131: register char *j; /* used for searching thru list of ! 132: * ! 133: * reserved words */ ! 134: register struct templ *p; ! 135: ! 136: if (isdigit(*buf_ptr) || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) { ! 137: int seendot = 0, ! 138: seenexp = 0; ! 139: if (*buf_ptr == '0' && ! 140: (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) { ! 141: *e_token++ = *buf_ptr++; ! 142: *e_token++ = *buf_ptr++; ! 143: while (isxdigit(*buf_ptr)) { ! 144: CHECK_SIZE_TOKEN; ! 145: *e_token++ = *buf_ptr++; ! 146: } ! 147: } ! 148: else ! 149: while (1) { ! 150: if (*buf_ptr == '.') ! 151: if (seendot) ! 152: break; ! 153: else ! 154: seendot++; ! 155: CHECK_SIZE_TOKEN; ! 156: *e_token++ = *buf_ptr++; ! 157: if (!isdigit(*buf_ptr) && *buf_ptr != '.') ! 158: if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp) ! 159: break; ! 160: else { ! 161: seenexp++; ! 162: seendot++; ! 163: CHECK_SIZE_TOKEN; ! 164: *e_token++ = *buf_ptr++; ! 165: if (*buf_ptr == '+' || *buf_ptr == '-') ! 166: *e_token++ = *buf_ptr++; ! 167: } ! 168: } ! 169: if (*buf_ptr == 'L' || *buf_ptr == 'l') ! 170: *e_token++ = *buf_ptr++; ! 171: } ! 172: else ! 173: while (chartype[*buf_ptr] == alphanum) { /* copy it over */ ! 174: CHECK_SIZE_TOKEN; ! 175: *e_token++ = *buf_ptr++; ! 176: if (buf_ptr >= buf_end) ! 177: fill_buffer(); ! 178: } ! 179: *e_token++ = '\0'; ! 180: while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ ! 181: if (++buf_ptr >= buf_end) ! 182: fill_buffer(); ! 183: } ! 184: ps.its_a_keyword = false; ! 185: ps.sizeof_keyword = false; ! 186: if (l_struct) { /* if last token was 'struct', then this token ! 187: * should be treated as a declaration */ ! 188: l_struct = false; ! 189: last_code = ident; ! 190: ps.last_u_d = true; ! 191: return (decl); ! 192: } ! 193: ps.last_u_d = false; /* Operator after indentifier is binary */ ! 194: last_code = ident; /* Remember that this is the code we will ! 195: * return */ ! 196: ! 197: /* ! 198: * This loop will check if the token is a keyword. ! 199: */ ! 200: for (p = specials; (j = p->rwd) != 0; p++) { ! 201: register char *p = s_token; /* point at scanned token */ ! 202: if (*j++ != *p++ || *j++ != *p++) ! 203: continue; /* This test depends on the fact that ! 204: * identifiers are always at least 1 character ! 205: * long (ie. the first two bytes of the ! 206: * identifier are always meaningful) */ ! 207: if (p[-1] == 0) ! 208: break; /* If its a one-character identifier */ ! 209: while (*p++ == *j) ! 210: if (*j++ == 0) ! 211: goto found_keyword; /* I wish that C had a multi-level ! 212: * break... */ ! 213: } ! 214: if (p->rwd) { /* we have a keyword */ ! 215: found_keyword: ! 216: ps.its_a_keyword = true; ! 217: ps.last_u_d = true; ! 218: switch (p->rwcode) { ! 219: case 1: /* it is a switch */ ! 220: return (swstmt); ! 221: case 2: /* a case or default */ ! 222: return (casestmt); ! 223: ! 224: case 3: /* a "struct" */ ! 225: if (ps.p_l_follow) ! 226: break; /* inside parens: cast */ ! 227: l_struct = true; ! 228: ! 229: /* ! 230: * Next time around, we will want to know that we have had a ! 231: * 'struct' ! 232: */ ! 233: case 4: /* one of the declaration keywords */ ! 234: if (ps.p_l_follow) { ! 235: ps.cast_mask |= 1 << ps.p_l_follow; ! 236: break; /* inside parens: cast */ ! 237: } ! 238: last_code = decl; ! 239: return (decl); ! 240: ! 241: case 5: /* if, while, for */ ! 242: return (sp_paren); ! 243: ! 244: case 6: /* do, else */ ! 245: return (sp_nparen); ! 246: ! 247: case 7: ! 248: ps.sizeof_keyword = true; ! 249: default: /* all others are treated like any other ! 250: * identifier */ ! 251: return (ident); ! 252: } /* end of switch */ ! 253: } /* end of if (found_it) */ ! 254: if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) { ! 255: register char *tp = buf_ptr; ! 256: while (tp < buf_end) ! 257: if (*tp++ == ')' && (*tp == ';' || *tp == ',')) ! 258: goto not_proc; ! 259: strncpy(ps.procname, token, sizeof ps.procname - 1); ! 260: ps.in_parameter_declaration = 1; ! 261: rparen_count = 1; ! 262: not_proc:; ! 263: } ! 264: /* ! 265: * The following hack attempts to guess whether or not the current ! 266: * token is in fact a declaration keyword -- one that has been ! 267: * typedefd ! 268: */ ! 269: if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_') ! 270: && !ps.p_l_follow ! 271: && !ps.block_init ! 272: && (ps.last_token == rparen || ps.last_token == semicolon || ! 273: ps.last_token == decl || ! 274: ps.last_token == lbrace || ps.last_token == rbrace)) { ! 275: ps.its_a_keyword = true; ! 276: ps.last_u_d = true; ! 277: last_code = decl; ! 278: return decl; ! 279: } ! 280: if (last_code == decl) /* if this is a declared variable, then ! 281: * following sign is unary */ ! 282: ps.last_u_d = true; /* will make "int a -1" work */ ! 283: last_code = ident; ! 284: return (ident); /* the ident is not in the list */ ! 285: } /* end of procesing for alpanum character */ ! 286: ! 287: /* Scan a non-alphanumeric token */ ! 288: ! 289: *e_token++ = *buf_ptr; /* if it is only a one-character token, it is ! 290: * moved here */ ! 291: *e_token = '\0'; ! 292: if (++buf_ptr >= buf_end) ! 293: fill_buffer(); ! 294: ! 295: switch (*token) { ! 296: case '\n': ! 297: unary_delim = ps.last_u_d; ! 298: ps.last_nl = true; /* remember that we just had a newline */ ! 299: code = (had_eof ? 0 : newline); ! 300: ! 301: /* ! 302: * if data has been exausted, the newline is a dummy, and we should ! 303: * return code to stop ! 304: */ ! 305: break; ! 306: ! 307: case '\'': /* start of quoted character */ ! 308: case '"': /* start of string */ ! 309: qchar = *token; ! 310: if (troff) { ! 311: e_token[-1] = '`'; ! 312: if (qchar == '"') ! 313: *e_token++ = '`'; ! 314: e_token = chfont(&bodyf, &stringf, e_token); ! 315: } ! 316: do { /* copy the string */ ! 317: while (1) { /* move one character or [/<char>]<char> */ ! 318: if (*buf_ptr == '\n') { ! 319: printf("%d: Unterminated literal\n", line_no); ! 320: goto stop_lit; ! 321: } ! 322: CHECK_SIZE_TOKEN; /* Only have to do this once in this loop, ! 323: * since CHECK_SIZE guarantees that there ! 324: * are at least 5 entries left */ ! 325: *e_token = *buf_ptr++; ! 326: if (buf_ptr >= buf_end) ! 327: fill_buffer(); ! 328: if (*e_token == BACKSLASH) { /* if escape, copy extra char */ ! 329: if (*buf_ptr == '\n') /* check for escaped newline */ ! 330: ++line_no; ! 331: if (troff) { ! 332: *++e_token = BACKSLASH; ! 333: if (*buf_ptr == BACKSLASH) ! 334: *++e_token = BACKSLASH; ! 335: } ! 336: *++e_token = *buf_ptr++; ! 337: ++e_token; /* we must increment this again because we ! 338: * copied two chars */ ! 339: if (buf_ptr >= buf_end) ! 340: fill_buffer(); ! 341: } ! 342: else ! 343: break; /* we copied one character */ ! 344: } /* end of while (1) */ ! 345: } while (*e_token++ != qchar); ! 346: if (troff) { ! 347: e_token = chfont(&stringf, &bodyf, e_token - 1); ! 348: if (qchar == '"') ! 349: *e_token++ = '\''; ! 350: } ! 351: stop_lit: ! 352: code = ident; ! 353: break; ! 354: ! 355: case ('('): ! 356: case ('['): ! 357: unary_delim = true; ! 358: code = lparen; ! 359: break; ! 360: ! 361: case (')'): ! 362: case (']'): ! 363: code = rparen; ! 364: break; ! 365: ! 366: case '#': ! 367: unary_delim = ps.last_u_d; ! 368: code = preesc; ! 369: break; ! 370: ! 371: case '?': ! 372: unary_delim = true; ! 373: code = question; ! 374: break; ! 375: ! 376: case (':'): ! 377: code = colon; ! 378: unary_delim = true; ! 379: break; ! 380: ! 381: case (';'): ! 382: unary_delim = true; ! 383: code = semicolon; ! 384: break; ! 385: ! 386: case ('{'): ! 387: unary_delim = true; ! 388: ! 389: /* ! 390: * if (ps.in_or_st) ps.block_init = 1; ! 391: */ ! 392: /* ? code = ps.block_init ? lparen : lbrace; */ ! 393: code = lbrace; ! 394: break; ! 395: ! 396: case ('}'): ! 397: unary_delim = true; ! 398: /* ? code = ps.block_init ? rparen : rbrace; */ ! 399: code = rbrace; ! 400: break; ! 401: ! 402: case 014: /* a form feed */ ! 403: unary_delim = ps.last_u_d; ! 404: ps.last_nl = true; /* remember this so we can set 'ps.col_1' ! 405: * right */ ! 406: code = form_feed; ! 407: break; ! 408: ! 409: case (','): ! 410: unary_delim = true; ! 411: code = comma; ! 412: break; ! 413: ! 414: case '.': ! 415: unary_delim = false; ! 416: code = period; ! 417: break; ! 418: ! 419: case '-': ! 420: case '+': /* check for -, +, --, ++ */ ! 421: code = (ps.last_u_d ? unary_op : binary_op); ! 422: unary_delim = true; ! 423: ! 424: if (*buf_ptr == token[0]) { ! 425: /* check for doubled character */ ! 426: *e_token++ = *buf_ptr++; ! 427: /* buffer overflow will be checked at end of loop */ ! 428: if (last_code == ident || last_code == rparen) { ! 429: code = (ps.last_u_d ? unary_op : postop); ! 430: /* check for following ++ or -- */ ! 431: unary_delim = false; ! 432: } ! 433: } ! 434: else if (*buf_ptr == '=') ! 435: /* check for operator += */ ! 436: *e_token++ = *buf_ptr++; ! 437: else if (*buf_ptr == '>') { ! 438: /* check for operator -> */ ! 439: *e_token++ = *buf_ptr++; ! 440: if (!pointer_as_binop) { ! 441: unary_delim = false; ! 442: code = unary_op; ! 443: ps.want_blank = false; ! 444: } ! 445: } ! 446: break; /* buffer overflow will be checked at end of ! 447: * switch */ ! 448: ! 449: case '=': ! 450: if (ps.in_or_st) ! 451: ps.block_init = 1; ! 452: #ifdef undef ! 453: if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */ ! 454: e_token[-1] = *buf_ptr++; ! 455: if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr) ! 456: *e_token++ = *buf_ptr++; ! 457: *e_token++ = '='; /* Flip =+ to += */ ! 458: *e_token = 0; ! 459: } ! 460: #else ! 461: if (*buf_ptr == '=') {/* == */ ! 462: *e_token++ = '='; /* Flip =+ to += */ ! 463: buf_ptr++; ! 464: *e_token = 0; ! 465: } ! 466: #endif ! 467: code = binary_op; ! 468: unary_delim = true; ! 469: break; ! 470: /* can drop thru!!! */ ! 471: ! 472: case '>': ! 473: case '<': ! 474: case '!': /* ops like <, <<, <=, !=, etc */ ! 475: if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { ! 476: *e_token++ = *buf_ptr; ! 477: if (++buf_ptr >= buf_end) ! 478: fill_buffer(); ! 479: } ! 480: if (*buf_ptr == '=') ! 481: *e_token++ = *buf_ptr++; ! 482: code = (ps.last_u_d ? unary_op : binary_op); ! 483: unary_delim = true; ! 484: break; ! 485: ! 486: default: ! 487: if (token[0] == '/' && *buf_ptr == '*') { ! 488: /* it is start of comment */ ! 489: *e_token++ = '*'; ! 490: ! 491: if (++buf_ptr >= buf_end) ! 492: fill_buffer(); ! 493: ! 494: code = comment; ! 495: unary_delim = ps.last_u_d; ! 496: break; ! 497: } ! 498: while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') { ! 499: /* ! 500: * handle ||, &&, etc, and also things as in int *****i ! 501: */ ! 502: *e_token++ = *buf_ptr; ! 503: if (++buf_ptr >= buf_end) ! 504: fill_buffer(); ! 505: } ! 506: code = (ps.last_u_d ? unary_op : binary_op); ! 507: unary_delim = true; ! 508: ! 509: ! 510: } /* end of switch */ ! 511: if (code != newline) { ! 512: l_struct = false; ! 513: last_code = code; ! 514: } ! 515: if (buf_ptr >= buf_end) /* check for input buffer empty */ ! 516: fill_buffer(); ! 517: ps.last_u_d = unary_delim; ! 518: *e_token = '\0'; /* null terminate the token */ ! 519: return (code); ! 520: } ! 521: ! 522: /* ! 523: * Add the given keyword to the keyword table, using val as the keyword type ! 524: */ ! 525: addkey(key, val) ! 526: char *key; ! 527: { ! 528: register struct templ *p = specials; ! 529: while (p->rwd) ! 530: if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0) ! 531: return; ! 532: else ! 533: p++; ! 534: if (p >= specials + sizeof specials / sizeof specials[0]) ! 535: return; /* For now, table overflows are silently ! 536: * ignored */ ! 537: p->rwd = key; ! 538: p->rwcode = val; ! 539: p[1].rwd = 0; ! 540: p[1].rwcode = 0; ! 541: return; ! 542: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.