|
|
1.1 ! root 1: /* ! 2: * Copyright (c) 1980 Regents of the University of California. ! 3: * Copyright (c) 1976 Board of Trustees of the University of Illinois. ! 4: * All rights reserved. ! 5: * ! 6: * Redistribution and use in source and binary forms are permitted ! 7: * provided that the above copyright notice and this paragraph are ! 8: * duplicated in all such forms and that any documentation, ! 9: * advertising materials, and other materials related to such ! 10: * distribution and use acknowledge that the software was developed ! 11: * by the University of California, Berkeley and the University ! 12: * of Illinois, Urbana. The name of either ! 13: * University may not be used to endorse or promote products derived ! 14: * from this software without specific prior written permission. ! 15: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR ! 16: * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED ! 17: * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. ! 18: */ ! 19: ! 20: #ifndef lint ! 21: static char sccsid[] = "@(#)lexi.c 5.8 (Berkeley) 6/29/88"; ! 22: #endif /* not lint */ ! 23: ! 24: /* ! 25: * NAME: ! 26: * lexi ! 27: * ! 28: * FUNCTION: ! 29: * This is the token scanner for indent ! 30: * ! 31: * ALGORITHM: ! 32: * 1) Strip off intervening blanks and/or tabs. ! 33: * 2) If it is an alphanumeric token, move it to the token buffer "token". ! 34: * Check if it is a special reserved word that indent will want to ! 35: * know about. ! 36: * 3) Non-alphanumeric tokens are handled with a big switch statement. A ! 37: * flag is kept to remember if the last token was a "unary delimiter", ! 38: * which forces a following operator to be unary as opposed to binary. ! 39: * ! 40: * PARAMETERS: ! 41: * None ! 42: * ! 43: * RETURNS: ! 44: * An integer code indicating the type of token scanned. ! 45: * ! 46: * GLOBALS: ! 47: * buf_ptr = ! 48: * had_eof ! 49: * ps.last_u_d = Set to true iff this token is a "unary delimiter" ! 50: * ! 51: * CALLS: ! 52: * fill_buffer ! 53: * printf (lib) ! 54: * ! 55: * CALLED BY: ! 56: * main ! 57: * ! 58: * NOTES: ! 59: * Start of comment is passed back so that the comment can be scanned by ! 60: * pr_comment. ! 61: * ! 62: * Strings and character literals are returned just like identifiers. ! 63: * ! 64: * HISTORY: ! 65: * initial coding November 1976 D A Willcox of CAC ! 66: * 1/7/77 D A Willcox of CAC Fix to provide proper handling ! 67: * of "int a -1;" ! 68: * ! 69: */ ! 70: ! 71: /* ! 72: * Here we have the token scanner for indent. It scans off one token and ! 73: * puts it in the global variable "token". It returns a code, indicating ! 74: * the type of token scanned. ! 75: */ ! 76: ! 77: #include "indent_globs.h" ! 78: #include "indent_codes.h" ! 79: #include "ctype.h" ! 80: ! 81: #define alphanum 1 ! 82: #define opchar 3 ! 83: ! 84: struct templ { ! 85: char *rwd; ! 86: int rwcode; ! 87: }; ! 88: ! 89: struct templ specials[100] = ! 90: { ! 91: "switch", 1, ! 92: "case", 2, ! 93: "break", 0, ! 94: "struct", 3, ! 95: "union", 3, ! 96: "enum", 3, ! 97: "default", 2, ! 98: "int", 4, ! 99: "char", 4, ! 100: "float", 4, ! 101: "double", 4, ! 102: "long", 4, ! 103: "short", 4, ! 104: "typdef", 4, ! 105: "unsigned", 4, ! 106: "register", 4, ! 107: "static", 4, ! 108: "global", 4, ! 109: "extern", 4, ! 110: "void", 4, ! 111: "goto", 0, ! 112: "return", 0, ! 113: "if", 5, ! 114: "while", 5, ! 115: "for", 5, ! 116: "else", 6, ! 117: "do", 6, ! 118: "sizeof", 7, ! 119: 0, 0 ! 120: }; ! 121: ! 122: char chartype[128] = ! 123: { /* this is used to facilitate the decision ! 124: * of what type (alphanumeric, operator) ! 125: * each character is */ ! 126: 0, 0, 0, 0, 0, 0, 0, 0, ! 127: 0, 0, 0, 0, 0, 0, 0, 0, ! 128: 0, 0, 0, 0, 0, 0, 0, 0, ! 129: 0, 0, 0, 0, 0, 0, 0, 0, ! 130: 0, 3, 0, 0, 1, 3, 3, 0, ! 131: 0, 0, 3, 3, 0, 3, 3, 3, ! 132: 1, 1, 1, 1, 1, 1, 1, 1, ! 133: 1, 1, 0, 0, 3, 3, 3, 3, ! 134: 0, 1, 1, 1, 1, 1, 1, 1, ! 135: 1, 1, 1, 1, 1, 1, 1, 1, ! 136: 1, 1, 1, 1, 1, 1, 1, 1, ! 137: 1, 1, 1, 0, 0, 0, 3, 1, ! 138: 0, 1, 1, 1, 1, 1, 1, 1, ! 139: 1, 1, 1, 1, 1, 1, 1, 1, ! 140: 1, 1, 1, 1, 1, 1, 1, 1, ! 141: 1, 1, 1, 0, 3, 0, 3, 0 ! 142: }; ! 143: ! 144: ! 145: ! 146: ! 147: int ! 148: lexi() ! 149: { ! 150: register char *tok; /* local pointer to next char in token */ ! 151: int unary_delim; /* this is set to 1 if the current token ! 152: * ! 153: * forces a following operator to be unary */ ! 154: static int last_code; /* the last token type returned */ ! 155: static int l_struct; /* set to 1 if the last token was 'struct' */ ! 156: int code; /* internal code to be returned */ ! 157: char qchar; /* the delimiter character for a string */ ! 158: ! 159: tok = token; /* point to start of place to save token */ ! 160: unary_delim = false; ! 161: ps.col_1 = ps.last_nl; /* tell world that this token started in ! 162: * column 1 iff the last thing scanned was ! 163: * nl */ ! 164: ps.last_nl = false; ! 165: ! 166: while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ ! 167: ps.col_1 = false; /* leading blanks imply token is not in ! 168: * column 1 */ ! 169: if (++buf_ptr >= buf_end) ! 170: fill_buffer(); ! 171: } ! 172: ! 173: /* Scan an alphanumeric token. Note that we must also handle ! 174: * stuff like "1.0e+03" and "7e-6". */ ! 175: if (chartype[*buf_ptr & 0177] == alphanum) { /* we have a character ! 176: * or number */ ! 177: register char *j; /* used for searching thru list of ! 178: * reserved words */ ! 179: register struct templ *p; ! 180: register int c; ! 181: ! 182: do { /* copy it over */ ! 183: *tok++ = *buf_ptr++; ! 184: if (buf_ptr >= buf_end) ! 185: fill_buffer(); ! 186: } while (chartype[c = *buf_ptr & 0177] == alphanum || ! 187: isdigit(token[0]) && (c == '+' || c == '-') && ! 188: (tok[-1] == 'e' || tok[-1] == 'E')); ! 189: *tok++ = '\0'; ! 190: while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ ! 191: if (++buf_ptr >= buf_end) ! 192: fill_buffer(); ! 193: } ! 194: ps.its_a_keyword = false; ! 195: ps.sizeof_keyword = false; ! 196: if (l_struct) { /* if last token was 'struct', then this ! 197: * token should be treated as a ! 198: * declaration */ ! 199: l_struct = false; ! 200: last_code = ident; ! 201: ps.last_u_d = true; ! 202: return (decl); ! 203: } ! 204: ps.last_u_d = false; /* Operator after indentifier is binary */ ! 205: last_code = ident; /* Remember that this is the code we will ! 206: * return */ ! 207: ! 208: /* ! 209: * This loop will check if the token is a keyword. ! 210: */ ! 211: for (p = specials; (j = p->rwd) != 0; p++) { ! 212: tok = token; /* point at scanned token */ ! 213: if (*j++ != *tok++ || *j++ != *tok++) ! 214: continue; /* This test depends on the fact that ! 215: * identifiers are always at least 1 ! 216: * character long (ie. the first two bytes ! 217: * of the identifier are always ! 218: * meaningful) */ ! 219: if (tok[-1] == 0) ! 220: break; /* If its a one-character identifier */ ! 221: while (*tok++ == *j) ! 222: if (*j++ == 0) ! 223: goto found_keyword; /* I wish that C had a multi-level ! 224: * break... */ ! 225: } ! 226: if (p->rwd) { /* we have a keyword */ ! 227: found_keyword: ! 228: ps.its_a_keyword = true; ! 229: ps.last_u_d = true; ! 230: switch (p->rwcode) { ! 231: case 1: /* it is a switch */ ! 232: return (swstmt); ! 233: case 2: /* a case or default */ ! 234: return (casestmt); ! 235: ! 236: case 3: /* a "struct" */ ! 237: if (ps.p_l_follow) ! 238: break; /* inside parens: cast */ ! 239: l_struct = true; ! 240: ! 241: /* ! 242: * Next time around, we will want to know that we have ! 243: * had a 'struct' ! 244: */ ! 245: case 4: /* one of the declaration keywords */ ! 246: if (ps.p_l_follow) { ! 247: ps.cast_mask |= 1 << ps.p_l_follow; ! 248: break; /* inside parens: cast */ ! 249: } ! 250: last_code = decl; ! 251: return (decl); ! 252: ! 253: case 5: /* if, while, for */ ! 254: return (sp_paren); ! 255: ! 256: case 6: /* do, else */ ! 257: return (sp_nparen); ! 258: ! 259: case 7: ! 260: ps.sizeof_keyword = true; ! 261: default: /* all others are treated like any other ! 262: * identifier */ ! 263: return (ident); ! 264: } /* end of switch */ ! 265: } /* end of if (found_it) */ ! 266: if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0 ! 267: && (buf_ptr[1] != ')' || buf_ptr[2] != ';')) { ! 268: strncpy(ps.procname, token, sizeof ps.procname - 1); ! 269: ps.in_parameter_declaration = 1; ! 270: } ! 271: ! 272: /* ! 273: * The following hack attempts to guess whether or not the current ! 274: * token is in fact a declaration keyword -- one that has been ! 275: * typedefd ! 276: */ ! 277: if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr)) ! 278: && !ps.p_l_follow ! 279: && (ps.last_token == rparen || ps.last_token == semicolon || ! 280: ps.last_token == decl || ! 281: ps.last_token == lbrace || ps.last_token == rbrace)) { ! 282: ps.its_a_keyword = true; ! 283: ps.last_u_d = true; ! 284: last_code = decl; ! 285: return decl; ! 286: } ! 287: if (last_code == decl) /* if this is a declared variable, then ! 288: * following sign is unary */ ! 289: ps.last_u_d = true; /* will make "int a -1" work */ ! 290: last_code = ident; ! 291: return (ident); /* the ident is not in the list */ ! 292: } /* end of procesing for alpanum character */ ! 293: /* Scan a non-alphanumeric token */ ! 294: ! 295: *tok++ = *buf_ptr; /* if it is only a one-character token, it ! 296: * is moved here */ ! 297: *tok = '\0'; ! 298: if (++buf_ptr >= buf_end) ! 299: fill_buffer(); ! 300: ! 301: switch (*token) { ! 302: case '\n': ! 303: unary_delim = ps.last_u_d; ! 304: ps.last_nl = true; /* remember that we just had a newline */ ! 305: code = (had_eof ? 0 : newline); ! 306: ! 307: /* ! 308: * if data has been exausted, the newline is a dummy, and we ! 309: * should return code to stop ! 310: */ ! 311: break; ! 312: ! 313: case '\'': /* start of quoted character */ ! 314: case '"': /* start of string */ ! 315: qchar = *token; ! 316: if (troff) { ! 317: tok[-1] = '`'; ! 318: if (qchar == '"') ! 319: *tok++ = '`'; ! 320: *tok++ = BACKSLASH; ! 321: *tok++ = 'f'; ! 322: *tok++ = 'L'; ! 323: } ! 324: do { /* copy the string */ ! 325: while (1) { /* move one character or [/<char>]<char> */ ! 326: if (*buf_ptr == '\n') { ! 327: printf("%d: Unterminated literal\n", line_no); ! 328: goto stop_lit; ! 329: } ! 330: *tok = *buf_ptr++; ! 331: if (buf_ptr >= buf_end) ! 332: fill_buffer(); ! 333: if (had_eof || ((tok - token) > (bufsize - 2))) { ! 334: printf("Unterminated literal\n"); ! 335: ++tok; ! 336: goto stop_lit; ! 337: /* get outof literal copying loop */ ! 338: } ! 339: if (*tok == BACKSLASH) { /* if escape, copy extra ! 340: * char */ ! 341: if (*buf_ptr == '\n') /* check for escaped ! 342: * newline */ ! 343: ++line_no; ! 344: if (troff) { ! 345: *++tok = BACKSLASH; ! 346: if (*buf_ptr == BACKSLASH) ! 347: *++tok = BACKSLASH; ! 348: } ! 349: *++tok = *buf_ptr++; ! 350: ++tok; /* we must increment this again because we ! 351: * copied two chars */ ! 352: if (buf_ptr >= buf_end) ! 353: fill_buffer(); ! 354: } ! 355: else ! 356: break; /* we copied one character */ ! 357: } /* end of while (1) */ ! 358: } while (*tok++ != qchar); ! 359: if (troff) { ! 360: tok[-1] = BACKSLASH; ! 361: *tok++ = 'f'; ! 362: *tok++ = 'R'; ! 363: *tok++ = '\''; ! 364: if (qchar == '"') ! 365: *tok++ = '\''; ! 366: } ! 367: stop_lit: ! 368: code = ident; ! 369: break; ! 370: ! 371: case ('('): ! 372: case ('['): ! 373: unary_delim = true; ! 374: code = lparen; ! 375: break; ! 376: ! 377: case (')'): ! 378: case (']'): ! 379: code = rparen; ! 380: break; ! 381: ! 382: case '#': ! 383: unary_delim = ps.last_u_d; ! 384: code = preesc; ! 385: break; ! 386: ! 387: case '?': ! 388: unary_delim = true; ! 389: code = question; ! 390: break; ! 391: ! 392: case (':'): ! 393: code = colon; ! 394: unary_delim = true; ! 395: break; ! 396: ! 397: case (';'): ! 398: unary_delim = true; ! 399: code = semicolon; ! 400: break; ! 401: ! 402: case ('{'): ! 403: unary_delim = true; ! 404: ! 405: /* ! 406: * if (ps.in_or_st) ps.block_init = 1; ! 407: */ ! 408: code = ps.block_init ? lparen : lbrace; ! 409: break; ! 410: ! 411: case ('}'): ! 412: unary_delim = true; ! 413: code = ps.block_init ? rparen : rbrace; ! 414: break; ! 415: ! 416: case 014: /* a form feed */ ! 417: unary_delim = ps.last_u_d; ! 418: ps.last_nl = true; /* remember this so we can set 'ps.col_1' ! 419: * right */ ! 420: code = form_feed; ! 421: break; ! 422: ! 423: case (','): ! 424: unary_delim = true; ! 425: code = comma; ! 426: break; ! 427: ! 428: case '.': ! 429: unary_delim = false; ! 430: code = period; ! 431: break; ! 432: ! 433: case '-': ! 434: case '+': /* check for -, +, --, ++ */ ! 435: code = (ps.last_u_d ? unary_op : binary_op); ! 436: unary_delim = true; ! 437: ! 438: if (*buf_ptr == token[0]) { ! 439: /* check for doubled character */ ! 440: *tok++ = *buf_ptr++; ! 441: /* buffer overflow will be checked at end of loop */ ! 442: if (last_code == ident || last_code == rparen) { ! 443: code = (ps.last_u_d ? unary_op : postop); ! 444: /* check for following ++ or -- */ ! 445: unary_delim = false; ! 446: } ! 447: } ! 448: else if (*buf_ptr == '=') ! 449: /* check for operator += */ ! 450: *tok++ = *buf_ptr++; ! 451: else if (token[0] == '-' && *buf_ptr == '>') { ! 452: /* check for operator -> */ ! 453: *tok++ = *buf_ptr++; ! 454: if (!pointer_as_binop) { ! 455: code = unary_op; ! 456: unary_delim = false; ! 457: ps.want_blank = false; ! 458: } ! 459: } ! 460: /* buffer overflow will be checked at end of switch */ ! 461: ! 462: break; ! 463: ! 464: case '=': ! 465: if (ps.in_or_st) ! 466: ps.block_init = 1; ! 467: if (chartype[*buf_ptr] == opchar) { /* we have two char ! 468: * assignment */ ! 469: tok[-1] = *buf_ptr++; ! 470: if ((tok[-1] == '<' || tok[-1] == '>') && tok[-1] == *buf_ptr) ! 471: *tok++ = *buf_ptr++; ! 472: *tok++ = '='; /* Flip =+ to += */ ! 473: *tok = 0; ! 474: } ! 475: code = binary_op; ! 476: unary_delim = true; ! 477: break; ! 478: /* can drop thru!!! */ ! 479: ! 480: case '>': ! 481: case '<': ! 482: case '!': /* ops like <, <<, <=, !=, etc */ ! 483: if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { ! 484: *tok++ = *buf_ptr; ! 485: if (++buf_ptr >= buf_end) ! 486: fill_buffer(); ! 487: } ! 488: if (*buf_ptr == '=') ! 489: *tok++ = *buf_ptr++; ! 490: code = (ps.last_u_d ? unary_op : binary_op); ! 491: unary_delim = true; ! 492: break; ! 493: ! 494: default: ! 495: if (token[0] == '/' && *buf_ptr == '*') { ! 496: /* it is start of comment */ ! 497: *tok++ = '*'; ! 498: ! 499: if (++buf_ptr >= buf_end) ! 500: fill_buffer(); ! 501: ! 502: code = comment; ! 503: unary_delim = ps.last_u_d; ! 504: break; ! 505: } ! 506: while (*(tok - 1) == *buf_ptr || *buf_ptr == '=') { ! 507: /* handle ||, &&, etc, and also things as in int *****i */ ! 508: *tok++ = *buf_ptr; ! 509: if (++buf_ptr >= buf_end) ! 510: fill_buffer(); ! 511: } ! 512: code = (ps.last_u_d ? unary_op : binary_op); ! 513: unary_delim = true; ! 514: ! 515: ! 516: } /* end of switch */ ! 517: if (code != newline) { ! 518: l_struct = false; ! 519: last_code = code; ! 520: } ! 521: if (buf_ptr >= buf_end) /* check for input buffer empty */ ! 522: fill_buffer(); ! 523: ps.last_u_d = unary_delim; ! 524: *tok = '\0'; /* null terminate the token */ ! 525: return (code); ! 526: }; ! 527: ! 528: /* Add the given keyword to the keyword table, using val as the keyword type ! 529: */ ! 530: addkey (key, val) ! 531: char *key; ! 532: { ! 533: register struct templ *p = specials; ! 534: while (p->rwd) ! 535: if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0) ! 536: return; ! 537: else ! 538: p++; ! 539: if (p >= specials + sizeof specials / sizeof specials[0]) ! 540: return; /* For now, table overflows are silently ! 541: ignored */ ! 542: p->rwd = key; ! 543: p->rwcode = val; ! 544: p[1].rwd = 0; ! 545: p[1].rwcode = 0; ! 546: return; ! 547: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.