|
|
1.1 ! root 1: /* ! 2: * Copyright (c) 1980 Regents of the University of California. ! 3: * All rights reserved. The Berkeley software License Agreement ! 4: * specifies the terms and conditions for redistribution. ! 5: */ ! 6: ! 7: #ifndef lint ! 8: static char sccsid[] = "@(#)lexi.c 5.4 (Berkeley) 9/10/85"; ! 9: #endif not lint ! 10: ! 11: /*- ! 12: * ! 13: * Copyright (C) 1976 ! 14: * by the ! 15: * Board of Trustees ! 16: * of the ! 17: * University of Illinois ! 18: * ! 19: * All rights reserved ! 20: * ! 21: * ! 22: * NAME: ! 23: * lexi ! 24: * ! 25: * FUNCTION: ! 26: * This is the token scanner for indent ! 27: * ! 28: * ALGORITHM: ! 29: * 1) Strip off intervening blanks and/or tabs. ! 30: * 2) If it is an alphanumeric token, move it to the token buffer "token". ! 31: * Check if it is a special reserved word that indent will want to ! 32: * know about. ! 33: * 3) Non-alphanumeric tokens are handled with a big switch statement. A ! 34: * flag is kept to remember if the last token was a "unary delimiter", ! 35: * which forces a following operator to be unary as opposed to binary. ! 36: * ! 37: * PARAMETERS: ! 38: * None ! 39: * ! 40: * RETURNS: ! 41: * An integer code indicating the type of token scanned. ! 42: * ! 43: * GLOBALS: ! 44: * buf_ptr = ! 45: * had_eof ! 46: * ps.last_u_d = Set to true iff this token is a "unary delimiter" ! 47: * ! 48: * CALLS: ! 49: * fill_buffer ! 50: * printf (lib) ! 51: * ! 52: * CALLED BY: ! 53: * main ! 54: * ! 55: * NOTES: ! 56: * Start of comment is passed back so that the comment can be scanned by ! 57: * pr_comment. ! 58: * ! 59: * Strings and character literals are returned just like identifiers. ! 60: * ! 61: * HISTORY: ! 62: * initial coding November 1976 D A Willcox of CAC ! 63: * 1/7/77 D A Willcox of CAC Fix to provide proper handling ! 64: * of "int a -1;" ! 65: * ! 66: */ ! 67: ! 68: /* ! 69: * Here we have the token scanner for indent. It scans off one token and ! 70: * puts it in the global variable "token". It returns a code, indicating ! 71: * the type of token scanned. ! 72: */ ! 73: ! 74: #include "indent_globs.h"; ! 75: #include "indent_codes.h"; ! 76: #include "ctype.h" ! 77: ! 78: #define alphanum 1 ! 79: #define opchar 3 ! 80: ! 81: struct templ { ! 82: char *rwd; ! 83: int rwcode; ! 84: }; ! 85: ! 86: struct templ specials[100] = ! 87: { ! 88: "switch", 1, ! 89: "case", 2, ! 90: "break", 0, ! 91: "struct", 3, ! 92: "union", 3, ! 93: "enum", 3, ! 94: "default", 2, ! 95: "int", 4, ! 96: "char", 4, ! 97: "float", 4, ! 98: "double", 4, ! 99: "long", 4, ! 100: "short", 4, ! 101: "typdef", 4, ! 102: "unsigned", 4, ! 103: "register", 4, ! 104: "static", 4, ! 105: "global", 4, ! 106: "extern", 4, ! 107: "void", 4, ! 108: "goto", 0, ! 109: "return", 0, ! 110: "if", 5, ! 111: "while", 5, ! 112: "for", 5, ! 113: "else", 6, ! 114: "do", 6, ! 115: "sizeof", 7, ! 116: 0, 0 ! 117: }; ! 118: ! 119: char chartype[128] = ! 120: { /* this is used to facilitate the decision ! 121: * of what type (alphanumeric, operator) ! 122: * each character is */ ! 123: 0, 0, 0, 0, 0, 0, 0, 0, ! 124: 0, 0, 0, 0, 0, 0, 0, 0, ! 125: 0, 0, 0, 0, 0, 0, 0, 0, ! 126: 0, 0, 0, 0, 0, 0, 0, 0, ! 127: 0, 3, 0, 0, 0, 3, 3, 0, ! 128: 0, 0, 3, 3, 0, 3, 3, 3, ! 129: 1, 1, 1, 1, 1, 1, 1, 1, ! 130: 1, 1, 0, 0, 3, 3, 3, 3, ! 131: 0, 1, 1, 1, 1, 1, 1, 1, ! 132: 1, 1, 1, 1, 1, 1, 1, 1, ! 133: 1, 1, 1, 1, 1, 1, 1, 1, ! 134: 1, 1, 1, 0, 0, 0, 3, 1, ! 135: 0, 1, 1, 1, 1, 1, 1, 1, ! 136: 1, 1, 1, 1, 1, 1, 1, 1, ! 137: 1, 1, 1, 1, 1, 1, 1, 1, ! 138: 1, 1, 1, 0, 3, 0, 3, 0 ! 139: }; ! 140: ! 141: ! 142: ! 143: ! 144: int ! 145: lexi() ! 146: { ! 147: register char *tok; /* local pointer to next char in token */ ! 148: int unary_delim; /* this is set to 1 if the current token ! 149: * ! 150: * forces a following operator to be unary */ ! 151: static int last_code; /* the last token type returned */ ! 152: static int l_struct; /* set to 1 if the last token was 'struct' */ ! 153: int code; /* internal code to be returned */ ! 154: char qchar; /* the delimiter character for a string */ ! 155: ! 156: tok = token; /* point to start of place to save token */ ! 157: unary_delim = false; ! 158: ps.col_1 = ps.last_nl; /* tell world that this token started in ! 159: * column 1 iff the last thing scanned was ! 160: * nl */ ! 161: ps.last_nl = false; ! 162: ! 163: while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ ! 164: ps.col_1 = false; /* leading blanks imply token is not in ! 165: * column 1 */ ! 166: if (++buf_ptr >= buf_end) ! 167: fill_buffer(); ! 168: } ! 169: ! 170: /* Scan an alphanumeric token. Note that we must also handle ! 171: * stuff like "1.0e+03" and "7e-6". */ ! 172: if (chartype[*buf_ptr & 0177] == alphanum) { /* we have a character ! 173: * or number */ ! 174: register char *j; /* used for searching thru list of ! 175: * reserved words */ ! 176: register struct templ *p; ! 177: register int c; ! 178: ! 179: do { /* copy it over */ ! 180: *tok++ = *buf_ptr++; ! 181: if (buf_ptr >= buf_end) ! 182: fill_buffer(); ! 183: } while (chartype[c = *buf_ptr & 0177] == alphanum || ! 184: isdigit(token[0]) && (c == '+' || c == '-') && ! 185: (tok[-1] == 'e' || tok[-1] == 'E')); ! 186: *tok++ = '\0'; ! 187: while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ ! 188: if (++buf_ptr >= buf_end) ! 189: fill_buffer(); ! 190: } ! 191: ps.its_a_keyword = false; ! 192: ps.sizeof_keyword = false; ! 193: if (l_struct) { /* if last token was 'struct', then this ! 194: * token should be treated as a ! 195: * declaration */ ! 196: l_struct = false; ! 197: last_code = ident; ! 198: ps.last_u_d = true; ! 199: return (decl); ! 200: } ! 201: ps.last_u_d = false; /* Operator after indentifier is binary */ ! 202: last_code = ident; /* Remember that this is the code we will ! 203: * return */ ! 204: ! 205: /* ! 206: * This loop will check if the token is a keyword. ! 207: */ ! 208: for (p = specials; (j = p->rwd) != 0; p++) { ! 209: tok = token; /* point at scanned token */ ! 210: if (*j++ != *tok++ || *j++ != *tok++) ! 211: continue; /* This test depends on the fact that ! 212: * identifiers are always at least 1 ! 213: * character long (ie. the first two bytes ! 214: * of the identifier are always ! 215: * meaningful) */ ! 216: if (tok[-1] == 0) ! 217: break; /* If its a one-character identifier */ ! 218: while (*tok++ == *j) ! 219: if (*j++ == 0) ! 220: goto found_keyword; /* I wish that C had a multi-level ! 221: * break... */ ! 222: } ! 223: if (p->rwd) { /* we have a keyword */ ! 224: found_keyword: ! 225: ps.its_a_keyword = true; ! 226: ps.last_u_d = true; ! 227: switch (p->rwcode) { ! 228: case 1: /* it is a switch */ ! 229: return (swstmt); ! 230: case 2: /* a case or default */ ! 231: return (casestmt); ! 232: ! 233: case 3: /* a "struct" */ ! 234: if (ps.p_l_follow) ! 235: break; /* inside parens: cast */ ! 236: l_struct = true; ! 237: ! 238: /* ! 239: * Next time around, we will want to know that we have ! 240: * had a 'struct' ! 241: */ ! 242: case 4: /* one of the declaration keywords */ ! 243: if (ps.p_l_follow) { ! 244: ps.cast_mask |= 1 << ps.p_l_follow; ! 245: break; /* inside parens: cast */ ! 246: } ! 247: last_code = decl; ! 248: return (decl); ! 249: ! 250: case 5: /* if, while, for */ ! 251: return (sp_paren); ! 252: ! 253: case 6: /* do, else */ ! 254: return (sp_nparen); ! 255: ! 256: case 7: ! 257: ps.sizeof_keyword = true; ! 258: default: /* all others are treated like any other ! 259: * identifier */ ! 260: return (ident); ! 261: } /* end of switch */ ! 262: } /* end of if (found_it) */ ! 263: if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0 ! 264: && (buf_ptr[1] != ')' || buf_ptr[2] != ';')) { ! 265: strncpy(ps.procname, token, sizeof ps.procname - 1); ! 266: ps.in_parameter_declaration = 1; ! 267: } ! 268: ! 269: /* ! 270: * The following hack attempts to guess whether or not the current ! 271: * token is in fact a declaration keyword -- one that has been ! 272: * typedefd ! 273: */ ! 274: if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr)) ! 275: && !ps.p_l_follow ! 276: && (ps.last_token == rparen || ps.last_token == semicolon || ! 277: ps.last_token == decl || ! 278: ps.last_token == lbrace || ps.last_token == rbrace)) { ! 279: ps.its_a_keyword = true; ! 280: ps.last_u_d = true; ! 281: last_code = decl; ! 282: return decl; ! 283: } ! 284: if (last_code == decl) /* if this is a declared variable, then ! 285: * following sign is unary */ ! 286: ps.last_u_d = true; /* will make "int a -1" work */ ! 287: last_code = ident; ! 288: return (ident); /* the ident is not in the list */ ! 289: } /* end of procesing for alpanum character */ ! 290: /* Scan a non-alphanumeric token */ ! 291: ! 292: *tok++ = *buf_ptr; /* if it is only a one-character token, it ! 293: * is moved here */ ! 294: *tok = '\0'; ! 295: if (++buf_ptr >= buf_end) ! 296: fill_buffer(); ! 297: ! 298: switch (*token) { ! 299: case '\n': ! 300: unary_delim = ps.last_u_d; ! 301: ps.last_nl = true; /* remember that we just had a newline */ ! 302: code = (had_eof ? 0 : newline); ! 303: ! 304: /* ! 305: * if data has been exausted, the newline is a dummy, and we ! 306: * should return code to stop ! 307: */ ! 308: break; ! 309: ! 310: case '\'': /* start of quoted character */ ! 311: case '"': /* start of string */ ! 312: qchar = *token; ! 313: if (troff) { ! 314: tok[-1] = '`'; ! 315: if (qchar == '"') ! 316: *tok++ = '`'; ! 317: *tok++ = BACKSLASH; ! 318: *tok++ = 'f'; ! 319: *tok++ = 'L'; ! 320: } ! 321: do { /* copy the string */ ! 322: while (1) { /* move one character or [/<char>]<char> */ ! 323: if (*buf_ptr == '\n') { ! 324: printf("%d: Unterminated literal\n", line_no); ! 325: goto stop_lit; ! 326: } ! 327: *tok = *buf_ptr++; ! 328: if (buf_ptr >= buf_end) ! 329: fill_buffer(); ! 330: if (had_eof || ((tok - token) > (bufsize - 2))) { ! 331: printf("Unterminated literal\n"); ! 332: ++tok; ! 333: goto stop_lit; ! 334: /* get outof literal copying loop */ ! 335: } ! 336: if (*tok == BACKSLASH) { /* if escape, copy extra ! 337: * char */ ! 338: if (*buf_ptr == '\n') /* check for escaped ! 339: * newline */ ! 340: ++line_no; ! 341: if (troff) { ! 342: *++tok = BACKSLASH; ! 343: if (*buf_ptr == BACKSLASH) ! 344: *++tok = BACKSLASH; ! 345: } ! 346: *++tok = *buf_ptr++; ! 347: ++tok; /* we must increment this again because we ! 348: * copied two chars */ ! 349: if (buf_ptr >= buf_end) ! 350: fill_buffer(); ! 351: } ! 352: else ! 353: break; /* we copied one character */ ! 354: } /* end of while (1) */ ! 355: } while (*tok++ != qchar); ! 356: if (troff) { ! 357: tok[-1] = BACKSLASH; ! 358: *tok++ = 'f'; ! 359: *tok++ = 'R'; ! 360: *tok++ = '\''; ! 361: if (qchar == '"') ! 362: *tok++ = '\''; ! 363: } ! 364: stop_lit: ! 365: code = ident; ! 366: break; ! 367: ! 368: case ('('): ! 369: case ('['): ! 370: unary_delim = true; ! 371: code = lparen; ! 372: break; ! 373: ! 374: case (')'): ! 375: case (']'): ! 376: code = rparen; ! 377: break; ! 378: ! 379: case '#': ! 380: unary_delim = ps.last_u_d; ! 381: code = preesc; ! 382: break; ! 383: ! 384: case '?': ! 385: unary_delim = true; ! 386: code = question; ! 387: break; ! 388: ! 389: case (':'): ! 390: code = colon; ! 391: unary_delim = true; ! 392: break; ! 393: ! 394: case (';'): ! 395: unary_delim = true; ! 396: code = semicolon; ! 397: break; ! 398: ! 399: case ('{'): ! 400: unary_delim = true; ! 401: ! 402: /* ! 403: * if (ps.in_or_st) ps.block_init = 1; ! 404: */ ! 405: code = ps.block_init ? lparen : lbrace; ! 406: break; ! 407: ! 408: case ('}'): ! 409: unary_delim = true; ! 410: code = ps.block_init ? rparen : rbrace; ! 411: break; ! 412: ! 413: case 014: /* a form feed */ ! 414: unary_delim = ps.last_u_d; ! 415: ps.last_nl = true; /* remember this so we can set 'ps.col_1' ! 416: * right */ ! 417: code = form_feed; ! 418: break; ! 419: ! 420: case (','): ! 421: unary_delim = true; ! 422: code = comma; ! 423: break; ! 424: ! 425: case '.': ! 426: unary_delim = false; ! 427: code = period; ! 428: break; ! 429: ! 430: case '-': ! 431: case '+': /* check for -, +, --, ++ */ ! 432: code = (ps.last_u_d ? unary_op : binary_op); ! 433: unary_delim = true; ! 434: ! 435: if (*buf_ptr == token[0]) { ! 436: /* check for doubled character */ ! 437: *tok++ = *buf_ptr++; ! 438: /* buffer overflow will be checked at end of loop */ ! 439: if (last_code == ident || last_code == rparen) { ! 440: code = (ps.last_u_d ? unary_op : postop); ! 441: /* check for following ++ or -- */ ! 442: unary_delim = false; ! 443: } ! 444: } ! 445: else if (*buf_ptr == '=') ! 446: /* check for operator += */ ! 447: *tok++ = *buf_ptr++; ! 448: else if (token[0] == '-' && *buf_ptr == '>') { ! 449: /* check for operator -> */ ! 450: *tok++ = *buf_ptr++; ! 451: if (!pointer_as_binop) { ! 452: code = unary_op; ! 453: unary_delim = false; ! 454: ps.want_blank = false; ! 455: } ! 456: } ! 457: /* buffer overflow will be checked at end of switch */ ! 458: ! 459: break; ! 460: ! 461: case '=': ! 462: if (ps.in_or_st) ! 463: ps.block_init = 1; ! 464: if (chartype[*buf_ptr] == opchar) { /* we have two char ! 465: * assignment */ ! 466: tok[-1] = *buf_ptr++; ! 467: if ((tok[-1] == '<' || tok[-1] == '>') && tok[-1] == *buf_ptr) ! 468: *tok++ = *buf_ptr++; ! 469: *tok++ = '='; /* Flip =+ to += */ ! 470: *tok = 0; ! 471: } ! 472: code = binary_op; ! 473: unary_delim = true; ! 474: break; ! 475: /* can drop thru!!! */ ! 476: ! 477: case '>': ! 478: case '<': ! 479: case '!': /* ops like <, <<, <=, !=, etc */ ! 480: if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { ! 481: *tok++ = *buf_ptr; ! 482: if (++buf_ptr >= buf_end) ! 483: fill_buffer(); ! 484: } ! 485: if (*buf_ptr == '=') ! 486: *tok++ = *buf_ptr++; ! 487: code = (ps.last_u_d ? unary_op : binary_op); ! 488: unary_delim = true; ! 489: break; ! 490: ! 491: default: ! 492: if (token[0] == '/' && *buf_ptr == '*') { ! 493: /* it is start of comment */ ! 494: *tok++ = '*'; ! 495: ! 496: if (++buf_ptr >= buf_end) ! 497: fill_buffer(); ! 498: ! 499: code = comment; ! 500: unary_delim = ps.last_u_d; ! 501: break; ! 502: } ! 503: while (*(tok - 1) == *buf_ptr || *buf_ptr == '=') { ! 504: /* handle ||, &&, etc, and also things as in int *****i */ ! 505: *tok++ = *buf_ptr; ! 506: if (++buf_ptr >= buf_end) ! 507: fill_buffer(); ! 508: } ! 509: code = (ps.last_u_d ? unary_op : binary_op); ! 510: unary_delim = true; ! 511: ! 512: ! 513: } /* end of switch */ ! 514: if (code != newline) { ! 515: l_struct = false; ! 516: last_code = code; ! 517: } ! 518: if (buf_ptr >= buf_end) /* check for input buffer empty */ ! 519: fill_buffer(); ! 520: ps.last_u_d = unary_delim; ! 521: *tok = '\0'; /* null terminate the token */ ! 522: return (code); ! 523: }; ! 524: ! 525: /* Add the given keyword to the keyword table, using val as the keyword type ! 526: */ ! 527: addkey (key, val) ! 528: char *key; ! 529: { ! 530: register struct templ *p = specials; ! 531: while (p->rwd) ! 532: if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0) ! 533: return; ! 534: else ! 535: p++; ! 536: if (p >= specials + sizeof specials / sizeof specials[0]) ! 537: return; /* For now, table overflows are silently ! 538: ignored */ ! 539: p->rwd = key; ! 540: p->rwcode = val; ! 541: p[1].rwd = 0; ! 542: p[1].rwcode = 0; ! 543: return; ! 544: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.