|
|
1.1 ! root 1: static char sccsid[] = "@(#)lexi.c 4.1 (Berkeley) 10/21/82"; ! 2: ! 3: /* ! 4: ! 5: Copyright (C) 1976 ! 6: by the ! 7: Board of Trustees ! 8: of the ! 9: University of Illinois ! 10: ! 11: All rights reserved ! 12: ! 13: ! 14: NAME: ! 15: lexi ! 16: ! 17: FUNCTION: ! 18: This is the token scanner for indent ! 19: ! 20: ALGORITHM: ! 21: 1) Strip off intervening blanks and/or tabs. ! 22: 2) If it is an alphanumeric token, move it to the token buffer "token". ! 23: Check if it is a special reserved word that indent will want to ! 24: know about. ! 25: 3) Non-alphanumeric tokens are handled with a big switch statement. A ! 26: flag is kept to remember if the last token was a "unary delimiter", ! 27: which forces a following operator to be unary as opposed to binary. ! 28: ! 29: PARAMETERS: ! 30: None ! 31: ! 32: RETURNS: ! 33: An integer code indicating the type of token scanned. ! 34: ! 35: GLOBALS: ! 36: buf_ptr = ! 37: had_eof ! 38: last_u_d = Set to true iff this token is a "unary delimiter" ! 39: ! 40: CALLS: ! 41: fill_buffer ! 42: printf (lib) ! 43: ! 44: CALLED BY: ! 45: main ! 46: ! 47: NOTES: ! 48: Start of comment is passed back so that the comment can be scanned by ! 49: pr_comment. ! 50: ! 51: Strings and character literals are returned just like identifiers. ! 52: ! 53: HISTORY: ! 54: initial coding November 1976 D A Willcox of CAC ! 55: 1/7/77 D A Willcox of CAC Fix to provide proper handling ! 56: of "int a -1;" ! 57: ! 58: */ ! 59: ! 60: /* Here we have the token scanner for indent. It scans off one token and ! 61: puts it in the global variable "token". It returns a code, indicating the ! 62: type of token scanned. */ ! 63: ! 64: #include "indent_globs.h"; ! 65: #include "indent_codes.h"; ! 66: ! 67: ! 68: ! 69: #define alphanum 1 ! 70: #define opchar 3 ! 71: ! 72: struct templ { ! 73: char *rwd; ! 74: int rwcode; ! 75: }; ! 76: ! 77: struct templ specials[] = ! 78: { ! 79: "switch", 1, ! 80: "case", 2, ! 81: "struct", 3, ! 82: "default", 2, ! 83: "int", 4, ! 84: "char", 4, ! 85: "float", 4, ! 86: "double", 4, ! 87: "long", 4, ! 88: "short", 4, ! 89: "typdef", 4, ! 90: "unsigned", 4, ! 91: "register", 4, ! 92: "static", 4, ! 93: "global", 4, ! 94: "extern", 4, ! 95: "if", 5, ! 96: "while", 5, ! 97: "for", 5, ! 98: "else", 6, ! 99: "do", 6, ! 100: "sizeof", 0, ! 101: 0, 0 ! 102: }; ! 103: ! 104: char chartype[128] = ! 105: { /* this is used to facilitate the decision of what type ! 106: (alphanumeric, operator) each character is */ ! 107: 0, 0, 0, 0, 0, 0, 0, 0, ! 108: 0, 0, 0, 0, 0, 0, 0, 0, ! 109: 0, 0, 0, 0, 0, 0, 0, 0, ! 110: 0, 0, 0, 0, 0, 0, 0, 0, ! 111: 0, 3, 0, 0, 0, 3, 3, 0, ! 112: 0, 0, 3, 3, 0, 3, 3, 3, ! 113: 1, 1, 1, 1, 1, 1, 1, 1, ! 114: 1, 1, 0, 0, 3, 3, 3, 3, ! 115: 0, 1, 1, 1, 1, 1, 1, 1, ! 116: 1, 1, 1, 1, 1, 1, 1, 1, ! 117: 1, 1, 1, 1, 1, 1, 1, 1, ! 118: 1, 1, 1, 0, 0, 0, 3, 1, ! 119: 0, 1, 1, 1, 1, 1, 1, 1, ! 120: 1, 1, 1, 1, 1, 1, 1, 1, ! 121: 1, 1, 1, 1, 1, 1, 1, 1, ! 122: 1, 1, 1, 0, 3, 0, 3, 0 ! 123: }; ! 124: ! 125: int last_nl = true; ! 126: /* this is true if the last thing scanned was a newline */ ! 127: ! 128: ! 129: ! 130: int lexi () { ! 131: register char *tok; ! 132: /* local pointer to next char in token */ ! 133: register int i; ! 134: /* local loop counter */ ! 135: register char *j; ! 136: /* used for searching thru list of reserved words */ ! 137: int unary_delim; ! 138: /* this is set to 1 if the current token forces a following operator to be ! 139: unary */ ! 140: static int last_code; ! 141: /* the last token type returned */ ! 142: static int l_struct; ! 143: /* set to 1 if the last token was 'struct' */ ! 144: int found_it; ! 145: int code; /* internal code to be returned */ ! 146: char qchar; /* the delimiter character for a string */ ! 147: ! 148: tok = token; /* point to start of place to save token */ ! 149: unary_delim = false; ! 150: col_1 = last_nl; /* tell world that this token started in column ! 151: 1 iff the last thing scanned was nl */ ! 152: last_nl = false; ! 153: ! 154: while (*buf_ptr == ' ' || *buf_ptr == '\t') { ! 155: /* get rid of blanks */ ! 156: col_1 = false; /* leading blanks imply token is not in column 1 ! 157: */ ! 158: if (++buf_ptr >= buf_end) ! 159: fill_buffer (); ! 160: } ! 161: ! 162: /*----------------------------------------------------------*\ ! 163: | Scan an alphanumeric token ! 164: \*----------------------------------------------------------*/ ! 165: ! 166: if (chartype[*buf_ptr & 0177] == alphanum) { ! 167: /* we have a character or number */ ! 168: while (chartype[*buf_ptr & 0177] == alphanum) { ! 169: /* copy it over */ ! 170: *tok++ = *buf_ptr++; ! 171: if (buf_ptr >= buf_end) ! 172: fill_buffer (); ! 173: } ! 174: ! 175: *tok++ = '\0'; ! 176: ! 177: if (l_struct) { /* if last token was 'struct', then this token ! 178: should be treated as a declaration */ ! 179: l_struct = false; ! 180: last_code = ident; ! 181: last_u_d = true; ! 182: return (decl); ! 183: } ! 184: ! 185: last_u_d = false; /* operator after indentifier is binary */ ! 186: ! 187: for (i = 0; specials[i].rwd != 0; ++i) { ! 188: /* this loop will check if the token is a keyword. if so, a following ! 189: operator is unary */ ! 190: last_code = ident; /* remember that this is the code we will return ! 191: */ ! 192: j = specials[i].rwd; ! 193: /* point at ith reserved word */ ! 194: tok = token; /* point at scanned toekn */ ! 195: found_it = true; /* set to false if not found */ ! 196: do { ! 197: if (*tok++ != *j) { ! 198: found_it = false; ! 199: break; ! 200: } ! 201: } while (*j++); ! 202: ! 203: if (found_it) { /* we have a keyword */ ! 204: last_u_d = true; ! 205: switch (specials[i].rwcode) { ! 206: case 1: /* it is a switch */ ! 207: return (swstmt); ! 208: case 2: /* a case or default */ ! 209: return (casestmt); ! 210: ! 211: case 3: /* a "struct" */ ! 212: l_struct = true; ! 213: /* Next time around, we will want to know that we have had ! 214: a 'struct' */ ! 215: case 4: /* one of the declaration keywords */ ! 216: if(p_l_follow) break; /* inside parens: cast */ ! 217: last_code = decl; ! 218: return (decl); ! 219: ! 220: case 5: /* if, while, for */ ! 221: return (sp_paren); ! 222: ! 223: case 6: /* do, else */ ! 224: return (sp_nparen); ! 225: ! 226: default: /* all others are treated like any other ! 227: identifier */ ! 228: return (ident); ! 229: } /* end of switch */ ! 230: } /* end of if (found_it) */ ! 231: ! 232: } ! 233: ! 234: if (last_code == decl) /* if this is a declared variable, then ! 235: following sign is unary */ ! 236: last_u_d = true; /* will make "int a -1" work */ ! 237: last_code = ident; ! 238: return (ident); /* the ident is not in the list */ ! 239: } /* end of procesing for alpanum character */ ! 240: ! 241: ! 242: ! 243: /*----------------------------------------------------------*\ ! 244: | Scan a non-alphanumeric token ! 245: \*----------------------------------------------------------*/ ! 246: ! 247: *tok++ = *buf_ptr; /* if it is only a one-character token, it is ! 248: moved here */ ! 249: *tok = '\0'; ! 250: if (++buf_ptr >= buf_end) ! 251: fill_buffer (); ! 252: ! 253: switch (*token) { ! 254: case '\n': ! 255: unary_delim = last_u_d; ! 256: last_nl = true; /* remember that we just had a newline */ ! 257: code = (had_eof ? 0 : newline); ! 258: /* if data has been exausted, the newline is a dummy, and we should ! 259: return code to stop */ ! 260: break; ! 261: ! 262: case '\'': /* start of quoted character */ ! 263: qchar = '\''; /* remember final delimiter */ ! 264: goto copy_lit; /* and go to common literal code */ ! 265: ! 266: case '"': /* start of string */ ! 267: qchar = '"'; ! 268: ! 269: copy_lit: ! 270: do { /* copy the string */ ! 271: while (1) { /* move one character or [/<char>]<char> */ ! 272: if (*buf_ptr == '\n') { ! 273: /* check for unterminated literal */ ! 274: printf ("%d: Unterminated literal\n", line_no); ! 275: goto stop_lit; ! 276: /* Don't copy any more */ ! 277: } ! 278: ! 279: *tok = *buf_ptr++; ! 280: if (buf_ptr >= buf_end) ! 281: fill_buffer (); ! 282: if (had_eof || ((tok - token) > (bufsize - 2))) { ! 283: printf ("Unterminated literal\n"); ! 284: ++tok; ! 285: goto stop_lit; ! 286: /* get outof literal copying loop */ ! 287: } ! 288: ! 289: if (*tok == '\\') { ! 290: /* if escape, copy extra char */ ! 291: if (*buf_ptr == '\n') ! 292: /* check for escaped newline */ ! 293: ++line_no; ! 294: *(++tok) = *buf_ptr++; ! 295: ++tok; /* we must increment this again because we ! 296: copied two chars */ ! 297: if (buf_ptr >= buf_end) ! 298: fill_buffer (); ! 299: } ! 300: else ! 301: break; /* we copied one character */ ! 302: } /* end of while (1) */ ! 303: } while (*tok++ != qchar); ! 304: ! 305: stop_lit: ! 306: code = ident; ! 307: break; ! 308: ! 309: case ('('): ! 310: case ('['): ! 311: unary_delim = true; ! 312: code = lparen; ! 313: break; ! 314: ! 315: case (')'): ! 316: case (']'): ! 317: code = rparen; ! 318: break; ! 319: ! 320: case '#': ! 321: unary_delim = last_u_d; ! 322: code = preesc; ! 323: break; ! 324: ! 325: case '?': ! 326: unary_delim = true; ! 327: code = question; ! 328: break; ! 329: ! 330: case (':'): ! 331: code = colon; ! 332: unary_delim = true; ! 333: break; ! 334: ! 335: case (';'): ! 336: unary_delim = true; ! 337: code = semicolon; ! 338: break; ! 339: ! 340: case ('{'): ! 341: unary_delim = true; ! 342: code = lbrace; ! 343: break; ! 344: ! 345: case ('}'): ! 346: unary_delim = true; ! 347: code = rbrace; ! 348: break; ! 349: ! 350: case 014: /* a form feed */ ! 351: unary_delim = last_u_d; ! 352: last_nl = true; /* remember this so we can set 'col_1' right */ ! 353: code = form_feed; ! 354: break; ! 355: ! 356: case (','): ! 357: unary_delim = true; ! 358: code = comma; ! 359: break; ! 360: ! 361: case '.': ! 362: unary_delim = false; ! 363: code = period; ! 364: break; ! 365: ! 366: case '-': ! 367: case '+': /* check for -, +, --, ++ */ ! 368: code = (last_u_d ? unary_op : binary_op); ! 369: unary_delim = true; ! 370: ! 371: if (*buf_ptr == token[0]) { ! 372: /* check for doubled character */ ! 373: *tok++ = *buf_ptr++; ! 374: /* buffer overflow will be checked at end of loop */ ! 375: if (last_code == ident || last_code == rparen) { ! 376: code = (last_u_d ? unary_op : postop); ! 377: /* check for following ++ or -- */ ! 378: unary_delim = false; ! 379: } ! 380: } ! 381: else ! 382: if (*buf_ptr == '>' || *buf_ptr == '=') ! 383: /* check for operator -> or += */ ! 384: *tok++ = *buf_ptr++; ! 385: /* buffer overflow will be checked at end of switch */ ! 386: ! 387: break; ! 388: ! 389: case '=': ! 390: if (chartype[*buf_ptr] == opchar) { ! 391: /* we have two char assignment */ ! 392: *tok++ = *buf_ptr; ! 393: /* move second character */ ! 394: if (++buf_ptr >= buf_end) ! 395: fill_buffer (); ! 396: } ! 397: ! 398: code = binary_op; ! 399: unary_delim = true; ! 400: if (token[1] != '<' && token[1] != '>') ! 401: /* check for possible 3 char operator */ ! 402: break; ! 403: /* can drop thru!!! */ ! 404: ! 405: case '>': ! 406: case '<': ! 407: case '!': /* ops like <, <<, <=, !=, etc */ ! 408: if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { ! 409: *tok++ = *buf_ptr; ! 410: if (++buf_ptr >= buf_end) ! 411: fill_buffer (); ! 412: } ! 413: ! 414: if (*buf_ptr == '=') ! 415: *tok++ = *buf_ptr++; ! 416: code = (last_u_d ? unary_op : binary_op); ! 417: unary_delim = true; ! 418: break; ! 419: ! 420: default: ! 421: if (token[0] == '/' && *buf_ptr == '*') { ! 422: /* it is start of comment */ ! 423: *tok++ = '*'; ! 424: ! 425: if (++buf_ptr >= buf_end) ! 426: fill_buffer (); ! 427: ! 428: code = comment; ! 429: unary_delim = last_u_d; ! 430: break; ! 431: } ! 432: ! 433: while (*(tok - 1) == *buf_ptr || *buf_ptr=='=') { ! 434: /* handle ||, &&, etc, and also things as in int *****i */ ! 435: *tok++ = *buf_ptr; ! 436: if (++buf_ptr >= buf_end) ! 437: fill_buffer (); ! 438: } ! 439: ! 440: ! 441: code = (last_u_d ? unary_op : binary_op); ! 442: unary_delim = true; ! 443: ! 444: ! 445: } /* end of switch */ ! 446: ! 447: if (code != newline) { ! 448: l_struct = false; ! 449: last_code = code; ! 450: } ! 451: ! 452: if (buf_ptr >= buf_end) /* check for input buffer empty */ ! 453: fill_buffer (); ! 454: last_u_d = unary_delim; ! 455: *tok = '\0'; /* null terminate the token */ ! 456: return (code); ! 457: };
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.