|
|
1.1 root 1: /*
2: * Copyright (c) 1980 Regents of the University of California.
3: * All rights reserved. The Berkeley software License Agreement
4: * specifies the terms and conditions for redistribution.
5: */
6:
7: #ifndef lint
8: static char sccsid[] = "@(#)lexi.c 5.4 (Berkeley) 9/10/85";
9: #endif not lint
10:
11: /*-
12: *
13: * Copyright (C) 1976
14: * by the
15: * Board of Trustees
16: * of the
17: * University of Illinois
18: *
19: * All rights reserved
20: *
21: *
22: * NAME:
23: * lexi
24: *
25: * FUNCTION:
26: * This is the token scanner for indent
27: *
28: * ALGORITHM:
29: * 1) Strip off intervening blanks and/or tabs.
30: * 2) If it is an alphanumeric token, move it to the token buffer "token".
31: * Check if it is a special reserved word that indent will want to
32: * know about.
33: * 3) Non-alphanumeric tokens are handled with a big switch statement. A
34: * flag is kept to remember if the last token was a "unary delimiter",
35: * which forces a following operator to be unary as opposed to binary.
36: *
37: * PARAMETERS:
38: * None
39: *
40: * RETURNS:
41: * An integer code indicating the type of token scanned.
42: *
43: * GLOBALS:
44: * buf_ptr =
45: * had_eof
46: * ps.last_u_d = Set to true iff this token is a "unary delimiter"
47: *
48: * CALLS:
49: * fill_buffer
50: * printf (lib)
51: *
52: * CALLED BY:
53: * main
54: *
55: * NOTES:
56: * Start of comment is passed back so that the comment can be scanned by
57: * pr_comment.
58: *
59: * Strings and character literals are returned just like identifiers.
60: *
61: * HISTORY:
62: * initial coding November 1976 D A Willcox of CAC
63: * 1/7/77 D A Willcox of CAC Fix to provide proper handling
64: * of "int a -1;"
65: *
66: */
67:
68: /*
69: * Here we have the token scanner for indent. It scans off one token and
70: * puts it in the global variable "token". It returns a code, indicating
71: * the type of token scanned.
72: */
73:
74: #include "indent_globs.h";
75: #include "indent_codes.h";
76: #include "ctype.h"
77:
78: #define alphanum 1
79: #define opchar 3
80:
/*
 * One entry of the reserved-word table: the spelling of a keyword and the
 * class code that lexi() switches on once the keyword has been recognised.
 */
struct templ {
    char       *rwd;            /* keyword text; a null pointer ends the table */
    int         rwcode;         /* keyword class, see the list below */
};

/*
 * Reserved words known to the scanner.  The table is terminated by an entry
 * whose rwd is null; addkey() appends further entries at run time, which is
 * why the array is larger than the initial list.
 *
 * Keyword classes, as interpreted by the switch in lexi():
 *   0  no special treatment (returned as an ordinary identifier)
 *   1  "switch"
 *   2  "case" / "default"
 *   3  "struct", "union", "enum" (the following identifier is a declaration)
 *   4  declaration keywords (types and storage classes)
 *   5  keywords followed by a parenthesised expression: if, while, for
 *   6  keywords not followed by parentheses: else, do
 *   7  "sizeof"
 */
struct templ specials[100] =
{
    {"switch", 1},
    {"case", 2},
    {"break", 0},
    {"struct", 3},
    {"union", 3},
    {"enum", 3},
    {"default", 2},
    {"int", 4},
    {"char", 4},
    {"float", 4},
    {"double", 4},
    {"long", 4},
    {"short", 4},
    {"typedef", 4},             /* was misspelt "typdef", so never matched */
    {"unsigned", 4},
    {"register", 4},
    {"static", 4},
    {"global", 4},
    {"extern", 4},
    {"void", 4},
    {"goto", 0},
    {"return", 0},
    {"if", 5},
    {"while", 5},
    {"for", 5},
    {"else", 6},
    {"do", 6},
    {"sizeof", 7},
    {0, 0}
};
118:
/*
 * Character classification table, indexed by a 7-bit ASCII code.
 *   0  neither of the classes below
 *   1  may appear in an identifier or number (letters, digits, underscore)
 *   3  operator character
 * The scanner compares these values against its alphanum (1) and opchar (3)
 * constants.  Each row below covers sixteen consecutive codes.
 */
char        chartype[128] =
{
    /* 0x00 - 0x0F: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10 - 0x1F: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 - 0x2F: space and punctuation */
    0, 3, 0, 0, 0, 3, 3, 0, 0, 0, 3, 3, 0, 3, 3, 3,
    /* 0x30 - 0x3F: digits, then punctuation */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3,
    /* 0x40 - 0x4F: at-sign, upper-case letters */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 - 0x5F: upper-case letters, brackets, caret, underscore */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 1,
    /* 0x60 - 0x6F: back-quote, lower-case letters */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 - 0x7F: lower-case letters, braces, bar, tilde, DEL */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 0, 3, 0
};
140:
141:
142:
143:
144: int
145: lexi()
146: {
147: register char *tok; /* local pointer to next char in token */
148: int unary_delim; /* this is set to 1 if the current token
149: *
150: * forces a following operator to be unary */
151: static int last_code; /* the last token type returned */
152: static int l_struct; /* set to 1 if the last token was 'struct' */
153: int code; /* internal code to be returned */
154: char qchar; /* the delimiter character for a string */
155:
156: tok = token; /* point to start of place to save token */
157: unary_delim = false;
158: ps.col_1 = ps.last_nl; /* tell world that this token started in
159: * column 1 iff the last thing scanned was
160: * nl */
161: ps.last_nl = false;
162:
163: while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
164: ps.col_1 = false; /* leading blanks imply token is not in
165: * column 1 */
166: if (++buf_ptr >= buf_end)
167: fill_buffer();
168: }
169:
170: /* Scan an alphanumeric token. Note that we must also handle
171: * stuff like "1.0e+03" and "7e-6". */
172: if (chartype[*buf_ptr & 0177] == alphanum) { /* we have a character
173: * or number */
174: register char *j; /* used for searching thru list of
175: * reserved words */
176: register struct templ *p;
177: register int c;
178:
179: do { /* copy it over */
180: *tok++ = *buf_ptr++;
181: if (buf_ptr >= buf_end)
182: fill_buffer();
183: } while (chartype[c = *buf_ptr & 0177] == alphanum ||
184: isdigit(token[0]) && (c == '+' || c == '-') &&
185: (tok[-1] == 'e' || tok[-1] == 'E'));
186: *tok++ = '\0';
187: while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
188: if (++buf_ptr >= buf_end)
189: fill_buffer();
190: }
191: ps.its_a_keyword = false;
192: ps.sizeof_keyword = false;
193: if (l_struct) { /* if last token was 'struct', then this
194: * token should be treated as a
195: * declaration */
196: l_struct = false;
197: last_code = ident;
198: ps.last_u_d = true;
199: return (decl);
200: }
201: ps.last_u_d = false; /* Operator after indentifier is binary */
202: last_code = ident; /* Remember that this is the code we will
203: * return */
204:
205: /*
206: * This loop will check if the token is a keyword.
207: */
208: for (p = specials; (j = p->rwd) != 0; p++) {
209: tok = token; /* point at scanned token */
210: if (*j++ != *tok++ || *j++ != *tok++)
211: continue; /* This test depends on the fact that
212: * identifiers are always at least 1
213: * character long (ie. the first two bytes
214: * of the identifier are always
215: * meaningful) */
216: if (tok[-1] == 0)
217: break; /* If its a one-character identifier */
218: while (*tok++ == *j)
219: if (*j++ == 0)
220: goto found_keyword; /* I wish that C had a multi-level
221: * break... */
222: }
223: if (p->rwd) { /* we have a keyword */
224: found_keyword:
225: ps.its_a_keyword = true;
226: ps.last_u_d = true;
227: switch (p->rwcode) {
228: case 1: /* it is a switch */
229: return (swstmt);
230: case 2: /* a case or default */
231: return (casestmt);
232:
233: case 3: /* a "struct" */
234: if (ps.p_l_follow)
235: break; /* inside parens: cast */
236: l_struct = true;
237:
238: /*
239: * Next time around, we will want to know that we have
240: * had a 'struct'
241: */
242: case 4: /* one of the declaration keywords */
243: if (ps.p_l_follow) {
244: ps.cast_mask |= 1 << ps.p_l_follow;
245: break; /* inside parens: cast */
246: }
247: last_code = decl;
248: return (decl);
249:
250: case 5: /* if, while, for */
251: return (sp_paren);
252:
253: case 6: /* do, else */
254: return (sp_nparen);
255:
256: case 7:
257: ps.sizeof_keyword = true;
258: default: /* all others are treated like any other
259: * identifier */
260: return (ident);
261: } /* end of switch */
262: } /* end of if (found_it) */
263: if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0
264: && (buf_ptr[1] != ')' || buf_ptr[2] != ';')) {
265: strncpy(ps.procname, token, sizeof ps.procname - 1);
266: ps.in_parameter_declaration = 1;
267: }
268:
269: /*
270: * The following hack attempts to guess whether or not the current
271: * token is in fact a declaration keyword -- one that has been
272: * typedefd
273: */
274: if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr))
275: && !ps.p_l_follow
276: && (ps.last_token == rparen || ps.last_token == semicolon ||
277: ps.last_token == decl ||
278: ps.last_token == lbrace || ps.last_token == rbrace)) {
279: ps.its_a_keyword = true;
280: ps.last_u_d = true;
281: last_code = decl;
282: return decl;
283: }
284: if (last_code == decl) /* if this is a declared variable, then
285: * following sign is unary */
286: ps.last_u_d = true; /* will make "int a -1" work */
287: last_code = ident;
288: return (ident); /* the ident is not in the list */
289: } /* end of procesing for alpanum character */
290: /* Scan a non-alphanumeric token */
291:
292: *tok++ = *buf_ptr; /* if it is only a one-character token, it
293: * is moved here */
294: *tok = '\0';
295: if (++buf_ptr >= buf_end)
296: fill_buffer();
297:
298: switch (*token) {
299: case '\n':
300: unary_delim = ps.last_u_d;
301: ps.last_nl = true; /* remember that we just had a newline */
302: code = (had_eof ? 0 : newline);
303:
304: /*
305: * if data has been exausted, the newline is a dummy, and we
306: * should return code to stop
307: */
308: break;
309:
310: case '\'': /* start of quoted character */
311: case '"': /* start of string */
312: qchar = *token;
313: if (troff) {
314: tok[-1] = '`';
315: if (qchar == '"')
316: *tok++ = '`';
317: *tok++ = BACKSLASH;
318: *tok++ = 'f';
319: *tok++ = 'L';
320: }
321: do { /* copy the string */
322: while (1) { /* move one character or [/<char>]<char> */
323: if (*buf_ptr == '\n') {
324: printf("%d: Unterminated literal\n", line_no);
325: goto stop_lit;
326: }
327: *tok = *buf_ptr++;
328: if (buf_ptr >= buf_end)
329: fill_buffer();
330: if (had_eof || ((tok - token) > (bufsize - 2))) {
331: printf("Unterminated literal\n");
332: ++tok;
333: goto stop_lit;
334: /* get outof literal copying loop */
335: }
336: if (*tok == BACKSLASH) { /* if escape, copy extra
337: * char */
338: if (*buf_ptr == '\n') /* check for escaped
339: * newline */
340: ++line_no;
341: if (troff) {
342: *++tok = BACKSLASH;
343: if (*buf_ptr == BACKSLASH)
344: *++tok = BACKSLASH;
345: }
346: *++tok = *buf_ptr++;
347: ++tok; /* we must increment this again because we
348: * copied two chars */
349: if (buf_ptr >= buf_end)
350: fill_buffer();
351: }
352: else
353: break; /* we copied one character */
354: } /* end of while (1) */
355: } while (*tok++ != qchar);
356: if (troff) {
357: tok[-1] = BACKSLASH;
358: *tok++ = 'f';
359: *tok++ = 'R';
360: *tok++ = '\'';
361: if (qchar == '"')
362: *tok++ = '\'';
363: }
364: stop_lit:
365: code = ident;
366: break;
367:
368: case ('('):
369: case ('['):
370: unary_delim = true;
371: code = lparen;
372: break;
373:
374: case (')'):
375: case (']'):
376: code = rparen;
377: break;
378:
379: case '#':
380: unary_delim = ps.last_u_d;
381: code = preesc;
382: break;
383:
384: case '?':
385: unary_delim = true;
386: code = question;
387: break;
388:
389: case (':'):
390: code = colon;
391: unary_delim = true;
392: break;
393:
394: case (';'):
395: unary_delim = true;
396: code = semicolon;
397: break;
398:
399: case ('{'):
400: unary_delim = true;
401:
402: /*
403: * if (ps.in_or_st) ps.block_init = 1;
404: */
405: code = ps.block_init ? lparen : lbrace;
406: break;
407:
408: case ('}'):
409: unary_delim = true;
410: code = ps.block_init ? rparen : rbrace;
411: break;
412:
413: case 014: /* a form feed */
414: unary_delim = ps.last_u_d;
415: ps.last_nl = true; /* remember this so we can set 'ps.col_1'
416: * right */
417: code = form_feed;
418: break;
419:
420: case (','):
421: unary_delim = true;
422: code = comma;
423: break;
424:
425: case '.':
426: unary_delim = false;
427: code = period;
428: break;
429:
430: case '-':
431: case '+': /* check for -, +, --, ++ */
432: code = (ps.last_u_d ? unary_op : binary_op);
433: unary_delim = true;
434:
435: if (*buf_ptr == token[0]) {
436: /* check for doubled character */
437: *tok++ = *buf_ptr++;
438: /* buffer overflow will be checked at end of loop */
439: if (last_code == ident || last_code == rparen) {
440: code = (ps.last_u_d ? unary_op : postop);
441: /* check for following ++ or -- */
442: unary_delim = false;
443: }
444: }
445: else if (*buf_ptr == '=')
446: /* check for operator += */
447: *tok++ = *buf_ptr++;
448: else if (token[0] == '-' && *buf_ptr == '>') {
449: /* check for operator -> */
450: *tok++ = *buf_ptr++;
451: if (!pointer_as_binop) {
452: code = unary_op;
453: unary_delim = false;
454: ps.want_blank = false;
455: }
456: }
457: /* buffer overflow will be checked at end of switch */
458:
459: break;
460:
461: case '=':
462: if (ps.in_or_st)
463: ps.block_init = 1;
464: if (chartype[*buf_ptr] == opchar) { /* we have two char
465: * assignment */
466: tok[-1] = *buf_ptr++;
467: if ((tok[-1] == '<' || tok[-1] == '>') && tok[-1] == *buf_ptr)
468: *tok++ = *buf_ptr++;
469: *tok++ = '='; /* Flip =+ to += */
470: *tok = 0;
471: }
472: code = binary_op;
473: unary_delim = true;
474: break;
475: /* can drop thru!!! */
476:
477: case '>':
478: case '<':
479: case '!': /* ops like <, <<, <=, !=, etc */
480: if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
481: *tok++ = *buf_ptr;
482: if (++buf_ptr >= buf_end)
483: fill_buffer();
484: }
485: if (*buf_ptr == '=')
486: *tok++ = *buf_ptr++;
487: code = (ps.last_u_d ? unary_op : binary_op);
488: unary_delim = true;
489: break;
490:
491: default:
492: if (token[0] == '/' && *buf_ptr == '*') {
493: /* it is start of comment */
494: *tok++ = '*';
495:
496: if (++buf_ptr >= buf_end)
497: fill_buffer();
498:
499: code = comment;
500: unary_delim = ps.last_u_d;
501: break;
502: }
503: while (*(tok - 1) == *buf_ptr || *buf_ptr == '=') {
504: /* handle ||, &&, etc, and also things as in int *****i */
505: *tok++ = *buf_ptr;
506: if (++buf_ptr >= buf_end)
507: fill_buffer();
508: }
509: code = (ps.last_u_d ? unary_op : binary_op);
510: unary_delim = true;
511:
512:
513: } /* end of switch */
514: if (code != newline) {
515: l_struct = false;
516: last_code = code;
517: }
518: if (buf_ptr >= buf_end) /* check for input buffer empty */
519: fill_buffer();
520: ps.last_u_d = unary_delim;
521: *tok = '\0'; /* null terminate the token */
522: return (code);
523: };
524:
525: /* Add the given keyword to the keyword table, using val as the keyword type
526: */
527: addkey (key, val)
528: char *key;
529: {
530: register struct templ *p = specials;
531: while (p->rwd)
532: if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
533: return;
534: else
535: p++;
536: if (p >= specials + sizeof specials / sizeof specials[0])
537: return; /* For now, table overflows are silently
538: ignored */
539: p->rwd = key;
540: p->rwcode = val;
541: p[1].rwd = 0;
542: p[1].rwcode = 0;
543: return;
544: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.