|
|
1.1 root 1: /*
2: * Copyright (c) 1980 Regents of the University of California.
3: * Copyright (c) 1976 Board of Trustees of the University of Illinois.
4: * All rights reserved.
5: *
6: * Redistribution and use in source and binary forms are permitted
7: * provided that the above copyright notice and this paragraph are
8: * duplicated in all such forms and that any documentation,
9: * advertising materials, and other materials related to such
10: * distribution and use acknowledge that the software was developed
11: * by the University of California, Berkeley and the University
12: * of Illinois, Urbana. The name of either
13: * University may not be used to endorse or promote products derived
14: * from this software without specific prior written permission.
15: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
16: * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
17: * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
18: */
19:
20: #ifndef lint
21: static char sccsid[] = "@(#)lexi.c 5.8 (Berkeley) 6/29/88";
22: #endif /* not lint */
23:
24: /*
25: * NAME:
26: * lexi
27: *
28: * FUNCTION:
29: * This is the token scanner for indent
30: *
31: * ALGORITHM:
32: * 1) Strip off intervening blanks and/or tabs.
33: * 2) If it is an alphanumeric token, move it to the token buffer "token".
34: * Check if it is a special reserved word that indent will want to
35: * know about.
36: * 3) Non-alphanumeric tokens are handled with a big switch statement. A
37: * flag is kept to remember if the last token was a "unary delimiter",
38: * which forces a following operator to be unary as opposed to binary.
39: *
40: * PARAMETERS:
41: * None
42: *
43: * RETURNS:
44: * An integer code indicating the type of token scanned.
45: *
46: * GLOBALS:
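47: * buf_ptr = Advanced past the token that was scanned
48: * had_eof = Read only; turns a newline into return code 0 and ends an unfinished literal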
49: * ps.last_u_d = Set to true iff this token is a "unary delimiter"
50: *
51: * CALLS:
52: * fill_buffer
53: * printf (lib)
54: *
55: * CALLED BY:
56: * main
57: *
58: * NOTES:
59: * Start of comment is passed back so that the comment can be scanned by
60: * pr_comment.
61: *
62: * Strings and character literals are returned just like identifiers.
63: *
64: * HISTORY:
65: * initial coding November 1976 D A Willcox of CAC
66: * 1/7/77 D A Willcox of CAC Fix to provide proper handling
67: * of "int a -1;"
68: *
69: */
70:
71: /*
72: * Here we have the token scanner for indent. It scans off one token and
73: * puts it in the global variable "token". It returns a code, indicating
74: * the type of token scanned.
75: */
76:
77: #include "indent_globs.h"
78: #include "indent_codes.h"
79: #include "ctype.h"
80:
81: #define alphanum 1
82: #define opchar 3
83:
/*
 * One entry of the reserved-word table: the spelling of a keyword and the
 * class code that lexi() switches on when a scanned token matches it.
 */
struct templ {
    char       *rwd;            /* keyword text; a null pointer ends the table */
    int         rwcode;         /* keyword class, see the list above specials[] */
};

/*
 * Reserved words recognized by lexi().  The list ends at the first entry
 * whose rwd is null; the array is larger than the built-in list so that
 * addkey() can append further keywords at run time.
 *
 * rwcode values, as interpreted by the switch in lexi():
 *   0  no special treatment, returned as ident
 *   1  "switch"                       (swstmt)
 *   2  "case" and "default"           (casestmt)
 *   3  struct, union, enum            (decl outside parentheses; the name
 *                                      that follows is returned as decl too)
 *   4  other declaration keywords     (decl outside parentheses)
 *   5  if, while, for                 (sp_paren)
 *   6  else, do                       (sp_nparen)
 *   7  sizeof                         (ident, sets ps.sizeof_keyword)
 */
struct templ specials[100] =
{
    {"switch", 1},
    {"case", 2},
    {"break", 0},
    {"struct", 3},
    {"union", 3},
    {"enum", 3},
    {"default", 2},
    {"int", 4},
    {"char", 4},
    {"float", 4},
    {"double", 4},
    {"long", 4},
    {"short", 4},
    {"typedef", 4},             /* was misspelled "typdef" and never matched */
    {"unsigned", 4},
    {"register", 4},
    {"static", 4},
    {"global", 4},
    {"extern", 4},
    {"void", 4},
    {"goto", 0},
    {"return", 0},
    {"if", 5},
    {"while", 5},
    {"for", 5},
    {"else", 6},
    {"do", 6},
    {"sizeof", 7},
    {0, 0}                      /* terminator */
};
121:
/*
 * Character classification table, indexed by a 7-bit character code.
 * Each entry says how the scanner treats that character:
 *   1 = may be part of an alphanumeric token (letters, digits, '_', '$')
 *   3 = operator character
 *   0 = anything else (white space, brackets, quotes, punctuation, ...)
 */
char        chartype[128] =
{
    /* 000-017: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 020-037: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* sp !  "  #  $  %  &  '  (  )  *  +  ,  -  .  / */
    0, 3, 0, 0, 1, 3, 3, 0, 0, 0, 3, 3, 0, 3, 3, 3,
    /* 0  1  2  3  4  5  6  7  8  9  :  ;  <  =  >  ? */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3,
    /* @  A  B  C  D  E  F  G  H  I  J  K  L  M  N  O */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* P  Q  R  S  T  U  V  W  X  Y  Z  [  \  ]  ^  _ */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 1,
    /* `  a  b  c  d  e  f  g  h  i  j  k  l  m  n  o */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* p  q  r  s  t  u  v  w  x  y  z  {  |  }  ~  del */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 0, 3, 0
};
143:
144:
145:
146:
147: int
148: lexi()
149: {
150: register char *tok; /* local pointer to next char in token */
151: int unary_delim; /* this is set to 1 if the current token
152: *
153: * forces a following operator to be unary */
154: static int last_code; /* the last token type returned */
155: static int l_struct; /* set to 1 if the last token was 'struct' */
156: int code; /* internal code to be returned */
157: char qchar; /* the delimiter character for a string */
158:
159: tok = token; /* point to start of place to save token */
160: unary_delim = false;
161: ps.col_1 = ps.last_nl; /* tell world that this token started in
162: * column 1 iff the last thing scanned was
163: * nl */
164: ps.last_nl = false;
165:
166: while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
167: ps.col_1 = false; /* leading blanks imply token is not in
168: * column 1 */
169: if (++buf_ptr >= buf_end)
170: fill_buffer();
171: }
172:
173: /* Scan an alphanumeric token. Note that we must also handle
174: * stuff like "1.0e+03" and "7e-6". */
175: if (chartype[*buf_ptr & 0177] == alphanum) { /* we have a character
176: * or number */
177: register char *j; /* used for searching thru list of
178: * reserved words */
179: register struct templ *p;
180: register int c;
181:
182: do { /* copy it over */
183: *tok++ = *buf_ptr++;
184: if (buf_ptr >= buf_end)
185: fill_buffer();
186: } while (chartype[c = *buf_ptr & 0177] == alphanum ||
187: isdigit(token[0]) && (c == '+' || c == '-') &&
188: (tok[-1] == 'e' || tok[-1] == 'E'));
189: *tok++ = '\0';
190: while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
191: if (++buf_ptr >= buf_end)
192: fill_buffer();
193: }
194: ps.its_a_keyword = false;
195: ps.sizeof_keyword = false;
196: if (l_struct) { /* if last token was 'struct', then this
197: * token should be treated as a
198: * declaration */
199: l_struct = false;
200: last_code = ident;
201: ps.last_u_d = true;
202: return (decl);
203: }
204: ps.last_u_d = false; /* Operator after indentifier is binary */
205: last_code = ident; /* Remember that this is the code we will
206: * return */
207:
208: /*
209: * This loop will check if the token is a keyword.
210: */
211: for (p = specials; (j = p->rwd) != 0; p++) {
212: tok = token; /* point at scanned token */
213: if (*j++ != *tok++ || *j++ != *tok++)
214: continue; /* This test depends on the fact that
215: * identifiers are always at least 1
216: * character long (ie. the first two bytes
217: * of the identifier are always
218: * meaningful) */
219: if (tok[-1] == 0)
220: break; /* If its a one-character identifier */
221: while (*tok++ == *j)
222: if (*j++ == 0)
223: goto found_keyword; /* I wish that C had a multi-level
224: * break... */
225: }
226: if (p->rwd) { /* we have a keyword */
227: found_keyword:
228: ps.its_a_keyword = true;
229: ps.last_u_d = true;
230: switch (p->rwcode) {
231: case 1: /* it is a switch */
232: return (swstmt);
233: case 2: /* a case or default */
234: return (casestmt);
235:
236: case 3: /* a "struct" */
237: if (ps.p_l_follow)
238: break; /* inside parens: cast */
239: l_struct = true;
240:
241: /*
242: * Next time around, we will want to know that we have
243: * had a 'struct'
244: */
245: case 4: /* one of the declaration keywords */
246: if (ps.p_l_follow) {
247: ps.cast_mask |= 1 << ps.p_l_follow;
248: break; /* inside parens: cast */
249: }
250: last_code = decl;
251: return (decl);
252:
253: case 5: /* if, while, for */
254: return (sp_paren);
255:
256: case 6: /* do, else */
257: return (sp_nparen);
258:
259: case 7:
260: ps.sizeof_keyword = true;
261: default: /* all others are treated like any other
262: * identifier */
263: return (ident);
264: } /* end of switch */
265: } /* end of if (found_it) */
266: if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0
267: && (buf_ptr[1] != ')' || buf_ptr[2] != ';')) {
268: strncpy(ps.procname, token, sizeof ps.procname - 1);
269: ps.in_parameter_declaration = 1;
270: }
271:
272: /*
273: * The following hack attempts to guess whether or not the current
274: * token is in fact a declaration keyword -- one that has been
275: * typedefd
276: */
277: if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr))
278: && !ps.p_l_follow
279: && (ps.last_token == rparen || ps.last_token == semicolon ||
280: ps.last_token == decl ||
281: ps.last_token == lbrace || ps.last_token == rbrace)) {
282: ps.its_a_keyword = true;
283: ps.last_u_d = true;
284: last_code = decl;
285: return decl;
286: }
287: if (last_code == decl) /* if this is a declared variable, then
288: * following sign is unary */
289: ps.last_u_d = true; /* will make "int a -1" work */
290: last_code = ident;
291: return (ident); /* the ident is not in the list */
292: } /* end of procesing for alpanum character */
293: /* Scan a non-alphanumeric token */
294:
295: *tok++ = *buf_ptr; /* if it is only a one-character token, it
296: * is moved here */
297: *tok = '\0';
298: if (++buf_ptr >= buf_end)
299: fill_buffer();
300:
301: switch (*token) {
302: case '\n':
303: unary_delim = ps.last_u_d;
304: ps.last_nl = true; /* remember that we just had a newline */
305: code = (had_eof ? 0 : newline);
306:
307: /*
308: * if data has been exausted, the newline is a dummy, and we
309: * should return code to stop
310: */
311: break;
312:
313: case '\'': /* start of quoted character */
314: case '"': /* start of string */
315: qchar = *token;
316: if (troff) {
317: tok[-1] = '`';
318: if (qchar == '"')
319: *tok++ = '`';
320: *tok++ = BACKSLASH;
321: *tok++ = 'f';
322: *tok++ = 'L';
323: }
324: do { /* copy the string */
325: while (1) { /* move one character or [/<char>]<char> */
326: if (*buf_ptr == '\n') {
327: printf("%d: Unterminated literal\n", line_no);
328: goto stop_lit;
329: }
330: *tok = *buf_ptr++;
331: if (buf_ptr >= buf_end)
332: fill_buffer();
333: if (had_eof || ((tok - token) > (bufsize - 2))) {
334: printf("Unterminated literal\n");
335: ++tok;
336: goto stop_lit;
337: /* get outof literal copying loop */
338: }
339: if (*tok == BACKSLASH) { /* if escape, copy extra
340: * char */
341: if (*buf_ptr == '\n') /* check for escaped
342: * newline */
343: ++line_no;
344: if (troff) {
345: *++tok = BACKSLASH;
346: if (*buf_ptr == BACKSLASH)
347: *++tok = BACKSLASH;
348: }
349: *++tok = *buf_ptr++;
350: ++tok; /* we must increment this again because we
351: * copied two chars */
352: if (buf_ptr >= buf_end)
353: fill_buffer();
354: }
355: else
356: break; /* we copied one character */
357: } /* end of while (1) */
358: } while (*tok++ != qchar);
359: if (troff) {
360: tok[-1] = BACKSLASH;
361: *tok++ = 'f';
362: *tok++ = 'R';
363: *tok++ = '\'';
364: if (qchar == '"')
365: *tok++ = '\'';
366: }
367: stop_lit:
368: code = ident;
369: break;
370:
371: case ('('):
372: case ('['):
373: unary_delim = true;
374: code = lparen;
375: break;
376:
377: case (')'):
378: case (']'):
379: code = rparen;
380: break;
381:
382: case '#':
383: unary_delim = ps.last_u_d;
384: code = preesc;
385: break;
386:
387: case '?':
388: unary_delim = true;
389: code = question;
390: break;
391:
392: case (':'):
393: code = colon;
394: unary_delim = true;
395: break;
396:
397: case (';'):
398: unary_delim = true;
399: code = semicolon;
400: break;
401:
402: case ('{'):
403: unary_delim = true;
404:
405: /*
406: * if (ps.in_or_st) ps.block_init = 1;
407: */
408: code = ps.block_init ? lparen : lbrace;
409: break;
410:
411: case ('}'):
412: unary_delim = true;
413: code = ps.block_init ? rparen : rbrace;
414: break;
415:
416: case 014: /* a form feed */
417: unary_delim = ps.last_u_d;
418: ps.last_nl = true; /* remember this so we can set 'ps.col_1'
419: * right */
420: code = form_feed;
421: break;
422:
423: case (','):
424: unary_delim = true;
425: code = comma;
426: break;
427:
428: case '.':
429: unary_delim = false;
430: code = period;
431: break;
432:
433: case '-':
434: case '+': /* check for -, +, --, ++ */
435: code = (ps.last_u_d ? unary_op : binary_op);
436: unary_delim = true;
437:
438: if (*buf_ptr == token[0]) {
439: /* check for doubled character */
440: *tok++ = *buf_ptr++;
441: /* buffer overflow will be checked at end of loop */
442: if (last_code == ident || last_code == rparen) {
443: code = (ps.last_u_d ? unary_op : postop);
444: /* check for following ++ or -- */
445: unary_delim = false;
446: }
447: }
448: else if (*buf_ptr == '=')
449: /* check for operator += */
450: *tok++ = *buf_ptr++;
451: else if (token[0] == '-' && *buf_ptr == '>') {
452: /* check for operator -> */
453: *tok++ = *buf_ptr++;
454: if (!pointer_as_binop) {
455: code = unary_op;
456: unary_delim = false;
457: ps.want_blank = false;
458: }
459: }
460: /* buffer overflow will be checked at end of switch */
461:
462: break;
463:
464: case '=':
465: if (ps.in_or_st)
466: ps.block_init = 1;
467: if (chartype[*buf_ptr] == opchar) { /* we have two char
468: * assignment */
469: tok[-1] = *buf_ptr++;
470: if ((tok[-1] == '<' || tok[-1] == '>') && tok[-1] == *buf_ptr)
471: *tok++ = *buf_ptr++;
472: *tok++ = '='; /* Flip =+ to += */
473: *tok = 0;
474: }
475: code = binary_op;
476: unary_delim = true;
477: break;
478: /* can drop thru!!! */
479:
480: case '>':
481: case '<':
482: case '!': /* ops like <, <<, <=, !=, etc */
483: if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
484: *tok++ = *buf_ptr;
485: if (++buf_ptr >= buf_end)
486: fill_buffer();
487: }
488: if (*buf_ptr == '=')
489: *tok++ = *buf_ptr++;
490: code = (ps.last_u_d ? unary_op : binary_op);
491: unary_delim = true;
492: break;
493:
494: default:
495: if (token[0] == '/' && *buf_ptr == '*') {
496: /* it is start of comment */
497: *tok++ = '*';
498:
499: if (++buf_ptr >= buf_end)
500: fill_buffer();
501:
502: code = comment;
503: unary_delim = ps.last_u_d;
504: break;
505: }
506: while (*(tok - 1) == *buf_ptr || *buf_ptr == '=') {
507: /* handle ||, &&, etc, and also things as in int *****i */
508: *tok++ = *buf_ptr;
509: if (++buf_ptr >= buf_end)
510: fill_buffer();
511: }
512: code = (ps.last_u_d ? unary_op : binary_op);
513: unary_delim = true;
514:
515:
516: } /* end of switch */
517: if (code != newline) {
518: l_struct = false;
519: last_code = code;
520: }
521: if (buf_ptr >= buf_end) /* check for input buffer empty */
522: fill_buffer();
523: ps.last_u_d = unary_delim;
524: *tok = '\0'; /* null terminate the token */
525: return (code);
526: };
527:
528: /* Add the given keyword to the keyword table, using val as the keyword type
529: */
530: addkey (key, val)
531: char *key;
532: {
533: register struct templ *p = specials;
534: while (p->rwd)
535: if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
536: return;
537: else
538: p++;
539: if (p >= specials + sizeof specials / sizeof specials[0])
540: return; /* For now, table overflows are silently
541: ignored */
542: p->rwd = key;
543: p->rwcode = val;
544: p[1].rwd = 0;
545: p[1].rwcode = 0;
546: return;
547: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.