|
|
1.1 root 1: /*
2: * Copyright (c) 1985 Sun Microsystems, Inc.
3: * Copyright (c) 1980 The Regents of the University of California.
4: * Copyright (c) 1976 Board of Trustees of the University of Illinois.
5: * All rights reserved.
6: *
7: * Redistribution and use in source and binary forms are permitted
8: * provided that: (1) source distributions retain this entire copyright
9: * notice and comment, and (2) distributions including binaries display
10: * the following acknowledgement: ``This product includes software
11: * developed by the University of California, Berkeley and its contributors''
12: * in the documentation or other materials provided with the distribution
13: * and in all advertising materials mentioning features or use of this
14: * software. Neither the name of the University nor the names of its
15: * contributors may be used to endorse or promote products derived
16: * from this software without specific prior written permission.
17: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
18: * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
19: * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
20: */
21:
#if !defined(lint)
/*
 * Version identification string for this file (SCCS-style "@(#)" marker).
 * It is compiled out when the lint macro is defined.
 */
static char sccsid[] = "@(#)lexi.c 5.15 (Berkeley) 6/1/90";
#endif /* !defined(lint) */
25:
26: /*
27: * Here we have the token scanner for indent. It scans off one token and puts
28: * it in the global variable "token". It returns a code, indicating the type
29: * of token scanned.
30: */
31:
32: #include "indent_globs.h"
33: #include "indent_codes.h"
34: #include <ctype.h>
35:
/*
 * Character classes stored in chartype[] and compared against by lexi().
 * Any other value in the table (0) means the character is in neither class.
 */
#define alphanum 1	/* may appear in an identifier or number */
#define opchar 3	/* may appear in an operator */
38:
/*
 * One entry of the reserved-word table: the keyword text and the class code
 * that lexi() switches on when the scanned token matches it.
 */
struct templ {
    char       *rwd;		/* keyword spelling; 0 marks end of table */
    int         rwcode;		/* keyword class, see the list below */
};

/*
 * Reserved-word table searched by lexi() and extended by addkey().
 *
 * rwcode values as interpreted by lexi():
 *   0  no special handling (token is returned as an ordinary identifier)
 *   1  switch
 *   2  case / default
 *   3  struct, union, enum (the following tag is treated as a declaration)
 *   4  declaration keywords and type names
 *   5  if, while, for
 *   6  else, do
 *   7  sizeof
 *
 * The table is terminated by an entry whose rwd is 0; the remaining slots
 * up to 100 are free for addkey().
 */
struct templ specials[100] =
{
    {"switch", 1},
    {"case", 2},
    {"break", 0},
    {"struct", 3},
    {"union", 3},
    {"enum", 3},
    {"default", 2},
    {"int", 4},
    {"char", 4},
    {"float", 4},
    {"double", 4},
    {"long", 4},
    {"short", 4},
    {"typedef", 4},		/* was misspelled "typdef" and never matched */
    {"unsigned", 4},
    {"register", 4},
    {"static", 4},
    {"global", 4},
    {"extern", 4},
    {"void", 4},
    {"goto", 0},
    {"return", 0},
    {"if", 5},
    {"while", 5},
    {"for", 5},
    {"else", 6},
    {"do", 6},
    {"sizeof", 7},
    {0, 0}
};
76:
/*
 * Character classification table, indexed by 7-bit ASCII code.  lexi() uses
 * it to decide whether a character belongs to an alphanumeric token or to an
 * operator:
 *   0  neither
 *   1  alphanumeric (letters, digits, '_' and '$')
 *   3  operator character
 */
char chartype[128] =
{
    /* 0x00 - 0x0f: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10 - 0x1f: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 - 0x2f: space and punctuation up to the slash */
    0, 3, 0, 0, 1, 3, 3, 0, 0, 0, 3, 3, 0, 3, 0, 3,
    /* 0x30 - 0x3f: digits 0-9, then : ; < = > ? */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3,
    /* 0x40 - 0x4f: at-sign, then A-O */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 - 0x5f: P-Z, then brackets, backslash, caret, underscore */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 1,
    /* 0x60 - 0x6f: backquote, then a-o */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 - 0x7f: p-z, then braces, bar, tilde, DEL */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 0, 3, 0
};
98:
99:
100:
101:
102: int
103: lexi()
104: {
105: int unary_delim; /* this is set to 1 if the current token
106: *
107: * forces a following operator to be unary */
108: static int last_code; /* the last token type returned */
109: static int l_struct; /* set to 1 if the last token was 'struct' */
110: int code; /* internal code to be returned */
111: char qchar; /* the delimiter character for a string */
112:
113: e_token = s_token; /* point to start of place to save token */
114: unary_delim = false;
115: ps.col_1 = ps.last_nl; /* tell world that this token started in
116: * column 1 iff the last thing scanned was nl */
117: ps.last_nl = false;
118:
119: while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
120: ps.col_1 = false; /* leading blanks imply token is not in column
121: * 1 */
122: if (++buf_ptr >= buf_end)
123: fill_buffer();
124: }
125:
126: /* Scan an alphanumeric token */
127: if (chartype[*buf_ptr] == alphanum || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
128: /*
129: * we have a character or number
130: */
131: register char *j; /* used for searching thru list of
132: *
133: * reserved words */
134: register struct templ *p;
135:
136: if (isdigit(*buf_ptr) || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
137: int seendot = 0,
138: seenexp = 0;
139: if (*buf_ptr == '0' &&
140: (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
141: *e_token++ = *buf_ptr++;
142: *e_token++ = *buf_ptr++;
143: while (isxdigit(*buf_ptr)) {
144: CHECK_SIZE_TOKEN;
145: *e_token++ = *buf_ptr++;
146: }
147: }
148: else
149: while (1) {
150: if (*buf_ptr == '.')
151: if (seendot)
152: break;
153: else
154: seendot++;
155: CHECK_SIZE_TOKEN;
156: *e_token++ = *buf_ptr++;
157: if (!isdigit(*buf_ptr) && *buf_ptr != '.')
158: if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
159: break;
160: else {
161: seenexp++;
162: seendot++;
163: CHECK_SIZE_TOKEN;
164: *e_token++ = *buf_ptr++;
165: if (*buf_ptr == '+' || *buf_ptr == '-')
166: *e_token++ = *buf_ptr++;
167: }
168: }
169: if (*buf_ptr == 'L' || *buf_ptr == 'l')
170: *e_token++ = *buf_ptr++;
171: }
172: else
173: while (chartype[*buf_ptr] == alphanum) { /* copy it over */
174: CHECK_SIZE_TOKEN;
175: *e_token++ = *buf_ptr++;
176: if (buf_ptr >= buf_end)
177: fill_buffer();
178: }
179: *e_token++ = '\0';
180: while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
181: if (++buf_ptr >= buf_end)
182: fill_buffer();
183: }
184: ps.its_a_keyword = false;
185: ps.sizeof_keyword = false;
186: if (l_struct) { /* if last token was 'struct', then this token
187: * should be treated as a declaration */
188: l_struct = false;
189: last_code = ident;
190: ps.last_u_d = true;
191: return (decl);
192: }
193: ps.last_u_d = false; /* Operator after indentifier is binary */
194: last_code = ident; /* Remember that this is the code we will
195: * return */
196:
197: /*
198: * This loop will check if the token is a keyword.
199: */
200: for (p = specials; (j = p->rwd) != 0; p++) {
201: register char *p = s_token; /* point at scanned token */
202: if (*j++ != *p++ || *j++ != *p++)
203: continue; /* This test depends on the fact that
204: * identifiers are always at least 1 character
205: * long (ie. the first two bytes of the
206: * identifier are always meaningful) */
207: if (p[-1] == 0)
208: break; /* If its a one-character identifier */
209: while (*p++ == *j)
210: if (*j++ == 0)
211: goto found_keyword; /* I wish that C had a multi-level
212: * break... */
213: }
214: if (p->rwd) { /* we have a keyword */
215: found_keyword:
216: ps.its_a_keyword = true;
217: ps.last_u_d = true;
218: switch (p->rwcode) {
219: case 1: /* it is a switch */
220: return (swstmt);
221: case 2: /* a case or default */
222: return (casestmt);
223:
224: case 3: /* a "struct" */
225: if (ps.p_l_follow)
226: break; /* inside parens: cast */
227: l_struct = true;
228:
229: /*
230: * Next time around, we will want to know that we have had a
231: * 'struct'
232: */
233: case 4: /* one of the declaration keywords */
234: if (ps.p_l_follow) {
235: ps.cast_mask |= 1 << ps.p_l_follow;
236: break; /* inside parens: cast */
237: }
238: last_code = decl;
239: return (decl);
240:
241: case 5: /* if, while, for */
242: return (sp_paren);
243:
244: case 6: /* do, else */
245: return (sp_nparen);
246:
247: case 7:
248: ps.sizeof_keyword = true;
249: default: /* all others are treated like any other
250: * identifier */
251: return (ident);
252: } /* end of switch */
253: } /* end of if (found_it) */
254: if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
255: register char *tp = buf_ptr;
256: while (tp < buf_end)
257: if (*tp++ == ')' && (*tp == ';' || *tp == ','))
258: goto not_proc;
259: strncpy(ps.procname, token, sizeof ps.procname - 1);
260: ps.in_parameter_declaration = 1;
261: rparen_count = 1;
262: not_proc:;
263: }
264: /*
265: * The following hack attempts to guess whether or not the current
266: * token is in fact a declaration keyword -- one that has been
267: * typedefd
268: */
269: if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
270: && !ps.p_l_follow
271: && !ps.block_init
272: && (ps.last_token == rparen || ps.last_token == semicolon ||
273: ps.last_token == decl ||
274: ps.last_token == lbrace || ps.last_token == rbrace)) {
275: ps.its_a_keyword = true;
276: ps.last_u_d = true;
277: last_code = decl;
278: return decl;
279: }
280: if (last_code == decl) /* if this is a declared variable, then
281: * following sign is unary */
282: ps.last_u_d = true; /* will make "int a -1" work */
283: last_code = ident;
284: return (ident); /* the ident is not in the list */
285: } /* end of procesing for alpanum character */
286:
287: /* Scan a non-alphanumeric token */
288:
289: *e_token++ = *buf_ptr; /* if it is only a one-character token, it is
290: * moved here */
291: *e_token = '\0';
292: if (++buf_ptr >= buf_end)
293: fill_buffer();
294:
295: switch (*token) {
296: case '\n':
297: unary_delim = ps.last_u_d;
298: ps.last_nl = true; /* remember that we just had a newline */
299: code = (had_eof ? 0 : newline);
300:
301: /*
302: * if data has been exausted, the newline is a dummy, and we should
303: * return code to stop
304: */
305: break;
306:
307: case '\'': /* start of quoted character */
308: case '"': /* start of string */
309: qchar = *token;
310: if (troff) {
311: e_token[-1] = '`';
312: if (qchar == '"')
313: *e_token++ = '`';
314: e_token = chfont(&bodyf, &stringf, e_token);
315: }
316: do { /* copy the string */
317: while (1) { /* move one character or [/<char>]<char> */
318: if (*buf_ptr == '\n') {
319: printf("%d: Unterminated literal\n", line_no);
320: goto stop_lit;
321: }
322: CHECK_SIZE_TOKEN; /* Only have to do this once in this loop,
323: * since CHECK_SIZE guarantees that there
324: * are at least 5 entries left */
325: *e_token = *buf_ptr++;
326: if (buf_ptr >= buf_end)
327: fill_buffer();
328: if (*e_token == BACKSLASH) { /* if escape, copy extra char */
329: if (*buf_ptr == '\n') /* check for escaped newline */
330: ++line_no;
331: if (troff) {
332: *++e_token = BACKSLASH;
333: if (*buf_ptr == BACKSLASH)
334: *++e_token = BACKSLASH;
335: }
336: *++e_token = *buf_ptr++;
337: ++e_token; /* we must increment this again because we
338: * copied two chars */
339: if (buf_ptr >= buf_end)
340: fill_buffer();
341: }
342: else
343: break; /* we copied one character */
344: } /* end of while (1) */
345: } while (*e_token++ != qchar);
346: if (troff) {
347: e_token = chfont(&stringf, &bodyf, e_token - 1);
348: if (qchar == '"')
349: *e_token++ = '\'';
350: }
351: stop_lit:
352: code = ident;
353: break;
354:
355: case ('('):
356: case ('['):
357: unary_delim = true;
358: code = lparen;
359: break;
360:
361: case (')'):
362: case (']'):
363: code = rparen;
364: break;
365:
366: case '#':
367: unary_delim = ps.last_u_d;
368: code = preesc;
369: break;
370:
371: case '?':
372: unary_delim = true;
373: code = question;
374: break;
375:
376: case (':'):
377: code = colon;
378: unary_delim = true;
379: break;
380:
381: case (';'):
382: unary_delim = true;
383: code = semicolon;
384: break;
385:
386: case ('{'):
387: unary_delim = true;
388:
389: /*
390: * if (ps.in_or_st) ps.block_init = 1;
391: */
392: /* ? code = ps.block_init ? lparen : lbrace; */
393: code = lbrace;
394: break;
395:
396: case ('}'):
397: unary_delim = true;
398: /* ? code = ps.block_init ? rparen : rbrace; */
399: code = rbrace;
400: break;
401:
402: case 014: /* a form feed */
403: unary_delim = ps.last_u_d;
404: ps.last_nl = true; /* remember this so we can set 'ps.col_1'
405: * right */
406: code = form_feed;
407: break;
408:
409: case (','):
410: unary_delim = true;
411: code = comma;
412: break;
413:
414: case '.':
415: unary_delim = false;
416: code = period;
417: break;
418:
419: case '-':
420: case '+': /* check for -, +, --, ++ */
421: code = (ps.last_u_d ? unary_op : binary_op);
422: unary_delim = true;
423:
424: if (*buf_ptr == token[0]) {
425: /* check for doubled character */
426: *e_token++ = *buf_ptr++;
427: /* buffer overflow will be checked at end of loop */
428: if (last_code == ident || last_code == rparen) {
429: code = (ps.last_u_d ? unary_op : postop);
430: /* check for following ++ or -- */
431: unary_delim = false;
432: }
433: }
434: else if (*buf_ptr == '=')
435: /* check for operator += */
436: *e_token++ = *buf_ptr++;
437: else if (*buf_ptr == '>') {
438: /* check for operator -> */
439: *e_token++ = *buf_ptr++;
440: if (!pointer_as_binop) {
441: unary_delim = false;
442: code = unary_op;
443: ps.want_blank = false;
444: }
445: }
446: break; /* buffer overflow will be checked at end of
447: * switch */
448:
449: case '=':
450: if (ps.in_or_st)
451: ps.block_init = 1;
452: #ifdef undef
453: if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */
454: e_token[-1] = *buf_ptr++;
455: if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
456: *e_token++ = *buf_ptr++;
457: *e_token++ = '='; /* Flip =+ to += */
458: *e_token = 0;
459: }
460: #else
461: if (*buf_ptr == '=') {/* == */
462: *e_token++ = '='; /* Flip =+ to += */
463: buf_ptr++;
464: *e_token = 0;
465: }
466: #endif
467: code = binary_op;
468: unary_delim = true;
469: break;
470: /* can drop thru!!! */
471:
472: case '>':
473: case '<':
474: case '!': /* ops like <, <<, <=, !=, etc */
475: if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
476: *e_token++ = *buf_ptr;
477: if (++buf_ptr >= buf_end)
478: fill_buffer();
479: }
480: if (*buf_ptr == '=')
481: *e_token++ = *buf_ptr++;
482: code = (ps.last_u_d ? unary_op : binary_op);
483: unary_delim = true;
484: break;
485:
486: default:
487: if (token[0] == '/' && *buf_ptr == '*') {
488: /* it is start of comment */
489: *e_token++ = '*';
490:
491: if (++buf_ptr >= buf_end)
492: fill_buffer();
493:
494: code = comment;
495: unary_delim = ps.last_u_d;
496: break;
497: }
498: while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
499: /*
500: * handle ||, &&, etc, and also things as in int *****i
501: */
502: *e_token++ = *buf_ptr;
503: if (++buf_ptr >= buf_end)
504: fill_buffer();
505: }
506: code = (ps.last_u_d ? unary_op : binary_op);
507: unary_delim = true;
508:
509:
510: } /* end of switch */
511: if (code != newline) {
512: l_struct = false;
513: last_code = code;
514: }
515: if (buf_ptr >= buf_end) /* check for input buffer empty */
516: fill_buffer();
517: ps.last_u_d = unary_delim;
518: *e_token = '\0'; /* null terminate the token */
519: return (code);
520: }
521:
522: /*
523: * Add the given keyword to the keyword table, using val as the keyword type
524: */
525: addkey(key, val)
526: char *key;
527: {
528: register struct templ *p = specials;
529: while (p->rwd)
530: if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
531: return;
532: else
533: p++;
534: if (p >= specials + sizeof specials / sizeof specials[0])
535: return; /* For now, table overflows are silently
536: * ignored */
537: p->rwd = key;
538: p->rwcode = val;
539: p[1].rwd = 0;
540: p[1].rwcode = 0;
541: return;
542: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.