|
|
/* SCCS identification string for this source file. */
static char sccsid[] =
    "@(#)lexi.c 4.1 (Berkeley) 10/21/82";
2:
3: /*
4:
5: Copyright (C) 1976
6: by the
7: Board of Trustees
8: of the
9: University of Illinois
10:
11: All rights reserved
12:
13:
14: NAME:
15: lexi
16:
17: FUNCTION:
18: This is the token scanner for indent
19:
20: ALGORITHM:
21: 1) Strip off intervening blanks and/or tabs.
22: 2) If it is an alphanumeric token, move it to the token buffer "token".
23: Check if it is a special reserved word that indent will want to
24: know about.
25: 3) Non-alphanumeric tokens are handled with a big switch statement. A
26: flag is kept to remember if the last token was a "unary delimiter",
27: which forces a following operator to be unary as opposed to binary.
28:
29: PARAMETERS:
30: None
31:
32: RETURNS:
33: An integer code indicating the type of token scanned.
34:
35: GLOBALS:
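36: buf_ptr = Pointer to the next unscanned character in the input buffer
37: had_eof = When set, a scanned newline returns code 0 instead of newline
38: last_u_d = Set to true iff this token is a "unary delimiter"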
39:
40: CALLS:
41: fill_buffer
42: printf (lib)
43:
44: CALLED BY:
45: main
46:
47: NOTES:
48: Start of comment is passed back so that the comment can be scanned by
49: pr_comment.
50:
51: Strings and character literals are returned just like identifiers.
52:
53: HISTORY:
54: initial coding November 1976 D A Willcox of CAC
55: 1/7/77 D A Willcox of CAC Fix to provide proper handling
56: of "int a -1;"
57:
58: */
59:
60: /* Here we have the token scanner for indent. It scans off one token and
61: puts it in the global variable "token". It returns a code, indicating the
62: type of token scanned. */
63:
64: #include "indent_globs.h";
65: #include "indent_codes.h";
66:
67:
68:
/*
 * Character classes stored in the chartype[] table.  Any other value
 * (the table uses 0) means the character belongs to neither class.
 */
#define alphanum    1		/* may appear in an identifier or number */
#define opchar      3		/* may appear as the second character of an
				 * operator that starts with '=' */
71:
/*
 * One entry of the reserved-word table: the spelling of the word and a
 * small class code that lexi() switches on to pick the token code it
 * returns.
 */
struct templ {
    char *rwd;			/* spelling of the reserved word */
    int rwcode;			/* class of the word, see specials[] */
};

/*
 * Reserved words that the scanner treats specially.  The table is searched
 * linearly and is terminated by an entry whose rwd is a null pointer.
 *
 * rwcode values, as interpreted by lexi():
 *	1	"switch"
 *	2	"case" and "default"
 *	3	"struct" (remembered so that the following word is a declaration,
 *		then handled like class 4)
 *	4	declaration keywords
 *	5	keywords followed by a parenthesized expression (if, while, for)
 *	6	keywords not followed by a parenthesized expression (do, else)
 *	0	anything else; returned as an ordinary identifier
 */
struct templ specials[] =
{
    {"switch", 1},
    {"case", 2},
    {"struct", 3},
    {"default", 2},
    {"int", 4},
    {"char", 4},
    {"float", 4},
    {"double", 4},
    {"long", 4},
    {"short", 4},
    {"typedef", 4},		/* was misspelled "typdef", so it never matched */
    {"unsigned", 4},
    {"register", 4},
    {"static", 4},
    {"global", 4},
    {"extern", 4},
    {"if", 5},
    {"while", 5},
    {"for", 5},
    {"else", 6},
    {"do", 6},
    {"sizeof", 0},
    {0, 0}			/* end-of-table sentinel */
};
103:
/*
 * Classification of each 7-bit character code, indexed by the character
 * value: 1 marks characters that make up identifiers and numbers (letters,
 * digits and underscore), 3 marks operator characters, 0 marks everything
 * else.  Each row below covers sixteen consecutive codes.
 */
char chartype[128] = {
    /* 0x00 - 0x0f: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10 - 0x1f: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 - 0x2f: space ! " # $ % & ' ( ) * + , - . / */
    0, 3, 0, 0, 0, 3, 3, 0, 0, 0, 3, 3, 0, 3, 3, 3,
    /* 0x30 - 0x3f: 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3,
    /* 0x40 - 0x4f: @ A B C D E F G H I J K L M N O */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 - 0x5f: P Q R S T U V W X Y Z [ \ ] ^ _ */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 1,
    /* 0x60 - 0x6f: ` a b c d e f g h i j k l m n o */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 - 0x7f: p q r s t u v w x y z { | } ~ DEL */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 0, 3, 0
};
124:
125: int last_nl = true;
126: /* this is true if the last thing scanned was a newline */
127:
128:
129:
130: int lexi () {
131: register char *tok;
132: /* local pointer to next char in token */
133: register int i;
134: /* local loop counter */
135: register char *j;
136: /* used for searching thru list of reserved words */
137: int unary_delim;
138: /* this is set to 1 if the current token forces a following operator to be
139: unary */
140: static int last_code;
141: /* the last token type returned */
142: static int l_struct;
143: /* set to 1 if the last token was 'struct' */
144: int found_it;
145: int code; /* internal code to be returned */
146: char qchar; /* the delimiter character for a string */
147:
148: tok = token; /* point to start of place to save token */
149: unary_delim = false;
150: col_1 = last_nl; /* tell world that this token started in column
151: 1 iff the last thing scanned was nl */
152: last_nl = false;
153:
154: while (*buf_ptr == ' ' || *buf_ptr == '\t') {
155: /* get rid of blanks */
156: col_1 = false; /* leading blanks imply token is not in column 1
157: */
158: if (++buf_ptr >= buf_end)
159: fill_buffer ();
160: }
161:
162: /*----------------------------------------------------------*\
163: | Scan an alphanumeric token
164: \*----------------------------------------------------------*/
165:
166: if (chartype[*buf_ptr & 0177] == alphanum) {
167: /* we have a character or number */
168: while (chartype[*buf_ptr & 0177] == alphanum) {
169: /* copy it over */
170: *tok++ = *buf_ptr++;
171: if (buf_ptr >= buf_end)
172: fill_buffer ();
173: }
174:
175: *tok++ = '\0';
176:
177: if (l_struct) { /* if last token was 'struct', then this token
178: should be treated as a declaration */
179: l_struct = false;
180: last_code = ident;
181: last_u_d = true;
182: return (decl);
183: }
184:
185: last_u_d = false; /* operator after indentifier is binary */
186:
187: for (i = 0; specials[i].rwd != 0; ++i) {
188: /* this loop will check if the token is a keyword. if so, a following
189: operator is unary */
190: last_code = ident; /* remember that this is the code we will return
191: */
192: j = specials[i].rwd;
193: /* point at ith reserved word */
194: tok = token; /* point at scanned toekn */
195: found_it = true; /* set to false if not found */
196: do {
197: if (*tok++ != *j) {
198: found_it = false;
199: break;
200: }
201: } while (*j++);
202:
203: if (found_it) { /* we have a keyword */
204: last_u_d = true;
205: switch (specials[i].rwcode) {
206: case 1: /* it is a switch */
207: return (swstmt);
208: case 2: /* a case or default */
209: return (casestmt);
210:
211: case 3: /* a "struct" */
212: l_struct = true;
213: /* Next time around, we will want to know that we have had
214: a 'struct' */
215: case 4: /* one of the declaration keywords */
216: if(p_l_follow) break; /* inside parens: cast */
217: last_code = decl;
218: return (decl);
219:
220: case 5: /* if, while, for */
221: return (sp_paren);
222:
223: case 6: /* do, else */
224: return (sp_nparen);
225:
226: default: /* all others are treated like any other
227: identifier */
228: return (ident);
229: } /* end of switch */
230: } /* end of if (found_it) */
231:
232: }
233:
234: if (last_code == decl) /* if this is a declared variable, then
235: following sign is unary */
236: last_u_d = true; /* will make "int a -1" work */
237: last_code = ident;
238: return (ident); /* the ident is not in the list */
239: } /* end of procesing for alpanum character */
240:
241:
242:
243: /*----------------------------------------------------------*\
244: | Scan a non-alphanumeric token
245: \*----------------------------------------------------------*/
246:
247: *tok++ = *buf_ptr; /* if it is only a one-character token, it is
248: moved here */
249: *tok = '\0';
250: if (++buf_ptr >= buf_end)
251: fill_buffer ();
252:
253: switch (*token) {
254: case '\n':
255: unary_delim = last_u_d;
256: last_nl = true; /* remember that we just had a newline */
257: code = (had_eof ? 0 : newline);
258: /* if data has been exausted, the newline is a dummy, and we should
259: return code to stop */
260: break;
261:
262: case '\'': /* start of quoted character */
263: qchar = '\''; /* remember final delimiter */
264: goto copy_lit; /* and go to common literal code */
265:
266: case '"': /* start of string */
267: qchar = '"';
268:
269: copy_lit:
270: do { /* copy the string */
271: while (1) { /* move one character or [/<char>]<char> */
272: if (*buf_ptr == '\n') {
273: /* check for unterminated literal */
274: printf ("%d: Unterminated literal\n", line_no);
275: goto stop_lit;
276: /* Don't copy any more */
277: }
278:
279: *tok = *buf_ptr++;
280: if (buf_ptr >= buf_end)
281: fill_buffer ();
282: if (had_eof || ((tok - token) > (bufsize - 2))) {
283: printf ("Unterminated literal\n");
284: ++tok;
285: goto stop_lit;
286: /* get outof literal copying loop */
287: }
288:
289: if (*tok == '\\') {
290: /* if escape, copy extra char */
291: if (*buf_ptr == '\n')
292: /* check for escaped newline */
293: ++line_no;
294: *(++tok) = *buf_ptr++;
295: ++tok; /* we must increment this again because we
296: copied two chars */
297: if (buf_ptr >= buf_end)
298: fill_buffer ();
299: }
300: else
301: break; /* we copied one character */
302: } /* end of while (1) */
303: } while (*tok++ != qchar);
304:
305: stop_lit:
306: code = ident;
307: break;
308:
309: case ('('):
310: case ('['):
311: unary_delim = true;
312: code = lparen;
313: break;
314:
315: case (')'):
316: case (']'):
317: code = rparen;
318: break;
319:
320: case '#':
321: unary_delim = last_u_d;
322: code = preesc;
323: break;
324:
325: case '?':
326: unary_delim = true;
327: code = question;
328: break;
329:
330: case (':'):
331: code = colon;
332: unary_delim = true;
333: break;
334:
335: case (';'):
336: unary_delim = true;
337: code = semicolon;
338: break;
339:
340: case ('{'):
341: unary_delim = true;
342: code = lbrace;
343: break;
344:
345: case ('}'):
346: unary_delim = true;
347: code = rbrace;
348: break;
349:
350: case 014: /* a form feed */
351: unary_delim = last_u_d;
352: last_nl = true; /* remember this so we can set 'col_1' right */
353: code = form_feed;
354: break;
355:
356: case (','):
357: unary_delim = true;
358: code = comma;
359: break;
360:
361: case '.':
362: unary_delim = false;
363: code = period;
364: break;
365:
366: case '-':
367: case '+': /* check for -, +, --, ++ */
368: code = (last_u_d ? unary_op : binary_op);
369: unary_delim = true;
370:
371: if (*buf_ptr == token[0]) {
372: /* check for doubled character */
373: *tok++ = *buf_ptr++;
374: /* buffer overflow will be checked at end of loop */
375: if (last_code == ident || last_code == rparen) {
376: code = (last_u_d ? unary_op : postop);
377: /* check for following ++ or -- */
378: unary_delim = false;
379: }
380: }
381: else
382: if (*buf_ptr == '>' || *buf_ptr == '=')
383: /* check for operator -> or += */
384: *tok++ = *buf_ptr++;
385: /* buffer overflow will be checked at end of switch */
386:
387: break;
388:
389: case '=':
390: if (chartype[*buf_ptr] == opchar) {
391: /* we have two char assignment */
392: *tok++ = *buf_ptr;
393: /* move second character */
394: if (++buf_ptr >= buf_end)
395: fill_buffer ();
396: }
397:
398: code = binary_op;
399: unary_delim = true;
400: if (token[1] != '<' && token[1] != '>')
401: /* check for possible 3 char operator */
402: break;
403: /* can drop thru!!! */
404:
405: case '>':
406: case '<':
407: case '!': /* ops like <, <<, <=, !=, etc */
408: if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
409: *tok++ = *buf_ptr;
410: if (++buf_ptr >= buf_end)
411: fill_buffer ();
412: }
413:
414: if (*buf_ptr == '=')
415: *tok++ = *buf_ptr++;
416: code = (last_u_d ? unary_op : binary_op);
417: unary_delim = true;
418: break;
419:
420: default:
421: if (token[0] == '/' && *buf_ptr == '*') {
422: /* it is start of comment */
423: *tok++ = '*';
424:
425: if (++buf_ptr >= buf_end)
426: fill_buffer ();
427:
428: code = comment;
429: unary_delim = last_u_d;
430: break;
431: }
432:
433: while (*(tok - 1) == *buf_ptr || *buf_ptr=='=') {
434: /* handle ||, &&, etc, and also things as in int *****i */
435: *tok++ = *buf_ptr;
436: if (++buf_ptr >= buf_end)
437: fill_buffer ();
438: }
439:
440:
441: code = (last_u_d ? unary_op : binary_op);
442: unary_delim = true;
443:
444:
445: } /* end of switch */
446:
447: if (code != newline) {
448: l_struct = false;
449: last_code = code;
450: }
451:
452: if (buf_ptr >= buf_end) /* check for input buffer empty */
453: fill_buffer ();
454: last_u_d = unary_delim;
455: *tok = '\0'; /* null terminate the token */
456: return (code);
457: };
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.