/*
 *	Copyright (c) 1982 Regents of the University of California
 */
4: #ifndef lint
5: static char sccsid[] = "@(#)asscan2.c 4.14 7/6/83";
6: #endif not lint
7:
8: #include "asscanl.h"
9:
10: static inttoktype oval = NL;
11: #define ASINBUFSIZ 4096
12: char inbufunget[8];
13: char inbuffer[ASINBUFSIZ];
14: char *Ginbufptr = inbuffer;
15: int Ginbufcnt = 0;
16: int scannerhadeof;
17:
18: fillinbuffer()
19: {
20: int nread;
21: int goal;
22: int got;
23:
24: nread = 0;
25: if (scannerhadeof == 0){
26: goal = sizeof(inbuffer);
27: do {
28: got = read(stdin->_file, inbuffer + nread, goal);
29: if (got == 0)
30: scannerhadeof = 1;
31: if (got <= 0)
32: break;
33: nread += got;
34: goal -= got;
35: } while (goal);
36: } else {
37: scannerhadeof = 0;
38: }
39: /*
40: * getchar assumes that Ginbufcnt and Ginbufptr
41: * are adjusted as if one character has been removed
42: * from the input.
43: */
44: if (nread == 0){
45: inbuffer[0] = EOFCHAR;
46: nread = 1;
47: }
48: Ginbufcnt = nread - 1;
49: Ginbufptr = inbuffer + 1;
50: }
51:
52: scan_dot_s(bufferbox)
53: struct tokbufdesc *bufferbox;
54: {
55: reg char *inbufptr;
56: reg int inbufcnt;
57: reg int ryylval; /* local copy of lexical value */
58: extern int yylval; /* global copy of lexical value */
59: reg int val; /* the value returned */
60: int i; /* simple counter */
61: reg char *rcp;
62: int ch; /* treated as a character */
63: int ch1; /* shadow value */
64: struct symtab *op;
65: ptrall lgbackpatch; /* where to stuff a string length */
66: reg ptrall bufptr; /* where to stuff tokens */
67: ptrall bufub; /* where not to stuff tokens */
68: long intval; /* value of int */
69: int linescrossed; /* when doing strings and comments */
70: struct Opcode opstruct;
71: reg int strlg; /* the length of a string */
72:
73: (bytetoktype *)bufptr = (bytetoktype *) & (bufferbox->toks[0]);
74: (bytetoktype *)bufub = &(bufferbox->toks[AVAILTOKS]);
75:
76: MEMTOREGBUF;
77: if (newfflag){
78: newfflag = 0;
79: ryylval = (int)savestr(newfname, strlen(newfname)+1, STR_BOTH);
80:
81: ptoken(bufptr, IFILE);
82: ptoken(bufptr, STRING);
83: pptr(bufptr, ryylval);
84:
85: ptoken(bufptr, ILINENO);
86: ptoken(bufptr, INT);
87: pint(bufptr, 1);
88: }
89:
90: while (bufptr < bufub){
91: loop:
92: switch(ryylval = (type+1)[ch = getchar()]) {
93: case SCANEOF:
94: endoffile: ;
95: inbufptr = 0;
96: ptoken(bufptr, PARSEEOF);
97: goto done;
98:
99: case DIV: /*process C style comments*/
100: if ( (ch = getchar()) == '*') { /*comment prelude*/
101: int incomment;
102: linescrossed = 0;
103: incomment = 1;
104: ch = getchar(); /*skip over the * */
105: while(incomment){
106: switch(ch){
107: case '*':
108: ch = getchar();
109: incomment = (ch != '/');
110: break;
111: case '\n':
112: scanlineno++;
113: linescrossed++;
114: ch = getchar();
115: break;
116: case EOFCHAR:
117: goto endoffile;
118: default:
119: ch = getchar();
120: break;
121: }
122: }
123: val = ILINESKIP;
124: ryylval = linescrossed;
125: goto ret;
126: } else { /*just an ordinary DIV*/
127: ungetc(ch);
128: val = ryylval = DIV;
129: goto ret;
130: }
131: case SH:
132: if (oval == NL){
133: /*
134: * Attempt to recognize a C preprocessor
135: * style comment '^#[ \t]*[0-9]*[ \t]*".*"
136: */
137: ch = getchar(); /*bump the #*/
138: while (INCHARSET(ch, SPACE))
139: ch = getchar();/*bump white */
140: if (INCHARSET(ch, DIGIT)){
141: intval = 0;
142: while(INCHARSET(ch, DIGIT)){
143: intval = intval*10 + ch - '0';
144: ch = getchar();
145: }
146: while (INCHARSET(ch, SPACE))
147: ch = getchar();
148: if (ch == '"'){
149: ptoken(bufptr, ILINENO);
150: ptoken(bufptr, INT);
151: pint(bufptr, intval - 1);
152: ptoken(bufptr, IFILE);
153: /*
154: * The '"' has already been
155: * munched
156: *
157: * eatstr will not eat
158: * the trailing \n, so
159: * it is given to the parser
160: * and counted.
161: */
162: goto eatstr;
163: }
164: }
165: }
166: /*
167: * Well, its just an ordinary decadent comment
168: */
169: while ((ch != '\n') && (ch != EOFCHAR))
170: ch = getchar();
171: if (ch == EOFCHAR)
172: goto endoffile;
173: val = ryylval = oval = NL;
174: scanlineno++;
175: goto ret;
176:
177: case NL:
178: scanlineno++;
179: val = ryylval;
180: goto ret;
181:
182: case SP:
183: oval = SP; /*invalidate ^# meta comments*/
184: goto loop;
185:
186: case REGOP: /* % , could be used as modulo, or register*/
187: ch = getchar();
188: if (INCHARSET(ch, DIGIT)){
189: ryylval = ch-'0';
190: if (ch=='1') {
191: if (INCHARSET( (ch = getchar()), REGDIGIT))
192: ryylval = 10+ch-'0';
193: else
194: ungetc(ch);
195: }
196: /*
197: * God only knows what the original author
198: * wanted this undocumented feature to
199: * do.
200: * %5++ is really r7
201: */
202: while(INCHARSET( (ch = getchar()), SIGN)) {
203: if (ch=='+')
204: ryylval++;
205: else
206: ryylval--;
207: }
208: ungetc(ch);
209: val = REG;
210: } else {
211: ungetc(ch);
212: val = REGOP;
213: }
214: goto ret;
215:
216: case ALPH:
217: ch1 = ch;
218: if (INCHARSET(ch, SZSPECBEGIN)){
219: if( (ch = getchar()) == '`' || ch == '^'){
220: ch1 |= 0100; /*convert to lower*/
221: switch(ch1){
222: case 'b': ryylval = 1; break;
223: case 'w': ryylval = 2; break;
224: case 'l': ryylval = 4; break;
225: default: ryylval = d124; break;
226: }
227: val = SIZESPEC;
228: goto ret;
229: } else {
230: ungetc(ch);
231: ch = ch1; /*restore first character*/
232: }
233: }
234: rcp = yytext;
235: do {
236: if (rcp < &yytext[NCPName])
237: *rcp++ = ch;
238: } while (INCHARSET ( (ch = getchar()), ALPHA | DIGIT));
239: *rcp = '\0';
240: while (INCHARSET(ch, SPACE))
241: ch = getchar();
242: ungetc(ch);
243:
244: switch((op = *lookup(1))->s_tag){
245: case 0:
246: case LABELID:
247: /*
248: * Its a name... (Labels are subsets of name)
249: */
250: ryylval = (int)op;
251: val = NAME;
252: break;
253: case INST0:
254: case INSTn:
255: case IJXXX:
256: opstruct.Op_popcode = ( (struct instab *)op)->i_popcode;
257: opstruct.Op_eopcode = ( (struct instab *)op)->i_eopcode;
258: val = op->s_tag;
259: break;
260: default:
261: ryylval = ( (struct instab *)op)->i_popcode;
262: val = op->s_tag;
263: break;
264: }
265: goto ret;
266:
267: case DIG:
268: /*
269: * restore local inbufptr and inbufcnt
270: */
271: REGTOMEMBUF;
272: val = number(ch);
273: MEMTOREGBUF;
274: /*
275: * yylval or yybignum has been stuffed as a side
276: * effect to number(); get the global yylval
277: * into our fast local copy in case it was an INT.
278: */
279: ryylval = yylval;
280: goto ret;
281:
282: case LSH:
283: case RSH:
284: /*
285: * We allow the C style operators
286: * << and >>, as well as < and >
287: */
288: if ( (ch1 = getchar()) != ch)
289: ungetc(ch1);
290: val = ryylval;
291: goto ret;
292:
293: case MINUS:
294: if ( (ch = getchar()) =='(')
295: ryylval=val=MP;
296: else {
297: ungetc(ch);
298: val=MINUS;
299: }
300: goto ret;
301:
302: case SQ:
303: if ((ryylval = getchar()) == '\n')
304: scanlineno++; /*not entirely correct*/
305: val = INT;
306: goto ret;
307:
308: case DQ:
309: eatstr:
310: linescrossed = 0;
311: for (strlg = 0; /*VOID*/; strlg++){
312: switch(ch = getchar()){
313: case '"':
314: goto tailDQ;
315: default:
316: stuff:
317: putc(ch, strfile);
318: break;
319: case '\n':
320: yywarning("New line in a string constant");
321: scanlineno++;
322: linescrossed++;
323: ch = getchar();
324: switch(ch){
325: case EOFCHAR:
326: putc('\n', strfile);
327: ungetc(EOFCHAR);
328: goto tailDQ;
329: default:
330: ungetc(ch);
331: ch = '\n';
332: goto stuff;
333: }
334: break;
335:
336: case '\\':
337: ch = getchar(); /*skip the '\\'*/
338: if ( INCHARSET(ch, BSESCAPE)){
339: switch (ch){
340: case 'b': ch = '\b'; goto stuff;
341: case 'f': ch = '\f'; goto stuff;
342: case 'n': ch = '\n'; goto stuff;
343: case 'r': ch = '\r'; goto stuff;
344: case 't': ch = '\t'; goto stuff;
345: }
346: }
347: if ( !(INCHARSET(ch, OCTDIGIT)) )
348: goto stuff;
349: i = 0;
350: intval = 0;
351: while ( (i < 3) && (INCHARSET(ch, OCTDIGIT))){
352: i++;
353: intval <<= 3;
354: intval += ch - '0';
355: ch = getchar();
356: }
357: ungetc(ch);
358: ch = (char)intval;
359: goto stuff;
360: }
361: }
362: tailDQ: ;
363: /*
364: * account for any lines that were crossed
365: */
366: if (linescrossed){
367: ptoken(bufptr, ILINESKIP);
368: pint(bufptr, linescrossed);
369: }
370: /*
371: * Cheat: append a trailing null to the string
372: * and then adjust the string length to ignore
373: * the trailing null. If any STRING client requires
374: * the trailing null, the client can just change STRLEN
375: */
376: putc(0, strfile);
377: ryylval = (int)savestr((char *)0, strlg + 1, STR_FILE);
378: val = STRING;
379: ((struct strdesc *)ryylval)->sd_strlen -= 1;
380: goto ret;
381:
382: case BADCHAR:
383: linescrossed = lineno;
384: lineno = scanlineno;
385: yyerror("Illegal character mapped: %d, char read:(octal) %o",
386: ryylval, ch);
387: lineno = linescrossed;
388: val = BADCHAR;
389: goto ret;
390:
391: default:
392: val = ryylval;
393: goto ret;
394: } /*end of the switch*/
395: /*
396: * here with one token, so stuff it
397: */
398: ret:
399: oval = val;
400: ptoken(bufptr, val);
401: switch(val){
402: case ILINESKIP:
403: pint(bufptr, ryylval);
404: break;
405: case SIZESPEC:
406: pchar(bufptr, ryylval);
407: break;
408: case BFINT: plong(bufptr, ryylval);
409: break;
410: case INT: plong(bufptr, ryylval);
411: break;
412: case BIGNUM: pnumber(bufptr, yybignum);
413: break;
414: case STRING: pptr(bufptr, (int)(char *)ryylval);
415: break;
416: case NAME: pptr(bufptr, (int)(struct symtab *)ryylval);
417: break;
418: case REG: pchar(bufptr, ryylval);
419: break;
420: case INST0:
421: case INSTn:
422: popcode(bufptr, opstruct);
423: break;
424: case IJXXX:
425: popcode(bufptr, opstruct);
426: pptr(bufptr, (int)(struct symtab *)symalloc());
427: break;
428: case ISTAB:
429: case ISTABSTR:
430: case ISTABNONE:
431: case ISTABDOT:
432: case IALIGN:
433: pptr(bufptr, (int)(struct symtab *)symalloc());
434: break;
435: /*
436: * default:
437: */
438: }
439: builtval: ;
440: } /*end of the while to stuff the buffer*/
441: done:
442: bufferbox->tok_count = (bytetoktype *)bufptr - &(bufferbox->toks[0]);
443: /*
444: * This is a real kludge:
445: *
446: * We put the last token in the buffer to be a MINUS
447: * symbol. This last token will never be picked up
448: * in the normal way, but can be looked at during
449: * a peekahead look that the short circuit expression
450: * evaluator uses to see if an expression is complicated.
451: *
452: * Consider the following situation:
453: *
454: * .word 45 + 47
455: * buffer 1 | buffer 0
456: * the peekahead would want to look across the buffer,
457: * but will look in the buffer end zone, see the minus, and
458: * fail.
459: */
460: ptoken(bufptr, MINUS);
461: REGTOMEMBUF;
462: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.