|
|
1.1 root 1: /*ident "@(#)ctrans:src/lex.c 1.3.4.17" */
2: /***************************************************************************
3:
4: C++ source for cfront, the C++ compiler front-end
5: written in the computer science research center of Bell Labs
6:
7: Copyright (c) 1984 AT&T, Inc. All Rights Reserved
8: THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF AT&T, INC.
9:
10: lex.c:
11: lexical analyser based on pcc's and cpre's scanners
12: modified to handle classes:
13: new keywords: class
14: public
15: call
16: etc.
17: names are not entered in the symbol table by lex()
18: names can be of arbitrary length
19: error() is used to report errors
20: {} and () must match
21: numeric constants are not converted into internal representation
22: but stored as strings
23:
24: ****************************************************************************/
25:
26: #include "cfront.h"
27: #include "yystype.h"
28: #include "size.h"
29: #include "tqueue.h"
30:
31: # define CCTRANS(x) x
32:
33: /* lexical actions */
34:
/*
	one action code per lxdope[] entry; tlex() switches on it
*/
#define A_ERR	0	/* illegal character */
#define A_LET	1	/* saw a letter */
#define A_DIG	2	/* saw a digit */
#define A_1C	3	/* return a single character */
#define A_STR	4	/* string */
#define A_CC	5	/* character constant */
#define A_BCD	6	/* GCOS BCD constant */
#define A_SL	7	/* saw a / */
#define A_DOT	8	/* saw a . */
#define A_2C	9	/* possible two character symbol */
#define A_WS	10	/* whitespace (not \n) */
#define A_NL	11	/* \n */
#define A_LC	12	/* { */
#define A_RC	13	/* } */
#define A_L	14	/* ( */
#define A_R	15	/* ) */
#define A_EOF	16	/* end of input */
#define A_ASS	17	/* = */
#define A_LT	18	/* < */
#define A_GT	19	/* > */
#define A_ER	20	/* ^ */
#define A_OR	21	/* | */
#define A_AND	22	/* & */
#define A_MOD	23	/* % */
#define A_NOT	24	/* ! */
#define A_MIN	25	/* - */
#define A_MUL	26	/* * */
#define A_PL	27	/* + */
#define A_COL	28	/* : */
#define A_SHARP	29	/* # */
65:
66: /* character classes */
67:
/* bits kept in lxmask[]; a character may belong to several classes */
#define LEXLET	01	/* letter or '_' */
#define LEXDIG	02	/* decimal digit */
/* no LEXOCT because 8 and 9 used to be octal digits */
#define LEXHEX	010	/* hexadecimal digit */
#define LEXWS	020	/* whitespace other than \n */
#define LEXDOT	040	/* the '.' character */
74:
/* sizes (in chars) of the static text buffer and of each overflow chunk */
const int FIRSTCHUNK = 8*1024-8;
const int BUFCHUNK = 4*1024-8;

/*
	text buffer: the token being collected lives in [txtstart,txtfree);
	pch() calls new_buf() once txtfree has reached txtmax
*/
static char inbuf[FIRSTCHUNK];
char* txtmax = &inbuf[FIRSTCHUNK-1];
char* txtstart = 0;
char* txtfree = 0;

/* overflow chunk, linked either on bufhead (in use) or freebuf (recycled) */
struct buf {
	buf* next;
	char chars[BUFCHUNK];
};

static buf* bufhead;	// chunks handed out since the last lex_clear()
static buf* freebuf;	// chunks available for reuse
93:
94: new_buf(char c)
95: {
96: //fprintf(stderr,"new_buf %d\n",bufs++);
97: buf* pbuf;
98: if (freebuf) {
99: pbuf = freebuf;
100: freebuf = freebuf->next;
101: }
102: else
103: pbuf = new buf; // allocate and register new chunk
104: pbuf->next = bufhead;
105: bufhead = pbuf;
106:
107: if (BUFCHUNK < txtmax-txtstart) error('l',"lexical token too long");
108:
109: // copy current token:
110: char* p = txtstart;
111: txtstart = txtfree = &pbuf->chars[0];
112: while (p<txtmax) *txtfree++ = *p++;
113: *txtfree++=c;
114: txtmax = &pbuf->chars[BUFCHUNK-1];
115: return 0;
116: }
117:
118:
119: #define pch(c) ((txtmax<=txtfree)?new_buf(c):(*txtfree++=c))
120: #define start_txt() txtstart = txtfree
121: #define del_txt() txtfree = txtstart
122:
123: /*static*/ char* file_name[MAXFILE*4]; // source file names
124: // file_name[0] == src_file_name
125: // file_name[0] == 0 means stdin
126: static short file_stack[MAXFILE]; // stack of file name indices
127: int curr_file; // current index in file_stack
128: // that is current #include nest level
129:
130: int linkage; // linkage is default C++
131: // linkage==0 => C++ linkage
132: // linkage==1 => C linkage
133: const LINKMAX = 10;
134: static lvec[LINKMAX];
135: int lcount;
136:
137: void set_linkage(char* p)
138: {
139: if (p==0 || *p == 0) { // resume previous linkage
140: if (lcount) linkage = lvec[--lcount];
141: }
142: else {
143: if (LINKMAX<=++lcount)
144: error('l',"linkage directive nested too deep");
145: if (strcmp(p,"C")==0)
146: lvec[lcount] = linkage = 1;
147: else if (strcmp(p,"C++")==0)
148: lvec[lcount] = linkage = 0;
149: else
150: error("%s linkage",p);
151: }
152: }
153:
154: class loc curloc;
155: FILE * out_file = stdout;
156: FILE * in_file = stdin;
157: Ptable ktbl;
158: int br_level = 0; /* number of unmatched ``(''s */
159: int bl_level = 0; /* number of unmatched ``{''s */
160:
161: # ifdef ibm
162:
163: # define CSMASK 0377
164: # define CSSZ 256
165:
166: # else
167:
168: # define CSMASK 0177
169: # define CSSZ 128
170:
171: # endif
172:
173: static short lxmask[CSSZ+1];
174:
175: int saved = 0; /* putback character, avoid ungetchar */
176: static int lxtitle();
177:
178: overload rt;
179: inline YYSTYPE rt(char* x) { YYSTYPE y; y.s = x; return y; }
180: inline YYSTYPE rt(int x) { YYSTYPE y; y.t = x; return y; }
181: inline YYSTYPE rt(loc x) { YYSTYPE y; y.l = x; return y; }
182: inline YYSTYPE rt(void* x) { YYSTYPE y; y.pn = Pname(x); return y; }
183:
184: #define get(c) (c=getc(in_file))
185: #define unget(c) ungetc(c,in_file)
186:
187: #define reti(a,b) { addtok(a, rt(b)); return a; }
188: #define retn(a,b) { addtok(a, rt((Pnode)b)); return a; }
189: #define rets(a,b) { addtok(a, rt(b)); return a; }
190: #define retl(a) { addtok(a, rt(curloc)); return a; }
191:
192: void ktbl_init()
193: /*
194: enter keywords into keyword table for use by lex()
195: and into keyword representation table used for output
196: */
197: {
198: ktbl = new table(KTBLSIZE,0,0);
199:
200: new_key("asm",ASM,0);
201: new_key("auto",AUTO,TYPE);
202: new_key("break",LOC,BREAK);
203: new_key("case",LOC,CASE);
204: new_key("continue",LOC,CONTINUE);
205: new_key("char",CHAR,TYPE);
206: new_key("do",LOC,DO);
207: new_key("double",DOUBLE,TYPE);
208: new_key("default",LOC,DEFAULT);
209: new_key("enum",ENUM,0);
210: new_key("else",LOC,ELSE);
211: new_key("extern",EXTERN,TYPE);
212: new_key("float",FLOAT,TYPE);
213: new_key("for",LOC,FOR);
214: // new_key("fortran",FORTRAN,0);
215: new_key("goto",LOC,GOTO);
216: new_key("catch",CATCH,CATCH);
217: new_key("if",LOC,IF);
218: new_key("int",INT,TYPE);
219: new_key("long",LONG,TYPE);
220: new_key("return",LOC,RETURN);
221: new_key("register",REGISTER,TYPE);
222: new_key("static",STATIC,TYPE);
223: new_key("struct",STRUCT,AGGR);
224: new_key("sizeof",SIZEOF,0);
225: new_key("short",SHORT,TYPE);
226: new_key("switch",LOC,SWITCH);
227: new_key("template",TEMPLATE,TEMPLATE);
228: new_key("typedef",TYPEDEF,TYPE);
229: new_key("unsigned",UNSIGNED,TYPE);
230: new_key("union",UNION,AGGR);
231: new_key("void",VOID,TYPE);
232: new_key("while",LOC,WHILE);
233:
234: new_key("class",CLASS,AGGR);
235: new_key("const",CONST,TYPE);
236: new_key("delete",LOC,DELETE);
237: new_key("friend",FRIEND,TYPE);
238: new_key("inline",INLINE,TYPE);
239: new_key("new",NEW,0);
240: new_key("operator",OPERATOR,0);
241: new_key("overload",OVERLOAD,TYPE);
242: new_key("private",PRIVATE,PR);
243: new_key("protected",PROTECTED,PR);
244: new_key("public",PUBLIC,PR);
245: new_key("signed",SIGNED,TYPE);
246: new_key("this",THIS,0);
247: new_key("virtual",VIRTUAL,TYPE);
248: new_key("volatile",VOLATILE,TYPE);
249: #ifdef DK
250: new_key("or",OR,0);
251: new_key("cor",OROR,0);
252: new_key("and",AND,0);
253: new_key("cand",ANDAND,0);
254: new_key("xor",ER,0);
255: new_key("compl",COMPL,0);
256: #endif
257: }
258:
259: extern char* src_file_name;
260: extern char* line_format;
261: loc last_line;
262:
263: void loc::putline()
264: {
265: if (file==0 && line==0) return;
266: // fix, Nfile, not MAXFILE; Nfile == MAXFILE*4
267: // if (0<=file && file<MAXFILE) {
268: if ( 0<=file && file <= Nfile ) {
269: char* f = file_name[file];
270: if (f==0) f = (src_file_name) ? src_file_name : "";
271: fprintf(out_file,line_format,line,f);
272: last_line = *this;
273: }
274: }
275:
276: void loc::put(FILE* p)
277: {
278: // fix, Nfile, not MAXFILE; Nfile == MAXFILE*4
279: // if (0<=file && file<MAXFILE) {
280: if ( 0<=file && file <= Nfile ) {
281: char* f = file_name[file];
282: if (f==0) f = (src_file_name) ? src_file_name : "";
283: fprintf(p,"\"%s\", line %d: ",f,line);
284: }
285: }
286:
287: void lxenter(register char* s, short m)
288: /* enter a mask into lxmask */
289: {
290: register c;
291:
292: while( c= *s++ ) lxmask[c+1] |= m;
293:
294: }
295:
296:
297: void lxget(register c, register m)
298: /*
299: put 'c' back then scan for members of character class 'm'
300: terminate the string read with \0
301: txtfree points to the character position after that \0
302: */
303: {
304: pch(c);
305: while ( (get(c), lxmask[c+1]&m) ) pch(c);
306: unget(c);
307: pch('\0');
308: }
309:
310: struct LXDOPE {
311: short lxch; /* the character */
312: short lxact; /* the action to be performed */
313: TOK lxtok; /* the token number to be returned */
314: } lxdope[] = {
315: #ifdef apollo
316: '@', A_ERR, 0, /* illegal characters go here... */
317: #else
318: '$', A_ERR, 0, /* illegal characters go here... */
319: #endif
320: '_', A_LET, 0, /* letters point here */
321: '0', A_DIG, 0, /* digits point here */
322: ' ', A_WS, 0, /* whitespace goes here */
323: '\n', A_NL, 0,
324: '"', A_STR, 0, /* character string */
325: '\'', A_CC, 0, /* ASCII character constant */
326: '`', A_BCD, 0, /* 'foreign' character constant, e.g. BCD */
327: '(', A_L, LP,
328: ')', A_R, RP,
329: '{', A_LC, LC,
330: '}', A_RC, RC,
331: '[', A_1C, LB,
332: ']', A_1C, RB,
333: '*', A_MUL, MUL,
334: '?', A_1C, QUEST,
335: ':', A_COL, COLON,
336: '+', A_PL, PLUS,
337: '-', A_MIN, MINUS,
338: '/', A_SL, DIV,
339: '%', A_MOD, MOD,
340: '&', A_AND, AND,
341: '|', A_OR, OR,
342: '^', A_ER, ER,
343: '!', A_NOT, NOT,
344: '~', A_1C, COMPL,
345: ',', A_1C, CM,
346: ';', A_1C, SM,
347: '.', A_DOT, DOT,
348: '<', A_LT, LT,
349: '>', A_GT, GT,
350: '=', A_ASS, ASSIGN,
351: '#', A_SHARP, 0,
352: EOF, A_EOF, EOFTOK
353: };
354: /* note: EOF is used as sentinel, so must be <=0 and last entry in table */
355:
356: static struct LXDOPE *lxcp[CSSZ+1];
357:
358: void lex_init()
359: {
360: register struct LXDOPE *p;
361: register i;
362: register char *cp;
363: /* set up character classes */
364:
365: /* first clear lexmask */
366: for(i=0; i<=CSSZ; i++) lxmask[i] = 0;
367:
368: #ifdef apollo
369: lxenter( "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_$", LEXLET );
370: #else
371: lxenter( "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_", LEXLET );
372: #endif
373: lxenter( "0123456789", LEXDIG );
374: lxenter( "0123456789abcdefABCDEF", LEXHEX );
375: /* \013 should become \v someday; \013 is OK for ASCII and EBCDIC */
376: lxenter( " \t\r\b\f\013", LEXWS );
377: lxmask['.'+1] |= LEXDOT;
378:
379: /* make lxcp point to appropriate lxdope entry for each character */
380:
381: /* initialize error entries */
382:
383: for( i= 0; i<=CSSZ; ++i ) lxcp[i] = lxdope;
384:
385: /* make unique entries */
386:
387: for( p=lxdope; ; ++p ) {
388: lxcp[p->lxch+1] = p;
389: if( p->lxch < 0 ) break;
390: }
391:
392: /* handle letters, digits, and whitespace */
393: /* by convention, first, second, and third places */
394:
395: cp = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
396: while( *cp ) lxcp[*cp++ + 1] = &lxdope[1];
397: cp = "123456789";
398: while( *cp ) lxcp[*cp++ + 1] = &lxdope[2];
399: cp = "\t\b\r\f\013";
400: while( *cp ) lxcp[*cp++ + 1] = &lxdope[3];
401:
402: file_name[0] = src_file_name;
403: // curloc.file = 0; // spurious: curloc is a static
404: curloc.line = 1;
405:
406: ktbl_init();
407: lex_clear();
408: saved = lxtitle();
409: }
410:
411: void lex_clear()
412: {
413: // delete extra buffers:
414: buf* p = bufhead;
415: bufhead = 0;
416: //if (p) {
417: //fprintf(stderr,"lex_clear\n");
418: //bufs=0;
419: //}
420: while (p) {
421: buf* pp = p;
422: p = p->next;
423: pp->next = freebuf;
424: freebuf = pp;
425: }
426:
427: // re-set to static buffer:
428: txtstart = txtfree = inbuf;
429: txtmax = &inbuf[FIRSTCHUNK/*TBUFSZ*/-1];
430: }
431:
int int_val(char hex)
/*
	value (0..15) of the hexadecimal digit 'hex';
	-1 if 'hex' is not a hexadecimal digit (the function used to fall
	off its end without returning a value in that case)
*/
{
	switch (hex) {
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
		return hex-'0';
	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
		return hex-'a'+10;
	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
		return hex-'A'+10;
	default:
		return -1;
	}
}
444:
445: void hex_to_oct()
446: /*
447: \x has been seen on input (in char const or string) and \ printed
448: read the following hexadecimal integer and replace it with an octal
449: */
450: {
451: int i = 0;
452: int c;
453: get(c);
454: if (lxmask[c+1] & LEXHEX) {
455: i = int_val(c);
456: get(c); // try for two
457: if (lxmask[c+1] & LEXHEX) {
458: i = (i<<4) + int_val(c);
459: get(c); // try for three
460: if (lxmask[c+1] & LEXHEX)
461: i = (i<<4) + int_val(c);
462: else
463: unget(c);
464: }
465: else
466: unget(c);
467: }
468: else {
469: error("hexadecimal digitE after \\x");
470: unget(c);
471: }
472:
473: // if (0377 < i) error('l',"hexadecimal constant too large");
474: i &= 0377;
475:
476: pch(('0'+(i>>6)));
477: pch(('0'+((i&070)>>3)));
478: pch(('0'+(i&7)));
479: }
480:
481:
482: char * chconst()
483: /*
484: read a character constant into inbuf
485: */
486: {
487: register c;
488: int nch = 0;
489:
490: pch('\'');
491:
492: for(;;) {
493: char* p;
494: char cc = 0;
495:
496: switch (get(c)) {
497: case '\'':
498: goto ex;
499: case EOF:
500: error("eof in char constant");
501: goto ex;
502: case '\n':
503: error("newline in char constant");
504: goto ex;
505: case '\\':
506: if (SZ_INT == nch++) error('l',"char constant too long");
507: pch(c);
508: switch (get(c)){
509: case '\n':
510: ++curloc.line;
511: default:
512: pch(c);
513: break;
514: case '4': case '5': case '6': case '7': // octal
515: p = txtfree;
516: cc = c-4;
517: case '0': case '1': case '2': case '3':
518: pch(c);
519: get(c); /* try for 2 */
520: if( lxmask[c+1] & LEXDIG && c<'8'){
521: pch(c);
522: get(c); /* try for 3 */
523: if (lxmask[c+1] & LEXDIG && c<'8') {
524: if (cc) *p = cc; // zap high bit
525: pch(c);
526: }
527: else
528: unget(c);
529: }
530: else
531: unget(c);
532: break;
533: case 'x': // hexadecimal
534: hex_to_oct();
535: break;
536: };
537: break;
538: default:
539: if (SZ_INT == nch++) error('l',"char constant too long");
540: pch(c);
541: }
542: }
543: ex:
544: pch('\'');
545: pch('\0');
546: return txtstart;
547: }
548:
549: void lxcom()
550: /* process a "block comment" */
551: {
552: register c;
553:
554: for(;;)
555: switch (get(c)) {
556: case EOF:
557: error('w',"eof in comment");
558: return;
559: case '\n':
560: curloc.line++;
561: // Nline++;
562: break;
563: case '*':
564: if (get(c) == '/') return;
565: unget(c);
566: break;
567: case '/':
568: if (get(c) == '*') error('w',"``/*'' in comment");
569: unget(c);
570: break;
571: }
572: }
573:
574:
575: void linecom()
576: // process a "line comment"
577: {
578: register c;
579:
580: for(;;)
581: switch (get(c)) {
582: case EOF:
583: error('w',"eof in comment");
584: return;
585: case '\n':
586: curloc.line++;
587: // Nline++;
588: saved = lxtitle();
589: return;
590: }
591: }
592:
593: char eat_whitespace()
594: {
595:
596: for(;;) {
597: register c = get(c);
598: lx:
599:
600: switch (c) {
601: case EOF:
602: error('w',"unexpected comment");
603: return EOF;
604: case '/':
605: switch (get(c)) {
606: case '*':
607: lxcom();
608: break;
609: case '/':
610: linecom();
611: break;
612: default:
613: unget(c);
614: return '/';
615: }
616: break;
617: case '\n':
618: ++curloc.line;
619: c = lxtitle();
620: goto lx;
621: case ' ':
622: case '\t':
623: break;
624: default:
625: return c;
626: }
627: }
628: }
629:
630: void get_string()
631: {
632: int lxchar;
633:
634: for(;;)
635: switch (get(lxchar)) {
636: case '\\':
637: pch('\\');
638: switch (get(lxchar)){
639: case '\n':
640: ++curloc.line;
641: default:
642: pch(lxchar);
643: break;
644: case 'x': // hexadecimal
645: hex_to_oct();
646: break;
647: };
648: break;
649: case '"':
650: { char* p = txtstart; // eat_whitespace() moves txtstart
651: if ((lxchar = eat_whitespace()) == '"') {
652: // string catenation, break with
653: // newline to avoid merging characters
654: // (e.g. "\xAB" "C")
655: pch('\\');
656: pch('\n');
657:
658: continue; // eat '\"' and carry on
659: };
660:
661: txtstart = p;
662: unget(lxchar);
663: pch(0);
664: return;
665: }
666: case '\n':
667: error("newline in string");
668: pch(0);
669: return;
670: case EOF:
671: error("eof in string");
672: pch(0);
673: return;
674: default:
675: pch(lxchar);
676: }
677: }
678:
679: TOK tlex()
680: {
681: TOK ret;
682: Pname n;
683:
684: // Ntoken++;
685:
686: for(;;) {
687: register lxchar;
688: register struct LXDOPE *p;
689:
690: start_txt();
691:
692: if (saved) {
693: lxchar = saved;
694: saved = 0;
695: }
696: else
697: get(lxchar);
698:
699: if (lxchar+1 >= CSSZ )
700: error( "illegal input character enountered: %d", lxchar );
701:
702: switch( (p=lxcp[lxchar+1])->lxact ){
703:
704: case A_1C: // eat up a single character, and return an opcode
705: reti(p->lxtok,p->lxtok);
706:
707: case A_EOF:
708: if (br_level || bl_level+lcount)
709: error("'%s' missing at end of input",(bl_level+lcount) ? "}" : ")");
710:
711: reti(EOFTOK,0);
712:
713: case A_SHARP:
714: // cope with header file not ended with '\n'
715: unget('#');
716: saved = lxtitle();
717: continue;
718:
719: case A_ERR:
720: { if (' '<=lxchar && lxchar<='~') // ASCII printable
721: error("illegal character '%c' (ignored)",lxchar);
722: else
723: error("illegal character '0%o' (ignored)",lxchar);
724: continue;
725: }
726: case A_LET: // collect an identifier and check for keyword
727: {
728: char ll;
729: switch (ll = lxchar) {
730: // case 'l':
731: case 'L':
732: switch (get(lxchar)) {
733: case '\'':
734: error('s',"wide character constant");
735: unget(lxchar);
736: continue;
737: case '"':
738: error('s',"wide character string");
739: unget(lxchar);
740: continue;
741: }
742: unget(lxchar);
743: lxchar = ll;
744: }
745: }
746: lxget( lxchar, LEXLET|LEXDIG );
747:
748: //error( 'd', "lex: bl_level: %d txtstart %s", bl_level, txtstart);
749: // local class
750: if (n = ktbl->look(txtstart,0)) {
751: TOK x;
752: del_txt();
753: switch (x=n->base) {
754: case TNAME:
755: //('d',"lex tname %n",n);
756: if (bl_level > 1) {
757: Pname nn = ktbl->look(txtstart,LOCAL);
758: if ( nn ) {
759: n = nn;
760: //error( 'd', "lex: local class instance: %n", nn );
761: }
762: }
763: retn(TNAME,n);
764: case LOC:
765: retl(n->syn_class);
766: case EXTERN:
767: if ((lxchar = eat_whitespace()) == '\"') {
768: // linkage directive
769: get_string();
770: rets(LINKAGE,txtstart);
771: }
772: unget(lxchar);
773: reti(TYPE,EXTERN);
774: case CATCH:
775: case TEMPLATE:
776: error('s',"%k",n->syn_class);
777: continue;
778: default:
779: #ifdef DK
780: if (get(lxchar) == '=')
781: switch (x) {
782: case OR: reti(ASOP,ASOR);
783: case ER: reti(ASOP,ASER);
784: case AND: reti(ASOP,ASAND);
785: }
786: saved = lxchar;
787:
788: #endif
789: reti(n->syn_class,x);
790: }
791: }
792: else
793: // local class
794: if ( bl_level &&
795: (n=ktbl->look(txtstart,LOCAL)) )
796: {
797: //error( 'd', "lex2: local class instance: %n", n );
798: retn(TNAME,n);
799: }
800: else
801: rets(ID,txtstart);
802:
803: case A_DIG:
804:
805: ret = ICON;
806:
807: if (lxchar=='0') { /* octal or hexadecimal number */
808: pch('0');
809: switch (get(lxchar)) {
810: case 'l':
811: case 'L':
812: pch('L');
813: pch(0);
814: rets(ICON,txtstart);
815: case 'e':
816: case 'E':
817: // lxget(lxchar,LEXDIG);
818: // goto getfp;
819: goto getfp2;
820: case 'x':
821: case 'X':
822: lxget('X',LEXHEX);
823: if (txtfree-txtstart<4) // minimum "0Xd\0"
824: error("hexadecimal digitX after \"0x\"");
825: switch (get(lxchar)) {
826: case 'l':
827: case 'L':
828: txtfree--;
829: pch('L');
830: pch(0);
831: break;
832: default:
833: saved = lxchar;
834: }
835: rets(ICON,txtstart);
836: case '8':
837: case '9':
838: {error("%c used as octal digit",lxchar);}
839: case '0':
840: case '1':
841: case '2':
842: case '3':
843: case '4':
844: case '5':
845: case '6':
846: case '7':
847: pch(lxchar);
848: ox:
849: switch (get(lxchar)) {
850: case '8':
851: case '9':
852: {error("%c used as octal digit",lxchar);}
853: case '0':
854: case '1':
855: case '2':
856: case '3':
857: case '4':
858: case '5':
859: case '6':
860: case '7':
861: pch(lxchar);
862: goto ox;
863: case 'l':
864: case 'L':
865: pch('L');
866: pch(0);
867: break;
868: default:
869: pch(0);
870: saved = lxchar;
871: }
872: rets(ICON,txtstart);
873: case '.':
874: lxget('.',LEXDIG);
875: goto getfp;
876: default:
877: saved = lxchar;
878: reti(ZERO,0);
879: }
880: }
881: else
882: lxget(lxchar,LEXDIG);
883:
884: if (get(lxchar) == '.') {
885: txtfree--;
886: lxget('.', LEXDIG );
887: getfp:
888: ret = FCON;
889: get(lxchar);
890: };
891:
892: switch (lxchar) {
893: case 'f':
894: case 'F':
895: txtfree--;
896: pch('F');
897: break;
898: case 'e':
899: case 'E':
900: txtfree--;
901: switch (get(lxchar)) {
902: case '-':
903: case '+':
904: pch('e');
905: break;
906: default:
907: unget(lxchar);
908: lxchar = 'e';
909: };
910: getfp2:
911: lxget( lxchar, LEXDIG );
912: ret = FCON;
913: break;
914: case 'u':
915: case 'U':
916: if (ret==FCON) error("%c suffix for floating constant",lxchar);
917: case 'l':
918: case 'L':
919: txtfree--;
920: pch(lxchar);
921: switch (get(lxchar)) { // ul, Lu, ets.
922: case 'l':
923: case 'L':
924: case 'u':
925: case 'U':
926: pch(lxchar);
927: break;
928: default:
929: saved = lxchar;
930: }
931: break;
932: default:
933: saved = lxchar;
934: };
935:
936: pch(0);
937: rets(ret,txtstart);
938:
939: case A_DOT:
940: /* if (get(lxchar) == '.') { // look for ellipsis
941: if (get(lxchar) != '.') {
942: error("token .. ?");
943: saved = lxchar;
944: }
945: reti(ELLIPSIS,0);
946: }
947: */
948: switch (get(lxchar)) {
949: case '.': // look for ellipsis
950: if (get(lxchar) != '.') {
951: error("token .. ?");
952: saved = lxchar;
953: }
954: reti(ELLIPSIS,0);
955: case '*':
956: reti (REFMUL,DOT);
957: }
958:
959: if (lxmask[lxchar+1] & LEXDIG){// look for floating constant
960: unget(lxchar);
961: lxget( '.', LEXDIG );
962: goto getfp;
963: }
964: saved = lxchar;
965: reti(DOT,0);
966:
967: case A_STR:
968: /* save string constant in buffer */
969: get_string();
970: rets(STRING,txtstart);
971:
972: case A_CC:
973: /* character constant */
974: rets(CCON,chconst());
975:
976: case A_BCD:
977: {
978: register i;
979: int j;
980:
981: pch('`');
982:
983: for (i=0; i<7; ++i) {
984: pch(get(j));
985: if (j == '`' ) break;
986: }
987: pch(0);
988: if (6<i)
989: error('l',"bcd constant exceeds 6 characters" );
990: rets(CCON,txtstart);
991: }
992:
993: case A_SL: /* / */
994: switch (get(lxchar)) {
995: case '*':
996: lxcom();
997: break;
998: case '/':
999: linecom();
1000: break;
1001: case '=':
1002: reti(ASOP,ASDIV);
1003: default:
1004: saved = lxchar;
1005: reti(DIVOP,DIV);
1006: }
1007:
1008: case A_WS:
1009: continue;
1010:
1011: case A_NL:
1012: ++curloc.line;
1013: // Nline++;
1014: saved = lxtitle();
1015: continue;
1016:
1017: case A_LC:
1018: #ifdef DK
1019: alc:
1020: #endif
1021: if (BLMAX <= bl_level++) {
1022: error('l',"blocks too deeply nested");
1023: ext(3);
1024: }
1025: retl(LC);
1026:
1027: case A_RC:
1028: #ifdef DK
1029: arc:
1030: #endif
1031: if (lcount+bl_level-- <= 0) {
1032: error("unexpected '}'");
1033: bl_level = 0;
1034: }
1035: retl(RC);
1036:
1037: case A_L:
1038: #ifdef DK
1039: if (get(lxchar) == ':') // (# is {
1040: goto alc;
1041: else
1042: saved = lxchar;
1043: #endif
1044: br_level++;
1045: reti(LP,0);
1046:
1047: case A_R:
1048: if (br_level-- <= 0) {
1049: error("unexpected ')'");
1050: br_level = 0;
1051: }
1052: reti(RP,0);
1053:
1054: case A_ASS:
1055: switch (get(lxchar)) {
1056: case '=':
1057: reti(EQUOP,EQ);
1058: default:
1059: saved = lxchar;
1060: reti(ASSIGN,ASSIGN);
1061: }
1062:
1063: case A_COL:
1064: switch (get(lxchar)) {
1065: case ':':
1066: reti(MEM,0);
1067: case '=':
1068: error("':=' is not a c++ operator");
1069: reti(ASSIGN,ASSIGN);
1070: #ifdef DK
1071: if (get(lxchar)==')') goto arc; // :) is }
1072: unget(lxchar);
1073: #endif
1074: default:
1075: saved = lxchar;
1076: reti(COLON,COLON);
1077: }
1078: case A_NOT:
1079: switch (get(lxchar)) {
1080: case '=':
1081: reti(EQUOP,NE);
1082: default:
1083: saved = lxchar;
1084: reti(NOT,NOT);
1085: }
1086: case A_GT:
1087: switch(get(lxchar)) {
1088: case '>':
1089: switch (get(lxchar)) {
1090: case '=':
1091: reti(ASOP,ASRS);
1092: break;
1093: default:
1094: saved = lxchar;
1095: reti(SHIFTOP,RS);
1096: }
1097: case '=':
1098: reti(RELOP,GE);
1099: default:
1100: saved = lxchar;
1101: reti(RELOP,GT);
1102: }
1103: case A_LT:
1104: switch (get(lxchar)) {
1105: case '<':
1106: switch (get(lxchar)) {
1107: case '=':
1108: reti(ASOP,ASLS);
1109: default:
1110: saved = lxchar;
1111: reti(SHIFTOP,LS);
1112: }
1113: case '=':
1114: reti(RELOP,LE);
1115: default:
1116: saved = lxchar;
1117: reti(RELOP,LT);
1118: }
1119: case A_AND:
1120: switch (get(lxchar)) {
1121: case '&':
1122: reti(ANDAND,ANDAND);
1123: case '=':
1124: reti(ASOP,ASAND);
1125: default:
1126: saved = lxchar;
1127: reti(AND,AND);
1128: }
1129: case A_OR:
1130: switch (get(lxchar)) {
1131: case '|':
1132: reti(OROR,OROR);
1133: case '=':
1134: reti(ASOP,ASOR);
1135: default:
1136: saved = lxchar;
1137: reti(OR,OR);
1138: }
1139: case A_ER:
1140: switch (get(lxchar)) {
1141: case '=':
1142: reti(ASOP,ASER);
1143: default:
1144: saved = lxchar;
1145: reti(ER,ER);
1146: }
1147: case A_PL:
1148: switch (get(lxchar)) {
1149: case '=':
1150: reti(ASOP,ASPLUS);
1151: case '+':
1152: reti(ICOP,INCR);
1153: default:
1154: saved = lxchar;
1155: reti(PLUS,PLUS);
1156: }
1157: case A_MIN:
1158: switch (get(lxchar)) {
1159: case '=':
1160: reti(ASOP,ASMINUS);
1161: case '-':
1162: reti(ICOP,DECR);
1163: case '>':
1164: if (get(lxchar) == '*')
1165: {reti(REFMUL,REF);}
1166: else
1167: saved = lxchar;
1168: reti(REF,REF);
1169: default:
1170: saved = lxchar;
1171: reti(MINUS,MINUS);
1172: }
1173: case A_MUL:
1174: switch (get(lxchar)) {
1175: case '=':
1176: reti(ASOP,ASMUL);
1177: case '/':
1178: error('w',"*/ not as end of comment");
1179: default:
1180: saved = lxchar;
1181: reti(MUL,MUL);
1182: }
1183: case A_MOD:
1184: switch (get(lxchar)) {
1185: case '=':
1186: reti(ASOP,ASMOD);
1187: default:
1188: saved = lxchar;
1189: reti(DIVOP,MOD);
1190: }
1191: default:
1192: {error('i',"lex act==%d getc()->%d",p,lxchar);}
1193:
1194: }
1195:
1196: error('i',"lex, main switch");
1197: }
1198:
1199: }
1200:
1201: int lxtitle()
1202: /*
1203: called after a newline; set linenumber and file name
1204: */
1205: {
1206: register c;
1207:
1208: for(;;)
1209: switch ( get(c) ) {
1210: default: // e.g. not '\n', not '#'
1211: return c;
1212: case '\n':
1213: curloc.line++;
1214: // Nline++;
1215: ll:
1216: break;
1217: case '#': /* # lineno "filename" */
1218: { int cl = curloc.line;
1219: curloc.line = 0;
1220: for(;;)
1221: switch (get(c)) {
1222: case '"':
1223: start_txt();
1224: for(;;)
1225: switch (get(c)) {
1226: case '"':
1227: pch('\0');
1228:
1229: while (get(c) != '\n') ; // skip to eol.. ignore anything more
1230:
1231: if (*txtstart) { // stack file name
1232: char* fn;
1233: if (curr_file == 0){
1234: if (( fn = file_name[0])
1235: && (strcmp(txtstart,fn)!=0)){ // 1st include
1236: if (MAXFILE*4<++Nfile) error('i',"fileN buffer overflow");
1237: if (MAXFILE<++curr_file) error('i',"fileN stack overflow");
1238: file_stack[curr_file] = Nfile;
1239:
1240: char* p1 = new char[txtfree-txtstart];
1241: (void) strcpy(p1,txtstart);
1242: file_name[Nfile] = p1;
1243: // Nstr++;
1244: }
1245: else { //&& line is dummy #line "input.c"
1246: // ignore
1247: }
1248: //&& dead, dead, dead goto push;
1249: }
1250: else if ( (fn=file_name[file_stack[curr_file]])
1251: && (strcmp(txtstart,fn)==0) ) {
1252: //new line, same file: ignore
1253: }
1254: else if ( (fn=file_name[file_stack[curr_file-1]])
1255: && (strcmp(txtstart,fn)==0) ) {
1256: // previous file: pop
1257: curr_file--;
1258: }
1259: else { // new file name: push
1260: //&& push:
1261: if (MAXFILE*4<Nfile++) error('i',"fileN buffer overflow");
1262: if (MAXFILE<curr_file++) error('i',"fileN stack overflow");
1263: file_stack[curr_file] = Nfile;
1264:
1265: char* p = new char[txtfree-txtstart];
1266: (void) strcpy(p,txtstart);
1267: file_name[Nfile] = p;
1268: // Nstr++;
1269: }
1270: }
1271: else { // no name .. back to the original .c file: ""
1272: curr_file = 0;
1273: }
1274: del_txt();
1275: curloc.file = file_stack[curr_file];
1276: goto ll;
1277: case '\n':
1278: error("unexpected end of line on '# line'");
1279: default:
1280: pch(c);
1281: }
1282: case ' ':
1283: break;
1284:
1285: case '0':
1286: case '1':
1287: case '2':
1288: case '3':
1289: case '4':
1290: case '5':
1291: case '6':
1292: case '7':
1293: case '8':
1294: case '9':
1295: curloc.line = curloc.line*10+c-'0';
1296: break;
1297:
1298: case 'l': // look for "#line ..." and then ignore "line"
1299: if (get(c)=='i' && get(c)=='n' && get(c)=='e') break;
1300: case '\n':
1301: curloc.putline();
1302: goto ll;
1303:
1304: default: // detect #pragma
1305: // pass #rubbish through
1306: { char* p = txtstart+1;
1307: curloc.line = cl;
1308: pch('#');
1309: pch(c);
1310: while (get(c) != '\n') pch(c);
1311: pch('\0');
1312: if (strncmp(p,"pragma",6)==0) {
1313: p += 6;
1314: while (*p==' ' || *p=='\t') p++;
1315: if (strncmp(p,"linkage",7)==0) {
1316: if (bl_level) error("linkage pragma inside block");
1317: p += 7;
1318: while (*p==' ' || *p=='\t') p++;
1319: set_linkage(p);
1320: }
1321: else
1322: fprintf(out_file,"\n%s\n",txtstart);
1323: }
1324: else
1325: fprintf(out_file,"\n%s\n",txtstart);
1326:
1327: // fprintf(out_file,"\n%s\n",txtstart);
1328: start_txt();
1329: curloc.line++;
1330: // Nline++;
1331: goto ll;
1332: }
1333: }
1334: }
1335: }
1336: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.