|
|
1.1 root 1: /*ident "@(#)ctrans:src/lex.c 1.3.4.17" */
2: /***************************************************************************
3:
4: C++ source for cfront, the C++ compiler front-end
5: written in the computer science research center of Bell Labs
6:
7: Copyright (c) 1984 AT&T, Inc. All Rights Reserved
8: THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF AT&T, INC.
9:
10: lex.c:
11: lexical analyser based on pcc's and cpre's scanners
12: modified to handle classes:
13: new keywords: class
14: public
15: call
16: etc.
17: names are not entered in the symbol table by lex()
18: names can be of arbitrary length
19: error() is used to report errors
20: {} and () must match
21: numeric constants are not converted into internal representation
22: but stored as strings
23:
24: ****************************************************************************/
25:
26: #include "cfront.h"
27: #include "yystype.h"
28: #include "size.h"
29: #include "tqueue.h"
30: #include "template.h"
31:
/* expands to its argument unchanged */
# define CCTRANS(x) x

/*
    yields a strdup()ed copy of s while templp reports a template (or a
    template parameter list) in progress, and s itself otherwise
*/
#define copy_if_need_be(s) ((templp->in_progress || templp->parameters_in_progress) ? strdup(s) : (s))
35:
/* lexical actions */
/* one of these is stored in LXDOPE::lxact and selects the case taken in tlex() */

#define A_ERR 0 /* illegal character */
#define A_LET 1 /* saw a letter */
#define A_DIG 2 /* saw a digit */
#define A_1C 3 /* return a single character */
#define A_STR 4 /* string */
#define A_CC 5 /* character constant */
#define A_BCD 6 /* GCOS BCD constant */
#define A_SL 7 /* saw a / */
#define A_DOT 8 /* saw a . */
#define A_2C 9 /* possible two character symbol */
#define A_WS 10 /* whitespace (not \n) */
#define A_NL 11 /* \n */
#define A_LC 12 /* { */
#define A_RC 13 /* } */
#define A_L 14 /* ( */
#define A_R 15 /* ) */
#define A_EOF 16 /* end of input */
#define A_ASS 17 /* = */
#define A_LT 18 /* < */
#define A_GT 19 /* > */
#define A_ER 20 /* ^ */
#define A_OR 21 /* | */
#define A_AND 22 /* & */
#define A_MOD 23 /* % */
#define A_NOT 24 /* ! */
#define A_MIN 25 /* - */
#define A_MUL 26 /* * */
#define A_PL 27 /* + */
#define A_COL 28 /* : */
#define A_SHARP 29 /* # */
#define A_DOLL 30 /* $ */

/* character classes */
/* bit masks or-ed into lxmask[] by lxenter() and tested by lxget() */

# define LEXLET 01 /* letters and '_' */
# define LEXDIG 02 /* decimal digits */
/* no LEXOCT because 8 and 9 used to be octal digits */
# define LEXHEX 010 /* hexadecimal digits */
# define LEXWS 020 /* whitespace other than newline */
# define LEXDOT 040 /* '.' */
78:
const FIRSTCHUNK = 8*1024-8;    // size of the static buffer inbuf
const BUFCHUNK = 4*1024-8;      // size of the text area of each overflow chunk

/* text buffer */
/*
    token text is appended at txtfree; txtstart marks the start of the
    token being collected and txtmax the last slot of the active buffer,
    which pch() never stores into directly
*/
static char inbuf[FIRSTCHUNK/*TBUFSZ*/];
char* txtmax = &inbuf[FIRSTCHUNK/*TBUFSZ*/-1];
char* txtstart = 0;
char* txtfree = 0;

static struct buf* bufhead;     // chunks currently in use, newest first
static buf* freebuf;            // chunks recycled by lex_clear()
//static bufs;

/* overflow chunk used once the active buffer fills up (see new_buf()) */
struct buf {
    buf* next;
    char chars[BUFCHUNK];
//  buf() { next=bufhead; bufhead=this; }
};
97:
98: new_buf(char c)
99: {
100: //fprintf(stderr,"new_buf %d\n",bufs++);
101: buf* pbuf;
102: if (freebuf) {
103: pbuf = freebuf;
104: freebuf = freebuf->next;
105: }
106: else
107: pbuf = new buf; // allocate and register new chunk
108: pbuf->next = bufhead;
109: bufhead = pbuf;
110:
111: if (BUFCHUNK < txtmax-txtstart) error('l',"lexical token too long");
112:
113: // copy current token:
114: char* p = txtstart;
115: txtstart = txtfree = &pbuf->chars[0];
116: while (p<txtmax) *txtfree++ = *p++;
117: *txtfree++=c;
118: txtmax = &pbuf->chars[BUFCHUNK-1];
119: return 0;
120: }
121:
122:
/* append c to the token text; switches to a new chunk via new_buf() when the active buffer is full */
#define pch(c) ((txtmax<=txtfree)?new_buf(c):(*txtfree++=c))
/* begin a new token at the current free position */
#define start_txt() txtstart = txtfree
/* give back the text collected since start_txt() */
#define del_txt() txtfree = txtstart

/*static*/ char* file_name[MAXFILE*4]; // source file names
// file_name[0] == src_file_name
// file_name[0] == 0 means stdin
static short file_stack[MAXFILE]; // stack of file name indices
int curr_file; // current index in file_stack
// that is current #include nest level

int linkage; // linkage is default C++
// linkage==0 => C++ linkage
// linkage==1 => C linkage
const LINKMAX = 10;             // number of slots in lvec
static lvec[LINKMAX];           // linkage in force at each nesting level, see set_linkage()
int lcount;                     // current linkage nesting level
140:
141: void pragma_set_linkage (char *p)
142: {
143: if (bl_level) error("linkage pragma inside block");
144: set_linkage(p);
145: }
146:
147:
148: void set_linkage(char* p)
149: {
150: if (p==0 || *p == 0) { // resume previous linkage
151: if (lcount) linkage = lvec[--lcount];
152: }
153: else {
154: if (LINKMAX<=++lcount)
155: error('l',"linkage directive nested too deep");
156: if (strcmp(p,"C")==0)
157: lvec[lcount] = linkage = 1;
158: else if (strcmp(p,"C++")==0)
159: lvec[lcount] = linkage = 0;
160: else
161: error("%s linkage",p);
162: }
163: }
164:
class loc curloc;               // location (file index, line) the scanner has reached
FILE * out_file = stdout;       // where #line information and passed-through directives are written
FILE * in_file = stdin;         // stream read by get()
Ptable ktbl;                    // type names; see ktbl_init()
Ptable keyword_table;           // reserved words; see ktbl_init()
int br_level = 0; /* number of unmatched ``(''s */
int bl_level = 0; /* number of unmatched ``{''s */

/* size of the character set handled by the lxmask[] and lxcp[] tables */
# ifdef ibm

# define CSMASK 0377
# define CSSZ 256

# else

# define CSMASK 0177
# define CSSZ 128

# endif

/* character class bits, indexed by character+1 so that EOF (-1) has slot 0 */
static short lxmask[CSSZ+1];

int saved = 0; /* putback character, avoid ungetchar */
static int lxtitle();

/* rt(): wrap a value in a YYSTYPE, using the member that matches its type */
inline YYSTYPE rt(char* x) { YYSTYPE y; y.s = x; return y; }
inline YYSTYPE rt(int x) { YYSTYPE y; y.t = x; return y; }
inline YYSTYPE rt(loc x) { YYSTYPE y; y.l = x; return y; }
inline YYSTYPE rt(void* x) { YYSTYPE y; y.pn = Pname(x); return y; }

/* read the next character of in_file into c / push c back onto in_file */
#define get(c) (c=getc(in_file))
#define unget(c) (ungetc(c,in_file))

/*
    hand token "a" with its value to addtok() and return "a" from the
    enclosing function; the variants differ only in how the value is
    wrapped (int, node pointer, string, current location)
*/
#define reti(a,b) { addtok(a, rt(b)); return a; }
#define retn(a,b) { addtok(a, rt((Pnode)b)); return a; }
#define rets(a,b) { addtok(a, rt(b)); return a; }
#define retl(a) { addtok(a, rt(curloc)); return a; }
202:
203: void new_key(char* s, TOK toknum, TOK yyclass)
204: /*
205: make "s" a new keyword with the representation (token) "toknum"
206: "yyclass" is the yacc token (for example new_key("int",INT,TYPE); )
207: "yyclass==0" means yyclass=toknum;
208: */
209: {
210: Pname n = new name(s);
211:
212: keys[(toknum==LOC)?yyclass:toknum] = s;
213: n = new name(s);
214: Pname nn = keyword_table->insert(n,0);
215: if (Nold) error('i',"keyword %sD twice",s);
216: nn->base = toknum;
217: nn->syn_class = (yyclass) ? yyclass : toknum;
218: delete n;
219: }
220:
221: void ktbl_init()
222: /*
223: enter keywords into keyword table for use by lex()
224: and into keyword representation table used for output
225: */
226: {
227: /* The ktbl is only for types. We put nothing in it. */
228: ktbl = new table(KTBLSIZE,0,0);
229:
230: /* The keyword_table is for the reserved words */
231: keyword_table = new table(67,0,0);
232:
233: new_key("asm",ASM,0);
234: new_key("auto",AUTO,TYPE);
235: new_key("break",LOC,BREAK);
236: new_key("case",LOC,CASE);
237: new_key("continue",LOC,CONTINUE);
238: new_key("char",CHAR,TYPE);
239: new_key("do",LOC,DO);
240: new_key("double",DOUBLE,TYPE);
241: new_key("default",LOC,DEFAULT);
242: new_key("enum",ENUM,0);
243: new_key("else",LOC,ELSE);
244: new_key("extern",EXTERN,TYPE);
245: new_key("float",FLOAT,TYPE);
246: new_key("for",LOC,FOR);
247: // new_key("fortran",FORTRAN,0);
248: new_key("goto",LOC,GOTO);
249: new_key("catch",CATCH,CATCH);
250: new_key("if",LOC,IF);
251: new_key("int",INT,TYPE);
252: new_key("long",LONG,TYPE);
253: new_key("return",LOC,RETURN);
254: new_key("register",REGISTER,TYPE);
255: new_key("static",STATIC,TYPE);
256: new_key("struct",STRUCT,AGGR);
257: new_key("sizeof",SIZEOF,0);
258: new_key("short",SHORT,TYPE);
259: new_key("switch",LOC,SWITCH);
260: new_key("typedef",TYPEDEF,TYPE);
261: new_key("unsigned",UNSIGNED,TYPE);
262: new_key("union",UNION,AGGR);
263: new_key("void",VOID,TYPE);
264: new_key("while",LOC,WHILE);
265:
266: new_key("class",CLASS,AGGR);
267: new_key("const",CONST,TYPE);
268: new_key("delete",LOC,DELETE);
269: new_key("friend",FRIEND,TYPE);
270: new_key("inline",INLINE,TYPE);
271: new_key("new",NEW,0);
272: new_key("operator",OPERATOR,0);
273: new_key("overload",OVERLOAD,TYPE);
274: new_key("private",PRIVATE,PR);
275: new_key("protected",PROTECTED,PR);
276: new_key("public",PUBLIC,PR);
277: new_key("signed",SIGNED,TYPE);
278: new_key("template",TEMPLATE,0);
279: new_key("this",THIS,0);
280: new_key("virtual",VIRTUAL,TYPE);
281: new_key("volatile",VOLATILE,TYPE);
282:
283: new_key("__statement", STATEMENT, 0) ;
284: new_key("__expression", EXPRESSION, 0) ;
285: new_key("__template_test", TEMPLATE_TEST, 0) ;
286: #ifdef DK
287: new_key("or",OR,0);
288: new_key("cor",OROR,0);
289: new_key("and",AND,0);
290: new_key("cand",ANDAND,0);
291: new_key("xor",ER,0);
292: new_key("compl",COMPL,0);
293: #endif
294: }
295:
extern char* src_file_name;     // name used when a file_name[] slot is empty
extern char* line_format;       // fprintf format used by loc::putline(); receives line, file name
loc last_line;                  // location most recently written by loc::putline()
299:
300: void loc::putline()
301: {
302: if (file==0 && line==0) return;
303: // fix, Nfile, not MAXFILE; Nfile == MAXFILE*4
304: // if (0<=file && file<MAXFILE) {
305: if ( 0<=file && file <= Nfile ) {
306: char* f = file_name[file];
307: if (f==0) f = (src_file_name) ? src_file_name : "";
308: fprintf(out_file,line_format,line,f);
309: last_line = *this;
310: }
311: }
312:
313: void loc::put(FILE* p)
314: {
315: // fix, Nfile, not MAXFILE; Nfile == MAXFILE*4
316: // if (0<=file && file<MAXFILE) {
317: if ( 0<=file && file <= Nfile ) {
318: char* f = file_name[file];
319: if (f==0) f = (src_file_name) ? src_file_name : "";
320: fprintf(p,"\"%s\", line %d: ",f,line);
321: }
322: }
323:
324: void lxenter(register char* s, short m)
325: /* enter a mask into lxmask */
326: {
327: register c;
328:
329: while( c= *s++ ) lxmask[c+1] |= m;
330:
331: }
332:
333:
334: void lxget(register c, register m)
335: /*
336: put 'c' back then scan for members of character class 'm'
337: terminate the string read with \0
338: txtfree points to the character position after that \0
339: */
340: {
341: pch(c);
342: while ( (get(c), lxmask[c+1]&m) ) pch(c);
343: unget(c);
344: pch('\0');
345: }
346:
/*
    dispatch table of the scanner: one entry per character that needs
    its own action.  lex_init() first points every lxcp[] slot at
    entry 0, so entry 0 is also what every character not listed here
    dispatches to; entries 1, 2 and 3 are shared by all letters, digits
    and whitespace characters respectively.
*/
struct LXDOPE {
    short lxch; /* the character */
    short lxact; /* the action to be performed */
    TOK lxtok; /* the token number to be returned */
} lxdope[] = {
#ifdef apollo
    '@', A_ERR, 0, /* illegal characters go here... */
#else
    '$', A_DOLL, 0,
//  '$', A_ERR, 0, /* illegal characters go here... */

#endif
    '_', A_LET, 0, /* letters point here */
    '0', A_DIG, 0, /* digits point here */
    ' ', A_WS, 0, /* whitespace goes here */
    '\n', A_NL, 0,
    '"', A_STR, 0, /* character string */
    '\'', A_CC, 0, /* ASCII character constant */
    '`', A_BCD, 0, /* 'foreign' character constant, e.g. BCD */
    '(', A_L, LP,
    ')', A_R, RP,
    '{', A_LC, LC,
    '}', A_RC, RC,
    ']', A_1C, RB,
    '[', A_1C, LB,
    '*', A_MUL, MUL,
    '?', A_1C, QUEST,
    ':', A_COL, COLON,
    '+', A_PL, PLUS,
    '-', A_MIN, MINUS,
    '/', A_SL, DIV,
    '%', A_MOD, MOD,
    '&', A_AND, AND,
    '|', A_OR, OR,
    '^', A_ER, ER,
    '!', A_NOT, NOT,
    '~', A_1C, COMPL,
    ',', A_1C, CM,
    ';', A_1C, SM,
    '.', A_DOT, DOT,
    '<', A_LT, LT,
    '>', A_GT, GT,
    '=', A_ASS, ASSIGN,
    '#', A_SHARP, 0,
    EOF, A_EOF, EOFTOK
};
/* note: EOF is used as sentinel, so must be <=0 and last entry in table */

/* lxdope entry for each character, indexed by character+1 (slot 0 is EOF) */
static struct LXDOPE *lxcp[CSSZ+1];
396:
397: void lex_init()
398: {
399: register struct LXDOPE *p;
400: register i;
401: register char *cp;
402: /* set up character classes */
403:
404: /* first clear lexmask */
405: for(i=0; i<=CSSZ; i++) lxmask[i] = 0;
406:
407: #ifdef apollo
408: lxenter( "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_$", LEXLET );
409: #else
410: lxenter( "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_", LEXLET );
411: #endif
412: lxenter( "0123456789", LEXDIG );
413: lxenter( "0123456789abcdefABCDEF", LEXHEX );
414: /* \013 should become \v someday; \013 is OK for ASCII and EBCDIC */
415: lxenter( " \t\r\b\f\013", LEXWS );
416: lxmask['.'+1] |= LEXDOT;
417:
418: /* make lxcp point to appropriate lxdope entry for each character */
419:
420: /* initialize error entries */
421:
422: for( i= 0; i<=CSSZ; ++i ) lxcp[i] = lxdope;
423:
424: /* make unique entries */
425:
426: for( p=lxdope; ; ++p ) {
427: lxcp[p->lxch+1] = p;
428: if( p->lxch < 0 ) break;
429: }
430:
431: /* handle letters, digits, and whitespace */
432: /* by convention, first, second, and third places */
433:
434: cp = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
435: while( *cp ) lxcp[*cp++ + 1] = &lxdope[1];
436: cp = "123456789";
437: while( *cp ) lxcp[*cp++ + 1] = &lxdope[2];
438: cp = "\t\b\r\f\013";
439: while( *cp ) lxcp[*cp++ + 1] = &lxdope[3];
440:
441: file_name[0] = src_file_name;
442: // curloc.file = 0; // spurious: curloc is a static
443: curloc.line = 1;
444:
445: ktbl_init();
446: lex_clear();
447: saved = lxtitle();
448: }
449:
450: void lex_clear()
451: {
452: // delete extra buffers:
453: buf* p = bufhead;
454: bufhead = 0;
455: //if (p) {
456: //fprintf(stderr,"lex_clear\n");
457: //bufs=0;
458: //}
459: while (p) {
460: buf* pp = p;
461: p = p->next;
462: pp->next = freebuf;
463: freebuf = pp;
464: }
465:
466: // re-set to static buffer:
467: txtstart = txtfree = inbuf;
468: txtmax = &inbuf[FIRSTCHUNK/*TBUFSZ*/-1];
469: }
470:
int int_val(char hex)
/*
    value (0..15) of the hexadecimal digit "hex", upper or lower case
    returns -1 when "hex" is not a hexadecimal digit, so the function
    always returns a defined value
*/
{
    if ('0' <= hex && hex <= '9') return hex-'0';
    if ('a' <= hex && hex <= 'f') return hex-'a'+10;
    if ('A' <= hex && hex <= 'F') return hex-'A'+10;
    return -1;
}
483:
484: void hex_to_oct()
485: /*
486: \x has been seen on input (in char const or string) and \ printed
487: read the following hexadecimal integer and replace it with an octal
488: */
489: {
490: int i = 0;
491: int c;
492: get(c);
493: if (lxmask[c+1] & LEXHEX) {
494: i = int_val(c);
495: get(c); // try for two
496: if (lxmask[c+1] & LEXHEX) {
497: i = (i<<4) + int_val(c);
498: get(c); // try for three
499: if (lxmask[c+1] & LEXHEX)
500: i = (i<<4) + int_val(c);
501: else
502: unget(c);
503: }
504: else
505: unget(c);
506: }
507: else {
508: error("hexadecimal digitE after \\x");
509: unget(c);
510: }
511:
512: // if (0377 < i) error('l',"hexadecimal constant too large");
513: i &= 0377;
514:
515: pch(('0'+(i>>6)));
516: pch(('0'+((i&070)>>3)));
517: pch(('0'+(i&7)));
518: }
519:
520:
521: char * chconst()
522: /*
523: read a character constant into inbuf
524: */
525: {
526: register c;
527: int nch = 0;
528:
529: pch('\'');
530:
531: for(;;) {
532: char* p;
533: char cc = 0;
534:
535: switch (get(c)) {
536: case '\'':
537: goto ex;
538: case EOF:
539: error("eof in char constant");
540: goto ex;
541: case '\n':
542: error("newline in char constant");
543: goto ex;
544: case '\\':
545: if (SZ_INT == nch++) error('l',"char constant too long");
546: pch(c);
547: switch (get(c)){
548: case '\n':
549: ++curloc.line;
550: default:
551: pch(c);
552: break;
553: case '4': case '5': case '6': case '7': // octal
554: p = txtfree;
555: cc = c-4;
556: case '0': case '1': case '2': case '3':
557: pch(c);
558: get(c); /* try for 2 */
559: if( lxmask[c+1] & LEXDIG && c<'8'){
560: pch(c);
561: get(c); /* try for 3 */
562: if (lxmask[c+1] & LEXDIG && c<'8') {
563: if (cc) *p = cc; // zap high bit
564: pch(c);
565: }
566: else
567: unget(c);
568: }
569: else
570: unget(c);
571: break;
572: case 'x': // hexadecimal
573: hex_to_oct();
574: break;
575: };
576: break;
577: default:
578: if (SZ_INT == nch++) error('l',"char constant too long");
579: pch(c);
580: }
581: }
582: ex:
583: pch('\'');
584: pch('\0');
585: return txtstart;
586: }
587:
588: void lxcom()
589: /* process a "block comment" */
590: {
591: register c;
592:
593: for(;;)
594: switch (get(c)) {
595: case EOF:
596: error('w',"eof in comment");
597: return;
598: case '\n':
599: curloc.line++;
600: // Nline++;
601: break;
602: case '*':
603: if (get(c) == '/') return;
604: unget(c);
605: break;
606: case '/':
607: if (get(c) == '*') error('w',"``/*'' in comment");
608: unget(c);
609: break;
610: }
611: }
612:
613:
614: void linecom()
615: // process a "line comment"
616: {
617: register c;
618:
619: for(;;)
620: switch (get(c)) {
621: case EOF:
622: error('w',"eof in comment");
623: return;
624: case '\n':
625: curloc.line++;
626: // Nline++;
627: saved = lxtitle();
628: return;
629: }
630: }
631:
632: char eat_whitespace()
633: {
634:
635: for(;;) {
636: register c = get(c);
637: lx:
638:
639: switch (c) {
640: case EOF:
641: error('w',"unexpected comment");
642: return EOF;
643: case '/':
644: switch (get(c)) {
645: case '*':
646: lxcom();
647: break;
648: case '/':
649: linecom();
650: break;
651: default:
652: unget(c);
653: return '/';
654: }
655: break;
656: case '\n':
657: ++curloc.line;
658: c = lxtitle();
659: goto lx;
660: case ' ':
661: case '\t':
662: break;
663: default:
664: return c;
665: }
666: }
667: }
668:
669: void get_string()
670: {
671: int lxchar;
672:
673: for(;;)
674: switch (get(lxchar)) {
675: case '\\':
676: pch('\\');
677: switch (get(lxchar)){
678: case '\n':
679: ++curloc.line;
680: default:
681: pch(lxchar);
682: break;
683: case 'x': // hexadecimal
684: hex_to_oct();
685: break;
686: };
687: break;
688: case '"':
689: { char* p = txtstart; // eat_whitespace() moves txtstart
690: if ((lxchar = eat_whitespace()) == '"') {
691: // string catenation, break with
692: // newline to avoid merging characters
693: // (e.g. "\xAB" "C")
694: pch('\\');
695: pch('\n');
696:
697: continue; // eat '\"' and carry on
698: };
699:
700: txtstart = p;
701: unget(lxchar);
702: pch(0);
703: return;
704: }
705: case '\n':
706: error("newline in string");
707: pch(0);
708: return;
709: case EOF:
710: error("eof in string");
711: pch(0);
712: return;
713: default:
714: pch(lxchar);
715: }
716: }
717:
718: TOK tlex()
719: {
720: TOK ret;
721: Pname n;
722:
723: // Ntoken++;
724:
725: for(;;) {
726: register lxchar;
727: register struct LXDOPE *p;
728:
729: start_txt();
730:
731: if (saved) {
732: lxchar = saved;
733: saved = 0;
734: }
735: else
736: get(lxchar);
737:
738: if (lxchar+1 >= CSSZ )
739: error( "illegal input character enountered: %d", lxchar );
740:
741: switch( (p=lxcp[lxchar+1])->lxact ){
742:
743: case A_1C: // eat up a single character, and return an opcode
744: reti(p->lxtok,p->lxtok);
745:
746: case A_EOF:
747: if (br_level || bl_level+lcount)
748: error("'%s' missing at end of input",(bl_level+lcount) ? "}" : ")");
749:
750: reti(EOFTOK,0);
751:
752: case A_SHARP:
753: // cope with header file not ended with '\n'
754: unget('#');
755: saved = lxtitle();
756: continue;
757:
758: case A_ERR:
759:
760: { if (' '<=lxchar && lxchar<='~') // ASCII printable
761: error("illegal character '%c' (ignored)",lxchar);
762: else
763: error("illegal character '0%o' (ignored)",lxchar);
764: continue;
765: }
766:
767: case A_DOLL:
768: { // lex a name of the for $id for template tree formals
769:
770: Pname fn ;
771: lxget( lxchar, LEXLET|LEXDIG ) ;
772: // TBD: make sure that the pragma is set
773:
774: if (!templp->in_progress || !txtstart[1]) {
775: // no name string immediately follows, treat it
776: // like an illegal character
777: error("illegal character '0%o' (ignored)",lxchar);
778: continue;
779: }
780: txtstart++ ;
781: if(fn=templ_compilation::tree_parameter(txtstart)){
782: switch (fn->n_template_arg) {
783: case name::template_expr_tree_formal:
784: // retain the $ in the name
785: retn(ID, strdup(--txtstart)) ;
786: case name::template_stmt_tree_formal:
787: retn(SM_PARAM, fn) ;
788: }
789: }
790: error("%s wasn't a statement or expression formal",
791: txtstart) ;
792: rets(ID, copy_if_need_be(txtstart)) ;
793: }
794:
795: case A_LET: // collect an identifier and check for keyword
796: {
797: char ll;
798: switch (ll = lxchar) {
799: // case 'l':
800: case 'L':
801: switch (get(lxchar)) {
802: case '\'':
803: error('s',"wide character constant");
804: unget(lxchar);
805: continue;
806: case '"':
807: error('s',"wide character string");
808: unget(lxchar);
809: continue;
810: }
811: unget(lxchar);
812: lxchar = ll;
813: }
814: }
815: lxget( lxchar, LEXLET|LEXDIG );
816:
817: //error( 'd', "lex: bl_level: %d txtstart %s", bl_level, txtstart);
818: // local class
819: /* look for a keyword or a global type */
820: if ((n = keyword_table->look(txtstart,0)) /* keyword */
821: || (n = ktbl->look(txtstart, 0))) { /* local type */
822: TOK x;
823: del_txt();
824: switch (x=n->base) {
825: case TNAME:
826: //('d',"lex tname %n",n);
827: if (bl_level > 1) {
828: Pname nn = ktbl->look(txtstart,LOCAL);
829: if ( nn ) {
830: n = nn;
831: //error( 'd', "lex: local class instance: %n", nn );
832: }
833: }
834: retn(TNAME,n);
835: case LOC:
836: retl(n->syn_class);
837: case EXTERN:
838: if ((lxchar = eat_whitespace()) == '\"') {
839: // linkage directive
840: get_string();
841: rets(LINKAGE,txtstart);
842: }
843: unget(lxchar);
844: reti(TYPE,EXTERN);
845: case CATCH:
846: /*
847: case TEMPLATE:
848: */
849: error('s',"%k",n->syn_class);
850: continue;
851: default:
852: #ifdef DK
853: if (get(lxchar) == '=')
854: switch (x) {
855: case OR: reti(ASOP,ASOR);
856: case ER: reti(ASOP,ASER);
857: case AND: reti(ASOP,ASAND);
858: }
859: saved = lxchar;
860:
861: #endif
862: reti(n->syn_class,x);
863: }
864: }
865: else
866: // local class
867: if ( bl_level &&
868: (n=ktbl->look(txtstart,LOCAL)) )
869: {
870: //error( 'd', "lex2: local class instance: %n", n );
871: retn(TNAME,n);
872: }
873: else
874: rets(ID, copy_if_need_be(txtstart)) ;
875:
876:
877: case A_DIG:
878:
879: ret = ICON;
880:
881: if (lxchar=='0') { /* octal or hexadecimal number */
882: pch('0');
883: switch (get(lxchar)) {
884: case 'l':
885: case 'L':
886: pch('L');
887: pch(0);
888: rets(ICON,txtstart);
889: case 'e':
890: case 'E':
891: // lxget(lxchar,LEXDIG);
892: // goto getfp;
893: goto getfp2;
894: case 'x':
895: case 'X':
896: lxget('X',LEXHEX);
897: if (txtfree-txtstart<4) // minimum "0Xd\0"
898: error("hexadecimal digitX after \"0x\"");
899: switch (get(lxchar)) {
900: case 'l':
901: case 'L':
902: txtfree--;
903: pch('L');
904: pch(0);
905: break;
906: default:
907: saved = lxchar;
908: }
909: rets(ICON,txtstart);
910: case '8':
911: case '9':
912: {error("%c used as octal digit",lxchar);}
913: case '0':
914: case '1':
915: case '2':
916: case '3':
917: case '4':
918: case '5':
919: case '6':
920: case '7':
921: pch(lxchar);
922: ox:
923: switch (get(lxchar)) {
924: case '8':
925: case '9':
926: {error("%c used as octal digit",lxchar);}
927: case '0':
928: case '1':
929: case '2':
930: case '3':
931: case '4':
932: case '5':
933: case '6':
934: case '7':
935: pch(lxchar);
936: goto ox;
937: case 'l':
938: case 'L':
939: pch('L');
940: pch(0);
941: break;
942: default:
943: pch(0);
944: saved = lxchar;
945: }
946: rets(ICON,txtstart);
947: case '.':
948: lxget('.',LEXDIG);
949: goto getfp;
950: default:
951: saved = lxchar;
952: reti(ZERO,0);
953: }
954: }
955: else
956: lxget(lxchar,LEXDIG);
957:
958: if (get(lxchar) == '.') {
959: txtfree--;
960: lxget('.', LEXDIG );
961: getfp:
962: ret = FCON;
963: get(lxchar);
964: };
965:
966: switch (lxchar) {
967: case 'f':
968: case 'F':
969: txtfree--;
970: pch('F');
971: break;
972: case 'e':
973: case 'E':
974: txtfree--;
975: switch (get(lxchar)) {
976: case '-':
977: case '+':
978: pch('e');
979: break;
980: default:
981: unget(lxchar);
982: lxchar = 'e';
983: };
984: getfp2:
985: lxget( lxchar, LEXDIG );
986: ret = FCON;
987: break;
988: case 'u':
989: case 'U':
990: if (ret==FCON) error("%c suffix for floating constant",lxchar);
991: case 'l':
992: case 'L':
993: txtfree--;
994: pch(lxchar);
995: switch (get(lxchar)) { // ul, Lu, ets.
996: case 'l':
997: case 'L':
998: case 'u':
999: case 'U':
1000: pch(lxchar);
1001: break;
1002: default:
1003: saved = lxchar;
1004: }
1005: break;
1006: default:
1007: saved = lxchar;
1008: };
1009:
1010: pch(0);
1011: rets(ret,txtstart);
1012:
1013: case A_DOT:
1014: /* if (get(lxchar) == '.') { // look for ellipsis
1015: if (get(lxchar) != '.') {
1016: error("token .. ?");
1017: saved = lxchar;
1018: }
1019: reti(ELLIPSIS,0);
1020: }
1021: */
1022: switch (get(lxchar)) {
1023: case '.': // look for ellipsis
1024: if (get(lxchar) != '.') {
1025: error("token .. ?");
1026: saved = lxchar;
1027: }
1028: reti(ELLIPSIS,0);
1029: case '*':
1030: reti (REFMUL,DOT);
1031: }
1032:
1033: if (lxmask[lxchar+1] & LEXDIG){// look for floating constant
1034: unget(lxchar);
1035: lxget( '.', LEXDIG );
1036: goto getfp;
1037: }
1038: saved = lxchar;
1039: reti(DOT,0);
1040:
1041: case A_STR:
1042: /* save string constant in buffer */
1043: get_string();
1044: rets(STRING,txtstart);
1045:
1046: case A_CC:
1047: /* character constant */
1048: rets(CCON,chconst());
1049:
1050: case A_BCD:
1051: {
1052: register i;
1053: int j;
1054:
1055: pch('`');
1056:
1057: for (i=0; i<7; ++i) {
1058: pch(get(j));
1059: if (j == '`' ) break;
1060: }
1061: pch(0);
1062: if (6<i)
1063: error('l',"bcd constant exceeds 6 characters" );
1064: rets(CCON,txtstart);
1065: }
1066:
1067: case A_SL: /* / */
1068: switch (get(lxchar)) {
1069: case '*':
1070: lxcom();
1071: break;
1072: case '/':
1073: linecom();
1074: break;
1075: case '=':
1076: reti(ASOP,ASDIV);
1077: default:
1078: saved = lxchar;
1079: reti(DIVOP,DIV);
1080: }
1081:
1082: case A_WS:
1083: continue;
1084:
1085: case A_NL:
1086: ++curloc.line;
1087: // Nline++;
1088: saved = lxtitle();
1089: continue;
1090:
1091: case A_LC:
1092: #ifdef DK
1093: alc:
1094: #endif
1095: if (BLMAX <= bl_level++) {
1096: error('l',"blocks too deeply nested");
1097: ext(3);
1098: }
1099: retl(LC);
1100:
1101: case A_RC:
1102: #ifdef DK
1103: arc:
1104: #endif
1105: if (lcount+bl_level-- <= 0) {
1106: error("unexpected '}'");
1107: bl_level = 0;
1108: }
1109: retl(RC);
1110:
1111: case A_L:
1112: #ifdef DK
1113: if (get(lxchar) == ':') // (# is {
1114: goto alc;
1115: else
1116: saved = lxchar;
1117: #endif
1118: br_level++;
1119: reti(LP,0);
1120:
1121: case A_R:
1122: if (br_level-- <= 0) {
1123: error("unexpected ')'");
1124: br_level = 0;
1125: }
1126: reti(RP,0);
1127:
1128: case A_ASS:
1129: switch (get(lxchar)) {
1130: case '=':
1131: reti(EQUOP,EQ);
1132: default:
1133: saved = lxchar;
1134: reti(ASSIGN,ASSIGN);
1135: }
1136:
1137: case A_COL:
1138: switch (get(lxchar)) {
1139: case ':':
1140: reti(MEM,0);
1141: case '=':
1142: error("':=' is not a c++ operator");
1143: reti(ASSIGN,ASSIGN);
1144: #ifdef DK
1145: if (get(lxchar)==')') goto arc; // :) is }
1146: unget(lxchar);
1147: #endif
1148: default:
1149: saved = lxchar;
1150: reti(COLON,COLON);
1151: }
1152: case A_NOT:
1153: switch (get(lxchar)) {
1154: case '=':
1155: reti(EQUOP,NE);
1156: default:
1157: saved = lxchar;
1158: reti(NOT,NOT);
1159: }
1160: case A_GT:
1161: switch(get(lxchar)) {
1162: case '>':
1163: switch (get(lxchar)) {
1164: case '=':
1165: reti(ASOP,ASRS);
1166: break;
1167: default:
1168: saved = lxchar;
1169: reti(SHIFTOP,RS);
1170: }
1171: case '=':
1172: reti(RELOP,GE);
1173: default:
1174: saved = lxchar;
1175: reti(GT,GT);
1176: }
1177: case A_LT:
1178: switch (get(lxchar)) {
1179: case '<':
1180: switch (get(lxchar)) {
1181: case '=':
1182: reti(ASOP,ASLS);
1183: default:
1184: saved = lxchar;
1185: reti(SHIFTOP,LS);
1186: }
1187: case '=':
1188: reti(RELOP,LE);
1189: default:
1190: saved = lxchar;
1191: reti(LT,LT);
1192: }
1193: case A_AND:
1194: switch (get(lxchar)) {
1195: case '&':
1196: reti(ANDAND,ANDAND);
1197: case '=':
1198: reti(ASOP,ASAND);
1199: default:
1200: saved = lxchar;
1201: reti(AND,AND);
1202: }
1203: case A_OR:
1204: switch (get(lxchar)) {
1205: case '|':
1206: reti(OROR,OROR);
1207: case '=':
1208: reti(ASOP,ASOR);
1209: default:
1210: saved = lxchar;
1211: reti(OR,OR);
1212: }
1213: case A_ER:
1214: switch (get(lxchar)) {
1215: case '=':
1216: reti(ASOP,ASER);
1217: default:
1218: saved = lxchar;
1219: reti(ER,ER);
1220: }
1221: case A_PL:
1222: switch (get(lxchar)) {
1223: case '=':
1224: reti(ASOP,ASPLUS);
1225: case '+':
1226: reti(ICOP,INCR);
1227: default:
1228: saved = lxchar;
1229: reti(PLUS,PLUS);
1230: }
1231: case A_MIN:
1232: switch (get(lxchar)) {
1233: case '=':
1234: reti(ASOP,ASMINUS);
1235: case '-':
1236: reti(ICOP,DECR);
1237: case '>':
1238: if (get(lxchar) == '*')
1239: {reti(REFMUL,REF);}
1240: else
1241: saved = lxchar;
1242: reti(REF,REF);
1243: default:
1244: saved = lxchar;
1245: reti(MINUS,MINUS);
1246: }
1247: case A_MUL:
1248: switch (get(lxchar)) {
1249: case '=':
1250: reti(ASOP,ASMUL);
1251: case '/':
1252: error('w',"*/ not as end of comment");
1253: default:
1254: saved = lxchar;
1255: reti(MUL,MUL);
1256: }
1257: case A_MOD:
1258: switch (get(lxchar)) {
1259: case '=':
1260: reti(ASOP,ASMOD);
1261: default:
1262: saved = lxchar;
1263: reti(DIVOP,MOD);
1264: }
1265: default:
1266: {error('i',"lex act==%d getc()->%d",p,lxchar);}
1267:
1268: }
1269:
1270: error('i',"lex, main switch");
1271: }
1272:
1273: }
1274:
1275: int lxtitle()
1276: /*
1277: called after a newline; set linenumber and file name
1278: */
1279: {
1280: register c;
1281:
1282: for(;;)
1283: switch ( get(c) ) {
1284: default: // e.g. not '\n', not '#'
1285: return c;
1286: case '\n':
1287: curloc.line++;
1288: // Nline++;
1289: ll:
1290: break;
1291: case '#': /* # lineno "filename" */
1292: { int cl = curloc.line;
1293: curloc.line = 0;
1294: for(;;)
1295: switch (get(c)) {
1296: case '"':
1297: start_txt();
1298: for(;;)
1299: switch (get(c)) {
1300: case '"':
1301: pch('\0');
1302:
1303: while (get(c) != '\n') ; // skip to eol.. ignore anything more
1304:
1305: if (*txtstart) { // stack file name
1306: char* fn;
1307: if (curr_file == 0){
1308: if (( fn = file_name[0])
1309: && (strcmp(txtstart,fn)!=0)){ // 1st include
1310: if (MAXFILE*4<++Nfile) error('i',"fileN buffer overflow");
1311: if (MAXFILE<++curr_file) error('i',"fileN stack overflow");
1312: file_stack[curr_file] = Nfile;
1313:
1314: char* p1 = new char[txtfree-txtstart];
1315: (void) strcpy(p1,txtstart);
1316: file_name[Nfile] = p1;
1317: // Nstr++;
1318: }
1319: else { //&& line is dummy #line "input.c"
1320: // ignore
1321: }
1322: //&& dead, dead, dead goto push;
1323: }
1324: else if ( (fn=file_name[file_stack[curr_file]])
1325: && (strcmp(txtstart,fn)==0) ) {
1326: //new line, same file: ignore
1327: }
1328: else if ( (fn=file_name[file_stack[curr_file-1]])
1329: && (strcmp(txtstart,fn)==0) ) {
1330: // previous file: pop
1331: curr_file--;
1332: }
1333: else { // new file name: push
1334: //&& push:
1335: if (MAXFILE*4<Nfile++) error('i',"fileN buffer overflow");
1336: if (MAXFILE<curr_file++) error('i',"fileN stack overflow");
1337: file_stack[curr_file] = Nfile;
1338:
1339: char* p = new char[txtfree-txtstart];
1340: (void) strcpy(p,txtstart);
1341: file_name[Nfile] = p;
1342: // Nstr++;
1343: }
1344: }
1345: else { // no name .. back to the original .c file: ""
1346: curr_file = 0;
1347: }
1348: del_txt();
1349: curloc.file = file_stack[curr_file];
1350: goto ll;
1351: case '\n':
1352: error("unexpected end of line on '# line'");
1353: default:
1354: pch(c);
1355: }
1356: case ' ':
1357: break;
1358:
1359: case '0':
1360: case '1':
1361: case '2':
1362: case '3':
1363: case '4':
1364: case '5':
1365: case '6':
1366: case '7':
1367: case '8':
1368: case '9':
1369: curloc.line = curloc.line*10+c-'0';
1370: break;
1371:
1372: case 'l': // look for "#line ..." and then ignore "line"
1373: if (get(c)=='i' && get(c)=='n' && get(c)=='e') break;
1374: case '\n':
1375: curloc.putline();
1376: goto ll;
1377:
1378: default: // detect #pragma
1379: // pass #rubbish through
1380: { char* p = txtstart+1;
1381: curloc.line = cl;
1382: pch('#');
1383: pch(c);
1384: while (get(c) != '\n') pch(c);
1385: pch('\0');
1386: if (strncmp(p,"pragma",6)==0) {
1387: p += 6;
1388: while (*p==' ' || *p=='\t') p++;
1389: if (strncmp(p,"linkage",7)==0) {
1390: if (bl_level) error("linkage pragma inside block");
1391: p += 7;
1392: while (*p==' ' || *p=='\t') p++;
1393: set_linkage(p);
1394: }
1395: else
1396: fprintf(out_file,"\n%s\n",txtstart);
1397: }
1398: else
1399: fprintf(out_file,"%s\n",txtstart);
1400:
1401: // fprintf(out_file,"\n%s\n",txtstart);
1402: start_txt();
1403: curloc.line++;
1404: // Nline++;
1405: goto ll;
1406: }
1407: }
1408: }
1409: }
1410: }
1411:
1412: /* ODI notes -
1413:
1414: template classes
1415:
separate ktbl from keywords because it doesn't contain
legitimate nodes.
1418: */
1419:
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.