|
|
1.1 root 1: /*ident "@(#)ctrans:src/lex.c 1.4" */
2: /***************************************************************************
3:
4: C++ source for cfront, the C++ compiler front-end
5: written in the computer science research center of Bell Labs
6:
7: Copyright (c) 1984 AT&T, Inc. All Rights Reserved
8: THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF AT&T, INC.
9:
10: lex.c:
11: lexical analyser based on pcc's and cpre's scanners
12: modified to handle classes:
13: new keywords: class
14: public
15: call
16: etc.
17: names are not entered in the symbol table by lex()
18: names can be of arbitrary length
19: error() is used to report errors
20: {} and () must match
21: numeric constants are not converted into internal representation
22: but stored as strings
23:
24: ****************************************************************************/
25:
26: #include "cfront.h"
27: #include "yystype.h"
28: #include "size.h"
29: #include "tqueue.h"
30: #include "template.h"
31:
/* CCTRANS(x): expands to its argument unchanged */
32: # define CCTRANS(x) x
/*
	copy_if_need_be(s): evaluates to strdup(s) while templp->in_progress or
	templp->parameters_in_progress is set, and to s itself otherwise
*/
33: #define copy_if_need_be(s) ((templp->in_progress || templp->parameters_in_progress) ? strdup(s) : (s))
34:
35: /* lexical actions */
36:
/* action codes stored in LXDOPE::lxact and dispatched on by tlex() */
37: #define A_ERR 0 /* illegal character */
38: #define A_LET 1 /* saw a letter */
39: #define A_DIG 2 /* saw a digit */
40: #define A_1C 3 /* return a single character */
41: #define A_STR 4 /* string */
42: #define A_CC 5 /* character constant */
43: #define A_BCD 6 /* GCOS BCD constant */
44: #define A_SL 7 /* saw a / */
45: #define A_DOT 8 /* saw a . */
46: #define A_2C 9 /* possible two character symbol */
47: #define A_WS 10 /* whitespace (not \n) */
48: #define A_NL 11 /* \n */
49: #define A_LC 12 /* { */
50: #define A_RC 13 /* } */
51: #define A_L 14 /* ( */
52: #define A_R 15 /* ) */
53: #define A_EOF 16 /* EOF */
54: #define A_ASS 17 /* = */
55: #define A_LT 18 /* < */
56: #define A_GT 19 /* > */
57: #define A_ER 20 /* ^ */
58: #define A_OR 21 /* | */
59: #define A_AND 22 /* & */
60: #define A_MOD 23 /* % */
61: #define A_NOT 24 /* ! */
62: #define A_MIN 25 /* - */
63: #define A_MUL 26 /* * */
64: #define A_PL 27 /* + */
65: #define A_COL 28 /* : */
66: #define A_SHARP 29 /* # */
67: #define A_DOLL 30 /* $ */
68:
69: /* character classes */
70:
/* bit masks kept in lxmask[]; lex_init() assigns them through lxenter() */
/* LEXLET: letters and '_' (plus '$' when apollo is defined) */
71: # define LEXLET 01
/* LEXDIG: the decimal digits 0-9 */
72: # define LEXDIG 02
73: /* no LEXOCT because 8 and 9 used to be octal digits */
/* LEXHEX: 0-9, a-f, A-F */
74: # define LEXHEX 010
/* LEXWS: space, \t, \r, \b, \f and \013 (newline is not included) */
75: # define LEXWS 020
/* LEXDOT: the '.' character */
76: # define LEXDOT 040
77:
/* FIRSTCHUNK: size of the static buffer inbuf; BUFCHUNK: size of buf::chars */
78: const FIRSTCHUNK = 8*1024-8;
79: const BUFCHUNK = 4*1024-8;
80:
81: /* text buffer */
82: static char inbuf[FIRSTCHUNK/*TBUFSZ*/];
/* txtmax: last slot of the buffer currently being filled; pch() switches buffers once txtfree reaches it */
83: static char* txtmax = &inbuf[FIRSTCHUNK/*TBUFSZ*/-1];
/* txtstart: first character of the token text being collected (set by start_txt()) */
84: static char* txtstart = 0;
/* txtfree: next position pch() will write to */
85: static char* txtfree = 0;
86:
/* bufhead: chunks handed out by new_buf(); freebuf: chunks recycled by lex_clear() */
87: static struct buf* bufhead;
88: static buf* freebuf;
89: //static bufs;
90:
/*
	one overflow chunk of token text; chunks are chained through "next"
	on either the bufhead list or the freebuf list
*/
91: struct buf {
92: buf* next;
93: char chars[BUFCHUNK];
94: // buf() { next=bufhead; bufhead=this; }
95: };
96:
97: new_buf(char c)
98: {
99: //fprintf(stderr,"new_buf %d\n",bufs++);
100: buf* pbuf;
101: if (freebuf) {
102: pbuf = freebuf;
103: freebuf = freebuf->next;
104: }
105: else
106: pbuf = new buf; // allocate and register new chunk
107: pbuf->next = bufhead;
108: bufhead = pbuf;
109:
110: if (BUFCHUNK < txtmax-txtstart) error('l',"lexical token too long");
111:
112: // copy current token:
113: char* p = txtstart;
114: txtstart = txtfree = &pbuf->chars[0];
115: while (p<txtmax) *txtfree++ = *p++;
116: *txtfree++=c;
117: txtmax = &pbuf->chars[BUFCHUNK-1];
118: return 0;
119: }
120:
121:
/* pch(c): append c to the token text; calls new_buf(c) when txtfree has reached txtmax */
122: #define pch(c) ((txtmax<=txtfree)?new_buf(c):(*txtfree++=c))
/* start_txt(): begin a new token at the current free position */
123: #define start_txt() txtstart = txtfree
/* del_txt(): discard the text collected since start_txt() */
124: #define del_txt() txtfree = txtstart
125:
/* Nfile: index in file_name[] of the most recently recorded name (lxtitle() increments it before storing) */
126: static int Nfile;// = 1;
127: static char* file_name[MAXFILE*4]; // source file names
128: // file_name[0] == src_file_name
129: // file_name[0] == 0 means stdin
130: static short file_stack[MAXFILE]; // stack of file name indices
131: static int tcurr_file; // current index in file_stack
132: // that is current #include nest level
133:
134: Linkage linkage = linkage_default; // linkage is default C++
/* lvec[0..lcount] is the stack of nested linkage directives maintained by set_linkage() */
135: const LINKMAX = 10;
136: static Linkage lvec[LINKMAX] = { linkage_default };
137: static int lcount = 0;
138:
139: void set_linkage(char* p)
140: {
141: if (p==0 || *p == 0) { // resume previous linkage
142: if (lcount>0) linkage = lvec[--lcount];
143: }
144: else {
145: if (LINKMAX<=++lcount) {
146: error('l',"linkage directive nested too deep");
147: --lcount;
148: } else if (strcmp(p,"C")==0)
149: lvec[lcount] = linkage = linkage_C;
150: else if (strcmp(p,"C++")==0)
151: lvec[lcount] = linkage = linkage_Cplusplus;
152: else {
153: error("%s linkage",p);
154: --lcount;
155: }
156: }
157: }
158:
/* tloc: file index and line number of the text currently being scanned */
159: static struct loc tloc;
/* out_file / in_file: streams written by putline() and read by the get() macro */
160: FILE * out_file = stdout;
161: FILE * in_file = stdin;
/* ktbl and keyword_table are both created in ktbl_init(); only keyword_table is filled there */
162: Ptable ktbl;
163: Ptable keyword_table;
164:
165: static int p_level = 0; /* number of unmatched ``(''s */
166: static int b_level = 0; /* number of unmatched ``{''s */
167:
/* CSSZ: number of character codes the lexer tables cover (256 when ibm is defined, else 128) */
168: # ifdef ibm
169:
170: # define CSMASK 0377
171: # define CSSZ 256
172:
173: # else
174:
175: # define CSMASK 0177
176: # define CSSZ 128
177:
178: # endif
179:
/* lxmask[c+1] holds the LEX* class bits of character c; the +1 offset leaves slot 0 for c == -1 */
180: static short lxmask[CSSZ+1];
181:
182: int saved = 0; /* putback character, avoid ungetchar */
/* defined at the end of this file; needed earlier by lex_init(), linecom(), eat_whitespace() and tlex() */
183: static int lxtitle();
184:
185: // overload rt;
186: inline YYSTYPE rt(char* x) { YYSTYPE y; y.s = x; return y; }
187: inline YYSTYPE rt(int x) { YYSTYPE y; y.t = x; return y; }
188: inline YYSTYPE rt(loc x) { YYSTYPE y; y.l = x; return y; }
189: inline YYSTYPE rt(void* x) { YYSTYPE y; y.pn = Pname(x); return y; }
190:
/* get(c): read the next character of in_file into c (also the value of the expression) */
191: #define get(c) (c=getc(in_file))
/* unget(c): push c back onto in_file */
192: #define unget(c) ungetc(c,in_file)
193:
/*
	reti/retn/rets/retl: hand token "a" to addtok() together with a value
	wrapped by rt() and the location tloc, then return "a" from the
	enclosing function.  Each expands to a brace-enclosed block and
	mentions "a" twice.
*/
194: #define reti(a,b) { addtok(a, rt(b), tloc); return a; }
195: #define retn(a,b) { addtok(a, rt((Pnode)b), tloc); return a; }
196: #define rets(a,b) { addtok(a, rt(b), tloc); return a; }
197: #define retl(a) { addtok(a, rt(tloc), tloc); return a; }
198:
199: // keys[] holds the external form for tokens with fixed representation
200: // illegal tokens and those with variable representation have 0 entries
201: char* keys[MAXTOK+1];
202:
203: static void
204: new_key(char* s, TOK toknum, TOK yyclass)
205: /*
206: make "s" a new keyword with the representation (token) "toknum"
207: "yyclass" is the yacc token (for example new_key("int",INT,TYPE); )
208: "yyclass==0" means yyclass=toknum;
209: */
210: {
211: Pname n = new name(s);
212:
213: keys[(toknum==LOC)?yyclass:toknum] = s;
214: n = new name(s);
215: Pname nn = keyword_table->insert(n,0);
216: // if (Nold) error('i',"keyword %sD twice",s);
217: nn->base = toknum;
218: nn->syn_class = (yyclass) ? yyclass : toknum;
219: delete n;
220: }
221:
222: const int keyword_count = 67;
223: static void
224: ktbl_init()
225: /*
226: enter keywords into keyword table for use by lex()
227: and into keyword representation table used for output
228:
229: ktbl is only for types. We put nothing in it.
230: keyword_table is for user-defined reserved words
231: */
232: {
233: ktbl = new table(KTBLSIZE,0,0);
234: keyword_table = new table(keyword_count,0,0);
235:
236: new_key("asm",ASM,0);
237: new_key("auto",AUTO,TYPE);
238: new_key("break",LOC,BREAK);
239: new_key("case",LOC,CASE);
240: new_key("continue",LOC,CONTINUE);
241: new_key("char",CHAR,TYPE);
242: new_key("do",LOC,DO);
243: new_key("double",DOUBLE,TYPE);
244: new_key("default",LOC,DEFAULT);
245: new_key("enum",ENUM,0);
246: new_key("else",LOC,ELSE);
247: new_key("extern",EXTERN,TYPE);
248: new_key("float",FLOAT,TYPE);
249: new_key("for",LOC,FOR);
250: new_key("goto",LOC,GOTO);
251: new_key("catch",CATCH,CATCH);
252: new_key("try",TRY,TRY);
253: new_key("if",LOC,IF);
254: new_key("int",INT,TYPE);
255: new_key("long",LONG,TYPE);
256: new_key("return",LOC,RETURN);
257: new_key("register",REGISTER,TYPE);
258: new_key("static",STATIC,TYPE);
259: new_key("struct",STRUCT,AGGR);
260: new_key("sizeof",SIZEOF,0);
261: new_key("short",SHORT,TYPE);
262: new_key("switch",LOC,SWITCH);
263: new_key("typedef",TYPEDEF,TYPE);
264: new_key("unsigned",UNSIGNED,TYPE);
265: new_key("union",UNION,AGGR);
266: new_key("void",VOID,TYPE);
267: new_key("while",LOC,WHILE);
268:
269: new_key("class",CLASS,AGGR);
270: new_key("const",CONST,TYPE);
271: new_key("delete",LOC,DELETE);
272: new_key("friend",FRIEND,TYPE);
273: new_key("inline",INLINE,TYPE);
274: new_key("new",NEW,0);
275: new_key("operator",OPERATOR,0);
276: new_key("overload",OVERLOAD,TYPE);
277: new_key("private",PRIVATE,PR);
278: new_key("protected",PROTECTED,PR);
279: new_key("public",PUBLIC,PR);
280: new_key("signed",SIGNED,TYPE);
281: new_key("template",TEMPLATE,0);
282: new_key("this",THIS,0);
283: new_key("virtual",VIRTUAL,TYPE);
284: new_key("volatile",VOLATILE,TYPE);
285:
286: new_key("__statement", STATEMENT, 0) ;
287: new_key("__expression", EXPRESSION, 0) ;
288: new_key("__template_test", TEMPLATE_TEST, 0) ;
289: }
290:
/* last_line: the location most recently written by loc::putline() */
291: loc last_line;
/* noloc: a loc initialised with zeros */
292: loc noloc = { 0, 0 };
293:
294: void loc::putline()
295: {
296: if (file==0 && line==0) return;
297: // fix, Nfile, not MAXFILE; Nfile == MAXFILE*4
298: // if (0<=file && file<MAXFILE) {
299: if ( 0<=file && file <= Nfile ) {
300: char* f = file_name[file];
301: if (f==0) f = src_file_name;
302: fprintf(out_file,line_format,line,f);
303: last_line = *this;
304: }
305: }
306:
307: void loc::put(FILE* p)
308: {
309: // fix, Nfile, not MAXFILE; Nfile == MAXFILE*4
310: // if (0<=file && file<MAXFILE) {
311: if ( 0<=file && file <= Nfile ) {
312: char* f = file_name[file];
313: if (f==0) f = src_file_name;
314: fprintf(p,"\"%s\", line %d: ",f,line);
315: }
316: }
317:
318: void lxenter(register char* s, short m)
319: /* enter a mask into lxmask */
320: {
321: register c;
322:
323: while( c= *s++ ) lxmask[c+1] |= m;
324:
325: }
326:
327:
328: void lxget(register c, register m)
329: /*
330: put 'c' back then scan for members of character class 'm'
331: terminate the string read with \0
332: txtfree points to the character position after that \0
333: */
334: {
335: pch(c);
336: while ( (get(c), lxmask[c+1]&m) ) pch(c);
337: unget(c);
338: pch('\0');
339: }
340:
/*
	lxdope[]: one entry per character that has an action of its own.
	lex_init() first points every lxcp[] slot at lxdope[0], so the first
	entry is the action for characters that are not otherwise listed;
	entries 1, 2 and 3 are shared by all letters, all digits and all
	whitespace characters respectively.
*/
341: struct LXDOPE {
342: short lxch; /* the character */
343: short lxact; /* the action to be performed */
344: TOK lxtok; /* the token number to be returned */
345: } lxdope[] = {
346: #ifdef apollo
347: '@', A_ERR, 0, /* illegal characters go here... */
348: #else
349: '$', A_DOLL, 0,
350: // '$', A_ERR, 0, /* illegal characters go here... */
351: #endif
352: '_', A_LET, 0, /* letters point here */
353: '0', A_DIG, 0, /* digits point here */
354: ' ', A_WS, 0, /* whitespace goes here */
355: '\n', A_NL, 0,
356: '"', A_STR, 0, /* character string */
357: '\'', A_CC, 0, /* ASCII character constant */
358: '`', A_BCD, 0, /* 'foreign' character constant, e.g. BCD */
359: '(', A_L, LP,
360: ')', A_R, RP,
361: '{', A_LC, LC,
362: '}', A_RC, RC,
363: '[', A_1C, LB,
364: ']', A_1C, RB,
365: '*', A_MUL, MUL,
366: '?', A_1C, QUEST,
367: ':', A_COL, COLON,
368: '+', A_PL, PLUS,
369: '-', A_MIN, MINUS,
370: '/', A_SL, DIV,
371: '%', A_MOD, MOD,
372: '&', A_AND, AND,
373: '|', A_OR, OR,
374: '^', A_ER, ER,
375: '!', A_NOT, NOT,
376: '~', A_1C, COMPL,
377: ',', A_1C, CM,
378: ';', A_1C, SM,
379: '.', A_DOT, DOT,
380: '<', A_LT, LT,
381: '>', A_GT, GT,
382: '=', A_ASS, ASSIGN,
383: '#', A_SHARP, 0,
384: EOF, A_EOF, EOFTOK
385: };
386: /* note: EOF is used as sentinel, so must be <=0 and last entry in table */
387:
/* lxcp[c+1] points at the lxdope entry that handles character c; filled in by lex_init() */
388: static struct LXDOPE *lxcp[CSSZ+1];
389:
390: void
391: lex_init()
392: {
393: register struct LXDOPE *p;
394: register i;
395: register char *cp;
396: /* set up character classes */
397:
398: /* first clear lexmask */
399: for(i=0; i<=CSSZ; i++) lxmask[i] = 0;
400:
401: #ifdef apollo
402: lxenter( "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_$", LEXLET );
403: #else
404: lxenter( "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_", LEXLET );
405: #endif
406: lxenter( "0123456789", LEXDIG );
407: lxenter( "0123456789abcdefABCDEF", LEXHEX );
408: /* \013 should become \v someday; \013 is OK for ASCII and EBCDIC */
409: lxenter( " \t\r\b\f\013", LEXWS );
410: lxmask['.'+1] |= LEXDOT;
411:
412: /* make lxcp point to appropriate lxdope entry for each character */
413:
414: /* initialize error entries */
415:
416: for( i= 0; i<=CSSZ; ++i ) lxcp[i] = lxdope;
417:
418: /* make unique entries */
419:
420: for( p=lxdope; ; ++p ) {
421: lxcp[p->lxch+1] = p;
422: if( p->lxch < 0 ) break;
423: }
424:
425: /* handle letters, digits, and whitespace */
426: /* by convention, first, second, and third places */
427:
428: cp = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
429: while( *cp ) lxcp[*cp++ + 1] = &lxdope[1];
430: cp = "123456789";
431: while( *cp ) lxcp[*cp++ + 1] = &lxdope[2];
432: cp = "\t\b\r\f\013";
433: while( *cp ) lxcp[*cp++ + 1] = &lxdope[3];
434:
435: file_name[0] = src_file_name;
436: // set both curloc and tloc so curloc is valid at program startup
437: // curloc.file = tloc.file = 0;
438: curloc.line = tloc.line = 1;
439:
440: ktbl_init();
441: lex_clear();
442: saved = lxtitle();
443: }
444:
445: void lex_clear()
446: {
447: // delete extra buffers:
448: buf* p = bufhead;
449: bufhead = 0;
450: //if (p) {
451: //fprintf(stderr,"lex_clear\n");
452: //bufs=0;
453: //}
454: while (p) {
455: buf* pp = p;
456: p = p->next;
457: pp->next = freebuf;
458: freebuf = pp;
459: }
460:
461: // re-set to static buffer:
462: txtstart = txtfree = inbuf;
463: txtmax = &inbuf[FIRSTCHUNK/*TBUFSZ*/-1];
464: }
465:
/*
    int_val(hex): value (0..15) of the hexadecimal digit character "hex".
    Returns -1 when "hex" is not one of 0-9, a-f, A-F; the original
    fell off the end of the function without returning a value in
    that case.
*/
int int_val(char hex)
{
    if ('0' <= hex && hex <= '9') return hex - '0';
    if ('a' <= hex && hex <= 'f') return hex - 'a' + 10;
    if ('A' <= hex && hex <= 'F') return hex - 'A' + 10;
    return -1;      // not a hexadecimal digit
}
478:
479: void hex_to_oct()
480: /*
481: \x has been seen on input (in char const or string) and \ printed
482: read the following hexadecimal integer and replace it with an octal
483: */
484: {
485: int i = 0;
486: int c;
487: get(c);
488: if (lxmask[c+1] & LEXHEX) {
489: i = int_val(c);
490: get(c); // try for two
491: if (lxmask[c+1] & LEXHEX) {
492: i = (i<<4) + int_val(c);
493: get(c); // try for three
494: if (lxmask[c+1] & LEXHEX)
495: i = (i<<4) + int_val(c);
496: else
497: unget(c);
498: }
499: else
500: unget(c);
501: }
502: else {
503: error("hexadecimal digitE after \\x");
504: unget(c);
505: }
506:
507: // if (0377 < i) error('l',"hexadecimal constant too large");
508: i &= 0377;
509:
510: pch(('0'+(i>>6)));
511: pch(('0'+((i&070)>>3)));
512: pch(('0'+(i&7)));
513: }
514:
515:
516: char * chconst()
517: /*
518: read a character constant into inbuf
519: */
/*
	The opening quote has already been read by the caller.  The text
	collected is the constant including both quotes, \0 terminated, and
	txtstart is returned.  nch counts the characters of the constant (an
	escape sequence counts as one); an error is reported when the count
	has reached SZ_INT and another character follows, and when it is 0.
*/
520: {
521: register c;
522: int nch = 0;
523:
524: pch('\'');
525:
526: for(;;) {
527: char* p;
528: char cc = 0;
529:
530: switch (get(c)) {
531: case '\'':
532: goto ex;
533: case EOF:
534: error("eof in char constant");
535: goto ex;
536: case '\n':
537: error("newline in char constant");
538: goto ex;
539: case '\\':
540: if (SZ_INT == nch++) error('l',"char constant too long");
541: pch(c);
542: switch (get(c)){
/* backslash-newline: count the line, then store the newline like any other escaped character */
543: case '\n':
544: ++tloc.line;
545: default:
546: pch(c);
547: break;
/*
	first octal digit is 4..7: remember where it is about to be stored (p)
	and its value with 4 subtracted (cc); it is only replaced below when
	the escape turns out to have three digits
*/
548: case '4': case '5': case '6': case '7': // octal
549: p = txtfree;
550: cc = c-4;
551: case '0': case '1': case '2': case '3':
552: pch(c);
553: get(c); /* try for 2 */
554: if( lxmask[c+1] & LEXDIG && c<'8'){
555: pch(c);
556: get(c); /* try for 3 */
557: if (lxmask[c+1] & LEXDIG && c<'8') {
/* NOTE(review): p is stale if one of the pch() calls above moved the token to a new buffer */
558: if (cc) *p = cc; // zap high bit
559: pch(c);
560: }
561: else
562: unget(c);
563: }
564: else
565: unget(c);
566: break;
567: case 'x': // hexadecimal
568: hex_to_oct();
569: break;
570: };
571: break;
572: default:
573: if (SZ_INT == nch++) error('l',"char constant too long");
574: pch(c);
575: }
576: }
/* common exit: closing quote, EOF and newline all end up here */
577: ex:
578: if(nch==0)
579: error("empty char constant");
580: pch('\'');
581: pch('\0');
582: return txtstart;
583: }
584:
585: void lxcom()
586: /* process a "block comment" */
587: {
588: register c;
589:
590: for(;;)
591: switch (get(c)) {
592: case EOF:
593: error('w',"eof in comment");
594: return;
595: case '\n':
596: tloc.line++;
597: // Nline++;
598: break;
599: case '*':
600: if (get(c) == '/') return;
601: unget(c);
602: break;
603: case '/':
604: if (get(c) == '*') error('w',"``/*'' in comment");
605: unget(c);
606: break;
607: }
608: }
609:
610:
611: void linecom()
612: // process a "line comment"
613: {
614: register c;
615:
616: get(c);
617: #ifdef DBG
618: if ( c=='@' && get(c)=='!' ) {
619: while ( get(c) != '\n' && c != EOF ) pch(c);
620: pch('\0');
621: process_debug_flags(txtstart);
622: del_txt();
623: }
624: #endif
625: for(;;get(c))
626: switch (c) {
627: case EOF:
628: error('w',"eof in comment");
629: return;
630: case '\n':
631: tloc.line++;
632: // Nline++;
633: saved = lxtitle();
634: return;
635: }
636: }
637:
638: char eat_whitespace()
639: {
640:
641: for(;;) {
642: register c = get(c);
643: lx:
644:
645: switch (c) {
646: case EOF:
647: error('w',"unexpected comment");
648: return EOF;
649: case '/':
650: switch (get(c)) {
651: case '*':
652: lxcom();
653: break;
654: case '/':
655: linecom();
656: break;
657: default:
658: unget(c);
659: return '/';
660: }
661: break;
662: case '\n':
663: ++tloc.line;
664: c = lxtitle();
665: goto lx;
666: case ' ':
667: case '\t':
668: break;
669: default:
670: return c;
671: }
672: }
673: }
674:
675: void get_string()
676: {
677: int lxchar;
678:
679: for(;;)
680: switch (get(lxchar)) {
681: case '\\':
682: pch('\\');
683: switch (get(lxchar)){
684: case '\n':
685: ++tloc.line;
686: default:
687: pch(lxchar);
688: break;
689: case 'x': // hexadecimal
690: hex_to_oct();
691: break;
692: };
693: break;
694: case '"':
695: { char* p = txtstart; // eat_whitespace() moves txtstart
696: if ((lxchar = eat_whitespace()) == '"') {
697: // string catenation, break with
698: // newline to avoid merging characters
699: // (e.g. "\xAB" "C")
700: pch('\\');
701: pch('\n');
702:
703: continue; // eat '\"' and carry on
704: };
705:
706: txtstart = p;
707: unget(lxchar);
708: pch(0);
709: return;
710: }
711: case '\n':
712: error("newline in string");
713: pch(0);
714: return;
715: case EOF:
716: error("eof in string");
717: pch(0);
718: return;
719: default:
720: pch(lxchar);
721: }
722: }
723:
724: TOK tlex()
725: {
726: TOK ret;
727: Pname n;
728:
729: // Ntoken++;
730:
731: for(;;) {
732: register lxchar;
733: register struct LXDOPE *p;
734:
735: start_txt();
736:
737: if (saved) {
738: lxchar = saved;
739: saved = 0;
740: }
741: else
742: get(lxchar);
743:
744: if (lxchar+1 >= CSSZ )
745: error( "illegal input character encountered: %d", lxchar );
746:
747: switch( (p=lxcp[lxchar+1])->lxact ){
748:
749: case A_1C: // eat up a single character, and return an opcode
750: reti(p->lxtok,p->lxtok);
751:
752: case A_EOF:
753: if (p_level || b_level+lcount)
754: error("'%s' missing at end of input",(b_level+lcount) ? "}" : ")");
755:
756: reti(EOFTOK,0);
757:
758: case A_SHARP:
759: // cope with header file not ended with '\n'
760: unget('#');
761: saved = lxtitle();
762: continue;
763:
764: case A_ERR:
765: { if (' '<=lxchar && lxchar<='~') // ASCII printable
766: error("illegal character '%c' (ignored)",lxchar);
767: else
768: error("illegal character '0%o' (ignored)",lxchar);
769: continue;
770: }
771:
772: case A_DOLL:
773: { // lex a name of the for $id for template tree formals
774: Pname fn ;
775: lxget( lxchar, LEXLET|LEXDIG ) ;
776:
777: if (!templp->in_progress || !txtstart[1]) {
778: // no name string immediately follows, treat it
779: // like an illegal character
780: error("illegal character '0%o' (ignored)",lxchar);
781: continue;
782: }
783:
784: txtstart++ ;
785: if(fn=templ_compilation::tree_parameter(txtstart)) {
786: switch (fn->n_template_arg) {
787: case template_expr_tree_formal:
788: // retain the $ in the name
789: retn(ID, strdup(--txtstart)) ;
790:
791: case template_stmt_tree_formal:
792: retn(SM_PARAM, fn) ;
793: }
794: }
795: error("%s wasn't a statement or expression formal", txtstart);
796: rets(ID, copy_if_need_be(txtstart));
797: }
798:
799: case A_LET: // collect an identifier and check for keyword
800: {
801: char ll;
802: switch (ll = lxchar) {
803: // case 'l':
804: case 'L':
805: switch (get(lxchar)) {
806: case '\'':
807: error('s',"wide character constant");
808: unget(lxchar);
809: continue;
810: case '"':
811: error('s',"wide character string");
812: unget(lxchar);
813: continue;
814: }
815: unget(lxchar);
816: lxchar = ll;
817: }
818: }
819:
820: lxget( lxchar, LEXLET|LEXDIG );
821: /* look for a keyword or a global type */
822: if ((n = keyword_table->look(txtstart,0)) /* keyword */
823: || (n = ktbl->look(txtstart, 0))) /* local type */
824: {
825: TOK x;
826: del_txt();
827: switch (x=n->base) {
828: case TNAME:
829: rets(ID,n->string);
830: case LOC:
831: retl(n->syn_class);
832: case EXTERN:
833: if ((lxchar = eat_whitespace()) == '\"') {
834: // linkage directive
835: get_string();
836: rets(LINKAGE,txtstart);
837: }
838: unget(lxchar);
839: reti(TYPE,EXTERN);
840: case CATCH:
841: // case TEMPLATE:
842: error('s',"%k",n->syn_class);
843: continue;
844: case TRY:
845: {
846: static int warn_try;
847: if (!warn_try) {
848: Pname n = keyword_table->look("try",0);
849: n->n_key = DEFAULT;
850: error('w',&tloc,"%k is a future reserved keyword",n->syn_class);
851: warn_try++;
852: }
853: rets(ID,n->string);
854: }
855: default:
856: reti(n->syn_class,x);
857: }
858: }
859: // rets(ID,txtstart);
860: rets(ID, copy_if_need_be(txtstart)) ;
861:
862: case A_DIG:
863:
864: ret = ICON;
865:
866: if (lxchar=='0') {
867: int pkchar;
868: get(pkchar);
869: if(pkchar=='x' || pkchar=='X') { // hex
870: pch(lxchar);
871: lxget(pkchar,LEXHEX);
872: txtfree--;
873: if (txtfree-txtstart<3) // minimum "0Xd\0"
874: error("hex digitX after \"0x\"");
875: get(lxchar);
876: goto getsuffix;
877: }
878: unget(pkchar);
879: }
880:
881: lxget(lxchar,LEXDIG);
882: txtfree--;
883:
884: if (get(lxchar) == '.') {
885: getfp:
886: lxget('.', LEXDIG );
887: txtfree--;
888: ret = FCON;
889: get(lxchar);
890: };
891:
892: if (lxchar=='e' || lxchar=='E') {
893: pch(lxchar);
894: get(lxchar);
895: if(lxchar=='-' || lxchar=='+') {
896: pch(lxchar);
897: get(lxchar);
898: }
899: if (lxmask[lxchar+1] & LEXDIG) {
900: lxget( lxchar, LEXDIG );
901: txtfree--;
902: get(lxchar);
903: }
904: else
905: error("missing exponent digits?");
906: ret = FCON;
907: };
908:
909: if(*txtstart=='0' && ret==ICON) {
910: char *bch = txtstart;
911: while (++bch <= txtfree) {
912: if(*bch=='8' || *bch=='9')
913: error("%c used as octal digit",*bch);
914: }
915: }
916:
917: getsuffix:
918: switch (lxchar) {
919: case 'f':
920: case 'F':
921: if (ret==ICON)
922: error("%c suffix for integer constant",lxchar);
923: else
924: pch(lxchar);
925: break;
926: case 'u':
927: case 'U':
928: if (ret==FCON) {
929: error("%c suffix for floating constant",lxchar);
930: break;
931: }
932: pch(lxchar);
933: switch(get(lxchar)) {
934: case 'l':
935: case 'L':
936: pch(lxchar);
937: break;
938: default:
939: saved=lxchar;
940: break;
941: }
942: break;
943: case 'l':
944: case 'L':
945: pch(lxchar);
946: if (ret==FCON) {
947: break;
948: }
949: switch(get(lxchar)) {
950: case 'u':
951: case 'U':
952: pch(lxchar);
953: break;
954: default:
955: saved=lxchar;
956: break;
957: }
958: break;
959: default:
960: saved = lxchar;
961: break;
962: };
963:
964: if(*txtstart=='0' && txtfree-txtstart==1)
965: reti(ZERO,0); // plain zero
966:
967: pch(0);
968: rets(ret,txtstart);
969:
970:
971: case A_DOT:
972: switch (get(lxchar)) {
973: case '.': // look for ellipsis
974: if (get(lxchar) != '.') {
975: error("token .. ?");
976: saved = lxchar;
977: }
978: reti(ELLIPSIS,0);
979: case '*':
980: reti (REFMUL,DOT);
981: }
982:
983: if (lxmask[lxchar+1] & LEXDIG){// look for floating constant
984: unget(lxchar);
985: goto getfp;
986: }
987: saved = lxchar;
988: reti(DOT,0);
989:
990: case A_STR:
991: /* save string constant in buffer */
992: get_string();
993: rets(STRING,txtstart);
994:
995: case A_CC:
996: /* character constant */
997: rets(CCON,chconst());
998:
999: case A_BCD:
1000: {
1001: register i;
1002: int j;
1003:
1004: pch('`');
1005:
1006: for (i=0; i<7; ++i) {
1007: pch(get(j));
1008: if (j == '`' ) break;
1009: }
1010: pch(0);
1011: if (6<i)
1012: error('l',"bcd constant exceeds 6 characters" );
1013: rets(CCON,txtstart);
1014: }
1015:
1016: case A_SL: /* / */
1017: switch (get(lxchar)) {
1018: case '*':
1019: lxcom();
1020: break;
1021: case '/':
1022: linecom();
1023: break;
1024: case '=':
1025: reti(ASOP,ASDIV);
1026: default:
1027: saved = lxchar;
1028: reti(DIVOP,DIV);
1029: }
1030:
1031: case A_WS:
1032: continue;
1033:
1034: case A_NL:
1035: ++tloc.line;
1036: // Nline++;
1037: saved = lxtitle();
1038: continue;
1039:
1040: case A_LC:
1041: if (BLMAX <= b_level++) {
1042: error('l',"blocks too deeply nested");
1043: ext(3);
1044: }
1045: retl(LC);
1046:
1047: case A_RC:
1048: if (lcount+b_level-- <= 0) {
1049: error("unexpected '}'");
1050: b_level = 0;
1051: }
1052: retl(RC);
1053:
1054: case A_L:
1055: p_level++;
1056: reti(LP,0);
1057:
1058: case A_R:
1059: if (p_level-- <= 0) {
1060: error("unexpected ')'");
1061: p_level = 0;
1062: }
1063: reti(RP,0);
1064:
1065: case A_ASS:
1066: switch (get(lxchar)) {
1067: case '=':
1068: reti(EQUOP,EQ);
1069: default:
1070: saved = lxchar;
1071: reti(ASSIGN,ASSIGN);
1072: }
1073:
1074: case A_COL:
1075: switch (get(lxchar)) {
1076: case ':':
1077: reti(MEM,0);
1078: case '=':
1079: error("':=' is not a c++ operator");
1080: reti(ASSIGN,ASSIGN);
1081: default:
1082: saved = lxchar;
1083: reti(COLON,COLON);
1084: }
1085: case A_NOT:
1086: switch (get(lxchar)) {
1087: case '=':
1088: reti(EQUOP,NE);
1089: default:
1090: saved = lxchar;
1091: reti(NOT,NOT);
1092: }
1093: case A_GT:
1094: switch(get(lxchar)) {
1095: case '>':
1096: switch (get(lxchar)) {
1097: case '=':
1098: reti(ASOP,ASRS);
1099: break;
1100: default:
1101: saved = lxchar;
1102: reti(SHIFTOP,RS);
1103: }
1104: case '=':
1105: reti(RELOP,GE);
1106: default:
1107: saved = lxchar;
1108: reti(GT,GT);
1109: }
1110: case A_LT:
1111: switch (get(lxchar)) {
1112: case '<':
1113: switch (get(lxchar)) {
1114: case '=':
1115: reti(ASOP,ASLS);
1116: default:
1117: saved = lxchar;
1118: reti(SHIFTOP,LS);
1119: }
1120: case '=':
1121: reti(RELOP,LE);
1122: default:
1123: saved = lxchar;
1124: reti(LT,LT);
1125: }
1126: case A_AND:
1127: switch (get(lxchar)) {
1128: case '&':
1129: reti(ANDAND,ANDAND);
1130: case '=':
1131: reti(ASOP,ASAND);
1132: default:
1133: saved = lxchar;
1134: reti(AND,AND);
1135: }
1136: case A_OR:
1137: switch (get(lxchar)) {
1138: case '|':
1139: reti(OROR,OROR);
1140: case '=':
1141: reti(ASOP,ASOR);
1142: default:
1143: saved = lxchar;
1144: reti(OR,OR);
1145: }
1146: case A_ER:
1147: switch (get(lxchar)) {
1148: case '=':
1149: reti(ASOP,ASER);
1150: default:
1151: saved = lxchar;
1152: reti(ER,ER);
1153: }
1154: case A_PL:
1155: switch (get(lxchar)) {
1156: case '=':
1157: reti(ASOP,ASPLUS);
1158: case '+':
1159: reti(ICOP,INCR);
1160: default:
1161: saved = lxchar;
1162: reti(PLUS,PLUS);
1163: }
1164: case A_MIN:
1165: switch (get(lxchar)) {
1166: case '=':
1167: reti(ASOP,ASMINUS);
1168: case '-':
1169: reti(ICOP,DECR);
1170: case '>':
1171: if (get(lxchar) == '*')
1172: {reti(REFMUL,REF);}
1173: else
1174: saved = lxchar;
1175: reti(REF,REF);
1176: default:
1177: saved = lxchar;
1178: reti(MINUS,MINUS);
1179: }
1180: case A_MUL:
1181: switch (get(lxchar)) {
1182: case '=':
1183: reti(ASOP,ASMUL);
1184: case '/':
1185: error('w',"*/ not as end of comment");
1186: default:
1187: saved = lxchar;
1188: reti(MUL,MUL);
1189: }
1190: case A_MOD:
1191: switch (get(lxchar)) {
1192: case '=':
1193: reti(ASOP,ASMOD);
1194: default:
1195: saved = lxchar;
1196: reti(DIVOP,MOD);
1197: }
1198: default:
1199: {error('i',"lex act==%d getc()->%d",p,lxchar);}
1200:
1201: }
1202:
1203: error('i',"lex, main switch");
1204: }
1205:
1206: }
1207:
1208: int lxtitle()
1209: /*
1210: called after a newline; set linenumber and file name
1211: */
1212: {
1213: register c;
1214:
1215: for(;;)
1216: switch ( get(c) ) {
1217: default: // e.g. not '\n', not '#'
1218: return c;
1219: case '\n':
1220: tloc.line++;
1221: // Nline++;
1222: ll:
1223: break;
1224: case '#': /* # lineno "filename" */
1225: { int cl = tloc.line;
1226: tloc.line = 0;
1227: for(;;)
1228: switch (get(c)) {
1229: case '"':
1230: start_txt();
1231: for(;;)
1232: switch (get(c)) {
1233: case '"':
1234: pch('\0');
1235:
1236: while (get(c) != '\n') ; // skip to eol.. ignore anything more
1237:
1238: if (*txtstart) { // stack file name
1239: char* fn;
1240: if (tcurr_file == 0){
1241: if (( fn = file_name[0])
1242: && (strcmp(txtstart,fn)!=0)){ // 1st include
1243: if (MAXFILE*4<++Nfile) error('i',"fileN buffer overflow");
1244: if (MAXFILE<++tcurr_file) error('i',"fileN stack overflow");
1245: file_stack[tcurr_file] = Nfile;
1246:
1247: char* p1 = new char[txtfree-txtstart];
1248: (void) strcpy(p1,txtstart);
1249: file_name[Nfile] = p1;
1250: // Nstr++;
1251: }
1252: else { //&& line is dummy #line "input.c"
1253: // ignore
1254: }
1255: //&& dead, dead, dead goto push;
1256: }
1257: else if ( (fn=file_name[file_stack[tcurr_file]])
1258: && (strcmp(txtstart,fn)==0) ) {
1259: //new line, same file: ignore
1260: }
1261: else if ( (fn=file_name[file_stack[tcurr_file-1]])
1262: && (strcmp(txtstart,fn)==0) ) {
1263: // previous file: pop
1264: tcurr_file--;
1265: }
1266: else { // new file name: push
1267: //&& push:
1268: if (MAXFILE*4<Nfile++) error('i',"fileN buffer overflow");
1269: if (MAXFILE<tcurr_file++) error('i',"fileN stack overflow");
1270: file_stack[tcurr_file] = Nfile;
1271: char* p = new char[txtfree-txtstart];
1272: (void) strcpy(p,txtstart);
1273: file_name[Nfile] = p;
1274: // Nstr++;
1275: }
1276: }
1277: else { // no name .. back to the original .c file: ""
1278: tcurr_file = 0;
1279: }
1280: del_txt();
1281: tloc.file = file_stack[tcurr_file];
1282: goto ll;
1283: case '\n':
1284: error("unexpected end of line on '# line'");
1285: default:
1286: pch(c);
1287: }
1288: case ' ':
1289: break;
1290:
1291: case '0':
1292: case '1':
1293: case '2':
1294: case '3':
1295: case '4':
1296: case '5':
1297: case '6':
1298: case '7':
1299: case '8':
1300: case '9':
1301: tloc.line = tloc.line*10+c-'0';
1302: break;
1303:
1304: case 'l': // look for "#line ..." and then ignore "line"
1305: if (get(c)=='i' && get(c)=='n' && get(c)=='e') break;
1306: case '\n':
1307: tloc.putline();
1308: goto ll;
1309:
1310: default: // pass #rubbish through
1311: tloc.line = cl;
1312: pch('#');
1313: pch(c);
1314: while (get(c) != '\n') pch(c);
1315: pch('\0');
1316: fprintf(out_file,"\n%s\n",txtstart);
1317: start_txt();
1318: tloc.line++;
1319: // Nline++;
1320: goto ll;
1321: }
1322: }
1323: }
1324: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.