|
|
1.1 root 1: /*ident "@(#)ctrans:src/lex.c 1.5" */
2: /***************************************************************************
3:
4: C++ source for cfront, the C++ compiler front-end
5: written in the computer science research center of Bell Labs
6:
7: Copyright (c) 1984 AT&T, Inc. All Rights Reserved
8: THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF AT&T, INC.
9:
10: lex.c:
11: lexical analyser based on pcc's and cpre's scanners
12: modified to handle classes:
13: new keywords: class
14: public
15: call
16: etc.
17: names are not entered in the symbol table by lex()
18: names can be of arbitrary length
19: error() is used to report errors
20: {} and () must match
21: numeric constants are not converted into internal representation
22: but stored as strings
23:
24: ****************************************************************************/
25:
26: #include "cfront.h"
27: #include "yystype.h"
28: #include "size.h"
29: #include "tqueue.h"
30: #include "template.h"
31:
32: # define CCTRANS(x) x
33:
34: #ifndef NULL
35: #define NULL 0
36: #endif
37:
char* strdup(const char* s1)
/*
	duplicate a string:
	get strlen(s1)+1 bytes from malloc() and copy s1, including its
	terminating '\0', into them
	returns the copy, or NULL when malloc() fails
*/
{
	char* dup = (char*) malloc((unsigned) strlen(s1) + 1);

	if (dup == NULL) return NULL;
	return strcpy(dup, s1);
}
50:
/*
	copy_if_need_be(s):
	yields a strdup()'d copy of s while templp->in_progress or
	templp->parameters_in_progress is set, and s itself otherwise
	"s" appears in both arms of the expansion; only one arm is evaluated
*/
#define copy_if_need_be(s) ((templp->in_progress || templp->parameters_in_progress) ? strdup(s) : (s))
52:
/* lexical actions */
/* stored in LXDOPE::lxact and used as the case labels of the main switch in tlex() */

#define A_ERR 0		/* illegal character */
#define A_LET 1		/* saw a letter */
#define A_DIG 2		/* saw a digit */
#define A_1C 3		/* return a single character */
#define A_STR 4		/* string */
#define A_CC 5		/* character constant */
#define A_BCD 6		/* GCOS BCD constant */
#define A_SL 7		/* saw a / */
#define A_DOT 8		/* saw a . */
#define A_2C 9		/* possible two character symbol */
#define A_WS 10		/* whitespace (not \n) */
#define A_NL 11		/* \n */
#define A_LC 12		/* { */
#define A_RC 13		/* } */
#define A_L 14		/* ( */
#define A_R 15		/* ) */
#define A_EOF 16	/* end of input (the EOF entry of lxdope) */
#define A_ASS 17	/* = */
#define A_LT 18		/* < */
#define A_GT 19		/* > */
#define A_ER 20		/* ^ */
#define A_OR 21		/* | */
#define A_AND 22	/* & */
#define A_MOD 23	/* % */
#define A_NOT 24	/* ! */
#define A_MIN 25	/* - */
#define A_MUL 26	/* * */
#define A_PL 27		/* + */
#define A_COL 28	/* : */
#define A_SHARP 29	/* # */
#define A_DOLL 30	/* $ */
86:
/* character classes */
/* bit masks or'ed into lxmask[] by lxenter() and tested by lxget() and the scanners */

# define LEXLET 01	/* letters and '_' (see lex_init()) */
# define LEXDIG 02	/* decimal digits */
/* no LEXOCT because 8 and 9 used to be octal digits */
# define LEXHEX 010	/* hexadecimal digits, both cases */
# define LEXWS 020	/* blank, \t, \r, \b, \f and \013 */
# define LEXDOT 040	/* '.' */

const FIRSTCHUNK = 8*1024-8;	// size of the static buffer inbuf[]
const BUFCHUNK = 4*1024-8;	// size of chars[] in each struct buf
98:
/* text buffer */
/* token text is collected with pch(); lex_clear() points the three pointers back at inbuf */
static char inbuf[FIRSTCHUNK/*TBUFSZ*/];		// initial, statically allocated chunk
static char* txtmax = &inbuf[FIRSTCHUNK/*TBUFSZ*/-1];	// last slot of the current chunk; pch() calls new_buf() once txtfree gets here
static char* txtstart = 0;	// start of the text being collected (set by start_txt())
static char* txtfree = 0;	// next free position

static struct buf* bufhead;	// chunks handed out by new_buf(), linked through buf::next
static buf* freebuf;		// chunks released by lex_clear(), reused by new_buf()
//static bufs;
108:
/*
	one extra chunk of token text
	chunks are chained through "next", either on the bufhead list
	(in use) or on the freebuf list (available to new_buf())
*/
struct buf {
	buf* next;		// next chunk on the same list
	char chars[BUFCHUNK];	// the text itself
//	buf() { next=bufhead; bufhead=this; }
};
114:
115: new_buf(char c)
116: {
117: //fprintf(stderr,"new_buf %d\n",bufs++);
118: buf* pbuf;
119: if (freebuf) {
120: pbuf = freebuf;
121: freebuf = freebuf->next;
122: }
123: else
124: pbuf = new buf; // allocate and register new chunk
125: pbuf->next = bufhead;
126: bufhead = pbuf;
127:
128: if (BUFCHUNK < txtmax-txtstart) error('l',"lexical token too long");
129:
130: // copy current token:
131: char* p = txtstart;
132: txtstart = txtfree = &pbuf->chars[0];
133: while (p<txtmax) *txtfree++ = *p++;
134: *txtfree++=c;
135: txtmax = &pbuf->chars[BUFCHUNK-1];
136: return 0;
137: }
138:
139:
/*
	pch(c)		append c to the text being collected; when txtfree has
			reached txtmax, new_buf(c) moves the text to a new chunk
			and appends c there
			"c" appears in both arms, only one of which is evaluated
	start_txt()	the text collected from now on starts at txtfree
	del_txt()	forget the text collected since start_txt()
*/
#define pch(c) ((txtmax<=txtfree)?new_buf(c):(*txtfree++=c))
#define start_txt() txtstart = txtfree
#define del_txt() txtfree = txtstart
143:
static int Nfile;// = 1;		// index of the most recently entered file_name[] slot
static char* file_name[MAXFILE*4];	// source file names
					// file_name[0] == src_file_name
					// file_name[0] == 0 means stdin
static short file_stack[MAXFILE];	// stack of file name indices
static int tcurr_file;			// current index in file_stack
					// that is current #include nest level

Linkage linkage = linkage_default;	// linkage is default C++
const LINKMAX = 10;			// size of lvec[]
static Linkage lvec[LINKMAX] = { linkage_default };	// linkages pushed by set_linkage()
static int lcount = 0;			// index of the innermost pushed linkage in lvec[]
156:
157: void set_linkage(char* p)
158: {
159: if (p==0 || *p == 0) { // resume previous linkage
160: if (lcount>0) linkage = lvec[--lcount];
161: }
162: else {
163: if (LINKMAX<=++lcount) {
164: error('l',"linkage directive nested too deep");
165: --lcount;
166: } else if (strcmp(p,"C")==0)
167: lvec[lcount] = linkage = linkage_C;
168: else if (strcmp(p,"C++")==0)
169: lvec[lcount] = linkage = linkage_Cplusplus;
170: else {
171: error("%s linkage",p);
172: --lcount;
173: }
174: }
175: }
176:
static struct loc tloc;		// file/line of the text being read; attached to every token
FILE * out_file = stdout;	// stream that loc::putline() and lxtitle() write to
FILE * in_file = stdin;		// stream read by get()
Ptable ktbl;			// created by ktbl_init(); consulted by tlex() after keyword_table
Ptable keyword_table;		// the reserved words entered by new_key()

static int p_level = 0;	/* number of unmatched ``(''s */
static int b_level = 0;	/* number of unmatched ``{''s */

/* CSSZ: number of character codes the tables below provide for */
# ifdef ibm

# define CSMASK 0377
# define CSSZ 256

# else

# define CSMASK 0177
# define CSSZ 128

# endif

/* character class bits, indexed by character+1 so that EOF has slot 0 */
static short lxmask[CSSZ+1];

int saved = 0;	/* putback character, avoid ungetchar */
static int lxtitle();
202:
203: // overload rt;
204: inline YYSTYPE rt(char* x) { YYSTYPE y; y.s = x; return y; }
205: inline YYSTYPE rt(int x) { YYSTYPE y; y.t = x; return y; }
206: inline YYSTYPE rt(loc x) { YYSTYPE y; y.l = x; return y; }
207: inline YYSTYPE rt(void* x) { YYSTYPE y; y.pn = Pname(x); return y; }
208:
/* get(c): read the next character of in_file into c; the value of the expression is c */
#define get(c) (c=getc(in_file))
/* unget(c): push c back onto in_file */
#define unget(c) ungetc(c,in_file)

/*
	reti/retn/rets/retl: hand token "a" to addtok() together with its
	value (an int, a node pointer, a string, or tloc itself) and tloc,
	then return "a" from the enclosing function
*/
#define reti(a,b) { addtok(a, rt(b), tloc); return a; }
#define retn(a,b) { addtok(a, rt((Pnode)b), tloc); return a; }
#define rets(a,b) { addtok(a, rt(b), tloc); return a; }
#define retl(a) { addtok(a, rt(tloc), tloc); return a; }
216:
// keys[] holds the external form for tokens with fixed representation
// illegal tokens and those with variable representation have 0 entries
// the keyword entries are filled in by new_key()
char* keys[MAXTOK+1];
220:
221: static void
222: new_key(char* s, TOK toknum, TOK yyclass)
223: /*
224: make "s" a new keyword with the representation (token) "toknum"
225: "yyclass" is the yacc token (for example new_key("int",INT,TYPE); )
226: "yyclass==0" means yyclass=toknum;
227: */
228: {
229: Pname n = new name(s);
230:
231: keys[(toknum==LOC)?yyclass:toknum] = s;
232: n = new name(s);
233: Pname nn = keyword_table->insert(n,0);
234: // if (Nold) error('i',"keyword %sD twice",s);
235: nn->base = toknum;
236: nn->syn_class = (yyclass) ? yyclass : toknum;
237: delete n;
238: }
239:
240: const int keyword_count = 67;
241: static void
242: ktbl_init()
243: /*
244: enter keywords into keyword table for use by lex()
245: and into keyword representation table used for output
246:
247: ktbl is only for types. We put nothing in it.
248: keyword_table is for user-defined reserved words
249: */
250: {
251: ktbl = new table(KTBLSIZE,0,0);
252: keyword_table = new table(keyword_count,0,0);
253:
254: new_key("asm",ASM,0);
255: new_key("auto",AUTO,TYPE);
256: new_key("break",LOC,BREAK);
257: new_key("case",LOC,CASE);
258: new_key("continue",LOC,CONTINUE);
259: new_key("char",CHAR,TYPE);
260: new_key("do",LOC,DO);
261: new_key("double",DOUBLE,TYPE);
262: new_key("default",LOC,DEFAULT);
263: new_key("enum",ENUM,0);
264: new_key("else",LOC,ELSE);
265: new_key("extern",EXTERN,TYPE);
266: new_key("float",FLOAT,TYPE);
267: new_key("for",LOC,FOR);
268: new_key("goto",LOC,GOTO);
269: new_key("catch",CATCH,CATCH);
270: new_key("try",TRY,TRY);
271: new_key("if",LOC,IF);
272: new_key("int",INT,TYPE);
273: new_key("long",LONG,TYPE);
274: new_key("return",LOC,RETURN);
275: new_key("register",REGISTER,TYPE);
276: new_key("static",STATIC,TYPE);
277: new_key("struct",STRUCT,AGGR);
278: new_key("sizeof",SIZEOF,0);
279: new_key("short",SHORT,TYPE);
280: new_key("switch",LOC,SWITCH);
281: new_key("typedef",TYPEDEF,TYPE);
282: new_key("unsigned",UNSIGNED,TYPE);
283: new_key("union",UNION,AGGR);
284: new_key("void",VOID,TYPE);
285: new_key("while",LOC,WHILE);
286:
287: new_key("class",CLASS,AGGR);
288: new_key("const",CONST,TYPE);
289: new_key("delete",LOC,DELETE);
290: new_key("friend",FRIEND,TYPE);
291: new_key("inline",INLINE,TYPE);
292: new_key("new",NEW,0);
293: new_key("operator",OPERATOR,0);
294: new_key("overload",OVERLOAD,TYPE);
295: new_key("private",PRIVATE,PR);
296: new_key("protected",PROTECTED,PR);
297: new_key("public",PUBLIC,PR);
298: new_key("signed",SIGNED,TYPE);
299: new_key("template",TEMPLATE,0);
300: new_key("this",THIS,0);
301: new_key("virtual",VIRTUAL,TYPE);
302: new_key("volatile",VOLATILE,TYPE);
303:
304: new_key("__statement", STATEMENT, 0) ;
305: new_key("__expression", EXPRESSION, 0) ;
306: new_key("__template_test", TEMPLATE_TEST, 0) ;
307: }
308:
loc last_line;		// the location most recently written by loc::putline()
loc noloc = { 0, 0 };	// both members zero
311:
312: void loc::putline()
313: {
314: if (file==0 && line==0) return;
315: // fix, Nfile, not MAXFILE; Nfile == MAXFILE*4
316: // if (0<=file && file<MAXFILE) {
317: if ( 0<=file && file <= Nfile ) {
318: char* f = file_name[file];
319: if (f==0) f = src_file_name;
320: fprintf(out_file,line_format,line,f);
321: last_line = *this;
322: }
323: }
324:
325: void loc::put(FILE* p)
326: {
327: // fix, Nfile, not MAXFILE; Nfile == MAXFILE*4
328: // if (0<=file && file<MAXFILE) {
329: if ( 0<=file && file <= Nfile ) {
330: char* f = file_name[file];
331: if (f==0) f = src_file_name;
332: fprintf(p,"\"%s\", line %d: ",f,line);
333: }
334: }
335:
336: void lxenter(register char* s, short m)
337: /* enter a mask into lxmask */
338: {
339: register c;
340:
341: while( c= *s++ ) lxmask[c+1] |= m;
342:
343: }
344:
345:
346: void lxget(register c, register m)
347: /*
348: put 'c' back then scan for members of character class 'm'
349: terminate the string read with \0
350: txtfree points to the character position after that \0
351: */
352: {
353: pch(c);
354: while ( (get(c), lxmask[c+1]&m) ) pch(c);
355: unget(c);
356: pch('\0');
357: }
358:
/*
	one entry per character that needs its own treatment in tlex()
	lex_init() makes lxcp[] point at these entries; the first four are
	also used for every other character: [0] unlisted characters,
	[1] letters, [2] digits, [3] whitespace
*/
struct LXDOPE {
	short lxch;	/* the character */
	short lxact;	/* the action to be performed */
	TOK lxtok;	/* the token number to be returned */
} lxdope[] = {
#ifdef apollo
	'@',	A_ERR,	0,	/* illegal characters go here... */
#else
	'$',	A_DOLL,	0,
//	'$',	A_ERR,	0,	/* illegal characters go here... */
#endif
	'_',	A_LET,	0,	/* letters point here */
	'0',	A_DIG,	0,	/* digits point here */
	' ',	A_WS,	0,	/* whitespace goes here */
	'\n',	A_NL,	0,
	'"',	A_STR,	0,	/* character string */
	'\'',	A_CC,	0,	/* ASCII character constant */
	'`',	A_BCD,	0,	/* 'foreign' character constant, e.g. BCD */
	'(',	A_L,	LP,
	')',	A_R,	RP,
	'{',	A_LC,	LC,
	'}',	A_RC,	RC,
	'[',	A_1C,	LB,
	']',	A_1C,	RB,
	'*',	A_MUL,	MUL,
	'?',	A_1C,	QUEST,
	':',	A_COL,	COLON,
	'+',	A_PL,	PLUS,
	'-',	A_MIN,	MINUS,
	'/',	A_SL,	DIV,
	'%',	A_MOD,	MOD,
	'&',	A_AND,	AND,
	'|',	A_OR,	OR,
	'^',	A_ER,	ER,
	'!',	A_NOT,	NOT,
	'~',	A_1C,	COMPL,
	',',	A_1C,	CM,
	';',	A_1C,	SM,
	'.',	A_DOT,	DOT,
	'<',	A_LT,	LT,
	'>',	A_GT,	GT,
	'=',	A_ASS,	ASSIGN,
	'#',	A_SHARP, 0,
	EOF,	A_EOF,	EOFTOK
};
/* note: EOF is used as sentinel, so must be <=0 and last entry in table */

/* dispatch table: lxcp[c+1] is the lxdope entry used for character c (slot 0 is EOF) */
static struct LXDOPE *lxcp[CSSZ+1];
407:
408: void
409: lex_init()
410: {
411: register struct LXDOPE *p;
412: register i;
413: register char *cp;
414: /* set up character classes */
415:
416: /* first clear lexmask */
417: for(i=0; i<=CSSZ; i++) lxmask[i] = 0;
418:
419: #ifdef apollo
420: lxenter( "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_$", LEXLET );
421: #else
422: lxenter( "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_", LEXLET );
423: #endif
424: lxenter( "0123456789", LEXDIG );
425: lxenter( "0123456789abcdefABCDEF", LEXHEX );
426: /* \013 should become \v someday; \013 is OK for ASCII and EBCDIC */
427: lxenter( " \t\r\b\f\013", LEXWS );
428: lxmask['.'+1] |= LEXDOT;
429:
430: /* make lxcp point to appropriate lxdope entry for each character */
431:
432: /* initialize error entries */
433:
434: for( i= 0; i<=CSSZ; ++i ) lxcp[i] = lxdope;
435:
436: /* make unique entries */
437:
438: for( p=lxdope; ; ++p ) {
439: lxcp[p->lxch+1] = p;
440: if( p->lxch < 0 ) break;
441: }
442:
443: /* handle letters, digits, and whitespace */
444: /* by convention, first, second, and third places */
445:
446: cp = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
447: while( *cp ) lxcp[*cp++ + 1] = &lxdope[1];
448: cp = "123456789";
449: while( *cp ) lxcp[*cp++ + 1] = &lxdope[2];
450: cp = "\t\b\r\f\013";
451: while( *cp ) lxcp[*cp++ + 1] = &lxdope[3];
452:
453: file_name[0] = src_file_name;
454: // set both curloc and tloc so curloc is valid at program startup
455: // curloc.file = tloc.file = 0;
456: curloc.line = tloc.line = 1;
457:
458: ktbl_init();
459: lex_clear();
460: saved = lxtitle();
461: }
462:
463: void lex_clear()
464: {
465: // delete extra buffers:
466: buf* p = bufhead;
467: bufhead = 0;
468: //if (p) {
469: //fprintf(stderr,"lex_clear\n");
470: //bufs=0;
471: //}
472: while (p) {
473: buf* pp = p;
474: p = p->next;
475: pp->next = freebuf;
476: freebuf = pp;
477: }
478:
479: // re-set to static buffer:
480: txtstart = txtfree = inbuf;
481: txtmax = &inbuf[FIRSTCHUNK/*TBUFSZ*/-1];
482: }
483:
int int_val(char hex)
/*
	value (0..15) of the hexadecimal digit "hex"; both upper and lower
	case letters are accepted
	a character that is not a hexadecimal digit yields 0 instead of
	running off the end of the function; callers are expected to have
	checked the character already
*/
{
	switch (hex) {
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
		return hex-'0';
	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
		return hex-'a'+10;
	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
		return hex-'A'+10;
	default:
		return 0;
	}
}
496:
497: void hex_to_oct()
498: /*
499: \x has been seen on input (in char const or string) and \ printed
500: read the following hexadecimal integer and replace it with an octal
501: */
502: {
503: int i = 0;
504: int c;
505: get(c);
506: if (lxmask[c+1] & LEXHEX) {
507: i = int_val(c);
508: get(c); // try for two
509: if (lxmask[c+1] & LEXHEX) {
510: i = (i<<4) + int_val(c);
511: get(c); // try for three
512: if (lxmask[c+1] & LEXHEX)
513: i = (i<<4) + int_val(c);
514: else
515: unget(c);
516: }
517: else
518: unget(c);
519: }
520: else {
521: error("hexadecimal digitE after \\x");
522: unget(c);
523: }
524:
525: // if (0377 < i) error('l',"hexadecimal constant too large");
526: i &= 0377;
527:
528: pch(('0'+(i>>6)));
529: pch(('0'+((i&070)>>3)));
530: pch(('0'+(i&7)));
531: }
532:
533:
534: char * chconst()
535: /*
536: read a character constant into inbuf
537: */
538: {
539: register c;
540: int nch = 0;
541:
542: pch('\'');
543:
544: for(;;) {
545: char* p;
546: char cc = 0;
547:
548: switch (get(c)) {
549: case '\'':
550: goto ex;
551: case EOF:
552: error("eof in char constant");
553: goto ex;
554: case '\n':
555: error("newline in char constant");
556: goto ex;
557: case '\\':
558: if (SZ_INT == nch++) error('l',"char constant too long");
559: pch(c);
560: switch (get(c)){
561: case '\n':
562: ++tloc.line;
563: default:
564: pch(c);
565: break;
566: case '4': case '5': case '6': case '7': // octal
567: p = txtfree;
568: cc = c-4;
569: case '0': case '1': case '2': case '3':
570: pch(c);
571: get(c); /* try for 2 */
572: if( lxmask[c+1] & LEXDIG && c<'8'){
573: pch(c);
574: get(c); /* try for 3 */
575: if (lxmask[c+1] & LEXDIG && c<'8') {
576: if (cc) *p = cc; // zap high bit
577: pch(c);
578: }
579: else
580: unget(c);
581: }
582: else
583: unget(c);
584: break;
585: case 'x': // hexadecimal
586: hex_to_oct();
587: break;
588: };
589: break;
590: default:
591: if (SZ_INT == nch++) error('l',"char constant too long");
592: pch(c);
593: }
594: }
595: ex:
596: if(nch==0)
597: error("empty char constant");
598: pch('\'');
599: pch('\0');
600: return txtstart;
601: }
602:
603: void lxcom()
604: /* process a "block comment" */
605: {
606: register c;
607:
608: for(;;)
609: switch (get(c)) {
610: case EOF:
611: error('w',"eof in comment");
612: return;
613: case '\n':
614: tloc.line++;
615: // Nline++;
616: break;
617: case '*':
618: if (get(c) == '/') return;
619: unget(c);
620: break;
621: case '/':
622: if (get(c) == '*') error('w',"``/*'' in comment");
623: unget(c);
624: break;
625: }
626: }
627:
628:
629: void linecom()
630: // process a "line comment"
631: {
632: register c;
633:
634: get(c);
635: #ifdef DBG
636: if ( c=='@' && get(c)=='!' ) {
637: while ( get(c) != '\n' && c != EOF ) pch(c);
638: pch('\0');
639: process_debug_flags(txtstart);
640: del_txt();
641: }
642: #endif
643: for(;;get(c))
644: switch (c) {
645: case EOF:
646: error('w',"eof in comment");
647: return;
648: case '\n':
649: tloc.line++;
650: // Nline++;
651: saved = lxtitle();
652: return;
653: }
654: }
655:
656: char eat_whitespace()
657: {
658:
659: for(;;) {
660: register c = get(c);
661: lx:
662:
663: switch (c) {
664: case EOF:
665: error('w',"unexpected comment");
666: return EOF;
667: case '/':
668: switch (get(c)) {
669: case '*':
670: lxcom();
671: break;
672: case '/':
673: linecom();
674: break;
675: default:
676: unget(c);
677: return '/';
678: }
679: break;
680: case '\n':
681: ++tloc.line;
682: c = lxtitle();
683: goto lx;
684: case ' ':
685: case '\t':
686: break;
687: default:
688: return c;
689: }
690: }
691: }
692:
693: void get_string()
694: {
695: int lxchar;
696:
697: for(;;)
698: switch (get(lxchar)) {
699: case '\\':
700: pch('\\');
701: switch (get(lxchar)){
702: case '\n':
703: ++tloc.line;
704: default:
705: pch(lxchar);
706: break;
707: case 'x': // hexadecimal
708: hex_to_oct();
709: break;
710: };
711: break;
712: case '"':
713: { char* p = txtstart; // eat_whitespace() moves txtstart
714: if ((lxchar = eat_whitespace()) == '"') {
715: // string catenation, break with
716: // newline to avoid merging characters
717: // (e.g. "\xAB" "C")
718: pch('\\');
719: pch('\n');
720:
721: continue; // eat '\"' and carry on
722: };
723:
724: txtstart = p;
725: unget(lxchar);
726: pch(0);
727: return;
728: }
729: case '\n':
730: error("newline in string");
731: pch(0);
732: return;
733: case EOF:
734: error("eof in string");
735: pch(0);
736: return;
737: default:
738: pch(lxchar);
739: }
740: }
741:
742: TOK tlex()
743: {
744: TOK ret;
745: Pname n;
746:
747: // Ntoken++;
748:
749: for(;;) {
750: register lxchar;
751: register struct LXDOPE *p;
752:
753: start_txt();
754:
755: if (saved) {
756: lxchar = saved;
757: saved = 0;
758: }
759: else
760: get(lxchar);
761:
762: if (lxchar+1 >= CSSZ )
763: error( "illegal input character encountered: %d", lxchar );
764:
765: switch( (p=lxcp[lxchar+1])->lxact ){
766:
767: case A_1C: // eat up a single character, and return an opcode
768: reti(p->lxtok,p->lxtok);
769:
770: case A_EOF:
771: if (p_level || b_level+lcount)
772: error("'%s' missing at end of input",(b_level+lcount) ? "}" : ")");
773:
774: reti(EOFTOK,0);
775:
776: case A_SHARP:
777: // cope with header file not ended with '\n'
778: unget('#');
779: saved = lxtitle();
780: continue;
781:
782: case A_ERR:
783: { if (' '<=lxchar && lxchar<='~') // ASCII printable
784: error("illegal character '%c' (ignored)",lxchar);
785: else
786: error("illegal character '0%o' (ignored)",lxchar);
787: continue;
788: }
789:
790: case A_DOLL:
791: { // lex a name of the for $id for template tree formals
792: Pname fn ;
793: lxget( lxchar, LEXLET|LEXDIG ) ;
794:
795: if (!templp->in_progress || !txtstart[1]) {
796: // no name string immediately follows, treat it
797: // like an illegal character
798: error("illegal character '0%o' (ignored)",lxchar);
799: continue;
800: }
801:
802: txtstart++ ;
803: if(fn=templ_compilation::tree_parameter(txtstart)) {
804: switch (fn->n_template_arg) {
805: case template_expr_tree_formal:
806: // retain the $ in the name
807: retn(ID, strdup(--txtstart)) ;
808:
809: case template_stmt_tree_formal:
810: retn(SM_PARAM, fn) ;
811: }
812: }
813: error("%s wasn't a statement or expression formal", txtstart);
814: rets(ID, copy_if_need_be(txtstart));
815: }
816:
817: case A_LET: // collect an identifier and check for keyword
818: {
819: char ll;
820: switch (ll = lxchar) {
821: // case 'l':
822: case 'L':
823: switch (get(lxchar)) {
824: case '\'':
825: error('s',"wide character constant");
826: unget(lxchar);
827: continue;
828: case '"':
829: error('s',"wide character string");
830: unget(lxchar);
831: continue;
832: }
833: unget(lxchar);
834: lxchar = ll;
835: }
836: }
837:
838: lxget( lxchar, LEXLET|LEXDIG );
839: /* look for a keyword or a global type */
840: if ((n = keyword_table->look(txtstart,0)) /* keyword */
841: || (n = ktbl->look(txtstart, 0))) /* local type */
842: {
843: TOK x;
844: del_txt();
845: switch (x=n->base) {
846: case TNAME:
847: rets(ID,n->string);
848: case LOC:
849: retl(n->syn_class);
850: case EXTERN:
851: if ((lxchar = eat_whitespace()) == '\"') {
852: // linkage directive
853: get_string();
854: rets(LINKAGE,txtstart);
855: }
856: unget(lxchar);
857: reti(TYPE,EXTERN);
858: case CATCH:
859: // case TEMPLATE:
860: error('s',"%k",n->syn_class);
861: continue;
862: case TRY:
863: {
864: static int warn_try;
865: if (!warn_try) {
866: Pname n = keyword_table->look("try",0);
867: n->n_key = DEFAULT;
868: error('w',&tloc,"%k is a future reserved keyword",n->syn_class);
869: warn_try++;
870: }
871: rets(ID,n->string);
872: }
873: default:
874: reti(n->syn_class,x);
875: }
876: }
877: // rets(ID,txtstart);
878: rets(ID, copy_if_need_be(txtstart)) ;
879:
880: case A_DIG:
881:
882: ret = ICON;
883:
884: if (lxchar=='0') {
885: int pkchar;
886: get(pkchar);
887: if(pkchar=='x' || pkchar=='X') { // hex
888: pch(lxchar);
889: lxget(pkchar,LEXHEX);
890: txtfree--;
891: if (txtfree-txtstart<3) // minimum "0Xd\0"
892: error("hex digitX after \"0x\"");
893: get(lxchar);
894: goto getsuffix;
895: }
896: unget(pkchar);
897: }
898:
899: lxget(lxchar,LEXDIG);
900: txtfree--;
901:
902: if (get(lxchar) == '.') {
903: getfp:
904: lxget('.', LEXDIG );
905: txtfree--;
906: ret = FCON;
907: get(lxchar);
908: };
909:
910: if (lxchar=='e' || lxchar=='E') {
911: pch(lxchar);
912: get(lxchar);
913: if(lxchar=='-' || lxchar=='+') {
914: pch(lxchar);
915: get(lxchar);
916: }
917: if (lxmask[lxchar+1] & LEXDIG) {
918: lxget( lxchar, LEXDIG );
919: txtfree--;
920: get(lxchar);
921: }
922: else
923: error("missing exponent digits?");
924: ret = FCON;
925: };
926:
927: if(*txtstart=='0' && ret==ICON) {
928: char *bch = txtstart;
929: while (++bch <= txtfree) {
930: if(*bch=='8' || *bch=='9')
931: error("%c used as octal digit",*bch);
932: }
933: }
934:
935: getsuffix:
936: switch (lxchar) {
937: case 'f':
938: case 'F':
939: if (ret==ICON)
940: error("%c suffix for integer constant",lxchar);
941: else
942: pch(lxchar);
943: break;
944: case 'u':
945: case 'U':
946: if (ret==FCON) {
947: error("%c suffix for floating constant",lxchar);
948: break;
949: }
950: pch(lxchar);
951: switch(get(lxchar)) {
952: case 'l':
953: case 'L':
954: pch(lxchar);
955: break;
956: default:
957: saved=lxchar;
958: break;
959: }
960: break;
961: case 'l':
962: case 'L':
963: pch(lxchar);
964: if (ret==FCON) {
965: break;
966: }
967: switch(get(lxchar)) {
968: case 'u':
969: case 'U':
970: pch(lxchar);
971: break;
972: default:
973: saved=lxchar;
974: break;
975: }
976: break;
977: default:
978: saved = lxchar;
979: break;
980: };
981:
982: if(*txtstart=='0' && txtfree-txtstart==1)
983: reti(ZERO,0); // plain zero
984:
985: pch(0);
986: rets(ret,txtstart);
987:
988:
989: case A_DOT:
990: switch (get(lxchar)) {
991: case '.': // look for ellipsis
992: if (get(lxchar) != '.') {
993: error("token .. ?");
994: saved = lxchar;
995: }
996: reti(ELLIPSIS,0);
997: case '*':
998: reti (REFMUL,DOT);
999: }
1000:
1001: if (lxmask[lxchar+1] & LEXDIG){// look for floating constant
1002: unget(lxchar);
1003: goto getfp;
1004: }
1005: saved = lxchar;
1006: reti(DOT,0);
1007:
1008: case A_STR:
1009: /* save string constant in buffer */
1010: get_string();
1011: rets(STRING,txtstart);
1012:
1013: case A_CC:
1014: /* character constant */
1015: rets(CCON,chconst());
1016:
1017: case A_BCD:
1018: {
1019: register i;
1020: int j;
1021:
1022: pch('`');
1023:
1024: for (i=0; i<7; ++i) {
1025: pch(get(j));
1026: if (j == '`' ) break;
1027: }
1028: pch(0);
1029: if (6<i)
1030: error('l',"bcd constant exceeds 6 characters" );
1031: rets(CCON,txtstart);
1032: }
1033:
1034: case A_SL: /* / */
1035: switch (get(lxchar)) {
1036: case '*':
1037: lxcom();
1038: break;
1039: case '/':
1040: linecom();
1041: break;
1042: case '=':
1043: reti(ASOP,ASDIV);
1044: default:
1045: saved = lxchar;
1046: reti(DIVOP,DIV);
1047: }
1048:
1049: case A_WS:
1050: continue;
1051:
1052: case A_NL:
1053: ++tloc.line;
1054: // Nline++;
1055: saved = lxtitle();
1056: continue;
1057:
1058: case A_LC:
1059: if (BLMAX <= b_level++) {
1060: error('l',"blocks too deeply nested");
1061: ext(3);
1062: }
1063: retl(LC);
1064:
1065: case A_RC:
1066: if (lcount+b_level-- <= 0) {
1067: error("unexpected '}'");
1068: b_level = 0;
1069: }
1070: retl(RC);
1071:
1072: case A_L:
1073: p_level++;
1074: reti(LP,0);
1075:
1076: case A_R:
1077: if (p_level-- <= 0) {
1078: error("unexpected ')'");
1079: p_level = 0;
1080: }
1081: reti(RP,0);
1082:
1083: case A_ASS:
1084: switch (get(lxchar)) {
1085: case '=':
1086: reti(EQUOP,EQ);
1087: default:
1088: saved = lxchar;
1089: reti(ASSIGN,ASSIGN);
1090: }
1091:
1092: case A_COL:
1093: switch (get(lxchar)) {
1094: case ':':
1095: reti(MEM,0);
1096: case '=':
1097: error("':=' is not a c++ operator");
1098: reti(ASSIGN,ASSIGN);
1099: default:
1100: saved = lxchar;
1101: reti(COLON,COLON);
1102: }
1103: case A_NOT:
1104: switch (get(lxchar)) {
1105: case '=':
1106: reti(EQUOP,NE);
1107: default:
1108: saved = lxchar;
1109: reti(NOT,NOT);
1110: }
1111: case A_GT:
1112: switch(get(lxchar)) {
1113: case '>':
1114: switch (get(lxchar)) {
1115: case '=':
1116: reti(ASOP,ASRS);
1117: break;
1118: default:
1119: saved = lxchar;
1120: reti(SHIFTOP,RS);
1121: }
1122: case '=':
1123: reti(RELOP,GE);
1124: default:
1125: saved = lxchar;
1126: reti(GT,GT);
1127: }
1128: case A_LT:
1129: switch (get(lxchar)) {
1130: case '<':
1131: switch (get(lxchar)) {
1132: case '=':
1133: reti(ASOP,ASLS);
1134: default:
1135: saved = lxchar;
1136: reti(SHIFTOP,LS);
1137: }
1138: case '=':
1139: reti(RELOP,LE);
1140: default:
1141: saved = lxchar;
1142: reti(LT,LT);
1143: }
1144: case A_AND:
1145: switch (get(lxchar)) {
1146: case '&':
1147: reti(ANDAND,ANDAND);
1148: case '=':
1149: reti(ASOP,ASAND);
1150: default:
1151: saved = lxchar;
1152: reti(AND,AND);
1153: }
1154: case A_OR:
1155: switch (get(lxchar)) {
1156: case '|':
1157: reti(OROR,OROR);
1158: case '=':
1159: reti(ASOP,ASOR);
1160: default:
1161: saved = lxchar;
1162: reti(OR,OR);
1163: }
1164: case A_ER:
1165: switch (get(lxchar)) {
1166: case '=':
1167: reti(ASOP,ASER);
1168: default:
1169: saved = lxchar;
1170: reti(ER,ER);
1171: }
1172: case A_PL:
1173: switch (get(lxchar)) {
1174: case '=':
1175: reti(ASOP,ASPLUS);
1176: case '+':
1177: reti(ICOP,INCR);
1178: default:
1179: saved = lxchar;
1180: reti(PLUS,PLUS);
1181: }
1182: case A_MIN:
1183: switch (get(lxchar)) {
1184: case '=':
1185: reti(ASOP,ASMINUS);
1186: case '-':
1187: reti(ICOP,DECR);
1188: case '>':
1189: if (get(lxchar) == '*')
1190: {reti(REFMUL,REF);}
1191: else
1192: saved = lxchar;
1193: reti(REF,REF);
1194: default:
1195: saved = lxchar;
1196: reti(MINUS,MINUS);
1197: }
1198: case A_MUL:
1199: switch (get(lxchar)) {
1200: case '=':
1201: reti(ASOP,ASMUL);
1202: case '/':
1203: error('w',"*/ not as end of comment");
1204: default:
1205: saved = lxchar;
1206: reti(MUL,MUL);
1207: }
1208: case A_MOD:
1209: switch (get(lxchar)) {
1210: case '=':
1211: reti(ASOP,ASMOD);
1212: default:
1213: saved = lxchar;
1214: reti(DIVOP,MOD);
1215: }
1216: default:
1217: {error('i',"lex act==%d getc()->%d",p,lxchar);}
1218:
1219: }
1220:
1221: error('i',"lex, main switch");
1222: }
1223:
1224: }
1225:
1226: int lxtitle()
1227: /*
1228: called after a newline; set linenumber and file name
1229: */
1230: {
1231: register c;
1232:
1233: for(;;)
1234: switch ( get(c) ) {
1235: default: // e.g. not '\n', not '#'
1236: return c;
1237: case '\n':
1238: tloc.line++;
1239: // Nline++;
1240: ll:
1241: break;
1242: case '#': /* # lineno "filename" */
1243: { int cl = tloc.line;
1244: tloc.line = 0;
1245: for(;;)
1246: switch (get(c)) {
1247: case '"':
1248: start_txt();
1249: for(;;)
1250: switch (get(c)) {
1251: case '"':
1252: pch('\0');
1253:
1254: while (get(c) != '\n') ; // skip to eol.. ignore anything more
1255:
1256: if (*txtstart) { // stack file name
1257: char* fn;
1258: if (tcurr_file == 0){
1259: if (( fn = file_name[0])
1260: && (strcmp(txtstart,fn)!=0)){ // 1st include
1261: if (MAXFILE*4<++Nfile) error('i',"fileN buffer overflow");
1262: if (MAXFILE<++tcurr_file) error('i',"fileN stack overflow");
1263: file_stack[tcurr_file] = Nfile;
1264:
1265: char* p1 = new char[txtfree-txtstart];
1266: (void) strcpy(p1,txtstart);
1267: file_name[Nfile] = p1;
1268: // Nstr++;
1269: }
1270: else { //&& line is dummy #line "input.c"
1271: // ignore
1272: }
1273: //&& dead, dead, dead goto push;
1274: }
1275: else if ( (fn=file_name[file_stack[tcurr_file]])
1276: && (strcmp(txtstart,fn)==0) ) {
1277: //new line, same file: ignore
1278: }
1279: else if ( (fn=file_name[file_stack[tcurr_file-1]])
1280: && (strcmp(txtstart,fn)==0) ) {
1281: // previous file: pop
1282: tcurr_file--;
1283: }
1284: else { // new file name: push
1285: //&& push:
1286: if (MAXFILE*4<Nfile++) error('i',"fileN buffer overflow");
1287: if (MAXFILE<tcurr_file++) error('i',"fileN stack overflow");
1288: file_stack[tcurr_file] = Nfile;
1289: char* p = new char[txtfree-txtstart];
1290: (void) strcpy(p,txtstart);
1291: file_name[Nfile] = p;
1292: // Nstr++;
1293: }
1294: }
1295: else { // no name .. back to the original .c file: ""
1296: tcurr_file = 0;
1297: }
1298: del_txt();
1299: tloc.file = file_stack[tcurr_file];
1300: goto ll;
1301: case '\n':
1302: error("unexpected end of line on '# line'");
1303: default:
1304: pch(c);
1305: }
1306: case ' ':
1307: break;
1308:
1309: case '0':
1310: case '1':
1311: case '2':
1312: case '3':
1313: case '4':
1314: case '5':
1315: case '6':
1316: case '7':
1317: case '8':
1318: case '9':
1319: tloc.line = tloc.line*10+c-'0';
1320: break;
1321:
1322: case 'l': // look for "#line ..." and then ignore "line"
1323: if (get(c)=='i' && get(c)=='n' && get(c)=='e') break;
1324: case '\n':
1325: tloc.putline();
1326: goto ll;
1327:
1328: default: // pass #rubbish through
1329: tloc.line = cl;
1330: pch('#');
1331: pch(c);
1332: while (get(c) != '\n') pch(c);
1333: pch('\0');
1334: fprintf(out_file,"\n%s\n",txtstart);
1335: start_txt();
1336: tloc.line++;
1337: // Nline++;
1338: goto ll;
1339: }
1340: }
1341: }
1342: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.