|
|
1.1 root 1: /*ident "@(#)ctrans:src/lex.c 1.5.1.1" */
2: /***************************************************************************
3:
4: C++ source for cfront, the C++ compiler front-end
5: written in the computer science research center of Bell Labs
6:
7: Copyright (c) 1984 AT&T, Inc. All Rights Reserved
8: THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF AT&T, INC.
9:
10: lex.c:
11: lexical analyser based on pcc's and cpre's scanners
12: modified to handle classes:
13: new keywords: class
14: public
15: call
16: etc.
17: names are not entered in the symbol table by lex()
18: names can be of arbitrary length
19: error() is used to report errors
20: {} and () must match
21: numeric constants are not converted into internal representation
22: but stored as strings
23:
24: ****************************************************************************/
25:
26: #include "cfront.h"
27: #include "yystype.h"
28: #include "size.h"
29: #include "tqueue.h"
30: #include "template.h"
31:
32: # define CCTRANS(x) x
33:
34: #ifndef NULL
35: #define NULL 0
36: #endif
37:
char* strdup(const char* s1)
/*
	string duplication
	returns a pointer to a new malloc()ed string which is the duplicate
	of the string pointed to by s1
	NULL is returned if s1 is NULL or if the new string can't be created
*/
{
	char* s2;

	if (s1 == NULL) return NULL;	// strlen(NULL) would be undefined

	// explicit cast: malloc() yields void*, which C++ does not convert implicitly
	s2 = (char*) malloc((unsigned) strlen(s1)+1);
	return (s2==NULL ? NULL : strcpy(s2,s1));
}
50:
// Yields a strdup()ed copy of s while a template or a template parameter list
// is in progress (templp->in_progress / templp->parameters_in_progress);
// otherwise yields s itself.
// NOTE(review): presumably the copy is needed because the text must outlive
// the lexer's reusable text buffer -- confirm against the template code.
#define copy_if_need_be(s) ((templp->in_progress || templp->parameters_in_progress) ? strdup(s) : (s))
52:
/* lexical actions */
/* each lxdope[] entry names one of these; tlex() switches on it */

#define A_ERR 0 /* illegal character */
#define A_LET 1 /* saw a letter */
#define A_DIG 2 /* saw a digit */
#define A_1C 3 /* return a single character */
#define A_STR 4 /* string */
#define A_CC 5 /* character constant */
#define A_BCD 6 /* GCOS BCD constant */
#define A_SL 7 /* saw a / */
#define A_DOT 8 /* saw a . */
#define A_2C 9 /* possible two character symbol */
#define A_WS 10 /* whitespace (not \n) */
#define A_NL 11 /* \n */
#define A_LC 12 /* { */
#define A_RC 13 /* } */
#define A_L 14 /* ( */
#define A_R 15 /* ) */
#define A_EOF 16 /* end of input */
#define A_ASS 17 /* = */
#define A_LT 18 /* < */
#define A_GT 19 /* > */
#define A_ER 20 /* ^ */
#define A_OR 21 /* | */
#define A_AND 22 /* & */
#define A_MOD 23 /* % */
#define A_NOT 24 /* ! */
#define A_MIN 25 /* - */
#define A_MUL 26 /* * */
#define A_PL 27 /* + */
#define A_COL 28 /* : */
#define A_SHARP 29 /* # */
#define A_DOLL 30 /* $ */
86:
/* character classes */
/* bit masks stored in lxmask[] (indexed by character+1); set up by lex_init() */

# define LEXLET 01	/* letters and '_' (also '$' when apollo is defined) */
# define LEXDIG 02	/* decimal digits */
/* no LEXOCT because 8 and 9 used to be octal digits */
# define LEXHEX 010	/* hexadecimal digits, both cases */
# define LEXWS 020	/* space, \t, \r, \b, \f and \013 */
# define LEXDOT 040	/* '.' */
95:
// sizes (in chars) of the static first text chunk and of each overflow chunk
const FIRSTCHUNK = 8*1024-8;
const BUFCHUNK = 4*1024-8;

/* text buffer */
static char inbuf[FIRSTCHUNK/*TBUFSZ*/];	// first, statically allocated chunk
static char* txtmax = &inbuf[FIRSTCHUNK/*TBUFSZ*/-1];	// last slot of the current chunk
static char* txtstart = 0;	// start of the text currently being collected
static char* txtfree = 0;	// next free slot in the current chunk

static struct buf* bufhead;	// overflow chunks in use, linked by new_buf()
static buf* freebuf;	// chunks released by lex_clear(), reused by new_buf()
//static bufs;

// one overflow chunk of token text
struct buf {
	buf* next;
	char chars[BUFCHUNK];
//	buf() { next=bufhead; bufhead=this; }
};
114:
115: new_buf(char c)
116: {
117: //fprintf(stderr,"new_buf %d\n",bufs++);
118: buf* pbuf;
119: if (freebuf) {
120: pbuf = freebuf;
121: freebuf = freebuf->next;
122: }
123: else
124: pbuf = new buf; // allocate and register new chunk
125: pbuf->next = bufhead;
126: bufhead = pbuf;
127:
128: if (BUFCHUNK < txtmax-txtstart) error('l',"lexical token too long");
129:
130: // copy current token:
131: char* p = txtstart;
132: txtstart = txtfree = &pbuf->chars[0];
133: while (p<txtmax) *txtfree++ = *p++;
134: *txtfree++=c;
135: txtmax = &pbuf->chars[BUFCHUNK-1];
136: return 0;
137: }
138:
139:
// pch(c):      append c to the text being collected; when the current chunk
//              is full (txtmax<=txtfree) new_buf() moves the text to a fresh chunk
// start_txt(): begin collecting a new text at the current free position
// del_txt():   discard the text collected since the last start_txt()
#define pch(c) ((txtmax<=txtfree)?new_buf(c):(*txtfree++=c))
#define start_txt() txtstart = txtfree
#define del_txt() txtfree = txtstart
143:
static int Nfile;// = 1;	// upper bound on loc::file accepted by loc::putline()/loc::put()
static char* file_name; // source file names

Linkage linkage = linkage_default; // linkage is default C++
const LINKMAX = 10;	// size of the linkage stack
static Linkage lvec[LINKMAX] = { linkage_default };	// linkage stack, see set_linkage()
static int lcount = 0;	// index of the top of lvec[]
151:
152: void set_linkage(char* p)
153: {
154: if (p==0 || *p == 0) { // resume previous linkage
155: if (lcount>0) linkage = lvec[--lcount];
156: }
157: else {
158: if (LINKMAX<=++lcount) {
159: error('l',"linkage directive nested too deep");
160: --lcount;
161: } else if (strcmp(p,"C")==0)
162: lvec[lcount] = linkage = linkage_C;
163: else if (strcmp(p,"C++")==0)
164: lvec[lcount] = linkage = linkage_Cplusplus;
165: else {
166: error("%s linkage",p);
167: --lcount;
168: }
169: }
170: }
171:
static struct loc tloc;	// location (file, line) attached to the tokens being read
FILE * out_file = stdout;
FILE * in_file = stdin;	// stream read by the get()/unget() macros
Ptable ktbl;	// created by ktbl_init(); looked up by tlex() after keyword_table
Ptable keyword_table;	// reserved words, filled in by new_key()

static int p_level = 0; /* number of unmatched ``(''s */
static int b_level = 0; /* number of unmatched ``{''s */

// CSSZ is the size of the character set handled by the lexer tables
# ifdef ibm

# define CSMASK 0377
# define CSSZ 256

# else

# define CSMASK 0177
# define CSSZ 128

# endif

// character class bits (LEX...), indexed by character+1 so that EOF can be looked up too
static short lxmask[CSSZ+1];

int saved = 0; /* putback character, avoid ungetchar */
static int lxtitle();
197:
// overload rt;
// rt() wraps a token value in a YYSTYPE, choosing the member from the argument type
inline YYSTYPE rt(char* x) { YYSTYPE y; y.s = x; return y; }
inline YYSTYPE rt(int x) { YYSTYPE y; y.t = x; return y; }
inline YYSTYPE rt(loc x) { YYSTYPE y; y.l = x; return y; }
inline YYSTYPE rt(void* x) { YYSTYPE y; y.pn = Pname(x); return y; }

// read one character from in_file into c / push one character back onto in_file
#define get(c) (c=getc(in_file))
#define unget(c) ungetc(c,in_file)

// each of these hands token a, its value and the location tloc to addtok()
// and then returns a from the enclosing function:
// reti: value b via rt(b); retn: b cast to Pnode; rets: value b via rt(b);
// retl: the value is tloc itself
#define reti(a,b) { addtok(a, rt(b), tloc); return a; }
#define retn(a,b) { addtok(a, rt((Pnode)b), tloc); return a; }
#define rets(a,b) { addtok(a, rt(b), tloc); return a; }
#define retl(a) { addtok(a, rt(tloc), tloc); return a; }
211:
212: // keys[] holds the external form for tokens with fixed representation
213: // illegal tokens and those with variable representation have 0 entries
214: char* keys[MAXTOK+1];
215:
216: static void
217: new_key(char* s, TOK toknum, TOK yyclass)
218: /*
219: make "s" a new keyword with the representation (token) "toknum"
220: "yyclass" is the yacc token (for example new_key("int",INT,TYPE); )
221: "yyclass==0" means yyclass=toknum;
222: */
223: {
224: Pname n = new name(s);
225:
226: keys[(toknum==LOC)?yyclass:toknum] = s;
227: n = new name(s);
228: Pname nn = keyword_table->insert(n,0);
229: // if (Nold) error('i',"keyword %sD twice",s);
230: nn->base = toknum;
231: nn->syn_class = (yyclass) ? yyclass : toknum;
232: delete n;
233: }
234:
235: const int keyword_count = 67;
236: static void
237: ktbl_init()
238: /*
239: enter keywords into keyword table for use by lex()
240: and into keyword representation table used for output
241:
242: ktbl is only for types. We put nothing in it.
243: keyword_table is for user-defined reserved words
244: */
245: {
246: ktbl = new table(KTBLSIZE,0,0);
247: keyword_table = new table(keyword_count,0,0);
248:
249: new_key("asm",ASM,0);
250: new_key("auto",AUTO,TYPE);
251: new_key("break",LOC,BREAK);
252: new_key("case",LOC,CASE);
253: new_key("continue",LOC,CONTINUE);
254: new_key("char",CHAR,TYPE);
255: new_key("do",LOC,DO);
256: new_key("double",DOUBLE,TYPE);
257: new_key("default",LOC,DEFAULT);
258: new_key("enum",ENUM,0);
259: new_key("else",LOC,ELSE);
260: new_key("extern",EXTERN,TYPE);
261: new_key("float",FLOAT,TYPE);
262: new_key("for",LOC,FOR);
263: new_key("goto",LOC,GOTO);
264: new_key("catch",CATCH,CATCH);
265: new_key("try",TRY,TRY);
266: new_key("if",LOC,IF);
267: new_key("int",INT,TYPE);
268: new_key("long",LONG,TYPE);
269: new_key("return",LOC,RETURN);
270: new_key("register",REGISTER,TYPE);
271: new_key("static",STATIC,TYPE);
272: new_key("struct",STRUCT,AGGR);
273: new_key("sizeof",SIZEOF,0);
274: new_key("short",SHORT,TYPE);
275: new_key("switch",LOC,SWITCH);
276: new_key("typedef",TYPEDEF,TYPE);
277: new_key("unsigned",UNSIGNED,TYPE);
278: new_key("union",UNION,AGGR);
279: new_key("void",VOID,TYPE);
280: new_key("while",LOC,WHILE);
281:
282: new_key("class",CLASS,AGGR);
283: new_key("const",CONST,TYPE);
284: new_key("delete",LOC,DELETE);
285: new_key("friend",FRIEND,TYPE);
286: new_key("inline",INLINE,TYPE);
287: new_key("new",NEW,0);
288: new_key("operator",OPERATOR,0);
289: new_key("overload",OVERLOAD,TYPE);
290: new_key("private",PRIVATE,PR);
291: new_key("protected",PROTECTED,PR);
292: new_key("public",PUBLIC,PR);
293: new_key("signed",SIGNED,TYPE);
294: new_key("template",TEMPLATE,0);
295: new_key("this",THIS,0);
296: new_key("virtual",VIRTUAL,TYPE);
297: new_key("volatile",VOLATILE,TYPE);
298:
299: new_key("__statement", STATEMENT, 0) ;
300: new_key("__expression", EXPRESSION, 0) ;
301: new_key("__template_test", TEMPLATE_TEST, 0) ;
302: }
303:
loc last_line;	// location most recently written by loc::putline()
loc noloc = { 0, 0 };	// all-zero location
306:
307: void loc::putline()
308: {
309: if (file==0 && line==0) return;
310: // fix, Nfile, not MAXFILE; Nfile == MAXFILE*4
311: // if (0<=file && file<MAXFILE) {
312: if ( 0<=file && file <= Nfile ) {
313: char* f = file_name;
314: if (f==0) f = src_file_name;
315: fprintf(out_file,line_format,line,f);
316: last_line = *this;
317: }
318: }
319:
320: void loc::put(FILE* p)
321: {
322: // fix, Nfile, not MAXFILE; Nfile == MAXFILE*4
323: // if (0<=file && file<MAXFILE) {
324: if ( 0<=file && file <= Nfile ) {
325: char* f = file_name;
326: if (f==0) f = src_file_name;
327: fprintf(p,"\"%s\", line %d: ",f,line);
328: }
329: }
330:
331: void lxenter(register char* s, short m)
332: /* enter a mask into lxmask */
333: {
334: register c;
335:
336: while( c= *s++ ) lxmask[c+1] |= m;
337:
338: }
339:
340:
341: void lxget(register c, register m)
342: /*
343: put 'c' back then scan for members of character class 'm'
344: terminate the string read with \0
345: txtfree points to the character position after that \0
346: */
347: {
348: pch(c);
349: while ( (get(c), lxmask[c+1]&m) ) pch(c);
350: unget(c);
351: pch('\0');
352: }
353:
/*
	one entry per character that needs its own treatment
	lex_init() makes lxcp[ch+1] point to the entry for character ch;
	characters without an entry of their own share lxdope[0], and letters,
	digits and whitespace share entries 1, 2 and 3
*/
struct LXDOPE {
	short lxch;	/* the character */
	short lxact;	/* the action to be performed */
	TOK lxtok;	/* the token number to be returned */
} lxdope[] = {
#ifdef apollo
	'@',	A_ERR,	0,	/* illegal characters go here... */
#else
	'$',	A_DOLL,	0,
//	'$',	A_ERR,	0,	/* illegal characters go here... */
#endif
	'_',	A_LET,	0,	/* letters point here */
	'0',	A_DIG,	0,	/* digits point here */
	' ',	A_WS,	0,	/* whitespace goes here */
	'\n',	A_NL,	0,
	'"',	A_STR,	0,	/* character string */
	'\'',	A_CC,	0,	/* ASCII character constant */
	'`',	A_BCD,	0,	/* 'foreign' character constant, e.g. BCD */
	'(',	A_L,	LP,
	')',	A_R,	RP,
	'{',	A_LC,	LC,
	'}',	A_RC,	RC,
	'[',	A_1C,	LB,
	']',	A_1C,	RB,
	'*',	A_MUL,	MUL,
	'?',	A_1C,	QUEST,
	':',	A_COL,	COLON,
	'+',	A_PL,	PLUS,
	'-',	A_MIN,	MINUS,
	'/',	A_SL,	DIV,
	'%',	A_MOD,	MOD,
	'&',	A_AND,	AND,
	'|',	A_OR,	OR,
	'^',	A_ER,	ER,
	'!',	A_NOT,	NOT,
	'~',	A_1C,	COMPL,
	',',	A_1C,	CM,
	';',	A_1C,	SM,
	'.',	A_DOT,	DOT,
	'<',	A_LT,	LT,
	'>',	A_GT,	GT,
	'=',	A_ASS,	ASSIGN,
	'#',	A_SHARP,	0,
	EOF,	A_EOF,	EOFTOK
};
/* note: EOF is used as sentinel, so must be <=0 and last entry in table */

/* lxdope entry for each character, indexed by character+1; filled in by lex_init() */
static struct LXDOPE *lxcp[CSSZ+1];
402:
403: void
404: lex_init()
405: {
406: register struct LXDOPE *p;
407: register i;
408: register char *cp;
409: /* set up character classes */
410:
411: /* first clear lexmask */
412: for(i=0; i<=CSSZ; i++) lxmask[i] = 0;
413:
414: #ifdef apollo
415: lxenter( "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_$", LEXLET );
416: #else
417: lxenter( "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_", LEXLET );
418: #endif
419: lxenter( "0123456789", LEXDIG );
420: lxenter( "0123456789abcdefABCDEF", LEXHEX );
421: /* \013 should become \v someday; \013 is OK for ASCII and EBCDIC */
422: lxenter( " \t\r\b\f\013", LEXWS );
423: lxmask['.'+1] |= LEXDOT;
424:
425: /* make lxcp point to appropriate lxdope entry for each character */
426:
427: /* initialize error entries */
428:
429: for( i= 0; i<=CSSZ; ++i ) lxcp[i] = lxdope;
430:
431: /* make unique entries */
432:
433: for( p=lxdope; ; ++p ) {
434: lxcp[p->lxch+1] = p;
435: if( p->lxch < 0 ) break;
436: }
437:
438: /* handle letters, digits, and whitespace */
439: /* by convention, first, second, and third places */
440:
441: cp = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
442: while( *cp ) lxcp[*cp++ + 1] = &lxdope[1];
443: cp = "123456789";
444: while( *cp ) lxcp[*cp++ + 1] = &lxdope[2];
445: cp = "\t\b\r\f\013";
446: while( *cp ) lxcp[*cp++ + 1] = &lxdope[3];
447:
448: file_name = src_file_name;
449: // set both curloc and tloc so curloc is valid at program startup
450: // curloc.file = tloc.file = 0;
451: curloc.line = tloc.line = 1;
452:
453: ktbl_init();
454: lex_clear();
455: saved = lxtitle();
456: }
457:
458: void lex_clear()
459: {
460: // delete extra buffers:
461: buf* p = bufhead;
462: bufhead = 0;
463: //if (p) {
464: //fprintf(stderr,"lex_clear\n");
465: //bufs=0;
466: //}
467: while (p) {
468: buf* pp = p;
469: p = p->next;
470: pp->next = freebuf;
471: freebuf = pp;
472: }
473:
474: // re-set to static buffer:
475: txtstart = txtfree = inbuf;
476: txtmax = &inbuf[FIRSTCHUNK/*TBUFSZ*/-1];
477: }
478:
int int_val(char hex)
/*
	return the value (0..15) of the hexadecimal digit "hex"
	-1 is returned if "hex" is not a hexadecimal digit
	(the original fell off the end of the function in that case)
*/
{
	switch (hex) {
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
		return hex-'0';
	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
		return hex-'a'+10;
	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
		return hex-'A'+10;
	default:
		return -1;
	}
}
491:
492: void hex_to_oct()
493: /*
494: \x has been seen on input (in char const or string) and \ printed
495: read the following hexadecimal integer and replace it with an octal
496: */
497: {
498: int i = 0;
499: int c;
500: get(c);
501: if (lxmask[c+1] & LEXHEX) {
502: i = int_val(c);
503: get(c); // try for two
504: if (lxmask[c+1] & LEXHEX) {
505: i = (i<<4) + int_val(c);
506: get(c); // try for three
507: if (lxmask[c+1] & LEXHEX)
508: i = (i<<4) + int_val(c);
509: else
510: unget(c);
511: }
512: else
513: unget(c);
514: }
515: else {
516: error("hexadecimal digitE after \\x");
517: unget(c);
518: }
519:
520: // if (0377 < i) error('l',"hexadecimal constant too large");
521: i &= 0377;
522:
523: pch(('0'+(i>>6)));
524: pch(('0'+((i&070)>>3)));
525: pch(('0'+(i&7)));
526: }
527:
528:
529: char * chconst()
530: /*
531: read a character constant into inbuf
532: */
533: {
534: register c;
535: int nch = 0;
536:
537: pch('\'');
538:
539: for(;;) {
540: char* p;
541: char cc = 0;
542:
543: switch (get(c)) {
544: case '\'':
545: goto ex;
546: case EOF:
547: error("eof in char constant");
548: goto ex;
549: case '\n':
550: error("newline in char constant");
551: goto ex;
552: case '\\':
553: if (SZ_INT == nch++) error('l',"char constant too long");
554: pch(c);
555: switch (get(c)){
556: case '\n':
557: ++tloc.line;
558: default:
559: pch(c);
560: break;
561: case '4': case '5': case '6': case '7': // octal
562: p = txtfree;
563: cc = c-4;
564: case '0': case '1': case '2': case '3':
565: pch(c);
566: get(c); /* try for 2 */
567: if( lxmask[c+1] & LEXDIG && c<'8'){
568: pch(c);
569: get(c); /* try for 3 */
570: if (lxmask[c+1] & LEXDIG && c<'8') {
571: if (cc) *p = cc; // zap high bit
572: pch(c);
573: }
574: else
575: unget(c);
576: }
577: else
578: unget(c);
579: break;
580: case 'x': // hexadecimal
581: hex_to_oct();
582: break;
583: };
584: break;
585: default:
586: if (SZ_INT == nch++) error('l',"char constant too long");
587: pch(c);
588: }
589: }
590: ex:
591: if(nch==0)
592: error("empty char constant");
593: pch('\'');
594: pch('\0');
595: return txtstart;
596: }
597:
598: void lxcom()
599: /* process a "block comment" */
600: {
601: register c;
602:
603: for(;;)
604: switch (get(c)) {
605: case EOF:
606: error('w',"eof in comment");
607: return;
608: case '\n':
609: tloc.line++;
610: // Nline++;
611: break;
612: case '*':
613: if (get(c) == '/') return;
614: unget(c);
615: break;
616: case '/':
617: if (get(c) == '*') error('w',"``/*'' in comment");
618: unget(c);
619: break;
620: }
621: }
622:
623:
624: void linecom()
625: // process a "line comment"
626: {
627: register c;
628:
629: get(c);
630: #ifdef DBG
631: if ( c=='@' && get(c)=='!' ) {
632: while ( get(c) != '\n' && c != EOF ) pch(c);
633: pch('\0');
634: process_debug_flags(txtstart);
635: del_txt();
636: }
637: #endif
638: for(;;get(c))
639: switch (c) {
640: case EOF:
641: error('w',"eof in comment");
642: return;
643: case '\n':
644: tloc.line++;
645: // Nline++;
646: saved = lxtitle();
647: return;
648: }
649: }
650:
651: char eat_whitespace()
652: {
653:
654: for(;;) {
655: register c = get(c);
656: lx:
657:
658: switch (c) {
659: case EOF:
660: error('w',"unexpected comment");
661: return EOF;
662: case '/':
663: switch (get(c)) {
664: case '*':
665: lxcom();
666: break;
667: case '/':
668: linecom();
669: break;
670: default:
671: unget(c);
672: return '/';
673: }
674: break;
675: case '\n':
676: ++tloc.line;
677: c = lxtitle();
678: goto lx;
679: case ' ':
680: case '\t':
681: break;
682: default:
683: return c;
684: }
685: }
686: }
687:
688: void get_string()
689: {
690: int lxchar;
691:
692: for(;;)
693: switch (get(lxchar)) {
694: case '\\':
695: pch('\\');
696: switch (get(lxchar)){
697: case '\n':
698: ++tloc.line;
699: default:
700: pch(lxchar);
701: break;
702: case 'x': // hexadecimal
703: hex_to_oct();
704: break;
705: };
706: break;
707: case '"':
708: { char* p = txtstart; // eat_whitespace() moves txtstart
709: if ((lxchar = eat_whitespace()) == '"') {
710: // string catenation, break with
711: // newline to avoid merging characters
712: // (e.g. "\xAB" "C")
713: pch('\\');
714: pch('\n');
715:
716: continue; // eat '\"' and carry on
717: };
718:
719: txtstart = p;
720: unget(lxchar);
721: pch(0);
722: return;
723: }
724: case '\n':
725: error("newline in string");
726: pch(0);
727: return;
728: case EOF:
729: error("eof in string");
730: pch(0);
731: return;
732: default:
733: pch(lxchar);
734: }
735: }
736:
737: TOK tlex()
738: {
739: TOK ret;
740: Pname n;
741:
742: // Ntoken++;
743:
744: for(;;) {
745: register lxchar;
746: register struct LXDOPE *p;
747:
748: start_txt();
749:
750: if (saved) {
751: lxchar = saved;
752: saved = 0;
753: }
754: else
755: get(lxchar);
756:
757: if (lxchar+1 >= CSSZ )
758: error( "illegal input character encountered: %d", lxchar );
759:
760: switch( (p=lxcp[lxchar+1])->lxact ){
761:
762: case A_1C: // eat up a single character, and return an opcode
763: reti(p->lxtok,p->lxtok);
764:
765: case A_EOF:
766: if (p_level || b_level+lcount)
767: error("'%s' missing at end of input",(b_level+lcount) ? "}" : ")");
768:
769: reti(EOFTOK,0);
770:
771: case A_SHARP:
772: // cope with header file not ended with '\n'
773: unget('#');
774: saved = lxtitle();
775: continue;
776:
777: case A_ERR:
778: { if (' '<=lxchar && lxchar<='~') // ASCII printable
779: error("illegal character '%c' (ignored)",lxchar);
780: else
781: error("illegal character '0%o' (ignored)",lxchar);
782: continue;
783: }
784:
785: case A_DOLL:
786: { // lex a name of the for $id for template tree formals
787: Pname fn ;
788: lxget( lxchar, LEXLET|LEXDIG ) ;
789:
790: if (!templp->in_progress || !txtstart[1]) {
791: // no name string immediately follows, treat it
792: // like an illegal character
793: error("illegal character '0%o' (ignored)",lxchar);
794: continue;
795: }
796:
797: txtstart++ ;
798: if(fn=templ_compilation::tree_parameter(txtstart)) {
799: switch (fn->n_template_arg) {
800: case template_expr_tree_formal:
801: // retain the $ in the name
802: retn(ID, strdup(--txtstart)) ;
803:
804: case template_stmt_tree_formal:
805: retn(SM_PARAM, fn) ;
806: }
807: }
808: error("%s wasn't a statement or expression formal", txtstart);
809: rets(ID, copy_if_need_be(txtstart));
810: }
811:
812: case A_LET: // collect an identifier and check for keyword
813: {
814: char ll;
815: switch (ll = lxchar) {
816: // case 'l':
817: case 'L':
818: switch (get(lxchar)) {
819: case '\'':
820: error('s',"wide character constant");
821: unget(lxchar);
822: continue;
823: case '"':
824: error('s',"wide character string");
825: unget(lxchar);
826: continue;
827: }
828: unget(lxchar);
829: lxchar = ll;
830: }
831: }
832:
833: lxget( lxchar, LEXLET|LEXDIG );
834: /* look for a keyword or a global type */
835: if ((n = keyword_table->look(txtstart,0)) /* keyword */
836: || (n = ktbl->look(txtstart, 0))) /* local type */
837: {
838: TOK x;
839: del_txt();
840: switch (x=n->base) {
841: case TNAME:
842: rets(ID,n->string);
843: case LOC:
844: retl(n->syn_class);
845: case EXTERN:
846: if ((lxchar = eat_whitespace()) == '\"') {
847: // linkage directive
848: get_string();
849: rets(LINKAGE,txtstart);
850: }
851: unget(lxchar);
852: reti(TYPE,EXTERN);
853: case CATCH:
854: // case TEMPLATE:
855: error('s',"%k",n->syn_class);
856: continue;
857: case TRY:
858: {
859: static int warn_try;
860: if (!warn_try) {
861: Pname n = keyword_table->look("try",0);
862: n->n_key = DEFAULT;
863: error('w',&tloc,"%k is a future reserved keyword",n->syn_class);
864: warn_try++;
865: }
866: rets(ID,n->string);
867: }
868: default:
869: reti(n->syn_class,x);
870: }
871: }
872: // rets(ID,txtstart);
873: rets(ID, copy_if_need_be(txtstart)) ;
874:
875: case A_DIG:
876:
877: ret = ICON;
878:
879: if (lxchar=='0') {
880: int pkchar;
881: get(pkchar);
882: if(pkchar=='x' || pkchar=='X') { // hex
883: pch(lxchar);
884: lxget(pkchar,LEXHEX);
885: txtfree--;
886: if (txtfree-txtstart<3) // minimum "0Xd\0"
887: error("hex digitX after \"0x\"");
888: get(lxchar);
889: goto getsuffix;
890: }
891: unget(pkchar);
892: }
893:
894: lxget(lxchar,LEXDIG);
895: txtfree--;
896:
897: if (get(lxchar) == '.') {
898: getfp:
899: lxget('.', LEXDIG );
900: txtfree--;
901: ret = FCON;
902: get(lxchar);
903: };
904:
905: if (lxchar=='e' || lxchar=='E') {
906: pch(lxchar);
907: get(lxchar);
908: if(lxchar=='-' || lxchar=='+') {
909: pch(lxchar);
910: get(lxchar);
911: }
912: if (lxmask[lxchar+1] & LEXDIG) {
913: lxget( lxchar, LEXDIG );
914: txtfree--;
915: get(lxchar);
916: }
917: else
918: error("missing exponent digits?");
919: ret = FCON;
920: };
921:
922: if(*txtstart=='0' && ret==ICON) {
923: char *bch = txtstart;
924: while (++bch <= txtfree) {
925: if(*bch=='8' || *bch=='9')
926: error("%c used as octal digit",*bch);
927: }
928: }
929:
930: getsuffix:
931: switch (lxchar) {
932: case 'f':
933: case 'F':
934: if (ret==ICON)
935: error("%c suffix for integer constant",lxchar);
936: else
937: pch(lxchar);
938: break;
939: case 'u':
940: case 'U':
941: if (ret==FCON) {
942: error("%c suffix for floating constant",lxchar);
943: break;
944: }
945: pch(lxchar);
946: switch(get(lxchar)) {
947: case 'l':
948: case 'L':
949: pch(lxchar);
950: break;
951: default:
952: saved=lxchar;
953: break;
954: }
955: break;
956: case 'l':
957: case 'L':
958: pch(lxchar);
959: if (ret==FCON) {
960: break;
961: }
962: switch(get(lxchar)) {
963: case 'u':
964: case 'U':
965: pch(lxchar);
966: break;
967: default:
968: saved=lxchar;
969: break;
970: }
971: break;
972: default:
973: saved = lxchar;
974: break;
975: };
976:
977: if(*txtstart=='0' && txtfree-txtstart==1)
978: reti(ZERO,0); // plain zero
979:
980: pch(0);
981: rets(ret,txtstart);
982:
983:
984: case A_DOT:
985: switch (get(lxchar)) {
986: case '.': // look for ellipsis
987: if (get(lxchar) != '.') {
988: error("token .. ?");
989: saved = lxchar;
990: }
991: reti(ELLIPSIS,0);
992: case '*':
993: reti (REFMUL,DOT);
994: }
995:
996: if (lxmask[lxchar+1] & LEXDIG){// look for floating constant
997: unget(lxchar);
998: goto getfp;
999: }
1000: saved = lxchar;
1001: reti(DOT,0);
1002:
1003: case A_STR:
1004: /* save string constant in buffer */
1005: get_string();
1006: rets(STRING,txtstart);
1007:
1008: case A_CC:
1009: /* character constant */
1010: rets(CCON,chconst());
1011:
1012: case A_BCD:
1013: {
1014: register i;
1015: int j;
1016:
1017: pch('`');
1018:
1019: for (i=0; i<7; ++i) {
1020: pch(get(j));
1021: if (j == '`' ) break;
1022: }
1023: pch(0);
1024: if (6<i)
1025: error('l',"bcd constant exceeds 6 characters" );
1026: rets(CCON,txtstart);
1027: }
1028:
1029: case A_SL: /* / */
1030: switch (get(lxchar)) {
1031: case '*':
1032: lxcom();
1033: break;
1034: case '/':
1035: linecom();
1036: break;
1037: case '=':
1038: reti(ASOP,ASDIV);
1039: default:
1040: saved = lxchar;
1041: reti(DIVOP,DIV);
1042: }
1043:
1044: case A_WS:
1045: continue;
1046:
1047: case A_NL:
1048: ++tloc.line;
1049: // Nline++;
1050: saved = lxtitle();
1051: continue;
1052:
1053: case A_LC:
1054: if (BLMAX <= b_level++) {
1055: error('l',"blocks too deeply nested");
1056: ext(3);
1057: }
1058: retl(LC);
1059:
1060: case A_RC:
1061: if (lcount+b_level-- <= 0) {
1062: error("unexpected '}'");
1063: b_level = 0;
1064: }
1065: retl(RC);
1066:
1067: case A_L:
1068: p_level++;
1069: reti(LP,0);
1070:
1071: case A_R:
1072: if (p_level-- <= 0) {
1073: error("unexpected ')'");
1074: p_level = 0;
1075: }
1076: reti(RP,0);
1077:
1078: case A_ASS:
1079: switch (get(lxchar)) {
1080: case '=':
1081: reti(EQUOP,EQ);
1082: default:
1083: saved = lxchar;
1084: reti(ASSIGN,ASSIGN);
1085: }
1086:
1087: case A_COL:
1088: switch (get(lxchar)) {
1089: case ':':
1090: reti(MEM,0);
1091: case '=':
1092: error("':=' is not a c++ operator");
1093: reti(ASSIGN,ASSIGN);
1094: default:
1095: saved = lxchar;
1096: reti(COLON,COLON);
1097: }
1098: case A_NOT:
1099: switch (get(lxchar)) {
1100: case '=':
1101: reti(EQUOP,NE);
1102: default:
1103: saved = lxchar;
1104: reti(NOT,NOT);
1105: }
1106: case A_GT:
1107: switch(get(lxchar)) {
1108: case '>':
1109: switch (get(lxchar)) {
1110: case '=':
1111: reti(ASOP,ASRS);
1112: break;
1113: default:
1114: saved = lxchar;
1115: reti(SHIFTOP,RS);
1116: }
1117: case '=':
1118: reti(RELOP,GE);
1119: default:
1120: saved = lxchar;
1121: reti(GT,GT);
1122: }
1123: case A_LT:
1124: switch (get(lxchar)) {
1125: case '<':
1126: switch (get(lxchar)) {
1127: case '=':
1128: reti(ASOP,ASLS);
1129: default:
1130: saved = lxchar;
1131: reti(SHIFTOP,LS);
1132: }
1133: case '=':
1134: reti(RELOP,LE);
1135: default:
1136: saved = lxchar;
1137: reti(LT,LT);
1138: }
1139: case A_AND:
1140: switch (get(lxchar)) {
1141: case '&':
1142: reti(ANDAND,ANDAND);
1143: case '=':
1144: reti(ASOP,ASAND);
1145: default:
1146: saved = lxchar;
1147: reti(AND,AND);
1148: }
1149: case A_OR:
1150: switch (get(lxchar)) {
1151: case '|':
1152: reti(OROR,OROR);
1153: case '=':
1154: reti(ASOP,ASOR);
1155: default:
1156: saved = lxchar;
1157: reti(OR,OR);
1158: }
1159: case A_ER:
1160: switch (get(lxchar)) {
1161: case '=':
1162: reti(ASOP,ASER);
1163: default:
1164: saved = lxchar;
1165: reti(ER,ER);
1166: }
1167: case A_PL:
1168: switch (get(lxchar)) {
1169: case '=':
1170: reti(ASOP,ASPLUS);
1171: case '+':
1172: reti(ICOP,INCR);
1173: default:
1174: saved = lxchar;
1175: reti(PLUS,PLUS);
1176: }
1177: case A_MIN:
1178: switch (get(lxchar)) {
1179: case '=':
1180: reti(ASOP,ASMINUS);
1181: case '-':
1182: reti(ICOP,DECR);
1183: case '>':
1184: if (get(lxchar) == '*')
1185: {reti(REFMUL,REF);}
1186: else
1187: saved = lxchar;
1188: reti(REF,REF);
1189: default:
1190: saved = lxchar;
1191: reti(MINUS,MINUS);
1192: }
1193: case A_MUL:
1194: switch (get(lxchar)) {
1195: case '=':
1196: reti(ASOP,ASMUL);
1197: case '/':
1198: error('w',"*/ not as end of comment");
1199: default:
1200: saved = lxchar;
1201: reti(MUL,MUL);
1202: }
1203: case A_MOD:
1204: switch (get(lxchar)) {
1205: case '=':
1206: reti(ASOP,ASMOD);
1207: default:
1208: saved = lxchar;
1209: reti(DIVOP,MOD);
1210: }
1211: default:
1212: {error('i',"lex act==%d getc()->%d",p,lxchar);}
1213:
1214: }
1215:
1216: error('i',"lex, main switch");
1217: }
1218:
1219: }
1220:
1221: int lxtitle()
1222: /*
1223: called after a newline; set linenumber and file name
1224: */
1225: {
1226: register c;
1227:
1228: for(;;)
1229: switch ( get(c) ) {
1230: default: // e.g. not '\n', not '#'
1231: return c;
1232: case '\n':
1233: tloc.line++;
1234: // Nline++;
1235: ll:
1236: break;
1237: case '#': /* # lineno "filename" */
1238: { int cl = tloc.line;
1239: tloc.line = 0;
1240: for(;;)
1241: switch (get(c)) {
1242: case '"':
1243: start_txt();
1244: for(;;)
1245: switch (get(c)) {
1246: case '"':
1247: pch('\0');
1248:
1249: while (get(c) != '\n') ; // skip to eol.. ignore anything more
1250:
1251: if (*txtstart) { // stack file name
1252: if (curr_file == 0){
1253: char* p1 = new char[txtfree-txtstart + 1];
1254: (void) strcpy(p1,txtstart);
1255: file_name = p1;
1256: }
1257: else { //&& line is dummy #line "input.c"
1258: // ignore
1259: }
1260: del_txt();
1261: goto ll;
1262: }
1263: case '\n':
1264: error("unexpected end of line on '# line'");
1265: default:
1266: pch(c);
1267: }
1268: case ' ':
1269: break;
1270:
1271: case '0':
1272: case '1':
1273: case '2':
1274: case '3':
1275: case '4':
1276: case '5':
1277: case '6':
1278: case '7':
1279: case '8':
1280: case '9':
1281: tloc.line = tloc.line*10+c-'0';
1282: break;
1283:
1284: case 'l': // look for "#line ..." and then ignore "line"
1285: if (get(c)=='i' && get(c)=='n' && get(c)=='e') break;
1286: case '\n':
1287: tloc.putline();
1288: goto ll;
1289:
1290: default: // pass #rubbish through
1291: tloc.line = cl;
1292: pch('#');
1293: pch(c);
1294: while (get(c) != '\n') pch(c);
1295: pch('\0');
1296: fprintf(out_file,"\n%s\n",txtstart);
1297: start_txt();
1298: tloc.line++;
1299: // Nline++;
1300: goto ll;
1301: }
1302: }
1303: }
1304: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.