|
|
1.1 root 1: /* C compiler
2: *
3: *
4: *
5: * Called from cc:
6: * c0 source temp1 temp2 [ profileflag ]
7: * temp1 gets most of the intermediate code;
8: * strings are put on temp2, which c1 reads after temp1.
9: */
10:
11: #include "c0.h"
12:
13: int isn = 1;
14: int peeksym = -1;
15: int line = 1;
16: struct tnode funcblk = { NAME };
17:
18: struct kwtab {
19: char *kwname;
20: int kwval;
21: } kwtab[] = {
22: "int", INT,
23: "char", CHAR,
24: "float", FLOAT,
25: "double", DOUBLE,
26: "struct", STRUCT,
27: "long", LONG,
28: "unsigned", UNSIGN,
29: "union", UNION,
30: "short", INT,
31: "void", VOID,
32: "auto", AUTO,
33: "extern", EXTERN,
34: "static", STATIC,
35: "register", REG,
36: "goto", GOTO,
37: "return", RETURN,
38: "if", IF,
39: "while", WHILE,
40: "else", ELSE,
41: "switch", SWITCH,
42: "case", CASE,
43: "break", BREAK,
44: "continue", CONTIN,
45: "do", DO,
46: "default", DEFAULT,
47: "for", FOR,
48: "sizeof", SIZEOF,
49: "typedef", TYPEDEF,
50: "enum", ENUM,
51: 0, 0,
52: };
53:
54: union tree *cmst[CMSIZ];
55: union tree **cp = cmst;
56:
57: main(argc, argv)
58: char *argv[];
59: {
60: register unsigned i;
61: register struct kwtab *ip;
62:
63: if (argc>1 && strcmp(argv[1], "-u")==0) {
64: argc--;
65: argv++;
66: unscflg++;
67: }
68: if(argc<4) {
69: error("Arg count");
70: exit(1);
71: }
72: if (freopen(argv[1], "r", stdin)==NULL) {
73: error("Can't find %s", argv[1]);
74: exit(1);
75: }
76: if (freopen(argv[2], "w", stdout)==NULL || (sbufp=fopen(argv[3],"w"))==NULL) {
77: error("Can't create temp");
78: exit(1);
79: }
80: setbuf(sbufp, sbuf);
81: if (argc>4)
82: proflg++;
83: /*
84: * The hash table locations of the keywords
85: * are marked; if an identifier hashes to one of
86: * these locations, it is looked up in in the keyword
87: * table first.
88: */
89: for (ip=kwtab; ip->kwname; ip++) {
90: i = hash(ip->kwname);
91: kwhash[i/LNBPW] |= 1 << (i%LNBPW);
92: }
93: coremax = locbase = sbrk(0);
94: while(!eof)
95: extdef();
96: outcode("B", EOFC);
97: strflg++;
98: outcode("B", EOFC);
99: blkend();
100: exit(nerror!=0);
101: }
102:
103: /*
104: * Look up the identifier in symbuf in the symbol table.
105: * If it hashes to the same spot as a keyword, try the keyword table
106: * first.
107: * Return is a ptr to the symbol table entry.
108: */
109: lookup()
110: {
111: unsigned ihash;
112: register struct nmlist *rp;
113: register char *sp, *np;
114:
115: ihash = hash(symbuf);
116: if (kwhash[ihash/LNBPW] & (1 << (ihash%LNBPW)))
117: if (findkw())
118: return(KEYW);
119: rp = hshtab[ihash];
120: while (rp) {
121: np = rp->name;
122: for (sp=symbuf; sp<symbuf+NCPS;)
123: if (*np++ != *sp++)
124: goto no;
125: if (mossym != (rp->hflag&FKIND))
126: goto no;
127: csym = rp;
128: return(NAME);
129: no:
130: rp = rp->nextnm;
131: }
132: rp = (struct nmlist *)Dblock(sizeof(struct nmlist));
133: rp->nextnm = hshtab[ihash];
134: hshtab[ihash] = rp;
135: rp->hclass = 0;
136: rp->htype = 0;
137: rp->hoffset = 0;
138: rp->hsubsp = NULL;
139: rp->hstrp = NULL;
140: rp->sparent = NULL;
141: rp->hblklev = blklev;
142: rp->hflag = mossym;
143: sp = symbuf;
144: for (np=rp->name; sp<symbuf+NCPS;)
145: *np++ = *sp++;
146: csym = rp;
147: return(NAME);
148: }
149:
150: /*
151: * Search the keyword table.
152: */
153: findkw()
154: {
155: register struct kwtab *kp;
156: register char *p1, *p2;
157: char *wp;
158: int firstc;
159:
160: wp = symbuf;
161: firstc = *wp;
162: for (kp=kwtab; (p2 = kp->kwname); kp++) {
163: p1 = wp;
164: while (*p1 == *p2++)
165: if (*p1++ == '\0') {
166: cval = kp->kwval;
167: return(1);
168: }
169: }
170: *wp = firstc;
171: return(0);
172: }
173:
174:
175: /*
176: * Return the next symbol from the input.
177: * peeksym is a pushed-back symbol, peekc is a pushed-back
178: * character (after peeksym).
179: * mosflg means that the next symbol, if an identifier,
180: * is a member of structure or a structure tag or an enum tag
181: */
182: symbol()
183: {
184: register c;
185: register char *sp;
186: register tline;
187:
188: if (peeksym>=0) {
189: c = peeksym;
190: peeksym = -1;
191: if (c==NAME)
192: mosflg = 0;
193: return(c);
194: }
195: if (peekc) {
196: c = peekc;
197: peekc = 0;
198: } else
199: if (eof)
200: return(EOFC);
201: else
202: c = getchar();
203: loop:
204: if (c==EOF) {
205: eof++;
206: return(EOFC);
207: }
208: switch(ctab[c]) {
209:
210: case SHARP:
211: if ((c=symbol())!=CON) {
212: error("Illegal #");
213: return(c);
214: }
215: tline = cval;
216: while (ctab[peekc]==SPACE)
217: peekc = getchar();
218: if (peekc=='"') {
219: sp = filename;
220: while ((c = mapch('"')) >= 0)
221: *sp++ = c;
222: *sp++ = 0;
223: peekc = getchar();
224: }
225: if (peekc != '\n') {
226: error("Illegal #");
227: while (getchar()!='\n' && eof==0)
228: ;
229: }
230: peekc = 0;
231: line = tline;
232: return(symbol());
233:
234: case NEWLN:
235: line++;
236:
237: case SPACE:
238: c = getchar();
239: goto loop;
240:
241: case PLUS:
242: return(subseq(c,PLUS,INCBEF));
243:
244: case MINUS:
245: if (subseq(c, 0, 1))
246: return(DECBEF);
247: return(subseq('>', MINUS, ARROW));
248:
249: case ASSIGN:
250: return(subseq(c, ASSIGN, EQUAL));
251:
252: case LESS:
253: if (subseq(c,0,1))
254: return(LSHIFT);
255: return(subseq('=',LESS,LESSEQ));
256:
257: case GREAT:
258: if (subseq(c,0,1))
259: return(RSHIFT);
260: return(subseq('=',GREAT,GREATEQ));
261:
262: case EXCLA:
263: return(subseq('=',EXCLA,NEQUAL));
264:
265: case BSLASH:
266: if (subseq('/', 0, 1))
267: return(MAX);
268: goto unkn;
269:
270: case DIVIDE:
271: if (subseq('\\', 0, 1))
272: return(MIN);
273: if (subseq('*',1,0))
274: return(DIVIDE);
275: while ((c = spnextchar()) != EOFC) {
276: peekc = 0;
277: if (c=='*') {
278: if (spnextchar() == '/') {
279: peekc = 0;
280: c = getchar();
281: goto loop;
282: }
283: }
284: }
285: eof++;
286: error("Nonterminated comment");
287: return(0);
288:
289: case PERIOD:
290: case DIGIT:
291: peekc = c;
292: return(getnum());
293:
294: case DQUOTE:
295: cval = isn++;
296: return(STRING);
297:
298: case SQUOTE:
299: return(getcc());
300:
301: case LETTER:
302: sp = symbuf;
303: while(ctab[c]==LETTER || ctab[c]==DIGIT) {
304: if (sp<symbuf+NCPS)
305: *sp++ = c;
306: c = getchar();
307: }
308: while(sp<symbuf+NCPS)
309: *sp++ = '\0';
310: mossym = mosflg;
311: mosflg = 0;
312: peekc = c;
313: if ((c=lookup())==KEYW && cval==SIZEOF)
314: c = SIZEOF;
315: return(c);
316:
317: case AND:
318: return(subseq('&', AND, LOGAND));
319:
320: case OR:
321: return(subseq('|', OR, LOGOR));
322:
323: case UNKN:
324: unkn:
325: error("Unknown character");
326: c = getchar();
327: goto loop;
328:
329: }
330: return(ctab[c]);
331: }
332:
333: /*
334: * Read a number. Return kind.
335: */
336: getnum()
337: {
338: register char *np;
339: register c, base;
340: int expseen, sym, ndigit;
341: char *nsyn;
342: int maxdigit;
343:
344: nsyn = "Number syntax";
345: lcval = 0;
346: base = 10;
347: maxdigit = 0;
348: np = numbuf;
349: ndigit = 0;
350: sym = CON;
351: expseen = 0;
352: if ((c=spnextchar()) == '0')
353: base = 8;
354: for (;; c = getchar()) {
355: *np++ = c;
356: if (ctab[c]==DIGIT || (base==16) && ('a'<=c&&c<='f'||'A'<=c&&c<='F')) {
357: if (base==8)
358: lcval <<= 3;
359: else if (base==10)
360: lcval = ((lcval<<2) + lcval)<<1;
361: else
362: lcval <<= 4;
363: if (ctab[c]==DIGIT)
364: c -= '0';
365: else if (c>='a')
366: c -= 'a'-10;
367: else
368: c -= 'A'-10;
369: lcval += c;
370: ndigit++;
371: if (c>maxdigit)
372: maxdigit = c;
373: continue;
374: }
375: if (c=='.') {
376: if (base==16 || sym==FCON)
377: error(nsyn);
378: sym = FCON;
379: base = 10;
380: continue;
381: }
382: if (ndigit==0) {
383: sym = DOT;
384: break;
385: }
386: if ((c=='e'||c=='E') && expseen==0) {
387: expseen++;
388: sym = FCON;
389: if (base==16 || maxdigit>=10)
390: error(nsyn);
391: base = 10;
392: *np++ = c = getchar();
393: if (c!='+' && c!='-' && ctab[c]!=DIGIT)
394: break;
395: } else if (c=='x' || c=='X') {
396: if (base!=8 || lcval!=0 || sym!=CON)
397: error(nsyn);
398: base = 16;
399: } else if ((c=='l' || c=='L') && sym==CON) {
400: c = getchar();
401: sym = LCON;
402: break;
403: } else
404: break;
405: }
406: peekc = c;
407: if (maxdigit >= base)
408: error(nsyn);
409: if (sym==FCON) {
410: np[-1] = 0;
411: cval = np-numbuf;
412: return(FCON);
413: }
414: if (sym==CON && (lcval<0 || lcval>MAXINT&&base==10 || (lcval>>1)>MAXINT)) {
415: sym = LCON;
416: }
417: cval = lcval;
418: return(sym);
419: }
420:
421: /*
422: * If the next input character is c, return b and advance.
423: * Otherwise push back the character and return a.
424: */
425: subseq(c,a,b)
426: {
427: if (spnextchar() != c)
428: return(a);
429: peekc = 0;
430: return(b);
431: }
432:
433: /*
434: * Write out a string, either in-line
435: * or in the string temp file labelled by
436: * lab.
437: */
438: putstr(lab, max)
439: register max;
440: {
441: register int c;
442:
443: nchstr = 0;
444: if (lab) {
445: strflg++;
446: outcode("BNB", LABEL, lab, BDATA);
447: max = 10000;
448: } else
449: outcode("B", BDATA);
450: while ((c = mapch('"')) >= 0) {
451: if (nchstr < max) {
452: nchstr++;
453: if (nchstr%15 == 0)
454: outcode("0B", BDATA);
455: outcode("1N", c & 0377);
456: }
457: }
458: if (nchstr < max) {
459: nchstr++;
460: outcode("10");
461: }
462: outcode("0");
463: strflg = 0;
464: }
465:
466: /*
467: * read a single-quoted character constant.
468: * The routine is sensitive to the layout of
469: * characters in a word.
470: */
471: getcc()
472: {
473: register int c, cc;
474: register char *ccp;
475: char realc;
476:
477: cval = 0;
478: ccp = (char *)&cval;
479: cc = 0;
480: while((c=mapch('\'')) >= 0)
481: if(cc++ < LNCPW)
482: *ccp++ = c;
483: if (cc>LNCPW)
484: error("Long character constant");
485: if (cc==1) {
486: realc = cval;
487: cval = realc;
488: }
489: return(CON);
490: }
491:
492: /*
493: * Read a character in a string or character constant,
494: * detecting the end of the string.
495: * It implements the escape sequences.
496: */
497: mapch(ac)
498: {
499: register int a, c, n;
500: static mpeek;
501:
502: c = ac;
503: if (a = mpeek)
504: mpeek = 0;
505: else
506: a = getchar();
507: loop:
508: if (a==c)
509: return(-1);
510: switch(a) {
511:
512: case '\n':
513: case '\0':
514: error("Nonterminated string");
515: peekc = a;
516: return(-1);
517:
518: case '\\':
519: switch (a=getchar()) {
520:
521: case 't':
522: return('\t');
523:
524: case 'n':
525: return('\n');
526:
527: case 'b':
528: return('\b');
529:
530: case 'f':
531: return('\014');
532:
533: case 'v':
534: return('\013');
535:
536: case '0': case '1': case '2': case '3':
537: case '4': case '5': case '6': case '7':
538: n = 0;
539: c = 0;
540: while (++c<=3 && '0'<=a && a<='7') {
541: n <<= 3;
542: n += a-'0';
543: a = getchar();
544: }
545: mpeek = a;
546: return(n);
547:
548: case 'r':
549: return('\r');
550:
551: case '\n':
552: line++;
553: a = getchar();
554: goto loop;
555: }
556: }
557: return(a);
558: }
559:
560: /*
561: * Read an expression and return a pointer to its tree.
562: * It's the classical bottom-up, priority-driven scheme.
563: * The initflg prevents the parse from going past
564: * "," or ":" because those delimiters are special
565: * in initializer (and some other) expressions.
566: */
567: union tree *
568: tree(eflag)
569: {
570: int *op, opst[SSIZE], *pp, prst[SSIZE];
571: register int andflg, o;
572: register struct nmlist *cs;
573: int p, ps, os;
574: char *svtree;
575: static struct cnode garbage = { CON, INT, (int *)NULL, (union str *)NULL, 0 };
576:
577: svtree = starttree();
578: op = opst;
579: pp = prst;
580: *op = SEOF;
581: *pp = 06;
582: andflg = 0;
583:
584: advanc:
585: switch (o=symbol()) {
586:
587: case NAME:
588: cs = csym;
589: if (cs->hclass==TYPEDEF)
590: goto atype;
591: if (cs->hclass==ENUMCON) {
592: *cp++ = cblock(cs->hoffset);
593: goto tand;
594: }
595: if (cs->hclass==0 && cs->htype==0)
596: if(nextchar()=='(') {
597: /* set function */
598: cs->hclass = EXTERN;
599: cs->htype = FUNC;
600: } else {
601: cs->hclass = STATIC;
602: error("%.8s undefined; func. %.8s", cs->name,
603: funcsym?funcsym->name:"(none)");
604: }
605: *cp++ = nblock(cs);
606: goto tand;
607:
608: case FCON:
609: *cp++ = fblock(DOUBLE, copnum(cval));
610: goto tand;
611:
612: case LCON:
613: *cp = (union tree *)Tblock(sizeof(struct lnode));
614: (*cp)->l.op = LCON;
615: (*cp)->l.type = LONG;
616: (*cp)->l.lvalue = lcval;
617: cp++;
618: goto tand;
619:
620: case CON:
621: *cp++ = cblock(cval);
622: goto tand;
623:
624: /* fake a static char array */
625: case STRING:
626: putstr(cval, 0);
627: cs = (struct nmlist *)Tblock(sizeof(struct nmlist));
628: cs->hclass = STATIC;
629: cs->hoffset = cval;
630: *cp++ = block(NAME, unscflg? ARRAY+UNCHAR:ARRAY+CHAR, &nchstr,
631: (union str *)NULL, (union tree *)cs, TNULL);
632:
633: tand:
634: if(cp>=cmst+CMSIZ) {
635: error("Expression overflow");
636: exit(1);
637: }
638: if (andflg)
639: goto syntax;
640: andflg = 1;
641: goto advanc;
642:
643: case KEYW:
644: atype:
645: if (*op != LPARN || andflg)
646: goto syntax;
647: peeksym = o;
648: *cp++ = xprtype();
649: if ((o=symbol()) != RPARN)
650: goto syntax;
651: o = CAST;
652: --op;
653: --pp;
654: if (*op == SIZEOF) {
655: andflg = 1;
656: *pp = 100;
657: goto advanc;
658: }
659: goto oponst;
660:
661: case INCBEF:
662: case DECBEF:
663: if (andflg)
664: o += 2;
665: goto oponst;
666:
667: case COMPL:
668: case EXCLA:
669: case SIZEOF:
670: if (andflg)
671: goto syntax;
672: goto oponst;
673:
674: case MINUS:
675: if (!andflg)
676: o = NEG;
677: andflg = 0;
678: goto oponst;
679:
680: case AND:
681: case TIMES:
682: if (andflg)
683: andflg = 0;
684: else if (o==AND)
685: o = AMPER;
686: else
687: o = STAR;
688: goto oponst;
689:
690: case LPARN:
691: if (andflg) {
692: o = symbol();
693: if (o==RPARN)
694: o = MCALL;
695: else {
696: peeksym = o;
697: o = CALL;
698: andflg = 0;
699: }
700: }
701: goto oponst;
702:
703: case RBRACK:
704: case RPARN:
705: if (!andflg)
706: goto syntax;
707: goto oponst;
708:
709: case DOT:
710: case ARROW:
711: mosflg = FMOS;
712: break;
713:
714: case ASSIGN:
715: if (andflg==0 && PLUS<=*op && *op<=EXOR) {
716: o = *op-- + ASPLUS - PLUS;
717: pp--;
718: goto oponst;
719: }
720: break;
721:
722: }
723: /* binaries */
724: if (andflg==0)
725: goto syntax;
726: andflg = 0;
727:
728: oponst:
729: p = (opdope[o]>>9) & 037;
730: opon1:
731: if (o==COLON && op[0]==COLON && op[-1]==QUEST) {
732: build(*op--);
733: build(*op--);
734: pp -= 2;
735: }
736: ps = *pp;
737: if (p>ps || p==ps && (opdope[o]&RASSOC)!=0) {
738: switch (o) {
739:
740: case INCAFT:
741: case DECAFT:
742: p = 37;
743: break;
744: case LPARN:
745: case LBRACK:
746: case CALL:
747: p = 04;
748: }
749: if (initflg) {
750: if ((o==COMMA && *op!=LPARN && *op!=CALL)
751: || (o==COLON && *op!=QUEST)) {
752: p = 00;
753: goto opon1;
754: }
755: }
756: if (op >= &opst[SSIZE-1]) {
757: error("expression overflow");
758: exit(1);
759: }
760: *++op = o;
761: *++pp = p;
762: goto advanc;
763: }
764: --pp;
765: os = *op--;
766: if (andflg==0 && p>5 && ((opdope[o]&BINARY)==0 || o>=INCBEF&&o<=DECAFT) && opdope[os]&BINARY)
767: goto syntax;
768: switch (os) {
769:
770: case SEOF:
771: peeksym = o;
772: build(0); /* flush conversions */
773: if (eflag)
774: endtree(svtree);
775: return(*--cp);
776:
777: case COMMA:
778: if (*op != CALL)
779: os = SEQNC;
780: break;
781:
782: case CALL:
783: if (o!=RPARN)
784: goto syntax;
785: build(os);
786: goto advanc;
787:
788: case MCALL:
789: *cp++ = block(NULLOP, INT, (int *)NULL,
790: (union str *)NULL, TNULL, TNULL);
791: os = CALL;
792: break;
793:
794: case INCBEF:
795: case INCAFT:
796: case DECBEF:
797: case DECAFT:
798: *cp++ = cblock(1);
799: break;
800:
801: case LPARN:
802: if (o!=RPARN)
803: goto syntax;
804: goto advanc;
805:
806: case LBRACK:
807: if (o!=RBRACK)
808: goto syntax;
809: build(LBRACK);
810: goto advanc;
811: }
812: build(os);
813: goto opon1;
814:
815: syntax:
816: error("Expression syntax");
817: errflush(o);
818: if (eflag)
819: endtree(svtree);
820: return((union tree *) &garbage);
821: }
822:
823: union tree *
824: xprtype()
825: {
826: struct nmlist typer, absname;
827: int sc;
828: register union tree **scp;
829:
830: scp = cp;
831: sc = DEFXTRN; /* will cause error if class mentioned */
832: getkeywords(&sc, &typer);
833: absname.hclass = 0;
834: absname.hblklev = blklev;
835: absname.hsubsp = NULL;
836: absname.hstrp = NULL;
837: absname.htype = 0;
838: decl1(sc, &typer, 0, &absname);
839: cp = scp;
840: return(block(ETYPE, absname.htype, absname.hsubsp,
841: absname.hstrp, TNULL, TNULL));
842: }
843:
844: char *
845: copnum(len)
846: {
847: register char *s1, *s2, *s3;
848:
849: s1 = s2 = Tblock((len+LNCPW-1) & ~(LNCPW-1));
850: s3 = numbuf;
851: while (*s2++ = *s3++)
852: ;
853: return(s1);
854: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.