|
|
1.1 root 1: /*
2: * Copyright (c) 1989 The Regents of the University of California.
3: * All rights reserved.
4: *
5: * This code is derived from software contributed to Berkeley by
6: * Robert Paul Corbett.
7: *
8: * Redistribution and use in source and binary forms are permitted provided
9: * that: (1) source distributions retain this entire copyright notice and
10: * comment, and (2) distributions including binaries display the following
11: * acknowledgement: ``This product includes software developed by the
12: * University of California, Berkeley and its contributors'' in the
13: * documentation or other materials provided with the distribution and in
14: * all advertising materials mentioning features or use of this software.
15: * Neither the name of the University nor the names of its contributors may
16: * be used to endorse or promote products derived from this software without
17: * specific prior written permission.
18: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
19: * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
20: * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
21: */
22:
/* Version identification string for this file; compiled out when `lint' is defined. */
23: #ifndef lint
24: static char sccsid[] = "@(#)reader.c 5.6 (Berkeley) 6/1/90";
25: #endif /* not lint */
26:
27: #include "defs.h"
28:
29: /* The line size must be a positive integer. One hundred was chosen */
30: /* because few lines in Yacc input grammars exceed 100 characters. */
31: /* Note that if a line exceeds LINESIZE characters, the line buffer */
32: /* will be expanded to accommodate it. */
33:
/* Initial size of the input line buffer: get_line() allocates LINESIZE + 1 */
/* bytes and grows the buffer in steps of LINESIZE. */
34: #define LINESIZE 100
35:
/* Scratch buffer filled one character at a time by cachec(): `cinc' is the */
/* next index to write and `cache_size' is the allocated size in bytes. */
36: char *cache;
37: int cinc, cache_size;
38:
/* Table of distinct tag strings built by get_tag(): `ntags' entries are in */
/* use and `tagmax' is the allocated capacity. */
39: int ntags, tagmax;
40: char **tag_table;
41:
/* `saw_eof' is set by get_line() once the input is exhausted; `unionized' */
/* is set by copy_union() when a union declaration has been copied. */
42: char saw_eof, unionized;
/* `line' is the current newline-terminated input line (0 at end of input), */
/* `cptr' the scan position inside it, `linesize' its allocated size. */
43: char *cptr, *line;
44: int linesize;
45:
/* Start symbol, set by declare_start() or advance_to_start(). */
46: bucket *goal;
/* Precedence level; declare_tokens() increments it for non-TOKEN declarations. */
47: int prec;
/* Counter used by insert_empty_rule() to name generated "$$n" symbols. */
48: int gensym;
/* Set by copy_action(); cleared by end_rule() and add_symbol(). */
49: char last_was_action;
50:
/* Item array and its allocated capacity (see initialize_grammar(), expand_items()). */
51: int maxitems;
52: bucket **pitem;
53:
/* Left-hand-side array, one entry per rule, and its allocated capacity */
/* (see initialize_grammar(), expand_rules()). */
54: int maxrules;
55: bucket **plhs;
56:
/* Single allocation holding all symbol names, built by pack_names(). */
57: int name_pool_size;
58: char *name_pool;
59:
/* printf format used for the #line directives written to the output files. */
60: char line_format[] = "#line %d \"%s\"\n";
61:
62:
63: cachec(c)
64: int c;
65: {
66: assert(cinc >= 0);
67: if (cinc >= cache_size)
68: {
69: cache_size += 256;
70: cache = REALLOC(cache, cache_size);
71: if (cache == 0) no_space();
72: }
73: cache[cinc] = c;
74: ++cinc;
75: }
76:
77:
78: get_line()
79: {
80: register FILE *f = input_file;
81: register int c;
82: register int i;
83:
84: if (saw_eof || (c = getc(f)) == EOF)
85: {
86: if (line) { FREE(line); line = 0; }
87: cptr = 0;
88: saw_eof = 1;
89: return;
90: }
91:
92: if (line == 0 || linesize != (LINESIZE + 1))
93: {
94: if (line) FREE(line);
95: linesize = LINESIZE + 1;
96: line = MALLOC(linesize);
97: if (line == 0) no_space();
98: }
99:
100: i = 0;
101: ++lineno;
102: for (;;)
103: {
104: line[i] = c;
105: if (c == '\n') { cptr = line; return; }
106: if (++i >= linesize)
107: {
108: linesize += LINESIZE;
109: line = REALLOC(line, linesize);
110: if (line == 0) no_space();
111: }
112: c = getc(f);
113: if (c == EOF)
114: {
115: line[i] = '\n';
116: saw_eof = 1;
117: cptr = line;
118: return;
119: }
120: }
121: }
122:
123:
124: char *
125: dup_line()
126: {
127: register char *p, *s, *t;
128:
129: if (line == 0) return (0);
130: s = line;
131: while (*s != '\n') ++s;
132: p = MALLOC(s - line + 1);
133: if (p == 0) no_space();
134:
135: s = line;
136: t = p;
137: while ((*t++ = *s++) != '\n') continue;
138: return (p);
139: }
140:
141:
142: skip_comment()
143: {
144: register char *s;
145:
146: int st_lineno = lineno;
147: char *st_line = dup_line();
148: char *st_cptr = st_line + (cptr - line);
149:
150: s = cptr + 2;
151: for (;;)
152: {
153: if (*s == '*' && s[1] == '/')
154: {
155: cptr = s + 2;
156: FREE(st_line);
157: return;
158: }
159: if (*s == '\n')
160: {
161: get_line();
162: if (line == 0)
163: unterminated_comment(st_lineno, st_line, st_cptr);
164: s = cptr;
165: }
166: else
167: ++s;
168: }
169: }
170:
171:
172: int
173: nextc()
174: {
175: register char *s;
176:
177: if (line == 0)
178: {
179: get_line();
180: if (line == 0)
181: return (EOF);
182: }
183:
184: s = cptr;
185: for (;;)
186: {
187: switch (*s)
188: {
189: case '\n':
190: get_line();
191: if (line == 0) return (EOF);
192: s = cptr;
193: break;
194:
195: case ' ':
196: case '\t':
197: case '\f':
198: case '\r':
199: case '\v':
200: case ',':
201: case ';':
202: ++s;
203: break;
204:
205: case '\\':
206: cptr = s;
207: return ('%');
208:
209: case '/':
210: if (s[1] == '*')
211: {
212: cptr = s;
213: skip_comment();
214: s = cptr;
215: break;
216: }
217: else if (s[1] == '/')
218: {
219: get_line();
220: if (line == 0) return (EOF);
221: s = cptr;
222: break;
223: }
224: /* fall through */
225:
226: default:
227: cptr = s;
228: return (*s);
229: }
230: }
231: }
232:
233:
234: int
235: keyword()
236: {
237: register int c;
238: char *t_cptr = cptr;
239:
240: c = *++cptr;
241: if (isalpha(c))
242: {
243: cinc = 0;
244: for (;;)
245: {
246: if (isalpha(c))
247: {
248: if (isupper(c)) c = tolower(c);
249: cachec(c);
250: }
251: else if (isdigit(c) || c == '_' || c == '.' || c == '$')
252: cachec(c);
253: else
254: break;
255: c = *++cptr;
256: }
257: cachec(NUL);
258:
259: if (strcmp(cache, "token") == 0 || strcmp(cache, "term") == 0)
260: return (TOKEN);
261: if (strcmp(cache, "type") == 0)
262: return (TYPE);
263: if (strcmp(cache, "left") == 0)
264: return (LEFT);
265: if (strcmp(cache, "right") == 0)
266: return (RIGHT);
267: if (strcmp(cache, "nonassoc") == 0 || strcmp(cache, "binary") == 0)
268: return (NONASSOC);
269: if (strcmp(cache, "start") == 0)
270: return (START);
271: if (strcmp(cache, "union") == 0)
272: return (UNION);
273: if (strcmp(cache, "ident") == 0)
274: return (IDENT);
275: }
276: else
277: {
278: ++cptr;
279: if (c == '{')
280: return (TEXT);
281: if (c == '%' || c == '\\')
282: return (MARK);
283: if (c == '<')
284: return (LEFT);
285: if (c == '>')
286: return (RIGHT);
287: if (c == '0')
288: return (TOKEN);
289: if (c == '2')
290: return (NONASSOC);
291: }
292: syntax_error(lineno, line, t_cptr);
293: /*NOTREACHED*/
294: }
295:
296:
297: copy_ident()
298: {
299: register int c;
300: register FILE *f = output_file;
301:
302: c = nextc();
303: if (c == EOF) unexpected_EOF();
304: if (c != '"') syntax_error(lineno, line, cptr);
305: ++outline;
306: fprintf(f, "#ident \"");
307: for (;;)
308: {
309: c = *++cptr;
310: if (c == '\n')
311: {
312: fprintf(f, "\"\n");
313: return;
314: }
315: putc(c, f);
316: if (c == '"')
317: {
318: putc('\n', f);
319: ++cptr;
320: return;
321: }
322: }
323: }
324:
325:
/* Copy a literal text block verbatim to text_file, stopping at a percent */
/* sign or backslash that is immediately followed by a closing brace. */
/* A #line directive is written first unless lflag is set.  String and */
/* character literals and block comments are copied unchanged; line */
/* comments are rewritten as block comments.  Running out of input calls */
/* unterminated_text(), unterminated_string() or unterminated_comment() */
/* with the position where the open construct began. */
326: copy_text()
327: {
328: register int c;
329: int quote;
330: register FILE *f = text_file;
/* need_newline is nonzero when something has been written since the last newline. */
331: int need_newline = 0;
/* Saved start of the block (two characters before cptr) for error reports. */
332: int t_lineno = lineno;
333: char *t_line = dup_line();
334: char *t_cptr = t_line + (cptr - line - 2);
335:
/* Nothing else on the opening line: start copying with the next line. */
336: if (*cptr == '\n')
337: {
338: get_line();
339: if (line == 0)
340: unterminated_text(t_lineno, t_line, t_cptr);
341: }
342: if (!lflag) fprintf(f, line_format, lineno, input_file_name);
343:
344: loop:
345: c = *cptr++;
346: switch (c)
347: {
348: case '\n':
/* Also entered by goto after a line comment has been rewritten. */
349: next_line:
350: putc('\n', f);
351: need_newline = 0;
352: get_line();
353: if (line) goto loop;
354: unterminated_text(t_lineno, t_line, t_cptr);
355:
/* String or character literal: copy through the matching quote. */
356: case '\'':
357: case '"':
358: {
359: int s_lineno = lineno;
360: char *s_line = dup_line();
361: char *s_cptr = s_line + (cptr - line - 1);
362:
363: quote = c;
364: putc(c, f);
365: for (;;)
366: {
367: c = *cptr++;
368: putc(c, f);
369: if (c == quote)
370: {
371: need_newline = 1;
372: FREE(s_line);
373: goto loop;
374: }
375: if (c == '\n')
376: unterminated_string(s_lineno, s_line, s_cptr);
/* Copy the escaped character too; an escaped newline continues on the next line. */
377: if (c == '\\')
378: {
379: c = *cptr++;
380: putc(c, f);
381: if (c == '\n')
382: {
383: get_line();
384: if (line == 0)
385: unterminated_string(s_lineno, s_line, s_cptr);
386: }
387: }
388: }
389: }
390:
391: case '/':
392: putc(c, f);
393: need_newline = 1;
394: c = *cptr;
/* Line comment: emit it as a block comment, separating any embedded */
/* star-slash pair with a space so the output comment is not closed early. */
395: if (c == '/')
396: {
397: putc('*', f);
398: while ((c = *++cptr) != '\n')
399: {
400: if (c == '*' && cptr[1] == '/')
401: fprintf(f, "* ");
402: else
403: putc(c, f);
404: }
405: fprintf(f, "*/");
406: goto next_line;
407: }
/* Block comment: copy through the closing delimiter, across lines if needed. */
408: if (c == '*')
409: {
410: int c_lineno = lineno;
411: char *c_line = dup_line();
412: char *c_cptr = c_line + (cptr - line - 1);
413:
414: putc('*', f);
415: ++cptr;
416: for (;;)
417: {
418: c = *cptr++;
419: putc(c, f);
420: if (c == '*' && *cptr == '/')
421: {
422: putc('/', f);
423: ++cptr;
424: FREE(c_line);
425: goto loop;
426: }
427: if (c == '\n')
428: {
429: get_line();
430: if (line == 0)
431: unterminated_comment(c_lineno, c_line, c_cptr);
432: }
433: }
434: }
435: need_newline = 1;
436: goto loop;
437:
/* End of the text block: finish the current output line and return. */
438: case '%':
439: case '\\':
440: if (*cptr == '}')
441: {
442: if (need_newline) putc('\n', f);
443: ++cptr;
444: FREE(t_line);
445: return;
446: }
447: /* fall through */
448:
449: default:
450: putc(c, f);
451: need_newline = 1;
452: goto loop;
453: }
454: }
455:
456:
/* Copy the body of a union declaration to text_file, and to union_file as */
/* well when dflag is set, preceded by "typedef union".  Copying stops at */
/* the brace that brings the nesting depth back to zero, after which */
/* " YYSTYPE;" is written to text_file.  A second union declaration is */
/* reported through over_unionized().  Line comments are rewritten as */
/* block comments.  Running out of input calls unterminated_union(), */
/* unterminated_string() or unterminated_comment(). */
457: copy_union()
458: {
459: register int c;
460: int quote;
461: int depth;
/* Saved start of the declaration (six characters before cptr) for error reports. */
462: int u_lineno = lineno;
463: char *u_line = dup_line();
464: char *u_cptr = u_line + (cptr - line - 6);
465:
466: if (unionized) over_unionized(cptr - 6);
467: unionized = 1;
468:
469: if (!lflag)
470: fprintf(text_file, line_format, lineno, input_file_name);
471:
472: fprintf(text_file, "typedef union");
473: if (dflag) fprintf(union_file, "typedef union");
474:
475: depth = 0;
/* Every character is echoed to the output(s) before it is examined. */
476: loop:
477: c = *cptr++;
478: putc(c, text_file);
479: if (dflag) putc(c, union_file);
480: switch (c)
481: {
482: case '\n':
/* Also entered by goto after a line comment has been rewritten. */
483: next_line:
484: get_line();
485: if (line == 0) unterminated_union(u_lineno, u_line, u_cptr);
486: goto loop;
487:
488: case '{':
489: ++depth;
490: goto loop;
491:
492: case '}':
493: if (--depth == 0)
494: {
495: fprintf(text_file, " YYSTYPE;\n");
496: FREE(u_line);
497: return;
498: }
499: goto loop;
500:
/* String or character literal: copy through the matching quote. */
501: case '\'':
502: case '"':
503: {
504: int s_lineno = lineno;
505: char *s_line = dup_line();
506: char *s_cptr = s_line + (cptr - line - 1);
507:
508: quote = c;
509: for (;;)
510: {
511: c = *cptr++;
512: putc(c, text_file);
513: if (dflag) putc(c, union_file);
514: if (c == quote)
515: {
516: FREE(s_line);
517: goto loop;
518: }
519: if (c == '\n')
520: unterminated_string(s_lineno, s_line, s_cptr);
/* Copy the escaped character too; an escaped newline continues on the next line. */
521: if (c == '\\')
522: {
523: c = *cptr++;
524: putc(c, text_file);
525: if (dflag) putc(c, union_file);
526: if (c == '\n')
527: {
528: get_line();
529: if (line == 0)
530: unterminated_string(s_lineno, s_line, s_cptr);
531: }
532: }
533: }
534: }
535:
536: case '/':
537: c = *cptr;
/* Line comment: emit it as a block comment, separating any embedded */
/* star-slash pair with a space so the output comment is not closed early. */
538: if (c == '/')
539: {
540: putc('*', text_file);
541: if (dflag) putc('*', union_file);
542: while ((c = *++cptr) != '\n')
543: {
544: if (c == '*' && cptr[1] == '/')
545: {
546: fprintf(text_file, "* ");
547: if (dflag) fprintf(union_file, "* ");
548: }
549: else
550: {
551: putc(c, text_file);
552: if (dflag) putc(c, union_file);
553: }
554: }
555: fprintf(text_file, "*/\n");
556: if (dflag) fprintf(union_file, "*/\n");
557: goto next_line;
558: }
/* Block comment: copy through the closing delimiter, across lines if needed. */
559: if (c == '*')
560: {
561: int c_lineno = lineno;
562: char *c_line = dup_line();
563: char *c_cptr = c_line + (cptr - line - 1);
564:
565: putc('*', text_file);
566: if (dflag) putc('*', union_file);
567: ++cptr;
568: for (;;)
569: {
570: c = *cptr++;
571: putc(c, text_file);
572: if (dflag) putc(c, union_file);
573: if (c == '*' && *cptr == '/')
574: {
575: putc('/', text_file);
576: if (dflag) putc('/', union_file);
577: ++cptr;
578: FREE(c_line);
579: goto loop;
580: }
581: if (c == '\n')
582: {
583: get_line();
584: if (line == 0)
585: unterminated_comment(c_lineno, c_line, c_cptr);
586: }
587: }
588: }
589: goto loop;
590:
591: default:
592: goto loop;
593: }
594: }
595:
596:
/*
 * Return the numeric value (0..15) of the hexadecimal digit `c', accepting
 * both upper- and lower-case letters, or -1 if `c' is not a hex digit.
 */
int
hexval(c)
int c;
{
    int value = -1;

    if ('0' <= c && c <= '9')
        value = c - '0';
    else if ('A' <= c && c <= 'F')
        value = 10 + (c - 'A');
    else if ('a' <= c && c <= 'f')
        value = 10 + (c - 'a');

    return (value);
}
609:
610:
/* Parse the quoted literal that starts at cptr and return its symbol-table */
/* bucket, marked as class TERM.  Escape sequences are decoded first; the */
/* decoded bytes are then re-encoded into a printable, quoted name (single */
/* quotes for a one-byte literal, double quotes otherwise) which is the key */
/* passed to lookup().  A one-byte literal whose value is still UNDEFINED */
/* gets its character code as value.  A newline or end of input inside the */
/* literal calls unterminated_string(); an octal or hex escape larger than */
/* MAXCHAR, or a hex escape with no digits, calls illegal_character(). */
611: bucket *
612: get_literal()
613: {
614: register int c, quote;
615: register int i;
616: register int n;
617: register char *s;
618: register bucket *bp;
/* Saved start of the literal for error reports. */
619: int s_lineno = lineno;
620: char *s_line = dup_line();
621: char *s_cptr = s_line + (cptr - line);
622:
623: quote = *cptr++;
624: cinc = 0;
/* Pass 1: decode the literal into cache[0..cinc-1]. */
625: for (;;)
626: {
627: c = *cptr++;
628: if (c == quote) break;
629: if (c == '\n') unterminated_string(s_lineno, s_line, s_cptr);
630: if (c == '\\')
631: {
/* Position of the backslash, for illegal_character(). */
632: char *c_cptr = cptr - 1;
633:
634: c = *cptr++;
635: switch (c)
636: {
/* Backslash-newline: continue on the next line, caching nothing. */
637: case '\n':
638: get_line();
639: if (line == 0) unterminated_string(s_lineno, s_line, s_cptr);
640: continue;
641:
/* Octal escape of one to three digits. */
642: case '0': case '1': case '2': case '3':
643: case '4': case '5': case '6': case '7':
644: n = c - '0';
645: c = *cptr;
646: if (IS_OCTAL(c))
647: {
648: n = (n << 3) + (c - '0');
649: c = *++cptr;
650: if (IS_OCTAL(c))
651: {
652: n = (n << 3) + (c - '0');
653: ++cptr;
654: }
655: }
656: if (n > MAXCHAR) illegal_character(c_cptr);
657: c = n;
658: break;
659:
/* Hex escape: at least one digit, then as many as follow. */
660: case 'x':
661: c = *cptr++;
662: n = hexval(c);
663: if (n < 0 || n >= 16)
664: illegal_character(c_cptr);
665: for (;;)
666: {
667: c = *cptr;
668: i = hexval(c);
669: if (i < 0 || i >= 16) break;
670: ++cptr;
671: n = (n << 4) + i;
672: if (n > MAXCHAR) illegal_character(c_cptr);
673: }
674: c = n;
675: break;
676:
/* Named escapes; any other escaped character stands for itself. */
677: case 'a': c = 7; break;
678: case 'b': c = '\b'; break;
679: case 'f': c = '\f'; break;
680: case 'n': c = '\n'; break;
681: case 'r': c = '\r'; break;
682: case 't': c = '\t'; break;
683: case 'v': c = '\v'; break;
684: }
685: }
686: cachec(c);
687: }
688: FREE(s_line);
689:
/* Move the decoded bytes out of the cache so it can be reused for the name. */
/* NOTE(review): for an empty literal n is 0 and MALLOC(0) is requested; if */
/* the allocator returns null for a zero-size request this ends in */
/* no_space() -- confirm against the definition of MALLOC. */
690: n = cinc;
691: s = MALLOC(n);
692: if (s == 0) no_space();
693:
694: for (i = 0; i < n; ++i)
695: s[i] = cache[i];
696:
/* Pass 2: build the quoted, printable symbol name in the cache. */
697: cinc = 0;
698: if (n == 1)
699: cachec('\'');
700: else
701: cachec('"');
702:
703: for (i = 0; i < n; ++i)
704: {
705: c = ((unsigned char *)s)[i];
/* Backslashes and the enclosing quote character are escaped. */
706: if (c == '\\' || c == cache[0])
707: {
708: cachec('\\');
709: cachec(c);
710: }
711: else if (isprint(c))
712: cachec(c);
713: else
714: {
/* Non-printable byte: named escape if one exists, else three octal digits. */
715: cachec('\\');
716: switch (c)
717: {
718: case 7: cachec('a'); break;
719: case '\b': cachec('b'); break;
720: case '\f': cachec('f'); break;
721: case '\n': cachec('n'); break;
722: case '\r': cachec('r'); break;
723: case '\t': cachec('t'); break;
724: case '\v': cachec('v'); break;
725: default:
726: cachec(((c >> 6) & 7) + '0');
727: cachec(((c >> 3) & 7) + '0');
728: cachec((c & 7) + '0');
729: break;
730: }
731: }
732: }
733:
734: if (n == 1)
735: cachec('\'');
736: else
737: cachec('"');
738:
739: cachec(NUL);
740: bp = lookup(cache);
741: bp->class = TERM;
742: if (n == 1 && bp->value == UNDEFINED)
743: bp->value = *(unsigned char *)s;
744: FREE(s);
745:
746: return (bp);
747: }
748:
749:
750: int
751: is_reserved(name)
752: char *name;
753: {
754: char *s;
755:
756: if (strcmp(name, ".") == 0 ||
757: strcmp(name, "$accept") == 0 ||
758: strcmp(name, "$end") == 0)
759: return (1);
760:
761: if (name[0] == '$' && name[1] == '$' && isdigit(name[2]))
762: {
763: s = name + 3;
764: while (isdigit(*s)) ++s;
765: if (*s == NUL) return (1);
766: }
767:
768: return (0);
769: }
770:
771:
772: bucket *
773: get_name()
774: {
775: register int c;
776:
777: cinc = 0;
778: for (c = *cptr; IS_IDENT(c); c = *++cptr)
779: cachec(c);
780: cachec(NUL);
781:
782: if (is_reserved(cache)) used_reserved(cache);
783:
784: return (lookup(cache));
785: }
786:
787:
788: int
789: get_number()
790: {
791: register int c;
792: register int n;
793:
794: n = 0;
795: for (c = *cptr; isdigit(c); c = *++cptr)
796: n = 10*n + (c - '0');
797:
798: return (n);
799: }
800:
801:
802: char *
803: get_tag()
804: {
805: register int c;
806: register int i;
807: register char *s;
808: int t_lineno = lineno;
809: char *t_line = dup_line();
810: char *t_cptr = t_line + (cptr - line);
811:
812: ++cptr;
813: c = nextc();
814: if (c == EOF) unexpected_EOF();
815: if (!isalpha(c) && c != '_' && c != '$')
816: illegal_tag(t_lineno, t_line, t_cptr);
817:
818: cinc = 0;
819: do { cachec(c); c = *++cptr; } while (IS_IDENT(c));
820: cachec(NUL);
821:
822: c = nextc();
823: if (c == EOF) unexpected_EOF();
824: if (c != '>')
825: illegal_tag(t_lineno, t_line, t_cptr);
826: ++cptr;
827:
828: for (i = 0; i < ntags; ++i)
829: {
830: if (strcmp(cache, tag_table[i]) == 0)
831: return (tag_table[i]);
832: }
833:
834: if (ntags >= tagmax)
835: {
836: tagmax += 16;
837: tag_table = (char **)
838: (tag_table ? REALLOC(tag_table, tagmax*sizeof(char *))
839: : MALLOC(tagmax*sizeof(char *)));
840: if (tag_table == 0) no_space();
841: }
842:
843: s = MALLOC(cinc);
844: if (s == 0) no_space();
845: strcpy(s, cache);
846: tag_table[ntags] = s;
847: ++ntags;
848: FREE(t_line);
849: return (s);
850: }
851:
852:
853: declare_tokens(assoc)
854: int assoc;
855: {
856: register int c;
857: register bucket *bp;
858: int value;
859: char *tag = 0;
860:
861: if (assoc != TOKEN) ++prec;
862:
863: c = nextc();
864: if (c == EOF) unexpected_EOF();
865: if (c == '<')
866: {
867: tag = get_tag();
868: c = nextc();
869: if (c == EOF) unexpected_EOF();
870: }
871:
872: for (;;)
873: {
874: if (isalpha(c) || c == '_' || c == '.' || c == '$')
875: bp = get_name();
876: else if (c == '\'' || c == '"')
877: bp = get_literal();
878: else
879: return;
880:
881: if (bp == goal) tokenized_start(bp->name);
882: bp->class = TERM;
883:
884: if (tag)
885: {
886: if (bp->tag && tag != bp->tag)
887: retyped_warning(bp->name);
888: bp->tag = tag;
889: }
890:
891: if (assoc != TOKEN)
892: {
893: if (bp->prec && prec != bp->prec)
894: reprec_warning(bp->name);
895: bp->assoc = assoc;
896: bp->prec = prec;
897: }
898:
899: c = nextc();
900: if (c == EOF) unexpected_EOF();
901: value = UNDEFINED;
902: if (isdigit(c))
903: {
904: value = get_number();
905: if (bp->value != UNDEFINED && value != bp->value)
906: revalued_warning(bp->name);
907: bp->value = value;
908: c = nextc();
909: if (c == EOF) unexpected_EOF();
910: }
911: }
912: }
913:
914:
915: declare_types()
916: {
917: register int c;
918: register bucket *bp;
919: char *tag;
920:
921: c = nextc();
922: if (c == EOF) unexpected_EOF();
923: if (c != '<') syntax_error(lineno, line, cptr);
924: tag = get_tag();
925:
926: for (;;)
927: {
928: c = nextc();
929: if (isalpha(c) || c == '_' || c == '.' || c == '$')
930: bp = get_name();
931: else if (c == '\'' || c == '"')
932: bp = get_literal();
933: else
934: return;
935:
936: if (bp->tag && tag != bp->tag)
937: retyped_warning(bp->name);
938: bp->tag = tag;
939: }
940: }
941:
942:
943: declare_start()
944: {
945: register int c;
946: register bucket *bp;
947:
948: c = nextc();
949: if (c == EOF) unexpected_EOF();
950: if (!isalpha(c) && c != '_' && c != '.' && c != '$')
951: syntax_error(lineno, line, cptr);
952: bp = get_name();
953: if (bp->class == TERM)
954: terminal_start(bp->name);
955: if (goal && goal != bp)
956: restarted_warning();
957: goal = bp;
958: }
959:
960:
961: read_declarations()
962: {
963: register int c, k;
964:
965: cache_size = 256;
966: cache = MALLOC(cache_size);
967: if (cache == 0) no_space();
968:
969: for (;;)
970: {
971: c = nextc();
972: if (c == EOF) unexpected_EOF();
973: if (c != '%') syntax_error(lineno, line, cptr);
974: switch (k = keyword())
975: {
976: case MARK:
977: return;
978:
979: case IDENT:
980: copy_ident();
981: break;
982:
983: case TEXT:
984: copy_text();
985: break;
986:
987: case UNION:
988: copy_union();
989: break;
990:
991: case TOKEN:
992: case LEFT:
993: case RIGHT:
994: case NONASSOC:
995: declare_tokens(k);
996: break;
997:
998: case TYPE:
999: declare_types();
1000: break;
1001:
1002: case START:
1003: declare_start();
1004: break;
1005: }
1006: }
1007: }
1008:
1009:
1010: initialize_grammar()
1011: {
1012: nitems = 4;
1013: maxitems = 300;
1014: pitem = (bucket **) MALLOC(maxitems*sizeof(bucket *));
1015: if (pitem == 0) no_space();
1016: pitem[0] = 0;
1017: pitem[1] = 0;
1018: pitem[2] = 0;
1019: pitem[3] = 0;
1020:
1021: nrules = 3;
1022: maxrules = 100;
1023: plhs = (bucket **) MALLOC(maxrules*sizeof(bucket *));
1024: if (plhs == 0) no_space();
1025: plhs[0] = 0;
1026: plhs[1] = 0;
1027: plhs[2] = 0;
1028: rprec = (short *) MALLOC(maxrules*sizeof(short));
1029: if (rprec == 0) no_space();
1030: rprec[0] = 0;
1031: rprec[1] = 0;
1032: rprec[2] = 0;
1033: rassoc = (char *) MALLOC(maxrules*sizeof(char));
1034: if (rassoc == 0) no_space();
1035: rassoc[0] = TOKEN;
1036: rassoc[1] = TOKEN;
1037: rassoc[2] = TOKEN;
1038: }
1039:
1040:
1041: expand_items()
1042: {
1043: maxitems += 300;
1044: pitem = (bucket **) REALLOC(pitem, maxitems*sizeof(bucket *));
1045: if (pitem == 0) no_space();
1046: }
1047:
1048:
1049: expand_rules()
1050: {
1051: maxrules += 100;
1052: plhs = (bucket **) REALLOC(plhs, maxrules*sizeof(bucket *));
1053: if (plhs == 0) no_space();
1054: rprec = (short *) REALLOC(rprec, maxrules*sizeof(short));
1055: if (rprec == 0) no_space();
1056: rassoc = (char *) REALLOC(rassoc, maxrules*sizeof(char));
1057: if (rassoc == 0) no_space();
1058: }
1059:
1060:
1061: advance_to_start()
1062: {
1063: register int c;
1064: register bucket *bp;
1065: char *s_cptr;
1066: int s_lineno;
1067:
1068: for (;;)
1069: {
1070: c = nextc();
1071: if (c != '%') break;
1072: s_cptr = cptr;
1073: switch (keyword())
1074: {
1075: case MARK:
1076: no_grammar();
1077:
1078: case TEXT:
1079: copy_text();
1080: break;
1081:
1082: case START:
1083: declare_start();
1084: break;
1085:
1086: default:
1087: syntax_error(lineno, line, s_cptr);
1088: }
1089: }
1090:
1091: c = nextc();
1092: if (!isalpha(c) && c != '_' && c != '.' && c != '_')
1093: syntax_error(lineno, line, cptr);
1094: bp = get_name();
1095: if (goal == 0)
1096: {
1097: if (bp->class == TERM)
1098: terminal_start(bp->name);
1099: goal = bp;
1100: }
1101:
1102: s_lineno = lineno;
1103: c = nextc();
1104: if (c == EOF) unexpected_EOF();
1105: if (c != ':') syntax_error(lineno, line, cptr);
1106: start_rule(bp, s_lineno);
1107: ++cptr;
1108: }
1109:
1110:
1111: start_rule(bp, s_lineno)
1112: register bucket *bp;
1113: int s_lineno;
1114: {
1115: if (bp->class == TERM)
1116: terminal_lhs(s_lineno);
1117: bp->class = NONTERM;
1118: if (nrules >= maxrules)
1119: expand_rules();
1120: plhs[nrules] = bp;
1121: rprec[nrules] = UNDEFINED;
1122: rassoc[nrules] = TOKEN;
1123: }
1124:
1125:
1126: end_rule()
1127: {
1128: register int i;
1129:
1130: if (!last_was_action && plhs[nrules]->tag)
1131: {
1132: for (i = nitems - 1; pitem[i]; --i) continue;
1133: if (pitem[i+1] == 0 || pitem[i+1]->tag != plhs[nrules]->tag)
1134: default_action_warning();
1135: }
1136:
1137: last_was_action = 0;
1138: if (nitems >= maxitems) expand_items();
1139: pitem[nitems] = 0;
1140: ++nitems;
1141: ++nrules;
1142: }
1143:
1144:
1145: insert_empty_rule()
1146: {
1147: register bucket *bp, **bpp;
1148:
1149: assert(cache);
1150: sprintf(cache, "$$%d", ++gensym);
1151: bp = make_bucket(cache);
1152: last_symbol->next = bp;
1153: last_symbol = bp;
1154: bp->tag = plhs[nrules]->tag;
1155: bp->class = NONTERM;
1156:
1157: if ((nitems += 2) > maxitems)
1158: expand_items();
1159: bpp = pitem + nitems - 1;
1160: *bpp-- = bp;
1161: while (bpp[0] = bpp[-1]) --bpp;
1162:
1163: if (++nrules >= maxrules)
1164: expand_rules();
1165: plhs[nrules] = plhs[nrules-1];
1166: plhs[nrules-1] = bp;
1167: rprec[nrules] = rprec[nrules-1];
1168: rprec[nrules-1] = 0;
1169: rassoc[nrules] = rassoc[nrules-1];
1170: rassoc[nrules-1] = TOKEN;
1171: }
1172:
1173:
1174: add_symbol()
1175: {
1176: register int c;
1177: register bucket *bp;
1178: int s_lineno = lineno;
1179:
1180: c = *cptr;
1181: if (c == '\'' || c == '"')
1182: bp = get_literal();
1183: else
1184: bp = get_name();
1185:
1186: c = nextc();
1187: if (c == ':')
1188: {
1189: end_rule();
1190: start_rule(bp, s_lineno);
1191: ++cptr;
1192: return;
1193: }
1194:
1195: if (last_was_action)
1196: insert_empty_rule();
1197: last_was_action = 0;
1198:
1199: if (++nitems > maxitems)
1200: expand_items();
1201: pitem[nitems-1] = bp;
1202: }
1203:
1204:
1205: copy_action()
1206: {
1207: register int c;
1208: register int i, n;
1209: int depth;
1210: int quote;
1211: char *tag;
1212: register FILE *f = action_file;
1213: int a_lineno = lineno;
1214: char *a_line = dup_line();
1215: char *a_cptr = a_line + (cptr - line);
1216:
1217: if (last_was_action)
1218: insert_empty_rule();
1219: last_was_action = 1;
1220:
1221: fprintf(f, "case %d:\n", nrules - 2);
1222: if (!lflag)
1223: fprintf(f, line_format, lineno, input_file_name);
1224: if (*cptr == '=') ++cptr;
1225:
1226: n = 0;
1227: for (i = nitems - 1; pitem[i]; --i) ++n;
1228:
1229: depth = 0;
1230: loop:
1231: c = *cptr;
1232: if (c == '$')
1233: {
1234: if (cptr[1] == '<')
1235: {
1236: int d_lineno = lineno;
1237: char *d_line = dup_line();
1238: char *d_cptr = d_line + (cptr - line);
1239:
1240: ++cptr;
1241: tag = get_tag();
1242: c = *cptr;
1243: if (c == '$')
1244: {
1245: fprintf(f, "yyval.%s ", tag);
1246: ++cptr;
1247: FREE(d_line);
1248: goto loop;
1249: }
1250: else if (isdigit(c))
1251: {
1252: i = get_number();
1253: if (i > n) dollar_warning(d_lineno, i);
1254: fprintf(f, "yyvsp[%d].%s ", i - n, tag);
1255: FREE(d_line);
1256: goto loop;
1257: }
1258: else if (c == '-' && isdigit(cptr[1]))
1259: {
1260: ++cptr;
1261: i = -get_number() - n;
1262: fprintf(f, "yyvsp[%d].%s ", i, tag);
1263: FREE(d_line);
1264: goto loop;
1265: }
1266: else
1267: dollar_error(d_lineno, d_line, d_cptr);
1268: }
1269: else if (cptr[1] == '$')
1270: {
1271: if (ntags)
1272: {
1273: tag = plhs[nrules]->tag;
1274: if (tag == 0) untyped_lhs();
1275: fprintf(f, "yyval.%s ", tag);
1276: }
1277: else
1278: fprintf(f, "yyval ");
1279: cptr += 2;
1280: goto loop;
1281: }
1282: else if (isdigit(cptr[1]))
1283: {
1284: ++cptr;
1285: i = get_number();
1286: if (ntags)
1287: {
1288: if (i <= 0 || i > n)
1289: unknown_rhs(i);
1290: tag = pitem[nitems + i - n - 1]->tag;
1291: if (tag == 0) untyped_rhs(i, pitem[nitems + i - n - 1]->name);
1292: fprintf(f, "yyvsp[%d].%s ", i - n, tag);
1293: }
1294: else
1295: {
1296: if (i > n)
1297: dollar_warning(lineno, i);
1298: fprintf(f, "yyvsp[%d]", i - n);
1299: }
1300: goto loop;
1301: }
1302: else if (cptr[1] == '-')
1303: {
1304: cptr += 2;
1305: i = get_number();
1306: if (ntags)
1307: unknown_rhs(-i);
1308: fprintf(f, "yyvsp[%d]", -i - n);
1309: goto loop;
1310: }
1311: }
1312: if (isalpha(c) || c == '_' || c == '$')
1313: {
1314: do
1315: {
1316: putc(c, f);
1317: c = *++cptr;
1318: } while (isalnum(c) || c == '_' || c == '$');
1319: goto loop;
1320: }
1321: putc(c, f);
1322: ++cptr;
1323: switch (c)
1324: {
1325: case '\n':
1326: next_line:
1327: get_line();
1328: if (line) goto loop;
1329: unterminated_action(a_lineno, a_line, a_cptr);
1330:
1331: case ';':
1332: if (depth > 0) goto loop;
1333: fprintf(f, "\nbreak;\n");
1334: return;
1335:
1336: case '{':
1337: ++depth;
1338: goto loop;
1339:
1340: case '}':
1341: if (--depth > 0) goto loop;
1342: fprintf(f, "\nbreak;\n");
1343: return;
1344:
1345: case '\'':
1346: case '"':
1347: {
1348: int s_lineno = lineno;
1349: char *s_line = dup_line();
1350: char *s_cptr = s_line + (cptr - line - 1);
1351:
1352: quote = c;
1353: for (;;)
1354: {
1355: c = *cptr++;
1356: putc(c, f);
1357: if (c == quote)
1358: {
1359: FREE(s_line);
1360: goto loop;
1361: }
1362: if (c == '\n')
1363: unterminated_string(s_lineno, s_line, s_cptr);
1364: if (c == '\\')
1365: {
1366: c = *cptr++;
1367: putc(c, f);
1368: if (c == '\n')
1369: {
1370: get_line();
1371: if (line == 0)
1372: unterminated_string(s_lineno, s_line, s_cptr);
1373: }
1374: }
1375: }
1376: }
1377:
1378: case '/':
1379: c = *cptr;
1380: if (c == '/')
1381: {
1382: putc('*', f);
1383: while ((c = *++cptr) != '\n')
1384: {
1385: if (c == '*' && cptr[1] == '/')
1386: fprintf(f, "* ");
1387: else
1388: putc(c, f);
1389: }
1390: fprintf(f, "*/\n");
1391: goto next_line;
1392: }
1393: if (c == '*')
1394: {
1395: int c_lineno = lineno;
1396: char *c_line = dup_line();
1397: char *c_cptr = c_line + (cptr - line - 1);
1398:
1399: putc('*', f);
1400: ++cptr;
1401: for (;;)
1402: {
1403: c = *cptr++;
1404: putc(c, f);
1405: if (c == '*' && *cptr == '/')
1406: {
1407: putc('/', f);
1408: ++cptr;
1409: FREE(c_line);
1410: goto loop;
1411: }
1412: if (c == '\n')
1413: {
1414: get_line();
1415: if (line == 0)
1416: unterminated_comment(c_lineno, c_line, c_cptr);
1417: }
1418: }
1419: }
1420: goto loop;
1421:
1422: default:
1423: goto loop;
1424: }
1425: }
1426:
1427:
1428: int
1429: mark_symbol()
1430: {
1431: register int c;
1432: register bucket *bp;
1433:
1434: c = cptr[1];
1435: if (c == '%' || c == '\\')
1436: {
1437: cptr += 2;
1438: return (1);
1439: }
1440:
1441: if (c == '=')
1442: cptr += 2;
1443: else if ((c == 'p' || c == 'P') &&
1444: ((c = cptr[2]) == 'r' || c == 'R') &&
1445: ((c = cptr[3]) == 'e' || c == 'E') &&
1446: ((c = cptr[4]) == 'c' || c == 'C') &&
1447: ((c = cptr[5], !IS_IDENT(c))))
1448: cptr += 5;
1449: else
1450: syntax_error(lineno, line, cptr);
1451:
1452: c = nextc();
1453: if (isalpha(c) || c == '_' || c == '.' || c == '$')
1454: bp = get_name();
1455: else if (c == '\'' || c == '"')
1456: bp = get_literal();
1457: else
1458: {
1459: syntax_error(lineno, line, cptr);
1460: /*NOTREACHED*/
1461: }
1462:
1463: if (rprec[nrules] != UNDEFINED && bp->prec != rprec[nrules])
1464: prec_redeclared();
1465:
1466: rprec[nrules] = bp->prec;
1467: rassoc[nrules] = bp->assoc;
1468: return (0);
1469: }
1470:
1471:
1472: read_grammar()
1473: {
1474: register int c;
1475:
1476: initialize_grammar();
1477: advance_to_start();
1478:
1479: for (;;)
1480: {
1481: c = nextc();
1482: if (c == EOF) break;
1483: if (isalpha(c) || c == '_' || c == '.' || c == '$' || c == '\'' ||
1484: c == '"')
1485: add_symbol();
1486: else if (c == '{' || c == '=')
1487: copy_action();
1488: else if (c == '|')
1489: {
1490: end_rule();
1491: start_rule(plhs[nrules-1], 0);
1492: ++cptr;
1493: }
1494: else if (c == '%')
1495: {
1496: if (mark_symbol()) break;
1497: }
1498: else
1499: syntax_error(lineno, line, cptr);
1500: }
1501: end_rule();
1502: }
1503:
1504:
1505: free_tags()
1506: {
1507: register int i;
1508:
1509: if (tag_table == 0) return;
1510:
1511: for (i = 0; i < ntags; ++i)
1512: {
1513: assert(tag_table[i]);
1514: FREE(tag_table[i]);
1515: }
1516: FREE(tag_table);
1517: }
1518:
1519:
1520: pack_names()
1521: {
1522: register bucket *bp;
1523: register char *p, *s, *t;
1524:
1525: name_pool_size = 13; /* 13 == sizeof("$end") + sizeof("$accept") */
1526: for (bp = first_symbol; bp; bp = bp->next)
1527: name_pool_size += strlen(bp->name) + 1;
1528: name_pool = MALLOC(name_pool_size);
1529: if (name_pool == 0) no_space();
1530:
1531: strcpy(name_pool, "$accept");
1532: strcpy(name_pool+8, "$end");
1533: t = name_pool + 13;
1534: for (bp = first_symbol; bp; bp = bp->next)
1535: {
1536: p = t;
1537: s = bp->name;
1538: while (*t++ = *s++) continue;
1539: FREE(bp->name);
1540: bp->name = p;
1541: }
1542: }
1543:
1544:
1545: check_symbols()
1546: {
1547: register bucket *bp;
1548:
1549: if (goal->class == UNKNOWN)
1550: undefined_goal(goal->name);
1551:
1552: for (bp = first_symbol; bp; bp = bp->next)
1553: {
1554: if (bp->class == UNKNOWN)
1555: {
1556: undefined_symbol_warning(bp->name);
1557: bp->class = TERM;
1558: }
1559: }
1560: }
1561:
1562:
1563: pack_symbols()
1564: {
1565: register bucket *bp;
1566: register bucket **v;
1567: register int i, j, k, n;
1568:
1569: nsyms = 2;
1570: ntokens = 1;
1571: for (bp = first_symbol; bp; bp = bp->next)
1572: {
1573: ++nsyms;
1574: if (bp->class == TERM) ++ntokens;
1575: }
1576: start_symbol = ntokens;
1577: nvars = nsyms - ntokens;
1578:
1579: symbol_name = (char **) MALLOC(nsyms*sizeof(char *));
1580: if (symbol_name == 0) no_space();
1581: symbol_value = (short *) MALLOC(nsyms*sizeof(short));
1582: if (symbol_value == 0) no_space();
1583: symbol_prec = (short *) MALLOC(nsyms*sizeof(short));
1584: if (symbol_prec == 0) no_space();
1585: symbol_assoc = MALLOC(nsyms);
1586: if (symbol_assoc == 0) no_space();
1587:
1588: v = (bucket **) MALLOC(nsyms*sizeof(bucket *));
1589: if (v == 0) no_space();
1590:
1591: v[0] = 0;
1592: v[start_symbol] = 0;
1593:
1594: i = 1;
1595: j = start_symbol + 1;
1596: for (bp = first_symbol; bp; bp = bp->next)
1597: {
1598: if (bp->class == TERM)
1599: v[i++] = bp;
1600: else
1601: v[j++] = bp;
1602: }
1603: assert(i == ntokens && j == nsyms);
1604:
1605: for (i = 1; i < ntokens; ++i)
1606: v[i]->index = i;
1607:
1608: goal->index = start_symbol + 1;
1609: k = start_symbol + 2;
1610: while (++i < nsyms)
1611: if (v[i] != goal)
1612: {
1613: v[i]->index = k;
1614: ++k;
1615: }
1616:
1617: goal->value = 0;
1618: k = 1;
1619: for (i = start_symbol + 1; i < nsyms; ++i)
1620: {
1621: if (v[i] != goal)
1622: {
1623: v[i]->value = k;
1624: ++k;
1625: }
1626: }
1627:
1628: k = 0;
1629: for (i = 1; i < ntokens; ++i)
1630: {
1631: n = v[i]->value;
1632: if (n > 256)
1633: {
1634: for (j = k++; j > 0 && symbol_value[j-1] > n; --j)
1635: symbol_value[j] = symbol_value[j-1];
1636: symbol_value[j] = n;
1637: }
1638: }
1639:
1640: if (v[1]->value == UNDEFINED)
1641: v[1]->value = 256;
1642:
1643: j = 0;
1644: n = 257;
1645: for (i = 2; i < ntokens; ++i)
1646: {
1647: if (v[i]->value == UNDEFINED)
1648: {
1649: while (j < k && n == symbol_value[j])
1650: {
1651: while (++j < k && n == symbol_value[j]) continue;
1652: ++n;
1653: }
1654: v[i]->value = n;
1655: ++n;
1656: }
1657: }
1658:
1659: symbol_name[0] = name_pool + 8;
1660: symbol_value[0] = 0;
1661: symbol_prec[0] = 0;
1662: symbol_assoc[0] = TOKEN;
1663: for (i = 1; i < ntokens; ++i)
1664: {
1665: symbol_name[i] = v[i]->name;
1666: symbol_value[i] = v[i]->value;
1667: symbol_prec[i] = v[i]->prec;
1668: symbol_assoc[i] = v[i]->assoc;
1669: }
1670: symbol_name[start_symbol] = name_pool;
1671: symbol_value[start_symbol] = -1;
1672: symbol_prec[start_symbol] = 0;
1673: symbol_assoc[start_symbol] = TOKEN;
1674: for (++i; i < nsyms; ++i)
1675: {
1676: k = v[i]->index;
1677: symbol_name[k] = v[i]->name;
1678: symbol_value[k] = v[i]->value;
1679: symbol_prec[k] = v[i]->prec;
1680: symbol_assoc[k] = v[i]->assoc;
1681: }
1682:
1683: FREE(v);
1684: }
1685:
1686:
1687: pack_grammar()
1688: {
1689: register int i, j;
1690: int assoc, prec;
1691:
1692: ritem = (short *) MALLOC(nitems*sizeof(short));
1693: if (ritem == 0) no_space();
1694: rlhs = (short *) MALLOC(nrules*sizeof(short));
1695: if (rlhs == 0) no_space();
1696: rrhs = (short *) MALLOC((nrules+1)*sizeof(short));
1697: if (rrhs == 0) no_space();
1698: rprec = (short *) REALLOC(rprec, nrules*sizeof(short));
1699: if (rprec == 0) no_space();
1700: rassoc = REALLOC(rassoc, nrules);
1701: if (rassoc == 0) no_space();
1702:
1703: ritem[0] = -1;
1704: ritem[1] = goal->index;
1705: ritem[2] = 0;
1706: ritem[3] = -2;
1707: rlhs[0] = 0;
1708: rlhs[1] = 0;
1709: rlhs[2] = start_symbol;
1710: rrhs[0] = 0;
1711: rrhs[1] = 0;
1712: rrhs[2] = 1;
1713:
1714: j = 4;
1715: for (i = 3; i < nrules; ++i)
1716: {
1717: rlhs[i] = plhs[i]->index;
1718: rrhs[i] = j;
1719: assoc = TOKEN;
1720: prec = 0;
1721: while (pitem[j])
1722: {
1723: ritem[j] = pitem[j]->index;
1724: if (pitem[j]->class == TERM)
1725: {
1726: prec = pitem[j]->prec;
1727: assoc = pitem[j]->assoc;
1728: }
1729: ++j;
1730: }
1731: ritem[j] = -i;
1732: ++j;
1733: if (rprec[i] == UNDEFINED)
1734: {
1735: rprec[i] = prec;
1736: rassoc[i] = assoc;
1737: }
1738: }
1739: rrhs[i] = j;
1740:
1741: FREE(plhs);
1742: FREE(pitem);
1743: }
1744:
1745:
1746: print_grammar()
1747: {
1748: register int i, j, k;
1749: int spacing;
1750: register FILE *f = verbose_file;
1751:
1752: if (!vflag) return;
1753:
1754: k = 1;
1755: for (i = 2; i < nrules; ++i)
1756: {
1757: if (rlhs[i] != rlhs[i-1])
1758: {
1759: if (i != 2) fprintf(f, "\n");
1760: fprintf(f, "%4d %s :", i - 2, symbol_name[rlhs[i]]);
1761: spacing = strlen(symbol_name[rlhs[i]]) + 1;
1762: }
1763: else
1764: {
1765: fprintf(f, "%4d ", i - 2);
1766: j = spacing;
1767: while (--j >= 0) putc(' ', f);
1768: putc('|', f);
1769: }
1770:
1771: while (ritem[k] >= 0)
1772: {
1773: fprintf(f, " %s", symbol_name[ritem[k]]);
1774: ++k;
1775: }
1776: ++k;
1777: putc('\n', f);
1778: }
1779: }
1780:
1781:
1782: reader()
1783: {
1784: write_section(banner);
1785: create_symbol_table();
1786: read_declarations();
1787: read_grammar();
1788: free_symbol_table();
1789: free_tags();
1790: pack_names();
1791: check_symbols();
1792: pack_symbols();
1793: pack_grammar();
1794: free_symbols();
1795: print_grammar();
1796: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.