|
|
1.1 root 1: /* Input parser for bison
2: Copyright (C) 1984, 1986, 1989 Free Software Foundation, Inc.
3:
4: This file is part of Bison, the GNU Compiler Compiler.
5:
6: Bison is free software; you can redistribute it and/or modify
7: it under the terms of the GNU General Public License as published by
8: the Free Software Foundation; either version 2, or (at your option)
9: any later version.
10:
11: Bison is distributed in the hope that it will be useful,
12: but WITHOUT ANY WARRANTY; without even the implied warranty of
13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14: GNU General Public License for more details.
15:
16: You should have received a copy of the GNU General Public License
17: along with Bison; see the file COPYING. If not, write to
18: the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
19:
20:
21: /* read in the grammar specification and record it in the format described in gram.h.
22: All guards are copied into the fguard file and all actions into faction,
23: in each case forming the body of a C function (yyguard or yyaction)
24: which contains a switch statement to decide which guard or action to execute.
25:
26: The entry point is reader(). */
27:
28: #include <stdio.h>
29: #include <ctype.h>
30: #include "system.h"
31: #include "files.h"
32: #include "new.h"
33: #include "symtab.h"
34: #include "lex.h"
35: #include "gram.h"
36: #include "machine.h"
37:
38: #define LTYPESTR "\n#ifndef YYLTYPE\ntypedef\n struct yyltype\n\
39: {\n int timestamp;\n int first_line;\n int first_column;\
40: \n int last_line;\n int last_column;\n char *text;\n }\n\
41: yyltype;\n\n#define YYLTYPE yyltype\n#endif\n\n"
42:
43: /* Number of slots allocated (but not necessarily used yet) in `rline' */
44: int rline_allocated;
45:
46: extern char *program_name;
47: extern int definesflag;
48: extern int nolinesflag;
49: extern bucket *symval;
50: extern int numval;
51: extern int failure;
52: extern int expected_conflicts;
53: extern char *token_buffer;
54:
55: extern void init_lex();
56: extern void tabinit();
57: extern void output_headers();
58: extern void output_trailers();
59: extern void free_symtab();
60: extern void open_extra_files();
61: extern void fatal();
62: extern void fatals();
63: extern void unlex();
64: extern void done();
65:
66: extern int skip_white_space();
67: extern int parse_percent_token();
68: extern int lex();
69:
70: void read_declarations();
71: void copy_definition();
72: void parse_token_decl();
73: void parse_start_decl();
74: void parse_type_decl();
75: void parse_assoc_decl();
76: void parse_union_decl();
77: void parse_expect_decl();
78: void copy_action();
79: void readgram();
80: void record_rule_line();
81: void packsymbols();
82: void output_token_defines();
83: void packgram();
84: int read_signed_integer();
85: int get_type();
86:
87: typedef
88: struct symbol_list /* one link of the list in which readgram records the grammar rules */
89: {
90: struct symbol_list *next; /* the following link of the list */
91: bucket *sym; /* symbol held by this link; left null in the link that marks the end of a rule */
92: bucket *ruleprec; /* symbol given after %prec; readgram stores it in the first link of the rule */
93: }
94: symbol_list;
95:
96:
97:
98: int lineno; /* line number in the input file; reader() starts it at 1 and the copy routines bump it at each newline */
99: symbol_list *grammar; /* first link of the rule list built by readgram */
100: int start_flag; /* nonzero once a %start declaration has been parsed */
101: bucket *startval; /* the start symbol: set by %start, otherwise the lhs of the first rule */
102: char **tags; /* NOTE(review): not used in the visible part of this file -- confirm where it is filled in */
103:
104: /* Nonzero if components of semantic values are used, implying
105: they must be unions. */
106: static int value_components_used;
107:
108: static int typed; /* nonzero if %union has been seen. */
109:
110: static int lastprec; /* incremented for each %left, %right or %nonassoc seen */
111:
112: static int gensym_count; /* incremented for each generated symbol */
113:
114: static bucket *errtoken; /* the built-in `error' token, created by reader() */
115:
116: /* Nonzero if any action or guard uses the @n construct. */
117: static int yylsp_needed;
118:
119: extern char *version_string;
120:
121: void
122: reader()
123: {
124: start_flag = 0;
125: startval = NULL; /* start symbol not specified yet. */
126:
127: #if 0
128: translations = 0; /* initially assume token number translation not needed. */
129: #endif
130: /* Nowadays translations is always set to 1,
131: since we give `error' a user-token-number
132: to satisfy the Posix demand for YYERRCODE==256. */
133: translations = 1;
134:
135: nsyms = 1;
136: nvars = 0;
137: nrules = 0;
138: nitems = 0;
139: rline_allocated = 10;
140: rline = NEW2(rline_allocated, short);
141:
142: typed = 0;
143: lastprec = 0;
144:
145: gensym_count = 0;
146:
147: semantic_parser = 0;
148: pure_parser = 0;
149: yylsp_needed = 0;
150:
151: grammar = NULL;
152:
153: init_lex();
154: lineno = 1;
155:
156: /* initialize the symbol table. */
157: tabinit();
158: /* construct the error token */
159: errtoken = getsym("error");
160: errtoken->class = STOKEN;
161: errtoken->user_token_number = 256; /* Value specified by posix. */
162: /* construct a token that represents all undefined literal tokens. */
163: /* it is always token number 2. */
164: getsym("$illegal.")->class = STOKEN;
165: /* Read the declaration section. Copy %{ ... %} groups to ftable and fdefines file.
166: Also notice any %token, %left, etc. found there. */
167: fprintf(ftable, "\n/* A Bison parser, made from %s", infile);
168: fprintf(ftable, " with Bison version %s */\n\n", version_string);
169: fprintf(ftable, "#define YYBISON 1 /* Identify Bison output. */\n\n");
170: read_declarations();
171: /* output the definition of YYLTYPE into the fattrs and fdefines files. */
172: /* fattrs winds up in the .tab.c file, before bison.simple. */
173: fprintf(fattrs, LTYPESTR);
174: /* start writing the guard and action files, if they are needed. */
175: output_headers();
176: /* read in the grammar, build grammar in list form. write out guards and actions. */
177: readgram();
178: /* Now we know whether we need the line-number stack.
179: If we do, write its type into the .tab.h file. */
180: if (yylsp_needed)
181: {
182: if (fdefines)
183: fprintf(fdefines, LTYPESTR);
184: }
185: /* write closing delimiters for actions and guards. */
186: output_trailers();
187: if (yylsp_needed)
188: fprintf(ftable, "#define YYLSP_NEEDED\n\n");
189: /* assign the symbols their symbol numbers.
190: Write #defines for the token symbols into fdefines if requested. */
191: packsymbols();
192: /* convert the grammar into the format described in gram.h. */
193: packgram();
194: /* free the symbol table data structure
195: since symbols are now all referred to by symbol number. */
196: free_symtab();
197: }
198:
199:
200:
201: /* read from finput until %% is seen. Discard the %%.
202: Handle any % declarations,
203: and copy the contents of any %{ ... %} groups to fattrs. */
204:
205: void
206: read_declarations ()
207: {
208: register int c;
209: register int tok;
210:
211: for (;;)
212: {
213: c = skip_white_space();
214:
215: if (c == '%')
216: {
217: tok = parse_percent_token();
218:
219: switch (tok)
220: {
221: case TWO_PERCENTS:
222: return;
223:
224: case PERCENT_LEFT_CURLY:
225: copy_definition();
226: break;
227:
228: case TOKEN:
229: parse_token_decl (STOKEN, SNTERM);
230: break;
231:
232: case NTERM:
233: parse_token_decl (SNTERM, STOKEN);
234: break;
235:
236: case TYPE:
237: parse_type_decl();
238: break;
239:
240: case START:
241: parse_start_decl();
242: break;
243:
244: case UNION:
245: parse_union_decl();
246: break;
247:
248: case EXPECT:
249: parse_expect_decl();
250: break;
251:
252: case LEFT:
253: parse_assoc_decl(LEFT_ASSOC);
254: break;
255:
256: case RIGHT:
257: parse_assoc_decl(RIGHT_ASSOC);
258: break;
259:
260: case NONASSOC:
261: parse_assoc_decl(NON_ASSOC);
262: break;
263:
264: case SEMANTIC_PARSER:
265: if (semantic_parser == 0)
266: {
267: semantic_parser = 1;
268: open_extra_files();
269: }
270: break;
271:
272: case PURE_PARSER:
273: pure_parser = 1;
274: break;
275:
276: default:
277: fatal("junk after `%%' in definition section");
278: }
279: }
280: else if (c == EOF)
281: fatal("no input grammar");
282: else if (c >= 040 && c <= 0177)
283: fatals ("unknown character `%c' in declaration section", c);
284: else
285: fatals ("unknown character with code 0x%x in declaration section", c);
286: }
287: }
288:
289:
290: /* copy the contents of a %{ ... %} into the definitions file.
291: The %{ has already been read. Return after reading the %}. */
292:
293: void
294: copy_definition ()
295: {
296: register int c;
297: register int match;
298: register int ended;
299: register int after_percent; /* -1 while reading a character if prev char was % */
300: int cplus_comment;
301:
302: if (!nolinesflag)
303: fprintf(fattrs, "#line %d \"%s\"\n", lineno, infile);
304:
305: after_percent = 0;
306:
307: c = getc(finput);
308:
309: for (;;)
310: {
311: switch (c)
312: {
313: case '\n':
314: putc(c, fattrs);
315: lineno++;
316: break;
317:
318: case '%':
319: after_percent = -1;
320: break;
321:
322: case '\'':
323: case '"':
324: match = c;
325: putc(c, fattrs);
326: c = getc(finput);
327:
328: while (c != match)
329: {
330: if (c == EOF || c == '\n')
331: fatal("unterminated string");
332:
333: putc(c, fattrs);
334:
335: if (c == '\\')
336: {
337: c = getc(finput);
338: if (c == EOF)
339: fatal("unterminated string");
340: putc(c, fattrs);
341: if (c == '\n')
342: lineno++;
343: }
344:
345: c = getc(finput);
346: }
347:
348: putc(c, fattrs);
349: break;
350:
351: case '/':
352: putc(c, fattrs);
353: c = getc(finput);
354: if (c != '*' && c != '/')
355: continue;
356:
357: cplus_comment = (c == '/');
358: putc(c, fattrs);
359: c = getc(finput);
360:
361: ended = 0;
362: while (!ended)
363: {
364: if (!cplus_comment && c == '*')
365: {
366: while (c == '*')
367: {
368: putc(c, fattrs);
369: c = getc(finput);
370: }
371:
372: if (c == '/')
373: {
374: putc(c, fattrs);
375: ended = 1;
376: }
377: }
378: else if (c == '\n')
379: {
380: lineno++;
381: putc(c, fattrs);
382: if (cplus_comment)
383: ended = 1;
384: else
385: c = getc(finput);
386: }
387: else if (c == EOF)
388: fatal("unterminated comment in `%{' definition");
389: else
390: {
391: putc(c, fattrs);
392: c = getc(finput);
393: }
394: }
395:
396: break;
397:
398: case EOF:
399: fatal("unterminated `%{' definition");
400:
401: default:
402: putc(c, fattrs);
403: }
404:
405: c = getc(finput);
406:
407: if (after_percent)
408: {
409: if (c == '}')
410: return;
411: putc('%', fattrs);
412: }
413: after_percent = 0;
414:
415: }
416:
417: }
418:
419:
420:
421: /* parse what comes after %token or %nterm.
422: For %token, what_is is STOKEN and what_is_not is SNTERM.
423: For %nterm, the arguments are reversed. */
424:
425: void
426: parse_token_decl (what_is, what_is_not)
427: int what_is, what_is_not;
428: {
429: /* register int start_lineno; JF */
430: register int token = 0;
431: register int prev;
432: register char *typename = 0;
433: int k;
434:
435: /* start_lineno = lineno; JF */
436:
437: for (;;)
438: {
439: if(ungetc(skip_white_space(), finput) == '%')
440: return;
441:
442: /* if (lineno != start_lineno)
443: return; JF */
444:
445: /* we have not passed a newline, so the token now starting is in this declaration */
446: prev = token;
447:
448: token = lex();
449: if (token == COMMA)
450: continue;
451: if (token == TYPENAME)
452: {
453: k = strlen(token_buffer);
454: typename = NEW2(k + 1, char);
455: strcpy(typename, token_buffer);
456: value_components_used = 1;
457: }
458: else if (token == IDENTIFIER)
459: {
460: int oldclass = symval->class;
461:
462: if (symval->class == what_is_not)
463: fatals("symbol %s redefined", symval->tag);
464: symval->class = what_is;
465: if (what_is == SNTERM && oldclass != SNTERM)
466: symval->value = nvars++;
467:
468: if (typename)
469: {
470: if (symval->type_name == NULL)
471: symval->type_name = typename;
472: else
473: fatals("type redeclaration for %s", symval->tag);
474: }
475: }
476: else if (prev == IDENTIFIER && token == NUMBER)
477: {
478: symval->user_token_number = numval;
479: translations = 1;
480: }
481: else
482: fatal("invalid text in %token or %nterm declaration");
483: }
484:
485: }
486:
487:
488:
489: /* parse what comes after %start */
490:
491: void
492: parse_start_decl ()
493: {
494: if (start_flag)
495: fatal("multiple %start declarations");
496: start_flag = 1;
497: if (lex() != IDENTIFIER)
498: fatal("invalid %start declaration");
499: startval = symval;
500: }
501:
502:
503:
504: /* read in a %type declaration and record its information for get_type_name to access */
505:
506: void
507: parse_type_decl ()
508: {
509: register int k;
510: register char *name;
511: /* register int start_lineno; JF */
512:
513: if (lex() != TYPENAME)
514: fatal("ill-formed %type declaration");
515:
516: k = strlen(token_buffer);
517: name = NEW2(k + 1, char);
518: strcpy(name, token_buffer);
519:
520: /* start_lineno = lineno; */
521:
522: for (;;)
523: {
524: register int t;
525:
526: if(ungetc(skip_white_space(), finput) == '%')
527: return;
528:
529: /* if (lineno != start_lineno)
530: return; JF */
531:
532: /* we have not passed a newline, so the token now starting is in this declaration */
533:
534: t = lex();
535:
536: switch (t)
537: {
538:
539: case COMMA:
540: case SEMICOLON:
541: break;
542:
543: case IDENTIFIER:
544: if (symval->type_name == NULL)
545: symval->type_name = name;
546: else
547: fatals("type redeclaration for %s", symval->tag);
548:
549: break;
550:
551: default:
552: fatal("invalid %type declaration");
553: }
554: }
555: }
556:
557:
558:
559: /* read in a %left, %right or %nonassoc declaration and record its information. */
560: /* assoc is either LEFT_ASSOC, RIGHT_ASSOC or NON_ASSOC. */
561:
562: void
563: parse_assoc_decl (assoc)
564: int assoc;
565: {
566: register int k;
567: register char *name = NULL;
568: /* register int start_lineno; JF */
569: register int prev = 0; /* JF added = 0 to keep lint happy */
570:
571: lastprec++; /* Assign a new precedence level, never 0. */
572:
573: /* start_lineno = lineno; */
574:
575: for (;;)
576: {
577: register int t;
578:
579: if(ungetc(skip_white_space(), finput) == '%')
580: return;
581:
582: /* if (lineno != start_lineno)
583: return; JF */
584:
585: /* we have not passed a newline, so the token now starting is in this declaration */
586:
587: t = lex();
588:
589: switch (t)
590: {
591:
592: case TYPENAME:
593: k = strlen(token_buffer);
594: name = NEW2(k + 1, char);
595: strcpy(name, token_buffer);
596: break;
597:
598: case COMMA:
599: break;
600:
601: case IDENTIFIER:
602: if (symval->prec != 0)
603: fatals("redefining precedence of %s", symval->tag);
604: symval->prec = lastprec;
605: symval->assoc = assoc;
606: if (symval->class == SNTERM)
607: fatals("symbol %s redefined", symval->tag);
608: symval->class = STOKEN;
609: if (name)
610: { /* record the type, if one is specified */
611: if (symval->type_name == NULL)
612: symval->type_name = name;
613: else
614: fatals("type redeclaration for %s", symval->tag);
615: }
616: break;
617:
618: case NUMBER:
619: if (prev == IDENTIFIER)
620: {
621: symval->user_token_number = numval;
622: translations = 1;
623: }
624: else
625: fatal("invalid text in association declaration");
626: break;
627:
628: case SEMICOLON:
629: return;
630:
631: default:
632: fatal("malformatted association declaration");
633: }
634:
635: prev = t;
636:
637: }
638: }
639:
640:
641:
642: /* copy the union declaration into fattrs (and fdefines),
643: where it is made into the
644: definition of YYSTYPE, the type of elements of the parser value stack. */
645:
646: void
647: parse_union_decl()
648: {
649: register int c;
650: register int count;
651: register int in_comment;
652: int cplus_comment;
653:
654: if (typed)
655: fatal("multiple %union declarations");
656:
657: typed = 1;
658:
659: if (!nolinesflag)
660: fprintf(fattrs, "\n#line %d \"%s\"\n", lineno, infile);
661: else
662: fprintf(fattrs, "\n");
663:
664: fprintf(fattrs, "typedef union");
665: if (fdefines)
666: fprintf(fdefines, "typedef union");
667:
668: count = 0;
669: in_comment = 0;
670:
671: c = getc(finput);
672:
673: while (c != EOF)
674: {
675: putc(c, fattrs);
676: if (fdefines)
677: putc(c, fdefines);
678:
679: switch (c)
680: {
681: case '\n':
682: lineno++;
683: break;
684:
685: case '/':
686: c = getc(finput);
687: if (c != '*' && c != '/')
688: ungetc(c, finput);
689: else
690: {
691: putc(c, fattrs);
692: if (fdefines)
693: putc(c, fdefines);
694: cplus_comment = (c == '/');
695: in_comment = 1;
696: c = getc(finput);
697: while (in_comment)
698: {
699: putc(c, fattrs);
700: if (fdefines)
701: putc(c, fdefines);
702:
703: if (c == '\n')
704: {
705: lineno++;
706: if (cplus_comment)
707: {
708: in_comment = 0;
709: break;
710: }
711: }
712: if (c == EOF)
713: fatal("unterminated comment");
714:
715: if (!cplus_comment && c == '*')
716: {
717: c = getc(finput);
718: if (c == '/')
719: {
720: putc('/', fattrs);
721: if (fdefines)
722: putc('/', fdefines);
723: in_comment = 0;
724: }
725: }
726: else
727: c = getc(finput);
728: }
729: }
730: break;
731:
732:
733: case '{':
734: count++;
735: break;
736:
737: case '}':
738: if (count == 0)
739: fatal ("unmatched close-brace (`}')");
740: count--;
741: if (count == 0)
742: {
743: fprintf(fattrs, " YYSTYPE;\n");
744: if (fdefines)
745: fprintf(fdefines, " YYSTYPE;\n");
746: /* JF don't choke on trailing semi */
747: c=skip_white_space();
748: if(c!=';') ungetc(c,finput);
749: return;
750: }
751: }
752:
753: c = getc(finput);
754: }
755: }
756:
757: /* parse the declaration %expect N which says to expect N
758: shift-reduce conflicts. */
759:
760: void
761: parse_expect_decl()
762: {
763: register int c;
764: register int count;
765: char buffer[20];
766:
767: c = getc(finput);
768: while (c == ' ' || c == '\t')
769: c = getc(finput);
770:
771: count = 0;
772: while (c >= '0' && c <= '9')
773: {
774: if (count < 20)
775: buffer[count++] = c;
776: c = getc(finput);
777: }
778: buffer[count] = 0;
779:
780: ungetc (c, finput);
781:
782: expected_conflicts = atoi (buffer);
783: }
784:
785: /* that's all of parsing the declaration section */
786:
787: /* Get the data type (alternative in the union) of the value for symbol n in rule rule. */
788:
789: char *
790: get_type_name(n, rule)
791: int n;
792: symbol_list *rule;
793: {
794: static char *msg = "invalid $ value";
795:
796: register int i;
797: register symbol_list *rp;
798:
799: if (n < 0)
800: fatal(msg);
801:
802: rp = rule;
803: i = 0;
804:
805: while (i < n)
806: {
807: rp = rp->next;
808: if (rp == NULL || rp->sym == NULL)
809: fatal(msg);
810: i++;
811: }
812:
813: return (rp->sym->type_name);
814: }
815:
816:
817:
818: /* After %guard has been seen in the input file, copy the guard's C code
819: into the guards file as `case NRULES: { ... ; break;}', translating $$, $n, $<type>n and @n on the way.
820: If the guard is followed by an action, copy_action copies that into the actions file.
821: stack_offset is the number of values in the current rule so far,
822: which says where to find $0 with respect to the top of the stack,
823: for the simple parser in which the stack is not popped until after the guard is run. */
824:
825: void
826: copy_guard(rule, stack_offset)
827: symbol_list *rule;
828: int stack_offset;
829: {
830: register int c;
831: register int n;
832: register int count; /* number of braces currently open */
833: register int match; /* delimiter of the string or character literal being copied */
834: register int ended;
835: register char *type_name;
836: int brace_flag = 0; /* becomes 1 once a `{' has been seen in the guard */
837: int cplus_comment;
838:
839: /* offset is always 0 if parser has already popped the stack pointer */
840: if (semantic_parser) stack_offset = 0;
841:
842: fprintf(fguard, "\ncase %d:\n", nrules);
843: if (!nolinesflag)
844: fprintf(fguard, "#line %d \"%s\"\n", lineno, infile);
845: putc('{', fguard);
846:
847: count = 0;
848: c = getc(finput);
849:
850: while (brace_flag ? (count > 0) : (c != ';')) /* a braced guard ends when its braces balance; otherwise the guard ends at the first `;' */
851: {
852: switch (c)
853: {
854: case '\n':
855: putc(c, fguard);
856: lineno++;
857: break;
858:
859: case '{':
860: putc(c, fguard);
861: brace_flag = 1;
862: count++;
863: break;
864:
865: case '}':
866: putc(c, fguard);
867: if (count > 0)
868: count--;
869: else
870: fatal("unmatched right brace ('}')");
871: break;
872:
873: case '\'':
874: case '"': /* copy a whole literal, so that its contents are not interpreted */
875: match = c;
876: putc(c, fguard);
877: c = getc(finput);
878:
879: while (c != match)
880: {
881: if (c == EOF || c == '\n')
882: fatal("unterminated string");
883:
884: putc(c, fguard);
885:
886: if (c == '\\') /* copy the escaped character too, so an escaped delimiter does not end the literal */
887: {
888: c = getc(finput);
889: if (c == EOF)
890: fatal("unterminated string");
891: putc(c, fguard);
892: if (c == '\n')
893: lineno++;
894: }
895:
896: c = getc(finput);
897: }
898:
899: putc(c, fguard);
900: break;
901:
902: case '/':
903: putc(c, fguard);
904: c = getc(finput);
905: if (c != '*' && c != '/')
906: continue; /* not a comment: handle the character just read from the top of the loop */
907:
908: cplus_comment = (c == '/');
909: putc(c, fguard);
910: c = getc(finput);
911:
912: ended = 0;
913: while (!ended)
914: {
915: if (!cplus_comment && c == '*')
916: {
917: while (c == '*')
918: {
919: putc(c, fguard);
920: c = getc(finput);
921: }
922:
923: if (c == '/') /* a `/' right after a run of stars closes the C comment */
924: {
925: putc(c, fguard);
926: ended = 1;
927: }
928: }
929: else if (c == '\n')
930: {
931: lineno++;
932: putc(c, fguard);
933: if (cplus_comment) /* a `//' comment ends at the newline */
934: ended = 1;
935: else
936: c = getc(finput);
937: }
938: else if (c == EOF)
939: fatal("unterminated comment");
940: else
941: {
942: putc(c, fguard);
943: c = getc(finput);
944: }
945: }
946:
947: break;
948:
949: case '$': /* $$, $n or $<type>n: a reference to a semantic value */
950: c = getc(finput);
951: type_name = NULL;
952:
953: if (c == '<') /* explicit <type>: collect its name in token_buffer */
954: {
955: register char *cp = token_buffer;
956:
957: while ((c = getc(finput)) != '>' && c > 0) /* NOTE(review): nothing here bounds the write into token_buffer -- confirm its capacity */
958: *cp++ = c;
959: *cp = 0;
960: type_name = token_buffer;
961:
962: c = getc(finput);
963: }
964:
965: if (c == '$')
966: {
967: fprintf(fguard, "yyval");
968: if (!type_name) type_name = rule->sym->type_name; /* default to the declared type of the rule's first symbol */
969: if (type_name)
970: fprintf(fguard, ".%s", type_name);
971: if(!type_name && typed) /* JF */
972: fprintf(stderr,"%s:%d: warning: $$ of '%s' has no declared type.\n",infile,lineno,rule->sym->tag);
973: }
974:
975: else if (isdigit(c) || c == '-')
976: {
977: ungetc (c, finput);
978: n = read_signed_integer(finput);
979: c = getc(finput);
980:
981: if (!type_name && n > 0)
982: type_name = get_type_name(n, rule);
983:
984: fprintf(fguard, "yyvsp[%d]", n - stack_offset);
985: if (type_name)
986: fprintf(fguard, ".%s", type_name);
987: if(!type_name && typed) /* JF */
988: fprintf(stderr,"%s:%d: warning: $%d of '%s' has no declared type.\n",infile,lineno,n,rule->sym->tag);
989: continue; /* c already holds the character after the number */
990: }
991: else
992: fatals("$%c is invalid",c); /* JF changed style */
993:
994: break;
995:
996: case '@': /* @n: a reference to a location on the yylsp stack */
997: c = getc(finput);
998: if (isdigit(c) || c == '-')
999: {
1000: ungetc (c, finput);
1001: n = read_signed_integer(finput);
1002: c = getc(finput);
1003: }
1004: else
1005: fatals("@%c is invalid",c); /* JF changed style */
1006:
1007: fprintf(fguard, "yylsp[%d]", n - stack_offset);
1008: yylsp_needed = 1;
1009:
1010: continue; /* c already holds the character after the number */
1011:
1012: case EOF:
1013: fatal("unterminated %guard clause");
1014:
1015: default:
1016: putc(c, fguard);
1017: }
1018:
1019: if (c != '}' || count != 0) /* do not read past the brace that closes the guard */
1020: c = getc(finput);
1021: }
1022:
1023: c = skip_white_space();
1024:
1025: fprintf(fguard, ";\n break;}"); /* closes the `{' written before the guard text */
1026: if (c == '{')
1027: copy_action(rule, stack_offset);
1028: else if (c == '=') /* an action may also be introduced by `=' followed by `{' */
1029: {
1030: c = getc(finput);
1031: if (c == '{')
1032: copy_action(rule, stack_offset);
1033: } /* NOTE(review): a character other than `{' after `=' is consumed here and not pushed back -- confirm that is intended */
1034: else
1035: ungetc(c, finput);
1036: }
1037:
1038:
1039:
1040: /* Assuming that a { has just been seen, copy everything up to the matching }
1041: into the actions file as `case NRULES: { ... ; break;}', translating $$, $n, $<type>n and @n on the way.
1042: stack_offset is the number of values in the current rule so far,
1043: which says where to find $0 with respect to the top of the stack. */
1044:
1045: void
1046: copy_action(rule, stack_offset)
1047: symbol_list *rule;
1048: int stack_offset;
1049: {
1050: register int c;
1051: register int n;
1052: register int count; /* number of braces currently open */
1053: register int match; /* delimiter of the string or character literal being copied */
1054: register int ended;
1055: register char *type_name;
1056: int cplus_comment;
1057:
1058: /* offset is always 0 if parser has already popped the stack pointer */
1059: if (semantic_parser) stack_offset = 0;
1060:
1061: fprintf(faction, "\ncase %d:\n", nrules);
1062: if (!nolinesflag)
1063: fprintf(faction, "#line %d \"%s\"\n", lineno, infile);
1064: putc('{', faction);
1065:
1066: count = 1; /* the opening brace was read before this function was entered */
1067: c = getc(finput);
1068:
1069: while (count > 0) /* one iteration per closing brace met */
1070: {
1071: while (c != '}')
1072: {
1073: switch (c)
1074: {
1075: case '\n':
1076: putc(c, faction);
1077: lineno++;
1078: break;
1079:
1080: case '{':
1081: putc(c, faction);
1082: count++;
1083: break;
1084:
1085: case '\'':
1086: case '"': /* copy a whole literal, so that braces inside it are not counted */
1087: match = c;
1088: putc(c, faction);
1089: c = getc(finput);
1090:
1091: while (c != match)
1092: {
1093: if (c == EOF || c == '\n')
1094: fatal("unterminated string");
1095:
1096: putc(c, faction);
1097:
1098: if (c == '\\') /* copy the escaped character too, so an escaped delimiter does not end the literal */
1099: {
1100: c = getc(finput);
1101: if (c == EOF)
1102: fatal("unterminated string");
1103: putc(c, faction);
1104: if (c == '\n')
1105: lineno++;
1106: }
1107:
1108: c = getc(finput);
1109: }
1110:
1111: putc(c, faction);
1112: break;
1113:
1114: case '/':
1115: putc(c, faction);
1116: c = getc(finput);
1117: if (c != '*' && c != '/')
1118: continue; /* not a comment: handle the character just read from the top of the inner loop */
1119:
1120: cplus_comment = (c == '/');
1121: putc(c, faction);
1122: c = getc(finput);
1123:
1124: ended = 0;
1125: while (!ended)
1126: {
1127: if (!cplus_comment && c == '*')
1128: {
1129: while (c == '*')
1130: {
1131: putc(c, faction);
1132: c = getc(finput);
1133: }
1134:
1135: if (c == '/') /* a `/' right after a run of stars closes the C comment */
1136: {
1137: putc(c, faction);
1138: ended = 1;
1139: }
1140: }
1141: else if (c == '\n')
1142: {
1143: lineno++;
1144: putc(c, faction);
1145: if (cplus_comment) /* a `//' comment ends at the newline */
1146: ended = 1;
1147: else
1148: c = getc(finput);
1149: }
1150: else if (c == EOF)
1151: fatal("unterminated comment");
1152: else
1153: {
1154: putc(c, faction);
1155: c = getc(finput);
1156: }
1157: }
1158:
1159: break;
1160:
1161: case '$': /* $$, $n or $<type>n: a reference to a semantic value */
1162: c = getc(finput);
1163: type_name = NULL;
1164:
1165: if (c == '<') /* explicit <type>: collect its name in token_buffer */
1166: {
1167: register char *cp = token_buffer;
1168:
1169: while ((c = getc(finput)) != '>' && c > 0) /* NOTE(review): nothing here bounds the write into token_buffer -- confirm its capacity */
1170: *cp++ = c;
1171: *cp = 0;
1172: type_name = token_buffer;
1173: value_components_used = 1;
1174:
1175: c = getc(finput);
1176: }
1177: if (c == '$')
1178: {
1179: fprintf(faction, "yyval");
1180: if (!type_name) type_name = get_type_name(0, rule); /* default to the declared type of the rule's first symbol */
1181: if (type_name)
1182: fprintf(faction, ".%s", type_name);
1183: if(!type_name && typed) /* JF */
1184: fprintf(stderr,"%s:%d: warning: $$ of '%s' has no declared type.\n",infile,lineno,rule->sym->tag);
1185: }
1186: else if (isdigit(c) || c == '-')
1187: {
1188: ungetc (c, finput);
1189: n = read_signed_integer(finput);
1190: c = getc(finput);
1191:
1192: if (!type_name && n > 0)
1193: type_name = get_type_name(n, rule);
1194:
1195: fprintf(faction, "yyvsp[%d]", n - stack_offset);
1196: if (type_name)
1197: fprintf(faction, ".%s", type_name);
1198: if(!type_name && typed) /* JF */
1199: fprintf(stderr,"%s:%d: warning: $%d of '%s' has no declared type.\n",infile,lineno,n,rule->sym->tag);
1200: continue; /* c already holds the character after the number */
1201: }
1202: else
1203: fatals("$%c is invalid",c); /* JF changed format */
1204:
1205: break;
1206:
1207: case '@': /* @n: a reference to a location on the yylsp stack */
1208: c = getc(finput);
1209: if (isdigit(c) || c == '-')
1210: {
1211: ungetc (c, finput);
1212: n = read_signed_integer(finput);
1213: c = getc(finput);
1214: }
1215: else
1216: fatal("invalid @-construct");
1217:
1218: fprintf(faction, "yylsp[%d]", n - stack_offset);
1219: yylsp_needed = 1;
1220:
1221: continue; /* c already holds the character after the number */
1222:
1223: case EOF:
1224: fatal("unmatched '{'");
1225:
1226: default:
1227: putc(c, faction);
1228: }
1229:
1230: c = getc(finput);
1231: }
1232:
1233: /* above loop exits when c is '}' */
1234:
1235: if (--count) /* a nested brace closed: copy it and keep going; the outermost `}' is not copied here */
1236: {
1237: putc(c, faction);
1238: c = getc(finput);
1239: }
1240: }
1241:
1242: fprintf(faction, ";\n break;}"); /* this `}' stands in for the action's own closing brace */
1243: }
1244:
1245:
1246:
1247: /* generate a dummy symbol, a nonterminal,
1248: whose name cannot conflict with the user's names. */
1249:
1250: bucket *
1251: gensym()
1252: {
1253: register bucket *sym;
1254:
1255: sprintf (token_buffer, "@%d", ++gensym_count);
1256: sym = getsym(token_buffer);
1257: sym->class = SNTERM;
1258: sym->value = nvars++;
1259: return (sym);
1260: }
1261:
1262: /* Parse the input grammar into one symbol_list structure.
1263: Each rule is represented by a sequence of symbols: the left hand side
1264: followed by the contents of the right hand side, followed by a null pointer
1265: instead of a symbol to terminate the rule.
1266: The next symbol is the lhs of the following rule.
1267:
1268: All guards and actions are copied out to the appropriate files,
1269: labelled by the rule number they apply to. */
1270:
1271: void
1272: readgram()
1273: {
1274: register int t;
1275: register bucket *lhs;
1276: register symbol_list *p;
1277: register symbol_list *p1;
1278: register bucket *bp;
1279:
1280: symbol_list *crule; /* points to first symbol_list of current rule. */
1281: /* its symbol is the lhs of the rule. */
1282: symbol_list *crule1; /* points to the symbol_list preceding crule. */
1283:
1284: p1 = NULL;
1285:
1286: t = lex();
1287:
1288: while (t != TWO_PERCENTS && t != ENDFILE)
1289: {
1290: if (t == IDENTIFIER || t == BAR)
1291: {
1292: register int actionflag = 0;
1293: int rulelength = 0; /* number of symbols in rhs of this rule so far */
1294: int xactions = 0; /* JF for error checking */
1295: bucket *first_rhs = 0;
1296:
1297: if (t == IDENTIFIER)
1298: {
1299: lhs = symval;
1300:
1301: t = lex();
1302: if (t != COLON)
1303: fatal("ill-formed rule");
1304: }
1305:
1306: if (nrules == 0)
1307: {
1308: if (t == BAR)
1309: fatal("grammar starts with vertical bar");
1310:
1311: if (!start_flag)
1312: startval = lhs;
1313: }
1314:
1315: /* start a new rule and record its lhs. */
1316:
1317: nrules++;
1318: nitems++;
1319:
1320: record_rule_line ();
1321:
1322: p = NEW(symbol_list);
1323: p->sym = lhs;
1324:
1325: crule1 = p1;
1326: if (p1)
1327: p1->next = p;
1328: else
1329: grammar = p;
1330:
1331: p1 = p;
1332: crule = p;
1333:
1334: /* mark the rule's lhs as a nonterminal if not already so. */
1335:
1336: if (lhs->class == SUNKNOWN)
1337: {
1338: lhs->class = SNTERM;
1339: lhs->value = nvars;
1340: nvars++;
1341: }
1342: else if (lhs->class == STOKEN)
1343: fatals("rule given for %s, which is a token", lhs->tag);
1344:
1345: /* read the rhs of the rule. */
1346:
1347: for (;;)
1348: {
1349: t = lex();
1350:
1351: if (! (t == IDENTIFIER || t == LEFT_CURLY)) break;
1352:
1353: /* If next token is an identifier, see if a colon follows it.
1354: If one does, exit this rule now. */
1355: if (t == IDENTIFIER)
1356: {
1357: register bucket *ssave;
1358: register int t1;
1359:
1360: ssave = symval;
1361: t1 = lex();
1362: unlex(t1);
1363: symval = ssave;
1364: if (t1 == COLON) break;
1365:
1366: if(!first_rhs) /* JF */
1367: first_rhs = symval;
1368: /* Not followed by colon =>
1369: process as part of this rule's rhs. */
1370: }
1371:
1372: /* If we just passed an action, that action was in the middle
1373: of a rule, so make a dummy rule to reduce it to a
1374: non-terminal. */
1375: if (actionflag)
1376: {
1377: register bucket *sdummy;
1378:
1379: /* Since the action was written out with this rule's */
1380: /* number, we must write give the new rule this number */
1381: /* by inserting the new rule before it. */
1382:
1383: /* Make a dummy nonterminal, a gensym. */
1384: sdummy = gensym();
1385:
1386: /* Make a new rule, whose body is empty,
1387: before the current one, so that the action
1388: just read can belong to it. */
1389: nrules++;
1390: nitems++;
1391: record_rule_line ();
1392: p = NEW(symbol_list);
1393: if (crule1)
1394: crule1->next = p;
1395: else grammar = p;
1396: p->sym = sdummy;
1397: crule1 = NEW(symbol_list);
1398: p->next = crule1;
1399: crule1->next = crule;
1400:
1401: /* insert the dummy generated by that rule into this rule. */
1402: nitems++;
1403: p = NEW(symbol_list);
1404: p->sym = sdummy;
1405: p1->next = p;
1406: p1 = p;
1407:
1408: actionflag = 0;
1409: }
1410:
1411: if (t == IDENTIFIER)
1412: {
1413: nitems++;
1414: p = NEW(symbol_list);
1415: p->sym = symval;
1416: p1->next = p;
1417: p1 = p;
1418: }
1419: else /* handle an action. */
1420: {
1421: copy_action(crule, rulelength);
1422: actionflag = 1;
1423: xactions++; /* JF */
1424: }
1425: rulelength++;
1426: }
1427:
1428: /* Put an empty link in the list to mark the end of this rule */
1429: p = NEW(symbol_list);
1430: p1->next = p;
1431: p1 = p;
1432:
1433: if (t == PREC)
1434: {
1435: t = lex();
1436: crule->ruleprec = symval;
1437: t = lex();
1438: }
1439: if (t == GUARD)
1440: {
1441: if (! semantic_parser)
1442: fatal("%guard present but %semantic_parser not specified");
1443:
1444: copy_guard(crule, rulelength);
1445: t = lex();
1446: }
1447: else if (t == LEFT_CURLY)
1448: {
1449: if (actionflag) fatal("two actions at end of one rule");
1450: copy_action(crule, rulelength);
1451: t = lex();
1452: }
1453: /* If $$ is being set in default way,
1454: warn if any type mismatch. */
1455: else if (!xactions && first_rhs && lhs->type_name != first_rhs->type_name)
1456: {
1457: if (lhs->type_name == 0 || first_rhs->type_name == 0
1458: || strcmp(lhs->type_name,first_rhs->type_name))
1459: fprintf(stderr, "%s:%d: warning: type clash ('%s' '%s') on default action\n",
1460: infile,
1461: lineno,
1462: lhs->type_name ? lhs->type_name : "",
1463: first_rhs->type_name ? first_rhs->type_name : "");
1464: }
1465: /* Warn if there is no default for $$ but we need one. */
1466: else if (!xactions && !first_rhs && lhs->type_name != 0)
1467: fprintf(stderr,
1468: "%s:%d: warning: empty rule for typed nonterminal, and no action\n",
1469: infile,
1470: lineno);
1471: if (t == SEMICOLON)
1472: t = lex();
1473: }
1474: /* these things can appear as alternatives to rules. */
1475: else if (t == TOKEN)
1476: {
1477: parse_token_decl(STOKEN, SNTERM);
1478: t = lex();
1479: }
1480: else if (t == NTERM)
1481: {
1482: parse_token_decl(SNTERM, STOKEN);
1483: t = lex();
1484: }
1485: else if (t == TYPE)
1486: {
1487: t = get_type();
1488: }
1489: else if (t == UNION)
1490: {
1491: parse_union_decl();
1492: t = lex();
1493: }
1494: else if (t == EXPECT)
1495: {
1496: parse_expect_decl();
1497: t = lex();
1498: }
1499: else if (t == START)
1500: {
1501: parse_start_decl();
1502: t = lex();
1503: }
1504: else
1505: fatal("invalid input");
1506: }
1507:
1508: if (nsyms > MAXSHORT)
1509: fatals("too many symbols (tokens plus nonterminals); maximum %d",
1510: MAXSHORT);
1511: if (nrules == 0)
1512: fatal("no input grammar");
1513:
1514: if (typed == 0 /* JF put out same default YYSTYPE as YACC does */
1515: && !value_components_used)
1516: {
1517: /* We used to use `unsigned long' as YYSTYPE on MSDOS,
1518: but it seems better to be consistent.
1519: Most programs should declare their own type anyway. */
1520: fprintf(fattrs, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1521: if (fdefines)
1522: fprintf(fdefines, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1523: }
1524:
1525: /* Report any undefined symbols and consider them nonterminals. */
1526:
1527: for (bp = firstsymbol; bp; bp = bp->next)
1528: if (bp->class == SUNKNOWN)
1529: {
1530: fprintf(stderr, "symbol %s used, not defined as token, and no rules for it\n",
1531: bp->tag);
1532: failure = 1;
1533: bp->class = SNTERM;
1534: bp->value = nvars++;
1535: }
1536:
1537: ntokens = nsyms - nvars;
1538: }
1539:
1540:
1541: void
1542: record_rule_line ()
1543: {
1544: /* Record each rule's source line number in rline table. */
1545:
1546: if (nrules >= rline_allocated)
1547: {
1548: rline_allocated = nrules * 2;
1549: rline = (short *) xrealloc (rline,
1550: rline_allocated * sizeof (short));
1551: }
1552: rline[nrules] = lineno;
1553: }
1554:
1555:
1556: /* read in a %type declaration and record its information for get_type_name to access */
1557:
1558: int
1559: get_type()
1560: {
1561: register int k;
1562: register int t;
1563: register char *name;
1564:
1565: t = lex();
1566:
1567: if (t != TYPENAME)
1568: fatal("ill-formed %type declaration");
1569:
1570: k = strlen(token_buffer);
1571: name = NEW2(k + 1, char);
1572: strcpy(name, token_buffer);
1573:
1574: for (;;)
1575: {
1576: t = lex();
1577:
1578: switch (t)
1579: {
1580: case SEMICOLON:
1581: return (lex());
1582:
1583: case COMMA:
1584: break;
1585:
1586: case IDENTIFIER:
1587: if (symval->type_name == NULL)
1588: symval->type_name = name;
1589: else
1590: fatals("type redeclaration for %s", symval->tag);
1591:
1592: break;
1593:
1594: default:
1595: return (t);
1596: }
1597: }
1598: }
1599:
1600:
1601:
1602: /* assign symbol numbers, and write definition of token names into fdefines.
1603: Set up vectors tags and sprec of names and precedences of symbols. */
1604:
1605: void
1606: packsymbols()
1607: {
1608: register bucket *bp;
1609: register int tokno = 1;
1610: register int i;
1611: register int last_user_token_number;
1612:
1613: /* int lossage = 0; JF set but not used */
1614:
1615: tags = NEW2(nsyms + 1, char *);
1616: tags[0] = "$";
1617:
1618: sprec = NEW2(nsyms, short);
1619: sassoc = NEW2(nsyms, short);
1620:
1621: max_user_token_number = 256;
1622: last_user_token_number = 256;
1623:
1624: for (bp = firstsymbol; bp; bp = bp->next)
1625: {
1626: if (bp->class == SNTERM)
1627: {
1628: bp->value += ntokens;
1629: }
1630: else
1631: {
1632: if (translations && !(bp->user_token_number))
1633: bp->user_token_number = ++last_user_token_number;
1634: if (bp->user_token_number > max_user_token_number)
1635: max_user_token_number = bp->user_token_number;
1636: bp->value = tokno++;
1637: }
1638:
1639: tags[bp->value] = bp->tag;
1640: sprec[bp->value] = bp->prec;
1641: sassoc[bp->value] = bp->assoc;
1642:
1643: }
1644:
1645: if (translations)
1646: {
1647: register int i;
1648:
1649: token_translations = NEW2(max_user_token_number+1, short);
1650:
1651: /* initialize all entries for literal tokens to 2,
1652: the internal token number for $illegal., which represents all invalid inputs. */
1653: for (i = 0; i <= max_user_token_number; i++)
1654: token_translations[i] = 2;
1655: }
1656:
1657: for (bp = firstsymbol; bp; bp = bp->next)
1658: {
1659: if (bp->value >= ntokens) continue;
1660: if (translations)
1661: {
1662: if (token_translations[bp->user_token_number] != 2)
1663: {
1664: /* JF made this a call to fatals() */
1665: fatals( "tokens %s and %s both assigned number %d",
1666: tags[token_translations[bp->user_token_number]],
1667: bp->tag,
1668: bp->user_token_number);
1669: }
1670: token_translations[bp->user_token_number] = bp->value;
1671: }
1672: }
1673:
1674: error_token_number = errtoken->value;
1675:
1676: output_token_defines(ftable);
1677:
1678: if (startval->class == SUNKNOWN)
1679: fatals("the start symbol %s is undefined", startval->tag);
1680: else if (startval->class == STOKEN)
1681: fatals("the start symbol %s is a token", startval->tag);
1682:
1683: start_symbol = startval->value;
1684:
1685: if (definesflag)
1686: {
1687: output_token_defines(fdefines);
1688:
1689: if (!pure_parser)
1690: {
1691: if (spec_name_prefix)
1692: fprintf(fdefines, "\nextern YYSTYPE %slval;\n", spec_name_prefix);
1693: else
1694: fprintf(fdefines, "\nextern YYSTYPE yylval;\n");
1695: }
1696:
1697: if (semantic_parser)
1698: for (i = ntokens; i < nsyms; i++)
1699: {
1700: /* don't make these for dummy nonterminals made by gensym. */
1701: if (*tags[i] != '@')
1702: fprintf(fdefines, "#define\tNT%s\t%d\n", tags[i], i);
1703: }
1704: #if 0
1705: /* `fdefines' is now a temporary file, so we need to copy its
1706: contents in `done', so we can't close it here. */
1707: fclose(fdefines);
1708: fdefines = NULL;
1709: #endif
1710: }
1711: }
1712:
1713:
1714: void
1715: output_token_defines(file)
1716: FILE *file;
1717: {
1718: bucket *bp;
1719:
1720: for (bp = firstsymbol; bp; bp = bp->next)
1721: {
1722: if (bp->value >= ntokens) continue;
1723:
1724: /* For named tokens, but not literal ones, define the name. */
1725: /* The value is the user token number. */
1726:
1727: if ('\'' != *tags[bp->value] && bp != errtoken)
1728: {
1729: register char *cp = tags[bp->value];
1730: register char c;
1731:
1732: /* Don't #define nonliteral tokens whose names contain periods. */
1733:
1734: while ((c = *cp++) && c != '.');
1735: if (!c)
1736: {
1737: fprintf(file, "#define\t%s\t%d\n", tags[bp->value],
1738: (translations ? bp->user_token_number : bp->value));
1739: if (semantic_parser)
1740: fprintf(file, "#define\tT%s\t%d\n", tags[bp->value],
1741: bp->value);
1742: }
1743: }
1744: }
1745:
1746: putc('\n', file);
1747: }
1748:
1749:
1750:
1751: /* convert the rules into the representation using rrhs, rlhs and ritems. */
1752:
1753: void
1754: packgram()
1755: {
1756: register int itemno;
1757: register int ruleno;
1758: register symbol_list *p;
1759: /* register bucket *bp; JF unused */
1760:
1761: bucket *ruleprec;
1762:
1763: ritem = NEW2(nitems + 1, short);
1764: rlhs = NEW2(nrules, short) - 1;
1765: rrhs = NEW2(nrules, short) - 1;
1766: rprec = NEW2(nrules, short) - 1;
1767: rprecsym = NEW2(nrules, short) - 1;
1768: rassoc = NEW2(nrules, short) - 1;
1769:
1770: itemno = 0;
1771: ruleno = 1;
1772:
1773: p = grammar;
1774: while (p)
1775: {
1776: rlhs[ruleno] = p->sym->value;
1777: rrhs[ruleno] = itemno;
1778: ruleprec = p->ruleprec;
1779:
1780: p = p->next;
1781: while (p && p->sym)
1782: {
1783: ritem[itemno++] = p->sym->value;
1784: /* A rule gets by default the precedence and associativity
1785: of the last token in it. */
1786: if (p->sym->class == STOKEN)
1787: {
1788: rprec[ruleno] = p->sym->prec;
1789: rassoc[ruleno] = p->sym->assoc;
1790: }
1791: if (p) p = p->next;
1792: }
1793:
1794: /* If this rule has a %prec,
1795: the specified symbol's precedence replaces the default. */
1796: if (ruleprec)
1797: {
1798: rprec[ruleno] = ruleprec->prec;
1799: rassoc[ruleno] = ruleprec->assoc;
1800: rprecsym[ruleno] = ruleprec->value;
1801: }
1802:
1803: ritem[itemno++] = -ruleno;
1804: ruleno++;
1805:
1806: if (p) p = p->next;
1807: }
1808:
1809: ritem[itemno] = 0;
1810: }
1811:
/* Read a signed decimal integer from STREAM and return its value.

   An optional leading '-' is accepted, followed by zero or more decimal
   digits.  The first character that is not part of the number is pushed
   back onto STREAM with ungetc.  If no digits are present the result is
   0; a '-' that was read is not pushed back in that case.

   A magnitude too large for an `int' is clamped to the largest `int'
   value instead of overflowing (signed overflow is undefined
   behaviour); all the digits of the number are still consumed.  */

int
read_signed_integer (stream)
     FILE *stream;
{
  /* Largest `int' value, derived here so that no additional header is
     required.  */
  int int_max = (int) (~(unsigned) 0 >> 1);
  int negative = 0;
  int magnitude = 0;
  int c = getc(stream);

  if (c == '-')
    {
      negative = 1;
      c = getc(stream);
    }

  while (isdigit(c))
    {
      int digit = c - '0';

      /* Check before multiplying: 10*magnitude + digit would exceed
	 int_max exactly when magnitude > (int_max - digit) / 10.  */
      if (magnitude > (int_max - digit) / 10)
	magnitude = int_max;
      else
	magnitude = 10 * magnitude + digit;

      c = getc(stream);
    }

  /* Give back the character that ended the number (ungetc does nothing
     when it is EOF).  */
  ungetc(c, stream);

  return negative ? -magnitude : magnitude;
}
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.