|
|
1.1 root 1: /* parse.y - parser for flex input */
2:
3: %token CHAR NUMBER SECTEND SCDECL XSCDECL WHITESPACE NAME PREVCCL EOF_OP
4:
5: %{
6:
7: /*-
8: * Copyright (c) 1990 The Regents of the University of California.
9: * All rights reserved.
10: *
11: * This code is derived from software contributed to Berkeley by
12: * Vern Paxson.
13: *
14: * The United States Government has rights in this work pursuant
15: * to contract no. DE-AC03-76SF00098 between the United States
16: * Department of Energy and the University of California.
17: *
18: * Redistribution and use in source and binary forms are permitted provided
19: * that: (1) source distributions retain this entire copyright notice and
20: * comment, and (2) distributions including binaries display the following
21: * acknowledgement: ``This product includes software developed by the
22: * University of California, Berkeley and its contributors'' in the
23: * documentation or other materials provided with the distribution and in
24: * all advertising materials mentioning features or use of this software.
25: * Neither the name of the University nor the names of its contributors may
26: * be used to endorse or promote products derived from this software without
27: * specific prior written permission.
28: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
29: * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
30: * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
31: */
32:
33: #ifndef lint
34: static char sccsid[] = "@(#)parse.y 5.2 (Berkeley) 6/18/90";
35: #endif /* not lint */
36:
37: #include "flexdef.h"
38:
39: int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, actvp, rulelen;
40: int trlcontxt, xcluflg, cclsorted, varlength, variable_trail_rule;
41: Char clower();
42:
43: static int madeany = false; /* whether we've made the '.' character class */
44: int previous_continued_action; /* whether the previous rule's action was '|' */
45:
46: %}
47:
48: %%
goal
    : initlex sect1 sect1end sect2 initforrule
        {
        /* The whole input has been parsed: append the default rule,
         * built from a negated empty character class, to every
         * start condition and emit its action.
         */
        int default_rule;

        pat = cclinit();
        cclnegate( pat );
        default_rule = mkstate( -pat );

        finish_rule( default_rule, false, 0, 0 );

        i = 1;
        while ( i <= lastsc )
            {
            scset[i] = mkbranch( scset[i], default_rule );
            ++i;
            }

        /* the default action is either to abort or to echo */
        if ( ! spprdflt )
            fputs( "ECHO", temp_action_file );
        else
            fputs( "YY_FATAL_ERROR( \"flex scanner jammed\" )",
                   temp_action_file );

        fputs( ";\n\tYY_BREAK\n", temp_action_file );
        }
    ;
72:
initlex
    : /* empty */
        {
        /* Runs before anything else is parsed: install the default
         * start condition (non-exclusive) so rule processing can begin.
         */
        scinstal( "INITIAL", false );
        }
    ;
81:
/* section 1: zero or more start condition declarations, one per line */
sect1
    : sect1 startconddecl WHITESPACE namelist1 '\n'
    | /* empty */
    | error '\n'
        { synerr( "unknown error processing section 1" ); }
    ;
87:
/* the SECTEND token terminates section 1 */
sect1end
    : SECTEND
    ;
90:
/* Kept as its own nonterminal so that xcluflg is already set when the
 * names that follow (namelist1) are installed with scinstal().
 */
startconddecl
    : SCDECL
        { xcluflg = false; }

    | XSCDECL
        { xcluflg = true; }
    ;
104:
/* whitespace-separated start condition names in a section 1 declaration;
 * each name is installed using the exclusiveness chosen by startconddecl
 */
namelist1
    : namelist1 WHITESPACE NAME
        { scinstal( nmstr, xcluflg ); }

    | NAME
        { scinstal( nmstr, xcluflg ); }

    | error
        { synerr( "bad start condition list" ); }
    ;
114:
/* section 2: zero or more rules, each preceded by per-rule initialization */
sect2
    : sect2 initforrule flexrule '\n'
    | /* empty */
    ;
118:
initforrule
    : /* empty */
        {
        /* reset the per-rule bookkeeping before a rule is parsed */
        varlength = false;
        variable_trail_rule = false;
        trlcontxt = false;

        rulelen = 0;
        headcnt = 0;
        trailcnt = 0;

        current_state_type = STATE_NORMAL;

        /* remember whether the rule just finished ended in '|' */
        previous_continued_action = continued_action;

        new_rule();
        }
    ;
129:
/* One complete rule.  The pattern machine is finished and then branched
 * into the per-start-condition machines: scbol[] for '^'-anchored rules,
 * scset[] otherwise.
 */
flexrule
    : scon '^' rule
        {
        /* anchored rule limited to the start conditions in actvsc[] */
        pat = $3;
        finish_rule( pat, variable_trail_rule, headcnt, trailcnt );

        i = 1;
        while ( i <= actvp )
            {
            scbol[actvsc[i]] = mkbranch( scbol[actvsc[i]], pat );
            ++i;
            }

        if ( ! bol_needed )
            {
            bol_needed = true;

            if ( performance_report )
                pinpoint_message(
                    "'^' operator results in sub-optimal performance" );
            }
        }

    | scon rule
        {
        /* unanchored rule limited to the start conditions in actvsc[] */
        pat = $2;
        finish_rule( pat, variable_trail_rule, headcnt, trailcnt );

        i = 1;
        while ( i <= actvp )
            {
            scset[actvsc[i]] = mkbranch( scset[actvsc[i]], pat );
            ++i;
            }
        }

    | '^' rule
        {
        /* anchored rule with no scon: it goes into every start
         * condition that is not exclusive
         */
        pat = $2;
        finish_rule( pat, variable_trail_rule, headcnt, trailcnt );

        for ( i = 1; i <= lastsc; ++i )
            {
            if ( scxclu[i] )
                continue;

            scbol[i] = mkbranch( scbol[i], pat );
            }

        if ( ! bol_needed )
            {
            bol_needed = true;

            if ( performance_report )
                pinpoint_message(
                    "'^' operator results in sub-optimal performance" );
            }
        }

    | rule
        {
        /* plain rule: it goes into every non-exclusive start condition */
        pat = $1;
        finish_rule( pat, variable_trail_rule, headcnt, trailcnt );

        for ( i = 1; i <= lastsc; ++i )
            {
            if ( scxclu[i] )
                continue;

            scset[i] = mkbranch( scset[i], pat );
            }
        }

    | scon EOF_OP
        { build_eof_action(); }

    | EOF_OP
        {
        /* an EOF rule without an scon covers every start condition
         * that does not have an EOF action yet
         */
        actvp = 0;

        for ( i = 1; i <= lastsc; ++i )
            {
            if ( ! sceof[i] )
                {
                ++actvp;
                actvsc[actvp] = i;
                }
            }

        if ( actvp != 0 )
            build_eof_action();
        else
            pinpoint_message(
                "warning - all start conditions already have <<EOF>> rules" );
        }

    | error
        { synerr( "unrecognized rule" ); }
    ;
221:
/* start condition prefix of a rule: '<' name-list '>' */
scon
    : '<' namelist2 '>'
    ;
224:
/* Comma-separated start condition names inside '<' ... '>'.  Each declared
 * name is appended to actvsc[], with actvp indexing the last entry.
 *
 * The list is always restarted (actvp = 0) when its first element is
 * parsed, even if that element is undeclared or unparsable.  Otherwise
 * actvp would keep its value from the previous rule, and the current rule
 * would be attached to that rule's start conditions.
 */
namelist2
    : namelist2 ',' NAME
        {
        scnum = sclookup( nmstr );

        if ( scnum == 0 )
            format_pinpoint_message(
                "undeclared start condition %s", nmstr );
        else
            actvsc[++actvp] = scnum;
        }

    | NAME
        {
        /* first name of the list: start from an empty active set */
        actvp = 0;

        scnum = sclookup( nmstr );

        if ( scnum == 0 )
            format_pinpoint_message(
                "undeclared start condition %s", nmstr );
        else
            actvsc[++actvp] = scnum;
        }

    | error
        {
        /* whatever was collected before the error is discarded */
        actvp = 0;
        synerr( "bad start condition list" );
        }
    ;
247:
/* A pattern, optionally with trailing context ("head/trail" or "head$").
 * These actions settle headcnt, trailcnt and variable_trail_rule, which
 * flexrule then passes to finish_rule().
 */
rule
    : re2 re
        {
        /* $1 is the head (already reduced by re2), $2 the trailing part */
        if ( transchar[lastst[$2]] != SYM_EPSILON )
            {
            /* add the final transition now so that it is still
             * marked as a trailing context state
             */
            $2 = link_machines( $2, mkstate( SYM_EPSILON ) );
            }

        mark_beginning_as_normal( $2 );
        current_state_type = STATE_NORMAL;

        if ( previous_continued_action )
            {
            /* The rule must be treated as variable trailing context
             * so that the backup is done before the action switch
             * statement and not inside the action.  If it were done
             * inside the action, the rules "falling into" this
             * action would erroneously do the backup as well.
             */
            if ( ! (varlength && headcnt == 0) )
                {
                fprintf( stderr,
    "%s: warning - trailing context rule at line %d made variable because\n",
                         program_name, linenum );
                fprintf( stderr,
                         "      of preceding '|' action\n" );
                }

            /* mark as variable */
            varlength = true;
            headcnt = 0;
            }

        if ( ! varlength || headcnt != 0 )
            trailcnt = rulelen;

        else
            {
            /* Variable trailing context rule: mark the head as the
             * accepting "head" part.  This is not done at the start
             * of the action because current_state_type was then
             * still set up for the trailing part, and add_accept()
             * can create a new state.
             */
            add_accept( $1, num_rules | YY_TRAILING_HEAD_MASK );
            variable_trail_rule = true;
            }

        $$ = link_machines( $1, $2 );
        }

    | re2 re '$'
        { synerr( "trailing context used twice" ); }

    | re '$'
        {
        if ( trlcontxt )
            {
            synerr( "trailing context used twice" );
            $$ = mkstate( SYM_EPSILON );
            }

        else if ( previous_continued_action )
            {
            /* same reasoning as in the "re2 re" alternative above */
            if ( ! (varlength && headcnt == 0) )
                {
                fprintf( stderr,
    "%s: warning - trailing context rule at line %d made variable because\n",
                         program_name, linenum );
                fprintf( stderr,
                         "      of preceding '|' action\n" );
                }

            /* mark as variable */
            varlength = true;
            headcnt = 0;
            }

        trlcontxt = true;

        if ( ! varlength )
            headcnt = rulelen;

        /* '$' is built as a trailing context consisting of a newline */
        ++rulelen;
        trailcnt = 1;

        eps = mkstate( SYM_EPSILON );
        $$ = link_machines( $1,
                            link_machines( eps, mkstate( '\n' ) ) );
        }

    | re
        {
        $$ = $1;

        if ( trlcontxt )
            {
            if ( ! varlength || headcnt != 0 )
                trailcnt = rulelen;
            else
                /* both head and trail are variable-length */
                variable_trail_rule = true;
            }
        }
    ;
362:
363:
/* alternation: one or more series separated by '|' */
re
    : re '|' series
        {
        /* an alternation is recorded as variable-length */
        varlength = true;

        $$ = mkor( $1, $3 );
        }

    | series
        { $$ = $1; }
    ;
373:
374:
/* The head of a "head/trail" rule.  It is a separate nonterminal so that
 * this reduction takes place before the trailing part is parsed.
 */
re2
    : re '/'
        {
        if ( ! trlcontxt )
            trlcontxt = true;
        else
            synerr( "trailing context used twice" );

        if ( ! varlength )
            /* fixed-length head: remember how long it is */
            headcnt = rulelen;
        else
            /* we hope the trailing context is fixed-length */
            varlength = false;

        /* start counting again for the trailing part */
        rulelen = 0;

        current_state_type = STATE_TRAILING_CONTEXT;
        $$ = $1;
        }
    ;
399:
/* concatenation: a run of adjacent singletons */
series
    : series singleton
        {
        /* adjacent patterns are joined into a single machine here */
        $$ = link_machines( $1, $2 );
        }

    | singleton
        { $$ = $1; }
    ;
411:
/* A single pattern element, optionally followed by a repetition operator.
 * Every repetition operator sets varlength; every single-character element
 * ('.', a ccl, CHAR) bumps rulelen.
 */
singleton
    : singleton '*'
        {
        varlength = true;

        $$ = mkclos( $1 );
        }

    | singleton '+'
        {
        varlength = true;

        $$ = mkposcl( $1 );
        }

    | singleton '?'
        {
        varlength = true;

        $$ = mkopt( $1 );
        }

    | singleton '{' NUMBER ',' NUMBER '}'
        {
        varlength = true;

        if ( $3 > $5 || $3 < 0 )
            {
            synerr( "bad iteration values" );
            $$ = $1;
            }

        else if ( $3 == 0 )
            {
            /* {0,n} is built as an optional {1,n}.  The lower bound
             * handed to mkrep() must be 1 here, not 0.
             * NOTE(review): mkrep() is defined elsewhere; this relies
             * on it always producing at least one copy of the
             * machine, so that mkrep( m, 0, n ) would accept n+1
             * repetitions - confirm against its definition.
             */
            if ( $5 <= 0 )
                {
                /* {0,0} cannot be expressed as optional {1,0} */
                synerr( "bad iteration values" );
                $$ = $1;
                }
            else
                $$ = mkopt( mkrep( $1, 1, $5 ) );
            }

        else
            $$ = mkrep( $1, $3, $5 );
        }

    | singleton '{' NUMBER ',' '}'
        {
        varlength = true;

        if ( $3 <= 0 )
            {
            synerr( "iteration value must be positive" );
            $$ = $1;
            }

        else
            $$ = mkrep( $1, $3, INFINITY );
        }

    | singleton '{' NUMBER '}'
        {
        /* the singleton could be something like "(foo)", in which
         * case we have no idea what its length is, so we punt and
         * call it variable-length
         */
        varlength = true;

        if ( $3 <= 0 )
            {
            synerr( "iteration value must be positive" );
            $$ = $1;
            }

        else
            $$ = link_machines( $1, copysingl( $1, $3 - 1 ) );
        }

    | '.'
        {
        if ( ! madeany )
            {
            /* first use of '.': create its character class, the
             * negation of a class holding only newline
             */
            anyccl = cclinit();
            ccladd( anyccl, '\n' );
            cclnegate( anyccl );

            if ( useecs )
                mkeccl( ccltbl + cclmap[anyccl],
                        ccllen[anyccl], nextecm,
                        ecgroup, csize, csize );

            madeany = true;
            }

        ++rulelen;

        $$ = mkstate( -anyccl );
        }

    | fullccl
        {
        if ( ! cclsorted )
            /* sort characters for fast searching.  We use a
             * shell sort since this list could be large.
             */
            cshell( ccltbl + cclmap[$1], ccllen[$1], true );

        if ( useecs )
            mkeccl( ccltbl + cclmap[$1], ccllen[$1],
                    nextecm, ecgroup, csize, csize );

        ++rulelen;

        $$ = mkstate( -$1 );
        }

    | PREVCCL
        {
        ++rulelen;

        $$ = mkstate( -$1 );
        }

    | '"' string '"'
        { $$ = $2; }

    | '(' re ')'
        { $$ = $2; }

    | CHAR
        {
        ++rulelen;

        /* case-insensitive scanners store upper-case letters folded */
        if ( caseins && $1 >= 'A' && $1 <= 'Z' )
            $1 = clower( $1 );

        $$ = mkstate( $1 );
        }
    ;
545:
/* a bracketed character class, plain or negated with a leading '^' */
fullccl
    : '[' ccl ']'
        { $$ = $2; }

    | '[' '^' ccl ']'
        {
        /* *Sigh* - to be compatible with Unix lex, negated ccls
         * match newlines, so the code that would exclude '\n' is
         * compiled only when NOTDEF is defined
         */
#ifdef NOTDEF
        ccladd( $3, '\n' ); /* negated ccls don't match '\n' */
        cclsorted = false; /* because we added the newline */
#endif
        cclnegate( $3 );
        $$ = $3;
        }
    ;
562:
/* The contents of a character class, built left to right.  cclsorted and
 * lastchar record whether the characters have arrived in increasing order,
 * so that the singleton rule can skip sorting the class.
 */
ccl
    : ccl CHAR '-' CHAR
        {
        /* Fold case BEFORE validating the range.  If the check ran on
         * the unfolded endpoints, a case-insensitive range such as
         * [a-Z] would be rejected even though it folds to a-z, and
         * [Z-a] would pass the check and then add nothing because it
         * folds to z-a.
         */
        if ( caseins )
            {
            if ( $2 >= 'A' && $2 <= 'Z' )
                $2 = clower( $2 );
            if ( $4 >= 'A' && $4 <= 'Z' )
                $4 = clower( $4 );
            }

        if ( $2 > $4 )
            synerr( "negative range in character class" );

        else
            {
            for ( i = $2; i <= $4; ++i )
                ccladd( $1, i );

            /* keep track if this ccl is staying in alphabetical
             * order
             */
            cclsorted = cclsorted && ($2 > lastchar);
            lastchar = $4;
            }

        $$ = $1;
        }

    | ccl CHAR
        {
        if ( caseins && $2 >= 'A' && $2 <= 'Z' )
            $2 = clower( $2 );

        ccladd( $1, $2 );
        cclsorted = cclsorted && ($2 > lastchar);
        lastchar = $2;
        $$ = $1;
        }

    | /* empty */
        {
        /* start of a new class: nothing is out of order yet */
        cclsorted = true;
        lastchar = 0;
        $$ = cclinit();
        }
    ;
610:
/* the characters of a quoted string, concatenated one state at a time */
string
    : string CHAR
        {
        /* case-insensitive scanners store upper-case letters folded */
        if ( caseins && $2 >= 'A' && $2 <= 'Z' )
            $2 = clower( $2 );

        ++rulelen;

        $$ = link_machines( $1, mkstate( $2 ) );
        }

    | /* empty */
        { $$ = mkstate( SYM_EPSILON ); }
    ;
625:
626: %%
627:
628:
629: /* build_eof_action - build the "<<EOF>>" action for the active start
630: * conditions
631: */
632:
633: void build_eof_action()
634:
635: {
636: register int i;
637:
638: for ( i = 1; i <= actvp; ++i )
639: {
640: if ( sceof[actvsc[i]] )
641: format_pinpoint_message(
642: "multiple <<EOF>> rules for start condition %s",
643: scname[actvsc[i]] );
644:
645: else
646: {
647: sceof[actvsc[i]] = true;
648: fprintf( temp_action_file, "case YY_STATE_EOF(%s):\n",
649: scname[actvsc[i]] );
650: }
651: }
652:
653: line_directive_out( temp_action_file );
654: }
655:
656:
657: /* synerr - report a syntax error */
658:
659: void synerr( str )
660: char str[];
661:
662: {
663: syntaxerror = true;
664: pinpoint_message( str );
665: }
666:
667:
668: /* format_pinpoint_message - write out a message formatted with one string,
669: * pinpointing its location
670: */
671:
672: void format_pinpoint_message( msg, arg )
673: char msg[], arg[];
674:
675: {
676: char errmsg[MAXLINE];
677:
678: (void) sprintf( errmsg, msg, arg );
679: pinpoint_message( errmsg );
680: }
681:
682:
683: /* pinpoint_message - write out a message, pinpointing its location */
684:
685: void pinpoint_message( str )
686: char str[];
687:
688: {
689: fprintf( stderr, "\"%s\", line %d: %s\n", infilename, linenum, str );
690: }
691:
692:
/* yyerror - eat up an error message from the parser;
 *           currently, messages are ignored
 *
 * Errors are reported by the grammar's own "error" productions through
 * synerr(), so the text handed over by the parser is dropped here.
 */

void yyerror( msg )
char msg[];

{
    /* deliberately empty: msg is not used */
    (void) msg;
}
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.