|
|
1.1 root 1: /*-
2: * Copyright (c) 1990 The Regents of the University of California.
3: * All rights reserved.
4: *
5: * This code is derived from software contributed to Berkeley by
6: * Vern Paxson.
7: *
8: * The United States Government has rights in this work pursuant
9: * to contract no. DE-AC03-76SF00098 between the United States
10: * Department of Energy and the University of California.
11: *
12: * Redistribution and use in source and binary forms are permitted provided
13: * that: (1) source distributions retain this entire copyright notice and
14: * comment, and (2) distributions including binaries display the following
15: * acknowledgement: ``This product includes software developed by the
16: * University of California, Berkeley and its contributors'' in the
17: * documentation or other materials provided with the distribution and in
18: * all advertising materials mentioning features or use of this software.
19: * Neither the name of the University nor the names of its contributors may
20: * be used to endorse or promote products derived from this software without
21: * specific prior written permission.
22: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
23: * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
24: * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
25: */
26:
27: #ifndef lint
28: static char sccsid[] = "@(#)nfa.c 5.2 (Berkeley) 6/18/90";
29: #endif /* not lint */
30:
31: /* nfa - NFA construction routines */
32:
33: #include "flexdef.h"
34:
35: /* declare functions that have forward references */
36:
37: int dupmachine PROTO((int));
38: void mkxtion PROTO((int, int));
39:
40:
41: /* add_accept - add an accepting state to a machine
42: *
43: * synopsis
44: *
45: * add_accept( mach, accepting_number );
46: *
47: * accepting_number becomes mach's accepting number.
48: */
49:
50: void add_accept( mach, accepting_number )
51: int mach, accepting_number;
52:
53: {
54: /* hang the accepting number off an epsilon state. if it is associated
55: * with a state that has a non-epsilon out-transition, then the state
56: * will accept BEFORE it makes that transition, i.e., one character
57: * too soon
58: */
59:
60: if ( transchar[finalst[mach]] == SYM_EPSILON )
61: accptnum[finalst[mach]] = accepting_number;
62:
63: else
64: {
65: int astate = mkstate( SYM_EPSILON );
66: accptnum[astate] = accepting_number;
67: mach = link_machines( mach, astate );
68: }
69: }
70:
71:
72: /* copysingl - make a given number of copies of a singleton machine
73: *
74: * synopsis
75: *
76: * newsng = copysingl( singl, num );
77: *
78: * newsng - a new singleton composed of num copies of singl
79: * singl - a singleton machine
80: * num - the number of copies of singl to be present in newsng
81: */
82:
83: int copysingl( singl, num )
84: int singl, num;
85:
86: {
87: int copy, i;
88:
89: copy = mkstate( SYM_EPSILON );
90:
91: for ( i = 1; i <= num; ++i )
92: copy = link_machines( copy, dupmachine( singl ) );
93:
94: return ( copy );
95: }
96:
97:
98: /* dumpnfa - debugging routine to write out an nfa
99: *
100: * synopsis
101: * int state1;
102: * dumpnfa( state1 );
103: */
104:
105: void dumpnfa( state1 )
106: int state1;
107:
108: {
109: int sym, tsp1, tsp2, anum, ns;
110:
111: fprintf( stderr, "\n\n********** beginning dump of nfa with start state %d\n",
112: state1 );
113:
114: /* we probably should loop starting at firstst[state1] and going to
115: * lastst[state1], but they're not maintained properly when we "or"
116: * all of the rules together. So we use our knowledge that the machine
117: * starts at state 1 and ends at lastnfa.
118: */
119:
120: /* for ( ns = firstst[state1]; ns <= lastst[state1]; ++ns ) */
121: for ( ns = 1; ns <= lastnfa; ++ns )
122: {
123: fprintf( stderr, "state # %4d\t", ns );
124:
125: sym = transchar[ns];
126: tsp1 = trans1[ns];
127: tsp2 = trans2[ns];
128: anum = accptnum[ns];
129:
130: fprintf( stderr, "%3d: %4d, %4d", sym, tsp1, tsp2 );
131:
132: if ( anum != NIL )
133: fprintf( stderr, " [%d]", anum );
134:
135: fprintf( stderr, "\n" );
136: }
137:
138: fprintf( stderr, "********** end of dump\n" );
139: }
140:
141:
142: /* dupmachine - make a duplicate of a given machine
143: *
144: * synopsis
145: *
146: * copy = dupmachine( mach );
147: *
148: * copy - holds duplicate of mach
149: * mach - machine to be duplicated
150: *
151: * note that the copy of mach is NOT an exact duplicate; rather, all the
152: * transition states values are adjusted so that the copy is self-contained,
153: * as the original should have been.
154: *
155: * also note that the original MUST be contiguous, with its low and high
156: * states accessible by the arrays firstst and lastst
157: */
158:
159: int dupmachine( mach )
160: int mach;
161:
162: {
163: int i, init, state_offset;
164: int state = 0;
165: int last = lastst[mach];
166:
167: for ( i = firstst[mach]; i <= last; ++i )
168: {
169: state = mkstate( transchar[i] );
170:
171: if ( trans1[i] != NO_TRANSITION )
172: {
173: mkxtion( finalst[state], trans1[i] + state - i );
174:
175: if ( transchar[i] == SYM_EPSILON && trans2[i] != NO_TRANSITION )
176: mkxtion( finalst[state], trans2[i] + state - i );
177: }
178:
179: accptnum[state] = accptnum[i];
180: }
181:
182: if ( state == 0 )
183: flexfatal( "empty machine in dupmachine()" );
184:
185: state_offset = state - i + 1;
186:
187: init = mach + state_offset;
188: firstst[init] = firstst[mach] + state_offset;
189: finalst[init] = finalst[mach] + state_offset;
190: lastst[init] = lastst[mach] + state_offset;
191:
192: return ( init );
193: }
194:
195:
196: /* finish_rule - finish up the processing for a rule
197: *
198: * synopsis
199: *
200: * finish_rule( mach, variable_trail_rule, headcnt, trailcnt );
201: *
202: * An accepting number is added to the given machine. If variable_trail_rule
203: * is true then the rule has trailing context and both the head and trail
204: * are variable size. Otherwise if headcnt or trailcnt is non-zero then
205: * the machine recognizes a pattern with trailing context and headcnt is
206: * the number of characters in the matched part of the pattern, or zero
207: * if the matched part has variable length. trailcnt is the number of
208: * trailing context characters in the pattern, or zero if the trailing
209: * context has variable length.
210: */
211:
212: void finish_rule( mach, variable_trail_rule, headcnt, trailcnt )
213: int mach, variable_trail_rule, headcnt, trailcnt;
214:
215: {
216: add_accept( mach, num_rules );
217:
218: /* we did this in new_rule(), but it often gets the wrong
219: * number because we do it before we start parsing the current rule
220: */
221: rule_linenum[num_rules] = linenum;
222:
223: /* if this is a continued action, then the line-number has
224: * already been updated, giving us the wrong number
225: */
226: if ( continued_action )
227: --rule_linenum[num_rules];
228:
229: fprintf( temp_action_file, "case %d:\n", num_rules );
230:
231: if ( variable_trail_rule )
232: {
233: rule_type[num_rules] = RULE_VARIABLE;
234:
235: if ( performance_report )
236: fprintf( stderr, "Variable trailing context rule at line %d\n",
237: rule_linenum[num_rules] );
238:
239: variable_trailing_context_rules = true;
240: }
241:
242: else
243: {
244: rule_type[num_rules] = RULE_NORMAL;
245:
246: if ( headcnt > 0 || trailcnt > 0 )
247: {
248: /* do trailing context magic to not match the trailing characters */
249: char *scanner_cp = "yy_c_buf_p = yy_cp";
250: char *scanner_bp = "yy_bp";
251:
252: fprintf( temp_action_file,
253: "*yy_cp = yy_hold_char; /* undo effects of setting up yytext */\n" );
254:
255: if ( headcnt > 0 )
256: fprintf( temp_action_file, "%s = %s + %d;\n",
257: scanner_cp, scanner_bp, headcnt );
258:
259: else
260: fprintf( temp_action_file,
261: "%s -= %d;\n", scanner_cp, trailcnt );
262:
263: fprintf( temp_action_file,
264: "YY_DO_BEFORE_ACTION; /* set up yytext again */\n" );
265: }
266: }
267:
268: line_directive_out( temp_action_file );
269: }
270:
271:
272: /* link_machines - connect two machines together
273: *
274: * synopsis
275: *
276: * new = link_machines( first, last );
277: *
278: * new - a machine constructed by connecting first to last
279: * first - the machine whose successor is to be last
280: * last - the machine whose predecessor is to be first
281: *
282: * note: this routine concatenates the machine first with the machine
283: * last to produce a machine new which will pattern-match first first
284: * and then last, and will fail if either of the sub-patterns fails.
285: * FIRST is set to new by the operation. last is unmolested.
286: */
287:
288: int link_machines( first, last )
289: int first, last;
290:
291: {
292: if ( first == NIL )
293: return ( last );
294:
295: else if ( last == NIL )
296: return ( first );
297:
298: else
299: {
300: mkxtion( finalst[first], last );
301: finalst[first] = finalst[last];
302: lastst[first] = max( lastst[first], lastst[last] );
303: firstst[first] = min( firstst[first], firstst[last] );
304:
305: return ( first );
306: }
307: }
308:
309:
310: /* mark_beginning_as_normal - mark each "beginning" state in a machine
311: * as being a "normal" (i.e., not trailing context-
312: * associated) states
313: *
314: * synopsis
315: *
316: * mark_beginning_as_normal( mach )
317: *
318: * mach - machine to mark
319: *
320: * The "beginning" states are the epsilon closure of the first state
321: */
322:
323: void mark_beginning_as_normal( mach )
324: register int mach;
325:
326: {
327: switch ( state_type[mach] )
328: {
329: case STATE_NORMAL:
330: /* oh, we've already visited here */
331: return;
332:
333: case STATE_TRAILING_CONTEXT:
334: state_type[mach] = STATE_NORMAL;
335:
336: if ( transchar[mach] == SYM_EPSILON )
337: {
338: if ( trans1[mach] != NO_TRANSITION )
339: mark_beginning_as_normal( trans1[mach] );
340:
341: if ( trans2[mach] != NO_TRANSITION )
342: mark_beginning_as_normal( trans2[mach] );
343: }
344: break;
345:
346: default:
347: flexerror( "bad state type in mark_beginning_as_normal()" );
348: break;
349: }
350: }
351:
352:
353: /* mkbranch - make a machine that branches to two machines
354: *
355: * synopsis
356: *
357: * branch = mkbranch( first, second );
358: *
359: * branch - a machine which matches either first's pattern or second's
360: * first, second - machines whose patterns are to be or'ed (the | operator)
361: *
362: * note that first and second are NEITHER destroyed by the operation. Also,
363: * the resulting machine CANNOT be used with any other "mk" operation except
364: * more mkbranch's. Compare with mkor()
365: */
366:
367: int mkbranch( first, second )
368: int first, second;
369:
370: {
371: int eps;
372:
373: if ( first == NO_TRANSITION )
374: return ( second );
375:
376: else if ( second == NO_TRANSITION )
377: return ( first );
378:
379: eps = mkstate( SYM_EPSILON );
380:
381: mkxtion( eps, first );
382: mkxtion( eps, second );
383:
384: return ( eps );
385: }
386:
387:
388: /* mkclos - convert a machine into a closure
389: *
390: * synopsis
391: * new = mkclos( state );
392: *
393: * new - a new state which matches the closure of "state"
394: */
395:
396: int mkclos( state )
397: int state;
398:
399: {
400: return ( mkopt( mkposcl( state ) ) );
401: }
402:
403:
404: /* mkopt - make a machine optional
405: *
406: * synopsis
407: *
408: * new = mkopt( mach );
409: *
410: * new - a machine which optionally matches whatever mach matched
411: * mach - the machine to make optional
412: *
413: * notes:
414: * 1. mach must be the last machine created
415: * 2. mach is destroyed by the call
416: */
417:
418: int mkopt( mach )
419: int mach;
420:
421: {
422: int eps;
423:
424: if ( ! SUPER_FREE_EPSILON(finalst[mach]) )
425: {
426: eps = mkstate( SYM_EPSILON );
427: mach = link_machines( mach, eps );
428: }
429:
430: /* can't skimp on the following if FREE_EPSILON(mach) is true because
431: * some state interior to "mach" might point back to the beginning
432: * for a closure
433: */
434: eps = mkstate( SYM_EPSILON );
435: mach = link_machines( eps, mach );
436:
437: mkxtion( mach, finalst[mach] );
438:
439: return ( mach );
440: }
441:
442:
443: /* mkor - make a machine that matches either one of two machines
444: *
445: * synopsis
446: *
447: * new = mkor( first, second );
448: *
449: * new - a machine which matches either first's pattern or second's
450: * first, second - machines whose patterns are to be or'ed (the | operator)
451: *
452: * note that first and second are both destroyed by the operation
453: * the code is rather convoluted because an attempt is made to minimize
454: * the number of epsilon states needed
455: */
456:
457: int mkor( first, second )
458: int first, second;
459:
460: {
461: int eps, orend;
462:
463: if ( first == NIL )
464: return ( second );
465:
466: else if ( second == NIL )
467: return ( first );
468:
469: else
470: {
471: /* see comment in mkopt() about why we can't use the first state
472: * of "first" or "second" if they satisfy "FREE_EPSILON"
473: */
474: eps = mkstate( SYM_EPSILON );
475:
476: first = link_machines( eps, first );
477:
478: mkxtion( first, second );
479:
480: if ( SUPER_FREE_EPSILON(finalst[first]) &&
481: accptnum[finalst[first]] == NIL )
482: {
483: orend = finalst[first];
484: mkxtion( finalst[second], orend );
485: }
486:
487: else if ( SUPER_FREE_EPSILON(finalst[second]) &&
488: accptnum[finalst[second]] == NIL )
489: {
490: orend = finalst[second];
491: mkxtion( finalst[first], orend );
492: }
493:
494: else
495: {
496: eps = mkstate( SYM_EPSILON );
497:
498: first = link_machines( first, eps );
499: orend = finalst[first];
500:
501: mkxtion( finalst[second], orend );
502: }
503: }
504:
505: finalst[first] = orend;
506: return ( first );
507: }
508:
509:
510: /* mkposcl - convert a machine into a positive closure
511: *
512: * synopsis
513: * new = mkposcl( state );
514: *
515: * new - a machine matching the positive closure of "state"
516: */
517:
518: int mkposcl( state )
519: int state;
520:
521: {
522: int eps;
523:
524: if ( SUPER_FREE_EPSILON(finalst[state]) )
525: {
526: mkxtion( finalst[state], state );
527: return ( state );
528: }
529:
530: else
531: {
532: eps = mkstate( SYM_EPSILON );
533: mkxtion( eps, state );
534: return ( link_machines( state, eps ) );
535: }
536: }
537:
538:
539: /* mkrep - make a replicated machine
540: *
541: * synopsis
542: * new = mkrep( mach, lb, ub );
543: *
544: * new - a machine that matches whatever "mach" matched from "lb"
545: * number of times to "ub" number of times
546: *
547: * note
548: * if "ub" is INFINITY then "new" matches "lb" or more occurrences of "mach"
549: */
550:
551: int mkrep( mach, lb, ub )
552: int mach, lb, ub;
553:
554: {
555: int base_mach, tail, copy, i;
556:
557: base_mach = copysingl( mach, lb - 1 );
558:
559: if ( ub == INFINITY )
560: {
561: copy = dupmachine( mach );
562: mach = link_machines( mach,
563: link_machines( base_mach, mkclos( copy ) ) );
564: }
565:
566: else
567: {
568: tail = mkstate( SYM_EPSILON );
569:
570: for ( i = lb; i < ub; ++i )
571: {
572: copy = dupmachine( mach );
573: tail = mkopt( link_machines( copy, tail ) );
574: }
575:
576: mach = link_machines( mach, link_machines( base_mach, tail ) );
577: }
578:
579: return ( mach );
580: }
581:
582:
583: /* mkstate - create a state with a transition on a given symbol
584: *
585: * synopsis
586: *
587: * state = mkstate( sym );
588: *
589: * state - a new state matching sym
590: * sym - the symbol the new state is to have an out-transition on
591: *
592: * note that this routine makes new states in ascending order through the
593: * state array (and increments LASTNFA accordingly). The routine DUPMACHINE
594: * relies on machines being made in ascending order and that they are
595: * CONTIGUOUS. Change it and you will have to rewrite DUPMACHINE (kludge
596: * that it admittedly is)
597: */
598:
599: int mkstate( sym )
600: int sym;
601:
602: {
603: if ( ++lastnfa >= current_mns )
604: {
605: if ( (current_mns += MNS_INCREMENT) >= MAXIMUM_MNS )
606: lerrif( "input rules are too complicated (>= %d NFA states)",
607: current_mns );
608:
609: ++num_reallocs;
610:
611: firstst = reallocate_integer_array( firstst, current_mns );
612: lastst = reallocate_integer_array( lastst, current_mns );
613: finalst = reallocate_integer_array( finalst, current_mns );
614: transchar = reallocate_integer_array( transchar, current_mns );
615: trans1 = reallocate_integer_array( trans1, current_mns );
616: trans2 = reallocate_integer_array( trans2, current_mns );
617: accptnum = reallocate_integer_array( accptnum, current_mns );
618: assoc_rule = reallocate_integer_array( assoc_rule, current_mns );
619: state_type = reallocate_integer_array( state_type, current_mns );
620: }
621:
622: firstst[lastnfa] = lastnfa;
623: finalst[lastnfa] = lastnfa;
624: lastst[lastnfa] = lastnfa;
625: transchar[lastnfa] = sym;
626: trans1[lastnfa] = NO_TRANSITION;
627: trans2[lastnfa] = NO_TRANSITION;
628: accptnum[lastnfa] = NIL;
629: assoc_rule[lastnfa] = num_rules;
630: state_type[lastnfa] = current_state_type;
631:
632: /* fix up equivalence classes base on this transition. Note that any
633: * character which has its own transition gets its own equivalence class.
634: * Thus only characters which are only in character classes have a chance
635: * at being in the same equivalence class. E.g. "a|b" puts 'a' and 'b'
636: * into two different equivalence classes. "[ab]" puts them in the same
637: * equivalence class (barring other differences elsewhere in the input).
638: */
639:
640: if ( sym < 0 )
641: {
642: /* we don't have to update the equivalence classes since that was
643: * already done when the ccl was created for the first time
644: */
645: }
646:
647: else if ( sym == SYM_EPSILON )
648: ++numeps;
649:
650: else
651: {
652: if ( useecs )
653: /* map NUL's to csize */
654: mkechar( sym ? sym : csize, nextecm, ecgroup );
655: }
656:
657: return ( lastnfa );
658: }
659:
660:
661: /* mkxtion - make a transition from one state to another
662: *
663: * synopsis
664: *
665: * mkxtion( statefrom, stateto );
666: *
667: * statefrom - the state from which the transition is to be made
668: * stateto - the state to which the transition is to be made
669: */
670:
671: void mkxtion( statefrom, stateto )
672: int statefrom, stateto;
673:
674: {
675: if ( trans1[statefrom] == NO_TRANSITION )
676: trans1[statefrom] = stateto;
677:
678: else if ( (transchar[statefrom] != SYM_EPSILON) ||
679: (trans2[statefrom] != NO_TRANSITION) )
680: flexfatal( "found too many transitions in mkxtion()" );
681:
682: else
683: { /* second out-transition for an epsilon state */
684: ++eps2;
685: trans2[statefrom] = stateto;
686: }
687: }
688:
689: /* new_rule - initialize for a new rule
690: *
691: * synopsis
692: *
693: * new_rule();
694: *
695: * the global num_rules is incremented and the any corresponding dynamic
696: * arrays (such as rule_type[]) are grown as needed.
697: */
698:
699: void new_rule()
700:
701: {
702: if ( ++num_rules >= current_max_rules )
703: {
704: ++num_reallocs;
705: current_max_rules += MAX_RULES_INCREMENT;
706: rule_type = reallocate_integer_array( rule_type, current_max_rules );
707: rule_linenum =
708: reallocate_integer_array( rule_linenum, current_max_rules );
709: }
710:
711: if ( num_rules > MAX_RULE )
712: lerrif( "too many rules (> %d)!", MAX_RULE );
713:
714: rule_linenum[num_rules] = linenum;
715: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.