|
|
1.1 root 1: /*-
2: * Copyright (c) 1990 The Regents of the University of California.
3: * All rights reserved.
4: *
5: * This code is derived from software contributed to Berkeley by
6: * Vern Paxson.
7: *
8: * The United States Government has rights in this work pursuant
9: * to contract no. DE-AC03-76SF00098 between the United States
10: * Department of Energy and the University of California.
11: *
12: * Redistribution and use in source and binary forms are permitted provided
13: * that: (1) source distributions retain this entire copyright notice and
14: * comment, and (2) distributions including binaries display the following
15: * acknowledgement: ``This product includes software developed by the
16: * University of California, Berkeley and its contributors'' in the
17: * documentation or other materials provided with the distribution and in
18: * all advertising materials mentioning features or use of this software.
19: * Neither the name of the University nor the names of its contributors may
20: * be used to endorse or promote products derived from this software without
21: * specific prior written permission.
22: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
23: * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
24: * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
25: */
26:
27: #ifndef lint
28: static char sccsid[] = "@(#)dfa.c 5.2 (Berkeley) 6/18/90";
29: #endif /* not lint */
30:
31: /* dfa - DFA construction routines */
32:
33: #include "flexdef.h"
34:
35: /* declare functions that have forward references */
36:
37: void dump_associated_rules PROTO((FILE*, int));
38: void dump_transitions PROTO((FILE*, int[]));
39: void sympartition PROTO((int[], int, int[], int[]));
40: int symfollowset PROTO((int[], int, int, int[]));
41:
42:
43: /* check_for_backtracking - check a DFA state for backtracking
44: *
45: * synopsis
46: * int ds, state[numecs];
47: * check_for_backtracking( ds, state );
48: *
49: * ds is the number of the state to check and state[] is its out-transitions,
50: * indexed by equivalence class, and state_rules[] is the set of rules
51: * associated with this state
52: */
53:
54: void check_for_backtracking( ds, state )
55: int ds;
56: int state[];
57:
58: {
59: if ( (reject && ! dfaacc[ds].dfaacc_set) || ! dfaacc[ds].dfaacc_state )
60: { /* state is non-accepting */
61: ++num_backtracking;
62:
63: if ( backtrack_report )
64: {
65: fprintf( backtrack_file, "State #%d is non-accepting -\n", ds );
66:
67: /* identify the state */
68: dump_associated_rules( backtrack_file, ds );
69:
70: /* now identify it further using the out- and jam-transitions */
71: dump_transitions( backtrack_file, state );
72:
73: putc( '\n', backtrack_file );
74: }
75: }
76: }
77:
78:
79: /* check_trailing_context - check to see if NFA state set constitutes
80: * "dangerous" trailing context
81: *
82: * synopsis
83: * int nfa_states[num_states+1], num_states;
84: * int accset[nacc+1], nacc;
85: * check_trailing_context( nfa_states, num_states, accset, nacc );
86: *
87: * NOTES
88: * Trailing context is "dangerous" if both the head and the trailing
89: * part are of variable size \and/ there's a DFA state which contains
90: * both an accepting state for the head part of the rule and NFA states
91: * which occur after the beginning of the trailing context.
92: * When such a rule is matched, it's impossible to tell if having been
93: * in the DFA state indicates the beginning of the trailing context
94: * or further-along scanning of the pattern. In these cases, a warning
95: * message is issued.
96: *
97: * nfa_states[1 .. num_states] is the list of NFA states in the DFA.
98: * accset[1 .. nacc] is the list of accepting numbers for the DFA state.
99: */
100:
101: void check_trailing_context( nfa_states, num_states, accset, nacc )
102: int *nfa_states, num_states;
103: int *accset;
104: register int nacc;
105:
106: {
107: register int i, j;
108:
109: for ( i = 1; i <= num_states; ++i )
110: {
111: int ns = nfa_states[i];
112: register int type = state_type[ns];
113: register int ar = assoc_rule[ns];
114:
115: if ( type == STATE_NORMAL || rule_type[ar] != RULE_VARIABLE )
116: { /* do nothing */
117: }
118:
119: else if ( type == STATE_TRAILING_CONTEXT )
120: {
121: /* potential trouble. Scan set of accepting numbers for
122: * the one marking the end of the "head". We assume that
123: * this looping will be fairly cheap since it's rare that
124: * an accepting number set is large.
125: */
126: for ( j = 1; j <= nacc; ++j )
127: if ( accset[j] & YY_TRAILING_HEAD_MASK )
128: {
129: fprintf( stderr,
130: "%s: Dangerous trailing context in rule at line %d\n",
131: program_name, rule_linenum[ar] );
132: return;
133: }
134: }
135: }
136: }
137:
138:
139: /* dump_associated_rules - list the rules associated with a DFA state
140: *
141: * synopisis
142: * int ds;
143: * FILE *file;
144: * dump_associated_rules( file, ds );
145: *
146: * goes through the set of NFA states associated with the DFA and
147: * extracts the first MAX_ASSOC_RULES unique rules, sorts them,
148: * and writes a report to the given file
149: */
150:
151: void dump_associated_rules( file, ds )
152: FILE *file;
153: int ds;
154:
155: {
156: register int i, j;
157: register int num_associated_rules = 0;
158: int rule_set[MAX_ASSOC_RULES + 1];
159: int *dset = dss[ds];
160: int size = dfasiz[ds];
161:
162: for ( i = 1; i <= size; ++i )
163: {
164: register rule_num = rule_linenum[assoc_rule[dset[i]]];
165:
166: for ( j = 1; j <= num_associated_rules; ++j )
167: if ( rule_num == rule_set[j] )
168: break;
169:
170: if ( j > num_associated_rules )
171: { /* new rule */
172: if ( num_associated_rules < MAX_ASSOC_RULES )
173: rule_set[++num_associated_rules] = rule_num;
174: }
175: }
176:
177: bubble( rule_set, num_associated_rules );
178:
179: fprintf( file, " associated rule line numbers:" );
180:
181: for ( i = 1; i <= num_associated_rules; ++i )
182: {
183: if ( i % 8 == 1 )
184: putc( '\n', file );
185:
186: fprintf( file, "\t%d", rule_set[i] );
187: }
188:
189: putc( '\n', file );
190: }
191:
192:
193: /* dump_transitions - list the transitions associated with a DFA state
194: *
195: * synopisis
196: * int state[numecs];
197: * FILE *file;
198: * dump_transitions( file, state );
199: *
200: * goes through the set of out-transitions and lists them in human-readable
201: * form (i.e., not as equivalence classes); also lists jam transitions
202: * (i.e., all those which are not out-transitions, plus EOF). The dump
203: * is done to the given file.
204: */
205:
206: void dump_transitions( file, state )
207: FILE *file;
208: int state[];
209:
210: {
211: register int i, ec;
212: int out_char_set[CSIZE];
213:
214: for ( i = 0; i < csize; ++i )
215: {
216: ec = abs( ecgroup[i] );
217: out_char_set[i] = state[ec];
218: }
219:
220: fprintf( file, " out-transitions: " );
221:
222: list_character_set( file, out_char_set );
223:
224: /* now invert the members of the set to get the jam transitions */
225: for ( i = 0; i < csize; ++i )
226: out_char_set[i] = ! out_char_set[i];
227:
228: fprintf( file, "\n jam-transitions: EOF " );
229:
230: list_character_set( file, out_char_set );
231:
232: putc( '\n', file );
233: }
234:
235:
236: /* epsclosure - construct the epsilon closure of a set of ndfa states
237: *
238: * synopsis
239: * int t[current_max_dfa_size], numstates, accset[num_rules + 1], nacc;
240: * int hashval;
241: * int *epsclosure();
242: * t = epsclosure( t, &numstates, accset, &nacc, &hashval );
243: *
244: * NOTES
245: * the epsilon closure is the set of all states reachable by an arbitrary
246: * number of epsilon transitions which themselves do not have epsilon
247: * transitions going out, unioned with the set of states which have non-null
248: * accepting numbers. t is an array of size numstates of nfa state numbers.
249: * Upon return, t holds the epsilon closure and numstates is updated. accset
250: * holds a list of the accepting numbers, and the size of accset is given
251: * by nacc. t may be subjected to reallocation if it is not large enough
252: * to hold the epsilon closure.
253: *
254: * hashval is the hash value for the dfa corresponding to the state set
255: */
256:
257: int *epsclosure( t, ns_addr, accset, nacc_addr, hv_addr )
258: int *t, *ns_addr, accset[], *nacc_addr, *hv_addr;
259:
260: {
261: register int stkpos, ns, tsp;
262: int numstates = *ns_addr, nacc, hashval, transsym, nfaccnum;
263: int stkend, nstate;
264: static int did_stk_init = false, *stk;
265:
266: #define MARK_STATE(state) \
267: trans1[state] = trans1[state] - MARKER_DIFFERENCE;
268:
269: #define IS_MARKED(state) (trans1[state] < 0)
270:
271: #define UNMARK_STATE(state) \
272: trans1[state] = trans1[state] + MARKER_DIFFERENCE;
273:
274: #define CHECK_ACCEPT(state) \
275: { \
276: nfaccnum = accptnum[state]; \
277: if ( nfaccnum != NIL ) \
278: accset[++nacc] = nfaccnum; \
279: }
280:
281: #define DO_REALLOCATION \
282: { \
283: current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \
284: ++num_reallocs; \
285: t = reallocate_integer_array( t, current_max_dfa_size ); \
286: stk = reallocate_integer_array( stk, current_max_dfa_size ); \
287: } \
288:
289: #define PUT_ON_STACK(state) \
290: { \
291: if ( ++stkend >= current_max_dfa_size ) \
292: DO_REALLOCATION \
293: stk[stkend] = state; \
294: MARK_STATE(state) \
295: }
296:
297: #define ADD_STATE(state) \
298: { \
299: if ( ++numstates >= current_max_dfa_size ) \
300: DO_REALLOCATION \
301: t[numstates] = state; \
302: hashval = hashval + state; \
303: }
304:
305: #define STACK_STATE(state) \
306: { \
307: PUT_ON_STACK(state) \
308: CHECK_ACCEPT(state) \
309: if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \
310: ADD_STATE(state) \
311: }
312:
313: if ( ! did_stk_init )
314: {
315: stk = allocate_integer_array( current_max_dfa_size );
316: did_stk_init = true;
317: }
318:
319: nacc = stkend = hashval = 0;
320:
321: for ( nstate = 1; nstate <= numstates; ++nstate )
322: {
323: ns = t[nstate];
324:
325: /* the state could be marked if we've already pushed it onto
326: * the stack
327: */
328: if ( ! IS_MARKED(ns) )
329: PUT_ON_STACK(ns)
330:
331: CHECK_ACCEPT(ns)
332: hashval = hashval + ns;
333: }
334:
335: for ( stkpos = 1; stkpos <= stkend; ++stkpos )
336: {
337: ns = stk[stkpos];
338: transsym = transchar[ns];
339:
340: if ( transsym == SYM_EPSILON )
341: {
342: tsp = trans1[ns] + MARKER_DIFFERENCE;
343:
344: if ( tsp != NO_TRANSITION )
345: {
346: if ( ! IS_MARKED(tsp) )
347: STACK_STATE(tsp)
348:
349: tsp = trans2[ns];
350:
351: if ( tsp != NO_TRANSITION )
352: if ( ! IS_MARKED(tsp) )
353: STACK_STATE(tsp)
354: }
355: }
356: }
357:
358: /* clear out "visit" markers */
359:
360: for ( stkpos = 1; stkpos <= stkend; ++stkpos )
361: {
362: if ( IS_MARKED(stk[stkpos]) )
363: {
364: UNMARK_STATE(stk[stkpos])
365: }
366: else
367: flexfatal( "consistency check failed in epsclosure()" );
368: }
369:
370: *ns_addr = numstates;
371: *hv_addr = hashval;
372: *nacc_addr = nacc;
373:
374: return ( t );
375: }
376:
377:
378: /* increase_max_dfas - increase the maximum number of DFAs */
379:
380: void increase_max_dfas()
381:
382: {
383: current_max_dfas += MAX_DFAS_INCREMENT;
384:
385: ++num_reallocs;
386:
387: base = reallocate_integer_array( base, current_max_dfas );
388: def = reallocate_integer_array( def, current_max_dfas );
389: dfasiz = reallocate_integer_array( dfasiz, current_max_dfas );
390: accsiz = reallocate_integer_array( accsiz, current_max_dfas );
391: dhash = reallocate_integer_array( dhash, current_max_dfas );
392: dss = reallocate_int_ptr_array( dss, current_max_dfas );
393: dfaacc = reallocate_dfaacc_union( dfaacc, current_max_dfas );
394:
395: if ( nultrans )
396: nultrans = reallocate_integer_array( nultrans, current_max_dfas );
397: }
398:
399:
400: /* ntod - convert an ndfa to a dfa
401: *
402: * synopsis
403: * ntod();
404: *
405: * creates the dfa corresponding to the ndfa we've constructed. the
406: * dfa starts out in state #1.
407: */
408:
409: void ntod()
410:
411: {
412: int *accset, ds, nacc, newds;
413: int sym, hashval, numstates, dsize;
414: int num_full_table_rows; /* used only for -f */
415: int *nset, *dset;
416: int targptr, totaltrans, i, comstate, comfreq, targ;
417: int *epsclosure(), snstods(), symlist[CSIZE + 1];
418: int num_start_states;
419: int todo_head, todo_next;
420:
421: /* note that the following are indexed by *equivalence classes*
422: * and not by characters. Since equivalence classes are indexed
423: * beginning with 1, even if the scanner accepts NUL's, this
424: * means that (since every character is potentially in its own
425: * equivalence class) these arrays must have room for indices
426: * from 1 to CSIZE, so their size must be CSIZE + 1.
427: */
428: int duplist[CSIZE + 1], state[CSIZE + 1];
429: int targfreq[CSIZE + 1], targstate[CSIZE + 1];
430:
431: /* this is so find_table_space(...) will know where to start looking in
432: * chk/nxt for unused records for space to put in the state
433: */
434: if ( fullspd )
435: firstfree = 0;
436:
437: accset = allocate_integer_array( num_rules + 1 );
438: nset = allocate_integer_array( current_max_dfa_size );
439:
440: /* the "todo" queue is represented by the head, which is the DFA
441: * state currently being processed, and the "next", which is the
442: * next DFA state number available (not in use). We depend on the
443: * fact that snstods() returns DFA's \in increasing order/, and thus
444: * need only know the bounds of the dfas to be processed.
445: */
446: todo_head = todo_next = 0;
447:
448: for ( i = 0; i <= csize; ++i )
449: {
450: duplist[i] = NIL;
451: symlist[i] = false;
452: }
453:
454: for ( i = 0; i <= num_rules; ++i )
455: accset[i] = NIL;
456:
457: if ( trace )
458: {
459: dumpnfa( scset[1] );
460: fputs( "\n\nDFA Dump:\n\n", stderr );
461: }
462:
463: inittbl();
464:
465: /* check to see whether we should build a separate table for transitions
466: * on NUL characters. We don't do this for full-speed (-F) scanners,
467: * since for them we don't have a simple state number lying around with
468: * which to index the table. We also don't bother doing it for scanners
469: * unless (1) NUL is in its own equivalence class (indicated by a
470: * positive value of ecgroup[NUL]), (2) NUL's equilvalence class is
471: * the last equivalence class, and (3) the number of equivalence classes
472: * is the same as the number of characters. This latter case comes about
473: * when useecs is false or when its true but every character still
474: * manages to land in its own class (unlikely, but it's cheap to check
475: * for). If all these things are true then the character code needed
476: * to represent NUL's equivalence class for indexing the tables is
477: * going to take one more bit than the number of characters, and therefore
478: * we won't be assured of being able to fit it into a YY_CHAR variable.
479: * This rules out storing the transitions in a compressed table, since
480: * the code for interpreting them uses a YY_CHAR variable (perhaps it
481: * should just use an integer, though; this is worth pondering ... ###).
482: *
483: * Finally, for full tables, we want the number of entries in the
484: * table to be a power of two so the array references go fast (it
485: * will just take a shift to compute the major index). If encoding
486: * NUL's transitions in the table will spoil this, we give it its
487: * own table (note that this will be the case if we're not using
488: * equivalence classes).
489: */
490:
491: /* note that the test for ecgroup[0] == numecs below accomplishes
492: * both (1) and (2) above
493: */
494: if ( ! fullspd && ecgroup[0] == numecs )
495: { /* NUL is alone in its equivalence class, which is the last one */
496: int use_NUL_table = (numecs == csize);
497:
498: if ( fulltbl && ! use_NUL_table )
499: { /* we still may want to use the table if numecs is a power of 2 */
500: int power_of_two;
501:
502: for ( power_of_two = 1; power_of_two <= csize; power_of_two *= 2 )
503: if ( numecs == power_of_two )
504: {
505: use_NUL_table = true;
506: break;
507: }
508: }
509:
510: if ( use_NUL_table )
511: nultrans = allocate_integer_array( current_max_dfas );
512: /* from now on, nultrans != nil indicates that we're
513: * saving null transitions for later, separate encoding
514: */
515: }
516:
517:
518: if ( fullspd )
519: {
520: for ( i = 0; i <= numecs; ++i )
521: state[i] = 0;
522: place_state( state, 0, 0 );
523: }
524:
525: else if ( fulltbl )
526: {
527: if ( nultrans )
528: /* we won't be including NUL's transitions in the table,
529: * so build it for entries from 0 .. numecs - 1
530: */
531: num_full_table_rows = numecs;
532:
533: else
534: /* take into account the fact that we'll be including
535: * the NUL entries in the transition table. Build it
536: * from 0 .. numecs.
537: */
538: num_full_table_rows = numecs + 1;
539:
540: /* declare it "short" because it's a real long-shot that that
541: * won't be large enough.
542: */
543: printf( "static short int yy_nxt[][%d] =\n {\n",
544: /* '}' so vi doesn't get too confused */
545: num_full_table_rows );
546:
547: /* generate 0 entries for state #0 */
548: for ( i = 0; i < num_full_table_rows; ++i )
549: mk2data( 0 );
550:
551: /* force ',' and dataflush() next call to mk2data */
552: datapos = NUMDATAITEMS;
553:
554: /* force extra blank line next dataflush() */
555: dataline = NUMDATALINES;
556: }
557:
558: /* create the first states */
559:
560: num_start_states = lastsc * 2;
561:
562: for ( i = 1; i <= num_start_states; ++i )
563: {
564: numstates = 1;
565:
566: /* for each start condition, make one state for the case when
567: * we're at the beginning of the line (the '%' operator) and
568: * one for the case when we're not
569: */
570: if ( i % 2 == 1 )
571: nset[numstates] = scset[(i / 2) + 1];
572: else
573: nset[numstates] = mkbranch( scbol[i / 2], scset[i / 2] );
574:
575: nset = epsclosure( nset, &numstates, accset, &nacc, &hashval );
576:
577: if ( snstods( nset, numstates, accset, nacc, hashval, &ds ) )
578: {
579: numas += nacc;
580: totnst += numstates;
581: ++todo_next;
582:
583: if ( variable_trailing_context_rules && nacc > 0 )
584: check_trailing_context( nset, numstates, accset, nacc );
585: }
586: }
587:
588: if ( ! fullspd )
589: {
590: if ( ! snstods( nset, 0, accset, 0, 0, &end_of_buffer_state ) )
591: flexfatal( "could not create unique end-of-buffer state" );
592:
593: ++numas;
594: ++num_start_states;
595: ++todo_next;
596: }
597:
598: while ( todo_head < todo_next )
599: {
600: targptr = 0;
601: totaltrans = 0;
602:
603: for ( i = 1; i <= numecs; ++i )
604: state[i] = 0;
605:
606: ds = ++todo_head;
607:
608: dset = dss[ds];
609: dsize = dfasiz[ds];
610:
611: if ( trace )
612: fprintf( stderr, "state # %d:\n", ds );
613:
614: sympartition( dset, dsize, symlist, duplist );
615:
616: for ( sym = 1; sym <= numecs; ++sym )
617: {
618: if ( symlist[sym] )
619: {
620: symlist[sym] = 0;
621:
622: if ( duplist[sym] == NIL )
623: { /* symbol has unique out-transitions */
624: numstates = symfollowset( dset, dsize, sym, nset );
625: nset = epsclosure( nset, &numstates, accset,
626: &nacc, &hashval );
627:
628: if ( snstods( nset, numstates, accset,
629: nacc, hashval, &newds ) )
630: {
631: totnst = totnst + numstates;
632: ++todo_next;
633: numas += nacc;
634:
635: if ( variable_trailing_context_rules && nacc > 0 )
636: check_trailing_context( nset, numstates,
637: accset, nacc );
638: }
639:
640: state[sym] = newds;
641:
642: if ( trace )
643: fprintf( stderr, "\t%d\t%d\n", sym, newds );
644:
645: targfreq[++targptr] = 1;
646: targstate[targptr] = newds;
647: ++numuniq;
648: }
649:
650: else
651: {
652: /* sym's equivalence class has the same transitions
653: * as duplist(sym)'s equivalence class
654: */
655: targ = state[duplist[sym]];
656: state[sym] = targ;
657:
658: if ( trace )
659: fprintf( stderr, "\t%d\t%d\n", sym, targ );
660:
661: /* update frequency count for destination state */
662:
663: i = 0;
664: while ( targstate[++i] != targ )
665: ;
666:
667: ++targfreq[i];
668: ++numdup;
669: }
670:
671: ++totaltrans;
672: duplist[sym] = NIL;
673: }
674: }
675:
676: numsnpairs = numsnpairs + totaltrans;
677:
678: if ( caseins && ! useecs )
679: {
680: register int j;
681:
682: for ( i = 'A', j = 'a'; i <= 'Z'; ++i, ++j )
683: state[i] = state[j];
684: }
685:
686: if ( ds > num_start_states )
687: check_for_backtracking( ds, state );
688:
689: if ( nultrans )
690: {
691: nultrans[ds] = state[NUL_ec];
692: state[NUL_ec] = 0; /* remove transition */
693: }
694:
695: if ( fulltbl )
696: {
697: /* supply array's 0-element */
698: if ( ds == end_of_buffer_state )
699: mk2data( -end_of_buffer_state );
700: else
701: mk2data( end_of_buffer_state );
702:
703: for ( i = 1; i < num_full_table_rows; ++i )
704: /* jams are marked by negative of state number */
705: mk2data( state[i] ? state[i] : -ds );
706:
707: /* force ',' and dataflush() next call to mk2data */
708: datapos = NUMDATAITEMS;
709:
710: /* force extra blank line next dataflush() */
711: dataline = NUMDATALINES;
712: }
713:
714: else if ( fullspd )
715: place_state( state, ds, totaltrans );
716:
717: else if ( ds == end_of_buffer_state )
718: /* special case this state to make sure it does what it's
719: * supposed to, i.e., jam on end-of-buffer
720: */
721: stack1( ds, 0, 0, JAMSTATE );
722:
723: else /* normal, compressed state */
724: {
725: /* determine which destination state is the most common, and
726: * how many transitions to it there are
727: */
728:
729: comfreq = 0;
730: comstate = 0;
731:
732: for ( i = 1; i <= targptr; ++i )
733: if ( targfreq[i] > comfreq )
734: {
735: comfreq = targfreq[i];
736: comstate = targstate[i];
737: }
738:
739: bldtbl( state, ds, totaltrans, comstate, comfreq );
740: }
741: }
742:
743: if ( fulltbl )
744: dataend();
745:
746: else if ( ! fullspd )
747: {
748: cmptmps(); /* create compressed template entries */
749:
750: /* create tables for all the states with only one out-transition */
751: while ( onesp > 0 )
752: {
753: mk1tbl( onestate[onesp], onesym[onesp], onenext[onesp],
754: onedef[onesp] );
755: --onesp;
756: }
757:
758: mkdeftbl();
759: }
760: }
761:
762:
763: /* snstods - converts a set of ndfa states into a dfa state
764: *
765: * synopsis
766: * int sns[numstates], numstates, newds, accset[num_rules + 1], nacc, hashval;
767: * int snstods();
768: * is_new_state = snstods( sns, numstates, accset, nacc, hashval, &newds );
769: *
770: * on return, the dfa state number is in newds.
771: */
772:
773: int snstods( sns, numstates, accset, nacc, hashval, newds_addr )
774: int sns[], numstates, accset[], nacc, hashval, *newds_addr;
775:
776: {
777: int didsort = 0;
778: register int i, j;
779: int newds, *oldsns;
780:
781: for ( i = 1; i <= lastdfa; ++i )
782: if ( hashval == dhash[i] )
783: {
784: if ( numstates == dfasiz[i] )
785: {
786: oldsns = dss[i];
787:
788: if ( ! didsort )
789: {
790: /* we sort the states in sns so we can compare it to
791: * oldsns quickly. we use bubble because there probably
792: * aren't very many states
793: */
794: bubble( sns, numstates );
795: didsort = 1;
796: }
797:
798: for ( j = 1; j <= numstates; ++j )
799: if ( sns[j] != oldsns[j] )
800: break;
801:
802: if ( j > numstates )
803: {
804: ++dfaeql;
805: *newds_addr = i;
806: return ( 0 );
807: }
808:
809: ++hshcol;
810: }
811:
812: else
813: ++hshsave;
814: }
815:
816: /* make a new dfa */
817:
818: if ( ++lastdfa >= current_max_dfas )
819: increase_max_dfas();
820:
821: newds = lastdfa;
822:
823: dss[newds] = (int *) malloc( (unsigned) ((numstates + 1) * sizeof( int )) );
824:
825: if ( ! dss[newds] )
826: flexfatal( "dynamic memory failure in snstods()" );
827:
828: /* if we haven't already sorted the states in sns, we do so now, so that
829: * future comparisons with it can be made quickly
830: */
831:
832: if ( ! didsort )
833: bubble( sns, numstates );
834:
835: for ( i = 1; i <= numstates; ++i )
836: dss[newds][i] = sns[i];
837:
838: dfasiz[newds] = numstates;
839: dhash[newds] = hashval;
840:
841: if ( nacc == 0 )
842: {
843: if ( reject )
844: dfaacc[newds].dfaacc_set = (int *) 0;
845: else
846: dfaacc[newds].dfaacc_state = 0;
847:
848: accsiz[newds] = 0;
849: }
850:
851: else if ( reject )
852: {
853: /* we sort the accepting set in increasing order so the disambiguating
854: * rule that the first rule listed is considered match in the event of
855: * ties will work. We use a bubble sort since the list is probably
856: * quite small.
857: */
858:
859: bubble( accset, nacc );
860:
861: dfaacc[newds].dfaacc_set =
862: (int *) malloc( (unsigned) ((nacc + 1) * sizeof( int )) );
863:
864: if ( ! dfaacc[newds].dfaacc_set )
865: flexfatal( "dynamic memory failure in snstods()" );
866:
867: /* save the accepting set for later */
868: for ( i = 1; i <= nacc; ++i )
869: dfaacc[newds].dfaacc_set[i] = accset[i];
870:
871: accsiz[newds] = nacc;
872: }
873:
874: else
875: { /* find lowest numbered rule so the disambiguating rule will work */
876: j = num_rules + 1;
877:
878: for ( i = 1; i <= nacc; ++i )
879: if ( accset[i] < j )
880: j = accset[i];
881:
882: dfaacc[newds].dfaacc_state = j;
883: }
884:
885: *newds_addr = newds;
886:
887: return ( 1 );
888: }
889:
890:
891: /* symfollowset - follow the symbol transitions one step
892: *
893: * synopsis
894: * int ds[current_max_dfa_size], dsize, transsym;
895: * int nset[current_max_dfa_size], numstates;
896: * numstates = symfollowset( ds, dsize, transsym, nset );
897: */
898:
899: int symfollowset( ds, dsize, transsym, nset )
900: int ds[], dsize, transsym, nset[];
901:
902: {
903: int ns, tsp, sym, i, j, lenccl, ch, numstates;
904: int ccllist;
905:
906: numstates = 0;
907:
908: for ( i = 1; i <= dsize; ++i )
909: { /* for each nfa state ns in the state set of ds */
910: ns = ds[i];
911: sym = transchar[ns];
912: tsp = trans1[ns];
913:
914: if ( sym < 0 )
915: { /* it's a character class */
916: sym = -sym;
917: ccllist = cclmap[sym];
918: lenccl = ccllen[sym];
919:
920: if ( cclng[sym] )
921: {
922: for ( j = 0; j < lenccl; ++j )
923: { /* loop through negated character class */
924: ch = ccltbl[ccllist + j];
925:
926: if ( ch == 0 )
927: ch = NUL_ec;
928:
929: if ( ch > transsym )
930: break; /* transsym isn't in negated ccl */
931:
932: else if ( ch == transsym )
933: /* next 2 */ goto bottom;
934: }
935:
936: /* didn't find transsym in ccl */
937: nset[++numstates] = tsp;
938: }
939:
940: else
941: for ( j = 0; j < lenccl; ++j )
942: {
943: ch = ccltbl[ccllist + j];
944:
945: if ( ch == 0 )
946: ch = NUL_ec;
947:
948: if ( ch > transsym )
949: break;
950:
951: else if ( ch == transsym )
952: {
953: nset[++numstates] = tsp;
954: break;
955: }
956: }
957: }
958:
959: else if ( sym >= 'A' && sym <= 'Z' && caseins )
960: flexfatal( "consistency check failed in symfollowset" );
961:
962: else if ( sym == SYM_EPSILON )
963: { /* do nothing */
964: }
965:
966: else if ( abs( ecgroup[sym] ) == transsym )
967: nset[++numstates] = tsp;
968:
969: bottom:
970: ;
971: }
972:
973: return ( numstates );
974: }
975:
976:
977: /* sympartition - partition characters with same out-transitions
978: *
979: * synopsis
980: * integer ds[current_max_dfa_size], numstates, duplist[numecs];
981: * symlist[numecs];
982: * sympartition( ds, numstates, symlist, duplist );
983: */
984:
985: void sympartition( ds, numstates, symlist, duplist )
986: int ds[], numstates, duplist[];
987: int symlist[];
988:
989: {
990: int tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich;
991:
992: /* partitioning is done by creating equivalence classes for those
993: * characters which have out-transitions from the given state. Thus
994: * we are really creating equivalence classes of equivalence classes.
995: */
996:
997: for ( i = 1; i <= numecs; ++i )
998: { /* initialize equivalence class list */
999: duplist[i] = i - 1;
1000: dupfwd[i] = i + 1;
1001: }
1002:
1003: duplist[1] = NIL;
1004: dupfwd[numecs] = NIL;
1005:
1006: for ( i = 1; i <= numstates; ++i )
1007: {
1008: ns = ds[i];
1009: tch = transchar[ns];
1010:
1011: if ( tch != SYM_EPSILON )
1012: {
1013: if ( tch < -lastccl || tch > csize )
1014: {
1015: if ( tch > csize && tch <= CSIZE )
1016: flexerror( "scanner requires -8 flag" );
1017:
1018: else
1019: flexfatal(
1020: "bad transition character detected in sympartition()" );
1021: }
1022:
1023: if ( tch >= 0 )
1024: { /* character transition */
1025: /* abs() needed for fake %t ec's */
1026: int ec = abs( ecgroup[tch] );
1027:
1028: mkechar( ec, dupfwd, duplist );
1029: symlist[ec] = 1;
1030: }
1031:
1032: else
1033: { /* character class */
1034: tch = -tch;
1035:
1036: lenccl = ccllen[tch];
1037: cclp = cclmap[tch];
1038: mkeccl( ccltbl + cclp, lenccl, dupfwd, duplist, numecs,
1039: NUL_ec );
1040:
1041: if ( cclng[tch] )
1042: {
1043: j = 0;
1044:
1045: for ( k = 0; k < lenccl; ++k )
1046: {
1047: ich = ccltbl[cclp + k];
1048:
1049: if ( ich == 0 )
1050: ich = NUL_ec;
1051:
1052: for ( ++j; j < ich; ++j )
1053: symlist[j] = 1;
1054: }
1055:
1056: for ( ++j; j <= numecs; ++j )
1057: symlist[j] = 1;
1058: }
1059:
1060: else
1061: for ( k = 0; k < lenccl; ++k )
1062: {
1063: ich = ccltbl[cclp + k];
1064:
1065: if ( ich == 0 )
1066: ich = NUL_ec;
1067:
1068: symlist[ich] = 1;
1069: }
1070: }
1071: }
1072: }
1073: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.