|
|
1.1 ! root 1: /*- ! 2: * Copyright (c) 1990 The Regents of the University of California. ! 3: * All rights reserved. ! 4: * ! 5: * This code is derived from software contributed to Berkeley by ! 6: * Vern Paxson. ! 7: * ! 8: * The United States Government has rights in this work pursuant ! 9: * to contract no. DE-AC03-76SF00098 between the United States ! 10: * Department of Energy and the University of California. ! 11: * ! 12: * Redistribution and use in source and binary forms are permitted provided ! 13: * that: (1) source distributions retain this entire copyright notice and ! 14: * comment, and (2) distributions including binaries display the following ! 15: * acknowledgement: ``This product includes software developed by the ! 16: * University of California, Berkeley and its contributors'' in the ! 17: * documentation or other materials provided with the distribution and in ! 18: * all advertising materials mentioning features or use of this software. ! 19: * Neither the name of the University nor the names of its contributors may ! 20: * be used to endorse or promote products derived from this software without ! 21: * specific prior written permission. ! 22: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED ! 23: * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF ! 24: * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. ! 25: */ ! 26: ! 27: #ifndef lint ! 28: static char sccsid[] = "@(#)dfa.c 5.2 (Berkeley) 6/18/90"; ! 29: #endif /* not lint */ ! 30: ! 31: /* dfa - DFA construction routines */ ! 32: ! 33: #include "flexdef.h" ! 34: ! 35: /* declare functions that have forward references */ ! 36: ! 37: void dump_associated_rules PROTO((FILE*, int)); ! 38: void dump_transitions PROTO((FILE*, int[])); ! 39: void sympartition PROTO((int[], int, int[], int[])); ! 40: int symfollowset PROTO((int[], int, int, int[])); ! 41: ! 42: ! 43: /* check_for_backtracking - check a DFA state for backtracking ! 44: * ! 45: * synopsis ! 46: * int ds, state[numecs]; ! 47: * check_for_backtracking( ds, state ); ! 48: * ! 49: * ds is the number of the state to check and state[] is its out-transitions, ! 50: * indexed by equivalence class, and state_rules[] is the set of rules ! 51: * associated with this state ! 52: */ ! 53: ! 54: void check_for_backtracking( ds, state ) ! 55: int ds; ! 56: int state[]; ! 57: ! 58: { ! 59: if ( (reject && ! dfaacc[ds].dfaacc_set) || ! dfaacc[ds].dfaacc_state ) ! 60: { /* state is non-accepting */ ! 61: ++num_backtracking; ! 62: ! 63: if ( backtrack_report ) ! 64: { ! 65: fprintf( backtrack_file, "State #%d is non-accepting -\n", ds ); ! 66: ! 67: /* identify the state */ ! 68: dump_associated_rules( backtrack_file, ds ); ! 69: ! 70: /* now identify it further using the out- and jam-transitions */ ! 71: dump_transitions( backtrack_file, state ); ! 72: ! 73: putc( '\n', backtrack_file ); ! 74: } ! 75: } ! 76: } ! 77: ! 78: ! 79: /* check_trailing_context - check to see if NFA state set constitutes ! 80: * "dangerous" trailing context ! 81: * ! 82: * synopsis ! 83: * int nfa_states[num_states+1], num_states; ! 84: * int accset[nacc+1], nacc; ! 85: * check_trailing_context( nfa_states, num_states, accset, nacc ); ! 86: * ! 87: * NOTES ! 88: * Trailing context is "dangerous" if both the head and the trailing ! 89: * part are of variable size \and/ there's a DFA state which contains ! 90: * both an accepting state for the head part of the rule and NFA states ! 91: * which occur after the beginning of the trailing context. ! 92: * When such a rule is matched, it's impossible to tell if having been ! 93: * in the DFA state indicates the beginning of the trailing context ! 94: * or further-along scanning of the pattern. In these cases, a warning ! 95: * message is issued. ! 96: * ! 97: * nfa_states[1 .. num_states] is the list of NFA states in the DFA. ! 98: * accset[1 .. nacc] is the list of accepting numbers for the DFA state. ! 99: */ ! 100: ! 101: void check_trailing_context( nfa_states, num_states, accset, nacc ) ! 102: int *nfa_states, num_states; ! 103: int *accset; ! 104: register int nacc; ! 105: ! 106: { ! 107: register int i, j; ! 108: ! 109: for ( i = 1; i <= num_states; ++i ) ! 110: { ! 111: int ns = nfa_states[i]; ! 112: register int type = state_type[ns]; ! 113: register int ar = assoc_rule[ns]; ! 114: ! 115: if ( type == STATE_NORMAL || rule_type[ar] != RULE_VARIABLE ) ! 116: { /* do nothing */ ! 117: } ! 118: ! 119: else if ( type == STATE_TRAILING_CONTEXT ) ! 120: { ! 121: /* potential trouble. Scan set of accepting numbers for ! 122: * the one marking the end of the "head". We assume that ! 123: * this looping will be fairly cheap since it's rare that ! 124: * an accepting number set is large. ! 125: */ ! 126: for ( j = 1; j <= nacc; ++j ) ! 127: if ( accset[j] & YY_TRAILING_HEAD_MASK ) ! 128: { ! 129: fprintf( stderr, ! 130: "%s: Dangerous trailing context in rule at line %d\n", ! 131: program_name, rule_linenum[ar] ); ! 132: return; ! 133: } ! 134: } ! 135: } ! 136: } ! 137: ! 138: ! 139: /* dump_associated_rules - list the rules associated with a DFA state ! 140: * ! 141: * synopisis ! 142: * int ds; ! 143: * FILE *file; ! 144: * dump_associated_rules( file, ds ); ! 145: * ! 146: * goes through the set of NFA states associated with the DFA and ! 147: * extracts the first MAX_ASSOC_RULES unique rules, sorts them, ! 148: * and writes a report to the given file ! 149: */ ! 150: ! 151: void dump_associated_rules( file, ds ) ! 152: FILE *file; ! 153: int ds; ! 154: ! 155: { ! 156: register int i, j; ! 157: register int num_associated_rules = 0; ! 158: int rule_set[MAX_ASSOC_RULES + 1]; ! 159: int *dset = dss[ds]; ! 160: int size = dfasiz[ds]; ! 161: ! 162: for ( i = 1; i <= size; ++i ) ! 163: { ! 164: register rule_num = rule_linenum[assoc_rule[dset[i]]]; ! 165: ! 166: for ( j = 1; j <= num_associated_rules; ++j ) ! 167: if ( rule_num == rule_set[j] ) ! 168: break; ! 169: ! 170: if ( j > num_associated_rules ) ! 171: { /* new rule */ ! 172: if ( num_associated_rules < MAX_ASSOC_RULES ) ! 173: rule_set[++num_associated_rules] = rule_num; ! 174: } ! 175: } ! 176: ! 177: bubble( rule_set, num_associated_rules ); ! 178: ! 179: fprintf( file, " associated rule line numbers:" ); ! 180: ! 181: for ( i = 1; i <= num_associated_rules; ++i ) ! 182: { ! 183: if ( i % 8 == 1 ) ! 184: putc( '\n', file ); ! 185: ! 186: fprintf( file, "\t%d", rule_set[i] ); ! 187: } ! 188: ! 189: putc( '\n', file ); ! 190: } ! 191: ! 192: ! 193: /* dump_transitions - list the transitions associated with a DFA state ! 194: * ! 195: * synopisis ! 196: * int state[numecs]; ! 197: * FILE *file; ! 198: * dump_transitions( file, state ); ! 199: * ! 200: * goes through the set of out-transitions and lists them in human-readable ! 201: * form (i.e., not as equivalence classes); also lists jam transitions ! 202: * (i.e., all those which are not out-transitions, plus EOF). The dump ! 203: * is done to the given file. ! 204: */ ! 205: ! 206: void dump_transitions( file, state ) ! 207: FILE *file; ! 208: int state[]; ! 209: ! 210: { ! 211: register int i, ec; ! 212: int out_char_set[CSIZE]; ! 213: ! 214: for ( i = 0; i < csize; ++i ) ! 215: { ! 216: ec = abs( ecgroup[i] ); ! 217: out_char_set[i] = state[ec]; ! 218: } ! 219: ! 220: fprintf( file, " out-transitions: " ); ! 221: ! 222: list_character_set( file, out_char_set ); ! 223: ! 224: /* now invert the members of the set to get the jam transitions */ ! 225: for ( i = 0; i < csize; ++i ) ! 226: out_char_set[i] = ! out_char_set[i]; ! 227: ! 228: fprintf( file, "\n jam-transitions: EOF " ); ! 229: ! 230: list_character_set( file, out_char_set ); ! 231: ! 232: putc( '\n', file ); ! 233: } ! 234: ! 235: ! 236: /* epsclosure - construct the epsilon closure of a set of ndfa states ! 237: * ! 238: * synopsis ! 239: * int t[current_max_dfa_size], numstates, accset[num_rules + 1], nacc; ! 240: * int hashval; ! 241: * int *epsclosure(); ! 242: * t = epsclosure( t, &numstates, accset, &nacc, &hashval ); ! 243: * ! 244: * NOTES ! 245: * the epsilon closure is the set of all states reachable by an arbitrary ! 246: * number of epsilon transitions which themselves do not have epsilon ! 247: * transitions going out, unioned with the set of states which have non-null ! 248: * accepting numbers. t is an array of size numstates of nfa state numbers. ! 249: * Upon return, t holds the epsilon closure and numstates is updated. accset ! 250: * holds a list of the accepting numbers, and the size of accset is given ! 251: * by nacc. t may be subjected to reallocation if it is not large enough ! 252: * to hold the epsilon closure. ! 253: * ! 254: * hashval is the hash value for the dfa corresponding to the state set ! 255: */ ! 256: ! 257: int *epsclosure( t, ns_addr, accset, nacc_addr, hv_addr ) ! 258: int *t, *ns_addr, accset[], *nacc_addr, *hv_addr; ! 259: ! 260: { ! 261: register int stkpos, ns, tsp; ! 262: int numstates = *ns_addr, nacc, hashval, transsym, nfaccnum; ! 263: int stkend, nstate; ! 264: static int did_stk_init = false, *stk; ! 265: ! 266: #define MARK_STATE(state) \ ! 267: trans1[state] = trans1[state] - MARKER_DIFFERENCE; ! 268: ! 269: #define IS_MARKED(state) (trans1[state] < 0) ! 270: ! 271: #define UNMARK_STATE(state) \ ! 272: trans1[state] = trans1[state] + MARKER_DIFFERENCE; ! 273: ! 274: #define CHECK_ACCEPT(state) \ ! 275: { \ ! 276: nfaccnum = accptnum[state]; \ ! 277: if ( nfaccnum != NIL ) \ ! 278: accset[++nacc] = nfaccnum; \ ! 279: } ! 280: ! 281: #define DO_REALLOCATION \ ! 282: { \ ! 283: current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \ ! 284: ++num_reallocs; \ ! 285: t = reallocate_integer_array( t, current_max_dfa_size ); \ ! 286: stk = reallocate_integer_array( stk, current_max_dfa_size ); \ ! 287: } \ ! 288: ! 289: #define PUT_ON_STACK(state) \ ! 290: { \ ! 291: if ( ++stkend >= current_max_dfa_size ) \ ! 292: DO_REALLOCATION \ ! 293: stk[stkend] = state; \ ! 294: MARK_STATE(state) \ ! 295: } ! 296: ! 297: #define ADD_STATE(state) \ ! 298: { \ ! 299: if ( ++numstates >= current_max_dfa_size ) \ ! 300: DO_REALLOCATION \ ! 301: t[numstates] = state; \ ! 302: hashval = hashval + state; \ ! 303: } ! 304: ! 305: #define STACK_STATE(state) \ ! 306: { \ ! 307: PUT_ON_STACK(state) \ ! 308: CHECK_ACCEPT(state) \ ! 309: if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \ ! 310: ADD_STATE(state) \ ! 311: } ! 312: ! 313: if ( ! did_stk_init ) ! 314: { ! 315: stk = allocate_integer_array( current_max_dfa_size ); ! 316: did_stk_init = true; ! 317: } ! 318: ! 319: nacc = stkend = hashval = 0; ! 320: ! 321: for ( nstate = 1; nstate <= numstates; ++nstate ) ! 322: { ! 323: ns = t[nstate]; ! 324: ! 325: /* the state could be marked if we've already pushed it onto ! 326: * the stack ! 327: */ ! 328: if ( ! IS_MARKED(ns) ) ! 329: PUT_ON_STACK(ns) ! 330: ! 331: CHECK_ACCEPT(ns) ! 332: hashval = hashval + ns; ! 333: } ! 334: ! 335: for ( stkpos = 1; stkpos <= stkend; ++stkpos ) ! 336: { ! 337: ns = stk[stkpos]; ! 338: transsym = transchar[ns]; ! 339: ! 340: if ( transsym == SYM_EPSILON ) ! 341: { ! 342: tsp = trans1[ns] + MARKER_DIFFERENCE; ! 343: ! 344: if ( tsp != NO_TRANSITION ) ! 345: { ! 346: if ( ! IS_MARKED(tsp) ) ! 347: STACK_STATE(tsp) ! 348: ! 349: tsp = trans2[ns]; ! 350: ! 351: if ( tsp != NO_TRANSITION ) ! 352: if ( ! IS_MARKED(tsp) ) ! 353: STACK_STATE(tsp) ! 354: } ! 355: } ! 356: } ! 357: ! 358: /* clear out "visit" markers */ ! 359: ! 360: for ( stkpos = 1; stkpos <= stkend; ++stkpos ) ! 361: { ! 362: if ( IS_MARKED(stk[stkpos]) ) ! 363: { ! 364: UNMARK_STATE(stk[stkpos]) ! 365: } ! 366: else ! 367: flexfatal( "consistency check failed in epsclosure()" ); ! 368: } ! 369: ! 370: *ns_addr = numstates; ! 371: *hv_addr = hashval; ! 372: *nacc_addr = nacc; ! 373: ! 374: return ( t ); ! 375: } ! 376: ! 377: ! 378: /* increase_max_dfas - increase the maximum number of DFAs */ ! 379: ! 380: void increase_max_dfas() ! 381: ! 382: { ! 383: current_max_dfas += MAX_DFAS_INCREMENT; ! 384: ! 385: ++num_reallocs; ! 386: ! 387: base = reallocate_integer_array( base, current_max_dfas ); ! 388: def = reallocate_integer_array( def, current_max_dfas ); ! 389: dfasiz = reallocate_integer_array( dfasiz, current_max_dfas ); ! 390: accsiz = reallocate_integer_array( accsiz, current_max_dfas ); ! 391: dhash = reallocate_integer_array( dhash, current_max_dfas ); ! 392: dss = reallocate_int_ptr_array( dss, current_max_dfas ); ! 393: dfaacc = reallocate_dfaacc_union( dfaacc, current_max_dfas ); ! 394: ! 395: if ( nultrans ) ! 396: nultrans = reallocate_integer_array( nultrans, current_max_dfas ); ! 397: } ! 398: ! 399: ! 400: /* ntod - convert an ndfa to a dfa ! 401: * ! 402: * synopsis ! 403: * ntod(); ! 404: * ! 405: * creates the dfa corresponding to the ndfa we've constructed. the ! 406: * dfa starts out in state #1. ! 407: */ ! 408: ! 409: void ntod() ! 410: ! 411: { ! 412: int *accset, ds, nacc, newds; ! 413: int sym, hashval, numstates, dsize; ! 414: int num_full_table_rows; /* used only for -f */ ! 415: int *nset, *dset; ! 416: int targptr, totaltrans, i, comstate, comfreq, targ; ! 417: int *epsclosure(), snstods(), symlist[CSIZE + 1]; ! 418: int num_start_states; ! 419: int todo_head, todo_next; ! 420: ! 421: /* note that the following are indexed by *equivalence classes* ! 422: * and not by characters. Since equivalence classes are indexed ! 423: * beginning with 1, even if the scanner accepts NUL's, this ! 424: * means that (since every character is potentially in its own ! 425: * equivalence class) these arrays must have room for indices ! 426: * from 1 to CSIZE, so their size must be CSIZE + 1. ! 427: */ ! 428: int duplist[CSIZE + 1], state[CSIZE + 1]; ! 429: int targfreq[CSIZE + 1], targstate[CSIZE + 1]; ! 430: ! 431: /* this is so find_table_space(...) will know where to start looking in ! 432: * chk/nxt for unused records for space to put in the state ! 433: */ ! 434: if ( fullspd ) ! 435: firstfree = 0; ! 436: ! 437: accset = allocate_integer_array( num_rules + 1 ); ! 438: nset = allocate_integer_array( current_max_dfa_size ); ! 439: ! 440: /* the "todo" queue is represented by the head, which is the DFA ! 441: * state currently being processed, and the "next", which is the ! 442: * next DFA state number available (not in use). We depend on the ! 443: * fact that snstods() returns DFA's \in increasing order/, and thus ! 444: * need only know the bounds of the dfas to be processed. ! 445: */ ! 446: todo_head = todo_next = 0; ! 447: ! 448: for ( i = 0; i <= csize; ++i ) ! 449: { ! 450: duplist[i] = NIL; ! 451: symlist[i] = false; ! 452: } ! 453: ! 454: for ( i = 0; i <= num_rules; ++i ) ! 455: accset[i] = NIL; ! 456: ! 457: if ( trace ) ! 458: { ! 459: dumpnfa( scset[1] ); ! 460: fputs( "\n\nDFA Dump:\n\n", stderr ); ! 461: } ! 462: ! 463: inittbl(); ! 464: ! 465: /* check to see whether we should build a separate table for transitions ! 466: * on NUL characters. We don't do this for full-speed (-F) scanners, ! 467: * since for them we don't have a simple state number lying around with ! 468: * which to index the table. We also don't bother doing it for scanners ! 469: * unless (1) NUL is in its own equivalence class (indicated by a ! 470: * positive value of ecgroup[NUL]), (2) NUL's equilvalence class is ! 471: * the last equivalence class, and (3) the number of equivalence classes ! 472: * is the same as the number of characters. This latter case comes about ! 473: * when useecs is false or when its true but every character still ! 474: * manages to land in its own class (unlikely, but it's cheap to check ! 475: * for). If all these things are true then the character code needed ! 476: * to represent NUL's equivalence class for indexing the tables is ! 477: * going to take one more bit than the number of characters, and therefore ! 478: * we won't be assured of being able to fit it into a YY_CHAR variable. ! 479: * This rules out storing the transitions in a compressed table, since ! 480: * the code for interpreting them uses a YY_CHAR variable (perhaps it ! 481: * should just use an integer, though; this is worth pondering ... ###). ! 482: * ! 483: * Finally, for full tables, we want the number of entries in the ! 484: * table to be a power of two so the array references go fast (it ! 485: * will just take a shift to compute the major index). If encoding ! 486: * NUL's transitions in the table will spoil this, we give it its ! 487: * own table (note that this will be the case if we're not using ! 488: * equivalence classes). ! 489: */ ! 490: ! 491: /* note that the test for ecgroup[0] == numecs below accomplishes ! 492: * both (1) and (2) above ! 493: */ ! 494: if ( ! fullspd && ecgroup[0] == numecs ) ! 495: { /* NUL is alone in its equivalence class, which is the last one */ ! 496: int use_NUL_table = (numecs == csize); ! 497: ! 498: if ( fulltbl && ! use_NUL_table ) ! 499: { /* we still may want to use the table if numecs is a power of 2 */ ! 500: int power_of_two; ! 501: ! 502: for ( power_of_two = 1; power_of_two <= csize; power_of_two *= 2 ) ! 503: if ( numecs == power_of_two ) ! 504: { ! 505: use_NUL_table = true; ! 506: break; ! 507: } ! 508: } ! 509: ! 510: if ( use_NUL_table ) ! 511: nultrans = allocate_integer_array( current_max_dfas ); ! 512: /* from now on, nultrans != nil indicates that we're ! 513: * saving null transitions for later, separate encoding ! 514: */ ! 515: } ! 516: ! 517: ! 518: if ( fullspd ) ! 519: { ! 520: for ( i = 0; i <= numecs; ++i ) ! 521: state[i] = 0; ! 522: place_state( state, 0, 0 ); ! 523: } ! 524: ! 525: else if ( fulltbl ) ! 526: { ! 527: if ( nultrans ) ! 528: /* we won't be including NUL's transitions in the table, ! 529: * so build it for entries from 0 .. numecs - 1 ! 530: */ ! 531: num_full_table_rows = numecs; ! 532: ! 533: else ! 534: /* take into account the fact that we'll be including ! 535: * the NUL entries in the transition table. Build it ! 536: * from 0 .. numecs. ! 537: */ ! 538: num_full_table_rows = numecs + 1; ! 539: ! 540: /* declare it "short" because it's a real long-shot that that ! 541: * won't be large enough. ! 542: */ ! 543: printf( "static short int yy_nxt[][%d] =\n {\n", ! 544: /* '}' so vi doesn't get too confused */ ! 545: num_full_table_rows ); ! 546: ! 547: /* generate 0 entries for state #0 */ ! 548: for ( i = 0; i < num_full_table_rows; ++i ) ! 549: mk2data( 0 ); ! 550: ! 551: /* force ',' and dataflush() next call to mk2data */ ! 552: datapos = NUMDATAITEMS; ! 553: ! 554: /* force extra blank line next dataflush() */ ! 555: dataline = NUMDATALINES; ! 556: } ! 557: ! 558: /* create the first states */ ! 559: ! 560: num_start_states = lastsc * 2; ! 561: ! 562: for ( i = 1; i <= num_start_states; ++i ) ! 563: { ! 564: numstates = 1; ! 565: ! 566: /* for each start condition, make one state for the case when ! 567: * we're at the beginning of the line (the '%' operator) and ! 568: * one for the case when we're not ! 569: */ ! 570: if ( i % 2 == 1 ) ! 571: nset[numstates] = scset[(i / 2) + 1]; ! 572: else ! 573: nset[numstates] = mkbranch( scbol[i / 2], scset[i / 2] ); ! 574: ! 575: nset = epsclosure( nset, &numstates, accset, &nacc, &hashval ); ! 576: ! 577: if ( snstods( nset, numstates, accset, nacc, hashval, &ds ) ) ! 578: { ! 579: numas += nacc; ! 580: totnst += numstates; ! 581: ++todo_next; ! 582: ! 583: if ( variable_trailing_context_rules && nacc > 0 ) ! 584: check_trailing_context( nset, numstates, accset, nacc ); ! 585: } ! 586: } ! 587: ! 588: if ( ! fullspd ) ! 589: { ! 590: if ( ! snstods( nset, 0, accset, 0, 0, &end_of_buffer_state ) ) ! 591: flexfatal( "could not create unique end-of-buffer state" ); ! 592: ! 593: ++numas; ! 594: ++num_start_states; ! 595: ++todo_next; ! 596: } ! 597: ! 598: while ( todo_head < todo_next ) ! 599: { ! 600: targptr = 0; ! 601: totaltrans = 0; ! 602: ! 603: for ( i = 1; i <= numecs; ++i ) ! 604: state[i] = 0; ! 605: ! 606: ds = ++todo_head; ! 607: ! 608: dset = dss[ds]; ! 609: dsize = dfasiz[ds]; ! 610: ! 611: if ( trace ) ! 612: fprintf( stderr, "state # %d:\n", ds ); ! 613: ! 614: sympartition( dset, dsize, symlist, duplist ); ! 615: ! 616: for ( sym = 1; sym <= numecs; ++sym ) ! 617: { ! 618: if ( symlist[sym] ) ! 619: { ! 620: symlist[sym] = 0; ! 621: ! 622: if ( duplist[sym] == NIL ) ! 623: { /* symbol has unique out-transitions */ ! 624: numstates = symfollowset( dset, dsize, sym, nset ); ! 625: nset = epsclosure( nset, &numstates, accset, ! 626: &nacc, &hashval ); ! 627: ! 628: if ( snstods( nset, numstates, accset, ! 629: nacc, hashval, &newds ) ) ! 630: { ! 631: totnst = totnst + numstates; ! 632: ++todo_next; ! 633: numas += nacc; ! 634: ! 635: if ( variable_trailing_context_rules && nacc > 0 ) ! 636: check_trailing_context( nset, numstates, ! 637: accset, nacc ); ! 638: } ! 639: ! 640: state[sym] = newds; ! 641: ! 642: if ( trace ) ! 643: fprintf( stderr, "\t%d\t%d\n", sym, newds ); ! 644: ! 645: targfreq[++targptr] = 1; ! 646: targstate[targptr] = newds; ! 647: ++numuniq; ! 648: } ! 649: ! 650: else ! 651: { ! 652: /* sym's equivalence class has the same transitions ! 653: * as duplist(sym)'s equivalence class ! 654: */ ! 655: targ = state[duplist[sym]]; ! 656: state[sym] = targ; ! 657: ! 658: if ( trace ) ! 659: fprintf( stderr, "\t%d\t%d\n", sym, targ ); ! 660: ! 661: /* update frequency count for destination state */ ! 662: ! 663: i = 0; ! 664: while ( targstate[++i] != targ ) ! 665: ; ! 666: ! 667: ++targfreq[i]; ! 668: ++numdup; ! 669: } ! 670: ! 671: ++totaltrans; ! 672: duplist[sym] = NIL; ! 673: } ! 674: } ! 675: ! 676: numsnpairs = numsnpairs + totaltrans; ! 677: ! 678: if ( caseins && ! useecs ) ! 679: { ! 680: register int j; ! 681: ! 682: for ( i = 'A', j = 'a'; i <= 'Z'; ++i, ++j ) ! 683: state[i] = state[j]; ! 684: } ! 685: ! 686: if ( ds > num_start_states ) ! 687: check_for_backtracking( ds, state ); ! 688: ! 689: if ( nultrans ) ! 690: { ! 691: nultrans[ds] = state[NUL_ec]; ! 692: state[NUL_ec] = 0; /* remove transition */ ! 693: } ! 694: ! 695: if ( fulltbl ) ! 696: { ! 697: /* supply array's 0-element */ ! 698: if ( ds == end_of_buffer_state ) ! 699: mk2data( -end_of_buffer_state ); ! 700: else ! 701: mk2data( end_of_buffer_state ); ! 702: ! 703: for ( i = 1; i < num_full_table_rows; ++i ) ! 704: /* jams are marked by negative of state number */ ! 705: mk2data( state[i] ? state[i] : -ds ); ! 706: ! 707: /* force ',' and dataflush() next call to mk2data */ ! 708: datapos = NUMDATAITEMS; ! 709: ! 710: /* force extra blank line next dataflush() */ ! 711: dataline = NUMDATALINES; ! 712: } ! 713: ! 714: else if ( fullspd ) ! 715: place_state( state, ds, totaltrans ); ! 716: ! 717: else if ( ds == end_of_buffer_state ) ! 718: /* special case this state to make sure it does what it's ! 719: * supposed to, i.e., jam on end-of-buffer ! 720: */ ! 721: stack1( ds, 0, 0, JAMSTATE ); ! 722: ! 723: else /* normal, compressed state */ ! 724: { ! 725: /* determine which destination state is the most common, and ! 726: * how many transitions to it there are ! 727: */ ! 728: ! 729: comfreq = 0; ! 730: comstate = 0; ! 731: ! 732: for ( i = 1; i <= targptr; ++i ) ! 733: if ( targfreq[i] > comfreq ) ! 734: { ! 735: comfreq = targfreq[i]; ! 736: comstate = targstate[i]; ! 737: } ! 738: ! 739: bldtbl( state, ds, totaltrans, comstate, comfreq ); ! 740: } ! 741: } ! 742: ! 743: if ( fulltbl ) ! 744: dataend(); ! 745: ! 746: else if ( ! fullspd ) ! 747: { ! 748: cmptmps(); /* create compressed template entries */ ! 749: ! 750: /* create tables for all the states with only one out-transition */ ! 751: while ( onesp > 0 ) ! 752: { ! 753: mk1tbl( onestate[onesp], onesym[onesp], onenext[onesp], ! 754: onedef[onesp] ); ! 755: --onesp; ! 756: } ! 757: ! 758: mkdeftbl(); ! 759: } ! 760: } ! 761: ! 762: ! 763: /* snstods - converts a set of ndfa states into a dfa state ! 764: * ! 765: * synopsis ! 766: * int sns[numstates], numstates, newds, accset[num_rules + 1], nacc, hashval; ! 767: * int snstods(); ! 768: * is_new_state = snstods( sns, numstates, accset, nacc, hashval, &newds ); ! 769: * ! 770: * on return, the dfa state number is in newds. ! 771: */ ! 772: ! 773: int snstods( sns, numstates, accset, nacc, hashval, newds_addr ) ! 774: int sns[], numstates, accset[], nacc, hashval, *newds_addr; ! 775: ! 776: { ! 777: int didsort = 0; ! 778: register int i, j; ! 779: int newds, *oldsns; ! 780: ! 781: for ( i = 1; i <= lastdfa; ++i ) ! 782: if ( hashval == dhash[i] ) ! 783: { ! 784: if ( numstates == dfasiz[i] ) ! 785: { ! 786: oldsns = dss[i]; ! 787: ! 788: if ( ! didsort ) ! 789: { ! 790: /* we sort the states in sns so we can compare it to ! 791: * oldsns quickly. we use bubble because there probably ! 792: * aren't very many states ! 793: */ ! 794: bubble( sns, numstates ); ! 795: didsort = 1; ! 796: } ! 797: ! 798: for ( j = 1; j <= numstates; ++j ) ! 799: if ( sns[j] != oldsns[j] ) ! 800: break; ! 801: ! 802: if ( j > numstates ) ! 803: { ! 804: ++dfaeql; ! 805: *newds_addr = i; ! 806: return ( 0 ); ! 807: } ! 808: ! 809: ++hshcol; ! 810: } ! 811: ! 812: else ! 813: ++hshsave; ! 814: } ! 815: ! 816: /* make a new dfa */ ! 817: ! 818: if ( ++lastdfa >= current_max_dfas ) ! 819: increase_max_dfas(); ! 820: ! 821: newds = lastdfa; ! 822: ! 823: dss[newds] = (int *) malloc( (unsigned) ((numstates + 1) * sizeof( int )) ); ! 824: ! 825: if ( ! dss[newds] ) ! 826: flexfatal( "dynamic memory failure in snstods()" ); ! 827: ! 828: /* if we haven't already sorted the states in sns, we do so now, so that ! 829: * future comparisons with it can be made quickly ! 830: */ ! 831: ! 832: if ( ! didsort ) ! 833: bubble( sns, numstates ); ! 834: ! 835: for ( i = 1; i <= numstates; ++i ) ! 836: dss[newds][i] = sns[i]; ! 837: ! 838: dfasiz[newds] = numstates; ! 839: dhash[newds] = hashval; ! 840: ! 841: if ( nacc == 0 ) ! 842: { ! 843: if ( reject ) ! 844: dfaacc[newds].dfaacc_set = (int *) 0; ! 845: else ! 846: dfaacc[newds].dfaacc_state = 0; ! 847: ! 848: accsiz[newds] = 0; ! 849: } ! 850: ! 851: else if ( reject ) ! 852: { ! 853: /* we sort the accepting set in increasing order so the disambiguating ! 854: * rule that the first rule listed is considered match in the event of ! 855: * ties will work. We use a bubble sort since the list is probably ! 856: * quite small. ! 857: */ ! 858: ! 859: bubble( accset, nacc ); ! 860: ! 861: dfaacc[newds].dfaacc_set = ! 862: (int *) malloc( (unsigned) ((nacc + 1) * sizeof( int )) ); ! 863: ! 864: if ( ! dfaacc[newds].dfaacc_set ) ! 865: flexfatal( "dynamic memory failure in snstods()" ); ! 866: ! 867: /* save the accepting set for later */ ! 868: for ( i = 1; i <= nacc; ++i ) ! 869: dfaacc[newds].dfaacc_set[i] = accset[i]; ! 870: ! 871: accsiz[newds] = nacc; ! 872: } ! 873: ! 874: else ! 875: { /* find lowest numbered rule so the disambiguating rule will work */ ! 876: j = num_rules + 1; ! 877: ! 878: for ( i = 1; i <= nacc; ++i ) ! 879: if ( accset[i] < j ) ! 880: j = accset[i]; ! 881: ! 882: dfaacc[newds].dfaacc_state = j; ! 883: } ! 884: ! 885: *newds_addr = newds; ! 886: ! 887: return ( 1 ); ! 888: } ! 889: ! 890: ! 891: /* symfollowset - follow the symbol transitions one step ! 892: * ! 893: * synopsis ! 894: * int ds[current_max_dfa_size], dsize, transsym; ! 895: * int nset[current_max_dfa_size], numstates; ! 896: * numstates = symfollowset( ds, dsize, transsym, nset ); ! 897: */ ! 898: ! 899: int symfollowset( ds, dsize, transsym, nset ) ! 900: int ds[], dsize, transsym, nset[]; ! 901: ! 902: { ! 903: int ns, tsp, sym, i, j, lenccl, ch, numstates; ! 904: int ccllist; ! 905: ! 906: numstates = 0; ! 907: ! 908: for ( i = 1; i <= dsize; ++i ) ! 909: { /* for each nfa state ns in the state set of ds */ ! 910: ns = ds[i]; ! 911: sym = transchar[ns]; ! 912: tsp = trans1[ns]; ! 913: ! 914: if ( sym < 0 ) ! 915: { /* it's a character class */ ! 916: sym = -sym; ! 917: ccllist = cclmap[sym]; ! 918: lenccl = ccllen[sym]; ! 919: ! 920: if ( cclng[sym] ) ! 921: { ! 922: for ( j = 0; j < lenccl; ++j ) ! 923: { /* loop through negated character class */ ! 924: ch = ccltbl[ccllist + j]; ! 925: ! 926: if ( ch == 0 ) ! 927: ch = NUL_ec; ! 928: ! 929: if ( ch > transsym ) ! 930: break; /* transsym isn't in negated ccl */ ! 931: ! 932: else if ( ch == transsym ) ! 933: /* next 2 */ goto bottom; ! 934: } ! 935: ! 936: /* didn't find transsym in ccl */ ! 937: nset[++numstates] = tsp; ! 938: } ! 939: ! 940: else ! 941: for ( j = 0; j < lenccl; ++j ) ! 942: { ! 943: ch = ccltbl[ccllist + j]; ! 944: ! 945: if ( ch == 0 ) ! 946: ch = NUL_ec; ! 947: ! 948: if ( ch > transsym ) ! 949: break; ! 950: ! 951: else if ( ch == transsym ) ! 952: { ! 953: nset[++numstates] = tsp; ! 954: break; ! 955: } ! 956: } ! 957: } ! 958: ! 959: else if ( sym >= 'A' && sym <= 'Z' && caseins ) ! 960: flexfatal( "consistency check failed in symfollowset" ); ! 961: ! 962: else if ( sym == SYM_EPSILON ) ! 963: { /* do nothing */ ! 964: } ! 965: ! 966: else if ( abs( ecgroup[sym] ) == transsym ) ! 967: nset[++numstates] = tsp; ! 968: ! 969: bottom: ! 970: ; ! 971: } ! 972: ! 973: return ( numstates ); ! 974: } ! 975: ! 976: ! 977: /* sympartition - partition characters with same out-transitions ! 978: * ! 979: * synopsis ! 980: * integer ds[current_max_dfa_size], numstates, duplist[numecs]; ! 981: * symlist[numecs]; ! 982: * sympartition( ds, numstates, symlist, duplist ); ! 983: */ ! 984: ! 985: void sympartition( ds, numstates, symlist, duplist ) ! 986: int ds[], numstates, duplist[]; ! 987: int symlist[]; ! 988: ! 989: { ! 990: int tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich; ! 991: ! 992: /* partitioning is done by creating equivalence classes for those ! 993: * characters which have out-transitions from the given state. Thus ! 994: * we are really creating equivalence classes of equivalence classes. ! 995: */ ! 996: ! 997: for ( i = 1; i <= numecs; ++i ) ! 998: { /* initialize equivalence class list */ ! 999: duplist[i] = i - 1; ! 1000: dupfwd[i] = i + 1; ! 1001: } ! 1002: ! 1003: duplist[1] = NIL; ! 1004: dupfwd[numecs] = NIL; ! 1005: ! 1006: for ( i = 1; i <= numstates; ++i ) ! 1007: { ! 1008: ns = ds[i]; ! 1009: tch = transchar[ns]; ! 1010: ! 1011: if ( tch != SYM_EPSILON ) ! 1012: { ! 1013: if ( tch < -lastccl || tch > csize ) ! 1014: { ! 1015: if ( tch > csize && tch <= CSIZE ) ! 1016: flexerror( "scanner requires -8 flag" ); ! 1017: ! 1018: else ! 1019: flexfatal( ! 1020: "bad transition character detected in sympartition()" ); ! 1021: } ! 1022: ! 1023: if ( tch >= 0 ) ! 1024: { /* character transition */ ! 1025: /* abs() needed for fake %t ec's */ ! 1026: int ec = abs( ecgroup[tch] ); ! 1027: ! 1028: mkechar( ec, dupfwd, duplist ); ! 1029: symlist[ec] = 1; ! 1030: } ! 1031: ! 1032: else ! 1033: { /* character class */ ! 1034: tch = -tch; ! 1035: ! 1036: lenccl = ccllen[tch]; ! 1037: cclp = cclmap[tch]; ! 1038: mkeccl( ccltbl + cclp, lenccl, dupfwd, duplist, numecs, ! 1039: NUL_ec ); ! 1040: ! 1041: if ( cclng[tch] ) ! 1042: { ! 1043: j = 0; ! 1044: ! 1045: for ( k = 0; k < lenccl; ++k ) ! 1046: { ! 1047: ich = ccltbl[cclp + k]; ! 1048: ! 1049: if ( ich == 0 ) ! 1050: ich = NUL_ec; ! 1051: ! 1052: for ( ++j; j < ich; ++j ) ! 1053: symlist[j] = 1; ! 1054: } ! 1055: ! 1056: for ( ++j; j <= numecs; ++j ) ! 1057: symlist[j] = 1; ! 1058: } ! 1059: ! 1060: else ! 1061: for ( k = 0; k < lenccl; ++k ) ! 1062: { ! 1063: ich = ccltbl[cclp + k]; ! 1064: ! 1065: if ( ich == 0 ) ! 1066: ich = NUL_ec; ! 1067: ! 1068: symlist[ich] = 1; ! 1069: } ! 1070: } ! 1071: } ! 1072: } ! 1073: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.