|
|
1.1 ! root 1: /*- ! 2: * Copyright (c) 1990 The Regents of the University of California. ! 3: * All rights reserved. ! 4: * ! 5: * This code is derived from software contributed to Berkeley by ! 6: * Vern Paxson. ! 7: * ! 8: * The United States Government has rights in this work pursuant ! 9: * to contract no. DE-AC03-76SF00098 between the United States ! 10: * Department of Energy and the University of California. ! 11: * ! 12: * Redistribution and use in source and binary forms are permitted provided ! 13: * that: (1) source distributions retain this entire copyright notice and ! 14: * comment, and (2) distributions including binaries display the following ! 15: * acknowledgement: ``This product includes software developed by the ! 16: * University of California, Berkeley and its contributors'' in the ! 17: * documentation or other materials provided with the distribution and in ! 18: * all advertising materials mentioning features or use of this software. ! 19: * Neither the name of the University nor the names of its contributors may ! 20: * be used to endorse or promote products derived from this software without ! 21: * specific prior written permission. ! 22: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED ! 23: * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF ! 24: * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. ! 25: */ ! 26: ! 27: #ifndef lint ! 28: static char sccsid[] = "@(#)nfa.c 5.2 (Berkeley) 6/18/90"; ! 29: #endif /* not lint */ ! 30: ! 31: /* nfa - NFA construction routines */ ! 32: ! 33: #include "flexdef.h" ! 34: ! 35: /* declare functions that have forward references */ ! 36: ! 37: int dupmachine PROTO((int)); ! 38: void mkxtion PROTO((int, int)); ! 39: ! 40: ! 41: /* add_accept - add an accepting state to a machine ! 42: * ! 43: * synopsis ! 44: * ! 45: * add_accept( mach, accepting_number ); ! 46: * ! 47: * accepting_number becomes mach's accepting number. ! 48: */ ! 49: ! 50: void add_accept( mach, accepting_number ) ! 51: int mach, accepting_number; ! 52: ! 53: { ! 54: /* hang the accepting number off an epsilon state. if it is associated ! 55: * with a state that has a non-epsilon out-transition, then the state ! 56: * will accept BEFORE it makes that transition, i.e., one character ! 57: * too soon ! 58: */ ! 59: ! 60: if ( transchar[finalst[mach]] == SYM_EPSILON ) ! 61: accptnum[finalst[mach]] = accepting_number; ! 62: ! 63: else ! 64: { ! 65: int astate = mkstate( SYM_EPSILON ); ! 66: accptnum[astate] = accepting_number; ! 67: mach = link_machines( mach, astate ); ! 68: } ! 69: } ! 70: ! 71: ! 72: /* copysingl - make a given number of copies of a singleton machine ! 73: * ! 74: * synopsis ! 75: * ! 76: * newsng = copysingl( singl, num ); ! 77: * ! 78: * newsng - a new singleton composed of num copies of singl ! 79: * singl - a singleton machine ! 80: * num - the number of copies of singl to be present in newsng ! 81: */ ! 82: ! 83: int copysingl( singl, num ) ! 84: int singl, num; ! 85: ! 86: { ! 87: int copy, i; ! 88: ! 89: copy = mkstate( SYM_EPSILON ); ! 90: ! 91: for ( i = 1; i <= num; ++i ) ! 92: copy = link_machines( copy, dupmachine( singl ) ); ! 93: ! 94: return ( copy ); ! 95: } ! 96: ! 97: ! 98: /* dumpnfa - debugging routine to write out an nfa ! 99: * ! 100: * synopsis ! 101: * int state1; ! 102: * dumpnfa( state1 ); ! 103: */ ! 104: ! 105: void dumpnfa( state1 ) ! 106: int state1; ! 107: ! 108: { ! 109: int sym, tsp1, tsp2, anum, ns; ! 110: ! 111: fprintf( stderr, "\n\n********** beginning dump of nfa with start state %d\n", ! 112: state1 ); ! 113: ! 114: /* we probably should loop starting at firstst[state1] and going to ! 115: * lastst[state1], but they're not maintained properly when we "or" ! 116: * all of the rules together. So we use our knowledge that the machine ! 117: * starts at state 1 and ends at lastnfa. ! 118: */ ! 119: ! 120: /* for ( ns = firstst[state1]; ns <= lastst[state1]; ++ns ) */ ! 121: for ( ns = 1; ns <= lastnfa; ++ns ) ! 122: { ! 123: fprintf( stderr, "state # %4d\t", ns ); ! 124: ! 125: sym = transchar[ns]; ! 126: tsp1 = trans1[ns]; ! 127: tsp2 = trans2[ns]; ! 128: anum = accptnum[ns]; ! 129: ! 130: fprintf( stderr, "%3d: %4d, %4d", sym, tsp1, tsp2 ); ! 131: ! 132: if ( anum != NIL ) ! 133: fprintf( stderr, " [%d]", anum ); ! 134: ! 135: fprintf( stderr, "\n" ); ! 136: } ! 137: ! 138: fprintf( stderr, "********** end of dump\n" ); ! 139: } ! 140: ! 141: ! 142: /* dupmachine - make a duplicate of a given machine ! 143: * ! 144: * synopsis ! 145: * ! 146: * copy = dupmachine( mach ); ! 147: * ! 148: * copy - holds duplicate of mach ! 149: * mach - machine to be duplicated ! 150: * ! 151: * note that the copy of mach is NOT an exact duplicate; rather, all the ! 152: * transition states values are adjusted so that the copy is self-contained, ! 153: * as the original should have been. ! 154: * ! 155: * also note that the original MUST be contiguous, with its low and high ! 156: * states accessible by the arrays firstst and lastst ! 157: */ ! 158: ! 159: int dupmachine( mach ) ! 160: int mach; ! 161: ! 162: { ! 163: int i, init, state_offset; ! 164: int state = 0; ! 165: int last = lastst[mach]; ! 166: ! 167: for ( i = firstst[mach]; i <= last; ++i ) ! 168: { ! 169: state = mkstate( transchar[i] ); ! 170: ! 171: if ( trans1[i] != NO_TRANSITION ) ! 172: { ! 173: mkxtion( finalst[state], trans1[i] + state - i ); ! 174: ! 175: if ( transchar[i] == SYM_EPSILON && trans2[i] != NO_TRANSITION ) ! 176: mkxtion( finalst[state], trans2[i] + state - i ); ! 177: } ! 178: ! 179: accptnum[state] = accptnum[i]; ! 180: } ! 181: ! 182: if ( state == 0 ) ! 183: flexfatal( "empty machine in dupmachine()" ); ! 184: ! 185: state_offset = state - i + 1; ! 186: ! 187: init = mach + state_offset; ! 188: firstst[init] = firstst[mach] + state_offset; ! 189: finalst[init] = finalst[mach] + state_offset; ! 190: lastst[init] = lastst[mach] + state_offset; ! 191: ! 192: return ( init ); ! 193: } ! 194: ! 195: ! 196: /* finish_rule - finish up the processing for a rule ! 197: * ! 198: * synopsis ! 199: * ! 200: * finish_rule( mach, variable_trail_rule, headcnt, trailcnt ); ! 201: * ! 202: * An accepting number is added to the given machine. If variable_trail_rule ! 203: * is true then the rule has trailing context and both the head and trail ! 204: * are variable size. Otherwise if headcnt or trailcnt is non-zero then ! 205: * the machine recognizes a pattern with trailing context and headcnt is ! 206: * the number of characters in the matched part of the pattern, or zero ! 207: * if the matched part has variable length. trailcnt is the number of ! 208: * trailing context characters in the pattern, or zero if the trailing ! 209: * context has variable length. ! 210: */ ! 211: ! 212: void finish_rule( mach, variable_trail_rule, headcnt, trailcnt ) ! 213: int mach, variable_trail_rule, headcnt, trailcnt; ! 214: ! 215: { ! 216: add_accept( mach, num_rules ); ! 217: ! 218: /* we did this in new_rule(), but it often gets the wrong ! 219: * number because we do it before we start parsing the current rule ! 220: */ ! 221: rule_linenum[num_rules] = linenum; ! 222: ! 223: /* if this is a continued action, then the line-number has ! 224: * already been updated, giving us the wrong number ! 225: */ ! 226: if ( continued_action ) ! 227: --rule_linenum[num_rules]; ! 228: ! 229: fprintf( temp_action_file, "case %d:\n", num_rules ); ! 230: ! 231: if ( variable_trail_rule ) ! 232: { ! 233: rule_type[num_rules] = RULE_VARIABLE; ! 234: ! 235: if ( performance_report ) ! 236: fprintf( stderr, "Variable trailing context rule at line %d\n", ! 237: rule_linenum[num_rules] ); ! 238: ! 239: variable_trailing_context_rules = true; ! 240: } ! 241: ! 242: else ! 243: { ! 244: rule_type[num_rules] = RULE_NORMAL; ! 245: ! 246: if ( headcnt > 0 || trailcnt > 0 ) ! 247: { ! 248: /* do trailing context magic to not match the trailing characters */ ! 249: char *scanner_cp = "yy_c_buf_p = yy_cp"; ! 250: char *scanner_bp = "yy_bp"; ! 251: ! 252: fprintf( temp_action_file, ! 253: "*yy_cp = yy_hold_char; /* undo effects of setting up yytext */\n" ); ! 254: ! 255: if ( headcnt > 0 ) ! 256: fprintf( temp_action_file, "%s = %s + %d;\n", ! 257: scanner_cp, scanner_bp, headcnt ); ! 258: ! 259: else ! 260: fprintf( temp_action_file, ! 261: "%s -= %d;\n", scanner_cp, trailcnt ); ! 262: ! 263: fprintf( temp_action_file, ! 264: "YY_DO_BEFORE_ACTION; /* set up yytext again */\n" ); ! 265: } ! 266: } ! 267: ! 268: line_directive_out( temp_action_file ); ! 269: } ! 270: ! 271: ! 272: /* link_machines - connect two machines together ! 273: * ! 274: * synopsis ! 275: * ! 276: * new = link_machines( first, last ); ! 277: * ! 278: * new - a machine constructed by connecting first to last ! 279: * first - the machine whose successor is to be last ! 280: * last - the machine whose predecessor is to be first ! 281: * ! 282: * note: this routine concatenates the machine first with the machine ! 283: * last to produce a machine new which will pattern-match first first ! 284: * and then last, and will fail if either of the sub-patterns fails. ! 285: * FIRST is set to new by the operation. last is unmolested. ! 286: */ ! 287: ! 288: int link_machines( first, last ) ! 289: int first, last; ! 290: ! 291: { ! 292: if ( first == NIL ) ! 293: return ( last ); ! 294: ! 295: else if ( last == NIL ) ! 296: return ( first ); ! 297: ! 298: else ! 299: { ! 300: mkxtion( finalst[first], last ); ! 301: finalst[first] = finalst[last]; ! 302: lastst[first] = max( lastst[first], lastst[last] ); ! 303: firstst[first] = min( firstst[first], firstst[last] ); ! 304: ! 305: return ( first ); ! 306: } ! 307: } ! 308: ! 309: ! 310: /* mark_beginning_as_normal - mark each "beginning" state in a machine ! 311: * as being a "normal" (i.e., not trailing context- ! 312: * associated) states ! 313: * ! 314: * synopsis ! 315: * ! 316: * mark_beginning_as_normal( mach ) ! 317: * ! 318: * mach - machine to mark ! 319: * ! 320: * The "beginning" states are the epsilon closure of the first state ! 321: */ ! 322: ! 323: void mark_beginning_as_normal( mach ) ! 324: register int mach; ! 325: ! 326: { ! 327: switch ( state_type[mach] ) ! 328: { ! 329: case STATE_NORMAL: ! 330: /* oh, we've already visited here */ ! 331: return; ! 332: ! 333: case STATE_TRAILING_CONTEXT: ! 334: state_type[mach] = STATE_NORMAL; ! 335: ! 336: if ( transchar[mach] == SYM_EPSILON ) ! 337: { ! 338: if ( trans1[mach] != NO_TRANSITION ) ! 339: mark_beginning_as_normal( trans1[mach] ); ! 340: ! 341: if ( trans2[mach] != NO_TRANSITION ) ! 342: mark_beginning_as_normal( trans2[mach] ); ! 343: } ! 344: break; ! 345: ! 346: default: ! 347: flexerror( "bad state type in mark_beginning_as_normal()" ); ! 348: break; ! 349: } ! 350: } ! 351: ! 352: ! 353: /* mkbranch - make a machine that branches to two machines ! 354: * ! 355: * synopsis ! 356: * ! 357: * branch = mkbranch( first, second ); ! 358: * ! 359: * branch - a machine which matches either first's pattern or second's ! 360: * first, second - machines whose patterns are to be or'ed (the | operator) ! 361: * ! 362: * note that first and second are NEITHER destroyed by the operation. Also, ! 363: * the resulting machine CANNOT be used with any other "mk" operation except ! 364: * more mkbranch's. Compare with mkor() ! 365: */ ! 366: ! 367: int mkbranch( first, second ) ! 368: int first, second; ! 369: ! 370: { ! 371: int eps; ! 372: ! 373: if ( first == NO_TRANSITION ) ! 374: return ( second ); ! 375: ! 376: else if ( second == NO_TRANSITION ) ! 377: return ( first ); ! 378: ! 379: eps = mkstate( SYM_EPSILON ); ! 380: ! 381: mkxtion( eps, first ); ! 382: mkxtion( eps, second ); ! 383: ! 384: return ( eps ); ! 385: } ! 386: ! 387: ! 388: /* mkclos - convert a machine into a closure ! 389: * ! 390: * synopsis ! 391: * new = mkclos( state ); ! 392: * ! 393: * new - a new state which matches the closure of "state" ! 394: */ ! 395: ! 396: int mkclos( state ) ! 397: int state; ! 398: ! 399: { ! 400: return ( mkopt( mkposcl( state ) ) ); ! 401: } ! 402: ! 403: ! 404: /* mkopt - make a machine optional ! 405: * ! 406: * synopsis ! 407: * ! 408: * new = mkopt( mach ); ! 409: * ! 410: * new - a machine which optionally matches whatever mach matched ! 411: * mach - the machine to make optional ! 412: * ! 413: * notes: ! 414: * 1. mach must be the last machine created ! 415: * 2. mach is destroyed by the call ! 416: */ ! 417: ! 418: int mkopt( mach ) ! 419: int mach; ! 420: ! 421: { ! 422: int eps; ! 423: ! 424: if ( ! SUPER_FREE_EPSILON(finalst[mach]) ) ! 425: { ! 426: eps = mkstate( SYM_EPSILON ); ! 427: mach = link_machines( mach, eps ); ! 428: } ! 429: ! 430: /* can't skimp on the following if FREE_EPSILON(mach) is true because ! 431: * some state interior to "mach" might point back to the beginning ! 432: * for a closure ! 433: */ ! 434: eps = mkstate( SYM_EPSILON ); ! 435: mach = link_machines( eps, mach ); ! 436: ! 437: mkxtion( mach, finalst[mach] ); ! 438: ! 439: return ( mach ); ! 440: } ! 441: ! 442: ! 443: /* mkor - make a machine that matches either one of two machines ! 444: * ! 445: * synopsis ! 446: * ! 447: * new = mkor( first, second ); ! 448: * ! 449: * new - a machine which matches either first's pattern or second's ! 450: * first, second - machines whose patterns are to be or'ed (the | operator) ! 451: * ! 452: * note that first and second are both destroyed by the operation ! 453: * the code is rather convoluted because an attempt is made to minimize ! 454: * the number of epsilon states needed ! 455: */ ! 456: ! 457: int mkor( first, second ) ! 458: int first, second; ! 459: ! 460: { ! 461: int eps, orend; ! 462: ! 463: if ( first == NIL ) ! 464: return ( second ); ! 465: ! 466: else if ( second == NIL ) ! 467: return ( first ); ! 468: ! 469: else ! 470: { ! 471: /* see comment in mkopt() about why we can't use the first state ! 472: * of "first" or "second" if they satisfy "FREE_EPSILON" ! 473: */ ! 474: eps = mkstate( SYM_EPSILON ); ! 475: ! 476: first = link_machines( eps, first ); ! 477: ! 478: mkxtion( first, second ); ! 479: ! 480: if ( SUPER_FREE_EPSILON(finalst[first]) && ! 481: accptnum[finalst[first]] == NIL ) ! 482: { ! 483: orend = finalst[first]; ! 484: mkxtion( finalst[second], orend ); ! 485: } ! 486: ! 487: else if ( SUPER_FREE_EPSILON(finalst[second]) && ! 488: accptnum[finalst[second]] == NIL ) ! 489: { ! 490: orend = finalst[second]; ! 491: mkxtion( finalst[first], orend ); ! 492: } ! 493: ! 494: else ! 495: { ! 496: eps = mkstate( SYM_EPSILON ); ! 497: ! 498: first = link_machines( first, eps ); ! 499: orend = finalst[first]; ! 500: ! 501: mkxtion( finalst[second], orend ); ! 502: } ! 503: } ! 504: ! 505: finalst[first] = orend; ! 506: return ( first ); ! 507: } ! 508: ! 509: ! 510: /* mkposcl - convert a machine into a positive closure ! 511: * ! 512: * synopsis ! 513: * new = mkposcl( state ); ! 514: * ! 515: * new - a machine matching the positive closure of "state" ! 516: */ ! 517: ! 518: int mkposcl( state ) ! 519: int state; ! 520: ! 521: { ! 522: int eps; ! 523: ! 524: if ( SUPER_FREE_EPSILON(finalst[state]) ) ! 525: { ! 526: mkxtion( finalst[state], state ); ! 527: return ( state ); ! 528: } ! 529: ! 530: else ! 531: { ! 532: eps = mkstate( SYM_EPSILON ); ! 533: mkxtion( eps, state ); ! 534: return ( link_machines( state, eps ) ); ! 535: } ! 536: } ! 537: ! 538: ! 539: /* mkrep - make a replicated machine ! 540: * ! 541: * synopsis ! 542: * new = mkrep( mach, lb, ub ); ! 543: * ! 544: * new - a machine that matches whatever "mach" matched from "lb" ! 545: * number of times to "ub" number of times ! 546: * ! 547: * note ! 548: * if "ub" is INFINITY then "new" matches "lb" or more occurrences of "mach" ! 549: */ ! 550: ! 551: int mkrep( mach, lb, ub ) ! 552: int mach, lb, ub; ! 553: ! 554: { ! 555: int base_mach, tail, copy, i; ! 556: ! 557: base_mach = copysingl( mach, lb - 1 ); ! 558: ! 559: if ( ub == INFINITY ) ! 560: { ! 561: copy = dupmachine( mach ); ! 562: mach = link_machines( mach, ! 563: link_machines( base_mach, mkclos( copy ) ) ); ! 564: } ! 565: ! 566: else ! 567: { ! 568: tail = mkstate( SYM_EPSILON ); ! 569: ! 570: for ( i = lb; i < ub; ++i ) ! 571: { ! 572: copy = dupmachine( mach ); ! 573: tail = mkopt( link_machines( copy, tail ) ); ! 574: } ! 575: ! 576: mach = link_machines( mach, link_machines( base_mach, tail ) ); ! 577: } ! 578: ! 579: return ( mach ); ! 580: } ! 581: ! 582: ! 583: /* mkstate - create a state with a transition on a given symbol ! 584: * ! 585: * synopsis ! 586: * ! 587: * state = mkstate( sym ); ! 588: * ! 589: * state - a new state matching sym ! 590: * sym - the symbol the new state is to have an out-transition on ! 591: * ! 592: * note that this routine makes new states in ascending order through the ! 593: * state array (and increments LASTNFA accordingly). The routine DUPMACHINE ! 594: * relies on machines being made in ascending order and that they are ! 595: * CONTIGUOUS. Change it and you will have to rewrite DUPMACHINE (kludge ! 596: * that it admittedly is) ! 597: */ ! 598: ! 599: int mkstate( sym ) ! 600: int sym; ! 601: ! 602: { ! 603: if ( ++lastnfa >= current_mns ) ! 604: { ! 605: if ( (current_mns += MNS_INCREMENT) >= MAXIMUM_MNS ) ! 606: lerrif( "input rules are too complicated (>= %d NFA states)", ! 607: current_mns ); ! 608: ! 609: ++num_reallocs; ! 610: ! 611: firstst = reallocate_integer_array( firstst, current_mns ); ! 612: lastst = reallocate_integer_array( lastst, current_mns ); ! 613: finalst = reallocate_integer_array( finalst, current_mns ); ! 614: transchar = reallocate_integer_array( transchar, current_mns ); ! 615: trans1 = reallocate_integer_array( trans1, current_mns ); ! 616: trans2 = reallocate_integer_array( trans2, current_mns ); ! 617: accptnum = reallocate_integer_array( accptnum, current_mns ); ! 618: assoc_rule = reallocate_integer_array( assoc_rule, current_mns ); ! 619: state_type = reallocate_integer_array( state_type, current_mns ); ! 620: } ! 621: ! 622: firstst[lastnfa] = lastnfa; ! 623: finalst[lastnfa] = lastnfa; ! 624: lastst[lastnfa] = lastnfa; ! 625: transchar[lastnfa] = sym; ! 626: trans1[lastnfa] = NO_TRANSITION; ! 627: trans2[lastnfa] = NO_TRANSITION; ! 628: accptnum[lastnfa] = NIL; ! 629: assoc_rule[lastnfa] = num_rules; ! 630: state_type[lastnfa] = current_state_type; ! 631: ! 632: /* fix up equivalence classes base on this transition. Note that any ! 633: * character which has its own transition gets its own equivalence class. ! 634: * Thus only characters which are only in character classes have a chance ! 635: * at being in the same equivalence class. E.g. "a|b" puts 'a' and 'b' ! 636: * into two different equivalence classes. "[ab]" puts them in the same ! 637: * equivalence class (barring other differences elsewhere in the input). ! 638: */ ! 639: ! 640: if ( sym < 0 ) ! 641: { ! 642: /* we don't have to update the equivalence classes since that was ! 643: * already done when the ccl was created for the first time ! 644: */ ! 645: } ! 646: ! 647: else if ( sym == SYM_EPSILON ) ! 648: ++numeps; ! 649: ! 650: else ! 651: { ! 652: if ( useecs ) ! 653: /* map NUL's to csize */ ! 654: mkechar( sym ? sym : csize, nextecm, ecgroup ); ! 655: } ! 656: ! 657: return ( lastnfa ); ! 658: } ! 659: ! 660: ! 661: /* mkxtion - make a transition from one state to another ! 662: * ! 663: * synopsis ! 664: * ! 665: * mkxtion( statefrom, stateto ); ! 666: * ! 667: * statefrom - the state from which the transition is to be made ! 668: * stateto - the state to which the transition is to be made ! 669: */ ! 670: ! 671: void mkxtion( statefrom, stateto ) ! 672: int statefrom, stateto; ! 673: ! 674: { ! 675: if ( trans1[statefrom] == NO_TRANSITION ) ! 676: trans1[statefrom] = stateto; ! 677: ! 678: else if ( (transchar[statefrom] != SYM_EPSILON) || ! 679: (trans2[statefrom] != NO_TRANSITION) ) ! 680: flexfatal( "found too many transitions in mkxtion()" ); ! 681: ! 682: else ! 683: { /* second out-transition for an epsilon state */ ! 684: ++eps2; ! 685: trans2[statefrom] = stateto; ! 686: } ! 687: } ! 688: ! 689: /* new_rule - initialize for a new rule ! 690: * ! 691: * synopsis ! 692: * ! 693: * new_rule(); ! 694: * ! 695: * the global num_rules is incremented and the any corresponding dynamic ! 696: * arrays (such as rule_type[]) are grown as needed. ! 697: */ ! 698: ! 699: void new_rule() ! 700: ! 701: { ! 702: if ( ++num_rules >= current_max_rules ) ! 703: { ! 704: ++num_reallocs; ! 705: current_max_rules += MAX_RULES_INCREMENT; ! 706: rule_type = reallocate_integer_array( rule_type, current_max_rules ); ! 707: rule_linenum = ! 708: reallocate_integer_array( rule_linenum, current_max_rules ); ! 709: } ! 710: ! 711: if ( num_rules > MAX_RULE ) ! 712: lerrif( "too many rules (> %d)!", MAX_RULE ); ! 713: ! 714: rule_linenum[num_rules] = linenum; ! 715: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.