Annotation of 43BSDReno/pgrm/lex/dfa.c, revision 1.1

1.1     ! root        1: /*-
        !             2:  * Copyright (c) 1990 The Regents of the University of California.
        !             3:  * All rights reserved.
        !             4:  *
        !             5:  * This code is derived from software contributed to Berkeley by
        !             6:  * Vern Paxson.
        !             7:  * 
        !             8:  * The United States Government has rights in this work pursuant
        !             9:  * to contract no. DE-AC03-76SF00098 between the United States
        !            10:  * Department of Energy and the University of California.
        !            11:  *
        !            12:  * Redistribution and use in source and binary forms are permitted provided
        !            13:  * that: (1) source distributions retain this entire copyright notice and
        !            14:  * comment, and (2) distributions including binaries display the following
        !            15:  * acknowledgement:  ``This product includes software developed by the
        !            16:  * University of California, Berkeley and its contributors'' in the
        !            17:  * documentation or other materials provided with the distribution and in
        !            18:  * all advertising materials mentioning features or use of this software.
        !            19:  * Neither the name of the University nor the names of its contributors may
        !            20:  * be used to endorse or promote products derived from this software without
        !            21:  * specific prior written permission.
        !            22:  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
        !            23:  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
        !            24:  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
        !            25:  */
        !            26: 
        !            27: #ifndef lint
        !            28: static char sccsid[] = "@(#)dfa.c      5.2 (Berkeley) 6/18/90";
        !            29: #endif /* not lint */
        !            30: 
        !            31: /* dfa - DFA construction routines */
        !            32: 
        !            33: #include "flexdef.h"
        !            34: 
        !            35: /* declare functions that have forward references */
        !            36: 
        !            37: void dump_associated_rules PROTO((FILE*, int));
        !            38: void dump_transitions PROTO((FILE*, int[]));
        !            39: void sympartition PROTO((int[], int, int[], int[]));
        !            40: int symfollowset PROTO((int[], int, int, int[]));
        !            41: 
        !            42: 
        !            43: /* check_for_backtracking - check a DFA state for backtracking
        !            44:  *
        !            45:  * synopsis
        !            46:  *     int ds, state[numecs];
        !            47:  *     check_for_backtracking( ds, state );
        !            48:  *
        !            49:  * ds is the number of the state to check and state[] is its out-transitions,
        !            50:  * indexed by equivalence class, and state_rules[] is the set of rules
        !            51:  * associated with this state
        !            52:  */
        !            53: 
        !            54: void check_for_backtracking( ds, state )
        !            55: int ds;
        !            56: int state[];
        !            57: 
        !            58:     {
        !            59:     if ( (reject && ! dfaacc[ds].dfaacc_set) || ! dfaacc[ds].dfaacc_state )
        !            60:        { /* state is non-accepting */
        !            61:        ++num_backtracking;
        !            62: 
        !            63:        if ( backtrack_report )
        !            64:            {
        !            65:            fprintf( backtrack_file, "State #%d is non-accepting -\n", ds );
        !            66: 
        !            67:            /* identify the state */
        !            68:            dump_associated_rules( backtrack_file, ds );
        !            69: 
        !            70:            /* now identify it further using the out- and jam-transitions */
        !            71:            dump_transitions( backtrack_file, state );
        !            72: 
        !            73:            putc( '\n', backtrack_file );
        !            74:            }
        !            75:        }
        !            76:     }
        !            77: 
        !            78: 
        !            79: /* check_trailing_context - check to see if NFA state set constitutes
        !            80:  *                          "dangerous" trailing context
        !            81:  *
        !            82:  * synopsis
        !            83:  *    int nfa_states[num_states+1], num_states;
        !            84:  *    int accset[nacc+1], nacc;
        !            85:  *    check_trailing_context( nfa_states, num_states, accset, nacc );
        !            86:  *
        !            87:  * NOTES
        !            88:  *    Trailing context is "dangerous" if both the head and the trailing
        !            89:  *  part are of variable size \and/ there's a DFA state which contains
        !            90:  *  both an accepting state for the head part of the rule and NFA states
        !            91:  *  which occur after the beginning of the trailing context.
        !            92:  *  When such a rule is matched, it's impossible to tell if having been
        !            93:  *  in the DFA state indicates the beginning of the trailing context
        !            94:  *  or further-along scanning of the pattern.  In these cases, a warning
        !            95:  *  message is issued.
        !            96:  *
        !            97:  *    nfa_states[1 .. num_states] is the list of NFA states in the DFA.
        !            98:  *    accset[1 .. nacc] is the list of accepting numbers for the DFA state.
        !            99:  */
        !           100: 
        !           101: void check_trailing_context( nfa_states, num_states, accset, nacc )
        !           102: int *nfa_states, num_states;
        !           103: int *accset;
        !           104: register int nacc;
        !           105: 
        !           106:     {
        !           107:     register int i, j;
        !           108: 
        !           109:     for ( i = 1; i <= num_states; ++i )
        !           110:        {
        !           111:        int ns = nfa_states[i];
        !           112:        register int type = state_type[ns];
        !           113:        register int ar = assoc_rule[ns];
        !           114: 
        !           115:        if ( type == STATE_NORMAL || rule_type[ar] != RULE_VARIABLE )
        !           116:            { /* do nothing */
        !           117:            }
        !           118: 
        !           119:        else if ( type == STATE_TRAILING_CONTEXT )
        !           120:            {
        !           121:            /* potential trouble.  Scan set of accepting numbers for
        !           122:             * the one marking the end of the "head".  We assume that
        !           123:             * this looping will be fairly cheap since it's rare that
        !           124:             * an accepting number set is large.
        !           125:             */
        !           126:            for ( j = 1; j <= nacc; ++j )
        !           127:                if ( accset[j] & YY_TRAILING_HEAD_MASK )
        !           128:                    {
        !           129:                    fprintf( stderr,
        !           130:                     "%s: Dangerous trailing context in rule at line %d\n",
        !           131:                             program_name, rule_linenum[ar] );
        !           132:                    return;
        !           133:                    }
        !           134:            }
        !           135:        }
        !           136:     }
        !           137: 
        !           138: 
        !           139: /* dump_associated_rules - list the rules associated with a DFA state
        !           140:  *
        !           141:  * synopisis
        !           142:  *     int ds;
        !           143:  *     FILE *file;
        !           144:  *     dump_associated_rules( file, ds );
        !           145:  *
        !           146:  * goes through the set of NFA states associated with the DFA and
        !           147:  * extracts the first MAX_ASSOC_RULES unique rules, sorts them,
        !           148:  * and writes a report to the given file
        !           149:  */
        !           150: 
        !           151: void dump_associated_rules( file, ds )
        !           152: FILE *file;
        !           153: int ds;
        !           154: 
        !           155:     {
        !           156:     register int i, j;
        !           157:     register int num_associated_rules = 0;
        !           158:     int rule_set[MAX_ASSOC_RULES + 1];
        !           159:     int *dset = dss[ds];
        !           160:     int size = dfasiz[ds];
        !           161:     
        !           162:     for ( i = 1; i <= size; ++i )
        !           163:        {
        !           164:        register rule_num = rule_linenum[assoc_rule[dset[i]]];
        !           165: 
        !           166:        for ( j = 1; j <= num_associated_rules; ++j )
        !           167:            if ( rule_num == rule_set[j] )
        !           168:                break;
        !           169: 
        !           170:        if ( j > num_associated_rules )
        !           171:            { /* new rule */
        !           172:            if ( num_associated_rules < MAX_ASSOC_RULES )
        !           173:                rule_set[++num_associated_rules] = rule_num;
        !           174:            }
        !           175:        }
        !           176: 
        !           177:     bubble( rule_set, num_associated_rules );
        !           178: 
        !           179:     fprintf( file, " associated rule line numbers:" );
        !           180: 
        !           181:     for ( i = 1; i <= num_associated_rules; ++i )
        !           182:        {
        !           183:        if ( i % 8 == 1 )
        !           184:            putc( '\n', file );
        !           185:        
        !           186:        fprintf( file, "\t%d", rule_set[i] );
        !           187:        }
        !           188:     
        !           189:     putc( '\n', file );
        !           190:     }
        !           191: 
        !           192: 
        !           193: /* dump_transitions - list the transitions associated with a DFA state
        !           194:  *
        !           195:  * synopisis
        !           196:  *     int state[numecs];
        !           197:  *     FILE *file;
        !           198:  *     dump_transitions( file, state );
        !           199:  *
        !           200:  * goes through the set of out-transitions and lists them in human-readable
        !           201:  * form (i.e., not as equivalence classes); also lists jam transitions
        !           202:  * (i.e., all those which are not out-transitions, plus EOF).  The dump
        !           203:  * is done to the given file.
        !           204:  */
        !           205: 
        !           206: void dump_transitions( file, state )
        !           207: FILE *file;
        !           208: int state[];
        !           209: 
        !           210:     {
        !           211:     register int i, ec;
        !           212:     int out_char_set[CSIZE];
        !           213: 
        !           214:     for ( i = 0; i < csize; ++i )
        !           215:        {
        !           216:        ec = abs( ecgroup[i] );
        !           217:        out_char_set[i] = state[ec];
        !           218:        }
        !           219:     
        !           220:     fprintf( file, " out-transitions: " );
        !           221: 
        !           222:     list_character_set( file, out_char_set );
        !           223: 
        !           224:     /* now invert the members of the set to get the jam transitions */
        !           225:     for ( i = 0; i < csize; ++i )
        !           226:        out_char_set[i] = ! out_char_set[i];
        !           227: 
        !           228:     fprintf( file, "\n jam-transitions: EOF " );
        !           229: 
        !           230:     list_character_set( file, out_char_set );
        !           231: 
        !           232:     putc( '\n', file );
        !           233:     }
        !           234: 
        !           235: 
        !           236: /* epsclosure - construct the epsilon closure of a set of ndfa states
        !           237:  *
        !           238:  * synopsis
        !           239:  *    int t[current_max_dfa_size], numstates, accset[num_rules + 1], nacc;
        !           240:  *    int hashval;
        !           241:  *    int *epsclosure();
        !           242:  *    t = epsclosure( t, &numstates, accset, &nacc, &hashval );
        !           243:  *
        !           244:  * NOTES
        !           245:  *    the epsilon closure is the set of all states reachable by an arbitrary
        !           246:  *  number of epsilon transitions which themselves do not have epsilon
        !           247:  *  transitions going out, unioned with the set of states which have non-null
        !           248:  *  accepting numbers.  t is an array of size numstates of nfa state numbers.
        !           249:  *  Upon return, t holds the epsilon closure and numstates is updated.  accset
        !           250:  *  holds a list of the accepting numbers, and the size of accset is given
        !           251:  *  by nacc.  t may be subjected to reallocation if it is not large enough
        !           252:  *  to hold the epsilon closure.
        !           253:  *
        !           254:  *    hashval is the hash value for the dfa corresponding to the state set
        !           255:  */
        !           256: 
        !           257: int *epsclosure( t, ns_addr, accset, nacc_addr, hv_addr )
        !           258: int *t, *ns_addr, accset[], *nacc_addr, *hv_addr;
        !           259: 
        !           260:     {
        !           261:     register int stkpos, ns, tsp;
        !           262:     int numstates = *ns_addr, nacc, hashval, transsym, nfaccnum;
        !           263:     int stkend, nstate;
        !           264:     static int did_stk_init = false, *stk; 
        !           265: 
        !           266: #define MARK_STATE(state) \
        !           267:        trans1[state] = trans1[state] - MARKER_DIFFERENCE;
        !           268: 
        !           269: #define IS_MARKED(state) (trans1[state] < 0)
        !           270: 
        !           271: #define UNMARK_STATE(state) \
        !           272:        trans1[state] = trans1[state] + MARKER_DIFFERENCE;
        !           273: 
        !           274: #define CHECK_ACCEPT(state) \
        !           275:        { \
        !           276:        nfaccnum = accptnum[state]; \
        !           277:        if ( nfaccnum != NIL ) \
        !           278:            accset[++nacc] = nfaccnum; \
        !           279:        }
        !           280: 
        !           281: #define DO_REALLOCATION \
        !           282:        { \
        !           283:        current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \
        !           284:        ++num_reallocs; \
        !           285:        t = reallocate_integer_array( t, current_max_dfa_size ); \
        !           286:        stk = reallocate_integer_array( stk, current_max_dfa_size ); \
        !           287:        } \
        !           288: 
        !           289: #define PUT_ON_STACK(state) \
        !           290:        { \
        !           291:        if ( ++stkend >= current_max_dfa_size ) \
        !           292:            DO_REALLOCATION \
        !           293:        stk[stkend] = state; \
        !           294:        MARK_STATE(state) \
        !           295:        }
        !           296: 
        !           297: #define ADD_STATE(state) \
        !           298:        { \
        !           299:        if ( ++numstates >= current_max_dfa_size ) \
        !           300:            DO_REALLOCATION \
        !           301:        t[numstates] = state; \
        !           302:        hashval = hashval + state; \
        !           303:        }
        !           304: 
        !           305: #define STACK_STATE(state) \
        !           306:        { \
        !           307:        PUT_ON_STACK(state) \
        !           308:        CHECK_ACCEPT(state) \
        !           309:        if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \
        !           310:            ADD_STATE(state) \
        !           311:        }
        !           312: 
        !           313:     if ( ! did_stk_init )
        !           314:        {
        !           315:        stk = allocate_integer_array( current_max_dfa_size );
        !           316:        did_stk_init = true;
        !           317:        }
        !           318: 
        !           319:     nacc = stkend = hashval = 0;
        !           320: 
        !           321:     for ( nstate = 1; nstate <= numstates; ++nstate )
        !           322:        {
        !           323:        ns = t[nstate];
        !           324: 
        !           325:        /* the state could be marked if we've already pushed it onto
        !           326:         * the stack
        !           327:         */
        !           328:        if ( ! IS_MARKED(ns) )
        !           329:            PUT_ON_STACK(ns)
        !           330: 
        !           331:        CHECK_ACCEPT(ns)
        !           332:        hashval = hashval + ns;
        !           333:        }
        !           334: 
        !           335:     for ( stkpos = 1; stkpos <= stkend; ++stkpos )
        !           336:        {
        !           337:        ns = stk[stkpos];
        !           338:        transsym = transchar[ns];
        !           339: 
        !           340:        if ( transsym == SYM_EPSILON )
        !           341:            {
        !           342:            tsp = trans1[ns] + MARKER_DIFFERENCE;
        !           343: 
        !           344:            if ( tsp != NO_TRANSITION )
        !           345:                {
        !           346:                if ( ! IS_MARKED(tsp) )
        !           347:                    STACK_STATE(tsp)
        !           348: 
        !           349:                tsp = trans2[ns];
        !           350: 
        !           351:                if ( tsp != NO_TRANSITION )
        !           352:                    if ( ! IS_MARKED(tsp) )
        !           353:                        STACK_STATE(tsp)
        !           354:                }
        !           355:            }
        !           356:        }
        !           357: 
        !           358:     /* clear out "visit" markers */
        !           359: 
        !           360:     for ( stkpos = 1; stkpos <= stkend; ++stkpos )
        !           361:        {
        !           362:        if ( IS_MARKED(stk[stkpos]) )
        !           363:            {
        !           364:            UNMARK_STATE(stk[stkpos])
        !           365:            }
        !           366:        else
        !           367:            flexfatal( "consistency check failed in epsclosure()" );
        !           368:        }
        !           369: 
        !           370:     *ns_addr = numstates;
        !           371:     *hv_addr = hashval;
        !           372:     *nacc_addr = nacc;
        !           373: 
        !           374:     return ( t );
        !           375:     }
        !           376: 
        !           377: 
        !           378: /* increase_max_dfas - increase the maximum number of DFAs */
        !           379: 
        !           380: void increase_max_dfas()
        !           381: 
        !           382:     {
        !           383:     current_max_dfas += MAX_DFAS_INCREMENT;
        !           384: 
        !           385:     ++num_reallocs;
        !           386: 
        !           387:     base = reallocate_integer_array( base, current_max_dfas );
        !           388:     def = reallocate_integer_array( def, current_max_dfas );
        !           389:     dfasiz = reallocate_integer_array( dfasiz, current_max_dfas );
        !           390:     accsiz = reallocate_integer_array( accsiz, current_max_dfas );
        !           391:     dhash = reallocate_integer_array( dhash, current_max_dfas );
        !           392:     dss = reallocate_int_ptr_array( dss, current_max_dfas );
        !           393:     dfaacc = reallocate_dfaacc_union( dfaacc, current_max_dfas );
        !           394: 
        !           395:     if ( nultrans )
        !           396:        nultrans = reallocate_integer_array( nultrans, current_max_dfas );
        !           397:     }
        !           398: 
        !           399: 
        !           400: /* ntod - convert an ndfa to a dfa
        !           401:  *
        !           402:  * synopsis
        !           403:  *    ntod();
        !           404:  *
        !           405:  *  creates the dfa corresponding to the ndfa we've constructed.  the
        !           406:  *  dfa starts out in state #1.
        !           407:  */
        !           408: 
        !           409: void ntod()
        !           410: 
        !           411:     {
        !           412:     int *accset, ds, nacc, newds;
        !           413:     int sym, hashval, numstates, dsize;
        !           414:     int num_full_table_rows;   /* used only for -f */
        !           415:     int *nset, *dset;
        !           416:     int targptr, totaltrans, i, comstate, comfreq, targ;
        !           417:     int *epsclosure(), snstods(), symlist[CSIZE + 1];
        !           418:     int num_start_states;
        !           419:     int todo_head, todo_next;
        !           420: 
        !           421:     /* note that the following are indexed by *equivalence classes*
        !           422:      * and not by characters.  Since equivalence classes are indexed
        !           423:      * beginning with 1, even if the scanner accepts NUL's, this
        !           424:      * means that (since every character is potentially in its own
        !           425:      * equivalence class) these arrays must have room for indices
        !           426:      * from 1 to CSIZE, so their size must be CSIZE + 1.
        !           427:      */
        !           428:     int duplist[CSIZE + 1], state[CSIZE + 1];
        !           429:     int targfreq[CSIZE + 1], targstate[CSIZE + 1];
        !           430: 
        !           431:     /* this is so find_table_space(...) will know where to start looking in
        !           432:      * chk/nxt for unused records for space to put in the state
        !           433:      */
        !           434:     if ( fullspd )
        !           435:        firstfree = 0;
        !           436: 
        !           437:     accset = allocate_integer_array( num_rules + 1 );
        !           438:     nset = allocate_integer_array( current_max_dfa_size );
        !           439: 
        !           440:     /* the "todo" queue is represented by the head, which is the DFA
        !           441:      * state currently being processed, and the "next", which is the
        !           442:      * next DFA state number available (not in use).  We depend on the
        !           443:      * fact that snstods() returns DFA's \in increasing order/, and thus
        !           444:      * need only know the bounds of the dfas to be processed.
        !           445:      */
        !           446:     todo_head = todo_next = 0;
        !           447: 
        !           448:     for ( i = 0; i <= csize; ++i )
        !           449:        {
        !           450:        duplist[i] = NIL;
        !           451:        symlist[i] = false;
        !           452:        }
        !           453: 
        !           454:     for ( i = 0; i <= num_rules; ++i )
        !           455:        accset[i] = NIL;
        !           456: 
        !           457:     if ( trace )
        !           458:        {
        !           459:        dumpnfa( scset[1] );
        !           460:        fputs( "\n\nDFA Dump:\n\n", stderr );
        !           461:        }
        !           462: 
        !           463:     inittbl();
        !           464: 
        !           465:     /* check to see whether we should build a separate table for transitions
        !           466:      * on NUL characters.  We don't do this for full-speed (-F) scanners,
        !           467:      * since for them we don't have a simple state number lying around with
        !           468:      * which to index the table.  We also don't bother doing it for scanners
        !           469:      * unless (1) NUL is in its own equivalence class (indicated by a
        !           470:      * positive value of ecgroup[NUL]), (2) NUL's equilvalence class is
        !           471:      * the last equivalence class, and (3) the number of equivalence classes
        !           472:      * is the same as the number of characters.  This latter case comes about
        !           473:      * when useecs is false or when its true but every character still
        !           474:      * manages to land in its own class (unlikely, but it's cheap to check
        !           475:      * for).  If all these things are true then the character code needed
        !           476:      * to represent NUL's equivalence class for indexing the tables is
        !           477:      * going to take one more bit than the number of characters, and therefore
        !           478:      * we won't be assured of being able to fit it into a YY_CHAR variable.
        !           479:      * This rules out storing the transitions in a compressed table, since
        !           480:      * the code for interpreting them uses a YY_CHAR variable (perhaps it
        !           481:      * should just use an integer, though; this is worth pondering ... ###).
        !           482:      *
        !           483:      * Finally, for full tables, we want the number of entries in the
        !           484:      * table to be a power of two so the array references go fast (it
        !           485:      * will just take a shift to compute the major index).  If encoding
        !           486:      * NUL's transitions in the table will spoil this, we give it its
        !           487:      * own table (note that this will be the case if we're not using
        !           488:      * equivalence classes).
        !           489:      */
        !           490: 
        !           491:     /* note that the test for ecgroup[0] == numecs below accomplishes
        !           492:      * both (1) and (2) above
        !           493:      */
        !           494:     if ( ! fullspd && ecgroup[0] == numecs )
        !           495:        { /* NUL is alone in its equivalence class, which is the last one */
        !           496:        int use_NUL_table = (numecs == csize);
        !           497: 
        !           498:        if ( fulltbl && ! use_NUL_table )
        !           499:            { /* we still may want to use the table if numecs is a power of 2 */
        !           500:            int power_of_two;
        !           501: 
        !           502:            for ( power_of_two = 1; power_of_two <= csize; power_of_two *= 2 )
        !           503:                if ( numecs == power_of_two )
        !           504:                    {
        !           505:                    use_NUL_table = true;
        !           506:                    break;
        !           507:                    }
        !           508:            }
        !           509: 
        !           510:        if ( use_NUL_table )
        !           511:            nultrans = allocate_integer_array( current_max_dfas );
        !           512:            /* from now on, nultrans != nil indicates that we're
        !           513:             * saving null transitions for later, separate encoding
        !           514:             */
        !           515:        }
        !           516: 
        !           517: 
        !           518:     if ( fullspd )
        !           519:        {
        !           520:        for ( i = 0; i <= numecs; ++i )
        !           521:            state[i] = 0;
        !           522:        place_state( state, 0, 0 );
        !           523:        }
        !           524: 
        !           525:     else if ( fulltbl )
        !           526:        {
        !           527:        if ( nultrans )
        !           528:            /* we won't be including NUL's transitions in the table,
        !           529:             * so build it for entries from 0 .. numecs - 1
        !           530:             */
        !           531:            num_full_table_rows = numecs;
        !           532: 
        !           533:        else
        !           534:            /* take into account the fact that we'll be including
        !           535:             * the NUL entries in the transition table.  Build it
        !           536:             * from 0 .. numecs.
        !           537:             */
        !           538:            num_full_table_rows = numecs + 1;
        !           539: 
        !           540:        /* declare it "short" because it's a real long-shot that that
        !           541:         * won't be large enough.
        !           542:         */
        !           543:        printf( "static short int yy_nxt[][%d] =\n    {\n",
        !           544:                /* '}' so vi doesn't get too confused */
        !           545:                num_full_table_rows );
        !           546: 
        !           547:        /* generate 0 entries for state #0 */
        !           548:        for ( i = 0; i < num_full_table_rows; ++i )
        !           549:            mk2data( 0 );
        !           550: 
        !           551:        /* force ',' and dataflush() next call to mk2data */
        !           552:        datapos = NUMDATAITEMS;
        !           553: 
        !           554:        /* force extra blank line next dataflush() */
        !           555:        dataline = NUMDATALINES;
        !           556:        }
        !           557: 
        !           558:     /* create the first states */
        !           559: 
        !           560:     num_start_states = lastsc * 2;
        !           561: 
        !           562:     for ( i = 1; i <= num_start_states; ++i )
        !           563:        {
        !           564:        numstates = 1;
        !           565: 
        !           566:        /* for each start condition, make one state for the case when
        !           567:         * we're at the beginning of the line (the '%' operator) and
        !           568:         * one for the case when we're not
        !           569:         */
        !           570:        if ( i % 2 == 1 )
        !           571:            nset[numstates] = scset[(i / 2) + 1];
        !           572:        else
        !           573:            nset[numstates] = mkbranch( scbol[i / 2], scset[i / 2] );
        !           574: 
        !           575:        nset = epsclosure( nset, &numstates, accset, &nacc, &hashval );
        !           576: 
        !           577:        if ( snstods( nset, numstates, accset, nacc, hashval, &ds ) )
        !           578:            {
        !           579:            numas += nacc;
        !           580:            totnst += numstates;
        !           581:            ++todo_next;
        !           582: 
        !           583:            if ( variable_trailing_context_rules && nacc > 0 )
        !           584:                check_trailing_context( nset, numstates, accset, nacc );
        !           585:            }
        !           586:        }
        !           587: 
        !           588:     if ( ! fullspd )
        !           589:        {
        !           590:        if ( ! snstods( nset, 0, accset, 0, 0, &end_of_buffer_state ) )
        !           591:            flexfatal( "could not create unique end-of-buffer state" );
        !           592: 
        !           593:        ++numas;
        !           594:        ++num_start_states;
        !           595:        ++todo_next;
        !           596:        }
        !           597: 
        !           598:     while ( todo_head < todo_next )
        !           599:        {
        !           600:        targptr = 0;
        !           601:        totaltrans = 0;
        !           602: 
        !           603:        for ( i = 1; i <= numecs; ++i )
        !           604:            state[i] = 0;
        !           605: 
        !           606:        ds = ++todo_head;
        !           607: 
        !           608:        dset = dss[ds];
        !           609:        dsize = dfasiz[ds];
        !           610: 
        !           611:        if ( trace )
        !           612:            fprintf( stderr, "state # %d:\n", ds );
        !           613: 
        !           614:        sympartition( dset, dsize, symlist, duplist );
        !           615: 
        !           616:        for ( sym = 1; sym <= numecs; ++sym )
        !           617:            {
        !           618:            if ( symlist[sym] )
        !           619:                {
        !           620:                symlist[sym] = 0;
        !           621: 
        !           622:                if ( duplist[sym] == NIL )
        !           623:                    { /* symbol has unique out-transitions */
        !           624:                    numstates = symfollowset( dset, dsize, sym, nset );
        !           625:                    nset = epsclosure( nset, &numstates, accset,
        !           626:                                       &nacc, &hashval );
        !           627: 
        !           628:                    if ( snstods( nset, numstates, accset,
        !           629:                                  nacc, hashval, &newds ) )
        !           630:                        {
        !           631:                        totnst = totnst + numstates;
        !           632:                        ++todo_next;
        !           633:                        numas += nacc;
        !           634: 
        !           635:                        if ( variable_trailing_context_rules && nacc > 0 )
        !           636:                            check_trailing_context( nset, numstates,
        !           637:                                accset, nacc );
        !           638:                        }
        !           639: 
        !           640:                    state[sym] = newds;
        !           641: 
        !           642:                    if ( trace )
        !           643:                        fprintf( stderr, "\t%d\t%d\n", sym, newds );
        !           644: 
        !           645:                    targfreq[++targptr] = 1;
        !           646:                    targstate[targptr] = newds;
        !           647:                    ++numuniq;
        !           648:                    }
        !           649: 
        !           650:                else
        !           651:                    {
        !           652:                    /* sym's equivalence class has the same transitions
        !           653:                     * as duplist(sym)'s equivalence class
        !           654:                     */
        !           655:                    targ = state[duplist[sym]];
        !           656:                    state[sym] = targ;
        !           657: 
        !           658:                    if ( trace )
        !           659:                        fprintf( stderr, "\t%d\t%d\n", sym, targ );
        !           660: 
        !           661:                    /* update frequency count for destination state */
        !           662: 
        !           663:                    i = 0;
        !           664:                    while ( targstate[++i] != targ )
        !           665:                        ;
        !           666: 
        !           667:                    ++targfreq[i];
        !           668:                    ++numdup;
        !           669:                    }
        !           670: 
        !           671:                ++totaltrans;
        !           672:                duplist[sym] = NIL;
        !           673:                }
        !           674:            }
        !           675: 
        !           676:        numsnpairs = numsnpairs + totaltrans;
        !           677: 
        !           678:        if ( caseins && ! useecs )
        !           679:            {
        !           680:            register int j;
        !           681: 
        !           682:            for ( i = 'A', j = 'a'; i <= 'Z'; ++i, ++j )
        !           683:                state[i] = state[j];
        !           684:            }
        !           685: 
        !           686:        if ( ds > num_start_states )
        !           687:            check_for_backtracking( ds, state );
        !           688: 
        !           689:        if ( nultrans )
        !           690:            {
        !           691:            nultrans[ds] = state[NUL_ec];
        !           692:            state[NUL_ec] = 0;  /* remove transition */
        !           693:            }
        !           694: 
        !           695:        if ( fulltbl )
        !           696:            {
        !           697:            /* supply array's 0-element */
        !           698:            if ( ds == end_of_buffer_state )
        !           699:                mk2data( -end_of_buffer_state );
        !           700:            else
        !           701:                mk2data( end_of_buffer_state );
        !           702: 
        !           703:            for ( i = 1; i < num_full_table_rows; ++i )
        !           704:                /* jams are marked by negative of state number */
        !           705:                mk2data( state[i] ? state[i] : -ds );
        !           706: 
        !           707:            /* force ',' and dataflush() next call to mk2data */
        !           708:            datapos = NUMDATAITEMS;
        !           709: 
        !           710:            /* force extra blank line next dataflush() */
        !           711:            dataline = NUMDATALINES;
        !           712:            }
        !           713: 
        !           714:         else if ( fullspd )
        !           715:            place_state( state, ds, totaltrans );
        !           716: 
        !           717:        else if ( ds == end_of_buffer_state )
        !           718:            /* special case this state to make sure it does what it's
        !           719:             * supposed to, i.e., jam on end-of-buffer
        !           720:             */
        !           721:            stack1( ds, 0, 0, JAMSTATE );
        !           722: 
        !           723:        else /* normal, compressed state */
        !           724:            {
        !           725:            /* determine which destination state is the most common, and
        !           726:             * how many transitions to it there are
        !           727:             */
        !           728: 
        !           729:            comfreq = 0;
        !           730:            comstate = 0;
        !           731: 
        !           732:            for ( i = 1; i <= targptr; ++i )
        !           733:                if ( targfreq[i] > comfreq )
        !           734:                    {
        !           735:                    comfreq = targfreq[i];
        !           736:                    comstate = targstate[i];
        !           737:                    }
        !           738: 
        !           739:            bldtbl( state, ds, totaltrans, comstate, comfreq );
        !           740:            }
        !           741:        }
        !           742: 
        !           743:     if ( fulltbl )
        !           744:        dataend();
        !           745: 
        !           746:     else if ( ! fullspd )
        !           747:        {
        !           748:        cmptmps();  /* create compressed template entries */
        !           749: 
        !           750:        /* create tables for all the states with only one out-transition */
        !           751:        while ( onesp > 0 )
        !           752:            {
        !           753:            mk1tbl( onestate[onesp], onesym[onesp], onenext[onesp],
        !           754:                    onedef[onesp] );
        !           755:            --onesp;
        !           756:            }
        !           757: 
        !           758:        mkdeftbl();
        !           759:        }
        !           760:     }
        !           761: 
        !           762: 
        !           763: /* snstods - converts a set of ndfa states into a dfa state
        !           764:  *
        !           765:  * synopsis
        !           766:  *    int sns[numstates], numstates, newds, accset[num_rules + 1], nacc, hashval;
        !           767:  *    int snstods();
        !           768:  *    is_new_state = snstods( sns, numstates, accset, nacc, hashval, &newds );
        !           769:  *
        !           770:  * on return, the dfa state number is in newds.
        !           771:  */
        !           772: 
        !           773: int snstods( sns, numstates, accset, nacc, hashval, newds_addr )
        !           774: int sns[], numstates, accset[], nacc, hashval, *newds_addr;
        !           775: 
        !           776:     {
        !           777:     int didsort = 0;
        !           778:     register int i, j;
        !           779:     int newds, *oldsns;
        !           780: 
        !           781:     for ( i = 1; i <= lastdfa; ++i )
        !           782:        if ( hashval == dhash[i] )
        !           783:            {
        !           784:            if ( numstates == dfasiz[i] )
        !           785:                {
        !           786:                oldsns = dss[i];
        !           787: 
        !           788:                if ( ! didsort )
        !           789:                    {
        !           790:                    /* we sort the states in sns so we can compare it to
        !           791:                     * oldsns quickly.  we use bubble because there probably
        !           792:                     * aren't very many states
        !           793:                     */
        !           794:                    bubble( sns, numstates );
        !           795:                    didsort = 1;
        !           796:                    }
        !           797: 
        !           798:                for ( j = 1; j <= numstates; ++j )
        !           799:                    if ( sns[j] != oldsns[j] )
        !           800:                        break;
        !           801: 
        !           802:                if ( j > numstates )
        !           803:                    {
        !           804:                    ++dfaeql;
        !           805:                    *newds_addr = i;
        !           806:                    return ( 0 );
        !           807:                    }
        !           808: 
        !           809:                ++hshcol;
        !           810:                }
        !           811: 
        !           812:            else
        !           813:                ++hshsave;
        !           814:            }
        !           815: 
        !           816:     /* make a new dfa */
        !           817: 
        !           818:     if ( ++lastdfa >= current_max_dfas )
        !           819:        increase_max_dfas();
        !           820: 
        !           821:     newds = lastdfa;
        !           822: 
        !           823:     dss[newds] = (int *) malloc( (unsigned) ((numstates + 1) * sizeof( int )) );
        !           824: 
        !           825:     if ( ! dss[newds] )
        !           826:        flexfatal( "dynamic memory failure in snstods()" );
        !           827: 
        !           828:     /* if we haven't already sorted the states in sns, we do so now, so that
        !           829:      * future comparisons with it can be made quickly
        !           830:      */
        !           831: 
        !           832:     if ( ! didsort )
        !           833:        bubble( sns, numstates );
        !           834: 
        !           835:     for ( i = 1; i <= numstates; ++i )
        !           836:        dss[newds][i] = sns[i];
        !           837: 
        !           838:     dfasiz[newds] = numstates;
        !           839:     dhash[newds] = hashval;
        !           840: 
        !           841:     if ( nacc == 0 )
        !           842:        {
        !           843:        if ( reject )
        !           844:            dfaacc[newds].dfaacc_set = (int *) 0;
        !           845:        else
        !           846:            dfaacc[newds].dfaacc_state = 0;
        !           847: 
        !           848:        accsiz[newds] = 0;
        !           849:        }
        !           850: 
        !           851:     else if ( reject )
        !           852:        {
        !           853:        /* we sort the accepting set in increasing order so the disambiguating
        !           854:         * rule that the first rule listed is considered match in the event of
        !           855:         * ties will work.  We use a bubble sort since the list is probably
        !           856:         * quite small.
        !           857:         */
        !           858: 
        !           859:        bubble( accset, nacc );
        !           860: 
        !           861:        dfaacc[newds].dfaacc_set =
        !           862:            (int *) malloc( (unsigned) ((nacc + 1) * sizeof( int )) );
        !           863: 
        !           864:        if ( ! dfaacc[newds].dfaacc_set )
        !           865:            flexfatal( "dynamic memory failure in snstods()" );
        !           866: 
        !           867:        /* save the accepting set for later */
        !           868:        for ( i = 1; i <= nacc; ++i )
        !           869:            dfaacc[newds].dfaacc_set[i] = accset[i];
        !           870: 
        !           871:        accsiz[newds] = nacc;
        !           872:        }
        !           873: 
        !           874:     else
        !           875:        { /* find lowest numbered rule so the disambiguating rule will work */
        !           876:        j = num_rules + 1;
        !           877: 
        !           878:        for ( i = 1; i <= nacc; ++i )
        !           879:            if ( accset[i] < j )
        !           880:                j = accset[i];
        !           881: 
        !           882:        dfaacc[newds].dfaacc_state = j;
        !           883:        }
        !           884: 
        !           885:     *newds_addr = newds;
        !           886: 
        !           887:     return ( 1 );
        !           888:     }
        !           889: 
        !           890: 
        !           891: /* symfollowset - follow the symbol transitions one step
        !           892:  *
        !           893:  * synopsis
        !           894:  *    int ds[current_max_dfa_size], dsize, transsym;
        !           895:  *    int nset[current_max_dfa_size], numstates;
        !           896:  *    numstates = symfollowset( ds, dsize, transsym, nset );
        !           897:  */
        !           898: 
        !           899: int symfollowset( ds, dsize, transsym, nset )
        !           900: int ds[], dsize, transsym, nset[];
        !           901: 
        !           902:     {
        !           903:     int ns, tsp, sym, i, j, lenccl, ch, numstates;
        !           904:     int ccllist;
        !           905: 
        !           906:     numstates = 0;
        !           907: 
        !           908:     for ( i = 1; i <= dsize; ++i )
        !           909:        { /* for each nfa state ns in the state set of ds */
        !           910:        ns = ds[i];
        !           911:        sym = transchar[ns];
        !           912:        tsp = trans1[ns];
        !           913: 
        !           914:        if ( sym < 0 )
        !           915:            { /* it's a character class */
        !           916:            sym = -sym;
        !           917:            ccllist = cclmap[sym];
        !           918:            lenccl = ccllen[sym];
        !           919: 
        !           920:            if ( cclng[sym] )
        !           921:                {
        !           922:                for ( j = 0; j < lenccl; ++j )
        !           923:                    { /* loop through negated character class */
        !           924:                    ch = ccltbl[ccllist + j];
        !           925: 
        !           926:                    if ( ch == 0 )
        !           927:                        ch = NUL_ec;
        !           928: 
        !           929:                    if ( ch > transsym )
        !           930:                        break;  /* transsym isn't in negated ccl */
        !           931: 
        !           932:                    else if ( ch == transsym )
        !           933:                        /* next 2 */ goto bottom;
        !           934:                    }
        !           935: 
        !           936:                /* didn't find transsym in ccl */
        !           937:                nset[++numstates] = tsp;
        !           938:                }
        !           939: 
        !           940:            else
        !           941:                for ( j = 0; j < lenccl; ++j )
        !           942:                    {
        !           943:                    ch = ccltbl[ccllist + j];
        !           944: 
        !           945:                    if ( ch == 0 )
        !           946:                        ch = NUL_ec;
        !           947: 
        !           948:                    if ( ch > transsym )
        !           949:                        break;
        !           950: 
        !           951:                    else if ( ch == transsym )
        !           952:                        {
        !           953:                        nset[++numstates] = tsp;
        !           954:                        break;
        !           955:                        }
        !           956:                    }
        !           957:            }
        !           958: 
        !           959:        else if ( sym >= 'A' && sym <= 'Z' && caseins )
        !           960:            flexfatal( "consistency check failed in symfollowset" );
        !           961: 
        !           962:        else if ( sym == SYM_EPSILON )
        !           963:            { /* do nothing */
        !           964:            }
        !           965: 
        !           966:        else if ( abs( ecgroup[sym] ) == transsym )
        !           967:            nset[++numstates] = tsp;
        !           968: 
        !           969: bottom:
        !           970:        ;
        !           971:        }
        !           972: 
        !           973:     return ( numstates );
        !           974:     }
        !           975: 
        !           976: 
        !           977: /* sympartition - partition characters with same out-transitions
        !           978:  *
        !           979:  * synopsis
        !           980:  *    integer ds[current_max_dfa_size], numstates, duplist[numecs];
        !           981:  *    symlist[numecs];
        !           982:  *    sympartition( ds, numstates, symlist, duplist );
        !           983:  */
        !           984: 
        !           985: void sympartition( ds, numstates, symlist, duplist )
        !           986: int ds[], numstates, duplist[];
        !           987: int symlist[];
        !           988: 
        !           989:     {
        !           990:     int tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich;
        !           991: 
        !           992:     /* partitioning is done by creating equivalence classes for those
        !           993:      * characters which have out-transitions from the given state.  Thus
        !           994:      * we are really creating equivalence classes of equivalence classes.
        !           995:      */
        !           996: 
        !           997:     for ( i = 1; i <= numecs; ++i )
        !           998:        { /* initialize equivalence class list */
        !           999:        duplist[i] = i - 1;
        !          1000:        dupfwd[i] = i + 1;
        !          1001:        }
        !          1002: 
        !          1003:     duplist[1] = NIL;
        !          1004:     dupfwd[numecs] = NIL;
        !          1005: 
        !          1006:     for ( i = 1; i <= numstates; ++i )
        !          1007:        {
        !          1008:        ns = ds[i];
        !          1009:        tch = transchar[ns];
        !          1010: 
        !          1011:        if ( tch != SYM_EPSILON )
        !          1012:            {
        !          1013:            if ( tch < -lastccl || tch > csize )
        !          1014:                {
        !          1015:                if ( tch > csize && tch <= CSIZE )
        !          1016:                    flexerror( "scanner requires -8 flag" );
        !          1017: 
        !          1018:                else
        !          1019:                    flexfatal(
        !          1020:                        "bad transition character detected in sympartition()" );
        !          1021:                }
        !          1022: 
        !          1023:            if ( tch >= 0 )
        !          1024:                { /* character transition */
        !          1025:                /* abs() needed for fake %t ec's */
        !          1026:                int ec = abs( ecgroup[tch] );
        !          1027: 
        !          1028:                mkechar( ec, dupfwd, duplist );
        !          1029:                symlist[ec] = 1;
        !          1030:                }
        !          1031: 
        !          1032:            else
        !          1033:                { /* character class */
        !          1034:                tch = -tch;
        !          1035: 
        !          1036:                lenccl = ccllen[tch];
        !          1037:                cclp = cclmap[tch];
        !          1038:                mkeccl( ccltbl + cclp, lenccl, dupfwd, duplist, numecs,
        !          1039:                        NUL_ec );
        !          1040: 
        !          1041:                if ( cclng[tch] )
        !          1042:                    {
        !          1043:                    j = 0;
        !          1044: 
        !          1045:                    for ( k = 0; k < lenccl; ++k )
        !          1046:                        {
        !          1047:                        ich = ccltbl[cclp + k];
        !          1048: 
        !          1049:                        if ( ich == 0 )
        !          1050:                            ich = NUL_ec;
        !          1051: 
        !          1052:                        for ( ++j; j < ich; ++j )
        !          1053:                            symlist[j] = 1;
        !          1054:                        }
        !          1055: 
        !          1056:                    for ( ++j; j <= numecs; ++j )
        !          1057:                        symlist[j] = 1;
        !          1058:                    }
        !          1059: 
        !          1060:                else
        !          1061:                    for ( k = 0; k < lenccl; ++k )
        !          1062:                        {
        !          1063:                        ich = ccltbl[cclp + k];
        !          1064: 
        !          1065:                        if ( ich == 0 )
        !          1066:                            ich = NUL_ec;
        !          1067: 
        !          1068:                        symlist[ich] = 1;
        !          1069:                        }
        !          1070:                }
        !          1071:            }
        !          1072:        }
        !          1073:     }

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.