Annotation of 43BSDReno/pgrm/lex/dfa.c, revision 1.1.1.1

1.1       root        1: /*-
                      2:  * Copyright (c) 1990 The Regents of the University of California.
                      3:  * All rights reserved.
                      4:  *
                      5:  * This code is derived from software contributed to Berkeley by
                      6:  * Vern Paxson.
                      7:  * 
                      8:  * The United States Government has rights in this work pursuant
                      9:  * to contract no. DE-AC03-76SF00098 between the United States
                     10:  * Department of Energy and the University of California.
                     11:  *
                     12:  * Redistribution and use in source and binary forms are permitted provided
                     13:  * that: (1) source distributions retain this entire copyright notice and
                     14:  * comment, and (2) distributions including binaries display the following
                     15:  * acknowledgement:  ``This product includes software developed by the
                     16:  * University of California, Berkeley and its contributors'' in the
                     17:  * documentation or other materials provided with the distribution and in
                     18:  * all advertising materials mentioning features or use of this software.
                     19:  * Neither the name of the University nor the names of its contributors may
                     20:  * be used to endorse or promote products derived from this software without
                     21:  * specific prior written permission.
                     22:  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
                     23:  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
                     24:  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
                     25:  */
                     26: 
                     27: #ifndef lint
                     28: static char sccsid[] = "@(#)dfa.c      5.2 (Berkeley) 6/18/90";
                     29: #endif /* not lint */
                     30: 
                     31: /* dfa - DFA construction routines */
                     32: 
                     33: #include "flexdef.h"
                     34: 
                     35: /* declare functions that have forward references */
                     36: 
                     37: void dump_associated_rules PROTO((FILE*, int));
                     38: void dump_transitions PROTO((FILE*, int[]));
                     39: void sympartition PROTO((int[], int, int[], int[]));
                     40: int symfollowset PROTO((int[], int, int, int[]));
                     41: 
                     42: 
                     43: /* check_for_backtracking - check a DFA state for backtracking
                     44:  *
                     45:  * synopsis
                     46:  *     int ds, state[numecs];
                     47:  *     check_for_backtracking( ds, state );
                     48:  *
                     49:  * ds is the number of the state to check and state[] is its out-transitions,
                     50:  * indexed by equivalence class, and state_rules[] is the set of rules
                     51:  * associated with this state
                     52:  */
                     53: 
                     54: void check_for_backtracking( ds, state )
                     55: int ds;
                     56: int state[];
                     57: 
                     58:     {
                     59:     if ( (reject && ! dfaacc[ds].dfaacc_set) || ! dfaacc[ds].dfaacc_state )
                     60:        { /* state is non-accepting */
                     61:        ++num_backtracking;
                     62: 
                     63:        if ( backtrack_report )
                     64:            {
                     65:            fprintf( backtrack_file, "State #%d is non-accepting -\n", ds );
                     66: 
                     67:            /* identify the state */
                     68:            dump_associated_rules( backtrack_file, ds );
                     69: 
                     70:            /* now identify it further using the out- and jam-transitions */
                     71:            dump_transitions( backtrack_file, state );
                     72: 
                     73:            putc( '\n', backtrack_file );
                     74:            }
                     75:        }
                     76:     }
                     77: 
                     78: 
                     79: /* check_trailing_context - check to see if NFA state set constitutes
                     80:  *                          "dangerous" trailing context
                     81:  *
                     82:  * synopsis
                     83:  *    int nfa_states[num_states+1], num_states;
                     84:  *    int accset[nacc+1], nacc;
                     85:  *    check_trailing_context( nfa_states, num_states, accset, nacc );
                     86:  *
                     87:  * NOTES
                     88:  *    Trailing context is "dangerous" if both the head and the trailing
                     89:  *  part are of variable size \and/ there's a DFA state which contains
                     90:  *  both an accepting state for the head part of the rule and NFA states
                     91:  *  which occur after the beginning of the trailing context.
                     92:  *  When such a rule is matched, it's impossible to tell if having been
                     93:  *  in the DFA state indicates the beginning of the trailing context
                     94:  *  or further-along scanning of the pattern.  In these cases, a warning
                     95:  *  message is issued.
                     96:  *
                     97:  *    nfa_states[1 .. num_states] is the list of NFA states in the DFA.
                     98:  *    accset[1 .. nacc] is the list of accepting numbers for the DFA state.
                     99:  */
                    100: 
                    101: void check_trailing_context( nfa_states, num_states, accset, nacc )
                    102: int *nfa_states, num_states;
                    103: int *accset;
                    104: register int nacc;
                    105: 
                    106:     {
                    107:     register int i, j;
                    108: 
                    109:     for ( i = 1; i <= num_states; ++i )
                    110:        {
                    111:        int ns = nfa_states[i];
                    112:        register int type = state_type[ns];
                    113:        register int ar = assoc_rule[ns];
                    114: 
                    115:        if ( type == STATE_NORMAL || rule_type[ar] != RULE_VARIABLE )
                    116:            { /* do nothing */
                    117:            }
                    118: 
                    119:        else if ( type == STATE_TRAILING_CONTEXT )
                    120:            {
                    121:            /* potential trouble.  Scan set of accepting numbers for
                    122:             * the one marking the end of the "head".  We assume that
                    123:             * this looping will be fairly cheap since it's rare that
                    124:             * an accepting number set is large.
                    125:             */
                    126:            for ( j = 1; j <= nacc; ++j )
                    127:                if ( accset[j] & YY_TRAILING_HEAD_MASK )
                    128:                    {
                    129:                    fprintf( stderr,
                    130:                     "%s: Dangerous trailing context in rule at line %d\n",
                    131:                             program_name, rule_linenum[ar] );
                    132:                    return;
                    133:                    }
                    134:            }
                    135:        }
                    136:     }
                    137: 
                    138: 
                    139: /* dump_associated_rules - list the rules associated with a DFA state
                    140:  *
                    141:  * synopisis
                    142:  *     int ds;
                    143:  *     FILE *file;
                    144:  *     dump_associated_rules( file, ds );
                    145:  *
                    146:  * goes through the set of NFA states associated with the DFA and
                    147:  * extracts the first MAX_ASSOC_RULES unique rules, sorts them,
                    148:  * and writes a report to the given file
                    149:  */
                    150: 
                    151: void dump_associated_rules( file, ds )
                    152: FILE *file;
                    153: int ds;
                    154: 
                    155:     {
                    156:     register int i, j;
                    157:     register int num_associated_rules = 0;
                    158:     int rule_set[MAX_ASSOC_RULES + 1];
                    159:     int *dset = dss[ds];
                    160:     int size = dfasiz[ds];
                    161:     
                    162:     for ( i = 1; i <= size; ++i )
                    163:        {
                    164:        register rule_num = rule_linenum[assoc_rule[dset[i]]];
                    165: 
                    166:        for ( j = 1; j <= num_associated_rules; ++j )
                    167:            if ( rule_num == rule_set[j] )
                    168:                break;
                    169: 
                    170:        if ( j > num_associated_rules )
                    171:            { /* new rule */
                    172:            if ( num_associated_rules < MAX_ASSOC_RULES )
                    173:                rule_set[++num_associated_rules] = rule_num;
                    174:            }
                    175:        }
                    176: 
                    177:     bubble( rule_set, num_associated_rules );
                    178: 
                    179:     fprintf( file, " associated rule line numbers:" );
                    180: 
                    181:     for ( i = 1; i <= num_associated_rules; ++i )
                    182:        {
                    183:        if ( i % 8 == 1 )
                    184:            putc( '\n', file );
                    185:        
                    186:        fprintf( file, "\t%d", rule_set[i] );
                    187:        }
                    188:     
                    189:     putc( '\n', file );
                    190:     }
                    191: 
                    192: 
                    193: /* dump_transitions - list the transitions associated with a DFA state
                    194:  *
                    195:  * synopisis
                    196:  *     int state[numecs];
                    197:  *     FILE *file;
                    198:  *     dump_transitions( file, state );
                    199:  *
                    200:  * goes through the set of out-transitions and lists them in human-readable
                    201:  * form (i.e., not as equivalence classes); also lists jam transitions
                    202:  * (i.e., all those which are not out-transitions, plus EOF).  The dump
                    203:  * is done to the given file.
                    204:  */
                    205: 
                    206: void dump_transitions( file, state )
                    207: FILE *file;
                    208: int state[];
                    209: 
                    210:     {
                    211:     register int i, ec;
                    212:     int out_char_set[CSIZE];
                    213: 
                    214:     for ( i = 0; i < csize; ++i )
                    215:        {
                    216:        ec = abs( ecgroup[i] );
                    217:        out_char_set[i] = state[ec];
                    218:        }
                    219:     
                    220:     fprintf( file, " out-transitions: " );
                    221: 
                    222:     list_character_set( file, out_char_set );
                    223: 
                    224:     /* now invert the members of the set to get the jam transitions */
                    225:     for ( i = 0; i < csize; ++i )
                    226:        out_char_set[i] = ! out_char_set[i];
                    227: 
                    228:     fprintf( file, "\n jam-transitions: EOF " );
                    229: 
                    230:     list_character_set( file, out_char_set );
                    231: 
                    232:     putc( '\n', file );
                    233:     }
                    234: 
                    235: 
                    236: /* epsclosure - construct the epsilon closure of a set of ndfa states
                    237:  *
                    238:  * synopsis
                    239:  *    int t[current_max_dfa_size], numstates, accset[num_rules + 1], nacc;
                    240:  *    int hashval;
                    241:  *    int *epsclosure();
                    242:  *    t = epsclosure( t, &numstates, accset, &nacc, &hashval );
                    243:  *
                    244:  * NOTES
                    245:  *    the epsilon closure is the set of all states reachable by an arbitrary
                    246:  *  number of epsilon transitions which themselves do not have epsilon
                    247:  *  transitions going out, unioned with the set of states which have non-null
                    248:  *  accepting numbers.  t is an array of size numstates of nfa state numbers.
                    249:  *  Upon return, t holds the epsilon closure and numstates is updated.  accset
                    250:  *  holds a list of the accepting numbers, and the size of accset is given
                    251:  *  by nacc.  t may be subjected to reallocation if it is not large enough
                    252:  *  to hold the epsilon closure.
                    253:  *
                    254:  *    hashval is the hash value for the dfa corresponding to the state set
                    255:  */
                    256: 
                    257: int *epsclosure( t, ns_addr, accset, nacc_addr, hv_addr )
                    258: int *t, *ns_addr, accset[], *nacc_addr, *hv_addr;
                    259: 
                    260:     {
                    261:     register int stkpos, ns, tsp;
                    262:     int numstates = *ns_addr, nacc, hashval, transsym, nfaccnum;
                    263:     int stkend, nstate;
                    264:     static int did_stk_init = false, *stk; 
                    265: 
                    266: #define MARK_STATE(state) \
                    267:        trans1[state] = trans1[state] - MARKER_DIFFERENCE;
                    268: 
                    269: #define IS_MARKED(state) (trans1[state] < 0)
                    270: 
                    271: #define UNMARK_STATE(state) \
                    272:        trans1[state] = trans1[state] + MARKER_DIFFERENCE;
                    273: 
                    274: #define CHECK_ACCEPT(state) \
                    275:        { \
                    276:        nfaccnum = accptnum[state]; \
                    277:        if ( nfaccnum != NIL ) \
                    278:            accset[++nacc] = nfaccnum; \
                    279:        }
                    280: 
                    281: #define DO_REALLOCATION \
                    282:        { \
                    283:        current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \
                    284:        ++num_reallocs; \
                    285:        t = reallocate_integer_array( t, current_max_dfa_size ); \
                    286:        stk = reallocate_integer_array( stk, current_max_dfa_size ); \
                    287:        } \
                    288: 
                    289: #define PUT_ON_STACK(state) \
                    290:        { \
                    291:        if ( ++stkend >= current_max_dfa_size ) \
                    292:            DO_REALLOCATION \
                    293:        stk[stkend] = state; \
                    294:        MARK_STATE(state) \
                    295:        }
                    296: 
                    297: #define ADD_STATE(state) \
                    298:        { \
                    299:        if ( ++numstates >= current_max_dfa_size ) \
                    300:            DO_REALLOCATION \
                    301:        t[numstates] = state; \
                    302:        hashval = hashval + state; \
                    303:        }
                    304: 
                    305: #define STACK_STATE(state) \
                    306:        { \
                    307:        PUT_ON_STACK(state) \
                    308:        CHECK_ACCEPT(state) \
                    309:        if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \
                    310:            ADD_STATE(state) \
                    311:        }
                    312: 
                    313:     if ( ! did_stk_init )
                    314:        {
                    315:        stk = allocate_integer_array( current_max_dfa_size );
                    316:        did_stk_init = true;
                    317:        }
                    318: 
                    319:     nacc = stkend = hashval = 0;
                    320: 
                    321:     for ( nstate = 1; nstate <= numstates; ++nstate )
                    322:        {
                    323:        ns = t[nstate];
                    324: 
                    325:        /* the state could be marked if we've already pushed it onto
                    326:         * the stack
                    327:         */
                    328:        if ( ! IS_MARKED(ns) )
                    329:            PUT_ON_STACK(ns)
                    330: 
                    331:        CHECK_ACCEPT(ns)
                    332:        hashval = hashval + ns;
                    333:        }
                    334: 
                    335:     for ( stkpos = 1; stkpos <= stkend; ++stkpos )
                    336:        {
                    337:        ns = stk[stkpos];
                    338:        transsym = transchar[ns];
                    339: 
                    340:        if ( transsym == SYM_EPSILON )
                    341:            {
                    342:            tsp = trans1[ns] + MARKER_DIFFERENCE;
                    343: 
                    344:            if ( tsp != NO_TRANSITION )
                    345:                {
                    346:                if ( ! IS_MARKED(tsp) )
                    347:                    STACK_STATE(tsp)
                    348: 
                    349:                tsp = trans2[ns];
                    350: 
                    351:                if ( tsp != NO_TRANSITION )
                    352:                    if ( ! IS_MARKED(tsp) )
                    353:                        STACK_STATE(tsp)
                    354:                }
                    355:            }
                    356:        }
                    357: 
                    358:     /* clear out "visit" markers */
                    359: 
                    360:     for ( stkpos = 1; stkpos <= stkend; ++stkpos )
                    361:        {
                    362:        if ( IS_MARKED(stk[stkpos]) )
                    363:            {
                    364:            UNMARK_STATE(stk[stkpos])
                    365:            }
                    366:        else
                    367:            flexfatal( "consistency check failed in epsclosure()" );
                    368:        }
                    369: 
                    370:     *ns_addr = numstates;
                    371:     *hv_addr = hashval;
                    372:     *nacc_addr = nacc;
                    373: 
                    374:     return ( t );
                    375:     }
                    376: 
                    377: 
                    378: /* increase_max_dfas - increase the maximum number of DFAs */
                    379: 
                    380: void increase_max_dfas()
                    381: 
                    382:     {
                    383:     current_max_dfas += MAX_DFAS_INCREMENT;
                    384: 
                    385:     ++num_reallocs;
                    386: 
                    387:     base = reallocate_integer_array( base, current_max_dfas );
                    388:     def = reallocate_integer_array( def, current_max_dfas );
                    389:     dfasiz = reallocate_integer_array( dfasiz, current_max_dfas );
                    390:     accsiz = reallocate_integer_array( accsiz, current_max_dfas );
                    391:     dhash = reallocate_integer_array( dhash, current_max_dfas );
                    392:     dss = reallocate_int_ptr_array( dss, current_max_dfas );
                    393:     dfaacc = reallocate_dfaacc_union( dfaacc, current_max_dfas );
                    394: 
                    395:     if ( nultrans )
                    396:        nultrans = reallocate_integer_array( nultrans, current_max_dfas );
                    397:     }
                    398: 
                    399: 
                    400: /* ntod - convert an ndfa to a dfa
                    401:  *
                    402:  * synopsis
                    403:  *    ntod();
                    404:  *
                    405:  *  creates the dfa corresponding to the ndfa we've constructed.  the
                    406:  *  dfa starts out in state #1.
                    407:  */
                    408: 
                    409: void ntod()
                    410: 
                    411:     {
                    412:     int *accset, ds, nacc, newds;
                    413:     int sym, hashval, numstates, dsize;
                    414:     int num_full_table_rows;   /* used only for -f */
                    415:     int *nset, *dset;
                    416:     int targptr, totaltrans, i, comstate, comfreq, targ;
                    417:     int *epsclosure(), snstods(), symlist[CSIZE + 1];
                    418:     int num_start_states;
                    419:     int todo_head, todo_next;
                    420: 
                    421:     /* note that the following are indexed by *equivalence classes*
                    422:      * and not by characters.  Since equivalence classes are indexed
                    423:      * beginning with 1, even if the scanner accepts NUL's, this
                    424:      * means that (since every character is potentially in its own
                    425:      * equivalence class) these arrays must have room for indices
                    426:      * from 1 to CSIZE, so their size must be CSIZE + 1.
                    427:      */
                    428:     int duplist[CSIZE + 1], state[CSIZE + 1];
                    429:     int targfreq[CSIZE + 1], targstate[CSIZE + 1];
                    430: 
                    431:     /* this is so find_table_space(...) will know where to start looking in
                    432:      * chk/nxt for unused records for space to put in the state
                    433:      */
                    434:     if ( fullspd )
                    435:        firstfree = 0;
                    436: 
                    437:     accset = allocate_integer_array( num_rules + 1 );
                    438:     nset = allocate_integer_array( current_max_dfa_size );
                    439: 
                    440:     /* the "todo" queue is represented by the head, which is the DFA
                    441:      * state currently being processed, and the "next", which is the
                    442:      * next DFA state number available (not in use).  We depend on the
                    443:      * fact that snstods() returns DFA's \in increasing order/, and thus
                    444:      * need only know the bounds of the dfas to be processed.
                    445:      */
                    446:     todo_head = todo_next = 0;
                    447: 
                    448:     for ( i = 0; i <= csize; ++i )
                    449:        {
                    450:        duplist[i] = NIL;
                    451:        symlist[i] = false;
                    452:        }
                    453: 
                    454:     for ( i = 0; i <= num_rules; ++i )
                    455:        accset[i] = NIL;
                    456: 
                    457:     if ( trace )
                    458:        {
                    459:        dumpnfa( scset[1] );
                    460:        fputs( "\n\nDFA Dump:\n\n", stderr );
                    461:        }
                    462: 
                    463:     inittbl();
                    464: 
                    465:     /* check to see whether we should build a separate table for transitions
                    466:      * on NUL characters.  We don't do this for full-speed (-F) scanners,
                    467:      * since for them we don't have a simple state number lying around with
                    468:      * which to index the table.  We also don't bother doing it for scanners
                    469:      * unless (1) NUL is in its own equivalence class (indicated by a
                    470:      * positive value of ecgroup[NUL]), (2) NUL's equilvalence class is
                    471:      * the last equivalence class, and (3) the number of equivalence classes
                    472:      * is the same as the number of characters.  This latter case comes about
                    473:      * when useecs is false or when its true but every character still
                    474:      * manages to land in its own class (unlikely, but it's cheap to check
                    475:      * for).  If all these things are true then the character code needed
                    476:      * to represent NUL's equivalence class for indexing the tables is
                    477:      * going to take one more bit than the number of characters, and therefore
                    478:      * we won't be assured of being able to fit it into a YY_CHAR variable.
                    479:      * This rules out storing the transitions in a compressed table, since
                    480:      * the code for interpreting them uses a YY_CHAR variable (perhaps it
                    481:      * should just use an integer, though; this is worth pondering ... ###).
                    482:      *
                    483:      * Finally, for full tables, we want the number of entries in the
                    484:      * table to be a power of two so the array references go fast (it
                    485:      * will just take a shift to compute the major index).  If encoding
                    486:      * NUL's transitions in the table will spoil this, we give it its
                    487:      * own table (note that this will be the case if we're not using
                    488:      * equivalence classes).
                    489:      */
                    490: 
                    491:     /* note that the test for ecgroup[0] == numecs below accomplishes
                    492:      * both (1) and (2) above
                    493:      */
                    494:     if ( ! fullspd && ecgroup[0] == numecs )
                    495:        { /* NUL is alone in its equivalence class, which is the last one */
                    496:        int use_NUL_table = (numecs == csize);
                    497: 
                    498:        if ( fulltbl && ! use_NUL_table )
                    499:            { /* we still may want to use the table if numecs is a power of 2 */
                    500:            int power_of_two;
                    501: 
                    502:            for ( power_of_two = 1; power_of_two <= csize; power_of_two *= 2 )
                    503:                if ( numecs == power_of_two )
                    504:                    {
                    505:                    use_NUL_table = true;
                    506:                    break;
                    507:                    }
                    508:            }
                    509: 
                    510:        if ( use_NUL_table )
                    511:            nultrans = allocate_integer_array( current_max_dfas );
                    512:            /* from now on, nultrans != nil indicates that we're
                    513:             * saving null transitions for later, separate encoding
                    514:             */
                    515:        }
                    516: 
                    517: 
                    518:     if ( fullspd )
                    519:        {
                    520:        for ( i = 0; i <= numecs; ++i )
                    521:            state[i] = 0;
                    522:        place_state( state, 0, 0 );
                    523:        }
                    524: 
                    525:     else if ( fulltbl )
                    526:        {
                    527:        if ( nultrans )
                    528:            /* we won't be including NUL's transitions in the table,
                    529:             * so build it for entries from 0 .. numecs - 1
                    530:             */
                    531:            num_full_table_rows = numecs;
                    532: 
                    533:        else
                    534:            /* take into account the fact that we'll be including
                    535:             * the NUL entries in the transition table.  Build it
                    536:             * from 0 .. numecs.
                    537:             */
                    538:            num_full_table_rows = numecs + 1;
                    539: 
                    540:        /* declare it "short" because it's a real long-shot that that
                    541:         * won't be large enough.
                    542:         */
                    543:        printf( "static short int yy_nxt[][%d] =\n    {\n",
                    544:                /* '}' so vi doesn't get too confused */
                    545:                num_full_table_rows );
                    546: 
                    547:        /* generate 0 entries for state #0 */
                    548:        for ( i = 0; i < num_full_table_rows; ++i )
                    549:            mk2data( 0 );
                    550: 
                    551:        /* force ',' and dataflush() next call to mk2data */
                    552:        datapos = NUMDATAITEMS;
                    553: 
                    554:        /* force extra blank line next dataflush() */
                    555:        dataline = NUMDATALINES;
                    556:        }
                    557: 
                    558:     /* create the first states */
                    559: 
                    560:     num_start_states = lastsc * 2;
                    561: 
                    562:     for ( i = 1; i <= num_start_states; ++i )
                    563:        {
                    564:        numstates = 1;
                    565: 
                    566:        /* for each start condition, make one state for the case when
                    567:         * we're at the beginning of the line (the '%' operator) and
                    568:         * one for the case when we're not
                    569:         */
                    570:        if ( i % 2 == 1 )
                    571:            nset[numstates] = scset[(i / 2) + 1];
                    572:        else
                    573:            nset[numstates] = mkbranch( scbol[i / 2], scset[i / 2] );
                    574: 
                    575:        nset = epsclosure( nset, &numstates, accset, &nacc, &hashval );
                    576: 
                    577:        if ( snstods( nset, numstates, accset, nacc, hashval, &ds ) )
                    578:            {
                    579:            numas += nacc;
                    580:            totnst += numstates;
                    581:            ++todo_next;
                    582: 
                    583:            if ( variable_trailing_context_rules && nacc > 0 )
                    584:                check_trailing_context( nset, numstates, accset, nacc );
                    585:            }
                    586:        }
                    587: 
                    588:     if ( ! fullspd )
                    589:        {
                    590:        if ( ! snstods( nset, 0, accset, 0, 0, &end_of_buffer_state ) )
                    591:            flexfatal( "could not create unique end-of-buffer state" );
                    592: 
                    593:        ++numas;
                    594:        ++num_start_states;
                    595:        ++todo_next;
                    596:        }
                    597: 
                    598:     while ( todo_head < todo_next )
                    599:        {
                    600:        targptr = 0;
                    601:        totaltrans = 0;
                    602: 
                    603:        for ( i = 1; i <= numecs; ++i )
                    604:            state[i] = 0;
                    605: 
                    606:        ds = ++todo_head;
                    607: 
                    608:        dset = dss[ds];
                    609:        dsize = dfasiz[ds];
                    610: 
                    611:        if ( trace )
                    612:            fprintf( stderr, "state # %d:\n", ds );
                    613: 
                    614:        sympartition( dset, dsize, symlist, duplist );
                    615: 
                    616:        for ( sym = 1; sym <= numecs; ++sym )
                    617:            {
                    618:            if ( symlist[sym] )
                    619:                {
                    620:                symlist[sym] = 0;
                    621: 
                    622:                if ( duplist[sym] == NIL )
                    623:                    { /* symbol has unique out-transitions */
                    624:                    numstates = symfollowset( dset, dsize, sym, nset );
                    625:                    nset = epsclosure( nset, &numstates, accset,
                    626:                                       &nacc, &hashval );
                    627: 
                    628:                    if ( snstods( nset, numstates, accset,
                    629:                                  nacc, hashval, &newds ) )
                    630:                        {
                    631:                        totnst = totnst + numstates;
                    632:                        ++todo_next;
                    633:                        numas += nacc;
                    634: 
                    635:                        if ( variable_trailing_context_rules && nacc > 0 )
                    636:                            check_trailing_context( nset, numstates,
                    637:                                accset, nacc );
                    638:                        }
                    639: 
                    640:                    state[sym] = newds;
                    641: 
                    642:                    if ( trace )
                    643:                        fprintf( stderr, "\t%d\t%d\n", sym, newds );
                    644: 
                    645:                    targfreq[++targptr] = 1;
                    646:                    targstate[targptr] = newds;
                    647:                    ++numuniq;
                    648:                    }
                    649: 
                    650:                else
                    651:                    {
                    652:                    /* sym's equivalence class has the same transitions
                    653:                     * as duplist(sym)'s equivalence class
                    654:                     */
                    655:                    targ = state[duplist[sym]];
                    656:                    state[sym] = targ;
                    657: 
                    658:                    if ( trace )
                    659:                        fprintf( stderr, "\t%d\t%d\n", sym, targ );
                    660: 
                    661:                    /* update frequency count for destination state */
                    662: 
                    663:                    i = 0;
                    664:                    while ( targstate[++i] != targ )
                    665:                        ;
                    666: 
                    667:                    ++targfreq[i];
                    668:                    ++numdup;
                    669:                    }
                    670: 
                    671:                ++totaltrans;
                    672:                duplist[sym] = NIL;
                    673:                }
                    674:            }
                    675: 
                    676:        numsnpairs = numsnpairs + totaltrans;
                    677: 
                    678:        if ( caseins && ! useecs )
                    679:            {
                    680:            register int j;
                    681: 
                    682:            for ( i = 'A', j = 'a'; i <= 'Z'; ++i, ++j )
                    683:                state[i] = state[j];
                    684:            }
                    685: 
                    686:        if ( ds > num_start_states )
                    687:            check_for_backtracking( ds, state );
                    688: 
                    689:        if ( nultrans )
                    690:            {
                    691:            nultrans[ds] = state[NUL_ec];
                    692:            state[NUL_ec] = 0;  /* remove transition */
                    693:            }
                    694: 
                    695:        if ( fulltbl )
                    696:            {
                    697:            /* supply array's 0-element */
                    698:            if ( ds == end_of_buffer_state )
                    699:                mk2data( -end_of_buffer_state );
                    700:            else
                    701:                mk2data( end_of_buffer_state );
                    702: 
                    703:            for ( i = 1; i < num_full_table_rows; ++i )
                    704:                /* jams are marked by negative of state number */
                    705:                mk2data( state[i] ? state[i] : -ds );
                    706: 
                    707:            /* force ',' and dataflush() next call to mk2data */
                    708:            datapos = NUMDATAITEMS;
                    709: 
                    710:            /* force extra blank line next dataflush() */
                    711:            dataline = NUMDATALINES;
                    712:            }
                    713: 
                    714:         else if ( fullspd )
                    715:            place_state( state, ds, totaltrans );
                    716: 
                    717:        else if ( ds == end_of_buffer_state )
                    718:            /* special case this state to make sure it does what it's
                    719:             * supposed to, i.e., jam on end-of-buffer
                    720:             */
                    721:            stack1( ds, 0, 0, JAMSTATE );
                    722: 
                    723:        else /* normal, compressed state */
                    724:            {
                    725:            /* determine which destination state is the most common, and
                    726:             * how many transitions to it there are
                    727:             */
                    728: 
                    729:            comfreq = 0;
                    730:            comstate = 0;
                    731: 
                    732:            for ( i = 1; i <= targptr; ++i )
                    733:                if ( targfreq[i] > comfreq )
                    734:                    {
                    735:                    comfreq = targfreq[i];
                    736:                    comstate = targstate[i];
                    737:                    }
                    738: 
                    739:            bldtbl( state, ds, totaltrans, comstate, comfreq );
                    740:            }
                    741:        }
                    742: 
                    743:     if ( fulltbl )
                    744:        dataend();
                    745: 
                    746:     else if ( ! fullspd )
                    747:        {
                    748:        cmptmps();  /* create compressed template entries */
                    749: 
                    750:        /* create tables for all the states with only one out-transition */
                    751:        while ( onesp > 0 )
                    752:            {
                    753:            mk1tbl( onestate[onesp], onesym[onesp], onenext[onesp],
                    754:                    onedef[onesp] );
                    755:            --onesp;
                    756:            }
                    757: 
                    758:        mkdeftbl();
                    759:        }
                    760:     }
                    761: 
                    762: 
                    763: /* snstods - converts a set of ndfa states into a dfa state
                    764:  *
                    765:  * synopsis
                    766:  *    int sns[numstates], numstates, newds, accset[num_rules + 1], nacc, hashval;
                    767:  *    int snstods();
                    768:  *    is_new_state = snstods( sns, numstates, accset, nacc, hashval, &newds );
                    769:  *
                    770:  * on return, the dfa state number is in newds.
                    771:  */
                    772: 
                    773: int snstods( sns, numstates, accset, nacc, hashval, newds_addr )
                    774: int sns[], numstates, accset[], nacc, hashval, *newds_addr;
                    775: 
                    776:     {
                    777:     int didsort = 0;
                    778:     register int i, j;
                    779:     int newds, *oldsns;
                    780: 
                    781:     for ( i = 1; i <= lastdfa; ++i )
                    782:        if ( hashval == dhash[i] )
                    783:            {
                    784:            if ( numstates == dfasiz[i] )
                    785:                {
                    786:                oldsns = dss[i];
                    787: 
                    788:                if ( ! didsort )
                    789:                    {
                    790:                    /* we sort the states in sns so we can compare it to
                    791:                     * oldsns quickly.  we use bubble because there probably
                    792:                     * aren't very many states
                    793:                     */
                    794:                    bubble( sns, numstates );
                    795:                    didsort = 1;
                    796:                    }
                    797: 
                    798:                for ( j = 1; j <= numstates; ++j )
                    799:                    if ( sns[j] != oldsns[j] )
                    800:                        break;
                    801: 
                    802:                if ( j > numstates )
                    803:                    {
                    804:                    ++dfaeql;
                    805:                    *newds_addr = i;
                    806:                    return ( 0 );
                    807:                    }
                    808: 
                    809:                ++hshcol;
                    810:                }
                    811: 
                    812:            else
                    813:                ++hshsave;
                    814:            }
                    815: 
                    816:     /* make a new dfa */
                    817: 
                    818:     if ( ++lastdfa >= current_max_dfas )
                    819:        increase_max_dfas();
                    820: 
                    821:     newds = lastdfa;
                    822: 
                    823:     dss[newds] = (int *) malloc( (unsigned) ((numstates + 1) * sizeof( int )) );
                    824: 
                    825:     if ( ! dss[newds] )
                    826:        flexfatal( "dynamic memory failure in snstods()" );
                    827: 
                    828:     /* if we haven't already sorted the states in sns, we do so now, so that
                    829:      * future comparisons with it can be made quickly
                    830:      */
                    831: 
                    832:     if ( ! didsort )
                    833:        bubble( sns, numstates );
                    834: 
                    835:     for ( i = 1; i <= numstates; ++i )
                    836:        dss[newds][i] = sns[i];
                    837: 
                    838:     dfasiz[newds] = numstates;
                    839:     dhash[newds] = hashval;
                    840: 
                    841:     if ( nacc == 0 )
                    842:        {
                    843:        if ( reject )
                    844:            dfaacc[newds].dfaacc_set = (int *) 0;
                    845:        else
                    846:            dfaacc[newds].dfaacc_state = 0;
                    847: 
                    848:        accsiz[newds] = 0;
                    849:        }
                    850: 
                    851:     else if ( reject )
                    852:        {
                    853:        /* we sort the accepting set in increasing order so the disambiguating
                    854:         * rule that the first rule listed is considered match in the event of
                    855:         * ties will work.  We use a bubble sort since the list is probably
                    856:         * quite small.
                    857:         */
                    858: 
                    859:        bubble( accset, nacc );
                    860: 
                    861:        dfaacc[newds].dfaacc_set =
                    862:            (int *) malloc( (unsigned) ((nacc + 1) * sizeof( int )) );
                    863: 
                    864:        if ( ! dfaacc[newds].dfaacc_set )
                    865:            flexfatal( "dynamic memory failure in snstods()" );
                    866: 
                    867:        /* save the accepting set for later */
                    868:        for ( i = 1; i <= nacc; ++i )
                    869:            dfaacc[newds].dfaacc_set[i] = accset[i];
                    870: 
                    871:        accsiz[newds] = nacc;
                    872:        }
                    873: 
                    874:     else
                    875:        { /* find lowest numbered rule so the disambiguating rule will work */
                    876:        j = num_rules + 1;
                    877: 
                    878:        for ( i = 1; i <= nacc; ++i )
                    879:            if ( accset[i] < j )
                    880:                j = accset[i];
                    881: 
                    882:        dfaacc[newds].dfaacc_state = j;
                    883:        }
                    884: 
                    885:     *newds_addr = newds;
                    886: 
                    887:     return ( 1 );
                    888:     }
                    889: 
                    890: 
                    891: /* symfollowset - follow the symbol transitions one step
                    892:  *
                    893:  * synopsis
                    894:  *    int ds[current_max_dfa_size], dsize, transsym;
                    895:  *    int nset[current_max_dfa_size], numstates;
                    896:  *    numstates = symfollowset( ds, dsize, transsym, nset );
                    897:  */
                    898: 
                    899: int symfollowset( ds, dsize, transsym, nset )
                    900: int ds[], dsize, transsym, nset[];
                    901: 
                    902:     {
                    903:     int ns, tsp, sym, i, j, lenccl, ch, numstates;
                    904:     int ccllist;
                    905: 
                    906:     numstates = 0;
                    907: 
                    908:     for ( i = 1; i <= dsize; ++i )
                    909:        { /* for each nfa state ns in the state set of ds */
                    910:        ns = ds[i];
                    911:        sym = transchar[ns];
                    912:        tsp = trans1[ns];
                    913: 
                    914:        if ( sym < 0 )
                    915:            { /* it's a character class */
                    916:            sym = -sym;
                    917:            ccllist = cclmap[sym];
                    918:            lenccl = ccllen[sym];
                    919: 
                    920:            if ( cclng[sym] )
                    921:                {
                    922:                for ( j = 0; j < lenccl; ++j )
                    923:                    { /* loop through negated character class */
                    924:                    ch = ccltbl[ccllist + j];
                    925: 
                    926:                    if ( ch == 0 )
                    927:                        ch = NUL_ec;
                    928: 
                    929:                    if ( ch > transsym )
                    930:                        break;  /* transsym isn't in negated ccl */
                    931: 
                    932:                    else if ( ch == transsym )
                    933:                        /* next 2 */ goto bottom;
                    934:                    }
                    935: 
                    936:                /* didn't find transsym in ccl */
                    937:                nset[++numstates] = tsp;
                    938:                }
                    939: 
                    940:            else
                    941:                for ( j = 0; j < lenccl; ++j )
                    942:                    {
                    943:                    ch = ccltbl[ccllist + j];
                    944: 
                    945:                    if ( ch == 0 )
                    946:                        ch = NUL_ec;
                    947: 
                    948:                    if ( ch > transsym )
                    949:                        break;
                    950: 
                    951:                    else if ( ch == transsym )
                    952:                        {
                    953:                        nset[++numstates] = tsp;
                    954:                        break;
                    955:                        }
                    956:                    }
                    957:            }
                    958: 
                    959:        else if ( sym >= 'A' && sym <= 'Z' && caseins )
                    960:            flexfatal( "consistency check failed in symfollowset" );
                    961: 
                    962:        else if ( sym == SYM_EPSILON )
                    963:            { /* do nothing */
                    964:            }
                    965: 
                    966:        else if ( abs( ecgroup[sym] ) == transsym )
                    967:            nset[++numstates] = tsp;
                    968: 
                    969: bottom:
                    970:        ;
                    971:        }
                    972: 
                    973:     return ( numstates );
                    974:     }
                    975: 
                    976: 
                    977: /* sympartition - partition characters with same out-transitions
                    978:  *
                    979:  * synopsis
                    980:  *    integer ds[current_max_dfa_size], numstates, duplist[numecs];
                    981:  *    symlist[numecs];
                    982:  *    sympartition( ds, numstates, symlist, duplist );
                    983:  */
                    984: 
                    985: void sympartition( ds, numstates, symlist, duplist )
                    986: int ds[], numstates, duplist[];
                    987: int symlist[];
                    988: 
                    989:     {
                    990:     int tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich;
                    991: 
                    992:     /* partitioning is done by creating equivalence classes for those
                    993:      * characters which have out-transitions from the given state.  Thus
                    994:      * we are really creating equivalence classes of equivalence classes.
                    995:      */
                    996: 
                    997:     for ( i = 1; i <= numecs; ++i )
                    998:        { /* initialize equivalence class list */
                    999:        duplist[i] = i - 1;
                   1000:        dupfwd[i] = i + 1;
                   1001:        }
                   1002: 
                   1003:     duplist[1] = NIL;
                   1004:     dupfwd[numecs] = NIL;
                   1005: 
                   1006:     for ( i = 1; i <= numstates; ++i )
                   1007:        {
                   1008:        ns = ds[i];
                   1009:        tch = transchar[ns];
                   1010: 
                   1011:        if ( tch != SYM_EPSILON )
                   1012:            {
                   1013:            if ( tch < -lastccl || tch > csize )
                   1014:                {
                   1015:                if ( tch > csize && tch <= CSIZE )
                   1016:                    flexerror( "scanner requires -8 flag" );
                   1017: 
                   1018:                else
                   1019:                    flexfatal(
                   1020:                        "bad transition character detected in sympartition()" );
                   1021:                }
                   1022: 
                   1023:            if ( tch >= 0 )
                   1024:                { /* character transition */
                   1025:                /* abs() needed for fake %t ec's */
                   1026:                int ec = abs( ecgroup[tch] );
                   1027: 
                   1028:                mkechar( ec, dupfwd, duplist );
                   1029:                symlist[ec] = 1;
                   1030:                }
                   1031: 
                   1032:            else
                   1033:                { /* character class */
                   1034:                tch = -tch;
                   1035: 
                   1036:                lenccl = ccllen[tch];
                   1037:                cclp = cclmap[tch];
                   1038:                mkeccl( ccltbl + cclp, lenccl, dupfwd, duplist, numecs,
                   1039:                        NUL_ec );
                   1040: 
                   1041:                if ( cclng[tch] )
                   1042:                    {
                   1043:                    j = 0;
                   1044: 
                   1045:                    for ( k = 0; k < lenccl; ++k )
                   1046:                        {
                   1047:                        ich = ccltbl[cclp + k];
                   1048: 
                   1049:                        if ( ich == 0 )
                   1050:                            ich = NUL_ec;
                   1051: 
                   1052:                        for ( ++j; j < ich; ++j )
                   1053:                            symlist[j] = 1;
                   1054:                        }
                   1055: 
                   1056:                    for ( ++j; j <= numecs; ++j )
                   1057:                        symlist[j] = 1;
                   1058:                    }
                   1059: 
                   1060:                else
                   1061:                    for ( k = 0; k < lenccl; ++k )
                   1062:                        {
                   1063:                        ich = ccltbl[cclp + k];
                   1064: 
                   1065:                        if ( ich == 0 )
                   1066:                            ich = NUL_ec;
                   1067: 
                   1068:                        symlist[ich] = 1;
                   1069:                        }
                   1070:                }
                   1071:            }
                   1072:        }
                   1073:     }

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.