Annotation of 43BSDReno/pgrm/lex/ecs.c, revision 1.1.1.1

1.1       root        1: /*-
                      2:  * Copyright (c) 1990 The Regents of the University of California.
                      3:  * All rights reserved.
                      4:  *
                      5:  * This code is derived from software contributed to Berkeley by
                      6:  * Vern Paxson.
                      7:  * 
                      8:  * The United States Government has rights in this work pursuant
                      9:  * to contract no. DE-AC03-76SF00098 between the United States
                     10:  * Department of Energy and the University of California.
                     11:  *
                     12:  * Redistribution and use in source and binary forms are permitted provided
                     13:  * that: (1) source distributions retain this entire copyright notice and
                     14:  * comment, and (2) distributions including binaries display the following
                     15:  * acknowledgement:  ``This product includes software developed by the
                     16:  * University of California, Berkeley and its contributors'' in the
                     17:  * documentation or other materials provided with the distribution and in
                     18:  * all advertising materials mentioning features or use of this software.
                     19:  * Neither the name of the University nor the names of its contributors may
                     20:  * be used to endorse or promote products derived from this software without
                     21:  * specific prior written permission.
                     22:  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
                     23:  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
                     24:  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
                     25:  */
                     26: 
                     27: #ifndef lint
                     28: static char sccsid[] = "@(#)ecs.c      5.2 (Berkeley) 6/18/90";
                     29: #endif /* not lint */
                     30: 
                     31: /* ecs - equivalence class routines */
                     32: 
                     33: #include "flexdef.h"
                     34: 
                     35: /* ccl2ecl - convert character classes to set of equivalence classes
                     36:  *
                     37:  * synopsis
                     38:  *    ccl2ecl();
                     39:  */
                     40: 
                     41: void ccl2ecl()
                     42: 
                     43:     {
                     44:     int i, ich, newlen, cclp, ccls, cclmec;
                     45: 
                     46:     for ( i = 1; i <= lastccl; ++i )
                     47:        {
                     48:        /* we loop through each character class, and for each character
                     49:         * in the class, add the character's equivalence class to the
                     50:         * new "character" class we are creating.  Thus when we are all
                     51:         * done, character classes will really consist of collections
                     52:         * of equivalence classes
                     53:         */
                     54: 
                     55:        newlen = 0;
                     56:        cclp = cclmap[i];
                     57: 
                     58:        for ( ccls = 0; ccls < ccllen[i]; ++ccls )
                     59:            {
                     60:            ich = ccltbl[cclp + ccls];
                     61:            cclmec = ecgroup[ich];
                     62: 
                     63:            if ( xlation && cclmec < 0 )
                     64:                {
                     65:                /* special hack--if we're doing %t tables then it's
                     66:                 * possible that no representative of this character's
                     67:                 * equivalence class is in the ccl.  So waiting till
                     68:                 * we see the representative would be disastrous.  Instead,
                     69:                 * we add this character's equivalence class anyway, if it's
                     70:                 * not already present.
                     71:                 */
                     72:                int j;
                     73: 
                     74:                /* this loop makes this whole process n^2; but we don't
                     75:                 * really care about %t performance anyway
                     76:                 */
                     77:                for ( j = 0; j < newlen; ++j )
                     78:                    if ( ccltbl[cclp + j] == -cclmec )
                     79:                        break;
                     80: 
                     81:                if ( j >= newlen )
                     82:                    { /* no representative yet, add this one in */
                     83:                    ccltbl[cclp + newlen] = -cclmec;
                     84:                    ++newlen;
                     85:                    }
                     86:                }
                     87: 
                     88:            else if ( cclmec > 0 )
                     89:                {
                     90:                ccltbl[cclp + newlen] = cclmec;
                     91:                ++newlen;
                     92:                }
                     93:            }
                     94: 
                     95:        ccllen[i] = newlen;
                     96:        }
                     97:     }
                     98: 
                     99: 
                    100: /* cre8ecs - associate equivalence class numbers with class members
                    101:  *
                    102:  * synopsis
                    103:  *    int cre8ecs();
                    104:  *    number of classes = cre8ecs( fwd, bck, num );
                    105:  *
                    106:  *  fwd is the forward linked-list of equivalence class members.  bck
                    107:  *  is the backward linked-list, and num is the number of class members.
                    108:  *
                    109:  *  Returned is the number of classes.
                    110:  */
                    111: 
                    112: int cre8ecs( fwd, bck, num )
                    113: int fwd[], bck[], num;
                    114: 
                    115:     {
                    116:     int i, j, numcl;
                    117: 
                    118:     numcl = 0;
                    119: 
                    120:     /* create equivalence class numbers.  From now on, abs( bck(x) )
                    121:      * is the equivalence class number for object x.  If bck(x)
                    122:      * is positive, then x is the representative of its equivalence
                    123:      * class.
                    124:      */
                    125:     for ( i = 1; i <= num; ++i )
                    126:        if ( bck[i] == NIL )
                    127:            {
                    128:            bck[i] = ++numcl;
                    129:            for ( j = fwd[i]; j != NIL; j = fwd[j] )
                    130:                bck[j] = -numcl;
                    131:            }
                    132: 
                    133:     return ( numcl );
                    134:     }
                    135: 
                    136: 
                    137: /* ecs_from_xlation - associate equivalence class numbers using %t table
                    138:  *
                    139:  * synopsis
                    140:  *    numecs = ecs_from_xlation( ecmap );
                    141:  *
                    142:  *  Upon return, ecmap will map each character code to its equivalence
                    143:  *  class.  The mapping will be positive if the character is the representative
                    144:  *  of its class, negative otherwise.
                    145:  *
                    146:  *  Returns the number of equivalence classes used.
                    147:  */
                    148: 
                    149: int ecs_from_xlation( ecmap )
                    150: int ecmap[];
                    151: 
                    152:     {
                    153:     int i;
                    154:     int nul_is_alone = false;
                    155:     int did_default_xlation_class = false;
                    156: 
                    157:     if ( xlation[0] != 0 )
                    158:        {
                    159:        /* if NUL shares its translation with other characters, choose one
                    160:         * of the other characters as the representative for the equivalence
                    161:         * class.  This allows a cheap test later to see whether we can
                    162:         * do away with NUL's equivalence class.
                    163:         */
                    164:        for ( i = 1; i < csize; ++i )
                    165:            if ( xlation[i] == -xlation[0] )
                    166:                {
                    167:                xlation[i] = xlation[0];
                    168:                ecmap[0] = -xlation[0];
                    169:                break;
                    170:                }
                    171: 
                    172:        if ( i >= csize )
                    173:            /* didn't find a companion character--remember this fact */
                    174:            nul_is_alone = true;
                    175:        }
                    176: 
                    177:     for ( i = 1; i < csize; ++i )
                    178:        if ( xlation[i] == 0 )
                    179:            {
                    180:            if ( did_default_xlation_class )
                    181:                ecmap[i] = -num_xlations;
                    182:            
                    183:            else
                    184:                {
                    185:                /* make an equivalence class for those characters not
                    186:                 * specified in the %t table
                    187:                 */
                    188:                ++num_xlations;
                    189:                ecmap[i] = num_xlations;
                    190:                did_default_xlation_class = true;
                    191:                }
                    192:            }
                    193: 
                    194:        else
                    195:            ecmap[i] = xlation[i];
                    196: 
                    197:     if ( nul_is_alone )
                    198:        /* force NUL's equivalence class to be the last one */
                    199:        {
                    200:        ++num_xlations;
                    201:        ecmap[0] = num_xlations;
                    202: 
                    203:        /* there's actually a bug here: if someone is fanatic enough to
                    204:         * put every character in its own translation class, then right
                    205:         * now we just promoted NUL's equivalence class to be csize + 1;
                    206:         * we can handle NUL's class number being == csize (by instead
                    207:         * putting it in its own table), but we can't handle some *other*
                    208:         * character having to be put in its own table, too.  So in
                    209:         * this case we bail out.
                    210:         */
                    211:        if ( num_xlations > csize )
                    212:            flexfatal( "too many %t classes!" );
                    213:        }
                    214: 
                    215:     return num_xlations;
                    216:     }
                    217: 
                    218: 
                    219: /* mkeccl - update equivalence classes based on character class xtions
                    220:  *
                    221:  * synopsis
                    222:  *    Char ccls[];
                    223:  *    int lenccl, fwd[llsiz], bck[llsiz], llsiz, NUL_mapping;
                    224:  *    mkeccl( ccls, lenccl, fwd, bck, llsiz, NUL_mapping );
                    225:  *
                    226:  * where ccls contains the elements of the character class, lenccl is the
                    227:  * number of elements in the ccl, fwd is the forward link-list of equivalent
                    228:  * characters, bck is the backward link-list, and llsiz size of the link-list
                    229:  *
                    230:  * NUL_mapping is the value which NUL (0) should be mapped to.
                    231:  */
                    232: 
                    233: void mkeccl( ccls, lenccl, fwd, bck, llsiz, NUL_mapping )
                    234: Char ccls[];
                    235: int lenccl, fwd[], bck[], llsiz, NUL_mapping;
                    236: 
                    237:     {
                    238:     int cclp, oldec, newec;
                    239:     int cclm, i, j;
                    240:     static unsigned char cclflags[CSIZE];      /* initialized to all '\0' */
                    241: 
                    242:     /* note that it doesn't matter whether or not the character class is
                    243:      * negated.  The same results will be obtained in either case.
                    244:      */
                    245: 
                    246:     cclp = 0;
                    247: 
                    248:     while ( cclp < lenccl )
                    249:        {
                    250:        cclm = ccls[cclp];
                    251: 
                    252:        if ( NUL_mapping && cclm == 0 )
                    253:            cclm = NUL_mapping;
                    254: 
                    255:        oldec = bck[cclm];
                    256:        newec = cclm;
                    257: 
                    258:        j = cclp + 1;
                    259: 
                    260:        for ( i = fwd[cclm]; i != NIL && i <= llsiz; i = fwd[i] )
                    261:            { /* look for the symbol in the character class */
                    262:            for ( ; j < lenccl; ++j )
                    263:                {
                    264:                register int ccl_char;
                    265: 
                    266:                if ( NUL_mapping && ccls[j] == 0 )
                    267:                    ccl_char = NUL_mapping;
                    268:                else
                    269:                    ccl_char = ccls[j];
                    270: 
                    271:                if ( ccl_char > i )
                    272:                    break;
                    273: 
                    274:                if ( ccl_char == i && ! cclflags[j] )
                    275:                    {
                    276:                    /* we found an old companion of cclm in the ccl.
                    277:                     * link it into the new equivalence class and flag it as
                    278:                     * having been processed
                    279:                     */
                    280: 
                    281:                    bck[i] = newec;
                    282:                    fwd[newec] = i;
                    283:                    newec = i;
                    284:                    cclflags[j] = 1;    /* set flag so we don't reprocess */
                    285: 
                    286:                    /* get next equivalence class member */
                    287:                    /* continue 2 */
                    288:                    goto next_pt;
                    289:                    }
                    290:                }
                    291: 
                    292:            /* symbol isn't in character class.  Put it in the old equivalence
                    293:             * class
                    294:             */
                    295: 
                    296:            bck[i] = oldec;
                    297: 
                    298:            if ( oldec != NIL )
                    299:                fwd[oldec] = i;
                    300: 
                    301:            oldec = i;
                    302: next_pt:
                    303:            ;
                    304:            }
                    305: 
                    306:        if ( bck[cclm] != NIL || oldec != bck[cclm] )
                    307:            {
                    308:            bck[cclm] = NIL;
                    309:            fwd[oldec] = NIL;
                    310:            }
                    311: 
                    312:        fwd[newec] = NIL;
                    313: 
                    314:        /* find next ccl member to process */
                    315: 
                    316:        for ( ++cclp; cclflags[cclp] && cclp < lenccl; ++cclp )
                    317:            {
                    318:            /* reset "doesn't need processing" flag */
                    319:            cclflags[cclp] = 0;
                    320:            }
                    321:        }
                    322:     }
                    323: 
                    324: 
                    325: /* mkechar - create equivalence class for single character
                    326:  *
                    327:  * synopsis
                    328:  *    int tch, fwd[], bck[];
                    329:  *    mkechar( tch, fwd, bck );
                    330:  */
                    331: 
                    332: void mkechar( tch, fwd, bck )
                    333: int tch, fwd[], bck[];
                    334: 
                    335:     {
                    336:     /* if until now the character has been a proper subset of
                    337:      * an equivalence class, break it away to create a new ec
                    338:      */
                    339: 
                    340:     if ( fwd[tch] != NIL )
                    341:        bck[fwd[tch]] = bck[tch];
                    342: 
                    343:     if ( bck[tch] != NIL )
                    344:        fwd[bck[tch]] = fwd[tch];
                    345: 
                    346:     fwd[tch] = NIL;
                    347:     bck[tch] = NIL;
                    348:     }

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.