Annotation of 43BSDReno/pgrm/lex/ecs.c, revision 1.1

1.1     ! root        1: /*-
        !             2:  * Copyright (c) 1990 The Regents of the University of California.
        !             3:  * All rights reserved.
        !             4:  *
        !             5:  * This code is derived from software contributed to Berkeley by
        !             6:  * Vern Paxson.
        !             7:  * 
        !             8:  * The United States Government has rights in this work pursuant
        !             9:  * to contract no. DE-AC03-76SF00098 between the United States
        !            10:  * Department of Energy and the University of California.
        !            11:  *
        !            12:  * Redistribution and use in source and binary forms are permitted provided
        !            13:  * that: (1) source distributions retain this entire copyright notice and
        !            14:  * comment, and (2) distributions including binaries display the following
        !            15:  * acknowledgement:  ``This product includes software developed by the
        !            16:  * University of California, Berkeley and its contributors'' in the
        !            17:  * documentation or other materials provided with the distribution and in
        !            18:  * all advertising materials mentioning features or use of this software.
        !            19:  * Neither the name of the University nor the names of its contributors may
        !            20:  * be used to endorse or promote products derived from this software without
        !            21:  * specific prior written permission.
        !            22:  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
        !            23:  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
        !            24:  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
        !            25:  */
        !            26: 
        !            27: #ifndef lint
        !            28: static char sccsid[] = "@(#)ecs.c      5.2 (Berkeley) 6/18/90";
        !            29: #endif /* not lint */
        !            30: 
        !            31: /* ecs - equivalence class routines */
        !            32: 
        !            33: #include "flexdef.h"
        !            34: 
        !            35: /* ccl2ecl - convert character classes to set of equivalence classes
        !            36:  *
        !            37:  * synopsis
        !            38:  *    ccl2ecl();
        !            39:  */
        !            40: 
        !            41: void ccl2ecl()
        !            42: 
        !            43:     {
        !            44:     int i, ich, newlen, cclp, ccls, cclmec;
        !            45: 
        !            46:     for ( i = 1; i <= lastccl; ++i )
        !            47:        {
        !            48:        /* we loop through each character class, and for each character
        !            49:         * in the class, add the character's equivalence class to the
        !            50:         * new "character" class we are creating.  Thus when we are all
        !            51:         * done, character classes will really consist of collections
        !            52:         * of equivalence classes
        !            53:         */
        !            54: 
        !            55:        newlen = 0;
        !            56:        cclp = cclmap[i];
        !            57: 
        !            58:        for ( ccls = 0; ccls < ccllen[i]; ++ccls )
        !            59:            {
        !            60:            ich = ccltbl[cclp + ccls];
        !            61:            cclmec = ecgroup[ich];
        !            62: 
        !            63:            if ( xlation && cclmec < 0 )
        !            64:                {
        !            65:                /* special hack--if we're doing %t tables then it's
        !            66:                 * possible that no representative of this character's
        !            67:                 * equivalence class is in the ccl.  So waiting till
        !            68:                 * we see the representative would be disastrous.  Instead,
        !            69:                 * we add this character's equivalence class anyway, if it's
        !            70:                 * not already present.
        !            71:                 */
        !            72:                int j;
        !            73: 
        !            74:                /* this loop makes this whole process n^2; but we don't
        !            75:                 * really care about %t performance anyway
        !            76:                 */
        !            77:                for ( j = 0; j < newlen; ++j )
        !            78:                    if ( ccltbl[cclp + j] == -cclmec )
        !            79:                        break;
        !            80: 
        !            81:                if ( j >= newlen )
        !            82:                    { /* no representative yet, add this one in */
        !            83:                    ccltbl[cclp + newlen] = -cclmec;
        !            84:                    ++newlen;
        !            85:                    }
        !            86:                }
        !            87: 
        !            88:            else if ( cclmec > 0 )
        !            89:                {
        !            90:                ccltbl[cclp + newlen] = cclmec;
        !            91:                ++newlen;
        !            92:                }
        !            93:            }
        !            94: 
        !            95:        ccllen[i] = newlen;
        !            96:        }
        !            97:     }
        !            98: 
        !            99: 
        !           100: /* cre8ecs - associate equivalence class numbers with class members
        !           101:  *
        !           102:  * synopsis
        !           103:  *    int cre8ecs();
        !           104:  *    number of classes = cre8ecs( fwd, bck, num );
        !           105:  *
        !           106:  *  fwd is the forward linked-list of equivalence class members.  bck
        !           107:  *  is the backward linked-list, and num is the number of class members.
        !           108:  *
        !           109:  *  Returned is the number of classes.
        !           110:  */
        !           111: 
        !           112: int cre8ecs( fwd, bck, num )
        !           113: int fwd[], bck[], num;
        !           114: 
        !           115:     {
        !           116:     int i, j, numcl;
        !           117: 
        !           118:     numcl = 0;
        !           119: 
        !           120:     /* create equivalence class numbers.  From now on, abs( bck(x) )
        !           121:      * is the equivalence class number for object x.  If bck(x)
        !           122:      * is positive, then x is the representative of its equivalence
        !           123:      * class.
        !           124:      */
        !           125:     for ( i = 1; i <= num; ++i )
        !           126:        if ( bck[i] == NIL )
        !           127:            {
        !           128:            bck[i] = ++numcl;
        !           129:            for ( j = fwd[i]; j != NIL; j = fwd[j] )
        !           130:                bck[j] = -numcl;
        !           131:            }
        !           132: 
        !           133:     return ( numcl );
        !           134:     }
        !           135: 
        !           136: 
        !           137: /* ecs_from_xlation - associate equivalence class numbers using %t table
        !           138:  *
        !           139:  * synopsis
        !           140:  *    numecs = ecs_from_xlation( ecmap );
        !           141:  *
        !           142:  *  Upon return, ecmap will map each character code to its equivalence
        !           143:  *  class.  The mapping will be positive if the character is the representative
        !           144:  *  of its class, negative otherwise.
        !           145:  *
        !           146:  *  Returns the number of equivalence classes used.
        !           147:  */
        !           148: 
        !           149: int ecs_from_xlation( ecmap )
        !           150: int ecmap[];
        !           151: 
        !           152:     {
        !           153:     int i;
        !           154:     int nul_is_alone = false;
        !           155:     int did_default_xlation_class = false;
        !           156: 
        !           157:     if ( xlation[0] != 0 )
        !           158:        {
        !           159:        /* if NUL shares its translation with other characters, choose one
        !           160:         * of the other characters as the representative for the equivalence
        !           161:         * class.  This allows a cheap test later to see whether we can
        !           162:         * do away with NUL's equivalence class.
        !           163:         */
        !           164:        for ( i = 1; i < csize; ++i )
        !           165:            if ( xlation[i] == -xlation[0] )
        !           166:                {
        !           167:                xlation[i] = xlation[0];
        !           168:                ecmap[0] = -xlation[0];
        !           169:                break;
        !           170:                }
        !           171: 
        !           172:        if ( i >= csize )
        !           173:            /* didn't find a companion character--remember this fact */
        !           174:            nul_is_alone = true;
        !           175:        }
        !           176: 
        !           177:     for ( i = 1; i < csize; ++i )
        !           178:        if ( xlation[i] == 0 )
        !           179:            {
        !           180:            if ( did_default_xlation_class )
        !           181:                ecmap[i] = -num_xlations;
        !           182:            
        !           183:            else
        !           184:                {
        !           185:                /* make an equivalence class for those characters not
        !           186:                 * specified in the %t table
        !           187:                 */
        !           188:                ++num_xlations;
        !           189:                ecmap[i] = num_xlations;
        !           190:                did_default_xlation_class = true;
        !           191:                }
        !           192:            }
        !           193: 
        !           194:        else
        !           195:            ecmap[i] = xlation[i];
        !           196: 
        !           197:     if ( nul_is_alone )
        !           198:        /* force NUL's equivalence class to be the last one */
        !           199:        {
        !           200:        ++num_xlations;
        !           201:        ecmap[0] = num_xlations;
        !           202: 
        !           203:        /* there's actually a bug here: if someone is fanatic enough to
        !           204:         * put every character in its own translation class, then right
        !           205:         * now we just promoted NUL's equivalence class to be csize + 1;
        !           206:         * we can handle NUL's class number being == csize (by instead
        !           207:         * putting it in its own table), but we can't handle some *other*
        !           208:         * character having to be put in its own table, too.  So in
        !           209:         * this case we bail out.
        !           210:         */
        !           211:        if ( num_xlations > csize )
        !           212:            flexfatal( "too many %t classes!" );
        !           213:        }
        !           214: 
        !           215:     return num_xlations;
        !           216:     }
        !           217: 
        !           218: 
        !           219: /* mkeccl - update equivalence classes based on character class xtions
        !           220:  *
        !           221:  * synopsis
        !           222:  *    Char ccls[];
        !           223:  *    int lenccl, fwd[llsiz], bck[llsiz], llsiz, NUL_mapping;
        !           224:  *    mkeccl( ccls, lenccl, fwd, bck, llsiz, NUL_mapping );
        !           225:  *
        !           226:  * where ccls contains the elements of the character class, lenccl is the
        !           227:  * number of elements in the ccl, fwd is the forward link-list of equivalent
        !           228:  * characters, bck is the backward link-list, and llsiz size of the link-list
        !           229:  *
        !           230:  * NUL_mapping is the value which NUL (0) should be mapped to.
        !           231:  */
        !           232: 
        !           233: void mkeccl( ccls, lenccl, fwd, bck, llsiz, NUL_mapping )
        !           234: Char ccls[];
        !           235: int lenccl, fwd[], bck[], llsiz, NUL_mapping;
        !           236: 
        !           237:     {
        !           238:     int cclp, oldec, newec;
        !           239:     int cclm, i, j;
        !           240:     static unsigned char cclflags[CSIZE];      /* initialized to all '\0' */
        !           241: 
        !           242:     /* note that it doesn't matter whether or not the character class is
        !           243:      * negated.  The same results will be obtained in either case.
        !           244:      */
        !           245: 
        !           246:     cclp = 0;
        !           247: 
        !           248:     while ( cclp < lenccl )
        !           249:        {
        !           250:        cclm = ccls[cclp];
        !           251: 
        !           252:        if ( NUL_mapping && cclm == 0 )
        !           253:            cclm = NUL_mapping;
        !           254: 
        !           255:        oldec = bck[cclm];
        !           256:        newec = cclm;
        !           257: 
        !           258:        j = cclp + 1;
        !           259: 
        !           260:        for ( i = fwd[cclm]; i != NIL && i <= llsiz; i = fwd[i] )
        !           261:            { /* look for the symbol in the character class */
        !           262:            for ( ; j < lenccl; ++j )
        !           263:                {
        !           264:                register int ccl_char;
        !           265: 
        !           266:                if ( NUL_mapping && ccls[j] == 0 )
        !           267:                    ccl_char = NUL_mapping;
        !           268:                else
        !           269:                    ccl_char = ccls[j];
        !           270: 
        !           271:                if ( ccl_char > i )
        !           272:                    break;
        !           273: 
        !           274:                if ( ccl_char == i && ! cclflags[j] )
        !           275:                    {
        !           276:                    /* we found an old companion of cclm in the ccl.
        !           277:                     * link it into the new equivalence class and flag it as
        !           278:                     * having been processed
        !           279:                     */
        !           280: 
        !           281:                    bck[i] = newec;
        !           282:                    fwd[newec] = i;
        !           283:                    newec = i;
        !           284:                    cclflags[j] = 1;    /* set flag so we don't reprocess */
        !           285: 
        !           286:                    /* get next equivalence class member */
        !           287:                    /* continue 2 */
        !           288:                    goto next_pt;
        !           289:                    }
        !           290:                }
        !           291: 
        !           292:            /* symbol isn't in character class.  Put it in the old equivalence
        !           293:             * class
        !           294:             */
        !           295: 
        !           296:            bck[i] = oldec;
        !           297: 
        !           298:            if ( oldec != NIL )
        !           299:                fwd[oldec] = i;
        !           300: 
        !           301:            oldec = i;
        !           302: next_pt:
        !           303:            ;
        !           304:            }
        !           305: 
        !           306:        if ( bck[cclm] != NIL || oldec != bck[cclm] )
        !           307:            {
        !           308:            bck[cclm] = NIL;
        !           309:            fwd[oldec] = NIL;
        !           310:            }
        !           311: 
        !           312:        fwd[newec] = NIL;
        !           313: 
        !           314:        /* find next ccl member to process */
        !           315: 
        !           316:        for ( ++cclp; cclflags[cclp] && cclp < lenccl; ++cclp )
        !           317:            {
        !           318:            /* reset "doesn't need processing" flag */
        !           319:            cclflags[cclp] = 0;
        !           320:            }
        !           321:        }
        !           322:     }
        !           323: 
        !           324: 
        !           325: /* mkechar - create equivalence class for single character
        !           326:  *
        !           327:  * synopsis
        !           328:  *    int tch, fwd[], bck[];
        !           329:  *    mkechar( tch, fwd, bck );
        !           330:  */
        !           331: 
        !           332: void mkechar( tch, fwd, bck )
        !           333: int tch, fwd[], bck[];
        !           334: 
        !           335:     {
        !           336:     /* if until now the character has been a proper subset of
        !           337:      * an equivalence class, break it away to create a new ec
        !           338:      */
        !           339: 
        !           340:     if ( fwd[tch] != NIL )
        !           341:        bck[fwd[tch]] = bck[tch];
        !           342: 
        !           343:     if ( bck[tch] != NIL )
        !           344:        fwd[bck[tch]] = fwd[tch];
        !           345: 
        !           346:     fwd[tch] = NIL;
        !           347:     bck[tch] = NIL;
        !           348:     }

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.