43BSDReno/lib/libc/gen/regexp/regexp.c - annotate

Return to regexp.c CVS log
Up to [CSRG BSD Unix] / 43BSDReno / lib / libc / gen / regexp
Annotation of 43BSDReno/lib/libc/gen/regexp/regexp.c, revision 1.1

1.1     ! root        1: /*
        !             2:  * regcomp and regexec -- regsub and regerror are elsewhere
        !             3:  *
        !             4:  *     Copyright (c) 1986 by University of Toronto.
        !             5:  *     Written by Henry Spencer.  Not derived from licensed software.
        !             6:  *
        !             7:  *     Permission is granted to anyone to use this software for any
        !             8:  *     purpose on any computer system, and to redistribute it freely,
        !             9:  *     subject to the following restrictions:
        !            10:  *
        !            11:  *     1. The author is not responsible for the consequences of use of
        !            12:  *             this software, no matter how awful, even if they arise
        !            13:  *             from defects in it.
        !            14:  *
        !            15:  *     2. The origin of this software must not be misrepresented, either
        !            16:  *             by explicit claim or by omission.
        !            17:  *
        !            18:  *     3. Altered versions must be plainly marked as such, and must not
        !            19:  *             be misrepresented as being the original software.
        !            20:  *** THIS IS AN ALTERED VERSION.  It was altered by John Gilmore,
        !            21:  *** hoptoad!gnu, on 27 Dec 1986, to add \n as an alternative to |
        !            22:  *** to assist in implementing egrep.
        !            23:  *** THIS IS AN ALTERED VERSION.  It was altered by John Gilmore,
        !            24:  *** hoptoad!gnu, on 27 Dec 1986, to add \< and \> for word-matching
        !            25:  *** as in BSD grep and ex.
        !            26:  *** THIS IS AN ALTERED VERSION.  It was altered by John Gilmore,
        !            27:  *** hoptoad!gnu, on 28 Dec 1986, to optimize characters quoted with \.
        !            28:  *** THIS IS AN ALTERED VERSION.  It was altered by James A. Woods,
        !            29:  *** ames!jaw, on 19 June 1987, to quash a regcomp() redundancy.
        !            30:  *
        !            31:  * Beware that some of this code is subtly aware of the way operator
        !            32:  * precedence is structured in regular expressions.  Serious changes in
        !            33:  * regular-expression syntax might require a total rethink.
        !            34:  */
        !            35: #include <stdio.h>
        !            36: #include <ctype.h>
        !            37: #include <regexp.h>
        !            38: #include "regmagic.h"
        !            39: 
        !            40: /*
        !            41:  * The "internal use only" fields in regexp.h are present to pass info from
        !            42:  * compile to execute that permits the execute phase to run lots faster on
        !            43:  * simple cases.  They are:
        !            44:  *
        !            45:  * regstart    char that must begin a match; '\0' if none obvious
        !            46:  * reganch     is the match anchored (at beginning-of-line only)?
        !            47:  * regmust     string (pointer into program) that match must include, or NULL
        !            48:  * regmlen     length of regmust string
        !            49:  *
        !            50:  * Regstart and reganch permit very fast decisions on suitable starting points
        !            51:  * for a match, cutting down the work a lot.  Regmust permits fast rejection
        !            52:  * of lines that cannot possibly match.  The regmust tests are costly enough
        !            53:  * that regcomp() supplies a regmust only if the r.e. contains something
        !            54:  * potentially expensive (at present, the only such thing detected is * or +
        !            55:  * at the start of the r.e., which can involve a lot of backup).  Regmlen is
        !            56:  * supplied because the test in regexec() needs it and regcomp() is computing
        !            57:  * it anyway.
        !            58:  */
        !            59: 
        !            60: /*
        !            61:  * Structure for regexp "program".  This is essentially a linear encoding
        !            62:  * of a nondeterministic finite-state machine (aka syntax charts or
        !            63:  * "railroad normal form" in parsing technology).  Each node is an opcode
        !            64:  * plus a "next" pointer, possibly plus an operand.  "Next" pointers of
        !            65:  * all nodes except BRANCH implement concatenation; a "next" pointer with
        !            66:  * a BRANCH on both ends of it is connecting two alternatives.  (Here we
        !            67:  * have one of the subtle syntax dependencies:  an individual BRANCH (as
        !            68:  * opposed to a collection of them) is never concatenated with anything
        !            69:  * because of operator precedence.)  The operand of some types of node is
        !            70:  * a literal string; for others, it is a node leading into a sub-FSM.  In
        !            71:  * particular, the operand of a BRANCH node is the first node of the branch.
        !            72:  * (NB this is *not* a tree structure:  the tail of the branch connects
        !            73:  * to the thing following the set of BRANCHes.)  The opcodes are:
        !            74:  */
        !            75: 
        !            76: /* definition  number  opnd?   meaning */
        !            77: #define        END     0       /* no   End of program. */
        !            78: #define        BOL     1       /* no   Match "" at beginning of line. */
        !            79: #define        EOL     2       /* no   Match "" at end of line. */
        !            80: #define        ANY     3       /* no   Match any one character. */
        !            81: #define        ANYOF   4       /* str  Match any character in this string. */
        !            82: #define        ANYBUT  5       /* str  Match any character not in this string. */
        !            83: #define        BRANCH  6       /* node Match this alternative, or the next... */
        !            84: #define        BACK    7       /* no   Match "", "next" ptr points backward. */
        !            85: #define        EXACTLY 8       /* str  Match this string. */
        !            86: #define        NOTHING 9       /* no   Match empty string. */
        !            87: #define        STAR    10      /* node Match this (simple) thing 0 or more times. */
        !            88: #define        PLUS    11      /* node Match this (simple) thing 1 or more times. */
        !            89: #define        WORDA   12      /* no   Match "" at wordchar, where prev is nonword */
        !            90: #define        WORDZ   13      /* no   Match "" at nonwordchar, where prev is word */
        !            91: #define        OPEN    20      /* no   Mark this point in input as start of #n. */
        !            92:                        /*      OPEN+1 is number 1, etc. */
        !            93: #define        CLOSE   30      /* no   Analogous to OPEN. */
        !            94: 
        !            95: /*
        !            96:  * Opcode notes:
        !            97:  *
        !            98:  * BRANCH      The set of branches constituting a single choice are hooked
        !            99:  *             together with their "next" pointers, since precedence prevents
        !           100:  *             anything being concatenated to any individual branch.  The
        !           101:  *             "next" pointer of the last BRANCH in a choice points to the
        !           102:  *             thing following the whole choice.  This is also where the
        !           103:  *             final "next" pointer of each individual branch points; each
        !           104:  *             branch starts with the operand node of a BRANCH node.
        !           105:  *
        !           106:  * BACK                Normal "next" pointers all implicitly point forward; BACK
        !           107:  *             exists to make loop structures possible.
        !           108:  *
        !           109:  * STAR,PLUS   '?', and complex '*' and '+', are implemented as circular
        !           110:  *             BRANCH structures using BACK.  Simple cases (one character
        !           111:  *             per match) are implemented with STAR and PLUS for speed
        !           112:  *             and to minimize recursive plunges.
        !           113:  *
        !           114:  * OPEN,CLOSE  ...are numbered at compile time.
        !           115:  */
        !           116: 
        !           117: /*
        !           118:  * A node is one char of opcode followed by two chars of "next" pointer.
        !           119:  * "Next" pointers are stored as two 8-bit pieces, high order first.  The
        !           120:  * value is a positive offset from the opcode of the node containing it.
        !           121:  * An operand, if any, simply follows the node.  (Note that much of the
        !           122:  * code generation knows about this implicit relationship.)
        !           123:  *
        !           124:  * Using two bytes for the "next" pointer is vast overkill for most things,
        !           125:  * but allows patterns to get big without disasters.
        !           126:  */
        !           127: #define        OP(p)   (*(p))
        !           128: #define        NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377))
        !           129: #define        OPERAND(p)      ((p) + 3)
        !           130: 
        !           131: /*
        !           132:  * See regmagic.h for one further detail of program structure.
        !           133:  */
        !           134: 
        !           135: 
        !           136: /*
        !           137:  * Utility definitions.
        !           138:  */
        !           139: #ifndef CHARBITS
        !           140: #define        UCHARAT(p)      ((int)*(unsigned char *)(p))
        !           141: #else
        !           142: #define        UCHARAT(p)      ((int)*(p)&CHARBITS)
        !           143: #endif
        !           144: 
        !           145: #define        FAIL(m) { regerror(m); return(NULL); }
        !           146: #define        ISMULT(c)       ((c) == '*' || (c) == '+' || (c) == '?')
        !           147: 
        !           148: /*
        !           149:  * Flags to be passed up and down.
        !           150:  */
        !           151: #define        HASWIDTH        01      /* Known never to match null string. */
        !           152: #define        SIMPLE          02      /* Simple enough to be STAR/PLUS operand. */
        !           153: #define        SPSTART         04      /* Starts with * or +. */
        !           154: #define        WORST           0       /* Worst case. */
        !           155: 
        !           156: /*
        !           157:  * Global work variables for regcomp().
        !           158:  */
        !           159: static char *regparse;         /* Input-scan pointer. */
        !           160: static int regnpar;            /* () count. */
        !           161: static char regdummy;
        !           162: static char *regcode;          /* Code-emit pointer; &regdummy = don't. */
        !           163: static long regsize;           /* Code size. */
        !           164: 
        !           165: /*
        !           166:  * Forward declarations for regcomp()'s friends.
        !           167:  */
        !           168: #ifndef STATIC
        !           169: #define        STATIC  static
        !           170: #endif
        !           171: STATIC char *reg();
        !           172: STATIC char *regbranch();
        !           173: STATIC char *regpiece();
        !           174: STATIC char *regatom();
        !           175: STATIC char *regnode();
        !           176: STATIC char *regnext();
        !           177: STATIC void regc();
        !           178: STATIC void reginsert();
        !           179: STATIC void regtail();
        !           180: STATIC void regoptail();
        !           181: #ifdef STRCSPN
        !           182: STATIC int strcspn();
        !           183: #endif
        !           184: 
        !           185: /*
        !           186:  - regcomp - compile a regular expression into internal code
        !           187:  *
        !           188:  * We can't allocate space until we know how big the compiled form will be,
        !           189:  * but we can't compile it (and thus know how big it is) until we've got a
        !           190:  * place to put the code.  So we cheat:  we compile it twice, once with code
        !           191:  * generation turned off and size counting turned on, and once "for real".
        !           192:  * This also means that we don't allocate space until we are sure that the
        !           193:  * thing really will compile successfully, and we never have to move the
        !           194:  * code and thus invalidate pointers into it.  (Note that it has to be in
        !           195:  * one piece because free() must be able to free it all.)
        !           196:  *
        !           197:  * Beware that the optimization-preparation code in here knows about some
        !           198:  * of the structure of the compiled regexp.
        !           199:  */
        !           200: regexp *
        !           201: regcomp(exp)
        !           202: char *exp;
        !           203: {
        !           204:        register regexp *r;
        !           205:        register char *scan;
        !           206:        register char *longest;
        !           207:        register int len;
        !           208:        int flags;
        !           209:        extern char *malloc();
        !           210: 
        !           211:        if (exp == NULL)
        !           212:                FAIL("NULL argument");
        !           213: 
        !           214:        /* First pass: determine size, legality. */
        !           215:        if (exp[0] == '.' && exp[1] == '*') exp += 2;  /* aid grep */
        !           216:        regparse = exp;
        !           217:        regnpar = 1;
        !           218:        regsize = 0L;
        !           219:        regcode = &regdummy;
        !           220:        regc(MAGIC);
        !           221:        if (reg(0, &flags) == NULL)
        !           222:                return(NULL);
        !           223: 
        !           224:        /* Small enough for pointer-storage convention? */
        !           225:        if (regsize >= 32767L)          /* Probably could be 65535L. */
        !           226:                FAIL("regexp too big");
        !           227: 
        !           228:        /* Allocate space. */
        !           229:        r = (regexp *)malloc(sizeof(regexp) + (unsigned)regsize);
        !           230:        if (r == NULL)
        !           231:                FAIL("out of space");
        !           232: 
        !           233:        /* Second pass: emit code. */
        !           234:        regparse = exp;
        !           235:        regnpar = 1;
        !           236:        regcode = r->program;
        !           237:        regc(MAGIC);
        !           238:        if (reg(0, &flags) == NULL)
        !           239:                return(NULL);
        !           240: 
        !           241:        /* Dig out information for optimizations. */
        !           242:        r->regstart = '\0';     /* Worst-case defaults. */
        !           243:        r->reganch = 0;
        !           244:        r->regmust = NULL;
        !           245:        r->regmlen = 0;
        !           246:        scan = r->program+1;                    /* First BRANCH. */
        !           247:        if (OP(regnext(scan)) == END) {         /* Only one top-level choice. */
        !           248:                scan = OPERAND(scan);
        !           249: 
        !           250:                /* Starting-point info. */
        !           251:                if (OP(scan) == EXACTLY)
        !           252:                        r->regstart = *OPERAND(scan);
        !           253:                else if (OP(scan) == BOL)
        !           254:                        r->reganch++;
        !           255: 
        !           256:                /*
        !           257:                 * If there's something expensive in the r.e., find the
        !           258:                 * longest literal string that must appear and make it the
        !           259:                 * regmust.  Resolve ties in favor of later strings, since
        !           260:                 * the regstart check works with the beginning of the r.e.
        !           261:                 * and avoiding duplication strengthens checking.  Not a
        !           262:                 * strong reason, but sufficient in the absence of others.
        !           263:                 */
        !           264:                if (flags&SPSTART) {
        !           265:                        longest = NULL;
        !           266:                        len = 0;
        !           267:                        for (; scan != NULL; scan = regnext(scan))
        !           268:                                if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) {
        !           269:                                        longest = OPERAND(scan);
        !           270:                                        len = strlen(OPERAND(scan));
        !           271:                                }
        !           272:                        r->regmust = longest;
        !           273:                        r->regmlen = len;
        !           274:                }
        !           275:        }
        !           276: 
        !           277:        return(r);
        !           278: }
        !           279: 
        !           280: /*
        !           281:  - reg - regular expression, i.e. main body or parenthesized thing
        !           282:  *
        !           283:  * Caller must absorb opening parenthesis.
        !           284:  *
        !           285:  * Combining parenthesis handling with the base level of regular expression
        !           286:  * is a trifle forced, but the need to tie the tails of the branches to what
        !           287:  * follows makes it hard to avoid.
        !           288:  */
        !           289: static char *
        !           290: reg(paren, flagp)
        !           291: int paren;                     /* Parenthesized? */
        !           292: int *flagp;
        !           293: {
        !           294:        register char *ret;
        !           295:        register char *br;
        !           296:        register char *ender;
        !           297:        register int parno;
        !           298:        int flags;
        !           299: 
        !           300:        *flagp = HASWIDTH;      /* Tentatively. */
        !           301: 
        !           302:        /* Make an OPEN node, if parenthesized. */
        !           303:        if (paren) {
        !           304:                if (regnpar >= NSUBEXP)
        !           305:                        FAIL("too many ()");
        !           306:                parno = regnpar;
        !           307:                regnpar++;
        !           308:                ret = regnode(OPEN+parno);
        !           309:        } else
        !           310:                ret = NULL;
        !           311: 
        !           312:        /* Pick up the branches, linking them together. */
        !           313:        br = regbranch(&flags);
        !           314:        if (br == NULL)
        !           315:                return(NULL);
        !           316:        if (ret != NULL)
        !           317:                regtail(ret, br);       /* OPEN -> first. */
        !           318:        else
        !           319:                ret = br;
        !           320:        if (!(flags&HASWIDTH))
        !           321:                *flagp &= ~HASWIDTH;
        !           322:        *flagp |= flags&SPSTART;
        !           323:        while (*regparse == '|' || *regparse == '\n') {
        !           324:                regparse++;
        !           325:                br = regbranch(&flags);
        !           326:                if (br == NULL)
        !           327:                        return(NULL);
        !           328:                regtail(ret, br);       /* BRANCH -> BRANCH. */
        !           329:                if (!(flags&HASWIDTH))
        !           330:                        *flagp &= ~HASWIDTH;
        !           331:                *flagp |= flags&SPSTART;
        !           332:        }
        !           333: 
        !           334:        /* Make a closing node, and hook it on the end. */
        !           335:        ender = regnode((paren) ? CLOSE+parno : END);   
        !           336:        regtail(ret, ender);
        !           337: 
        !           338:        /* Hook the tails of the branches to the closing node. */
        !           339:        for (br = ret; br != NULL; br = regnext(br))
        !           340:                regoptail(br, ender);
        !           341: 
        !           342:        /* Check for proper termination. */
        !           343:        if (paren && *regparse++ != ')') {
        !           344:                FAIL("unmatched ()");
        !           345:        } else if (!paren && *regparse != '\0') {
        !           346:                if (*regparse == ')') {
        !           347:                        FAIL("unmatched ()");
        !           348:                } else
        !           349:                        FAIL("junk on end");    /* "Can't happen". */
        !           350:                /* NOTREACHED */
        !           351:        }
        !           352: 
        !           353:        return(ret);
        !           354: }
        !           355: 
        !           356: /*
        !           357:  - regbranch - one alternative of an | operator
        !           358:  *
        !           359:  * Implements the concatenation operator.
        !           360:  */
        !           361: static char *
        !           362: regbranch(flagp)
        !           363: int *flagp;
        !           364: {
        !           365:        register char *ret;
        !           366:        register char *chain;
        !           367:        register char *latest;
        !           368:        int flags;
        !           369: 
        !           370:        *flagp = WORST;         /* Tentatively. */
        !           371: 
        !           372:        ret = regnode(BRANCH);
        !           373:        chain = NULL;
        !           374:        while (*regparse != '\0' && *regparse != ')' &&
        !           375:               *regparse != '\n' && *regparse != '|') {
        !           376:                latest = regpiece(&flags);
        !           377:                if (latest == NULL)
        !           378:                        return(NULL);
        !           379:                *flagp |= flags&HASWIDTH;
        !           380:                if (chain == NULL)      /* First piece. */
        !           381:                        *flagp |= flags&SPSTART;
        !           382:                else
        !           383:                        regtail(chain, latest);
        !           384:                chain = latest;
        !           385:        }
        !           386:        if (chain == NULL)      /* Loop ran zero times. */
        !           387:                (void) regnode(NOTHING);
        !           388: 
        !           389:        return(ret);
        !           390: }
        !           391: 
        !           392: /*
        !           393:  - regpiece - something followed by possible [*+?]
        !           394:  *
        !           395:  * Note that the branching code sequences used for ? and the general cases
        !           396:  * of * and + are somewhat optimized:  they use the same NOTHING node as
        !           397:  * both the endmarker for their branch list and the body of the last branch.
        !           398:  * It might seem that this node could be dispensed with entirely, but the
        !           399:  * endmarker role is not redundant.
        !           400:  */
        !           401: static char *
        !           402: regpiece(flagp)
        !           403: int *flagp;
        !           404: {
        !           405:        register char *ret;
        !           406:        register char op;
        !           407:        register char *next;
        !           408:        int flags;
        !           409: 
        !           410:        ret = regatom(&flags);
        !           411:        if (ret == NULL)
        !           412:                return(NULL);
        !           413: 
        !           414:        op = *regparse;
        !           415:        if (!ISMULT(op)) {
        !           416:                *flagp = flags;
        !           417:                return(ret);
        !           418:        }
        !           419: 
        !           420:        if (!(flags&HASWIDTH) && op != '?')
        !           421:                FAIL("*+ operand could be empty");
        !           422:        *flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH);
        !           423: 
        !           424:        if (op == '*' && (flags&SIMPLE))
        !           425:                reginsert(STAR, ret);
        !           426:        else if (op == '*') {
        !           427:                /* Emit x* as (x&|), where & means "self". */
        !           428:                reginsert(BRANCH, ret);                 /* Either x */
        !           429:                regoptail(ret, regnode(BACK));          /* and loop */
        !           430:                regoptail(ret, ret);                    /* back */
        !           431:                regtail(ret, regnode(BRANCH));          /* or */
        !           432:                regtail(ret, regnode(NOTHING));         /* null. */
        !           433:        } else if (op == '+' && (flags&SIMPLE))
        !           434:                reginsert(PLUS, ret);
        !           435:        else if (op == '+') {
        !           436:                /* Emit x+ as x(&|), where & means "self". */
        !           437:                next = regnode(BRANCH);                 /* Either */
        !           438:                regtail(ret, next);
        !           439:                regtail(regnode(BACK), ret);            /* loop back */
        !           440:                regtail(next, regnode(BRANCH));         /* or */
        !           441:                regtail(ret, regnode(NOTHING));         /* null. */
        !           442:        } else if (op == '?') {
        !           443:                /* Emit x? as (x|) */
        !           444:                reginsert(BRANCH, ret);                 /* Either x */
        !           445:                regtail(ret, regnode(BRANCH));          /* or */
        !           446:                next = regnode(NOTHING);                /* null. */
        !           447:                regtail(ret, next);
        !           448:                regoptail(ret, next);
        !           449:        }
        !           450:        regparse++;
        !           451:        if (ISMULT(*regparse))
        !           452:                FAIL("nested *?+");
        !           453: 
        !           454:        return(ret);
        !           455: }
        !           456: 
        !           457: /*
        !           458:  - regatom - the lowest level
        !           459:  *
        !           460:  * Optimization:  gobbles an entire sequence of ordinary characters so that
        !           461:  * it can turn them into a single node, which is smaller to store and
        !           462:  * faster to run.  Backslashed characters are exceptions, each becoming a
        !           463:  * separate node; the code is simpler that way and it's not worth fixing.
        !           464:  */
        !           465: static char *
        !           466: regatom(flagp)
        !           467: int *flagp;
        !           468: {
        !           469:        register char *ret;
        !           470:        int flags;
        !           471: 
        !           472:        *flagp = WORST;         /* Tentatively. */
        !           473: 
        !           474:        switch (*regparse++) {
        !           475:        /* FIXME: these chars only have meaning at beg/end of pat? */
        !           476:        case '^':
        !           477:                ret = regnode(BOL);
        !           478:                break;
        !           479:        case '$':
        !           480:                ret = regnode(EOL);
        !           481:                break;
        !           482:        case '.':
        !           483:                ret = regnode(ANY);
        !           484:                *flagp |= HASWIDTH|SIMPLE;
        !           485:                break;
        !           486:        case '[': {
        !           487:                        register int class;
        !           488:                        register int classend;
        !           489: 
        !           490:                        if (*regparse == '^') { /* Complement of range. */
        !           491:                                ret = regnode(ANYBUT);
        !           492:                                regparse++;
        !           493:                        } else
        !           494:                                ret = regnode(ANYOF);
        !           495:                        if (*regparse == ']' || *regparse == '-')
        !           496:                                regc(*regparse++);
        !           497:                        while (*regparse != '\0' && *regparse != ']') {
        !           498:                                if (*regparse == '-') {
        !           499:                                        regparse++;
        !           500:                                        if (*regparse == ']' || *regparse == '\0')
        !           501:                                                regc('-');
        !           502:                                        else {
        !           503:                                                class = UCHARAT(regparse-2)+1;
        !           504:                                                classend = UCHARAT(regparse);
        !           505:                                                if (class > classend+1)
        !           506:                                                        FAIL("invalid [] range");
        !           507:                                                for (; class <= classend; class++)
        !           508:                                                        regc(class);
        !           509:                                                regparse++;
        !           510:                                        }
        !           511:                                } else
        !           512:                                        regc(*regparse++);
        !           513:                        }
        !           514:                        regc('\0');
        !           515:                        if (*regparse != ']')
        !           516:                                FAIL("unmatched []");
        !           517:                        regparse++;
        !           518:                        *flagp |= HASWIDTH|SIMPLE;
        !           519:                }
        !           520:                break;
        !           521:        case '(':
        !           522:                ret = reg(1, &flags);
        !           523:                if (ret == NULL)
        !           524:                        return(NULL);
        !           525:                *flagp |= flags&(HASWIDTH|SPSTART);
        !           526:                break;
        !           527:        case '\0':
        !           528:        case '|':
        !           529:        case '\n':
        !           530:        case ')':
        !           531:                FAIL("internal urp");   /* Supposed to be caught earlier. */
        !           532:                break;
        !           533:        case '?':
        !           534:        case '+':
        !           535:        case '*':
        !           536:                FAIL("?+* follows nothing");
        !           537:                break;
        !           538:        case '\\':
        !           539:                switch (*regparse++) {
        !           540:                case '\0':
        !           541:                        FAIL("trailing \\");
        !           542:                        break;
        !           543:                case '<':
        !           544:                        ret = regnode(WORDA);
        !           545:                        break;
        !           546:                case '>':
        !           547:                        ret = regnode(WORDZ);
        !           548:                        break;
        !           549:                /* FIXME: Someday handle \1, \2, ... */
        !           550:                default:
        !           551:                        /* Handle general quoted chars in exact-match routine */
        !           552:                        goto de_fault;
        !           553:                }
        !           554:                break;
        !           555:        de_fault:
        !           556:        default:
        !           557:                /*
        !           558:                 * Encode a string of characters to be matched exactly.
        !           559:                 *
        !           560:                 * This is a bit tricky due to quoted chars and due to
        !           561:                 * '*', '+', and '?' taking the SINGLE char previous
        !           562:                 * as their operand.
        !           563:                 *
        !           564:                 * On entry, the char at regparse[-1] is going to go
        !           565:                 * into the string, no matter what it is.  (It could be
        !           566:                 * following a \ if we are entered from the '\' case.)
        !           567:                 * 
        !           568:                 * Basic idea is to pick up a good char in  ch  and
        !           569:                 * examine the next char.  If it's *+? then we twiddle.
        !           570:                 * If it's \ then we frozzle.  If it's other magic char
        !           571:                 * we push  ch  and terminate the string.  If none of the
        !           572:                 * above, we push  ch  on the string and go around again.
        !           573:                 *
        !           574:                 *  regprev  is used to remember where "the current char"
        !           575:                 * starts in the string, if due to a *+? we need to back
        !           576:                 * up and put the current char in a separate, 1-char, string.
        !           577:                 * When  regprev  is NULL,  ch  is the only char in the
        !           578:                 * string; this is used in *+? handling, and in setting
        !           579:                 * flags |= SIMPLE at the end.
        !           580:                 */
        !           581:                {
        !           582:                        char *regprev;
        !           583:                        register char ch;
        !           584: 
        !           585:                        regparse--;                     /* Look at cur char */
        !           586:                        ret = regnode(EXACTLY);
        !           587:                        for ( regprev = 0 ; ; ) {
        !           588:                                ch = *regparse++;       /* Get current char */
        !           589:                                switch (*regparse) {    /* look at next one */
        !           590: 
        !           591:                                default:
        !           592:                                        regc(ch);       /* Add cur to string */
        !           593:                                        break;
        !           594: 
        !           595:                                case '.': case '[': case '(':
        !           596:                                case ')': case '|': case '\n':
        !           597:                                case '$': case '^':
        !           598:                                case '\0':
        !           599:                                /* FIXME, $ and ^ should not always be magic */
        !           600:                                magic:
        !           601:                                        regc(ch);       /* dump cur char */
        !           602:                                        goto done;      /* and we are done */
        !           603: 
        !           604:                                case '?': case '+': case '*':
        !           605:                                        if (!regprev)   /* If just ch in str, */
        !           606:                                                goto magic;     /* use it */
        !           607:                                        /* End mult-char string one early */
        !           608:                                        regparse = regprev; /* Back up parse */
        !           609:                                        goto done;
        !           610: 
        !           611:                                case '\\':
        !           612:                                        regc(ch);       /* Cur char OK */
        !           613:                                        switch (regparse[1]){ /* Look after \ */
        !           614:                                        case '\0':
        !           615:                                        case '<':
        !           616:                                        case '>':
        !           617:                                        /* FIXME: Someday handle \1, \2, ... */
        !           618:                                                goto done; /* Not quoted */
        !           619:                                        default:
        !           620:                                                /* Backup point is \, scan                                                       * point is after it. */
        !           621:                                                regprev = regparse;
        !           622:                                                regparse++; 
        !           623:                                                continue;       /* NOT break; */
        !           624:                                        }
        !           625:                                }
        !           626:                                regprev = regparse;     /* Set backup point */
        !           627:                        }
        !           628:                done:
        !           629:                        regc('\0');
        !           630:                        *flagp |= HASWIDTH;
        !           631:                        if (!regprev)           /* One char? */
        !           632:                                *flagp |= SIMPLE;
        !           633:                }
        !           634:                break;
        !           635:        }
        !           636: 
        !           637:        return(ret);
        !           638: }
        !           639: 
        !           640: /*
        !           641:  - regnode - emit a node
        !           642:  */
        !           643: static char *                  /* Location. */
        !           644: regnode(op)
        !           645: char op;
        !           646: {
        !           647:        register char *ret;
        !           648:        register char *ptr;
        !           649: 
        !           650:        ret = regcode;
        !           651:        if (ret == &regdummy) {
        !           652:                regsize += 3;
        !           653:                return(ret);
        !           654:        }
        !           655: 
        !           656:        ptr = ret;
        !           657:        *ptr++ = op;
        !           658:        *ptr++ = '\0';          /* Null "next" pointer. */
        !           659:        *ptr++ = '\0';
        !           660:        regcode = ptr;
        !           661: 
        !           662:        return(ret);
        !           663: }
        !           664: 
        !           665: /*
        !           666:  - regc - emit (if appropriate) a byte of code
        !           667:  */
        !           668: static void
        !           669: regc(b)
        !           670: char b;
        !           671: {
        !           672:        if (regcode != &regdummy)
        !           673:                *regcode++ = b;
        !           674:        else
        !           675:                regsize++;
        !           676: }
        !           677: 
        !           678: /*
        !           679:  - reginsert - insert an operator in front of already-emitted operand
        !           680:  *
        !           681:  * Means relocating the operand.
        !           682:  */
        !           683: static void
        !           684: reginsert(op, opnd)
        !           685: char op;
        !           686: char *opnd;
        !           687: {
        !           688:        register char *src;
        !           689:        register char *dst;
        !           690:        register char *place;
        !           691: 
        !           692:        if (regcode == &regdummy) {
        !           693:                regsize += 3;
        !           694:                return;
        !           695:        }
        !           696: 
        !           697:        src = regcode;
        !           698:        regcode += 3;
        !           699:        dst = regcode;
        !           700:        while (src > opnd)
        !           701:                *--dst = *--src;
        !           702: 
        !           703:        place = opnd;           /* Op node, where operand used to be. */
        !           704:        *place++ = op;
        !           705:        *place++ = '\0';
        !           706:        *place++ = '\0';
        !           707: }
        !           708: 
        !           709: /*
        !           710:  - regtail - set the next-pointer at the end of a node chain
        !           711:  */
        !           712: static void
        !           713: regtail(p, val)
        !           714: char *p;
        !           715: char *val;
        !           716: {
        !           717:        register char *scan;
        !           718:        register char *temp;
        !           719:        register int offset;
        !           720: 
        !           721:        if (p == &regdummy)
        !           722:                return;
        !           723: 
        !           724:        /* Find last node. */
        !           725:        scan = p;
        !           726:        for (;;) {
        !           727:                temp = regnext(scan);
        !           728:                if (temp == NULL)
        !           729:                        break;
        !           730:                scan = temp;
        !           731:        }
        !           732: 
        !           733:        if (OP(scan) == BACK)
        !           734:                offset = scan - val;
        !           735:        else
        !           736:                offset = val - scan;
        !           737:        *(scan+1) = (offset>>8)&0377;
        !           738:        *(scan+2) = offset&0377;
        !           739: }
        !           740: 
        !           741: /*
        !           742:  - regoptail - regtail on operand of first argument; nop if operandless
        !           743:  */
        !           744: static void
        !           745: regoptail(p, val)
        !           746: char *p;
        !           747: char *val;
        !           748: {
        !           749:        /* "Operandless" and "op != BRANCH" are synonymous in practice. */
        !           750:        if (p == NULL || p == &regdummy || OP(p) != BRANCH)
        !           751:                return;
        !           752:        regtail(OPERAND(p), val);
        !           753: }
        !           754: 
        !           755: /*
        !           756:  * regexec and friends
        !           757:  */
        !           758: 
        !           759: /*
        !           760:  * Global work variables for regexec().
        !           761:  */
        !           762: static char *reginput;         /* String-input pointer. */
        !           763: static char *regbol;           /* Beginning of input, for ^ check. */
        !           764: static char **regstartp;       /* Pointer to startp array. */
        !           765: static char **regendp;         /* Ditto for endp. */
        !           766: 
        !           767: /*
        !           768:  * Forwards.
        !           769:  */
        !           770: STATIC int regtry();
        !           771: STATIC int regmatch();
        !           772: STATIC int regrepeat();
        !           773: 
        !           774: #ifdef DEBUG
        !           775: int regnarrate = 0;
        !           776: void regdump();
        !           777: STATIC char *regprop();
        !           778: #endif
        !           779: 
        !           780: /*
        !           781:  - regexec - match a regexp against a string
        !           782:  */
        !           783: int
        !           784: regexec(prog, string)
        !           785: register regexp *prog;
        !           786: register char *string;
        !           787: {
        !           788:        register char *s;
        !           789:        extern char *strchr();
        !           790: 
        !           791:        /* Be paranoid... */
        !           792:        if (prog == NULL || string == NULL) {
        !           793:                regerror("NULL parameter");
        !           794:                return(0);
        !           795:        }
        !           796: 
        !           797:        /* Check validity of program. */
        !           798:        if (UCHARAT(prog->program) != MAGIC) {
        !           799:                regerror("corrupted program");
        !           800:                return(0);
        !           801:        }
        !           802: 
        !           803:        /* If there is a "must appear" string, look for it. */
        !           804:        if (prog->regmust != NULL) {
        !           805:                s = string;
        !           806:                while ((s = strchr(s, prog->regmust[0])) != NULL) {
        !           807:                        if (strncmp(s, prog->regmust, prog->regmlen) == 0)
        !           808:                                break;  /* Found it. */
        !           809:                        s++;
        !           810:                }
        !           811:                if (s == NULL)  /* Not present. */
        !           812:                        return(0);
        !           813:        }
        !           814: 
        !           815:        /* Mark beginning of line for ^ . */
        !           816:        regbol = string;
        !           817: 
        !           818:        /* Simplest case:  anchored match need be tried only once. */
        !           819:        if (prog->reganch)
        !           820:                return(regtry(prog, string));
        !           821: 
        !           822:        /* Messy cases:  unanchored match. */
        !           823:        s = string;
        !           824:        if (prog->regstart != '\0')
        !           825:                /* We know what char it must start with. */
        !           826:                while ((s = strchr(s, prog->regstart)) != NULL) {
        !           827:                        if (regtry(prog, s))
        !           828:                                return(1);
        !           829:                        s++;
        !           830:                }
        !           831:        else
        !           832:                /* We don't -- general case. */
        !           833:                do {
        !           834:                        if (regtry(prog, s))
        !           835:                                return(1);
        !           836:                } while (*s++ != '\0');
        !           837: 
        !           838:        /* Failure. */
        !           839:        return(0);
        !           840: }
        !           841: 
        !           842: /*
        !           843:  - regtry - try match at specific point
        !           844:  */
        !           845: static int                     /* 0 failure, 1 success */
        !           846: regtry(prog, string)
        !           847: regexp *prog;
        !           848: char *string;
        !           849: {
        !           850:        register int i;
        !           851:        register char **sp;
        !           852:        register char **ep;
        !           853: 
        !           854:        reginput = string;
        !           855:        regstartp = prog->startp;
        !           856:        regendp = prog->endp;
        !           857: 
        !           858:        sp = prog->startp;
        !           859:        ep = prog->endp;
        !           860:        for (i = NSUBEXP; i > 0; i--) {
        !           861:                *sp++ = NULL;
        !           862:                *ep++ = NULL;
        !           863:        }
        !           864:        if (regmatch(prog->program + 1)) {
        !           865:                prog->startp[0] = string;
        !           866:                prog->endp[0] = reginput;
        !           867:                return(1);
        !           868:        } else
        !           869:                return(0);
        !           870: }
        !           871: 
        !           872: /*
        !           873:  - regmatch - main matching routine
        !           874:  *
        !           875:  * Conceptually the strategy is simple:  check to see whether the current
        !           876:  * node matches, call self recursively to see whether the rest matches,
        !           877:  * and then act accordingly.  In practice we make some effort to avoid
        !           878:  * recursion, in particular by going through "ordinary" nodes (that don't
        !           879:  * need to know whether the rest of the match failed) by a loop instead of
        !           880:  * by recursion.
        !           881:  */
        !           882: static int                     /* 0 failure, 1 success */
        !           883: regmatch(prog)
        !           884: char *prog;
        !           885: {
        !           886:        register char *scan;    /* Current node. */
        !           887:        char *next;             /* Next node. */
        !           888:        extern char *strchr();
        !           889: 
        !           890:        scan = prog;
        !           891: #ifdef DEBUG
        !           892:        if (scan != NULL && regnarrate)
        !           893:                fprintf(stderr, "%s(\n", regprop(scan));
        !           894: #endif
        !           895:        while (scan != NULL) {
        !           896: #ifdef DEBUG
        !           897:                if (regnarrate)
        !           898:                        fprintf(stderr, "%s...\n", regprop(scan));
        !           899: #endif
        !           900:                next = regnext(scan);
        !           901: 
        !           902:                switch (OP(scan)) {
        !           903:                case BOL:
        !           904:                        if (reginput != regbol)
        !           905:                                return(0);
        !           906:                        break;
        !           907:                case EOL:
        !           908:                        if (*reginput != '\0')
        !           909:                                return(0);
        !           910:                        break;
        !           911:                case WORDA:
        !           912:                        /* Must be looking at a letter, digit, or _ */
        !           913:                        if ((!isalnum(*reginput)) && *reginput != '_')
        !           914:                                return(0);
        !           915:                        /* Prev must be BOL or nonword */
        !           916:                        if (reginput > regbol &&
        !           917:                            (isalnum(reginput[-1]) || reginput[-1] == '_'))
        !           918:                                return(0);
        !           919:                        break;
        !           920:                case WORDZ:
        !           921:                        /* Must be looking at non letter, digit, or _ */
        !           922:                        if (isalnum(*reginput) || *reginput == '_')
        !           923:                                return(0);
        !           924:                        /* We don't care what the previous char was */
        !           925:                        break;
        !           926:                case ANY:
        !           927:                        if (*reginput == '\0')
        !           928:                                return(0);
        !           929:                        reginput++;
        !           930:                        break;
        !           931:                case EXACTLY: {
        !           932:                                register int len;
        !           933:                                register char *opnd;
        !           934: 
        !           935:                                opnd = OPERAND(scan);
        !           936:                                /* Inline the first character, for speed. */
        !           937:                                if (*opnd != *reginput)
        !           938:                                        return(0);
        !           939:                                len = strlen(opnd);
        !           940:                                if (len > 1 && strncmp(opnd, reginput, len) != 0)
        !           941:                                        return(0);
        !           942:                                reginput += len;
        !           943:                        }
        !           944:                        break;
        !           945:                case ANYOF:
        !           946:                        if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) == NULL)
        !           947:                                return(0);
        !           948:                        reginput++;
        !           949:                        break;
        !           950:                case ANYBUT:
        !           951:                        if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) != NULL)
        !           952:                                return(0);
        !           953:                        reginput++;
        !           954:                        break;
        !           955:                case NOTHING:
        !           956:                        break;
        !           957:                case BACK:
        !           958:                        break;
        !           959:                case OPEN+1:
        !           960:                case OPEN+2:
        !           961:                case OPEN+3:
        !           962:                case OPEN+4:
        !           963:                case OPEN+5:
        !           964:                case OPEN+6:
        !           965:                case OPEN+7:
        !           966:                case OPEN+8:
        !           967:                case OPEN+9: {
        !           968:                                register int no;
        !           969:                                register char *save;
        !           970: 
        !           971:                                no = OP(scan) - OPEN;
        !           972:                                save = reginput;
        !           973: 
        !           974:                                if (regmatch(next)) {
        !           975:                                        /*
        !           976:                                         * Don't set startp if some later
        !           977:                                         * invocation of the same parentheses
        !           978:                                         * already has.
        !           979:                                         */
        !           980:                                        if (regstartp[no] == NULL)
        !           981:                                                regstartp[no] = save;
        !           982:                                        return(1);
        !           983:                                } else
        !           984:                                        return(0);
        !           985:                        }
        !           986:                        break;
        !           987:                case CLOSE+1:
        !           988:                case CLOSE+2:
        !           989:                case CLOSE+3:
        !           990:                case CLOSE+4:
        !           991:                case CLOSE+5:
        !           992:                case CLOSE+6:
        !           993:                case CLOSE+7:
        !           994:                case CLOSE+8:
        !           995:                case CLOSE+9: {
        !           996:                                register int no;
        !           997:                                register char *save;
        !           998: 
        !           999:                                no = OP(scan) - CLOSE;
        !          1000:                                save = reginput;
        !          1001: 
        !          1002:                                if (regmatch(next)) {
        !          1003:                                        /*
        !          1004:                                         * Don't set endp if some later
        !          1005:                                         * invocation of the same parentheses
        !          1006:                                         * already has.
        !          1007:                                         */
        !          1008:                                        if (regendp[no] == NULL)
        !          1009:                                                regendp[no] = save;
        !          1010:                                        return(1);
        !          1011:                                } else
        !          1012:                                        return(0);
        !          1013:                        }
        !          1014:                        break;
        !          1015:                case BRANCH: {
        !          1016:                                register char *save;
        !          1017: 
        !          1018:                                if (OP(next) != BRANCH)         /* No choice. */
        !          1019:                                        next = OPERAND(scan);   /* Avoid recursion. */
        !          1020:                                else {
        !          1021:                                        do {
        !          1022:                                                save = reginput;
        !          1023:                                                if (regmatch(OPERAND(scan)))
        !          1024:                                                        return(1);
        !          1025:                                                reginput = save;
        !          1026:                                                scan = regnext(scan);
        !          1027:                                        } while (scan != NULL && OP(scan) == BRANCH);
        !          1028:                                        return(0);
        !          1029:                                        /* NOTREACHED */
        !          1030:                                }
        !          1031:                        }
        !          1032:                        break;
        !          1033:                case STAR:
        !          1034:                case PLUS: {
        !          1035:                                register char nextch;
        !          1036:                                register int no;
        !          1037:                                register char *save;
        !          1038:                                register int min;
        !          1039: 
        !          1040:                                /*
        !          1041:                                 * Lookahead to avoid useless match attempts
        !          1042:                                 * when we know what character comes next.
        !          1043:                                 */
        !          1044:                                nextch = '\0';
        !          1045:                                if (OP(next) == EXACTLY)
        !          1046:                                        nextch = *OPERAND(next);
        !          1047:                                min = (OP(scan) == STAR) ? 0 : 1;
        !          1048:                                save = reginput;
        !          1049:                                no = regrepeat(OPERAND(scan));
        !          1050:                                while (no >= min) {
        !          1051:                                        /* If it could work, try it. */
        !          1052:                                        if (nextch == '\0' || *reginput == nextch)
        !          1053:                                                if (regmatch(next))
        !          1054:                                                        return(1);
        !          1055:                                        /* Couldn't or didn't -- back up. */
        !          1056:                                        no--;
        !          1057:                                        reginput = save + no;
        !          1058:                                }
        !          1059:                                return(0);
        !          1060:                        }
        !          1061:                        break;
        !          1062:                case END:
        !          1063:                        return(1);      /* Success! */
        !          1064:                        break;
        !          1065:                default:
        !          1066:                        regerror("memory corruption");
        !          1067:                        return(0);
        !          1068:                        break;
        !          1069:                }
        !          1070: 
        !          1071:                scan = next;
        !          1072:        }
        !          1073: 
        !          1074:        /*
        !          1075:         * We get here only if there's trouble -- normally "case END" is
        !          1076:         * the terminating point.
        !          1077:         */
        !          1078:        regerror("corrupted pointers");
        !          1079:        return(0);
        !          1080: }
        !          1081: 
        !          1082: /*
        !          1083:  - regrepeat - repeatedly match something simple, report how many
        !          1084:  */
        !          1085: static int
        !          1086: regrepeat(p)
        !          1087: char *p;
        !          1088: {
        !          1089:        register int count = 0;
        !          1090:        register char *scan;
        !          1091:        register char *opnd;
        !          1092: 
        !          1093:        scan = reginput;
        !          1094:        opnd = OPERAND(p);
        !          1095:        switch (OP(p)) {
        !          1096:        case ANY:
        !          1097:                count = strlen(scan);
        !          1098:                scan += count;
        !          1099:                break;
        !          1100:        case EXACTLY:
        !          1101:                while (*opnd == *scan) {
        !          1102:                        count++;
        !          1103:                        scan++;
        !          1104:                }
        !          1105:                break;
        !          1106:        case ANYOF:
        !          1107:                while (*scan != '\0' && strchr(opnd, *scan) != NULL) {
        !          1108:                        count++;
        !          1109:                        scan++;
        !          1110:                }
        !          1111:                break;
        !          1112:        case ANYBUT:
        !          1113:                while (*scan != '\0' && strchr(opnd, *scan) == NULL) {
        !          1114:                        count++;
        !          1115:                        scan++;
        !          1116:                }
        !          1117:                break;
        !          1118:        default:                /* Oh dear.  Called inappropriately. */
        !          1119:                regerror("internal foulup");
        !          1120:                count = 0;      /* Best compromise. */
        !          1121:                break;
        !          1122:        }
        !          1123:        reginput = scan;
        !          1124: 
        !          1125:        return(count);
        !          1126: }
        !          1127: 
        !          1128: /*
        !          1129:  - regnext - dig the "next" pointer out of a node
        !          1130:  */
        !          1131: static char *
        !          1132: regnext(p)
        !          1133: register char *p;
        !          1134: {
        !          1135:        register int offset;
        !          1136: 
        !          1137:        if (p == &regdummy)
        !          1138:                return(NULL);
        !          1139: 
        !          1140:        offset = NEXT(p);
        !          1141:        if (offset == 0)
        !          1142:                return(NULL);
        !          1143: 
        !          1144:        if (OP(p) == BACK)
        !          1145:                return(p-offset);
        !          1146:        else
        !          1147:                return(p+offset);
        !          1148: }
        !          1149: 
        !          1150: #ifdef DEBUG
        !          1151: 
        !          1152: STATIC char *regprop();
        !          1153: 
        !          1154: /*
        !          1155:  - regdump - dump a regexp onto stdout in vaguely comprehensible form
        !          1156:  */
        !          1157: void
        !          1158: regdump(r)
        !          1159: regexp *r;
        !          1160: {
        !          1161:        register char *s;
        !          1162:        register char op = EXACTLY;     /* Arbitrary non-END op. */
        !          1163:        register char *next;
        !          1164:        extern char *strchr();
        !          1165: 
        !          1166: 
        !          1167:        s = r->program + 1;
        !          1168:        while (op != END) {     /* While that wasn't END last time... */
        !          1169:                op = OP(s);
        !          1170:                printf("%2d%s", s-r->program, regprop(s));      /* Where, what. */
        !          1171:                next = regnext(s);
        !          1172:                if (next == NULL)               /* Next ptr. */
        !          1173:                        printf("(0)");
        !          1174:                else 
        !          1175:                        printf("(%d)", (s-r->program)+(next-s));
        !          1176:                s += 3;
        !          1177:                if (op == ANYOF || op == ANYBUT || op == EXACTLY) {
        !          1178:                        /* Literal string, where present. */
        !          1179:                        while (*s != '\0') {
        !          1180:                                putchar(*s);
        !          1181:                                s++;
        !          1182:                        }
        !          1183:                        s++;
        !          1184:                }
        !          1185:                putchar('\n');
        !          1186:        }
        !          1187: 
        !          1188:        /* Header fields of interest. */
        !          1189:        if (r->regstart != '\0')
        !          1190:                printf("start `%c' ", r->regstart);
        !          1191:        if (r->reganch)
        !          1192:                printf("anchored ");
        !          1193:        if (r->regmust != NULL)
        !          1194:                printf("must have \"%s\"", r->regmust);
        !          1195:        printf("\n");
        !          1196: }
        !          1197: 
        !          1198: /*
        !          1199:  - regprop - printable representation of opcode
        !          1200:  */
        !          1201: static char *
        !          1202: regprop(op)
        !          1203: char *op;
        !          1204: {
        !          1205:        register char *p;
        !          1206:        static char buf[50];
        !          1207: 
        !          1208:        (void) strcpy(buf, ":");
        !          1209: 
        !          1210:        switch (OP(op)) {
        !          1211:        case BOL:
        !          1212:                p = "BOL";
        !          1213:                break;
        !          1214:        case EOL:
        !          1215:                p = "EOL";
        !          1216:                break;
        !          1217:        case ANY:
        !          1218:                p = "ANY";
        !          1219:                break;
        !          1220:        case ANYOF:
        !          1221:                p = "ANYOF";
        !          1222:                break;
        !          1223:        case ANYBUT:
        !          1224:                p = "ANYBUT";
        !          1225:                break;
        !          1226:        case BRANCH:
        !          1227:                p = "BRANCH";
        !          1228:                break;
        !          1229:        case EXACTLY:
        !          1230:                p = "EXACTLY";
        !          1231:                break;
        !          1232:        case NOTHING:
        !          1233:                p = "NOTHING";
        !          1234:                break;
        !          1235:        case BACK:
        !          1236:                p = "BACK";
        !          1237:                break;
        !          1238:        case END:
        !          1239:                p = "END";
        !          1240:                break;
        !          1241:        case OPEN+1:
        !          1242:        case OPEN+2:
        !          1243:        case OPEN+3:
        !          1244:        case OPEN+4:
        !          1245:        case OPEN+5:
        !          1246:        case OPEN+6:
        !          1247:        case OPEN+7:
        !          1248:        case OPEN+8:
        !          1249:        case OPEN+9:
        !          1250:                sprintf(buf+strlen(buf), "OPEN%d", OP(op)-OPEN);
        !          1251:                p = NULL;
        !          1252:                break;
        !          1253:        case CLOSE+1:
        !          1254:        case CLOSE+2:
        !          1255:        case CLOSE+3:
        !          1256:        case CLOSE+4:
        !          1257:        case CLOSE+5:
        !          1258:        case CLOSE+6:
        !          1259:        case CLOSE+7:
        !          1260:        case CLOSE+8:
        !          1261:        case CLOSE+9:
        !          1262:                sprintf(buf+strlen(buf), "CLOSE%d", OP(op)-CLOSE);
        !          1263:                p = NULL;
        !          1264:                break;
        !          1265:        case STAR:
        !          1266:                p = "STAR";
        !          1267:                break;
        !          1268:        case PLUS:
        !          1269:                p = "PLUS";
        !          1270:                break;
        !          1271:        case WORDA:
        !          1272:                p = "WORDA";
        !          1273:                break;
        !          1274:        case WORDZ:
        !          1275:                p = "WORDZ";
        !          1276:                break;
        !          1277:        default:
        !          1278:                regerror("corrupted opcode");
        !          1279:                break;
        !          1280:        }
        !          1281:        if (p != NULL)
        !          1282:                (void) strcat(buf, p);
        !          1283:        return(buf);
        !          1284: }
        !          1285: #endif
        !          1286: 
        !          1287: /*
        !          1288:  * The following is provided for those people who do not have strcspn() in
        !          1289:  * their C libraries.  They should get off their butts and do something
        !          1290:  * about it; at least one public-domain implementation of those (highly
        !          1291:  * useful) string routines has been published on Usenet.
        !          1292:  */
        !          1293: #ifdef STRCSPN
        !          1294: /*
        !          1295:  * strcspn - find length of initial segment of s1 consisting entirely
        !          1296:  * of characters not from s2
        !          1297:  */
        !          1298: 
        !          1299: static int
        !          1300: strcspn(s1, s2)
        !          1301: char *s1;
        !          1302: char *s2;
        !          1303: {
        !          1304:        register char *scan1;
        !          1305:        register char *scan2;
        !          1306:        register int count;
        !          1307: 
        !          1308:        count = 0;
        !          1309:        for (scan1 = s1; *scan1 != '\0'; scan1++) {
        !          1310:                for (scan2 = s2; *scan2 != '\0';)       /* ++ moved down. */
        !          1311:                        if (*scan1 == *scan2++)
        !          1312:                                return(count);
        !          1313:                count++;
        !          1314:        }
        !          1315:        return(count);
        !          1316: }
        !          1317: #endif
unix.superglobalmegacorp.com
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.