Annotation of 43BSD/contrib/emacs/src/regex.c, revision 1.1

1.1     ! root        1: /* Extended regular expression matching and search.
        !             2:    Copyright (C) 1985 Richard M. Stallman
        !             3: 
        !             4: This program is distributed in the hope that it will be useful,
        !             5: but without any warranty.  No author or distributor
        !             6: accepts responsibility to anyone for the consequences of using it
        !             7: or for whether it serves any particular purpose or works at all,
        !             8: unless he says so in writing.
        !             9: 
        !            10:    Permission is granted to anyone to distribute verbatim copies
        !            11:    of this program's source code as received, in any medium, provided that
        !            12:    the copyright notice, the nonwarraty notice above
        !            13:    and this permission notice are preserved,
        !            14:    and that the distributor grants the recipient all rights
        !            15:    for further redistribution as permitted by this notice,
        !            16:    and informs him of these rights.
        !            17: 
        !            18:    Permission is granted to distribute modified versions of this
        !            19:    program's source code, or of portions of it, under the above
        !            20:    conditions, plus the conditions that all changed files carry
        !            21:    prominent notices stating who last changed them and that the
        !            22:    derived material, including anything packaged together with it and
        !            23:    conceptually functioning as a modification of it rather than an
        !            24:    application of it, is in its entirety subject to a permission
        !            25:    notice identical to this one.
        !            26: 
        !            27:    Permission is granted to distribute this program (verbatim or
        !            28:    as modified) in compiled or executable form, provided verbatim
        !            29:    redistribution is permitted as stated above for source code, and
        !            30:     A.  it is accompanied by the corresponding machine-readable
        !            31:       source code, under the above conditions, or
        !            32:     B.  it is accompanied by a written offer, with no time limit,
        !            33:       to distribute the corresponding machine-readable source code,
        !            34:       under the above conditions, to any one, in return for reimbursement
        !            35:       of the cost of distribution.   Verbatim redistribution of the
        !            36:       written offer must be permitted.  Or,
        !            37:     C.  it is distributed by someone who received only the
        !            38:       compiled or executable form, and is accompanied by a copy of the
        !            39:       written offer of source code which he received along with it.
        !            40: 
        !            41:    Permission is granted to distribute this program (verbatim or as modified)
        !            42:    in executable form as part of a larger system provided that the source
        !            43:    code for this program, including any modifications used,
        !            44:    is also distributed or offered as stated in the preceding paragraph.
        !            45: 
        !            46: In other words, you are welcome to use, share and improve this program.
        !            47: You are forbidden to forbid anyone else to use, share and improve
        !            48: what you give them.   Help stamp out software-hoarding!  */
        !            49: 
        !            50: 
        !            51: /* To test, compile with -Dtest.
        !            52:  Defining `test' turns this file into a self-contained program
        !            53:  which reads a pattern, describes how it compiles,
        !            54:  then reads a string and searches for it.  */
        !            55: 
        !            56: 
        !            57: #ifdef emacs
        !            58: 
        !            59: /* The `emacs' switch turns on certain special matching commands
        !            60:  that make sense only in emacs. */
        !            61: 
        !            62: #include "config.h"
        !            63: #include "lisp.h"
        !            64: #include "buffer.h"
        !            65: #include "syntax.h"
        !            66: 
        !            67: #else  /* not emacs */
        !            68: 
        !            69: /*
        !            70:  * Define the syntax stuff, so we can do the \<...\> things
        !            71:  * when not built for emacs (the emacs build includes syntax.h instead). */
        !            72: #define Sword 1   /* syntax code stored for letters and digits by init_syntax_once */
        !            73: /* SYNTAX(c) looks up the syntax code of character c; c indexes the table directly. */
        !            74: #define SYNTAX(c) syntax_table[c]
        !            75: /* One entry per possible byte value; filled in by init_syntax_once. */
        !            76: static char syntax_table[256];
        !            77: 
        !            78: #endif /* not emacs */
        !            79: 
        !            80: #include "regex.h"
        !            81: 
        !            82: /* Number of failure points to allocate space for initially,
        !            83:  when matching.  If this number is exceeded, more space is allocated,
        !            84:  so it is not a hard limit.  */
        !            85: 
        !            86: #ifndef NFAILURES
        !            87: #define NFAILURES 80   /* default; a definition supplied before this point takes precedence */
        !            88: #endif NFAILURES
        !            89: 
        !            90: /* width of a byte in bits; (1 << BYTEWIDTH) / BYTEWIDTH is the size in bytes of a full charset bitmap */
        !            91: 
        !            92: #define BYTEWIDTH 8
        !            93: 
        !            94: /* These are the command codes that appear in compiled regular expressions, one per byte.
        !            95:   Some command codes are followed by argument bytes.
        !            96:   A command code can specify any interpretation whatever for its arguments.
        !            97:   Zero-bytes may appear in the compiled regular expression. */
        !            98: 
        !            99: enum regexpcode
        !           100:   {
        !           101:     unused,    /* occupies code 0; not emitted by re_compile_pattern */
        !           102:     exactn,    /* followed by one byte giving n, and then by n literal bytes */
        !           103:     begline,   /* fails unless at beginning of line */
        !           104:     endline,   /* fails unless at end of line */
        !           105:     jump,       /* followed by two bytes giving relative address to jump to */
        !           106:     on_failure_jump,    /* followed by two bytes giving relative address of place
        !           107:                            to resume at in case of failure. */
        !           108:     finalize_jump,      /* Throw away latest failure point and then jump to address. */
        !           109:     maybe_finalize_jump, /* Like jump but finalize if safe to do so.
        !           110:                            This is used to jump back to the beginning
        !           111:                            of a repeat.  If the command that follows
        !           112:                            this jump is clearly incompatible with the
        !           113:                            one at the beginning of the repeat, such that
        !           114:                            we can be sure that there is no use backtracking
        !           115:                            out of repetitions already completed,
        !           116:                            then we finalize. */
        !           117:     dummy_failure_jump,  /* jump, and push a dummy failure point.
        !           118:                            This failure point will be thrown away
        !           119:                            if an attempt is made to use it for a failure.
        !           120:                            A + construct makes this before the first repeat.  */
        !           121:     anychar,    /* matches any one character */
        !           122:     charset,     /* matches any one char belonging to specified set.
        !           123:                    First following byte is the number of bitmap bytes.
        !           124:                    Then come bytes for a bit-map saying which chars are in.
        !           125:                    Bits in each byte are ordered low-bit-first.
        !           126:                    A character is in the set if its bit is 1.
        !           127:                    A character too large to have a bit in the map
        !           128:                    is automatically not in the set */
        !           129:     charset_not, /* similar but match any character that is NOT one of those specified */
        !           130:     start_memory, /* starts remembering the text that is matched
        !           131:                    and stores it in a memory register.
        !           132:                    followed by one byte containing the register number.
        !           133:                    re_compile_pattern emits register numbers 1 through RE_NREGS - 1. */
        !           134:     stop_memory, /* stops remembering the text that is matched
        !           135:                    and stores it in a memory register.
        !           136:                    followed by one byte containing the register number.
        !           137:                    re_compile_pattern emits register numbers 1 through RE_NREGS - 1. */
        !           138:     duplicate,    /* match a duplicate of something remembered.
        !           139:                    Followed by one byte containing the index of the memory register. */
        !           140:     before_dot,         /* Succeeds if before dot */
        !           141:     at_dot,     /* Succeeds if at dot (compiled from \= in the emacs build) */
        !           142:     after_dot,  /* Succeeds if after dot */
        !           143:     begbuf,      /* Succeeds if at beginning of buffer (compiled from \`) */
        !           144:     endbuf,      /* Succeeds if at end of buffer (compiled from \') */
        !           145:     wordchar,    /* Matches any word-constituent character (compiled from \w) */
        !           146:     notwordchar, /* Matches any char that is not a word-constituent (compiled from \W) */
        !           147:     wordbeg,    /* Succeeds if at word beginning (compiled from \<) */
        !           148:     wordend,    /* Succeeds if at word end (compiled from \>) */
        !           149:     wordbound,   /* Succeeds if at a word boundary (compiled from \b) */
        !           150:     notwordbound, /* Succeeds if not at a word boundary (compiled from \B) */
        !           151:     syntaxspec,  /* Matches any character whose syntax is specified.
        !           152:                    followed by a byte which contains a syntax code, Sword or such like */
        !           153:     notsyntaxspec /* Matches any character whose syntax differs from the specified. */
        !           154:   };
        !           155: 
        !           156: #ifndef SIGN_EXTEND_CHAR
        !           157: #define SIGN_EXTEND_CHAR(x) (x)   /* default when not predefined: yields x unchanged */
        !           158: #endif
        !           159: 
        !           160: /* re_compile_pattern takes a regular-expression descriptor string in the user's format
        !           161:   and converts it into a buffer full of byte commands for matching.
        !           162: 
        !           163:   pattern   is the address of the pattern string
        !           164:   size      is the length of it.
        !           165:   bufp     is a  struct re_pattern_buffer *  which points to the info
        !           166:            on where to store the byte commands.
        !           167:            This structure contains a  char *  which points to the
        !           168:            actual space, which should have been obtained with malloc.
        !           169:            re_compile_pattern may use  realloc  to grow the buffer space.
        !           170: 
        !           171:   The number of bytes of commands can be found out by looking in
        !           172:   the  struct re_pattern_buffer  that bufp pointed to, after
        !           173:   re_compile_pattern returns 0.  On failure it returns an error-message string instead.
        !           174: */
        !           175: 
        !           176: #define PATPUSH(ch) (*b++ = (char) (ch))   /* append one byte of compiled code at b */
        !           177: /* PATFETCH: read the next pattern char into c, mapped through translate if that is non-null; jumps to end_of_pattern when p == pend. */
        !           178: #define PATFETCH(c) \
        !           179:  {if (p == pend) goto end_of_pattern; \
        !           180:   c = * (unsigned char *) p++; \
        !           181:   if (translate) c = translate[c]; }
        !           182: /* PATFETCH_RAW: same as PATFETCH but without the translate step. */
        !           183: #define PATFETCH_RAW(c) \
        !           184:  {if (p == pend) goto end_of_pattern; \
        !           185:   c = * (unsigned char *) p++; }
        !           186: /* PATUNFETCH: step back over the pattern char most recently fetched. */
        !           187: #define PATUNFETCH p--
        !           188: /* EXTEND_BUFFER: double the compiled-code buffer (capped at 1<<16 bytes) and relocate every pointer into it.  Uses c as a temporary. */
        !           189: #define EXTEND_BUFFER \
        !           190:   { old_buffer = bufp->buffer; \
        !           191:     if (bufp->allocated == (1<<16)) goto too_big; /* already at the size cap */ \
        !           192:     bufp->allocated *= 2; \
        !           193:     if (bufp->allocated > (1<<16)) bufp->allocated = (1<<16); \
        !           194:     if (!(bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated))) /* NOTE(review): on failure the old pointer is overwritten with null */ \
        !           195:       goto memory_exhausted; \
        !           196:     c = bufp->buffer - old_buffer; /* displacement of the block, held in the unsigned c */ \
        !           197:     b += c; \
        !           198:     if (fixup_jump) \
        !           199:       fixup_jump += c; \
        !           200:     if (laststart) \
        !           201:       laststart += c; \
        !           202:     begalt += c; \
        !           203:     if (pending_exact) \
        !           204:       pending_exact += c; \
        !           205:   }
        !           206: /* Helpers called by re_compile_pattern below to write jump commands into the buffer. */
        !           207: static int store_jump (), insert_jump ();
        !           208: 
        !           209: char *
        !           210: re_compile_pattern (pattern, size, bufp)   /* returns 0 on success, else an error-message string */
        !           211:      char *pattern;                        /* start of the pattern text */
        !           212:      int size;                             /* number of pattern bytes; reading stops at pattern + size */
        !           213:      struct re_pattern_buffer *bufp;       /* receives the compiled byte commands */
        !           214: {
        !           215:   register char *b = bufp->buffer;   /* next free byte of compiled output */
        !           216:   register char *p = pattern;        /* next pattern byte to read */
        !           217:   char *pend = pattern + size;
        !           218:   register unsigned c, c1;
        !           219:   char *p1;
        !           220:   unsigned char *translate = (unsigned char *) bufp->translate;   /* if non-null, PATFETCH maps each char through it */
        !           221: 
        !           222:   /* Temporary used when buffer is made bigger. */
        !           223: 
        !           224:   char *old_buffer;
        !           225: 
        !           226:   /* address of the count-byte of the most recently inserted "exactn" command.
        !           227:     This makes it possible to tell whether a new exact-match character
        !           228:     can be added to that command or requires a new "exactn" command. */
        !           229:      
        !           230:   char *pending_exact = 0;
        !           231: 
        !           232:   /* address of the place where a forward-jump should go
        !           233:     to the end of the containing expression.
        !           234:     Each alternative of an "or", except the last, ends with a forward-jump
        !           235:     of this sort. */
        !           236: 
        !           237:   char *fixup_jump = 0;
        !           238: 
        !           239:   /* address of start of the most recently finished expression.
        !           240:     This tells postfix * where to find the start of its operand. */
        !           241: 
        !           242:   char *laststart = 0;
        !           243: 
        !           244:   /* In processing a repeat, 1 means zero matches are allowed */
        !           245: 
        !           246:   char zero_times_ok;
        !           247: 
        !           248:   /* In processing a repeat, 1 means many matches are allowed */
        !           249: 
        !           250:   char many_times_ok;
        !           251: 
        !           252:   /* address of beginning of regexp, or inside of last \( */
        !           253: 
        !           254:   char *begalt = b;
        !           255: 
        !           256:   /* Stack of information saved by \( and restored by \).
        !           257:      Four stack elements are pushed by each \(, all stored as ints:
        !           258:        First, the value of b, as an offset from bufp->buffer.
        !           259:        Second, the value of fixup_jump, as offset + 1 (0 means none).
        !           260:        Third, the value of regnum.
        !           261:        Fourth, the value of begalt, as an offset from bufp->buffer.  */
        !           262: 
        !           263:   int stackb[40];               /* 4 ints per open \( group, so room for 10 */
        !           264:   int *stackp = stackb;
        !           265:   int *stacke = stackb + 40;
        !           266:   int *stackt;
        !           267: 
        !           268:   /* Counts \('s as they are encountered.  Remembered for the matching \),
        !           269:      where it becomes the "register number" to put in the stop_memory command */
        !           270: 
        !           271:   int regnum = 1;
        !           272: 
        !           273:   bufp->fastmap_accurate = 0;   /* presumably marks any existing fastmap as stale -- confirm in regex.h */
        !           274: 
        !           275: #ifndef emacs
        !           276:   /*
        !           277:    * Initialize the syntax table.
        !           278:    */
        !           279:    init_syntax_once();
        !           280: #endif emacs
        !           281: 
        !           282:   while (p != pend)
        !           283:     {
        !           284:       if (b - bufp->buffer
        !           285:          > bufp->allocated - 10)
        !           286:        /* Fewer than 10 free bytes remain.  Note that EXTEND_BUFFER clobbers c */
        !           287:        EXTEND_BUFFER;
        !           288: 
        !           289:       PATFETCH (c);
        !           290: 
        !           291:       switch (c)
        !           292:        {
        !           293:        case '$':
        !           294:          /* $ means succeed if at end of line, but only at the end of the pattern or
        !           295:            just before \) or \|.  If randomly in the middle of a pattern, it is a normal character. */
        !           296:          if (p == pend || (*p == '\\' && (p[1] == ')' || p[1] == '|')))
        !           297:            {
        !           298:              PATPUSH (endline);
        !           299:              break;
        !           300:            }
        !           301:          goto normal_char;
        !           302: 
        !           303:        case '^':
        !           304:          /* ^ means succeed if at beg of line, but only if no preceding pattern (laststart is 0 initially and after \( and \|). */
        !           305:          if (laststart) goto normal_char;
        !           306:          PATPUSH (begline);
        !           307:          break;
        !           308: 
        !           309:        case '*':
        !           310:        case '+':
        !           311:        case '?':
        !           312:          /* If there is no previous pattern, char not special. */
        !           313:          if (!laststart)
        !           314:            goto normal_char;
        !           315:          /* If there is a sequence of repetition chars,
        !           316:             collapse it down to equivalent to just one.  */
        !           317:          zero_times_ok = 0;
        !           318:          many_times_ok = 0;
        !           319:          while (1)
        !           320:            {
        !           321:              zero_times_ok |= c != '+';   /* * and ? permit zero matches */
        !           322:              many_times_ok |= c != '?';   /* * and + permit repeated matches */
        !           323:              if (p == pend)
        !           324:                break;
        !           325:              PATFETCH (c);
        !           326:              if (!(c == '*' || c == '+' || c == '?'))
        !           327:                {
        !           328:                  PATUNFETCH;
        !           329:                  break;
        !           330:                }
        !           331:            }
        !           332: 
        !           333:          /* Now we know whether 0 matches is allowed,
        !           334:             and whether 2 or more matches is allowed.  */
        !           335:          if (many_times_ok)
        !           336:            {
        !           337:              /* If more than one repetition is allowed, put in a backward jump at the end.
        !           338:                 NOTE(review): the target laststart - 3 presumably allows for the 3-byte command inserted at laststart below -- confirm against store_jump.  */
        !           339:              store_jump (b, maybe_finalize_jump, laststart - 3);
        !           340:              b += 3;
        !           341:            }
        !           342:          insert_jump (on_failure_jump, laststart, b + 3, b);   /* goes in front of the operand; b advances by 3 below */
        !           343:          pending_exact = 0;
        !           344:          b += 3;
        !           345:          if (!zero_times_ok)
        !           346:            {
        !           347:              /* At least one repetition required: insert before the loop
        !           348:                 a skip over the initial on-failure-jump instruction */
        !           349:              insert_jump (dummy_failure_jump, laststart, laststart + 6, b);
        !           350:              b += 3;
        !           351:            }
        !           352:          break;
        !           353: 
        !           354:        case '.':
        !           355:          laststart = b;
        !           356:          PATPUSH (anychar);
        !           357:          break;
        !           358: 
        !           359:        case '[':
        !           360:          if (b - bufp->buffer
        !           361:              > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH)
        !           362:            /* A full-size bitmap plus 3 more bytes would not fit.  Note that EXTEND_BUFFER clobbers c */
        !           363:            EXTEND_BUFFER;
        !           364: 
        !           365:          laststart = b;
        !           366:          if (*p == '^')   /* NOTE(review): *p is read without first checking p != pend */
        !           367:            PATPUSH (charset_not), p++;
        !           368:          else
        !           369:            PATPUSH (charset);
        !           370:          p1 = p;   /* start of the set's contents; a ] found as the very first char is literal */
        !           371: 
        !           372:          PATPUSH ((1 << BYTEWIDTH) / BYTEWIDTH);   /* map-length byte: a full map for now, trimmed below */
        !           373:          /* Clear the whole map */
        !           374:          bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
        !           375:          /* Read in characters and ranges, setting map bits */
        !           376:          while (1)
        !           377:            {
        !           378:              PATFETCH (c);
        !           379:              if (c == ']' && p != p1 + 1) break;   /* ] ends the set unless it is the first char */
        !           380:              if (*p == '-')   /* NOTE(review): *p is read without first checking p != pend */
        !           381:                {
        !           382:                  PATFETCH (c1);   /* consumes the '-' */
        !           383:                  PATFETCH (c1);   /* the upper end of the range */
        !           384:                  while (c <= c1)
        !           385:                    b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH), c++;
        !           386:                }
        !           387:              else
        !           388:                {
        !           389:                  b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH);
        !           390:                }
        !           391:            }
        !           392:          /* Discard any bitmap bytes that are all 0 at the end of the map.
        !           393:             Decrement the map-length byte (b[-1]) too. */
        !           394:          while (b[-1] > 0 && b[b[-1] - 1] == 0)
        !           395:            b[-1]--;
        !           396:          b += b[-1];   /* step over the bitmap bytes that were kept */
        !           397:          break;
        !           398: 
        !           399:         case '\\':
        !           400:          if (p == pend) goto invalid_pattern;   /* pattern ends with a lone backslash */
        !           401:          PATFETCH_RAW (c);                      /* the char after the backslash is not translated */
        !           402:          switch (c)
        !           403:            {
        !           404:            case '(':
        !           405:              if (stackp == stacke) goto nesting_too_deep;
        !           406:              if (regnum < RE_NREGS)   /* groups numbered RE_NREGS and up get no memory commands */
        !           407:                {
        !           408:                  PATPUSH (start_memory);
        !           409:                  PATPUSH (regnum);
        !           410:                }
        !           411:              *stackp++ = b - bufp->buffer;
        !           412:              *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0;   /* +1 so that 0 can mean "none" */
        !           413:              *stackp++ = regnum++;
        !           414:              *stackp++ = begalt - bufp->buffer;
        !           415:              fixup_jump = 0;
        !           416:              laststart = 0;
        !           417:              begalt = b;
        !           418:              break;
        !           419: 
        !           420:            case ')':
        !           421:              if (stackp == stackb) goto unmatched_close;
        !           422:              begalt = *--stackp + bufp->buffer;   /* pop the saved begalt */
        !           423:              if (fixup_jump)
        !           424:                store_jump (fixup_jump, jump, b);
        !           425:              if (stackp[-1] < RE_NREGS)           /* stackp[-1] is this group's saved regnum */
        !           426:                {
        !           427:                  PATPUSH (stop_memory);
        !           428:                  PATPUSH (stackp[-1]);
        !           429:                }
        !           430:              stackp -= 2;                         /* now *stackp is the saved fixup_jump entry */
        !           431:              fixup_jump = 0;
        !           432:              if (*stackp)
        !           433:                fixup_jump = *stackp + bufp->buffer - 1;
        !           434:              laststart = *--stackp + bufp->buffer;   /* the saved b: a following * + ? applies from there */
        !           435:              break;
        !           436: 
        !           437:            case '|':
        !           438:              insert_jump (on_failure_jump, begalt, b + 6, b);   /* goes in front of the alternative just finished */
        !           439:              pending_exact = 0;
        !           440:              b += 3;
        !           441:              if (fixup_jump)
        !           442:                store_jump (fixup_jump, jump, b);
        !           443:              fixup_jump = b;   /* the jump for these 3 bytes is stored later, by \) or at end of pattern */
        !           444:              b += 3;
        !           445:              laststart = 0;
        !           446:              begalt = b;
        !           447:              break;
        !           448: 
        !           449: #ifdef emacs
        !           450:            case '=':
        !           451:              PATPUSH (at_dot);
        !           452:              break;
        !           453: 
        !           454:            case 's':   
        !           455:              laststart = b;
        !           456:              PATPUSH (syntaxspec);
        !           457:              PATFETCH (c);
        !           458:              PATPUSH (syntax_spec_code[c]);
        !           459:              break;
        !           460: 
        !           461:            case 'S':
        !           462:              laststart = b;
        !           463:              PATPUSH (notsyntaxspec);
        !           464:              PATFETCH (c);
        !           465:              PATPUSH (syntax_spec_code[c]);
        !           466:              break;
        !           467: #endif emacs
        !           468: 
        !           469:            case 'w':
        !           470:              laststart = b;
        !           471:              PATPUSH (wordchar);
        !           472:              break;
        !           473: 
        !           474:            case 'W':
        !           475:              laststart = b;
        !           476:              PATPUSH (notwordchar);
        !           477:              break;
        !           478: 
        !           479:            case '<':
        !           480:              PATPUSH (wordbeg);
        !           481:              break;
        !           482: 
        !           483:            case '>':
        !           484:              PATPUSH (wordend);
        !           485:              break;
        !           486: 
        !           487:            case 'b':
        !           488:              PATPUSH (wordbound);
        !           489:              break;
        !           490: 
        !           491:            case 'B':
        !           492:              PATPUSH (notwordbound);
        !           493:              break;
        !           494: 
        !           495:            case '`':
        !           496:              PATPUSH (begbuf);
        !           497:              break;
        !           498: 
        !           499:            case '\'':
        !           500:              PATPUSH (endbuf);
        !           501:              break;
        !           502: 
        !           503:            case '1':
        !           504:            case '2':
        !           505:            case '3':
        !           506:            case '4':
        !           507:            case '5':
        !           508:            case '6':
        !           509:            case '7':
        !           510:            case '8':
        !           511:            case '9':
        !           512:              c1 = c - '0';
        !           513:              if (c1 >= regnum)   /* no \( with this number has been seen yet */
        !           514:                goto normal_char;
        !           515:              for (stackt = stackp - 2;  stackt > stackb;  stackt -= 4)   /* visits the saved regnum of each open group */
        !           516:                if (*stackt == c1)   /* that group is still open */
        !           517:                  goto normal_char;
        !           518:              laststart = b;
        !           519:              PATPUSH (duplicate);
        !           520:              PATPUSH (c1);
        !           521:              break;
        !           522:            default:
        !           523:              goto normal_char;
        !           524:            }
        !           525:          break;
        !           526: 
        !           527:        default:
        !           528:        normal_char:   /* NOTE(review): the tests of *p below do not first check p != pend */
        !           529:          if (!pending_exact || pending_exact + *pending_exact + 1 != b
        !           530:              || *pending_exact == 0177 || *p == '*' || *p == '^'
        !           531:              || *p == '+' || *p == '?')
        !           532:            {
        !           533:              laststart = b;
        !           534:              PATPUSH (exactn);
        !           535:              pending_exact = b;
        !           536:              PATPUSH (0);   /* count byte, incremented below */
        !           537:            }
        !           538:          PATPUSH (c);
        !           539:          (*pending_exact)++;
        !           540:        }
        !           541:     }
        !           542: 
        !           543:   if (fixup_jump)
        !           544:     store_jump (fixup_jump, jump, b);
        !           545: 
        !           546:   if (stackp != stackb) goto unmatched_open;
        !           547: 
        !           548:   bufp->used = b - bufp->buffer;
        !           549:   return 0;
        !           550: /* Error exits: each returns a message string; bufp->used is not set on these paths. */
        !           551:  invalid_pattern:
        !           552:   return "Invalid regular expression";
        !           553: 
        !           554:  unmatched_open:
        !           555:   return "Unmatched \\(";
        !           556: 
        !           557:  unmatched_close:
        !           558:   return "Unmatched \\)";
        !           559: 
        !           560:  end_of_pattern:
        !           561:   return "Premature end of regular expression";
        !           562: 
        !           563:  nesting_too_deep:
        !           564:   return "Nesting too deep";
        !           565: 
        !           566:  too_big:
        !           567:   return "Regular expression too big";
        !           568: 
        !           569:  memory_exhausted:
        !           570:   return "Memory exhausted";
        !           571: }
        !           572: 
        !           573: #ifndef emacs
        !           574: init_syntax_once ()
        !           575: {
        !           576:    register int c;
        !           577:    static int done = 0;
        !           578: 
        !           579:    if (done)
        !           580:      return;
        !           581: 
        !           582:    bzero (syntax_table, sizeof syntax_table);
        !           583: 
        !           584:    for (c = 'a'; c <= 'z'; c++)
        !           585:      syntax_table[c] = Sword;
        !           586: 
        !           587:    for (c = 'A'; c <= 'Z'; c++)
        !           588:      syntax_table[c] = Sword;
        !           589: 
        !           590:    for (c = '0'; c <= '9'; c++)
        !           591:      syntax_table[c] = Sword;
        !           592: 
        !           593:    done = 1;
        !           594: }
        !           595: #endif not emacs
        !           596: 
        /* Store at FROM a three-byte jump instruction whose target is TO.
           OPCODE is the jump opcode to store in the first byte.  The next two
           bytes hold the displacement from the end of the instruction
           (FROM + 3) to TO, low-order byte first.
           Always returns 0.  */

        static int
        store_jump (from, opcode, to)
             char *from, *to;
             char opcode;
        {
          /* The displacement is measured from the byte after the instruction.  */
          int disp = to - (from + 3);

          from[0] = opcode;
          from[1] = disp & 0377;
          from[2] = disp >> 8;
          return 0;
        }
        !           609: 
        /* Open up three bytes of space at FROM and store there a jump to TO.
           CURRENT_END gives the end of the storage now in use,
           so we know how much data to copy up: everything from FROM up to
           CURRENT_END is moved three bytes toward higher addresses.
           OP is the opcode of the jump to insert.
           Always returns 0.

           If you call this function, you must zero out pending_exact.  */

        static int
        insert_jump (op, from, to, current_end)
             char op;
             char *from, *to, *current_end;
        {
          register char *src = current_end;
          register char *dst = current_end + 3;

          /* Copy from the top down so that the overlapping source bytes are
             moved before they are overwritten.  */
          while (src != from)
            *--dst = *--src;
          store_jump (from, op, to);
          return 0;
        }
        !           628: 
        !           629: /* Given a pattern, compute a fastmap from it.
        !           630:  The fastmap records which of the (1 << BYTEWIDTH) possible characters
        !           631:  can start a string that matches the pattern.
        !           632:  This fastmap is used by re_search to skip quickly over totally implausible text.
        !           633: 
        !           634:  The caller must supply the address of a (1 << BYTEWIDTH)-byte data area
        !           635:  as bufp->fastmap.
        !           636:  The other components of bufp describe the pattern to be used.  */
        !           637: 
        !           638: re_compile_fastmap (bufp)
        !           639:      struct re_pattern_buffer *bufp;
        !           640: {
        !           641:   char *pattern = bufp->buffer;
        !           642:   int size = bufp->used;
        !           643:   register char *fastmap = bufp->fastmap;
        !           644:   register char *p = pattern;
        !           645:   register char *pend = pattern + size;
        !           646:   register int j, k;
        !           647:   unsigned char *translate = (unsigned char *) bufp->translate;
        !           648: 
        !           649:   char *stackb[NFAILURES];
        !           650:   char **stackp = stackb;
        !           651: 
        !           652:   bzero (fastmap, (1 << BYTEWIDTH));
        !           653:   bufp->fastmap_accurate = 1;
        !           654:   bufp->can_be_null = 0;
        !           655:       
        !           656:   while (p)
        !           657:     {
        !           658:       if (p == pend)
        !           659:        {
        !           660:          bufp->can_be_null = 1;
        !           661:          break;
        !           662:        }
        !           663: #ifdef SWITCH_ENUM_BUG
        !           664:       switch ((int) ((enum regexpcode) *p++))
        !           665: #else
        !           666:       switch ((enum regexpcode) *p++)
        !           667: #endif
        !           668:        {
        !           669:        case exactn:
        !           670:          if (translate)
        !           671:            fastmap[translate[p[1]]] = 1;
        !           672:          else
        !           673:            fastmap[p[1]] = 1;
        !           674:          break;
        !           675: 
        !           676:         case begline:
        !           677:         case before_dot:
        !           678:        case at_dot:
        !           679:        case after_dot:
        !           680:        case begbuf:
        !           681:        case endbuf:
        !           682:        case wordbound:
        !           683:        case notwordbound:
        !           684:        case wordbeg:
        !           685:        case wordend:
        !           686:          continue;
        !           687: 
        !           688:        case endline:
        !           689:          if (translate)
        !           690:            fastmap[translate['\n']] = 1;
        !           691:          else
        !           692:            fastmap['\n'] = 1;
        !           693:          bufp->can_be_null = 1;
        !           694:          break;
        !           695: 
        !           696:        case finalize_jump:
        !           697:        case maybe_finalize_jump:
        !           698:        case jump:
        !           699:        case dummy_failure_jump:
        !           700:          bufp->can_be_null = 1;
        !           701:          j = *p++ & 0377;
        !           702:          j += SIGN_EXTEND_CHAR (*p++) << 8;
        !           703:          p += j;
        !           704:          if (j > 0)
        !           705:            continue;
        !           706:          /* Jump backward reached implies we just went through
        !           707:             the body of a loop and matched nothing.
        !           708:             Opcode jumped to should be an on_failure_jump.
        !           709:             Just treat it like an ordinary jump.
        !           710:             For a * loop, it has pushed its failure point already;
        !           711:             if so, discard that as redundant.  */
        !           712:          if ((enum regexpcode) *p != on_failure_jump)
        !           713:            continue;
        !           714:          p++;
        !           715:          j = *p++ & 0377;
        !           716:          j += SIGN_EXTEND_CHAR (*p++) << 8;
        !           717:          p += j;
        !           718:          if (stackp != stackb && *stackp == p)
        !           719:            stackp--;
        !           720:          continue;
        !           721:          
        !           722:        case on_failure_jump:
        !           723:          j = *p++ & 0377;
        !           724:          j += SIGN_EXTEND_CHAR (*p++) << 8;
        !           725:          *++stackp = p + j;
        !           726:          continue;
        !           727: 
        !           728:        case start_memory:
        !           729:        case stop_memory:
        !           730:          p++;
        !           731:          continue;
        !           732: 
        !           733:        case duplicate:
        !           734:          bufp->can_be_null = 1;
        !           735:        case anychar:
        !           736:          for (j = 0; j < (1 << BYTEWIDTH); j++)
        !           737:            fastmap[j] = 1;
        !           738:          return;
        !           739: 
        !           740:        case wordchar:
        !           741:          for (j = 0; j < (1 << BYTEWIDTH); j++)
        !           742:            if (SYNTAX (j) == Sword)
        !           743:              fastmap[j] = 1;
        !           744:          break;
        !           745: 
        !           746:        case notwordchar:
        !           747:          for (j = 0; j < (1 << BYTEWIDTH); j++)
        !           748:            if (SYNTAX (j) != Sword)
        !           749:              fastmap[j] = 1;
        !           750:          break;
        !           751: 
        !           752: #ifdef emacs
        !           753:        case syntaxspec:
        !           754:          k = *p++;
        !           755:          for (j = 0; j < (1 << BYTEWIDTH); j++)
        !           756:            if (SYNTAX (j) == (enum syntaxcode) k)
        !           757:              fastmap[j] = 1;
        !           758:          break;
        !           759: 
        !           760:        case notsyntaxspec:
        !           761:          for (j = 0; j < (1 << BYTEWIDTH); j++)
        !           762:            if (SYNTAX (j) != (enum syntaxcode) k)
        !           763:              fastmap[j] = 1;
        !           764:          break;
        !           765: #endif emacs
        !           766: 
        !           767:        case charset:
        !           768:          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
        !           769:            if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
        !           770:              {
        !           771:                if (translate)
        !           772:                  fastmap[translate[j]] = 1;
        !           773:                else
        !           774:                  fastmap[j] = 1;
        !           775:              }
        !           776:          break;
        !           777: 
        !           778:        case charset_not:
        !           779:          /* Chars beyond end of map must be allowed */
        !           780:          for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
        !           781:            if (translate)
        !           782:              fastmap[translate[j]] = 1;
        !           783:            else
        !           784:              fastmap[j] = 1;
        !           785: 
        !           786:          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
        !           787:            if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
        !           788:              {
        !           789:                if (translate)
        !           790:                  fastmap[translate[j]] = 1;
        !           791:                else
        !           792:                  fastmap[j] = 1;
        !           793:              }
        !           794:          break;
        !           795:        }
        !           796: 
        !           797:       /* Get here means we have successfully found the possible starting characters
        !           798:         of one path of the pattern.  We need not follow this path any farther.
        !           799:         Instead, look at the next alternative remembered in the stack. */
        !           800:       if (stackp != stackb)
        !           801:        p = *stackp--;
        !           802:       else
        !           803:        break;
        !           804:     }
        !           805: }
        !           806: 
        /* Like re_search_2, below, but only one string is specified.
           STRING, of length SIZE, is passed as the second string with an
           empty first string, and SIZE is passed as the match limit.
           Returns whatever re_search_2 returns.  */

        re_search (pbufp, string, size, startpos, range, regs)
             struct re_pattern_buffer *pbufp;
             char *string;
             int size, startpos, range;
             struct re_registers *regs;
        {
          /* re_search_2 has no prototype in scope here, so the null first
             string must be passed as a real pointer, not as the int 0.  */
          return re_search_2 (pbufp, (char *) 0, 0, string, size,
                              startpos, range, regs, size);
        }
        !           817: 
        !           818: /* Like re_match_2 but tries first a match starting at index `startpos',
        !           819:  then at startpos + 1, and so on.
        !           820:  `range' is the number of places to try before giving up.
        !           821:  If `range' is negative, the starting positions tried are
        !           822:   startpos, startpos - 1, etc.
        !           823:  It is up to the caller to make sure that range is not so large
        !           824:   as to take the starting position outside of the input strings.
        !           825: 
        !           826: The value returned is the position at which the match was found,
        !           827:  or -1 if no match was found. */
        !           828: 
        !           829: int
        !           830: re_search_2 (pbufp, string1, size1, string2, size2, startpos, range, regs, mstop)
        !           831:      struct re_pattern_buffer *pbufp;
        !           832:      char *string1, *string2;
        !           833:      int size1, size2;
        !           834:      int startpos;
        !           835:      register int range;
        !           836:      struct re_registers *regs;
        !           837:      int mstop;
        !           838: {
        !           839:   register char *fastmap = pbufp->fastmap;
        !           840:   register char *translate = pbufp->translate;
        !           841:   int total = size1 + size2;
        !           842: 
        !           843:   /* Update the fastmap now if not correct already */
        !           844:   if (fastmap && !pbufp->fastmap_accurate)
        !           845:     re_compile_fastmap (pbufp);
        !           846: 
        !           847:   while (1)
        !           848:     {
        !           849:       /* If a fastmap is supplied, skip quickly over characters
        !           850:         that cannot possibly be the start of a match.
        !           851:         Note, however, that if the pattern can possibly match
        !           852:         the null string, we must test it at each starting point
        !           853:         so that we take the first null string we get.  */
        !           854: 
        !           855:       if (fastmap && startpos < total && !pbufp->can_be_null)
        !           856:        {
        !           857:          if (range > 0)
        !           858:            {
        !           859:              register int lim = 0;
        !           860:              register char *p;
        !           861:              int irange = range;
        !           862:              if (startpos < size1 && startpos + range >= size1)
        !           863:                lim = range - (size1 - startpos);
        !           864: 
        !           865:              p = &(startpos >= size1 ? string2 - size1 : string1)[startpos];
        !           866: 
        !           867:              if (translate)
        !           868:                {
        !           869:                  while (range > lim && !fastmap[translate[*p++]])
        !           870:                    range--;
        !           871:                }
        !           872:              else
        !           873:                {
        !           874:                  while (range > lim && !fastmap[*p++])
        !           875:                    range--;
        !           876:                }
        !           877:              startpos += irange - range;
        !           878:            }
        !           879:          else
        !           880:            {
        !           881:              register char c;
        !           882:              if (startpos >= size1) c = string2[startpos - size1];
        !           883:              else c = string1[startpos];
        !           884:              if (translate ? !fastmap[translate[c]] : !fastmap[c])
        !           885:                goto advance;
        !           886:            }
        !           887:        }
        !           888: 
        !           889:       if (range >= 0 && startpos == total
        !           890:          && fastmap && !pbufp->can_be_null)
        !           891:        return -1;
        !           892: 
        !           893:       if (0 <= re_match_2 (pbufp, string1, size1, string2, size2, startpos, regs, mstop))
        !           894:        return startpos;
        !           895: 
        !           896:     advance:
        !           897:       if (!range) break;
        !           898:       if (range > 0) range--, startpos++; else range++, startpos--;
        !           899:     }
        !           900:   return -1;
        !           901: }
        !           902: 
        #ifndef emacs   /* emacs never uses this */
        /* Like re_match_2, but only one string is specified.
           STRING, of length SIZE, is passed as the second string with an
           empty first string, and SIZE is passed as the match limit.
           Returns whatever re_match_2 returns.  */
        re_match (pbufp, string, size, pos, regs)
             struct re_pattern_buffer *pbufp;
             char *string;
             int size, pos;
             struct re_registers *regs;
        {
          /* re_match_2 has no prototype in scope here, so the null first
             string must be passed as a real pointer, not as the int 0.  */
          return re_match_2 (pbufp, (char *) 0, 0, string, size, pos, regs, size);
        }
        #endif /* emacs */
        !           913: 
        !           914: /* Match the pattern described by `pbufp'
        !           915:   against data which is the virtual concatenation of `string1' and `string2'.
        !           916:   `size1' and `size2' are the sizes of the two data strings.
        !           917:   Start the match at position `pos'.
        !           918:   Do not consider matching past the position `mstop'.
        !           919: 
        !           920:   If pbufp->fastmap is nonzero, then it had better be up to date.
        !           921: 
        !           922:   The reason that the data to match is specified as two components
        !           923:   which are to be regarded as concatenated
        !           924:   is so that this function can be used directly on the contents of an Emacs buffer.
        !           925: 
        !           926:   -1 is returned if there is no match.  Otherwise the value is the length
        !           927:   of the substring which was matched.
        !           928: */
        !           929: 
        !           930: int
        !           931: re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop)
        !           932:      struct re_pattern_buffer *pbufp;
        !           933:      char *string1, *string2;
        !           934:      int size1, size2;
        !           935:      int pos;
        !           936:      struct re_registers *regs;
        !           937:      int mstop;
        !           938: {
        !           939:   register char *p = pbufp->buffer;
        !           940:   register char *pend = p + pbufp->used;
        !           941:   /* End of first string */
        !           942:   char *end1;
        !           943:   /* End of second string */
        !           944:   char *end2;
        !           945:   /* Pointer just past last char to consider matching */
        !           946:   char *end_match_1, *end_match_2;
        !           947:   register char *d, *dend;
        !           948:   register int mcnt;
        !           949:   char *translate = pbufp->translate;
        !           950: 
        !           951:  /* Failure point stack.  Each place that can handle a failure further down the line
        !           952:     pushes a failure point on this stack.  It consists of two char *'s.
        !           953:     The first one pushed is where to resume scanning the pattern;
        !           954:     the second pushed is where to resume scanning the strings.
        !           955:     If the latter is zero, the failure point is a "dummy".
        !           956:     If a failure happens and the innermost failure point is dormant,
        !           957:     it discards that failure point and tries the next one. */
        !           958: 
        !           959:   char **stackb = (char **) alloca (2 * NFAILURES * sizeof (char *));
        !           960:   char **stackp = stackb, **stacke = &stackb[2 * NFAILURES];
        !           961: 
        !           962:   /* Information on the "contents" of registers.
        !           963:      These are pointers into the input strings; they record
        !           964:      just what was matched (on this attempt) by some part of the pattern.
        !           965:      The start_memory command stores the start of a register's contents
        !           966:      and the stop_memory command stores the end.
        !           967: 
        !           968:      At that point, regstart[regnum] points to the first character in the register,
        !           969:      regend[regnum] points to the first character beyond the end of the register,
        !           970:      and regstart_segend[regnum] is either the same as regend[regnum]
        !           971:      or else points to the end of the input string into which regstart[regnum] points.
        !           972:      The latter case happens when regstart[regnum] is in string1 and
        !           973:      regend[regnum] is in string2.  */
        !           974: 
        !           975:   char *regstart[RE_NREGS];
        !           976:   char *regstart_segend[RE_NREGS];
        !           977:   char *regend[RE_NREGS];
        !           978: 
        !           979:   /* Set up pointers to ends of strings.
        !           980:      Don't allow the second string to be empty unless both are empty.  */
        !           981:   if (!size2)
        !           982:     {
        !           983:       string2 = string1;
        !           984:       size2 = size1;
        !           985:       string1 = 0;
        !           986:       size1 = 0;
        !           987:     }
        !           988:   end1 = string1 + size1;
        !           989:   end2 = string2 + size2;
        !           990: 
        !           991:   /* Compute where to stop matching, within the two strings */
        !           992:   if (mstop <= size1)
        !           993:     {
        !           994:       end_match_1 = string1 + mstop;
        !           995:       end_match_2 = string2;
        !           996:     }
        !           997:   else
        !           998:     {
        !           999:       end_match_1 = end1;
        !          1000:       end_match_2 = string2 + mstop - size1;
        !          1001:     }
        !          1002: 
        !          1003:   /* Initialize \( and \) text positions to -1
        !          1004:      to mark ones that no \( or \) has been seen for.  */
        !          1005: 
        !          1006:   for (mcnt = 0; mcnt < sizeof (regstart) / sizeof (*regstart); mcnt++)
        !          1007:     regstart[mcnt] = (char *) -1;
        !          1008: 
        !          1009:   /* `p' scans through the pattern as `d' scans through the data.
        !          1010:      `dend' is the end of the input string that `d' points within.
        !          1011:      `d' is advanced into the following input string whenever necessary,
        !          1012:      but this happens before fetching;
        !          1013:      therefore, at the beginning of the loop,
        !          1014:      `d' can be pointing at the end of a string,
        !          1015:      but it cannot equal string2.  */
        !          1016: 
        !          1017:   if (pos <= size1)
        !          1018:     d = string1 + pos, dend = end_match_1;
        !          1019:   else
        !          1020:     d = string2 + pos - size1, dend = end_match_2;
        !          1021: 
        !          1022: /* Write PREFETCH; just before fetching a character with *d.  */
        !          1023: #define PREFETCH \
        !          1024:  while (d == dend)                                                 \
        !          1025:   { if (dend == end_match_2) goto fail;  /* end of string2 => failure */   \
        !          1026:     d = string2;  /* end of string1 => advance to string2. */       \
        !          1027:     dend = end_match_2; }
        !          1028: 
        !          1029:   /* This loop loops over pattern commands.
        !          1030:      It exits by returning from the function if match is complete,
        !          1031:      or it drops through if match fails at this starting point in the input data. */
        !          1032: 
        !          1033:   while (1)
        !          1034:     {
        !          1035:       if (p == pend)
        !          1036:        /* End of pattern means we have succeeded! */
        !          1037:        {
        !          1038:          /* If caller wants register contents data back, convert it to indices */
        !          1039:          if (regs)
        !          1040:            {
        !          1041:              bzero (regs, sizeof (*regs));
        !          1042: 
        !          1043:              regend[0] = d;
        !          1044:              regstart[0] = string1;
        !          1045:              for (mcnt = 0; mcnt < RE_NREGS; mcnt++)
        !          1046:                {
        !          1047:                  if (mcnt && regstart[mcnt] == (char *) -1) continue;
        !          1048:                  if (regstart[mcnt] - string1 < 0 || regstart[mcnt] - string1 > size1)
        !          1049:                    regs->start[mcnt] = regstart[mcnt] - string2 + size1;
        !          1050:                  else
        !          1051:                    regs->start[mcnt] = regstart[mcnt] - string1;
        !          1052:                  if (regend[mcnt] - string1 < 0 || regend[mcnt] - string1 > size1)
        !          1053:                    regs->end[mcnt] = regend[mcnt] - string2 + size1;
        !          1054:                  else
        !          1055:                    regs->end[mcnt] = regend[mcnt] - string1;
        !          1056:                }
        !          1057:              regs->start[0] = pos;
        !          1058:            }
        !          1059:          if (d - string1 >= 0 && d - string1 <= size1)
        !          1060:            return d - string1 - pos;
        !          1061:          else
        !          1062:            return d - string2 + size1 - pos;
        !          1063:        }
        !          1064: 
        !          1065:       /* Otherwise match next pattern command */
        !          1066: #ifdef SWITCH_ENUM_BUG
        !          1067:       switch ((int) ((enum regexpcode) *p++))
        !          1068: #else
        !          1069:       switch ((enum regexpcode) *p++)
        !          1070: #endif
        !          1071:        {
        !          1072: 
        !          1073:        /* \( is represented by a start_memory, \) by a stop_memory.
        !          1074:            Both of those commands contain a "register number" argument.
        !          1075:            The text matched within the \( and \) is recorded under that number.
        !          1076:            Then, \<digit> turns into a `duplicate' command which
        !          1077:            is followed by the numeric value of <digit> as the register number. */
        !          1078: 
        !          1079:        case start_memory:
        !          1080:          regstart[*p] = d;
        !          1081:          regstart_segend[*p++] = dend;
        !          1082:          break;
        !          1083: 
        !          1084:        case stop_memory:
        !          1085:          regend[*p] = d;
        !          1086:          if (regstart_segend[*p] == dend)
        !          1087:            regstart_segend[*p] = d;
        !          1088:          p++;
        !          1089:          break;
        !          1090: 
        !          1091:        case duplicate:
        !          1092:          {
        !          1093:            int regno = *p++;   /* Get which register to match against */
        !          1094:            register char *d2, *dend2;
        !          1095: 
        !          1096:            d2 = regstart[regno];
        !          1097:            dend2 = regstart_segend[regno];
        !          1098:            while (1)
        !          1099:              {
        !          1100:                /* Advance to next segment in register contents, if necessary */
        !          1101:                while (d2 == dend2)
        !          1102:                  {
        !          1103:                    if (dend2 == end_match_2) break;
        !          1104:                    if (dend2 == regend[regno]) break;
        !          1105:                    d2 = string2, dend2 = regend[regno];  /* end of string1 => advance to string2. */
        !          1106:                  }
        !          1107:                /* At end of register contents => success */
        !          1108:                if (d2 == dend2) break;
        !          1109: 
        !          1110:                /* Advance to next segment in data being matched, if necessary */
        !          1111:                PREFETCH;
        !          1112: 
        !          1113:                /* mcnt gets # consecutive chars to compare */
        !          1114:                mcnt = dend - d;
        !          1115:                if (mcnt > dend2 - d2)
        !          1116:                  mcnt = dend2 - d2;
        !          1117:                /* Compare that many; failure if mismatch, else skip them. */
        !          1118:                if (translate ? bcmp_translate (d, d2, mcnt, translate) : bcmp (d, d2, mcnt))
        !          1119:                  goto fail;
        !          1120:                d += mcnt, d2 += mcnt;
        !          1121:              }
        !          1122:          }
        !          1123:          break;
        !          1124: 
        !          1125:        case anychar:
        !          1126:          /* fetch a data character */
        !          1127:          PREFETCH;
        !          1128:          /* Match anything but a newline.  */
        !          1129:          if ((translate ? translate[*d++] : *d++) == '\n')
        !          1130:            goto fail;
        !          1131:          break;
        !          1132: 
        !          1133:        case charset:
        !          1134:        case charset_not:
        !          1135:          {
        !          1136:            /* Nonzero for charset_not */
        !          1137:            int not = 0;
        !          1138:            register int c;
        !          1139:            if (*(p - 1) == (char) charset_not)
        !          1140:              not = 1;
        !          1141: 
        !          1142:            /* fetch a data character */
        !          1143:            PREFETCH;
        !          1144: 
        !          1145:            if (translate)
        !          1146:              c = translate [*(unsigned char *)d];
        !          1147:            else
        !          1148:              c = *(unsigned char *)d;
        !          1149: 
        !          1150:            if (c < *p * BYTEWIDTH
        !          1151:                && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
        !          1152:              not = !not;
        !          1153: 
        !          1154:            p += 1 + *p;
        !          1155: 
        !          1156:            if (!not) goto fail;
        !          1157:            d++;
        !          1158:            break;
        !          1159:          }
        !          1160: 
        !          1161:        case begline:
        !          1162:          if (d == string1 || d[-1] == '\n')
        !          1163:            break;
        !          1164:          goto fail;
        !          1165: 
        !          1166:        case endline:
        !          1167:          if (d == end2
        !          1168:              || (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n'))
        !          1169:            break;
        !          1170:          goto fail;
        !          1171: 
        !          1172:        /* "or" constructs ("|") are handled by starting each alternative
        !          1173:            with an on_failure_jump that points to the start of the next alternative.
        !          1174:            Each alternative except the last ends with a jump to the joining point.
        !          1175:            (Actually, each jump except for the last one really jumps
        !          1176:             to the following jump, because tensioning the jumps is a hassle.) */
        !          1177: 
        !          1178:        /* The start of a stupid repeat has an on_failure_jump that points
        !          1179:           past the end of the repeat text.
        !          1180:           This makes a failure point so that, on failure to match a repetition,
        !          1181:           matching restarts past as many repetitions have been found
        !          1182:           with no way to fail and look for another one.  */
        !          1183: 
        !          1184:        /* A smart repeat is similar but loops back to the on_failure_jump
        !          1185:           so that each repetition makes another failure point. */
        !          1186: 
        !          1187:        case on_failure_jump:
        !          1188:          if (stackp == stacke)
        !          1189:            {
        !          1190:              char **stackx = (char **) alloca (2 * (stacke - stackb) * sizeof (char *));
        !          1191:              bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
        !          1192:              stackp += stackx - stackb;
        !          1193:              stacke = stackx + 2 * (stacke - stackb);
        !          1194:              stackb = stackx;
        !          1195:            }
        !          1196:          mcnt = *p++ & 0377;
        !          1197:          mcnt += SIGN_EXTEND_CHAR (*p++) << 8;
        !          1198:          *stackp++ = mcnt + p;
        !          1199:          *stackp++ = d;
        !          1200:          break;
        !          1201: 
        !          1202:        /* The end of a smart repeat has a maybe_finalize_jump back.
        !          1203:           Change it either to a finalize_jump or an ordinary jump. */
        !          1204: 
        !          1205:        case maybe_finalize_jump:
        !          1206:          mcnt = *p++ & 0377;
        !          1207:          mcnt += SIGN_EXTEND_CHAR (*p++) << 8;
        !          1208:          /* Compare what follows with the beginning of the repeat.
        !          1209:             If we can establish that there is nothing that they would
        !          1210:             both match, we can change to finalize_jump */
        !          1211:          if (p == pend)
        !          1212:            p[-3] = (char) finalize_jump;
        !          1213:          else if (*p == (char) exactn || *p == (char) endline)
        !          1214:            {
        !          1215:              register int c = *p == (char) endline ? '\n' : p[2];
        !          1216:              register char *p1 = p + mcnt;
        !          1217:              /* p1[0] ... p1[2] are an on_failure_jump.
        !          1218:                 Examine what follows that */
        !          1219:              if (p1[3] == (char) exactn && p1[5] != c)
        !          1220:                p[-3] = (char) finalize_jump;
        !          1221:              else if (p1[3] == (char) charset || p1[3] == (char) charset_not)
        !          1222:                {
        !          1223:                  int not = p1[3] == (char) charset_not;
        !          1224:                  if (c < p1[4] * BYTEWIDTH
        !          1225:                      && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
        !          1226:                    not = !not;
        !          1227:                  /* not is 1 if c would match */
        !          1228:                  /* That means it is not safe to finalize */
        !          1229:                  if (!not)
        !          1230:                    p[-3] = (char) finalize_jump;
        !          1231:                }
        !          1232:            }
        !          1233:          p -= 2;
        !          1234:          if (p[-1] != (char) finalize_jump)
        !          1235:            {
        !          1236:              p[-1] = (char) jump;
        !          1237:              goto nofinalize;
        !          1238:            }
        !          1239: 
        !          1240:        /* The end of a stupid repeat has a finalize-jump
        !          1241:           back to the start, where another failure point will be made
        !          1242:           which will point after all the repetitions found so far. */
        !          1243: 
        !          1244:        case finalize_jump:
        !          1245:          stackp -= 2;
        !          1246: 
        !          1247:        case jump:
        !          1248:        nofinalize:
        !          1249:          mcnt = *p++ & 0377;
        !          1250:          mcnt += SIGN_EXTEND_CHAR (*p++) << 8;
        !          1251:          p += mcnt;
        !          1252:          break;
        !          1253: 
        !          1254:        case dummy_failure_jump:
        !          1255:          if (stackp == stacke)
        !          1256:            {
        !          1257:              char **stackx = (char **) alloca (2 * (stacke - stackb) * sizeof (char *));
        !          1258:              bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
        !          1259:              stackp += stackx - stackb;
        !          1260:              stacke = stackx + 2 * (stacke - stackb);
        !          1261:              stackb = stackx;
        !          1262:            }
        !          1263:          *stackp++ = 0;
        !          1264:          *stackp++ = 0;
        !          1265:          goto nofinalize;
        !          1266: 
        !          1267:        case wordbound:
        !          1268:          if (d == string1  /* Points to first char */
        !          1269:              || d == end2  /* Points to end */
        !          1270:              || (d == end1 && size2 == 0)) /* Points to end */
        !          1271:            break;
        !          1272:          if ((SYNTAX (((unsigned char *)d)[-1]) == Sword)
        !          1273:              != (SYNTAX (d == end1 ? *(unsigned char *)string2 : *(unsigned char *)d) == Sword))
        !          1274:            break;
        !          1275:          goto fail;
        !          1276: 
        !          1277:        case notwordbound:
        !          1278:          if (d == string1  /* Points to first char */
        !          1279:              || d == end2  /* Points to end */
        !          1280:              || (d == end1 && size2 == 0)) /* Points to end */
        !          1281:            goto fail;
        !          1282:          if ((SYNTAX (((unsigned char *)d)[-1]) == Sword)
        !          1283:              != (SYNTAX (d == end1 ? *(unsigned char *)string2 : *(unsigned char *)d) == Sword))
        !          1284:            goto fail;
        !          1285:          break;
        !          1286: 
        !          1287:        case wordbeg:
        !          1288:          if (d == end2  /* Points to end */
        !          1289:              || (d == end1 && size2 == 0) /* Points to end */
        !          1290:              || SYNTAX (*(unsigned char *) (d == end1 ? string2 : d)) != Sword) /* Next char not a letter */
        !          1291:            goto fail;
        !          1292:          if (d == string1  /* Points to first char */
        !          1293:              || SYNTAX (((unsigned char *)d)[-1]) != Sword)  /* prev char not letter */
        !          1294:            break;
        !          1295:          goto fail;
        !          1296: 
        !          1297:        case wordend:
        !          1298:          if (d == string1  /* Points to first char */
        !          1299:              || SYNTAX (((unsigned char *)d)[-1]) != Sword)  /* prev char not letter */
        !          1300:            goto fail;
        !          1301:          if (d == end2  /* Points to end */
        !          1302:              || (d == end1 && size2 == 0) /* Points to end */
        !          1303:              || SYNTAX (d == end1 ? *(unsigned char *)string2 : *(unsigned char *)d) != Sword) /* Next char not a letter */
        !          1304:            break;
        !          1305:          goto fail;
        !          1306: 
        !          1307: #ifdef emacs
        !          1308:        case before_dot:
        !          1309:          if (((d - string2 <= (unsigned) size2)
        !          1310:               ? d - (char *) bf_p2 : d - (char *) bf_p1)
        !          1311:              <= point)
        !          1312:            goto fail;
        !          1313:          break;
        !          1314: 
        !          1315:        case at_dot:
        !          1316:          if (((d - string2 <= (unsigned) size2)
        !          1317:               ? d - (char *) bf_p2 : d - (char *) bf_p1)
        !          1318:              == point)
        !          1319:            goto fail;
        !          1320:          break;
        !          1321: 
        !          1322:        case after_dot:
        !          1323:          if (((d - string2 <= (unsigned) size2)
        !          1324:               ? d - (char *) bf_p2 : d - (char *) bf_p1)
        !          1325:              >= point)
        !          1326:            goto fail;
        !          1327:          break;
        !          1328: 
        !          1329:        case wordchar:
        !          1330:          mcnt = (int) Sword;
        !          1331:          goto matchsyntax;
        !          1332: 
        !          1333:        case syntaxspec:
        !          1334:          mcnt = *p++;
        !          1335:        matchsyntax:
        !          1336:          PREFETCH;
        !          1337:          if (SYNTAX (*(unsigned char *)d++) != (enum syntaxcode) mcnt) goto fail;
        !          1338:          break;
        !          1339:          
        !          1340:        case notwordchar:
        !          1341:          mcnt = (int) Sword;
        !          1342:          goto matchnotsyntax;
        !          1343: 
        !          1344:        case notsyntaxspec:
        !          1345:          mcnt = *p++;
        !          1346:        matchnotsyntax:
        !          1347:          PREFETCH;
        !          1348:          if (SYNTAX (*(unsigned char *)d++) == (enum syntaxcode) mcnt) goto fail;
        !          1349:          break;
        !          1350: #else
        !          1351:        case wordchar:
        !          1352:          PREFETCH;
        !          1353:          if (SYNTAX (*(unsigned char *)d++) == 0) goto fail;
        !          1354:          break;
        !          1355:          
        !          1356:        case notwordchar:
        !          1357:          PREFETCH;
        !          1358:          if (SYNTAX (*(unsigned char *)d++) != 0) goto fail;
        !          1359:          break;
        !          1360: #endif not emacs
        !          1361: 
        !          1362:        case begbuf:
        !          1363:          if (d == string1)     /* Note, d cannot equal string2 */
        !          1364:            break;              /* unless string1 == string2.  */
        !          1365:          goto fail;
        !          1366: 
        !          1367:        case endbuf:
        !          1368:          if (d == end2 || (d == end1 && size2 == 0))
        !          1369:            break;
        !          1370:          goto fail;
        !          1371: 
        !          1372:        case exactn:
        !          1373:          /* Match the next few pattern characters exactly.
        !          1374:             mcnt is how many characters to match. */
        !          1375:          mcnt = *p++;
        !          1376:          if (translate)
        !          1377:            {
        !          1378:              do
        !          1379:                {
        !          1380:                  PREFETCH;
        !          1381:                  if (translate[*(unsigned char *)d++] != *p++) goto fail;
        !          1382:                }
        !          1383:              while (--mcnt);
        !          1384:            }
        !          1385:          else
        !          1386:            {
        !          1387:              do
        !          1388:                {
        !          1389:                  PREFETCH;
        !          1390:                  if (*d++ != *p++) goto fail;
        !          1391:                }
        !          1392:              while (--mcnt);
        !          1393:            }
        !          1394:          break;
        !          1395:        }
        !          1396:       continue;    /* Successfully matched one pattern command; keep matching */
        !          1397: 
        !          1398:       /* Jump here if any matching operation fails. */
        !          1399:     fail:
        !          1400:       if (stackp != stackb)
        !          1401:        /* A restart point is known.  Restart there and pop it. */
        !          1402:        {
        !          1403:          if (!stackp[-2])
        !          1404:            {   /* If innermost failure point is dormant, flush it and keep looking */
        !          1405:              stackp -= 2;
        !          1406:              goto fail;
        !          1407:            }
        !          1408:          d = *--stackp;
        !          1409:          p = *--stackp;
        !          1410:          if (d >= string1 && d <= end1)
        !          1411:            dend = end_match_1;
        !          1412:        }
        !          1413:       else break;   /* Matching at this starting point really fails! */
        !          1414:     }
        !          1415:   return -1;         /* Failure to match */
        !          1416: }
        !          1417: 
        /* Compare the first LEN bytes of S1 and S2 after mapping every byte
           through the table TRANSLATE.

           Returns 0 when all LEN translated pairs are equal (this includes
           LEN <= 0, where nothing is compared) and 1 as soon as one pair
           differs.

           The bytes are read through `unsigned char *' so that a byte of 0200
           or above selects table entry 128..255.  Read as plain `char' on a
           machine where char is signed, such a byte would be a negative index
           and fetch from before the start of TRANSLATE.  */

        static int
        bcmp_translate (s1, s2, len, translate)
             char *s1, *s2;
             register int len;
             char *translate;
        {
          register unsigned char *p1 = (unsigned char *) s1;
          register unsigned char *p2 = (unsigned char *) s2;

          /* `> 0' rather than `!= 0': a negative LEN must not run off the
             end of both strings.  */
          while (len > 0)
            {
              if (translate[*p1++] != translate[*p2++])
                return 1;
              len--;
            }
          return 0;
        }
        !          1432: 
        !          1433: /* Entry points compatible with bsd4.2 regex library */
        !          1434: 
        !          1435: #ifndef emacs
        !          1436: 
        !          1437: static struct re_pattern_buffer re_comp_buf;
        !          1438: 
        !          1439: char *
        !          1440: re_comp (s)
        !          1441:      char *s;
        !          1442: {
        !          1443:   if (!s)
        !          1444:     {
        !          1445:       if (!re_comp_buf.buffer)
        !          1446:        return "No previous regular expression";
        !          1447:       return 0;
        !          1448:     }
        !          1449: 
        !          1450:   if (!re_comp_buf.buffer)
        !          1451:     {
        !          1452:       if (!(re_comp_buf.buffer = (char *) malloc (200)))
        !          1453:        return "Memory exhausted";
        !          1454:       re_comp_buf.allocated = 200;
        !          1455:       if (!(re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH)))
        !          1456:        return "Memory exhausted";
        !          1457:     }
        !          1458:   return re_compile_pattern (s, strlen (s), &re_comp_buf);
        !          1459: }
        !          1460: 
        !          1461: int
        !          1462: re_exec (s)
        !          1463:      char *s;
        !          1464: {
        !          1465:   int len = strlen (s);
        !          1466:   return 0 <= re_search (&re_comp_buf, s, len, 0, len, 0);
        !          1467: }
        !          1468: 
        !          1469: #endif /* emacs */
        !          1470: 
        !          1471: #ifdef test
        !          1472: 
        !          1473: #include <stdio.h>
        !          1474: 
        /* Case-folding table for the test driver: entry N holds the upper case
           form of character code N.  Only 0x61-0x7a (`a'-`z') differ from their
           own index; they hold 0x41-0x5a (`A'-`Z').  One row per 16 codes.  */

        static char upcase[0x100] =
          {
            0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
            0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
            0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
            0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
            0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
            0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
            /* 0x60 is the backquote; the 26 lower case letters follow.  */
            0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
            0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
            0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
            0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
            0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
            0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
            0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
            0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
            0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
            0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
          };
        !          1511: 
        !          1512: main ()
        !          1513: {
        !          1514:   char pat[80];
        !          1515:   struct re_pattern_buffer buf;
        !          1516:   int i;
        !          1517:   char c;
        !          1518:   char fastmap[(1 << BYTEWIDTH)];
        !          1519: 
        !          1520:   buf.allocated = 40;
        !          1521:   buf.buffer = (char *) malloc (buf.allocated);
        !          1522:   buf.fastmap = fastmap;
        !          1523:   buf.translate = upcase;
        !          1524: 
        !          1525:   while (1)
        !          1526:     {
        !          1527:       gets (pat);
        !          1528: 
        !          1529:       if (*pat)
        !          1530:        {
        !          1531:           re_compile_pattern (pat, strlen(pat), &buf);
        !          1532: 
        !          1533:          for (i = 0; i < buf.used; i++)
        !          1534:            printchar (buf.buffer[i]);
        !          1535: 
        !          1536:          putchar ('\n');
        !          1537: 
        !          1538:          printf ("%d allocated, %d used.\n", buf.allocated, buf.used);
        !          1539: 
        !          1540:          re_compile_fastmap (&buf);
        !          1541:          printf ("Allowed by fastmap: ");
        !          1542:          for (i = 0; i < (1 << BYTEWIDTH); i++)
        !          1543:            if (fastmap[i]) printchar (i);
        !          1544:          putchar ('\n');
        !          1545:        }
        !          1546: 
        !          1547:       gets (pat);      /* Now read the string to match against */
        !          1548: 
        !          1549:       i = re_match (&buf, pat, strlen (pat), 0, 0);
        !          1550:       printf ("Match value %d.\n", i);
        !          1551:     }
        !          1552: }
        !          1553: 
        !          1554: #ifdef NOTDEF
        !          1555: print_buf (bufp)
        !          1556:      struct re_pattern_buffer *bufp;
        !          1557: {
        !          1558:   int i;
        !          1559: 
        !          1560:   printf ("buf is :\n----------------\n");
        !          1561:   for (i = 0; i < bufp->used; i++)
        !          1562:     printchar (bufp->buffer[i]);
        !          1563:   
        !          1564:   printf ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used);
        !          1565:   
        !          1566:   printf ("Allowed by fastmap: ");
        !          1567:   for (i = 0; i < (1 << BYTEWIDTH); i++)
        !          1568:     if (bufp->fastmap[i])
        !          1569:       printchar (i);
        !          1570:   printf ("\nAllowed by translate: ");
        !          1571:   if (bufp->translate)
        !          1572:     for (i = 0; i < (1 << BYTEWIDTH); i++)
        !          1573:       if (bufp->translate[i])
        !          1574:        printchar (i);
        !          1575:   printf ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't");
        !          1576:   printf ("can %s be null\n----------", bufp->can_be_null ? "" : "not");
        !          1577: }
        !          1578: #endif
        !          1579: 
        /* Write the character C to stdout in a form that is always visible.
           Codes 041 through 0176 are written as themselves.  Every other code
           (control characters, space, DEL and 0200-0377) is written as a
           backslash followed by exactly three octal digits.  Always returns 0;
           callers ignore the value.  */
        int
        printchar (c)
             char c;
        {
          /* Reduce C to 0..0377 before testing and shifting.  Where plain char
             is signed, codes of 0200 and above arrive negative, and the result
             of right-shifting a negative value is implementation-defined.  */
          int code = (unsigned char) c;

          if (code < 041 || code >= 0177)
            {
              putchar ('\\');
              putchar (((code >> 6) & 3) + '0');
              putchar (((code >> 3) & 7) + '0');
              putchar ((code & 7) + '0');
            }
          else
            putchar (code);

          return 0;
        }
        !          1593: 
        /* Fatal-error hook for the test build: write STRING and a newline to
           stdout, then terminate the program with exit status 1.  */
        error (string)
             char *string;
        {
          printf ("%s\n", string);
          exit (1);
        }
        !          1600: 
        !          1601: #endif test

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.