43BSD/contrib/emacs/src/regex.c - annotate

Return to regex.c CVS log
Up to [CSRG BSD Unix] / 43BSD / contrib / emacs / src
Annotation of 43BSD/contrib/emacs/src/regex.c, revision 1.1.1.1

1.1       root        1: /* Extended regular expression matching and search.
                      2:    Copyright (C) 1985 Richard M. Stallman
                      3: 
                      4: This program is distributed in the hope that it will be useful,
                      5: but without any warranty.  No author or distributor
                      6: accepts responsibility to anyone for the consequences of using it
                      7: or for whether it serves any particular purpose or works at all,
                      8: unless he says so in writing.
                      9: 
                     10:    Permission is granted to anyone to distribute verbatim copies
                     11:    of this program's source code as received, in any medium, provided that
                     12:    the copyright notice, the nonwarraty notice above
                     13:    and this permission notice are preserved,
                     14:    and that the distributor grants the recipient all rights
                     15:    for further redistribution as permitted by this notice,
                     16:    and informs him of these rights.
                     17: 
                     18:    Permission is granted to distribute modified versions of this
                     19:    program's source code, or of portions of it, under the above
                     20:    conditions, plus the conditions that all changed files carry
                     21:    prominent notices stating who last changed them and that the
                     22:    derived material, including anything packaged together with it and
                     23:    conceptually functioning as a modification of it rather than an
                     24:    application of it, is in its entirety subject to a permission
                     25:    notice identical to this one.
                     26: 
                     27:    Permission is granted to distribute this program (verbatim or
                     28:    as modified) in compiled or executable form, provided verbatim
                     29:    redistribution is permitted as stated above for source code, and
                     30:     A.  it is accompanied by the corresponding machine-readable
                     31:       source code, under the above conditions, or
                     32:     B.  it is accompanied by a written offer, with no time limit,
                     33:       to distribute the corresponding machine-readable source code,
                     34:       under the above conditions, to any one, in return for reimbursement
                     35:       of the cost of distribution.   Verbatim redistribution of the
                     36:       written offer must be permitted.  Or,
                     37:     C.  it is distributed by someone who received only the
                     38:       compiled or executable form, and is accompanied by a copy of the
                     39:       written offer of source code which he received along with it.
                     40: 
                     41:    Permission is granted to distribute this program (verbatim or as modified)
                     42:    in executable form as part of a larger system provided that the source
                     43:    code for this program, including any modifications used,
                     44:    is also distributed or offered as stated in the preceding paragraph.
                     45: 
                     46: In other words, you are welcome to use, share and improve this program.
                     47: You are forbidden to forbid anyone else to use, share and improve
                     48: what you give them.   Help stamp out software-hoarding!  */
                     49: 
                     50: 
                     51: /* To test, compile with -Dtest.
                     52:  This Dtestable feature turns this into a self-contained program
                     53:  which reads a pattern, describes how it compiles,
                     54:  then reads a string and searches for it.  */
                     55: 
                     56: 
                     57: #ifdef emacs
                     58: 
                     59: /* The `emacs' switch turns on certain special matching commands
                     60:  that make sense only in emacs. */
                     61: 
                     62: #include "config.h"
                     63: #include "lisp.h"
                     64: #include "buffer.h"
                     65: #include "syntax.h"
                     66: 
                     67: #else  /* not emacs */
                     68: 
                     69: /*
                     70:  * Define the syntax stuff, so we can do the \<...\> things.
                          * This is used only when the file is built outside emacs.
                          * Sword is the one syntax class defined here, and SYNTAX (c)
                          * looks up the class of character c in syntax_table, which is
                          * filled in by init_syntax_once.
                     71:  */
                     72: #define Sword 1
                     73: 
                     74: #define SYNTAX(c) syntax_table[c]
                     75: 
                     76: static char syntax_table[256];
                     77: 
                     78: #endif /* not emacs */
                     79: 
                     80: #include "regex.h"
                     81: 
                     82: /* Number of failure points to allocate space for initially,
                     83:  when matching.  If this number is exceeded, more space is allocated,
                     84:  so it is not a hard limit.
                          The #ifndef guard lets a build supply its own value in place of
                          the default of 80.
                          NOTE(review): the bare `NFAILURES' after #endif below is an extra
                          token as far as ISO C is concerned; it is meant as a comment.  */
                     85: 
                     86: #ifndef NFAILURES
                     87: #define NFAILURES 80
                     88: #endif NFAILURES
                     89: 
                     90: /* width of a byte in bits */
                         /* re_compile_pattern uses it to size and index the bitmap of
                            (1 << BYTEWIDTH) bits that it builds for each `[...]'.  */
                     91: 
                     92: #define BYTEWIDTH 8
                     93: 
                     94: /* These are the command codes that appear in compiled regular expressions, one per byte.
                     95:   Some command codes are followed by argument bytes.
                     96:   A command code can specify any interpretation whatever for its arguments.
                     97:   Zero-bytes may appear in the compiled regular expression.
                           The numeric value of each code is its position in this list, so
                           the order of the enumerators must not be changed. */
                     98: 
                     99: enum regexpcode
                    100:   {
                    101:     unused,
                    102:     exactn,    /* followed by one byte giving n, and then by n literal bytes */
                    103:     begline,   /* fails unless at beginning of line */
                    104:     endline,   /* fails unless at end of line */
                    105:     jump,       /* followed by two bytes giving relative address to jump to */
                    106:     on_failure_jump,    /* followed by two bytes giving relative address of place
                    107:                            to resume at in case of failure. */
                    108:     finalize_jump,      /* Throw away latest failure point and then jump to address. */
                    109:     maybe_finalize_jump, /* Like jump but finalize if safe to do so.
                    110:                            This is used to jump back to the beginning
                    111:                            of a repeat.  If the command that follows
                    112:                            this jump is clearly incompatible with the
                    113:                            one at the beginning of the repeat, such that
                    114:                            we can be sure that there is no use backtracking
                    115:                            out of repetitions already completed,
                    116:                            then we finalize. */
                    117:     dummy_failure_jump,  /* jump, and push a dummy failure point.
                    118:                            This failure point will be thrown away
                    119:                            if an attempt is made to use it for a failure.
                    120:                            A + construct makes this before the first repeat.  */
                    121:     anychar,    /* matches any one character */
                    122:     charset,     /* matches any one char belonging to specified set.
                    123:                    First following byte is # bitmap bytes.
                    124:                    Then come bytes for a bit-map saying which chars are in.
                    125:                    Bits in each byte are ordered low-bit-first.
                    126:                    A character is in the set if its bit is 1.
                    127:                    A character too large to have a bit in the map
                    128:                    is automatically not in the set */
                    129:     charset_not, /* similar but match any character that is NOT one of those specified */
                    130:     start_memory, /* starts remembering the text that is matched
                    131:                    and stores it in a memory register.
                    132:                    followed by one byte containing the register number.
                    133:                    re_compile_pattern emits it only for register numbers below RE_NREGS. */
                    134:     stop_memory, /* stops remembering the text that is matched
                    135:                    and stores it in a memory register.
                    136:                    followed by one byte containing the register number.
                    137:                    re_compile_pattern emits it only for register numbers below RE_NREGS. */
                    138:     duplicate,    /* match a duplicate of something remembered.
                    139:                    Followed by one byte containing the index of the memory register. */
                    140:     before_dot,         /* Succeeds if before dot */
                    141:     at_dot,     /* Succeeds if at dot */
                    142:     after_dot,  /* Succeeds if after dot */
                    143:     begbuf,      /* Succeeds if at beginning of buffer */
                    144:     endbuf,      /* Succeeds if at end of buffer */
                    145:     wordchar,    /* Matches any word-constituent character */
                    146:     notwordchar, /* Matches any char that is not a word-constituent */
                    147:     wordbeg,    /* Succeeds if at word beginning */
                    148:     wordend,    /* Succeeds if at word end */
                    149:     wordbound,   /* Succeeds if at a word boundary */
                    150:     notwordbound, /* Succeeds if not at a word boundary */
                    151:     syntaxspec,  /* Matches any character whose syntax is specified.
                    152:                    followed by a byte which contains a syntax code, Sword or such like */
                    153:     notsyntaxspec /* Matches any character whose syntax differs from the specified. */
                    154:   };
                    155: 
                         /* SIGN_EXTEND_CHAR (x) defaults to the identity unless it has
                            already been defined before this point.
                            NOTE(review): presumably a hook for machines where plain char does
                            not sign-extend; no use of it is visible in this part of the file,
                            so confirm against the matcher.  */
                    156: #ifndef SIGN_EXTEND_CHAR
                    157: #define SIGN_EXTEND_CHAR(x) (x)
                    158: #endif
                    159: 
                    160: /* re_compile_pattern takes a regular-expression descriptor string in the user's format
                    161:   and converts it into a buffer full of byte commands for matching.
                    162: 
                    163:   pattern   is the address of the pattern string
                    164:   size      is the length of it.
                    165:   bufp     is a  struct re_pattern_buffer *  which points to the info
                    166:            on where to store the byte commands.
                    167:            This structure contains a  char *  which points to the
                    168:            actual space, which should have been obtained with malloc.
                    169:            re_compile_pattern may use  realloc  to grow the buffer space.
                    170: 
                    171:   The number of bytes of commands can be found out by looking in
                    172:   the  struct re_pattern_buffer  that bufp pointed to,
                    173:   after re_compile_pattern returns.
                    174: */
                    175: 
                         /* The macros below are written to be expanded inside
                            re_compile_pattern: they use its locals b, p, pend and translate
                            and jump to its end_of_pattern label.  */
                         /* PATPUSH (ch): store CH, converted to char, at the output pointer b
                            and advance b by one.  */
                    176: #define PATPUSH(ch) (*b++ = (char) (ch))
                    177: 
                         /* PATFETCH (c): read the next pattern byte into C as an unsigned
                            value and advance p; jump to end_of_pattern if p has reached pend.
                            If a translate table is present, C is mapped through it.  */
                    178: #define PATFETCH(c) \
                    179:  {if (p == pend) goto end_of_pattern; \
                    180:   c = * (unsigned char *) p++; \
                    181:   if (translate) c = translate[c]; }
                    182: 
                         /* PATFETCH_RAW (c): like PATFETCH, but never applies translate.  */
                    183: #define PATFETCH_RAW(c) \
                    184:  {if (p == pend) goto end_of_pattern; \
                    185:   c = * (unsigned char *) p++; }
                    186: 
                         /* PATUNFETCH: step p back by one so the last byte is read again.  */
                    187: #define PATUNFETCH p--
                    188: 
                    189: #define EXTEND_BUFFER \
                    190:   { old_buffer = bufp->buffer; \
                    191:     if (bufp->allocated == (1<<16)) goto too_big; \
                    192:     bufp->allocated *= 2; \
                    193:     if (bufp->allocated > (1<<16)) bufp->allocated = (1<<16); \
                    194:     if (!(bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated))) \
                    195:       goto memory_exhausted; \
                    196:     c = bufp->buffer - old_buffer; \
                    197:     b += c; \
                    198:     if (fixup_jump) \
                    199:       fixup_jump += c; \
                    200:     if (laststart) \
                    201:       laststart += c; \
                    202:     begalt += c; \
                    203:     if (pending_exact) \
                    204:       pending_exact += c; \
                    205:   }
                    206: 
                    207: static int store_jump (), insert_jump ();
                    208: 
                    209: char *
                    210: re_compile_pattern (pattern, size, bufp)
                    211:      char *pattern;
                    212:      int size;
                    213:      struct re_pattern_buffer *bufp;
                    214: {
                    215:   register char *b = bufp->buffer;
                    216:   register char *p = pattern;
                    217:   char *pend = pattern + size;
                    218:   register unsigned c, c1;
                    219:   char *p1;
                    220:   unsigned char *translate = (unsigned char *) bufp->translate;
                    221: 
                    222:   /* Temporary used when buffer is made bigger. */
                    223: 
                    224:   char *old_buffer;
                    225: 
                    226:   /* address of the count-byte of the most recently inserted "exactn" command.
                    227:     This makes it possible to tell whether a new exact-match character
                    228:     can be added to that command or requires a new "exactn" command. */
                    229:      
                    230:   char *pending_exact = 0;
                    231: 
                    232:   /* address of the place where a forward-jump should go
                    233:     to the end of the containing expression.
                    234:     Each alternative of an "or", except the last, ends with a forward-jump
                    235:     of this sort. */
                    236: 
                    237:   char *fixup_jump = 0;
                    238: 
                    239:   /* address of start of the most recently finished expression.
                    240:     This tells postfix * where to find the start of its operand. */
                    241: 
                    242:   char *laststart = 0;
                    243: 
                    244:   /* In processing a repeat, 1 means zero matches is allowed */
                    245: 
                    246:   char zero_times_ok;
                    247: 
                    248:   /* In processing a repeat, 1 means many matches is allowed */
                    249: 
                    250:   char many_times_ok;
                    251: 
                    252:   /* address of beginning of regexp, or inside of last \( */
                    253: 
                    254:   char *begalt = b;
                    255: 
                    256:   /* Stack of information saved by \( and restored by \).
                    257:      Four stack elements are pushed by each \(:
                    258:        First, the value of b.
                    259:        Second, the value of fixup_jump.
                    260:        Third, the value of regnum.
                    261:        Fourth, the value of begalt.  */
                    262: 
                    263:   int stackb[40];
                    264:   int *stackp = stackb;
                    265:   int *stacke = stackb + 40;
                    266:   int *stackt;
                    267: 
                    268:   /* Counts \('s as they are encountered.  Remembered for the matching \),
                    269:      where it becomes the "register number" to put in the stop_memory command */
                    270: 
                    271:   int regnum = 1;
                    272: 
                    273:   bufp->fastmap_accurate = 0;
                    274: 
                    275: #ifndef emacs
                    276:   /*
                    277:    * Initialize the syntax table.
                    278:    */
                    279:    init_syntax_once();
                    280: #endif emacs
                    281: 
                    282:   while (p != pend)
                    283:     {
                    284:       if (b - bufp->buffer
                    285:          > bufp->allocated - 10)
                    286:        /* Note that EXTEND_BUFFER clobbers c */
                    287:        EXTEND_BUFFER;
                    288: 
                    289:       PATFETCH (c);
                    290: 
                    291:       switch (c)
                    292:        {
                    293:        case '$':
                    294:          /* $ means succeed if at end of line, but only in special contexts.
                    295:            If randonly in the middle of a pattern, it is a normal character. */
                    296:          if (p == pend || (*p == '\\' && (p[1] == ')' || p[1] == '|')))
                    297:            {
                    298:              PATPUSH (endline);
                    299:              break;
                    300:            }
                    301:          goto normal_char;
                    302: 
                    303:        case '^':
                    304:          /* ^ means succeed if at beg of line, but only if no preceding pattern. */
                    305:          if (laststart) goto normal_char;
                    306:          PATPUSH (begline);
                    307:          break;
                    308: 
                    309:        case '*':
                    310:        case '+':
                    311:        case '?':
                    312:          /* If there is no previous pattern, char not special. */
                    313:          if (!laststart)
                    314:            goto normal_char;
                    315:          /* If there is a sequence of repetition chars,
                    316:             collapse it down to equivalent to just one.  */
                    317:          zero_times_ok = 0;
                    318:          many_times_ok = 0;
                    319:          while (1)
                    320:            {
                    321:              zero_times_ok |= c != '+';
                    322:              many_times_ok |= c != '?';
                    323:              if (p == pend)
                    324:                break;
                    325:              PATFETCH (c);
                    326:              if (!(c == '*' || c == '+' || c == '?'))
                    327:                {
                    328:                  PATUNFETCH;
                    329:                  break;
                    330:                }
                    331:            }
                    332: 
                    333:          /* Now we know whether 0 matches is allowed,
                    334:             and whether 2 or more matches is allowed.  */
                    335:          if (many_times_ok)
                    336:            {
                    337:              /* If more than one repetition is allowed,
                    338:                 put in a backward jump at the end.  */
                    339:              store_jump (b, maybe_finalize_jump, laststart - 3);
                    340:              b += 3;
                    341:            }
                    342:          insert_jump (on_failure_jump, laststart, b + 3, b);
                    343:          pending_exact = 0;
                    344:          b += 3;
                    345:          if (!zero_times_ok)
                    346:            {
                    347:              /* At least one repetition required: insert before the loop
                    348:                 a skip over the initial on-failure-jump instruction */
                    349:              insert_jump (dummy_failure_jump, laststart, laststart + 6, b);
                    350:              b += 3;
                    351:            }
                    352:          break;
                    353: 
                    354:        case '.':
                    355:          laststart = b;
                    356:          PATPUSH (anychar);
                    357:          break;
                    358: 
                    359:        case '[':
                    360:          if (b - bufp->buffer
                    361:              > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH)
                    362:            /* Note that EXTEND_BUFFER clobbers c */
                    363:            EXTEND_BUFFER;
                    364: 
                    365:          laststart = b;
                    366:          if (*p == '^')
                    367:            PATPUSH (charset_not), p++;
                    368:          else
                    369:            PATPUSH (charset);
                    370:          p1 = p;
                    371: 
                    372:          PATPUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
                    373:          /* Clear the whole map */
                    374:          bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
                    375:          /* Read in characters and ranges, setting map bits */
                    376:          while (1)
                    377:            {
                    378:              PATFETCH (c);
                    379:              if (c == ']' && p != p1 + 1) break;
                    380:              if (*p == '-')
                    381:                {
                    382:                  PATFETCH (c1);
                    383:                  PATFETCH (c1);
                    384:                  while (c <= c1)
                    385:                    b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH), c++;
                    386:                }
                    387:              else
                    388:                {
                    389:                  b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH);
                    390:                }
                    391:            }
                    392:          /* Discard any bitmap bytes that are all 0 at the end of the map.
                    393:             Decrement the map-length byte too. */
                    394:          while (b[-1] > 0 && b[b[-1] - 1] == 0)
                    395:            b[-1]--;
                    396:          b += b[-1];
                    397:          break;
                    398: 
                    399:         case '\\':
                    400:          if (p == pend) goto invalid_pattern;
                    401:          PATFETCH_RAW (c);
                    402:          switch (c)
                    403:            {
                    404:            case '(':
                    405:              if (stackp == stacke) goto nesting_too_deep;
                    406:              if (regnum < RE_NREGS)
                    407:                {
                    408:                  PATPUSH (start_memory);
                    409:                  PATPUSH (regnum);
                    410:                }
                    411:              *stackp++ = b - bufp->buffer;
                    412:              *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0;
                    413:              *stackp++ = regnum++;
                    414:              *stackp++ = begalt - bufp->buffer;
                    415:              fixup_jump = 0;
                    416:              laststart = 0;
                    417:              begalt = b;
                    418:              break;
                    419: 
                    420:            case ')':
                    421:              if (stackp == stackb) goto unmatched_close;
                    422:              begalt = *--stackp + bufp->buffer;
                    423:              if (fixup_jump)
                    424:                store_jump (fixup_jump, jump, b);
                    425:              if (stackp[-1] < RE_NREGS)
                    426:                {
                    427:                  PATPUSH (stop_memory);
                    428:                  PATPUSH (stackp[-1]);
                    429:                }
                    430:              stackp -= 2;
                    431:              fixup_jump = 0;
                    432:              if (*stackp)
                    433:                fixup_jump = *stackp + bufp->buffer - 1;
                    434:              laststart = *--stackp + bufp->buffer;
                    435:              break;
                    436: 
                    437:            case '|':
                    438:              insert_jump (on_failure_jump, begalt, b + 6, b);
                    439:              pending_exact = 0;
                    440:              b += 3;
                    441:              if (fixup_jump)
                    442:                store_jump (fixup_jump, jump, b);
                    443:              fixup_jump = b;
                    444:              b += 3;
                    445:              laststart = 0;
                    446:              begalt = b;
                    447:              break;
                    448: 
                    449: #ifdef emacs
                    450:            case '=':
                    451:              PATPUSH (at_dot);
                    452:              break;
                    453: 
                    454:            case 's':   
                    455:              laststart = b;
                    456:              PATPUSH (syntaxspec);
                    457:              PATFETCH (c);
                    458:              PATPUSH (syntax_spec_code[c]);
                    459:              break;
                    460: 
                    461:            case 'S':
                    462:              laststart = b;
                    463:              PATPUSH (notsyntaxspec);
                    464:              PATFETCH (c);
                    465:              PATPUSH (syntax_spec_code[c]);
                    466:              break;
                    467: #endif emacs
                    468: 
                    469:            case 'w':
                    470:              laststart = b;
                    471:              PATPUSH (wordchar);
                    472:              break;
                    473: 
                    474:            case 'W':
                    475:              laststart = b;
                    476:              PATPUSH (notwordchar);
                    477:              break;
                    478: 
                    479:            case '<':
                    480:              PATPUSH (wordbeg);
                    481:              break;
                    482: 
                    483:            case '>':
                    484:              PATPUSH (wordend);
                    485:              break;
                    486: 
                    487:            case 'b':
                    488:              PATPUSH (wordbound);
                    489:              break;
                    490: 
                    491:            case 'B':
                    492:              PATPUSH (notwordbound);
                    493:              break;
                    494: 
                    495:            case '`':
                    496:              PATPUSH (begbuf);
                    497:              break;
                    498: 
                    499:            case '\'':
                    500:              PATPUSH (endbuf);
                    501:              break;
                    502: 
                    503:            case '1':
                    504:            case '2':
                    505:            case '3':
                    506:            case '4':
                    507:            case '5':
                    508:            case '6':
                    509:            case '7':
                    510:            case '8':
                    511:            case '9':
                    512:              c1 = c - '0';
                    513:              if (c1 >= regnum)
                    514:                goto normal_char;
                    515:              for (stackt = stackp - 2;  stackt > stackb;  stackt -= 4)
                    516:                if (*stackt == c1)
                    517:                  goto normal_char;
                    518:              laststart = b;
                    519:              PATPUSH (duplicate);
                    520:              PATPUSH (c1);
                    521:              break;
                    522:            default:
                    523:              goto normal_char;
                    524:            }
                    525:          break;
                    526: 
                    527:        default:
                    528:        normal_char:
                    529:          if (!pending_exact || pending_exact + *pending_exact + 1 != b
                    530:              || *pending_exact == 0177 || *p == '*' || *p == '^'
                    531:              || *p == '+' || *p == '?')
                    532:            {
                    533:              laststart = b;
                    534:              PATPUSH (exactn);
                    535:              pending_exact = b;
                    536:              PATPUSH (0);
                    537:            }
                    538:          PATPUSH (c);
                    539:          (*pending_exact)++;
                    540:        }
                    541:     }
                    542: 
                    543:   if (fixup_jump)
                    544:     store_jump (fixup_jump, jump, b);
                    545: 
                    546:   if (stackp != stackb) goto unmatched_open;
                    547: 
                    548:   bufp->used = b - bufp->buffer;
                    549:   return 0;
                    550: 
                    551:  invalid_pattern:
                    552:   return "Invalid regular expression";
                    553: 
                    554:  unmatched_open:
                    555:   return "Unmatched \\(";
                    556: 
                    557:  unmatched_close:
                    558:   return "Unmatched \\)";
                    559: 
                    560:  end_of_pattern:
                    561:   return "Premature end of regular expression";
                    562: 
                    563:  nesting_too_deep:
                    564:   return "Nesting too deep";
                    565: 
                    566:  too_big:
                    567:   return "Regular expression too big";
                    568: 
                    569:  memory_exhausted:
                    570:   return "Memory exhausted";
                    571: }
                    572: 
                    573: #ifndef emacs
                    574: init_syntax_once ()
                    575: {
                    576:    register int c;
                    577:    static int done = 0;
                    578: 
                    579:    if (done)
                    580:      return;
                    581: 
                    582:    bzero (syntax_table, sizeof syntax_table);
                    583: 
                    584:    for (c = 'a'; c <= 'z'; c++)
                    585:      syntax_table[c] = Sword;
                    586: 
                    587:    for (c = 'A'; c <= 'Z'; c++)
                    588:      syntax_table[c] = Sword;
                    589: 
                    590:    for (c = '0'; c <= '9'; c++)
                    591:      syntax_table[c] = Sword;
                    592: 
                    593:    done = 1;
                    594: }
                    595: #endif not emacs
                    596: 
                    597: /* Store where `from' points a jump operation to jump to where `to' points.
                    598:   `opcode' is the opcode to store. */
                    599: 
                    600: static int
                    601: store_jump (from, opcode, to)
                    602:      char *from, *to;
                    603:      char opcode;
                    604: {
                    605:   from[0] = opcode;
                    606:   from[1] = (to - (from + 3)) & 0377;
                    607:   from[2] = (to - (from + 3)) >> 8;
                    608: }
                    609: 
                    610: /* Open up space at char FROM, and insert there a jump to TO.
                    611:    CURRENT_END gives te end of the storage no in use,
                    612:    so we know how much data to copy up.
                    613:    OP is the opcode of the jump to insert.
                    614: 
                    615:    If you call this function, you must zero out pending_exact.  */
                    616: 
                    617: static int
                    618: insert_jump (op, from, to, current_end)
                    619:      char op;
                    620:      char *from, *to, *current_end;
                    621: {
                    622:   register char *pto = current_end + 3;
                    623:   register char *pfrom = current_end;
                    624:   while (pfrom != from)
                    625:     *--pto = *--pfrom;
                    626:   store_jump (from, op, to);
                    627: }
                    628: 
                    629: /* Given a pattern, compute a fastmap from it.
                    630:  The fastmap records which of the (1 << BYTEWIDTH) possible characters
                    631:  can start a string that matches the pattern.
                    632:  This fastmap is used by re_search to skip quickly over totally implausible text.
                    633: 
                    634:  The caller must supply the address of a (1 << BYTEWIDTH)-byte data area
                    635:  as bufp->fastmap.
                    636:  The other components of bufp describe the pattern to be used.  */
                    637: 
                    638: re_compile_fastmap (bufp)
                    639:      struct re_pattern_buffer *bufp;
                    640: {
                    641:   char *pattern = bufp->buffer;
                    642:   int size = bufp->used;
                    643:   register char *fastmap = bufp->fastmap;
                    644:   register char *p = pattern;
                    645:   register char *pend = pattern + size;
                    646:   register int j, k;
                    647:   unsigned char *translate = (unsigned char *) bufp->translate;
                    648: 
                    649:   char *stackb[NFAILURES];
                    650:   char **stackp = stackb;
                    651: 
                    652:   bzero (fastmap, (1 << BYTEWIDTH));
                    653:   bufp->fastmap_accurate = 1;
                    654:   bufp->can_be_null = 0;
                    655:       
                    656:   while (p)
                    657:     {
                    658:       if (p == pend)
                    659:        {
                    660:          bufp->can_be_null = 1;
                    661:          break;
                    662:        }
                    663: #ifdef SWITCH_ENUM_BUG
                    664:       switch ((int) ((enum regexpcode) *p++))
                    665: #else
                    666:       switch ((enum regexpcode) *p++)
                    667: #endif
                    668:        {
                    669:        case exactn:
                    670:          if (translate)
                    671:            fastmap[translate[p[1]]] = 1;
                    672:          else
                    673:            fastmap[p[1]] = 1;
                    674:          break;
                    675: 
                    676:         case begline:
                    677:         case before_dot:
                    678:        case at_dot:
                    679:        case after_dot:
                    680:        case begbuf:
                    681:        case endbuf:
                    682:        case wordbound:
                    683:        case notwordbound:
                    684:        case wordbeg:
                    685:        case wordend:
                    686:          continue;
                    687: 
                    688:        case endline:
                    689:          if (translate)
                    690:            fastmap[translate['\n']] = 1;
                    691:          else
                    692:            fastmap['\n'] = 1;
                    693:          bufp->can_be_null = 1;
                    694:          break;
                    695: 
                    696:        case finalize_jump:
                    697:        case maybe_finalize_jump:
                    698:        case jump:
                    699:        case dummy_failure_jump:
                    700:          bufp->can_be_null = 1;
                    701:          j = *p++ & 0377;
                    702:          j += SIGN_EXTEND_CHAR (*p++) << 8;
                    703:          p += j;
                    704:          if (j > 0)
                    705:            continue;
                    706:          /* Jump backward reached implies we just went through
                    707:             the body of a loop and matched nothing.
                    708:             Opcode jumped to should be an on_failure_jump.
                    709:             Just treat it like an ordinary jump.
                    710:             For a * loop, it has pushed its failure point already;
                    711:             if so, discard that as redundant.  */
                    712:          if ((enum regexpcode) *p != on_failure_jump)
                    713:            continue;
                    714:          p++;
                    715:          j = *p++ & 0377;
                    716:          j += SIGN_EXTEND_CHAR (*p++) << 8;
                    717:          p += j;
                    718:          if (stackp != stackb && *stackp == p)
                    719:            stackp--;
                    720:          continue;
                    721:          
                    722:        case on_failure_jump:
                    723:          j = *p++ & 0377;
                    724:          j += SIGN_EXTEND_CHAR (*p++) << 8;
                    725:          *++stackp = p + j;
                    726:          continue;
                    727: 
                    728:        case start_memory:
                    729:        case stop_memory:
                    730:          p++;
                    731:          continue;
                    732: 
                    733:        case duplicate:
                    734:          bufp->can_be_null = 1;
                    735:        case anychar:
                    736:          for (j = 0; j < (1 << BYTEWIDTH); j++)
                    737:            fastmap[j] = 1;
                    738:          return;
                    739: 
                    740:        case wordchar:
                    741:          for (j = 0; j < (1 << BYTEWIDTH); j++)
                    742:            if (SYNTAX (j) == Sword)
                    743:              fastmap[j] = 1;
                    744:          break;
                    745: 
                    746:        case notwordchar:
                    747:          for (j = 0; j < (1 << BYTEWIDTH); j++)
                    748:            if (SYNTAX (j) != Sword)
                    749:              fastmap[j] = 1;
                    750:          break;
                    751: 
                    752: #ifdef emacs
                    753:        case syntaxspec:
                    754:          k = *p++;
                    755:          for (j = 0; j < (1 << BYTEWIDTH); j++)
                    756:            if (SYNTAX (j) == (enum syntaxcode) k)
                    757:              fastmap[j] = 1;
                    758:          break;
                    759: 
                    760:        case notsyntaxspec:
                    761:          for (j = 0; j < (1 << BYTEWIDTH); j++)
                    762:            if (SYNTAX (j) != (enum syntaxcode) k)
                    763:              fastmap[j] = 1;
                    764:          break;
                    765: #endif emacs
                    766: 
                    767:        case charset:
                    768:          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
                    769:            if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
                    770:              {
                    771:                if (translate)
                    772:                  fastmap[translate[j]] = 1;
                    773:                else
                    774:                  fastmap[j] = 1;
                    775:              }
                    776:          break;
                    777: 
                    778:        case charset_not:
                    779:          /* Chars beyond end of map must be allowed */
                    780:          for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
                    781:            if (translate)
                    782:              fastmap[translate[j]] = 1;
                    783:            else
                    784:              fastmap[j] = 1;
                    785: 
                    786:          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
                    787:            if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
                    788:              {
                    789:                if (translate)
                    790:                  fastmap[translate[j]] = 1;
                    791:                else
                    792:                  fastmap[j] = 1;
                    793:              }
                    794:          break;
                    795:        }
                    796: 
                    797:       /* Get here means we have successfully found the possible starting characters
                    798:         of one path of the pattern.  We need not follow this path any farther.
                    799:         Instead, look at the next alternative remembered in the stack. */
                    800:       if (stackp != stackb)
                    801:        p = *stackp--;
                    802:       else
                    803:        break;
                    804:     }
                    805: }
                    806: 
                    807: /* Like re_search_2, below, but only one string is specified. */
                    808: 
                    809: re_search (pbufp, string, size, startpos, range, regs)
                    810:      struct re_pattern_buffer *pbufp;
                    811:      char *string;
                    812:      int size, startpos, range;
                    813:      struct re_registers *regs;
                    814: {
                    815:   return re_search_2 (pbufp, 0, 0, string, size, startpos, range, regs, size);
                    816: }
                    817: 
                    818: /* Like re_match_2 but tries first a match starting at index `startpos',
                    819:  then at startpos + 1, and so on.
                    820:  `range' is the number of places to try before giving up.
                    821:  If `range' is negative, the starting positions tried are
                    822:   startpos, startpos - 1, etc.
                    823:  It is up to the caller to make sure that range is not so large
                    824:   as to take the starting position outside of the input strings.
                    825: 
                    826: The value returned is the position at which the match was found,
                    827:  or -1 if no match was found. */
                    828: 
                    829: int
                    830: re_search_2 (pbufp, string1, size1, string2, size2, startpos, range, regs, mstop)
                    831:      struct re_pattern_buffer *pbufp;
                    832:      char *string1, *string2;
                    833:      int size1, size2;
                    834:      int startpos;
                    835:      register int range;
                    836:      struct re_registers *regs;
                    837:      int mstop;
                    838: {
                    839:   register char *fastmap = pbufp->fastmap;
                    840:   register char *translate = pbufp->translate;
                    841:   int total = size1 + size2;
                    842: 
                    843:   /* Update the fastmap now if not correct already */
                    844:   if (fastmap && !pbufp->fastmap_accurate)
                    845:     re_compile_fastmap (pbufp);
                    846: 
                    847:   while (1)
                    848:     {
                    849:       /* If a fastmap is supplied, skip quickly over characters
                    850:         that cannot possibly be the start of a match.
                    851:         Note, however, that if the pattern can possibly match
                    852:         the null string, we must test it at each starting point
                    853:         so that we take the first null string we get.  */
                    854: 
                    855:       if (fastmap && startpos < total && !pbufp->can_be_null)
                    856:        {
                    857:          if (range > 0)
                    858:            {
                    859:              register int lim = 0;
                    860:              register char *p;
                    861:              int irange = range;
                    862:              if (startpos < size1 && startpos + range >= size1)
                    863:                lim = range - (size1 - startpos);
                    864: 
                    865:              p = &(startpos >= size1 ? string2 - size1 : string1)[startpos];
                    866: 
                    867:              if (translate)
                    868:                {
                    869:                  while (range > lim && !fastmap[translate[*p++]])
                    870:                    range--;
                    871:                }
                    872:              else
                    873:                {
                    874:                  while (range > lim && !fastmap[*p++])
                    875:                    range--;
                    876:                }
                    877:              startpos += irange - range;
                    878:            }
                    879:          else
                    880:            {
                    881:              register char c;
                    882:              if (startpos >= size1) c = string2[startpos - size1];
                    883:              else c = string1[startpos];
                    884:              if (translate ? !fastmap[translate[c]] : !fastmap[c])
                    885:                goto advance;
                    886:            }
                    887:        }
                    888: 
                    889:       if (range >= 0 && startpos == total
                    890:          && fastmap && !pbufp->can_be_null)
                    891:        return -1;
                    892: 
                    893:       if (0 <= re_match_2 (pbufp, string1, size1, string2, size2, startpos, regs, mstop))
                    894:        return startpos;
                    895: 
                    896:     advance:
                    897:       if (!range) break;
                    898:       if (range > 0) range--, startpos++; else range++, startpos--;
                    899:     }
                    900:   return -1;
                    901: }
                    902: 
                    903: #ifndef emacs   /* emacs never uses this */
                    904: re_match (pbufp, string, size, pos, regs)
                    905:      struct re_pattern_buffer *pbufp;
                    906:      char *string;
                    907:      int size, pos;
                    908:      struct re_registers *regs;
                    909: {
                    910:   return re_match_2 (pbufp, 0, 0, string, size, pos, regs, size);
                    911: }
                    912: #endif /* emacs */
                    913: 
                    914: /* Match the pattern described by `pbufp'
                    915:   against data which is the virtual concatenation of `string1' and `string2'.
                    916:   `size1' and `size2' are the sizes of the two data strings.
                    917:   Start the match at position `pos'.
                    918:   Do not consider matching past the position `mstop'.
                    919: 
                    920:   If pbufp->fastmap is nonzero, then it had better be up to date.
                    921: 
                    922:   The reason that the data to match is specified as two components
                    923:   which are to be regarded as concatenated
                    924:   is so that this function can be used directly on the contents of an Emacs buffer.
                    925: 
                    926:   -1 is returned if there is no match.  Otherwise the value is the length
                    927:   of the substring which was matched.
                    928: */
                    929: 
                    930: int
                    931: re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop)
                    932:      struct re_pattern_buffer *pbufp;
                    933:      char *string1, *string2;
                    934:      int size1, size2;
                    935:      int pos;
                    936:      struct re_registers *regs;
                    937:      int mstop;
                    938: {
                    939:   register char *p = pbufp->buffer;
                    940:   register char *pend = p + pbufp->used;
                    941:   /* End of first string */
                    942:   char *end1;
                    943:   /* End of second string */
                    944:   char *end2;
                    945:   /* Pointer just past last char to consider matching */
                    946:   char *end_match_1, *end_match_2;
                    947:   register char *d, *dend;
                    948:   register int mcnt;
                    949:   char *translate = pbufp->translate;
                    950: 
                    951:  /* Failure point stack.  Each place that can handle a failure further down the line
                    952:     pushes a failure point on this stack.  It consists of two char *'s.
                    953:     The first one pushed is where to resume scanning the pattern;
                    954:     the second pushed is where to resume scanning the strings.
                    955:     If the latter is zero, the failure point is a "dummy".
                    956:     If a failure happens and the innermost failure point is dormant,
                    957:     it discards that failure point and tries the next one. */
                    958: 
                    959:   char **stackb = (char **) alloca (2 * NFAILURES * sizeof (char *));
                    960:   char **stackp = stackb, **stacke = &stackb[2 * NFAILURES];
                    961: 
                    962:   /* Information on the "contents" of registers.
                    963:      These are pointers into the input strings; they record
                    964:      just what was matched (on this attempt) by some part of the pattern.
                    965:      The start_memory command stores the start of a register's contents
                    966:      and the stop_memory command stores the end.
                    967: 
                    968:      At that point, regstart[regnum] points to the first character in the register,
                    969:      regend[regnum] points to the first character beyond the end of the register,
                    970:      and regstart_segend[regnum] is either the same as regend[regnum]
                    971:      or else points to the end of the input string into which regstart[regnum] points.
                    972:      The latter case happens when regstart[regnum] is in string1 and
                    973:      regend[regnum] is in string2.  */
                    974: 
                    975:   char *regstart[RE_NREGS];
                    976:   char *regstart_segend[RE_NREGS];
                    977:   char *regend[RE_NREGS];
                    978: 
                    979:   /* Set up pointers to ends of strings.
                    980:      Don't allow the second string to be empty unless both are empty.  */
                    981:   if (!size2)
                    982:     {
                    983:       string2 = string1;
                    984:       size2 = size1;
                    985:       string1 = 0;
                    986:       size1 = 0;
                    987:     }
                    988:   end1 = string1 + size1;
                    989:   end2 = string2 + size2;
                    990: 
                    991:   /* Compute where to stop matching, within the two strings */
                    992:   if (mstop <= size1)
                    993:     {
                    994:       end_match_1 = string1 + mstop;
                    995:       end_match_2 = string2;
                    996:     }
                    997:   else
                    998:     {
                    999:       end_match_1 = end1;
                   1000:       end_match_2 = string2 + mstop - size1;
                   1001:     }
                   1002: 
                   1003:   /* Initialize \( and \) text positions to -1
                   1004:      to mark ones that no \( or \) has been seen for.  */
                   1005: 
                   1006:   for (mcnt = 0; mcnt < sizeof (regstart) / sizeof (*regstart); mcnt++)
                   1007:     regstart[mcnt] = (char *) -1;
                   1008: 
                   1009:   /* `p' scans through the pattern as `d' scans through the data.
                   1010:      `dend' is the end of the input string that `d' points within.
                   1011:      `d' is advanced into the following input string whenever necessary,
                   1012:      but this happens before fetching;
                   1013:      therefore, at the beginning of the loop,
                   1014:      `d' can be pointing at the end of a string,
                   1015:      but it cannot equal string2.  */
                   1016: 
                   1017:   if (pos <= size1)
                   1018:     d = string1 + pos, dend = end_match_1;
                   1019:   else
                   1020:     d = string2 + pos - size1, dend = end_match_2;
                   1021: 
                   1022: /* Write PREFETCH; just before fetching a character with *d.  */
                   1023: #define PREFETCH \
                   1024:  while (d == dend)                                                 \
                   1025:   { if (dend == end_match_2) goto fail;  /* end of string2 => failure */   \
                   1026:     d = string2;  /* end of string1 => advance to string2. */       \
                   1027:     dend = end_match_2; }
                   1028: 
                   1029:   /* This loop loops over pattern commands.
                   1030:      It exits by returning from the function if match is complete,
                   1031:      or it drops through if match fails at this starting point in the input data. */
                   1032: 
                   1033:   while (1)
                   1034:     {
                   1035:       if (p == pend)
                   1036:        /* End of pattern means we have succeeded! */
                   1037:        {
                   1038:          /* If caller wants register contents data back, convert it to indices */
                   1039:          if (regs)
                   1040:            {
                   1041:              bzero (regs, sizeof (*regs));
                   1042: 
                   1043:              regend[0] = d;
                   1044:              regstart[0] = string1;
                   1045:              for (mcnt = 0; mcnt < RE_NREGS; mcnt++)
                   1046:                {
                   1047:                  if (mcnt && regstart[mcnt] == (char *) -1) continue;
                   1048:                  if (regstart[mcnt] - string1 < 0 || regstart[mcnt] - string1 > size1)
                   1049:                    regs->start[mcnt] = regstart[mcnt] - string2 + size1;
                   1050:                  else
                   1051:                    regs->start[mcnt] = regstart[mcnt] - string1;
                   1052:                  if (regend[mcnt] - string1 < 0 || regend[mcnt] - string1 > size1)
                   1053:                    regs->end[mcnt] = regend[mcnt] - string2 + size1;
                   1054:                  else
                   1055:                    regs->end[mcnt] = regend[mcnt] - string1;
                   1056:                }
                   1057:              regs->start[0] = pos;
                   1058:            }
                   1059:          if (d - string1 >= 0 && d - string1 <= size1)
                   1060:            return d - string1 - pos;
                   1061:          else
                   1062:            return d - string2 + size1 - pos;
                   1063:        }
                   1064: 
                   1065:       /* Otherwise match next pattern command */
                   1066: #ifdef SWITCH_ENUM_BUG
                   1067:       switch ((int) ((enum regexpcode) *p++))
                   1068: #else
                   1069:       switch ((enum regexpcode) *p++)
                   1070: #endif
                   1071:        {
                   1072: 
                   1073:        /* \( is represented by a start_memory, \) by a stop_memory.
                   1074:            Both of those commands contain a "register number" argument.
                   1075:            The text matched within the \( and \) is recorded under that number.
                   1076:            Then, \<digit> turns into a `duplicate' command which
                   1077:            is followed by the numeric value of <digit> as the register number. */
                   1078: 
                   1079:        case start_memory:
                   1080:          regstart[*p] = d;
                   1081:          regstart_segend[*p++] = dend;
                   1082:          break;
                   1083: 
                   1084:        case stop_memory:
                   1085:          regend[*p] = d;
                   1086:          if (regstart_segend[*p] == dend)
                   1087:            regstart_segend[*p] = d;
                   1088:          p++;
                   1089:          break;
                   1090: 
                   1091:        case duplicate:
                   1092:          {
                   1093:            int regno = *p++;   /* Get which register to match against */
                   1094:            register char *d2, *dend2;
                   1095: 
                   1096:            d2 = regstart[regno];
                   1097:            dend2 = regstart_segend[regno];
                   1098:            while (1)
                   1099:              {
                   1100:                /* Advance to next segment in register contents, if necessary */
                   1101:                while (d2 == dend2)
                   1102:                  {
                   1103:                    if (dend2 == end_match_2) break;
                   1104:                    if (dend2 == regend[regno]) break;
                   1105:                    d2 = string2, dend2 = regend[regno];  /* end of string1 => advance to string2. */
                   1106:                  }
                   1107:                /* At end of register contents => success */
                   1108:                if (d2 == dend2) break;
                   1109: 
                   1110:                /* Advance to next segment in data being matched, if necessary */
                   1111:                PREFETCH;
                   1112: 
                   1113:                /* mcnt gets # consecutive chars to compare */
                   1114:                mcnt = dend - d;
                   1115:                if (mcnt > dend2 - d2)
                   1116:                  mcnt = dend2 - d2;
                   1117:                /* Compare that many; failure if mismatch, else skip them. */
                   1118:                if (translate ? bcmp_translate (d, d2, mcnt, translate) : bcmp (d, d2, mcnt))
                   1119:                  goto fail;
                   1120:                d += mcnt, d2 += mcnt;
                   1121:              }
                   1122:          }
                   1123:          break;
                   1124: 
                   1125:        case anychar:
                   1126:          /* fetch a data character */
                   1127:          PREFETCH;
                   1128:          /* Match anything but a newline.  */
                   1129:          if ((translate ? translate[*d++] : *d++) == '\n')
                   1130:            goto fail;
                   1131:          break;
                   1132: 
                   1133:        case charset:
                   1134:        case charset_not:
                   1135:          {
                   1136:            /* Nonzero for charset_not */
                   1137:            int not = 0;
                   1138:            register int c;
                   1139:            if (*(p - 1) == (char) charset_not)
                   1140:              not = 1;
                   1141: 
                   1142:            /* fetch a data character */
                   1143:            PREFETCH;
                   1144: 
                   1145:            if (translate)
                   1146:              c = translate [*(unsigned char *)d];
                   1147:            else
                   1148:              c = *(unsigned char *)d;
                   1149: 
                   1150:            if (c < *p * BYTEWIDTH
                   1151:                && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
                   1152:              not = !not;
                   1153: 
                   1154:            p += 1 + *p;
                   1155: 
                   1156:            if (!not) goto fail;
                   1157:            d++;
                   1158:            break;
                   1159:          }
                   1160: 
                   1161:        case begline:
                   1162:          if (d == string1 || d[-1] == '\n')
                   1163:            break;
                   1164:          goto fail;
                   1165: 
                   1166:        case endline:
                   1167:          if (d == end2
                   1168:              || (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n'))
                   1169:            break;
                   1170:          goto fail;
                   1171: 
                   1172:        /* "or" constructs ("|") are handled by starting each alternative
                   1173:            with an on_failure_jump that points to the start of the next alternative.
                   1174:            Each alternative except the last ends with a jump to the joining point.
                   1175:            (Actually, each jump except for the last one really jumps
                   1176:             to the following jump, because tensioning the jumps is a hassle.) */
                   1177: 
                   1178:        /* The start of a stupid repeat has an on_failure_jump that points
                   1179:           past the end of the repeat text.
                   1180:           This makes a failure point so that, on failure to match a repetition,
                   1181:           matching restarts past as many repetitions have been found
                   1182:           with no way to fail and look for another one.  */
                   1183: 
                   1184:        /* A smart repeat is similar but loops back to the on_failure_jump
                   1185:           so that each repetition makes another failure point. */
                   1186: 
                   1187:        case on_failure_jump:
                   1188:          if (stackp == stacke)
                   1189:            {
                   1190:              char **stackx = (char **) alloca (2 * (stacke - stackb) * sizeof (char *));
                   1191:              bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
                   1192:              stackp += stackx - stackb;
                   1193:              stacke = stackx + 2 * (stacke - stackb);
                   1194:              stackb = stackx;
                   1195:            }
                   1196:          mcnt = *p++ & 0377;
                   1197:          mcnt += SIGN_EXTEND_CHAR (*p++) << 8;
                   1198:          *stackp++ = mcnt + p;
                   1199:          *stackp++ = d;
                   1200:          break;
                   1201: 
                   1202:        /* The end of a smart repeat has an maybe_finalize_jump back.
                   1203:           Change it either to a finalize_jump or an ordinary jump. */
                   1204: 
                   1205:        case maybe_finalize_jump:
                   1206:          mcnt = *p++ & 0377;
                   1207:          mcnt += SIGN_EXTEND_CHAR (*p++) << 8;
                   1208:          /* Compare what follows with the begining of the repeat.
                   1209:             If we can establish that there is nothing that they would
                   1210:             both match, we can change to finalize_jump */
                   1211:          if (p == pend)
                   1212:            p[-3] = (char) finalize_jump;
                   1213:          else if (*p == (char) exactn || *p == (char) endline)
                   1214:            {
                   1215:              register int c = *p == (char) endline ? '\n' : p[2];
                   1216:              register char *p1 = p + mcnt;
                   1217:              /* p1[0] ... p1[2] are an on_failure_jump.
                   1218:                 Examine what follows that */
                   1219:              if (p1[3] == (char) exactn && p1[5] != c)
                   1220:                p[-3] = (char) finalize_jump;
                   1221:              else if (p1[3] == (char) charset || p1[3] == (char) charset_not)
                   1222:                {
                   1223:                  int not = p1[3] == (char) charset_not;
                   1224:                  if (c < p1[4] * BYTEWIDTH
                   1225:                      && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
                   1226:                    not = !not;
                   1227:                  /* not is 1 if c would match */
                   1228:                  /* That means it is not safe to finalize */
                   1229:                  if (!not)
                   1230:                    p[-3] = (char) finalize_jump;
                   1231:                }
                   1232:            }
                   1233:          p -= 2;
                   1234:          if (p[-1] != (char) finalize_jump)
                   1235:            {
                   1236:              p[-1] = (char) jump;
                   1237:              goto nofinalize;
                   1238:            }
                   1239: 
                   1240:        /* The end of a stupid repeat has a finalize-jump
                   1241:           back to the start, where another failure point will be made
                   1242:           which will point after all the repetitions found so far. */
                   1243: 
                   1244:        case finalize_jump:
                   1245:          stackp -= 2;
                   1246: 
                   1247:        case jump:
                   1248:        nofinalize:
                   1249:          mcnt = *p++ & 0377;
                   1250:          mcnt += SIGN_EXTEND_CHAR (*p++) << 8;
                   1251:          p += mcnt;
                   1252:          break;
                   1253: 
                   1254:        case dummy_failure_jump:
                   1255:          if (stackp == stacke)
                   1256:            {
                   1257:              char **stackx = (char **) alloca (2 * (stacke - stackb) * sizeof (char *));
                   1258:              bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
                   1259:              stackp += stackx - stackb;
                   1260:              stacke = stackx + 2 * (stacke - stackb);
                   1261:              stackb = stackx;
                   1262:            }
                   1263:          *stackp++ = 0;
                   1264:          *stackp++ = 0;
                   1265:          goto nofinalize;
                   1266: 
                   1267:        case wordbound:
                   1268:          if (d == string1  /* Points to first char */
                   1269:              || d == end2  /* Points to end */
                   1270:              || (d == end1 && size2 == 0)) /* Points to end */
                   1271:            break;
                   1272:          if ((SYNTAX (((unsigned char *)d)[-1]) == Sword)
                   1273:              != (SYNTAX (d == end1 ? *(unsigned char *)string2 : *(unsigned char *)d) == Sword))
                   1274:            break;
                   1275:          goto fail;
                   1276: 
                   1277:        case notwordbound:
                   1278:          if (d == string1  /* Points to first char */
                   1279:              || d == end2  /* Points to end */
                   1280:              || (d == end1 && size2 == 0)) /* Points to end */
                   1281:            goto fail;
                   1282:          if ((SYNTAX (((unsigned char *)d)[-1]) == Sword)
                   1283:              != (SYNTAX (d == end1 ? *(unsigned char *)string2 : *(unsigned char *)d) == Sword))
                   1284:            goto fail;
                   1285:          break;
                   1286: 
                   1287:        case wordbeg:
                   1288:          if (d == end2  /* Points to end */
                   1289:              || (d == end1 && size2 == 0) /* Points to end */
                   1290:              || SYNTAX (*(unsigned char *) (d == end1 ? string2 : d)) != Sword) /* Next char not a letter */
                   1291:            goto fail;
                   1292:          if (d == string1  /* Points to first char */
                   1293:              || SYNTAX (((unsigned char *)d)[-1]) != Sword)  /* prev char not letter */
                   1294:            break;
                   1295:          goto fail;
                   1296: 
                   1297:        case wordend:
                   1298:          if (d == string1  /* Points to first char */
                   1299:              || SYNTAX (((unsigned char *)d)[-1]) != Sword)  /* prev char not letter */
                   1300:            goto fail;
                   1301:          if (d == end2  /* Points to end */
                   1302:              || (d == end1 && size2 == 0) /* Points to end */
                   1303:              || SYNTAX (d == end1 ? *(unsigned char *)string2 : *(unsigned char *)d) != Sword) /* Next char not a letter */
                   1304:            break;
                   1305:          goto fail;
                   1306: 
                   1307: #ifdef emacs
                   1308:        case before_dot:
                   1309:          if (((d - string2 <= (unsigned) size2)
                   1310:               ? d - (char *) bf_p2 : d - (char *) bf_p1)
                   1311:              <= point)
                   1312:            goto fail;
                   1313:          break;
                   1314: 
                   1315:        case at_dot:
                   1316:          if (((d - string2 <= (unsigned) size2)
                   1317:               ? d - (char *) bf_p2 : d - (char *) bf_p1)
                   1318:              == point)
                   1319:            goto fail;
                   1320:          break;
                   1321: 
                   1322:        case after_dot:
                   1323:          if (((d - string2 <= (unsigned) size2)
                   1324:               ? d - (char *) bf_p2 : d - (char *) bf_p1)
                   1325:              >= point)
                   1326:            goto fail;
                   1327:          break;
                   1328: 
                   1329:        case wordchar:
                   1330:          mcnt = (int) Sword;
                   1331:          goto matchsyntax;
                   1332: 
                   1333:        case syntaxspec:
                   1334:          mcnt = *p++;
                   1335:        matchsyntax:
                   1336:          PREFETCH;
                   1337:          if (SYNTAX (*(unsigned char *)d++) != (enum syntaxcode) mcnt) goto fail;
                   1338:          break;
                   1339:          
                   1340:        case notwordchar:
                   1341:          mcnt = (int) Sword;
                   1342:          goto matchnotsyntax;
                   1343: 
                   1344:        case notsyntaxspec:
                   1345:          mcnt = *p++;
                   1346:        matchnotsyntax:
                   1347:          PREFETCH;
                   1348:          if (SYNTAX (*(unsigned char *)d++) == (enum syntaxcode) mcnt) goto fail;
                   1349:          break;
                   1350: #else
                   1351:        case wordchar:
                   1352:          PREFETCH;
                   1353:          if (SYNTAX (*(unsigned char *)d++) == 0) goto fail;
                   1354:          break;
                   1355:          
                   1356:        case notwordchar:
                   1357:          PREFETCH;
                   1358:          if (SYNTAX (*(unsigned char *)d++) != 0) goto fail;
                   1359:          break;
                   1360: #endif not emacs
                   1361: 
                   1362:        case begbuf:
                   1363:          if (d == string1)     /* Note, d cannot equal string2 */
                   1364:            break;              /* unless string1 == string2.  */
                   1365:          goto fail;
                   1366: 
                   1367:        case endbuf:
                   1368:          if (d == end2 || (d == end1 && size2 == 0))
                   1369:            break;
                   1370:          goto fail;
                   1371: 
                   1372:        case exactn:
                   1373:          /* Match the next few pattern characters exactly.
                   1374:             mcnt is how many characters to match. */
                   1375:          mcnt = *p++;
                   1376:          if (translate)
                   1377:            {
                   1378:              do
                   1379:                {
                   1380:                  PREFETCH;
                   1381:                  if (translate[*(unsigned char *)d++] != *p++) goto fail;
                   1382:                }
                   1383:              while (--mcnt);
                   1384:            }
                   1385:          else
                   1386:            {
                   1387:              do
                   1388:                {
                   1389:                  PREFETCH;
                   1390:                  if (*d++ != *p++) goto fail;
                   1391:                }
                   1392:              while (--mcnt);
                   1393:            }
                   1394:          break;
                   1395:        }
                   1396:       continue;    /* Successfully matched one pattern command; keep matching */
                   1397: 
                   1398:       /* Jump here if any matching operation fails. */
                   1399:     fail:
                   1400:       if (stackp != stackb)
                   1401:        /* A restart point is known.  Restart there and pop it. */
                   1402:        {
                   1403:          if (!stackp[-2])
                   1404:            {   /* If innermost failure point is dormant, flush it and keep looking */
                   1405:              stackp -= 2;
                   1406:              goto fail;
                   1407:            }
                   1408:          d = *--stackp;
                   1409:          p = *--stackp;
                   1410:          if (d >= string1 && d <= end1)
                   1411:            dend = end_match_1;
                   1412:        }
                   1413:       else break;   /* Matching at this starting point really fails! */
                   1414:     }
                   1415:   return -1;         /* Failure to match */
                   1416: }
                   1417: 
                   1418: static int
                   1419: bcmp_translate (s1, s2, len, translate)
                   1420:      char *s1, *s2;
                   1421:      register int len;
                   1422:      char *translate;
                   1423: {
                   1424:   register char *p1 = s1, *p2 = s2;
                   1425:   while (len)
                   1426:     {
                   1427:       if (translate [*p1++] != translate [*p2++]) return 1;
                   1428:       len--;
                   1429:     }
                   1430:   return 0;
                   1431: }
                   1432: 
                   1433: /* Entry points compatible with bsd4.2 regex library */
                   1434: 
                   1435: #ifndef emacs
                   1436: 
                   1437: static struct re_pattern_buffer re_comp_buf;
                   1438: 
                   1439: char *
                   1440: re_comp (s)
                   1441:      char *s;
                   1442: {
                   1443:   if (!s)
                   1444:     {
                   1445:       if (!re_comp_buf.buffer)
                   1446:        return "No previous regular expression";
                   1447:       return 0;
                   1448:     }
                   1449: 
                   1450:   if (!re_comp_buf.buffer)
                   1451:     {
                   1452:       if (!(re_comp_buf.buffer = (char *) malloc (200)))
                   1453:        return "Memory exhausted";
                   1454:       re_comp_buf.allocated = 200;
                   1455:       if (!(re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH)))
                   1456:        return "Memory exhausted";
                   1457:     }
                   1458:   return re_compile_pattern (s, strlen (s), &re_comp_buf);
                   1459: }
                   1460: 
                   1461: int
                   1462: re_exec (s)
                   1463:      char *s;
                   1464: {
                   1465:   int len = strlen (s);
                   1466:   return 0 <= re_search (&re_comp_buf, s, len, 0, len, 0);
                   1467: }
                   1468: 
                   1469: #endif /* emacs */
                   1470: 
                   1471: #ifdef test
                   1472: 
                   1473: #include <stdio.h>
                   1474: 
                   1475: /* Indexed by a character code (0 through 0377), gives the upper case equivalent of the character: codes 0141-0172 (ASCII a-z) map to 0101-0132 (A-Z), every other code maps to itself. */
                   1476: 
                   1477: static char upcase[0400] = 
                   1478:   { 000, 001, 002, 003, 004, 005, 006, 007,
                   1479:     010, 011, 012, 013, 014, 015, 016, 017,
                   1480:     020, 021, 022, 023, 024, 025, 026, 027,
                   1481:     030, 031, 032, 033, 034, 035, 036, 037,
                   1482:     040, 041, 042, 043, 044, 045, 046, 047,
                   1483:     050, 051, 052, 053, 054, 055, 056, 057,
                   1484:     060, 061, 062, 063, 064, 065, 066, 067,
                   1485:     070, 071, 072, 073, 074, 075, 076, 077,
                   1486:     0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
                   1487:     0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
                   1488:     0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
                   1489:     0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
                   1490:     0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
                   1491:     0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
                   1492:     0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
                   1493:     0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177,
                   1494:     0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
                   1495:     0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
                   1496:     0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
                   1497:     0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
                   1498:     0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
                   1499:     0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
                   1500:     0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
                   1501:     0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
                   1502:     0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
                   1503:     0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
                   1504:     0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
                   1505:     0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
                   1506:     0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
                   1507:     0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
                   1508:     0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
                   1509:     0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
                   1510:   };
                   1511: 
                   1512: main ()
                   1513: {
                   1514:   char pat[80];
                   1515:   struct re_pattern_buffer buf;
                   1516:   int i;
                   1517:   char c;
                   1518:   char fastmap[(1 << BYTEWIDTH)];
                   1519: 
                   1520:   buf.allocated = 40;
                   1521:   buf.buffer = (char *) malloc (buf.allocated);
                   1522:   buf.fastmap = fastmap;
                   1523:   buf.translate = upcase;
                   1524: 
                   1525:   while (1)
                   1526:     {
                   1527:       gets (pat);
                   1528: 
                   1529:       if (*pat)
                   1530:        {
                   1531:           re_compile_pattern (pat, strlen(pat), &buf);
                   1532: 
                   1533:          for (i = 0; i < buf.used; i++)
                   1534:            printchar (buf.buffer[i]);
                   1535: 
                   1536:          putchar ('\n');
                   1537: 
                   1538:          printf ("%d allocated, %d used.\n", buf.allocated, buf.used);
                   1539: 
                   1540:          re_compile_fastmap (&buf);
                   1541:          printf ("Allowed by fastmap: ");
                   1542:          for (i = 0; i < (1 << BYTEWIDTH); i++)
                   1543:            if (fastmap[i]) printchar (i);
                   1544:          putchar ('\n');
                   1545:        }
                   1546: 
                   1547:       gets (pat);      /* Now read the string to match against */
                   1548: 
                   1549:       i = re_match (&buf, pat, strlen (pat), 0, 0);
                   1550:       printf ("Match value %d.\n", i);
                   1551:     }
                   1552: }
                   1553: 
                   1554: #ifdef NOTDEF
                   1555: print_buf (bufp)
                   1556:      struct re_pattern_buffer *bufp;
                   1557: {
                   1558:   int i;
                   1559: 
                   1560:   printf ("buf is :\n----------------\n");
                   1561:   for (i = 0; i < bufp->used; i++)
                   1562:     printchar (bufp->buffer[i]);
                   1563:   
                   1564:   printf ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used);
                   1565:   
                   1566:   printf ("Allowed by fastmap: ");
                   1567:   for (i = 0; i < (1 << BYTEWIDTH); i++)
                   1568:     if (bufp->fastmap[i])
                   1569:       printchar (i);
                   1570:   printf ("\nAllowed by translate: ");
                   1571:   if (bufp->translate)
                   1572:     for (i = 0; i < (1 << BYTEWIDTH); i++)
                   1573:       if (bufp->translate[i])
                   1574:        printchar (i);
                   1575:   printf ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't");
                   1576:   printf ("can %s be null\n----------", bufp->can_be_null ? "" : "not");
                   1577: }
                   1578: #endif
                   1579: 
                   1580: printchar (c)
                   1581:      char c;
                   1582: {
                   1583:   if (c < 041 || c >= 0177)
                   1584:     {
                   1585:       putchar ('\\');
                   1586:       putchar (((c >> 6) & 3) + '0');
                   1587:       putchar (((c >> 3) & 7) + '0');
                   1588:       putchar ((c & 7) + '0');
                   1589:     }
                   1590:   else
                   1591:     putchar (c);
                   1592: }
                   1593: 
                   1594: error (string)
                   1595:      char *string;
                   1596: {
                   1597:   puts (string);
                   1598:   exit (1);
                   1599: }
                   1600: 
                   1601: #endif test
unix.superglobalmegacorp.com
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.