Annotation of 43BSDReno/contrib/emacs-18.55/src/regex.c, revision 1.1.1.1

1.1       root        1: /* Extended regular expression matching and search.
                      2:    Copyright (C) 1985 Free Software Foundation, Inc.
                      3: 
                      4:                       NO WARRANTY
                      5: 
                      6:   BECAUSE THIS PROGRAM IS LICENSED FREE OF CHARGE, WE PROVIDE ABSOLUTELY
                      7: NO WARRANTY, TO THE EXTENT PERMITTED BY APPLICABLE STATE LAW.  EXCEPT
                      8: WHEN OTHERWISE STATED IN WRITING, FREE SOFTWARE FOUNDATION, INC,
                      9: RICHARD M. STALLMAN AND/OR OTHER PARTIES PROVIDE THIS PROGRAM "AS IS"
                     10: WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
                     11: BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
                     12: FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY
                     13: AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE PROGRAM PROVE
                     14: DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR
                     15: CORRECTION.
                     16: 
                     17:  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW WILL RICHARD M.
                     18: STALLMAN, THE FREE SOFTWARE FOUNDATION, INC., AND/OR ANY OTHER PARTY
                     19: WHO MAY MODIFY AND REDISTRIBUTE THIS PROGRAM AS PERMITTED BELOW, BE
                     20: LIABLE TO YOU FOR DAMAGES, INCLUDING ANY LOST PROFITS, LOST MONIES, OR
                     21: OTHER SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
                     22: USE OR INABILITY TO USE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR
                     23: DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY THIRD PARTIES OR
                     24: A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS) THIS
                     25: PROGRAM, EVEN IF YOU HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH
                     26: DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY.
                     27: 
                     28:                GENERAL PUBLIC LICENSE TO COPY
                     29: 
                     30:   1. You may copy and distribute verbatim copies of this source file
                     31: as you receive it, in any medium, provided that you conspicuously and
                     32: appropriately publish on each copy a valid copyright notice "Copyright
                     33: (C) 1985 Free Software Foundation, Inc."; and include following the
                     34: copyright notice a verbatim copy of the above disclaimer of warranty
                     35: and of this License.  You may charge a distribution fee for the
                     36: physical act of transferring a copy.
                     37: 
                     38:   2. You may modify your copy or copies of this source file or
                     39: any portion of it, and copy and distribute such modifications under
                     40: the terms of Paragraph 1 above, provided that you also do the following:
                     41: 
                     42:     a) cause the modified files to carry prominent notices stating
                     43:     that you changed the files and the date of any change; and
                     44: 
                     45:     b) cause the whole of any work that you distribute or publish,
                     46:     that in whole or in part contains or is a derivative of this
                     47:     program or any part thereof, to be licensed at no charge to all
                     48:     third parties on terms identical to those contained in this
                     49:     License Agreement (except that you may choose to grant more extensive
                     50:     warranty protection to some or all third parties, at your option).
                     51: 
                     52:     c) You may charge a distribution fee for the physical act of
                     53:     transferring a copy, and you may at your option offer warranty
                     54:     protection in exchange for a fee.
                     55: 
                     56: Mere aggregation of another unrelated program with this program (or its
                     57: derivative) on a volume of a storage or distribution medium does not bring
                     58: the other program under the scope of these terms.
                     59: 
                     60:   3. You may copy and distribute this program (or a portion or derivative
                     61: of it, under Paragraph 2) in object code or executable form under the terms
                     62: of Paragraphs 1 and 2 above provided that you also do one of the following:
                     63: 
                     64:     a) accompany it with the complete corresponding machine-readable
                     65:     source code, which must be distributed under the terms of
                     66:     Paragraphs 1 and 2 above; or,
                     67: 
                     68:     b) accompany it with a written offer, valid for at least three
                     69:     years, to give any third party free (except for a nominal
                     70:     shipping charge) a complete machine-readable copy of the
                     71:     corresponding source code, to be distributed under the terms of
                     72:     Paragraphs 1 and 2 above; or,
                     73: 
                     74:     c) accompany it with the information you received as to where the
                     75:     corresponding source code may be obtained.  (This alternative is
                     76:     allowed only for noncommercial distribution and only if you
                     77:     received the program in object code or executable form alone.)
                     78: 
                     79: For an executable file, complete source code means all the source code for
                     80: all modules it contains; but, as a special exception, it need not include
                     81: source code for modules which are standard libraries that accompany the
                     82: operating system on which the executable file runs.
                     83: 
                     84:   4. You may not copy, sublicense, distribute or transfer this program
                     85: except as expressly provided under this License Agreement.  Any attempt
                     86: otherwise to copy, sublicense, distribute or transfer this program is void and
                     87: your rights to use the program under this License agreement shall be
                     88: automatically terminated.  However, parties who have received computer
                     89: software programs from you with this License Agreement will not have
                     90: their licenses terminated so long as such parties remain in full compliance.
                     91: 
                     92:   5. If you wish to incorporate parts of this program into other free
                     93: programs whose distribution conditions are different, write to the Free
                     94: Software Foundation at 675 Mass Ave, Cambridge, MA 02139.  We have not yet
                     95: worked out a simple rule that can be stated here, but we will often permit
                     96: this.  We will be guided by the two goals of preserving the free status of
                     97: all derivatives of our free software and of promoting the sharing and reuse of
                     98: software.
                     99: 
                    100: 
                    101: In other words, you are welcome to use, share and improve this program.
                    102: You are forbidden to forbid anyone else to use, share and improve
                    103: what you give them.   Help stamp out software-hoarding!  */
                    104: 
                    105: 
                    106: /* To test, compile with -Dtest.
                    107:  This Dtestable feature turns this into a self-contained program
                    108:  which reads a pattern, describes how it compiles,
                    109:  then reads a string and searches for it.  */
                    110: 
                    111: 
                    112: #ifdef emacs
                    113: 
                    114: /* The `emacs' switch turns on certain special matching commands
                    115:  that make sense only in emacs. */
                    116: 
                    117: #include "config.h"
                    118: #include "lisp.h"
                    119: #include "buffer.h"
                    120: #include "syntax.h"
                    121: 
                    122: #else  /* not emacs */
                    123: 
                    124: /*
                    125:  * Define the syntax stuff, so we can do the \<...\> things.
                    126:  */
                    127: 
                    128: #ifndef Sword /* must be non-zero in some of the tests below... */
                    129: #define Sword 1
                    130: #endif
                    131: 
                    132: #define SYNTAX(c) re_syntax_table[c]
                    133: 
                    134: #ifdef SYNTAX_TABLE
                    135: 
                    136: char *re_syntax_table;
                    137: 
                    138: #else
                    139: 
                    140: static char re_syntax_table[256];
                    141: 
                    142: static void
                    143: init_syntax_once ()
                    144: {
                    145:    register int c;
                    146:    static int done = 0;
                    147: 
                    148:    if (done)
                    149:      return;
                    150: 
                    151:    bzero (re_syntax_table, sizeof re_syntax_table);
                    152: 
                    153:    for (c = 'a'; c <= 'z'; c++)
                    154:      re_syntax_table[c] = Sword;
                    155: 
                    156:    for (c = 'A'; c <= 'Z'; c++)
                    157:      re_syntax_table[c] = Sword;
                    158: 
                    159:    for (c = '0'; c <= '9'; c++)
                    160:      re_syntax_table[c] = Sword;
                    161: 
                    162:    done = 1;
                    163: }
                    164: 
                    165: #endif /* SYNTAX_TABLE */
                    166: #endif /* not emacs */
                    167: 
                    168: #include "regex.h"
                    169: 
                    170: /* Number of failure points to allocate space for initially,
                    171:  when matching.  If this number is exceeded, more space is allocated,
                    172:  so it is not a hard limit.  */
                    173: 
                    174: #ifndef NFAILURES
                    175: #define NFAILURES 80
                    176: #endif NFAILURES
                    177: 
                    178: /* width of a byte in bits */
                    179: 
                    180: #define BYTEWIDTH 8
                    181: 
                    182: #ifndef SIGN_EXTEND_CHAR
                    183: #define SIGN_EXTEND_CHAR(x) (x)
                    184: #endif
                    185: 
                    186: static int obscure_syntax = 0;
                    187: 
                    188: /* Specify the precise syntax of regexp for compilation.
                    189:    This provides for compatibility for various utilities
                    190:    which historically have different, incompatible syntaxes.
                    191: 
                    192:    The argument SYNTAX is a bit-mask containing the two bits
                    193:    RE_NO_BK_PARENS and RE_NO_BK_VBAR.  */
                    194: 
                    195: int
                    196: re_set_syntax (syntax)
                    197: {
                    198:   int ret;
                    199: 
                    200:   ret = obscure_syntax;
                    201:   obscure_syntax = syntax;
                    202:   return ret;
                    203: }
                    204: 
                    205: /* re_compile_pattern takes a regular-expression string
                    206:    and converts it into a buffer full of byte commands for matching.
                    207: 
                    208:   PATTERN   is the address of the pattern string
                    209:   SIZE      is the length of it.
                    210:   BUFP     is a  struct re_pattern_buffer *  which points to the info
                    211:            on where to store the byte commands.
                    212:            This structure contains a  char *  which points to the
                    213:            actual space, which should have been obtained with malloc.
                    214:            re_compile_pattern may use  realloc  to grow the buffer space.
                    215: 
                    216:   The number of bytes of commands can be found out by looking in
                    217:   the  struct re_pattern_buffer  that bufp pointed to,
                    218:   after re_compile_pattern returns.
                    219: */
                    220: 
                    221: #define PATPUSH(ch) (*b++ = (char) (ch))
                    222: 
                    223: #define PATFETCH(c) \
                    224:  {if (p == pend) goto end_of_pattern; \
                    225:   c = * (unsigned char *) p++; \
                    226:   if (translate) c = translate[c]; }
                    227: 
                    228: #define PATFETCH_RAW(c) \
                    229:  {if (p == pend) goto end_of_pattern; \
                    230:   c = * (unsigned char *) p++; }
                    231: 
                    232: #define PATUNFETCH p--
                    233: 
                    234: #define EXTEND_BUFFER \
                    235:   { char *old_buffer = bufp->buffer; \
                    236:     if (bufp->allocated == (1<<16)) goto too_big; \
                    237:     bufp->allocated *= 2; \
                    238:     if (bufp->allocated > (1<<16)) bufp->allocated = (1<<16); \
                    239:     if (!(bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated))) \
                    240:       goto memory_exhausted; \
                    241:     c = bufp->buffer - old_buffer; \
                    242:     b += c; \
                    243:     if (fixup_jump) \
                    244:       fixup_jump += c; \
                    245:     if (laststart) \
                    246:       laststart += c; \
                    247:     begalt += c; \
                    248:     if (pending_exact) \
                    249:       pending_exact += c; \
                    250:   }
                    251: 
                    252: static int store_jump (), insert_jump ();
                    253: 
                    254: char *
                    255: re_compile_pattern (pattern, size, bufp)
                    256:      char *pattern;
                    257:      int size;
                    258:      struct re_pattern_buffer *bufp;
                    259: {
                    260:   register char *b = bufp->buffer;
                    261:   register char *p = pattern;
                    262:   char *pend = pattern + size;
                    263:   register unsigned c, c1;
                    264:   char *p1;
                    265:   unsigned char *translate = (unsigned char *) bufp->translate;
                    266: 
                    267:   /* address of the count-byte of the most recently inserted "exactn" command.
                    268:     This makes it possible to tell whether a new exact-match character
                    269:     can be added to that command or requires a new "exactn" command. */
                    270:      
                    271:   char *pending_exact = 0;
                    272: 
                    273:   /* address of the place where a forward-jump should go
                    274:     to the end of the containing expression.
                    275:     Each alternative of an "or", except the last, ends with a forward-jump
                    276:     of this sort. */
                    277: 
                    278:   char *fixup_jump = 0;
                    279: 
                    280:   /* address of start of the most recently finished expression.
                    281:     This tells postfix * where to find the start of its operand. */
                    282: 
                    283:   char *laststart = 0;
                    284: 
                    285:   /* In processing a repeat, 1 means zero matches is allowed */
                    286: 
                    287:   char zero_times_ok;
                    288: 
                    289:   /* In processing a repeat, 1 means many matches is allowed */
                    290: 
                    291:   char many_times_ok;
                    292: 
                    293:   /* address of beginning of regexp, or inside of last \( */
                    294: 
                    295:   char *begalt = b;
                    296: 
                    297:   /* Stack of information saved by \( and restored by \).
                    298:      Four stack elements are pushed by each \(:
                    299:        First, the value of b.
                    300:        Second, the value of fixup_jump.
                    301:        Third, the value of regnum.
                    302:        Fourth, the value of begalt.  */
                    303: 
                    304:   int stackb[40];
                    305:   int *stackp = stackb;
                    306:   int *stacke = stackb + 40;
                    307:   int *stackt;
                    308: 
                    309:   /* Counts \('s as they are encountered.  Remembered for the matching \),
                    310:      where it becomes the "register number" to put in the stop_memory command */
                    311: 
                    312:   int regnum = 1;
                    313: 
                    314:   bufp->fastmap_accurate = 0;
                    315: 
                    316: #ifndef emacs
                    317: #ifndef SYNTAX_TABLE
                    318:   /*
                    319:    * Initialize the syntax table.
                    320:    */
                    321:    init_syntax_once();
                    322: #endif
                    323: #endif
                    324: 
                    325:   if (bufp->allocated == 0)
                    326:     {
                    327:       bufp->allocated = 28;
                    328:       if (bufp->buffer)
                    329:        /* EXTEND_BUFFER loses when bufp->allocated is 0 */
                    330:        bufp->buffer = (char *) realloc (bufp->buffer, 28);
                    331:       else
                    332:        /* Caller did not allocate a buffer.  Do it for him */
                    333:        bufp->buffer = (char *) malloc (28);
                    334:       if (!bufp->buffer) goto memory_exhausted;
                    335:       begalt = b = bufp->buffer;
                    336:     }
                    337: 
                    338:   while (p != pend)
                    339:     {
                    340:       if (b - bufp->buffer > bufp->allocated - 10)
                    341:        /* Note that EXTEND_BUFFER clobbers c */
                    342:        EXTEND_BUFFER;
                    343: 
                    344:       PATFETCH (c);
                    345: 
                    346:       switch (c)
                    347:        {
                    348:        case '$':
                    349:          if (obscure_syntax & RE_TIGHT_VBAR)
                    350:            {
                    351:              if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS) && p != pend)
                    352:                goto normal_char;
                    353:              /* Make operand of last vbar end before this `$'.  */
                    354:              if (fixup_jump)
                    355:                store_jump (fixup_jump, jump, b);
                    356:              fixup_jump = 0;
                    357:              PATPUSH (endline);
                    358:              break;
                    359:            }
                    360: 
                    361:          /* $ means succeed if at end of line, but only in special contexts.
                    362:            If randomly in the middle of a pattern, it is a normal character. */
                    363:          if (p == pend || *p == '\n'
                    364:              || (obscure_syntax & RE_CONTEXT_INDEP_OPS)
                    365:              || (obscure_syntax & RE_NO_BK_PARENS
                    366:                  ? *p == ')'
                    367:                  : *p == '\\' && p[1] == ')')
                    368:              || (obscure_syntax & RE_NO_BK_VBAR
                    369:                  ? *p == '|'
                    370:                  : *p == '\\' && p[1] == '|'))
                    371:            {
                    372:              PATPUSH (endline);
                    373:              break;
                    374:            }
                    375:          goto normal_char;
                    376: 
                    377:        case '^':
                    378:          /* ^ means succeed if at beg of line, but only if no preceding pattern. */
                    379: 
                    380:          if (laststart && p[-2] != '\n'
                    381:              && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
                    382:            goto normal_char;
                    383:          if (obscure_syntax & RE_TIGHT_VBAR)
                    384:            {
                    385:              if (p != pattern + 1
                    386:                  && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
                    387:                goto normal_char;
                    388:              PATPUSH (begline);
                    389:              begalt = b;
                    390:            }
                    391:          else
                    392:            PATPUSH (begline);
                    393:          break;
                    394: 
                    395:        case '+':
                    396:        case '?':
                    397:          if (obscure_syntax & RE_BK_PLUS_QM)
                    398:            goto normal_char;
                    399:        handle_plus:
                    400:        case '*':
                    401:          /* If there is no previous pattern, char not special. */
                    402:          if (!laststart && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
                    403:            goto normal_char;
                    404:          /* If there is a sequence of repetition chars,
                    405:             collapse it down to equivalent to just one.  */
                    406:          zero_times_ok = 0;
                    407:          many_times_ok = 0;
                    408:          while (1)
                    409:            {
                    410:              zero_times_ok |= c != '+';
                    411:              many_times_ok |= c != '?';
                    412:              if (p == pend)
                    413:                break;
                    414:              PATFETCH (c);
                    415:              if (c == '*')
                    416:                ;
                    417:              else if (!(obscure_syntax & RE_BK_PLUS_QM)
                    418:                       && (c == '+' || c == '?'))
                    419:                ;
                    420:              else if ((obscure_syntax & RE_BK_PLUS_QM)
                    421:                       && c == '\\')
                    422:                {
                    423:                  int c1;
                    424:                  PATFETCH (c1);
                    425:                  if (!(c1 == '+' || c1 == '?'))
                    426:                    {
                    427:                      PATUNFETCH;
                    428:                      PATUNFETCH;
                    429:                      break;
                    430:                    }
                    431:                  c = c1;
                    432:                }
                    433:              else
                    434:                {
                    435:                  PATUNFETCH;
                    436:                  break;
                    437:                }
                    438:            }
                    439: 
                    440:          /* Star, etc. applied to an empty pattern is equivalent
                    441:             to an empty pattern.  */
                    442:          if (!laststart)
                    443:            break;
                    444: 
                    445:          /* Now we know whether 0 matches is allowed,
                    446:             and whether 2 or more matches is allowed.  */
                    447:          if (many_times_ok)
                    448:            {
                    449:              /* If more than one repetition is allowed,
                    450:                 put in a backward jump at the end.  */
                    451:              store_jump (b, maybe_finalize_jump, laststart - 3);
                    452:              b += 3;
                    453:            }
                    454:          insert_jump (on_failure_jump, laststart, b + 3, b);
                    455:          pending_exact = 0;
                    456:          b += 3;
                    457:          if (!zero_times_ok)
                    458:            {
                    459:              /* At least one repetition required: insert before the loop
                    460:                 a skip over the initial on-failure-jump instruction */
                    461:              insert_jump (dummy_failure_jump, laststart, laststart + 6, b);
                    462:              b += 3;
                    463:            }
                    464:          break;
                    465: 
                    466:        case '.':
                    467:          laststart = b;
                    468:          PATPUSH (anychar);
                    469:          break;
                    470: 
                    471:        case '[':
                    472:          while (b - bufp->buffer
                    473:                 > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH)
                    474:            /* Note that EXTEND_BUFFER clobbers c */
                    475:            EXTEND_BUFFER;
                    476: 
                    477:          laststart = b;
                    478:          if (*p == '^')
                    479:            PATPUSH (charset_not), p++;
                    480:          else
                    481:            PATPUSH (charset);
                    482:          p1 = p;
                    483: 
                    484:          PATPUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
                    485:          /* Clear the whole map */
                    486:          bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
                    487:          /* Read in characters and ranges, setting map bits */
                    488:          while (1)
                    489:            {
                    490:              PATFETCH (c);
                    491:              if (c == ']' && p != p1 + 1) break;
                    492:              if (*p == '-' && p[1] != ']')
                    493:                {
                    494:                  PATFETCH (c1);
                    495:                  PATFETCH (c1);
                    496:                  while (c <= c1)
                    497:                    b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH), c++;
                    498:                }
                    499:              else
                    500:                {
                    501:                  b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH);
                    502:                }
                    503:            }
                    504:          /* Discard any bitmap bytes that are all 0 at the end of the map.
                    505:             Decrement the map-length byte too. */
                    506:          while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
                    507:            b[-1]--;
                    508:          b += b[-1];
                    509:          break;
                    510: 
                    511:        case '(':
                    512:          if (! (obscure_syntax & RE_NO_BK_PARENS))
                    513:            goto normal_char;
                    514:          else
                    515:            goto handle_open;
                    516: 
                    517:        case ')':
                    518:          if (! (obscure_syntax & RE_NO_BK_PARENS))
                    519:            goto normal_char;
                    520:          else
                    521:            goto handle_close;
                    522: 
                    523:        case '\n':
                    524:          if (! (obscure_syntax & RE_NEWLINE_OR))
                    525:            goto normal_char;
                    526:          else
                    527:            goto handle_bar;
                    528: 
                    529:        case '|':
                    530:          if (! (obscure_syntax & RE_NO_BK_VBAR))
                    531:            goto normal_char;
                    532:          else
                    533:            goto handle_bar;
                    534: 
                    535:         case '\\':
                    536:          if (p == pend) goto invalid_pattern;
                    537:          PATFETCH_RAW (c);
                    538:          switch (c)
                    539:            {
                    540:            case '(':
                    541:              if (obscure_syntax & RE_NO_BK_PARENS)
                    542:                goto normal_backsl;
                    543:            handle_open:
                    544:              if (stackp == stacke) goto nesting_too_deep;
                    545:              if (regnum < RE_NREGS)
                    546:                {
                    547:                  PATPUSH (start_memory);
                    548:                  PATPUSH (regnum);
                    549:                }
                    550:              *stackp++ = b - bufp->buffer;
                    551:              *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0;
                    552:              *stackp++ = regnum++;
                    553:              *stackp++ = begalt - bufp->buffer;
                    554:              fixup_jump = 0;
                    555:              laststart = 0;
                    556:              begalt = b;
                    557:              break;
                    558: 
                    559:            case ')':
                    560:              if (obscure_syntax & RE_NO_BK_PARENS)
                    561:                goto normal_backsl;
                    562:            handle_close:
                    563:              if (stackp == stackb) goto unmatched_close;
                    564:              begalt = *--stackp + bufp->buffer;
                    565:              if (fixup_jump)
                    566:                store_jump (fixup_jump, jump, b);
                    567:              if (stackp[-1] < RE_NREGS)
                    568:                {
                    569:                  PATPUSH (stop_memory);
                    570:                  PATPUSH (stackp[-1]);
                    571:                }
                    572:              stackp -= 2;
                    573:              fixup_jump = 0;
                    574:              if (*stackp)
                    575:                fixup_jump = *stackp + bufp->buffer - 1;
                    576:              laststart = *--stackp + bufp->buffer;
                    577:              break;
                    578: 
                    579:            case '|':
                    580:              if (obscure_syntax & RE_NO_BK_VBAR)
                    581:                goto normal_backsl;
                    582:            handle_bar:
                    583:              insert_jump (on_failure_jump, begalt, b + 6, b);
                    584:              pending_exact = 0;
                    585:              b += 3;
                    586:              if (fixup_jump)
                    587:                store_jump (fixup_jump, jump, b);
                    588:              fixup_jump = b;
                    589:              b += 3;
                    590:              laststart = 0;
                    591:              begalt = b;
                    592:              break;
                    593: 
                    594: #ifdef emacs
                    595:            case '=':
                    596:              PATPUSH (at_dot);
                    597:              break;
                    598: 
                    599:            case 's':   
                    600:              laststart = b;
                    601:              PATPUSH (syntaxspec);
                    602:              PATFETCH (c);
                    603:              PATPUSH (syntax_spec_code[c]);
                    604:              break;
                    605: 
                    606:            case 'S':
                    607:              laststart = b;
                    608:              PATPUSH (notsyntaxspec);
                    609:              PATFETCH (c);
                    610:              PATPUSH (syntax_spec_code[c]);
                    611:              break;
                    612: #endif emacs
                    613: 
                    614:            case 'w':
                    615:              laststart = b;
                    616:              PATPUSH (wordchar);
                    617:              break;
                    618: 
                    619:            case 'W':
                    620:              laststart = b;
                    621:              PATPUSH (notwordchar);
                    622:              break;
                    623: 
                    624:            case '<':
                    625:              PATPUSH (wordbeg);
                    626:              break;
                    627: 
                    628:            case '>':
                    629:              PATPUSH (wordend);
                    630:              break;
                    631: 
                    632:            case 'b':
                    633:              PATPUSH (wordbound);
                    634:              break;
                    635: 
                    636:            case 'B':
                    637:              PATPUSH (notwordbound);
                    638:              break;
                    639: 
                    640:            case '`':
                    641:              PATPUSH (begbuf);
                    642:              break;
                    643: 
                    644:            case '\'':
                    645:              PATPUSH (endbuf);
                    646:              break;
                    647: 
                    648:            case '1':
                    649:            case '2':
                    650:            case '3':
                    651:            case '4':
                    652:            case '5':
                    653:            case '6':
                    654:            case '7':
                    655:            case '8':
                    656:            case '9':
                    657:              c1 = c - '0';
                    658:              if (c1 >= regnum)
                    659:                goto normal_char;
                    660:              for (stackt = stackp - 2;  stackt > stackb;  stackt -= 4)
                    661:                if (*stackt == c1)
                    662:                  goto normal_char;
                    663:              laststart = b;
                    664:              PATPUSH (duplicate);
                    665:              PATPUSH (c1);
                    666:              break;
                    667: 
                    668:            case '+':
                    669:            case '?':
                    670:              if (obscure_syntax & RE_BK_PLUS_QM)
                    671:                goto handle_plus;
                    672: 
                    673:            default:
                    674:            normal_backsl:
                    675:              /* You might think it would be useful for \ to mean
                    676:                 not to translate; but if we don't translate it
                    677:                 it will never match anything.  */
                    678:              if (translate) c = translate[c];
                    679:              goto normal_char;
                    680:            }
                    681:          break;
                    682: 
                    683:        default:
                    684:        normal_char:
                    685:          if (!pending_exact || pending_exact + *pending_exact + 1 != b
                    686:              || *pending_exact == 0177 || *p == '*' || *p == '^'
                    687:              || ((obscure_syntax & RE_BK_PLUS_QM)
                    688:                  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
                    689:                  : (*p == '+' || *p == '?')))
                    690:            {
                    691:              laststart = b;
                    692:              PATPUSH (exactn);
                    693:              pending_exact = b;
                    694:              PATPUSH (0);
                    695:            }
                    696:          PATPUSH (c);
                    697:          (*pending_exact)++;
                    698:        }
                    699:     }
                    700: 
                    701:   if (fixup_jump)
                    702:     store_jump (fixup_jump, jump, b);
                    703: 
                    704:   if (stackp != stackb) goto unmatched_open;
                    705: 
                    706:   bufp->used = b - bufp->buffer;
                    707:   return 0;
                    708: 
                    709:  invalid_pattern:
                    710:   return "Invalid regular expression";
                    711: 
                    712:  unmatched_open:
                    713:   return "Unmatched \\(";
                    714: 
                    715:  unmatched_close:
                    716:   return "Unmatched \\)";
                    717: 
                    718:  end_of_pattern:
                    719:   return "Premature end of regular expression";
                    720: 
                    721:  nesting_too_deep:
                    722:   return "Nesting too deep";
                    723: 
                    724:  too_big:
                    725:   return "Regular expression too big";
                    726: 
                    727:  memory_exhausted:
                    728:   return "Memory exhausted";
                    729: }
                    730: 
                    731: /* Store where `from' points a jump operation to jump to where `to' points.
                    732:   `opcode' is the opcode to store. */
                    733: 
                    734: static int
                    735: store_jump (from, opcode, to)
                    736:      char *from, *to;
                    737:      char opcode;
                    738: {
                    739:   from[0] = opcode;
                    740:   from[1] = (to - (from + 3)) & 0377;
                    741:   from[2] = (to - (from + 3)) >> 8;
                    742: }
                    743: 
                    744: /* Open up space at char FROM, and insert there a jump to TO.
                    745:    CURRENT_END gives te end of the storage no in use,
                    746:    so we know how much data to copy up.
                    747:    OP is the opcode of the jump to insert.
                    748: 
                    749:    If you call this function, you must zero out pending_exact.  */
                    750: 
                    751: static int
                    752: insert_jump (op, from, to, current_end)
                    753:      char op;
                    754:      char *from, *to, *current_end;
                    755: {
                    756:   register char *pto = current_end + 3;
                    757:   register char *pfrom = current_end;
                    758:   while (pfrom != from)
                    759:     *--pto = *--pfrom;
                    760:   store_jump (from, op, to);
                    761: }
                    762: 
                    763: /* Given a pattern, compute a fastmap from it.
                    764:  The fastmap records which of the (1 << BYTEWIDTH) possible characters
                    765:  can start a string that matches the pattern.
                    766:  This fastmap is used by re_search to skip quickly over totally implausible text.
                    767: 
                    768:  The caller must supply the address of a (1 << BYTEWIDTH)-byte data area
                    769:  as bufp->fastmap.
                    770:  The other components of bufp describe the pattern to be used.  */
                    771: 
                    772: void
                    773: re_compile_fastmap (bufp)
                    774:      struct re_pattern_buffer *bufp;
                    775: {
                    776:   unsigned char *pattern = (unsigned char *) bufp->buffer;
                    777:   int size = bufp->used;
                    778:   register char *fastmap = bufp->fastmap;
                    779:   register unsigned char *p = pattern;
                    780:   register unsigned char *pend = pattern + size;
                    781:   register int j, k;
                    782:   unsigned char *translate = (unsigned char *) bufp->translate;
                    783: 
                    784:   unsigned char *stackb[NFAILURES];
                    785:   unsigned char **stackp = stackb;
                    786: 
                    787:   bzero (fastmap, (1 << BYTEWIDTH));
                    788:   bufp->fastmap_accurate = 1;
                    789:   bufp->can_be_null = 0;
                    790:       
                    791:   while (p)
                    792:     {
                    793:       if (p == pend)
                    794:        {
                    795:          bufp->can_be_null = 1;
                    796:          break;
                    797:        }
                    798: #ifdef SWITCH_ENUM_BUG
                    799:       switch ((int) ((enum regexpcode) *p++))
                    800: #else
                    801:       switch ((enum regexpcode) *p++)
                    802: #endif
                    803:        {
                    804:        case exactn:
                    805:          if (translate)
                    806:            fastmap[translate[p[1]]] = 1;
                    807:          else
                    808:            fastmap[p[1]] = 1;
                    809:          break;
                    810: 
                    811:         case begline:
                    812:         case before_dot:
                    813:        case at_dot:
                    814:        case after_dot:
                    815:        case begbuf:
                    816:        case endbuf:
                    817:        case wordbound:
                    818:        case notwordbound:
                    819:        case wordbeg:
                    820:        case wordend:
                    821:          continue;
                    822: 
                    823:        case endline:
                    824:          if (translate)
                    825:            fastmap[translate['\n']] = 1;
                    826:          else
                    827:            fastmap['\n'] = 1;
                    828:          if (bufp->can_be_null != 1)
                    829:            bufp->can_be_null = 2;
                    830:          break;
                    831: 
                    832:        case finalize_jump:
                    833:        case maybe_finalize_jump:
                    834:        case jump:
                    835:        case dummy_failure_jump:
                    836:          bufp->can_be_null = 1;
                    837:          j = *p++ & 0377;
                    838:          j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
                    839:          p += j + 1;           /* The 1 compensates for missing ++ above */
                    840:          if (j > 0)
                    841:            continue;
                    842:          /* Jump backward reached implies we just went through
                    843:             the body of a loop and matched nothing.
                    844:             Opcode jumped to should be an on_failure_jump.
                    845:             Just treat it like an ordinary jump.
                    846:             For a * loop, it has pushed its failure point already;
                    847:             if so, discard that as redundant.  */
                    848:          if ((enum regexpcode) *p != on_failure_jump)
                    849:            continue;
                    850:          p++;
                    851:          j = *p++ & 0377;
                    852:          j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
                    853:          p += j + 1;           /* The 1 compensates for missing ++ above */
                    854:          if (stackp != stackb && *stackp == p)
                    855:            stackp--;
                    856:          continue;
                    857:          
                    858:        case on_failure_jump:
                    859:          j = *p++ & 0377;
                    860:          j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
                    861:          p++;
                    862:          *++stackp = p + j;
                    863:          continue;
                    864: 
                    865:        case start_memory:
                    866:        case stop_memory:
                    867:          p++;
                    868:          continue;
                    869: 
                    870:        case duplicate:
                    871:          bufp->can_be_null = 1;
                    872:          fastmap['\n'] = 1;
                    873:        case anychar:
                    874:          for (j = 0; j < (1 << BYTEWIDTH); j++)
                    875:            if (j != '\n')
                    876:              fastmap[j] = 1;
                    877:          if (bufp->can_be_null)
                    878:            return;
                    879:          /* Don't return; check the alternative paths
                    880:             so we can set can_be_null if appropriate.  */
                    881:          break;
                    882: 
                    883:        case wordchar:
                    884:          for (j = 0; j < (1 << BYTEWIDTH); j++)
                    885:            if (SYNTAX (j) == Sword)
                    886:              fastmap[j] = 1;
                    887:          break;
                    888: 
                    889:        case notwordchar:
                    890:          for (j = 0; j < (1 << BYTEWIDTH); j++)
                    891:            if (SYNTAX (j) != Sword)
                    892:              fastmap[j] = 1;
                    893:          break;
                    894: 
                    895: #ifdef emacs
                    896:        case syntaxspec:
                    897:          k = *p++;
                    898:          for (j = 0; j < (1 << BYTEWIDTH); j++)
                    899:            if (SYNTAX (j) == (enum syntaxcode) k)
                    900:              fastmap[j] = 1;
                    901:          break;
                    902: 
                    903:        case notsyntaxspec:
                    904:          k = *p++;
                    905:          for (j = 0; j < (1 << BYTEWIDTH); j++)
                    906:            if (SYNTAX (j) != (enum syntaxcode) k)
                    907:              fastmap[j] = 1;
                    908:          break;
                    909: #endif emacs
                    910: 
                    911:        case charset:
                    912:          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
                    913:            if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
                    914:              {
                    915:                if (translate)
                    916:                  fastmap[translate[j]] = 1;
                    917:                else
                    918:                  fastmap[j] = 1;
                    919:              }
                    920:          break;
                    921: 
                    922:        case charset_not:
                    923:          /* Chars beyond end of map must be allowed */
                    924:          for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
                    925:            if (translate)
                    926:              fastmap[translate[j]] = 1;
                    927:            else
                    928:              fastmap[j] = 1;
                    929: 
                    930:          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
                    931:            if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
                    932:              {
                    933:                if (translate)
                    934:                  fastmap[translate[j]] = 1;
                    935:                else
                    936:                  fastmap[j] = 1;
                    937:              }
                    938:          break;
                    939:        }
                    940: 
                    941:       /* Get here means we have successfully found the possible starting characters
                    942:         of one path of the pattern.  We need not follow this path any farther.
                    943:         Instead, look at the next alternative remembered in the stack. */
                    944:       if (stackp != stackb)
                    945:        p = *stackp--;
                    946:       else
                    947:        break;
                    948:     }
                    949: }
                    950: 
                    951: /* Like re_search_2, below, but only one string is specified. */
                    952: 
                    953: int
                    954: re_search (pbufp, string, size, startpos, range, regs)
                    955:      struct re_pattern_buffer *pbufp;
                    956:      char *string;
                    957:      int size, startpos, range;
                    958:      struct re_registers *regs;
                    959: {
                    960:   return re_search_2 (pbufp, 0, 0, string, size, startpos, range, regs, size);
                    961: }
                    962: 
                    963: /* Like re_match_2 but tries first a match starting at index STARTPOS,
                    964:    then at STARTPOS + 1, and so on.
                    965:    RANGE is the number of places to try before giving up.
                    966:    If RANGE is negative, the starting positions tried are
                    967:     STARTPOS, STARTPOS - 1, etc.
                    968:    It is up to the caller to make sure that range is not so large
                    969:    as to take the starting position outside of the input strings.
                    970: 
                    971: The value returned is the position at which the match was found,
                    972:  or -1 if no match was found,
                    973:  or -2 if error (such as failure stack overflow).  */
                    974: 
                    975: int
                    976: re_search_2 (pbufp, string1, size1, string2, size2, startpos, range, regs, mstop)
                    977:      struct re_pattern_buffer *pbufp;
                    978:      char *string1, *string2;
                    979:      int size1, size2;
                    980:      int startpos;
                    981:      register int range;
                    982:      struct re_registers *regs;
                    983:      int mstop;
                    984: {
                    985:   register char *fastmap = pbufp->fastmap;
                    986:   register unsigned char *translate = (unsigned char *) pbufp->translate;
                    987:   int total = size1 + size2;
                    988:   int val;
                    989: 
                    990:   /* Update the fastmap now if not correct already */
                    991:   if (fastmap && !pbufp->fastmap_accurate)
                    992:     re_compile_fastmap (pbufp);
                    993:   
                    994:   /* Don't waste time in a long search for a pattern
                    995:      that says it is anchored.  */
                    996:   if (pbufp->used > 0 && (enum regexpcode) pbufp->buffer[0] == begbuf
                    997:       && range > 0)
                    998:     {
                    999:       if (startpos > 0)
                   1000:        return -1;
                   1001:       else
                   1002:        range = 1;
                   1003:     }
                   1004: 
                   1005:   while (1)
                   1006:     {
                   1007:       /* If a fastmap is supplied, skip quickly over characters
                   1008:         that cannot possibly be the start of a match.
                   1009:         Note, however, that if the pattern can possibly match
                   1010:         the null string, we must test it at each starting point
                   1011:         so that we take the first null string we get.  */
                   1012: 
                   1013:       if (fastmap && startpos < total && pbufp->can_be_null != 1)
                   1014:        {
                   1015:          if (range > 0)
                   1016:            {
                   1017:              register int lim = 0;
                   1018:              register unsigned char *p;
                   1019:              int irange = range;
                   1020:              if (startpos < size1 && startpos + range >= size1)
                   1021:                lim = range - (size1 - startpos);
                   1022: 
                   1023:              p = ((unsigned char *)
                   1024:                   &(startpos >= size1 ? string2 - size1 : string1)[startpos]);
                   1025: 
                   1026:              if (translate)
                   1027:                {
                   1028:                  while (range > lim && !fastmap[translate[*p++]])
                   1029:                    range--;
                   1030:                }
                   1031:              else
                   1032:                {
                   1033:                  while (range > lim && !fastmap[*p++])
                   1034:                    range--;
                   1035:                }
                   1036:              startpos += irange - range;
                   1037:            }
                   1038:          else
                   1039:            {
                   1040:              register unsigned char c;
                   1041:              if (startpos >= size1)
                   1042:                c = string2[startpos - size1];
                   1043:              else
                   1044:                c = string1[startpos];
                   1045:              c &= 0xff;
                   1046:              if (translate ? !fastmap[translate[c]] : !fastmap[c])
                   1047:                goto advance;
                   1048:            }
                   1049:        }
                   1050: 
                   1051:       if (range >= 0 && startpos == total
                   1052:          && fastmap && pbufp->can_be_null == 0)
                   1053:        return -1;
                   1054: 
                   1055:       val = re_match_2 (pbufp, string1, size1, string2, size2, startpos, regs, mstop);
                   1056:       if (0 <= val)
                   1057:        {
                   1058:          if (val == -2)
                   1059:            return -2;
                   1060:          return startpos;
                   1061:        }
                   1062: 
                   1063: #ifdef C_ALLOCA
                   1064:       alloca (0);
                   1065: #endif /* C_ALLOCA */
                   1066: 
                   1067:     advance:
                   1068:       if (!range) break;
                   1069:       if (range > 0) range--, startpos++; else range++, startpos--;
                   1070:     }
                   1071:   return -1;
                   1072: }
                   1073: 
                   1074: #ifndef emacs   /* emacs never uses this */
                   1075: int
                   1076: re_match (pbufp, string, size, pos, regs)
                   1077:      struct re_pattern_buffer *pbufp;
                   1078:      char *string;
                   1079:      int size, pos;
                   1080:      struct re_registers *regs;
                   1081: {
                   1082:   return re_match_2 (pbufp, 0, 0, string, size, pos, regs, size);
                   1083: }
                   1084: #endif /* emacs */
                   1085: 
                   1086: /* Maximum size of failure stack.  Beyond this, overflow is an error.  */
                   1087: 
                   1088: int re_max_failures = 2000;
                   1089: 
                   1090: static int bcmp_translate();
                   1091: /* Match the pattern described by PBUFP
                   1092:    against data which is the virtual concatenation of STRING1 and STRING2.
                   1093:    SIZE1 and SIZE2 are the sizes of the two data strings.
                   1094:    Start the match at position POS.
                   1095:    Do not consider matching past the position MSTOP.
                   1096: 
                   1097:    If pbufp->fastmap is nonzero, then it had better be up to date.
                   1098: 
                   1099:    The reason that the data to match are specified as two components
                   1100:    which are to be regarded as concatenated
                   1101:    is so this function can be used directly on the contents of an Emacs buffer.
                   1102: 
                   1103:    -1 is returned if there is no match.  -2 is returned if there is
                   1104:    an error (such as match stack overflow).  Otherwise the value is the length
                   1105:    of the substring which was matched.  */
                   1106: 
                   1107: int
                   1108: re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop)
                   1109:      struct re_pattern_buffer *pbufp;
                   1110:      unsigned char *string1, *string2;
                   1111:      int size1, size2;
                   1112:      int pos;
                   1113:      struct re_registers *regs;
                   1114:      int mstop;
                   1115: {
                   1116:   register unsigned char *p = (unsigned char *) pbufp->buffer;
                   1117:   register unsigned char *pend = p + pbufp->used;
                   1118:   /* End of first string */
                   1119:   unsigned char *end1;
                   1120:   /* End of second string */
                   1121:   unsigned char *end2;
                   1122:   /* Pointer just past last char to consider matching */
                   1123:   unsigned char *end_match_1, *end_match_2;
                   1124:   register unsigned char *d, *dend;
                   1125:   register int mcnt;
                   1126:   unsigned char *translate = (unsigned char *) pbufp->translate;
                   1127: 
                   1128:  /* Failure point stack.  Each place that can handle a failure further down the line
                   1129:     pushes a failure point on this stack.  It consists of two char *'s.
                   1130:     The first one pushed is where to resume scanning the pattern;
                   1131:     the second pushed is where to resume scanning the strings.
                   1132:     If the latter is zero, the failure point is a "dummy".
                   1133:     If a failure happens and the innermost failure point is dormant,
                   1134:     it discards that failure point and tries the next one. */
                   1135: 
                   1136:   unsigned char *initial_stack[2 * NFAILURES];
                   1137:   unsigned char **stackb = initial_stack;
                   1138:   unsigned char **stackp = stackb, **stacke = &stackb[2 * NFAILURES];
                   1139: 
                   1140:   /* Information on the "contents" of registers.
                   1141:      These are pointers into the input strings; they record
                   1142:      just what was matched (on this attempt) by some part of the pattern.
                   1143:      The start_memory command stores the start of a register's contents
                   1144:      and the stop_memory command stores the end.
                   1145: 
                   1146:      At that point, regstart[regnum] points to the first character in the register,
                   1147:      regend[regnum] points to the first character beyond the end of the register,
                   1148:      regstart_seg1[regnum] is true iff regstart[regnum] points into string1,
                   1149:      and regend_seg1[regnum] is true iff regend[regnum] points into string1.  */
                   1150: 
                   1151:   unsigned char *regstart[RE_NREGS];
                   1152:   unsigned char *regend[RE_NREGS];
                   1153:   unsigned char regstart_seg1[RE_NREGS], regend_seg1[RE_NREGS];
                   1154: 
                   1155:   /* Set up pointers to ends of strings.
                   1156:      Don't allow the second string to be empty unless both are empty.  */
                   1157:   if (!size2)
                   1158:     {
                   1159:       string2 = string1;
                   1160:       size2 = size1;
                   1161:       string1 = 0;
                   1162:       size1 = 0;
                   1163:     }
                   1164:   end1 = string1 + size1;
                   1165:   end2 = string2 + size2;
                   1166: 
                   1167:   /* Compute where to stop matching, within the two strings */
                   1168:   if (mstop <= size1)
                   1169:     {
                   1170:       end_match_1 = string1 + mstop;
                   1171:       end_match_2 = string2;
                   1172:     }
                   1173:   else
                   1174:     {
                   1175:       end_match_1 = end1;
                   1176:       end_match_2 = string2 + mstop - size1;
                   1177:     }
                   1178: 
                   1179:   /* Initialize \) text positions to -1
                   1180:      to mark ones that no \( or \) has been seen for.  */
                   1181: 
                   1182:   for (mcnt = 0; mcnt < sizeof (regend) / sizeof (*regend); mcnt++)
                   1183:     regend[mcnt] = (unsigned char *) -1;
                   1184: 
                   1185:   /* `p' scans through the pattern as `d' scans through the data.
                   1186:      `dend' is the end of the input string that `d' points within.
                   1187:      `d' is advanced into the following input string whenever necessary,
                   1188:      but this happens before fetching;
                   1189:      therefore, at the beginning of the loop,
                   1190:      `d' can be pointing at the end of a string,
                   1191:      but it cannot equal string2.  */
                   1192: 
                   1193:   if (pos <= size1)
                   1194:     d = string1 + pos, dend = end_match_1;
                   1195:   else
                   1196:     d = string2 + pos - size1, dend = end_match_2;
                   1197: 
                   1198: /* Write PREFETCH; just before fetching a character with *d.  */
                   1199: #define PREFETCH \
                   1200:  while (d == dend)                                                 \
                   1201:   { if (dend == end_match_2) goto fail;  /* end of string2 => failure */   \
                   1202:     d = string2;  /* end of string1 => advance to string2. */       \
                   1203:     dend = end_match_2; }
                   1204: 
                   1205:   /* This loop loops over pattern commands.
                   1206:      It exits by returning from the function if match is complete,
                   1207:      or it drops through if match fails at this starting point in the input data. */
                   1208: 
                   1209:   while (1)
                   1210:     {
                   1211:       if (p == pend)
                   1212:        /* End of pattern means we have succeeded! */
                   1213:        {
                   1214:          /* If caller wants register contents data back, convert it to indices */
                   1215:          if (regs)
                   1216:            {
                   1217:              regs->start[0] = pos;
                   1218:              if (dend == end_match_1)
                   1219:                regs->end[0] = d - string1;
                   1220:              else
                   1221:                regs->end[0] = d - string2 + size1;
                   1222:              for (mcnt = 1; mcnt < RE_NREGS; mcnt++)
                   1223:                {
                   1224:                  if (regend[mcnt] == (unsigned char *) -1)
                   1225:                    {
                   1226:                      regs->start[mcnt] = -1;
                   1227:                      regs->end[mcnt] = -1;
                   1228:                      continue;
                   1229:                    }
                   1230:                  if (regstart_seg1[mcnt])
                   1231:                    regs->start[mcnt] = regstart[mcnt] - string1;
                   1232:                  else
                   1233:                    regs->start[mcnt] = regstart[mcnt] - string2 + size1;
                   1234:                  if (regend_seg1[mcnt])
                   1235:                    regs->end[mcnt] = regend[mcnt] - string1;
                   1236:                  else
                   1237:                    regs->end[mcnt] = regend[mcnt] - string2 + size1;
                   1238:                }
                   1239:            }
                   1240:          if (dend == end_match_1)
                   1241:            return (d - string1 - pos);
                   1242:          else
                   1243:            return d - string2 + size1 - pos;
                   1244:        }
                   1245: 
                   1246:       /* Otherwise match next pattern command */
                   1247: #ifdef SWITCH_ENUM_BUG
                   1248:       switch ((int) ((enum regexpcode) *p++))
                   1249: #else
                   1250:       switch ((enum regexpcode) *p++)
                   1251: #endif
                   1252:        {
                   1253: 
                   1254:        /* \( is represented by a start_memory, \) by a stop_memory.
                   1255:            Both of those commands contain a "register number" argument.
                   1256:            The text matched within the \( and \) is recorded under that number.
                   1257:            Then, \<digit> turns into a `duplicate' command which
                   1258:            is followed by the numeric value of <digit> as the register number. */
                   1259: 
                   1260:        case start_memory:
                   1261:          regstart[*p] = d;
                   1262:          regstart_seg1[*p++] = (dend == end_match_1);
                   1263:          break;
                   1264: 
                   1265:        case stop_memory:
                   1266:          regend[*p] = d;
                   1267:          regend_seg1[*p++] = (dend == end_match_1);
                   1268:          break;
                   1269: 
                   1270:        case duplicate:
                   1271:          {
                   1272:            int regno = *p++;   /* Get which register to match against */
                   1273:            register unsigned char *d2, *dend2;
                   1274: 
                   1275:            d2 = regstart[regno];
                   1276:            dend2 = ((regstart_seg1[regno] == regend_seg1[regno])
                   1277:                     ? regend[regno] : end_match_1);
                   1278:            while (1)
                   1279:              {
                   1280:                /* Advance to next segment in register contents, if necessary */
                   1281:                while (d2 == dend2)
                   1282:                  {
                   1283:                    if (dend2 == end_match_2) break;
                   1284:                    if (dend2 == regend[regno]) break;
                   1285:                    d2 = string2, dend2 = regend[regno];  /* end of string1 => advance to string2. */
                   1286:                  }
                   1287:                /* At end of register contents => success */
                   1288:                if (d2 == dend2) break;
                   1289: 
                   1290:                /* Advance to next segment in data being matched, if necessary */
                   1291:                PREFETCH;
                   1292: 
                   1293:                /* mcnt gets # consecutive chars to compare */
                   1294:                mcnt = dend - d;
                   1295:                if (mcnt > dend2 - d2)
                   1296:                  mcnt = dend2 - d2;
                   1297:                /* Compare that many; failure if mismatch, else skip them. */
                   1298:                if (translate ? bcmp_translate (d, d2, mcnt, translate) : bcmp (d, d2, mcnt))
                   1299:                  goto fail;
                   1300:                d += mcnt, d2 += mcnt;
                   1301:              }
                   1302:          }
                   1303:          break;
                   1304: 
                   1305:        case anychar:
                   1306:          /* fetch a data character */
                   1307:          PREFETCH;
                   1308:          /* Match anything but a newline.  */
                   1309:          if ((translate ? translate[*d++] : *d++) == '\n')
                   1310:            goto fail;
                   1311:          break;
                   1312: 
                   1313:        case charset:
                   1314:        case charset_not:
                   1315:          {
                   1316:            /* Nonzero for charset_not */
                   1317:            int not = 0;
                   1318:            register int c;
                   1319:            if (*(p - 1) == (unsigned char) charset_not)
                   1320:              not = 1;
                   1321: 
                   1322:            /* fetch a data character */
                   1323:            PREFETCH;
                   1324: 
                   1325:            if (translate)
                   1326:              c = translate [*d];
                   1327:            else
                   1328:              c = *d;
                   1329: 
                   1330:            if (c < *p * BYTEWIDTH
                   1331:                && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
                   1332:              not = !not;
                   1333: 
                   1334:            p += 1 + *p;
                   1335: 
                   1336:            if (!not) goto fail;
                   1337:            d++;
                   1338:            break;
                   1339:          }
                   1340: 
                   1341:        case begline:
                   1342:          if (d == string1 || d[-1] == '\n')
                   1343:            break;
                   1344:          goto fail;
                   1345: 
                   1346:        case endline:
                   1347:          if (d == end2
                   1348:              || (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n'))
                   1349:            break;
                   1350:          goto fail;
                   1351: 
                   1352:        /* "or" constructs ("|") are handled by starting each alternative
                   1353:            with an on_failure_jump that points to the start of the next alternative.
                   1354:            Each alternative except the last ends with a jump to the joining point.
                   1355:            (Actually, each jump except for the last one really jumps
                   1356:             to the following jump, because tensioning the jumps is a hassle.) */
                   1357: 
                   1358:        /* The start of a stupid repeat has an on_failure_jump that points
                   1359:           past the end of the repeat text.
                   1360:           This makes a failure point so that, on failure to match a repetition,
                   1361:           matching restarts past as many repetitions have been found
                   1362:           with no way to fail and look for another one.  */
                   1363: 
                   1364:        /* A smart repeat is similar but loops back to the on_failure_jump
                   1365:           so that each repetition makes another failure point. */
                   1366: 
                   1367:        case on_failure_jump:
                   1368:          if (stackp == stacke)
                   1369:            {
                   1370:              unsigned char **stackx;
                   1371:              if (stacke - stackb > re_max_failures)
                   1372:                return -2;
                   1373:              stackx = (unsigned char **) alloca (2 * (stacke - stackb)
                   1374:                                         * sizeof (char *));
                   1375:              bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
                   1376:              stackp = stackx + (stackp - stackb);
                   1377:              stacke = stackx + 2 * (stacke - stackb);
                   1378:              stackb = stackx;
                   1379:            }
                   1380:          mcnt = *p++ & 0377;
                   1381:          mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
                   1382:          p++;
                   1383:          *stackp++ = mcnt + p;
                   1384:          *stackp++ = d;
                   1385:          break;
                   1386: 
                   1387:        /* The end of a smart repeat has an maybe_finalize_jump back.
                   1388:           Change it either to a finalize_jump or an ordinary jump. */
                   1389: 
                   1390:        case maybe_finalize_jump:
                   1391:          mcnt = *p++ & 0377;
                   1392:          mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
                   1393:          p++;
                   1394:          /* Compare what follows with the begining of the repeat.
                   1395:             If we can establish that there is nothing that they would
                   1396:             both match, we can change to finalize_jump */
                   1397:          if (p == pend)
                   1398:            p[-3] = (unsigned char) finalize_jump;
                   1399:          else if (*p == (unsigned char) exactn
                   1400:                   || *p == (unsigned char) endline)
                   1401:            {
                   1402:              register int c = *p == (unsigned char) endline ? '\n' : p[2];
                   1403:              register unsigned char *p1 = p + mcnt;
                   1404:              /* p1[0] ... p1[2] are an on_failure_jump.
                   1405:                 Examine what follows that */
                   1406:              if (p1[3] == (unsigned char) exactn && p1[5] != c)
                   1407:                p[-3] = (unsigned char) finalize_jump;
                   1408:              else if (p1[3] == (unsigned char) charset
                   1409:                       || p1[3] == (unsigned char) charset_not)
                   1410:                {
                   1411:                  int not = p1[3] == (unsigned char) charset_not;
                   1412:                  if (c < p1[4] * BYTEWIDTH
                   1413:                      && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
                   1414:                    not = !not;
                   1415:                  /* not is 1 if c would match */
                   1416:                  /* That means it is not safe to finalize */
                   1417:                  if (!not)
                   1418:                    p[-3] = (unsigned char) finalize_jump;
                   1419:                }
                   1420:            }
                   1421:          p -= 2;
                   1422:          if (p[-1] != (unsigned char) finalize_jump)
                   1423:            {
                   1424:              p[-1] = (unsigned char) jump;
                   1425:              goto nofinalize;
                   1426:            }
                   1427: 
                   1428:        /* The end of a stupid repeat has a finalize-jump
                   1429:           back to the start, where another failure point will be made
                   1430:           which will point after all the repetitions found so far. */
                   1431: 
                   1432:        case finalize_jump:
                   1433:          stackp -= 2;
                   1434: 
                   1435:        case jump:
                   1436:        nofinalize:
                   1437:          mcnt = *p++ & 0377;
                   1438:          mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
                   1439:          p += mcnt + 1;        /* The 1 compensates for missing ++ above */
                   1440:          break;
                   1441: 
                   1442:        case dummy_failure_jump:
                   1443:          if (stackp == stacke)
                   1444:            {
                   1445:              unsigned char **stackx
                   1446:                = (unsigned char **) alloca (2 * (stacke - stackb)
                   1447:                                             * sizeof (char *));
                   1448:              bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
                   1449:              stackp = stackx + (stackp - stackb);
                   1450:              stacke = stackx + 2 * (stacke - stackb);
                   1451:              stackb = stackx;
                   1452:            }
                   1453:          *stackp++ = 0;
                   1454:          *stackp++ = 0;
                   1455:          goto nofinalize;
                   1456: 
                   1457:        case wordbound:
                   1458:          if (d == string1  /* Points to first char */
                   1459:              || d == end2  /* Points to end */
                   1460:              || (d == end1 && size2 == 0)) /* Points to end */
                   1461:            break;
                   1462:          if ((SYNTAX (d[-1]) == Sword)
                   1463:              != (SYNTAX (d == end1 ? *string2 : *d) == Sword))
                   1464:            break;
                   1465:          goto fail;
                   1466: 
                   1467:        case notwordbound:
                   1468:          if (d == string1  /* Points to first char */
                   1469:              || d == end2  /* Points to end */
                   1470:              || (d == end1 && size2 == 0)) /* Points to end */
                   1471:            goto fail;
                   1472:          if ((SYNTAX (d[-1]) == Sword)
                   1473:              != (SYNTAX (d == end1 ? *string2 : *d) == Sword))
                   1474:            goto fail;
                   1475:          break;
                   1476: 
                   1477:        case wordbeg:
                   1478:          if (d == end2  /* Points to end */
                   1479:              || (d == end1 && size2 == 0) /* Points to end */
                   1480:              || SYNTAX (* (d == end1 ? string2 : d)) != Sword) /* Next char not a letter */
                   1481:            goto fail;
                   1482:          if (d == string1  /* Points to first char */
                   1483:              || SYNTAX (d[-1]) != Sword)  /* prev char not letter */
                   1484:            break;
                   1485:          goto fail;
                   1486: 
                   1487:        case wordend:
                   1488:          if (d == string1  /* Points to first char */
                   1489:              || SYNTAX (d[-1]) != Sword)  /* prev char not letter */
                   1490:            goto fail;
                   1491:          if (d == end2  /* Points to end */
                   1492:              || (d == end1 && size2 == 0) /* Points to end */
                   1493:              || SYNTAX (d == end1 ? *string2 : *d) != Sword) /* Next char not a letter */
                   1494:            break;
                   1495:          goto fail;
                   1496: 
                   1497: #ifdef emacs
                   1498:        case before_dot:
                   1499:          if (((d - string2 <= (unsigned) size2)
                   1500:               ? d - bf_p2 : d - bf_p1)
                   1501:              <= point)
                   1502:            goto fail;
                   1503:          break;
                   1504: 
                   1505:        case at_dot:
                   1506:          if (((d - string2 <= (unsigned) size2)
                   1507:               ? d - bf_p2 : d - bf_p1)
                   1508:              == point)
                   1509:            goto fail;
                   1510:          break;
                   1511: 
                   1512:        case after_dot:
                   1513:          if (((d - string2 <= (unsigned) size2)
                   1514:               ? d - bf_p2 : d - bf_p1)
                   1515:              >= point)
                   1516:            goto fail;
                   1517:          break;
                   1518: 
                   1519:        case wordchar:
                   1520:          mcnt = (int) Sword;
                   1521:          goto matchsyntax;
                   1522: 
                   1523:        case syntaxspec:
                   1524:          mcnt = *p++;
                   1525:        matchsyntax:
                   1526:          PREFETCH;
                   1527:          if (SYNTAX (*d++) != (enum syntaxcode) mcnt) goto fail;
                   1528:          break;
                   1529:          
                   1530:        case notwordchar:
                   1531:          mcnt = (int) Sword;
                   1532:          goto matchnotsyntax;
                   1533: 
                   1534:        case notsyntaxspec:
                   1535:          mcnt = *p++;
                   1536:        matchnotsyntax:
                   1537:          PREFETCH;
                   1538:          if (SYNTAX (*d++) == (enum syntaxcode) mcnt) goto fail;
                   1539:          break;
                   1540: #else
                   1541:        case wordchar:
                   1542:          PREFETCH;
                   1543:          if (SYNTAX (*d++) == 0) goto fail;
                   1544:          break;
                   1545:          
                   1546:        case notwordchar:
                   1547:          PREFETCH;
                   1548:          if (SYNTAX (*d++) != 0) goto fail;
                   1549:          break;
                   1550: #endif not emacs
                   1551: 
                   1552:        case begbuf:
                   1553:          if (d == string1)     /* Note, d cannot equal string2 */
                   1554:            break;              /* unless string1 == string2.  */
                   1555:          goto fail;
                   1556: 
                   1557:        case endbuf:
                   1558:          if (d == end2 || (d == end1 && size2 == 0))
                   1559:            break;
                   1560:          goto fail;
                   1561: 
                   1562:        case exactn:
                   1563:          /* Match the next few pattern characters exactly.
                   1564:             mcnt is how many characters to match. */
                   1565:          mcnt = *p++;
                   1566:          if (translate)
                   1567:            {
                   1568:              do
                   1569:                {
                   1570:                  PREFETCH;
                   1571:                  if (translate[*d++] != *p++) goto fail;
                   1572:                }
                   1573:              while (--mcnt);
                   1574:            }
                   1575:          else
                   1576:            {
                   1577:              do
                   1578:                {
                   1579:                  PREFETCH;
                   1580:                  if (*d++ != *p++) goto fail;
                   1581:                }
                   1582:              while (--mcnt);
                   1583:            }
                   1584:          break;
                   1585:        }
                   1586:       continue;    /* Successfully matched one pattern command; keep matching */
                   1587: 
                   1588:       /* Jump here if any matching operation fails. */
                   1589:     fail:
                   1590:       if (stackp != stackb)
                   1591:        /* A restart point is known.  Restart there and pop it. */
                   1592:        {
                   1593:          if (!stackp[-2])
                   1594:            {   /* If innermost failure point is dormant, flush it and keep looking */
                   1595:              stackp -= 2;
                   1596:              goto fail;
                   1597:            }
                   1598:          d = *--stackp;
                   1599:          p = *--stackp;
                   1600:          if (d >= string1 && d <= end1)
                   1601:            dend = end_match_1;
                   1602:        }
                   1603:       else break;   /* Matching at this starting point really fails! */
                   1604:     }
                   1605:   return -1;         /* Failure to match */
                   1606: }
                   1607: 
                   1608: static int
                   1609: bcmp_translate (s1, s2, len, translate)
                   1610:      unsigned char *s1, *s2;
                   1611:      register int len;
                   1612:      unsigned char *translate;
                   1613: {
                   1614:   register unsigned char *p1 = s1, *p2 = s2;
                   1615:   while (len)
                   1616:     {
                   1617:       if (translate [*p1++] != translate [*p2++]) return 1;
                   1618:       len--;
                   1619:     }
                   1620:   return 0;
                   1621: }
                   1622: 
                   1623: /* Entry points compatible with bsd4.2 regex library */
                   1624: 
                   1625: #ifndef emacs
                   1626: 
                   1627: static struct re_pattern_buffer re_comp_buf;
                   1628: 
                   1629: char *
                   1630: re_comp (s)
                   1631:      char *s;
                   1632: {
                   1633:   if (!s)
                   1634:     {
                   1635:       if (!re_comp_buf.buffer)
                   1636:        return "No previous regular expression";
                   1637:       return 0;
                   1638:     }
                   1639: 
                   1640:   if (!re_comp_buf.buffer)
                   1641:     {
                   1642:       if (!(re_comp_buf.buffer = (char *) malloc (200)))
                   1643:        return "Memory exhausted";
                   1644:       re_comp_buf.allocated = 200;
                   1645:       if (!(re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH)))
                   1646:        return "Memory exhausted";
                   1647:     }
                   1648:   return re_compile_pattern (s, strlen (s), &re_comp_buf);
                   1649: }
                   1650: 
                   1651: int
                   1652: re_exec (s)
                   1653:      char *s;
                   1654: {
                   1655:   int len = strlen (s);
                   1656:   return 0 <= re_search (&re_comp_buf, s, len, 0, len, 0);
                   1657: }
                   1658: 
                   1659: #endif /* emacs */
                   1660: 
                   1661: #ifdef test
                   1662: 
                   1663: #include <stdio.h>
                   1664: 
                   1665: /* Indexed by a character, gives the upper case equivalent of the character */
                   1666: 
                   1667: static char upcase[0400] = 
                   1668:   { 000, 001, 002, 003, 004, 005, 006, 007,
                   1669:     010, 011, 012, 013, 014, 015, 016, 017,
                   1670:     020, 021, 022, 023, 024, 025, 026, 027,
                   1671:     030, 031, 032, 033, 034, 035, 036, 037,
                   1672:     040, 041, 042, 043, 044, 045, 046, 047,
                   1673:     050, 051, 052, 053, 054, 055, 056, 057,
                   1674:     060, 061, 062, 063, 064, 065, 066, 067,
                   1675:     070, 071, 072, 073, 074, 075, 076, 077,
                   1676:     0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
                   1677:     0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
                   1678:     0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
                   1679:     0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
                   1680:     0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
                   1681:     0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
                   1682:     0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
                   1683:     0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177,
                   1684:     0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
                   1685:     0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
                   1686:     0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
                   1687:     0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
                   1688:     0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
                   1689:     0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
                   1690:     0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
                   1691:     0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
                   1692:     0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
                   1693:     0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
                   1694:     0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
                   1695:     0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
                   1696:     0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
                   1697:     0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
                   1698:     0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
                   1699:     0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
                   1700:   };
                   1701: 
                   1702: main (argc, argv)
                   1703:      int argc;
                   1704:      char **argv;
                   1705: {
                   1706:   char pat[80];
                   1707:   struct re_pattern_buffer buf;
                   1708:   int i;
                   1709:   char c;
                   1710:   char fastmap[(1 << BYTEWIDTH)];
                   1711: 
                   1712:   /* Allow a command argument to specify the style of syntax.  */
                   1713:   if (argc > 1)
                   1714:     obscure_syntax = atoi (argv[1]);
                   1715: 
                   1716:   buf.allocated = 40;
                   1717:   buf.buffer = (char *) malloc (buf.allocated);
                   1718:   buf.fastmap = fastmap;
                   1719:   buf.translate = upcase;
                   1720: 
                   1721:   while (1)
                   1722:     {
                   1723:       gets (pat);
                   1724: 
                   1725:       if (*pat)
                   1726:        {
                   1727:           re_compile_pattern (pat, strlen(pat), &buf);
                   1728: 
                   1729:          for (i = 0; i < buf.used; i++)
                   1730:            printchar (buf.buffer[i]);
                   1731: 
                   1732:          putchar ('\n');
                   1733: 
                   1734:          printf ("%d allocated, %d used.\n", buf.allocated, buf.used);
                   1735: 
                   1736:          re_compile_fastmap (&buf);
                   1737:          printf ("Allowed by fastmap: ");
                   1738:          for (i = 0; i < (1 << BYTEWIDTH); i++)
                   1739:            if (fastmap[i]) printchar (i);
                   1740:          putchar ('\n');
                   1741:        }
                   1742: 
                   1743:       gets (pat);      /* Now read the string to match against */
                   1744: 
                   1745:       i = re_match (&buf, pat, strlen (pat), 0, 0);
                   1746:       printf ("Match value %d.\n", i);
                   1747:     }
                   1748: }
                   1749: 
                   1750: #ifdef NOTDEF
                   1751: print_buf (bufp)
                   1752:      struct re_pattern_buffer *bufp;
                   1753: {
                   1754:   int i;
                   1755: 
                   1756:   printf ("buf is :\n----------------\n");
                   1757:   for (i = 0; i < bufp->used; i++)
                   1758:     printchar (bufp->buffer[i]);
                   1759:   
                   1760:   printf ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used);
                   1761:   
                   1762:   printf ("Allowed by fastmap: ");
                   1763:   for (i = 0; i < (1 << BYTEWIDTH); i++)
                   1764:     if (bufp->fastmap[i])
                   1765:       printchar (i);
                   1766:   printf ("\nAllowed by translate: ");
                   1767:   if (bufp->translate)
                   1768:     for (i = 0; i < (1 << BYTEWIDTH); i++)
                   1769:       if (bufp->translate[i])
                   1770:        printchar (i);
                   1771:   printf ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't");
                   1772:   printf ("can %s be null\n----------", bufp->can_be_null ? "" : "not");
                   1773: }
                   1774: #endif
                   1775: 
                   1776: printchar (c)
                   1777:      char c;
                   1778: {
                   1779:   if (c < 041 || c >= 0177)
                   1780:     {
                   1781:       putchar ('\\');
                   1782:       putchar (((c >> 6) & 3) + '0');
                   1783:       putchar (((c >> 3) & 7) + '0');
                   1784:       putchar ((c & 7) + '0');
                   1785:     }
                   1786:   else
                   1787:     putchar (c);
                   1788: }
                   1789: 
                   1790: error (string)
                   1791:      char *string;
                   1792: {
                   1793:   puts (string);
                   1794:   exit (1);
                   1795: }
                   1796: 
                   1797: #endif test

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.