Annotation of GNUtools/emacs/src/regex.c, revision 1.1.1.1

1.1       root        1: /* Extended regular expression matching and search.
                      2:    Copyright (C) 1985 Free Software Foundation, Inc.
                      3: 
                      4:     This program is free software; you can redistribute it and/or modify
                      5:     it under the terms of the GNU General Public License as published by
                      6:     the Free Software Foundation; either version 1, or (at your option)
                      7:     any later version.
                      8: 
                      9:     This program is distributed in the hope that it will be useful,
                     10:     but WITHOUT ANY WARRANTY; without even the implied warranty of
                     11:     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
                     12:     GNU General Public License for more details.
                     13: 
                     14:     You should have received a copy of the GNU General Public License
                     15:     along with this program; if not, write to the Free Software
                     16:     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
                     17: 
                     18: In other words, you are welcome to use, share and improve this program.
                     19: You are forbidden to forbid anyone else to use, share and improve
                     20: what you give them.   Help stamp out software-hoarding!  */
                     21: 
                     22: 
                     23: /* To test, compile with -Dtest.
                     24:  This Dtestable feature turns this into a self-contained program
                     25:  which reads a pattern, describes how it compiles,
                     26:  then reads a string and searches for it.  */
                     27: 
                     28: 
                     29: #ifdef emacs
                     30: 
                     31: /* The `emacs' switch turns on certain special matching commands
                     32:  that make sense only in emacs. */
                     33: 
                     34: #include "config.h"
                     35: #include "lisp.h"
                     36: #include "buffer.h"
                     37: #include "syntax.h"
                     38: 
                     39: #else  /* not emacs */
                     40: 
                     41: /*
                     42:  * Define the syntax stuff, so we can do the \<...\> things.
                     43:  */
                     44: 
                     #ifndef Sword /* must be non-zero in some of the tests below... */
                     #define Sword 1
                     #endif

                     /* SYNTAX (c) yields the syntax-table entry stored for character code C.  */
                     #define SYNTAX(c) re_syntax_table[c]

                     #ifdef SYNTAX_TABLE

                     /* With SYNTAX_TABLE defined the table is only a pointer, and nothing in
                        this section makes it point anywhere.
                        NOTE(review): presumably the embedding program supplies the table
                        before any matching is done -- confirm against the callers.  */
                     char *re_syntax_table;

                     #else

                     /* Built-in table, indexed by character code.  init_syntax_once stores
                        Sword for 'a'-'z', 'A'-'Z' and '0'-'9' and zero everywhere else.  */
                     static char re_syntax_table[256];

                     /* Fill in the built-in syntax table.  Only the first call does any work;
                        every later call returns at once.  */
                     static void
                     init_syntax_once ()
                     {
                        register int c;
                        static int done = 0;

                        if (done)
                          return;

                        /* Clear every entry with a plain loop rather than bzero: in a
                           non-emacs build no declaration of bzero precedes this point, and
                           bzero is a legacy BSD routine that an ISO C library need not
                           provide.  */
                        for (c = 0; c < (int) sizeof re_syntax_table; c++)
                          re_syntax_table[c] = 0;

                        /* Letters and digits are the only word constituents.  */
                        for (c = 'a'; c <= 'z'; c++)
                          re_syntax_table[c] = Sword;

                        for (c = 'A'; c <= 'Z'; c++)
                          re_syntax_table[c] = Sword;

                        for (c = '0'; c <= '9'; c++)
                          re_syntax_table[c] = Sword;

                        done = 1;
                     }

                     #endif /* SYNTAX_TABLE */
                     83: #endif /* not emacs */
                     84: 
                     85: #include "regex.h"
                     86: 
                     /* Number of failure points to allocate space for initially,
                      when matching.  If this number is exceeded, more space is allocated,
                      so it is not a hard limit.  A definition of NFAILURES made before
                      this point takes precedence over the default below.  */

                     #ifndef NFAILURES
                     #define NFAILURES 80
                     #endif /* NFAILURES */

                     /* width of a byte in bits */

                     #define BYTEWIDTH 8

                     /* SIGN_EXTEND_CHAR (x) defaults to the identity; a definition made
                        before this point takes precedence.
                        NOTE(review): presumably configurations where plain char is unsigned
                        are expected to supply a real sign-extending version -- confirm.  */

                     #ifndef SIGN_EXTEND_CHAR
                     #define SIGN_EXTEND_CHAR(x) (x)
                     #endif
                    102: 
                    /* Syntax option bits currently in force; 0 selects the default syntax.
                       Consulted by re_compile_pattern each time it meets a character whose
                       meaning depends on the chosen syntax.  */
                    static int obscure_syntax = 0;

                    /* Specify the precise syntax of regexp for compilation.
                       This provides for compatibility for various utilities
                       which historically have different, incompatible syntaxes.

                       The argument SYNTAX is a bit-mask.  The bits tested by the pattern
                       compiler in this file are RE_NO_BK_PARENS, RE_NO_BK_VBAR,
                       RE_BK_PLUS_QM, RE_TIGHT_VBAR, RE_NEWLINE_OR and RE_CONTEXT_INDEP_OPS.

                       The value is stored as given, without validation.
                       Returns the syntax mask that was in force before the call, so a
                       caller can restore it afterwards.  */

                    int
                    re_set_syntax (syntax)
                         int syntax;   /* declared explicitly: implicit int is gone since C99 */
                    {
                      int ret;

                      ret = obscure_syntax;
                      obscure_syntax = syntax;
                      return ret;
                    }
                    121: 
                    122: /* re_compile_pattern takes a regular-expression string
                    123:    and converts it into a buffer full of byte commands for matching.
                    124: 
                    125:   PATTERN   is the address of the pattern string
                    126:   SIZE      is the length of it.
                    127:   BUFP     is a  struct re_pattern_buffer *  which points to the info
                    128:            on where to store the byte commands.
                    129:            This structure contains a  char *  which points to the
                    130:            actual space, which should have been obtained with malloc.
                    131:            re_compile_pattern may use  realloc  to grow the buffer space.
                    132: 
                    133:   The number of bytes of commands can be found out by looking in
                    134:   the  struct re_pattern_buffer  that bufp pointed to,
                    135:   after re_compile_pattern returns.
                    136: */
                    137: 
                    /* The macros below are meant to be expanded inside re_compile_pattern
                       only.  They read and write its locals (b, p, pend, translate, bufp,
                       begalt, fixup_jump, laststart, pending_exact) and jump to its labels
                       (end_of_pattern, too_big, memory_exhausted).  The multi-statement
                       ones are plain brace blocks, so they must be used as complete
                       statements and never directly in front of an `else'.  */

                    /* Append the byte CH to the compiled pattern and advance b.  */
                    #define PATPUSH(ch) (*b++ = (char) (ch))

                    /* Fetch the next pattern character into C as an unsigned value,
                       mapping it through the translate table when there is one.
                       Jumps to end_of_pattern if the pattern is exhausted.  */
                    #define PATFETCH(c) \
                     {if (p == pend) goto end_of_pattern; \
                      c = * (unsigned char *) p++; \
                      if (translate) c = translate[c]; }

                    /* Like PATFETCH, but never applies the translate table.  */
                    #define PATFETCH_RAW(c) \
                     {if (p == pend) goto end_of_pattern; \
                      c = * (unsigned char *) p++; }

                    /* Step back over the pattern character fetched most recently.  */
                    #define PATUNFETCH p--

                    /* Double the compiled-pattern buffer, up to a ceiling of 1<<16 bytes,
                       and re-aim every pointer that points into it.

                       Jumps to too_big if the buffer is already at the ceiling, and to
                       memory_exhausted if realloc fails; in both cases bufp->buffer and
                       bufp->allocated are left exactly as they were, so the caller still
                       owns a valid buffer.

                       Each pointer is converted to an offset from the old buffer BEFORE
                       the realloc and rebuilt from the new base afterwards.  Taking the
                       difference of the old and new base addresses instead is not safe:
                       the two blocks are unrelated objects, and squeezing that difference
                       through an `unsigned' loses the high bits of a 64-bit address.

                       fixup_jump, laststart and pending_exact may be null, and a null
                       pointer stays null.  This macro does not modify c.  As the compiler
                       itself notes, it must not be used while bufp->allocated is 0.  */
                    #define EXTEND_BUFFER \
                      { char *eb_old = bufp->buffer; \
                        char *eb_new; \
                        long eb_saved_size = bufp->allocated; \
                        long eb_b = b - eb_old; \
                        long eb_begalt = begalt - eb_old; \
                        long eb_fixup = fixup_jump ? fixup_jump - eb_old : 0; \
                        long eb_last = laststart ? laststart - eb_old : 0; \
                        long eb_exact = pending_exact ? pending_exact - eb_old : 0; \
                        if (bufp->allocated == (1<<16)) goto too_big; \
                        bufp->allocated *= 2; \
                        if (bufp->allocated > (1<<16)) bufp->allocated = (1<<16); \
                        eb_new = (char *) realloc (eb_old, bufp->allocated); \
                        if (!eb_new) \
                          { bufp->allocated = eb_saved_size; \
                            goto memory_exhausted; } \
                        bufp->buffer = eb_new; \
                        b = eb_new + eb_b; \
                        begalt = eb_new + eb_begalt; \
                        if (fixup_jump) \
                          fixup_jump = eb_new + eb_fixup; \
                        if (laststart) \
                          laststart = eb_new + eb_last; \
                        if (pending_exact) \
                          pending_exact = eb_new + eb_exact; \
                      }

                    /* Helpers that write jump commands into the compiled pattern; they are
                       called by re_compile_pattern, hence the forward declaration.  */
                    static int store_jump (), insert_jump ();
                    170: 
                    171: char *
                    172: re_compile_pattern (pattern, size, bufp)
                    173:      char *pattern;
                    174:      int size;
                    175:      struct re_pattern_buffer *bufp;
                    176: {
                    177:   register char *b = bufp->buffer;
                    178:   register char *p = pattern;
                    179:   char *pend = pattern + size;
                    180:   register unsigned c, c1;
                    181:   char *p1;
                    182:   unsigned char *translate = (unsigned char *) bufp->translate;
                    183: 
                    184:   /* address of the count-byte of the most recently inserted "exactn" command.
                    185:     This makes it possible to tell whether a new exact-match character
                    186:     can be added to that command or requires a new "exactn" command. */
                    187:      
                    188:   char *pending_exact = 0;
                    189: 
                    190:   /* address of the place where a forward-jump should go
                    191:     to the end of the containing expression.
                    192:     Each alternative of an "or", except the last, ends with a forward-jump
                    193:     of this sort. */
                    194: 
                    195:   char *fixup_jump = 0;
                    196: 
                    197:   /* address of start of the most recently finished expression.
                    198:     This tells postfix * where to find the start of its operand. */
                    199: 
                    200:   char *laststart = 0;
                    201: 
                    202:   /* In processing a repeat, 1 means zero matches is allowed */
                    203: 
                    204:   char zero_times_ok;
                    205: 
                    206:   /* In processing a repeat, 1 means many matches is allowed */
                    207: 
                    208:   char many_times_ok;
                    209: 
                    210:   /* address of beginning of regexp, or inside of last \( */
                    211: 
                    212:   char *begalt = b;
                    213: 
                    214:   /* Stack of information saved by \( and restored by \).
                    215:      Four stack elements are pushed by each \(:
                    216:        First, the value of b.
                    217:        Second, the value of fixup_jump.
                    218:        Third, the value of regnum.
                    219:        Fourth, the value of begalt.  */
                    220: 
                    221:   int stackb[40];
                    222:   int *stackp = stackb;
                    223:   int *stacke = stackb + 40;
                    224:   int *stackt;
                    225: 
                    226:   /* Counts \('s as they are encountered.  Remembered for the matching \),
                    227:      where it becomes the "register number" to put in the stop_memory command */
                    228: 
                    229:   int regnum = 1;
                    230: 
                    231:   bufp->fastmap_accurate = 0;
                    232: 
                    233: #ifndef emacs
                    234: #ifndef SYNTAX_TABLE
                    235:   /*
                    236:    * Initialize the syntax table.
                    237:    */
                    238:    init_syntax_once();
                    239: #endif
                    240: #endif
                    241: 
                    242:   if (bufp->allocated == 0)
                    243:     {
                    244:       bufp->allocated = 28;
                    245:       if (bufp->buffer)
                    246:        /* EXTEND_BUFFER loses when bufp->allocated is 0 */
                    247:        bufp->buffer = (char *) realloc (bufp->buffer, 28);
                    248:       else
                    249:        /* Caller did not allocate a buffer.  Do it for him */
                    250:        bufp->buffer = (char *) malloc (28);
                    251:       if (!bufp->buffer) goto memory_exhausted;
                    252:       begalt = b = bufp->buffer;
                    253:     }
                    254: 
                    255:   while (p != pend)
                    256:     {
                    257:       if (b - bufp->buffer > bufp->allocated - 10)
                    258:        /* Note that EXTEND_BUFFER clobbers c */
                    259:        EXTEND_BUFFER;
                    260: 
                    261:       PATFETCH (c);
                    262: 
                    263:       switch (c)
                    264:        {
                    265:        case '$':
                    266:          if (obscure_syntax & RE_TIGHT_VBAR)
                    267:            {
                    268:              if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS) && p != pend)
                    269:                goto normal_char;
                    270:              /* Make operand of last vbar end before this `$'.  */
                    271:              if (fixup_jump)
                    272:                store_jump (fixup_jump, jump, b);
                    273:              fixup_jump = 0;
                    274:              PATPUSH (endline);
                    275:              break;
                    276:            }
                    277: 
                    278:          /* $ means succeed if at end of line, but only in special contexts.
                    279:            If randomly in the middle of a pattern, it is a normal character. */
                    280:          if (p == pend || *p == '\n'
                    281:              || (obscure_syntax & RE_CONTEXT_INDEP_OPS)
                    282:              || (obscure_syntax & RE_NO_BK_PARENS
                    283:                  ? *p == ')'
                    284:                  : *p == '\\' && p[1] == ')')
                    285:              || (obscure_syntax & RE_NO_BK_VBAR
                    286:                  ? *p == '|'
                    287:                  : *p == '\\' && p[1] == '|'))
                    288:            {
                    289:              PATPUSH (endline);
                    290:              break;
                    291:            }
                    292:          goto normal_char;
                    293: 
                    294:        case '^':
                    295:          /* ^ means succeed if at beg of line, but only if no preceding pattern. */
                    296: 
                    297:          if (laststart && p[-2] != '\n'
                    298:              && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
                    299:            goto normal_char;
                    300:          if (obscure_syntax & RE_TIGHT_VBAR)
                    301:            {
                    302:              if (p != pattern + 1
                    303:                  && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
                    304:                goto normal_char;
                    305:              PATPUSH (begline);
                    306:              begalt = b;
                    307:            }
                    308:          else
                    309:            PATPUSH (begline);
                    310:          break;
                    311: 
                    312:        case '+':
                    313:        case '?':
                    314:          if (obscure_syntax & RE_BK_PLUS_QM)
                    315:            goto normal_char;
                    316:        handle_plus:
                    317:        case '*':
                    318:          /* If there is no previous pattern, char not special. */
                    319:          if (!laststart && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
                    320:            goto normal_char;
                    321:          /* If there is a sequence of repetition chars,
                    322:             collapse it down to equivalent to just one.  */
                    323:          zero_times_ok = 0;
                    324:          many_times_ok = 0;
                    325:          while (1)
                    326:            {
                    327:              zero_times_ok |= c != '+';
                    328:              many_times_ok |= c != '?';
                    329:              if (p == pend)
                    330:                break;
                    331:              PATFETCH (c);
                    332:              if (c == '*')
                    333:                ;
                    334:              else if (!(obscure_syntax & RE_BK_PLUS_QM)
                    335:                       && (c == '+' || c == '?'))
                    336:                ;
                    337:              else if ((obscure_syntax & RE_BK_PLUS_QM)
                    338:                       && c == '\\')
                    339:                {
                    340:                  int c1;
                    341:                  PATFETCH (c1);
                    342:                  if (!(c1 == '+' || c1 == '?'))
                    343:                    {
                    344:                      PATUNFETCH;
                    345:                      PATUNFETCH;
                    346:                      break;
                    347:                    }
                    348:                  c = c1;
                    349:                }
                    350:              else
                    351:                {
                    352:                  PATUNFETCH;
                    353:                  break;
                    354:                }
                    355:            }
                    356: 
                    357:          /* Star, etc. applied to an empty pattern is equivalent
                    358:             to an empty pattern.  */
                    359:          if (!laststart)
                    360:            break;
                    361: 
                    362:          /* Now we know whether 0 matches is allowed,
                    363:             and whether 2 or more matches is allowed.  */
                    364:          if (many_times_ok)
                    365:            {
                    366:              /* If more than one repetition is allowed,
                    367:                 put in a backward jump at the end.  */
                    368:              store_jump (b, maybe_finalize_jump, laststart - 3);
                    369:              b += 3;
                    370:            }
                    371:          insert_jump (on_failure_jump, laststart, b + 3, b);
                    372:          pending_exact = 0;
                    373:          b += 3;
                    374:          if (!zero_times_ok)
                    375:            {
                    376:              /* At least one repetition required: insert before the loop
                    377:                 a skip over the initial on-failure-jump instruction */
                    378:              insert_jump (dummy_failure_jump, laststart, laststart + 6, b);
                    379:              b += 3;
                    380:            }
                    381:          break;
                    382: 
                    383:        case '.':
                    384:          laststart = b;
                    385:          PATPUSH (anychar);
                    386:          break;
                    387: 
                    388:        case '[':
                    389:          while (b - bufp->buffer
                    390:                 > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH)
                    391:            /* Note that EXTEND_BUFFER clobbers c */
                    392:            EXTEND_BUFFER;
                    393: 
                    394:          laststart = b;
                    395:          if (*p == '^')
                    396:            PATPUSH (charset_not), p++;
                    397:          else
                    398:            PATPUSH (charset);
                    399:          p1 = p;
                    400: 
                    401:          PATPUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
                    402:          /* Clear the whole map */
                    403:          bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
                    404:          /* Read in characters and ranges, setting map bits */
                    405:          while (1)
                    406:            {
                    407:              PATFETCH (c);
                    408:              if (c == ']' && p != p1 + 1) break;
                    409:              if (*p == '-' && p[1] != ']')
                    410:                {
                    411:                  PATFETCH (c1);
                    412:                  PATFETCH (c1);
                    413:                  if (translate)
                    414:                    while (c <= c1)
                    415:                      {
                    416:                        register unsigned char mapped_c = translate[c];
                    417:                        b[mapped_c / BYTEWIDTH] |= 1 << (mapped_c % BYTEWIDTH);
                    418:                        c++;
                    419:                      }
                    420:                  else
                    421:                    while (c <= c1)
                    422:                      b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH), c++;
                    423:                }
                    424:              else
                    425:                {
                    426:                  if (translate)
                    427:                    c = translate[c];
                    428:                  b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH);
                    429:                }
                    430:            }
                    431:          /* Discard any bitmap bytes that are all 0 at the end of the map.
                    432:             Decrement the map-length byte too. */
                    433:          while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
                    434:            b[-1]--;
                    435:          b += b[-1];
                    436:          break;
                    437: 
                    438:        case '(':
                    439:          if (! (obscure_syntax & RE_NO_BK_PARENS))
                    440:            goto normal_char;
                    441:          else
                    442:            goto handle_open;
                    443: 
                    444:        case ')':
                    445:          if (! (obscure_syntax & RE_NO_BK_PARENS))
                    446:            goto normal_char;
                    447:          else
                    448:            goto handle_close;
                    449: 
                    450:        case '\n':
                    451:          if (! (obscure_syntax & RE_NEWLINE_OR))
                    452:            goto normal_char;
                    453:          else
                    454:            goto handle_bar;
                    455: 
                    456:        case '|':
                    457:          if (! (obscure_syntax & RE_NO_BK_VBAR))
                    458:            goto normal_char;
                    459:          else
                    460:            goto handle_bar;
                    461: 
                    462:         case '\\':
                    463:          if (p == pend) goto invalid_pattern;
                    464:          PATFETCH_RAW (c);
                    465:          switch (c)
                    466:            {
                    467:            case '(':
                    468:              if (obscure_syntax & RE_NO_BK_PARENS)
                    469:                goto normal_backsl;
                    470:            handle_open:
                    471:              if (stackp == stacke) goto nesting_too_deep;
                    472:              if (regnum < RE_NREGS)
                    473:                {
                    474:                  PATPUSH (start_memory);
                    475:                  PATPUSH (regnum);
                    476:                }
                    477:              *stackp++ = b - bufp->buffer;
                    478:              *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0;
                    479:              *stackp++ = regnum++;
                    480:              *stackp++ = begalt - bufp->buffer;
                    481:              fixup_jump = 0;
                    482:              laststart = 0;
                    483:              begalt = b;
                    484:              break;
                    485: 
                    486:            case ')':
                    487:              if (obscure_syntax & RE_NO_BK_PARENS)
                    488:                goto normal_backsl;
                    489:            handle_close:
                    490:              if (stackp == stackb) goto unmatched_close;
                    491:              begalt = *--stackp + bufp->buffer;
                    492:              if (fixup_jump)
                    493:                store_jump (fixup_jump, jump, b);
                    494:              if (stackp[-1] < RE_NREGS)
                    495:                {
                    496:                  PATPUSH (stop_memory);
                    497:                  PATPUSH (stackp[-1]);
                    498:                }
                    499:              stackp -= 2;
                    500:              fixup_jump = 0;
                    501:              if (*stackp)
                    502:                fixup_jump = *stackp + bufp->buffer - 1;
                    503:              laststart = *--stackp + bufp->buffer;
                    504:              break;
                    505: 
                    506:            case '|':
                    507:              if (obscure_syntax & RE_NO_BK_VBAR)
                    508:                goto normal_backsl;
                    509:            handle_bar:
                    510:              insert_jump (on_failure_jump, begalt, b + 6, b);
                    511:              pending_exact = 0;
                    512:              b += 3;
                    513:              if (fixup_jump)
                    514:                store_jump (fixup_jump, jump, b);
                    515:              fixup_jump = b;
                    516:              b += 3;
                    517:              laststart = 0;
                    518:              begalt = b;
                    519:              break;
                    520: 
                    521: #ifdef emacs
                    522:            case '=':
                    523:              PATPUSH (at_dot);
                    524:              break;
                    525: 
                    526:            case 's':   
                    527:              laststart = b;
                    528:              PATPUSH (syntaxspec);
                    529:              PATFETCH (c);
                    530:              PATPUSH (syntax_spec_code[c]);
                    531:              break;
                    532: 
                    533:            case 'S':
                    534:              laststart = b;
                    535:              PATPUSH (notsyntaxspec);
                    536:              PATFETCH (c);
                    537:              PATPUSH (syntax_spec_code[c]);
                    538:              break;
                    539: #endif emacs
                    540: 
                    541:            case 'w':
                    542:              laststart = b;
                    543:              PATPUSH (wordchar);
                    544:              break;
                    545: 
                    546:            case 'W':
                    547:              laststart = b;
                    548:              PATPUSH (notwordchar);
                    549:              break;
                    550: 
                    551:            case '<':
                    552:              PATPUSH (wordbeg);
                    553:              break;
                    554: 
                    555:            case '>':
                    556:              PATPUSH (wordend);
                    557:              break;
                    558: 
                    559:            case 'b':
                    560:              PATPUSH (wordbound);
                    561:              break;
                    562: 
                    563:            case 'B':
                    564:              PATPUSH (notwordbound);
                    565:              break;
                    566: 
                    567:            case '`':
                    568:              PATPUSH (begbuf);
                    569:              break;
                    570: 
                    571:            case '\'':
                    572:              PATPUSH (endbuf);
                    573:              break;
                    574: 
                    575:            case '1':
                    576:            case '2':
                    577:            case '3':
                    578:            case '4':
                    579:            case '5':
                    580:            case '6':
                    581:            case '7':
                    582:            case '8':
                    583:            case '9':
                    584:              c1 = c - '0';
                    585:              if (c1 >= regnum)
                    586:                goto normal_char;
                    587:              for (stackt = stackp - 2;  stackt > stackb;  stackt -= 4)
                    588:                if (*stackt == c1)
                    589:                  goto normal_char;
                    590:              laststart = b;
                    591:              PATPUSH (duplicate);
                    592:              PATPUSH (c1);
                    593:              break;
                    594: 
                    595:            case '+':
                    596:            case '?':
                    597:              if (obscure_syntax & RE_BK_PLUS_QM)
                    598:                goto handle_plus;
                    599: 
                    600:            default:
                    601:            normal_backsl:
                    602:              /* You might think it would be useful for \ to mean
                    603:                 not to translate; but if we don't translate it
                    604:                 it will never match anything.  */
                    605:              if (translate) c = translate[c];
                    606:              goto normal_char;
                    607:            }
                    608:          break;
                    609: 
                    610:        default:
                    611:        normal_char:
                    612:          if (!pending_exact || pending_exact + *pending_exact + 1 != b
                    613:              || *pending_exact == 0177 || *p == '*' || *p == '^'
                    614:              || ((obscure_syntax & RE_BK_PLUS_QM)
                    615:                  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
                    616:                  : (*p == '+' || *p == '?')))
                    617:            {
                    618:              laststart = b;
                    619:              PATPUSH (exactn);
                    620:              pending_exact = b;
                    621:              PATPUSH (0);
                    622:            }
                    623:          PATPUSH (c);
                    624:          (*pending_exact)++;
                    625:        }
                    626:     }
                    627: 
                    628:   if (fixup_jump)
                    629:     store_jump (fixup_jump, jump, b);
                    630: 
                    631:   if (stackp != stackb) goto unmatched_open;
                    632: 
                    633:   bufp->used = b - bufp->buffer;
                    634:   return 0;
                    635: 
                    636:  invalid_pattern:
                    637:   return "Invalid regular expression";
                    638: 
                    639:  unmatched_open:
                    640:   return "Unmatched \\(";
                    641: 
                    642:  unmatched_close:
                    643:   return "Unmatched \\)";
                    644: 
                    645:  end_of_pattern:
                    646:   return "Premature end of regular expression";
                    647: 
                    648:  nesting_too_deep:
                    649:   return "Nesting too deep";
                    650: 
                    651:  too_big:
                    652:   return "Regular expression too big";
                    653: 
                    654:  memory_exhausted:
                    655:   return "Memory exhausted";
                    656: }
                    657: 
                    /* Store at FROM a three-byte jump instruction that jumps to TO.
                       OPCODE is the opcode to store in the first byte.

                       The two bytes after the opcode receive the displacement from the
                       end of the instruction (FROM + 3) to TO: low-order byte first,
                       then the high-order byte.  A negative displacement is a backward
                       jump.

                       Always returns 0.  The function is declared to return int, so it
                       returns a definite value instead of falling off the end.  */

                    static int
                    store_jump (from, opcode, to)
                         char *from, *to;
                         char opcode;
                    {
                      from[0] = opcode;
                      from[1] = (to - (from + 3)) & 0377;
                      from[2] = (to - (from + 3)) >> 8;
                      return 0;
                    }
                    670: 
                    /* Open up space at char FROM, and insert there a jump to TO.
                       CURRENT_END gives the end of the storage now in use,
                       so we know how much data to copy up.
                       OP is the opcode of the jump to insert.

                       Everything in [FROM, CURRENT_END) is moved up by three bytes, so
                       the caller must make sure there is room for three more bytes at
                       CURRENT_END.  TO is used exactly as given; it is not adjusted for
                       the three bytes inserted here.

                       If you call this function, you must zero out pending_exact.

                       Always returns 0.  */

                    static int
                    insert_jump (op, from, to, current_end)
                         char op;
                         char *from, *to, *current_end;
                    {
                      register char *pto = current_end + 3;
                      register char *pfrom = current_end;

                      /* Source and destination overlap, so copy from the top down.  */
                      while (pfrom != from)
                        *--pto = *--pfrom;

                      store_jump (from, op, to);
                      return 0;
                    }
                    689: 
                    690: /* Given a pattern, compute a fastmap from it.
                    691:  The fastmap records which of the (1 << BYTEWIDTH) possible characters
                    692:  can start a string that matches the pattern.
                    693:  This fastmap is used by re_search to skip quickly over totally implausible text.
                    694: 
                    695:  The caller must supply the address of a (1 << BYTEWIDTH)-byte data area
                    696:  as bufp->fastmap.
                    697:  The other components of bufp describe the pattern to be used.  */
                    698: 
                    699: void
                    700: re_compile_fastmap (bufp)
                    701:      struct re_pattern_buffer *bufp;
                    702: {
                    703:   unsigned char *pattern = (unsigned char *) bufp->buffer;
                    704:   int size = bufp->used;
                    705:   register char *fastmap = bufp->fastmap;
                    706:   register unsigned char *p = pattern;
                    707:   register unsigned char *pend = pattern + size;
                    708:   register int j, k;
                    709:   unsigned char *translate = (unsigned char *) bufp->translate;
                    710: 
                    711:   unsigned char *stackb[NFAILURES];
                    712:   unsigned char **stackp = stackb;
                    713: 
                    714:   bzero (fastmap, (1 << BYTEWIDTH));
                    715:   bufp->fastmap_accurate = 1;
                    716:   bufp->can_be_null = 0;
                    717:       
                    718:   while (p)
                    719:     {
                    720:       if (p == pend)
                    721:        {
                    722:          bufp->can_be_null = 1;
                    723:          break;
                    724:        }
                    725: #ifdef SWITCH_ENUM_BUG
                    726:       switch ((int) ((enum regexpcode) *p++))
                    727: #else
                    728:       switch ((enum regexpcode) *p++)
                    729: #endif
                    730:        {
                    731:        case exactn:
                    732:          if (translate)
                    733:            fastmap[translate[p[1]]] = 1;
                    734:          else
                    735:            fastmap[p[1]] = 1;
                    736:          break;
                    737: 
                    738:         case begline:
                    739:         case before_dot:
                    740:        case at_dot:
                    741:        case after_dot:
                    742:        case begbuf:
                    743:        case endbuf:
                    744:        case wordbound:
                    745:        case notwordbound:
                    746:        case wordbeg:
                    747:        case wordend:
                    748:          continue;
                    749: 
                    750:        case endline:
                    751:          if (translate)
                    752:            fastmap[translate['\n']] = 1;
                    753:          else
                    754:            fastmap['\n'] = 1;
                    755:          if (bufp->can_be_null != 1)
                    756:            bufp->can_be_null = 2;
                    757:          break;
                    758: 
                    759:        case finalize_jump:
                    760:        case maybe_finalize_jump:
                    761:        case jump:
                    762:        case dummy_failure_jump:
                    763:          bufp->can_be_null = 1;
                    764:          j = *p++ & 0377;
                    765:          j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
                    766:          p += j + 1;           /* The 1 compensates for missing ++ above */
                    767:          if (j > 0)
                    768:            continue;
                    769:          /* Jump backward reached implies we just went through
                    770:             the body of a loop and matched nothing.
                    771:             Opcode jumped to should be an on_failure_jump.
                    772:             Just treat it like an ordinary jump.
                    773:             For a * loop, it has pushed its failure point already;
                    774:             if so, discard that as redundant.  */
                    775:          if ((enum regexpcode) *p != on_failure_jump)
                    776:            continue;
                    777:          p++;
                    778:          j = *p++ & 0377;
                    779:          j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
                    780:          p += j + 1;           /* The 1 compensates for missing ++ above */
                    781:          if (stackp != stackb && *stackp == p)
                    782:            stackp--;
                    783:          continue;
                    784:          
                    785:        case on_failure_jump:
                    786:          j = *p++ & 0377;
                    787:          j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
                    788:          p++;
                    789:          *++stackp = p + j;
                    790:          continue;
                    791: 
                    792:        case start_memory:
                    793:        case stop_memory:
                    794:          p++;
                    795:          continue;
                    796: 
                    797:        case duplicate:
                    798:          bufp->can_be_null = 1;
                    799:          fastmap['\n'] = 1;
                    800:        case anychar:
                    801:          for (j = 0; j < (1 << BYTEWIDTH); j++)
                    802:            if (j != '\n')
                    803:              fastmap[j] = 1;
                    804:          if (bufp->can_be_null)
                    805:            return;
                    806:          /* Don't return; check the alternative paths
                    807:             so we can set can_be_null if appropriate.  */
                    808:          break;
                    809: 
                    810:        case wordchar:
                    811:          for (j = 0; j < (1 << BYTEWIDTH); j++)
                    812:            if (SYNTAX (j) == Sword)
                    813:              fastmap[j] = 1;
                    814:          break;
                    815: 
                    816:        case notwordchar:
                    817:          for (j = 0; j < (1 << BYTEWIDTH); j++)
                    818:            if (SYNTAX (j) != Sword)
                    819:              fastmap[j] = 1;
                    820:          break;
                    821: 
                    822: #ifdef emacs
                    823:        case syntaxspec:
                    824:          k = *p++;
                    825:          for (j = 0; j < (1 << BYTEWIDTH); j++)
                    826:            if (SYNTAX (j) == (enum syntaxcode) k)
                    827:              fastmap[j] = 1;
                    828:          break;
                    829: 
                    830:        case notsyntaxspec:
                    831:          k = *p++;
                    832:          for (j = 0; j < (1 << BYTEWIDTH); j++)
                    833:            if (SYNTAX (j) != (enum syntaxcode) k)
                    834:              fastmap[j] = 1;
                    835:          break;
                    836: #endif emacs
                    837: 
                    838:        case charset:
                    839:          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
                    840:            if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
                    841:              {
                    842:                if (translate)
                    843:                  fastmap[translate[j]] = 1;
                    844:                else
                    845:                  fastmap[j] = 1;
                    846:              }
                    847:          break;
                    848: 
                    849:        case charset_not:
                    850:          /* Chars beyond end of map must be allowed */
                    851:          for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
                    852:            if (translate)
                    853:              fastmap[translate[j]] = 1;
                    854:            else
                    855:              fastmap[j] = 1;
                    856: 
                    857:          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
                    858:            if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
                    859:              {
                    860:                if (translate)
                    861:                  fastmap[translate[j]] = 1;
                    862:                else
                    863:                  fastmap[j] = 1;
                    864:              }
                    865:          break;
                    866:        }
                    867: 
                    868:       /* Get here means we have successfully found the possible starting characters
                    869:         of one path of the pattern.  We need not follow this path any farther.
                    870:         Instead, look at the next alternative remembered in the stack. */
                    871:       if (stackp != stackb)
                    872:        p = *stackp--;
                    873:       else
                    874:        break;
                    875:     }
                    876: }
                    877: 
                    /* Like re_search_2, below, but only one string is specified.

                       STRING (of SIZE bytes) is handed to re_search_2 as the second
                       string, with an empty first string, and matching may extend to
                       the end of STRING.  STARTPOS, RANGE and REGS are passed through
                       unchanged, and the value of re_search_2 is returned.  */

                    int
                    re_search (pbufp, string, size, startpos, range, regs)
                         struct re_pattern_buffer *pbufp;
                         char *string;
                         int size, startpos, range;
                         struct re_registers *regs;
                    {
                      /* re_search_2 has no prototype in scope here, so the null first
                         string must be passed as a real pointer: a bare 0 would be
                         passed as an int, which is the wrong argument type wherever
                         int and pointers differ in size.  */
                      return re_search_2 (pbufp, (char *) 0, 0, string, size,
                                          startpos, range, regs, size);
                    }
                    889: 
                    890: /* Like re_match_2 but tries first a match starting at index STARTPOS,
                    891:    then at STARTPOS + 1, and so on.
                    892:    RANGE is the number of places to try before giving up.
                    893:    If RANGE is negative, the starting positions tried are
                    894:     STARTPOS, STARTPOS - 1, etc.
                    895:    It is up to the caller to make sure that range is not so large
                    896:    as to take the starting position outside of the input strings.
                    897: 
                    898: The value returned is the position at which the match was found,
                    899:  or -1 if no match was found,
                    900:  or -2 if error (such as failure stack overflow).  */
                    901: 
                    902: int
                    903: re_search_2 (pbufp, string1, size1, string2, size2, startpos, range, regs, mstop)
                    904:      struct re_pattern_buffer *pbufp;
                    905:      char *string1, *string2;
                    906:      int size1, size2;
                    907:      int startpos;
                    908:      register int range;
                    909:      struct re_registers *regs;
                    910:      int mstop;
                    911: {
                    912:   register char *fastmap = pbufp->fastmap;
                    913:   register unsigned char *translate = (unsigned char *) pbufp->translate;
                    914:   int total = size1 + size2;
                    915:   int val;
                    916: 
                    917:   /* Update the fastmap now if not correct already */
                    918:   if (fastmap && !pbufp->fastmap_accurate)
                    919:     re_compile_fastmap (pbufp);
                    920:   
                    921:   /* Don't waste time in a long search for a pattern
                    922:      that says it is anchored.  */
                    923:   if (pbufp->used > 0 && (enum regexpcode) pbufp->buffer[0] == begbuf
                    924:       && range > 0)
                    925:     {
                    926:       if (startpos > 0)
                    927:        return -1;
                    928:       else
                    929:        range = 1;
                    930:     }
                    931: 
                    932:   while (1)
                    933:     {
                    934:       /* If a fastmap is supplied, skip quickly over characters
                    935:         that cannot possibly be the start of a match.
                    936:         Note, however, that if the pattern can possibly match
                    937:         the null string, we must test it at each starting point
                    938:         so that we take the first null string we get.  */
                    939: 
                    940:       if (fastmap && startpos < total && pbufp->can_be_null != 1)
                    941:        {
                    942:          if (range > 0)
                    943:            {
                    944:              register int lim = 0;
                    945:              register unsigned char *p;
                    946:              int irange = range;
                    947:              if (startpos < size1 && startpos + range >= size1)
                    948:                lim = range - (size1 - startpos);
                    949: 
                    950:              p = ((unsigned char *)
                    951:                   &(startpos >= size1 ? string2 - size1 : string1)[startpos]);
                    952: 
                    953:              if (translate)
                    954:                {
                    955:                  while (range > lim && !fastmap[translate[*p++]])
                    956:                    range--;
                    957:                }
                    958:              else
                    959:                {
                    960:                  while (range > lim && !fastmap[*p++])
                    961:                    range--;
                    962:                }
                    963:              startpos += irange - range;
                    964:            }
                    965:          else
                    966:            {
                    967:              register unsigned char c;
                    968:              if (startpos >= size1)
                    969:                c = string2[startpos - size1];
                    970:              else
                    971:                c = string1[startpos];
                    972:              c &= 0xff;
                    973:              if (translate ? !fastmap[translate[c]] : !fastmap[c])
                    974:                goto advance;
                    975:            }
                    976:        }
                    977: 
                    978:       if (range >= 0 && startpos == total
                    979:          && fastmap && pbufp->can_be_null == 0)
                    980:        return -1;
                    981: 
                    982:       val = re_match_2 (pbufp, string1, size1, string2, size2, startpos, regs,
                    983:                        mstop);
                    984:       /* Propagate error indication if worse than mere failure.  */
                    985:       if (val == -2)
                    986:        return -2;
                    987:       /* Return position on success.  */
                    988:       if (0 <= val)
                    989:        return startpos;
                    990: 
                    991: #ifdef C_ALLOCA
                    992:       alloca (0);
                    993: #endif /* C_ALLOCA */
                    994: 
                    995:     advance:
                    996:       if (!range) break;
                    997:       if (range > 0) range--, startpos++; else range++, startpos--;
                    998:     }
                    999:   return -1;
                   1000: }
                   1001: 
                   #ifndef emacs   /* emacs never uses this */
                   /* Like re_match_2, but only one string is specified.

                      STRING (of SIZE bytes) is handed to re_match_2 as the second string,
                      with an empty first string, and matching may extend to the end of
                      STRING.  POS and REGS are passed through unchanged, and the value
                      of re_match_2 is returned.  */
                   int
                   re_match (pbufp, string, size, pos, regs)
                        struct re_pattern_buffer *pbufp;
                        char *string;
                        int size, pos;
                        struct re_registers *regs;
                   {
                     /* re_match_2 has no prototype in scope here, so the null first
                        string must be passed as a real pointer: a bare 0 would be
                        passed as an int, which is the wrong argument type wherever
                        int and pointers differ in size.  */
                     return re_match_2 (pbufp, (unsigned char *) 0, 0,
                                        (unsigned char *) string, size, pos, regs, size);
                   }
                   #endif /* emacs */
                   1013: 
                   1014: /* Maximum size of failure stack.  Beyond this, overflow is an error.  */
                   1015: 
                   1016: int re_max_failures = 2000;
                   1017: 
                   1018: static int bcmp_translate();
                   1019: /* Match the pattern described by PBUFP
                   1020:    against data which is the virtual concatenation of STRING1 and STRING2.
                   1021:    SIZE1 and SIZE2 are the sizes of the two data strings.
                   1022:    Start the match at position POS.
                   1023:    Do not consider matching past the position MSTOP.
                   1024: 
                   1025:    If pbufp->fastmap is nonzero, then it had better be up to date.
                   1026: 
                   1027:    The reason that the data to match are specified as two components
                   1028:    which are to be regarded as concatenated
                   1029:    is so this function can be used directly on the contents of an Emacs buffer.
                   1030: 
                   1031:    -1 is returned if there is no match.  -2 is returned if there is
                   1032:    an error (such as match stack overflow).  Otherwise the value is the length
                   1033:    of the substring which was matched.  */
                   1034: 
                   1035: int
                   1036: re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop)
                   1037:      struct re_pattern_buffer *pbufp;
                   1038:      unsigned char *string1, *string2;
                   1039:      int size1, size2;
                   1040:      int pos;
                   1041:      struct re_registers *regs;
                   1042:      int mstop;
                   1043: {
                   1044:   register unsigned char *p = (unsigned char *) pbufp->buffer;
                   1045:   register unsigned char *pend = p + pbufp->used;
                   1046:   /* End of first string */
                   1047:   unsigned char *end1;
                   1048:   /* End of second string */
                   1049:   unsigned char *end2;
                   1050:   /* Pointer just past last char to consider matching */
                   1051:   unsigned char *end_match_1, *end_match_2;
                   1052:   register unsigned char *d, *dend;
                   1053:   register int mcnt;
                   1054:   unsigned char *translate = (unsigned char *) pbufp->translate;
                   1055: 
                   1056:  /* Failure point stack.  Each place that can handle a failure further down the line
                   1057:     pushes a failure point on this stack.  It consists of two char *'s.
                   1058:     The first one pushed is where to resume scanning the pattern;
                   1059:     the second pushed is where to resume scanning the strings.
                   1060:     If the latter is zero, the failure point is a "dummy".
                   1061:     If a failure happens and the innermost failure point is dormant,
                   1062:     it discards that failure point and tries the next one. */
                   1063: 
                   1064:   unsigned char *initial_stack[2 * NFAILURES];
                   1065:   unsigned char **stackb = initial_stack;
                   1066:   unsigned char **stackp = stackb, **stacke = &stackb[2 * NFAILURES];
                   1067: 
                   1068:   /* Information on the "contents" of registers.
                   1069:      These are pointers into the input strings; they record
                   1070:      just what was matched (on this attempt) by some part of the pattern.
                   1071:      The start_memory command stores the start of a register's contents
                   1072:      and the stop_memory command stores the end.
                   1073: 
                   1074:      At that point, regstart[regnum] points to the first character in the register,
                   1075:      regend[regnum] points to the first character beyond the end of the register,
                   1076:      regstart_seg1[regnum] is true iff regstart[regnum] points into string1,
                   1077:      and regend_seg1[regnum] is true iff regend[regnum] points into string1.  */
                   1078: 
                   1079:   unsigned char *regstart[RE_NREGS];
                   1080:   unsigned char *regend[RE_NREGS];
                   1081:   unsigned char regstart_seg1[RE_NREGS], regend_seg1[RE_NREGS];
                   1082: 
                   1083:   /* Set up pointers to ends of strings.
                   1084:      Don't allow the second string to be empty unless both are empty.  */
                   1085:   if (!size2)
                   1086:     {
                   1087:       string2 = string1;
                   1088:       size2 = size1;
                   1089:       string1 = 0;
                   1090:       size1 = 0;
                   1091:     }
                   1092:   end1 = string1 + size1;
                   1093:   end2 = string2 + size2;
                   1094: 
                   1095:   /* Compute where to stop matching, within the two strings */
                   1096:   if (mstop <= size1)
                   1097:     {
                   1098:       end_match_1 = string1 + mstop;
                   1099:       end_match_2 = string2;
                   1100:     }
                   1101:   else
                   1102:     {
                   1103:       end_match_1 = end1;
                   1104:       end_match_2 = string2 + mstop - size1;
                   1105:     }
                   1106: 
                   1107:   /* Initialize \) text positions to -1
                   1108:      to mark ones that no \( or \) has been seen for.  */
                   1109: 
                   1110:   for (mcnt = 0; mcnt < sizeof (regend) / sizeof (*regend); mcnt++)
                   1111:     regend[mcnt] = (unsigned char *) -1;
                   1112: 
                   1113:   /* `p' scans through the pattern as `d' scans through the data.
                   1114:      `dend' is the end of the input string that `d' points within.
                   1115:      `d' is advanced into the following input string whenever necessary,
                   1116:      but this happens before fetching;
                   1117:      therefore, at the beginning of the loop,
                   1118:      `d' can be pointing at the end of a string,
                   1119:      but it cannot equal string2.  */
                   1120: 
                   1121:   if (pos <= size1)
                   1122:     d = string1 + pos, dend = end_match_1;
                   1123:   else
                   1124:     d = string2 + pos - size1, dend = end_match_2;
                   1125: 
                   1126: /* Write PREFETCH; just before fetching a character with *d.  */
                   1127: #define PREFETCH \
                   1128:  while (d == dend)                                                 \
                   1129:   { if (dend == end_match_2) goto fail;  /* end of string2 => failure */   \
                   1130:     d = string2;  /* end of string1 => advance to string2. */       \
                   1131:     dend = end_match_2; }
                   1132: 
                   1133:   /* This loop loops over pattern commands.
                   1134:      It exits by returning from the function if match is complete,
                   1135:      or it drops through if match fails at this starting point in the input data. */
                   1136: 
                   1137:   while (1)
                   1138:     {
                   1139:       if (p == pend)
                   1140:        /* End of pattern means we have succeeded! */
                   1141:        {
                   1142:          /* If caller wants register contents data back, convert it to indices */
                   1143:          if (regs)
                   1144:            {
                   1145:              regs->start[0] = pos;
                   1146:              if (dend == end_match_1)
                   1147:                regs->end[0] = d - string1;
                   1148:              else
                   1149:                regs->end[0] = d - string2 + size1;
                   1150:              for (mcnt = 1; mcnt < RE_NREGS; mcnt++)
                   1151:                {
                   1152:                  if (regend[mcnt] == (unsigned char *) -1)
                   1153:                    {
                   1154:                      regs->start[mcnt] = -1;
                   1155:                      regs->end[mcnt] = -1;
                   1156:                      continue;
                   1157:                    }
                   1158:                  if (regstart_seg1[mcnt])
                   1159:                    regs->start[mcnt] = regstart[mcnt] - string1;
                   1160:                  else
                   1161:                    regs->start[mcnt] = regstart[mcnt] - string2 + size1;
                   1162:                  if (regend_seg1[mcnt])
                   1163:                    regs->end[mcnt] = regend[mcnt] - string1;
                   1164:                  else
                   1165:                    regs->end[mcnt] = regend[mcnt] - string2 + size1;
                   1166:                }
                   1167:            }
                   1168:          if (dend == end_match_1)
                   1169:            return (d - string1 - pos);
                   1170:          else
                   1171:            return d - string2 + size1 - pos;
                   1172:        }
                   1173: 
                   1174:       /* Otherwise match next pattern command */
                   1175: #ifdef SWITCH_ENUM_BUG
                   1176:       switch ((int) ((enum regexpcode) *p++))
                   1177: #else
                   1178:       switch ((enum regexpcode) *p++)
                   1179: #endif
                   1180:        {
                   1181: 
                   1182:        /* \( is represented by a start_memory, \) by a stop_memory.
                   1183:            Both of those commands contain a "register number" argument.
                   1184:            The text matched within the \( and \) is recorded under that number.
                   1185:            Then, \<digit> turns into a `duplicate' command which
                   1186:            is followed by the numeric value of <digit> as the register number. */
                   1187: 
                   1188:        case start_memory:
                   1189:          regstart[*p] = d;
                   1190:          regstart_seg1[*p++] = (dend == end_match_1);
                   1191:          break;
                   1192: 
                   1193:        case stop_memory:
                   1194:          regend[*p] = d;
                   1195:          regend_seg1[*p++] = (dend == end_match_1);
                   1196:          break;
                   1197: 
                   1198:        case duplicate:
                   1199:          {
                   1200:            int regno = *p++;   /* Get which register to match against */
                   1201:            register unsigned char *d2, *dend2;
                   1202: 
                   1203:            /* Don't allow matching a register that hasn't been used.
                   1204:               This isn't fully reliable in the current version,
                   1205:               but it is better than crashing.  */
                   1206:            if ((int) regend[regno] == -1)
                   1207:              goto fail;
                   1208: 
                   1209:            d2 = regstart[regno];
                   1210:            dend2 = ((regstart_seg1[regno] == regend_seg1[regno])
                   1211:                     ? regend[regno] : end_match_1);
                   1212:            while (1)
                   1213:              {
                   1214:                /* Advance to next segment in register contents, if necessary */
                   1215:                while (d2 == dend2)
                   1216:                  {
                   1217:                    if (dend2 == end_match_2) break;
                   1218:                    if (dend2 == regend[regno]) break;
                   1219:                    d2 = string2, dend2 = regend[regno];  /* end of string1 => advance to string2. */
                   1220:                  }
                   1221:                /* At end of register contents => success */
                   1222:                if (d2 == dend2) break;
                   1223: 
                   1224:                /* Advance to next segment in data being matched, if necessary */
                   1225:                PREFETCH;
                   1226: 
                   1227:                /* mcnt gets # consecutive chars to compare */
                   1228:                mcnt = dend - d;
                   1229:                if (mcnt > dend2 - d2)
                   1230:                  mcnt = dend2 - d2;
                   1231:                /* Compare that many; failure if mismatch, else skip them. */
                   1232:                if (translate ? bcmp_translate (d, d2, mcnt, translate) : bcmp (d, d2, mcnt))
                   1233:                  goto fail;
                   1234:                d += mcnt, d2 += mcnt;
                   1235:              }
                   1236:          }
                   1237:          break;
                   1238: 
                   1239:        case anychar:
                   1240:          /* fetch a data character */
                   1241:          PREFETCH;
                   1242:          /* Match anything but a newline.  */
                   1243:          if ((translate ? translate[*d++] : *d++) == '\n')
                   1244:            goto fail;
                   1245:          break;
                   1246: 
                   1247:        case charset:
                   1248:        case charset_not:
                   1249:          {
                   1250:            /* Nonzero for charset_not */
                   1251:            int not = 0;
                   1252:            register int c;
                   1253:            if (*(p - 1) == (unsigned char) charset_not)
                   1254:              not = 1;
                   1255: 
                   1256:            /* fetch a data character */
                   1257:            PREFETCH;
                   1258: 
                   1259:            if (translate)
                   1260:              c = translate [*d];
                   1261:            else
                   1262:              c = *d;
                   1263: 
                   1264:            if (c < *p * BYTEWIDTH
                   1265:                && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
                   1266:              not = !not;
                   1267: 
                   1268:            p += 1 + *p;
                   1269: 
                   1270:            if (!not) goto fail;
                   1271:            d++;
                   1272:            break;
                   1273:          }
                   1274: 
                   1275:        case begline:
                   1276:          if (d == string1 || d[-1] == '\n')
                   1277:            break;
                   1278:          goto fail;
                   1279: 
                   1280:        case endline:
                   1281:          if (d == end2
                   1282:              || (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n'))
                   1283:            break;
                   1284:          goto fail;
                   1285: 
                   1286:        /* "or" constructs ("|") are handled by starting each alternative
                   1287:            with an on_failure_jump that points to the start of the next alternative.
                   1288:            Each alternative except the last ends with a jump to the joining point.
                   1289:            (Actually, each jump except for the last one really jumps
                   1290:             to the following jump, because tensioning the jumps is a hassle.) */
                   1291: 
                   1292:        /* The start of a stupid repeat has an on_failure_jump that points
                   1293:           past the end of the repeat text.
                   1294:           This makes a failure point so that, on failure to match a repetition,
                   1295:           matching restarts past as many repetitions have been found
                   1296:           with no way to fail and look for another one.  */
                   1297: 
                   1298:        /* A smart repeat is similar but loops back to the on_failure_jump
                   1299:           so that each repetition makes another failure point. */
                   1300: 
                   1301:        case on_failure_jump:
                   1302:          if (stackp == stacke)
                   1303:            {
                   1304:              unsigned char **stackx;
                   1305:              if (stacke - stackb > re_max_failures)
                   1306:                return -2;
                   1307:              stackx = (unsigned char **) alloca (2 * (stacke - stackb)
                   1308:                                         * sizeof (char *));
                   1309:              bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
                   1310:              stackp = stackx + (stackp - stackb);
                   1311:              stacke = stackx + 2 * (stacke - stackb);
                   1312:              stackb = stackx;
                   1313:            }
                   1314:          mcnt = *p++ & 0377;
                   1315:          mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
                   1316:          p++;
                   1317:          *stackp++ = mcnt + p;
                   1318:          *stackp++ = d;
                   1319:          break;
                   1320: 
                   1321:        /* The end of a smart repeat has an maybe_finalize_jump back.
                   1322:           Change it either to a finalize_jump or an ordinary jump. */
                   1323: 
                   1324:        case maybe_finalize_jump:
                   1325:          mcnt = *p++ & 0377;
                   1326:          mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
                   1327:          p++;
                   1328:          /* Compare what follows with the begining of the repeat.
                   1329:             If we can establish that there is nothing that they would
                   1330:             both match, we can change to finalize_jump */
                   1331:          if (p == pend)
                   1332:            p[-3] = (unsigned char) finalize_jump;
                   1333:          else if (*p == (unsigned char) exactn
                   1334:                   || *p == (unsigned char) endline)
                   1335:            {
                   1336:              register int c = *p == (unsigned char) endline ? '\n' : p[2];
                   1337:              register unsigned char *p1 = p + mcnt;
                   1338:              /* p1[0] ... p1[2] are an on_failure_jump.
                   1339:                 Examine what follows that */
                   1340:              if (p1[3] == (unsigned char) exactn && p1[5] != c)
                   1341:                p[-3] = (unsigned char) finalize_jump;
                   1342:              else if (p1[3] == (unsigned char) charset
                   1343:                       || p1[3] == (unsigned char) charset_not)
                   1344:                {
                   1345:                  int not = p1[3] == (unsigned char) charset_not;
                   1346:                  if (c < p1[4] * BYTEWIDTH
                   1347:                      && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
                   1348:                    not = !not;
                   1349:                  /* not is 1 if c would match */
                   1350:                  /* That means it is not safe to finalize */
                   1351:                  if (!not)
                   1352:                    p[-3] = (unsigned char) finalize_jump;
                   1353:                }
                   1354:            }
                   1355:          p -= 2;
                   1356:          if (p[-1] != (unsigned char) finalize_jump)
                   1357:            {
                   1358:              p[-1] = (unsigned char) jump;
                   1359:              goto nofinalize;
                   1360:            }
                   1361: 
                   1362:        /* The end of a stupid repeat has a finalize-jump
                   1363:           back to the start, where another failure point will be made
                   1364:           which will point after all the repetitions found so far. */
                   1365: 
                   1366:        case finalize_jump:
                   1367:          stackp -= 2;
                   1368: 
                   1369:        case jump:
                   1370:        nofinalize:
                   1371:          mcnt = *p++ & 0377;
                   1372:          mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
                   1373:          p += mcnt + 1;        /* The 1 compensates for missing ++ above */
                   1374:          break;
                   1375: 
                   1376:        case dummy_failure_jump:
                   1377:          if (stackp == stacke)
                   1378:            {
                   1379:              unsigned char **stackx
                   1380:                = (unsigned char **) alloca (2 * (stacke - stackb)
                   1381:                                             * sizeof (char *));
                   1382:              bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
                   1383:              stackp = stackx + (stackp - stackb);
                   1384:              stacke = stackx + 2 * (stacke - stackb);
                   1385:              stackb = stackx;
                   1386:            }
                   1387:          *stackp++ = 0;
                   1388:          *stackp++ = 0;
                   1389:          goto nofinalize;
                   1390: 
                   1391:        case wordbound:
                   1392:          if (d == string1  /* Points to first char */
                   1393:              || d == end2  /* Points to end */
                   1394:              || (d == end1 && size2 == 0)) /* Points to end */
                   1395:            break;
                   1396:          if ((SYNTAX (d[-1]) == Sword)
                   1397:              != (SYNTAX (d == end1 ? *string2 : *d) == Sword))
                   1398:            break;
                   1399:          goto fail;
                   1400: 
                   1401:        case notwordbound:
                   1402:          if (d == string1  /* Points to first char */
                   1403:              || d == end2  /* Points to end */
                   1404:              || (d == end1 && size2 == 0)) /* Points to end */
                   1405:            goto fail;
                   1406:          if ((SYNTAX (d[-1]) == Sword)
                   1407:              != (SYNTAX (d == end1 ? *string2 : *d) == Sword))
                   1408:            goto fail;
                   1409:          break;
                   1410: 
                   1411:        case wordbeg:
                   1412:          if (d == end2  /* Points to end */
                   1413:              || (d == end1 && size2 == 0) /* Points to end */
                   1414:              || SYNTAX (* (d == end1 ? string2 : d)) != Sword) /* Next char not a letter */
                   1415:            goto fail;
                   1416:          if (d == string1  /* Points to first char */
                   1417:              || SYNTAX (d[-1]) != Sword)  /* prev char not letter */
                   1418:            break;
                   1419:          goto fail;
                   1420: 
                   1421:        case wordend:
                   1422:          if (d == string1  /* Points to first char */
                   1423:              || SYNTAX (d[-1]) != Sword)  /* prev char not letter */
                   1424:            goto fail;
                   1425:          if (d == end2  /* Points to end */
                   1426:              || (d == end1 && size2 == 0) /* Points to end */
                   1427:              || SYNTAX (d == end1 ? *string2 : *d) != Sword) /* Next char not a letter */
                   1428:            break;
                   1429:          goto fail;
                   1430: 
                   1431: #ifdef emacs
                   1432:        case before_dot:
                   1433:          if (PTR_CHAR_POS (d) + 1 >= point)
                   1434:            goto fail;
                   1435:          break;
                   1436: 
                   1437:        case at_dot:
                   1438:          if (PTR_CHAR_POS (d) + 1 != point)
                   1439:            goto fail;
                   1440:          break;
                   1441: 
                   1442:        case after_dot:
                   1443:          if (PTR_CHAR_POS (d) + 1 <= point)
                   1444:            goto fail;
                   1445:          break;
                   1446: 
                   1447:        case wordchar:
                   1448:          mcnt = (int) Sword;
                   1449:          goto matchsyntax;
                   1450: 
                   1451:        case syntaxspec:
                   1452:          mcnt = *p++;
                   1453:        matchsyntax:
                   1454:          PREFETCH;
                   1455:          if (SYNTAX (*d++) != (enum syntaxcode) mcnt) goto fail;
                   1456:          break;
                   1457:          
                   1458:        case notwordchar:
                   1459:          mcnt = (int) Sword;
                   1460:          goto matchnotsyntax;
                   1461: 
                   1462:        case notsyntaxspec:
                   1463:          mcnt = *p++;
                   1464:        matchnotsyntax:
                   1465:          PREFETCH;
                   1466:          if (SYNTAX (*d++) == (enum syntaxcode) mcnt) goto fail;
                   1467:          break;
                   1468: #else
                   1469:        case wordchar:
                   1470:          PREFETCH;
                   1471:          if (SYNTAX (*d++) == 0) goto fail;
                   1472:          break;
                   1473:          
                   1474:        case notwordchar:
                   1475:          PREFETCH;
                   1476:          if (SYNTAX (*d++) != 0) goto fail;
                   1477:          break;
                   1478: #endif not emacs
                   1479: 
                   1480:        case begbuf:
                   1481:          if (d == string1)     /* Note, d cannot equal string2 */
                   1482:            break;              /* unless string1 == string2.  */
                   1483:          goto fail;
                   1484: 
                   1485:        case endbuf:
                   1486:          if (d == end2 || (d == end1 && size2 == 0))
                   1487:            break;
                   1488:          goto fail;
                   1489: 
                   1490:        case exactn:
                   1491:          /* Match the next few pattern characters exactly.
                   1492:             mcnt is how many characters to match. */
                   1493:          mcnt = *p++;
                   1494:          if (translate)
                   1495:            {
                   1496:              do
                   1497:                {
                   1498:                  PREFETCH;
                   1499:                  if (translate[*d++] != *p++) goto fail;
                   1500:                }
                   1501:              while (--mcnt);
                   1502:            }
                   1503:          else
                   1504:            {
                   1505:              do
                   1506:                {
                   1507:                  PREFETCH;
                   1508:                  if (*d++ != *p++) goto fail;
                   1509:                }
                   1510:              while (--mcnt);
                   1511:            }
                   1512:          break;
                   1513:        }
                   1514:       continue;    /* Successfully matched one pattern command; keep matching */
                   1515: 
                   1516:       /* Jump here if any matching operation fails. */
                   1517:     fail:
                   1518:       if (stackp != stackb)
                   1519:        /* A restart point is known.  Restart there and pop it. */
                   1520:        {
                   1521:          if (!stackp[-2])
                   1522:            {   /* If innermost failure point is dormant, flush it and keep looking */
                   1523:              stackp -= 2;
                   1524:              goto fail;
                   1525:            }
                   1526:          d = *--stackp;
                   1527:          p = *--stackp;
                   1528:          if (d >= string1 && d <= end1)
                   1529:            dend = end_match_1;
                   1530:        }
                   1531:       else break;   /* Matching at this starting point really fails! */
                   1532:     }
                   1533:   return -1;         /* Failure to match */
                   1534: }
                   1535: 
                   1536: static int
                   1537: bcmp_translate (s1, s2, len, translate)
                   1538:      unsigned char *s1, *s2;
                   1539:      register int len;
                   1540:      unsigned char *translate;
                   1541: {
                   1542:   register unsigned char *p1 = s1, *p2 = s2;
                   1543:   while (len)
                   1544:     {
                   1545:       if (translate [*p1++] != translate [*p2++]) return 1;
                   1546:       len--;
                   1547:     }
                   1548:   return 0;
                   1549: }
                   1550: 
                   1551: /* Entry points compatible with bsd4.2 regex library */
                   1552: 
                   1553: #ifndef emacs
                   1554: 
                   1555: static struct re_pattern_buffer re_comp_buf;
                   1556: 
                   1557: char *
                   1558: re_comp (s)
                   1559:      char *s;
                   1560: {
                   1561:   if (!s)
                   1562:     {
                   1563:       if (!re_comp_buf.buffer)
                   1564:        return "No previous regular expression";
                   1565:       return 0;
                   1566:     }
                   1567: 
                   1568:   if (!re_comp_buf.buffer)
                   1569:     {
                   1570:       if (!(re_comp_buf.buffer = (char *) malloc (200)))
                   1571:        return "Memory exhausted";
                   1572:       re_comp_buf.allocated = 200;
                   1573:       if (!(re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH)))
                   1574:        return "Memory exhausted";
                   1575:     }
                   1576:   return re_compile_pattern (s, strlen (s), &re_comp_buf);
                   1577: }
                   1578: 
                   1579: int
                   1580: re_exec (s)
                   1581:      char *s;
                   1582: {
                   1583:   int len = strlen (s);
                   1584:   return 0 <= re_search (&re_comp_buf, s, len, 0, len, 0);
                   1585: }
                   1586: 
                   1587: #endif /* emacs */
                   1588: 
                   1589: #ifdef test
                   1590: 
                   1591: #include <stdio.h>
                   1592: 
                   1593: /* Indexed by a character, gives the upper case equivalent of the character */
                   1594: 
                   1595: static char upcase[0400] = 
                   1596:   { 000, 001, 002, 003, 004, 005, 006, 007,
                   1597:     010, 011, 012, 013, 014, 015, 016, 017,
                   1598:     020, 021, 022, 023, 024, 025, 026, 027,
                   1599:     030, 031, 032, 033, 034, 035, 036, 037,
                   1600:     040, 041, 042, 043, 044, 045, 046, 047,
                   1601:     050, 051, 052, 053, 054, 055, 056, 057,
                   1602:     060, 061, 062, 063, 064, 065, 066, 067,
                   1603:     070, 071, 072, 073, 074, 075, 076, 077,
                   1604:     0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
                   1605:     0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
                   1606:     0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
                   1607:     0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
                   1608:     0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
                   1609:     0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
                   1610:     0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
                   1611:     0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177,
                   1612:     0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
                   1613:     0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
                   1614:     0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
                   1615:     0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
                   1616:     0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
                   1617:     0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
                   1618:     0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
                   1619:     0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
                   1620:     0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
                   1621:     0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
                   1622:     0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
                   1623:     0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
                   1624:     0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
                   1625:     0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
                   1626:     0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
                   1627:     0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
                   1628:   };
                   1629: 
                   1630: main (argc, argv)
                   1631:      int argc;
                   1632:      char **argv;
                   1633: {
                   1634:   char pat[80];
                   1635:   struct re_pattern_buffer buf;
                   1636:   int i;
                   1637:   char c;
                   1638:   char fastmap[(1 << BYTEWIDTH)];
                   1639: 
                   1640:   /* Allow a command argument to specify the style of syntax.  */
                   1641:   if (argc > 1)
                   1642:     obscure_syntax = atoi (argv[1]);
                   1643: 
                   1644:   buf.allocated = 40;
                   1645:   buf.buffer = (char *) malloc (buf.allocated);
                   1646:   buf.fastmap = fastmap;
                   1647:   buf.translate = upcase;
                   1648: 
                   1649:   while (1)
                   1650:     {
                   1651:       gets (pat);
                   1652: 
                   1653:       if (*pat)
                   1654:        {
                   1655:           re_compile_pattern (pat, strlen(pat), &buf);
                   1656: 
                   1657:          for (i = 0; i < buf.used; i++)
                   1658:            printchar (buf.buffer[i]);
                   1659: 
                   1660:          putchar ('\n');
                   1661: 
                   1662:          printf ("%d allocated, %d used.\n", buf.allocated, buf.used);
                   1663: 
                   1664:          re_compile_fastmap (&buf);
                   1665:          printf ("Allowed by fastmap: ");
                   1666:          for (i = 0; i < (1 << BYTEWIDTH); i++)
                   1667:            if (fastmap[i]) printchar (i);
                   1668:          putchar ('\n');
                   1669:        }
                   1670: 
                   1671:       gets (pat);      /* Now read the string to match against */
                   1672: 
                   1673:       i = re_match (&buf, pat, strlen (pat), 0, 0);
                   1674:       printf ("Match value %d.\n", i);
                   1675:     }
                   1676: }
                   1677: 
                   1678: #ifdef NOTDEF
                   1679: print_buf (bufp)
                   1680:      struct re_pattern_buffer *bufp;
                   1681: {
                   1682:   int i;
                   1683: 
                   1684:   printf ("buf is :\n----------------\n");
                   1685:   for (i = 0; i < bufp->used; i++)
                   1686:     printchar (bufp->buffer[i]);
                   1687:   
                   1688:   printf ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used);
                   1689:   
                   1690:   printf ("Allowed by fastmap: ");
                   1691:   for (i = 0; i < (1 << BYTEWIDTH); i++)
                   1692:     if (bufp->fastmap[i])
                   1693:       printchar (i);
                   1694:   printf ("\nAllowed by translate: ");
                   1695:   if (bufp->translate)
                   1696:     for (i = 0; i < (1 << BYTEWIDTH); i++)
                   1697:       if (bufp->translate[i])
                   1698:        printchar (i);
                   1699:   printf ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't");
                   1700:   printf ("can %s be null\n----------", bufp->can_be_null ? "" : "not");
                   1701: }
                   1702: #endif
                   1703: 
                   1704: printchar (c)
                   1705:      char c;
                   1706: {
                   1707:   if (c < 041 || c >= 0177)
                   1708:     {
                   1709:       putchar ('\\');
                   1710:       putchar (((c >> 6) & 3) + '0');
                   1711:       putchar (((c >> 3) & 7) + '0');
                   1712:       putchar ((c & 7) + '0');
                   1713:     }
                   1714:   else
                   1715:     putchar (c);
                   1716: }
                   1717: 
                   1718: error (string)
                   1719:      char *string;
                   1720: {
                   1721:   puts (string);
                   1722:   exit (1);
                   1723: }
                   1724: 
                   1725: #endif test

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.