Annotation of GNUtools/emacs/src/regex.c, revision 1.1

1.1     ! root        1: /* Extended regular expression matching and search.
        !             2:    Copyright (C) 1985 Free Software Foundation, Inc.
        !             3: 
        !             4:     This program is free software; you can redistribute it and/or modify
        !             5:     it under the terms of the GNU General Public License as published by
        !             6:     the Free Software Foundation; either version 1, or (at your option)
        !             7:     any later version.
        !             8: 
        !             9:     This program is distributed in the hope that it will be useful,
        !            10:     but WITHOUT ANY WARRANTY; without even the implied warranty of
        !            11:     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
        !            12:     GNU General Public License for more details.
        !            13: 
        !            14:     You should have received a copy of the GNU General Public License
        !            15:     along with this program; if not, write to the Free Software
        !            16:     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
        !            17: 
        !            18: In other words, you are welcome to use, share and improve this program.
        !            19: You are forbidden to forbid anyone else to use, share and improve
        !            20: what you give them.   Help stamp out software-hoarding!  */
        !            21: 
        !            22: 
        !            23: /* To test, compile with -Dtest.
        !            24:  This Dtestable feature turns this into a self-contained program
        !            25:  which reads a pattern, describes how it compiles,
        !            26:  then reads a string and searches for it.  */
        !            27: 
        !            28: 
        !            29: #ifdef emacs
        !            30: 
        !            31: /* The `emacs' switch turns on certain special matching commands
        !            32:  that make sense only in emacs. */
        !            33: 
        !            34: #include "config.h"
        !            35: #include "lisp.h"
        !            36: #include "buffer.h"
        !            37: #include "syntax.h"
        !            38: 
        !            39: #else  /* not emacs */
        !            40: 
        !            41: /*
        !            42:  * Define the syntax stuff, so we can do the \<...\> things.
        !            43:  */
        !            44: 
        !            45: #ifndef Sword /* must be non-zero in some of the tests below... */
        !            46: #define Sword 1
        !            47: #endif
        !            48: 
        !            49: #define SYNTAX(c) re_syntax_table[c]
        !            50: 
        !            51: #ifdef SYNTAX_TABLE
        !            52: 
        !            53: char *re_syntax_table;
        !            54: 
        !            55: #else
        !            56: 
        !            57: static char re_syntax_table[256];
        !            58: 
        !            59: static void
        !            60: init_syntax_once ()
        !            61: {
        !            62:    register int c;
        !            63:    static int done = 0;
        !            64: 
        !            65:    if (done)
        !            66:      return;
        !            67: 
        !            68:    bzero (re_syntax_table, sizeof re_syntax_table);
        !            69: 
        !            70:    for (c = 'a'; c <= 'z'; c++)
        !            71:      re_syntax_table[c] = Sword;
        !            72: 
        !            73:    for (c = 'A'; c <= 'Z'; c++)
        !            74:      re_syntax_table[c] = Sword;
        !            75: 
        !            76:    for (c = '0'; c <= '9'; c++)
        !            77:      re_syntax_table[c] = Sword;
        !            78: 
        !            79:    done = 1;
        !            80: }
        !            81: 
        !            82: #endif /* SYNTAX_TABLE */
        !            83: #endif /* not emacs */
        !            84: 
        !            85: #include "regex.h"
        !            86: 
        !            87: /* Number of failure points to allocate space for initially,
        !            88:  when matching.  If this number is exceeded, more space is allocated,
        !            89:  so it is not a hard limit.  */
        !            90: 
        !            91: #ifndef NFAILURES
        !            92: #define NFAILURES 80
        !            93: #endif NFAILURES
        !            94: 
        !            95: /* width of a byte in bits */
        !            96: 
        !            97: #define BYTEWIDTH 8
        !            98: 
        !            99: #ifndef SIGN_EXTEND_CHAR
        !           100: #define SIGN_EXTEND_CHAR(x) (x)
        !           101: #endif
        !           102: 
        !           103: static int obscure_syntax = 0;
        !           104: 
        !           105: /* Specify the precise syntax of regexp for compilation.
        !           106:    This provides for compatibility for various utilities
        !           107:    which historically have different, incompatible syntaxes.
        !           108: 
        !           109:    The argument SYNTAX is a bit-mask containing the two bits
        !           110:    RE_NO_BK_PARENS and RE_NO_BK_VBAR.  */
        !           111: 
        !           112: int
        !           113: re_set_syntax (syntax)
        !           114: {
        !           115:   int ret;
        !           116: 
        !           117:   ret = obscure_syntax;
        !           118:   obscure_syntax = syntax;
        !           119:   return ret;
        !           120: }
        !           121: 
        !           122: /* re_compile_pattern takes a regular-expression string
        !           123:    and converts it into a buffer full of byte commands for matching.
        !           124: 
        !           125:   PATTERN   is the address of the pattern string
        !           126:   SIZE      is the length of it.
        !           127:   BUFP     is a  struct re_pattern_buffer *  which points to the info
        !           128:            on where to store the byte commands.
        !           129:            This structure contains a  char *  which points to the
        !           130:            actual space, which should have been obtained with malloc.
        !           131:            re_compile_pattern may use  realloc  to grow the buffer space.
        !           132: 
        !           133:   The number of bytes of commands can be found out by looking in
        !           134:   the  struct re_pattern_buffer  that bufp pointed to,
        !           135:   after re_compile_pattern returns.
        !           136: */
        !           137: 
        !           138: #define PATPUSH(ch) (*b++ = (char) (ch))
        !           139: 
        !           140: #define PATFETCH(c) \
        !           141:  {if (p == pend) goto end_of_pattern; \
        !           142:   c = * (unsigned char *) p++; \
        !           143:   if (translate) c = translate[c]; }
        !           144: 
        !           145: #define PATFETCH_RAW(c) \
        !           146:  {if (p == pend) goto end_of_pattern; \
        !           147:   c = * (unsigned char *) p++; }
        !           148: 
        !           149: #define PATUNFETCH p--
        !           150: 
        !           151: #define EXTEND_BUFFER \
        !           152:   { char *old_buffer = bufp->buffer; \
        !           153:     if (bufp->allocated == (1<<16)) goto too_big; \
        !           154:     bufp->allocated *= 2; \
        !           155:     if (bufp->allocated > (1<<16)) bufp->allocated = (1<<16); \
        !           156:     if (!(bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated))) \
        !           157:       goto memory_exhausted; \
        !           158:     c = bufp->buffer - old_buffer; \
        !           159:     b += c; \
        !           160:     if (fixup_jump) \
        !           161:       fixup_jump += c; \
        !           162:     if (laststart) \
        !           163:       laststart += c; \
        !           164:     begalt += c; \
        !           165:     if (pending_exact) \
        !           166:       pending_exact += c; \
        !           167:   }
        !           168: 
        !           169: static int store_jump (), insert_jump ();
        !           170: 
        !           171: char *
        !           172: re_compile_pattern (pattern, size, bufp)
        !           173:      char *pattern;
        !           174:      int size;
        !           175:      struct re_pattern_buffer *bufp;
        !           176: {
        !           177:   register char *b = bufp->buffer;
        !           178:   register char *p = pattern;
        !           179:   char *pend = pattern + size;
        !           180:   register unsigned c, c1;
        !           181:   char *p1;
        !           182:   unsigned char *translate = (unsigned char *) bufp->translate;
        !           183: 
        !           184:   /* address of the count-byte of the most recently inserted "exactn" command.
        !           185:     This makes it possible to tell whether a new exact-match character
        !           186:     can be added to that command or requires a new "exactn" command. */
        !           187:      
        !           188:   char *pending_exact = 0;
        !           189: 
        !           190:   /* address of the place where a forward-jump should go
        !           191:     to the end of the containing expression.
        !           192:     Each alternative of an "or", except the last, ends with a forward-jump
        !           193:     of this sort. */
        !           194: 
        !           195:   char *fixup_jump = 0;
        !           196: 
        !           197:   /* address of start of the most recently finished expression.
        !           198:     This tells postfix * where to find the start of its operand. */
        !           199: 
        !           200:   char *laststart = 0;
        !           201: 
        !           202:   /* In processing a repeat, 1 means zero matches is allowed */
        !           203: 
        !           204:   char zero_times_ok;
        !           205: 
        !           206:   /* In processing a repeat, 1 means many matches is allowed */
        !           207: 
        !           208:   char many_times_ok;
        !           209: 
        !           210:   /* address of beginning of regexp, or inside of last \( */
        !           211: 
        !           212:   char *begalt = b;
        !           213: 
        !           214:   /* Stack of information saved by \( and restored by \).
        !           215:      Four stack elements are pushed by each \(:
        !           216:        First, the value of b.
        !           217:        Second, the value of fixup_jump.
        !           218:        Third, the value of regnum.
        !           219:        Fourth, the value of begalt.  */
        !           220: 
        !           221:   int stackb[40];
        !           222:   int *stackp = stackb;
        !           223:   int *stacke = stackb + 40;
        !           224:   int *stackt;
        !           225: 
        !           226:   /* Counts \('s as they are encountered.  Remembered for the matching \),
        !           227:      where it becomes the "register number" to put in the stop_memory command */
        !           228: 
        !           229:   int regnum = 1;
        !           230: 
        !           231:   bufp->fastmap_accurate = 0;
        !           232: 
        !           233: #ifndef emacs
        !           234: #ifndef SYNTAX_TABLE
        !           235:   /*
        !           236:    * Initialize the syntax table.
        !           237:    */
        !           238:    init_syntax_once();
        !           239: #endif
        !           240: #endif
        !           241: 
        !           242:   if (bufp->allocated == 0)
        !           243:     {
        !           244:       bufp->allocated = 28;
        !           245:       if (bufp->buffer)
        !           246:        /* EXTEND_BUFFER loses when bufp->allocated is 0 */
        !           247:        bufp->buffer = (char *) realloc (bufp->buffer, 28);
        !           248:       else
        !           249:        /* Caller did not allocate a buffer.  Do it for him */
        !           250:        bufp->buffer = (char *) malloc (28);
        !           251:       if (!bufp->buffer) goto memory_exhausted;
        !           252:       begalt = b = bufp->buffer;
        !           253:     }
        !           254: 
        !           255:   while (p != pend)
        !           256:     {
        !           257:       if (b - bufp->buffer > bufp->allocated - 10)
        !           258:        /* Note that EXTEND_BUFFER clobbers c */
        !           259:        EXTEND_BUFFER;
        !           260: 
        !           261:       PATFETCH (c);
        !           262: 
        !           263:       switch (c)
        !           264:        {
        !           265:        case '$':
        !           266:          if (obscure_syntax & RE_TIGHT_VBAR)
        !           267:            {
        !           268:              if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS) && p != pend)
        !           269:                goto normal_char;
        !           270:              /* Make operand of last vbar end before this `$'.  */
        !           271:              if (fixup_jump)
        !           272:                store_jump (fixup_jump, jump, b);
        !           273:              fixup_jump = 0;
        !           274:              PATPUSH (endline);
        !           275:              break;
        !           276:            }
        !           277: 
        !           278:          /* $ means succeed if at end of line, but only in special contexts.
        !           279:            If randomly in the middle of a pattern, it is a normal character. */
        !           280:          if (p == pend || *p == '\n'
        !           281:              || (obscure_syntax & RE_CONTEXT_INDEP_OPS)
        !           282:              || (obscure_syntax & RE_NO_BK_PARENS
        !           283:                  ? *p == ')'
        !           284:                  : *p == '\\' && p[1] == ')')
        !           285:              || (obscure_syntax & RE_NO_BK_VBAR
        !           286:                  ? *p == '|'
        !           287:                  : *p == '\\' && p[1] == '|'))
        !           288:            {
        !           289:              PATPUSH (endline);
        !           290:              break;
        !           291:            }
        !           292:          goto normal_char;
        !           293: 
        !           294:        case '^':
        !           295:          /* ^ means succeed if at beg of line, but only if no preceding pattern. */
        !           296: 
        !           297:          if (laststart && p[-2] != '\n'
        !           298:              && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
        !           299:            goto normal_char;
        !           300:          if (obscure_syntax & RE_TIGHT_VBAR)
        !           301:            {
        !           302:              if (p != pattern + 1
        !           303:                  && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
        !           304:                goto normal_char;
        !           305:              PATPUSH (begline);
        !           306:              begalt = b;
        !           307:            }
        !           308:          else
        !           309:            PATPUSH (begline);
        !           310:          break;
        !           311: 
        !           312:        case '+':
        !           313:        case '?':
        !           314:          if (obscure_syntax & RE_BK_PLUS_QM)
        !           315:            goto normal_char;
        !           316:        handle_plus:
        !           317:        case '*':
        !           318:          /* If there is no previous pattern, char not special. */
        !           319:          if (!laststart && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
        !           320:            goto normal_char;
        !           321:          /* If there is a sequence of repetition chars,
        !           322:             collapse it down to equivalent to just one.  */
        !           323:          zero_times_ok = 0;
        !           324:          many_times_ok = 0;
        !           325:          while (1)
        !           326:            {
        !           327:              zero_times_ok |= c != '+';
        !           328:              many_times_ok |= c != '?';
        !           329:              if (p == pend)
        !           330:                break;
        !           331:              PATFETCH (c);
        !           332:              if (c == '*')
        !           333:                ;
        !           334:              else if (!(obscure_syntax & RE_BK_PLUS_QM)
        !           335:                       && (c == '+' || c == '?'))
        !           336:                ;
        !           337:              else if ((obscure_syntax & RE_BK_PLUS_QM)
        !           338:                       && c == '\\')
        !           339:                {
        !           340:                  int c1;
        !           341:                  PATFETCH (c1);
        !           342:                  if (!(c1 == '+' || c1 == '?'))
        !           343:                    {
        !           344:                      PATUNFETCH;
        !           345:                      PATUNFETCH;
        !           346:                      break;
        !           347:                    }
        !           348:                  c = c1;
        !           349:                }
        !           350:              else
        !           351:                {
        !           352:                  PATUNFETCH;
        !           353:                  break;
        !           354:                }
        !           355:            }
        !           356: 
        !           357:          /* Star, etc. applied to an empty pattern is equivalent
        !           358:             to an empty pattern.  */
        !           359:          if (!laststart)
        !           360:            break;
        !           361: 
        !           362:          /* Now we know whether 0 matches is allowed,
        !           363:             and whether 2 or more matches is allowed.  */
        !           364:          if (many_times_ok)
        !           365:            {
        !           366:              /* If more than one repetition is allowed,
        !           367:                 put in a backward jump at the end.  */
        !           368:              store_jump (b, maybe_finalize_jump, laststart - 3);
        !           369:              b += 3;
        !           370:            }
        !           371:          insert_jump (on_failure_jump, laststart, b + 3, b);
        !           372:          pending_exact = 0;
        !           373:          b += 3;
        !           374:          if (!zero_times_ok)
        !           375:            {
        !           376:              /* At least one repetition required: insert before the loop
        !           377:                 a skip over the initial on-failure-jump instruction */
        !           378:              insert_jump (dummy_failure_jump, laststart, laststart + 6, b);
        !           379:              b += 3;
        !           380:            }
        !           381:          break;
        !           382: 
        !           383:        case '.':
        !           384:          laststart = b;
        !           385:          PATPUSH (anychar);
        !           386:          break;
        !           387: 
        !           388:        case '[':
        !           389:          while (b - bufp->buffer
        !           390:                 > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH)
        !           391:            /* Note that EXTEND_BUFFER clobbers c */
        !           392:            EXTEND_BUFFER;
        !           393: 
        !           394:          laststart = b;
        !           395:          if (*p == '^')
        !           396:            PATPUSH (charset_not), p++;
        !           397:          else
        !           398:            PATPUSH (charset);
        !           399:          p1 = p;
        !           400: 
        !           401:          PATPUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
        !           402:          /* Clear the whole map */
        !           403:          bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
        !           404:          /* Read in characters and ranges, setting map bits */
        !           405:          while (1)
        !           406:            {
        !           407:              PATFETCH (c);
        !           408:              if (c == ']' && p != p1 + 1) break;
        !           409:              if (*p == '-' && p[1] != ']')
        !           410:                {
        !           411:                  PATFETCH (c1);
        !           412:                  PATFETCH (c1);
        !           413:                  if (translate)
        !           414:                    while (c <= c1)
        !           415:                      {
        !           416:                        register unsigned char mapped_c = translate[c];
        !           417:                        b[mapped_c / BYTEWIDTH] |= 1 << (mapped_c % BYTEWIDTH);
        !           418:                        c++;
        !           419:                      }
        !           420:                  else
        !           421:                    while (c <= c1)
        !           422:                      b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH), c++;
        !           423:                }
        !           424:              else
        !           425:                {
        !           426:                  if (translate)
        !           427:                    c = translate[c];
        !           428:                  b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH);
        !           429:                }
        !           430:            }
        !           431:          /* Discard any bitmap bytes that are all 0 at the end of the map.
        !           432:             Decrement the map-length byte too. */
        !           433:          while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
        !           434:            b[-1]--;
        !           435:          b += b[-1];
        !           436:          break;
        !           437: 
        !           438:        case '(':
        !           439:          if (! (obscure_syntax & RE_NO_BK_PARENS))
        !           440:            goto normal_char;
        !           441:          else
        !           442:            goto handle_open;
        !           443: 
        !           444:        case ')':
        !           445:          if (! (obscure_syntax & RE_NO_BK_PARENS))
        !           446:            goto normal_char;
        !           447:          else
        !           448:            goto handle_close;
        !           449: 
        !           450:        case '\n':
        !           451:          if (! (obscure_syntax & RE_NEWLINE_OR))
        !           452:            goto normal_char;
        !           453:          else
        !           454:            goto handle_bar;
        !           455: 
        !           456:        case '|':
        !           457:          if (! (obscure_syntax & RE_NO_BK_VBAR))
        !           458:            goto normal_char;
        !           459:          else
        !           460:            goto handle_bar;
        !           461: 
        !           462:         case '\\':
        !           463:          if (p == pend) goto invalid_pattern;
        !           464:          PATFETCH_RAW (c);
        !           465:          switch (c)
        !           466:            {
        !           467:            case '(':
        !           468:              if (obscure_syntax & RE_NO_BK_PARENS)
        !           469:                goto normal_backsl;
        !           470:            handle_open:
        !           471:              if (stackp == stacke) goto nesting_too_deep;
        !           472:              if (regnum < RE_NREGS)
        !           473:                {
        !           474:                  PATPUSH (start_memory);
        !           475:                  PATPUSH (regnum);
        !           476:                }
        !           477:              *stackp++ = b - bufp->buffer;
        !           478:              *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0;
        !           479:              *stackp++ = regnum++;
        !           480:              *stackp++ = begalt - bufp->buffer;
        !           481:              fixup_jump = 0;
        !           482:              laststart = 0;
        !           483:              begalt = b;
        !           484:              break;
        !           485: 
        !           486:            case ')':
        !           487:              if (obscure_syntax & RE_NO_BK_PARENS)
        !           488:                goto normal_backsl;
        !           489:            handle_close:
        !           490:              if (stackp == stackb) goto unmatched_close;
        !           491:              begalt = *--stackp + bufp->buffer;
        !           492:              if (fixup_jump)
        !           493:                store_jump (fixup_jump, jump, b);
        !           494:              if (stackp[-1] < RE_NREGS)
        !           495:                {
        !           496:                  PATPUSH (stop_memory);
        !           497:                  PATPUSH (stackp[-1]);
        !           498:                }
        !           499:              stackp -= 2;
        !           500:              fixup_jump = 0;
        !           501:              if (*stackp)
        !           502:                fixup_jump = *stackp + bufp->buffer - 1;
        !           503:              laststart = *--stackp + bufp->buffer;
        !           504:              break;
        !           505: 
        !           506:            case '|':
        !           507:              if (obscure_syntax & RE_NO_BK_VBAR)
        !           508:                goto normal_backsl;
        !           509:            handle_bar:
        !           510:              insert_jump (on_failure_jump, begalt, b + 6, b);
        !           511:              pending_exact = 0;
        !           512:              b += 3;
        !           513:              if (fixup_jump)
        !           514:                store_jump (fixup_jump, jump, b);
        !           515:              fixup_jump = b;
        !           516:              b += 3;
        !           517:              laststart = 0;
        !           518:              begalt = b;
        !           519:              break;
        !           520: 
        !           521: #ifdef emacs
        !           522:            case '=':
        !           523:              PATPUSH (at_dot);
        !           524:              break;
        !           525: 
        !           526:            case 's':   
        !           527:              laststart = b;
        !           528:              PATPUSH (syntaxspec);
        !           529:              PATFETCH (c);
        !           530:              PATPUSH (syntax_spec_code[c]);
        !           531:              break;
        !           532: 
        !           533:            case 'S':
        !           534:              laststart = b;
        !           535:              PATPUSH (notsyntaxspec);
        !           536:              PATFETCH (c);
        !           537:              PATPUSH (syntax_spec_code[c]);
        !           538:              break;
        !           539: #endif emacs
        !           540: 
        !           541:            case 'w':
        !           542:              laststart = b;
        !           543:              PATPUSH (wordchar);
        !           544:              break;
        !           545: 
        !           546:            case 'W':
        !           547:              laststart = b;
        !           548:              PATPUSH (notwordchar);
        !           549:              break;
        !           550: 
        !           551:            case '<':
        !           552:              PATPUSH (wordbeg);
        !           553:              break;
        !           554: 
        !           555:            case '>':
        !           556:              PATPUSH (wordend);
        !           557:              break;
        !           558: 
        !           559:            case 'b':
        !           560:              PATPUSH (wordbound);
        !           561:              break;
        !           562: 
        !           563:            case 'B':
        !           564:              PATPUSH (notwordbound);
        !           565:              break;
        !           566: 
        !           567:            case '`':
        !           568:              PATPUSH (begbuf);
        !           569:              break;
        !           570: 
        !           571:            case '\'':
        !           572:              PATPUSH (endbuf);
        !           573:              break;
        !           574: 
        !           575:            case '1':
        !           576:            case '2':
        !           577:            case '3':
        !           578:            case '4':
        !           579:            case '5':
        !           580:            case '6':
        !           581:            case '7':
        !           582:            case '8':
        !           583:            case '9':
        !           584:              c1 = c - '0';
        !           585:              if (c1 >= regnum)
        !           586:                goto normal_char;
        !           587:              for (stackt = stackp - 2;  stackt > stackb;  stackt -= 4)
        !           588:                if (*stackt == c1)
        !           589:                  goto normal_char;
        !           590:              laststart = b;
        !           591:              PATPUSH (duplicate);
        !           592:              PATPUSH (c1);
        !           593:              break;
        !           594: 
        !           595:            case '+':
        !           596:            case '?':
        !           597:              if (obscure_syntax & RE_BK_PLUS_QM)
        !           598:                goto handle_plus;
        !           599: 
        !           600:            default:
        !           601:            normal_backsl:
        !           602:              /* You might think it would be useful for \ to mean
        !           603:                 not to translate; but if we don't translate it
        !           604:                 it will never match anything.  */
        !           605:              if (translate) c = translate[c];
        !           606:              goto normal_char;
        !           607:            }
        !           608:          break;
        !           609: 
        !           610:        default:
        !           611:        normal_char:
        !           612:          if (!pending_exact || pending_exact + *pending_exact + 1 != b
        !           613:              || *pending_exact == 0177 || *p == '*' || *p == '^'
        !           614:              || ((obscure_syntax & RE_BK_PLUS_QM)
        !           615:                  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
        !           616:                  : (*p == '+' || *p == '?')))
        !           617:            {
        !           618:              laststart = b;
        !           619:              PATPUSH (exactn);
        !           620:              pending_exact = b;
        !           621:              PATPUSH (0);
        !           622:            }
        !           623:          PATPUSH (c);
        !           624:          (*pending_exact)++;
        !           625:        }
        !           626:     }
        !           627: 
        !           628:   if (fixup_jump)
        !           629:     store_jump (fixup_jump, jump, b);
        !           630: 
        !           631:   if (stackp != stackb) goto unmatched_open;
        !           632: 
        !           633:   bufp->used = b - bufp->buffer;
        !           634:   return 0;
        !           635: 
        !           636:  invalid_pattern:
        !           637:   return "Invalid regular expression";
        !           638: 
        !           639:  unmatched_open:
        !           640:   return "Unmatched \\(";
        !           641: 
        !           642:  unmatched_close:
        !           643:   return "Unmatched \\)";
        !           644: 
        !           645:  end_of_pattern:
        !           646:   return "Premature end of regular expression";
        !           647: 
        !           648:  nesting_too_deep:
        !           649:   return "Nesting too deep";
        !           650: 
        !           651:  too_big:
        !           652:   return "Regular expression too big";
        !           653: 
        !           654:  memory_exhausted:
        !           655:   return "Memory exhausted";
        !           656: }
        !           657: 
        !           658: /* Store where `from' points a jump operation to jump to where `to' points.
        !           659:   `opcode' is the opcode to store. */
        !           660: 
        !           661: static int
        !           662: store_jump (from, opcode, to)
        !           663:      char *from, *to;
        !           664:      char opcode;
        !           665: {
        !           666:   from[0] = opcode;
        !           667:   from[1] = (to - (from + 3)) & 0377;
        !           668:   from[2] = (to - (from + 3)) >> 8;
        !           669: }
        !           670: 
        !           671: /* Open up space at char FROM, and insert there a jump to TO.
        !           672:    CURRENT_END gives te end of the storage no in use,
        !           673:    so we know how much data to copy up.
        !           674:    OP is the opcode of the jump to insert.
        !           675: 
        !           676:    If you call this function, you must zero out pending_exact.  */
        !           677: 
        !           678: static int
        !           679: insert_jump (op, from, to, current_end)
        !           680:      char op;
        !           681:      char *from, *to, *current_end;
        !           682: {
        !           683:   register char *pto = current_end + 3;
        !           684:   register char *pfrom = current_end;
        !           685:   while (pfrom != from)
        !           686:     *--pto = *--pfrom;
        !           687:   store_jump (from, op, to);
        !           688: }
        !           689: 
        !           690: /* Given a pattern, compute a fastmap from it.
        !           691:  The fastmap records which of the (1 << BYTEWIDTH) possible characters
        !           692:  can start a string that matches the pattern.
        !           693:  This fastmap is used by re_search to skip quickly over totally implausible text.
        !           694: 
        !           695:  The caller must supply the address of a (1 << BYTEWIDTH)-byte data area
        !           696:  as bufp->fastmap.
        !           697:  The other components of bufp describe the pattern to be used.  */
        !           698: 
        !           699: void
        !           700: re_compile_fastmap (bufp)
        !           701:      struct re_pattern_buffer *bufp;
        !           702: {
        !           703:   unsigned char *pattern = (unsigned char *) bufp->buffer;
        !           704:   int size = bufp->used;
        !           705:   register char *fastmap = bufp->fastmap;
        !           706:   register unsigned char *p = pattern;
        !           707:   register unsigned char *pend = pattern + size;
        !           708:   register int j, k;
        !           709:   unsigned char *translate = (unsigned char *) bufp->translate;
        !           710: 
        !           711:   unsigned char *stackb[NFAILURES];
        !           712:   unsigned char **stackp = stackb;
        !           713: 
        !           714:   bzero (fastmap, (1 << BYTEWIDTH));
        !           715:   bufp->fastmap_accurate = 1;
        !           716:   bufp->can_be_null = 0;
        !           717:       
        !           718:   while (p)
        !           719:     {
        !           720:       if (p == pend)
        !           721:        {
        !           722:          bufp->can_be_null = 1;
        !           723:          break;
        !           724:        }
        !           725: #ifdef SWITCH_ENUM_BUG
        !           726:       switch ((int) ((enum regexpcode) *p++))
        !           727: #else
        !           728:       switch ((enum regexpcode) *p++)
        !           729: #endif
        !           730:        {
        !           731:        case exactn:
        !           732:          if (translate)
        !           733:            fastmap[translate[p[1]]] = 1;
        !           734:          else
        !           735:            fastmap[p[1]] = 1;
        !           736:          break;
        !           737: 
        !           738:         case begline:
        !           739:         case before_dot:
        !           740:        case at_dot:
        !           741:        case after_dot:
        !           742:        case begbuf:
        !           743:        case endbuf:
        !           744:        case wordbound:
        !           745:        case notwordbound:
        !           746:        case wordbeg:
        !           747:        case wordend:
        !           748:          continue;
        !           749: 
        !           750:        case endline:
        !           751:          if (translate)
        !           752:            fastmap[translate['\n']] = 1;
        !           753:          else
        !           754:            fastmap['\n'] = 1;
        !           755:          if (bufp->can_be_null != 1)
        !           756:            bufp->can_be_null = 2;
        !           757:          break;
        !           758: 
        !           759:        case finalize_jump:
        !           760:        case maybe_finalize_jump:
        !           761:        case jump:
        !           762:        case dummy_failure_jump:
        !           763:          bufp->can_be_null = 1;
        !           764:          j = *p++ & 0377;
        !           765:          j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
        !           766:          p += j + 1;           /* The 1 compensates for missing ++ above */
        !           767:          if (j > 0)
        !           768:            continue;
        !           769:          /* Jump backward reached implies we just went through
        !           770:             the body of a loop and matched nothing.
        !           771:             Opcode jumped to should be an on_failure_jump.
        !           772:             Just treat it like an ordinary jump.
        !           773:             For a * loop, it has pushed its failure point already;
        !           774:             if so, discard that as redundant.  */
        !           775:          if ((enum regexpcode) *p != on_failure_jump)
        !           776:            continue;
        !           777:          p++;
        !           778:          j = *p++ & 0377;
        !           779:          j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
        !           780:          p += j + 1;           /* The 1 compensates for missing ++ above */
        !           781:          if (stackp != stackb && *stackp == p)
        !           782:            stackp--;
        !           783:          continue;
        !           784:          
        !           785:        case on_failure_jump:
        !           786:          j = *p++ & 0377;
        !           787:          j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
        !           788:          p++;
        !           789:          *++stackp = p + j;
        !           790:          continue;
        !           791: 
        !           792:        case start_memory:
        !           793:        case stop_memory:
        !           794:          p++;
        !           795:          continue;
        !           796: 
        !           797:        case duplicate:
        !           798:          bufp->can_be_null = 1;
        !           799:          fastmap['\n'] = 1;
        !           800:        case anychar:
        !           801:          for (j = 0; j < (1 << BYTEWIDTH); j++)
        !           802:            if (j != '\n')
        !           803:              fastmap[j] = 1;
        !           804:          if (bufp->can_be_null)
        !           805:            return;
        !           806:          /* Don't return; check the alternative paths
        !           807:             so we can set can_be_null if appropriate.  */
        !           808:          break;
        !           809: 
        !           810:        case wordchar:
        !           811:          for (j = 0; j < (1 << BYTEWIDTH); j++)
        !           812:            if (SYNTAX (j) == Sword)
        !           813:              fastmap[j] = 1;
        !           814:          break;
        !           815: 
        !           816:        case notwordchar:
        !           817:          for (j = 0; j < (1 << BYTEWIDTH); j++)
        !           818:            if (SYNTAX (j) != Sword)
        !           819:              fastmap[j] = 1;
        !           820:          break;
        !           821: 
        !           822: #ifdef emacs
        !           823:        case syntaxspec:
        !           824:          k = *p++;
        !           825:          for (j = 0; j < (1 << BYTEWIDTH); j++)
        !           826:            if (SYNTAX (j) == (enum syntaxcode) k)
        !           827:              fastmap[j] = 1;
        !           828:          break;
        !           829: 
        !           830:        case notsyntaxspec:
        !           831:          k = *p++;
        !           832:          for (j = 0; j < (1 << BYTEWIDTH); j++)
        !           833:            if (SYNTAX (j) != (enum syntaxcode) k)
        !           834:              fastmap[j] = 1;
        !           835:          break;
        !           836: #endif emacs
        !           837: 
        !           838:        case charset:
        !           839:          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
        !           840:            if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
        !           841:              {
        !           842:                if (translate)
        !           843:                  fastmap[translate[j]] = 1;
        !           844:                else
        !           845:                  fastmap[j] = 1;
        !           846:              }
        !           847:          break;
        !           848: 
        !           849:        case charset_not:
        !           850:          /* Chars beyond end of map must be allowed */
        !           851:          for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
        !           852:            if (translate)
        !           853:              fastmap[translate[j]] = 1;
        !           854:            else
        !           855:              fastmap[j] = 1;
        !           856: 
        !           857:          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
        !           858:            if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
        !           859:              {
        !           860:                if (translate)
        !           861:                  fastmap[translate[j]] = 1;
        !           862:                else
        !           863:                  fastmap[j] = 1;
        !           864:              }
        !           865:          break;
        !           866:        }
        !           867: 
        !           868:       /* Get here means we have successfully found the possible starting characters
        !           869:         of one path of the pattern.  We need not follow this path any farther.
        !           870:         Instead, look at the next alternative remembered in the stack. */
        !           871:       if (stackp != stackb)
        !           872:        p = *stackp--;
        !           873:       else
        !           874:        break;
        !           875:     }
        !           876: }
        !           877: 
        !           878: /* Like re_search_2, below, but only one string is specified. */
        !           879: 
        !           880: int
        !           881: re_search (pbufp, string, size, startpos, range, regs)
        !           882:      struct re_pattern_buffer *pbufp;
        !           883:      char *string;
        !           884:      int size, startpos, range;
        !           885:      struct re_registers *regs;
        !           886: {
        !           887:   return re_search_2 (pbufp, 0, 0, string, size, startpos, range, regs, size);
        !           888: }
        !           889: 
        !           890: /* Like re_match_2 but tries first a match starting at index STARTPOS,
        !           891:    then at STARTPOS + 1, and so on.
        !           892:    RANGE is the number of places to try before giving up.
        !           893:    If RANGE is negative, the starting positions tried are
        !           894:     STARTPOS, STARTPOS - 1, etc.
        !           895:    It is up to the caller to make sure that range is not so large
        !           896:    as to take the starting position outside of the input strings.
        !           897: 
        !           898: The value returned is the position at which the match was found,
        !           899:  or -1 if no match was found,
        !           900:  or -2 if error (such as failure stack overflow).  */
        !           901: 
        !           902: int
        !           903: re_search_2 (pbufp, string1, size1, string2, size2, startpos, range, regs, mstop)
        !           904:      struct re_pattern_buffer *pbufp;
        !           905:      char *string1, *string2;
        !           906:      int size1, size2;
        !           907:      int startpos;
        !           908:      register int range;
        !           909:      struct re_registers *regs;
        !           910:      int mstop;
        !           911: {
        !           912:   register char *fastmap = pbufp->fastmap;
        !           913:   register unsigned char *translate = (unsigned char *) pbufp->translate;
        !           914:   int total = size1 + size2;
        !           915:   int val;
        !           916: 
        !           917:   /* Update the fastmap now if not correct already */
        !           918:   if (fastmap && !pbufp->fastmap_accurate)
        !           919:     re_compile_fastmap (pbufp);
        !           920:   
        !           921:   /* Don't waste time in a long search for a pattern
        !           922:      that says it is anchored.  */
        !           923:   if (pbufp->used > 0 && (enum regexpcode) pbufp->buffer[0] == begbuf
        !           924:       && range > 0)
        !           925:     {
        !           926:       if (startpos > 0)
        !           927:        return -1;
        !           928:       else
        !           929:        range = 1;
        !           930:     }
        !           931: 
        !           932:   while (1)
        !           933:     {
        !           934:       /* If a fastmap is supplied, skip quickly over characters
        !           935:         that cannot possibly be the start of a match.
        !           936:         Note, however, that if the pattern can possibly match
        !           937:         the null string, we must test it at each starting point
        !           938:         so that we take the first null string we get.  */
        !           939: 
        !           940:       if (fastmap && startpos < total && pbufp->can_be_null != 1)
        !           941:        {
        !           942:          if (range > 0)
        !           943:            {
        !           944:              register int lim = 0;
        !           945:              register unsigned char *p;
        !           946:              int irange = range;
        !           947:              if (startpos < size1 && startpos + range >= size1)
        !           948:                lim = range - (size1 - startpos);
        !           949: 
        !           950:              p = ((unsigned char *)
        !           951:                   &(startpos >= size1 ? string2 - size1 : string1)[startpos]);
        !           952: 
        !           953:              if (translate)
        !           954:                {
        !           955:                  while (range > lim && !fastmap[translate[*p++]])
        !           956:                    range--;
        !           957:                }
        !           958:              else
        !           959:                {
        !           960:                  while (range > lim && !fastmap[*p++])
        !           961:                    range--;
        !           962:                }
        !           963:              startpos += irange - range;
        !           964:            }
        !           965:          else
        !           966:            {
        !           967:              register unsigned char c;
        !           968:              if (startpos >= size1)
        !           969:                c = string2[startpos - size1];
        !           970:              else
        !           971:                c = string1[startpos];
        !           972:              c &= 0xff;
        !           973:              if (translate ? !fastmap[translate[c]] : !fastmap[c])
        !           974:                goto advance;
        !           975:            }
        !           976:        }
        !           977: 
        !           978:       if (range >= 0 && startpos == total
        !           979:          && fastmap && pbufp->can_be_null == 0)
        !           980:        return -1;
        !           981: 
        !           982:       val = re_match_2 (pbufp, string1, size1, string2, size2, startpos, regs,
        !           983:                        mstop);
        !           984:       /* Propagate error indication if worse than mere failure.  */
        !           985:       if (val == -2)
        !           986:        return -2;
        !           987:       /* Return position on success.  */
        !           988:       if (0 <= val)
        !           989:        return startpos;
        !           990: 
        !           991: #ifdef C_ALLOCA
        !           992:       alloca (0);
        !           993: #endif /* C_ALLOCA */
        !           994: 
        !           995:     advance:
        !           996:       if (!range) break;
        !           997:       if (range > 0) range--, startpos++; else range++, startpos--;
        !           998:     }
        !           999:   return -1;
        !          1000: }
        !          1001: 
        !          1002: #ifndef emacs   /* emacs never uses this */
        !          1003: int
        !          1004: re_match (pbufp, string, size, pos, regs)
        !          1005:      struct re_pattern_buffer *pbufp;
        !          1006:      char *string;
        !          1007:      int size, pos;
        !          1008:      struct re_registers *regs;
        !          1009: {
        !          1010:   return re_match_2 (pbufp, 0, 0, string, size, pos, regs, size);
        !          1011: }
        !          1012: #endif /* emacs */
        !          1013: 
        !          1014: /* Maximum size of failure stack.  Beyond this, overflow is an error.
   This is an ordinary (non-static) global initialized to 2000.
   NOTE(review): the code that compares against this limit is not in
   this part of the file -- confirm where it is enforced.  */
        !          1015: 
        !          1016: int re_max_failures = 2000;
        !          1017: 
        !          1018: static int bcmp_translate(); /* old-style forward declaration; the definition is outside this excerpt */
        !          1019: /* Match the pattern described by PBUFP
        !          1020:    against data which is the virtual concatenation of STRING1 and STRING2.
        !          1021:    SIZE1 and SIZE2 are the sizes of the two data strings.
        !          1022:    Start the match at position POS.
        !          1023:    Do not consider matching past the position MSTOP.
        !          1024: 
        !          1025:    If pbufp->fastmap is nonzero, then it had better be up to date.
        !          1026: 
        !          1027:    The reason that the data to match are specified as two components
        !          1028:    which are to be regarded as concatenated
        !          1029:    is so this function can be used directly on the contents of an Emacs buffer.
        !          1030: 
        !          1031:    -1 is returned if there is no match.  -2 is returned if there is
        !          1032:    an error (such as match stack overflow).  Otherwise the value is the length
        !          1033:    of the substring which was matched.  */
        !          1034: 
        !          1035: int
        !          1036: re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop)
        !          1037:      struct re_pattern_buffer *pbufp;
        !          1038:      unsigned char *string1, *string2;
        !          1039:      int size1, size2;
        !          1040:      int pos;
        !          1041:      struct re_registers *regs;
        !          1042:      int mstop;
        !          1043: {
        !          1044:   register unsigned char *p = (unsigned char *) pbufp->buffer;
        !          1045:   register unsigned char *pend = p + pbufp->used;
        !          1046:   /* End of first string */
        !          1047:   unsigned char *end1;
        !          1048:   /* End of second string */
        !          1049:   unsigned char *end2;
        !          1050:   /* Pointer just past last char to consider matching */
        !          1051:   unsigned char *end_match_1, *end_match_2;
        !          1052:   register unsigned char *d, *dend;
        !          1053:   register int mcnt;
        !          1054:   unsigned char *translate = (unsigned char *) pbufp->translate;
        !          1055: 
        !          1056:  /* Failure point stack.  Each place that can handle a failure further down the line
        !          1057:     pushes a failure point on this stack.  It consists of two char *'s.
        !          1058:     The first one pushed is where to resume scanning the pattern;
        !          1059:     the second pushed is where to resume scanning the strings.
        !          1060:     If the latter is zero, the failure point is a "dummy".
        !          1061:     If a failure happens and the innermost failure point is dormant,
        !          1062:     it discards that failure point and tries the next one. */
        !          1063: 
        !          1064:   unsigned char *initial_stack[2 * NFAILURES];
        !          1065:   unsigned char **stackb = initial_stack;
        !          1066:   unsigned char **stackp = stackb, **stacke = &stackb[2 * NFAILURES];
        !          1067: 
        !          1068:   /* Information on the "contents" of registers.
        !          1069:      These are pointers into the input strings; they record
        !          1070:      just what was matched (on this attempt) by some part of the pattern.
        !          1071:      The start_memory command stores the start of a register's contents
        !          1072:      and the stop_memory command stores the end.
        !          1073: 
        !          1074:      At that point, regstart[regnum] points to the first character in the register,
        !          1075:      regend[regnum] points to the first character beyond the end of the register,
        !          1076:      regstart_seg1[regnum] is true iff regstart[regnum] points into string1,
        !          1077:      and regend_seg1[regnum] is true iff regend[regnum] points into string1.  */
        !          1078: 
        !          1079:   unsigned char *regstart[RE_NREGS];
        !          1080:   unsigned char *regend[RE_NREGS];
        !          1081:   unsigned char regstart_seg1[RE_NREGS], regend_seg1[RE_NREGS];
        !          1082: 
        !          1083:   /* Set up pointers to ends of strings.
        !          1084:      Don't allow the second string to be empty unless both are empty.  */
        !          1085:   if (!size2)
        !          1086:     {
        !          1087:       string2 = string1;
        !          1088:       size2 = size1;
        !          1089:       string1 = 0;
        !          1090:       size1 = 0;
        !          1091:     }
        !          1092:   end1 = string1 + size1;
        !          1093:   end2 = string2 + size2;
        !          1094: 
        !          1095:   /* Compute where to stop matching, within the two strings */
        !          1096:   if (mstop <= size1)
        !          1097:     {
        !          1098:       end_match_1 = string1 + mstop;
        !          1099:       end_match_2 = string2;
        !          1100:     }
        !          1101:   else
        !          1102:     {
        !          1103:       end_match_1 = end1;
        !          1104:       end_match_2 = string2 + mstop - size1;
        !          1105:     }
        !          1106: 
        !          1107:   /* Initialize \) text positions to -1
        !          1108:      to mark ones that no \( or \) has been seen for.  */
        !          1109: 
        !          1110:   for (mcnt = 0; mcnt < sizeof (regend) / sizeof (*regend); mcnt++)
        !          1111:     regend[mcnt] = (unsigned char *) -1;
        !          1112: 
        !          1113:   /* `p' scans through the pattern as `d' scans through the data.
        !          1114:      `dend' is the end of the input string that `d' points within.
        !          1115:      `d' is advanced into the following input string whenever necessary,
        !          1116:      but this happens before fetching;
        !          1117:      therefore, at the beginning of the loop,
        !          1118:      `d' can be pointing at the end of a string,
        !          1119:      but it cannot equal string2.  */
        !          1120: 
        !          1121:   if (pos <= size1)
        !          1122:     d = string1 + pos, dend = end_match_1;
        !          1123:   else
        !          1124:     d = string2 + pos - size1, dend = end_match_2;
        !          1125: 
        !          1126: /* Write PREFETCH; just before fetching a character with *d.  */
        !          1127: #define PREFETCH \
        !          1128:  while (d == dend)                                                 \
        !          1129:   { if (dend == end_match_2) goto fail;  /* end of string2 => failure */   \
        !          1130:     d = string2;  /* end of string1 => advance to string2. */       \
        !          1131:     dend = end_match_2; }
        !          1132: 
        !          1133:   /* This loop loops over pattern commands.
        !          1134:      It exits by returning from the function if match is complete,
        !          1135:      or it drops through if match fails at this starting point in the input data. */
        !          1136: 
        !          1137:   while (1)
        !          1138:     {
        !          1139:       if (p == pend)
        !          1140:        /* End of pattern means we have succeeded! */
        !          1141:        {
        !          1142:          /* If caller wants register contents data back, convert it to indices */
        !          1143:          if (regs)
        !          1144:            {
        !          1145:              regs->start[0] = pos;
        !          1146:              if (dend == end_match_1)
        !          1147:                regs->end[0] = d - string1;
        !          1148:              else
        !          1149:                regs->end[0] = d - string2 + size1;
        !          1150:              for (mcnt = 1; mcnt < RE_NREGS; mcnt++)
        !          1151:                {
        !          1152:                  if (regend[mcnt] == (unsigned char *) -1)
        !          1153:                    {
        !          1154:                      regs->start[mcnt] = -1;
        !          1155:                      regs->end[mcnt] = -1;
        !          1156:                      continue;
        !          1157:                    }
        !          1158:                  if (regstart_seg1[mcnt])
        !          1159:                    regs->start[mcnt] = regstart[mcnt] - string1;
        !          1160:                  else
        !          1161:                    regs->start[mcnt] = regstart[mcnt] - string2 + size1;
        !          1162:                  if (regend_seg1[mcnt])
        !          1163:                    regs->end[mcnt] = regend[mcnt] - string1;
        !          1164:                  else
        !          1165:                    regs->end[mcnt] = regend[mcnt] - string2 + size1;
        !          1166:                }
        !          1167:            }
        !          1168:          if (dend == end_match_1)
        !          1169:            return (d - string1 - pos);
        !          1170:          else
        !          1171:            return d - string2 + size1 - pos;
        !          1172:        }
        !          1173: 
        !          1174:       /* Otherwise match next pattern command */
        !          1175: #ifdef SWITCH_ENUM_BUG
        !          1176:       switch ((int) ((enum regexpcode) *p++))
        !          1177: #else
        !          1178:       switch ((enum regexpcode) *p++)
        !          1179: #endif
        !          1180:        {
        !          1181: 
        !          1182:        /* \( is represented by a start_memory, \) by a stop_memory.
        !          1183:            Both of those commands contain a "register number" argument.
        !          1184:            The text matched within the \( and \) is recorded under that number.
        !          1185:            Then, \<digit> turns into a `duplicate' command which
        !          1186:            is followed by the numeric value of <digit> as the register number. */
        !          1187: 
        !          1188:        case start_memory:
        !          1189:          regstart[*p] = d;
        !          1190:          regstart_seg1[*p++] = (dend == end_match_1);
        !          1191:          break;
        !          1192: 
        !          1193:        case stop_memory:
        !          1194:          regend[*p] = d;
        !          1195:          regend_seg1[*p++] = (dend == end_match_1);
        !          1196:          break;
        !          1197: 
        !          1198:        case duplicate:
        !          1199:          {
        !          1200:            int regno = *p++;   /* Get which register to match against */
        !          1201:            register unsigned char *d2, *dend2;
        !          1202: 
        !          1203:            /* Don't allow matching a register that hasn't been used.
        !          1204:               This isn't fully reliable in the current version,
        !          1205:               but it is better than crashing.  */
        !          1206:            if ((int) regend[regno] == -1)
        !          1207:              goto fail;
        !          1208: 
        !          1209:            d2 = regstart[regno];
        !          1210:            dend2 = ((regstart_seg1[regno] == regend_seg1[regno])
        !          1211:                     ? regend[regno] : end_match_1);
        !          1212:            while (1)
        !          1213:              {
        !          1214:                /* Advance to next segment in register contents, if necessary */
        !          1215:                while (d2 == dend2)
        !          1216:                  {
        !          1217:                    if (dend2 == end_match_2) break;
        !          1218:                    if (dend2 == regend[regno]) break;
        !          1219:                    d2 = string2, dend2 = regend[regno];  /* end of string1 => advance to string2. */
        !          1220:                  }
        !          1221:                /* At end of register contents => success */
        !          1222:                if (d2 == dend2) break;
        !          1223: 
        !          1224:                /* Advance to next segment in data being matched, if necessary */
        !          1225:                PREFETCH;
        !          1226: 
        !          1227:                /* mcnt gets # consecutive chars to compare */
        !          1228:                mcnt = dend - d;
        !          1229:                if (mcnt > dend2 - d2)
        !          1230:                  mcnt = dend2 - d2;
        !          1231:                /* Compare that many; failure if mismatch, else skip them. */
        !          1232:                if (translate ? bcmp_translate (d, d2, mcnt, translate) : bcmp (d, d2, mcnt))
        !          1233:                  goto fail;
        !          1234:                d += mcnt, d2 += mcnt;
        !          1235:              }
        !          1236:          }
        !          1237:          break;
        !          1238: 
        !          1239:        case anychar:
        !          1240:          /* fetch a data character */
        !          1241:          PREFETCH;
        !          1242:          /* Match anything but a newline.  */
        !          1243:          if ((translate ? translate[*d++] : *d++) == '\n')
        !          1244:            goto fail;
        !          1245:          break;
        !          1246: 
        !          1247:        case charset:
        !          1248:        case charset_not:
        !          1249:          {
        !          1250:            /* Nonzero for charset_not */
        !          1251:            int not = 0;
        !          1252:            register int c;
        !          1253:            if (*(p - 1) == (unsigned char) charset_not)
        !          1254:              not = 1;
        !          1255: 
        !          1256:            /* fetch a data character */
        !          1257:            PREFETCH;
        !          1258: 
        !          1259:            if (translate)
        !          1260:              c = translate [*d];
        !          1261:            else
        !          1262:              c = *d;
        !          1263: 
        !          1264:            if (c < *p * BYTEWIDTH
        !          1265:                && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
        !          1266:              not = !not;
        !          1267: 
        !          1268:            p += 1 + *p;
        !          1269: 
        !          1270:            if (!not) goto fail;
        !          1271:            d++;
        !          1272:            break;
        !          1273:          }
        !          1274: 
        !          1275:        case begline:
        !          1276:          if (d == string1 || d[-1] == '\n')
        !          1277:            break;
        !          1278:          goto fail;
        !          1279: 
        !          1280:        case endline:
        !          1281:          if (d == end2
        !          1282:              || (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n'))
        !          1283:            break;
        !          1284:          goto fail;
        !          1285: 
        !          1286:        /* "or" constructs ("|") are handled by starting each alternative
        !          1287:            with an on_failure_jump that points to the start of the next alternative.
        !          1288:            Each alternative except the last ends with a jump to the joining point.
        !          1289:            (Actually, each jump except for the last one really jumps
        !          1290:             to the following jump, because tensioning the jumps is a hassle.) */
        !          1291: 
        !          1292:        /* The start of a stupid repeat has an on_failure_jump that points
        !          1293:           past the end of the repeat text.
        !          1294:           This makes a failure point so that, on failure to match a repetition,
        !          1295:           matching restarts past as many repetitions as have been found
        !          1296:           with no way to fail and look for another one.  */
        !          1297: 
        !          1298:        /* A smart repeat is similar but loops back to the on_failure_jump
        !          1299:           so that each repetition makes another failure point. */
        !          1300: 
        !          1301:        case on_failure_jump:
        !          1302:          if (stackp == stacke)
        !          1303:            {
        !          1304:              unsigned char **stackx;
        !          1305:              if (stacke - stackb > re_max_failures)
        !          1306:                return -2;
        !          1307:              stackx = (unsigned char **) alloca (2 * (stacke - stackb)
        !          1308:                                         * sizeof (char *));
        !          1309:              bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
        !          1310:              stackp = stackx + (stackp - stackb);
        !          1311:              stacke = stackx + 2 * (stacke - stackb);
        !          1312:              stackb = stackx;
        !          1313:            }
        !          1314:          mcnt = *p++ & 0377;
        !          1315:          mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
        !          1316:          p++;
        !          1317:          *stackp++ = mcnt + p;
        !          1318:          *stackp++ = d;
        !          1319:          break;
        !          1320: 
        !          1321:        /* The end of a smart repeat has a maybe_finalize_jump back.
        !          1322:           Change it either to a finalize_jump or an ordinary jump. */
        !          1323: 
        !          1324:        case maybe_finalize_jump:
        !          1325:          mcnt = *p++ & 0377;
        !          1326:          mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
        !          1327:          p++;
        !          1328:          /* Compare what follows with the beginning of the repeat.
        !          1329:             If we can establish that there is nothing that they would
        !          1330:             both match, we can change to finalize_jump */
        !          1331:          if (p == pend)
        !          1332:            p[-3] = (unsigned char) finalize_jump;
        !          1333:          else if (*p == (unsigned char) exactn
        !          1334:                   || *p == (unsigned char) endline)
        !          1335:            {
        !          1336:              register int c = *p == (unsigned char) endline ? '\n' : p[2];
        !          1337:              register unsigned char *p1 = p + mcnt;
        !          1338:              /* p1[0] ... p1[2] are an on_failure_jump.
        !          1339:                 Examine what follows that */
        !          1340:              if (p1[3] == (unsigned char) exactn && p1[5] != c)
        !          1341:                p[-3] = (unsigned char) finalize_jump;
        !          1342:              else if (p1[3] == (unsigned char) charset
        !          1343:                       || p1[3] == (unsigned char) charset_not)
        !          1344:                {
        !          1345:                  int not = p1[3] == (unsigned char) charset_not;
        !          1346:                  if (c < p1[4] * BYTEWIDTH
        !          1347:                      && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
        !          1348:                    not = !not;
        !          1349:                  /* not is 1 if c would match */
        !          1350:                  /* That means it is not safe to finalize */
        !          1351:                  if (!not)
        !          1352:                    p[-3] = (unsigned char) finalize_jump;
        !          1353:                }
        !          1354:            }
        !          1355:          p -= 2;
        !          1356:          if (p[-1] != (unsigned char) finalize_jump)
        !          1357:            {
        !          1358:              p[-1] = (unsigned char) jump;
        !          1359:              goto nofinalize;
        !          1360:            }
        !          1361: 
        !          1362:        /* The end of a stupid repeat has a finalize-jump
        !          1363:           back to the start, where another failure point will be made
        !          1364:           which will point after all the repetitions found so far. */
        !          1365: 
        !          1366:        case finalize_jump:
        !          1367:          stackp -= 2;
        !          1368: 
        !          1369:        case jump:
        !          1370:        nofinalize:
        !          1371:          mcnt = *p++ & 0377;
        !          1372:          mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
        !          1373:          p += mcnt + 1;        /* The 1 compensates for missing ++ above */
        !          1374:          break;
        !          1375: 
        !          1376:        case dummy_failure_jump:
        !          1377:          if (stackp == stacke)
        !          1378:            {
        !          1379:              unsigned char **stackx
        !          1380:                = (unsigned char **) alloca (2 * (stacke - stackb)
        !          1381:                                             * sizeof (char *));
        !          1382:              bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
        !          1383:              stackp = stackx + (stackp - stackb);
        !          1384:              stacke = stackx + 2 * (stacke - stackb);
        !          1385:              stackb = stackx;
        !          1386:            }
        !          1387:          *stackp++ = 0;
        !          1388:          *stackp++ = 0;
        !          1389:          goto nofinalize;
        !          1390: 
        !          1391:        case wordbound:
        !          1392:          if (d == string1  /* Points to first char */
        !          1393:              || d == end2  /* Points to end */
        !          1394:              || (d == end1 && size2 == 0)) /* Points to end */
        !          1395:            break;
        !          1396:          if ((SYNTAX (d[-1]) == Sword)
        !          1397:              != (SYNTAX (d == end1 ? *string2 : *d) == Sword))
        !          1398:            break;
        !          1399:          goto fail;
        !          1400: 
        !          1401:        case notwordbound:
        !          1402:          if (d == string1  /* Points to first char */
        !          1403:              || d == end2  /* Points to end */
        !          1404:              || (d == end1 && size2 == 0)) /* Points to end */
        !          1405:            goto fail;
        !          1406:          if ((SYNTAX (d[-1]) == Sword)
        !          1407:              != (SYNTAX (d == end1 ? *string2 : *d) == Sword))
        !          1408:            goto fail;
        !          1409:          break;
        !          1410: 
        !          1411:        case wordbeg:
        !          1412:          if (d == end2  /* Points to end */
        !          1413:              || (d == end1 && size2 == 0) /* Points to end */
        !          1414:              || SYNTAX (* (d == end1 ? string2 : d)) != Sword) /* Next char not a letter */
        !          1415:            goto fail;
        !          1416:          if (d == string1  /* Points to first char */
        !          1417:              || SYNTAX (d[-1]) != Sword)  /* prev char not letter */
        !          1418:            break;
        !          1419:          goto fail;
        !          1420: 
        !          1421:        case wordend:
        !          1422:          if (d == string1  /* Points to first char */
        !          1423:              || SYNTAX (d[-1]) != Sword)  /* prev char not letter */
        !          1424:            goto fail;
        !          1425:          if (d == end2  /* Points to end */
        !          1426:              || (d == end1 && size2 == 0) /* Points to end */
        !          1427:              || SYNTAX (d == end1 ? *string2 : *d) != Sword) /* Next char not a letter */
        !          1428:            break;
        !          1429:          goto fail;
        !          1430: 
        !          1431: #ifdef emacs
        !          1432:        case before_dot:
        !          1433:          if (PTR_CHAR_POS (d) + 1 >= point)
        !          1434:            goto fail;
        !          1435:          break;
        !          1436: 
        !          1437:        case at_dot:
        !          1438:          if (PTR_CHAR_POS (d) + 1 != point)
        !          1439:            goto fail;
        !          1440:          break;
        !          1441: 
        !          1442:        case after_dot:
        !          1443:          if (PTR_CHAR_POS (d) + 1 <= point)
        !          1444:            goto fail;
        !          1445:          break;
        !          1446: 
        !          1447:        case wordchar:
        !          1448:          mcnt = (int) Sword;
        !          1449:          goto matchsyntax;
        !          1450: 
        !          1451:        case syntaxspec:
        !          1452:          mcnt = *p++;
        !          1453:        matchsyntax:
        !          1454:          PREFETCH;
        !          1455:          if (SYNTAX (*d++) != (enum syntaxcode) mcnt) goto fail;
        !          1456:          break;
        !          1457:          
        !          1458:        case notwordchar:
        !          1459:          mcnt = (int) Sword;
        !          1460:          goto matchnotsyntax;
        !          1461: 
        !          1462:        case notsyntaxspec:
        !          1463:          mcnt = *p++;
        !          1464:        matchnotsyntax:
        !          1465:          PREFETCH;
        !          1466:          if (SYNTAX (*d++) == (enum syntaxcode) mcnt) goto fail;
        !          1467:          break;
        !          1468: #else
        !          1469:        case wordchar:
        !          1470:          PREFETCH;
        !          1471:          if (SYNTAX (*d++) == 0) goto fail;
        !          1472:          break;
        !          1473:          
        !          1474:        case notwordchar:
        !          1475:          PREFETCH;
        !          1476:          if (SYNTAX (*d++) != 0) goto fail;
        !          1477:          break;
        !          1478: #endif not emacs
        !          1479: 
        !          1480:        case begbuf:
        !          1481:          if (d == string1)     /* Note, d cannot equal string2 */
        !          1482:            break;              /* unless string1 == string2.  */
        !          1483:          goto fail;
        !          1484: 
        !          1485:        case endbuf:
        !          1486:          if (d == end2 || (d == end1 && size2 == 0))
        !          1487:            break;
        !          1488:          goto fail;
        !          1489: 
        !          1490:        case exactn:
        !          1491:          /* Match the next few pattern characters exactly.
        !          1492:             mcnt is how many characters to match. */
        !          1493:          mcnt = *p++;
        !          1494:          if (translate)
        !          1495:            {
        !          1496:              do
        !          1497:                {
        !          1498:                  PREFETCH;
        !          1499:                  if (translate[*d++] != *p++) goto fail;
        !          1500:                }
        !          1501:              while (--mcnt);
        !          1502:            }
        !          1503:          else
        !          1504:            {
        !          1505:              do
        !          1506:                {
        !          1507:                  PREFETCH;
        !          1508:                  if (*d++ != *p++) goto fail;
        !          1509:                }
        !          1510:              while (--mcnt);
        !          1511:            }
        !          1512:          break;
        !          1513:        }
        !          1514:       continue;    /* Successfully matched one pattern command; keep matching */
        !          1515: 
        !          1516:       /* Jump here if any matching operation fails. */
        !          1517:     fail:
        !          1518:       if (stackp != stackb)
        !          1519:        /* A restart point is known.  Restart there and pop it. */
        !          1520:        {
        !          1521:          if (!stackp[-2])
        !          1522:            {   /* If innermost failure point is dormant, flush it and keep looking */
        !          1523:              stackp -= 2;
        !          1524:              goto fail;
        !          1525:            }
        !          1526:          d = *--stackp;
        !          1527:          p = *--stackp;
        !          1528:          if (d >= string1 && d <= end1)
        !          1529:            dend = end_match_1;
        !          1530:        }
        !          1531:       else break;   /* Matching at this starting point really fails! */
        !          1532:     }
        !          1533:   return -1;         /* Failure to match */
        !          1534: }
        !          1535: 
        /* Compare LEN bytes at S1 and S2 after mapping each byte through the
           TRANSLATE table.  Returns 0 when every translated pair is equal and
           1 as soon as a pair differs; no ordering information is returned.
           The loop stops only when the count reaches zero, so LEN is expected
           to be non-negative.  */
        static int
        bcmp_translate (s1, s2, len, translate)
             unsigned char *s1, *s2;
             register int len;
             unsigned char *translate;
        {
          register unsigned char *left = s1;
          register unsigned char *right = s2;

          for (; len != 0; len--)
            {
              unsigned char a = translate[*left++];
              unsigned char b = translate[*right++];

              if (a != b)
                return 1;
            }
          return 0;
        }
        !          1550: 
        !          1551: /* Entry points compatible with bsd4.2 regex library */
        !          1552: 
        !          1553: #ifndef emacs
        !          1554: 
        !          1555: static struct re_pattern_buffer re_comp_buf;
        !          1556: 
        !          1557: char *
        !          1558: re_comp (s)
        !          1559:      char *s;
        !          1560: {
        !          1561:   if (!s)
        !          1562:     {
        !          1563:       if (!re_comp_buf.buffer)
        !          1564:        return "No previous regular expression";
        !          1565:       return 0;
        !          1566:     }
        !          1567: 
        !          1568:   if (!re_comp_buf.buffer)
        !          1569:     {
        !          1570:       if (!(re_comp_buf.buffer = (char *) malloc (200)))
        !          1571:        return "Memory exhausted";
        !          1572:       re_comp_buf.allocated = 200;
        !          1573:       if (!(re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH)))
        !          1574:        return "Memory exhausted";
        !          1575:     }
        !          1576:   return re_compile_pattern (s, strlen (s), &re_comp_buf);
        !          1577: }
        !          1578: 
        !          1579: int
        !          1580: re_exec (s)
        !          1581:      char *s;
        !          1582: {
        !          1583:   int len = strlen (s);
        !          1584:   return 0 <= re_search (&re_comp_buf, s, len, 0, len, 0);
        !          1585: }
        !          1586: 
        !          1587: #endif /* not emacs */
        !          1588: 
        !          1589: #ifdef test
        !          1590: 
        !          1591: #include <stdio.h>
        !          1592: 
        /* Case-folding table for the test driver: indexed by a character code,
           it yields the upper-case equivalent.  Only the ASCII lower-case letters
           'a'..'z' are changed; every other code maps to itself.  */

        static char upcase[0400] =
          {
            /* 0x00-0x3f: control characters, punctuation and digits (identity).  */
            0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
            0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
            0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
            0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
            0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
            0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
            0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
            0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,

            /* 0x40-0x5f: '@', the upper-case letters and punctuation (identity).  */
            '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
            'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
            'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
            'X', 'Y', 'Z', '[', '\\', ']', '^', '_',

            /* 0x60-0x7f: the lower-case letters fold onto the upper-case ones;
               the surrounding punctuation and DEL stay as they are.  */
            '`', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
            'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
            'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
            'X', 'Y', 'Z', '{', '|', '}', '~', 0x7f,

            /* 0x80-0xff: codes with the high bit set (identity).  */
            0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
            0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
            0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
            0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
            0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
            0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
            0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
            0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
            0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
            0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
            0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
            0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
            0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
            0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
            0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
            0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
          };
        !          1629: 
        !          1630: main (argc, argv)
        !          1631:      int argc;
        !          1632:      char **argv;
        !          1633: {
        !          1634:   char pat[80];
        !          1635:   struct re_pattern_buffer buf;
        !          1636:   int i;
        !          1637:   char c;
        !          1638:   char fastmap[(1 << BYTEWIDTH)];
        !          1639: 
        !          1640:   /* Allow a command argument to specify the style of syntax.  */
        !          1641:   if (argc > 1)
        !          1642:     obscure_syntax = atoi (argv[1]);
        !          1643: 
        !          1644:   buf.allocated = 40;
        !          1645:   buf.buffer = (char *) malloc (buf.allocated);
        !          1646:   buf.fastmap = fastmap;
        !          1647:   buf.translate = upcase;
        !          1648: 
        !          1649:   while (1)
        !          1650:     {
        !          1651:       gets (pat);
        !          1652: 
        !          1653:       if (*pat)
        !          1654:        {
        !          1655:           re_compile_pattern (pat, strlen(pat), &buf);
        !          1656: 
        !          1657:          for (i = 0; i < buf.used; i++)
        !          1658:            printchar (buf.buffer[i]);
        !          1659: 
        !          1660:          putchar ('\n');
        !          1661: 
        !          1662:          printf ("%d allocated, %d used.\n", buf.allocated, buf.used);
        !          1663: 
        !          1664:          re_compile_fastmap (&buf);
        !          1665:          printf ("Allowed by fastmap: ");
        !          1666:          for (i = 0; i < (1 << BYTEWIDTH); i++)
        !          1667:            if (fastmap[i]) printchar (i);
        !          1668:          putchar ('\n');
        !          1669:        }
        !          1670: 
        !          1671:       gets (pat);      /* Now read the string to match against */
        !          1672: 
        !          1673:       i = re_match (&buf, pat, strlen (pat), 0, 0);
        !          1674:       printf ("Match value %d.\n", i);
        !          1675:     }
        !          1676: }
        !          1677: 
        !          1678: #ifdef NOTDEF
        !          1679: print_buf (bufp)
        !          1680:      struct re_pattern_buffer *bufp;
        !          1681: {
        !          1682:   int i;
        !          1683: 
        !          1684:   printf ("buf is :\n----------------\n");
        !          1685:   for (i = 0; i < bufp->used; i++)
        !          1686:     printchar (bufp->buffer[i]);
        !          1687:   
        !          1688:   printf ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used);
        !          1689:   
        !          1690:   printf ("Allowed by fastmap: ");
        !          1691:   for (i = 0; i < (1 << BYTEWIDTH); i++)
        !          1692:     if (bufp->fastmap[i])
        !          1693:       printchar (i);
        !          1694:   printf ("\nAllowed by translate: ");
        !          1695:   if (bufp->translate)
        !          1696:     for (i = 0; i < (1 << BYTEWIDTH); i++)
        !          1697:       if (bufp->translate[i])
        !          1698:        printchar (i);
        !          1699:   printf ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't");
        !          1700:   printf ("can %s be null\n----------", bufp->can_be_null ? "" : "not");
        !          1701: }
        !          1702: #endif
        !          1703: 
        /* Write character C to standard output in a readable form.  Codes
           041 through 0176 are written as themselves; every other code (space,
           control characters, DEL and codes with the high bit set) is written
           as a backslash followed by three octal digits.

           The value is first converted to unsigned char, so the digits do not
           depend on whether plain char is signed (right-shifting a negative
           value is implementation-defined).  The return type stays int, which
           is what callers that use the function before its definition assume;
           the function always returns 0.  */
        int
        printchar (c)
             char c;
        {
          unsigned char uc = (unsigned char) c;

          if (uc < 041 || uc >= 0177)
            {
              putchar ('\\');
              putchar (((uc >> 6) & 3) + '0');
              putchar (((uc >> 3) & 7) + '0');
              putchar ((uc & 7) + '0');
            }
          else
            putchar (uc);

          return 0;
        }
        !          1717: 
        /* Fatal-error handler for the test program: write STRING followed by a
           newline to standard output, then terminate with exit status 1.  */
        error (string)
             char *string;
        {
          fputs (string, stdout);
          putchar ('\n');
          exit (1);
        }
        !          1724: 
        !          1725: #endif test

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.