|
|
1.1 ! root 1: /* Extended regular expression matching and search. ! 2: Copyright (C) 1985 Free Software Foundation, Inc. ! 3: ! 4: This program is free software; you can redistribute it and/or modify ! 5: it under the terms of the GNU General Public License as published by ! 6: the Free Software Foundation; either version 1, or (at your option) ! 7: any later version. ! 8: ! 9: This program is distributed in the hope that it will be useful, ! 10: but WITHOUT ANY WARRANTY; without even the implied warranty of ! 11: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ! 12: GNU General Public License for more details. ! 13: ! 14: You should have received a copy of the GNU General Public License ! 15: along with this program; if not, write to the Free Software ! 16: Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ! 17: ! 18: In other words, you are welcome to use, share and improve this program. ! 19: You are forbidden to forbid anyone else to use, share and improve ! 20: what you give them. Help stamp out software-hoarding! */ ! 21: ! 22: ! 23: /* To test, compile with -Dtest. ! 24: This Dtestable feature turns this into a self-contained program ! 25: which reads a pattern, describes how it compiles, ! 26: then reads a string and searches for it. */ ! 27: ! 28: ! 29: #ifdef emacs ! 30: ! 31: /* The `emacs' switch turns on certain special matching commands ! 32: that make sense only in emacs. */ ! 33: ! 34: #include "config.h" ! 35: #include "lisp.h" ! 36: #include "buffer.h" ! 37: #include "syntax.h" ! 38: ! 39: #else /* not emacs */ ! 40: ! 41: /* ! 42: * Define the syntax stuff, so we can do the \<...\> things. ! 43: */ ! 44: ! 45: #ifndef Sword /* must be non-zero in some of the tests below... */ ! 46: #define Sword 1 ! 47: #endif ! 48: ! 49: #define SYNTAX(c) re_syntax_table[c] ! 50: ! 51: #ifdef SYNTAX_TABLE ! 52: ! 53: char *re_syntax_table; ! 54: ! 55: #else ! 56: ! 57: static char re_syntax_table[256]; ! 58: ! 59: static void ! 60: init_syntax_once () ! 61: { ! 62: register int c; ! 63: static int done = 0; ! 64: ! 65: if (done) ! 66: return; ! 67: ! 68: bzero (re_syntax_table, sizeof re_syntax_table); ! 69: ! 70: for (c = 'a'; c <= 'z'; c++) ! 71: re_syntax_table[c] = Sword; ! 72: ! 73: for (c = 'A'; c <= 'Z'; c++) ! 74: re_syntax_table[c] = Sword; ! 75: ! 76: for (c = '0'; c <= '9'; c++) ! 77: re_syntax_table[c] = Sword; ! 78: ! 79: done = 1; ! 80: } ! 81: ! 82: #endif /* SYNTAX_TABLE */ ! 83: #endif /* not emacs */ ! 84: ! 85: #include "regex.h" ! 86: ! 87: /* Number of failure points to allocate space for initially, ! 88: when matching. If this number is exceeded, more space is allocated, ! 89: so it is not a hard limit. */ ! 90: ! 91: #ifndef NFAILURES ! 92: #define NFAILURES 80 ! 93: #endif NFAILURES ! 94: ! 95: /* width of a byte in bits */ ! 96: ! 97: #define BYTEWIDTH 8 ! 98: ! 99: #ifndef SIGN_EXTEND_CHAR ! 100: #define SIGN_EXTEND_CHAR(x) (x) ! 101: #endif ! 102: ! 103: static int obscure_syntax = 0; ! 104: ! 105: /* Specify the precise syntax of regexp for compilation. ! 106: This provides for compatibility for various utilities ! 107: which historically have different, incompatible syntaxes. ! 108: ! 109: The argument SYNTAX is a bit-mask containing the two bits ! 110: RE_NO_BK_PARENS and RE_NO_BK_VBAR. */ ! 111: ! 112: int ! 113: re_set_syntax (syntax) ! 114: { ! 115: int ret; ! 116: ! 117: ret = obscure_syntax; ! 118: obscure_syntax = syntax; ! 119: return ret; ! 120: } ! 121: ! 122: /* re_compile_pattern takes a regular-expression string ! 123: and converts it into a buffer full of byte commands for matching. ! 124: ! 125: PATTERN is the address of the pattern string ! 126: SIZE is the length of it. ! 127: BUFP is a struct re_pattern_buffer * which points to the info ! 128: on where to store the byte commands. ! 129: This structure contains a char * which points to the ! 130: actual space, which should have been obtained with malloc. ! 131: re_compile_pattern may use realloc to grow the buffer space. ! 132: ! 133: The number of bytes of commands can be found out by looking in ! 134: the struct re_pattern_buffer that bufp pointed to, ! 135: after re_compile_pattern returns. ! 136: */ ! 137: ! 138: #define PATPUSH(ch) (*b++ = (char) (ch)) ! 139: ! 140: #define PATFETCH(c) \ ! 141: {if (p == pend) goto end_of_pattern; \ ! 142: c = * (unsigned char *) p++; \ ! 143: if (translate) c = translate[c]; } ! 144: ! 145: #define PATFETCH_RAW(c) \ ! 146: {if (p == pend) goto end_of_pattern; \ ! 147: c = * (unsigned char *) p++; } ! 148: ! 149: #define PATUNFETCH p-- ! 150: ! 151: #define EXTEND_BUFFER \ ! 152: { char *old_buffer = bufp->buffer; \ ! 153: if (bufp->allocated == (1<<16)) goto too_big; \ ! 154: bufp->allocated *= 2; \ ! 155: if (bufp->allocated > (1<<16)) bufp->allocated = (1<<16); \ ! 156: if (!(bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated))) \ ! 157: goto memory_exhausted; \ ! 158: c = bufp->buffer - old_buffer; \ ! 159: b += c; \ ! 160: if (fixup_jump) \ ! 161: fixup_jump += c; \ ! 162: if (laststart) \ ! 163: laststart += c; \ ! 164: begalt += c; \ ! 165: if (pending_exact) \ ! 166: pending_exact += c; \ ! 167: } ! 168: ! 169: static int store_jump (), insert_jump (); ! 170: ! 171: char * ! 172: re_compile_pattern (pattern, size, bufp) ! 173: char *pattern; ! 174: int size; ! 175: struct re_pattern_buffer *bufp; ! 176: { ! 177: register char *b = bufp->buffer; ! 178: register char *p = pattern; ! 179: char *pend = pattern + size; ! 180: register unsigned c, c1; ! 181: char *p1; ! 182: unsigned char *translate = (unsigned char *) bufp->translate; ! 183: ! 184: /* address of the count-byte of the most recently inserted "exactn" command. ! 185: This makes it possible to tell whether a new exact-match character ! 186: can be added to that command or requires a new "exactn" command. */ ! 187: ! 188: char *pending_exact = 0; ! 189: ! 190: /* address of the place where a forward-jump should go ! 191: to the end of the containing expression. ! 192: Each alternative of an "or", except the last, ends with a forward-jump ! 193: of this sort. */ ! 194: ! 195: char *fixup_jump = 0; ! 196: ! 197: /* address of start of the most recently finished expression. ! 198: This tells postfix * where to find the start of its operand. */ ! 199: ! 200: char *laststart = 0; ! 201: ! 202: /* In processing a repeat, 1 means zero matches is allowed */ ! 203: ! 204: char zero_times_ok; ! 205: ! 206: /* In processing a repeat, 1 means many matches is allowed */ ! 207: ! 208: char many_times_ok; ! 209: ! 210: /* address of beginning of regexp, or inside of last \( */ ! 211: ! 212: char *begalt = b; ! 213: ! 214: /* Stack of information saved by \( and restored by \). ! 215: Four stack elements are pushed by each \(: ! 216: First, the value of b. ! 217: Second, the value of fixup_jump. ! 218: Third, the value of regnum. ! 219: Fourth, the value of begalt. */ ! 220: ! 221: int stackb[40]; ! 222: int *stackp = stackb; ! 223: int *stacke = stackb + 40; ! 224: int *stackt; ! 225: ! 226: /* Counts \('s as they are encountered. Remembered for the matching \), ! 227: where it becomes the "register number" to put in the stop_memory command */ ! 228: ! 229: int regnum = 1; ! 230: ! 231: bufp->fastmap_accurate = 0; ! 232: ! 233: #ifndef emacs ! 234: #ifndef SYNTAX_TABLE ! 235: /* ! 236: * Initialize the syntax table. ! 237: */ ! 238: init_syntax_once(); ! 239: #endif ! 240: #endif ! 241: ! 242: if (bufp->allocated == 0) ! 243: { ! 244: bufp->allocated = 28; ! 245: if (bufp->buffer) ! 246: /* EXTEND_BUFFER loses when bufp->allocated is 0 */ ! 247: bufp->buffer = (char *) realloc (bufp->buffer, 28); ! 248: else ! 249: /* Caller did not allocate a buffer. Do it for him */ ! 250: bufp->buffer = (char *) malloc (28); ! 251: if (!bufp->buffer) goto memory_exhausted; ! 252: begalt = b = bufp->buffer; ! 253: } ! 254: ! 255: while (p != pend) ! 256: { ! 257: if (b - bufp->buffer > bufp->allocated - 10) ! 258: /* Note that EXTEND_BUFFER clobbers c */ ! 259: EXTEND_BUFFER; ! 260: ! 261: PATFETCH (c); ! 262: ! 263: switch (c) ! 264: { ! 265: case '$': ! 266: if (obscure_syntax & RE_TIGHT_VBAR) ! 267: { ! 268: if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS) && p != pend) ! 269: goto normal_char; ! 270: /* Make operand of last vbar end before this `$'. */ ! 271: if (fixup_jump) ! 272: store_jump (fixup_jump, jump, b); ! 273: fixup_jump = 0; ! 274: PATPUSH (endline); ! 275: break; ! 276: } ! 277: ! 278: /* $ means succeed if at end of line, but only in special contexts. ! 279: If randomly in the middle of a pattern, it is a normal character. */ ! 280: if (p == pend || *p == '\n' ! 281: || (obscure_syntax & RE_CONTEXT_INDEP_OPS) ! 282: || (obscure_syntax & RE_NO_BK_PARENS ! 283: ? *p == ')' ! 284: : *p == '\\' && p[1] == ')') ! 285: || (obscure_syntax & RE_NO_BK_VBAR ! 286: ? *p == '|' ! 287: : *p == '\\' && p[1] == '|')) ! 288: { ! 289: PATPUSH (endline); ! 290: break; ! 291: } ! 292: goto normal_char; ! 293: ! 294: case '^': ! 295: /* ^ means succeed if at beg of line, but only if no preceding pattern. */ ! 296: ! 297: if (laststart && p[-2] != '\n' ! 298: && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS)) ! 299: goto normal_char; ! 300: if (obscure_syntax & RE_TIGHT_VBAR) ! 301: { ! 302: if (p != pattern + 1 ! 303: && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS)) ! 304: goto normal_char; ! 305: PATPUSH (begline); ! 306: begalt = b; ! 307: } ! 308: else ! 309: PATPUSH (begline); ! 310: break; ! 311: ! 312: case '+': ! 313: case '?': ! 314: if (obscure_syntax & RE_BK_PLUS_QM) ! 315: goto normal_char; ! 316: handle_plus: ! 317: case '*': ! 318: /* If there is no previous pattern, char not special. */ ! 319: if (!laststart && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS)) ! 320: goto normal_char; ! 321: /* If there is a sequence of repetition chars, ! 322: collapse it down to equivalent to just one. */ ! 323: zero_times_ok = 0; ! 324: many_times_ok = 0; ! 325: while (1) ! 326: { ! 327: zero_times_ok |= c != '+'; ! 328: many_times_ok |= c != '?'; ! 329: if (p == pend) ! 330: break; ! 331: PATFETCH (c); ! 332: if (c == '*') ! 333: ; ! 334: else if (!(obscure_syntax & RE_BK_PLUS_QM) ! 335: && (c == '+' || c == '?')) ! 336: ; ! 337: else if ((obscure_syntax & RE_BK_PLUS_QM) ! 338: && c == '\\') ! 339: { ! 340: int c1; ! 341: PATFETCH (c1); ! 342: if (!(c1 == '+' || c1 == '?')) ! 343: { ! 344: PATUNFETCH; ! 345: PATUNFETCH; ! 346: break; ! 347: } ! 348: c = c1; ! 349: } ! 350: else ! 351: { ! 352: PATUNFETCH; ! 353: break; ! 354: } ! 355: } ! 356: ! 357: /* Star, etc. applied to an empty pattern is equivalent ! 358: to an empty pattern. */ ! 359: if (!laststart) ! 360: break; ! 361: ! 362: /* Now we know whether 0 matches is allowed, ! 363: and whether 2 or more matches is allowed. */ ! 364: if (many_times_ok) ! 365: { ! 366: /* If more than one repetition is allowed, ! 367: put in a backward jump at the end. */ ! 368: store_jump (b, maybe_finalize_jump, laststart - 3); ! 369: b += 3; ! 370: } ! 371: insert_jump (on_failure_jump, laststart, b + 3, b); ! 372: pending_exact = 0; ! 373: b += 3; ! 374: if (!zero_times_ok) ! 375: { ! 376: /* At least one repetition required: insert before the loop ! 377: a skip over the initial on-failure-jump instruction */ ! 378: insert_jump (dummy_failure_jump, laststart, laststart + 6, b); ! 379: b += 3; ! 380: } ! 381: break; ! 382: ! 383: case '.': ! 384: laststart = b; ! 385: PATPUSH (anychar); ! 386: break; ! 387: ! 388: case '[': ! 389: while (b - bufp->buffer ! 390: > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH) ! 391: /* Note that EXTEND_BUFFER clobbers c */ ! 392: EXTEND_BUFFER; ! 393: ! 394: laststart = b; ! 395: if (*p == '^') ! 396: PATPUSH (charset_not), p++; ! 397: else ! 398: PATPUSH (charset); ! 399: p1 = p; ! 400: ! 401: PATPUSH ((1 << BYTEWIDTH) / BYTEWIDTH); ! 402: /* Clear the whole map */ ! 403: bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH); ! 404: /* Read in characters and ranges, setting map bits */ ! 405: while (1) ! 406: { ! 407: PATFETCH (c); ! 408: if (c == ']' && p != p1 + 1) break; ! 409: if (*p == '-' && p[1] != ']') ! 410: { ! 411: PATFETCH (c1); ! 412: PATFETCH (c1); ! 413: if (translate) ! 414: while (c <= c1) ! 415: { ! 416: register unsigned char mapped_c = translate[c]; ! 417: b[mapped_c / BYTEWIDTH] |= 1 << (mapped_c % BYTEWIDTH); ! 418: c++; ! 419: } ! 420: else ! 421: while (c <= c1) ! 422: b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH), c++; ! 423: } ! 424: else ! 425: { ! 426: if (translate) ! 427: c = translate[c]; ! 428: b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH); ! 429: } ! 430: } ! 431: /* Discard any bitmap bytes that are all 0 at the end of the map. ! 432: Decrement the map-length byte too. */ ! 433: while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) ! 434: b[-1]--; ! 435: b += b[-1]; ! 436: break; ! 437: ! 438: case '(': ! 439: if (! (obscure_syntax & RE_NO_BK_PARENS)) ! 440: goto normal_char; ! 441: else ! 442: goto handle_open; ! 443: ! 444: case ')': ! 445: if (! (obscure_syntax & RE_NO_BK_PARENS)) ! 446: goto normal_char; ! 447: else ! 448: goto handle_close; ! 449: ! 450: case '\n': ! 451: if (! (obscure_syntax & RE_NEWLINE_OR)) ! 452: goto normal_char; ! 453: else ! 454: goto handle_bar; ! 455: ! 456: case '|': ! 457: if (! (obscure_syntax & RE_NO_BK_VBAR)) ! 458: goto normal_char; ! 459: else ! 460: goto handle_bar; ! 461: ! 462: case '\\': ! 463: if (p == pend) goto invalid_pattern; ! 464: PATFETCH_RAW (c); ! 465: switch (c) ! 466: { ! 467: case '(': ! 468: if (obscure_syntax & RE_NO_BK_PARENS) ! 469: goto normal_backsl; ! 470: handle_open: ! 471: if (stackp == stacke) goto nesting_too_deep; ! 472: if (regnum < RE_NREGS) ! 473: { ! 474: PATPUSH (start_memory); ! 475: PATPUSH (regnum); ! 476: } ! 477: *stackp++ = b - bufp->buffer; ! 478: *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0; ! 479: *stackp++ = regnum++; ! 480: *stackp++ = begalt - bufp->buffer; ! 481: fixup_jump = 0; ! 482: laststart = 0; ! 483: begalt = b; ! 484: break; ! 485: ! 486: case ')': ! 487: if (obscure_syntax & RE_NO_BK_PARENS) ! 488: goto normal_backsl; ! 489: handle_close: ! 490: if (stackp == stackb) goto unmatched_close; ! 491: begalt = *--stackp + bufp->buffer; ! 492: if (fixup_jump) ! 493: store_jump (fixup_jump, jump, b); ! 494: if (stackp[-1] < RE_NREGS) ! 495: { ! 496: PATPUSH (stop_memory); ! 497: PATPUSH (stackp[-1]); ! 498: } ! 499: stackp -= 2; ! 500: fixup_jump = 0; ! 501: if (*stackp) ! 502: fixup_jump = *stackp + bufp->buffer - 1; ! 503: laststart = *--stackp + bufp->buffer; ! 504: break; ! 505: ! 506: case '|': ! 507: if (obscure_syntax & RE_NO_BK_VBAR) ! 508: goto normal_backsl; ! 509: handle_bar: ! 510: insert_jump (on_failure_jump, begalt, b + 6, b); ! 511: pending_exact = 0; ! 512: b += 3; ! 513: if (fixup_jump) ! 514: store_jump (fixup_jump, jump, b); ! 515: fixup_jump = b; ! 516: b += 3; ! 517: laststart = 0; ! 518: begalt = b; ! 519: break; ! 520: ! 521: #ifdef emacs ! 522: case '=': ! 523: PATPUSH (at_dot); ! 524: break; ! 525: ! 526: case 's': ! 527: laststart = b; ! 528: PATPUSH (syntaxspec); ! 529: PATFETCH (c); ! 530: PATPUSH (syntax_spec_code[c]); ! 531: break; ! 532: ! 533: case 'S': ! 534: laststart = b; ! 535: PATPUSH (notsyntaxspec); ! 536: PATFETCH (c); ! 537: PATPUSH (syntax_spec_code[c]); ! 538: break; ! 539: #endif emacs ! 540: ! 541: case 'w': ! 542: laststart = b; ! 543: PATPUSH (wordchar); ! 544: break; ! 545: ! 546: case 'W': ! 547: laststart = b; ! 548: PATPUSH (notwordchar); ! 549: break; ! 550: ! 551: case '<': ! 552: PATPUSH (wordbeg); ! 553: break; ! 554: ! 555: case '>': ! 556: PATPUSH (wordend); ! 557: break; ! 558: ! 559: case 'b': ! 560: PATPUSH (wordbound); ! 561: break; ! 562: ! 563: case 'B': ! 564: PATPUSH (notwordbound); ! 565: break; ! 566: ! 567: case '`': ! 568: PATPUSH (begbuf); ! 569: break; ! 570: ! 571: case '\'': ! 572: PATPUSH (endbuf); ! 573: break; ! 574: ! 575: case '1': ! 576: case '2': ! 577: case '3': ! 578: case '4': ! 579: case '5': ! 580: case '6': ! 581: case '7': ! 582: case '8': ! 583: case '9': ! 584: c1 = c - '0'; ! 585: if (c1 >= regnum) ! 586: goto normal_char; ! 587: for (stackt = stackp - 2; stackt > stackb; stackt -= 4) ! 588: if (*stackt == c1) ! 589: goto normal_char; ! 590: laststart = b; ! 591: PATPUSH (duplicate); ! 592: PATPUSH (c1); ! 593: break; ! 594: ! 595: case '+': ! 596: case '?': ! 597: if (obscure_syntax & RE_BK_PLUS_QM) ! 598: goto handle_plus; ! 599: ! 600: default: ! 601: normal_backsl: ! 602: /* You might think it would be useful for \ to mean ! 603: not to translate; but if we don't translate it ! 604: it will never match anything. */ ! 605: if (translate) c = translate[c]; ! 606: goto normal_char; ! 607: } ! 608: break; ! 609: ! 610: default: ! 611: normal_char: ! 612: if (!pending_exact || pending_exact + *pending_exact + 1 != b ! 613: || *pending_exact == 0177 || *p == '*' || *p == '^' ! 614: || ((obscure_syntax & RE_BK_PLUS_QM) ! 615: ? *p == '\\' && (p[1] == '+' || p[1] == '?') ! 616: : (*p == '+' || *p == '?'))) ! 617: { ! 618: laststart = b; ! 619: PATPUSH (exactn); ! 620: pending_exact = b; ! 621: PATPUSH (0); ! 622: } ! 623: PATPUSH (c); ! 624: (*pending_exact)++; ! 625: } ! 626: } ! 627: ! 628: if (fixup_jump) ! 629: store_jump (fixup_jump, jump, b); ! 630: ! 631: if (stackp != stackb) goto unmatched_open; ! 632: ! 633: bufp->used = b - bufp->buffer; ! 634: return 0; ! 635: ! 636: invalid_pattern: ! 637: return "Invalid regular expression"; ! 638: ! 639: unmatched_open: ! 640: return "Unmatched \\("; ! 641: ! 642: unmatched_close: ! 643: return "Unmatched \\)"; ! 644: ! 645: end_of_pattern: ! 646: return "Premature end of regular expression"; ! 647: ! 648: nesting_too_deep: ! 649: return "Nesting too deep"; ! 650: ! 651: too_big: ! 652: return "Regular expression too big"; ! 653: ! 654: memory_exhausted: ! 655: return "Memory exhausted"; ! 656: } ! 657: ! 658: /* Store where `from' points a jump operation to jump to where `to' points. ! 659: `opcode' is the opcode to store. */ ! 660: ! 661: static int ! 662: store_jump (from, opcode, to) ! 663: char *from, *to; ! 664: char opcode; ! 665: { ! 666: from[0] = opcode; ! 667: from[1] = (to - (from + 3)) & 0377; ! 668: from[2] = (to - (from + 3)) >> 8; ! 669: } ! 670: ! 671: /* Open up space at char FROM, and insert there a jump to TO. ! 672: CURRENT_END gives te end of the storage no in use, ! 673: so we know how much data to copy up. ! 674: OP is the opcode of the jump to insert. ! 675: ! 676: If you call this function, you must zero out pending_exact. */ ! 677: ! 678: static int ! 679: insert_jump (op, from, to, current_end) ! 680: char op; ! 681: char *from, *to, *current_end; ! 682: { ! 683: register char *pto = current_end + 3; ! 684: register char *pfrom = current_end; ! 685: while (pfrom != from) ! 686: *--pto = *--pfrom; ! 687: store_jump (from, op, to); ! 688: } ! 689: ! 690: /* Given a pattern, compute a fastmap from it. ! 691: The fastmap records which of the (1 << BYTEWIDTH) possible characters ! 692: can start a string that matches the pattern. ! 693: This fastmap is used by re_search to skip quickly over totally implausible text. ! 694: ! 695: The caller must supply the address of a (1 << BYTEWIDTH)-byte data area ! 696: as bufp->fastmap. ! 697: The other components of bufp describe the pattern to be used. */ ! 698: ! 699: void ! 700: re_compile_fastmap (bufp) ! 701: struct re_pattern_buffer *bufp; ! 702: { ! 703: unsigned char *pattern = (unsigned char *) bufp->buffer; ! 704: int size = bufp->used; ! 705: register char *fastmap = bufp->fastmap; ! 706: register unsigned char *p = pattern; ! 707: register unsigned char *pend = pattern + size; ! 708: register int j, k; ! 709: unsigned char *translate = (unsigned char *) bufp->translate; ! 710: ! 711: unsigned char *stackb[NFAILURES]; ! 712: unsigned char **stackp = stackb; ! 713: ! 714: bzero (fastmap, (1 << BYTEWIDTH)); ! 715: bufp->fastmap_accurate = 1; ! 716: bufp->can_be_null = 0; ! 717: ! 718: while (p) ! 719: { ! 720: if (p == pend) ! 721: { ! 722: bufp->can_be_null = 1; ! 723: break; ! 724: } ! 725: #ifdef SWITCH_ENUM_BUG ! 726: switch ((int) ((enum regexpcode) *p++)) ! 727: #else ! 728: switch ((enum regexpcode) *p++) ! 729: #endif ! 730: { ! 731: case exactn: ! 732: if (translate) ! 733: fastmap[translate[p[1]]] = 1; ! 734: else ! 735: fastmap[p[1]] = 1; ! 736: break; ! 737: ! 738: case begline: ! 739: case before_dot: ! 740: case at_dot: ! 741: case after_dot: ! 742: case begbuf: ! 743: case endbuf: ! 744: case wordbound: ! 745: case notwordbound: ! 746: case wordbeg: ! 747: case wordend: ! 748: continue; ! 749: ! 750: case endline: ! 751: if (translate) ! 752: fastmap[translate['\n']] = 1; ! 753: else ! 754: fastmap['\n'] = 1; ! 755: if (bufp->can_be_null != 1) ! 756: bufp->can_be_null = 2; ! 757: break; ! 758: ! 759: case finalize_jump: ! 760: case maybe_finalize_jump: ! 761: case jump: ! 762: case dummy_failure_jump: ! 763: bufp->can_be_null = 1; ! 764: j = *p++ & 0377; ! 765: j += SIGN_EXTEND_CHAR (*(char *)p) << 8; ! 766: p += j + 1; /* The 1 compensates for missing ++ above */ ! 767: if (j > 0) ! 768: continue; ! 769: /* Jump backward reached implies we just went through ! 770: the body of a loop and matched nothing. ! 771: Opcode jumped to should be an on_failure_jump. ! 772: Just treat it like an ordinary jump. ! 773: For a * loop, it has pushed its failure point already; ! 774: if so, discard that as redundant. */ ! 775: if ((enum regexpcode) *p != on_failure_jump) ! 776: continue; ! 777: p++; ! 778: j = *p++ & 0377; ! 779: j += SIGN_EXTEND_CHAR (*(char *)p) << 8; ! 780: p += j + 1; /* The 1 compensates for missing ++ above */ ! 781: if (stackp != stackb && *stackp == p) ! 782: stackp--; ! 783: continue; ! 784: ! 785: case on_failure_jump: ! 786: j = *p++ & 0377; ! 787: j += SIGN_EXTEND_CHAR (*(char *)p) << 8; ! 788: p++; ! 789: *++stackp = p + j; ! 790: continue; ! 791: ! 792: case start_memory: ! 793: case stop_memory: ! 794: p++; ! 795: continue; ! 796: ! 797: case duplicate: ! 798: bufp->can_be_null = 1; ! 799: fastmap['\n'] = 1; ! 800: case anychar: ! 801: for (j = 0; j < (1 << BYTEWIDTH); j++) ! 802: if (j != '\n') ! 803: fastmap[j] = 1; ! 804: if (bufp->can_be_null) ! 805: return; ! 806: /* Don't return; check the alternative paths ! 807: so we can set can_be_null if appropriate. */ ! 808: break; ! 809: ! 810: case wordchar: ! 811: for (j = 0; j < (1 << BYTEWIDTH); j++) ! 812: if (SYNTAX (j) == Sword) ! 813: fastmap[j] = 1; ! 814: break; ! 815: ! 816: case notwordchar: ! 817: for (j = 0; j < (1 << BYTEWIDTH); j++) ! 818: if (SYNTAX (j) != Sword) ! 819: fastmap[j] = 1; ! 820: break; ! 821: ! 822: #ifdef emacs ! 823: case syntaxspec: ! 824: k = *p++; ! 825: for (j = 0; j < (1 << BYTEWIDTH); j++) ! 826: if (SYNTAX (j) == (enum syntaxcode) k) ! 827: fastmap[j] = 1; ! 828: break; ! 829: ! 830: case notsyntaxspec: ! 831: k = *p++; ! 832: for (j = 0; j < (1 << BYTEWIDTH); j++) ! 833: if (SYNTAX (j) != (enum syntaxcode) k) ! 834: fastmap[j] = 1; ! 835: break; ! 836: #endif emacs ! 837: ! 838: case charset: ! 839: for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ! 840: if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ! 841: { ! 842: if (translate) ! 843: fastmap[translate[j]] = 1; ! 844: else ! 845: fastmap[j] = 1; ! 846: } ! 847: break; ! 848: ! 849: case charset_not: ! 850: /* Chars beyond end of map must be allowed */ ! 851: for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) ! 852: if (translate) ! 853: fastmap[translate[j]] = 1; ! 854: else ! 855: fastmap[j] = 1; ! 856: ! 857: for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ! 858: if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) ! 859: { ! 860: if (translate) ! 861: fastmap[translate[j]] = 1; ! 862: else ! 863: fastmap[j] = 1; ! 864: } ! 865: break; ! 866: } ! 867: ! 868: /* Get here means we have successfully found the possible starting characters ! 869: of one path of the pattern. We need not follow this path any farther. ! 870: Instead, look at the next alternative remembered in the stack. */ ! 871: if (stackp != stackb) ! 872: p = *stackp--; ! 873: else ! 874: break; ! 875: } ! 876: } ! 877: ! 878: /* Like re_search_2, below, but only one string is specified. */ ! 879: ! 880: int ! 881: re_search (pbufp, string, size, startpos, range, regs) ! 882: struct re_pattern_buffer *pbufp; ! 883: char *string; ! 884: int size, startpos, range; ! 885: struct re_registers *regs; ! 886: { ! 887: return re_search_2 (pbufp, 0, 0, string, size, startpos, range, regs, size); ! 888: } ! 889: ! 890: /* Like re_match_2 but tries first a match starting at index STARTPOS, ! 891: then at STARTPOS + 1, and so on. ! 892: RANGE is the number of places to try before giving up. ! 893: If RANGE is negative, the starting positions tried are ! 894: STARTPOS, STARTPOS - 1, etc. ! 895: It is up to the caller to make sure that range is not so large ! 896: as to take the starting position outside of the input strings. ! 897: ! 898: The value returned is the position at which the match was found, ! 899: or -1 if no match was found, ! 900: or -2 if error (such as failure stack overflow). */ ! 901: ! 902: int ! 903: re_search_2 (pbufp, string1, size1, string2, size2, startpos, range, regs, mstop) ! 904: struct re_pattern_buffer *pbufp; ! 905: char *string1, *string2; ! 906: int size1, size2; ! 907: int startpos; ! 908: register int range; ! 909: struct re_registers *regs; ! 910: int mstop; ! 911: { ! 912: register char *fastmap = pbufp->fastmap; ! 913: register unsigned char *translate = (unsigned char *) pbufp->translate; ! 914: int total = size1 + size2; ! 915: int val; ! 916: ! 917: /* Update the fastmap now if not correct already */ ! 918: if (fastmap && !pbufp->fastmap_accurate) ! 919: re_compile_fastmap (pbufp); ! 920: ! 921: /* Don't waste time in a long search for a pattern ! 922: that says it is anchored. */ ! 923: if (pbufp->used > 0 && (enum regexpcode) pbufp->buffer[0] == begbuf ! 924: && range > 0) ! 925: { ! 926: if (startpos > 0) ! 927: return -1; ! 928: else ! 929: range = 1; ! 930: } ! 931: ! 932: while (1) ! 933: { ! 934: /* If a fastmap is supplied, skip quickly over characters ! 935: that cannot possibly be the start of a match. ! 936: Note, however, that if the pattern can possibly match ! 937: the null string, we must test it at each starting point ! 938: so that we take the first null string we get. */ ! 939: ! 940: if (fastmap && startpos < total && pbufp->can_be_null != 1) ! 941: { ! 942: if (range > 0) ! 943: { ! 944: register int lim = 0; ! 945: register unsigned char *p; ! 946: int irange = range; ! 947: if (startpos < size1 && startpos + range >= size1) ! 948: lim = range - (size1 - startpos); ! 949: ! 950: p = ((unsigned char *) ! 951: &(startpos >= size1 ? string2 - size1 : string1)[startpos]); ! 952: ! 953: if (translate) ! 954: { ! 955: while (range > lim && !fastmap[translate[*p++]]) ! 956: range--; ! 957: } ! 958: else ! 959: { ! 960: while (range > lim && !fastmap[*p++]) ! 961: range--; ! 962: } ! 963: startpos += irange - range; ! 964: } ! 965: else ! 966: { ! 967: register unsigned char c; ! 968: if (startpos >= size1) ! 969: c = string2[startpos - size1]; ! 970: else ! 971: c = string1[startpos]; ! 972: c &= 0xff; ! 973: if (translate ? !fastmap[translate[c]] : !fastmap[c]) ! 974: goto advance; ! 975: } ! 976: } ! 977: ! 978: if (range >= 0 && startpos == total ! 979: && fastmap && pbufp->can_be_null == 0) ! 980: return -1; ! 981: ! 982: val = re_match_2 (pbufp, string1, size1, string2, size2, startpos, regs, ! 983: mstop); ! 984: /* Propagate error indication if worse than mere failure. */ ! 985: if (val == -2) ! 986: return -2; ! 987: /* Return position on success. */ ! 988: if (0 <= val) ! 989: return startpos; ! 990: ! 991: #ifdef C_ALLOCA ! 992: alloca (0); ! 993: #endif /* C_ALLOCA */ ! 994: ! 995: advance: ! 996: if (!range) break; ! 997: if (range > 0) range--, startpos++; else range++, startpos--; ! 998: } ! 999: return -1; ! 1000: } ! 1001: ! 1002: #ifndef emacs /* emacs never uses this */ ! 1003: int ! 1004: re_match (pbufp, string, size, pos, regs) ! 1005: struct re_pattern_buffer *pbufp; ! 1006: char *string; ! 1007: int size, pos; ! 1008: struct re_registers *regs; ! 1009: { ! 1010: return re_match_2 (pbufp, 0, 0, string, size, pos, regs, size); ! 1011: } ! 1012: #endif /* emacs */ ! 1013: ! 1014: /* Maximum size of failure stack. Beyond this, overflow is an error. */ ! 1015: ! 1016: int re_max_failures = 2000; ! 1017: ! 1018: static int bcmp_translate(); ! 1019: /* Match the pattern described by PBUFP ! 1020: against data which is the virtual concatenation of STRING1 and STRING2. ! 1021: SIZE1 and SIZE2 are the sizes of the two data strings. ! 1022: Start the match at position POS. ! 1023: Do not consider matching past the position MSTOP. ! 1024: ! 1025: If pbufp->fastmap is nonzero, then it had better be up to date. ! 1026: ! 1027: The reason that the data to match are specified as two components ! 1028: which are to be regarded as concatenated ! 1029: is so this function can be used directly on the contents of an Emacs buffer. ! 1030: ! 1031: -1 is returned if there is no match. -2 is returned if there is ! 1032: an error (such as match stack overflow). Otherwise the value is the length ! 1033: of the substring which was matched. */ ! 1034: ! 1035: int ! 1036: re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop) ! 1037: struct re_pattern_buffer *pbufp; ! 1038: unsigned char *string1, *string2; ! 1039: int size1, size2; ! 1040: int pos; ! 1041: struct re_registers *regs; ! 1042: int mstop; ! 1043: { ! 1044: register unsigned char *p = (unsigned char *) pbufp->buffer; ! 1045: register unsigned char *pend = p + pbufp->used; ! 1046: /* End of first string */ ! 1047: unsigned char *end1; ! 1048: /* End of second string */ ! 1049: unsigned char *end2; ! 1050: /* Pointer just past last char to consider matching */ ! 1051: unsigned char *end_match_1, *end_match_2; ! 1052: register unsigned char *d, *dend; ! 1053: register int mcnt; ! 1054: unsigned char *translate = (unsigned char *) pbufp->translate; ! 1055: ! 1056: /* Failure point stack. Each place that can handle a failure further down the line ! 1057: pushes a failure point on this stack. It consists of two char *'s. ! 1058: The first one pushed is where to resume scanning the pattern; ! 1059: the second pushed is where to resume scanning the strings. ! 1060: If the latter is zero, the failure point is a "dummy". ! 1061: If a failure happens and the innermost failure point is dormant, ! 1062: it discards that failure point and tries the next one. */ ! 1063: ! 1064: unsigned char *initial_stack[2 * NFAILURES]; ! 1065: unsigned char **stackb = initial_stack; ! 1066: unsigned char **stackp = stackb, **stacke = &stackb[2 * NFAILURES]; ! 1067: ! 1068: /* Information on the "contents" of registers. ! 1069: These are pointers into the input strings; they record ! 1070: just what was matched (on this attempt) by some part of the pattern. ! 1071: The start_memory command stores the start of a register's contents ! 1072: and the stop_memory command stores the end. ! 1073: ! 1074: At that point, regstart[regnum] points to the first character in the register, ! 1075: regend[regnum] points to the first character beyond the end of the register, ! 1076: regstart_seg1[regnum] is true iff regstart[regnum] points into string1, ! 1077: and regend_seg1[regnum] is true iff regend[regnum] points into string1. */ ! 1078: ! 1079: unsigned char *regstart[RE_NREGS]; ! 1080: unsigned char *regend[RE_NREGS]; ! 1081: unsigned char regstart_seg1[RE_NREGS], regend_seg1[RE_NREGS]; ! 1082: ! 1083: /* Set up pointers to ends of strings. ! 1084: Don't allow the second string to be empty unless both are empty. */ ! 1085: if (!size2) ! 1086: { ! 1087: string2 = string1; ! 1088: size2 = size1; ! 1089: string1 = 0; ! 1090: size1 = 0; ! 1091: } ! 1092: end1 = string1 + size1; ! 1093: end2 = string2 + size2; ! 1094: ! 1095: /* Compute where to stop matching, within the two strings */ ! 1096: if (mstop <= size1) ! 1097: { ! 1098: end_match_1 = string1 + mstop; ! 1099: end_match_2 = string2; ! 1100: } ! 1101: else ! 1102: { ! 1103: end_match_1 = end1; ! 1104: end_match_2 = string2 + mstop - size1; ! 1105: } ! 1106: ! 1107: /* Initialize \) text positions to -1 ! 1108: to mark ones that no \( or \) has been seen for. */ ! 1109: ! 1110: for (mcnt = 0; mcnt < sizeof (regend) / sizeof (*regend); mcnt++) ! 1111: regend[mcnt] = (unsigned char *) -1; ! 1112: ! 1113: /* `p' scans through the pattern as `d' scans through the data. ! 1114: `dend' is the end of the input string that `d' points within. ! 1115: `d' is advanced into the following input string whenever necessary, ! 1116: but this happens before fetching; ! 1117: therefore, at the beginning of the loop, ! 1118: `d' can be pointing at the end of a string, ! 1119: but it cannot equal string2. */ ! 1120: ! 1121: if (pos <= size1) ! 1122: d = string1 + pos, dend = end_match_1; ! 1123: else ! 1124: d = string2 + pos - size1, dend = end_match_2; ! 1125: ! 1126: /* Write PREFETCH; just before fetching a character with *d. */ ! 1127: #define PREFETCH \ ! 1128: while (d == dend) \ ! 1129: { if (dend == end_match_2) goto fail; /* end of string2 => failure */ \ ! 1130: d = string2; /* end of string1 => advance to string2. */ \ ! 1131: dend = end_match_2; } ! 1132: ! 1133: /* This loop loops over pattern commands. ! 1134: It exits by returning from the function if match is complete, ! 1135: or it drops through if match fails at this starting point in the input data. */ ! 1136: ! 1137: while (1) ! 1138: { ! 1139: if (p == pend) ! 1140: /* End of pattern means we have succeeded! */ ! 1141: { ! 1142: /* If caller wants register contents data back, convert it to indices */ ! 1143: if (regs) ! 1144: { ! 1145: regs->start[0] = pos; ! 1146: if (dend == end_match_1) ! 1147: regs->end[0] = d - string1; ! 1148: else ! 1149: regs->end[0] = d - string2 + size1; ! 1150: for (mcnt = 1; mcnt < RE_NREGS; mcnt++) ! 1151: { ! 1152: if (regend[mcnt] == (unsigned char *) -1) ! 1153: { ! 1154: regs->start[mcnt] = -1; ! 1155: regs->end[mcnt] = -1; ! 1156: continue; ! 1157: } ! 1158: if (regstart_seg1[mcnt]) ! 1159: regs->start[mcnt] = regstart[mcnt] - string1; ! 1160: else ! 1161: regs->start[mcnt] = regstart[mcnt] - string2 + size1; ! 1162: if (regend_seg1[mcnt]) ! 1163: regs->end[mcnt] = regend[mcnt] - string1; ! 1164: else ! 1165: regs->end[mcnt] = regend[mcnt] - string2 + size1; ! 1166: } ! 1167: } ! 1168: if (dend == end_match_1) ! 1169: return (d - string1 - pos); ! 1170: else ! 1171: return d - string2 + size1 - pos; ! 1172: } ! 1173: ! 1174: /* Otherwise match next pattern command */ ! 1175: #ifdef SWITCH_ENUM_BUG ! 1176: switch ((int) ((enum regexpcode) *p++)) ! 1177: #else ! 1178: switch ((enum regexpcode) *p++) ! 1179: #endif ! 1180: { ! 1181: ! 1182: /* \( is represented by a start_memory, \) by a stop_memory. ! 1183: Both of those commands contain a "register number" argument. ! 1184: The text matched within the \( and \) is recorded under that number. ! 1185: Then, \<digit> turns into a `duplicate' command which ! 1186: is followed by the numeric value of <digit> as the register number. */ ! 1187: ! 1188: case start_memory: ! 1189: regstart[*p] = d; ! 1190: regstart_seg1[*p++] = (dend == end_match_1); ! 1191: break; ! 1192: ! 1193: case stop_memory: ! 1194: regend[*p] = d; ! 1195: regend_seg1[*p++] = (dend == end_match_1); ! 1196: break; ! 1197: ! 1198: case duplicate: ! 1199: { ! 1200: int regno = *p++; /* Get which register to match against */ ! 1201: register unsigned char *d2, *dend2; ! 1202: ! 1203: /* Don't allow matching a register that hasn't been used. ! 1204: This isn't fully reliable in the current version, ! 1205: but it is better than crashing. */ ! 1206: if ((int) regend[regno] == -1) ! 1207: goto fail; ! 1208: ! 1209: d2 = regstart[regno]; ! 1210: dend2 = ((regstart_seg1[regno] == regend_seg1[regno]) ! 1211: ? regend[regno] : end_match_1); ! 1212: while (1) ! 1213: { ! 1214: /* Advance to next segment in register contents, if necessary */ ! 1215: while (d2 == dend2) ! 1216: { ! 1217: if (dend2 == end_match_2) break; ! 1218: if (dend2 == regend[regno]) break; ! 1219: d2 = string2, dend2 = regend[regno]; /* end of string1 => advance to string2. */ ! 1220: } ! 1221: /* At end of register contents => success */ ! 1222: if (d2 == dend2) break; ! 1223: ! 1224: /* Advance to next segment in data being matched, if necessary */ ! 1225: PREFETCH; ! 1226: ! 1227: /* mcnt gets # consecutive chars to compare */ ! 1228: mcnt = dend - d; ! 1229: if (mcnt > dend2 - d2) ! 1230: mcnt = dend2 - d2; ! 1231: /* Compare that many; failure if mismatch, else skip them. */ ! 1232: if (translate ? bcmp_translate (d, d2, mcnt, translate) : bcmp (d, d2, mcnt)) ! 1233: goto fail; ! 1234: d += mcnt, d2 += mcnt; ! 1235: } ! 1236: } ! 1237: break; ! 1238: ! 1239: case anychar: ! 1240: /* fetch a data character */ ! 1241: PREFETCH; ! 1242: /* Match anything but a newline. */ ! 1243: if ((translate ? translate[*d++] : *d++) == '\n') ! 1244: goto fail; ! 1245: break; ! 1246: ! 1247: case charset: ! 1248: case charset_not: ! 1249: { ! 1250: /* Nonzero for charset_not */ ! 1251: int not = 0; ! 1252: register int c; ! 1253: if (*(p - 1) == (unsigned char) charset_not) ! 1254: not = 1; ! 1255: ! 1256: /* fetch a data character */ ! 1257: PREFETCH; ! 1258: ! 1259: if (translate) ! 1260: c = translate [*d]; ! 1261: else ! 1262: c = *d; ! 1263: ! 1264: if (c < *p * BYTEWIDTH ! 1265: && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) ! 1266: not = !not; ! 1267: ! 1268: p += 1 + *p; ! 1269: ! 1270: if (!not) goto fail; ! 1271: d++; ! 1272: break; ! 1273: } ! 1274: ! 1275: case begline: ! 1276: if (d == string1 || d[-1] == '\n') ! 1277: break; ! 1278: goto fail; ! 1279: ! 1280: case endline: ! 1281: if (d == end2 ! 1282: || (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n')) ! 1283: break; ! 1284: goto fail; ! 1285: ! 1286: /* "or" constructs ("|") are handled by starting each alternative ! 1287: with an on_failure_jump that points to the start of the next alternative. ! 1288: Each alternative except the last ends with a jump to the joining point. ! 1289: (Actually, each jump except for the last one really jumps ! 1290: to the following jump, because tensioning the jumps is a hassle.) */ ! 1291: ! 1292: /* The start of a stupid repeat has an on_failure_jump that points ! 1293: past the end of the repeat text. ! 1294: This makes a failure point so that, on failure to match a repetition, ! 1295: matching restarts past as many repetitions have been found ! 1296: with no way to fail and look for another one. */ ! 1297: ! 1298: /* A smart repeat is similar but loops back to the on_failure_jump ! 1299: so that each repetition makes another failure point. */ ! 1300: ! 1301: case on_failure_jump: ! 1302: if (stackp == stacke) ! 1303: { ! 1304: unsigned char **stackx; ! 1305: if (stacke - stackb > re_max_failures) ! 1306: return -2; ! 1307: stackx = (unsigned char **) alloca (2 * (stacke - stackb) ! 1308: * sizeof (char *)); ! 1309: bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *)); ! 1310: stackp = stackx + (stackp - stackb); ! 1311: stacke = stackx + 2 * (stacke - stackb); ! 1312: stackb = stackx; ! 1313: } ! 1314: mcnt = *p++ & 0377; ! 1315: mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8; ! 1316: p++; ! 1317: *stackp++ = mcnt + p; ! 1318: *stackp++ = d; ! 1319: break; ! 1320: ! 1321: /* The end of a smart repeat has an maybe_finalize_jump back. ! 1322: Change it either to a finalize_jump or an ordinary jump. */ ! 1323: ! 1324: case maybe_finalize_jump: ! 1325: mcnt = *p++ & 0377; ! 1326: mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8; ! 1327: p++; ! 1328: /* Compare what follows with the begining of the repeat. ! 1329: If we can establish that there is nothing that they would ! 1330: both match, we can change to finalize_jump */ ! 1331: if (p == pend) ! 1332: p[-3] = (unsigned char) finalize_jump; ! 1333: else if (*p == (unsigned char) exactn ! 1334: || *p == (unsigned char) endline) ! 1335: { ! 1336: register int c = *p == (unsigned char) endline ? '\n' : p[2]; ! 1337: register unsigned char *p1 = p + mcnt; ! 1338: /* p1[0] ... p1[2] are an on_failure_jump. ! 1339: Examine what follows that */ ! 1340: if (p1[3] == (unsigned char) exactn && p1[5] != c) ! 1341: p[-3] = (unsigned char) finalize_jump; ! 1342: else if (p1[3] == (unsigned char) charset ! 1343: || p1[3] == (unsigned char) charset_not) ! 1344: { ! 1345: int not = p1[3] == (unsigned char) charset_not; ! 1346: if (c < p1[4] * BYTEWIDTH ! 1347: && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) ! 1348: not = !not; ! 1349: /* not is 1 if c would match */ ! 1350: /* That means it is not safe to finalize */ ! 1351: if (!not) ! 1352: p[-3] = (unsigned char) finalize_jump; ! 1353: } ! 1354: } ! 1355: p -= 2; ! 1356: if (p[-1] != (unsigned char) finalize_jump) ! 1357: { ! 1358: p[-1] = (unsigned char) jump; ! 1359: goto nofinalize; ! 1360: } ! 1361: ! 1362: /* The end of a stupid repeat has a finalize-jump ! 1363: back to the start, where another failure point will be made ! 1364: which will point after all the repetitions found so far. */ ! 1365: ! 1366: case finalize_jump: ! 1367: stackp -= 2; ! 1368: ! 1369: case jump: ! 1370: nofinalize: ! 1371: mcnt = *p++ & 0377; ! 1372: mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8; ! 1373: p += mcnt + 1; /* The 1 compensates for missing ++ above */ ! 1374: break; ! 1375: ! 1376: case dummy_failure_jump: ! 1377: if (stackp == stacke) ! 1378: { ! 1379: unsigned char **stackx ! 1380: = (unsigned char **) alloca (2 * (stacke - stackb) ! 1381: * sizeof (char *)); ! 1382: bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *)); ! 1383: stackp = stackx + (stackp - stackb); ! 1384: stacke = stackx + 2 * (stacke - stackb); ! 1385: stackb = stackx; ! 1386: } ! 1387: *stackp++ = 0; ! 1388: *stackp++ = 0; ! 1389: goto nofinalize; ! 1390: ! 1391: case wordbound: ! 1392: if (d == string1 /* Points to first char */ ! 1393: || d == end2 /* Points to end */ ! 1394: || (d == end1 && size2 == 0)) /* Points to end */ ! 1395: break; ! 1396: if ((SYNTAX (d[-1]) == Sword) ! 1397: != (SYNTAX (d == end1 ? *string2 : *d) == Sword)) ! 1398: break; ! 1399: goto fail; ! 1400: ! 1401: case notwordbound: ! 1402: if (d == string1 /* Points to first char */ ! 1403: || d == end2 /* Points to end */ ! 1404: || (d == end1 && size2 == 0)) /* Points to end */ ! 1405: goto fail; ! 1406: if ((SYNTAX (d[-1]) == Sword) ! 1407: != (SYNTAX (d == end1 ? *string2 : *d) == Sword)) ! 1408: goto fail; ! 1409: break; ! 1410: ! 1411: case wordbeg: ! 1412: if (d == end2 /* Points to end */ ! 1413: || (d == end1 && size2 == 0) /* Points to end */ ! 1414: || SYNTAX (* (d == end1 ? string2 : d)) != Sword) /* Next char not a letter */ ! 1415: goto fail; ! 1416: if (d == string1 /* Points to first char */ ! 1417: || SYNTAX (d[-1]) != Sword) /* prev char not letter */ ! 1418: break; ! 1419: goto fail; ! 1420: ! 1421: case wordend: ! 1422: if (d == string1 /* Points to first char */ ! 1423: || SYNTAX (d[-1]) != Sword) /* prev char not letter */ ! 1424: goto fail; ! 1425: if (d == end2 /* Points to end */ ! 1426: || (d == end1 && size2 == 0) /* Points to end */ ! 1427: || SYNTAX (d == end1 ? *string2 : *d) != Sword) /* Next char not a letter */ ! 1428: break; ! 1429: goto fail; ! 1430: ! 1431: #ifdef emacs ! 1432: case before_dot: ! 1433: if (PTR_CHAR_POS (d) + 1 >= point) ! 1434: goto fail; ! 1435: break; ! 1436: ! 1437: case at_dot: ! 1438: if (PTR_CHAR_POS (d) + 1 != point) ! 1439: goto fail; ! 1440: break; ! 1441: ! 1442: case after_dot: ! 1443: if (PTR_CHAR_POS (d) + 1 <= point) ! 1444: goto fail; ! 1445: break; ! 1446: ! 1447: case wordchar: ! 1448: mcnt = (int) Sword; ! 1449: goto matchsyntax; ! 1450: ! 1451: case syntaxspec: ! 1452: mcnt = *p++; ! 1453: matchsyntax: ! 1454: PREFETCH; ! 1455: if (SYNTAX (*d++) != (enum syntaxcode) mcnt) goto fail; ! 1456: break; ! 1457: ! 1458: case notwordchar: ! 1459: mcnt = (int) Sword; ! 1460: goto matchnotsyntax; ! 1461: ! 1462: case notsyntaxspec: ! 1463: mcnt = *p++; ! 1464: matchnotsyntax: ! 1465: PREFETCH; ! 1466: if (SYNTAX (*d++) == (enum syntaxcode) mcnt) goto fail; ! 1467: break; ! 1468: #else ! 1469: case wordchar: ! 1470: PREFETCH; ! 1471: if (SYNTAX (*d++) == 0) goto fail; ! 1472: break; ! 1473: ! 1474: case notwordchar: ! 1475: PREFETCH; ! 1476: if (SYNTAX (*d++) != 0) goto fail; ! 1477: break; ! 1478: #endif not emacs ! 1479: ! 1480: case begbuf: ! 1481: if (d == string1) /* Note, d cannot equal string2 */ ! 1482: break; /* unless string1 == string2. */ ! 1483: goto fail; ! 1484: ! 1485: case endbuf: ! 1486: if (d == end2 || (d == end1 && size2 == 0)) ! 1487: break; ! 1488: goto fail; ! 1489: ! 1490: case exactn: ! 1491: /* Match the next few pattern characters exactly. ! 1492: mcnt is how many characters to match. */ ! 1493: mcnt = *p++; ! 1494: if (translate) ! 1495: { ! 1496: do ! 1497: { ! 1498: PREFETCH; ! 1499: if (translate[*d++] != *p++) goto fail; ! 1500: } ! 1501: while (--mcnt); ! 1502: } ! 1503: else ! 1504: { ! 1505: do ! 1506: { ! 1507: PREFETCH; ! 1508: if (*d++ != *p++) goto fail; ! 1509: } ! 1510: while (--mcnt); ! 1511: } ! 1512: break; ! 1513: } ! 1514: continue; /* Successfully matched one pattern command; keep matching */ ! 1515: ! 1516: /* Jump here if any matching operation fails. */ ! 1517: fail: ! 1518: if (stackp != stackb) ! 1519: /* A restart point is known. Restart there and pop it. */ ! 1520: { ! 1521: if (!stackp[-2]) ! 1522: { /* If innermost failure point is dormant, flush it and keep looking */ ! 1523: stackp -= 2; ! 1524: goto fail; ! 1525: } ! 1526: d = *--stackp; ! 1527: p = *--stackp; ! 1528: if (d >= string1 && d <= end1) ! 1529: dend = end_match_1; ! 1530: } ! 1531: else break; /* Matching at this starting point really fails! */ ! 1532: } ! 1533: return -1; /* Failure to match */ ! 1534: } ! 1535: ! 1536: static int ! 1537: bcmp_translate (s1, s2, len, translate) ! 1538: unsigned char *s1, *s2; ! 1539: register int len; ! 1540: unsigned char *translate; ! 1541: { ! 1542: register unsigned char *p1 = s1, *p2 = s2; ! 1543: while (len) ! 1544: { ! 1545: if (translate [*p1++] != translate [*p2++]) return 1; ! 1546: len--; ! 1547: } ! 1548: return 0; ! 1549: } ! 1550: ! 1551: /* Entry points compatible with bsd4.2 regex library */ ! 1552: ! 1553: #ifndef emacs ! 1554: ! 1555: static struct re_pattern_buffer re_comp_buf; ! 1556: ! 1557: char * ! 1558: re_comp (s) ! 1559: char *s; ! 1560: { ! 1561: if (!s) ! 1562: { ! 1563: if (!re_comp_buf.buffer) ! 1564: return "No previous regular expression"; ! 1565: return 0; ! 1566: } ! 1567: ! 1568: if (!re_comp_buf.buffer) ! 1569: { ! 1570: if (!(re_comp_buf.buffer = (char *) malloc (200))) ! 1571: return "Memory exhausted"; ! 1572: re_comp_buf.allocated = 200; ! 1573: if (!(re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH))) ! 1574: return "Memory exhausted"; ! 1575: } ! 1576: return re_compile_pattern (s, strlen (s), &re_comp_buf); ! 1577: } ! 1578: ! 1579: int ! 1580: re_exec (s) ! 1581: char *s; ! 1582: { ! 1583: int len = strlen (s); ! 1584: return 0 <= re_search (&re_comp_buf, s, len, 0, len, 0); ! 1585: } ! 1586: ! 1587: #endif /* emacs */ ! 1588: ! 1589: #ifdef test ! 1590: ! 1591: #include <stdio.h> ! 1592: ! 1593: /* Indexed by a character, gives the upper case equivalent of the character */ ! 1594: ! 1595: static char upcase[0400] = ! 1596: { 000, 001, 002, 003, 004, 005, 006, 007, ! 1597: 010, 011, 012, 013, 014, 015, 016, 017, ! 1598: 020, 021, 022, 023, 024, 025, 026, 027, ! 1599: 030, 031, 032, 033, 034, 035, 036, 037, ! 1600: 040, 041, 042, 043, 044, 045, 046, 047, ! 1601: 050, 051, 052, 053, 054, 055, 056, 057, ! 1602: 060, 061, 062, 063, 064, 065, 066, 067, ! 1603: 070, 071, 072, 073, 074, 075, 076, 077, ! 1604: 0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107, ! 1605: 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117, ! 1606: 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127, ! 1607: 0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137, ! 1608: 0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107, ! 1609: 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117, ! 1610: 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127, ! 1611: 0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177, ! 1612: 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207, ! 1613: 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217, ! 1614: 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227, ! 1615: 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237, ! 1616: 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247, ! 1617: 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257, ! 1618: 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267, ! 1619: 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277, ! 1620: 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307, ! 1621: 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317, ! 1622: 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327, ! 1623: 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337, ! 1624: 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347, ! 1625: 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357, ! 1626: 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367, ! 1627: 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377 ! 1628: }; ! 1629: ! 1630: main (argc, argv) ! 1631: int argc; ! 1632: char **argv; ! 1633: { ! 1634: char pat[80]; ! 1635: struct re_pattern_buffer buf; ! 1636: int i; ! 1637: char c; ! 1638: char fastmap[(1 << BYTEWIDTH)]; ! 1639: ! 1640: /* Allow a command argument to specify the style of syntax. */ ! 1641: if (argc > 1) ! 1642: obscure_syntax = atoi (argv[1]); ! 1643: ! 1644: buf.allocated = 40; ! 1645: buf.buffer = (char *) malloc (buf.allocated); ! 1646: buf.fastmap = fastmap; ! 1647: buf.translate = upcase; ! 1648: ! 1649: while (1) ! 1650: { ! 1651: gets (pat); ! 1652: ! 1653: if (*pat) ! 1654: { ! 1655: re_compile_pattern (pat, strlen(pat), &buf); ! 1656: ! 1657: for (i = 0; i < buf.used; i++) ! 1658: printchar (buf.buffer[i]); ! 1659: ! 1660: putchar ('\n'); ! 1661: ! 1662: printf ("%d allocated, %d used.\n", buf.allocated, buf.used); ! 1663: ! 1664: re_compile_fastmap (&buf); ! 1665: printf ("Allowed by fastmap: "); ! 1666: for (i = 0; i < (1 << BYTEWIDTH); i++) ! 1667: if (fastmap[i]) printchar (i); ! 1668: putchar ('\n'); ! 1669: } ! 1670: ! 1671: gets (pat); /* Now read the string to match against */ ! 1672: ! 1673: i = re_match (&buf, pat, strlen (pat), 0, 0); ! 1674: printf ("Match value %d.\n", i); ! 1675: } ! 1676: } ! 1677: ! 1678: #ifdef NOTDEF ! 1679: print_buf (bufp) ! 1680: struct re_pattern_buffer *bufp; ! 1681: { ! 1682: int i; ! 1683: ! 1684: printf ("buf is :\n----------------\n"); ! 1685: for (i = 0; i < bufp->used; i++) ! 1686: printchar (bufp->buffer[i]); ! 1687: ! 1688: printf ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used); ! 1689: ! 1690: printf ("Allowed by fastmap: "); ! 1691: for (i = 0; i < (1 << BYTEWIDTH); i++) ! 1692: if (bufp->fastmap[i]) ! 1693: printchar (i); ! 1694: printf ("\nAllowed by translate: "); ! 1695: if (bufp->translate) ! 1696: for (i = 0; i < (1 << BYTEWIDTH); i++) ! 1697: if (bufp->translate[i]) ! 1698: printchar (i); ! 1699: printf ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't"); ! 1700: printf ("can %s be null\n----------", bufp->can_be_null ? "" : "not"); ! 1701: } ! 1702: #endif ! 1703: ! 1704: printchar (c) ! 1705: char c; ! 1706: { ! 1707: if (c < 041 || c >= 0177) ! 1708: { ! 1709: putchar ('\\'); ! 1710: putchar (((c >> 6) & 3) + '0'); ! 1711: putchar (((c >> 3) & 7) + '0'); ! 1712: putchar ((c & 7) + '0'); ! 1713: } ! 1714: else ! 1715: putchar (c); ! 1716: } ! 1717: ! 1718: error (string) ! 1719: char *string; ! 1720: { ! 1721: puts (string); ! 1722: exit (1); ! 1723: } ! 1724: ! 1725: #endif test
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.