|
|
1.1 ! root 1: /* Extended regular expression matching and search. ! 2: Copyright (C) 1985 Richard M. Stallman ! 3: ! 4: This program is distributed in the hope that it will be useful, ! 5: but without any warranty. No author or distributor ! 6: accepts responsibility to anyone for the consequences of using it ! 7: or for whether it serves any particular purpose or works at all, ! 8: unless he says so in writing. ! 9: ! 10: Permission is granted to anyone to distribute verbatim copies ! 11: of this program's source code as received, in any medium, provided that ! 12: the copyright notice, the nonwarraty notice above ! 13: and this permission notice are preserved, ! 14: and that the distributor grants the recipient all rights ! 15: for further redistribution as permitted by this notice, ! 16: and informs him of these rights. ! 17: ! 18: Permission is granted to distribute modified versions of this ! 19: program's source code, or of portions of it, under the above ! 20: conditions, plus the conditions that all changed files carry ! 21: prominent notices stating who last changed them and that the ! 22: derived material, including anything packaged together with it and ! 23: conceptually functioning as a modification of it rather than an ! 24: application of it, is in its entirety subject to a permission ! 25: notice identical to this one. ! 26: ! 27: Permission is granted to distribute this program (verbatim or ! 28: as modified) in compiled or executable form, provided verbatim ! 29: redistribution is permitted as stated above for source code, and ! 30: A. it is accompanied by the corresponding machine-readable ! 31: source code, under the above conditions, or ! 32: B. it is accompanied by a written offer, with no time limit, ! 33: to distribute the corresponding machine-readable source code, ! 34: under the above conditions, to any one, in return for reimbursement ! 35: of the cost of distribution. Verbatim redistribution of the ! 36: written offer must be permitted. Or, ! 37: C. it is distributed by someone who received only the ! 38: compiled or executable form, and is accompanied by a copy of the ! 39: written offer of source code which he received along with it. ! 40: ! 41: Permission is granted to distribute this program (verbatim or as modified) ! 42: in executable form as part of a larger system provided that the source ! 43: code for this program, including any modifications used, ! 44: is also distributed or offered as stated in the preceding paragraph. ! 45: ! 46: In other words, you are welcome to use, share and improve this program. ! 47: You are forbidden to forbid anyone else to use, share and improve ! 48: what you give them. Help stamp out software-hoarding! */ ! 49: ! 50: ! 51: /* To test, compile with -Dtest. ! 52: This Dtestable feature turns this into a self-contained program ! 53: which reads a pattern, describes how it compiles, ! 54: then reads a string and searches for it. */ ! 55: ! 56: ! 57: #ifdef emacs ! 58: ! 59: /* The `emacs' switch turns on certain special matching commands ! 60: that make sense only in emacs. */ ! 61: ! 62: #include "config.h" ! 63: #include "lisp.h" ! 64: #include "buffer.h" ! 65: #include "syntax.h" ! 66: ! 67: #else /* not emacs */ ! 68: ! 69: /* ! 70: * Define the syntax stuff, so we can do the \<...\> things. ! 71: */ ! 72: #define Sword 1 ! 73: ! 74: #define SYNTAX(c) syntax_table[c] ! 75: ! 76: static char syntax_table[256]; ! 77: ! 78: #endif /* not emacs */ ! 79: ! 80: #include "regex.h" ! 81: ! 82: /* Number of failure points to allocate space for initially, ! 83: when matching. If this number is exceeded, more space is allocated, ! 84: so it is not a hard limit. */ ! 85: ! 86: #ifndef NFAILURES ! 87: #define NFAILURES 80 ! 88: #endif NFAILURES ! 89: ! 90: /* width of a byte in bits */ ! 91: ! 92: #define BYTEWIDTH 8 ! 93: ! 94: /* These are the command codes that appear in compiled regular expressions, one per byte. ! 95: Some command codes are followed by argument bytes. ! 96: A command code can specify any interpretation whatever for its arguments. ! 97: Zero-bytes may appear in the compiled regular expression. */ ! 98: ! 99: enum regexpcode ! 100: { ! 101: unused, ! 102: exactn, /* followed by one byte giving n, and then by n literal bytes */ ! 103: begline, /* fails unless at beginning of line */ ! 104: endline, /* fails unless at end of line */ ! 105: jump, /* followed by two bytes giving relative address to jump to */ ! 106: on_failure_jump, /* followed by two bytes giving relative address of place ! 107: to resume at in case of failure. */ ! 108: finalize_jump, /* Throw away latest failure point and then jump to address. */ ! 109: maybe_finalize_jump, /* Like jump but finalize if safe to do so. ! 110: This is used to jump back to the beginning ! 111: of a repeat. If the command that follows ! 112: this jump is clearly incompatible with the ! 113: one at the beginning of the repeat, such that ! 114: we can be sure that there is no use backtracking ! 115: out of repetitions already completed, ! 116: then we finalize. */ ! 117: dummy_failure_jump, /* jump, and push a dummy failure point. ! 118: This failure point will be thrown away ! 119: if an attempt is made to use it for a failure. ! 120: A + construct makes this before the first repeat. */ ! 121: anychar, /* matches any one character */ ! 122: charset, /* matches any one char belonging to specified set. ! 123: First following byte is # bitmap bytes. ! 124: Then come bytes for a bit-map saying which chars are in. ! 125: Bits in each byte are ordered low-bit-first. ! 126: A character is in the set if its bit is 1. ! 127: A character too large to have a bit in the map ! 128: is automatically not in the set */ ! 129: charset_not, /* similar but match any character that is NOT one of those specified */ ! 130: start_memory, /* starts remembering the text that is matched ! 131: and stores it in a memory register. ! 132: followed by one byte containing the register number. ! 133: Register numbers must be in the range 0 through NREGS. */ ! 134: stop_memory, /* stops remembering the text that is matched ! 135: and stores it in a memory register. ! 136: followed by one byte containing the register number. ! 137: Register numbers must be in the range 0 through NREGS. */ ! 138: duplicate, /* match a duplicate of something remembered. ! 139: Followed by one byte containing the index of the memory register. */ ! 140: before_dot, /* Succeeds if before dot */ ! 141: at_dot, /* Succeeds if at dot */ ! 142: after_dot, /* Succeeds if after dot */ ! 143: begbuf, /* Succeeds if at beginning of buffer */ ! 144: endbuf, /* Succeeds if at end of buffer */ ! 145: wordchar, /* Matches any word-constituent character */ ! 146: notwordchar, /* Matches any char that is not a word-constituent */ ! 147: wordbeg, /* Succeeds if at word beginning */ ! 148: wordend, /* Succeeds if at word end */ ! 149: wordbound, /* Succeeds if at a word boundary */ ! 150: notwordbound, /* Succeeds if not at a word boundary */ ! 151: syntaxspec, /* Matches any character whose syntax is specified. ! 152: followed by a byte which contains a syntax code, Sword or such like */ ! 153: notsyntaxspec /* Matches any character whose syntax differs from the specified. */ ! 154: }; ! 155: ! 156: #ifndef SIGN_EXTEND_CHAR ! 157: #define SIGN_EXTEND_CHAR(x) (x) ! 158: #endif ! 159: ! 160: /* compile_pattern takes a regular-expression descriptor string in the user's format ! 161: and converts it into a buffer full of byte commands for matching. ! 162: ! 163: pattern is the address of the pattern string ! 164: size is the length of it. ! 165: bufp is a struct re_pattern_buffer * which points to the info ! 166: on where to store the byte commands. ! 167: This structure contains a char * which points to the ! 168: actual space, which should have been obtained with malloc. ! 169: compile_pattern may use realloc to grow the buffer space. ! 170: ! 171: The number of bytes of commands can be found out by looking in ! 172: the struct re_pattern_buffer that bufp pointed to, ! 173: after compile_pattern returns. ! 174: */ ! 175: ! 176: #define PATPUSH(ch) (*b++ = (char) (ch)) ! 177: ! 178: #define PATFETCH(c) \ ! 179: {if (p == pend) goto end_of_pattern; \ ! 180: c = * (unsigned char *) p++; \ ! 181: if (translate) c = translate[c]; } ! 182: ! 183: #define PATFETCH_RAW(c) \ ! 184: {if (p == pend) goto end_of_pattern; \ ! 185: c = * (unsigned char *) p++; } ! 186: ! 187: #define PATUNFETCH p-- ! 188: ! 189: #define EXTEND_BUFFER \ ! 190: { old_buffer = bufp->buffer; \ ! 191: if (bufp->allocated == (1<<16)) goto too_big; \ ! 192: bufp->allocated *= 2; \ ! 193: if (bufp->allocated > (1<<16)) bufp->allocated = (1<<16); \ ! 194: if (!(bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated))) \ ! 195: goto memory_exhausted; \ ! 196: c = bufp->buffer - old_buffer; \ ! 197: b += c; \ ! 198: if (fixup_jump) \ ! 199: fixup_jump += c; \ ! 200: if (laststart) \ ! 201: laststart += c; \ ! 202: begalt += c; \ ! 203: if (pending_exact) \ ! 204: pending_exact += c; \ ! 205: } ! 206: ! 207: static int store_jump (), insert_jump (); ! 208: ! 209: char * ! 210: re_compile_pattern (pattern, size, bufp) ! 211: char *pattern; ! 212: int size; ! 213: struct re_pattern_buffer *bufp; ! 214: { ! 215: register char *b = bufp->buffer; ! 216: register char *p = pattern; ! 217: char *pend = pattern + size; ! 218: register unsigned c, c1; ! 219: char *p1; ! 220: unsigned char *translate = (unsigned char *) bufp->translate; ! 221: ! 222: /* Temporary used when buffer is made bigger. */ ! 223: ! 224: char *old_buffer; ! 225: ! 226: /* address of the count-byte of the most recently inserted "exactn" command. ! 227: This makes it possible to tell whether a new exact-match character ! 228: can be added to that command or requires a new "exactn" command. */ ! 229: ! 230: char *pending_exact = 0; ! 231: ! 232: /* address of the place where a forward-jump should go ! 233: to the end of the containing expression. ! 234: Each alternative of an "or", except the last, ends with a forward-jump ! 235: of this sort. */ ! 236: ! 237: char *fixup_jump = 0; ! 238: ! 239: /* address of start of the most recently finished expression. ! 240: This tells postfix * where to find the start of its operand. */ ! 241: ! 242: char *laststart = 0; ! 243: ! 244: /* In processing a repeat, 1 means zero matches is allowed */ ! 245: ! 246: char zero_times_ok; ! 247: ! 248: /* In processing a repeat, 1 means many matches is allowed */ ! 249: ! 250: char many_times_ok; ! 251: ! 252: /* address of beginning of regexp, or inside of last \( */ ! 253: ! 254: char *begalt = b; ! 255: ! 256: /* Stack of information saved by \( and restored by \). ! 257: Four stack elements are pushed by each \(: ! 258: First, the value of b. ! 259: Second, the value of fixup_jump. ! 260: Third, the value of regnum. ! 261: Fourth, the value of begalt. */ ! 262: ! 263: int stackb[40]; ! 264: int *stackp = stackb; ! 265: int *stacke = stackb + 40; ! 266: int *stackt; ! 267: ! 268: /* Counts \('s as they are encountered. Remembered for the matching \), ! 269: where it becomes the "register number" to put in the stop_memory command */ ! 270: ! 271: int regnum = 1; ! 272: ! 273: bufp->fastmap_accurate = 0; ! 274: ! 275: #ifndef emacs ! 276: /* ! 277: * Initialize the syntax table. ! 278: */ ! 279: init_syntax_once(); ! 280: #endif emacs ! 281: ! 282: while (p != pend) ! 283: { ! 284: if (b - bufp->buffer ! 285: > bufp->allocated - 10) ! 286: /* Note that EXTEND_BUFFER clobbers c */ ! 287: EXTEND_BUFFER; ! 288: ! 289: PATFETCH (c); ! 290: ! 291: switch (c) ! 292: { ! 293: case '$': ! 294: /* $ means succeed if at end of line, but only in special contexts. ! 295: If randonly in the middle of a pattern, it is a normal character. */ ! 296: if (p == pend || (*p == '\\' && (p[1] == ')' || p[1] == '|'))) ! 297: { ! 298: PATPUSH (endline); ! 299: break; ! 300: } ! 301: goto normal_char; ! 302: ! 303: case '^': ! 304: /* ^ means succeed if at beg of line, but only if no preceding pattern. */ ! 305: if (laststart) goto normal_char; ! 306: PATPUSH (begline); ! 307: break; ! 308: ! 309: case '*': ! 310: case '+': ! 311: case '?': ! 312: /* If there is no previous pattern, char not special. */ ! 313: if (!laststart) ! 314: goto normal_char; ! 315: /* If there is a sequence of repetition chars, ! 316: collapse it down to equivalent to just one. */ ! 317: zero_times_ok = 0; ! 318: many_times_ok = 0; ! 319: while (1) ! 320: { ! 321: zero_times_ok |= c != '+'; ! 322: many_times_ok |= c != '?'; ! 323: if (p == pend) ! 324: break; ! 325: PATFETCH (c); ! 326: if (!(c == '*' || c == '+' || c == '?')) ! 327: { ! 328: PATUNFETCH; ! 329: break; ! 330: } ! 331: } ! 332: ! 333: /* Now we know whether 0 matches is allowed, ! 334: and whether 2 or more matches is allowed. */ ! 335: if (many_times_ok) ! 336: { ! 337: /* If more than one repetition is allowed, ! 338: put in a backward jump at the end. */ ! 339: store_jump (b, maybe_finalize_jump, laststart - 3); ! 340: b += 3; ! 341: } ! 342: insert_jump (on_failure_jump, laststart, b + 3, b); ! 343: pending_exact = 0; ! 344: b += 3; ! 345: if (!zero_times_ok) ! 346: { ! 347: /* At least one repetition required: insert before the loop ! 348: a skip over the initial on-failure-jump instruction */ ! 349: insert_jump (dummy_failure_jump, laststart, laststart + 6, b); ! 350: b += 3; ! 351: } ! 352: break; ! 353: ! 354: case '.': ! 355: laststart = b; ! 356: PATPUSH (anychar); ! 357: break; ! 358: ! 359: case '[': ! 360: if (b - bufp->buffer ! 361: > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH) ! 362: /* Note that EXTEND_BUFFER clobbers c */ ! 363: EXTEND_BUFFER; ! 364: ! 365: laststart = b; ! 366: if (*p == '^') ! 367: PATPUSH (charset_not), p++; ! 368: else ! 369: PATPUSH (charset); ! 370: p1 = p; ! 371: ! 372: PATPUSH ((1 << BYTEWIDTH) / BYTEWIDTH); ! 373: /* Clear the whole map */ ! 374: bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH); ! 375: /* Read in characters and ranges, setting map bits */ ! 376: while (1) ! 377: { ! 378: PATFETCH (c); ! 379: if (c == ']' && p != p1 + 1) break; ! 380: if (*p == '-') ! 381: { ! 382: PATFETCH (c1); ! 383: PATFETCH (c1); ! 384: while (c <= c1) ! 385: b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH), c++; ! 386: } ! 387: else ! 388: { ! 389: b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH); ! 390: } ! 391: } ! 392: /* Discard any bitmap bytes that are all 0 at the end of the map. ! 393: Decrement the map-length byte too. */ ! 394: while (b[-1] > 0 && b[b[-1] - 1] == 0) ! 395: b[-1]--; ! 396: b += b[-1]; ! 397: break; ! 398: ! 399: case '\\': ! 400: if (p == pend) goto invalid_pattern; ! 401: PATFETCH_RAW (c); ! 402: switch (c) ! 403: { ! 404: case '(': ! 405: if (stackp == stacke) goto nesting_too_deep; ! 406: if (regnum < RE_NREGS) ! 407: { ! 408: PATPUSH (start_memory); ! 409: PATPUSH (regnum); ! 410: } ! 411: *stackp++ = b - bufp->buffer; ! 412: *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0; ! 413: *stackp++ = regnum++; ! 414: *stackp++ = begalt - bufp->buffer; ! 415: fixup_jump = 0; ! 416: laststart = 0; ! 417: begalt = b; ! 418: break; ! 419: ! 420: case ')': ! 421: if (stackp == stackb) goto unmatched_close; ! 422: begalt = *--stackp + bufp->buffer; ! 423: if (fixup_jump) ! 424: store_jump (fixup_jump, jump, b); ! 425: if (stackp[-1] < RE_NREGS) ! 426: { ! 427: PATPUSH (stop_memory); ! 428: PATPUSH (stackp[-1]); ! 429: } ! 430: stackp -= 2; ! 431: fixup_jump = 0; ! 432: if (*stackp) ! 433: fixup_jump = *stackp + bufp->buffer - 1; ! 434: laststart = *--stackp + bufp->buffer; ! 435: break; ! 436: ! 437: case '|': ! 438: insert_jump (on_failure_jump, begalt, b + 6, b); ! 439: pending_exact = 0; ! 440: b += 3; ! 441: if (fixup_jump) ! 442: store_jump (fixup_jump, jump, b); ! 443: fixup_jump = b; ! 444: b += 3; ! 445: laststart = 0; ! 446: begalt = b; ! 447: break; ! 448: ! 449: #ifdef emacs ! 450: case '=': ! 451: PATPUSH (at_dot); ! 452: break; ! 453: ! 454: case 's': ! 455: laststart = b; ! 456: PATPUSH (syntaxspec); ! 457: PATFETCH (c); ! 458: PATPUSH (syntax_spec_code[c]); ! 459: break; ! 460: ! 461: case 'S': ! 462: laststart = b; ! 463: PATPUSH (notsyntaxspec); ! 464: PATFETCH (c); ! 465: PATPUSH (syntax_spec_code[c]); ! 466: break; ! 467: #endif emacs ! 468: ! 469: case 'w': ! 470: laststart = b; ! 471: PATPUSH (wordchar); ! 472: break; ! 473: ! 474: case 'W': ! 475: laststart = b; ! 476: PATPUSH (notwordchar); ! 477: break; ! 478: ! 479: case '<': ! 480: PATPUSH (wordbeg); ! 481: break; ! 482: ! 483: case '>': ! 484: PATPUSH (wordend); ! 485: break; ! 486: ! 487: case 'b': ! 488: PATPUSH (wordbound); ! 489: break; ! 490: ! 491: case 'B': ! 492: PATPUSH (notwordbound); ! 493: break; ! 494: ! 495: case '`': ! 496: PATPUSH (begbuf); ! 497: break; ! 498: ! 499: case '\'': ! 500: PATPUSH (endbuf); ! 501: break; ! 502: ! 503: case '1': ! 504: case '2': ! 505: case '3': ! 506: case '4': ! 507: case '5': ! 508: case '6': ! 509: case '7': ! 510: case '8': ! 511: case '9': ! 512: c1 = c - '0'; ! 513: if (c1 >= regnum) ! 514: goto normal_char; ! 515: for (stackt = stackp - 2; stackt > stackb; stackt -= 4) ! 516: if (*stackt == c1) ! 517: goto normal_char; ! 518: laststart = b; ! 519: PATPUSH (duplicate); ! 520: PATPUSH (c1); ! 521: break; ! 522: default: ! 523: goto normal_char; ! 524: } ! 525: break; ! 526: ! 527: default: ! 528: normal_char: ! 529: if (!pending_exact || pending_exact + *pending_exact + 1 != b ! 530: || *pending_exact == 0177 || *p == '*' || *p == '^' ! 531: || *p == '+' || *p == '?') ! 532: { ! 533: laststart = b; ! 534: PATPUSH (exactn); ! 535: pending_exact = b; ! 536: PATPUSH (0); ! 537: } ! 538: PATPUSH (c); ! 539: (*pending_exact)++; ! 540: } ! 541: } ! 542: ! 543: if (fixup_jump) ! 544: store_jump (fixup_jump, jump, b); ! 545: ! 546: if (stackp != stackb) goto unmatched_open; ! 547: ! 548: bufp->used = b - bufp->buffer; ! 549: return 0; ! 550: ! 551: invalid_pattern: ! 552: return "Invalid regular expression"; ! 553: ! 554: unmatched_open: ! 555: return "Unmatched \\("; ! 556: ! 557: unmatched_close: ! 558: return "Unmatched \\)"; ! 559: ! 560: end_of_pattern: ! 561: return "Premature end of regular expression"; ! 562: ! 563: nesting_too_deep: ! 564: return "Nesting too deep"; ! 565: ! 566: too_big: ! 567: return "Regular expression too big"; ! 568: ! 569: memory_exhausted: ! 570: return "Memory exhausted"; ! 571: } ! 572: ! 573: #ifndef emacs ! 574: init_syntax_once () ! 575: { ! 576: register int c; ! 577: static int done = 0; ! 578: ! 579: if (done) ! 580: return; ! 581: ! 582: bzero (syntax_table, sizeof syntax_table); ! 583: ! 584: for (c = 'a'; c <= 'z'; c++) ! 585: syntax_table[c] = Sword; ! 586: ! 587: for (c = 'A'; c <= 'Z'; c++) ! 588: syntax_table[c] = Sword; ! 589: ! 590: for (c = '0'; c <= '9'; c++) ! 591: syntax_table[c] = Sword; ! 592: ! 593: done = 1; ! 594: } ! 595: #endif not emacs ! 596: ! 597: /* Store where `from' points a jump operation to jump to where `to' points. ! 598: `opcode' is the opcode to store. */ ! 599: ! 600: static int ! 601: store_jump (from, opcode, to) ! 602: char *from, *to; ! 603: char opcode; ! 604: { ! 605: from[0] = opcode; ! 606: from[1] = (to - (from + 3)) & 0377; ! 607: from[2] = (to - (from + 3)) >> 8; ! 608: } ! 609: ! 610: /* Open up space at char FROM, and insert there a jump to TO. ! 611: CURRENT_END gives te end of the storage no in use, ! 612: so we know how much data to copy up. ! 613: OP is the opcode of the jump to insert. ! 614: ! 615: If you call this function, you must zero out pending_exact. */ ! 616: ! 617: static int ! 618: insert_jump (op, from, to, current_end) ! 619: char op; ! 620: char *from, *to, *current_end; ! 621: { ! 622: register char *pto = current_end + 3; ! 623: register char *pfrom = current_end; ! 624: while (pfrom != from) ! 625: *--pto = *--pfrom; ! 626: store_jump (from, op, to); ! 627: } ! 628: ! 629: /* Given a pattern, compute a fastmap from it. ! 630: The fastmap records which of the (1 << BYTEWIDTH) possible characters ! 631: can start a string that matches the pattern. ! 632: This fastmap is used by re_search to skip quickly over totally implausible text. ! 633: ! 634: The caller must supply the address of a (1 << BYTEWIDTH)-byte data area ! 635: as bufp->fastmap. ! 636: The other components of bufp describe the pattern to be used. */ ! 637: ! 638: re_compile_fastmap (bufp) ! 639: struct re_pattern_buffer *bufp; ! 640: { ! 641: char *pattern = bufp->buffer; ! 642: int size = bufp->used; ! 643: register char *fastmap = bufp->fastmap; ! 644: register char *p = pattern; ! 645: register char *pend = pattern + size; ! 646: register int j, k; ! 647: unsigned char *translate = (unsigned char *) bufp->translate; ! 648: ! 649: char *stackb[NFAILURES]; ! 650: char **stackp = stackb; ! 651: ! 652: bzero (fastmap, (1 << BYTEWIDTH)); ! 653: bufp->fastmap_accurate = 1; ! 654: bufp->can_be_null = 0; ! 655: ! 656: while (p) ! 657: { ! 658: if (p == pend) ! 659: { ! 660: bufp->can_be_null = 1; ! 661: break; ! 662: } ! 663: #ifdef SWITCH_ENUM_BUG ! 664: switch ((int) ((enum regexpcode) *p++)) ! 665: #else ! 666: switch ((enum regexpcode) *p++) ! 667: #endif ! 668: { ! 669: case exactn: ! 670: if (translate) ! 671: fastmap[translate[p[1]]] = 1; ! 672: else ! 673: fastmap[p[1]] = 1; ! 674: break; ! 675: ! 676: case begline: ! 677: case before_dot: ! 678: case at_dot: ! 679: case after_dot: ! 680: case begbuf: ! 681: case endbuf: ! 682: case wordbound: ! 683: case notwordbound: ! 684: case wordbeg: ! 685: case wordend: ! 686: continue; ! 687: ! 688: case endline: ! 689: if (translate) ! 690: fastmap[translate['\n']] = 1; ! 691: else ! 692: fastmap['\n'] = 1; ! 693: bufp->can_be_null = 1; ! 694: break; ! 695: ! 696: case finalize_jump: ! 697: case maybe_finalize_jump: ! 698: case jump: ! 699: case dummy_failure_jump: ! 700: bufp->can_be_null = 1; ! 701: j = *p++ & 0377; ! 702: j += SIGN_EXTEND_CHAR (*p++) << 8; ! 703: p += j; ! 704: if (j > 0) ! 705: continue; ! 706: /* Jump backward reached implies we just went through ! 707: the body of a loop and matched nothing. ! 708: Opcode jumped to should be an on_failure_jump. ! 709: Just treat it like an ordinary jump. ! 710: For a * loop, it has pushed its failure point already; ! 711: if so, discard that as redundant. */ ! 712: if ((enum regexpcode) *p != on_failure_jump) ! 713: continue; ! 714: p++; ! 715: j = *p++ & 0377; ! 716: j += SIGN_EXTEND_CHAR (*p++) << 8; ! 717: p += j; ! 718: if (stackp != stackb && *stackp == p) ! 719: stackp--; ! 720: continue; ! 721: ! 722: case on_failure_jump: ! 723: j = *p++ & 0377; ! 724: j += SIGN_EXTEND_CHAR (*p++) << 8; ! 725: *++stackp = p + j; ! 726: continue; ! 727: ! 728: case start_memory: ! 729: case stop_memory: ! 730: p++; ! 731: continue; ! 732: ! 733: case duplicate: ! 734: bufp->can_be_null = 1; ! 735: case anychar: ! 736: for (j = 0; j < (1 << BYTEWIDTH); j++) ! 737: fastmap[j] = 1; ! 738: return; ! 739: ! 740: case wordchar: ! 741: for (j = 0; j < (1 << BYTEWIDTH); j++) ! 742: if (SYNTAX (j) == Sword) ! 743: fastmap[j] = 1; ! 744: break; ! 745: ! 746: case notwordchar: ! 747: for (j = 0; j < (1 << BYTEWIDTH); j++) ! 748: if (SYNTAX (j) != Sword) ! 749: fastmap[j] = 1; ! 750: break; ! 751: ! 752: #ifdef emacs ! 753: case syntaxspec: ! 754: k = *p++; ! 755: for (j = 0; j < (1 << BYTEWIDTH); j++) ! 756: if (SYNTAX (j) == (enum syntaxcode) k) ! 757: fastmap[j] = 1; ! 758: break; ! 759: ! 760: case notsyntaxspec: ! 761: for (j = 0; j < (1 << BYTEWIDTH); j++) ! 762: if (SYNTAX (j) != (enum syntaxcode) k) ! 763: fastmap[j] = 1; ! 764: break; ! 765: #endif emacs ! 766: ! 767: case charset: ! 768: for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ! 769: if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ! 770: { ! 771: if (translate) ! 772: fastmap[translate[j]] = 1; ! 773: else ! 774: fastmap[j] = 1; ! 775: } ! 776: break; ! 777: ! 778: case charset_not: ! 779: /* Chars beyond end of map must be allowed */ ! 780: for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) ! 781: if (translate) ! 782: fastmap[translate[j]] = 1; ! 783: else ! 784: fastmap[j] = 1; ! 785: ! 786: for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ! 787: if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) ! 788: { ! 789: if (translate) ! 790: fastmap[translate[j]] = 1; ! 791: else ! 792: fastmap[j] = 1; ! 793: } ! 794: break; ! 795: } ! 796: ! 797: /* Get here means we have successfully found the possible starting characters ! 798: of one path of the pattern. We need not follow this path any farther. ! 799: Instead, look at the next alternative remembered in the stack. */ ! 800: if (stackp != stackb) ! 801: p = *stackp--; ! 802: else ! 803: break; ! 804: } ! 805: } ! 806: ! 807: /* Like re_search_2, below, but only one string is specified. */ ! 808: ! 809: re_search (pbufp, string, size, startpos, range, regs) ! 810: struct re_pattern_buffer *pbufp; ! 811: char *string; ! 812: int size, startpos, range; ! 813: struct re_registers *regs; ! 814: { ! 815: return re_search_2 (pbufp, 0, 0, string, size, startpos, range, regs, size); ! 816: } ! 817: ! 818: /* Like re_match_2 but tries first a match starting at index `startpos', ! 819: then at startpos + 1, and so on. ! 820: `range' is the number of places to try before giving up. ! 821: If `range' is negative, the starting positions tried are ! 822: startpos, startpos - 1, etc. ! 823: It is up to the caller to make sure that range is not so large ! 824: as to take the starting position outside of the input strings. ! 825: ! 826: The value returned is the position at which the match was found, ! 827: or -1 if no match was found. */ ! 828: ! 829: int ! 830: re_search_2 (pbufp, string1, size1, string2, size2, startpos, range, regs, mstop) ! 831: struct re_pattern_buffer *pbufp; ! 832: char *string1, *string2; ! 833: int size1, size2; ! 834: int startpos; ! 835: register int range; ! 836: struct re_registers *regs; ! 837: int mstop; ! 838: { ! 839: register char *fastmap = pbufp->fastmap; ! 840: register char *translate = pbufp->translate; ! 841: int total = size1 + size2; ! 842: ! 843: /* Update the fastmap now if not correct already */ ! 844: if (fastmap && !pbufp->fastmap_accurate) ! 845: re_compile_fastmap (pbufp); ! 846: ! 847: while (1) ! 848: { ! 849: /* If a fastmap is supplied, skip quickly over characters ! 850: that cannot possibly be the start of a match. ! 851: Note, however, that if the pattern can possibly match ! 852: the null string, we must test it at each starting point ! 853: so that we take the first null string we get. */ ! 854: ! 855: if (fastmap && startpos < total && !pbufp->can_be_null) ! 856: { ! 857: if (range > 0) ! 858: { ! 859: register int lim = 0; ! 860: register char *p; ! 861: int irange = range; ! 862: if (startpos < size1 && startpos + range >= size1) ! 863: lim = range - (size1 - startpos); ! 864: ! 865: p = &(startpos >= size1 ? string2 - size1 : string1)[startpos]; ! 866: ! 867: if (translate) ! 868: { ! 869: while (range > lim && !fastmap[translate[*p++]]) ! 870: range--; ! 871: } ! 872: else ! 873: { ! 874: while (range > lim && !fastmap[*p++]) ! 875: range--; ! 876: } ! 877: startpos += irange - range; ! 878: } ! 879: else ! 880: { ! 881: register char c; ! 882: if (startpos >= size1) c = string2[startpos - size1]; ! 883: else c = string1[startpos]; ! 884: if (translate ? !fastmap[translate[c]] : !fastmap[c]) ! 885: goto advance; ! 886: } ! 887: } ! 888: ! 889: if (range >= 0 && startpos == total ! 890: && fastmap && !pbufp->can_be_null) ! 891: return -1; ! 892: ! 893: if (0 <= re_match_2 (pbufp, string1, size1, string2, size2, startpos, regs, mstop)) ! 894: return startpos; ! 895: ! 896: advance: ! 897: if (!range) break; ! 898: if (range > 0) range--, startpos++; else range++, startpos--; ! 899: } ! 900: return -1; ! 901: } ! 902: ! 903: #ifndef emacs /* emacs never uses this */ ! 904: re_match (pbufp, string, size, pos, regs) ! 905: struct re_pattern_buffer *pbufp; ! 906: char *string; ! 907: int size, pos; ! 908: struct re_registers *regs; ! 909: { ! 910: return re_match_2 (pbufp, 0, 0, string, size, pos, regs, size); ! 911: } ! 912: #endif /* emacs */ ! 913: ! 914: /* Match the pattern described by `pbufp' ! 915: against data which is the virtual concatenation of `string1' and `string2'. ! 916: `size1' and `size2' are the sizes of the two data strings. ! 917: Start the match at position `pos'. ! 918: Do not consider matching past the position `mstop'. ! 919: ! 920: If pbufp->fastmap is nonzero, then it had better be up to date. ! 921: ! 922: The reason that the data to match is specified as two components ! 923: which are to be regarded as concatenated ! 924: is so that this function can be used directly on the contents of an Emacs buffer. ! 925: ! 926: -1 is returned if there is no match. Otherwise the value is the length ! 927: of the substring which was matched. ! 928: */ ! 929: ! 930: int ! 931: re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop) ! 932: struct re_pattern_buffer *pbufp; ! 933: char *string1, *string2; ! 934: int size1, size2; ! 935: int pos; ! 936: struct re_registers *regs; ! 937: int mstop; ! 938: { ! 939: register char *p = pbufp->buffer; ! 940: register char *pend = p + pbufp->used; ! 941: /* End of first string */ ! 942: char *end1; ! 943: /* End of second string */ ! 944: char *end2; ! 945: /* Pointer just past last char to consider matching */ ! 946: char *end_match_1, *end_match_2; ! 947: register char *d, *dend; ! 948: register int mcnt; ! 949: char *translate = pbufp->translate; ! 950: ! 951: /* Failure point stack. Each place that can handle a failure further down the line ! 952: pushes a failure point on this stack. It consists of two char *'s. ! 953: The first one pushed is where to resume scanning the pattern; ! 954: the second pushed is where to resume scanning the strings. ! 955: If the latter is zero, the failure point is a "dummy". ! 956: If a failure happens and the innermost failure point is dormant, ! 957: it discards that failure point and tries the next one. */ ! 958: ! 959: char **stackb = (char **) alloca (2 * NFAILURES * sizeof (char *)); ! 960: char **stackp = stackb, **stacke = &stackb[2 * NFAILURES]; ! 961: ! 962: /* Information on the "contents" of registers. ! 963: These are pointers into the input strings; they record ! 964: just what was matched (on this attempt) by some part of the pattern. ! 965: The start_memory command stores the start of a register's contents ! 966: and the stop_memory command stores the end. ! 967: ! 968: At that point, regstart[regnum] points to the first character in the register, ! 969: regend[regnum] points to the first character beyond the end of the register, ! 970: and regstart_segend[regnum] is either the same as regend[regnum] ! 971: or else points to the end of the input string into which regstart[regnum] points. ! 972: The latter case happens when regstart[regnum] is in string1 and ! 973: regend[regnum] is in string2. */ ! 974: ! 975: char *regstart[RE_NREGS]; ! 976: char *regstart_segend[RE_NREGS]; ! 977: char *regend[RE_NREGS]; ! 978: ! 979: /* Set up pointers to ends of strings. ! 980: Don't allow the second string to be empty unless both are empty. */ ! 981: if (!size2) ! 982: { ! 983: string2 = string1; ! 984: size2 = size1; ! 985: string1 = 0; ! 986: size1 = 0; ! 987: } ! 988: end1 = string1 + size1; ! 989: end2 = string2 + size2; ! 990: ! 991: /* Compute where to stop matching, within the two strings */ ! 992: if (mstop <= size1) ! 993: { ! 994: end_match_1 = string1 + mstop; ! 995: end_match_2 = string2; ! 996: } ! 997: else ! 998: { ! 999: end_match_1 = end1; ! 1000: end_match_2 = string2 + mstop - size1; ! 1001: } ! 1002: ! 1003: /* Initialize \( and \) text positions to -1 ! 1004: to mark ones that no \( or \) has been seen for. */ ! 1005: ! 1006: for (mcnt = 0; mcnt < sizeof (regstart) / sizeof (*regstart); mcnt++) ! 1007: regstart[mcnt] = (char *) -1; ! 1008: ! 1009: /* `p' scans through the pattern as `d' scans through the data. ! 1010: `dend' is the end of the input string that `d' points within. ! 1011: `d' is advanced into the following input string whenever necessary, ! 1012: but this happens before fetching; ! 1013: therefore, at the beginning of the loop, ! 1014: `d' can be pointing at the end of a string, ! 1015: but it cannot equal string2. */ ! 1016: ! 1017: if (pos <= size1) ! 1018: d = string1 + pos, dend = end_match_1; ! 1019: else ! 1020: d = string2 + pos - size1, dend = end_match_2; ! 1021: ! 1022: /* Write PREFETCH; just before fetching a character with *d. */ ! 1023: #define PREFETCH \ ! 1024: while (d == dend) \ ! 1025: { if (dend == end_match_2) goto fail; /* end of string2 => failure */ \ ! 1026: d = string2; /* end of string1 => advance to string2. */ \ ! 1027: dend = end_match_2; } ! 1028: ! 1029: /* This loop loops over pattern commands. ! 1030: It exits by returning from the function if match is complete, ! 1031: or it drops through if match fails at this starting point in the input data. */ ! 1032: ! 1033: while (1) ! 1034: { ! 1035: if (p == pend) ! 1036: /* End of pattern means we have succeeded! */ ! 1037: { ! 1038: /* If caller wants register contents data back, convert it to indices */ ! 1039: if (regs) ! 1040: { ! 1041: bzero (regs, sizeof (*regs)); ! 1042: ! 1043: regend[0] = d; ! 1044: regstart[0] = string1; ! 1045: for (mcnt = 0; mcnt < RE_NREGS; mcnt++) ! 1046: { ! 1047: if (mcnt && regstart[mcnt] == (char *) -1) continue; ! 1048: if (regstart[mcnt] - string1 < 0 || regstart[mcnt] - string1 > size1) ! 1049: regs->start[mcnt] = regstart[mcnt] - string2 + size1; ! 1050: else ! 1051: regs->start[mcnt] = regstart[mcnt] - string1; ! 1052: if (regend[mcnt] - string1 < 0 || regend[mcnt] - string1 > size1) ! 1053: regs->end[mcnt] = regend[mcnt] - string2 + size1; ! 1054: else ! 1055: regs->end[mcnt] = regend[mcnt] - string1; ! 1056: } ! 1057: regs->start[0] = pos; ! 1058: } ! 1059: if (d - string1 >= 0 && d - string1 <= size1) ! 1060: return d - string1 - pos; ! 1061: else ! 1062: return d - string2 + size1 - pos; ! 1063: } ! 1064: ! 1065: /* Otherwise match next pattern command */ ! 1066: #ifdef SWITCH_ENUM_BUG ! 1067: switch ((int) ((enum regexpcode) *p++)) ! 1068: #else ! 1069: switch ((enum regexpcode) *p++) ! 1070: #endif ! 1071: { ! 1072: ! 1073: /* \( is represented by a start_memory, \) by a stop_memory. ! 1074: Both of those commands contain a "register number" argument. ! 1075: The text matched within the \( and \) is recorded under that number. ! 1076: Then, \<digit> turns into a `duplicate' command which ! 1077: is followed by the numeric value of <digit> as the register number. */ ! 1078: ! 1079: case start_memory: ! 1080: regstart[*p] = d; ! 1081: regstart_segend[*p++] = dend; ! 1082: break; ! 1083: ! 1084: case stop_memory: ! 1085: regend[*p] = d; ! 1086: if (regstart_segend[*p] == dend) ! 1087: regstart_segend[*p] = d; ! 1088: p++; ! 1089: break; ! 1090: ! 1091: case duplicate: ! 1092: { ! 1093: int regno = *p++; /* Get which register to match against */ ! 1094: register char *d2, *dend2; ! 1095: ! 1096: d2 = regstart[regno]; ! 1097: dend2 = regstart_segend[regno]; ! 1098: while (1) ! 1099: { ! 1100: /* Advance to next segment in register contents, if necessary */ ! 1101: while (d2 == dend2) ! 1102: { ! 1103: if (dend2 == end_match_2) break; ! 1104: if (dend2 == regend[regno]) break; ! 1105: d2 = string2, dend2 = regend[regno]; /* end of string1 => advance to string2. */ ! 1106: } ! 1107: /* At end of register contents => success */ ! 1108: if (d2 == dend2) break; ! 1109: ! 1110: /* Advance to next segment in data being matched, if necessary */ ! 1111: PREFETCH; ! 1112: ! 1113: /* mcnt gets # consecutive chars to compare */ ! 1114: mcnt = dend - d; ! 1115: if (mcnt > dend2 - d2) ! 1116: mcnt = dend2 - d2; ! 1117: /* Compare that many; failure if mismatch, else skip them. */ ! 1118: if (translate ? bcmp_translate (d, d2, mcnt, translate) : bcmp (d, d2, mcnt)) ! 1119: goto fail; ! 1120: d += mcnt, d2 += mcnt; ! 1121: } ! 1122: } ! 1123: break; ! 1124: ! 1125: case anychar: ! 1126: /* fetch a data character */ ! 1127: PREFETCH; ! 1128: /* Match anything but a newline. */ ! 1129: if ((translate ? translate[*d++] : *d++) == '\n') ! 1130: goto fail; ! 1131: break; ! 1132: ! 1133: case charset: ! 1134: case charset_not: ! 1135: { ! 1136: /* Nonzero for charset_not */ ! 1137: int not = 0; ! 1138: register int c; ! 1139: if (*(p - 1) == (char) charset_not) ! 1140: not = 1; ! 1141: ! 1142: /* fetch a data character */ ! 1143: PREFETCH; ! 1144: ! 1145: if (translate) ! 1146: c = translate [*(unsigned char *)d]; ! 1147: else ! 1148: c = *(unsigned char *)d; ! 1149: ! 1150: if (c < *p * BYTEWIDTH ! 1151: && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) ! 1152: not = !not; ! 1153: ! 1154: p += 1 + *p; ! 1155: ! 1156: if (!not) goto fail; ! 1157: d++; ! 1158: break; ! 1159: } ! 1160: ! 1161: case begline: ! 1162: if (d == string1 || d[-1] == '\n') ! 1163: break; ! 1164: goto fail; ! 1165: ! 1166: case endline: ! 1167: if (d == end2 ! 1168: || (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n')) ! 1169: break; ! 1170: goto fail; ! 1171: ! 1172: /* "or" constructs ("|") are handled by starting each alternative ! 1173: with an on_failure_jump that points to the start of the next alternative. ! 1174: Each alternative except the last ends with a jump to the joining point. ! 1175: (Actually, each jump except for the last one really jumps ! 1176: to the following jump, because tensioning the jumps is a hassle.) */ ! 1177: ! 1178: /* The start of a stupid repeat has an on_failure_jump that points ! 1179: past the end of the repeat text. ! 1180: This makes a failure point so that, on failure to match a repetition, ! 1181: matching restarts past as many repetitions have been found ! 1182: with no way to fail and look for another one. */ ! 1183: ! 1184: /* A smart repeat is similar but loops back to the on_failure_jump ! 1185: so that each repetition makes another failure point. */ ! 1186: ! 1187: case on_failure_jump: ! 1188: if (stackp == stacke) ! 1189: { ! 1190: char **stackx = (char **) alloca (2 * (stacke - stackb) * sizeof (char *)); ! 1191: bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *)); ! 1192: stackp += stackx - stackb; ! 1193: stacke = stackx + 2 * (stacke - stackb); ! 1194: stackb = stackx; ! 1195: } ! 1196: mcnt = *p++ & 0377; ! 1197: mcnt += SIGN_EXTEND_CHAR (*p++) << 8; ! 1198: *stackp++ = mcnt + p; ! 1199: *stackp++ = d; ! 1200: break; ! 1201: ! 1202: /* The end of a smart repeat has an maybe_finalize_jump back. ! 1203: Change it either to a finalize_jump or an ordinary jump. */ ! 1204: ! 1205: case maybe_finalize_jump: ! 1206: mcnt = *p++ & 0377; ! 1207: mcnt += SIGN_EXTEND_CHAR (*p++) << 8; ! 1208: /* Compare what follows with the begining of the repeat. ! 1209: If we can establish that there is nothing that they would ! 1210: both match, we can change to finalize_jump */ ! 1211: if (p == pend) ! 1212: p[-3] = (char) finalize_jump; ! 1213: else if (*p == (char) exactn || *p == (char) endline) ! 1214: { ! 1215: register int c = *p == (char) endline ? '\n' : p[2]; ! 1216: register char *p1 = p + mcnt; ! 1217: /* p1[0] ... p1[2] are an on_failure_jump. ! 1218: Examine what follows that */ ! 1219: if (p1[3] == (char) exactn && p1[5] != c) ! 1220: p[-3] = (char) finalize_jump; ! 1221: else if (p1[3] == (char) charset || p1[3] == (char) charset_not) ! 1222: { ! 1223: int not = p1[3] == (char) charset_not; ! 1224: if (c < p1[4] * BYTEWIDTH ! 1225: && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) ! 1226: not = !not; ! 1227: /* not is 1 if c would match */ ! 1228: /* That means it is not safe to finalize */ ! 1229: if (!not) ! 1230: p[-3] = (char) finalize_jump; ! 1231: } ! 1232: } ! 1233: p -= 2; ! 1234: if (p[-1] != (char) finalize_jump) ! 1235: { ! 1236: p[-1] = (char) jump; ! 1237: goto nofinalize; ! 1238: } ! 1239: ! 1240: /* The end of a stupid repeat has a finalize-jump ! 1241: back to the start, where another failure point will be made ! 1242: which will point after all the repetitions found so far. */ ! 1243: ! 1244: case finalize_jump: ! 1245: stackp -= 2; ! 1246: ! 1247: case jump: ! 1248: nofinalize: ! 1249: mcnt = *p++ & 0377; ! 1250: mcnt += SIGN_EXTEND_CHAR (*p++) << 8; ! 1251: p += mcnt; ! 1252: break; ! 1253: ! 1254: case dummy_failure_jump: ! 1255: if (stackp == stacke) ! 1256: { ! 1257: char **stackx = (char **) alloca (2 * (stacke - stackb) * sizeof (char *)); ! 1258: bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *)); ! 1259: stackp += stackx - stackb; ! 1260: stacke = stackx + 2 * (stacke - stackb); ! 1261: stackb = stackx; ! 1262: } ! 1263: *stackp++ = 0; ! 1264: *stackp++ = 0; ! 1265: goto nofinalize; ! 1266: ! 1267: case wordbound: ! 1268: if (d == string1 /* Points to first char */ ! 1269: || d == end2 /* Points to end */ ! 1270: || (d == end1 && size2 == 0)) /* Points to end */ ! 1271: break; ! 1272: if ((SYNTAX (((unsigned char *)d)[-1]) == Sword) ! 1273: != (SYNTAX (d == end1 ? *(unsigned char *)string2 : *(unsigned char *)d) == Sword)) ! 1274: break; ! 1275: goto fail; ! 1276: ! 1277: case notwordbound: ! 1278: if (d == string1 /* Points to first char */ ! 1279: || d == end2 /* Points to end */ ! 1280: || (d == end1 && size2 == 0)) /* Points to end */ ! 1281: goto fail; ! 1282: if ((SYNTAX (((unsigned char *)d)[-1]) == Sword) ! 1283: != (SYNTAX (d == end1 ? *(unsigned char *)string2 : *(unsigned char *)d) == Sword)) ! 1284: goto fail; ! 1285: break; ! 1286: ! 1287: case wordbeg: ! 1288: if (d == end2 /* Points to end */ ! 1289: || (d == end1 && size2 == 0) /* Points to end */ ! 1290: || SYNTAX (*(unsigned char *) (d == end1 ? string2 : d)) != Sword) /* Next char not a letter */ ! 1291: goto fail; ! 1292: if (d == string1 /* Points to first char */ ! 1293: || SYNTAX (((unsigned char *)d)[-1]) != Sword) /* prev char not letter */ ! 1294: break; ! 1295: goto fail; ! 1296: ! 1297: case wordend: ! 1298: if (d == string1 /* Points to first char */ ! 1299: || SYNTAX (((unsigned char *)d)[-1]) != Sword) /* prev char not letter */ ! 1300: goto fail; ! 1301: if (d == end2 /* Points to end */ ! 1302: || (d == end1 && size2 == 0) /* Points to end */ ! 1303: || SYNTAX (d == end1 ? *(unsigned char *)string2 : *(unsigned char *)d) != Sword) /* Next char not a letter */ ! 1304: break; ! 1305: goto fail; ! 1306: ! 1307: #ifdef emacs ! 1308: case before_dot: ! 1309: if (((d - string2 <= (unsigned) size2) ! 1310: ? d - (char *) bf_p2 : d - (char *) bf_p1) ! 1311: <= point) ! 1312: goto fail; ! 1313: break; ! 1314: ! 1315: case at_dot: ! 1316: if (((d - string2 <= (unsigned) size2) ! 1317: ? d - (char *) bf_p2 : d - (char *) bf_p1) ! 1318: == point) ! 1319: goto fail; ! 1320: break; ! 1321: ! 1322: case after_dot: ! 1323: if (((d - string2 <= (unsigned) size2) ! 1324: ? d - (char *) bf_p2 : d - (char *) bf_p1) ! 1325: >= point) ! 1326: goto fail; ! 1327: break; ! 1328: ! 1329: case wordchar: ! 1330: mcnt = (int) Sword; ! 1331: goto matchsyntax; ! 1332: ! 1333: case syntaxspec: ! 1334: mcnt = *p++; ! 1335: matchsyntax: ! 1336: PREFETCH; ! 1337: if (SYNTAX (*(unsigned char *)d++) != (enum syntaxcode) mcnt) goto fail; ! 1338: break; ! 1339: ! 1340: case notwordchar: ! 1341: mcnt = (int) Sword; ! 1342: goto matchnotsyntax; ! 1343: ! 1344: case notsyntaxspec: ! 1345: mcnt = *p++; ! 1346: matchnotsyntax: ! 1347: PREFETCH; ! 1348: if (SYNTAX (*(unsigned char *)d++) == (enum syntaxcode) mcnt) goto fail; ! 1349: break; ! 1350: #else ! 1351: case wordchar: ! 1352: PREFETCH; ! 1353: if (SYNTAX (*(unsigned char *)d++) == 0) goto fail; ! 1354: break; ! 1355: ! 1356: case notwordchar: ! 1357: PREFETCH; ! 1358: if (SYNTAX (*(unsigned char *)d++) != 0) goto fail; ! 1359: break; ! 1360: #endif not emacs ! 1361: ! 1362: case begbuf: ! 1363: if (d == string1) /* Note, d cannot equal string2 */ ! 1364: break; /* unless string1 == string2. */ ! 1365: goto fail; ! 1366: ! 1367: case endbuf: ! 1368: if (d == end2 || (d == end1 && size2 == 0)) ! 1369: break; ! 1370: goto fail; ! 1371: ! 1372: case exactn: ! 1373: /* Match the next few pattern characters exactly. ! 1374: mcnt is how many characters to match. */ ! 1375: mcnt = *p++; ! 1376: if (translate) ! 1377: { ! 1378: do ! 1379: { ! 1380: PREFETCH; ! 1381: if (translate[*(unsigned char *)d++] != *p++) goto fail; ! 1382: } ! 1383: while (--mcnt); ! 1384: } ! 1385: else ! 1386: { ! 1387: do ! 1388: { ! 1389: PREFETCH; ! 1390: if (*d++ != *p++) goto fail; ! 1391: } ! 1392: while (--mcnt); ! 1393: } ! 1394: break; ! 1395: } ! 1396: continue; /* Successfully matched one pattern command; keep matching */ ! 1397: ! 1398: /* Jump here if any matching operation fails. */ ! 1399: fail: ! 1400: if (stackp != stackb) ! 1401: /* A restart point is known. Restart there and pop it. */ ! 1402: { ! 1403: if (!stackp[-2]) ! 1404: { /* If innermost failure point is dormant, flush it and keep looking */ ! 1405: stackp -= 2; ! 1406: goto fail; ! 1407: } ! 1408: d = *--stackp; ! 1409: p = *--stackp; ! 1410: if (d >= string1 && d <= end1) ! 1411: dend = end_match_1; ! 1412: } ! 1413: else break; /* Matching at this starting point really fails! */ ! 1414: } ! 1415: return -1; /* Failure to match */ ! 1416: } ! 1417: ! 1418: static int ! 1419: bcmp_translate (s1, s2, len, translate) ! 1420: char *s1, *s2; ! 1421: register int len; ! 1422: char *translate; ! 1423: { ! 1424: register char *p1 = s1, *p2 = s2; ! 1425: while (len) ! 1426: { ! 1427: if (translate [*p1++] != translate [*p2++]) return 1; ! 1428: len--; ! 1429: } ! 1430: return 0; ! 1431: } ! 1432: ! 1433: /* Entry points compatible with bsd4.2 regex library */ ! 1434: ! 1435: #ifndef emacs ! 1436: ! 1437: static struct re_pattern_buffer re_comp_buf; ! 1438: ! 1439: char * ! 1440: re_comp (s) ! 1441: char *s; ! 1442: { ! 1443: if (!s) ! 1444: { ! 1445: if (!re_comp_buf.buffer) ! 1446: return "No previous regular expression"; ! 1447: return 0; ! 1448: } ! 1449: ! 1450: if (!re_comp_buf.buffer) ! 1451: { ! 1452: if (!(re_comp_buf.buffer = (char *) malloc (200))) ! 1453: return "Memory exhausted"; ! 1454: re_comp_buf.allocated = 200; ! 1455: if (!(re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH))) ! 1456: return "Memory exhausted"; ! 1457: } ! 1458: return re_compile_pattern (s, strlen (s), &re_comp_buf); ! 1459: } ! 1460: ! 1461: int ! 1462: re_exec (s) ! 1463: char *s; ! 1464: { ! 1465: int len = strlen (s); ! 1466: return 0 <= re_search (&re_comp_buf, s, len, 0, len, 0); ! 1467: } ! 1468: ! 1469: #endif /* emacs */ ! 1470: ! 1471: #ifdef test ! 1472: ! 1473: #include <stdio.h> ! 1474: ! 1475: /* Indexed by a character, gives the upper case equivalent of the character */ ! 1476: ! 1477: static char upcase[0400] = ! 1478: { 000, 001, 002, 003, 004, 005, 006, 007, ! 1479: 010, 011, 012, 013, 014, 015, 016, 017, ! 1480: 020, 021, 022, 023, 024, 025, 026, 027, ! 1481: 030, 031, 032, 033, 034, 035, 036, 037, ! 1482: 040, 041, 042, 043, 044, 045, 046, 047, ! 1483: 050, 051, 052, 053, 054, 055, 056, 057, ! 1484: 060, 061, 062, 063, 064, 065, 066, 067, ! 1485: 070, 071, 072, 073, 074, 075, 076, 077, ! 1486: 0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107, ! 1487: 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117, ! 1488: 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127, ! 1489: 0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137, ! 1490: 0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107, ! 1491: 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117, ! 1492: 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127, ! 1493: 0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177, ! 1494: 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207, ! 1495: 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217, ! 1496: 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227, ! 1497: 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237, ! 1498: 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247, ! 1499: 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257, ! 1500: 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267, ! 1501: 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277, ! 1502: 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307, ! 1503: 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317, ! 1504: 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327, ! 1505: 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337, ! 1506: 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347, ! 1507: 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357, ! 1508: 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367, ! 1509: 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377 ! 1510: }; ! 1511: ! 1512: main () ! 1513: { ! 1514: char pat[80]; ! 1515: struct re_pattern_buffer buf; ! 1516: int i; ! 1517: char c; ! 1518: char fastmap[(1 << BYTEWIDTH)]; ! 1519: ! 1520: buf.allocated = 40; ! 1521: buf.buffer = (char *) malloc (buf.allocated); ! 1522: buf.fastmap = fastmap; ! 1523: buf.translate = upcase; ! 1524: ! 1525: while (1) ! 1526: { ! 1527: gets (pat); ! 1528: ! 1529: if (*pat) ! 1530: { ! 1531: re_compile_pattern (pat, strlen(pat), &buf); ! 1532: ! 1533: for (i = 0; i < buf.used; i++) ! 1534: printchar (buf.buffer[i]); ! 1535: ! 1536: putchar ('\n'); ! 1537: ! 1538: printf ("%d allocated, %d used.\n", buf.allocated, buf.used); ! 1539: ! 1540: re_compile_fastmap (&buf); ! 1541: printf ("Allowed by fastmap: "); ! 1542: for (i = 0; i < (1 << BYTEWIDTH); i++) ! 1543: if (fastmap[i]) printchar (i); ! 1544: putchar ('\n'); ! 1545: } ! 1546: ! 1547: gets (pat); /* Now read the string to match against */ ! 1548: ! 1549: i = re_match (&buf, pat, strlen (pat), 0, 0); ! 1550: printf ("Match value %d.\n", i); ! 1551: } ! 1552: } ! 1553: ! 1554: #ifdef NOTDEF ! 1555: print_buf (bufp) ! 1556: struct re_pattern_buffer *bufp; ! 1557: { ! 1558: int i; ! 1559: ! 1560: printf ("buf is :\n----------------\n"); ! 1561: for (i = 0; i < bufp->used; i++) ! 1562: printchar (bufp->buffer[i]); ! 1563: ! 1564: printf ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used); ! 1565: ! 1566: printf ("Allowed by fastmap: "); ! 1567: for (i = 0; i < (1 << BYTEWIDTH); i++) ! 1568: if (bufp->fastmap[i]) ! 1569: printchar (i); ! 1570: printf ("\nAllowed by translate: "); ! 1571: if (bufp->translate) ! 1572: for (i = 0; i < (1 << BYTEWIDTH); i++) ! 1573: if (bufp->translate[i]) ! 1574: printchar (i); ! 1575: printf ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't"); ! 1576: printf ("can %s be null\n----------", bufp->can_be_null ? "" : "not"); ! 1577: } ! 1578: #endif ! 1579: ! 1580: printchar (c) ! 1581: char c; ! 1582: { ! 1583: if (c < 041 || c >= 0177) ! 1584: { ! 1585: putchar ('\\'); ! 1586: putchar (((c >> 6) & 3) + '0'); ! 1587: putchar (((c >> 3) & 7) + '0'); ! 1588: putchar ((c & 7) + '0'); ! 1589: } ! 1590: else ! 1591: putchar (c); ! 1592: } ! 1593: ! 1594: error (string) ! 1595: char *string; ! 1596: { ! 1597: puts (string); ! 1598: exit (1); ! 1599: } ! 1600: ! 1601: #endif test
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.