|
|
1.1 ! root 1: /* Extended regular expression matching and search. ! 2: Copyright (C) 1985 Free Software Foundation, Inc. ! 3: ! 4: NO WARRANTY ! 5: ! 6: BECAUSE THIS PROGRAM IS LICENSED FREE OF CHARGE, WE PROVIDE ABSOLUTELY ! 7: NO WARRANTY, TO THE EXTENT PERMITTED BY APPLICABLE STATE LAW. EXCEPT ! 8: WHEN OTHERWISE STATED IN WRITING, FREE SOFTWARE FOUNDATION, INC, ! 9: RICHARD M. STALLMAN AND/OR OTHER PARTIES PROVIDE THIS PROGRAM "AS IS" ! 10: WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, ! 11: BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND ! 12: FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY ! 13: AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE ! 14: DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR ! 15: CORRECTION. ! 16: ! 17: IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW WILL RICHARD M. ! 18: STALLMAN, THE FREE SOFTWARE FOUNDATION, INC., AND/OR ANY OTHER PARTY ! 19: WHO MAY MODIFY AND REDISTRIBUTE THIS PROGRAM AS PERMITTED BELOW, BE ! 20: LIABLE TO YOU FOR DAMAGES, INCLUDING ANY LOST PROFITS, LOST MONIES, OR ! 21: OTHER SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE ! 22: USE OR INABILITY TO USE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR ! 23: DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY THIRD PARTIES OR ! 24: A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS) THIS ! 25: PROGRAM, EVEN IF YOU HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH ! 26: DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY. ! 27: ! 28: GENERAL PUBLIC LICENSE TO COPY ! 29: ! 30: 1. You may copy and distribute verbatim copies of this source file ! 31: as you receive it, in any medium, provided that you conspicuously and ! 32: appropriately publish on each copy a valid copyright notice "Copyright ! 33: (C) 1985 Free Software Foundation, Inc."; and include following the ! 34: copyright notice a verbatim copy of the above disclaimer of warranty ! 35: and of this License. You may charge a distribution fee for the ! 36: physical act of transferring a copy. ! 37: ! 38: 2. You may modify your copy or copies of this source file or ! 39: any portion of it, and copy and distribute such modifications under ! 40: the terms of Paragraph 1 above, provided that you also do the following: ! 41: ! 42: a) cause the modified files to carry prominent notices stating ! 43: that you changed the files and the date of any change; and ! 44: ! 45: b) cause the whole of any work that you distribute or publish, ! 46: that in whole or in part contains or is a derivative of this ! 47: program or any part thereof, to be licensed at no charge to all ! 48: third parties on terms identical to those contained in this ! 49: License Agreement (except that you may choose to grant more extensive ! 50: warranty protection to some or all third parties, at your option). ! 51: ! 52: c) You may charge a distribution fee for the physical act of ! 53: transferring a copy, and you may at your option offer warranty ! 54: protection in exchange for a fee. ! 55: ! 56: Mere aggregation of another unrelated program with this program (or its ! 57: derivative) on a volume of a storage or distribution medium does not bring ! 58: the other program under the scope of these terms. ! 59: ! 60: 3. You may copy and distribute this program (or a portion or derivative ! 61: of it, under Paragraph 2) in object code or executable form under the terms ! 62: of Paragraphs 1 and 2 above provided that you also do one of the following: ! 63: ! 64: a) accompany it with the complete corresponding machine-readable ! 65: source code, which must be distributed under the terms of ! 66: Paragraphs 1 and 2 above; or, ! 67: ! 68: b) accompany it with a written offer, valid for at least three ! 69: years, to give any third party free (except for a nominal ! 70: shipping charge) a complete machine-readable copy of the ! 71: corresponding source code, to be distributed under the terms of ! 72: Paragraphs 1 and 2 above; or, ! 73: ! 74: c) accompany it with the information you received as to where the ! 75: corresponding source code may be obtained. (This alternative is ! 76: allowed only for noncommercial distribution and only if you ! 77: received the program in object code or executable form alone.) ! 78: ! 79: For an executable file, complete source code means all the source code for ! 80: all modules it contains; but, as a special exception, it need not include ! 81: source code for modules which are standard libraries that accompany the ! 82: operating system on which the executable file runs. ! 83: ! 84: 4. You may not copy, sublicense, distribute or transfer this program ! 85: except as expressly provided under this License Agreement. Any attempt ! 86: otherwise to copy, sublicense, distribute or transfer this program is void and ! 87: your rights to use the program under this License agreement shall be ! 88: automatically terminated. However, parties who have received computer ! 89: software programs from you with this License Agreement will not have ! 90: their licenses terminated so long as such parties remain in full compliance. ! 91: ! 92: 5. If you wish to incorporate parts of this program into other free ! 93: programs whose distribution conditions are different, write to the Free ! 94: Software Foundation at 675 Mass Ave, Cambridge, MA 02139. We have not yet ! 95: worked out a simple rule that can be stated here, but we will often permit ! 96: this. We will be guided by the two goals of preserving the free status of ! 97: all derivatives of our free software and of promoting the sharing and reuse of ! 98: software. ! 99: ! 100: ! 101: In other words, you are welcome to use, share and improve this program. ! 102: You are forbidden to forbid anyone else to use, share and improve ! 103: what you give them. Help stamp out software-hoarding! */ ! 104: ! 105: ! 106: /* To test, compile with -Dtest. ! 107: This Dtestable feature turns this into a self-contained program ! 108: which reads a pattern, describes how it compiles, ! 109: then reads a string and searches for it. */ ! 110: ! 111: ! 112: #ifdef emacs ! 113: ! 114: /* The `emacs' switch turns on certain special matching commands ! 115: that make sense only in emacs. */ ! 116: ! 117: #include "config.h" ! 118: #include "lisp.h" ! 119: #include "buffer.h" ! 120: #include "syntax.h" ! 121: ! 122: #else /* not emacs */ ! 123: ! 124: /* ! 125: * Define the syntax stuff, so we can do the \<...\> things. ! 126: */ ! 127: ! 128: #ifndef Sword /* must be non-zero in some of the tests below... */ ! 129: #define Sword 1 ! 130: #endif ! 131: ! 132: #define SYNTAX(c) re_syntax_table[c] ! 133: ! 134: #ifdef SYNTAX_TABLE ! 135: ! 136: char *re_syntax_table; ! 137: ! 138: #else ! 139: ! 140: static char re_syntax_table[256]; ! 141: ! 142: static void ! 143: init_syntax_once () ! 144: { ! 145: register int c; ! 146: static int done = 0; ! 147: ! 148: if (done) ! 149: return; ! 150: ! 151: bzero (re_syntax_table, sizeof re_syntax_table); ! 152: ! 153: for (c = 'a'; c <= 'z'; c++) ! 154: re_syntax_table[c] = Sword; ! 155: ! 156: for (c = 'A'; c <= 'Z'; c++) ! 157: re_syntax_table[c] = Sword; ! 158: ! 159: for (c = '0'; c <= '9'; c++) ! 160: re_syntax_table[c] = Sword; ! 161: ! 162: done = 1; ! 163: } ! 164: ! 165: #endif /* SYNTAX_TABLE */ ! 166: #endif /* not emacs */ ! 167: ! 168: #include "regex.h" ! 169: ! 170: /* Number of failure points to allocate space for initially, ! 171: when matching. If this number is exceeded, more space is allocated, ! 172: so it is not a hard limit. */ ! 173: ! 174: #ifndef NFAILURES ! 175: #define NFAILURES 80 ! 176: #endif NFAILURES ! 177: ! 178: /* width of a byte in bits */ ! 179: ! 180: #define BYTEWIDTH 8 ! 181: ! 182: #ifndef SIGN_EXTEND_CHAR ! 183: #define SIGN_EXTEND_CHAR(x) (x) ! 184: #endif ! 185: ! 186: static int obscure_syntax = 0; ! 187: ! 188: /* Specify the precise syntax of regexp for compilation. ! 189: This provides for compatibility for various utilities ! 190: which historically have different, incompatible syntaxes. ! 191: ! 192: The argument SYNTAX is a bit-mask containing the two bits ! 193: RE_NO_BK_PARENS and RE_NO_BK_VBAR. */ ! 194: ! 195: int ! 196: re_set_syntax (syntax) ! 197: { ! 198: int ret; ! 199: ! 200: ret = obscure_syntax; ! 201: obscure_syntax = syntax; ! 202: return ret; ! 203: } ! 204: ! 205: /* re_compile_pattern takes a regular-expression string ! 206: and converts it into a buffer full of byte commands for matching. ! 207: ! 208: PATTERN is the address of the pattern string ! 209: SIZE is the length of it. ! 210: BUFP is a struct re_pattern_buffer * which points to the info ! 211: on where to store the byte commands. ! 212: This structure contains a char * which points to the ! 213: actual space, which should have been obtained with malloc. ! 214: re_compile_pattern may use realloc to grow the buffer space. ! 215: ! 216: The number of bytes of commands can be found out by looking in ! 217: the struct re_pattern_buffer that bufp pointed to, ! 218: after re_compile_pattern returns. ! 219: */ ! 220: ! 221: #define PATPUSH(ch) (*b++ = (char) (ch)) ! 222: ! 223: #define PATFETCH(c) \ ! 224: {if (p == pend) goto end_of_pattern; \ ! 225: c = * (unsigned char *) p++; \ ! 226: if (translate) c = translate[c]; } ! 227: ! 228: #define PATFETCH_RAW(c) \ ! 229: {if (p == pend) goto end_of_pattern; \ ! 230: c = * (unsigned char *) p++; } ! 231: ! 232: #define PATUNFETCH p-- ! 233: ! 234: #define EXTEND_BUFFER \ ! 235: { char *old_buffer = bufp->buffer; \ ! 236: if (bufp->allocated == (1<<16)) goto too_big; \ ! 237: bufp->allocated *= 2; \ ! 238: if (bufp->allocated > (1<<16)) bufp->allocated = (1<<16); \ ! 239: if (!(bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated))) \ ! 240: goto memory_exhausted; \ ! 241: c = bufp->buffer - old_buffer; \ ! 242: b += c; \ ! 243: if (fixup_jump) \ ! 244: fixup_jump += c; \ ! 245: if (laststart) \ ! 246: laststart += c; \ ! 247: begalt += c; \ ! 248: if (pending_exact) \ ! 249: pending_exact += c; \ ! 250: } ! 251: ! 252: static int store_jump (), insert_jump (); ! 253: ! 254: char * ! 255: re_compile_pattern (pattern, size, bufp) ! 256: char *pattern; ! 257: int size; ! 258: struct re_pattern_buffer *bufp; ! 259: { ! 260: register char *b = bufp->buffer; ! 261: register char *p = pattern; ! 262: char *pend = pattern + size; ! 263: register unsigned c, c1; ! 264: char *p1; ! 265: unsigned char *translate = (unsigned char *) bufp->translate; ! 266: ! 267: /* address of the count-byte of the most recently inserted "exactn" command. ! 268: This makes it possible to tell whether a new exact-match character ! 269: can be added to that command or requires a new "exactn" command. */ ! 270: ! 271: char *pending_exact = 0; ! 272: ! 273: /* address of the place where a forward-jump should go ! 274: to the end of the containing expression. ! 275: Each alternative of an "or", except the last, ends with a forward-jump ! 276: of this sort. */ ! 277: ! 278: char *fixup_jump = 0; ! 279: ! 280: /* address of start of the most recently finished expression. ! 281: This tells postfix * where to find the start of its operand. */ ! 282: ! 283: char *laststart = 0; ! 284: ! 285: /* In processing a repeat, 1 means zero matches is allowed */ ! 286: ! 287: char zero_times_ok; ! 288: ! 289: /* In processing a repeat, 1 means many matches is allowed */ ! 290: ! 291: char many_times_ok; ! 292: ! 293: /* address of beginning of regexp, or inside of last \( */ ! 294: ! 295: char *begalt = b; ! 296: ! 297: /* Stack of information saved by \( and restored by \). ! 298: Four stack elements are pushed by each \(: ! 299: First, the value of b. ! 300: Second, the value of fixup_jump. ! 301: Third, the value of regnum. ! 302: Fourth, the value of begalt. */ ! 303: ! 304: int stackb[40]; ! 305: int *stackp = stackb; ! 306: int *stacke = stackb + 40; ! 307: int *stackt; ! 308: ! 309: /* Counts \('s as they are encountered. Remembered for the matching \), ! 310: where it becomes the "register number" to put in the stop_memory command */ ! 311: ! 312: int regnum = 1; ! 313: ! 314: bufp->fastmap_accurate = 0; ! 315: ! 316: #ifndef emacs ! 317: #ifndef SYNTAX_TABLE ! 318: /* ! 319: * Initialize the syntax table. ! 320: */ ! 321: init_syntax_once(); ! 322: #endif ! 323: #endif ! 324: ! 325: if (bufp->allocated == 0) ! 326: { ! 327: bufp->allocated = 28; ! 328: if (bufp->buffer) ! 329: /* EXTEND_BUFFER loses when bufp->allocated is 0 */ ! 330: bufp->buffer = (char *) realloc (bufp->buffer, 28); ! 331: else ! 332: /* Caller did not allocate a buffer. Do it for him */ ! 333: bufp->buffer = (char *) malloc (28); ! 334: if (!bufp->buffer) goto memory_exhausted; ! 335: begalt = b = bufp->buffer; ! 336: } ! 337: ! 338: while (p != pend) ! 339: { ! 340: if (b - bufp->buffer > bufp->allocated - 10) ! 341: /* Note that EXTEND_BUFFER clobbers c */ ! 342: EXTEND_BUFFER; ! 343: ! 344: PATFETCH (c); ! 345: ! 346: switch (c) ! 347: { ! 348: case '$': ! 349: if (obscure_syntax & RE_TIGHT_VBAR) ! 350: { ! 351: if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS) && p != pend) ! 352: goto normal_char; ! 353: /* Make operand of last vbar end before this `$'. */ ! 354: if (fixup_jump) ! 355: store_jump (fixup_jump, jump, b); ! 356: fixup_jump = 0; ! 357: PATPUSH (endline); ! 358: break; ! 359: } ! 360: ! 361: /* $ means succeed if at end of line, but only in special contexts. ! 362: If randomly in the middle of a pattern, it is a normal character. */ ! 363: if (p == pend || *p == '\n' ! 364: || (obscure_syntax & RE_CONTEXT_INDEP_OPS) ! 365: || (obscure_syntax & RE_NO_BK_PARENS ! 366: ? *p == ')' ! 367: : *p == '\\' && p[1] == ')') ! 368: || (obscure_syntax & RE_NO_BK_VBAR ! 369: ? *p == '|' ! 370: : *p == '\\' && p[1] == '|')) ! 371: { ! 372: PATPUSH (endline); ! 373: break; ! 374: } ! 375: goto normal_char; ! 376: ! 377: case '^': ! 378: /* ^ means succeed if at beg of line, but only if no preceding pattern. */ ! 379: ! 380: if (laststart && p[-2] != '\n' ! 381: && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS)) ! 382: goto normal_char; ! 383: if (obscure_syntax & RE_TIGHT_VBAR) ! 384: { ! 385: if (p != pattern + 1 ! 386: && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS)) ! 387: goto normal_char; ! 388: PATPUSH (begline); ! 389: begalt = b; ! 390: } ! 391: else ! 392: PATPUSH (begline); ! 393: break; ! 394: ! 395: case '+': ! 396: case '?': ! 397: if (obscure_syntax & RE_BK_PLUS_QM) ! 398: goto normal_char; ! 399: handle_plus: ! 400: case '*': ! 401: /* If there is no previous pattern, char not special. */ ! 402: if (!laststart && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS)) ! 403: goto normal_char; ! 404: /* If there is a sequence of repetition chars, ! 405: collapse it down to equivalent to just one. */ ! 406: zero_times_ok = 0; ! 407: many_times_ok = 0; ! 408: while (1) ! 409: { ! 410: zero_times_ok |= c != '+'; ! 411: many_times_ok |= c != '?'; ! 412: if (p == pend) ! 413: break; ! 414: PATFETCH (c); ! 415: if (c == '*') ! 416: ; ! 417: else if (!(obscure_syntax & RE_BK_PLUS_QM) ! 418: && (c == '+' || c == '?')) ! 419: ; ! 420: else if ((obscure_syntax & RE_BK_PLUS_QM) ! 421: && c == '\\') ! 422: { ! 423: int c1; ! 424: PATFETCH (c1); ! 425: if (!(c1 == '+' || c1 == '?')) ! 426: { ! 427: PATUNFETCH; ! 428: PATUNFETCH; ! 429: break; ! 430: } ! 431: c = c1; ! 432: } ! 433: else ! 434: { ! 435: PATUNFETCH; ! 436: break; ! 437: } ! 438: } ! 439: ! 440: /* Star, etc. applied to an empty pattern is equivalent ! 441: to an empty pattern. */ ! 442: if (!laststart) ! 443: break; ! 444: ! 445: /* Now we know whether 0 matches is allowed, ! 446: and whether 2 or more matches is allowed. */ ! 447: if (many_times_ok) ! 448: { ! 449: /* If more than one repetition is allowed, ! 450: put in a backward jump at the end. */ ! 451: store_jump (b, maybe_finalize_jump, laststart - 3); ! 452: b += 3; ! 453: } ! 454: insert_jump (on_failure_jump, laststart, b + 3, b); ! 455: pending_exact = 0; ! 456: b += 3; ! 457: if (!zero_times_ok) ! 458: { ! 459: /* At least one repetition required: insert before the loop ! 460: a skip over the initial on-failure-jump instruction */ ! 461: insert_jump (dummy_failure_jump, laststart, laststart + 6, b); ! 462: b += 3; ! 463: } ! 464: break; ! 465: ! 466: case '.': ! 467: laststart = b; ! 468: PATPUSH (anychar); ! 469: break; ! 470: ! 471: case '[': ! 472: while (b - bufp->buffer ! 473: > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH) ! 474: /* Note that EXTEND_BUFFER clobbers c */ ! 475: EXTEND_BUFFER; ! 476: ! 477: laststart = b; ! 478: if (*p == '^') ! 479: PATPUSH (charset_not), p++; ! 480: else ! 481: PATPUSH (charset); ! 482: p1 = p; ! 483: ! 484: PATPUSH ((1 << BYTEWIDTH) / BYTEWIDTH); ! 485: /* Clear the whole map */ ! 486: bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH); ! 487: /* Read in characters and ranges, setting map bits */ ! 488: while (1) ! 489: { ! 490: PATFETCH (c); ! 491: if (c == ']' && p != p1 + 1) break; ! 492: if (*p == '-' && p[1] != ']') ! 493: { ! 494: PATFETCH (c1); ! 495: PATFETCH (c1); ! 496: while (c <= c1) ! 497: b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH), c++; ! 498: } ! 499: else ! 500: { ! 501: b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH); ! 502: } ! 503: } ! 504: /* Discard any bitmap bytes that are all 0 at the end of the map. ! 505: Decrement the map-length byte too. */ ! 506: while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) ! 507: b[-1]--; ! 508: b += b[-1]; ! 509: break; ! 510: ! 511: case '(': ! 512: if (! (obscure_syntax & RE_NO_BK_PARENS)) ! 513: goto normal_char; ! 514: else ! 515: goto handle_open; ! 516: ! 517: case ')': ! 518: if (! (obscure_syntax & RE_NO_BK_PARENS)) ! 519: goto normal_char; ! 520: else ! 521: goto handle_close; ! 522: ! 523: case '\n': ! 524: if (! (obscure_syntax & RE_NEWLINE_OR)) ! 525: goto normal_char; ! 526: else ! 527: goto handle_bar; ! 528: ! 529: case '|': ! 530: if (! (obscure_syntax & RE_NO_BK_VBAR)) ! 531: goto normal_char; ! 532: else ! 533: goto handle_bar; ! 534: ! 535: case '\\': ! 536: if (p == pend) goto invalid_pattern; ! 537: PATFETCH_RAW (c); ! 538: switch (c) ! 539: { ! 540: case '(': ! 541: if (obscure_syntax & RE_NO_BK_PARENS) ! 542: goto normal_backsl; ! 543: handle_open: ! 544: if (stackp == stacke) goto nesting_too_deep; ! 545: if (regnum < RE_NREGS) ! 546: { ! 547: PATPUSH (start_memory); ! 548: PATPUSH (regnum); ! 549: } ! 550: *stackp++ = b - bufp->buffer; ! 551: *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0; ! 552: *stackp++ = regnum++; ! 553: *stackp++ = begalt - bufp->buffer; ! 554: fixup_jump = 0; ! 555: laststart = 0; ! 556: begalt = b; ! 557: break; ! 558: ! 559: case ')': ! 560: if (obscure_syntax & RE_NO_BK_PARENS) ! 561: goto normal_backsl; ! 562: handle_close: ! 563: if (stackp == stackb) goto unmatched_close; ! 564: begalt = *--stackp + bufp->buffer; ! 565: if (fixup_jump) ! 566: store_jump (fixup_jump, jump, b); ! 567: if (stackp[-1] < RE_NREGS) ! 568: { ! 569: PATPUSH (stop_memory); ! 570: PATPUSH (stackp[-1]); ! 571: } ! 572: stackp -= 2; ! 573: fixup_jump = 0; ! 574: if (*stackp) ! 575: fixup_jump = *stackp + bufp->buffer - 1; ! 576: laststart = *--stackp + bufp->buffer; ! 577: break; ! 578: ! 579: case '|': ! 580: if (obscure_syntax & RE_NO_BK_VBAR) ! 581: goto normal_backsl; ! 582: handle_bar: ! 583: insert_jump (on_failure_jump, begalt, b + 6, b); ! 584: pending_exact = 0; ! 585: b += 3; ! 586: if (fixup_jump) ! 587: store_jump (fixup_jump, jump, b); ! 588: fixup_jump = b; ! 589: b += 3; ! 590: laststart = 0; ! 591: begalt = b; ! 592: break; ! 593: ! 594: #ifdef emacs ! 595: case '=': ! 596: PATPUSH (at_dot); ! 597: break; ! 598: ! 599: case 's': ! 600: laststart = b; ! 601: PATPUSH (syntaxspec); ! 602: PATFETCH (c); ! 603: PATPUSH (syntax_spec_code[c]); ! 604: break; ! 605: ! 606: case 'S': ! 607: laststart = b; ! 608: PATPUSH (notsyntaxspec); ! 609: PATFETCH (c); ! 610: PATPUSH (syntax_spec_code[c]); ! 611: break; ! 612: #endif emacs ! 613: ! 614: case 'w': ! 615: laststart = b; ! 616: PATPUSH (wordchar); ! 617: break; ! 618: ! 619: case 'W': ! 620: laststart = b; ! 621: PATPUSH (notwordchar); ! 622: break; ! 623: ! 624: case '<': ! 625: PATPUSH (wordbeg); ! 626: break; ! 627: ! 628: case '>': ! 629: PATPUSH (wordend); ! 630: break; ! 631: ! 632: case 'b': ! 633: PATPUSH (wordbound); ! 634: break; ! 635: ! 636: case 'B': ! 637: PATPUSH (notwordbound); ! 638: break; ! 639: ! 640: case '`': ! 641: PATPUSH (begbuf); ! 642: break; ! 643: ! 644: case '\'': ! 645: PATPUSH (endbuf); ! 646: break; ! 647: ! 648: case '1': ! 649: case '2': ! 650: case '3': ! 651: case '4': ! 652: case '5': ! 653: case '6': ! 654: case '7': ! 655: case '8': ! 656: case '9': ! 657: c1 = c - '0'; ! 658: if (c1 >= regnum) ! 659: goto normal_char; ! 660: for (stackt = stackp - 2; stackt > stackb; stackt -= 4) ! 661: if (*stackt == c1) ! 662: goto normal_char; ! 663: laststart = b; ! 664: PATPUSH (duplicate); ! 665: PATPUSH (c1); ! 666: break; ! 667: ! 668: case '+': ! 669: case '?': ! 670: if (obscure_syntax & RE_BK_PLUS_QM) ! 671: goto handle_plus; ! 672: ! 673: default: ! 674: normal_backsl: ! 675: /* You might think it would be useful for \ to mean ! 676: not to translate; but if we don't translate it ! 677: it will never match anything. */ ! 678: if (translate) c = translate[c]; ! 679: goto normal_char; ! 680: } ! 681: break; ! 682: ! 683: default: ! 684: normal_char: ! 685: if (!pending_exact || pending_exact + *pending_exact + 1 != b ! 686: || *pending_exact == 0177 || *p == '*' || *p == '^' ! 687: || ((obscure_syntax & RE_BK_PLUS_QM) ! 688: ? *p == '\\' && (p[1] == '+' || p[1] == '?') ! 689: : (*p == '+' || *p == '?'))) ! 690: { ! 691: laststart = b; ! 692: PATPUSH (exactn); ! 693: pending_exact = b; ! 694: PATPUSH (0); ! 695: } ! 696: PATPUSH (c); ! 697: (*pending_exact)++; ! 698: } ! 699: } ! 700: ! 701: if (fixup_jump) ! 702: store_jump (fixup_jump, jump, b); ! 703: ! 704: if (stackp != stackb) goto unmatched_open; ! 705: ! 706: bufp->used = b - bufp->buffer; ! 707: return 0; ! 708: ! 709: invalid_pattern: ! 710: return "Invalid regular expression"; ! 711: ! 712: unmatched_open: ! 713: return "Unmatched \\("; ! 714: ! 715: unmatched_close: ! 716: return "Unmatched \\)"; ! 717: ! 718: end_of_pattern: ! 719: return "Premature end of regular expression"; ! 720: ! 721: nesting_too_deep: ! 722: return "Nesting too deep"; ! 723: ! 724: too_big: ! 725: return "Regular expression too big"; ! 726: ! 727: memory_exhausted: ! 728: return "Memory exhausted"; ! 729: } ! 730: ! 731: /* Store where `from' points a jump operation to jump to where `to' points. ! 732: `opcode' is the opcode to store. */ ! 733: ! 734: static int ! 735: store_jump (from, opcode, to) ! 736: char *from, *to; ! 737: char opcode; ! 738: { ! 739: from[0] = opcode; ! 740: from[1] = (to - (from + 3)) & 0377; ! 741: from[2] = (to - (from + 3)) >> 8; ! 742: } ! 743: ! 744: /* Open up space at char FROM, and insert there a jump to TO. ! 745: CURRENT_END gives te end of the storage no in use, ! 746: so we know how much data to copy up. ! 747: OP is the opcode of the jump to insert. ! 748: ! 749: If you call this function, you must zero out pending_exact. */ ! 750: ! 751: static int ! 752: insert_jump (op, from, to, current_end) ! 753: char op; ! 754: char *from, *to, *current_end; ! 755: { ! 756: register char *pto = current_end + 3; ! 757: register char *pfrom = current_end; ! 758: while (pfrom != from) ! 759: *--pto = *--pfrom; ! 760: store_jump (from, op, to); ! 761: } ! 762: ! 763: /* Given a pattern, compute a fastmap from it. ! 764: The fastmap records which of the (1 << BYTEWIDTH) possible characters ! 765: can start a string that matches the pattern. ! 766: This fastmap is used by re_search to skip quickly over totally implausible text. ! 767: ! 768: The caller must supply the address of a (1 << BYTEWIDTH)-byte data area ! 769: as bufp->fastmap. ! 770: The other components of bufp describe the pattern to be used. */ ! 771: ! 772: void ! 773: re_compile_fastmap (bufp) ! 774: struct re_pattern_buffer *bufp; ! 775: { ! 776: unsigned char *pattern = (unsigned char *) bufp->buffer; ! 777: int size = bufp->used; ! 778: register char *fastmap = bufp->fastmap; ! 779: register unsigned char *p = pattern; ! 780: register unsigned char *pend = pattern + size; ! 781: register int j, k; ! 782: unsigned char *translate = (unsigned char *) bufp->translate; ! 783: ! 784: unsigned char *stackb[NFAILURES]; ! 785: unsigned char **stackp = stackb; ! 786: ! 787: bzero (fastmap, (1 << BYTEWIDTH)); ! 788: bufp->fastmap_accurate = 1; ! 789: bufp->can_be_null = 0; ! 790: ! 791: while (p) ! 792: { ! 793: if (p == pend) ! 794: { ! 795: bufp->can_be_null = 1; ! 796: break; ! 797: } ! 798: #ifdef SWITCH_ENUM_BUG ! 799: switch ((int) ((enum regexpcode) *p++)) ! 800: #else ! 801: switch ((enum regexpcode) *p++) ! 802: #endif ! 803: { ! 804: case exactn: ! 805: if (translate) ! 806: fastmap[translate[p[1]]] = 1; ! 807: else ! 808: fastmap[p[1]] = 1; ! 809: break; ! 810: ! 811: case begline: ! 812: case before_dot: ! 813: case at_dot: ! 814: case after_dot: ! 815: case begbuf: ! 816: case endbuf: ! 817: case wordbound: ! 818: case notwordbound: ! 819: case wordbeg: ! 820: case wordend: ! 821: continue; ! 822: ! 823: case endline: ! 824: if (translate) ! 825: fastmap[translate['\n']] = 1; ! 826: else ! 827: fastmap['\n'] = 1; ! 828: if (bufp->can_be_null != 1) ! 829: bufp->can_be_null = 2; ! 830: break; ! 831: ! 832: case finalize_jump: ! 833: case maybe_finalize_jump: ! 834: case jump: ! 835: case dummy_failure_jump: ! 836: bufp->can_be_null = 1; ! 837: j = *p++ & 0377; ! 838: j += SIGN_EXTEND_CHAR (*(char *)p) << 8; ! 839: p += j + 1; /* The 1 compensates for missing ++ above */ ! 840: if (j > 0) ! 841: continue; ! 842: /* Jump backward reached implies we just went through ! 843: the body of a loop and matched nothing. ! 844: Opcode jumped to should be an on_failure_jump. ! 845: Just treat it like an ordinary jump. ! 846: For a * loop, it has pushed its failure point already; ! 847: if so, discard that as redundant. */ ! 848: if ((enum regexpcode) *p != on_failure_jump) ! 849: continue; ! 850: p++; ! 851: j = *p++ & 0377; ! 852: j += SIGN_EXTEND_CHAR (*(char *)p) << 8; ! 853: p += j + 1; /* The 1 compensates for missing ++ above */ ! 854: if (stackp != stackb && *stackp == p) ! 855: stackp--; ! 856: continue; ! 857: ! 858: case on_failure_jump: ! 859: j = *p++ & 0377; ! 860: j += SIGN_EXTEND_CHAR (*(char *)p) << 8; ! 861: p++; ! 862: *++stackp = p + j; ! 863: continue; ! 864: ! 865: case start_memory: ! 866: case stop_memory: ! 867: p++; ! 868: continue; ! 869: ! 870: case duplicate: ! 871: bufp->can_be_null = 1; ! 872: fastmap['\n'] = 1; ! 873: case anychar: ! 874: for (j = 0; j < (1 << BYTEWIDTH); j++) ! 875: if (j != '\n') ! 876: fastmap[j] = 1; ! 877: if (bufp->can_be_null) ! 878: return; ! 879: /* Don't return; check the alternative paths ! 880: so we can set can_be_null if appropriate. */ ! 881: break; ! 882: ! 883: case wordchar: ! 884: for (j = 0; j < (1 << BYTEWIDTH); j++) ! 885: if (SYNTAX (j) == Sword) ! 886: fastmap[j] = 1; ! 887: break; ! 888: ! 889: case notwordchar: ! 890: for (j = 0; j < (1 << BYTEWIDTH); j++) ! 891: if (SYNTAX (j) != Sword) ! 892: fastmap[j] = 1; ! 893: break; ! 894: ! 895: #ifdef emacs ! 896: case syntaxspec: ! 897: k = *p++; ! 898: for (j = 0; j < (1 << BYTEWIDTH); j++) ! 899: if (SYNTAX (j) == (enum syntaxcode) k) ! 900: fastmap[j] = 1; ! 901: break; ! 902: ! 903: case notsyntaxspec: ! 904: k = *p++; ! 905: for (j = 0; j < (1 << BYTEWIDTH); j++) ! 906: if (SYNTAX (j) != (enum syntaxcode) k) ! 907: fastmap[j] = 1; ! 908: break; ! 909: #endif emacs ! 910: ! 911: case charset: ! 912: for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ! 913: if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ! 914: { ! 915: if (translate) ! 916: fastmap[translate[j]] = 1; ! 917: else ! 918: fastmap[j] = 1; ! 919: } ! 920: break; ! 921: ! 922: case charset_not: ! 923: /* Chars beyond end of map must be allowed */ ! 924: for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) ! 925: if (translate) ! 926: fastmap[translate[j]] = 1; ! 927: else ! 928: fastmap[j] = 1; ! 929: ! 930: for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ! 931: if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) ! 932: { ! 933: if (translate) ! 934: fastmap[translate[j]] = 1; ! 935: else ! 936: fastmap[j] = 1; ! 937: } ! 938: break; ! 939: } ! 940: ! 941: /* Get here means we have successfully found the possible starting characters ! 942: of one path of the pattern. We need not follow this path any farther. ! 943: Instead, look at the next alternative remembered in the stack. */ ! 944: if (stackp != stackb) ! 945: p = *stackp--; ! 946: else ! 947: break; ! 948: } ! 949: } ! 950: ! 951: /* Like re_search_2, below, but only one string is specified. */ ! 952: ! 953: int ! 954: re_search (pbufp, string, size, startpos, range, regs) ! 955: struct re_pattern_buffer *pbufp; ! 956: char *string; ! 957: int size, startpos, range; ! 958: struct re_registers *regs; ! 959: { ! 960: return re_search_2 (pbufp, 0, 0, string, size, startpos, range, regs, size); ! 961: } ! 962: ! 963: /* Like re_match_2 but tries first a match starting at index STARTPOS, ! 964: then at STARTPOS + 1, and so on. ! 965: RANGE is the number of places to try before giving up. ! 966: If RANGE is negative, the starting positions tried are ! 967: STARTPOS, STARTPOS - 1, etc. ! 968: It is up to the caller to make sure that range is not so large ! 969: as to take the starting position outside of the input strings. ! 970: ! 971: The value returned is the position at which the match was found, ! 972: or -1 if no match was found, ! 973: or -2 if error (such as failure stack overflow). */ ! 974: ! 975: int ! 976: re_search_2 (pbufp, string1, size1, string2, size2, startpos, range, regs, mstop) ! 977: struct re_pattern_buffer *pbufp; ! 978: char *string1, *string2; ! 979: int size1, size2; ! 980: int startpos; ! 981: register int range; ! 982: struct re_registers *regs; ! 983: int mstop; ! 984: { ! 985: register char *fastmap = pbufp->fastmap; ! 986: register unsigned char *translate = (unsigned char *) pbufp->translate; ! 987: int total = size1 + size2; ! 988: int val; ! 989: ! 990: /* Update the fastmap now if not correct already */ ! 991: if (fastmap && !pbufp->fastmap_accurate) ! 992: re_compile_fastmap (pbufp); ! 993: ! 994: /* Don't waste time in a long search for a pattern ! 995: that says it is anchored. */ ! 996: if (pbufp->used > 0 && (enum regexpcode) pbufp->buffer[0] == begbuf ! 997: && range > 0) ! 998: { ! 999: if (startpos > 0) ! 1000: return -1; ! 1001: else ! 1002: range = 1; ! 1003: } ! 1004: ! 1005: while (1) ! 1006: { ! 1007: /* If a fastmap is supplied, skip quickly over characters ! 1008: that cannot possibly be the start of a match. ! 1009: Note, however, that if the pattern can possibly match ! 1010: the null string, we must test it at each starting point ! 1011: so that we take the first null string we get. */ ! 1012: ! 1013: if (fastmap && startpos < total && pbufp->can_be_null != 1) ! 1014: { ! 1015: if (range > 0) ! 1016: { ! 1017: register int lim = 0; ! 1018: register unsigned char *p; ! 1019: int irange = range; ! 1020: if (startpos < size1 && startpos + range >= size1) ! 1021: lim = range - (size1 - startpos); ! 1022: ! 1023: p = ((unsigned char *) ! 1024: &(startpos >= size1 ? string2 - size1 : string1)[startpos]); ! 1025: ! 1026: if (translate) ! 1027: { ! 1028: while (range > lim && !fastmap[translate[*p++]]) ! 1029: range--; ! 1030: } ! 1031: else ! 1032: { ! 1033: while (range > lim && !fastmap[*p++]) ! 1034: range--; ! 1035: } ! 1036: startpos += irange - range; ! 1037: } ! 1038: else ! 1039: { ! 1040: register unsigned char c; ! 1041: if (startpos >= size1) ! 1042: c = string2[startpos - size1]; ! 1043: else ! 1044: c = string1[startpos]; ! 1045: c &= 0xff; ! 1046: if (translate ? !fastmap[translate[c]] : !fastmap[c]) ! 1047: goto advance; ! 1048: } ! 1049: } ! 1050: ! 1051: if (range >= 0 && startpos == total ! 1052: && fastmap && pbufp->can_be_null == 0) ! 1053: return -1; ! 1054: ! 1055: val = re_match_2 (pbufp, string1, size1, string2, size2, startpos, regs, mstop); ! 1056: if (0 <= val) ! 1057: { ! 1058: if (val == -2) ! 1059: return -2; ! 1060: return startpos; ! 1061: } ! 1062: ! 1063: #ifdef C_ALLOCA ! 1064: alloca (0); ! 1065: #endif /* C_ALLOCA */ ! 1066: ! 1067: advance: ! 1068: if (!range) break; ! 1069: if (range > 0) range--, startpos++; else range++, startpos--; ! 1070: } ! 1071: return -1; ! 1072: } ! 1073: ! 1074: #ifndef emacs /* emacs never uses this */ ! 1075: int ! 1076: re_match (pbufp, string, size, pos, regs) ! 1077: struct re_pattern_buffer *pbufp; ! 1078: char *string; ! 1079: int size, pos; ! 1080: struct re_registers *regs; ! 1081: { ! 1082: return re_match_2 (pbufp, 0, 0, string, size, pos, regs, size); ! 1083: } ! 1084: #endif /* emacs */ ! 1085: ! 1086: /* Maximum size of failure stack. Beyond this, overflow is an error. */ ! 1087: ! 1088: int re_max_failures = 2000; ! 1089: ! 1090: static int bcmp_translate(); ! 1091: /* Match the pattern described by PBUFP ! 1092: against data which is the virtual concatenation of STRING1 and STRING2. ! 1093: SIZE1 and SIZE2 are the sizes of the two data strings. ! 1094: Start the match at position POS. ! 1095: Do not consider matching past the position MSTOP. ! 1096: ! 1097: If pbufp->fastmap is nonzero, then it had better be up to date. ! 1098: ! 1099: The reason that the data to match are specified as two components ! 1100: which are to be regarded as concatenated ! 1101: is so this function can be used directly on the contents of an Emacs buffer. ! 1102: ! 1103: -1 is returned if there is no match. -2 is returned if there is ! 1104: an error (such as match stack overflow). Otherwise the value is the length ! 1105: of the substring which was matched. */ ! 1106: ! 1107: int ! 1108: re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop) ! 1109: struct re_pattern_buffer *pbufp; ! 1110: unsigned char *string1, *string2; ! 1111: int size1, size2; ! 1112: int pos; ! 1113: struct re_registers *regs; ! 1114: int mstop; ! 1115: { ! 1116: register unsigned char *p = (unsigned char *) pbufp->buffer; ! 1117: register unsigned char *pend = p + pbufp->used; ! 1118: /* End of first string */ ! 1119: unsigned char *end1; ! 1120: /* End of second string */ ! 1121: unsigned char *end2; ! 1122: /* Pointer just past last char to consider matching */ ! 1123: unsigned char *end_match_1, *end_match_2; ! 1124: register unsigned char *d, *dend; ! 1125: register int mcnt; ! 1126: unsigned char *translate = (unsigned char *) pbufp->translate; ! 1127: ! 1128: /* Failure point stack. Each place that can handle a failure further down the line ! 1129: pushes a failure point on this stack. It consists of two char *'s. ! 1130: The first one pushed is where to resume scanning the pattern; ! 1131: the second pushed is where to resume scanning the strings. ! 1132: If the latter is zero, the failure point is a "dummy". ! 1133: If a failure happens and the innermost failure point is dormant, ! 1134: it discards that failure point and tries the next one. */ ! 1135: ! 1136: unsigned char *initial_stack[2 * NFAILURES]; ! 1137: unsigned char **stackb = initial_stack; ! 1138: unsigned char **stackp = stackb, **stacke = &stackb[2 * NFAILURES]; ! 1139: ! 1140: /* Information on the "contents" of registers. ! 1141: These are pointers into the input strings; they record ! 1142: just what was matched (on this attempt) by some part of the pattern. ! 1143: The start_memory command stores the start of a register's contents ! 1144: and the stop_memory command stores the end. ! 1145: ! 1146: At that point, regstart[regnum] points to the first character in the register, ! 1147: regend[regnum] points to the first character beyond the end of the register, ! 1148: regstart_seg1[regnum] is true iff regstart[regnum] points into string1, ! 1149: and regend_seg1[regnum] is true iff regend[regnum] points into string1. */ ! 1150: ! 1151: unsigned char *regstart[RE_NREGS]; ! 1152: unsigned char *regend[RE_NREGS]; ! 1153: unsigned char regstart_seg1[RE_NREGS], regend_seg1[RE_NREGS]; ! 1154: ! 1155: /* Set up pointers to ends of strings. ! 1156: Don't allow the second string to be empty unless both are empty. */ ! 1157: if (!size2) ! 1158: { ! 1159: string2 = string1; ! 1160: size2 = size1; ! 1161: string1 = 0; ! 1162: size1 = 0; ! 1163: } ! 1164: end1 = string1 + size1; ! 1165: end2 = string2 + size2; ! 1166: ! 1167: /* Compute where to stop matching, within the two strings */ ! 1168: if (mstop <= size1) ! 1169: { ! 1170: end_match_1 = string1 + mstop; ! 1171: end_match_2 = string2; ! 1172: } ! 1173: else ! 1174: { ! 1175: end_match_1 = end1; ! 1176: end_match_2 = string2 + mstop - size1; ! 1177: } ! 1178: ! 1179: /* Initialize \) text positions to -1 ! 1180: to mark ones that no \( or \) has been seen for. */ ! 1181: ! 1182: for (mcnt = 0; mcnt < sizeof (regend) / sizeof (*regend); mcnt++) ! 1183: regend[mcnt] = (unsigned char *) -1; ! 1184: ! 1185: /* `p' scans through the pattern as `d' scans through the data. ! 1186: `dend' is the end of the input string that `d' points within. ! 1187: `d' is advanced into the following input string whenever necessary, ! 1188: but this happens before fetching; ! 1189: therefore, at the beginning of the loop, ! 1190: `d' can be pointing at the end of a string, ! 1191: but it cannot equal string2. */ ! 1192: ! 1193: if (pos <= size1) ! 1194: d = string1 + pos, dend = end_match_1; ! 1195: else ! 1196: d = string2 + pos - size1, dend = end_match_2; ! 1197: ! 1198: /* Write PREFETCH; just before fetching a character with *d. */ ! 1199: #define PREFETCH \ ! 1200: while (d == dend) \ ! 1201: { if (dend == end_match_2) goto fail; /* end of string2 => failure */ \ ! 1202: d = string2; /* end of string1 => advance to string2. */ \ ! 1203: dend = end_match_2; } ! 1204: ! 1205: /* This loop loops over pattern commands. ! 1206: It exits by returning from the function if match is complete, ! 1207: or it drops through if match fails at this starting point in the input data. */ ! 1208: ! 1209: while (1) ! 1210: { ! 1211: if (p == pend) ! 1212: /* End of pattern means we have succeeded! */ ! 1213: { ! 1214: /* If caller wants register contents data back, convert it to indices */ ! 1215: if (regs) ! 1216: { ! 1217: regs->start[0] = pos; ! 1218: if (dend == end_match_1) ! 1219: regs->end[0] = d - string1; ! 1220: else ! 1221: regs->end[0] = d - string2 + size1; ! 1222: for (mcnt = 1; mcnt < RE_NREGS; mcnt++) ! 1223: { ! 1224: if (regend[mcnt] == (unsigned char *) -1) ! 1225: { ! 1226: regs->start[mcnt] = -1; ! 1227: regs->end[mcnt] = -1; ! 1228: continue; ! 1229: } ! 1230: if (regstart_seg1[mcnt]) ! 1231: regs->start[mcnt] = regstart[mcnt] - string1; ! 1232: else ! 1233: regs->start[mcnt] = regstart[mcnt] - string2 + size1; ! 1234: if (regend_seg1[mcnt]) ! 1235: regs->end[mcnt] = regend[mcnt] - string1; ! 1236: else ! 1237: regs->end[mcnt] = regend[mcnt] - string2 + size1; ! 1238: } ! 1239: } ! 1240: if (dend == end_match_1) ! 1241: return (d - string1 - pos); ! 1242: else ! 1243: return d - string2 + size1 - pos; ! 1244: } ! 1245: ! 1246: /* Otherwise match next pattern command */ ! 1247: #ifdef SWITCH_ENUM_BUG ! 1248: switch ((int) ((enum regexpcode) *p++)) ! 1249: #else ! 1250: switch ((enum regexpcode) *p++) ! 1251: #endif ! 1252: { ! 1253: ! 1254: /* \( is represented by a start_memory, \) by a stop_memory. ! 1255: Both of those commands contain a "register number" argument. ! 1256: The text matched within the \( and \) is recorded under that number. ! 1257: Then, \<digit> turns into a `duplicate' command which ! 1258: is followed by the numeric value of <digit> as the register number. */ ! 1259: ! 1260: case start_memory: ! 1261: regstart[*p] = d; ! 1262: regstart_seg1[*p++] = (dend == end_match_1); ! 1263: break; ! 1264: ! 1265: case stop_memory: ! 1266: regend[*p] = d; ! 1267: regend_seg1[*p++] = (dend == end_match_1); ! 1268: break; ! 1269: ! 1270: case duplicate: ! 1271: { ! 1272: int regno = *p++; /* Get which register to match against */ ! 1273: register unsigned char *d2, *dend2; ! 1274: ! 1275: d2 = regstart[regno]; ! 1276: dend2 = ((regstart_seg1[regno] == regend_seg1[regno]) ! 1277: ? regend[regno] : end_match_1); ! 1278: while (1) ! 1279: { ! 1280: /* Advance to next segment in register contents, if necessary */ ! 1281: while (d2 == dend2) ! 1282: { ! 1283: if (dend2 == end_match_2) break; ! 1284: if (dend2 == regend[regno]) break; ! 1285: d2 = string2, dend2 = regend[regno]; /* end of string1 => advance to string2. */ ! 1286: } ! 1287: /* At end of register contents => success */ ! 1288: if (d2 == dend2) break; ! 1289: ! 1290: /* Advance to next segment in data being matched, if necessary */ ! 1291: PREFETCH; ! 1292: ! 1293: /* mcnt gets # consecutive chars to compare */ ! 1294: mcnt = dend - d; ! 1295: if (mcnt > dend2 - d2) ! 1296: mcnt = dend2 - d2; ! 1297: /* Compare that many; failure if mismatch, else skip them. */ ! 1298: if (translate ? bcmp_translate (d, d2, mcnt, translate) : bcmp (d, d2, mcnt)) ! 1299: goto fail; ! 1300: d += mcnt, d2 += mcnt; ! 1301: } ! 1302: } ! 1303: break; ! 1304: ! 1305: case anychar: ! 1306: /* fetch a data character */ ! 1307: PREFETCH; ! 1308: /* Match anything but a newline. */ ! 1309: if ((translate ? translate[*d++] : *d++) == '\n') ! 1310: goto fail; ! 1311: break; ! 1312: ! 1313: case charset: ! 1314: case charset_not: ! 1315: { ! 1316: /* Nonzero for charset_not */ ! 1317: int not = 0; ! 1318: register int c; ! 1319: if (*(p - 1) == (unsigned char) charset_not) ! 1320: not = 1; ! 1321: ! 1322: /* fetch a data character */ ! 1323: PREFETCH; ! 1324: ! 1325: if (translate) ! 1326: c = translate [*d]; ! 1327: else ! 1328: c = *d; ! 1329: ! 1330: if (c < *p * BYTEWIDTH ! 1331: && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) ! 1332: not = !not; ! 1333: ! 1334: p += 1 + *p; ! 1335: ! 1336: if (!not) goto fail; ! 1337: d++; ! 1338: break; ! 1339: } ! 1340: ! 1341: case begline: ! 1342: if (d == string1 || d[-1] == '\n') ! 1343: break; ! 1344: goto fail; ! 1345: ! 1346: case endline: ! 1347: if (d == end2 ! 1348: || (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n')) ! 1349: break; ! 1350: goto fail; ! 1351: ! 1352: /* "or" constructs ("|") are handled by starting each alternative ! 1353: with an on_failure_jump that points to the start of the next alternative. ! 1354: Each alternative except the last ends with a jump to the joining point. ! 1355: (Actually, each jump except for the last one really jumps ! 1356: to the following jump, because tensioning the jumps is a hassle.) */ ! 1357: ! 1358: /* The start of a stupid repeat has an on_failure_jump that points ! 1359: past the end of the repeat text. ! 1360: This makes a failure point so that, on failure to match a repetition, ! 1361: matching restarts past as many repetitions have been found ! 1362: with no way to fail and look for another one. */ ! 1363: ! 1364: /* A smart repeat is similar but loops back to the on_failure_jump ! 1365: so that each repetition makes another failure point. */ ! 1366: ! 1367: case on_failure_jump: ! 1368: if (stackp == stacke) ! 1369: { ! 1370: unsigned char **stackx; ! 1371: if (stacke - stackb > re_max_failures) ! 1372: return -2; ! 1373: stackx = (unsigned char **) alloca (2 * (stacke - stackb) ! 1374: * sizeof (char *)); ! 1375: bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *)); ! 1376: stackp = stackx + (stackp - stackb); ! 1377: stacke = stackx + 2 * (stacke - stackb); ! 1378: stackb = stackx; ! 1379: } ! 1380: mcnt = *p++ & 0377; ! 1381: mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8; ! 1382: p++; ! 1383: *stackp++ = mcnt + p; ! 1384: *stackp++ = d; ! 1385: break; ! 1386: ! 1387: /* The end of a smart repeat has an maybe_finalize_jump back. ! 1388: Change it either to a finalize_jump or an ordinary jump. */ ! 1389: ! 1390: case maybe_finalize_jump: ! 1391: mcnt = *p++ & 0377; ! 1392: mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8; ! 1393: p++; ! 1394: /* Compare what follows with the begining of the repeat. ! 1395: If we can establish that there is nothing that they would ! 1396: both match, we can change to finalize_jump */ ! 1397: if (p == pend) ! 1398: p[-3] = (unsigned char) finalize_jump; ! 1399: else if (*p == (unsigned char) exactn ! 1400: || *p == (unsigned char) endline) ! 1401: { ! 1402: register int c = *p == (unsigned char) endline ? '\n' : p[2]; ! 1403: register unsigned char *p1 = p + mcnt; ! 1404: /* p1[0] ... p1[2] are an on_failure_jump. ! 1405: Examine what follows that */ ! 1406: if (p1[3] == (unsigned char) exactn && p1[5] != c) ! 1407: p[-3] = (unsigned char) finalize_jump; ! 1408: else if (p1[3] == (unsigned char) charset ! 1409: || p1[3] == (unsigned char) charset_not) ! 1410: { ! 1411: int not = p1[3] == (unsigned char) charset_not; ! 1412: if (c < p1[4] * BYTEWIDTH ! 1413: && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) ! 1414: not = !not; ! 1415: /* not is 1 if c would match */ ! 1416: /* That means it is not safe to finalize */ ! 1417: if (!not) ! 1418: p[-3] = (unsigned char) finalize_jump; ! 1419: } ! 1420: } ! 1421: p -= 2; ! 1422: if (p[-1] != (unsigned char) finalize_jump) ! 1423: { ! 1424: p[-1] = (unsigned char) jump; ! 1425: goto nofinalize; ! 1426: } ! 1427: ! 1428: /* The end of a stupid repeat has a finalize-jump ! 1429: back to the start, where another failure point will be made ! 1430: which will point after all the repetitions found so far. */ ! 1431: ! 1432: case finalize_jump: ! 1433: stackp -= 2; ! 1434: ! 1435: case jump: ! 1436: nofinalize: ! 1437: mcnt = *p++ & 0377; ! 1438: mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8; ! 1439: p += mcnt + 1; /* The 1 compensates for missing ++ above */ ! 1440: break; ! 1441: ! 1442: case dummy_failure_jump: ! 1443: if (stackp == stacke) ! 1444: { ! 1445: unsigned char **stackx ! 1446: = (unsigned char **) alloca (2 * (stacke - stackb) ! 1447: * sizeof (char *)); ! 1448: bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *)); ! 1449: stackp = stackx + (stackp - stackb); ! 1450: stacke = stackx + 2 * (stacke - stackb); ! 1451: stackb = stackx; ! 1452: } ! 1453: *stackp++ = 0; ! 1454: *stackp++ = 0; ! 1455: goto nofinalize; ! 1456: ! 1457: case wordbound: ! 1458: if (d == string1 /* Points to first char */ ! 1459: || d == end2 /* Points to end */ ! 1460: || (d == end1 && size2 == 0)) /* Points to end */ ! 1461: break; ! 1462: if ((SYNTAX (d[-1]) == Sword) ! 1463: != (SYNTAX (d == end1 ? *string2 : *d) == Sword)) ! 1464: break; ! 1465: goto fail; ! 1466: ! 1467: case notwordbound: ! 1468: if (d == string1 /* Points to first char */ ! 1469: || d == end2 /* Points to end */ ! 1470: || (d == end1 && size2 == 0)) /* Points to end */ ! 1471: goto fail; ! 1472: if ((SYNTAX (d[-1]) == Sword) ! 1473: != (SYNTAX (d == end1 ? *string2 : *d) == Sword)) ! 1474: goto fail; ! 1475: break; ! 1476: ! 1477: case wordbeg: ! 1478: if (d == end2 /* Points to end */ ! 1479: || (d == end1 && size2 == 0) /* Points to end */ ! 1480: || SYNTAX (* (d == end1 ? string2 : d)) != Sword) /* Next char not a letter */ ! 1481: goto fail; ! 1482: if (d == string1 /* Points to first char */ ! 1483: || SYNTAX (d[-1]) != Sword) /* prev char not letter */ ! 1484: break; ! 1485: goto fail; ! 1486: ! 1487: case wordend: ! 1488: if (d == string1 /* Points to first char */ ! 1489: || SYNTAX (d[-1]) != Sword) /* prev char not letter */ ! 1490: goto fail; ! 1491: if (d == end2 /* Points to end */ ! 1492: || (d == end1 && size2 == 0) /* Points to end */ ! 1493: || SYNTAX (d == end1 ? *string2 : *d) != Sword) /* Next char not a letter */ ! 1494: break; ! 1495: goto fail; ! 1496: ! 1497: #ifdef emacs ! 1498: case before_dot: ! 1499: if (((d - string2 <= (unsigned) size2) ! 1500: ? d - bf_p2 : d - bf_p1) ! 1501: <= point) ! 1502: goto fail; ! 1503: break; ! 1504: ! 1505: case at_dot: ! 1506: if (((d - string2 <= (unsigned) size2) ! 1507: ? d - bf_p2 : d - bf_p1) ! 1508: == point) ! 1509: goto fail; ! 1510: break; ! 1511: ! 1512: case after_dot: ! 1513: if (((d - string2 <= (unsigned) size2) ! 1514: ? d - bf_p2 : d - bf_p1) ! 1515: >= point) ! 1516: goto fail; ! 1517: break; ! 1518: ! 1519: case wordchar: ! 1520: mcnt = (int) Sword; ! 1521: goto matchsyntax; ! 1522: ! 1523: case syntaxspec: ! 1524: mcnt = *p++; ! 1525: matchsyntax: ! 1526: PREFETCH; ! 1527: if (SYNTAX (*d++) != (enum syntaxcode) mcnt) goto fail; ! 1528: break; ! 1529: ! 1530: case notwordchar: ! 1531: mcnt = (int) Sword; ! 1532: goto matchnotsyntax; ! 1533: ! 1534: case notsyntaxspec: ! 1535: mcnt = *p++; ! 1536: matchnotsyntax: ! 1537: PREFETCH; ! 1538: if (SYNTAX (*d++) == (enum syntaxcode) mcnt) goto fail; ! 1539: break; ! 1540: #else ! 1541: case wordchar: ! 1542: PREFETCH; ! 1543: if (SYNTAX (*d++) == 0) goto fail; ! 1544: break; ! 1545: ! 1546: case notwordchar: ! 1547: PREFETCH; ! 1548: if (SYNTAX (*d++) != 0) goto fail; ! 1549: break; ! 1550: #endif not emacs ! 1551: ! 1552: case begbuf: ! 1553: if (d == string1) /* Note, d cannot equal string2 */ ! 1554: break; /* unless string1 == string2. */ ! 1555: goto fail; ! 1556: ! 1557: case endbuf: ! 1558: if (d == end2 || (d == end1 && size2 == 0)) ! 1559: break; ! 1560: goto fail; ! 1561: ! 1562: case exactn: ! 1563: /* Match the next few pattern characters exactly. ! 1564: mcnt is how many characters to match. */ ! 1565: mcnt = *p++; ! 1566: if (translate) ! 1567: { ! 1568: do ! 1569: { ! 1570: PREFETCH; ! 1571: if (translate[*d++] != *p++) goto fail; ! 1572: } ! 1573: while (--mcnt); ! 1574: } ! 1575: else ! 1576: { ! 1577: do ! 1578: { ! 1579: PREFETCH; ! 1580: if (*d++ != *p++) goto fail; ! 1581: } ! 1582: while (--mcnt); ! 1583: } ! 1584: break; ! 1585: } ! 1586: continue; /* Successfully matched one pattern command; keep matching */ ! 1587: ! 1588: /* Jump here if any matching operation fails. */ ! 1589: fail: ! 1590: if (stackp != stackb) ! 1591: /* A restart point is known. Restart there and pop it. */ ! 1592: { ! 1593: if (!stackp[-2]) ! 1594: { /* If innermost failure point is dormant, flush it and keep looking */ ! 1595: stackp -= 2; ! 1596: goto fail; ! 1597: } ! 1598: d = *--stackp; ! 1599: p = *--stackp; ! 1600: if (d >= string1 && d <= end1) ! 1601: dend = end_match_1; ! 1602: } ! 1603: else break; /* Matching at this starting point really fails! */ ! 1604: } ! 1605: return -1; /* Failure to match */ ! 1606: } ! 1607: ! 1608: static int ! 1609: bcmp_translate (s1, s2, len, translate) ! 1610: unsigned char *s1, *s2; ! 1611: register int len; ! 1612: unsigned char *translate; ! 1613: { ! 1614: register unsigned char *p1 = s1, *p2 = s2; ! 1615: while (len) ! 1616: { ! 1617: if (translate [*p1++] != translate [*p2++]) return 1; ! 1618: len--; ! 1619: } ! 1620: return 0; ! 1621: } ! 1622: ! 1623: /* Entry points compatible with bsd4.2 regex library */ ! 1624: ! 1625: #ifndef emacs ! 1626: ! 1627: static struct re_pattern_buffer re_comp_buf; ! 1628: ! 1629: char * ! 1630: re_comp (s) ! 1631: char *s; ! 1632: { ! 1633: if (!s) ! 1634: { ! 1635: if (!re_comp_buf.buffer) ! 1636: return "No previous regular expression"; ! 1637: return 0; ! 1638: } ! 1639: ! 1640: if (!re_comp_buf.buffer) ! 1641: { ! 1642: if (!(re_comp_buf.buffer = (char *) malloc (200))) ! 1643: return "Memory exhausted"; ! 1644: re_comp_buf.allocated = 200; ! 1645: if (!(re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH))) ! 1646: return "Memory exhausted"; ! 1647: } ! 1648: return re_compile_pattern (s, strlen (s), &re_comp_buf); ! 1649: } ! 1650: ! 1651: int ! 1652: re_exec (s) ! 1653: char *s; ! 1654: { ! 1655: int len = strlen (s); ! 1656: return 0 <= re_search (&re_comp_buf, s, len, 0, len, 0); ! 1657: } ! 1658: ! 1659: #endif /* emacs */ ! 1660: ! 1661: #ifdef test ! 1662: ! 1663: #include <stdio.h> ! 1664: ! 1665: /* Indexed by a character, gives the upper case equivalent of the character */ ! 1666: ! 1667: static char upcase[0400] = ! 1668: { 000, 001, 002, 003, 004, 005, 006, 007, ! 1669: 010, 011, 012, 013, 014, 015, 016, 017, ! 1670: 020, 021, 022, 023, 024, 025, 026, 027, ! 1671: 030, 031, 032, 033, 034, 035, 036, 037, ! 1672: 040, 041, 042, 043, 044, 045, 046, 047, ! 1673: 050, 051, 052, 053, 054, 055, 056, 057, ! 1674: 060, 061, 062, 063, 064, 065, 066, 067, ! 1675: 070, 071, 072, 073, 074, 075, 076, 077, ! 1676: 0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107, ! 1677: 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117, ! 1678: 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127, ! 1679: 0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137, ! 1680: 0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107, ! 1681: 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117, ! 1682: 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127, ! 1683: 0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177, ! 1684: 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207, ! 1685: 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217, ! 1686: 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227, ! 1687: 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237, ! 1688: 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247, ! 1689: 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257, ! 1690: 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267, ! 1691: 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277, ! 1692: 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307, ! 1693: 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317, ! 1694: 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327, ! 1695: 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337, ! 1696: 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347, ! 1697: 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357, ! 1698: 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367, ! 1699: 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377 ! 1700: }; ! 1701: ! 1702: main (argc, argv) ! 1703: int argc; ! 1704: char **argv; ! 1705: { ! 1706: char pat[80]; ! 1707: struct re_pattern_buffer buf; ! 1708: int i; ! 1709: char c; ! 1710: char fastmap[(1 << BYTEWIDTH)]; ! 1711: ! 1712: /* Allow a command argument to specify the style of syntax. */ ! 1713: if (argc > 1) ! 1714: obscure_syntax = atoi (argv[1]); ! 1715: ! 1716: buf.allocated = 40; ! 1717: buf.buffer = (char *) malloc (buf.allocated); ! 1718: buf.fastmap = fastmap; ! 1719: buf.translate = upcase; ! 1720: ! 1721: while (1) ! 1722: { ! 1723: gets (pat); ! 1724: ! 1725: if (*pat) ! 1726: { ! 1727: re_compile_pattern (pat, strlen(pat), &buf); ! 1728: ! 1729: for (i = 0; i < buf.used; i++) ! 1730: printchar (buf.buffer[i]); ! 1731: ! 1732: putchar ('\n'); ! 1733: ! 1734: printf ("%d allocated, %d used.\n", buf.allocated, buf.used); ! 1735: ! 1736: re_compile_fastmap (&buf); ! 1737: printf ("Allowed by fastmap: "); ! 1738: for (i = 0; i < (1 << BYTEWIDTH); i++) ! 1739: if (fastmap[i]) printchar (i); ! 1740: putchar ('\n'); ! 1741: } ! 1742: ! 1743: gets (pat); /* Now read the string to match against */ ! 1744: ! 1745: i = re_match (&buf, pat, strlen (pat), 0, 0); ! 1746: printf ("Match value %d.\n", i); ! 1747: } ! 1748: } ! 1749: ! 1750: #ifdef NOTDEF ! 1751: print_buf (bufp) ! 1752: struct re_pattern_buffer *bufp; ! 1753: { ! 1754: int i; ! 1755: ! 1756: printf ("buf is :\n----------------\n"); ! 1757: for (i = 0; i < bufp->used; i++) ! 1758: printchar (bufp->buffer[i]); ! 1759: ! 1760: printf ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used); ! 1761: ! 1762: printf ("Allowed by fastmap: "); ! 1763: for (i = 0; i < (1 << BYTEWIDTH); i++) ! 1764: if (bufp->fastmap[i]) ! 1765: printchar (i); ! 1766: printf ("\nAllowed by translate: "); ! 1767: if (bufp->translate) ! 1768: for (i = 0; i < (1 << BYTEWIDTH); i++) ! 1769: if (bufp->translate[i]) ! 1770: printchar (i); ! 1771: printf ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't"); ! 1772: printf ("can %s be null\n----------", bufp->can_be_null ? "" : "not"); ! 1773: } ! 1774: #endif ! 1775: ! 1776: printchar (c) ! 1777: char c; ! 1778: { ! 1779: if (c < 041 || c >= 0177) ! 1780: { ! 1781: putchar ('\\'); ! 1782: putchar (((c >> 6) & 3) + '0'); ! 1783: putchar (((c >> 3) & 7) + '0'); ! 1784: putchar ((c & 7) + '0'); ! 1785: } ! 1786: else ! 1787: putchar (c); ! 1788: } ! 1789: ! 1790: error (string) ! 1791: char *string; ! 1792: { ! 1793: puts (string); ! 1794: exit (1); ! 1795: } ! 1796: ! 1797: #endif test
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.