|
|
1.1 ! root 1: /* regexp.c */ ! 2: ! 3: /* This file contains the code that compiles regular expressions and executes ! 4: * them. It supports the same syntax and features as vi's regular expression ! 5: * code. Specifically, the meta characters are: ! 6: * ^ matches the beginning of a line ! 7: * $ matches the end of a line ! 8: * \< matches the beginning of a word ! 9: * \> matches the end of a word ! 10: * . matches any single character ! 11: * [] matches any character in a character class ! 12: * \( delimits the start of a subexpression ! 13: * \) delimits the end of a subexpression ! 14: * * repeats the preceding 0 or more times ! 15: * NOTE: You cannot follow a \) with a *. ! 16: * ! 17: * The physical structure of a compiled RE is as follows: ! 18: * - First, there is a one-byte value that says how many character classes ! 19: * are used in this regular expression ! 20: * - Next, each character class is stored as a bitmap that is 256 bits ! 21: * (32 bytes) long. ! 22: * - A mixture of literal characters and compiled meta characters follows. ! 23: * This begins with M_BEGIN(0) and ends with M_END(0). All meta chars ! 24: * are stored as a \n followed by a one-byte code, so they take up two ! 25: * bytes apiece. Literal characters take up one byte apiece. \n can't ! 26: * be used as a literal character. ! 27: * ! 28: * If NO_MAGIC is defined, then a different set of functions is used instead. ! 29: * That right, this file contains TWO versions of the code. ! 30: */ ! 31: ! 32: #include <setjmp.h> ! 33: #include "config.h" ! 34: #include "ctype.h" ! 35: #include "vi.h" ! 36: #include "regexp.h" ! 37: ! 38: ! 39: ! 40: static char *previous; /* the previous regexp, used when null regexp is given */ ! 41: ! 42: ! 43: #ifndef NO_MAGIC ! 44: /* THE REAL REGEXP PACKAGE IS USED UNLESS "NO_MAGIC" IS DEFINED */ ! 45: ! 46: /* These are used to classify or recognize meta-characters */ ! 47: #define META '\0' ! 48: #define BASE_META(m) ((m) - 256) ! 49: #define INT_META(c) ((c) + 256) ! 50: #define IS_META(m) ((m) >= 256) ! 51: #define IS_CLASS(m) ((m) >= M_CLASS(0) && (m) <= M_CLASS(9)) ! 52: #define IS_START(m) ((m) >= M_START(0) && (m) <= M_START(9)) ! 53: #define IS_END(m) ((m) >= M_END(0) && (m) <= M_END(9)) ! 54: #define IS_CLOSURE(m) ((m) >= M_SPLAT && (m) <= M_RANGE) ! 55: #define ADD_META(s,m) (*(s)++ = META, *(s)++ = BASE_META(m)) ! 56: #define GET_META(s) (*(s) == META ? INT_META(*++(s)) : *s) ! 57: ! 58: /* These are the internal codes used for each type of meta-character */ ! 59: #define M_BEGLINE 256 /* internal code for ^ */ ! 60: #define M_ENDLINE 257 /* internal code for $ */ ! 61: #define M_BEGWORD 258 /* internal code for \< */ ! 62: #define M_ENDWORD 259 /* internal code for \> */ ! 63: #define M_ANY 260 /* internal code for . */ ! 64: #define M_SPLAT 261 /* internal code for * */ ! 65: #define M_PLUS 262 /* internal code for \+ */ ! 66: #define M_QMARK 263 /* internal code for \? */ ! 67: #define M_RANGE 264 /* internal code for \{ */ ! 68: #define M_CLASS(n) (265+(n)) /* internal code for [] */ ! 69: #define M_START(n) (275+(n)) /* internal code for \( */ ! 70: #define M_END(n) (285+(n)) /* internal code for \) */ ! 71: ! 72: /* These are used during compilation */ ! 73: static int class_cnt; /* used to assign class IDs */ ! 74: static int start_cnt; /* used to assign start IDs */ ! 75: static int end_stk[NSUBEXP];/* used to assign end IDs */ ! 76: static int end_sp; ! 77: static char *retext; /* points to the text being compiled */ ! 78: ! 79: /* error-handling stuff */ ! 80: jmp_buf errorhandler; ! 81: #define FAIL(why) regerror(why); longjmp(errorhandler, 1) ! 82: ! 83: ! 84: ! 85: ! 86: ! 87: /* This function builds a bitmap for a particular class */ ! 88: static char *makeclass(text, bmap) ! 89: REG char *text; /* start of the class */ ! 90: REG char *bmap; /* the bitmap */ ! 91: { ! 92: REG int i; ! 93: int complement = 0; ! 94: ! 95: ! 96: checkmem(); ! 97: ! 98: /* zero the bitmap */ ! 99: for (i = 0; bmap && i < 32; i++) ! 100: { ! 101: bmap[i] = 0; ! 102: } ! 103: ! 104: /* see if we're going to complement this class */ ! 105: if (*text == '^') ! 106: { ! 107: text++; ! 108: complement = 1; ! 109: } ! 110: ! 111: /* add in the characters */ ! 112: while (*text && *text != ']') ! 113: { ! 114: /* is this a span of characters? */ ! 115: if (text[1] == '-' && text[2]) ! 116: { ! 117: /* spans can't be backwards */ ! 118: if (text[0] > text[2]) ! 119: { ! 120: FAIL("Backwards span in []"); ! 121: } ! 122: ! 123: /* add each character in the span to the bitmap */ ! 124: for (i = UCHAR(text[0]); bmap && i <= UCHAR(text[2]); i++) ! 125: { ! 126: bmap[i >> 3] |= (1 << (i & 7)); ! 127: } ! 128: ! 129: /* move past this span */ ! 130: text += 3; ! 131: } ! 132: else ! 133: { ! 134: /* add this single character to the span */ ! 135: i = *text++; ! 136: if (bmap) ! 137: { ! 138: bmap[UCHAR(i) >> 3] |= (1 << (UCHAR(i) & 7)); ! 139: } ! 140: } ! 141: } ! 142: ! 143: /* make sure the closing ] is missing */ ! 144: if (*text++ != ']') ! 145: { ! 146: FAIL("] missing"); ! 147: } ! 148: ! 149: /* if we're supposed to complement this class, then do so */ ! 150: if (complement && bmap) ! 151: { ! 152: for (i = 0; i < 32; i++) ! 153: { ! 154: bmap[i] = ~bmap[i]; ! 155: } ! 156: } ! 157: ! 158: checkmem(); ! 159: ! 160: return text; ! 161: } ! 162: ! 163: ! 164: ! 165: ! 166: /* This function gets the next character or meta character from a string. ! 167: * The pointer is incremented by 1, or by 2 for \-quoted characters. For [], ! 168: * a bitmap is generated via makeclass() (if re is given), and the ! 169: * character-class text is skipped. ! 170: */ ! 171: static int gettoken(sptr, re) ! 172: char **sptr; ! 173: regexp *re; ! 174: { ! 175: int c; ! 176: ! 177: c = **sptr; ! 178: if (!c) ! 179: { ! 180: return c; ! 181: } ! 182: ++*sptr; ! 183: if (c == '\\') ! 184: { ! 185: c = **sptr; ! 186: ++*sptr; ! 187: switch (c) ! 188: { ! 189: case '<': ! 190: return M_BEGWORD; ! 191: ! 192: case '>': ! 193: return M_ENDWORD; ! 194: ! 195: case '(': ! 196: if (start_cnt >= NSUBEXP) ! 197: { ! 198: FAIL("Too many \\(s"); ! 199: } ! 200: end_stk[end_sp++] = start_cnt; ! 201: return M_START(start_cnt++); ! 202: ! 203: case ')': ! 204: if (end_sp <= 0) ! 205: { ! 206: FAIL("Mismatched \\)"); ! 207: } ! 208: return M_END(end_stk[--end_sp]); ! 209: ! 210: case '*': ! 211: return (*o_magic ? c : M_SPLAT); ! 212: ! 213: case '.': ! 214: return (*o_magic ? c : M_ANY); ! 215: ! 216: case '+': ! 217: return M_PLUS; ! 218: ! 219: case '?': ! 220: return M_QMARK; ! 221: #ifndef CRUNCH ! 222: case '{': ! 223: return M_RANGE; ! 224: #endif ! 225: default: ! 226: return c; ! 227: } ! 228: } ! 229: else if (*o_magic) ! 230: { ! 231: switch (c) ! 232: { ! 233: case '^': ! 234: if (*sptr == retext + 1) ! 235: { ! 236: return M_BEGLINE; ! 237: } ! 238: return c; ! 239: ! 240: case '$': ! 241: if (!**sptr) ! 242: { ! 243: return M_ENDLINE; ! 244: } ! 245: return c; ! 246: ! 247: case '.': ! 248: return M_ANY; ! 249: ! 250: case '*': ! 251: return M_SPLAT; ! 252: ! 253: case '[': ! 254: /* make sure we don't have too many classes */ ! 255: if (class_cnt >= 10) ! 256: { ! 257: FAIL("Too many []s"); ! 258: } ! 259: ! 260: /* process the character list for this class */ ! 261: if (re) ! 262: { ! 263: /* generate the bitmap for this class */ ! 264: *sptr = makeclass(*sptr, re->program + 1 + 32 * class_cnt); ! 265: } ! 266: else ! 267: { ! 268: /* skip to end of the class */ ! 269: *sptr = makeclass(*sptr, (char *)0); ! 270: } ! 271: return M_CLASS(class_cnt++); ! 272: ! 273: default: ! 274: return c; ! 275: } ! 276: } ! 277: else /* unquoted nomagic */ ! 278: { ! 279: switch (c) ! 280: { ! 281: case '^': ! 282: if (*sptr == retext + 1) ! 283: { ! 284: return M_BEGLINE; ! 285: } ! 286: return c; ! 287: ! 288: case '$': ! 289: if (!**sptr) ! 290: { ! 291: return M_ENDLINE; ! 292: } ! 293: return c; ! 294: ! 295: default: ! 296: return c; ! 297: } ! 298: } ! 299: /*NOTREACHED*/ ! 300: } ! 301: ! 302: ! 303: ! 304: ! 305: /* This function calculates the number of bytes that will be needed for a ! 306: * compiled RE. Its argument is the uncompiled version. It is not clever ! 307: * about catching syntax errors; that is done in a later pass. ! 308: */ ! 309: static unsigned calcsize(text) ! 310: char *text; ! 311: { ! 312: unsigned size; ! 313: int token; ! 314: ! 315: retext = text; ! 316: class_cnt = 0; ! 317: start_cnt = 1; ! 318: end_sp = 0; ! 319: size = 5; ! 320: while ((token = gettoken(&text, (regexp *)0)) != 0) ! 321: { ! 322: if (IS_CLASS(token)) ! 323: { ! 324: size += 34; ! 325: } ! 326: #ifndef CRUNCH ! 327: else if (token == M_RANGE) ! 328: { ! 329: size += 4; ! 330: while ((token = gettoken(&text, (regexp *)0)) != 0 ! 331: && token != '}') ! 332: { ! 333: } ! 334: if (!token) ! 335: { ! 336: return size; ! 337: } ! 338: } ! 339: #endif ! 340: else if (IS_META(token)) ! 341: { ! 342: size += 2; ! 343: } ! 344: else ! 345: { ! 346: size++; ! 347: } ! 348: } ! 349: ! 350: return size; ! 351: } ! 352: ! 353: ! 354: ! 355: /* This function compiles a regexp. */ ! 356: regexp *regcomp(exp) ! 357: char *exp; ! 358: { ! 359: int needfirst; ! 360: unsigned size; ! 361: int token; ! 362: int peek; ! 363: char *build; ! 364: regexp *re; ! 365: #ifndef CRUNCH ! 366: int from; ! 367: int to; ! 368: int digit; ! 369: #endif ! 370: #ifdef DEBUG ! 371: int calced; ! 372: #endif ! 373: ! 374: ! 375: checkmem(); ! 376: ! 377: /* prepare for error handling */ ! 378: re = (regexp *)0; ! 379: if (setjmp(errorhandler)) ! 380: { ! 381: checkmem(); ! 382: if (re) ! 383: { ! 384: _free_(re); ! 385: } ! 386: return (regexp *)0; ! 387: } ! 388: ! 389: /* if an empty regexp string was given, use the previous one */ ! 390: if (*exp == 0) ! 391: { ! 392: if (!previous) ! 393: { ! 394: FAIL("No previous RE"); ! 395: } ! 396: exp = previous; ! 397: } ! 398: else /* non-empty regexp given, so remember it */ ! 399: { ! 400: if (previous) ! 401: _free_(previous); ! 402: previous = (char *)malloc((unsigned)(strlen(exp) + 1)); ! 403: if (previous) ! 404: strcpy(previous, exp); ! 405: } ! 406: ! 407: /* allocate memory */ ! 408: checkmem(); ! 409: class_cnt = 0; ! 410: start_cnt = 1; ! 411: end_sp = 0; ! 412: retext = exp; ! 413: #ifdef DEBUG ! 414: calced = calcsize(exp); ! 415: size = calced + sizeof(regexp); ! 416: #else ! 417: size = calcsize(exp) + sizeof(regexp) + 10; /* !!! 10 bytes for slop */ ! 418: #endif ! 419: #ifdef lint ! 420: re = (regexp *)0; ! 421: #else ! 422: re = (regexp *)malloc((unsigned)size); ! 423: #endif ! 424: if (!re) ! 425: { ! 426: FAIL("Not enough memory for this RE"); ! 427: } ! 428: checkmem(); ! 429: ! 430: /* compile it */ ! 431: build = &re->program[1 + 32 * class_cnt]; ! 432: re->program[0] = class_cnt; ! 433: for (token = 0; token < NSUBEXP; token++) ! 434: { ! 435: re->startp[token] = re->endp[token] = (char *)0; ! 436: } ! 437: re->first = 0; ! 438: re->bol = 0; ! 439: re->minlen = 0; ! 440: needfirst = 1; ! 441: class_cnt = 0; ! 442: start_cnt = 1; ! 443: end_sp = 0; ! 444: retext = exp; ! 445: for (token = M_START(0), peek = gettoken(&exp, re); ! 446: token; ! 447: token = peek, peek = gettoken(&exp, re)) ! 448: { ! 449: /* special processing for the closure operator */ ! 450: if (IS_CLOSURE(peek)) ! 451: { ! 452: /* detect misuse of closure operator */ ! 453: if (IS_START(token)) ! 454: { ! 455: FAIL("Closure operator follows nothing"); ! 456: } ! 457: else if (IS_META(token) && token != M_ANY && !IS_CLASS(token)) ! 458: { ! 459: FAIL("Closure operators can only follow a normal character or . or []"); ! 460: } ! 461: ! 462: #ifndef CRUNCH ! 463: /* if \{ \} then read the range */ ! 464: if (peek == M_RANGE) ! 465: { ! 466: from = 0; ! 467: for (digit = gettoken(&exp, re); ! 468: !IS_META(digit) && isdigit(digit); ! 469: digit = gettoken(&exp, re)) ! 470: { ! 471: from = from * 10 + digit - '0'; ! 472: } ! 473: if (digit == '}') ! 474: { ! 475: to = from; ! 476: } ! 477: else if (digit == ',') ! 478: { ! 479: to = 0; ! 480: for (digit = gettoken(&exp, re); ! 481: !IS_META(digit) && isdigit(digit); ! 482: digit = gettoken(&exp, re)) ! 483: { ! 484: to = to * 10 + digit - '0'; ! 485: } ! 486: if (to == 0) ! 487: { ! 488: to = 255; ! 489: } ! 490: } ! 491: if (digit != '}') ! 492: { ! 493: FAIL("Bad characters after \\{"); ! 494: } ! 495: else if (to < from || to == 0 || from >= 255) ! 496: { ! 497: FAIL("Invalid range for \\{ \\}"); ! 498: } ! 499: re->minlen += from; ! 500: } ! 501: else ! 502: #endif ! 503: if (peek != M_SPLAT) ! 504: { ! 505: re->minlen++; ! 506: } ! 507: ! 508: /* it is okay -- make it prefix instead of postfix */ ! 509: ADD_META(build, peek); ! 510: #ifndef CRUNCH ! 511: if (peek == M_RANGE) ! 512: { ! 513: *build++ = from; ! 514: *build++ = (to < 255 ? to : 255); ! 515: } ! 516: #endif ! 517: ! 518: ! 519: /* take care of "needfirst" - is this the first char? */ ! 520: if (needfirst && peek == M_PLUS && !IS_META(token)) ! 521: { ! 522: re->first = token; ! 523: } ! 524: needfirst = 0; ! 525: ! 526: /* we used "peek" -- need to refill it */ ! 527: peek = gettoken(&exp, re); ! 528: if (IS_CLOSURE(peek)) ! 529: { ! 530: FAIL("* or \\+ or \\? doubled up"); ! 531: } ! 532: } ! 533: else if (!IS_META(token)) ! 534: { ! 535: /* normal char is NOT argument of closure */ ! 536: if (needfirst) ! 537: { ! 538: re->first = token; ! 539: needfirst = 0; ! 540: } ! 541: re->minlen++; ! 542: } ! 543: else if (token == M_ANY || IS_CLASS(token)) ! 544: { ! 545: /* . or [] is NOT argument of closure */ ! 546: needfirst = 0; ! 547: re->minlen++; ! 548: } ! 549: ! 550: /* the "token" character is not closure -- process it normally */ ! 551: if (token == M_BEGLINE) ! 552: { ! 553: /* set the BOL flag instead of storing M_BEGLINE */ ! 554: re->bol = 1; ! 555: } ! 556: else if (IS_META(token)) ! 557: { ! 558: ADD_META(build, token); ! 559: } ! 560: else ! 561: { ! 562: *build++ = token; ! 563: } ! 564: } ! 565: checkmem(); ! 566: ! 567: /* end it with a \) which MUST MATCH the opening \( */ ! 568: ADD_META(build, M_END(0)); ! 569: if (end_sp > 0) ! 570: { ! 571: FAIL("Not enough \\)s"); ! 572: } ! 573: ! 574: #ifdef DEBUG ! 575: if ((int)(build - re->program) != calced) ! 576: { ! 577: msg("regcomp error: calced=%d, actual=%d", calced, (int)(build - re->program)); ! 578: getkey(0); ! 579: } ! 580: #endif ! 581: ! 582: checkmem(); ! 583: return re; ! 584: } ! 585: ! 586: ! 587: ! 588: /*---------------------------------------------------------------------------*/ ! 589: ! 590: ! 591: /* This function checks for a match between a character and a token which is ! 592: * known to represent a single character. It returns 0 if they match, or ! 593: * 1 if they don't. ! 594: */ ! 595: int match1(re, ch, token) ! 596: regexp *re; ! 597: REG char ch; ! 598: REG int token; ! 599: { ! 600: if (!ch) ! 601: { ! 602: /* the end of a line can't match any RE of width 1 */ ! 603: return 1; ! 604: } ! 605: if (token == M_ANY) ! 606: { ! 607: return 0; ! 608: } ! 609: else if (IS_CLASS(token)) ! 610: { ! 611: if (re->program[1 + 32 * (token - M_CLASS(0)) + (UCHAR(ch) >> 3)] & (1 << (UCHAR(ch) & 7))) ! 612: return 0; ! 613: } ! 614: else if (ch == token || *o_ignorecase && tolower(ch) == tolower(token)) ! 615: { ! 616: return 0; ! 617: } ! 618: return 1; ! 619: } ! 620: ! 621: ! 622: ! 623: /* This function checks characters up to and including the next closure, at ! 624: * which point it does a recursive call to check the rest of it. This function ! 625: * returns 0 if everything matches, or 1 if something doesn't match. ! 626: */ ! 627: int match(re, str, prog, here) ! 628: regexp *re; /* the regular expression */ ! 629: char *str; /* the string */ ! 630: REG char *prog; /* a portion of re->program, an compiled RE */ ! 631: REG char *here; /* a portion of str, the string to compare it to */ ! 632: { ! 633: REG int token; /* the roken pointed to by prog */ ! 634: REG int nmatched;/* counter, used during closure matching */ ! 635: REG int closure;/* the token denoting the type of closure */ ! 636: int from; /* minimum number of matches in closure */ ! 637: int to; /* maximum number of matches in closure */ ! 638: ! 639: for (token = GET_META(prog); !IS_CLOSURE(token); prog++, token = GET_META(prog)) ! 640: { ! 641: switch (token) ! 642: { ! 643: /*case M_BEGLINE: can't happen; re->bol is used instead */ ! 644: case M_ENDLINE: ! 645: if (*here) ! 646: return 1; ! 647: break; ! 648: ! 649: case M_BEGWORD: ! 650: if (here != str && ! 651: (here[-1] == '_' || isalnum(here[-1]))) ! 652: return 1; ! 653: break; ! 654: ! 655: case M_ENDWORD: ! 656: if (here[0] == '_' || isalnum(here[0])) ! 657: return 1; ! 658: break; ! 659: ! 660: case M_START(0): ! 661: case M_START(1): ! 662: case M_START(2): ! 663: case M_START(3): ! 664: case M_START(4): ! 665: case M_START(5): ! 666: case M_START(6): ! 667: case M_START(7): ! 668: case M_START(8): ! 669: case M_START(9): ! 670: re->startp[token - M_START(0)] = (char *)here; ! 671: break; ! 672: ! 673: case M_END(0): ! 674: case M_END(1): ! 675: case M_END(2): ! 676: case M_END(3): ! 677: case M_END(4): ! 678: case M_END(5): ! 679: case M_END(6): ! 680: case M_END(7): ! 681: case M_END(8): ! 682: case M_END(9): ! 683: re->endp[token - M_END(0)] = (char *)here; ! 684: if (token == M_END(0)) ! 685: { ! 686: return 0; ! 687: } ! 688: break; ! 689: ! 690: default: /* literal, M_CLASS(n), or M_ANY */ ! 691: if (match1(re, *here, token) != 0) ! 692: return 1; ! 693: here++; ! 694: } ! 695: } ! 696: ! 697: /* C L O S U R E */ ! 698: ! 699: /* step 1: see what we have to match against, and move "prog" to point ! 700: * to the remainder of the compiled RE. ! 701: */ ! 702: closure = token; ! 703: prog++; ! 704: switch (closure) ! 705: { ! 706: case M_SPLAT: ! 707: from = 0; ! 708: to = strlen(str); /* infinity */ ! 709: break; ! 710: ! 711: case M_PLUS: ! 712: from = 1; ! 713: to = strlen(str); /* infinity */ ! 714: break; ! 715: ! 716: case M_QMARK: ! 717: from = 0; ! 718: to = 1; ! 719: break; ! 720: ! 721: #ifndef CRUNCH ! 722: case M_RANGE: ! 723: from = UCHAR(*prog++); ! 724: to = UCHAR(*prog++); ! 725: if (to == 255) ! 726: { ! 727: to = strlen(str); /* infinity */ ! 728: } ! 729: break; ! 730: #endif ! 731: } ! 732: token = GET_META(prog); ! 733: prog++; ! 734: ! 735: /* step 2: see how many times we can match that token against the string */ ! 736: for (nmatched = 0; ! 737: nmatched < to && *here && match1(re, *here, token) == 0; ! 738: nmatched++, here++) ! 739: { ! 740: } ! 741: ! 742: /* step 3: try to match the remainder, and back off if it doesn't */ ! 743: while (nmatched >= from && match(re, str, prog, here) != 0) ! 744: { ! 745: nmatched--; ! 746: here--; ! 747: } ! 748: ! 749: /* so how did it work out? */ ! 750: if (nmatched >= from) ! 751: return 0; ! 752: return 1; ! 753: } ! 754: ! 755: ! 756: ! 757: /* This function searches through a string for text that matches an RE. */ ! 758: int regexec(re, str, bol) ! 759: regexp *re; /* the compiled regexp to search for */ ! 760: char *str; /* the string to search through */ ! 761: int bol; /* boolean: does str start at the beginning of a line? */ ! 762: { ! 763: char *prog; /* the entry point of re->program */ ! 764: int len; /* length of the string */ ! 765: REG char *here; ! 766: ! 767: checkmem(); ! 768: ! 769: /* if must start at the beginning of a line, and this isn't, then fail */ ! 770: if (re->bol && !bol) ! 771: { ! 772: return 0; ! 773: } ! 774: ! 775: len = strlen(str); ! 776: prog = re->program + 1 + 32 * re->program[0]; ! 777: ! 778: /* search for the RE in the string */ ! 779: if (re->bol) ! 780: { ! 781: /* must occur at BOL */ ! 782: if ((re->first ! 783: && match1(re, *(char *)str, re->first))/* wrong first letter? */ ! 784: || len < re->minlen /* not long enough? */ ! 785: || match(re, (char *)str, prog, str)) /* doesn't match? */ ! 786: return 0; /* THEN FAIL! */ ! 787: } ! 788: #ifndef CRUNCH ! 789: else if (!*o_ignorecase) ! 790: { ! 791: /* can occur anywhere in the line, noignorecase */ ! 792: for (here = (char *)str; ! 793: (re->first && re->first != *here) ! 794: || match(re, (char *)str, prog, here); ! 795: here++, len--) ! 796: { ! 797: if (len < re->minlen) ! 798: return 0; ! 799: } ! 800: } ! 801: #endif ! 802: else ! 803: { ! 804: /* can occur anywhere in the line, ignorecase */ ! 805: for (here = (char *)str; ! 806: (re->first && match1(re, *here, (int)re->first)) ! 807: || match(re, (char *)str, prog, here); ! 808: here++, len--) ! 809: { ! 810: if (len < re->minlen) ! 811: return 0; ! 812: } ! 813: } ! 814: ! 815: /* if we didn't fail, then we must have succeeded */ ! 816: checkmem(); ! 817: return 1; ! 818: } ! 819: ! 820: /*============================================================================*/ ! 821: #else /* NO_MAGIC */ ! 822: ! 823: regexp *regcomp(exp) ! 824: char *exp; ! 825: { ! 826: char *src; ! 827: char *dest; ! 828: regexp *re; ! 829: int i; ! 830: ! 831: /* allocate a big enough regexp structure */ ! 832: #ifdef lint ! 833: re = (regexp *)0; ! 834: #else ! 835: re = (regexp *)malloc((unsigned)(strlen(exp) + 1 + sizeof(struct regexp))); ! 836: #endif ! 837: if (!re) ! 838: { ! 839: regerror("Could not malloc a regexp structure"); ! 840: return (regexp *)0; ! 841: } ! 842: ! 843: /* initialize all fields of the structure */ ! 844: for (i = 0; i < NSUBEXP; i++) ! 845: { ! 846: re->startp[i] = re->endp[i] = (char *)0; ! 847: } ! 848: re->minlen = 0; ! 849: re->first = 0; ! 850: re->bol = 0; ! 851: ! 852: /* copy the string into it, translating ^ and $ as needed */ ! 853: for (src = exp, dest = re->program + 1; *src; src++) ! 854: { ! 855: switch (*src) ! 856: { ! 857: case '^': ! 858: if (src == exp) ! 859: { ! 860: re->bol += 1; ! 861: } ! 862: else ! 863: { ! 864: *dest++ = '^'; ! 865: re->minlen++; ! 866: } ! 867: break; ! 868: ! 869: case '$': ! 870: if (!src[1]) ! 871: { ! 872: re->bol += 2; ! 873: } ! 874: else ! 875: { ! 876: *dest++ = '$'; ! 877: re->minlen++; ! 878: } ! 879: break; ! 880: ! 881: case '\\': ! 882: if (src[1]) ! 883: { ! 884: *dest++ = *++src; ! 885: re->minlen++; ! 886: } ! 887: else ! 888: { ! 889: regerror("extra \\ at end of regular expression"); ! 890: } ! 891: break; ! 892: ! 893: default: ! 894: *dest++ = *src; ! 895: re->minlen++; ! 896: } ! 897: } ! 898: *dest = '\0'; ! 899: ! 900: return re; ! 901: } ! 902: ! 903: ! 904: /* This "helper" function checks for a match at a given location. It returns ! 905: * 1 if it matches, 0 if it doesn't match here but might match later on in the ! 906: * string, or -1 if it could not possibly match ! 907: */ ! 908: static int reghelp(prog, string, bolflag) ! 909: struct regexp *prog; ! 910: char *string; ! 911: int bolflag; ! 912: { ! 913: char *scan; ! 914: char *str; ! 915: ! 916: /* if ^, then require bolflag */ ! 917: if ((prog->bol & 1) && !bolflag) ! 918: { ! 919: return -1; ! 920: } ! 921: ! 922: /* if it matches, then it will start here */ ! 923: prog->startp[0] = string; ! 924: ! 925: /* compare, possibly ignoring case */ ! 926: if (*o_ignorecase) ! 927: { ! 928: for (scan = &prog->program[1]; *scan; scan++, string++) ! 929: if (tolower(*scan) != tolower(*string)) ! 930: return *string ? 0 : -1; ! 931: } ! 932: else ! 933: { ! 934: for (scan = &prog->program[1]; *scan; scan++, string++) ! 935: if (*scan != *string) ! 936: return *string ? 0 : -1; ! 937: } ! 938: ! 939: /* if $, then require string to end here, too */ ! 940: if ((prog->bol & 2) && *string) ! 941: { ! 942: return 0; ! 943: } ! 944: ! 945: /* if we get to here, it matches */ ! 946: prog->endp[0] = string; ! 947: return 1; ! 948: } ! 949: ! 950: ! 951: ! 952: int regexec(prog, string, bolflag) ! 953: struct regexp *prog; ! 954: char *string; ! 955: int bolflag; ! 956: { ! 957: int rc; ! 958: ! 959: /* keep trying to match it */ ! 960: for (rc = reghelp(prog, string, bolflag); rc == 0; rc = reghelp(prog, string, 0)) ! 961: { ! 962: string++; ! 963: } ! 964: ! 965: /* did we match? */ ! 966: return rc == 1; ! 967: } ! 968: #endif
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.