|
|
#include <ctype.h>
#include <stdlib.h>
#include <string.h>

typedef int boolean;
#define TRUE 1
#define FALSE 0
#define NIL 0

boolean l_onecase;	/* true if upper and lower equivalent */

/*
 * Fold an upper-case letter to lower case; anything else passes through.
 * The argument goes through unsigned char because the <ctype.h> functions
 * are undefined for negative values of plain char.
 */
#define makelower(c) \
	(isupper((unsigned char)(c)) ? tolower((unsigned char)(c)) : (c))

/*
 * STRNCMP - like strncmp except that we convert the
 *	     first string to lower case before comparing
 *	     if l_onecase is set.
 *
 * Exactly len bytes are compared; unlike strncmp the comparison does not
 * stop at a NUL.  Returns 0 when the spans are equal (or len <= 0),
 * otherwise the difference (*s2 - *s1) at the first mismatch.
 */
int
STRNCMP(const char *s1, const char *s2, int len)
{
    int diff;

    for (; len > 0; len--, s1++, s2++) {
	diff = *s2 - (l_onecase ? makelower(*s1) : *s1);
	if (diff != 0)
	    return (diff);
    }
    return (0);
}

/*	The following routine converts an irregular expression to
 *	internal format.
 *
 *	Either meta symbols (\a \d or \p) or character strings or
 *	operations ( alternation or parenthesizing ) can be
 *	specified.  Each starts with a descriptor byte.  The descriptor
 *	byte has STR set for strings, META set for meta symbols
 *	and OPER set for operations.
 *	The descriptor byte can also have the OPT bit set if the object
 *	defined is optional.  Also ALT can be set to indicate an alternation.
 *
 *	For metasymbols the byte following the descriptor byte identifies
 *	the meta symbol (containing an ascii 'a', 'd', 'p', '|', or '(').  For
 *	strings the byte after the descriptor is a character count for
 *	the string:
 *
 *		meta symbols := descriptor
 *				symbol
 *
 *		strings :=	descriptor
 *				character count
 *				the string
 *
 *		operations :=	descriptor
 *				symbol
 *				character count
 *
 *	Counts and offsets are stored in a single char, so an offset that
 *	does not fit in a char cannot be represented by this format.
 */

/*
 *  handy macros for accessing parts of match blocks
 */
#define MSYM(A) (*((A)+1))	/* symbol in a meta symbol block */
#define MNEXT(A) ((A)+2)	/* character following a metasymbol block */

#define OSYM(A) (*((A)+1))	/* symbol in an operation block */
#define OCNT(A) (*((A)+2))	/* character count */
#define ONEXT(A) ((A)+3)	/* next character after the operation */
#define OPTR(A) ((A)+*((A)+2))	/* place pointed to by the operator */

#define SCNT(A) (*((A)+1))	/* byte count of a string */
#define SSTR(A) ((A)+2)		/* address of the string */
#define SNEXT(A) ((A)+2+*((A)+1)) /* character following the string */

/*
 *  bit flags in the descriptor
 */
#define OPT 1
#define STR 2
#define META 4
#define ALT 8
#define OPER 16

/* longest string kept in one STR block; the count must fit in a char */
#define MAXSCNT 127

char *ure;		/* pointer current position in unconverted exp */
char *ccre;		/* pointer to current position in converted exp*/

void expconv(void);

/*
 * convexp - convert the irregular expression `re' to internal format.
 *
 * Returns a malloc'ed buffer holding the converted expression, or NIL if
 * `re' is NIL or empty, or if the buffer cannot be allocated.  The
 * converted expression always begins with an optional \a block and ends
 * with a zero byte.  Uses the globals ure and ccre as conversion cursors.
 */
char *
convexp(char *re)
{
    char *cre;		/* pointer to converted regular expression */

    if (re == NULL || *re == '\0')
	return (NULL);

    /*
     * allocate room for the converted expression: no input character
     * produces more than three output bytes, plus the leading \a block
     * and the terminating zero.
     */
    cre = malloc(4 * strlen(re) + 3);
    if (cre == NULL)
	return (NULL);
    ure = re;

    /* start the conversion with a \a */
    *cre = META | OPT;
    MSYM(cre) = 'a';
    ccre = MNEXT(cre);

    /* start the conversion (its recursive) */
    expconv();
    *ccre = 0;
    return (cre);
}

/*
 * Walk back along a chain of pending '|' blocks, starting at acs, and make
 * each one point at the current output position.  While pending, OCNT of
 * each block holds the distance back to the previous one (0 ends the chain).
 */
static void
closealts(char *acs)
{
    int back;

    do {
	back = OCNT(acs);
	OCNT(acs) = (char)(ccre - acs);
	acs -= back;
    } while (back != 0);
}

/* emit a meta symbol block for c at the output cursor and return it */
static char *
putmeta(char c)
{
    char *blk = ccre;

    *blk = META;
    MSYM(blk) = c;
    ccre = MNEXT(blk);
    return (blk);
}

/*
 * Append the literal c to the string block cs, or open a new string block
 * when cs is not an open string: no block yet, not a STR block, already
 * marked optional by '?', or already full.  Returns the block holding c.
 */
static char *
putliteral(char *cs, char c)
{
    if (cs == NULL || (*cs & STR) == 0 || (*cs & OPT) != 0 ||
	SCNT(cs) >= MAXSCNT) {
	cs = ccre;
	*cs = STR;
	SCNT(cs) = 1;
	ccre = SSTR(cs);
    } else
	SCNT(cs)++;
    *ccre++ = c;
    return (cs);
}

/*
 * expconv - convert the expression at ure into match blocks at ccre.
 *
 * Stops at the end of the input or just after a ')', so that the '(' case
 * can call it recursively for the enclosed subexpression.
 */
void
expconv(void)
{
    char *cs;		/* pointer to current symbol in converted exp */
    char c;		/* character being processed */
    char *acs;		/* pointer to last alternate */
    int back;		/* back pointer for a new alternation block */

    /* let the conversion begin; nothing has been emitted at this level yet */
    acs = NULL;
    cs = NULL;
    while (*ure != '\0') {
	switch (c = *ure++) {

	case '\\':
	    /* a backslash that ends the expression stands for itself */
	    if (*ure == '\0') {
		cs = putliteral(cs, c);
		break;
	    }
	    switch (c = *ure++) {

	    /* escaped characters are just characters */
	    default:
		cs = putliteral(cs, c);
		break;

	    /* normal(?) metacharacters */
	    case 'a':
	    case 'd':
	    case 'e':
	    case 'p':
		if (acs != NULL && acs != cs) {
		    closealts(acs);
		    acs = NULL;
		}
		cs = putmeta(c);
		break;
	    }
	    break;

	/* just put the symbol in */
	case '^':
	case '$':
	    if (acs != NULL && acs != cs) {
		closealts(acs);
		acs = NULL;
	    }
	    cs = putmeta(c);
	    break;

	/* mark the last match sequence as optional */
	case '?':
	    if (cs != NULL)
		*cs |= OPT;
	    break;

	/* recurse and define a subexpression */
	case '(':
	    if (acs != NULL && acs != cs) {
		closealts(acs);
		acs = NULL;
	    }
	    cs = ccre;
	    *cs = OPER;
	    OSYM(cs) = '(';
	    ccre = ONEXT(cs);
	    expconv();
	    OCNT(cs) = (char)(ccre - cs);	/* offset to next symbol */
	    break;

	/* return from a recursion */
	case ')':
	    if (acs != NULL) {
		closealts(acs);
		acs = NULL;
	    }
	    (void)putmeta(c);
	    return;

	/* mark the last match sequence as having an alternate */
	/* the third byte will contain an offset to jump over the */
	/* alternate match in case the first did not fail */
	case '|':
	    if (acs != NULL && acs != cs)
		back = (int)(ccre - acs);	/* make a back pointer */
	    else
		back = 0;
	    if (cs != NULL)
		*cs |= ALT;
	    cs = ccre;
	    *cs = OPER;
	    OSYM(cs) = '|';
	    OCNT(cs) = (char)back;
	    ccre = ONEXT(cs);
	    acs = cs;	/* remember that the pointer is to be filled */
	    break;

	/* if its not a metasymbol just build a character string */
	default:
	    cs = putliteral(cs, c);
	    break;
	}
    }
    if (acs != NULL)
	closealts(acs);
}
/* end of convertre */


/*
 *	The following routine recognises an irregular expression
 *	with the following special characters:
 *
 *		?	-	means last match was optional
 *		\a	-	matches any number of characters
 *		\d	-	matches any number of spaces and tabs
 *		\p	-	matches any number of alphanumeric
 *				characters (and '_'). The
 *				characters matched will be copied into
 *				the area pointed to by 'mstring'.
 *		\e	-	fails if we are currently _escaped
 *		^ $	-	start of string / end of line
 *		|	-	alternation
 *		( )	-	grouping used mostly for alternation and
 *				optionality
 *
 *	A backslash in front of any other character makes that character
 *	an ordinary one.
 *
 *	The irregular expression must be translated to internal form
 *	prior to calling this routine
 *
 *	The value returned is the position in the string reached once the
 *	expression has been matched, or NIL if there is no match.
 */

boolean _escaped;	/* true if we are currently _escaped */
char *_start;		/* start of string */

/*
 * Decide what a hit or miss on the block with descriptor `desc' means and
 * record it in *matched.  A miss is tolerated when the block has an
 * alternate (ALT) or is optional (OPT).  Returns FALSE only when the miss
 * is fatal for the whole match.
 */
static boolean
settle(int desc, boolean hit, boolean *matched)
{
    if (hit)
	*matched = TRUE;
    else if (desc & ALT)
	*matched = FALSE;	/* alternation, try the next part */
    else if (desc & OPT)
	*matched = TRUE;	/* optional, doesn't matter */
    else
	return (FALSE);
    return (TRUE);
}

/*
 * expmatch - match the converted expression `re' against the string `s'.
 *
 *	s	string to check for a match in
 *	re	a converted irregular expression (from convexp), or NIL
 *	mstring	where to put whatever matches a \p; may be NIL, in which
 *		case nothing is stored
 *
 * Reads the global _start to recognise the start of the string and
 * updates the global _escaped while scanning for \a and \p.
 */
char *
expmatch(char *s, char *re, char *mstring)
{
    char *cs;		/* the current symbol */
    char *ptr, *s1;	/* temporary pointers */
    boolean matched;	/* did the previous symbol match */
    boolean hit;	/* did the current symbol match */

    /* initial conditions */
    if (re == NULL)
	return (NULL);
    cs = re;
    matched = FALSE;

    /* loop till expression string is exhausted (or at least pretty tired) */
    while (*cs) {
	switch (*cs & (OPER | STR | META)) {

	/* try to match a string */
	case STR:
	    hit = !STRNCMP(s, SSTR(cs), SCNT(cs));
	    if (!settle(*cs, hit, &matched))
		return (NULL);
	    if (hit)
		s += SCNT(cs);
	    cs = SNEXT(cs);
	    break;

	/* an operator, do something fancy */
	case OPER:
	    switch (OSYM(cs)) {

	    /* this is an alternation */
	    case '|':
		if (matched)
		    /* last thing in the alternation matched, skip ahead */
		    cs = OPTR(cs);
		else
		    /* no match, keep trying */
		    cs = ONEXT(cs);
		break;

	    /* this is a grouping, recurse */
	    case '(':
		ptr = expmatch(s, ONEXT(cs), mstring);
		if (!settle(*cs, ptr != NULL, &matched))
		    return (NULL);
		if (ptr != NULL)
		    s = ptr;
		cs = OPTR(cs);
		break;

	    /* malformed block: fail instead of looping forever */
	    default:
		return (NULL);
	    }
	    break;

	/* try to match a metasymbol */
	case META:
	    switch (MSYM(cs)) {

	    /* try to match anything and remember what was matched */
	    case 'p':
		/*
		 *  This is really the same as trying the match the
		 *  remaining parts of the expression to any subset
		 *  of the string.
		 */
		s1 = s;
		do {
		    ptr = expmatch(s1, MNEXT(cs), mstring);
		    if (ptr != NULL && s1 != s) {

			/* we have a match, remember the match */
			if (mstring != NULL) {
			    memcpy(mstring, s, (size_t)(s1 - s));
			    mstring[s1 - s] = '\0';
			}
			return (ptr);
		    } else if (ptr != NULL && (*cs & OPT)) {

			/* it was optional so no match is ok */
			return (ptr);
		    } else if (ptr != NULL) {

			/* not optional and we still matched */
			return (NULL);
		    }
		    if (!isalnum((unsigned char)*s1) && *s1 != '_')
			return (NULL);

		    /* only name characters get this far, never a '\\' */
		    _escaped = FALSE;
		} while (*s1++);
		return (NULL);

	    /* try to match anything */
	    case 'a':
		/*
		 *  This is really the same as trying the match the
		 *  remaining parts of the expression to any subset
		 *  of the string.
		 */
		s1 = s;
		do {
		    ptr = expmatch(s1, MNEXT(cs), mstring);
		    if (ptr != NULL && s1 != s) {

			/* we have a match */
			return (ptr);
		    } else if (ptr != NULL && (*cs & OPT)) {

			/* it was optional so no match is ok */
			return (ptr);
		    } else if (ptr != NULL) {

			/* not optional and we still matched */
			return (NULL);
		    }
		    if (*s1 == '\\')
			_escaped = _escaped ? FALSE : TRUE;
		    else
			_escaped = FALSE;
		} while (*s1++);
		return (NULL);

	    /* fail if we are currently _escaped */
	    case 'e':
		if (_escaped)
		    return (NULL);
		cs = MNEXT(cs);
		break;

	    /* match any number of tabs and spaces */
	    case 'd':
		ptr = s;
		while (*s == ' ' || *s == '\t')
		    s++;

		/* the start of the string and the end of a line also count */
		hit = (s != ptr || s == _start || *s == '\n' || *s == '\0');
		if (!settle(*cs, hit, &matched))
		    return (NULL);
		cs = MNEXT(cs);
		break;

	    /* check for end of line */
	    case '$':
		hit = (*s == '\0' || *s == '\n');
		if (!settle(*cs, hit, &matched))
		    return (NULL);

		/*
		 * NOTE(review): this also steps over a terminating NUL, so
		 * s may end up one past the end of the string -- confirm
		 * that callers expect that.
		 */
		if (hit)
		    s++;
		cs = MNEXT(cs);
		break;

	    /* check for start of line */
	    case '^':
		if (!settle(*cs, s == _start, &matched))
		    return (NULL);
		cs = MNEXT(cs);
		break;

	    /* end of a subexpression, return success */
	    case ')':
		return (s);

	    /* malformed block: fail instead of looping forever */
	    default:
		return (NULL);
	    }
	    break;

	/* descriptor with no type bit: fail instead of looping forever */
	default:
	    return (NULL);
	}
    }
    return (s);
}
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.