|
|
1.1 ! root 1: /* ! 2: Copyright (c) 1989 AT&T ! 3: All Rights Reserved ! 4: ! 5: THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF AT&T. ! 6: ! 7: The copyright notice above does not evidence any ! 8: actual or intended publication of such source code. ! 9: */ ! 10: ! 11: #define DEBUG ! 12: ! 13: #include <ctype.h> ! 14: #include <stdio.h> ! 15: #include <string.h> ! 16: #include <stdlib.h> ! 17: #include "awk.h" ! 18: #include "y.tab.h" ! 19: ! 20: #define HAT (NCHARS-1) /* matches ^ in regular expr */ ! 21: /* NCHARS is 2**n */ ! 22: #define MAXLIN 512 ! 23: ! 24: #define type(v) (v)->nobj ! 25: #define left(v) (v)->narg[0] ! 26: #define right(v) (v)->narg[1] ! 27: #define parent(v) (v)->nnext ! 28: ! 29: #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL: ! 30: #define UNARY case STAR: case PLUS: case QUEST: ! 31: ! 32: /* encoding in tree Nodes: ! 33: leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL): ! 34: left is index, right contains value or pointer to value ! 35: unary (STAR, PLUS, QUEST): left is child, right is null ! 36: binary (CAT, OR): left and right are children ! 37: parent contains pointer to parent ! 38: */ ! 39: ! 40: ! 41: uchar chars[MAXLIN]; ! 42: int setvec[MAXLIN]; ! 43: int tmpset[MAXLIN]; ! 44: Node *point[MAXLIN]; ! 45: ! 46: int rtok; /* next token in current re */ ! 47: int rlxval; ! 48: uchar *rlxstr; ! 49: uchar *prestr; /* current position in current re */ ! 50: uchar *lastre; /* origin of last re */ ! 51: ! 52: static int setcnt; ! 53: static int poscnt; ! 54: ! 55: uchar *patbeg; ! 56: int patlen; ! 57: ! 58: #define NFA 20 /* cache this many dynamic fa's */ ! 59: fa *fatab[NFA]; ! 60: int nfatab = 0; /* entries in fatab */ ! 61: ! 62: fa *makedfa(uchar *s, int anchor) /* returns dfa for reg expr s */ ! 63: { ! 64: int i, use, nuse; ! 65: fa *pfa; ! 66: ! 67: if (compile_time) /* a constant for sure */ ! 68: return mkdfa(s, anchor); ! 69: for (i = 0; i < nfatab; i++) /* is it there already? */ ! 70: if (fatab[i]->anchor == anchor && strcmp(fatab[i]->restr,s) == 0) { ! 71: fatab[i]->use++; ! 72: return fatab[i]; ! 73: } ! 74: pfa = mkdfa(s, anchor); ! 75: if (nfatab < NFA) { /* room for another */ ! 76: fatab[nfatab] = pfa; ! 77: fatab[nfatab]->use = 1; ! 78: nfatab++; ! 79: return pfa; ! 80: } ! 81: use = fatab[0]->use; /* replace least-recently used */ ! 82: nuse = 0; ! 83: for (i = 1; i < nfatab; i++) ! 84: if (fatab[i]->use < use) { ! 85: use = fatab[i]->use; ! 86: nuse = i; ! 87: } ! 88: freefa(fatab[nuse]); ! 89: fatab[nuse] = pfa; ! 90: pfa->use = 1; ! 91: return pfa; ! 92: } ! 93: ! 94: fa *mkdfa(uchar *s, int anchor) /* does the real work of making a dfa */ ! 95: /* anchor = 1 for anchored matches, else 0 */ ! 96: { ! 97: Node *p, *p1; ! 98: fa *f; ! 99: ! 100: p = reparse(s); ! 101: p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p); ! 102: /* put ALL STAR in front of reg. exp. */ ! 103: p1 = op2(CAT, p1, op2(FINAL, NIL, NIL)); ! 104: /* put FINAL after reg. exp. */ ! 105: ! 106: poscnt = 0; ! 107: penter(p1); /* enter parent pointers and leaf indices */ ! 108: if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL) ! 109: overflo("out of space for fa"); ! 110: f->accept = poscnt-1; /* penter has computed number of positions in re */ ! 111: cfoll(f, p1); /* set up follow sets */ ! 112: freetr(p1); ! 113: if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL) ! 114: overflo("out of space in makedfa"); ! 115: if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL) ! 116: overflo("out of space in makedfa"); ! 117: *f->posns[1] = 0; ! 118: f->initstat = makeinit(f, anchor); ! 119: f->anchor = anchor; ! 120: f->restr = tostring(s); ! 121: return f; ! 122: } ! 123: ! 124: int makeinit(fa *f, int anchor) ! 125: { ! 126: register int i, k; ! 127: ! 128: f->curstat = 2; ! 129: f->out[2] = 0; ! 130: f->reset = 0; ! 131: k = *(f->re[0].lfollow); ! 132: xfree(f->posns[2]); ! 133: if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) ! 134: overflo("out of space in makeinit"); ! 135: for (i=0; i<=k; i++) { ! 136: (f->posns[2])[i] = (f->re[0].lfollow)[i]; ! 137: } ! 138: if ((f->posns[2])[1] == f->accept) ! 139: f->out[2] = 1; ! 140: for (i=0; i<NCHARS; i++) ! 141: f->gototab[2][i] = 0; ! 142: f->curstat = cgoto(f, 2, HAT); ! 143: if (anchor) { ! 144: *f->posns[2] = k-1; /* leave out position 0 */ ! 145: for (i=0; i<k; i++) { ! 146: (f->posns[0])[i] = (f->posns[2])[i]; ! 147: } ! 148: ! 149: f->out[0] = f->out[2]; ! 150: if (f->curstat != 2) ! 151: --(*f->posns[f->curstat]); ! 152: } ! 153: return f->curstat; ! 154: } ! 155: ! 156: void penter(Node *p) /* set up parent pointers and leaf indices */ ! 157: { ! 158: switch(type(p)) { ! 159: LEAF ! 160: left(p) = (Node *) poscnt; ! 161: if (poscnt >= MAXLIN) ! 162: overflo("leaf index overflow in penter"); ! 163: point[poscnt++] = p; ! 164: break; ! 165: UNARY ! 166: penter(left(p)); ! 167: parent(left(p)) = p; ! 168: break; ! 169: case CAT: ! 170: case OR: ! 171: penter(left(p)); ! 172: penter(right(p)); ! 173: parent(left(p)) = p; ! 174: parent(right(p)) = p; ! 175: break; ! 176: default: /* can't happen */ ! 177: ERROR "unknown type %d in penter", type(p) FATAL; ! 178: break; ! 179: } ! 180: } ! 181: ! 182: void freetr(Node *p) /* free parse tree */ ! 183: { ! 184: switch (type(p)) { ! 185: LEAF ! 186: xfree(p); ! 187: break; ! 188: UNARY ! 189: freetr(left(p)); ! 190: xfree(p); ! 191: break; ! 192: case CAT: ! 193: case OR: ! 194: freetr(left(p)); ! 195: freetr(right(p)); ! 196: xfree(p); ! 197: break; ! 198: default: /* can't happen */ ! 199: ERROR "unknown type %d in freetr", type(p) FATAL; ! 200: break; ! 201: } ! 202: } ! 203: ! 204: /* in the parsing of regular expressions, metacharacters like . have */ ! 205: /* to be seen literally; \056 is not a metacharacter. */ ! 206: ! 207: hexstr(char **pp) /* find and eval hex string at pp, return new p */ ! 208: { ! 209: char *p; ! 210: int n = 0; ! 211: ! 212: for (p = *pp; isxdigit(*p); p++) { ! 213: if (isdigit(*p)) ! 214: n = 16 * n + *p - '0'; ! 215: else if (*p >= 'a' && *p <= 'f') ! 216: n = 16 * n + *p - 'a' + 10; ! 217: else if (*p >= 'A' && *p <= 'F') ! 218: n = 16 * n + *p - 'A' + 10; ! 219: } ! 220: *pp = p; ! 221: return n; ! 222: } ! 223: ! 224: #define isoctdigit(c) ((c) >= '0' && (c) <= '8') /* multiple use of arg */ ! 225: ! 226: quoted(char **pp) /* pick up next thing after a \\ */ ! 227: /* and increment *pp */ ! 228: { ! 229: char *p = *pp; ! 230: int c; ! 231: ! 232: if ((c = *p++) == 't') ! 233: c = '\t'; ! 234: else if (c == 'n') ! 235: c = '\n'; ! 236: else if (c == 'f') ! 237: c = '\f'; ! 238: else if (c == 'r') ! 239: c = '\r'; ! 240: else if (c == 'b') ! 241: c = '\b'; ! 242: else if (c == '\\') ! 243: c = '\\'; ! 244: else if (c == 'x') { /* hexadecimal goo follows */ ! 245: c = hexstr(&p); ! 246: } else if (isoctdigit(c)) { /* \d \dd \ddd */ ! 247: int n = c - '0'; ! 248: if (isoctdigit(*p)) { ! 249: n = 8 * n + *p++ - '0'; ! 250: if (isoctdigit(*p)) ! 251: n = 8 * n + *p++ - '0'; ! 252: } ! 253: c = n; ! 254: } /* else */ ! 255: /* c = c; */ ! 256: *pp = p; ! 257: return c; ! 258: } ! 259: ! 260: uchar *cclenter(uchar *p) ! 261: { ! 262: register int i, c, c2; ! 263: uchar *op; ! 264: ! 265: op = p; ! 266: i = 0; ! 267: while ((c = *p++) != 0) { ! 268: if (c == '\\') { ! 269: c = quoted(&p); ! 270: } else if (c == '-' && i > 0 && chars[i-1] != 0) { ! 271: if (*p != 0) { ! 272: c = chars[i-1]; ! 273: c2 = *p++; ! 274: if (c2 == '\\') ! 275: c2 = quoted(&p); ! 276: while (c < c2) { ! 277: if (i >= MAXLIN-1) ! 278: overflo("character class too big"); ! 279: chars[i++] = ++c; ! 280: } ! 281: continue; ! 282: } ! 283: } ! 284: if (i >= MAXLIN-1) ! 285: overflo("character class too big"); ! 286: chars[i++] = c; ! 287: } ! 288: chars[i++] = '\0'; ! 289: dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, chars) ); ! 290: xfree(op); ! 291: return(tostring(chars)); ! 292: } ! 293: ! 294: void overflo(uchar *s) ! 295: { ! 296: ERROR "regular expression too big: %s", s FATAL; ! 297: } ! 298: ! 299: void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */ ! 300: { ! 301: register int i; ! 302: register int *p; ! 303: ! 304: switch(type(v)) { ! 305: LEAF ! 306: f->re[(int) left(v)].ltype = type(v); ! 307: f->re[(int) left(v)].lval = (long) right(v); /* assumes ptr & long fit */ ! 308: for (i=0; i<=f->accept; i++) ! 309: setvec[i] = 0; ! 310: setcnt = 0; ! 311: follow(v); /* computes setvec and setcnt */ ! 312: if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL) ! 313: overflo("out of space building follow set"); ! 314: f->re[(int) left(v)].lfollow = p; ! 315: *p = setcnt; ! 316: for (i = f->accept; i >= 0; i--) ! 317: if (setvec[i] == 1) *++p = i; ! 318: break; ! 319: UNARY ! 320: cfoll(f,left(v)); ! 321: break; ! 322: case CAT: ! 323: case OR: ! 324: cfoll(f,left(v)); ! 325: cfoll(f,right(v)); ! 326: break; ! 327: default: /* can't happen */ ! 328: ERROR "unknown type %d in cfoll", type(v) FATAL; ! 329: } ! 330: } ! 331: ! 332: first(Node *p) /* collects initially active leaves of p into setvec */ ! 333: /* returns 0 or 1 depending on whether p matches empty string */ ! 334: { ! 335: register int b; ! 336: ! 337: switch(type(p)) { ! 338: LEAF ! 339: if (setvec[(int) left(p)] != 1) { ! 340: setvec[(int) left(p)] = 1; ! 341: setcnt++; ! 342: } ! 343: if (type(p) == CCL && (*(uchar *) right(p)) == '\0') ! 344: return(0); /* empty CCL */ ! 345: else return(1); ! 346: case PLUS: ! 347: if (first(left(p)) == 0) return(0); ! 348: return(1); ! 349: case STAR: ! 350: case QUEST: ! 351: first(left(p)); ! 352: return(0); ! 353: case CAT: ! 354: if (first(left(p)) == 0 && first(right(p)) == 0) return(0); ! 355: return(1); ! 356: case OR: ! 357: b = first(right(p)); ! 358: if (first(left(p)) == 0 || b == 0) return(0); ! 359: return(1); ! 360: } ! 361: ERROR "unknown type %d in first", type(p) FATAL; /* can't happen */ ! 362: return(-1); ! 363: } ! 364: ! 365: void follow(Node *v) /* collects leaves that can follow v into setvec */ ! 366: { ! 367: Node *p; ! 368: ! 369: if (type(v) == FINAL) ! 370: return; ! 371: p = parent(v); ! 372: switch (type(p)) { ! 373: case STAR: ! 374: case PLUS: ! 375: first(v); ! 376: follow(p); ! 377: return; ! 378: ! 379: case OR: ! 380: case QUEST: ! 381: follow(p); ! 382: return; ! 383: ! 384: case CAT: ! 385: if (v == left(p)) { /* v is left child of p */ ! 386: if (first(right(p)) == 0) { ! 387: follow(p); ! 388: return; ! 389: } ! 390: } ! 391: else /* v is right child */ ! 392: follow(p); ! 393: return; ! 394: } ! 395: } ! 396: ! 397: member(int c, uchar *s) /* is c in s? */ ! 398: { ! 399: while (*s) ! 400: if (c == *s++) ! 401: return(1); ! 402: return(0); ! 403: } ! 404: ! 405: ! 406: match(fa *f, uchar *p) ! 407: { ! 408: register int s, ns; ! 409: ! 410: s = f->reset?makeinit(f,0):f->initstat; ! 411: if (f->out[s]) ! 412: return(1); ! 413: do { ! 414: if (ns=f->gototab[s][*p]) ! 415: s=ns; ! 416: else ! 417: s=cgoto(f,s,*p); ! 418: if (f->out[s]) ! 419: return(1); ! 420: } while (*p++ != 0); ! 421: return(0); ! 422: } ! 423: ! 424: pmatch(fa *f, uchar *p) ! 425: { ! 426: register int s, ns; ! 427: register uchar *q; ! 428: int i, k; ! 429: ! 430: s = f->reset?makeinit(f,1):f->initstat; ! 431: patbeg = p; ! 432: patlen = -1; ! 433: do { ! 434: q = p; ! 435: do { ! 436: if (f->out[s]) /* final state */ ! 437: patlen = q-p; ! 438: if (ns=f->gototab[s][*q]) ! 439: s=ns; ! 440: else ! 441: s=cgoto(f,s,*q); ! 442: if (s==1) /* no transition */ ! 443: if (patlen >= 0) { ! 444: patbeg = p; ! 445: return(1); ! 446: } ! 447: else ! 448: goto nextin; /* no match */ ! 449: } while (*q++ != 0); ! 450: if (f->out[s]) ! 451: patlen = q-p-1; /* don't count $ */ ! 452: if (patlen >= 0) { ! 453: patbeg = p; ! 454: return(1); ! 455: } ! 456: nextin: ! 457: s = 2; ! 458: if (f->reset) { ! 459: for (i=2; i<=f->curstat; i++) ! 460: xfree(f->posns[i]); ! 461: k = *f->posns[0]; ! 462: if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) ! 463: overflo("out of space in pmatch"); ! 464: for (i=0; i<=k; i++) ! 465: (f->posns[2])[i] = (f->posns[0])[i]; ! 466: f->initstat = f->curstat = 2; ! 467: f->out[2] = f->out[0]; ! 468: for (i=0; i<NCHARS; i++) ! 469: f->gototab[2][i] = 0; ! 470: } ! 471: } while (*p++ != 0); ! 472: return (0); ! 473: } ! 474: ! 475: nematch(fa *f, uchar *p) ! 476: { ! 477: register int s, ns; ! 478: register uchar *q; ! 479: int i, k; ! 480: ! 481: s = f->reset?makeinit(f,1):f->initstat; ! 482: patlen = -1; ! 483: while (*p) { ! 484: q = p; ! 485: do { ! 486: if (f->out[s]) /* final state */ ! 487: patlen = q-p; ! 488: if (ns=f->gototab[s][*q]) ! 489: s=ns; ! 490: else ! 491: s=cgoto(f,s,*q); ! 492: if (s==1) /* no transition */ ! 493: if (patlen > 0) { ! 494: patbeg = p; ! 495: return(1); ! 496: } ! 497: else ! 498: goto nnextin; /* no nonempty match */ ! 499: } while (*q++ != 0); ! 500: if (f->out[s]) ! 501: patlen = q-p-1; /* don't count $ */ ! 502: if (patlen > 0 ) { ! 503: patbeg = p; ! 504: return(1); ! 505: } ! 506: nnextin: ! 507: s = 2; ! 508: if (f->reset) { ! 509: for (i=2; i<=f->curstat; i++) ! 510: xfree(f->posns[i]); ! 511: k = *f->posns[0]; ! 512: if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) ! 513: overflo("out of state space"); ! 514: for (i=0; i<=k; i++) ! 515: (f->posns[2])[i] = (f->posns[0])[i]; ! 516: f->initstat = f->curstat = 2; ! 517: f->out[2] = f->out[0]; ! 518: for (i=0; i<NCHARS; i++) ! 519: f->gototab[2][i] = 0; ! 520: } ! 521: p++; ! 522: } ! 523: return (0); ! 524: } ! 525: ! 526: Node *reparse(uchar *p) ! 527: { ! 528: /* parses regular expression pointed to by p */ ! 529: /* uses relex() to scan regular expression */ ! 530: Node *np; ! 531: ! 532: dprintf( ("reparse <%s>\n", p) ); ! 533: lastre = prestr = p; /* prestr points to string to be parsed */ ! 534: rtok = relex(); ! 535: if (rtok == '\0') ! 536: ERROR "empty regular expression" FATAL; ! 537: np = regexp(); ! 538: if (rtok != '\0') ! 539: ERROR "syntax error in regular expression %s at %s", lastre, prestr FATAL; ! 540: return(np); ! 541: } ! 542: ! 543: Node *regexp(void) ! 544: { ! 545: return (alt(concat(primary()))); ! 546: } ! 547: ! 548: Node *primary(void) ! 549: { ! 550: Node *np; ! 551: ! 552: switch (rtok) { ! 553: case CHAR: ! 554: np = op2(CHAR, NIL, (Node *) rlxval); ! 555: rtok = relex(); ! 556: return (unary(np)); ! 557: case ALL: ! 558: rtok = relex(); ! 559: return (unary(op2(ALL, NIL, NIL))); ! 560: case DOT: ! 561: rtok = relex(); ! 562: return (unary(op2(DOT, NIL, NIL))); ! 563: case CCL: ! 564: np = op2(CCL, NIL, (Node*) cclenter(rlxstr)); ! 565: rtok = relex(); ! 566: return (unary(np)); ! 567: case NCCL: ! 568: np = op2(NCCL, NIL, (Node *) cclenter(rlxstr)); ! 569: rtok = relex(); ! 570: return (unary(np)); ! 571: case '^': ! 572: rtok = relex(); ! 573: return (unary(op2(CHAR, NIL, (Node *) HAT))); ! 574: case '$': ! 575: rtok = relex(); ! 576: return (unary(op2(CHAR, NIL, NIL))); ! 577: case '(': ! 578: rtok = relex(); ! 579: if (rtok == ')') { /* special pleading for () */ ! 580: rtok = relex(); ! 581: return unary(op2(CCL, NIL, (Node *) tostring(""))); ! 582: } ! 583: np = regexp(); ! 584: if (rtok == ')') { ! 585: rtok = relex(); ! 586: return (unary(np)); ! 587: } ! 588: else ! 589: ERROR "syntax error in regular expression %s at %s", lastre, prestr FATAL; ! 590: default: ! 591: ERROR "illegal primary in regular expression %s at %s", lastre, prestr FATAL; ! 592: } ! 593: return 0; /*NOTREACHED*/ ! 594: } ! 595: ! 596: Node *concat(Node *np) ! 597: { ! 598: switch (rtok) { ! 599: case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(': ! 600: return (concat(op2(CAT, np, primary()))); ! 601: } ! 602: return (np); ! 603: } ! 604: ! 605: Node *alt(Node *np) ! 606: { ! 607: if (rtok == OR) { ! 608: rtok = relex(); ! 609: return (alt(op2(OR, np, concat(primary())))); ! 610: } ! 611: return (np); ! 612: } ! 613: ! 614: Node *unary(Node *np) ! 615: { ! 616: switch (rtok) { ! 617: case STAR: ! 618: rtok = relex(); ! 619: return (unary(op2(STAR, np, NIL))); ! 620: case PLUS: ! 621: rtok = relex(); ! 622: return (unary(op2(PLUS, np, NIL))); ! 623: case QUEST: ! 624: rtok = relex(); ! 625: return (unary(op2(QUEST, np, NIL))); ! 626: default: ! 627: return (np); ! 628: } ! 629: } ! 630: ! 631: relex(void) /* lexical analyzer for reparse */ ! 632: { ! 633: register int c; ! 634: uchar cbuf[MAXLIN]; ! 635: int clen, cflag; ! 636: ! 637: switch (c = *prestr++) { ! 638: case '|': return OR; ! 639: case '*': return STAR; ! 640: case '+': return PLUS; ! 641: case '?': return QUEST; ! 642: case '.': return DOT; ! 643: case '\0': prestr--; return '\0'; ! 644: case '^': ! 645: case '$': ! 646: case '(': ! 647: case ')': ! 648: return c; ! 649: case '\\': ! 650: rlxval = quoted(&prestr); ! 651: return CHAR; ! 652: default: ! 653: rlxval = c; ! 654: return CHAR; ! 655: case '[': ! 656: clen = 0; ! 657: if (*prestr == '^') { ! 658: cflag = 1; ! 659: prestr++; ! 660: } ! 661: else ! 662: cflag = 0; ! 663: for ( ; clen < MAXLIN-1; ) { ! 664: if ((c = *prestr++) == '\\') { ! 665: cbuf[clen++] = '\\'; ! 666: if ((c = *prestr++) == '\0') ! 667: ERROR "nonterminated character class %.20s...", lastre FATAL; ! 668: cbuf[clen++] = c; ! 669: } else if (c == ']') { ! 670: cbuf[clen] = 0; ! 671: rlxstr = tostring(cbuf); ! 672: if (cflag == 0) ! 673: return CCL; ! 674: else ! 675: return NCCL; ! 676: } else if (c == '\n') { ! 677: ERROR "newline in character class %.20s...", lastre FATAL; ! 678: } else if (c == '\0') { ! 679: ERROR "nonterminated character class %.20s", lastre FATAL; ! 680: } else ! 681: cbuf[clen++] = c; ! 682: } ! 683: if (clen >= MAXLIN-1) ! 684: ERROR "character class %.20s... too long", cbuf FATAL; ! 685: } ! 686: /* can't happen */ ! 687: return 0; ! 688: } ! 689: ! 690: int cgoto(fa *f, int s, int c) ! 691: { ! 692: register int i, j, k; ! 693: register int *p, *q; ! 694: ! 695: for (i=0; i<=f->accept; i++) ! 696: setvec[i] = 0; ! 697: setcnt = 0; ! 698: /* compute positions of gototab[s,c] into setvec */ ! 699: p = f->posns[s]; ! 700: for (i=1; i<=*p; i++) { ! 701: if ((k = f->re[p[i]].ltype) != FINAL) { ! 702: if (k == CHAR && c == f->re[p[i]].lval ! 703: || k == DOT && c != 0 && c != HAT ! 704: || k == ALL && c != 0 ! 705: || k == CCL && member(c, (uchar *) f->re[p[i]].lval) ! 706: || k == NCCL && !member(c, (uchar *) f->re[p[i]].lval) && c != 0 && c != HAT) { ! 707: q = f->re[p[i]].lfollow; ! 708: for (j=1; j<=*q; j++) { ! 709: if (setvec[q[j]] == 0) { ! 710: setcnt++; ! 711: setvec[q[j]] = 1; ! 712: } ! 713: } ! 714: } ! 715: } ! 716: } ! 717: /* determine if setvec is a previous state */ ! 718: tmpset[0] = setcnt; ! 719: j = 1; ! 720: for (i = f->accept; i >= 0; i--) ! 721: if (setvec[i]) { ! 722: tmpset[j++] = i; ! 723: } ! 724: /* tmpset == previous state? */ ! 725: for (i=1; i<= f->curstat; i++) { ! 726: p = f->posns[i]; ! 727: if ((k = tmpset[0]) != p[0]) ! 728: goto different; ! 729: for (j = 1; j <= k; j++) ! 730: if (tmpset[j] != p[j]) ! 731: goto different; ! 732: /* setvec is state i */ ! 733: f->gototab[s][c] = i; ! 734: return i; ! 735: different:; ! 736: } ! 737: ! 738: /* add tmpset to current set of states */ ! 739: if (f->curstat >= NSTATES-1) { ! 740: f->curstat = 2; ! 741: f->reset = 1; ! 742: for (i=2; i<NSTATES; i++) ! 743: xfree(f->posns[i]); ! 744: } ! 745: else ! 746: ++(f->curstat); ! 747: for (i=0; i<NCHARS; i++) ! 748: f->gototab[f->curstat][i] = 0; ! 749: xfree(f->posns[f->curstat]); ! 750: if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL) ! 751: overflo("out of space in cgoto"); ! 752: ! 753: f->posns[f->curstat] = p; ! 754: f->gototab[s][c] = f->curstat; ! 755: for (i = 0; i <= setcnt; i++) ! 756: p[i] = tmpset[i]; ! 757: if (setvec[f->accept]) ! 758: f->out[f->curstat] = 1; ! 759: else ! 760: f->out[f->curstat] = 0; ! 761: return f->curstat; ! 762: } ! 763: ! 764: ! 765: void freefa(fa *f) ! 766: { ! 767: ! 768: register int i; ! 769: ! 770: if (f == NULL) ! 771: return; ! 772: for (i=0; i<=f->curstat; i++) ! 773: xfree(f->posns[i]); ! 774: for (i=0; i<=f->accept; i++) { ! 775: xfree(f->re[i].lfollow); ! 776: if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL) ! 777: xfree(f->re[i].lval); ! 778: } ! 779: xfree(f->restr); ! 780: xfree(f); ! 781: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.