|
|
1.1 ! root 1: #include "awk.def" ! 2: #include "stdio.h" ! 3: #include "awk.h" ! 4: ! 5: extern node *op2(); ! 6: extern struct fa *cgotofn(); ! 7: #define MAXLIN 256 ! 8: #define NCHARS 128 ! 9: #define NSTATES 256 ! 10: ! 11: #define type(v) v->nobj ! 12: #define left(v) v->narg[0] ! 13: #define right(v) v->narg[1] ! 14: #define parent(v) v->nnext ! 15: ! 16: #define LEAF case CCL: case NCCL: case CHAR: case DOT: ! 17: #define UNARY case FINAL: case STAR: case PLUS: case QUEST: ! 18: ! 19: /* encoding in tree nodes: ! 20: leaf (CCL, NCCL, CHAR, DOT): left is index, right contains value or pointer to value ! 21: unary (FINAL, STAR, PLUS, QUEST): left is child, right is null ! 22: binary (CAT, OR): left and right are children ! 23: parent contains pointer to parent ! 24: */ ! 25: ! 26: struct fa { ! 27: int cch; ! 28: struct fa *st; ! 29: }; ! 30: ! 31: int *state[NSTATES]; ! 32: int *foll[MAXLIN]; ! 33: char chars[MAXLIN]; ! 34: int setvec[MAXLIN]; ! 35: node *point[MAXLIN]; ! 36: ! 37: int setcnt; ! 38: int line; ! 39: ! 40: ! 41: struct fa *makedfa(p) /* returns dfa for tree pointed to by p */ ! 42: node *p; ! 43: { ! 44: node *p1; ! 45: struct fa *fap; ! 46: p1 = op2(CAT, op2(STAR, op2(DOT, (node *) 0, (node *) 0), (node *) 0), p); ! 47: /* put DOT STAR in front of reg. exp. */ ! 48: p1 = op2(FINAL, p1, (node *) 0); /* install FINAL node */ ! 49: ! 50: line = 0; ! 51: penter(p1); /* enter parent pointers and leaf indices */ ! 52: point[line] = p1; /* FINAL node */ ! 53: setvec[0] = 1; /* for initial DOT STAR */ ! 54: cfoll(p1); /* set up follow sets */ ! 55: fap = cgotofn(); ! 56: freetr(p1); /* add this when alloc works */ ! 57: return(fap); ! 58: } ! 59: ! 60: penter(p) /* set up parent pointers and leaf indices */ ! 61: node *p; ! 62: { ! 63: switch(type(p)) { ! 64: LEAF ! 65: left(p) = (node *) line; ! 66: point[line++] = p; ! 67: break; ! 68: UNARY ! 69: penter(left(p)); ! 70: parent(left(p)) = p; ! 71: break; ! 72: case CAT: ! 73: case OR: ! 74: penter(left(p)); ! 75: penter(right(p)); ! 76: parent(left(p)) = p; ! 77: parent(right(p)) = p; ! 78: break; ! 79: default: ! 80: error(FATAL, "unknown type %d in penter\n", type(p)); ! 81: break; ! 82: } ! 83: } ! 84: ! 85: freetr(p) /* free parse tree and follow sets */ ! 86: node *p; ! 87: { ! 88: switch(type(p)) { ! 89: LEAF ! 90: xfree(foll[(int) left(p)]); ! 91: xfree(p); ! 92: break; ! 93: UNARY ! 94: freetr(left(p)); ! 95: xfree(p); ! 96: break; ! 97: case CAT: ! 98: case OR: ! 99: freetr(left(p)); ! 100: freetr(right(p)); ! 101: xfree(p); ! 102: break; ! 103: default: ! 104: error(FATAL, "unknown type %d in freetr", type(p)); ! 105: break; ! 106: } ! 107: } ! 108: char *cclenter(p) ! 109: register char *p; ! 110: { ! 111: register i, c; ! 112: char *op; ! 113: ! 114: op = p; ! 115: i = 0; ! 116: while ((c = *p++) != 0) { ! 117: if (c == '-' && i > 0 && chars[i-1] != 0) { ! 118: if (*p != 0) { ! 119: c = chars[i-1]; ! 120: while (c < *p) { ! 121: if (i >= MAXLIN) ! 122: overflo(); ! 123: chars[i++] = ++c; ! 124: } ! 125: p++; ! 126: continue; ! 127: } ! 128: } ! 129: if (i >= MAXLIN) ! 130: overflo(); ! 131: chars[i++] = c; ! 132: } ! 133: chars[i++] = '\0'; ! 134: dprintf("cclenter: in = |%s|, out = |%s|\n", op, chars, NULL); ! 135: xfree(op); ! 136: return(tostring(chars)); ! 137: } ! 138: ! 139: overflo() ! 140: { ! 141: error(FATAL, "regular expression too long\n"); ! 142: } ! 143: ! 144: cfoll(v) /* enter follow set of each leaf of vertex v into foll[leaf] */ ! 145: register node *v; ! 146: { ! 147: register i; ! 148: int prev; ! 149: int *add(); ! 150: ! 151: switch(type(v)) { ! 152: LEAF ! 153: setcnt = 0; ! 154: for (i=1; i<=line; i++) ! 155: setvec[i] = 0; ! 156: follow(v); ! 157: if (notin(foll, ( (int) left(v))-1, &prev)) { ! 158: foll[(int) left(v)] = add(setcnt); ! 159: } ! 160: else ! 161: foll[ (int) left(v)] = foll[prev]; ! 162: break; ! 163: UNARY ! 164: cfoll(left(v)); ! 165: break; ! 166: case CAT: ! 167: case OR: ! 168: cfoll(left(v)); ! 169: cfoll(right(v)); ! 170: break; ! 171: default: ! 172: error(FATAL, "unknown type %d in cfoll", type(v)); ! 173: } ! 174: } ! 175: ! 176: first(p) /* collects initially active leaves of p into setvec */ ! 177: register node *p; /* returns 0 or 1 depending on whether p matches empty string */ ! 178: { ! 179: register b; ! 180: ! 181: switch(type(p)) { ! 182: LEAF ! 183: if (setvec[(int) left(p)] != 1) { ! 184: setvec[(int) left(p)] = 1; ! 185: setcnt++; ! 186: } ! 187: if (type(p) == CCL && (*(char *) right(p)) == '\0') ! 188: return(0); /* empty CCL */ ! 189: else return(1); ! 190: case FINAL: ! 191: case PLUS: ! 192: if (first(left(p)) == 0) return(0); ! 193: return(1); ! 194: case STAR: ! 195: case QUEST: ! 196: first(left(p)); ! 197: return(0); ! 198: case CAT: ! 199: if (first(left(p)) == 0 && first(right(p)) == 0) return(0); ! 200: return(1); ! 201: case OR: ! 202: b = first(right(p)); ! 203: if (first(left(p)) == 0 || b == 0) return(0); ! 204: return(1); ! 205: } ! 206: error(FATAL, "unknown type %d in first\n", type(p)); ! 207: return(-1); ! 208: } ! 209: ! 210: follow(v) ! 211: node *v; /* collects leaves that can follow v into setvec */ ! 212: { ! 213: node *p; ! 214: ! 215: if (type(v) == FINAL) ! 216: return; ! 217: p = parent(v); ! 218: switch (type(p)) { ! 219: case STAR: ! 220: case PLUS: first(v); ! 221: follow(p); ! 222: return; ! 223: ! 224: case OR: ! 225: case QUEST: follow(p); ! 226: return; ! 227: ! 228: case CAT: if (v == left(p)) { /* v is left child of p */ ! 229: if (first(right(p)) == 0) { ! 230: follow(p); ! 231: return; ! 232: } ! 233: } ! 234: else /* v is right child */ ! 235: follow(p); ! 236: return; ! 237: case FINAL: if (setvec[line] != 1) { ! 238: setvec[line] = 1; ! 239: setcnt++; ! 240: } ! 241: return; ! 242: } ! 243: } ! 244: ! 245: member(c, s) /* is c in s? */ ! 246: register char c, *s; ! 247: { ! 248: while (*s) ! 249: if (c == *s++) ! 250: return(1); ! 251: return(0); ! 252: } ! 253: ! 254: notin(array, n, prev) /* is setvec in array[0] thru array[n]? */ ! 255: int **array; ! 256: int *prev; { ! 257: register i, j; ! 258: int *ptr; ! 259: for (i=0; i<=n; i++) { ! 260: ptr = array[i]; ! 261: if (*ptr == setcnt) { ! 262: for (j=0; j < setcnt; j++) ! 263: if (setvec[*(++ptr)] != 1) goto nxt; ! 264: *prev = i; ! 265: return(0); ! 266: } ! 267: nxt: ; ! 268: } ! 269: return(1); ! 270: } ! 271: ! 272: int *add(n) { /* remember setvec */ ! 273: int *ptr, *p; ! 274: register i; ! 275: if ((p = ptr = (int *) malloc((n+1)*sizeof(int))) == NULL) ! 276: overflo(); ! 277: *ptr = n; ! 278: dprintf("add(%d)\n", n, NULL, NULL); ! 279: for (i=1; i <= line; i++) ! 280: if (setvec[i] == 1) { ! 281: *(++ptr) = i; ! 282: dprintf(" ptr = %o, *ptr = %d, i = %d\n", ptr, *ptr, i); ! 283: } ! 284: dprintf("\n", NULL, NULL, NULL); ! 285: return(p); ! 286: } ! 287: ! 288: struct fa *cgotofn() ! 289: { ! 290: register i, k; ! 291: register int *ptr; ! 292: char c; ! 293: char *p; ! 294: node *cp; ! 295: int j, n, s, ind, numtrans; ! 296: int finflg; ! 297: int curpos, num, prev; ! 298: struct fa *where[NSTATES]; ! 299: ! 300: int fatab[257]; ! 301: struct fa *pfa; ! 302: ! 303: char index[MAXLIN]; ! 304: char iposns[MAXLIN]; ! 305: int sposns[MAXLIN]; ! 306: int spmax, spinit; ! 307: ! 308: char symbol[NCHARS]; ! 309: char isyms[NCHARS]; ! 310: char ssyms[NCHARS]; ! 311: int ssmax, ssinit; ! 312: ! 313: for (i=0; i<=line; i++) index[i] = iposns[i] = setvec[i] = 0; ! 314: for (i=0; i<NCHARS; i++) isyms[i] = symbol[i] = 0; ! 315: setcnt = 0; ! 316: /* compute initial positions and symbols of state 0 */ ! 317: ssmax = 0; ! 318: ptr = state[0] = foll[0]; ! 319: spinit = *ptr; ! 320: for (i=0; i<spinit; i++) { ! 321: curpos = *(++ptr); ! 322: sposns[i] = curpos; ! 323: iposns[curpos] = 1; ! 324: cp = point[curpos]; ! 325: dprintf("i = %d, spinit = %d, curpos = %d\n", i, spinit, curpos); ! 326: switch (type(cp)) { ! 327: case CHAR: ! 328: k = (int) right(cp); ! 329: if (isyms[k] != 1) { ! 330: isyms[k] = 1; ! 331: ssyms[ssmax++] = k; ! 332: } ! 333: break; ! 334: case DOT: ! 335: for (k=1; k<NCHARS; k++) { ! 336: if (k != HAT) { ! 337: if (isyms[k] != 1) { ! 338: isyms[k] = 1; ! 339: ssyms[ssmax++] = k; ! 340: } ! 341: } ! 342: } ! 343: break; ! 344: case CCL: ! 345: for (p = (char *) right(cp); *p; p++) { ! 346: if (*p != HAT) { ! 347: if (isyms[*p] != 1) { ! 348: isyms[*p] = 1; ! 349: ssyms[ssmax++] = *p; ! 350: } ! 351: } ! 352: } ! 353: break; ! 354: case NCCL: ! 355: for (k=1; k<NCHARS; k++) { ! 356: if (k != HAT && !member(k, (char *) right(cp))) { ! 357: if (isyms[k] != 1) { ! 358: isyms[k] = 1; ! 359: ssyms[ssmax++] = k; ! 360: } ! 361: } ! 362: } ! 363: } ! 364: } ! 365: ssinit = ssmax; ! 366: n = 0; ! 367: for (s=0; s<=n; s++) { ! 368: dprintf("s = %d\n", s, NULL, NULL); ! 369: ind = 0; ! 370: numtrans = 0; ! 371: finflg = 0; ! 372: if (*(state[s] + *state[s]) == line) { /* s final? */ ! 373: finflg = 1; ! 374: goto tenter; ! 375: } ! 376: spmax = spinit; ! 377: ssmax = ssinit; ! 378: ptr = state[s]; ! 379: num = *ptr; ! 380: for (i=0; i<num; i++) { ! 381: curpos = *(++ptr); ! 382: if (iposns[curpos] != 1 && index[curpos] != 1) { ! 383: index[curpos] = 1; ! 384: sposns[spmax++] = curpos; ! 385: } ! 386: cp = point[curpos]; ! 387: switch (type(cp)) { ! 388: case CHAR: ! 389: k = (int) right(cp); ! 390: if (isyms[k] == 0 && symbol[k] == 0) { ! 391: symbol[k] = 1; ! 392: ssyms[ssmax++] = k; ! 393: } ! 394: break; ! 395: case DOT: ! 396: for (k=1; k<NCHARS; k++) { ! 397: if (k != HAT) { ! 398: if (isyms[k] == 0 && symbol[k] == 0) { ! 399: symbol[k] = 1; ! 400: ssyms[ssmax++] = k; ! 401: } ! 402: } ! 403: } ! 404: break; ! 405: case CCL: ! 406: for (p = (char *) right(cp); *p; p++) { ! 407: if (*p != HAT) { ! 408: if (isyms[*p] == 0 && symbol[*p] == 0) { ! 409: symbol[*p] = 1; ! 410: ssyms[ssmax++] = *p; ! 411: } ! 412: } ! 413: } ! 414: break; ! 415: case NCCL: ! 416: for (k=1; k<NCHARS; k++) { ! 417: if (k != HAT && !member(k, (char *) right(cp))) { ! 418: if (isyms[k] == 0 && symbol[k] == 0) { ! 419: symbol[k] = 1; ! 420: ssyms[ssmax++] = k; ! 421: } ! 422: } ! 423: } ! 424: } ! 425: } ! 426: for (j=0; j<ssmax; j++) { /* nextstate(s, ssyms[j]) */ ! 427: c = ssyms[j]; ! 428: symbol[c] = 0; ! 429: setcnt = 0; ! 430: for (k=0; k<=line; k++) setvec[k] = 0; ! 431: for (i=0; i<spmax; i++) { ! 432: index[sposns[i]] = 0; ! 433: cp = point[sposns[i]]; ! 434: if ((k = type(cp)) != FINAL) ! 435: if (k == CHAR && c == (int) right(cp) ! 436: || k == DOT ! 437: || k == CCL && member(c, (char *) right(cp)) ! 438: || k == NCCL && !member(c, (char *) right(cp))) { ! 439: ptr = foll[sposns[i]]; ! 440: num = *ptr; ! 441: for (k=0; k<num; k++) { ! 442: if (setvec[*(++ptr)] != 1 ! 443: && iposns[*ptr] != 1) { ! 444: setvec[*ptr] = 1; ! 445: setcnt++; ! 446: } ! 447: } ! 448: } ! 449: } /* end nextstate */ ! 450: if (notin(state, n, &prev)) { ! 451: if (n >= NSTATES) { ! 452: dprintf("cgotofn: notin; state = %d, n = %d\n", state, n, NULL); ! 453: overflo(); ! 454: } ! 455: state[++n] = add(setcnt); ! 456: dprintf(" delta(%d,%o) = %d", s,c,n); ! 457: dprintf(", ind = %d\n", ind+1, NULL, NULL); ! 458: fatab[++ind] = c; ! 459: fatab[++ind] = n; ! 460: numtrans++; ! 461: } ! 462: else { ! 463: if (prev != 0) { ! 464: dprintf(" delta(%d,%o) = %d", s,c,prev); ! 465: dprintf(", ind = %d\n", ind+1, NULL, NULL); ! 466: fatab[++ind] = c; ! 467: fatab[++ind] = prev; ! 468: numtrans++; ! 469: } ! 470: } ! 471: } ! 472: tenter: ! 473: if ((pfa = (struct fa *) malloc((numtrans + 1) * sizeof(struct fa))) == NULL) ! 474: overflo(); ! 475: where[s] = pfa; ! 476: if (finflg) ! 477: pfa->cch = -1; /* s is a final state */ ! 478: else ! 479: pfa->cch = numtrans; ! 480: pfa->st = 0; ! 481: for (i=1, pfa += 1; i<=numtrans; i++, pfa++) { ! 482: pfa->cch = fatab[2*i-1]; ! 483: pfa->st = (struct fa *) fatab[2*i]; ! 484: } ! 485: } ! 486: for (i=0; i<=n; i++) { ! 487: xfree(state[i]); /* free state[i] */ ! 488: pfa = where[i]; ! 489: pfa->st = where[0]; ! 490: dprintf("state %d: (%o)\n", i, pfa, NULL); ! 491: dprintf(" numtrans = %d, default = %o\n", pfa->cch, pfa->st, NULL); ! 492: for (k=1; k<=pfa->cch; k++) { ! 493: (pfa+k)->st = where[ (int) (pfa+k)->st]; ! 494: dprintf(" char = %o, nextstate = %o\n",(pfa+k)->cch, (pfa+k)->st, NULL); ! 495: } ! 496: } ! 497: pfa = where[0]; ! 498: if ((num = pfa->cch) < 0) ! 499: return(where[0]); ! 500: for (pfa += num; num; num--, pfa--) ! 501: if (pfa->cch == HAT) { ! 502: return(pfa->st); ! 503: } ! 504: return(where[0]); ! 505: } ! 506: ! 507: match(pfa, p) ! 508: register struct fa *pfa; ! 509: register char *p; ! 510: { ! 511: register count; ! 512: char c; ! 513: if (p == 0) return(0); ! 514: if (pfa->cch == 1) { /* fast test for first character, if possible */ ! 515: c = (++pfa)->cch; ! 516: do ! 517: if (c == *p) { ! 518: p++; ! 519: pfa = pfa->st; ! 520: goto adv; ! 521: } ! 522: while (*p++ != 0); ! 523: return(0); ! 524: } ! 525: adv: if ((count = pfa->cch) < 0) return(1); ! 526: do { ! 527: for (pfa += count; count; count--, pfa--) ! 528: if (pfa->cch == *p) { ! 529: break; ! 530: } ! 531: pfa = pfa->st; ! 532: if ((count = pfa->cch) < 0) return(1); ! 533: } while(*p++ != 0); ! 534: return(0); ! 535: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.