|
|
1.1 ! root 1: #include "awk.def" ! 2: #include "stdio.h" ! 3: #include "awk.h" ! 4: ! 5: extern node *op2(); ! 6: extern struct fa *cgotofn(); ! 7: #define MAXLIN 256 ! 8: #define NCHARS 128 ! 9: #define NSTATES 256 ! 10: ! 11: #define type(v) v->nobj ! 12: #define left(v) v->narg[0] ! 13: #define right(v) v->narg[1] ! 14: #define parent(v) v->nnext ! 15: ! 16: #define LEAF case CCL: case NCCL: case CHAR: case DOT: ! 17: #define UNARY case FINAL: case STAR: case PLUS: case QUEST: ! 18: ! 19: /* encoding in tree nodes: ! 20: leaf (CCL, NCCL, CHAR, DOT): left is index, right contains value or pointer to value ! 21: unary (FINAL, STAR, PLUS, QUEST): left is child, right is null ! 22: binary (CAT, OR): left and right are children ! 23: parent contains pointer to parent ! 24: */ ! 25: ! 26: struct fa { ! 27: int cch; ! 28: struct fa *st; ! 29: }; ! 30: ! 31: int *state[NSTATES]; ! 32: int *foll[MAXLIN]; ! 33: char chars[MAXLIN]; ! 34: int setvec[MAXLIN]; ! 35: node *point[MAXLIN]; ! 36: ! 37: int setcnt; ! 38: int line; ! 39: ! 40: ! 41: struct fa *makedfa(p) /* returns dfa for tree pointed to by p */ ! 42: node *p; ! 43: { ! 44: node *p1; ! 45: struct fa *fap; ! 46: p1 = op2(CAT, op2(STAR, op2(DOT, (node *) 0, (node *) 0), (node *) 0), p); ! 47: /* put DOT STAR in front of reg. exp. */ ! 48: p1 = op2(FINAL, p1, (node *) 0); /* install FINAL node */ ! 49: ! 50: line = 0; ! 51: penter(p1); /* enter parent pointers and leaf indices */ ! 52: point[line] = p1; /* FINAL node */ ! 53: setvec[0] = 1; /* for initial DOT STAR */ ! 54: cfoll(p1); /* set up follow sets */ ! 55: fap = cgotofn(); ! 56: freetr(p1); /* add this when alloc works */ ! 57: return(fap); ! 58: } ! 59: ! 60: penter(p) /* set up parent pointers and leaf indices */ ! 61: node *p; ! 62: { ! 63: switch(type(p)) { ! 64: LEAF ! 65: left(p) = (node *) line; ! 66: point[line++] = p; ! 67: break; ! 68: UNARY ! 69: penter(left(p)); ! 70: parent(left(p)) = p; ! 71: break; ! 72: case CAT: ! 73: case OR: ! 74: penter(left(p)); ! 75: penter(right(p)); ! 76: parent(left(p)) = p; ! 77: parent(right(p)) = p; ! 78: break; ! 79: default: ! 80: error(FATAL, "unknown type %d in penter\n", type(p)); ! 81: break; ! 82: } ! 83: } ! 84: ! 85: freetr(p) /* free parse tree and follow sets */ ! 86: node *p; ! 87: { ! 88: switch(type(p)) { ! 89: LEAF ! 90: xfree(foll[(int) left(p)]); ! 91: xfree(p); ! 92: break; ! 93: UNARY ! 94: freetr(left(p)); ! 95: xfree(p); ! 96: break; ! 97: case CAT: ! 98: case OR: ! 99: freetr(left(p)); ! 100: freetr(right(p)); ! 101: xfree(p); ! 102: break; ! 103: default: ! 104: error(FATAL, "unknown type %d in freetr", type(p)); ! 105: break; ! 106: } ! 107: } ! 108: char *cclenter(p) ! 109: register char *p; ! 110: { ! 111: register i, c; ! 112: char *op; ! 113: ! 114: op = p; ! 115: i = 0; ! 116: while ((c = *p++) != 0) { ! 117: if (c == '-' && i > 0 && chars[i-1] != 0) { ! 118: if (*p != 0) { ! 119: c = chars[i-1]; ! 120: while (c < *p) { ! 121: if (i >= MAXLIN) ! 122: overflo(); ! 123: chars[i++] = ++c; ! 124: } ! 125: p++; ! 126: continue; ! 127: } ! 128: } ! 129: if (i >= MAXLIN) ! 130: overflo(); ! 131: chars[i++] = c; ! 132: } ! 133: chars[i++] = '\0'; ! 134: dprintf("cclenter: in = |%s|, out = |%s|\n", op, chars, NULL); ! 135: xfree(op); ! 136: return(tostring(chars)); ! 137: } ! 138: ! 139: overflo() ! 140: { ! 141: error(FATAL, "regular expression too long\n"); ! 142: } ! 143: ! 144: cfoll(v) /* enter follow set of each leaf of vertex v into foll[leaf] */ ! 145: register node *v; ! 146: { ! 147: register i; ! 148: int prev; ! 149: int *add(); ! 150: ! 151: switch(type(v)) { ! 152: LEAF ! 153: setcnt = 0; ! 154: for (i=1; i<=line; i++) ! 155: setvec[i] = 0; ! 156: follow(v); ! 157: if (notin(foll, ( (int) left(v))-1, &prev)) { ! 158: foll[(int) left(v)] = add(setcnt); ! 159: } ! 160: else ! 161: foll[ (int) left(v)] = foll[prev]; ! 162: break; ! 163: UNARY ! 164: cfoll(left(v)); ! 165: break; ! 166: case CAT: ! 167: case OR: ! 168: cfoll(left(v)); ! 169: cfoll(right(v)); ! 170: break; ! 171: default: ! 172: error(FATAL, "unknown type %d in cfoll", type(v)); ! 173: } ! 174: } ! 175: ! 176: first(p) /* collects initially active leaves of p into setvec */ ! 177: register node *p; /* returns 0 or 1 depending on whether p matches empty string */ ! 178: { ! 179: register b; ! 180: ! 181: switch(type(p)) { ! 182: LEAF ! 183: if (setvec[(int) left(p)] != 1) { ! 184: setvec[(int) left(p)] = 1; ! 185: setcnt++; ! 186: } ! 187: if (type(p) == CCL && (*(char *) right(p)) == '\0') ! 188: return(0); /* empty CCL */ ! 189: else return(1); ! 190: case FINAL: ! 191: case PLUS: ! 192: if (first(left(p)) == 0) return(0); ! 193: return(1); ! 194: case STAR: ! 195: case QUEST: ! 196: first(left(p)); ! 197: return(0); ! 198: case CAT: ! 199: if (first(left(p)) == 0 && first(right(p)) == 0) return(0); ! 200: return(1); ! 201: case OR: ! 202: b = first(right(p)); ! 203: if (first(left(p)) == 0 || b == 0) return(0); ! 204: return(1); ! 205: } ! 206: error(FATAL, "unknown type %d in first\n", type(p)); ! 207: return(-1); ! 208: } ! 209: ! 210: follow(v) ! 211: node *v; /* collects leaves that can follow v into setvec */ ! 212: { ! 213: node *p; ! 214: ! 215: if (type(v) == FINAL) ! 216: return; ! 217: p = parent(v); ! 218: switch (type(p)) { ! 219: case STAR: ! 220: case PLUS: first(v); ! 221: follow(p); ! 222: return; ! 223: ! 224: case OR: ! 225: case QUEST: follow(p); ! 226: return; ! 227: ! 228: case CAT: if (v == left(p)) { /* v is left child of p */ ! 229: if (first(right(p)) == 0) { ! 230: follow(p); ! 231: return; ! 232: } ! 233: } ! 234: else /* v is right child */ ! 235: follow(p); ! 236: return; ! 237: case FINAL: if (setvec[line] != 1) { ! 238: setvec[line] = 1; ! 239: setcnt++; ! 240: } ! 241: return; ! 242: } ! 243: } ! 244: ! 245: member(c, s) /* is c in s? */ ! 246: register char c, *s; ! 247: { ! 248: while (*s) ! 249: if (c == *s++) ! 250: return(1); ! 251: return(0); ! 252: } ! 253: ! 254: notin(array, n, prev) /* is setvec in array[0] thru array[n]? */ ! 255: int **array; ! 256: int *prev; { ! 257: register i, j; ! 258: int *ptr; ! 259: for (i=0; i<=n; i++) { ! 260: ptr = array[i]; ! 261: if (*ptr == setcnt) { ! 262: for (j=0; j < setcnt; j++) ! 263: if (setvec[*(++ptr)] != 1) goto nxt; ! 264: *prev = i; ! 265: return(0); ! 266: } ! 267: nxt: ; ! 268: } ! 269: return(1); ! 270: } ! 271: ! 272: int *add(n) { /* remember setvec */ ! 273: int *ptr, *p; ! 274: register i; ! 275: if ((p = ptr = (int *) malloc((n+1)*sizeof(int))) == NULL) ! 276: overflo(); ! 277: *ptr = n; ! 278: dprintf("add(%d)\n", n, NULL, NULL); ! 279: for (i=1; i <= line; i++) ! 280: if (setvec[i] == 1) { ! 281: *(++ptr) = i; ! 282: dprintf(" ptr = %o, *ptr = %d, i = %d\n", ptr, *ptr, i); ! 283: } ! 284: dprintf("\n", NULL, NULL, NULL); ! 285: return(p); ! 286: } ! 287: ! 288: struct fa *cgotofn() ! 289: { ! 290: register i, k; ! 291: register int *ptr; ! 292: char c; ! 293: char *p; ! 294: node *cp; ! 295: int j, n, s, ind, numtrans; ! 296: int finflg; ! 297: int curpos, num, prev; ! 298: struct fa *where[NSTATES]; ! 299: ! 300: int fatab[257]; ! 301: struct fa *pfa; ! 302: ! 303: char index[MAXLIN]; ! 304: char iposns[MAXLIN]; ! 305: int sposns[MAXLIN]; ! 306: int spmax, spinit; ! 307: ! 308: char symbol[NCHARS]; ! 309: char isyms[NCHARS]; ! 310: char ssyms[NCHARS]; ! 311: int ssmax, ssinit; ! 312: ! 313: for (i=0; i<=line; i++) index[i] = iposns[i] = setvec[i] = 0; ! 314: for (i=0; i<NCHARS; i++) isyms[i] = symbol[i] = 0; ! 315: setcnt = 0; ! 316: state[0] = add(0); ! 317: /* compute initial positions and symbols of state 0 */ ! 318: ssmax = 0; ! 319: ptr = foll[0]; ! 320: spinit = *ptr; ! 321: for (i=0; i<spinit; i++) { ! 322: curpos = *(++ptr); ! 323: sposns[i] = curpos; ! 324: iposns[curpos] = 1; ! 325: cp = point[curpos]; ! 326: dprintf("i = %d, spinit = %d, curpos = %d\n", i, spinit, curpos); ! 327: switch (type(cp)) { ! 328: case CHAR: ! 329: k = (int) right(cp); ! 330: if (isyms[k] != 1) { ! 331: isyms[k] = 1; ! 332: ssyms[ssmax++] = k; ! 333: } ! 334: break; ! 335: case DOT: ! 336: for (k=1; k<NCHARS; k++) { ! 337: if (k != HAT) { ! 338: if (isyms[k] != 1) { ! 339: isyms[k] = 1; ! 340: ssyms[ssmax++] = k; ! 341: } ! 342: } ! 343: } ! 344: break; ! 345: case CCL: ! 346: for (p = (char *) right(cp); *p; p++) { ! 347: if (*p != HAT) { ! 348: if (isyms[*p] != 1) { ! 349: isyms[*p] = 1; ! 350: ssyms[ssmax++] = *p; ! 351: } ! 352: } ! 353: } ! 354: break; ! 355: case NCCL: ! 356: for (k=1; k<NCHARS; k++) { ! 357: if (k != HAT && !member(k, (char *) right(cp))) { ! 358: if (isyms[k] != 1) { ! 359: isyms[k] = 1; ! 360: ssyms[ssmax++] = k; ! 361: } ! 362: } ! 363: } ! 364: } ! 365: } ! 366: ssinit = ssmax; ! 367: n = 0; ! 368: for (s=0; s<=n; s++) { ! 369: dprintf("s = %d\n", s, NULL, NULL); ! 370: ind = 0; ! 371: numtrans = 0; ! 372: finflg = 0; ! 373: if (*(state[s] + *state[s]) == line) { /* s final? */ ! 374: finflg = 1; ! 375: goto tenter; ! 376: } ! 377: spmax = spinit; ! 378: ssmax = ssinit; ! 379: ptr = state[s]; ! 380: num = *ptr; ! 381: for (i=0; i<num; i++) { ! 382: curpos = *(++ptr); ! 383: if (iposns[curpos] != 1 && index[curpos] != 1) { ! 384: index[curpos] = 1; ! 385: sposns[spmax++] = curpos; ! 386: } ! 387: cp = point[curpos]; ! 388: switch (type(cp)) { ! 389: case CHAR: ! 390: k = (int) right(cp); ! 391: if (isyms[k] == 0 && symbol[k] == 0) { ! 392: symbol[k] = 1; ! 393: ssyms[ssmax++] = k; ! 394: } ! 395: break; ! 396: case DOT: ! 397: for (k=1; k<NCHARS; k++) { ! 398: if (k != HAT) { ! 399: if (isyms[k] == 0 && symbol[k] == 0) { ! 400: symbol[k] = 1; ! 401: ssyms[ssmax++] = k; ! 402: } ! 403: } ! 404: } ! 405: break; ! 406: case CCL: ! 407: for (p = (char *) right(cp); *p; p++) { ! 408: if (*p != HAT) { ! 409: if (isyms[*p] == 0 && symbol[*p] == 0) { ! 410: symbol[*p] = 1; ! 411: ssyms[ssmax++] = *p; ! 412: } ! 413: } ! 414: } ! 415: break; ! 416: case NCCL: ! 417: for (k=1; k<NCHARS; k++) { ! 418: if (k != HAT && !member(k, (char *) right(cp))) { ! 419: if (isyms[k] == 0 && symbol[k] == 0) { ! 420: symbol[k] = 1; ! 421: ssyms[ssmax++] = k; ! 422: } ! 423: } ! 424: } ! 425: } ! 426: } ! 427: for (j=0; j<ssmax; j++) { /* nextstate(s, ssyms[j]) */ ! 428: c = ssyms[j]; ! 429: symbol[c] = 0; ! 430: setcnt = 0; ! 431: for (k=0; k<=line; k++) setvec[k] = 0; ! 432: for (i=0; i<spmax; i++) { ! 433: index[sposns[i]] = 0; ! 434: cp = point[sposns[i]]; ! 435: if ((k = type(cp)) != FINAL) ! 436: if (k == CHAR && c == (int) right(cp) ! 437: || k == DOT ! 438: || k == CCL && member(c, (char *) right(cp)) ! 439: || k == NCCL && !member(c, (char *) right(cp))) { ! 440: ptr = foll[sposns[i]]; ! 441: num = *ptr; ! 442: for (k=0; k<num; k++) { ! 443: if (setvec[*(++ptr)] != 1 ! 444: && iposns[*ptr] != 1) { ! 445: setvec[*ptr] = 1; ! 446: setcnt++; ! 447: } ! 448: } ! 449: } ! 450: } /* end nextstate */ ! 451: if (notin(state, n, &prev)) { ! 452: if (n >= NSTATES) { ! 453: dprintf("cgotofn: notin; state = %d, n = %d\n", state, n, NULL); ! 454: overflo(); ! 455: } ! 456: state[++n] = add(setcnt); ! 457: dprintf(" delta(%d,%o) = %d", s,c,n); ! 458: dprintf(", ind = %d\n", ind+1, NULL, NULL); ! 459: fatab[++ind] = c; ! 460: fatab[++ind] = n; ! 461: numtrans++; ! 462: } ! 463: else { ! 464: if (prev != 0) { ! 465: dprintf(" delta(%d,%o) = %d", s,c,prev); ! 466: dprintf(", ind = %d\n", ind+1, NULL, NULL); ! 467: fatab[++ind] = c; ! 468: fatab[++ind] = prev; ! 469: numtrans++; ! 470: } ! 471: } ! 472: } ! 473: tenter: ! 474: if ((pfa = (struct fa *) malloc((numtrans + 1) * sizeof(struct fa))) == NULL) ! 475: overflo(); ! 476: where[s] = pfa; ! 477: if (finflg) ! 478: pfa->cch = -1; /* s is a final state */ ! 479: else ! 480: pfa->cch = numtrans; ! 481: pfa->st = 0; ! 482: for (i=1, pfa += 1; i<=numtrans; i++, pfa++) { ! 483: pfa->cch = fatab[2*i-1]; ! 484: pfa->st = (struct fa *) fatab[2*i]; ! 485: } ! 486: } ! 487: for (i=0; i<=n; i++) { ! 488: xfree(state[i]); /* free state[i] */ ! 489: pfa = where[i]; ! 490: pfa->st = where[0]; ! 491: dprintf("state %d: (%o)\n", i, pfa, NULL); ! 492: dprintf(" numtrans = %d, default = %o\n", pfa->cch, pfa->st, NULL); ! 493: for (k=1; k<=pfa->cch; k++) { ! 494: (pfa+k)->st = where[ (int) (pfa+k)->st]; ! 495: dprintf(" char = %o, nextstate = %o\n",(pfa+k)->cch, (pfa+k)->st, NULL); ! 496: } ! 497: } ! 498: pfa = where[0]; ! 499: if ((num = pfa->cch) < 0) ! 500: return(where[0]); ! 501: for (pfa += num; num; num--, pfa--) ! 502: if (pfa->cch == HAT) { ! 503: return(pfa->st); ! 504: } ! 505: return(where[0]); ! 506: } ! 507: ! 508: match(pfa, p) ! 509: register struct fa *pfa; ! 510: register char *p; ! 511: { ! 512: register count; ! 513: char c; ! 514: if (p == 0) return(0); ! 515: if (pfa->cch == 1) { /* fast test for first character, if possible */ ! 516: c = (++pfa)->cch; ! 517: do ! 518: if (c == *p) { ! 519: p++; ! 520: pfa = pfa->st; ! 521: goto adv; ! 522: } ! 523: while (*p++ != 0); ! 524: return(0); ! 525: } ! 526: adv: if ((count = pfa->cch) < 0) return(1); ! 527: do { ! 528: for (pfa += count; count; count--, pfa--) ! 529: if (pfa->cch == *p) { ! 530: break; ! 531: } ! 532: pfa = pfa->st; ! 533: if ((count = pfa->cch) < 0) return(1); ! 534: } while(*p++ != 0); ! 535: return(0); ! 536: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.