|
|
1.1 ! root 1: #ifndef lint ! 2: static char sccsid[] = "@(#)b.c 4.3 7/7/86"; ! 3: #endif ! 4: ! 5: #include "awk.def" ! 6: #include "stdio.h" ! 7: #include "awk.h" ! 8: ! 9: extern node *op2(); ! 10: extern struct fa *cgotofn(); ! 11: #define MAXLIN 256 ! 12: #define NCHARS 128 ! 13: #define NSTATES 256 ! 14: ! 15: #define type(v) v->nobj ! 16: #define left(v) v->narg[0] ! 17: #define right(v) v->narg[1] ! 18: #define parent(v) v->nnext ! 19: ! 20: #define LEAF case CCL: case NCCL: case CHAR: case DOT: ! 21: #define UNARY case FINAL: case STAR: case PLUS: case QUEST: ! 22: ! 23: /* encoding in tree nodes: ! 24: leaf (CCL, NCCL, CHAR, DOT): left is index, right contains value or pointer to value ! 25: unary (FINAL, STAR, PLUS, QUEST): left is child, right is null ! 26: binary (CAT, OR): left and right are children ! 27: parent contains pointer to parent ! 28: */ ! 29: ! 30: struct fa { ! 31: int cch; ! 32: struct fa *st; ! 33: }; ! 34: ! 35: int *state[NSTATES]; ! 36: int *foll[MAXLIN]; ! 37: char chars[MAXLIN]; ! 38: int setvec[MAXLIN]; ! 39: node *point[MAXLIN]; ! 40: ! 41: int setcnt; ! 42: int line; ! 43: int maxfoll; /* index of highest foll[] entry set by cfoll() */ ! 44: ! 45: ! 46: struct fa *makedfa(p) /* returns dfa for tree pointed to by p */ ! 47: node *p; ! 48: { ! 49: node *p1; ! 50: struct fa *fap; ! 51: p1 = op2(CAT, op2(STAR, op2(DOT, (node *) 0, (node *) 0), (node *) 0), p); ! 52: /* put DOT STAR in front of reg. exp. */ ! 53: p1 = op2(FINAL, p1, (node *) 0); /* install FINAL node */ ! 54: ! 55: line = 0; ! 56: penter(p1); /* enter parent pointers and leaf indices */ ! 57: point[line] = p1; /* FINAL node */ ! 58: setvec[0] = 1; /* for initial DOT STAR */ ! 59: cfoll(p1); /* set up follow sets */ ! 60: fap = cgotofn(); ! 61: freetr(p1); /* add this when alloc works */ ! 62: return(fap); ! 63: } ! 64: ! 65: penter(p) /* set up parent pointers and leaf indices */ ! 66: node *p; ! 67: { ! 68: switch(type(p)) { ! 69: LEAF ! 70: left(p) = (node *) line; ! 71: point[line++] = p; ! 72: break; ! 73: UNARY ! 74: penter(left(p)); ! 75: parent(left(p)) = p; ! 76: break; ! 77: case CAT: ! 78: case OR: ! 79: penter(left(p)); ! 80: penter(right(p)); ! 81: parent(left(p)) = p; ! 82: parent(right(p)) = p; ! 83: break; ! 84: default: ! 85: error(FATAL, "unknown type %d in penter\n", type(p)); ! 86: break; ! 87: } ! 88: } ! 89: ! 90: freetr(p) /* free parse tree and follow sets */ ! 91: node *p; ! 92: { ! 93: switch(type(p)) { ! 94: LEAF ! 95: foll_free((int) left(p)); ! 96: xfree(p); ! 97: break; ! 98: UNARY ! 99: freetr(left(p)); ! 100: xfree(p); ! 101: break; ! 102: case CAT: ! 103: case OR: ! 104: freetr(left(p)); ! 105: freetr(right(p)); ! 106: xfree(p); ! 107: break; ! 108: default: ! 109: error(FATAL, "unknown type %d in freetr", type(p)); ! 110: break; ! 111: } ! 112: } ! 113: char *cclenter(p) ! 114: register char *p; ! 115: { ! 116: register i, c; ! 117: char *op; ! 118: ! 119: op = p; ! 120: i = 0; ! 121: while ((c = *p++) != 0) { ! 122: if (c == '-' && i > 0 && chars[i-1] != 0) { ! 123: if (*p != 0) { ! 124: c = chars[i-1]; ! 125: while (c < *p) { ! 126: if (i >= MAXLIN) ! 127: overflo(); ! 128: chars[i++] = ++c; ! 129: } ! 130: p++; ! 131: continue; ! 132: } ! 133: } ! 134: if (i >= MAXLIN) ! 135: overflo(); ! 136: chars[i++] = c; ! 137: } ! 138: chars[i++] = '\0'; ! 139: dprintf("cclenter: in = |%s|, out = |%s|\n", op, chars, NULL); ! 140: xfree(op); ! 141: return(tostring(chars)); ! 142: } ! 143: ! 144: overflo() ! 145: { ! 146: error(FATAL, "regular expression too long\n"); ! 147: } ! 148: ! 149: cfoll(v) /* enter follow set of each leaf of vertex v into foll[leaf] */ ! 150: register node *v; ! 151: { ! 152: register i; ! 153: int prev; ! 154: int *add(); ! 155: ! 156: maxfoll = -1; ! 157: switch(type(v)) { ! 158: LEAF ! 159: setcnt = 0; ! 160: for (i=1; i<=line; i++) ! 161: setvec[i] = 0; ! 162: follow(v); ! 163: if (notin(foll, ( (int) left(v))-1, &prev)) { ! 164: foll[(int) left(v)] = add(setcnt); ! 165: } ! 166: else ! 167: foll[ (int) left(v)] = foll[prev]; ! 168: if ((int)left(v) > maxfoll) ! 169: maxfoll = (int)left(v); ! 170: break; ! 171: UNARY ! 172: cfoll(left(v)); ! 173: break; ! 174: case CAT: ! 175: case OR: ! 176: cfoll(left(v)); ! 177: cfoll(right(v)); ! 178: break; ! 179: default: ! 180: error(FATAL, "unknown type %d in cfoll", type(v)); ! 181: } ! 182: } ! 183: ! 184: first(p) /* collects initially active leaves of p into setvec */ ! 185: register node *p; /* returns 0 or 1 depending on whether p matches empty string */ ! 186: { ! 187: register b; ! 188: ! 189: switch(type(p)) { ! 190: LEAF ! 191: if (setvec[(int) left(p)] != 1) { ! 192: setvec[(int) left(p)] = 1; ! 193: setcnt++; ! 194: } ! 195: if (type(p) == CCL && (*(char *) right(p)) == '\0') ! 196: return(0); /* empty CCL */ ! 197: else return(1); ! 198: case FINAL: ! 199: case PLUS: ! 200: if (first(left(p)) == 0) return(0); ! 201: return(1); ! 202: case STAR: ! 203: case QUEST: ! 204: first(left(p)); ! 205: return(0); ! 206: case CAT: ! 207: if (first(left(p)) == 0 && first(right(p)) == 0) return(0); ! 208: return(1); ! 209: case OR: ! 210: b = first(right(p)); ! 211: if (first(left(p)) == 0 || b == 0) return(0); ! 212: return(1); ! 213: } ! 214: error(FATAL, "unknown type %d in first\n", type(p)); ! 215: return(-1); ! 216: } ! 217: ! 218: follow(v) ! 219: node *v; /* collects leaves that can follow v into setvec */ ! 220: { ! 221: node *p; ! 222: ! 223: if (type(v) == FINAL) ! 224: return; ! 225: p = parent(v); ! 226: switch (type(p)) { ! 227: case STAR: ! 228: case PLUS: first(v); ! 229: follow(p); ! 230: return; ! 231: ! 232: case OR: ! 233: case QUEST: follow(p); ! 234: return; ! 235: ! 236: case CAT: if (v == left(p)) { /* v is left child of p */ ! 237: if (first(right(p)) == 0) { ! 238: follow(p); ! 239: return; ! 240: } ! 241: } ! 242: else /* v is right child */ ! 243: follow(p); ! 244: return; ! 245: case FINAL: if (setvec[line] != 1) { ! 246: setvec[line] = 1; ! 247: setcnt++; ! 248: } ! 249: return; ! 250: } ! 251: } ! 252: ! 253: member(c, s) /* is c in s? */ ! 254: register char c, *s; ! 255: { ! 256: while (*s) ! 257: if (c == *s++) ! 258: return(1); ! 259: return(0); ! 260: } ! 261: ! 262: notin(array, n, prev) /* is setvec in array[0] thru array[n]? */ ! 263: int **array; ! 264: int *prev; { ! 265: register i, j; ! 266: int *ptr; ! 267: for (i=0; i<=n; i++) { ! 268: ptr = array[i]; ! 269: if (*ptr == setcnt) { ! 270: for (j=0; j < setcnt; j++) ! 271: if (setvec[*(++ptr)] != 1) goto nxt; ! 272: *prev = i; ! 273: return(0); ! 274: } ! 275: nxt: ; ! 276: } ! 277: return(1); ! 278: } ! 279: ! 280: int *add(n) { /* remember setvec */ ! 281: int *ptr, *p; ! 282: register i; ! 283: if ((p = ptr = (int *) malloc((n+1)*sizeof(int))) == NULL) ! 284: overflo(); ! 285: *ptr = n; ! 286: dprintf("add(%d)\n", n, NULL, NULL); ! 287: for (i=1; i <= line; i++) ! 288: if (setvec[i] == 1) { ! 289: *(++ptr) = i; ! 290: dprintf(" ptr = %o, *ptr = %d, i = %d\n", ptr, *ptr, i); ! 291: } ! 292: dprintf("\n", NULL, NULL, NULL); ! 293: return(p); ! 294: } ! 295: ! 296: struct fa *cgotofn() ! 297: { ! 298: register i, k; ! 299: register int *ptr; ! 300: char c; ! 301: char *p; ! 302: node *cp; ! 303: int j, n, s, ind, numtrans; ! 304: int finflg; ! 305: int curpos, num, prev; ! 306: struct fa *where[NSTATES]; ! 307: ! 308: int fatab[257]; ! 309: struct fa *pfa; ! 310: ! 311: char index[MAXLIN]; ! 312: char iposns[MAXLIN]; ! 313: int sposns[MAXLIN]; ! 314: int spmax, spinit; ! 315: ! 316: char symbol[NCHARS]; ! 317: char isyms[NCHARS]; ! 318: char ssyms[NCHARS]; ! 319: int ssmax, ssinit; ! 320: ! 321: for (i=0; i<=line; i++) index[i] = iposns[i] = setvec[i] = 0; ! 322: for (i=0; i<NCHARS; i++) isyms[i] = symbol[i] = 0; ! 323: setcnt = 0; ! 324: /* compute initial positions and symbols of state 0 */ ! 325: ssmax = 0; ! 326: ptr = state[0] = foll[0]; ! 327: spinit = *ptr; ! 328: for (i=0; i<spinit; i++) { ! 329: curpos = *(++ptr); ! 330: sposns[i] = curpos; ! 331: iposns[curpos] = 1; ! 332: cp = point[curpos]; ! 333: dprintf("i = %d, spinit = %d, curpos = %d\n", i, spinit, curpos); ! 334: switch (type(cp)) { ! 335: case CHAR: ! 336: k = (int) right(cp); ! 337: if (isyms[k] != 1) { ! 338: isyms[k] = 1; ! 339: ssyms[ssmax++] = k; ! 340: } ! 341: break; ! 342: case DOT: ! 343: for (k=1; k<NCHARS; k++) { ! 344: if (k != HAT) { ! 345: if (isyms[k] != 1) { ! 346: isyms[k] = 1; ! 347: ssyms[ssmax++] = k; ! 348: } ! 349: } ! 350: } ! 351: break; ! 352: case CCL: ! 353: for (p = (char *) right(cp); *p; p++) { ! 354: if (*p != HAT) { ! 355: if (isyms[*p] != 1) { ! 356: isyms[*p] = 1; ! 357: ssyms[ssmax++] = *p; ! 358: } ! 359: } ! 360: } ! 361: break; ! 362: case NCCL: ! 363: for (k=1; k<NCHARS; k++) { ! 364: if (k != HAT && !member(k, (char *) right(cp))) { ! 365: if (isyms[k] != 1) { ! 366: isyms[k] = 1; ! 367: ssyms[ssmax++] = k; ! 368: } ! 369: } ! 370: } ! 371: } ! 372: } ! 373: ssinit = ssmax; ! 374: n = 0; ! 375: for (s=0; s<=n; s++) { ! 376: dprintf("s = %d\n", s, NULL, NULL); ! 377: ind = 0; ! 378: numtrans = 0; ! 379: finflg = 0; ! 380: if (*(state[s] + *state[s]) == line) { /* s final? */ ! 381: finflg = 1; ! 382: goto tenter; ! 383: } ! 384: spmax = spinit; ! 385: ssmax = ssinit; ! 386: ptr = state[s]; ! 387: num = *ptr; ! 388: for (i=0; i<num; i++) { ! 389: curpos = *(++ptr); ! 390: if (iposns[curpos] != 1 && index[curpos] != 1) { ! 391: index[curpos] = 1; ! 392: sposns[spmax++] = curpos; ! 393: } ! 394: cp = point[curpos]; ! 395: switch (type(cp)) { ! 396: case CHAR: ! 397: k = (int) right(cp); ! 398: if (isyms[k] == 0 && symbol[k] == 0) { ! 399: symbol[k] = 1; ! 400: ssyms[ssmax++] = k; ! 401: } ! 402: break; ! 403: case DOT: ! 404: for (k=1; k<NCHARS; k++) { ! 405: if (k != HAT) { ! 406: if (isyms[k] == 0 && symbol[k] == 0) { ! 407: symbol[k] = 1; ! 408: ssyms[ssmax++] = k; ! 409: } ! 410: } ! 411: } ! 412: break; ! 413: case CCL: ! 414: for (p = (char *) right(cp); *p; p++) { ! 415: if (*p != HAT) { ! 416: if (isyms[*p] == 0 && symbol[*p] == 0) { ! 417: symbol[*p] = 1; ! 418: ssyms[ssmax++] = *p; ! 419: } ! 420: } ! 421: } ! 422: break; ! 423: case NCCL: ! 424: for (k=1; k<NCHARS; k++) { ! 425: if (k != HAT && !member(k, (char *) right(cp))) { ! 426: if (isyms[k] == 0 && symbol[k] == 0) { ! 427: symbol[k] = 1; ! 428: ssyms[ssmax++] = k; ! 429: } ! 430: } ! 431: } ! 432: } ! 433: } ! 434: for (j=0; j<ssmax; j++) { /* nextstate(s, ssyms[j]) */ ! 435: c = ssyms[j]; ! 436: symbol[c] = 0; ! 437: setcnt = 0; ! 438: for (k=0; k<=line; k++) setvec[k] = 0; ! 439: for (i=0; i<spmax; i++) { ! 440: index[sposns[i]] = 0; ! 441: cp = point[sposns[i]]; ! 442: if ((k = type(cp)) != FINAL) ! 443: if (k == CHAR && c == (int) right(cp) ! 444: || k == DOT ! 445: || k == CCL && member(c, (char *) right(cp)) ! 446: || k == NCCL && !member(c, (char *) right(cp))) { ! 447: ptr = foll[sposns[i]]; ! 448: num = *ptr; ! 449: for (k=0; k<num; k++) { ! 450: if (setvec[*(++ptr)] != 1 ! 451: && iposns[*ptr] != 1) { ! 452: setvec[*ptr] = 1; ! 453: setcnt++; ! 454: } ! 455: } ! 456: } ! 457: } /* end nextstate */ ! 458: if (notin(state, n, &prev)) { ! 459: if (n >= NSTATES) { ! 460: dprintf("cgotofn: notin; state = %d, n = %d\n", state, n, NULL); ! 461: overflo(); ! 462: } ! 463: state[++n] = add(setcnt); ! 464: dprintf(" delta(%d,%o) = %d", s,c,n); ! 465: dprintf(", ind = %d\n", ind+1, NULL, NULL); ! 466: fatab[++ind] = c; ! 467: fatab[++ind] = n; ! 468: numtrans++; ! 469: } ! 470: else { ! 471: if (prev != 0) { ! 472: dprintf(" delta(%d,%o) = %d", s,c,prev); ! 473: dprintf(", ind = %d\n", ind+1, NULL, NULL); ! 474: fatab[++ind] = c; ! 475: fatab[++ind] = prev; ! 476: numtrans++; ! 477: } ! 478: } ! 479: } ! 480: tenter: ! 481: if ((pfa = (struct fa *) malloc((numtrans + 1) * sizeof(struct fa))) == NULL) ! 482: overflo(); ! 483: where[s] = pfa; ! 484: if (finflg) ! 485: pfa->cch = -1; /* s is a final state */ ! 486: else ! 487: pfa->cch = numtrans; ! 488: pfa->st = 0; ! 489: for (i=1, pfa += 1; i<=numtrans; i++, pfa++) { ! 490: pfa->cch = fatab[2*i-1]; ! 491: pfa->st = (struct fa *) fatab[2*i]; ! 492: } ! 493: } ! 494: for (i=0; i<=n; i++) { ! 495: /* N.b. state[0] == foll[0], not separately allocated */ ! 496: if (i>0) ! 497: xfree(state[i]); /* free state[i] */ ! 498: pfa = where[i]; ! 499: pfa->st = where[0]; ! 500: dprintf("state %d: (%o)\n", i, pfa, NULL); ! 501: dprintf(" numtrans = %d, default = %o\n", pfa->cch, pfa->st, NULL); ! 502: for (k=1; k<=pfa->cch; k++) { ! 503: (pfa+k)->st = where[ (int) (pfa+k)->st]; ! 504: dprintf(" char = %o, nextstate = %o\n",(pfa+k)->cch, (pfa+k)->st, NULL); ! 505: } ! 506: } ! 507: pfa = where[0]; ! 508: if ((num = pfa->cch) < 0) ! 509: return(where[0]); ! 510: for (pfa += num; num; num--, pfa--) ! 511: if (pfa->cch == HAT) { ! 512: return(pfa->st); ! 513: } ! 514: return(where[0]); ! 515: } ! 516: ! 517: match(pfa, p) ! 518: register struct fa *pfa; ! 519: register char *p; ! 520: { ! 521: register count; ! 522: char c; ! 523: if (p == 0) return(0); ! 524: if (pfa->cch == 1) { /* fast test for first character, if possible */ ! 525: c = (++pfa)->cch; ! 526: do ! 527: if (c == *p) { ! 528: p++; ! 529: pfa = pfa->st; ! 530: goto adv; ! 531: } ! 532: while (*p++ != 0); ! 533: return(0); ! 534: } ! 535: adv: if ((count = pfa->cch) < 0) return(1); ! 536: do { ! 537: for (pfa += count; count; count--, pfa--) ! 538: if (pfa->cch == *p) { ! 539: break; ! 540: } ! 541: pfa = pfa->st; ! 542: if ((count = pfa->cch) < 0) return(1); ! 543: } while(*p++ != 0); ! 544: return(0); ! 545: } ! 546: ! 547: /* ! 548: * Free foll[i], taking into account identical foll[] entries. ! 549: * This is necessary because cfoll() uses the same physical follow set for ! 550: * several foll[] entries when the set is identical. Called by freetr(). ! 551: */ ! 552: foll_free(i) ! 553: int i; ! 554: { ! 555: register int j; ! 556: int *p = foll[i]; ! 557: if (p==NULL) return; ! 558: for (j=0; j<=maxfoll; j++) ! 559: if (foll[j]==p) foll[j]=NULL; ! 560: xfree(p); ! 561: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.