|
|
1.1 ! root 1: #ifndef lint ! 2: static char sccsid[] = "@(#)b.c 4.2 8/11/83"; ! 3: #endif ! 4: ! 5: #include "awk.def" ! 6: #include "stdio.h" ! 7: #include "awk.h" ! 8: ! 9: extern node *op2(); ! 10: extern struct fa *cgotofn(); ! 11: #define MAXLIN 256 ! 12: #define NCHARS 128 ! 13: #define NSTATES 256 ! 14: ! 15: #define type(v) v->nobj ! 16: #define left(v) v->narg[0] ! 17: #define right(v) v->narg[1] ! 18: #define parent(v) v->nnext ! 19: ! 20: #define LEAF case CCL: case NCCL: case CHAR: case DOT: ! 21: #define UNARY case FINAL: case STAR: case PLUS: case QUEST: ! 22: ! 23: /* encoding in tree nodes: ! 24: leaf (CCL, NCCL, CHAR, DOT): left is index, right contains value or pointer to value ! 25: unary (FINAL, STAR, PLUS, QUEST): left is child, right is null ! 26: binary (CAT, OR): left and right are children ! 27: parent contains pointer to parent ! 28: */ ! 29: ! 30: struct fa { ! 31: int cch; ! 32: struct fa *st; ! 33: }; ! 34: ! 35: int *state[NSTATES]; ! 36: int *foll[MAXLIN]; ! 37: char chars[MAXLIN]; ! 38: int setvec[MAXLIN]; ! 39: node *point[MAXLIN]; ! 40: ! 41: int setcnt; ! 42: int line; ! 43: ! 44: ! 45: struct fa *makedfa(p) /* returns dfa for tree pointed to by p */ ! 46: node *p; ! 47: { ! 48: node *p1; ! 49: struct fa *fap; ! 50: p1 = op2(CAT, op2(STAR, op2(DOT, (node *) 0, (node *) 0), (node *) 0), p); ! 51: /* put DOT STAR in front of reg. exp. */ ! 52: p1 = op2(FINAL, p1, (node *) 0); /* install FINAL node */ ! 53: ! 54: line = 0; ! 55: penter(p1); /* enter parent pointers and leaf indices */ ! 56: point[line] = p1; /* FINAL node */ ! 57: setvec[0] = 1; /* for initial DOT STAR */ ! 58: cfoll(p1); /* set up follow sets */ ! 59: fap = cgotofn(); ! 60: freetr(p1); /* add this when alloc works */ ! 61: return(fap); ! 62: } ! 63: ! 64: penter(p) /* set up parent pointers and leaf indices */ ! 65: node *p; ! 66: { ! 67: switch(type(p)) { ! 68: LEAF ! 69: left(p) = (node *) line; ! 70: point[line++] = p; ! 71: break; ! 72: UNARY ! 73: penter(left(p)); ! 74: parent(left(p)) = p; ! 75: break; ! 76: case CAT: ! 77: case OR: ! 78: penter(left(p)); ! 79: penter(right(p)); ! 80: parent(left(p)) = p; ! 81: parent(right(p)) = p; ! 82: break; ! 83: default: ! 84: error(FATAL, "unknown type %d in penter\n", type(p)); ! 85: break; ! 86: } ! 87: } ! 88: ! 89: freetr(p) /* free parse tree and follow sets */ ! 90: node *p; ! 91: { ! 92: switch(type(p)) { ! 93: LEAF ! 94: xfree(foll[(int) left(p)]); ! 95: xfree(p); ! 96: break; ! 97: UNARY ! 98: freetr(left(p)); ! 99: xfree(p); ! 100: break; ! 101: case CAT: ! 102: case OR: ! 103: freetr(left(p)); ! 104: freetr(right(p)); ! 105: xfree(p); ! 106: break; ! 107: default: ! 108: error(FATAL, "unknown type %d in freetr", type(p)); ! 109: break; ! 110: } ! 111: } ! 112: char *cclenter(p) ! 113: register char *p; ! 114: { ! 115: register i, c; ! 116: char *op; ! 117: ! 118: op = p; ! 119: i = 0; ! 120: while ((c = *p++) != 0) { ! 121: if (c == '-' && i > 0 && chars[i-1] != 0) { ! 122: if (*p != 0) { ! 123: c = chars[i-1]; ! 124: while (c < *p) { ! 125: if (i >= MAXLIN) ! 126: overflo(); ! 127: chars[i++] = ++c; ! 128: } ! 129: p++; ! 130: continue; ! 131: } ! 132: } ! 133: if (i >= MAXLIN) ! 134: overflo(); ! 135: chars[i++] = c; ! 136: } ! 137: chars[i++] = '\0'; ! 138: dprintf("cclenter: in = |%s|, out = |%s|\n", op, chars, NULL); ! 139: xfree(op); ! 140: return(tostring(chars)); ! 141: } ! 142: ! 143: overflo() ! 144: { ! 145: error(FATAL, "regular expression too long\n"); ! 146: } ! 147: ! 148: cfoll(v) /* enter follow set of each leaf of vertex v into foll[leaf] */ ! 149: register node *v; ! 150: { ! 151: register i; ! 152: int prev; ! 153: int *add(); ! 154: ! 155: switch(type(v)) { ! 156: LEAF ! 157: setcnt = 0; ! 158: for (i=1; i<=line; i++) ! 159: setvec[i] = 0; ! 160: follow(v); ! 161: if (notin(foll, ( (int) left(v))-1, &prev)) { ! 162: foll[(int) left(v)] = add(setcnt); ! 163: } ! 164: else ! 165: foll[ (int) left(v)] = foll[prev]; ! 166: break; ! 167: UNARY ! 168: cfoll(left(v)); ! 169: break; ! 170: case CAT: ! 171: case OR: ! 172: cfoll(left(v)); ! 173: cfoll(right(v)); ! 174: break; ! 175: default: ! 176: error(FATAL, "unknown type %d in cfoll", type(v)); ! 177: } ! 178: } ! 179: ! 180: first(p) /* collects initially active leaves of p into setvec */ ! 181: register node *p; /* returns 0 or 1 depending on whether p matches empty string */ ! 182: { ! 183: register b; ! 184: ! 185: switch(type(p)) { ! 186: LEAF ! 187: if (setvec[(int) left(p)] != 1) { ! 188: setvec[(int) left(p)] = 1; ! 189: setcnt++; ! 190: } ! 191: if (type(p) == CCL && (*(char *) right(p)) == '\0') ! 192: return(0); /* empty CCL */ ! 193: else return(1); ! 194: case FINAL: ! 195: case PLUS: ! 196: if (first(left(p)) == 0) return(0); ! 197: return(1); ! 198: case STAR: ! 199: case QUEST: ! 200: first(left(p)); ! 201: return(0); ! 202: case CAT: ! 203: if (first(left(p)) == 0 && first(right(p)) == 0) return(0); ! 204: return(1); ! 205: case OR: ! 206: b = first(right(p)); ! 207: if (first(left(p)) == 0 || b == 0) return(0); ! 208: return(1); ! 209: } ! 210: error(FATAL, "unknown type %d in first\n", type(p)); ! 211: return(-1); ! 212: } ! 213: ! 214: follow(v) ! 215: node *v; /* collects leaves that can follow v into setvec */ ! 216: { ! 217: node *p; ! 218: ! 219: if (type(v) == FINAL) ! 220: return; ! 221: p = parent(v); ! 222: switch (type(p)) { ! 223: case STAR: ! 224: case PLUS: first(v); ! 225: follow(p); ! 226: return; ! 227: ! 228: case OR: ! 229: case QUEST: follow(p); ! 230: return; ! 231: ! 232: case CAT: if (v == left(p)) { /* v is left child of p */ ! 233: if (first(right(p)) == 0) { ! 234: follow(p); ! 235: return; ! 236: } ! 237: } ! 238: else /* v is right child */ ! 239: follow(p); ! 240: return; ! 241: case FINAL: if (setvec[line] != 1) { ! 242: setvec[line] = 1; ! 243: setcnt++; ! 244: } ! 245: return; ! 246: } ! 247: } ! 248: ! 249: member(c, s) /* is c in s? */ ! 250: register char c, *s; ! 251: { ! 252: while (*s) ! 253: if (c == *s++) ! 254: return(1); ! 255: return(0); ! 256: } ! 257: ! 258: notin(array, n, prev) /* is setvec in array[0] thru array[n]? */ ! 259: int **array; ! 260: int *prev; { ! 261: register i, j; ! 262: int *ptr; ! 263: for (i=0; i<=n; i++) { ! 264: ptr = array[i]; ! 265: if (*ptr == setcnt) { ! 266: for (j=0; j < setcnt; j++) ! 267: if (setvec[*(++ptr)] != 1) goto nxt; ! 268: *prev = i; ! 269: return(0); ! 270: } ! 271: nxt: ; ! 272: } ! 273: return(1); ! 274: } ! 275: ! 276: int *add(n) { /* remember setvec */ ! 277: int *ptr, *p; ! 278: register i; ! 279: if ((p = ptr = (int *) malloc((n+1)*sizeof(int))) == NULL) ! 280: overflo(); ! 281: *ptr = n; ! 282: dprintf("add(%d)\n", n, NULL, NULL); ! 283: for (i=1; i <= line; i++) ! 284: if (setvec[i] == 1) { ! 285: *(++ptr) = i; ! 286: dprintf(" ptr = %o, *ptr = %d, i = %d\n", ptr, *ptr, i); ! 287: } ! 288: dprintf("\n", NULL, NULL, NULL); ! 289: return(p); ! 290: } ! 291: ! 292: struct fa *cgotofn() ! 293: { ! 294: register i, k; ! 295: register int *ptr; ! 296: char c; ! 297: char *p; ! 298: node *cp; ! 299: int j, n, s, ind, numtrans; ! 300: int finflg; ! 301: int curpos, num, prev; ! 302: struct fa *where[NSTATES]; ! 303: ! 304: int fatab[257]; ! 305: struct fa *pfa; ! 306: ! 307: char index[MAXLIN]; ! 308: char iposns[MAXLIN]; ! 309: int sposns[MAXLIN]; ! 310: int spmax, spinit; ! 311: ! 312: char symbol[NCHARS]; ! 313: char isyms[NCHARS]; ! 314: char ssyms[NCHARS]; ! 315: int ssmax, ssinit; ! 316: ! 317: for (i=0; i<=line; i++) index[i] = iposns[i] = setvec[i] = 0; ! 318: for (i=0; i<NCHARS; i++) isyms[i] = symbol[i] = 0; ! 319: setcnt = 0; ! 320: /* compute initial positions and symbols of state 0 */ ! 321: ssmax = 0; ! 322: ptr = state[0] = foll[0]; ! 323: spinit = *ptr; ! 324: for (i=0; i<spinit; i++) { ! 325: curpos = *(++ptr); ! 326: sposns[i] = curpos; ! 327: iposns[curpos] = 1; ! 328: cp = point[curpos]; ! 329: dprintf("i = %d, spinit = %d, curpos = %d\n", i, spinit, curpos); ! 330: switch (type(cp)) { ! 331: case CHAR: ! 332: k = (int) right(cp); ! 333: if (isyms[k] != 1) { ! 334: isyms[k] = 1; ! 335: ssyms[ssmax++] = k; ! 336: } ! 337: break; ! 338: case DOT: ! 339: for (k=1; k<NCHARS; k++) { ! 340: if (k != HAT) { ! 341: if (isyms[k] != 1) { ! 342: isyms[k] = 1; ! 343: ssyms[ssmax++] = k; ! 344: } ! 345: } ! 346: } ! 347: break; ! 348: case CCL: ! 349: for (p = (char *) right(cp); *p; p++) { ! 350: if (*p != HAT) { ! 351: if (isyms[*p] != 1) { ! 352: isyms[*p] = 1; ! 353: ssyms[ssmax++] = *p; ! 354: } ! 355: } ! 356: } ! 357: break; ! 358: case NCCL: ! 359: for (k=1; k<NCHARS; k++) { ! 360: if (k != HAT && !member(k, (char *) right(cp))) { ! 361: if (isyms[k] != 1) { ! 362: isyms[k] = 1; ! 363: ssyms[ssmax++] = k; ! 364: } ! 365: } ! 366: } ! 367: } ! 368: } ! 369: ssinit = ssmax; ! 370: n = 0; ! 371: for (s=0; s<=n; s++) { ! 372: dprintf("s = %d\n", s, NULL, NULL); ! 373: ind = 0; ! 374: numtrans = 0; ! 375: finflg = 0; ! 376: if (*(state[s] + *state[s]) == line) { /* s final? */ ! 377: finflg = 1; ! 378: goto tenter; ! 379: } ! 380: spmax = spinit; ! 381: ssmax = ssinit; ! 382: ptr = state[s]; ! 383: num = *ptr; ! 384: for (i=0; i<num; i++) { ! 385: curpos = *(++ptr); ! 386: if (iposns[curpos] != 1 && index[curpos] != 1) { ! 387: index[curpos] = 1; ! 388: sposns[spmax++] = curpos; ! 389: } ! 390: cp = point[curpos]; ! 391: switch (type(cp)) { ! 392: case CHAR: ! 393: k = (int) right(cp); ! 394: if (isyms[k] == 0 && symbol[k] == 0) { ! 395: symbol[k] = 1; ! 396: ssyms[ssmax++] = k; ! 397: } ! 398: break; ! 399: case DOT: ! 400: for (k=1; k<NCHARS; k++) { ! 401: if (k != HAT) { ! 402: if (isyms[k] == 0 && symbol[k] == 0) { ! 403: symbol[k] = 1; ! 404: ssyms[ssmax++] = k; ! 405: } ! 406: } ! 407: } ! 408: break; ! 409: case CCL: ! 410: for (p = (char *) right(cp); *p; p++) { ! 411: if (*p != HAT) { ! 412: if (isyms[*p] == 0 && symbol[*p] == 0) { ! 413: symbol[*p] = 1; ! 414: ssyms[ssmax++] = *p; ! 415: } ! 416: } ! 417: } ! 418: break; ! 419: case NCCL: ! 420: for (k=1; k<NCHARS; k++) { ! 421: if (k != HAT && !member(k, (char *) right(cp))) { ! 422: if (isyms[k] == 0 && symbol[k] == 0) { ! 423: symbol[k] = 1; ! 424: ssyms[ssmax++] = k; ! 425: } ! 426: } ! 427: } ! 428: } ! 429: } ! 430: for (j=0; j<ssmax; j++) { /* nextstate(s, ssyms[j]) */ ! 431: c = ssyms[j]; ! 432: symbol[c] = 0; ! 433: setcnt = 0; ! 434: for (k=0; k<=line; k++) setvec[k] = 0; ! 435: for (i=0; i<spmax; i++) { ! 436: index[sposns[i]] = 0; ! 437: cp = point[sposns[i]]; ! 438: if ((k = type(cp)) != FINAL) ! 439: if (k == CHAR && c == (int) right(cp) ! 440: || k == DOT ! 441: || k == CCL && member(c, (char *) right(cp)) ! 442: || k == NCCL && !member(c, (char *) right(cp))) { ! 443: ptr = foll[sposns[i]]; ! 444: num = *ptr; ! 445: for (k=0; k<num; k++) { ! 446: if (setvec[*(++ptr)] != 1 ! 447: && iposns[*ptr] != 1) { ! 448: setvec[*ptr] = 1; ! 449: setcnt++; ! 450: } ! 451: } ! 452: } ! 453: } /* end nextstate */ ! 454: if (notin(state, n, &prev)) { ! 455: if (n >= NSTATES) { ! 456: dprintf("cgotofn: notin; state = %d, n = %d\n", state, n, NULL); ! 457: overflo(); ! 458: } ! 459: state[++n] = add(setcnt); ! 460: dprintf(" delta(%d,%o) = %d", s,c,n); ! 461: dprintf(", ind = %d\n", ind+1, NULL, NULL); ! 462: fatab[++ind] = c; ! 463: fatab[++ind] = n; ! 464: numtrans++; ! 465: } ! 466: else { ! 467: if (prev != 0) { ! 468: dprintf(" delta(%d,%o) = %d", s,c,prev); ! 469: dprintf(", ind = %d\n", ind+1, NULL, NULL); ! 470: fatab[++ind] = c; ! 471: fatab[++ind] = prev; ! 472: numtrans++; ! 473: } ! 474: } ! 475: } ! 476: tenter: ! 477: if ((pfa = (struct fa *) malloc((numtrans + 1) * sizeof(struct fa))) == NULL) ! 478: overflo(); ! 479: where[s] = pfa; ! 480: if (finflg) ! 481: pfa->cch = -1; /* s is a final state */ ! 482: else ! 483: pfa->cch = numtrans; ! 484: pfa->st = 0; ! 485: for (i=1, pfa += 1; i<=numtrans; i++, pfa++) { ! 486: pfa->cch = fatab[2*i-1]; ! 487: pfa->st = (struct fa *) fatab[2*i]; ! 488: } ! 489: } ! 490: for (i=0; i<=n; i++) { ! 491: xfree(state[i]); /* free state[i] */ ! 492: pfa = where[i]; ! 493: pfa->st = where[0]; ! 494: dprintf("state %d: (%o)\n", i, pfa, NULL); ! 495: dprintf(" numtrans = %d, default = %o\n", pfa->cch, pfa->st, NULL); ! 496: for (k=1; k<=pfa->cch; k++) { ! 497: (pfa+k)->st = where[ (int) (pfa+k)->st]; ! 498: dprintf(" char = %o, nextstate = %o\n",(pfa+k)->cch, (pfa+k)->st, NULL); ! 499: } ! 500: } ! 501: pfa = where[0]; ! 502: if ((num = pfa->cch) < 0) ! 503: return(where[0]); ! 504: for (pfa += num; num; num--, pfa--) ! 505: if (pfa->cch == HAT) { ! 506: return(pfa->st); ! 507: } ! 508: return(where[0]); ! 509: } ! 510: ! 511: match(pfa, p) ! 512: register struct fa *pfa; ! 513: register char *p; ! 514: { ! 515: register count; ! 516: char c; ! 517: if (p == 0) return(0); ! 518: if (pfa->cch == 1) { /* fast test for first character, if possible */ ! 519: c = (++pfa)->cch; ! 520: do ! 521: if (c == *p) { ! 522: p++; ! 523: pfa = pfa->st; ! 524: goto adv; ! 525: } ! 526: while (*p++ != 0); ! 527: return(0); ! 528: } ! 529: adv: if ((count = pfa->cch) < 0) return(1); ! 530: do { ! 531: for (pfa += count; count; count--, pfa--) ! 532: if (pfa->cch == *p) { ! 533: break; ! 534: } ! 535: pfa = pfa->st; ! 536: if ((count = pfa->cch) < 0) return(1); ! 537: } while(*p++ != 0); ! 538: return(0); ! 539: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.