|
|
1.1 ! root 1: /* ! 2: * Copyright (c) 1989 Regents of the University of California. ! 3: * All rights reserved. ! 4: * ! 5: * This code is derived from software contributed to Berkeley by ! 6: * Edward Wang at The University of California, Berkeley. ! 7: * ! 8: * Redistribution and use in source and binary forms are permitted provided ! 9: * that: (1) source distributions retain this entire copyright notice and ! 10: * comment, and (2) distributions including binaries display the following ! 11: * acknowledgement: ``This product includes software developed by the ! 12: * University of California, Berkeley and its contributors'' in the ! 13: * documentation or other materials provided with the distribution and in ! 14: * all advertising materials mentioning features or use of this software. ! 15: * Neither the name of the University nor the names of its contributors may ! 16: * be used to endorse or promote products derived from this software without ! 17: * specific prior written permission. ! 18: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED ! 19: * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF ! 20: * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. ! 21: */ ! 22: ! 23: #ifndef lint ! 24: static char sccsid[] = "@(#)compress.c 3.5 (Berkeley) 6/6/90"; ! 25: #endif /* not lint */ ! 26: ! 27: #include "ww.h" ! 28: #include "tt.h" ! 29: ! 30: /* special */ ! 31: #include <stdio.h> ! 32: int cc_trace = 0; ! 33: FILE *cc_trace_fp; ! 34: ! 35: /* tunable parameters */ ! 36: ! 37: int cc_reverse = 1; ! 38: int cc_sort = 0; ! 39: int cc_chop = 0; ! 40: ! 41: int cc_token_max = 8; /* <= TOKEN_MAX */ ! 42: int cc_token_min = 2; /* > tt.tt_put_token_cost */ ! 43: int cc_npass0 = 1; ! 44: int cc_npass1 = 1; ! 45: ! 46: int cc_bufsize = 1024 * 3; /* XXX, or 80 * 24 * 2 */ ! 47: ! 48: int cc_ntoken = 8192; ! 49: ! 50: #define cc_weight XXX ! 51: #ifndef cc_weight ! 52: int cc_weight = 0; ! 53: #endif ! 54: ! 55: #define TOKEN_MAX 16 ! 56: ! 57: struct cc { ! 58: char string[TOKEN_MAX]; ! 59: char length; ! 60: char flag; ! 61: #ifndef cc_weight ! 62: short weight; ! 63: #endif ! 64: long time; /* time last seen */ ! 65: short bcount; /* count in this buffer */ ! 66: short ccount; /* count in compression */ ! 67: short places; /* places in the buffer */ ! 68: short code; /* token code */ ! 69: struct cc *qforw, *qback; ! 70: struct cc *hforw, **hback; ! 71: }; ! 72: ! 73: short cc_thresholds[TOKEN_MAX + 1]; ! 74: #define thresh(length) (cc_thresholds[length]) ! 75: #define threshp(code, count, length) \ ! 76: ((code) >= 0 || (short) (count) >= cc_thresholds[length]) ! 77: ! 78: #ifndef cc_weight ! 79: short cc_wthresholds[TOKEN_MAX + 1]; ! 80: #define wthresh(length) (cc_wthresholds[length]) ! 81: #define wthreshp(weight, length) ((short) (weight) >= cc_wthresholds[length]) ! 82: #else ! 83: #define wthreshp(weight, length) (0) ! 84: #endif ! 85: ! 86: #ifndef cc_weight ! 87: short cc_wlimits[TOKEN_MAX + 1]; ! 88: #define wlimit(length) (cc_wlimits[length]) ! 89: #endif ! 90: ! 91: #define put_token_score(length) ((length) - tt.tt_put_token_cost) ! 92: ! 93: int cc_score_adjustments[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */ ! 94: #define score_adjust(score, p) \ ! 95: do { \ ! 96: int length = (p)->length; \ ! 97: int ccount = (p)->ccount; \ ! 98: if (threshp((p)->code, ccount, length) || \ ! 99: wthreshp((p)->weight, length)) /* XXX */ \ ! 100: (score) -= length - tt.tt_put_token_cost; \ ! 101: else \ ! 102: (score) += cc_score_adjustments[length][ccount]; \ ! 103: } while (0) ! 104: ! 105: int cc_initial_scores[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */ ! 106: ! 107: struct cc cc_q0a, cc_q0b, cc_q1a, cc_q1b; ! 108: ! 109: #define qinsert(p1, p2) \ ! 110: do { \ ! 111: register struct cc *forw = (p1)->qforw; \ ! 112: register struct cc *back = (p1)->qback; \ ! 113: back->qforw = forw; \ ! 114: forw->qback = back; \ ! 115: forw = (p2)->qforw; \ ! 116: (p1)->qforw = forw; \ ! 117: forw->qback = (p1); \ ! 118: (p2)->qforw = (p1); \ ! 119: (p1)->qback = (p2); \ ! 120: } while (0) ! 121: ! 122: #define qinsertq(q, p) \ ! 123: ((q)->qforw == (q) ? 0 : \ ! 124: ((q)->qback->qforw = (p)->qforw, \ ! 125: (p)->qforw->qback = (q)->qback, \ ! 126: (q)->qforw->qback = (p), \ ! 127: (p)->qforw = (q)->qforw, \ ! 128: (q)->qforw = (q), \ ! 129: (q)->qback = (q))) ! 130: ! 131: #define H (14) ! 132: #define HSIZE (1 << H) ! 133: #define hash(h, c) ((((h) >> H - 8 | (h) << 8) ^ (c)) & HSIZE - 1) ! 134: ! 135: char *cc_buffer; ! 136: struct cc **cc_output; /* the output array */ ! 137: short *cc_places[TOKEN_MAX + 1]; ! 138: short *cc_hashcodes; /* for computing hashcodes */ ! 139: struct cc **cc_htab; /* the hash table */ ! 140: struct cc **cc_tokens; /* holds all the active tokens */ ! 141: struct cc_undo { ! 142: struct cc **pos; ! 143: struct cc *val; ! 144: } *cc_undo; ! 145: ! 146: long cc_time, cc_time0; ! 147: ! 148: char *cc_tt_ob, *cc_tt_obe; ! 149: ! 150: ccinit() ! 151: { ! 152: register i, j; ! 153: register struct cc *p; ! 154: ! 155: if (tt.tt_token_max > cc_token_max) ! 156: tt.tt_token_max = cc_token_max; ! 157: if (tt.tt_token_min < cc_token_min) ! 158: tt.tt_token_min = cc_token_min; ! 159: if (tt.tt_token_min > tt.tt_token_max) { ! 160: tt.tt_ntoken = 0; ! 161: return 0; ! 162: } ! 163: if (tt.tt_ntoken > cc_ntoken / 2) /* not likely */ ! 164: tt.tt_ntoken = cc_ntoken / 2; ! 165: #define C(x) (sizeof (x) / sizeof *(x)) ! 166: for (i = 0; i < C(cc_thresholds); i++) { ! 167: int h = i - tt.tt_put_token_cost; ! 168: if (h > 0) ! 169: cc_thresholds[i] = ! 170: (tt.tt_set_token_cost + 1 + h - 1) / h + 1; ! 171: else ! 172: cc_thresholds[i] = 0; ! 173: } ! 174: for (i = 0; i < C(cc_score_adjustments); i++) { ! 175: int t = cc_thresholds[i]; ! 176: for (j = 0; j < C(*cc_score_adjustments); j++) { ! 177: if (j >= t) ! 178: cc_score_adjustments[i][j] = ! 179: - (i - tt.tt_put_token_cost); ! 180: else if (j < t - 1) ! 181: cc_score_adjustments[i][j] = 0; ! 182: else ! 183: /* ! 184: * cost now is ! 185: * length * (ccount + 1) a ! 186: * cost before was ! 187: * set-token-cost + length + ! 188: * ccount * put-token-cost b ! 189: * the score adjustment is (b - a) ! 190: */ ! 191: cc_score_adjustments[i][j] = ! 192: tt.tt_set_token_cost + i + ! 193: j * tt.tt_put_token_cost - ! 194: i * (j + 1); ! 195: if (j >= t) ! 196: cc_initial_scores[i][j] = 0; ! 197: else ! 198: /* ! 199: * - (set-token-cost + ! 200: * (length - put-token-cost) - ! 201: * (length - put-token-cost) * ccount) ! 202: */ ! 203: cc_initial_scores[i][j] = ! 204: - (tt.tt_set_token_cost + ! 205: (i - tt.tt_put_token_cost) - ! 206: (i - tt.tt_put_token_cost) * j); ! 207: } ! 208: } ! 209: #ifndef cc_weight ! 210: for (i = 1; i < C(cc_wthresholds); i++) { ! 211: cc_wthresholds[i] = ! 212: ((tt.tt_set_token_cost + tt.tt_put_token_cost) / i + ! 213: i / 5 + 1) * ! 214: cc_weight + 1; ! 215: cc_wlimits[i] = cc_wthresholds[i] + cc_weight; ! 216: } ! 217: #endif ! 218: #undef C ! 219: if ((cc_output = (struct cc **) ! 220: malloc((unsigned) cc_bufsize * sizeof *cc_output)) == 0) ! 221: goto nomem; ! 222: if ((cc_hashcodes = (short *) ! 223: malloc((unsigned) cc_bufsize * sizeof *cc_hashcodes)) == 0) ! 224: goto nomem; ! 225: if ((cc_htab = (struct cc **) malloc(HSIZE * sizeof *cc_htab)) == 0) ! 226: goto nomem; ! 227: if ((cc_tokens = (struct cc **) ! 228: malloc((unsigned) ! 229: (cc_ntoken + tt.tt_token_max - tt.tt_token_min + 1) * ! 230: sizeof *cc_tokens)) == 0) ! 231: goto nomem; ! 232: if ((cc_undo = (struct cc_undo *) ! 233: malloc((unsigned) cc_bufsize * sizeof *cc_undo)) == 0) ! 234: goto nomem; ! 235: for (i = tt.tt_token_min; i <= tt.tt_token_max; i++) ! 236: if ((cc_places[i] = (short *) ! 237: malloc((unsigned) cc_bufsize * sizeof **cc_places)) == 0) ! 238: goto nomem; ! 239: cc_q0a.qforw = cc_q0a.qback = &cc_q0a; ! 240: cc_q0b.qforw = cc_q0b.qback = &cc_q0b; ! 241: cc_q1a.qforw = cc_q1a.qback = &cc_q1a; ! 242: cc_q1b.qforw = cc_q1b.qback = &cc_q1b; ! 243: if ((p = (struct cc *) malloc((unsigned) cc_ntoken * sizeof *p)) == 0) ! 244: goto nomem; ! 245: for (i = 0; i < tt.tt_ntoken; i++) { ! 246: p->code = i; ! 247: p->time = -1; ! 248: p->qback = cc_q0a.qback; ! 249: p->qforw = &cc_q0a; ! 250: p->qback->qforw = p; ! 251: cc_q0a.qback = p; ! 252: p++; ! 253: } ! 254: for (; i < cc_ntoken; i++) { ! 255: p->code = -1; ! 256: p->time = -1; ! 257: p->qback = cc_q1a.qback; ! 258: p->qforw = &cc_q1a; ! 259: p->qback->qforw = p; ! 260: cc_q1a.qback = p; ! 261: p++; ! 262: } ! 263: cc_tt_ob = tt_ob; ! 264: cc_tt_obe = tt_obe; ! 265: if ((cc_buffer = malloc((unsigned) cc_bufsize)) == 0) ! 266: goto nomem; ! 267: return 0; ! 268: nomem: ! 269: wwerrno = WWE_NOMEM; ! 270: return -1; ! 271: } ! 272: ! 273: ccstart() ! 274: { ! 275: register struct cc *p; ! 276: int ccflush(); ! 277: ! 278: (*tt.tt_flush)(); ! 279: tt_obp = tt_ob = cc_buffer; ! 280: tt_obe = tt_ob + cc_bufsize; ! 281: tt.tt_flush = ccflush; ! 282: bzero((char *) cc_htab, HSIZE * sizeof *cc_htab); ! 283: for (p = cc_q0a.qforw; p != &cc_q0a; p = p->qforw) ! 284: p->hback = 0; ! 285: for (p = cc_q1a.qforw; p != &cc_q1a; p = p->qforw) ! 286: p->hback = 0; ! 287: if (cc_trace) ! 288: cc_trace_fp = fopen("window-trace", "a"); ! 289: } ! 290: ! 291: ccend() ! 292: { ! 293: int ttflush(); ! 294: ! 295: (*tt.tt_flush)(); ! 296: tt_obp = tt_ob = cc_tt_ob; ! 297: tt_obe = cc_tt_obe; ! 298: tt.tt_flush = ttflush; ! 299: if (cc_trace_fp != NULL) { ! 300: (void) fclose(cc_trace_fp); ! 301: cc_trace_fp = NULL; ! 302: } ! 303: } ! 304: ! 305: ccflush() ! 306: { ! 307: int bufsize = tt_obp - tt_ob; ! 308: int n; ! 309: int ttflush(); ! 310: ! 311: if (tt_ob != cc_buffer) ! 312: abort(); ! 313: if (cc_trace_fp != NULL) { ! 314: (void) fwrite(tt_ob, 1, bufsize, cc_trace_fp); ! 315: putc(-1, cc_trace_fp); ! 316: } ! 317: if (bufsize < tt.tt_token_min) { ! 318: ttflush(); ! 319: return; ! 320: } ! 321: tt_obp = tt_ob = cc_tt_ob; ! 322: tt_obe = cc_tt_obe; ! 323: tt.tt_flush = ttflush; ! 324: cc_time0 = cc_time; ! 325: cc_time += bufsize; ! 326: n = cc_sweep_phase(cc_buffer, bufsize, cc_tokens); ! 327: cc_compress_phase(cc_output, bufsize, cc_tokens, n); ! 328: cc_output_phase(cc_buffer, cc_output, bufsize); ! 329: ttflush(); ! 330: tt_obp = tt_ob = cc_buffer; ! 331: tt_obe = cc_buffer + cc_bufsize; ! 332: tt.tt_flush = ccflush; ! 333: } ! 334: ! 335: cc_sweep_phase(buffer, bufsize, tokens) ! 336: char *buffer; ! 337: struct cc **tokens; ! 338: { ! 339: register struct cc **pp = tokens; ! 340: register i, n; ! 341: #ifdef STATS ! 342: int nn, ii; ! 343: #endif ! 344: ! 345: #ifdef STATS ! 346: if (verbose >= 0) ! 347: time_begin(); ! 348: if (verbose > 0) ! 349: printf("Sweep:"); ! 350: #endif ! 351: cc_sweep0(buffer, bufsize, tt.tt_token_min - 1); ! 352: #ifdef STATS ! 353: ntoken_stat = 0; ! 354: nn = 0; ! 355: ii = 0; ! 356: #endif ! 357: for (i = tt.tt_token_min; i <= tt.tt_token_max; i++) { ! 358: #ifdef STATS ! 359: if (verbose > 0) { ! 360: if (ii > 7) { ! 361: printf("\n "); ! 362: ii = 0; ! 363: } ! 364: ii++; ! 365: printf(" (%d", i); ! 366: (void) fflush(stdout); ! 367: } ! 368: #endif ! 369: n = cc_sweep(buffer, bufsize, pp, i); ! 370: pp += n; ! 371: #ifdef STATS ! 372: if (verbose > 0) { ! 373: if (--n > 0) { ! 374: printf(" %d", n); ! 375: nn += n; ! 376: } ! 377: putchar(')'); ! 378: } ! 379: #endif ! 380: } ! 381: qinsertq(&cc_q1b, &cc_q1a); ! 382: #ifdef STATS ! 383: if (verbose > 0) ! 384: printf("\n %d tokens, %d candidates\n", ! 385: ntoken_stat, nn); ! 386: if (verbose >= 0) ! 387: time_end(); ! 388: #endif ! 389: return pp - tokens; ! 390: } ! 391: ! 392: cc_sweep0(buffer, n, length) ! 393: char *buffer; ! 394: { ! 395: register char *p; ! 396: register short *hc; ! 397: register i; ! 398: register short c; ! 399: register short pc = tt.tt_padc; ! 400: ! 401: /* n and length are at least 1 */ ! 402: p = buffer++; ! 403: hc = cc_hashcodes; ! 404: i = n; ! 405: do { ! 406: if ((*hc++ = *p++) == pc) ! 407: hc[-1] = -1; ! 408: } while (--i); ! 409: while (--length) { ! 410: p = buffer++; ! 411: hc = cc_hashcodes; ! 412: for (i = n--; --i;) { ! 413: if ((c = *p++) == pc || *hc < 0) ! 414: c = -1; ! 415: else ! 416: c = hash(*hc, c); ! 417: *hc++ = c; ! 418: } ! 419: } ! 420: } ! 421: ! 422: cc_sweep(buffer, bufsize, tokens, length) ! 423: char *buffer; ! 424: struct cc **tokens; ! 425: register length; ! 426: { ! 427: register struct cc *p; ! 428: register char *cp; ! 429: register i; ! 430: short *hc; ! 431: short *places = cc_places[length]; ! 432: struct cc **pp = tokens; ! 433: short threshold = thresh(length); ! 434: #ifndef cc_weight ! 435: short wthreshold = wthresh(length); ! 436: short limit = wlimit(length); ! 437: #endif ! 438: int time; ! 439: short pc = tt.tt_padc; ! 440: ! 441: i = length - 1; ! 442: bufsize -= i; ! 443: cp = buffer + i; ! 444: hc = cc_hashcodes; ! 445: time = cc_time0; ! 446: for (i = 0; i < bufsize; i++, time++) { ! 447: struct cc **h; ! 448: ! 449: { ! 450: register short *hc1 = hc; ! 451: register short c = *cp++; ! 452: register short hh; ! 453: if ((hh = *hc1) < 0 || c == pc) { ! 454: *hc1++ = -1; ! 455: hc = hc1; ! 456: continue; ! 457: } ! 458: h = cc_htab + (*hc1++ = hash(hh, c)); ! 459: hc = hc1; ! 460: } ! 461: for (p = *h; p != 0; p = p->hforw) ! 462: if (p->length == (char) length) { ! 463: register char *p1 = p->string; ! 464: register char *p2 = cp - length; ! 465: register n = length; ! 466: do ! 467: if (*p1++ != *p2++) ! 468: goto fail; ! 469: while (--n); ! 470: break; ! 471: fail:; ! 472: } ! 473: if (p == 0) { ! 474: p = cc_q1a.qback; ! 475: if (p == &cc_q1a || ! 476: p->time >= cc_time0 && p->length == (char) length) ! 477: continue; ! 478: if (p->hback != 0) ! 479: if ((*p->hback = p->hforw) != 0) ! 480: p->hforw->hback = p->hback; ! 481: { ! 482: register char *p1 = p->string; ! 483: register char *p2 = cp - length; ! 484: register n = length; ! 485: do ! 486: *p1++ = *p2++; ! 487: while (--n); ! 488: } ! 489: p->length = length; ! 490: #ifndef cc_weight ! 491: p->weight = cc_weight; ! 492: #endif ! 493: p->time = time; ! 494: p->bcount = 1; ! 495: p->ccount = 0; ! 496: p->flag = 0; ! 497: if ((p->hforw = *h) != 0) ! 498: p->hforw->hback = &p->hforw; ! 499: *h = p; ! 500: p->hback = h; ! 501: qinsert(p, &cc_q1a); ! 502: places[i] = -1; ! 503: p->places = i; ! 504: #ifdef STATS ! 505: ntoken_stat++; ! 506: #endif ! 507: } else if (p->time < cc_time0) { ! 508: #ifndef cc_weight ! 509: if ((p->weight += p->time - time) < 0) ! 510: p->weight = cc_weight; ! 511: else if ((p->weight += cc_weight) > limit) ! 512: p->weight = limit; ! 513: #endif ! 514: p->time = time; ! 515: p->bcount = 1; ! 516: p->ccount = 0; ! 517: if (p->code >= 0) { ! 518: p->flag = 1; ! 519: *pp++ = p; ! 520: } else ! 521: #ifndef cc_weight ! 522: if (p->weight >= wthreshold) { ! 523: p->flag = 1; ! 524: *pp++ = p; ! 525: qinsert(p, &cc_q1b); ! 526: } else ! 527: #endif ! 528: { ! 529: p->flag = 0; ! 530: qinsert(p, &cc_q1a); ! 531: } ! 532: places[i] = -1; ! 533: p->places = i; ! 534: #ifdef STATS ! 535: ntoken_stat++; ! 536: #endif ! 537: } else if (p->time + length > time) { ! 538: /* ! 539: * overlapping token, don't count as two and ! 540: * don't update time, but do adjust weight to offset ! 541: * the difference ! 542: */ ! 543: #ifndef cc_weight ! 544: if (cc_weight != 0) { /* XXX */ ! 545: p->weight += time - p->time; ! 546: if (!p->flag && p->weight >= wthreshold) { ! 547: p->flag = 1; ! 548: *pp++ = p; ! 549: qinsert(p, &cc_q1b); ! 550: } ! 551: } ! 552: #endif ! 553: places[i] = p->places; ! 554: p->places = i; ! 555: } else { ! 556: #ifndef cc_weight ! 557: if ((p->weight += p->time - time) < 0) ! 558: p->weight = cc_weight; ! 559: else if ((p->weight += cc_weight) > limit) ! 560: p->weight = limit; ! 561: #endif ! 562: p->time = time; ! 563: p->bcount++; ! 564: if (!p->flag && ! 565: /* code must be < 0 if flag false here */ ! 566: (p->bcount >= threshold ! 567: #ifndef cc_weight ! 568: || p->weight >= wthreshold ! 569: #endif ! 570: )) { ! 571: p->flag = 1; ! 572: *pp++ = p; ! 573: qinsert(p, &cc_q1b); ! 574: } ! 575: places[i] = p->places; ! 576: p->places = i; ! 577: } ! 578: } ! 579: if ((i = pp - tokens) > 0) { ! 580: *pp = 0; ! 581: if (cc_reverse) ! 582: cc_sweep_reverse(tokens, places); ! 583: if (cc_sort && i > 1) { ! 584: int cc_token_compare(); ! 585: qsort((char *) tokens, i, sizeof *tokens, ! 586: cc_token_compare); ! 587: } ! 588: if (cc_chop) { ! 589: if ((i = i * cc_chop / 100) == 0) ! 590: i = 1; ! 591: tokens[i] = 0; ! 592: } ! 593: i++; ! 594: } ! 595: return i; ! 596: } ! 597: ! 598: cc_sweep_reverse(pp, places) ! 599: register struct cc **pp; ! 600: register short *places; ! 601: { ! 602: register struct cc *p; ! 603: register short front, back, t; ! 604: ! 605: while ((p = *pp++) != 0) { ! 606: back = -1; ! 607: t = p->places; ! 608: /* the list is never empty */ ! 609: do { ! 610: front = places[t]; ! 611: places[t] = back; ! 612: back = t; ! 613: } while ((t = front) >= 0); ! 614: p->places = back; ! 615: } ! 616: } ! 617: ! 618: cc_compress_phase(output, bufsize, tokens, ntoken) ! 619: struct cc **output; ! 620: struct cc **tokens; ! 621: { ! 622: register i; ! 623: ! 624: bzero((char *) output, bufsize * sizeof *output); ! 625: for (i = 0; i < cc_npass0; i++) ! 626: cc_compress_phase1(output, tokens, ntoken, 0); ! 627: for (i = 0; i < cc_npass1; i++) ! 628: cc_compress_phase1(output, tokens, ntoken, 1); ! 629: cc_compress_cleanup(output, bufsize); ! 630: } ! 631: ! 632: cc_compress_phase1(output, tokens, ntoken, flag) ! 633: register struct cc **output; ! 634: struct cc **tokens; ! 635: { ! 636: register int i = 0; ! 637: register struct cc **pp; ! 638: #ifdef STATS ! 639: int nt = 0, cc = 0, nc = 0; ! 640: #endif ! 641: ! 642: #ifdef STATS ! 643: if (verbose >= 0) ! 644: time_begin(); ! 645: if (verbose > 0) ! 646: printf("Compress:"); ! 647: #endif ! 648: pp = tokens; ! 649: while (pp < tokens + ntoken) { ! 650: #ifdef STATS ! 651: if (verbose > 0) { ! 652: ntoken_stat = 0; ! 653: ccount_stat = 0; ! 654: ncover_stat = 0; ! 655: if (i > 2) { ! 656: printf("\n "); ! 657: i = 0; ! 658: } ! 659: i++; ! 660: printf(" (%d", (*pp)->length); ! 661: (void) fflush(stdout); ! 662: } ! 663: #endif ! 664: pp += cc_compress(output, pp, flag); ! 665: #ifdef STATS ! 666: if (verbose > 0) { ! 667: printf(" %dt %du %dc)", ntoken_stat, ccount_stat, ! 668: ncover_stat); ! 669: nt += ntoken_stat; ! 670: cc += ccount_stat; ! 671: nc += ncover_stat; ! 672: } ! 673: #endif ! 674: } ! 675: #ifdef STATS ! 676: if (verbose > 0) ! 677: printf("\n total: (%dt %du %dc)\n", nt, cc, nc); ! 678: if (verbose >= 0) ! 679: time_end(); ! 680: #endif ! 681: } ! 682: ! 683: cc_compress_cleanup(output, bufsize) ! 684: register struct cc **output; ! 685: { ! 686: register struct cc **end; ! 687: ! 688: /* the previous output phase may have been interrupted */ ! 689: qinsertq(&cc_q0b, &cc_q0a); ! 690: for (end = output + bufsize; output < end;) { ! 691: register struct cc *p; ! 692: register length; ! 693: if ((p = *output) == 0) { ! 694: output++; ! 695: continue; ! 696: } ! 697: length = p->length; ! 698: if (!p->flag) { ! 699: } else if (p->code >= 0) { ! 700: qinsert(p, &cc_q0b); ! 701: p->flag = 0; ! 702: } else if (p->ccount == 0) { ! 703: *output = 0; ! 704: } else if (p->ccount >= thresh(length) ! 705: #ifndef cc_weight ! 706: || wthreshp(p->weight, length) ! 707: #endif ! 708: ) { ! 709: p->flag = 0; ! 710: } else { ! 711: p->ccount = 0; ! 712: *output = 0; ! 713: } ! 714: output += length; ! 715: } ! 716: } ! 717: ! 718: cc_compress(output, tokens, flag) ! 719: struct cc **output; ! 720: struct cc **tokens; ! 721: char flag; ! 722: { ! 723: struct cc **pp = tokens; ! 724: register struct cc *p = *pp++; ! 725: int length = p->length; ! 726: int threshold = thresh(length); ! 727: #ifndef cc_weight ! 728: short wthreshold = wthresh(length); ! 729: #endif ! 730: short *places = cc_places[length]; ! 731: int *initial_scores = cc_initial_scores[length]; ! 732: int initial_score0 = put_token_score(length); ! 733: ! 734: do { ! 735: int score; ! 736: register struct cc_undo *undop; ! 737: int ccount; ! 738: #ifdef STATS ! 739: int ncover; ! 740: #endif ! 741: int i; ! 742: ! 743: ccount = p->ccount; ! 744: if ((short) ccount >= p->bcount) ! 745: continue; ! 746: if (p->code >= 0 || ccount >= threshold) ! 747: score = 0; ! 748: #ifndef cc_weight ! 749: else if (p->weight >= wthreshold) ! 750: /* allow one fewer match than normal */ ! 751: /* XXX, should adjust for ccount */ ! 752: score = - tt.tt_set_token_cost; ! 753: #endif ! 754: else ! 755: score = initial_scores[ccount]; ! 756: undop = cc_undo; ! 757: #ifdef STATS ! 758: ncover = 0; ! 759: #endif ! 760: for (i = p->places; i >= 0; i = places[i]) { ! 761: register struct cc **jp; ! 762: register struct cc *x; ! 763: register struct cc **ip = output + i; ! 764: register score0 = initial_score0; ! 765: struct cc **iip = ip + length; ! 766: struct cc_undo *undop1 = undop; ! 767: ! 768: if ((x = *(jp = ip)) != 0) ! 769: goto z; ! 770: while (--jp >= output) ! 771: if ((x = *jp) != 0) { ! 772: if (jp + x->length > ip) ! 773: goto z; ! 774: break; ! 775: } ! 776: jp = ip + 1; ! 777: while (jp < iip) { ! 778: if ((x = *jp) == 0) { ! 779: jp++; ! 780: continue; ! 781: } ! 782: z: ! 783: if (x == p) ! 784: goto undo; ! 785: #ifdef STATS ! 786: ncover++; ! 787: #endif ! 788: undop->pos = jp; ! 789: undop->val = x; ! 790: undop++; ! 791: *jp = 0; ! 792: x->ccount--; ! 793: score_adjust(score0, x); ! 794: if (score0 < 0 && flag) ! 795: goto undo; ! 796: jp += x->length; ! 797: } ! 798: undop->pos = ip; ! 799: undop->val = 0; ! 800: undop++; ! 801: *ip = p; ! 802: ccount++; ! 803: score += score0; ! 804: continue; ! 805: undo: ! 806: while (--undop >= undop1) ! 807: if (*undop->pos = x = undop->val) ! 808: x->ccount++; ! 809: undop++; ! 810: } ! 811: if (score > 0) { ! 812: #ifdef STATS ! 813: ccount_stat += ccount - p->ccount; ! 814: ntoken_stat++; ! 815: ncover_stat += ncover; ! 816: #endif ! 817: p->ccount = ccount; ! 818: } else { ! 819: register struct cc_undo *u = cc_undo; ! 820: while (--undop >= u) { ! 821: register struct cc *x; ! 822: if (*undop->pos = x = undop->val) ! 823: x->ccount++; ! 824: } ! 825: } ! 826: } while ((p = *pp++) != 0); ! 827: return pp - tokens; ! 828: } ! 829: ! 830: cc_output_phase(buffer, output, bufsize) ! 831: register char *buffer; ! 832: register struct cc **output; ! 833: register bufsize; ! 834: { ! 835: register i; ! 836: register struct cc *p, *p1; ! 837: ! 838: for (i = 0; i < bufsize;) { ! 839: if ((p = output[i]) == 0) { ! 840: ttputc(buffer[i]); ! 841: i++; ! 842: } else if (p->code >= 0) { ! 843: if (--p->ccount == 0) ! 844: qinsert(p, &cc_q0a); ! 845: (*tt.tt_put_token)(p->code, p->string, p->length); ! 846: wwntokuse++; ! 847: wwntoksave += put_token_score(p->length); ! 848: i += p->length; ! 849: } else if ((p1 = cc_q0a.qback) != &cc_q0a) { ! 850: p->code = p1->code; ! 851: p1->code = -1; ! 852: qinsert(p1, &cc_q1a); ! 853: if (--p->ccount == 0) ! 854: qinsert(p, &cc_q0a); ! 855: else ! 856: qinsert(p, &cc_q0b); ! 857: (*tt.tt_set_token)(p->code, p->string, p->length); ! 858: wwntokdef++; ! 859: wwntoksave -= tt.tt_set_token_cost; ! 860: i += p->length; ! 861: } else { ! 862: p->ccount--; ! 863: ttwrite(p->string, p->length); ! 864: wwntokbad++; ! 865: i += p->length; ! 866: } ! 867: } ! 868: wwntokc += bufsize; ! 869: } ! 870: ! 871: cc_token_compare(p1, p2) ! 872: struct cc **p1, **p2; ! 873: { ! 874: return (*p2)->bcount - (*p1)->bcount; ! 875: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.