|
|
1.1 ! root 1: #include <stdio.h> ! 2: #include <ctype.h> ! 3: #include <signal.h> ! 4: #include <sys/types.h> ! 5: #include <sys/stat.h> ! 6: ! 7: #define N 16 ! 8: #define C 20 ! 9: #define NF 10 ! 10: #define TREEZ 32 /* no less than N and best if power of 2 */ ! 11: ! 12: /* ! 13: * Memory administration ! 14: * ! 15: * Using a lot of memory is great when sorting a lot of data. ! 16: * Using a megabyte to sort the output of `who' loses big. ! 17: * MAXMEM, MINMEM and DEFMEM define the absolute maximum, ! 18: * minimum and default memory requirements. Administrators ! 19: * can override any or all of these via defines at compile time. ! 20: * Users can override the amount allocated (within the limits ! 21: * of MAXMEM and MINMEM) on the command line. ! 22: */ ! 23: ! 24: #ifndef MAXMEM ! 25: #define MAXMEM 1048576 /* Megabyte maximum */ ! 26: #endif ! 27: ! 28: #ifndef MINMEM ! 29: #define MINMEM 16384 /* 16K minimum */ ! 30: #endif ! 31: ! 32: #ifndef DEFMEM ! 33: #define DEFMEM 32768 /* Same as old sort */ ! 34: #endif ! 35: ! 36: enum { ASC, NUM, MON, FLOAT }; ! 37: ! 38: #define blank(c) ((c)==' ' || (c)=='\t') ! 39: ! 40: FILE *is, *os; ! 41: char *dirtry[] = {"/usr/tmp", "/tmp", NULL}; ! 42: char **dirs; ! 43: char file1[100]; ! 44: char *file = file1; ! 45: char *filep; ! 46: #define NAMEOHD 12 /* sizeof("/stm00000aa") */ ! 47: int nfiles; ! 48: int *lspace; ! 49: unsigned alloc, tryfor; ! 50: char bufin[BUFSIZ], bufout[BUFSIZ]; /* Use setbuf's to avoid malloc calls. ! 51: ** malloc seems to get heartburn ! 52: ** when brk returns storage. ! 53: */ ! 54: int maxrec; ! 55: int cmp(), cmpa(); ! 56: int (*compare)() = cmpa; ! 57: char *eol(); ! 58: int term(); ! 59: double atof(); ! 60: int mflg; ! 61: int nway; ! 62: int cflg; ! 63: int uflg; ! 64: char *outfil; ! 65: int unsafeout; /*kludge to assure -m -o works*/ ! 66: char tabchar; ! 67: int eargc; ! 68: char **eargv; ! 69: struct btree { ! 70: char *rp; ! 71: int rn; ! 72: } tree[TREEZ], *treep[TREEZ]; ! 73: int blkcnt[TREEZ]; ! 74: char **blkcur[TREEZ]; ! 75: long wasfirst = 0, notfirst = 0; ! 76: int bonus; ! 77: ! 78: char zero[256]; ! 79: ! 80: char fold[256] = { ! 81: 0200,0201,0202,0203,0204,0205,0206,0207, ! 82: 0210,0211,0212,0213,0214,0215,0216,0217, ! 83: 0220,0221,0222,0223,0224,0225,0226,0227, ! 84: 0230,0231,0232,0233,0234,0235,0236,0237, ! 85: 0240,0241,0242,0243,0244,0245,0246,0247, ! 86: 0250,0251,0252,0253,0254,0255,0256,0257, ! 87: 0260,0261,0262,0263,0264,0265,0266,0267, ! 88: 0270,0271,0272,0273,0274,0275,0276,0277, ! 89: 0300,0301,0302,0303,0304,0305,0306,0307, ! 90: 0310,0311,0312,0313,0314,0315,0316,0317, ! 91: 0320,0321,0322,0323,0324,0325,0326,0327, ! 92: 0330,0331,0332,0333,0334,0335,0336,0337, ! 93: 0340,0341,0342,0343,0344,0345,0346,0347, ! 94: 0350,0351,0352,0353,0354,0355,0356,0357, ! 95: 0360,0361,0362,0363,0364,0365,0366,0367, ! 96: 0370,0371,0372,0373,0374,0375,0376,0377, ! 97: 0000,0001,0002,0003,0004,0005,0006,0007, ! 98: 0010,0011,0012,0013,0014,0015,0016,0017, ! 99: 0020,0021,0022,0023,0024,0025,0026,0027, ! 100: 0030,0031,0032,0033,0034,0035,0036,0037, ! 101: 0040,0041,0042,0043,0044,0045,0046,0047, ! 102: 0050,0051,0052,0053,0054,0055,0056,0057, ! 103: 0060,0061,0062,0063,0064,0065,0066,0067, ! 104: 0070,0071,0072,0073,0074,0075,0076,0077, ! 105: 0100,0101,0102,0103,0104,0105,0106,0107, ! 106: 0110,0111,0112,0113,0114,0115,0116,0117, ! 107: 0120,0121,0122,0123,0124,0125,0126,0127, ! 108: 0130,0131,0132,0133,0134,0135,0136,0137, ! 109: 0140,0101,0102,0103,0104,0105,0106,0107, ! 110: 0110,0111,0112,0113,0114,0115,0116,0117, ! 111: 0120,0121,0122,0123,0124,0125,0126,0127, ! 112: 0130,0131,0132,0173,0174,0175,0176,0177 ! 113: }; ! 114: char nofold[256] = { ! 115: 0200,0201,0202,0203,0204,0205,0206,0207, ! 116: 0210,0211,0212,0213,0214,0215,0216,0217, ! 117: 0220,0221,0222,0223,0224,0225,0226,0227, ! 118: 0230,0231,0232,0233,0234,0235,0236,0237, ! 119: 0240,0241,0242,0243,0244,0245,0246,0247, ! 120: 0250,0251,0252,0253,0254,0255,0256,0257, ! 121: 0260,0261,0262,0263,0264,0265,0266,0267, ! 122: 0270,0271,0272,0273,0274,0275,0276,0277, ! 123: 0300,0301,0302,0303,0304,0305,0306,0307, ! 124: 0310,0311,0312,0313,0314,0315,0316,0317, ! 125: 0320,0321,0322,0323,0324,0325,0326,0327, ! 126: 0330,0331,0332,0333,0334,0335,0336,0337, ! 127: 0340,0341,0342,0343,0344,0345,0346,0347, ! 128: 0350,0351,0352,0353,0354,0355,0356,0357, ! 129: 0360,0361,0362,0363,0364,0365,0366,0367, ! 130: 0370,0371,0372,0373,0374,0375,0376,0377, ! 131: 0000,0001,0002,0003,0004,0005,0006,0007, ! 132: 0010,0011,0012,0013,0014,0015,0016,0017, ! 133: 0020,0021,0022,0023,0024,0025,0026,0027, ! 134: 0030,0031,0032,0033,0034,0035,0036,0037, ! 135: 0040,0041,0042,0043,0044,0045,0046,0047, ! 136: 0050,0051,0052,0053,0054,0055,0056,0057, ! 137: 0060,0061,0062,0063,0064,0065,0066,0067, ! 138: 0070,0071,0072,0073,0074,0075,0076,0077, ! 139: 0100,0101,0102,0103,0104,0105,0106,0107, ! 140: 0110,0111,0112,0113,0114,0115,0116,0117, ! 141: 0120,0121,0122,0123,0124,0125,0126,0127, ! 142: 0130,0131,0132,0133,0134,0135,0136,0137, ! 143: 0140,0141,0142,0143,0144,0145,0146,0147, ! 144: 0150,0151,0152,0153,0154,0155,0156,0157, ! 145: 0160,0161,0162,0163,0164,0165,0166,0167, ! 146: 0170,0171,0172,0173,0174,0175,0176,0177 ! 147: }; ! 148: ! 149: char nonprint[256] = { ! 150: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ! 151: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ! 152: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ! 153: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ! 154: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ! 155: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ! 156: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ! 157: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ! 158: 1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1, ! 159: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ! 160: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, ! 161: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, ! 162: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, ! 163: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, ! 164: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, ! 165: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 ! 166: }; ! 167: ! 168: char dict[256] = { ! 169: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ! 170: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ! 171: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ! 172: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ! 173: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ! 174: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ! 175: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ! 176: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ! 177: 1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1, ! 178: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ! 179: 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ! 180: 0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1, ! 181: 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, ! 182: 0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1, ! 183: 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, ! 184: 0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1 ! 185: }; ! 186: ! 187: struct field { ! 188: char *code; ! 189: char *ignore; ! 190: int fcmp; ! 191: int rflg; ! 192: int bflg[2]; ! 193: int m[2]; ! 194: int n[2]; ! 195: } fields[NF]; ! 196: struct field proto = { ! 197: nofold+128, ! 198: zero+128, ! 199: ASC, ! 200: 1, ! 201: 0,0, ! 202: 0,-1, ! 203: 0,0 ! 204: }; ! 205: int nfields; ! 206: int error = 1; ! 207: char *setfil(); ! 208: ! 209: main(argc, argv) ! 210: char **argv; ! 211: { ! 212: register a; ! 213: char *arg; ! 214: struct field *p, *q; ! 215: int i; ! 216: long incr; ! 217: char *sbrk(); ! 218: char *sp; ! 219: ! 220: copyproto(); ! 221: initree(); ! 222: eargv = argv; ! 223: tryfor = DEFMEM; ! 224: while (--argc > 0) { ! 225: if(**++argv == '-') for(arg = *argv;;) { ! 226: switch(*++arg) { ! 227: case '\0': ! 228: if(arg[-1] == '-') ! 229: eargv[eargc++] = "-"; ! 230: break; ! 231: ! 232: case 'o': ! 233: if(--argc > 0) ! 234: outfil = *++argv; ! 235: continue; ! 236: ! 237: case 'T': ! 238: if (--argc > 0) { ! 239: if ((strlen(*++argv) + NAMEOHD) > sizeof(file1)) { ! 240: diag("path name too long:", *argv); ! 241: exit(1); ! 242: } ! 243: else dirtry[0] = *argv; ! 244: } ! 245: continue; ! 246: ! 247: default: ! 248: field(++*argv,nfields>0); ! 249: break; ! 250: } ! 251: break; ! 252: } else if (**argv == '+') { ! 253: if(++nfields>=NF) { ! 254: diag("too many keys",""); ! 255: exit(1); ! 256: } ! 257: copyproto(); ! 258: field(++*argv,0); ! 259: } else ! 260: eargv[eargc++] = *argv; ! 261: } ! 262: q = &fields[0]; ! 263: for(a=1; a<=nfields; a++) { ! 264: p = &fields[a]; ! 265: if(p->code != proto.code) continue; ! 266: if(p->ignore != proto.ignore) continue; ! 267: if(p->fcmp != proto.fcmp) continue; ! 268: if(p->rflg != proto.rflg) continue; ! 269: if(p->bflg[0] != proto.bflg[0]) continue; ! 270: if(p->bflg[1] != proto.bflg[1]) continue; ! 271: p->code = q->code; ! 272: p->ignore = q->ignore; ! 273: p->fcmp = q->fcmp; ! 274: p->rflg = q->rflg; ! 275: p->bflg[0] = p->bflg[1] = q->bflg[0]; ! 276: } ! 277: if(eargc == 0) ! 278: eargv[eargc++] = "-"; ! 279: if(cflg && eargc>1) { ! 280: diag("can check only 1 file",""); ! 281: exit(1); ! 282: } ! 283: ! 284: safeoutfil(); ! 285: ! 286: sp = sbrk(0); ! 287: lspace = (int *) sp; ! 288: if (!mflg && !cflg) { ! 289: if (tryfor < MINMEM) tryfor = MINMEM; ! 290: else if (tryfor > MAXMEM) tryfor = MAXMEM; ! 291: for (incr=tryfor; (sp + incr) <= sp; incr >>= 1); ! 292: do { ! 293: if ((long)alloc+incr <= tryfor && brk(sp+incr) == 0) { ! 294: sp += incr; ! 295: alloc += incr; ! 296: } ! 297: } while ( ( incr >>= 1 ) >= 512L ); ! 298: if ( brk((char *) lspace + alloc) != 0) { ! 299: diag("allocation error before sort", ""); ! 300: exit(1); ! 301: } ! 302: } ! 303: ! 304: a = -1; ! 305: for(dirs=dirtry; *dirs; dirs++) { ! 306: (void) sprintf(filep=file1, "%s/stm%.5uaa", *dirs, getpid()); ! 307: while (*filep) ! 308: filep++; ! 309: filep -= 2; ! 310: if ( (a=creat(file, 0600)) >=0) ! 311: break; ! 312: } ! 313: if(a < 0) { ! 314: diag("can't locate temp",""); ! 315: exit(1); ! 316: } ! 317: for (i = 3; i <= 20; i++) ! 318: (void) close(i); ! 319: (void) unlink(file); ! 320: if (signal(SIGHUP, SIG_IGN) != SIG_IGN) ! 321: (void) signal(SIGHUP, term); ! 322: if (signal(SIGINT, SIG_IGN) != SIG_IGN) ! 323: (void) signal(SIGINT, term); ! 324: (void) signal(SIGPIPE,term); ! 325: if (signal(SIGTERM, SIG_IGN) != SIG_IGN) ! 326: (void) signal(SIGTERM,term); ! 327: nfiles = eargc; ! 328: if(!mflg && !cflg) { ! 329: sort(); ! 330: if (ferror(stdin)) ! 331: rderror("stdin"); ! 332: (void) fclose(stdin); ! 333: } ! 334: ! 335: if (maxrec == 0) maxrec = 512; ! 336: alloc = (N + 1) * maxrec + N * BUFSIZ; ! 337: for (nway = N; nway >= 2; --nway) { ! 338: if (brk((char *)lspace + alloc) == 0) break; ! 339: alloc -= maxrec + BUFSIZ; ! 340: } ! 341: if (nway < 2) { ! 342: diag("allocation error before merge", ""); ! 343: term(); ! 344: } ! 345: ! 346: if (cflg) checksort(); ! 347: ! 348: wasfirst = notfirst = 0; ! 349: a = mflg || cflg ? 0 : eargc; ! 350: if ((i = nfiles - a) > nway) { /* Do leftovers early */ ! 351: if ((i %= (nway - 1)) == 0) ! 352: i = nway - 1; ! 353: if (i != 1) { ! 354: newfile(); ! 355: (void) setbuf(os, bufout); ! 356: merge(a,a+i); ! 357: a += i; ! 358: } ! 359: } ! 360: for(; a+nway<nfiles || unsafeout&&a<eargc; a=i) { ! 361: i = a+nway; ! 362: if(i>=nfiles) ! 363: i = nfiles; ! 364: newfile(); ! 365: (void) setbuf(os, bufout); ! 366: merge(a, i); ! 367: } ! 368: if(a != nfiles) { ! 369: oldfile(); ! 370: (void) setbuf(os, bufout); ! 371: merge(a, nfiles); ! 372: } ! 373: error = 0; ! 374: term(); ! 375: } ! 376: ! 377: sort() ! 378: { ! 379: register char *cp; ! 380: register c; ! 381: register char **lp; ! 382: register FILE *iop; ! 383: char *keep, *ekeep, **ep; ! 384: int done, i, recz; ! 385: char *f; ! 386: ! 387: /* ! 388: ** Records are read in from the front of the buffer area. ! 389: ** Pointers to the records are allocated from the back of the buffer. ! 390: ** If a partially read record exhausts the buffer, it is saved and ! 391: ** then copied to the start of the buffer for processing with the ! 392: ** next coreload. ! 393: */ ! 394: done = 0; ! 395: keep = 0; ! 396: i = 0; ! 397: iop = NULL; ! 398: c = EOF; ! 399: ep = (char **) (((char *) lspace) + alloc); ! 400: do { ! 401: lp = ep - 1; ! 402: cp = (char *) lspace; ! 403: if (keep != 0) { /* part of record from previous coreload */ ! 404: *lp-- = cp; ! 405: for(; keep < ekeep; *cp++ = *keep++); ! 406: } ! 407: while (cp < (char *) lp) { ! 408: if (keep == 0) *lp-- = cp; ! 409: while (cp < (char *) lp && c != '\n') { ! 410: if(c != EOF) { ! 411: *cp++ = c; ! 412: c = getc(iop); ! 413: continue; ! 414: } else if(iop != NULL) { ! 415: if ((cp > (char *) lspace) && (cp[-1] != '\n')) ! 416: *cp++ = '\n'; ! 417: if(ferror(iop)) ! 418: rderror(f); ! 419: (void) fclose(iop); ! 420: } ! 421: if(i < eargc) { ! 422: if((f = setfil(i++)) == 0) ! 423: iop = stdin; ! 424: else if((iop = fopen(f, "r")) == NULL) ! 425: cant(f); ! 426: (void) setbuf(iop, bufin); ! 427: c = getc(iop); ! 428: } else ! 429: break; ! 430: } ! 431: if (c == '\n') { ! 432: *cp++ = '\n'; ! 433: keep = 0; ! 434: if ((recz = (cp - lp[1])) > maxrec) ! 435: maxrec = recz; ! 436: } ! 437: else if (c != EOF) { /* save partial record */ ! 438: lp++; ! 439: if (keep != 0) { ! 440: diag("whopper record won't fit", ""); ! 441: term(); ! 442: } ! 443: keep = *lp; ! 444: ekeep = cp; ! 445: break; ! 446: } ! 447: if(c == EOF) { ! 448: if(ferror(iop)) ! 449: rderror(f); ! 450: (void) fclose(iop); ! 451: done++; ! 452: lp++; ! 453: break; ! 454: } ! 455: c = getc(iop); ! 456: } ! 457: lp++; ! 458: if(done == 0 || nfiles != eargc) ! 459: newfile(); ! 460: else ! 461: oldfile(); ! 462: (void) setbuf(os, bufout); ! 463: msort(lp, ep); ! 464: if (ferror(os)) ! 465: wterror("sorting"); ! 466: (void) fclose(os); ! 467: } while(done == 0); ! 468: } ! 469: ! 470: ! 471: msort(a, b) ! 472: char **a, **b; ! 473: { ! 474: register struct btree **tp; ! 475: register int i, j, n; ! 476: char *save; ! 477: ! 478: i = (b - a); ! 479: if (i < 1) return; ! 480: else if (i >= TREEZ) n = TREEZ; /* number of blocks of records */ ! 481: else n = i; ! 482: ! 483: /* break into n sorted subgroups of approximately equal size */ ! 484: tp = &(treep[0]); ! 485: j = 0; ! 486: do { ! 487: (*tp++)->rn = j; ! 488: b = a + (blkcnt[j] = i / n); ! 489: qsort(a, b); ! 490: blkcur[j] = a = b; ! 491: i -= blkcnt[j++]; ! 492: } while (--n > 0); ! 493: n = j; ! 494: ! 495: /* make a sorted binary tree using the first record in each group */ ! 496: for (i = 0; i < n;) { ! 497: (*--tp)->rp = *(--blkcur[--j]); ! 498: insert(tp, ++i); ! 499: } ! 500: wasfirst = notfirst = 0; ! 501: bonus = cmpsave(n); ! 502: ! 503: ! 504: j = uflg; ! 505: tp = &(treep[0]); ! 506: while (n > 0) { ! 507: wline((*tp)->rp); ! 508: if (j) save = (*tp)->rp; ! 509: ! 510: /* Get another record and insert. Bypass repeats if uflg */ ! 511: ! 512: do { ! 513: i = (*tp)->rn; ! 514: if (j) while((blkcnt[i] > 1) && ! 515: (**(blkcur[i]-1) == '\0')) { ! 516: --blkcnt[i]; ! 517: --blkcur[i]; ! 518: } ! 519: if (--blkcnt[i] > 0) { ! 520: (*tp)->rp = *(--blkcur[i]); ! 521: insert(tp, n); ! 522: } else { ! 523: if (--n <= 0) break; ! 524: bonus = cmpsave(n); ! 525: tp++; ! 526: } ! 527: } while (j && (*compare)((*tp)->rp, save) == 0); ! 528: } ! 529: } ! 530: ! 531: ! 532: /* Insert the element at tp[0] into its proper place in the array of size n */ ! 533: /* Pretty much Algorith B from 6.2.1 of Knuth, Sorting and Searching */ ! 534: /* Special case for data that appears to be in correct order */ ! 535: ! 536: insert(tp, n) ! 537: struct btree **tp; ! 538: int n; ! 539: { ! 540: register struct btree **lop, **hip, **midp; ! 541: register int c; ! 542: struct btree *hold; ! 543: ! 544: midp = lop = tp; ! 545: hip = lop++ + (n - 1); ! 546: if ((wasfirst > notfirst) && (n > 2) && ! 547: ((*compare)((*tp)->rp, (*lop)->rp) >= 0)) { ! 548: wasfirst += bonus; ! 549: return; ! 550: } ! 551: while ((c = hip - lop) >= 0) { /* leave midp at the one tp is in front of */ ! 552: midp = lop + c / 2; ! 553: if ((c = (*compare)((*tp)->rp, (*midp)->rp)) == 0) break; /* match */ ! 554: if (c < 0) lop = ++midp; /* c < 0 => tp > midp */ ! 555: else hip = midp - 1; /* c > 0 => tp < midp */ ! 556: } ! 557: c = midp - tp; ! 558: if (--c > 0) { /* number of moves to get tp just before midp */ ! 559: hip = tp; ! 560: lop = hip++; ! 561: hold = *lop; ! 562: do *lop++ = *hip++; while (--c > 0); ! 563: *lop = hold; ! 564: notfirst++; ! 565: } else wasfirst += bonus; ! 566: } ! 567: ! 568: ! 569: merge(a, b) ! 570: { ! 571: FILE *tfile[N]; ! 572: char *buffer = (char *) lspace; ! 573: register int nf; /* number of merge files */ ! 574: register struct btree **tp; ! 575: register int i, j; ! 576: char *t, *f; ! 577: char *save, *iobuf; ! 578: ! 579: save = (char *) lspace + (nway * maxrec); ! 580: iobuf = save + maxrec; ! 581: tp = &(treep[0]); ! 582: for (nf=0, i=a; i < b; i++) { ! 583: f = setfil(i); ! 584: if (f == 0) ! 585: tfile[nf] = stdin; ! 586: else if ((tfile[nf] = fopen(f, "r")) == NULL) ! 587: cant(f); ! 588: (*tp)->rp = buffer + (nf * maxrec); ! 589: (*tp)->rn = nf; ! 590: (void) setbuf(tfile[nf],iobuf); ! 591: iobuf += BUFSIZ; ! 592: if (rline(tfile[nf], (*tp)->rp)==0) { ! 593: nf++; ! 594: tp++; ! 595: } else { ! 596: if(ferror(tfile[nf])) ! 597: rderror(f); ! 598: (void) fclose(tfile[nf]); ! 599: } ! 600: } ! 601: ! 602: ! 603: /* make a sorted btree from the first record of each file */ ! 604: for (--tp, i = 1; i++ < nf;) insert(--tp, i); ! 605: ! 606: bonus = cmpsave(nf); ! 607: tp = &(treep[0]); ! 608: j = uflg; ! 609: while (nf > 0) { ! 610: wline((*tp)->rp); ! 611: if (j) cline(save, (*tp)->rp); ! 612: ! 613: /* Get another record and insert. Bypass repeats if uflg */ ! 614: ! 615: do { ! 616: i = (*tp)->rn; ! 617: if (rline(tfile[i], (*tp)->rp)) { ! 618: if (ferror(tfile[i])) ! 619: rderror(setfil(i+a)); ! 620: (void) fclose(tfile[i]); ! 621: if (--nf <= 0) break; ! 622: ++tp; ! 623: bonus = cmpsave(nf); ! 624: } else insert(tp, nf); ! 625: } while (j && (*compare)((*tp)->rp, save) == 0 ); ! 626: } ! 627: ! 628: ! 629: for (i=a; i < b; i++) { ! 630: if (i >= eargc) ! 631: (void) unlink(setfil(i)); ! 632: } ! 633: if (ferror(os)) ! 634: wterror("merging"); ! 635: (void) fclose(os); ! 636: } ! 637: ! 638: wline(s) ! 639: register char *s; ! 640: { ! 641: register FILE *iop; ! 642: ! 643: iop = os; ! 644: do ! 645: putc(*s, iop); ! 646: while (*s++ != '\n'); ! 647: } ! 648: ! 649: cline(tp, fp) ! 650: register char *tp, *fp; ! 651: { ! 652: while ((*tp++ = *fp++) != '\n'); ! 653: } ! 654: ! 655: rline(iop, s) ! 656: FILE *iop; ! 657: register char *s; ! 658: { ! 659: register char *ce; ! 660: register int c; ! 661: register FILE *riop; ! 662: ! 663: riop = iop; ! 664: ce = s + maxrec; ! 665: do { ! 666: c = getc(riop); ! 667: if (c == EOF) ! 668: return(1); ! 669: if (s >= ce) ! 670: --s; ! 671: *s++ = c; ! 672: } while (c != '\n'); ! 673: return(0); ! 674: } ! 675: ! 676: ! 677: checksort() ! 678: { ! 679: char *f; ! 680: char *s[2]; ! 681: register int i, j, r; ! 682: ! 683: f = setfil(0); ! 684: if (f == 0) ! 685: is = stdin; ! 686: else if ((is = fopen(f, "r")) == NULL) ! 687: cant(f); ! 688: (void) setbuf(is, bufin); ! 689: ! 690: i = 0; j = 1; ! 691: s[0] = (char *) lspace; ! 692: s[1] = s[0] + maxrec; ! 693: if ( rline(is, s[0]) ) { ! 694: if (ferror(is)) { ! 695: rderror(f); ! 696: } ! 697: (void) fclose(is); ! 698: exit(0); ! 699: } ! 700: while ( !rline(is, s[j]) ) { ! 701: r = (*compare)(s[i], s[j]); ! 702: if (r < 0) ! 703: disorder("disorder: ", s[j]); ! 704: if (r == 0 && uflg) ! 705: disorder("non-unique: ", s[j]); ! 706: r = i; i = j; j = r; ! 707: } ! 708: if (ferror(is)) ! 709: rderror(f); ! 710: (void) fclose(is); ! 711: exit(0); ! 712: } ! 713: ! 714: ! 715: disorder(s,t) ! 716: char *s, *t; ! 717: { ! 718: register char *u; ! 719: for(u=t; *u!='\n';u++) ; ! 720: *u = 0; ! 721: diag(s,t); ! 722: term(); ! 723: } ! 724: ! 725: newfile() ! 726: { ! 727: register char *f; ! 728: ! 729: f = setfil(nfiles); ! 730: if((os=fopen(f, "w")) == NULL) { ! 731: diag("can't create ",f); ! 732: term(); ! 733: } ! 734: nfiles++; ! 735: } ! 736: ! 737: char * ! 738: setfil(i) ! 739: { ! 740: ! 741: if(i < eargc) ! 742: if(eargv[i][0] == '-' && eargv[i][1] == '\0') ! 743: return(0); ! 744: else ! 745: return(eargv[i]); ! 746: i -= eargc; ! 747: filep[0] = i/26 + 'a'; ! 748: filep[1] = i%26 + 'a'; ! 749: return(file); ! 750: } ! 751: ! 752: oldfile() ! 753: { ! 754: ! 755: if(outfil) { ! 756: if((os=fopen(outfil, "w")) == NULL) { ! 757: diag("can't create ",outfil); ! 758: term(); ! 759: } ! 760: } else ! 761: os = stdout; ! 762: } ! 763: ! 764: safeoutfil() ! 765: { ! 766: register int i; ! 767: struct stat ostat,istat; ! 768: ! 769: if(!mflg||outfil==0) ! 770: return; ! 771: if(stat(outfil,&ostat)==-1) ! 772: return; ! 773: if ((i = eargc - N) < 0) i = 0; /*-N is suff., not nec. */ ! 774: for (; i < eargc; i++) { ! 775: if(stat(eargv[i],&istat)==-1) ! 776: continue; ! 777: if(ostat.st_dev==istat.st_dev&& ! 778: ostat.st_ino==istat.st_ino) ! 779: unsafeout++; ! 780: } ! 781: } ! 782: ! 783: cant(f) ! 784: char *f; ! 785: { ! 786: ! 787: diag("can't open ",f); ! 788: term(); ! 789: } ! 790: ! 791: diag(s,t) ! 792: char *s, *t; ! 793: { ! 794: (void) fputs("sort: ",stderr); ! 795: (void) fputs(s,stderr); ! 796: (void) fputs(t,stderr); ! 797: (void) fputs("\n",stderr); ! 798: } ! 799: ! 800: term() ! 801: { ! 802: register i; ! 803: ! 804: (void) signal(SIGINT, SIG_IGN); ! 805: (void) signal(SIGHUP, SIG_IGN); ! 806: (void) signal(SIGTERM, SIG_IGN); ! 807: if(nfiles == eargc) ! 808: nfiles++; ! 809: for(i=eargc; i<=nfiles; i++) { /*<= in case of interrupt*/ ! 810: (void) unlink(setfil(i)); /*with nfiles not updated*/ ! 811: } ! 812: exit(error); ! 813: } ! 814: ! 815: cmp(i, j) ! 816: char *i, *j; ! 817: { ! 818: register char *pa, *pb; ! 819: char *skip(); ! 820: char *code, *ignore; ! 821: int a, b; ! 822: int k; ! 823: char *la, *lb; ! 824: register int sa; ! 825: int sb; ! 826: char *ipa, *ipb, *jpa, *jpb; ! 827: struct field *fp; ! 828: double za, zb; ! 829: ! 830: for(k = nfields>0; k<=nfields; k++) { ! 831: fp = &fields[k]; ! 832: pa = i; ! 833: pb = j; ! 834: if(k) { ! 835: la = skip(pa, fp, 1); ! 836: pa = skip(pa, fp, 0); ! 837: lb = skip(pb, fp, 1); ! 838: pb = skip(pb, fp, 0); ! 839: } else { ! 840: la = eol(pa); ! 841: lb = eol(pb); ! 842: } ! 843: switch(fp->fcmp) { ! 844: case NUM: ! 845: sa = sb = fp->rflg; ! 846: while(blank(*pa)) ! 847: pa++; ! 848: while(blank(*pb)) ! 849: pb++; ! 850: if(*pa == '-') { ! 851: pa++; ! 852: sa = -sa; ! 853: } ! 854: if(*pb == '-') { ! 855: pb++; ! 856: sb = -sb; ! 857: } ! 858: for(ipa = pa; ipa<la&&isdigit(*ipa); ipa++) ; ! 859: for(ipb = pb; ipb<lb&&isdigit(*ipb); ipb++) ; ! 860: jpa = ipa; ! 861: jpb = ipb; ! 862: a = 0; ! 863: if(sa==sb) ! 864: while(ipa > pa && ipb > pb) ! 865: if(b = *--ipb - *--ipa) ! 866: a = b; ! 867: while(ipa > pa) ! 868: if(*--ipa != '0') ! 869: return(-sa); ! 870: while(ipb > pb) ! 871: if(*--ipb != '0') ! 872: return(sb); ! 873: if(a) return(a*sa); ! 874: if(*(pa=jpa) == '.') ! 875: pa++; ! 876: if(*(pb=jpb) == '.') ! 877: pb++; ! 878: if(sa==sb) ! 879: while(pa<la && isdigit(*pa) ! 880: && pb<lb && isdigit(*pb)) ! 881: if(a = *pb++ - *pa++) ! 882: return(a*sa); ! 883: while(pa<la && isdigit(*pa)) ! 884: if(*pa++ != '0') ! 885: return(-sa); ! 886: while(pb<lb && isdigit(*pb)) ! 887: if(*pb++ != '0') ! 888: return(sb); ! 889: continue; ! 890: case MON: ! 891: sa = fp->rflg*(month(pb)-month(pa)); ! 892: if(sa) ! 893: return(sa); ! 894: else ! 895: continue; ! 896: case FLOAT: ! 897: zb = atof(pb); ! 898: za = atof(pa); ! 899: return (zb>za?fp->rflg:zb==za?0:-fp->rflg); ! 900: } ! 901: code = fp->code; ! 902: ignore = fp->ignore; ! 903: loop: ! 904: while(ignore[*pa]) ! 905: pa++; ! 906: while(ignore[*pb]) ! 907: pb++; ! 908: if(pa>=la || *pa=='\n') ! 909: if(pb<lb && *pb!='\n') ! 910: return(fp->rflg); ! 911: else continue; ! 912: if(pb>=lb || *pb=='\n') ! 913: return(-fp->rflg); ! 914: if((sa = code[*pb++]-code[*pa++]) == 0) ! 915: goto loop; ! 916: return(sa*fp->rflg); ! 917: } ! 918: if(uflg) ! 919: return(0); ! 920: return(cmpa(i, j)); ! 921: } ! 922: ! 923: cmpa(pa, pb) ! 924: register char *pa, *pb; ! 925: { ! 926: while(*pa == *pb++) ! 927: if(*pa++ == '\n') ! 928: return(0); ! 929: return( ! 930: *pa == '\n' ? fields[0].rflg: ! 931: *--pb == '\n' ?-fields[0].rflg: ! 932: *pb > *pa ? fields[0].rflg: ! 933: -fields[0].rflg ! 934: ); ! 935: } ! 936: ! 937: char * ! 938: skip(p, fp, j) ! 939: struct field *fp; ! 940: register char *p; ! 941: { ! 942: register i; ! 943: register char tbc; ! 944: ! 945: if( (i=fp->m[j]) < 0) ! 946: return(eol(p)); ! 947: if (tbc = tabchar) ! 948: while (--i >= 0) { ! 949: while(*p != tbc) ! 950: if(*p != '\n') ! 951: p++; ! 952: else goto ret; ! 953: p++; ! 954: } ! 955: else while (--i >= 0) { ! 956: while(blank(*p)) ! 957: p++; ! 958: while(!blank(*p)) ! 959: if(*p != '\n') ! 960: p++; ! 961: else goto ret; ! 962: } ! 963: if(fp->bflg[j]) ! 964: while(blank(*p)) ! 965: p++; ! 966: i = fp->n[j]; ! 967: if(i==0 && j!=0 && fp->m[j]>0 && p[-1]==tbc) ! 968: p--; ! 969: while((i-- > 0) && (*p != '\n')) ! 970: p++; ! 971: ret: ! 972: return(p); ! 973: } ! 974: ! 975: char * ! 976: eol(p) ! 977: register char *p; ! 978: { ! 979: while(*p != '\n') p++; ! 980: return(p); ! 981: } ! 982: ! 983: copyproto() ! 984: { ! 985: register i; ! 986: register int *p, *q; ! 987: ! 988: p = (int *)&proto; ! 989: q = (int *)&fields[nfields]; ! 990: for(i=0; i<sizeof(proto)/sizeof(*p); i++) ! 991: *q++ = *p++; ! 992: } ! 993: ! 994: initree() ! 995: { ! 996: register struct btree **tpp, *tp; ! 997: register int i; ! 998: ! 999: for (tp = &(tree[0]), tpp = &(treep[0]), i = TREEZ; --i >= 0;) ! 1000: *tpp++ = tp++; ! 1001: } ! 1002: ! 1003: cmpsave(n) ! 1004: register int n; ! 1005: { ! 1006: register int award; ! 1007: ! 1008: if (n < 2) return (0); ! 1009: for (n++, award = 0; (n >>= 1) > 0; award++); ! 1010: return (award); ! 1011: } ! 1012: ! 1013: ! 1014: field(s,k) ! 1015: char *s; ! 1016: { ! 1017: register struct field *p; ! 1018: register d; ! 1019: p = &fields[nfields]; ! 1020: d = 0; ! 1021: for(; *s!=0; s++) { ! 1022: switch(*s) { ! 1023: case '\0': ! 1024: return; ! 1025: ! 1026: case 'b': ! 1027: p->bflg[k]++; ! 1028: break; ! 1029: ! 1030: case 'd': ! 1031: p->ignore = dict+128; ! 1032: break; ! 1033: ! 1034: case 'f': ! 1035: p->code = fold+128; ! 1036: break; ! 1037: case 'i': ! 1038: p->ignore = nonprint+128; ! 1039: break; ! 1040: ! 1041: case 'c': ! 1042: cflg = 1; ! 1043: continue; ! 1044: ! 1045: case 'm': ! 1046: mflg = 1; ! 1047: continue; ! 1048: ! 1049: case 'M': ! 1050: p->fcmp = MON; ! 1051: break; ! 1052: ! 1053: case 'n': ! 1054: p->fcmp = NUM; ! 1055: break; ! 1056: case 'g': ! 1057: p->fcmp = FLOAT; ! 1058: break; ! 1059: case 't': ! 1060: tabchar = *++s; ! 1061: if(tabchar == 0) s--; ! 1062: continue; ! 1063: ! 1064: case 'r': ! 1065: p->rflg = -1; ! 1066: continue; ! 1067: case 'u': ! 1068: uflg = 1; ! 1069: continue; ! 1070: ! 1071: case 'y': ! 1072: if ( *++s ) tryfor = number(&s); ! 1073: else { ! 1074: --s; ! 1075: tryfor = MAXMEM; ! 1076: } ! 1077: continue; ! 1078: ! 1079: case 'z': ! 1080: if ( *++s ) ! 1081: maxrec = number(&s); ! 1082: else --s; ! 1083: continue; ! 1084: ! 1085: case '.': ! 1086: if(p->m[k] == -1) /* -m.n with m missing */ ! 1087: p->m[k] = 0; ! 1088: d = &fields[0].n[0]-&fields[0].m[0]; ! 1089: if (*++s == '\0') { ! 1090: --s; ! 1091: p->m[k+d] = 0; ! 1092: continue; ! 1093: } ! 1094: ! 1095: default: ! 1096: p->m[k+d] = number(&s); ! 1097: } ! 1098: compare = cmp; ! 1099: } ! 1100: } ! 1101: ! 1102: number(ppa) ! 1103: char **ppa; ! 1104: { ! 1105: int n; ! 1106: register char *pa; ! 1107: pa = *ppa; ! 1108: n = 0; ! 1109: while(isdigit(*pa)) { ! 1110: n = n*10 + *pa - '0'; ! 1111: *ppa = pa++; ! 1112: } ! 1113: return(n); ! 1114: } ! 1115: ! 1116: #define qsexc(p,q) t= *p;*p= *q;*q=t ! 1117: #define qstexc(p,q,r) t= *p;*p= *r;*r= *q;*q=t ! 1118: ! 1119: qsort(a,l) ! 1120: char **a, **l; ! 1121: { ! 1122: register char **i, **j; ! 1123: register char **lp, **hp; ! 1124: char **k; ! 1125: int c, d; ! 1126: char *t; ! 1127: unsigned n; ! 1128: ! 1129: ! 1130: start: ! 1131: if((n=l-a) <= 1) ! 1132: return; ! 1133: ! 1134: i = a; ! 1135: j = l - 1; ! 1136: if (n == 2) { ! 1137: if (c = (*compare)(*i, *j)) { ! 1138: if (c > 0) { ! 1139: qsexc(i, j); ! 1140: } ! 1141: } else if (uflg) **i = '\0'; ! 1142: return; ! 1143: } ! 1144: n /= 2; ! 1145: lp = hp = a + n; ! 1146: c = (*compare)(*i, *lp); ! 1147: d = (*compare)(*lp, *j); ! 1148: if (c == 0) { ! 1149: if (d == 0) { /* x x x */ ! 1150: --lp; qsexc(i, lp); ! 1151: hp++; qsexc(hp, j); ! 1152: } else if (d > 0) { /* x x w */ ! 1153: qsexc(i, j); ! 1154: i++; ! 1155: hp++; qsexc(hp, j); ! 1156: } else { /* x x y */ ! 1157: --j; ! 1158: --lp; qsexc(i, lp); ! 1159: } ! 1160: } else if (d == 0) { ! 1161: if (c < 0) { /* w x x */ ! 1162: i++; ! 1163: hp++; qsexc(hp, j); ! 1164: } else { /* y x x */ ! 1165: qsexc(i, j); ! 1166: --j; ! 1167: --lp; qsexc(i, lp); ! 1168: } ! 1169: } else if (c > 0) { ! 1170: if (d > 0) { /* z y x */ ! 1171: qsexc(i, j); ! 1172: i++; --j; ! 1173: } else { /* z y z' */ ! 1174: qsexc(i, lp); ! 1175: i++; ! 1176: if (c = (*compare)(*lp, *j)) { ! 1177: if (c > 0) { ! 1178: qsexc(lp, j); ! 1179: } ! 1180: --j; ! 1181: } else { ! 1182: hp++; qsexc(hp, j); ! 1183: } ! 1184: } ! 1185: } else { ! 1186: if (d > 0) { /* y z y' */ ! 1187: qsexc(lp, j); ! 1188: --j; ! 1189: if (c = (*compare)(*i, *lp)) { ! 1190: if (c > 0) { ! 1191: qsexc(i, lp); ! 1192: } ! 1193: i++; ! 1194: } else { ! 1195: --lp; ! 1196: qsexc(i, lp); ! 1197: } ! 1198: } else { /* x y z */ ! 1199: i++; --j; ! 1200: } ! 1201: } ! 1202: ! 1203: ! 1204: for(;;) { ! 1205: if(i < lp) { ! 1206: if((c = (*compare)(*i, *lp)) == 0) { ! 1207: --lp; ! 1208: qsexc(i, lp); ! 1209: continue; ! 1210: } ! 1211: if(c < 0) { ! 1212: ++i; ! 1213: continue; ! 1214: } ! 1215: } ! 1216: ! 1217: loop: ! 1218: if(j > hp) { ! 1219: if((c = (*compare)(*hp, *j)) == 0) { ! 1220: ++hp; ! 1221: qsexc(hp, j); ! 1222: goto loop; ! 1223: } ! 1224: if(c > 0) { ! 1225: if(i == lp) { ! 1226: ++hp; ! 1227: qstexc(i, hp, j); ! 1228: i = ++lp; ! 1229: goto loop; ! 1230: } ! 1231: qsexc(i, j); ! 1232: --j; ! 1233: ++i; ! 1234: continue; ! 1235: } ! 1236: --j; ! 1237: goto loop; ! 1238: } ! 1239: ! 1240: ! 1241: if(i == lp) { ! 1242: if(uflg) ! 1243: for(k=lp; k<hp;) **k++ = '\0'; ! 1244: if(lp-a >= l-hp) { ! 1245: qsort(hp+1, l); ! 1246: l = lp; ! 1247: } else { ! 1248: qsort(a, lp); ! 1249: a = hp+1; ! 1250: } ! 1251: goto start; ! 1252: } ! 1253: ! 1254: ! 1255: --lp; ! 1256: qstexc(j, lp, i); ! 1257: j = --hp; ! 1258: } ! 1259: } ! 1260: ! 1261: char * months[] = { ! 1262: "jan", ! 1263: "feb", ! 1264: "mar", ! 1265: "apr", ! 1266: "may", ! 1267: "jun", ! 1268: "jul", ! 1269: "aug", ! 1270: "sep", ! 1271: "oct", ! 1272: "nov", ! 1273: "dec" ! 1274: }; ! 1275: month(s) ! 1276: char *s; ! 1277: { ! 1278: register char *t, *u; ! 1279: register i; ! 1280: char *f = fold + 128; ! 1281: ! 1282: while(blank(*s)) ! 1283: s++; ! 1284: for(i=0; i<sizeof(months)/sizeof(*months); i++) { ! 1285: for(t=s,u=months[i]; f[*t++]==f[*u++]; ) ! 1286: if(*u==0) ! 1287: return(i); ! 1288: } ! 1289: return(-1); ! 1290: } ! 1291: ! 1292: ! 1293: rderror(s) ! 1294: char *s; ! 1295: { ! 1296: diag("read error on ", s == 0 ? "stdin" : s); ! 1297: term(); ! 1298: } ! 1299: ! 1300: wterror(s) ! 1301: char *s; ! 1302: { ! 1303: diag("write error while ", s); ! 1304: term(); ! 1305: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.