|
|
1.1 ! root 1: #include <stdio.h> ! 2: #include <ctype.h> ! 3: #include <signal.h> ! 4: #include <sys/types.h> ! 5: #include <sys/stat.h> ! 6: ! 7: #define N 16 ! 8: #define C 20 ! 9: #define NF 10 ! 10: #define TREEZ 32 /* no less than N and best if power of 2 */ ! 11: ! 12: /* ! 13: * Memory administration ! 14: * ! 15: * Using a lot of memory is great when sorting a lot of data. ! 16: * Using a megabyte to sort the output of `who' loses big. ! 17: * MAXMEM, MINMEM and DEFMEM define the absolute maximum, ! 18: * minimum and default memory requirements. Administrators ! 19: * can override any or all of these via defines at compile time. ! 20: * Users can override the amount allocated (within the limits ! 21: * of MAXMEM and MINMEM) on the command line. ! 22: */ ! 23: ! 24: #ifndef MAXMEM ! 25: #define MAXMEM 16777216 /* 2^24 maximum */ ! 26: #endif ! 27: ! 28: #ifndef MINMEM ! 29: #define MINMEM 16384 /* 16K minimum */ ! 30: #endif ! 31: ! 32: #ifndef DEFMEM ! 33: #define DEFMEM 262144 /* old sort was 32768 */ ! 34: #endif ! 35: ! 36: enum { ASC, NUM, MON, FLOAT }; ! 37: ! 38: #define blank(c) ((c)==' ' || (c)=='\t') ! 39: ! 40: FILE *is, *os; ! 41: char *dirtry[] = {"/usr/tmp", "/tmp", NULL}; ! 42: char **dirs; ! 43: char file1[100]; ! 44: char *file = file1; ! 45: char *filep; ! 46: #define NAMEOHD 12 /* sizeof("/stm00000aa") */ ! 47: int nfiles; ! 48: int *lspace; ! 49: unsigned alloc, tryfor; ! 50: char bufin[BUFSIZ], bufout[BUFSIZ]; /* Use setbuf's to avoid malloc calls. ! 51: ** malloc seems to get heartburn ! 52: ** when brk returns storage. ! 53: */ ! 54: int maxrec; ! 55: int cmp(), cmpa(); ! 56: int (*compare)() = cmpa; ! 57: char *eol(); ! 58: int term(); ! 59: double atof(); ! 60: int mflg; ! 61: int nway; ! 62: int cflg; ! 63: int uflg; ! 64: char *outfil; ! 65: int unsafeout; /*kludge to assure -m -o works*/ ! 66: char tabchar; ! 67: int eargc; ! 68: char **eargv; ! 69: struct btree { ! 70: char *rp; ! 71: int rn; ! 72: } tree[TREEZ], *treep[TREEZ]; ! 73: int blkcnt[TREEZ]; ! 74: char **blkcur[TREEZ]; ! 75: long wasfirst = 0, notfirst = 0; ! 76: int bonus; ! 77: ! 78: char zero[256]; ! 79: ! 80: char fold[256] = { ! 81: 0200,0201,0202,0203,0204,0205,0206,0207, ! 82: 0210,0211,0212,0213,0214,0215,0216,0217, ! 83: 0220,0221,0222,0223,0224,0225,0226,0227, ! 84: 0230,0231,0232,0233,0234,0235,0236,0237, ! 85: 0240,0241,0242,0243,0244,0245,0246,0247, ! 86: 0250,0251,0252,0253,0254,0255,0256,0257, ! 87: 0260,0261,0262,0263,0264,0265,0266,0267, ! 88: 0270,0271,0272,0273,0274,0275,0276,0277, ! 89: 0300,0301,0302,0303,0304,0305,0306,0307, ! 90: 0310,0311,0312,0313,0314,0315,0316,0317, ! 91: 0320,0321,0322,0323,0324,0325,0326,0327, ! 92: 0330,0331,0332,0333,0334,0335,0336,0337, ! 93: 0340,0341,0342,0343,0344,0345,0346,0347, ! 94: 0350,0351,0352,0353,0354,0355,0356,0357, ! 95: 0360,0361,0362,0363,0364,0365,0366,0367, ! 96: 0370,0371,0372,0373,0374,0375,0376,0377, ! 97: 0000,0001,0002,0003,0004,0005,0006,0007, ! 98: 0010,0011,0012,0013,0014,0015,0016,0017, ! 99: 0020,0021,0022,0023,0024,0025,0026,0027, ! 100: 0030,0031,0032,0033,0034,0035,0036,0037, ! 101: 0040,0041,0042,0043,0044,0045,0046,0047, ! 102: 0050,0051,0052,0053,0054,0055,0056,0057, ! 103: 0060,0061,0062,0063,0064,0065,0066,0067, ! 104: 0070,0071,0072,0073,0074,0075,0076,0077, ! 105: 0100,0101,0102,0103,0104,0105,0106,0107, ! 106: 0110,0111,0112,0113,0114,0115,0116,0117, ! 107: 0120,0121,0122,0123,0124,0125,0126,0127, ! 108: 0130,0131,0132,0133,0134,0135,0136,0137, ! 109: 0140,0101,0102,0103,0104,0105,0106,0107, ! 110: 0110,0111,0112,0113,0114,0115,0116,0117, ! 111: 0120,0121,0122,0123,0124,0125,0126,0127, ! 112: 0130,0131,0132,0173,0174,0175,0176,0177 ! 113: }; ! 114: char nofold[256] = { ! 115: 0200,0201,0202,0203,0204,0205,0206,0207, ! 116: 0210,0211,0212,0213,0214,0215,0216,0217, ! 117: 0220,0221,0222,0223,0224,0225,0226,0227, ! 118: 0230,0231,0232,0233,0234,0235,0236,0237, ! 119: 0240,0241,0242,0243,0244,0245,0246,0247, ! 120: 0250,0251,0252,0253,0254,0255,0256,0257, ! 121: 0260,0261,0262,0263,0264,0265,0266,0267, ! 122: 0270,0271,0272,0273,0274,0275,0276,0277, ! 123: 0300,0301,0302,0303,0304,0305,0306,0307, ! 124: 0310,0311,0312,0313,0314,0315,0316,0317, ! 125: 0320,0321,0322,0323,0324,0325,0326,0327, ! 126: 0330,0331,0332,0333,0334,0335,0336,0337, ! 127: 0340,0341,0342,0343,0344,0345,0346,0347, ! 128: 0350,0351,0352,0353,0354,0355,0356,0357, ! 129: 0360,0361,0362,0363,0364,0365,0366,0367, ! 130: 0370,0371,0372,0373,0374,0375,0376,0377, ! 131: 0000,0001,0002,0003,0004,0005,0006,0007, ! 132: 0010,0011,0012,0013,0014,0015,0016,0017, ! 133: 0020,0021,0022,0023,0024,0025,0026,0027, ! 134: 0030,0031,0032,0033,0034,0035,0036,0037, ! 135: 0040,0041,0042,0043,0044,0045,0046,0047, ! 136: 0050,0051,0052,0053,0054,0055,0056,0057, ! 137: 0060,0061,0062,0063,0064,0065,0066,0067, ! 138: 0070,0071,0072,0073,0074,0075,0076,0077, ! 139: 0100,0101,0102,0103,0104,0105,0106,0107, ! 140: 0110,0111,0112,0113,0114,0115,0116,0117, ! 141: 0120,0121,0122,0123,0124,0125,0126,0127, ! 142: 0130,0131,0132,0133,0134,0135,0136,0137, ! 143: 0140,0141,0142,0143,0144,0145,0146,0147, ! 144: 0150,0151,0152,0153,0154,0155,0156,0157, ! 145: 0160,0161,0162,0163,0164,0165,0166,0167, ! 146: 0170,0171,0172,0173,0174,0175,0176,0177 ! 147: }; ! 148: ! 149: char nonprint[256] = { ! 150: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ! 151: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ! 152: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ! 153: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ! 154: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ! 155: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ! 156: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ! 157: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ! 158: 1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1, ! 159: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ! 160: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, ! 161: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, ! 162: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, ! 163: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, ! 164: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, ! 165: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 ! 166: }; ! 167: ! 168: char dict[256] = { ! 169: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ! 170: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ! 171: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ! 172: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ! 173: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ! 174: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ! 175: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ! 176: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ! 177: 1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1, ! 178: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ! 179: 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ! 180: 0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1, ! 181: 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, ! 182: 0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1, ! 183: 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, ! 184: 0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1 ! 185: }; ! 186: ! 187: struct field { ! 188: char *code; ! 189: char *ignore; ! 190: int fcmp; ! 191: int rflg; ! 192: int bflg[2]; ! 193: int m[2]; ! 194: int n[2]; ! 195: } fields[NF]; ! 196: struct field proto = { ! 197: nofold+128, ! 198: zero+128, ! 199: ASC, ! 200: 1, ! 201: 0,0, ! 202: 0,-1, ! 203: 0,0 ! 204: }; ! 205: int nfields; ! 206: int error = 1; ! 207: char *setfil(); ! 208: ! 209: main(argc, argv) ! 210: char **argv; ! 211: { ! 212: register a; ! 213: char *arg; ! 214: struct field *p, *q; ! 215: int i; ! 216: long incr; ! 217: char *sbrk(); ! 218: char *sp; ! 219: ! 220: copyproto(); ! 221: initree(); ! 222: eargv = argv; ! 223: tryfor = DEFMEM; ! 224: while (--argc > 0) { ! 225: if(**++argv == '-') for(arg = *argv;;) { ! 226: switch(*++arg) { ! 227: case '\0': ! 228: if(arg[-1] == '-') ! 229: eargv[eargc++] = "-"; ! 230: break; ! 231: ! 232: case 'o': ! 233: if(--argc > 0) ! 234: outfil = *++argv; ! 235: continue; ! 236: ! 237: case 'T': ! 238: if (--argc > 0) { ! 239: if ((strlen(*++argv) + NAMEOHD) > sizeof(file1)) { ! 240: diag("path name too long:", *argv); ! 241: exit(1); ! 242: } ! 243: else dirtry[0] = *argv; ! 244: } ! 245: continue; ! 246: ! 247: default: ! 248: field(++*argv,nfields>0); ! 249: break; ! 250: } ! 251: break; ! 252: } else if (**argv == '+') { ! 253: if(++nfields>=NF) { ! 254: diag("too many keys",""); ! 255: exit(1); ! 256: } ! 257: copyproto(); ! 258: field(++*argv,0); ! 259: } else ! 260: eargv[eargc++] = *argv; ! 261: } ! 262: q = &fields[0]; ! 263: for(a=1; a<=nfields; a++) { ! 264: p = &fields[a]; ! 265: if(p->code != proto.code) continue; ! 266: if(p->ignore != proto.ignore) continue; ! 267: if(p->fcmp != proto.fcmp) continue; ! 268: if(p->rflg != proto.rflg) continue; ! 269: if(p->bflg[0] != proto.bflg[0]) continue; ! 270: if(p->bflg[1] != proto.bflg[1]) continue; ! 271: p->code = q->code; ! 272: p->ignore = q->ignore; ! 273: p->fcmp = q->fcmp; ! 274: p->rflg = q->rflg; ! 275: p->bflg[0] = p->bflg[1] = q->bflg[0]; ! 276: } ! 277: if(eargc == 0) ! 278: eargv[eargc++] = "-"; ! 279: if(cflg && eargc>1) { ! 280: diag("can check only 1 file",""); ! 281: exit(1); ! 282: } ! 283: ! 284: safeoutfil(); ! 285: ! 286: sp = sbrk(0); ! 287: lspace = (int *) sp; ! 288: if (!mflg && !cflg) { ! 289: if (tryfor < MINMEM) tryfor = MINMEM; ! 290: else if (tryfor > MAXMEM) tryfor = MAXMEM; ! 291: for (incr=tryfor; (sp + incr) <= sp; incr >>= 1); ! 292: do { ! 293: if ((long)alloc+incr <= tryfor && brk(sp+incr) == 0) { ! 294: sp += incr; ! 295: alloc += incr; ! 296: } ! 297: } while ( ( incr >>= 1 ) >= 512L ); ! 298: if ( brk((char *) lspace + alloc) != 0) { ! 299: diag("allocation error before sort", ""); ! 300: exit(1); ! 301: } ! 302: } ! 303: ! 304: a = -1; ! 305: for(dirs=dirtry; *dirs; dirs++) { ! 306: (void) sprintf(filep=file1, "%s/stm%.5uaa", *dirs, getpid()); ! 307: while (*filep) ! 308: filep++; ! 309: filep -= 2; ! 310: if ( (a=creat(file, 0600)) >=0) ! 311: break; ! 312: } ! 313: if(a < 0) { ! 314: diag("can't locate temp",""); ! 315: exit(1); ! 316: } ! 317: for (i = 3; i <= 20; i++) ! 318: (void) close(i); ! 319: (void) unlink(file); ! 320: if (signal(SIGHUP, SIG_IGN) != SIG_IGN) ! 321: (void) signal(SIGHUP, term); ! 322: if (signal(SIGINT, SIG_IGN) != SIG_IGN) ! 323: (void) signal(SIGINT, term); ! 324: (void) signal(SIGPIPE,term); ! 325: if (signal(SIGTERM, SIG_IGN) != SIG_IGN) ! 326: (void) signal(SIGTERM,term); ! 327: nfiles = eargc; ! 328: if(!mflg && !cflg) ! 329: sort(); ! 330: ! 331: if (maxrec == 0) maxrec = 512; ! 332: alloc = (N + 1) * maxrec + N * BUFSIZ; ! 333: for (nway = N; nway >= 2; --nway) { ! 334: if (brk((char *)lspace + alloc) == 0) break; ! 335: alloc -= maxrec + BUFSIZ; ! 336: } ! 337: if (nway < 2) { ! 338: diag("allocation error before merge", ""); ! 339: term(); ! 340: } ! 341: ! 342: if (cflg) checksort(); ! 343: ! 344: wasfirst = notfirst = 0; ! 345: a = mflg || cflg ? 0 : eargc; ! 346: if ((i = nfiles - a) > nway) { /* Do leftovers early */ ! 347: if ((i %= (nway - 1)) == 0) ! 348: i = nway - 1; ! 349: if (i != 1) { ! 350: newfile(); ! 351: (void) setbuf(os, bufout); ! 352: merge(a,a+i); ! 353: a += i; ! 354: } ! 355: } ! 356: for(; a+nway<nfiles || unsafeout&&a<eargc; a=i) { ! 357: i = a+nway; ! 358: if(i>=nfiles) ! 359: i = nfiles; ! 360: newfile(); ! 361: (void) setbuf(os, bufout); ! 362: merge(a, i); ! 363: } ! 364: if(a != nfiles) { ! 365: oldfile(); ! 366: (void) setbuf(os, bufout); ! 367: merge(a, nfiles); ! 368: } ! 369: error = 0; ! 370: term(); ! 371: } ! 372: ! 373: #define OPEN(i) do{\ ! 374: if((f=setfil(i++))==0)iop=stdin; \ ! 375: else if((iop=fopen(f,"r"))==NULL)cant(f); \ ! 376: (void)setbuf(iop,bufin);}while(0) ! 377: ! 378: #define GETC(c) \ ! 379: while((c=getc(iop))==EOF) { \ ! 380: if(!checkclose(iop))\ ! 381: rderror(f); \ ! 382: if(i<eargc) \ ! 383: OPEN(i); \ ! 384: else break; \ ! 385: } ! 386: sort() ! 387: { ! 388: register FILE *iop; ! 389: register int c; ! 390: register char *cp,**lp; ! 391: int i; ! 392: char **ep; ! 393: char *f=0; ! 394: /* ! 395: lp[2] points to char after last \n or to beginning. ! 396: lp[3] points to second previous, etc. ! 397: (lp kept 2 below active cell in case machines ! 398: address char** by high byte) ! 399: cp grows toward lp; buffer is full when they meet. ! 400: */ ! 401: ! 402: cp=(char*)lspace; ! 403: ep=(char**)((char*)lspace+alloc); ! 404: lp=ep-2; ! 405: (--lp)[2]=cp; ! 406: i=0; ! 407: OPEN(i); ! 408: GETC(c); ! 409: for(;;) { ! 410: while(c!=EOF&&cp<(char*)lp) { ! 411: *cp++=c; ! 412: if(c=='\n') { ! 413: (--lp)[2]=cp; ! 414: if(cp-lp[3]>maxrec)maxrec=cp-lp[3]; ! 415: } ! 416: GETC(c); ! 417: } ! 418: /* buffer full or end of input files */ ! 419: if(c==EOF) ! 420: break; ! 421: if((char*)lp[2]==(char*)lspace) { ! 422: diag("whopper record won't fit",""); ! 423: term(); ! 424: } ! 425: newfile(); ! 426: (void)setbuf(os,bufout); ! 427: msort(lp+3,ep); ! 428: if(cp[-1]!='\n') { ! 429: (void) memcpy((char*)lspace,lp[2],cp-lp[2]); ! 430: cp-=lp[2]-(char*)lspace; ! 431: } ! 432: else ! 433: cp=(char*)lspace; ! 434: if(!checkclose(os)) ! 435: wterror("sorting"); ! 436: lp=ep-2; ! 437: (--lp)[2]=(char*)lspace; ! 438: } ! 439: /* end of input files */ ! 440: if(cp>(char*)lspace&&cp[-1]!='\n') { ! 441: *cp++='\n'; ! 442: (--lp); ! 443: if(cp-lp[3]>maxrec)maxrec=cp-lp[3]; ! 444: } ! 445: if(nfiles==eargc)oldfile(); ! 446: else newfile(); ! 447: (void)setbuf(os,bufout); ! 448: msort(lp+3,ep); ! 449: if(!checkclose(os)) ! 450: wterror("sorting"); ! 451: } ! 452: ! 453: msort(a, b) ! 454: char **a, **b; ! 455: { ! 456: register struct btree **tp; ! 457: register int i, j, n; ! 458: char *save; ! 459: ! 460: i = (b - a); ! 461: if (i < 1) return; ! 462: else if (i >= TREEZ) n = TREEZ; /* number of blocks of records */ ! 463: else n = i; ! 464: ! 465: /* break into n sorted subgroups of approximately equal size */ ! 466: tp = &(treep[0]); ! 467: j = 0; ! 468: do { ! 469: (*tp++)->rn = j; ! 470: b = a + (blkcnt[j] = i / n); ! 471: qsort(a, b); ! 472: blkcur[j] = a = b; ! 473: i -= blkcnt[j++]; ! 474: } while (--n > 0); ! 475: n = j; ! 476: ! 477: /* make a sorted binary tree using the first record in each group */ ! 478: for (i = 0; i < n;) { ! 479: (*--tp)->rp = *(--blkcur[--j]); ! 480: insert(tp, ++i); ! 481: } ! 482: wasfirst = notfirst = 0; ! 483: bonus = cmpsave(n); ! 484: ! 485: ! 486: j = uflg; ! 487: tp = &(treep[0]); ! 488: while (n > 0) { ! 489: wline((*tp)->rp); ! 490: if (j) save = (*tp)->rp; ! 491: ! 492: /* Get another record and insert. Bypass repeats if uflg */ ! 493: ! 494: do { ! 495: i = (*tp)->rn; ! 496: if (j) while((blkcnt[i] > 1) && ! 497: (**(blkcur[i]-1) == '\0')) { ! 498: --blkcnt[i]; ! 499: --blkcur[i]; ! 500: } ! 501: if (--blkcnt[i] > 0) { ! 502: (*tp)->rp = *(--blkcur[i]); ! 503: insert(tp, n); ! 504: } else { ! 505: if (--n <= 0) break; ! 506: bonus = cmpsave(n); ! 507: tp++; ! 508: } ! 509: } while (j && (*compare)((*tp)->rp, save) == 0); ! 510: } ! 511: } ! 512: ! 513: ! 514: /* Insert the element at tp[0] into its proper place in the array of size n */ ! 515: /* Pretty much Algorith B from 6.2.1 of Knuth, Sorting and Searching */ ! 516: /* Special case for data that appears to be in correct order */ ! 517: ! 518: insert(tp, n) ! 519: struct btree **tp; ! 520: int n; ! 521: { ! 522: register struct btree **lop, **hip, **midp; ! 523: register int c; ! 524: struct btree *hold; ! 525: ! 526: midp = lop = tp; ! 527: hip = lop++ + (n - 1); ! 528: if ((wasfirst > notfirst) && (n > 2) && ! 529: ((*compare)((*tp)->rp, (*lop)->rp) >= 0)) { ! 530: wasfirst += bonus; ! 531: return; ! 532: } ! 533: while ((c = hip - lop) >= 0) { /* leave midp at the one tp is in front of */ ! 534: midp = lop + c / 2; ! 535: if ((c = (*compare)((*tp)->rp, (*midp)->rp)) == 0) break; /* match */ ! 536: if (c < 0) lop = ++midp; /* c < 0 => tp > midp */ ! 537: else hip = midp - 1; /* c > 0 => tp < midp */ ! 538: } ! 539: c = midp - tp; ! 540: if (--c > 0) { /* number of moves to get tp just before midp */ ! 541: hip = tp; ! 542: lop = hip++; ! 543: hold = *lop; ! 544: do *lop++ = *hip++; while (--c > 0); ! 545: *lop = hold; ! 546: notfirst++; ! 547: } else wasfirst += bonus; ! 548: } ! 549: ! 550: ! 551: merge(a, b) ! 552: { ! 553: FILE *tfile[N]; ! 554: char *buffer = (char *) lspace; ! 555: register int nf; /* number of merge files */ ! 556: register struct btree **tp; ! 557: register int i, j; ! 558: char *t, *f; ! 559: char *save, *iobuf; ! 560: ! 561: save = (char *) lspace + (nway * maxrec); ! 562: iobuf = save + maxrec; ! 563: tp = &(treep[0]); ! 564: for (nf=0, i=a; i < b; i++) { ! 565: f = setfil(i); ! 566: if (f == 0) ! 567: tfile[nf] = stdin; ! 568: else if ((tfile[nf] = fopen(f, "r")) == NULL) ! 569: cant(f); ! 570: (*tp)->rp = buffer + (nf * maxrec); ! 571: (*tp)->rn = nf; ! 572: (void) setbuf(tfile[nf],iobuf); ! 573: iobuf += BUFSIZ; ! 574: if (rline(tfile[nf], (*tp)->rp)==0) { ! 575: nf++; ! 576: tp++; ! 577: } else if(!checkclose(tfile[nf])) ! 578: rderror(f); ! 579: } ! 580: ! 581: ! 582: /* make a sorted btree from the first record of each file */ ! 583: for (--tp, i = 1; i++ < nf;) insert(--tp, i); ! 584: ! 585: bonus = cmpsave(nf); ! 586: tp = &(treep[0]); ! 587: j = uflg; ! 588: while (nf > 0) { ! 589: wline((*tp)->rp); ! 590: if (j) cline(save, (*tp)->rp); ! 591: ! 592: /* Get another record and insert. Bypass repeats if uflg */ ! 593: ! 594: do { ! 595: i = (*tp)->rn; ! 596: if (rline(tfile[i], (*tp)->rp)) { ! 597: if (!checkclose(tfile[i])) ! 598: rderror(setfil(i+a)); ! 599: if (--nf <= 0) break; ! 600: ++tp; ! 601: bonus = cmpsave(nf); ! 602: } else insert(tp, nf); ! 603: } while (j && (*compare)((*tp)->rp, save) == 0 ); ! 604: } ! 605: ! 606: ! 607: for (i=a; i < b; i++) { ! 608: if (i >= eargc) ! 609: (void) unlink(setfil(i)); ! 610: } ! 611: if (!checkclose(os)) ! 612: wterror("merging"); ! 613: } ! 614: ! 615: wline(s) ! 616: register char *s; ! 617: { ! 618: register FILE *iop; ! 619: ! 620: iop = os; ! 621: do ! 622: putc(*s, iop); ! 623: while (*s++ != '\n'); ! 624: } ! 625: ! 626: cline(tp, fp) ! 627: register char *tp, *fp; ! 628: { ! 629: while ((*tp++ = *fp++) != '\n'); ! 630: } ! 631: ! 632: rline(iop, s) ! 633: FILE *iop; ! 634: register char *s; ! 635: { ! 636: register char *ce; ! 637: register int c; ! 638: register FILE *riop; ! 639: ! 640: riop = iop; ! 641: ce = s + maxrec; ! 642: do { ! 643: c = getc(riop); ! 644: if (c == EOF) ! 645: return(1); ! 646: if (s >= ce) ! 647: --s; ! 648: *s++ = c; ! 649: } while (c != '\n'); ! 650: return(0); ! 651: } ! 652: ! 653: ! 654: checksort() ! 655: { ! 656: char *f; ! 657: char *s[2]; ! 658: register int i, j, r; ! 659: ! 660: f = setfil(0); ! 661: if (f == 0) ! 662: is = stdin; ! 663: else if ((is = fopen(f, "r")) == NULL) ! 664: cant(f); ! 665: (void) setbuf(is, bufin); ! 666: ! 667: i = 0; j = 1; ! 668: s[0] = (char *) lspace; ! 669: s[1] = s[0] + maxrec; ! 670: if ( rline(is, s[0]) ) { ! 671: if (!checkclose(is)) ! 672: rderror(f); ! 673: exit(0); ! 674: } ! 675: while ( !rline(is, s[j]) ) { ! 676: r = (*compare)(s[i], s[j]); ! 677: if (r < 0) ! 678: disorder("disorder: ", s[j]); ! 679: if (r == 0 && uflg) ! 680: disorder("non-unique: ", s[j]); ! 681: r = i; i = j; j = r; ! 682: } ! 683: if (!checkclose(is)) ! 684: rderror(f); ! 685: exit(0); ! 686: } ! 687: ! 688: ! 689: disorder(s,t) ! 690: char *s, *t; ! 691: { ! 692: register char *u; ! 693: for(u=t; *u!='\n';u++) ; ! 694: *u = 0; ! 695: diag(s,t); ! 696: term(); ! 697: } ! 698: ! 699: newfile() ! 700: { ! 701: register char *f; ! 702: ! 703: f = setfil(nfiles); ! 704: if((os=fopen(f, "w")) == NULL) { ! 705: diag("can't create ",f); ! 706: term(); ! 707: } ! 708: nfiles++; ! 709: } ! 710: ! 711: char * ! 712: setfil(i) ! 713: { ! 714: ! 715: if(i < eargc) ! 716: if(eargv[i][0] == '-' && eargv[i][1] == '\0') ! 717: return(0); ! 718: else ! 719: return(eargv[i]); ! 720: i -= eargc; ! 721: filep[0] = i/26 + 'a'; ! 722: filep[1] = i%26 + 'a'; ! 723: return(file); ! 724: } ! 725: ! 726: oldfile() ! 727: { ! 728: ! 729: if(outfil) { ! 730: if((os=fopen(outfil, "w")) == NULL) { ! 731: diag("can't create ",outfil); ! 732: term(); ! 733: } ! 734: } else ! 735: os = stdout; ! 736: } ! 737: ! 738: safeoutfil() ! 739: { ! 740: register int i; ! 741: struct stat ostat,istat; ! 742: ! 743: if(!mflg||outfil==0) ! 744: return; ! 745: if(stat(outfil,&ostat)==-1) ! 746: return; ! 747: if ((i = eargc - N) < 0) i = 0; /*-N is suff., not nec. */ ! 748: for (; i < eargc; i++) { ! 749: if(stat(eargv[i],&istat)==-1) ! 750: continue; ! 751: if(ostat.st_dev==istat.st_dev&& ! 752: ostat.st_ino==istat.st_ino) ! 753: unsafeout++; ! 754: } ! 755: } ! 756: ! 757: cant(f) ! 758: char *f; ! 759: { ! 760: ! 761: diag("can't open ",f); ! 762: term(); ! 763: } ! 764: ! 765: diag(s,t) ! 766: char *s, *t; ! 767: { ! 768: (void) fputs("sort: ",stderr); ! 769: (void) fputs(s,stderr); ! 770: (void) fputs(t,stderr); ! 771: (void) fputs("\n",stderr); ! 772: } ! 773: ! 774: term() ! 775: { ! 776: register i; ! 777: ! 778: (void) signal(SIGINT, SIG_IGN); ! 779: (void) signal(SIGHUP, SIG_IGN); ! 780: (void) signal(SIGTERM, SIG_IGN); ! 781: if(nfiles == eargc) ! 782: nfiles++; ! 783: for(i=eargc; i<=nfiles; i++) { /*<= in case of interrupt*/ ! 784: (void) unlink(setfil(i)); /*with nfiles not updated*/ ! 785: } ! 786: exit(error); ! 787: } ! 788: ! 789: cmp(i, j) ! 790: char *i, *j; ! 791: { ! 792: register char *pa, *pb; ! 793: char *skip(); ! 794: char *code, *ignore; ! 795: int a, b; ! 796: int k; ! 797: char *la, *lb; ! 798: register int sa; ! 799: int sb; ! 800: char *ipa, *ipb, *jpa, *jpb; ! 801: struct field *fp; ! 802: double za, zb; ! 803: ! 804: for(k = nfields>0; k<=nfields; k++) { ! 805: fp = &fields[k]; ! 806: pa = i; ! 807: pb = j; ! 808: if(k) { ! 809: la = skip(pa, fp, 1); ! 810: pa = skip(pa, fp, 0); ! 811: lb = skip(pb, fp, 1); ! 812: pb = skip(pb, fp, 0); ! 813: } else { ! 814: la = eol(pa); ! 815: lb = eol(pb); ! 816: } ! 817: switch(fp->fcmp) { ! 818: case NUM: ! 819: sa = sb = fp->rflg; ! 820: while(blank(*pa)) ! 821: pa++; ! 822: while(blank(*pb)) ! 823: pb++; ! 824: if(*pa == '-') { ! 825: pa++; ! 826: sa = -sa; ! 827: } ! 828: if(*pb == '-') { ! 829: pb++; ! 830: sb = -sb; ! 831: } ! 832: for(ipa = pa; ipa<la&&isdigit(*ipa); ipa++) ; ! 833: for(ipb = pb; ipb<lb&&isdigit(*ipb); ipb++) ; ! 834: jpa = ipa; ! 835: jpb = ipb; ! 836: a = 0; ! 837: if(sa==sb) ! 838: while(ipa > pa && ipb > pb) ! 839: if(b = *--ipb - *--ipa) ! 840: a = b; ! 841: while(ipa > pa) ! 842: if(*--ipa != '0') ! 843: return(-sa); ! 844: while(ipb > pb) ! 845: if(*--ipb != '0') ! 846: return(sb); ! 847: if(a) return(a*sa); ! 848: if(*(pa=jpa) == '.') ! 849: pa++; ! 850: if(*(pb=jpb) == '.') ! 851: pb++; ! 852: if(sa==sb) ! 853: while(pa<la && isdigit(*pa) ! 854: && pb<lb && isdigit(*pb)) ! 855: if(a = *pb++ - *pa++) ! 856: return(a*sa); ! 857: while(pa<la && isdigit(*pa)) ! 858: if(*pa++ != '0') ! 859: return(-sa); ! 860: while(pb<lb && isdigit(*pb)) ! 861: if(*pb++ != '0') ! 862: return(sb); ! 863: continue; ! 864: case MON: ! 865: sa = fp->rflg*(month(pb)-month(pa)); ! 866: if(sa) ! 867: return(sa); ! 868: else ! 869: continue; ! 870: case FLOAT: ! 871: zb = atof(pb); ! 872: za = atof(pa); ! 873: if(zb > za) ! 874: return fp->rflg; ! 875: else if(zb < za) ! 876: return -fp->rflg; ! 877: else continue; ! 878: } ! 879: code = fp->code; ! 880: ignore = fp->ignore; ! 881: loop: ! 882: while(ignore[*pa]) ! 883: pa++; ! 884: while(ignore[*pb]) ! 885: pb++; ! 886: if(pa>=la || *pa=='\n') ! 887: if(pb<lb && *pb!='\n') ! 888: return(fp->rflg); ! 889: else continue; ! 890: if(pb>=lb || *pb=='\n') ! 891: return(-fp->rflg); ! 892: if((sa = code[*pb++]-code[*pa++]) == 0) ! 893: goto loop; ! 894: return(sa*fp->rflg); ! 895: } ! 896: if(uflg) ! 897: return(0); ! 898: return(cmpa(i, j)); ! 899: } ! 900: ! 901: cmpa(pa, pb) ! 902: register char *pa, *pb; ! 903: { ! 904: while(*pa == *pb++) ! 905: if(*pa++ == '\n') ! 906: return(0); ! 907: return( ! 908: *pa == '\n' ? fields[0].rflg: ! 909: *--pb == '\n' ?-fields[0].rflg: ! 910: *pb > *pa ? fields[0].rflg: ! 911: -fields[0].rflg ! 912: ); ! 913: } ! 914: ! 915: char * ! 916: skip(p, fp, j) ! 917: struct field *fp; ! 918: register char *p; ! 919: { ! 920: register i; ! 921: register char tbc; ! 922: ! 923: if( (i=fp->m[j]) < 0) ! 924: return(eol(p)); ! 925: if (tbc = tabchar) ! 926: while (--i >= 0) { ! 927: while(*p != tbc) ! 928: if(*p != '\n') ! 929: p++; ! 930: else goto ret; ! 931: p++; ! 932: } ! 933: else while (--i >= 0) { ! 934: while(blank(*p)) ! 935: p++; ! 936: while(!blank(*p)) ! 937: if(*p != '\n') ! 938: p++; ! 939: else goto ret; ! 940: } ! 941: if(fp->bflg[j]) ! 942: while(blank(*p)) ! 943: p++; ! 944: i = fp->n[j]; ! 945: if(i==0 && j!=0 && fp->m[j]>0 && p[-1]==tbc) ! 946: p--; ! 947: while((i-- > 0) && (*p != '\n')) ! 948: p++; ! 949: ret: ! 950: return(p); ! 951: } ! 952: ! 953: char * ! 954: eol(p) ! 955: register char *p; ! 956: { ! 957: while(*p != '\n') p++; ! 958: return(p); ! 959: } ! 960: ! 961: copyproto() ! 962: { ! 963: register i; ! 964: register int *p, *q; ! 965: ! 966: p = (int *)&proto; ! 967: q = (int *)&fields[nfields]; ! 968: for(i=0; i<sizeof(proto)/sizeof(*p); i++) ! 969: *q++ = *p++; ! 970: } ! 971: ! 972: initree() ! 973: { ! 974: register struct btree **tpp, *tp; ! 975: register int i; ! 976: ! 977: for (tp = &(tree[0]), tpp = &(treep[0]), i = TREEZ; --i >= 0;) ! 978: *tpp++ = tp++; ! 979: } ! 980: ! 981: cmpsave(n) ! 982: register int n; ! 983: { ! 984: register int award; ! 985: ! 986: if (n < 2) return (0); ! 987: for (n++, award = 0; (n >>= 1) > 0; award++); ! 988: return (award); ! 989: } ! 990: ! 991: ! 992: field(s,k) ! 993: char *s; ! 994: { ! 995: register struct field *p; ! 996: register d; ! 997: p = &fields[nfields]; ! 998: d = 0; ! 999: for(; *s!=0; s++) { ! 1000: switch(*s) { ! 1001: case '\0': ! 1002: return; ! 1003: ! 1004: case 'b': ! 1005: p->bflg[k]++; ! 1006: break; ! 1007: ! 1008: case 'd': ! 1009: p->ignore = dict+128; ! 1010: break; ! 1011: ! 1012: case 'f': ! 1013: p->code = fold+128; ! 1014: break; ! 1015: case 'i': ! 1016: p->ignore = nonprint+128; ! 1017: break; ! 1018: ! 1019: case 'c': ! 1020: cflg = 1; ! 1021: continue; ! 1022: ! 1023: case 'm': ! 1024: mflg = 1; ! 1025: continue; ! 1026: ! 1027: case 'M': ! 1028: p->fcmp = MON; ! 1029: break; ! 1030: ! 1031: case 'n': ! 1032: p->fcmp = NUM; ! 1033: break; ! 1034: case 'g': ! 1035: p->fcmp = FLOAT; ! 1036: break; ! 1037: case 't': ! 1038: tabchar = *++s; ! 1039: if(tabchar == 0) s--; ! 1040: continue; ! 1041: ! 1042: case 'r': ! 1043: p->rflg = -1; ! 1044: continue; ! 1045: case 'u': ! 1046: uflg = 1; ! 1047: continue; ! 1048: ! 1049: case 'y': ! 1050: if ( *++s ) tryfor = number(&s); ! 1051: else { ! 1052: --s; ! 1053: tryfor = MAXMEM; ! 1054: } ! 1055: continue; ! 1056: ! 1057: case 'z': ! 1058: if ( *++s ) ! 1059: maxrec = number(&s); ! 1060: else --s; ! 1061: continue; ! 1062: ! 1063: case '.': ! 1064: if(p->m[k] == -1) /* -m.n with m missing */ ! 1065: p->m[k] = 0; ! 1066: d = &fields[0].n[0]-&fields[0].m[0]; ! 1067: if (*++s == '\0') { ! 1068: --s; ! 1069: p->m[k+d] = 0; ! 1070: continue; ! 1071: } ! 1072: ! 1073: default: ! 1074: p->m[k+d] = number(&s); ! 1075: } ! 1076: compare = cmp; ! 1077: } ! 1078: } ! 1079: ! 1080: number(ppa) ! 1081: char **ppa; ! 1082: { ! 1083: int n; ! 1084: register char *pa; ! 1085: pa = *ppa; ! 1086: n = 0; ! 1087: while(isdigit(*pa)) { ! 1088: n = n*10 + *pa - '0'; ! 1089: *ppa = pa++; ! 1090: } ! 1091: return(n); ! 1092: } ! 1093: ! 1094: #define qsexc(p,q) t= *p;*p= *q;*q=t ! 1095: #define qstexc(p,q,r) t= *p;*p= *r;*r= *q;*q=t ! 1096: ! 1097: qsort(a,l) ! 1098: char **a, **l; ! 1099: { ! 1100: register char **i, **j; ! 1101: register char **lp, **hp; ! 1102: char **k; ! 1103: int c, d; ! 1104: char *t; ! 1105: unsigned n; ! 1106: ! 1107: ! 1108: start: ! 1109: if((n=l-a) <= 1) ! 1110: return; ! 1111: ! 1112: i = a; ! 1113: j = l - 1; ! 1114: if (n == 2) { ! 1115: if (c = (*compare)(*i, *j)) { ! 1116: if (c > 0) { ! 1117: qsexc(i, j); ! 1118: } ! 1119: } else if (uflg) **i = '\0'; ! 1120: return; ! 1121: } ! 1122: n /= 2; ! 1123: lp = hp = a + n; ! 1124: c = (*compare)(*i, *lp); ! 1125: d = (*compare)(*lp, *j); ! 1126: if (c == 0) { ! 1127: if (d == 0) { /* x x x */ ! 1128: --lp; qsexc(i, lp); ! 1129: hp++; qsexc(hp, j); ! 1130: } else if (d > 0) { /* x x w */ ! 1131: qsexc(i, j); ! 1132: i++; ! 1133: hp++; qsexc(hp, j); ! 1134: } else { /* x x y */ ! 1135: --j; ! 1136: --lp; qsexc(i, lp); ! 1137: } ! 1138: } else if (d == 0) { ! 1139: if (c < 0) { /* w x x */ ! 1140: i++; ! 1141: hp++; qsexc(hp, j); ! 1142: } else { /* y x x */ ! 1143: qsexc(i, j); ! 1144: --j; ! 1145: --lp; qsexc(i, lp); ! 1146: } ! 1147: } else if (c > 0) { ! 1148: if (d > 0) { /* z y x */ ! 1149: qsexc(i, j); ! 1150: i++; --j; ! 1151: } else { /* z y z' */ ! 1152: qsexc(i, lp); ! 1153: i++; ! 1154: if (c = (*compare)(*lp, *j)) { ! 1155: if (c > 0) { ! 1156: qsexc(lp, j); ! 1157: } ! 1158: --j; ! 1159: } else { ! 1160: hp++; qsexc(hp, j); ! 1161: } ! 1162: } ! 1163: } else { ! 1164: if (d > 0) { /* y z y' */ ! 1165: qsexc(lp, j); ! 1166: --j; ! 1167: if (c = (*compare)(*i, *lp)) { ! 1168: if (c > 0) { ! 1169: qsexc(i, lp); ! 1170: } ! 1171: i++; ! 1172: } else { ! 1173: --lp; ! 1174: qsexc(i, lp); ! 1175: } ! 1176: } else { /* x y z */ ! 1177: i++; --j; ! 1178: } ! 1179: } ! 1180: ! 1181: ! 1182: for(;;) { ! 1183: if(i < lp) { ! 1184: if((c = (*compare)(*i, *lp)) == 0) { ! 1185: --lp; ! 1186: qsexc(i, lp); ! 1187: continue; ! 1188: } ! 1189: if(c < 0) { ! 1190: ++i; ! 1191: continue; ! 1192: } ! 1193: } ! 1194: ! 1195: loop: ! 1196: if(j > hp) { ! 1197: if((c = (*compare)(*hp, *j)) == 0) { ! 1198: ++hp; ! 1199: qsexc(hp, j); ! 1200: goto loop; ! 1201: } ! 1202: if(c > 0) { ! 1203: if(i == lp) { ! 1204: ++hp; ! 1205: qstexc(i, hp, j); ! 1206: i = ++lp; ! 1207: goto loop; ! 1208: } ! 1209: qsexc(i, j); ! 1210: --j; ! 1211: ++i; ! 1212: continue; ! 1213: } ! 1214: --j; ! 1215: goto loop; ! 1216: } ! 1217: ! 1218: ! 1219: if(i == lp) { ! 1220: if(uflg) ! 1221: for(k=lp; k<hp;) **k++ = '\0'; ! 1222: if(lp-a >= l-hp) { ! 1223: qsort(hp+1, l); ! 1224: l = lp; ! 1225: } else { ! 1226: qsort(a, lp); ! 1227: a = hp+1; ! 1228: } ! 1229: goto start; ! 1230: } ! 1231: ! 1232: ! 1233: --lp; ! 1234: qstexc(j, lp, i); ! 1235: j = --hp; ! 1236: } ! 1237: } ! 1238: ! 1239: char * months[] = { ! 1240: "jan", ! 1241: "feb", ! 1242: "mar", ! 1243: "apr", ! 1244: "may", ! 1245: "jun", ! 1246: "jul", ! 1247: "aug", ! 1248: "sep", ! 1249: "oct", ! 1250: "nov", ! 1251: "dec" ! 1252: }; ! 1253: month(s) ! 1254: char *s; ! 1255: { ! 1256: register char *t, *u; ! 1257: register i; ! 1258: char *f = fold + 128; ! 1259: ! 1260: while(blank(*s)) ! 1261: s++; ! 1262: for(i=0; i<sizeof(months)/sizeof(*months); i++) { ! 1263: for(t=s,u=months[i]; f[*t++]==f[*u++]; ) ! 1264: if(*u==0) ! 1265: return(i); ! 1266: } ! 1267: return(-1); ! 1268: } ! 1269: ! 1270: ! 1271: rderror(s) ! 1272: char *s; ! 1273: { ! 1274: diag("read error on ", s == 0 ? "stdin" : s); ! 1275: term(); ! 1276: } ! 1277: ! 1278: wterror(s) ! 1279: char *s; ! 1280: { ! 1281: diag("write error while ", s); ! 1282: term(); ! 1283: } ! 1284: ! 1285: checkclose(f) ! 1286: FILE *f; ! 1287: { ! 1288: return !ferror(f) && fclose(f)==0; ! 1289: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.