|
|
1.1 root 1: #include <stdio.h>
2: #include <ctype.h>
3: #include <signal.h>
4: #include <sys/types.h>
5: #include <sys/stat.h>
6:
7: #define N 16
8: #define C 20
9: #define NF 10
10: #define TREEZ 32 /* no less than N and best if power of 2 */
11:
12: /*
13: * Memory administration
14: *
15: * Using a lot of memory is great when sorting a lot of data.
16: * Using a megabyte to sort the output of `who' loses big.
17: * MAXMEM, MINMEM and DEFMEM define the absolute maximum,
18: * minimum and default memory requirements. Administrators
19: * can override any or all of these via defines at compile time.
20: * Users can override the amount allocated (within the limits
21: * of MAXMEM and MINMEM) on the command line.
22: */
23:
24: #ifndef MAXMEM
25: #define MAXMEM 1048576 /* Megabyte maximum */
26: #endif
27:
28: #ifndef MINMEM
29: #define MINMEM 16384 /* 16K minimum */
30: #endif
31:
32: #ifndef DEFMEM
33: #define DEFMEM 32768 /* Same as old sort */
34: #endif
35:
36: enum { ASC, NUM, MON, FLOAT };
37:
38: #define blank(c) ((c)==' ' || (c)=='\t')
39:
40: FILE *is, *os;
41: char *dirtry[] = {"/usr/tmp", "/tmp", NULL};
42: char **dirs;
43: char file1[100];
44: char *file = file1;
45: char *filep;
46: #define NAMEOHD 12 /* sizeof("/stm00000aa") */
47: int nfiles;
48: int *lspace;
49: unsigned alloc, tryfor;
50: char bufin[BUFSIZ], bufout[BUFSIZ]; /* Use setbuf's to avoid malloc calls.
51: ** malloc seems to get heartburn
52: ** when brk returns storage.
53: */
54: int maxrec;
55: int cmp(), cmpa();
56: int (*compare)() = cmpa;
57: char *eol();
58: int term();
59: double atof();
60: int mflg;
61: int nway;
62: int cflg;
63: int uflg;
64: char *outfil;
65: int unsafeout; /*kludge to assure -m -o works*/
66: char tabchar;
67: int eargc;
68: char **eargv;
69: struct btree {
70: char *rp;
71: int rn;
72: } tree[TREEZ], *treep[TREEZ];
73: int blkcnt[TREEZ];
74: char **blkcur[TREEZ];
75: long wasfirst = 0, notfirst = 0;
76: int bonus;
77:
78: char zero[256];
79:
80: char fold[256] = {
81: 0200,0201,0202,0203,0204,0205,0206,0207,
82: 0210,0211,0212,0213,0214,0215,0216,0217,
83: 0220,0221,0222,0223,0224,0225,0226,0227,
84: 0230,0231,0232,0233,0234,0235,0236,0237,
85: 0240,0241,0242,0243,0244,0245,0246,0247,
86: 0250,0251,0252,0253,0254,0255,0256,0257,
87: 0260,0261,0262,0263,0264,0265,0266,0267,
88: 0270,0271,0272,0273,0274,0275,0276,0277,
89: 0300,0301,0302,0303,0304,0305,0306,0307,
90: 0310,0311,0312,0313,0314,0315,0316,0317,
91: 0320,0321,0322,0323,0324,0325,0326,0327,
92: 0330,0331,0332,0333,0334,0335,0336,0337,
93: 0340,0341,0342,0343,0344,0345,0346,0347,
94: 0350,0351,0352,0353,0354,0355,0356,0357,
95: 0360,0361,0362,0363,0364,0365,0366,0367,
96: 0370,0371,0372,0373,0374,0375,0376,0377,
97: 0000,0001,0002,0003,0004,0005,0006,0007,
98: 0010,0011,0012,0013,0014,0015,0016,0017,
99: 0020,0021,0022,0023,0024,0025,0026,0027,
100: 0030,0031,0032,0033,0034,0035,0036,0037,
101: 0040,0041,0042,0043,0044,0045,0046,0047,
102: 0050,0051,0052,0053,0054,0055,0056,0057,
103: 0060,0061,0062,0063,0064,0065,0066,0067,
104: 0070,0071,0072,0073,0074,0075,0076,0077,
105: 0100,0101,0102,0103,0104,0105,0106,0107,
106: 0110,0111,0112,0113,0114,0115,0116,0117,
107: 0120,0121,0122,0123,0124,0125,0126,0127,
108: 0130,0131,0132,0133,0134,0135,0136,0137,
109: 0140,0101,0102,0103,0104,0105,0106,0107,
110: 0110,0111,0112,0113,0114,0115,0116,0117,
111: 0120,0121,0122,0123,0124,0125,0126,0127,
112: 0130,0131,0132,0173,0174,0175,0176,0177
113: };
114: char nofold[256] = {
115: 0200,0201,0202,0203,0204,0205,0206,0207,
116: 0210,0211,0212,0213,0214,0215,0216,0217,
117: 0220,0221,0222,0223,0224,0225,0226,0227,
118: 0230,0231,0232,0233,0234,0235,0236,0237,
119: 0240,0241,0242,0243,0244,0245,0246,0247,
120: 0250,0251,0252,0253,0254,0255,0256,0257,
121: 0260,0261,0262,0263,0264,0265,0266,0267,
122: 0270,0271,0272,0273,0274,0275,0276,0277,
123: 0300,0301,0302,0303,0304,0305,0306,0307,
124: 0310,0311,0312,0313,0314,0315,0316,0317,
125: 0320,0321,0322,0323,0324,0325,0326,0327,
126: 0330,0331,0332,0333,0334,0335,0336,0337,
127: 0340,0341,0342,0343,0344,0345,0346,0347,
128: 0350,0351,0352,0353,0354,0355,0356,0357,
129: 0360,0361,0362,0363,0364,0365,0366,0367,
130: 0370,0371,0372,0373,0374,0375,0376,0377,
131: 0000,0001,0002,0003,0004,0005,0006,0007,
132: 0010,0011,0012,0013,0014,0015,0016,0017,
133: 0020,0021,0022,0023,0024,0025,0026,0027,
134: 0030,0031,0032,0033,0034,0035,0036,0037,
135: 0040,0041,0042,0043,0044,0045,0046,0047,
136: 0050,0051,0052,0053,0054,0055,0056,0057,
137: 0060,0061,0062,0063,0064,0065,0066,0067,
138: 0070,0071,0072,0073,0074,0075,0076,0077,
139: 0100,0101,0102,0103,0104,0105,0106,0107,
140: 0110,0111,0112,0113,0114,0115,0116,0117,
141: 0120,0121,0122,0123,0124,0125,0126,0127,
142: 0130,0131,0132,0133,0134,0135,0136,0137,
143: 0140,0141,0142,0143,0144,0145,0146,0147,
144: 0150,0151,0152,0153,0154,0155,0156,0157,
145: 0160,0161,0162,0163,0164,0165,0166,0167,
146: 0170,0171,0172,0173,0174,0175,0176,0177
147: };
148:
149: char nonprint[256] = {
150: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
151: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
152: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
153: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
154: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
155: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
156: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
157: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
158: 1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,
159: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
160: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
161: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
162: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
163: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
164: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
165: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
166: };
167:
168: char dict[256] = {
169: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
170: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
171: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
172: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
173: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
174: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
175: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
176: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
177: 1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,
178: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
179: 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
180: 0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,
181: 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
182: 0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,
183: 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
184: 0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1
185: };
186:
187: struct field {
188: char *code;
189: char *ignore;
190: int fcmp;
191: int rflg;
192: int bflg[2];
193: int m[2];
194: int n[2];
195: } fields[NF];
196: struct field proto = {
197: nofold+128,
198: zero+128,
199: ASC,
200: 1,
201: 0,0,
202: 0,-1,
203: 0,0
204: };
205: int nfields;
206: int error = 1;
207: char *setfil();
208:
209: main(argc, argv)
210: char **argv;
211: {
212: register a;
213: char *arg;
214: struct field *p, *q;
215: int i;
216: long incr;
217: char *sbrk();
218: char *sp;
219:
220: copyproto();
221: initree();
222: eargv = argv;
223: tryfor = DEFMEM;
224: while (--argc > 0) {
225: if(**++argv == '-') for(arg = *argv;;) {
226: switch(*++arg) {
227: case '\0':
228: if(arg[-1] == '-')
229: eargv[eargc++] = "-";
230: break;
231:
232: case 'o':
233: if(--argc > 0)
234: outfil = *++argv;
235: continue;
236:
237: case 'T':
238: if (--argc > 0) {
239: if ((strlen(*++argv) + NAMEOHD) > sizeof(file1)) {
240: diag("path name too long:", *argv);
241: exit(1);
242: }
243: else dirtry[0] = *argv;
244: }
245: continue;
246:
247: default:
248: field(++*argv,nfields>0);
249: break;
250: }
251: break;
252: } else if (**argv == '+') {
253: if(++nfields>=NF) {
254: diag("too many keys","");
255: exit(1);
256: }
257: copyproto();
258: field(++*argv,0);
259: } else
260: eargv[eargc++] = *argv;
261: }
262: q = &fields[0];
263: for(a=1; a<=nfields; a++) {
264: p = &fields[a];
265: if(p->code != proto.code) continue;
266: if(p->ignore != proto.ignore) continue;
267: if(p->fcmp != proto.fcmp) continue;
268: if(p->rflg != proto.rflg) continue;
269: if(p->bflg[0] != proto.bflg[0]) continue;
270: if(p->bflg[1] != proto.bflg[1]) continue;
271: p->code = q->code;
272: p->ignore = q->ignore;
273: p->fcmp = q->fcmp;
274: p->rflg = q->rflg;
275: p->bflg[0] = p->bflg[1] = q->bflg[0];
276: }
277: if(eargc == 0)
278: eargv[eargc++] = "-";
279: if(cflg && eargc>1) {
280: diag("can check only 1 file","");
281: exit(1);
282: }
283:
284: safeoutfil();
285:
286: sp = sbrk(0);
287: lspace = (int *) sp;
288: if (!mflg && !cflg) {
289: if (tryfor < MINMEM) tryfor = MINMEM;
290: else if (tryfor > MAXMEM) tryfor = MAXMEM;
291: for (incr=tryfor; (sp + incr) <= sp; incr >>= 1);
292: do {
293: if ((long)alloc+incr <= tryfor && brk(sp+incr) == 0) {
294: sp += incr;
295: alloc += incr;
296: }
297: } while ( ( incr >>= 1 ) >= 512L );
298: if ( brk((char *) lspace + alloc) != 0) {
299: diag("allocation error before sort", "");
300: exit(1);
301: }
302: }
303:
304: a = -1;
305: for(dirs=dirtry; *dirs; dirs++) {
306: (void) sprintf(filep=file1, "%s/stm%.5uaa", *dirs, getpid());
307: while (*filep)
308: filep++;
309: filep -= 2;
310: if ( (a=creat(file, 0600)) >=0)
311: break;
312: }
313: if(a < 0) {
314: diag("can't locate temp","");
315: exit(1);
316: }
317: for (i = 3; i <= 20; i++)
318: (void) close(i);
319: (void) unlink(file);
320: if (signal(SIGHUP, SIG_IGN) != SIG_IGN)
321: (void) signal(SIGHUP, term);
322: if (signal(SIGINT, SIG_IGN) != SIG_IGN)
323: (void) signal(SIGINT, term);
324: (void) signal(SIGPIPE,term);
325: if (signal(SIGTERM, SIG_IGN) != SIG_IGN)
326: (void) signal(SIGTERM,term);
327: nfiles = eargc;
328: if(!mflg && !cflg) {
329: sort();
330: if (ferror(stdin))
331: rderror("stdin");
332: (void) fclose(stdin);
333: }
334:
335: if (maxrec == 0) maxrec = 512;
336: alloc = (N + 1) * maxrec + N * BUFSIZ;
337: for (nway = N; nway >= 2; --nway) {
338: if (brk((char *)lspace + alloc) == 0) break;
339: alloc -= maxrec + BUFSIZ;
340: }
341: if (nway < 2) {
342: diag("allocation error before merge", "");
343: term();
344: }
345:
346: if (cflg) checksort();
347:
348: wasfirst = notfirst = 0;
349: a = mflg || cflg ? 0 : eargc;
350: if ((i = nfiles - a) > nway) { /* Do leftovers early */
351: if ((i %= (nway - 1)) == 0)
352: i = nway - 1;
353: if (i != 1) {
354: newfile();
355: (void) setbuf(os, bufout);
356: merge(a,a+i);
357: a += i;
358: }
359: }
360: for(; a+nway<nfiles || unsafeout&&a<eargc; a=i) {
361: i = a+nway;
362: if(i>=nfiles)
363: i = nfiles;
364: newfile();
365: (void) setbuf(os, bufout);
366: merge(a, i);
367: }
368: if(a != nfiles) {
369: oldfile();
370: (void) setbuf(os, bufout);
371: merge(a, nfiles);
372: }
373: error = 0;
374: term();
375: }
376:
377: sort()
378: {
379: register char *cp;
380: register c;
381: register char **lp;
382: register FILE *iop;
383: char *keep, *ekeep, **ep;
384: int done, i, recz;
385: char *f;
386:
387: /*
388: ** Records are read in from the front of the buffer area.
389: ** Pointers to the records are allocated from the back of the buffer.
390: ** If a partially read record exhausts the buffer, it is saved and
391: ** then copied to the start of the buffer for processing with the
392: ** next coreload.
393: */
394: done = 0;
395: keep = 0;
396: i = 0;
397: iop = NULL;
398: c = EOF;
399: ep = (char **) (((char *) lspace) + alloc);
400: do {
401: lp = ep - 1;
402: cp = (char *) lspace;
403: if (keep != 0) { /* part of record from previous coreload */
404: *lp-- = cp;
405: for(; keep < ekeep; *cp++ = *keep++);
406: }
407: while (cp < (char *) lp) {
408: if (keep == 0) *lp-- = cp;
409: while (cp < (char *) lp && c != '\n') {
410: if(c != EOF) {
411: *cp++ = c;
412: c = getc(iop);
413: continue;
414: } else if(iop != NULL) {
415: if ((cp > (char *) lspace) && (cp[-1] != '\n'))
416: *cp++ = '\n';
417: if(ferror(iop))
418: rderror(f);
419: (void) fclose(iop);
420: }
421: if(i < eargc) {
422: if((f = setfil(i++)) == 0)
423: iop = stdin;
424: else if((iop = fopen(f, "r")) == NULL)
425: cant(f);
426: (void) setbuf(iop, bufin);
427: c = getc(iop);
428: } else
429: break;
430: }
431: if (c == '\n') {
432: *cp++ = '\n';
433: keep = 0;
434: if ((recz = (cp - lp[1])) > maxrec)
435: maxrec = recz;
436: }
437: else if (c != EOF) { /* save partial record */
438: lp++;
439: if (keep != 0) {
440: diag("whopper record won't fit", "");
441: term();
442: }
443: keep = *lp;
444: ekeep = cp;
445: break;
446: }
447: if(c == EOF) {
448: if(ferror(iop))
449: rderror(f);
450: (void) fclose(iop);
451: done++;
452: lp++;
453: break;
454: }
455: c = getc(iop);
456: }
457: lp++;
458: if(done == 0 || nfiles != eargc)
459: newfile();
460: else
461: oldfile();
462: (void) setbuf(os, bufout);
463: msort(lp, ep);
464: if (ferror(os))
465: wterror("sorting");
466: (void) fclose(os);
467: } while(done == 0);
468: }
469:
470:
471: msort(a, b)
472: char **a, **b;
473: {
474: register struct btree **tp;
475: register int i, j, n;
476: char *save;
477:
478: i = (b - a);
479: if (i < 1) return;
480: else if (i >= TREEZ) n = TREEZ; /* number of blocks of records */
481: else n = i;
482:
483: /* break into n sorted subgroups of approximately equal size */
484: tp = &(treep[0]);
485: j = 0;
486: do {
487: (*tp++)->rn = j;
488: b = a + (blkcnt[j] = i / n);
489: qsort(a, b);
490: blkcur[j] = a = b;
491: i -= blkcnt[j++];
492: } while (--n > 0);
493: n = j;
494:
495: /* make a sorted binary tree using the first record in each group */
496: for (i = 0; i < n;) {
497: (*--tp)->rp = *(--blkcur[--j]);
498: insert(tp, ++i);
499: }
500: wasfirst = notfirst = 0;
501: bonus = cmpsave(n);
502:
503:
504: j = uflg;
505: tp = &(treep[0]);
506: while (n > 0) {
507: wline((*tp)->rp);
508: if (j) save = (*tp)->rp;
509:
510: /* Get another record and insert. Bypass repeats if uflg */
511:
512: do {
513: i = (*tp)->rn;
514: if (j) while((blkcnt[i] > 1) &&
515: (**(blkcur[i]-1) == '\0')) {
516: --blkcnt[i];
517: --blkcur[i];
518: }
519: if (--blkcnt[i] > 0) {
520: (*tp)->rp = *(--blkcur[i]);
521: insert(tp, n);
522: } else {
523: if (--n <= 0) break;
524: bonus = cmpsave(n);
525: tp++;
526: }
527: } while (j && (*compare)((*tp)->rp, save) == 0);
528: }
529: }
530:
531:
532: /* Insert the element at tp[0] into its proper place in the array of size n */
533: /* Pretty much Algorith B from 6.2.1 of Knuth, Sorting and Searching */
534: /* Special case for data that appears to be in correct order */
535:
536: insert(tp, n)
537: struct btree **tp;
538: int n;
539: {
540: register struct btree **lop, **hip, **midp;
541: register int c;
542: struct btree *hold;
543:
544: midp = lop = tp;
545: hip = lop++ + (n - 1);
546: if ((wasfirst > notfirst) && (n > 2) &&
547: ((*compare)((*tp)->rp, (*lop)->rp) >= 0)) {
548: wasfirst += bonus;
549: return;
550: }
551: while ((c = hip - lop) >= 0) { /* leave midp at the one tp is in front of */
552: midp = lop + c / 2;
553: if ((c = (*compare)((*tp)->rp, (*midp)->rp)) == 0) break; /* match */
554: if (c < 0) lop = ++midp; /* c < 0 => tp > midp */
555: else hip = midp - 1; /* c > 0 => tp < midp */
556: }
557: c = midp - tp;
558: if (--c > 0) { /* number of moves to get tp just before midp */
559: hip = tp;
560: lop = hip++;
561: hold = *lop;
562: do *lop++ = *hip++; while (--c > 0);
563: *lop = hold;
564: notfirst++;
565: } else wasfirst += bonus;
566: }
567:
568:
569: merge(a, b)
570: {
571: FILE *tfile[N];
572: char *buffer = (char *) lspace;
573: register int nf; /* number of merge files */
574: register struct btree **tp;
575: register int i, j;
576: char *t, *f;
577: char *save, *iobuf;
578:
579: save = (char *) lspace + (nway * maxrec);
580: iobuf = save + maxrec;
581: tp = &(treep[0]);
582: for (nf=0, i=a; i < b; i++) {
583: f = setfil(i);
584: if (f == 0)
585: tfile[nf] = stdin;
586: else if ((tfile[nf] = fopen(f, "r")) == NULL)
587: cant(f);
588: (*tp)->rp = buffer + (nf * maxrec);
589: (*tp)->rn = nf;
590: (void) setbuf(tfile[nf],iobuf);
591: iobuf += BUFSIZ;
592: if (rline(tfile[nf], (*tp)->rp)==0) {
593: nf++;
594: tp++;
595: } else {
596: if(ferror(tfile[nf]))
597: rderror(f);
598: (void) fclose(tfile[nf]);
599: }
600: }
601:
602:
603: /* make a sorted btree from the first record of each file */
604: for (--tp, i = 1; i++ < nf;) insert(--tp, i);
605:
606: bonus = cmpsave(nf);
607: tp = &(treep[0]);
608: j = uflg;
609: while (nf > 0) {
610: wline((*tp)->rp);
611: if (j) cline(save, (*tp)->rp);
612:
613: /* Get another record and insert. Bypass repeats if uflg */
614:
615: do {
616: i = (*tp)->rn;
617: if (rline(tfile[i], (*tp)->rp)) {
618: if (ferror(tfile[i]))
619: rderror(setfil(i+a));
620: (void) fclose(tfile[i]);
621: if (--nf <= 0) break;
622: ++tp;
623: bonus = cmpsave(nf);
624: } else insert(tp, nf);
625: } while (j && (*compare)((*tp)->rp, save) == 0 );
626: }
627:
628:
629: for (i=a; i < b; i++) {
630: if (i >= eargc)
631: (void) unlink(setfil(i));
632: }
633: if (ferror(os))
634: wterror("merging");
635: (void) fclose(os);
636: }
637:
638: wline(s)
639: register char *s;
640: {
641: register FILE *iop;
642:
643: iop = os;
644: do
645: putc(*s, iop);
646: while (*s++ != '\n');
647: }
648:
649: cline(tp, fp)
650: register char *tp, *fp;
651: {
652: while ((*tp++ = *fp++) != '\n');
653: }
654:
655: rline(iop, s)
656: FILE *iop;
657: register char *s;
658: {
659: register char *ce;
660: register int c;
661: register FILE *riop;
662:
663: riop = iop;
664: ce = s + maxrec;
665: do {
666: c = getc(riop);
667: if (c == EOF)
668: return(1);
669: if (s >= ce)
670: --s;
671: *s++ = c;
672: } while (c != '\n');
673: return(0);
674: }
675:
676:
677: checksort()
678: {
679: char *f;
680: char *s[2];
681: register int i, j, r;
682:
683: f = setfil(0);
684: if (f == 0)
685: is = stdin;
686: else if ((is = fopen(f, "r")) == NULL)
687: cant(f);
688: (void) setbuf(is, bufin);
689:
690: i = 0; j = 1;
691: s[0] = (char *) lspace;
692: s[1] = s[0] + maxrec;
693: if ( rline(is, s[0]) ) {
694: if (ferror(is)) {
695: rderror(f);
696: }
697: (void) fclose(is);
698: exit(0);
699: }
700: while ( !rline(is, s[j]) ) {
701: r = (*compare)(s[i], s[j]);
702: if (r < 0)
703: disorder("disorder: ", s[j]);
704: if (r == 0 && uflg)
705: disorder("non-unique: ", s[j]);
706: r = i; i = j; j = r;
707: }
708: if (ferror(is))
709: rderror(f);
710: (void) fclose(is);
711: exit(0);
712: }
713:
714:
715: disorder(s,t)
716: char *s, *t;
717: {
718: register char *u;
719: for(u=t; *u!='\n';u++) ;
720: *u = 0;
721: diag(s,t);
722: term();
723: }
724:
725: newfile()
726: {
727: register char *f;
728:
729: f = setfil(nfiles);
730: if((os=fopen(f, "w")) == NULL) {
731: diag("can't create ",f);
732: term();
733: }
734: nfiles++;
735: }
736:
737: char *
738: setfil(i)
739: {
740:
741: if(i < eargc)
742: if(eargv[i][0] == '-' && eargv[i][1] == '\0')
743: return(0);
744: else
745: return(eargv[i]);
746: i -= eargc;
747: filep[0] = i/26 + 'a';
748: filep[1] = i%26 + 'a';
749: return(file);
750: }
751:
752: oldfile()
753: {
754:
755: if(outfil) {
756: if((os=fopen(outfil, "w")) == NULL) {
757: diag("can't create ",outfil);
758: term();
759: }
760: } else
761: os = stdout;
762: }
763:
764: safeoutfil()
765: {
766: register int i;
767: struct stat ostat,istat;
768:
769: if(!mflg||outfil==0)
770: return;
771: if(stat(outfil,&ostat)==-1)
772: return;
773: if ((i = eargc - N) < 0) i = 0; /*-N is suff., not nec. */
774: for (; i < eargc; i++) {
775: if(stat(eargv[i],&istat)==-1)
776: continue;
777: if(ostat.st_dev==istat.st_dev&&
778: ostat.st_ino==istat.st_ino)
779: unsafeout++;
780: }
781: }
782:
783: cant(f)
784: char *f;
785: {
786:
787: diag("can't open ",f);
788: term();
789: }
790:
791: diag(s,t)
792: char *s, *t;
793: {
794: (void) fputs("sort: ",stderr);
795: (void) fputs(s,stderr);
796: (void) fputs(t,stderr);
797: (void) fputs("\n",stderr);
798: }
799:
800: term()
801: {
802: register i;
803:
804: (void) signal(SIGINT, SIG_IGN);
805: (void) signal(SIGHUP, SIG_IGN);
806: (void) signal(SIGTERM, SIG_IGN);
807: if(nfiles == eargc)
808: nfiles++;
809: for(i=eargc; i<=nfiles; i++) { /*<= in case of interrupt*/
810: (void) unlink(setfil(i)); /*with nfiles not updated*/
811: }
812: exit(error);
813: }
814:
815: cmp(i, j)
816: char *i, *j;
817: {
818: register char *pa, *pb;
819: char *skip();
820: char *code, *ignore;
821: int a, b;
822: int k;
823: char *la, *lb;
824: register int sa;
825: int sb;
826: char *ipa, *ipb, *jpa, *jpb;
827: struct field *fp;
828: double za, zb;
829:
830: for(k = nfields>0; k<=nfields; k++) {
831: fp = &fields[k];
832: pa = i;
833: pb = j;
834: if(k) {
835: la = skip(pa, fp, 1);
836: pa = skip(pa, fp, 0);
837: lb = skip(pb, fp, 1);
838: pb = skip(pb, fp, 0);
839: } else {
840: la = eol(pa);
841: lb = eol(pb);
842: }
843: switch(fp->fcmp) {
844: case NUM:
845: sa = sb = fp->rflg;
846: while(blank(*pa))
847: pa++;
848: while(blank(*pb))
849: pb++;
850: if(*pa == '-') {
851: pa++;
852: sa = -sa;
853: }
854: if(*pb == '-') {
855: pb++;
856: sb = -sb;
857: }
858: for(ipa = pa; ipa<la&&isdigit(*ipa); ipa++) ;
859: for(ipb = pb; ipb<lb&&isdigit(*ipb); ipb++) ;
860: jpa = ipa;
861: jpb = ipb;
862: a = 0;
863: if(sa==sb)
864: while(ipa > pa && ipb > pb)
865: if(b = *--ipb - *--ipa)
866: a = b;
867: while(ipa > pa)
868: if(*--ipa != '0')
869: return(-sa);
870: while(ipb > pb)
871: if(*--ipb != '0')
872: return(sb);
873: if(a) return(a*sa);
874: if(*(pa=jpa) == '.')
875: pa++;
876: if(*(pb=jpb) == '.')
877: pb++;
878: if(sa==sb)
879: while(pa<la && isdigit(*pa)
880: && pb<lb && isdigit(*pb))
881: if(a = *pb++ - *pa++)
882: return(a*sa);
883: while(pa<la && isdigit(*pa))
884: if(*pa++ != '0')
885: return(-sa);
886: while(pb<lb && isdigit(*pb))
887: if(*pb++ != '0')
888: return(sb);
889: continue;
890: case MON:
891: sa = fp->rflg*(month(pb)-month(pa));
892: if(sa)
893: return(sa);
894: else
895: continue;
896: case FLOAT:
897: zb = atof(pb);
898: za = atof(pa);
899: return (zb>za?fp->rflg:zb==za?0:-fp->rflg);
900: }
901: code = fp->code;
902: ignore = fp->ignore;
903: loop:
904: while(ignore[*pa])
905: pa++;
906: while(ignore[*pb])
907: pb++;
908: if(pa>=la || *pa=='\n')
909: if(pb<lb && *pb!='\n')
910: return(fp->rflg);
911: else continue;
912: if(pb>=lb || *pb=='\n')
913: return(-fp->rflg);
914: if((sa = code[*pb++]-code[*pa++]) == 0)
915: goto loop;
916: return(sa*fp->rflg);
917: }
918: if(uflg)
919: return(0);
920: return(cmpa(i, j));
921: }
922:
923: cmpa(pa, pb)
924: register char *pa, *pb;
925: {
926: while(*pa == *pb++)
927: if(*pa++ == '\n')
928: return(0);
929: return(
930: *pa == '\n' ? fields[0].rflg:
931: *--pb == '\n' ?-fields[0].rflg:
932: *pb > *pa ? fields[0].rflg:
933: -fields[0].rflg
934: );
935: }
936:
937: char *
938: skip(p, fp, j)
939: struct field *fp;
940: register char *p;
941: {
942: register i;
943: register char tbc;
944:
945: if( (i=fp->m[j]) < 0)
946: return(eol(p));
947: if (tbc = tabchar)
948: while (--i >= 0) {
949: while(*p != tbc)
950: if(*p != '\n')
951: p++;
952: else goto ret;
953: p++;
954: }
955: else while (--i >= 0) {
956: while(blank(*p))
957: p++;
958: while(!blank(*p))
959: if(*p != '\n')
960: p++;
961: else goto ret;
962: }
963: if(fp->bflg[j])
964: while(blank(*p))
965: p++;
966: i = fp->n[j];
967: if(i==0 && j!=0 && fp->m[j]>0 && p[-1]==tbc)
968: p--;
969: while((i-- > 0) && (*p != '\n'))
970: p++;
971: ret:
972: return(p);
973: }
974:
975: char *
976: eol(p)
977: register char *p;
978: {
979: while(*p != '\n') p++;
980: return(p);
981: }
982:
983: copyproto()
984: {
985: register i;
986: register int *p, *q;
987:
988: p = (int *)&proto;
989: q = (int *)&fields[nfields];
990: for(i=0; i<sizeof(proto)/sizeof(*p); i++)
991: *q++ = *p++;
992: }
993:
994: initree()
995: {
996: register struct btree **tpp, *tp;
997: register int i;
998:
999: for (tp = &(tree[0]), tpp = &(treep[0]), i = TREEZ; --i >= 0;)
1000: *tpp++ = tp++;
1001: }
1002:
1003: cmpsave(n)
1004: register int n;
1005: {
1006: register int award;
1007:
1008: if (n < 2) return (0);
1009: for (n++, award = 0; (n >>= 1) > 0; award++);
1010: return (award);
1011: }
1012:
1013:
1014: field(s,k)
1015: char *s;
1016: {
1017: register struct field *p;
1018: register d;
1019: p = &fields[nfields];
1020: d = 0;
1021: for(; *s!=0; s++) {
1022: switch(*s) {
1023: case '\0':
1024: return;
1025:
1026: case 'b':
1027: p->bflg[k]++;
1028: break;
1029:
1030: case 'd':
1031: p->ignore = dict+128;
1032: break;
1033:
1034: case 'f':
1035: p->code = fold+128;
1036: break;
1037: case 'i':
1038: p->ignore = nonprint+128;
1039: break;
1040:
1041: case 'c':
1042: cflg = 1;
1043: continue;
1044:
1045: case 'm':
1046: mflg = 1;
1047: continue;
1048:
1049: case 'M':
1050: p->fcmp = MON;
1051: break;
1052:
1053: case 'n':
1054: p->fcmp = NUM;
1055: break;
1056: case 'g':
1057: p->fcmp = FLOAT;
1058: break;
1059: case 't':
1060: tabchar = *++s;
1061: if(tabchar == 0) s--;
1062: continue;
1063:
1064: case 'r':
1065: p->rflg = -1;
1066: continue;
1067: case 'u':
1068: uflg = 1;
1069: continue;
1070:
1071: case 'y':
1072: if ( *++s ) tryfor = number(&s);
1073: else {
1074: --s;
1075: tryfor = MAXMEM;
1076: }
1077: continue;
1078:
1079: case 'z':
1080: if ( *++s )
1081: maxrec = number(&s);
1082: else --s;
1083: continue;
1084:
1085: case '.':
1086: if(p->m[k] == -1) /* -m.n with m missing */
1087: p->m[k] = 0;
1088: d = &fields[0].n[0]-&fields[0].m[0];
1089: if (*++s == '\0') {
1090: --s;
1091: p->m[k+d] = 0;
1092: continue;
1093: }
1094:
1095: default:
1096: p->m[k+d] = number(&s);
1097: }
1098: compare = cmp;
1099: }
1100: }
1101:
1102: number(ppa)
1103: char **ppa;
1104: {
1105: int n;
1106: register char *pa;
1107: pa = *ppa;
1108: n = 0;
1109: while(isdigit(*pa)) {
1110: n = n*10 + *pa - '0';
1111: *ppa = pa++;
1112: }
1113: return(n);
1114: }
1115:
1116: #define qsexc(p,q) t= *p;*p= *q;*q=t
1117: #define qstexc(p,q,r) t= *p;*p= *r;*r= *q;*q=t
1118:
1119: qsort(a,l)
1120: char **a, **l;
1121: {
1122: register char **i, **j;
1123: register char **lp, **hp;
1124: char **k;
1125: int c, d;
1126: char *t;
1127: unsigned n;
1128:
1129:
1130: start:
1131: if((n=l-a) <= 1)
1132: return;
1133:
1134: i = a;
1135: j = l - 1;
1136: if (n == 2) {
1137: if (c = (*compare)(*i, *j)) {
1138: if (c > 0) {
1139: qsexc(i, j);
1140: }
1141: } else if (uflg) **i = '\0';
1142: return;
1143: }
1144: n /= 2;
1145: lp = hp = a + n;
1146: c = (*compare)(*i, *lp);
1147: d = (*compare)(*lp, *j);
1148: if (c == 0) {
1149: if (d == 0) { /* x x x */
1150: --lp; qsexc(i, lp);
1151: hp++; qsexc(hp, j);
1152: } else if (d > 0) { /* x x w */
1153: qsexc(i, j);
1154: i++;
1155: hp++; qsexc(hp, j);
1156: } else { /* x x y */
1157: --j;
1158: --lp; qsexc(i, lp);
1159: }
1160: } else if (d == 0) {
1161: if (c < 0) { /* w x x */
1162: i++;
1163: hp++; qsexc(hp, j);
1164: } else { /* y x x */
1165: qsexc(i, j);
1166: --j;
1167: --lp; qsexc(i, lp);
1168: }
1169: } else if (c > 0) {
1170: if (d > 0) { /* z y x */
1171: qsexc(i, j);
1172: i++; --j;
1173: } else { /* z y z' */
1174: qsexc(i, lp);
1175: i++;
1176: if (c = (*compare)(*lp, *j)) {
1177: if (c > 0) {
1178: qsexc(lp, j);
1179: }
1180: --j;
1181: } else {
1182: hp++; qsexc(hp, j);
1183: }
1184: }
1185: } else {
1186: if (d > 0) { /* y z y' */
1187: qsexc(lp, j);
1188: --j;
1189: if (c = (*compare)(*i, *lp)) {
1190: if (c > 0) {
1191: qsexc(i, lp);
1192: }
1193: i++;
1194: } else {
1195: --lp;
1196: qsexc(i, lp);
1197: }
1198: } else { /* x y z */
1199: i++; --j;
1200: }
1201: }
1202:
1203:
1204: for(;;) {
1205: if(i < lp) {
1206: if((c = (*compare)(*i, *lp)) == 0) {
1207: --lp;
1208: qsexc(i, lp);
1209: continue;
1210: }
1211: if(c < 0) {
1212: ++i;
1213: continue;
1214: }
1215: }
1216:
1217: loop:
1218: if(j > hp) {
1219: if((c = (*compare)(*hp, *j)) == 0) {
1220: ++hp;
1221: qsexc(hp, j);
1222: goto loop;
1223: }
1224: if(c > 0) {
1225: if(i == lp) {
1226: ++hp;
1227: qstexc(i, hp, j);
1228: i = ++lp;
1229: goto loop;
1230: }
1231: qsexc(i, j);
1232: --j;
1233: ++i;
1234: continue;
1235: }
1236: --j;
1237: goto loop;
1238: }
1239:
1240:
1241: if(i == lp) {
1242: if(uflg)
1243: for(k=lp; k<hp;) **k++ = '\0';
1244: if(lp-a >= l-hp) {
1245: qsort(hp+1, l);
1246: l = lp;
1247: } else {
1248: qsort(a, lp);
1249: a = hp+1;
1250: }
1251: goto start;
1252: }
1253:
1254:
1255: --lp;
1256: qstexc(j, lp, i);
1257: j = --hp;
1258: }
1259: }
1260:
1261: char * months[] = {
1262: "jan",
1263: "feb",
1264: "mar",
1265: "apr",
1266: "may",
1267: "jun",
1268: "jul",
1269: "aug",
1270: "sep",
1271: "oct",
1272: "nov",
1273: "dec"
1274: };
1275: month(s)
1276: char *s;
1277: {
1278: register char *t, *u;
1279: register i;
1280: char *f = fold + 128;
1281:
1282: while(blank(*s))
1283: s++;
1284: for(i=0; i<sizeof(months)/sizeof(*months); i++) {
1285: for(t=s,u=months[i]; f[*t++]==f[*u++]; )
1286: if(*u==0)
1287: return(i);
1288: }
1289: return(-1);
1290: }
1291:
1292:
1293: rderror(s)
1294: char *s;
1295: {
1296: diag("read error on ", s == 0 ? "stdin" : s);
1297: term();
1298: }
1299:
1300: wterror(s)
1301: char *s;
1302: {
1303: diag("write error while ", s);
1304: term();
1305: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.