|
|
1.1 root 1: #include <stdio.h>
2: #include <ctype.h>
3: #include <signal.h>
4: #include <sys/types.h>
5: #include <sys/stat.h>
6:
7: #define N 16
8: #define C 20
9: #define NF 10
10: #define TREEZ 32 /* no less than N and best if power of 2 */
11:
12: /*
13: * Memory administration
14: *
15: * Using a lot of memory is great when sorting a lot of data.
16: * Using a megabyte to sort the output of `who' loses big.
17: * MAXMEM, MINMEM and DEFMEM define the absolute maximum,
18: * minimum and default memory requirements. Administrators
19: * can override any or all of these via defines at compile time.
20: * Users can override the amount allocated (within the limits
21: * of MAXMEM and MINMEM) on the command line.
22: */
23:
24: #ifndef MAXMEM
25: #define MAXMEM 16777216 /* 2^24 maximum */
26: #endif
27:
28: #ifndef MINMEM
29: #define MINMEM 16384 /* 16K minimum */
30: #endif
31:
32: #ifndef DEFMEM
33: #define DEFMEM 262144 /* old sort was 32768 */
34: #endif
35:
36: enum { ASC, NUM, MON, FLOAT };
37:
38: #define blank(c) ((c)==' ' || (c)=='\t')
39:
40: FILE *is, *os;
41: char *dirtry[] = {"/usr/tmp", "/tmp", NULL};
42: char **dirs;
43: char file1[100];
44: char *file = file1;
45: char *filep;
46: #define NAMEOHD 12 /* sizeof("/stm00000aa") */
47: int nfiles;
48: int *lspace;
49: unsigned alloc, tryfor;
50: char bufin[BUFSIZ], bufout[BUFSIZ]; /* Use setbuf's to avoid malloc calls.
51: ** malloc seems to get heartburn
52: ** when brk returns storage.
53: */
54: int maxrec;
55: int cmp(), cmpa();
56: int (*compare)() = cmpa;
57: char *eol();
58: int term();
59: double atof();
60: int mflg;
61: int nway;
62: int cflg;
63: int uflg;
64: char *outfil;
65: int unsafeout; /*kludge to assure -m -o works*/
66: char tabchar;
67: int eargc;
68: char **eargv;
69: struct btree {
70: char *rp;
71: int rn;
72: } tree[TREEZ], *treep[TREEZ];
73: int blkcnt[TREEZ];
74: char **blkcur[TREEZ];
75: long wasfirst = 0, notfirst = 0;
76: int bonus;
77:
78: char zero[256];
79:
80: char fold[256] = {
81: 0200,0201,0202,0203,0204,0205,0206,0207,
82: 0210,0211,0212,0213,0214,0215,0216,0217,
83: 0220,0221,0222,0223,0224,0225,0226,0227,
84: 0230,0231,0232,0233,0234,0235,0236,0237,
85: 0240,0241,0242,0243,0244,0245,0246,0247,
86: 0250,0251,0252,0253,0254,0255,0256,0257,
87: 0260,0261,0262,0263,0264,0265,0266,0267,
88: 0270,0271,0272,0273,0274,0275,0276,0277,
89: 0300,0301,0302,0303,0304,0305,0306,0307,
90: 0310,0311,0312,0313,0314,0315,0316,0317,
91: 0320,0321,0322,0323,0324,0325,0326,0327,
92: 0330,0331,0332,0333,0334,0335,0336,0337,
93: 0340,0341,0342,0343,0344,0345,0346,0347,
94: 0350,0351,0352,0353,0354,0355,0356,0357,
95: 0360,0361,0362,0363,0364,0365,0366,0367,
96: 0370,0371,0372,0373,0374,0375,0376,0377,
97: 0000,0001,0002,0003,0004,0005,0006,0007,
98: 0010,0011,0012,0013,0014,0015,0016,0017,
99: 0020,0021,0022,0023,0024,0025,0026,0027,
100: 0030,0031,0032,0033,0034,0035,0036,0037,
101: 0040,0041,0042,0043,0044,0045,0046,0047,
102: 0050,0051,0052,0053,0054,0055,0056,0057,
103: 0060,0061,0062,0063,0064,0065,0066,0067,
104: 0070,0071,0072,0073,0074,0075,0076,0077,
105: 0100,0101,0102,0103,0104,0105,0106,0107,
106: 0110,0111,0112,0113,0114,0115,0116,0117,
107: 0120,0121,0122,0123,0124,0125,0126,0127,
108: 0130,0131,0132,0133,0134,0135,0136,0137,
109: 0140,0101,0102,0103,0104,0105,0106,0107,
110: 0110,0111,0112,0113,0114,0115,0116,0117,
111: 0120,0121,0122,0123,0124,0125,0126,0127,
112: 0130,0131,0132,0173,0174,0175,0176,0177
113: };
114: char nofold[256] = {
115: 0200,0201,0202,0203,0204,0205,0206,0207,
116: 0210,0211,0212,0213,0214,0215,0216,0217,
117: 0220,0221,0222,0223,0224,0225,0226,0227,
118: 0230,0231,0232,0233,0234,0235,0236,0237,
119: 0240,0241,0242,0243,0244,0245,0246,0247,
120: 0250,0251,0252,0253,0254,0255,0256,0257,
121: 0260,0261,0262,0263,0264,0265,0266,0267,
122: 0270,0271,0272,0273,0274,0275,0276,0277,
123: 0300,0301,0302,0303,0304,0305,0306,0307,
124: 0310,0311,0312,0313,0314,0315,0316,0317,
125: 0320,0321,0322,0323,0324,0325,0326,0327,
126: 0330,0331,0332,0333,0334,0335,0336,0337,
127: 0340,0341,0342,0343,0344,0345,0346,0347,
128: 0350,0351,0352,0353,0354,0355,0356,0357,
129: 0360,0361,0362,0363,0364,0365,0366,0367,
130: 0370,0371,0372,0373,0374,0375,0376,0377,
131: 0000,0001,0002,0003,0004,0005,0006,0007,
132: 0010,0011,0012,0013,0014,0015,0016,0017,
133: 0020,0021,0022,0023,0024,0025,0026,0027,
134: 0030,0031,0032,0033,0034,0035,0036,0037,
135: 0040,0041,0042,0043,0044,0045,0046,0047,
136: 0050,0051,0052,0053,0054,0055,0056,0057,
137: 0060,0061,0062,0063,0064,0065,0066,0067,
138: 0070,0071,0072,0073,0074,0075,0076,0077,
139: 0100,0101,0102,0103,0104,0105,0106,0107,
140: 0110,0111,0112,0113,0114,0115,0116,0117,
141: 0120,0121,0122,0123,0124,0125,0126,0127,
142: 0130,0131,0132,0133,0134,0135,0136,0137,
143: 0140,0141,0142,0143,0144,0145,0146,0147,
144: 0150,0151,0152,0153,0154,0155,0156,0157,
145: 0160,0161,0162,0163,0164,0165,0166,0167,
146: 0170,0171,0172,0173,0174,0175,0176,0177
147: };
148:
149: char nonprint[256] = {
150: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
151: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
152: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
153: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
154: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
155: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
156: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
157: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
158: 1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,
159: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
160: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
161: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
162: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
163: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
164: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
165: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
166: };
167:
168: char dict[256] = {
169: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
170: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
171: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
172: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
173: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
174: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
175: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
176: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
177: 1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,
178: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
179: 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
180: 0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,
181: 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
182: 0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,
183: 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
184: 0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1
185: };
186:
187: struct field {
188: char *code;
189: char *ignore;
190: int fcmp;
191: int rflg;
192: int bflg[2];
193: int m[2];
194: int n[2];
195: } fields[NF];
196: struct field proto = {
197: nofold+128,
198: zero+128,
199: ASC,
200: 1,
201: 0,0,
202: 0,-1,
203: 0,0
204: };
205: int nfields;
206: int error = 1;
207: char *setfil();
208:
209: main(argc, argv)
210: char **argv;
211: {
212: register a;
213: char *arg;
214: struct field *p, *q;
215: int i;
216: long incr;
217: char *sbrk();
218: char *sp;
219:
220: copyproto();
221: initree();
222: eargv = argv;
223: tryfor = DEFMEM;
224: while (--argc > 0) {
225: if(**++argv == '-') for(arg = *argv;;) {
226: switch(*++arg) {
227: case '\0':
228: if(arg[-1] == '-')
229: eargv[eargc++] = "-";
230: break;
231:
232: case 'o':
233: if(--argc > 0)
234: outfil = *++argv;
235: continue;
236:
237: case 'T':
238: if (--argc > 0) {
239: if ((strlen(*++argv) + NAMEOHD) > sizeof(file1)) {
240: diag("path name too long:", *argv);
241: exit(1);
242: }
243: else dirtry[0] = *argv;
244: }
245: continue;
246:
247: default:
248: field(++*argv,nfields>0);
249: break;
250: }
251: break;
252: } else if (**argv == '+') {
253: if(++nfields>=NF) {
254: diag("too many keys","");
255: exit(1);
256: }
257: copyproto();
258: field(++*argv,0);
259: } else
260: eargv[eargc++] = *argv;
261: }
262: q = &fields[0];
263: for(a=1; a<=nfields; a++) {
264: p = &fields[a];
265: if(p->code != proto.code) continue;
266: if(p->ignore != proto.ignore) continue;
267: if(p->fcmp != proto.fcmp) continue;
268: if(p->rflg != proto.rflg) continue;
269: if(p->bflg[0] != proto.bflg[0]) continue;
270: if(p->bflg[1] != proto.bflg[1]) continue;
271: p->code = q->code;
272: p->ignore = q->ignore;
273: p->fcmp = q->fcmp;
274: p->rflg = q->rflg;
275: p->bflg[0] = p->bflg[1] = q->bflg[0];
276: }
277: if(eargc == 0)
278: eargv[eargc++] = "-";
279: if(cflg && eargc>1) {
280: diag("can check only 1 file","");
281: exit(1);
282: }
283:
284: safeoutfil();
285:
286: sp = sbrk(0);
287: lspace = (int *) sp;
288: if (!mflg && !cflg) {
289: if (tryfor < MINMEM) tryfor = MINMEM;
290: else if (tryfor > MAXMEM) tryfor = MAXMEM;
291: for (incr=tryfor; (sp + incr) <= sp; incr >>= 1);
292: do {
293: if ((long)alloc+incr <= tryfor && brk(sp+incr) == 0) {
294: sp += incr;
295: alloc += incr;
296: }
297: } while ( ( incr >>= 1 ) >= 512L );
298: if ( brk((char *) lspace + alloc) != 0) {
299: diag("allocation error before sort", "");
300: exit(1);
301: }
302: }
303:
304: a = -1;
305: for(dirs=dirtry; *dirs; dirs++) {
306: (void) sprintf(filep=file1, "%s/stm%.5uaa", *dirs, getpid());
307: while (*filep)
308: filep++;
309: filep -= 2;
310: if ( (a=creat(file, 0600)) >=0)
311: break;
312: }
313: if(a < 0) {
314: diag("can't locate temp","");
315: exit(1);
316: }
317: for (i = 3; i <= 20; i++)
318: (void) close(i);
319: (void) unlink(file);
320: if (signal(SIGHUP, SIG_IGN) != SIG_IGN)
321: (void) signal(SIGHUP, term);
322: if (signal(SIGINT, SIG_IGN) != SIG_IGN)
323: (void) signal(SIGINT, term);
324: (void) signal(SIGPIPE,term);
325: if (signal(SIGTERM, SIG_IGN) != SIG_IGN)
326: (void) signal(SIGTERM,term);
327: nfiles = eargc;
328: if(!mflg && !cflg)
329: sort();
330:
331: if (maxrec == 0) maxrec = 512;
332: alloc = (N + 1) * maxrec + N * BUFSIZ;
333: for (nway = N; nway >= 2; --nway) {
334: if (brk((char *)lspace + alloc) == 0) break;
335: alloc -= maxrec + BUFSIZ;
336: }
337: if (nway < 2) {
338: diag("allocation error before merge", "");
339: term();
340: }
341:
342: if (cflg) checksort();
343:
344: wasfirst = notfirst = 0;
345: a = mflg || cflg ? 0 : eargc;
346: if ((i = nfiles - a) > nway) { /* Do leftovers early */
347: if ((i %= (nway - 1)) == 0)
348: i = nway - 1;
349: if (i != 1) {
350: newfile();
351: (void) setbuf(os, bufout);
352: merge(a,a+i);
353: a += i;
354: }
355: }
356: for(; a+nway<nfiles || unsafeout&&a<eargc; a=i) {
357: i = a+nway;
358: if(i>=nfiles)
359: i = nfiles;
360: newfile();
361: (void) setbuf(os, bufout);
362: merge(a, i);
363: }
364: if(a != nfiles) {
365: oldfile();
366: (void) setbuf(os, bufout);
367: merge(a, nfiles);
368: }
369: error = 0;
370: term();
371: }
372:
373: #define OPEN(i) do{\
374: if((f=setfil(i++))==0)iop=stdin; \
375: else if((iop=fopen(f,"r"))==NULL)cant(f); \
376: (void)setbuf(iop,bufin);}while(0)
377:
378: #define GETC(c) \
379: while((c=getc(iop))==EOF) { \
380: if(!checkclose(iop))\
381: rderror(f); \
382: if(i<eargc) \
383: OPEN(i); \
384: else break; \
385: }
386: sort()
387: {
388: register FILE *iop;
389: register int c;
390: register char *cp,**lp;
391: int i;
392: char **ep;
393: char *f=0;
394: /*
395: lp[2] points to char after last \n or to beginning.
396: lp[3] points to second previous, etc.
397: (lp kept 2 below active cell in case machines
398: address char** by high byte)
399: cp grows toward lp; buffer is full when they meet.
400: */
401:
402: cp=(char*)lspace;
403: ep=(char**)((char*)lspace+alloc);
404: lp=ep-2;
405: (--lp)[2]=cp;
406: i=0;
407: OPEN(i);
408: GETC(c);
409: for(;;) {
410: while(c!=EOF&&cp<(char*)lp) {
411: *cp++=c;
412: if(c=='\n') {
413: (--lp)[2]=cp;
414: if(cp-lp[3]>maxrec)maxrec=cp-lp[3];
415: }
416: GETC(c);
417: }
418: /* buffer full or end of input files */
419: if(c==EOF)
420: break;
421: if((char*)lp[2]==(char*)lspace) {
422: diag("whopper record won't fit","");
423: term();
424: }
425: newfile();
426: (void)setbuf(os,bufout);
427: msort(lp+3,ep);
428: if(cp[-1]!='\n') {
429: (void) memcpy((char*)lspace,lp[2],cp-lp[2]);
430: cp-=lp[2]-(char*)lspace;
431: }
432: else
433: cp=(char*)lspace;
434: if(!checkclose(os))
435: wterror("sorting");
436: lp=ep-2;
437: (--lp)[2]=(char*)lspace;
438: }
439: /* end of input files */
440: if(cp>(char*)lspace&&cp[-1]!='\n') {
441: *cp++='\n';
442: (--lp);
443: if(cp-lp[3]>maxrec)maxrec=cp-lp[3];
444: }
445: if(nfiles==eargc)oldfile();
446: else newfile();
447: (void)setbuf(os,bufout);
448: msort(lp+3,ep);
449: if(!checkclose(os))
450: wterror("sorting");
451: }
452:
453: msort(a, b)
454: char **a, **b;
455: {
456: register struct btree **tp;
457: register int i, j, n;
458: char *save;
459:
460: i = (b - a);
461: if (i < 1) return;
462: else if (i >= TREEZ) n = TREEZ; /* number of blocks of records */
463: else n = i;
464:
465: /* break into n sorted subgroups of approximately equal size */
466: tp = &(treep[0]);
467: j = 0;
468: do {
469: (*tp++)->rn = j;
470: b = a + (blkcnt[j] = i / n);
471: qsort(a, b);
472: blkcur[j] = a = b;
473: i -= blkcnt[j++];
474: } while (--n > 0);
475: n = j;
476:
477: /* make a sorted binary tree using the first record in each group */
478: for (i = 0; i < n;) {
479: (*--tp)->rp = *(--blkcur[--j]);
480: insert(tp, ++i);
481: }
482: wasfirst = notfirst = 0;
483: bonus = cmpsave(n);
484:
485:
486: j = uflg;
487: tp = &(treep[0]);
488: while (n > 0) {
489: wline((*tp)->rp);
490: if (j) save = (*tp)->rp;
491:
492: /* Get another record and insert. Bypass repeats if uflg */
493:
494: do {
495: i = (*tp)->rn;
496: if (j) while((blkcnt[i] > 1) &&
497: (**(blkcur[i]-1) == '\0')) {
498: --blkcnt[i];
499: --blkcur[i];
500: }
501: if (--blkcnt[i] > 0) {
502: (*tp)->rp = *(--blkcur[i]);
503: insert(tp, n);
504: } else {
505: if (--n <= 0) break;
506: bonus = cmpsave(n);
507: tp++;
508: }
509: } while (j && (*compare)((*tp)->rp, save) == 0);
510: }
511: }
512:
513:
514: /* Insert the element at tp[0] into its proper place in the array of size n */
515: /* Pretty much Algorith B from 6.2.1 of Knuth, Sorting and Searching */
516: /* Special case for data that appears to be in correct order */
517:
518: insert(tp, n)
519: struct btree **tp;
520: int n;
521: {
522: register struct btree **lop, **hip, **midp;
523: register int c;
524: struct btree *hold;
525:
526: midp = lop = tp;
527: hip = lop++ + (n - 1);
528: if ((wasfirst > notfirst) && (n > 2) &&
529: ((*compare)((*tp)->rp, (*lop)->rp) >= 0)) {
530: wasfirst += bonus;
531: return;
532: }
533: while ((c = hip - lop) >= 0) { /* leave midp at the one tp is in front of */
534: midp = lop + c / 2;
535: if ((c = (*compare)((*tp)->rp, (*midp)->rp)) == 0) break; /* match */
536: if (c < 0) lop = ++midp; /* c < 0 => tp > midp */
537: else hip = midp - 1; /* c > 0 => tp < midp */
538: }
539: c = midp - tp;
540: if (--c > 0) { /* number of moves to get tp just before midp */
541: hip = tp;
542: lop = hip++;
543: hold = *lop;
544: do *lop++ = *hip++; while (--c > 0);
545: *lop = hold;
546: notfirst++;
547: } else wasfirst += bonus;
548: }
549:
550:
551: merge(a, b)
552: {
553: FILE *tfile[N];
554: char *buffer = (char *) lspace;
555: register int nf; /* number of merge files */
556: register struct btree **tp;
557: register int i, j;
558: char *t, *f;
559: char *save, *iobuf;
560:
561: save = (char *) lspace + (nway * maxrec);
562: iobuf = save + maxrec;
563: tp = &(treep[0]);
564: for (nf=0, i=a; i < b; i++) {
565: f = setfil(i);
566: if (f == 0)
567: tfile[nf] = stdin;
568: else if ((tfile[nf] = fopen(f, "r")) == NULL)
569: cant(f);
570: (*tp)->rp = buffer + (nf * maxrec);
571: (*tp)->rn = nf;
572: (void) setbuf(tfile[nf],iobuf);
573: iobuf += BUFSIZ;
574: if (rline(tfile[nf], (*tp)->rp)==0) {
575: nf++;
576: tp++;
577: } else if(!checkclose(tfile[nf]))
578: rderror(f);
579: }
580:
581:
582: /* make a sorted btree from the first record of each file */
583: for (--tp, i = 1; i++ < nf;) insert(--tp, i);
584:
585: bonus = cmpsave(nf);
586: tp = &(treep[0]);
587: j = uflg;
588: while (nf > 0) {
589: wline((*tp)->rp);
590: if (j) cline(save, (*tp)->rp);
591:
592: /* Get another record and insert. Bypass repeats if uflg */
593:
594: do {
595: i = (*tp)->rn;
596: if (rline(tfile[i], (*tp)->rp)) {
597: if (!checkclose(tfile[i]))
598: rderror(setfil(i+a));
599: if (--nf <= 0) break;
600: ++tp;
601: bonus = cmpsave(nf);
602: } else insert(tp, nf);
603: } while (j && (*compare)((*tp)->rp, save) == 0 );
604: }
605:
606:
607: for (i=a; i < b; i++) {
608: if (i >= eargc)
609: (void) unlink(setfil(i));
610: }
611: if (!checkclose(os))
612: wterror("merging");
613: }
614:
615: wline(s)
616: register char *s;
617: {
618: register FILE *iop;
619:
620: iop = os;
621: do
622: putc(*s, iop);
623: while (*s++ != '\n');
624: }
625:
626: cline(tp, fp)
627: register char *tp, *fp;
628: {
629: while ((*tp++ = *fp++) != '\n');
630: }
631:
632: rline(iop, s)
633: FILE *iop;
634: register char *s;
635: {
636: register char *ce;
637: register int c;
638: register FILE *riop;
639:
640: riop = iop;
641: ce = s + maxrec;
642: do {
643: c = getc(riop);
644: if (c == EOF)
645: return(1);
646: if (s >= ce)
647: --s;
648: *s++ = c;
649: } while (c != '\n');
650: return(0);
651: }
652:
653:
654: checksort()
655: {
656: char *f;
657: char *s[2];
658: register int i, j, r;
659:
660: f = setfil(0);
661: if (f == 0)
662: is = stdin;
663: else if ((is = fopen(f, "r")) == NULL)
664: cant(f);
665: (void) setbuf(is, bufin);
666:
667: i = 0; j = 1;
668: s[0] = (char *) lspace;
669: s[1] = s[0] + maxrec;
670: if ( rline(is, s[0]) ) {
671: if (!checkclose(is))
672: rderror(f);
673: exit(0);
674: }
675: while ( !rline(is, s[j]) ) {
676: r = (*compare)(s[i], s[j]);
677: if (r < 0)
678: disorder("disorder: ", s[j]);
679: if (r == 0 && uflg)
680: disorder("non-unique: ", s[j]);
681: r = i; i = j; j = r;
682: }
683: if (!checkclose(is))
684: rderror(f);
685: exit(0);
686: }
687:
688:
689: disorder(s,t)
690: char *s, *t;
691: {
692: register char *u;
693: for(u=t; *u!='\n';u++) ;
694: *u = 0;
695: diag(s,t);
696: term();
697: }
698:
699: newfile()
700: {
701: register char *f;
702:
703: f = setfil(nfiles);
704: if((os=fopen(f, "w")) == NULL) {
705: diag("can't create ",f);
706: term();
707: }
708: nfiles++;
709: }
710:
711: char *
712: setfil(i)
713: {
714:
715: if(i < eargc)
716: if(eargv[i][0] == '-' && eargv[i][1] == '\0')
717: return(0);
718: else
719: return(eargv[i]);
720: i -= eargc;
721: filep[0] = i/26 + 'a';
722: filep[1] = i%26 + 'a';
723: return(file);
724: }
725:
726: oldfile()
727: {
728:
729: if(outfil) {
730: if((os=fopen(outfil, "w")) == NULL) {
731: diag("can't create ",outfil);
732: term();
733: }
734: } else
735: os = stdout;
736: }
737:
738: safeoutfil()
739: {
740: register int i;
741: struct stat ostat,istat;
742:
743: if(!mflg||outfil==0)
744: return;
745: if(stat(outfil,&ostat)==-1)
746: return;
747: if ((i = eargc - N) < 0) i = 0; /*-N is suff., not nec. */
748: for (; i < eargc; i++) {
749: if(stat(eargv[i],&istat)==-1)
750: continue;
751: if(ostat.st_dev==istat.st_dev&&
752: ostat.st_ino==istat.st_ino)
753: unsafeout++;
754: }
755: }
756:
757: cant(f)
758: char *f;
759: {
760:
761: diag("can't open ",f);
762: term();
763: }
764:
765: diag(s,t)
766: char *s, *t;
767: {
768: (void) fputs("sort: ",stderr);
769: (void) fputs(s,stderr);
770: (void) fputs(t,stderr);
771: (void) fputs("\n",stderr);
772: }
773:
774: term()
775: {
776: register i;
777:
778: (void) signal(SIGINT, SIG_IGN);
779: (void) signal(SIGHUP, SIG_IGN);
780: (void) signal(SIGTERM, SIG_IGN);
781: if(nfiles == eargc)
782: nfiles++;
783: for(i=eargc; i<=nfiles; i++) { /*<= in case of interrupt*/
784: (void) unlink(setfil(i)); /*with nfiles not updated*/
785: }
786: exit(error);
787: }
788:
789: cmp(i, j)
790: char *i, *j;
791: {
792: register char *pa, *pb;
793: char *skip();
794: char *code, *ignore;
795: int a, b;
796: int k;
797: char *la, *lb;
798: register int sa;
799: int sb;
800: char *ipa, *ipb, *jpa, *jpb;
801: struct field *fp;
802: double za, zb;
803:
804: for(k = nfields>0; k<=nfields; k++) {
805: fp = &fields[k];
806: pa = i;
807: pb = j;
808: if(k) {
809: la = skip(pa, fp, 1);
810: pa = skip(pa, fp, 0);
811: lb = skip(pb, fp, 1);
812: pb = skip(pb, fp, 0);
813: } else {
814: la = eol(pa);
815: lb = eol(pb);
816: }
817: switch(fp->fcmp) {
818: case NUM:
819: sa = sb = fp->rflg;
820: while(blank(*pa))
821: pa++;
822: while(blank(*pb))
823: pb++;
824: if(*pa == '-') {
825: pa++;
826: sa = -sa;
827: }
828: if(*pb == '-') {
829: pb++;
830: sb = -sb;
831: }
832: for(ipa = pa; ipa<la&&isdigit(*ipa); ipa++) ;
833: for(ipb = pb; ipb<lb&&isdigit(*ipb); ipb++) ;
834: jpa = ipa;
835: jpb = ipb;
836: a = 0;
837: if(sa==sb)
838: while(ipa > pa && ipb > pb)
839: if(b = *--ipb - *--ipa)
840: a = b;
841: while(ipa > pa)
842: if(*--ipa != '0')
843: return(-sa);
844: while(ipb > pb)
845: if(*--ipb != '0')
846: return(sb);
847: if(a) return(a*sa);
848: if(*(pa=jpa) == '.')
849: pa++;
850: if(*(pb=jpb) == '.')
851: pb++;
852: if(sa==sb)
853: while(pa<la && isdigit(*pa)
854: && pb<lb && isdigit(*pb))
855: if(a = *pb++ - *pa++)
856: return(a*sa);
857: while(pa<la && isdigit(*pa))
858: if(*pa++ != '0')
859: return(-sa);
860: while(pb<lb && isdigit(*pb))
861: if(*pb++ != '0')
862: return(sb);
863: continue;
864: case MON:
865: sa = fp->rflg*(month(pb)-month(pa));
866: if(sa)
867: return(sa);
868: else
869: continue;
870: case FLOAT:
871: zb = atof(pb);
872: za = atof(pa);
873: if(zb > za)
874: return fp->rflg;
875: else if(zb < za)
876: return -fp->rflg;
877: else continue;
878: }
879: code = fp->code;
880: ignore = fp->ignore;
881: loop:
882: while(ignore[*pa])
883: pa++;
884: while(ignore[*pb])
885: pb++;
886: if(pa>=la || *pa=='\n')
887: if(pb<lb && *pb!='\n')
888: return(fp->rflg);
889: else continue;
890: if(pb>=lb || *pb=='\n')
891: return(-fp->rflg);
892: if((sa = code[*pb++]-code[*pa++]) == 0)
893: goto loop;
894: return(sa*fp->rflg);
895: }
896: if(uflg)
897: return(0);
898: return(cmpa(i, j));
899: }
900:
901: cmpa(pa, pb)
902: register char *pa, *pb;
903: {
904: while(*pa == *pb++)
905: if(*pa++ == '\n')
906: return(0);
907: return(
908: *pa == '\n' ? fields[0].rflg:
909: *--pb == '\n' ?-fields[0].rflg:
910: *pb > *pa ? fields[0].rflg:
911: -fields[0].rflg
912: );
913: }
914:
915: char *
916: skip(p, fp, j)
917: struct field *fp;
918: register char *p;
919: {
920: register i;
921: register char tbc;
922:
923: if( (i=fp->m[j]) < 0)
924: return(eol(p));
925: if (tbc = tabchar)
926: while (--i >= 0) {
927: while(*p != tbc)
928: if(*p != '\n')
929: p++;
930: else goto ret;
931: p++;
932: }
933: else while (--i >= 0) {
934: while(blank(*p))
935: p++;
936: while(!blank(*p))
937: if(*p != '\n')
938: p++;
939: else goto ret;
940: }
941: if(fp->bflg[j])
942: while(blank(*p))
943: p++;
944: i = fp->n[j];
945: if(i==0 && j!=0 && fp->m[j]>0 && p[-1]==tbc)
946: p--;
947: while((i-- > 0) && (*p != '\n'))
948: p++;
949: ret:
950: return(p);
951: }
952:
953: char *
954: eol(p)
955: register char *p;
956: {
957: while(*p != '\n') p++;
958: return(p);
959: }
960:
961: copyproto()
962: {
963: register i;
964: register int *p, *q;
965:
966: p = (int *)&proto;
967: q = (int *)&fields[nfields];
968: for(i=0; i<sizeof(proto)/sizeof(*p); i++)
969: *q++ = *p++;
970: }
971:
972: initree()
973: {
974: register struct btree **tpp, *tp;
975: register int i;
976:
977: for (tp = &(tree[0]), tpp = &(treep[0]), i = TREEZ; --i >= 0;)
978: *tpp++ = tp++;
979: }
980:
981: cmpsave(n)
982: register int n;
983: {
984: register int award;
985:
986: if (n < 2) return (0);
987: for (n++, award = 0; (n >>= 1) > 0; award++);
988: return (award);
989: }
990:
991:
992: field(s,k)
993: char *s;
994: {
995: register struct field *p;
996: register d;
997: p = &fields[nfields];
998: d = 0;
999: for(; *s!=0; s++) {
1000: switch(*s) {
1001: case '\0':
1002: return;
1003:
1004: case 'b':
1005: p->bflg[k]++;
1006: break;
1007:
1008: case 'd':
1009: p->ignore = dict+128;
1010: break;
1011:
1012: case 'f':
1013: p->code = fold+128;
1014: break;
1015: case 'i':
1016: p->ignore = nonprint+128;
1017: break;
1018:
1019: case 'c':
1020: cflg = 1;
1021: continue;
1022:
1023: case 'm':
1024: mflg = 1;
1025: continue;
1026:
1027: case 'M':
1028: p->fcmp = MON;
1029: break;
1030:
1031: case 'n':
1032: p->fcmp = NUM;
1033: break;
1034: case 'g':
1035: p->fcmp = FLOAT;
1036: break;
1037: case 't':
1038: tabchar = *++s;
1039: if(tabchar == 0) s--;
1040: continue;
1041:
1042: case 'r':
1043: p->rflg = -1;
1044: continue;
1045: case 'u':
1046: uflg = 1;
1047: continue;
1048:
1049: case 'y':
1050: if ( *++s ) tryfor = number(&s);
1051: else {
1052: --s;
1053: tryfor = MAXMEM;
1054: }
1055: continue;
1056:
1057: case 'z':
1058: if ( *++s )
1059: maxrec = number(&s);
1060: else --s;
1061: continue;
1062:
1063: case '.':
1064: if(p->m[k] == -1) /* -m.n with m missing */
1065: p->m[k] = 0;
1066: d = &fields[0].n[0]-&fields[0].m[0];
1067: if (*++s == '\0') {
1068: --s;
1069: p->m[k+d] = 0;
1070: continue;
1071: }
1072:
1073: default:
1074: p->m[k+d] = number(&s);
1075: }
1076: compare = cmp;
1077: }
1078: }
1079:
1080: number(ppa)
1081: char **ppa;
1082: {
1083: int n;
1084: register char *pa;
1085: pa = *ppa;
1086: n = 0;
1087: while(isdigit(*pa)) {
1088: n = n*10 + *pa - '0';
1089: *ppa = pa++;
1090: }
1091: return(n);
1092: }
1093:
1094: #define qsexc(p,q) t= *p;*p= *q;*q=t
1095: #define qstexc(p,q,r) t= *p;*p= *r;*r= *q;*q=t
1096:
1097: qsort(a,l)
1098: char **a, **l;
1099: {
1100: register char **i, **j;
1101: register char **lp, **hp;
1102: char **k;
1103: int c, d;
1104: char *t;
1105: unsigned n;
1106:
1107:
1108: start:
1109: if((n=l-a) <= 1)
1110: return;
1111:
1112: i = a;
1113: j = l - 1;
1114: if (n == 2) {
1115: if (c = (*compare)(*i, *j)) {
1116: if (c > 0) {
1117: qsexc(i, j);
1118: }
1119: } else if (uflg) **i = '\0';
1120: return;
1121: }
1122: n /= 2;
1123: lp = hp = a + n;
1124: c = (*compare)(*i, *lp);
1125: d = (*compare)(*lp, *j);
1126: if (c == 0) {
1127: if (d == 0) { /* x x x */
1128: --lp; qsexc(i, lp);
1129: hp++; qsexc(hp, j);
1130: } else if (d > 0) { /* x x w */
1131: qsexc(i, j);
1132: i++;
1133: hp++; qsexc(hp, j);
1134: } else { /* x x y */
1135: --j;
1136: --lp; qsexc(i, lp);
1137: }
1138: } else if (d == 0) {
1139: if (c < 0) { /* w x x */
1140: i++;
1141: hp++; qsexc(hp, j);
1142: } else { /* y x x */
1143: qsexc(i, j);
1144: --j;
1145: --lp; qsexc(i, lp);
1146: }
1147: } else if (c > 0) {
1148: if (d > 0) { /* z y x */
1149: qsexc(i, j);
1150: i++; --j;
1151: } else { /* z y z' */
1152: qsexc(i, lp);
1153: i++;
1154: if (c = (*compare)(*lp, *j)) {
1155: if (c > 0) {
1156: qsexc(lp, j);
1157: }
1158: --j;
1159: } else {
1160: hp++; qsexc(hp, j);
1161: }
1162: }
1163: } else {
1164: if (d > 0) { /* y z y' */
1165: qsexc(lp, j);
1166: --j;
1167: if (c = (*compare)(*i, *lp)) {
1168: if (c > 0) {
1169: qsexc(i, lp);
1170: }
1171: i++;
1172: } else {
1173: --lp;
1174: qsexc(i, lp);
1175: }
1176: } else { /* x y z */
1177: i++; --j;
1178: }
1179: }
1180:
1181:
1182: for(;;) {
1183: if(i < lp) {
1184: if((c = (*compare)(*i, *lp)) == 0) {
1185: --lp;
1186: qsexc(i, lp);
1187: continue;
1188: }
1189: if(c < 0) {
1190: ++i;
1191: continue;
1192: }
1193: }
1194:
1195: loop:
1196: if(j > hp) {
1197: if((c = (*compare)(*hp, *j)) == 0) {
1198: ++hp;
1199: qsexc(hp, j);
1200: goto loop;
1201: }
1202: if(c > 0) {
1203: if(i == lp) {
1204: ++hp;
1205: qstexc(i, hp, j);
1206: i = ++lp;
1207: goto loop;
1208: }
1209: qsexc(i, j);
1210: --j;
1211: ++i;
1212: continue;
1213: }
1214: --j;
1215: goto loop;
1216: }
1217:
1218:
1219: if(i == lp) {
1220: if(uflg)
1221: for(k=lp; k<hp;) **k++ = '\0';
1222: if(lp-a >= l-hp) {
1223: qsort(hp+1, l);
1224: l = lp;
1225: } else {
1226: qsort(a, lp);
1227: a = hp+1;
1228: }
1229: goto start;
1230: }
1231:
1232:
1233: --lp;
1234: qstexc(j, lp, i);
1235: j = --hp;
1236: }
1237: }
1238:
1239: char * months[] = {
1240: "jan",
1241: "feb",
1242: "mar",
1243: "apr",
1244: "may",
1245: "jun",
1246: "jul",
1247: "aug",
1248: "sep",
1249: "oct",
1250: "nov",
1251: "dec"
1252: };
1253: month(s)
1254: char *s;
1255: {
1256: register char *t, *u;
1257: register i;
1258: char *f = fold + 128;
1259:
1260: while(blank(*s))
1261: s++;
1262: for(i=0; i<sizeof(months)/sizeof(*months); i++) {
1263: for(t=s,u=months[i]; f[*t++]==f[*u++]; )
1264: if(*u==0)
1265: return(i);
1266: }
1267: return(-1);
1268: }
1269:
1270:
1271: rderror(s)
1272: char *s;
1273: {
1274: diag("read error on ", s == 0 ? "stdin" : s);
1275: term();
1276: }
1277:
1278: wterror(s)
1279: char *s;
1280: {
1281: diag("write error while ", s);
1282: term();
1283: }
1284:
1285: checkclose(f)
1286: FILE *f;
1287: {
1288: return !ferror(f) && fclose(f)==0;
1289: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.