|
|
1.1 root 1: number.c:
2:
3: /* Copyright 1990, AT&T Bell Labs */
4:
5: /* Canonicalize the number string pointed to by dp, of length
6: len. Put the result in kp.
7:
8: A field of length zero, or all blank, is regarded as 0.
9: Over/underflow is rendered as huge or zero and properly signed.
10: It happens 1e+-1022.
11:
12: Canonicalized strings may be compared as strings of unsigned
13: chars. For good measure, a canonical string has no zero bytes.
14:
15: Syntax: optionally signed floating point, with optional
16: leading spaces. A syntax deviation ends the number.
17:
18: Form of output: packed in 4-bit nibbles. First
19: 3 nibbles count the number N of significant digits
20: before the decimal point. The quantity actually stored
21: is 2048+sign(x)*(N+1024). Further nibbles contain
22: 1 decimal digit d each, stored as d+2 if x is positive
23: and as 10-d if x is negative. Leading and trailing
24: zeros are stripped, and a trailing "digit" d = -1
25: is appended. (The trailing digit handled like all others,
26: so encodes as 1 or 0xb according to the sign of x.)
27: An odd number of nibbles is padded with zero.
28:
29: Buglet: overflow is reported if output is exactly filled.
30:
31: */
32: #include "fsort.h"
33:
34: #define encode(x) (neg? 10-(x): (x)+2)
35: #define putdig(x) (nib? (*dig=encode(x)<<4, nib=0): \
36: (*dig++|=encode(x), nib=1))
37:
38: static int gflag;
39:
40: int
41: gcode(uchar *dp, uchar*kp, int len, struct field *f)
42: <60>{
43: int ret;
44: <60>gflag++;
45: <60>ret = ncode(dp, kp, len, f);
46: <60>gflag--;
47: return <60>ret;
48: }
49:
50: int
51: ncode(uchar *dp, uchar*kp, int len, struct field *f)
52: <374264>{
53: uchar *dig = <374264>kp + 1; /* byte for next digit */
54: int nib = <374264>0; /* high nibble 1, low nibble 0 */
55: uchar *cp = <374264>dp;
56: uchar *ep = <374264>cp + len; /* end pointer */
57: int zeros = <374264>0; /* count zeros seen but not installed */
58: int sigdig = <374264>1024;
59: int neg = <374264>f->rflag; /* 0 for +, 1 for - */
60: int decimal = <374264>0;
61: int n, inv;
62:
63: <374264>kp[1] = 0;
64: for( <374264>; <418322>cp<ep ; <44058>cp++) /* eat blanks */
65: if(<418322>*cp!=' ' && <374272>*cp!='\t')
66: <374264>break;
67: if(<374264>cp < ep) /* eat sign */
68: switch(<374264>*cp) {
69: case '-':
70: <218>neg ^= 1;
71: case '+':
72: <225>cp++;
73: }
74: for( <374264>; <707527>cp<ep; <333263>cp++) /* eat leading zeros */
75: if(<707511>*cp != '0')
76: <374248>break;
77: if(<374264>*cp=='.' && <331112>cp<ep) { /* decimal point among lead zeros */
78: <331112>decimal++;
79: for(<331112>cp++; <367735>cp<ep; <36623>cp++) {
80: if(<367735>*cp != '0')
81: <331112>break;
82: <36623>sigdig--;
83: }
84: }
85: if(<374264>*cp>'9' || <374255>*cp<'0' || <373799>cp>=ep) { /* no sig digit*/
86: <465>sigdig = 0;
87: <465>neg = 0;
88: <465>goto retzero;
89: }
90: for( <373799>; <2405784>cp<ep; <2031985>cp++) {
91: switch(<2071744>*cp) {
92: default:
93: <39726>goto out;
94: case '.':
95: if(<16>decimal)
96: <2>goto out;
97: <14>decimal++;
98: <14>continue;
99: case '0':
100: <132382>zeros++;
101: if(<132382>!decimal)
102: <3976>sigdig++;
103: <132382>continue;
104: case '1': case '2': case '3': case '4': case '5':
105: case '6': case '7': case '8': case '9':
106: for( <1899589>; <2027985>zeros>0; <128396>zeros--)
107: <128396>putdig<65793>(0);
108: <1899589>n = *cp - '0';
109: <1899589>putdig<929380>(n);
110: if(<1899589>!decimal)
111: <77803>sigdig++;
112: <1899589>continue;
113: case 'e':
114: case 'E':
115: if(<31>!gflag)
116: <6>goto out;
117: <25>inv = 1;
118: if(<25>cp < ep) switch(<25>*++cp) {
119: case '-':
120: <13>inv = -1;
121: case '+':
122: <15>cp++;
123: }
124: if(<25>*cp>'9' || <25>*cp<'0' || <21>cp>=ep)
125: <4>goto out;
126: for(<21>n=0; <62>cp<ep; <41>cp++) {
127: int c = <44>*cp;
128: if(<44>c<'0' || <44>c>'9')
129: <3>break;
130: if(<41>(n = 10*n+c-'0') >= 0)
131: <41>continue;
132: <0>sigdig = 2047*inv;
133: <0>goto out;
134: }
135: <21>sigdig += n*inv;
136: <21>goto out;
137: }
138: }
139: out:
140: if(<373799>sigdig<0 || <373799>sigdig>=2047) {
141: <0>sigdig = sigdig<0? <0>0: <0>2047;
142: <0>warn("numeric field overflow", (char*)dp, len);
143: <0>dig = kp + 1;
144: <0>*dig = 0;
145: <0>nib = 0;
146: }
147: retzero:
148: if(<374264>neg)
149: <219>sigdig = 2048 - sigdig;
150: else
151: <374045>sigdig = 2048 + sigdig;
152: <374264>kp[0] = sigdig >> 4;
153: <374264>kp[1] |= sigdig << 4;
154: <374264>putdig<37639>(-1);
155: return <374264>dig - kp + 1 - nib;
156: }
157: rsort.c:
158:
159: /* Copyright 1990, AT&T Bell Labs */
160: #include "fsort.h"
161:
162: /* Radix sort by bytes. Records are chained in a list.
163: There are up to 257 bins at each recursion level.
164: The "null bin" is for short strings that have run out;
165: the rest are for byte values 0-255 */
166:
167: int aflag = 0;
168: int uflag = 0;
169: int rflag = 0;
170: int sflag = 0;
171:
172: /* Sort the list s by distributing according to
173: digit d into an array of bins, sorting the bins
174: recursively, and re-collecting them back into the list.
175: If uflag is nonzero return only 1 representative of
176: each equivalence class. The squeeze step noted
177: below determines what bins are in use, and squeezes
178: those into a stack frame, which is contiguous to s.
179: The null bin is handled separately to cut the span
180: of the squeeze loop.
181: Precondition: length(s) > 1 */
182:
183: #define tiebreak(t) \
184: if(uflag) { \
185: t->tail = !aflag? t->head: \
186: listaccum(t->head, t->tail); \
187: t->tail->next = 0; \
188: } else if(keyed && !sflag && t->head->next) { \
189: rflag = fields[0].rflag; \
190: keyed = 0; \
191: sort(t, 0); \
192: keyed = 1; \
193: rflag = 0; \
194: } else
195:
196: void
197: sort(struct list *s, int d)
198: <126745>{
199: extern int uflag;
200: struct list *t;
201: struct rec *r, *q;
202: struct list *frame = <126745>s + 1; /* stack frame */
203: static struct list bins[257];
204: # define nullbin (bins+256)
205: int nbin; /* number of filled bins */
206: struct list *low; /* lowest unsorted nonnull bin */
207: loop:
208: <447576>r = s->head;
209: <447576>low = bins + 256;
210: <447576>nbin = 0;
211: while(<3688466>r) {
212: if(<3240890>keyed)
213: if(<1334292>r->klen > (len_t)d)
214: <1116270>t = bins + key(r)[d];
215: else
216: <218022>t = nullbin;
217: else
218: if(<1906598>r->dlen > (len_t)d)
219: <1714238>t = bins + data(r)[d];
220: else
221: <192360>t = nullbin;
222: if(<3240890>t->head == 0) {
223: if(<555902>low > t)
224: <363502>low = t;
225: <555902>t->head = r;
226: <555902>nbin++;
227: } else
228: <2684988>t->tail->next = r; /* stable sort */
229: <3240890>t->tail = r;
230: <3240890>r = r->next;
231: }
232: <447576>t = frame; /* t is stack top */
233: if(<447576>t+nbin > stackmax)
234: <0>fatal("stack overflow", "", 0);
235: # define push(b) /* onto stack */ \
236: *t = *b, \
237: t->tail->next = 0, \
238: t++, \
239: b->head = 0, \
240: nbin--
241: if(<447576>nullbin->head)
242: <106517>push(nullbin);
243: for( <447576>; <1381435>nbin>0; <933859>low++)
244: if(<933859>low->head)
245: <449385>push(low);
246: if(<447576>t == frame+1) { /* tail recursion */
247: /* debug(frame, t, d);*/
248: <427056>r = frame->head;
249: if(<427056>r->len[keyed] <= d) {
250: <106225>tiebreak(<26635>frame);
251: <106225>*s = *frame;
252: return<106225>; /* string ran out */
253: }
254: <320831>d++;
255: <320831>goto loop;
256: }
257: /* debug(frame, t, d);*/
258: <20520>t--;
259: if(<20520>t->head->next) /* skip 1-item list */
260: <13713>sort(t, d+1);
261: if(<20520>!rflag) { /* forward order */
262: <15796>r = t->tail;
263: while(<112441>t > frame) {
264: <96645>q = t->head;
265: if(<96645>(--t>frame || <15796>t->head->len[keyed]>d)
266: && <96353>t->head->next)
267: <82209>sort(t, d+1);
268: else if(<14436>t==frame)
269: <3163>tiebreak(t);<295>
270: <96645>t->tail->next = q; /* concatenate */
271: }
272: <15796>s->head = frame->head;
273: <15796>s->tail = r;
274: } else { /* reverse order */
275: <4724>r = t->head;
276: while(<16405>t > frame) {
277: <11681>q = t->tail;
278: if(<11681>(--t>frame || <4724>t->head->len[keyed]>d)
279: && <11681>t->head->next)
280: <4070>sort(t, d+1);
281: else if(<7611>t==frame)
282: <3881>tiebreak(t);<1360>
283: <11681>q->next = t->head; /* concatenate */
284: }
285: <4724>s->head = r;
286: <4724>s->tail = frame->tail;
287: }
288: /* printf("return\n"); debug(s,s+1,d);*/
289: <20520>}
290:
291: /*
292: debug(struct list *stack, struct list *top, int d)
293: {
294: printf("----------------------level %d\n", d);
295: while(stack<top) {
296: printout(stack->head, stdout, "stdout");
297: printf("----------------------\n");
298: stack++;
299: }
300: }
301: */
302: merge.c:
303:
304: /* Copyright 1990, AT&T Bell Labs */
305: #include <stdlib.h>
306: #include <string.h>
307: #include "fsort.h"
308:
309: char *disorder = "key out of order";
310: char *duplicate = "duplicate key";
311: char *space = "out of space for merging";
312:
313: enum { IEOF, IOK };
314: enum { IPRE = 01000, IFOL = 02000 }; /* see find() */
315: enum { NMERGE = 16 };
316:
317: int nmerge = NMERGE; /* max number of files to merge at once */
318: int nextfile;
319:
320: struct merge { /* current record in an open file */
321: char *name; /* name of file (for diagnostics) */
322: FILE *file;
323: struct rec *rec; /* pointer to the data (and key) */
324: short serial; /* file no., breaks ties for stable sort */
325: short del; /* delete at close */
326: };
327:
328: struct merge *mfile; /* one struct per open file */
329: struct merge **f; /* pointers to mfile, ordered by content */
330:
331: static void mergephase(int, char*);
332: static int find(int, int*);
333: static int fillrec(struct merge*);
334: static int compare(struct merge*, struct merge*);
335: static void syncerr(struct merge*, char*);
336: extern int link(char*, char*);
337: extern int unlink(char*);
338: static void recinit(int nf, int nm, int n);
339: static void mv(int, int);
340:
341: static void
342: recalloc(struct merge *m)
343: <1241>{
344: if(<1241>m->rec)
345: return<784>; /* hygiene; doesn't happen */
346: <457>m->rec = (struct rec*)malloc(MINREC);
347: if(<457>m->rec == 0)
348: <0>fatal(space, "", 0);
349: <457>m->rec->dlen = 0;
350: <457>m->rec->next = (struct rec*)((uchar*)m->rec + MINREC);
351: <457>}
352:
353: /* misfortune : all fields are parsed and converted
354: before comparison. Lazy parsing might be in order,
355: but it's too hard to contemplate */
356:
357: /* Merge strategy, to merge N inputs in groups of at most M.
358: Numbers, like (1), are keyed to comments in code.
359: If N <= M, merge everything. (4)
360: If N > M*(M-1)+1
361: greedily merge M at a time (1) to reduce to the next case.
362: Merge x bunches of M (3) and one bunch of y (2),
363: then merge the x+1 new files (4) with the remaining z files
364: in an exactly M-way merge. Calculate x,y,z thus:
365: Mx + y + z = N file count
366: x + 1 + z = M final merge
367: 2 <= y <= M
368: x >= 0
369: z >= 0
370: by algebra
371: (M-1)x + y = N-M+1
372: y == (N-M+1) mod (M-1), 2 <= y <= M
373: x == (n-M+1-y)/(M-1)
374: Total cost is Mx + y + N. (Strategy due to P. McIlroy.) */
375:
376: /* merge n files. the first unmerged temp file is nm (i.e.
377: nm is the number of already merged files). the first
378: unmerged input file is nf.
379: rename temp files to keep a dense set beginning at 0
380: (It seems silly to rename 100 files after merging
381: every 1; how much time does that actually cost?) */
382: void
383: merge1(int nf, int nm, int n)
384: <161>{
385: char *name;
386: int i, j;
387: int flag = <161>nf<nfiles;
388: <161>recinit(nf, nm, n);
389: <161>name = filename(nextfile++);
390: <161>mergephase(n, name);
391: <161>free(name);
392: if(<161>flag)
393: return<137>;
394: <24>mv(--nextfile, nm);
395: for(<24>i=nm+n, j=nm+1; <180>i<nextfile; <156>i++, j++)
396: <156>mv(i, j);
397: <24>nextfile -= n - 1;
398: <24>}
399:
400: /* if flag = 0 merge input files; else temps only.
401: this program is specialized to two levels of merge;
402: should fix that some day. */
403:
404: void
405: merge(int flag)
406: <68>{
407: char buf[BUFSIZ];
408: FILE *input, *output;
409: int n, y;
410: char *name;
411: int nf = <68>0; /* next input file to merge */
412: int nm = <68>0; /* total already merged once */
413: int nt = <68>nfiles; /* files as yet unmerged */
414: if(<68>flag) {
415: <21>nf = nfiles;
416: <21>nt = nextfile;
417: }
418: if(<68>mfile == 0)
419: <49>mfile = (struct merge*)calloc(nmerge+1,
420: sizeof(*mfile));
421: if(<68>f == 0)
422: <49>f = (struct merge**)calloc(nmerge+1,
423: sizeof(*f));
424: if(<68>mfile==0 || <68>f==0)
425: <0>fatal(space,"",0);
426:
427: if(<68>nt > nmerge*(nmerge-1)+1) { /* (1) */
428: for(<19>nm=0; <135>nt>0; <116>) {
429: <116>n = nt>nmerge? <97>nmerge: <19>nt;
430: <116>merge1(nf, nm, n);
431: if(<116>flag) <0>nf += n;
432: <116>nt -= n;
433: <116>nm++;
434: }
435: <19>merge(1);
436: return<19>;
437: }
438: if(<49>nt > nmerge) { /* (2) */
439: <30>y = (nt-nmerge+1)%(nmerge-1);
440: if(<30>y <= 1) <14>y += nmerge-1;
441: <30>merge1(nf, nm, y);
442: <30>nf = nf+y>=nfiles? <18>nfiles: <12>nf+y;
443: <30>nt -= y;
444: <30>nm++;
445: }
446: while(<64>nt+nm > nmerge) { /* (3) */
447: <15>merge1(nf, nm, nmerge);
448: <15>nf = nf+nmerge>=nfiles? <6>nfiles: <9>nf+nmerge;
449: <15>nt -= nmerge;
450: <15>nm++;
451: }
452:
453: <49>n = nm+nt; /* (4) */
454: <49>recinit(nf, 0, n);
455: if(<49>!overwrite(nf))
456: <46>name = oname;
457: else
458: <3>name = filename(nextfile++);
459: <49>mergephase(n, name);
460: if(<48>name == oname)
461: return<45>;
462: <3>input = fileopen(name, "r");
463: <3>output = fileopen(oname, "w");
464: while(<88>n = fread(buf, 1, sizeof(buf), input))
465: if(<85>fwrite(buf, 1, n, output) != n)
466: <0>fatal("error writing", oname, 0);
467: <3>fileclose(input, name);
468: <3>unlink(name);
469: <3>free(name);
470: <3>fileclose(output, oname);
471: <3>}
472:
473: /* Initialize merge records for n files, beginning with
474: input file number nf and temp file number nm.
475:
476: There are nfiles-nf input files; any files beyond that
477: number must be temporaries, which come from already
478: merged inputs. (The only time that both kinds of
479: files are present is the very last merge, when for
480: stability, the temps are regarded as earlier.) */
481:
482: static void
483: recinit(int nf, int nm, int n)
484: <210>{
485: int i;
486: struct merge *m;
487: int last = <210>nfiles-nf + nextfile-nm == n;
488: for(<210>i=0; <1277>i<=n; <1067>i++) /* one extra, for mergephase() */
489: <1067>recalloc(&mfile[i]);
490: for(<210>i=0; <1067>i<n; <857>i++) {
491: <857>m = &mfile[i];
492: <857>m->del = nf>=nfiles || <635>last && <126>i<nextfile;
493: <857>m->name = m->del? <243>filename(nm++): <614>files[nf++];
494: <857>m->file = fileopen(m->name, "r");
495: <857>m->serial = i;
496: <857>f[i] = m;
497: }
498: <210>f[n] = &mfile[n];
499: <210>}
500:
501: static void
502: mv(int i, int j)
503: <180>{
504: char *old, *new;
505: if(<180>i == j) return<0>; /* believed not to happen */
506: <180>old = filename(i);
507: <180>new = filename(j);
508: <180>unlink(new);
509: if(<180>link(old,new) == -1 || <180>unlink(old) == -1)
510: <0>fatal("cannot move", old, 0);
511: <180>free(old);
512: <180>free(new);
513: <180>}
514:
515: static void
516: emit(FILE *output)
517: <22296>{
518: struct merge *m = <22296>f[0];
519: int n = <22296>m->rec->dlen;
520: uchar *p = <22296>data(m->rec); /* hanky panky for speed */
521: uchar *e = <22296>p + n++; /* use fwrite instead of */
522: int c = <22296>*e; /* printf */
523: <22296>*e = '\n';
524: if(<22296>fwrite((char*)p, 1, n, output) != n)
525: <0>fatal("error writing", m->name, 0);
526: <22296>*e = c;
527: <22296>}
528:
529: /* Merge files in mfile[0]..mfile[n-1].
530: f[0]..f[l-1] points to files in increasing order
531: of their current records. All the current records
532: are strictly ordered, either by tie-breaking, or
533: by skipping records that compare equal under -u.
534: There are two phases: (a) initialization (reading
535: from a file when there is no record at hand) and
536: (b) continuation (reading the next record with the
537: intent of displacing the current record).
538: These conditions arise, at places numbered in comments
539: (1) the record compares equal to another, and would follow
540: that other if ties were broken
541: (2) the record compares equal to another, and would precede
542: that other if ties were broken
543: (3) the record falls between two others
544:
545: At all times the keys pointed to by f[0]..f[l]
546: are distinct (perhaps due to tie-breaking), as
547: are the files they come from. When a new key
548: is read, its proper home (in tie-broken order)
549: is between f[j-1] and f[j].
550: The picture below shows the hardest case (2b),
551: where key a from file F is about to be emitted.
552: All keys in the sequence axby are distinct, as
553: are the files they come from. A new key is read
554: from file F into the extra space pointed to by
555: f[i]. (Keeping one extra key allows disorder to
556: be detected almost for free.) The
557: new key is equal to key b pointed to by f[j]
558: and file F precedes file G in stability order.
559: Thus b(F) deserves to be kept and b(G) discarded.
560: 0 j l=n
561: --------------------------------------------
562: | a(F) | x | b(G) | y | b(F) |
563: --------------------------------------------
564:
565: We emit the record at 0, and displace the key
566: at j, from whose file there is now no record
567: present; l decreases by one.
568: 0 j l n
569: --------------------------------------------
570: | x | b(F) | y | ?(G) | |
571: --------------------------------------------
572:
573: This takes us back to initialization. From
574: this configuration it is possible to reach
575: case (2a), like (2b) except that the key at 0
576: is not to be emitted; (2a) cannot happen during
577: the original initialization.
578: */
579:
580: #define moveup(a,n) memmove(a+1, a, (n)*sizeof(*a))
581: #define movedown(a,n) memmove(a-1, a, (n)*sizeof(*a))
582:
583: static void
584: mergephase(int n, char *name)
585: <210>{
586: int j, flags;
587: struct merge *mp;
588: struct rec *r;
589: int l = <210>0;
590: int k = <210>-1; /* for spotting sync errors */
591: FILE *output = fileopen(<210>name, "w");
592: init: while(<19606>l < n) { /*(a)*/
593: <19124>mp = f[l];
594: if(<19124>fillrec(mp) == IEOF) {
595: <52>movedown(f+l+1, n-l);
596: <52>f[n--] = mp;
597: <52>continue;
598: }
599: <19072>j = find(l, &flags);
600: if(<19072>j <= k)
601: <0>syncerr(mp, disorder);
602: if(<19072>flags & IFOL) { /*(1a)*/
603: if(<17234>!aflag || <17169>doaccum(f[j-1]->rec, mp->rec))
604: <17234>continue;
605: } else if(<1838>flags & IPRE) /*(2a)*/
606: if(<761>!aflag || <758>doaccum(mp->rec, f[j]->rec)) {
607: <761>f[l] = f[j];
608: <761>f[j] = mp;
609: <761>k = j;
610: <761>continue;
611: }
612: <1077>moveup(f+j, l-j); /*(3a)*/
613: <1077>f[j] = mp;
614: <1077>l++;
615: }
616: while(<24714>n > 0) { /*(b)*/
617: <24505>mp = f[l];
618: <24505>r = mp->rec;
619: <24505>*mp = *f[0];
620: <24505>mp->rec = r;
621: if(<24505>fillrec(mp) == IEOF) {
622: <803>emit(output);
623: <803>mp = f[0];
624: <803>movedown(f+1, l);
625: <803>f[--n] = mp;
626: <803>l = n;
627: <803>continue;
628: }
629: <23702>j = find(l, &flags);
630: if(<23702>j == 0)
631: <1>syncerr(mp, disorder);
632: if(<23701>flags & IFOL) { /*(1b)*/
633: if(<2208>!aflag || <1906>doaccum(f[j-1]->rec, mp->rec))
634: <2208>continue;
635: } else if(<21493>flags & IPRE) /*(2b)*/
636: if(<272>!aflag || <107>doaccum(mp->rec, f[j]->rec)) {
637: <272>emit(output);
638: <272>f[l] = f[j];
639: <272>f[j] = mp;
640: <272>mp = f[0];
641: <272>movedown(f+1, l);
642: <272>f[l--] = mp;
643: <272>k = j - 1;
644: <272>goto init;
645: }
646: <21221>emit(output); /*(3b)*/
647: <21221>mp = f[0];
648: <21221>movedown(f+1, j-1);
649: <21221>f[j-1] = f[l];
650: <21221>f[l] = mp;
651: }
652:
653: <209>fileclose(output, name);
654: <209>}
655:
656: static int
657: fillrec(struct merge *mp)
658: <284696>{
659: struct rec *r = <284696>getline(mp->rec, mp->file);
660: if(<284696>r == 0)
661: return <283702>IOK;
662: if(<994>r == ENDFILE) {
663: <937>fileclose(mp->file, mp->name);
664: if(<937>mp->del)
665: <243>unlink(mp->name);
666: return <937>IEOF;
667: }
668: <57>mp->rec = r;
669: return <57>IOK;
670: }
671:
672: /*
673: Return the proper index for the extra record f[l]
674: to be inserted before (regardless of -u).
675: Under -u, if the extra record is a duplicate, return
676: the index flagged by IFOL or IPRE according as
677: the insertion position follows or precedes a
678: record of the same value.
679: */
680:
681: static int
682: find(int l, int *flags)
683: <42774>{
684: int i, t;
685: int bot = <42774>0;
686: int top = <42774>l;
687: while(<145791>bot < top) {
688: <123492>i = (bot+top)/2;
689: <123492>t = compare(f[l], f[i]);
690: if(<123492>t < 0)
691: <55383>top = i;
692: else if(<68109>t > 0)
693: <39369>bot = i + 1;
694: else if(<28740>uflag) {
695: if(<20475>f[l]->serial < f[i]->serial) {
696: <1033>*flags = IPRE;
697: return <1033>i;
698: } else {
699: <19442>*flags = IFOL;
700: return <19442>i + 1;
701: }
702: }
703: else if(<8265>f[l]->serial < f[i]->serial)
704: <2917>top = i;
705: else
706: <5348>bot = i + 1;
707: }
708: <22299>*flags = 0;
709: return <22299>top;
710: }
711:
712: static int
713: compare(struct merge *mi, struct merge *mj)
714: <364393>{
715: uchar *ip, *jp;
716: uchar *ei, *ej;
717: uchar *trans, *keep;
718: int li, lj, k;
719: if(<364393>simplekeyed) {
720: <19>trans = fields->trans;
721: <19>keep = fields->keep;
722: <19>ip = data(mi->rec);
723: <19>jp = data(mj->rec);
724: <19>ei = ip + mi->rec->dlen;
725: <19>ej = jp + mj->rec->dlen;
726: for( <19>; <13>; <13>ip++, jp++) {
727: while(<35>ip<ei && <29>!keep[*ip]) <3>ip++;
728: while(<35>jp<ej && <29>!keep[*jp]) <3>jp++;
729: if(<32>ip>=ei || <26>jp>=ej) <7>break;
730: <25>k = trans[*ip] - trans[*jp];
731: if(<25>k != 0) <12>break;
732: }
733: if(<19>ip<ei && <13>jp<ej)
734: return <12>k*signedrflag;
735: else if(<7>ip < ei)
736: return <1>signedrflag;
737: else if(<6>jp < ej)
738: return <1>-signedrflag;
739: else if(<5>sflag)
740: return <4>0;
741: } else if(<364374>keyed) {
742: <194802>ip = key(mi->rec);
743: <194802>jp = key(mj->rec);
744: <194802>li = mi->rec->klen;
745: <194802>lj = mj->rec->klen;
746: for(<194802>k=li<lj?<1091>li:<193711>lj; <901648>--k>=0; <706846>ip++, jp++)
747: if(<807963>*ip != *jp)
748: <101117>break;
749: if(<194802>k < 0) {
750: if(<93685>li != lj)
751: <0>fatal("theorem disproved","",0);
752: if(<93685>sflag)
753: return <24419>0;
754: } else
755: return <101117>*ip - *jp;
756:
757: }
758: <238839>li = mi->rec->dlen;
759: <238839>lj = mj->rec->dlen;
760: <238839>ip = data(mi->rec);
761: <238839>jp = data(mj->rec);
762: for(<238839>k=li<lj?<7726>li:<231113>lj; <1907768>--k>=0; <1668929>ip++, jp++)
763: if(<1764308>*ip != *jp)
764: <95379>break;
765: return <238839>(k<0? <143460>li-lj: <95379>*ip-*jp)*signedrflag;
766: }
767:
768: void
769: check(char *name)
770: <87>{
771: int i, t;
772:
773: <87>mfile = (struct merge*)calloc(2,sizeof(*mfile));
774: <87>recalloc(&mfile[0]);
775: <87>recalloc(&mfile[1]);
776: <87>mfile[0].name = mfile[1].name = name;
777: <87>mfile[0].file = mfile[1].file = fileopen(name, "r");
778: if(<87>mfile[0].file == 0)
779: <0>fatal("can't open ", name, 0);
780:
781: if(<87>fillrec(&mfile[0]) == IEOF)
782: return<3>;
783: for(<84>i=1; <240980>fillrec(&mfile[i])!=IEOF; <240896>i^=1) {
784: <240901>t = compare(&mfile[i^1], &mfile[i]);
785: if(<240901>t>0)
786: <4>syncerr(&mfile[i], disorder);
787: else if(<240897>t==0 && <138849>uflag)
788: <1>syncerr(&mfile[i], duplicate);
789: }
790: <79>}
791:
792: static void
793: syncerr(struct merge *m, char *message)
794: <6>{
795: int n = <6>m->rec->dlen;
796: <6>warn("trouble in file", m->name, 0);
797: <6>fatal(message, n?<6>(char*)data(m->rec):"", n);
798: <0>}
799: optiona.c:
800:
801: #include <float.h>
802: #include <math.h>
803: #include <ctype.h>
804: #include <string.h>
805: #include <stdlib.h>
806: #include "fsort.h"
807:
808: /* portability hack for old libc's */
809: #define realloc(p,q) (p?realloc(p,q):malloc(q))
810:
811: struct field accum[NF];
812: int naccum;
813:
814: typedef struct {
815: uchar *e; /* last+1st char */
816: signed char s; /* -1 for neg, 1 for pos */
817: char c; /* 1 if + sign, else 0 */
818: char z; /* 1 for leading 0, else 0 */
819: char p; /* nonzero if a decimal point */
820: int f; /* number of digits after dec. pt. */
821: } desc;
822:
823: #define max(a, b) ((a)>(b)?(a):(b))
824: #define mod10(a) (((a)+20)%10)
825: #define div10(a) (((a)+20)/10-2)
826:
827: /* find least significant digit and other facts about a number
828: in field beginning at a and ending just before e */
829:
830: desc
831: lsd(uchar *a, uchar *e)
832: <99690>{
833: desc d = <99690>{ 0, 1, 0, 0, 0, 0 };
834: while(<271072>a<e && <269069>(*a==' '||<97687>*a=='\t')) <171382>a++;
835: if(<99690>a >= e)
836: <2003>;
837: else if(<97687>*a == '-') {
838: <7505>d.s = -1;
839: <7505>a++;
840: } else if(<90182>*a == '+') {
841: <3>d.c = 1;
842: <3>a++;
843: }
844: if(<99690>a+1<e && <51891>*a=='0' && <13>isdigit(a[1])) d.z = 1;
845: while(<299631>a<e && <199978>isdigit(*a))
846: <199941>a++;
847: if(<99690>a<e && <37>*a=='.') {
848: <13>d.p = 1;
849: while(<38>++a<e && <25>isdigit(*a))
850: <25>d.f++;
851: }
852: <99690>d.e = a;
853: return <99690>d;
854: }
855:
856: /* add the fields at a and b as desc-ribed,
857: putting result, with decimal point omitted,
858: right justified before re.
859: calculates in 10's complement and recomplements
860: magnitude if negative.
861: set *sgn -1 for neg, 1 for signed +, 0 for unsigned. */
862:
863: uchar *
864: add(uchar *re, uchar *a, desc ad, uchar *b, desc bd, int *sgn)
865: <49845>{
866: int d = <49845>0;
867: uchar *t;
868: uchar *r = <49845>re;
869: while(<49848>ad.f > bd.f) {
870: <3>d += (*--ad.e - '0')*ad.s;
871: <3>*--re = mod10(d);
872: <3>d = div10(d);
873: <3>ad.f--;
874: }
875: while(<49861>bd.f > ad.f) {
876: <16>d += (*--bd.e - '0')*bd.s;
877: <16>*--re = mod10(d);
878: <16>d = div10(d);
879: <16>bd.f--;
880: }
881: while(<220476>ad.e>a || <50616>bd.e>b) {
882: if(<170631>ad.f-- == 0) {
883: if(<49843>ad.p)
884: <6>ad.e--;
885: if(<49843>bd.p)
886: <7>bd.e--;
887: }
888: if(<170631>ad.e > a)
889: if(<169860>isdigit(*--ad.e))
890: <139640>d += (*ad.e - '0')*ad.s;
891: else
892: <30220>ad.e = a;
893: if(<170631>bd.e > b)
894: if(<109780>isdigit(*--bd.e))
895: <60307>d += (*bd.e - '0')*bd.s;
896: else
897: <49473>bd.e = b;
898: <170631>*--re = mod10(d);
899: <170631>d = div10(d);
900: }
901: <49845>*sgn = ad.c | bd.c;
902: if(<49845>d > 0)
903: <1>*--re = 1; /* happens only on overflow */
904: else if(<49844>d < 0) {
905: int x = <4997>10;
906: <4997>*sgn = -1;
907: for(<4997>t = r; <33242>--t>=re; <28245>) {
908: <28245>*t = (x - *t)%10;
909: if(<28245>*t != 0)
910: <21111>x = 9;
911: }
912: if(<4997>d < -1)
913: <0>*--re = 1; /* hygiene, can't happen */
914: }
915: while(<81359>re<r && <79328>*re==0)
916: <31514>re++;
917: return <49845>re;
918: }
919:
920: int
921: fieldaccum(uchar *a, uchar *ae, uchar *b, uchar *be)
922: <49845>{
923: static uchar *r, *re; /* not pure ansi (re-r=nil-nil=?) */
924: desc da = <49845>lsd(a, ae);
925: desc db = <49845>lsd(b, be);
926: int sgn, Sgn, f;
927: uchar *v;
928: while(<51247>re-r < max(da.e-a+db.f, db.e-b+da.f) <26>+ 1) {
929: int l = <1402>re - r + 100;
930: <1402>r = (uchar*)<1387>realloc(r, l);
931: if(<1402>r == 0)
932: <0>fatal("out of space","",0);
933: <1402>re = r + l;
934: }
935: <49845>f = max(da.f, db.f);<3>
936: <49845>v = add(re, a, da, b, db, &sgn);
937: <49845>Sgn = sgn != 0;
938: if(<49845>Sgn+max(re-v,f+1)+<46132>(da.p|<3713>db.p) > ae-a)
939: return <1>0;
940: while(<49860>re>v && <47829>--f>=0)
941: <16>*--ae = *--re + '0';
942: while(<49850>--f >= 0)
943: <6>*--ae = '0';
944: if(<49844>da.p | db.p)
945: <10>*--ae = '.';
946: if(<49844>re <= v)
947: <2031>*--ae = '0';
948: while(<188963>re > v)
949: <139119>*--ae = *--re + '0';
950: if(<49844>da.z | db.z)
951: while(<16>ae > a+Sgn)
952: <9>*--ae = '0';
953: if(<49844>sgn > 0)
954: <3>*--ae = '+';
955: else if(<49841>sgn < 0)
956: <4997>*--ae = '-';
957: if(<49844>ae<a || <49844>v<r)
958: <0>fatal("bug in accumulation","",0);
959: while(<93111>ae > a)
960: <43267>*--ae = ' ';
961: return <49844>1;
962: }
963:
964: void
965: chkaccum(struct field *field)
966: <34>{
967: if(<34>field->coder == 0)
968: <34>field->coder = acode;
969: if(<34>field->coder != acode)
970: <0>goto bad;
971: if(<34>field->trans == 0)
972: <33>field->trans = ident;
973: if(<34>field->trans != ident)
974: <1>goto bad;
975: if(<33>field->keep == 0)
976: <31>field->keep = all;
977: if(<33>field->keep != all)
978: <2>goto bad;
979: if(<31>field->rflag)
980: <1>goto bad;
981: return<30>;
982: bad:
983: <4>fatal("improper modifier with -a","",0);
984: return<0>;
985: }
986:
987: /* acode is unlike other coding functions. instead of
988: making a key in the place pointed to by op,
989: it makes a list of field locations. it might
990: be more honest if op were declared void* */
991:
992: struct loc { uchar *from, *to; };
993:
994: int
995: acode(uchar *ip, uchar *op, int len, struct field *f)
996: <99690>{
997: struct loc *lp = <99690>(struct loc*)op;
998: <99690>lp->from = ip;
999: <99690>lp->to = ip + len;
1000: if(<99690>!tab && <99690>!f->bflag &&
1001: <99686>f->begin.fieldno>0 && <99560>f->begin.charno==0)
1002: <99556>lp->from++;
1003: return <99690>sizeof(*lp);
1004: }
1005:
1006: int
1007: doaccum(struct rec *arec, struct rec *brec)
1008: <44842>{
1009: int i;
1010: struct loc aloc[NF], bloc[NF];
1011: <44842>fieldcode(data(arec), (uchar*)aloc, arec->dlen,
1012: (uchar*)-1, accum, naccum);
1013: <44842>fieldcode(data(brec), (uchar*)bloc, brec->dlen,
1014: (uchar*)-1, accum, naccum);
1015: for(<44842>i=0; <94686>i<naccum; <49844>i++)
1016: if(<49845>!fieldaccum(aloc[i].from, aloc[i].to,
1017: bloc[i].from, bloc[i].to)) {
1018: if(<1>i==0)
1019: <1>warn("sum too long; record kept: ",
1020: (char*)data(brec), brec->dlen);
1021: else
1022: <0>fatal("sum too long on adding: ",
1023: (char*)data(brec), brec->dlen);
1024: return <1>0;
1025: }
1026: return <44841>1;
1027: }
1028:
1029: struct rec *
1030: listaccum(struct rec *head, struct rec *tail)
1031: <134>{
1032: struct rec *q = <134>head;
1033: struct rec *p = <134>head;
1034: for(<134>;<24902>;<24902>) {
1035: if(<25036>q == tail)
1036: <134>break;
1037: <24902>q = q->next;
1038: if(<24902>doaccum(p, q))
1039: <24901>p->next = q->next;
1040: else
1041: <1>p = q;
1042: }
1043: return <134>p;
1044: }
1045: fsort.c:
1046:
1047: /* Copyright 1990, AT&T Bell Labs */
1048: #include <stdlib.h>
1049: #include <string.h>
1050: #include <ctype.h>
1051: #include "fsort.h"
1052:
1053: int mflag = 0;
1054: int cflag = 0;
1055: int keyed = 0;
1056:
1057: extern void readin(void);
1058: extern void dumptotemp(void);
1059: extern void sealstack(struct rec *p);
1060: extern void filelist(int, char**);
1061: extern int getopt(int, char*const*, const char*);
1062: extern char *optarg;
1063: extern int optind;
1064:
1065: FILE *input;
1066: char *oname = "-";
1067: char *tname[] = { "/usr/tmp"/*substitutable*/, "/usr/tmp", "/tmp", 0 };
1068:
1069: char **files;
1070: int nfiles;
1071: char *nofiles[] = { "-", 0 };
1072:
1073: main(int argc, char **argv)
1074: <311>{
1075: int c, n;
1076: static char s[3] = { '-' };
1077: for(<311>;<590>;<590>) {
1078: if(<901>optind<argc && <873>argv[optind][0]=='+' &&
1079: <26>isdigit(argv[optind][1])) {
1080: <26>optind += fieldarg(argv[optind],
1081: argv[optind+1]);
1082: <26>continue;
1083: }
1084: <875>n = optind; /* for sake of case '?' */
1085: <875>c = getopt(argc,argv,"a:bcdfgik:mno:rst:uw:y:MT:");
1086: switch(<875>c) {
1087: case '?':
1088: <16>fatal("bad option", argv[n], 0);
1089: default:
1090: <0>fatal("implementation error 1","",0);
1091: case 'b':
1092: case 'd':
1093: case 'f':
1094: case 'g':
1095: case 'i':
1096: case 'n':
1097: case 'M':
1098: case 'r':
1099: <97>s[1] = c;
1100: <97>fieldarg(s, 0);
1101: <97>continue;
1102: case 'c':
1103: <88>cflag++;
1104: <88>continue;
1105: case 'm':
1106: <49>mflag++;
1107: <49>continue;
1108: case 's':
1109: <9>sflag++;
1110: <9>continue;
1111: case 'u':
1112: <17>uflag++;
1113: <17>sflag++;
1114: <17>continue;
1115: case 'a':
1116: <35>aflag++;
1117: <35>uflag++;
1118: <35>sflag++;
1119: <35>optionk(optarg, accum, &naccum);
1120: <35>continue;
1121: case 'k':
1122: <181>optionk(optarg, fields, &nfields);
1123: <175>continue;
1124: case 'o':
1125: <5>oname = optarg;
1126: <5>continue;
1127: case 'T':
1128: <1>tname[0] = optarg;
1129: <1>continue;
1130: case 'w':
1131: <33>nmerge = atoi(optarg);
1132: if(<33>nmerge < 2)
1133: <0>fatal("-w too small","",0);
1134: <33>continue;
1135: case 'y':
1136: <5>optiony(optarg);
1137: <5>continue;
1138: case 't':
1139: if(<51>strlen(optarg) != 1)
1140: <1>fatal("bad argument for -t", "", 0);
1141: <50>tab = optarg[0];
1142: <50>continue;
1143: case 'z':
1144: <0>warn("nonstandard option -z ignored","",0);
1145: case -1:
1146: <288>break;
1147: }
1148: <288>break;
1149: }
1150: <288>filelist(argc, argv);
1151: if(<288>cflag)
1152: <88>aflag = 0;
1153: <288>fieldwrapup();
1154: <282>tabinit();
1155: <282>setsigs(cleanup);
1156:
1157: if(<282>cflag) {
1158: if(<88>nfiles > 1)
1159: <1>fatal("-c takes just one file", "", 0);
1160: <87>check(files[0]);
1161: return <82>0;
1162: }
1163: if(<194>mflag) {
1164: <47>merge(0);
1165: return <46>0;
1166: }
1167: for(<147>n=0; <820>n<nfiles; <673>n++) {
1168: <676>input = fileopen(files[n], "r");
1169: <674>readin();
1170: <674>fileclose(input, files[n]);
1171: }
1172: if(<144>stack->head==0 && <12>nextfile==0) { /* empty input */
1173: if(<12>strcmp(oname,"-") != 0)
1174: <4>fileclose(fileopen(oname, "w"), oname);
1175: return <12>0;
1176: }
1177: if(<132>stack->head && <132>stack->head->next)
1178: <120>sort(stack, 0);
1179: if(<132>nextfile > 0) {
1180: if(<2>stack->head)
1181: <2>dumptotemp();
1182: <2>tabfree();
1183: <2>merge(1);
1184: } else {
1185: FILE *f;
1186: <130>f = fileopen(oname, "w");
1187: <129>printout(stack->head, f, oname);
1188: <129>fileclose(f, oname);
1189: }
1190: return <131>0;
1191: }
1192:
1193: void
1194: filelist(int argc, char **argv)
1195: <288>{
1196: int i, j;
1197: <288>files = nofiles;
1198: <288>nfiles = argc - optind;
1199: if(<288>nfiles != 0)
1200: <260>files = argv + optind;
1201: else
1202: <28>nfiles = 1;
1203: if(<288>strcmp(argv[optind-1], "--") == 0)
1204: return<2>;
1205: for(<286>i=j=0; <1673>i<nfiles; <1387>i++) {
1206: if(<1387>strncmp(files[i], "-o", 2) == 0) {
1207: if(<6>files[i][2] != 0)
1208: <1>oname = files[i] + 2;
1209: else if(<5>i < argc-1)
1210: <5>oname = files[++i];
1211: else
1212: <0>fatal("incomplete -o", "", 0);
1213: } else
1214: <1381>files[j++] = files[i];
1215: }
1216: <286>files[j] = 0;
1217: <286>nfiles = j;
1218: <286>}
1219:
1220: void
1221: readin(void)
1222: <674>{
1223: int n;
1224: struct rec *new;
1225: struct rec *p = <674>stack->tail;
1226: struct rec *r = <674>p? <529>succ(p): buffer;
1227:
1228: for(<674>;<342230>;<342230>) {
1229: if(<342904>bufmax-(uchar*)r < MINREC) {
1230: <78>sealstack(p);
1231: <78>dumptotemp();
1232: <78>p = 0;
1233: <78>r = buffer;
1234: }
1235: <342904>r->next = (struct rec*)bufmax;
1236: <342904>new = getline(r, input);
1237: recenter:
1238: if(<342906>new == 0) {
1239: <342230>r->next = 0;
1240: if(<342230>p)
1241: <342017>p->next = r;
1242: <342230>p = r;
1243: <342230>r = succ(r);
1244: } else if(<676>new == ENDFILE) {
1245: <674>sealstack(p);
1246: return<674>;
1247: } else {
1248: <2>sealstack(p);
1249: <2>dumptotemp();
1250: <2>p = 0;
1251: <2>r = buffer;
1252: <2>n = data(new)-(uchar*)new+new->dlen+new->klen;
1253: if(<2>(uchar*)r+n > bufmax)
1254: <0>fatal("monster record", "", 0);
1255: <2>memmove(r, new, n);
1256: <2>free(new);
1257: <2>new = 0;
1258: <2>goto recenter;
1259: }
1260: }
1261: }
1262:
1263: void
1264: sealstack(struct rec *p)
1265: <754>{
1266: if(<754>p == 0)
1267: return<12>;
1268: <742>p->next = 0;
1269: if(<742>stack->head == 0)
1270: <213>stack->head = buffer;
1271: <742>stack->tail = p;
1272: <742>}
1273:
1274: void
1275: printout(struct rec *r, FILE *f, char *name)
1276: <211>{
1277: int c, n;
1278: uchar *dp, *ep;
1279: for( <211>; <248661>r; <248450>r=r->next) {
1280: <248450>dp = data(r);
1281: <248450>n = r->dlen;
1282: <248450>ep = dp + n++;
1283: <248450>c = *ep;
1284: <248450>*ep = '\n';
1285: if(<248450>fwrite((char*)dp, 1, n, f) != n)
1286: <0>fatal("error writing", name, 0);
1287: <248450>*ep = c;
1288: }
1289: <211>}
1290:
1291: void
1292: dumptotemp()
1293: <82>{
1294: char *tempfile = <82>filename(nextfile++);
1295: FILE *temp = fileopen(<82>tempfile,"w");
1296:
1297: if(<82>stack->head == 0)
1298: <0>fatal("monster record", "", 0);
1299: <82>stack->tail->next = 0; /* for good measure */
1300: <82>sort(stack, 0);
1301: <82>printout(stack->head, temp, tempfile);
1302: <82>fileclose(temp, tempfile);
1303: <82>free(tempfile);
1304: <82>stack->head = stack->tail = 0;
1305: return<82>;
1306: }
1307: field.c:
1308:
1309: /* Copyright 1990, AT&T Bell Labs */
1310: #include <stdlib.h>
1311: #include <ctype.h>
1312: #include "fsort.h"
1313:
1314:
1315:
1316: static char *modifiers(struct field*, char*, int);
1317: static char *keyspec(struct pos*, char*);
1318: static void globalmods(struct field*);
1319: static void chkfieldno(struct field*);
1320:
1321: struct field fields[NF] = {
1322: { 0, 0, 0, 0, 0, 0, 0, { 0, 0 }, { NP, 0 } }
1323: };
1324: int nfields = 0;
1325:
1326: int tab;
1327: int signedrflag;
1328: int simplekeyed;
1329:
1330: #define blank(p) (*(p)==' ' || *(p)=='\t')
1331:
1332: enum { OLD, NEW };
1333:
1334: /* interpret 1 or 2 arguments and return how many */
1335: int
1336: fieldarg(char *argv1, char *argv2)
1337: <123>{
1338: char *av1 = <123>argv1;
1339: char *av2 = <123>argv2;
1340: struct field *field;
1341:
1342: if(<123>av1[0] == '+' && <26>isdigit(av1[1])) {
1343: if(<26>++nfields >= NF)
1344: <0>fatal("too many fields", argv1, 0);
1345: <26>field = &fields[nfields];
1346: <26>field->end.fieldno = NP+1;
1347: <26>field->style = OLD;
1348:
1349: <26>av1 = keyspec(&field->begin, av1+1);
1350: if(<26>*modifiers(field, av1, 0))
1351: <0>goto bad;
1352:
1353: if(<26>av2==0 || <25>av2[0]!='-' || <12>!isdigit(av2[1]))
1354: return <14>1;
1355: <12>av2 = keyspec(&field->end, av2+1);
1356: <12>argv1 = argv2; /* in case of diagnostic */
1357: if(<12>*modifiers(field, av2, 1))
1358: <0>goto bad;
1359: return <12>2;
1360: } else if(<97>*modifiers(fields, av1+1, -1))
1361: <0>goto bad; /* believed not to happen */
1362: return <97>1;
1363: bad:
1364: <0>fatal("bad field specification", argv1, 0);
1365: return <0>0; /* dummy */
1366: }
1367:
1368: void
1369: optionk(char *arg, struct field *fields, int *nfields)
1370: <216>{
1371: char *a = <216>arg;
1372: struct field *field;
1373: if(<216>++*nfields >= NF)
1374: <0>fatal("too many fields", arg, 0);
1375: <216>field = &fields[*nfields];
1376: <216>field->begin.charno = 1;
1377: <216>field->end.fieldno = NP+1;
1378: <216>field->style = NEW;
1379:
1380: <216>a = keyspec(&field->begin, a);
1381: <213>a = modifiers(field, a, 0);
1382: if(<213>*a == ',') {
1383: <116>a = keyspec(&field->end, a+1);
1384: <115>a = modifiers(field, a, 1);
1385: }
1386: if(<212>*a == 0)
1387: return<210>;
1388: bad:
1389: <2>fatal("bad -k specification", arg, 0);
1390: <0>}
1391:
1392: static char *
1393: keyspec(struct pos *p, char *arg)
1394: <370>{
1395: if(<370>!isdigit(*arg))
1396: <3>fatal("missing field number", "", 0);
1397: <367>p->fieldno = strtoul(arg, &arg, 10);
1398: if(<367>*arg == '.')
1399: if(<69>!isdigit(*++arg))
1400: <1>fatal("missing character number", "", 0);
1401: else
1402: <68>p->charno = strtoul(arg, &arg, 10);
1403: return <366>arg;
1404: }
1405:
1406: /* keyed = 1 if there are fields present (+ options) or if
1407: numeric (-ng), translation (-f) or deletion (-idb) options
1408: are present. In these cases, a separate key is constructed
1409: for rsort. The key, however is not carried on
1410: intermediate files. (It would be interesting to try.)
1411: It must be reconstructed for the merge phase, and that
1412: may be expensive, since relatively few comparisons
1413: happen in that phase. simplekeyed = 1 if there are options,
1414: so that pure ascii comparison won't work, but no fields, no
1415: months, no numerics. */
1416:
1417: void
1418: fieldwrapup(void)
1419: <288>{
1420: int i;
1421: if(<288>nfields==0 && <145>aflag)
1422: <1>fatal("-a without -k", "", 0);
1423: if(<287>fields->coder == 0) <262>fields->coder = tcode;
1424: if(<287>fields->trans == 0) <270>fields->trans = ident;
1425: if(<287>fields->keep == 0) <269>fields->keep = all;
1426: for(<287>i=1; <487>i<=nfields; <200>i++) {
1427: <201>globalmods(&fields[i]);
1428: <201>chkfieldno(&fields[i]);
1429: }
1430: for(<286>i=1; <316>i<=naccum; <30>i++) {
1431: <34>chkaccum(&accum[i]);
1432: <30>chkfieldno(&accum[i]);
1433: }
1434: <282>signedrflag = fields->rflag? <24>-1: <258>1; /* used only by merge.c*/
1435: <282>simplekeyed = nfields==0 && <144>fields->coder==tcode
1436: && <119>(fields->trans!=ident || <112>fields->keep!=all);
1437: if(<282>nfields==0 && <144>!keyed) /* used only by rsort.c */
1438: <109>rflag = fields->rflag;
1439: if(<282>nfields > 0)
1440: <138>keyed = 1;
1441: <282>}
1442:
1443: static void
1444: conflict(void)
1445: <4>{
1446: <4>warn("conflicting key types", "", 0);
1447: <4>}
1448:
1449: static void
1450: dupla(uchar **oldp, uchar *new)
1451: <48>{
1452: if(<48>*oldp != 0 && <1>*oldp != new)
1453: <1>conflict();
1454: <48>*oldp = new;
1455: <48>}
1456:
1457: static void
1458: duplb(int (**oldp)(uchar*,uchar*,int,struct field*), int (*new)(uchar*,uchar*,int,struct field*))
1459: <56>{
1460: if(<56>*oldp != 0 && <2>*oldp != new)
1461: <2>conflict();
1462: <56>*oldp = new;
1463: <56>}
1464:
1465: /* eflag=-1 global flags, =0 field start, =1 field end */
1466:
1467: static char *
1468: modifiers(struct field *field, char *argv1, int eflag)
1469: <463>{
1470: for( <463>; <629>*argv1; <166>argv1++) {
1471: switch(<284>*argv1) {
1472: case 'b': if(<22>eflag==1) <6>field->eflag = 1;
1473: else <16>field->bflag = 1; <22>goto ckglob;
1474: case 'r': <40>field->rflag = 1; <40>goto ckglob;
1475: case 'f': <23>dupla(&field->trans, fold); <23>break;
1476: case 'd': <20>dupla(&field->keep, dict); <20>break;
1477: case 'i': <5>dupla(&field->keep, ascii); <5>break;
1478: case 'g': <10>duplb(&field->coder, gcode); <10>break;
1479: case 'n': <38>duplb(&field->coder, ncode); <38>break;
1480: case 'M': <8>duplb(&field->coder, Mcode); <8>break;
1481: default:
1482: <118>goto done;
1483: }
1484: <104>keyed = 1;
1485: ckglob:
1486: if(<166>field==fields && <97>nfields>0)
1487: <2>warn("field spec precedes global option",argv1,1);
1488: }
1489: done:
1490: if(<463>field->coder==ncode && <40>field->keep)
1491: <1>conflict();
1492: return <463>argv1;
1493: }
1494:
1495: static void
1496: globalmods(struct field *field)
1497: <201>{
1498: int flagged = <201>field->bflag | field->eflag | field->rflag;
1499: if(<201>!field->coder) <172>field->coder = tcode;
1500: else <29>flagged++;
1501: if(<201>!field->trans) <196>field->trans = ident;
1502: else <5>flagged++;
1503: if(<201>!field->keep) <197>field->keep = all;
1504: else <4>flagged++;
1505: if(<201>!flagged) {
1506: <146>field->coder = fields->coder;
1507: <146>field->trans = fields->trans;
1508: <146>field->keep = fields->keep;
1509: <146>field->rflag = fields->rflag;
1510: <146>field->bflag = fields->bflag;
1511: if(<146>field->style == NEW)
1512: <126>field->eflag = fields->bflag;
1513: }
1514: <201>}
1515:
1516: /* convert field representation from numbers given in arguments
1517: to a 0-origin first,last+1 representation, with a negative
1518: quantity for a character offset to the end of this field */
1519:
1520: static void
1521: chkfieldno(struct field *field)
1522: <231>{
1523: if(<231>field->style == NEW) {
1524: if(<205>--field->begin.fieldno < 0 ||
1525: <204>--field->begin.charno < 0 ||
1526: <204>--field->end.fieldno < 0)
1527: <1>fatal("improper 0 in field specifier", "", 0);
1528: if(<204>field->end.charno == 0)
1529: <180>field->end.charno--;
1530: } else if(<26>field->end.charno==0 && <22>field->end.fieldno>0) {
1531: if(<22>tab && <8>field->eflag)
1532: <0>fatal("skipping blanks right after tab char"
1533: " is ill-defined", "", 0);
1534: <22>field->end.fieldno--;
1535: <22>field->end.charno--;
1536: }
1537: if(<230>field->begin.fieldno > NP)
1538: <0>field->begin.fieldno = NP;
1539: if(<230>field->end.fieldno > NP)
1540: <0>field->end.fieldno = NP;
1541: /* fprintf(stderr,"%d %d.%d,%d.%d\n",field-fields,field->begin.fieldno, field->begin.charno,field->end.fieldno, field->end.charno);*/
1542: <230>}
1543:
1544: int
1545: fieldcode(uchar *dp, uchar *kp, int len, uchar *b, struct field *fields, int nfields)
1546: <474734>{
1547: uchar *posns[NP+1]; /* field start positions */
1548: uchar *cp;
1549: struct field *field;
1550: uchar *op = <474734>kp;
1551: uchar *ep;
1552: uchar *bound = <474734>kp + MAXREC;
1553: int i;
1554: int np;
1555: if(<474734>bound > b)
1556: <155959>bound = b;
1557: <474734>posns[0] = dp;
1558: if(<474734>tab)
1559: for(<304>np=1, i=len, cp=dp; <6355>i>0 && <6051>np<NP; i-<6051>-) {
1560: if(<6051>*cp++ != tab)
1561: <4411>continue;
1562: <1640>posns[np++] = cp;
1563: }
1564: else
1565: for(<474430>np=1, i=len, cp=dp; <1110378>i>0 && <635948>np<NP; ) <635948>{
1566: while(<1228760>blank(cp) && i><636001>0)
1567: <592812>cp++, i--;
1568: while(<3943779>!blank(cp) && i><3782124>0)
1569: <3307831>cp++, i--;
1570: <635948>posns[np++] = cp;
1571: }
1572:
1573: if(<474734>nfields > 0)
1574: <143100>field = &fields[1];
1575: else
1576: <331634>field = &fields[0];
1577: <474734>i = nfields;
1578: do {
1579: int t = <486184>field->begin.fieldno;
1580: uchar *xp = <486184>dp + len;
1581: if(<486184>t < np) {
1582: <486176>cp = posns[t];
1583: if(<486176>field->bflag && <144>nfields)
1584: while(<454>cp<xp && <454>blank(cp))
1585: <316>cp++;
1586: <486176>cp += field->begin.charno;
1587: if(<486176>cp > xp)
1588: <12>cp = xp;
1589: } else
1590: <8>cp = xp;
1591: <486184>t = field->end.fieldno;
1592: if(<486184>t < np) {
1593: if(<112104>field->end.charno < 0) {
1594: if(<111816>t >= np-1)
1595: <25>ep = xp;
1596: else {
1597: <111791>ep = posns[t+1];
1598: if(<111791>tab) <22>ep--;
1599: }
1600: } else {
1601: <288>ep = posns[t];
1602: if(<288>field->eflag)
1603: while(<262>ep<xp && <262>blank(ep))
1604: <186>ep++;
1605: <288>ep += field->end.charno;
1606: }
1607: if(<112104>ep > xp)
1608: <28>ep = xp;
1609: else if(<112076>ep < cp)
1610: <10>ep = cp;
1611: } else
1612: <374080>ep = xp;
1613: <486184>t = ep - cp;
1614: if(<486184>field->coder != acode && <386494>op+room(t) > bound)
1615: return <17>-1;
1616: <486167>op += (*field->coder)(cp, op, ep-cp, field);
1617: <486167>field++;
1618: } while(<486167>--i > 0);
1619: return <474717>op - kp;
1620: }
1621:
1622: /* Encode text field subject to options -r -fdi -b.
1623: Fields are separated by 0 (or 255 if rflag is set)
1624: the anti-ambiguity stuff prevents such codes from
1625: happening otherwise by coding real zeros and ones
1626: as 0x0101 and 0x0102, and similarly for complements */
1627:
1628: int
1629: tcode(uchar *dp, uchar *kp, int len, struct field *f)
1630: <12156>{
1631: uchar *cp = <12156>kp;
1632: int c;
1633: uchar *keep = <12156>f->keep;
1634: uchar *trans = <12156>f->trans;
1635: int reverse = <12156>f->rflag? <106>~0: <12050>0;
1636: while(<211231>--len >= 0) {
1637: <199075>c = *dp++;
1638: if(<199075>keep[c]) {
1639: <198337>c = trans[c];
1640: if(<198337>c <= 1) { /* anti-ambiguity */
1641: <4>*cp++ = 1^reverse;
1642: <4>c++;
1643: } else if(<198333>c >= 254) {
1644: <4>*cp++ = 255^reverse;
1645: <4>c--;
1646: }
1647: <198337>*cp++ = c^reverse;
1648: }
1649: }
1650: <12156>*cp++ = reverse;
1651: return <12156>cp - kp;
1652: }
1653:
1654: static char *month[] = { "jan", "feb", "mar", "apr", "may",
1655: "jun", "jul", "aug", "sep", "oct", "nov", "dec" };
1656:
1657: int
1658: Mcode(uchar *dp, uchar *kp, int len, struct field *f)
1659: <57>{
1660: int j = <57>-1;
1661: int i;
1662: uchar *cp;
1663: for( <57>; <69>len>0; <12>dp++, len--) {
1664: if(<69>*dp!=' ' && <57>*dp!='\t')
1665: <57>break;
1666: }
1667: if(<57>len >= 3)
1668: while(<294>++j < 12) {
1669: <292>cp = (uchar*)month[j];
1670: for(<292>i=0; <166>i<3; <166>i++)
1671: if(<410>(dp[i]|('a'-'A')) != *cp++)
1672: <244>break;
1673: if(<292>i >= 3)
1674: <48>break;
1675: }
1676: <57>*kp = j>=12? <2>0: <55>j+1;
1677: if(<57>f->rflag)
1678: <8>*kp ^= ~0;
1679: return <57>1;
1680: }
1681: tables.c:
1682:
1683: /* Copyright 1990, AT&T Bell Labs */
1684: #include <stdlib.h>
1685: #include <string.h>
1686: #include <ctype.h>
1687: #include "fsort.h"
1688:
1689: /* STACK=1000 makes lots of room under normal conditions.
1690: But it can be broken. 40 cycles like this will do it:
1691: a,b,...z,za,zb,...zz,zza,zzb,...,zzz,zzza,...
1692: Minor changes to rsort() would allow the stack to be
1693: extended discontiguously. Pity we don't have alloca any more.
1694: */
1695:
1696: #define BUFFER 6000000
1697: #define MINBUF 10000 /* not worth trying */
1698: #define STACK 1000
1699:
1700: uchar *bufmax;
1701: struct rec *buffer;
1702: unsigned long bufsiz = BUFFER;
1703: struct rec endfile;
1704: struct list *stack;
1705: struct list *stackmax;
1706:
1707: uchar ident[256]; /* identity transform */
1708: uchar fold[256]; /* fold upper case to lower */
1709:
1710: uchar all[256]; /* all chars significant */
1711: uchar dict[256]; /* sig chars for dictionary order */
1712: uchar ascii[256]; /* ascii graphics significant */
1713:
1714: void
1715: tabinit(void)
1716: <282>{
1717: int i;
1718: <282>memset((char*)all,1,256);
1719:
1720: <282>memset((char*)(dict+'0'),1,10);
1721: <282>memset((char*)(dict+'A'),1,26);
1722: <282>memset((char*)(dict+'a'),1,26);
1723: <282>dict[' '] = dict['\t'] = 1;
1724:
1725: <282>memset((char*)(ascii+040),1,0137); /* 040-0176 */
1726:
1727: for(<282>i=0; <72192>i<256; <72192>i++)
1728: <72192>fold[i] = ident[i] = i;
1729: for(<282>i='a'; <7332>i<='z'; <7332>i++)
1730: <7332>fold[i] += 'A' - 'a';
1731:
1732: <282>stack = (struct list*)malloc(STACK*sizeof(*stack));
1733: do {
1734: if(<282>(buffer=(struct rec*)malloc(bufsiz)) != 0)
1735: <282>break;
1736: } while(<0>(bufsiz/=2) > MINBUF);
1737: if(<282>buffer==0 || <282>stack==0)
1738: <0>fatal("can't get working space", "", 0);
1739: <282>bufmax = (uchar*)buffer + bufsiz - 2*sizeof(*buffer);
1740: <282>stack->head = stack->tail = 0;
1741: <282>stackmax = stack + STACK;
1742: <282>}
1743:
1744: void
1745: tabfree(void)
1746: <2>{
1747: <2>free(stack);
1748: <2>free(buffer);
1749: <2>}
1750:
1751: void
1752: optiony(char *s)
1753: <5>{
1754: long size = <5>atol(s);
1755: if(<5>!isdigit(s[0]))
1756: <0>fatal("no size for -y","",0);
1757: if(<5>size >= MINBUF/10) /* tiny helps debugging */
1758: <5>bufsiz = size;
1759: else if(<0>size == 0)
1760: <0>bufsiz = 10L*BUFFER;
1761: else <0>fatal("-y too small", "", 0);
1762: <5>}
1763: cfiles.c:
1764:
1765: /* Copyright 1990, AT&T Bell Labs */
1766: #include "fsort.h"
1767: #include <stdlib.h>
1768: #include <string.h>
1769: #include <signal.h>
1770: #include <errno.h>
1771: #include <sys/types.h>
1772: #include <sys/stat.h>
1773:
1774: #define SIGHUP 1
1775: #define SIGINT 2
1776: #define SIGQUIT 3
1777: #define SIGPIPE 13
1778: #define SIGTERM 15
1779:
1780: extern int getpid(void);
1781: extern int access(char*, int);
1782: extern int unlink(char*);
1783: extern int stat(const char*, struct stat *);
1784: extern int cgets(char*, int, FILE*);
1785:
1786: FILE *
1787: fileopen(char *name, char *mode)
1788: <2052>{
1789: FILE *f;
1790: if(<2052>strcmp(name,"-") == 0)
1791: if(<199>strcmp(mode, "r") == 0)
1792: <27>f = stdin;
1793: else {
1794: <172>setbuf(stdout,malloc(BUFSIZ));
1795: <172>f = stdout;
1796: }
1797: else {
1798: if(<1853>strcmp(mode, "w") == 0 &&
1799: <257>strcmp(name, oname) == 0 &&
1800: <11>overwrite(0))
1801: <4>setsigs(SIG_IGN);
1802: <1853>f = fopen(name, mode);
1803: }
1804: if(<2052>f == 0)
1805: <3>fatal("can't open", name, 0);
1806: return <2049>f;
1807: }
1808:
1809: void
1810: fileclose(FILE *f, char *name)
1811: <2041>{
1812: if(<2041>fclose(f)==EOF && <1>name!=0)
1813: <1>fatal("error on", name, 0);
1814: <2040>}
1815:
1816: /* file name strings accumulate as garbage */
1817:
1818: char *
1819: filename(int number)
1820: <849>{
1821: char name[50];
1822: char *s;
1823: int i;
1824: for(<849>i=0; <849>(s=tname[i])!=0; <0>i++)
1825: if(<849>access(s, 03) != -1)
1826: <849>break;
1827: if(<849>s == 0)
1828: <0>fatal("no accessible temp directory", "", 0);
1829: <849>sprintf(name, "%s/stm%.5d.%.4d", s, getpid(), number);
1830: <849>s = malloc(strlen(name) + 1);
1831: if(<849>s == 0)
1832: <0>fatal("out of space", "", 0);
1833: <849>strcpy(s, name);
1834: return <849>s;
1835: }
1836:
1837: /* if there is enough room in record r, getline puts
1838: a line of data there and returns 0; otherwise it
1839: returns a pointer to a new record. The record
1840: may be grown in stages; intermediate stages can
1841: be discarded, but the original cannot. */
1842:
1843: static struct rec *
1844: newrec(struct rec *r, struct rec *retval)
1845: <83>{
1846: int n = <83>(uchar*)r->next - data(r);
1847: int len = <83>(uchar*)r->next - (uchar*)r;
1848: struct rec *new = <83>(struct rec*)malloc(len + n);
1849: if(<83>new == 0)
1850: <0>fatal("no space for record", "", 0);
1851: <83>memmove(new, r, len);
1852: <83>new->next = (struct rec*)((uchar*)new + len + n);
1853: if(<83>retval)
1854: <24>free(retval);
1855: return <83>new;
1856: }
1857:
1858: struct rec*
1859: getline(struct rec *r, FILE *f)
1860: <627600>{
1861: int n = <627600>0;
1862: int m;
1863: uchar *cp, *dp;
1864: struct rec *retval = <627600>0;
1865:
1866: if(<627600>feof(f)) /* in case newline was appended */
1867: return <1>ENDFILE;
1868: for(<627599>;<133>;<133>) { /* usually executed once */
1869: <627732>dp = data(r);
1870: <627732>cp = dp + n;
1871: <627732>m = (uchar*)r->next - cp;
1872: if(<627732>m <= 1)
1873: <66>retval = r = newrec(r, retval);
1874: else {
1875: <627666>m = cgets((char*)cp, m, f);
1876: if(<627666>m == 0) {
1877: if(<1611>n == 0)
1878: return <1610>ENDFILE;
1879: <1>warn("newline appended", "", 0);
1880: <1>break;
1881: }
1882: <626055>n += m;
1883: if(<626055>dp[n-1] == '\n') {
1884: <625988>n--;
1885: <625988>break;
1886: }
1887: }
1888: }
1889:
1890: <625989>r->dlen = n;
1891: if(<625989>n > MAXREC)
1892: <0>fatal("monster record", "", 0);
1893: if(<625989>!keyed) {
1894: <240956>r->klen = 0; /* hygiene */
1895: return <240956>retval;
1896: }
1897: while(<385050>(n = fieldcode(data(r),key(r), r->dlen,
1898: (uchar*)r->next, fields, nfields)) < 0)
1899: <17>retval = r = newrec(r, retval); /* rare event */
1900: if(<385033>n > MAXREC)
1901: <0>fatal("monster key", "", 0);
1902: <385033>r->klen = n;
1903: return <385033>retval;
1904: }
1905:
1906: static char *level = "warning";
1907:
1908: void
1909: warn(char *m, char *s, int l)
1910: <54>{
1911: <54>fprintf(stderr, "sort: %s: %s %.*s\n",
1912: level, m, l==0?<45>strlen(s):<9>l, s);
1913: <54>}
1914:
1915: void
1916: fatal(char *m, char *s, int l)
1917: <40>{
1918: <40>level = "error";
1919: <40>warn(m, s, l);
1920: if(<40>errno)
1921: <3>perror("");
1922: <40>cleanup(1);
1923: <0>}
1924:
1925: int
1926: overwrite(int j)
1927: <60>{
1928: struct stat sb1, sb2;
1929: if(<60>strcmp(oname, "-") == 0)
1930: return <46>0;
1931: if(<14>stat(oname, &sb1) == -1)
1932: return <4>0;
1933: for( <10>; <29>j<nfiles; <19>j++) {
1934: if(<26>strcmp(files[j], "-") == 0)
1935: <2>continue;
1936: if(<24>stat(files[j], &sb2) == -1)
1937: <0>fatal("cannot locate", files[j], 0);
1938: if(<24>sb1.st_dev==sb2.st_dev && <24>sb1.st_ino==sb2.st_ino)
1939: return <7>1;
1940: }
1941: return <3>0;
1942: }
1943:
1944: static int siglist[] = { SIGHUP, SIGINT, SIGQUIT, SIGPIPE, SIGTERM };
1945:
1946: void
1947: setsigs(void(*f)(int))
1948: <326>{
1949: int i;
1950: for(<326>i=0; <1956>i<sizeof(siglist)/sizeof(*siglist); <1630>i++)
1951: if(<1630>signal(siglist[i], f) == SIG_IGN)
1952: <0>signal(siglist[i], SIG_IGN);
1953: <326>}
1954:
1955: void
1956: cleanup(int i)
1957: <40>{
1958: char *name;
1959: <40>setsigs(SIG_IGN);
1960: while(<40>--nextfile >= 0) {
1961: <0>name = filename(nextfile);
1962: <0>unlink(name);
1963: <0>free(name);
1964: }
1965: <40>exit(i);
1966: <0>}
1967: cgets.c:
1968:
1969: /* Copyright AT&T Bell Laboratories, 1993 */
1970: #include <stdio.h>
1971:
1972: /* same as fgets, but returns a char count instead of first arg.
1973: * robust against null bytes in input */
1974:
1975: int
1976: cgets(char *ptr, int n, FILE *iop)
1977: <627666>{
1978: int l;
1979: char *s = <627666>ptr;
1980: unsigned char *t;
1981: while(<631159>--n > 0) {
1982: <631159>l = iop->_cnt;
1983: if(<631159>l > 0) {
1984: if(<627758>l > n) <277838>l = n;
1985: <627758>t = iop->_ptr;
1986: while(<8783600>--l >= 0 && <8781637>(*s++ = *t++) != '\n')
1987: <8155842>continue;
1988: <627758>l = t - iop->_ptr;
1989: <627758>iop->_ptr = t;
1990: <627758>iop->_cnt -= l;
1991: if(<627758>t[-1] == '\n' || <1963>(n -= l) <= 0)
1992: <625861>break;
1993: }
1994: <5298>l = getc(iop);
1995: if(<5298>l == EOF)
1996: <1612>break;
1997: <3686>*s++ = l;
1998: if(<3686>l == '\n')
1999: <193>break;
2000: }
2001: <627666>*s = 0;
2002: return <627666>s - ptr;
2003: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.