researchv10no/cmd/sort/bprint.out - view

File: [Research Unix] / researchv10no / cmd / sort / bprint.out
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs
Tue Apr 24 17:21:35 2018 UTC (8 years, 1 month ago) by root
Branches: belllabs, MAIN
CVS tags: researchv10, HEAD

researchv10 Norman

number.c: /* Copyright 1990, AT&T Bell Labs */ /* Canonicalize the number string pointed to by dp, of length len. Put the result in kp. A field of length zero, or all blank, is regarded as 0. Over/underflow is rendered as huge or zero and properly signed. It happens 1e+-1022. Canonicalized strings may be compared as strings of unsigned chars. For good measure, a canonical string has no zero bytes. Syntax: optionally signed floating point, with optional leading spaces. A syntax deviation ends the number. Form of output: packed in 4-bit nibbles. First 3 nibbles count the number N of significant digits before the decimal point. The quantity actually stored is 2048+sign(x)*(N+1024). Further nibbles contain 1 decimal digit d each, stored as d+2 if x is positive and as 10-d if x is negative. Leading and trailing zeros are stripped, and a trailing "digit" d = -1 is appended. (The trailing digit handled like all others, so encodes as 1 or 0xb according to the sign of x.) An odd number of nibbles is padded with zero. Buglet: overflow is reported if output is exactly filled. */ #include "fsort.h" #define encode(x) (neg? 10-(x): (x)+2) #define putdig(x) (nib? (*dig=encode(x)<<4, nib=0): \ (*dig++|=encode(x), nib=1)) static int gflag; int gcode(uchar *dp, uchar*kp, int len, struct field *f) <60>{ int ret; <60>gflag++; <60>ret = ncode(dp, kp, len, f); <60>gflag--; return <60>ret; } int ncode(uchar *dp, uchar*kp, int len, struct field *f) <374264>{ uchar *dig = <374264>kp + 1; /* byte for next digit */ int nib = <374264>0; /* high nibble 1, low nibble 0 */ uchar *cp = <374264>dp; uchar *ep = <374264>cp + len; /* end pointer */ int zeros = <374264>0; /* count zeros seen but not installed */ int sigdig = <374264>1024; int neg = <374264>f->rflag; /* 0 for +, 1 for - */ int decimal = <374264>0; int n, inv; <374264>kp[1] = 0; for( <374264>; <418322>cp<ep ; <44058>cp++) /* eat blanks */ if(<418322>*cp!=' ' && <374272>*cp!='\t') <374264>break; if(<374264>cp < ep) /* eat sign */ switch(<374264>*cp) { case '-': <218>neg ^= 1; case '+': <225>cp++; } for( <374264>; <707527>cp<ep; <333263>cp++) /* eat leading zeros */ if(<707511>*cp != '0') <374248>break; if(<374264>*cp=='.' && <331112>cp<ep) { /* decimal point among lead zeros */ <331112>decimal++; for(<331112>cp++; <367735>cp<ep; <36623>cp++) { if(<367735>*cp != '0') <331112>break; <36623>sigdig--; } } if(<374264>*cp>'9' || <374255>*cp<'0' || <373799>cp>=ep) { /* no sig digit*/ <465>sigdig = 0; <465>neg = 0; <465>goto retzero; } for( <373799>; <2405784>cp<ep; <2031985>cp++) { switch(<2071744>*cp) { default: <39726>goto out; case '.': if(<16>decimal) <2>goto out; <14>decimal++; <14>continue; case '0': <132382>zeros++; if(<132382>!decimal) <3976>sigdig++; <132382>continue; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': for( <1899589>; <2027985>zeros>0; <128396>zeros--) <128396>putdig<65793>(0); <1899589>n = *cp - '0'; <1899589>putdig<929380>(n); if(<1899589>!decimal) <77803>sigdig++; <1899589>continue; case 'e': case 'E': if(<31>!gflag) <6>goto out; <25>inv = 1; if(<25>cp < ep) switch(<25>*++cp) { case '-': <13>inv = -1; case '+': <15>cp++; } if(<25>*cp>'9' || <25>*cp<'0' || <21>cp>=ep) <4>goto out; for(<21>n=0; <62>cp<ep; <41>cp++) { int c = <44>*cp; if(<44>c<'0' || <44>c>'9') <3>break; if(<41>(n = 10*n+c-'0') >= 0) <41>continue; <0>sigdig = 2047*inv; <0>goto out; } <21>sigdig += n*inv; <21>goto out; } } out: if(<373799>sigdig<0 || <373799>sigdig>=2047) { <0>sigdig = sigdig<0? <0>0: <0>2047; <0>warn("numeric field overflow", (char*)dp, len); <0>dig = kp + 1; <0>*dig = 0; <0>nib = 0; } retzero: if(<374264>neg) <219>sigdig = 2048 - sigdig; else <374045>sigdig = 2048 + sigdig; <374264>kp[0] = sigdig >> 4; <374264>kp[1] |= sigdig << 4; <374264>putdig<37639>(-1); return <374264>dig - kp + 1 - nib; } rsort.c: /* Copyright 1990, AT&T Bell Labs */ #include "fsort.h" /* Radix sort by bytes. Records are chained in a list. There are up to 257 bins at each recursion level. The "null bin" is for short strings that have run out; the rest are for byte values 0-255 */ int aflag = 0; int uflag = 0; int rflag = 0; int sflag = 0; /* Sort the list s by distributing according to digit d into an array of bins, sorting the bins recursively, and re-collecting them back into the list. If uflag is nonzero return only 1 representative of each equivalence class. The squeeze step noted below determines what bins are in use, and squeezes those into a stack frame, which is contiguous to s. The null bin is handled separately to cut the span of the squeeze loop. Precondition: length(s) > 1 */ #define tiebreak(t) \ if(uflag) { \ t->tail = !aflag? t->head: \ listaccum(t->head, t->tail); \ t->tail->next = 0; \ } else if(keyed && !sflag && t->head->next) { \ rflag = fields[0].rflag; \ keyed = 0; \ sort(t, 0); \ keyed = 1; \ rflag = 0; \ } else void sort(struct list *s, int d) <126745>{ extern int uflag; struct list *t; struct rec *r, *q; struct list *frame = <126745>s + 1; /* stack frame */ static struct list bins[257]; # define nullbin (bins+256) int nbin; /* number of filled bins */ struct list *low; /* lowest unsorted nonnull bin */ loop: <447576>r = s->head; <447576>low = bins + 256; <447576>nbin = 0; while(<3688466>r) { if(<3240890>keyed) if(<1334292>r->klen > (len_t)d) <1116270>t = bins + key(r)[d]; else <218022>t = nullbin; else if(<1906598>r->dlen > (len_t)d) <1714238>t = bins + data(r)[d]; else <192360>t = nullbin; if(<3240890>t->head == 0) { if(<555902>low > t) <363502>low = t; <555902>t->head = r; <555902>nbin++; } else <2684988>t->tail->next = r; /* stable sort */ <3240890>t->tail = r; <3240890>r = r->next; } <447576>t = frame; /* t is stack top */ if(<447576>t+nbin > stackmax) <0>fatal("stack overflow", "", 0); # define push(b) /* onto stack */ \ *t = *b, \ t->tail->next = 0, \ t++, \ b->head = 0, \ nbin-- if(<447576>nullbin->head) <106517>push(nullbin); for( <447576>; <1381435>nbin>0; <933859>low++) if(<933859>low->head) <449385>push(low); if(<447576>t == frame+1) { /* tail recursion */ /* debug(frame, t, d);*/ <427056>r = frame->head; if(<427056>r->len[keyed] <= d) { <106225>tiebreak(<26635>frame); <106225>*s = *frame; return<106225>; /* string ran out */ } <320831>d++; <320831>goto loop; } /* debug(frame, t, d);*/ <20520>t--; if(<20520>t->head->next) /* skip 1-item list */ <13713>sort(t, d+1); if(<20520>!rflag) { /* forward order */ <15796>r = t->tail; while(<112441>t > frame) { <96645>q = t->head; if(<96645>(--t>frame || <15796>t->head->len[keyed]>d) && <96353>t->head->next) <82209>sort(t, d+1); else if(<14436>t==frame) <3163>tiebreak(t);<295> <96645>t->tail->next = q; /* concatenate */ } <15796>s->head = frame->head; <15796>s->tail = r; } else { /* reverse order */ <4724>r = t->head; while(<16405>t > frame) { <11681>q = t->tail; if(<11681>(--t>frame || <4724>t->head->len[keyed]>d) && <11681>t->head->next) <4070>sort(t, d+1); else if(<7611>t==frame) <3881>tiebreak(t);<1360> <11681>q->next = t->head; /* concatenate */ } <4724>s->head = r; <4724>s->tail = frame->tail; } /* printf("return\n"); debug(s,s+1,d);*/ <20520>} /* debug(struct list *stack, struct list *top, int d) { printf("----------------------level %d\n", d); while(stack<top) { printout(stack->head, stdout, "stdout"); printf("----------------------\n"); stack++; } } */ merge.c: /* Copyright 1990, AT&T Bell Labs */ #include <stdlib.h> #include <string.h> #include "fsort.h" char *disorder = "key out of order"; char *duplicate = "duplicate key"; char *space = "out of space for merging"; enum { IEOF, IOK }; enum { IPRE = 01000, IFOL = 02000 }; /* see find() */ enum { NMERGE = 16 }; int nmerge = NMERGE; /* max number of files to merge at once */ int nextfile; struct merge { /* current record in an open file */ char *name; /* name of file (for diagnostics) */ FILE *file; struct rec *rec; /* pointer to the data (and key) */ short serial; /* file no., breaks ties for stable sort */ short del; /* delete at close */ }; struct merge *mfile; /* one struct per open file */ struct merge **f; /* pointers to mfile, ordered by content */ static void mergephase(int, char*); static int find(int, int*); static int fillrec(struct merge*); static int compare(struct merge*, struct merge*); static void syncerr(struct merge*, char*); extern int link(char*, char*); extern int unlink(char*); static void recinit(int nf, int nm, int n); static void mv(int, int); static void recalloc(struct merge *m) <1241>{ if(<1241>m->rec) return<784>; /* hygiene; doesn't happen */ <457>m->rec = (struct rec*)malloc(MINREC); if(<457>m->rec == 0) <0>fatal(space, "", 0); <457>m->rec->dlen = 0; <457>m->rec->next = (struct rec*)((uchar*)m->rec + MINREC); <457>} /* misfortune : all fields are parsed and converted before comparison. Lazy parsing might be in order, but it's too hard to contemplate */ /* Merge strategy, to merge N inputs in groups of at most M. Numbers, like (1), are keyed to comments in code. If N <= M, merge everything. (4) If N > M*(M-1)+1 greedily merge M at a time (1) to reduce to the next case. Merge x bunches of M (3) and one bunch of y (2), then merge the x+1 new files (4) with the remaining z files in an exactly M-way merge. Calculate x,y,z thus: Mx + y + z = N file count x + 1 + z = M final merge 2 <= y <= M x >= 0 z >= 0 by algebra (M-1)x + y = N-M+1 y == (N-M+1) mod (M-1), 2 <= y <= M x == (n-M+1-y)/(M-1) Total cost is Mx + y + N. (Strategy due to P. McIlroy.) */ /* merge n files. the first unmerged temp file is nm (i.e. nm is the number of already merged files). the first unmerged input file is nf. rename temp files to keep a dense set beginning at 0 (It seems silly to rename 100 files after merging every 1; how much time does that actually cost?) */ void merge1(int nf, int nm, int n) <161>{ char *name; int i, j; int flag = <161>nf<nfiles; <161>recinit(nf, nm, n); <161>name = filename(nextfile++); <161>mergephase(n, name); <161>free(name); if(<161>flag) return<137>; <24>mv(--nextfile, nm); for(<24>i=nm+n, j=nm+1; <180>i<nextfile; <156>i++, j++) <156>mv(i, j); <24>nextfile -= n - 1; <24>} /* if flag = 0 merge input files; else temps only. this program is specialized to two levels of merge; should fix that some day. */ void merge(int flag) <68>{ char buf[BUFSIZ]; FILE *input, *output; int n, y; char *name; int nf = <68>0; /* next input file to merge */ int nm = <68>0; /* total already merged once */ int nt = <68>nfiles; /* files as yet unmerged */ if(<68>flag) { <21>nf = nfiles; <21>nt = nextfile; } if(<68>mfile == 0) <49>mfile = (struct merge*)calloc(nmerge+1, sizeof(*mfile)); if(<68>f == 0) <49>f = (struct merge**)calloc(nmerge+1, sizeof(*f)); if(<68>mfile==0 || <68>f==0) <0>fatal(space,"",0); if(<68>nt > nmerge*(nmerge-1)+1) { /* (1) */ for(<19>nm=0; <135>nt>0; <116>) { <116>n = nt>nmerge? <97>nmerge: <19>nt; <116>merge1(nf, nm, n); if(<116>flag) <0>nf += n; <116>nt -= n; <116>nm++; } <19>merge(1); return<19>; } if(<49>nt > nmerge) { /* (2) */ <30>y = (nt-nmerge+1)%(nmerge-1); if(<30>y <= 1) <14>y += nmerge-1; <30>merge1(nf, nm, y); <30>nf = nf+y>=nfiles? <18>nfiles: <12>nf+y; <30>nt -= y; <30>nm++; } while(<64>nt+nm > nmerge) { /* (3) */ <15>merge1(nf, nm, nmerge); <15>nf = nf+nmerge>=nfiles? <6>nfiles: <9>nf+nmerge; <15>nt -= nmerge; <15>nm++; } <49>n = nm+nt; /* (4) */ <49>recinit(nf, 0, n); if(<49>!overwrite(nf)) <46>name = oname; else <3>name = filename(nextfile++); <49>mergephase(n, name); if(<48>name == oname) return<45>; <3>input = fileopen(name, "r"); <3>output = fileopen(oname, "w"); while(<88>n = fread(buf, 1, sizeof(buf), input)) if(<85>fwrite(buf, 1, n, output) != n) <0>fatal("error writing", oname, 0); <3>fileclose(input, name); <3>unlink(name); <3>free(name); <3>fileclose(output, oname); <3>} /* Initialize merge records for n files, beginning with input file number nf and temp file number nm. There are nfiles-nf input files; any files beyond that number must be temporaries, which come from already merged inputs. (The only time that both kinds of files are present is the very last merge, when for stability, the temps are regarded as earlier.) */ static void recinit(int nf, int nm, int n) <210>{ int i; struct merge *m; int last = <210>nfiles-nf + nextfile-nm == n; for(<210>i=0; <1277>i<=n; <1067>i++) /* one extra, for mergephase() */ <1067>recalloc(&mfile[i]); for(<210>i=0; <1067>i<n; <857>i++) { <857>m = &mfile[i]; <857>m->del = nf>=nfiles || <635>last && <126>i<nextfile; <857>m->name = m->del? <243>filename(nm++): <614>files[nf++]; <857>m->file = fileopen(m->name, "r"); <857>m->serial = i; <857>f[i] = m; } <210>f[n] = &mfile[n]; <210>} static void mv(int i, int j) <180>{ char *old, *new; if(<180>i == j) return<0>; /* believed not to happen */ <180>old = filename(i); <180>new = filename(j); <180>unlink(new); if(<180>link(old,new) == -1 || <180>unlink(old) == -1) <0>fatal("cannot move", old, 0); <180>free(old); <180>free(new); <180>} static void emit(FILE *output) <22296>{ struct merge *m = <22296>f[0]; int n = <22296>m->rec->dlen; uchar *p = <22296>data(m->rec); /* hanky panky for speed */ uchar *e = <22296>p + n++; /* use fwrite instead of */ int c = <22296>*e; /* printf */ <22296>*e = '\n'; if(<22296>fwrite((char*)p, 1, n, output) != n) <0>fatal("error writing", m->name, 0); <22296>*e = c; <22296>} /* Merge files in mfile[0]..mfile[n-1]. f[0]..f[l-1] points to files in increasing order of their current records. All the current records are strictly ordered, either by tie-breaking, or by skipping records that compare equal under -u. There are two phases: (a) initialization (reading from a file when there is no record at hand) and (b) continuation (reading the next record with the intent of displacing the current record). These conditions arise, at places numbered in comments (1) the record compares equal to another, and would follow that other if ties were broken (2) the record compares equal to another, and would precede that other if ties were broken (3) the record falls between two others At all times the keys pointed to by f[0]..f[l] are distinct (perhaps due to tie-breaking), as are the files they come from. When a new key is read, its proper home (in tie-broken order) is between f[j-1] and f[j]. The picture below shows the hardest case (2b), where key a from file F is about to be emitted. All keys in the sequence axby are distinct, as are the files they come from. A new key is read from file F into the extra space pointed to by f[i]. (Keeping one extra key allows disorder to be detected almost for free.) The new key is equal to key b pointed to by f[j] and file F precedes file G in stability order. Thus b(F) deserves to be kept and b(G) discarded. 0 j l=n -------------------------------------------- | a(F) | x | b(G) | y | b(F) | -------------------------------------------- We emit the record at 0, and displace the key at j, from whose file there is now no record present; l decreases by one. 0 j l n -------------------------------------------- | x | b(F) | y | ?(G) | | -------------------------------------------- This takes us back to initialization. From this configuration it is possible to reach case (2a), like (2b) except that the key at 0 is not to be emitted; (2a) cannot happen during the original initialization. */ #define moveup(a,n) memmove(a+1, a, (n)*sizeof(*a)) #define movedown(a,n) memmove(a-1, a, (n)*sizeof(*a)) static void mergephase(int n, char *name) <210>{ int j, flags; struct merge *mp; struct rec *r; int l = <210>0; int k = <210>-1; /* for spotting sync errors */ FILE *output = fileopen(<210>name, "w"); init: while(<19606>l < n) { /*(a)*/ <19124>mp = f[l]; if(<19124>fillrec(mp) == IEOF) { <52>movedown(f+l+1, n-l); <52>f[n--] = mp; <52>continue; } <19072>j = find(l, &flags); if(<19072>j <= k) <0>syncerr(mp, disorder); if(<19072>flags & IFOL) { /*(1a)*/ if(<17234>!aflag || <17169>doaccum(f[j-1]->rec, mp->rec)) <17234>continue; } else if(<1838>flags & IPRE) /*(2a)*/ if(<761>!aflag || <758>doaccum(mp->rec, f[j]->rec)) { <761>f[l] = f[j]; <761>f[j] = mp; <761>k = j; <761>continue; } <1077>moveup(f+j, l-j); /*(3a)*/ <1077>f[j] = mp; <1077>l++; } while(<24714>n > 0) { /*(b)*/ <24505>mp = f[l]; <24505>r = mp->rec; <24505>*mp = *f[0]; <24505>mp->rec = r; if(<24505>fillrec(mp) == IEOF) { <803>emit(output); <803>mp = f[0]; <803>movedown(f+1, l); <803>f[--n] = mp; <803>l = n; <803>continue; } <23702>j = find(l, &flags); if(<23702>j == 0) <1>syncerr(mp, disorder); if(<23701>flags & IFOL) { /*(1b)*/ if(<2208>!aflag || <1906>doaccum(f[j-1]->rec, mp->rec)) <2208>continue; } else if(<21493>flags & IPRE) /*(2b)*/ if(<272>!aflag || <107>doaccum(mp->rec, f[j]->rec)) { <272>emit(output); <272>f[l] = f[j]; <272>f[j] = mp; <272>mp = f[0]; <272>movedown(f+1, l); <272>f[l--] = mp; <272>k = j - 1; <272>goto init; } <21221>emit(output); /*(3b)*/ <21221>mp = f[0]; <21221>movedown(f+1, j-1); <21221>f[j-1] = f[l]; <21221>f[l] = mp; } <209>fileclose(output, name); <209>} static int fillrec(struct merge *mp) <284696>{ struct rec *r = <284696>getline(mp->rec, mp->file); if(<284696>r == 0) return <283702>IOK; if(<994>r == ENDFILE) { <937>fileclose(mp->file, mp->name); if(<937>mp->del) <243>unlink(mp->name); return <937>IEOF; } <57>mp->rec = r; return <57>IOK; } /* Return the proper index for the extra record f[l] to be inserted before (regardless of -u). Under -u, if the extra record is a duplicate, return the index flagged by IFOL or IPRE according as the insertion position follows or precedes a record of the same value. */ static int find(int l, int *flags) <42774>{ int i, t; int bot = <42774>0; int top = <42774>l; while(<145791>bot < top) { <123492>i = (bot+top)/2; <123492>t = compare(f[l], f[i]); if(<123492>t < 0) <55383>top = i; else if(<68109>t > 0) <39369>bot = i + 1; else if(<28740>uflag) { if(<20475>f[l]->serial < f[i]->serial) { <1033>*flags = IPRE; return <1033>i; } else { <19442>*flags = IFOL; return <19442>i + 1; } } else if(<8265>f[l]->serial < f[i]->serial) <2917>top = i; else <5348>bot = i + 1; } <22299>*flags = 0; return <22299>top; } static int compare(struct merge *mi, struct merge *mj) <364393>{ uchar *ip, *jp; uchar *ei, *ej; uchar *trans, *keep; int li, lj, k; if(<364393>simplekeyed) { <19>trans = fields->trans; <19>keep = fields->keep; <19>ip = data(mi->rec); <19>jp = data(mj->rec); <19>ei = ip + mi->rec->dlen; <19>ej = jp + mj->rec->dlen; for( <19>; <13>; <13>ip++, jp++) { while(<35>ip<ei && <29>!keep[*ip]) <3>ip++; while(<35>jp<ej && <29>!keep[*jp]) <3>jp++; if(<32>ip>=ei || <26>jp>=ej) <7>break; <25>k = trans[*ip] - trans[*jp]; if(<25>k != 0) <12>break; } if(<19>ip<ei && <13>jp<ej) return <12>k*signedrflag; else if(<7>ip < ei) return <1>signedrflag; else if(<6>jp < ej) return <1>-signedrflag; else if(<5>sflag) return <4>0; } else if(<364374>keyed) { <194802>ip = key(mi->rec); <194802>jp = key(mj->rec); <194802>li = mi->rec->klen; <194802>lj = mj->rec->klen; for(<194802>k=li<lj?<1091>li:<193711>lj; <901648>--k>=0; <706846>ip++, jp++) if(<807963>*ip != *jp) <101117>break; if(<194802>k < 0) { if(<93685>li != lj) <0>fatal("theorem disproved","",0); if(<93685>sflag) return <24419>0; } else return <101117>*ip - *jp; } <238839>li = mi->rec->dlen; <238839>lj = mj->rec->dlen; <238839>ip = data(mi->rec); <238839>jp = data(mj->rec); for(<238839>k=li<lj?<7726>li:<231113>lj; <1907768>--k>=0; <1668929>ip++, jp++) if(<1764308>*ip != *jp) <95379>break; return <238839>(k<0? <143460>li-lj: <95379>*ip-*jp)*signedrflag; } void check(char *name) <87>{ int i, t; <87>mfile = (struct merge*)calloc(2,sizeof(*mfile)); <87>recalloc(&mfile[0]); <87>recalloc(&mfile[1]); <87>mfile[0].name = mfile[1].name = name; <87>mfile[0].file = mfile[1].file = fileopen(name, "r"); if(<87>mfile[0].file == 0) <0>fatal("can't open ", name, 0); if(<87>fillrec(&mfile[0]) == IEOF) return<3>; for(<84>i=1; <240980>fillrec(&mfile[i])!=IEOF; <240896>i^=1) { <240901>t = compare(&mfile[i^1], &mfile[i]); if(<240901>t>0) <4>syncerr(&mfile[i], disorder); else if(<240897>t==0 && <138849>uflag) <1>syncerr(&mfile[i], duplicate); } <79>} static void syncerr(struct merge *m, char *message) <6>{ int n = <6>m->rec->dlen; <6>warn("trouble in file", m->name, 0); <6>fatal(message, n?<6>(char*)data(m->rec):"", n); <0>} optiona.c: #include <float.h> #include <math.h> #include <ctype.h> #include <string.h> #include <stdlib.h> #include "fsort.h" /* portability hack for old libc's */ #define realloc(p,q) (p?realloc(p,q):malloc(q)) struct field accum[NF]; int naccum; typedef struct { uchar *e; /* last+1st char */ signed char s; /* -1 for neg, 1 for pos */ char c; /* 1 if + sign, else 0 */ char z; /* 1 for leading 0, else 0 */ char p; /* nonzero if a decimal point */ int f; /* number of digits after dec. pt. */ } desc; #define max(a, b) ((a)>(b)?(a):(b)) #define mod10(a) (((a)+20)%10) #define div10(a) (((a)+20)/10-2) /* find least significant digit and other facts about a number in field beginning at a and ending just before e */ desc lsd(uchar *a, uchar *e) <99690>{ desc d = <99690>{ 0, 1, 0, 0, 0, 0 }; while(<271072>a<e && <269069>(*a==' '||<97687>*a=='\t')) <171382>a++; if(<99690>a >= e) <2003>; else if(<97687>*a == '-') { <7505>d.s = -1; <7505>a++; } else if(<90182>*a == '+') { <3>d.c = 1; <3>a++; } if(<99690>a+1<e && <51891>*a=='0' && <13>isdigit(a[1])) d.z = 1; while(<299631>a<e && <199978>isdigit(*a)) <199941>a++; if(<99690>a<e && <37>*a=='.') { <13>d.p = 1; while(<38>++a<e && <25>isdigit(*a)) <25>d.f++; } <99690>d.e = a; return <99690>d; } /* add the fields at a and b as desc-ribed, putting result, with decimal point omitted, right justified before re. calculates in 10's complement and recomplements magnitude if negative. set *sgn -1 for neg, 1 for signed +, 0 for unsigned. */ uchar * add(uchar *re, uchar *a, desc ad, uchar *b, desc bd, int *sgn) <49845>{ int d = <49845>0; uchar *t; uchar *r = <49845>re; while(<49848>ad.f > bd.f) { <3>d += (*--ad.e - '0')*ad.s; <3>*--re = mod10(d); <3>d = div10(d); <3>ad.f--; } while(<49861>bd.f > ad.f) { <16>d += (*--bd.e - '0')*bd.s; <16>*--re = mod10(d); <16>d = div10(d); <16>bd.f--; } while(<220476>ad.e>a || <50616>bd.e>b) { if(<170631>ad.f-- == 0) { if(<49843>ad.p) <6>ad.e--; if(<49843>bd.p) <7>bd.e--; } if(<170631>ad.e > a) if(<169860>isdigit(*--ad.e)) <139640>d += (*ad.e - '0')*ad.s; else <30220>ad.e = a; if(<170631>bd.e > b) if(<109780>isdigit(*--bd.e)) <60307>d += (*bd.e - '0')*bd.s; else <49473>bd.e = b; <170631>*--re = mod10(d); <170631>d = div10(d); } <49845>*sgn = ad.c | bd.c; if(<49845>d > 0) <1>*--re = 1; /* happens only on overflow */ else if(<49844>d < 0) { int x = <4997>10; <4997>*sgn = -1; for(<4997>t = r; <33242>--t>=re; <28245>) { <28245>*t = (x - *t)%10; if(<28245>*t != 0) <21111>x = 9; } if(<4997>d < -1) <0>*--re = 1; /* hygiene, can't happen */ } while(<81359>re<r && <79328>*re==0) <31514>re++; return <49845>re; } int fieldaccum(uchar *a, uchar *ae, uchar *b, uchar *be) <49845>{ static uchar *r, *re; /* not pure ansi (re-r=nil-nil=?) */ desc da = <49845>lsd(a, ae); desc db = <49845>lsd(b, be); int sgn, Sgn, f; uchar *v; while(<51247>re-r < max(da.e-a+db.f, db.e-b+da.f) <26>+ 1) { int l = <1402>re - r + 100; <1402>r = (uchar*)<1387>realloc(r, l); if(<1402>r == 0) <0>fatal("out of space","",0); <1402>re = r + l; } <49845>f = max(da.f, db.f);<3> <49845>v = add(re, a, da, b, db, &sgn); <49845>Sgn = sgn != 0; if(<49845>Sgn+max(re-v,f+1)+<46132>(da.p|<3713>db.p) > ae-a) return <1>0; while(<49860>re>v && <47829>--f>=0) <16>*--ae = *--re + '0'; while(<49850>--f >= 0) <6>*--ae = '0'; if(<49844>da.p | db.p) <10>*--ae = '.'; if(<49844>re <= v) <2031>*--ae = '0'; while(<188963>re > v) <139119>*--ae = *--re + '0'; if(<49844>da.z | db.z) while(<16>ae > a+Sgn) <9>*--ae = '0'; if(<49844>sgn > 0) <3>*--ae = '+'; else if(<49841>sgn < 0) <4997>*--ae = '-'; if(<49844>ae<a || <49844>v<r) <0>fatal("bug in accumulation","",0); while(<93111>ae > a) <43267>*--ae = ' '; return <49844>1; } void chkaccum(struct field *field) <34>{ if(<34>field->coder == 0) <34>field->coder = acode; if(<34>field->coder != acode) <0>goto bad; if(<34>field->trans == 0) <33>field->trans = ident; if(<34>field->trans != ident) <1>goto bad; if(<33>field->keep == 0) <31>field->keep = all; if(<33>field->keep != all) <2>goto bad; if(<31>field->rflag) <1>goto bad; return<30>; bad: <4>fatal("improper modifier with -a","",0); return<0>; } /* acode is unlike other coding functions. instead of making a key in the place pointed to by op, it makes a list of field locations. it might be more honest if op were declared void* */ struct loc { uchar *from, *to; }; int acode(uchar *ip, uchar *op, int len, struct field *f) <99690>{ struct loc *lp = <99690>(struct loc*)op; <99690>lp->from = ip; <99690>lp->to = ip + len; if(<99690>!tab && <99690>!f->bflag && <99686>f->begin.fieldno>0 && <99560>f->begin.charno==0) <99556>lp->from++; return <99690>sizeof(*lp); } int doaccum(struct rec *arec, struct rec *brec) <44842>{ int i; struct loc aloc[NF], bloc[NF]; <44842>fieldcode(data(arec), (uchar*)aloc, arec->dlen, (uchar*)-1, accum, naccum); <44842>fieldcode(data(brec), (uchar*)bloc, brec->dlen, (uchar*)-1, accum, naccum); for(<44842>i=0; <94686>i<naccum; <49844>i++) if(<49845>!fieldaccum(aloc[i].from, aloc[i].to, bloc[i].from, bloc[i].to)) { if(<1>i==0) <1>warn("sum too long; record kept: ", (char*)data(brec), brec->dlen); else <0>fatal("sum too long on adding: ", (char*)data(brec), brec->dlen); return <1>0; } return <44841>1; } struct rec * listaccum(struct rec *head, struct rec *tail) <134>{ struct rec *q = <134>head; struct rec *p = <134>head; for(<134>;<24902>;<24902>) { if(<25036>q == tail) <134>break; <24902>q = q->next; if(<24902>doaccum(p, q)) <24901>p->next = q->next; else <1>p = q; } return <134>p; } fsort.c: /* Copyright 1990, AT&T Bell Labs */ #include <stdlib.h> #include <string.h> #include <ctype.h> #include "fsort.h" int mflag = 0; int cflag = 0; int keyed = 0; extern void readin(void); extern void dumptotemp(void); extern void sealstack(struct rec *p); extern void filelist(int, char**); extern int getopt(int, char*const*, const char*); extern char *optarg; extern int optind; FILE *input; char *oname = "-"; char *tname[] = { "/usr/tmp"/*substitutable*/, "/usr/tmp", "/tmp", 0 }; char **files; int nfiles; char *nofiles[] = { "-", 0 }; main(int argc, char **argv) <311>{ int c, n; static char s[3] = { '-' }; for(<311>;<590>;<590>) { if(<901>optind<argc && <873>argv[optind][0]=='+' && <26>isdigit(argv[optind][1])) { <26>optind += fieldarg(argv[optind], argv[optind+1]); <26>continue; } <875>n = optind; /* for sake of case '?' */ <875>c = getopt(argc,argv,"a:bcdfgik:mno:rst:uw:y:MT:"); switch(<875>c) { case '?': <16>fatal("bad option", argv[n], 0); default: <0>fatal("implementation error 1","",0); case 'b': case 'd': case 'f': case 'g': case 'i': case 'n': case 'M': case 'r': <97>s[1] = c; <97>fieldarg(s, 0); <97>continue; case 'c': <88>cflag++; <88>continue; case 'm': <49>mflag++; <49>continue; case 's': <9>sflag++; <9>continue; case 'u': <17>uflag++; <17>sflag++; <17>continue; case 'a': <35>aflag++; <35>uflag++; <35>sflag++; <35>optionk(optarg, accum, &naccum); <35>continue; case 'k': <181>optionk(optarg, fields, &nfields); <175>continue; case 'o': <5>oname = optarg; <5>continue; case 'T': <1>tname[0] = optarg; <1>continue; case 'w': <33>nmerge = atoi(optarg); if(<33>nmerge < 2) <0>fatal("-w too small","",0); <33>continue; case 'y': <5>optiony(optarg); <5>continue; case 't': if(<51>strlen(optarg) != 1) <1>fatal("bad argument for -t", "", 0); <50>tab = optarg[0]; <50>continue; case 'z': <0>warn("nonstandard option -z ignored","",0); case -1: <288>break; } <288>break; } <288>filelist(argc, argv); if(<288>cflag) <88>aflag = 0; <288>fieldwrapup(); <282>tabinit(); <282>setsigs(cleanup); if(<282>cflag) { if(<88>nfiles > 1) <1>fatal("-c takes just one file", "", 0); <87>check(files[0]); return <82>0; } if(<194>mflag) { <47>merge(0); return <46>0; } for(<147>n=0; <820>n<nfiles; <673>n++) { <676>input = fileopen(files[n], "r"); <674>readin(); <674>fileclose(input, files[n]); } if(<144>stack->head==0 && <12>nextfile==0) { /* empty input */ if(<12>strcmp(oname,"-") != 0) <4>fileclose(fileopen(oname, "w"), oname); return <12>0; } if(<132>stack->head && <132>stack->head->next) <120>sort(stack, 0); if(<132>nextfile > 0) { if(<2>stack->head) <2>dumptotemp(); <2>tabfree(); <2>merge(1); } else { FILE *f; <130>f = fileopen(oname, "w"); <129>printout(stack->head, f, oname); <129>fileclose(f, oname); } return <131>0; } void filelist(int argc, char **argv) <288>{ int i, j; <288>files = nofiles; <288>nfiles = argc - optind; if(<288>nfiles != 0) <260>files = argv + optind; else <28>nfiles = 1; if(<288>strcmp(argv[optind-1], "--") == 0) return<2>; for(<286>i=j=0; <1673>i<nfiles; <1387>i++) { if(<1387>strncmp(files[i], "-o", 2) == 0) { if(<6>files[i][2] != 0) <1>oname = files[i] + 2; else if(<5>i < argc-1) <5>oname = files[++i]; else <0>fatal("incomplete -o", "", 0); } else <1381>files[j++] = files[i]; } <286>files[j] = 0; <286>nfiles = j; <286>} void readin(void) <674>{ int n; struct rec *new; struct rec *p = <674>stack->tail; struct rec *r = <674>p? <529>succ(p): buffer; for(<674>;<342230>;<342230>) { if(<342904>bufmax-(uchar*)r < MINREC) { <78>sealstack(p); <78>dumptotemp(); <78>p = 0; <78>r = buffer; } <342904>r->next = (struct rec*)bufmax; <342904>new = getline(r, input); recenter: if(<342906>new == 0) { <342230>r->next = 0; if(<342230>p) <342017>p->next = r; <342230>p = r; <342230>r = succ(r); } else if(<676>new == ENDFILE) { <674>sealstack(p); return<674>; } else { <2>sealstack(p); <2>dumptotemp(); <2>p = 0; <2>r = buffer; <2>n = data(new)-(uchar*)new+new->dlen+new->klen; if(<2>(uchar*)r+n > bufmax) <0>fatal("monster record", "", 0); <2>memmove(r, new, n); <2>free(new); <2>new = 0; <2>goto recenter; } } } void sealstack(struct rec *p) <754>{ if(<754>p == 0) return<12>; <742>p->next = 0; if(<742>stack->head == 0) <213>stack->head = buffer; <742>stack->tail = p; <742>} void printout(struct rec *r, FILE *f, char *name) <211>{ int c, n; uchar *dp, *ep; for( <211>; <248661>r; <248450>r=r->next) { <248450>dp = data(r); <248450>n = r->dlen; <248450>ep = dp + n++; <248450>c = *ep; <248450>*ep = '\n'; if(<248450>fwrite((char*)dp, 1, n, f) != n) <0>fatal("error writing", name, 0); <248450>*ep = c; } <211>} void dumptotemp() <82>{ char *tempfile = <82>filename(nextfile++); FILE *temp = fileopen(<82>tempfile,"w"); if(<82>stack->head == 0) <0>fatal("monster record", "", 0); <82>stack->tail->next = 0; /* for good measure */ <82>sort(stack, 0); <82>printout(stack->head, temp, tempfile); <82>fileclose(temp, tempfile); <82>free(tempfile); <82>stack->head = stack->tail = 0; return<82>; } field.c: /* Copyright 1990, AT&T Bell Labs */ #include <stdlib.h> #include <ctype.h> #include "fsort.h" static char *modifiers(struct field*, char*, int); static char *keyspec(struct pos*, char*); static void globalmods(struct field*); static void chkfieldno(struct field*); struct field fields[NF] = { { 0, 0, 0, 0, 0, 0, 0, { 0, 0 }, { NP, 0 } } }; int nfields = 0; int tab; int signedrflag; int simplekeyed; #define blank(p) (*(p)==' ' || *(p)=='\t') enum { OLD, NEW }; /* interpret 1 or 2 arguments and return how many */ int fieldarg(char *argv1, char *argv2) <123>{ char *av1 = <123>argv1; char *av2 = <123>argv2; struct field *field; if(<123>av1[0] == '+' && <26>isdigit(av1[1])) { if(<26>++nfields >= NF) <0>fatal("too many fields", argv1, 0); <26>field = &fields[nfields]; <26>field->end.fieldno = NP+1; <26>field->style = OLD; <26>av1 = keyspec(&field->begin, av1+1); if(<26>*modifiers(field, av1, 0)) <0>goto bad; if(<26>av2==0 || <25>av2[0]!='-' || <12>!isdigit(av2[1])) return <14>1; <12>av2 = keyspec(&field->end, av2+1); <12>argv1 = argv2; /* in case of diagnostic */ if(<12>*modifiers(field, av2, 1)) <0>goto bad; return <12>2; } else if(<97>*modifiers(fields, av1+1, -1)) <0>goto bad; /* believed not to happen */ return <97>1; bad: <0>fatal("bad field specification", argv1, 0); return <0>0; /* dummy */ } void optionk(char *arg, struct field *fields, int *nfields) <216>{ char *a = <216>arg; struct field *field; if(<216>++*nfields >= NF) <0>fatal("too many fields", arg, 0); <216>field = &fields[*nfields]; <216>field->begin.charno = 1; <216>field->end.fieldno = NP+1; <216>field->style = NEW; <216>a = keyspec(&field->begin, a); <213>a = modifiers(field, a, 0); if(<213>*a == ',') { <116>a = keyspec(&field->end, a+1); <115>a = modifiers(field, a, 1); } if(<212>*a == 0) return<210>; bad: <2>fatal("bad -k specification", arg, 0); <0>} static char * keyspec(struct pos *p, char *arg) <370>{ if(<370>!isdigit(*arg)) <3>fatal("missing field number", "", 0); <367>p->fieldno = strtoul(arg, &arg, 10); if(<367>*arg == '.') if(<69>!isdigit(*++arg)) <1>fatal("missing character number", "", 0); else <68>p->charno = strtoul(arg, &arg, 10); return <366>arg; } /* keyed = 1 if there are fields present (+ options) or if numeric (-ng), translation (-f) or deletion (-idb) options are present. In these cases, a separate key is constructed for rsort. The key, however is not carried on intermediate files. (It would be interesting to try.) It must be reconstructed for the merge phase, and that may be expensive, since relatively few comparisons happen in that phase. simplekeyed = 1 if there are options, so that pure ascii comparison won't work, but no fields, no months, no numerics. */ void fieldwrapup(void) <288>{ int i; if(<288>nfields==0 && <145>aflag) <1>fatal("-a without -k", "", 0); if(<287>fields->coder == 0) <262>fields->coder = tcode; if(<287>fields->trans == 0) <270>fields->trans = ident; if(<287>fields->keep == 0) <269>fields->keep = all; for(<287>i=1; <487>i<=nfields; <200>i++) { <201>globalmods(&fields[i]); <201>chkfieldno(&fields[i]); } for(<286>i=1; <316>i<=naccum; <30>i++) { <34>chkaccum(&accum[i]); <30>chkfieldno(&accum[i]); } <282>signedrflag = fields->rflag? <24>-1: <258>1; /* used only by merge.c*/ <282>simplekeyed = nfields==0 && <144>fields->coder==tcode && <119>(fields->trans!=ident || <112>fields->keep!=all); if(<282>nfields==0 && <144>!keyed) /* used only by rsort.c */ <109>rflag = fields->rflag; if(<282>nfields > 0) <138>keyed = 1; <282>} static void conflict(void) <4>{ <4>warn("conflicting key types", "", 0); <4>} static void dupla(uchar **oldp, uchar *new) <48>{ if(<48>*oldp != 0 && <1>*oldp != new) <1>conflict(); <48>*oldp = new; <48>} static void duplb(int (**oldp)(uchar*,uchar*,int,struct field*), int (*new)(uchar*,uchar*,int,struct field*)) <56>{ if(<56>*oldp != 0 && <2>*oldp != new) <2>conflict(); <56>*oldp = new; <56>} /* eflag=-1 global flags, =0 field start, =1 field end */ static char * modifiers(struct field *field, char *argv1, int eflag) <463>{ for( <463>; <629>*argv1; <166>argv1++) { switch(<284>*argv1) { case 'b': if(<22>eflag==1) <6>field->eflag = 1; else <16>field->bflag = 1; <22>goto ckglob; case 'r': <40>field->rflag = 1; <40>goto ckglob; case 'f': <23>dupla(&field->trans, fold); <23>break; case 'd': <20>dupla(&field->keep, dict); <20>break; case 'i': <5>dupla(&field->keep, ascii); <5>break; case 'g': <10>duplb(&field->coder, gcode); <10>break; case 'n': <38>duplb(&field->coder, ncode); <38>break; case 'M': <8>duplb(&field->coder, Mcode); <8>break; default: <118>goto done; } <104>keyed = 1; ckglob: if(<166>field==fields && <97>nfields>0) <2>warn("field spec precedes global option",argv1,1); } done: if(<463>field->coder==ncode && <40>field->keep) <1>conflict(); return <463>argv1; } static void globalmods(struct field *field) <201>{ int flagged = <201>field->bflag | field->eflag | field->rflag; if(<201>!field->coder) <172>field->coder = tcode; else <29>flagged++; if(<201>!field->trans) <196>field->trans = ident; else <5>flagged++; if(<201>!field->keep) <197>field->keep = all; else <4>flagged++; if(<201>!flagged) { <146>field->coder = fields->coder; <146>field->trans = fields->trans; <146>field->keep = fields->keep; <146>field->rflag = fields->rflag; <146>field->bflag = fields->bflag; if(<146>field->style == NEW) <126>field->eflag = fields->bflag; } <201>} /* convert field representation from numbers given in arguments to a 0-origin first,last+1 representation, with a negative quantity for a character offset to the end of this field */ static void chkfieldno(struct field *field) <231>{ if(<231>field->style == NEW) { if(<205>--field->begin.fieldno < 0 || <204>--field->begin.charno < 0 || <204>--field->end.fieldno < 0) <1>fatal("improper 0 in field specifier", "", 0); if(<204>field->end.charno == 0) <180>field->end.charno--; } else if(<26>field->end.charno==0 && <22>field->end.fieldno>0) { if(<22>tab && <8>field->eflag) <0>fatal("skipping blanks right after tab char" " is ill-defined", "", 0); <22>field->end.fieldno--; <22>field->end.charno--; } if(<230>field->begin.fieldno > NP) <0>field->begin.fieldno = NP; if(<230>field->end.fieldno > NP) <0>field->end.fieldno = NP; /* fprintf(stderr,"%d %d.%d,%d.%d\n",field-fields,field->begin.fieldno, field->begin.charno,field->end.fieldno, field->end.charno);*/ <230>} int fieldcode(uchar *dp, uchar *kp, int len, uchar *b, struct field *fields, int nfields) <474734>{ uchar *posns[NP+1]; /* field start positions */ uchar *cp; struct field *field; uchar *op = <474734>kp; uchar *ep; uchar *bound = <474734>kp + MAXREC; int i; int np; if(<474734>bound > b) <155959>bound = b; <474734>posns[0] = dp; if(<474734>tab) for(<304>np=1, i=len, cp=dp; <6355>i>0 && <6051>np<NP; i-<6051>-) { if(<6051>*cp++ != tab) <4411>continue; <1640>posns[np++] = cp; } else for(<474430>np=1, i=len, cp=dp; <1110378>i>0 && <635948>np<NP; ) <635948>{ while(<1228760>blank(cp) && i><636001>0) <592812>cp++, i--; while(<3943779>!blank(cp) && i><3782124>0) <3307831>cp++, i--; <635948>posns[np++] = cp; } if(<474734>nfields > 0) <143100>field = &fields[1]; else <331634>field = &fields[0]; <474734>i = nfields; do { int t = <486184>field->begin.fieldno; uchar *xp = <486184>dp + len; if(<486184>t < np) { <486176>cp = posns[t]; if(<486176>field->bflag && <144>nfields) while(<454>cp<xp && <454>blank(cp)) <316>cp++; <486176>cp += field->begin.charno; if(<486176>cp > xp) <12>cp = xp; } else <8>cp = xp; <486184>t = field->end.fieldno; if(<486184>t < np) { if(<112104>field->end.charno < 0) { if(<111816>t >= np-1) <25>ep = xp; else { <111791>ep = posns[t+1]; if(<111791>tab) <22>ep--; } } else { <288>ep = posns[t]; if(<288>field->eflag) while(<262>ep<xp && <262>blank(ep)) <186>ep++; <288>ep += field->end.charno; } if(<112104>ep > xp) <28>ep = xp; else if(<112076>ep < cp) <10>ep = cp; } else <374080>ep = xp; <486184>t = ep - cp; if(<486184>field->coder != acode && <386494>op+room(t) > bound) return <17>-1; <486167>op += (*field->coder)(cp, op, ep-cp, field); <486167>field++; } while(<486167>--i > 0); return <474717>op - kp; } /* Encode text field subject to options -r -fdi -b. Fields are separated by 0 (or 255 if rflag is set) the anti-ambiguity stuff prevents such codes from happening otherwise by coding real zeros and ones as 0x0101 and 0x0102, and similarly for complements */ int tcode(uchar *dp, uchar *kp, int len, struct field *f) <12156>{ uchar *cp = <12156>kp; int c; uchar *keep = <12156>f->keep; uchar *trans = <12156>f->trans; int reverse = <12156>f->rflag? <106>~0: <12050>0; while(<211231>--len >= 0) { <199075>c = *dp++; if(<199075>keep[c]) { <198337>c = trans[c]; if(<198337>c <= 1) { /* anti-ambiguity */ <4>*cp++ = 1^reverse; <4>c++; } else if(<198333>c >= 254) { <4>*cp++ = 255^reverse; <4>c--; } <198337>*cp++ = c^reverse; } } <12156>*cp++ = reverse; return <12156>cp - kp; } static char *month[] = { "jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec" }; int Mcode(uchar *dp, uchar *kp, int len, struct field *f) <57>{ int j = <57>-1; int i; uchar *cp; for( <57>; <69>len>0; <12>dp++, len--) { if(<69>*dp!=' ' && <57>*dp!='\t') <57>break; } if(<57>len >= 3) while(<294>++j < 12) { <292>cp = (uchar*)month[j]; for(<292>i=0; <166>i<3; <166>i++) if(<410>(dp[i]|('a'-'A')) != *cp++) <244>break; if(<292>i >= 3) <48>break; } <57>*kp = j>=12? <2>0: <55>j+1; if(<57>f->rflag) <8>*kp ^= ~0; return <57>1; } tables.c: /* Copyright 1990, AT&T Bell Labs */ #include <stdlib.h> #include <string.h> #include <ctype.h> #include "fsort.h" /* STACK=1000 makes lots of room under normal conditions. But it can be broken. 40 cycles like this will do it: a,b,...z,za,zb,...zz,zza,zzb,...,zzz,zzza,... Minor changes to rsort() would allow the stack to be extended discontiguously. Pity we don't have alloca any more. */ #define BUFFER 6000000 #define MINBUF 10000 /* not worth trying */ #define STACK 1000 uchar *bufmax; struct rec *buffer; unsigned long bufsiz = BUFFER; struct rec endfile; struct list *stack; struct list *stackmax; uchar ident[256]; /* identity transform */ uchar fold[256]; /* fold upper case to lower */ uchar all[256]; /* all chars significant */ uchar dict[256]; /* sig chars for dictionary order */ uchar ascii[256]; /* ascii graphics significant */ void tabinit(void) <282>{ int i; <282>memset((char*)all,1,256); <282>memset((char*)(dict+'0'),1,10); <282>memset((char*)(dict+'A'),1,26); <282>memset((char*)(dict+'a'),1,26); <282>dict[' '] = dict['\t'] = 1; <282>memset((char*)(ascii+040),1,0137); /* 040-0176 */ for(<282>i=0; <72192>i<256; <72192>i++) <72192>fold[i] = ident[i] = i; for(<282>i='a'; <7332>i<='z'; <7332>i++) <7332>fold[i] += 'A' - 'a'; <282>stack = (struct list*)malloc(STACK*sizeof(*stack)); do { if(<282>(buffer=(struct rec*)malloc(bufsiz)) != 0) <282>break; } while(<0>(bufsiz/=2) > MINBUF); if(<282>buffer==0 || <282>stack==0) <0>fatal("can't get working space", "", 0); <282>bufmax = (uchar*)buffer + bufsiz - 2*sizeof(*buffer); <282>stack->head = stack->tail = 0; <282>stackmax = stack + STACK; <282>} void tabfree(void) <2>{ <2>free(stack); <2>free(buffer); <2>} void optiony(char *s) <5>{ long size = <5>atol(s); if(<5>!isdigit(s[0])) <0>fatal("no size for -y","",0); if(<5>size >= MINBUF/10) /* tiny helps debugging */ <5>bufsiz = size; else if(<0>size == 0) <0>bufsiz = 10L*BUFFER; else <0>fatal("-y too small", "", 0); <5>} cfiles.c: /* Copyright 1990, AT&T Bell Labs */ #include "fsort.h" #include <stdlib.h> #include <string.h> #include <signal.h> #include <errno.h> #include <sys/types.h> #include <sys/stat.h> #define SIGHUP 1 #define SIGINT 2 #define SIGQUIT 3 #define SIGPIPE 13 #define SIGTERM 15 extern int getpid(void); extern int access(char*, int); extern int unlink(char*); extern int stat(const char*, struct stat *); extern int cgets(char*, int, FILE*); FILE * fileopen(char *name, char *mode) <2052>{ FILE *f; if(<2052>strcmp(name,"-") == 0) if(<199>strcmp(mode, "r") == 0) <27>f = stdin; else { <172>setbuf(stdout,malloc(BUFSIZ)); <172>f = stdout; } else { if(<1853>strcmp(mode, "w") == 0 && <257>strcmp(name, oname) == 0 && <11>overwrite(0)) <4>setsigs(SIG_IGN); <1853>f = fopen(name, mode); } if(<2052>f == 0) <3>fatal("can't open", name, 0); return <2049>f; } void fileclose(FILE *f, char *name) <2041>{ if(<2041>fclose(f)==EOF && <1>name!=0) <1>fatal("error on", name, 0); <2040>} /* file name strings accumulate as garbage */ char * filename(int number) <849>{ char name[50]; char *s; int i; for(<849>i=0; <849>(s=tname[i])!=0; <0>i++) if(<849>access(s, 03) != -1) <849>break; if(<849>s == 0) <0>fatal("no accessible temp directory", "", 0); <849>sprintf(name, "%s/stm%.5d.%.4d", s, getpid(), number); <849>s = malloc(strlen(name) + 1); if(<849>s == 0) <0>fatal("out of space", "", 0); <849>strcpy(s, name); return <849>s; } /* if there is enough room in record r, getline puts a line of data there and returns 0; otherwise it returns a pointer to a new record. The record may be grown in stages; intermediate stages can be discarded, but the original cannot. */ static struct rec * newrec(struct rec *r, struct rec *retval) <83>{ int n = <83>(uchar*)r->next - data(r); int len = <83>(uchar*)r->next - (uchar*)r; struct rec *new = <83>(struct rec*)malloc(len + n); if(<83>new == 0) <0>fatal("no space for record", "", 0); <83>memmove(new, r, len); <83>new->next = (struct rec*)((uchar*)new + len + n); if(<83>retval) <24>free(retval); return <83>new; } struct rec* getline(struct rec *r, FILE *f) <627600>{ int n = <627600>0; int m; uchar *cp, *dp; struct rec *retval = <627600>0; if(<627600>feof(f)) /* in case newline was appended */ return <1>ENDFILE; for(<627599>;<133>;<133>) { /* usually executed once */ <627732>dp = data(r); <627732>cp = dp + n; <627732>m = (uchar*)r->next - cp; if(<627732>m <= 1) <66>retval = r = newrec(r, retval); else { <627666>m = cgets((char*)cp, m, f); if(<627666>m == 0) { if(<1611>n == 0) return <1610>ENDFILE; <1>warn("newline appended", "", 0); <1>break; } <626055>n += m; if(<626055>dp[n-1] == '\n') { <625988>n--; <625988>break; } } } <625989>r->dlen = n; if(<625989>n > MAXREC) <0>fatal("monster record", "", 0); if(<625989>!keyed) { <240956>r->klen = 0; /* hygiene */ return <240956>retval; } while(<385050>(n = fieldcode(data(r),key(r), r->dlen, (uchar*)r->next, fields, nfields)) < 0) <17>retval = r = newrec(r, retval); /* rare event */ if(<385033>n > MAXREC) <0>fatal("monster key", "", 0); <385033>r->klen = n; return <385033>retval; } static char *level = "warning"; void warn(char *m, char *s, int l) <54>{ <54>fprintf(stderr, "sort: %s: %s %.*s\n", level, m, l==0?<45>strlen(s):<9>l, s); <54>} void fatal(char *m, char *s, int l) <40>{ <40>level = "error"; <40>warn(m, s, l); if(<40>errno) <3>perror(""); <40>cleanup(1); <0>} int overwrite(int j) <60>{ struct stat sb1, sb2; if(<60>strcmp(oname, "-") == 0) return <46>0; if(<14>stat(oname, &sb1) == -1) return <4>0; for( <10>; <29>j<nfiles; <19>j++) { if(<26>strcmp(files[j], "-") == 0) <2>continue; if(<24>stat(files[j], &sb2) == -1) <0>fatal("cannot locate", files[j], 0); if(<24>sb1.st_dev==sb2.st_dev && <24>sb1.st_ino==sb2.st_ino) return <7>1; } return <3>0; } static int siglist[] = { SIGHUP, SIGINT, SIGQUIT, SIGPIPE, SIGTERM }; void setsigs(void(*f)(int)) <326>{ int i; for(<326>i=0; <1956>i<sizeof(siglist)/sizeof(*siglist); <1630>i++) if(<1630>signal(siglist[i], f) == SIG_IGN) <0>signal(siglist[i], SIG_IGN); <326>} void cleanup(int i) <40>{ char *name; <40>setsigs(SIG_IGN); while(<40>--nextfile >= 0) { <0>name = filename(nextfile); <0>unlink(name); <0>free(name); } <40>exit(i); <0>} cgets.c: /* Copyright AT&T Bell Laboratories, 1993 */ #include <stdio.h> /* same as fgets, but returns a char count instead of first arg. * robust against null bytes in input */ int cgets(char *ptr, int n, FILE *iop) <627666>{ int l; char *s = <627666>ptr; unsigned char *t; while(<631159>--n > 0) { <631159>l = iop->_cnt; if(<631159>l > 0) { if(<627758>l > n) <277838>l = n; <627758>t = iop->_ptr; while(<8783600>--l >= 0 && <8781637>(*s++ = *t++) != '\n') <8155842>continue; <627758>l = t - iop->_ptr; <627758>iop->_ptr = t; <627758>iop->_cnt -= l; if(<627758>t[-1] == '\n' || <1963>(n -= l) <= 0) <625861>break; } <5298>l = getc(iop); if(<5298>l == EOF) <1612>break; <3686>*s++ = l; if(<3686>l == '\n') <193>break; } <627666>*s = 0; return <627666>s - ptr; }

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.