|
|
1.1 root 1: /* Copyright 1990, AT&T Bell Labs */
2: #include <stdlib.h>
3: #include <ctype.h>
4: #include "fsort.h"
5:
6:
7:
8: static char *modifiers(struct field*, char*, int);
9: static char *keyspec(struct pos*, char*);
10: static void globalmods(struct field*);
11: static void chkfieldno(struct field*);
12:
13: struct field fields[NF] = {
14: { 0, 0, 0, 0, 0, 0, 0, { 0, 0 }, { NP, 0 } }
15: };
16: int nfields = 0;
17:
18: int tab;
19: int signedrflag;
20: int simplekeyed;
21:
22: #define blank(p) (*(p)==' ' || *(p)=='\t')
23:
24: enum { OLD, NEW };
25:
26: /* interpret 1 or 2 arguments and return how many */
27: int
28: fieldarg(char *argv1, char *argv2)
29: {
30: char *av1 = argv1;
31: char *av2 = argv2;
32: struct field *field;
33:
34: if(av1[0] == '+' && isdigit(av1[1])) {
35: if(++nfields >= NF)
36: fatal("too many fields", argv1, 0);
37: field = &fields[nfields];
38: field->end.fieldno = NP+1;
39: field->style = OLD;
40:
41: av1 = keyspec(&field->begin, av1+1);
42: if(*modifiers(field, av1, 0))
43: goto bad;
44:
45: if(av2==0 || av2[0]!='-' || !isdigit(av2[1]))
46: return 1;
47: av2 = keyspec(&field->end, av2+1);
48: argv1 = argv2; /* in case of diagnostic */
49: if(*modifiers(field, av2, 1))
50: goto bad;
51: return 2;
52: } else if(*modifiers(fields, av1+1, -1))
53: goto bad; /* believed not to happen */
54: return 1;
55: bad:
56: fatal("bad field specification", argv1, 0);
57: return 0; /* dummy */
58: }
59:
60: void
61: optionk(char *arg, struct field *fields, int *nfields)
62: {
63: char *a = arg;
64: struct field *field;
65: if(++*nfields >= NF)
66: fatal("too many fields", arg, 0);
67: field = &fields[*nfields];
68: field->begin.charno = 1;
69: field->end.fieldno = NP+1;
70: field->style = NEW;
71:
72: a = keyspec(&field->begin, a);
73: a = modifiers(field, a, 0);
74: if(*a == ',') {
75: a = keyspec(&field->end, a+1);
76: a = modifiers(field, a, 1);
77: }
78: if(*a == 0)
79: return;
80: bad:
81: fatal("bad -k specification", arg, 0);
82: }
83:
84: static char *
85: keyspec(struct pos *p, char *arg)
86: {
87: if(!isdigit(*arg))
88: fatal("missing field number", "", 0);
89: p->fieldno = strtoul(arg, &arg, 10);
90: if(*arg == '.')
91: if(!isdigit(*++arg))
92: fatal("missing character number", "", 0);
93: else
94: p->charno = strtoul(arg, &arg, 10);
95: return arg;
96: }
97:
98: /* keyed = 1 if there are fields present (+ options) or if
99: numeric (-ng), translation (-f) or deletion (-idb) options
100: are present. In these cases, a separate key is constructed
101: for rsort. The key, however is not carried on
102: intermediate files. (It would be interesting to try.)
103: It must be reconstructed for the merge phase, and that
104: may be expensive, since relatively few comparisons
105: happen in that phase. simplekeyed = 1 if there are options,
106: so that pure ascii comparison won't work, but no fields, no
107: months, no numerics. */
108:
109: void
110: fieldwrapup(void)
111: {
112: int i;
113: if(nfields==0 && aflag)
114: fatal("-a without -k", "", 0);
115: if(fields->coder == 0) fields->coder = tcode;
116: if(fields->trans == 0) fields->trans = ident;
117: if(fields->keep == 0) fields->keep = all;
118: for(i=1; i<=nfields; i++) {
119: globalmods(&fields[i]);
120: chkfieldno(&fields[i]);
121: }
122: for(i=1; i<=naccum; i++) {
123: chkaccum(&accum[i]);
124: chkfieldno(&accum[i]);
125: }
126: signedrflag = fields->rflag? -1: 1; /* used only by merge.c*/
127: simplekeyed = nfields==0 && fields->coder==tcode
128: && (fields->trans!=ident || fields->keep!=all);
129: if(nfields==0 && !keyed) /* used only by rsort.c */
130: rflag = fields->rflag;
131: if(nfields > 0)
132: keyed = 1;
133: }
134:
/* warn (non-fatally) that incompatible key options were combined */
static void
conflict(void)
{
	warn("conflicting key types", "", 0);
}
140:
141: static void
142: dupla(uchar **oldp, uchar *new)
143: {
144: if(*oldp != 0 && *oldp != new)
145: conflict();
146: *oldp = new;
147: }
148:
149: static void
150: duplb(int (**oldp)(uchar*,uchar*,int,struct field*), int (*new)(uchar*,uchar*,int,struct field*))
151: {
152: if(*oldp != 0 && *oldp != new)
153: conflict();
154: *oldp = new;
155: }
156:
157: /* eflag=-1 global flags, =0 field start, =1 field end */
158:
159: static char *
160: modifiers(struct field *field, char *argv1, int eflag)
161: {
162: for( ; *argv1; argv1++) {
163: switch(*argv1) {
164: case 'b': if(eflag==1) field->eflag = 1;
165: else field->bflag = 1; goto ckglob;
166: case 'r': field->rflag = 1; goto ckglob;
167: case 'f': dupla(&field->trans, fold); break;
168: case 'd': dupla(&field->keep, dict); break;
169: case 'i': dupla(&field->keep, ascii); break;
170: case 'g': duplb(&field->coder, gcode); break;
171: case 'n': duplb(&field->coder, ncode); break;
172: case 'M': duplb(&field->coder, Mcode); break;
173: default:
174: goto done;
175: }
176: keyed = 1;
177: ckglob:
178: if(field==fields && nfields>0)
179: warn("field spec precedes global option",argv1,1);
180: }
181: done:
182: if(field->coder==ncode && field->keep)
183: conflict();
184: return argv1;
185: }
186:
187: static void
188: globalmods(struct field *field)
189: {
190: int flagged = field->bflag | field->eflag | field->rflag;
191: if(!field->coder) field->coder = tcode;
192: else flagged++;
193: if(!field->trans) field->trans = ident;
194: else flagged++;
195: if(!field->keep) field->keep = all;
196: else flagged++;
197: if(!flagged) {
198: field->coder = fields->coder;
199: field->trans = fields->trans;
200: field->keep = fields->keep;
201: field->rflag = fields->rflag;
202: field->bflag = fields->bflag;
203: if(field->style == NEW)
204: field->eflag = fields->bflag;
205: }
206: }
207:
208: /* convert field representation from numbers given in arguments
209: to a 0-origin first,last+1 representation, with a negative
210: quantity for a character offset to the end of this field */
211:
212: static void
213: chkfieldno(struct field *field)
214: {
215: if(field->style == NEW) {
216: if(--field->begin.fieldno < 0 ||
217: --field->begin.charno < 0 ||
218: --field->end.fieldno < 0)
219: fatal("improper 0 in field specifier", "", 0);
220: if(field->end.charno == 0)
221: field->end.charno--;
222: } else if(field->end.charno==0 && field->end.fieldno>0) {
223: if(tab && field->eflag)
224: fatal("skipping blanks right after tab char"
225: " is ill-defined", "", 0);
226: field->end.fieldno--;
227: field->end.charno--;
228: }
229: if(field->begin.fieldno > NP)
230: field->begin.fieldno = NP;
231: if(field->end.fieldno > NP)
232: field->end.fieldno = NP;
233: /* fprintf(stderr,"%d %d.%d,%d.%d\n",field-fields,field->begin.fieldno, field->begin.charno,field->end.fieldno, field->end.charno);*/
234: }
235:
236: int
237: fieldcode(uchar *dp, uchar *kp, int len, uchar *b, struct field *fields, int nfields)
238: {
239: uchar *posns[NP+1]; /* field start positions */
240: uchar *cp;
241: struct field *field;
242: uchar *op = kp;
243: uchar *ep;
244: uchar *bound = kp + MAXREC;
245: int i;
246: int np;
247: if(bound > b)
248: bound = b;
249: posns[0] = dp;
250: if(tab)
251: for(np=1, i=len, cp=dp; i>0 && np<NP; i--) {
252: if(*cp++ != tab)
253: continue;
254: posns[np++] = cp;
255: }
256: else
257: for(np=1, i=len, cp=dp; i>0 && np<NP; ) {
258: while(blank(cp) && i>0)
259: cp++, i--;
260: while(!blank(cp) && i>0)
261: cp++, i--;
262: posns[np++] = cp;
263: }
264:
265: if(nfields > 0)
266: field = &fields[1];
267: else
268: field = &fields[0];
269: i = nfields;
270: do {
271: int t = field->begin.fieldno;
272: uchar *xp = dp + len;
273: if(t < np) {
274: cp = posns[t];
275: if(field->bflag && nfields)
276: while(cp<xp && blank(cp))
277: cp++;
278: cp += field->begin.charno;
279: if(cp > xp)
280: cp = xp;
281: } else
282: cp = xp;
283: t = field->end.fieldno;
284: if(t < np) {
285: if(field->end.charno < 0) {
286: if(t >= np-1)
287: ep = xp;
288: else {
289: ep = posns[t+1];
290: if(tab) ep--;
291: }
292: } else {
293: ep = posns[t];
294: if(field->eflag)
295: while(ep<xp && blank(ep))
296: ep++;
297: ep += field->end.charno;
298: }
299: if(ep > xp)
300: ep = xp;
301: else if(ep < cp)
302: ep = cp;
303: } else
304: ep = xp;
305: t = ep - cp;
306: if(field->coder != acode && op+room(t) > bound)
307: return -1;
308: op += (*field->coder)(cp, op, ep-cp, field);
309: field++;
310: } while(--i > 0);
311: return op - kp;
312: }
313:
314: /* Encode text field subject to options -r -fdi -b.
315: Fields are separated by 0 (or 255 if rflag is set)
316: the anti-ambiguity stuff prevents such codes from
317: happening otherwise by coding real zeros and ones
318: as 0x0101 and 0x0102, and similarly for complements */
319:
320: int
321: tcode(uchar *dp, uchar *kp, int len, struct field *f)
322: {
323: uchar *cp = kp;
324: int c;
325: uchar *keep = f->keep;
326: uchar *trans = f->trans;
327: int reverse = f->rflag? ~0: 0;
328: while(--len >= 0) {
329: c = *dp++;
330: if(keep[c]) {
331: c = trans[c];
332: if(c <= 1) { /* anti-ambiguity */
333: *cp++ = 1^reverse;
334: c++;
335: } else if(c >= 254) {
336: *cp++ = 255^reverse;
337: c--;
338: }
339: *cp++ = c^reverse;
340: }
341: }
342: *cp++ = reverse;
343: return cp - kp;
344: }
345:
346: static char *month[] = { "jan", "feb", "mar", "apr", "may",
347: "jun", "jul", "aug", "sep", "oct", "nov", "dec" };
348:
349: int
350: Mcode(uchar *dp, uchar *kp, int len, struct field *f)
351: {
352: int j = -1;
353: int i;
354: uchar *cp;
355: for( ; len>0; dp++, len--) {
356: if(*dp!=' ' && *dp!='\t')
357: break;
358: }
359: if(len >= 3)
360: while(++j < 12) {
361: cp = (uchar*)month[j];
362: for(i=0; i<3; i++)
363: if((dp[i]|('a'-'A')) != *cp++)
364: break;
365: if(i >= 3)
366: break;
367: }
368: *kp = j>=12? 0: j+1;
369: if(f->rflag)
370: *kp ^= ~0;
371: return 1;
372: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.