|
|
1.1 ! root 1: /* Copyright 1990, AT&T Bell Labs */ ! 2: #include <stdlib.h> ! 3: #include <ctype.h> ! 4: #include "fsort.h" ! 5: ! 6: ! 7: ! 8: static char *modifiers(struct field*, char*, int); ! 9: static char *keyspec(struct pos*, char*); ! 10: static void globalmods(struct field*); ! 11: static void chkfieldno(struct field*); ! 12: ! 13: struct field fields[NF] = { ! 14: { 0, 0, 0, 0, 0, 0, 0, { 0, 0 }, { NP, 0 } } ! 15: }; ! 16: int nfields = 0; ! 17: ! 18: int tab; ! 19: int signedrflag; ! 20: int simplekeyed; ! 21: ! 22: #define blank(p) (*(p)==' ' || *(p)=='\t') ! 23: ! 24: enum { OLD, NEW }; ! 25: ! 26: /* interpret 1 or 2 arguments and return how many */ ! 27: int ! 28: fieldarg(char *argv1, char *argv2) ! 29: { ! 30: char *av1 = argv1; ! 31: char *av2 = argv2; ! 32: struct field *field; ! 33: ! 34: if(av1[0] == '+' && isdigit(av1[1])) { ! 35: if(++nfields >= NF) ! 36: fatal("too many fields", argv1, 0); ! 37: field = &fields[nfields]; ! 38: field->end.fieldno = NP+1; ! 39: field->style = OLD; ! 40: ! 41: av1 = keyspec(&field->begin, av1+1); ! 42: if(*modifiers(field, av1, 0)) ! 43: goto bad; ! 44: ! 45: if(av2==0 || av2[0]!='-' || !isdigit(av2[1])) ! 46: return 1; ! 47: av2 = keyspec(&field->end, av2+1); ! 48: argv1 = argv2; /* in case of diagnostic */ ! 49: if(*modifiers(field, av2, 1)) ! 50: goto bad; ! 51: return 2; ! 52: } else if(*modifiers(fields, av1+1, -1)) ! 53: goto bad; /* believed not to happen */ ! 54: return 1; ! 55: bad: ! 56: fatal("bad field specification", argv1, 0); ! 57: return 0; /* dummy */ ! 58: } ! 59: ! 60: void ! 61: optionk(char *arg, struct field *fields, int *nfields) ! 62: { ! 63: char *a = arg; ! 64: struct field *field; ! 65: if(++*nfields >= NF) ! 66: fatal("too many fields", arg, 0); ! 67: field = &fields[*nfields]; ! 68: field->begin.charno = 1; ! 69: field->end.fieldno = NP+1; ! 70: field->style = NEW; ! 71: ! 72: a = keyspec(&field->begin, a); ! 73: a = modifiers(field, a, 0); ! 74: if(*a == ',') { ! 75: a = keyspec(&field->end, a+1); ! 76: a = modifiers(field, a, 1); ! 77: } ! 78: if(*a == 0) ! 79: return; ! 80: bad: ! 81: fatal("bad -k specification", arg, 0); ! 82: } ! 83: ! 84: static char * ! 85: keyspec(struct pos *p, char *arg) ! 86: { ! 87: if(!isdigit(*arg)) ! 88: fatal("missing field number", "", 0); ! 89: p->fieldno = strtoul(arg, &arg, 10); ! 90: if(*arg == '.') ! 91: if(!isdigit(*++arg)) ! 92: fatal("missing character number", "", 0); ! 93: else ! 94: p->charno = strtoul(arg, &arg, 10); ! 95: return arg; ! 96: } ! 97: ! 98: /* keyed = 1 if there are fields present (+ options) or if ! 99: numeric (-ng), translation (-f) or deletion (-idb) options ! 100: are present. In these cases, a separate key is constructed ! 101: for rsort. The key, however is not carried on ! 102: intermediate files. (It would be interesting to try.) ! 103: It must be reconstructed for the merge phase, and that ! 104: may be expensive, since relatively few comparisons ! 105: happen in that phase. simplekeyed = 1 if there are options, ! 106: so that pure ascii comparison won't work, but no fields, no ! 107: months, no numerics. */ ! 108: ! 109: void ! 110: fieldwrapup(void) ! 111: { ! 112: int i; ! 113: if(nfields==0 && aflag) ! 114: fatal("-a without -k", "", 0); ! 115: if(fields->coder == 0) fields->coder = tcode; ! 116: if(fields->trans == 0) fields->trans = ident; ! 117: if(fields->keep == 0) fields->keep = all; ! 118: for(i=1; i<=nfields; i++) { ! 119: globalmods(&fields[i]); ! 120: chkfieldno(&fields[i]); ! 121: } ! 122: for(i=1; i<=naccum; i++) { ! 123: chkaccum(&accum[i]); ! 124: chkfieldno(&accum[i]); ! 125: } ! 126: signedrflag = fields->rflag? -1: 1; /* used only by merge.c*/ ! 127: simplekeyed = nfields==0 && fields->coder==tcode ! 128: && (fields->trans!=ident || fields->keep!=all); ! 129: if(nfields==0 && !keyed) /* used only by rsort.c */ ! 130: rflag = fields->rflag; ! 131: if(nfields > 0) ! 132: keyed = 1; ! 133: } ! 134: ! 135: static void ! 136: conflict(void) ! 137: { ! 138: warn("conflicting key types", "", 0); ! 139: } ! 140: ! 141: static void ! 142: dupla(uchar **oldp, uchar *new) ! 143: { ! 144: if(*oldp != 0 && *oldp != new) ! 145: conflict(); ! 146: *oldp = new; ! 147: } ! 148: ! 149: static void ! 150: duplb(int (**oldp)(uchar*,uchar*,int,struct field*), int (*new)(uchar*,uchar*,int,struct field*)) ! 151: { ! 152: if(*oldp != 0 && *oldp != new) ! 153: conflict(); ! 154: *oldp = new; ! 155: } ! 156: ! 157: /* eflag=-1 global flags, =0 field start, =1 field end */ ! 158: ! 159: static char * ! 160: modifiers(struct field *field, char *argv1, int eflag) ! 161: { ! 162: for( ; *argv1; argv1++) { ! 163: switch(*argv1) { ! 164: case 'b': if(eflag==1) field->eflag = 1; ! 165: else field->bflag = 1; goto ckglob; ! 166: case 'r': field->rflag = 1; goto ckglob; ! 167: case 'f': dupla(&field->trans, fold); break; ! 168: case 'd': dupla(&field->keep, dict); break; ! 169: case 'i': dupla(&field->keep, ascii); break; ! 170: case 'g': duplb(&field->coder, gcode); break; ! 171: case 'n': duplb(&field->coder, ncode); break; ! 172: case 'M': duplb(&field->coder, Mcode); break; ! 173: default: ! 174: goto done; ! 175: } ! 176: keyed = 1; ! 177: ckglob: ! 178: if(field==fields && nfields>0) ! 179: warn("field spec precedes global option",argv1,1); ! 180: } ! 181: done: ! 182: if(field->coder==ncode && field->keep) ! 183: conflict(); ! 184: return argv1; ! 185: } ! 186: ! 187: static void ! 188: globalmods(struct field *field) ! 189: { ! 190: int flagged = field->bflag | field->eflag | field->rflag; ! 191: if(!field->coder) field->coder = tcode; ! 192: else flagged++; ! 193: if(!field->trans) field->trans = ident; ! 194: else flagged++; ! 195: if(!field->keep) field->keep = all; ! 196: else flagged++; ! 197: if(!flagged) { ! 198: field->coder = fields->coder; ! 199: field->trans = fields->trans; ! 200: field->keep = fields->keep; ! 201: field->rflag = fields->rflag; ! 202: field->bflag = fields->bflag; ! 203: if(field->style == NEW) ! 204: field->eflag = fields->bflag; ! 205: } ! 206: } ! 207: ! 208: /* convert field representation from numbers given in arguments ! 209: to a 0-origin first,last+1 representation, with a negative ! 210: quantity for a character offset to the end of this field */ ! 211: ! 212: static void ! 213: chkfieldno(struct field *field) ! 214: { ! 215: if(field->style == NEW) { ! 216: if(--field->begin.fieldno < 0 || ! 217: --field->begin.charno < 0 || ! 218: --field->end.fieldno < 0) ! 219: fatal("improper 0 in field specifier", "", 0); ! 220: if(field->end.charno == 0) ! 221: field->end.charno--; ! 222: } else if(field->end.charno==0 && field->end.fieldno>0) { ! 223: if(tab && field->eflag) ! 224: fatal("skipping blanks right after tab char" ! 225: " is ill-defined", "", 0); ! 226: field->end.fieldno--; ! 227: field->end.charno--; ! 228: } ! 229: if(field->begin.fieldno > NP) ! 230: field->begin.fieldno = NP; ! 231: if(field->end.fieldno > NP) ! 232: field->end.fieldno = NP; ! 233: /* fprintf(stderr,"%d %d.%d,%d.%d\n",field-fields,field->begin.fieldno, field->begin.charno,field->end.fieldno, field->end.charno);*/ ! 234: } ! 235: ! 236: int ! 237: fieldcode(uchar *dp, uchar *kp, int len, uchar *b, struct field *fields, int nfields) ! 238: { ! 239: uchar *posns[NP+1]; /* field start positions */ ! 240: uchar *cp; ! 241: struct field *field; ! 242: uchar *op = kp; ! 243: uchar *ep; ! 244: uchar *bound = kp + MAXREC; ! 245: int i; ! 246: int np; ! 247: if(bound > b) ! 248: bound = b; ! 249: posns[0] = dp; ! 250: if(tab) ! 251: for(np=1, i=len, cp=dp; i>0 && np<NP; i--) { ! 252: if(*cp++ != tab) ! 253: continue; ! 254: posns[np++] = cp; ! 255: } ! 256: else ! 257: for(np=1, i=len, cp=dp; i>0 && np<NP; ) { ! 258: while(blank(cp) && i>0) ! 259: cp++, i--; ! 260: while(!blank(cp) && i>0) ! 261: cp++, i--; ! 262: posns[np++] = cp; ! 263: } ! 264: ! 265: if(nfields > 0) ! 266: field = &fields[1]; ! 267: else ! 268: field = &fields[0]; ! 269: i = nfields; ! 270: do { ! 271: int t = field->begin.fieldno; ! 272: uchar *xp = dp + len; ! 273: if(t < np) { ! 274: cp = posns[t]; ! 275: if(field->bflag && nfields) ! 276: while(cp<xp && blank(cp)) ! 277: cp++; ! 278: cp += field->begin.charno; ! 279: if(cp > xp) ! 280: cp = xp; ! 281: } else ! 282: cp = xp; ! 283: t = field->end.fieldno; ! 284: if(t < np) { ! 285: if(field->end.charno < 0) { ! 286: if(t >= np-1) ! 287: ep = xp; ! 288: else { ! 289: ep = posns[t+1]; ! 290: if(tab) ep--; ! 291: } ! 292: } else { ! 293: ep = posns[t]; ! 294: if(field->eflag) ! 295: while(ep<xp && blank(ep)) ! 296: ep++; ! 297: ep += field->end.charno; ! 298: } ! 299: if(ep > xp) ! 300: ep = xp; ! 301: else if(ep < cp) ! 302: ep = cp; ! 303: } else ! 304: ep = xp; ! 305: t = ep - cp; ! 306: if(field->coder != acode && op+room(t) > bound) ! 307: return -1; ! 308: op += (*field->coder)(cp, op, ep-cp, field); ! 309: field++; ! 310: } while(--i > 0); ! 311: return op - kp; ! 312: } ! 313: ! 314: /* Encode text field subject to options -r -fdi -b. ! 315: Fields are separated by 0 (or 255 if rflag is set) ! 316: the anti-ambiguity stuff prevents such codes from ! 317: happening otherwise by coding real zeros and ones ! 318: as 0x0101 and 0x0102, and similarly for complements */ ! 319: ! 320: int ! 321: tcode(uchar *dp, uchar *kp, int len, struct field *f) ! 322: { ! 323: uchar *cp = kp; ! 324: int c; ! 325: uchar *keep = f->keep; ! 326: uchar *trans = f->trans; ! 327: int reverse = f->rflag? ~0: 0; ! 328: while(--len >= 0) { ! 329: c = *dp++; ! 330: if(keep[c]) { ! 331: c = trans[c]; ! 332: if(c <= 1) { /* anti-ambiguity */ ! 333: *cp++ = 1^reverse; ! 334: c++; ! 335: } else if(c >= 254) { ! 336: *cp++ = 255^reverse; ! 337: c--; ! 338: } ! 339: *cp++ = c^reverse; ! 340: } ! 341: } ! 342: *cp++ = reverse; ! 343: return cp - kp; ! 344: } ! 345: ! 346: static char *month[] = { "jan", "feb", "mar", "apr", "may", ! 347: "jun", "jul", "aug", "sep", "oct", "nov", "dec" }; ! 348: ! 349: int ! 350: Mcode(uchar *dp, uchar *kp, int len, struct field *f) ! 351: { ! 352: int j = -1; ! 353: int i; ! 354: uchar *cp; ! 355: for( ; len>0; dp++, len--) { ! 356: if(*dp!=' ' && *dp!='\t') ! 357: break; ! 358: } ! 359: if(len >= 3) ! 360: while(++j < 12) { ! 361: cp = (uchar*)month[j]; ! 362: for(i=0; i<3; i++) ! 363: if((dp[i]|('a'-'A')) != *cp++) ! 364: break; ! 365: if(i >= 3) ! 366: break; ! 367: } ! 368: *kp = j>=12? 0: j+1; ! 369: if(f->rflag) ! 370: *kp ^= ~0; ! 371: return 1; ! 372: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.