|
|
/*
 * join F1 F2 on stuff
 *
 * Relational join of two files on a common field.  Both inputs are
 * read line by line and split into fields; lines whose join fields
 * compare equal (strcmp) are paired and printed.  The merge logic in
 * seek1/seek2 advances whichever file has the smaller key, so the
 * inputs are presumably expected to be sorted on the join field.
 * One of the two inputs must be seekable, because a run of equal keys
 * has to be re-read once for every matching line of the other file.
 */

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>

/* F1/F2 index the two inputs; FX marks a bad -o file tag; F0 is "the join field" */
enum { F1, F2, FX, F0 };
#define	NFLD	100	/* max field per line */
#define	comp()	strcmp(ppi[F1][j1],ppi[F2][j2])

FILE	*f[2];
char	buf[2][BUFSIZ];	/* input lines */
char	*ppi[2][NFLD+1];	/* pointers to fields in lines */
int	j1	= 1;	/* join on this field in file 1 */
int	j2	= 1;	/* join on this field in file 2 */
int	olist[2*NFLD];	/* output these fields */
int	olistf[2*NFLD];	/* from these files */
int	no;		/* number of entries in olist */
int	sep1	= ' ';	/* default field separator */
int	sep2	= '\t';
int	discard;	/* count of truncated lines */
char*	null	= "";	/* printed in place of a missing field (-e) */
int	a1;		/* -a 1 / -v 1: also print unpaired lines of file 1 */
int	a2;		/* -a 2 / -v 2: also print unpaired lines of file 2 */
int	vflg;		/* -v: suppress the paired lines */

char*	jcom = "join: -j is archaic, replaced by -1 and -2\n";
char*	ocom = "join: archaic -o; commas are preferred\n";
char*	usage = "usage: join [-1 n] [-2 n] [-o m.n,m.n,...]"
		" [-t c] [-e s] [-a m] [-v m] file1 file2";

int	getopt(int, char**, char*);
extern char	*optarg;
extern int	optind;

int	onelet(char*);
int	xatoi(char*);
void	optiono(int, char**);
void	output(int, int);
int	input(int);
void	oparse(char*);
void	error(char*, char*);
void	seek1(void), seek2(void);

/*
 * Parse options, open the two inputs and run the join.
 * Exits with status 1 (via error) on any usage or I/O problem,
 * including when some input line had to be truncated.
 */
int
main(int argc, char **argv)
{
	int i;

	for(;;) {
		switch(getopt(argc, argv, "1:2:a:e:j:o:t:v:")) {
		case -1:
			break;
		case '?':
			error(usage, "");	/* does not return */
		case 'v':
			vflg++;
			/* fall through: -v m also selects unpaired lines of file m */
		case 'a':
			switch(onelet(optarg)) {
			case '1':
				a1++;
				continue;
			case '2':
				a2++;
				continue;
			}
			error(usage, "");	/* does not return */
		case 'e':
			null = optarg;
			continue;
		case 't':
			sep1 = sep2 = onelet(optarg);
			continue;
		case '1':
			j1 = xatoi(optarg);
			continue;
		case '2':
			j2 = xatoi(optarg);
			continue;
		case 'j':
			fputs(jcom, stderr);
			/*
			 * "-j n" (argument in its own argv element) sets both
			 * join fields; "-j1 n" / "-j2 n" (digit glued to the
			 * flag) sets one and takes n from the next argument.
			 * Compare pointers instead of peeking at optarg[-1],
			 * which reads before the start of a separate string.
			 */
			if(optarg == argv[optind-1]) {
				j1 = j2 = xatoi(optarg);
				continue;
			}
			switch(onelet(optarg)) {
			case '1':
				if(optind >= argc)
					error("improper -j", "");
				j1 = xatoi(argv[optind++]);
				continue;
			case '2':
				if(optind >= argc)
					error("improper -j", "");
				j2 = xatoi(argv[optind++]);
				continue;
			}
			error("improper -j", "");	/* does not return */
		case 'o':
			optiono(argc, argv);
			continue;
		}
		break;
	}
	if (argc-optind != 2)
		error(usage,"");
	for (i = 0; i < no; i++) {
		if (olistf[i] == F0)
			continue;	/* join field: olist[i] is not used */
		if (--olist[i] < 0)	/* 0 origin */
			error("field number not positive in -o","");
		if (olist[i] >= NFLD)
			error("field number too big in -o","");
	}
	if(--j1<0 || --j2<0)
		error("join field number not positive", "");
	if(j1>=NFLD || j2>=NFLD)
		error("join field number too big", "");
	if (strcmp(argv[optind], "-") == 0)
		f[F1] = stdin;
	else if ((f[F1] = fopen(argv[optind], "r")) == 0)
		error("can't open %s", argv[optind]);
	if(strcmp(argv[optind+1], "-") == 0) {
		f[F2] = stdin;
	} else if ((f[F2] = fopen(argv[optind+1], "r")) == 0)
		error("can't open %s", argv[optind+1]);

	/* ftell fails on a stream that cannot report a position; prefer rewinding file 2 */
	if(ftell(f[F2]) >= 0)
		seek2();
	else if(ftell(f[F1]) >= 0)
		seek1();
	else
		error("sorry, need one random-access file","");
	if (discard)
		error("some input line was truncated", "");
	return 0;
}

/* lazy.  there ought to be a clean way to combine seek1 & seek2 */
#define get1() n1=input(F1)
#define get2() n2=input(F2)

/*
 * Join with file 2 as the file that is rewound.
 * bot2 is the offset of the first line of the current run of equal
 * keys in file 2; top2 is the offset of the first line after that run.
 * For each further line of file 1 with the same key, file 2 is rewound
 * to bot2 and the run is replayed.
 */
void
seek2(void)
{
	int n1, n2;	/* number of fields in each record; 0 at end of file */
	long top2;
	long bot2 = ftell(f[F2]);
	get1();
	get2();
	while((n1>0 && n2>0) || ((a1||a2) && n1+n2>0)) {
		if((n1>0 && n2>0 && comp()>0) || n1==0) {
			/* file 2 is behind (or file 1 is exhausted) */
			if(a2) output(0, n2);
			bot2 = ftell(f[F2]);
			get2();
		} else if((n1>0 && n2>0 && comp()<0) || n2==0) {
			/* file 1 is behind (or file 2 is exhausted) */
			if(a1) output(n1, 0);
			get1();
		} else /*(n1>0 && n2>0 && comp()==0)*/ {
			/* first pass over the run in file 2, remembering where it ends */
			while(n2>0 && comp()==0) {
				if(!vflg) output(n1, n2);
				top2 = ftell(f[F2]);
				get2();
			}
			fseek(f[F2], bot2, SEEK_SET);
			get2();
			get1();
			for(;;) {
				if(n1>0 && n2>0 && comp()==0) {
					if(!vflg) output(n1, n2);
					get2();
				} else if((n1>0 && n2>0 && comp()<0) || n2==0) {
					/* ran off the run: replay it for the next line of file 1 */
					fseek(f[F2], bot2, SEEK_SET);
					get2();
					get1();
				} else /*(n1>0 && n2>0 && comp()>0 || n1==0)*/{
					/* file 1 moved past this key: resume after the run */
					fseek(f[F2], top2, SEEK_SET);
					bot2 = top2;
					get2();
					break;
				}
			}
		}
	}
}

/*
 * Join with file 1 as the file that is rewound; mirror image of seek2.
 * bot1/top1 bracket the current run of equal keys in file 1.
 */
void
seek1(void)
{
	int n1, n2;	/* number of fields in each record; 0 at end of file */
	long top1;
	long bot1 = ftell(f[F1]);
	get1();
	get2();
	while((n1>0 && n2>0) || ((a1||a2) && n1+n2>0)) {
		if((n1>0 && n2>0 && comp()>0) || n1==0) {
			/* file 2 is behind (or file 1 is exhausted) */
			if(a2) output(0, n2);
			get2();
		} else if((n1>0 && n2>0 && comp()<0) || n2==0) {
			/* file 1 is behind (or file 2 is exhausted) */
			if(a1) output(n1, 0);
			bot1 = ftell(f[F1]);
			get1();
		} else /*(n1>0 && n2>0 && comp()==0)*/ {
			/*
			 * First pass over the run in file 1.  The guard must
			 * test n1, the file being advanced here: at end of
			 * file 1 its join field reads as "", which would
			 * compare equal to an empty key in file 2 forever.
			 */
			while(n1>0 && comp()==0) {
				if(!vflg) output(n1, n2);
				top1 = ftell(f[F1]);
				get1();
			}
			fseek(f[F1], bot1, SEEK_SET);
			get2();
			get1();
			for(;;) {
				if(n1>0 && n2>0 && comp()==0) {
					if(!vflg) output(n1, n2);
					get1();
				} else if((n1>0 && n2>0 && comp()>0) || n1==0) {
					/* ran off the run: replay it for the next line of file 2 */
					fseek(f[F1], bot1, SEEK_SET);
					get2();
					get1();
				} else /*(n1>0 && n2>0 && comp()<0 || n2==0)*/{
					/* file 2 moved past this key: resume after the run */
					fseek(f[F1], top1, SEEK_SET);
					bot1 = top1;
					get1();
					break;
				}
			}
		}
	}
}

/*
 * Read the next line of file n into buf[n], split it in place and store
 * the field pointers in ppi[n].  Returns the number of fields, or 0 at
 * end of file.  A line that does not end in '\n' within the buffer, or
 * that has more fields than fit, bumps the discard counter.
 */
int
input(int n)		/* get line, split, return field count */
{
	int i, c;
	char *bp;
	char **pp;

	bp = buf[n];
	pp = ppi[n];
	pp[j1] = pp[j2] = "";	/* for absent join field */
	if (fgets(bp, BUFSIZ, f[n]) == 0)
		return(0);
	i = 0;
	do {
		i++;
		if (sep1 == ' ')	/* strip multiples */
			while ((c = *bp) == sep1 || c == sep2)
				bp++;	/* skip blanks */
		else
			c = *bp;
		*pp++ = bp;	/* record beginning */
		while ((c = *bp) != sep1 && c != '\n' && c != sep2 && c != '\0')
			bp++;
		*bp++ = '\0';	/* mark end by overwriting blank */
	} while (c != '\n' && c != '\0' && i < NFLD-1);
	if (c != '\n')
		discard++;
	return(i);
}

/*
 * Print one output line.  on1 and on2 are the field counts of the
 * current lines of file 1 and file 2; 0 means that side is absent
 * (an unpaired line).  Without -o the join field comes first, followed
 * by the remaining fields of each line; with -o the listed fields are
 * printed, substituting the -e string for any that are missing.
 */
void
output(int on1, int on2)	/* print items from olist */
{
	int i;
	char *temp;

	if (no <= 0) {	/* default case */
		printf("%s", j1<on1? ppi[F1][j1]:
			     j2<on2? ppi[F2][j2]: null);
		for (i = 0; i < on1; i++)
			if (i != j1)
				printf("%c%s", sep1, ppi[F1][i]);
		for (i = 0; i < on2; i++)
			if (i != j2)
				printf("%c%s", sep1, ppi[F2][i]);
		printf("\n");
	} else {
		for (i = 0; i < no; i++) {
			temp = null;
			switch(olistf[i]) {
			case F0:	/* the join field, from whichever line has it */
				if(j1 < on1)
					temp = ppi[F1][j1];
				else if(j2 < on2)
					temp = ppi[F2][j2];
				break;
			case F1:
				if(olist[i] < on1)
					temp = ppi[F1][olist[i]];
				break;
			case F2:
				if(olist[i] < on2)
					temp = ppi[F2][olist[i]];
				break;
			}
			printf("%s", temp);
			if (i == no - 1)
				printf("\n");
			else
				printf("%c", sep1);
		}
	}
}

/*
 * Print "join: " followed by s1 (a printf format that may contain one
 * %s, filled from s2) and a newline on stderr, then exit with status 1.
 * s1 must be a trusted format string.
 */
void
error(char *s1, char *s2)
{
	fprintf(stderr, "join: ");
	fprintf(stderr, s1, s2);
	fprintf(stderr, "\n");
	exit(1);
}

/*
 * Hand-rolled option-argument fetch: returns the text glued to the
 * two-character flag in argv[1], or else the following argument,
 * consuming it by advancing *argcp / *argvp.  Nothing in this file
 * calls it.
 */
char *
getoptarg(int *argcp, char ***argvp)
{
	int argc = *argcp;
	char **argv = *argvp;
	if(argv[1][2] != 0)
		return &argv[1][2];
	if(argc<=2 || argv[2][0]=='-')
		error("incomplete option %s", argv[1]);
	*argcp = --argc;
	*argvp = ++argv;
	return argv[1];
}

/*
 * Parse a comma-separated -o list ("0", "1.n", "2.n") from s into
 * olistf/olist and set no.  Nothing in this file calls it; optiono
 * does the same job on optarg.
 */
void
oparse(char *s)
{
	for (no = 0; no<2*NFLD && *s; no++, s++) {
		switch(*s) {
		case '0':
			olistf[no] = F0;
			break;
		case '1':
		case '2':
			if(s[1] == '.' && isdigit((unsigned char)s[2])) {
				olistf[no] = *s=='1'? F1: F2;
				olist[no] = xatoi(s += 2);
				/* step over the remaining digits, leaving s on the last one */
				while(isdigit((unsigned char)s[1]))
					s++;
				break;
			} /* fall thru */
		default:
			error("invalid -o list", "");
		}
		if(s[1] == ',')
			s++;
	}
}

/*
 * Convert a decimal string to int; exits through error() unless the
 * first character is a digit.
 */
int
xatoi(char *s)
{
	if(!isdigit((unsigned char)*s))
		error("numeric argument expected", "");
	return atoi(s);
}

/*
 * Return the only character of s; exits with the usage message unless
 * s is exactly one character long.
 */
int
onelet(char *s)
{
	if(s[0] == 0 || s[1] != 0)
		error(usage, "");
	return s[0];
}

/*
 * Parse the argument of -o from optarg into olistf/olist and set no.
 * Entries are "0" (join field) or "m.n" with m in {1,2}, separated by
 * commas or blanks.  The archaic form, in which the list is spread
 * over several arguments, is accepted with a one-time warning: further
 * arguments that start with a digit are consumed, as long as the last
 * two arguments are left alone.
 */
void
optiono(int argc, char **argv)
{
	static int multi;	/* nonzero once the archaic multi-argument form was seen */
	int src;		/* file tag of the current entry */

	for (no=0; *optarg; no++) {
		if(no >= 2*NFLD)
			error("too many output fields", "");
		src = *optarg++;
		src = src=='0'? F0: src=='1'? F1: src=='2'? F2: FX;
		if(src == FX)
			error("improper -o", "");
		olistf[no] = src;
		if(src != F0) {
			if(*optarg++ != '.')
				error("improper -o", "");
			olist[no] = xatoi(optarg);
			while(isdigit((unsigned char)*optarg))
				optarg++;
		}
		if(*optarg==',' || *optarg==' ')
			optarg++;
		else if(*optarg==0 &&
			(multi || no==0) &&
			optind<argc-2 &&
			isdigit((unsigned char)*argv[optind])) {
			optarg = argv[optind++];
			if(multi++ == 0)
				fputs(ocom, stderr);
		}
	}
}
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.