|
|
1.1 root 1: /* join F1 F2 on stuff */
2:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <limits.h>
7:
/*
 * File selectors.  F1 and F2 index the per-file arrays below; F0 marks
 * a "0" entry in the -o list (the join field itself); FX is the
 * "invalid selector" value used while parsing -o.
 */
enum { F1, F2, FX, F0 };

#define NFLD 100		/* max field per line */

/* compare the join fields of the two records currently loaded */
#define comp() (strcmp(ppi[F1][j1], ppi[F2][j2]))

FILE *f[2];			/* the two input streams */
char buf[2][BUFSIZ];		/* input lines */
char *ppi[2][NFLD+1];		/* pointers to fields in lines */
int j1 = 1;			/* join on this field in file 1 */
int j2 = 1;			/* join on this field in file 2 */
int olist[2*NFLD];		/* output these fields */
int olistf[2*NFLD];		/* from these files */
int no;				/* number of entries in olist */
int sep1 = ' ';			/* default field separator */
int sep2 = '\t';
int discard;			/* count of truncated lines */
char *null = "";		/* printed for a missing field (-e) */
int a1;				/* -a 1 / -v 1 given */
int a2;				/* -a 2 / -v 2 given */
int vflg;			/* -v given: suppress joined lines */

char *jcom = "join: -j is archaic, replaced by -1 and -2\n";
char *ocom = "join: archaic -o; commas are preferred\n";
char *usage = "usage: join [-1 n] [-2 n] [-o m.n,m.n,...]"
	" [-t c] [-e s] [-a m] [-v m] file1 file2";

int getopt(int argc, char **argv, char *opts);
extern char *optarg;
extern int optind;

int onelet(char *s);
int xatoi(char *s);
void optiono(int argc, char **argv);
void output(int on1, int on2);
int input(int n);
void oparse(char *s);
void error(char *s1, char *s2);
void seek1(void);
void seek2(void);
45:
46: int
47: main(int argc, char **argv)
48: {
49: int i;
50:
51: for(;;) {
52: switch(getopt(argc, argv, "1:2:a:e:j:o:t:v:")) {
53: case -1:
54: break;
55: case '?':
56: error(usage, "");
57: case 'v':
58: vflg++;
59: case 'a':
60: switch(onelet(optarg)) {
61: case '1':
62: a1++;
63: continue;
64: case '2':
65: a2++;
66: continue;
67: }
68: error(usage, "");
69: case 'e':
70: null = optarg;
71: continue;
72: case 't':
73: sep1 = sep2 = onelet(optarg);
74: continue;
75: case '1':
76: j1 = xatoi(optarg);
77: continue;
78: case '2':
79: j2 = xatoi(optarg);
80: continue;
81: case 'j':
82: fprintf(stderr, jcom);
83: if(optarg[-1] != 'j') {
84: j1 = j2 = xatoi(optarg);
85: continue;
86: }
87: switch(onelet(optarg)) {
88: case '1':
89: j1 = xatoi(argv[optind++]);
90: continue;
91: case '2':
92: j2 = xatoi(argv[optind++]);
93: continue;
94: }
95: error("improper -j", "");
96: case 'o':
97: optiono(argc, argv);
98: continue;
99: }
100: break;
101: }
102: proceed:
103: if (argc-optind != 2)
104: error(usage,"");
105: for (i = 0; i < no; i++)
106: if (--olist[i] >= NFLD) /* 0 origin */
107: error("field number too big in -o","");
108: if(--j1<0 || --j2<0)
109: error("join field number not positive", "");
110: if(j1>=NFLD || j2>=NFLD)
111: error("join field number too big", "");
112: if (strcmp(argv[optind], "-") == 0)
113: f[F1] = stdin;
114: else if ((f[F1] = fopen(argv[optind], "r")) == 0)
115: error("can't open %s", argv[optind]);
116: if(strcmp(argv[optind+1], "-") == 0) {
117: f[F2] = stdin;
118: } else if ((f[F2] = fopen(argv[optind+1], "r")) == 0)
119: error("can't open %s", argv[optind+1]);
120:
121: if(ftell(f[F2]) >= 0)
122: seek2();
123: else if(ftell(f[F1]) >= 0)
124: seek1();
125: else
126: error("sorry, need one random-access file","");
127: if (discard)
128: error("some input line was truncated", "");
129: return 0;
130: }
131:
132: /* lazy. there ought to be a clean way to combine seek1 & seek2 */
133: #define get1() n1=input(F1)
134: #define get2() n2=input(F2)
135: void
136: seek2()
137: {
138: int n1, n2; /* number of fields in each record */
139: long top2;
140: long bot2 = ftell(f[F2]);
141: get1();
142: get2();
143: while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) {
144: if(n1>0 && n2>0 && comp()>0 || n1==0) {
145: if(a2) output(0, n2);
146: bot2 = ftell(f[F2]);
147: get2();
148: } else if(n1>0 && n2>0 && comp()<0 || n2==0) {
149: if(a1) output(n1, 0);
150: get1();
151: } else /*(n1>0 && n2>0 && comp()==0)*/ {
152: while(n2>0 && comp()==0) {
153: if(!vflg) output(n1, n2);
154: top2 = ftell(f[F2]);
155: get2();
156: }
157: fseek(f[F2], bot2, 0);
158: get2();
159: get1();
160: for(;;) {
161: if(n1>0 && n2>0 && comp()==0) {
162: if(!vflg) output(n1, n2);
163: get2();
164: } else if(n1>0 && n2>0 && comp()<0 || n2==0) {
165: fseek(f[F2], bot2, 0);
166: get2();
167: get1();
168: } else /*(n1>0 && n2>0 && comp()>0 || n1==0)*/{
169: fseek(f[F2], top2, 0);
170: bot2 = top2;
171: get2();
172: break;
173: }
174: }
175: }
176: }
177: }
178: void
179: seek1()
180: {
181: int n1, n2; /* number of fields in each record */
182: long top1;
183: long bot1 = ftell(f[F1]);
184: get1();
185: get2();
186: while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) {
187: if(n1>0 && n2>0 && comp()>0 || n1==0) {
188: if(a2) output(0, n2);
189: get2();
190: } else if(n1>0 && n2>0 && comp()<0 || n2==0) {
191: if(a1) output(n1, 0);
192: bot1 = ftell(f[F1]);
193: get1();
194: } else /*(n1>0 && n2>0 && comp()==0)*/ {
195: while(n2>0 && comp()==0) {
196: if(!vflg) output(n1, n2);
197: top1 = ftell(f[F1]);
198: get1();
199: }
200: fseek(f[F1], bot1, 0);
201: get2();
202: get1();
203: for(;;) {
204: if(n1>0 && n2>0 && comp()==0) {
205: if(!vflg) output(n1, n2);
206: get1();
207: } else if(n1>0 && n2>0 && comp()>0 || n1==0) {
208: fseek(f[F1], bot1, 0);
209: get2();
210: get1();
211: } else /*(n1>0 && n2>0 && comp()<0 || n2==0)*/{
212: fseek(f[F1], top1, 0);
213: bot1 = top1;
214: get1();
215: break;
216: }
217: }
218: }
219: }
220: }
221:
222: int
223: input(int n) /* get line, split, return field count */
224: {
225: register int i, c;
226: char *bp;
227: char **pp;
228:
229: bp = buf[n];
230: pp = ppi[n];
231: pp[j1] = pp[j2] = ""; /* for absent join field */
232: if (fgets(bp, BUFSIZ, f[n]) == 0)
233: return(0);
234: i = 0;
235: do {
236: i++;
237: if (sep1 == ' ') /* strip multiples */
238: while ((c = *bp) == sep1 || c == sep2)
239: bp++; /* skip blanks */
240: else
241: c = *bp;
242: *pp++ = bp; /* record beginning */
243: while ((c = *bp) != sep1 && c != '\n' && c != sep2 && c != '\0')
244: bp++;
245: *bp++ = '\0'; /* mark end by overwriting blank */
246: } while (c != '\n' && c != '\0' && i < NFLD-1);
247: if (c != '\n')
248: discard++;
249: return(i);
250: }
251:
252: void
253: output(int on1, int on2) /* print items from olist */
254: {
255: int i;
256: char *temp;
257:
258: if (no <= 0) { /* default case */
259: printf("%s", j1<on1? ppi[F1][j1]:
260: j2<on2? ppi[F2][j2]: null);
261: for (i = 0; i < on1; i++)
262: if (i != j1)
263: printf("%c%s", sep1, ppi[F1][i]);
264: for (i = 0; i < on2; i++)
265: if (i != j2)
266: printf("%c%s", sep1, ppi[F2][i]);
267: printf("\n");
268: } else {
269: for (i = 0; i < no; i++) {
270: temp = null;
271: switch(olistf[i]) {
272: case F0:
273: if(j1 < on1)
274: temp = ppi[F1][j1];
275: else if(j2 < on2)
276: temp = ppi[F2][j2];
277: break;
278: case F1:
279: if(olist[i] < on1)
280: temp = ppi[F1][olist[i]];
281: break;
282: case F2:
283: if(olist[i] < on2)
284: temp = ppi[F2][olist[i]];
285: break;
286: }
287: printf("%s", temp);
288: if (i == no - 1)
289: printf("\n");
290: else
291: printf("%c", sep1);
292: }
293: }
294: }
295:
/*
 * Print "join: " followed by the message on standard error and exit
 * with status 1.  s1 is used as a printf format with s2 as its only
 * argument, so s1 may contain at most one %s.
 */
void
error(char *s1, char *s2)
{
	fputs("join: ", stderr);
	fprintf(stderr, s1, s2);
	fputc('\n', stderr);
	exit(1);
}
304:
/*
 * Fetch the argument of the option held in (*argvp)[1].
 *
 * If the option word has text after its two-character flag, that text
 * is the argument.  Otherwise the following word is taken, and *argcp
 * and *argvp are stepped past one word.  A missing following word, or
 * one that starts with '-', is reported through error().
 */
char *
getoptarg(int *argcp, char ***argvp)
{
	char **av = *argvp;
	char *opt = av[1];

	if (opt[2] != 0)
		return opt + 2;
	if (*argcp <= 2 || av[2][0] == '-')
		error("incomplete option %s", opt);
	*argcp = *argcp - 1;
	*argvp = av + 1;
	return av[2];
}
318:
319: void
320: oparse(char *s)
321: {
322: for (no = 0; no<2*NFLD && *s; no++, s++) {
323: switch(*s) {
324: case 0:
325: return;
326: case '0':
327: olistf[no] = F0;
328: break;
329: case '1':
330: case '2':
331: if(s[1] == '.' && isdigit(s[2])) {
332: olistf[no] = *s=='1'? F1: F2;
333: olist[no] = xatoi(s += 2);
334: break;
335: } /* fall thru */
336: default:
337: error("invalid -o list", "");
338: }
339: if(s[1] == ',')
340: s++;
341: }
342: }
343:
/*
 * Convert the leading decimal digits of s to an int.
 *
 * s must be non-null and start with a digit, otherwise error() is
 * called.  Text after the digits is ignored, because callers pass
 * pointers into longer lists.  A value that does not fit in an int is
 * clamped to INT_MAX so that the callers' "too big" checks reject it
 * instead of hitting atoi's undefined overflow behavior.
 */
int
xatoi(char *s)
{
	long v;

	if (s == NULL || !isdigit((unsigned char)*s))
		error("numeric argument expected", "");
	v = strtol(s, NULL, 10);
	return v > INT_MAX ? INT_MAX : (int)v;
}
351:
352: int
353: onelet(char *s)
354: {
355: if(s[0] == 0 || s[1] != 0)
356: error(usage, "");
357: return s[0];
358: }
359:
360: void
361: optiono(int argc, char **argv)
362: {
363: static multi;
364: int f;
365: for (no=0; *optarg; no++) {
366: if(no >= 2*NFLD)
367: error("too many output fields", "");
368: f = *optarg++;
369: f = f=='0'? F0: f=='1'? F1: f=='2'? F2: FX;
370: if(f == FX)
371: error("improper -o", "");
372: olistf[no] = f;
373: if(f != F0) {
374: if(*optarg++ != '.')
375: error("improper -o", "");
376: olist[no] = xatoi(optarg);
377: while(isdigit(*optarg))
378: optarg++;
379: }
380: if(*optarg==',' || *optarg==' ')
381: optarg++;
382: else if(*optarg==0 &&
383: (multi || no==0) &&
384: optind<argc-2 &&
385: isdigit(*argv[optind])) {
386: optarg = argv[optind++];
387: if(multi++ == 0)
388: fprintf(stderr, ocom);
389: }
390: }
391: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.