|
|
1.1 root 1: /* join: pair up lines of files F1 and F2 that agree on a chosen join field */
2:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Indexes selecting the first or second input file in the tables below. */
#define F1	0
#define F2	1

/* Number of field-pointer slots kept for each input line. */
#define NFLD	20

/* Compare the join fields of the two lines currently held in ppi[]. */
#define comp()	cmp(ppi[F1][j1], ppi[F2][j2])

/* Per-file state, indexed by F1 / F2. */
FILE *f[2];			/* input streams */
char buf[2][BUFSIZ];		/* text of the current line of each file */
char *ppi[2][NFLD];		/* start of each field inside buf[] */

char *s1;
char *s2;

/* Join field of each file; 1-origin here, main() converts to 0-origin. */
int j1 = 1;
int j2 = 1;

/* Explicit output list: entry i prints field olist[i] of file olistf[i]. */
int olist[2 * NFLD];
int olistf[2 * NFLD];
int no;				/* entries in use in olist[] / olistf[] */

/* Field separators accepted on input; sep1 is also written on output. */
int sep1 = ' ';
int sep2 = '\t';

char *null = "";		/* printed in place of an empty or absent field */

int unpub1;
int unpub2;
int aflg;			/* bit 1 / bit 2: also print unpaired lines of F1 / F2 */
24:
25: main(argc, argv)
26: char *argv[];
27: {
28: int i;
29: int n1, n2;
30: long top2, bot2;
31: long ftell();
32:
33: while (argc > 1 && argv[1][0] == '-') {
34: if (argv[1][1] == '\0')
35: break;
36: switch (argv[1][1]) {
37: case 'a':
38: switch(argv[1][2]) {
39: case '1':
40: aflg |= 1;
41: break;
42: case '2':
43: aflg |= 2;
44: break;
45: default:
46: aflg |= 3;
47: }
48: break;
49: case 'e':
50: null = argv[2];
51: argv++;
52: argc--;
53: break;
54: case 't':
55: sep1 = sep2 = argv[1][2];
56: break;
57: case 'o':
58: for (no = 0; no < 2*NFLD; no++) {
59: if (argv[2][0] == '1' && argv[2][1] == '.') {
60: olistf[no] = F1;
61: olist[no] = atoi(&argv[2][2]);
62: } else if (argv[2][0] == '2' && argv[2][1] == '.') {
63: olist[no] = atoi(&argv[2][2]);
64: olistf[no] = F2;
65: } else
66: break;
67: argc--;
68: argv++;
69: }
70: break;
71: case 'j':
72: if (argv[1][2] == '1')
73: j1 = atoi(argv[2]);
74: else if (argv[1][2] == '2')
75: j2 = atoi(argv[2]);
76: else
77: j1 = j2 = atoi(argv[2]);
78: argc--;
79: argv++;
80: break;
81: }
82: argc--;
83: argv++;
84: }
85: for (i = 0; i < no; i++)
86: olist[i]--; /* 0 origin */
87: if (argc != 3)
88: error("usage: join [-j1 x -j2 y] [-o list] file1 file2");
89: j1--;
90: j2--; /* everyone else believes in 0 origin */
91: s1 = ppi[F1][j1];
92: s2 = ppi[F2][j2];
93: if (argv[1][0] == '-')
94: f[F1] = stdin;
95: else if ((f[F1] = fopen(argv[1], "r")) == NULL)
96: error("can't open %s", argv[1]);
97: if ((f[F2] = fopen(argv[2], "r")) == NULL)
98: error("can't open %s", argv[2]);
99:
100: #define get1() n1=input(F1)
101: #define get2() n2=input(F2)
102: get1();
103: bot2 = ftell(f[F2]);
104: get2();
105: while(n1>0 && n2>0 || aflg!=0 && n1+n2>0) {
106: if(n1>0 && n2>0 && comp()>0 || n1==0) {
107: if(aflg&2) output(0, n2);
108: bot2 = ftell(f[F2]);
109: get2();
110: } else if(n1>0 && n2>0 && comp()<0 || n2==0) {
111: if(aflg&1) output(n1, 0);
112: get1();
113: } else /*(n1>0 && n2>0 && comp()==0)*/ {
114: while(n2>0 && comp()==0) {
115: output(n1, n2);
116: top2 = ftell(f[F2]);
117: get2();
118: }
119: fseek(f[F2], bot2, 0);
120: get2();
121: get1();
122: for(;;) {
123: if(n1>0 && n2>0 && comp()==0) {
124: output(n1, n2);
125: get2();
126: } else if(n1>0 && n2>0 && comp()<0 || n2==0) {
127: fseek(f[F2], bot2, 0);
128: get2();
129: get1();
130: } else /*(n1>0 && n2>0 && comp()>0 || n1==0)*/{
131: fseek(f[F2], top2, 0);
132: bot2 = top2;
133: get2();
134: break;
135: }
136: }
137: }
138: }
139: return(0);
140: }
141:
142: input(n) /* get input line and split into fields */
143: {
144: register int i, c;
145: char *bp;
146: char **pp;
147:
148: bp = buf[n];
149: pp = ppi[n];
150: if (fgets(bp, BUFSIZ, f[n]) == NULL)
151: return(0);
152: for (i = 0; ; i++) {
153: if (sep1 == ' ') /* strip multiples */
154: while ((c = *bp) == sep1 || c == sep2)
155: bp++; /* skip blanks */
156: else
157: c = *bp;
158: if (c == '\n' || c == '\0')
159: break;
160: *pp++ = bp; /* record beginning */
161: while ((c = *bp) != sep1 && c != '\n' && c != sep2 && c != '\0')
162: bp++;
163: *bp++ = '\0'; /* mark end by overwriting blank */
164: /* fails badly if string doesn't have \n at end */
165: }
166: *pp = 0;
167: return(i);
168: }
169:
170: output(on1, on2) /* print items from olist */
171: int on1, on2;
172: {
173: int i;
174: char *temp;
175:
176: if (no <= 0) { /* default case */
177: printf("%s", on1? ppi[F1][j1]: ppi[F2][j2]);
178: for (i = 0; i < on1; i++)
179: if (i != j1)
180: printf("%c%s", sep1, ppi[F1][i]);
181: for (i = 0; i < on2; i++)
182: if (i != j2)
183: printf("%c%s", sep1, ppi[F2][i]);
184: printf("\n");
185: } else {
186: for (i = 0; i < no; i++) {
187: temp = ppi[olistf[i]][olist[i]];
188: if(olistf[i]==F1 && on1<=olist[i] ||
189: olistf[i]==F2 && on2<=olist[i] ||
190: *temp==0)
191: temp = null;
192: printf("%s", temp);
193: if (i == no - 1)
194: printf("\n");
195: else
196: printf("%c", sep1);
197: }
198: }
199: }
200:
/*
 * error: print "join: " followed by a formatted message and a newline on
 * stderr, then exit with status 1.  Does not return.
 *
 * s1 is a printf-style format and s2..s5 are its optional string
 * arguments.  They are declared as pointers because an int-sized
 * parameter cannot carry a pointer where pointers are wider than int.
 * Callers in this file pass fewer than five arguments; the extra
 * parameters are handed to fprintf() untouched and are only read if the
 * format asks for them.
 */
int
error(char *s1, char *s2, char *s3, char *s4, char *s5)
{
	fprintf(stderr, "join: ");
	fprintf(stderr, s1, s2, s3, s4, s5);
	fprintf(stderr, "\n");
	exit(1);
}
209:
/*
 * cmp: order two join fields as strcmp() does.
 *
 * A null pointer stands for a field the line does not have (input() stores
 * a null pointer after the last field) and compares as the empty string,
 * so a short line cannot hand strcmp() a null argument.
 *
 * Returns a value less than, equal to, or greater than zero.
 */
int
cmp(char *s1, char *s2)
{
	if (s1 == NULL)
		s1 = "";
	if (s2 == NULL)
		s2 = "";
	return(strcmp(s1, s2));
}
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.