|
|
/* join F1 F2 on stuff: relational join of two sorted files on a common field */
2:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define F1 0		/* row of f[]/buf[]/ppi[] used for the first input file */
#define F2 1		/* row of f[]/buf[]/ppi[] used for the second input file */
#define F0 3		/* olistf[] marker for a "0" entry in the -o list (the join field) */
#define NFLD 100	/* max field per line */
/* Compare the join fields of the two lines currently held in buf[]. */
#define comp() cmp(ppi[F1][j1],ppi[F2][j2])

/*
 * The functions below are called before they are defined.  They are
 * declared old-style (no parameter list) so that the declarations stay
 * compatible with the definitions further down in this file.
 */
int input();
int output();
int error();
int cmp();

FILE *f[2];			/* the two input streams, indexed by F1/F2 */
char buf[2][BUFSIZ];		/* input lines */
char *ppi[2][NFLD];		/* pointers to fields in lines */
char *s1, *s2;
int j1 = 1;			/* join of this field of file 1 */
int j2 = 1;			/* join of this field of file 2 */
int olist[2*NFLD];		/* output these fields */
int olistf[2*NFLD];		/* from these files */
int no;				/* number of entries in olist */
int sep1 = ' ';			/* default field separator */
int sep2 = '\t';		/* second separator; -t sets both to the same character */
char *null = "";		/* replacement for empty -o fields; set by -e */
int unpub1;			/* not referenced in the code visible here */
int unpub2;			/* not referenced in the code visible here */
int aflg;			/* -a flags: bit 1 = file 1, bit 2 = file 2 */
25:
26: main(argc, argv)
27: char *argv[];
28: {
29: int i;
30: int n1, n2;
31: long top2, bot2;
32: long ftell();
33:
34: while (argc > 1 && argv[1][0] == '-') {
35: if (argv[1][1] == '\0')
36: break;
37: switch (argv[1][1]) {
38: case 'a':
39: switch(argv[1][2]) {
40: case '1':
41: aflg |= 1;
42: break;
43: case '2':
44: aflg |= 2;
45: break;
46: default:
47: aflg |= 3;
48: }
49: break;
50: case 'e':
51: null = argv[2];
52: argv++;
53: argc--;
54: break;
55: case 't':
56: sep1 = sep2 = argv[1][2];
57: break;
58: case 'o':
59: for (no = 0; no < 2*NFLD; no++) {
60: if (argv[2][0] == '1' && argv[2][1] == '.') {
61: olistf[no] = F1;
62: olist[no] = atoi(&argv[2][2]);
63: } else if (argv[2][0] == '2' && argv[2][1] == '.') {
64: olist[no] = atoi(&argv[2][2]);
65: olistf[no] = F2;
66: } else if (argv[2][0] == '0')
67: olistf[no] = F0;
68: else
69: break;
70: argc--;
71: argv++;
72: }
73: break;
74: case 'j':
75: if (argv[1][2] == '1')
76: j1 = atoi(argv[2]);
77: else if (argv[1][2] == '2')
78: j2 = atoi(argv[2]);
79: else
80: j1 = j2 = atoi(argv[2]);
81: argc--;
82: argv++;
83: break;
84: }
85: argc--;
86: argv++;
87: }
88: for (i = 0; i < no; i++)
89: olist[i]--; /* 0 origin */
90: if (argc != 3)
91: error("usage: join [-j1 x -j2 y] [-o list] file1 file2");
92: j1--;
93: j2--; /* everyone else believes in 0 origin */
94: s1 = ppi[F1][j1];
95: s2 = ppi[F2][j2];
96: if (argv[1][0] == '-')
97: f[F1] = stdin;
98: else if ((f[F1] = fopen(argv[1], "r")) == NULL)
99: error("can't open %s", argv[1]);
100: if ((f[F2] = fopen(argv[2], "r")) == NULL)
101: error("can't open %s", argv[2]);
102:
103: #define get1() n1=input(F1)
104: #define get2() n2=input(F2)
105: get1();
106: bot2 = ftell(f[F2]);
107: get2();
108: while(n1>0 && n2>0 || aflg!=0 && n1+n2>0) {
109: if(n1>0 && n2>0 && comp()>0 || n1==0) {
110: if(aflg&2) output(0, n2);
111: bot2 = ftell(f[F2]);
112: get2();
113: } else if(n1>0 && n2>0 && comp()<0 || n2==0) {
114: if(aflg&1) output(n1, 0);
115: get1();
116: } else /*(n1>0 && n2>0 && comp()==0)*/ {
117: while(n2>0 && comp()==0) {
118: output(n1, n2);
119: top2 = ftell(f[F2]);
120: get2();
121: }
122: fseek(f[F2], bot2, 0);
123: get2();
124: get1();
125: for(;;) {
126: if(n1>0 && n2>0 && comp()==0) {
127: output(n1, n2);
128: get2();
129: } else if(n1>0 && n2>0 && comp()<0 || n2==0) {
130: fseek(f[F2], bot2, 0);
131: get2();
132: get1();
133: } else /*(n1>0 && n2>0 && comp()>0 || n1==0)*/{
134: fseek(f[F2], top2, 0);
135: bot2 = top2;
136: get2();
137: break;
138: }
139: }
140: }
141: }
142: return(0);
143: }
144:
145: input(n) /* get input line and split into fields */
146: {
147: register int i, c;
148: char *bp;
149: char **pp;
150:
151: bp = buf[n];
152: pp = ppi[n];
153: if (fgets(bp, BUFSIZ, f[n]) == NULL)
154: return(0);
155: i = 0;
156: do {
157: i++;
158: if (sep1 == ' ') /* strip multiples */
159: while ((c = *bp) == sep1 || c == sep2)
160: bp++; /* skip blanks */
161: else
162: c = *bp;
163: *pp++ = bp; /* record beginning */
164: while ((c = *bp) != sep1 && c != '\n' && c != sep2 && c != '\0')
165: bp++;
166: *bp++ = '\0'; /* mark end by overwriting blank */
167: /* fails badly if string doesn't have \n at end */
168: } while (c != '\n' && c != '\0');
169:
170: *pp = 0;
171: return(i);
172: }
173:
174: output(on1, on2) /* print items from olist */
175: int on1, on2;
176: {
177: int i;
178: char *temp;
179:
180: if (no <= 0) { /* default case */
181: printf("%s", on1? ppi[F1][j1]: ppi[F2][j2]);
182: for (i = 0; i < on1; i++)
183: if (i != j1)
184: printf("%c%s", sep1, ppi[F1][i]);
185: for (i = 0; i < on2; i++)
186: if (i != j2)
187: printf("%c%s", sep1, ppi[F2][i]);
188: printf("\n");
189: } else {
190: for (i = 0; i < no; i++) {
191: if (olistf[i]==F0 && on1>j1)
192: temp = ppi[F1][j1];
193: else if (olistf[i]==F0 && on2>j2)
194: temp = ppi[F2][j2];
195: else {
196: temp = ppi[olistf[i]][olist[i]];
197: if(olistf[i]==F1 && on1<=olist[i] ||
198: olistf[i]==F2 && on2<=olist[i] ||
199: *temp==0)
200: temp = null;
201: }
202: printf("%s", temp);
203: if (i == no - 1)
204: printf("\n");
205: else
206: printf("%c", sep1);
207: }
208: }
209: }
210:
/*
 * error: print "join: " plus a printf-style message and a newline on
 * stderr, then exit with status 1.  Does not return.
 *
 * s1 is the format; s2..s5 are up to four string arguments for it.
 * They are declared as pointers so that a char * handed to a %s
 * conversion is not narrowed to int on the way through.
 *
 * NOTE(review): callers pass fewer arguments than there are parameters
 * and rely on the unused ones never being consumed by the format.  A
 * <stdarg.h> version would need a prototype visible before every call.
 */
int
error(char *s1, char *s2, char *s3, char *s4, char *s5)
{
	fprintf(stderr, "join: ");
	fprintf(stderr, s1, s2, s3, s4, s5);
	fprintf(stderr, "\n");
	exit(1);
	/* NOTREACHED */
}
219:
/*
 * cmp: compare two field strings.
 *
 * Returns a value less than, equal to, or greater than zero as s1
 * sorts before, the same as, or after s2 (strcmp ordering).  A null
 * pointer is treated as the empty string so that a missing field
 * compares as empty instead of being passed to strcmp.
 */
int
cmp(const char *s1, const char *s2)
{
	if (s1 == NULL)
		s1 = "";
	if (s2 == NULL)
		s2 = "";
	return(strcmp(s1, s2));
}
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.