|
|
1.1 root 1: #ifndef lint
2: static char *sccsid = "@(#)inv1.c 4.1 (Berkeley) 5/6/83";
3: #endif
4:
5: #include <stdio.h>
6: #include <assert.h>
7:
8: main(argc, argv)
9: char *argv[];
10: {
11: /* Make inverted file indexes. Reads a stream from mkey which
12: * gives record pointer items and keys. Generates set of files
13: * a. NHASH pointers to file b.
14: * b. lists of record numbers.
15: * c. record pointer items.
16: *
17: * these files are named xxx.ia, xxx.ib, xxx.ic;
18: * where xxx is taken from arg1.
19: * If the files exist they are updated.
20: */
21:
22: FILE *fa, *fb, *fc, *fta, *ftb, *ftc, *fd;
23: int nhash = 256;
24: int appflg = 1;
25: int keepkey = 0, pipein = 0;
26: char nma[100], nmb[100], nmc[100], com[100], nmd[100];
27: char tmpa[20], tmpb[20], tmpc[20];
28: char *remove = NULL;
29: int chatty = 0, docs, hashes, fp[2], fr, fw, pfork, pwait, status;
30: int i,j,k;
31: long keys;
32: int iflong =0;
33: char *sortdir;
34:
35: sortdir = (access("/crp/tmp", 06)==0) ? "/crp/tmp" : "/usr/tmp";
36: while (argv[1][0] == '-')
37: {
38: switch(argv[1][1])
39: {
40: case 'h': /* size of hash table */
41: nhash = atoi (argv[1]+2);
42: break;
43: case 'n': /* new, don't append */
44: appflg=0;
45: break;
46: case 'a': /* append to old file */
47: appflg=1;
48: break;
49: case 'v': /* verbose output */
50: chatty=1;
51: break;
52: case 'd': /* keep keys on file .id for check on searching */
53: keepkey=1;
54: break;
55: case 'p': /* pipe into sort (saves space, costs time)*/
56: pipein = 1;
57: break;
58: case 'i': /* input is on file, not stdin */
59: close(0);
60: if (open(argv[2], 0) != 0)
61: err("Can't read input %s", argv[2]);
62: if (argv[1][2]=='u') /* unlink */
63: remove = argv[2];
64: argc--;
65: argv++;
66: break;
67: }
68: argc--;
69: argv++;
70: }
71: strcpy (nma, argc >= 2 ? argv[1] : "Index");
72: strcpy (nmb, nma);
73: strcpy (nmc, nma);
74: strcpy (nmd, nma);
75: strcat (nma, ".ia");
76: strcat (nmb, ".ib");
77: strcat (nmc, ".ic");
78: strcat (nmd, ".id");
79:
80: sprintf(tmpa, "junk%di", getpid());
81: if (pipein)
82: {
83: pipe(fp);
84: fr=fp[0];
85: fw=fp[1];
86: if ( (pfork=fork()) == 0)
87: {
88: close(fw);
89: close(0);
90: _assert(dup(fr)==0);
91: close(fr);
92: execl("/bin/sort", "sort", "-T", sortdir, "-o", tmpa, 0);
93: execl("/usr/bin/sort", "sort", "-T", sortdir, "-o", tmpa, 0);
94: _assert(0);
95: }
96: _assert(pfork!= -1);
97: close(fr);
98: fta = fopen("/dev/null", "w");
99: close(fta->_file);
100: fta->_file = fw;
101: }
102: else /* use tmp file */
103: {
104: fta = fopen(tmpa, "w");
105: _assert (fta != NULL);
106: }
107: fb = 0;
108: if (appflg )
109: {
110: if (fb = fopen(nmb, "r"))
111: {
112: sprintf(tmpb, "junk%dj", getpid());
113: ftb = fopen(tmpb, "w");
114: if (ftb==NULL)
115: err("Can't get scratch file %s",tmpb);
116: nhash = recopy(ftb, fb, fopen(nma, "r"));
117: fclose(ftb);
118: }
119: else
120: appflg=0;
121: }
122: fc = fopen(nmc, appflg ? "a" : "w");
123: if (keepkey)
124: fd = keepkey ? fopen(nmd, "w") : 0;
125: docs = newkeys(fta, stdin, fc, nhash, fd, &iflong);
126: fclose(stdin);
127: if (remove != NULL)
128: unlink(remove);
129: fclose(fta);
130: if (pipein)
131: {
132: pwait = wait(&status);
133: printf("pfork %o pwait %o status %d\n",pfork,pwait,status);
134: _assert(pwait==pfork);
135: _assert(status==0);
136: }
137: else
138: {
139: sprintf(com, "sort -T %s %s -o %s", sortdir, tmpa, tmpa);
140: system(com);
141: }
142: if (appflg)
143: {
144: sprintf(tmpc, "junk%dk", getpid());
145: sprintf(com, "mv %s %s", tmpa, tmpc);
146: system(com);
147: sprintf(com, "sort -T %s -m %s %s -o %s", sortdir,
148: tmpb, tmpc, tmpa);
149: system(com);
150: }
151: fta = fopen(tmpa, "r");
152: fa = fopen(nma, "w");
153: fb = fopen(nmb, "w");
154: whash(fta, fa, fb, nhash, iflong, &keys, &hashes);
155: fclose(fta);
156: # ifndef D1
157: unlink(tmpa);
158: # endif
159: if (appflg)
160: {
161: unlink(tmpb);
162: unlink(tmpc);
163: }
164: if (chatty)
165:
166: printf ("%ld key occurrences, %d hashes, %d docs\n",
167: keys, hashes, docs);
168: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.