|
|
# include "stdio.h"
# include "assert.h"
# include <string.h>
3:
/*
 * Make inverted file indexes.
 *
 * Reads a stream from mkey which gives record pointer items and keys
 * (on standard input, or from the file named after -i) and generates
 *	a. xxx.ia  NHASH pointers to file b.
 *	b. xxx.ib  lists of record numbers.
 *	c. xxx.ic  record pointer items.
 * where xxx is the first non-option argument ("Index" when absent).
 * If the files exist they are updated, unless -n is given.
 *
 * Options:
 *	-hN	size of hash table (default 256)
 *	-n	new, don't append
 *	-a	append to old file (default)
 *	-v	verbose output
 *	-d	keep keys on file xxx.id for check on searching
 *	-p	pipe into sort (saves space, costs time)
 *	-i F	input is on file F, not stdin; -iu also unlinks F afterwards
 * Unrecognized option letters are ignored.
 *
 * Returns 0 on completion and 1 for the failures detected here.
 * err(), recopy(), newkeys(), whash() and _assert() are defined outside
 * this file; err() is presumably fatal, but every new check returns
 * after calling it so nothing here relies on that.
 */
int
main(argc, argv)
int argc;
char *argv[];
{
	FILE *fa, *fb, *fc, *fta, *ftb;
	FILE *fd = NULL;	/* key file; stays NULL unless -d was given */
	int nhash = 256;
	int appflg = 1;
	int keepkey = 0, pipein = 0;
	char nma[100], nmb[100], nmc[100], com[100], nmd[100];
	char tmpa[20], tmpb[20], tmpc[20];
	char *rmfile = NULL;	/* input file to unlink once it has been read */
	char *base;		/* index name the suffixes are appended to */
	int chatty = 0, docs, hashes, fp[2], fr, fw, pfork, pwait, status;
	int rc;
	long keys;
	int iflong = 0;
	char *sortdir;

	sortdir = (access("/crp/tmp", 06) == 0) ? "/crp/tmp" : "/usr/tmp";

	/* argc is tested first so argv[1] is never read past the end */
	while (argc > 1 && argv[1][0] == '-')
	{
		switch (argv[1][1])
		{
		case 'h': /* size of hash table */
			nhash = atoi(argv[1] + 2);
			break;
		case 'n': /* new, don't append */
			appflg = 0;
			break;
		case 'a': /* append to old file */
			appflg = 1;
			break;
		case 'v': /* verbose output */
			chatty = 1;
			break;
		case 'd': /* keep keys on file .id for check on searching */
			keepkey = 1;
			break;
		case 'p': /* pipe into sort (saves space, costs time) */
			pipein = 1;
			break;
		case 'i': /* input is on file, not stdin */
			if (argc < 3)
			{
				err("Can't read input %s", "(no file name given)");
				return 1;
			}
			/* closing 0 first makes open() hand back descriptor 0 */
			close(0);
			if (open(argv[2], 0) != 0)
				err("Can't read input %s", argv[2]);
			if (argv[1][2] == 'u') /* unlink */
				rmfile = argv[2];
			/* skip the file name; the flag itself is skipped below */
			argc--;
			argv++;
			break;
		default: /* unknown flags are ignored */
			break;
		}
		argc--;
		argv++;
	}

	base = (argc >= 2) ? argv[1] : "Index";
	/* each name buffer must hold base, a 3-character suffix and the NUL */
	if (strlen(base) > sizeof(nma) - 4)
	{
		err("Index name %s is too long", base);
		return 1;
	}
	strcpy(nma, base);
	strcpy(nmb, base);
	strcpy(nmc, base);
	strcpy(nmd, base);
	strcat(nma, ".ia");
	strcat(nmb, ".ib");
	strcat(nmc, ".ic");
	strcat(nmd, ".id");

	sprintf(tmpa, "junk%di", getpid());
	if (pipein)
	{
		/* calls are made outside _assert so they cannot be compiled away */
		rc = pipe(fp);
		_assert(rc == 0);
		fr = fp[0];
		fw = fp[1];
		if ((pfork = fork()) == 0)
		{
			/* child: read end of the pipe becomes sort's stdin */
			close(fw);
			close(0);
			rc = dup(fr);
			_assert(rc == 0);
			close(fr);
			execl("/bin/sort", "sort", "-T", sortdir, "-o", tmpa, (char *)0);
			execl("/usr/bin/sort", "sort", "-T", sortdir, "-o", tmpa, (char *)0);
			_assert(0);
			return 1;	/* only reached if _assert() comes back */
		}
		_assert(pfork != -1);
		close(fr);
		/* wrap the write end in a stream without touching FILE internals */
		fta = fdopen(fw, "w");
		_assert(fta != NULL);
	}
	else /* use tmp file */
	{
		fta = fopen(tmpa, "w");
		_assert(fta != NULL);
	}

	fb = NULL;
	if (appflg)
	{
		if ((fb = fopen(nmb, "r")) != NULL)
		{
			sprintf(tmpb, "junk%dj", getpid());
			ftb = fopen(tmpb, "w");
			if (ftb == NULL)
				err("Can't get scratch file %s", tmpb);
			/*
			 * NOTE(review): recopy() is handed fb and the .ia stream;
			 * whether it closes them is not visible here, so they are
			 * deliberately not closed in this function.
			 */
			nhash = recopy(ftb, fb, fopen(nma, "r"));
			fclose(ftb);
		}
		else
			appflg = 0;	/* nothing to append to: build from scratch */
	}

	fc = fopen(nmc, appflg ? "a" : "w");
	if (fc == NULL)
	{
		err("Can't write %s", nmc);
		return 1;
	}
	if (keepkey)
		fd = fopen(nmd, "w");
	docs = newkeys(fta, stdin, fc, nhash, fd, &iflong);
	fclose(stdin);
	if (rmfile != NULL)
		unlink(rmfile);
	fclose(fta);	/* in pipe mode this is what gives sort its end of input */

	if (pipein)
	{
		pwait = wait(&status);
		printf("pfork %o pwait %o status %d\n", pfork, pwait, status);
		_assert(pwait == pfork);
		_assert(status == 0);
	}
	else
	{
		sprintf(com, "sort -T %s %s -o %s", sortdir, tmpa, tmpa);
		system(com);
	}

	if (appflg)
	{
		/* merge the recopied old entries (tmpb) with the new sorted ones */
		sprintf(tmpc, "junk%dk", getpid());
		sprintf(com, "mv %s %s", tmpa, tmpc);
		system(com);
		sprintf(com, "sort -T %s -m %s %s -o %s", sortdir,
			tmpb, tmpc, tmpa);
		system(com);
	}

	fta = fopen(tmpa, "r");
	if (fta == NULL)
	{
		err("Can't read sorted keys %s", tmpa);
		return 1;
	}
	fa = fopen(nma, "w");
	if (fa == NULL)
	{
		err("Can't write %s", nma);
		return 1;
	}
	fb = fopen(nmb, "w");
	if (fb == NULL)
	{
		err("Can't write %s", nmb);
		return 1;
	}
	whash(fta, fa, fb, nhash, iflong, &keys, &hashes);
	fclose(fta);
# ifndef D1
	unlink(tmpa);
# endif
	if (appflg)
	{
		unlink(tmpb);
		unlink(tmpc);
	}
	if (chatty)
		printf ("%ld key occurrences, %d hashes, %d docs\n",
			keys, hashes, docs);
	return 0;
}
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.