|
|
#ifndef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 200809L	/* make <stdio.h> declare fdopen() in strict ISO C modes */
#endif

#ifndef lint
static char *sccsid = "@(#)inv1.c 4.1 (Berkeley) 5/6/83";
#endif

#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/wait.h>

/*
 * Routines defined in other source files of this program.  The original
 * code called them without any declaration, i.e. as functions returning
 * int; these old-style declarations only make that explicit.  Parameter
 * lists are left unspecified because the definitions are not visible here.
 */
extern int err(), recopy(), newkeys(), whash();

/*
 * Report an unrecoverable failure on stderr and terminate with status 1.
 * `name` is an optional detail (file name, command, option); pass NULL
 * when there is none.
 */
static void
fatal(const char *what, const char *name)
{
	if (name != NULL)
		fprintf(stderr, "inv1: %s %s\n", what, name);
	else
		fprintf(stderr, "inv1: %s\n", what);
	exit(1);
}

/* Run a shell command and stop the program if it does not succeed. */
static void
run(const char *command)
{
	if (system(command) != 0)
		fatal("command failed:", command);
}

/*
 * Make inverted file indexes.  Reads a stream from mkey which
 * gives record pointer items and keys.  Generates set of files
 *	a. NHASH pointers to file b.
 *	b. lists of record numbers.
 *	c. record pointer items.
 *
 * these files are named xxx.ia, xxx.ib, xxx.ic;
 * where xxx is taken from arg1 ("Index" when no name is given).
 * If the files exist they are updated.
 *
 * Options (each a separate argument, before the index name):
 *	-hN	size of hash table
 *	-n	new, don't append
 *	-a	append to old file (default)
 *	-v	verbose output
 *	-d	keep keys on file xxx.id for check on searching
 *	-p	pipe into sort (saves space, costs time)
 *	-i F	input is on file F, not stdin; -iu also unlinks F afterwards
 *
 * Returns 0 on completion; unrecoverable failures exit with status 1.
 */
int
main(int argc, char *argv[])
{
	FILE *fa, *fb, *fc, *fta, *ftb, *fd;
	int nhash = 256;
	int appflg = 1;
	int keepkey = 0, pipein = 0;
	char nma[100], nmb[100], nmc[100], com[100], nmd[100];
	char tmpa[20], tmpb[20], tmpc[20];
	char *rmfile = NULL;		/* input file to unlink once it has been read */
	char *base;			/* index base name ("xxx" above) */
	int chatty = 0, docs, hashes, fp[2], fr, fw, pfork, pwait, status;
	long keys;
	int iflong = 0;
	char *sortdir;

	/* Prefer /crp/tmp for sort's scratch space when it is readable and writable. */
	sortdir = (access("/crp/tmp", R_OK | W_OK) == 0) ? "/crp/tmp" : "/usr/tmp";

	/* argc is checked first: argv[1] is NULL when no arguments are given. */
	while (argc > 1 && argv[1][0] == '-')
	{
		switch (argv[1][1])
		{
		case 'h':	/* size of hash table */
			nhash = atoi(argv[1] + 2);
			if (nhash <= 0)
				fatal("bad hash table size:", argv[1]);
			break;
		case 'n':	/* new, don't append */
			appflg = 0;
			break;
		case 'a':	/* append to old file */
			appflg = 1;
			break;
		case 'v':	/* verbose output */
			chatty = 1;
			break;
		case 'd':	/* keep keys on file .id for check on searching */
			keepkey = 1;
			break;
		case 'p':	/* pipe into sort (saves space, costs time) */
			pipein = 1;
			break;
		case 'i':	/* input is on file, not stdin */
			if (argc < 3)
				fatal("option needs a file name:", argv[1]);
			/* Descriptor 0 is freed so that open() hands it back for the file. */
			close(0);
			if (open(argv[2], O_RDONLY) != 0)
				err("Can't read input %s", argv[2]);
			if (argv[1][2] == 'u')	/* unlink */
				rmfile = argv[2];
			/* Skip the file name; the shift below skips the option itself. */
			argc--;
			argv++;
			break;
		default:	/* unrecognized options are ignored */
			break;
		}
		argc--;
		argv++;
	}

	base = argc >= 2 ? argv[1] : "Index";
	/* Each name buffer must hold the base name plus a 3-character suffix. */
	if (strlen(base) > sizeof nma - sizeof ".ia")
		fatal("index name too long:", base);
	strcpy(nma, base);
	strcpy(nmb, nma);
	strcpy(nmc, nma);
	strcpy(nmd, nma);
	strcat(nma, ".ia");
	strcat(nmb, ".ib");
	strcat(nmc, ".ic");
	strcat(nmd, ".id");

	sprintf(tmpa, "junk%di", (int)getpid());
	if (pipein)
	{
		if (pipe(fp) != 0)
			fatal("can't create pipe to sort", NULL);
		fr = fp[0];
		fw = fp[1];
		pfork = fork();
		if (pfork == 0)
		{
			/* Child: make the read end of the pipe its standard input. */
			close(fw);
			close(0);
			if (dup(fr) != 0)
			{
				fprintf(stderr, "inv1: can't redirect sort input\n");
				_exit(1);
			}
			close(fr);
			/* The argument list must end with a null *pointer*, not an int 0. */
			execl("/bin/sort", "sort", "-T", sortdir, "-o", tmpa, (char *)0);
			execl("/usr/bin/sort", "sort", "-T", sortdir, "-o", tmpa, (char *)0);
			fprintf(stderr, "inv1: can't exec sort\n");
			_exit(1);
		}
		if (pfork == -1)
			fatal("can't fork sort", NULL);
		close(fr);
		/* Wrap the write end of the pipe in a stdio stream. */
		fta = fdopen(fw, "w");
		if (fta == NULL)
			fatal("can't open pipe to sort", NULL);
	}
	else	/* use tmp file */
	{
		fta = fopen(tmpa, "w");
		if (fta == NULL)
			fatal("can't create scratch file", tmpa);
	}

	fb = NULL;
	if (appflg)
	{
		if ((fb = fopen(nmb, "r")) != NULL)
		{
			sprintf(tmpb, "junk%dj", (int)getpid());
			ftb = fopen(tmpb, "w");
			if (ftb == NULL)
				err("Can't get scratch file %s", tmpb);
			/* recopy also yields the hash size to use for the existing index. */
			nhash = recopy(ftb, fb, fopen(nma, "r"));
			fclose(ftb);
		}
		else
			appflg = 0;	/* nothing to append to: build from scratch */
	}

	fc = fopen(nmc, appflg ? "a" : "w");
	if (fc == NULL)
		fatal("can't write", nmc);
	/* fd must be well defined even without -d: it is always handed to newkeys. */
	fd = keepkey ? fopen(nmd, "w") : NULL;
	if (keepkey && fd == NULL)
		fatal("can't write", nmd);

	docs = newkeys(fta, stdin, fc, nhash, fd, &iflong);
	fclose(stdin);
	if (rmfile != NULL)
		unlink(rmfile);
	fclose(fta);	/* in pipe mode this gives sort its end of input */

	if (pipein)
	{
		pwait = wait(&status);
		printf("pfork %o pwait %o status %d\n", pfork, pwait, status);
		if (pwait != pfork)
			fatal("lost track of sort process", NULL);
		if (status != 0)
			fatal("sort failed", NULL);
	}
	else
	{
		sprintf(com, "sort -T %s %s -o %s", sortdir, tmpa, tmpa);
		run(com);
	}

	if (appflg)
	{
		/* Merge the freshly sorted keys with those copied from the old index. */
		sprintf(tmpc, "junk%dk", (int)getpid());
		sprintf(com, "mv %s %s", tmpa, tmpc);
		run(com);
		sprintf(com, "sort -T %s -m %s %s -o %s", sortdir,
			tmpb, tmpc, tmpa);
		run(com);
	}

	fta = fopen(tmpa, "r");
	if (fta == NULL)
		fatal("can't read scratch file", tmpa);
	fa = fopen(nma, "w");
	if (fa == NULL)
		fatal("can't write", nma);
	fb = fopen(nmb, "w");
	if (fb == NULL)
		fatal("can't write", nmb);
	whash(fta, fa, fb, nhash, iflong, &keys, &hashes);
	fclose(fta);
# ifndef D1
	unlink(tmpa);
# endif
	if (appflg)
	{
		unlink(tmpb);
		unlink(tmpc);
	}
	if (chatty)
		printf("%ld key occurrences, %d hashes, %d docs\n",
			keys, hashes, docs);
	return 0;
}
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.