|
|
1.1 ! root 1: # include "stdio.h" ! 2: # include "assert.h" ! 3: ! 4: main(argc, argv) ! 5: char *argv[]; ! 6: { ! 7: /* make inverted file indexes. Reads a stream from mkey which ! 8: gives record pointer items and keys. Generates a set of files ! 9: a. NHASH pointers to file b. ! 10: b. lists of record numbers. ! 11: c. record pointer items. ! 12: ! 13: these files are named xxx.ia, xxx.ib, xxx.ic; ! 14: where xxx is taken from arg1. ! 15: If the files exist they are updated. ! 16: */ ! 17: ! 18: FILE *fa, *fb, *fc, *fta, *ftb, *ftc, *fd; ! 19: int nhash = 256; ! 20: int appflg = 1; ! 21: int keepkey = 0, pipein = 0; ! 22: char nma[100], nmb[100], nmc[100], com[100], nmd[100]; ! 23: char tmpa[20], tmpb[20], tmpc[20]; ! 24: char *remove = NULL; ! 25: int chatty = 0, docs, hashes, fp[2], fr, fw, pfork, pwait, status; ! 26: int i,j,k; ! 27: long keys; ! 28: int iflong =0; ! 29: char *sortdir; ! 30: ! 31: sortdir = (access("/crp/tmp", 06)==0) ? "/crp/tmp" : "/usr/tmp"; ! 32: while (argv[1][0] == '-') ! 33: { ! 34: switch(argv[1][1]) ! 35: { ! 36: case 'h': /* size of hash table */ ! 37: nhash = atoi (argv[1]+2); break; ! 38: case 'n': /* new, don't append */ ! 39: appflg=0; break; ! 40: case 'a': /* append to old file */ ! 41: appflg=1; break; ! 42: case 'v': /* verbose output */ ! 43: chatty=1; break; ! 44: case 'd': /* keep keys on file .id for check on searching */ ! 45: keepkey=1; break; ! 46: case 'p': /* pipe into sort (saves space, costs time)*/ ! 47: pipein = 1; break; ! 48: case 'i': /* input is on file, not stdin */ ! 49: close(0); ! 50: if (open(argv[2], 0) != 0) ! 51: err("Can't read input %s", argv[2]); ! 52: if (argv[1][2]=='u') /* unlink */ ! 53: remove = argv[2]; ! 54: argc--; argv++; ! 55: break; ! 56: } ! 57: argc--; ! 58: argv++; ! 59: } ! 60: ! 61: strcpy (nma, argc >= 2 ? argv[1] : "Index"); ! 62: strcpy (nmb, nma); ! 63: strcpy (nmc, nma); ! 64: strcpy (nmd, nma); ! 65: strcat (nma, ".ia"); ! 66: strcat (nmb, ".ib"); ! 67: strcat (nmc, ".ic"); ! 68: strcat (nmd, ".id"); ! 69: ! 70: sprintf(tmpa, "junk%di", getpid()); ! 71: if (pipein) ! 72: { ! 73: pipe(fp); fr=fp[0]; fw=fp[1]; ! 74: if ( (pfork=fork()) == 0) ! 75: { ! 76: close(fw); ! 77: close(0); ! 78: _assert(dup(fr)==0); ! 79: close(fr); ! 80: execl("/bin/sort", "sort", "-T", sortdir, "-o", tmpa, 0); ! 81: execl("/usr/bin/sort", "sort", "-T", sortdir, "-o", tmpa, 0); ! 82: _assert(0); ! 83: } ! 84: _assert(pfork!= -1); ! 85: close(fr); ! 86: fta = fopen("/dev/null", "w"); ! 87: close(fta->_file); ! 88: fta->_file = fw; ! 89: } ! 90: else /* use tmp file */ ! 91: { ! 92: fta = fopen(tmpa, "w"); ! 93: _assert (fta != NULL); ! 94: } ! 95: fb = 0; ! 96: if (appflg ) ! 97: { ! 98: if (fb = fopen(nmb, "r")) ! 99: { ! 100: sprintf(tmpb, "junk%dj", getpid()); ! 101: ftb = fopen(tmpb, "w"); ! 102: if (ftb==NULL) ! 103: err("Can't get scratch file %s",tmpb); ! 104: nhash = recopy(ftb, fb, fopen(nma, "r")); ! 105: fclose(ftb); ! 106: } ! 107: else ! 108: appflg=0; ! 109: } ! 110: fc = fopen(nmc, appflg ? "a" : "w"); ! 111: if (keepkey) ! 112: fd = keepkey ? fopen(nmd, "w") : 0; ! 113: docs = newkeys(fta, stdin, fc, nhash, fd, &iflong); ! 114: fclose(stdin); ! 115: if (remove != NULL) ! 116: unlink(remove); ! 117: fclose(fta); ! 118: if (pipein) ! 119: { ! 120: pwait = wait(&status); ! 121: printf("pfork %o pwait %o status %d\n",pfork,pwait,status); ! 122: _assert(pwait==pfork); ! 123: _assert(status==0); ! 124: } ! 125: else ! 126: { ! 127: sprintf(com, "sort -T %s %s -o %s", sortdir, tmpa, tmpa); ! 128: system(com); ! 129: } ! 130: ! 131: if (appflg) ! 132: { ! 133: sprintf(tmpc, "junk%dk", getpid()); ! 134: sprintf(com, "mv %s %s", tmpa, tmpc); ! 135: system(com); ! 136: sprintf(com, "sort -T %s -m %s %s -o %s", sortdir, ! 137: tmpb, tmpc, tmpa); ! 138: system(com); ! 139: } ! 140: fta = fopen(tmpa, "r"); ! 141: fa = fopen(nma, "w"); ! 142: fb = fopen(nmb, "w"); ! 143: whash(fta, fa, fb, nhash, iflong, &keys, &hashes); ! 144: fclose(fta); ! 145: # ifndef D1 ! 146: unlink(tmpa); ! 147: # endif ! 148: if (appflg) ! 149: { ! 150: unlink(tmpb); ! 151: unlink(tmpc); ! 152: } ! 153: if (chatty) ! 154: ! 155: printf ("%ld key occurrences, %d hashes, %d docs\n", ! 156: keys, hashes, docs); ! 157: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.