|
|
# include "stdio.h"
# include "assert.h"
# include "string.h"

/*
 * newkeys: build hash postings and the record file from a stream of key lines.
 *
 * Each line read from inf has the form
 *	record-text TAB key key key ... NEWLINE
 * For every line:
 *   - the record text (everything before the TAB) is written to recf,
 *     followed by a newline;
 *   - if fd is non-null, ";<offset>,<length>" is appended to the record
 *     text on recf and the key portion of the line is copied to fd
 *     (offset/length locate that copy within what this call wrote to fd);
 *   - for every key, a line "hash-code SPACE record-pointer" is written
 *     to outf, where hash-code is hash(key) % nhash and record-pointer is
 *     the offset in recf at which this record starts.
 *
 * Parameters:
 *   outf    stream receiving the "hash pointer" posting lines
 *   inf     stream of key lines to read
 *   recf    stream receiving the record text; closed before returning
 *   nhash   modulus applied to hash(); must be non-zero
 *   fd      optional stream receiving the raw key text (may be null)
 *   iflong  set to non-zero when the final recf offset is >= 65536
 *
 * Returns the number of input lines processed.  Lines that contain no
 * TAB are malformed and are skipped without being counted.
 *
 * getargs() and hash() are defined elsewhere; they are declared here with
 * the int return type the original code relied on implicitly.
 * NOTE(review): getargs() fills keyv[] with no size argument, so a line
 * with more than 500 keys can still overrun keyv -- fix in getargs.
 */
int
newkeys (outf, inf, recf, nhash, fd, iflong)
FILE *outf, *inf, *recf, *fd;
int nhash;
int *iflong;
{
# define LINESIZ 1250
# define KEYSIZ 30
	int getargs(), hash();
	long lp;			/* offset in recf of the current record */
	long ld = 0;			/* running offset of key text written to fd */
	int ll = 0, lt = 0;		/* key-text length; length of ";off,len" suffix */
	char line[LINESIZ];
	char key[KEYSIZ], bkeys[40];
	char *p, *s, *t;
	char *keyv[500];
	int i, nk, ndoc = 0, more = 0, c;
	int partial;			/* keyv[nk] holds a key cut off by LINESIZ */

	lp = ftell (recf);
	while (fgets(line, LINESIZ, inf))
	{
		/* split "record TAB keys"; a line without a TAB cannot be indexed */
		p = strchr(line, '\t');
		if (p == NULL)
			continue;
		*p++ = 0;
		fputs(line, recf);
		if (fd)
		{
			sprintf(bkeys, ";%ld", ld);
			ll = (int)strlen(p);
			lt = (int)strlen(bkeys);
			fputs(bkeys, recf);
			sprintf(bkeys, ",%d", ll);
			lt += (int)strlen(bkeys);
			fputs(bkeys, recf);
			ld += ll;
			fputs(p, fd);
		}
		putc('\n', recf);

		/* a missing trailing newline means fgets stopped short of the line end */
		for (s = p; *s; s++)
			;
		if (s > p && s[-1] == '\n')
		{
			more = 0;
			*--s = 0;
		}
		else
			more = 1;
		assert (fd==0 || more==0);

		nk = getargs(p, keyv);
		/* the last key of a truncated buffer may be incomplete: hold it back */
		partial = 0;
		if (more && nk > 0)
		{
			nk--;
			partial = 1;
		}
		for (i = 0; i < nk; i++)
			fprintf(outf, "%04d %06ld\n", hash(keyv[i])%nhash, lp);
# if D1
		for (i = 0; i < nk; i++)
			printf("key %s hash %d\n", keyv[i], hash(keyv[i])%nhash);
# endif
		if (more)	/* allow more than LINESIZ keys */
		{
			/* start from the held-back fragment, never overrunning key[] */
			s = key;
			if (partial)
				for (t = keyv[nk]; *t && s < key + KEYSIZ - 1; )
					*s++ = *t++;
			/* read the rest of the line a character at a time; stop at EOF too */
			while ((c = getc(inf)) != '\n' && c != EOF)
			{
				if (c != ' ')
				{
					/* characters beyond KEYSIZ-1 are dropped, not stored */
					if (s < key + KEYSIZ - 1)
						*s++ = c;
					continue;
				}
				*s = 0;
				if (s > key)
					fprintf(outf, "%04d %06ld\n", hash(key)%nhash, lp);
				s = key;
			}
			/* the final key is ended by the newline, not a space: emit it */
			*s = 0;
			if (s > key)
				fprintf(outf, "%04d %06ld\n", hash(key)%nhash, lp);
		}
		/* advance past record text, optional ";off,len" suffix, and newline */
		lp += (long)strlen(line) + lt + 1;
		ndoc++;
	}
	*iflong = (lp >= 65536L);
	fclose(recf);
	return(ndoc);
}

/*
 * trimnl: remove a single trailing newline from the string p, in place.
 * An empty string is left untouched.  Always returns 0.
 */
int
trimnl(p)
char *p;
{
	if (*p == 0)
		return(0);	/* nothing to trim; avoids touching p[-1] */
	while (*p)
		p++;
	p--;
	if (*p == '\n')
		*p = 0;
	return(0);
}
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.