|
|
1.1 ! root 1: # include <stdio.h> ! 2: # include <ctype.h> ! 3: # include "strfile.h" ! 4: ! 5: /* ! 6: * This program takes a file composed of strings seperated by ! 7: * lines starting with two consecutive delimiting character (default ! 8: * character is '%') and creates another file which consists of a table ! 9: * describing the file (structure from "strfile.h"), a table of seek ! 10: * pointers to the start of the strings, and the strings, each terinated ! 11: * by a null byte. Usage: ! 12: * ! 13: * % strfile [ - ] [ -cC ] [ -sv ] [ -oir ] sourcefile [ datafile ] ! 14: * ! 15: * - - Give a usage summary useful for jogging the memory ! 16: * c - Change delimiting character from '%' to 'C' ! 17: * s - Silent. Give no summary of data processed at the end of ! 18: * the run. ! 19: * v - Verbose. Give summary of data processed. (Default) ! 20: * o - order the strings in alphabetic order ! 21: * i - if ordering, ignore case ! 22: * r - randomize the order of the strings ! 23: * ! 24: * Ken Arnold Sept. 7, 1978 -- ! 25: * ! 26: * Added method to indicate dividers. A "%-" will cause the address ! 27: * to be added to the structure in one of the pointer elements. ! 28: * ! 29: * Ken Arnold Nov., 1984 -- ! 30: * ! 31: * Added ordering options. ! 32: */ ! 33: ! 34: # define TRUE 1 ! 35: # define FALSE 0 ! 36: ! 37: # define DELIM_CH '-' ! 38: ! 39: typedef struct { ! 40: char first; ! 41: long pos; ! 42: } STR; ! 43: ! 44: char *Infile = NULL, /* input file name */ ! 45: Outfile[100] = "", /* output file name */ ! 46: Delimch = '%', /* delimiting character */ ! 47: *Usage[] = { /* usage summary */ ! 48: "usage: strfile [ - ] [ -cC ] [ -sv ] [ -oir ] inputfile [ datafile ]", ! 49: " - - Give this usage summary", ! 50: " c - Replace delimiting character with 'C'", ! 51: " s - Silent. Give no summary", ! 52: " v - Verbose. Give summary. (default)", ! 53: " o - order strings alphabetically", ! 54: " i - ignore case in ordering", ! 55: " r - randomize the order of the strings", ! 56: " Default \"datafile\" is inputfile.dat", ! 57: NULL ! 58: }; ! 59: ! 60: int Sflag = FALSE; /* silent run flag */ ! 61: int Oflag = FALSE; /* ordering flag */ ! 62: int Iflag = FALSE; /* ignore case flag */ ! 63: int Rflag = FALSE; /* randomize order flag */ ! 64: int Delim = 0; /* current delimiter number */ ! 65: ! 66: long *Seekpts; ! 67: ! 68: FILE *Sort_1, *Sort_2; /* pointers for sorting */ ! 69: ! 70: STRFILE Tbl; /* statistics table */ ! 71: ! 72: STR *Firstch; /* first chars of each string */ ! 73: ! 74: char *fgets(), *malloc(), *strcpy(), *strcat(); ! 75: ! 76: long ftell(); ! 77: ! 78: main(ac, av) ! 79: int ac; ! 80: char **av; ! 81: { ! 82: register char *sp, dc; ! 83: register long *lp; ! 84: register unsigned int curseek; /* number of strings */ ! 85: register long *seekpts, li; /* table of seek pointers */ ! 86: register FILE *inf, *outf; ! 87: register int first; ! 88: register char *nsp; ! 89: register STR *fp; ! 90: static char string[257]; ! 91: ! 92: getargs(ac, av); /* evalute arguments */ ! 93: ! 94: /* ! 95: * initial counting of input file ! 96: */ ! 97: ! 98: dc = Delimch; ! 99: if ((inf = fopen(Infile, "r")) == NULL) { ! 100: perror(Infile); ! 101: exit(-1); ! 102: } ! 103: for (curseek = 0; (sp = fgets(string, 256, inf)) != NULL; ) ! 104: if (*sp++ == dc && (*sp == dc || *sp == DELIM_CH)) ! 105: curseek++; ! 106: curseek++; ! 107: ! 108: /* ! 109: * save space at begginning of file for tables ! 110: */ ! 111: ! 112: if ((outf = fopen(Outfile, "w")) == NULL) { ! 113: perror(Outfile); ! 114: exit(-1); ! 115: } ! 116: ! 117: /* ! 118: * Allocate space for the pointers, adding one to the end so the ! 119: * length of the final string can be calculated. ! 120: */ ! 121: ++curseek; ! 122: seekpts = (long *) malloc(sizeof *seekpts * curseek); /* NOSTRICT */ ! 123: if (seekpts == NULL) { ! 124: perror("calloc"); ! 125: exit(-1); ! 126: } ! 127: if (Oflag) { ! 128: Firstch = (STR *) malloc(sizeof *Firstch * curseek); ! 129: if (Firstch == NULL) { ! 130: perror("calloc"); ! 131: exit(-1); ! 132: } ! 133: } ! 134: ! 135: (void) fseek(outf, (long) (sizeof Tbl + sizeof *seekpts * curseek), 0); ! 136: (void) fseek(inf, (long) 0, 0); /* goto start of input */ ! 137: ! 138: /* ! 139: * write the strings onto the file ! 140: */ ! 141: ! 142: Tbl.str_longlen = 0; ! 143: Tbl.str_shortlen = (unsigned int) 0xffffffff; ! 144: lp = seekpts; ! 145: first = Oflag; ! 146: *seekpts = ftell(outf); ! 147: fp = Firstch; ! 148: do { ! 149: sp = fgets(string, 256, inf); ! 150: if (sp == NULL || ! 151: (*sp == dc && (sp[1] == dc || sp[1] == DELIM_CH))) { ! 152: putc('\0', outf); ! 153: *++lp = ftell(outf); ! 154: li = ftell(outf) - lp[-1] - 1; ! 155: if (Tbl.str_longlen < li) ! 156: Tbl.str_longlen = li; ! 157: if (Tbl.str_shortlen > li) ! 158: Tbl.str_shortlen = li; ! 159: if (sp && sp[1] == DELIM_CH && Delim < MAXDELIMS) ! 160: Tbl.str_delims[Delim++] = lp - seekpts; ! 161: first = Oflag; ! 162: } ! 163: else { ! 164: if (first) { ! 165: for (nsp = sp; !isalnum(*nsp); nsp++) ! 166: continue; ! 167: if (Iflag && isupper(*nsp)) ! 168: fp->first = tolower(*nsp); ! 169: else ! 170: fp->first = *nsp; ! 171: fp->pos = *lp; ! 172: fp++; ! 173: first = FALSE; ! 174: } ! 175: fputs(sp, outf); ! 176: } ! 177: } while (sp != NULL); ! 178: ! 179: /* ! 180: * write the tables in ! 181: */ ! 182: ! 183: (void) fclose(inf); ! 184: Tbl.str_numstr = curseek - 1; ! 185: ! 186: if (Oflag) ! 187: do_order(seekpts, outf); ! 188: else if (Rflag) ! 189: randomize(seekpts); ! 190: ! 191: (void) fseek(outf, (long) 0, 0); ! 192: (void) fwrite((char *) &Tbl, sizeof Tbl, 1, outf); ! 193: (void) fwrite((char *) seekpts, sizeof *seekpts, curseek, outf); ! 194: (void) fclose(outf); ! 195: ! 196: if (!Sflag) { ! 197: printf("\"%s\" converted to \"%s\"\n", Infile, Outfile); ! 198: if (curseek == 0) ! 199: puts("There was 1 string"); ! 200: else ! 201: printf("There were %u strings\n", curseek - 1); ! 202: printf("Longest string: %u byte%s\n", Tbl.str_longlen, ! 203: Tbl.str_longlen == 1 ? "" : "s"); ! 204: printf("Shortest string: %u byte%s\n", Tbl.str_shortlen, ! 205: Tbl.str_shortlen == 1 ? "" : "s"); ! 206: } ! 207: exit(0); ! 208: } ! 209: ! 210: /* ! 211: * This routine evaluates arguments from the command line ! 212: */ ! 213: getargs(ac, av) ! 214: register int ac; ! 215: register char **av; ! 216: { ! 217: register char *sp; ! 218: register int i; ! 219: register int bad, j; ! 220: ! 221: bad = 0; ! 222: for (i = 1; i < ac; i++) ! 223: if (*av[i] == '-' && av[i][1]) { ! 224: for (sp = &av[i][1]; *sp; sp++) ! 225: switch (*sp) { ! 226: case 'c': /* new delimiting char */ ! 227: if ((Delimch = *++sp) == '\0') { ! 228: --sp; ! 229: Delimch = *av[++i]; ! 230: } ! 231: if (Delimch <= 0 || Delimch > '~' || ! 232: Delimch == DELIM_CH) { ! 233: printf("bad delimiting character: '\\%o\n'", ! 234: Delimch); ! 235: bad++; ! 236: } ! 237: break; ! 238: case 's': /* silent */ ! 239: Sflag++; ! 240: break; ! 241: case 'v': /* verbose */ ! 242: Sflag = 0; ! 243: break; ! 244: case 'o': /* order strings */ ! 245: Oflag++; ! 246: break; ! 247: case 'i': /* ignore case in ordering */ ! 248: Iflag++; ! 249: break; ! 250: case 'r': /* ignore case in ordering */ ! 251: Rflag++; ! 252: break; ! 253: default: /* unknown flag */ ! 254: bad++; ! 255: printf("bad flag: '%c'\n", *sp); ! 256: break; ! 257: } ! 258: } ! 259: else if (*av[i] == '-') { ! 260: for (j = 0; Usage[j]; j++) ! 261: puts(Usage[j]); ! 262: exit(0); ! 263: } ! 264: else if (Infile) ! 265: (void) strcpy(Outfile, av[i]); ! 266: else ! 267: Infile = av[i]; ! 268: if (!Infile) { ! 269: bad++; ! 270: puts("No input file name"); ! 271: } ! 272: if (*Outfile == '\0' && !bad) { ! 273: (void) strcpy(Outfile, Infile); ! 274: (void) strcat(Outfile, ".dat"); ! 275: } ! 276: if (bad) { ! 277: puts("use \"strfile -\" to get usage"); ! 278: exit(-1); ! 279: } ! 280: } ! 281: ! 282: /* ! 283: * do_order: ! 284: * Order the strings alphabetically (possibly ignoring case). ! 285: */ ! 286: do_order(seekpts, outf) ! 287: long *seekpts; ! 288: FILE *outf; ! 289: { ! 290: register int i; ! 291: register long *lp; ! 292: register STR *fp; ! 293: extern int cmp_str(); ! 294: ! 295: (void) fflush(outf); ! 296: Sort_1 = fopen(Outfile, "r"); ! 297: Sort_2 = fopen(Outfile, "r"); ! 298: Seekpts = seekpts; ! 299: qsort((char *) Firstch, Tbl.str_numstr, sizeof *Firstch, cmp_str); ! 300: i = Tbl.str_numstr; ! 301: lp = seekpts; ! 302: fp = Firstch; ! 303: while (i--) ! 304: *lp++ = fp++->pos; ! 305: (void) fclose(Sort_1); ! 306: (void) fclose(Sort_2); ! 307: Tbl.str_flags |= STR_ORDERED; ! 308: } ! 309: ! 310: /* ! 311: * cmp_str: ! 312: * Compare two strings in the file ! 313: */ ! 314: cmp_str(p1, p2) ! 315: STR *p1, *p2; ! 316: { ! 317: register int c1, c2; ! 318: ! 319: c1 = p1->first; ! 320: c2 = p2->first; ! 321: if (c1 != c2) ! 322: return c1 - c2; ! 323: ! 324: (void) fseek(Sort_1, p1->pos, 0); ! 325: (void) fseek(Sort_2, p2->pos, 0); ! 326: ! 327: while (!isalnum(c1 = getc(Sort_1)) && c1 != '\0') ! 328: continue; ! 329: while (!isalnum(c2 = getc(Sort_2)) && c2 != '\0') ! 330: continue; ! 331: ! 332: while (c1 != '\0' && c2 != '\0') { ! 333: if (Iflag) { ! 334: if (isupper(c1)) ! 335: c1 = tolower(c1); ! 336: if (isupper(c2)) ! 337: c2 = tolower(c2); ! 338: } ! 339: if (c1 != c2) ! 340: return c1 - c2; ! 341: c1 = getc(Sort_1); ! 342: c2 = getc(Sort_2); ! 343: } ! 344: return c1 - c2; ! 345: } ! 346: ! 347: /* ! 348: * randomize: ! 349: * Randomize the order of the string table. We must be careful ! 350: * not to randomize across delimiter boundaries. All ! 351: * randomization is done within each block. ! 352: */ ! 353: randomize(seekpts) ! 354: register long *seekpts; ! 355: { ! 356: register int cnt, i, j, start; ! 357: register long tmp; ! 358: register long *origsp; ! 359: ! 360: Tbl.str_flags |= STR_RANDOM; ! 361: srnd(time((long *) NULL) + getpid()); ! 362: origsp = seekpts; ! 363: for (j = 0; j <= Delim; j++) { ! 364: ! 365: /* ! 366: * get the starting place for the block ! 367: */ ! 368: ! 369: if (j == 0) ! 370: start = 0; ! 371: else ! 372: start = Tbl.str_delims[j - 1]; ! 373: ! 374: /* ! 375: * get the ending point ! 376: */ ! 377: ! 378: if (j == Delim) ! 379: cnt = Tbl.str_numstr; ! 380: else ! 381: cnt = Tbl.str_delims[j]; ! 382: ! 383: /* ! 384: * move things around randomly ! 385: */ ! 386: ! 387: for (seekpts = &origsp[start]; cnt > start; cnt--, seekpts++) { ! 388: i = rnd(cnt - start); ! 389: tmp = seekpts[0]; ! 390: seekpts[0] = seekpts[i]; ! 391: seekpts[i] = tmp; ! 392: } ! 393: } ! 394: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.