|
|
1.1 ! root 1: /* ! 2: * Copyright (c) 1986 Regents of the University of California. ! 3: * All rights reserved. The Berkeley software License Agreement ! 4: * specifies the terms and conditions for redistribution. ! 5: */ ! 6: ! 7: #ifndef lint ! 8: static char sccsid[] = "@(#)strfile.c 1.1 (Berkeley) 12/9/86"; ! 9: #endif not lint ! 10: ! 11: # include <stdio.h> ! 12: # include <ctype.h> ! 13: # include "strfile.h" ! 14: ! 15: /* ! 16: * This program takes a file composed of strings seperated by ! 17: * lines starting with two consecutive delimiting character (default ! 18: * character is '%') and creates another file which consists of a table ! 19: * describing the file (structure from "strfile.h"), a table of seek ! 20: * pointers to the start of the strings, and the strings, each terinated ! 21: * by a null byte. Usage: ! 22: * ! 23: * % strfile [ - ] [ -cC ] [ -sv ] [ -oir ] sourcefile [ datafile ] ! 24: * ! 25: * - - Give a usage summary useful for jogging the memory ! 26: * c - Change delimiting character from '%' to 'C' ! 27: * s - Silent. Give no summary of data processed at the end of ! 28: * the run. ! 29: * v - Verbose. Give summary of data processed. (Default) ! 30: * o - order the strings in alphabetic order ! 31: * i - if ordering, ignore case ! 32: * r - randomize the order of the strings ! 33: * ! 34: * Ken Arnold Sept. 7, 1978 -- ! 35: * ! 36: * Added method to indicate dividers. A "%-" will cause the address ! 37: * to be added to the structure in one of the pointer elements. ! 38: * ! 39: * Ken Arnold Nov., 1984 -- ! 40: * ! 41: * Added ordering options. ! 42: */ ! 43: ! 44: # define TRUE 1 ! 45: # define FALSE 0 ! 46: ! 47: # define DELIM_CH '-' ! 48: ! 49: typedef struct { ! 50: char first; ! 51: long pos; ! 52: } STR; ! 53: ! 54: char *Infile = NULL, /* input file name */ ! 55: Outfile[100] = "", /* output file name */ ! 56: Delimch = '%', /* delimiting character */ ! 57: *Usage[] = { /* usage summary */ ! 58: "usage: strfile [ - ] [ -cC ] [ -sv ] [ -oir ] inputfile [ datafile ]", ! 59: " - - Give this usage summary", ! 60: " c - Replace delimiting character with 'C'", ! 61: " s - Silent. Give no summary", ! 62: " v - Verbose. Give summary. (default)", ! 63: " o - order strings alphabetically", ! 64: " i - ignore case in ordering", ! 65: " r - randomize the order of the strings", ! 66: " Default \"datafile\" is inputfile.dat", ! 67: NULL ! 68: }; ! 69: ! 70: int Sflag = FALSE; /* silent run flag */ ! 71: int Oflag = FALSE; /* ordering flag */ ! 72: int Iflag = FALSE; /* ignore case flag */ ! 73: int Rflag = FALSE; /* randomize order flag */ ! 74: int Delim = 0; /* current delimiter number */ ! 75: ! 76: long *Seekpts; ! 77: ! 78: FILE *Sort_1, *Sort_2; /* pointers for sorting */ ! 79: ! 80: STRFILE Tbl; /* statistics table */ ! 81: ! 82: STR *Firstch; /* first chars of each string */ ! 83: ! 84: char *fgets(), *malloc(), *strcpy(), *strcat(); ! 85: ! 86: long ftell(); ! 87: ! 88: main(ac, av) ! 89: int ac; ! 90: char **av; ! 91: { ! 92: register char *sp, dc; ! 93: register long *lp; ! 94: register unsigned int curseek; /* number of strings */ ! 95: register long *seekpts, li; /* table of seek pointers */ ! 96: register FILE *inf, *outf; ! 97: register int first; ! 98: register char *nsp; ! 99: register STR *fp; ! 100: static char string[257]; ! 101: ! 102: getargs(ac, av); /* evalute arguments */ ! 103: ! 104: /* ! 105: * initial counting of input file ! 106: */ ! 107: ! 108: dc = Delimch; ! 109: if ((inf = fopen(Infile, "r")) == NULL) { ! 110: perror(Infile); ! 111: exit(-1); ! 112: } ! 113: for (curseek = 0; (sp = fgets(string, 256, inf)) != NULL; ) ! 114: if (*sp++ == dc && (*sp == dc || *sp == DELIM_CH)) ! 115: curseek++; ! 116: curseek++; ! 117: ! 118: /* ! 119: * save space at begginning of file for tables ! 120: */ ! 121: ! 122: if ((outf = fopen(Outfile, "w")) == NULL) { ! 123: perror(Outfile); ! 124: exit(-1); ! 125: } ! 126: ! 127: /* ! 128: * Allocate space for the pointers, adding one to the end so the ! 129: * length of the final string can be calculated. ! 130: */ ! 131: ++curseek; ! 132: seekpts = (long *) malloc(sizeof *seekpts * curseek); /* NOSTRICT */ ! 133: if (seekpts == NULL) { ! 134: perror("calloc"); ! 135: exit(-1); ! 136: } ! 137: if (Oflag) { ! 138: Firstch = (STR *) malloc(sizeof *Firstch * curseek); ! 139: if (Firstch == NULL) { ! 140: perror("calloc"); ! 141: exit(-1); ! 142: } ! 143: } ! 144: ! 145: (void) fseek(outf, (long) (sizeof Tbl + sizeof *seekpts * curseek), 0); ! 146: (void) fseek(inf, (long) 0, 0); /* goto start of input */ ! 147: ! 148: /* ! 149: * write the strings onto the file ! 150: */ ! 151: ! 152: Tbl.str_longlen = 0; ! 153: Tbl.str_shortlen = (unsigned int) 0xffffffff; ! 154: lp = seekpts; ! 155: first = Oflag; ! 156: *seekpts = ftell(outf); ! 157: fp = Firstch; ! 158: do { ! 159: sp = fgets(string, 256, inf); ! 160: if (sp == NULL || ! 161: (*sp == dc && (sp[1] == dc || sp[1] == DELIM_CH))) { ! 162: putc('\0', outf); ! 163: *++lp = ftell(outf); ! 164: li = ftell(outf) - lp[-1] - 1; ! 165: if (Tbl.str_longlen < li) ! 166: Tbl.str_longlen = li; ! 167: if (Tbl.str_shortlen > li) ! 168: Tbl.str_shortlen = li; ! 169: if (sp && sp[1] == DELIM_CH && Delim < MAXDELIMS) ! 170: Tbl.str_delims[Delim++] = lp - seekpts; ! 171: first = Oflag; ! 172: } ! 173: else { ! 174: if (first) { ! 175: for (nsp = sp; !isalnum(*nsp); nsp++) ! 176: continue; ! 177: if (Iflag && isupper(*nsp)) ! 178: fp->first = tolower(*nsp); ! 179: else ! 180: fp->first = *nsp; ! 181: fp->pos = *lp; ! 182: fp++; ! 183: first = FALSE; ! 184: } ! 185: fputs(sp, outf); ! 186: } ! 187: } while (sp != NULL); ! 188: ! 189: /* ! 190: * write the tables in ! 191: */ ! 192: ! 193: (void) fclose(inf); ! 194: Tbl.str_numstr = curseek - 1; ! 195: ! 196: if (Oflag) ! 197: do_order(seekpts, outf); ! 198: else if (Rflag) ! 199: randomize(seekpts); ! 200: ! 201: (void) fseek(outf, (long) 0, 0); ! 202: (void) fwrite((char *) &Tbl, sizeof Tbl, 1, outf); ! 203: (void) fwrite((char *) seekpts, sizeof *seekpts, curseek, outf); ! 204: (void) fclose(outf); ! 205: ! 206: if (!Sflag) { ! 207: printf("\"%s\" converted to \"%s\"\n", Infile, Outfile); ! 208: if (curseek == 0) ! 209: puts("There was 1 string"); ! 210: else ! 211: printf("There were %u strings\n", curseek - 1); ! 212: printf("Longest string: %u byte%s\n", Tbl.str_longlen, ! 213: Tbl.str_longlen == 1 ? "" : "s"); ! 214: printf("Shortest string: %u byte%s\n", Tbl.str_shortlen, ! 215: Tbl.str_shortlen == 1 ? "" : "s"); ! 216: } ! 217: exit(0); ! 218: } ! 219: ! 220: /* ! 221: * This routine evaluates arguments from the command line ! 222: */ ! 223: getargs(ac, av) ! 224: register int ac; ! 225: register char **av; ! 226: { ! 227: register char *sp; ! 228: register int i; ! 229: register int bad, j; ! 230: ! 231: bad = 0; ! 232: for (i = 1; i < ac; i++) ! 233: if (*av[i] == '-' && av[i][1]) { ! 234: for (sp = &av[i][1]; *sp; sp++) ! 235: switch (*sp) { ! 236: case 'c': /* new delimiting char */ ! 237: if ((Delimch = *++sp) == '\0') { ! 238: --sp; ! 239: Delimch = *av[++i]; ! 240: } ! 241: if (Delimch <= 0 || Delimch > '~' || ! 242: Delimch == DELIM_CH) { ! 243: printf("bad delimiting character: '\\%o\n'", ! 244: Delimch); ! 245: bad++; ! 246: } ! 247: break; ! 248: case 's': /* silent */ ! 249: Sflag++; ! 250: break; ! 251: case 'v': /* verbose */ ! 252: Sflag = 0; ! 253: break; ! 254: case 'o': /* order strings */ ! 255: Oflag++; ! 256: break; ! 257: case 'i': /* ignore case in ordering */ ! 258: Iflag++; ! 259: break; ! 260: case 'r': /* ignore case in ordering */ ! 261: Rflag++; ! 262: break; ! 263: default: /* unknown flag */ ! 264: bad++; ! 265: printf("bad flag: '%c'\n", *sp); ! 266: break; ! 267: } ! 268: } ! 269: else if (*av[i] == '-') { ! 270: for (j = 0; Usage[j]; j++) ! 271: puts(Usage[j]); ! 272: exit(0); ! 273: } ! 274: else if (Infile) ! 275: (void) strcpy(Outfile, av[i]); ! 276: else ! 277: Infile = av[i]; ! 278: if (!Infile) { ! 279: bad++; ! 280: puts("No input file name"); ! 281: } ! 282: if (*Outfile == '\0' && !bad) { ! 283: (void) strcpy(Outfile, Infile); ! 284: (void) strcat(Outfile, ".dat"); ! 285: } ! 286: if (bad) { ! 287: puts("use \"strfile -\" to get usage"); ! 288: exit(-1); ! 289: } ! 290: } ! 291: ! 292: /* ! 293: * do_order: ! 294: * Order the strings alphabetically (possibly ignoring case). ! 295: */ ! 296: do_order(seekpts, outf) ! 297: long *seekpts; ! 298: FILE *outf; ! 299: { ! 300: register int i; ! 301: register long *lp; ! 302: register STR *fp; ! 303: extern int cmp_str(); ! 304: ! 305: (void) fflush(outf); ! 306: Sort_1 = fopen(Outfile, "r"); ! 307: Sort_2 = fopen(Outfile, "r"); ! 308: Seekpts = seekpts; ! 309: qsort((char *) Firstch, Tbl.str_numstr, sizeof *Firstch, cmp_str); ! 310: i = Tbl.str_numstr; ! 311: lp = seekpts; ! 312: fp = Firstch; ! 313: while (i--) ! 314: *lp++ = fp++->pos; ! 315: (void) fclose(Sort_1); ! 316: (void) fclose(Sort_2); ! 317: Tbl.str_flags |= STR_ORDERED; ! 318: } ! 319: ! 320: /* ! 321: * cmp_str: ! 322: * Compare two strings in the file ! 323: */ ! 324: cmp_str(p1, p2) ! 325: STR *p1, *p2; ! 326: { ! 327: register int c1, c2; ! 328: ! 329: c1 = p1->first; ! 330: c2 = p2->first; ! 331: if (c1 != c2) ! 332: return c1 - c2; ! 333: ! 334: (void) fseek(Sort_1, p1->pos, 0); ! 335: (void) fseek(Sort_2, p2->pos, 0); ! 336: ! 337: while (!isalnum(c1 = getc(Sort_1)) && c1 != '\0') ! 338: continue; ! 339: while (!isalnum(c2 = getc(Sort_2)) && c2 != '\0') ! 340: continue; ! 341: ! 342: while (c1 != '\0' && c2 != '\0') { ! 343: if (Iflag) { ! 344: if (isupper(c1)) ! 345: c1 = tolower(c1); ! 346: if (isupper(c2)) ! 347: c2 = tolower(c2); ! 348: } ! 349: if (c1 != c2) ! 350: return c1 - c2; ! 351: c1 = getc(Sort_1); ! 352: c2 = getc(Sort_2); ! 353: } ! 354: return c1 - c2; ! 355: } ! 356: ! 357: /* ! 358: * randomize: ! 359: * Randomize the order of the string table. We must be careful ! 360: * not to randomize across delimiter boundaries. All ! 361: * randomization is done within each block. ! 362: */ ! 363: randomize(seekpts) ! 364: register long *seekpts; ! 365: { ! 366: register int cnt, i, j, start; ! 367: register long tmp; ! 368: register long *origsp; ! 369: ! 370: Tbl.str_flags |= STR_RANDOM; ! 371: srnd(time((long *) NULL) + getpid()); ! 372: origsp = seekpts; ! 373: for (j = 0; j <= Delim; j++) { ! 374: ! 375: /* ! 376: * get the starting place for the block ! 377: */ ! 378: ! 379: if (j == 0) ! 380: start = 0; ! 381: else ! 382: start = Tbl.str_delims[j - 1]; ! 383: ! 384: /* ! 385: * get the ending point ! 386: */ ! 387: ! 388: if (j == Delim) ! 389: cnt = Tbl.str_numstr; ! 390: else ! 391: cnt = Tbl.str_delims[j]; ! 392: ! 393: /* ! 394: * move things around randomly ! 395: */ ! 396: ! 397: for (seekpts = &origsp[start]; cnt > start; cnt--, seekpts++) { ! 398: i = rnd(cnt - start); ! 399: tmp = seekpts[0]; ! 400: seekpts[0] = seekpts[i]; ! 401: seekpts[i] = tmp; ! 402: } ! 403: } ! 404: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.