|
|
1.1 ! root 1: /* ! 2: * Copyright (c) 1989 The Regents of the University of California. ! 3: * All rights reserved. ! 4: * ! 5: * This code is derived from software contributed to Berkeley by ! 6: * Ken Arnold. ! 7: * ! 8: * Redistribution and use in source and binary forms are permitted ! 9: * provided that the above copyright notice and this paragraph are ! 10: * duplicated in all such forms and that any documentation, ! 11: * advertising materials, and other materials related to such ! 12: * distribution and use acknowledge that the software was developed ! 13: * by the University of California, Berkeley. The name of the ! 14: * University may not be used to endorse or promote products derived ! 15: * from this software without specific prior written permission. ! 16: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR ! 17: * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED ! 18: * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. ! 19: */ ! 20: ! 21: #ifndef lint ! 22: char copyright[] = ! 23: "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ ! 24: All rights reserved.\n"; ! 25: #endif /* not lint */ ! 26: ! 27: #ifndef lint ! 28: static char sccsid[] = "@(#)strfile.c 5.11 (Berkeley) 12/15/89"; ! 29: #endif /* not lint */ ! 30: ! 31: # include <machine/endian.h> ! 32: # include <sys/param.h> ! 33: # include <stdio.h> ! 34: # include <ctype.h> ! 35: # include "strfile.h" ! 36: ! 37: # ifndef MAXPATHLEN ! 38: # define MAXPATHLEN 1024 ! 39: # endif /* MAXPATHLEN */ ! 40: ! 41: /* ! 42: * This program takes a file composed of strings seperated by ! 43: * lines starting with two consecutive delimiting character (default ! 44: * character is '%') and creates another file which consists of a table ! 45: * describing the file (structure from "strfile.h"), a table of seek ! 46: * pointers to the start of the strings, and the strings, each terminated ! 47: * by a null byte. Usage: ! 48: * ! 49: * % strfile [-iorsx] [ -cC ] sourcefile [ datafile ] ! 50: * ! 51: * c - Change delimiting character from '%' to 'C' ! 52: * s - Silent. Give no summary of data processed at the end of ! 53: * the run. ! 54: * o - order the strings in alphabetic order ! 55: * i - if ordering, ignore case ! 56: * r - randomize the order of the strings ! 57: * x - set rotated bit ! 58: * ! 59: * Ken Arnold Sept. 7, 1978 -- ! 60: * ! 61: * Added ordering options. ! 62: */ ! 63: ! 64: # define TRUE 1 ! 65: # define FALSE 0 ! 66: ! 67: # define STORING_PTRS (Oflag || Rflag) ! 68: # define CHUNKSIZE 512 ! 69: ! 70: #ifdef lint ! 71: # define ALWAYS atoi("1") ! 72: #else ! 73: # define ALWAYS 1 ! 74: #endif ! 75: # define ALLOC(ptr,sz) if (ALWAYS) { \ ! 76: if (ptr == NULL) \ ! 77: ptr = malloc((unsigned int) (CHUNKSIZE * sizeof *ptr)); \ ! 78: else if (((sz) + 1) % CHUNKSIZE == 0) \ ! 79: ptr = realloc((void *) ptr, ((unsigned int) ((sz) + CHUNKSIZE) * sizeof *ptr)); \ ! 80: if (ptr == NULL) { \ ! 81: fprintf(stderr, "out of space\n"); \ ! 82: exit(1); \ ! 83: } \ ! 84: } else ! 85: ! 86: #ifdef NO_VOID ! 87: # define void char ! 88: #endif ! 89: ! 90: typedef struct { ! 91: char first; ! 92: off_t pos; ! 93: } STR; ! 94: ! 95: char *Infile = NULL, /* input file name */ ! 96: Outfile[MAXPATHLEN] = "", /* output file name */ ! 97: Delimch = '%'; /* delimiting character */ ! 98: ! 99: int Sflag = FALSE; /* silent run flag */ ! 100: int Oflag = FALSE; /* ordering flag */ ! 101: int Iflag = FALSE; /* ignore case flag */ ! 102: int Rflag = FALSE; /* randomize order flag */ ! 103: int Xflag = FALSE; /* set rotated bit */ ! 104: long Num_pts = 0; /* number of pointers/strings */ ! 105: ! 106: off_t *Seekpts; ! 107: ! 108: FILE *Sort_1, *Sort_2; /* pointers for sorting */ ! 109: ! 110: STRFILE Tbl; /* statistics table */ ! 111: ! 112: STR *Firstch; /* first chars of each string */ ! 113: ! 114: char *fgets(), *strcpy(), *strcat(); ! 115: ! 116: void *malloc(), *realloc(); ! 117: ! 118: /* ! 119: * main: ! 120: * Drive the sucker. There are two main modes -- either we store ! 121: * the seek pointers, if the table is to be sorted or randomized, ! 122: * or we write the pointer directly to the file, if we are to stay ! 123: * in file order. If the former, we allocate and re-allocate in ! 124: * CHUNKSIZE blocks; if the latter, we just write each pointer, ! 125: * and then seek back to the beginning to write in the table. ! 126: */ ! 127: main(ac, av) ! 128: int ac; ! 129: char **av; ! 130: { ! 131: register char *sp, dc; ! 132: register FILE *inf, *outf; ! 133: register off_t last_off, length, pos, *p; ! 134: register int first, cnt; ! 135: register char *nsp; ! 136: register STR *fp; ! 137: static char string[257]; ! 138: ! 139: getargs(ac, av); /* evalute arguments */ ! 140: dc = Delimch; ! 141: if ((inf = fopen(Infile, "r")) == NULL) { ! 142: perror(Infile); ! 143: exit(1); ! 144: } ! 145: ! 146: if ((outf = fopen(Outfile, "w")) == NULL) { ! 147: perror(Outfile); ! 148: exit(1); ! 149: } ! 150: if (!STORING_PTRS) ! 151: (void) fseek(outf, sizeof Tbl, 0); ! 152: ! 153: /* ! 154: * Write the strings onto the file ! 155: */ ! 156: ! 157: Tbl.str_longlen = 0; ! 158: Tbl.str_shortlen = (unsigned int) 0xffffffff; ! 159: Tbl.str_delim = dc; ! 160: Tbl.str_version = VERSION; ! 161: first = Oflag; ! 162: add_offset(outf, ftell(inf)); ! 163: last_off = 0; ! 164: do { ! 165: sp = fgets(string, 256, inf); ! 166: if (sp == NULL || sp[0] == dc && sp[1] == '\n') { ! 167: pos = ftell(inf); ! 168: length = pos - last_off - (sp ? strlen(sp) : 0); ! 169: last_off = pos; ! 170: if (!length) ! 171: continue; ! 172: add_offset(outf, pos); ! 173: if (Tbl.str_longlen < length) ! 174: Tbl.str_longlen = length; ! 175: if (Tbl.str_shortlen > length) ! 176: Tbl.str_shortlen = length; ! 177: first = Oflag; ! 178: } ! 179: else if (first) { ! 180: for (nsp = sp; !isalnum(*nsp); nsp++) ! 181: continue; ! 182: ALLOC(Firstch, Num_pts); ! 183: fp = &Firstch[Num_pts - 1]; ! 184: if (Iflag && isupper(*nsp)) ! 185: fp->first = tolower(*nsp); ! 186: else ! 187: fp->first = *nsp; ! 188: fp->pos = Seekpts[Num_pts - 1]; ! 189: first = FALSE; ! 190: } ! 191: } while (sp != NULL); ! 192: ! 193: /* ! 194: * write the tables in ! 195: */ ! 196: ! 197: (void) fclose(inf); ! 198: ! 199: if (Oflag) ! 200: do_order(); ! 201: else if (Rflag) ! 202: randomize(); ! 203: ! 204: if (Xflag) ! 205: Tbl.str_flags |= STR_ROTATED; ! 206: ! 207: if (!Sflag) { ! 208: printf("\"%s\" created\n", Outfile); ! 209: if (Num_pts == 2) ! 210: puts("There was 1 string"); ! 211: else ! 212: printf("There were %d strings\n", Num_pts - 1); ! 213: printf("Longest string: %lu byte%s\n", Tbl.str_longlen, ! 214: Tbl.str_longlen == 1 ? "" : "s"); ! 215: printf("Shortest string: %lu byte%s\n", Tbl.str_shortlen, ! 216: Tbl.str_shortlen == 1 ? "" : "s"); ! 217: } ! 218: ! 219: (void) fseek(outf, (off_t) 0, 0); ! 220: Tbl.str_version = htonl(Tbl.str_version); ! 221: Tbl.str_numstr = htonl(Num_pts - 1); ! 222: Tbl.str_longlen = htonl(Tbl.str_longlen); ! 223: Tbl.str_shortlen = htonl(Tbl.str_shortlen); ! 224: Tbl.str_flags = htonl(Tbl.str_flags); ! 225: (void) fwrite((char *) &Tbl, sizeof Tbl, 1, outf); ! 226: if (STORING_PTRS) { ! 227: for (p = Seekpts, cnt = Num_pts; cnt--; ++p) ! 228: *p = htonl(*p); ! 229: (void) fwrite((char *) Seekpts, sizeof *Seekpts, (int) Num_pts, outf); ! 230: } ! 231: (void) fclose(outf); ! 232: exit(0); ! 233: } ! 234: ! 235: /* ! 236: * This routine evaluates arguments from the command line ! 237: */ ! 238: getargs(argc, argv) ! 239: int argc; ! 240: char **argv; ! 241: { ! 242: extern char *optarg; ! 243: extern int optind; ! 244: int ch; ! 245: ! 246: while ((ch = getopt(argc, argv, "c:iorsx")) != EOF) ! 247: switch(ch) { ! 248: case 'c': /* new delimiting char */ ! 249: Delimch = *optarg; ! 250: if (!isascii(Delimch)) { ! 251: printf("bad delimiting character: '\\%o\n'", ! 252: Delimch); ! 253: } ! 254: break; ! 255: case 'i': /* ignore case in ordering */ ! 256: Iflag++; ! 257: break; ! 258: case 'o': /* order strings */ ! 259: Oflag++; ! 260: break; ! 261: case 'r': /* randomize pointers */ ! 262: Rflag++; ! 263: break; ! 264: case 's': /* silent */ ! 265: Sflag++; ! 266: break; ! 267: case 'x': /* set the rotated bit */ ! 268: Xflag++; ! 269: break; ! 270: case '?': ! 271: default: ! 272: usage(); ! 273: } ! 274: argv += optind; ! 275: ! 276: if (*argv) { ! 277: Infile = *argv; ! 278: if (*++argv) ! 279: (void) strcpy(Outfile, *argv); ! 280: } ! 281: if (!Infile) { ! 282: puts("No input file name"); ! 283: usage(); ! 284: } ! 285: if (*Outfile == '\0') { ! 286: (void) strcpy(Outfile, Infile); ! 287: (void) strcat(Outfile, ".dat"); ! 288: } ! 289: } ! 290: ! 291: usage() ! 292: { ! 293: (void) fprintf(stderr, ! 294: "strfile [-iorsx] [-c char] sourcefile [datafile]\n"); ! 295: exit(1); ! 296: } ! 297: ! 298: /* ! 299: * add_offset: ! 300: * Add an offset to the list, or write it out, as appropriate. ! 301: */ ! 302: add_offset(fp, off) ! 303: FILE *fp; ! 304: off_t off; ! 305: { ! 306: off_t net; ! 307: ! 308: if (!STORING_PTRS) { ! 309: net = htonl(off); ! 310: fwrite(&net, 1, sizeof net, fp); ! 311: } else { ! 312: ALLOC(Seekpts, Num_pts + 1); ! 313: Seekpts[Num_pts] = off; ! 314: } ! 315: Num_pts++; ! 316: } ! 317: ! 318: /* ! 319: * do_order: ! 320: * Order the strings alphabetically (possibly ignoring case). ! 321: */ ! 322: do_order() ! 323: { ! 324: register int i; ! 325: register off_t *lp; ! 326: register STR *fp; ! 327: extern int cmp_str(); ! 328: ! 329: Sort_1 = fopen(Infile, "r"); ! 330: Sort_2 = fopen(Infile, "r"); ! 331: qsort((char *) Firstch, (int) Tbl.str_numstr, sizeof *Firstch, cmp_str); ! 332: i = Tbl.str_numstr; ! 333: lp = Seekpts; ! 334: fp = Firstch; ! 335: while (i--) ! 336: *lp++ = fp++->pos; ! 337: (void) fclose(Sort_1); ! 338: (void) fclose(Sort_2); ! 339: Tbl.str_flags |= STR_ORDERED; ! 340: } ! 341: ! 342: /* ! 343: * cmp_str: ! 344: * Compare two strings in the file ! 345: */ ! 346: char * ! 347: unctrl(c) ! 348: char c; ! 349: { ! 350: static char buf[3]; ! 351: ! 352: if (isprint(c)) { ! 353: buf[0] = c; ! 354: buf[1] = '\0'; ! 355: } ! 356: else if (c == 0177) { ! 357: buf[0] = '^'; ! 358: buf[1] = '?'; ! 359: } ! 360: else { ! 361: buf[0] = '^'; ! 362: buf[1] = c + 'A' - 1; ! 363: } ! 364: return buf; ! 365: } ! 366: ! 367: cmp_str(p1, p2) ! 368: STR *p1, *p2; ! 369: { ! 370: register int c1, c2; ! 371: register int n1, n2; ! 372: ! 373: # define SET_N(nf,ch) (nf = (ch == '\n')) ! 374: # define IS_END(ch,nf) (ch == Delimch && nf) ! 375: ! 376: c1 = p1->first; ! 377: c2 = p2->first; ! 378: if (c1 != c2) ! 379: return c1 - c2; ! 380: ! 381: (void) fseek(Sort_1, p1->pos, 0); ! 382: (void) fseek(Sort_2, p2->pos, 0); ! 383: ! 384: n1 = FALSE; ! 385: n2 = FALSE; ! 386: while (!isalnum(c1 = getc(Sort_1)) && c1 != '\0') ! 387: SET_N(n1, c1); ! 388: while (!isalnum(c2 = getc(Sort_2)) && c2 != '\0') ! 389: SET_N(n2, c2); ! 390: ! 391: while (!IS_END(c1, n1) && !IS_END(c2, n2)) { ! 392: if (Iflag) { ! 393: if (isupper(c1)) ! 394: c1 = tolower(c1); ! 395: if (isupper(c2)) ! 396: c2 = tolower(c2); ! 397: } ! 398: if (c1 != c2) ! 399: return c1 - c2; ! 400: SET_N(n1, c1); ! 401: SET_N(n2, c2); ! 402: c1 = getc(Sort_1); ! 403: c2 = getc(Sort_2); ! 404: } ! 405: if (IS_END(c1, n1)) ! 406: c1 = 0; ! 407: if (IS_END(c2, n2)) ! 408: c2 = 0; ! 409: return c1 - c2; ! 410: } ! 411: ! 412: /* ! 413: * randomize: ! 414: * Randomize the order of the string table. We must be careful ! 415: * not to randomize across delimiter boundaries. All ! 416: * randomization is done within each block. ! 417: */ ! 418: randomize() ! 419: { ! 420: register int cnt, i; ! 421: register off_t tmp; ! 422: register off_t *sp; ! 423: extern time_t time(); ! 424: ! 425: srandom((int)(time((time_t *) NULL) + getpid())); ! 426: ! 427: Tbl.str_flags |= STR_RANDOM; ! 428: cnt = Tbl.str_numstr; ! 429: ! 430: /* ! 431: * move things around randomly ! 432: */ ! 433: ! 434: for (sp = Seekpts; cnt > 0; cnt--, sp++) { ! 435: i = random() % cnt; ! 436: tmp = sp[0]; ! 437: sp[0] = sp[i]; ! 438: sp[i] = tmp; ! 439: } ! 440: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.