|
|
1.1 root 1: /*
2: * Copyright (c) 1986 Regents of the University of California.
3: * All rights reserved. The Berkeley software License Agreement
4: * specifies the terms and conditions for redistribution.
5: */
6:
/*
 * SCCS identification string; left out when the source is run
 * through lint.
 */
#ifndef lint
static char sccsid[] = "@(#)strfile.c 1.1 (Berkeley) 12/9/86";
#endif /* not lint */
10:
11: # include <stdio.h>
12: # include <ctype.h>
13: # include "strfile.h"
14:
15: /*
16: * This program takes a file composed of strings separated by
17: * lines starting with two consecutive delimiting characters (default
18: * character is '%') and creates another file which consists of a table
19: * describing the file (structure from "strfile.h"), a table of seek
20: * pointers to the start of the strings, and the strings, each terminated
21: * by a null byte. Usage:
22: *
23: * % strfile [ - ] [ -cC ] [ -sv ] [ -oir ] sourcefile [ datafile ]
24: *
25: * - - Give a usage summary useful for jogging the memory
26: * c - Change delimiting character from '%' to 'C'
27: * s - Silent. Give no summary of data processed at the end of
28: * the run.
29: * v - Verbose. Give summary of data processed. (Default)
30: * o - order the strings in alphabetic order
31: * i - if ordering, ignore case
32: * r - randomize the order of the strings
33: *
34: * Ken Arnold Sept. 7, 1978 --
35: *
36: * Added method to indicate dividers. A "%-" will cause the address
37: * to be added to the structure in one of the pointer elements.
38: *
39: * Ken Arnold Nov., 1984 --
40: *
41: * Added ordering options.
42: */
43:
/* Boolean values used for the option flags. */
#define	TRUE		1
#define	FALSE		0

/*
 * Second character of a divider line: the delimiting character
 * followed by DELIM_CH marks a divider rather than a plain separator.
 */
#define	DELIM_CH	'-'

/*
 * Sort record for one string: the character it is ordered by and the
 * seek position at which the string starts in the output file.
 */
typedef struct {
	char	first;		/* character used as the primary sort key */
	long	pos;		/* offset of the string in the output file */
} STR;
53:
/* Name of the input file; NULL until one is seen on the command line. */
char	*Infile = NULL;

/* Name of the output file; empty until chosen by getargs(). */
char	Outfile[100] = "";

/* Character that, doubled at the start of a line, separates strings. */
char	Delimch = '%';

/* Usage summary, one line per entry, terminated by a NULL pointer. */
char	*Usage[] = {
	"usage: strfile [ - ] [ -cC ] [ -sv ] [ -oir ] inputfile [ datafile ]",
	" - - Give this usage summary",
	" c - Replace delimiting character with 'C'",
	" s - Silent. Give no summary",
	" v - Verbose. Give summary. (default)",
	" o - order strings alphabetically",
	" i - ignore case in ordering",
	" r - randomize the order of the strings",
	" Default \"datafile\" is inputfile.dat",
	NULL
};
69:
70: int Sflag = FALSE; /* silent run flag */
71: int Oflag = FALSE; /* ordering flag */
72: int Iflag = FALSE; /* ignore case flag */
73: int Rflag = FALSE; /* randomize order flag */
74: int Delim = 0; /* current delimiter number */
75:
76: long *Seekpts;
77:
78: FILE *Sort_1, *Sort_2; /* pointers for sorting */
79:
80: STRFILE Tbl; /* statistics table */
81:
82: STR *Firstch; /* first chars of each string */
83:
84: char *fgets(), *malloc(), *strcpy(), *strcat();
85:
86: long ftell();
87:
88: main(ac, av)
89: int ac;
90: char **av;
91: {
92: register char *sp, dc;
93: register long *lp;
94: register unsigned int curseek; /* number of strings */
95: register long *seekpts, li; /* table of seek pointers */
96: register FILE *inf, *outf;
97: register int first;
98: register char *nsp;
99: register STR *fp;
100: static char string[257];
101:
102: getargs(ac, av); /* evalute arguments */
103:
104: /*
105: * initial counting of input file
106: */
107:
108: dc = Delimch;
109: if ((inf = fopen(Infile, "r")) == NULL) {
110: perror(Infile);
111: exit(-1);
112: }
113: for (curseek = 0; (sp = fgets(string, 256, inf)) != NULL; )
114: if (*sp++ == dc && (*sp == dc || *sp == DELIM_CH))
115: curseek++;
116: curseek++;
117:
118: /*
119: * save space at begginning of file for tables
120: */
121:
122: if ((outf = fopen(Outfile, "w")) == NULL) {
123: perror(Outfile);
124: exit(-1);
125: }
126:
127: /*
128: * Allocate space for the pointers, adding one to the end so the
129: * length of the final string can be calculated.
130: */
131: ++curseek;
132: seekpts = (long *) malloc(sizeof *seekpts * curseek); /* NOSTRICT */
133: if (seekpts == NULL) {
134: perror("calloc");
135: exit(-1);
136: }
137: if (Oflag) {
138: Firstch = (STR *) malloc(sizeof *Firstch * curseek);
139: if (Firstch == NULL) {
140: perror("calloc");
141: exit(-1);
142: }
143: }
144:
145: (void) fseek(outf, (long) (sizeof Tbl + sizeof *seekpts * curseek), 0);
146: (void) fseek(inf, (long) 0, 0); /* goto start of input */
147:
148: /*
149: * write the strings onto the file
150: */
151:
152: Tbl.str_longlen = 0;
153: Tbl.str_shortlen = (unsigned int) 0xffffffff;
154: lp = seekpts;
155: first = Oflag;
156: *seekpts = ftell(outf);
157: fp = Firstch;
158: do {
159: sp = fgets(string, 256, inf);
160: if (sp == NULL ||
161: (*sp == dc && (sp[1] == dc || sp[1] == DELIM_CH))) {
162: putc('\0', outf);
163: *++lp = ftell(outf);
164: li = ftell(outf) - lp[-1] - 1;
165: if (Tbl.str_longlen < li)
166: Tbl.str_longlen = li;
167: if (Tbl.str_shortlen > li)
168: Tbl.str_shortlen = li;
169: if (sp && sp[1] == DELIM_CH && Delim < MAXDELIMS)
170: Tbl.str_delims[Delim++] = lp - seekpts;
171: first = Oflag;
172: }
173: else {
174: if (first) {
175: for (nsp = sp; !isalnum(*nsp); nsp++)
176: continue;
177: if (Iflag && isupper(*nsp))
178: fp->first = tolower(*nsp);
179: else
180: fp->first = *nsp;
181: fp->pos = *lp;
182: fp++;
183: first = FALSE;
184: }
185: fputs(sp, outf);
186: }
187: } while (sp != NULL);
188:
189: /*
190: * write the tables in
191: */
192:
193: (void) fclose(inf);
194: Tbl.str_numstr = curseek - 1;
195:
196: if (Oflag)
197: do_order(seekpts, outf);
198: else if (Rflag)
199: randomize(seekpts);
200:
201: (void) fseek(outf, (long) 0, 0);
202: (void) fwrite((char *) &Tbl, sizeof Tbl, 1, outf);
203: (void) fwrite((char *) seekpts, sizeof *seekpts, curseek, outf);
204: (void) fclose(outf);
205:
206: if (!Sflag) {
207: printf("\"%s\" converted to \"%s\"\n", Infile, Outfile);
208: if (curseek == 0)
209: puts("There was 1 string");
210: else
211: printf("There were %u strings\n", curseek - 1);
212: printf("Longest string: %u byte%s\n", Tbl.str_longlen,
213: Tbl.str_longlen == 1 ? "" : "s");
214: printf("Shortest string: %u byte%s\n", Tbl.str_shortlen,
215: Tbl.str_shortlen == 1 ? "" : "s");
216: }
217: exit(0);
218: }
219:
220: /*
221: * This routine evaluates arguments from the command line
222: */
223: getargs(ac, av)
224: register int ac;
225: register char **av;
226: {
227: register char *sp;
228: register int i;
229: register int bad, j;
230:
231: bad = 0;
232: for (i = 1; i < ac; i++)
233: if (*av[i] == '-' && av[i][1]) {
234: for (sp = &av[i][1]; *sp; sp++)
235: switch (*sp) {
236: case 'c': /* new delimiting char */
237: if ((Delimch = *++sp) == '\0') {
238: --sp;
239: Delimch = *av[++i];
240: }
241: if (Delimch <= 0 || Delimch > '~' ||
242: Delimch == DELIM_CH) {
243: printf("bad delimiting character: '\\%o\n'",
244: Delimch);
245: bad++;
246: }
247: break;
248: case 's': /* silent */
249: Sflag++;
250: break;
251: case 'v': /* verbose */
252: Sflag = 0;
253: break;
254: case 'o': /* order strings */
255: Oflag++;
256: break;
257: case 'i': /* ignore case in ordering */
258: Iflag++;
259: break;
260: case 'r': /* ignore case in ordering */
261: Rflag++;
262: break;
263: default: /* unknown flag */
264: bad++;
265: printf("bad flag: '%c'\n", *sp);
266: break;
267: }
268: }
269: else if (*av[i] == '-') {
270: for (j = 0; Usage[j]; j++)
271: puts(Usage[j]);
272: exit(0);
273: }
274: else if (Infile)
275: (void) strcpy(Outfile, av[i]);
276: else
277: Infile = av[i];
278: if (!Infile) {
279: bad++;
280: puts("No input file name");
281: }
282: if (*Outfile == '\0' && !bad) {
283: (void) strcpy(Outfile, Infile);
284: (void) strcat(Outfile, ".dat");
285: }
286: if (bad) {
287: puts("use \"strfile -\" to get usage");
288: exit(-1);
289: }
290: }
291:
292: /*
293: * do_order:
294: * Order the strings alphabetically (possibly ignoring case).
295: */
296: do_order(seekpts, outf)
297: long *seekpts;
298: FILE *outf;
299: {
300: register int i;
301: register long *lp;
302: register STR *fp;
303: extern int cmp_str();
304:
305: (void) fflush(outf);
306: Sort_1 = fopen(Outfile, "r");
307: Sort_2 = fopen(Outfile, "r");
308: Seekpts = seekpts;
309: qsort((char *) Firstch, Tbl.str_numstr, sizeof *Firstch, cmp_str);
310: i = Tbl.str_numstr;
311: lp = seekpts;
312: fp = Firstch;
313: while (i--)
314: *lp++ = fp++->pos;
315: (void) fclose(Sort_1);
316: (void) fclose(Sort_2);
317: Tbl.str_flags |= STR_ORDERED;
318: }
319:
320: /*
321: * cmp_str:
322: * Compare two strings in the file
323: */
324: cmp_str(p1, p2)
325: STR *p1, *p2;
326: {
327: register int c1, c2;
328:
329: c1 = p1->first;
330: c2 = p2->first;
331: if (c1 != c2)
332: return c1 - c2;
333:
334: (void) fseek(Sort_1, p1->pos, 0);
335: (void) fseek(Sort_2, p2->pos, 0);
336:
337: while (!isalnum(c1 = getc(Sort_1)) && c1 != '\0')
338: continue;
339: while (!isalnum(c2 = getc(Sort_2)) && c2 != '\0')
340: continue;
341:
342: while (c1 != '\0' && c2 != '\0') {
343: if (Iflag) {
344: if (isupper(c1))
345: c1 = tolower(c1);
346: if (isupper(c2))
347: c2 = tolower(c2);
348: }
349: if (c1 != c2)
350: return c1 - c2;
351: c1 = getc(Sort_1);
352: c2 = getc(Sort_2);
353: }
354: return c1 - c2;
355: }
356:
357: /*
358: * randomize:
359: * Randomize the order of the string table. We must be careful
360: * not to randomize across delimiter boundaries. All
361: * randomization is done within each block.
362: */
363: randomize(seekpts)
364: register long *seekpts;
365: {
366: register int cnt, i, j, start;
367: register long tmp;
368: register long *origsp;
369:
370: Tbl.str_flags |= STR_RANDOM;
371: srnd(time((long *) NULL) + getpid());
372: origsp = seekpts;
373: for (j = 0; j <= Delim; j++) {
374:
375: /*
376: * get the starting place for the block
377: */
378:
379: if (j == 0)
380: start = 0;
381: else
382: start = Tbl.str_delims[j - 1];
383:
384: /*
385: * get the ending point
386: */
387:
388: if (j == Delim)
389: cnt = Tbl.str_numstr;
390: else
391: cnt = Tbl.str_delims[j];
392:
393: /*
394: * move things around randomly
395: */
396:
397: for (seekpts = &origsp[start]; cnt > start; cnt--, seekpts++) {
398: i = rnd(cnt - start);
399: tmp = seekpts[0];
400: seekpts[0] = seekpts[i];
401: seekpts[i] = tmp;
402: }
403: }
404: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.