|
|
1.1 root 1: # include <stdio.h>
2: # include <ctype.h>
3: # include "strfile.h"
4:
5: /*
6: * This program takes a file composed of strings separated by
7: * lines starting with two consecutive delimiting characters (default
8: * character is '%') and creates another file which consists of a table
9: * describing the file (structure from "strfile.h"), a table of seek
10: * pointers to the start of the strings, and the strings, each terminated
11: * by a null byte. Usage:
12: *
13: * % strfile [ - ] [ -cC ] [ -sv ] [ -oir ] sourcefile [ datafile ]
14: *
15: * - - Give a usage summary useful for jogging the memory
16: * c - Change delimiting character from '%' to 'C'
17: * s - Silent. Give no summary of data processed at the end of
18: * the run.
19: * v - Verbose. Give summary of data processed. (Default)
20: * o - order the strings in alphabetic order
21: * i - if ordering, ignore case
22: * r - randomize the order of the strings
23: *
24: * Ken Arnold Sept. 7, 1978 --
25: *
26: * Added method to indicate dividers. A "%-" will cause the address
27: * to be added to the structure in one of the pointer elements.
28: *
29: * Ken Arnold Nov., 1984 --
30: *
31: * Added ordering options.
32: */
33:
34: # define TRUE 1
35: # define FALSE 0
36:
37: # define DELIM_CH '-'
38:
39: typedef struct { /* sort key for one string; filled in by main() when ordering */
40: char first; /* leading alphanumeric character of the string (lowercased under -i) */
41: long pos; /* seek pointer to the start of the string in the output file */
42: } STR;
43:
44: char *Infile = NULL, /* input file name */
45: Outfile[100] = "", /* output file name (at most 99 characters) */
46: Delimch = '%', /* delimiting character */
47: *Usage[] = { /* usage summary */
48: "usage: strfile [ - ] [ -cC ] [ -sv ] [ -oir ] inputfile [ datafile ]",
49: " - - Give this usage summary",
50: " c - Replace delimiting character with 'C'",
51: " s - Silent. Give no summary",
52: " v - Verbose. Give summary. (default)",
53: " o - order strings alphabetically",
54: " i - ignore case in ordering",
55: " r - randomize the order of the strings",
56: " Default \"datafile\" is inputfile.dat",
57: NULL
58: };
59:
60: int Sflag = FALSE; /* silent run flag */
61: int Oflag = FALSE; /* ordering flag */
62: int Iflag = FALSE; /* ignore case flag */
63: int Rflag = FALSE; /* randomize order flag */
64: int Delim = 0; /* number of "%-" dividers recorded in Tbl.str_delims so far */
65:
66: long *Seekpts; /* seek pointer table; set by do_order() */
67:
68: FILE *Sort_1, *Sort_2; /* read handles on the output file, used while sorting */
69:
70: STRFILE Tbl; /* statistics table */
71:
72: STR *Firstch; /* sort keys (first chars) of each string; allocated only with -o */
73:
74: char *fgets(), *malloc(), *strcpy(), *strcat(); /* library routines, declared without prototypes */
75:
76: long ftell();
77:
78: main(ac, av)
79: int ac;
80: char **av;
81: {
82: register char *sp, dc;
83: register long *lp;
84: register unsigned int curseek; /* number of strings */
85: register long *seekpts, li; /* table of seek pointers */
86: register FILE *inf, *outf;
87: register int first;
88: register char *nsp;
89: register STR *fp;
90: static char string[257];
91:
92: getargs(ac, av); /* evalute arguments */
93:
94: /*
95: * initial counting of input file
96: */
97:
98: dc = Delimch;
99: if ((inf = fopen(Infile, "r")) == NULL) {
100: perror(Infile);
101: exit(-1);
102: }
103: for (curseek = 0; (sp = fgets(string, 256, inf)) != NULL; )
104: if (*sp++ == dc && (*sp == dc || *sp == DELIM_CH))
105: curseek++;
106: curseek++;
107:
108: /*
109: * save space at begginning of file for tables
110: */
111:
112: if ((outf = fopen(Outfile, "w")) == NULL) {
113: perror(Outfile);
114: exit(-1);
115: }
116:
117: /*
118: * Allocate space for the pointers, adding one to the end so the
119: * length of the final string can be calculated.
120: */
121: ++curseek;
122: seekpts = (long *) malloc(sizeof *seekpts * curseek); /* NOSTRICT */
123: if (seekpts == NULL) {
124: perror("calloc");
125: exit(-1);
126: }
127: if (Oflag) {
128: Firstch = (STR *) malloc(sizeof *Firstch * curseek);
129: if (Firstch == NULL) {
130: perror("calloc");
131: exit(-1);
132: }
133: }
134:
135: (void) fseek(outf, (long) (sizeof Tbl + sizeof *seekpts * curseek), 0);
136: (void) fseek(inf, (long) 0, 0); /* goto start of input */
137:
138: /*
139: * write the strings onto the file
140: */
141:
142: Tbl.str_longlen = 0;
143: Tbl.str_shortlen = (unsigned int) 0xffffffff;
144: lp = seekpts;
145: first = Oflag;
146: *seekpts = ftell(outf);
147: fp = Firstch;
148: do {
149: sp = fgets(string, 256, inf);
150: if (sp == NULL ||
151: (*sp == dc && (sp[1] == dc || sp[1] == DELIM_CH))) {
152: putc('\0', outf);
153: *++lp = ftell(outf);
154: li = ftell(outf) - lp[-1] - 1;
155: if (Tbl.str_longlen < li)
156: Tbl.str_longlen = li;
157: if (Tbl.str_shortlen > li)
158: Tbl.str_shortlen = li;
159: if (sp && sp[1] == DELIM_CH && Delim < MAXDELIMS)
160: Tbl.str_delims[Delim++] = lp - seekpts;
161: first = Oflag;
162: }
163: else {
164: if (first) {
165: for (nsp = sp; !isalnum(*nsp); nsp++)
166: continue;
167: if (Iflag && isupper(*nsp))
168: fp->first = tolower(*nsp);
169: else
170: fp->first = *nsp;
171: fp->pos = *lp;
172: fp++;
173: first = FALSE;
174: }
175: fputs(sp, outf);
176: }
177: } while (sp != NULL);
178:
179: /*
180: * write the tables in
181: */
182:
183: (void) fclose(inf);
184: Tbl.str_numstr = curseek - 1;
185:
186: if (Oflag)
187: do_order(seekpts, outf);
188: else if (Rflag)
189: randomize(seekpts);
190:
191: (void) fseek(outf, (long) 0, 0);
192: (void) fwrite((char *) &Tbl, sizeof Tbl, 1, outf);
193: (void) fwrite((char *) seekpts, sizeof *seekpts, curseek, outf);
194: (void) fclose(outf);
195:
196: if (!Sflag) {
197: printf("\"%s\" converted to \"%s\"\n", Infile, Outfile);
198: if (curseek == 0)
199: puts("There was 1 string");
200: else
201: printf("There were %u strings\n", curseek - 1);
202: printf("Longest string: %u byte%s\n", Tbl.str_longlen,
203: Tbl.str_longlen == 1 ? "" : "s");
204: printf("Shortest string: %u byte%s\n", Tbl.str_shortlen,
205: Tbl.str_shortlen == 1 ? "" : "s");
206: }
207: exit(0);
208: }
209:
210: /*
211: * This routine evaluates arguments from the command line
212: */
213: getargs(ac, av)
214: register int ac;
215: register char **av;
216: {
217: register char *sp;
218: register int i;
219: register int bad, j;
220:
221: bad = 0;
222: for (i = 1; i < ac; i++)
223: if (*av[i] == '-' && av[i][1]) {
224: for (sp = &av[i][1]; *sp; sp++)
225: switch (*sp) {
226: case 'c': /* new delimiting char */
227: if ((Delimch = *++sp) == '\0') {
228: --sp;
229: Delimch = *av[++i];
230: }
231: if (Delimch <= 0 || Delimch > '~' ||
232: Delimch == DELIM_CH) {
233: printf("bad delimiting character: '\\%o\n'",
234: Delimch);
235: bad++;
236: }
237: break;
238: case 's': /* silent */
239: Sflag++;
240: break;
241: case 'v': /* verbose */
242: Sflag = 0;
243: break;
244: case 'o': /* order strings */
245: Oflag++;
246: break;
247: case 'i': /* ignore case in ordering */
248: Iflag++;
249: break;
250: case 'r': /* ignore case in ordering */
251: Rflag++;
252: break;
253: default: /* unknown flag */
254: bad++;
255: printf("bad flag: '%c'\n", *sp);
256: break;
257: }
258: }
259: else if (*av[i] == '-') {
260: for (j = 0; Usage[j]; j++)
261: puts(Usage[j]);
262: exit(0);
263: }
264: else if (Infile)
265: (void) strcpy(Outfile, av[i]);
266: else
267: Infile = av[i];
268: if (!Infile) {
269: bad++;
270: puts("No input file name");
271: }
272: if (*Outfile == '\0' && !bad) {
273: (void) strcpy(Outfile, Infile);
274: (void) strcat(Outfile, ".dat");
275: }
276: if (bad) {
277: puts("use \"strfile -\" to get usage");
278: exit(-1);
279: }
280: }
281:
282: /*
283: * do_order:
284: * Order the strings alphabetically (possibly ignoring case).
285: */
286: do_order(seekpts, outf)
287: long *seekpts;
288: FILE *outf;
289: {
290: register int i;
291: register long *lp;
292: register STR *fp;
293: extern int cmp_str();
294:
295: (void) fflush(outf);
296: Sort_1 = fopen(Outfile, "r");
297: Sort_2 = fopen(Outfile, "r");
298: Seekpts = seekpts;
299: qsort((char *) Firstch, Tbl.str_numstr, sizeof *Firstch, cmp_str);
300: i = Tbl.str_numstr;
301: lp = seekpts;
302: fp = Firstch;
303: while (i--)
304: *lp++ = fp++->pos;
305: (void) fclose(Sort_1);
306: (void) fclose(Sort_2);
307: Tbl.str_flags |= STR_ORDERED;
308: }
309:
310: /*
311: * cmp_str:
312: * Compare two strings in the file
313: */
314: cmp_str(p1, p2)
315: STR *p1, *p2;
316: {
317: register int c1, c2;
318:
319: c1 = p1->first;
320: c2 = p2->first;
321: if (c1 != c2)
322: return c1 - c2;
323:
324: (void) fseek(Sort_1, p1->pos, 0);
325: (void) fseek(Sort_2, p2->pos, 0);
326:
327: while (!isalnum(c1 = getc(Sort_1)) && c1 != '\0')
328: continue;
329: while (!isalnum(c2 = getc(Sort_2)) && c2 != '\0')
330: continue;
331:
332: while (c1 != '\0' && c2 != '\0') {
333: if (Iflag) {
334: if (isupper(c1))
335: c1 = tolower(c1);
336: if (isupper(c2))
337: c2 = tolower(c2);
338: }
339: if (c1 != c2)
340: return c1 - c2;
341: c1 = getc(Sort_1);
342: c2 = getc(Sort_2);
343: }
344: return c1 - c2;
345: }
346:
347: /*
348: * randomize:
349: * Randomize the order of the string table. We must be careful
350: * not to randomize across delimiter boundaries. All
351: * randomization is done within each block.
352: */
353: randomize(seekpts)
354: register long *seekpts;
355: {
356: register int cnt, i, j, start;
357: register long tmp;
358: register long *origsp;
359:
360: Tbl.str_flags |= STR_RANDOM;
361: srnd(time((long *) NULL) + getpid());
362: origsp = seekpts;
363: for (j = 0; j <= Delim; j++) {
364:
365: /*
366: * get the starting place for the block
367: */
368:
369: if (j == 0)
370: start = 0;
371: else
372: start = Tbl.str_delims[j - 1];
373:
374: /*
375: * get the ending point
376: */
377:
378: if (j == Delim)
379: cnt = Tbl.str_numstr;
380: else
381: cnt = Tbl.str_delims[j];
382:
383: /*
384: * move things around randomly
385: */
386:
387: for (seekpts = &origsp[start]; cnt > start; cnt--, seekpts++) {
388: i = rnd(cnt - start);
389: tmp = seekpts[0];
390: seekpts[0] = seekpts[i];
391: seekpts[i] = tmp;
392: }
393: }
394: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.