|
|
1.1 root 1: #ifndef lint
2: static char *sccsid = "@(#)ptx.c 4.5 (Berkeley) 5/11/89";
3: #endif /* not lint */
4:
5: /* permuted title index
6: ptx [-t] [-i ignore] [-o only] [-w num] [-f] [input] [output]
7: Ptx reads the input file and permutes on words in it.
8: It excludes all words in the ignore file.
9: Alternately it includes words in the only file.
10: if neither is given it excludes the words in _PATH_EIGN.
11:
12: The width of the output line can be changed to num
13: characters. If omitted, 72 is the default, unless troff output is requested, in which case it is 100.
14: the -f flag tells the program to fold the output
15: the -t flag says the output is for troff and the
16: output is then wider.
17:
18: */
19:
20: #include <stdio.h>
21: #include <ctype.h>
22: #include <signal.h>
23: #include "pathnames.h"
24:
25: #define TILDE 0177
26: #define N 30
27: #define MAX N*BUFSIZ
28: #define LMAX 200
29: #define MAXT 2048
30: #define MASK 03777
31: #define SET 1
32:
33: #define isabreak(c) (btable[c])
34:
35: extern char *calloc(), *mktemp();
36: extern char *getline();
37: int status;
38:
39:
40: char *hasht[MAXT];
41: char line[LMAX];
42: char btable[128];
43: int ignore;
44: int only;
45: int llen = 72;
46: int gap = 3;
47: int gutter = 3;
48: int mlen = LMAX;
49: int wlen;
50: int rflag;
51: int halflen;
52: char *strtbufp, *endbufp;
53: char *empty = "";
54:
55: char *infile;
56: FILE *inptr = stdin;
57:
58: char *outfile;
59: FILE *outptr = stdout;
60:
61: char sortfile[] = _PATH_TMP; /* output of sort program */
62: char nofold[] = {'-', 'd', 't', TILDE, 0};
63: char fold[] = {'-', 'd', 'f', 't', TILDE, 0};
64: char *sortopt = nofold;
65: FILE *sortptr;
66:
67: char *bfile; /*contains user supplied break chars */
68: FILE *bptr;
69:
70: main(argc,argv)
71: int argc;
72: char **argv;
73: {
74: register int c;
75: register char *bufp;
76: int pid;
77: char *pend;
78: extern onintr();
79:
80: char *xfile;
81: FILE *xptr;
82:
83: if(signal(SIGHUP,onintr)==SIG_IGN)
84: signal(SIGHUP,SIG_IGN);
85: if(signal(SIGINT,onintr)==SIG_IGN)
86: signal(SIGINT,SIG_IGN);
87: signal(SIGPIPE,onintr);
88: signal(SIGTERM,onintr);
89:
90: /* argument decoding */
91:
92: xfile = _PATH_EIGN;
93: argv++;
94: while(argc>1 && **argv == '-') {
95: switch (*++*argv){
96:
97: case 'r':
98: rflag++;
99: break;
100: case 'f':
101: sortopt = fold;
102: break;
103:
104: case 'w':
105: if(argc >= 2) {
106: argc--;
107: wlen++;
108: llen = atoi(*++argv);
109: if(llen == 0)
110: diag("Wrong width:",*argv);
111: if(llen > LMAX) {
112: llen = LMAX;
113: msg("Lines truncated to 200 chars.",empty);
114: }
115: break;
116: }
117:
118: case 't':
119: if(wlen == 0)
120: llen = 100;
121: break;
122: case 'g':
123: if(argc >=2) {
124: argc--;
125: gap = gutter = atoi(*++argv);
126: }
127: break;
128:
129: case 'i':
130: if(only)
131: diag("Only file already given.",empty);
132: if (argc>=2){
133: argc--;
134: ignore++;
135: xfile = *++argv;
136: }
137: break;
138:
139: case 'o':
140: if(ignore)
141: diag("Ignore file already given",empty);
142: if (argc>=2){
143: only++;
144: argc--;
145: xfile = *++argv;
146: }
147: break;
148:
149: case 'b':
150: if(argc>=2) {
151: argc--;
152: bfile = *++argv;
153: }
154: break;
155:
156: default:
157: msg("Illegal argument:",*argv);
158: }
159: argc--;
160: argv++;
161: }
162:
163: if(argc>3)
164: diag("Too many filenames",empty);
165: else if(argc==3){
166: infile = *argv++;
167: outfile = *argv;
168: if((outptr = fopen(outfile,"w")) == NULL)
169: diag("Cannot open output file:",outfile);
170: } else if(argc==2) {
171: infile = *argv;
172: outfile = 0;
173: }
174:
175:
176: /* Default breaks of blank, tab and newline */
177: btable[' '] = SET;
178: btable['\t'] = SET;
179: btable['\n'] = SET;
180: if(bfile) {
181: if((bptr = fopen(bfile,"r")) == NULL)
182: diag("Cannot open break char file",bfile);
183:
184: while((c = getc(bptr)) != EOF)
185: btable[c] = SET;
186: }
187:
188: /* Allocate space for a buffer. If only or ignore file present
189: read it into buffer. Else read in default ignore file
190: and put resulting words in buffer.
191: */
192:
193:
194: if((strtbufp = calloc(N,BUFSIZ)) == NULL)
195: diag("Out of memory space",empty);
196: bufp = strtbufp;
197: endbufp = strtbufp+MAX;
198:
199: if((xptr = fopen(xfile,"r")) == NULL)
200: diag("Cannot open file",xfile);
201:
202: while(bufp < endbufp && (c = getc(xptr)) != EOF) {
203: if(isabreak(c)) {
204: if(storeh(hash(strtbufp,bufp),strtbufp))
205: diag("Too many words",xfile);
206: *bufp++ = '\0';
207: strtbufp = bufp;
208: }
209: else {
210: *bufp++ = (isupper(c)?tolower(c):c);
211: }
212: }
213: if (bufp >= endbufp)
214: diag("Too many words in file",xfile);
215: endbufp = --bufp;
216:
217: /* open output file for sorting */
218:
219: mktemp(sortfile);
220: if((sortptr = fopen(sortfile, "w")) == NULL)
221: diag("Cannot open output for sorting:",sortfile);
222:
223: /* get a line of data and compare each word for
224: inclusion or exclusion in the sort phase
225: */
226:
227: if (infile!=0 && (inptr = fopen(infile,"r")) == NULL)
228: diag("Cannot open data: ",infile);
229: while(pend=getline())
230: cmpline(pend);
231: fclose(sortptr);
232:
233: switch (pid = fork()){
234:
235: case -1: /* cannot fork */
236: diag("Cannot fork",empty);
237:
238: case 0: /* child */
239: execl(_PATH_SORT, "sort", sortopt, "+0", "-1", "+1",
240: sortfile, "-o", sortfile, 0);
241:
242: default: /* parent */
243: while(wait(&status) != pid);
244: }
245:
246:
247: getsort();
248: unlink(sortfile);
249: exit(0);
250: }
251:
/*
 * msg -- write the two strings s and arg to standard error, separated by
 * one blank and followed by a newline.
 */
msg(s,arg)
char *s;
char *arg;
{
	(void) fprintf(stderr, "%s %s\n", s, arg);
}
/*
 * diag -- fatal diagnostic: print s and arg on standard error in the same
 * "s arg" form msg() uses, then terminate with exit status 1.
 */
diag(s,arg)
char *s, *arg;
{
	(void) fprintf(stderr, "%s %s\n", s, arg);
	exit(1);
}
266:
267:
268: char *getline()
269: {
270:
271: register c;
272: register char *linep;
273: char *endlinep;
274:
275:
276: endlinep= line + mlen;
277: linep = line;
278: /* Throw away leading white space */
279:
280: while(isspace(c=getc(inptr)))
281: ;
282: if(c==EOF)
283: return(0);
284: ungetc(c,inptr);
285: while(( c=getc(inptr)) != EOF) {
286: switch (c) {
287:
288: case '\t':
289: if(linep<endlinep)
290: *linep++ = ' ';
291: break;
292: case '\n':
293: while(isspace(*--linep));
294: *++linep = '\n';
295: return(linep);
296: default:
297: if(linep < endlinep)
298: *linep++ = c;
299: }
300: }
301: return(0);
302: }
303:
304: cmpline(pend)
305: char *pend;
306: {
307:
308: char *pstrt, *pchar, *cp;
309: char **hp;
310: int flag;
311:
312: pchar = line;
313: if(rflag)
314: while(pchar<pend&&!isspace(*pchar))
315: pchar++;
316: while(pchar<pend){
317: /* eliminate white space */
318: if(isabreak(*pchar++))
319: continue;
320: pstrt = --pchar;
321:
322: flag = 1;
323: while(flag){
324: if(isabreak(*pchar)) {
325: hp = &hasht[hash(pstrt,pchar)];
326: pchar--;
327: while(cp = *hp++){
328: if(hp == &hasht[MAXT])
329: hp = hasht;
330: /* possible match */
331: if(cmpword(pstrt,pchar,cp)){
332: /* exact match */
333: if(!ignore && only)
334: putline(pstrt,pend);
335: flag = 0;
336: break;
337: }
338: }
339: /* no match */
340: if(flag){
341: if(ignore || !only)
342: putline(pstrt,pend);
343: flag = 0;
344: }
345: }
346: pchar++;
347: }
348: }
349: }
350:
/*
 * cmpword -- compare a word from the text with a word from the table.
 *
 *	cpp	first character of the text word
 *	pend	LAST character of the text word (not one past it)
 *	hpp	NUL terminated, lower-case table word
 *
 * Upper-case letters of the text word are folded to lower case before
 * comparing.  Returns 1 when the table word matches the whole text word,
 * 0 otherwise.
 */
int
cmpword(cpp,pend,hpp)
char *cpp, *pend, *hpp;
{
	char c;

	/* an empty table word matches nothing; testing it up front also
	   avoids forming a pointer in front of the text word */
	if(*hpp == '\0')
		return(0);

	while(*hpp != '\0'){
		c = *cpp++;
		/* <ctype.h> needs an unsigned char value */
		if(isupper((unsigned char)c))
			c = tolower((unsigned char)c);
		if(c != *hpp++)
			return(0);
	}
	/* cpp is one past the last compared character */
	return(cpp - 1 == pend);
}
364:
365: putline(strt, end)
366: char *strt, *end;
367: {
368: char *cp;
369:
370: for(cp=strt; cp<end; cp++)
371: putc(*cp, sortptr);
372: /* Add extra blank before TILDE to sort correctly
373: with -fd option */
374: putc(' ',sortptr);
375: putc(TILDE,sortptr);
376: for (cp=line; cp<strt; cp++)
377: putc(*cp,sortptr);
378: putc('\n',sortptr);
379: }
380:
381: getsort()
382: {
383: register c;
384: register char *tilde, *linep, *ref;
385: char *p1a,*p1b,*p2a,*p2b,*p3a,*p3b,*p4a,*p4b;
386: int w;
387: char *rtrim(), *ltrim();
388:
389: if((sortptr = fopen(sortfile,"r")) == NULL)
390: diag("Cannot open sorted data:",sortfile);
391:
392: halflen = (llen-gutter)/2;
393: linep = line;
394: while((c = getc(sortptr)) != EOF) {
395: switch(c) {
396:
397: case TILDE:
398: tilde = linep;
399: break;
400:
401: case '\n':
402: while(isspace(linep[-1]))
403: linep--;
404: ref = tilde;
405: if(rflag) {
406: while(ref<linep&&!isspace(*ref))
407: ref++;
408: *ref++ = 0;
409: }
410: /* the -1 is an overly conservative test to leave
411: space for the / that signifies truncation*/
412: p3b = rtrim(p3a=line,tilde,halflen-1);
413: if(p3b-p3a>halflen-1)
414: p3b = p3a+halflen-1;
415: p2a = ltrim(ref,p2b=linep,halflen-1);
416: if(p2b-p2a>halflen-1)
417: p2a = p2b-halflen-1;
418: p1b = rtrim(p1a=p3b+(isspace(p3b[0])!=0),tilde,
419: w=halflen-(p2b-p2a)-gap);
420: if(p1b-p1a>w)
421: p1b = p1a;
422: p4a = ltrim(ref,p4b=p2a-(isspace(p2a[-1])!=0),
423: w=halflen-(p3b-p3a)-gap);
424: if(p4b-p4a>w)
425: p4a = p4b;
426: fprintf(outptr,".xx \"");
427: putout(p1a,p1b);
428: /* tilde-1 to account for extra space before TILDE */
429: if(p1b!=(tilde-1) && p1a!=p1b)
430: fprintf(outptr,"/");
431: fprintf(outptr,"\" \"");
432: if(p4a==p4b && p2a!=ref && p2a!=p2b)
433: fprintf(outptr,"/");
434: putout(p2a,p2b);
435: fprintf(outptr,"\" \"");
436: putout(p3a,p3b);
437: /* ++p3b to account for extra blank after TILDE */
438: /* ++p3b to account for extra space before TILDE */
439: if(p1a==p1b && ++p3b!=tilde)
440: fprintf(outptr,"/");
441: fprintf(outptr,"\" \"");
442: if(p1a==p1b && p4a!=ref && p4a!=p4b)
443: fprintf(outptr,"/");
444: putout(p4a,p4b);
445: if(rflag)
446: fprintf(outptr,"\" %s\n",tilde);
447: else
448: fprintf(outptr,"\"\n");
449: linep = line;
450: break;
451:
452: case '"':
453: /* put double " for " */
454: *linep++ = c;
455: default:
456: *linep++ = c;
457: }
458: }
459: }
460:
/*
 * rtrim -- find where to cut the text a..c on the right so that at most
 * d characters remain and the cut falls at the end of a word.
 *
 * Returns the right-most position x, a < x <= c and x-a <= d, that is
 * either c itself or a white-space character, and that directly follows
 * a non-white-space character.  When no such position exists c is
 * returned, so the caller must still check the resulting length.
 */
char *rtrim(a,c,d)
char *a,*c;
int d;
{
	char *b,*x;
	long n, span;

	b = c;
	span = c - a;
	/* walk by offset so that no pointer beyond c is ever formed */
	for(n = 1; n <= span && n <= d; n++) {
		x = a + n;
		if((x==c||isspace((unsigned char)x[0]))&&!isspace((unsigned char)x[-1]))
			b = x;
	}
	/* b is c or points at white space here, so no further
	   adjustment can apply */
	return(b);
}
473:
/*
 * ltrim -- find where to cut the text c..b on the left so that at most
 * d characters remain and the cut falls at the start of a word.
 *
 * Returns the left-most position x, c <= x < b and b-x <= d, that holds a
 * non-white-space character and is either c itself or directly preceded
 * by white space.  When no such position exists c is returned, so the
 * caller must still check the resulting length.
 */
char *ltrim(c,b,d)
char *c,*b;
int d;
{
	char *a,*x;
	long n, span;

	a = c;
	span = b - c;
	/* walk by offset so that no pointer below c is ever formed */
	for(n = 1; n <= span && n <= d; n++) {
		x = b - n;
		if(!isspace((unsigned char)x[0])&&(x==c||isspace((unsigned char)x[-1])))
			a = x;
	}
	/* a is c or is preceded by white space here, so no further
	   adjustment can apply */
	return(a);
}
486:
487: putout(strt,end)
488: char *strt, *end;
489: {
490: char *cp;
491:
492: cp = strt;
493:
494: for(cp=strt; cp<end; cp++) {
495: putc(*cp,outptr);
496: }
497: }
498:
499: onintr()
500: {
501:
502: unlink(sortfile);
503: exit(1);
504: }
505:
506: hash(strtp,endp)
507: char *strtp, *endp;
508: {
509: char *cp, c;
510: int i, j, k;
511:
512: /* Return zero hash number for single letter words */
513: if((endp - strtp) == 1)
514: return(0);
515:
516: cp = strtp;
517: c = *cp++;
518: i = (isupper(c)?tolower(c):c);
519: c = *cp;
520: j = (isupper(c)?tolower(c):c);
521: i = i*j;
522: cp = --endp;
523: c = *cp--;
524: k = (isupper(c)?tolower(c):c);
525: c = *cp;
526: j = (isupper(c)?tolower(c):c);
527: j = k*j;
528:
529: k = (i ^ (j>>2)) & MASK;
530: return(k);
531: }
532:
533: storeh(num,strtp)
534: int num;
535: char *strtp;
536: {
537: int i;
538:
539: for(i=num; i<MAXT; i++) {
540: if(hasht[i] == 0) {
541: hasht[i] = strtp;
542: return(0);
543: }
544: }
545: for(i=0; i<num; i++) {
546: if(hasht[i] == 0) {
547: hasht[i] = strtp;
548: return(0);
549: }
550: }
551: return(1);
552: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.