|
|
1.1 root 1: static char *sccsid = "@(#)ptx.c 4.2 (Berkeley) 9/23/85";
2: #
3:
4: /* permuted title index
5: ptx [-t] [-i ignore] [-o only] [-w num] [-f] [input] [output]
6: Ptx reads the input file and permutes on words in it.
7: It excludes all words in the ignore file.
8: Alternately it includes words in the only file.
9: if neither is given it excludes the words in /usr/lib/eign.
10:
11: The width of the output line can be changed to num
12: characters. If omitted, 72 is the default, unless troff output is requested, in which case it is 100.
13: the -f flag tells the program to fold the output
14: the -t flag says the output is for troff and the
15: output is then wider.
16:
17: make: cc ptx.c -lS
18: */
19:
20: #include <stdio.h>
21: #include <ctype.h>
22: #include <signal.h>
/* Default ignore-word file, used when neither -i nor -o names one. */
#define DEFLTX "/usr/lib/eign"
/* Separator written between the two halves of every sort record. */
#define TILDE 0177
/* Sort program run over the temporary file. */
#define SORT "/usr/bin/sort"
/* The word buffer is allocated as N blocks of BUFSIZ bytes. */
#define N 30
/* Size of the word buffer in bytes.  Parenthesized so that MAX behaves as
   a single operand wherever it is expanded (e.g. x % MAX, x / MAX). */
#define MAX (N*BUFSIZ)
/* Size of the line buffer; also the largest accepted output width. */
#define LMAX 200
/* Number of slots in the word hash table. */
#define MAXT 2048
/* Mask applied to hash values; keeps them in the range 0..03777. */
#define MASK 03777
/* Value stored in btable[] to mark a break character. */
#define SET 1

/* Nonzero when c is marked as a break (word separator) character. */
#define isabreak(c) (btable[c])
34:
35: extern char *calloc(), *mktemp();
36: extern char *getline();
37: int status;
38:
39:
40: char *hasht[MAXT];
41: char line[LMAX];
42: char btable[128];
43: int ignore;
44: int only;
45: int llen = 72;
46: int gap = 3;
47: int gutter = 3;
48: int mlen = LMAX;
49: int wlen;
50: int rflag;
51: int halflen;
52: char *strtbufp, *endbufp;
53: char *empty = "";
54:
55: char *infile;
56: FILE *inptr = stdin;
57:
58: char *outfile;
59: FILE *outptr = stdout;
60:
61: char *sortfile; /* output of sort program */
62: char nofold[] = {'-', 'd', 't', TILDE, 0};
63: char fold[] = {'-', 'd', 'f', 't', TILDE, 0};
64: char *sortopt = nofold;
65: FILE *sortptr;
66:
67: char *bfile; /*contains user supplied break chars */
68: FILE *bptr;
69:
70: main(argc,argv)
71: int argc;
72: char **argv;
73: {
74: register int c;
75: register char *bufp;
76: int pid;
77: char *pend;
78: extern onintr();
79:
80: char *xfile;
81: FILE *xptr;
82:
83: if(signal(SIGHUP,onintr)==SIG_IGN)
84: signal(SIGHUP,SIG_IGN);
85: if(signal(SIGINT,onintr)==SIG_IGN)
86: signal(SIGINT,SIG_IGN);
87: signal(SIGPIPE,onintr);
88: signal(SIGTERM,onintr);
89:
90: /* argument decoding */
91:
92: xfile = DEFLTX;
93: argv++;
94: while(argc>1 && **argv == '-') {
95: switch (*++*argv){
96:
97: case 'r':
98: rflag++;
99: break;
100: case 'f':
101: sortopt = fold;
102: break;
103:
104: case 'w':
105: if(argc >= 2) {
106: argc--;
107: wlen++;
108: llen = atoi(*++argv);
109: if(llen == 0)
110: diag("Wrong width:",*argv);
111: if(llen > LMAX) {
112: llen = LMAX;
113: msg("Lines truncated to 200 chars.",empty);
114: }
115: break;
116: }
117:
118: case 't':
119: if(wlen == 0)
120: llen = 100;
121: break;
122: case 'g':
123: if(argc >=2) {
124: argc--;
125: gap = gutter = atoi(*++argv);
126: }
127: break;
128:
129: case 'i':
130: if(only)
131: diag("Only file already given.",empty);
132: if (argc>=2){
133: argc--;
134: ignore++;
135: xfile = *++argv;
136: }
137: break;
138:
139: case 'o':
140: if(ignore)
141: diag("Ignore file already given",empty);
142: if (argc>=2){
143: only++;
144: argc--;
145: xfile = *++argv;
146: }
147: break;
148:
149: case 'b':
150: if(argc>=2) {
151: argc--;
152: bfile = *++argv;
153: }
154: break;
155:
156: default:
157: msg("Illegal argument:",*argv);
158: }
159: argc--;
160: argv++;
161: }
162:
163: if(argc>3)
164: diag("Too many filenames",empty);
165: else if(argc==3){
166: infile = *argv++;
167: outfile = *argv;
168: if((outptr = fopen(outfile,"w")) == NULL)
169: diag("Cannot open output file:",outfile);
170: } else if(argc==2) {
171: infile = *argv;
172: outfile = 0;
173: }
174:
175:
176: /* Default breaks of blank, tab and newline */
177: btable[' '] = SET;
178: btable['\t'] = SET;
179: btable['\n'] = SET;
180: if(bfile) {
181: if((bptr = fopen(bfile,"r")) == NULL)
182: diag("Cannot open break char file",bfile);
183:
184: while((c = getc(bptr)) != EOF)
185: btable[c] = SET;
186: }
187:
188: /* Allocate space for a buffer. If only or ignore file present
189: read it into buffer. Else read in default ignore file
190: and put resulting words in buffer.
191: */
192:
193:
194: if((strtbufp = calloc(N,BUFSIZ)) == NULL)
195: diag("Out of memory space",empty);
196: bufp = strtbufp;
197: endbufp = strtbufp+MAX;
198:
199: if((xptr = fopen(xfile,"r")) == NULL)
200: diag("Cannot open file",xfile);
201:
202: while(bufp < endbufp && (c = getc(xptr)) != EOF) {
203: if(isabreak(c)) {
204: if(storeh(hash(strtbufp,bufp),strtbufp))
205: diag("Too many words",xfile);
206: *bufp++ = '\0';
207: strtbufp = bufp;
208: }
209: else {
210: *bufp++ = (isupper(c)?tolower(c):c);
211: }
212: }
213: if (bufp >= endbufp)
214: diag("Too many words in file",xfile);
215: endbufp = --bufp;
216:
217: /* open output file for sorting */
218:
219: sortfile = mktemp("/tmp/ptxsXXXXX");
220: if((sortptr = fopen(sortfile, "w")) == NULL)
221: diag("Cannot open output for sorting:",sortfile);
222:
223: /* get a line of data and compare each word for
224: inclusion or exclusion in the sort phase
225: */
226:
227: if (infile!=0 && (inptr = fopen(infile,"r")) == NULL)
228: diag("Cannot open data: ",infile);
229: while(pend=getline())
230: cmpline(pend);
231: fclose(sortptr);
232:
233: switch (pid = fork()){
234:
235: case -1: /* cannot fork */
236: diag("Cannot fork",empty);
237:
238: case 0: /* child */
239: execl(SORT, SORT, sortopt, "+0", "-1", "+1",
240: sortfile, "-o", sortfile, 0);
241:
242: default: /* parent */
243: while(wait(&status) != pid);
244: }
245:
246:
247: getsort();
248: if(*sortfile)
249: unlink(sortfile);
250: exit(0);
251: }
252:
/*
 * Print a diagnostic on stderr: the text s, one blank, the detail arg,
 * and a newline.  Callers with no detail pass an empty string.
 */
int msg(s,arg)
char *s;
char *arg;
{
	(void) fprintf(stderr,"%s %s\n",s,arg);
	return(0);
}
/*
 * Fatal diagnostic: report s and arg through msg(), then terminate the
 * program with exit status 1.  Does not return.
 */
int diag(s,arg)
char *s;
char *arg;
{
	msg(s,arg);
	exit(1);
}
267:
268:
269: char *getline()
270: {
271:
272: register c;
273: register char *linep;
274: char *endlinep;
275:
276:
277: endlinep= line + mlen;
278: linep = line;
279: /* Throw away leading white space */
280:
281: while(isspace(c=getc(inptr)))
282: ;
283: if(c==EOF)
284: return(0);
285: ungetc(c,inptr);
286: while(( c=getc(inptr)) != EOF) {
287: switch (c) {
288:
289: case '\t':
290: if(linep<endlinep)
291: *linep++ = ' ';
292: break;
293: case '\n':
294: while(isspace(*--linep));
295: *++linep = '\n';
296: return(linep);
297: default:
298: if(linep < endlinep)
299: *linep++ = c;
300: }
301: }
302: return(0);
303: }
304:
305: cmpline(pend)
306: char *pend;
307: {
308:
309: char *pstrt, *pchar, *cp;
310: char **hp;
311: int flag;
312:
313: pchar = line;
314: if(rflag)
315: while(pchar<pend&&!isspace(*pchar))
316: pchar++;
317: while(pchar<pend){
318: /* eliminate white space */
319: if(isabreak(*pchar++))
320: continue;
321: pstrt = --pchar;
322:
323: flag = 1;
324: while(flag){
325: if(isabreak(*pchar)) {
326: hp = &hasht[hash(pstrt,pchar)];
327: pchar--;
328: while(cp = *hp++){
329: if(hp == &hasht[MAXT])
330: hp = hasht;
331: /* possible match */
332: if(cmpword(pstrt,pchar,cp)){
333: /* exact match */
334: if(!ignore && only)
335: putline(pstrt,pend);
336: flag = 0;
337: break;
338: }
339: }
340: /* no match */
341: if(flag){
342: if(ignore || !only)
343: putline(pstrt,pend);
344: flag = 0;
345: }
346: }
347: pchar++;
348: }
349: }
350: }
351:
/*
 * Compare a word from the text with a stored (lower-case) word.
 *
 * cpp  - first character of the text word
 * pend - LAST character of the text word (not one past it)
 * hpp  - NUL-terminated word from the hash table
 *
 * The text is folded to lower case before comparison.  Returns 1 when the
 * stored word matches the text word exactly (same characters and same
 * length), 0 otherwise.
 */
int cmpword(cpp,pend,hpp)
char *cpp, *pend, *hpp;
{
	unsigned char c;

	while(*hpp != '\0'){
		/* ctype routines need a value representable as unsigned char */
		c = (unsigned char)*cpp++;
		if(isupper(c))
			c = (unsigned char)tolower(c);
		if(c != (unsigned char)*hpp++)
			return(0);
	}
	/* all of hpp matched; equal only if the text word ended here too */
	return(cpp - 1 == pend);
}
365:
366: putline(strt, end)
367: char *strt, *end;
368: {
369: char *cp;
370:
371: for(cp=strt; cp<end; cp++)
372: putc(*cp, sortptr);
373: /* Add extra blank before TILDE to sort correctly
374: with -fd option */
375: putc(' ',sortptr);
376: putc(TILDE,sortptr);
377: for (cp=line; cp<strt; cp++)
378: putc(*cp,sortptr);
379: putc('\n',sortptr);
380: }
381:
382: getsort()
383: {
384: register c;
385: register char *tilde, *linep, *ref;
386: char *p1a,*p1b,*p2a,*p2b,*p3a,*p3b,*p4a,*p4b;
387: int w;
388: char *rtrim(), *ltrim();
389:
390: if((sortptr = fopen(sortfile,"r")) == NULL)
391: diag("Cannot open sorted data:",sortfile);
392:
393: halflen = (llen-gutter)/2;
394: linep = line;
395: while((c = getc(sortptr)) != EOF) {
396: switch(c) {
397:
398: case TILDE:
399: tilde = linep;
400: break;
401:
402: case '\n':
403: while(isspace(linep[-1]))
404: linep--;
405: ref = tilde;
406: if(rflag) {
407: while(ref<linep&&!isspace(*ref))
408: ref++;
409: *ref++ = 0;
410: }
411: /* the -1 is an overly conservative test to leave
412: space for the / that signifies truncation*/
413: p3b = rtrim(p3a=line,tilde,halflen-1);
414: if(p3b-p3a>halflen-1)
415: p3b = p3a+halflen-1;
416: p2a = ltrim(ref,p2b=linep,halflen-1);
417: if(p2b-p2a>halflen-1)
418: p2a = p2b-halflen-1;
419: p1b = rtrim(p1a=p3b+(isspace(p3b[0])!=0),tilde,
420: w=halflen-(p2b-p2a)-gap);
421: if(p1b-p1a>w)
422: p1b = p1a;
423: p4a = ltrim(ref,p4b=p2a-(isspace(p2a[-1])!=0),
424: w=halflen-(p3b-p3a)-gap);
425: if(p4b-p4a>w)
426: p4a = p4b;
427: fprintf(outptr,".xx \"");
428: putout(p1a,p1b);
429: /* tilde-1 to account for extra space before TILDE */
430: if(p1b!=(tilde-1) && p1a!=p1b)
431: fprintf(outptr,"/");
432: fprintf(outptr,"\" \"");
433: if(p4a==p4b && p2a!=ref && p2a!=p2b)
434: fprintf(outptr,"/");
435: putout(p2a,p2b);
436: fprintf(outptr,"\" \"");
437: putout(p3a,p3b);
438: /* ++p3b to account for extra blank after TILDE */
439: /* ++p3b to account for extra space before TILDE */
440: if(p1a==p1b && ++p3b!=tilde)
441: fprintf(outptr,"/");
442: fprintf(outptr,"\" \"");
443: if(p1a==p1b && p4a!=ref && p4a!=p4b)
444: fprintf(outptr,"/");
445: putout(p4a,p4b);
446: if(rflag)
447: fprintf(outptr,"\" %s\n",tilde);
448: else
449: fprintf(outptr,"\"\n");
450: linep = line;
451: break;
452:
453: case '"':
454: /* put double " for " */
455: *linep++ = c;
456: default:
457: *linep++ = c;
458: }
459: }
460: }
461:
/*
 * Find where to cut the text [a,c) on the right so that it ends on a
 * word boundary.
 *
 * a - start of the text
 * c - end of the text (one past the last character)
 * d - number of characters from a that are examined
 *
 * Returns the last position x with a < x <= c and x-a <= d at which a word
 * ends (x is c or a white-space character, and the character before x is
 * not white space).  When no such position exists, c is returned and the
 * caller must check the length itself.
 */
char *rtrim(a,c,d)
char *a,*c;
int d;
{
	char *b,*x;

	b = c;
	for(x=a+1; x<=c&&x-a<=d; x++)
		if((x==c||isspace((unsigned char)x[0]))&&!isspace((unsigned char)x[-1]))
			b = x;
	if(b<c&&!isspace((unsigned char)b[0]))
		b++;
	return(b);
}
474:
/*
 * Find where to cut the text [c,b) on the left so that it starts on a
 * word boundary.
 *
 * c - start of the text
 * b - end of the text (one past the last character)
 * d - number of characters back from b that are examined
 *
 * Returns the leftmost position p with c <= p < b and b-p <= d at which a
 * word starts (p is not white space, and p is c or follows white space).
 * When no such position exists, c is returned and the caller must check
 * the length itself.
 */
char *ltrim(c,b,d)
char *c,*b;
int d;
{
	char *a,*x,*p;

	a = c;
	/* x stays one past the character examined, so no pointer before c
	   is ever formed */
	for(x=b; x>c&&b-(x-1)<=d; x--) {
		p = x-1;
		if(!isspace((unsigned char)p[0])&&(p==c||isspace((unsigned char)p[-1])))
			a = p;
	}
	if(a>c&&!isspace((unsigned char)a[-1]))
		a--;
	return(a);
}
487:
488: putout(strt,end)
489: char *strt, *end;
490: {
491: char *cp;
492:
493: cp = strt;
494:
495: for(cp=strt; cp<end; cp++) {
496: putc(*cp,outptr);
497: }
498: }
499:
500: onintr()
501: {
502:
503: if(*sortfile)
504: unlink(sortfile);
505: exit(1);
506: }
507:
508: hash(strtp,endp)
509: char *strtp, *endp;
510: {
511: char *cp, c;
512: int i, j, k;
513:
514: /* Return zero hash number for single letter words */
515: if((endp - strtp) == 1)
516: return(0);
517:
518: cp = strtp;
519: c = *cp++;
520: i = (isupper(c)?tolower(c):c);
521: c = *cp;
522: j = (isupper(c)?tolower(c):c);
523: i = i*j;
524: cp = --endp;
525: c = *cp--;
526: k = (isupper(c)?tolower(c):c);
527: c = *cp;
528: j = (isupper(c)?tolower(c):c);
529: j = k*j;
530:
531: k = (i ^ (j>>2)) & MASK;
532: return(k);
533: }
534:
535: storeh(num,strtp)
536: int num;
537: char *strtp;
538: {
539: int i;
540:
541: for(i=num; i<MAXT; i++) {
542: if(hasht[i] == 0) {
543: hasht[i] = strtp;
544: return(0);
545: }
546: }
547: for(i=0; i<num; i++) {
548: if(hasht[i] == 0) {
549: hasht[i] = strtp;
550: return(0);
551: }
552: }
553: return(1);
554: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.