|
|
1.1 root 1: #
2:
3: /* permuted title index
4: ptx [-t] [-r] [-f] [-i ignore] [-o only] [-w num] [-g gap] [-b breakfile] [input] [output]
5: Ptx reads the input file and permutes on words in it.
6: It excludes all words in the ignore file.
7: Alternatively, with -o, it permutes only on the words in the only file.
8: if neither is given it excludes the words in /usr/lib/eign.
9:
10: The width of the output line can be changed to num
11: characters. If omitted, 72 is the default, unless -t (troff) is given, in which case it is 100.
12: the -f flag tells the program to fold the output
13: the -t flag says the output is for troff and the
14: output is then wider.
15:
16: make: cc ptx.c -lS
17: */
18:
19: #include <stdio.h>
20: #include <ctype.h>
21: #include <signal.h>
#define DEFLTX "/usr/lib/eign"	/* word list used when neither -i nor -o is given */
#define TILDE 0177		/* separator between the two halves of a sort record */
#define SORT "/usr/bin/sort"	/* program exec'ed to sort the records */
#define N 30
/* Size in bytes of the ignore/only word buffer.  Parenthesized so that the
   macro stays a single operand wherever it is expanded. */
#define MAX (N*BUFSIZ)
#define LMAX 200		/* size of line[] */
#define MAXT 2048		/* number of slots in hasht[] */
#define MASK 03777		/* reduces a hash value to 0..MAXT-1 */
#define SET 1			/* value stored in btable[] for a break character */

/* Nonzero if c is a break (word separating) character.  The argument is
   evaluated exactly once and reduced to 0..255, so both plain chars (which
   may be negative) and values returned by getc() index inside btable[]. */
#define isabreak(c) (btable[(unsigned char)(c)])

extern char *calloc(), *mktemp();
extern char *getline();
int status;			/* exit status of the sort child, filled in by wait() */


char *hasht[MAXT];		/* open-addressed table of pointers to ignore/only words */
char line[LMAX];		/* current input line */
char btable[256];		/* one flag per possible byte value; SET marks a break character */
int ignore;		/* nonzero once -i has named an ignore file */
int only;		/* nonzero once -o has named an only file */
int llen = 72;		/* output line width; set by -w, or to 100 by -t when -w was not given */
int gap = 3;		/* set by -g; subtracted when getsort() sizes the two outer fields */
int gutter = 3;		/* set by -g; subtracted from llen when halflen is computed */
int mlen = LMAX;	/* number of bytes of line[] that getline() may fill */
int wlen;		/* nonzero once -w has been seen */
int rflag;		/* nonzero when -r was given: the first word of each line is a reference */
int halflen;		/* (llen-gutter)/2, computed at the start of getsort() */
char *strtbufp, *endbufp;	/* start of the word being collected, and end, of the word buffer main() allocates */
char *empty = "";	/* second argument for msg()/diag() when there is nothing to add */

char *infile;		/* input file name, when one was given on the command line */
FILE *inptr = stdin;	/* stream getline() reads from */

char *outfile;		/* output file name, when one was given on the command line */
FILE *outptr = stdout;	/* stream getsort()/putout() write to */

char *sortfile; /* temporary file: written by putline(), sorted in place by SORT, read back by getsort() */
char nofold[] = {'-', 'd', 't', TILDE, 0};	/* sort option string "-dt" followed by the TILDE character */
char fold[] = {'-', 'd', 'f', 't', TILDE, 0};	/* same with 'f' added; selected by -f */
char *sortopt = nofold;	/* option string handed to SORT */
FILE *sortptr;		/* stream open on sortfile */

char *bfile; /* name of the file of user supplied break chars (-b) */
FILE *bptr;		/* stream open on bfile */
68:
69: main(argc,argv)
70: int argc;
71: char **argv;
72: {
73: register int c;
74: register char *bufp;
75: int pid;
76: char *pend;
77: extern onintr();
78:
79: char *xfile;
80: FILE *xptr;
81:
82: if(signal(SIGHUP,onintr)==SIG_IGN)
83: signal(SIGHUP,SIG_IGN);
84: if(signal(SIGINT,onintr)==SIG_IGN)
85: signal(SIGINT,SIG_IGN);
86: signal(SIGPIPE,onintr);
87: signal(SIGTERM,onintr);
88:
89: /* argument decoding */
90:
91: xfile = DEFLTX;
92: argv++;
93: while(argc>1 && **argv == '-') {
94: switch (*++*argv){
95:
96: case 'r':
97: rflag++;
98: break;
99: case 'f':
100: sortopt = fold;
101: break;
102:
103: case 'w':
104: if(argc >= 2) {
105: argc--;
106: wlen++;
107: llen = atoi(*++argv);
108: if(llen == 0)
109: diag("Wrong width:",*argv);
110: if(llen > LMAX) {
111: llen = LMAX;
112: msg("Lines truncated to 200 chars.",empty);
113: }
114: break;
115: }
116:
117: case 't':
118: if(wlen == 0)
119: llen = 100;
120: break;
121: case 'g':
122: if(argc >=2) {
123: argc--;
124: gap = gutter = atoi(*++argv);
125: }
126: break;
127:
128: case 'i':
129: if(only)
130: diag("Only file already given.",empty);
131: if (argc>=2){
132: argc--;
133: ignore++;
134: xfile = *++argv;
135: }
136: break;
137:
138: case 'o':
139: if(ignore)
140: diag("Ignore file already given",empty);
141: if (argc>=2){
142: only++;
143: argc--;
144: xfile = *++argv;
145: }
146: break;
147:
148: case 'b':
149: if(argc>=2) {
150: argc--;
151: bfile = *++argv;
152: }
153: break;
154:
155: default:
156: msg("Illegal argument:",*argv);
157: }
158: argc--;
159: argv++;
160: }
161:
162: if(argc>3)
163: diag("Too many filenames",empty);
164: else if(argc==3){
165: infile = *argv++;
166: outfile = *argv;
167: if((outptr = fopen(outfile,"w")) == NULL)
168: diag("Cannot open output file:",outfile);
169: } else if(argc==2) {
170: infile = *argv;
171: outfile = 0;
172: }
173:
174:
175: /* Default breaks of blank, tab and newline */
176: btable[' '] = SET;
177: btable['\t'] = SET;
178: btable['\n'] = SET;
179: if(bfile) {
180: if((bptr = fopen(bfile,"r")) == NULL)
181: diag("Cannot open break char file",bfile);
182:
183: while((c = getc(bptr)) != EOF)
184: btable[c] = SET;
185: }
186:
187: /* Allocate space for a buffer. If only or ignore file present
188: read it into buffer. Else read in default ignore file
189: and put resulting words in buffer.
190: */
191:
192:
193: if((strtbufp = calloc(N,BUFSIZ)) == NULL)
194: diag("Out of memory space",empty);
195: bufp = strtbufp;
196: endbufp = strtbufp+MAX;
197:
198: if((xptr = fopen(xfile,"r")) == NULL)
199: diag("Cannot open file",xfile);
200:
201: while(bufp < endbufp && (c = getc(xptr)) != EOF) {
202: if(isabreak(c)) {
203: if(storeh(hash(strtbufp,bufp),strtbufp))
204: diag("Too many words",xfile);
205: *bufp++ = '\0';
206: strtbufp = bufp;
207: }
208: else {
209: *bufp++ = (isupper(c)?tolower(c):c);
210: }
211: }
212: if (bufp >= endbufp)
213: diag("Too many words in file",xfile);
214: endbufp = --bufp;
215:
216: /* open output file for sorting */
217:
218: sortfile = mktemp("/tmp/ptxsXXXXX");
219: if((sortptr = fopen(sortfile, "w")) == NULL)
220: diag("Cannot open output for sorting:",sortfile);
221:
222: /* get a line of data and compare each word for
223: inclusion or exclusion in the sort phase
224: */
225:
226: if (infile!=0 && (inptr = fopen(infile,"r")) == NULL)
227: diag("Cannot open data: ",infile);
228: while(pend=getline())
229: cmpline(pend);
230: fclose(sortptr);
231:
232: switch (pid = fork()){
233:
234: case -1: /* cannot fork */
235: diag("Cannot fork",empty);
236:
237: case 0: /* child */
238: execl(SORT, SORT, sortopt, "+0", "-1", "+1",
239: sortfile, "-o", sortfile, 0);
240:
241: default: /* parent */
242: while(wait(&status) != pid);
243: }
244:
245:
246: getsort();
247: onintr();
248: }
249:
/*
 * Write a diagnostic to the standard error stream: the text s, one
 * blank, the detail arg and a newline.
 */
msg(s, arg)
char *s, *arg;
{
	fprintf(stderr, "%s %s\n", s, arg);
}
257: diag(s,arg)
258: char *s, *arg;
259: {
260:
261: msg(s,arg);
262: exit(1);
263: }
264:
265:
266: char *getline()
267: {
268:
269: register c;
270: register char *linep;
271: char *endlinep;
272:
273:
274: endlinep= line + mlen;
275: linep = line;
276: /* Throw away leading white space */
277:
278: while(isspace(c=getc(inptr)))
279: ;
280: if(c==EOF)
281: return(0);
282: ungetc(c,inptr);
283: while(( c=getc(inptr)) != EOF) {
284: switch (c) {
285:
286: case '\t':
287: if(linep<endlinep)
288: *linep++ = ' ';
289: break;
290: case '\n':
291: while(isspace(*--linep));
292: *++linep = '\n';
293: return(linep);
294: default:
295: if(linep < endlinep)
296: *linep++ = c;
297: }
298: }
299: return(0);
300: }
301:
302: cmpline(pend)
303: char *pend;
304: {
305:
306: char *pstrt, *pchar, *cp;
307: char **hp;
308: int flag;
309:
310: pchar = line;
311: if(rflag)
312: while(pchar<pend&&!isspace(*pchar))
313: pchar++;
314: while(pchar<pend){
315: /* eliminate white space */
316: if(isabreak(*pchar++))
317: continue;
318: pstrt = --pchar;
319:
320: flag = 1;
321: while(flag){
322: if(isabreak(*pchar)) {
323: hp = &hasht[hash(pstrt,pchar)];
324: pchar--;
325: while(cp = *hp++){
326: if(hp == &hasht[MAXT])
327: hp = hasht;
328: /* possible match */
329: if(cmpword(pstrt,pchar,cp)){
330: /* exact match */
331: if(!ignore && only)
332: putline(pstrt,pend);
333: flag = 0;
334: break;
335: }
336: }
337: /* no match */
338: if(flag){
339: if(ignore || !only)
340: putline(pstrt,pend);
341: flag = 0;
342: }
343: }
344: pchar++;
345: }
346: }
347: }
348:
/*
 * Compare a word of the text with a stored word.
 *
 *	cpp	first character of the word in the text
 *	pend	last character of that word
 *	hpp	NUL terminated stored word, already in lower case
 *
 * Upper case letters of the text are folded to lower case before the
 * comparison.  Returns 1 when every character of hpp matches and the
 * text word ends exactly where hpp ends, 0 otherwise.
 *
 * Characters are handled as unsigned char values so that bytes with the
 * high bit set are never passed to the ctype functions as negative
 * numbers.
 */
int cmpword(cpp,pend,hpp)
char *cpp, *pend, *hpp;
{
	int c;

	while(*hpp != '\0'){
		c = (unsigned char)*cpp++;
		if(isupper(c))
			c = tolower(c);
		if(c != (unsigned char)*hpp++)
			return(0);
	}
	/* cpp is now one past the last character compared */
	return(cpp == pend + 1);
}
362:
363: putline(strt, end)
364: char *strt, *end;
365: {
366: char *cp;
367:
368: for(cp=strt; cp<end; cp++)
369: putc(*cp, sortptr);
370: /* Add extra blank before TILDE to sort correctly
371: with -fd option */
372: putc(' ',sortptr);
373: putc(TILDE,sortptr);
374: for (cp=line; cp<strt; cp++)
375: putc(*cp,sortptr);
376: putc('\n',sortptr);
377: }
378:
379: getsort()
380: {
381: register c;
382: register char *tilde, *linep, *ref;
383: char *p1a,*p1b,*p2a,*p2b,*p3a,*p3b,*p4a,*p4b;
384: int w;
385: char *rtrim(), *ltrim();
386:
387: if((sortptr = fopen(sortfile,"r")) == NULL)
388: diag("Cannot open sorted data:",sortfile);
389:
390: halflen = (llen-gutter)/2;
391: linep = line;
392: while((c = getc(sortptr)) != EOF) {
393: switch(c) {
394:
395: case TILDE:
396: tilde = linep;
397: break;
398:
399: case '\n':
400: while(isspace(linep[-1]))
401: linep--;
402: ref = tilde;
403: if(rflag) {
404: while(ref<linep&&!isspace(*ref))
405: ref++;
406: *ref++ = 0;
407: }
408: /* the -1 is an overly conservative test to leave
409: space for the / that signifies truncation*/
410: p3b = rtrim(p3a=line,tilde,halflen-1);
411: if(p3b-p3a>halflen-1)
412: p3b = p3a+halflen-1;
413: p2a = ltrim(ref,p2b=linep,halflen-1);
414: if(p2b-p2a>halflen-1)
415: p2a = p2b-halflen-1;
416: p1b = rtrim(p1a=p3b+(isspace(p3b[0])!=0),tilde,
417: w=halflen-(p2b-p2a)-gap);
418: if(p1b-p1a>w)
419: p1b = p1a;
420: p4a = ltrim(ref,p4b=p2a-(isspace(p2a[-1])!=0),
421: w=halflen-(p3b-p3a)-gap);
422: if(p4b-p4a>w)
423: p4a = p4b;
424: fprintf(outptr,".xx \"");
425: putout(p1a,p1b);
426: /* tilde-1 to account for extra space before TILDE */
427: if(p1b!=(tilde-1) && p1a!=p1b)
428: fprintf(outptr,"/");
429: fprintf(outptr,"\" \"");
430: if(p4a==p4b && p2a!=ref && p2a!=p2b)
431: fprintf(outptr,"/");
432: putout(p2a,p2b);
433: fprintf(outptr,"\" \"");
434: putout(p3a,p3b);
435: /* ++p3b to account for extra blank after TILDE */
436: /* ++p3b to account for extra space before TILDE */
437: if(p1a==p1b && ++p3b!=tilde)
438: fprintf(outptr,"/");
439: fprintf(outptr,"\" \"");
440: if(p1a==p1b && p4a!=ref && p4a!=p4b)
441: fprintf(outptr,"/");
442: putout(p4a,p4b);
443: if(rflag)
444: fprintf(outptr,"\" %s\n",tilde);
445: else
446: fprintf(outptr,"\"\n");
447: linep = line;
448: break;
449:
450: case '"':
451: /* put double " for " */
452: *linep++ = c;
453: default:
454: *linep++ = c;
455: }
456: }
457: }
458:
/*
 * Find where the text starting at a can be cut on a word boundary.
 *
 *	a	start of the text
 *	c	end of the text (one past its last character)
 *	d	largest distance from a that is examined
 *
 * Returns the right-most position x, a < x <= c and x-a <= d, at which a
 * word ends (x is c or points at white space, and the character before x
 * is not white space).  When no such position exists within d characters
 * the result is c, and the caller has to clamp it.
 *
 * Characters are converted to unsigned char before they are handed to
 * isspace(), and d is declared explicitly.
 */
char *rtrim(a,c,d)
char *a,*c;
int d;
{
	char *b,*x;

	b = c;
	for(x=a+1; x<=c && x-a<=d; x++)
		if((x==c || isspace((unsigned char)x[0])) && !isspace((unsigned char)x[-1]))
			b = x;
	if(b<c && !isspace((unsigned char)b[0]))
		b++;
	return(b);
}
471:
/*
 * Mirror image of rtrim(): find where the text ending at b can be cut on
 * a word boundary.
 *
 *	c	start of the text
 *	b	end of the text (one past its last character)
 *	d	largest distance from b that is examined
 *
 * Returns the left-most position p, c <= p < b and b-p <= d, at which a
 * word starts (p is not white space, and p is c or the character before
 * p is white space).  When no such position exists within d characters
 * the result is c, and the caller has to clamp it.
 *
 * The scan is written so that no pointer in front of c is ever formed,
 * characters are converted to unsigned char before they are handed to
 * isspace(), and d is declared explicitly.
 */
char *ltrim(c,b,d)
char *c,*b;
int d;
{
	char *a,*x,*p;

	a = c;
	/* x is one past the character under test, so x stays above c */
	for(x=b; x>c && b-x<d; x--) {
		p = x-1;
		if(!isspace((unsigned char)p[0]) && (p==c || isspace((unsigned char)p[-1])))
			a = p;
	}
	if(a>c && !isspace((unsigned char)a[-1]))
		a--;
	return(a);
}
484:
485: putout(strt,end)
486: char *strt, *end;
487: {
488: char *cp;
489:
490: cp = strt;
491:
492: for(cp=strt; cp<end; cp++) {
493: putc(*cp,outptr);
494: }
495: }
496:
497: onintr()
498: {
499:
500: if(*sortfile)
501: unlink(sortfile);
502: exit(1);
503: }
504:
505: hash(strtp,endp)
506: char *strtp, *endp;
507: {
508: char *cp, c;
509: int i, j, k;
510:
511: /* Return zero hash number for single letter words */
512: if((endp - strtp) == 1)
513: return(0);
514:
515: cp = strtp;
516: c = *cp++;
517: i = (isupper(c)?tolower(c):c);
518: c = *cp;
519: j = (isupper(c)?tolower(c):c);
520: i = i*j;
521: cp = --endp;
522: c = *cp--;
523: k = (isupper(c)?tolower(c):c);
524: c = *cp;
525: j = (isupper(c)?tolower(c):c);
526: j = k*j;
527:
528: k = (i ^ (j>>2)) & MASK;
529: return(k);
530: }
531:
532: storeh(num,strtp)
533: int num;
534: char *strtp;
535: {
536: int i;
537:
538: for(i=num; i<MAXT; i++) {
539: if(hasht[i] == 0) {
540: hasht[i] = strtp;
541: return(0);
542: }
543: }
544: for(i=0; i<num; i++) {
545: if(hasht[i] == 0) {
546: hasht[i] = strtp;
547: return(0);
548: }
549: }
550: return(1);
551: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.