|
|
1.1 root 1: #
2:
3: /* permuted title index
4: ptx [-t] [-i ignore] [-o only] [-w num] [-f] [-r] [-g gap] [-b breakfile] [input] [output]
5: Ptx reads the input file and permutes on words in it.
6: It excludes all words in the ignore file.
7: Alternately it includes words in the only file.
8: if neither is given it excludes the words in /usr/lib/eign.
9:
10: The width of the output line can be changed to num
11: characters. If omitted, the default is 72, or 100 when -t (troff) is given.
12: the -f flag tells the program to fold the output
13: the -t flag says the output is for troff and the
14: output is then wider.
15:
16: make: cc ptx.c -lS
17: */
18:
19: #include <stdio.h>
20: #include <ctype.h>
21: #include <signal.h>
#define DEFLTX "/usr/lib/eign"	/* ignore file used when neither -i nor -o is given */
#define TILDE 0177		/* separator between the two halves of a sort record */
#define N 30
#define MAX (N*BUFSIZ)		/* size of the ignore/only word buffer, in bytes */
#define LMAX 200		/* size of line[] and largest accepted -w value */
#define MAXT 2048		/* number of slots in hasht[] */
#define MASK 03777		/* reduces a hash value to 0..MAXT-1 */
#define SET 1

/*
 * Nonzero when c is a break (word separator) character.  The argument
 * is evaluated exactly once and is reduced to an unsigned char so that
 * neither a negative plain char nor a getc() value above 127 can index
 * outside btable[].
 */
#define isabreak(c) (btable[(unsigned char)(c)])

extern char *calloc();
extern char *getline();
extern FILE *fdopen();
extern int mkstemp(), unlink();
int status;


char *hasht[MAXT];		/* open-addressed table of ignore/only words */
char line[LMAX];		/* current input line */
char btable[256];		/* one flag per possible unsigned char value */
int ignore;			/* -i given */
int only;			/* -o given */
int llen = 72;			/* output line width */
int gap = 3;
int gutter = 3;
int mlen = LMAX;
int wlen;			/* -w given */
int rflag;			/* -r given */
int halflen;
char *strtbufp, *endbufp;
char *empty = "";

char *infile;
FILE *inptr;			/* set to stdin at the top of main() */

char *outfile;
FILE *outptr;			/* set to stdout at the top of main() */

char *sortfile = "";		/* output of sort program; "" until it exists */
char nofold[] = {'-', 'd', 't', TILDE, 0};
char fold[] = {'-', 'd', 'f', 't', TILDE, 0};
char *sortopt = nofold;
FILE *sortptr;

char *bfile;	/*contains user supplied break chars */
FILE *bptr;

/*
 * ptx driver: decode the options, load the break characters and the
 * ignore/only word list, write one sort record per selected keyword
 * to a temporary file, sort that file in place and hand it to
 * getsort() for formatting.
 *
 * Exits 0 on success.  Every fatal condition goes through diag(),
 * which exits with status 1.
 */
int
main(argc,argv)
int argc;
char **argv;
{
	register int c;
	register char *bufp;
	char *pend;
	char *bufbase;		/* first byte of the word buffer */
	extern int onintr();
	char sortcmd[128];
	/* writable template: mkstemp() overwrites the trailing X's */
	static char sorttmpl[] = "/tmp/ptxsXXXXXX";
	int sortfd;
	int cmdlen;

	char *xfile;
	FILE *xptr;

	/* stdin/stdout are not constant expressions on current C
	   libraries, so they cannot be static initializers */
	inptr = stdin;
	outptr = stdout;

	if(signal(SIGHUP,onintr)==SIG_IGN)
		signal(SIGHUP,SIG_IGN);
	if(signal(SIGINT,onintr)==SIG_IGN)
		signal(SIGINT,SIG_IGN);
	signal(SIGPIPE,onintr);
	signal(SIGTERM,onintr);

	/* argument decoding */

	/*
	 * argc counts the program name plus the arguments not yet
	 * consumed, so an option that takes an operand needs argc > 2:
	 * one slot for the option itself and one for its operand.
	 */
	xfile = DEFLTX;
	argv++;
	while(argc>1 && **argv == '-') {
		switch (*++*argv){

		case 'r':
			rflag++;
			break;

		case 'f':
			sortopt = fold;
			break;

		case 'w':
			if(argc <= 2)
				diag("Missing operand for option:",*argv);
			argc--;
			wlen++;
			llen = atoi(*++argv);
			if(llen <= 0)
				diag("Wrong width:",*argv);
			if(llen > LMAX) {
				llen = LMAX;
				msg("Lines truncated to 200 chars.",empty);
			}
			break;

		case 't':
			/* an explicit -w seen earlier takes precedence */
			if(wlen == 0)
				llen = 100;
			break;

		case 'g':
			if(argc <= 2)
				diag("Missing operand for option:",*argv);
			argc--;
			gap = gutter = atoi(*++argv);
			break;

		case 'i':
			if(only)
				diag("Only file already given.",empty);
			if(argc <= 2)
				diag("Missing operand for option:",*argv);
			argc--;
			ignore++;
			xfile = *++argv;
			break;

		case 'o':
			if(ignore)
				diag("Ignore file already given",empty);
			if(argc <= 2)
				diag("Missing operand for option:",*argv);
			only++;
			argc--;
			xfile = *++argv;
			break;

		case 'b':
			if(argc <= 2)
				diag("Missing operand for option:",*argv);
			argc--;
			bfile = *++argv;
			break;

		default:
			msg("Illegal argument:",*argv);
		}
		argc--;
		argv++;
	}

	if(argc>3)
		diag("Too many filenames",empty);
	else if(argc==3){
		infile = *argv++;
		outfile = *argv;
		if((outptr = fopen(outfile,"w")) == NULL)
			diag("Cannot open output file:",outfile);
	} else if(argc==2) {
		infile = *argv;
		outfile = 0;
	}


	/* Default breaks of blank, tab and newline */
	btable[' '] = SET;
	btable['\t'] = SET;
	btable['\n'] = SET;
	if(bfile) {
		if((bptr = fopen(bfile,"r")) == NULL)
			diag("Cannot open break char file",bfile);

		/* getc() yields 0..255 here, all valid btable[] indexes */
		while((c = getc(bptr)) != EOF)
			btable[c] = SET;
		fclose(bptr);
	}

	/* Allocate space for a buffer.  If only or ignore file present
	   read it into buffer. Else read in default ignore file
	   and put resulting words in buffer.
	*/

	/* calloc() has no prototype in scope, so pass size_t explicitly */
	if((strtbufp = calloc((size_t)N,(size_t)BUFSIZ)) == NULL)
		diag("Out of memory space",empty);
	bufbase = bufp = strtbufp;
	endbufp = strtbufp+MAX;

	if((xptr = fopen(xfile,"r")) == NULL)
		diag("Cannot open file",xfile);

	/* strtbufp tracks the start of the word being collected */
	while(bufp < endbufp && (c = getc(xptr)) != EOF) {
		if(isabreak(c)) {
			/* a run of break characters delimits no word;
			   hashing an empty word would read before it */
			if(bufp == strtbufp)
				continue;
			if(storeh(hash(strtbufp,bufp),strtbufp))
				diag("Too many words",xfile);
			*bufp++ = '\0';
			strtbufp = bufp;
		}
		else {
			*bufp++ = (isupper(c)?tolower(c):c);
		}
	}
	if (bufp >= endbufp)
		diag("Too many words in file",xfile);
	if (bufp > strtbufp) {
		/* final word not followed by a break character */
		if(storeh(hash(strtbufp,bufp),strtbufp))
			diag("Too many words",xfile);
		*bufp++ = '\0';
		strtbufp = bufp;
	}
	fclose(xptr);
	endbufp = (bufp > bufbase) ? bufp-1 : bufbase;

	/* open output file for sorting */

	/* mkstemp() creates the file itself, so the name cannot be
	   claimed by someone else between naming and opening */
	if((sortfd = mkstemp(sorttmpl)) < 0)
		diag("Cannot open output for sorting:",sorttmpl);
	sortfile = sorttmpl;
	if((sortptr = fdopen(sortfd, "w")) == NULL) {
		unlink(sortfile);
		diag("Cannot open output for sorting:",sortfile);
	}

	/* get a line of data and compare each word for
	   inclusion or exclusion in the sort phase
	*/

	if (infile!=0 && (inptr = fopen(infile,"r")) == NULL) {
		unlink(sortfile);
		diag("Cannot open data: ",infile);
	}
	while((pend = getline()) != 0)
		cmpline(pend);
	if(fclose(sortptr) == EOF) {
		unlink(sortfile);
		diag("Cannot write sort file:",sortfile);
	}

	/* -k1,1 -k2 is the current spelling of the obsolete "+0 -1 +1"
	   key syntax: first the keyword field, then the remainder */
	cmdlen = snprintf(sortcmd, sizeof sortcmd,
		"sort %s -k1,1 -k2 %s -o %s",
		sortopt, sortfile, sortfile);
	if(cmdlen < 0 || cmdlen >= (int)sizeof sortcmd) {
		unlink(sortfile);
		diag("Sort command too long","");
	}
	if(system(sortcmd)!=0) {
		unlink(sortfile);
		diag("Sort failed","");
	}

	getsort();

	/* normal completion: remove the work file and report success */
	unlink(sortfile);
	exit(0);
}
239:
/*
 * Write a one-line diagnostic of the form "ptx: <s> <arg>" to the
 * standard error stream.  Both s and arg are printed with %s.
 */
int
msg(s,arg)
char *s, *arg;
{
	(void) fprintf(stderr, "ptx: %s %s\n", s, arg);
}
/*
 * Fatal error: print the diagnostic through msg() and terminate the
 * program with exit status 1.  Does not return.
 */
int
diag(s,arg)
char *s;
char *arg;
{
	msg(s, arg);
	exit(1);
}
254:
255:
256: char *getline()
257: {
258:
259: register c;
260: register char *linep;
261: char *endlinep;
262:
263:
264: endlinep= line + mlen;
265: linep = line;
266: /* Throw away leading white space */
267:
268: while(isspace(c=getc(inptr)))
269: ;
270: if(c==EOF)
271: return(0);
272: ungetc(c,inptr);
273: while(( c=getc(inptr)) != EOF) {
274: switch (c) {
275:
276: case '\t':
277: if(linep<endlinep)
278: *linep++ = ' ';
279: break;
280: case '\n':
281: while(isspace(*--linep));
282: *++linep = '\n';
283: return(linep);
284: default:
285: if(linep < endlinep)
286: *linep++ = c;
287: }
288: }
289: return(0);
290: }
291:
292: cmpline(pend)
293: char *pend;
294: {
295:
296: char *pstrt, *pchar, *cp;
297: char **hp;
298: int flag;
299:
300: pchar = line;
301: if(rflag)
302: while(pchar<pend&&!isspace(*pchar))
303: pchar++;
304: while(pchar<pend){
305: /* eliminate white space */
306: if(isabreak(*pchar++))
307: continue;
308: pstrt = --pchar;
309:
310: flag = 1;
311: while(flag){
312: if(isabreak(*pchar)) {
313: hp = &hasht[hash(pstrt,pchar)];
314: pchar--;
315: while(cp = *hp++){
316: if(hp == &hasht[MAXT])
317: hp = hasht;
318: /* possible match */
319: if(cmpword(pstrt,pchar,cp)){
320: /* exact match */
321: if(!ignore && only)
322: putline(pstrt,pend);
323: flag = 0;
324: break;
325: }
326: }
327: /* no match */
328: if(flag){
329: if(ignore || !only)
330: putline(pstrt,pend);
331: flag = 0;
332: }
333: }
334: pchar++;
335: }
336: }
337: }
338:
/*
 * Case-insensitive comparison of a word in the text with a stored
 * word.
 *
 *   cpp   first character of the text word
 *   pend  last character of the text word (inclusive)
 *   hpp   NUL-terminated stored word, compared as-is (the caller is
 *         expected to have stored it in lower case)
 *
 * Returns 1 when every character of hpp equals the lower-cased text
 * character at the same position and both words have the same length,
 * otherwise 0.
 */
int
cmpword(cpp,pend,hpp)
char *cpp, *pend, *hpp;
{
	int c;

	while(*hpp != '\0') {
		/* go through unsigned char: the ctype functions are only
		   defined for EOF and unsigned char values */
		c = (unsigned char)*cpp++;
		if(isupper(c))
			c = tolower(c);
		if(c != (unsigned char)*hpp++)
			return(0);
	}
	/* cpp is one past the last matched character */
	return(cpp - 1 == pend);
}
352:
353: putline(strt, end)
354: char *strt, *end;
355: {
356: char *cp;
357:
358: for(cp=strt; cp<end; cp++)
359: putc(*cp, sortptr);
360: /* Add extra blank before TILDE to sort correctly
361: with -fd option */
362: putc(' ',sortptr);
363: putc(TILDE,sortptr);
364: for (cp=line; cp<strt; cp++)
365: putc(*cp,sortptr);
366: putc('\n',sortptr);
367: }
368:
369: getsort()
370: {
371: register c;
372: register char *tilde, *linep, *ref;
373: char *p1a,*p1b,*p2a,*p2b,*p3a,*p3b,*p4a,*p4b;
374: int w;
375: char *rtrim(), *ltrim();
376:
377: if((sortptr = fopen(sortfile,"r")) == NULL)
378: diag("Cannot open sorted data:",sortfile);
379:
380: halflen = (llen-gutter)/2;
381: linep = line;
382: while((c = getc(sortptr)) != EOF) {
383: switch(c) {
384:
385: case TILDE:
386: tilde = linep;
387: break;
388:
389: case '\n':
390: while(isspace(linep[-1]))
391: linep--;
392: ref = tilde;
393: if(rflag) {
394: while(ref<linep&&!isspace(*ref))
395: ref++;
396: *ref++ = 0;
397: }
398: /* the -1 is an overly conservative test to leave
399: space for the / that signifies truncation*/
400: p3b = rtrim(p3a=line,tilde,halflen-1);
401: if(p3b-p3a>halflen-1)
402: p3b = p3a+halflen-1;
403: p2a = ltrim(ref,p2b=linep,halflen-1);
404: if(p2b-p2a>halflen-1)
405: p2a = p2b-halflen-1;
406: p1b = rtrim(p1a=p3b+(isspace(p3b[0])!=0),tilde,
407: w=halflen-(p2b-p2a)-gap);
408: if(p1b-p1a>w)
409: p1b = p1a;
410: p4a = ltrim(ref,p4b=p2a-(isspace(p2a[-1])!=0),
411: w=halflen-(p3b-p3a)-gap);
412: if(p4b-p4a>w)
413: p4a = p4b;
414: fprintf(outptr,".xx \"");
415: putout(p1a,p1b);
416: /* tilde-1 to account for extra space before TILDE */
417: if(p1b!=(tilde-1) && p1a!=p1b)
418: fprintf(outptr,"/");
419: fprintf(outptr,"\" \"");
420: if(p4a==p4b && p2a!=ref && p2a!=p2b)
421: fprintf(outptr,"/");
422: putout(p2a,p2b);
423: fprintf(outptr,"\" \"");
424: putout(p3a,p3b);
425: /* ++p3b to account for extra blank after TILDE */
426: /* ++p3b to account for extra space before TILDE */
427: if(p1a==p1b && ++p3b!=tilde)
428: fprintf(outptr,"/");
429: fprintf(outptr,"\" \"");
430: if(p1a==p1b && p4a!=ref && p4a!=p4b)
431: fprintf(outptr,"/");
432: putout(p4a,p4b);
433: if(rflag)
434: fprintf(outptr,"\" %s\n",tilde);
435: else
436: fprintf(outptr,"\"\n");
437: linep = line;
438: break;
439:
440: case '"':
441: /* put double " for " */
442: *linep++ = c;
443: default:
444: *linep++ = c;
445: }
446: }
447: }
448:
/*
 * Find where to cut the text a..c on its right-hand side.
 *
 * Looks at the positions a+1 .. a+d (never beyond c) and returns the
 * last one that is the end of a word: the character before it is not
 * white space and the position is either c itself or holds white
 * space.  Returns c when no such position lies within d characters.
 */
char *
rtrim(a,c,d)
char *a, *c;
int d;
{
	char *cut;
	int k, span;

	cut = c;
	span = c - a;
	for(k = 1; k <= span && k <= d; k++) {
		if(isspace(a[k-1]))
			continue;		/* not preceded by a word */
		if(k == span || isspace(a[k]))
			cut = a + k;
	}
	return(cut);
}
461:
/*
 * Find where to cut the text c..b on its left-hand side.
 *
 * Looks at the positions b-1 .. b-d (never before c) and returns the
 * leftmost one that is the start of a word: it holds a non-space
 * character and is either c itself or preceded by white space.
 * Returns c when no such position lies within d characters of b.
 */
char *
ltrim(c,b,d)
char *c, *b;
int d;
{
	char *keep, *x;
	int k, span;

	keep = c;
	span = b - c;
	for(k = 1; k <= span && k <= d; k++) {
		x = b - k;
		if(isspace(x[0]))
			continue;		/* not inside a word */
		if(k == span || isspace(x[-1]))
			keep = x;
	}
	return(keep);
}
474:
475: putout(strt,end)
476: char *strt, *end;
477: {
478: char *cp;
479:
480: cp = strt;
481:
482: for(cp=strt; cp<end; cp++) {
483: putc(*cp,outptr);
484: }
485: }
486:
487: onintr()
488: {
489:
490: if(*sortfile)
491: unlink(sortfile);
492: exit(1);
493: }
494:
495: hash(strtp,endp)
496: char *strtp, *endp;
497: {
498: char *cp, c;
499: int i, j, k;
500:
501: /* Return zero hash number for single letter words */
502: if((endp - strtp) == 1)
503: return(0);
504:
505: cp = strtp;
506: c = *cp++;
507: i = (isupper(c)?tolower(c):c);
508: c = *cp;
509: j = (isupper(c)?tolower(c):c);
510: i = i*j;
511: cp = --endp;
512: c = *cp--;
513: k = (isupper(c)?tolower(c):c);
514: c = *cp;
515: j = (isupper(c)?tolower(c):c);
516: j = k*j;
517:
518: k = (i ^ (j>>2)) & MASK;
519: return(k);
520: }
521:
522: storeh(num,strtp)
523: int num;
524: char *strtp;
525: {
526: int i;
527:
528: for(i=num; i<MAXT; i++) {
529: if(hasht[i] == 0) {
530: hasht[i] = strtp;
531: return(0);
532: }
533: }
534: for(i=0; i<num; i++) {
535: if(hasht[i] == 0) {
536: hasht[i] = strtp;
537: return(0);
538: }
539: }
540: return(1);
541: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.