|
|
/*
 * ptx - permuted title index
 *
 * usage: ptx [-t] [-r] [-f] [-w num] [-g num] [-i ignore] [-o only]
 *            [-b breaks] [input [output]]
 *
 * Ptx reads the input file and permutes on the words in it.
 * It excludes all words found in the ignore file (-i).  Alternatively
 * it includes only the words found in the only file (-o).  If neither
 * is given it excludes the words in /usr/lib/eign.
 *
 *	-w num	set the width of the output line to num characters
 *		(at most 200).  The default is 72.
 *	-t	the output is for troff: the width becomes 100 unless a
 *		-w option has already been seen.
 *	-f	fold upper and lower case when sorting.
 *	-g num	set both the gap and the gutter to num (default 3).
 *	-r	the first blank-delimited word of every input line is a
 *		reference: it is never used as a keyword and is written
 *		as an extra trailing field of each output line.
 *	-b file	every character in file is an additional word break
 *		(blank, tab and newline always are).
 *
 * Only the first letter after the '-' of an option is examined.
 *
 * The program writes one record per selected keyword to a temporary
 * file, sorts that file with sort(1), and then formats every sorted
 * record as a ".xx" macro call on the output.
 *
 * build: cc -o ptx ptx.c
 */

#ifndef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 200809L	/* mkstemp(), fdopen() */
#endif

#include <ctype.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define DEFLTX	"/usr/lib/eign"	/* default ignore file */
#define TILDE	0177		/* field separator inside sort records */
#define N	30
#define MAX	(N*BUFSIZ)	/* size of the ignore/only word buffer */
#define LMAX	200		/* longest input line kept */
#define MAXT	2048		/* slots in the word hash table */
#define MASK	03777		/* MAXT-1: reduces a hash to a slot number */
#define SET	1
#define NBREAK	256		/* one break flag per unsigned char value */
#define SORTCAP	(2*(LMAX+2))	/* longest sort record: line, blank, TILDE,
				   every character possibly a doubled quote */

/* Index through unsigned char so that no character value can land
   outside btable. */
#define isabreak(c) (btable[(unsigned char)(c)])

void	msg(const char *s, const char *arg);
void	diag(const char *s, const char *arg);
char	*get_line(void);
void	cmpline(char *pend);
int	cmpword(char *cpp, char *pend, char *hpp);
void	putline(char *strt, char *end);
void	getsort(void);
char	*rtrim(char *a, char *c, int d);
char	*ltrim(char *c, char *b, int d);
void	putout(char *strt, char *end);
void	onintr(int sig);
int	hash(char *strtp, char *endp);
int	storeh(int num, char *strtp);

int	status;			/* not referenced in this file */

char	*hasht[MAXT];		/* open-addressed table of ignore/only words */
char	line[LMAX + 1];		/* current input line; +1 for its final '\n' */
char	btable[NBREAK];		/* non-zero for characters that break words */
int	ignore;			/* -i given */
int	only;			/* -o given */
int	llen = 72;		/* output line width */
int	gap = 3;
int	gutter = 3;
int	mlen = LMAX;		/* characters of an input line that are kept */
int	wlen;			/* -w given */
int	rflag;			/* -r given */
int	halflen;		/* (llen-gutter)/2, set by getsort() */
char	*strtbufp, *endbufp;
char	*empty = "";

char	*infile;
FILE	*inptr;			/* set to stdin in main() */

char	*outfile;
FILE	*outptr;		/* set to stdout in main() */

char	*sortfile;		/* name of the sort work file, NULL until created */
char	nofold[] = {'-', 'd', 't', TILDE, 0};
char	fold[] = {'-', 'd', 'f', 't', TILDE, 0};
char	*sortopt = nofold;
FILE	*sortptr;

char	*bfile;			/* contains user supplied break chars */
FILE	*bptr;

/* Writable template: mkstemp() replaces the X's in place. */
static char sorttmpl[] = "/tmp/ptxsXXXXXX";

/* Map an upper case letter to lower case; other values are returned
   unchanged, as an unsigned char value. */
static int
lower(int c)
{
	c = (unsigned char)c;
	return isupper(c) ? tolower(c) : c;
}

/* Remove the sort work file if it has been created. */
static void
cleanup(void)
{
	if (sortfile != NULL && *sortfile != '\0')
		unlink(sortfile);
}

/*
 * Return the value that follows the option currently at **argvp and
 * step *argcp / *argvp onto it.  argc still counts the program name
 * and the option itself, so a value exists only when argc > 2.
 */
static char *
optvalue(int *argcp, char ***argvp, const char *opt)
{
	if (*argcp <= 2)
		diag("Missing value for option:", opt);
	--*argcp;
	return *++*argvp;
}

/*
 * Decode the options, load the ignore/only words into the hash table,
 * write one sort record per selected keyword, run sort(1) on the work
 * file and format the result.  Returns 0 on success; every failure
 * leaves through diag() with status 1.
 */
int
main(int argc, char **argv)
{
	int c, n, fd;
	char *bufp;
	char *pend;
	char *opt, *val;
	char sortcmd[128];
	char *xfile;
	FILE *xptr;

	inptr = stdin;
	outptr = stdout;

	/* Catch hangup and interrupt only if they are not being ignored. */
	if (signal(SIGHUP, onintr) == SIG_IGN)
		signal(SIGHUP, SIG_IGN);
	if (signal(SIGINT, onintr) == SIG_IGN)
		signal(SIGINT, SIG_IGN);
	signal(SIGPIPE, onintr);
	signal(SIGTERM, onintr);

	/* argument decoding */

	xfile = DEFLTX;
	argv++;
	while (argc > 1 && **argv == '-') {
		opt = *argv + 1;
		switch (*opt) {

		case 'r':
			rflag++;
			break;

		case 'f':
			sortopt = fold;
			break;

		case 'w':
			val = optvalue(&argc, &argv, opt);
			wlen++;
			llen = atoi(val);
			if (llen <= 0)
				diag("Wrong width:", val);
			if (llen > LMAX) {
				llen = LMAX;
				msg("Lines truncated to 200 chars.", empty);
			}
			break;

		case 't':
			if (wlen == 0)
				llen = 100;
			break;

		case 'g':
			gap = gutter = atoi(optvalue(&argc, &argv, opt));
			break;

		case 'i':
			if (only)
				diag("Only file already given.", empty);
			xfile = optvalue(&argc, &argv, opt);
			ignore++;
			break;

		case 'o':
			if (ignore)
				diag("Ignore file already given", empty);
			xfile = optvalue(&argc, &argv, opt);
			only++;
			break;

		case 'b':
			bfile = optvalue(&argc, &argv, opt);
			break;

		default:
			msg("Illegal argument:", opt);
		}
		argc--;
		argv++;
	}

	if (argc > 3)
		diag("Too many filenames", empty);
	else if (argc == 3) {
		infile = *argv++;
		outfile = *argv;
		if ((outptr = fopen(outfile, "w")) == NULL)
			diag("Cannot open output file:", outfile);
	} else if (argc == 2) {
		infile = *argv;
		outfile = NULL;
	}

	/* Default breaks of blank, tab and newline */
	btable[' '] = SET;
	btable['\t'] = SET;
	btable['\n'] = SET;
	if (bfile) {
		if ((bptr = fopen(bfile, "r")) == NULL)
			diag("Cannot open break char file", bfile);
		while ((c = getc(bptr)) != EOF)
			btable[c] = SET;
		fclose(bptr);
	}

	/*
	 * Allocate the word buffer and read the only/ignore file (or the
	 * default ignore file) into it.  Words are folded to lower case,
	 * NUL terminated, and entered in the hash table.
	 */
	if ((strtbufp = calloc(N, BUFSIZ)) == NULL)
		diag("Out of memory space", empty);
	bufp = strtbufp;
	endbufp = strtbufp + MAX;

	if ((xptr = fopen(xfile, "r")) == NULL)
		diag("Cannot open file", xfile);

	while (bufp < endbufp && (c = getc(xptr)) != EOF) {
		if (!isabreak(c)) {
			*bufp++ = (char)lower(c);
			continue;
		}
		/* Consecutive breaks delimit an empty word: it could never
		   match anything and hash() must not be asked about it. */
		if (bufp == strtbufp)
			continue;
		if (storeh(hash(strtbufp, bufp), strtbufp))
			diag("Too many words", xfile);
		*bufp++ = '\0';
		strtbufp = bufp;
	}
	if (bufp >= endbufp)
		diag("Too many words in file", xfile);
	/* A last word that is not followed by a break character. */
	if (bufp > strtbufp) {
		if (storeh(hash(strtbufp, bufp), strtbufp))
			diag("Too many words", xfile);
		*bufp++ = '\0';
		strtbufp = bufp;
	}
	endbufp = bufp;
	fclose(xptr);

	/* open output file for sorting */

	if ((fd = mkstemp(sorttmpl)) < 0)
		diag("Cannot open output for sorting:", sorttmpl);
	sortfile = sorttmpl;	/* from here on cleanup() removes it */
	if ((sortptr = fdopen(fd, "w")) == NULL)
		diag("Cannot open output for sorting:", sortfile);

	/*
	 * get a line of data and compare each word for
	 * inclusion or exclusion in the sort phase
	 */
	if (infile != NULL && (inptr = fopen(infile, "r")) == NULL)
		diag("Cannot open data: ", infile);
	while ((pend = get_line()) != NULL)
		cmpline(pend);
	if (fclose(sortptr) != 0)
		diag("Cannot write sort file:", sortfile);

	/*
	 * Primary key: the text before TILDE (keyword and what follows
	 * it); secondary key: the text after TILDE.  "-k 1,1 -k 2" is the
	 * POSIX spelling of the obsolete "+0 -1 +1".
	 */
	n = snprintf(sortcmd, sizeof sortcmd, "sort %s -k 1,1 -k 2 -o %s %s",
	    sortopt, sortfile, sortfile);
	if (n < 0 || (size_t)n >= sizeof sortcmd)
		diag("Sort command too long", "");
	if (system(sortcmd) != 0)
		diag("Sort failed", "");

	getsort();
	cleanup();
	return 0;
}

/* Write "ptx: s arg" and a newline on the standard error output. */
void
msg(const char *s, const char *arg)
{
	fprintf(stderr, "ptx: %s %s\n", s, arg);
}

/* Report a fatal error with msg(), remove the sort work file and
   exit with status 1. */
void
diag(const char *s, const char *arg)
{
	msg(s, arg);
	cleanup();
	exit(1);
}

/*
 * Read the next non-blank input line from inptr into line[].
 * Leading white space (blank lines included) is skipped, tabs become
 * blanks, characters beyond mlen are dropped and trailing white space
 * is removed.  The stored text is followed by a '\n'.
 * Returns a pointer to that '\n', or NULL at end of input.
 */
char *
get_line(void)
{
	int c;
	char *linep = line;
	char *endlinep = line + mlen;

	/* Throw away leading white space */
	while ((c = getc(inptr)) != EOF && isspace(c))
		;
	if (c == EOF)
		return NULL;
	ungetc(c, inptr);

	/* A last line without a newline ends at EOF and is still kept. */
	while ((c = getc(inptr)) != EOF && c != '\n') {
		if (c == '\t')
			c = ' ';
		if (linep < endlinep)
			*linep++ = (char)c;
	}

	/* line[0] is never white space, so this stops inside line[]. */
	while (isspace((unsigned char)*--linep))
		;
	*++linep = '\n';
	return linep;
}

/*
 * Report whether the word [strt, end) is in the hash table.  The
 * probe follows the order used by storeh() and gives up after MAXT
 * slots, so a completely full table cannot trap it.
 */
static int
lookup(char *strt, char *end)
{
	int slot = hash(strt, end);
	int probes;

	for (probes = 0; probes < MAXT && hasht[slot] != NULL; probes++) {
		if (cmpword(strt, end - 1, hasht[slot]))
			return 1;
		if (++slot == MAXT)
			slot = 0;
	}
	return 0;
}

/*
 * Split line[] (which ends at pend, the '\n' stored by get_line())
 * into words at break characters and write a sort record for every
 * word that is selected: words found in the table when an only file
 * is in effect, words not found in it otherwise.  With -r the first
 * blank-delimited word is skipped.
 */
void
cmpline(char *pend)
{
	char *pchar = line;
	char *pstrt;
	int wanted;

	if (rflag)
		while (pchar < pend && !isspace((unsigned char)*pchar))
			pchar++;

	while (pchar < pend) {
		/* eliminate white space */
		if (isabreak(*pchar)) {
			pchar++;
			continue;
		}
		/* The '\n' at pend is always a break, so the scan stops. */
		pstrt = pchar;
		while (!isabreak(*pchar))
			pchar++;

		if (lookup(pstrt, pchar))
			wanted = !ignore && only;
		else
			wanted = ignore || !only;
		if (wanted)
			putline(pstrt, pend);
	}
}

/*
 * Compare the word that starts at cpp and whose last character is at
 * pend with the NUL terminated, lower case table entry hpp, ignoring
 * the case of the word.  Returns 1 when they are equal in text and
 * length, 0 otherwise.
 */
int
cmpword(char *cpp, char *pend, char *hpp)
{
	while (*hpp != '\0') {
		if ((char)lower(*cpp++) != *hpp++)
			return 0;
	}
	/* cpp is now one past the characters compared. */
	return cpp - 1 == pend;
}

/*
 * Write one sort record: the line from strt up to end, a blank, TILDE,
 * the part of the line in front of strt, and a newline.
 */
void
putline(char *strt, char *end)
{
	char *cp;

	for (cp = strt; cp < end; cp++)
		putc(*cp, sortptr);
	/* Add extra blank before TILDE to sort correctly
	   with -fd option */
	putc(' ', sortptr);
	putc(TILDE, sortptr);
	for (cp = line; cp < strt; cp++)
		putc(*cp, sortptr);
	putc('\n', sortptr);
}

/*
 * Format one sorted record as a ".xx" line on outptr.
 *
 * [start, tilde) is the keyword with the text that followed it (and
 * the extra blank written by putline()); [tilde, linep) is the text
 * that preceded the keyword.  Four quoted fields are written:
 *	1  the part of the following text that did not fit in field 3,
 *	2  the tail of the preceding text,
 *	3  the keyword and the head of the following text,
 *	4  the part of the preceding text that did not fit in field 2.
 * A '/' marks a field that was cut.  With -r the leading word of the
 * preceding text is split off and written as a fifth, unquoted field.
 *
 * The bytes just in front of start must be readable; the caller
 * provides guard bytes for the [-1] look-behinds below.
 */
static void
putrecord(char *start, char *tilde, char *linep)
{
	char *ref;
	char *p1a, *p1b, *p2a, *p2b, *p3a, *p3b, *p4a, *p4b;
	int w;

	while (isspace((unsigned char)linep[-1]))
		linep--;

	ref = tilde;
	if (rflag) {
		while (ref < linep && !isspace((unsigned char)*ref))
			ref++;
		*ref++ = 0;		/* terminates the reference at tilde */
	}

	/* the -1 is an overly conservative test to leave
	   space for the / that signifies truncation */
	p3a = start;
	p3b = rtrim(p3a, tilde, halflen - 1);
	if (p3b - p3a > halflen - 1)
		p3b = p3a + halflen - 1;

	p2b = linep;
	p2a = ltrim(ref, p2b, halflen - 1);
	/* NOTE(review): this keeps halflen+1 characters while the test
	   above and the p3 case use halflen-1; it may have been meant as
	   p2b-(halflen-1).  Left unchanged because it alters the output. */
	if (p2b - p2a > halflen - 1)
		p2a = p2b - halflen - 1;

	p1a = p3b + (isspace((unsigned char)p3b[0]) != 0);
	w = halflen - (int)(p2b - p2a) - gap;
	p1b = rtrim(p1a, tilde, w);
	if (p1b - p1a > w)
		p1b = p1a;

	p4b = p2a - (isspace((unsigned char)p2a[-1]) != 0);
	w = halflen - (int)(p3b - p3a) - gap;
	p4a = ltrim(ref, p4b, w);
	if (p4b - p4a > w)
		p4a = p4b;

	fputs(".xx \"", outptr);
	putout(p1a, p1b);
	/* tilde-1 to account for extra space before TILDE */
	if (p1b != tilde - 1 && p1a != p1b)
		putc('/', outptr);
	fputs("\" \"", outptr);
	if (p4a == p4b && p2a != ref && p2a != p2b)
		putc('/', outptr);
	putout(p2a, p2b);
	fputs("\" \"", outptr);
	putout(p3a, p3b);
	/* p3b+1 to account for extra space before TILDE */
	if (p1a == p1b && p3b + 1 != tilde)
		putc('/', outptr);
	fputs("\" \"", outptr);
	if (p1a == p1b && p4a != ref && p4a != p4b)
		putc('/', outptr);
	putout(p4a, p4b);
	if (rflag)
		fprintf(outptr, "\" %s\n", tilde);
	else
		fputs("\"\n", outptr);
}

/*
 * Read the sorted work file and format every record with putrecord().
 * TILDE is not stored, only its position is remembered; every '"' is
 * stored twice so that it survives inside the quoted output fields.
 */
void
getsort(void)
{
	/* two guard bytes in front, one spare byte behind for the NUL
	   that putrecord() stores with -r */
	static char store[2 + SORTCAP + 1];
	char *const base = store + 2;
	char *const limit = base + SORTCAP;
	char *linep, *tilde;
	int c;

	if ((sortptr = fopen(sortfile, "r")) == NULL)
		diag("Cannot open sorted data:", sortfile);

	halflen = (llen - gutter) / 2;
	linep = base;
	tilde = NULL;
	while ((c = getc(sortptr)) != EOF) {
		switch (c) {

		case TILDE:
			tilde = linep;
			break;

		case '\n':
			/* A record without a separator was not written by
			   putline(); there is nothing sensible to print. */
			if (tilde != NULL)
				putrecord(base, tilde, linep);
			linep = base;
			tilde = NULL;
			break;

		case '"':
			/* put double " for " */
			if (linep < limit)
				*linep++ = (char)c;
			/* fallthrough */
		default:
			if (linep < limit)
				*linep++ = (char)c;
		}
	}
	fclose(sortptr);
}

/*
 * Find where to cut the text [a, c) on the right so that at most d
 * characters remain: returns the end of the last word that ends
 * within d characters of a, or c when no such word end exists.
 */
char *
rtrim(char *a, char *c, int d)
{
	char *b, *x;

	b = c;
	for (x = a + 1; x <= c && x - a <= d; x++)
		if ((x == c || isspace((unsigned char)x[0])) &&
		    !isspace((unsigned char)x[-1]))
			b = x;
	if (b < c && !isspace((unsigned char)b[0]))
		b++;
	return b;
}

/*
 * Find where to cut the text [c, b) on the left so that at most d
 * characters remain: returns the start of the first word that starts
 * within d characters of b, or c when no such word start exists.
 */
char *
ltrim(char *c, char *b, int d)
{
	char *a, *x;

	a = c;
	for (x = b - 1; x >= c && b - x <= d; x--)
		if (!isspace((unsigned char)x[0]) &&
		    (x == c || isspace((unsigned char)x[-1])))
			a = x;
	if (a > c && !isspace((unsigned char)a[-1]))
		a--;
	return a;
}

/* Write the characters [strt, end) on outptr; nothing when the range
   is empty or reversed. */
void
putout(char *strt, char *end)
{
	char *cp;

	for (cp = strt; cp < end; cp++)
		putc(*cp, outptr);
}

/*
 * Signal handler: remove the sort work file and terminate with
 * status 1.  Only unlink() and _exit() are used, both of which may be
 * called from a signal handler.
 */
void
onintr(int sig)
{
	(void)sig;
	cleanup();
	_exit(1);
}

/*
 * Hash the word [strtp, endp), ignoring case, to a slot in 0..MASK.
 * Only the first two and the last two characters take part.  Words of
 * fewer than two characters hash to 0.
 */
int
hash(char *strtp, char *endp)
{
	int head, tail;

	if (endp - strtp <= 1)
		return 0;

	head = lower(strtp[0]) * lower(strtp[1]);
	tail = lower(endp[-1]) * lower(endp[-2]);
	return (head ^ (tail >> 2)) & MASK;
}

/*
 * Enter strtp in the first free slot at or after num, wrapping round
 * to the start of the table.  Returns 0 on success, 1 when the table
 * is full.
 */
int
storeh(int num, char *strtp)
{
	int slot = num;
	int tries;

	for (tries = 0; tries < MAXT; tries++) {
		if (hasht[slot] == NULL) {
			hasht[slot] = strtp;
			return 0;
		}
		if (++slot == MAXT)
			slot = 0;
	}
	return 1;
}
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.