|
|
1.1 ! root 1: #ifndef lint ! 2: static char sccsid[] = "@(#)spell.c 4.2 6/3/86"; ! 3: #endif ! 4: ! 5: #include "spell.h" ! 6: #define DLEV 2 ! 7: ! 8: char *strcat(); ! 9: int strip(); ! 10: char *skipv(); ! 11: int an(); ! 12: int s(); ! 13: int es(); ! 14: int ily(); ! 15: int ncy(); ! 16: int CCe(); ! 17: int VCe(); ! 18: int bility(); ! 19: int tion(); ! 20: int ize(); ! 21: int y_to_e(); ! 22: int i_to_y(); ! 23: int nop(); ! 24: int metry(); ! 25: ! 26: struct suftab { ! 27: char *suf; ! 28: int (*p1)(); ! 29: int n1; ! 30: char *d1; ! 31: char *a1; ! 32: int (*p2)(); ! 33: int n2; ! 34: char *d2; ! 35: char *a2; ! 36: } suftab[] = { ! 37: {"ssen",ily,4,"-y+iness","+ness" }, ! 38: {"ssel",ily,4,"-y+i+less","+less" }, ! 39: {"se",s,1,"","+s", es,2,"-y+ies","+es" }, ! 40: {"s'",s,2,"","+'s"}, ! 41: {"s",s,1,"","+s"}, ! 42: {"ecn",ncy,1,"","-t+ce"}, ! 43: {"ycn",ncy,1,"","-cy+t"}, ! 44: {"ytilb",nop,0,"",""}, ! 45: {"ytilib",bility,5,"-le+ility",""}, ! 46: {"elbaif",i_to_y,4,"-y+iable",""}, ! 47: {"elba",CCe,4,"-e+able","+able"}, ! 48: {"yti",CCe,3,"-e+ity","+ity"}, ! 49: {"ylb",y_to_e,1,"-e+y",""}, ! 50: {"yl",ily,2,"-y+ily","+ly"}, ! 51: {"laci",strip,2,"","+al"}, ! 52: {"latnem",strip,2,"","+al"}, ! 53: {"lanoi",strip,2,"","+al"}, ! 54: {"tnem",strip,4,"","+ment"}, ! 55: {"gni",CCe,3,"-e+ing","+ing"}, ! 56: {"reta",nop,0,"",""}, ! 57: {"re",strip,1,"","+r", i_to_y,2,"-y+ier","+er"}, ! 58: {"de",strip,1,"","+d", i_to_y,2,"-y+ied","+ed"}, ! 59: {"citsi",strip,2,"","+ic"}, ! 60: {"cihparg",i_to_y,1,"-y+ic",""}, ! 61: {"tse",strip,2,"","+st", i_to_y,3,"-y+iest","+est"}, ! 62: {"cirtem",i_to_y,1,"-y+ic",""}, ! 63: {"yrtem",metry,0,"-ry+er",""}, ! 64: {"cigol",i_to_y,1,"-y+ic",""}, ! 65: {"tsigol",i_to_y,2,"-y+ist",""}, ! 66: {"tsi",VCe,3,"-e+ist","+ist"}, ! 67: {"msi",VCe,3,"-e+ism","+ist"}, ! 68: {"noitacif",i_to_y,6,"-y+ication",""}, ! 69: {"noitazi",ize,5,"-e+ation",""}, ! 70: {"rota",tion,2,"-e+or",""}, ! 71: {"noit",tion,3,"-e+ion","+ion"}, ! 72: {"naino",an,3,"","+ian"}, ! 73: {"na",an,1,"","+n"}, ! 74: {"evit",tion,3,"-e+ive","+ive"}, ! 75: {"ezi",CCe,3,"-e+ize","+ize"}, ! 76: {"pihs",strip,4,"","+ship"}, ! 77: {"dooh",ily,4,"-y+hood","+hood"}, ! 78: {"ekil",strip,4,"","+like"}, ! 79: 0 ! 80: }; ! 81: ! 82: char *preftab[] = { ! 83: "anti", ! 84: "bio", ! 85: "dis", ! 86: "electro", ! 87: "en", ! 88: "fore", ! 89: "hyper", ! 90: "intra", ! 91: "inter", ! 92: "iso", ! 93: "kilo", ! 94: "magneto", ! 95: "meta", ! 96: "micro", ! 97: "milli", ! 98: "mis", ! 99: "mono", ! 100: "multi", ! 101: "non", ! 102: "out", ! 103: "over", ! 104: "photo", ! 105: "poly", ! 106: "pre", ! 107: "pseudo", ! 108: "re", ! 109: "semi", ! 110: "stereo", ! 111: "sub", ! 112: "super", ! 113: "thermo", ! 114: "ultra", ! 115: "under", /*must precede un*/ ! 116: "un", ! 117: 0 ! 118: }; ! 119: ! 120: int vflag; ! 121: int xflag; ! 122: char word[100]; ! 123: char original[100]; ! 124: char *deriv[40]; ! 125: char affix[40]; ! 126: ! 127: main(argc,argv) ! 128: char **argv; ! 129: { ! 130: register char *ep, *cp; ! 131: register char *dp; ! 132: int fold; ! 133: int j; ! 134: FILE *file, *found; ! 135: if(!prime(argc,argv)) { ! 136: fprintf(stderr, ! 137: "spell: cannot initialize hash table\n"); ! 138: exit(1); ! 139: } ! 140: found = fopen(argv[2],"w"); ! 141: for(argc-=3,argv+=3; argc>0 && argv[0][0]=='-'; argc--,argv++) ! 142: switch(argv[0][1]) { ! 143: case 'b': ! 144: ise(); ! 145: break; ! 146: case 'v': ! 147: vflag++; ! 148: break; ! 149: case 'x': ! 150: xflag++; ! 151: break; ! 152: } ! 153: for(;; fprintf(file,"%s%s\n",affix,original)) { ! 154: affix[0] = 0; ! 155: file = found; ! 156: for(ep=word;(*ep=j=getchar())!='\n';ep++) ! 157: if(j == EOF) { ! 158: fclose(found); ! 159: exit(0); ! 160: } ! 161: for(cp=word,dp=original; cp<ep; ) ! 162: *dp++ = *cp++; ! 163: *dp = 0; ! 164: fold = 0; ! 165: for(cp=word;cp<ep;cp++) ! 166: if(islower(*cp)) ! 167: goto lcase; ! 168: if(putsuf(ep,".",0)) ! 169: continue; ! 170: ++fold; ! 171: for(cp=original+1,dp=word+1;dp<ep;dp++,cp++) ! 172: *dp = Tolower(*cp); ! 173: lcase: ! 174: if(putsuf(ep,".",0)||suffix(ep,0)) ! 175: continue; ! 176: if(isupper(word[0])) { ! 177: for(cp=original,dp=word; *dp = *cp++; dp++) ! 178: if (fold) *dp = Tolower(*dp); ! 179: word[0] = Tolower(word[0]); ! 180: goto lcase; ! 181: } ! 182: file = stdout; ! 183: } ! 184: } ! 185: ! 186: suffix(ep,lev) ! 187: char *ep; ! 188: { ! 189: register struct suftab *t; ! 190: register char *cp, *sp; ! 191: lev += DLEV; ! 192: deriv[lev] = deriv[lev-1] = 0; ! 193: for(t= &suftab[0];sp=t->suf;t++) { ! 194: cp = ep; ! 195: while(*sp) ! 196: if(*--cp!=*sp++) ! 197: goto next; ! 198: for(sp=cp; --sp>=word&&!vowel(*sp); ) ; ! 199: if(sp<word) ! 200: return(0); ! 201: if((*t->p1)(ep-t->n1,t->d1,t->a1,lev+1)) ! 202: return(1); ! 203: if(t->p2!=0) { ! 204: deriv[lev] = deriv[lev+1] = 0; ! 205: return((*t->p2)(ep-t->n2,t->d2,t->a2,lev)); ! 206: } ! 207: return(0); ! 208: next: ; ! 209: } ! 210: return(0); ! 211: } ! 212: ! 213: nop() ! 214: { ! 215: return(0); ! 216: } ! 217: ! 218: strip(ep,d,a,lev) ! 219: char *ep,*d,*a; ! 220: { ! 221: return(putsuf(ep,a,lev)||suffix(ep,lev)); ! 222: } ! 223: ! 224: s(ep,d,a,lev) ! 225: char *ep,*d,*a; ! 226: { ! 227: if(lev>DLEV+1) ! 228: return(0); ! 229: if(*ep=='s'&&ep[-1]=='s') ! 230: return(0); ! 231: return(strip(ep,d,a,lev)); ! 232: } ! 233: ! 234: an(ep,d,a,lev) ! 235: char *ep,*d,*a; ! 236: { ! 237: if(!isupper(*word)) /*must be proper name*/ ! 238: return(0); ! 239: return(putsuf(ep,a,lev)); ! 240: } ! 241: ! 242: ize(ep,d,a,lev) ! 243: char *ep,*d,*a; ! 244: { ! 245: *ep++ = 'e'; ! 246: return(strip(ep,"",d,lev)); ! 247: } ! 248: ! 249: y_to_e(ep,d,a,lev) ! 250: char *ep,*d,*a; ! 251: { ! 252: *ep++ = 'e'; ! 253: return(strip(ep,"",d,lev)); ! 254: } ! 255: ! 256: ily(ep,d,a,lev) ! 257: char *ep,*d,*a; ! 258: { ! 259: if(ep[-1]=='i') ! 260: return(i_to_y(ep,d,a,lev)); ! 261: else ! 262: return(strip(ep,d,a,lev)); ! 263: } ! 264: ! 265: ncy(ep,d,a,lev) ! 266: char *ep, *d, *a; ! 267: { ! 268: if(skipv(skipv(ep-1))<word) ! 269: return(0); ! 270: ep[-1] = 't'; ! 271: return(strip(ep,d,a,lev)); ! 272: } ! 273: ! 274: bility(ep,d,a,lev) ! 275: char *ep,*d,*a; ! 276: { ! 277: *ep++ = 'l'; ! 278: return(y_to_e(ep,d,a,lev)); ! 279: } ! 280: ! 281: i_to_y(ep,d,a,lev) ! 282: char *ep,*d,*a; ! 283: { ! 284: if(ep[-1]=='i') { ! 285: ep[-1] = 'y'; ! 286: a = d; ! 287: } ! 288: return(strip(ep,"",a,lev)); ! 289: } ! 290: ! 291: es(ep,d,a,lev) ! 292: char *ep,*d,*a; ! 293: { ! 294: if(lev>DLEV) ! 295: return(0); ! 296: switch(ep[-1]) { ! 297: default: ! 298: return(0); ! 299: case 'i': ! 300: return(i_to_y(ep,d,a,lev)); ! 301: case 's': ! 302: case 'h': ! 303: case 'z': ! 304: case 'x': ! 305: return(strip(ep,d,a,lev)); ! 306: } ! 307: } ! 308: ! 309: metry(ep,d,a,lev) ! 310: char *ep, *d,*a; ! 311: { ! 312: ep[-2] = 'e'; ! 313: ep[-1] = 'r'; ! 314: return(strip(ep,d,a,lev)); ! 315: } ! 316: ! 317: tion(ep,d,a,lev) ! 318: char *ep,*d,*a; ! 319: { ! 320: switch(ep[-2]) { ! 321: case 'c': ! 322: case 'r': ! 323: return(putsuf(ep,a,lev)); ! 324: case 'a': ! 325: return(y_to_e(ep,d,a,lev)); ! 326: } ! 327: return(0); ! 328: } ! 329: ! 330: /* possible consonant-consonant-e ending*/ ! 331: CCe(ep,d,a,lev) ! 332: char *ep,*d,*a; ! 333: { ! 334: switch(ep[-1]) { ! 335: case 'l': ! 336: if(vowel(ep[-2])) ! 337: break; ! 338: switch(ep[-2]) { ! 339: case 'l': ! 340: case 'r': ! 341: case 'w': ! 342: break; ! 343: default: ! 344: return(y_to_e(ep,d,a,lev)); ! 345: } ! 346: break; ! 347: case 's': ! 348: if(ep[-2]=='s') ! 349: break; ! 350: case 'c': ! 351: case 'g': ! 352: if(*ep=='a') ! 353: return(0); ! 354: case 'v': ! 355: case 'z': ! 356: if(vowel(ep[-2])) ! 357: break; ! 358: case 'u': ! 359: if(y_to_e(ep,d,a,lev)) ! 360: return(1); ! 361: if(!(ep[-2]=='n'&&ep[-1]=='g')) ! 362: return(0); ! 363: } ! 364: return(VCe(ep,d,a,lev)); ! 365: } ! 366: ! 367: /* possible consonant-vowel-consonant-e ending*/ ! 368: VCe(ep,d,a,lev) ! 369: char *ep,*d,*a; ! 370: { ! 371: char c; ! 372: c = ep[-1]; ! 373: if(c=='e') ! 374: return(0); ! 375: if(!vowel(c) && vowel(ep[-2])) { ! 376: c = *ep; ! 377: *ep++ = 'e'; ! 378: if(putsuf(ep,d,lev)||suffix(ep,lev)) ! 379: return(1); ! 380: ep--; ! 381: *ep = c; ! 382: } ! 383: return(strip(ep,d,a,lev)); ! 384: } ! 385: ! 386: char *lookuppref(wp,ep) ! 387: char **wp; ! 388: char *ep; ! 389: { ! 390: register char **sp; ! 391: register char *bp,*cp; ! 392: for(sp=preftab;*sp;sp++) { ! 393: bp = *wp; ! 394: for(cp= *sp;*cp;cp++,bp++) ! 395: if(Tolower(*bp)!=*cp) ! 396: goto next; ! 397: for(cp=bp;cp<ep;cp++) ! 398: if(vowel(*cp)) { ! 399: *wp = bp; ! 400: return(*sp); ! 401: } ! 402: next: ; ! 403: } ! 404: return(0); ! 405: } ! 406: ! 407: putsuf(ep,a,lev) ! 408: char *ep,*a; ! 409: { ! 410: register char *cp; ! 411: char *bp; ! 412: register char *pp; ! 413: int val = 0; ! 414: char space[20]; ! 415: deriv[lev] = a; ! 416: if(putw(word,ep,lev)) ! 417: return(1); ! 418: bp = word; ! 419: pp = space; ! 420: deriv[lev+1] = pp; ! 421: while(cp=lookuppref(&bp,ep)) { ! 422: *pp++ = '+'; ! 423: while(*pp = *cp++) ! 424: pp++; ! 425: if(putw(bp,ep,lev+1)) { ! 426: val = 1; ! 427: break; ! 428: } ! 429: } ! 430: deriv[lev+1] = deriv[lev+2] = 0; ! 431: return(val); ! 432: } ! 433: ! 434: putw(bp,ep,lev) ! 435: char *bp,*ep; ! 436: { ! 437: register i, j; ! 438: char duple[3]; ! 439: if(ep-bp<=1) ! 440: return(0); ! 441: if(vowel(*ep)) { ! 442: if(monosyl(bp,ep)) ! 443: return(0); ! 444: } ! 445: i = dict(bp,ep); ! 446: if(i==0&&vowel(*ep)&&ep[-1]==ep[-2]&&monosyl(bp,ep-1)) { ! 447: ep--; ! 448: deriv[++lev] = duple; ! 449: duple[0] = '+'; ! 450: duple[1] = *ep; ! 451: duple[2] = 0; ! 452: i = dict(bp,ep); ! 453: } ! 454: if(vflag==0||i==0) ! 455: return(i); ! 456: j = lev; ! 457: do { ! 458: if(deriv[j]) ! 459: strcat(affix,deriv[j]); ! 460: } while(--j>0); ! 461: strcat(affix,"\t"); ! 462: return(i); ! 463: } ! 464: ! 465: ! 466: monosyl(bp,ep) ! 467: char *bp, *ep; ! 468: { ! 469: if(ep<bp+2) ! 470: return(0); ! 471: if(vowel(*--ep)||!vowel(*--ep) ! 472: ||ep[1]=='x'||ep[1]=='w') ! 473: return(0); ! 474: while(--ep>=bp) ! 475: if(vowel(*ep)) ! 476: return(0); ! 477: return(1); ! 478: } ! 479: ! 480: char * ! 481: skipv(s) ! 482: char *s; ! 483: { ! 484: if(s>=word&&vowel(*s)) ! 485: s--; ! 486: while(s>=word&&!vowel(*s)) ! 487: s--; ! 488: return(s); ! 489: } ! 490: ! 491: vowel(c) ! 492: { ! 493: switch(Tolower(c)) { ! 494: case 'a': ! 495: case 'e': ! 496: case 'i': ! 497: case 'o': ! 498: case 'u': ! 499: case 'y': ! 500: return(1); ! 501: } ! 502: return(0); ! 503: } ! 504: ! 505: /* crummy way to Britishise */ ! 506: ise() ! 507: { ! 508: register struct suftab *p; ! 509: for(p = suftab;p->suf;p++) { ! 510: ztos(p->suf); ! 511: ztos(p->d1); ! 512: ztos(p->a1); ! 513: } ! 514: } ! 515: ztos(s) ! 516: char *s; ! 517: { ! 518: for(;*s;s++) ! 519: if(*s=='z') ! 520: *s = 's'; ! 521: } ! 522: ! 523: dict(bp,ep) ! 524: char *bp, *ep; ! 525: { ! 526: register char *wp; ! 527: long h; ! 528: register long *lp; ! 529: register i; ! 530: if(xflag) ! 531: printf("=%.*s\n",ep-bp,bp); ! 532: for(i=0; i<NP; i++) { ! 533: for (wp = bp, h = 0, lp = pow2[i]; wp < ep; ++wp, ++lp) ! 534: h += *wp * *lp; ! 535: h += '\n' * *lp; ! 536: h %= p[i]; ! 537: if(get(h)==0) ! 538: return(0); ! 539: } ! 540: return(1); ! 541: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.