|
|
1.1 ! root 1: #ifndef lint ! 2: static char sccsid[] = "@(#)spell.c 4.3 5/15/90"; ! 3: #endif ! 4: ! 5: #include "spell.h" ! 6: #define DLEV 2 ! 7: ! 8: char *strcat(); ! 9: int strip(); ! 10: char *skipv(); ! 11: int an(); ! 12: int s(); ! 13: int es(); ! 14: int ily(); ! 15: int ncy(); ! 16: int CCe(); ! 17: int VCe(); ! 18: int bility(); ! 19: int tion(); ! 20: int ize(); ! 21: int y_to_e(); ! 22: int i_to_y(); ! 23: int nop(); ! 24: int metry(); ! 25: ! 26: struct suftab { ! 27: char *suf; ! 28: int (*p1)(); ! 29: int n1; ! 30: char *d1; ! 31: char *a1; ! 32: int (*p2)(); ! 33: int n2; ! 34: char *d2; ! 35: char *a2; ! 36: } suftab[] = { ! 37: {"ssen",ily,4,"-y+iness","+ness" }, ! 38: {"ssel",ily,4,"-y+i+less","+less" }, ! 39: {"se",s,1,"","+s", es,2,"-y+ies","+es" }, ! 40: {"s'",s,2,"","+'s"}, ! 41: {"s",s,1,"","+s"}, ! 42: {"ecn",ncy,1,"","-t+ce"}, ! 43: {"ycn",ncy,1,"","-cy+t"}, ! 44: {"ytilb",nop,0,"",""}, ! 45: {"ytilib",bility,5,"-le+ility",""}, ! 46: {"elbaif",i_to_y,4,"-y+iable",""}, ! 47: {"elba",CCe,4,"-e+able","+able"}, ! 48: {"yti",CCe,3,"-e+ity","+ity"}, ! 49: {"ylb",y_to_e,1,"-e+y",""}, ! 50: {"yl",ily,2,"-y+ily","+ly"}, ! 51: {"laci",strip,2,"","+al"}, ! 52: {"latnem",strip,2,"","+al"}, ! 53: {"lanoi",strip,2,"","+al"}, ! 54: {"tnem",strip,4,"","+ment"}, ! 55: {"gni",CCe,3,"-e+ing","+ing"}, ! 56: {"reta",nop,0,"",""}, ! 57: {"re",strip,1,"","+r", i_to_y,2,"-y+ier","+er"}, ! 58: {"de",strip,1,"","+d", i_to_y,2,"-y+ied","+ed"}, ! 59: {"citsi",strip,2,"","+ic"}, ! 60: {"cihparg",i_to_y,1,"-y+ic",""}, ! 61: {"tse",strip,2,"","+st", i_to_y,3,"-y+iest","+est"}, ! 62: {"cirtem",i_to_y,1,"-y+ic",""}, ! 63: {"yrtem",metry,0,"-ry+er",""}, ! 64: {"cigol",i_to_y,1,"-y+ic",""}, ! 65: {"tsigol",i_to_y,2,"-y+ist",""}, ! 66: {"tsi",VCe,3,"-e+ist","+ist"}, ! 67: {"msi",VCe,3,"-e+ism","+ist"}, ! 68: {"noitacif",i_to_y,6,"-y+ication",""}, ! 69: {"noitazi",ize,5,"-e+ation",""}, ! 70: {"rota",tion,2,"-e+or",""}, ! 71: {"noit",tion,3,"-e+ion","+ion"}, ! 72: {"naino",an,3,"","+ian"}, ! 73: {"na",an,1,"","+n"}, ! 74: {"evit",tion,3,"-e+ive","+ive"}, ! 75: {"ezi",CCe,3,"-e+ize","+ize"}, ! 76: {"pihs",strip,4,"","+ship"}, ! 77: {"dooh",ily,4,"-y+hood","+hood"}, ! 78: {"ekil",strip,4,"","+like"}, ! 79: 0 ! 80: }; ! 81: ! 82: char *preftab[] = { ! 83: "anti", ! 84: "bio", ! 85: "dis", ! 86: "electro", ! 87: "en", ! 88: "fore", ! 89: "hyper", ! 90: "intra", ! 91: "inter", ! 92: "iso", ! 93: "kilo", ! 94: "magneto", ! 95: "meta", ! 96: "micro", ! 97: "milli", ! 98: "mis", ! 99: "mono", ! 100: "multi", ! 101: "non", ! 102: "out", ! 103: "over", ! 104: "photo", ! 105: "poly", ! 106: "pre", ! 107: "pseudo", ! 108: "re", ! 109: "semi", ! 110: "stereo", ! 111: "sub", ! 112: "super", ! 113: "thermo", ! 114: "ultra", ! 115: "under", /*must precede un*/ ! 116: "un", ! 117: 0 ! 118: }; ! 119: ! 120: int vflag; ! 121: int xflag; ! 122: char word[100]; ! 123: char original[100]; ! 124: char *deriv[40]; ! 125: char affix[40]; ! 126: ! 127: main(argc,argv) ! 128: char **argv; ! 129: { ! 130: register char *ep, *cp; ! 131: register char *dp; ! 132: int fold; ! 133: int j; ! 134: FILE *file, *found; ! 135: if(!prime(argc,argv)) { ! 136: fprintf(stderr, ! 137: "spell: cannot initialize hash table\n"); ! 138: exit(1); ! 139: } ! 140: found = fopen(argv[2],"w"); ! 141: for(argc-=3,argv+=3; argc>0 && argv[0][0]=='-'; argc--,argv++) ! 142: switch(argv[0][1]) { ! 143: case 'b': ! 144: ise(); ! 145: break; ! 146: case 'v': ! 147: vflag++; ! 148: break; ! 149: case 'x': ! 150: xflag++; ! 151: break; ! 152: } ! 153: for(;; fprintf(file,"%s%s\n",affix,original)) { ! 154: affix[0] = 0; ! 155: file = found; ! 156: for(ep=word;(*ep=j=getchar())!='\n';ep++) ! 157: if(j == EOF) { ! 158: fclose(found); ! 159: exit(0); ! 160: } ! 161: for(cp=word,dp=original; cp<ep; ) ! 162: *dp++ = *cp++; ! 163: *dp = 0; ! 164: fold = 0; ! 165: for(cp=word;cp<ep;cp++) ! 166: if(islower(*cp)) ! 167: goto lcase; ! 168: if(putsuf(ep,".",0)) ! 169: continue; ! 170: ++fold; ! 171: for(cp=original+1,dp=word+1;dp<ep;dp++,cp++) ! 172: *dp = Tolower(*cp); ! 173: lcase: ! 174: if(putsuf(ep,".",0)||suffix(ep,0)) ! 175: continue; ! 176: if(isupper(word[0])) { ! 177: for(cp=original,dp=word; *dp = *cp++; dp++) ! 178: if (fold) *dp = Tolower(*dp); ! 179: word[0] = Tolower(word[0]); ! 180: goto lcase; ! 181: } ! 182: file = stdout; ! 183: } ! 184: } ! 185: ! 186: suffix(ep,lev) ! 187: char *ep; ! 188: { ! 189: register struct suftab *t; ! 190: register char *cp, *sp; ! 191: lev += DLEV; ! 192: deriv[lev] = deriv[lev-1] = 0; ! 193: for(t= &suftab[0];sp=t->suf;t++) { ! 194: cp = ep; ! 195: while(*sp) ! 196: if(*--cp!=*sp++) ! 197: goto next; ! 198: for(sp=cp; --sp>=word&&!vowel(*sp); ) ; ! 199: if(sp<word) ! 200: return(0); ! 201: if((*t->p1)(ep-t->n1,t->d1,t->a1,lev+1)) ! 202: return(1); ! 203: if(t->p2!=0) { ! 204: deriv[lev] = deriv[lev+1] = 0; ! 205: return((*t->p2)(ep-t->n2,t->d2,t->a2,lev)); ! 206: } ! 207: return(0); ! 208: next: ; ! 209: } ! 210: return(0); ! 211: } ! 212: ! 213: nop() ! 214: { ! 215: return(0); ! 216: } ! 217: ! 218: strip(ep,d,a,lev) ! 219: char *ep,*d,*a; ! 220: { ! 221: return(putsuf(ep,a,lev)||suffix(ep,lev)); ! 222: } ! 223: ! 224: s(ep,d,a,lev) ! 225: char *ep,*d,*a; ! 226: { ! 227: if(lev>DLEV+1) ! 228: return(0); ! 229: if(*ep=='s'&&ep[-1]=='s') ! 230: return(0); ! 231: return(strip(ep,d,a,lev)); ! 232: } ! 233: ! 234: an(ep,d,a,lev) ! 235: char *ep,*d,*a; ! 236: { ! 237: if(!isupper(*word)) /*must be proper name*/ ! 238: return(0); ! 239: return(putsuf(ep,a,lev)); ! 240: } ! 241: ! 242: ize(ep,d,a,lev) ! 243: char *ep,*d,*a; ! 244: { ! 245: *ep++ = 'e'; ! 246: return(strip(ep,"",d,lev)); ! 247: } ! 248: ! 249: y_to_e(ep,d,a,lev) ! 250: char *ep,*d,*a; ! 251: { ! 252: char c = *ep; ! 253: *ep++ = 'e'; ! 254: if (strip(ep,"",d,lev)) ! 255: return (1); ! 256: ep[-1] = c; ! 257: return (0); ! 258: } ! 259: ! 260: ily(ep,d,a,lev) ! 261: char *ep,*d,*a; ! 262: { ! 263: if(ep[-1]=='i') ! 264: return(i_to_y(ep,d,a,lev)); ! 265: else ! 266: return(strip(ep,d,a,lev)); ! 267: } ! 268: ! 269: ncy(ep,d,a,lev) ! 270: char *ep, *d, *a; ! 271: { ! 272: if(skipv(skipv(ep-1))<word) ! 273: return(0); ! 274: ep[-1] = 't'; ! 275: return(strip(ep,d,a,lev)); ! 276: } ! 277: ! 278: bility(ep,d,a,lev) ! 279: char *ep,*d,*a; ! 280: { ! 281: *ep++ = 'l'; ! 282: return(y_to_e(ep,d,a,lev)); ! 283: } ! 284: ! 285: i_to_y(ep,d,a,lev) ! 286: char *ep,*d,*a; ! 287: { ! 288: if(ep[-1]=='i') { ! 289: ep[-1] = 'y'; ! 290: a = d; ! 291: } ! 292: return(strip(ep,"",a,lev)); ! 293: } ! 294: ! 295: es(ep,d,a,lev) ! 296: char *ep,*d,*a; ! 297: { ! 298: if(lev>DLEV) ! 299: return(0); ! 300: switch(ep[-1]) { ! 301: default: ! 302: return(0); ! 303: case 'i': ! 304: return(i_to_y(ep,d,a,lev)); ! 305: case 's': ! 306: case 'h': ! 307: case 'z': ! 308: case 'x': ! 309: return(strip(ep,d,a,lev)); ! 310: } ! 311: } ! 312: ! 313: metry(ep,d,a,lev) ! 314: char *ep, *d,*a; ! 315: { ! 316: ep[-2] = 'e'; ! 317: ep[-1] = 'r'; ! 318: return(strip(ep,d,a,lev)); ! 319: } ! 320: ! 321: tion(ep,d,a,lev) ! 322: char *ep,*d,*a; ! 323: { ! 324: switch(ep[-2]) { ! 325: case 'c': ! 326: case 'r': ! 327: return(putsuf(ep,a,lev)); ! 328: case 'a': ! 329: return(y_to_e(ep,d,a,lev)); ! 330: } ! 331: return(0); ! 332: } ! 333: ! 334: /* possible consonant-consonant-e ending*/ ! 335: CCe(ep,d,a,lev) ! 336: char *ep,*d,*a; ! 337: { ! 338: switch(ep[-1]) { ! 339: case 'l': ! 340: if(vowel(ep[-2])) ! 341: break; ! 342: switch(ep[-2]) { ! 343: case 'l': ! 344: case 'r': ! 345: case 'w': ! 346: break; ! 347: default: ! 348: return(y_to_e(ep,d,a,lev)); ! 349: } ! 350: break; ! 351: case 's': ! 352: if(ep[-2]=='s') ! 353: break; ! 354: case 'c': ! 355: case 'g': ! 356: if(*ep=='a') ! 357: return(0); ! 358: case 'v': ! 359: case 'z': ! 360: if(vowel(ep[-2])) ! 361: break; ! 362: case 'u': ! 363: if(y_to_e(ep,d,a,lev)) ! 364: return(1); ! 365: if(!(ep[-2]=='n'&&ep[-1]=='g')) ! 366: return(0); ! 367: } ! 368: return(VCe(ep,d,a,lev)); ! 369: } ! 370: ! 371: /* possible consonant-vowel-consonant-e ending*/ ! 372: VCe(ep,d,a,lev) ! 373: char *ep,*d,*a; ! 374: { ! 375: char c; ! 376: c = ep[-1]; ! 377: if(c=='e') ! 378: return(0); ! 379: if(!vowel(c) && vowel(ep[-2])) { ! 380: c = *ep; ! 381: *ep++ = 'e'; ! 382: if(putsuf(ep,d,lev)||suffix(ep,lev)) ! 383: return(1); ! 384: ep--; ! 385: *ep = c; ! 386: } ! 387: return(strip(ep,d,a,lev)); ! 388: } ! 389: ! 390: char *lookuppref(wp,ep) ! 391: char **wp; ! 392: char *ep; ! 393: { ! 394: register char **sp; ! 395: register char *bp,*cp; ! 396: for(sp=preftab;*sp;sp++) { ! 397: bp = *wp; ! 398: for(cp= *sp;*cp;cp++,bp++) ! 399: if(Tolower(*bp)!=*cp) ! 400: goto next; ! 401: for(cp=bp;cp<ep;cp++) ! 402: if(vowel(*cp)) { ! 403: *wp = bp; ! 404: return(*sp); ! 405: } ! 406: next: ; ! 407: } ! 408: return(0); ! 409: } ! 410: ! 411: putsuf(ep,a,lev) ! 412: char *ep,*a; ! 413: { ! 414: register char *cp; ! 415: char *bp; ! 416: register char *pp; ! 417: int val = 0; ! 418: char space[20]; ! 419: deriv[lev] = a; ! 420: if(putw(word,ep,lev)) ! 421: return(1); ! 422: bp = word; ! 423: pp = space; ! 424: deriv[lev+1] = pp; ! 425: while(cp=lookuppref(&bp,ep)) { ! 426: *pp++ = '+'; ! 427: while(*pp = *cp++) ! 428: pp++; ! 429: if(putw(bp,ep,lev+1)) { ! 430: val = 1; ! 431: break; ! 432: } ! 433: } ! 434: deriv[lev+1] = deriv[lev+2] = 0; ! 435: return(val); ! 436: } ! 437: ! 438: putw(bp,ep,lev) ! 439: char *bp,*ep; ! 440: { ! 441: register i, j; ! 442: char duple[3]; ! 443: if(ep-bp<=1) ! 444: return(0); ! 445: if(vowel(*ep)) { ! 446: if(monosyl(bp,ep)) ! 447: return(0); ! 448: } ! 449: i = dict(bp,ep); ! 450: if(i==0&&vowel(*ep)&&ep[-1]==ep[-2]&&monosyl(bp,ep-1)) { ! 451: ep--; ! 452: deriv[++lev] = duple; ! 453: duple[0] = '+'; ! 454: duple[1] = *ep; ! 455: duple[2] = 0; ! 456: i = dict(bp,ep); ! 457: } ! 458: if(vflag==0||i==0) ! 459: return(i); ! 460: j = lev; ! 461: do { ! 462: if(deriv[j]) ! 463: strcat(affix,deriv[j]); ! 464: } while(--j>0); ! 465: strcat(affix,"\t"); ! 466: return(i); ! 467: } ! 468: ! 469: ! 470: monosyl(bp,ep) ! 471: char *bp, *ep; ! 472: { ! 473: if(ep<bp+2) ! 474: return(0); ! 475: if(vowel(*--ep)||!vowel(*--ep) ! 476: ||ep[1]=='x'||ep[1]=='w') ! 477: return(0); ! 478: while(--ep>=bp) ! 479: if(vowel(*ep)) ! 480: return(0); ! 481: return(1); ! 482: } ! 483: ! 484: char * ! 485: skipv(s) ! 486: char *s; ! 487: { ! 488: if(s>=word&&vowel(*s)) ! 489: s--; ! 490: while(s>=word&&!vowel(*s)) ! 491: s--; ! 492: return(s); ! 493: } ! 494: ! 495: vowel(c) ! 496: { ! 497: switch(Tolower(c)) { ! 498: case 'a': ! 499: case 'e': ! 500: case 'i': ! 501: case 'o': ! 502: case 'u': ! 503: case 'y': ! 504: return(1); ! 505: } ! 506: return(0); ! 507: } ! 508: ! 509: /* crummy way to Britishise */ ! 510: ise() ! 511: { ! 512: register struct suftab *p; ! 513: for(p = suftab;p->suf;p++) { ! 514: ztos(p->suf); ! 515: ztos(p->d1); ! 516: ztos(p->a1); ! 517: } ! 518: } ! 519: ztos(s) ! 520: char *s; ! 521: { ! 522: for(;*s;s++) ! 523: if(*s=='z') ! 524: *s = 's'; ! 525: } ! 526: ! 527: dict(bp,ep) ! 528: char *bp, *ep; ! 529: { ! 530: register char *wp; ! 531: long h; ! 532: register long *lp; ! 533: register i; ! 534: if(xflag) ! 535: printf("=%.*s\n",ep-bp,bp); ! 536: for(i=0; i<NP; i++) { ! 537: for (wp = bp, h = 0, lp = pow2[i]; wp < ep; ++wp, ++lp) ! 538: h += *wp * *lp; ! 539: h += '\n' * *lp; ! 540: h %= p[i]; ! 541: if(get(h)==0) ! 542: return(0); ! 543: } ! 544: return(1); ! 545: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.