|
|
1.1 ! root 1: ! 2: #include "code.h" ! 3: ! 4: #ifndef _POSIX_SOURCE ! 5: ! 6: #include <stdio.h> ! 7: #include <ctype.h> ! 8: #include <libc.h> ! 9: ! 10: #else ! 11: ! 12: #include <fcntl.h> ! 13: #include <stdio.h> ! 14: #include <ctype.h> ! 15: #include <stdlib.h> ! 16: #include <unistd.h> ! 17: #include <string.h> ! 18: ! 19: #endif ! 20: ! 21: #define isvowel(c) voweltab[c] ! 22: #define Tolower(c) (isupper(c)? (c)-'A'+'a': (c)) ! 23: #define pair(a,b) (((a)<<8) | (b)) ! 24: #define DLEV 2 ! 25: #define DSIZ 40 ! 26: ! 27: typedef long Bits; ! 28: typedef unsigned char uchar; ! 29: #define Set(h, f) ((long)(h) & (f)) ! 30: ! 31: Bits nop(char*, char*, char*, int, int); ! 32: Bits strip(char*, char*, char*, int, int); ! 33: Bits ize(char*, char*, char*, int, int); ! 34: Bits i_to_y(char*, char*, char*, int, int); ! 35: Bits ily(char*, char*, char*, int, int); ! 36: Bits subst(char*, char*, char*, int, int); ! 37: Bits CCe(char*, char*, char*, int, int); ! 38: Bits tion(char*, char*, char*, int, int); ! 39: Bits an(char*, char*, char*, int, int); ! 40: Bits s(char*, char*, char*, int, int); ! 41: Bits es(char*, char*, char*, int, int); ! 42: Bits bility(char*, char*, char*, int, int); ! 43: Bits y_to_e(char*, char*, char*, int, int); ! 44: Bits VCe(char*, char*, char*, int, int); ! 45: ! 46: Bits trypref(char*, char*, int, int); ! 47: Bits tryword(char*, char*, int, int); ! 48: Bits trysuff(char*, int, int); ! 49: Bits dict(char*, char*); ! 50: void typeprint(Bits); ! 51: void pcomma(char*); ! 52: ! 53: void runout(char*); ! 54: void ise(void); ! 55: int ordinal(void); ! 56: char* skipv(char*); ! 57: int inun(char*, Bits); ! 58: char* ztos(char*); ! 59: void readdict(char*); ! 60: ! 61: char *strdupl(char *); ! 62: ! 63: typedef struct Ptab Ptab; ! 64: struct Ptab ! 65: { ! 66: char* s; ! 67: int flag; ! 68: }; ! 69: ! 70: typedef struct Suftab Suftab; ! 71: struct Suftab ! 72: { ! 73: char *suf; ! 74: Bits (*p1)(char*, char*, char*, int, int); ! 75: int n1; ! 76: char *d1; ! 77: char *a1; ! 78: int flag; ! 79: int affixable; ! 80: Bits (*p2)(char*, char*, char*, int, int); ! 81: int n2; ! 82: char *d2; ! 83: char *a2; ! 84: }; ! 85: ! 86: Suftab staba[] = { ! 87: {"aibohp",subst,1,"-e+ia","",NOUN, NOUN}, ! 88: 0 ! 89: }; ! 90: ! 91: Suftab stabc[] = ! 92: { ! 93: {"cai",strip,1,"","+c",N_AFFIX, ADJ|NOUN}, ! 94: {"citsi",strip,2,"","+ic",N_AFFIX, ADJ | N_AFFIX | NOUN}, ! 95: {"citi",ize,1,"-e+ic","",N_AFFIX, ADJ }, ! 96: {"cihparg",i_to_y,1,"-y+ic","",NOUN, ADJ|NOUN }, ! 97: {"cipocs",ize,1,"-e+ic","",NOUN, ADJ }, ! 98: {"cirtem",i_to_y,1,"-y+ic","",NOUN, ADJ }, ! 99: {"cigol",i_to_y,1,"-y+ic","",NOUN, ADJ }, ! 100: {"cimono",i_to_y,1,"-y+ic","",NOUN, ADJ }, ! 101: {"cibohp",subst,1,"-e+ic","",NOUN, ADJ }, ! 102: 0 ! 103: }; ! 104: Suftab stabd[] = ! 105: { ! 106: {"de",strip,1,"","+d",ED,ADJ |COMP,i_to_y,2,"-y+ied","+ed"}, ! 107: {"dooh",ily,4,"-y+ihood","+hood",NOUN | ADV, NOUN}, ! 108: 0 ! 109: }; ! 110: Suftab stabe[] = ! 111: { ! 112: /* ! 113: * V_affix for comment ->commence->commentment?? ! 114: */ ! 115: {"ecn",subst,1,"-t+ce","",ADJ,N_AFFIX|_Y|NOUN|VERB|ACTOR|V_AFFIX}, ! 116: {"elbaif",i_to_y,4,"-y+iable","",V_IRREG,ADJ}, ! 117: {"elba",CCe,4,"-e+able","+able",V_AFFIX,ADJ}, ! 118: {"evi",subst,0,"-ion+ive","",N_AFFIX | V_AFFIX,NOUN | N_AFFIX| ADJ}, ! 119: {"ezi",CCe,3,"-e+ize","+ize",N_AFFIX|ADJ ,V_AFFIX | VERB |ION | COMP}, ! 120: {"ekil",strip,4,"","+like",N_AFFIX ,ADJ}, ! 121: 0 ! 122: }; ! 123: Suftab stabg[] = ! 124: { ! 125: {"gniee",strip,3,"","+ing",V_IRREG ,ADJ|NOUN}, ! 126: {"gnikam",strip,6,"","+making",NOUN,NOUN}, ! 127: {"gnipeek",strip,7,"","+keeping",NOUN,NOUN}, ! 128: {"gni",CCe,3,"-e+ing","+ing",V_IRREG ,ADJ|ED|NOUN}, ! 129: 0 ! 130: }; ! 131: Suftab stabl[] = ! 132: { ! 133: {"ladio",strip,2,"","+al",NOUN |ADJ,ADJ}, ! 134: {"laci",strip,2,"","+al",NOUN |ADJ,ADJ |NOUN|N_AFFIX}, ! 135: {"latnem",strip,2,"","+al",N_AFFIX,ADJ}, ! 136: {"lanoi",strip,2,"","+al",N_AFFIX,ADJ|NOUN}, ! 137: {"luf",ily,3,"-y+iful","+ful",N_AFFIX,ADJ | NOUN}, ! 138: 0 ! 139: }; ! 140: Suftab stabm[] = ! 141: { ! 142: /* congregational + ism */ ! 143: {"msi",CCe,3,"-e+ism","ism",N_AFFIX|ADJ,NOUN}, ! 144: {"margo",subst,-1,"-ph+m","",NOUN,NOUN}, ! 145: 0 ! 146: }; ! 147: Suftab stabn[] = ! 148: { ! 149: {"noitacifi",i_to_y,6,"-y+ication","",ION,NOUN | N_AFFIX}, ! 150: {"noitazi",ize,4,"-e+ation","",ION,NOUN| N_AFFIX}, ! 151: {"noit",tion,3,"-e+ion","+ion",ION,NOUN| N_AFFIX | V_AFFIX |VERB|ACTOR}, ! 152: {"naino",an,3,"","+ian",NOUN|PROP_COLLECT,NOUN| N_AFFIX}, ! 153: {"namow",strip,5,"","+woman",MAN,PROP_COLLECT|N_AFFIX}, ! 154: {"nam",strip,3,"","+man",MAN,PROP_COLLECT | N_AFFIX | VERB}, ! 155: {"na",an,1,"","+n",NOUN|PROP_COLLECT,NOUN | N_AFFIX}, ! 156: {"nemow",strip,5,"","+women",MAN,PROP_COLLECT}, ! 157: {"nem",strip,3,"","+man",MAN,PROP_COLLECT}, ! 158: 0 ! 159: }; ! 160: Suftab stabp[] = ! 161: { ! 162: {"pihs",strip,4,"","+ship",NOUN|PROP_COLLECT,NOUN| N_AFFIX}, ! 163: 0 ! 164: }; ! 165: Suftab stabr[] = ! 166: { ! 167: {"rehparg",subst,1,"-y+er","",ACTOR,NOUN,strip,2,"","+er"}, ! 168: {"reyhparg",nop,0,"","",0,NOUN}, ! 169: {"rekam",strip,5,"","+maker",NOUN,NOUN}, ! 170: {"repeek",strip,6,"","+keeper",NOUN,NOUN}, ! 171: {"re",strip,1,"","+r",ACTOR,NOUN | N_AFFIX|VERB|ADJ, i_to_y,2,"-y+ier","+er"}, ! 172: {"rota",tion,2,"-e+or","",ION,NOUN| N_AFFIX|_Y}, ! 173: {"rotc",tion,2,"","+or",ION,NOUN| N_AFFIX}, ! 174: {"rotp",tion,2,"","+or",ION,NOUN| N_AFFIX}, ! 175: 0 ! 176: }; ! 177: Suftab stabs[] = ! 178: { ! 179: {"ssen",ily,4,"-y+iness","+ness",ADJ|ADV,NOUN| N_AFFIX}, ! 180: {"ssel",ily,4,"-y+iless","+less",NOUN | PROP_COLLECT,ADJ }, ! 181: {"se",s,1,"","+s",NOUN | V_IRREG,DONT_TOUCH , es,2,"-y+ies","+es"}, ! 182: {"s'",s,2,"","+'s",PROP_COLLECT | NOUN,DONT_TOUCH }, ! 183: {"s",s,1,"","+s",NOUN | V_IRREG,DONT_TOUCH }, ! 184: 0 ! 185: }; ! 186: Suftab stabt[] = ! 187: { ! 188: {"tnem",strip,4,"","+ment",V_AFFIX,NOUN | N_AFFIX | ADJ|VERB}, ! 189: {"tse",strip,2,"","+st",EST,DONT_TOUCH, i_to_y,3,"-y+iest","+est" }, ! 190: {"tsigol",i_to_y,2,"-y+ist","",N_AFFIX,NOUN | N_AFFIX}, ! 191: {"tsi",CCe,3,"-e+ist","+ist",N_AFFIX|ADJ,NOUN | N_AFFIX|COMP}, ! 192: 0 ! 193: }; ! 194: Suftab staby[] = ! 195: { ! 196: {"ytilb",nop,0,"","",0,NOUN}, ! 197: {"ycn",subst,1,"-t+cy","",ADJ | N_AFFIX,NOUN | N_AFFIX}, ! 198: {"ytilib",bility,5,"-le+ility","",ADJ | V_AFFIX,NOUN | N_AFFIX}, ! 199: {"ytisuo",nop,0,"","",NOUN}, ! 200: {"yti",CCe,3,"-e+ity","+ity",ADJ ,NOUN | N_AFFIX }, ! 201: {"ylb",y_to_e,1,"-e+y","",ADJ,ADV}, ! 202: {"ylc",nop,0,"","",0}, ! 203: {"yl",ily,2,"-y+ily","+ly",ADJ,ADV|COMP}, ! 204: {"yrtem",subst,0,"-er+ry","",NOUN,NOUN | N_AFFIX}, ! 205: {"y",CCe,1,"-e+y","+y",_Y,ADJ|COMP}, ! 206: 0 ! 207: }; ! 208: Suftab stabz[] = ! 209: { ! 210: 0 ! 211: }; ! 212: Suftab* suftab[] = ! 213: { ! 214: staba, ! 215: stabz, ! 216: stabc, ! 217: stabd, ! 218: stabe, ! 219: stabz, ! 220: stabg, ! 221: stabz, ! 222: stabz, ! 223: stabz, ! 224: stabz, ! 225: stabl, ! 226: stabm, ! 227: stabn, ! 228: stabz, ! 229: stabp, ! 230: stabz, ! 231: stabr, ! 232: stabs, ! 233: stabt, ! 234: stabz, ! 235: stabz, ! 236: stabz, ! 237: stabz, ! 238: staby, ! 239: stabz, ! 240: }; ! 241: ! 242: Ptab ptaba[] = ! 243: { ! 244: "anti", 0, ! 245: "auto", 0, ! 246: 0 ! 247: }; ! 248: Ptab ptabb[] = ! 249: { ! 250: "bio", 0, ! 251: 0 ! 252: }; ! 253: Ptab ptabc[] = ! 254: { ! 255: "counter", 0, ! 256: 0 ! 257: }; ! 258: Ptab ptabd[] = ! 259: { ! 260: "dis", 0, ! 261: 0 ! 262: }; ! 263: Ptab ptabe[] = ! 264: { ! 265: "electro", 0, ! 266: 0 ! 267: }; ! 268: Ptab ptabf[] = ! 269: { ! 270: "femto", 0, ! 271: 0 ! 272: }; ! 273: Ptab ptabg[] = ! 274: { ! 275: "geo", 0, ! 276: "giga", 0, ! 277: 0 ! 278: }; ! 279: Ptab ptabh[] = ! 280: { ! 281: "hyper", 0, ! 282: 0 ! 283: }; ! 284: Ptab ptabi[] = ! 285: { ! 286: "immuno", 0, ! 287: "im", IN, ! 288: "intra", 0, ! 289: "inter", 0, ! 290: "in", IN, ! 291: "ir", IN, ! 292: "iso", 0, ! 293: 0 ! 294: }; ! 295: Ptab ptabj[] = ! 296: { ! 297: 0 ! 298: }; ! 299: Ptab ptabk[] = ! 300: { ! 301: "kilo", 0, ! 302: 0 ! 303: }; ! 304: Ptab ptabl[] = ! 305: { ! 306: 0 ! 307: }; ! 308: Ptab ptabm[] = ! 309: { ! 310: "magneto", 0, ! 311: "mega", 0, ! 312: "meta", 0, ! 313: "micro", 0, ! 314: "mid", 0, ! 315: "milli", 0, ! 316: "mini", 0, ! 317: "mis", 0, ! 318: "mono", 0, ! 319: "multi", 0, ! 320: 0 ! 321: }; ! 322: Ptab ptabn[] = ! 323: { ! 324: "nano", 0, ! 325: "neuro", 0, ! 326: "non", 0, ! 327: 0 ! 328: }; ! 329: Ptab ptabo[] = ! 330: { ! 331: "out", 0, ! 332: "over", 0, ! 333: 0 ! 334: }; ! 335: Ptab ptabp[] = ! 336: { ! 337: "para", 0, ! 338: "photo", 0, ! 339: "pico", 0, ! 340: "poly", 0, ! 341: "pre", 0, ! 342: "pseudo", 0, ! 343: "psycho", 0, ! 344: 0 ! 345: }; ! 346: Ptab ptabq[] = ! 347: { ! 348: "quasi", 0, ! 349: 0 ! 350: }; ! 351: Ptab ptabr[] = ! 352: { ! 353: "radio", 0, ! 354: "re", 0, ! 355: 0 ! 356: }; ! 357: Ptab ptabs[] = ! 358: { ! 359: "semi", 0, ! 360: "stereo", 0, ! 361: "sub", 0, ! 362: "super", 0, ! 363: 0 ! 364: }; ! 365: Ptab ptabt[] = ! 366: { ! 367: "tele", 0, ! 368: "thermo", 0, ! 369: 0 ! 370: }; ! 371: Ptab ptabu[] = ! 372: { ! 373: "ultra", 0, ! 374: "under", 0, /*must precede un*/ ! 375: "un", IN, ! 376: 0 ! 377: }; ! 378: Ptab ptabv[] = ! 379: { ! 380: 0 ! 381: }; ! 382: Ptab ptabw[] = ! 383: { ! 384: 0 ! 385: }; ! 386: Ptab ptabx[] = ! 387: { ! 388: 0 ! 389: }; ! 390: Ptab ptaby[] = ! 391: { ! 392: 0 ! 393: }; ! 394: Ptab ptabz[] = ! 395: { ! 396: 0 ! 397: }; ! 398: ! 399: Ptab* preftab[] = ! 400: { ! 401: ptaba, ! 402: ptabb, ! 403: ptabc, ! 404: ptabd, ! 405: ptabe, ! 406: ptabf, ! 407: ptabg, ! 408: ptabh, ! 409: ptabi, ! 410: ptabj, ! 411: ptabk, ! 412: ptabl, ! 413: ptabm, ! 414: ptabn, ! 415: ptabo, ! 416: ptabp, ! 417: ptabq, ! 418: ptabr, ! 419: ptabs, ! 420: ptabt, ! 421: ptabu, ! 422: ptabv, ! 423: ptabw, ! 424: ptabx, ! 425: ptaby, ! 426: ptabz, ! 427: }; ! 428: ! 429: typedef struct { ! 430: char *mesg; ! 431: enum { NONE, SUFF, PREF} type; ! 432: } Deriv; ! 433: ! 434: int cflag; ! 435: int fflag; ! 436: int vflag; ! 437: int xflag; ! 438: char word[500]; ! 439: char original[500]; ! 440: Deriv deriv[DSIZ+3]; ! 441: Deriv emptyderiv; ! 442: char affix[DSIZ*10]; /* 10 is longest affix message */ ! 443: int prefcount; ! 444: int suffcount; ! 445: char space[300000]; /* must be as large as "words"+"space" in pcode run */ ! 446: Bits encode[2048]; /* must be as long as "codes" in pcode run */ ! 447: int nencode; ! 448: char voweltab[128]; ! 449: char* spacep[128*128+1]; /* pointer to words starting with 'xx' */ ! 450: ! 451: char* codefile = "/usr/lib/spell/amspell"; ! 452: char* brfile = "/usr/lib/spell/brspell"; ! 453: ! 454: main(int argc, char *argv[]) ! 455: { ! 456: char *ep, *cp; ! 457: char *dp; ! 458: int j, i, c; ! 459: int low = 0; ! 460: Bits h; ! 461: ! 462: for(i=0; c = "aeiouyAEIOUY"[i]; i++) ! 463: voweltab[c] = 1; ! 464: while(argc > 1) { ! 465: if(argv[1][0] != '-') ! 466: break; ! 467: for(i=1; c = argv[1][i]; i++) ! 468: switch(c) { ! 469: default: ! 470: fprintf(stderr, "usage: spell [-bcvx] [-f file]\n"); ! 471: exit(1); ! 472: ! 473: case 'b': ! 474: ise(); ! 475: if(!fflag) ! 476: codefile = brfile; ! 477: continue; ! 478: ! 479: case 'C': ! 480: vflag++; ! 481: case 'c': ! 482: setbuf(stdout,0); ! 483: cflag++; ! 484: continue; ! 485: ! 486: case 'v': ! 487: vflag++; ! 488: continue; ! 489: ! 490: case 'x': ! 491: xflag++; ! 492: continue; ! 493: ! 494: case 'f': ! 495: if(argc <= 2) { ! 496: fprintf(stderr, "spell: -f requires another argument\n"); ! 497: exit(1); ! 498: } ! 499: argv++; ! 500: argc--; ! 501: fflag++; ! 502: codefile = argv[1]; ! 503: goto brk; ! 504: } ! 505: brk: ! 506: argv++; ! 507: argc--; ! 508: } ! 509: readdict(codefile); ! 510: if(argc > 1) { ! 511: fprintf(stderr, "usage: spell [-bcvx] [-f file]\n"); ! 512: exit(1); ! 513: } ! 514: for(;;) { ! 515: loop: ! 516: affix[0] = 0; ! 517: for(ep=original; ; ep++) { ! 518: if(ep >= original + sizeof(original) - 1) { ! 519: *ep = 0; ! 520: runout(original); ! 521: goto loop; ! 522: } ! 523: j = getchar(); ! 524: if(j == EOF) ! 525: exit(0); ! 526: if(j != '\n') ! 527: *ep = j; ! 528: else { ! 529: *ep = 0; ! 530: break; ! 531: } ! 532: } ! 533: low = 0; ! 534: for(ep=word,dp=original; j = *dp; ep++,dp++) { ! 535: if(islower(j)) ! 536: low++; ! 537: if(ep >= word+sizeof(word)-1) ! 538: break; ! 539: *ep = j; ! 540: } ! 541: *ep = 0; ! 542: ! 543: h = ~STOP; ! 544: if(word[1] == 0 && isalnum(word[0]) || ! 545: isdigit(word[0]) && ordinal()) ! 546: goto check; ! 547: ! 548: h = 0; ! 549: if(!low && !(h = trypref(ep,".",0,ALL|STOP|DONT_TOUCH))) ! 550: for(cp=original+1,dp=word+1; dp<ep; dp++,cp++) ! 551: *dp = Tolower(*cp); ! 552: if(!h) ! 553: for(;;) { /* at most twice */ ! 554: if(h = trypref(ep,".",0,ALL|STOP|DONT_TOUCH)) ! 555: break; ! 556: if(h = trysuff(ep,0,ALL|STOP|DONT_TOUCH)) ! 557: break; ! 558: if(!isupper(word[0])) ! 559: break; ! 560: cp = original; ! 561: dp = word; ! 562: while(*dp = *cp++) { ! 563: if(!low) ! 564: *dp = Tolower(*dp); ! 565: dp++; ! 566: } ! 567: word[0] = Tolower(word[0]); ! 568: } ! 569: check: ! 570: if(cflag) { ! 571: if(!h || Set(h,STOP)) ! 572: putchar('-'); ! 573: else if(!vflag) ! 574: putchar('+'); ! 575: else ! 576: putchar('0' + (suffcount>0) + ! 577: (prefcount>4? 8: 2*prefcount)); ! 578: } else if(!h || Set(h,STOP)) ! 579: printf("%s\n", original); ! 580: else if(affix[0] != 0 && affix[0] != '.') ! 581: printf("%s\t%s\n", affix, original); ! 582: } ! 583: } ! 584: ! 585: /* strip exactly one suffix and do ! 586: * indicated routine(s), which may recursively ! 587: * strip suffixes ! 588: */ ! 589: Bits ! 590: trysuff(char* ep, int lev, int flag) ! 591: { ! 592: Suftab *t; ! 593: char *cp, *sp; ! 594: Bits h = 0; ! 595: int initchar = ep[-1]; ! 596: ! 597: flag &= ~MONO; ! 598: lev += DLEV; ! 599: if(lev<DSIZ) ! 600: deriv[lev] = deriv[lev-1] = emptyderiv; ! 601: if(!islower(initchar)) ! 602: return h; ! 603: for(t=suftab[initchar-'a']; sp=t->suf; t++) { ! 604: cp = ep; ! 605: while(*sp) ! 606: if(*--cp != *sp++) ! 607: goto next; ! 608: for(sp=ep-t->n1; --sp >= word && !isvowel(*sp);) ! 609: ; ! 610: if(sp < word) ! 611: continue; ! 612: if(!(t->affixable & flag)) ! 613: return 0; ! 614: h = (*t->p1)(ep-t->n1, t->d1, t->a1, lev+1, t->flag|STOP); ! 615: if(!h && t->p2!=0) { ! 616: if(lev<DSIZ) ! 617: deriv[lev] = deriv[lev+1] = emptyderiv; ! 618: h = (*t->p2)(ep-t->n2, t->d2, t->a2, lev, t->flag|STOP); ! 619: } ! 620: break; ! 621: next:; ! 622: } ! 623: return h; ! 624: } ! 625: ! 626: Bits ! 627: nop(char* ep, char* d, char* a, int lev, int flag) ! 628: { ! 629: #pragma ref ep ! 630: #pragma ref d ! 631: #pragma ref a ! 632: #pragma ref lev ! 633: #pragma ref flag ! 634: return 0; ! 635: } ! 636: ! 637: Bits ! 638: cstrip(char* ep, char* d, char* a, int lev, int flag) ! 639: { ! 640: int temp = ep[0]; ! 641: ! 642: if(isvowel(temp) && isvowel(ep[-1])) { ! 643: switch(pair(ep[-1],ep[0])) { ! 644: case pair('a', 'a'): ! 645: case pair('a', 'e'): ! 646: case pair('a', 'i'): ! 647: case pair('e', 'a'): ! 648: case pair('e', 'e'): ! 649: case pair('e', 'i'): ! 650: case pair('i', 'i'): ! 651: case pair('o', 'a'): ! 652: return 0; ! 653: } ! 654: } else ! 655: if(temp==ep[-1]&&temp==ep[-2]) ! 656: return 0; ! 657: return strip(ep,d,a,lev,flag); ! 658: } ! 659: ! 660: Bits ! 661: strip(char* ep, char* d, char* a, int lev, int flag) ! 662: { ! 663: #pragma ref d ! 664: Bits h = trypref(ep, a, lev, flag); ! 665: ! 666: if(Set(h,MONO) && isvowel(*ep) && isvowel(ep[-2])) ! 667: h = 0; ! 668: if(h) ! 669: return h; ! 670: if(isvowel(*ep) && !isvowel(ep[-1]) && ep[-1]==ep[-2]) { ! 671: h = trypref(ep-1,a,lev,flag|MONO); ! 672: if(h) ! 673: return h; ! 674: } ! 675: return trysuff(ep,lev,flag); ! 676: } ! 677: ! 678: Bits ! 679: s(char* ep, char* d, char* a, int lev, int flag) ! 680: { ! 681: if(lev > DLEV+1) ! 682: return 0; ! 683: if(*ep=='s') { ! 684: switch(ep[-1]) { ! 685: case 'y': ! 686: if(isvowel(ep[-2])||isupper(*word)) ! 687: break; /*says Kennedys*/ ! 688: case 'x': ! 689: case 'z': ! 690: case 's': ! 691: return 0; ! 692: case 'h': ! 693: switch(ep[-2]) { ! 694: case 'c': ! 695: case 's': ! 696: return 0; ! 697: } ! 698: } ! 699: } ! 700: return strip(ep,d,a,lev,flag); ! 701: } ! 702: ! 703: Bits ! 704: an(char* ep, char* d, char* a, int lev, int flag) ! 705: { ! 706: #pragma ref d ! 707: if(!isupper(*word)) /*must be proper name*/ ! 708: return 0; ! 709: return trypref(ep,a,lev,flag); ! 710: } ! 711: ! 712: Bits ! 713: ize(char* ep, char* d, char* a, int lev, int flag) ! 714: { ! 715: #pragma ref a ! 716: int temp = ep[-1]; ! 717: Bits h; ! 718: ! 719: ep[-1] = 'e'; ! 720: h = strip(ep,"",d,lev,flag); ! 721: ep[-1] = temp; ! 722: return h; ! 723: } ! 724: ! 725: Bits ! 726: y_to_e(char* ep, char* d, char* a, int lev, int flag) ! 727: { ! 728: #pragma ref a ! 729: Bits h; ! 730: int temp; ! 731: ! 732: switch(ep[-1]) { ! 733: case 'a': ! 734: case 'e': ! 735: case 'i': ! 736: return 0; ! 737: } ! 738: temp = *ep; ! 739: *ep++ = 'e'; ! 740: h = strip(ep,"",d,lev,flag); ! 741: *--ep = temp; ! 742: return h; ! 743: } ! 744: ! 745: Bits ! 746: ily(char* ep, char* d, char* a, int lev, int flag) ! 747: { ! 748: int temp = ep[0]; ! 749: char *cp = ep; ! 750: ! 751: if(temp==ep[-1]&&temp==ep[-2]) /* sillly */ ! 752: return 0; ! 753: if(*--cp=='y' && !isvowel(*--cp)) /* happyly */ ! 754: while(cp>word) ! 755: if(isvowel(*--cp)) /* shyness */ ! 756: return 0; ! 757: if(ep[-1]=='i') ! 758: return i_to_y(ep,d,a,lev,flag); ! 759: return cstrip(ep,d,a,lev,flag); ! 760: } ! 761: ! 762: Bits ! 763: bility(char* ep, char* d, char* a, int lev, int flag) ! 764: { ! 765: *ep++ = 'l'; ! 766: return y_to_e(ep,d,a,lev,flag); ! 767: } ! 768: ! 769: Bits ! 770: i_to_y(char* ep, char* d, char* a, int lev, int flag) ! 771: { ! 772: Bits h; ! 773: int temp; ! 774: ! 775: if(isupper(*word)) ! 776: return 0; ! 777: if((temp=ep[-1])=='i' && !isvowel(ep[-2])) { ! 778: ep[-1] = 'y'; ! 779: a = d; ! 780: } ! 781: h = cstrip(ep,"",a,lev,flag); ! 782: ep[-1] = temp; ! 783: return h; ! 784: } ! 785: ! 786: Bits ! 787: es(char* ep, char* d, char* a, int lev, int flag) ! 788: { ! 789: if(lev>DLEV) ! 790: return 0; ! 791: switch(ep[-1]) { ! 792: default: ! 793: return 0; ! 794: case 'i': ! 795: return i_to_y(ep,d,a,lev,flag); ! 796: case 'h': ! 797: switch(ep[-2]) { ! 798: default: ! 799: return 0; ! 800: case 'c': ! 801: case 's': ! 802: break; ! 803: } ! 804: case 's': ! 805: case 'z': ! 806: case 'x': ! 807: return strip(ep,d,a,lev,flag); ! 808: } ! 809: } ! 810: ! 811: Bits ! 812: subst(char* ep, char* d, char* a, int lev, int flag) ! 813: { ! 814: #pragma ref a ! 815: char *u,*t; ! 816: Bits h; ! 817: ! 818: if(skipv(skipv(ep-1)) < word) ! 819: return 0; ! 820: for(t=d; *t!='+'; t++) ! 821: continue; ! 822: for(u=ep; *--t!='-';) ! 823: *--u = *t; ! 824: h = strip(ep,"",d,lev,flag); ! 825: while(*++t != '+') ! 826: continue; ! 827: while(*++t) ! 828: *u++ = *t; ! 829: return h; ! 830: } ! 831: ! 832: Bits ! 833: tion(char* ep, char* d, char* a, int lev, int flag) ! 834: { ! 835: switch(ep[-2]) { ! 836: default: ! 837: return trypref(ep,a,lev,flag); ! 838: case 'a': ! 839: case 'e': ! 840: case 'i': ! 841: case 'o': ! 842: case 'u': ! 843: return y_to_e(ep,d,a,lev,flag); ! 844: } ! 845: } ! 846: ! 847: /* ! 848: * possible consonant-consonant-e ending ! 849: */ ! 850: Bits ! 851: CCe(char* ep, char* d, char* a, int lev, int flag) ! 852: { ! 853: Bits h; ! 854: ! 855: switch(ep[-1]) { ! 856: case 'l': ! 857: if(isvowel(ep[-2])) ! 858: break; ! 859: switch(ep[-2]) { ! 860: case 'l': ! 861: case 'r': ! 862: case 'w': ! 863: break; ! 864: default: ! 865: return y_to_e(ep,d,a,lev,flag); ! 866: } ! 867: break; ! 868: case 'c': ! 869: case 'g': ! 870: if(*ep == 'a') /* prevent -able for -eable */ ! 871: return 0; ! 872: case 's': ! 873: case 'v': ! 874: case 'z': ! 875: if(ep[-2]==ep[-1]) ! 876: break; ! 877: if(isvowel(ep[-2])) ! 878: break; ! 879: case 'u': ! 880: if(h = y_to_e(ep,d,a,lev,flag)) ! 881: return h; ! 882: if(!(ep[-2]=='n' && ep[-1]=='g')) ! 883: return 0; ! 884: } ! 885: return VCe(ep,d,a,lev,flag); ! 886: } ! 887: ! 888: /* ! 889: * possible consonant-vowel-consonant-e ending ! 890: */ ! 891: Bits ! 892: VCe(char* ep, char* d, char* a, int lev, int flag) ! 893: { ! 894: int c; ! 895: Bits h; ! 896: ! 897: c = ep[-1]; ! 898: if(c=='e') ! 899: return 0; ! 900: if(!isvowel(c) && isvowel(ep[-2])) { ! 901: c = *ep; ! 902: *ep++ = 'e'; ! 903: h = trypref(ep,d,lev,flag); ! 904: if(!h) ! 905: h = trysuff(ep,lev,flag); ! 906: if(h) ! 907: return h; ! 908: ep--; ! 909: *ep = c; ! 910: } ! 911: return cstrip(ep,d,a,lev,flag); ! 912: } ! 913: ! 914: Ptab* ! 915: lookuppref(char** wp, char* ep) ! 916: { ! 917: Ptab *sp; ! 918: char *bp,*cp; ! 919: int initchar = Tolower(**wp); ! 920: ! 921: if(!isalpha(initchar)) ! 922: return 0; ! 923: for(sp=preftab[initchar-'a'];sp->s;sp++) { ! 924: bp = *wp; ! 925: for(cp= sp->s;*cp; ) ! 926: if(*bp++!=*cp++) ! 927: goto next; ! 928: for(cp=bp;cp<ep;cp++) ! 929: if(isvowel(*cp)) { ! 930: *wp = bp; ! 931: return sp; ! 932: } ! 933: next:; ! 934: } ! 935: return 0; ! 936: } ! 937: ! 938: /* while word is not in dictionary try stripping ! 939: * prefixes. Fail if no more prefixes. ! 940: */ ! 941: Bits ! 942: trypref(char* ep, char* a, int lev, int flag) ! 943: { ! 944: Ptab *tp; ! 945: char *bp, *cp; ! 946: char *pp; ! 947: Bits h; ! 948: char space[20]; ! 949: ! 950: if(lev<DSIZ) { ! 951: deriv[lev].mesg = a; ! 952: deriv[lev].type = *a=='.'? NONE: SUFF; ! 953: } ! 954: if(h = tryword(word,ep,lev,flag)) { ! 955: if(Set(h, flag&~MONO) && (flag&MONO) <= Set(h, MONO)) ! 956: return h; ! 957: h = 0; ! 958: } ! 959: bp = word; ! 960: pp = space; ! 961: if(lev<DSIZ) { ! 962: deriv[lev+1].mesg = pp; ! 963: deriv[lev+1].type = 0; ! 964: } ! 965: while(tp=lookuppref(&bp,ep)) { ! 966: *pp++ = '+'; ! 967: cp = tp->s; ! 968: while(pp<space+sizeof(space) && (*pp = *cp++)) ! 969: pp++; ! 970: deriv[lev+1].type += PREF; ! 971: h = tryword(bp,ep,lev+1,flag); ! 972: if(Set(h,NOPREF) || ! 973: ((tp->flag&IN) && inun(bp-2,h)==0)) { ! 974: h = 0; ! 975: break; ! 976: } ! 977: if(Set(h,flag&~MONO) && (flag&MONO) <= Set(h, MONO)) ! 978: break; ! 979: h = 0; ! 980: } ! 981: if(lev<DSIZ) ! 982: deriv[lev+1] = deriv[lev+2] = emptyderiv; ! 983: return h; ! 984: } ! 985: ! 986: Bits ! 987: tryword(char* bp, char* ep, int lev, int flag) ! 988: { ! 989: int j; ! 990: Bits h = 0; ! 991: char duple[3]; ! 992: ! 993: if(ep-bp <= 1) ! 994: return h; ! 995: if(flag&MONO) { ! 996: if(lev<DSIZ) { ! 997: deriv[++lev].mesg = duple; ! 998: deriv[lev].type = SUFF; ! 999: } ! 1000: duple[0] = '+'; ! 1001: duple[1] = *ep; ! 1002: duple[2] = 0; ! 1003: } ! 1004: h = dict(bp, ep); ! 1005: if(vflag==0 || h==0) ! 1006: return h; ! 1007: /* ! 1008: * when derivations are wanted, collect them ! 1009: * for printing ! 1010: */ ! 1011: j = lev; ! 1012: prefcount = suffcount = 0; ! 1013: do { ! 1014: if(j<DSIZ && deriv[j].type) { ! 1015: strcat(affix, deriv[j].mesg); ! 1016: if(deriv[j].type == SUFF) ! 1017: suffcount++; ! 1018: else if(deriv[j].type != NONE) ! 1019: prefcount = deriv[j].type/PREF; ! 1020: } ! 1021: } while(--j > 0); ! 1022: return h; ! 1023: } ! 1024: ! 1025: int ! 1026: inun(char* bp, Bits h) ! 1027: { ! 1028: if(*bp == 'u') ! 1029: return Set(h, IN) == 0; ! 1030: /* *bp == 'i' */ ! 1031: if(Set(h, IN) == 0) ! 1032: return 0; ! 1033: switch(bp[2]) { ! 1034: case 'r': ! 1035: return bp[1] == 'r'; ! 1036: case 'm': ! 1037: case 'p': ! 1038: return bp[1] == 'm'; ! 1039: } ! 1040: return bp[1] == 'n'; ! 1041: } ! 1042: ! 1043: char* ! 1044: skipv(char *s) ! 1045: { ! 1046: if(s >= word && isvowel(*s)) ! 1047: s--; ! 1048: while(s >= word && !isvowel(*s)) ! 1049: s--; ! 1050: return s; ! 1051: } ! 1052: ! 1053: /* ! 1054: * crummy way to Britishise ! 1055: */ ! 1056: void ! 1057: ise(void) ! 1058: { ! 1059: Suftab *p; ! 1060: int i; ! 1061: ! 1062: for(i=0; i<26; i++) ! 1063: for(p = suftab[i]; p->suf; p++) { ! 1064: p->suf = ztos(p->suf); ! 1065: p->d1 = ztos(p->d1); ! 1066: p->a1 = ztos(p->a1); ! 1067: } ! 1068: } ! 1069: ! 1070: char* ! 1071: ztos(char *as) ! 1072: { ! 1073: char *s, *ds; ! 1074: ! 1075: for(s=as; *s; s++) ! 1076: if(*s == 'z') ! 1077: goto copy; ! 1078: return as; ! 1079: ! 1080: copy: ! 1081: ds = strdupl(as); ! 1082: for(s=ds; *s; s++) ! 1083: if(*s == 'z') ! 1084: *s = 's'; ! 1085: return ds; ! 1086: } ! 1087: ! 1088: Bits ! 1089: dict(char* bp, char* ep) ! 1090: { ! 1091: char *cp, *cp1, *w, *wp, *we; ! 1092: int n, f; ! 1093: ! 1094: w = bp; ! 1095: we = ep; ! 1096: n = ep-bp; ! 1097: if(n <= 1) ! 1098: return NOUN; ! 1099: ! 1100: f = w[0] & 0x7f; ! 1101: f *= 128; ! 1102: f += w[1] & 0x7f; ! 1103: bp = spacep[f]; ! 1104: ep = spacep[f+1]; ! 1105: ! 1106: loop: ! 1107: if(bp >= ep) { ! 1108: if(xflag) ! 1109: fprintf(stderr,"=%.*s\n",n, w); ! 1110: return 0; ! 1111: } ! 1112: /* ! 1113: * find the beginning of some word in the middle ! 1114: */ ! 1115: cp = bp + (ep-bp)/2; ! 1116: ! 1117: while(cp > bp && !(*cp & 0x80)) ! 1118: cp--; ! 1119: while(cp > bp && (cp[-1] & 0x80)) ! 1120: cp--; ! 1121: ! 1122: wp = w + 2; /* skip two letters */ ! 1123: cp1 = cp + 2; /* skip affix code */ ! 1124: for(;;) { ! 1125: if(wp >= we) { ! 1126: if(*cp1&0x80) ! 1127: goto found; ! 1128: else ! 1129: f = 1; ! 1130: break; ! 1131: } ! 1132: if(*cp1&0x80) { ! 1133: f = -1; ! 1134: break; ! 1135: } ! 1136: f = *cp1++ - *wp++; ! 1137: if(f != 0) ! 1138: break; ! 1139: } ! 1140: ! 1141: if(f < 0) { ! 1142: while(!(*cp1&0x80)) ! 1143: cp1++; ! 1144: bp = cp1; ! 1145: goto loop; ! 1146: } ! 1147: ep = cp; ! 1148: goto loop; ! 1149: found: ! 1150: f = ((cp[0] & 0x7) << 8) | ! 1151: (cp[1] & 0xff); ! 1152: if(xflag) { ! 1153: fprintf(stderr,"=%.*s ",n,w); ! 1154: typeprint(encode[f]); ! 1155: } ! 1156: return encode[f]; ! 1157: } ! 1158: ! 1159: void ! 1160: typeprint(Bits h) ! 1161: { ! 1162: ! 1163: pcomma(""); ! 1164: if(h & NOUN) ! 1165: pcomma("n"); ! 1166: if(h & PROP_COLLECT) ! 1167: pcomma("pc"); ! 1168: if(h & VERB) { ! 1169: if((h & VERB) == VERB) ! 1170: pcomma("v"); ! 1171: else ! 1172: if((h & VERB) == V_IRREG) ! 1173: pcomma("vi"); ! 1174: else ! 1175: if(h & ED) ! 1176: pcomma("ed"); ! 1177: } ! 1178: if(h & ADJ) ! 1179: pcomma("a"); ! 1180: if(h & COMP) { ! 1181: if((h & COMP) == ACTOR) ! 1182: pcomma("er"); ! 1183: else ! 1184: pcomma("comp"); ! 1185: } ! 1186: if(h & DONT_TOUCH) ! 1187: pcomma("d"); ! 1188: if(h & N_AFFIX) ! 1189: pcomma("na"); ! 1190: if(h & ADV) ! 1191: pcomma("adv"); ! 1192: if(h & ION) ! 1193: pcomma("ion"); ! 1194: if(h & V_AFFIX) ! 1195: pcomma("va"); ! 1196: if(h & MAN) ! 1197: pcomma("man"); ! 1198: if(h & NOPREF) ! 1199: pcomma("nopref"); ! 1200: if(h & MONO) ! 1201: pcomma("ms"); ! 1202: if(h & IN) ! 1203: pcomma("in"); ! 1204: if(h & _Y) ! 1205: pcomma("y"); ! 1206: if(h & STOP) ! 1207: pcomma("s"); ! 1208: fprintf(stderr, "\n"); ! 1209: } ! 1210: ! 1211: void ! 1212: pcomma(char *s) ! 1213: { ! 1214: static flag; ! 1215: ! 1216: if(*s == 0) { ! 1217: flag = 0; ! 1218: return; ! 1219: } ! 1220: if(!flag) { ! 1221: fprintf(stderr, "%s", s); ! 1222: flag = 1; ! 1223: } else ! 1224: fprintf(stderr, ",%s", s); ! 1225: } ! 1226: ! 1227: int ! 1228: ordinal(void) ! 1229: { ! 1230: char *cp = word; ! 1231: static char sp[4]; ! 1232: ! 1233: while(isdigit(*cp)) ! 1234: cp++; ! 1235: strncpy(sp,cp,3); ! 1236: if(isupper(cp[0]) && isupper(cp[1])) { ! 1237: sp[0] = Tolower(cp[0]); ! 1238: sp[1] = Tolower(cp[1]); ! 1239: } ! 1240: return 0 == strncmp(sp, ! 1241: cp[-2]=='1'? "th": /* out of bounds if 1 digit */ ! 1242: *--cp=='1'? "st": /* harmless */ ! 1243: *cp=='2'? "nd": ! 1244: *cp=='3'? "rd": ! 1245: "th", 3); ! 1246: } ! 1247: ! 1248: /* layout of file entry: first byte has bit 0x80 turned on. ! 1249: next 4 bits count number of characters common between this ! 1250: entry and previous one. last three bits concatenated with ! 1251: second byte are the affixing code, so arranged that the 0x80 ! 1252: bit is zero in all bytes but the first. 3rd and following ! 1253: bytes are the remainder of the dictionary word. ! 1254: ! 1255: layout in memory: common prefixes are expanded, and the ! 1256: first two letters of each word are deleted and found ! 1257: instead by lookup in table spacep, which points to the ! 1258: first word for each two-letter prefix. ! 1259: */ ! 1260: ! 1261: void ! 1262: readdict(char *file) ! 1263: { ! 1264: char *s, *is, *lasts, *ls; ! 1265: int c, i, sp, p; ! 1266: int f; ! 1267: long l; ! 1268: ! 1269: f = open(file, 0); ! 1270: if(f == -1) { ! 1271: fprintf(stderr, "spell: cannot open %s\n", file); ! 1272: exit(1); ! 1273: } ! 1274: if(read(f, space, 2) != 2) ! 1275: goto bad; ! 1276: nencode = ((space[0]&0xff)<<8) | (space[1]&0xff); ! 1277: if(nencode > sizeof(encode)/sizeof(*encode)) ! 1278: goto noroom; ! 1279: if(read(f, space, nencode*sizeof(*encode)) ! 1280: != nencode*sizeof(*encode)) ! 1281: goto bad; ! 1282: s = space; ! 1283: for(i=0; i<nencode; i++) { ! 1284: l = (long)(s[0] & 0xff) << 24; ! 1285: l |= (s[1] & 0xff) << 16; ! 1286: l |= (s[2] & 0xff) << 8; ! 1287: l |= s[3] & 0xff; ! 1288: encode[i] = (Bits)l; ! 1289: s += 4; ! 1290: } ! 1291: l = read(f, space, sizeof(space)); ! 1292: if(l >= sizeof(space)) ! 1293: goto noroom; ! 1294: is = space + (sizeof(space) - l); ! 1295: memmove(is, space, l); ! 1296: ! 1297: s = space; ! 1298: c = *is++ & 0xff; ! 1299: sp = -1; ! 1300: i = 0; ! 1301: lasts = 0; /* to prevent diagnostics */ ! 1302: loop: ! 1303: if(s > is) ! 1304: goto noroom; ! 1305: if(c < 0) { ! 1306: close(f); ! 1307: while(sp < 128*128) ! 1308: spacep[++sp] = s; ! 1309: *s++ = 0x80; /* fence */ ! 1310: return; ! 1311: } ! 1312: p = (c>>3) & 0xf; ! 1313: *s++ = c; ! 1314: *s++ = *is++ & 0xff; ! 1315: if(p <= 0) ! 1316: i = (*is++ & 0xff)*128; ! 1317: if(p <= 1) { ! 1318: if(!(*is & 0x80)) ! 1319: i = i/128*128 + (*is++ & 0xff); ! 1320: if(i <= sp) { ! 1321: fprintf(stderr, "spell: the dict isn't " ! 1322: "sorted or memmove didn't work\n"); ! 1323: goto bad; ! 1324: } ! 1325: while(sp < i) ! 1326: spacep[++sp] = s-2; ! 1327: } ! 1328: ls = lasts; ! 1329: lasts = s; ! 1330: for(p-=2; p>0; p--) ! 1331: *s++ = *ls++; ! 1332: for(;;) { ! 1333: if(is >= space+sizeof(space)) { ! 1334: c = -1; ! 1335: break; ! 1336: } ! 1337: c = *is++ & 0xff; ! 1338: if(c & 0x80) ! 1339: break; ! 1340: *s++ = c; ! 1341: } ! 1342: *s = 0; ! 1343: goto loop; ! 1344: ! 1345: bad: ! 1346: fprintf(stderr, "spell: trouble reading %s\n", file); ! 1347: exit(1); ! 1348: noroom: ! 1349: fprintf(stderr,"spell: not enough space for dictionary\n"); ! 1350: exit(1); ! 1351: } ! 1352: ! 1353: char * /* same as strdup; portability hack */ ! 1354: strdupl(char *s) ! 1355: { ! 1356: char *t = (char*)malloc(strlen(s)+1); ! 1357: strcpy(t, s); ! 1358: return t; ! 1359: } ! 1360: ! 1361: void ! 1362: runout(char *s) ! 1363: { ! 1364: int c; ! 1365: if(!cflag) ! 1366: printf("%s", s); ! 1367: else { ! 1368: putchar('-'); ! 1369: fflush(stdout); ! 1370: } ! 1371: do { ! 1372: if((c=getchar()) == EOF) ! 1373: exit(0); ! 1374: if(!cflag) ! 1375: putchar(c); ! 1376: } while(c != '\n'); ! 1377: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.