|
|
1.1 ! root 1: ! 2: #include "code.h" ! 3: ! 4: #ifndef _POSIX_SOURCE ! 5: ! 6: #include <stdio.h> ! 7: #include <ctype.h> ! 8: #include <libc.h> ! 9: ! 10: #else ! 11: ! 12: #include <fcntl.h> ! 13: #include <stdio.h> ! 14: #include <ctype.h> ! 15: #include <stdlib.h> ! 16: #include <unistd.h> ! 17: #include <string.h> ! 18: ! 19: #endif ! 20: ! 21: #define isvowel(c) voweltab[c] ! 22: #define Tolower(c) (isupper(c)? (c)-'A'+'a': (c)) ! 23: #define pair(a,b) (((a)<<8) | (b)) ! 24: #define DLEV 2 ! 25: #define DSIZ 40 ! 26: ! 27: typedef long Bits; ! 28: typedef unsigned char uchar; ! 29: #define Set(h, f) ((long)(h) & (f)) ! 30: ! 31: Bits nop(char*, char*, char*, int, int); ! 32: Bits strip(char*, char*, char*, int, int); ! 33: Bits ize(char*, char*, char*, int, int); ! 34: Bits i_to_y(char*, char*, char*, int, int); ! 35: Bits ily(char*, char*, char*, int, int); ! 36: Bits subst(char*, char*, char*, int, int); ! 37: Bits CCe(char*, char*, char*, int, int); ! 38: Bits tion(char*, char*, char*, int, int); ! 39: Bits an(char*, char*, char*, int, int); ! 40: Bits s(char*, char*, char*, int, int); ! 41: Bits es(char*, char*, char*, int, int); ! 42: Bits bility(char*, char*, char*, int, int); ! 43: Bits y_to_e(char*, char*, char*, int, int); ! 44: Bits VCe(char*, char*, char*, int, int); ! 45: ! 46: Bits trypref(char*, char*, int, int); ! 47: Bits tryword(char*, char*, int, int); ! 48: Bits trysuff(char*, int, int); ! 49: Bits dict(char*, char*); ! 50: void typeprint(Bits); ! 51: void pcomma(char*); ! 52: ! 53: void runout(char*); ! 54: void ise(void); ! 55: int ordinal(void); ! 56: char* skipv(char*); ! 57: int inun(char*, Bits); ! 58: char* ztos(char*); ! 59: void readdict(char*); ! 60: ! 61: char *strdupl(char *); ! 62: ! 63: typedef struct Ptab Ptab; ! 64: struct Ptab ! 65: { ! 66: char* s; ! 67: int flag; ! 68: }; ! 69: ! 70: typedef struct Suftab Suftab; ! 71: struct Suftab ! 72: { ! 73: char *suf; ! 74: Bits (*p1)(char*, char*, char*, int, int); ! 75: int n1; ! 76: char *d1; ! 77: char *a1; ! 78: int flag; ! 79: int affixable; ! 80: Bits (*p2)(char*, char*, char*, int, int); ! 81: int n2; ! 82: char *d2; ! 83: char *a2; ! 84: }; ! 85: ! 86: Suftab staba[] = { ! 87: {"aibohp",subst,1,"-e+ia","",NOUN, NOUN}, ! 88: 0 ! 89: }; ! 90: ! 91: Suftab stabc[] = ! 92: { ! 93: {"cai",strip,1,"","+c",N_AFFIX, ADJ|NOUN}, ! 94: {"citsi",strip,2,"","+ic",N_AFFIX, ADJ | N_AFFIX | NOUN}, ! 95: {"citi",ize,1,"-e+ic","",N_AFFIX, ADJ }, ! 96: {"cihparg",i_to_y,1,"-y+ic","",NOUN, ADJ|NOUN }, ! 97: {"cipocs",ize,1,"-e+ic","",NOUN, ADJ }, ! 98: {"cirtem",i_to_y,1,"-y+ic","",NOUN, ADJ }, ! 99: {"cigol",i_to_y,1,"-y+ic","",NOUN, ADJ }, ! 100: {"cimono",i_to_y,1,"-y+ic","",NOUN, ADJ }, ! 101: {"cibohp",subst,1,"-e+ic","",NOUN, ADJ }, ! 102: 0 ! 103: }; ! 104: Suftab stabd[] = ! 105: { ! 106: {"de",strip,1,"","+d",ED,ADJ |COMP,i_to_y,2,"-y+ied","+ed"}, ! 107: {"dooh",ily,4,"-y+ihood","+hood",NOUN | ADV, NOUN}, ! 108: 0 ! 109: }; ! 110: Suftab stabe[] = ! 111: { ! 112: /* ! 113: * V_affix for comment ->commence->commentment?? ! 114: */ ! 115: {"ecn",subst,1,"-t+ce","",ADJ,N_AFFIX|_Y|NOUN|VERB|ACTOR|V_AFFIX}, ! 116: {"elbaif",i_to_y,4,"-y+iable","",V_IRREG,ADJ}, ! 117: {"elba",CCe,4,"-e+able","+able",V_AFFIX,ADJ}, ! 118: {"evi",subst,0,"-ion+ive","",N_AFFIX | V_AFFIX,NOUN | N_AFFIX| ADJ}, ! 119: {"ezi",CCe,3,"-e+ize","+ize",N_AFFIX|ADJ ,V_AFFIX | VERB |ION | COMP}, ! 120: {"ekil",strip,4,"","+like",N_AFFIX ,ADJ}, ! 121: 0 ! 122: }; ! 123: Suftab stabg[] = ! 124: { ! 125: {"gniee",strip,3,"","+ing",V_IRREG ,ADJ|NOUN}, ! 126: {"gnikam",strip,6,"","+making",NOUN,NOUN}, ! 127: {"gnipeek",strip,7,"","+keeping",NOUN,NOUN}, ! 128: {"gni",CCe,3,"-e+ing","+ing",V_IRREG ,ADJ|ED|NOUN}, ! 129: 0 ! 130: }; ! 131: Suftab stabl[] = ! 132: { ! 133: {"ladio",strip,2,"","+al",NOUN |ADJ,ADJ}, ! 134: {"laci",strip,2,"","+al",NOUN |ADJ,ADJ |NOUN|N_AFFIX}, ! 135: {"latnem",strip,2,"","+al",N_AFFIX,ADJ}, ! 136: {"lanoi",strip,2,"","+al",N_AFFIX,ADJ|NOUN}, ! 137: {"luf",ily,3,"-y+iful","+ful",N_AFFIX,ADJ | NOUN}, ! 138: 0 ! 139: }; ! 140: Suftab stabm[] = ! 141: { ! 142: /* congregational + ism */ ! 143: {"msi",CCe,3,"-e+ism","ism",N_AFFIX|ADJ,NOUN}, ! 144: {"margo",subst,-1,"-ph+m","",NOUN,NOUN}, ! 145: 0 ! 146: }; ! 147: Suftab stabn[] = ! 148: { ! 149: {"noitacifi",i_to_y,6,"-y+ication","",ION,NOUN | N_AFFIX}, ! 150: {"noitazi",ize,4,"-e+ation","",ION,NOUN| N_AFFIX}, ! 151: {"noit",tion,3,"-e+ion","+ion",ION,NOUN| N_AFFIX | V_AFFIX |VERB|ACTOR}, ! 152: {"naino",an,3,"","+ian",NOUN|PROP_COLLECT,NOUN| N_AFFIX}, ! 153: {"namow",strip,5,"","+woman",MAN,PROP_COLLECT|N_AFFIX}, ! 154: {"nam",strip,3,"","+man",MAN,PROP_COLLECT | N_AFFIX | VERB}, ! 155: {"na",an,1,"","+n",NOUN|PROP_COLLECT,NOUN | N_AFFIX}, ! 156: {"nemow",strip,5,"","+women",MAN,PROP_COLLECT}, ! 157: {"nem",strip,3,"","+man",MAN,PROP_COLLECT}, ! 158: 0 ! 159: }; ! 160: Suftab stabp[] = ! 161: { ! 162: {"pihs",strip,4,"","+ship",NOUN|PROP_COLLECT,NOUN| N_AFFIX}, ! 163: 0 ! 164: }; ! 165: Suftab stabr[] = ! 166: { ! 167: {"rehparg",subst,1,"-y+er","",ACTOR,NOUN,strip,2,"","+er"}, ! 168: {"reyhparg",nop,0,"","",0,NOUN}, ! 169: {"rekam",strip,5,"","+maker",NOUN,NOUN}, ! 170: {"repeek",strip,6,"","+keeper",NOUN,NOUN}, ! 171: {"re",strip,1,"","+r",ACTOR,NOUN | N_AFFIX|VERB|ADJ, i_to_y,2,"-y+ier","+er"}, ! 172: {"rota",tion,2,"-e+or","",ION,NOUN| N_AFFIX|_Y}, ! 173: {"rotc",tion,2,"","+or",ION,NOUN| N_AFFIX}, ! 174: {"rotp",tion,2,"","+or",ION,NOUN| N_AFFIX}, ! 175: 0 ! 176: }; ! 177: Suftab stabs[] = ! 178: { ! 179: {"ssen",ily,4,"-y+iness","+ness",ADJ|ADV,NOUN| N_AFFIX}, ! 180: {"ssel",ily,4,"-y+iless","+less",NOUN | PROP_COLLECT,ADJ }, ! 181: {"se",s,1,"","+s",NOUN | V_IRREG,DONT_TOUCH , es,2,"-y+ies","+es"}, ! 182: {"s'",s,2,"","+'s",PROP_COLLECT | NOUN,DONT_TOUCH }, ! 183: {"s",s,1,"","+s",NOUN | V_IRREG,DONT_TOUCH }, ! 184: 0 ! 185: }; ! 186: Suftab stabt[] = ! 187: { ! 188: {"tnem",strip,4,"","+ment",V_AFFIX,NOUN | N_AFFIX | ADJ|VERB}, ! 189: {"tse",strip,2,"","+st",EST,DONT_TOUCH, i_to_y,3,"-y+iest","+est" }, ! 190: {"tsigol",i_to_y,2,"-y+ist","",N_AFFIX,NOUN | N_AFFIX}, ! 191: {"tsi",CCe,3,"-e+ist","+ist",N_AFFIX|ADJ,NOUN | N_AFFIX|COMP}, ! 192: 0 ! 193: }; ! 194: Suftab staby[] = ! 195: { ! 196: {"ytilb",nop,0,"","",0,NOUN}, ! 197: {"ycn",subst,1,"-t+cy","",ADJ | N_AFFIX,NOUN | N_AFFIX}, ! 198: {"ytilib",bility,5,"-le+ility","",ADJ | V_AFFIX,NOUN | N_AFFIX}, ! 199: {"ytisuo",nop,0,"","",NOUN}, ! 200: {"yti",CCe,3,"-e+ity","+ity",ADJ ,NOUN | N_AFFIX }, ! 201: {"ylb",y_to_e,1,"-e+y","",ADJ,ADV}, ! 202: {"ylc",nop,0,"","",0}, ! 203: {"yl",ily,2,"-y+ily","+ly",ADJ,ADV|COMP}, ! 204: {"yrtem",subst,0,"-er+ry","",NOUN,NOUN | N_AFFIX}, ! 205: {"y",CCe,1,"-e+y","+y",_Y,ADJ|COMP}, ! 206: 0 ! 207: }; ! 208: Suftab stabz[] = ! 209: { ! 210: 0 ! 211: }; ! 212: Suftab* suftab[] = ! 213: { ! 214: staba, ! 215: stabz, ! 216: stabc, ! 217: stabd, ! 218: stabe, ! 219: stabz, ! 220: stabg, ! 221: stabz, ! 222: stabz, ! 223: stabz, ! 224: stabz, ! 225: stabl, ! 226: stabm, ! 227: stabn, ! 228: stabz, ! 229: stabp, ! 230: stabz, ! 231: stabr, ! 232: stabs, ! 233: stabt, ! 234: stabz, ! 235: stabz, ! 236: stabz, ! 237: stabz, ! 238: staby, ! 239: stabz, ! 240: }; ! 241: ! 242: Ptab ptaba[] = ! 243: { ! 244: "anti", 0, ! 245: "auto", 0, ! 246: 0 ! 247: }; ! 248: Ptab ptabb[] = ! 249: { ! 250: "bio", 0, ! 251: 0 ! 252: }; ! 253: Ptab ptabc[] = ! 254: { ! 255: "counter", 0, ! 256: 0 ! 257: }; ! 258: Ptab ptabd[] = ! 259: { ! 260: "dis", 0, ! 261: 0 ! 262: }; ! 263: Ptab ptabe[] = ! 264: { ! 265: "electro", 0, ! 266: 0 ! 267: }; ! 268: Ptab ptabf[] = ! 269: { ! 270: "femto", 0, ! 271: 0 ! 272: }; ! 273: Ptab ptabg[] = ! 274: { ! 275: "geo", 0, ! 276: "giga", 0, ! 277: 0 ! 278: }; ! 279: Ptab ptabh[] = ! 280: { ! 281: "hyper", 0, ! 282: 0 ! 283: }; ! 284: Ptab ptabi[] = ! 285: { ! 286: "immuno", 0, ! 287: "im", IN, ! 288: "intra", 0, ! 289: "inter", 0, ! 290: "in", IN, ! 291: "ir", IN, ! 292: "iso", 0, ! 293: 0 ! 294: }; ! 295: Ptab ptabj[] = ! 296: { ! 297: 0 ! 298: }; ! 299: Ptab ptabk[] = ! 300: { ! 301: "kilo", 0, ! 302: 0 ! 303: }; ! 304: Ptab ptabl[] = ! 305: { ! 306: 0 ! 307: }; ! 308: Ptab ptabm[] = ! 309: { ! 310: "magneto", 0, ! 311: "mega", 0, ! 312: "meta", 0, ! 313: "micro", 0, ! 314: "mid", 0, ! 315: "milli", 0, ! 316: "mini", 0, ! 317: "mis", 0, ! 318: "mono", 0, ! 319: "multi", 0, ! 320: 0 ! 321: }; ! 322: Ptab ptabn[] = ! 323: { ! 324: "nano", 0, ! 325: "neuro", 0, ! 326: "non", 0, ! 327: 0 ! 328: }; ! 329: Ptab ptabo[] = ! 330: { ! 331: "out", 0, ! 332: "over", 0, ! 333: 0 ! 334: }; ! 335: Ptab ptabp[] = ! 336: { ! 337: "para", 0, ! 338: "photo", 0, ! 339: "pico", 0, ! 340: "poly", 0, ! 341: "pre", 0, ! 342: "pseudo", 0, ! 343: "psycho", 0, ! 344: 0 ! 345: }; ! 346: Ptab ptabq[] = ! 347: { ! 348: "quasi", 0, ! 349: 0 ! 350: }; ! 351: Ptab ptabr[] = ! 352: { ! 353: "radio", 0, ! 354: "re", 0, ! 355: 0 ! 356: }; ! 357: Ptab ptabs[] = ! 358: { ! 359: "semi", 0, ! 360: "stereo", 0, ! 361: "sub", 0, ! 362: "super", 0, ! 363: 0 ! 364: }; ! 365: Ptab ptabt[] = ! 366: { ! 367: "tele", 0, ! 368: "tera", 0, ! 369: "thermo", 0, ! 370: 0 ! 371: }; ! 372: Ptab ptabu[] = ! 373: { ! 374: "ultra", 0, ! 375: "under", 0, /*must precede un*/ ! 376: "un", IN, ! 377: 0 ! 378: }; ! 379: Ptab ptabv[] = ! 380: { ! 381: 0 ! 382: }; ! 383: Ptab ptabw[] = ! 384: { ! 385: 0 ! 386: }; ! 387: Ptab ptabx[] = ! 388: { ! 389: 0 ! 390: }; ! 391: Ptab ptaby[] = ! 392: { ! 393: 0 ! 394: }; ! 395: Ptab ptabz[] = ! 396: { ! 397: 0 ! 398: }; ! 399: ! 400: Ptab* preftab[] = ! 401: { ! 402: ptaba, ! 403: ptabb, ! 404: ptabc, ! 405: ptabd, ! 406: ptabe, ! 407: ptabf, ! 408: ptabg, ! 409: ptabh, ! 410: ptabi, ! 411: ptabj, ! 412: ptabk, ! 413: ptabl, ! 414: ptabm, ! 415: ptabn, ! 416: ptabo, ! 417: ptabp, ! 418: ptabq, ! 419: ptabr, ! 420: ptabs, ! 421: ptabt, ! 422: ptabu, ! 423: ptabv, ! 424: ptabw, ! 425: ptabx, ! 426: ptaby, ! 427: ptabz, ! 428: }; ! 429: ! 430: typedef struct { ! 431: char *mesg; ! 432: enum { NONE, SUFF, PREF} type; ! 433: } Deriv; ! 434: ! 435: int cflag; ! 436: int fflag; ! 437: int vflag; ! 438: int xflag; ! 439: char word[500]; ! 440: char original[500]; ! 441: Deriv deriv[DSIZ+3]; ! 442: Deriv emptyderiv; ! 443: char affix[DSIZ*10]; /* 10 is longest affix message */ ! 444: int prefcount; ! 445: int suffcount; ! 446: char space[300000]; /* must be as large as "words"+"space" in pcode run */ ! 447: Bits encode[2048]; /* must be as long as "codes" in pcode run */ ! 448: int nencode; ! 449: char voweltab[128]; ! 450: char* spacep[128*128+1]; /* pointer to words starting with 'xx' */ ! 451: ! 452: char* codefile = "/usr/lib/spell/amspell"; ! 453: char* brfile = "/usr/lib/spell/brspell"; ! 454: ! 455: main(int argc, char *argv[]) ! 456: { ! 457: char *ep, *cp; ! 458: char *dp; ! 459: int j, i, c; ! 460: int low = 0; ! 461: Bits h; ! 462: ! 463: for(i=0; c = "aeiouyAEIOUY"[i]; i++) ! 464: voweltab[c] = 1; ! 465: while(argc > 1) { ! 466: if(argv[1][0] != '-') ! 467: break; ! 468: for(i=1; c = argv[1][i]; i++) ! 469: switch(c) { ! 470: default: ! 471: fprintf(stderr, "usage: spell [-bcvx] [-f file]\n"); ! 472: exit(1); ! 473: ! 474: case 'b': ! 475: ise(); ! 476: if(!fflag) ! 477: codefile = brfile; ! 478: continue; ! 479: ! 480: case 'C': ! 481: vflag++; ! 482: case 'c': ! 483: setbuf(stdout,0); ! 484: cflag++; ! 485: continue; ! 486: ! 487: case 'v': ! 488: vflag++; ! 489: continue; ! 490: ! 491: case 'x': ! 492: xflag++; ! 493: continue; ! 494: ! 495: case 'f': ! 496: if(argc <= 2) { ! 497: fprintf(stderr, "spell: -f requires another argument\n"); ! 498: exit(1); ! 499: } ! 500: argv++; ! 501: argc--; ! 502: fflag++; ! 503: codefile = argv[1]; ! 504: goto brk; ! 505: } ! 506: brk: ! 507: argv++; ! 508: argc--; ! 509: } ! 510: readdict(codefile); ! 511: if(argc > 1) { ! 512: fprintf(stderr, "usage: spell [-bcvx] [-f file]\n"); ! 513: exit(1); ! 514: } ! 515: for(;;) { ! 516: loop: ! 517: affix[0] = 0; ! 518: for(ep=original; ; ep++) { ! 519: if(ep >= original + sizeof(original) - 1) { ! 520: *ep = 0; ! 521: runout(original); ! 522: goto loop; ! 523: } ! 524: j = getchar(); ! 525: if(j == EOF) ! 526: exit(0); ! 527: if(j != '\n') ! 528: *ep = j; ! 529: else { ! 530: *ep = 0; ! 531: break; ! 532: } ! 533: } ! 534: low = 0; ! 535: for(ep=word,dp=original; j = *dp; ep++,dp++) { ! 536: if(islower(j)) ! 537: low++; ! 538: if(ep >= word+sizeof(word)-1) ! 539: break; ! 540: *ep = j; ! 541: } ! 542: *ep = 0; ! 543: ! 544: h = ~STOP; ! 545: if(word[1] == 0 && isalnum(word[0]) || ! 546: isdigit(word[0]) && ordinal()) ! 547: goto check; ! 548: ! 549: h = 0; ! 550: if(!low && !(h = trypref(ep,".",0,ALL|STOP|DONT_TOUCH))) ! 551: for(cp=original+1,dp=word+1; dp<ep; dp++,cp++) ! 552: *dp = Tolower(*cp); ! 553: if(!h) ! 554: for(;;) { /* at most twice */ ! 555: if(h = trypref(ep,".",0,ALL|STOP|DONT_TOUCH)) ! 556: break; ! 557: if(h = trysuff(ep,0,ALL|STOP|DONT_TOUCH)) ! 558: break; ! 559: if(!isupper(word[0])) ! 560: break; ! 561: cp = original; ! 562: dp = word; ! 563: while(*dp = *cp++) { ! 564: if(!low) ! 565: *dp = Tolower(*dp); ! 566: dp++; ! 567: } ! 568: word[0] = Tolower(word[0]); ! 569: } ! 570: check: ! 571: if(cflag) { ! 572: if(!h || Set(h,STOP)) ! 573: putchar('-'); ! 574: else if(!vflag) ! 575: putchar('+'); ! 576: else ! 577: putchar('0' + (suffcount>0) + ! 578: (prefcount>4? 8: 2*prefcount)); ! 579: } else if(!h || Set(h,STOP)) ! 580: printf("%s\n", original); ! 581: else if(affix[0] != 0 && affix[0] != '.') ! 582: printf("%s\t%s\n", affix, original); ! 583: } ! 584: } ! 585: ! 586: /* strip exactly one suffix and do ! 587: * indicated routine(s), which may recursively ! 588: * strip suffixes ! 589: */ ! 590: Bits ! 591: trysuff(char* ep, int lev, int flag) ! 592: { ! 593: Suftab *t; ! 594: char *cp, *sp; ! 595: Bits h = 0; ! 596: int initchar = ep[-1]; ! 597: ! 598: flag &= ~MONO; ! 599: lev += DLEV; ! 600: if(lev<DSIZ) ! 601: deriv[lev] = deriv[lev-1] = emptyderiv; ! 602: if(!islower(initchar)) ! 603: return h; ! 604: for(t=suftab[initchar-'a']; sp=t->suf; t++) { ! 605: cp = ep; ! 606: while(*sp) ! 607: if(*--cp != *sp++) ! 608: goto next; ! 609: for(sp=ep-t->n1; --sp >= word && !isvowel(*sp);) ! 610: ; ! 611: if(sp < word) ! 612: continue; ! 613: if(!(t->affixable & flag)) ! 614: return 0; ! 615: h = (*t->p1)(ep-t->n1, t->d1, t->a1, lev+1, t->flag|STOP); ! 616: if(!h && t->p2!=0) { ! 617: if(lev<DSIZ) ! 618: deriv[lev] = deriv[lev+1] = emptyderiv; ! 619: h = (*t->p2)(ep-t->n2, t->d2, t->a2, lev, t->flag|STOP); ! 620: } ! 621: break; ! 622: next:; ! 623: } ! 624: return h; ! 625: } ! 626: ! 627: Bits ! 628: nop(char* ep, char* d, char* a, int lev, int flag) ! 629: { ! 630: #pragma ref ep ! 631: #pragma ref d ! 632: #pragma ref a ! 633: #pragma ref lev ! 634: #pragma ref flag ! 635: return 0; ! 636: } ! 637: ! 638: Bits ! 639: cstrip(char* ep, char* d, char* a, int lev, int flag) ! 640: { ! 641: int temp = ep[0]; ! 642: ! 643: if(isvowel(temp) && isvowel(ep[-1])) { ! 644: switch(pair(ep[-1],ep[0])) { ! 645: case pair('a', 'a'): ! 646: case pair('a', 'e'): ! 647: case pair('a', 'i'): ! 648: case pair('e', 'a'): ! 649: case pair('e', 'e'): ! 650: case pair('e', 'i'): ! 651: case pair('i', 'i'): ! 652: case pair('o', 'a'): ! 653: return 0; ! 654: } ! 655: } else ! 656: if(temp==ep[-1]&&temp==ep[-2]) ! 657: return 0; ! 658: return strip(ep,d,a,lev,flag); ! 659: } ! 660: ! 661: Bits ! 662: strip(char* ep, char* d, char* a, int lev, int flag) ! 663: { ! 664: #pragma ref d ! 665: Bits h = trypref(ep, a, lev, flag); ! 666: ! 667: if(Set(h,MONO) && isvowel(*ep) && isvowel(ep[-2])) ! 668: h = 0; ! 669: if(h) ! 670: return h; ! 671: if(isvowel(*ep) && !isvowel(ep[-1]) && ep[-1]==ep[-2]) { ! 672: h = trypref(ep-1,a,lev,flag|MONO); ! 673: if(h) ! 674: return h; ! 675: } ! 676: return trysuff(ep,lev,flag); ! 677: } ! 678: ! 679: Bits ! 680: s(char* ep, char* d, char* a, int lev, int flag) ! 681: { ! 682: if(lev > DLEV+1) ! 683: return 0; ! 684: if(*ep=='s') { ! 685: switch(ep[-1]) { ! 686: case 'y': ! 687: if(isvowel(ep[-2])||isupper(*word)) ! 688: break; /*says Kennedys*/ ! 689: case 'x': ! 690: case 'z': ! 691: case 's': ! 692: return 0; ! 693: case 'h': ! 694: switch(ep[-2]) { ! 695: case 'c': ! 696: case 's': ! 697: return 0; ! 698: } ! 699: } ! 700: } ! 701: return strip(ep,d,a,lev,flag); ! 702: } ! 703: ! 704: Bits ! 705: an(char* ep, char* d, char* a, int lev, int flag) ! 706: { ! 707: #pragma ref d ! 708: if(!isupper(*word)) /*must be proper name*/ ! 709: return 0; ! 710: return trypref(ep,a,lev,flag); ! 711: } ! 712: ! 713: Bits ! 714: ize(char* ep, char* d, char* a, int lev, int flag) ! 715: { ! 716: #pragma ref a ! 717: int temp = ep[-1]; ! 718: Bits h; ! 719: ! 720: ep[-1] = 'e'; ! 721: h = strip(ep,"",d,lev,flag); ! 722: ep[-1] = temp; ! 723: return h; ! 724: } ! 725: ! 726: Bits ! 727: y_to_e(char* ep, char* d, char* a, int lev, int flag) ! 728: { ! 729: #pragma ref a ! 730: Bits h; ! 731: int temp; ! 732: ! 733: switch(ep[-1]) { ! 734: case 'a': ! 735: case 'e': ! 736: case 'i': ! 737: return 0; ! 738: } ! 739: temp = *ep; ! 740: *ep++ = 'e'; ! 741: h = strip(ep,"",d,lev,flag); ! 742: *--ep = temp; ! 743: return h; ! 744: } ! 745: ! 746: Bits ! 747: ily(char* ep, char* d, char* a, int lev, int flag) ! 748: { ! 749: int temp = ep[0]; ! 750: char *cp = ep; ! 751: ! 752: if(temp==ep[-1]&&temp==ep[-2]) /* sillly */ ! 753: return 0; ! 754: if(*--cp=='y' && !isvowel(*--cp)) /* happyly */ ! 755: while(cp>word) ! 756: if(isvowel(*--cp)) /* shyness */ ! 757: return 0; ! 758: if(ep[-1]=='i') ! 759: return i_to_y(ep,d,a,lev,flag); ! 760: return cstrip(ep,d,a,lev,flag); ! 761: } ! 762: ! 763: Bits ! 764: bility(char* ep, char* d, char* a, int lev, int flag) ! 765: { ! 766: *ep++ = 'l'; ! 767: return y_to_e(ep,d,a,lev,flag); ! 768: } ! 769: ! 770: Bits ! 771: i_to_y(char* ep, char* d, char* a, int lev, int flag) ! 772: { ! 773: Bits h; ! 774: int temp; ! 775: ! 776: if(isupper(*word)) ! 777: return 0; ! 778: if((temp=ep[-1])=='i' && !isvowel(ep[-2])) { ! 779: ep[-1] = 'y'; ! 780: a = d; ! 781: } ! 782: h = cstrip(ep,"",a,lev,flag); ! 783: ep[-1] = temp; ! 784: return h; ! 785: } ! 786: ! 787: Bits ! 788: es(char* ep, char* d, char* a, int lev, int flag) ! 789: { ! 790: if(lev>DLEV) ! 791: return 0; ! 792: switch(ep[-1]) { ! 793: default: ! 794: return 0; ! 795: case 'i': ! 796: return i_to_y(ep,d,a,lev,flag); ! 797: case 'h': ! 798: switch(ep[-2]) { ! 799: default: ! 800: return 0; ! 801: case 'c': ! 802: case 's': ! 803: break; ! 804: } ! 805: case 's': ! 806: case 'z': ! 807: case 'x': ! 808: return strip(ep,d,a,lev,flag); ! 809: } ! 810: } ! 811: ! 812: Bits ! 813: subst(char* ep, char* d, char* a, int lev, int flag) ! 814: { ! 815: #pragma ref a ! 816: char *u,*t; ! 817: Bits h; ! 818: ! 819: if(skipv(skipv(ep-1)) < word) ! 820: return 0; ! 821: for(t=d; *t!='+'; t++) ! 822: continue; ! 823: for(u=ep; *--t!='-';) ! 824: *--u = *t; ! 825: h = strip(ep,"",d,lev,flag); ! 826: while(*++t != '+') ! 827: continue; ! 828: while(*++t) ! 829: *u++ = *t; ! 830: return h; ! 831: } ! 832: ! 833: Bits ! 834: tion(char* ep, char* d, char* a, int lev, int flag) ! 835: { ! 836: switch(ep[-2]) { ! 837: default: ! 838: return trypref(ep,a,lev,flag); ! 839: case 'a': ! 840: case 'e': ! 841: case 'i': ! 842: case 'o': ! 843: case 'u': ! 844: return y_to_e(ep,d,a,lev,flag); ! 845: } ! 846: } ! 847: ! 848: /* ! 849: * possible consonant-consonant-e ending ! 850: */ ! 851: Bits ! 852: CCe(char* ep, char* d, char* a, int lev, int flag) ! 853: { ! 854: Bits h; ! 855: ! 856: switch(ep[-1]) { ! 857: case 'l': ! 858: if(isvowel(ep[-2])) ! 859: break; ! 860: switch(ep[-2]) { ! 861: case 'l': ! 862: case 'r': ! 863: case 'w': ! 864: break; ! 865: default: ! 866: return y_to_e(ep,d,a,lev,flag); ! 867: } ! 868: break; ! 869: case 'c': ! 870: case 'g': ! 871: if(*ep == 'a') /* prevent -able for -eable */ ! 872: return 0; ! 873: case 's': ! 874: case 'v': ! 875: case 'z': ! 876: if(ep[-2]==ep[-1]) ! 877: break; ! 878: if(isvowel(ep[-2])) ! 879: break; ! 880: case 'u': ! 881: if(h = y_to_e(ep,d,a,lev,flag)) ! 882: return h; ! 883: if(!(ep[-2]=='n' && ep[-1]=='g')) ! 884: return 0; ! 885: } ! 886: return VCe(ep,d,a,lev,flag); ! 887: } ! 888: ! 889: /* ! 890: * possible consonant-vowel-consonant-e ending ! 891: */ ! 892: Bits ! 893: VCe(char* ep, char* d, char* a, int lev, int flag) ! 894: { ! 895: int c; ! 896: Bits h; ! 897: ! 898: c = ep[-1]; ! 899: if(c=='e') ! 900: return 0; ! 901: if(!isvowel(c) && isvowel(ep[-2])) { ! 902: c = *ep; ! 903: *ep++ = 'e'; ! 904: h = trypref(ep,d,lev,flag); ! 905: if(!h) ! 906: h = trysuff(ep,lev,flag); ! 907: if(h) ! 908: return h; ! 909: ep--; ! 910: *ep = c; ! 911: } ! 912: return cstrip(ep,d,a,lev,flag); ! 913: } ! 914: ! 915: Ptab* ! 916: lookuppref(char** wp, char* ep) ! 917: { ! 918: Ptab *sp; ! 919: char *bp,*cp; ! 920: int initchar = Tolower(**wp); ! 921: ! 922: if(!isalpha(initchar)) ! 923: return 0; ! 924: for(sp=preftab[initchar-'a'];sp->s;sp++) { ! 925: bp = *wp; ! 926: for(cp= sp->s;*cp; ) ! 927: if(*bp++!=*cp++) ! 928: goto next; ! 929: for(cp=bp;cp<ep;cp++) ! 930: if(isvowel(*cp)) { ! 931: *wp = bp; ! 932: return sp; ! 933: } ! 934: next:; ! 935: } ! 936: return 0; ! 937: } ! 938: ! 939: /* while word is not in dictionary try stripping ! 940: * prefixes. Fail if no more prefixes. ! 941: */ ! 942: Bits ! 943: trypref(char* ep, char* a, int lev, int flag) ! 944: { ! 945: Ptab *tp; ! 946: char *bp, *cp; ! 947: char *pp; ! 948: Bits h; ! 949: char space[20]; ! 950: ! 951: if(lev<DSIZ) { ! 952: deriv[lev].mesg = a; ! 953: deriv[lev].type = *a=='.'? NONE: SUFF; ! 954: } ! 955: if(h = tryword(word,ep,lev,flag)) { ! 956: if(Set(h, flag&~MONO) && (flag&MONO) <= Set(h, MONO)) ! 957: return h; ! 958: h = 0; ! 959: } ! 960: bp = word; ! 961: pp = space; ! 962: if(lev<DSIZ) { ! 963: deriv[lev+1].mesg = pp; ! 964: deriv[lev+1].type = 0; ! 965: } ! 966: while(tp=lookuppref(&bp,ep)) { ! 967: *pp++ = '+'; ! 968: cp = tp->s; ! 969: while(pp<space+sizeof(space) && (*pp = *cp++)) ! 970: pp++; ! 971: deriv[lev+1].type += PREF; ! 972: h = tryword(bp,ep,lev+1,flag); ! 973: if(Set(h,NOPREF) || ! 974: ((tp->flag&IN) && inun(bp-2,h)==0)) { ! 975: h = 0; ! 976: break; ! 977: } ! 978: if(Set(h,flag&~MONO) && (flag&MONO) <= Set(h, MONO)) ! 979: break; ! 980: h = 0; ! 981: } ! 982: if(lev<DSIZ) ! 983: deriv[lev+1] = deriv[lev+2] = emptyderiv; ! 984: return h; ! 985: } ! 986: ! 987: Bits ! 988: tryword(char* bp, char* ep, int lev, int flag) ! 989: { ! 990: int j; ! 991: Bits h = 0; ! 992: char duple[3]; ! 993: ! 994: if(ep-bp <= 1) ! 995: return h; ! 996: if(flag&MONO) { ! 997: if(lev<DSIZ) { ! 998: deriv[++lev].mesg = duple; ! 999: deriv[lev].type = SUFF; ! 1000: } ! 1001: duple[0] = '+'; ! 1002: duple[1] = *ep; ! 1003: duple[2] = 0; ! 1004: } ! 1005: h = dict(bp, ep); ! 1006: if(vflag==0 || h==0) ! 1007: return h; ! 1008: /* ! 1009: * when derivations are wanted, collect them ! 1010: * for printing ! 1011: */ ! 1012: j = lev; ! 1013: prefcount = suffcount = 0; ! 1014: do { ! 1015: if(j<DSIZ && deriv[j].type) { ! 1016: strcat(affix, deriv[j].mesg); ! 1017: if(deriv[j].type == SUFF) ! 1018: suffcount++; ! 1019: else if(deriv[j].type != NONE) ! 1020: prefcount = deriv[j].type/PREF; ! 1021: } ! 1022: } while(--j > 0); ! 1023: return h; ! 1024: } ! 1025: ! 1026: int ! 1027: inun(char* bp, Bits h) ! 1028: { ! 1029: if(*bp == 'u') ! 1030: return Set(h, IN) == 0; ! 1031: /* *bp == 'i' */ ! 1032: if(Set(h, IN) == 0) ! 1033: return 0; ! 1034: switch(bp[2]) { ! 1035: case 'r': ! 1036: return bp[1] == 'r'; ! 1037: case 'm': ! 1038: case 'p': ! 1039: return bp[1] == 'm'; ! 1040: } ! 1041: return bp[1] == 'n'; ! 1042: } ! 1043: ! 1044: char* ! 1045: skipv(char *s) ! 1046: { ! 1047: if(s >= word && isvowel(*s)) ! 1048: s--; ! 1049: while(s >= word && !isvowel(*s)) ! 1050: s--; ! 1051: return s; ! 1052: } ! 1053: ! 1054: /* ! 1055: * crummy way to Britishise ! 1056: */ ! 1057: void ! 1058: ise(void) ! 1059: { ! 1060: Suftab *p; ! 1061: int i; ! 1062: ! 1063: for(i=0; i<26; i++) ! 1064: for(p = suftab[i]; p->suf; p++) { ! 1065: p->suf = ztos(p->suf); ! 1066: p->d1 = ztos(p->d1); ! 1067: p->a1 = ztos(p->a1); ! 1068: } ! 1069: } ! 1070: ! 1071: char* ! 1072: ztos(char *as) ! 1073: { ! 1074: char *s, *ds; ! 1075: ! 1076: for(s=as; *s; s++) ! 1077: if(*s == 'z') ! 1078: goto copy; ! 1079: return as; ! 1080: ! 1081: copy: ! 1082: ds = strdupl(as); ! 1083: for(s=ds; *s; s++) ! 1084: if(*s == 'z') ! 1085: *s = 's'; ! 1086: return ds; ! 1087: } ! 1088: ! 1089: Bits ! 1090: dict(char* bp, char* ep) ! 1091: { ! 1092: char *cp, *cp1, *w, *wp, *we; ! 1093: int n, f; ! 1094: ! 1095: w = bp; ! 1096: we = ep; ! 1097: n = ep-bp; ! 1098: if(n <= 1) ! 1099: return NOUN; ! 1100: ! 1101: f = w[0] & 0x7f; ! 1102: f *= 128; ! 1103: f += w[1] & 0x7f; ! 1104: bp = spacep[f]; ! 1105: ep = spacep[f+1]; ! 1106: ! 1107: loop: ! 1108: if(bp >= ep) { ! 1109: if(xflag) ! 1110: fprintf(stderr,"=%.*s\n",n, w); ! 1111: return 0; ! 1112: } ! 1113: /* ! 1114: * find the beginning of some word in the middle ! 1115: */ ! 1116: cp = bp + (ep-bp)/2; ! 1117: ! 1118: while(cp > bp && !(*cp & 0x80)) ! 1119: cp--; ! 1120: while(cp > bp && (cp[-1] & 0x80)) ! 1121: cp--; ! 1122: ! 1123: wp = w + 2; /* skip two letters */ ! 1124: cp1 = cp + 2; /* skip affix code */ ! 1125: for(;;) { ! 1126: if(wp >= we) { ! 1127: if(*cp1&0x80) ! 1128: goto found; ! 1129: else ! 1130: f = 1; ! 1131: break; ! 1132: } ! 1133: if(*cp1&0x80) { ! 1134: f = -1; ! 1135: break; ! 1136: } ! 1137: f = *cp1++ - *wp++; ! 1138: if(f != 0) ! 1139: break; ! 1140: } ! 1141: ! 1142: if(f < 0) { ! 1143: while(!(*cp1&0x80)) ! 1144: cp1++; ! 1145: bp = cp1; ! 1146: goto loop; ! 1147: } ! 1148: ep = cp; ! 1149: goto loop; ! 1150: found: ! 1151: f = ((cp[0] & 0x7) << 8) | ! 1152: (cp[1] & 0xff); ! 1153: if(xflag) { ! 1154: fprintf(stderr,"=%.*s ",n,w); ! 1155: typeprint(encode[f]); ! 1156: } ! 1157: return encode[f]; ! 1158: } ! 1159: ! 1160: void ! 1161: typeprint(Bits h) ! 1162: { ! 1163: ! 1164: pcomma(""); ! 1165: if(h & NOUN) ! 1166: pcomma("n"); ! 1167: if(h & PROP_COLLECT) ! 1168: pcomma("pc"); ! 1169: if(h & VERB) { ! 1170: if((h & VERB) == VERB) ! 1171: pcomma("v"); ! 1172: else ! 1173: if((h & VERB) == V_IRREG) ! 1174: pcomma("vi"); ! 1175: else ! 1176: if(h & ED) ! 1177: pcomma("ed"); ! 1178: } ! 1179: if(h & ADJ) ! 1180: pcomma("a"); ! 1181: if(h & COMP) { ! 1182: if((h & COMP) == ACTOR) ! 1183: pcomma("er"); ! 1184: else ! 1185: pcomma("comp"); ! 1186: } ! 1187: if(h & DONT_TOUCH) ! 1188: pcomma("d"); ! 1189: if(h & N_AFFIX) ! 1190: pcomma("na"); ! 1191: if(h & ADV) ! 1192: pcomma("adv"); ! 1193: if(h & ION) ! 1194: pcomma("ion"); ! 1195: if(h & V_AFFIX) ! 1196: pcomma("va"); ! 1197: if(h & MAN) ! 1198: pcomma("man"); ! 1199: if(h & NOPREF) ! 1200: pcomma("nopref"); ! 1201: if(h & MONO) ! 1202: pcomma("ms"); ! 1203: if(h & IN) ! 1204: pcomma("in"); ! 1205: if(h & _Y) ! 1206: pcomma("y"); ! 1207: if(h & STOP) ! 1208: pcomma("s"); ! 1209: fprintf(stderr, "\n"); ! 1210: } ! 1211: ! 1212: void ! 1213: pcomma(char *s) ! 1214: { ! 1215: static flag; ! 1216: ! 1217: if(*s == 0) { ! 1218: flag = 0; ! 1219: return; ! 1220: } ! 1221: if(!flag) { ! 1222: fprintf(stderr, "%s", s); ! 1223: flag = 1; ! 1224: } else ! 1225: fprintf(stderr, ",%s", s); ! 1226: } ! 1227: ! 1228: int ! 1229: ordinal(void) ! 1230: { ! 1231: char *cp = word; ! 1232: static char sp[4]; ! 1233: ! 1234: while(isdigit(*cp)) ! 1235: cp++; ! 1236: strncpy(sp,cp,3); ! 1237: if(isupper(cp[0]) && isupper(cp[1])) { ! 1238: sp[0] = Tolower(cp[0]); ! 1239: sp[1] = Tolower(cp[1]); ! 1240: } ! 1241: return 0 == strncmp(sp, ! 1242: cp[-2]=='1'? "th": /* out of bounds if 1 digit */ ! 1243: *--cp=='1'? "st": /* harmless */ ! 1244: *cp=='2'? "nd": ! 1245: *cp=='3'? "rd": ! 1246: "th", 3); ! 1247: } ! 1248: ! 1249: /* layout of file entry: first byte has bit 0x80 turned on. ! 1250: next 4 bits count number of characters common between this ! 1251: entry and previous one. last three bits concatenated with ! 1252: second byte are the affixing code, so arranged that the 0x80 ! 1253: bit is zero in all bytes but the first. 3rd and following ! 1254: bytes are the remainder of the dictionary word. ! 1255: ! 1256: layout in memory: common prefixes are expanded, and the ! 1257: first two letters of each word are deleted and found ! 1258: instead by lookup in table spacep, which points to the ! 1259: first word for each two-letter prefix. ! 1260: */ ! 1261: ! 1262: void ! 1263: readdict(char *file) ! 1264: { ! 1265: char *s, *is, *lasts, *ls; ! 1266: int c, i, sp, p; ! 1267: int f; ! 1268: long l; ! 1269: ! 1270: f = open(file, 0); ! 1271: if(f == -1) { ! 1272: fprintf(stderr, "spell: cannot open %s\n", file); ! 1273: exit(1); ! 1274: } ! 1275: if(read(f, space, 2) != 2) ! 1276: goto bad; ! 1277: nencode = ((space[0]&0xff)<<8) | (space[1]&0xff); ! 1278: if(nencode > sizeof(encode)/sizeof(*encode)) ! 1279: goto noroom; ! 1280: if(read(f, space, nencode*sizeof(*encode)) ! 1281: != nencode*sizeof(*encode)) ! 1282: goto bad; ! 1283: s = space; ! 1284: for(i=0; i<nencode; i++) { ! 1285: l = (long)(s[0] & 0xff) << 24; ! 1286: l |= (s[1] & 0xff) << 16; ! 1287: l |= (s[2] & 0xff) << 8; ! 1288: l |= s[3] & 0xff; ! 1289: encode[i] = (Bits)l; ! 1290: s += 4; ! 1291: } ! 1292: l = read(f, space, sizeof(space)); ! 1293: if(l >= sizeof(space)) ! 1294: goto noroom; ! 1295: is = space + (sizeof(space) - l); ! 1296: memmove(is, space, l); ! 1297: ! 1298: s = space; ! 1299: c = *is++ & 0xff; ! 1300: sp = -1; ! 1301: i = 0; ! 1302: lasts = 0; /* to prevent diagnostics */ ! 1303: loop: ! 1304: if(s > is) ! 1305: goto noroom; ! 1306: if(c < 0) { ! 1307: close(f); ! 1308: while(sp < 128*128) ! 1309: spacep[++sp] = s; ! 1310: *s++ = 0x80; /* fence */ ! 1311: return; ! 1312: } ! 1313: p = (c>>3) & 0xf; ! 1314: *s++ = c; ! 1315: *s++ = *is++ & 0xff; ! 1316: if(p <= 0) ! 1317: i = (*is++ & 0xff)*128; ! 1318: if(p <= 1) { ! 1319: if(!(*is & 0x80)) ! 1320: i = i/128*128 + (*is++ & 0xff); ! 1321: if(i <= sp) { ! 1322: fprintf(stderr, "spell: the dict isn't " ! 1323: "sorted or memmove didn't work\n"); ! 1324: goto bad; ! 1325: } ! 1326: while(sp < i) ! 1327: spacep[++sp] = s-2; ! 1328: } ! 1329: ls = lasts; ! 1330: lasts = s; ! 1331: for(p-=2; p>0; p--) ! 1332: *s++ = *ls++; ! 1333: for(;;) { ! 1334: if(is >= space+sizeof(space)) { ! 1335: c = -1; ! 1336: break; ! 1337: } ! 1338: c = *is++ & 0xff; ! 1339: if(c & 0x80) ! 1340: break; ! 1341: *s++ = c; ! 1342: } ! 1343: *s = 0; ! 1344: goto loop; ! 1345: ! 1346: bad: ! 1347: fprintf(stderr, "spell: trouble reading %s\n", file); ! 1348: exit(1); ! 1349: noroom: ! 1350: fprintf(stderr,"spell: not enough space for dictionary\n"); ! 1351: exit(1); ! 1352: } ! 1353: ! 1354: char * /* same as strdup; portability hack */ ! 1355: strdupl(char *s) ! 1356: { ! 1357: char *t = (char*)malloc(strlen(s)+1); ! 1358: strcpy(t, s); ! 1359: return t; ! 1360: } ! 1361: ! 1362: void ! 1363: runout(char *s) ! 1364: { ! 1365: int c; ! 1366: if(!cflag) ! 1367: printf("%s", s); ! 1368: else { ! 1369: putchar('-'); ! 1370: fflush(stdout); ! 1371: } ! 1372: do { ! 1373: if((c=getchar()) == EOF) ! 1374: exit(0); ! 1375: if(!cflag) ! 1376: putchar(c); ! 1377: } while(c != '\n'); ! 1378: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.