|
|
%{
/*
 * break out words, output cap + word(inverted)
 *
 * Lexical pass that splits running text into tokens and writes one
 * line per token on standard output:
 *
 *	OUT1(tag)   prints "<tag>:<token text>"
 *	OUTN(str)   prints a fixed, pre-tagged line (nt, qs, fin below)
 *	OUT()       prints the token text reversed, with no tag; used for
 *	            words that were not found by lookup() and did not match
 *	            one of the "-y" suffix tables
 *
 * The tag constants (NOUN, ADJ, PREP, END, ...) are not defined in this
 * file; presumably they come from names.h.  lookup(), getd(), ygetd()
 * and the suffix tables (cy, fy, ly, ...) are likewise expected from the
 * included .c files -- TODO confirm.
 *
 * NOTE(review): several actions write into yytext and call unput() /
 * input() while still using yytext afterwards.  That relies on yytext
 * being a writable array that unput() leaves alone (AT&T lex behaviour).
 * Under flex's default %pointer mode this would need %array -- verify
 * before switching lex implementations.
 */
#include <stdio.h>
/* wrapped in do/while so the macro is a single statement everywhere */
#define OUT()	do { for(i=yyleng-1;i>=0; i--)putchar(yytext[i]); putchar('\n'); } while(0)
#define OUT1(nam)	printf("%c:%s\n",(nam),yytext)
#define OUTN(string)	printf("%s\n",(string))
#include "names.h"
#include "nhash.c"
#include "dict.c"
#include "ydict.c"
/* helpers defined after the rules; declared here because yylex calls them */
int look(), pos();
char nt[] = "D:n't";	/* emitted after a word whose "n't" was stripped */
char qs[] = "c:'s";	/* emitted after a word whose "'s" was stripped */
char fin[] = "E:.";	/* sentence end emitted for a deferred '.' */
int i,j;		/* scratch; OUT() uses i */
int dot = 0;		/* last token ended in a '.' that has not yet been reported as a sentence end */
int first = 1;		/* next word is treated as the first word of a sentence */
int qflg,nflg;		/* qflg: inside a quotation; nflg: pending "n't" to emit */
int cap = 0;		/* current word was capitalized and was not sentence-initial */
%}
%p 3000
%a 2500

L	[a-z]
N	[0-9]
C	[A-Z]

%%
(St|Dr|Drs|Mr|Mrs|Ms)"."	{
	OUT1(NOUN);
	}
{C}{L}*'[s]	{
	/* capitalized possessive: pos(1) tries the lower-cased stem first */
	pos(1);
	if(first==1)first=0;
	}
{C}+['][s]*	{
	OUT1(POS);
	}
(({C}+{L}*)|({C}*{L}+))+([-](({C}*{L}+)|({C}+{L}*))+)+	{
	/* hyphenated compound */
	OUT1(NOUN_ADJ);
	}
{C}{C}+	{
	/* all-caps word: with a trailing 's' it is tagged NOUN as is,
	 * otherwise it is lower-cased and handled as an ordinary word */
	if((i=input()) == 's'){
		yytext[yyleng++] = 's';
		yytext[yyleng] = '\0';
		OUT1(NOUN);
	}
	else {
		/* nothing to push back when input() reported end of input */
		if(i > 0)unput(i);
		for(i=0;i<yyleng;i++)yytext[i]+= 'a' - 'A';
		goto wd;
	}
	}
[LD][']{C}{L}*	{
	OUT1(NOUN_ADJ);
	}
{C}{L}*	{
	/* capitalized word: only counts as "cap" when not sentence-initial */
	if(first==1)
		first=0;
	else cap = 1;
	if(yyleng==1 && yytext[0] == 'I'){
		/* the pronoun "I" is looked up unchanged */
		cap = 0;
		goto wd;
	}
	yytext[0]+= 'a' - 'A';
	goto wd;
	}
({N}+[-]{N}+[-]*)+	{
	OUT1(NOUN_ADJ);
	}
({N}+[-]*{L}+[-]*)+	{
	OUT1(NOUN_ADJ);
	}
({N}*[,])*({N}+".")+[ \t\n]+{C}	{
	/* number, '.', white space, capital: the '.' ends a sentence.
	 * Cut the token at its last '.' and push the capital back. */
	for(i=yyleng-1;i>0;i--)
		if(yytext[i] == '.')break;
	unput(yytext[yyleng-1]);
	yytext[i] = '\0';
	OUT1(NOUN_ADJ);
	OUTN(fin);
	first = 1;
	}
[ \t`][a-zA-Z0-9.]*("\/"[a-zA-Z0-9]+"."*)+[']*	{
	/* slash-separated token */
	if(yytext[yyleng-1] == '.')dot=1;
	OUT1(NOUN_ADJ);
	}
{N}+([,]{N}+)*("."{N}+)*[']*[s]*	{
	OUT1(NOUN_ADJ);
	}
{N}*([,]{N}+)*("."{N}+)+[']*[s]*	{
	OUT1(NOUN_ADJ);
	}
{N}+([,]{N}+)*("."{N}*)*[']*[s]*	{
	if(yytext[yyleng-1] == '.')dot=1;
	OUT1(NOUN_ADJ);
	}
{L}+[-]*{N}+	{
	OUT1(NOUN_ADJ);
	}
{C}+[-]*{N}+	{
	OUT1(NOUN_ADJ);
	}
{N}+[-]+{C}+	{
	OUT1(NOUN_ADJ);
	}
{N}+[%]	{
	OUT1(NOUN_ADJ);
	}
"$"{N}+([,]{N}+)*("."{N}*)*	{
	if(yytext[yyleng-1] == '.')dot=1;
	OUT1(NOUN);
	}
[Aa]"."[ ]*[Mm]"."	{
	OUT1(ADJ_ADV);
	}
[Pp]"."[ ]*[Mm]"."	{
	OUT1(ADJ_ADV);
	}
"a."[ ]*"d."	{
	OUT1(ADJ_ADV);
	}
"b."[ ]*"c."	{
	OUT1(ADJ_ADV);
	}
"i."[ ]*"e."	{
	OUT1(PREP);
	}
"e."[ ]*"g."	{
	OUT1(PREP);
	}
"etc."[ \n]*[,)]*	{
	/* print "etc." itself, then report what followed it: a ',' or ')'
	 * is passed on as punctuation, anything else ends the sentence */
	i = yytext[4];
	yytext[4] = '\0';
	OUT1(NOUN);
	yytext[4] = i;
	yytext[0] = yytext[yyleng-1];
	yytext[1] = '\0';
	if(yytext[0] == ',' || yytext[0] == ')')
		OUT1(',');
	else {
		OUTN(fin);
		first = 1;
	}
	}
"et al."	{
	OUT1(NOUN);
	}
[Nn][Oo][s]*"."	{
	OUT1(NOUN_ADJ);
	}
[Ff]ig[s]*"."	{
	OUT1(NOUN_ADJ);
	}
[Dd]ept[s]*"."	{
	OUT1(NOUN_ADJ);
	}
[Ee]q"."	{
	OUT1(NOUN_ADJ);
	}
dB"."	{
	OUT1(NOUN_ADJ);
	}
vs"."	{
	OUT1(PREP);
	}
in"."[ \n]*{C}	{
	/* "in." followed by a capital: preposition ending a sentence;
	 * the capital is pushed back for the next token */
	unput(yytext[yyleng-1]);
	yytext[2] = '\0';
	OUT1(PREP);
	OUTN(fin);
	first = 1;
	}
(in|ft|yr|ckts|mi)"."	{
	OUT1(NOUN_ADJ);
	}
Ph"."[ ]*[Dd]"."	{
	OUT1(ADJ);
	}
[Jj]r"."	{
	OUT1(ADJ);
	}
[Cc]h"."	{
	OUT1(NOUN_ADJ);
	}
[Rr]ef[s]*"."	{
	OUT1(NOUN_ADJ);
	}
Inc"."	{
	OUT1(ADJ);
	}
[A-Z]"."	{
	/* single initial; whether the '.' ends a sentence is decided later */
	dot=1;
	OUT1(NOUN);
	}
can't	{
	/* strip to "can"; the "n't" line is emitted by the wd: code */
	yytext[3]='\0';
	yyleng -= 2;
	nflg=1;
	goto wd;
	}
won't	{
	OUT1('X');
	}
{L}+n't	{
	nflg=1;
	yytext[yyleng-3]='\0';
	yyleng -= 3;
	goto wd;
	}
[A-Z]{L}+n't	{
	yytext[0]+= 'a' - 'A';
	nflg=1;
	yytext[yyleng-3]='\0';
	yyleng -= 3;
	goto wd;
	}
o'clock	{
	OUT1(ADV);
	}
{L}+'[s]	{
	pos(0);
	}
'll	{
	OUT1(lookup("will",1,0));
	}
've	{
	OUT1(lookup("have",1,0));
	}
're	{
	OUT1(lookup("are",1,0));
	}
'd	{
	OUT1(lookup("had",1,0));
	}
'm	{
	OUT1(lookup("am",1,0));
	}
'ld	{
	OUT1(lookup("would",1,0));
	}
{L}+	{
wd:
	/* Common word handling; other rules jump here after normalizing
	 * yytext to lower case and adjusting yyleng. */
	if((j = lookup(yytext,1,0)) != 0){
		first=0;
		if(cap){
			/* restore the capital; a known capitalized word after
			 * a pending '.' means that '.' ended a sentence */
			yytext[0] += 'A' - 'a';
			cap = 0;
			if(dot)OUTN(fin);
		}
		dot=0;
		OUT1(j);
	}
	else{
		first = dot=0;
		/* the suffix tests look two or three characters back, so the
		 * word must be long enough for those characters to exist */
		if(yyleng >= 2 && yytext[yyleng-1] == 'y' && cap == 0){
			char prev;	/* third-from-last letter, NUL if none */

			prev = yyleng >= 3 ? yytext[yyleng-3] : '\0';
			switch(yytext[yyleng-2]){
			case 'c':
				look(cy,yyleng-2,NOUN);
				break;
			case 'f':
				look(fy,yyleng-2,VERB);
				break;
			case 'l':
				look(ly,yyleng-2,ADV);
				break;
			case 'g':
				if(prev == 'o'){
					OUT1(NOUN);
					break;
				}
				look(gy,yyleng-2,ADJ);
				break;
			case 'r':
				switch(prev){
				case 'a':
					look(ary,yyleng-3,ADJ);
					break;
				case 'o':
					look(ory,yyleng-3,ADJ);
					break;
				case 'e':
					look(ery,yyleng-3,NOUN);
					break;
				default:
					look(ry,yyleng-2,NOUN);
				}
				break;
			case 't':
				if(prev == 'i')look(ity,yyleng-3,NOUN);
				else look(ty,yyleng-2,ADJ);
				break;
			default:
				OUT();
			}
		}
		else {
			if(cap){
				yytext[0] += 'A' - 'a';
				cap = 0;
				OUT1(NOUN_ADJ);
			}
			else {
				OUT();
			}
		}
	}
	/* emit the stripped "n't" whether or not the stem was found, so
	 * the flag cannot leak onto a later, unrelated word */
	if(nflg==1){
		nflg=0;
		OUTN(nt);
	}
	}
[\n]	;
[ ]+	;
[\t]+	;
";"	{
	OUT1(';');
	first=1;
	}
(\"|`|')+	{
	/* quote marks toggle qflg; an opening quote starts a new sentence */
	if(dot){
		OUTN(fin);
		dot=0;
	}
	if(qflg==1){
		qflg=0;
		OUT1('"');
	}
	else {
		qflg=1;
		first=1;
		OUT1('"');
	}
	}
".\""	{
	qflg=0;
	first=1;
	OUT1(END);
	}
"..."	{
	OUT1(',');
	}
"/."	{
	first = 1;
	OUT1(END);
	}
"."	{
	first=1;
	OUT1(END);
	}
"!\""	{
	qflg=0;
	first=1;
	OUT1(END);
	}
"!"	{
	first=1;
	OUT1(END);
	}
"?\""	{
	qflg=0;
	first=1;
	OUT1(END);
	}
"?"	{
	first=1;
	OUT1(END);
	}
":"	{
	OUT1(',');
	first=1;
	}
[-]+	{
	OUT1(',');
	first=1;
	}
","	{
	OUT1(',');
	}
(\[|\(|\{|\]|\)|\})	{
	OUT1(',');
	}
.	{
	/* any other character is dropped without output */
	;
	}
%%
/*
 * look - classify an unknown word ending in "y" through a suffix table.
 *
 * f   lookup routine for the suffix; called as (*f)(stem,1,0)
 * n   index in yytext at which the stem ends; yytext is cut there for
 *     the duration of the call and then restored
 * cc  tag to print when f returns 0
 *
 * Prints the whole (restored) yytext tagged with f's result, or with cc
 * when f returned 0.  Always returns 0.
 */
int
look(f,n,cc)
char (*f)();
int n;
char cc;
{
	int nn;
	char save;

	save=yytext[n];
	yytext[n] = '\0';
	nn=(*f)(yytext,1,0);
	yytext[n] = save;
	if(nn != 0){
		OUT1(nn);
	}
	else {
		OUT1(cc);
	}
	return(0);
}

/*
 * pos - handle a token of the form  word's.
 *
 * flg is 1 when the token began with a capital letter; the first
 * character is lower-cased before the dictionary lookup.
 *
 * If the stem before the apostrophe is found by lookup(), the stem is
 * printed with its tag followed by the qs line.  Otherwise the original
 * token (capital and apostrophe restored) is printed tagged POS.
 * The rules that call pos() guarantee yytext contains an apostrophe.
 * Always returns 0.
 */
int
pos(flg)
int flg;
{
	int ii,j;

	if(flg==1)yytext[0] += 'a' - 'A';
	for(ii=yyleng-1;yytext[ii] != '\''; ii--)
		;
	yytext[ii] = '\0';
	if((j=lookup(yytext,1,0)) != 0){
		yyleng = ii;
		OUT1(j);
		OUTN(qs);
	}
	else{
		if(flg==1)yytext[0] += 'A' - 'a';
		yytext[ii] = '\'';
		OUT1(POS);
	}
	return(0);
}

char *filename="-";	/* name of the file being scanned; "-" for stdin */

/*
 * main - print the leading ":" line, initialise via getd() and ygetd(),
 * then scan standard input or each file named on the command line.
 * Returns the number of files that could not be opened.
 */
int
main(argc,argv)
int argc;
char *argv[];
{
	register int rc=0;

	putchar(':'); putchar('\n');
	getd();
	ygetd();
	if(argc<=1) {
		yylex();
	}else{
		while(argc>1) {
			/* each file is read by re-pointing stdin at it */
			if(freopen(argv[1],"r",stdin)==NULL) {
				fprintf(stderr,"%s: cannot open\n", argv[1]);
				rc++;
			}else{
				filename=argv[1];
				yylex();
			}
			argc--; argv++;
		}
	}
	return(rc);
}
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.