|
|
1.1 ! root 1: #include <libc.h> ! 2: #include "regprog.h" ! 3: ! 4: /* ! 5: * Parser Information ! 6: */ ! 7: typedef struct Node{ ! 8: Inst *first; ! 9: Inst *last; ! 10: }Node; ! 11: #define NSTACK 20 ! 12: static Node andstack[NSTACK]; ! 13: static Node *andp; ! 14: static int atorstack[NSTACK]; ! 15: static int *atorp; ! 16: static int cursubid; /* id of current subexpression */ ! 17: static int subidstack[NSTACK]; /* parallel to atorstack */ ! 18: static int *subidp; ! 19: static int lastwasand; /* Last token was operand */ ! 20: static int nbra; ! 21: static char *exprp; /* pointer to next character in source expression */ ! 22: static int nclass; ! 23: static Class *classp; ! 24: static Inst *freep; ! 25: static int errors; ! 26: ! 27: /* predeclared crap */ ! 28: static void operator(); ! 29: static void pushand(); ! 30: static void pushator(); ! 31: static void evaluntil(); ! 32: static void bldcclass(); ! 33: ! 34: static void ! 35: rcerror(s) ! 36: char *s; ! 37: { ! 38: errors++; ! 39: regerror(s); ! 40: } ! 41: ! 42: static Inst * ! 43: newinst(t) ! 44: int t; ! 45: { ! 46: freep->type=t; ! 47: freep->left=0; ! 48: freep->right=0; ! 49: return freep++; ! 50: } ! 51: ! 52: static void ! 53: operand(t) ! 54: int t; ! 55: { ! 56: register Inst *i; ! 57: if(lastwasand) ! 58: operator(CAT); /* catenate is implicit */ ! 59: i=newinst(t); ! 60: if(t==CCLASS) /* ugh */ ! 61: i->right=(Inst *)&(classp[nclass-1]); /* UGH! */ ! 62: pushand(i, i); ! 63: lastwasand=TRUE; ! 64: } ! 65: ! 66: static void ! 67: operator(t) ! 68: int t; ! 69: { ! 70: if(t==RBRA && --nbra<0) ! 71: rcerror("unmatched right paren"); ! 72: if(t==LBRA) { ! 73: if (++cursubid >= NSUBEXP) ! 74: rcerror ("too many subexpressions"); ! 75: nbra++; ! 76: if (lastwasand) ! 77: operator(CAT); ! 78: } else ! 79: evaluntil(t); ! 80: if(t!=RBRA) ! 81: pushator(t); ! 82: lastwasand=FALSE; ! 83: if(t==STAR || t==QUEST || t==PLUS || t==RBRA) ! 84: lastwasand=TRUE; /* these look like operands */ ! 85: } ! 86: ! 87: static void ! 88: regerr2(s, c) ! 89: char *s; ! 90: { ! 91: char buf[100]; ! 92: char *cp = buf; ! 93: while(*s) ! 94: *cp++ = *s++; ! 95: *cp++ = c; ! 96: *cp = '\0'; ! 97: rcerror(buf); ! 98: } ! 99: ! 100: static void ! 101: cant(s) ! 102: char *s; ! 103: { ! 104: char buf[100]; ! 105: strcpy(buf, "can't happen: "); ! 106: strcat(buf, s); ! 107: rcerror(buf); ! 108: } ! 109: ! 110: static void ! 111: pushand(f, l) ! 112: Inst *f, *l; ! 113: { ! 114: if(andp >= &andstack[NSTACK]) ! 115: cant("operand stack overflow"); ! 116: andp->first=f; ! 117: andp->last=l; ! 118: andp++; ! 119: } ! 120: ! 121: static void ! 122: pushator(t) ! 123: int t; ! 124: { ! 125: if(atorp >= &atorstack[NSTACK]) ! 126: cant("operator stack overflow"); ! 127: *atorp++=t; ! 128: *subidp++=cursubid; ! 129: } ! 130: ! 131: static Node * ! 132: popand(op) ! 133: { ! 134: register Inst *inst; ! 135: ! 136: if(andp <= &andstack[0]) { ! 137: regerr2("missing operand for ", op); ! 138: inst=newinst(NOP); ! 139: pushand(inst,inst); ! 140: } ! 141: return --andp; ! 142: } ! 143: ! 144: static int ! 145: popator() ! 146: { ! 147: if(atorp <= &atorstack[0]) ! 148: cant("operator stack underflow"); ! 149: --subidp; ! 150: return *--atorp; ! 151: } ! 152: ! 153: static void ! 154: evaluntil(pri) ! 155: register pri; ! 156: { ! 157: register Node *op1, *op2; ! 158: register Inst *inst1, *inst2; ! 159: ! 160: while(pri==RBRA || atorp[-1]>=pri){ ! 161: switch(popator()){ ! 162: default: ! 163: rcerror("unknown operator in evaluntil"); ! 164: break; ! 165: case LBRA: /* must have been RBRA */ ! 166: op1=popand('('); ! 167: inst2=newinst(RBRA); ! 168: inst2->subid = *subidp; ! 169: op1->last->next = inst2; ! 170: inst1=newinst(LBRA); ! 171: inst1->subid = *subidp; ! 172: inst1->next=op1->first; ! 173: pushand(inst1, inst2); ! 174: return; ! 175: case OR: ! 176: op2=popand('|'); ! 177: op1=popand('|'); ! 178: inst2=newinst(NOP); ! 179: op2->last->next=inst2; ! 180: op1->last->next=inst2; ! 181: inst1=newinst(OR); ! 182: inst1->right=op1->first; ! 183: inst1->left=op2->first; ! 184: pushand(inst1, inst2); ! 185: break; ! 186: case CAT: ! 187: op2=popand(0); ! 188: op1=popand(0); ! 189: op1->last->next=op2->first; ! 190: pushand(op1->first, op2->last); ! 191: break; ! 192: case STAR: ! 193: op2=popand('*'); ! 194: inst1=newinst(OR); ! 195: op2->last->next=inst1; ! 196: inst1->right=op2->first; ! 197: pushand(inst1, inst1); ! 198: break; ! 199: case PLUS: ! 200: op2=popand('+'); ! 201: inst1=newinst(OR); ! 202: op2->last->next=inst1; ! 203: inst1->right=op2->first; ! 204: pushand(op2->first, inst1); ! 205: break; ! 206: case QUEST: ! 207: op2=popand('?'); ! 208: inst1=newinst(OR); ! 209: inst2=newinst(NOP); ! 210: inst1->left=inst2; ! 211: inst1->right=op2->first; ! 212: op2->last->next=inst2; ! 213: pushand(inst1, inst2); ! 214: break; ! 215: } ! 216: } ! 217: } ! 218: ! 219: static Prog * ! 220: optimize(pp) ! 221: Prog *pp; ! 222: { ! 223: register Inst *inst, *target; ! 224: int size; ! 225: Prog *npp; ! 226: int diff; ! 227: ! 228: /* ! 229: * get rid of NOOP chains ! 230: */ ! 231: for(inst=pp->firstinst; inst->type!=END; inst++){ ! 232: target=inst->next; ! 233: while(target->type == NOP) ! 234: target=target->next; ! 235: inst->next=target; ! 236: } ! 237: ! 238: /* ! 239: * The original allocation is for an area larger than ! 240: * necessary. Reallocate to the actual space used ! 241: * and then relocate the code. ! 242: */ ! 243: size = sizeof(Prog) + (freep - pp->firstinst)*sizeof(Inst); ! 244: npp = (Prog *)realloc((char *)pp, size); ! 245: if(npp==NULL || npp==pp) ! 246: return(pp); ! 247: diff = (char *)npp - (char *)pp; ! 248: freep = (Inst *)((char *)freep + diff); ! 249: for(inst=npp->firstinst; inst<freep; inst++){ ! 250: switch(inst->type){ ! 251: case OR: ! 252: case STAR: ! 253: case PLUS: ! 254: case QUEST: ! 255: case CCLASS: ! 256: *(char **)&inst->right += diff; ! 257: break; ! 258: } ! 259: *(char **)&inst->left += diff; ! 260: } ! 261: *(char **)&npp->startinst += diff; ! 262: return(npp); ! 263: } ! 264: ! 265: #ifdef DEBUG ! 266: static void ! 267: dumpstack(){ ! 268: Node *stk; ! 269: int *ip; ! 270: ! 271: printf("operators\n"); ! 272: for(ip=atorstack; ip<atorp; ip++) ! 273: printf("0%o\n", *ip); ! 274: printf("operands\n"); ! 275: for(stk=andstack; stk<andp; stk++) ! 276: printf("0%o\t0%o\n", stk->first->type, stk->last->type); ! 277: } ! 278: ! 279: static void ! 280: dump(pp) ! 281: Prog *pp; ! 282: { ! 283: Inst *l; ! 284: ! 285: l=pp->firstinst; ! 286: do{ ! 287: printf("%d:\t0%o\t%d\t%d\n", l-pp->firstinst, l->type, ! 288: l->left-pp->firstinst, l->right-pp->firstinst); ! 289: }while(l++->type); ! 290: } ! 291: #endif ! 292: ! 293: static void ! 294: startlex(s) ! 295: char *s; ! 296: { ! 297: exprp=s; ! 298: nclass=0; ! 299: nbra=0; ! 300: } ! 301: ! 302: static Class * ! 303: newclass(){ ! 304: register Class *p; ! 305: register n; ! 306: ! 307: if(nclass >= NCLASS) ! 308: regerr2("too many character classes; limit", NCLASS+'0'); ! 309: p = &(classp[nclass++]); ! 310: for(n=0; n<16; n++) ! 311: p->map[n]=0; ! 312: return p; ! 313: } ! 314: ! 315: static int ! 316: lex(){ ! 317: register c= *exprp++; ! 318: ! 319: switch(c){ ! 320: case '\\': ! 321: if(*exprp) ! 322: c= *exprp++; ! 323: break; ! 324: case 0: ! 325: c=END; ! 326: --exprp; /* In case we come here again */ ! 327: break; ! 328: case '*': ! 329: c=STAR; ! 330: break; ! 331: case '?': ! 332: c=QUEST; ! 333: break; ! 334: case '+': ! 335: c=PLUS; ! 336: break; ! 337: case '|': ! 338: c=OR; ! 339: break; ! 340: case '.': ! 341: c=ANY; ! 342: break; ! 343: case '(': ! 344: c=LBRA; ! 345: break; ! 346: case ')': ! 347: c=RBRA; ! 348: break; ! 349: case '^': ! 350: c=BOL; ! 351: break; ! 352: case '$': ! 353: c=EOL; ! 354: break; ! 355: case '[': ! 356: c=CCLASS; ! 357: bldcclass(); ! 358: break; ! 359: } ! 360: return c; ! 361: } ! 362: ! 363: static int ! 364: nextc(){ ! 365: if(exprp[0]==0 || (exprp[0]=='\\' && exprp[1]==0)) ! 366: rcerror("malformed '[]'"); ! 367: if(exprp[0]=='\\'){ ! 368: exprp++; ! 369: return *exprp++|0200; ! 370: } ! 371: return *exprp++; ! 372: } ! 373: ! 374: static void ! 375: bldcclass(){ ! 376: register c1, c2; ! 377: register Class *classp; ! 378: register negate=FALSE; ! 379: ! 380: classp=newclass(); ! 381: /* we have already seen the '[' */ ! 382: if(*exprp=='^'){ ! 383: negate=TRUE; ! 384: exprp++; ! 385: } ! 386: while((c1=c2=nextc()) != ']'){ ! 387: if(*exprp=='-'){ ! 388: exprp++; /* eat '-' */ ! 389: if((c2=nextc()) == ']') ! 390: rcerror("malformed '[]'"); ! 391: } ! 392: for((c1&=0177), (c2&=0177); c1<=c2; c1++) ! 393: classp->map[c1/8] |= 1<<(c1&07); ! 394: } ! 395: if(negate) ! 396: for(c1=0; c1<16; c1++) ! 397: classp->map[c1]^=0377; ! 398: classp->map[0] &= 0376; /* exclude NUL */ ! 399: } ! 400: ! 401: extern regexp * ! 402: regcomp(s) ! 403: char *s; ! 404: { ! 405: register token; ! 406: Prog *pp; ! 407: ! 408: /* get memory for the program */ ! 409: pp = (Prog *)malloc(sizeof(Prog) + 3*sizeof(Inst)*strlen(s)); ! 410: if (pp == NULL) { ! 411: rcerror("out of memory"); ! 412: return NULL; ! 413: } ! 414: freep = pp->firstinst; ! 415: classp = pp->class; ! 416: errors = 0; ! 417: ! 418: /* go compile the sucker */ ! 419: startlex(s); ! 420: atorp=atorstack; ! 421: andp=andstack; ! 422: subidp=subidstack; ! 423: lastwasand=FALSE; ! 424: cursubid=0; ! 425: ! 426: /* Start with a low priority operator to prime parser */ ! 427: pushator(START-1); ! 428: while((token=lex()) != END){ ! 429: if((token&0300) == OPERATOR) ! 430: operator(token); ! 431: else ! 432: operand(token); ! 433: } ! 434: ! 435: /* Close with a low priority operator */ ! 436: evaluntil(START); ! 437: ! 438: /* Force END */ ! 439: operand(END); ! 440: evaluntil(START); ! 441: #ifdef DEBUG ! 442: dumpstack(); ! 443: #endif ! 444: if(nbra) ! 445: rcerror("unmatched left paren"); ! 446: --andp; /* points to first and only operand */ ! 447: pp->startinst=andp->first; ! 448: #ifdef DEBUG ! 449: dump(pp); ! 450: #endif ! 451: pp = optimize(pp); ! 452: #ifdef DEBUG ! 453: printf("start: %d\n", andp->first-pp->firstinst); ! 454: dump(pp); ! 455: #endif ! 456: if (errors) { ! 457: free((char *)pp); ! 458: pp = NULL; ! 459: } ! 460: return (regexp *)pp; ! 461: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.