|
|
1.1 ! root 1: #include "regprog.h" ! 2: ! 3: /* ! 4: * Parser Information ! 5: */ ! 6: typedef struct Node{ ! 7: Inst *first; ! 8: Inst *last; ! 9: }Node; ! 10: #define NSTACK 20 ! 11: static Node andstack[NSTACK]; ! 12: static Node *andp; ! 13: static int atorstack[NSTACK]; ! 14: static int *atorp; ! 15: static int cursubid; /* id of current subexpression */ ! 16: static int subidstack[NSTACK]; /* parallel to atorstack */ ! 17: static int *subidp; ! 18: static int lastwasand; /* Last token was operand */ ! 19: static int nbra; ! 20: static char *exprp; /* pointer to next character in source expression */ ! 21: static int nclass; ! 22: static Class *classp; ! 23: static Inst *freep; ! 24: static int errors; ! 25: ! 26: /* predeclared crap */ ! 27: static void operator(); ! 28: static void pushand(); ! 29: static void pushator(); ! 30: static void evaluntil(); ! 31: static void bldcclass(); ! 32: ! 33: static void ! 34: rcerror(s) ! 35: char *s; ! 36: { ! 37: errors++; ! 38: regerror(s); ! 39: } ! 40: ! 41: static Inst * ! 42: newinst(t) ! 43: int t; ! 44: { ! 45: freep->type=t; ! 46: freep->left=0; ! 47: freep->right=0; ! 48: return freep++; ! 49: } ! 50: ! 51: static void ! 52: operand(t) ! 53: int t; ! 54: { ! 55: register Inst *i; ! 56: if(lastwasand) ! 57: operator(CAT); /* catenate is implicit */ ! 58: i=newinst(t); ! 59: if(t==CCLASS) /* ugh */ ! 60: i->right=(Inst *)&(classp[nclass-1]); /* UGH! */ ! 61: pushand(i, i); ! 62: lastwasand=TRUE; ! 63: } ! 64: ! 65: static void ! 66: operator(t) ! 67: int t; ! 68: { ! 69: if(t==RBRA && --nbra<0) ! 70: rcerror("unmatched right paren"); ! 71: if(t==LBRA) { ! 72: if (++cursubid >= NSUBEXP) ! 73: rcerror ("too many subexpressions"); ! 74: nbra++; ! 75: if (lastwasand) ! 76: operator(CAT); ! 77: } else ! 78: evaluntil(t); ! 79: if(t!=RBRA) ! 80: pushator(t); ! 81: lastwasand=FALSE; ! 82: if(t==STAR || t==QUEST || t==PLUS || t==RBRA) ! 83: lastwasand=TRUE; /* these look like operands */ ! 84: } ! 85: ! 86: static void ! 87: regerr2(s, c) ! 88: char *s; ! 89: { ! 90: char buf[100]; ! 91: char *cp = buf; ! 92: while(*s) ! 93: *cp++ = *s++; ! 94: *cp++ = c; ! 95: *cp = '\0'; ! 96: rcerror(buf); ! 97: } ! 98: ! 99: static void ! 100: cant(s) ! 101: char *s; ! 102: { ! 103: char buf[100]; ! 104: strcpy(buf, "can't happen: "); ! 105: strcat(buf, s); ! 106: rcerror(buf); ! 107: } ! 108: ! 109: static void ! 110: pushand(f, l) ! 111: Inst *f, *l; ! 112: { ! 113: if(andp >= &andstack[NSTACK]) ! 114: cant("operand stack overflow"); ! 115: andp->first=f; ! 116: andp->last=l; ! 117: andp++; ! 118: } ! 119: ! 120: static void ! 121: pushator(t) ! 122: int t; ! 123: { ! 124: if(atorp >= &atorstack[NSTACK]) ! 125: cant("operator stack overflow"); ! 126: *atorp++=t; ! 127: *subidp++=cursubid; ! 128: } ! 129: ! 130: static Node * ! 131: popand(op) ! 132: { ! 133: register Inst *inst; ! 134: ! 135: if(andp <= &andstack[0]) { ! 136: regerr2("missing operand for ", op); ! 137: inst=newinst(NOP); ! 138: pushand(inst,inst); ! 139: } ! 140: return --andp; ! 141: } ! 142: ! 143: static int ! 144: popator() ! 145: { ! 146: if(atorp <= &atorstack[0]) ! 147: cant("operator stack underflow"); ! 148: --subidp; ! 149: return *--atorp; ! 150: } ! 151: ! 152: static void ! 153: evaluntil(pri) ! 154: register pri; ! 155: { ! 156: register Node *op1, *op2; ! 157: register Inst *inst1, *inst2; ! 158: ! 159: while(pri==RBRA || atorp[-1]>=pri){ ! 160: switch(popator()){ ! 161: default: ! 162: rcerror("unknown operator in evaluntil"); ! 163: break; ! 164: case LBRA: /* must have been RBRA */ ! 165: op1=popand('('); ! 166: inst2=newinst(RBRA); ! 167: inst2->subid = *subidp; ! 168: op1->last->next = inst2; ! 169: inst1=newinst(LBRA); ! 170: inst1->subid = *subidp; ! 171: inst1->next=op1->first; ! 172: pushand(inst1, inst2); ! 173: return; ! 174: case OR: ! 175: op2=popand('|'); ! 176: op1=popand('|'); ! 177: inst2=newinst(NOP); ! 178: op2->last->next=inst2; ! 179: op1->last->next=inst2; ! 180: inst1=newinst(OR); ! 181: inst1->right=op1->first; ! 182: inst1->left=op2->first; ! 183: pushand(inst1, inst2); ! 184: break; ! 185: case CAT: ! 186: op2=popand(0); ! 187: op1=popand(0); ! 188: op1->last->next=op2->first; ! 189: pushand(op1->first, op2->last); ! 190: break; ! 191: case STAR: ! 192: op2=popand('*'); ! 193: inst1=newinst(OR); ! 194: op2->last->next=inst1; ! 195: inst1->right=op2->first; ! 196: pushand(inst1, inst1); ! 197: break; ! 198: case PLUS: ! 199: op2=popand('+'); ! 200: inst1=newinst(OR); ! 201: op2->last->next=inst1; ! 202: inst1->right=op2->first; ! 203: pushand(op2->first, inst1); ! 204: break; ! 205: case QUEST: ! 206: op2=popand('?'); ! 207: inst1=newinst(OR); ! 208: inst2=newinst(NOP); ! 209: inst1->left=inst2; ! 210: inst1->right=op2->first; ! 211: op2->last->next=inst2; ! 212: pushand(inst1, inst2); ! 213: break; ! 214: } ! 215: } ! 216: } ! 217: ! 218: static void ! 219: optimize(pp) ! 220: Prog *pp; ! 221: { ! 222: register Inst *inst, *target; ! 223: ! 224: for(inst=pp->firstinst; inst->type!=END; inst++){ ! 225: target=inst->next; ! 226: while(target->type == NOP) ! 227: target=target->next; ! 228: inst->next=target; ! 229: } ! 230: } ! 231: ! 232: #ifdef DEBUG ! 233: static void ! 234: dumpstack(){ ! 235: Node *stk; ! 236: int *ip; ! 237: ! 238: printf("operators\n"); ! 239: for(ip=atorstack; ip<atorp; ip++) ! 240: printf("0%o\n", *ip); ! 241: printf("operands\n"); ! 242: for(stk=andstack; stk<andp; stk++) ! 243: printf("0%o\t0%o\n", stk->first->type, stk->last->type); ! 244: } ! 245: ! 246: static void ! 247: dump(pp) ! 248: Prog *pp; ! 249: { ! 250: Inst *l; ! 251: ! 252: l=pp->firstinst; ! 253: do{ ! 254: printf("%d:\t0%o\t%d\t%d\n", l-pp->firstinst, l->type, ! 255: l->left-pp->firstinst, l->right-pp->firstinst); ! 256: }while(l++->type); ! 257: } ! 258: #endif ! 259: ! 260: static void ! 261: startlex(s) ! 262: char *s; ! 263: { ! 264: exprp=s; ! 265: nclass=0; ! 266: nbra=0; ! 267: } ! 268: ! 269: static Class * ! 270: newclass(){ ! 271: register Class *p; ! 272: register n; ! 273: ! 274: if(nclass >= NCLASS) ! 275: regerr2("too many character classes; limit", NCLASS+'0'); ! 276: p = &(classp[nclass++]); ! 277: for(n=0; n<16; n++) ! 278: p->map[n]=0; ! 279: return p; ! 280: } ! 281: ! 282: static int ! 283: lex(){ ! 284: register c= *exprp++; ! 285: ! 286: switch(c){ ! 287: case '\\': ! 288: if(*exprp) ! 289: c= *exprp++; ! 290: break; ! 291: case 0: ! 292: c=END; ! 293: --exprp; /* In case we come here again */ ! 294: break; ! 295: case '*': ! 296: c=STAR; ! 297: break; ! 298: case '?': ! 299: c=QUEST; ! 300: break; ! 301: case '+': ! 302: c=PLUS; ! 303: break; ! 304: case '|': ! 305: c=OR; ! 306: break; ! 307: case '.': ! 308: c=ANY; ! 309: break; ! 310: case '(': ! 311: c=LBRA; ! 312: break; ! 313: case ')': ! 314: c=RBRA; ! 315: break; ! 316: case '^': ! 317: c=BOL; ! 318: break; ! 319: case '$': ! 320: c=EOL; ! 321: break; ! 322: case '[': ! 323: c=CCLASS; ! 324: bldcclass(); ! 325: break; ! 326: } ! 327: return c; ! 328: } ! 329: ! 330: static int ! 331: nextc(){ ! 332: if(exprp[0]==0 || (exprp[0]=='\\' && exprp[1]==0)) ! 333: rcerror("malformed '[]'"); ! 334: if(exprp[0]=='\\'){ ! 335: exprp++; ! 336: return *exprp++|0200; ! 337: } ! 338: return *exprp++; ! 339: } ! 340: ! 341: static void ! 342: bldcclass(){ ! 343: register c1, c2; ! 344: register Class *classp; ! 345: register negate=FALSE; ! 346: ! 347: classp=newclass(); ! 348: /* we have already seen the '[' */ ! 349: if(*exprp=='^'){ ! 350: negate=TRUE; ! 351: exprp++; ! 352: } ! 353: while((c1=c2=nextc()) != ']'){ ! 354: if(*exprp=='-'){ ! 355: exprp++; /* eat '-' */ ! 356: if((c2=nextc()) == ']') ! 357: rcerror("malformed '[]'"); ! 358: } ! 359: for((c1&=0177), (c2&=0177); c1<=c2; c1++) ! 360: classp->map[c1/8] |= 1<<(c1&07); ! 361: } ! 362: if(negate) ! 363: for(c1=0; c1<16; c1++) ! 364: classp->map[c1]^=0377; ! 365: classp->map[0] &= 0376; /* exclude NUL */ ! 366: } ! 367: ! 368: extern regexp * ! 369: regcomp(s) ! 370: char *s; ! 371: { ! 372: register token; ! 373: Prog *pp; ! 374: ! 375: /* get memory for the program */ ! 376: pp = (Prog *)malloc(sizeof(Prog) + 3*sizeof(Inst)*strlen(s)); ! 377: if (pp == NULL) { ! 378: rcerror("out of memory"); ! 379: return NULL; ! 380: } ! 381: freep = pp->firstinst; ! 382: classp = pp->class; ! 383: errors = 0; ! 384: ! 385: /* go compile the sucker */ ! 386: startlex(s); ! 387: atorp=atorstack; ! 388: andp=andstack; ! 389: subidp=subidstack; ! 390: lastwasand=FALSE; ! 391: cursubid=0; ! 392: ! 393: /* Start with a low priority operator to prime parser */ ! 394: pushator(START-1); ! 395: while((token=lex()) != END){ ! 396: if((token&0300) == OPERATOR) ! 397: operator(token); ! 398: else ! 399: operand(token); ! 400: } ! 401: ! 402: /* Close with a low priority operator */ ! 403: evaluntil(START); ! 404: ! 405: /* Force END */ ! 406: operand(END); ! 407: evaluntil(START); ! 408: #ifdef DEBUG ! 409: dumpstack(); ! 410: #endif ! 411: if(nbra) ! 412: rcerror("unmatched left paren"); ! 413: --andp; /* points to first and only operand */ ! 414: pp->startinst=andp->first; ! 415: #ifdef DEBUG ! 416: dump(pp); ! 417: #endif ! 418: optimize(pp); ! 419: #ifdef DEBUG ! 420: printf("start: %d\n", andp->first-pp->firstinst); ! 421: dump(pp); ! 422: #endif ! 423: if (errors) { ! 424: free(pp); ! 425: pp = NULL; ! 426: } ! 427: return (regexp *)pp; ! 428: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.