|
|
1.1 root 1: #include "regprog.h"
2:
3: /*
4: * Parser Information
5: */
6: typedef struct Node{
7: Inst *first;
8: Inst *last;
9: }Node;
10: #define NSTACK 20
11: static Node andstack[NSTACK];
12: static Node *andp;
13: static int atorstack[NSTACK];
14: static int *atorp;
15: static int cursubid; /* id of current subexpression */
16: static int subidstack[NSTACK]; /* parallel to atorstack */
17: static int *subidp;
18: static int lastwasand; /* Last token was operand */
19: static int nbra;
20: static char *exprp; /* pointer to next character in source expression */
21: static int nclass;
22: static Class *classp;
23: static Inst *freep;
24: static int errors;
25:
26: /* predeclared crap */
27: static void operator();
28: static void pushand();
29: static void pushator();
30: static void evaluntil();
31: static void bldcclass();
32:
33: static void
34: rcerror(s)
35: char *s;
36: {
37: errors++;
38: regerror(s);
39: }
40:
41: static Inst *
42: newinst(t)
43: int t;
44: {
45: freep->type=t;
46: freep->left=0;
47: freep->right=0;
48: return freep++;
49: }
50:
51: static void
52: operand(t)
53: int t;
54: {
55: register Inst *i;
56: if(lastwasand)
57: operator(CAT); /* catenate is implicit */
58: i=newinst(t);
59: if(t==CCLASS) /* ugh */
60: i->right=(Inst *)&(classp[nclass-1]); /* UGH! */
61: pushand(i, i);
62: lastwasand=TRUE;
63: }
64:
65: static void
66: operator(t)
67: int t;
68: {
69: if(t==RBRA && --nbra<0)
70: rcerror("unmatched right paren");
71: if(t==LBRA) {
72: if (++cursubid >= NSUBEXP)
73: rcerror ("too many subexpressions");
74: nbra++;
75: if (lastwasand)
76: operator(CAT);
77: } else
78: evaluntil(t);
79: if(t!=RBRA)
80: pushator(t);
81: lastwasand=FALSE;
82: if(t==STAR || t==QUEST || t==PLUS || t==RBRA)
83: lastwasand=TRUE; /* these look like operands */
84: }
85:
86: static void
87: regerr2(s, c)
88: char *s;
89: {
90: char buf[100];
91: char *cp = buf;
92: while(*s)
93: *cp++ = *s++;
94: *cp++ = c;
95: *cp = '\0';
96: rcerror(buf);
97: }
98:
99: static void
100: cant(s)
101: char *s;
102: {
103: char buf[100];
104: strcpy(buf, "can't happen: ");
105: strcat(buf, s);
106: rcerror(buf);
107: }
108:
109: static void
110: pushand(f, l)
111: Inst *f, *l;
112: {
113: if(andp >= &andstack[NSTACK])
114: cant("operand stack overflow");
115: andp->first=f;
116: andp->last=l;
117: andp++;
118: }
119:
120: static void
121: pushator(t)
122: int t;
123: {
124: if(atorp >= &atorstack[NSTACK])
125: cant("operator stack overflow");
126: *atorp++=t;
127: *subidp++=cursubid;
128: }
129:
130: static Node *
131: popand(op)
132: {
133: register Inst *inst;
134:
135: if(andp <= &andstack[0]) {
136: regerr2("missing operand for ", op);
137: inst=newinst(NOP);
138: pushand(inst,inst);
139: }
140: return --andp;
141: }
142:
143: static int
144: popator()
145: {
146: if(atorp <= &atorstack[0])
147: cant("operator stack underflow");
148: --subidp;
149: return *--atorp;
150: }
151:
152: static void
153: evaluntil(pri)
154: register pri;
155: {
156: register Node *op1, *op2;
157: register Inst *inst1, *inst2;
158:
159: while(pri==RBRA || atorp[-1]>=pri){
160: switch(popator()){
161: default:
162: rcerror("unknown operator in evaluntil");
163: break;
164: case LBRA: /* must have been RBRA */
165: op1=popand('(');
166: inst2=newinst(RBRA);
167: inst2->subid = *subidp;
168: op1->last->next = inst2;
169: inst1=newinst(LBRA);
170: inst1->subid = *subidp;
171: inst1->next=op1->first;
172: pushand(inst1, inst2);
173: return;
174: case OR:
175: op2=popand('|');
176: op1=popand('|');
177: inst2=newinst(NOP);
178: op2->last->next=inst2;
179: op1->last->next=inst2;
180: inst1=newinst(OR);
181: inst1->right=op1->first;
182: inst1->left=op2->first;
183: pushand(inst1, inst2);
184: break;
185: case CAT:
186: op2=popand(0);
187: op1=popand(0);
188: op1->last->next=op2->first;
189: pushand(op1->first, op2->last);
190: break;
191: case STAR:
192: op2=popand('*');
193: inst1=newinst(OR);
194: op2->last->next=inst1;
195: inst1->right=op2->first;
196: pushand(inst1, inst1);
197: break;
198: case PLUS:
199: op2=popand('+');
200: inst1=newinst(OR);
201: op2->last->next=inst1;
202: inst1->right=op2->first;
203: pushand(op2->first, inst1);
204: break;
205: case QUEST:
206: op2=popand('?');
207: inst1=newinst(OR);
208: inst2=newinst(NOP);
209: inst1->left=inst2;
210: inst1->right=op2->first;
211: op2->last->next=inst2;
212: pushand(inst1, inst2);
213: break;
214: }
215: }
216: }
217:
218: static void
219: optimize(pp)
220: Prog *pp;
221: {
222: register Inst *inst, *target;
223:
224: for(inst=pp->firstinst; inst->type!=END; inst++){
225: target=inst->next;
226: while(target->type == NOP)
227: target=target->next;
228: inst->next=target;
229: }
230: }
231:
232: #ifdef DEBUG
233: static void
234: dumpstack(){
235: Node *stk;
236: int *ip;
237:
238: printf("operators\n");
239: for(ip=atorstack; ip<atorp; ip++)
240: printf("0%o\n", *ip);
241: printf("operands\n");
242: for(stk=andstack; stk<andp; stk++)
243: printf("0%o\t0%o\n", stk->first->type, stk->last->type);
244: }
245:
246: static void
247: dump(pp)
248: Prog *pp;
249: {
250: Inst *l;
251:
252: l=pp->firstinst;
253: do{
254: printf("%d:\t0%o\t%d\t%d\n", l-pp->firstinst, l->type,
255: l->left-pp->firstinst, l->right-pp->firstinst);
256: }while(l++->type);
257: }
258: #endif
259:
260: static void
261: startlex(s)
262: char *s;
263: {
264: exprp=s;
265: nclass=0;
266: nbra=0;
267: }
268:
269: static Class *
270: newclass(){
271: register Class *p;
272: register n;
273:
274: if(nclass >= NCLASS)
275: regerr2("too many character classes; limit", NCLASS+'0');
276: p = &(classp[nclass++]);
277: for(n=0; n<16; n++)
278: p->map[n]=0;
279: return p;
280: }
281:
282: static int
283: lex(){
284: register c= *exprp++;
285:
286: switch(c){
287: case '\\':
288: if(*exprp)
289: c= *exprp++;
290: break;
291: case 0:
292: c=END;
293: --exprp; /* In case we come here again */
294: break;
295: case '*':
296: c=STAR;
297: break;
298: case '?':
299: c=QUEST;
300: break;
301: case '+':
302: c=PLUS;
303: break;
304: case '|':
305: c=OR;
306: break;
307: case '.':
308: c=ANY;
309: break;
310: case '(':
311: c=LBRA;
312: break;
313: case ')':
314: c=RBRA;
315: break;
316: case '^':
317: c=BOL;
318: break;
319: case '$':
320: c=EOL;
321: break;
322: case '[':
323: c=CCLASS;
324: bldcclass();
325: break;
326: }
327: return c;
328: }
329:
330: static int
331: nextc(){
332: if(exprp[0]==0 || (exprp[0]=='\\' && exprp[1]==0))
333: rcerror("malformed '[]'");
334: if(exprp[0]=='\\'){
335: exprp++;
336: return *exprp++|0200;
337: }
338: return *exprp++;
339: }
340:
341: static void
342: bldcclass(){
343: register c1, c2;
344: register Class *classp;
345: register negate=FALSE;
346:
347: classp=newclass();
348: /* we have already seen the '[' */
349: if(*exprp=='^'){
350: negate=TRUE;
351: exprp++;
352: }
353: while((c1=c2=nextc()) != ']'){
354: if(*exprp=='-'){
355: exprp++; /* eat '-' */
356: if((c2=nextc()) == ']')
357: rcerror("malformed '[]'");
358: }
359: for((c1&=0177), (c2&=0177); c1<=c2; c1++)
360: classp->map[c1/8] |= 1<<(c1&07);
361: }
362: if(negate)
363: for(c1=0; c1<16; c1++)
364: classp->map[c1]^=0377;
365: classp->map[0] &= 0376; /* exclude NUL */
366: }
367:
368: extern regexp *
369: regcomp(s)
370: char *s;
371: {
372: register token;
373: Prog *pp;
374:
375: /* get memory for the program */
376: pp = (Prog *)malloc(sizeof(Prog) + 3*sizeof(Inst)*strlen(s));
377: if (pp == NULL) {
378: rcerror("out of memory");
379: return NULL;
380: }
381: freep = pp->firstinst;
382: classp = pp->class;
383: errors = 0;
384:
385: /* go compile the sucker */
386: startlex(s);
387: atorp=atorstack;
388: andp=andstack;
389: subidp=subidstack;
390: lastwasand=FALSE;
391: cursubid=0;
392:
393: /* Start with a low priority operator to prime parser */
394: pushator(START-1);
395: while((token=lex()) != END){
396: if((token&0300) == OPERATOR)
397: operator(token);
398: else
399: operand(token);
400: }
401:
402: /* Close with a low priority operator */
403: evaluntil(START);
404:
405: /* Force END */
406: operand(END);
407: evaluntil(START);
408: #ifdef DEBUG
409: dumpstack();
410: #endif
411: if(nbra)
412: rcerror("unmatched left paren");
413: --andp; /* points to first and only operand */
414: pp->startinst=andp->first;
415: #ifdef DEBUG
416: dump(pp);
417: #endif
418: optimize(pp);
419: #ifdef DEBUG
420: printf("start: %d\n", andp->first-pp->firstinst);
421: dump(pp);
422: #endif
423: if (errors) {
424: free(pp);
425: pp = NULL;
426: }
427: return (regexp *)pp;
428: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.