|
|
1.1 root 1: #include "re.h"
2: #include "lre.h"
3: #include "hdr.h"
4:
5: typedef struct Link
6: {
7: unsigned char l;
8: struct Node *node;
9: struct Link *next;
10: } Link;
11: static Link *froot;
12:
13: #define NEW(N) (froot?(t = froot, froot = froot->next, t->next = 0, t->node = N, t): newlink(c, 0, N))
14: #define ADD(N) if(qtail) qtail = qtail->next = NEW(N);\
15: else qtail = qhead = NEW(N)
16: #define DEL() { Link *tmp = qhead;if((qhead = qhead->next) == 0)\
17: qtail = 0; tmp->next = froot; froot = tmp;}
18:
19: typedef struct Node
20: {
21: short out;
22: short d;
23: short shift1;
24: short shift2;
25: long id;
26: struct Node **tab;
27: Link *alts;
28: struct Node *fail;
29: } Node;
30:
31: static Link *newlink(re_cw*, int, Node*);
32: static Node *newnode(re_cw*, int);
33: static void zeroroot(Node*, Node*);
34: static void shifttab(Node*);
35: static void shiftprop(re_cw*,Node*);
36: #ifdef DEBUG
37: static void cwpr(register Node *n);
38: #endif
39:
40: re_cw *
41: re_cwinit(unsigned char *cmap)
42: {
43: register i;
44: register re_cw *c;
45:
46: if((c = (re_cw *)egmalloc(sizeof *c, "malloc failed in re_cwinit")) == 0)
47: return((re_cw *)0);
48: c->nodeid = 0;
49: c->maxdepth = 0;
50: c->mindepth = 10000;
51: c->seenerror = 0;
52: c->root = newnode(c, 0);
53: if(cmap)
54: memmove((char *)c->map, (char *)cmap, sizeof c->map);
55: else
56: for(i = 0; i < sizeof c->map; i++)
57: c->map[i] = i;
58: return(c);
59: }
60:
61: void
62: re_cwadd(register re_cw *c, register unsigned char *s, register unsigned char *e)
63: {
64: register Node *p, *state;
65: register Link *l;
66: int depth;
67:
68: for(state = c->root; s <= --e;){
69: for(l = state->alts; l; l = l->next)
70: if(l->l == c->map[*e]) break;
71: if(l == 0)
72: break;
73: else
74: state = l->node;
75: }
76: if(s <= e){
77: depth = state->d+1;
78: l = newlink(c, *e--, p = newnode(c, depth++));
79: if((l == 0) || (p == 0)){
80: c->seenerror = 1;
81: return;
82: }
83: l->next = state->alts;
84: state->alts = l;
85: state = p;
86: while(s <= e){
87: state->alts = newlink(c, *e--, p = newnode(c, depth++));
88: if((state->alts == 0) || (p == 0)){
89: c->seenerror = 1;
90: return;
91: }
92: state = p;
93: }
94: }
95: if(c->mindepth > state->d)
96: c->mindepth = state->d;
97: state->out = 1;
98: }
99:
100: #ifdef DEBUG
101: static void
102: cwpr(register Node *n)
103: {
104: register Link *l;
105:
106: Fprint(1, "%d[%d,%d]: ", n->id, n->shift1, n->shift2);
107: for(l = n->alts; l; l = l->next){
108: Fprint(1, "edge from \"%d\" to \"%d\" label {\"%c\"};",
109: n->id, l->node->id, l->l);
110: if(l->node->out) Fprint(1, " draw \"%d\" as Doublecircle;", l->node->id);
111: if(l->node->fail)
112: Fprint(1, " edge from \"%d\" to \"%d\" dashed;", l->node->id, l->node->fail->id);
113: Fprint(1, "\n");
114: cwpr(l->node);
115: }
116: }
117: #endif
118:
119: static void
120: fail(re_cw *c, Node *root)
121: {
122: Link *qhead = 0, *qtail = 0;
123: register Link *l, *ll;
124: register Link *t;
125: register Node *state, *r, *s;
126: int a;
127:
128: for(l = root->alts; l; l = l->next){
129: ADD(l->node);
130: l->node->fail = root;
131: }
132: while(qhead){
133: r = qhead->node;
134: DEL();
135: for(l = r->alts; l; l = l->next){
136: s = l->node;
137: a = l->l;
138: ADD(s);
139: for(state = r->fail; state;){
140: for(ll = state->alts; ll; ll = ll->next)
141: if(ll->l == a)
142: break;
143: if(ll || (state == root)){
144: if(ll)
145: state = ll->node;
146: /*
147: do it here as only other exit is
148: state 0
149: */
150: if(state->out){
151: s->out = 1;
152: }
153: break;
154: } else
155: state = state->fail;
156: }
157: s->fail = state;
158: }
159: }
160: zeroroot(root, root);
161: }
162:
163: static void
164: zeroroot(register Node *root, register Node *n)
165: {
166: register Link *l;
167:
168: if(n->fail == root)
169: n->fail = 0;
170: for(l = n->alts; l; l = l->next)
171: zeroroot(root, l->node);
172: }
173:
174: static void
175: shift(re_cw *c)
176: {
177: Link *qhead = 0, *qtail = 0;
178: register Link *l;
179: register Link *t;
180: register Node *state, *r, *s;
181: int k;
182:
183: for(k = 0; k < 256; k++)
184: c->step[k] = c->mindepth+1;
185: c->root->shift1 = 1;
186: c->root->shift2 = c->mindepth;
187: for(l = c->root->alts; l; l = l->next){
188: /* l->node->shift2 = c->root->shift2;/**/
189: ADD(l->node);
190: l->node->fail = 0;
191: }
192: while(qhead){
193: r = qhead->node;
194: DEL();
195: r->shift1 = c->mindepth;
196: r->shift2 = c->mindepth;
197: if(state = r->fail){
198: do {
199: k = r->d - state->d;
200: if(k < state->shift1){
201: state->shift1 = k;
202: }
203: if(r->out && (k < state->shift2)){
204: state->shift2 = k;
205: }
206: } while(state = state->fail);
207: }
208: for(l = r->alts; l; l = l->next){
209: s = l->node;
210: ADD(s);
211: }
212: }
213: shiftprop(c, c->root);
214: shifttab(c->root);
215: c->step[0] = 1;
216: }
217:
218: static void
219: shifttab(register Node *n)
220: {
221: register Link *l;
222: register Node **nn;
223:
224: n->tab = nn = (Node **)calloc(256, sizeof(Node *));
225: for(l = n->alts; l; l = l->next)
226: nn[l->l] = l->node;
227: }
228:
229: static void
230: shiftprop(register re_cw *c, register Node *n)
231: {
232: register Link *l;
233: register Node *nn;
234:
235: for(l = n->alts; l; l = l->next){
236: if(c->step[l->l] > l->node->d)
237: c->step[l->l] = l->node->d;
238: nn = l->node;
239: if(n->shift2 < nn->shift2)
240: nn->shift2 = n->shift2;
241: shiftprop(c, nn);
242: }
243: }
244:
245: void
246: re_cwcomp(re_cw *c)
247: {
248: if(c->seenerror)
249: return;
250: fail(c, c->root);
251: shift(c);
252: #ifdef DEBUG
253: cwpr(c->root);
254: #endif
255: }
256:
257: re_cwexec(void *re_c, RDFN rdfn, MATCHFN matchfn)
258: {
259: re_cw *c = (re_cw*)re_c;
260: register Node *state;
261: register Link *l;
262: register unsigned char *sp;
263: register unsigned char *e, *s;
264: register Node *ostate;
265: register k;
266: unsigned char *rs, *re;
267: unsigned char fake[1];
268: unsigned char mappedsp;
269:
270: if(c->seenerror)
271: return(-1);
272: fake[0] = 0;
273: state = c->root;
274: for(rs = 0; (k = (*rdfn)((char**)&rs,(char**) &re)) > 0;){
275: doneline:
276: s = rs+c->mindepth-1;
277: e = re;
278: while(s < e){
279: /* scan */
280: for(sp = s; ostate = state;){
281: if(state->tab){
282: if((state = state->tab[c->map[*sp]]) == 0)
283: goto nomatch;
284: } else {
285: mappedsp = c->map[*sp];
286: for(l = state->alts; ; l = l->next){
287: if(l == 0)
288: goto nomatch;
289: if(l->l == mappedsp){
290: state = l->node;
291: break;
292: }
293: }
294: }
295: #ifdef DEBUG
296: print("state %d -0x%x-> state %d (out=%d)\n", ostate->id, *sp&0xFF, state->id, state->out);
297: #endif
298: if(state->out){
299: unsigned char *bm = sp, *em = s+1;
300: if((k = (*matchfn)((char**)&bm, (char**)&em)) <= 0)
301: return(k);
302: rs = bm;
303: re = em;
304: state = c->root;
305: goto doneline;
306: }
307: if(--sp < rs){
308: sp = fake;
309: break;
310: }
311: }
312: nomatch:
313: k = c->step[c->map[*sp]]-ostate->d-1;
314: if(k < ostate->shift1)
315: k = ostate->shift1;
316: if(k > ostate->shift2)
317: k = ostate->shift2;
318: s += k;
319: state = c->root;
320: }
321: #ifdef DEBUG
322: print("end of s<e loop: s=%d e=%d, rs=%d, re=%d\n", s, e, rs, re);
323: #endif
324: rs = re; /* we have analysed evrything up to here */
325: }
326: return(k? -1:0);
327: }
328:
329: static void
330: freenode(Node *n)
331: {
332: register Link *l, *ll;
333:
334: if(n->tab)
335: free((char *)n->tab);
336: for(l = n->alts; l; l = ll){
337: ll = l->next;
338: freenode(l->node);
339: }
340: free((char *)n);
341: }
342:
343: void
344: re_cwfree(re_cw *cw)
345: {
346: register Link *l;
347:
348: while(froot){
349: l = froot->next;
350: free((char *)froot);
351: froot = l;
352: }
353: freenode(cw->root);
354: free((char *)cw);
355: }
356:
357: static Node *
358: newnode(re_cw *c, int d)
359: {
360: static Node *next = 0, *lim = 0;
361: static incr = 1000;
362:
363: if(next == lim){
364: next = (Node *)malloc(incr*sizeof(Node));
365: if(next == 0){
366: re_error("node malloc fail");
367: return 0;
368: }
369: lim = next+incr;
370: }
371: next->d = d;
372: if(d > c->maxdepth) c->maxdepth = d;
373: next->id = c->nodeid++;
374: next->alts = 0;
375: next->tab = 0;
376: next->out = 0;
377: return(next++);
378: }
379:
380: static Link *
381: newlink(re_cw *c, int l, Node *n)
382: {
383: static Link *next = 0, *lim = 0;
384: static incr = 1000;
385:
386: if(next == lim){
387: next = (Link *)malloc(incr*sizeof(Node));
388: if(next == 0){
389: re_error("link malloc fail");
390: return 0;
391: }
392: lim = next+incr;
393: }
394: next->l = c->map[l];
395: next->node = n;
396: next->next = 0;
397: return(next++);
398: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.