|
|
1.1 root 1: #include "re.h"
2: #include "lre.h"
3: #include "hdr.h"
4:
5: #define DEBUG
6:
7: static unsigned char *eg_slowmatch(Br *, unsigned char *, unsigned char *, int);
8: static unsigned char *wholeb, *wholee;
9: static unsigned char *start[10];
10: static int len[10];
11: static void undobr(Br *); /* undo group assignements */
12:
13: eg_match(register re_re *r, register unsigned char *b, register unsigned char *e, unsigned char **rb, unsigned char **re)
14: {
15: int i, ret;
16:
17: #ifdef DEBUG
18: if(TRACE(2)){
19: PR "eg_match(%d->%d, %d, %d)\n", (int)r, (int)r->br, (int)b, (int)e);
20: if(r->br)
21: eg_brpr(r->br);
22: }
23: #endif
24: if((rb == 0) != (re == 0)){
25: re_error("must supply both or none of group pointers");
26: return(0);
27: }
28: if(r->backref || r->parens || rb){
29: wholeb = e;
30: for(i = 1; i < 10; i++)
31: start[i] = 0;
32: if(r->br == 0)
33: egbr(r);
34: ret = (wholee = eg_slowmatch(r->br, b, e, RE_BEG|RE_END)) != 0;
35: if(rb && ret){
36: rb[0] = wholeb;
37: re[0] = wholee;
38: for(i = 1; i < 10; i++){
39: rb[i] = start[i];
40: re[i] = rb[i]+len[i];
41: }
42: #ifdef DEBUG
43: if(TRACE(1)){
44: PR "eg_match groups:");
45: for(i = 0; i < 10; i++)
46: if(rb[i])PR " %d: %d@%d", i, rb[i], re[i]-rb[i]);
47: PR "\n");
48: }
49: #endif
50: }
51: #ifdef DEBUG
52: else {
53: if(TRACE(1)){
54: PR "eg_match groups: [%d - %d]\n", wholeb, wholee);
55: for(i = 1; i < 10; i++)
56: if(start[i])PR " %d: %d@%d", i, start[i], len[i]);
57: PR "\n");
58: }
59: }
60: #endif
61: } else
62: ret = eg_quickmatch(r, b, e, RE_BEG|RE_END) != 0;
63: return(ret);
64: }
65:
66: static unsigned char *
67: eg_slowmatch(Br *br, unsigned char *b, unsigned char *e, int endpts)
68: {
69: int i;
70: unsigned char *me, *end;
71: unsigned char *beg, *lbeg, *llbeg, *rbeg, *rend, *lm, *rm;
72: #ifdef DEBUG
73: char buf[EPRINTSIZE];
74: static id = 1;
75: int myid = id++;
76: #endif
77:
78: #define BOFF(x) ((x)? (endpts&~RE_BEG):endpts)
79: #define EOFF(x) ((x)? (endpts&~RE_END):endpts)
80:
81: if(br == 0) /* nothing to match - we won! */
82: return(b);
83: #ifdef DEBUG
84: if(TRACE(3))
85: PR "slowmatch(br=%d, [b,e]=%d,%d id=%d, endpt=%d)\n", br, b, e, myid, endpts);
86: #endif
87: switch(br->type)
88: {
89: case br_br:
90: i = br->group;
91: #ifdef DEBUG
92: if(TRACE(3))
93: PR "br[%d]: %d,%d b=%d,e=%d\n", i, (int)start[i], len[i], b, e);
94: #endif
95: if(start[i] == 0)
96: return(0);
97: if((len[i] > e-b) || memcmp((char *)b, (char *)start[i], len[i]))
98: return(0);
99: if(wholeb > b) wholeb = b;
100: #ifdef DEBUG
101: if(TRACE(3))
102: PR "br[%d]: matched\n", i);
103: #endif
104: return(b+len[i]);
105:
106: case br_re:
107: #ifdef DEBUG
108: if(TRACE(3)){
109: eg_epr(br->e, buf, 0);
110: PR "matching RE(%s)@%d against '", buf, br->r);
111: WR((char *)b, e-b);
112: PR "' id=%d\n", myid);
113: }
114: #endif
115: if((me = eg_lquickmatch(br->r, b, e, endpts)) == 0)
116: return(0);
117: #ifdef DEBUG
118: if(TRACE(3)){
119: PR "--%s matched '", buf);
120: WR((char *)b, me-b);
121: PR "'[%d %d] id=%d\n", (int)b, (int)me, myid);
122: }
123: #endif
124: if(wholeb > b)
125: wholeb = b;
126: return(me);
127:
128: case br_group:
129: #ifdef DEBUG
130: if(TRACE(3)){
131: PR "matching GROUP%d against '", br->group);
132: WR((char *)b, e-b);
133: PR "' id=%d\n", myid);
134: }
135: #endif
136: if((me = eg_slowmatch(br->lb, b, e, endpts)) == 0){
137: undobr(br->lb);
138: return(0);
139: }
140: #ifdef DEBUG
141: if(TRACE(3)){
142: PR "--G%d matched '", br->group);
143: WR((char *)b, me-b);
144: PR "'[%d %d]\n", (int)b, (int)me);
145: }
146: #endif
147: if(wholeb > b)
148: wholeb = b;
149: start[br->group] = b;
150: len[br->group] = me-b;
151: return(me);
152:
153: case br_quest:
154: #ifdef DEBUG
155: if(TRACE(3)){
156: PR "matching BR? against '", buf);
157: WR((char *)b, e-b);
158: PR "'\n");
159: }
160: #endif
161: if(lbeg = eg_slowmatch(br->lb, b, e, endpts)){
162: return(lbeg);
163: }
164: undobr(br->lb);
165: return(b);
166:
167: case br_plus:
168: #ifdef DEBUG
169: if(TRACE(3)){
170: PR "matching BR+ against '", buf);
171: WR((char *)b, e-b);
172: PR "' id=%d\n", myid);
173: }
174: #endif
175: if((lbeg = eg_slowmatch(br->lb, b, e, endpts)) == 0){
176: undobr(br->lb);
177: return(0);
178: }
179: llbeg = b;
180: while(beg = eg_slowmatch(br->lb, lbeg, e, BOFF(lbeg != b))){
181: llbeg = lbeg, lbeg = beg;
182: }
183: #ifdef DEBUG
184: if(TRACE(3)){
185: PR "--+ matched [%d %d]'", (int)llbeg, (int)lbeg);
186: WR((char *)llbeg, lbeg-llbeg);
187: PR "' id=%d\n", myid);
188: }
189: #endif
190: return(eg_slowmatch(br->lb, llbeg, e, BOFF(llbeg != b)));
191:
192: case br_star:
193: #ifdef DEBUG
194: if(TRACE(3)){
195: PR "matching BR* against '", buf);
196: WR((char *)b, e-b);
197: PR "'\n");
198: }
199: #endif
200: llbeg = lbeg = b;
201: while(beg = eg_slowmatch(br->lb, lbeg, e, BOFF(lbeg != b)))
202: llbeg = lbeg, lbeg = beg;
203: #ifdef DEBUG
204: if(TRACE(3)){
205: PR "--* matched '");
206: WR((char *)lbeg, lbeg-llbeg);
207: PR "'[%d %d]\n", (int)llbeg, (int)lbeg);
208: }
209: #endif
210: if(beg = eg_slowmatch(br->lb, llbeg, e, BOFF(llbeg != b)))
211: return(beg);
212: undobr(br->lb);
213: return(b);
214:
215: case br_cat:
216: #ifdef DEBUG
217: if(TRACE(3)){
218: PR "matching BRcat against '", buf);
219: WR((char *)b, e-b);
220: PR "' id=%d\n", myid);
221: }
222: #endif
223: /*
224: this is not so hard.
225: we try all possible matches of the left half,
226: and record the match that gave the longest
227: valid match on the right half
228: */
229: rend = 0;
230: for(end = e; b <= e; e = beg-1){
231: if((beg = eg_slowmatch(br->lb, b, e, EOFF(e != end))) == 0){
232: break;
233: }
234: #ifdef DEBUG
235: if(TRACE(3)){
236: PR "--cat matched '");
237: WR((char *)b, beg-b);
238: PR "'[%d %d] id=%d\n", (int)b, (int)beg, myid);
239: }
240: #endif
241: if((me = eg_slowmatch(br->rb, beg, end, BOFF(beg != b))) == 0){
242: continue; /* no match of right half */
243: }
244: #ifdef DEBUG
245: if(TRACE(3)){
246: PR "----cat matched '");
247: WR((char *)b, beg-b);
248: PR "'[%d %d] id=%d\n", (int)b, (int)beg, myid);
249: }
250: #endif
251: if(me > rend){
252: rend = me;
253: rbeg = beg;
254: #ifdef DEBUG
255: if(TRACE(3)){
256: PR "--++-- cat new max rb=%d re=%d\n", (int)rbeg, (int)rend);
257: }
258: #endif
259: }
260: }
261: if(rend == 0){
262: undobr(br->lb);
263: undobr(br->rb);
264: return(0);
265: }
266: (void)eg_slowmatch(br->lb, b, rbeg, EOFF(rbeg != end));
267: return(eg_slowmatch(br->rb, rbeg, end, BOFF(rbeg != b)));
268:
269: case br_alt:
270: #ifdef DEBUG
271: if(TRACE(3)){
272: PR "matching BR| against '", buf);
273: WR((char *)b, e-b);
274: PR "'\n");
275: }
276: #endif
277: if(lm = eg_slowmatch(br->lb, b, e, endpts)){
278: #ifdef DEBUG
279: if(TRACE(3)){
280: PR "--|L matched '");
281: WR((char *)b, lm-b);
282: PR "'[%d %d]\n", (int)b, (int)lm);
283: }
284: #endif
285: }
286: if(rm = eg_slowmatch(br->rb, b, e, endpts)){
287: #ifdef DEBUG
288: if(TRACE(3)){
289: PR "--|R matched '");
290: WR((char *)b, rm-b);
291: PR "'[%d %d]\n", (int)b, (int)rm);
292: }
293: #endif
294: }
295: if(lm > rm){
296: undobr(br->rb);
297: return(eg_slowmatch(br->lb, b, e, endpts));
298: } else {
299: if(rm == 0){
300: undobr(br->lb);
301: undobr(br->rb);
302: return(0);
303: } else {
304: undobr(br->lb);
305: return(beg);
306: }
307: }
308: }
309: abort();
310: return(0);
311: }
312:
313: static void
314: undobr(register Br *br)
315: {
316: switch(br->type)
317: {
318: case br_group:
319: start[br->group] = 0;
320: undobr(br->lb);
321: break;
322: case br_star:
323: case br_plus:
324: case br_quest:
325: undobr(br->lb);
326: break;
327: case br_cat:
328: case br_alt:
329: undobr(br->lb);
330: undobr(br->rb);
331: break;
332: }
333: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.