|
|
1.1 root 1: /* Extended regular expression matching and search.
2: Copyright (C) 1985 Free Software Foundation, Inc.
3:
4: This program is free software; you can redistribute it and/or modify
5: it under the terms of the GNU General Public License as published by
6: the Free Software Foundation; either version 1, or (at your option)
7: any later version.
8:
9: This program is distributed in the hope that it will be useful,
10: but WITHOUT ANY WARRANTY; without even the implied warranty of
11: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12: GNU General Public License for more details.
13:
14: You should have received a copy of the GNU General Public License
15: along with this program; if not, write to the Free Software
16: Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17:
18: In other words, you are welcome to use, share and improve this program.
19: You are forbidden to forbid anyone else to use, share and improve
20: what you give them. Help stamp out software-hoarding! */
21:
22:
23: /* To test, compile with -Dtest.
24: This Dtestable feature turns this into a self-contained program
25: which reads a pattern, describes how it compiles,
26: then reads a string and searches for it. */
27:
28:
29: #ifdef emacs
30:
31: /* The `emacs' switch turns on certain special matching commands
32: that make sense only in emacs. */
33:
34: #include "config.h"
35: #include "lisp.h"
36: #include "buffer.h"
37: #include "syntax.h"
38:
39: #else /* not emacs */
40:
41: /*
42: * Define the syntax stuff, so we can do the \<...\> things.
43: */
44:
45: #ifndef Sword /* must be non-zero in some of the tests below... */
46: #define Sword 1
47: #endif
48:
49: #define SYNTAX(c) re_syntax_table[c]
50:
51: #ifdef SYNTAX_TABLE
52:
53: char *re_syntax_table;
54:
55: #else
56:
57: static char re_syntax_table[256];
58:
59: static void
60: init_syntax_once ()
61: {
62: register int c;
63: static int done = 0;
64:
65: if (done)
66: return;
67:
68: bzero (re_syntax_table, sizeof re_syntax_table);
69:
70: for (c = 'a'; c <= 'z'; c++)
71: re_syntax_table[c] = Sword;
72:
73: for (c = 'A'; c <= 'Z'; c++)
74: re_syntax_table[c] = Sword;
75:
76: for (c = '0'; c <= '9'; c++)
77: re_syntax_table[c] = Sword;
78:
79: done = 1;
80: }
81:
82: #endif /* SYNTAX_TABLE */
83: #endif /* not emacs */
84:
85: #include "regex.h"
86:
87: /* Number of failure points to allocate space for initially,
88: when matching. If this number is exceeded, more space is allocated,
89: so it is not a hard limit. */
90:
91: #ifndef NFAILURES
92: #define NFAILURES 80
93: #endif NFAILURES
94:
95: /* width of a byte in bits */
96:
97: #define BYTEWIDTH 8
98:
99: #ifndef SIGN_EXTEND_CHAR
100: #define SIGN_EXTEND_CHAR(x) (x)
101: #endif
102:
103: static int obscure_syntax = 0;
104:
105: /* Specify the precise syntax of regexp for compilation.
106: This provides for compatibility for various utilities
107: which historically have different, incompatible syntaxes.
108:
109: The argument SYNTAX is a bit-mask containing the two bits
110: RE_NO_BK_PARENS and RE_NO_BK_VBAR. */
111:
112: int
113: re_set_syntax (syntax)
114: {
115: int ret;
116:
117: ret = obscure_syntax;
118: obscure_syntax = syntax;
119: return ret;
120: }
121:
122: /* re_compile_pattern takes a regular-expression string
123: and converts it into a buffer full of byte commands for matching.
124:
125: PATTERN is the address of the pattern string
126: SIZE is the length of it.
127: BUFP is a struct re_pattern_buffer * which points to the info
128: on where to store the byte commands.
129: This structure contains a char * which points to the
130: actual space, which should have been obtained with malloc.
131: re_compile_pattern may use realloc to grow the buffer space.
132:
133: The number of bytes of commands can be found out by looking in
134: the struct re_pattern_buffer that bufp pointed to,
135: after re_compile_pattern returns.
136: */
137:
138: #define PATPUSH(ch) (*b++ = (char) (ch))
139:
140: #define PATFETCH(c) \
141: {if (p == pend) goto end_of_pattern; \
142: c = * (unsigned char *) p++; \
143: if (translate) c = translate[c]; }
144:
145: #define PATFETCH_RAW(c) \
146: {if (p == pend) goto end_of_pattern; \
147: c = * (unsigned char *) p++; }
148:
149: #define PATUNFETCH p--
150:
151: #define EXTEND_BUFFER \
152: { char *old_buffer = bufp->buffer; \
153: if (bufp->allocated == (1<<16)) goto too_big; \
154: bufp->allocated *= 2; \
155: if (bufp->allocated > (1<<16)) bufp->allocated = (1<<16); \
156: if (!(bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated))) \
157: goto memory_exhausted; \
158: c = bufp->buffer - old_buffer; \
159: b += c; \
160: if (fixup_jump) \
161: fixup_jump += c; \
162: if (laststart) \
163: laststart += c; \
164: begalt += c; \
165: if (pending_exact) \
166: pending_exact += c; \
167: }
168:
169: static int store_jump (), insert_jump ();
170:
171: char *
172: re_compile_pattern (pattern, size, bufp)
173: char *pattern;
174: int size;
175: struct re_pattern_buffer *bufp;
176: {
177: register char *b = bufp->buffer;
178: register char *p = pattern;
179: char *pend = pattern + size;
180: register unsigned c, c1;
181: char *p1;
182: unsigned char *translate = (unsigned char *) bufp->translate;
183:
184: /* address of the count-byte of the most recently inserted "exactn" command.
185: This makes it possible to tell whether a new exact-match character
186: can be added to that command or requires a new "exactn" command. */
187:
188: char *pending_exact = 0;
189:
190: /* address of the place where a forward-jump should go
191: to the end of the containing expression.
192: Each alternative of an "or", except the last, ends with a forward-jump
193: of this sort. */
194:
195: char *fixup_jump = 0;
196:
197: /* address of start of the most recently finished expression.
198: This tells postfix * where to find the start of its operand. */
199:
200: char *laststart = 0;
201:
202: /* In processing a repeat, 1 means zero matches is allowed */
203:
204: char zero_times_ok;
205:
206: /* In processing a repeat, 1 means many matches is allowed */
207:
208: char many_times_ok;
209:
210: /* address of beginning of regexp, or inside of last \( */
211:
212: char *begalt = b;
213:
214: /* Stack of information saved by \( and restored by \).
215: Four stack elements are pushed by each \(:
216: First, the value of b.
217: Second, the value of fixup_jump.
218: Third, the value of regnum.
219: Fourth, the value of begalt. */
220:
221: int stackb[40];
222: int *stackp = stackb;
223: int *stacke = stackb + 40;
224: int *stackt;
225:
226: /* Counts \('s as they are encountered. Remembered for the matching \),
227: where it becomes the "register number" to put in the stop_memory command */
228:
229: int regnum = 1;
230:
231: bufp->fastmap_accurate = 0;
232:
233: #ifndef emacs
234: #ifndef SYNTAX_TABLE
235: /*
236: * Initialize the syntax table.
237: */
238: init_syntax_once();
239: #endif
240: #endif
241:
242: if (bufp->allocated == 0)
243: {
244: bufp->allocated = 28;
245: if (bufp->buffer)
246: /* EXTEND_BUFFER loses when bufp->allocated is 0 */
247: bufp->buffer = (char *) realloc (bufp->buffer, 28);
248: else
249: /* Caller did not allocate a buffer. Do it for him */
250: bufp->buffer = (char *) malloc (28);
251: if (!bufp->buffer) goto memory_exhausted;
252: begalt = b = bufp->buffer;
253: }
254:
255: while (p != pend)
256: {
257: if (b - bufp->buffer > bufp->allocated - 10)
258: /* Note that EXTEND_BUFFER clobbers c */
259: EXTEND_BUFFER;
260:
261: PATFETCH (c);
262:
263: switch (c)
264: {
265: case '$':
266: if (obscure_syntax & RE_TIGHT_VBAR)
267: {
268: if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS) && p != pend)
269: goto normal_char;
270: /* Make operand of last vbar end before this `$'. */
271: if (fixup_jump)
272: store_jump (fixup_jump, jump, b);
273: fixup_jump = 0;
274: PATPUSH (endline);
275: break;
276: }
277:
278: /* $ means succeed if at end of line, but only in special contexts.
279: If randomly in the middle of a pattern, it is a normal character. */
280: if (p == pend || *p == '\n'
281: || (obscure_syntax & RE_CONTEXT_INDEP_OPS)
282: || (obscure_syntax & RE_NO_BK_PARENS
283: ? *p == ')'
284: : *p == '\\' && p[1] == ')')
285: || (obscure_syntax & RE_NO_BK_VBAR
286: ? *p == '|'
287: : *p == '\\' && p[1] == '|'))
288: {
289: PATPUSH (endline);
290: break;
291: }
292: goto normal_char;
293:
294: case '^':
295: /* ^ means succeed if at beg of line, but only if no preceding pattern. */
296:
297: if (laststart && p[-2] != '\n'
298: && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
299: goto normal_char;
300: if (obscure_syntax & RE_TIGHT_VBAR)
301: {
302: if (p != pattern + 1
303: && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
304: goto normal_char;
305: PATPUSH (begline);
306: begalt = b;
307: }
308: else
309: PATPUSH (begline);
310: break;
311:
312: case '+':
313: case '?':
314: if (obscure_syntax & RE_BK_PLUS_QM)
315: goto normal_char;
316: handle_plus:
317: case '*':
318: /* If there is no previous pattern, char not special. */
319: if (!laststart && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
320: goto normal_char;
321: /* If there is a sequence of repetition chars,
322: collapse it down to equivalent to just one. */
323: zero_times_ok = 0;
324: many_times_ok = 0;
325: while (1)
326: {
327: zero_times_ok |= c != '+';
328: many_times_ok |= c != '?';
329: if (p == pend)
330: break;
331: PATFETCH (c);
332: if (c == '*')
333: ;
334: else if (!(obscure_syntax & RE_BK_PLUS_QM)
335: && (c == '+' || c == '?'))
336: ;
337: else if ((obscure_syntax & RE_BK_PLUS_QM)
338: && c == '\\')
339: {
340: int c1;
341: PATFETCH (c1);
342: if (!(c1 == '+' || c1 == '?'))
343: {
344: PATUNFETCH;
345: PATUNFETCH;
346: break;
347: }
348: c = c1;
349: }
350: else
351: {
352: PATUNFETCH;
353: break;
354: }
355: }
356:
357: /* Star, etc. applied to an empty pattern is equivalent
358: to an empty pattern. */
359: if (!laststart)
360: break;
361:
362: /* Now we know whether 0 matches is allowed,
363: and whether 2 or more matches is allowed. */
364: if (many_times_ok)
365: {
366: /* If more than one repetition is allowed,
367: put in a backward jump at the end. */
368: store_jump (b, maybe_finalize_jump, laststart - 3);
369: b += 3;
370: }
371: insert_jump (on_failure_jump, laststart, b + 3, b);
372: pending_exact = 0;
373: b += 3;
374: if (!zero_times_ok)
375: {
376: /* At least one repetition required: insert before the loop
377: a skip over the initial on-failure-jump instruction */
378: insert_jump (dummy_failure_jump, laststart, laststart + 6, b);
379: b += 3;
380: }
381: break;
382:
383: case '.':
384: laststart = b;
385: PATPUSH (anychar);
386: break;
387:
388: case '[':
389: while (b - bufp->buffer
390: > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH)
391: /* Note that EXTEND_BUFFER clobbers c */
392: EXTEND_BUFFER;
393:
394: laststart = b;
395: if (*p == '^')
396: PATPUSH (charset_not), p++;
397: else
398: PATPUSH (charset);
399: p1 = p;
400:
401: PATPUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
402: /* Clear the whole map */
403: bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
404: /* Read in characters and ranges, setting map bits */
405: while (1)
406: {
407: PATFETCH (c);
408: if (c == ']' && p != p1 + 1) break;
409: if (*p == '-' && p[1] != ']')
410: {
411: PATFETCH (c1);
412: PATFETCH (c1);
413: if (translate)
414: while (c <= c1)
415: {
416: register unsigned char mapped_c = translate[c];
417: b[mapped_c / BYTEWIDTH] |= 1 << (mapped_c % BYTEWIDTH);
418: c++;
419: }
420: else
421: while (c <= c1)
422: b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH), c++;
423: }
424: else
425: {
426: if (translate)
427: c = translate[c];
428: b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH);
429: }
430: }
431: /* Discard any bitmap bytes that are all 0 at the end of the map.
432: Decrement the map-length byte too. */
433: while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
434: b[-1]--;
435: b += b[-1];
436: break;
437:
438: case '(':
439: if (! (obscure_syntax & RE_NO_BK_PARENS))
440: goto normal_char;
441: else
442: goto handle_open;
443:
444: case ')':
445: if (! (obscure_syntax & RE_NO_BK_PARENS))
446: goto normal_char;
447: else
448: goto handle_close;
449:
450: case '\n':
451: if (! (obscure_syntax & RE_NEWLINE_OR))
452: goto normal_char;
453: else
454: goto handle_bar;
455:
456: case '|':
457: if (! (obscure_syntax & RE_NO_BK_VBAR))
458: goto normal_char;
459: else
460: goto handle_bar;
461:
462: case '\\':
463: if (p == pend) goto invalid_pattern;
464: PATFETCH_RAW (c);
465: switch (c)
466: {
467: case '(':
468: if (obscure_syntax & RE_NO_BK_PARENS)
469: goto normal_backsl;
470: handle_open:
471: if (stackp == stacke) goto nesting_too_deep;
472: if (regnum < RE_NREGS)
473: {
474: PATPUSH (start_memory);
475: PATPUSH (regnum);
476: }
477: *stackp++ = b - bufp->buffer;
478: *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0;
479: *stackp++ = regnum++;
480: *stackp++ = begalt - bufp->buffer;
481: fixup_jump = 0;
482: laststart = 0;
483: begalt = b;
484: break;
485:
486: case ')':
487: if (obscure_syntax & RE_NO_BK_PARENS)
488: goto normal_backsl;
489: handle_close:
490: if (stackp == stackb) goto unmatched_close;
491: begalt = *--stackp + bufp->buffer;
492: if (fixup_jump)
493: store_jump (fixup_jump, jump, b);
494: if (stackp[-1] < RE_NREGS)
495: {
496: PATPUSH (stop_memory);
497: PATPUSH (stackp[-1]);
498: }
499: stackp -= 2;
500: fixup_jump = 0;
501: if (*stackp)
502: fixup_jump = *stackp + bufp->buffer - 1;
503: laststart = *--stackp + bufp->buffer;
504: break;
505:
506: case '|':
507: if (obscure_syntax & RE_NO_BK_VBAR)
508: goto normal_backsl;
509: handle_bar:
510: insert_jump (on_failure_jump, begalt, b + 6, b);
511: pending_exact = 0;
512: b += 3;
513: if (fixup_jump)
514: store_jump (fixup_jump, jump, b);
515: fixup_jump = b;
516: b += 3;
517: laststart = 0;
518: begalt = b;
519: break;
520:
521: #ifdef emacs
522: case '=':
523: PATPUSH (at_dot);
524: break;
525:
526: case 's':
527: laststart = b;
528: PATPUSH (syntaxspec);
529: PATFETCH (c);
530: PATPUSH (syntax_spec_code[c]);
531: break;
532:
533: case 'S':
534: laststart = b;
535: PATPUSH (notsyntaxspec);
536: PATFETCH (c);
537: PATPUSH (syntax_spec_code[c]);
538: break;
539: #endif emacs
540:
541: case 'w':
542: laststart = b;
543: PATPUSH (wordchar);
544: break;
545:
546: case 'W':
547: laststart = b;
548: PATPUSH (notwordchar);
549: break;
550:
551: case '<':
552: PATPUSH (wordbeg);
553: break;
554:
555: case '>':
556: PATPUSH (wordend);
557: break;
558:
559: case 'b':
560: PATPUSH (wordbound);
561: break;
562:
563: case 'B':
564: PATPUSH (notwordbound);
565: break;
566:
567: case '`':
568: PATPUSH (begbuf);
569: break;
570:
571: case '\'':
572: PATPUSH (endbuf);
573: break;
574:
575: case '1':
576: case '2':
577: case '3':
578: case '4':
579: case '5':
580: case '6':
581: case '7':
582: case '8':
583: case '9':
584: c1 = c - '0';
585: if (c1 >= regnum)
586: goto normal_char;
587: for (stackt = stackp - 2; stackt > stackb; stackt -= 4)
588: if (*stackt == c1)
589: goto normal_char;
590: laststart = b;
591: PATPUSH (duplicate);
592: PATPUSH (c1);
593: break;
594:
595: case '+':
596: case '?':
597: if (obscure_syntax & RE_BK_PLUS_QM)
598: goto handle_plus;
599:
600: default:
601: normal_backsl:
602: /* You might think it would be useful for \ to mean
603: not to translate; but if we don't translate it
604: it will never match anything. */
605: if (translate) c = translate[c];
606: goto normal_char;
607: }
608: break;
609:
610: default:
611: normal_char:
612: if (!pending_exact || pending_exact + *pending_exact + 1 != b
613: || *pending_exact == 0177 || *p == '*' || *p == '^'
614: || ((obscure_syntax & RE_BK_PLUS_QM)
615: ? *p == '\\' && (p[1] == '+' || p[1] == '?')
616: : (*p == '+' || *p == '?')))
617: {
618: laststart = b;
619: PATPUSH (exactn);
620: pending_exact = b;
621: PATPUSH (0);
622: }
623: PATPUSH (c);
624: (*pending_exact)++;
625: }
626: }
627:
628: if (fixup_jump)
629: store_jump (fixup_jump, jump, b);
630:
631: if (stackp != stackb) goto unmatched_open;
632:
633: bufp->used = b - bufp->buffer;
634: return 0;
635:
636: invalid_pattern:
637: return "Invalid regular expression";
638:
639: unmatched_open:
640: return "Unmatched \\(";
641:
642: unmatched_close:
643: return "Unmatched \\)";
644:
645: end_of_pattern:
646: return "Premature end of regular expression";
647:
648: nesting_too_deep:
649: return "Nesting too deep";
650:
651: too_big:
652: return "Regular expression too big";
653:
654: memory_exhausted:
655: return "Memory exhausted";
656: }
657:
658: /* Store where `from' points a jump operation to jump to where `to' points.
659: `opcode' is the opcode to store. */
660:
661: static int
662: store_jump (from, opcode, to)
663: char *from, *to;
664: char opcode;
665: {
666: from[0] = opcode;
667: from[1] = (to - (from + 3)) & 0377;
668: from[2] = (to - (from + 3)) >> 8;
669: }
670:
671: /* Open up space at char FROM, and insert there a jump to TO.
672: CURRENT_END gives te end of the storage no in use,
673: so we know how much data to copy up.
674: OP is the opcode of the jump to insert.
675:
676: If you call this function, you must zero out pending_exact. */
677:
678: static int
679: insert_jump (op, from, to, current_end)
680: char op;
681: char *from, *to, *current_end;
682: {
683: register char *pto = current_end + 3;
684: register char *pfrom = current_end;
685: while (pfrom != from)
686: *--pto = *--pfrom;
687: store_jump (from, op, to);
688: }
689:
690: /* Given a pattern, compute a fastmap from it.
691: The fastmap records which of the (1 << BYTEWIDTH) possible characters
692: can start a string that matches the pattern.
693: This fastmap is used by re_search to skip quickly over totally implausible text.
694:
695: The caller must supply the address of a (1 << BYTEWIDTH)-byte data area
696: as bufp->fastmap.
697: The other components of bufp describe the pattern to be used. */
698:
699: void
700: re_compile_fastmap (bufp)
701: struct re_pattern_buffer *bufp;
702: {
703: unsigned char *pattern = (unsigned char *) bufp->buffer;
704: int size = bufp->used;
705: register char *fastmap = bufp->fastmap;
706: register unsigned char *p = pattern;
707: register unsigned char *pend = pattern + size;
708: register int j, k;
709: unsigned char *translate = (unsigned char *) bufp->translate;
710:
711: unsigned char *stackb[NFAILURES];
712: unsigned char **stackp = stackb;
713:
714: bzero (fastmap, (1 << BYTEWIDTH));
715: bufp->fastmap_accurate = 1;
716: bufp->can_be_null = 0;
717:
718: while (p)
719: {
720: if (p == pend)
721: {
722: bufp->can_be_null = 1;
723: break;
724: }
725: #ifdef SWITCH_ENUM_BUG
726: switch ((int) ((enum regexpcode) *p++))
727: #else
728: switch ((enum regexpcode) *p++)
729: #endif
730: {
731: case exactn:
732: if (translate)
733: fastmap[translate[p[1]]] = 1;
734: else
735: fastmap[p[1]] = 1;
736: break;
737:
738: case begline:
739: case before_dot:
740: case at_dot:
741: case after_dot:
742: case begbuf:
743: case endbuf:
744: case wordbound:
745: case notwordbound:
746: case wordbeg:
747: case wordend:
748: continue;
749:
750: case endline:
751: if (translate)
752: fastmap[translate['\n']] = 1;
753: else
754: fastmap['\n'] = 1;
755: if (bufp->can_be_null != 1)
756: bufp->can_be_null = 2;
757: break;
758:
759: case finalize_jump:
760: case maybe_finalize_jump:
761: case jump:
762: case dummy_failure_jump:
763: bufp->can_be_null = 1;
764: j = *p++ & 0377;
765: j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
766: p += j + 1; /* The 1 compensates for missing ++ above */
767: if (j > 0)
768: continue;
769: /* Jump backward reached implies we just went through
770: the body of a loop and matched nothing.
771: Opcode jumped to should be an on_failure_jump.
772: Just treat it like an ordinary jump.
773: For a * loop, it has pushed its failure point already;
774: if so, discard that as redundant. */
775: if ((enum regexpcode) *p != on_failure_jump)
776: continue;
777: p++;
778: j = *p++ & 0377;
779: j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
780: p += j + 1; /* The 1 compensates for missing ++ above */
781: if (stackp != stackb && *stackp == p)
782: stackp--;
783: continue;
784:
785: case on_failure_jump:
786: j = *p++ & 0377;
787: j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
788: p++;
789: *++stackp = p + j;
790: continue;
791:
792: case start_memory:
793: case stop_memory:
794: p++;
795: continue;
796:
797: case duplicate:
798: bufp->can_be_null = 1;
799: fastmap['\n'] = 1;
800: case anychar:
801: for (j = 0; j < (1 << BYTEWIDTH); j++)
802: if (j != '\n')
803: fastmap[j] = 1;
804: if (bufp->can_be_null)
805: return;
806: /* Don't return; check the alternative paths
807: so we can set can_be_null if appropriate. */
808: break;
809:
810: case wordchar:
811: for (j = 0; j < (1 << BYTEWIDTH); j++)
812: if (SYNTAX (j) == Sword)
813: fastmap[j] = 1;
814: break;
815:
816: case notwordchar:
817: for (j = 0; j < (1 << BYTEWIDTH); j++)
818: if (SYNTAX (j) != Sword)
819: fastmap[j] = 1;
820: break;
821:
822: #ifdef emacs
823: case syntaxspec:
824: k = *p++;
825: for (j = 0; j < (1 << BYTEWIDTH); j++)
826: if (SYNTAX (j) == (enum syntaxcode) k)
827: fastmap[j] = 1;
828: break;
829:
830: case notsyntaxspec:
831: k = *p++;
832: for (j = 0; j < (1 << BYTEWIDTH); j++)
833: if (SYNTAX (j) != (enum syntaxcode) k)
834: fastmap[j] = 1;
835: break;
836: #endif emacs
837:
838: case charset:
839: for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
840: if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
841: {
842: if (translate)
843: fastmap[translate[j]] = 1;
844: else
845: fastmap[j] = 1;
846: }
847: break;
848:
849: case charset_not:
850: /* Chars beyond end of map must be allowed */
851: for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
852: if (translate)
853: fastmap[translate[j]] = 1;
854: else
855: fastmap[j] = 1;
856:
857: for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
858: if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
859: {
860: if (translate)
861: fastmap[translate[j]] = 1;
862: else
863: fastmap[j] = 1;
864: }
865: break;
866: }
867:
868: /* Get here means we have successfully found the possible starting characters
869: of one path of the pattern. We need not follow this path any farther.
870: Instead, look at the next alternative remembered in the stack. */
871: if (stackp != stackb)
872: p = *stackp--;
873: else
874: break;
875: }
876: }
877:
878: /* Like re_search_2, below, but only one string is specified. */
879:
880: int
881: re_search (pbufp, string, size, startpos, range, regs)
882: struct re_pattern_buffer *pbufp;
883: char *string;
884: int size, startpos, range;
885: struct re_registers *regs;
886: {
887: return re_search_2 (pbufp, 0, 0, string, size, startpos, range, regs, size);
888: }
889:
890: /* Like re_match_2 but tries first a match starting at index STARTPOS,
891: then at STARTPOS + 1, and so on.
892: RANGE is the number of places to try before giving up.
893: If RANGE is negative, the starting positions tried are
894: STARTPOS, STARTPOS - 1, etc.
895: It is up to the caller to make sure that range is not so large
896: as to take the starting position outside of the input strings.
897:
898: The value returned is the position at which the match was found,
899: or -1 if no match was found,
900: or -2 if error (such as failure stack overflow). */
901:
902: int
903: re_search_2 (pbufp, string1, size1, string2, size2, startpos, range, regs, mstop)
904: struct re_pattern_buffer *pbufp;
905: char *string1, *string2;
906: int size1, size2;
907: int startpos;
908: register int range;
909: struct re_registers *regs;
910: int mstop;
911: {
912: register char *fastmap = pbufp->fastmap;
913: register unsigned char *translate = (unsigned char *) pbufp->translate;
914: int total = size1 + size2;
915: int val;
916:
917: /* Update the fastmap now if not correct already */
918: if (fastmap && !pbufp->fastmap_accurate)
919: re_compile_fastmap (pbufp);
920:
921: /* Don't waste time in a long search for a pattern
922: that says it is anchored. */
923: if (pbufp->used > 0 && (enum regexpcode) pbufp->buffer[0] == begbuf
924: && range > 0)
925: {
926: if (startpos > 0)
927: return -1;
928: else
929: range = 1;
930: }
931:
932: while (1)
933: {
934: /* If a fastmap is supplied, skip quickly over characters
935: that cannot possibly be the start of a match.
936: Note, however, that if the pattern can possibly match
937: the null string, we must test it at each starting point
938: so that we take the first null string we get. */
939:
940: if (fastmap && startpos < total && pbufp->can_be_null != 1)
941: {
942: if (range > 0)
943: {
944: register int lim = 0;
945: register unsigned char *p;
946: int irange = range;
947: if (startpos < size1 && startpos + range >= size1)
948: lim = range - (size1 - startpos);
949:
950: p = ((unsigned char *)
951: &(startpos >= size1 ? string2 - size1 : string1)[startpos]);
952:
953: if (translate)
954: {
955: while (range > lim && !fastmap[translate[*p++]])
956: range--;
957: }
958: else
959: {
960: while (range > lim && !fastmap[*p++])
961: range--;
962: }
963: startpos += irange - range;
964: }
965: else
966: {
967: register unsigned char c;
968: if (startpos >= size1)
969: c = string2[startpos - size1];
970: else
971: c = string1[startpos];
972: c &= 0xff;
973: if (translate ? !fastmap[translate[c]] : !fastmap[c])
974: goto advance;
975: }
976: }
977:
978: if (range >= 0 && startpos == total
979: && fastmap && pbufp->can_be_null == 0)
980: return -1;
981:
982: val = re_match_2 (pbufp, string1, size1, string2, size2, startpos, regs,
983: mstop);
984: /* Propagate error indication if worse than mere failure. */
985: if (val == -2)
986: return -2;
987: /* Return position on success. */
988: if (0 <= val)
989: return startpos;
990:
991: #ifdef C_ALLOCA
992: alloca (0);
993: #endif /* C_ALLOCA */
994:
995: advance:
996: if (!range) break;
997: if (range > 0) range--, startpos++; else range++, startpos--;
998: }
999: return -1;
1000: }
1001:
1002: #ifndef emacs /* emacs never uses this */
1003: int
1004: re_match (pbufp, string, size, pos, regs)
1005: struct re_pattern_buffer *pbufp;
1006: char *string;
1007: int size, pos;
1008: struct re_registers *regs;
1009: {
1010: return re_match_2 (pbufp, 0, 0, string, size, pos, regs, size);
1011: }
1012: #endif /* emacs */
1013:
1014: /* Maximum size of failure stack. Beyond this, overflow is an error. */
1015:
1016: int re_max_failures = 2000;
1017:
1018: static int bcmp_translate();
1019: /* Match the pattern described by PBUFP
1020: against data which is the virtual concatenation of STRING1 and STRING2.
1021: SIZE1 and SIZE2 are the sizes of the two data strings.
1022: Start the match at position POS.
1023: Do not consider matching past the position MSTOP.
1024:
1025: If pbufp->fastmap is nonzero, then it had better be up to date.
1026:
1027: The reason that the data to match are specified as two components
1028: which are to be regarded as concatenated
1029: is so this function can be used directly on the contents of an Emacs buffer.
1030:
1031: -1 is returned if there is no match. -2 is returned if there is
1032: an error (such as match stack overflow). Otherwise the value is the length
1033: of the substring which was matched. */
1034:
1035: int
1036: re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop)
1037: struct re_pattern_buffer *pbufp;
1038: unsigned char *string1, *string2;
1039: int size1, size2;
1040: int pos;
1041: struct re_registers *regs;
1042: int mstop;
1043: {
1044: register unsigned char *p = (unsigned char *) pbufp->buffer;
1045: register unsigned char *pend = p + pbufp->used;
1046: /* End of first string */
1047: unsigned char *end1;
1048: /* End of second string */
1049: unsigned char *end2;
1050: /* Pointer just past last char to consider matching */
1051: unsigned char *end_match_1, *end_match_2;
1052: register unsigned char *d, *dend;
1053: register int mcnt;
1054: unsigned char *translate = (unsigned char *) pbufp->translate;
1055:
1056: /* Failure point stack. Each place that can handle a failure further down the line
1057: pushes a failure point on this stack. It consists of two char *'s.
1058: The first one pushed is where to resume scanning the pattern;
1059: the second pushed is where to resume scanning the strings.
1060: If the latter is zero, the failure point is a "dummy".
1061: If a failure happens and the innermost failure point is dormant,
1062: it discards that failure point and tries the next one. */
1063:
1064: unsigned char *initial_stack[2 * NFAILURES];
1065: unsigned char **stackb = initial_stack;
1066: unsigned char **stackp = stackb, **stacke = &stackb[2 * NFAILURES];
1067:
1068: /* Information on the "contents" of registers.
1069: These are pointers into the input strings; they record
1070: just what was matched (on this attempt) by some part of the pattern.
1071: The start_memory command stores the start of a register's contents
1072: and the stop_memory command stores the end.
1073:
1074: At that point, regstart[regnum] points to the first character in the register,
1075: regend[regnum] points to the first character beyond the end of the register,
1076: regstart_seg1[regnum] is true iff regstart[regnum] points into string1,
1077: and regend_seg1[regnum] is true iff regend[regnum] points into string1. */
1078:
1079: unsigned char *regstart[RE_NREGS];
1080: unsigned char *regend[RE_NREGS];
1081: unsigned char regstart_seg1[RE_NREGS], regend_seg1[RE_NREGS];
1082:
1083: /* Set up pointers to ends of strings.
1084: Don't allow the second string to be empty unless both are empty. */
1085: if (!size2)
1086: {
1087: string2 = string1;
1088: size2 = size1;
1089: string1 = 0;
1090: size1 = 0;
1091: }
1092: end1 = string1 + size1;
1093: end2 = string2 + size2;
1094:
1095: /* Compute where to stop matching, within the two strings */
1096: if (mstop <= size1)
1097: {
1098: end_match_1 = string1 + mstop;
1099: end_match_2 = string2;
1100: }
1101: else
1102: {
1103: end_match_1 = end1;
1104: end_match_2 = string2 + mstop - size1;
1105: }
1106:
1107: /* Initialize \) text positions to -1
1108: to mark ones that no \( or \) has been seen for. */
1109:
1110: for (mcnt = 0; mcnt < sizeof (regend) / sizeof (*regend); mcnt++)
1111: regend[mcnt] = (unsigned char *) -1;
1112:
1113: /* `p' scans through the pattern as `d' scans through the data.
1114: `dend' is the end of the input string that `d' points within.
1115: `d' is advanced into the following input string whenever necessary,
1116: but this happens before fetching;
1117: therefore, at the beginning of the loop,
1118: `d' can be pointing at the end of a string,
1119: but it cannot equal string2. */
1120:
1121: if (pos <= size1)
1122: d = string1 + pos, dend = end_match_1;
1123: else
1124: d = string2 + pos - size1, dend = end_match_2;
1125:
1126: /* Write PREFETCH; just before fetching a character with *d. */
1127: #define PREFETCH \
1128: while (d == dend) \
1129: { if (dend == end_match_2) goto fail; /* end of string2 => failure */ \
1130: d = string2; /* end of string1 => advance to string2. */ \
1131: dend = end_match_2; }
1132:
1133: /* This loop loops over pattern commands.
1134: It exits by returning from the function if match is complete,
1135: or it drops through if match fails at this starting point in the input data. */
1136:
1137: while (1)
1138: {
1139: if (p == pend)
1140: /* End of pattern means we have succeeded! */
1141: {
1142: /* If caller wants register contents data back, convert it to indices */
1143: if (regs)
1144: {
1145: regs->start[0] = pos;
1146: if (dend == end_match_1)
1147: regs->end[0] = d - string1;
1148: else
1149: regs->end[0] = d - string2 + size1;
1150: for (mcnt = 1; mcnt < RE_NREGS; mcnt++)
1151: {
1152: if (regend[mcnt] == (unsigned char *) -1)
1153: {
1154: regs->start[mcnt] = -1;
1155: regs->end[mcnt] = -1;
1156: continue;
1157: }
1158: if (regstart_seg1[mcnt])
1159: regs->start[mcnt] = regstart[mcnt] - string1;
1160: else
1161: regs->start[mcnt] = regstart[mcnt] - string2 + size1;
1162: if (regend_seg1[mcnt])
1163: regs->end[mcnt] = regend[mcnt] - string1;
1164: else
1165: regs->end[mcnt] = regend[mcnt] - string2 + size1;
1166: }
1167: }
1168: if (dend == end_match_1)
1169: return (d - string1 - pos);
1170: else
1171: return d - string2 + size1 - pos;
1172: }
1173:
1174: /* Otherwise match next pattern command */
1175: #ifdef SWITCH_ENUM_BUG
1176: switch ((int) ((enum regexpcode) *p++))
1177: #else
1178: switch ((enum regexpcode) *p++)
1179: #endif
1180: {
1181:
1182: /* \( is represented by a start_memory, \) by a stop_memory.
1183: Both of those commands contain a "register number" argument.
1184: The text matched within the \( and \) is recorded under that number.
1185: Then, \<digit> turns into a `duplicate' command which
1186: is followed by the numeric value of <digit> as the register number. */
1187:
1188: case start_memory:
1189: regstart[*p] = d;
1190: regstart_seg1[*p++] = (dend == end_match_1);
1191: break;
1192:
1193: case stop_memory:
1194: regend[*p] = d;
1195: regend_seg1[*p++] = (dend == end_match_1);
1196: break;
1197:
1198: case duplicate:
1199: {
1200: int regno = *p++; /* Get which register to match against */
1201: register unsigned char *d2, *dend2;
1202:
1203: /* Don't allow matching a register that hasn't been used.
1204: This isn't fully reliable in the current version,
1205: but it is better than crashing. */
1206: if ((int) regend[regno] == -1)
1207: goto fail;
1208:
1209: d2 = regstart[regno];
1210: dend2 = ((regstart_seg1[regno] == regend_seg1[regno])
1211: ? regend[regno] : end_match_1);
1212: while (1)
1213: {
1214: /* Advance to next segment in register contents, if necessary */
1215: while (d2 == dend2)
1216: {
1217: if (dend2 == end_match_2) break;
1218: if (dend2 == regend[regno]) break;
1219: d2 = string2, dend2 = regend[regno]; /* end of string1 => advance to string2. */
1220: }
1221: /* At end of register contents => success */
1222: if (d2 == dend2) break;
1223:
1224: /* Advance to next segment in data being matched, if necessary */
1225: PREFETCH;
1226:
1227: /* mcnt gets # consecutive chars to compare */
1228: mcnt = dend - d;
1229: if (mcnt > dend2 - d2)
1230: mcnt = dend2 - d2;
1231: /* Compare that many; failure if mismatch, else skip them. */
1232: if (translate ? bcmp_translate (d, d2, mcnt, translate) : bcmp (d, d2, mcnt))
1233: goto fail;
1234: d += mcnt, d2 += mcnt;
1235: }
1236: }
1237: break;
1238:
1239: case anychar:
1240: /* fetch a data character */
1241: PREFETCH;
1242: /* Match anything but a newline. */
1243: if ((translate ? translate[*d++] : *d++) == '\n')
1244: goto fail;
1245: break;
1246:
1247: case charset:
1248: case charset_not:
1249: {
1250: /* Nonzero for charset_not */
1251: int not = 0;
1252: register int c;
1253: if (*(p - 1) == (unsigned char) charset_not)
1254: not = 1;
1255:
1256: /* fetch a data character */
1257: PREFETCH;
1258:
1259: if (translate)
1260: c = translate [*d];
1261: else
1262: c = *d;
1263:
1264: if (c < *p * BYTEWIDTH
1265: && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
1266: not = !not;
1267:
1268: p += 1 + *p;
1269:
1270: if (!not) goto fail;
1271: d++;
1272: break;
1273: }
1274:
1275: case begline:
1276: if (d == string1 || d[-1] == '\n')
1277: break;
1278: goto fail;
1279:
1280: case endline:
1281: if (d == end2
1282: || (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n'))
1283: break;
1284: goto fail;
1285:
1286: /* "or" constructs ("|") are handled by starting each alternative
1287: with an on_failure_jump that points to the start of the next alternative.
1288: Each alternative except the last ends with a jump to the joining point.
1289: (Actually, each jump except for the last one really jumps
1290: to the following jump, because tensioning the jumps is a hassle.) */
1291:
1292: /* The start of a stupid repeat has an on_failure_jump that points
1293: past the end of the repeat text.
1294: This makes a failure point so that, on failure to match a repetition,
1295: matching restarts past as many repetitions have been found
1296: with no way to fail and look for another one. */
1297:
1298: /* A smart repeat is similar but loops back to the on_failure_jump
1299: so that each repetition makes another failure point. */
1300:
1301: case on_failure_jump:
1302: if (stackp == stacke)
1303: {
1304: unsigned char **stackx;
1305: if (stacke - stackb > re_max_failures)
1306: return -2;
1307: stackx = (unsigned char **) alloca (2 * (stacke - stackb)
1308: * sizeof (char *));
1309: bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
1310: stackp = stackx + (stackp - stackb);
1311: stacke = stackx + 2 * (stacke - stackb);
1312: stackb = stackx;
1313: }
1314: mcnt = *p++ & 0377;
1315: mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
1316: p++;
1317: *stackp++ = mcnt + p;
1318: *stackp++ = d;
1319: break;
1320:
1321: /* The end of a smart repeat has an maybe_finalize_jump back.
1322: Change it either to a finalize_jump or an ordinary jump. */
1323:
1324: case maybe_finalize_jump:
1325: mcnt = *p++ & 0377;
1326: mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
1327: p++;
1328: /* Compare what follows with the begining of the repeat.
1329: If we can establish that there is nothing that they would
1330: both match, we can change to finalize_jump */
1331: if (p == pend)
1332: p[-3] = (unsigned char) finalize_jump;
1333: else if (*p == (unsigned char) exactn
1334: || *p == (unsigned char) endline)
1335: {
1336: register int c = *p == (unsigned char) endline ? '\n' : p[2];
1337: register unsigned char *p1 = p + mcnt;
1338: /* p1[0] ... p1[2] are an on_failure_jump.
1339: Examine what follows that */
1340: if (p1[3] == (unsigned char) exactn && p1[5] != c)
1341: p[-3] = (unsigned char) finalize_jump;
1342: else if (p1[3] == (unsigned char) charset
1343: || p1[3] == (unsigned char) charset_not)
1344: {
1345: int not = p1[3] == (unsigned char) charset_not;
1346: if (c < p1[4] * BYTEWIDTH
1347: && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
1348: not = !not;
1349: /* not is 1 if c would match */
1350: /* That means it is not safe to finalize */
1351: if (!not)
1352: p[-3] = (unsigned char) finalize_jump;
1353: }
1354: }
1355: p -= 2;
1356: if (p[-1] != (unsigned char) finalize_jump)
1357: {
1358: p[-1] = (unsigned char) jump;
1359: goto nofinalize;
1360: }
1361:
1362: /* The end of a stupid repeat has a finalize-jump
1363: back to the start, where another failure point will be made
1364: which will point after all the repetitions found so far. */
1365:
1366: case finalize_jump:
1367: stackp -= 2;
1368:
1369: case jump:
1370: nofinalize:
1371: mcnt = *p++ & 0377;
1372: mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
1373: p += mcnt + 1; /* The 1 compensates for missing ++ above */
1374: break;
1375:
1376: case dummy_failure_jump:
1377: if (stackp == stacke)
1378: {
1379: unsigned char **stackx
1380: = (unsigned char **) alloca (2 * (stacke - stackb)
1381: * sizeof (char *));
1382: bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
1383: stackp = stackx + (stackp - stackb);
1384: stacke = stackx + 2 * (stacke - stackb);
1385: stackb = stackx;
1386: }
1387: *stackp++ = 0;
1388: *stackp++ = 0;
1389: goto nofinalize;
1390:
1391: case wordbound:
1392: if (d == string1 /* Points to first char */
1393: || d == end2 /* Points to end */
1394: || (d == end1 && size2 == 0)) /* Points to end */
1395: break;
1396: if ((SYNTAX (d[-1]) == Sword)
1397: != (SYNTAX (d == end1 ? *string2 : *d) == Sword))
1398: break;
1399: goto fail;
1400:
1401: case notwordbound:
1402: if (d == string1 /* Points to first char */
1403: || d == end2 /* Points to end */
1404: || (d == end1 && size2 == 0)) /* Points to end */
1405: goto fail;
1406: if ((SYNTAX (d[-1]) == Sword)
1407: != (SYNTAX (d == end1 ? *string2 : *d) == Sword))
1408: goto fail;
1409: break;
1410:
1411: case wordbeg:
1412: if (d == end2 /* Points to end */
1413: || (d == end1 && size2 == 0) /* Points to end */
1414: || SYNTAX (* (d == end1 ? string2 : d)) != Sword) /* Next char not a letter */
1415: goto fail;
1416: if (d == string1 /* Points to first char */
1417: || SYNTAX (d[-1]) != Sword) /* prev char not letter */
1418: break;
1419: goto fail;
1420:
1421: case wordend:
1422: if (d == string1 /* Points to first char */
1423: || SYNTAX (d[-1]) != Sword) /* prev char not letter */
1424: goto fail;
1425: if (d == end2 /* Points to end */
1426: || (d == end1 && size2 == 0) /* Points to end */
1427: || SYNTAX (d == end1 ? *string2 : *d) != Sword) /* Next char not a letter */
1428: break;
1429: goto fail;
1430:
1431: #ifdef emacs
1432: case before_dot:
1433: if (PTR_CHAR_POS (d) + 1 >= point)
1434: goto fail;
1435: break;
1436:
1437: case at_dot:
1438: if (PTR_CHAR_POS (d) + 1 != point)
1439: goto fail;
1440: break;
1441:
1442: case after_dot:
1443: if (PTR_CHAR_POS (d) + 1 <= point)
1444: goto fail;
1445: break;
1446:
1447: case wordchar:
1448: mcnt = (int) Sword;
1449: goto matchsyntax;
1450:
1451: case syntaxspec:
1452: mcnt = *p++;
1453: matchsyntax:
1454: PREFETCH;
1455: if (SYNTAX (*d++) != (enum syntaxcode) mcnt) goto fail;
1456: break;
1457:
1458: case notwordchar:
1459: mcnt = (int) Sword;
1460: goto matchnotsyntax;
1461:
1462: case notsyntaxspec:
1463: mcnt = *p++;
1464: matchnotsyntax:
1465: PREFETCH;
1466: if (SYNTAX (*d++) == (enum syntaxcode) mcnt) goto fail;
1467: break;
1468: #else
1469: case wordchar:
1470: PREFETCH;
1471: if (SYNTAX (*d++) == 0) goto fail;
1472: break;
1473:
1474: case notwordchar:
1475: PREFETCH;
1476: if (SYNTAX (*d++) != 0) goto fail;
1477: break;
1478: #endif not emacs
1479:
1480: case begbuf:
1481: if (d == string1) /* Note, d cannot equal string2 */
1482: break; /* unless string1 == string2. */
1483: goto fail;
1484:
1485: case endbuf:
1486: if (d == end2 || (d == end1 && size2 == 0))
1487: break;
1488: goto fail;
1489:
1490: case exactn:
1491: /* Match the next few pattern characters exactly.
1492: mcnt is how many characters to match. */
1493: mcnt = *p++;
1494: if (translate)
1495: {
1496: do
1497: {
1498: PREFETCH;
1499: if (translate[*d++] != *p++) goto fail;
1500: }
1501: while (--mcnt);
1502: }
1503: else
1504: {
1505: do
1506: {
1507: PREFETCH;
1508: if (*d++ != *p++) goto fail;
1509: }
1510: while (--mcnt);
1511: }
1512: break;
1513: }
1514: continue; /* Successfully matched one pattern command; keep matching */
1515:
1516: /* Jump here if any matching operation fails. */
1517: fail:
1518: if (stackp != stackb)
1519: /* A restart point is known. Restart there and pop it. */
1520: {
1521: if (!stackp[-2])
1522: { /* If innermost failure point is dormant, flush it and keep looking */
1523: stackp -= 2;
1524: goto fail;
1525: }
1526: d = *--stackp;
1527: p = *--stackp;
1528: if (d >= string1 && d <= end1)
1529: dend = end_match_1;
1530: }
1531: else break; /* Matching at this starting point really fails! */
1532: }
1533: return -1; /* Failure to match */
1534: }
1535:
1536: static int
1537: bcmp_translate (s1, s2, len, translate)
1538: unsigned char *s1, *s2;
1539: register int len;
1540: unsigned char *translate;
1541: {
1542: register unsigned char *p1 = s1, *p2 = s2;
1543: while (len)
1544: {
1545: if (translate [*p1++] != translate [*p2++]) return 1;
1546: len--;
1547: }
1548: return 0;
1549: }
1550:
1551: /* Entry points compatible with bsd4.2 regex library */
1552:
1553: #ifndef emacs
1554:
1555: static struct re_pattern_buffer re_comp_buf;
1556:
1557: char *
1558: re_comp (s)
1559: char *s;
1560: {
1561: if (!s)
1562: {
1563: if (!re_comp_buf.buffer)
1564: return "No previous regular expression";
1565: return 0;
1566: }
1567:
1568: if (!re_comp_buf.buffer)
1569: {
1570: if (!(re_comp_buf.buffer = (char *) malloc (200)))
1571: return "Memory exhausted";
1572: re_comp_buf.allocated = 200;
1573: if (!(re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH)))
1574: return "Memory exhausted";
1575: }
1576: return re_compile_pattern (s, strlen (s), &re_comp_buf);
1577: }
1578:
1579: int
1580: re_exec (s)
1581: char *s;
1582: {
1583: int len = strlen (s);
1584: return 0 <= re_search (&re_comp_buf, s, len, 0, len, 0);
1585: }
1586:
1587: #endif /* emacs */
1588:
1589: #ifdef test
1590:
1591: #include <stdio.h>
1592:
1593: /* Indexed by a character, gives the upper case equivalent of the character */
1594:
1595: static char upcase[0400] =
1596: { 000, 001, 002, 003, 004, 005, 006, 007,
1597: 010, 011, 012, 013, 014, 015, 016, 017,
1598: 020, 021, 022, 023, 024, 025, 026, 027,
1599: 030, 031, 032, 033, 034, 035, 036, 037,
1600: 040, 041, 042, 043, 044, 045, 046, 047,
1601: 050, 051, 052, 053, 054, 055, 056, 057,
1602: 060, 061, 062, 063, 064, 065, 066, 067,
1603: 070, 071, 072, 073, 074, 075, 076, 077,
1604: 0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1605: 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
1606: 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
1607: 0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
1608: 0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1609: 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
1610: 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
1611: 0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177,
1612: 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
1613: 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
1614: 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
1615: 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
1616: 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
1617: 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
1618: 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
1619: 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
1620: 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
1621: 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
1622: 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
1623: 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
1624: 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
1625: 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
1626: 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
1627: 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
1628: };
1629:
1630: main (argc, argv)
1631: int argc;
1632: char **argv;
1633: {
1634: char pat[80];
1635: struct re_pattern_buffer buf;
1636: int i;
1637: char c;
1638: char fastmap[(1 << BYTEWIDTH)];
1639:
1640: /* Allow a command argument to specify the style of syntax. */
1641: if (argc > 1)
1642: obscure_syntax = atoi (argv[1]);
1643:
1644: buf.allocated = 40;
1645: buf.buffer = (char *) malloc (buf.allocated);
1646: buf.fastmap = fastmap;
1647: buf.translate = upcase;
1648:
1649: while (1)
1650: {
1651: gets (pat);
1652:
1653: if (*pat)
1654: {
1655: re_compile_pattern (pat, strlen(pat), &buf);
1656:
1657: for (i = 0; i < buf.used; i++)
1658: printchar (buf.buffer[i]);
1659:
1660: putchar ('\n');
1661:
1662: printf ("%d allocated, %d used.\n", buf.allocated, buf.used);
1663:
1664: re_compile_fastmap (&buf);
1665: printf ("Allowed by fastmap: ");
1666: for (i = 0; i < (1 << BYTEWIDTH); i++)
1667: if (fastmap[i]) printchar (i);
1668: putchar ('\n');
1669: }
1670:
1671: gets (pat); /* Now read the string to match against */
1672:
1673: i = re_match (&buf, pat, strlen (pat), 0, 0);
1674: printf ("Match value %d.\n", i);
1675: }
1676: }
1677:
1678: #ifdef NOTDEF
1679: print_buf (bufp)
1680: struct re_pattern_buffer *bufp;
1681: {
1682: int i;
1683:
1684: printf ("buf is :\n----------------\n");
1685: for (i = 0; i < bufp->used; i++)
1686: printchar (bufp->buffer[i]);
1687:
1688: printf ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used);
1689:
1690: printf ("Allowed by fastmap: ");
1691: for (i = 0; i < (1 << BYTEWIDTH); i++)
1692: if (bufp->fastmap[i])
1693: printchar (i);
1694: printf ("\nAllowed by translate: ");
1695: if (bufp->translate)
1696: for (i = 0; i < (1 << BYTEWIDTH); i++)
1697: if (bufp->translate[i])
1698: printchar (i);
1699: printf ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't");
1700: printf ("can %s be null\n----------", bufp->can_be_null ? "" : "not");
1701: }
1702: #endif
1703:
1704: printchar (c)
1705: char c;
1706: {
1707: if (c < 041 || c >= 0177)
1708: {
1709: putchar ('\\');
1710: putchar (((c >> 6) & 3) + '0');
1711: putchar (((c >> 3) & 7) + '0');
1712: putchar ((c & 7) + '0');
1713: }
1714: else
1715: putchar (c);
1716: }
1717:
1718: error (string)
1719: char *string;
1720: {
1721: puts (string);
1722: exit (1);
1723: }
1724:
1725: #endif test
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.