|
|
1.1 root 1: /* Extended regular expression matching and search.
2: Copyright (C) 1985 Richard M. Stallman
3:
4: This program is distributed in the hope that it will be useful,
5: but without any warranty. No author or distributor
6: accepts responsibility to anyone for the consequences of using it
7: or for whether it serves any particular purpose or works at all,
8: unless he says so in writing.
9:
10: Permission is granted to anyone to distribute verbatim copies
11: of this program's source code as received, in any medium, provided that
12: the copyright notice, the nonwarraty notice above
13: and this permission notice are preserved,
14: and that the distributor grants the recipient all rights
15: for further redistribution as permitted by this notice,
16: and informs him of these rights.
17:
18: Permission is granted to distribute modified versions of this
19: program's source code, or of portions of it, under the above
20: conditions, plus the conditions that all changed files carry
21: prominent notices stating who last changed them and that the
22: derived material, including anything packaged together with it and
23: conceptually functioning as a modification of it rather than an
24: application of it, is in its entirety subject to a permission
25: notice identical to this one.
26:
27: Permission is granted to distribute this program (verbatim or
28: as modified) in compiled or executable form, provided verbatim
29: redistribution is permitted as stated above for source code, and
30: A. it is accompanied by the corresponding machine-readable
31: source code, under the above conditions, or
32: B. it is accompanied by a written offer, with no time limit,
33: to distribute the corresponding machine-readable source code,
34: under the above conditions, to any one, in return for reimbursement
35: of the cost of distribution. Verbatim redistribution of the
36: written offer must be permitted. Or,
37: C. it is distributed by someone who received only the
38: compiled or executable form, and is accompanied by a copy of the
39: written offer of source code which he received along with it.
40:
41: Permission is granted to distribute this program (verbatim or as modified)
42: in executable form as part of a larger system provided that the source
43: code for this program, including any modifications used,
44: is also distributed or offered as stated in the preceding paragraph.
45:
46: In other words, you are welcome to use, share and improve this program.
47: You are forbidden to forbid anyone else to use, share and improve
48: what you give them. Help stamp out software-hoarding! */
49:
50:
51: /* To test, compile with -Dtest.
52: This Dtestable feature turns this into a self-contained program
53: which reads a pattern, describes how it compiles,
54: then reads a string and searches for it. */
55:
56:
57: #ifdef emacs
58:
59: /* The `emacs' switch turns on certain special matching commands
60: that make sense only in emacs. */
61:
62: #include "config.h"
63: #include "lisp.h"
64: #include "buffer.h"
65: #include "syntax.h"
66:
67: #else /* not emacs */
68:
69: /*
70: * Define the syntax stuff, so we can do the \<...\> things.
71: */
72: #define Sword 1
73:
74: #define SYNTAX(c) syntax_table[c]
75:
76: static char syntax_table[256];
77:
78: #endif /* not emacs */
79:
80: #include "regex.h"
81:
82: /* Number of failure points to allocate space for initially,
83: when matching. If this number is exceeded, more space is allocated,
84: so it is not a hard limit. */
85:
86: #ifndef NFAILURES
87: #define NFAILURES 80
88: #endif NFAILURES
89:
90: /* width of a byte in bits */
91:
92: #define BYTEWIDTH 8
93:
94: /* These are the command codes that appear in compiled regular expressions, one per byte.
95: Some command codes are followed by argument bytes.
96: A command code can specify any interpretation whatever for its arguments.
97: Zero-bytes may appear in the compiled regular expression. */
98:
99: enum regexpcode
100: {
101: unused,
102: exactn, /* followed by one byte giving n, and then by n literal bytes */
103: begline, /* fails unless at beginning of line */
104: endline, /* fails unless at end of line */
105: jump, /* followed by two bytes giving relative address to jump to */
106: on_failure_jump, /* followed by two bytes giving relative address of place
107: to resume at in case of failure. */
108: finalize_jump, /* Throw away latest failure point and then jump to address. */
109: maybe_finalize_jump, /* Like jump but finalize if safe to do so.
110: This is used to jump back to the beginning
111: of a repeat. If the command that follows
112: this jump is clearly incompatible with the
113: one at the beginning of the repeat, such that
114: we can be sure that there is no use backtracking
115: out of repetitions already completed,
116: then we finalize. */
117: dummy_failure_jump, /* jump, and push a dummy failure point.
118: This failure point will be thrown away
119: if an attempt is made to use it for a failure.
120: A + construct makes this before the first repeat. */
121: anychar, /* matches any one character */
122: charset, /* matches any one char belonging to specified set.
123: First following byte is # bitmap bytes.
124: Then come bytes for a bit-map saying which chars are in.
125: Bits in each byte are ordered low-bit-first.
126: A character is in the set if its bit is 1.
127: A character too large to have a bit in the map
128: is automatically not in the set */
129: charset_not, /* similar but match any character that is NOT one of those specified */
130: start_memory, /* starts remembering the text that is matched
131: and stores it in a memory register.
132: followed by one byte containing the register number.
133: Register numbers must be in the range 0 through NREGS. */
134: stop_memory, /* stops remembering the text that is matched
135: and stores it in a memory register.
136: followed by one byte containing the register number.
137: Register numbers must be in the range 0 through NREGS. */
138: duplicate, /* match a duplicate of something remembered.
139: Followed by one byte containing the index of the memory register. */
140: before_dot, /* Succeeds if before dot */
141: at_dot, /* Succeeds if at dot */
142: after_dot, /* Succeeds if after dot */
143: begbuf, /* Succeeds if at beginning of buffer */
144: endbuf, /* Succeeds if at end of buffer */
145: wordchar, /* Matches any word-constituent character */
146: notwordchar, /* Matches any char that is not a word-constituent */
147: wordbeg, /* Succeeds if at word beginning */
148: wordend, /* Succeeds if at word end */
149: wordbound, /* Succeeds if at a word boundary */
150: notwordbound, /* Succeeds if not at a word boundary */
151: syntaxspec, /* Matches any character whose syntax is specified.
152: followed by a byte which contains a syntax code, Sword or such like */
153: notsyntaxspec /* Matches any character whose syntax differs from the specified. */
154: };
155:
156: #ifndef SIGN_EXTEND_CHAR
157: #define SIGN_EXTEND_CHAR(x) (x)
158: #endif
159:
160: /* compile_pattern takes a regular-expression descriptor string in the user's format
161: and converts it into a buffer full of byte commands for matching.
162:
163: pattern is the address of the pattern string
164: size is the length of it.
165: bufp is a struct re_pattern_buffer * which points to the info
166: on where to store the byte commands.
167: This structure contains a char * which points to the
168: actual space, which should have been obtained with malloc.
169: compile_pattern may use realloc to grow the buffer space.
170:
171: The number of bytes of commands can be found out by looking in
172: the struct re_pattern_buffer that bufp pointed to,
173: after compile_pattern returns.
174: */
175:
176: #define PATPUSH(ch) (*b++ = (char) (ch))
177:
178: #define PATFETCH(c) \
179: {if (p == pend) goto end_of_pattern; \
180: c = * (unsigned char *) p++; \
181: if (translate) c = translate[c]; }
182:
183: #define PATFETCH_RAW(c) \
184: {if (p == pend) goto end_of_pattern; \
185: c = * (unsigned char *) p++; }
186:
187: #define PATUNFETCH p--
188:
189: #define EXTEND_BUFFER \
190: { old_buffer = bufp->buffer; \
191: if (bufp->allocated == (1<<16)) goto too_big; \
192: bufp->allocated *= 2; \
193: if (bufp->allocated > (1<<16)) bufp->allocated = (1<<16); \
194: if (!(bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated))) \
195: goto memory_exhausted; \
196: c = bufp->buffer - old_buffer; \
197: b += c; \
198: if (fixup_jump) \
199: fixup_jump += c; \
200: if (laststart) \
201: laststart += c; \
202: begalt += c; \
203: if (pending_exact) \
204: pending_exact += c; \
205: }
206:
207: static int store_jump (), insert_jump ();
208:
209: char *
210: re_compile_pattern (pattern, size, bufp)
211: char *pattern;
212: int size;
213: struct re_pattern_buffer *bufp;
214: {
215: register char *b = bufp->buffer;
216: register char *p = pattern;
217: char *pend = pattern + size;
218: register unsigned c, c1;
219: char *p1;
220: unsigned char *translate = (unsigned char *) bufp->translate;
221:
222: /* Temporary used when buffer is made bigger. */
223:
224: char *old_buffer;
225:
226: /* address of the count-byte of the most recently inserted "exactn" command.
227: This makes it possible to tell whether a new exact-match character
228: can be added to that command or requires a new "exactn" command. */
229:
230: char *pending_exact = 0;
231:
232: /* address of the place where a forward-jump should go
233: to the end of the containing expression.
234: Each alternative of an "or", except the last, ends with a forward-jump
235: of this sort. */
236:
237: char *fixup_jump = 0;
238:
239: /* address of start of the most recently finished expression.
240: This tells postfix * where to find the start of its operand. */
241:
242: char *laststart = 0;
243:
244: /* In processing a repeat, 1 means zero matches is allowed */
245:
246: char zero_times_ok;
247:
248: /* In processing a repeat, 1 means many matches is allowed */
249:
250: char many_times_ok;
251:
252: /* address of beginning of regexp, or inside of last \( */
253:
254: char *begalt = b;
255:
256: /* Stack of information saved by \( and restored by \).
257: Four stack elements are pushed by each \(:
258: First, the value of b.
259: Second, the value of fixup_jump.
260: Third, the value of regnum.
261: Fourth, the value of begalt. */
262:
263: int stackb[40];
264: int *stackp = stackb;
265: int *stacke = stackb + 40;
266: int *stackt;
267:
268: /* Counts \('s as they are encountered. Remembered for the matching \),
269: where it becomes the "register number" to put in the stop_memory command */
270:
271: int regnum = 1;
272:
273: bufp->fastmap_accurate = 0;
274:
275: #ifndef emacs
276: /*
277: * Initialize the syntax table.
278: */
279: init_syntax_once();
280: #endif emacs
281:
282: while (p != pend)
283: {
284: if (b - bufp->buffer
285: > bufp->allocated - 10)
286: /* Note that EXTEND_BUFFER clobbers c */
287: EXTEND_BUFFER;
288:
289: PATFETCH (c);
290:
291: switch (c)
292: {
293: case '$':
294: /* $ means succeed if at end of line, but only in special contexts.
295: If randonly in the middle of a pattern, it is a normal character. */
296: if (p == pend || (*p == '\\' && (p[1] == ')' || p[1] == '|')))
297: {
298: PATPUSH (endline);
299: break;
300: }
301: goto normal_char;
302:
303: case '^':
304: /* ^ means succeed if at beg of line, but only if no preceding pattern. */
305: if (laststart) goto normal_char;
306: PATPUSH (begline);
307: break;
308:
309: case '*':
310: case '+':
311: case '?':
312: /* If there is no previous pattern, char not special. */
313: if (!laststart)
314: goto normal_char;
315: /* If there is a sequence of repetition chars,
316: collapse it down to equivalent to just one. */
317: zero_times_ok = 0;
318: many_times_ok = 0;
319: while (1)
320: {
321: zero_times_ok |= c != '+';
322: many_times_ok |= c != '?';
323: if (p == pend)
324: break;
325: PATFETCH (c);
326: if (!(c == '*' || c == '+' || c == '?'))
327: {
328: PATUNFETCH;
329: break;
330: }
331: }
332:
333: /* Now we know whether 0 matches is allowed,
334: and whether 2 or more matches is allowed. */
335: if (many_times_ok)
336: {
337: /* If more than one repetition is allowed,
338: put in a backward jump at the end. */
339: store_jump (b, maybe_finalize_jump, laststart - 3);
340: b += 3;
341: }
342: insert_jump (on_failure_jump, laststart, b + 3, b);
343: pending_exact = 0;
344: b += 3;
345: if (!zero_times_ok)
346: {
347: /* At least one repetition required: insert before the loop
348: a skip over the initial on-failure-jump instruction */
349: insert_jump (dummy_failure_jump, laststart, laststart + 6, b);
350: b += 3;
351: }
352: break;
353:
354: case '.':
355: laststart = b;
356: PATPUSH (anychar);
357: break;
358:
359: case '[':
360: if (b - bufp->buffer
361: > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH)
362: /* Note that EXTEND_BUFFER clobbers c */
363: EXTEND_BUFFER;
364:
365: laststart = b;
366: if (*p == '^')
367: PATPUSH (charset_not), p++;
368: else
369: PATPUSH (charset);
370: p1 = p;
371:
372: PATPUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
373: /* Clear the whole map */
374: bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
375: /* Read in characters and ranges, setting map bits */
376: while (1)
377: {
378: PATFETCH (c);
379: if (c == ']' && p != p1 + 1) break;
380: if (*p == '-')
381: {
382: PATFETCH (c1);
383: PATFETCH (c1);
384: while (c <= c1)
385: b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH), c++;
386: }
387: else
388: {
389: b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH);
390: }
391: }
392: /* Discard any bitmap bytes that are all 0 at the end of the map.
393: Decrement the map-length byte too. */
394: while (b[-1] > 0 && b[b[-1] - 1] == 0)
395: b[-1]--;
396: b += b[-1];
397: break;
398:
399: case '\\':
400: if (p == pend) goto invalid_pattern;
401: PATFETCH_RAW (c);
402: switch (c)
403: {
404: case '(':
405: if (stackp == stacke) goto nesting_too_deep;
406: if (regnum < RE_NREGS)
407: {
408: PATPUSH (start_memory);
409: PATPUSH (regnum);
410: }
411: *stackp++ = b - bufp->buffer;
412: *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0;
413: *stackp++ = regnum++;
414: *stackp++ = begalt - bufp->buffer;
415: fixup_jump = 0;
416: laststart = 0;
417: begalt = b;
418: break;
419:
420: case ')':
421: if (stackp == stackb) goto unmatched_close;
422: begalt = *--stackp + bufp->buffer;
423: if (fixup_jump)
424: store_jump (fixup_jump, jump, b);
425: if (stackp[-1] < RE_NREGS)
426: {
427: PATPUSH (stop_memory);
428: PATPUSH (stackp[-1]);
429: }
430: stackp -= 2;
431: fixup_jump = 0;
432: if (*stackp)
433: fixup_jump = *stackp + bufp->buffer - 1;
434: laststart = *--stackp + bufp->buffer;
435: break;
436:
437: case '|':
438: insert_jump (on_failure_jump, begalt, b + 6, b);
439: pending_exact = 0;
440: b += 3;
441: if (fixup_jump)
442: store_jump (fixup_jump, jump, b);
443: fixup_jump = b;
444: b += 3;
445: laststart = 0;
446: begalt = b;
447: break;
448:
449: #ifdef emacs
450: case '=':
451: PATPUSH (at_dot);
452: break;
453:
454: case 's':
455: laststart = b;
456: PATPUSH (syntaxspec);
457: PATFETCH (c);
458: PATPUSH (syntax_spec_code[c]);
459: break;
460:
461: case 'S':
462: laststart = b;
463: PATPUSH (notsyntaxspec);
464: PATFETCH (c);
465: PATPUSH (syntax_spec_code[c]);
466: break;
467: #endif emacs
468:
469: case 'w':
470: laststart = b;
471: PATPUSH (wordchar);
472: break;
473:
474: case 'W':
475: laststart = b;
476: PATPUSH (notwordchar);
477: break;
478:
479: case '<':
480: PATPUSH (wordbeg);
481: break;
482:
483: case '>':
484: PATPUSH (wordend);
485: break;
486:
487: case 'b':
488: PATPUSH (wordbound);
489: break;
490:
491: case 'B':
492: PATPUSH (notwordbound);
493: break;
494:
495: case '`':
496: PATPUSH (begbuf);
497: break;
498:
499: case '\'':
500: PATPUSH (endbuf);
501: break;
502:
503: case '1':
504: case '2':
505: case '3':
506: case '4':
507: case '5':
508: case '6':
509: case '7':
510: case '8':
511: case '9':
512: c1 = c - '0';
513: if (c1 >= regnum)
514: goto normal_char;
515: for (stackt = stackp - 2; stackt > stackb; stackt -= 4)
516: if (*stackt == c1)
517: goto normal_char;
518: laststart = b;
519: PATPUSH (duplicate);
520: PATPUSH (c1);
521: break;
522: default:
523: goto normal_char;
524: }
525: break;
526:
527: default:
528: normal_char:
529: if (!pending_exact || pending_exact + *pending_exact + 1 != b
530: || *pending_exact == 0177 || *p == '*' || *p == '^'
531: || *p == '+' || *p == '?')
532: {
533: laststart = b;
534: PATPUSH (exactn);
535: pending_exact = b;
536: PATPUSH (0);
537: }
538: PATPUSH (c);
539: (*pending_exact)++;
540: }
541: }
542:
543: if (fixup_jump)
544: store_jump (fixup_jump, jump, b);
545:
546: if (stackp != stackb) goto unmatched_open;
547:
548: bufp->used = b - bufp->buffer;
549: return 0;
550:
551: invalid_pattern:
552: return "Invalid regular expression";
553:
554: unmatched_open:
555: return "Unmatched \\(";
556:
557: unmatched_close:
558: return "Unmatched \\)";
559:
560: end_of_pattern:
561: return "Premature end of regular expression";
562:
563: nesting_too_deep:
564: return "Nesting too deep";
565:
566: too_big:
567: return "Regular expression too big";
568:
569: memory_exhausted:
570: return "Memory exhausted";
571: }
572:
573: #ifndef emacs
574: init_syntax_once ()
575: {
576: register int c;
577: static int done = 0;
578:
579: if (done)
580: return;
581:
582: bzero (syntax_table, sizeof syntax_table);
583:
584: for (c = 'a'; c <= 'z'; c++)
585: syntax_table[c] = Sword;
586:
587: for (c = 'A'; c <= 'Z'; c++)
588: syntax_table[c] = Sword;
589:
590: for (c = '0'; c <= '9'; c++)
591: syntax_table[c] = Sword;
592:
593: done = 1;
594: }
595: #endif not emacs
596:
597: /* Store where `from' points a jump operation to jump to where `to' points.
598: `opcode' is the opcode to store. */
599:
600: static int
601: store_jump (from, opcode, to)
602: char *from, *to;
603: char opcode;
604: {
605: from[0] = opcode;
606: from[1] = (to - (from + 3)) & 0377;
607: from[2] = (to - (from + 3)) >> 8;
608: }
609:
610: /* Open up space at char FROM, and insert there a jump to TO.
611: CURRENT_END gives te end of the storage no in use,
612: so we know how much data to copy up.
613: OP is the opcode of the jump to insert.
614:
615: If you call this function, you must zero out pending_exact. */
616:
617: static int
618: insert_jump (op, from, to, current_end)
619: char op;
620: char *from, *to, *current_end;
621: {
622: register char *pto = current_end + 3;
623: register char *pfrom = current_end;
624: while (pfrom != from)
625: *--pto = *--pfrom;
626: store_jump (from, op, to);
627: }
628:
629: /* Given a pattern, compute a fastmap from it.
630: The fastmap records which of the (1 << BYTEWIDTH) possible characters
631: can start a string that matches the pattern.
632: This fastmap is used by re_search to skip quickly over totally implausible text.
633:
634: The caller must supply the address of a (1 << BYTEWIDTH)-byte data area
635: as bufp->fastmap.
636: The other components of bufp describe the pattern to be used. */
637:
638: re_compile_fastmap (bufp)
639: struct re_pattern_buffer *bufp;
640: {
641: char *pattern = bufp->buffer;
642: int size = bufp->used;
643: register char *fastmap = bufp->fastmap;
644: register char *p = pattern;
645: register char *pend = pattern + size;
646: register int j, k;
647: unsigned char *translate = (unsigned char *) bufp->translate;
648:
649: char *stackb[NFAILURES];
650: char **stackp = stackb;
651:
652: bzero (fastmap, (1 << BYTEWIDTH));
653: bufp->fastmap_accurate = 1;
654: bufp->can_be_null = 0;
655:
656: while (p)
657: {
658: if (p == pend)
659: {
660: bufp->can_be_null = 1;
661: break;
662: }
663: #ifdef SWITCH_ENUM_BUG
664: switch ((int) ((enum regexpcode) *p++))
665: #else
666: switch ((enum regexpcode) *p++)
667: #endif
668: {
669: case exactn:
670: if (translate)
671: fastmap[translate[p[1]]] = 1;
672: else
673: fastmap[p[1]] = 1;
674: break;
675:
676: case begline:
677: case before_dot:
678: case at_dot:
679: case after_dot:
680: case begbuf:
681: case endbuf:
682: case wordbound:
683: case notwordbound:
684: case wordbeg:
685: case wordend:
686: continue;
687:
688: case endline:
689: if (translate)
690: fastmap[translate['\n']] = 1;
691: else
692: fastmap['\n'] = 1;
693: bufp->can_be_null = 1;
694: break;
695:
696: case finalize_jump:
697: case maybe_finalize_jump:
698: case jump:
699: case dummy_failure_jump:
700: bufp->can_be_null = 1;
701: j = *p++ & 0377;
702: j += SIGN_EXTEND_CHAR (*p++) << 8;
703: p += j;
704: if (j > 0)
705: continue;
706: /* Jump backward reached implies we just went through
707: the body of a loop and matched nothing.
708: Opcode jumped to should be an on_failure_jump.
709: Just treat it like an ordinary jump.
710: For a * loop, it has pushed its failure point already;
711: if so, discard that as redundant. */
712: if ((enum regexpcode) *p != on_failure_jump)
713: continue;
714: p++;
715: j = *p++ & 0377;
716: j += SIGN_EXTEND_CHAR (*p++) << 8;
717: p += j;
718: if (stackp != stackb && *stackp == p)
719: stackp--;
720: continue;
721:
722: case on_failure_jump:
723: j = *p++ & 0377;
724: j += SIGN_EXTEND_CHAR (*p++) << 8;
725: *++stackp = p + j;
726: continue;
727:
728: case start_memory:
729: case stop_memory:
730: p++;
731: continue;
732:
733: case duplicate:
734: bufp->can_be_null = 1;
735: case anychar:
736: for (j = 0; j < (1 << BYTEWIDTH); j++)
737: fastmap[j] = 1;
738: return;
739:
740: case wordchar:
741: for (j = 0; j < (1 << BYTEWIDTH); j++)
742: if (SYNTAX (j) == Sword)
743: fastmap[j] = 1;
744: break;
745:
746: case notwordchar:
747: for (j = 0; j < (1 << BYTEWIDTH); j++)
748: if (SYNTAX (j) != Sword)
749: fastmap[j] = 1;
750: break;
751:
752: #ifdef emacs
753: case syntaxspec:
754: k = *p++;
755: for (j = 0; j < (1 << BYTEWIDTH); j++)
756: if (SYNTAX (j) == (enum syntaxcode) k)
757: fastmap[j] = 1;
758: break;
759:
760: case notsyntaxspec:
761: for (j = 0; j < (1 << BYTEWIDTH); j++)
762: if (SYNTAX (j) != (enum syntaxcode) k)
763: fastmap[j] = 1;
764: break;
765: #endif emacs
766:
767: case charset:
768: for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
769: if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
770: {
771: if (translate)
772: fastmap[translate[j]] = 1;
773: else
774: fastmap[j] = 1;
775: }
776: break;
777:
778: case charset_not:
779: /* Chars beyond end of map must be allowed */
780: for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
781: if (translate)
782: fastmap[translate[j]] = 1;
783: else
784: fastmap[j] = 1;
785:
786: for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
787: if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
788: {
789: if (translate)
790: fastmap[translate[j]] = 1;
791: else
792: fastmap[j] = 1;
793: }
794: break;
795: }
796:
797: /* Get here means we have successfully found the possible starting characters
798: of one path of the pattern. We need not follow this path any farther.
799: Instead, look at the next alternative remembered in the stack. */
800: if (stackp != stackb)
801: p = *stackp--;
802: else
803: break;
804: }
805: }
806:
807: /* Like re_search_2, below, but only one string is specified. */
808:
809: re_search (pbufp, string, size, startpos, range, regs)
810: struct re_pattern_buffer *pbufp;
811: char *string;
812: int size, startpos, range;
813: struct re_registers *regs;
814: {
815: return re_search_2 (pbufp, 0, 0, string, size, startpos, range, regs, size);
816: }
817:
818: /* Like re_match_2 but tries first a match starting at index `startpos',
819: then at startpos + 1, and so on.
820: `range' is the number of places to try before giving up.
821: If `range' is negative, the starting positions tried are
822: startpos, startpos - 1, etc.
823: It is up to the caller to make sure that range is not so large
824: as to take the starting position outside of the input strings.
825:
826: The value returned is the position at which the match was found,
827: or -1 if no match was found. */
828:
829: int
830: re_search_2 (pbufp, string1, size1, string2, size2, startpos, range, regs, mstop)
831: struct re_pattern_buffer *pbufp;
832: char *string1, *string2;
833: int size1, size2;
834: int startpos;
835: register int range;
836: struct re_registers *regs;
837: int mstop;
838: {
839: register char *fastmap = pbufp->fastmap;
840: register char *translate = pbufp->translate;
841: int total = size1 + size2;
842:
843: /* Update the fastmap now if not correct already */
844: if (fastmap && !pbufp->fastmap_accurate)
845: re_compile_fastmap (pbufp);
846:
847: while (1)
848: {
849: /* If a fastmap is supplied, skip quickly over characters
850: that cannot possibly be the start of a match.
851: Note, however, that if the pattern can possibly match
852: the null string, we must test it at each starting point
853: so that we take the first null string we get. */
854:
855: if (fastmap && startpos < total && !pbufp->can_be_null)
856: {
857: if (range > 0)
858: {
859: register int lim = 0;
860: register char *p;
861: int irange = range;
862: if (startpos < size1 && startpos + range >= size1)
863: lim = range - (size1 - startpos);
864:
865: p = &(startpos >= size1 ? string2 - size1 : string1)[startpos];
866:
867: if (translate)
868: {
869: while (range > lim && !fastmap[translate[*p++]])
870: range--;
871: }
872: else
873: {
874: while (range > lim && !fastmap[*p++])
875: range--;
876: }
877: startpos += irange - range;
878: }
879: else
880: {
881: register char c;
882: if (startpos >= size1) c = string2[startpos - size1];
883: else c = string1[startpos];
884: if (translate ? !fastmap[translate[c]] : !fastmap[c])
885: goto advance;
886: }
887: }
888:
889: if (range >= 0 && startpos == total
890: && fastmap && !pbufp->can_be_null)
891: return -1;
892:
893: if (0 <= re_match_2 (pbufp, string1, size1, string2, size2, startpos, regs, mstop))
894: return startpos;
895:
896: advance:
897: if (!range) break;
898: if (range > 0) range--, startpos++; else range++, startpos--;
899: }
900: return -1;
901: }
902:
903: #ifndef emacs /* emacs never uses this */
904: re_match (pbufp, string, size, pos, regs)
905: struct re_pattern_buffer *pbufp;
906: char *string;
907: int size, pos;
908: struct re_registers *regs;
909: {
910: return re_match_2 (pbufp, 0, 0, string, size, pos, regs, size);
911: }
912: #endif /* emacs */
913:
914: /* Match the pattern described by `pbufp'
915: against data which is the virtual concatenation of `string1' and `string2'.
916: `size1' and `size2' are the sizes of the two data strings.
917: Start the match at position `pos'.
918: Do not consider matching past the position `mstop'.
919:
920: If pbufp->fastmap is nonzero, then it had better be up to date.
921:
922: The reason that the data to match is specified as two components
923: which are to be regarded as concatenated
924: is so that this function can be used directly on the contents of an Emacs buffer.
925:
926: -1 is returned if there is no match. Otherwise the value is the length
927: of the substring which was matched.
928: */
929:
930: int
931: re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop)
932: struct re_pattern_buffer *pbufp;
933: char *string1, *string2;
934: int size1, size2;
935: int pos;
936: struct re_registers *regs;
937: int mstop;
938: {
939: register char *p = pbufp->buffer;
940: register char *pend = p + pbufp->used;
941: /* End of first string */
942: char *end1;
943: /* End of second string */
944: char *end2;
945: /* Pointer just past last char to consider matching */
946: char *end_match_1, *end_match_2;
947: register char *d, *dend;
948: register int mcnt;
949: char *translate = pbufp->translate;
950:
951: /* Failure point stack. Each place that can handle a failure further down the line
952: pushes a failure point on this stack. It consists of two char *'s.
953: The first one pushed is where to resume scanning the pattern;
954: the second pushed is where to resume scanning the strings.
955: If the latter is zero, the failure point is a "dummy".
956: If a failure happens and the innermost failure point is dormant,
957: it discards that failure point and tries the next one. */
958:
959: char **stackb = (char **) alloca (2 * NFAILURES * sizeof (char *));
960: char **stackp = stackb, **stacke = &stackb[2 * NFAILURES];
961:
962: /* Information on the "contents" of registers.
963: These are pointers into the input strings; they record
964: just what was matched (on this attempt) by some part of the pattern.
965: The start_memory command stores the start of a register's contents
966: and the stop_memory command stores the end.
967:
968: At that point, regstart[regnum] points to the first character in the register,
969: regend[regnum] points to the first character beyond the end of the register,
970: and regstart_segend[regnum] is either the same as regend[regnum]
971: or else points to the end of the input string into which regstart[regnum] points.
972: The latter case happens when regstart[regnum] is in string1 and
973: regend[regnum] is in string2. */
974:
975: char *regstart[RE_NREGS];
976: char *regstart_segend[RE_NREGS];
977: char *regend[RE_NREGS];
978:
979: /* Set up pointers to ends of strings.
980: Don't allow the second string to be empty unless both are empty. */
981: if (!size2)
982: {
983: string2 = string1;
984: size2 = size1;
985: string1 = 0;
986: size1 = 0;
987: }
988: end1 = string1 + size1;
989: end2 = string2 + size2;
990:
991: /* Compute where to stop matching, within the two strings */
992: if (mstop <= size1)
993: {
994: end_match_1 = string1 + mstop;
995: end_match_2 = string2;
996: }
997: else
998: {
999: end_match_1 = end1;
1000: end_match_2 = string2 + mstop - size1;
1001: }
1002:
1003: /* Initialize \( and \) text positions to -1
1004: to mark ones that no \( or \) has been seen for. */
1005:
1006: for (mcnt = 0; mcnt < sizeof (regstart) / sizeof (*regstart); mcnt++)
1007: regstart[mcnt] = (char *) -1;
1008:
1009: /* `p' scans through the pattern as `d' scans through the data.
1010: `dend' is the end of the input string that `d' points within.
1011: `d' is advanced into the following input string whenever necessary,
1012: but this happens before fetching;
1013: therefore, at the beginning of the loop,
1014: `d' can be pointing at the end of a string,
1015: but it cannot equal string2. */
1016:
1017: if (pos <= size1)
1018: d = string1 + pos, dend = end_match_1;
1019: else
1020: d = string2 + pos - size1, dend = end_match_2;
1021:
1022: /* Write PREFETCH; just before fetching a character with *d. */
1023: #define PREFETCH \
1024: while (d == dend) \
1025: { if (dend == end_match_2) goto fail; /* end of string2 => failure */ \
1026: d = string2; /* end of string1 => advance to string2. */ \
1027: dend = end_match_2; }
1028:
1029: /* This loop loops over pattern commands.
1030: It exits by returning from the function if match is complete,
1031: or it drops through if match fails at this starting point in the input data. */
1032:
1033: while (1)
1034: {
1035: if (p == pend)
1036: /* End of pattern means we have succeeded! */
1037: {
1038: /* If caller wants register contents data back, convert it to indices */
1039: if (regs)
1040: {
1041: bzero (regs, sizeof (*regs));
1042:
1043: regend[0] = d;
1044: regstart[0] = string1;
1045: for (mcnt = 0; mcnt < RE_NREGS; mcnt++)
1046: {
1047: if (mcnt && regstart[mcnt] == (char *) -1) continue;
1048: if (regstart[mcnt] - string1 < 0 || regstart[mcnt] - string1 > size1)
1049: regs->start[mcnt] = regstart[mcnt] - string2 + size1;
1050: else
1051: regs->start[mcnt] = regstart[mcnt] - string1;
1052: if (regend[mcnt] - string1 < 0 || regend[mcnt] - string1 > size1)
1053: regs->end[mcnt] = regend[mcnt] - string2 + size1;
1054: else
1055: regs->end[mcnt] = regend[mcnt] - string1;
1056: }
1057: regs->start[0] = pos;
1058: }
1059: if (d - string1 >= 0 && d - string1 <= size1)
1060: return d - string1 - pos;
1061: else
1062: return d - string2 + size1 - pos;
1063: }
1064:
1065: /* Otherwise match next pattern command */
1066: #ifdef SWITCH_ENUM_BUG
1067: switch ((int) ((enum regexpcode) *p++))
1068: #else
1069: switch ((enum regexpcode) *p++)
1070: #endif
1071: {
1072:
1073: /* \( is represented by a start_memory, \) by a stop_memory.
1074: Both of those commands contain a "register number" argument.
1075: The text matched within the \( and \) is recorded under that number.
1076: Then, \<digit> turns into a `duplicate' command which
1077: is followed by the numeric value of <digit> as the register number. */
1078:
1079: case start_memory:
1080: regstart[*p] = d;
1081: regstart_segend[*p++] = dend;
1082: break;
1083:
1084: case stop_memory:
1085: regend[*p] = d;
1086: if (regstart_segend[*p] == dend)
1087: regstart_segend[*p] = d;
1088: p++;
1089: break;
1090:
1091: case duplicate:
1092: {
1093: int regno = *p++; /* Get which register to match against */
1094: register char *d2, *dend2;
1095:
1096: d2 = regstart[regno];
1097: dend2 = regstart_segend[regno];
1098: while (1)
1099: {
1100: /* Advance to next segment in register contents, if necessary */
1101: while (d2 == dend2)
1102: {
1103: if (dend2 == end_match_2) break;
1104: if (dend2 == regend[regno]) break;
1105: d2 = string2, dend2 = regend[regno]; /* end of string1 => advance to string2. */
1106: }
1107: /* At end of register contents => success */
1108: if (d2 == dend2) break;
1109:
1110: /* Advance to next segment in data being matched, if necessary */
1111: PREFETCH;
1112:
1113: /* mcnt gets # consecutive chars to compare */
1114: mcnt = dend - d;
1115: if (mcnt > dend2 - d2)
1116: mcnt = dend2 - d2;
1117: /* Compare that many; failure if mismatch, else skip them. */
1118: if (translate ? bcmp_translate (d, d2, mcnt, translate) : bcmp (d, d2, mcnt))
1119: goto fail;
1120: d += mcnt, d2 += mcnt;
1121: }
1122: }
1123: break;
1124:
1125: case anychar:
1126: /* fetch a data character */
1127: PREFETCH;
1128: /* Match anything but a newline. */
1129: if ((translate ? translate[*d++] : *d++) == '\n')
1130: goto fail;
1131: break;
1132:
1133: case charset:
1134: case charset_not:
1135: {
1136: /* Nonzero for charset_not */
1137: int not = 0;
1138: register int c;
1139: if (*(p - 1) == (char) charset_not)
1140: not = 1;
1141:
1142: /* fetch a data character */
1143: PREFETCH;
1144:
1145: if (translate)
1146: c = translate [*(unsigned char *)d];
1147: else
1148: c = *(unsigned char *)d;
1149:
1150: if (c < *p * BYTEWIDTH
1151: && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
1152: not = !not;
1153:
1154: p += 1 + *p;
1155:
1156: if (!not) goto fail;
1157: d++;
1158: break;
1159: }
1160:
1161: case begline:
1162: if (d == string1 || d[-1] == '\n')
1163: break;
1164: goto fail;
1165:
1166: case endline:
1167: if (d == end2
1168: || (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n'))
1169: break;
1170: goto fail;
1171:
1172: /* "or" constructs ("|") are handled by starting each alternative
1173: with an on_failure_jump that points to the start of the next alternative.
1174: Each alternative except the last ends with a jump to the joining point.
1175: (Actually, each jump except for the last one really jumps
1176: to the following jump, because tensioning the jumps is a hassle.) */
1177:
1178: /* The start of a stupid repeat has an on_failure_jump that points
1179: past the end of the repeat text.
1180: This makes a failure point so that, on failure to match a repetition,
1181: matching restarts past as many repetitions have been found
1182: with no way to fail and look for another one. */
1183:
1184: /* A smart repeat is similar but loops back to the on_failure_jump
1185: so that each repetition makes another failure point. */
1186:
1187: case on_failure_jump:
1188: if (stackp == stacke)
1189: {
1190: char **stackx = (char **) alloca (2 * (stacke - stackb) * sizeof (char *));
1191: bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
1192: stackp += stackx - stackb;
1193: stacke = stackx + 2 * (stacke - stackb);
1194: stackb = stackx;
1195: }
1196: mcnt = *p++ & 0377;
1197: mcnt += SIGN_EXTEND_CHAR (*p++) << 8;
1198: *stackp++ = mcnt + p;
1199: *stackp++ = d;
1200: break;
1201:
1202: /* The end of a smart repeat has an maybe_finalize_jump back.
1203: Change it either to a finalize_jump or an ordinary jump. */
1204:
1205: case maybe_finalize_jump:
1206: mcnt = *p++ & 0377;
1207: mcnt += SIGN_EXTEND_CHAR (*p++) << 8;
1208: /* Compare what follows with the begining of the repeat.
1209: If we can establish that there is nothing that they would
1210: both match, we can change to finalize_jump */
1211: if (p == pend)
1212: p[-3] = (char) finalize_jump;
1213: else if (*p == (char) exactn || *p == (char) endline)
1214: {
1215: register int c = *p == (char) endline ? '\n' : p[2];
1216: register char *p1 = p + mcnt;
1217: /* p1[0] ... p1[2] are an on_failure_jump.
1218: Examine what follows that */
1219: if (p1[3] == (char) exactn && p1[5] != c)
1220: p[-3] = (char) finalize_jump;
1221: else if (p1[3] == (char) charset || p1[3] == (char) charset_not)
1222: {
1223: int not = p1[3] == (char) charset_not;
1224: if (c < p1[4] * BYTEWIDTH
1225: && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
1226: not = !not;
1227: /* not is 1 if c would match */
1228: /* That means it is not safe to finalize */
1229: if (!not)
1230: p[-3] = (char) finalize_jump;
1231: }
1232: }
1233: p -= 2;
1234: if (p[-1] != (char) finalize_jump)
1235: {
1236: p[-1] = (char) jump;
1237: goto nofinalize;
1238: }
1239:
1240: /* The end of a stupid repeat has a finalize-jump
1241: back to the start, where another failure point will be made
1242: which will point after all the repetitions found so far. */
1243:
1244: case finalize_jump:
1245: stackp -= 2;
1246:
1247: case jump:
1248: nofinalize:
1249: mcnt = *p++ & 0377;
1250: mcnt += SIGN_EXTEND_CHAR (*p++) << 8;
1251: p += mcnt;
1252: break;
1253:
1254: case dummy_failure_jump:
1255: if (stackp == stacke)
1256: {
1257: char **stackx = (char **) alloca (2 * (stacke - stackb) * sizeof (char *));
1258: bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
1259: stackp += stackx - stackb;
1260: stacke = stackx + 2 * (stacke - stackb);
1261: stackb = stackx;
1262: }
1263: *stackp++ = 0;
1264: *stackp++ = 0;
1265: goto nofinalize;
1266:
1267: case wordbound:
1268: if (d == string1 /* Points to first char */
1269: || d == end2 /* Points to end */
1270: || (d == end1 && size2 == 0)) /* Points to end */
1271: break;
1272: if ((SYNTAX (((unsigned char *)d)[-1]) == Sword)
1273: != (SYNTAX (d == end1 ? *(unsigned char *)string2 : *(unsigned char *)d) == Sword))
1274: break;
1275: goto fail;
1276:
1277: case notwordbound:
1278: if (d == string1 /* Points to first char */
1279: || d == end2 /* Points to end */
1280: || (d == end1 && size2 == 0)) /* Points to end */
1281: goto fail;
1282: if ((SYNTAX (((unsigned char *)d)[-1]) == Sword)
1283: != (SYNTAX (d == end1 ? *(unsigned char *)string2 : *(unsigned char *)d) == Sword))
1284: goto fail;
1285: break;
1286:
1287: case wordbeg:
1288: if (d == end2 /* Points to end */
1289: || (d == end1 && size2 == 0) /* Points to end */
1290: || SYNTAX (*(unsigned char *) (d == end1 ? string2 : d)) != Sword) /* Next char not a letter */
1291: goto fail;
1292: if (d == string1 /* Points to first char */
1293: || SYNTAX (((unsigned char *)d)[-1]) != Sword) /* prev char not letter */
1294: break;
1295: goto fail;
1296:
1297: case wordend:
1298: if (d == string1 /* Points to first char */
1299: || SYNTAX (((unsigned char *)d)[-1]) != Sword) /* prev char not letter */
1300: goto fail;
1301: if (d == end2 /* Points to end */
1302: || (d == end1 && size2 == 0) /* Points to end */
1303: || SYNTAX (d == end1 ? *(unsigned char *)string2 : *(unsigned char *)d) != Sword) /* Next char not a letter */
1304: break;
1305: goto fail;
1306:
1307: #ifdef emacs
1308: case before_dot:
1309: if (((d - string2 <= (unsigned) size2)
1310: ? d - (char *) bf_p2 : d - (char *) bf_p1)
1311: <= point)
1312: goto fail;
1313: break;
1314:
1315: case at_dot:
1316: if (((d - string2 <= (unsigned) size2)
1317: ? d - (char *) bf_p2 : d - (char *) bf_p1)
1318: == point)
1319: goto fail;
1320: break;
1321:
1322: case after_dot:
1323: if (((d - string2 <= (unsigned) size2)
1324: ? d - (char *) bf_p2 : d - (char *) bf_p1)
1325: >= point)
1326: goto fail;
1327: break;
1328:
1329: case wordchar:
1330: mcnt = (int) Sword;
1331: goto matchsyntax;
1332:
1333: case syntaxspec:
1334: mcnt = *p++;
1335: matchsyntax:
1336: PREFETCH;
1337: if (SYNTAX (*(unsigned char *)d++) != (enum syntaxcode) mcnt) goto fail;
1338: break;
1339:
1340: case notwordchar:
1341: mcnt = (int) Sword;
1342: goto matchnotsyntax;
1343:
1344: case notsyntaxspec:
1345: mcnt = *p++;
1346: matchnotsyntax:
1347: PREFETCH;
1348: if (SYNTAX (*(unsigned char *)d++) == (enum syntaxcode) mcnt) goto fail;
1349: break;
1350: #else
1351: case wordchar:
1352: PREFETCH;
1353: if (SYNTAX (*(unsigned char *)d++) == 0) goto fail;
1354: break;
1355:
1356: case notwordchar:
1357: PREFETCH;
1358: if (SYNTAX (*(unsigned char *)d++) != 0) goto fail;
1359: break;
1360: #endif not emacs
1361:
1362: case begbuf:
1363: if (d == string1) /* Note, d cannot equal string2 */
1364: break; /* unless string1 == string2. */
1365: goto fail;
1366:
1367: case endbuf:
1368: if (d == end2 || (d == end1 && size2 == 0))
1369: break;
1370: goto fail;
1371:
1372: case exactn:
1373: /* Match the next few pattern characters exactly.
1374: mcnt is how many characters to match. */
1375: mcnt = *p++;
1376: if (translate)
1377: {
1378: do
1379: {
1380: PREFETCH;
1381: if (translate[*(unsigned char *)d++] != *p++) goto fail;
1382: }
1383: while (--mcnt);
1384: }
1385: else
1386: {
1387: do
1388: {
1389: PREFETCH;
1390: if (*d++ != *p++) goto fail;
1391: }
1392: while (--mcnt);
1393: }
1394: break;
1395: }
1396: continue; /* Successfully matched one pattern command; keep matching */
1397:
1398: /* Jump here if any matching operation fails. */
1399: fail:
1400: if (stackp != stackb)
1401: /* A restart point is known. Restart there and pop it. */
1402: {
1403: if (!stackp[-2])
1404: { /* If innermost failure point is dormant, flush it and keep looking */
1405: stackp -= 2;
1406: goto fail;
1407: }
1408: d = *--stackp;
1409: p = *--stackp;
1410: if (d >= string1 && d <= end1)
1411: dend = end_match_1;
1412: }
1413: else break; /* Matching at this starting point really fails! */
1414: }
1415: return -1; /* Failure to match */
1416: }
1417:
1418: static int
1419: bcmp_translate (s1, s2, len, translate)
1420: char *s1, *s2;
1421: register int len;
1422: char *translate;
1423: {
1424: register char *p1 = s1, *p2 = s2;
1425: while (len)
1426: {
1427: if (translate [*p1++] != translate [*p2++]) return 1;
1428: len--;
1429: }
1430: return 0;
1431: }
1432:
1433: /* Entry points compatible with bsd4.2 regex library */
1434:
1435: #ifndef emacs
1436:
1437: static struct re_pattern_buffer re_comp_buf;
1438:
1439: char *
1440: re_comp (s)
1441: char *s;
1442: {
1443: if (!s)
1444: {
1445: if (!re_comp_buf.buffer)
1446: return "No previous regular expression";
1447: return 0;
1448: }
1449:
1450: if (!re_comp_buf.buffer)
1451: {
1452: if (!(re_comp_buf.buffer = (char *) malloc (200)))
1453: return "Memory exhausted";
1454: re_comp_buf.allocated = 200;
1455: if (!(re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH)))
1456: return "Memory exhausted";
1457: }
1458: return re_compile_pattern (s, strlen (s), &re_comp_buf);
1459: }
1460:
1461: int
1462: re_exec (s)
1463: char *s;
1464: {
1465: int len = strlen (s);
1466: return 0 <= re_search (&re_comp_buf, s, len, 0, len, 0);
1467: }
1468:
1469: #endif /* emacs */
1470:
1471: #ifdef test
1472:
1473: #include <stdio.h>
1474:
1475: /* Indexed by a character, gives the upper case equivalent of the character */
1476:
1477: static char upcase[0400] =
1478: { 000, 001, 002, 003, 004, 005, 006, 007,
1479: 010, 011, 012, 013, 014, 015, 016, 017,
1480: 020, 021, 022, 023, 024, 025, 026, 027,
1481: 030, 031, 032, 033, 034, 035, 036, 037,
1482: 040, 041, 042, 043, 044, 045, 046, 047,
1483: 050, 051, 052, 053, 054, 055, 056, 057,
1484: 060, 061, 062, 063, 064, 065, 066, 067,
1485: 070, 071, 072, 073, 074, 075, 076, 077,
1486: 0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1487: 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
1488: 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
1489: 0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
1490: 0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1491: 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
1492: 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
1493: 0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177,
1494: 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
1495: 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
1496: 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
1497: 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
1498: 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
1499: 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
1500: 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
1501: 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
1502: 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
1503: 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
1504: 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
1505: 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
1506: 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
1507: 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
1508: 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
1509: 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
1510: };
1511:
1512: main ()
1513: {
1514: char pat[80];
1515: struct re_pattern_buffer buf;
1516: int i;
1517: char c;
1518: char fastmap[(1 << BYTEWIDTH)];
1519:
1520: buf.allocated = 40;
1521: buf.buffer = (char *) malloc (buf.allocated);
1522: buf.fastmap = fastmap;
1523: buf.translate = upcase;
1524:
1525: while (1)
1526: {
1527: gets (pat);
1528:
1529: if (*pat)
1530: {
1531: re_compile_pattern (pat, strlen(pat), &buf);
1532:
1533: for (i = 0; i < buf.used; i++)
1534: printchar (buf.buffer[i]);
1535:
1536: putchar ('\n');
1537:
1538: printf ("%d allocated, %d used.\n", buf.allocated, buf.used);
1539:
1540: re_compile_fastmap (&buf);
1541: printf ("Allowed by fastmap: ");
1542: for (i = 0; i < (1 << BYTEWIDTH); i++)
1543: if (fastmap[i]) printchar (i);
1544: putchar ('\n');
1545: }
1546:
1547: gets (pat); /* Now read the string to match against */
1548:
1549: i = re_match (&buf, pat, strlen (pat), 0, 0);
1550: printf ("Match value %d.\n", i);
1551: }
1552: }
1553:
1554: #ifdef NOTDEF
1555: print_buf (bufp)
1556: struct re_pattern_buffer *bufp;
1557: {
1558: int i;
1559:
1560: printf ("buf is :\n----------------\n");
1561: for (i = 0; i < bufp->used; i++)
1562: printchar (bufp->buffer[i]);
1563:
1564: printf ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used);
1565:
1566: printf ("Allowed by fastmap: ");
1567: for (i = 0; i < (1 << BYTEWIDTH); i++)
1568: if (bufp->fastmap[i])
1569: printchar (i);
1570: printf ("\nAllowed by translate: ");
1571: if (bufp->translate)
1572: for (i = 0; i < (1 << BYTEWIDTH); i++)
1573: if (bufp->translate[i])
1574: printchar (i);
1575: printf ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't");
1576: printf ("can %s be null\n----------", bufp->can_be_null ? "" : "not");
1577: }
1578: #endif
1579:
1580: printchar (c)
1581: char c;
1582: {
1583: if (c < 041 || c >= 0177)
1584: {
1585: putchar ('\\');
1586: putchar (((c >> 6) & 3) + '0');
1587: putchar (((c >> 3) & 7) + '0');
1588: putchar ((c & 7) + '0');
1589: }
1590: else
1591: putchar (c);
1592: }
1593:
1594: error (string)
1595: char *string;
1596: {
1597: puts (string);
1598: exit (1);
1599: }
1600:
1601: #endif test
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.