|
|
1.1 root 1: /* Definitions for data structures callers pass the regex library.
2: Copyright (C) 1985 Free Software Foundation, Inc.
3:
4: This program is free software; you can redistribute it and/or modify
5: it under the terms of the GNU General Public License as published by
6: the Free Software Foundation; either version 1, or (at your option)
7: any later version.
8:
9: This program is distributed in the hope that it will be useful,
10: but WITHOUT ANY WARRANTY; without even the implied warranty of
11: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12: GNU General Public License for more details.
13:
14: You should have received a copy of the GNU General Public License
15: along with this program; if not, write to the Free Software
16: Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17:
18: In other words, you are welcome to use, share and improve this program.
19: You are forbidden to forbid anyone else to use, share and improve
20: what you give them. Help stamp out software-hoarding! */
21:
22:
23: /* Define number of parens for which we record the beginnings and ends, counting entry 0 (the entire string matched) as one of them.
24: This affects how much space the `struct re_registers' type takes up. A definition of RE_NREGS made before this point takes precedence over the default below. */
25: #ifndef RE_NREGS
26: #define RE_NREGS 10
27: #endif
28:
29: /* These bits are used in the obscure_syntax variable to choose among
30: alternative regexp syntaxes. */
31:
32: /* 1 means plain parentheses serve as grouping, and backslash
33: parentheses are needed for literal searching.
34: 0 means backslash-parentheses are grouping, and plain parentheses
35: are for literal searching. */
36: #define RE_NO_BK_PARENS 1
37:
38: /* 1 means plain | serves as the "or"-operator, and \| is a literal.
39: 0 means \| serves as the "or"-operator, and | is a literal. */
40: #define RE_NO_BK_VBAR 2
41:
42: /* 0 means plain + or ? serves as an operator, and \+, \? are literals.
43: 1 means \+, \? are operators and plain +, ? are literals. */
44: #define RE_BK_PLUS_QM 4
45:
46: /* 1 means | binds tighter than ^ or $.
47: 0 means the contrary. */
48: #define RE_TIGHT_VBAR 8
49:
50: /* 1 means treat \n as an "or"-operator.
51: 0 means treat it as a normal character. */
52: #define RE_NEWLINE_OR 16
53:
54: /* 0 means that special characters (such as *, ^, and $) always have
55: their special meaning regardless of the surrounding context.
56: 1 means that special characters may act as normal characters in some
57: contexts. Specifically, this applies to:
58: ^ - only special at the beginning, or after ( or |
59: $ - only special at the end, or before ) or |
60: *, +, ? - only special when not after the beginning, (, or | */
61: #define RE_CONTEXT_INDEP_OPS 32
62:
63: /* Now define combinations of bits for the standard possibilities. RE_SYNTAX_EMACS is 0, i.e. none of the bits above is set. */
64: #define RE_SYNTAX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR | RE_CONTEXT_INDEP_OPS)
65: #define RE_SYNTAX_EGREP (RE_SYNTAX_AWK | RE_NEWLINE_OR)
66: #define RE_SYNTAX_GREP (RE_BK_PLUS_QM | RE_NEWLINE_OR)
67: #define RE_SYNTAX_EMACS 0
68:
69: /* This data structure is used to represent a compiled pattern. */
70:
71: struct re_pattern_buffer
72: {
73: char *buffer; /* Space holding the compiled pattern commands. */
74: int allocated; /* Size of space that buffer points to */
75: int used; /* Length of portion of buffer actually occupied */
76: char *fastmap; /* Pointer to fastmap, if any, or zero if none. */
77: /* re_search uses the fastmap, if there is one,
78: to skip quickly over totally implausible characters */
79: char *translate; /* Translate table to apply to all characters before comparing,
80: or zero for no translation.
81: The translation is applied to a pattern when it is compiled
82: and to data when it is matched. */
83: char fastmap_accurate;
84: /* Set to zero when a new pattern is stored,
85: set to one when the fastmap is updated from it. */
86: char can_be_null; /* Set to one by compiling fastmap
87: if this pattern might match the null string.
88: It does not necessarily match the null string
89: in that case, but if this is zero, it cannot.
90: A value of 2 means it can match the null string,
91: but only at end of range or before a character
92: listed in the fastmap. */
93: };
94:
95: /* Structure in which "register" contents are stored.
96:
97: Pass the address of such a structure as an argument to re_match, etc.,
98: if you want this information back.
99:
100: start[i] and end[i] record the string matched by \( ... \) grouping i,
101: for i from 1 to RE_NREGS - 1.
102: start[0] and end[0] record the entire string matched. Each array has RE_NREGS entries. */
103:
104: struct re_registers
105: {
106: int start[RE_NREGS];
107: int end[RE_NREGS];
108: };
109:
110: /* These are the command codes that appear in compiled regular expressions, one per byte.
111: Some command codes are followed by argument bytes.
112: A command code can specify any interpretation whatever for its arguments.
113: Zero-bytes may appear in the compiled regular expression. */
114:
115: enum regexpcode
116: {
117: unused,
118: exactn, /* followed by one byte giving n, and then by n literal bytes */
119: begline, /* fails unless at beginning of line */
120: endline, /* fails unless at end of line */
121: jump, /* followed by two bytes giving relative address to jump to */
122: on_failure_jump, /* followed by two bytes giving relative address of place
123: to resume at in case of failure. */
124: finalize_jump, /* Throw away latest failure point and then jump to address. */
125: maybe_finalize_jump, /* Like jump but finalize if safe to do so.
126: This is used to jump back to the beginning
127: of a repeat. If the command that follows
128: this jump is clearly incompatible with the
129: one at the beginning of the repeat, such that
130: we can be sure that there is no use backtracking
131: out of repetitions already completed,
132: then we finalize. */
133: dummy_failure_jump, /* jump, and push a dummy failure point.
134: This failure point will be thrown away
135: if an attempt is made to use it for a failure.
136: A + construct makes this before the first repeat. */
137: anychar, /* matches any one character */
138: charset, /* matches any one char belonging to specified set.
139: First following byte is the number of bitmap bytes.
140: Then come bytes for a bit-map saying which chars are in.
141: Bits in each byte are ordered low-bit-first.
142: A character is in the set if its bit is 1.
143: A character too large to have a bit in the map
144: is automatically not in the set */
145: charset_not, /* similar but match any character that is NOT one of those specified */
146: start_memory, /* starts remembering the text that is matched
147: and stores it in a memory register.
148: followed by one byte containing the register number.
149: Register numbers must be in the range 0 through NREGS. NOTE(review): NREGS is not defined in this header -- presumably RE_NREGS is meant, and since start[] and end[] have RE_NREGS entries the upper bound is probably RE_NREGS - 1; confirm against the matcher. */
150: stop_memory, /* stops remembering the text that is matched
151: and stores it in a memory register.
152: followed by one byte containing the register number.
153: Register numbers must be in the range 0 through NREGS. NOTE(review): NREGS is not defined in this header -- presumably RE_NREGS is meant, and since start[] and end[] have RE_NREGS entries the upper bound is probably RE_NREGS - 1; confirm against the matcher. */
154: duplicate, /* match a duplicate of something remembered.
155: Followed by one byte containing the index of the memory register. */
156: before_dot, /* Succeeds if before dot */
157: at_dot, /* Succeeds if at dot */
158: after_dot, /* Succeeds if after dot */
159: begbuf, /* Succeeds if at beginning of buffer */
160: endbuf, /* Succeeds if at end of buffer */
161: wordchar, /* Matches any word-constituent character */
162: notwordchar, /* Matches any char that is not a word-constituent */
163: wordbeg, /* Succeeds if at word beginning */
164: wordend, /* Succeeds if at word end */
165: wordbound, /* Succeeds if at a word boundary */
166: notwordbound, /* Succeeds if not at a word boundary */
167: syntaxspec, /* Matches any character whose syntax is specified.
168: followed by a byte which contains a syntax code, Sword or such like */
169: notsyntaxspec /* Matches any character whose syntax differs from that specified. */
170: };
171:
172: extern char *re_compile_pattern (); /* Entry points are declared with empty parameter lists, so the compiler does not check argument types at call sites. */
173: /* Is this really advertised? */
174: extern void re_compile_fastmap ();
175: extern int re_search (), re_search_2 ();
176: extern int re_match (), re_match_2 ();
177:
178: /* 4.2 BSD compatibility entry points (yuck) */
179: extern char *re_comp ();
180: extern int re_exec ();
181:
182: #ifdef SYNTAX_TABLE
183: extern char *re_syntax_table; /* Declared only when SYNTAX_TABLE is defined. */
184: #endif
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.