|
|
1.1 root 1: /* String search routines for GNU Emacs.
2: Copyright (C) 1985, 1986, 1987 Free Software Foundation, Inc.
3:
4: This file is part of GNU Emacs.
5:
6: GNU Emacs is distributed in the hope that it will be useful,
7: but WITHOUT ANY WARRANTY. No author or distributor
8: accepts responsibility to anyone for the consequences of using it
9: or for whether it serves any particular purpose or works at all,
10: unless he says so in writing. Refer to the GNU Emacs General Public
11: License for full details.
12:
13: Everyone is granted permission to copy, modify and redistribute
14: GNU Emacs, but only under the conditions described in the
15: GNU Emacs General Public License. A copy of this license is
16: supposed to have been given to you along with GNU Emacs so you
17: can know your rights and responsibilities. It should be in a
18: file named COPYING. Among other things, the copyright notice
19: and this notice must be preserved on all copies. */
20:
21:
22: #include "config.h"
23: #include "lisp.h"
24: #include "syntax.h"
25: #include "buffer.h"
26: #include "commands.h"
27: #include "regex.h"
28:
29: #define max(a, b) ((a) > (b) ? (a) : (b))
30: #define min(a, b) ((a) < (b) ? (a) : (b))
31:
32: unsigned char downcase_table[01000] = {0}; /* folds upper to lower case */
33: /* A WHEEL WILL FALL OFF IF, IN A trt, CHARACTER A */
34: /* TRANSLATES INTO CHARACTER B AND CHARACTER B DOES NOT */
35: /* ALSO TRANSLATE INTO CHARACTER B. */
36: /* If that constraint is met, compute_trt_inverse will follow a */
37: /* translation table with its inverse. The inverse of a table */
38: /* follows the table at table[0400]. The form of this is that if */
39: /* table[a]=b then the chain starting at table[0400+b], linked by */
40: /* link(x)=table[0400+x] and ended by b must include a. */
41:
42: /* At present compute_trt_inverse is blinded and the inverse for this */
43: /* particular table is created by a single-purpose loop. */
44: /* compute_trt_inverse has been tested on the following cases: */
45: /* trt[x]=x, trt[x]=(+ 3 (logand x, 0370)), trt[x]='a', and the */
46: /* downcase table. */
47:
48: /* We compile regexps into this buffer and then use it for searching. */
49:
50: struct re_pattern_buffer searchbuf;
51:
52: char search_fastmap[0400];
53:
54: /* Last regexp we compiled */
55:
56: Lisp_Object last_regexp;
57:
58: /* Every call to re_match, etc., must pass &search_regs as the regs argument
59: unless you can show it is unnecessary (i.e., if re_match is certainly going
60: to be called again before region-around-match can be called). */
61:
62: static struct re_registers search_regs;
63:
64: /* error condition signalled when regexp compile_pattern fails */
65:
66: Lisp_Object Qinvalid_regexp;
67:
68: /* Compile a regexp and signal a Lisp error if anything goes wrong. */
69:
70: compile_pattern (pattern, bufp, translate)
71: Lisp_Object pattern;
72: struct re_pattern_buffer *bufp;
73: char *translate;
74: {
75: char *val;
76: Lisp_Object dummy;
77:
78: if (EQ (pattern, last_regexp)
79: && translate == bufp->translate)
80: return;
81: last_regexp = Qnil;
82: bufp->translate = translate;
83: val = re_compile_pattern (XSTRING (pattern)->data,
84: XSTRING (pattern)->size,
85: bufp);
86: if (val)
87: {
88: dummy = build_string (val);
89: while (1)
90: Fsignal (Qinvalid_regexp, Fcons (dummy, Qnil));
91: }
92: last_regexp = pattern;
93: return;
94: }
95:
96: /* Error condition used for failing searches */
97: Lisp_Object Qsearch_failed;
98:
99: Lisp_Object
100: signal_failure (arg)
101: Lisp_Object arg;
102: {
103: Fsignal (Qsearch_failed, Fcons (arg, Qnil));
104: return Qnil;
105: }
106:
107: DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
108: "t if text after point matches regular expression PAT.")
109: (string)
110: Lisp_Object string;
111: {
112: Lisp_Object val;
113: unsigned char *p1, *p2;
114: int s1, s2;
115: register int i;
116:
117: CHECK_STRING (string, 0);
118: compile_pattern (string, &searchbuf,
119: !NULL (bf_cur->case_fold_search) ? (char *) downcase_table : 0);
120:
121: immediate_quit = 1;
122: QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
123:
124: /* Get pointers and sizes of the two strings
125: that make up the visible portion of the buffer. */
126:
127: p1 = bf_p1 + bf_head_clip;
128: s1 = bf_s1 - (bf_head_clip - 1);
129: p2 = bf_p2 + bf_s1 + 1;
130: s2 = bf_s2 - bf_tail_clip;
131: if (s1 < 0)
132: {
133: p2 -= s1;
134: s2 += s1;
135: s1 = 0;
136: }
137: if (s2 < 0)
138: {
139: s1 += s2;
140: s2 = 0;
141: }
142:
143: val = (0 <= re_match_2 (&searchbuf, p1, s1, p2, s2,
144: point - FirstCharacter, &search_regs,
145: NumCharacters + 1 - FirstCharacter)
146: ? Qt : Qnil);
147: for (i = 0; i < RE_NREGS; i++)
148: if (search_regs.start[i] >= 0)
149: {
150: search_regs.start[i] += FirstCharacter;
151: search_regs.end[i] += FirstCharacter;
152: }
153: immediate_quit = 0;
154: return val;
155: }
156:
157: DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
158: "Return index of start of first match for REGEXP in STRING, or nil.\n\
159: If third arg START is non-nil, start search at that index in STRING.\n\
160: For index of first char beyond the match, do (match-end 0).\n\
161: match-end and match-beginning also give indices of substrings\n\
162: matched by parenthesis constructs in the pattern.")
163: (regexp, string, start)
164: Lisp_Object regexp, string, start;
165: {
166: int val;
167: int s;
168:
169: CHECK_STRING (regexp, 0);
170: CHECK_STRING (string, 1);
171:
172: if (NULL (start))
173: s = 0;
174: else
175: {
176: int len = XSTRING (string)->size;
177:
178: CHECK_NUMBER (start, 2);
179: s = XINT (start);
180: if (s < 0 && -s <= len)
181: s = len - s;
182: else if (0 > s || s > len)
183: args_out_of_range (string, start);
184: }
185:
186: compile_pattern (regexp, &searchbuf,
187: !NULL (bf_cur->case_fold_search) ? (char *) downcase_table : 0);
188: immediate_quit = 1;
189: val = re_search (&searchbuf, XSTRING (string)->data, XSTRING (string)->size,
190: s, XSTRING (string)->size - s, &search_regs);
191: immediate_quit = 0;
192: if (val < 0) return Qnil;
193: return make_number (val);
194: }
195:
196: scan_buffer (target, pos, cnt, shortage)
197: int *shortage, pos;
198: register int cnt, target;
199: {
200: int lim = ((cnt > 0) ? NumCharacters : FirstCharacter);
201: int direction = ((cnt > 0) ? 1 : -1);
202: register int lim0;
203: unsigned char *base;
204: register unsigned char *cursor, *limit;
205:
206: if (shortage != 0)
207: *shortage = 0;
208:
209: immediate_quit = 1;
210:
211: if (cnt > 0)
212: while (pos != lim + 1)
213: {
214: lim0 = BufferSafeCeiling (pos);
215: lim0 = min (lim, lim0);
216: limit = &CharAt (lim0) + 1;
217: base = (cursor = &CharAt (pos));
218: while (1)
219: {
220: while (*cursor != target && ++cursor != limit)
221: ;
222: if (cursor != limit)
223: {
224: if (--cnt == 0)
225: {
226: immediate_quit = 0;
227: return (pos + cursor - base + 1);
228: }
229: else
230: if (++cursor == limit)
231: break;
232: }
233: else
234: break;
235: }
236: pos += cursor - base;
237: }
238: else
239: {
240: pos--; /* first character we scan */
241: while (pos > lim - 1)
242: { /* we WILL scan under pos */
243: lim0 = BufferSafeFloor (pos);
244: lim0 = max (lim, lim0);
245: limit = &CharAt (lim0) - 1;
246: base = (cursor = &CharAt (pos));
247: cursor++;
248: while (1)
249: {
250: while (--cursor != limit && *cursor != target)
251: ;
252: if (cursor != limit)
253: {
254: if (++cnt == 0)
255: {
256: immediate_quit = 0;
257: return (pos + cursor - base + 1);
258: }
259: }
260: else
261: break;
262: }
263: pos += cursor - base;
264: }
265: }
266: immediate_quit = 0;
267: if (shortage != 0)
268: *shortage = cnt * direction;
269: return (pos + ((direction == 1 ? 0 : 1)));
270: }
271:
272: int
273: find_next_newline (from, cnt)
274: register int from, cnt;
275: {
276: return (scan_buffer ('\n', from, cnt, (int *) 0));
277: }
278:
279: DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
280: "Move point forward, stopping before a char not in CHARS, or at position LIM.\n\
281: CHARS is like the inside of a [...] in a regular expression\n\
282: except that ] is never special and \\ quotes ^, - or \\.\n\
283: Thus, with arg \"a-zA-Z\", this skips letters stopping before first nonletter.\n\
284: With arg \"^a-zA-Z\", skips nonletters stopping before first letter.")
285: (string, lim)
286: Lisp_Object string, lim;
287: {
288: skip_chars (1, string, lim);
289: return Qnil;
290: }
291:
292: DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
293: "Move point backward, stopping after a char not in CHARS, or at position LIM.\n\
294: See skip-chars-forward for details.")
295: (string, lim)
296: Lisp_Object string, lim;
297: {
298: skip_chars (0, string, lim);
299: return Qnil;
300: }
301:
302: skip_chars (forwardp, string, lim)
303: int forwardp;
304: Lisp_Object string, lim;
305: {
306: register unsigned char *p, *pend;
307: register unsigned char c;
308: unsigned char fastmap[0400];
309: int negate = 0;
310: register int i;
311:
312: CHECK_STRING (string, 0);
313:
314: if (NULL (lim))
315: XSETINT (lim, forwardp ? NumCharacters + 1 : FirstCharacter);
316: else
317: CHECK_NUMBER_COERCE_MARKER (lim, 1);
318:
319: p = XSTRING (string)->data;
320: pend = p + XSTRING (string)->size;
321: bzero (fastmap, sizeof fastmap);
322:
323: if (p != pend && *p == '^')
324: {
325: negate = 1; p++;
326: }
327:
328: /* Find the characters specified and set their elements of fastmap. */
329:
330: while (p != pend)
331: {
332: c = *p++;
333: if (c == '\\')
334: {
335: if (p == pend) break;
336: c = *p++;
337: }
338: if (p != pend && *p == '-')
339: {
340: p++;
341: if (p == pend) break;
342: while (c <= *p)
343: {
344: fastmap[c] = 1;
345: c++;
346: }
347: p++;
348: }
349: else
350: fastmap[c] = 1;
351: }
352:
353: /* If ^ was the first character, complement the fastmap. */
354:
355: if (negate)
356: for (i = 0; i < sizeof fastmap; i++)
357: fastmap[i] ^= 1;
358:
359: immediate_quit = 1;
360: if (forwardp)
361: {
362: while (point < XINT (lim) && fastmap[CharAt (point)])
363: PointRight (1);
364: }
365: else
366: {
367: while (point > XINT (lim) && fastmap[CharAt (point - 1)])
368: PointLeft (1);
369: }
370: immediate_quit = 0;
371: }
372:
373: /* Subroutines of Lisp buffer search functions. */
374:
375: static Lisp_Object
376: search_command (string, bound, noerror, count, direction, RE)
377: Lisp_Object string, bound, noerror, count;
378: int direction;
379: int RE;
380: {
381: register int np;
382: int lim;
383: int n = direction;
384:
385: if (!NULL (count))
386: {
387: CHECK_NUMBER (count, 3);
388: n *= XINT (count);
389: }
390:
391: CHECK_STRING (string, 0);
392: if (NULL (bound))
393: lim = n > 0 ? NumCharacters + 1 : FirstCharacter;
394: else
395: {
396: CHECK_NUMBER_COERCE_MARKER (bound, 1);
397: lim = XINT (bound);
398: if (n > 0 ? lim < point : lim > point)
399: error ("Invalid search bound (wrong side of point)");
400: if (lim > NumCharacters + 1)
401: lim = NumCharacters + 1;
402: if (lim < FirstCharacter)
403: lim = FirstCharacter;
404: }
405:
406: np = search_buffer (string, point, lim, n, RE,
407: !NULL (bf_cur->case_fold_search) ? downcase_table : 0);
408: if (np <= 0)
409: {
410: if (NULL (noerror))
411: return signal_failure (string);
412: if (!EQ (noerror, Qt))
413: {
414: if (lim < FirstCharacter || lim > NumCharacters + 1)
415: abort ();
416: SetPoint (lim);
417: }
418: return Qnil;
419: }
420:
421: if (np < FirstCharacter || np > NumCharacters + 1)
422: abort ();
423:
424: SetPoint (np);
425:
426: return Qt;
427: }
428:
429: /* search for the n'th occurrence of `string' in the current buffer,
430: starting at position `from' and stopping at position `lim',
431: treating `pat' as a literal string if `RE' is false or as
432: a regular expression if `RE' is true.
433:
434: If `n' is positive, searching is forward and `lim' must be greater than `from'.
435: If `n' is negative, searching is backward and `lim' must be less than `from'.
436:
437: Returns -x if only `n'-x occurrences found (x > 0),
438: or else the position at the beginning of the `n'th occurrence (if searching backward)
439: or the end (if searching forward). */
440:
441: /* INTERFACE CHANGE ALERT!!!! search_buffer now returns -x if only */
442: /* n-x occurences are found. */
443:
444: search_buffer (string, pos, lim, n, RE, trt)
445: Lisp_Object string;
446: int pos;
447: int lim;
448: int n;
449: int RE;
450: register unsigned char *trt;
451: {
452: int len = XSTRING (string)->size;
453: unsigned char *base_pat = XSTRING (string)->data;
454: register int *BM_tab;
455: int *BM_tab_base;
456: register int direction = ((n > 0) ? 1 : -1);
457: register int dirlen;
458: int infinity, limit, k, stride_for_teases;
459: register unsigned char *pat, *cursor, *p_limit;
460: register int i, j;
461: unsigned char *p1, *p2;
462: int s1, s2;
463:
464:
465: if (!len)
466: return (0);
467:
468: if (RE)
469: compile_pattern (string, &searchbuf, (char *) trt);
470:
471: if (RE /* Here we detect whether the */
472: /* generality of an RE search is */
473: /* really needed. */
474: && *(searchbuf.buffer) == (char) exactn /* first item is "exact match" */
475: && searchbuf.buffer[1] + 2 == searchbuf.used) /*first is ONLY item */
476: {
477: RE = 0; /* can do straight (non RE) search */
478: pat = (base_pat = (unsigned char *) searchbuf.buffer + 2);
479: /* trt already applied */
480: len = searchbuf.used - 2;
481: }
482: else if (!RE)
483: {
484: pat = (unsigned char *) alloca (len);
485:
486: for (i = len; i--;) /* Copy the pattern; apply trt */
487: *pat++ = (((int) trt) ? trt [*base_pat++] : *base_pat++);
488: pat -= len; base_pat = pat;
489: }
490:
491: if (RE)
492: {
493: immediate_quit = 1; /* Quit immediately if user types ^G,
494: because letting this function finish
495: can take too long. */
496: QUIT; /* Do a pending quit right away,
497: to avoid paradoxical behavior */
498: /* Get pointers and sizes of the two strings
499: that make up the visible portion of the buffer. */
500:
501: p1 = bf_p1 + bf_head_clip;
502: s1 = bf_s1 - (bf_head_clip - 1);
503: p2 = bf_p2 + bf_s1 + 1;
504: s2 = bf_s2 - bf_tail_clip;
505: if (s1 < 0)
506: {
507: p2 -= s1;
508: s2 += s1;
509: s1 = 0;
510: }
511: if (s2 < 0)
512: {
513: s1 += s2;
514: s2 = 0;
515: }
516: while (n < 0)
517: {
518: if (re_search_2 (&searchbuf, p1, s1, p2, s2,
519: pos - FirstCharacter, lim - pos, &search_regs,
520: /* Don't allow match past current point */
521: pos - FirstCharacter)
522: >= 0)
523: {
524: j = FirstCharacter;
525: for (i = 0; i < RE_NREGS; i++)
526: if (search_regs.start[i] >= 0)
527: {
528: search_regs.start[i] += j;
529: search_regs.end[i] += j;
530: }
531: /* Set pos to the new position. */
532: pos = search_regs.start[0];
533: }
534: else
535: {
536: immediate_quit = 0;
537: return (n);
538: }
539: n++;
540: }
541: while (n > 0)
542: {
543: if (re_search_2 (&searchbuf, p1, s1, p2, s2,
544: pos - FirstCharacter, lim - pos, &search_regs,
545: lim - FirstCharacter)
546: >= 0)
547: {
548: j = FirstCharacter;
549: for (i = 0; i < RE_NREGS; i++)
550: if (search_regs.start[i] >= 0)
551: {
552: search_regs.start[i] += j;
553: search_regs.end[i] += j;
554: }
555: pos = search_regs.end[0];
556: }
557: else
558: {
559: immediate_quit = 0;
560: return (0 - n);
561: }
562: n--;
563: }
564: immediate_quit = 0;
565: return (pos);
566: }
567: else /* non-RE case */
568: {
569: #ifdef C_ALLOCA
570: int BM_tab_space[0400];
571: BM_tab = &BM_tab_space[0];
572: #else
573: BM_tab = (int *) alloca (0400 * sizeof (int));
574: #endif
575: /* The general approach is that we are going to maintain that we know */
576: /* the first (closest to the present position, in whatever direction */
577: /* we're searching) character that could possibly be the last */
578: /* (furthest from present position) character of a valid match. We */
579: /* advance the state of our knowledge by looking at that character */
580: /* and seeing whether it indeed matches the last character of the */
581: /* pattern. If it does, we take a closer look. If it does not, we */
582: /* move our pointer (to putative last characters) as far as is */
583: /* logically possible. This amount of movement, which I call a */
584: /* stride, will be the length of the pattern if the actual character */
585: /* appears nowhere in the pattern, otherwise it will be the distance */
586: /* from the last occurrence of that character to the end of the */
587: /* pattern. */
588: /* As a coding trick, an enormous stride is coded into the table for */
589: /* characters that match the last character. This allows use of only */
590: /* a single test, a test for having gone past the end of the */
591: /* permissible match region, to test for both possible matches (when */
592: /* the stride goes past the end immediately) and failure to */
593: /* match (where you get nudged past the end one stride at a time). */
594:
595: /* Here we make a "mickey mouse" BM table. The stride of the search */
596: /* is determined only by the last character of the putative match. */
597: /* If that character does not match, we will stride the proper */
598: /* distance to propose a match that superimposes it on the last */
599: /* instance of a character that matches it (per trt), or misses */
600: /* it entirely if there is none. */
601:
602: dirlen = len * direction;
603: infinity = dirlen - (lim + pos + len + len) * direction;
604: if (direction < 0)
605: pat = (base_pat += len - 1);
606: BM_tab_base = BM_tab;
607: BM_tab += 0400;
608: j = dirlen; /* to get it in a register */
609: /* A character that does not appear in the pattern induces a */
610: /* stride equal to the pattern length. */
611: while (BM_tab_base != BM_tab)
612: {
613: *--BM_tab = j;
614: *--BM_tab = j;
615: *--BM_tab = j;
616: *--BM_tab = j;
617: }
618: i = 0;
619: while (i != infinity)
620: {
621: j = pat[i]; i += direction;
622: if (i == dirlen) i = infinity;
623: if ((int) trt)
624: {
625: k = (j = trt[j]);
626: if (i == infinity)
627: stride_for_teases = BM_tab[j];
628: BM_tab[j] = dirlen - i;
629: /* A translation table is followed by its inverse -- see */
630: /* comment following downcase_table for details */
631:
632: while ((j = trt[0400+j]) != k)
633: BM_tab[j] = dirlen - i;
634: }
635: else
636: {
637: if (i == infinity)
638: stride_for_teases = BM_tab[j];
639: BM_tab[j] = dirlen - i;
640: }
641: /* stride_for_teases tells how much to stride if we get a */
642: /* match on the far character but are subsequently */
643: /* disappointed, by recording what the stride would have been */
644: /* for that character if the last character had been */
645: /* different. */
646: }
647: infinity = dirlen - infinity;
648: pos += dirlen - ((direction > 0) ? direction : 0);
649: /* loop invariant - pos points at where last char (first char if reverse)
650: of pattern would align in a possible match. */
651: while (n != 0)
652: {
653: if ((lim - pos - (direction > 0)) * direction < 0)
654: return (n * (0 - direction));
655: /* First we do the part we can by pointers (maybe nothing) */
656: QUIT;
657: pat = base_pat;
658: limit = pos - dirlen + direction;
659: limit = ((direction > 0)
660: ? BufferSafeCeiling (limit)
661: : BufferSafeFloor (limit));
662: /* LIMIT is now the last (not beyond-last!) value
663: POS can take on without hitting edge of buffer or the gap. */
664: limit = ((direction > 0)
665: ? min (lim - 1, min (limit, pos + 20000))
666: : max (lim, max (limit, pos - 20000)));
667: if ((limit - pos) * direction > 20)
668: {
669: p_limit = &CharAt (limit);
670: p2 = (cursor = &CharAt (pos));
671: /* In this loop, pos + cursor - p2 is the surrogate for pos */
672: while (1) /* use one cursor setting as long as i can */
673: {
674: if (direction > 0) /* worth duplicating */
675: {
676: /* Use signed comparison if appropriate
677: to make cursor+infinity sure to be > p_limit.
678: Assuming that the buffer lies in a range of addresses
679: that are all "positive" (as ints) or all "negative",
680: either kind of comparison will work as long
681: as we don't step by infinity. So pick the kind
682: that works when we do step by infinity. */
683: if ((int) (p_limit + infinity) > (int) p_limit)
684: while ((int) cursor <= (int) p_limit)
685: cursor += BM_tab[*cursor];
686: else
687: while ((unsigned int) cursor <= (unsigned int) p_limit)
688: cursor += BM_tab[*cursor];
689: }
690: else
691: {
692: if ((int) (p_limit + infinity) < (int) p_limit)
693: while ((int) cursor >= (int) p_limit)
694: cursor += BM_tab[*cursor];
695: else
696: while ((unsigned int) cursor >= (unsigned int) p_limit)
697: cursor += BM_tab[*cursor];
698: }
699: /* If you are here, cursor is beyond the end of the searched region. */
700: /* This can happen if you match on the far character of the pattern, */
701: /* because the "stride" of that character is infinity, a number able */
702: /* to throw you well beyond the end of the search. It can also */
703: /* happen if you fail to match within the permitted region and would */
704: /* otherwise try a character beyond that region */
705: if ((cursor - p_limit) * direction <= len)
706: break; /* a small overrun is genuine */
707: cursor -= infinity; /* large overrun = hit */
708: i = dirlen - direction;
709: if ((int) trt)
710: {
711: while ((i -= direction) + direction != 0)
712: if (pat[i] != trt[*(cursor -= direction)])
713: break;
714: }
715: else
716: {
717: while ((i -= direction) + direction != 0)
718: if (pat[i] != *(cursor -= direction))
719: break;
720: }
721: cursor += dirlen - i - direction; /* fix cursor */
722: if (i + direction == 0)
723: {
724: cursor -= direction;
725: search_regs.start[0]
726: = pos + cursor - p2 + ((direction > 0)
727: ? 1 - len : 0);
728: search_regs.end[0] = len + search_regs.start[0];
729: if ((n -= direction) != 0)
730: cursor += dirlen; /* to resume search */
731: else
732: return ((direction > 0)
733: ? search_regs.end[0] : search_regs.start[0]);
734: }
735: else
736: cursor += stride_for_teases; /* <sigh> we lose - */
737: }
738: pos += cursor - p2;
739: }
740: else
741: /* Now we'll pick up a clump that has to be done the hard */
742: /* way because it covers a discontinuity */
743: {
744: limit = ((direction > 0)
745: ? BufferSafeCeiling (pos - dirlen + 1)
746: : BufferSafeFloor (pos - dirlen - 1));
747: limit = ((direction > 0)
748: ? min (limit + len, lim - 1)
749: : max (limit - len, lim));
750: /* LIMIT is now the last value POS can have
751: and still be valid for a possible match. */
752: while (1)
753: {
754: /* This loop can be coded for space rather than */
755: /* speed because it will usually run only once. */
756: /* (the reach is at most len + 21, and typically */
757: /* does not exceed len) */
758: while ((limit - pos) * direction >= 0)
759: pos += BM_tab[CharAt(pos)];
760: /* now run the same tests to distinguish going off the */
761: /* end, a match or a phoney match. */
762: if ((pos - limit) * direction <= len)
763: break; /* ran off the end */
764: /* Found what might be a match.
765: Set POS back to last (first if reverse) char pos. */
766: pos -= infinity;
767: i = dirlen - direction;
768: while ((i -= direction) + direction != 0)
769: {
770: pos -= direction;
771: if (pat[i] != (((int) trt)
772: ? trt[CharAt(pos)]
773: : CharAt (pos)))
774: break;
775: }
776: /* Above loop has moved POS part or all the way
777: back to the first char pos (last char pos if reverse).
778: Set it once again at the last (first if reverse) char. */
779: pos += dirlen - i- direction;
780: if (i + direction == 0)
781: {
782: pos -= direction;
783: search_regs.start[0]
784: = pos + ((direction > 0) ? 1 - len : 0);
785: search_regs.end[0] = len + search_regs.start[0];
786: if ((n -= direction) != 0)
787: pos += dirlen; /* to resume search */
788: else
789: return ((direction > 0)
790: ? search_regs.end[0] : search_regs.start[0]);
791: }
792: else
793: pos += stride_for_teases;
794: }
795: }
796: /* We have done one clump. Can we continue? */
797: if ((lim - pos) * direction < 0)
798: return ((0 - n) * direction);
799: }
800: }
801: }
802:
803: /* Given a string of words separated by word delimiters,
804: compute a regexp that matches those exact words
805: separated by arbitrary punctuation. */
806:
807: static Lisp_Object
808: wordify (string)
809: Lisp_Object string;
810: {
811: register unsigned char *p, *o;
812: register int i, len, punct_count = 0, word_count = 0;
813: Lisp_Object val;
814:
815: CHECK_STRING (string, 0);
816: p = XSTRING (string)->data;
817: len = XSTRING (string)->size;
818:
819: for (i = 0; i < len; i++)
820: if (SYNTAX (p[i]) != Sword)
821: {
822: punct_count++;
823: if (i > 0 && SYNTAX (p[i-1]) == Sword) word_count++;
824: }
825: if (SYNTAX (p[len-1]) == Sword) word_count++;
826: if (!word_count) return build_string ("");
827:
828: val = make_string (p, len - punct_count + 5 * (word_count - 1) + 4);
829:
830: o = XSTRING (val)->data;
831: *o++ = '\\';
832: *o++ = 'b';
833:
834: for (i = 0; i < len; i++)
835: if (SYNTAX (p[i]) == Sword)
836: *o++ = p[i];
837: else if (i > 0 && SYNTAX (p[i-1]) == Sword && --word_count)
838: {
839: *o++ = '\\';
840: *o++ = 'W';
841: *o++ = '\\';
842: *o++ = 'W';
843: *o++ = '*';
844: }
845:
846: *o++ = '\\';
847: *o++ = 'b';
848:
849: return val;
850: }
851:
852: DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
853: "sSearch backward: ",
854: "Search backward from point for STRING.\n\
855: Set point to the beginning of the occurrence found, and return t.\n\
856: An optional second argument bounds the search; it is a buffer position.\n\
857: The match found must not extend before that position.\n\
858: Optional third argument, if t, means if fail just return nil (no error).\n\
859: If not nil and not t, position at limit of search and return nil.\n\
860: Optional fourth argument is repeat count--search for successive occurrences.")
861: (string, bound, noerror, count)
862: Lisp_Object string, bound, noerror, count;
863: {
864: return search_command (string, bound, noerror, count, -1, 0);
865: }
866:
867: DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "sSearch: ",
868: "Search forward from point for STRING.\n\
869: Set point to the end of the occurrence found, and return t.\n\
870: An optional second argument bounds the search; it is a buffer position.\n\
871: The match found must not extend after that position.\n\
872: Optional third argument, if t, means if fail just return nil (no error).\n\
873: If not nil and not t, move to limit of search and return nil.\n\
874: Optional fourth argument is repeat count--search for successive occurrences.")
875: (string, bound, noerror, count)
876: Lisp_Object string, bound, noerror, count;
877: {
878: return search_command (string, bound, noerror, count, 1, 0);
879: }
880:
881: DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
882: "sWord search backward: ",
883: "Search backward from point for STRING, ignoring differences in punctuation.\n\
884: Set point to the beginning of the occurrence found, and return t.\n\
885: An optional second argument bounds the search; it is a buffer position.\n\
886: The match found must not extend before that position.\n\
887: Optional third argument, if t, means if fail just return nil (no error).\n\
888: If not nil and not t, move to limit of search and return nil.\n\
889: Optional fourth argument is repeat count--search for successive occurrences.")
890: (string, bound, noerror, count)
891: Lisp_Object string, bound, noerror, count;
892: {
893: return search_command (wordify (string), bound, noerror, count, -1, 1);
894: }
895:
896: DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
897: "sWord search: ",
898: "Search forward from point for STRING, ignoring differences in punctuation.\n\
899: Set point to the end of the occurrence found, and return t.\n\
900: An optional second argument bounds the search; it is a buffer position.\n\
901: The match found must not extend after that position.\n\
902: Optional third argument, if t, means if fail just return nil (no error).\n\
903: If not nil and not t, move to limit of search and return nil.\n\
904: Optional fourth argument is repeat count--search for successive occurrences.")
905: (string, bound, noerror, count)
906: Lisp_Object string, bound, noerror, count;
907: {
908: return search_command (wordify (string), bound, noerror, count, 1, 1);
909: }
910:
911: DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
912: "sRE search backward: ",
913: "Search backward from point for match for regular expression REGEXP.\n\
914: Set point to the beginning of the match, and return t.\n\
915: The match found is the one starting last in the buffer\n\
916: and yet ending before the place the origin of the search.\n\
917: An optional second argument bounds the search; it is a buffer position.\n\
918: The match found must start at or after that position.\n\
919: Optional third argument, if t, means if fail just return nil (no error).\n\
920: If not nil and not t, move to limit of search and return nil.\n\
921: Optional fourth argument is repeat count--search for successive occurrences.\n\
922: See also the functions match-beginning and match-end and replace-match.")
923: (string, bound, noerror, count)
924: Lisp_Object string, bound, noerror, count;
925: {
926: return search_command (string, bound, noerror, count, -1, 1);
927: }
928:
929: DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
930: "sRE search: ",
931: "Search forward from point for regular expression REGEXP.\n\
932: Set point to the end of the occurrence found, and return t.\n\
933: An optional second argument bounds the search; it is a buffer position.\n\
934: The match found must not extend after that position.\n\
935: Optional third argument, if t, means if fail just return nil (no error).\n\
936: If not nil and not t, move to limit of search and return nil.\n\
937: Optional fourth argument is repeat count--search for successive occurrences.\n\
938: See also the functions match-beginning and match-end and replace-match.")
939: (string, bound, noerror, count)
940: Lisp_Object string, bound, noerror, count;
941: {
942: return search_command (string, bound, noerror, count, 1, 1);
943: }
944:
945: DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 3, 0,
946: "Replace text matched by last search with NEWTEXT.\n\
947: If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
948: Otherwise convert to all caps or cap initials, like replaced text.\n\
949: If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
950: Otherwise treat \\ as special:\n\
951: \\& in NEWTEXT means substitute original matched text,\n\
952: \\N means substitute match for \\(...\\) number N,\n\
953: \\\\ means insert one \\.\n\
954: Leaves point at end of replacement text.")
955: (string, fixedcase, literal)
956: Lisp_Object string, fixedcase, literal;
957: {
958: enum { nochange, all_caps, cap_initial } case_action;
959: register int pos, last;
960: int some_multiletter_word;
961: int some_letter = 0;
962: register int c, prevc;
963: int inslen;
964:
965: CHECK_STRING (string, 0);
966:
967: case_action = nochange; /* We tried an initialization */
968: /* but some C compilers blew it */
969: if (search_regs.start[0] < FirstCharacter
970: || search_regs.start[0] > search_regs.end[0]
971: || search_regs.end[0] > NumCharacters + 1)
972: args_out_of_range(make_number (search_regs.start[0]),
973: make_number (search_regs.end[0]));
974:
975: if (NULL (fixedcase))
976: {
977: /* Decide how to casify by examining the matched text. */
978:
979: last = search_regs.end[0];
980: prevc = '\n';
981: case_action = all_caps;
982:
983: /* some_multiletter_word is set nonzero if any original word
984: is more than one letter long. */
985: some_multiletter_word = 0;
986:
987: for (pos = search_regs.start[0]; pos < last; pos++)
988: {
989: c = CharAt (pos);
990: if (LOWERCASEP (c))
991: {
992: /* Cannot be all caps if any original char is lower case */
993:
994: case_action = cap_initial;
995: if (SYNTAX (prevc) != Sword)
996: {
997: /* Cannot even be cap initials
998: if some original initial is lower case */
999: case_action = nochange;
1000: break;
1001: }
1002: else
1003: some_multiletter_word = 1;
1004: }
1005: else if (!NOCASEP (c))
1006: {
1007: some_letter = 1;
1008: if (!some_multiletter_word && SYNTAX (prevc) == Sword)
1009: some_multiletter_word = 1;
1010: }
1011:
1012: prevc = c;
1013: }
1014:
1015: /* Do not make new text all caps
1016: if the original text contained only single letter words. */
1017: if (case_action == all_caps && !some_multiletter_word)
1018: case_action = cap_initial;
1019:
1020: if (!some_letter) case_action = nochange;
1021: }
1022:
1023: SetPoint (search_regs.end[0]);
1024: if (!NULL (literal))
1025: Finsert (1, &string);
1026: else
1027: {
1028: for (pos = 0; pos < XSTRING (string)->size; pos++)
1029: {
1030: c = XSTRING (string)->data[pos];
1031: if (c == '\\')
1032: {
1033: c = XSTRING (string)->data[++pos];
1034: if (c == '&')
1035: place (search_regs.start[0],
1036: search_regs.end[0]);
1037: else if (c >= '1' && c <= RE_NREGS + '0')
1038: {
1039: if (search_regs.start[c - '0'] >= 1)
1040: place (search_regs.start[c - '0'],
1041: search_regs.end[c - '0']);
1042: }
1043: else
1044: insert_char (c);
1045: }
1046: else
1047: insert_char (c);
1048: }
1049: }
1050:
1051: inslen = point - (search_regs.end[0]);
1052: del_range (search_regs.start[0], search_regs.end[0]);
1053:
1054: if (case_action == all_caps)
1055: Fupcase_region (make_number (point - inslen), make_number (point));
1056: else if (case_action == cap_initial)
1057: upcase_initials_region (make_number (point - inslen), make_number (point));
1058: return Qnil;
1059: }
1060:
1061: static
1062: place (l1, l2)
1063: int l1, l2;
1064: {
1065: if (l1 < FirstCharacter)
1066: l1 = FirstCharacter;
1067: if (l1 >= NumCharacters + 1)
1068: l1 = NumCharacters + 1;
1069: if (l2 < l1) l2 = l1;
1070: if (l2 >= NumCharacters + 1)
1071: l2 = NumCharacters + 1;
1072: move_gap (point);
1073: InsCStr (&CharAt (l1), l2 - l1);
1074: }
1075:
1076: static Lisp_Object
1077: match_limit (num, beginningp)
1078: Lisp_Object num;
1079: int beginningp;
1080: {
1081: register int n;
1082:
1083: CHECK_NUMBER (num, 0);
1084: n = XINT (num);
1085: if (n < 0 || n >= RE_NREGS)
1086: args_out_of_range (num, make_number (RE_NREGS));
1087: if (search_regs.start[n] < 0)
1088: return Qnil;
1089: return (make_number ((beginningp) ? search_regs.start[n]
1090: : search_regs.end[n]));
1091: }
1092:
1093: DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
1094: "Return the character number of start of text matched by last regexp searched for.\n\
1095: ARG, a number, specifies which parenthesized expression in the last regexp.\n\
1096: Value is nil if ARGth pair didn't match, or there were less than ARG pairs.\n\
1097: Zero means the entire text matched by the whole regexp.")
1098: (num)
1099: Lisp_Object num;
1100: {
1101: return match_limit (num, 1);
1102: }
1103:
1104: DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
1105: "Return the character number of end of text matched by last regexp searched for.\n\
1106: ARG, a number, specifies which parenthesized expression in the last regexp.\n\
1107: Value is nil if ARGth pair didn't match, or there were less than ARG pairs.\n\
1108: Zero means the entire text matched by the whole regexp.")
1109: (num)
1110: Lisp_Object num;
1111: {
1112: return match_limit (num, 0);
1113: }
1114:
1115: DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 0, 0,
1116: "Return list containing all info on what the last search matched.\n\
1117: Element 2N is (match-beginning N); element 2N + 1 is (match-end N).\n\
1118: All the elements are markers or nil (nil if the Nth pair didn't match).")
1119: ()
1120: {
1121: Lisp_Object data[2 * RE_NREGS];
1122: int i, len;
1123:
1124: len = -1;
1125: for (i = 0; i < RE_NREGS; i++)
1126: {
1127: int start = search_regs.start[i];
1128: if (start >= 0)
1129: {
1130: data[2 * i] = Fmake_marker ();
1131: Fset_marker (data[2 * i], make_number (start), Qnil);
1132: data[2 * i + 1] = Fmake_marker ();
1133: Fset_marker (data[2 * i + 1],
1134: make_number (search_regs.end[i]), Qnil);
1135: len = i;
1136: }
1137: else
1138: data[2 * i] = data [2 * i + 1] = Qnil;
1139: }
1140: return Flist (2 * len + 2, data);
1141: }
1142:
1143:
1144: DEFUN ("store-match-data", Fstore_match_data, Sstore_match_data, 1, 1, 0,
1145: "Set internal data on last search match from elements of LIST.\n\
1146: LIST should have been created by calling match-data previously.")
1147: (list)
1148: register Lisp_Object list;
1149: {
1150: register int i;
1151: register Lisp_Object marker;
1152:
1153: if (!CONSP (list) && !NULL (list))
1154: list = wrong_type_argument (Qconsp, list, 0);
1155:
1156: for (i = 0; i < RE_NREGS; i++)
1157: {
1158: marker = Fcar (list);
1159: if (NULL (marker))
1160: {
1161: search_regs.start[i] = -1;
1162: list = Fcdr (list);
1163: }
1164: else
1165: {
1166: CHECK_MARKER (marker, 0);
1167: search_regs.start[i] = marker_position (marker);
1168: list = Fcdr (list);
1169:
1170: marker = Fcar (list);
1171: CHECK_MARKER (marker, 0);
1172: search_regs.end[i] = marker_position (marker);
1173: }
1174: list = Fcdr (list);
1175: }
1176:
1177: return Qnil;
1178: }
1179:
1180: /* Quote a string to inactivate reg-expr chars */
1181:
1182: DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
1183: "Return a regexp string which matches exactly STRING and nothing else.")
1184: (str)
1185: Lisp_Object str;
1186: {
1187: register unsigned char *p, *cp, *end;
1188: register int size;
1189: Lisp_Object ostr;
1190:
1191: CHECK_STRING (str, 0);
1192: size = XSTRING (str)->size;
1193:
1194: /* Increment `size' for the escapes we will need to insert */
1195:
1196: for (cp = XSTRING (str)->data, end = cp + size; cp != end; cp++)
1197: if (*cp == '[' || *cp == ']'
1198: || *cp == '*' || *cp == '.' || *cp == '\\'
1199: || *cp == '?' || *cp == '+'
1200: || *cp == '^' || *cp == '$')
1201: size++;
1202:
1203: ostr = Fmake_string (make_number (size), make_number (0));
1204:
1205: /* Now copy the data into the new string, inserting escapes. */
1206:
1207: p = XSTRING (ostr)->data;
1208: for (cp = XSTRING (str)->data; cp != end; cp++)
1209: {
1210: if (*cp == '[' || *cp == ']'
1211: || *cp == '*' || *cp == '.' || *cp == '\\'
1212: || *cp == '?' || *cp == '+'
1213: || *cp == '^' || *cp == '$')
1214: *p++ = '\\';
1215: *p++ = *cp;
1216: }
1217: return ostr;
1218: }
1219:
1220: /* This code should be unzapped when there comes to be multiple */
1221: /* translation tables. It has been certified on various cases. */
1222: /*
1223: void
1224: compute_trt_inverse (trt)
1225: register unsigned char *trt;
1226: {
1227: register int i = 0400;
1228: register unsigned char c, q;
1229:
1230: while (i--)
1231: trt[0400+i] = i;
1232: i = 0400;
1233: while (i--)
1234: {
1235: if ((q = trt[i]) != (unsigned char) i)
1236: {
1237: c = trt[q + 0400];
1238: trt[q + 0400] = i;
1239: trt[0400 + i] = c;
1240: }
1241: }
1242: }
1243: */
1244:
1245: syms_of_search ()
1246: {
1247: register int i;
1248:
1249: for (i = 0; i < 0400; i++)
1250: {
1251: downcase_table[i] = (i >= 'A' && i <= 'Z') ? i + 040 : i;
1252: /* We do this instead of using compute_trt_inverse to save space. */
1253: /* Does it? */
1254: downcase_table[0400+i]
1255: = ((i >= 'A' && i <= 'Z')
1256: ? i + ('a' - 'A')
1257: : ((i >= 'a' && i <= 'z')
1258: ? i + ('A' - 'a')
1259: : i));
1260: }
1261: /* Use this instead when there come to be multiple translation tables.
1262: compute_trt_inverse (downcase_table); */
1263:
1264: searchbuf.allocated = 100;
1265: searchbuf.buffer = (char *) malloc (searchbuf.allocated);
1266: searchbuf.fastmap = search_fastmap;
1267:
1268: Qsearch_failed = intern ("search-failed");
1269: staticpro (&Qsearch_failed);
1270: Qinvalid_regexp = intern ("invalid-regexp");
1271: staticpro (&Qinvalid_regexp);
1272:
1273: Fput (Qsearch_failed, Qerror_conditions,
1274: Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
1275: Fput (Qsearch_failed, Qerror_message,
1276: build_string ("Search failed"));
1277:
1278: Fput (Qinvalid_regexp, Qerror_conditions,
1279: Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
1280: Fput (Qinvalid_regexp, Qerror_message,
1281: build_string ("Invalid regexp"));
1282:
1283: last_regexp = Qnil;
1284: staticpro (&last_regexp);
1285:
1286: defsubr (&Sstring_match);
1287: defsubr (&Slooking_at);
1288: defsubr (&Sskip_chars_forward);
1289: defsubr (&Sskip_chars_backward);
1290: defsubr (&Ssearch_forward);
1291: defsubr (&Ssearch_backward);
1292: defsubr (&Sword_search_forward);
1293: defsubr (&Sword_search_backward);
1294: defsubr (&Sre_search_forward);
1295: defsubr (&Sre_search_backward);
1296: defsubr (&Sreplace_match);
1297: defsubr (&Smatch_beginning);
1298: defsubr (&Smatch_end);
1299: defsubr (&Smatch_data);
1300: defsubr (&Sstore_match_data);
1301: defsubr (&Sregexp_quote);
1302: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.