|
|
1.1 root 1: /* String search routines for GNU Emacs.
2: Copyright (C) 1985, 1986, 1987 Free Software Foundation, Inc.
3:
4: This file is part of GNU Emacs.
5:
6: GNU Emacs is free software; you can redistribute it and/or modify
7: it under the terms of the GNU General Public License as published by
8: the Free Software Foundation; either version 1, or (at your option)
9: any later version.
10:
11: GNU Emacs is distributed in the hope that it will be useful,
12: but WITHOUT ANY WARRANTY; without even the implied warranty of
13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14: GNU General Public License for more details.
15:
16: You should have received a copy of the GNU General Public License
17: along with GNU Emacs; see the file COPYING. If not, write to
18: the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
19:
20:
21: #include "config.h"
22: #include "lisp.h"
23: #include "syntax.h"
24: #include "buffer.h"
25: #include "commands.h"
26: #include "regex.h"
27:
28: #define max(a, b) ((a) > (b) ? (a) : (b))
29: #define min(a, b) ((a) < (b) ? (a) : (b))
30:
31: unsigned char downcase_table[01000] = {0}; /* folds upper to lower case */
32: /* A WHEEL WILL FALL OFF IF, IN A trt, CHARACTER A */
33: /* TRANSLATES INTO CHARACTER B AND CHARACTER B DOES NOT */
34: /* ALSO TRANSLATE INTO CHARACTER B. */
35: /* If that constraint is met, compute_trt_inverse will follow a */
36: /* translation table with its inverse. The inverse of a table */
37: /* follows the table at table[0400]. The form of this is that if */
38: /* table[a]=b then the chain starting at table[0400+b], linked by */
39: /* link(x)=table[0400+x] and ended by b must include a. */
40:
41: /* At present compute_trt_inverse is blinded and the inverse for this */
42: /* particular table is created by a single-purpose loop. */
43: /* compute_trt_inverse has been tested on the following cases: */
44: /* trt[x]=x, trt[x]=(+ 3 (logand x, 0370)), trt[x]='a', and the */
45: /* downcase table. */
46:
47: /* We compile regexps into this buffer and then use it for searching. */
48:
49: struct re_pattern_buffer searchbuf;
50:
51: extern int re_max_failures;
52:
53: char search_fastmap[0400];
54:
55: /* Last regexp we compiled */
56:
57: Lisp_Object last_regexp;
58:
59: /* Every call to re_match, etc., must pass &search_regs as the regs argument
60: unless you can show it is unnecessary (i.e., if re_match is certainly going
61: to be called again before region-around-match can be called). */
62:
63: static struct re_registers search_regs;
64:
65: /* error condition signalled when regexp compile_pattern fails */
66:
67: Lisp_Object Qinvalid_regexp;
68:
69: /* Compile a regexp and signal a Lisp error if anything goes wrong. */
70:
71: compile_pattern (pattern, bufp, translate)
72: Lisp_Object pattern;
73: struct re_pattern_buffer *bufp;
74: char *translate;
75: {
76: char *val;
77: Lisp_Object dummy;
78:
79: if (EQ (pattern, last_regexp)
80: && translate == bufp->translate)
81: return;
82:
83: last_regexp = Qnil;
84: bufp->translate = translate;
85: val = re_compile_pattern (XSTRING (pattern)->data,
86: XSTRING (pattern)->size,
87: bufp);
88: if (val)
89: {
90: dummy = build_string (val);
91: while (1)
92: Fsignal (Qinvalid_regexp, Fcons (dummy, Qnil));
93: }
94: last_regexp = pattern;
95: return;
96: }
97:
98: /* Error condition used for failing searches */
99: Lisp_Object Qsearch_failed;
100:
101: Lisp_Object
102: signal_failure (arg)
103: Lisp_Object arg;
104: {
105: Fsignal (Qsearch_failed, Fcons (arg, Qnil));
106: return Qnil;
107: }
108:
109: DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
110: "t if text after point matches regular expression PAT.")
111: (string)
112: Lisp_Object string;
113: {
114: Lisp_Object val;
115: unsigned char *p1, *p2;
116: int s1, s2;
117: register int i;
118:
119: CHECK_STRING (string, 0);
120: compile_pattern (string, &searchbuf,
121: !NULL (current_buffer->case_fold_search) ? (char *) downcase_table : 0);
122:
123: immediate_quit = 1;
124: QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
125:
126: /* Get pointers and sizes of the two strings
127: that make up the visible portion of the buffer. */
128:
129: p1 = BEGV_ADDR;
130: s1 = GPT - BEGV;
131: p2 = GAP_END_ADDR;
132: s2 = ZV - GPT;
133: if (s1 < 0)
134: {
135: p2 = p1;
136: s2 = ZV - BEGV;
137: s1 = 0;
138: }
139: if (s2 < 0)
140: {
141: s1 = ZV - BEGV;
142: s2 = 0;
143: }
144:
145: val = (0 <= re_match_2 (&searchbuf, p1, s1, p2, s2,
146: point - BEGV, &search_regs, ZV - BEGV)
147: ? Qt : Qnil);
148: for (i = 0; i < RE_NREGS; i++)
149: if (search_regs.start[i] >= 0)
150: {
151: search_regs.start[i] += BEGV;
152: search_regs.end[i] += BEGV;
153: }
154: immediate_quit = 0;
155: return val;
156: }
157:
158: DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
159: "Return index of start of first match for REGEXP in STRING, or nil.\n\
160: If third arg START is non-nil, start search at that index in STRING.\n\
161: For index of first char beyond the match, do (match-end 0).\n\
162: match-end and match-beginning also give indices of substrings\n\
163: matched by parenthesis constructs in the pattern.")
164: (regexp, string, start)
165: Lisp_Object regexp, string, start;
166: {
167: int val;
168: int s;
169:
170: CHECK_STRING (regexp, 0);
171: CHECK_STRING (string, 1);
172:
173: if (NULL (start))
174: s = 0;
175: else
176: {
177: int len = XSTRING (string)->size;
178:
179: CHECK_NUMBER (start, 2);
180: s = XINT (start);
181: if (s < 0 && -s <= len)
182: s = len - s;
183: else if (0 > s || s > len)
184: args_out_of_range (string, start);
185: }
186:
187: compile_pattern (regexp, &searchbuf,
188: !NULL (current_buffer->case_fold_search) ? (char *) downcase_table : 0);
189: immediate_quit = 1;
190: val = re_search (&searchbuf, XSTRING (string)->data, XSTRING (string)->size,
191: s, XSTRING (string)->size - s, &search_regs);
192: immediate_quit = 0;
193: if (val == -2)
194: error ("Overflow in regular expression matching");
195: if (val < 0) return Qnil;
196: return make_number (val);
197: }
198:
199: scan_buffer (target, pos, cnt, shortage)
200: int *shortage, pos;
201: register int cnt, target;
202: {
203: int lim = ((cnt > 0) ? ZV - 1 : BEGV);
204: int direction = ((cnt > 0) ? 1 : -1);
205: register int lim0;
206: unsigned char *base;
207: register unsigned char *cursor, *limit;
208:
209: if (shortage != 0)
210: *shortage = 0;
211:
212: immediate_quit = 1;
213:
214: if (cnt > 0)
215: while (pos != lim + 1)
216: {
217: lim0 = BufferSafeCeiling (pos);
218: lim0 = min (lim, lim0);
219: limit = &FETCH_CHAR (lim0) + 1;
220: base = (cursor = &FETCH_CHAR (pos));
221: while (1)
222: {
223: while (*cursor != target && ++cursor != limit)
224: ;
225: if (cursor != limit)
226: {
227: if (--cnt == 0)
228: {
229: immediate_quit = 0;
230: return (pos + cursor - base + 1);
231: }
232: else
233: if (++cursor == limit)
234: break;
235: }
236: else
237: break;
238: }
239: pos += cursor - base;
240: }
241: else
242: {
243: pos--; /* first character we scan */
244: while (pos > lim - 1)
245: { /* we WILL scan under pos */
246: lim0 = BufferSafeFloor (pos);
247: lim0 = max (lim, lim0);
248: limit = &FETCH_CHAR (lim0) - 1;
249: base = (cursor = &FETCH_CHAR (pos));
250: cursor++;
251: while (1)
252: {
253: while (--cursor != limit && *cursor != target)
254: ;
255: if (cursor != limit)
256: {
257: if (++cnt == 0)
258: {
259: immediate_quit = 0;
260: return (pos + cursor - base + 1);
261: }
262: }
263: else
264: break;
265: }
266: pos += cursor - base;
267: }
268: }
269: immediate_quit = 0;
270: if (shortage != 0)
271: *shortage = cnt * direction;
272: return (pos + ((direction == 1 ? 0 : 1)));
273: }
274:
275: int
276: find_next_newline (from, cnt)
277: register int from, cnt;
278: {
279: return (scan_buffer ('\n', from, cnt, (int *) 0));
280: }
281:
282: DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
283: "Move point forward, stopping before a char not in CHARS, or at position LIM.\n\
284: CHARS is like the inside of a [...] in a regular expression\n\
285: except that ] is never special and \\ quotes ^, - or \\.\n\
286: Thus, with arg \"a-zA-Z\", this skips letters stopping before first nonletter.\n\
287: With arg \"^a-zA-Z\", skips nonletters stopping before first letter.")
288: (string, lim)
289: Lisp_Object string, lim;
290: {
291: skip_chars (1, string, lim);
292: return Qnil;
293: }
294:
295: DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
296: "Move point backward, stopping after a char not in CHARS, or at position LIM.\n\
297: See skip-chars-forward for details.")
298: (string, lim)
299: Lisp_Object string, lim;
300: {
301: skip_chars (0, string, lim);
302: return Qnil;
303: }
304:
305: skip_chars (forwardp, string, lim)
306: int forwardp;
307: Lisp_Object string, lim;
308: {
309: register unsigned char *p, *pend;
310: register unsigned char c;
311: unsigned char fastmap[0400];
312: int negate = 0;
313: register int i;
314:
315: CHECK_STRING (string, 0);
316:
317: if (NULL (lim))
318: XFASTINT (lim) = forwardp ? ZV : BEGV;
319: else
320: CHECK_NUMBER_COERCE_MARKER (lim, 1);
321:
322: /* In any case, don't allow scan outside bounds of buffer. */
323: if (XFASTINT (lim) > ZV)
324: XFASTINT (lim) = ZV;
325: if (XFASTINT (lim) < BEGV)
326: XFASTINT (lim) = BEGV;
327:
328: p = XSTRING (string)->data;
329: pend = p + XSTRING (string)->size;
330: bzero (fastmap, sizeof fastmap);
331:
332: if (p != pend && *p == '^')
333: {
334: negate = 1; p++;
335: }
336:
337: /* Find the characters specified and set their elements of fastmap. */
338:
339: while (p != pend)
340: {
341: c = *p++;
342: if (c == '\\')
343: {
344: if (p == pend) break;
345: c = *p++;
346: }
347: if (p != pend && *p == '-')
348: {
349: p++;
350: if (p == pend) break;
351: while (c <= *p)
352: {
353: fastmap[c] = 1;
354: c++;
355: }
356: p++;
357: }
358: else
359: fastmap[c] = 1;
360: }
361:
362: /* If ^ was the first character, complement the fastmap. */
363:
364: if (negate)
365: for (i = 0; i < sizeof fastmap; i++)
366: fastmap[i] ^= 1;
367:
368: immediate_quit = 1;
369: if (forwardp)
370: {
371: while (point < XINT (lim) && fastmap[FETCH_CHAR (point)])
372: SET_PT (point + 1);
373: }
374: else
375: {
376: while (point > XINT (lim) && fastmap[FETCH_CHAR (point - 1)])
377: SET_PT (point - 1);
378: }
379: immediate_quit = 0;
380: }
381:
382: /* Subroutines of Lisp buffer search functions. */
383:
384: static Lisp_Object
385: search_command (string, bound, noerror, count, direction, RE)
386: Lisp_Object string, bound, noerror, count;
387: int direction;
388: int RE;
389: {
390: register int np;
391: int lim;
392: int n = direction;
393:
394: if (!NULL (count))
395: {
396: CHECK_NUMBER (count, 3);
397: n *= XINT (count);
398: }
399:
400: CHECK_STRING (string, 0);
401: if (NULL (bound))
402: lim = n > 0 ? ZV : BEGV;
403: else
404: {
405: CHECK_NUMBER_COERCE_MARKER (bound, 1);
406: lim = XINT (bound);
407: if (n > 0 ? lim < point : lim > point)
408: error ("Invalid search bound (wrong side of point)");
409: if (lim > ZV)
410: lim = ZV;
411: if (lim < BEGV)
412: lim = BEGV;
413: }
414:
415: np = search_buffer (string, point, lim, n, RE,
416: !NULL (current_buffer->case_fold_search) ? downcase_table : 0);
417: if (np <= 0)
418: {
419: if (NULL (noerror))
420: return signal_failure (string);
421: if (!EQ (noerror, Qt))
422: {
423: if (lim < BEGV || lim > ZV)
424: abort ();
425: SET_PT (lim);
426: }
427: return Qnil;
428: }
429:
430: if (np < BEGV || np > ZV)
431: abort ();
432:
433: SET_PT (np);
434:
435: return Qt;
436: }
437:
438: /* search for the n'th occurrence of `string' in the current buffer,
439: starting at position `from' and stopping at position `lim',
440: treating `pat' as a literal string if `RE' is false or as
441: a regular expression if `RE' is true.
442:
443: If `n' is positive, searching is forward and `lim' must be greater than `from'.
444: If `n' is negative, searching is backward and `lim' must be less than `from'.
445:
446: Returns -x if only `n'-x occurrences found (x > 0),
447: or else the position at the beginning of the `n'th occurrence (if searching backward)
448: or the end (if searching forward). */
449:
450: /* INTERFACE CHANGE ALERT!!!! search_buffer now returns -x if only */
451: /* n-x occurences are found. */
452:
453: search_buffer (string, pos, lim, n, RE, trt)
454: Lisp_Object string;
455: int pos;
456: int lim;
457: int n;
458: int RE;
459: register unsigned char *trt;
460: {
461: int len = XSTRING (string)->size;
462: unsigned char *base_pat = XSTRING (string)->data;
463: register int *BM_tab;
464: int *BM_tab_base;
465: register int direction = ((n > 0) ? 1 : -1);
466: register int dirlen;
467: int infinity, limit, k, stride_for_teases;
468: register unsigned char *pat, *cursor, *p_limit;
469: register int i, j;
470: unsigned char *p1, *p2;
471: int s1, s2;
472:
473:
474: if (!len)
475: return (0);
476:
477: if (RE)
478: compile_pattern (string, &searchbuf, (char *) trt);
479:
480: if (RE /* Here we detect whether the */
481: /* generality of an RE search is */
482: /* really needed. */
483: && *(searchbuf.buffer) == (char) exactn /* first item is "exact match" */
484: && searchbuf.buffer[1] + 2 == searchbuf.used) /*first is ONLY item */
485: {
486: RE = 0; /* can do straight (non RE) search */
487: pat = (base_pat = (unsigned char *) searchbuf.buffer + 2);
488: /* trt already applied */
489: len = searchbuf.used - 2;
490: }
491: else if (!RE)
492: {
493: pat = (unsigned char *) alloca (len);
494:
495: for (i = len; i--;) /* Copy the pattern; apply trt */
496: *pat++ = (((int) trt) ? trt [*base_pat++] : *base_pat++);
497: pat -= len; base_pat = pat;
498: }
499:
500: if (RE)
501: {
502: immediate_quit = 1; /* Quit immediately if user types ^G,
503: because letting this function finish
504: can take too long. */
505: QUIT; /* Do a pending quit right away,
506: to avoid paradoxical behavior */
507: /* Get pointers and sizes of the two strings
508: that make up the visible portion of the buffer. */
509:
510: p1 = BEGV_ADDR;
511: s1 = GPT - BEGV;
512: p2 = GAP_END_ADDR;
513: s2 = ZV - GPT;
514: if (s1 < 0)
515: {
516: p2 = p1;
517: s2 = ZV - BEGV;
518: s1 = 0;
519: }
520: if (s2 < 0)
521: {
522: s1 = ZV - BEGV;
523: s2 = 0;
524: }
525: while (n < 0)
526: {
527: int value = re_search_2 (&searchbuf, p1, s1, p2, s2,
528: pos - BEGV, lim - pos, &search_regs,
529: /* Don't allow match past current point */
530: pos - BEGV);
531: if (value == -2)
532: error ("Overflow in regular expression matching");
533:
534: if (value >= 0)
535: {
536: j = BEGV;
537: for (i = 0; i < RE_NREGS; i++)
538: if (search_regs.start[i] >= 0)
539: {
540: search_regs.start[i] += j;
541: search_regs.end[i] += j;
542: }
543: /* Set pos to the new position. */
544: pos = search_regs.start[0];
545: }
546: else
547: {
548: immediate_quit = 0;
549: return (n);
550: }
551: n++;
552: }
553: while (n > 0)
554: {
555: int value = re_search_2 (&searchbuf, p1, s1, p2, s2,
556: pos - BEGV, lim - pos, &search_regs,
557: lim - BEGV);
558: if (value == -2)
559: error ("Overflow in regular expression matching");
560:
561: if (value >= 0)
562: {
563: j = BEGV;
564: for (i = 0; i < RE_NREGS; i++)
565: if (search_regs.start[i] >= 0)
566: {
567: search_regs.start[i] += j;
568: search_regs.end[i] += j;
569: }
570: pos = search_regs.end[0];
571: }
572: else
573: {
574: immediate_quit = 0;
575: return (0 - n);
576: }
577: n--;
578: }
579: immediate_quit = 0;
580: return (pos);
581: }
582: else /* non-RE case */
583: {
584: #ifdef C_ALLOCA
585: int BM_tab_space[0400];
586: BM_tab = &BM_tab_space[0];
587: #else
588: BM_tab = (int *) alloca (0400 * sizeof (int));
589: #endif
590: /* The general approach is that we are going to maintain that we know */
591: /* the first (closest to the present position, in whatever direction */
592: /* we're searching) character that could possibly be the last */
593: /* (furthest from present position) character of a valid match. We */
594: /* advance the state of our knowledge by looking at that character */
595: /* and seeing whether it indeed matches the last character of the */
596: /* pattern. If it does, we take a closer look. If it does not, we */
597: /* move our pointer (to putative last characters) as far as is */
598: /* logically possible. This amount of movement, which I call a */
599: /* stride, will be the length of the pattern if the actual character */
600: /* appears nowhere in the pattern, otherwise it will be the distance */
601: /* from the last occurrence of that character to the end of the */
602: /* pattern. */
603: /* As a coding trick, an enormous stride is coded into the table for */
604: /* characters that match the last character. This allows use of only */
605: /* a single test, a test for having gone past the end of the */
606: /* permissible match region, to test for both possible matches (when */
607: /* the stride goes past the end immediately) and failure to */
608: /* match (where you get nudged past the end one stride at a time). */
609:
610: /* Here we make a "mickey mouse" BM table. The stride of the search */
611: /* is determined only by the last character of the putative match. */
612: /* If that character does not match, we will stride the proper */
613: /* distance to propose a match that superimposes it on the last */
614: /* instance of a character that matches it (per trt), or misses */
615: /* it entirely if there is none. */
616:
617: dirlen = len * direction;
618: infinity = dirlen - (lim + pos + len + len) * direction;
619: if (direction < 0)
620: pat = (base_pat += len - 1);
621: BM_tab_base = BM_tab;
622: BM_tab += 0400;
623: j = dirlen; /* to get it in a register */
624: /* A character that does not appear in the pattern induces a */
625: /* stride equal to the pattern length. */
626: while (BM_tab_base != BM_tab)
627: {
628: *--BM_tab = j;
629: *--BM_tab = j;
630: *--BM_tab = j;
631: *--BM_tab = j;
632: }
633: i = 0;
634: while (i != infinity)
635: {
636: j = pat[i]; i += direction;
637: if (i == dirlen) i = infinity;
638: if ((int) trt)
639: {
640: k = (j = trt[j]);
641: if (i == infinity)
642: stride_for_teases = BM_tab[j];
643: BM_tab[j] = dirlen - i;
644: /* A translation table is followed by its inverse -- see */
645: /* comment following downcase_table for details */
646:
647: while ((j = trt[0400+j]) != k)
648: BM_tab[j] = dirlen - i;
649: }
650: else
651: {
652: if (i == infinity)
653: stride_for_teases = BM_tab[j];
654: BM_tab[j] = dirlen - i;
655: }
656: /* stride_for_teases tells how much to stride if we get a */
657: /* match on the far character but are subsequently */
658: /* disappointed, by recording what the stride would have been */
659: /* for that character if the last character had been */
660: /* different. */
661: }
662: infinity = dirlen - infinity;
663: pos += dirlen - ((direction > 0) ? direction : 0);
664: /* loop invariant - pos points at where last char (first char if reverse)
665: of pattern would align in a possible match. */
666: while (n != 0)
667: {
668: if ((lim - pos - (direction > 0)) * direction < 0)
669: return (n * (0 - direction));
670: /* First we do the part we can by pointers (maybe nothing) */
671: QUIT;
672: pat = base_pat;
673: limit = pos - dirlen + direction;
674: limit = ((direction > 0)
675: ? BufferSafeCeiling (limit)
676: : BufferSafeFloor (limit));
677: /* LIMIT is now the last (not beyond-last!) value
678: POS can take on without hitting edge of buffer or the gap. */
679: limit = ((direction > 0)
680: ? min (lim - 1, min (limit, pos + 20000))
681: : max (lim, max (limit, pos - 20000)));
682: if ((limit - pos) * direction > 20)
683: {
684: p_limit = &FETCH_CHAR (limit);
685: p2 = (cursor = &FETCH_CHAR (pos));
686: /* In this loop, pos + cursor - p2 is the surrogate for pos */
687: while (1) /* use one cursor setting as long as i can */
688: {
689: if (direction > 0) /* worth duplicating */
690: {
691: /* Use signed comparison if appropriate
692: to make cursor+infinity sure to be > p_limit.
693: Assuming that the buffer lies in a range of addresses
694: that are all "positive" (as ints) or all "negative",
695: either kind of comparison will work as long
696: as we don't step by infinity. So pick the kind
697: that works when we do step by infinity. */
698: if ((int) (p_limit + infinity) > (int) p_limit)
699: while ((int) cursor <= (int) p_limit)
700: cursor += BM_tab[*cursor];
701: else
702: while ((unsigned int) cursor <= (unsigned int) p_limit)
703: cursor += BM_tab[*cursor];
704: }
705: else
706: {
707: if ((int) (p_limit + infinity) < (int) p_limit)
708: while ((int) cursor >= (int) p_limit)
709: cursor += BM_tab[*cursor];
710: else
711: while ((unsigned int) cursor >= (unsigned int) p_limit)
712: cursor += BM_tab[*cursor];
713: }
714: /* If you are here, cursor is beyond the end of the searched region. */
715: /* This can happen if you match on the far character of the pattern, */
716: /* because the "stride" of that character is infinity, a number able */
717: /* to throw you well beyond the end of the search. It can also */
718: /* happen if you fail to match within the permitted region and would */
719: /* otherwise try a character beyond that region */
720: if ((cursor - p_limit) * direction <= len)
721: break; /* a small overrun is genuine */
722: cursor -= infinity; /* large overrun = hit */
723: i = dirlen - direction;
724: if ((int) trt)
725: {
726: while ((i -= direction) + direction != 0)
727: if (pat[i] != trt[*(cursor -= direction)])
728: break;
729: }
730: else
731: {
732: while ((i -= direction) + direction != 0)
733: if (pat[i] != *(cursor -= direction))
734: break;
735: }
736: cursor += dirlen - i - direction; /* fix cursor */
737: if (i + direction == 0)
738: {
739: cursor -= direction;
740: search_regs.start[0]
741: = pos + cursor - p2 + ((direction > 0)
742: ? 1 - len : 0);
743: search_regs.end[0] = len + search_regs.start[0];
744: if ((n -= direction) != 0)
745: cursor += dirlen; /* to resume search */
746: else
747: return ((direction > 0)
748: ? search_regs.end[0] : search_regs.start[0]);
749: }
750: else
751: cursor += stride_for_teases; /* <sigh> we lose - */
752: }
753: pos += cursor - p2;
754: }
755: else
756: /* Now we'll pick up a clump that has to be done the hard */
757: /* way because it covers a discontinuity */
758: {
759: limit = ((direction > 0)
760: ? BufferSafeCeiling (pos - dirlen + 1)
761: : BufferSafeFloor (pos - dirlen - 1));
762: limit = ((direction > 0)
763: ? min (limit + len, lim - 1)
764: : max (limit - len, lim));
765: /* LIMIT is now the last value POS can have
766: and still be valid for a possible match. */
767: while (1)
768: {
769: /* This loop can be coded for space rather than */
770: /* speed because it will usually run only once. */
771: /* (the reach is at most len + 21, and typically */
772: /* does not exceed len) */
773: while ((limit - pos) * direction >= 0)
774: pos += BM_tab[FETCH_CHAR(pos)];
775: /* now run the same tests to distinguish going off the */
776: /* end, a match or a phoney match. */
777: if ((pos - limit) * direction <= len)
778: break; /* ran off the end */
779: /* Found what might be a match.
780: Set POS back to last (first if reverse) char pos. */
781: pos -= infinity;
782: i = dirlen - direction;
783: while ((i -= direction) + direction != 0)
784: {
785: pos -= direction;
786: if (pat[i] != (((int) trt)
787: ? trt[FETCH_CHAR(pos)]
788: : FETCH_CHAR (pos)))
789: break;
790: }
791: /* Above loop has moved POS part or all the way
792: back to the first char pos (last char pos if reverse).
793: Set it once again at the last (first if reverse) char. */
794: pos += dirlen - i- direction;
795: if (i + direction == 0)
796: {
797: pos -= direction;
798: search_regs.start[0]
799: = pos + ((direction > 0) ? 1 - len : 0);
800: search_regs.end[0] = len + search_regs.start[0];
801: if ((n -= direction) != 0)
802: pos += dirlen; /* to resume search */
803: else
804: return ((direction > 0)
805: ? search_regs.end[0] : search_regs.start[0]);
806: }
807: else
808: pos += stride_for_teases;
809: }
810: }
811: /* We have done one clump. Can we continue? */
812: if ((lim - pos) * direction < 0)
813: return ((0 - n) * direction);
814: }
815: return pos;
816: }
817: }
818:
819: /* Given a string of words separated by word delimiters,
820: compute a regexp that matches those exact words
821: separated by arbitrary punctuation. */
822:
823: static Lisp_Object
824: wordify (string)
825: Lisp_Object string;
826: {
827: register unsigned char *p, *o;
828: register int i, len, punct_count = 0, word_count = 0;
829: Lisp_Object val;
830:
831: CHECK_STRING (string, 0);
832: p = XSTRING (string)->data;
833: len = XSTRING (string)->size;
834:
835: for (i = 0; i < len; i++)
836: if (SYNTAX (p[i]) != Sword)
837: {
838: punct_count++;
839: if (i > 0 && SYNTAX (p[i-1]) == Sword) word_count++;
840: }
841: if (SYNTAX (p[len-1]) == Sword) word_count++;
842: if (!word_count) return build_string ("");
843:
844: val = make_string (p, len - punct_count + 5 * (word_count - 1) + 4);
845:
846: o = XSTRING (val)->data;
847: *o++ = '\\';
848: *o++ = 'b';
849:
850: for (i = 0; i < len; i++)
851: if (SYNTAX (p[i]) == Sword)
852: *o++ = p[i];
853: else if (i > 0 && SYNTAX (p[i-1]) == Sword && --word_count)
854: {
855: *o++ = '\\';
856: *o++ = 'W';
857: *o++ = '\\';
858: *o++ = 'W';
859: *o++ = '*';
860: }
861:
862: *o++ = '\\';
863: *o++ = 'b';
864:
865: return val;
866: }
867:
868: DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
869: "sSearch backward: ",
870: "Search backward from point for STRING.\n\
871: Set point to the beginning of the occurrence found, and return t.\n\
872: An optional second argument bounds the search; it is a buffer position.\n\
873: The match found must not extend before that position.\n\
874: Optional third argument, if t, means if fail just return nil (no error).\n\
875: If not nil and not t, position at limit of search and return nil.\n\
876: Optional fourth argument is repeat count--search for successive occurrences.\n\
877: See also the functions match-beginning and match-end and replace-match.")
878: (string, bound, noerror, count)
879: Lisp_Object string, bound, noerror, count;
880: {
881: return search_command (string, bound, noerror, count, -1, 0);
882: }
883:
884: DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "sSearch: ",
885: "Search forward from point for STRING.\n\
886: Set point to the end of the occurrence found, and return t.\n\
887: An optional second argument bounds the search; it is a buffer position.\n\
888: The match found must not extend after that position.\n\
889: Optional third argument, if t, means if fail just return nil (no error).\n\
890: If not nil and not t, move to limit of search and return nil.\n\
891: Optional fourth argument is repeat count--search for successive occurrences.\n\
892: See also the functions match-beginning and match-end and replace-match.")
893: (string, bound, noerror, count)
894: Lisp_Object string, bound, noerror, count;
895: {
896: return search_command (string, bound, noerror, count, 1, 0);
897: }
898:
899: DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
900: "sWord search backward: ",
901: "Search backward from point for STRING, ignoring differences in punctuation.\n\
902: Set point to the beginning of the occurrence found, and return t.\n\
903: An optional second argument bounds the search; it is a buffer position.\n\
904: The match found must not extend before that position.\n\
905: Optional third argument, if t, means if fail just return nil (no error).\n\
906: If not nil and not t, move to limit of search and return nil.\n\
907: Optional fourth argument is repeat count--search for successive occurrences.")
908: (string, bound, noerror, count)
909: Lisp_Object string, bound, noerror, count;
910: {
911: return search_command (wordify (string), bound, noerror, count, -1, 1);
912: }
913:
914: DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
915: "sWord search: ",
916: "Search forward from point for STRING, ignoring differences in punctuation.\n\
917: Set point to the end of the occurrence found, and return t.\n\
918: An optional second argument bounds the search; it is a buffer position.\n\
919: The match found must not extend after that position.\n\
920: Optional third argument, if t, means if fail just return nil (no error).\n\
921: If not nil and not t, move to limit of search and return nil.\n\
922: Optional fourth argument is repeat count--search for successive occurrences.")
923: (string, bound, noerror, count)
924: Lisp_Object string, bound, noerror, count;
925: {
926: return search_command (wordify (string), bound, noerror, count, 1, 1);
927: }
928:
929: DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
930: "sRE search backward: ",
931: "Search backward from point for match for regular expression REGEXP.\n\
932: Set point to the beginning of the match, and return t.\n\
933: The match found is the one starting last in the buffer\n\
934: and yet ending before the place the origin of the search.\n\
935: An optional second argument bounds the search; it is a buffer position.\n\
936: The match found must start at or after that position.\n\
937: Optional third argument, if t, means if fail just return nil (no error).\n\
938: If not nil and not t, move to limit of search and return nil.\n\
939: Optional fourth argument is repeat count--search for successive occurrences.\n\
940: See also the functions match-beginning and match-end and replace-match.")
941: (string, bound, noerror, count)
942: Lisp_Object string, bound, noerror, count;
943: {
944: return search_command (string, bound, noerror, count, -1, 1);
945: }
946:
947: DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
948: "sRE search: ",
949: "Search forward from point for regular expression REGEXP.\n\
950: Set point to the end of the occurrence found, and return t.\n\
951: An optional second argument bounds the search; it is a buffer position.\n\
952: The match found must not extend after that position.\n\
953: Optional third argument, if t, means if fail just return nil (no error).\n\
954: If not nil and not t, move to limit of search and return nil.\n\
955: Optional fourth argument is repeat count--search for successive occurrences.\n\
956: See also the functions match-beginning and match-end and replace-match.")
957: (string, bound, noerror, count)
958: Lisp_Object string, bound, noerror, count;
959: {
960: return search_command (string, bound, noerror, count, 1, 1);
961: }
962:
963: DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 3, 0,
964: "Replace text matched by last search with NEWTEXT.\n\
965: If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
966: Otherwise convert to all caps or cap initials, like replaced text.\n\
967: If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
968: Otherwise treat \\ as special:\n\
969: \\& in NEWTEXT means substitute original matched text,\n\
970: \\N means substitute match for \\(...\\) number N,\n\
971: \\\\ means insert one \\.\n\
972: Leaves point at end of replacement text.")
973: (string, fixedcase, literal)
974: Lisp_Object string, fixedcase, literal;
975: {
976: enum { nochange, all_caps, cap_initial } case_action;
977: register int pos, last;
978: int some_multiletter_word;
979: int some_letter = 0;
980: register int c, prevc;
981: int inslen;
982:
983: CHECK_STRING (string, 0);
984:
985: case_action = nochange; /* We tried an initialization */
986: /* but some C compilers blew it */
987: if (search_regs.start[0] < BEGV
988: || search_regs.start[0] > search_regs.end[0]
989: || search_regs.end[0] > ZV)
990: args_out_of_range(make_number (search_regs.start[0]),
991: make_number (search_regs.end[0]));
992:
993: if (NULL (fixedcase))
994: {
995: /* Decide how to casify by examining the matched text. */
996:
997: last = search_regs.end[0];
998: prevc = '\n';
999: case_action = all_caps;
1000:
1001: /* some_multiletter_word is set nonzero if any original word
1002: is more than one letter long. */
1003: some_multiletter_word = 0;
1004:
1005: for (pos = search_regs.start[0]; pos < last; pos++)
1006: {
1007: c = FETCH_CHAR (pos);
1008: if (LOWERCASEP (c))
1009: {
1010: /* Cannot be all caps if any original char is lower case */
1011:
1012: case_action = cap_initial;
1013: if (SYNTAX (prevc) != Sword)
1014: {
1015: /* Cannot even be cap initials
1016: if some original initial is lower case */
1017: case_action = nochange;
1018: break;
1019: }
1020: else
1021: some_multiletter_word = 1;
1022: }
1023: else if (!NOCASEP (c))
1024: {
1025: some_letter = 1;
1026: if (!some_multiletter_word && SYNTAX (prevc) == Sword)
1027: some_multiletter_word = 1;
1028: }
1029:
1030: prevc = c;
1031: }
1032:
1033: /* Do not make new text all caps
1034: if the original text contained only single letter words. */
1035: if (case_action == all_caps && !some_multiletter_word)
1036: case_action = cap_initial;
1037:
1038: if (!some_letter) case_action = nochange;
1039: }
1040:
1041: SET_PT (search_regs.end[0]);
1042: if (!NULL (literal))
1043: Finsert (1, &string);
1044: else
1045: {
1046: struct gcpro gcpro1;
1047: GCPRO1 (string);
1048: for (pos = 0; pos < XSTRING (string)->size; pos++)
1049: {
1050: c = XSTRING (string)->data[pos];
1051: if (c == '\\')
1052: {
1053: c = XSTRING (string)->data[++pos];
1054: if (c == '&')
1055: Finsert_buffer_substring (Fcurrent_buffer (),
1056: make_number (search_regs.start[0]),
1057: make_number (search_regs.end[0]));
1058: else if (c >= '1' && c <= RE_NREGS + '0')
1059: {
1060: if (search_regs.start[c - '0'] >= 1)
1061: Finsert_buffer_substring (Fcurrent_buffer (),
1062: make_number (search_regs.start[c - '0']),
1063: make_number (search_regs.end[c - '0']));
1064: }
1065: else
1066: insert_char (c);
1067: }
1068: else
1069: insert_char (c);
1070: }
1071: UNGCPRO;
1072: }
1073:
1074: inslen = point - (search_regs.end[0]);
1075: del_range (search_regs.start[0], search_regs.end[0]);
1076:
1077: if (case_action == all_caps)
1078: Fupcase_region (make_number (point - inslen), make_number (point));
1079: else if (case_action == cap_initial)
1080: upcase_initials_region (make_number (point - inslen), make_number (point));
1081: return Qnil;
1082: }
1083:
1084: static Lisp_Object
1085: match_limit (num, beginningp)
1086: Lisp_Object num;
1087: int beginningp;
1088: {
1089: register int n;
1090:
1091: CHECK_NUMBER (num, 0);
1092: n = XINT (num);
1093: if (n < 0 || n >= RE_NREGS)
1094: args_out_of_range (num, make_number (RE_NREGS));
1095: if (search_regs.start[n] < 0)
1096: return Qnil;
1097: return (make_number ((beginningp) ? search_regs.start[n]
1098: : search_regs.end[n]));
1099: }
1100:
1101: DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
1102: "Return the character number of start of text matched by last search.\n\
1103: ARG, a number, specifies which parenthesized expression in the last regexp.\n\
1104: Value is nil if ARGth pair didn't match, or there were less than ARG pairs.\n\
1105: Zero means the entire text matched by the whole regexp or whole string.")
1106: (num)
1107: Lisp_Object num;
1108: {
1109: return match_limit (num, 1);
1110: }
1111:
1112: DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
1113: "Return the character number of end of text matched by last search.\n\
1114: ARG, a number, specifies which parenthesized expression in the last regexp.\n\
1115: Value is nil if ARGth pair didn't match, or there were less than ARG pairs.\n\
1116: Zero means the entire text matched by the whole regexp or whole string.")
1117: (num)
1118: Lisp_Object num;
1119: {
1120: return match_limit (num, 0);
1121: }
1122:
1123: DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 0, 0,
1124: "Return list containing all info on what the last search matched.\n\
1125: Element 2N is (match-beginning N); element 2N + 1 is (match-end N).\n\
1126: All the elements are normally markers, or nil if the Nth pair didn't match.\n\
1127: 0 is also possible, when matching was done with `string-match',\n\
1128: if a match began at index 0 in the string.")
1129: ()
1130: {
1131: Lisp_Object data[2 * RE_NREGS];
1132: int i, len;
1133:
1134: len = -1;
1135: for (i = 0; i < RE_NREGS; i++)
1136: {
1137: int start = search_regs.start[i];
1138: if (start >= 0)
1139: {
1140: /* Use an integer if the value is out of range for the
1141: size of the current buffer. */
1142: if (start < BEG || start > Z)
1143: XFASTINT (data[2 * i]) = start;
1144: else
1145: {
1146: data[2 * i] = Fmake_marker ();
1147: Fset_marker (data[2 * i], make_number (start), Qnil);
1148: }
1149:
1150: if (search_regs.end[i] < BEG || search_regs.end[i] > Z)
1151: XFASTINT (data[2 * i + 1]) = search_regs.end[i];
1152: else
1153: {
1154: data[2 * i + 1] = Fmake_marker ();
1155: Fset_marker (data[2 * i + 1],
1156: make_number (search_regs.end[i]), Qnil);
1157: }
1158: len = i;
1159: }
1160: else
1161: data[2 * i] = data [2 * i + 1] = Qnil;
1162: }
1163: return Flist (2 * len + 2, data);
1164: }
1165:
1166:
1167: DEFUN ("store-match-data", Fstore_match_data, Sstore_match_data, 1, 1, 0,
1168: "Set internal data on last search match from elements of LIST.\n\
1169: LIST should have been created by calling match-data previously.")
1170: (list)
1171: register Lisp_Object list;
1172: {
1173: register int i;
1174: register Lisp_Object marker;
1175:
1176: if (!CONSP (list) && !NULL (list))
1177: list = wrong_type_argument (Qconsp, list, 0);
1178:
1179: for (i = 0; i < RE_NREGS; i++)
1180: {
1181: marker = Fcar (list);
1182: if (NULL (marker))
1183: {
1184: search_regs.start[i] = -1;
1185: list = Fcdr (list);
1186: }
1187: else
1188: {
1189: if (XTYPE (marker) == Lisp_Marker
1190: && XMARKER (marker)->buffer == 0)
1191: XFASTINT (marker) = 0;
1192:
1193: CHECK_NUMBER_COERCE_MARKER (marker, 0);
1194: search_regs.start[i] = XINT (marker);
1195: list = Fcdr (list);
1196:
1197: marker = Fcar (list);
1198: if (XTYPE (marker) == Lisp_Marker
1199: && XMARKER (marker)->buffer == 0)
1200: XFASTINT (marker) = 0;
1201:
1202: CHECK_NUMBER_COERCE_MARKER (marker, 0);
1203: search_regs.end[i] = XINT (marker);
1204: }
1205: list = Fcdr (list);
1206: }
1207:
1208: return Qnil;
1209: }
1210:
1211: /* Quote a string to inactivate reg-expr chars */
1212:
1213: DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
1214: "Return a regexp string which matches exactly STRING and nothing else.")
1215: (str)
1216: Lisp_Object str;
1217: {
1218: register unsigned char *in, *out, *end;
1219: register unsigned char *temp;
1220:
1221: CHECK_STRING (str, 0);
1222:
1223: temp = (unsigned char *) alloca (XSTRING (str)->size * 2);
1224:
1225: /* Now copy the data into the new string, inserting escapes. */
1226:
1227: in = XSTRING (str)->data;
1228: end = in + XSTRING (str)->size;
1229: out = temp;
1230:
1231: for (; in != end; in++)
1232: {
1233: if (*in == '[' || *in == ']'
1234: || *in == '*' || *in == '.' || *in == '\\'
1235: || *in == '?' || *in == '+'
1236: || *in == '^' || *in == '$')
1237: *out++ = '\\';
1238: *out++ = *in;
1239: }
1240:
1241: return make_string (temp, out - temp);
1242: }
1243:
1244: /* This code should be unzapped when there comes to be multiple */
1245: /* translation tables. It has been certified on various cases. */
1246: /*
1247: void
1248: compute_trt_inverse (trt)
1249: register unsigned char *trt;
1250: {
1251: register int i = 0400;
1252: register unsigned char c, q;
1253:
1254: while (i--)
1255: trt[0400+i] = i;
1256: i = 0400;
1257: while (i--)
1258: {
1259: if ((q = trt[i]) != (unsigned char) i)
1260: {
1261: c = trt[q + 0400];
1262: trt[q + 0400] = i;
1263: trt[0400 + i] = c;
1264: }
1265: }
1266: }
1267: */
1268:
1269: syms_of_search ()
1270: {
1271: register int i;
1272:
1273: /* Avoid running out of regexp stack quite so soon. */
1274: re_max_failures = 10000;
1275:
1276: for (i = 0; i < 0400; i++)
1277: {
1278: downcase_table[i] = (i >= 'A' && i <= 'Z') ? i + 040 : i;
1279: /* We do this instead of using compute_trt_inverse to save space. */
1280: /* Does it? */
1281: downcase_table[0400+i]
1282: = ((i >= 'A' && i <= 'Z')
1283: ? i + ('a' - 'A')
1284: : ((i >= 'a' && i <= 'z')
1285: ? i + ('A' - 'a')
1286: : i));
1287: }
1288: /* Use this instead when there come to be multiple translation tables.
1289: compute_trt_inverse (downcase_table); */
1290:
1291: searchbuf.allocated = 100;
1292: searchbuf.buffer = (char *) malloc (searchbuf.allocated);
1293: searchbuf.fastmap = search_fastmap;
1294:
1295: Qsearch_failed = intern ("search-failed");
1296: staticpro (&Qsearch_failed);
1297: Qinvalid_regexp = intern ("invalid-regexp");
1298: staticpro (&Qinvalid_regexp);
1299:
1300: Fput (Qsearch_failed, Qerror_conditions,
1301: Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
1302: Fput (Qsearch_failed, Qerror_message,
1303: build_string ("Search failed"));
1304:
1305: Fput (Qinvalid_regexp, Qerror_conditions,
1306: Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
1307: Fput (Qinvalid_regexp, Qerror_message,
1308: build_string ("Invalid regexp"));
1309:
1310: last_regexp = Qnil;
1311: staticpro (&last_regexp);
1312:
1313: defsubr (&Sstring_match);
1314: defsubr (&Slooking_at);
1315: defsubr (&Sskip_chars_forward);
1316: defsubr (&Sskip_chars_backward);
1317: defsubr (&Ssearch_forward);
1318: defsubr (&Ssearch_backward);
1319: defsubr (&Sword_search_forward);
1320: defsubr (&Sword_search_backward);
1321: defsubr (&Sre_search_forward);
1322: defsubr (&Sre_search_backward);
1323: defsubr (&Sreplace_match);
1324: defsubr (&Smatch_beginning);
1325: defsubr (&Smatch_end);
1326: defsubr (&Smatch_data);
1327: defsubr (&Sstore_match_data);
1328: defsubr (&Sregexp_quote);
1329: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.