|
|
1.1 root 1: /*************************************************************************
2: * This program is copyright (C) 1985, 1986 by Jonathan Payne. It is *
3: * provided to you without charge for use only on a licensed Unix *
4: * system. You may copy JOVE provided that this notice is included with *
5: * the copy. You may not sell copies of this program or versions *
6: * modified for use on microcomputer systems, unless the copies are *
7: * included with a Unix system distribution and the source is provided. *
8: *************************************************************************/
9:
10: /* Search package. */
11:
12: #include "jove.h"
13: #include "ctype.h"
14:
15: #define NALTS 10 /* number of alternate search strings */
16:
17: char searchstr[128],
18: compbuf[128], /* global default compbuf */
19: rep_search[128], /* replace search string */
20: rep_str[128], /* contains replacement string */
21: *cur_compb, /* usually points at compbuf */
22: REbuf[LBSIZE], /* points at line we're scanning */
23: *alternates[NALTS];
24:
25: int REdirection;
26:
27: int CaseIgnore = 0,
28: WrapScan = 0,
29: UseRE = 0;
30:
31: #define cind_cmp(a, b) (Upper(a) == Upper(b))
32:
33: private int REpeekc;
34: private char *REptr;
35:
36: private
37: REgetc()
38: {
39: int c;
40:
41: if ((c = REpeekc) != -1)
42: REpeekc = -1;
43: else if (*REptr)
44: c = *REptr++;
45: else
46: c = 0;
47:
48: return c;
49: }
50:
51: #define STAR 01 /* Match any number of last RE. */
52: #define AT_BOL 2 /* ^ */
53: #define AT_EOL 4 /* $ */
54: #define AT_BOW 6 /* \< */
55: #define AT_EOW 8 /* \> */
56: #define OPENP 10 /* \( */
57: #define CLOSEP 12 /* \) */
58: #define CURLYB 14 /* \{ */
59:
60: #define NOSTR 14 /* Codes <= NOSTR can't be *'d. */
61:
62: #define ANYC NOSTR+2 /* . */
63: #define NORMC ANYC+2 /* normal character */
64: #define CINDC NORMC+2 /* case independent character */
65: #define ONE_OF CINDC+2 /* [xxx] */
66: #define NONE_OF ONE_OF+2 /* [^xxx] */
67: #define BACKREF NONE_OF+2 /* \# */
68: #define EOP BACKREF+2 /* end of pattern */
69:
70: #define NPAR 9 /* [1-9] */
71: private int nparens;
72: private char *comp_p,
73: **alt_p,
74: **alt_endp;
75:
76: REcompile(pattern, re, into_buf, alt_bufp)
77: char *pattern,
78: *into_buf,
79: **alt_bufp;
80: {
81: REptr = pattern;
82: REpeekc = -1;
83: comp_p = cur_compb = into_buf;
84: alt_p = alt_bufp;
85: alt_endp = alt_p + NALTS;
86: *alt_p++ = comp_p;
87: nparens = 0;
88: (void) do_comp(re ? OKAY_RE : NORM);
89: *alt_p = 0;
90: }
91:
92: /* Compile the pattern into an internal code. */
93:
94: private
95: do_comp(kind)
96: {
97: char *last_p;
98: int parens[NPAR],
99: *parenp,
100: c,
101: ret_code;
102:
103: parenp = parens;
104: last_p = 0;
105: ret_code = 1;
106:
107: while (c = REgetc()) {
108: if (comp_p > &cur_compb[(sizeof compbuf) - 4])
109: complain("Search string too long.");
110: if (c != '*')
111: last_p = comp_p;
112: if (kind == NORM && index(".[*", c) != 0)
113: goto defchar;
114: switch (c) {
115: case '\\':
116: switch (c = REgetc()) {
117: case 0:
118: complain("Premature end of pattern.");
119:
120: case '{':
121: {
122: char *wcntp; /* Word count. */
123:
124: *comp_p++ = CURLYB;
125: wcntp = comp_p;
126: *comp_p++ = 0;
127: for (;;) {
128: int comp_val;
129: char *comp_len;
130:
131: comp_len = comp_p++;
132: comp_val = do_comp(IN_CB);
133: *comp_len = comp_p - comp_len;
134: (*wcntp)++;
135: if (comp_val == 0)
136: break;
137: }
138: continue;
139: }
140:
141: case '}':
142: if (kind != IN_CB)
143: complain("Unexpected \}.");
144: ret_code = 0;
145: goto outahere;
146:
147: case '(':
148: if (nparens >= NPAR)
149: complain("Too many ('s; max is %d.", NPAR);
150: *comp_p++ = OPENP;
151: *comp_p++ = nparens;
152: *parenp++ = nparens++;
153: continue;
154:
155: case ')':
156: if (parenp == parens)
157: complain("Too many )'s.");
158: *comp_p++ = CLOSEP;
159: *comp_p++ = *--parenp;
160: continue;
161:
162: case '|':
163: if (alt_p >= alt_endp)
164: complain("Too many alternates; max %d.", NALTS);
165: *comp_p++ = EOP;
166: *alt_p++ = comp_p;
167: continue;
168:
169: case '1':
170: case '2':
171: case '3':
172: case '4':
173: case '5':
174: case '6':
175: case '7':
176: case '8':
177: case '9':
178: *comp_p++ = BACKREF;
179: *comp_p++ = c - '1';
180: continue;
181:
182: case '<':
183: *comp_p++ = AT_BOW;
184: continue;
185:
186: case '>':
187: *comp_p++ = AT_EOW;
188: continue;
189:
190: default:
191: goto defchar;
192: }
193:
194: case ',':
195: if (kind != IN_CB)
196: goto defchar;
197: goto outahere;
198:
199: case '.':
200: *comp_p++ = ANYC;
201: continue;
202:
203: case '^':
204: if (comp_p == cur_compb || comp_p[-1] == EOP) {
205: *comp_p++ = AT_BOL;
206: continue;
207: }
208: goto defchar;
209:
210: case '$':
211: if ((REpeekc = REgetc()) != 0 && REpeekc != '\\')
212: goto defchar;
213: *comp_p++ = AT_EOL;
214: continue;
215:
216: case '[':
217: {
218: int chrcnt;
219:
220: *comp_p++ = ONE_OF;
221: bzero(comp_p, 16);
222: if ((REpeekc = REgetc()) == '^') {
223: *last_p = NONE_OF;
224: /* Get it for real this time. */
225: (void) REgetc();
226: }
227: chrcnt = 1;
228: while ((c = REgetc()) != ']' && c != 0) {
229: if (c == '\\')
230: c = REgetc();
231: else if ((REpeekc = REgetc()) == '-') {
232: int c2;
233:
234: (void) REgetc(); /* read '-' */
235: c2 = REgetc();
236: while (c < c2) {
237: comp_p[c/8] |= (1 << (c%8));
238: c++;
239: }
240: }
241: comp_p[c/8] |= (1 << (c%8));
242: chrcnt++;
243: }
244: if (c == 0)
245: complain("Missing ].");
246: if (chrcnt == 1)
247: complain("Empty [].");
248: comp_p += 16;
249: continue;
250: }
251:
252: case '*':
253: if (last_p == 0 || *last_p <= NOSTR)
254: goto defchar;
255: *last_p |= STAR;
256: continue;
257:
258: default:
259: defchar: *comp_p++ = (CaseIgnore) ? CINDC : NORMC;
260: *comp_p++ = c;
261: }
262: }
263: outahere:
264: /* End of pattern, let's do some error checking. */
265: if (parenp != parens)
266: complain("Unmatched ()'s.");
267: if (kind == IN_CB && c == 0) /* End of pattern with \}. */
268: complain("Missing \}.");
269: *comp_p++ = EOP;
270:
271: return ret_code;
272: }
273:
274: private char *pstrtlst[NPAR], /* index into REbuf */
275: *pendlst[NPAR],
276: *REbolp,
277: *locs,
278: *loc1,
279: *loc2;
280:
281: int REbom,
282: REeom, /* beginning and end of match */
283: REalt_num; /* if alternatives, which one matched? */
284:
285: private
286: backref(n, linep)
287: register char *linep;
288: {
289: register char *backsp,
290: *backep;
291:
292: backsp = pstrtlst[n];
293: backep = pendlst[n];
294: while (*backsp++ == *linep++)
295: if (backsp >= backep)
296: return 1;
297: return 0;
298: }
299:
300: private
301: member(comp_p, c, af)
302: register char *comp_p;
303: register int c;
304: {
305: register int n;
306: char *base;
307:
308: if (c == 0)
309: return 0; /* try to match EOL always fails */
310: if (comp_p[c/8] & (1 << (c%8)))
311: return af;
312: return !af;
313: }
314:
315: private
316: REmatch(linep, comp_p)
317: register char *linep,
318: *comp_p;
319: {
320: char *first_p = linep;
321: register int n;
322:
323: for (;;) switch (*comp_p++) {
324: case NORMC:
325: if (*linep++ == *comp_p++)
326: continue;
327: return 0;
328:
329: case CINDC: /* case independent comparison */
330: if (cind_cmp(*linep++, *comp_p++))
331: continue;
332: return 0;
333:
334: case EOP:
335: loc2 = linep;
336: REeom = (loc2 - REbolp);
337: return 1; /* Success! */
338:
339: case AT_BOL:
340: if (linep == REbolp)
341: continue;
342: return 0;
343:
344: case AT_EOL:
345: if (*linep == 0)
346: continue;
347: return 0;
348:
349: case ANYC:
350: if (*linep++ != 0)
351: continue;
352: return 0;
353:
354: case AT_BOW:
355: if (ismword(*linep) && (linep == REbolp || !ismword(linep[-1])))
356: continue;
357: return 0;
358:
359: case AT_EOW:
360: if ((*linep == 0 || !ismword(*linep)) &&
361: (linep != REbolp && ismword(linep[-1])))
362: continue;
363: return 0;
364:
365: case ONE_OF:
366: case NONE_OF:
367: if (member(comp_p, *linep++, comp_p[-1] == ONE_OF)) {
368: comp_p += 16;
369: continue;
370: }
371: return 0;
372:
373: case OPENP:
374: pstrtlst[*comp_p++] = linep;
375: continue;
376:
377: case CLOSEP:
378: pendlst[*comp_p++] = linep;
379: continue;
380:
381: case BACKREF:
382: if (pstrtlst[n = *comp_p++] == 0)
383: complain("\\%d was not specified.", n + 1);
384: if (backref(n, linep)) {
385: linep += pendlst[n] - pstrtlst[n];
386: continue;
387: }
388: return 0;
389:
390: case CURLYB:
391: {
392: int wcnt,
393: any;
394:
395: wcnt = *comp_p++;
396: any = 0;
397:
398: while (--wcnt >= 0) {
399: if (any == 0)
400: any = REmatch(linep, comp_p + 1);
401: comp_p += *comp_p;
402: }
403: if (any == 0)
404: return 0;
405: linep = loc2;
406: continue;
407: }
408:
409: case ANYC | STAR:
410: first_p = linep;
411: while (*linep++)
412: ;
413: goto star;
414:
415: case NORMC | STAR:
416: first_p = linep;
417: while (*comp_p == *linep++)
418: ;
419: comp_p++;
420: goto star;
421:
422: case CINDC | STAR:
423: first_p = linep;
424: while (cind_cmp(*comp_p, *linep++))
425: ;
426: comp_p++;
427: goto star;
428:
429: case ONE_OF | STAR:
430: case NONE_OF | STAR:
431: first_p = linep;
432: while (member(comp_p, *linep++, comp_p[-1] == (ONE_OF | STAR)))
433: ;
434: comp_p += 16;
435: goto star;
436:
437: case BACKREF | STAR:
438: first_p = linep;
439: n = *comp_p++;
440: while (backref(n, linep))
441: linep += pendlst[n] - pstrtlst[n];
442: while (linep >= first_p) {
443: if (REmatch(linep, comp_p))
444: return 1;
445: linep -= pendlst[n] - pstrtlst[n];
446: }
447: continue;
448:
449: star: do {
450: linep--;
451: if (linep < locs)
452: break;
453: if (REmatch(linep, comp_p))
454: return 1;
455: } while (linep > first_p);
456: return 0;
457:
458: default:
459: complain("RE error match (%d).", comp_p[-1]);
460: }
461: /* NOTREACHED. */
462: }
463:
464: private
465: REreset()
466: {
467: register int i;
468:
469: for (i = 0; i < NPAR; i++)
470: pstrtlst[i] = pendlst[i] = 0;
471: }
472:
473: /* Index LINE at OFFSET, the compiled EXPR, with alternates ALTS. If
474: lbuf_okay is nonzero it's okay to use linebuf if LINE is the current
475: line. This should save lots of time in things like paren matching in
476: LISP mode. Saves all that copying from linebuf to REbuf. substitute()
477: is the guy who calls re_lindex with lbuf_okay as 0, since the substitution
478: gets placed in linebuf ... doesn't work too well when the source and
479: destination strings are the same. I hate all these arguments! */
480:
481: re_lindex(line, offset, expr, alts, lbuf_okay)
482: Line *line;
483: char *expr,
484: **alts;
485: {
486: int isquick;
487: register int firstc,
488: c;
489: register char *resp;
490:
491: REreset();
492: if (lbuf_okay) {
493: REbolp = lbptr(line);
494: if (offset == -1)
495: offset = strlen(REbolp); /* arg! */
496: } else {
497: REbolp = ltobuf(line, REbuf);
498: if (offset == -1) { /* Reverse search, find end of line. */
499: extern int Jr_Len;
500:
501: offset = Jr_Len; /* Just Read Len. */
502: }
503: }
504: resp = REbolp;
505: isquick = (expr[0] == NORMC && alternates[1] == 0);
506: if (isquick)
507: firstc = expr[1];
508: locs = REbolp + offset;
509:
510: if (REdirection == FORWARD) {
511: do {
512: char **altp = alts;
513:
514: if (isquick) {
515: while ((c = *locs++) != 0 && c != firstc)
516: ;
517: if (*--locs == 0)
518: break;
519: }
520: REalt_num = 1;
521: while (*altp) {
522: if (REmatch(locs, *altp++)) {
523: loc1 = locs;
524: REbom = loc1 - REbolp;
525: return 1;
526: }
527: REalt_num++;
528: }
529: } while (*locs++);
530: } else {
531: do {
532: char **altp = alts;
533:
534: if (isquick) {
535: while (locs >= REbolp && *locs-- != firstc)
536: ;
537: if (*++locs != firstc)
538: break;
539: }
540: REalt_num = 1;
541: while (*altp) {
542: if (REmatch(locs, *altp++)) {
543: loc1 = locs;
544: REbom = loc1 - REbolp;
545: return 1;
546: }
547: REalt_num++;
548: }
549: } while (--locs >= resp);
550: }
551:
552: return 0;
553: }
554:
555: int okay_wrap = 0; /* Do a wrap search ... not when we're
556: parsing errors ... */
557:
558: Bufpos *
559: dosearch(pattern, dir, re)
560: char *pattern;
561: {
562: Bufpos *pos;
563:
564: if (bobp() && eobp()) /* Can't match! There's no buffer. */
565: return 0;
566:
567: REcompile(pattern, re, compbuf, alternates);
568:
569: okay_wrap++;
570: pos = docompiled(dir, compbuf, alternates);
571: okay_wrap = 0;
572: return pos;
573: }
574:
575: Bufpos *
576: docompiled(dir, expr, alts)
577: char *expr,
578: **alts;
579: {
580: static Bufpos ret;
581: Bufpos push_dot;
582: register Line *lp;
583: register int offset;
584: int we_wrapped = 0;
585:
586: lsave();
587: /* Search now lsave()'s so it doesn't make any assumptions on
588: whether the the contents of curline/curchar are in linebuf.
589: Nowhere does search write all over linebuf. However, we have to
590: be careful about what calls we make here, because many of them
591: assume (and rightly so) that curline is in linebuf. */
592:
593: REdirection = dir;
594: DOTsave(&push_dot);
595: if (dir == BACKWARD) {
596: if (bobp()) {
597: if (okay_wrap && WrapScan) {
598: lp = curline;
599: goto doit;
600: }
601: return 0;
602: }
603: /* here we simulate BackChar() */
604: if (bolp()) {
605: curline = curline->l_prev;
606: curchar = strlen(lbptr(curline));
607: } else
608: --curchar;
609: } else if (dir == FORWARD && (lbptr(curline)[curchar] == '\0') && !lastp(curline)) {
610: curline = curline->l_next;
611: curchar = 0;
612: }
613: lp = curline;
614: offset = curchar;
615:
616: do {
617: if (re_lindex(lp, offset, expr, alts, 1))
618: break;
619: doit: lp = (dir == FORWARD) ? lp->l_next : lp->l_prev;
620: if (lp == 0) {
621: if (okay_wrap && WrapScan) {
622: lp = (dir == FORWARD) ?
623: curbuf->b_first : curbuf->b_last;
624: we_wrapped++;
625: } else
626: break;
627: }
628: if (dir == FORWARD)
629: offset = 0;
630: else
631: offset = -1; /* Signals re_lindex ... */
632:
633: } while (lp != push_dot.p_line);
634:
635: if (lp == push_dot.p_line && we_wrapped)
636: lp = 0;
637: curline = push_dot.p_line;
638: curchar = push_dot.p_char;
639: if (lp == 0)
640: return 0;
641: ret.p_line = lp;
642: ret.p_char = (dir == FORWARD) ? REeom : REbom;
643: return &ret;
644: }
645:
646: private char *
647: insert(off, endp, which)
648: char *off,
649: *endp;
650: {
651: register char *pp;
652: register int n;
653:
654: n = pendlst[which] - pstrtlst[which];
655: pp = pstrtlst[which];
656: while (--n >= 0) {
657: *off++ = *pp++;
658: if (off >= endp)
659: len_error(ERROR);
660: }
661: return off;
662: }
663:
664: /* Perform the substitution. If DELP is nonzero the matched string is
665: deleted, i.e., the substitution string is not inserted. */
666:
667: re_dosub(tobuf, delp)
668: char *tobuf;
669: {
670: register char *tp,
671: *rp,
672: *repp;
673: int c;
674: char *endp;
675:
676: tp = tobuf;
677: endp = tp + LBSIZE;
678: rp = REbuf;
679: repp = rep_str;
680:
681: while (rp < loc1)
682: *tp++ = *rp++;
683:
684: if (!delp) while (c = *repp++) {
685: if (c == '\\' && (c = *repp++) >= '1' && c <= nparens + '1') {
686: tp = insert(tp, endp, c - '1');
687: continue;
688: }
689: *tp++ = c;
690: if (tp >= endp)
691: len_error(ERROR);
692: }
693: rp = loc2;
694: loc2 = REbuf + max(1, tp - tobuf);
695: REeom = loc2 - REbuf;
696: /* At least one character past the match, to prevent an infinite
697: number of replacements in the same position, e.g.,
698: replace "^" with "". */
699: while (*tp++ = *rp++)
700: if (tp >= endp)
701: len_error(ERROR);
702: }
703:
704: putmatch(which, buf, size)
705: char *buf;
706: {
707: *(insert(buf, buf + size, which - 1)) = 0;
708: }
709:
710: setsearch(str)
711: char *str;
712: {
713: strcpy(searchstr, str);
714: }
715:
716: char *
717: getsearch()
718: {
719: return searchstr;
720: }
721:
722: RErecur()
723: {
724: char sbuf[sizeof searchstr],
725: cbuf[sizeof compbuf],
726: repbuf[sizeof rep_str],
727: *altbuf[NALTS];
728: int npars;
729: Mark *m = MakeMark(curline, REbom, FLOATER);
730:
731: message("Type C-X C-C to continue with query replace.");
732:
733: npars = nparens;
734: byte_copy(compbuf, cbuf, sizeof compbuf);
735: byte_copy(searchstr, sbuf, sizeof searchstr);
736: byte_copy(rep_str, repbuf, sizeof rep_str);
737: byte_copy((char *) alternates, (char *) altbuf, sizeof alternates);
738: Recur();
739: nparens = npars;
740: byte_copy(cbuf, compbuf, sizeof compbuf);
741: byte_copy(sbuf, searchstr, sizeof searchstr);
742: byte_copy(repbuf, rep_str, sizeof rep_str);
743: byte_copy((char *) altbuf, (char *) alternates, sizeof alternates);
744: if (!exp_p)
745: ToMark(m);
746: DelMark(m);
747: }
748:
749: ForSearch()
750: {
751: search(FORWARD, UseRE);
752: }
753:
754: RevSearch()
755: {
756: search(BACKWARD, UseRE);
757: }
758:
759: private
760: search(dir, re)
761: {
762: Bufpos *newdot;
763:
764: setsearch(ask(searchstr, ProcFmt));
765: if ((newdot = dosearch(searchstr, dir, re)) == 0) {
766: if (WrapScan)
767: complain("No \"%s\" in buffer.", searchstr);
768: else
769: complain("No \"%s\" found to %s.", searchstr,
770: (dir == FORWARD) ? "bottom" : "top");
771: }
772: PushPntp(newdot->p_line);
773: SetDot(newdot);
774: }
775:
776: /* Do we match PATTERN at OFFSET in BUF? */
777:
778: LookingAt(pattern, buf, offset)
779: char *pattern,
780: *buf;
781: {
782: register char **alt = alternates;
783:
784: REcompile(pattern, 1, compbuf, alternates);
785: REreset();
786: locs = buf + offset;
787: REbolp = buf;
788:
789: while (*alt)
790: if (REmatch(locs, *alt++))
791: return 1;
792: return 0;
793: }
794:
795: look_at(expr)
796: char *expr;
797: {
798: REcompile(expr, 0, compbuf, alternates);
799: REreset();
800: locs = linebuf + curchar;
801: REbolp = linebuf;
802: if (REmatch(locs, alternates[0]))
803: return 1;
804: return 0;
805: }
806:
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.