|
|
1.1 root 1: /*
2: * Copyright (c) 1980 Regents of the University of California.
3: * All rights reserved. The Berkeley software License Agreement
4: * specifies the terms and conditions for redistribution.
5: */
6:
#ifndef lint
/* Version identification string; omitted when `lint' is defined. */
static char *sccsid = "@(#)ex_re.c 7.6 (Berkeley) 3/9/87";
#endif /* not lint */
10:
11: #include "ex.h"
12: #include "ex_re.h"
13:
14: /*
15: * Global, substitute and regular expressions.
16: * Very similar to ed, with some re extensions and
17: * confirmed substitute.
18: */
19: global(k)
20: bool k;
21: {
22: register char *gp;
23: register int c;
24: register line *a1;
25: char globuf[GBSIZE], *Cwas;
26: int lines = lineDOL();
27: int oinglobal = inglobal;
28: char *oglobp = globp;
29:
30: Cwas = Command;
31: /*
32: * States of inglobal:
33: * 0: ordinary - not in a global command.
34: * 1: text coming from some buffer, not tty.
35: * 2: like 1, but the source of the buffer is a global command.
36: * Hence you're only in a global command if inglobal==2. This
37: * strange sounding convention is historically derived from
38: * everybody simulating a global command.
39: */
40: if (inglobal==2)
41: error("Global within global@not allowed");
42: markDOT();
43: setall();
44: nonzero();
45: if (skipend())
46: error("Global needs re|Missing regular expression for global");
47: c = ex_getchar();
48: ignore(compile(c, 1));
49: savere(scanre);
50: gp = globuf;
51: while ((c = ex_getchar()) != '\n') {
52: switch (c) {
53:
54: case EOF:
55: c = '\n';
56: goto brkwh;
57:
58: case '\\':
59: c = ex_getchar();
60: switch (c) {
61:
62: case '\\':
63: ungetchar(c);
64: break;
65:
66: case '\n':
67: break;
68:
69: default:
70: *gp++ = '\\';
71: break;
72: }
73: break;
74: }
75: *gp++ = c;
76: if (gp >= &globuf[GBSIZE - 2])
77: error("Global command too long");
78: }
79: brkwh:
80: ungetchar(c);
81: newline();
82: *gp++ = c;
83: *gp++ = 0;
84: saveall();
85: inglobal = 2;
86: for (a1 = one; a1 <= dol; a1++) {
87: *a1 &= ~01;
88: if (a1 >= addr1 && a1 <= addr2 && execute(0, a1) == k)
89: *a1 |= 01;
90: }
91: #ifdef notdef
92: /*
93: * This code is commented out for now. The problem is that we don't
94: * fix up the undo area the way we should. Basically, I think what has
95: * to be done is to copy the undo area down (since we shrunk everything)
96: * and move the various pointers into it down too. I will do this later
97: * when I have time. (Mark, 10-20-80)
98: */
99: /*
100: * Special case: g/.../d (avoid n^2 algorithm)
101: */
102: if (globuf[0]=='d' && globuf[1]=='\n' && globuf[2]=='\0') {
103: gdelete();
104: return;
105: }
106: #endif
107: if (inopen)
108: inopen = -1;
109: /*
110: * Now for each marked line, set dot there and do the commands.
111: * Note the n^2 behavior here for lots of lines matching.
112: * This is really needed: in some cases you could delete lines,
113: * causing a marked line to be moved before a1 and missed if
114: * we didn't restart at zero each time.
115: */
116: for (a1 = one; a1 <= dol; a1++) {
117: if (*a1 & 01) {
118: *a1 &= ~01;
119: dot = a1;
120: globp = globuf;
121: commands(1, 1);
122: a1 = zero;
123: }
124: }
125: globp = oglobp;
126: inglobal = oinglobal;
127: endline = 1;
128: Command = Cwas;
129: netchHAD(lines);
130: setlastchar(EOF);
131: if (inopen) {
132: ungetchar(EOF);
133: inopen = 1;
134: }
135: }
136:
137: /*
138: * gdelete: delete inside a global command. Handles the
139: * special case g/r.e./d. All lines to be deleted have
140: * already been marked. Squeeze the remaining lines together.
141: * Note that other cases such as g/r.e./p, g/r.e./s/r.e.2/rhs/,
142: * and g/r.e./.,/r.e.2/d are not treated specially. There is no
143: * good reason for this except the question: where do you draw the line?
144: */
145: gdelete()
146: {
147: register line *a1, *a2, *a3;
148:
149: a3 = dol;
150: /* find first marked line. can skip all before it */
151: for (a1=zero; (*a1&01)==0; a1++)
152: if (a1>=a3)
153: return;
154: /* copy down unmarked lines, compacting as we go. */
155: for (a2=a1+1; a2<=a3;) {
156: if (*a2&01) {
157: a2++; /* line is marked, skip it */
158: dot = a1; /* dot left after line deletion */
159: } else
160: *a1++ = *a2++; /* unmarked, copy it */
161: }
162: dol = a1-1;
163: if (dot>dol)
164: dot = dol;
165: change();
166: }
167:
168: bool cflag;
169: int scount, slines, stotal;
170:
171: substitute(c)
172: int c;
173: {
174: register line *addr;
175: register int n;
176: int gsubf, hopcount;
177:
178: gsubf = compsub(c);
179: if(FIXUNDO)
180: save12(), undkind = UNDCHANGE;
181: stotal = 0;
182: slines = 0;
183: for (addr = addr1; addr <= addr2; addr++) {
184: scount = hopcount = 0;
185: if (dosubcon(0, addr) == 0)
186: continue;
187: if (gsubf) {
188: /*
189: * The loop can happen from s/\</&/g
190: * but we don't want to break other, reasonable cases.
191: */
192: while (*loc2) {
193: if (++hopcount > sizeof linebuf)
194: error("substitution loop");
195: if (dosubcon(1, addr) == 0)
196: break;
197: }
198: }
199: if (scount) {
200: stotal += scount;
201: slines++;
202: putmark(addr);
203: n = append(getsub, addr);
204: addr += n;
205: addr2 += n;
206: }
207: }
208: if (stotal == 0 && !inglobal && !cflag)
209: error("Fail|Substitute pattern match failed");
210: snote(stotal, slines);
211: return (stotal);
212: }
213:
214: compsub(ch)
215: {
216: register int seof, c, uselastre;
217: static int gsubf;
218:
219: if (!value(EDCOMPATIBLE))
220: gsubf = cflag = 0;
221: uselastre = 0;
222: switch (ch) {
223:
224: case 's':
225: ignore(skipwh());
226: seof = ex_getchar();
227: if (endcmd(seof) || any(seof, "gcr")) {
228: ungetchar(seof);
229: goto redo;
230: }
231: if (isalpha(seof) || isdigit(seof))
232: error("Substitute needs re|Missing regular expression for substitute");
233: seof = compile(seof, 1);
234: uselastre = 1;
235: comprhs(seof);
236: gsubf = 0;
237: cflag = 0;
238: break;
239:
240: case '~':
241: uselastre = 1;
242: /* fall into ... */
243: case '&':
244: redo:
245: if (re.Expbuf[0] == 0)
246: error("No previous re|No previous regular expression");
247: if (subre.Expbuf[0] == 0)
248: error("No previous substitute re|No previous substitute to repeat");
249: break;
250: }
251: for (;;) {
252: c = ex_getchar();
253: switch (c) {
254:
255: case 'g':
256: gsubf = !gsubf;
257: continue;
258:
259: case 'c':
260: cflag = !cflag;
261: continue;
262:
263: case 'r':
264: uselastre = 1;
265: continue;
266:
267: default:
268: ungetchar(c);
269: setcount();
270: newline();
271: if (uselastre)
272: savere(subre);
273: else
274: resre(subre);
275: return (gsubf);
276: }
277: }
278: }
279:
280: comprhs(seof)
281: int seof;
282: {
283: register char *rp, *orp;
284: register int c;
285: char orhsbuf[RHSSIZE];
286:
287: rp = rhsbuf;
288: CP(orhsbuf, rp);
289: for (;;) {
290: c = ex_getchar();
291: if (c == seof)
292: break;
293: switch (c) {
294:
295: case '\\':
296: c = ex_getchar();
297: if (c == EOF) {
298: ungetchar(c);
299: break;
300: }
301: if (value(MAGIC)) {
302: /*
303: * When "magic", \& turns into a plain &,
304: * and all other chars work fine quoted.
305: */
306: if (c != '&')
307: c |= QUOTE;
308: break;
309: }
310: magic:
311: if (c == '~') {
312: for (orp = orhsbuf; *orp; *rp++ = *orp++)
313: if (rp >= &rhsbuf[RHSSIZE - 1])
314: goto toobig;
315: continue;
316: }
317: c |= QUOTE;
318: break;
319:
320: case '\n':
321: case EOF:
322: if (!(globp && globp[0])) {
323: ungetchar(c);
324: goto endrhs;
325: }
326:
327: case '~':
328: case '&':
329: if (value(MAGIC))
330: goto magic;
331: break;
332: }
333: if (rp >= &rhsbuf[RHSSIZE - 1]) {
334: toobig:
335: *rp = 0;
336: error("Replacement pattern too long@- limit 256 characters");
337: }
338: *rp++ = c;
339: }
340: endrhs:
341: *rp++ = 0;
342: }
343:
344: getsub()
345: {
346: register char *p;
347:
348: if ((p = linebp) == 0)
349: return (EOF);
350: strcLIN(p);
351: linebp = 0;
352: return (0);
353: }
354:
355: dosubcon(f, a)
356: bool f;
357: line *a;
358: {
359:
360: if (execute(f, a) == 0)
361: return (0);
362: if (confirmed(a)) {
363: dosub();
364: scount++;
365: }
366: return (1);
367: }
368:
369: confirmed(a)
370: line *a;
371: {
372: register int c, ch;
373:
374: if (cflag == 0)
375: return (1);
376: pofix();
377: pline(lineno(a));
378: if (inopen)
379: ex_putchar('\n' | QUOTE);
380: c = column(loc1 - 1);
381: ugo(c - 1 + (inopen ? 1 : 0), ' ');
382: ugo(column(loc2 - 1) - c, '^');
383: flush();
384: ch = c = getkey();
385: again:
386: if (c == '\r')
387: c = '\n';
388: if (inopen)
389: ex_putchar(c), flush();
390: if (c != '\n' && c != EOF) {
391: c = getkey();
392: goto again;
393: }
394: noteinp();
395: return (ch == 'y');
396: }
397:
398: getch()
399: {
400: char c;
401:
402: if (read(2, &c, 1) != 1)
403: return (EOF);
404: return (c & TRIM);
405: }
406:
/*
 * ugo: output the character `with' cnt times; a count of zero or less
 * outputs nothing.
 */
ugo(cnt, with)
	int with;
	int cnt;
{

	while (cnt > 0) {
		ex_putchar(with);
		cnt--;
	}
}
417:
418: int casecnt;
419: bool destuc;
420:
421: dosub()
422: {
423: register char *lp, *sp, *rp;
424: int c;
425:
426: lp = linebuf;
427: sp = genbuf;
428: rp = rhsbuf;
429: while (lp < loc1)
430: *sp++ = *lp++;
431: casecnt = 0;
432: while (c = *rp++) {
433: /* ^V <return> from vi to split lines */
434: if (c == '\r')
435: c = '\n';
436:
437: if (c & QUOTE)
438: switch (c & TRIM) {
439:
440: case '&':
441: sp = place(sp, loc1, loc2);
442: if (sp == 0)
443: goto ovflo;
444: continue;
445:
446: case 'l':
447: casecnt = 1;
448: destuc = 0;
449: continue;
450:
451: case 'L':
452: casecnt = LBSIZE;
453: destuc = 0;
454: continue;
455:
456: case 'u':
457: casecnt = 1;
458: destuc = 1;
459: continue;
460:
461: case 'U':
462: casecnt = LBSIZE;
463: destuc = 1;
464: continue;
465:
466: case 'E':
467: case 'e':
468: casecnt = 0;
469: continue;
470: }
471: if (c < 0 && (c &= TRIM) >= '1' && c < nbra + '1') {
472: sp = place(sp, braslist[c - '1'], braelist[c - '1']);
473: if (sp == 0)
474: goto ovflo;
475: continue;
476: }
477: if (casecnt)
478: *sp++ = fixcase(c & TRIM);
479: else
480: *sp++ = c & TRIM;
481: if (sp >= &genbuf[LBSIZE])
482: ovflo:
483: error("Line overflow@in substitute");
484: }
485: lp = loc2;
486: loc2 = sp + (linebuf - genbuf);
487: while (*sp++ = *lp++)
488: if (sp >= &genbuf[LBSIZE])
489: goto ovflo;
490: strcLIN(genbuf);
491: }
492:
493: fixcase(c)
494: register int c;
495: {
496:
497: if (casecnt == 0)
498: return (c);
499: casecnt--;
500: if (destuc) {
501: if (islower(c))
502: c = toupper(c);
503: } else
504: if (isupper(c))
505: c = tolower(c);
506: return (c);
507: }
508:
509: char *
510: place(sp, l1, l2)
511: register char *sp, *l1, *l2;
512: {
513:
514: while (l1 < l2) {
515: *sp++ = fixcase(*l1++);
516: if (sp >= &genbuf[LBSIZE])
517: return (0);
518: }
519: return (sp);
520: }
521:
/*
 * snote: report how many substitutions were made, provided
 * notable(total) says the count is worth mentioning.  The line count
 * is added unless it is 1 or equal to the total.
 */
snote(total, lines)
	register int total, lines;
{

	if (notable(total)) {
		ex_printf(mesg("%d subs|%d substitutions"), total);
		if (!(lines == 1 || lines == total))
			ex_printf(" on %d lines", lines);
		noonl();
		flush();
	}
}
534:
535: compile(eof, oknl)
536: int eof;
537: int oknl;
538: {
539: register int c;
540: register char *ep;
541: char *lastep;
542: char bracket[NBRA], *bracketp, *rhsp;
543: int cclcnt;
544:
545: if (isalpha(eof) || isdigit(eof))
546: error("Regular expressions cannot be delimited by letters or digits");
547: ep = expbuf;
548: c = ex_getchar();
549: if (eof == '\\')
550: switch (c) {
551:
552: case '/':
553: case '?':
554: if (scanre.Expbuf[0] == 0)
555: error("No previous scan re|No previous scanning regular expression");
556: resre(scanre);
557: return (c);
558:
559: case '&':
560: if (subre.Expbuf[0] == 0)
561: error("No previous substitute re|No previous substitute regular expression");
562: resre(subre);
563: return (c);
564:
565: default:
566: error("Badly formed re|Regular expression \\ must be followed by / or ?");
567: }
568: if (c == eof || c == '\n' || c == EOF) {
569: if (*ep == 0)
570: error("No previous re|No previous regular expression");
571: if (c == '\n' && oknl == 0)
572: error("Missing closing delimiter@for regular expression");
573: if (c != eof)
574: ungetchar(c);
575: return (eof);
576: }
577: bracketp = bracket;
578: nbra = 0;
579: circfl = 0;
580: if (c == '^') {
581: c = ex_getchar();
582: circfl++;
583: }
584: ungetchar(c);
585: for (;;) {
586: if (ep >= &expbuf[ESIZE - 2])
587: complex:
588: cerror("Re too complex|Regular expression too complicated");
589: c = ex_getchar();
590: if (c == eof || c == EOF) {
591: if (bracketp != bracket)
592: cerror("Unmatched \\(|More \\('s than \\)'s in regular expression");
593: *ep++ = CEOFC;
594: if (c == EOF)
595: ungetchar(c);
596: return (eof);
597: }
598: if (value(MAGIC)) {
599: if (c != '*' || ep == expbuf)
600: lastep = ep;
601: } else
602: if (c != '\\' || peekchar() != '*' || ep == expbuf)
603: lastep = ep;
604: switch (c) {
605:
606: case '\\':
607: c = ex_getchar();
608: switch (c) {
609:
610: case '(':
611: if (nbra >= NBRA)
612: cerror("Awash in \\('s!|Too many \\('d subexressions in a regular expression");
613: *bracketp++ = nbra;
614: *ep++ = CBRA;
615: *ep++ = nbra++;
616: continue;
617:
618: case ')':
619: if (bracketp <= bracket)
620: cerror("Extra \\)|More \\)'s than \\('s in regular expression");
621: *ep++ = CKET;
622: *ep++ = *--bracketp;
623: continue;
624:
625: case '<':
626: *ep++ = CBRC;
627: continue;
628:
629: case '>':
630: *ep++ = CLET;
631: continue;
632: }
633: if (value(MAGIC) == 0)
634: magic:
635: switch (c) {
636:
637: case '.':
638: *ep++ = CDOT;
639: continue;
640:
641: case '~':
642: rhsp = rhsbuf;
643: while (*rhsp) {
644: if (*rhsp & QUOTE) {
645: c = *rhsp & TRIM;
646: if (c == '&')
647: error("Replacement pattern contains &@- cannot use in re");
648: if (c >= '1' && c <= '9')
649: error("Replacement pattern contains \\d@- cannot use in re");
650: }
651: if (ep >= &expbuf[ESIZE-2])
652: goto complex;
653: *ep++ = CCHR;
654: *ep++ = *rhsp++ & TRIM;
655: }
656: continue;
657:
658: case '*':
659: if (ep == expbuf)
660: break;
661: if (*lastep == CBRA || *lastep == CKET)
662: cerror("Illegal *|Can't * a \\( ... \\) in regular expression");
663: if (*lastep == CCHR && (lastep[1] & QUOTE))
664: cerror("Illegal *|Can't * a \\n in regular expression");
665: *lastep |= STAR;
666: continue;
667:
668: case '[':
669: *ep++ = CCL;
670: *ep++ = 0;
671: cclcnt = 1;
672: c = ex_getchar();
673: if (c == '^') {
674: c = ex_getchar();
675: ep[-2] = NCCL;
676: }
677: if (c == ']')
678: cerror("Bad character class|Empty character class '[]' or '[^]' cannot match");
679: while (c != ']') {
680: if (c == '\\' && any(peekchar(), "]-^\\"))
681: c = ex_getchar() | QUOTE;
682: if (c == '\n' || c == EOF)
683: cerror("Missing ]");
684: *ep++ = c;
685: cclcnt++;
686: if (ep >= &expbuf[ESIZE])
687: goto complex;
688: c = ex_getchar();
689: }
690: lastep[1] = cclcnt;
691: continue;
692: }
693: if (c == EOF) {
694: ungetchar(EOF);
695: c = '\\';
696: goto defchar;
697: }
698: *ep++ = CCHR;
699: if (c == '\n')
700: cerror("No newlines in re's|Can't escape newlines into regular expressions");
701: /*
702: if (c < '1' || c > NBRA + '1') {
703: */
704: *ep++ = c;
705: continue;
706: /*
707: }
708: c -= '1';
709: if (c >= nbra)
710: cerror("Bad \\n|\\n in regular expression with n greater than the number of \\('s");
711: *ep++ = c | QUOTE;
712: continue;
713: */
714:
715: case '\n':
716: if (oknl) {
717: ungetchar(c);
718: *ep++ = CEOFC;
719: return (eof);
720: }
721: cerror("Badly formed re|Missing closing delimiter for regular expression");
722:
723: case '$':
724: if (peekchar() == eof || peekchar() == EOF || oknl && peekchar() == '\n') {
725: *ep++ = CDOL;
726: continue;
727: }
728: goto defchar;
729:
730: case '.':
731: case '~':
732: case '*':
733: case '[':
734: if (value(MAGIC))
735: goto magic;
736: defchar:
737: default:
738: *ep++ = CCHR;
739: *ep++ = c;
740: continue;
741: }
742: }
743: }
744:
745: cerror(s)
746: char *s;
747: {
748:
749: expbuf[0] = 0;
750: error(s);
751: }
752:
753: same(a, b)
754: register int a, b;
755: {
756:
757: return (a == b || value(IGNORECASE) &&
758: ((islower(a) && toupper(a) == b) || (islower(b) && toupper(b) == a)));
759: }
760:
761: char *locs;
762:
763: /* VARARGS1 */
764: execute(gf, addr)
765: line *addr;
766: {
767: register char *p1, *p2;
768: register int c;
769:
770: if (gf) {
771: if (circfl)
772: return (0);
773: locs = p1 = loc2;
774: } else {
775: if (addr == zero)
776: return (0);
777: p1 = linebuf;
778: getline(*addr);
779: locs = 0;
780: }
781: p2 = expbuf;
782: if (circfl) {
783: loc1 = p1;
784: return (advance(p1, p2));
785: }
786: /* fast check for first character */
787: if (*p2 == CCHR) {
788: c = p2[1];
789: do {
790: if (c != *p1 && (!value(IGNORECASE) ||
791: !((islower(c) && toupper(c) == *p1) ||
792: (islower(*p1) && toupper(*p1) == c))))
793: continue;
794: if (advance(p1, p2)) {
795: loc1 = p1;
796: return (1);
797: }
798: } while (*p1++);
799: return (0);
800: }
801: /* regular algorithm */
802: do {
803: if (advance(p1, p2)) {
804: loc1 = p1;
805: return (1);
806: }
807: } while (*p1++);
808: return (0);
809: }
810:
/*
 * Nonzero if c is a letter or an underscore.  The argument is fully
 * parenthesized and converted to unsigned char for isalpha(); note that
 * it is evaluated twice.
 */
#define	uletter(c)	(isalpha((unsigned char)(c)) || (c) == '_')
812:
813: advance(lp, ep)
814: register char *lp, *ep;
815: {
816: register char *curlp;
817:
818: for (;;) switch (*ep++) {
819:
820: case CCHR:
821: /* useless
822: if (*ep & QUOTE) {
823: c = *ep++ & TRIM;
824: sp = braslist[c];
825: sp1 = braelist[c];
826: while (sp < sp1) {
827: if (!same(*sp, *lp))
828: return (0);
829: sp++, lp++;
830: }
831: continue;
832: }
833: */
834: if (!same(*ep, *lp))
835: return (0);
836: ep++, lp++;
837: continue;
838:
839: case CDOT:
840: if (*lp++)
841: continue;
842: return (0);
843:
844: case CDOL:
845: if (*lp == 0)
846: continue;
847: return (0);
848:
849: case CEOFC:
850: loc2 = lp;
851: return (1);
852:
853: case CCL:
854: if (cclass(ep, *lp++, 1)) {
855: ep += *ep;
856: continue;
857: }
858: return (0);
859:
860: case NCCL:
861: if (cclass(ep, *lp++, 0)) {
862: ep += *ep;
863: continue;
864: }
865: return (0);
866:
867: case CBRA:
868: braslist[*ep++] = lp;
869: continue;
870:
871: case CKET:
872: braelist[*ep++] = lp;
873: continue;
874:
875: case CDOT|STAR:
876: curlp = lp;
877: while (*lp++)
878: continue;
879: goto star;
880:
881: case CCHR|STAR:
882: curlp = lp;
883: while (same(*lp, *ep))
884: lp++;
885: lp++;
886: ep++;
887: goto star;
888:
889: case CCL|STAR:
890: case NCCL|STAR:
891: curlp = lp;
892: while (cclass(ep, *lp++, ep[-1] == (CCL|STAR)))
893: continue;
894: ep += *ep;
895: goto star;
896: star:
897: do {
898: lp--;
899: if (lp == locs)
900: break;
901: if (advance(lp, ep))
902: return (1);
903: } while (lp > curlp);
904: return (0);
905:
906: case CBRC:
907: if (lp == linebuf)
908: continue;
909: if ((isdigit(*lp) || uletter(*lp)) && !uletter(lp[-1]) && !isdigit(lp[-1]))
910: continue;
911: return (0);
912:
913: case CLET:
914: if (!uletter(*lp) && !isdigit(*lp))
915: continue;
916: return (0);
917:
918: default:
919: error("Re internal error");
920: }
921: }
922:
923: cclass(set, c, af)
924: register char *set;
925: register int c;
926: int af;
927: {
928: register int n;
929:
930: if (c == 0)
931: return (0);
932: if (value(IGNORECASE) && isupper(c))
933: c = tolower(c);
934: n = *set++;
935: while (--n)
936: if (n > 2 && set[1] == '-') {
937: if (c >= (set[0] & TRIM) && c <= (set[2] & TRIM))
938: return (af);
939: set += 3;
940: n -= 2;
941: } else
942: if ((*set++ & TRIM) == c)
943: return (af);
944: return (!af);
945: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.