|
|
1.1 root 1: /*
2: * Copyright (c) 1980 Regents of the University of California.
3: * All rights reserved. The Berkeley software License Agreement
4: * specifies the terms and conditions for redistribution.
5: */
6:
7: #ifndef lint
8: static char *sccsid = "@(#)ex_re.c 7.5 (Berkeley) 6/7/85";
9: #endif not lint
10:
11: #include "ex.h"
12: #include "ex_re.h"
13:
14: /*
15: * Global, substitute and regular expressions.
16: * Very similar to ed, with some re extensions and
17: * confirmed substitute.
18: */
/*
 * global - run a command list at every line in addr1..addr2 whose
 * execute(0, line) result equals k.
 *
 * The pattern is compiled from the input, the remainder of the command
 * line is gathered into globuf, the selected lines are flagged in the
 * low bit (01) of their line entry, and commands(1, 1) is then run once
 * per flagged line with dot set to that line and globp pointing at
 * globuf.  Command, globp and inglobal are saved on entry and put back
 * before returning.
 */
19: global(k)
20: bool k;
21: {
22: register char *gp;
23: register int c;
24: register line *a1;
25: char globuf[GBSIZE], *Cwas;
26: int lines = lineDOL();
27: int oinglobal = inglobal;
28: char *oglobp = globp;
29:
30: Cwas = Command;
31: /*
32: * States of inglobal:
33: * 0: ordinary - not in a global command.
34: * 1: text coming from some buffer, not tty.
35: * 2: like 1, but the source of the buffer is a global command.
36: * Hence you're only in a global command if inglobal==2. This
37: * strange sounding convention is historically derived from
38: * everybody simulating a global command.
39: */
40: if (inglobal==2)
41: error("Global within global@not allowed");
42: markDOT();
43: setall();
44: nonzero();
45: if (skipend())
46: error("Global needs re|Missing regular expression for global");
/* The next character is handed to compile() as the pattern delimiter. */
47: c = getchar();
48: ignore(compile(c, 1));
49: savere(scanre);
/*
 * Gather the command text into globuf until an unescaped newline or EOF.
 * Backslash handling: before a newline the newline is stored and the
 * loop goes on; before another backslash the second one is pushed back
 * and one backslash is stored; before anything else the backslash is
 * stored ahead of the character.
 */
50: gp = globuf;
51: while ((c = getchar()) != '\n') {
52: switch (c) {
53:
54: case EOF:
55: c = '\n';
56: goto brkwh;
57:
58: case '\\':
59: c = getchar();
60: switch (c) {
61:
62: case '\\':
63: ungetchar(c);
64: break;
65:
66: case '\n':
67: break;
68:
69: default:
70: *gp++ = '\\';
71: break;
72: }
73: break;
74: }
75: *gp++ = c;
/* Keep two bytes free for the final character and the NUL stored below. */
76: if (gp >= &globuf[GBSIZE - 2])
77: error("Global command too long");
78: }
79: brkwh:
80: ungetchar(c);
/* NOTE(review): the label "out" is not the target of any goto in this function. */
81: out:
82: newline();
83: *gp++ = c;
84: *gp++ = 0;
85: saveall();
86: inglobal = 2;
/*
 * Marking pass: clear the mark bit on every line, then set it on the
 * lines inside addr1..addr2 whose match result equals k.
 */
87: for (a1 = one; a1 <= dol; a1++) {
88: *a1 &= ~01;
89: if (a1 >= addr1 && a1 <= addr2 && execute(0, a1) == k)
90: *a1 |= 01;
91: }
92: #ifdef notdef
93: /*
94: * This code is commented out for now. The problem is that we don't
95: * fix up the undo area the way we should. Basically, I think what has
96: * to be done is to copy the undo area down (since we shrunk everything)
97: * and move the various pointers into it down too. I will do this later
98: * when I have time. (Mark, 10-20-80)
99: */
100: /*
101: * Special case: g/.../d (avoid n^2 algorithm)
102: */
103: if (globuf[0]=='d' && globuf[1]=='\n' && globuf[2]=='\0') {
104: gdelete();
105: return;
106: }
107: #endif
/* inopen is set to -1 while the commands run and back to 1 at the end. */
108: if (inopen)
109: inopen = -1;
110: /*
111: * Now for each marked line, set dot there and do the commands.
112: * Note the n^2 behavior here for lots of lines matching.
113: * This is really needed: in some cases you could delete lines,
114: * causing a marked line to be moved before a1 and missed if
115: * we didn't restart at zero each time.
116: */
117: for (a1 = one; a1 <= dol; a1++) {
118: if (*a1 & 01) {
119: *a1 &= ~01;
120: dot = a1;
121: globp = globuf;
122: commands(1, 1);
/* Restart the scan; the loop increment moves a1 on to "one". */
123: a1 = zero;
124: }
125: }
/* Put back the input source, global state and command name saved on entry. */
126: globp = oglobp;
127: inglobal = oinglobal;
128: endline = 1;
129: Command = Cwas;
130: netchHAD(lines);
131: setlastchar(EOF);
132: if (inopen) {
133: ungetchar(EOF);
134: inopen = 1;
135: }
136: }
137:
138: /*
139: * gdelete: delete inside a global command. Handles the
140: * special case g/r.e./d. All lines to be deleted have
141: * already been marked. Squeeze the remaining lines together.
142: * Note that other cases such as g/r.e./p, g/r.e./s/r.e.2/rhs/,
143: * and g/r.e./.,/r.e.2/d are not treated specially. There is no
144: * good reason for this except the question: where do you draw the line?
145: */
146: gdelete()
147: {
148: register line *a1, *a2, *a3;
149:
150: a3 = dol;
151: /* find first marked line. can skip all before it */
152: for (a1=zero; (*a1&01)==0; a1++)
153: if (a1>=a3)
154: return;
155: /* copy down unmarked lines, compacting as we go. */
156: for (a2=a1+1; a2<=a3;) {
157: if (*a2&01) {
158: a2++; /* line is marked, skip it */
159: dot = a1; /* dot left after line deletion */
160: } else
161: *a1++ = *a2++; /* unmarked, copy it */
162: }
163: dol = a1-1;
164: if (dot>dol)
165: dot = dol;
166: change();
167: }
168:
169: bool cflag;
170: int scount, slines, stotal;
171:
172: substitute(c)
173: int c;
174: {
175: register line *addr;
176: register int n;
177: int gsubf, hopcount;
178:
179: gsubf = compsub(c);
180: if(FIXUNDO)
181: save12(), undkind = UNDCHANGE;
182: stotal = 0;
183: slines = 0;
184: for (addr = addr1; addr <= addr2; addr++) {
185: scount = hopcount = 0;
186: if (dosubcon(0, addr) == 0)
187: continue;
188: if (gsubf) {
189: /*
190: * The loop can happen from s/\</&/g
191: * but we don't want to break other, reasonable cases.
192: */
193: while (*loc2) {
194: if (++hopcount > sizeof linebuf)
195: error("substitution loop");
196: if (dosubcon(1, addr) == 0)
197: break;
198: }
199: }
200: if (scount) {
201: stotal += scount;
202: slines++;
203: putmark(addr);
204: n = append(getsub, addr);
205: addr += n;
206: addr2 += n;
207: }
208: }
209: if (stotal == 0 && !inglobal && !cflag)
210: error("Fail|Substitute pattern match failed");
211: snote(stotal, slines);
212: return (stotal);
213: }
214:
/*
 * compsub - parse what follows a substitute command character.
 *
 * ch selects the form:
 *   's'  reads a delimiter, compiles the pattern with compile() and
 *        reads the replacement with comprhs(); if the next character
 *        ends the command or is one of "gcr" the previous
 *        substitute is repeated instead.
 *   '~'  repeats the previous substitute with uselastre set.
 *   '&'  repeats the previous substitute.
 * Trailing 'g' and 'c' toggle gsubf and cflag, 'r' sets uselastre.
 * Returns gsubf.  gsubf is static, so together with cflag it keeps its
 * value between calls when EDCOMPATIBLE is set; otherwise both are
 * cleared on entry.
 */
215: compsub(ch)
216: {
217: register int seof, c, uselastre;
218: static int gsubf;
219:
220: if (!value(EDCOMPATIBLE))
221: gsubf = cflag = 0;
222: uselastre = 0;
223: switch (ch) {
224:
225: case 's':
226: ignore(skipwh());
227: seof = getchar();
/* No pattern given: put the character back and repeat the last substitute. */
228: if (endcmd(seof) || any(seof, "gcr")) {
229: ungetchar(seof);
230: goto redo;
231: }
232: if (isalpha(seof) || isdigit(seof))
233: error("Substitute needs re|Missing regular expression for substitute");
/* compile() returns the delimiter that comprhs() must stop at. */
234: seof = compile(seof, 1);
235: uselastre = 1;
236: comprhs(seof);
237: gsubf = 0;
238: cflag = 0;
239: break;
240:
241: case '~':
242: uselastre = 1;
243: /* fall into ... */
244: case '&':
245: redo:
/* A repeat needs both a previous expression and a previous substitute. */
246: if (re.Expbuf[0] == 0)
247: error("No previous re|No previous regular expression");
248: if (subre.Expbuf[0] == 0)
249: error("No previous substitute re|No previous substitute to repeat");
250: break;
251: }
/* Consume option letters; the first other character ends the scan. */
252: for (;;) {
253: c = getchar();
254: switch (c) {
255:
256: case 'g':
257: gsubf = !gsubf;
258: continue;
259:
260: case 'c':
261: cflag = !cflag;
262: continue;
263:
264: case 'r':
265: uselastre = 1;
266: continue;
267:
268: default:
269: ungetchar(c);
270: setcount();
271: newline();
/*
 * NOTE(review): savere/resre are defined outside this file section;
 * presumably they store the current expression into subre or reload it
 * from subre - confirm against ex_re.h.
 */
272: if (uselastre)
273: savere(subre);
274: else
275: resre(subre);
276: return (gsubf);
277: }
278: }
279: }
280:
/*
 * comprhs - read the replacement text of a substitute into rhsbuf.
 *
 * Characters are read up to the delimiter seof and stored in rhsbuf,
 * NUL terminated.  Characters that are to act specially at substitution
 * time are stored with the QUOTE bit or'ed in.  An error is raised when
 * the text would not fit in rhsbuf.
 */
281: comprhs(seof)
282: int seof;
283: {
284: register char *rp, *orp;
285: register int c;
286: char orhsbuf[RHSSIZE];
287:
288: rp = rhsbuf;
/*
 * NOTE(review): CP is defined elsewhere; presumably it copies the old
 * contents of rhsbuf into orhsbuf, which the '~' handling below reads.
 */
289: CP(orhsbuf, rp);
290: for (;;) {
291: c = getchar();
292: if (c == seof)
293: break;
294: switch (c) {
295:
296: case '\\':
297: c = getchar();
298: if (c == EOF) {
299: ungetchar(c);
300: break;
301: }
302: if (value(MAGIC)) {
303: /*
304: * When "magic", \& turns into a plain &,
305: * and all other chars work fine quoted.
306: */
307: if (c != '&')
308: c |= QUOTE;
309: break;
310: }
/*
 * Reached by falling through when not "magic" (after a backslash), or
 * by the goto below for an unescaped '~', '&', newline or EOF when
 * "magic": '~' splices in the saved replacement text, anything else is
 * stored with QUOTE set.
 */
311: magic:
312: if (c == '~') {
313: for (orp = orhsbuf; *orp; *rp++ = *orp++)
314: if (rp >= &rhsbuf[RHSSIZE - 1])
315: goto toobig;
316: continue;
317: }
318: c |= QUOTE;
319: break;
320:
321: case '\n':
322: case EOF:
/*
 * Unless more text is pending in globp, a newline or EOF ends the
 * replacement; it is pushed back for the caller.  Otherwise control
 * falls into the '~' / '&' case below.
 */
323: if (!(globp && globp[0])) {
324: ungetchar(c);
325: goto endrhs;
326: }
327:
328: case '~':
329: case '&':
330: if (value(MAGIC))
331: goto magic;
332: break;
333: }
/* One byte is always kept free for the terminating NUL. */
334: if (rp >= &rhsbuf[RHSSIZE - 1]) {
335: toobig:
336: *rp = 0;
/* NOTE(review): the message says 256 but the test uses RHSSIZE - confirm they agree. */
337: error("Replacement pattern too long@- limit 256 characters");
338: }
339: *rp++ = c;
340: }
341: endrhs:
342: *rp++ = 0;
343: }
344:
345: getsub()
346: {
347: register char *p;
348:
349: if ((p = linebp) == 0)
350: return (EOF);
351: strcLIN(p);
352: linebp = 0;
353: return (0);
354: }
355:
356: dosubcon(f, a)
357: bool f;
358: line *a;
359: {
360:
361: if (execute(f, a) == 0)
362: return (0);
363: if (confirmed(a)) {
364: dosub();
365: scount++;
366: }
367: return (1);
368: }
369:
370: confirmed(a)
371: line *a;
372: {
373: register int c, ch;
374:
375: if (cflag == 0)
376: return (1);
377: pofix();
378: pline(lineno(a));
379: if (inopen)
380: putchar('\n' | QUOTE);
381: c = column(loc1 - 1);
382: ugo(c - 1 + (inopen ? 1 : 0), ' ');
383: ugo(column(loc2 - 1) - c, '^');
384: flush();
385: ch = c = getkey();
386: again:
387: if (c == '\r')
388: c = '\n';
389: if (inopen)
390: putchar(c), flush();
391: if (c != '\n' && c != EOF) {
392: c = getkey();
393: goto again;
394: }
395: noteinp();
396: return (ch == 'y');
397: }
398:
399: getch()
400: {
401: char c;
402:
403: if (read(2, &c, 1) != 1)
404: return (EOF);
405: return (c & TRIM);
406: }
407:
/*
 * ugo - output the character `with' cnt times; nothing when cnt <= 0.
 */
ugo(cnt, with)
	int with;
	int cnt;
{
	while (cnt > 0) {
		putchar(with);
		cnt--;
	}
}
418:
/* Number of upcoming characters fixcase() still has to convert. */
419: int casecnt;
/* Direction of that conversion: nonzero = to upper case, zero = to lower. */
420: bool destuc;
421:
/*
 * dosub - build the substituted line in genbuf and hand it to strcLIN().
 *
 * The text of linebuf ahead of loc1 is copied, rhsbuf is expanded in
 * place of the match, and the text from loc2 on is appended.  loc2 is
 * moved to the linebuf position that corresponds to the end of the
 * inserted text.  Raises "Line overflow" when genbuf would be exceeded.
 */
422: dosub()
423: {
424: register char *lp, *sp, *rp;
425: int c;
426:
427: lp = linebuf;
428: sp = genbuf;
429: rp = rhsbuf;
430: while (lp < loc1)
431: *sp++ = *lp++;
432: casecnt = 0;
433: while (c = *rp++) {
434: /* ^V <return> from vi to split lines */
435: if (c == '\r')
436: c = '\n';
437:
/* QUOTE-marked characters were flagged as special by comprhs(). */
438: if (c & QUOTE)
439: switch (c & TRIM) {
440:
/* & : insert the matched text loc1..loc2. */
441: case '&':
442: sp = place(sp, loc1, loc2);
443: if (sp == 0)
444: goto ovflo;
445: continue;
446:
/* \l and \u convert one character, \L and \U up to LBSIZE characters. */
447: case 'l':
448: casecnt = 1;
449: destuc = 0;
450: continue;
451:
452: case 'L':
453: casecnt = LBSIZE;
454: destuc = 0;
455: continue;
456:
457: case 'u':
458: casecnt = 1;
459: destuc = 1;
460: continue;
461:
462: case 'U':
463: casecnt = LBSIZE;
464: destuc = 1;
465: continue;
466:
/* \E and \e end case conversion. */
467: case 'E':
468: case 'e':
469: casecnt = 0;
470: continue;
471: }
/*
 * A marked digit within 1..nbra inserts the text of that \( \) group.
 * NOTE(review): the c < 0 test presumably relies on plain char being
 * signed so that QUOTE-marked bytes read back negative - TODO confirm.
 * The TRIM mask is applied to c as a side effect of this test.
 */
472: if (c < 0 && (c &= TRIM) >= '1' && c < nbra + '1') {
473: sp = place(sp, braslist[c - '1'], braelist[c - '1']);
474: if (sp == 0)
475: goto ovflo;
476: continue;
477: }
478: if (casecnt)
479: *sp++ = fixcase(c & TRIM);
480: else
481: *sp++ = c & TRIM;
/* The ovflo label sits inside this if so the gotos above share the error call. */
482: if (sp >= &genbuf[LBSIZE])
483: ovflo:
484: error("Line overflow@in substitute");
485: }
/* Append the unmatched tail and translate the genbuf offset into linebuf. */
486: lp = loc2;
487: loc2 = sp + (linebuf - genbuf);
488: while (*sp++ = *lp++)
489: if (sp >= &genbuf[LBSIZE])
490: goto ovflo;
491: strcLIN(genbuf);
492: }
493:
494: fixcase(c)
495: register int c;
496: {
497:
498: if (casecnt == 0)
499: return (c);
500: casecnt--;
501: if (destuc) {
502: if (islower(c))
503: c = toupper(c);
504: } else
505: if (isupper(c))
506: c = tolower(c);
507: return (c);
508: }
509:
510: char *
511: place(sp, l1, l2)
512: register char *sp, *l1, *l2;
513: {
514:
515: while (l1 < l2) {
516: *sp++ = fixcase(*l1++);
517: if (sp >= &genbuf[LBSIZE])
518: return (0);
519: }
520: return (sp);
521: }
522:
/*
 * snote - report how many substitutions were made.
 *
 * Prints nothing unless notable(total) holds.  The line count is added
 * only when it is neither 1 nor equal to the substitution count.
 */
snote(total, lines)
	register int total, lines;
{
	if (notable(total)) {
		printf(mesg("%d subs|%d substitutions"), total);
		if (!(lines == 1 || lines == total))
			printf(" on %d lines", lines);
		noonl();
		flush();
	}
}
535:
536: compile(eof, oknl)
537: int eof;
538: int oknl;
539: {
540: register int c;
541: register char *ep;
542: char *lastep;
543: char bracket[NBRA], *bracketp, *rhsp;
544: int cclcnt;
545:
546: if (isalpha(eof) || isdigit(eof))
547: error("Regular expressions cannot be delimited by letters or digits");
548: ep = expbuf;
549: c = getchar();
550: if (eof == '\\')
551: switch (c) {
552:
553: case '/':
554: case '?':
555: if (scanre.Expbuf[0] == 0)
556: error("No previous scan re|No previous scanning regular expression");
557: resre(scanre);
558: return (c);
559:
560: case '&':
561: if (subre.Expbuf[0] == 0)
562: error("No previous substitute re|No previous substitute regular expression");
563: resre(subre);
564: return (c);
565:
566: default:
567: error("Badly formed re|Regular expression \\ must be followed by / or ?");
568: }
569: if (c == eof || c == '\n' || c == EOF) {
570: if (*ep == 0)
571: error("No previous re|No previous regular expression");
572: if (c == '\n' && oknl == 0)
573: error("Missing closing delimiter@for regular expression");
574: if (c != eof)
575: ungetchar(c);
576: return (eof);
577: }
578: bracketp = bracket;
579: nbra = 0;
580: circfl = 0;
581: if (c == '^') {
582: c = getchar();
583: circfl++;
584: }
585: ungetchar(c);
586: for (;;) {
587: if (ep >= &expbuf[ESIZE - 2])
588: complex:
589: cerror("Re too complex|Regular expression too complicated");
590: c = getchar();
591: if (c == eof || c == EOF) {
592: if (bracketp != bracket)
593: cerror("Unmatched \\(|More \\('s than \\)'s in regular expression");
594: *ep++ = CEOFC;
595: if (c == EOF)
596: ungetchar(c);
597: return (eof);
598: }
599: if (value(MAGIC)) {
600: if (c != '*' || ep == expbuf)
601: lastep = ep;
602: } else
603: if (c != '\\' || peekchar() != '*' || ep == expbuf)
604: lastep = ep;
605: switch (c) {
606:
607: case '\\':
608: c = getchar();
609: switch (c) {
610:
611: case '(':
612: if (nbra >= NBRA)
613: cerror("Awash in \\('s!|Too many \\('d subexressions in a regular expression");
614: *bracketp++ = nbra;
615: *ep++ = CBRA;
616: *ep++ = nbra++;
617: continue;
618:
619: case ')':
620: if (bracketp <= bracket)
621: cerror("Extra \\)|More \\)'s than \\('s in regular expression");
622: *ep++ = CKET;
623: *ep++ = *--bracketp;
624: continue;
625:
626: case '<':
627: *ep++ = CBRC;
628: continue;
629:
630: case '>':
631: *ep++ = CLET;
632: continue;
633: }
634: if (value(MAGIC) == 0)
635: magic:
636: switch (c) {
637:
638: case '.':
639: *ep++ = CDOT;
640: continue;
641:
642: case '~':
643: rhsp = rhsbuf;
644: while (*rhsp) {
645: if (*rhsp & QUOTE) {
646: c = *rhsp & TRIM;
647: if (c == '&')
648: error("Replacement pattern contains &@- cannot use in re");
649: if (c >= '1' && c <= '9')
650: error("Replacement pattern contains \\d@- cannot use in re");
651: }
652: if (ep >= &expbuf[ESIZE-2])
653: goto complex;
654: *ep++ = CCHR;
655: *ep++ = *rhsp++ & TRIM;
656: }
657: continue;
658:
659: case '*':
660: if (ep == expbuf)
661: break;
662: if (*lastep == CBRA || *lastep == CKET)
663: cerror("Illegal *|Can't * a \\( ... \\) in regular expression");
664: if (*lastep == CCHR && (lastep[1] & QUOTE))
665: cerror("Illegal *|Can't * a \\n in regular expression");
666: *lastep |= STAR;
667: continue;
668:
669: case '[':
670: *ep++ = CCL;
671: *ep++ = 0;
672: cclcnt = 1;
673: c = getchar();
674: if (c == '^') {
675: c = getchar();
676: ep[-2] = NCCL;
677: }
678: if (c == ']')
679: cerror("Bad character class|Empty character class '[]' or '[^]' cannot match");
680: while (c != ']') {
681: if (c == '\\' && any(peekchar(), "]-^\\"))
682: c = getchar() | QUOTE;
683: if (c == '\n' || c == EOF)
684: cerror("Missing ]");
685: *ep++ = c;
686: cclcnt++;
687: if (ep >= &expbuf[ESIZE])
688: goto complex;
689: c = getchar();
690: }
691: lastep[1] = cclcnt;
692: continue;
693: }
694: if (c == EOF) {
695: ungetchar(EOF);
696: c = '\\';
697: goto defchar;
698: }
699: *ep++ = CCHR;
700: if (c == '\n')
701: cerror("No newlines in re's|Can't escape newlines into regular expressions");
702: /*
703: if (c < '1' || c > NBRA + '1') {
704: */
705: *ep++ = c;
706: continue;
707: /*
708: }
709: c -= '1';
710: if (c >= nbra)
711: cerror("Bad \\n|\\n in regular expression with n greater than the number of \\('s");
712: *ep++ = c | QUOTE;
713: continue;
714: */
715:
716: case '\n':
717: if (oknl) {
718: ungetchar(c);
719: *ep++ = CEOFC;
720: return (eof);
721: }
722: cerror("Badly formed re|Missing closing delimiter for regular expression");
723:
724: case '$':
725: if (peekchar() == eof || peekchar() == EOF || oknl && peekchar() == '\n') {
726: *ep++ = CDOL;
727: continue;
728: }
729: goto defchar;
730:
731: case '.':
732: case '~':
733: case '*':
734: case '[':
735: if (value(MAGIC))
736: goto magic;
737: defchar:
738: default:
739: *ep++ = CCHR;
740: *ep++ = c;
741: continue;
742: }
743: }
744: }
745:
746: cerror(s)
747: char *s;
748: {
749:
750: expbuf[0] = 0;
751: error(s);
752: }
753:
754: same(a, b)
755: register int a, b;
756: {
757:
758: return (a == b || value(IGNORECASE) &&
759: ((islower(a) && toupper(a) == b) || (islower(b) && toupper(b) == a)));
760: }
761:
762: char *locs;
763:
764: execute(gf, addr)
765: line *addr;
766: {
767: register char *p1, *p2;
768: register int c;
769:
770: if (gf) {
771: if (circfl)
772: return (0);
773: locs = p1 = loc2;
774: } else {
775: if (addr == zero)
776: return (0);
777: p1 = linebuf;
778: getline(*addr);
779: locs = 0;
780: }
781: p2 = expbuf;
782: if (circfl) {
783: loc1 = p1;
784: return (advance(p1, p2));
785: }
786: /* fast check for first character */
787: if (*p2 == CCHR) {
788: c = p2[1];
789: do {
790: if (c != *p1 && (!value(IGNORECASE) ||
791: !((islower(c) && toupper(c) == *p1) ||
792: (islower(*p1) && toupper(*p1) == c))))
793: continue;
794: if (advance(p1, p2)) {
795: loc1 = p1;
796: return (1);
797: }
798: } while (*p1++);
799: return (0);
800: }
801: /* regular algorithm */
802: do {
803: if (advance(p1, p2)) {
804: loc1 = p1;
805: return (1);
806: }
807: } while (*p1++);
808: return (0);
809: }
810:
/* True for a letter or underscore.  The argument is evaluated twice. */
811: #define uletter(c) (isalpha(c) || c == '_')
812:
/*
 * advance - try to match the compiled expression at ep against the
 * text at lp.
 *
 * Returns 1 when the whole expression matches starting at lp, after
 * storing the end of the match in loc2; returns 0 otherwise.  Starred
 * items take the longest possible run first and then back off one
 * character at a time, calling advance() recursively on the rest.
 */
813: advance(lp, ep)
814: register char *lp, *ep;
815: {
816: register char *curlp;
/* NOTE(review): sp, sp1 and c are referenced only by the commented-out code below. */
817: char *sp, *sp1;
818: int c;
819:
820: for (;;) switch (*ep++) {
821:
/* Literal character, compared through same(). */
822: case CCHR:
823: /* useless
824: if (*ep & QUOTE) {
825: c = *ep++ & TRIM;
826: sp = braslist[c];
827: sp1 = braelist[c];
828: while (sp < sp1) {
829: if (!same(*sp, *lp))
830: return (0);
831: sp++, lp++;
832: }
833: continue;
834: }
835: */
836: if (!same(*ep, *lp))
837: return (0);
838: ep++, lp++;
839: continue;
840:
/* Any character except the terminating NUL. */
841: case CDOT:
842: if (*lp++)
843: continue;
844: return (0);
845:
/* End of line. */
846: case CDOL:
847: if (*lp == 0)
848: continue;
849: return (0);
850:
/* End of expression: success, record where the match ends. */
851: case CEOFC:
852: loc2 = lp;
853: return (1);
854:
/* Character class; *ep is the count byte, so ep += *ep skips the class. */
855: case CCL:
856: if (cclass(ep, *lp++, 1)) {
857: ep += *ep;
858: continue;
859: }
860: return (0);
861:
862: case NCCL:
863: if (cclass(ep, *lp++, 0)) {
864: ep += *ep;
865: continue;
866: }
867: return (0);
868:
/* Record where group number *ep begins and ends. */
869: case CBRA:
870: braslist[*ep++] = lp;
871: continue;
872:
873: case CKET:
874: braelist[*ep++] = lp;
875: continue;
876:
/*
 * Starred items: each case leaves lp one past the first character that
 * failed to match, which the lp-- at the top of the star loop undoes.
 */
877: case CDOT|STAR:
878: curlp = lp;
879: while (*lp++)
880: continue;
881: goto star;
882:
883: case CCHR|STAR:
884: curlp = lp;
885: while (same(*lp, *ep))
886: lp++;
887: lp++;
888: ep++;
889: goto star;
890:
891: case CCL|STAR:
892: case NCCL|STAR:
893: curlp = lp;
894: while (cclass(ep, *lp++, ep[-1] == (CCL|STAR)))
895: continue;
896: ep += *ep;
897: goto star;
/* Back off from the longest run, stopping at curlp or at locs. */
898: star:
899: do {
900: lp--;
901: if (lp == locs)
902: break;
903: if (advance(lp, ep))
904: return (1);
905: } while (lp > curlp);
906: return (0);
907:
/* Emitted by compile() for \< : at line start, or where a word character follows a non-word one. */
908: case CBRC:
909: if (lp == linebuf)
910: continue;
911: if ((isdigit(*lp) || uletter(*lp)) && !uletter(lp[-1]) && !isdigit(lp[-1]))
912: continue;
913: return (0);
914:
/* Emitted by compile() for \> : the next character is not a word character. */
915: case CLET:
916: if (!uletter(*lp) && !isdigit(*lp))
917: continue;
918: return (0);
919:
920: default:
921: error("Re internal error");
922: }
923: }
924:
925: cclass(set, c, af)
926: register char *set;
927: register int c;
928: int af;
929: {
930: register int n;
931:
932: if (c == 0)
933: return (0);
934: if (value(IGNORECASE) && isupper(c))
935: c = tolower(c);
936: n = *set++;
937: while (--n)
938: if (n > 2 && set[1] == '-') {
939: if (c >= (set[0] & TRIM) && c <= (set[2] & TRIM))
940: return (af);
941: set += 3;
942: n -= 2;
943: } else
944: if ((*set++ & TRIM) == c)
945: return (af);
946: return (!af);
947: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.