|
|
1.1 root 1: /* Copyright (c) 1981 Regents of the University of California */
2: static char *sccsid = "@(#)ex_re.c 7.2 10/16/81";
3: #include "ex.h"
4: #include "ex_re.h"
5:
6: /*
7: * Global, substitute and regular expressions.
8: * Very similar to ed, with some re extensions and
9: * confirmed substitute.
10: */
/*
 * global: collect the command text following the regular expression
 * and run it once for every addressed line whose execute(0, line)
 * result equals k (so k != 0 selects matching lines and k == 0
 * selects non-matching lines).
 *
 * The pattern is compiled with compile() and saved as the scanning
 * re.  Lines are selected by setting bit 01 in their line-table
 * entry; the bit is cleared again before the commands are run on
 * that line.  The caller's inglobal, globp and Command values are
 * saved on entry and restored before returning.
 */
global(k)
	bool k;
{
	register char *gp;
	register int c;
	register line *a1;
	char globuf[GBSIZE], *Cwas;
	int lines = lineDOL();		/* passed to netchHAD() at the end */
	int oinglobal = inglobal;
	char *oglobp = globp;

	Cwas = Command;
	/*
	 * States of inglobal:
	 *  0: ordinary - not in a global command.
	 *  1: text coming from some buffer, not tty.
	 *  2: like 1, but the source of the buffer is a global command.
	 * Hence you're only in a global command if inglobal==2. This
	 * strange sounding convention is historically derived from
	 * everybody simulating a global command.
	 */
	if (inglobal==2)
		error("Global within global@not allowed");
	markDOT();
	setall();
	nonzero();
	if (skipend())
		error("Global needs re|Missing regular expression for global");
	/* The character after the command is the pattern delimiter. */
	c = getchar();
	ignore(compile(c, 1));
	savere(scanre);
	/*
	 * Gather the command list into globuf.  A backslash-newline is
	 * stored as a bare newline, "\\" is pushed back so the second
	 * backslash is rescanned, and any other escaped character keeps
	 * its backslash.  EOF ends the list like a newline.
	 */
	gp = globuf;
	while ((c = getchar()) != '\n') {
		switch (c) {

		case EOF:
			c = '\n';
			goto brkwh;

		case '\\':
			c = getchar();
			switch (c) {

			case '\\':
				ungetchar(c);
				break;

			case '\n':
				break;

			default:
				*gp++ = '\\';
				break;
			}
			break;
		}
		*gp++ = c;
		/* Leave room for the trailing newline and NUL added below. */
		if (gp >= &globuf[GBSIZE - 2])
			error("Global command too long");
	}
brkwh:
	ungetchar(c);
out:
	newline();
	*gp++ = c;
	*gp++ = 0;
	saveall();
	inglobal = 2;
	/* Pass 1: clear every mark, then mark the selected lines. */
	for (a1 = one; a1 <= dol; a1++) {
		*a1 &= ~01;
		if (a1 >= addr1 && a1 <= addr2 && execute(0, a1) == k)
			*a1 |= 01;
	}
#ifdef notdef
/*
 * This code is commented out for now.  The problem is that we don't
 * fix up the undo area the way we should.  Basically, I think what has
 * to be done is to copy the undo area down (since we shrunk everything)
 * and move the various pointers into it down too.  I will do this later
 * when I have time. (Mark, 10-20-80)
 */
	/*
	 * Special case: g/.../d (avoid n^2 algorithm)
	 */
	if (globuf[0]=='d' && globuf[1]=='\n' && globuf[2]=='\0') {
		gdelete();
		return;
	}
#endif
	/* inopen is parked at -1 while the commands run; reset to 1 below. */
	if (inopen)
		inopen = -1;
	/*
	 * Now for each marked line, set dot there and do the commands.
	 * Note the n^2 behavior here for lots of lines matching.
	 * This is really needed: in some cases you could delete lines,
	 * causing a marked line to be moved before a1 and missed if
	 * we didn't restart at zero each time.
	 */
	for (a1 = one; a1 <= dol; a1++) {
		if (*a1 & 01) {
			*a1 &= ~01;
			dot = a1;
			globp = globuf;
			commands(1, 1);
			/* Restart the scan; the loop's a1++ steps past zero. */
			a1 = zero;
		}
	}
	/* Restore the state saved on entry. */
	globp = oglobp;
	inglobal = oinglobal;
	endline = 1;
	Command = Cwas;
	netchHAD(lines);
	setlastchar(EOF);
	if (inopen) {
		ungetchar(EOF);
		inopen = 1;
	}
}
129:
130: /*
131: * gdelete: delete inside a global command. Handles the
132: * special case g/r.e./d. All lines to be deleted have
133: * already been marked. Squeeze the remaining lines together.
134: * Note that other cases such as g/r.e./p, g/r.e./s/r.e.2/rhs/,
135: * and g/r.e./.,/r.e.2/d are not treated specially. There is no
136: * good reason for this except the question: where to you draw the line?
137: */
138: gdelete()
139: {
140: register line *a1, *a2, *a3;
141:
142: a3 = dol;
143: /* find first marked line. can skip all before it */
144: for (a1=zero; (*a1&01)==0; a1++)
145: if (a1>=a3)
146: return;
147: /* copy down unmarked lines, compacting as we go. */
148: for (a2=a1+1; a2<=a3;) {
149: if (*a2&01) {
150: a2++; /* line is marked, skip it */
151: dot = a1; /* dot left after line deletion */
152: } else
153: *a1++ = *a2++; /* unmarked, copy it */
154: }
155: dol = a1-1;
156: if (dot>dol)
157: dot = dol;
158: change();
159: }
160:
/*
 * State shared by the substitute routines.
 *  cflag:  confirm flag, toggled by a 'c' suffix in compsub(); when
 *          zero, confirmed() accepts every match without asking.
 *  scount: substitutions made on the current line (reset per line in
 *          substitute(), bumped in dosubcon()).
 *  slines: number of lines on which at least one substitution was made.
 *  stotal: total substitutions made by the current command.
 */
bool cflag;
int scount, slines, stotal;
163:
164: substitute(c)
165: int c;
166: {
167: register line *addr;
168: register int n;
169: int gsubf, hopcount;
170:
171: gsubf = compsub(c);
172: if(FIXUNDO)
173: save12(), undkind = UNDCHANGE;
174: stotal = 0;
175: slines = 0;
176: for (addr = addr1; addr <= addr2; addr++) {
177: scount = hopcount = 0;
178: if (dosubcon(0, addr) == 0)
179: continue;
180: if (gsubf) {
181: /*
182: * The loop can happen from s/\</&/g
183: * but we don't want to break other, reasonable cases.
184: */
185: while (*loc2) {
186: if (++hopcount > sizeof linebuf)
187: error("substitution loop");
188: if (dosubcon(1, addr) == 0)
189: break;
190: }
191: }
192: if (scount) {
193: stotal += scount;
194: slines++;
195: putmark(addr);
196: n = append(getsub, addr);
197: addr += n;
198: addr2 += n;
199: }
200: }
201: if (stotal == 0 && !inglobal && !cflag)
202: error("Fail|Substitute pattern match failed");
203: snote(stotal, slines);
204: return (stotal);
205: }
206:
/*
 * compsub: parse the remainder of a substitute command.  ch is the
 * command character: 's' reads a new pattern and replacement (or, if
 * only suffix letters or the end of the command follow, repeats the
 * previous substitute), while '~' and '&' repeat the previous one.
 * Trailing 'g', 'c' and 'r' suffixes are then consumed.
 *
 * Returns the "global" (g) flag.  gsubf is static, so unless it is
 * reset below its value carries over from one call to the next.
 */
compsub(ch)
{
	register int seof, c, uselastre;
	static int gsubf;

	/* Outside ed-compatible mode the g and c flags never persist. */
	if (!value(EDCOMPATIBLE))
		gsubf = cflag = 0;
	uselastre = 0;
	switch (ch) {

	case 's':
		ignore(skipwh());
		seof = getchar();
		/* No delimiter: treat as a repeat of the last substitute. */
		if (endcmd(seof) || any(seof, "gcr")) {
			ungetchar(seof);
			goto redo;
		}
		if (isalpha(seof) || isdigit(seof))
			error("Substitute needs re|Missing regular expression for substitute");
		seof = compile(seof, 1);
		uselastre = 1;
		comprhs(seof);
		gsubf = 0;
		cflag = 0;
		break;

	case '~':
		uselastre = 1;
		/* fall into ... */
	case '&':
	redo:
		if (re.Expbuf[0] == 0)
			error("No previous re|No previous regular expression");
		if (subre.Expbuf[0] == 0)
			error("No previous substitute re|No previous substitute to repeat");
		break;
	}
	/* Each suffix letter toggles (g, c) or sets (r) its option. */
	for (;;) {
		c = getchar();
		switch (c) {

		case 'g':
			gsubf = !gsubf;
			continue;

		case 'c':
			cflag = !cflag;
			continue;

		case 'r':
			uselastre = 1;
			continue;

		default:
			ungetchar(c);
			setcount();
			newline();
			/*
			 * Either save the current re as the substitute re,
			 * or restore the saved substitute re for this run.
			 */
			if (uselastre)
				savere(subre);
			else
				resre(subre);
			return (gsubf);
		}
	}
}
272:
/*
 * comprhs: read the replacement text of a substitute, up to the
 * delimiter seof, into rhsbuf.  Characters with a special meaning to
 * dosub() are stored with the QUOTE bit set.  The text is NUL
 * terminated; overflow of rhsbuf is reported through error().
 */
comprhs(seof)
	int seof;
{
	register char *rp, *orp;
	register int c;
	char orhsbuf[RHSSIZE];

	rp = rhsbuf;
	/*
	 * Keep the old contents of rhsbuf so '~' can splice them in.
	 * NOTE(review): CP is presumably a string copy (dest, src) --
	 * confirm against its definition.
	 */
	CP(orhsbuf, rp);
	for (;;) {
		c = getchar();
		if (c == seof)
			break;
		switch (c) {

		case '\\':
			c = getchar();
			if (c == EOF) {
				ungetchar(c);
				break;
			}
			if (value(MAGIC)) {
				/*
				 * When "magic", \& turns into a plain &,
				 * and all other chars work fine quoted.
				 */
				if (c != '&')
					c |= QUOTE;
				break;
			}
magic:
			/* '~' expands to the previous replacement text. */
			if (c == '~') {
				for (orp = orhsbuf; *orp; *rp++ = *orp++)
					if (rp >= &rhsbuf[RHSSIZE - 1])
						goto toobig;
				continue;
			}
			c |= QUOTE;
			break;

		case '\n':
		case EOF:
			/*
			 * Unless input is coming from a non-empty globp
			 * buffer, a newline or EOF ends the replacement.
			 * Otherwise control falls into the next case.
			 */
			if (!(globp && globp[0])) {
				ungetchar(c);
				goto endrhs;
			}

		case '~':
		case '&':
			if (value(MAGIC))
				goto magic;
			break;
		}
		if (rp >= &rhsbuf[RHSSIZE - 1]) {
toobig:
			*rp = 0;
			error("Replacement pattern too long@- limit 256 characters");
		}
		*rp++ = c;
	}
endrhs:
	*rp++ = 0;
}
336:
337: getsub()
338: {
339: register char *p;
340:
341: if ((p = linebp) == 0)
342: return (EOF);
343: strcLIN(p);
344: linebp = 0;
345: return (0);
346: }
347:
348: dosubcon(f, a)
349: bool f;
350: line *a;
351: {
352:
353: if (execute(f, a) == 0)
354: return (0);
355: if (confirmed(a)) {
356: dosub();
357: scount++;
358: }
359: return (1);
360: }
361:
362: confirmed(a)
363: line *a;
364: {
365: register int c, ch;
366:
367: if (cflag == 0)
368: return (1);
369: pofix();
370: pline(lineno(a));
371: if (inopen)
372: putchar('\n' | QUOTE);
373: c = column(loc1 - 1);
374: ugo(c - 1 + (inopen ? 1 : 0), ' ');
375: ugo(column(loc2 - 1) - c, '^');
376: flush();
377: ch = c = getkey();
378: again:
379: if (c == '\r')
380: c = '\n';
381: if (inopen)
382: putchar(c), flush();
383: if (c != '\n' && c != EOF) {
384: c = getkey();
385: goto again;
386: }
387: noteinp();
388: return (ch == 'y');
389: }
390:
391: getch()
392: {
393: char c;
394:
395: if (read(2, &c, 1) != 1)
396: return (EOF);
397: return (c & TRIM);
398: }
399:
/*
 * ugo: output the character `with' cnt times.  Nothing is written
 * when cnt is zero or negative.
 */
ugo(cnt, with)
	int with;
	int cnt;
{

	for (; cnt > 0; cnt--)
		putchar(with);
}
410:
/*
 * Case-conversion state for the replacement text, set by dosub()
 * and consumed by fixcase().
 *  casecnt: how many more characters fixcase() should convert.
 *  destuc:  nonzero to convert to upper case, zero for lower case.
 */
int casecnt;
bool destuc;
413:
414: dosub()
415: {
416: register char *lp, *sp, *rp;
417: int c;
418:
419: lp = linebuf;
420: sp = genbuf;
421: rp = rhsbuf;
422: while (lp < loc1)
423: *sp++ = *lp++;
424: casecnt = 0;
425: while (c = *rp++) {
426: /* ^V <return> from vi to split lines */
427: if (c == '\r')
428: c = '\n';
429:
430: if (c & QUOTE)
431: switch (c & TRIM) {
432:
433: case '&':
434: sp = place(sp, loc1, loc2);
435: if (sp == 0)
436: goto ovflo;
437: continue;
438:
439: case 'l':
440: casecnt = 1;
441: destuc = 0;
442: continue;
443:
444: case 'L':
445: casecnt = LBSIZE;
446: destuc = 0;
447: continue;
448:
449: case 'u':
450: casecnt = 1;
451: destuc = 1;
452: continue;
453:
454: case 'U':
455: casecnt = LBSIZE;
456: destuc = 1;
457: continue;
458:
459: case 'E':
460: case 'e':
461: casecnt = 0;
462: continue;
463: }
464: if (c < 0 && (c &= TRIM) >= '1' && c < nbra + '1') {
465: sp = place(sp, braslist[c - '1'], braelist[c - '1']);
466: if (sp == 0)
467: goto ovflo;
468: continue;
469: }
470: if (casecnt)
471: *sp++ = fixcase(c & TRIM);
472: else
473: *sp++ = c & TRIM;
474: if (sp >= &genbuf[LBSIZE])
475: ovflo:
476: error("Line overflow@in substitute");
477: }
478: lp = loc2;
479: loc2 = sp + (linebuf - genbuf);
480: while (*sp++ = *lp++)
481: if (sp >= &genbuf[LBSIZE])
482: goto ovflo;
483: strcLIN(genbuf);
484: }
485:
486: fixcase(c)
487: register int c;
488: {
489:
490: if (casecnt == 0)
491: return (c);
492: casecnt--;
493: if (destuc) {
494: if (islower(c))
495: c = toupper(c);
496: } else
497: if (isupper(c))
498: c = tolower(c);
499: return (c);
500: }
501:
502: char *
503: place(sp, l1, l2)
504: register char *sp, *l1, *l2;
505: {
506:
507: while (l1 < l2) {
508: *sp++ = fixcase(*l1++);
509: if (sp >= &genbuf[LBSIZE])
510: return (0);
511: }
512: return (sp);
513: }
514:
/*
 * snote: report how many substitutions were made, provided
 * notable(total) says the count is worth mentioning.  The number of
 * lines affected is added unless it is 1 or equal to the total.
 */
snote(total, lines)
	register int total, lines;
{

	if (notable(total)) {
		printf(mesg("%d subs|%d substitutions"), total);
		if (!(lines == 1 || lines == total))
			printf(" on %d lines", lines);
		noonl();
		flush();
	}
}
527:
/*
 * compile: read a regular expression from the input, terminated by
 * the delimiter eof, and compile it into expbuf.
 *
 *	eof	the delimiter character; letters and digits are refused.
 *		When eof is a backslash, the next character selects a
 *		saved expression instead: / or ? restores the scanning
 *		re, & restores the substitute re.
 *	oknl	nonzero if a newline may end the expression in place of
 *		the closing delimiter.
 *
 * An empty expression reuses whatever is already in expbuf.  Returns
 * eof, except in the backslash forms, which return the selecting
 * character.  Sets nbra (number of \( groups) and circfl (leading ^).
 * Syntax errors are reported through cerror(), which also zeroes the
 * first byte of expbuf.
 */
compile(eof, oknl)
	int eof;
	int oknl;
{
	register int c;
	register char *ep;
	char *lastep;		/* start of the most recent item, for '*' */
	char bracket[NBRA], *bracketp, *rhsp;	/* stack of open \( numbers */
	int cclcnt;

	if (isalpha(eof) || isdigit(eof))
		error("Regular expressions cannot be delimited by letters or digits");
	ep = expbuf;
	c = getchar();
	if (eof == '\\')
		switch (c) {

		case '/':
		case '?':
			if (scanre.Expbuf[0] == 0)
				error("No previous scan re|No previous scanning regular expression");
			resre(scanre);
			return (c);

		case '&':
			if (subre.Expbuf[0] == 0)
				error("No previous substitute re|No previous substitute regular expression");
			resre(subre);
			return (c);

		default:
			error("Badly formed re|Regular expression \\ must be followed by / or ?");
		}
	/* Empty expression: keep the one already compiled in expbuf. */
	if (c == eof || c == '\n' || c == EOF) {
		if (*ep == 0)
			error("No previous re|No previous regular expression");
		if (c == '\n' && oknl == 0)
			error("Missing closing delimiter@for regular expression");
		if (c != eof)
			ungetchar(c);
		return (eof);
	}
	bracketp = bracket;
	nbra = 0;
	circfl = 0;
	if (c == '^') {
		c = getchar();
		circfl++;
	}
	ungetchar(c);
	for (;;) {
		/* Each pass may emit up to two bytes; keep that much free. */
		if (ep >= &expbuf[ESIZE - 2])
complex:
			cerror("Re too complex|Regular expression too complicated");
		c = getchar();
		if (c == eof || c == EOF) {
			if (bracketp != bracket)
				cerror("Unmatched \\(|More \\('s than \\)'s in regular expression");
			*ep++ = CEOFC;
			if (c == EOF)
				ungetchar(c);
			return (eof);
		}
		/*
		 * Remember where this item starts unless it is a star
		 * (written * when magic, \* when not), which applies to
		 * the item before it.
		 */
		if (value(MAGIC)) {
			if (c != '*' || ep == expbuf)
				lastep = ep;
		} else
			if (c != '\\' || peekchar() != '*' || ep == expbuf)
				lastep = ep;
		switch (c) {

		case '\\':
			c = getchar();
			switch (c) {

			case '(':
				if (nbra >= NBRA)
					cerror("Awash in \\('s!|Too many \\('d subexressions in a regular expression");
				*bracketp++ = nbra;
				*ep++ = CBRA;
				*ep++ = nbra++;
				continue;

			case ')':
				if (bracketp <= bracket)
					cerror("Extra \\)|More \\)'s than \\('s in regular expression");
				*ep++ = CKET;
				*ep++ = *--bracketp;
				continue;

			case '<':
				*ep++ = CBRC;
				continue;

			case '>':
				*ep++ = CLET;
				continue;
			}
			/*
			 * With magic off, a backslash gives . ~ * [ their
			 * special meaning.  With magic on, the unescaped
			 * forms jump here from the bottom of the outer
			 * switch.
			 */
			if (value(MAGIC) == 0)
magic:
			switch (c) {

			case '.':
				*ep++ = CDOT;
				continue;

			case '~':
				/* Insert the last replacement text literally. */
				rhsp = rhsbuf;
				while (*rhsp) {
					if (*rhsp & QUOTE) {
						c = *rhsp & TRIM;
						if (c == '&')
							error("Replacement pattern contains &@- cannot use in re");
						if (c >= '1' && c <= '9')
							error("Replacement pattern contains \\d@- cannot use in re");
					}
					if (ep >= &expbuf[ESIZE-2])
						goto complex;
					*ep++ = CCHR;
					*ep++ = *rhsp++ & TRIM;
				}
				continue;

			case '*':
				/* A star with nothing before it is literal. */
				if (ep == expbuf)
					break;
				if (*lastep == CBRA || *lastep == CKET)
					cerror("Illegal *|Can't * a \\( ... \\) in regular expression");
				if (*lastep == CCHR && (lastep[1] & QUOTE))
					cerror("Illegal *|Can't * a \\n in regular expression");
				*lastep |= STAR;
				continue;

			case '[':
				/*
				 * Character class: opcode, a count byte
				 * (filled in below, counting itself), then
				 * the member characters.
				 */
				*ep++ = CCL;
				*ep++ = 0;
				cclcnt = 1;
				c = getchar();
				if (c == '^') {
					c = getchar();
					ep[-2] = NCCL;
				}
				if (c == ']')
					cerror("Bad character class|Empty character class '[]' or '[^]' cannot match");
				while (c != ']') {
					if (c == '\\' && any(peekchar(), "]-^\\"))
						c = getchar() | QUOTE;
					if (c == '\n' || c == EOF)
						cerror("Missing ]");
					*ep++ = c;
					cclcnt++;
					if (ep >= &expbuf[ESIZE])
						goto complex;
					c = getchar();
				}
				lastep[1] = cclcnt;
				continue;
			}
			/* Backslash at end of input stands for itself. */
			if (c == EOF) {
				ungetchar(EOF);
				c = '\\';
				goto defchar;
			}
			*ep++ = CCHR;
			if (c == '\n')
				cerror("No newlines in re's|Can't escape newlines into regular expressions");
/*
			if (c < '1' || c > NBRA + '1') {
*/
				*ep++ = c;
				continue;
/*
			}
			c -= '1';
			if (c >= nbra)
				cerror("Bad \\n|\\n in regular expression with n greater than the number of \\('s");
			*ep++ = c | QUOTE;
			continue;
*/

		case '\n':
			if (oknl) {
				ungetchar(c);
				*ep++ = CEOFC;
				return (eof);
			}
			cerror("Badly formed re|Missing closing delimiter for regular expression");

		case '$':
			/* '$' is an anchor only at the very end of the re. */
			if (peekchar() == eof || peekchar() == EOF || oknl && peekchar() == '\n') {
				*ep++ = CDOL;
				continue;
			}
			goto defchar;

		case '.':
		case '~':
		case '*':
		case '[':
			if (value(MAGIC))
				goto magic;
defchar:
		default:
			*ep++ = CCHR;
			*ep++ = c;
			continue;
		}
	}
}
737:
738: cerror(s)
739: char *s;
740: {
741:
742: expbuf[0] = 0;
743: error(s);
744: }
745:
746: same(a, b)
747: register int a, b;
748: {
749:
750: return (a == b || value(IGNORECASE) &&
751: ((islower(a) && toupper(a) == b) || (islower(b) && toupper(b) == a)));
752: }
753:
/*
 * locs: set by execute() to the previous match end (loc2) when
 * continuing along a line, or to 0 for a fresh search.  The star loop
 * in advance() stops backing up when it reaches this position.
 */
char *locs;
755:
/*
 * execute: search for the compiled expression in expbuf.
 *
 *	gf	zero to fetch line *addr into linebuf and search it from
 *		the start; nonzero to continue in the current linebuf
 *		from the end of the previous match (loc2).
 *	addr	the line to search; ignored when gf is nonzero.
 *
 * Returns 1 with loc1 set to the start of the match (advance() sets
 * loc2 to its end), or 0 if there is no match.  A ^-anchored
 * expression never matches on a continuation, and line zero never
 * matches.
 */
execute(gf, addr)
	line *addr;
{
	register char *p1, *p2;
	register int c;

	if (gf) {
		if (circfl)
			return (0);
		locs = p1 = loc2;
	} else {
		if (addr == zero)
			return (0);
		p1 = linebuf;
		getline(*addr);
		locs = 0;
	}
	p2 = expbuf;
	/* Anchored: only the starting position can match. */
	if (circfl) {
		loc1 = p1;
		return (advance(p1, p2));
	}
	/* fast check for first character */
	if (*p2 == CCHR) {
		c = p2[1];
		do {
			/*
			 * Skip positions whose character cannot start a
			 * match (the same comparison same() makes, written
			 * out in line); continue goes to the while test.
			 */
			if (c != *p1 && (!value(IGNORECASE) ||
			   !((islower(c) && toupper(c) == *p1) ||
			   (islower(*p1) && toupper(*p1) == c))))
				continue;
			if (advance(p1, p2)) {
				loc1 = p1;
				return (1);
			}
		} while (*p1++);
		return (0);
	}
	/* regular algorithm */
	do {
		if (advance(p1, p2)) {
			loc1 = p1;
			return (1);
		}
	} while (*p1++);
	return (0);
}
802:
/*
 * uletter: true if c is a letter or an underscore.  The argument is
 * parenthesized so that an expression argument expands correctly;
 * note that it is still evaluated twice.
 */
#define	uletter(c)	(isalpha(c) || (c) == '_')
804:
805: advance(lp, ep)
806: register char *lp, *ep;
807: {
808: register char *curlp;
809: char *sp, *sp1;
810: int c;
811:
812: for (;;) switch (*ep++) {
813:
814: case CCHR:
815: /* useless
816: if (*ep & QUOTE) {
817: c = *ep++ & TRIM;
818: sp = braslist[c];
819: sp1 = braelist[c];
820: while (sp < sp1) {
821: if (!same(*sp, *lp))
822: return (0);
823: sp++, lp++;
824: }
825: continue;
826: }
827: */
828: if (!same(*ep, *lp))
829: return (0);
830: ep++, lp++;
831: continue;
832:
833: case CDOT:
834: if (*lp++)
835: continue;
836: return (0);
837:
838: case CDOL:
839: if (*lp == 0)
840: continue;
841: return (0);
842:
843: case CEOFC:
844: loc2 = lp;
845: return (1);
846:
847: case CCL:
848: if (cclass(ep, *lp++, 1)) {
849: ep += *ep;
850: continue;
851: }
852: return (0);
853:
854: case NCCL:
855: if (cclass(ep, *lp++, 0)) {
856: ep += *ep;
857: continue;
858: }
859: return (0);
860:
861: case CBRA:
862: braslist[*ep++] = lp;
863: continue;
864:
865: case CKET:
866: braelist[*ep++] = lp;
867: continue;
868:
869: case CDOT|STAR:
870: curlp = lp;
871: while (*lp++)
872: continue;
873: goto star;
874:
875: case CCHR|STAR:
876: curlp = lp;
877: while (same(*lp, *ep))
878: lp++;
879: lp++;
880: ep++;
881: goto star;
882:
883: case CCL|STAR:
884: case NCCL|STAR:
885: curlp = lp;
886: while (cclass(ep, *lp++, ep[-1] == (CCL|STAR)))
887: continue;
888: ep += *ep;
889: goto star;
890: star:
891: do {
892: lp--;
893: if (lp == locs)
894: break;
895: if (advance(lp, ep))
896: return (1);
897: } while (lp > curlp);
898: return (0);
899:
900: case CBRC:
901: if (lp == expbuf)
902: continue;
903: if ((isdigit(*lp) || uletter(*lp)) && !uletter(lp[-1]) && !isdigit(lp[-1]))
904: continue;
905: return (0);
906:
907: case CLET:
908: if (!uletter(*lp) && !isdigit(*lp))
909: continue;
910: return (0);
911:
912: default:
913: error("Re internal error");
914: }
915: }
916:
917: cclass(set, c, af)
918: register char *set;
919: register int c;
920: int af;
921: {
922: register int n;
923:
924: if (c == 0)
925: return (0);
926: if (value(IGNORECASE) && isupper(c))
927: c = tolower(c);
928: n = *set++;
929: while (--n)
930: if (n > 2 && set[1] == '-') {
931: if (c >= (set[0] & TRIM) && c <= (set[2] & TRIM))
932: return (af);
933: set += 3;
934: n -= 2;
935: } else
936: if ((*set++ & TRIM) == c)
937: return (af);
938: return (!af);
939: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.