|
|
1.1 root 1: /* Copyright (c) 1980 Regents of the University of California */
2: static char *sccsid = "@(#)ex_re.c 6.2 10/23/80";
3: #include "ex.h"
4: #include "ex_re.h"
5:
6: /*
7: * Global, substitute and regular expressions.
8: * Very similar to ed, with some re extensions and
9: * confirmed substitute.
10: */
11: global(k)
12: bool k;
13: {
14: register char *gp;
15: register int c;
16: register line *a1;
17: char globuf[GBSIZE], *Cwas;
18: int lines = lineDOL();
19: int oinglobal = inglobal;
20: char *oglobp = globp;
21:
22: Cwas = Command;
23: /*
24: * States of inglobal:
25: * 0: ordinary - not in a global command.
26: * 1: text coming from some buffer, not tty.
27: * 2: like 1, but the source of the buffer is a global command.
28: * Hence you're only in a global command if inglobal==2. This
29: * strange sounding convention is historically derived from
30: * everybody simulating a global command.
31: */
32: if (inglobal==2)
33: error("Global within global@not allowed");
34: markDOT();
35: setall();
36: nonzero();
37: if (skipend())
38: error("Global needs re|Missing regular expression for global");
39: c = getchar();
40: ignore(compile(c, 1));
41: savere(scanre);
42: gp = globuf;
43: while ((c = getchar()) != '\n') {
44: switch (c) {
45:
46: case EOF:
47: c = '\n';
48: goto brkwh;
49:
50: case '\\':
51: c = getchar();
52: switch (c) {
53:
54: case '\\':
55: ungetchar(c);
56: break;
57:
58: case '\n':
59: break;
60:
61: default:
62: *gp++ = '\\';
63: break;
64: }
65: break;
66: }
67: *gp++ = c;
68: if (gp >= &globuf[GBSIZE - 2])
69: error("Global command too long");
70: }
71: brkwh:
72: ungetchar(c);
73: out:
74: newline();
75: *gp++ = c;
76: *gp++ = 0;
77: saveall();
78: inglobal = 2;
79: for (a1 = one; a1 <= dol; a1++) {
80: *a1 &= ~01;
81: if (a1 >= addr1 && a1 <= addr2 && execute(0, a1) == k)
82: *a1 |= 01;
83: }
84: #ifdef notdef
85: /*
86: * This code is commented out for now. The problem is that we don't
87: * fix up the undo area the way we should. Basically, I think what has
88: * to be done is to copy the undo area down (since we shrunk everything)
89: * and move the various pointers into it down too. I will do this later
90: * when I have time. (Mark, 10-20-80)
91: */
92: /*
93: * Special case: g/.../d (avoid n^2 algorithm)
94: */
95: if (globuf[0]=='d' && globuf[1]=='\n' && globuf[2]=='\0') {
96: gdelete();
97: return;
98: }
99: #endif
100: if (inopen)
101: inopen = -1;
102: /*
103: * Now for each marked line, set dot there and do the commands.
104: * Note the n^2 behavior here for lots of lines matching.
105: * This is really needed: in some cases you could delete lines,
106: * causing a marked line to be moved before a1 and missed if
107: * we didn't restart at zero each time.
108: */
109: for (a1 = one; a1 <= dol; a1++) {
110: if (*a1 & 01) {
111: *a1 &= ~01;
112: dot = a1;
113: globp = globuf;
114: commands(1, 1);
115: a1 = zero;
116: }
117: }
118: globp = oglobp;
119: inglobal = oinglobal;
120: endline = 1;
121: Command = Cwas;
122: netchHAD(lines);
123: setlastchar(EOF);
124: if (inopen) {
125: ungetchar(EOF);
126: inopen = 1;
127: }
128: }
129:
130: /*
131: * gdelete: delete inside a global command. Handles the
132: * special case g/r.e./d. All lines to be deleted have
133: * already been marked. Squeeze the remaining lines together.
134: * Note that other cases such as g/r.e./p, g/r.e./s/r.e.2/rhs/,
135: * and g/r.e./.,/r.e.2/d are not treated specially. There is no
136: * good reason for this except the question: where to you draw the line?
137: */
138: gdelete()
139: {
140: register line *a1, *a2, *a3;
141:
142: a3 = dol;
143: /* find first marked line. can skip all before it */
144: for (a1=zero; (*a1&01)==0; a1++)
145: if (a1>=a3)
146: return;
147: /* copy down unmarked lines, compacting as we go. */
148: for (a2=a1+1; a2<=a3;) {
149: if (*a2&01) {
150: a2++; /* line is marked, skip it */
151: dot = a1; /* dot left after line deletion */
152: } else
153: *a1++ = *a2++; /* unmarked, copy it */
154: }
155: dol = a1-1;
156: if (dot>dol)
157: dot = dol;
158: change();
159: }
160:
161: bool cflag;
162: int scount, slines, stotal;
163:
164: substitute(c)
165: int c;
166: {
167: register line *addr;
168: register int n;
169: int gsubf, hopcount = 0;
170:
171: gsubf = compsub(c);
172: if(FIXUNDO)
173: save12(), undkind = UNDCHANGE;
174: stotal = 0;
175: slines = 0;
176: for (addr = addr1; addr <= addr2; addr++) {
177: scount = 0;
178: if (dosubcon(0, addr) == 0)
179: continue;
180: if (gsubf) {
181: /*
182: * The loop can happen from s/\</&/g
183: * but we don't want to break other, reasonable cases.
184: */
185: while (*loc2) {
186: if (++hopcount > sizeof linebuf)
187: error("substitution loop");
188: if (dosubcon(1, addr) == 0)
189: break;
190: }
191: }
192: if (scount) {
193: stotal += scount;
194: slines++;
195: putmark(addr);
196: n = append(getsub, addr);
197: addr += n;
198: addr2 += n;
199: }
200: }
201: if (stotal == 0 && !inglobal && !cflag)
202: error("Fail|Substitute pattern match failed");
203: snote(stotal, slines);
204: return (stotal);
205: }
206:
207: compsub(ch)
208: {
209: register int seof, c, uselastre;
210: static int gsubf;
211:
212: if (!value(EDCOMPATIBLE))
213: gsubf = cflag = 0;
214: uselastre = 0;
215: switch (ch) {
216:
217: case 's':
218: ignore(skipwh());
219: seof = getchar();
220: if (endcmd(seof) || any(seof, "gcr")) {
221: ungetchar(seof);
222: goto redo;
223: }
224: if (isalpha(seof) || isdigit(seof))
225: error("Substitute needs re|Missing regular expression for substitute");
226: seof = compile(seof, 1);
227: uselastre = 1;
228: comprhs(seof);
229: gsubf = 0;
230: cflag = 0;
231: break;
232:
233: case '~':
234: uselastre = 1;
235: /* fall into ... */
236: case '&':
237: redo:
238: if (re.Expbuf[0] == 0)
239: error("No previous re|No previous regular expression");
240: if (subre.Expbuf[0] == 0)
241: error("No previous substitute re|No previous substitute to repeat");
242: break;
243: }
244: for (;;) {
245: c = getchar();
246: switch (c) {
247:
248: case 'g':
249: gsubf = !gsubf;
250: continue;
251:
252: case 'c':
253: cflag = !cflag;
254: continue;
255:
256: case 'r':
257: uselastre = 1;
258: continue;
259:
260: default:
261: ungetchar(c);
262: setcount();
263: newline();
264: if (uselastre)
265: savere(subre);
266: else
267: resre(subre);
268: return (gsubf);
269: }
270: }
271: }
272:
273: comprhs(seof)
274: int seof;
275: {
276: register char *rp, *orp;
277: register int c;
278: char orhsbuf[RHSSIZE];
279:
280: rp = rhsbuf;
281: CP(orhsbuf, rp);
282: for (;;) {
283: c = getchar();
284: if (c == seof)
285: break;
286: switch (c) {
287:
288: case '\\':
289: c = getchar();
290: if (c == EOF) {
291: ungetchar(c);
292: break;
293: }
294: if (value(MAGIC)) {
295: /*
296: * When "magic", \& turns into a plain &,
297: * and all other chars work fine quoted.
298: */
299: if (c != '&')
300: c |= QUOTE;
301: break;
302: }
303: magic:
304: if (c == '~') {
305: for (orp = orhsbuf; *orp; *rp++ = *orp++)
306: if (rp >= &rhsbuf[RHSSIZE - 1])
307: goto toobig;
308: continue;
309: }
310: c |= QUOTE;
311: break;
312:
313: case '\n':
314: case EOF:
315: if (!(globp && globp[0])) {
316: ungetchar(c);
317: goto endrhs;
318: }
319:
320: case '~':
321: case '&':
322: if (value(MAGIC))
323: goto magic;
324: break;
325: }
326: if (rp >= &rhsbuf[RHSSIZE - 1]) {
327: toobig:
328: *rp = 0;
329: error("Replacement pattern too long@- limit 256 characters");
330: }
331: *rp++ = c;
332: }
333: endrhs:
334: *rp++ = 0;
335: }
336:
337: getsub()
338: {
339: register char *p;
340:
341: if ((p = linebp) == 0)
342: return (EOF);
343: strcLIN(p);
344: linebp = 0;
345: return (0);
346: }
347:
348: dosubcon(f, a)
349: bool f;
350: line *a;
351: {
352:
353: if (execute(f, a) == 0)
354: return (0);
355: if (confirmed(a)) {
356: dosub();
357: scount++;
358: }
359: return (1);
360: }
361:
362: confirmed(a)
363: line *a;
364: {
365: register int c, ch;
366:
367: if (cflag == 0)
368: return (1);
369: pofix();
370: pline(lineno(a));
371: if (inopen)
372: putchar('\n' | QUOTE);
373: c = column(loc1 - 1);
374: ugo(c - 1 + (inopen ? 1 : 0), ' ');
375: ugo(column(loc2 - 1) - c, '^');
376: flush();
377: ch = c = getkey();
378: again:
379: if (c == '\r')
380: c = '\n';
381: if (inopen)
382: putchar(c), flush();
383: if (c != '\n' && c != EOF) {
384: c = getkey();
385: goto again;
386: }
387: noteinp();
388: return (ch == 'y');
389: }
390:
391: getch()
392: {
393: char c;
394:
395: if (read(2, &c, 1) != 1)
396: return (EOF);
397: return (c & TRIM);
398: }
399:
/*
 * ugo: output the character `with' cnt times; nothing is written
 * when cnt is zero or negative.
 */
ugo(cnt, with)
	int with;
	int cnt;
{

	while (cnt > 0) {
		putchar(with);
		cnt--;
	}
}
410:
411: int casecnt;
412: bool destuc;
413:
414: dosub()
415: {
416: register char *lp, *sp, *rp;
417: int c;
418:
419: lp = linebuf;
420: sp = genbuf;
421: rp = rhsbuf;
422: while (lp < loc1)
423: *sp++ = *lp++;
424: casecnt = 0;
425: while (c = *rp++) {
426: if (c & QUOTE)
427: switch (c & TRIM) {
428:
429: case '&':
430: sp = place(sp, loc1, loc2);
431: if (sp == 0)
432: goto ovflo;
433: continue;
434:
435: case 'l':
436: casecnt = 1;
437: destuc = 0;
438: continue;
439:
440: case 'L':
441: casecnt = LBSIZE;
442: destuc = 0;
443: continue;
444:
445: case 'u':
446: casecnt = 1;
447: destuc = 1;
448: continue;
449:
450: case 'U':
451: casecnt = LBSIZE;
452: destuc = 1;
453: continue;
454:
455: case 'E':
456: case 'e':
457: casecnt = 0;
458: continue;
459: }
460: if (c < 0 && (c &= TRIM) >= '1' && c < nbra + '1') {
461: sp = place(sp, braslist[c - '1'], braelist[c - '1']);
462: if (sp == 0)
463: goto ovflo;
464: continue;
465: }
466: if (casecnt)
467: *sp++ = fixcase(c & TRIM);
468: else
469: *sp++ = c & TRIM;
470: if (sp >= &genbuf[LBSIZE])
471: ovflo:
472: error("Line overflow@in substitute");
473: }
474: lp = loc2;
475: loc2 = sp + (linebuf - genbuf);
476: while (*sp++ = *lp++)
477: if (sp >= &genbuf[LBSIZE])
478: goto ovflo;
479: strcLIN(genbuf);
480: }
481:
482: fixcase(c)
483: register int c;
484: {
485:
486: if (casecnt == 0)
487: return (c);
488: casecnt--;
489: if (destuc) {
490: if (islower(c))
491: c = toupper(c);
492: } else
493: if (isupper(c))
494: c = tolower(c);
495: return (c);
496: }
497:
498: char *
499: place(sp, l1, l2)
500: register char *sp, *l1, *l2;
501: {
502:
503: while (l1 < l2) {
504: *sp++ = fixcase(*l1++);
505: if (sp >= &genbuf[LBSIZE])
506: return (0);
507: }
508: return (sp);
509: }
510:
/*
 * snote: report how many substitutions were made, provided
 * notable(total) says the count is worth mentioning.  The number of
 * lines is added only when it is informative, i.e. it is neither 1
 * nor equal to the substitution count.
 */
snote(total, lines)
	register int total, lines;
{

	if (notable(total)) {
		printf(mesg("%d subs|%d substitutions"), total);
		if (!(lines == 1 || lines == total))
			printf(" on %d lines", lines);
		noonl();
		flush();
	}
}
523:
524: compile(eof, oknl)
525: int eof;
526: int oknl;
527: {
528: register int c;
529: register char *ep;
530: char *lastep;
531: char bracket[NBRA], *bracketp, *rhsp;
532: int cclcnt;
533:
534: if (isalpha(eof) || isdigit(eof))
535: error("Regular expressions cannot be delimited by letters or digits");
536: ep = expbuf;
537: c = getchar();
538: if (eof == '\\')
539: switch (c) {
540:
541: case '/':
542: case '?':
543: if (scanre.Expbuf[0] == 0)
544: error("No previous scan re|No previous scanning regular expression");
545: resre(scanre);
546: return (c);
547:
548: case '&':
549: if (subre.Expbuf[0] == 0)
550: error("No previous substitute re|No previous substitute regular expression");
551: resre(subre);
552: return (c);
553:
554: default:
555: error("Badly formed re|Regular expression \\ must be followed by / or ?");
556: }
557: if (c == eof || c == '\n' || c == EOF) {
558: if (*ep == 0)
559: error("No previous re|No previous regular expression");
560: if (c == '\n' && oknl == 0)
561: error("Missing closing delimiter@for regular expression");
562: if (c != eof)
563: ungetchar(c);
564: return (eof);
565: }
566: bracketp = bracket;
567: nbra = 0;
568: circfl = 0;
569: if (c == '^') {
570: c = getchar();
571: circfl++;
572: }
573: ungetchar(c);
574: for (;;) {
575: if (ep >= &expbuf[ESIZE - 2])
576: complex:
577: cerror("Re too complex|Regular expression too complicated");
578: c = getchar();
579: if (c == eof || c == EOF) {
580: if (bracketp != bracket)
581: cerror("Unmatched \\(|More \\('s than \\)'s in regular expression");
582: *ep++ = CEOFC;
583: if (c == EOF)
584: ungetchar(c);
585: return (eof);
586: }
587: if (value(MAGIC)) {
588: if (c != '*' || ep == expbuf)
589: lastep = ep;
590: } else
591: if (c != '\\' || peekchar() != '*' || ep == expbuf)
592: lastep = ep;
593: switch (c) {
594:
595: case '\\':
596: c = getchar();
597: switch (c) {
598:
599: case '(':
600: if (nbra >= NBRA)
601: cerror("Awash in \\('s!|Too many \\('d subexressions in a regular expression");
602: *bracketp++ = nbra;
603: *ep++ = CBRA;
604: *ep++ = nbra++;
605: continue;
606:
607: case ')':
608: if (bracketp <= bracket)
609: cerror("Extra \\)|More \\)'s than \\('s in regular expression");
610: *ep++ = CKET;
611: *ep++ = *--bracketp;
612: continue;
613:
614: case '<':
615: *ep++ = CBRC;
616: continue;
617:
618: case '>':
619: *ep++ = CLET;
620: continue;
621: }
622: if (value(MAGIC) == 0)
623: magic:
624: switch (c) {
625:
626: case '.':
627: *ep++ = CDOT;
628: continue;
629:
630: case '~':
631: rhsp = rhsbuf;
632: while (*rhsp) {
633: if (*rhsp & QUOTE) {
634: c = *rhsp & TRIM;
635: if (c == '&')
636: error("Replacement pattern contains &@- cannot use in re");
637: if (c >= '1' && c <= '9')
638: error("Replacement pattern contains \\d@- cannot use in re");
639: }
640: if (ep >= &expbuf[ESIZE-2])
641: goto complex;
642: *ep++ = CCHR;
643: *ep++ = *rhsp++ & TRIM;
644: }
645: continue;
646:
647: case '*':
648: if (ep == expbuf)
649: break;
650: if (*lastep == CBRA || *lastep == CKET)
651: cerror("Illegal *|Can't * a \\( ... \\) in regular expression");
652: if (*lastep == CCHR && (lastep[1] & QUOTE))
653: cerror("Illegal *|Can't * a \\n in regular expression");
654: *lastep |= STAR;
655: continue;
656:
657: case '[':
658: *ep++ = CCL;
659: *ep++ = 0;
660: cclcnt = 1;
661: c = getchar();
662: if (c == '^') {
663: c = getchar();
664: ep[-2] = NCCL;
665: }
666: if (c == ']')
667: cerror("Bad character class|Empty character class '[]' or '[^]' cannot match");
668: while (c != ']') {
669: if (c == '\\' && any(peekchar(), "]-^\\"))
670: c = getchar() | QUOTE;
671: if (c == '\n' || c == EOF)
672: cerror("Missing ]");
673: *ep++ = c;
674: cclcnt++;
675: if (ep >= &expbuf[ESIZE])
676: goto complex;
677: c = getchar();
678: }
679: lastep[1] = cclcnt;
680: continue;
681: }
682: if (c == EOF) {
683: ungetchar(EOF);
684: c = '\\';
685: goto defchar;
686: }
687: *ep++ = CCHR;
688: if (c == '\n')
689: cerror("No newlines in re's|Can't escape newlines into regular expressions");
690: /*
691: if (c < '1' || c > NBRA + '1') {
692: */
693: *ep++ = c;
694: continue;
695: /*
696: }
697: c -= '1';
698: if (c >= nbra)
699: cerror("Bad \\n|\\n in regular expression with n greater than the number of \\('s");
700: *ep++ = c | QUOTE;
701: continue;
702: */
703:
704: case '\n':
705: if (oknl) {
706: ungetchar(c);
707: *ep++ = CEOFC;
708: return (eof);
709: }
710: cerror("Badly formed re|Missing closing delimiter for regular expression");
711:
712: case '$':
713: if (peekchar() == eof || peekchar() == EOF || oknl && peekchar() == '\n') {
714: *ep++ = CDOL;
715: continue;
716: }
717: goto defchar;
718:
719: case '.':
720: case '~':
721: case '*':
722: case '[':
723: if (value(MAGIC))
724: goto magic;
725: defchar:
726: default:
727: *ep++ = CCHR;
728: *ep++ = c;
729: continue;
730: }
731: }
732: }
733:
734: cerror(s)
735: char *s;
736: {
737:
738: expbuf[0] = 0;
739: error(s);
740: }
741:
742: same(a, b)
743: register int a, b;
744: {
745:
746: return (a == b || value(IGNORECASE) &&
747: ((islower(a) && toupper(a) == b) || (islower(b) && toupper(b) == a)));
748: }
749:
750: char *locs;
751:
752: execute(gf, addr)
753: line *addr;
754: {
755: register char *p1, *p2;
756: register int c;
757:
758: if (gf) {
759: if (circfl)
760: return (0);
761: locs = p1 = loc2;
762: } else {
763: if (addr == zero)
764: return (0);
765: p1 = linebuf;
766: getline(*addr);
767: locs = 0;
768: }
769: p2 = expbuf;
770: if (circfl) {
771: loc1 = p1;
772: return (advance(p1, p2));
773: }
774: /* fast check for first character */
775: if (*p2 == CCHR) {
776: c = p2[1];
777: do {
778: if (c != *p1 && (!value(IGNORECASE) ||
779: !((islower(c) && toupper(c) == *p1) ||
780: (islower(*p1) && toupper(*p1) == c))))
781: continue;
782: if (advance(p1, p2)) {
783: loc1 = p1;
784: return (1);
785: }
786: } while (*p1++);
787: return (0);
788: }
789: /* regular algorithm */
790: do {
791: if (advance(p1, p2)) {
792: loc1 = p1;
793: return (1);
794: }
795: } while (*p1++);
796: return (0);
797: }
798:
799: #define uletter(c) (isalpha(c) || c == '_')
800:
801: advance(lp, ep)
802: register char *lp, *ep;
803: {
804: register char *curlp;
805: char *sp, *sp1;
806: int c;
807:
808: for (;;) switch (*ep++) {
809:
810: case CCHR:
811: /* useless
812: if (*ep & QUOTE) {
813: c = *ep++ & TRIM;
814: sp = braslist[c];
815: sp1 = braelist[c];
816: while (sp < sp1) {
817: if (!same(*sp, *lp))
818: return (0);
819: sp++, lp++;
820: }
821: continue;
822: }
823: */
824: if (!same(*ep, *lp))
825: return (0);
826: ep++, lp++;
827: continue;
828:
829: case CDOT:
830: if (*lp++)
831: continue;
832: return (0);
833:
834: case CDOL:
835: if (*lp == 0)
836: continue;
837: return (0);
838:
839: case CEOFC:
840: loc2 = lp;
841: return (1);
842:
843: case CCL:
844: if (cclass(ep, *lp++, 1)) {
845: ep += *ep;
846: continue;
847: }
848: return (0);
849:
850: case NCCL:
851: if (cclass(ep, *lp++, 0)) {
852: ep += *ep;
853: continue;
854: }
855: return (0);
856:
857: case CBRA:
858: braslist[*ep++] = lp;
859: continue;
860:
861: case CKET:
862: braelist[*ep++] = lp;
863: continue;
864:
865: case CDOT|STAR:
866: curlp = lp;
867: while (*lp++)
868: continue;
869: goto star;
870:
871: case CCHR|STAR:
872: curlp = lp;
873: while (same(*lp, *ep))
874: lp++;
875: lp++;
876: ep++;
877: goto star;
878:
879: case CCL|STAR:
880: case NCCL|STAR:
881: curlp = lp;
882: while (cclass(ep, *lp++, ep[-1] == (CCL|STAR)))
883: continue;
884: ep += *ep;
885: goto star;
886: star:
887: do {
888: lp--;
889: if (lp == locs)
890: break;
891: if (advance(lp, ep))
892: return (1);
893: } while (lp > curlp);
894: return (0);
895:
896: case CBRC:
897: if (lp == expbuf)
898: continue;
899: if ((isdigit(*lp) || uletter(*lp)) && !uletter(lp[-1]) && !isdigit(lp[-1]))
900: continue;
901: return (0);
902:
903: case CLET:
904: if (!uletter(*lp) && !isdigit(*lp))
905: continue;
906: return (0);
907:
908: default:
909: error("Re internal error");
910: }
911: }
912:
913: cclass(set, c, af)
914: register char *set;
915: register int c;
916: int af;
917: {
918: register int n;
919:
920: if (c == 0)
921: return (0);
922: if (value(IGNORECASE) && isupper(c))
923: c = tolower(c);
924: n = *set++;
925: while (--n)
926: if (n > 2 && set[1] == '-') {
927: if (c >= (set[0] & TRIM) && c <= (set[2] & TRIM))
928: return (af);
929: set += 3;
930: n -= 2;
931: } else
932: if ((*set++ & TRIM) == c)
933: return (af);
934: return (!af);
935: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.