|
|
1.1 root 1: /* Copyright (c) 1979 Regents of the University of California */
2: #include "ex.h"
3: #include "ex_re.h"
4:
5: /*
6: * Global, substitute and regular expressions.
7: * Very similar to ed, with some re extensions and
8: * confirmed substitute.
9: */
10: global(k)
11: bool k;
12: {
13: register char *gp;
14: register int c;
15: register line *a1;
16: char globuf[GBSIZE], *Cwas;
17: int lines = lineDOL();
18: int oinglobal = inglobal;
19: char *oglobp = globp;
20:
21: Cwas = Command;
22: /*
23: * States of inglobal:
24: * 0: ordinary - not in a global command.
25: * 1: text coming from some buffer, not tty.
26: * 2: like 1, but the source of the buffer is a global command.
27: * Hence you're only in a global command if inglobal==2. This
28: * strange sounding convention is historically derived from
29: * everybody simulating a global command.
30: */
31: if (inglobal==2)
32: error("Global within global@not allowed");
33: markDOT();
34: setall();
35: nonzero();
36: if (skipend())
37: error("Global needs re|Missing regular expression for global");
38: c = getchar();
39: ignore(compile(c, 1));
40: savere(scanre);
41: gp = globuf;
42: while ((c = getchar()) != '\n') {
43: switch (c) {
44:
45: case EOF:
46: c = '\n';
47: goto brkwh;
48:
49: case '\\':
50: c = getchar();
51: switch (c) {
52:
53: case '\\':
54: ungetchar(c);
55: break;
56:
57: case '\n':
58: break;
59:
60: default:
61: *gp++ = '\\';
62: break;
63: }
64: break;
65: }
66: *gp++ = c;
67: if (gp >= &globuf[GBSIZE - 2])
68: error("Global command too long");
69: }
70: brkwh:
71: ungetchar(c);
72: out:
73: newline();
74: *gp++ = c;
75: *gp++ = 0;
76: inglobal = 2;
77: for (a1 = one; a1 <= dol; a1++) {
78: *a1 &= ~01;
79: if (a1 >= addr1 && a1 <= addr2 && execute(0, a1) == k)
80: *a1 |= 01;
81: }
82: /* should use gdelete from ed to avoid n**2 here on g/.../d */
83: saveall();
84: if (inopen)
85: inopen = -1;
86: for (a1 = one; a1 <= dol; a1++) {
87: if (*a1 & 01) {
88: *a1 &= ~01;
89: dot = a1;
90: globp = globuf;
91: commands(1, 1);
92: a1 = zero;
93: }
94: }
95: globp = oglobp;
96: inglobal = oinglobal;
97: endline = 1;
98: Command = Cwas;
99: netchHAD(lines);
100: setlastchar(EOF);
101: if (inopen) {
102: ungetchar(EOF);
103: inopen = 1;
104: }
105: }
106:
107: bool xflag;
108: int scount, slines, stotal;
109:
110: substitute(c)
111: int c;
112: {
113: register line *addr;
114: register int n;
115: int gsubf;
116:
117: gsubf = compsub(c);
118: if (!inglobal)
119: save12(), undkind = UNDCHANGE;
120: stotal = 0;
121: slines = 0;
122: for (addr = addr1; addr <= addr2; addr++) {
123: scount = 0;
124: if (dosubcon(0, addr) == 0)
125: continue;
126: if (gsubf) {
127: #ifdef notdef
128: /*
129: * should check but loc2 is already munged.
130: * This needs a fancier check later.
131: */
132: if (loc1 == loc2)
133: error("substitution loop");
134: #endif
135: while (*loc2)
136: if (dosubcon(1, addr) == 0)
137: break;
138: }
139: if (scount) {
140: stotal += scount;
141: slines++;
142: putmark(addr);
143: n = append(getsub, addr);
144: addr += n;
145: addr2 += n;
146: }
147: }
148: if (stotal == 0 && !inglobal && !xflag)
149: error("Fail|Substitute pattern match failed");
150: snote(stotal, slines);
151: return (stotal);
152: }
153:
154: compsub(ch)
155: {
156: register int seof, c, uselastre;
157: static int gsubf;
158:
159: if (!value(EDCOMPATIBLE))
160: gsubf = xflag = 0;
161: uselastre = 0;
162: switch (ch) {
163:
164: case 's':
165: ignore(skipwh());
166: seof = getchar();
167: if (endcmd(seof) || any(seof, "gcr")) {
168: ungetchar(seof);
169: goto redo;
170: }
171: if (isalpha(seof) || isdigit(seof))
172: error("Substitute needs re|Missing regular expression for substitute");
173: seof = compile(seof, 1);
174: uselastre = 1;
175: comprhs(seof);
176: gsubf = 0;
177: xflag = 0;
178: break;
179:
180: case '~':
181: uselastre = 1;
182: /* fall into ... */
183: case '&':
184: redo:
185: if (re.Expbuf[0] == 0)
186: error("No previous re|No previous regular expression");
187: break;
188: }
189: for (;;) {
190: c = getchar();
191: switch (c) {
192:
193: case 'g':
194: gsubf = !gsubf;
195: continue;
196:
197: case 'c':
198: xflag = !xflag;
199: continue;
200:
201: case 'r':
202: uselastre = 1;
203: continue;
204:
205: default:
206: ungetchar(c);
207: setcount();
208: newline();
209: if (uselastre)
210: savere(subre);
211: else
212: resre(subre);
213: return (gsubf);
214: }
215: }
216: }
217:
218: comprhs(seof)
219: int seof;
220: {
221: register char *rp, *orp;
222: register int c;
223: char orhsbuf[LBSIZE / 2];
224:
225: rp = rhsbuf;
226: CP(orhsbuf, rp);
227: for (;;) {
228: c = getchar();
229: if (c == seof)
230: break;
231: switch (c) {
232:
233: case '\\':
234: c = getchar();
235: if (c == EOF) {
236: ungetchar(c);
237: break;
238: }
239: if (value(MAGIC)) {
240: /*
241: * When "magic", \& turns into a plain &,
242: * and all other chars work fine quoted.
243: */
244: if (c != '&')
245: c |= QUOTE;
246: break;
247: }
248: magic:
249: if (c == '~') {
250: for (orp = orhsbuf; *orp; *rp++ = *orp++)
251: if (rp >= &rhsbuf[LBSIZE / 2 + 1])
252: goto toobig;
253: continue;
254: }
255: c |= QUOTE;
256: break;
257:
258: case '\n':
259: case EOF:
260: ungetchar(c);
261: goto endrhs;
262:
263: case '~':
264: case '&':
265: if (value(MAGIC))
266: goto magic;
267: break;
268: }
269: if (rp >= &rhsbuf[LBSIZE / 2 - 1])
270: toobig:
271: error("Replacement pattern too long@- limit 256 characters");
272: *rp++ = c;
273: }
274: endrhs:
275: *rp++ = 0;
276: }
277:
278: getsub()
279: {
280: register char *p;
281:
282: if ((p = linebp) == 0)
283: return (EOF);
284: strcLIN(p);
285: linebp = 0;
286: return (0);
287: }
288:
289: dosubcon(f, a)
290: bool f;
291: line *a;
292: {
293:
294: if (execute(f, a) == 0)
295: return (0);
296: if (confirmed(a)) {
297: dosub();
298: scount++;
299: }
300: return (1);
301: }
302:
303: confirmed(a)
304: line *a;
305: {
306: register int c, ch;
307:
308: if (xflag == 0)
309: return (1);
310: pofix();
311: pline(lineno(a));
312: if (inopen)
313: putchar('\n' | QUOTE);
314: c = column(loc1 - 1);
315: ugo(c - 1 + (inopen ? 1 : 0), ' ');
316: ugo(column(loc2 - 1) - c, '^');
317: flush();
318: ch = c = getkey();
319: again:
320: if (c == '\r')
321: c = '\n';
322: if (inopen)
323: putchar(c), flush();
324: if (c != '\n' && c != EOF) {
325: c = getkey();
326: goto again;
327: }
328: noteinp();
329: return (ch == 'y');
330: }
331:
332: getch()
333: {
334: char c;
335:
336: if (read(2, &c, 1) != 1)
337: return (EOF);
338: return (c & TRIM);
339: }
340:
/*
 * Output the character `with' cnt times; nothing is written when cnt
 * is zero or negative.
 */
ugo(cnt, with)
	int with;
	int cnt;
{

	while (cnt > 0) {
		putchar(with);
		cnt--;
	}
}
351:
352: int casecnt;
353: bool destuc;
354:
355: dosub()
356: {
357: register char *lp, *sp, *rp;
358: int c;
359:
360: lp = linebuf;
361: sp = genbuf;
362: rp = rhsbuf;
363: while (lp < loc1)
364: *sp++ = *lp++;
365: casecnt = 0;
366: while (c = *rp++) {
367: if (c & QUOTE)
368: switch (c & TRIM) {
369:
370: case '&':
371: sp = place(sp, loc1, loc2);
372: if (sp == 0)
373: goto ovflo;
374: continue;
375:
376: case 'l':
377: casecnt = 1;
378: destuc = 0;
379: continue;
380:
381: case 'L':
382: casecnt = LBSIZE;
383: destuc = 0;
384: continue;
385:
386: case 'u':
387: casecnt = 1;
388: destuc = 1;
389: continue;
390:
391: case 'U':
392: casecnt = LBSIZE;
393: destuc = 1;
394: continue;
395:
396: case 'E':
397: case 'e':
398: casecnt = 0;
399: continue;
400: }
401: if (c < 0 && (c &= TRIM) >= '1' && c < nbra + '1') {
402: sp = place(sp, braslist[c - '1'], braelist[c - '1']);
403: if (sp == 0)
404: goto ovflo;
405: continue;
406: }
407: if (casecnt)
408: *sp++ = fixcase(c & TRIM);
409: else
410: *sp++ = c & TRIM;
411: if (sp >= &genbuf[LBSIZE])
412: ovflo:
413: error("Line overflow@in substitute");
414: }
415: lp = loc2;
416: loc2 = sp + (linebuf - genbuf);
417: while (*sp++ = *lp++)
418: if (sp >= &genbuf[LBSIZE])
419: goto ovflo;
420: strcLIN(genbuf);
421: }
422:
423: fixcase(c)
424: register int c;
425: {
426:
427: if (casecnt == 0)
428: return (c);
429: casecnt--;
430: if (destuc) {
431: if (islower(c))
432: c = toupper(c);
433: } else
434: if (isupper(c))
435: c = tolower(c);
436: return (c);
437: }
438:
439: char *
440: place(sp, l1, l2)
441: register char *sp, *l1, *l2;
442: {
443:
444: while (l1 < l2) {
445: *sp++ = fixcase(*l1++);
446: if (sp >= &genbuf[LBSIZE])
447: return (0);
448: }
449: return (sp);
450: }
451:
/*
 * Report the result of a substitute: the number of substitutions and,
 * unless it is 1 or equal to that number, the number of lines they
 * were on.  Nothing is printed when notable(total) is false.
 */
snote(total, lines)
	register int total, lines;
{

	if (notable(total)) {
		printf(mesg("%d subs|%d substitutions"), total);
		if (!(lines == 1 || lines == total))
			printf(" on %d lines", lines);
		noonl();
		flush();
	}
}
464:
465: compile(eof, oknl)
466: int eof;
467: int oknl;
468: {
469: register int c;
470: register char *ep;
471: char *lastep;
472: char bracket[NBRA], *bracketp, *rhsp;
473: int cclcnt;
474:
475: if (isalpha(eof) || isdigit(eof))
476: error("Regular expressions cannot be delimited by letters or digits");
477: ep = expbuf;
478: c = getchar();
479: if (eof == '\\')
480: switch (c) {
481:
482: case '/':
483: case '?':
484: if (scanre.Expbuf[0] == 0)
485: error("No previous scan re|No previous scanning regular expression");
486: resre(scanre);
487: return (c);
488:
489: case '&':
490: if (subre.Expbuf[0] == 0)
491: error("No previous substitute re|No previous substitute regular expression");
492: resre(subre);
493: return (c);
494:
495: default:
496: error("Badly formed re|Regular expression \\ must be followed by / or ?");
497: }
498: if (c == eof || c == '\n' || c == EOF) {
499: if (*ep == 0)
500: error("No previous re|No previous regular expression");
501: if (c == '\n' && oknl == 0)
502: error("Missing closing delimiter@for regular expression");
503: if (c != eof)
504: ungetchar(c);
505: return (eof);
506: }
507: bracketp = bracket;
508: nbra = 0;
509: circfl = 0;
510: if (c == '^') {
511: c = getchar();
512: circfl++;
513: }
514: ungetchar(c);
515: for (;;) {
516: if (ep >= &expbuf[ESIZE - 2])
517: complex:
518: cerror("Re too complex|Regular expression too complicated");
519: c = getchar();
520: if (c == eof || c == EOF) {
521: if (bracketp != bracket)
522: cerror("Unmatched \\(|More \\('s than \\)'s in regular expression");
523: *ep++ = CEOF;
524: if (c == EOF)
525: ungetchar(c);
526: return (eof);
527: }
528: if (value(MAGIC)) {
529: if (c != '*' || ep == expbuf)
530: lastep = ep;
531: } else
532: if (c != '\\' || peekchar() != '*' || ep == expbuf)
533: lastep = ep;
534: switch (c) {
535:
536: case '\\':
537: c = getchar();
538: switch (c) {
539:
540: case '(':
541: if (nbra >= NBRA)
542: cerror("Awash in \\('s!|Too many \\('d subexressions in a regular expression");
543: *bracketp++ = nbra;
544: *ep++ = CBRA;
545: *ep++ = nbra++;
546: continue;
547:
548: case ')':
549: if (bracketp <= bracket)
550: cerror("Extra \\)|More \\)'s than \\('s in regular expression");
551: *ep++ = CKET;
552: *ep++ = *--bracketp;
553: continue;
554:
555: case '<':
556: *ep++ = CBRC;
557: continue;
558:
559: case '>':
560: *ep++ = CLET;
561: continue;
562: }
563: if (value(MAGIC) == 0)
564: magic:
565: switch (c) {
566:
567: case '.':
568: *ep++ = CDOT;
569: continue;
570:
571: case '~':
572: rhsp = rhsbuf;
573: while (*rhsp) {
574: if (*rhsp & QUOTE) {
575: c = *rhsp & TRIM;
576: if (c == '&')
577: error("Replacement pattern contains &@- cannot use in re");
578: if (c >= '1' && c <= '9')
579: error("Replacement pattern contains \\d@- cannot use in re");
580: }
581: if (ep >= &expbuf[ESIZE-2])
582: goto complex;
583: *ep++ = CCHR;
584: *ep++ = *rhsp++ & TRIM;
585: }
586: continue;
587:
588: case '*':
589: if (ep == expbuf)
590: break;
591: if (*lastep == CBRA || *lastep == CKET)
592: cerror("Illegal *|Can't * a \\( ... \\) in regular expression");
593: if (*lastep == CCHR && (lastep[1] & QUOTE))
594: cerror("Illegal *|Can't * a \\n in regular expression");
595: *lastep |= STAR;
596: continue;
597:
598: case '[':
599: *ep++ = CCL;
600: *ep++ = 0;
601: cclcnt = 1;
602: c = getchar();
603: if (c == '^') {
604: c = getchar();
605: ep[-2] = NCCL;
606: }
607: if (c == ']')
608: cerror("Bad character class|Empty character class '[]' or '[^]' cannot match");
609: while (c != ']') {
610: if (c == '\\' && any(peekchar(), "]-^\\"))
611: c = getchar() | QUOTE;
612: if (c == '\n' || c == EOF)
613: cerror("Missing ]");
614: *ep++ = c;
615: cclcnt++;
616: if (ep >= &expbuf[ESIZE])
617: goto complex;
618: c = getchar();
619: }
620: lastep[1] = cclcnt;
621: continue;
622: }
623: if (c == EOF) {
624: ungetchar(EOF);
625: c = '\\';
626: goto defchar;
627: }
628: *ep++ = CCHR;
629: if (c == '\n')
630: cerror("No newlines in re's|Can't escape newlines into regular expressions");
631: /*
632: if (c < '1' || c > NBRA + '1') {
633: */
634: *ep++ = c;
635: continue;
636: /*
637: }
638: c -= '1';
639: if (c >= nbra)
640: cerror("Bad \\n|\\n in regular expression with n greater than the number of \\('s");
641: *ep++ = c | QUOTE;
642: continue;
643: */
644:
645: case '\n':
646: if (oknl) {
647: ungetchar(c);
648: *ep++ = CEOF;
649: return (eof);
650: }
651: cerror("Badly formed re|Missing closing delimiter for regular expression");
652:
653: case '$':
654: if (peekchar() == eof || peekchar() == EOF || oknl && peekchar() == '\n') {
655: *ep++ = CDOL;
656: continue;
657: }
658: goto defchar;
659:
660: case '.':
661: case '~':
662: case '*':
663: case '[':
664: if (value(MAGIC))
665: goto magic;
666: defchar:
667: default:
668: *ep++ = CCHR;
669: *ep++ = c;
670: continue;
671: }
672: }
673: }
674:
675: cerror(s)
676: char *s;
677: {
678:
679: expbuf[0] = 0;
680: error(s);
681: }
682:
683: same(a, b)
684: register int a, b;
685: {
686:
687: return (a == b || value(IGNORECASE) &&
688: ((islower(a) && toupper(a) == b) || (islower(b) && toupper(b) == a)));
689: }
690:
691: char *locs;
692:
693: execute(gf, addr)
694: line *addr;
695: {
696: register char *p1, *p2;
697: register int c;
698:
699: if (gf) {
700: if (circfl)
701: return (0);
702: #ifdef notdef
703: if (loc1 == loc2)
704: loc2++;
705: #endif
706: locs = p1 = loc2;
707: } else {
708: if (addr == zero)
709: return (0);
710: p1 = linebuf;
711: getline(*addr);
712: locs = 0;
713: }
714: p2 = expbuf;
715: if (circfl) {
716: loc1 = p1;
717: return (advance(p1, p2));
718: }
719: /* fast check for first character */
720: if (*p2 == CCHR) {
721: c = p2[1];
722: do {
723: if (c != *p1 && (!value(IGNORECASE) ||
724: !((islower(c) && toupper(c) == *p1) ||
725: (islower(*p1) && toupper(*p1) == c))))
726: continue;
727: if (advance(p1, p2)) {
728: loc1 = p1;
729: return (1);
730: }
731: } while (*p1++);
732: return (0);
733: }
734: /* regular algorithm */
735: do {
736: if (advance(p1, p2)) {
737: loc1 = p1;
738: return (1);
739: }
740: } while (*p1++);
741: return (0);
742: }
743:
744: #define uletter(c) (isalpha(c) || c == '_')
745:
746: advance(lp, ep)
747: register char *lp, *ep;
748: {
749: register char *curlp;
750: char *sp, *sp1;
751: int c;
752:
753: for (;;) switch (*ep++) {
754:
755: case CCHR:
756: /* useless
757: if (*ep & QUOTE) {
758: c = *ep++ & TRIM;
759: sp = braslist[c];
760: sp1 = braelist[c];
761: while (sp < sp1) {
762: if (!same(*sp, *lp))
763: return (0);
764: sp++, lp++;
765: }
766: continue;
767: }
768: */
769: if (!same(*ep, *lp))
770: return (0);
771: ep++, lp++;
772: continue;
773:
774: case CDOT:
775: if (*lp++)
776: continue;
777: return (0);
778:
779: case CDOL:
780: if (*lp == 0)
781: continue;
782: return (0);
783:
784: case CEOF:
785: loc2 = lp;
786: return (1);
787:
788: case CCL:
789: if (cclass(ep, *lp++, 1)) {
790: ep += *ep;
791: continue;
792: }
793: return (0);
794:
795: case NCCL:
796: if (cclass(ep, *lp++, 0)) {
797: ep += *ep;
798: continue;
799: }
800: return (0);
801:
802: case CBRA:
803: braslist[*ep++] = lp;
804: continue;
805:
806: case CKET:
807: braelist[*ep++] = lp;
808: continue;
809:
810: case CDOT|STAR:
811: curlp = lp;
812: while (*lp++)
813: continue;
814: goto star;
815:
816: case CCHR|STAR:
817: curlp = lp;
818: while (same(*lp, *ep))
819: lp++;
820: lp++;
821: ep++;
822: goto star;
823:
824: case CCL|STAR:
825: case NCCL|STAR:
826: curlp = lp;
827: while (cclass(ep, *lp++, ep[-1] == (CCL|STAR)))
828: continue;
829: ep += *ep;
830: goto star;
831: star:
832: do {
833: lp--;
834: if (lp == locs)
835: break;
836: if (advance(lp, ep))
837: return (1);
838: } while (lp > curlp);
839: return (0);
840:
841: case CBRC:
842: if (lp == expbuf)
843: continue;
844: if ((isdigit(*lp) || uletter(*lp)) && !uletter(lp[-1]) && !isdigit(lp[-1]))
845: continue;
846: return (0);
847:
848: case CLET:
849: if (!uletter(*lp) && !isdigit(*lp))
850: continue;
851: return (0);
852:
853: default:
854: error("Re internal error");
855: }
856: }
857:
858: cclass(set, c, af)
859: register char *set;
860: register int c;
861: int af;
862: {
863: register int n;
864:
865: if (c == 0)
866: return (0);
867: if (value(IGNORECASE) && isupper(c))
868: c = tolower(c);
869: n = *set++;
870: while (--n)
871: if (n > 2 && set[1] == '-') {
872: if (c >= (set[0] & TRIM) && c <= (set[2] & TRIM))
873: return (af);
874: set += 3;
875: n -= 2;
876: } else
877: if ((*set++ & TRIM) == c)
878: return (af);
879: return (!af);
880: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.