/*- * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. * * %sccs.include.proprietary.c% */ #ifndef lint static char sccsid[] = "@(#)ex_re.c 8.1 (Berkeley) 06/09/93"; #endif /* not lint */ #include "ex.h" #include "ex_re.h" /* * Global, substitute and regular expressions. * Very similar to ed, with some re extensions and * confirmed substitute. */ global(k) bool k; { register char *gp; register int c; register line *a1; char globuf[GBSIZE], *Cwas; int lines = lineDOL(); int oinglobal = inglobal; char *oglobp = globp; Cwas = Command; /* * States of inglobal: * 0: ordinary - not in a global command. * 1: text coming from some buffer, not tty. * 2: like 1, but the source of the buffer is a global command. * Hence you're only in a global command if inglobal==2. This * strange sounding convention is historically derived from * everybody simulating a global command. */ if (inglobal==2) error("Global within global@not allowed"); markDOT(); setall(); nonzero(); if (skipend()) error("Global needs re|Missing regular expression for global"); c = ex_getchar(); ignore(compile(c, 1)); savere(scanre); gp = globuf; while ((c = ex_getchar()) != '\n') { switch (c) { case EOF: c = '\n'; goto brkwh; case '\\': c = ex_getchar(); switch (c) { case '\\': ungetchar(c); break; case '\n': break; default: *gp++ = '\\'; break; } break; } *gp++ = c; if (gp >= &globuf[GBSIZE - 2]) error("Global command too long"); } brkwh: ungetchar(c); newline(); *gp++ = c; *gp++ = 0; saveall(); inglobal = 2; for (a1 = one; a1 <= dol; a1++) { *a1 &= ~01; if (a1 >= addr1 && a1 <= addr2 && execute(0, a1) == k) *a1 |= 01; } #ifdef notdef /* * This code is commented out for now. The problem is that we don't * fix up the undo area the way we should. Basically, I think what has * to be done is to copy the undo area down (since we shrunk everything) * and move the various pointers into it down too. I will do this later * when I have time. (Mark, 10-20-80) */ /* * Special case: g/.../d (avoid n^2 algorithm) */ if (globuf[0]=='d' && globuf[1]=='\n' && globuf[2]=='\0') { gdelete(); return; } #endif if (inopen) inopen = -1; /* * Now for each marked line, set dot there and do the commands. * Note the n^2 behavior here for lots of lines matching. * This is really needed: in some cases you could delete lines, * causing a marked line to be moved before a1 and missed if * we didn't restart at zero each time. */ for (a1 = one; a1 <= dol; a1++) { if (*a1 & 01) { *a1 &= ~01; dot = a1; globp = globuf; commands(1, 1); a1 = zero; } } globp = oglobp; inglobal = oinglobal; endline = 1; Command = Cwas; netchHAD(lines); setlastchar(EOF); if (inopen) { ungetchar(EOF); inopen = 1; } } /* * gdelete: delete inside a global command. Handles the * special case g/r.e./d. All lines to be deleted have * already been marked. Squeeze the remaining lines together. * Note that other cases such as g/r.e./p, g/r.e./s/r.e.2/rhs/, * and g/r.e./.,/r.e.2/d are not treated specially. There is no * good reason for this except the question: where to you draw the line? */ gdelete() { register line *a1, *a2, *a3; a3 = dol; /* find first marked line. can skip all before it */ for (a1=zero; (*a1&01)==0; a1++) if (a1>=a3) return; /* copy down unmarked lines, compacting as we go. */ for (a2=a1+1; a2<=a3;) { if (*a2&01) { a2++; /* line is marked, skip it */ dot = a1; /* dot left after line deletion */ } else *a1++ = *a2++; /* unmarked, copy it */ } dol = a1-1; if (dot>dol) dot = dol; change(); } bool cflag; int scount, slines, stotal; substitute(c) int c; { register line *addr; register int n; int gsubf, hopcount; gsubf = compsub(c); if(FIXUNDO) save12(), undkind = UNDCHANGE; stotal = 0; slines = 0; for (addr = addr1; addr <= addr2; addr++) { scount = hopcount = 0; if (dosubcon(0, addr) == 0) continue; if (gsubf) { /* * The loop can happen from s/\ sizeof linebuf) error("substitution loop"); if (dosubcon(1, addr) == 0) break; } } if (scount) { stotal += scount; slines++; putmark(addr); n = append(getsub, addr); addr += n; addr2 += n; } } if (stotal == 0 && !inglobal && !cflag) error("Fail|Substitute pattern match failed"); snote(stotal, slines); return (stotal); } compsub(ch) { register int seof, c, uselastre; static int gsubf; if (!value(EDCOMPATIBLE)) gsubf = cflag = 0; uselastre = 0; switch (ch) { case 's': ignore(skipwh()); seof = ex_getchar(); if (endcmd(seof) || any(seof, "gcr")) { ungetchar(seof); goto redo; } if (isalpha(seof) || isdigit(seof)) error("Substitute needs re|Missing regular expression for substitute"); seof = compile(seof, 1); uselastre = 1; comprhs(seof); gsubf = 0; cflag = 0; break; case '~': uselastre = 1; /* fall into ... */ case '&': redo: if (re.Expbuf[0] == 0) error("No previous re|No previous regular expression"); if (subre.Expbuf[0] == 0) error("No previous substitute re|No previous substitute to repeat"); break; } for (;;) { c = ex_getchar(); switch (c) { case 'g': gsubf = !gsubf; continue; case 'c': cflag = !cflag; continue; case 'r': uselastre = 1; continue; default: ungetchar(c); setcount(); newline(); if (uselastre) savere(subre); else resre(subre); return (gsubf); } } } comprhs(seof) int seof; { register char *rp, *orp; register int c; char orhsbuf[RHSSIZE]; rp = rhsbuf; CP(orhsbuf, rp); for (;;) { c = ex_getchar(); if (c == seof) break; switch (c) { case '\\': c = ex_getchar(); if (c == EOF) { ungetchar(c); break; } if (value(MAGIC)) { /* * When "magic", \& turns into a plain &, * and all other chars work fine quoted. */ if (c != '&') c |= QUOTE; break; } magic: if (c == '~') { for (orp = orhsbuf; *orp; *rp++ = *orp++) if (rp >= &rhsbuf[RHSSIZE - 1]) goto toobig; continue; } c |= QUOTE; break; case '\n': case EOF: if (!(globp && globp[0])) { ungetchar(c); goto endrhs; } case '~': case '&': if (value(MAGIC)) goto magic; break; } if (rp >= &rhsbuf[RHSSIZE - 1]) { toobig: *rp = 0; error("Replacement pattern too long@- limit 256 characters"); } *rp++ = c; } endrhs: *rp++ = 0; } getsub() { register char *p; if ((p = linebp) == 0) return (EOF); strcLIN(p); linebp = 0; return (0); } dosubcon(f, a) bool f; line *a; { if (execute(f, a) == 0) return (0); if (confirmed(a)) { dosub(); scount++; } return (1); } confirmed(a) line *a; { register int c, ch; if (cflag == 0) return (1); pofix(); pline(lineno(a)); if (inopen) ex_putchar('\n' | QUOTE); c = column(loc1 - 1); ugo(c - 1 + (inopen ? 1 : 0), ' '); ugo(column(loc2 - 1) - c, '^'); flush(); ch = c = getkey(); again: if (c == '\r') c = '\n'; if (inopen) ex_putchar(c), flush(); if (c != '\n' && c != EOF) { c = getkey(); goto again; } noteinp(); return (ch == 'y'); } getch() { char c; if (read(2, &c, 1) != 1) return (EOF); return (c & TRIM); } ugo(cnt, with) int with; int cnt; { if (cnt > 0) do ex_putchar(with); while (--cnt > 0); } int casecnt; bool destuc; dosub() { register char *lp, *sp, *rp; int c; lp = linebuf; sp = genbuf; rp = rhsbuf; while (lp < loc1) *sp++ = *lp++; casecnt = 0; while (c = *rp++) { /* ^V from vi to split lines */ if (c == '\r') c = '\n'; if (c & QUOTE) switch (c & TRIM) { case '&': sp = place(sp, loc1, loc2); if (sp == 0) goto ovflo; continue; case 'l': casecnt = 1; destuc = 0; continue; case 'L': casecnt = LBSIZE; destuc = 0; continue; case 'u': casecnt = 1; destuc = 1; continue; case 'U': casecnt = LBSIZE; destuc = 1; continue; case 'E': case 'e': casecnt = 0; continue; } if (c < 0 && (c &= TRIM) >= '1' && c < nbra + '1') { sp = place(sp, braslist[c - '1'], braelist[c - '1']); if (sp == 0) goto ovflo; continue; } if (casecnt) *sp++ = fixcase(c & TRIM); else *sp++ = c & TRIM; if (sp >= &genbuf[LBSIZE]) ovflo: error("Line overflow@in substitute"); } lp = loc2; loc2 = sp + (linebuf - genbuf); while (*sp++ = *lp++) if (sp >= &genbuf[LBSIZE]) goto ovflo; strcLIN(genbuf); } fixcase(c) register int c; { if (casecnt == 0) return (c); casecnt--; if (destuc) { if (islower(c)) c = toupper(c); } else if (isupper(c)) c = tolower(c); return (c); } char * place(sp, l1, l2) register char *sp, *l1, *l2; { while (l1 < l2) { *sp++ = fixcase(*l1++); if (sp >= &genbuf[LBSIZE]) return (0); } return (sp); } snote(total, lines) register int total, lines; { if (!notable(total)) return; ex_printf(mesg("%d subs|%d substitutions"), total); if (lines != 1 && lines != total) ex_printf(" on %d lines", lines); noonl(); flush(); } compile(eof, oknl) int eof; int oknl; { register int c; register char *ep; char *lastep; char bracket[NBRA], *bracketp, *rhsp; int cclcnt; if (isalpha(eof) || isdigit(eof)) error("Regular expressions cannot be delimited by letters or digits"); ep = expbuf; c = ex_getchar(); if (eof == '\\') switch (c) { case '/': case '?': if (scanre.Expbuf[0] == 0) error("No previous scan re|No previous scanning regular expression"); resre(scanre); return (c); case '&': if (subre.Expbuf[0] == 0) error("No previous substitute re|No previous substitute regular expression"); resre(subre); return (c); default: error("Badly formed re|Regular expression \\ must be followed by / or ?"); } if (c == eof || c == '\n' || c == EOF) { if (*ep == 0) error("No previous re|No previous regular expression"); if (c == '\n' && oknl == 0) error("Missing closing delimiter@for regular expression"); if (c != eof) ungetchar(c); return (eof); } bracketp = bracket; nbra = 0; circfl = 0; if (c == '^') { c = ex_getchar(); circfl++; } ungetchar(c); for (;;) { if (ep >= &expbuf[ESIZE - 2]) complex: cerror("Re too complex|Regular expression too complicated"); c = ex_getchar(); if (c == eof || c == EOF) { if (bracketp != bracket) cerror("Unmatched \\(|More \\('s than \\)'s in regular expression"); *ep++ = CEOFC; if (c == EOF) ungetchar(c); return (eof); } if (value(MAGIC)) { if (c != '*' || ep == expbuf) lastep = ep; } else if (c != '\\' || peekchar() != '*' || ep == expbuf) lastep = ep; switch (c) { case '\\': c = ex_getchar(); switch (c) { case '(': if (nbra >= NBRA) cerror("Awash in \\('s!|Too many \\('d subexressions in a regular expression"); *bracketp++ = nbra; *ep++ = CBRA; *ep++ = nbra++; continue; case ')': if (bracketp <= bracket) cerror("Extra \\)|More \\)'s than \\('s in regular expression"); *ep++ = CKET; *ep++ = *--bracketp; continue; case '<': *ep++ = CBRC; continue; case '>': *ep++ = CLET; continue; } if (value(MAGIC) == 0) magic: switch (c) { case '.': *ep++ = CDOT; continue; case '~': rhsp = rhsbuf; while (*rhsp) { if (*rhsp & QUOTE) { c = *rhsp & TRIM; if (c == '&') error("Replacement pattern contains &@- cannot use in re"); if (c >= '1' && c <= '9') error("Replacement pattern contains \\d@- cannot use in re"); } if (ep >= &expbuf[ESIZE-2]) goto complex; *ep++ = CCHR; *ep++ = *rhsp++ & TRIM; } continue; case '*': if (ep == expbuf) break; if (*lastep == CBRA || *lastep == CKET) cerror("Illegal *|Can't * a \\( ... \\) in regular expression"); if (*lastep == CCHR && (lastep[1] & QUOTE)) cerror("Illegal *|Can't * a \\n in regular expression"); *lastep |= STAR; continue; case '[': *ep++ = CCL; *ep++ = 0; cclcnt = 1; c = ex_getchar(); if (c == '^') { c = ex_getchar(); ep[-2] = NCCL; } if (c == ']') cerror("Bad character class|Empty character class '[]' or '[^]' cannot match"); while (c != ']') { if (c == '\\' && any(peekchar(), "]-^\\")) c = ex_getchar() | QUOTE; if (c == '\n' || c == EOF) cerror("Missing ]"); *ep++ = c; cclcnt++; if (ep >= &expbuf[ESIZE]) goto complex; c = ex_getchar(); } lastep[1] = cclcnt; continue; } if (c == EOF) { ungetchar(EOF); c = '\\'; goto defchar; } *ep++ = CCHR; if (c == '\n') cerror("No newlines in re's|Can't escape newlines into regular expressions"); /* if (c < '1' || c > NBRA + '1') { */ *ep++ = c; continue; /* } c -= '1'; if (c >= nbra) cerror("Bad \\n|\\n in regular expression with n greater than the number of \\('s"); *ep++ = c | QUOTE; continue; */ case '\n': if (oknl) { ungetchar(c); *ep++ = CEOFC; return (eof); } cerror("Badly formed re|Missing closing delimiter for regular expression"); case '$': if (peekchar() == eof || peekchar() == EOF || oknl && peekchar() == '\n') { *ep++ = CDOL; continue; } goto defchar; case '.': case '~': case '*': case '[': if (value(MAGIC)) goto magic; defchar: default: *ep++ = CCHR; *ep++ = c; continue; } } } cerror(s) char *s; { expbuf[0] = 0; error(s); } same(a, b) register int a, b; { return (a == b || value(IGNORECASE) && ((islower(a) && toupper(a) == b) || (islower(b) && toupper(b) == a))); } char *locs; /* VARARGS1 */ execute(gf, addr) line *addr; { register char *p1, *p2; register int c; if (gf) { if (circfl) return (0); locs = p1 = loc2; } else { if (addr == zero) return (0); p1 = linebuf; getline(*addr); locs = 0; } p2 = expbuf; if (circfl) { loc1 = p1; return (advance(p1, p2)); } /* fast check for first character */ if (*p2 == CCHR) { c = p2[1]; do { if (c != *p1 && (!value(IGNORECASE) || !((islower(c) && toupper(c) == *p1) || (islower(*p1) && toupper(*p1) == c)))) continue; if (advance(p1, p2)) { loc1 = p1; return (1); } } while (*p1++); return (0); } /* regular algorithm */ do { if (advance(p1, p2)) { loc1 = p1; return (1); } } while (*p1++); return (0); } #define uletter(c) (isalpha(c) || c == '_') advance(lp, ep) register char *lp, *ep; { register char *curlp; for (;;) switch (*ep++) { case CCHR: /* useless if (*ep & QUOTE) { c = *ep++ & TRIM; sp = braslist[c]; sp1 = braelist[c]; while (sp < sp1) { if (!same(*sp, *lp)) return (0); sp++, lp++; } continue; } */ if (!same(*ep, *lp)) return (0); ep++, lp++; continue; case CDOT: if (*lp++) continue; return (0); case CDOL: if (*lp == 0) continue; return (0); case CEOFC: loc2 = lp; return (1); case CCL: if (cclass(ep, *lp++, 1)) { ep += *ep; continue; } return (0); case NCCL: if (cclass(ep, *lp++, 0)) { ep += *ep; continue; } return (0); case CBRA: braslist[*ep++] = lp; continue; case CKET: braelist[*ep++] = lp; continue; case CDOT|STAR: curlp = lp; while (*lp++) continue; goto star; case CCHR|STAR: curlp = lp; while (same(*lp, *ep)) lp++; lp++; ep++; goto star; case CCL|STAR: case NCCL|STAR: curlp = lp; while (cclass(ep, *lp++, ep[-1] == (CCL|STAR))) continue; ep += *ep; goto star; star: do { lp--; if (lp == locs) break; if (advance(lp, ep)) return (1); } while (lp > curlp); return (0); case CBRC: if (lp == linebuf) continue; if ((isdigit(*lp) || uletter(*lp)) && !uletter(lp[-1]) && !isdigit(lp[-1])) continue; return (0); case CLET: if (!uletter(*lp) && !isdigit(*lp)) continue; return (0); default: error("Re internal error"); } } cclass(set, c, af) register char *set; register int c; int af; { register int n; if (c == 0) return (0); if (value(IGNORECASE) && isupper(c)) c = tolower(c); n = *set++; while (--n) if (n > 2 && set[1] == '-') { if (c >= (set[0] & TRIM) && c <= (set[2] & TRIM)) return (af); set += 3; n -= 2; } else if ((*set++ & TRIM) == c) return (af); return (!af); }