1b8ba871bSPeter Wemm /*- 2b8ba871bSPeter Wemm * Copyright (c) 1992, 1993, 1994 3b8ba871bSPeter Wemm * The Regents of the University of California. All rights reserved. 4b8ba871bSPeter Wemm * Copyright (c) 1992, 1993, 1994, 1995, 1996 5b8ba871bSPeter Wemm * Keith Bostic. All rights reserved. 6b8ba871bSPeter Wemm * 7b8ba871bSPeter Wemm * See the LICENSE file for redistribution information. 8b8ba871bSPeter Wemm */ 9b8ba871bSPeter Wemm 10b8ba871bSPeter Wemm #include "config.h" 11b8ba871bSPeter Wemm 12b8ba871bSPeter Wemm #ifndef lint 13f0957ccaSPeter Wemm static const char sccsid[] = "$Id: ex_subst.c,v 10.53 2011/12/21 20:40:35 zy Exp $"; 14b8ba871bSPeter Wemm #endif /* not lint */ 15b8ba871bSPeter Wemm 16b8ba871bSPeter Wemm #include <sys/types.h> 17b8ba871bSPeter Wemm #include <sys/queue.h> 18b8ba871bSPeter Wemm #include <sys/time.h> 19b8ba871bSPeter Wemm 20b8ba871bSPeter Wemm #include <bitstring.h> 21b8ba871bSPeter Wemm #include <ctype.h> 22b8ba871bSPeter Wemm #include <errno.h> 23b8ba871bSPeter Wemm #include <limits.h> 24b8ba871bSPeter Wemm #include <stdio.h> 25b8ba871bSPeter Wemm #include <stdlib.h> 26b8ba871bSPeter Wemm #include <string.h> 27b8ba871bSPeter Wemm #include <unistd.h> 28b8ba871bSPeter Wemm 29b8ba871bSPeter Wemm #include "../common/common.h" 30b8ba871bSPeter Wemm #include "../vi/vi.h" 31b8ba871bSPeter Wemm 32b8ba871bSPeter Wemm #define SUB_FIRST 0x01 /* The 'r' flag isn't reasonable. */ 33b8ba871bSPeter Wemm #define SUB_MUSTSETR 0x02 /* The 'r' flag is required. */ 34b8ba871bSPeter Wemm 35f0957ccaSPeter Wemm static int re_conv __P((SCR *, CHAR_T **, size_t *, int *)); 36f0957ccaSPeter Wemm static int re_cscope_conv __P((SCR *, CHAR_T **, size_t *, int *)); 37b8ba871bSPeter Wemm static int re_sub __P((SCR *, 38f0957ccaSPeter Wemm CHAR_T *, CHAR_T **, size_t *, size_t *, regmatch_t [10])); 39f0957ccaSPeter Wemm static int re_tag_conv __P((SCR *, CHAR_T **, size_t *, int *)); 40f0957ccaSPeter Wemm static int s __P((SCR *, EXCMD *, CHAR_T *, regex_t *, u_int)); 41b8ba871bSPeter Wemm 42b8ba871bSPeter Wemm /* 43b8ba871bSPeter Wemm * ex_s -- 44b8ba871bSPeter Wemm * [line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]] 45b8ba871bSPeter Wemm * 46b8ba871bSPeter Wemm * Substitute on lines matching a pattern. 47b8ba871bSPeter Wemm * 48b8ba871bSPeter Wemm * PUBLIC: int ex_s __P((SCR *, EXCMD *)); 49b8ba871bSPeter Wemm */ 50b8ba871bSPeter Wemm int 51f0957ccaSPeter Wemm ex_s(SCR *sp, EXCMD *cmdp) 52b8ba871bSPeter Wemm { 53b8ba871bSPeter Wemm regex_t *re; 54b8ba871bSPeter Wemm size_t blen, len; 55b8ba871bSPeter Wemm u_int flags; 56b8ba871bSPeter Wemm int delim; 57f0957ccaSPeter Wemm CHAR_T *bp, *p, *ptrn, *rep, *t; 58b8ba871bSPeter Wemm 59b8ba871bSPeter Wemm /* 60b8ba871bSPeter Wemm * Skip leading white space. 61b8ba871bSPeter Wemm * 62b8ba871bSPeter Wemm * !!! 63b8ba871bSPeter Wemm * Historic vi allowed any non-alphanumeric to serve as the 64b8ba871bSPeter Wemm * substitution command delimiter. 65b8ba871bSPeter Wemm * 66b8ba871bSPeter Wemm * !!! 67b8ba871bSPeter Wemm * If the arguments are empty, it's the same as &, i.e. we 68b8ba871bSPeter Wemm * repeat the last substitution. 69b8ba871bSPeter Wemm */ 70b8ba871bSPeter Wemm if (cmdp->argc == 0) 71b8ba871bSPeter Wemm goto subagain; 72b8ba871bSPeter Wemm for (p = cmdp->argv[0]->bp, 73b8ba871bSPeter Wemm len = cmdp->argv[0]->len; len > 0; --len, ++p) { 74f0957ccaSPeter Wemm if (!cmdskip(*p)) 75b8ba871bSPeter Wemm break; 76b8ba871bSPeter Wemm } 77b8ba871bSPeter Wemm if (len == 0) 78b8ba871bSPeter Wemm subagain: return (ex_subagain(sp, cmdp)); 79b8ba871bSPeter Wemm 80b8ba871bSPeter Wemm delim = *p++; 81f0957ccaSPeter Wemm if (!isascii(delim) || isalnum(delim) || delim == '\\') 82b8ba871bSPeter Wemm return (s(sp, cmdp, p, &sp->subre_c, SUB_MUSTSETR)); 83b8ba871bSPeter Wemm 84b8ba871bSPeter Wemm /* 85b8ba871bSPeter Wemm * !!! 86b8ba871bSPeter Wemm * The full-blown substitute command reset the remembered 87b8ba871bSPeter Wemm * state of the 'c' and 'g' suffices. 88b8ba871bSPeter Wemm */ 89b8ba871bSPeter Wemm sp->c_suffix = sp->g_suffix = 0; 90b8ba871bSPeter Wemm 91b8ba871bSPeter Wemm /* 92b8ba871bSPeter Wemm * Get the pattern string, toss escaping characters. 93b8ba871bSPeter Wemm * 94b8ba871bSPeter Wemm * !!! 95b8ba871bSPeter Wemm * Historic vi accepted any of the following forms: 96b8ba871bSPeter Wemm * 97b8ba871bSPeter Wemm * :s/abc/def/ change "abc" to "def" 98b8ba871bSPeter Wemm * :s/abc/def change "abc" to "def" 99b8ba871bSPeter Wemm * :s/abc/ delete "abc" 100b8ba871bSPeter Wemm * :s/abc delete "abc" 101b8ba871bSPeter Wemm * 102b8ba871bSPeter Wemm * QUOTING NOTE: 103b8ba871bSPeter Wemm * 104b8ba871bSPeter Wemm * Only toss an escaping character if it escapes a delimiter. 105b8ba871bSPeter Wemm * This means that "s/A/\\\\f" replaces "A" with "\\f". It 106b8ba871bSPeter Wemm * would be nice to be more regular, i.e. for each layer of 107b8ba871bSPeter Wemm * escaping a single escaping character is removed, but that's 108b8ba871bSPeter Wemm * not how the historic vi worked. 109b8ba871bSPeter Wemm */ 110b8ba871bSPeter Wemm for (ptrn = t = p;;) { 111b8ba871bSPeter Wemm if (p[0] == '\0' || p[0] == delim) { 112b8ba871bSPeter Wemm if (p[0] == delim) 113b8ba871bSPeter Wemm ++p; 114b8ba871bSPeter Wemm /* 115b8ba871bSPeter Wemm * !!! 116b8ba871bSPeter Wemm * Nul terminate the pattern string -- it's passed 117b8ba871bSPeter Wemm * to regcomp which doesn't understand anything else. 118b8ba871bSPeter Wemm */ 119b8ba871bSPeter Wemm *t = '\0'; 120b8ba871bSPeter Wemm break; 121b8ba871bSPeter Wemm } 122b8ba871bSPeter Wemm if (p[0] == '\\') 123b8ba871bSPeter Wemm if (p[1] == delim) 124b8ba871bSPeter Wemm ++p; 125b8ba871bSPeter Wemm else if (p[1] == '\\') 126b8ba871bSPeter Wemm *t++ = *p++; 127b8ba871bSPeter Wemm *t++ = *p++; 128b8ba871bSPeter Wemm } 129b8ba871bSPeter Wemm 130b8ba871bSPeter Wemm /* 131b8ba871bSPeter Wemm * If the pattern string is empty, use the last RE (not just the 132b8ba871bSPeter Wemm * last substitution RE). 133b8ba871bSPeter Wemm */ 134b8ba871bSPeter Wemm if (*ptrn == '\0') { 135b8ba871bSPeter Wemm if (sp->re == NULL) { 136b8ba871bSPeter Wemm ex_emsg(sp, NULL, EXM_NOPREVRE); 137b8ba871bSPeter Wemm return (1); 138b8ba871bSPeter Wemm } 139b8ba871bSPeter Wemm 140b8ba871bSPeter Wemm /* Re-compile the RE if necessary. */ 141f0957ccaSPeter Wemm if (!F_ISSET(sp, SC_RE_SEARCH) && 142f0957ccaSPeter Wemm re_compile(sp, sp->re, sp->re_len, 143f0957ccaSPeter Wemm NULL, NULL, &sp->re_c, RE_C_SEARCH)) 144b8ba871bSPeter Wemm return (1); 145b8ba871bSPeter Wemm flags = 0; 146b8ba871bSPeter Wemm } else { 147b8ba871bSPeter Wemm /* 148b8ba871bSPeter Wemm * !!! 149b8ba871bSPeter Wemm * Compile the RE. Historic practice is that substitutes set 150b8ba871bSPeter Wemm * the search direction as well as both substitute and search 151b8ba871bSPeter Wemm * RE's. We compile the RE twice, as we don't want to bother 152b8ba871bSPeter Wemm * ref counting the pattern string and (opaque) structure. 153b8ba871bSPeter Wemm */ 154f0957ccaSPeter Wemm if (re_compile(sp, ptrn, t - ptrn, &sp->re, 155f0957ccaSPeter Wemm &sp->re_len, &sp->re_c, RE_C_SEARCH)) 156b8ba871bSPeter Wemm return (1); 157f0957ccaSPeter Wemm if (re_compile(sp, ptrn, t - ptrn, &sp->subre, 158f0957ccaSPeter Wemm &sp->subre_len, &sp->subre_c, RE_C_SUBST)) 159b8ba871bSPeter Wemm return (1); 160b8ba871bSPeter Wemm 161b8ba871bSPeter Wemm flags = SUB_FIRST; 162b8ba871bSPeter Wemm sp->searchdir = FORWARD; 163b8ba871bSPeter Wemm } 164b8ba871bSPeter Wemm re = &sp->re_c; 165b8ba871bSPeter Wemm 166b8ba871bSPeter Wemm /* 167b8ba871bSPeter Wemm * Get the replacement string. 168b8ba871bSPeter Wemm * 169b8ba871bSPeter Wemm * The special character & (\& if O_MAGIC not set) matches the 170b8ba871bSPeter Wemm * entire RE. No handling of & is required here, it's done by 171b8ba871bSPeter Wemm * re_sub(). 172b8ba871bSPeter Wemm * 173b8ba871bSPeter Wemm * The special character ~ (\~ if O_MAGIC not set) inserts the 174b8ba871bSPeter Wemm * previous replacement string into this replacement string. 175b8ba871bSPeter Wemm * Count ~'s to figure out how much space we need. We could 176b8ba871bSPeter Wemm * special case nonexistent last patterns or whether or not 177b8ba871bSPeter Wemm * O_MAGIC is set, but it's probably not worth the effort. 178b8ba871bSPeter Wemm * 179b8ba871bSPeter Wemm * QUOTING NOTE: 180b8ba871bSPeter Wemm * 181b8ba871bSPeter Wemm * Only toss an escaping character if it escapes a delimiter or 182b8ba871bSPeter Wemm * if O_MAGIC is set and it escapes a tilde. 183b8ba871bSPeter Wemm * 184b8ba871bSPeter Wemm * !!! 185b8ba871bSPeter Wemm * If the entire replacement pattern is "%", then use the last 186b8ba871bSPeter Wemm * replacement pattern. This semantic was added to vi in System 187b8ba871bSPeter Wemm * V and then percolated elsewhere, presumably around the time 188b8ba871bSPeter Wemm * that it was added to their version of ed(1). 189b8ba871bSPeter Wemm */ 190b8ba871bSPeter Wemm if (p[0] == '\0' || p[0] == delim) { 191b8ba871bSPeter Wemm if (p[0] == delim) 192b8ba871bSPeter Wemm ++p; 193b8ba871bSPeter Wemm if (sp->repl != NULL) 194b8ba871bSPeter Wemm free(sp->repl); 195b8ba871bSPeter Wemm sp->repl = NULL; 196b8ba871bSPeter Wemm sp->repl_len = 0; 197b8ba871bSPeter Wemm } else if (p[0] == '%' && (p[1] == '\0' || p[1] == delim)) 198b8ba871bSPeter Wemm p += p[1] == delim ? 2 : 1; 199b8ba871bSPeter Wemm else { 200b8ba871bSPeter Wemm for (rep = p, len = 0; 201b8ba871bSPeter Wemm p[0] != '\0' && p[0] != delim; ++p, ++len) 202b8ba871bSPeter Wemm if (p[0] == '~') 203b8ba871bSPeter Wemm len += sp->repl_len; 204f0957ccaSPeter Wemm GET_SPACE_RETW(sp, bp, blen, len); 205b8ba871bSPeter Wemm for (t = bp, len = 0, p = rep;;) { 206b8ba871bSPeter Wemm if (p[0] == '\0' || p[0] == delim) { 207b8ba871bSPeter Wemm if (p[0] == delim) 208b8ba871bSPeter Wemm ++p; 209b8ba871bSPeter Wemm break; 210b8ba871bSPeter Wemm } 211b8ba871bSPeter Wemm if (p[0] == '\\') { 212b8ba871bSPeter Wemm if (p[1] == delim) 213b8ba871bSPeter Wemm ++p; 214b8ba871bSPeter Wemm else if (p[1] == '\\') { 215b8ba871bSPeter Wemm *t++ = *p++; 216b8ba871bSPeter Wemm ++len; 217b8ba871bSPeter Wemm } else if (p[1] == '~') { 218b8ba871bSPeter Wemm ++p; 219b8ba871bSPeter Wemm if (!O_ISSET(sp, O_MAGIC)) 220b8ba871bSPeter Wemm goto tilde; 221b8ba871bSPeter Wemm } 222b8ba871bSPeter Wemm } else if (p[0] == '~' && O_ISSET(sp, O_MAGIC)) { 223b8ba871bSPeter Wemm tilde: ++p; 224f0957ccaSPeter Wemm MEMCPY(t, sp->repl, sp->repl_len); 225b8ba871bSPeter Wemm t += sp->repl_len; 226b8ba871bSPeter Wemm len += sp->repl_len; 227b8ba871bSPeter Wemm continue; 228b8ba871bSPeter Wemm } 229b8ba871bSPeter Wemm *t++ = *p++; 230b8ba871bSPeter Wemm ++len; 231b8ba871bSPeter Wemm } 232b8ba871bSPeter Wemm if ((sp->repl_len = len) != 0) { 233b8ba871bSPeter Wemm if (sp->repl != NULL) 234b8ba871bSPeter Wemm free(sp->repl); 235f0957ccaSPeter Wemm MALLOC(sp, sp->repl, CHAR_T *, len * sizeof(CHAR_T)); 236f0957ccaSPeter Wemm if (sp->repl == NULL) { 237f0957ccaSPeter Wemm FREE_SPACEW(sp, bp, blen); 238b8ba871bSPeter Wemm return (1); 239b8ba871bSPeter Wemm } 240f0957ccaSPeter Wemm MEMCPY(sp->repl, bp, len); 241b8ba871bSPeter Wemm } 242f0957ccaSPeter Wemm FREE_SPACEW(sp, bp, blen); 243b8ba871bSPeter Wemm } 244b8ba871bSPeter Wemm return (s(sp, cmdp, p, re, flags)); 245b8ba871bSPeter Wemm } 246b8ba871bSPeter Wemm 247b8ba871bSPeter Wemm /* 248b8ba871bSPeter Wemm * ex_subagain -- 249b8ba871bSPeter Wemm * [line [,line]] & [cgr] [count] [#lp]] 250b8ba871bSPeter Wemm * 251b8ba871bSPeter Wemm * Substitute using the last substitute RE and replacement pattern. 252b8ba871bSPeter Wemm * 253b8ba871bSPeter Wemm * PUBLIC: int ex_subagain __P((SCR *, EXCMD *)); 254b8ba871bSPeter Wemm */ 255b8ba871bSPeter Wemm int 256f0957ccaSPeter Wemm ex_subagain(SCR *sp, EXCMD *cmdp) 257b8ba871bSPeter Wemm { 258b8ba871bSPeter Wemm if (sp->subre == NULL) { 259b8ba871bSPeter Wemm ex_emsg(sp, NULL, EXM_NOPREVRE); 260b8ba871bSPeter Wemm return (1); 261b8ba871bSPeter Wemm } 262f0957ccaSPeter Wemm if (!F_ISSET(sp, SC_RE_SUBST) && 263f0957ccaSPeter Wemm re_compile(sp, sp->subre, sp->subre_len, 264f0957ccaSPeter Wemm NULL, NULL, &sp->subre_c, RE_C_SUBST)) 265b8ba871bSPeter Wemm return (1); 266b8ba871bSPeter Wemm return (s(sp, 267b8ba871bSPeter Wemm cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->subre_c, 0)); 268b8ba871bSPeter Wemm } 269b8ba871bSPeter Wemm 270b8ba871bSPeter Wemm /* 271b8ba871bSPeter Wemm * ex_subtilde -- 272b8ba871bSPeter Wemm * [line [,line]] ~ [cgr] [count] [#lp]] 273b8ba871bSPeter Wemm * 274b8ba871bSPeter Wemm * Substitute using the last RE and last substitute replacement pattern. 275b8ba871bSPeter Wemm * 276b8ba871bSPeter Wemm * PUBLIC: int ex_subtilde __P((SCR *, EXCMD *)); 277b8ba871bSPeter Wemm */ 278b8ba871bSPeter Wemm int 279f0957ccaSPeter Wemm ex_subtilde(SCR *sp, EXCMD *cmdp) 280b8ba871bSPeter Wemm { 281b8ba871bSPeter Wemm if (sp->re == NULL) { 282b8ba871bSPeter Wemm ex_emsg(sp, NULL, EXM_NOPREVRE); 283b8ba871bSPeter Wemm return (1); 284b8ba871bSPeter Wemm } 285f0957ccaSPeter Wemm if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp, sp->re, 286f0957ccaSPeter Wemm sp->re_len, NULL, NULL, &sp->re_c, RE_C_SEARCH)) 287b8ba871bSPeter Wemm return (1); 288b8ba871bSPeter Wemm return (s(sp, 289b8ba871bSPeter Wemm cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->re_c, 0)); 290b8ba871bSPeter Wemm } 291b8ba871bSPeter Wemm 292b8ba871bSPeter Wemm /* 293b8ba871bSPeter Wemm * s -- 294b8ba871bSPeter Wemm * Do the substitution. This stuff is *really* tricky. There are lots of 295b8ba871bSPeter Wemm * special cases, and general nastiness. Don't mess with it unless you're 296b8ba871bSPeter Wemm * pretty confident. 297b8ba871bSPeter Wemm * 298b8ba871bSPeter Wemm * The nasty part of the substitution is what happens when the replacement 299b8ba871bSPeter Wemm * string contains newlines. It's a bit tricky -- consider the information 300b8ba871bSPeter Wemm * that has to be retained for "s/f\(o\)o/^M\1^M\1/". The solution here is 301b8ba871bSPeter Wemm * to build a set of newline offsets which we use to break the line up later, 302b8ba871bSPeter Wemm * when the replacement is done. Don't change it unless you're *damned* 303b8ba871bSPeter Wemm * confident. 304b8ba871bSPeter Wemm */ 305b8ba871bSPeter Wemm #define NEEDNEWLINE(sp) { \ 306b8ba871bSPeter Wemm if (sp->newl_len == sp->newl_cnt) { \ 307b8ba871bSPeter Wemm sp->newl_len += 25; \ 308b8ba871bSPeter Wemm REALLOC(sp, sp->newl, size_t *, \ 309b8ba871bSPeter Wemm sp->newl_len * sizeof(size_t)); \ 310b8ba871bSPeter Wemm if (sp->newl == NULL) { \ 311b8ba871bSPeter Wemm sp->newl_len = 0; \ 312b8ba871bSPeter Wemm return (1); \ 313b8ba871bSPeter Wemm } \ 314b8ba871bSPeter Wemm } \ 315b8ba871bSPeter Wemm } 316b8ba871bSPeter Wemm 317b8ba871bSPeter Wemm #define BUILD(sp, l, len) { \ 318b8ba871bSPeter Wemm if (lbclen + (len) > lblen) { \ 319f0957ccaSPeter Wemm lblen = p2roundup(MAX(lbclen + (len), 256)); \ 320f0957ccaSPeter Wemm REALLOC(sp, lb, CHAR_T *, lblen * sizeof(CHAR_T)); \ 321b8ba871bSPeter Wemm if (lb == NULL) { \ 322b8ba871bSPeter Wemm lbclen = 0; \ 323b8ba871bSPeter Wemm return (1); \ 324b8ba871bSPeter Wemm } \ 325b8ba871bSPeter Wemm } \ 326f0957ccaSPeter Wemm MEMCPY(lb + lbclen, l, len); \ 327b8ba871bSPeter Wemm lbclen += len; \ 328b8ba871bSPeter Wemm } 329b8ba871bSPeter Wemm 330b8ba871bSPeter Wemm #define NEEDSP(sp, len, pnt) { \ 331b8ba871bSPeter Wemm if (lbclen + (len) > lblen) { \ 332f0957ccaSPeter Wemm lblen = p2roundup(MAX(lbclen + (len), 256)); \ 333f0957ccaSPeter Wemm REALLOC(sp, lb, CHAR_T *, lblen * sizeof(CHAR_T)); \ 334b8ba871bSPeter Wemm if (lb == NULL) { \ 335b8ba871bSPeter Wemm lbclen = 0; \ 336b8ba871bSPeter Wemm return (1); \ 337b8ba871bSPeter Wemm } \ 338b8ba871bSPeter Wemm pnt = lb + lbclen; \ 339b8ba871bSPeter Wemm } \ 340b8ba871bSPeter Wemm } 341b8ba871bSPeter Wemm 342b8ba871bSPeter Wemm static int 343f0957ccaSPeter Wemm s(SCR *sp, EXCMD *cmdp, CHAR_T *s, regex_t *re, u_int flags) 344b8ba871bSPeter Wemm { 345b8ba871bSPeter Wemm EVENT ev; 346b8ba871bSPeter Wemm MARK from, to; 347f0957ccaSPeter Wemm TEXTH tiq[] = {{ 0 }}; 348b8ba871bSPeter Wemm recno_t elno, lno, slno; 349f0957ccaSPeter Wemm u_long ul; 350b8ba871bSPeter Wemm regmatch_t match[10]; 351b8ba871bSPeter Wemm size_t blen, cnt, last, lbclen, lblen, len, llen; 352b8ba871bSPeter Wemm size_t offset, saved_offset, scno; 353b8ba871bSPeter Wemm int cflag, lflag, nflag, pflag, rflag; 354b8ba871bSPeter Wemm int didsub, do_eol_match, eflags, empty_ok, eval; 355b8ba871bSPeter Wemm int linechanged, matched, quit, rval; 356f0957ccaSPeter Wemm CHAR_T *bp, *lb; 357f0957ccaSPeter Wemm enum nresult nret; 358b8ba871bSPeter Wemm 359b8ba871bSPeter Wemm NEEDFILE(sp, cmdp); 360b8ba871bSPeter Wemm 361b8ba871bSPeter Wemm slno = sp->lno; 362b8ba871bSPeter Wemm scno = sp->cno; 363b8ba871bSPeter Wemm 364b8ba871bSPeter Wemm /* 365b8ba871bSPeter Wemm * !!! 366b8ba871bSPeter Wemm * Historically, the 'g' and 'c' suffices were always toggled as flags, 367b8ba871bSPeter Wemm * so ":s/A/B/" was the same as ":s/A/B/ccgg". If O_EDCOMPATIBLE was 368b8ba871bSPeter Wemm * not set, they were initialized to 0 for all substitute commands. If 369b8ba871bSPeter Wemm * O_EDCOMPATIBLE was set, they were initialized to 0 only if the user 370b8ba871bSPeter Wemm * specified substitute/replacement patterns (see ex_s()). 371b8ba871bSPeter Wemm */ 372b8ba871bSPeter Wemm if (!O_ISSET(sp, O_EDCOMPATIBLE)) 373b8ba871bSPeter Wemm sp->c_suffix = sp->g_suffix = 0; 374b8ba871bSPeter Wemm 375b8ba871bSPeter Wemm /* 376b8ba871bSPeter Wemm * Historic vi permitted the '#', 'l' and 'p' options in vi mode, but 377b8ba871bSPeter Wemm * it only displayed the last change. I'd disallow them, but they are 378b8ba871bSPeter Wemm * useful in combination with the [v]global commands. In the current 379b8ba871bSPeter Wemm * model the problem is combining them with the 'c' flag -- the screen 380b8ba871bSPeter Wemm * would have to flip back and forth between the confirm screen and the 381b8ba871bSPeter Wemm * ex print screen, which would be pretty awful. We do display all 382b8ba871bSPeter Wemm * changes, though, for what that's worth. 383b8ba871bSPeter Wemm * 384b8ba871bSPeter Wemm * !!! 385b8ba871bSPeter Wemm * Historic vi was fairly strict about the order of "options", the 386b8ba871bSPeter Wemm * count, and "flags". I'm somewhat fuzzy on the difference between 387b8ba871bSPeter Wemm * options and flags, anyway, so this is a simpler approach, and we 388b8ba871bSPeter Wemm * just take it them in whatever order the user gives them. (The ex 389b8ba871bSPeter Wemm * usage statement doesn't reflect this.) 390b8ba871bSPeter Wemm */ 391b8ba871bSPeter Wemm cflag = lflag = nflag = pflag = rflag = 0; 392b8ba871bSPeter Wemm if (s == NULL) 393b8ba871bSPeter Wemm goto noargs; 394b8ba871bSPeter Wemm for (lno = OOBLNO; *s != '\0'; ++s) 395b8ba871bSPeter Wemm switch (*s) { 396b8ba871bSPeter Wemm case ' ': 397b8ba871bSPeter Wemm case '\t': 398b8ba871bSPeter Wemm continue; 399b8ba871bSPeter Wemm case '+': 400b8ba871bSPeter Wemm ++cmdp->flagoff; 401b8ba871bSPeter Wemm break; 402b8ba871bSPeter Wemm case '-': 403b8ba871bSPeter Wemm --cmdp->flagoff; 404b8ba871bSPeter Wemm break; 405b8ba871bSPeter Wemm case '0': case '1': case '2': case '3': case '4': 406b8ba871bSPeter Wemm case '5': case '6': case '7': case '8': case '9': 407b8ba871bSPeter Wemm if (lno != OOBLNO) 408b8ba871bSPeter Wemm goto usage; 409b8ba871bSPeter Wemm errno = 0; 410f0957ccaSPeter Wemm nret = nget_uslong(&ul, s, &s, 10); 411f0957ccaSPeter Wemm lno = ul; 412b8ba871bSPeter Wemm if (*s == '\0') /* Loop increment correction. */ 413b8ba871bSPeter Wemm --s; 414f0957ccaSPeter Wemm if (nret != NUM_OK) { 415f0957ccaSPeter Wemm if (nret == NUM_OVER) 416b8ba871bSPeter Wemm msgq(sp, M_ERR, "153|Count overflow"); 417f0957ccaSPeter Wemm else if (nret == NUM_UNDER) 418b8ba871bSPeter Wemm msgq(sp, M_ERR, "154|Count underflow"); 419b8ba871bSPeter Wemm else 420b8ba871bSPeter Wemm msgq(sp, M_SYSERR, NULL); 421b8ba871bSPeter Wemm return (1); 422b8ba871bSPeter Wemm } 423b8ba871bSPeter Wemm /* 424b8ba871bSPeter Wemm * In historic vi, the count was inclusive from the 425b8ba871bSPeter Wemm * second address. 426b8ba871bSPeter Wemm */ 427b8ba871bSPeter Wemm cmdp->addr1.lno = cmdp->addr2.lno; 428b8ba871bSPeter Wemm cmdp->addr2.lno += lno - 1; 429b8ba871bSPeter Wemm if (!db_exist(sp, cmdp->addr2.lno) && 430b8ba871bSPeter Wemm db_last(sp, &cmdp->addr2.lno)) 431b8ba871bSPeter Wemm return (1); 432b8ba871bSPeter Wemm break; 433b8ba871bSPeter Wemm case '#': 434b8ba871bSPeter Wemm nflag = 1; 435b8ba871bSPeter Wemm break; 436b8ba871bSPeter Wemm case 'c': 437b8ba871bSPeter Wemm sp->c_suffix = !sp->c_suffix; 438b8ba871bSPeter Wemm 439b8ba871bSPeter Wemm /* Ex text structure initialization. */ 440f0957ccaSPeter Wemm if (F_ISSET(sp, SC_EX)) 441f0957ccaSPeter Wemm TAILQ_INIT(tiq); 442b8ba871bSPeter Wemm break; 443b8ba871bSPeter Wemm case 'g': 444b8ba871bSPeter Wemm sp->g_suffix = !sp->g_suffix; 445b8ba871bSPeter Wemm break; 446b8ba871bSPeter Wemm case 'l': 447b8ba871bSPeter Wemm lflag = 1; 448b8ba871bSPeter Wemm break; 449b8ba871bSPeter Wemm case 'p': 450b8ba871bSPeter Wemm pflag = 1; 451b8ba871bSPeter Wemm break; 452b8ba871bSPeter Wemm case 'r': 453b8ba871bSPeter Wemm if (LF_ISSET(SUB_FIRST)) { 454b8ba871bSPeter Wemm msgq(sp, M_ERR, 455b8ba871bSPeter Wemm "155|Regular expression specified; r flag meaningless"); 456b8ba871bSPeter Wemm return (1); 457b8ba871bSPeter Wemm } 458b8ba871bSPeter Wemm if (!F_ISSET(sp, SC_RE_SEARCH)) { 459b8ba871bSPeter Wemm ex_emsg(sp, NULL, EXM_NOPREVRE); 460b8ba871bSPeter Wemm return (1); 461b8ba871bSPeter Wemm } 462b8ba871bSPeter Wemm rflag = 1; 463b8ba871bSPeter Wemm re = &sp->re_c; 464b8ba871bSPeter Wemm break; 465b8ba871bSPeter Wemm default: 466b8ba871bSPeter Wemm goto usage; 467b8ba871bSPeter Wemm } 468b8ba871bSPeter Wemm 469f0957ccaSPeter Wemm if (*s != '\0' || (!rflag && LF_ISSET(SUB_MUSTSETR))) { 470b8ba871bSPeter Wemm usage: ex_emsg(sp, cmdp->cmd->usage, EXM_USAGE); 471b8ba871bSPeter Wemm return (1); 472b8ba871bSPeter Wemm } 473b8ba871bSPeter Wemm 474b8ba871bSPeter Wemm noargs: if (F_ISSET(sp, SC_VI) && sp->c_suffix && (lflag || nflag || pflag)) { 475b8ba871bSPeter Wemm msgq(sp, M_ERR, 476b8ba871bSPeter Wemm "156|The #, l and p flags may not be combined with the c flag in vi mode"); 477b8ba871bSPeter Wemm return (1); 478b8ba871bSPeter Wemm } 479b8ba871bSPeter Wemm 480b8ba871bSPeter Wemm /* 481b8ba871bSPeter Wemm * bp: if interactive, line cache 482b8ba871bSPeter Wemm * blen: if interactive, line cache length 483b8ba871bSPeter Wemm * lb: build buffer pointer. 484b8ba871bSPeter Wemm * lbclen: current length of built buffer. 485b8ba871bSPeter Wemm * lblen; length of build buffer. 486b8ba871bSPeter Wemm */ 487b8ba871bSPeter Wemm bp = lb = NULL; 488b8ba871bSPeter Wemm blen = lbclen = lblen = 0; 489b8ba871bSPeter Wemm 490b8ba871bSPeter Wemm /* For each line... */ 491f0957ccaSPeter Wemm lno = cmdp->addr1.lno == 0 ? 1 : cmdp->addr1.lno; 492f0957ccaSPeter Wemm for (matched = quit = 0, 493b8ba871bSPeter Wemm elno = cmdp->addr2.lno; !quit && lno <= elno; ++lno) { 494b8ba871bSPeter Wemm 495b8ba871bSPeter Wemm /* Someone's unhappy, time to stop. */ 496b8ba871bSPeter Wemm if (INTERRUPTED(sp)) 497b8ba871bSPeter Wemm break; 498b8ba871bSPeter Wemm 499b8ba871bSPeter Wemm /* Get the line. */ 500b8ba871bSPeter Wemm if (db_get(sp, lno, DBG_FATAL, &s, &llen)) 501b8ba871bSPeter Wemm goto err; 502b8ba871bSPeter Wemm 503b8ba871bSPeter Wemm /* 504b8ba871bSPeter Wemm * Make a local copy if doing confirmation -- when calling 505b8ba871bSPeter Wemm * the confirm routine we're likely to lose the cached copy. 506b8ba871bSPeter Wemm */ 507b8ba871bSPeter Wemm if (sp->c_suffix) { 508b8ba871bSPeter Wemm if (bp == NULL) { 509f0957ccaSPeter Wemm GET_SPACE_RETW(sp, bp, blen, llen); 510b8ba871bSPeter Wemm } else 511f0957ccaSPeter Wemm ADD_SPACE_RETW(sp, bp, blen, llen); 512f0957ccaSPeter Wemm MEMCPY(bp, s, llen); 513b8ba871bSPeter Wemm s = bp; 514b8ba871bSPeter Wemm } 515b8ba871bSPeter Wemm 516b8ba871bSPeter Wemm /* Start searching from the beginning. */ 517b8ba871bSPeter Wemm offset = 0; 518b8ba871bSPeter Wemm len = llen; 519b8ba871bSPeter Wemm 520b8ba871bSPeter Wemm /* Reset the build buffer offset. */ 521b8ba871bSPeter Wemm lbclen = 0; 522b8ba871bSPeter Wemm 523b8ba871bSPeter Wemm /* Reset empty match flag. */ 524b8ba871bSPeter Wemm empty_ok = 1; 525b8ba871bSPeter Wemm 526b8ba871bSPeter Wemm /* 527b8ba871bSPeter Wemm * We don't want to have to do a setline if the line didn't 528b8ba871bSPeter Wemm * change -- keep track of whether or not this line changed. 529b8ba871bSPeter Wemm * If doing confirmations, don't want to keep setting the 530b8ba871bSPeter Wemm * line if change is refused -- keep track of substitutions. 531b8ba871bSPeter Wemm */ 532b8ba871bSPeter Wemm didsub = linechanged = 0; 533b8ba871bSPeter Wemm 534b8ba871bSPeter Wemm /* New line, do an EOL match. */ 535b8ba871bSPeter Wemm do_eol_match = 1; 536b8ba871bSPeter Wemm 537b8ba871bSPeter Wemm /* It's not nul terminated, but we pretend it is. */ 538b8ba871bSPeter Wemm eflags = REG_STARTEND; 539b8ba871bSPeter Wemm 540b8ba871bSPeter Wemm /* 541b8ba871bSPeter Wemm * The search area is from s + offset to the EOL. 542b8ba871bSPeter Wemm * 543b8ba871bSPeter Wemm * Generally, match[0].rm_so is the offset of the start 544b8ba871bSPeter Wemm * of the match from the start of the search, and offset 545b8ba871bSPeter Wemm * is the offset of the start of the last search. 546b8ba871bSPeter Wemm */ 547b8ba871bSPeter Wemm nextmatch: match[0].rm_so = 0; 548b8ba871bSPeter Wemm match[0].rm_eo = len; 549b8ba871bSPeter Wemm 550b8ba871bSPeter Wemm /* Get the next match. */ 551f0957ccaSPeter Wemm eval = regexec(re, s + offset, 10, match, eflags); 552b8ba871bSPeter Wemm 553b8ba871bSPeter Wemm /* 554b8ba871bSPeter Wemm * There wasn't a match or if there was an error, deal with 555b8ba871bSPeter Wemm * it. If there was a previous match in this line, resolve 556b8ba871bSPeter Wemm * the changes into the database. Otherwise, just move on. 557b8ba871bSPeter Wemm */ 558b8ba871bSPeter Wemm if (eval == REG_NOMATCH) 559b8ba871bSPeter Wemm goto endmatch; 560b8ba871bSPeter Wemm if (eval != 0) { 561b8ba871bSPeter Wemm re_error(sp, eval, re); 562b8ba871bSPeter Wemm goto err; 563b8ba871bSPeter Wemm } 564b8ba871bSPeter Wemm matched = 1; 565b8ba871bSPeter Wemm 566b8ba871bSPeter Wemm /* Only the first search can match an anchored expression. */ 567b8ba871bSPeter Wemm eflags |= REG_NOTBOL; 568b8ba871bSPeter Wemm 569b8ba871bSPeter Wemm /* 570b8ba871bSPeter Wemm * !!! 571b8ba871bSPeter Wemm * It's possible to match 0-length strings -- for example, the 572b8ba871bSPeter Wemm * command s;a*;X;, when matched against the string "aabb" will 573b8ba871bSPeter Wemm * result in "XbXbX", i.e. the matches are "aa", the space 574b8ba871bSPeter Wemm * between the b's and the space between the b's and the end of 575b8ba871bSPeter Wemm * the string. There is a similar space between the beginning 576b8ba871bSPeter Wemm * of the string and the a's. The rule that we use (because vi 577b8ba871bSPeter Wemm * historically used it) is that any 0-length match, occurring 578b8ba871bSPeter Wemm * immediately after a match, is ignored. Otherwise, the above 579b8ba871bSPeter Wemm * example would have resulted in "XXbXbX". Another example is 580b8ba871bSPeter Wemm * incorrectly using " *" to replace groups of spaces with one 581b8ba871bSPeter Wemm * space. 582b8ba871bSPeter Wemm * 583b8ba871bSPeter Wemm * The way we do this is that if we just had a successful match, 584b8ba871bSPeter Wemm * the starting offset does not skip characters, and the match 585b8ba871bSPeter Wemm * is empty, ignore the match and move forward. If there's no 586b8ba871bSPeter Wemm * more characters in the string, we were attempting to match 587b8ba871bSPeter Wemm * after the last character, so quit. 588b8ba871bSPeter Wemm */ 589b8ba871bSPeter Wemm if (!empty_ok && match[0].rm_so == 0 && match[0].rm_eo == 0) { 590b8ba871bSPeter Wemm empty_ok = 1; 591b8ba871bSPeter Wemm if (len == 0) 592b8ba871bSPeter Wemm goto endmatch; 593b8ba871bSPeter Wemm BUILD(sp, s + offset, 1) 594b8ba871bSPeter Wemm ++offset; 595b8ba871bSPeter Wemm --len; 596b8ba871bSPeter Wemm goto nextmatch; 597b8ba871bSPeter Wemm } 598b8ba871bSPeter Wemm 599b8ba871bSPeter Wemm /* Confirm change. */ 600b8ba871bSPeter Wemm if (sp->c_suffix) { 601b8ba871bSPeter Wemm /* 602b8ba871bSPeter Wemm * Set the cursor position for confirmation. Note, 603b8ba871bSPeter Wemm * if we matched on a '$', the cursor may be past 604b8ba871bSPeter Wemm * the end of line. 605b8ba871bSPeter Wemm */ 606b8ba871bSPeter Wemm from.lno = to.lno = lno; 607b8ba871bSPeter Wemm from.cno = match[0].rm_so + offset; 608b8ba871bSPeter Wemm to.cno = match[0].rm_eo + offset; 609b8ba871bSPeter Wemm /* 610b8ba871bSPeter Wemm * Both ex and vi have to correct for a change before 611b8ba871bSPeter Wemm * the first character in the line. 612b8ba871bSPeter Wemm */ 613b8ba871bSPeter Wemm if (llen == 0) 614b8ba871bSPeter Wemm from.cno = to.cno = 0; 615b8ba871bSPeter Wemm if (F_ISSET(sp, SC_VI)) { 616b8ba871bSPeter Wemm /* 617b8ba871bSPeter Wemm * Only vi has to correct for a change after 618b8ba871bSPeter Wemm * the last character in the line. 619b8ba871bSPeter Wemm * 620b8ba871bSPeter Wemm * XXX 621b8ba871bSPeter Wemm * It would be nice to change the vi code so 622b8ba871bSPeter Wemm * that we could display a cursor past EOL. 623b8ba871bSPeter Wemm */ 624b8ba871bSPeter Wemm if (to.cno >= llen) 625b8ba871bSPeter Wemm to.cno = llen - 1; 626b8ba871bSPeter Wemm if (from.cno >= llen) 627b8ba871bSPeter Wemm from.cno = llen - 1; 628b8ba871bSPeter Wemm 629b8ba871bSPeter Wemm sp->lno = from.lno; 630b8ba871bSPeter Wemm sp->cno = from.cno; 631b8ba871bSPeter Wemm if (vs_refresh(sp, 1)) 632b8ba871bSPeter Wemm goto err; 633b8ba871bSPeter Wemm 634b8ba871bSPeter Wemm vs_update(sp, msg_cat(sp, 635b8ba871bSPeter Wemm "169|Confirm change? [n]", NULL), NULL); 636b8ba871bSPeter Wemm 637b8ba871bSPeter Wemm if (v_event_get(sp, &ev, 0, 0)) 638b8ba871bSPeter Wemm goto err; 639b8ba871bSPeter Wemm switch (ev.e_event) { 640b8ba871bSPeter Wemm case E_CHARACTER: 641b8ba871bSPeter Wemm break; 642b8ba871bSPeter Wemm case E_EOF: 643b8ba871bSPeter Wemm case E_ERR: 644b8ba871bSPeter Wemm case E_INTERRUPT: 645b8ba871bSPeter Wemm goto lquit; 646b8ba871bSPeter Wemm default: 647b8ba871bSPeter Wemm v_event_err(sp, &ev); 648b8ba871bSPeter Wemm goto lquit; 649b8ba871bSPeter Wemm } 650b8ba871bSPeter Wemm } else { 651b8ba871bSPeter Wemm if (ex_print(sp, cmdp, &from, &to, 0) || 652b8ba871bSPeter Wemm ex_scprint(sp, &from, &to)) 653b8ba871bSPeter Wemm goto lquit; 654f0957ccaSPeter Wemm if (ex_txt(sp, tiq, 0, TXT_CR)) 655b8ba871bSPeter Wemm goto err; 656f0957ccaSPeter Wemm ev.e_c = TAILQ_FIRST(tiq)->lb[0]; 657b8ba871bSPeter Wemm } 658b8ba871bSPeter Wemm 659b8ba871bSPeter Wemm switch (ev.e_c) { 660b8ba871bSPeter Wemm case CH_YES: 661b8ba871bSPeter Wemm break; 662b8ba871bSPeter Wemm default: 663b8ba871bSPeter Wemm case CH_NO: 664b8ba871bSPeter Wemm didsub = 0; 665b8ba871bSPeter Wemm BUILD(sp, s +offset, match[0].rm_eo); 666b8ba871bSPeter Wemm goto skip; 667b8ba871bSPeter Wemm case CH_QUIT: 668b8ba871bSPeter Wemm /* Set the quit/interrupted flags. */ 669b8ba871bSPeter Wemm lquit: quit = 1; 670b8ba871bSPeter Wemm F_SET(sp->gp, G_INTERRUPTED); 671b8ba871bSPeter Wemm 672b8ba871bSPeter Wemm /* 673b8ba871bSPeter Wemm * Resolve any changes, then return to (and 674b8ba871bSPeter Wemm * exit from) the main loop. 675b8ba871bSPeter Wemm */ 676b8ba871bSPeter Wemm goto endmatch; 677b8ba871bSPeter Wemm } 678b8ba871bSPeter Wemm } 679b8ba871bSPeter Wemm 680b8ba871bSPeter Wemm /* 681b8ba871bSPeter Wemm * Set the cursor to the last position changed, converting 682b8ba871bSPeter Wemm * from 1-based to 0-based. 683b8ba871bSPeter Wemm */ 684b8ba871bSPeter Wemm sp->lno = lno; 685b8ba871bSPeter Wemm sp->cno = match[0].rm_so; 686b8ba871bSPeter Wemm 687b8ba871bSPeter Wemm /* Copy the bytes before the match into the build buffer. */ 688b8ba871bSPeter Wemm BUILD(sp, s + offset, match[0].rm_so); 689b8ba871bSPeter Wemm 690b8ba871bSPeter Wemm /* Substitute the matching bytes. */ 691b8ba871bSPeter Wemm didsub = 1; 692b8ba871bSPeter Wemm if (re_sub(sp, s + offset, &lb, &lbclen, &lblen, match)) 693b8ba871bSPeter Wemm goto err; 694b8ba871bSPeter Wemm 695b8ba871bSPeter Wemm /* Set the change flag so we know this line was modified. */ 696b8ba871bSPeter Wemm linechanged = 1; 697b8ba871bSPeter Wemm 698b8ba871bSPeter Wemm /* Move past the matched bytes. */ 699b8ba871bSPeter Wemm skip: offset += match[0].rm_eo; 700b8ba871bSPeter Wemm len -= match[0].rm_eo; 701b8ba871bSPeter Wemm 702b8ba871bSPeter Wemm /* A match cannot be followed by an empty pattern. */ 703b8ba871bSPeter Wemm empty_ok = 0; 704b8ba871bSPeter Wemm 705b8ba871bSPeter Wemm /* 706b8ba871bSPeter Wemm * If doing a global change with confirmation, we have to 707b8ba871bSPeter Wemm * update the screen. The basic idea is to store the line 708b8ba871bSPeter Wemm * so the screen update routines can find it, and restart. 709b8ba871bSPeter Wemm */ 710b8ba871bSPeter Wemm if (didsub && sp->c_suffix && sp->g_suffix) { 711b8ba871bSPeter Wemm /* 712b8ba871bSPeter Wemm * The new search offset will be the end of the 713b8ba871bSPeter Wemm * modified line. 714b8ba871bSPeter Wemm */ 715b8ba871bSPeter Wemm saved_offset = lbclen; 716b8ba871bSPeter Wemm 717b8ba871bSPeter Wemm /* Copy the rest of the line. */ 718b8ba871bSPeter Wemm if (len) 719b8ba871bSPeter Wemm BUILD(sp, s + offset, len) 720b8ba871bSPeter Wemm 721b8ba871bSPeter Wemm /* Set the new offset. */ 722b8ba871bSPeter Wemm offset = saved_offset; 723b8ba871bSPeter Wemm 724b8ba871bSPeter Wemm /* Store inserted lines, adjusting the build buffer. */ 725b8ba871bSPeter Wemm last = 0; 726b8ba871bSPeter Wemm if (sp->newl_cnt) { 727b8ba871bSPeter Wemm for (cnt = 0; 728b8ba871bSPeter Wemm cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) { 729b8ba871bSPeter Wemm if (db_insert(sp, lno, 730b8ba871bSPeter Wemm lb + last, sp->newl[cnt] - last)) 731b8ba871bSPeter Wemm goto err; 732b8ba871bSPeter Wemm last = sp->newl[cnt] + 1; 733b8ba871bSPeter Wemm ++sp->rptlines[L_ADDED]; 734b8ba871bSPeter Wemm } 735b8ba871bSPeter Wemm lbclen -= last; 736b8ba871bSPeter Wemm offset -= last; 737b8ba871bSPeter Wemm sp->newl_cnt = 0; 738b8ba871bSPeter Wemm } 739b8ba871bSPeter Wemm 740b8ba871bSPeter Wemm /* Store and retrieve the line. */ 741b8ba871bSPeter Wemm if (db_set(sp, lno, lb + last, lbclen)) 742b8ba871bSPeter Wemm goto err; 743b8ba871bSPeter Wemm if (db_get(sp, lno, DBG_FATAL, &s, &llen)) 744b8ba871bSPeter Wemm goto err; 745f0957ccaSPeter Wemm ADD_SPACE_RETW(sp, bp, blen, llen) 746f0957ccaSPeter Wemm MEMCPY(bp, s, llen); 747b8ba871bSPeter Wemm s = bp; 748b8ba871bSPeter Wemm len = llen - offset; 749b8ba871bSPeter Wemm 750b8ba871bSPeter Wemm /* Restart the build. */ 751b8ba871bSPeter Wemm lbclen = 0; 752b8ba871bSPeter Wemm BUILD(sp, s, offset); 753b8ba871bSPeter Wemm 754b8ba871bSPeter Wemm /* 755b8ba871bSPeter Wemm * If we haven't already done the after-the-string 756b8ba871bSPeter Wemm * match, do one. Set REG_NOTEOL so the '$' pattern 757b8ba871bSPeter Wemm * only matches once. 758b8ba871bSPeter Wemm */ 759b8ba871bSPeter Wemm if (!do_eol_match) 760b8ba871bSPeter Wemm goto endmatch; 761b8ba871bSPeter Wemm if (offset == len) { 762b8ba871bSPeter Wemm do_eol_match = 0; 763b8ba871bSPeter Wemm eflags |= REG_NOTEOL; 764b8ba871bSPeter Wemm } 765b8ba871bSPeter Wemm goto nextmatch; 766b8ba871bSPeter Wemm } 767b8ba871bSPeter Wemm 768b8ba871bSPeter Wemm /* 769b8ba871bSPeter Wemm * If it's a global: 770b8ba871bSPeter Wemm * 771b8ba871bSPeter Wemm * If at the end of the string, do a test for the after 772b8ba871bSPeter Wemm * the string match. Set REG_NOTEOL so the '$' pattern 773b8ba871bSPeter Wemm * only matches once. 774b8ba871bSPeter Wemm */ 775b8ba871bSPeter Wemm if (sp->g_suffix && do_eol_match) { 776b8ba871bSPeter Wemm if (len == 0) { 777b8ba871bSPeter Wemm do_eol_match = 0; 778b8ba871bSPeter Wemm eflags |= REG_NOTEOL; 779b8ba871bSPeter Wemm } 780b8ba871bSPeter Wemm goto nextmatch; 781b8ba871bSPeter Wemm } 782b8ba871bSPeter Wemm 783b8ba871bSPeter Wemm endmatch: if (!linechanged) 784b8ba871bSPeter Wemm continue; 785b8ba871bSPeter Wemm 786b8ba871bSPeter Wemm /* Copy any remaining bytes into the build buffer. */ 787b8ba871bSPeter Wemm if (len) 788b8ba871bSPeter Wemm BUILD(sp, s + offset, len) 789b8ba871bSPeter Wemm 790b8ba871bSPeter Wemm /* Store inserted lines, adjusting the build buffer. */ 791b8ba871bSPeter Wemm last = 0; 792b8ba871bSPeter Wemm if (sp->newl_cnt) { 793b8ba871bSPeter Wemm for (cnt = 0; 794b8ba871bSPeter Wemm cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) { 795b8ba871bSPeter Wemm if (db_insert(sp, 796b8ba871bSPeter Wemm lno, lb + last, sp->newl[cnt] - last)) 797b8ba871bSPeter Wemm goto err; 798b8ba871bSPeter Wemm last = sp->newl[cnt] + 1; 799b8ba871bSPeter Wemm ++sp->rptlines[L_ADDED]; 800b8ba871bSPeter Wemm } 801b8ba871bSPeter Wemm lbclen -= last; 802b8ba871bSPeter Wemm sp->newl_cnt = 0; 803b8ba871bSPeter Wemm } 804b8ba871bSPeter Wemm 805b8ba871bSPeter Wemm /* Store the changed line. */ 806b8ba871bSPeter Wemm if (db_set(sp, lno, lb + last, lbclen)) 807b8ba871bSPeter Wemm goto err; 808b8ba871bSPeter Wemm 809b8ba871bSPeter Wemm /* Update changed line counter. */ 810b8ba871bSPeter Wemm if (sp->rptlchange != lno) { 811b8ba871bSPeter Wemm sp->rptlchange = lno; 812b8ba871bSPeter Wemm ++sp->rptlines[L_CHANGED]; 813b8ba871bSPeter Wemm } 814b8ba871bSPeter Wemm 815b8ba871bSPeter Wemm /* 816b8ba871bSPeter Wemm * !!! 817b8ba871bSPeter Wemm * Display as necessary. Historic practice is to only 818b8ba871bSPeter Wemm * display the last line of a line split into multiple 819b8ba871bSPeter Wemm * lines. 820b8ba871bSPeter Wemm */ 821b8ba871bSPeter Wemm if (lflag || nflag || pflag) { 822b8ba871bSPeter Wemm from.lno = to.lno = lno; 823b8ba871bSPeter Wemm from.cno = to.cno = 0; 824b8ba871bSPeter Wemm if (lflag) 825b8ba871bSPeter Wemm (void)ex_print(sp, cmdp, &from, &to, E_C_LIST); 826b8ba871bSPeter Wemm if (nflag) 827b8ba871bSPeter Wemm (void)ex_print(sp, cmdp, &from, &to, E_C_HASH); 828b8ba871bSPeter Wemm if (pflag) 829b8ba871bSPeter Wemm (void)ex_print(sp, cmdp, &from, &to, E_C_PRINT); 830b8ba871bSPeter Wemm } 831b8ba871bSPeter Wemm } 832b8ba871bSPeter Wemm 833b8ba871bSPeter Wemm /* 834b8ba871bSPeter Wemm * !!! 835b8ba871bSPeter Wemm * Historically, vi attempted to leave the cursor at the same place if 836b8ba871bSPeter Wemm * the substitution was done at the current cursor position. Otherwise 837b8ba871bSPeter Wemm * it moved it to the first non-blank of the last line changed. There 838b8ba871bSPeter Wemm * were some problems: for example, :s/$/foo/ with the cursor on the 839b8ba871bSPeter Wemm * last character of the line left the cursor on the last character, or 840b8ba871bSPeter Wemm * the & command with multiple occurrences of the matching string in the 841b8ba871bSPeter Wemm * line usually left the cursor in a fairly random position. 842b8ba871bSPeter Wemm * 843b8ba871bSPeter Wemm * We try to do the same thing, with the exception that if the user is 844b8ba871bSPeter Wemm * doing substitution with confirmation, we move to the last line about 845b8ba871bSPeter Wemm * which the user was consulted, as opposed to the last line that they 846b8ba871bSPeter Wemm * actually changed. This prevents a screen flash if the user doesn't 847b8ba871bSPeter Wemm * change many of the possible lines. 848b8ba871bSPeter Wemm */ 849b8ba871bSPeter Wemm if (!sp->c_suffix && (sp->lno != slno || sp->cno != scno)) { 850b8ba871bSPeter Wemm sp->cno = 0; 851b8ba871bSPeter Wemm (void)nonblank(sp, sp->lno, &sp->cno); 852b8ba871bSPeter Wemm } 853b8ba871bSPeter Wemm 854b8ba871bSPeter Wemm /* 855b8ba871bSPeter Wemm * If not in a global command, and nothing matched, say so. 856b8ba871bSPeter Wemm * Else, if none of the lines displayed, put something up. 857b8ba871bSPeter Wemm */ 858b8ba871bSPeter Wemm rval = 0; 859b8ba871bSPeter Wemm if (!matched) { 860b8ba871bSPeter Wemm if (!F_ISSET(sp, SC_EX_GLOBAL)) { 861b8ba871bSPeter Wemm msgq(sp, M_ERR, "157|No match found"); 862b8ba871bSPeter Wemm goto err; 863b8ba871bSPeter Wemm } 864b8ba871bSPeter Wemm } else if (!lflag && !nflag && !pflag) 865b8ba871bSPeter Wemm F_SET(cmdp, E_AUTOPRINT); 866b8ba871bSPeter Wemm 867b8ba871bSPeter Wemm if (0) { 868b8ba871bSPeter Wemm err: rval = 1; 869b8ba871bSPeter Wemm } 870b8ba871bSPeter Wemm 871b8ba871bSPeter Wemm if (bp != NULL) 872f0957ccaSPeter Wemm FREE_SPACEW(sp, bp, blen); 873b8ba871bSPeter Wemm if (lb != NULL) 874b8ba871bSPeter Wemm free(lb); 875b8ba871bSPeter Wemm return (rval); 876b8ba871bSPeter Wemm } 877b8ba871bSPeter Wemm 878b8ba871bSPeter Wemm /* 879b8ba871bSPeter Wemm * re_compile -- 880b8ba871bSPeter Wemm * Compile the RE. 881b8ba871bSPeter Wemm * 882b8ba871bSPeter Wemm * PUBLIC: int re_compile __P((SCR *, 883f0957ccaSPeter Wemm * PUBLIC: CHAR_T *, size_t, CHAR_T **, size_t *, regex_t *, u_int)); 884b8ba871bSPeter Wemm */ 885b8ba871bSPeter Wemm int 886f0957ccaSPeter Wemm re_compile(SCR *sp, CHAR_T *ptrn, size_t plen, CHAR_T **ptrnp, size_t *lenp, regex_t *rep, u_int flags) 887b8ba871bSPeter Wemm { 888b8ba871bSPeter Wemm size_t len; 889b8ba871bSPeter Wemm int reflags, replaced, rval; 890f0957ccaSPeter Wemm CHAR_T *p; 891b8ba871bSPeter Wemm 892b8ba871bSPeter Wemm /* Set RE flags. */ 893b8ba871bSPeter Wemm reflags = 0; 894b8ba871bSPeter Wemm if (!LF_ISSET(RE_C_CSCOPE | RE_C_TAG)) { 895b8ba871bSPeter Wemm if (O_ISSET(sp, O_EXTENDED)) 896b8ba871bSPeter Wemm reflags |= REG_EXTENDED; 897b8ba871bSPeter Wemm if (O_ISSET(sp, O_IGNORECASE)) 898b8ba871bSPeter Wemm reflags |= REG_ICASE; 899b8ba871bSPeter Wemm if (O_ISSET(sp, O_ICLOWER)) { 900b8ba871bSPeter Wemm for (p = ptrn, len = plen; len > 0; ++p, --len) 901f0957ccaSPeter Wemm if (ISUPPER(*p)) 902b8ba871bSPeter Wemm break; 903b8ba871bSPeter Wemm if (len == 0) 904b8ba871bSPeter Wemm reflags |= REG_ICASE; 905b8ba871bSPeter Wemm } 906b8ba871bSPeter Wemm } 907b8ba871bSPeter Wemm 908b8ba871bSPeter Wemm /* If we're replacing a saved value, clear the old one. */ 909b8ba871bSPeter Wemm if (LF_ISSET(RE_C_SEARCH) && F_ISSET(sp, SC_RE_SEARCH)) { 910b8ba871bSPeter Wemm regfree(&sp->re_c); 911b8ba871bSPeter Wemm F_CLR(sp, SC_RE_SEARCH); 912b8ba871bSPeter Wemm } 913b8ba871bSPeter Wemm if (LF_ISSET(RE_C_SUBST) && F_ISSET(sp, SC_RE_SUBST)) { 914b8ba871bSPeter Wemm regfree(&sp->subre_c); 915b8ba871bSPeter Wemm F_CLR(sp, SC_RE_SUBST); 916b8ba871bSPeter Wemm } 917b8ba871bSPeter Wemm 918b8ba871bSPeter Wemm /* 919b8ba871bSPeter Wemm * If we're saving the string, it's a pattern we haven't seen before, 920b8ba871bSPeter Wemm * so convert the vi-style RE's to POSIX 1003.2 RE's. Save a copy for 921b8ba871bSPeter Wemm * later recompilation. Free any previously saved value. 922b8ba871bSPeter Wemm */ 923b8ba871bSPeter Wemm if (ptrnp != NULL) { 924f0957ccaSPeter Wemm replaced = 0; 925b8ba871bSPeter Wemm if (LF_ISSET(RE_C_CSCOPE)) { 926b8ba871bSPeter Wemm if (re_cscope_conv(sp, &ptrn, &plen, &replaced)) 927b8ba871bSPeter Wemm return (1); 928b8ba871bSPeter Wemm /* 929b8ba871bSPeter Wemm * XXX 930b8ba871bSPeter Wemm * Currently, the match-any-<blank> expression used in 931b8ba871bSPeter Wemm * re_cscope_conv() requires extended RE's. This may 932b8ba871bSPeter Wemm * not be right or safe. 933b8ba871bSPeter Wemm */ 934b8ba871bSPeter Wemm reflags |= REG_EXTENDED; 935b8ba871bSPeter Wemm } else if (LF_ISSET(RE_C_TAG)) { 936b8ba871bSPeter Wemm if (re_tag_conv(sp, &ptrn, &plen, &replaced)) 937b8ba871bSPeter Wemm return (1); 938b8ba871bSPeter Wemm } else 939b8ba871bSPeter Wemm if (re_conv(sp, &ptrn, &plen, &replaced)) 940b8ba871bSPeter Wemm return (1); 941b8ba871bSPeter Wemm 942b8ba871bSPeter Wemm /* Discard previous pattern. */ 943b8ba871bSPeter Wemm if (*ptrnp != NULL) { 944b8ba871bSPeter Wemm free(*ptrnp); 945b8ba871bSPeter Wemm *ptrnp = NULL; 946b8ba871bSPeter Wemm } 947b8ba871bSPeter Wemm if (lenp != NULL) 948b8ba871bSPeter Wemm *lenp = plen; 949b8ba871bSPeter Wemm 950b8ba871bSPeter Wemm /* 951b8ba871bSPeter Wemm * Copy the string into allocated memory. 952b8ba871bSPeter Wemm * 953b8ba871bSPeter Wemm * XXX 954b8ba871bSPeter Wemm * Regcomp isn't 8-bit clean, so the pattern is nul-terminated 955b8ba871bSPeter Wemm * for now. There's just no other solution. 956b8ba871bSPeter Wemm */ 957f0957ccaSPeter Wemm MALLOC(sp, *ptrnp, CHAR_T *, (plen + 1) * sizeof(CHAR_T)); 958b8ba871bSPeter Wemm if (*ptrnp != NULL) { 959f0957ccaSPeter Wemm MEMCPY(*ptrnp, ptrn, plen); 960b8ba871bSPeter Wemm (*ptrnp)[plen] = '\0'; 961b8ba871bSPeter Wemm } 962b8ba871bSPeter Wemm 963b8ba871bSPeter Wemm /* Free up conversion-routine-allocated memory. */ 964b8ba871bSPeter Wemm if (replaced) 965f0957ccaSPeter Wemm FREE_SPACEW(sp, ptrn, 0); 966b8ba871bSPeter Wemm 967b8ba871bSPeter Wemm if (*ptrnp == NULL) 968b8ba871bSPeter Wemm return (1); 969b8ba871bSPeter Wemm 970b8ba871bSPeter Wemm ptrn = *ptrnp; 971b8ba871bSPeter Wemm } 972b8ba871bSPeter Wemm 973b8ba871bSPeter Wemm /* 974b8ba871bSPeter Wemm * XXX 975b8ba871bSPeter Wemm * Regcomp isn't 8-bit clean, so we just lost if the pattern 976b8ba871bSPeter Wemm * contained a nul. Bummer! 977b8ba871bSPeter Wemm */ 978b8ba871bSPeter Wemm if ((rval = regcomp(rep, ptrn, /* plen, */ reflags)) != 0) { 979b8ba871bSPeter Wemm if (!LF_ISSET(RE_C_SILENT)) 980b8ba871bSPeter Wemm re_error(sp, rval, rep); 981b8ba871bSPeter Wemm return (1); 982b8ba871bSPeter Wemm } 983b8ba871bSPeter Wemm 984b8ba871bSPeter Wemm if (LF_ISSET(RE_C_SEARCH)) 985b8ba871bSPeter Wemm F_SET(sp, SC_RE_SEARCH); 986b8ba871bSPeter Wemm if (LF_ISSET(RE_C_SUBST)) 987b8ba871bSPeter Wemm F_SET(sp, SC_RE_SUBST); 988b8ba871bSPeter Wemm 989b8ba871bSPeter Wemm return (0); 990b8ba871bSPeter Wemm } 991b8ba871bSPeter Wemm 992b8ba871bSPeter Wemm /* 993b8ba871bSPeter Wemm * re_conv -- 994b8ba871bSPeter Wemm * Convert vi's regular expressions into something that the 995b8ba871bSPeter Wemm * the POSIX 1003.2 RE functions can handle. 996b8ba871bSPeter Wemm * 997b8ba871bSPeter Wemm * There are three conversions we make to make vi's RE's (specifically 998b8ba871bSPeter Wemm * the global, search, and substitute patterns) work with POSIX RE's. 999b8ba871bSPeter Wemm * 1000b8ba871bSPeter Wemm * 1: If O_MAGIC is not set, strip backslashes from the magic character 1001b8ba871bSPeter Wemm * set (.[*~) that have them, and add them to the ones that don't. 1002b8ba871bSPeter Wemm * 2: If O_MAGIC is not set, the string "\~" is replaced with the text 1003b8ba871bSPeter Wemm * from the last substitute command's replacement string. If O_MAGIC 1004b8ba871bSPeter Wemm * is set, it's the string "~". 1005b8ba871bSPeter Wemm * 3: The pattern \<ptrn\> does "word" searches, convert it to use the 1006b8ba871bSPeter Wemm * new RE escapes. 1007b8ba871bSPeter Wemm * 1008b8ba871bSPeter Wemm * !!!/XXX 1009b8ba871bSPeter Wemm * This doesn't exactly match the historic behavior of vi because we do 1010b8ba871bSPeter Wemm * the ~ substitution before calling the RE engine, so magic characters 1011b8ba871bSPeter Wemm * in the replacement string will be expanded by the RE engine, and they 1012b8ba871bSPeter Wemm * weren't historically. It's a bug. 1013b8ba871bSPeter Wemm */ 1014b8ba871bSPeter Wemm static int 1015f0957ccaSPeter Wemm re_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp) 1016b8ba871bSPeter Wemm { 1017b8ba871bSPeter Wemm size_t blen, len, needlen; 1018b8ba871bSPeter Wemm int magic; 1019f0957ccaSPeter Wemm CHAR_T *bp, *p, *t; 1020b8ba871bSPeter Wemm 1021b8ba871bSPeter Wemm /* 1022b8ba871bSPeter Wemm * First pass through, we figure out how much space we'll need. 1023b8ba871bSPeter Wemm * We do it in two passes, on the grounds that most of the time 1024b8ba871bSPeter Wemm * the user is doing a search and won't have magic characters. 1025b8ba871bSPeter Wemm * That way we can skip most of the memory allocation and copies. 1026b8ba871bSPeter Wemm */ 1027b8ba871bSPeter Wemm magic = 0; 1028b8ba871bSPeter Wemm for (p = *ptrnp, len = *plenp, needlen = 0; len > 0; ++p, --len) 1029b8ba871bSPeter Wemm switch (*p) { 1030b8ba871bSPeter Wemm case '\\': 1031b8ba871bSPeter Wemm if (len > 1) { 1032b8ba871bSPeter Wemm --len; 1033b8ba871bSPeter Wemm switch (*++p) { 1034b8ba871bSPeter Wemm case '<': 1035b8ba871bSPeter Wemm magic = 1; 1036f0957ccaSPeter Wemm needlen += RE_WSTART_LEN + 1; 1037b8ba871bSPeter Wemm break; 1038b8ba871bSPeter Wemm case '>': 1039b8ba871bSPeter Wemm magic = 1; 1040f0957ccaSPeter Wemm needlen += RE_WSTOP_LEN + 1; 1041b8ba871bSPeter Wemm break; 1042b8ba871bSPeter Wemm case '~': 1043b8ba871bSPeter Wemm if (!O_ISSET(sp, O_MAGIC)) { 1044b8ba871bSPeter Wemm magic = 1; 1045b8ba871bSPeter Wemm needlen += sp->repl_len; 1046b8ba871bSPeter Wemm } 1047b8ba871bSPeter Wemm break; 1048b8ba871bSPeter Wemm case '.': 1049b8ba871bSPeter Wemm case '[': 1050b8ba871bSPeter Wemm case '*': 1051b8ba871bSPeter Wemm if (!O_ISSET(sp, O_MAGIC)) { 1052b8ba871bSPeter Wemm magic = 1; 1053b8ba871bSPeter Wemm needlen += 1; 1054b8ba871bSPeter Wemm } 1055b8ba871bSPeter Wemm break; 1056b8ba871bSPeter Wemm default: 1057b8ba871bSPeter Wemm needlen += 2; 1058b8ba871bSPeter Wemm } 1059b8ba871bSPeter Wemm } else 1060b8ba871bSPeter Wemm needlen += 1; 1061b8ba871bSPeter Wemm break; 1062b8ba871bSPeter Wemm case '~': 1063b8ba871bSPeter Wemm if (O_ISSET(sp, O_MAGIC)) { 1064b8ba871bSPeter Wemm magic = 1; 1065b8ba871bSPeter Wemm needlen += sp->repl_len; 1066b8ba871bSPeter Wemm } 1067b8ba871bSPeter Wemm break; 1068b8ba871bSPeter Wemm case '.': 1069b8ba871bSPeter Wemm case '[': 1070b8ba871bSPeter Wemm case '*': 1071b8ba871bSPeter Wemm if (!O_ISSET(sp, O_MAGIC)) { 1072b8ba871bSPeter Wemm magic = 1; 1073b8ba871bSPeter Wemm needlen += 2; 1074b8ba871bSPeter Wemm } 1075b8ba871bSPeter Wemm break; 1076b8ba871bSPeter Wemm default: 1077b8ba871bSPeter Wemm needlen += 1; 1078b8ba871bSPeter Wemm break; 1079b8ba871bSPeter Wemm } 1080b8ba871bSPeter Wemm 1081b8ba871bSPeter Wemm if (!magic) { 1082b8ba871bSPeter Wemm *replacedp = 0; 1083b8ba871bSPeter Wemm return (0); 1084b8ba871bSPeter Wemm } 1085b8ba871bSPeter Wemm 1086b8ba871bSPeter Wemm /* Get enough memory to hold the final pattern. */ 1087b8ba871bSPeter Wemm *replacedp = 1; 1088f0957ccaSPeter Wemm GET_SPACE_RETW(sp, bp, blen, needlen); 1089b8ba871bSPeter Wemm 1090b8ba871bSPeter Wemm for (p = *ptrnp, len = *plenp, t = bp; len > 0; ++p, --len) 1091b8ba871bSPeter Wemm switch (*p) { 1092b8ba871bSPeter Wemm case '\\': 1093b8ba871bSPeter Wemm if (len > 1) { 1094b8ba871bSPeter Wemm --len; 1095b8ba871bSPeter Wemm switch (*++p) { 1096b8ba871bSPeter Wemm case '<': 1097f0957ccaSPeter Wemm MEMCPY(t, 1098f0957ccaSPeter Wemm RE_WSTART, RE_WSTART_LEN); 1099f0957ccaSPeter Wemm t += RE_WSTART_LEN; 1100b8ba871bSPeter Wemm break; 1101b8ba871bSPeter Wemm case '>': 1102f0957ccaSPeter Wemm MEMCPY(t, 1103f0957ccaSPeter Wemm RE_WSTOP, RE_WSTOP_LEN); 1104f0957ccaSPeter Wemm t += RE_WSTOP_LEN; 1105b8ba871bSPeter Wemm break; 1106b8ba871bSPeter Wemm case '~': 1107b8ba871bSPeter Wemm if (O_ISSET(sp, O_MAGIC)) 1108b8ba871bSPeter Wemm *t++ = '~'; 1109b8ba871bSPeter Wemm else { 1110f0957ccaSPeter Wemm MEMCPY(t, 1111b8ba871bSPeter Wemm sp->repl, sp->repl_len); 1112b8ba871bSPeter Wemm t += sp->repl_len; 1113b8ba871bSPeter Wemm } 1114b8ba871bSPeter Wemm break; 1115b8ba871bSPeter Wemm case '.': 1116b8ba871bSPeter Wemm case '[': 1117b8ba871bSPeter Wemm case '*': 1118b8ba871bSPeter Wemm if (O_ISSET(sp, O_MAGIC)) 1119b8ba871bSPeter Wemm *t++ = '\\'; 1120b8ba871bSPeter Wemm *t++ = *p; 1121b8ba871bSPeter Wemm break; 1122b8ba871bSPeter Wemm default: 1123b8ba871bSPeter Wemm *t++ = '\\'; 1124b8ba871bSPeter Wemm *t++ = *p; 1125b8ba871bSPeter Wemm } 1126b8ba871bSPeter Wemm } else 1127b8ba871bSPeter Wemm *t++ = '\\'; 1128b8ba871bSPeter Wemm break; 1129b8ba871bSPeter Wemm case '~': 1130b8ba871bSPeter Wemm if (O_ISSET(sp, O_MAGIC)) { 1131f0957ccaSPeter Wemm MEMCPY(t, sp->repl, sp->repl_len); 1132b8ba871bSPeter Wemm t += sp->repl_len; 1133b8ba871bSPeter Wemm } else 1134b8ba871bSPeter Wemm *t++ = '~'; 1135b8ba871bSPeter Wemm break; 1136b8ba871bSPeter Wemm case '.': 1137b8ba871bSPeter Wemm case '[': 1138b8ba871bSPeter Wemm case '*': 1139b8ba871bSPeter Wemm if (!O_ISSET(sp, O_MAGIC)) 1140b8ba871bSPeter Wemm *t++ = '\\'; 1141b8ba871bSPeter Wemm *t++ = *p; 1142b8ba871bSPeter Wemm break; 1143b8ba871bSPeter Wemm default: 1144b8ba871bSPeter Wemm *t++ = *p; 1145b8ba871bSPeter Wemm break; 1146b8ba871bSPeter Wemm } 1147b8ba871bSPeter Wemm 1148b8ba871bSPeter Wemm *ptrnp = bp; 1149b8ba871bSPeter Wemm *plenp = t - bp; 1150b8ba871bSPeter Wemm return (0); 1151b8ba871bSPeter Wemm } 1152b8ba871bSPeter Wemm 1153b8ba871bSPeter Wemm /* 1154b8ba871bSPeter Wemm * re_tag_conv -- 1155b8ba871bSPeter Wemm * Convert a tags search path into something that the POSIX 1156b8ba871bSPeter Wemm * 1003.2 RE functions can handle. 1157b8ba871bSPeter Wemm */ 1158b8ba871bSPeter Wemm static int 1159f0957ccaSPeter Wemm re_tag_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp) 1160b8ba871bSPeter Wemm { 1161b8ba871bSPeter Wemm size_t blen, len; 1162b8ba871bSPeter Wemm int lastdollar; 1163f0957ccaSPeter Wemm CHAR_T *bp, *p, *t; 1164b8ba871bSPeter Wemm 1165b8ba871bSPeter Wemm len = *plenp; 1166b8ba871bSPeter Wemm 1167b8ba871bSPeter Wemm /* Max memory usage is 2 times the length of the string. */ 1168b8ba871bSPeter Wemm *replacedp = 1; 1169f0957ccaSPeter Wemm GET_SPACE_RETW(sp, bp, blen, len * 2); 1170b8ba871bSPeter Wemm 1171b8ba871bSPeter Wemm p = *ptrnp; 1172b8ba871bSPeter Wemm t = bp; 1173b8ba871bSPeter Wemm 1174b8ba871bSPeter Wemm /* If the last character is a '/' or '?', we just strip it. */ 1175b8ba871bSPeter Wemm if (len > 0 && (p[len - 1] == '/' || p[len - 1] == '?')) 1176b8ba871bSPeter Wemm --len; 1177b8ba871bSPeter Wemm 1178b8ba871bSPeter Wemm /* If the next-to-last or last character is a '$', it's magic. */ 1179b8ba871bSPeter Wemm if (len > 0 && p[len - 1] == '$') { 1180b8ba871bSPeter Wemm --len; 1181b8ba871bSPeter Wemm lastdollar = 1; 1182b8ba871bSPeter Wemm } else 1183b8ba871bSPeter Wemm lastdollar = 0; 1184b8ba871bSPeter Wemm 1185b8ba871bSPeter Wemm /* If the first character is a '/' or '?', we just strip it. */ 1186b8ba871bSPeter Wemm if (len > 0 && (p[0] == '/' || p[0] == '?')) { 1187b8ba871bSPeter Wemm ++p; 1188b8ba871bSPeter Wemm --len; 1189b8ba871bSPeter Wemm } 1190b8ba871bSPeter Wemm 1191b8ba871bSPeter Wemm /* If the first or second character is a '^', it's magic. */ 1192b8ba871bSPeter Wemm if (p[0] == '^') { 1193b8ba871bSPeter Wemm *t++ = *p++; 1194b8ba871bSPeter Wemm --len; 1195b8ba871bSPeter Wemm } 1196b8ba871bSPeter Wemm 1197b8ba871bSPeter Wemm /* 1198b8ba871bSPeter Wemm * Escape every other magic character we can find, meanwhile stripping 1199b8ba871bSPeter Wemm * the backslashes ctags inserts when escaping the search delimiter 1200b8ba871bSPeter Wemm * characters. 1201b8ba871bSPeter Wemm */ 1202b8ba871bSPeter Wemm for (; len > 0; --len) { 1203b8ba871bSPeter Wemm if (p[0] == '\\' && (p[1] == '/' || p[1] == '?')) { 1204b8ba871bSPeter Wemm ++p; 1205b8ba871bSPeter Wemm --len; 1206f0957ccaSPeter Wemm } else if (STRCHR(L("^.[]$*"), p[0])) 1207b8ba871bSPeter Wemm *t++ = '\\'; 1208b8ba871bSPeter Wemm *t++ = *p++; 1209b8ba871bSPeter Wemm } 1210b8ba871bSPeter Wemm if (lastdollar) 1211b8ba871bSPeter Wemm *t++ = '$'; 1212b8ba871bSPeter Wemm 1213b8ba871bSPeter Wemm *ptrnp = bp; 1214b8ba871bSPeter Wemm *plenp = t - bp; 1215b8ba871bSPeter Wemm return (0); 1216b8ba871bSPeter Wemm } 1217b8ba871bSPeter Wemm 1218b8ba871bSPeter Wemm /* 1219b8ba871bSPeter Wemm * re_cscope_conv -- 1220b8ba871bSPeter Wemm * Convert a cscope search path into something that the POSIX 1221b8ba871bSPeter Wemm * 1003.2 RE functions can handle. 1222b8ba871bSPeter Wemm */ 1223b8ba871bSPeter Wemm static int 1224f0957ccaSPeter Wemm re_cscope_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp) 1225b8ba871bSPeter Wemm { 1226b8ba871bSPeter Wemm size_t blen, len, nspaces; 1227f0957ccaSPeter Wemm CHAR_T *bp, *t; 1228f0957ccaSPeter Wemm CHAR_T *p; 1229f0957ccaSPeter Wemm CHAR_T *wp; 1230f0957ccaSPeter Wemm size_t wlen; 1231b8ba871bSPeter Wemm 1232b8ba871bSPeter Wemm /* 1233b8ba871bSPeter Wemm * Each space in the source line printed by cscope represents an 1234b8ba871bSPeter Wemm * arbitrary sequence of spaces, tabs, and comments. 1235b8ba871bSPeter Wemm */ 1236b8ba871bSPeter Wemm #define CSCOPE_RE_SPACE "([ \t]|/\\*([^*]|\\*/)*\\*/)*" 1237f0957ccaSPeter Wemm #define CSCOPE_LEN sizeof(CSCOPE_RE_SPACE) - 1 1238f0957ccaSPeter Wemm CHAR2INT(sp, CSCOPE_RE_SPACE, CSCOPE_LEN, wp, wlen); 1239b8ba871bSPeter Wemm for (nspaces = 0, p = *ptrnp, len = *plenp; len > 0; ++p, --len) 1240b8ba871bSPeter Wemm if (*p == ' ') 1241b8ba871bSPeter Wemm ++nspaces; 1242b8ba871bSPeter Wemm 1243b8ba871bSPeter Wemm /* 1244b8ba871bSPeter Wemm * Allocate plenty of space: 1245b8ba871bSPeter Wemm * the string, plus potential escaping characters; 1246b8ba871bSPeter Wemm * nspaces + 2 copies of CSCOPE_RE_SPACE; 1247b8ba871bSPeter Wemm * ^, $, nul terminator characters. 1248b8ba871bSPeter Wemm */ 1249b8ba871bSPeter Wemm *replacedp = 1; 1250b8ba871bSPeter Wemm len = (p - *ptrnp) * 2 + (nspaces + 2) * sizeof(CSCOPE_RE_SPACE) + 3; 1251f0957ccaSPeter Wemm GET_SPACE_RETW(sp, bp, blen, len); 1252b8ba871bSPeter Wemm 1253b8ba871bSPeter Wemm p = *ptrnp; 1254b8ba871bSPeter Wemm t = bp; 1255b8ba871bSPeter Wemm 1256b8ba871bSPeter Wemm *t++ = '^'; 1257f0957ccaSPeter Wemm MEMCPY(t, wp, wlen); 1258f0957ccaSPeter Wemm t += wlen; 1259b8ba871bSPeter Wemm 1260b8ba871bSPeter Wemm for (len = *plenp; len > 0; ++p, --len) 1261b8ba871bSPeter Wemm if (*p == ' ') { 1262f0957ccaSPeter Wemm MEMCPY(t, wp, wlen); 1263f0957ccaSPeter Wemm t += wlen; 1264b8ba871bSPeter Wemm } else { 1265f0957ccaSPeter Wemm if (STRCHR(L("\\^.[]$*+?()|{}"), *p)) 1266b8ba871bSPeter Wemm *t++ = '\\'; 1267b8ba871bSPeter Wemm *t++ = *p; 1268b8ba871bSPeter Wemm } 1269b8ba871bSPeter Wemm 1270f0957ccaSPeter Wemm MEMCPY(t, wp, wlen); 1271f0957ccaSPeter Wemm t += wlen; 1272b8ba871bSPeter Wemm *t++ = '$'; 1273b8ba871bSPeter Wemm 1274b8ba871bSPeter Wemm *ptrnp = bp; 1275b8ba871bSPeter Wemm *plenp = t - bp; 1276b8ba871bSPeter Wemm return (0); 1277b8ba871bSPeter Wemm } 1278b8ba871bSPeter Wemm 1279b8ba871bSPeter Wemm /* 1280b8ba871bSPeter Wemm * re_error -- 1281b8ba871bSPeter Wemm * Report a regular expression error. 1282b8ba871bSPeter Wemm * 1283b8ba871bSPeter Wemm * PUBLIC: void re_error __P((SCR *, int, regex_t *)); 1284b8ba871bSPeter Wemm */ 1285b8ba871bSPeter Wemm void 1286f0957ccaSPeter Wemm re_error(SCR *sp, int errcode, regex_t *preg) 1287b8ba871bSPeter Wemm { 1288b8ba871bSPeter Wemm size_t s; 1289b8ba871bSPeter Wemm char *oe; 1290b8ba871bSPeter Wemm 1291b8ba871bSPeter Wemm s = regerror(errcode, preg, "", 0); 1292f0957ccaSPeter Wemm MALLOC(sp, oe, char *, s); 1293f0957ccaSPeter Wemm if (oe != NULL) { 1294b8ba871bSPeter Wemm (void)regerror(errcode, preg, oe, s); 1295b8ba871bSPeter Wemm msgq(sp, M_ERR, "RE error: %s", oe); 1296b8ba871bSPeter Wemm free(oe); 1297b8ba871bSPeter Wemm } 1298b8ba871bSPeter Wemm } 1299b8ba871bSPeter Wemm 1300b8ba871bSPeter Wemm /* 1301b8ba871bSPeter Wemm * re_sub -- 1302b8ba871bSPeter Wemm * Do the substitution for a regular expression. 1303b8ba871bSPeter Wemm */ 1304b8ba871bSPeter Wemm static int 1305f0957ccaSPeter Wemm re_sub( 1306f0957ccaSPeter Wemm SCR *sp, 1307f0957ccaSPeter Wemm CHAR_T *ip, /* Input line. */ 1308f0957ccaSPeter Wemm CHAR_T **lbp, 1309f0957ccaSPeter Wemm size_t *lbclenp, 1310f0957ccaSPeter Wemm size_t *lblenp, 1311f0957ccaSPeter Wemm regmatch_t match[10]) 1312b8ba871bSPeter Wemm { 1313b8ba871bSPeter Wemm enum { C_NOTSET, C_LOWER, C_ONELOWER, C_ONEUPPER, C_UPPER } conv; 1314b8ba871bSPeter Wemm size_t lbclen, lblen; /* Local copies. */ 1315b8ba871bSPeter Wemm size_t mlen; /* Match length. */ 1316b8ba871bSPeter Wemm size_t rpl; /* Remaining replacement length. */ 1317f0957ccaSPeter Wemm CHAR_T *rp; /* Replacement pointer. */ 1318b8ba871bSPeter Wemm int ch; 1319b8ba871bSPeter Wemm int no; /* Match replacement offset. */ 1320f0957ccaSPeter Wemm CHAR_T *p, *t; /* Buffer pointers. */ 1321f0957ccaSPeter Wemm CHAR_T *lb; /* Local copies. */ 1322b8ba871bSPeter Wemm 1323b8ba871bSPeter Wemm lb = *lbp; /* Get local copies. */ 1324b8ba871bSPeter Wemm lbclen = *lbclenp; 1325b8ba871bSPeter Wemm lblen = *lblenp; 1326b8ba871bSPeter Wemm 1327b8ba871bSPeter Wemm /* 1328b8ba871bSPeter Wemm * QUOTING NOTE: 1329b8ba871bSPeter Wemm * 1330b8ba871bSPeter Wemm * There are some special sequences that vi provides in the 1331b8ba871bSPeter Wemm * replacement patterns. 1332b8ba871bSPeter Wemm * & string the RE matched (\& if nomagic set) 1333b8ba871bSPeter Wemm * \# n-th regular subexpression 1334b8ba871bSPeter Wemm * \E end \U, \L conversion 1335b8ba871bSPeter Wemm * \e end \U, \L conversion 1336b8ba871bSPeter Wemm * \l convert the next character to lower-case 1337b8ba871bSPeter Wemm * \L convert to lower-case, until \E, \e, or end of replacement 1338b8ba871bSPeter Wemm * \u convert the next character to upper-case 1339b8ba871bSPeter Wemm * \U convert to upper-case, until \E, \e, or end of replacement 1340b8ba871bSPeter Wemm * 1341b8ba871bSPeter Wemm * Otherwise, since this is the lowest level of replacement, discard 1342b8ba871bSPeter Wemm * all escaping characters. This (hopefully) matches historic practice. 1343b8ba871bSPeter Wemm */ 1344b8ba871bSPeter Wemm #define OUTCH(ch, nltrans) { \ 1345f0957ccaSPeter Wemm ARG_CHAR_T __ch = (ch); \ 1346f0957ccaSPeter Wemm e_key_t __value = KEY_VAL(sp, __ch); \ 1347b8ba871bSPeter Wemm if (nltrans && (__value == K_CR || __value == K_NL)) { \ 1348b8ba871bSPeter Wemm NEEDNEWLINE(sp); \ 1349b8ba871bSPeter Wemm sp->newl[sp->newl_cnt++] = lbclen; \ 1350b8ba871bSPeter Wemm } else if (conv != C_NOTSET) { \ 1351b8ba871bSPeter Wemm switch (conv) { \ 1352b8ba871bSPeter Wemm case C_ONELOWER: \ 1353b8ba871bSPeter Wemm conv = C_NOTSET; \ 1354b8ba871bSPeter Wemm /* FALLTHROUGH */ \ 1355b8ba871bSPeter Wemm case C_LOWER: \ 1356f0957ccaSPeter Wemm if (ISUPPER(__ch)) \ 1357f0957ccaSPeter Wemm __ch = TOLOWER(__ch); \ 1358b8ba871bSPeter Wemm break; \ 1359b8ba871bSPeter Wemm case C_ONEUPPER: \ 1360b8ba871bSPeter Wemm conv = C_NOTSET; \ 1361b8ba871bSPeter Wemm /* FALLTHROUGH */ \ 1362b8ba871bSPeter Wemm case C_UPPER: \ 1363f0957ccaSPeter Wemm if (ISLOWER(__ch)) \ 1364f0957ccaSPeter Wemm __ch = TOUPPER(__ch); \ 1365b8ba871bSPeter Wemm break; \ 1366b8ba871bSPeter Wemm default: \ 1367b8ba871bSPeter Wemm abort(); \ 1368b8ba871bSPeter Wemm } \ 1369b8ba871bSPeter Wemm } \ 1370b8ba871bSPeter Wemm NEEDSP(sp, 1, p); \ 1371b8ba871bSPeter Wemm *p++ = __ch; \ 1372b8ba871bSPeter Wemm ++lbclen; \ 1373b8ba871bSPeter Wemm } 1374b8ba871bSPeter Wemm conv = C_NOTSET; 1375b8ba871bSPeter Wemm for (rp = sp->repl, rpl = sp->repl_len, p = lb + lbclen; rpl--;) { 1376b8ba871bSPeter Wemm switch (ch = *rp++) { 1377b8ba871bSPeter Wemm case '&': 1378b8ba871bSPeter Wemm if (O_ISSET(sp, O_MAGIC)) { 1379b8ba871bSPeter Wemm no = 0; 1380b8ba871bSPeter Wemm goto subzero; 1381b8ba871bSPeter Wemm } 1382b8ba871bSPeter Wemm break; 1383b8ba871bSPeter Wemm case '\\': 1384b8ba871bSPeter Wemm if (rpl == 0) 1385b8ba871bSPeter Wemm break; 1386b8ba871bSPeter Wemm --rpl; 1387b8ba871bSPeter Wemm switch (ch = *rp) { 1388b8ba871bSPeter Wemm case '&': 1389b8ba871bSPeter Wemm ++rp; 1390b8ba871bSPeter Wemm if (!O_ISSET(sp, O_MAGIC)) { 1391b8ba871bSPeter Wemm no = 0; 1392b8ba871bSPeter Wemm goto subzero; 1393b8ba871bSPeter Wemm } 1394b8ba871bSPeter Wemm break; 1395b8ba871bSPeter Wemm case '0': case '1': case '2': case '3': case '4': 1396b8ba871bSPeter Wemm case '5': case '6': case '7': case '8': case '9': 1397b8ba871bSPeter Wemm no = *rp++ - '0'; 1398b8ba871bSPeter Wemm subzero: if (match[no].rm_so == -1 || 1399b8ba871bSPeter Wemm match[no].rm_eo == -1) 1400b8ba871bSPeter Wemm break; 1401b8ba871bSPeter Wemm mlen = match[no].rm_eo - match[no].rm_so; 1402b8ba871bSPeter Wemm for (t = ip + match[no].rm_so; mlen--; ++t) 1403b8ba871bSPeter Wemm OUTCH(*t, 0); 1404b8ba871bSPeter Wemm continue; 1405b8ba871bSPeter Wemm case 'e': 1406b8ba871bSPeter Wemm case 'E': 1407b8ba871bSPeter Wemm ++rp; 1408b8ba871bSPeter Wemm conv = C_NOTSET; 1409b8ba871bSPeter Wemm continue; 1410b8ba871bSPeter Wemm case 'l': 1411b8ba871bSPeter Wemm ++rp; 1412b8ba871bSPeter Wemm conv = C_ONELOWER; 1413b8ba871bSPeter Wemm continue; 1414b8ba871bSPeter Wemm case 'L': 1415b8ba871bSPeter Wemm ++rp; 1416b8ba871bSPeter Wemm conv = C_LOWER; 1417b8ba871bSPeter Wemm continue; 1418b8ba871bSPeter Wemm case 'u': 1419b8ba871bSPeter Wemm ++rp; 1420b8ba871bSPeter Wemm conv = C_ONEUPPER; 1421b8ba871bSPeter Wemm continue; 1422b8ba871bSPeter Wemm case 'U': 1423b8ba871bSPeter Wemm ++rp; 1424b8ba871bSPeter Wemm conv = C_UPPER; 1425b8ba871bSPeter Wemm continue; 1426f0957ccaSPeter Wemm case '\r': 1427f0957ccaSPeter Wemm OUTCH(ch, 0); 1428f0957ccaSPeter Wemm continue; 1429b8ba871bSPeter Wemm default: 1430b8ba871bSPeter Wemm ++rp; 1431b8ba871bSPeter Wemm break; 1432b8ba871bSPeter Wemm } 1433b8ba871bSPeter Wemm } 1434b8ba871bSPeter Wemm OUTCH(ch, 1); 1435b8ba871bSPeter Wemm } 1436b8ba871bSPeter Wemm 1437b8ba871bSPeter Wemm *lbp = lb; /* Update caller's information. */ 1438b8ba871bSPeter Wemm *lbclenp = lbclen; 1439b8ba871bSPeter Wemm *lblenp = lblen; 1440b8ba871bSPeter Wemm return (0); 1441b8ba871bSPeter Wemm } 1442