1e0b8e63eSJohn Marino /*-
2e0b8e63eSJohn Marino * Copyright (c) 1992, 1993, 1994
3e0b8e63eSJohn Marino * The Regents of the University of California. All rights reserved.
4e0b8e63eSJohn Marino * Copyright (c) 1992, 1993, 1994, 1995, 1996
5e0b8e63eSJohn Marino * Keith Bostic. All rights reserved.
6e0b8e63eSJohn Marino *
7e0b8e63eSJohn Marino * See the LICENSE file for redistribution information.
8e0b8e63eSJohn Marino */
9e0b8e63eSJohn Marino
10e0b8e63eSJohn Marino #include "config.h"
11e0b8e63eSJohn Marino
12e0b8e63eSJohn Marino #include <sys/types.h>
13e0b8e63eSJohn Marino #include <sys/queue.h>
14e0b8e63eSJohn Marino #include <sys/time.h>
15e0b8e63eSJohn Marino
16e0b8e63eSJohn Marino #include <bitstring.h>
17e0b8e63eSJohn Marino #include <ctype.h>
18e0b8e63eSJohn Marino #include <errno.h>
19e0b8e63eSJohn Marino #include <limits.h>
20e0b8e63eSJohn Marino #include <stdio.h>
21e0b8e63eSJohn Marino #include <stdlib.h>
22e0b8e63eSJohn Marino #include <string.h>
23e0b8e63eSJohn Marino #include <unistd.h>
24e0b8e63eSJohn Marino
25e0b8e63eSJohn Marino #include "../common/common.h"
26e0b8e63eSJohn Marino #include "../vi/vi.h"
27e0b8e63eSJohn Marino
28e0b8e63eSJohn Marino #define SUB_FIRST 0x01 /* The 'r' flag isn't reasonable. */
29e0b8e63eSJohn Marino #define SUB_MUSTSETR 0x02 /* The 'r' flag is required. */
30e0b8e63eSJohn Marino
31e0b8e63eSJohn Marino static int re_conv(SCR *, CHAR_T **, size_t *, int *);
32e0b8e63eSJohn Marino static int re_cscope_conv(SCR *, CHAR_T **, size_t *, int *);
33e0b8e63eSJohn Marino static int re_sub(SCR *,
34e0b8e63eSJohn Marino CHAR_T *, CHAR_T **, size_t *, size_t *, regmatch_t [10]);
35e0b8e63eSJohn Marino static int re_tag_conv(SCR *, CHAR_T **, size_t *, int *);
36e0b8e63eSJohn Marino static int s(SCR *, EXCMD *, CHAR_T *, regex_t *, u_int);
37e0b8e63eSJohn Marino
38e0b8e63eSJohn Marino /*
39e0b8e63eSJohn Marino * ex_s --
40e0b8e63eSJohn Marino * [line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]]
41e0b8e63eSJohn Marino *
42e0b8e63eSJohn Marino * Substitute on lines matching a pattern.
43e0b8e63eSJohn Marino *
44e0b8e63eSJohn Marino * PUBLIC: int ex_s(SCR *, EXCMD *);
45e0b8e63eSJohn Marino */
46e0b8e63eSJohn Marino int
ex_s(SCR * sp,EXCMD * cmdp)47e0b8e63eSJohn Marino ex_s(SCR *sp, EXCMD *cmdp)
48e0b8e63eSJohn Marino {
49e0b8e63eSJohn Marino regex_t *re;
50e0b8e63eSJohn Marino size_t blen, len;
51e0b8e63eSJohn Marino u_int flags;
52e0b8e63eSJohn Marino int delim;
53e0b8e63eSJohn Marino CHAR_T *bp, *p, *ptrn, *rep, *t;
54e0b8e63eSJohn Marino
55e0b8e63eSJohn Marino /*
56e0b8e63eSJohn Marino * Skip leading white space.
57e0b8e63eSJohn Marino *
58e0b8e63eSJohn Marino * !!!
59e0b8e63eSJohn Marino * Historic vi allowed any non-alphanumeric to serve as the
60e0b8e63eSJohn Marino * substitution command delimiter.
61e0b8e63eSJohn Marino *
62e0b8e63eSJohn Marino * !!!
63e0b8e63eSJohn Marino * If the arguments are empty, it's the same as &, i.e. we
64e0b8e63eSJohn Marino * repeat the last substitution.
65e0b8e63eSJohn Marino */
66e0b8e63eSJohn Marino if (cmdp->argc == 0)
67e0b8e63eSJohn Marino goto subagain;
68e0b8e63eSJohn Marino for (p = cmdp->argv[0]->bp,
69e0b8e63eSJohn Marino len = cmdp->argv[0]->len; len > 0; --len, ++p) {
70e0b8e63eSJohn Marino if (!cmdskip(*p))
71e0b8e63eSJohn Marino break;
72e0b8e63eSJohn Marino }
73e0b8e63eSJohn Marino if (len == 0)
74e0b8e63eSJohn Marino subagain: return (ex_subagain(sp, cmdp));
75e0b8e63eSJohn Marino
76e0b8e63eSJohn Marino delim = *p++;
77*b1ac2ebbSDaniel Fojt if (is09azAZ(delim) || delim == '\\')
78e0b8e63eSJohn Marino return (s(sp, cmdp, p, &sp->subre_c, SUB_MUSTSETR));
79e0b8e63eSJohn Marino
80e0b8e63eSJohn Marino /*
81e0b8e63eSJohn Marino * !!!
82e0b8e63eSJohn Marino * The full-blown substitute command reset the remembered
83e0b8e63eSJohn Marino * state of the 'c' and 'g' suffices.
84e0b8e63eSJohn Marino */
85e0b8e63eSJohn Marino sp->c_suffix = sp->g_suffix = 0;
86e0b8e63eSJohn Marino
87e0b8e63eSJohn Marino /*
88e0b8e63eSJohn Marino * Get the pattern string, toss escaping characters.
89e0b8e63eSJohn Marino *
90e0b8e63eSJohn Marino * !!!
91e0b8e63eSJohn Marino * Historic vi accepted any of the following forms:
92e0b8e63eSJohn Marino *
93e0b8e63eSJohn Marino * :s/abc/def/ change "abc" to "def"
94e0b8e63eSJohn Marino * :s/abc/def change "abc" to "def"
95e0b8e63eSJohn Marino * :s/abc/ delete "abc"
96e0b8e63eSJohn Marino * :s/abc delete "abc"
97e0b8e63eSJohn Marino *
98e0b8e63eSJohn Marino * QUOTING NOTE:
99e0b8e63eSJohn Marino *
100e0b8e63eSJohn Marino * Only toss an escaping character if it escapes a delimiter.
101e0b8e63eSJohn Marino * This means that "s/A/\\\\f" replaces "A" with "\\f". It
102e0b8e63eSJohn Marino * would be nice to be more regular, i.e. for each layer of
103e0b8e63eSJohn Marino * escaping a single escaping character is removed, but that's
104e0b8e63eSJohn Marino * not how the historic vi worked.
105e0b8e63eSJohn Marino */
106e0b8e63eSJohn Marino for (ptrn = t = p;;) {
107e0b8e63eSJohn Marino if (p[0] == '\0' || p[0] == delim) {
108e0b8e63eSJohn Marino if (p[0] == delim)
109e0b8e63eSJohn Marino ++p;
110e0b8e63eSJohn Marino /*
111e0b8e63eSJohn Marino * !!!
112e0b8e63eSJohn Marino * Nul terminate the pattern string -- it's passed
113e0b8e63eSJohn Marino * to regcomp which doesn't understand anything else.
114e0b8e63eSJohn Marino */
115e0b8e63eSJohn Marino *t = '\0';
116e0b8e63eSJohn Marino break;
117e0b8e63eSJohn Marino }
118e0b8e63eSJohn Marino if (p[0] == '\\')
119e0b8e63eSJohn Marino if (p[1] == delim)
120e0b8e63eSJohn Marino ++p;
121e0b8e63eSJohn Marino else if (p[1] == '\\')
122e0b8e63eSJohn Marino *t++ = *p++;
123e0b8e63eSJohn Marino *t++ = *p++;
124e0b8e63eSJohn Marino }
125e0b8e63eSJohn Marino
126e0b8e63eSJohn Marino /*
127e0b8e63eSJohn Marino * If the pattern string is empty, use the last RE (not just the
128e0b8e63eSJohn Marino * last substitution RE).
129e0b8e63eSJohn Marino */
130e0b8e63eSJohn Marino if (*ptrn == '\0') {
131e0b8e63eSJohn Marino if (sp->re == NULL) {
132e0b8e63eSJohn Marino ex_emsg(sp, NULL, EXM_NOPREVRE);
133e0b8e63eSJohn Marino return (1);
134e0b8e63eSJohn Marino }
135e0b8e63eSJohn Marino
136e0b8e63eSJohn Marino /* Re-compile the RE if necessary. */
137e0b8e63eSJohn Marino if (!F_ISSET(sp, SC_RE_SEARCH) &&
138e0b8e63eSJohn Marino re_compile(sp, sp->re, sp->re_len,
139e0b8e63eSJohn Marino NULL, NULL, &sp->re_c, RE_C_SEARCH))
140e0b8e63eSJohn Marino return (1);
141e0b8e63eSJohn Marino flags = 0;
142e0b8e63eSJohn Marino } else {
143e0b8e63eSJohn Marino /*
144e0b8e63eSJohn Marino * !!!
145e0b8e63eSJohn Marino * Compile the RE. Historic practice is that substitutes set
146e0b8e63eSJohn Marino * the search direction as well as both substitute and search
147e0b8e63eSJohn Marino * RE's. We compile the RE twice, as we don't want to bother
148e0b8e63eSJohn Marino * ref counting the pattern string and (opaque) structure.
149e0b8e63eSJohn Marino */
150e0b8e63eSJohn Marino if (re_compile(sp, ptrn, t - ptrn, &sp->re,
151e0b8e63eSJohn Marino &sp->re_len, &sp->re_c, RE_C_SEARCH))
152e0b8e63eSJohn Marino return (1);
153e0b8e63eSJohn Marino if (re_compile(sp, ptrn, t - ptrn, &sp->subre,
154e0b8e63eSJohn Marino &sp->subre_len, &sp->subre_c, RE_C_SUBST))
155e0b8e63eSJohn Marino return (1);
156e0b8e63eSJohn Marino
157e0b8e63eSJohn Marino flags = SUB_FIRST;
158e0b8e63eSJohn Marino sp->searchdir = FORWARD;
159e0b8e63eSJohn Marino }
160e0b8e63eSJohn Marino re = &sp->re_c;
161e0b8e63eSJohn Marino
162e0b8e63eSJohn Marino /*
163e0b8e63eSJohn Marino * Get the replacement string.
164e0b8e63eSJohn Marino *
165e0b8e63eSJohn Marino * The special character & (\& if O_MAGIC not set) matches the
166e0b8e63eSJohn Marino * entire RE. No handling of & is required here, it's done by
167e0b8e63eSJohn Marino * re_sub().
168e0b8e63eSJohn Marino *
169e0b8e63eSJohn Marino * The special character ~ (\~ if O_MAGIC not set) inserts the
170e0b8e63eSJohn Marino * previous replacement string into this replacement string.
171e0b8e63eSJohn Marino * Count ~'s to figure out how much space we need. We could
172e0b8e63eSJohn Marino * special case nonexistent last patterns or whether or not
173e0b8e63eSJohn Marino * O_MAGIC is set, but it's probably not worth the effort.
174e0b8e63eSJohn Marino *
175e0b8e63eSJohn Marino * QUOTING NOTE:
176e0b8e63eSJohn Marino *
177e0b8e63eSJohn Marino * Only toss an escaping character if it escapes a delimiter or
178e0b8e63eSJohn Marino * if O_MAGIC is set and it escapes a tilde.
179e0b8e63eSJohn Marino *
180e0b8e63eSJohn Marino * !!!
181e0b8e63eSJohn Marino * If the entire replacement pattern is "%", then use the last
182e0b8e63eSJohn Marino * replacement pattern. This semantic was added to vi in System
183e0b8e63eSJohn Marino * V and then percolated elsewhere, presumably around the time
184e0b8e63eSJohn Marino * that it was added to their version of ed(1).
185e0b8e63eSJohn Marino */
186e0b8e63eSJohn Marino if (p[0] == '\0' || p[0] == delim) {
187e0b8e63eSJohn Marino if (p[0] == delim)
188e0b8e63eSJohn Marino ++p;
189e0b8e63eSJohn Marino free(sp->repl);
190e0b8e63eSJohn Marino sp->repl = NULL;
191e0b8e63eSJohn Marino sp->repl_len = 0;
192e0b8e63eSJohn Marino } else if (p[0] == '%' && (p[1] == '\0' || p[1] == delim))
193e0b8e63eSJohn Marino p += p[1] == delim ? 2 : 1;
194e0b8e63eSJohn Marino else {
195e0b8e63eSJohn Marino for (rep = p, len = 0;
196e0b8e63eSJohn Marino p[0] != '\0' && p[0] != delim; ++p, ++len)
197e0b8e63eSJohn Marino if (p[0] == '~')
198e0b8e63eSJohn Marino len += sp->repl_len;
199e0b8e63eSJohn Marino GET_SPACE_RETW(sp, bp, blen, len);
200e0b8e63eSJohn Marino for (t = bp, len = 0, p = rep;;) {
201e0b8e63eSJohn Marino if (p[0] == '\0' || p[0] == delim) {
202e0b8e63eSJohn Marino if (p[0] == delim)
203e0b8e63eSJohn Marino ++p;
204e0b8e63eSJohn Marino break;
205e0b8e63eSJohn Marino }
206e0b8e63eSJohn Marino if (p[0] == '\\') {
207e0b8e63eSJohn Marino if (p[1] == delim)
208e0b8e63eSJohn Marino ++p;
209e0b8e63eSJohn Marino else if (p[1] == '\\') {
210e0b8e63eSJohn Marino *t++ = *p++;
211e0b8e63eSJohn Marino ++len;
212e0b8e63eSJohn Marino } else if (p[1] == '~') {
213e0b8e63eSJohn Marino ++p;
214e0b8e63eSJohn Marino if (!O_ISSET(sp, O_MAGIC))
215e0b8e63eSJohn Marino goto tilde;
216e0b8e63eSJohn Marino }
217e0b8e63eSJohn Marino } else if (p[0] == '~' && O_ISSET(sp, O_MAGIC)) {
218e0b8e63eSJohn Marino tilde: ++p;
219e0b8e63eSJohn Marino MEMCPY(t, sp->repl, sp->repl_len);
220e0b8e63eSJohn Marino t += sp->repl_len;
221e0b8e63eSJohn Marino len += sp->repl_len;
222e0b8e63eSJohn Marino continue;
223e0b8e63eSJohn Marino }
224e0b8e63eSJohn Marino *t++ = *p++;
225e0b8e63eSJohn Marino ++len;
226e0b8e63eSJohn Marino }
227e0b8e63eSJohn Marino if ((sp->repl_len = len) != 0) {
228e0b8e63eSJohn Marino free(sp->repl);
229*b1ac2ebbSDaniel Fojt MALLOC(sp, sp->repl, len * sizeof(CHAR_T));
230e0b8e63eSJohn Marino if (sp->repl == NULL) {
231e0b8e63eSJohn Marino FREE_SPACEW(sp, bp, blen);
232e0b8e63eSJohn Marino return (1);
233e0b8e63eSJohn Marino }
234e0b8e63eSJohn Marino MEMCPY(sp->repl, bp, len);
235e0b8e63eSJohn Marino }
236e0b8e63eSJohn Marino FREE_SPACEW(sp, bp, blen);
237e0b8e63eSJohn Marino }
238e0b8e63eSJohn Marino return (s(sp, cmdp, p, re, flags));
239e0b8e63eSJohn Marino }
240e0b8e63eSJohn Marino
241e0b8e63eSJohn Marino /*
242e0b8e63eSJohn Marino * ex_subagain --
243e0b8e63eSJohn Marino * [line [,line]] & [cgr] [count] [#lp]]
244e0b8e63eSJohn Marino *
245e0b8e63eSJohn Marino * Substitute using the last substitute RE and replacement pattern.
246e0b8e63eSJohn Marino *
247e0b8e63eSJohn Marino * PUBLIC: int ex_subagain(SCR *, EXCMD *);
248e0b8e63eSJohn Marino */
249e0b8e63eSJohn Marino int
ex_subagain(SCR * sp,EXCMD * cmdp)250e0b8e63eSJohn Marino ex_subagain(SCR *sp, EXCMD *cmdp)
251e0b8e63eSJohn Marino {
252e0b8e63eSJohn Marino if (sp->subre == NULL) {
253e0b8e63eSJohn Marino ex_emsg(sp, NULL, EXM_NOPREVRE);
254e0b8e63eSJohn Marino return (1);
255e0b8e63eSJohn Marino }
256e0b8e63eSJohn Marino if (!F_ISSET(sp, SC_RE_SUBST) &&
257e0b8e63eSJohn Marino re_compile(sp, sp->subre, sp->subre_len,
258e0b8e63eSJohn Marino NULL, NULL, &sp->subre_c, RE_C_SUBST))
259e0b8e63eSJohn Marino return (1);
260e0b8e63eSJohn Marino return (s(sp,
261e0b8e63eSJohn Marino cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->subre_c, 0));
262e0b8e63eSJohn Marino }
263e0b8e63eSJohn Marino
264e0b8e63eSJohn Marino /*
265e0b8e63eSJohn Marino * ex_subtilde --
266e0b8e63eSJohn Marino * [line [,line]] ~ [cgr] [count] [#lp]]
267e0b8e63eSJohn Marino *
268e0b8e63eSJohn Marino * Substitute using the last RE and last substitute replacement pattern.
269e0b8e63eSJohn Marino *
270e0b8e63eSJohn Marino * PUBLIC: int ex_subtilde(SCR *, EXCMD *);
271e0b8e63eSJohn Marino */
272e0b8e63eSJohn Marino int
ex_subtilde(SCR * sp,EXCMD * cmdp)273e0b8e63eSJohn Marino ex_subtilde(SCR *sp, EXCMD *cmdp)
274e0b8e63eSJohn Marino {
275e0b8e63eSJohn Marino if (sp->re == NULL) {
276e0b8e63eSJohn Marino ex_emsg(sp, NULL, EXM_NOPREVRE);
277e0b8e63eSJohn Marino return (1);
278e0b8e63eSJohn Marino }
279e0b8e63eSJohn Marino if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp, sp->re,
280e0b8e63eSJohn Marino sp->re_len, NULL, NULL, &sp->re_c, RE_C_SEARCH))
281e0b8e63eSJohn Marino return (1);
282e0b8e63eSJohn Marino return (s(sp,
283e0b8e63eSJohn Marino cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->re_c, 0));
284e0b8e63eSJohn Marino }
285e0b8e63eSJohn Marino
286e0b8e63eSJohn Marino /*
287e0b8e63eSJohn Marino * s --
288e0b8e63eSJohn Marino * Do the substitution. This stuff is *really* tricky. There are lots of
289e0b8e63eSJohn Marino * special cases, and general nastiness. Don't mess with it unless you're
290e0b8e63eSJohn Marino * pretty confident.
291e0b8e63eSJohn Marino *
292e0b8e63eSJohn Marino * The nasty part of the substitution is what happens when the replacement
293e0b8e63eSJohn Marino * string contains newlines. It's a bit tricky -- consider the information
294e0b8e63eSJohn Marino * that has to be retained for "s/f\(o\)o/^M\1^M\1/". The solution here is
295e0b8e63eSJohn Marino * to build a set of newline offsets which we use to break the line up later,
296e0b8e63eSJohn Marino * when the replacement is done. Don't change it unless you're *damned*
297e0b8e63eSJohn Marino * confident.
298e0b8e63eSJohn Marino */
/*
 * NEEDNEWLINE --
 *	When sp->newl_cnt has reached sp->newl_len, enlarge the sp->newl
 *	array of size_t by 25 elements.  If the reallocation leaves
 *	sp->newl NULL, newl_len is reset to 0 and the *enclosing function*
 *	returns 1.
 *
 *	The argument is parenthesized at every use so that expressions
 *	such as &scr work.  This stays a plain brace block to match the
 *	file's other statement macros, which are invoked without a
 *	trailing semicolon.
 */
#define	NEEDNEWLINE(sp) {						\
	if ((sp)->newl_len == (sp)->newl_cnt) {				\
		(sp)->newl_len += 25;					\
		REALLOC(sp, (sp)->newl, size_t *,			\
		    (sp)->newl_len * sizeof(size_t));			\
		if ((sp)->newl == NULL) {				\
			(sp)->newl_len = 0;				\
			return (1);					\
		}							\
	}								\
}
310e0b8e63eSJohn Marino
/*
 * BUILD --
 *	Append len elements starting at l to the build buffer.
 *
 *	Operates on the caller's locals lb (buffer), lbclen (elements in
 *	use) and lblen (elements allocated).  If the data does not fit,
 *	the buffer is reallocated to p2roundup(MAX(needed, 256)) elements
 *	first; if that leaves lb NULL, lbclen is reset to 0 and the
 *	*enclosing function* returns 1.
 *
 *	len is evaluated more than once, so it must be free of side
 *	effects.  The macro is a brace block rather than do/while(0)
 *	because call sites in this file invoke it without a trailing
 *	semicolon.
 */
#define	BUILD(sp, l, len) {						\
	if (lbclen + (len) > lblen) {					\
		lblen = p2roundup(MAX(lbclen + (len), 256));		\
		REALLOC(sp, lb, CHAR_T *, lblen * sizeof(CHAR_T));	\
		if (lb == NULL) {					\
			lbclen = 0;					\
			return (1);					\
		}							\
	}								\
	MEMCPY(lb + lbclen, (l), (len));				\
	lbclen += (len);						\
}
323e0b8e63eSJohn Marino
/*
 * NEEDSP --
 *	Make sure the build buffer can hold len more elements beyond the
 *	lbclen already in use.
 *
 *	Operates on the caller's locals lb, lbclen and lblen.  When the
 *	buffer has to grow it is reallocated to p2roundup(MAX(needed, 256))
 *	elements and pnt is re-pointed at lb + lbclen; when no growth is
 *	needed pnt is left untouched.  If the reallocation leaves lb NULL,
 *	lbclen is reset to 0 and the *enclosing function* returns 1.
 */
#define NEEDSP(sp, len, pnt) { \
	if (lbclen + (len) > lblen) { \
		lblen = p2roundup(MAX(lbclen + (len), 256)); \
		REALLOC(sp, lb, CHAR_T *, lblen * sizeof(CHAR_T)); \
		if (lb == NULL) { \
			lbclen = 0; \
			return (1); \
		} \
		pnt = lb + lbclen; \
	} \
}
335e0b8e63eSJohn Marino
336e0b8e63eSJohn Marino static int
s(SCR * sp,EXCMD * cmdp,CHAR_T * s,regex_t * re,u_int flags)337e0b8e63eSJohn Marino s(SCR *sp, EXCMD *cmdp, CHAR_T *s, regex_t *re, u_int flags)
338e0b8e63eSJohn Marino {
339e0b8e63eSJohn Marino EVENT ev;
340e0b8e63eSJohn Marino MARK from, to;
341e0b8e63eSJohn Marino TEXTH tiq[] = {{ 0 }};
342e0b8e63eSJohn Marino recno_t elno, lno, slno;
343e0b8e63eSJohn Marino u_long ul;
344e0b8e63eSJohn Marino regmatch_t match[10];
345e0b8e63eSJohn Marino size_t blen, cnt, last, lbclen, lblen, len, llen;
346e0b8e63eSJohn Marino size_t offset, saved_offset, scno;
347e0b8e63eSJohn Marino int cflag, lflag, nflag, pflag, rflag;
348e0b8e63eSJohn Marino int didsub, do_eol_match, eflags, empty_ok, eval;
349e0b8e63eSJohn Marino int linechanged, matched, quit, rval;
350e0b8e63eSJohn Marino CHAR_T *bp, *lb;
351e0b8e63eSJohn Marino enum nresult nret;
352e0b8e63eSJohn Marino
353e0b8e63eSJohn Marino NEEDFILE(sp, cmdp);
354e0b8e63eSJohn Marino
355e0b8e63eSJohn Marino slno = sp->lno;
356e0b8e63eSJohn Marino scno = sp->cno;
357e0b8e63eSJohn Marino
358e0b8e63eSJohn Marino /*
359e0b8e63eSJohn Marino * !!!
	 * Historically, the 'g' and 'c' suffixes were always toggled as flags,
361e0b8e63eSJohn Marino * so ":s/A/B/" was the same as ":s/A/B/ccgg". If O_EDCOMPATIBLE was
362e0b8e63eSJohn Marino * not set, they were initialized to 0 for all substitute commands. If
363e0b8e63eSJohn Marino * O_EDCOMPATIBLE was set, they were initialized to 0 only if the user
364e0b8e63eSJohn Marino * specified substitute/replacement patterns (see ex_s()).
365e0b8e63eSJohn Marino */
366e0b8e63eSJohn Marino if (!O_ISSET(sp, O_EDCOMPATIBLE))
367e0b8e63eSJohn Marino sp->c_suffix = sp->g_suffix = 0;
368e0b8e63eSJohn Marino
369e0b8e63eSJohn Marino /*
370e0b8e63eSJohn Marino * Historic vi permitted the '#', 'l' and 'p' options in vi mode, but
371e0b8e63eSJohn Marino * it only displayed the last change. I'd disallow them, but they are
372e0b8e63eSJohn Marino * useful in combination with the [v]global commands. In the current
373e0b8e63eSJohn Marino * model the problem is combining them with the 'c' flag -- the screen
374e0b8e63eSJohn Marino * would have to flip back and forth between the confirm screen and the
375e0b8e63eSJohn Marino * ex print screen, which would be pretty awful. We do display all
376e0b8e63eSJohn Marino * changes, though, for what that's worth.
377e0b8e63eSJohn Marino *
378e0b8e63eSJohn Marino * !!!
379e0b8e63eSJohn Marino * Historic vi was fairly strict about the order of "options", the
380e0b8e63eSJohn Marino * count, and "flags". I'm somewhat fuzzy on the difference between
381e0b8e63eSJohn Marino * options and flags, anyway, so this is a simpler approach, and we
	 * just take them in whatever order the user gives them.  (The ex
383e0b8e63eSJohn Marino * usage statement doesn't reflect this.)
384e0b8e63eSJohn Marino */
385e0b8e63eSJohn Marino cflag = lflag = nflag = pflag = rflag = 0;
386e0b8e63eSJohn Marino if (s == NULL)
387e0b8e63eSJohn Marino goto noargs;
388e0b8e63eSJohn Marino for (lno = OOBLNO; *s != '\0'; ++s)
389e0b8e63eSJohn Marino switch (*s) {
390e0b8e63eSJohn Marino case ' ':
391e0b8e63eSJohn Marino case '\t':
392e0b8e63eSJohn Marino continue;
393e0b8e63eSJohn Marino case '+':
394e0b8e63eSJohn Marino ++cmdp->flagoff;
395e0b8e63eSJohn Marino break;
396e0b8e63eSJohn Marino case '-':
397e0b8e63eSJohn Marino --cmdp->flagoff;
398e0b8e63eSJohn Marino break;
399e0b8e63eSJohn Marino case '0': case '1': case '2': case '3': case '4':
400e0b8e63eSJohn Marino case '5': case '6': case '7': case '8': case '9':
401e0b8e63eSJohn Marino if (lno != OOBLNO)
402e0b8e63eSJohn Marino goto usage;
403e0b8e63eSJohn Marino errno = 0;
404e0b8e63eSJohn Marino nret = nget_uslong(&ul, s, &s, 10);
405e0b8e63eSJohn Marino lno = ul;
406e0b8e63eSJohn Marino if (*s == '\0') /* Loop increment correction. */
407e0b8e63eSJohn Marino --s;
408e0b8e63eSJohn Marino if (nret != NUM_OK) {
409e0b8e63eSJohn Marino if (nret == NUM_OVER)
410e0b8e63eSJohn Marino msgq(sp, M_ERR, "153|Count overflow");
411e0b8e63eSJohn Marino else if (nret == NUM_UNDER)
412e0b8e63eSJohn Marino msgq(sp, M_ERR, "154|Count underflow");
413e0b8e63eSJohn Marino else
414e0b8e63eSJohn Marino msgq(sp, M_SYSERR, NULL);
415e0b8e63eSJohn Marino return (1);
416e0b8e63eSJohn Marino }
417e0b8e63eSJohn Marino /*
418e0b8e63eSJohn Marino * In historic vi, the count was inclusive from the
419e0b8e63eSJohn Marino * second address.
420e0b8e63eSJohn Marino */
421e0b8e63eSJohn Marino cmdp->addr1.lno = cmdp->addr2.lno;
422e0b8e63eSJohn Marino cmdp->addr2.lno += lno - 1;
423e0b8e63eSJohn Marino if (!db_exist(sp, cmdp->addr2.lno) &&
424e0b8e63eSJohn Marino db_last(sp, &cmdp->addr2.lno))
425e0b8e63eSJohn Marino return (1);
426e0b8e63eSJohn Marino break;
427e0b8e63eSJohn Marino case '#':
428e0b8e63eSJohn Marino nflag = 1;
429e0b8e63eSJohn Marino break;
430e0b8e63eSJohn Marino case 'c':
431e0b8e63eSJohn Marino sp->c_suffix = !sp->c_suffix;
432e0b8e63eSJohn Marino
433e0b8e63eSJohn Marino /* Ex text structure initialization. */
434e0b8e63eSJohn Marino if (F_ISSET(sp, SC_EX))
435e0b8e63eSJohn Marino TAILQ_INIT(tiq);
436e0b8e63eSJohn Marino break;
437e0b8e63eSJohn Marino case 'g':
438e0b8e63eSJohn Marino sp->g_suffix = !sp->g_suffix;
439e0b8e63eSJohn Marino break;
440e0b8e63eSJohn Marino case 'l':
441e0b8e63eSJohn Marino lflag = 1;
442e0b8e63eSJohn Marino break;
443e0b8e63eSJohn Marino case 'p':
444e0b8e63eSJohn Marino pflag = 1;
445e0b8e63eSJohn Marino break;
446e0b8e63eSJohn Marino case 'r':
447e0b8e63eSJohn Marino if (LF_ISSET(SUB_FIRST)) {
448e0b8e63eSJohn Marino msgq(sp, M_ERR,
449e0b8e63eSJohn Marino "155|Regular expression specified; r flag meaningless");
450e0b8e63eSJohn Marino return (1);
451e0b8e63eSJohn Marino }
452e0b8e63eSJohn Marino if (!F_ISSET(sp, SC_RE_SEARCH)) {
453e0b8e63eSJohn Marino ex_emsg(sp, NULL, EXM_NOPREVRE);
454e0b8e63eSJohn Marino return (1);
455e0b8e63eSJohn Marino }
456e0b8e63eSJohn Marino rflag = 1;
457e0b8e63eSJohn Marino re = &sp->re_c;
458e0b8e63eSJohn Marino break;
459e0b8e63eSJohn Marino default:
460e0b8e63eSJohn Marino goto usage;
461e0b8e63eSJohn Marino }
462e0b8e63eSJohn Marino
463e0b8e63eSJohn Marino if (*s != '\0' || (!rflag && LF_ISSET(SUB_MUSTSETR))) {
464e0b8e63eSJohn Marino usage: ex_emsg(sp, cmdp->cmd->usage, EXM_USAGE);
465e0b8e63eSJohn Marino return (1);
466e0b8e63eSJohn Marino }
467e0b8e63eSJohn Marino
468e0b8e63eSJohn Marino noargs: if (F_ISSET(sp, SC_VI) && sp->c_suffix && (lflag || nflag || pflag)) {
469e0b8e63eSJohn Marino msgq(sp, M_ERR,
470e0b8e63eSJohn Marino "156|The #, l and p flags may not be combined with the c flag in vi mode");
471e0b8e63eSJohn Marino return (1);
472e0b8e63eSJohn Marino }
473e0b8e63eSJohn Marino
474e0b8e63eSJohn Marino /*
475e0b8e63eSJohn Marino * bp: if interactive, line cache
476e0b8e63eSJohn Marino * blen: if interactive, line cache length
477e0b8e63eSJohn Marino * lb: build buffer pointer.
478e0b8e63eSJohn Marino * lbclen: current length of built buffer.
	 * lblen: length of build buffer.
480e0b8e63eSJohn Marino */
481e0b8e63eSJohn Marino bp = lb = NULL;
482e0b8e63eSJohn Marino blen = lbclen = lblen = 0;
483e0b8e63eSJohn Marino
484e0b8e63eSJohn Marino /* For each line... */
485e0b8e63eSJohn Marino lno = cmdp->addr1.lno == 0 ? 1 : cmdp->addr1.lno;
486e0b8e63eSJohn Marino for (matched = quit = 0,
487e0b8e63eSJohn Marino elno = cmdp->addr2.lno; !quit && lno <= elno; ++lno) {
488e0b8e63eSJohn Marino
489e0b8e63eSJohn Marino /* Someone's unhappy, time to stop. */
490e0b8e63eSJohn Marino if (INTERRUPTED(sp))
491e0b8e63eSJohn Marino break;
492e0b8e63eSJohn Marino
493e0b8e63eSJohn Marino /* Get the line. */
494e0b8e63eSJohn Marino if (db_get(sp, lno, DBG_FATAL, &s, &llen))
495e0b8e63eSJohn Marino goto err;
496e0b8e63eSJohn Marino
497e0b8e63eSJohn Marino /*
498e0b8e63eSJohn Marino * Make a local copy if doing confirmation -- when calling
499e0b8e63eSJohn Marino * the confirm routine we're likely to lose the cached copy.
500e0b8e63eSJohn Marino */
501e0b8e63eSJohn Marino if (sp->c_suffix) {
502e0b8e63eSJohn Marino if (bp == NULL) {
503e0b8e63eSJohn Marino GET_SPACE_RETW(sp, bp, blen, llen);
504e0b8e63eSJohn Marino } else
505e0b8e63eSJohn Marino ADD_SPACE_RETW(sp, bp, blen, llen);
506e0b8e63eSJohn Marino MEMCPY(bp, s, llen);
507e0b8e63eSJohn Marino s = bp;
508e0b8e63eSJohn Marino }
509e0b8e63eSJohn Marino
510e0b8e63eSJohn Marino /* Start searching from the beginning. */
511e0b8e63eSJohn Marino offset = 0;
512e0b8e63eSJohn Marino len = llen;
513e0b8e63eSJohn Marino
514e0b8e63eSJohn Marino /* Reset the build buffer offset. */
515e0b8e63eSJohn Marino lbclen = 0;
516e0b8e63eSJohn Marino
517e0b8e63eSJohn Marino /* Reset empty match flag. */
518e0b8e63eSJohn Marino empty_ok = 1;
519e0b8e63eSJohn Marino
520e0b8e63eSJohn Marino /*
521e0b8e63eSJohn Marino * We don't want to have to do a setline if the line didn't
522e0b8e63eSJohn Marino * change -- keep track of whether or not this line changed.
523e0b8e63eSJohn Marino * If doing confirmations, don't want to keep setting the
524e0b8e63eSJohn Marino * line if change is refused -- keep track of substitutions.
525e0b8e63eSJohn Marino */
526e0b8e63eSJohn Marino didsub = linechanged = 0;
527e0b8e63eSJohn Marino
528e0b8e63eSJohn Marino /* New line, do an EOL match. */
529e0b8e63eSJohn Marino do_eol_match = 1;
530e0b8e63eSJohn Marino
531e0b8e63eSJohn Marino /* It's not nul terminated, but we pretend it is. */
532e0b8e63eSJohn Marino eflags = REG_STARTEND;
533e0b8e63eSJohn Marino
534e0b8e63eSJohn Marino /*
535e0b8e63eSJohn Marino * The search area is from s + offset to the EOL.
536e0b8e63eSJohn Marino *
537e0b8e63eSJohn Marino * Generally, match[0].rm_so is the offset of the start
538e0b8e63eSJohn Marino * of the match from the start of the search, and offset
539e0b8e63eSJohn Marino * is the offset of the start of the last search.
540e0b8e63eSJohn Marino */
541e0b8e63eSJohn Marino nextmatch: match[0].rm_so = 0;
542e0b8e63eSJohn Marino match[0].rm_eo = len;
543e0b8e63eSJohn Marino
544e0b8e63eSJohn Marino /* Get the next match. */
545e0b8e63eSJohn Marino eval = regexec(re, s + offset, 10, match, eflags);
546e0b8e63eSJohn Marino
547e0b8e63eSJohn Marino /*
548e0b8e63eSJohn Marino * There wasn't a match or if there was an error, deal with
549e0b8e63eSJohn Marino * it. If there was a previous match in this line, resolve
550e0b8e63eSJohn Marino * the changes into the database. Otherwise, just move on.
551e0b8e63eSJohn Marino */
552e0b8e63eSJohn Marino if (eval == REG_NOMATCH)
553e0b8e63eSJohn Marino goto endmatch;
554e0b8e63eSJohn Marino if (eval != 0) {
555e0b8e63eSJohn Marino re_error(sp, eval, re);
556e0b8e63eSJohn Marino goto err;
557e0b8e63eSJohn Marino }
558e0b8e63eSJohn Marino matched = 1;
559e0b8e63eSJohn Marino
560e0b8e63eSJohn Marino /* Only the first search can match an anchored expression. */
561e0b8e63eSJohn Marino eflags |= REG_NOTBOL;
562e0b8e63eSJohn Marino
563e0b8e63eSJohn Marino /*
564e0b8e63eSJohn Marino * !!!
565e0b8e63eSJohn Marino * It's possible to match 0-length strings -- for example, the
566e0b8e63eSJohn Marino * command s;a*;X;, when matched against the string "aabb" will
567e0b8e63eSJohn Marino * result in "XbXbX", i.e. the matches are "aa", the space
568e0b8e63eSJohn Marino * between the b's and the space between the b's and the end of
569e0b8e63eSJohn Marino * the string. There is a similar space between the beginning
570e0b8e63eSJohn Marino * of the string and the a's. The rule that we use (because vi
571e0b8e63eSJohn Marino * historically used it) is that any 0-length match, occurring
572e0b8e63eSJohn Marino * immediately after a match, is ignored. Otherwise, the above
573e0b8e63eSJohn Marino * example would have resulted in "XXbXbX". Another example is
574e0b8e63eSJohn Marino * incorrectly using " *" to replace groups of spaces with one
575e0b8e63eSJohn Marino * space.
576e0b8e63eSJohn Marino *
577e0b8e63eSJohn Marino * The way we do this is that if we just had a successful match,
578e0b8e63eSJohn Marino * the starting offset does not skip characters, and the match
579e0b8e63eSJohn Marino * is empty, ignore the match and move forward. If there's no
580e0b8e63eSJohn Marino * more characters in the string, we were attempting to match
581e0b8e63eSJohn Marino * after the last character, so quit.
582e0b8e63eSJohn Marino */
583e0b8e63eSJohn Marino if (!empty_ok && match[0].rm_so == 0 && match[0].rm_eo == 0) {
584e0b8e63eSJohn Marino empty_ok = 1;
585e0b8e63eSJohn Marino if (len == 0)
586e0b8e63eSJohn Marino goto endmatch;
587e0b8e63eSJohn Marino BUILD(sp, s + offset, 1)
588e0b8e63eSJohn Marino ++offset;
589e0b8e63eSJohn Marino --len;
590e0b8e63eSJohn Marino goto nextmatch;
591e0b8e63eSJohn Marino }
592e0b8e63eSJohn Marino
593e0b8e63eSJohn Marino /* Confirm change. */
594e0b8e63eSJohn Marino if (sp->c_suffix) {
595e0b8e63eSJohn Marino /*
596e0b8e63eSJohn Marino * Set the cursor position for confirmation. Note,
597e0b8e63eSJohn Marino * if we matched on a '$', the cursor may be past
598e0b8e63eSJohn Marino * the end of line.
599e0b8e63eSJohn Marino */
600e0b8e63eSJohn Marino from.lno = to.lno = lno;
601e0b8e63eSJohn Marino from.cno = match[0].rm_so + offset;
602e0b8e63eSJohn Marino to.cno = match[0].rm_eo + offset;
603e0b8e63eSJohn Marino /*
604e0b8e63eSJohn Marino * Both ex and vi have to correct for a change before
605e0b8e63eSJohn Marino * the first character in the line.
606e0b8e63eSJohn Marino */
607e0b8e63eSJohn Marino if (llen == 0)
608e0b8e63eSJohn Marino from.cno = to.cno = 0;
609e0b8e63eSJohn Marino if (F_ISSET(sp, SC_VI)) {
610e0b8e63eSJohn Marino /*
611e0b8e63eSJohn Marino * Only vi has to correct for a change after
612e0b8e63eSJohn Marino * the last character in the line.
613e0b8e63eSJohn Marino *
614e0b8e63eSJohn Marino * XXX
615e0b8e63eSJohn Marino * It would be nice to change the vi code so
616e0b8e63eSJohn Marino * that we could display a cursor past EOL.
617e0b8e63eSJohn Marino */
618e0b8e63eSJohn Marino if (to.cno >= llen)
619e0b8e63eSJohn Marino to.cno = llen - 1;
620e0b8e63eSJohn Marino if (from.cno >= llen)
621e0b8e63eSJohn Marino from.cno = llen - 1;
622e0b8e63eSJohn Marino
623e0b8e63eSJohn Marino sp->lno = from.lno;
624e0b8e63eSJohn Marino sp->cno = from.cno;
625e0b8e63eSJohn Marino if (vs_refresh(sp, 1))
626e0b8e63eSJohn Marino goto err;
627e0b8e63eSJohn Marino
628e0b8e63eSJohn Marino vs_update(sp, msg_cat(sp,
629e0b8e63eSJohn Marino "169|Confirm change? [n]", NULL), NULL);
630e0b8e63eSJohn Marino
631e0b8e63eSJohn Marino if (v_event_get(sp, &ev, 0, 0))
632e0b8e63eSJohn Marino goto err;
633e0b8e63eSJohn Marino switch (ev.e_event) {
634e0b8e63eSJohn Marino case E_CHARACTER:
635e0b8e63eSJohn Marino break;
636e0b8e63eSJohn Marino case E_EOF:
637e0b8e63eSJohn Marino case E_ERR:
638e0b8e63eSJohn Marino case E_INTERRUPT:
639e0b8e63eSJohn Marino goto lquit;
640e0b8e63eSJohn Marino default:
641e0b8e63eSJohn Marino v_event_err(sp, &ev);
642e0b8e63eSJohn Marino goto lquit;
643e0b8e63eSJohn Marino }
644e0b8e63eSJohn Marino } else {
645e0b8e63eSJohn Marino if (ex_print(sp, cmdp, &from, &to, 0) ||
646e0b8e63eSJohn Marino ex_scprint(sp, &from, &to))
647e0b8e63eSJohn Marino goto lquit;
648e0b8e63eSJohn Marino if (ex_txt(sp, tiq, 0, TXT_CR))
649e0b8e63eSJohn Marino goto err;
650e0b8e63eSJohn Marino ev.e_c = TAILQ_FIRST(tiq)->lb[0];
651e0b8e63eSJohn Marino }
652e0b8e63eSJohn Marino
653e0b8e63eSJohn Marino switch (ev.e_c) {
654e0b8e63eSJohn Marino case CH_YES:
655e0b8e63eSJohn Marino break;
656e0b8e63eSJohn Marino default:
657e0b8e63eSJohn Marino case CH_NO:
658e0b8e63eSJohn Marino didsub = 0;
659e0b8e63eSJohn Marino BUILD(sp, s +offset, match[0].rm_eo);
660e0b8e63eSJohn Marino goto skip;
661e0b8e63eSJohn Marino case CH_QUIT:
662e0b8e63eSJohn Marino /* Set the quit/interrupted flags. */
663e0b8e63eSJohn Marino lquit: quit = 1;
664e0b8e63eSJohn Marino F_SET(sp->gp, G_INTERRUPTED);
665e0b8e63eSJohn Marino
666e0b8e63eSJohn Marino /*
667e0b8e63eSJohn Marino * Resolve any changes, then return to (and
668e0b8e63eSJohn Marino * exit from) the main loop.
669e0b8e63eSJohn Marino */
670e0b8e63eSJohn Marino goto endmatch;
671e0b8e63eSJohn Marino }
672e0b8e63eSJohn Marino }
673e0b8e63eSJohn Marino
674e0b8e63eSJohn Marino /*
675e0b8e63eSJohn Marino * Set the cursor to the last position changed, converting
676e0b8e63eSJohn Marino * from 1-based to 0-based.
677e0b8e63eSJohn Marino */
678e0b8e63eSJohn Marino sp->lno = lno;
679e0b8e63eSJohn Marino sp->cno = match[0].rm_so;
680e0b8e63eSJohn Marino
681e0b8e63eSJohn Marino /* Copy the bytes before the match into the build buffer. */
682e0b8e63eSJohn Marino BUILD(sp, s + offset, match[0].rm_so);
683e0b8e63eSJohn Marino
684e0b8e63eSJohn Marino /* Substitute the matching bytes. */
685e0b8e63eSJohn Marino didsub = 1;
686e0b8e63eSJohn Marino if (re_sub(sp, s + offset, &lb, &lbclen, &lblen, match))
687e0b8e63eSJohn Marino goto err;
688e0b8e63eSJohn Marino
689e0b8e63eSJohn Marino /* Set the change flag so we know this line was modified. */
690e0b8e63eSJohn Marino linechanged = 1;
691e0b8e63eSJohn Marino
692e0b8e63eSJohn Marino /* Move past the matched bytes. */
693e0b8e63eSJohn Marino skip: offset += match[0].rm_eo;
694e0b8e63eSJohn Marino len -= match[0].rm_eo;
695e0b8e63eSJohn Marino
696e0b8e63eSJohn Marino /* A match cannot be followed by an empty pattern. */
697e0b8e63eSJohn Marino empty_ok = 0;
698e0b8e63eSJohn Marino
699e0b8e63eSJohn Marino /*
700e0b8e63eSJohn Marino * If doing a global change with confirmation, we have to
701e0b8e63eSJohn Marino * update the screen. The basic idea is to store the line
702e0b8e63eSJohn Marino * so the screen update routines can find it, and restart.
703e0b8e63eSJohn Marino */
704e0b8e63eSJohn Marino if (didsub && sp->c_suffix && sp->g_suffix) {
705e0b8e63eSJohn Marino /*
706e0b8e63eSJohn Marino * The new search offset will be the end of the
707e0b8e63eSJohn Marino * modified line.
708e0b8e63eSJohn Marino */
709e0b8e63eSJohn Marino saved_offset = lbclen;
710e0b8e63eSJohn Marino
711e0b8e63eSJohn Marino /* Copy the rest of the line. */
712e0b8e63eSJohn Marino if (len)
713e0b8e63eSJohn Marino BUILD(sp, s + offset, len)
714e0b8e63eSJohn Marino
715e0b8e63eSJohn Marino /* Set the new offset. */
716e0b8e63eSJohn Marino offset = saved_offset;
717e0b8e63eSJohn Marino
718e0b8e63eSJohn Marino /* Store inserted lines, adjusting the build buffer. */
719e0b8e63eSJohn Marino last = 0;
720e0b8e63eSJohn Marino if (sp->newl_cnt) {
721e0b8e63eSJohn Marino for (cnt = 0;
722e0b8e63eSJohn Marino cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
723e0b8e63eSJohn Marino if (db_insert(sp, lno,
724e0b8e63eSJohn Marino lb + last, sp->newl[cnt] - last))
725e0b8e63eSJohn Marino goto err;
726e0b8e63eSJohn Marino last = sp->newl[cnt] + 1;
727e0b8e63eSJohn Marino ++sp->rptlines[L_ADDED];
728e0b8e63eSJohn Marino }
729e0b8e63eSJohn Marino lbclen -= last;
730e0b8e63eSJohn Marino offset -= last;
731e0b8e63eSJohn Marino sp->newl_cnt = 0;
732e0b8e63eSJohn Marino }
733e0b8e63eSJohn Marino
734e0b8e63eSJohn Marino /* Store and retrieve the line. */
735e0b8e63eSJohn Marino if (db_set(sp, lno, lb + last, lbclen))
736e0b8e63eSJohn Marino goto err;
737e0b8e63eSJohn Marino if (db_get(sp, lno, DBG_FATAL, &s, &llen))
738e0b8e63eSJohn Marino goto err;
739e0b8e63eSJohn Marino ADD_SPACE_RETW(sp, bp, blen, llen)
740e0b8e63eSJohn Marino MEMCPY(bp, s, llen);
741e0b8e63eSJohn Marino s = bp;
742e0b8e63eSJohn Marino len = llen - offset;
743e0b8e63eSJohn Marino
744e0b8e63eSJohn Marino /* Restart the build. */
745e0b8e63eSJohn Marino lbclen = 0;
746e0b8e63eSJohn Marino BUILD(sp, s, offset);
747e0b8e63eSJohn Marino
748e0b8e63eSJohn Marino /*
749e0b8e63eSJohn Marino * If we haven't already done the after-the-string
750e0b8e63eSJohn Marino * match, do one. Set REG_NOTEOL so the '$' pattern
751e0b8e63eSJohn Marino * only matches once.
752e0b8e63eSJohn Marino */
753e0b8e63eSJohn Marino if (!do_eol_match)
754e0b8e63eSJohn Marino goto endmatch;
755e0b8e63eSJohn Marino if (offset == len) {
756e0b8e63eSJohn Marino do_eol_match = 0;
757e0b8e63eSJohn Marino eflags |= REG_NOTEOL;
758e0b8e63eSJohn Marino }
759e0b8e63eSJohn Marino goto nextmatch;
760e0b8e63eSJohn Marino }
761e0b8e63eSJohn Marino
762e0b8e63eSJohn Marino /*
763e0b8e63eSJohn Marino * If it's a global:
764e0b8e63eSJohn Marino *
765e0b8e63eSJohn Marino * If at the end of the string, do a test for the after
766e0b8e63eSJohn Marino * the string match. Set REG_NOTEOL so the '$' pattern
767e0b8e63eSJohn Marino * only matches once.
768e0b8e63eSJohn Marino */
769e0b8e63eSJohn Marino if (sp->g_suffix && do_eol_match) {
770e0b8e63eSJohn Marino if (len == 0) {
771e0b8e63eSJohn Marino do_eol_match = 0;
772e0b8e63eSJohn Marino eflags |= REG_NOTEOL;
773e0b8e63eSJohn Marino }
774e0b8e63eSJohn Marino goto nextmatch;
775e0b8e63eSJohn Marino }
776e0b8e63eSJohn Marino
777e0b8e63eSJohn Marino endmatch: if (!linechanged)
778e0b8e63eSJohn Marino continue;
779e0b8e63eSJohn Marino
780e0b8e63eSJohn Marino /* Copy any remaining bytes into the build buffer. */
781e0b8e63eSJohn Marino if (len)
782e0b8e63eSJohn Marino BUILD(sp, s + offset, len)
783e0b8e63eSJohn Marino
784e0b8e63eSJohn Marino /* Store inserted lines, adjusting the build buffer. */
785e0b8e63eSJohn Marino last = 0;
786e0b8e63eSJohn Marino if (sp->newl_cnt) {
787e0b8e63eSJohn Marino for (cnt = 0;
788e0b8e63eSJohn Marino cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
789e0b8e63eSJohn Marino if (db_insert(sp,
790e0b8e63eSJohn Marino lno, lb + last, sp->newl[cnt] - last))
791e0b8e63eSJohn Marino goto err;
792e0b8e63eSJohn Marino last = sp->newl[cnt] + 1;
793e0b8e63eSJohn Marino ++sp->rptlines[L_ADDED];
794e0b8e63eSJohn Marino }
795e0b8e63eSJohn Marino lbclen -= last;
796e0b8e63eSJohn Marino sp->newl_cnt = 0;
797e0b8e63eSJohn Marino }
798e0b8e63eSJohn Marino
799e0b8e63eSJohn Marino /* Store the changed line. */
800e0b8e63eSJohn Marino if (db_set(sp, lno, lb + last, lbclen))
801e0b8e63eSJohn Marino goto err;
802e0b8e63eSJohn Marino
803e0b8e63eSJohn Marino /* Update changed line counter. */
804e0b8e63eSJohn Marino if (sp->rptlchange != lno) {
805e0b8e63eSJohn Marino sp->rptlchange = lno;
806e0b8e63eSJohn Marino ++sp->rptlines[L_CHANGED];
807e0b8e63eSJohn Marino }
808e0b8e63eSJohn Marino
809e0b8e63eSJohn Marino /*
810e0b8e63eSJohn Marino * !!!
811e0b8e63eSJohn Marino * Display as necessary. Historic practice is to only
812e0b8e63eSJohn Marino * display the last line of a line split into multiple
813e0b8e63eSJohn Marino * lines.
814e0b8e63eSJohn Marino */
815e0b8e63eSJohn Marino if (lflag || nflag || pflag) {
816e0b8e63eSJohn Marino from.lno = to.lno = lno;
817e0b8e63eSJohn Marino from.cno = to.cno = 0;
818e0b8e63eSJohn Marino if (lflag)
819e0b8e63eSJohn Marino (void)ex_print(sp, cmdp, &from, &to, E_C_LIST);
820e0b8e63eSJohn Marino if (nflag)
821e0b8e63eSJohn Marino (void)ex_print(sp, cmdp, &from, &to, E_C_HASH);
822e0b8e63eSJohn Marino if (pflag)
823e0b8e63eSJohn Marino (void)ex_print(sp, cmdp, &from, &to, E_C_PRINT);
824e0b8e63eSJohn Marino }
825e0b8e63eSJohn Marino }
826e0b8e63eSJohn Marino
827e0b8e63eSJohn Marino /*
828e0b8e63eSJohn Marino * !!!
829e0b8e63eSJohn Marino * Historically, vi attempted to leave the cursor at the same place if
830e0b8e63eSJohn Marino * the substitution was done at the current cursor position. Otherwise
831e0b8e63eSJohn Marino * it moved it to the first non-blank of the last line changed. There
832e0b8e63eSJohn Marino * were some problems: for example, :s/$/foo/ with the cursor on the
833e0b8e63eSJohn Marino * last character of the line left the cursor on the last character, or
834e0b8e63eSJohn Marino * the & command with multiple occurrences of the matching string in the
835e0b8e63eSJohn Marino * line usually left the cursor in a fairly random position.
836e0b8e63eSJohn Marino *
837e0b8e63eSJohn Marino * We try to do the same thing, with the exception that if the user is
838e0b8e63eSJohn Marino * doing substitution with confirmation, we move to the last line about
839e0b8e63eSJohn Marino * which the user was consulted, as opposed to the last line that they
840e0b8e63eSJohn Marino * actually changed. This prevents a screen flash if the user doesn't
841e0b8e63eSJohn Marino * change many of the possible lines.
842e0b8e63eSJohn Marino */
843e0b8e63eSJohn Marino if (!sp->c_suffix && (sp->lno != slno || sp->cno != scno)) {
844e0b8e63eSJohn Marino sp->cno = 0;
845e0b8e63eSJohn Marino (void)nonblank(sp, sp->lno, &sp->cno);
846e0b8e63eSJohn Marino }
847e0b8e63eSJohn Marino
848e0b8e63eSJohn Marino /*
849e0b8e63eSJohn Marino * If not in a global command, and nothing matched, say so.
850e0b8e63eSJohn Marino * Else, if none of the lines displayed, put something up.
851e0b8e63eSJohn Marino */
852e0b8e63eSJohn Marino rval = 0;
853e0b8e63eSJohn Marino if (!matched) {
854e0b8e63eSJohn Marino if (!F_ISSET(sp, SC_EX_GLOBAL)) {
855e0b8e63eSJohn Marino msgq(sp, M_ERR, "157|No match found");
856e0b8e63eSJohn Marino goto err;
857e0b8e63eSJohn Marino }
858e0b8e63eSJohn Marino } else if (!lflag && !nflag && !pflag)
859e0b8e63eSJohn Marino F_SET(cmdp, E_AUTOPRINT);
860e0b8e63eSJohn Marino
861e0b8e63eSJohn Marino if (0) {
862e0b8e63eSJohn Marino err: rval = 1;
863e0b8e63eSJohn Marino }
864e0b8e63eSJohn Marino
865e0b8e63eSJohn Marino if (bp != NULL)
866e0b8e63eSJohn Marino FREE_SPACEW(sp, bp, blen);
867e0b8e63eSJohn Marino free(lb);
868e0b8e63eSJohn Marino return (rval);
869e0b8e63eSJohn Marino }
870e0b8e63eSJohn Marino
871e0b8e63eSJohn Marino /*
872e0b8e63eSJohn Marino * re_compile --
873e0b8e63eSJohn Marino * Compile the RE.
874e0b8e63eSJohn Marino *
875e0b8e63eSJohn Marino * PUBLIC: int re_compile(SCR *,
876e0b8e63eSJohn Marino * PUBLIC: CHAR_T *, size_t, CHAR_T **, size_t *, regex_t *, u_int);
877e0b8e63eSJohn Marino */
878e0b8e63eSJohn Marino int
re_compile(SCR * sp,CHAR_T * ptrn,size_t plen,CHAR_T ** ptrnp,size_t * lenp,regex_t * rep,u_int flags)879e0b8e63eSJohn Marino re_compile(SCR *sp, CHAR_T *ptrn, size_t plen, CHAR_T **ptrnp, size_t *lenp, regex_t *rep, u_int flags)
880e0b8e63eSJohn Marino {
881e0b8e63eSJohn Marino size_t len;
882e0b8e63eSJohn Marino int reflags, replaced, rval;
883e0b8e63eSJohn Marino CHAR_T *p;
884e0b8e63eSJohn Marino
885e0b8e63eSJohn Marino /* Set RE flags. */
886e0b8e63eSJohn Marino reflags = 0;
887e0b8e63eSJohn Marino if (!LF_ISSET(RE_C_CSCOPE | RE_C_TAG)) {
888e0b8e63eSJohn Marino if (O_ISSET(sp, O_EXTENDED))
889e0b8e63eSJohn Marino reflags |= REG_EXTENDED;
890e0b8e63eSJohn Marino if (O_ISSET(sp, O_IGNORECASE))
891e0b8e63eSJohn Marino reflags |= REG_ICASE;
892e0b8e63eSJohn Marino if (O_ISSET(sp, O_ICLOWER)) {
893e0b8e63eSJohn Marino for (p = ptrn, len = plen; len > 0; ++p, --len)
894e0b8e63eSJohn Marino if (ISUPPER(*p))
895e0b8e63eSJohn Marino break;
896e0b8e63eSJohn Marino if (len == 0)
897e0b8e63eSJohn Marino reflags |= REG_ICASE;
898e0b8e63eSJohn Marino }
899e0b8e63eSJohn Marino }
900e0b8e63eSJohn Marino
901e0b8e63eSJohn Marino /* If we're replacing a saved value, clear the old one. */
902e0b8e63eSJohn Marino if (LF_ISSET(RE_C_SEARCH) && F_ISSET(sp, SC_RE_SEARCH)) {
903e0b8e63eSJohn Marino regfree(&sp->re_c);
904e0b8e63eSJohn Marino F_CLR(sp, SC_RE_SEARCH);
905e0b8e63eSJohn Marino }
906e0b8e63eSJohn Marino if (LF_ISSET(RE_C_SUBST) && F_ISSET(sp, SC_RE_SUBST)) {
907e0b8e63eSJohn Marino regfree(&sp->subre_c);
908e0b8e63eSJohn Marino F_CLR(sp, SC_RE_SUBST);
909e0b8e63eSJohn Marino }
910e0b8e63eSJohn Marino
911e0b8e63eSJohn Marino /*
912e0b8e63eSJohn Marino * If we're saving the string, it's a pattern we haven't seen before,
913e0b8e63eSJohn Marino * so convert the vi-style RE's to POSIX 1003.2 RE's. Save a copy for
914e0b8e63eSJohn Marino * later recompilation. Free any previously saved value.
915e0b8e63eSJohn Marino */
916e0b8e63eSJohn Marino if (ptrnp != NULL) {
917e0b8e63eSJohn Marino replaced = 0;
918e0b8e63eSJohn Marino if (LF_ISSET(RE_C_CSCOPE)) {
919e0b8e63eSJohn Marino if (re_cscope_conv(sp, &ptrn, &plen, &replaced))
920e0b8e63eSJohn Marino return (1);
921e0b8e63eSJohn Marino /*
922e0b8e63eSJohn Marino * XXX
923e0b8e63eSJohn Marino * Currently, the match-any-<blank> expression used in
924e0b8e63eSJohn Marino * re_cscope_conv() requires extended RE's. This may
925e0b8e63eSJohn Marino * not be right or safe.
926e0b8e63eSJohn Marino */
927e0b8e63eSJohn Marino reflags |= REG_EXTENDED;
928e0b8e63eSJohn Marino } else if (LF_ISSET(RE_C_TAG)) {
929e0b8e63eSJohn Marino if (re_tag_conv(sp, &ptrn, &plen, &replaced))
930e0b8e63eSJohn Marino return (1);
931e0b8e63eSJohn Marino } else
932e0b8e63eSJohn Marino if (re_conv(sp, &ptrn, &plen, &replaced))
933e0b8e63eSJohn Marino return (1);
934e0b8e63eSJohn Marino
935e0b8e63eSJohn Marino /* Discard previous pattern. */
936e0b8e63eSJohn Marino free(*ptrnp);
937e0b8e63eSJohn Marino *ptrnp = NULL;
938*b1ac2ebbSDaniel Fojt
939e0b8e63eSJohn Marino if (lenp != NULL)
940e0b8e63eSJohn Marino *lenp = plen;
941e0b8e63eSJohn Marino
942e0b8e63eSJohn Marino /*
943e0b8e63eSJohn Marino * Copy the string into allocated memory.
944e0b8e63eSJohn Marino *
945e0b8e63eSJohn Marino * XXX
946e0b8e63eSJohn Marino * Regcomp isn't 8-bit clean, so the pattern is nul-terminated
947e0b8e63eSJohn Marino * for now. There's just no other solution.
948e0b8e63eSJohn Marino */
949*b1ac2ebbSDaniel Fojt MALLOC(sp, *ptrnp, (plen + 1) * sizeof(CHAR_T));
950e0b8e63eSJohn Marino if (*ptrnp != NULL) {
951e0b8e63eSJohn Marino MEMCPY(*ptrnp, ptrn, plen);
952e0b8e63eSJohn Marino (*ptrnp)[plen] = '\0';
953e0b8e63eSJohn Marino }
954e0b8e63eSJohn Marino
955e0b8e63eSJohn Marino /* Free up conversion-routine-allocated memory. */
956e0b8e63eSJohn Marino if (replaced)
957e0b8e63eSJohn Marino FREE_SPACEW(sp, ptrn, 0);
958e0b8e63eSJohn Marino
959e0b8e63eSJohn Marino if (*ptrnp == NULL)
960e0b8e63eSJohn Marino return (1);
961e0b8e63eSJohn Marino
962e0b8e63eSJohn Marino ptrn = *ptrnp;
963e0b8e63eSJohn Marino }
964e0b8e63eSJohn Marino
965e0b8e63eSJohn Marino /*
966e0b8e63eSJohn Marino * XXX
967e0b8e63eSJohn Marino * Regcomp isn't 8-bit clean, so we just lost if the pattern
968e0b8e63eSJohn Marino * contained a nul. Bummer!
969e0b8e63eSJohn Marino */
970e0b8e63eSJohn Marino if ((rval = regcomp(rep, ptrn, /* plen, */ reflags)) != 0) {
971e0b8e63eSJohn Marino if (!LF_ISSET(RE_C_SILENT))
972e0b8e63eSJohn Marino re_error(sp, rval, rep);
973e0b8e63eSJohn Marino return (1);
974e0b8e63eSJohn Marino }
975e0b8e63eSJohn Marino
976e0b8e63eSJohn Marino if (LF_ISSET(RE_C_SEARCH))
977e0b8e63eSJohn Marino F_SET(sp, SC_RE_SEARCH);
978e0b8e63eSJohn Marino if (LF_ISSET(RE_C_SUBST))
979e0b8e63eSJohn Marino F_SET(sp, SC_RE_SUBST);
980e0b8e63eSJohn Marino
981e0b8e63eSJohn Marino return (0);
982e0b8e63eSJohn Marino }
983e0b8e63eSJohn Marino
984e0b8e63eSJohn Marino /*
985e0b8e63eSJohn Marino * re_conv --
986e0b8e63eSJohn Marino * Convert vi's regular expressions into something that the
987e0b8e63eSJohn Marino * the POSIX 1003.2 RE functions can handle.
988e0b8e63eSJohn Marino *
989e0b8e63eSJohn Marino * There are three conversions we make to make vi's RE's (specifically
990e0b8e63eSJohn Marino * the global, search, and substitute patterns) work with POSIX RE's.
991e0b8e63eSJohn Marino *
992e0b8e63eSJohn Marino * 1: If O_MAGIC is not set, strip backslashes from the magic character
993e0b8e63eSJohn Marino * set (.[*~) that have them, and add them to the ones that don't.
994e0b8e63eSJohn Marino * 2: If O_MAGIC is not set, the string "\~" is replaced with the text
995e0b8e63eSJohn Marino * from the last substitute command's replacement string. If O_MAGIC
996e0b8e63eSJohn Marino * is set, it's the string "~".
997e0b8e63eSJohn Marino * 3: The pattern \<ptrn\> does "word" searches, convert it to use the
998e0b8e63eSJohn Marino * new RE escapes.
999e0b8e63eSJohn Marino *
1000e0b8e63eSJohn Marino * !!!/XXX
1001e0b8e63eSJohn Marino * This doesn't exactly match the historic behavior of vi because we do
1002e0b8e63eSJohn Marino * the ~ substitution before calling the RE engine, so magic characters
1003e0b8e63eSJohn Marino * in the replacement string will be expanded by the RE engine, and they
1004e0b8e63eSJohn Marino * weren't historically. It's a bug.
1005e0b8e63eSJohn Marino */
1006e0b8e63eSJohn Marino static int
re_conv(SCR * sp,CHAR_T ** ptrnp,size_t * plenp,int * replacedp)1007e0b8e63eSJohn Marino re_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1008e0b8e63eSJohn Marino {
1009e0b8e63eSJohn Marino size_t blen, len, needlen;
1010e0b8e63eSJohn Marino int magic;
1011e0b8e63eSJohn Marino CHAR_T *bp, *p, *t;
1012e0b8e63eSJohn Marino
1013e0b8e63eSJohn Marino /*
1014e0b8e63eSJohn Marino * First pass through, we figure out how much space we'll need.
1015e0b8e63eSJohn Marino * We do it in two passes, on the grounds that most of the time
1016e0b8e63eSJohn Marino * the user is doing a search and won't have magic characters.
1017e0b8e63eSJohn Marino * That way we can skip most of the memory allocation and copies.
1018e0b8e63eSJohn Marino */
1019e0b8e63eSJohn Marino magic = 0;
1020e0b8e63eSJohn Marino for (p = *ptrnp, len = *plenp, needlen = 0; len > 0; ++p, --len)
1021e0b8e63eSJohn Marino switch (*p) {
1022e0b8e63eSJohn Marino case '\\':
1023e0b8e63eSJohn Marino if (len > 1) {
1024e0b8e63eSJohn Marino --len;
1025e0b8e63eSJohn Marino switch (*++p) {
1026e0b8e63eSJohn Marino case '<':
1027e0b8e63eSJohn Marino magic = 1;
1028e0b8e63eSJohn Marino needlen += RE_WSTART_LEN + 1;
1029e0b8e63eSJohn Marino break;
1030e0b8e63eSJohn Marino case '>':
1031e0b8e63eSJohn Marino magic = 1;
1032e0b8e63eSJohn Marino needlen += RE_WSTOP_LEN + 1;
1033e0b8e63eSJohn Marino break;
1034e0b8e63eSJohn Marino case '~':
1035e0b8e63eSJohn Marino if (!O_ISSET(sp, O_MAGIC)) {
1036e0b8e63eSJohn Marino magic = 1;
1037e0b8e63eSJohn Marino needlen += sp->repl_len;
1038e0b8e63eSJohn Marino }
1039e0b8e63eSJohn Marino break;
1040e0b8e63eSJohn Marino case '.':
1041e0b8e63eSJohn Marino case '[':
1042e0b8e63eSJohn Marino case '*':
1043e0b8e63eSJohn Marino if (!O_ISSET(sp, O_MAGIC)) {
1044e0b8e63eSJohn Marino magic = 1;
1045e0b8e63eSJohn Marino needlen += 1;
1046e0b8e63eSJohn Marino }
1047e0b8e63eSJohn Marino break;
1048e0b8e63eSJohn Marino default:
1049e0b8e63eSJohn Marino needlen += 2;
1050e0b8e63eSJohn Marino }
1051e0b8e63eSJohn Marino } else
1052e0b8e63eSJohn Marino needlen += 1;
1053e0b8e63eSJohn Marino break;
1054e0b8e63eSJohn Marino case '~':
1055e0b8e63eSJohn Marino if (O_ISSET(sp, O_MAGIC)) {
1056e0b8e63eSJohn Marino magic = 1;
1057e0b8e63eSJohn Marino needlen += sp->repl_len;
1058e0b8e63eSJohn Marino }
1059e0b8e63eSJohn Marino break;
1060e0b8e63eSJohn Marino case '.':
1061e0b8e63eSJohn Marino case '[':
1062e0b8e63eSJohn Marino case '*':
1063e0b8e63eSJohn Marino if (!O_ISSET(sp, O_MAGIC)) {
1064e0b8e63eSJohn Marino magic = 1;
1065e0b8e63eSJohn Marino needlen += 2;
1066e0b8e63eSJohn Marino }
1067e0b8e63eSJohn Marino break;
1068e0b8e63eSJohn Marino default:
1069e0b8e63eSJohn Marino needlen += 1;
1070e0b8e63eSJohn Marino break;
1071e0b8e63eSJohn Marino }
1072e0b8e63eSJohn Marino
1073e0b8e63eSJohn Marino if (!magic) {
1074e0b8e63eSJohn Marino *replacedp = 0;
1075e0b8e63eSJohn Marino return (0);
1076e0b8e63eSJohn Marino }
1077e0b8e63eSJohn Marino
1078e0b8e63eSJohn Marino /* Get enough memory to hold the final pattern. */
1079e0b8e63eSJohn Marino *replacedp = 1;
1080e0b8e63eSJohn Marino GET_SPACE_RETW(sp, bp, blen, needlen);
1081e0b8e63eSJohn Marino
1082e0b8e63eSJohn Marino for (p = *ptrnp, len = *plenp, t = bp; len > 0; ++p, --len)
1083e0b8e63eSJohn Marino switch (*p) {
1084e0b8e63eSJohn Marino case '\\':
1085e0b8e63eSJohn Marino if (len > 1) {
1086e0b8e63eSJohn Marino --len;
1087e0b8e63eSJohn Marino switch (*++p) {
1088e0b8e63eSJohn Marino case '<':
1089e0b8e63eSJohn Marino MEMCPY(t,
1090e0b8e63eSJohn Marino RE_WSTART, RE_WSTART_LEN);
1091e0b8e63eSJohn Marino t += RE_WSTART_LEN;
1092e0b8e63eSJohn Marino break;
1093e0b8e63eSJohn Marino case '>':
1094e0b8e63eSJohn Marino MEMCPY(t,
1095e0b8e63eSJohn Marino RE_WSTOP, RE_WSTOP_LEN);
1096e0b8e63eSJohn Marino t += RE_WSTOP_LEN;
1097e0b8e63eSJohn Marino break;
1098e0b8e63eSJohn Marino case '~':
1099e0b8e63eSJohn Marino if (O_ISSET(sp, O_MAGIC))
1100e0b8e63eSJohn Marino *t++ = '~';
1101e0b8e63eSJohn Marino else {
1102e0b8e63eSJohn Marino MEMCPY(t,
1103e0b8e63eSJohn Marino sp->repl, sp->repl_len);
1104e0b8e63eSJohn Marino t += sp->repl_len;
1105e0b8e63eSJohn Marino }
1106e0b8e63eSJohn Marino break;
1107e0b8e63eSJohn Marino case '.':
1108e0b8e63eSJohn Marino case '[':
1109e0b8e63eSJohn Marino case '*':
1110e0b8e63eSJohn Marino if (O_ISSET(sp, O_MAGIC))
1111e0b8e63eSJohn Marino *t++ = '\\';
1112e0b8e63eSJohn Marino *t++ = *p;
1113e0b8e63eSJohn Marino break;
1114e0b8e63eSJohn Marino default:
1115e0b8e63eSJohn Marino *t++ = '\\';
1116e0b8e63eSJohn Marino *t++ = *p;
1117e0b8e63eSJohn Marino }
1118e0b8e63eSJohn Marino } else
1119e0b8e63eSJohn Marino *t++ = '\\';
1120e0b8e63eSJohn Marino break;
1121e0b8e63eSJohn Marino case '~':
1122e0b8e63eSJohn Marino if (O_ISSET(sp, O_MAGIC)) {
1123e0b8e63eSJohn Marino MEMCPY(t, sp->repl, sp->repl_len);
1124e0b8e63eSJohn Marino t += sp->repl_len;
1125e0b8e63eSJohn Marino } else
1126e0b8e63eSJohn Marino *t++ = '~';
1127e0b8e63eSJohn Marino break;
1128e0b8e63eSJohn Marino case '.':
1129e0b8e63eSJohn Marino case '[':
1130e0b8e63eSJohn Marino case '*':
1131e0b8e63eSJohn Marino if (!O_ISSET(sp, O_MAGIC))
1132e0b8e63eSJohn Marino *t++ = '\\';
1133e0b8e63eSJohn Marino *t++ = *p;
1134e0b8e63eSJohn Marino break;
1135e0b8e63eSJohn Marino default:
1136e0b8e63eSJohn Marino *t++ = *p;
1137e0b8e63eSJohn Marino break;
1138e0b8e63eSJohn Marino }
1139e0b8e63eSJohn Marino
1140e0b8e63eSJohn Marino *ptrnp = bp;
1141e0b8e63eSJohn Marino *plenp = t - bp;
1142e0b8e63eSJohn Marino return (0);
1143e0b8e63eSJohn Marino }
1144e0b8e63eSJohn Marino
1145e0b8e63eSJohn Marino /*
1146e0b8e63eSJohn Marino * re_tag_conv --
1147e0b8e63eSJohn Marino * Convert a tags search path into something that the POSIX
1148e0b8e63eSJohn Marino * 1003.2 RE functions can handle.
1149e0b8e63eSJohn Marino */
1150e0b8e63eSJohn Marino static int
re_tag_conv(SCR * sp,CHAR_T ** ptrnp,size_t * plenp,int * replacedp)1151e0b8e63eSJohn Marino re_tag_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1152e0b8e63eSJohn Marino {
1153e0b8e63eSJohn Marino size_t blen, len;
1154e0b8e63eSJohn Marino int lastdollar;
1155e0b8e63eSJohn Marino CHAR_T *bp, *p, *t;
1156e0b8e63eSJohn Marino
1157e0b8e63eSJohn Marino len = *plenp;
1158e0b8e63eSJohn Marino
1159e0b8e63eSJohn Marino /* Max memory usage is 2 times the length of the string. */
1160e0b8e63eSJohn Marino *replacedp = 1;
1161e0b8e63eSJohn Marino GET_SPACE_RETW(sp, bp, blen, len * 2);
1162e0b8e63eSJohn Marino
1163e0b8e63eSJohn Marino p = *ptrnp;
1164e0b8e63eSJohn Marino t = bp;
1165e0b8e63eSJohn Marino
1166e0b8e63eSJohn Marino /* If the last character is a '/' or '?', we just strip it. */
1167e0b8e63eSJohn Marino if (len > 0 && (p[len - 1] == '/' || p[len - 1] == '?'))
1168e0b8e63eSJohn Marino --len;
1169e0b8e63eSJohn Marino
1170e0b8e63eSJohn Marino /* If the next-to-last or last character is a '$', it's magic. */
1171e0b8e63eSJohn Marino if (len > 0 && p[len - 1] == '$') {
1172e0b8e63eSJohn Marino --len;
1173e0b8e63eSJohn Marino lastdollar = 1;
1174e0b8e63eSJohn Marino } else
1175e0b8e63eSJohn Marino lastdollar = 0;
1176e0b8e63eSJohn Marino
1177e0b8e63eSJohn Marino /* If the first character is a '/' or '?', we just strip it. */
1178e0b8e63eSJohn Marino if (len > 0 && (p[0] == '/' || p[0] == '?')) {
1179e0b8e63eSJohn Marino ++p;
1180e0b8e63eSJohn Marino --len;
1181e0b8e63eSJohn Marino }
1182e0b8e63eSJohn Marino
1183e0b8e63eSJohn Marino /* If the first or second character is a '^', it's magic. */
1184e0b8e63eSJohn Marino if (p[0] == '^') {
1185e0b8e63eSJohn Marino *t++ = *p++;
1186e0b8e63eSJohn Marino --len;
1187e0b8e63eSJohn Marino }
1188e0b8e63eSJohn Marino
1189e0b8e63eSJohn Marino /*
1190e0b8e63eSJohn Marino * Escape every other magic character we can find, meanwhile stripping
1191e0b8e63eSJohn Marino * the backslashes ctags inserts when escaping the search delimiter
1192e0b8e63eSJohn Marino * characters.
1193e0b8e63eSJohn Marino */
1194e0b8e63eSJohn Marino for (; len > 0; --len) {
1195e0b8e63eSJohn Marino if (p[0] == '\\' && (p[1] == '/' || p[1] == '?')) {
1196e0b8e63eSJohn Marino ++p;
1197e0b8e63eSJohn Marino --len;
1198e0b8e63eSJohn Marino } else if (STRCHR(L("^.[]$*"), p[0]))
1199e0b8e63eSJohn Marino *t++ = '\\';
1200e0b8e63eSJohn Marino *t++ = *p++;
1201e0b8e63eSJohn Marino }
1202e0b8e63eSJohn Marino if (lastdollar)
1203e0b8e63eSJohn Marino *t++ = '$';
1204e0b8e63eSJohn Marino
1205e0b8e63eSJohn Marino *ptrnp = bp;
1206e0b8e63eSJohn Marino *plenp = t - bp;
1207e0b8e63eSJohn Marino return (0);
1208e0b8e63eSJohn Marino }
1209e0b8e63eSJohn Marino
1210e0b8e63eSJohn Marino /*
1211e0b8e63eSJohn Marino * re_cscope_conv --
1212e0b8e63eSJohn Marino * Convert a cscope search path into something that the POSIX
1213e0b8e63eSJohn Marino * 1003.2 RE functions can handle.
1214e0b8e63eSJohn Marino */
1215e0b8e63eSJohn Marino static int
re_cscope_conv(SCR * sp,CHAR_T ** ptrnp,size_t * plenp,int * replacedp)1216e0b8e63eSJohn Marino re_cscope_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1217e0b8e63eSJohn Marino {
1218e0b8e63eSJohn Marino size_t blen, len, nspaces;
1219e0b8e63eSJohn Marino CHAR_T *bp, *t;
1220e0b8e63eSJohn Marino CHAR_T *p;
1221e0b8e63eSJohn Marino CHAR_T *wp;
1222e0b8e63eSJohn Marino size_t wlen;
1223e0b8e63eSJohn Marino
1224e0b8e63eSJohn Marino /*
1225e0b8e63eSJohn Marino * Each space in the source line printed by cscope represents an
1226e0b8e63eSJohn Marino * arbitrary sequence of spaces, tabs, and comments.
1227e0b8e63eSJohn Marino */
1228e0b8e63eSJohn Marino #define CSCOPE_RE_SPACE "([ \t]|/\\*([^*]|\\*/)*\\*/)*"
1229e0b8e63eSJohn Marino #define CSCOPE_LEN sizeof(CSCOPE_RE_SPACE) - 1
1230e0b8e63eSJohn Marino CHAR2INT(sp, CSCOPE_RE_SPACE, CSCOPE_LEN, wp, wlen);
1231e0b8e63eSJohn Marino for (nspaces = 0, p = *ptrnp, len = *plenp; len > 0; ++p, --len)
1232e0b8e63eSJohn Marino if (*p == ' ')
1233e0b8e63eSJohn Marino ++nspaces;
1234e0b8e63eSJohn Marino
1235e0b8e63eSJohn Marino /*
1236e0b8e63eSJohn Marino * Allocate plenty of space:
1237e0b8e63eSJohn Marino * the string, plus potential escaping characters;
1238e0b8e63eSJohn Marino * nspaces + 2 copies of CSCOPE_RE_SPACE;
1239e0b8e63eSJohn Marino * ^, $, nul terminator characters.
1240e0b8e63eSJohn Marino */
1241e0b8e63eSJohn Marino *replacedp = 1;
1242e0b8e63eSJohn Marino len = (p - *ptrnp) * 2 + (nspaces + 2) * sizeof(CSCOPE_RE_SPACE) + 3;
1243e0b8e63eSJohn Marino GET_SPACE_RETW(sp, bp, blen, len);
1244e0b8e63eSJohn Marino
1245e0b8e63eSJohn Marino p = *ptrnp;
1246e0b8e63eSJohn Marino t = bp;
1247e0b8e63eSJohn Marino
1248e0b8e63eSJohn Marino *t++ = '^';
1249e0b8e63eSJohn Marino MEMCPY(t, wp, wlen);
1250e0b8e63eSJohn Marino t += wlen;
1251e0b8e63eSJohn Marino
1252e0b8e63eSJohn Marino for (len = *plenp; len > 0; ++p, --len)
1253e0b8e63eSJohn Marino if (*p == ' ') {
1254e0b8e63eSJohn Marino MEMCPY(t, wp, wlen);
1255e0b8e63eSJohn Marino t += wlen;
1256e0b8e63eSJohn Marino } else {
1257e0b8e63eSJohn Marino if (STRCHR(L("\\^.[]$*+?()|{}"), *p))
1258e0b8e63eSJohn Marino *t++ = '\\';
1259e0b8e63eSJohn Marino *t++ = *p;
1260e0b8e63eSJohn Marino }
1261e0b8e63eSJohn Marino
1262e0b8e63eSJohn Marino MEMCPY(t, wp, wlen);
1263e0b8e63eSJohn Marino t += wlen;
1264e0b8e63eSJohn Marino *t++ = '$';
1265e0b8e63eSJohn Marino
1266e0b8e63eSJohn Marino *ptrnp = bp;
1267e0b8e63eSJohn Marino *plenp = t - bp;
1268e0b8e63eSJohn Marino return (0);
1269e0b8e63eSJohn Marino }
1270e0b8e63eSJohn Marino
1271e0b8e63eSJohn Marino /*
1272e0b8e63eSJohn Marino * re_error --
1273e0b8e63eSJohn Marino * Report a regular expression error.
1274e0b8e63eSJohn Marino *
1275e0b8e63eSJohn Marino * PUBLIC: void re_error(SCR *, int, regex_t *);
1276e0b8e63eSJohn Marino */
1277e0b8e63eSJohn Marino void
re_error(SCR * sp,int errcode,regex_t * preg)1278e0b8e63eSJohn Marino re_error(SCR *sp, int errcode, regex_t *preg)
1279e0b8e63eSJohn Marino {
1280e0b8e63eSJohn Marino size_t s;
1281e0b8e63eSJohn Marino char *oe;
1282e0b8e63eSJohn Marino
1283e0b8e63eSJohn Marino s = regerror(errcode, preg, "", 0);
1284*b1ac2ebbSDaniel Fojt MALLOC(sp, oe, s);
1285e0b8e63eSJohn Marino if (oe != NULL) {
1286e0b8e63eSJohn Marino (void)regerror(errcode, preg, oe, s);
1287e0b8e63eSJohn Marino msgq(sp, M_ERR, "RE error: %s", oe);
1288e0b8e63eSJohn Marino free(oe);
1289e0b8e63eSJohn Marino }
1290e0b8e63eSJohn Marino }
1291e0b8e63eSJohn Marino
1292e0b8e63eSJohn Marino /*
1293e0b8e63eSJohn Marino * re_sub --
1294e0b8e63eSJohn Marino * Do the substitution for a regular expression.
1295e0b8e63eSJohn Marino */
1296e0b8e63eSJohn Marino static int
re_sub(SCR * sp,CHAR_T * ip,CHAR_T ** lbp,size_t * lbclenp,size_t * lblenp,regmatch_t match[10])1297e0b8e63eSJohn Marino re_sub(
1298e0b8e63eSJohn Marino SCR *sp,
1299e0b8e63eSJohn Marino CHAR_T *ip, /* Input line. */
1300e0b8e63eSJohn Marino CHAR_T **lbp,
1301e0b8e63eSJohn Marino size_t *lbclenp,
1302e0b8e63eSJohn Marino size_t *lblenp,
1303e0b8e63eSJohn Marino regmatch_t match[10])
1304e0b8e63eSJohn Marino {
1305e0b8e63eSJohn Marino enum { C_NOTSET, C_LOWER, C_ONELOWER, C_ONEUPPER, C_UPPER } conv;
1306e0b8e63eSJohn Marino size_t lbclen, lblen; /* Local copies. */
1307e0b8e63eSJohn Marino size_t mlen; /* Match length. */
1308e0b8e63eSJohn Marino size_t rpl; /* Remaining replacement length. */
1309e0b8e63eSJohn Marino CHAR_T *rp; /* Replacement pointer. */
1310e0b8e63eSJohn Marino int ch;
1311e0b8e63eSJohn Marino int no; /* Match replacement offset. */
1312e0b8e63eSJohn Marino CHAR_T *p, *t; /* Buffer pointers. */
1313e0b8e63eSJohn Marino CHAR_T *lb; /* Local copies. */
1314e0b8e63eSJohn Marino
1315e0b8e63eSJohn Marino lb = *lbp; /* Get local copies. */
1316e0b8e63eSJohn Marino lbclen = *lbclenp;
1317e0b8e63eSJohn Marino lblen = *lblenp;
1318e0b8e63eSJohn Marino
1319e0b8e63eSJohn Marino /*
1320e0b8e63eSJohn Marino * QUOTING NOTE:
1321e0b8e63eSJohn Marino *
1322e0b8e63eSJohn Marino * There are some special sequences that vi provides in the
1323e0b8e63eSJohn Marino * replacement patterns.
1324e0b8e63eSJohn Marino * & string the RE matched (\& if nomagic set)
1325e0b8e63eSJohn Marino * \# n-th regular subexpression
1326e0b8e63eSJohn Marino * \E end \U, \L conversion
1327e0b8e63eSJohn Marino * \e end \U, \L conversion
1328e0b8e63eSJohn Marino * \l convert the next character to lower-case
1329e0b8e63eSJohn Marino * \L convert to lower-case, until \E, \e, or end of replacement
1330e0b8e63eSJohn Marino * \u convert the next character to upper-case
1331e0b8e63eSJohn Marino * \U convert to upper-case, until \E, \e, or end of replacement
1332e0b8e63eSJohn Marino *
1333e0b8e63eSJohn Marino * Otherwise, since this is the lowest level of replacement, discard
1334e0b8e63eSJohn Marino * all escaping characters. This (hopefully) matches historic practice.
1335e0b8e63eSJohn Marino */
1336e0b8e63eSJohn Marino #define OUTCH(ch, nltrans) { \
1337e0b8e63eSJohn Marino ARG_CHAR_T __ch = (ch); \
1338e0b8e63eSJohn Marino e_key_t __value = KEY_VAL(sp, __ch); \
1339e0b8e63eSJohn Marino if (nltrans && (__value == K_CR || __value == K_NL)) { \
1340e0b8e63eSJohn Marino NEEDNEWLINE(sp); \
1341e0b8e63eSJohn Marino sp->newl[sp->newl_cnt++] = lbclen; \
1342e0b8e63eSJohn Marino } else if (conv != C_NOTSET) { \
1343e0b8e63eSJohn Marino switch (conv) { \
1344e0b8e63eSJohn Marino case C_ONELOWER: \
1345e0b8e63eSJohn Marino conv = C_NOTSET; \
1346e0b8e63eSJohn Marino /* FALLTHROUGH */ \
1347e0b8e63eSJohn Marino case C_LOWER: \
1348e0b8e63eSJohn Marino if (ISUPPER(__ch)) \
1349e0b8e63eSJohn Marino __ch = TOLOWER(__ch); \
1350e0b8e63eSJohn Marino break; \
1351e0b8e63eSJohn Marino case C_ONEUPPER: \
1352e0b8e63eSJohn Marino conv = C_NOTSET; \
1353e0b8e63eSJohn Marino /* FALLTHROUGH */ \
1354e0b8e63eSJohn Marino case C_UPPER: \
1355e0b8e63eSJohn Marino if (ISLOWER(__ch)) \
1356e0b8e63eSJohn Marino __ch = TOUPPER(__ch); \
1357e0b8e63eSJohn Marino break; \
1358e0b8e63eSJohn Marino default: \
1359e0b8e63eSJohn Marino abort(); \
1360e0b8e63eSJohn Marino } \
1361e0b8e63eSJohn Marino } \
1362e0b8e63eSJohn Marino NEEDSP(sp, 1, p); \
1363e0b8e63eSJohn Marino *p++ = __ch; \
1364e0b8e63eSJohn Marino ++lbclen; \
1365e0b8e63eSJohn Marino }
1366e0b8e63eSJohn Marino conv = C_NOTSET;
1367e0b8e63eSJohn Marino for (rp = sp->repl, rpl = sp->repl_len, p = lb + lbclen; rpl--;) {
1368e0b8e63eSJohn Marino switch (ch = *rp++) {
1369e0b8e63eSJohn Marino case '&':
1370e0b8e63eSJohn Marino if (O_ISSET(sp, O_MAGIC)) {
1371e0b8e63eSJohn Marino no = 0;
1372e0b8e63eSJohn Marino goto subzero;
1373e0b8e63eSJohn Marino }
1374e0b8e63eSJohn Marino break;
1375e0b8e63eSJohn Marino case '\\':
1376e0b8e63eSJohn Marino if (rpl == 0)
1377e0b8e63eSJohn Marino break;
1378e0b8e63eSJohn Marino --rpl;
1379e0b8e63eSJohn Marino switch (ch = *rp) {
1380e0b8e63eSJohn Marino case '&':
1381e0b8e63eSJohn Marino ++rp;
1382e0b8e63eSJohn Marino if (!O_ISSET(sp, O_MAGIC)) {
1383e0b8e63eSJohn Marino no = 0;
1384e0b8e63eSJohn Marino goto subzero;
1385e0b8e63eSJohn Marino }
1386e0b8e63eSJohn Marino break;
1387e0b8e63eSJohn Marino case '0': case '1': case '2': case '3': case '4':
1388e0b8e63eSJohn Marino case '5': case '6': case '7': case '8': case '9':
1389e0b8e63eSJohn Marino no = *rp++ - '0';
1390e0b8e63eSJohn Marino subzero: if (match[no].rm_so == -1 ||
1391e0b8e63eSJohn Marino match[no].rm_eo == -1)
1392e0b8e63eSJohn Marino break;
1393e0b8e63eSJohn Marino mlen = match[no].rm_eo - match[no].rm_so;
1394e0b8e63eSJohn Marino for (t = ip + match[no].rm_so; mlen--; ++t)
1395e0b8e63eSJohn Marino OUTCH(*t, 0);
1396e0b8e63eSJohn Marino continue;
1397e0b8e63eSJohn Marino case 'e':
1398e0b8e63eSJohn Marino case 'E':
1399e0b8e63eSJohn Marino ++rp;
1400e0b8e63eSJohn Marino conv = C_NOTSET;
1401e0b8e63eSJohn Marino continue;
1402e0b8e63eSJohn Marino case 'l':
1403e0b8e63eSJohn Marino ++rp;
1404e0b8e63eSJohn Marino conv = C_ONELOWER;
1405e0b8e63eSJohn Marino continue;
1406e0b8e63eSJohn Marino case 'L':
1407e0b8e63eSJohn Marino ++rp;
1408e0b8e63eSJohn Marino conv = C_LOWER;
1409e0b8e63eSJohn Marino continue;
1410e0b8e63eSJohn Marino case 'u':
1411e0b8e63eSJohn Marino ++rp;
1412e0b8e63eSJohn Marino conv = C_ONEUPPER;
1413e0b8e63eSJohn Marino continue;
1414e0b8e63eSJohn Marino case 'U':
1415e0b8e63eSJohn Marino ++rp;
1416e0b8e63eSJohn Marino conv = C_UPPER;
1417e0b8e63eSJohn Marino continue;
1418e0b8e63eSJohn Marino case '\r':
1419e0b8e63eSJohn Marino OUTCH(ch, 0);
1420e0b8e63eSJohn Marino continue;
1421e0b8e63eSJohn Marino default:
1422e0b8e63eSJohn Marino ++rp;
1423e0b8e63eSJohn Marino break;
1424e0b8e63eSJohn Marino }
1425e0b8e63eSJohn Marino }
1426e0b8e63eSJohn Marino OUTCH(ch, 1);
1427e0b8e63eSJohn Marino }
1428e0b8e63eSJohn Marino
1429e0b8e63eSJohn Marino *lbp = lb; /* Update caller's information. */
1430e0b8e63eSJohn Marino *lbclenp = lbclen;
1431e0b8e63eSJohn Marino *lblenp = lblen;
1432e0b8e63eSJohn Marino return (0);
1433e0b8e63eSJohn Marino }
1434