xref: /minix/external/bsd/nvi/dist/ex/ex_subst.c (revision 84d9c625)
1 /*	$NetBSD: ex_subst.c,v 1.3 2013/11/25 22:43:46 christos Exp $ */
2 /*-
3  * Copyright (c) 1992, 1993, 1994
4  *	The Regents of the University of California.  All rights reserved.
5  * Copyright (c) 1992, 1993, 1994, 1995, 1996
6  *	Keith Bostic.  All rights reserved.
7  *
8  * See the LICENSE file for redistribution information.
9  */
10 
11 #include "config.h"
12 
13 #ifndef lint
14 static const char sccsid[] = "Id: ex_subst.c,v 10.50 2002/02/09 21:18:23 skimo Exp  (Berkeley) Date: 2002/02/09 21:18:23 ";
15 #endif /* not lint */
16 
17 #include <sys/types.h>
18 #include <sys/queue.h>
19 #include <sys/time.h>
20 
21 #include <bitstring.h>
22 #include <ctype.h>
23 #include <errno.h>
24 #include <limits.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <unistd.h>
29 
30 #include "../common/common.h"
31 #include "../vi/vi.h"
32 
/*
 * Flags passed by the ex_s()/ex_subagain()/ex_subtilde() front ends to s().
 */
#define	SUB_FIRST	0x01		/* A new RE was just compiled, so the 'r' flag is rejected. */
#define	SUB_MUSTSETR	0x02		/* No RE was given, so the 'r' flag is required. */

/* File-local helpers; the *_conv routines are called from re_compile(). */
static int re_conv __P((SCR *, CHAR_T **, size_t *, int *));
static int re_cscope_conv __P((SCR *, CHAR_T **, size_t *, int *));
static int re_sub __P((SCR *,
		CHAR_T *, CHAR_T **, size_t *, size_t *, regmatch_t [10]));
static int re_tag_conv __P((SCR *, CHAR_T **, size_t *, int *));
static int s __P((SCR *, EXCMD *, CHAR_T *, regex_t *, u_int));
42 
43 /*
44  * ex_s --
45  *	[line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]]
46  *
47  *	Substitute on lines matching a pattern.
48  *
49  * PUBLIC: int ex_s __P((SCR *, EXCMD *));
50  */
51 int
52 ex_s(SCR *sp, EXCMD *cmdp)
53 {
54 	regex_t *re;
55 	size_t blen, len;
56 	u_int flags;
57 	ARG_CHAR_T delim;
58 	CHAR_T *bp, *p, *ptrn, *rep, *t;
59 
60 	/*
61 	 * Skip leading white space.
62 	 *
63 	 * !!!
64 	 * Historic vi allowed any non-alphanumeric to serve as the
65 	 * substitution command delimiter.
66 	 *
67 	 * !!!
68 	 * If the arguments are empty, it's the same as &, i.e. we
69 	 * repeat the last substitution.
70 	 */
71 	if (cmdp->argc == 0)
72 		goto subagain;
73 	for (p = cmdp->argv[0]->bp,
74 	    len = cmdp->argv[0]->len; len > 0; --len, ++p) {
75 		if (!ISBLANK((UCHAR_T)*p))
76 			break;
77 	}
78 	if (len == 0)
79 subagain:	return (ex_subagain(sp, cmdp));
80 
81 	delim = (UCHAR_T)*p++;
82 	if (ISALNUM(delim) || delim == '\\')
83 		return (s(sp, cmdp, p, &sp->subre_c, SUB_MUSTSETR));
84 
85 	/*
86 	 * !!!
87 	 * The full-blown substitute command reset the remembered
88 	 * state of the 'c' and 'g' suffices.
89 	 */
90 	sp->c_suffix = sp->g_suffix = 0;
91 
92 	/*
93 	 * Get the pattern string, toss escaping characters.
94 	 *
95 	 * !!!
96 	 * Historic vi accepted any of the following forms:
97 	 *
98 	 *	:s/abc/def/		change "abc" to "def"
99 	 *	:s/abc/def		change "abc" to "def"
100 	 *	:s/abc/			delete "abc"
101 	 *	:s/abc			delete "abc"
102 	 *
103 	 * QUOTING NOTE:
104 	 *
105 	 * Only toss an escaping character if it escapes a delimiter.
106 	 * This means that "s/A/\\\\f" replaces "A" with "\\f".  It
107 	 * would be nice to be more regular, i.e. for each layer of
108 	 * escaping a single escaping character is removed, but that's
109 	 * not how the historic vi worked.
110 	 */
111 	for (ptrn = t = p;;) {
112 		if (p[0] == '\0' || p[0] == delim) {
113 			if (p[0] == delim)
114 				++p;
115 			/*
116 			 * !!!
117 			 * Nul terminate the pattern string -- it's passed
118 			 * to regcomp which doesn't understand anything else.
119 			 */
120 			*t = '\0';
121 			break;
122 		}
123 		if (p[0] == '\\') {
124 			if (p[1] == delim)
125 				++p;
126 			else if (p[1] == '\\')
127 				*t++ = *p++;
128 		}
129 		*t++ = *p++;
130 	}
131 
132 	/*
133 	 * If the pattern string is empty, use the last RE (not just the
134 	 * last substitution RE).
135 	 */
136 	if (*ptrn == '\0') {
137 		if (sp->re == NULL) {
138 			ex_emsg(sp, NULL, EXM_NOPREVRE);
139 			return (1);
140 		}
141 
142 		/* Re-compile the RE if necessary. */
143 		if (!F_ISSET(sp, SC_RE_SEARCH) &&
144 		    re_compile(sp, sp->re, sp->re_len,
145 		    NULL, NULL, &sp->re_c, SEARCH_CSEARCH | SEARCH_MSG))
146 			return (1);
147 		flags = 0;
148 	} else {
149 		/*
150 		 * !!!
151 		 * Compile the RE.  Historic practice is that substitutes set
152 		 * the search direction as well as both substitute and search
153 		 * RE's.  We compile the RE twice, as we don't want to bother
154 		 * ref counting the pattern string and (opaque) structure.
155 		 */
156 		if (re_compile(sp, ptrn, t - ptrn, &sp->re,
157 		    &sp->re_len, &sp->re_c, SEARCH_CSEARCH | SEARCH_MSG))
158 			return (1);
159 		if (re_compile(sp, ptrn, t - ptrn, &sp->subre,
160 		    &sp->subre_len, &sp->subre_c, SEARCH_CSUBST | SEARCH_MSG))
161 			return (1);
162 
163 		flags = SUB_FIRST;
164 		sp->searchdir = FORWARD;
165 	}
166 	re = &sp->re_c;
167 
168 	/*
169 	 * Get the replacement string.
170 	 *
171 	 * The special character & (\& if O_MAGIC not set) matches the
172 	 * entire RE.  No handling of & is required here, it's done by
173 	 * re_sub().
174 	 *
175 	 * The special character ~ (\~ if O_MAGIC not set) inserts the
176 	 * previous replacement string into this replacement string.
177 	 * Count ~'s to figure out how much space we need.  We could
178 	 * special case nonexistent last patterns or whether or not
179 	 * O_MAGIC is set, but it's probably not worth the effort.
180 	 *
181 	 * QUOTING NOTE:
182 	 *
183 	 * Only toss an escaping character if it escapes a delimiter or
184 	 * if O_MAGIC is set and it escapes a tilde.
185 	 *
186 	 * !!!
187 	 * If the entire replacement pattern is "%", then use the last
188 	 * replacement pattern.  This semantic was added to vi in System
189 	 * V and then percolated elsewhere, presumably around the time
190 	 * that it was added to their version of ed(1).
191 	 */
192 	if (p[0] == L('\0') || p[0] == delim) {
193 		if (p[0] == delim)
194 			++p;
195 		if (sp->repl != NULL)
196 			free(sp->repl);
197 		sp->repl = NULL;
198 		sp->repl_len = 0;
199 	} else if (p[0] == L('%') && (p[1] == L('\0') || p[1] == delim))
200 		p += p[1] == delim ? 2 : 1;
201 	else {
202 		for (rep = p, len = 0;
203 		    p[0] != L('\0') && p[0] != delim; ++p, ++len)
204 			if (p[0] == L('~'))
205 				len += sp->repl_len;
206 		GET_SPACE_RETW(sp, bp, blen, len);
207 		for (t = bp, len = 0, p = rep;;) {
208 			if (p[0] == L('\0') || p[0] == delim) {
209 				if (p[0] == delim)
210 					++p;
211 				break;
212 			}
213 			if (p[0] == L('\\')) {
214 				if (p[1] == delim)
215 					++p;
216 				else if (p[1] == L('\\')) {
217 					*t++ = *p++;
218 					++len;
219 				} else if (p[1] == L('~')) {
220 					++p;
221 					if (!O_ISSET(sp, O_MAGIC))
222 						goto tilde;
223 				}
224 			} else if (p[0] == L('~') && O_ISSET(sp, O_MAGIC)) {
225 tilde:				++p;
226 				MEMCPYW(t, sp->repl, sp->repl_len);
227 				t += sp->repl_len;
228 				len += sp->repl_len;
229 				continue;
230 			}
231 			*t++ = *p++;
232 			++len;
233 		}
234 		if ((sp->repl_len = len) != 0) {
235 			if (sp->repl != NULL)
236 				free(sp->repl);
237 			if ((sp->repl = malloc(len * sizeof(CHAR_T))) == NULL) {
238 				msgq(sp, M_SYSERR, NULL);
239 				FREE_SPACEW(sp, bp, blen);
240 				return (1);
241 			}
242 			MEMCPYW(sp->repl, bp, len);
243 		}
244 		FREE_SPACEW(sp, bp, blen);
245 	}
246 	return (s(sp, cmdp, p, re, flags));
247 }
248 
249 /*
250  * ex_subagain --
251  *	[line [,line]] & [cgr] [count] [#lp]]
252  *
253  *	Substitute using the last substitute RE and replacement pattern.
254  *
255  * PUBLIC: int ex_subagain __P((SCR *, EXCMD *));
256  */
257 int
258 ex_subagain(SCR *sp, EXCMD *cmdp)
259 {
260 	if (sp->subre == NULL) {
261 		ex_emsg(sp, NULL, EXM_NOPREVRE);
262 		return (1);
263 	}
264 	if (!F_ISSET(sp, SC_RE_SUBST) &&
265 	    re_compile(sp, sp->subre, sp->subre_len,
266 	    NULL, NULL, &sp->subre_c, SEARCH_CSUBST | SEARCH_MSG))
267 		return (1);
268 	return (s(sp,
269 	    cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->subre_c, 0));
270 }
271 
272 /*
273  * ex_subtilde --
274  *	[line [,line]] ~ [cgr] [count] [#lp]]
275  *
276  *	Substitute using the last RE and last substitute replacement pattern.
277  *
278  * PUBLIC: int ex_subtilde __P((SCR *, EXCMD *));
279  */
280 int
281 ex_subtilde(SCR *sp, EXCMD *cmdp)
282 {
283 	if (sp->re == NULL) {
284 		ex_emsg(sp, NULL, EXM_NOPREVRE);
285 		return (1);
286 	}
287 	if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp, sp->re,
288 	    sp->re_len, NULL, NULL, &sp->re_c, SEARCH_CSEARCH | SEARCH_MSG))
289 		return (1);
290 	return (s(sp,
291 	    cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->re_c, 0));
292 }
293 
294 /*
295  * s --
296  * Do the substitution.  This stuff is *really* tricky.  There are lots of
297  * special cases, and general nastiness.  Don't mess with it unless you're
298  * pretty confident.
299  *
300  * The nasty part of the substitution is what happens when the replacement
301  * string contains newlines.  It's a bit tricky -- consider the information
302  * that has to be retained for "s/f\(o\)o/^M\1^M\1/".  The solution here is
303  * to build a set of newline offsets which we use to break the line up later,
304  * when the replacement is done.  Don't change it unless you're *damned*
305  * confident.
306  */
/*
 * The three macros below deliberately expand to a bare brace block (not
 * do/while) because existing call sites omit the trailing semicolon.
 * Each one executes "return (1)" in the *calling* function if memory
 * cannot be obtained.  BUILD and NEEDSP use the caller's locals lb,
 * lbclen and lblen.
 */

/*
 * NEEDNEWLINE --
 *	Make sure sp->newl has room for one more newline offset, growing
 *	the table by 25 entries when it is full.  On failure both the
 *	capacity and the count are cleared, so that no later pass walks
 *	a table that is no longer there.
 */
#define	NEEDNEWLINE(sp) {						\
	if ((sp)->newl_len == (sp)->newl_cnt) {				\
		(sp)->newl_len += 25;					\
		REALLOC(sp, (sp)->newl, size_t *,			\
		    (sp)->newl_len * sizeof(size_t));			\
		if ((sp)->newl == NULL) {				\
			(sp)->newl_len = 0;				\
			(sp)->newl_cnt = 0;				\
			return (1);					\
		}							\
	}								\
}

/*
 * BUILD --
 *	Append len characters starting at l to the build buffer, growing
 *	the buffer by at least 256 characters when it is too small.
 */
#define	BUILD(sp, l, len) {						\
	if (lbclen + (len) > lblen) {					\
		lblen += MAX(lbclen + (len), 256);			\
		REALLOC(sp, lb, CHAR_T *, lblen * sizeof(CHAR_T));	\
		if (lb == NULL) {					\
			lbclen = 0;					\
			return (1);					\
		}							\
	}								\
	MEMCPYW(lb + lbclen, (l), (len));				\
	lbclen += (len);						\
}

/*
 * NEEDSP --
 *	Make sure the build buffer has room for len more characters.  If
 *	the buffer had to be reallocated, pnt is reset to the current end
 *	of the built text, since the buffer may have moved.
 */
#define	NEEDSP(sp, len, pnt) {						\
	if (lbclen + (len) > lblen) {					\
		lblen += MAX(lbclen + (len), 256);			\
		REALLOC(sp, lb, CHAR_T *, lblen * sizeof(CHAR_T));	\
		if (lb == NULL) {					\
			lbclen = 0;					\
			return (1);					\
		}							\
		(pnt) = lb + lbclen;					\
	}								\
}
343 
344 static int
345 s(SCR *sp, EXCMD *cmdp, CHAR_T *st, regex_t *re, u_int flags)
346 {
347 	EVENT ev;
348 	MARK from, to;
349 	TEXTH tiq;
350 	db_recno_t elno, lno, slno;
351 	u_long ul;
352 	regmatch_t match[10];
353 	size_t blen, cnt, last, lbclen, lblen, len, llen;
354 	size_t offset, saved_offset, scno;
355 	int lflag, nflag, pflag, rflag;
356 	int didsub, do_eol_match, eflags, empty_ok, eval;
357 	int linechanged, matched, quit, rval;
358 	CHAR_T *lb, *bp;
359 	enum nresult nret;
360 
361 	NEEDFILE(sp, cmdp);
362 
363 	slno = sp->lno;
364 	scno = sp->cno;
365 
366 	/*
367 	 * !!!
368 	 * Historically, the 'g' and 'c' suffices were always toggled as flags,
369 	 * so ":s/A/B/" was the same as ":s/A/B/ccgg".  If O_EDCOMPATIBLE was
370 	 * not set, they were initialized to 0 for all substitute commands.  If
371 	 * O_EDCOMPATIBLE was set, they were initialized to 0 only if the user
372 	 * specified substitute/replacement patterns (see ex_s()).
373 	 */
374 	if (!O_ISSET(sp, O_EDCOMPATIBLE))
375 		sp->c_suffix = sp->g_suffix = 0;
376 
377 	/*
378 	 * Historic vi permitted the '#', 'l' and 'p' options in vi mode, but
379 	 * it only displayed the last change.  I'd disallow them, but they are
380 	 * useful in combination with the [v]global commands.  In the current
381 	 * model the problem is combining them with the 'c' flag -- the screen
382 	 * would have to flip back and forth between the confirm screen and the
383 	 * ex print screen, which would be pretty awful.  We do display all
384 	 * changes, though, for what that's worth.
385 	 *
386 	 * !!!
387 	 * Historic vi was fairly strict about the order of "options", the
388 	 * count, and "flags".  I'm somewhat fuzzy on the difference between
389 	 * options and flags, anyway, so this is a simpler approach, and we
390 	 * just take it them in whatever order the user gives them.  (The ex
391 	 * usage statement doesn't reflect this.)
392 	 */
393 	lflag = nflag = pflag = rflag = 0;
394 	if (st == NULL)
395 		goto noargs;
396 	for (lno = OOBLNO; *st != '\0'; ++st)
397 		switch (*st) {
398 		case ' ':
399 		case '\t':
400 			continue;
401 		case '+':
402 			++cmdp->flagoff;
403 			break;
404 		case '-':
405 			--cmdp->flagoff;
406 			break;
407 		case '0': case '1': case '2': case '3': case '4':
408 		case '5': case '6': case '7': case '8': case '9':
409 			if (lno != OOBLNO)
410 				goto usage;
411 			errno = 0;
412 			nret = nget_uslong(sp, &ul, st, &st, 10);
413 			lno = ul;
414 			if (*st == '\0')		/* Loop increment correction. */
415 				--st;
416 			if (nret != NUM_OK) {
417 				if (nret == NUM_OVER)
418 					msgq(sp, M_ERR, "153|Count overflow");
419 				else if (nret == NUM_UNDER)
420 					msgq(sp, M_ERR, "154|Count underflow");
421 				else
422 					msgq(sp, M_SYSERR, NULL);
423 				return (1);
424 			}
425 			/*
426 			 * In historic vi, the count was inclusive from the
427 			 * second address.
428 			 */
429 			cmdp->addr1.lno = cmdp->addr2.lno;
430 			cmdp->addr2.lno += lno - 1;
431 			if (!db_exist(sp, cmdp->addr2.lno) &&
432 			    db_last(sp, &cmdp->addr2.lno))
433 				return (1);
434 			break;
435 		case '#':
436 			nflag = 1;
437 			break;
438 		case 'c':
439 			sp->c_suffix = !sp->c_suffix;
440 
441 			/* Ex text structure initialization. */
442 			if (F_ISSET(sp, SC_EX)) {
443 				memset(&tiq, 0, sizeof(TEXTH));
444 				TAILQ_INIT(&tiq);
445 			}
446 			break;
447 		case 'g':
448 			sp->g_suffix = !sp->g_suffix;
449 			break;
450 		case 'l':
451 			lflag = 1;
452 			break;
453 		case 'p':
454 			pflag = 1;
455 			break;
456 		case 'r':
457 			if (LF_ISSET(SUB_FIRST)) {
458 				msgq(sp, M_ERR,
459 		    "155|Regular expression specified; r flag meaningless");
460 				return (1);
461 			}
462 			if (!F_ISSET(sp, SC_RE_SEARCH)) {
463 				ex_emsg(sp, NULL, EXM_NOPREVRE);
464 				return (1);
465 			}
466 			rflag = 1;
467 			re = &sp->re_c;
468 			break;
469 		default:
470 			goto usage;
471 		}
472 
473 	if (*st != '\0' || (!rflag && LF_ISSET(SUB_MUSTSETR))) {
474 usage:		ex_emsg(sp, cmdp->cmd->usage, EXM_USAGE);
475 		return (1);
476 	}
477 
478 noargs:	if (F_ISSET(sp, SC_VI) && sp->c_suffix && (lflag || nflag || pflag)) {
479 		msgq(sp, M_ERR,
480 "156|The #, l and p flags may not be combined with the c flag in vi mode");
481 		return (1);
482 	}
483 
484 	/*
485 	 * bp:		if interactive, line cache
486 	 * blen:	if interactive, line cache length
487 	 * lb:		build buffer pointer.
488 	 * lbclen:	current length of built buffer.
489 	 * lblen;	length of build buffer.
490 	 */
491 	bp = lb = NULL;
492 	blen = lbclen = lblen = 0;
493 
494 	/* For each line... */
495 	lno = cmdp->addr1.lno == 0 ? 1 : cmdp->addr1.lno;
496 	for (matched = quit = 0,
497 	    elno = cmdp->addr2.lno; !quit && lno <= elno; ++lno) {
498 
499 		/* Someone's unhappy, time to stop. */
500 		if (INTERRUPTED(sp))
501 			break;
502 
503 		/* Get the line. */
504 		if (db_get(sp, lno, DBG_FATAL, &st, &llen))
505 			goto err;
506 
507 		/*
508 		 * Make a local copy if doing confirmation -- when calling
509 		 * the confirm routine we're likely to lose the cached copy.
510 		 */
511 		if (sp->c_suffix) {
512 			if (bp == NULL) {
513 				GET_SPACE_RETW(sp, bp, blen, llen);
514 			} else
515 				ADD_SPACE_RETW(sp, bp, blen, llen);
516 			MEMCPYW(bp, st, llen);
517 			st = bp;
518 		}
519 
520 		/* Start searching from the beginning. */
521 		offset = 0;
522 		len = llen;
523 
524 		/* Reset the build buffer offset. */
525 		lbclen = 0;
526 
527 		/* Reset empty match flag. */
528 		empty_ok = 1;
529 
530 		/*
531 		 * We don't want to have to do a setline if the line didn't
532 		 * change -- keep track of whether or not this line changed.
533 		 * If doing confirmations, don't want to keep setting the
534 		 * line if change is refused -- keep track of substitutions.
535 		 */
536 		didsub = linechanged = 0;
537 
538 		/* New line, do an EOL match. */
539 		do_eol_match = 1;
540 
541 		/* It's not nul terminated, but we pretend it is. */
542 		eflags = REG_STARTEND;
543 
544 		/*
545 		 * The search area is from st + offset to the EOL.
546 		 *
547 		 * Generally, match[0].rm_so is the offset of the start
548 		 * of the match from the start of the search, and offset
549 		 * is the offset of the start of the last search.
550 		 */
551 nextmatch:	match[0].rm_so = 0;
552 		match[0].rm_eo = len;
553 
554 		/* Get the next match. */
555 		eval = regexec(re, st + offset, 10, match, eflags);
556 
557 		/*
558 		 * There wasn't a match or if there was an error, deal with
559 		 * it.  If there was a previous match in this line, resolve
560 		 * the changes into the database.  Otherwise, just move on.
561 		 */
562 		if (eval == REG_NOMATCH)
563 			goto endmatch;
564 		if (eval != 0) {
565 			re_error(sp, eval, re);
566 			goto err;
567 		}
568 		matched = 1;
569 
570 		/* Only the first search can match an anchored expression. */
571 		eflags |= REG_NOTBOL;
572 
573 		/*
574 		 * !!!
575 		 * It's possible to match 0-length strings -- for example, the
576 		 * command s;a*;X;, when matched against the string "aabb" will
577 		 * result in "XbXbX", i.e. the matches are "aa", the space
578 		 * between the b's and the space between the b's and the end of
579 		 * the string.  There is a similar space between the beginning
580 		 * of the string and the a's.  The rule that we use (because vi
581 		 * historically used it) is that any 0-length match, occurring
582 		 * immediately after a match, is ignored.  Otherwise, the above
583 		 * example would have resulted in "XXbXbX".  Another example is
584 		 * incorrectly using " *" to replace groups of spaces with one
585 		 * space.
586 		 *
587 		 * The way we do this is that if we just had a successful match,
588 		 * the starting offset does not skip characters, and the match
589 		 * is empty, ignore the match and move forward.  If there's no
590 		 * more characters in the string, we were attempting to match
591 		 * after the last character, so quit.
592 		 */
593 		if (!empty_ok && match[0].rm_so == 0 && match[0].rm_eo == 0) {
594 			empty_ok = 1;
595 			if (len == 0)
596 				goto endmatch;
597 			BUILD(sp, st + offset, 1)
598 			++offset;
599 			--len;
600 			goto nextmatch;
601 		}
602 
603 		/* Confirm change. */
604 		if (sp->c_suffix) {
605 			/*
606 			 * Set the cursor position for confirmation.  Note,
607 			 * if we matched on a '$', the cursor may be past
608 			 * the end of line.
609 			 */
610 			from.lno = to.lno = lno;
611 			from.cno = match[0].rm_so + offset;
612 			to.cno = match[0].rm_eo + offset;
613 			/*
614 			 * Both ex and vi have to correct for a change before
615 			 * the first character in the line.
616 			 */
617 			if (llen == 0)
618 				from.cno = to.cno = 0;
619 			if (F_ISSET(sp, SC_VI)) {
620 				/*
621 				 * Only vi has to correct for a change after
622 				 * the last character in the line.
623 				 *
624 				 * XXX
625 				 * It would be nice to change the vi code so
626 				 * that we could display a cursor past EOL.
627 				 */
628 				if (to.cno >= llen)
629 					to.cno = llen - 1;
630 				if (from.cno >= llen)
631 					from.cno = llen - 1;
632 
633 				sp->lno = from.lno;
634 				sp->cno = from.cno;
635 				if (vs_refresh(sp, 1))
636 					goto err;
637 
638 				vs_update(sp, msg_cat(sp,
639 				    "169|Confirm change? [n]", NULL), NULL);
640 
641 				if (v_event_get(sp, &ev, 0, 0))
642 					goto err;
643 				switch (ev.e_event) {
644 				case E_CHARACTER:
645 					break;
646 				case E_EOF:
647 				case E_ERR:
648 				case E_INTERRUPT:
649 					goto lquit;
650 				default:
651 					v_event_err(sp, &ev);
652 					goto lquit;
653 				}
654 			} else {
655 				if (ex_print(sp, cmdp, &from, &to, 0) ||
656 				    ex_scprint(sp, &from, &to))
657 					goto lquit;
658 				if (ex_txt(sp, &tiq, 0, TXT_CR))
659 					goto err;
660 				ev.e_c = TAILQ_FIRST(&tiq)->lb[0];
661 			}
662 
663 			switch (ev.e_c) {
664 			case CH_YES:
665 				break;
666 			default:
667 			case CH_NO:
668 				didsub = 0;
669 				BUILD(sp, st + offset, match[0].rm_eo);
670 				goto skip;
671 			case CH_QUIT:
672 				/* Set the quit/interrupted flags. */
673 lquit:				quit = 1;
674 				F_SET(sp->gp, G_INTERRUPTED);
675 
676 				/*
677 				 * Resolve any changes, then return to (and
678 				 * exit from) the main loop.
679 				 */
680 				goto endmatch;
681 			}
682 		}
683 
684 		/*
685 		 * Set the cursor to the last position changed, converting
686 		 * from 1-based to 0-based.
687 		 */
688 		sp->lno = lno;
689 		sp->cno = match[0].rm_so;
690 
691 		/* Copy the bytes before the match into the build buffer. */
692 		BUILD(sp, st + offset, match[0].rm_so);
693 
694 		/* Substitute the matching bytes. */
695 		didsub = 1;
696 		if (re_sub(sp, st + offset, &lb, &lbclen, &lblen, match))
697 			goto err;
698 
699 		/* Set the change flag so we know this line was modified. */
700 		linechanged = 1;
701 
702 		/* Move past the matched bytes. */
703 skip:		offset += match[0].rm_eo;
704 		len -= match[0].rm_eo;
705 
706 		/* A match cannot be followed by an empty pattern. */
707 		empty_ok = 0;
708 
709 		/*
710 		 * If doing a global change with confirmation, we have to
711 		 * update the screen.  The basic idea is to store the line
712 		 * so the screen update routines can find it, and restart.
713 		 */
714 		if (didsub && sp->c_suffix && sp->g_suffix) {
715 			/*
716 			 * The new search offset will be the end of the
717 			 * modified line.
718 			 */
719 			saved_offset = lbclen;
720 
721 			/* Copy the rest of the line. */
722 			if (len)
723 				BUILD(sp, st + offset, len)
724 
725 			/* Set the new offset. */
726 			offset = saved_offset;
727 
728 			/* Store inserted lines, adjusting the build buffer. */
729 			last = 0;
730 			if (sp->newl_cnt) {
731 				for (cnt = 0;
732 				    cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
733 					if (db_insert(sp, lno,
734 					    lb + last, sp->newl[cnt] - last))
735 						goto err;
736 					last = sp->newl[cnt] + 1;
737 					++sp->rptlines[L_ADDED];
738 				}
739 				lbclen -= last;
740 				offset -= last;
741 				sp->newl_cnt = 0;
742 			}
743 
744 			/* Store and retrieve the line. */
745 			if (db_set(sp, lno, lb + last, lbclen))
746 				goto err;
747 			if (db_get(sp, lno, DBG_FATAL, &st, &llen))
748 				goto err;
749 			ADD_SPACE_RETW(sp, bp, blen, llen)
750 			MEMCPYW(bp, st, llen);
751 			st = bp;
752 			len = llen - offset;
753 
754 			/* Restart the build. */
755 			lbclen = 0;
756 			BUILD(sp, st, offset);
757 
758 			/*
759 			 * If we haven't already done the after-the-string
760 			 * match, do one.  Set REG_NOTEOL so the '$' pattern
761 			 * only matches once.
762 			 */
763 			if (!do_eol_match)
764 				goto endmatch;
765 			if (offset == len) {
766 				do_eol_match = 0;
767 				eflags |= REG_NOTEOL;
768 			}
769 			goto nextmatch;
770 		}
771 
772 		/*
773 		 * If it's a global:
774 		 *
775 		 * If at the end of the string, do a test for the after
776 		 * the string match.  Set REG_NOTEOL so the '$' pattern
777 		 * only matches once.
778 		 */
779 		if (sp->g_suffix && do_eol_match) {
780 			if (len == 0) {
781 				do_eol_match = 0;
782 				eflags |= REG_NOTEOL;
783 			}
784 			goto nextmatch;
785 		}
786 
787 endmatch:	if (!linechanged)
788 			continue;
789 
790 		/* Copy any remaining bytes into the build buffer. */
791 		if (len)
792 			BUILD(sp, st + offset, len)
793 
794 		/* Store inserted lines, adjusting the build buffer. */
795 		last = 0;
796 		if (sp->newl_cnt) {
797 			for (cnt = 0;
798 			    cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
799 				if (db_insert(sp,
800 				    lno, lb + last, sp->newl[cnt] - last))
801 					goto err;
802 				last = sp->newl[cnt] + 1;
803 				++sp->rptlines[L_ADDED];
804 			}
805 			lbclen -= last;
806 			sp->newl_cnt = 0;
807 		}
808 
809 		/* Store the changed line. */
810 		if (db_set(sp, lno, lb + last, lbclen))
811 			goto err;
812 
813 		/* Update changed line counter. */
814 		if (sp->rptlchange != lno) {
815 			sp->rptlchange = lno;
816 			++sp->rptlines[L_CHANGED];
817 		}
818 
819 		/*
820 		 * !!!
821 		 * Display as necessary.  Historic practice is to only
822 		 * display the last line of a line split into multiple
823 		 * lines.
824 		 */
825 		if (lflag || nflag || pflag) {
826 			from.lno = to.lno = lno;
827 			from.cno = to.cno = 0;
828 			if (lflag)
829 				(void)ex_print(sp, cmdp, &from, &to, E_C_LIST);
830 			if (nflag)
831 				(void)ex_print(sp, cmdp, &from, &to, E_C_HASH);
832 			if (pflag)
833 				(void)ex_print(sp, cmdp, &from, &to, E_C_PRINT);
834 		}
835 	}
836 
837 	/*
838 	 * !!!
839 	 * Historically, vi attempted to leave the cursor at the same place if
840 	 * the substitution was done at the current cursor position.  Otherwise
841 	 * it moved it to the first non-blank of the last line changed.  There
842 	 * were some problems: for example, :s/$/foo/ with the cursor on the
843 	 * last character of the line left the cursor on the last character, or
844 	 * the & command with multiple occurrences of the matching string in the
845 	 * line usually left the cursor in a fairly random position.
846 	 *
847 	 * We try to do the same thing, with the exception that if the user is
848 	 * doing substitution with confirmation, we move to the last line about
849 	 * which the user was consulted, as opposed to the last line that they
850 	 * actually changed.  This prevents a screen flash if the user doesn't
851 	 * change many of the possible lines.
852 	 */
853 	if (!sp->c_suffix && (sp->lno != slno || sp->cno != scno)) {
854 		sp->cno = 0;
855 		(void)nonblank(sp, sp->lno, &sp->cno);
856 	}
857 
858 	/*
859 	 * If not in a global command, and nothing matched, say so.
860 	 * Else, if none of the lines displayed, put something up.
861 	 */
862 	rval = 0;
863 	if (!matched) {
864 		if (!F_ISSET(sp, SC_EX_GLOBAL)) {
865 			msgq(sp, M_ERR, "157|No match found");
866 			goto err;
867 		}
868 	} else if (!lflag && !nflag && !pflag)
869 		F_SET(cmdp, E_AUTOPRINT);
870 
871 	if (0) {
872 err:		rval = 1;
873 	}
874 
875 	if (bp != NULL)
876 		FREE_SPACEW(sp, bp, blen);
877 	if (lb != NULL)
878 		free(lb);
879 	return (rval);
880 }
881 
882 /*
883  * re_compile --
884  *	Compile the RE.
885  *
886  * PUBLIC: int re_compile __P((SCR *,
887  * PUBLIC:     CHAR_T *, size_t, CHAR_T **, size_t *, regex_t *, u_int));
888  */
889 int
890 re_compile(SCR *sp, CHAR_T *ptrn, size_t plen, CHAR_T **ptrnp, size_t *lenp, regex_t *rep, u_int flags)
891 {
892 	size_t len;
893 	int reflags, replaced, rval;
894 	CHAR_T *p;
895 
896 	/* Set RE flags. */
897 	reflags = 0;
898 	if (LF_ISSET(SEARCH_EXTEND))
899 		reflags |= REG_EXTENDED;
900 	if (LF_ISSET(SEARCH_IC))
901 		reflags |= REG_ICASE;
902 	if (LF_ISSET(SEARCH_LITERAL))
903 		reflags |= REG_NOSPEC;
904 	if (!LF_ISSET(SEARCH_NOOPT | SEARCH_CSCOPE | SEARCH_TAG)) {
905 		if (O_ISSET(sp, O_EXTENDED))
906 			reflags |= REG_EXTENDED;
907 		if (O_ISSET(sp, O_IGNORECASE))
908 			reflags |= REG_ICASE;
909 		if (O_ISSET(sp, O_ICLOWER))
910 			goto iclower;
911 	}
912 	if (LF_ISSET(SEARCH_ICL)) {
913 iclower:	for (p = ptrn, len = plen; len > 0; ++p, --len)
914 			if (ISUPPER((UCHAR_T)*p))
915 				break;
916 		if (len == 0)
917 			reflags |= REG_ICASE;
918 	}
919 
920 	/* If we're replacing a saved value, clear the old one. */
921 	if (LF_ISSET(SEARCH_CSEARCH) && F_ISSET(sp, SC_RE_SEARCH)) {
922 		regfree(&sp->re_c);
923 		F_CLR(sp, SC_RE_SEARCH);
924 	}
925 	if (LF_ISSET(SEARCH_CSUBST) && F_ISSET(sp, SC_RE_SUBST)) {
926 		regfree(&sp->subre_c);
927 		F_CLR(sp, SC_RE_SUBST);
928 	}
929 
930 	/*
931 	 * If we're saving the string, it's a pattern we haven't seen before,
932 	 * so convert the vi-style RE's to POSIX 1003.2 RE's.  Save a copy for
933 	 * later recompilation.   Free any previously saved value.
934 	 */
935 	if (ptrnp != NULL) {
936 		replaced = 0;
937 		if (LF_ISSET(SEARCH_CSCOPE)) {
938 			if (re_cscope_conv(sp, &ptrn, &plen, &replaced))
939 				return (1);
940 			/*
941 			 * XXX
942 			 * Currently, the match-any-<blank> expression used in
943 			 * re_cscope_conv() requires extended RE's.  This may
944 			 * not be right or safe.
945 			 */
946 			reflags |= REG_EXTENDED;
947 		} else if (LF_ISSET(SEARCH_TAG)) {
948 			if (re_tag_conv(sp, &ptrn, &plen, &replaced))
949 				return (1);
950 		} else if (!LF_ISSET(SEARCH_LITERAL))
951 			if (re_conv(sp, &ptrn, &plen, &replaced))
952 				return (1);
953 
954 		/* Discard previous pattern. */
955 		if (*ptrnp != NULL) {
956 			free(*ptrnp);
957 			*ptrnp = NULL;
958 		}
959 		if (lenp != NULL)
960 			*lenp = plen;
961 
962 		/*
963 		 * Copy the string into allocated memory.
964 		 *
965 		 * XXX
966 		 * Regcomp isn't 8-bit clean, so the pattern is nul-terminated
967 		 * for now.  There's just no other solution.
968 		 */
969 		MALLOC(sp, *ptrnp, CHAR_T *, (plen + 1) * sizeof(CHAR_T));
970 		if (*ptrnp != NULL) {
971 			MEMCPYW(*ptrnp, ptrn, plen);
972 			(*ptrnp)[plen] = '\0';
973 		}
974 
975 		/* Free up conversion-routine-allocated memory. */
976 		if (replaced)
977 			FREE_SPACEW(sp, ptrn, 0);
978 
979 		if (*ptrnp == NULL)
980 			return (1);
981 
982 		ptrn = *ptrnp;
983 	}
984 
985 	/*
986 	 * XXX
987 	 * Regcomp isn't 8-bit clean, so we just lost if the pattern
988 	 * contained a nul.  Bummer!
989 	 */
990 	if ((rval = regcomp(rep, ptrn, /* plen, */ reflags)) != 0) {
991 		if (LF_ISSET(SEARCH_MSG))
992 			re_error(sp, rval, rep);
993 		return (1);
994 	}
995 
996 	if (LF_ISSET(SEARCH_CSEARCH))
997 		F_SET(sp, SC_RE_SEARCH);
998 	if (LF_ISSET(SEARCH_CSUBST))
999 		F_SET(sp, SC_RE_SUBST);
1000 
1001 	return (0);
1002 }
1003 
1004 /*
1005  * re_conv --
 *	Convert vi's regular expressions into something that the
 *	POSIX 1003.2 RE functions can handle.
1008  *
1009  * There are three conversions we make to make vi's RE's (specifically
1010  * the global, search, and substitute patterns) work with POSIX RE's.
1011  *
1012  * 1: If O_MAGIC is not set, strip backslashes from the magic character
1013  *    set (.[*~) that have them, and add them to the ones that don't.
1014  * 2: If O_MAGIC is not set, the string "\~" is replaced with the text
1015  *    from the last substitute command's replacement string.  If O_MAGIC
1016  *    is set, it's the string "~".
1017  * 3: The pattern \<ptrn\> does "word" searches, convert it to use the
1018  *    new RE escapes.
1019  *
1020  * !!!/XXX
1021  * This doesn't exactly match the historic behavior of vi because we do
1022  * the ~ substitution before calling the RE engine, so magic characters
1023  * in the replacement string will be expanded by the RE engine, and they
1024  * weren't historically.  It's a bug.
1025  */
1026 static int
1027 re_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1028 {
1029 	size_t blen, len, needlen;
1030 	int magic;
1031 	CHAR_T *bp, *p, *t;
1032 
1033 	/*
1034 	 * First pass through, we figure out how much space we'll need.
1035 	 * We do it in two passes, on the grounds that most of the time
1036 	 * the user is doing a search and won't have magic characters.
1037 	 * That way we can skip most of the memory allocation and copies.
1038 	 */
1039 	magic = 0;
1040 	for (p = *ptrnp, len = *plenp, needlen = 0; len > 0; ++p, --len)
1041 		switch (*p) {
1042 		case '\\':
1043 			if (len > 1) {
1044 				--len;
1045 				switch (*++p) {
1046 				case '<':
1047 					magic = 1;
1048 					needlen += RE_WSTART_LEN + 1;
1049 					break;
1050 				case '>':
1051 					magic = 1;
1052 					needlen += RE_WSTOP_LEN + 1;
1053 					break;
1054 				case '~':
1055 					if (!O_ISSET(sp, O_MAGIC)) {
1056 						magic = 1;
1057 						needlen += sp->repl_len;
1058 					}
1059 					break;
1060 				case '.':
1061 				case '[':
1062 				case '*':
1063 					if (!O_ISSET(sp, O_MAGIC)) {
1064 						magic = 1;
1065 						needlen += 1;
1066 					}
1067 					break;
1068 				default:
1069 					needlen += 2;
1070 				}
1071 			} else
1072 				needlen += 1;
1073 			break;
1074 		case '~':
1075 			if (O_ISSET(sp, O_MAGIC)) {
1076 				magic = 1;
1077 				needlen += sp->repl_len;
1078 			}
1079 			break;
1080 		case '.':
1081 		case '[':
1082 		case '*':
1083 			if (!O_ISSET(sp, O_MAGIC)) {
1084 				magic = 1;
1085 				needlen += 2;
1086 			}
1087 			break;
1088 		default:
1089 			needlen += 1;
1090 			break;
1091 		}
1092 
1093 	if (!magic) {
1094 		*replacedp = 0;
1095 		return (0);
1096 	}
1097 
1098 	/* Get enough memory to hold the final pattern. */
1099 	*replacedp = 1;
1100 	GET_SPACE_RETW(sp, bp, blen, needlen);
1101 
1102 	for (p = *ptrnp, len = *plenp, t = bp; len > 0; ++p, --len)
1103 		switch (*p) {
1104 		case '\\':
1105 			if (len > 1) {
1106 				--len;
1107 				switch (*++p) {
1108 				case '<':
1109 					MEMCPY(t,
1110 					    RE_WSTART, RE_WSTART_LEN);
1111 					t += RE_WSTART_LEN;
1112 					break;
1113 				case '>':
1114 					MEMCPY(t,
1115 					    RE_WSTOP, RE_WSTOP_LEN);
1116 					t += RE_WSTOP_LEN;
1117 					break;
1118 				case '~':
1119 					if (O_ISSET(sp, O_MAGIC))
1120 						*t++ = '~';
1121 					else {
1122 						MEMCPYW(t,
1123 						    sp->repl, sp->repl_len);
1124 						t += sp->repl_len;
1125 					}
1126 					break;
1127 				case '.':
1128 				case '[':
1129 				case '*':
1130 					if (O_ISSET(sp, O_MAGIC))
1131 						*t++ = '\\';
1132 					*t++ = *p;
1133 					break;
1134 				default:
1135 					*t++ = '\\';
1136 					*t++ = *p;
1137 				}
1138 			} else
1139 				*t++ = '\\';
1140 			break;
1141 		case '~':
1142 			if (O_ISSET(sp, O_MAGIC)) {
1143 				MEMCPYW(t, sp->repl, sp->repl_len);
1144 				t += sp->repl_len;
1145 			} else
1146 				*t++ = '~';
1147 			break;
1148 		case '.':
1149 		case '[':
1150 		case '*':
1151 			if (!O_ISSET(sp, O_MAGIC))
1152 				*t++ = '\\';
1153 			*t++ = *p;
1154 			break;
1155 		default:
1156 			*t++ = *p;
1157 			break;
1158 		}
1159 
1160 	*ptrnp = bp;
1161 	*plenp = t - bp;
1162 	return (0);
1163 }
1164 
1165 /*
1166  * re_tag_conv --
1167  *	Convert a tags search path into something that the POSIX
1168  *	1003.2 RE functions can handle.
1169  */
1170 static int
1171 re_tag_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1172 {
1173 	size_t blen, len;
1174 	int lastdollar;
1175 	CHAR_T *bp, *p, *t;
1176 
1177 	len = *plenp;
1178 
1179 	/* Max memory usage is 2 times the length of the string. */
1180 	*replacedp = 1;
1181 	GET_SPACE_RETW(sp, bp, blen, len * 2);
1182 
1183 	p = *ptrnp;
1184 	t = bp;
1185 
1186 	/* If the last character is a '/' or '?', we just strip it. */
1187 	if (len > 0 && (p[len - 1] == '/' || p[len - 1] == '?'))
1188 		--len;
1189 
1190 	/* If the next-to-last or last character is a '$', it's magic. */
1191 	if (len > 0 && p[len - 1] == '$') {
1192 		--len;
1193 		lastdollar = 1;
1194 	} else
1195 		lastdollar = 0;
1196 
1197 	/* If the first character is a '/' or '?', we just strip it. */
1198 	if (len > 0 && (p[0] == '/' || p[0] == '?')) {
1199 		++p;
1200 		--len;
1201 	}
1202 
1203 	/* If the first or second character is a '^', it's magic. */
1204 	if (p[0] == '^') {
1205 		*t++ = *p++;
1206 		--len;
1207 	}
1208 
1209 	/*
1210 	 * Escape every other magic character we can find, meanwhile stripping
1211 	 * the backslashes ctags inserts when escaping the search delimiter
1212 	 * characters.
1213 	 */
1214 	for (; len > 0; --len) {
1215 		if (p[0] == '\\' && (p[1] == '/' || p[1] == '?')) {
1216 			++p;
1217 			--len;
1218 		} else if (strchr("^.[]$*", p[0]))
1219 			*t++ = '\\';
1220 		*t++ = *p++;
1221 	}
1222 	if (lastdollar)
1223 		*t++ = '$';
1224 
1225 	*ptrnp = bp;
1226 	*plenp = t - bp;
1227 	return (0);
1228 }
1229 
1230 /*
1231  * re_cscope_conv --
1232  *	 Convert a cscope search path into something that the POSIX
1233  *      1003.2 RE functions can handle.
1234  */
1235 static int
1236 re_cscope_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1237 {
1238 	size_t blen, len, nspaces;
1239 	CHAR_T *bp, *t;
1240 	CHAR_T *p;
1241 	const CHAR_T *wp;
1242 	size_t wlen;
1243 
1244 	/*
1245 	 * Each space in the source line printed by cscope represents an
1246 	 * arbitrary sequence of spaces, tabs, and comments.
1247 	 */
1248 #define	CSCOPE_RE_SPACE		"([ \t]|/\\*([^*]|\\*/)*\\*/)*"
1249 #define CSCOPE_LEN	sizeof(CSCOPE_RE_SPACE) - 1
1250 	CHAR2INT(sp, CSCOPE_RE_SPACE, CSCOPE_LEN, wp, wlen);
1251 	for (nspaces = 0, p = *ptrnp, len = *plenp; len > 0; ++p, --len)
1252 		if (*p == ' ')
1253 			++nspaces;
1254 
1255 	/*
1256 	 * Allocate plenty of space:
1257 	 *	the string, plus potential escaping characters;
1258 	 *	nspaces + 2 copies of CSCOPE_RE_SPACE;
1259 	 *	^, $, nul terminator characters.
1260 	 */
1261 	*replacedp = 1;
1262 	len = (p - *ptrnp) * 2 + (nspaces + 2) * sizeof(CSCOPE_RE_SPACE) + 3;
1263 	GET_SPACE_RETW(sp, bp, blen, len);
1264 
1265 	p = *ptrnp;
1266 	t = bp;
1267 
1268 	*t++ = '^';
1269 	MEMCPYW(t, wp, wlen);
1270 	t += wlen;
1271 
1272 	for (len = *plenp; len > 0; ++p, --len)
1273 		if (*p == ' ') {
1274 			MEMCPYW(t, wp, wlen);
1275 			t += wlen;
1276 		} else {
1277 			if (strchr("\\^.[]$*+?()|{}", *p))
1278 				*t++ = '\\';
1279 			*t++ = *p;
1280 		}
1281 
1282 	MEMCPYW(t, wp, wlen);
1283 	t += wlen;
1284 	*t++ = '$';
1285 
1286 	*ptrnp = bp;
1287 	*plenp = t - bp;
1288 	return (0);
1289 }
1290 
1291 /*
1292  * re_error --
1293  *	Report a regular expression error.
1294  *
1295  * PUBLIC: void re_error __P((SCR *, int, regex_t *));
1296  */
1297 void
1298 re_error(SCR *sp, int errcode, regex_t *preg)
1299 {
1300 	size_t sz;
1301 	char *oe;
1302 
1303 	sz = regerror(errcode, preg, NULL, 0);
1304 	if ((oe = malloc(sz)) == NULL)
1305 		msgq(sp, M_SYSERR, NULL);
1306 	else {
1307 		(void)regerror(errcode, preg, oe, sz);
1308 		msgq(sp, M_ERR, "RE error: %s", oe);
1309 		free(oe);
1310 	}
1311 }
1312 
1313 /*
1314  * re_sub --
1315  * 	Do the substitution for a regular expression.
1316  */
1317 static int
1318 re_sub(SCR *sp, CHAR_T *ip, CHAR_T **lbp, size_t *lbclenp, size_t *lblenp, regmatch_t *match)
1319 
1320 	           			/* Input line. */
1321 
1322 
1323 
1324 {
1325 	enum { C_NOT_SET, C_LOWER, C_ONE_LOWER, C_ONE_UPPER, C_UPPER } conv;
1326 	size_t lbclen, lblen;		/* Local copies. */
1327 	size_t mlen;			/* Match length. */
1328 	size_t rpl;			/* Remaining replacement length. */
1329 	CHAR_T *rp;			/* Replacement pointer. */
1330 	int ch;
1331 	int no;				/* Match replacement offset. */
1332 	CHAR_T *p, *t;			/* Buffer pointers. */
1333 	CHAR_T *lb;			/* Local copies. */
1334 
1335 	lb = *lbp;			/* Get local copies. */
1336 	lbclen = *lbclenp;
1337 	lblen = *lblenp;
1338 
1339 	/*
1340 	 * QUOTING NOTE:
1341 	 *
1342 	 * There are some special sequences that vi provides in the
1343 	 * replacement patterns.
1344 	 *	 & string the RE matched (\& if nomagic set)
1345 	 *	\# n-th regular subexpression
1346 	 *	\E end \U, \L conversion
1347 	 *	\e end \U, \L conversion
1348 	 *	\l convert the next character to lower-case
1349 	 *	\L convert to lower-case, until \E, \e, or end of replacement
1350 	 *	\u convert the next character to upper-case
1351 	 *	\U convert to upper-case, until \E, \e, or end of replacement
1352 	 *
1353 	 * Otherwise, since this is the lowest level of replacement, discard
1354 	 * all escaping characters.  This (hopefully) matches historic practice.
1355 	 */
1356 #define	OUTCH(ch, nltrans) {						\
1357 	ARG_CHAR_T __ch = (ch);						\
1358 	e_key_t __value = KEY_VAL(sp, __ch);				\
1359 	if (nltrans && (__value == K_CR || __value == K_NL)) {		\
1360 		NEEDNEWLINE(sp);					\
1361 		sp->newl[sp->newl_cnt++] = lbclen;			\
1362 	} else if (conv != C_NOT_SET) {					\
1363 		switch (conv) {						\
1364 		case C_ONE_LOWER:					\
1365 			conv = C_NOT_SET;				\
1366 			/* FALLTHROUGH */				\
1367 		case C_LOWER:						\
1368 			if (ISUPPER(__ch))				\
1369 				__ch = TOLOWER(__ch);			\
1370 			break;						\
1371 		case C_ONE_UPPER:					\
1372 			conv = C_NOT_SET;				\
1373 			/* FALLTHROUGH */				\
1374 		case C_UPPER:						\
1375 			if (ISLOWER(__ch))				\
1376 				__ch = TOUPPER(__ch);			\
1377 			break;						\
1378 		default:						\
1379 			abort();					\
1380 		}							\
1381 	}								\
1382 	NEEDSP(sp, 1, p);						\
1383 	*p++ = __ch;							\
1384 	++lbclen;							\
1385 }
1386 	conv = C_NOT_SET;
1387 	for (rp = sp->repl, rpl = sp->repl_len, p = lb + lbclen; rpl--;) {
1388 		switch (ch = *rp++) {
1389 		case '&':
1390 			if (O_ISSET(sp, O_MAGIC)) {
1391 				no = 0;
1392 				goto subzero;
1393 			}
1394 			break;
1395 		case '\\':
1396 			if (rpl == 0)
1397 				break;
1398 			--rpl;
1399 			switch (ch = *rp) {
1400 			case '&':
1401 				++rp;
1402 				if (!O_ISSET(sp, O_MAGIC)) {
1403 					no = 0;
1404 					goto subzero;
1405 				}
1406 				break;
1407 			case '0': case '1': case '2': case '3': case '4':
1408 			case '5': case '6': case '7': case '8': case '9':
1409 				no = *rp++ - '0';
1410 subzero:			if (match[no].rm_so == -1 ||
1411 			    	    match[no].rm_eo == -1)
1412 					break;
1413 				mlen = match[no].rm_eo - match[no].rm_so;
1414 				for (t = ip + match[no].rm_so; mlen--; ++t)
1415 					OUTCH((UCHAR_T)*t, 0);
1416 				continue;
1417 			case 'e':
1418 			case 'E':
1419 				++rp;
1420 				conv = C_NOT_SET;
1421 				continue;
1422 			case 'l':
1423 				++rp;
1424 				conv = C_ONE_LOWER;
1425 				continue;
1426 			case 'L':
1427 				++rp;
1428 				conv = C_LOWER;
1429 				continue;
1430 			case 'u':
1431 				++rp;
1432 				conv = C_ONE_UPPER;
1433 				continue;
1434 			case 'U':
1435 				++rp;
1436 				conv = C_UPPER;
1437 				continue;
1438 			default:
1439 				++rp;
1440 				break;
1441 			}
1442 		}
1443 		OUTCH(ch, 1);
1444 	}
1445 
1446 	*lbp = lb;			/* Update caller's information. */
1447 	*lbclenp = lbclen;
1448 	*lblenp = lblen;
1449 	return (0);
1450 }
1451