xref: /original-bsd/contrib/ed/re.c (revision 95dec232)
1 /*-
2  * Copyright (c) 1992, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rodney Ruddock of the University of Guelph.
7  *
8  * %sccs.include.redist.c%
9  */
10 
11 #ifndef lint
12 static char sccsid[] = "@(#)re.c	8.1 (Berkeley) 05/31/93";
13 #endif /* not lint */
14 
15 #include <sys/types.h>
16 
17 #include <regex.h>
18 #include <setjmp.h>
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 
23 #ifdef DBI
24 #include <db.h>
25 #endif
26 
27 #include "ed.h"
28 #include "extern.h"
29 
30 /*
31  * This finds the n-th occurrence of an RE in a line. If '^' was at the start
32  * of the RE then look once (in case n=1). There is no standard RE interface
33  * to do this.  Returns 0 for success.  NOTE: the #ifdef REG_STARTEND is if
34  * the regex package has the BSD extensions to it.
35  */
36 int
37 #ifdef REG_STARTEND
38 regexec_n(reprecomp, strg, num_subexp, reprematch, flags, n, len, pass)
39 #else
40 regexec_n(reprecomp, strg, num_subexp, reprematch, flags, n, offset, pass)
41 #endif
42 	regex_t *reprecomp;
43 	char *strg;
44 	size_t num_subexp;
45 	regmatch_t reprematch[];
46 	int flags, n;
47 #ifdef REG_STARTEND
48 	size_t len;
49 #else
50 	size_t *offset;
51 #endif
52 	int pass; /* if pass == 0 .rm_so user set, else set default */
53 {
54 	int l_cnt, l_flag=0;
55 #ifndef REG_STARTEND
56 	char *l_offset=strg, *l_end;
57 #endif
58 
59 	if (n <= 0)
60 		return (REG_NOMATCH);
61 #ifdef REG_STARTEND
62 	flags = (flags | REG_STARTEND);
63 	if (pass)
64 		reprematch[0].rm_so = 0;
65 	reprematch[0].rm_eo = len;
66 #else
67 	strg = &strg[offset];
68 	l_end = &strg[strlen(strg)];
69 #endif
70 	for (l_cnt = 0;;) {
71 		if (regexec(reprecomp,
72 		    strg, num_subexp, reprematch, flags) == 0)
73 			l_cnt++;
74 		else
75 			return (REG_NOMATCH);
76 
77 		if (l_cnt >= n)
78 			break;
79 #ifdef REG_STARTEND
80 		if (reprematch[0].rm_so == reprematch[0].rm_eo)
81 			reprematch[0].rm_eo++;
82 		reprematch[0].rm_so = reprematch[0].rm_eo;
83 		if (reprematch[0].rm_so == len)
84 			return (REG_NOMATCH);
85 		reprematch[0].rm_eo = len;
86 #else
87 		strg = &strg[reprematch[0].rm_eo];
88 		if (strg == l_end)
89                         return (REG_NOMATCH);
90 #endif
91 		/* if a "^" started the current RE we only loop once */
92 		if (RE_sol)
93 			return (REG_NOMATCH);
94 	}
95 #ifndef REG_STARTEND
96 	*offset = (size_t) (strg - l_offset);
97 #endif
98 	return (0);		/* success */
99 }
100 
101 /*
102  * Replace in the line specified at the found locations with the
103  * specified replacement. There is no standard RE interface to do
104  * this.
105  */
106 char *
107 #ifdef REG_STARTEND
108 re_replace(line, num_subexp, repmatch, replacer)
109 #else
110 re_replace(line, num_subexp, repmatch, replacer, offset)
111 #endif
112 	char *line;
113 	size_t num_subexp;
114 	regmatch_t repmatch[];
115 	char *replacer;
116 #ifndef REG_STARTEND
117 	size_t offset;
118 #endif
119 {
120 	static char *l_prev_r = NULL;
121 	static int l_prev_r_flag = 0;
122 	regoff_t l_len_before, l_len_whole, l_slen[RE_SEC];
123 	int l_cnt, l_len_new = 0, l_new_rm_eo = 0;
124 	char *l_string, *l_head;
125 
126 	if (l_prev_r_flag == 0) {
127 		l_prev_r_flag = 1;
128 		l_prev_r = NULL;
129 	}
130 	l_head = replacer;
131 	/* Length of what stays the same before. */
132 	l_len_before = (repmatch[0].rm_so);
133 	l_len_whole = strlen(line);
134 	if (num_subexp > RE_SEC - 1)
135 		num_subexp = RE_SEC - 1;
136 	for (l_cnt = 0; l_cnt <= num_subexp; l_cnt++)
137 		l_slen[l_cnt] =
138 		    (repmatch[l_cnt].rm_eo) - (repmatch[l_cnt].rm_so);
139 
140 	/*
141 	 * l_slen[0] == len of what is to be replaced.
142 	 * l_slen[1-9] == len of each backref.
143 	 */
144 	if ((*replacer == '%') && (replacer[1] == '\0')) {
145 		l_string = calloc(l_len_whole - l_slen[0] +
146 		    (strlen(l_prev_r)) + 2, sizeof(char));
147 		if (l_string == NULL) {
148 			/* *errnum = -1; */
149 			strcpy(help_msg, "out of memory error");
150 			return (NULL);
151 		}
152 #ifdef REG_STARTEND
153 		memmove(l_string, line, (int) l_len_before);
154 #else
155 		memmove(l_string, line, (int) l_len_before + offset);
156 #endif
157 #ifdef REG_STARTEND
158 		l_string[l_len_before] = '\0';
159 #else
160 		l_string[l_len_before + offset] = '\0';
161 #endif
162 		strcat(l_string, l_prev_r);
163 		l_new_rm_eo = strlen(l_string);
164 #ifdef REG_STARTEND
165 		strcat(l_string, &line[repmatch[0].rm_eo]);
166 #else
167 		strcat(l_string, &line[repmatch[0].rm_eo + offset]);
168 #endif
169 		repmatch[0].rm_eo = l_new_rm_eo;
170 		return (l_string);
171 	}
172 
173 	/* Figure out length of new line first. */
174 	while (*replacer != '\0') {
175 		/* Add in the length of the RE match. */
176 		if (*replacer == '&')
177 			l_len_new = l_len_new + l_slen[0];
178 		/* Add in the length of a backref. */
179 		else if (*replacer == '\\') {
180 			replacer++;
181 			if ((*replacer > '0') &&
182 			    (*replacer < ('9' + 1)) &&
183 			    (repmatch[*replacer - '0'].rm_so > -1))
184 				/* -1 - -1 = 0 */
185 				l_len_new = l_len_new + l_slen[*replacer - '0'];
186 			else
187 				l_len_new++;
188 		} else
189 			l_len_new++;
190 		replacer++;
191 	}
192 
193 	/* Create the line of an appropriate length. */
194 	l_string =
195 	    calloc(l_len_whole - l_slen[0] + l_len_new + 2, sizeof(char));
196 	if (l_string == NULL) {
197 		strcpy(help_msg, "out of memory error");
198 		return (NULL);
199 	}
200 	if (l_prev_r != NULL)
201 		free(l_prev_r);
202 	l_prev_r = calloc(l_len_new + 2, sizeof(char));
203 	if (l_prev_r == NULL) {
204 		strcpy(help_msg, "out of memory error");
205 		return (NULL);
206 	}
207 	/* Copy over what doesn't change before the chars to be replaced. */
208 #ifdef REG_STARTEND
209 	memmove(l_string, line, (size_t)l_len_before);
210 #else
211 	memmove(l_string, line, l_len_before + offset);
212 #endif
213 #ifdef REG_STARTEND
214 	l_string[l_len_before] = '\0';
215 #else
216 	l_string[l_len_before + offset] = '\0';
217 #endif
218 	l_prev_r[0] = '\0';
219 
220 	/* Make the replacement. */
221 	replacer = l_head;
222 	while (*replacer != '\0') {
223 		/* Put what matched the RE into the replacement. */
224 		if (*replacer == '&') {
225 #ifdef REG_STARTEND
226 			strncat(l_string,
227 			    &line[repmatch[0].rm_so], (int)l_slen[0]);
228 			strncat(l_prev_r,
229 			    &line[repmatch[0].rm_so], (int) l_slen[0]);
230 #else
231 			strncat(l_string,
232 			    &line[repmatch[0].rm_so + offset], (int) l_slen[0]);
233 			strncat(l_prev_r,
234 			    &line[repmatch[0].rm_so + offset], (int) l_slen[0]);
235 #endif
236 		} else if (*replacer == '\\') {
237 			/* Likely a backref to be included. */
238 			replacer++;
239 			if ((*replacer > '0') && (*replacer < ('9' + 1)) &&
240 			    (repmatch[*replacer - '0'].rm_so > -1)) {
241 #ifdef REG_STARTEND
242 				strncat(l_string,
243 				    &line[repmatch[*replacer - '0'].rm_so],
244 				    (int) l_slen[*replacer - '0']);
245 				strncat(l_prev_r,
246 				    &line[repmatch[*replacer - '0'].rm_so],
247 				    (int) l_slen[*replacer - '0']);
248 #else
249 				strncat(l_string,
250 				    &line[repmatch[*replacer - '0'].rm_so +
251 				    offset], (int) l_slen[*replacer - '0']);
252 				strncat(l_prev_r,
253 				    &line[repmatch[*replacer - '0'].rm_so +
254 				    offset], (int) l_slen[*replacer - '0']);
255 #endif
256 			}
257 			/* Put the replacement in. */
258 			else {
259 				strncat(l_string, replacer, 1);
260 				strncat(l_prev_r, replacer, 1);
261 			}
262 		}
263 		/* Put the replacement in. */
264 		else {
265 			strncat(l_string, replacer, 1);
266 			strncat(l_prev_r, replacer, 1);
267 		}
268 		replacer++;
269 	}
270 
271 	l_new_rm_eo = strlen(l_string);
272 
273 	/* Copy over what was after the chars to be replaced to the new line. */
274 #ifdef REG_STARTEND
275 	strcat(l_string, &line[repmatch[0].rm_eo]);
276 #else
277 	strcat(l_string, &line[repmatch[0].rm_eo + offset]);
278 #endif
279 
280 	repmatch[0].rm_eo = l_new_rm_eo;	/* Update rm_eo. */
281 #ifndef REG_STARTEND
282 	offset += l_new_rm_eo;			/* Update offset. */
283 #endif
284 	return (l_string);			/* Return the new line. */
285 }
286