1 /*
2  * Regex and string management functions.
3  *
4  * Copyright 2000-2010 Willy Tarreau <w@1wt.eu>
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version
9  * 2 of the License, or (at your option) any later version.
10  *
11  */
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <string.h>
16 
17 #include <types/global.h>
18 #include <common/config.h>
19 #include <common/defaults.h>
20 #include <common/regex.h>
21 #include <common/standard.h>
22 #include <proto/log.h>
23 
/* Scratch array of regex match offsets shared by the various regex tests.
 * It holds up to MAX_MATCH entries and is declared THREAD_LOCAL.
 */
THREAD_LOCAL regmatch_t pmatch[MAX_MATCH];  /* rm_so, rm_eo for regular expressions */
26 
/*
 * Expands the replacement template <str> into <dst>, which may receive at
 * most <dst_size> bytes including the trailing NUL. The following escape
 * sequences are recognized in <str> :
 *   - "\N" (one decimal digit) is replaced with sub-match <matches>[N] taken
 *     from <src>. A sub-match whose offsets are negative expands to nothing.
 *   - "\xHH" emits the single byte described by the two following characters.
 *   - "\" followed by any other character emits that character verbatim.
 *
 * <matches> must hold at least 10 entries since any digit may be referenced.
 * The two characters following "\x" are not validated here; callers are
 * presumably expected to have validated <str> first (chain_regex() runs
 * check_replace_string() on its replacement string).
 *
 * Returns the number of bytes written, not counting the trailing NUL, or -1
 * if <str> ends in the middle of an escape sequence, if a sub-match has
 * reversed offsets, or if the result does not fit into <dst>.
 */
int exp_replace(char *dst, unsigned int dst_size, char *src, const char *str, const regmatch_t *matches)
{
	char *old_dst = dst;
	char *dst_end = dst + dst_size;

	while (*str) {
		if (*str == '\\') {
			str++;
			if (!*str)
				return -1;

			if (isdigit((unsigned char)*str)) {
				int len, num;

				num = *str - '0';
				str++;

				if (matches[num].rm_eo > -1 && matches[num].rm_so > -1) {
					len = matches[num].rm_eo - matches[num].rm_so;

					/* a reversed range would become a huge size_t
					 * once passed to memcpy().
					 */
					if (len < 0)
						return -1;

					/* compare lengths rather than pointers: computing
					 * <dst + len> past the end of the buffer is
					 * undefined. One byte is kept for the final NUL.
					 */
					if (len >= dst_end - dst)
						return -1;

					memcpy(dst, src + matches[num].rm_so, len);
					dst += len;
				}

			} else if (*str == 'x') {
				unsigned char hex1, hex2;
				str++;

				if (!*str)
					return -1;

				/* <ctype.h> functions require an unsigned char value */
				hex1 = toupper((unsigned char)*str++) - '0';

				if (!*str)
					return -1;

				hex2 = toupper((unsigned char)*str++) - '0';

				/* 'A'..'F' land on 17..22 after the subtraction above,
				 * shift them down to 10..15.
				 */
				if (hex1 > 9) hex1 -= 'A' - '9' - 1;
				if (hex2 > 9) hex2 -= 'A' - '9' - 1;

				if (dst >= dst_end)
					return -1;

				*dst++ = (hex1<<4) + hex2;
			} else {
				if (dst >= dst_end)
					return -1;

				*dst++ = *str++;
			}
		} else {
			if (dst >= dst_end)
				return -1;

			*dst++ = *str++;
		}
	}
	if (dst >= dst_end)
		return -1;

	*dst = '\0';
	return dst - old_dst;
}
94 
95 /* returns NULL if the replacement string <str> is valid, or the pointer to the first error */
check_replace_string(const char * str)96 const char *check_replace_string(const char *str)
97 {
98 	const char *err = NULL;
99 	while (*str) {
100 		if (*str == '\\') {
101 			err = str; /* in case of a backslash, we return the pointer to it */
102 			str++;
103 			if (!*str)
104 				return err;
105 			else if (isdigit((unsigned char)*str))
106 				err = NULL;
107 			else if (*str == 'x') {
108 				str++;
109 				if (!ishex(*str))
110 					return err;
111 				str++;
112 				if (!ishex(*str))
113 					return err;
114 				err = NULL;
115 			}
116 			else {
117 				ha_warning("'\\%c' : deprecated use of a backslash before something not '\\','x' or a digit.\n", *str);
118 				err = NULL;
119 			}
120 		}
121 		str++;
122 	}
123 	return err;
124 }
125 
126 
127 /* returns the pointer to an error in the replacement string, or NULL if OK */
chain_regex(struct hdr_exp ** head,struct my_regex * preg,int action,const char * replace,void * cond)128 const char *chain_regex(struct hdr_exp **head, struct my_regex *preg,
129 			int action, const char *replace, void *cond)
130 {
131 	struct hdr_exp *exp;
132 
133 	if (replace != NULL) {
134 		const char *err;
135 		err = check_replace_string(replace);
136 		if (err)
137 			return err;
138 	}
139 
140 	while (*head != NULL)
141 		head = &(*head)->next;
142 
143 	exp = calloc(1, sizeof(*exp));
144 
145 	exp->preg = preg;
146 	exp->replace = replace;
147 	exp->action = action;
148 	exp->cond = cond;
149 	*head = exp;
150 
151 	return NULL;
152 }
153 
154 /* This function apply regex. It take const null terminated char as input.
155  * If the function doesn't match, it returns false, else it returns true.
156  * When it is compiled with JIT, this function execute strlen on the subject.
157  * Currently the only supported flag is REG_NOTBOL.
158  */
regex_exec_match(const struct my_regex * preg,const char * subject,size_t nmatch,regmatch_t pmatch[],int flags)159 int regex_exec_match(const struct my_regex *preg, const char *subject,
160                      size_t nmatch, regmatch_t pmatch[], int flags) {
161 #if defined(USE_PCRE) || defined(USE_PCRE_JIT) || defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
162 	int ret;
163 #ifdef USE_PCRE2
164 	PCRE2_SIZE *matches;
165 	pcre2_match_data *pm;
166 #else
167 	int matches[MAX_MATCH * 3];
168 #endif
169 	int enmatch;
170 	int i;
171 	int options;
172 
173 	/* Silently limit the number of allowed matches. max
174 	 * match i the maximum value for match, in fact this
175 	 * limit is not applyied.
176 	 */
177 
178 	enmatch = nmatch;
179 	if (enmatch > MAX_MATCH)
180 		enmatch = MAX_MATCH;
181 
182 	options = 0;
183 	if (flags & REG_NOTBOL)
184 #ifdef USE_PCRE2
185 		options |= PCRE2_NOTBOL;
186 #else
187 		options |= PCRE_NOTBOL;
188 #endif
189 
190 	/* The value returned by pcre_exec()/pcre2_match() is one more than the highest numbered
191 	 * pair that has been set. For example, if two substrings have been captured,
192 	 * the returned value is 3. If there are no capturing subpatterns, the return
193 	 * value from a successful match is 1, indicating that just the first pair of
194 	 * offsets has been set.
195 	 *
196 	 * It seems that this function returns 0 if it detect more matches than avalaible
197 	 * space in the matches array.
198 	 */
199 #ifdef USE_PCRE2
200 	pm = pcre2_match_data_create_from_pattern(preg->reg, NULL);
201 	ret = pcre2_match(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)strlen(subject), 0, options, pm, NULL);
202 
203 	if (ret < 0) {
204 		pcre2_match_data_free(pm);
205 		return 0;
206 	}
207 
208 	matches = pcre2_get_ovector_pointer(pm);
209 #else
210 	ret = pcre_exec(preg->reg, preg->extra, subject, strlen(subject), 0, options, matches, enmatch * 3);
211 
212 	if (ret < 0)
213 		return 0;
214 #endif
215 
216 	if (ret == 0)
217 		ret = enmatch;
218 
219 	for (i=0; i<nmatch; i++) {
220 		/* Copy offset. */
221 		if (i < ret) {
222 			pmatch[i].rm_so = matches[(i*2)];
223 			pmatch[i].rm_eo = matches[(i*2)+1];
224 			continue;
225 		}
226 		/* Set the unmatvh flag (-1). */
227 		pmatch[i].rm_so = -1;
228 		pmatch[i].rm_eo = -1;
229 	}
230 #ifdef USE_PCRE2
231 	pcre2_match_data_free(pm);
232 #endif
233 	return 1;
234 #else
235 	int match;
236 
237 	flags &= REG_NOTBOL;
238 	match = regexec(&preg->regex, subject, nmatch, pmatch, flags);
239 	if (match == REG_NOMATCH)
240 		return 0;
241 	return 1;
242 #endif
243 }
244 
245 /* This function apply regex. It take a "char *" ans length as input. The
246  * <subject> can be modified during the processing. If the function doesn't
247  * match, it returns false, else it returns true.
248  * When it is compiled with standard POSIX regex or PCRE, this function add
249  * a temporary null chracters at the end of the <subject>. The <subject> must
250  * have a real length of <length> + 1. Currently the only supported flag is
251  * REG_NOTBOL.
252  */
regex_exec_match2(const struct my_regex * preg,char * subject,int length,size_t nmatch,regmatch_t pmatch[],int flags)253 int regex_exec_match2(const struct my_regex *preg, char *subject, int length,
254                       size_t nmatch, regmatch_t pmatch[], int flags) {
255 #if defined(USE_PCRE) || defined(USE_PCRE_JIT) || defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
256 	int ret;
257 #ifdef USE_PCRE2
258 	PCRE2_SIZE *matches;
259 	pcre2_match_data *pm;
260 #else
261 	int matches[MAX_MATCH * 3];
262 #endif
263 	int enmatch;
264 	int i;
265 	int options;
266 
267 	/* Silently limit the number of allowed matches. max
268 	 * match i the maximum value for match, in fact this
269 	 * limit is not applyied.
270 	 */
271 	enmatch = nmatch;
272 	if (enmatch > MAX_MATCH)
273 		enmatch = MAX_MATCH;
274 
275 	options = 0;
276 	if (flags & REG_NOTBOL)
277 #ifdef USE_PCRE2
278 		options |= PCRE2_NOTBOL;
279 #else
280 		options |= PCRE_NOTBOL;
281 #endif
282 
283 	/* The value returned by pcre_exec()/pcre2_match() is one more than the highest numbered
284 	 * pair that has been set. For example, if two substrings have been captured,
285 	 * the returned value is 3. If there are no capturing subpatterns, the return
286 	 * value from a successful match is 1, indicating that just the first pair of
287 	 * offsets has been set.
288 	 *
289 	 * It seems that this function returns 0 if it detect more matches than avalaible
290 	 * space in the matches array.
291 	 */
292 #ifdef USE_PCRE2
293 	pm = pcre2_match_data_create_from_pattern(preg->reg, NULL);
294 	ret = pcre2_match(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)length, 0, options, pm, NULL);
295 
296 	if (ret < 0) {
297 		pcre2_match_data_free(pm);
298 		return 0;
299 	}
300 
301 	matches = pcre2_get_ovector_pointer(pm);
302 #else
303 	ret = pcre_exec(preg->reg, preg->extra, subject, length, 0, options, matches, enmatch * 3);
304 	if (ret < 0)
305 		return 0;
306 #endif
307 
308 	if (ret == 0)
309 		ret = enmatch;
310 
311 	for (i=0; i<nmatch; i++) {
312 		/* Copy offset. */
313 		if (i < ret) {
314 			pmatch[i].rm_so = matches[(i*2)];
315 			pmatch[i].rm_eo = matches[(i*2)+1];
316 			continue;
317 		}
318 		/* Set the unmatvh flag (-1). */
319 		pmatch[i].rm_so = -1;
320 		pmatch[i].rm_eo = -1;
321 	}
322 #ifdef USE_PCRE2
323 	pcre2_match_data_free(pm);
324 #endif
325 	return 1;
326 #else
327 	char old_char = subject[length];
328 	int match;
329 
330 	flags &= REG_NOTBOL;
331 	subject[length] = 0;
332 	match = regexec(&preg->regex, subject, nmatch, pmatch, flags);
333 	subject[length] = old_char;
334 	if (match == REG_NOMATCH)
335 		return 0;
336 	return 1;
337 #endif
338 }
339 
regex_comp(const char * str,struct my_regex * regex,int cs,int cap,char ** err)340 int regex_comp(const char *str, struct my_regex *regex, int cs, int cap, char **err)
341 {
342 #if defined(USE_PCRE) || defined(USE_PCRE_JIT)
343 	int flags = 0;
344 	const char *error;
345 	int erroffset;
346 
347 	if (!cs)
348 		flags |= PCRE_CASELESS;
349 	if (!cap)
350 		flags |= PCRE_NO_AUTO_CAPTURE;
351 
352 	regex->reg = pcre_compile(str, flags, &error, &erroffset, NULL);
353 	if (!regex->reg) {
354 		memprintf(err, "regex '%s' is invalid (error=%s, erroffset=%d)", str, error, erroffset);
355 		return 0;
356 	}
357 
358 	regex->extra = pcre_study(regex->reg, PCRE_STUDY_JIT_COMPILE, &error);
359 	if (!regex->extra && error != NULL) {
360 		pcre_free(regex->reg);
361 		memprintf(err, "failed to compile regex '%s' (error=%s)", str, error);
362 		return 0;
363 	}
364 #elif defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
365 	int flags = 0;
366 	int errn;
367 #if defined(USE_PCRE2_JIT)
368 	int jit;
369 #endif
370 	PCRE2_UCHAR error[256];
371 	PCRE2_SIZE erroffset;
372 
373 	if (!cs)
374 		flags |= PCRE2_CASELESS;
375 	if (!cap)
376 		flags |= PCRE2_NO_AUTO_CAPTURE;
377 
378 	regex->reg = pcre2_compile((PCRE2_SPTR)str, PCRE2_ZERO_TERMINATED, flags, &errn, &erroffset, NULL);
379 	if (!regex->reg) {
380 		pcre2_get_error_message(errn, error, sizeof(error));
381 		memprintf(err, "regex '%s' is invalid (error=%s, erroffset=%zu)", str, error, erroffset);
382 		return 0;
383 	}
384 
385 #if defined(USE_PCRE2_JIT)
386 	jit = pcre2_jit_compile(regex->reg, PCRE2_JIT_COMPLETE);
387 	/*
388 	 * We end if it is an error not related to lack of JIT support
389 	 * in a case of JIT support missing pcre2_jit_compile is "no-op"
390 	 */
391 	if (jit < 0 && jit != PCRE2_ERROR_JIT_BADOPTION) {
392 		pcre2_code_free(regex->reg);
393 		memprintf(err, "regex '%s' jit compilation failed", str);
394 		return 0;
395 	}
396 #endif
397 
398 #else
399 	int flags = REG_EXTENDED;
400 
401 	if (!cs)
402 		flags |= REG_ICASE;
403 	if (!cap)
404 		flags |= REG_NOSUB;
405 
406 	if (regcomp(&regex->regex, str, flags) != 0) {
407 		memprintf(err, "regex '%s' is invalid", str);
408 		return 0;
409 	}
410 #endif
411 	return 1;
412 }
413 
414 __attribute__((constructor))
__regex_init(void)415 static void __regex_init(void)
416 {
417 	char *ptr = NULL;
418 
419 #ifdef USE_PCRE
420 	memprintf(&ptr, "Built with PCRE version : %s", (HAP_XSTRING(Z PCRE_PRERELEASE)[1] == 0)?
421 		HAP_XSTRING(PCRE_MAJOR.PCRE_MINOR PCRE_DATE) :
422 		HAP_XSTRING(PCRE_MAJOR.PCRE_MINOR) HAP_XSTRING(PCRE_PRERELEASE PCRE_DATE));
423 	memprintf(&ptr, "%s\nRunning on PCRE version : %s", ptr, pcre_version());
424 
425 	memprintf(&ptr, "%s\nPCRE library supports JIT : %s", ptr,
426 #ifdef USE_PCRE_JIT
427 		  ({
428 			  int r;
429 			  pcre_config(PCRE_CONFIG_JIT, &r);
430 			  r ? "yes" : "no (libpcre build without JIT?)";
431 		  })
432 #else
433 		  "no (USE_PCRE_JIT not set)"
434 #endif
435 		  );
436 #endif /* USE_PCRE */
437 
438 #ifdef USE_PCRE2
439 	memprintf(&ptr, "Built with PCRE2 version : %s", (HAP_XSTRING(Z PCRE2_PRERELEASE)[1] == 0) ?
440 	          HAP_XSTRING(PCRE2_MAJOR.PCRE2_MINOR PCRE2_DATE) :
441 	          HAP_XSTRING(PCRE2_MAJOR.PCRE2_MINOR) HAP_XSTRING(PCRE2_PRERELEASE PCRE2_DATE));
442 	memprintf(&ptr, "%s\nPCRE2 library supports JIT : %s", ptr,
443 #ifdef USE_PCRE2_JIT
444 		  ({
445 			  int r;
446 			  pcre2_config(PCRE2_CONFIG_JIT, &r);
447 			  r ? "yes" : "no (libpcre2 build without JIT?)";
448 		  })
449 #else
450 		  "no (USE_PCRE2_JIT not set)"
451 #endif
452 		  );
453 #endif /* USE_PCRE2 */
454 
455 #if !defined(USE_PCRE) && !defined(USE_PCRE2)
456 	memprintf(&ptr, "Built without PCRE or PCRE2 support (using libc's regex instead)");
457 #endif
458 	hap_register_build_opts(ptr, 1);
459 }
460 
461 /*
462  * Local variables:
463  *  c-indent-level: 8
464  *  c-basic-offset: 8
465  * End:
466  */
467