1 /*
2 * Regex and string management functions.
3 *
4 * Copyright 2000-2010 Willy Tarreau <w@1wt.eu>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <string.h>
16
17 #include <types/global.h>
18 #include <common/config.h>
19 #include <common/defaults.h>
20 #include <common/regex.h>
21 #include <common/standard.h>
22 #include <proto/log.h>
23
/* Scratch array of regex match offsets (rm_so/rm_eo pairs), MAX_MATCH entries
 * long, shared by the various regex tests. THREAD_LOCAL presumably gives each
 * thread its own copy -- TODO confirm against the macro definition.
 * Note: regex_exec_match() and regex_exec_match2() take a parameter that is
 * also named <pmatch>; inside those functions the parameter hides this array.
 */
THREAD_LOCAL regmatch_t pmatch[MAX_MATCH];  /* rm_so, rm_eo for regular expressions */
26
/*
 * Expands the replacement template <str> into <dst>, which can hold <dst_size>
 * bytes including the trailing NUL. Three escape forms are understood:
 *   - "\N" (a single decimal digit) inserts the bytes of <src> delimited by
 *     matches[N]. Nothing is inserted when either offset of that entry is
 *     negative. Any index from 0 to 9 may be read from <matches>.
 *   - "\xHH" inserts the byte built from the two characters following the 'x',
 *     interpreted as hexadecimal digits. They are not validated here.
 *   - "\c" for any other character <c> inserts <c> itself.
 * Every other character of the template is copied verbatim.
 *
 * Returns the number of bytes written, not counting the trailing NUL, or -1 if
 * the template ends in the middle of an escape sequence, if a referenced match
 * has its end before its start, or if the result does not fit in <dst>.
 */
int exp_replace(char *dst, unsigned int dst_size, char *src, const char *str, const regmatch_t *matches)
{
	char *old_dst = dst;
	char *dst_end = dst + dst_size;

	while (*str) {
		if (*str == '\\') {
			str++;
			if (!*str)
				return -1;

			if (isdigit((unsigned char)*str)) {
				int len, num;

				num = *str - '0';
				str++;

				if (matches[num].rm_eo > -1 && matches[num].rm_so > -1) {
					len = matches[num].rm_eo - matches[num].rm_so;

					/* Compare lengths rather than pointers: forming
					 * <dst + len> beyond the buffer is undefined. A
					 * negative length must never reach memcpy(), and
					 * one byte is kept for the trailing NUL.
					 */
					if (len < 0 || len >= dst_end - dst)
						return -1;

					memcpy(dst, src + matches[num].rm_so, len);
					dst += len;
				}

			} else if (*str == 'x') {
				unsigned char hex1, hex2;
				str++;

				if (!*str)
					return -1;

				/* <ctype.h> functions require a value representable
				 * as unsigned char, hence the cast.
				 */
				hex1 = toupper((unsigned char)*str++) - '0';

				if (!*str)
					return -1;

				hex2 = toupper((unsigned char)*str++) - '0';

				/* 'A'..'F' land 7 positions after '9' in ASCII */
				if (hex1 > 9) hex1 -= 'A' - '9' - 1;
				if (hex2 > 9) hex2 -= 'A' - '9' - 1;

				if (dst >= dst_end)
					return -1;

				*dst++ = (hex1<<4) + hex2;
			} else {
				if (dst >= dst_end)
					return -1;

				*dst++ = *str++;
			}
		} else {
			if (dst >= dst_end)
				return -1;

			*dst++ = *str++;
		}
	}

	/* room for the terminator is mandatory */
	if (dst >= dst_end)
		return -1;

	*dst = '\0';
	return dst - old_dst;
}
94
95 /* returns NULL if the replacement string <str> is valid, or the pointer to the first error */
check_replace_string(const char * str)96 const char *check_replace_string(const char *str)
97 {
98 const char *err = NULL;
99 while (*str) {
100 if (*str == '\\') {
101 err = str; /* in case of a backslash, we return the pointer to it */
102 str++;
103 if (!*str)
104 return err;
105 else if (isdigit((unsigned char)*str))
106 err = NULL;
107 else if (*str == 'x') {
108 str++;
109 if (!ishex(*str))
110 return err;
111 str++;
112 if (!ishex(*str))
113 return err;
114 err = NULL;
115 }
116 else {
117 ha_warning("'\\%c' : deprecated use of a backslash before something not '\\','x' or a digit.\n", *str);
118 err = NULL;
119 }
120 }
121 str++;
122 }
123 return err;
124 }
125
126
127 /* returns the pointer to an error in the replacement string, or NULL if OK */
chain_regex(struct hdr_exp ** head,struct my_regex * preg,int action,const char * replace,void * cond)128 const char *chain_regex(struct hdr_exp **head, struct my_regex *preg,
129 int action, const char *replace, void *cond)
130 {
131 struct hdr_exp *exp;
132
133 if (replace != NULL) {
134 const char *err;
135 err = check_replace_string(replace);
136 if (err)
137 return err;
138 }
139
140 while (*head != NULL)
141 head = &(*head)->next;
142
143 exp = calloc(1, sizeof(*exp));
144
145 exp->preg = preg;
146 exp->replace = replace;
147 exp->action = action;
148 exp->cond = cond;
149 *head = exp;
150
151 return NULL;
152 }
153
154 /* This function apply regex. It take const null terminated char as input.
155 * If the function doesn't match, it returns false, else it returns true.
156 * When it is compiled with JIT, this function execute strlen on the subject.
157 * Currently the only supported flag is REG_NOTBOL.
158 */
regex_exec_match(const struct my_regex * preg,const char * subject,size_t nmatch,regmatch_t pmatch[],int flags)159 int regex_exec_match(const struct my_regex *preg, const char *subject,
160 size_t nmatch, regmatch_t pmatch[], int flags) {
161 #if defined(USE_PCRE) || defined(USE_PCRE_JIT) || defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
162 int ret;
163 #ifdef USE_PCRE2
164 PCRE2_SIZE *matches;
165 pcre2_match_data *pm;
166 #else
167 int matches[MAX_MATCH * 3];
168 #endif
169 int enmatch;
170 int i;
171 int options;
172
173 /* Silently limit the number of allowed matches. max
174 * match i the maximum value for match, in fact this
175 * limit is not applyied.
176 */
177
178 enmatch = nmatch;
179 if (enmatch > MAX_MATCH)
180 enmatch = MAX_MATCH;
181
182 options = 0;
183 if (flags & REG_NOTBOL)
184 #ifdef USE_PCRE2
185 options |= PCRE2_NOTBOL;
186 #else
187 options |= PCRE_NOTBOL;
188 #endif
189
190 /* The value returned by pcre_exec()/pcre2_match() is one more than the highest numbered
191 * pair that has been set. For example, if two substrings have been captured,
192 * the returned value is 3. If there are no capturing subpatterns, the return
193 * value from a successful match is 1, indicating that just the first pair of
194 * offsets has been set.
195 *
196 * It seems that this function returns 0 if it detect more matches than avalaible
197 * space in the matches array.
198 */
199 #ifdef USE_PCRE2
200 pm = pcre2_match_data_create_from_pattern(preg->reg, NULL);
201 ret = pcre2_match(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)strlen(subject), 0, options, pm, NULL);
202
203 if (ret < 0) {
204 pcre2_match_data_free(pm);
205 return 0;
206 }
207
208 matches = pcre2_get_ovector_pointer(pm);
209 #else
210 ret = pcre_exec(preg->reg, preg->extra, subject, strlen(subject), 0, options, matches, enmatch * 3);
211
212 if (ret < 0)
213 return 0;
214 #endif
215
216 if (ret == 0)
217 ret = enmatch;
218
219 for (i=0; i<nmatch; i++) {
220 /* Copy offset. */
221 if (i < ret) {
222 pmatch[i].rm_so = matches[(i*2)];
223 pmatch[i].rm_eo = matches[(i*2)+1];
224 continue;
225 }
226 /* Set the unmatvh flag (-1). */
227 pmatch[i].rm_so = -1;
228 pmatch[i].rm_eo = -1;
229 }
230 #ifdef USE_PCRE2
231 pcre2_match_data_free(pm);
232 #endif
233 return 1;
234 #else
235 int match;
236
237 flags &= REG_NOTBOL;
238 match = regexec(&preg->regex, subject, nmatch, pmatch, flags);
239 if (match == REG_NOMATCH)
240 return 0;
241 return 1;
242 #endif
243 }
244
245 /* This function apply regex. It take a "char *" ans length as input. The
246 * <subject> can be modified during the processing. If the function doesn't
247 * match, it returns false, else it returns true.
248 * When it is compiled with standard POSIX regex or PCRE, this function add
249 * a temporary null chracters at the end of the <subject>. The <subject> must
250 * have a real length of <length> + 1. Currently the only supported flag is
251 * REG_NOTBOL.
252 */
regex_exec_match2(const struct my_regex * preg,char * subject,int length,size_t nmatch,regmatch_t pmatch[],int flags)253 int regex_exec_match2(const struct my_regex *preg, char *subject, int length,
254 size_t nmatch, regmatch_t pmatch[], int flags) {
255 #if defined(USE_PCRE) || defined(USE_PCRE_JIT) || defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
256 int ret;
257 #ifdef USE_PCRE2
258 PCRE2_SIZE *matches;
259 pcre2_match_data *pm;
260 #else
261 int matches[MAX_MATCH * 3];
262 #endif
263 int enmatch;
264 int i;
265 int options;
266
267 /* Silently limit the number of allowed matches. max
268 * match i the maximum value for match, in fact this
269 * limit is not applyied.
270 */
271 enmatch = nmatch;
272 if (enmatch > MAX_MATCH)
273 enmatch = MAX_MATCH;
274
275 options = 0;
276 if (flags & REG_NOTBOL)
277 #ifdef USE_PCRE2
278 options |= PCRE2_NOTBOL;
279 #else
280 options |= PCRE_NOTBOL;
281 #endif
282
283 /* The value returned by pcre_exec()/pcre2_match() is one more than the highest numbered
284 * pair that has been set. For example, if two substrings have been captured,
285 * the returned value is 3. If there are no capturing subpatterns, the return
286 * value from a successful match is 1, indicating that just the first pair of
287 * offsets has been set.
288 *
289 * It seems that this function returns 0 if it detect more matches than avalaible
290 * space in the matches array.
291 */
292 #ifdef USE_PCRE2
293 pm = pcre2_match_data_create_from_pattern(preg->reg, NULL);
294 ret = pcre2_match(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)length, 0, options, pm, NULL);
295
296 if (ret < 0) {
297 pcre2_match_data_free(pm);
298 return 0;
299 }
300
301 matches = pcre2_get_ovector_pointer(pm);
302 #else
303 ret = pcre_exec(preg->reg, preg->extra, subject, length, 0, options, matches, enmatch * 3);
304 if (ret < 0)
305 return 0;
306 #endif
307
308 if (ret == 0)
309 ret = enmatch;
310
311 for (i=0; i<nmatch; i++) {
312 /* Copy offset. */
313 if (i < ret) {
314 pmatch[i].rm_so = matches[(i*2)];
315 pmatch[i].rm_eo = matches[(i*2)+1];
316 continue;
317 }
318 /* Set the unmatvh flag (-1). */
319 pmatch[i].rm_so = -1;
320 pmatch[i].rm_eo = -1;
321 }
322 #ifdef USE_PCRE2
323 pcre2_match_data_free(pm);
324 #endif
325 return 1;
326 #else
327 char old_char = subject[length];
328 int match;
329
330 flags &= REG_NOTBOL;
331 subject[length] = 0;
332 match = regexec(&preg->regex, subject, nmatch, pmatch, flags);
333 subject[length] = old_char;
334 if (match == REG_NOMATCH)
335 return 0;
336 return 1;
337 #endif
338 }
339
regex_comp(const char * str,struct my_regex * regex,int cs,int cap,char ** err)340 int regex_comp(const char *str, struct my_regex *regex, int cs, int cap, char **err)
341 {
342 #if defined(USE_PCRE) || defined(USE_PCRE_JIT)
343 int flags = 0;
344 const char *error;
345 int erroffset;
346
347 if (!cs)
348 flags |= PCRE_CASELESS;
349 if (!cap)
350 flags |= PCRE_NO_AUTO_CAPTURE;
351
352 regex->reg = pcre_compile(str, flags, &error, &erroffset, NULL);
353 if (!regex->reg) {
354 memprintf(err, "regex '%s' is invalid (error=%s, erroffset=%d)", str, error, erroffset);
355 return 0;
356 }
357
358 regex->extra = pcre_study(regex->reg, PCRE_STUDY_JIT_COMPILE, &error);
359 if (!regex->extra && error != NULL) {
360 pcre_free(regex->reg);
361 memprintf(err, "failed to compile regex '%s' (error=%s)", str, error);
362 return 0;
363 }
364 #elif defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
365 int flags = 0;
366 int errn;
367 #if defined(USE_PCRE2_JIT)
368 int jit;
369 #endif
370 PCRE2_UCHAR error[256];
371 PCRE2_SIZE erroffset;
372
373 if (!cs)
374 flags |= PCRE2_CASELESS;
375 if (!cap)
376 flags |= PCRE2_NO_AUTO_CAPTURE;
377
378 regex->reg = pcre2_compile((PCRE2_SPTR)str, PCRE2_ZERO_TERMINATED, flags, &errn, &erroffset, NULL);
379 if (!regex->reg) {
380 pcre2_get_error_message(errn, error, sizeof(error));
381 memprintf(err, "regex '%s' is invalid (error=%s, erroffset=%zu)", str, error, erroffset);
382 return 0;
383 }
384
385 #if defined(USE_PCRE2_JIT)
386 jit = pcre2_jit_compile(regex->reg, PCRE2_JIT_COMPLETE);
387 /*
388 * We end if it is an error not related to lack of JIT support
389 * in a case of JIT support missing pcre2_jit_compile is "no-op"
390 */
391 if (jit < 0 && jit != PCRE2_ERROR_JIT_BADOPTION) {
392 pcre2_code_free(regex->reg);
393 memprintf(err, "regex '%s' jit compilation failed", str);
394 return 0;
395 }
396 #endif
397
398 #else
399 int flags = REG_EXTENDED;
400
401 if (!cs)
402 flags |= REG_ICASE;
403 if (!cap)
404 flags |= REG_NOSUB;
405
406 if (regcomp(®ex->regex, str, flags) != 0) {
407 memprintf(err, "regex '%s' is invalid", str);
408 return 0;
409 }
410 #endif
411 return 1;
412 }
413
/*
 * Declared with the constructor attribute so that it runs automatically
 * before main(). Builds a human-readable summary of the regex backend the
 * binary was compiled with (PCRE, PCRE2, or neither) and passes it to
 * hap_register_build_opts().
 */
__attribute__((constructor))
static void __regex_init(void)
{
	char *ptr = NULL;

#ifdef USE_PCRE
	/* HAP_XSTRING presumably stringifies its macro-expanded argument. If
	 * so, "Z" followed by an empty PCRE_PRERELEASE yields a string whose
	 * index 1 is the NUL, which selects the first (release) form of the
	 * version string -- TODO confirm against the macro definition.
	 */
	memprintf(&ptr, "Built with PCRE version : %s", (HAP_XSTRING(Z PCRE_PRERELEASE)[1] == 0)?
	          HAP_XSTRING(PCRE_MAJOR.PCRE_MINOR PCRE_DATE) :
	          HAP_XSTRING(PCRE_MAJOR.PCRE_MINOR) HAP_XSTRING(PCRE_PRERELEASE PCRE_DATE));
	/* The current text is passed back as the first "%s" argument in order
	 * to append to it. NOTE(review): this relies on memprintf() accepting
	 * its own output buffer as an input -- confirm.
	 */
	memprintf(&ptr, "%s\nRunning on PCRE version : %s", ptr, pcre_version());

	/* the last argument is picked at build time: a run-time JIT probe when
	 * USE_PCRE_JIT is set, a fixed string otherwise.
	 */
	memprintf(&ptr, "%s\nPCRE library supports JIT : %s", ptr,
#ifdef USE_PCRE_JIT
	          ({
	                  int r;
	                  pcre_config(PCRE_CONFIG_JIT, &r);
	                  r ? "yes" : "no (libpcre build without JIT?)";
	          })
#else
	          "no (USE_PCRE_JIT not set)"
#endif
	          );
#endif /* USE_PCRE */

#ifdef USE_PCRE2
	/* same construction as the PCRE block above, using the PCRE2 macros */
	memprintf(&ptr, "Built with PCRE2 version : %s", (HAP_XSTRING(Z PCRE2_PRERELEASE)[1] == 0) ?
	          HAP_XSTRING(PCRE2_MAJOR.PCRE2_MINOR PCRE2_DATE) :
	          HAP_XSTRING(PCRE2_MAJOR.PCRE2_MINOR) HAP_XSTRING(PCRE2_PRERELEASE PCRE2_DATE));
	memprintf(&ptr, "%s\nPCRE2 library supports JIT : %s", ptr,
#ifdef USE_PCRE2_JIT
	          ({
	                  int r;
	                  pcre2_config(PCRE2_CONFIG_JIT, &r);
	                  r ? "yes" : "no (libpcre2 build without JIT?)";
	          })
#else
	          "no (USE_PCRE2_JIT not set)"
#endif
	          );
#endif /* USE_PCRE2 */

#if !defined(USE_PCRE) && !defined(USE_PCRE2)
	memprintf(&ptr, "Built without PCRE or PCRE2 support (using libc's regex instead)");
#endif
	/* NOTE(review): the second argument presumably tells the registry that
	 * <ptr> was allocated and must be freed by it -- confirm.
	 */
	hap_register_build_opts(ptr, 1);
}
460
461 /*
462 * Local variables:
463 * c-indent-level: 8
464 * c-basic-offset: 8
465 * End:
466 */
467