1 /*
2 ** Zabbix
3 ** Copyright (C) 2001-2021 Zabbix SIA
4 **
5 ** This program is free software; you can redistribute it and/or modify
6 ** it under the terms of the GNU General Public License as published by
7 ** the Free Software Foundation; either version 2 of the License, or
8 ** (at your option) any later version.
9 **
10 ** This program is distributed in the hope that it will be useful,
11 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 ** GNU General Public License for more details.
14 **
15 ** You should have received a copy of the GNU General Public License
16 ** along with this program; if not, write to the Free Software
17 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 **/
19
20 #include "common.h"
21 #include "zbxregexp.h"
22 #include "log.h"
23
/* Compiled regular expression: the PCRE pattern together with its optional study data. */
struct zbx_regexp
{
	pcre			*pcre_regexp;	/* compiled pattern; regexp_compile() stores the pcre_compile() result */
	struct pcre_extra	*extra;		/* pcre_study() result; regexp_exec() treats NULL as "no study data" */
};
29
/* One capture group span. An array of these maps onto the ovector of pcre_exec(): */
/* regexp_exec() memcpy()s the leading offset pairs of the ovector into it.        */
typedef struct
{
	int	rm_so;	/* start offset of the group in the subject string, -1 when the group is unused */
	int	rm_eo;	/* end offset of the group; callers compute the length as rm_eo - rm_so */
}
zbx_regmatch_t;
37
#define ZBX_REGEXP_GROUPS_MAX	10	/* Max number of supported capture groups in regular expressions. */
					/* Group \0 contains the matching part of string, groups \1 ...\9 */
					/* contain captured groups (substrings). */
					/* Also sizes the static ovector buffer used by regexp_exec(). */
41
42 /******************************************************************************
43 * *
44 * Function: regexp_compile *
45 * *
46 * Purpose: compiles a regular expression *
47 * *
48 * Parameters: *
49 * pattern - [IN] regular expression as a text string. Empty *
50 * string ("") is allowed, it will match everything. *
51 * NULL is not allowed. *
52 * flags - [IN] regexp compilation parameters passed to pcre_compile. *
53 * PCRE_CASELESS, PCRE_NO_AUTO_CAPTURE, PCRE_MULTILINE. *
54 * regexp - [OUT] output regexp. *
55 * err_msg_static - [OUT] error message if any. Do not deallocate with *
56 * zbx_free(). *
57 * *
58 * Return value: SUCCEED or FAIL *
59 * *
60 ******************************************************************************/
regexp_compile(const char * pattern,int flags,zbx_regexp_t ** regexp,const char ** err_msg_static)61 static int regexp_compile(const char *pattern, int flags, zbx_regexp_t **regexp, const char **err_msg_static)
62 {
63 int error_offset = -1;
64 pcre *pcre_regexp;
65 struct pcre_extra *extra;
66
67 #ifdef PCRE_NO_AUTO_CAPTURE
68 /* If PCRE_NO_AUTO_CAPTURE bit is set in 'flags' but regular expression contains references to numbered */
69 /* capturing groups then reset PCRE_NO_AUTO_CAPTURE bit. Otherwise the regular expression might not compile. */
70
71 if (0 != (flags & PCRE_NO_AUTO_CAPTURE))
72 {
73 const char *pstart = pattern, *offset;
74
75 while (NULL != (offset = strchr(pstart, '\\')))
76 {
77 offset++;
78
79 if (('1' <= *offset && *offset <= '9') || 'g' == *offset)
80 {
81 flags ^= PCRE_NO_AUTO_CAPTURE;
82 break;
83 }
84
85 if (*offset == '\\')
86 offset++;
87
88 pstart = offset;
89 }
90 }
91 #endif
92 if (NULL == (pcre_regexp = pcre_compile(pattern, flags, err_msg_static, &error_offset, NULL)))
93 return FAIL;
94
95 if (NULL != regexp)
96 {
97 if (NULL == (extra = pcre_study(pcre_regexp, 0, err_msg_static)) && NULL != *err_msg_static)
98 {
99 pcre_free(pcre_regexp);
100 return FAIL;
101 }
102
103 *regexp = (zbx_regexp_t *)zbx_malloc(NULL, sizeof(zbx_regexp_t));
104 (*regexp)->pcre_regexp = pcre_regexp;
105 (*regexp)->extra = extra;
106 }
107 else
108 pcre_free(pcre_regexp);
109
110 return SUCCEED;
111 }
112
113 /*******************************************************
114 * *
115 * Function: zbx_regexp_compile *
116 * *
117 * Purpose: public wrapper for regexp_compile *
118 * *
119 *******************************************************/
zbx_regexp_compile(const char * pattern,zbx_regexp_t ** regexp,const char ** err_msg_static)120 int zbx_regexp_compile(const char *pattern, zbx_regexp_t **regexp, const char **err_msg_static)
121 {
122 #ifdef PCRE_NO_AUTO_CAPTURE
123 return regexp_compile(pattern, PCRE_MULTILINE | PCRE_NO_AUTO_CAPTURE, regexp, err_msg_static);
124 #else
125 return regexp_compile(pattern, PCRE_MULTILINE, regexp, err_msg_static);
126 #endif
127 }
128
129 /*******************************************************
130 * *
131 * Function: zbx_regexp_compile_ext *
132 * *
133 * Purpose: public wrapper for regexp_compile *
134 * *
135 *******************************************************/
zbx_regexp_compile_ext(const char * pattern,zbx_regexp_t ** regexp,int flags,const char ** err_msg_static)136 int zbx_regexp_compile_ext(const char *pattern, zbx_regexp_t **regexp, int flags, const char **err_msg_static)
137 {
138 return regexp_compile(pattern, flags, regexp, err_msg_static);
139 }
140
141 /****************************************************************************************************
142 * *
143 * Function: regexp_prepare *
144 * *
145 * Purpose: wrapper for zbx_regexp_compile. Caches and reuses the last used regexp. *
146 * *
147 ****************************************************************************************************/
regexp_prepare(const char * pattern,int flags,zbx_regexp_t ** regexp,const char ** err_msg_static)148 static int regexp_prepare(const char *pattern, int flags, zbx_regexp_t **regexp, const char **err_msg_static)
149 {
150 static ZBX_THREAD_LOCAL zbx_regexp_t *curr_regexp = NULL;
151 static ZBX_THREAD_LOCAL char *curr_pattern = NULL;
152 static ZBX_THREAD_LOCAL int curr_flags = 0;
153 int ret = SUCCEED;
154
155 if (NULL == curr_regexp || 0 != strcmp(curr_pattern, pattern) || curr_flags != flags)
156 {
157 if (NULL != curr_regexp)
158 {
159 zbx_regexp_free(curr_regexp);
160 zbx_free(curr_pattern);
161 }
162
163 curr_regexp = NULL;
164 curr_pattern = NULL;
165 curr_flags = 0;
166
167 if (SUCCEED == regexp_compile(pattern, flags, &curr_regexp, err_msg_static))
168 {
169 curr_pattern = zbx_strdup(curr_pattern, pattern);
170 curr_flags = flags;
171 }
172 else
173 ret = FAIL;
174 }
175
176 *regexp = curr_regexp;
177 return ret;
178 }
179
180 /***********************************************************************************
181 * *
182 * Function: regexp_exec *
183 * *
184 * Purpose: wrapper for pcre_exec(), searches for a given pattern, specified by *
185 * regexp, in the string *
186 * *
187 * Parameters: *
188 * string - [IN] string to be matched against 'regexp' *
189 * regexp - [IN] precompiled regular expression *
190 * flags - [IN] execution flags for matching *
191 * count - [IN] count of elements in matches array *
192 * matches - [OUT] matches (can be NULL if matching results are *
193 * not required) *
194 * *
195 * Return value: ZBX_REGEXP_MATCH - successful match *
196 * ZBX_REGEXP_NO_MATCH - no match *
197 * FAIL - error occurred *
198 * *
199 ***********************************************************************************/
regexp_exec(const char * string,const zbx_regexp_t * regexp,int flags,int count,zbx_regmatch_t * matches)200 static int regexp_exec(const char *string, const zbx_regexp_t *regexp, int flags, int count,
201 zbx_regmatch_t *matches)
202 {
203 #define MATCHES_BUFF_SIZE (ZBX_REGEXP_GROUPS_MAX * 3) /* see pcre_exec() in "man pcreapi" why 3 */
204
205 int result, r;
206 static ZBX_THREAD_LOCAL int matches_buff[MATCHES_BUFF_SIZE];
207 int *ovector = NULL;
208 int ovecsize = 3 * count; /* see pcre_exec() in "man pcreapi" why 3 */
209 struct pcre_extra extra, *pextra;
210 #if defined(PCRE_EXTRA_MATCH_LIMIT) && defined(PCRE_EXTRA_MATCH_LIMIT_RECURSION) && !defined(_WINDOWS) && !defined(__MINGW32__)
211 static unsigned long int recursion_limit = 0;
212
213 if (0 == recursion_limit)
214 {
215 struct rlimit rlim;
216
217 /* calculate recursion limit, PCRE man page suggests to reckon on about 500 bytes per recursion */
218 /* but to be on the safe side - reckon on 800 bytes and do not set limit higher than 100000 */
219 if (0 == getrlimit(RLIMIT_STACK, &rlim))
220 recursion_limit = rlim.rlim_cur < 80000000 ? rlim.rlim_cur / 800 : 100000;
221 else
222 recursion_limit = 10000; /* if stack size cannot be retrieved then assume ~8 MB */
223 }
224 #endif
225
226 if (ZBX_REGEXP_GROUPS_MAX < count)
227 ovector = (int *)zbx_malloc(NULL, (size_t)ovecsize * sizeof(int));
228 else
229 ovector = matches_buff;
230
231 if (NULL == regexp->extra)
232 {
233 pextra = &extra;
234 pextra->flags = 0;
235 }
236 else
237 pextra = regexp->extra;
238 #if defined(PCRE_EXTRA_MATCH_LIMIT) && defined(PCRE_EXTRA_MATCH_LIMIT_RECURSION)
239 pextra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
240 pextra->match_limit = 1000000;
241 #if defined(_WINDOWS) || defined(__MINGW32__)
242 pextra->match_limit_recursion = ZBX_PCRE_RECURSION_LIMIT;
243 #else
244 pextra->match_limit_recursion = recursion_limit;
245 #endif
246 #endif
247 /* see "man pcreapi" about pcre_exec() return value and 'ovector' size and layout */
248 if (0 <= (r = pcre_exec(regexp->pcre_regexp, pextra, string, strlen(string), flags, 0, ovector, ovecsize)))
249 {
250 if (NULL != matches)
251 memcpy(matches, ovector, (size_t)((0 < r) ? MIN(r, count) : count) * sizeof(zbx_regmatch_t));
252
253 result = ZBX_REGEXP_MATCH;
254 }
255 else if (PCRE_ERROR_NOMATCH == r)
256 {
257 result = ZBX_REGEXP_NO_MATCH;
258 }
259 else
260 {
261 zabbix_log(LOG_LEVEL_WARNING, "%s() failed with error %d", __func__, r);
262 result = FAIL;
263 }
264
265 if (ZBX_REGEXP_GROUPS_MAX < count)
266 zbx_free(ovector);
267
268 return result;
269 #undef MATCHES_BUFF_SIZE
270 }
271
272 /******************************************************************************
273 * *
274 * Function: zbx_regexp_free *
275 * *
276 * Purpose: wrapper for pcre_free *
277 * *
278 * Parameters: regexp - [IN] compiled regular expression *
279 * *
280 ******************************************************************************/
zbx_regexp_free(zbx_regexp_t * regexp)281 void zbx_regexp_free(zbx_regexp_t *regexp)
282 {
283 /* pcre_free_study() was added to the API for release 8.20 while extra was available before */
284 #ifdef PCRE_CONFIG_JIT
285 pcre_free_study(regexp->extra);
286 #else
287 pcre_free(regexp->extra);
288 #endif
289 pcre_free(regexp->pcre_regexp);
290 zbx_free(regexp);
291 }
292
293 /******************************************************************************
294 * *
295 * Function: zbx_regexp_match_precompiled *
296 * *
297 * Purpose: checks if string matches a precompiled regular expression without *
298 * returning matching groups *
299 * *
300 * Parameters: string - [IN] string to be matched *
301 * regex - [IN] precompiled regular expression *
302 * *
303 * Return value: 0 - successful match *
304 * nonzero - no match *
305 * *
306 * Comments: use this function for better performance if many strings need to *
307 * be matched against the same regular expression *
308 * *
309 ******************************************************************************/
zbx_regexp_match_precompiled(const char * string,const zbx_regexp_t * regexp)310 int zbx_regexp_match_precompiled(const char *string, const zbx_regexp_t *regexp)
311 {
312 return (ZBX_REGEXP_MATCH == regexp_exec(string, regexp, 0, 0, NULL)) ? 0 : -1;
313 }
314
315 /****************************************************************************************************
316 * *
317 * Function: zbx_regexp *
318 * *
319 * Purpose: compiles and executes a regexp *
320 * *
321 * Parameters: *
322 * string - [IN] string to be matched against 'regexp' *
323 * pattern - [IN] regular expression pattern *
324 * flags - [IN] execution flags for matching *
325 * len - [OUT] length of matched string, *
326 * 0 in case of no match or *
327 * FAIL if an error occurred. *
328 * *
329 * Return value: pointer to the matched substring or null *
330 * *
331 ****************************************************************************************************/
zbx_regexp(const char * string,const char * pattern,int flags,int * len)332 static char *zbx_regexp(const char *string, const char *pattern, int flags, int *len)
333 {
334 char *c = NULL;
335 zbx_regmatch_t match;
336 zbx_regexp_t *regexp = NULL;
337 const char* error = NULL;
338
339 if (NULL != len)
340 *len = FAIL;
341
342 if (SUCCEED != regexp_prepare(pattern, flags, ®exp, &error))
343 return NULL;
344
345 if (NULL != string)
346 {
347 int r;
348
349 if (ZBX_REGEXP_MATCH == (r = regexp_exec(string, regexp, 0, 1, &match)))
350 {
351 c = (char *)string + match.rm_so;
352
353 if (NULL != len)
354 *len = match.rm_eo - match.rm_so;
355 }
356 else if (ZBX_REGEXP_NO_MATCH == r && NULL != len)
357 *len = 0;
358 }
359
360 return c;
361 }
362
zbx_regexp_match(const char * string,const char * pattern,int * len)363 char *zbx_regexp_match(const char *string, const char *pattern, int *len)
364 {
365 return zbx_regexp(string, pattern, PCRE_MULTILINE, len);
366 }
367
/******************************************************************************
 *
 * Function: strncpy_alloc
 *
 * Purpose: zbx_strncpy_alloc with maximum allocated memory limit.
 *
 * Parameters: str       - [IN/OUT] destination buffer pointer
 *             alloc_len - [IN/OUT] already allocated memory
 *             offset    - [IN/OUT] offset for writing
 *             src       - [IN] copied string
 *             n         - [IN] maximum number of bytes to copy
 *             limit     - [IN] maximum number of bytes to be allocated,
 *                              0 means no limit
 *
 ******************************************************************************/
static void	strncpy_alloc(char **str, size_t *alloc_len, size_t *offset, const char *src, size_t n, size_t limit)
{
	size_t	copy_len = n;

	/* shrink the request so that the write offset never passes the limit */
	if (0 != limit && limit < *offset + n)
	{
		if (*offset < limit)
			copy_len = limit - *offset;
		else
			copy_len = 0;
	}

	zbx_strncpy_alloc(str, alloc_len, offset, src, copy_len);
}
389
390 /*********************************************************************************
391 * *
392 * Function: regexp_sub_replace *
393 * *
394 * Purpose: Constructs a string from the specified template and regexp match. *
395 * *
396 * Parameters: text - [IN] the input string. *
397 * output_template - [IN] the output string template. The output *
398 * string is constructed from template by *
399 * replacing \<n> sequences with the captured *
400 * regexp group. *
401 * If the output template is NULL or contains *
402 * empty string then a copy of the whole *
403 * input string is returned. *
404 * match - [IN] the captured group data *
405 * nmatch - [IN] the number of items in captured group data *
406 * limit - [IN] size limit for memory allocation *
407 * 0 means no limit *
408 * *
409 * Return value: Allocated string containing output value *
410 * *
411 *********************************************************************************/
regexp_sub_replace(const char * text,const char * output_template,zbx_regmatch_t * match,int nmatch,size_t limit)412 static char *regexp_sub_replace(const char *text, const char *output_template, zbx_regmatch_t *match, int nmatch,
413 size_t limit)
414 {
415 char *ptr = NULL;
416 const char *pstart = output_template, *pgroup;
417 size_t size = 0, offset = 0;
418 int group_index;
419
420 if (NULL == output_template || '\0' == *output_template)
421 return zbx_strdup(NULL, text);
422
423 while (NULL != (pgroup = strchr(pstart, '\\')))
424 {
425 switch (*(++pgroup))
426 {
427 case '\\':
428 strncpy_alloc(&ptr, &size, &offset, pstart, pgroup - pstart, limit);
429 pstart = pgroup + 1;
430 continue;
431
432 case '0':
433 case '1':
434 case '2':
435 case '3':
436 case '4':
437 case '5':
438 case '6':
439 case '7':
440 case '8':
441 case '9':
442 strncpy_alloc(&ptr, &size, &offset, pstart, pgroup - pstart - 1, limit);
443 group_index = *pgroup - '0';
444 if (group_index < nmatch && -1 != match[group_index].rm_so)
445 {
446 strncpy_alloc(&ptr, &size, &offset, text + match[group_index].rm_so,
447 match[group_index].rm_eo - match[group_index].rm_so, limit);
448 }
449 pstart = pgroup + 1;
450 continue;
451
452 case '@':
453 /* artificial construct to replace the first captured group or fail */
454 /* if the regular expression pattern contains no groups */
455 if (-1 == match[1].rm_so)
456 {
457 zbx_free(ptr);
458 goto out;
459 }
460
461 strncpy_alloc(&ptr, &size, &offset, text + match[1].rm_so,
462 match[1].rm_eo - match[1].rm_so, limit);
463
464 pstart = pgroup + 1;
465 continue;
466
467 default:
468 strncpy_alloc(&ptr, &size, &offset, pstart, pgroup - pstart, limit);
469 pstart = pgroup;
470 }
471
472 if (0 != limit && offset >= limit)
473 break;
474 }
475
476 if ('\0' != *pstart)
477 strncpy_alloc(&ptr, &size, &offset, pstart, strlen(pstart), limit);
478 out:
479 if (NULL != ptr)
480 {
481 if (0 != limit && offset >= limit)
482 {
483 size = offset;
484 offset--;
485
486 /* ensure that the string is not cut in the middle of UTF-8 sequence */
487 if (0x80 <= (0xc0 & ptr[offset]))
488 {
489 while (0x80 == (0xc0 & ptr[offset]) && 0 < offset)
490 offset--;
491
492 if (zbx_utf8_char_len(&ptr[offset]) != size - offset)
493 ptr[offset] = '\0';
494 }
495 }
496
497 /* Some regexp and output template combinations can produce invalid UTF-8 sequences. */
498 /* For example, regexp "(.)(.)" and output template "\1 \2" produce a valid UTF-8 sequence */
499 /* for single-byte UTF-8 characters and invalid sequence for multi-byte characters. */
500 /* Using (*UTF) modifier (e.g. "(*UTF)(.)(.)") solves the problem for multi-byte characters */
501 /* but it is up to user to add the modifier. To prevent producing invalid UTF-8 sequences do */
502 /* output sanitization. */
503
504 zbx_replace_invalid_utf8(ptr);
505 }
506
507 return ptr;
508 }
509
510 /*********************************************************************************
511 * *
512 * Function: regexp_sub *
513 * *
514 * Purpose: Test if a string matches the specified regular expression. If yes *
515 * then create a return value by substituting '\<n>' sequences in *
516 * output template with the captured groups. *
517 * *
518 * Parameters: string - [IN] the string to parse *
519 * pattern - [IN] the regular expression *
520 * output_template - [IN] the output string template. The output *
521 * string is constructed from template by *
522 * replacing \<n> sequences with the captured *
523 * regexp group. *
524 * If output template is NULL or contains *
525 * empty string then the whole input string *
526 * is used as output value. *
527 * flags - [IN] the pcre_compile() function flags. *
528 * See pcre_compile() manual. *
529 * out - [OUT] the output value if the input string *
530 * matches the specified regular expression *
531 * or NULL otherwise *
532 * *
533 * Return value: SUCCEED - the regular expression match was done *
534 * FAIL - failed to compile regexp *
535 * *
536 *********************************************************************************/
regexp_sub(const char * string,const char * pattern,const char * output_template,int flags,char ** out)537 static int regexp_sub(const char *string, const char *pattern, const char *output_template, int flags, char **out)
538 {
539 const char *error = NULL;
540 zbx_regexp_t *regexp = NULL;
541 zbx_regmatch_t match[ZBX_REGEXP_GROUPS_MAX];
542 unsigned int i;
543
544 if (NULL == string)
545 {
546 zbx_free(*out);
547 return SUCCEED;
548 }
549
550 #ifdef PCRE_NO_AUTO_CAPTURE
551 /* no subpatterns without an output template */
552 if (NULL == output_template || '\0' == *output_template)
553 flags |= PCRE_NO_AUTO_CAPTURE;
554 #endif
555
556 if (FAIL == regexp_prepare(pattern, flags, ®exp, &error))
557 return FAIL;
558
559 zbx_free(*out);
560
561 /* -1 is special pcre value for unused patterns */
562 for (i = 0; i < ARRSIZE(match); i++)
563 match[i].rm_so = match[i].rm_eo = -1;
564
565 if (ZBX_REGEXP_MATCH == regexp_exec(string, regexp, 0, ZBX_REGEXP_GROUPS_MAX, match))
566 *out = regexp_sub_replace(string, output_template, match, ZBX_REGEXP_GROUPS_MAX, 0);
567
568 return SUCCEED;
569 #undef MATCH_SIZE
570 }
571
572 /*********************************************************************************
573 * *
574 * Function: zbx_mregexp_sub_precompiled *
575 * *
576 * Purpose: Test if a string matches precompiled regular expression. If yes *
577 * then create a return value by substituting '\<n>' sequences in *
578 * output template with the captured groups. *
579 * *
580 * Parameters: string - [IN] the string to parse *
581 * regexp - [IN] the precompiled regular expression *
582 * output_template - [IN] the output string template. The output *
583 * string is constructed from template by *
584 * replacing \<n> sequences with the captured *
585 * regexp group. *
586 * If output template is NULL or contains *
587 * empty string then the whole input string *
588 * is used as output value. *
589 * limit - [IN] size limit for memory allocation *
590 * 0 means no limit *
591 * out - [OUT] the output value if the input string *
592 * matches the specified regular expression *
593 * or NULL otherwise *
594 * *
595 * Return value: SUCCEED - the regular expression match was done *
596 * FAIL - failed to match *
597 * *
598 * Comments: Multiline match is performed *
599 * *
600 *********************************************************************************/
zbx_mregexp_sub_precompiled(const char * string,const zbx_regexp_t * regexp,const char * output_template,size_t limit,char ** out)601 int zbx_mregexp_sub_precompiled(const char *string, const zbx_regexp_t *regexp, const char *output_template,
602 size_t limit, char **out)
603 {
604 zbx_regmatch_t match[ZBX_REGEXP_GROUPS_MAX];
605 unsigned int i;
606
607 zbx_free(*out);
608
609 /* -1 is special pcre value for unused patterns */
610 for (i = 0; i < ARRSIZE(match); i++)
611 match[i].rm_so = match[i].rm_eo = -1;
612
613 if (ZBX_REGEXP_MATCH == regexp_exec(string, regexp, 0, ZBX_REGEXP_GROUPS_MAX, match) &&
614 NULL != (*out = regexp_sub_replace(string, output_template, match, ZBX_REGEXP_GROUPS_MAX,
615 limit)))
616 {
617 return SUCCEED;
618 }
619
620 return FAIL;
621 }
622
623 /*********************************************************************************
624 * *
625 * Function: zbx_regexp_sub *
626 * *
627 * Purpose: Test if a string matches the specified regular expression. If yes *
628 * then create a return value by substituting '\<n>' sequences in *
629 * output template with the captured groups. *
630 * *
631 * Parameters: string - [IN] the string to parse *
632 * pattern - [IN] the regular expression *
633 * output_template - [IN] the output string template. The output *
634 * string is constructed from template by *
635 * replacing \<n> sequences with the captured *
636 * regexp group. *
637 * out - [OUT] the output value if the input string *
638 * matches the specified regular expression *
639 * or NULL otherwise *
640 * *
641 * Return value: SUCCEED - the regular expression match was done *
642 * FAIL - failed to compile regexp *
643 * *
644 * Comments: This function performs case sensitive match *
645 * *
646 *********************************************************************************/
zbx_regexp_sub(const char * string,const char * pattern,const char * output_template,char ** out)647 int zbx_regexp_sub(const char *string, const char *pattern, const char *output_template, char **out)
648 {
649 return regexp_sub(string, pattern, output_template, PCRE_MULTILINE, out);
650 }
651
/*********************************************************************************
 *
 * Function: zbx_mregexp_sub
 *
 * Purpose: This function is similar to zbx_regexp_sub() with exception that
 *          the pattern is compiled without any flags (0) instead of
 *          PCRE_MULTILINE.
 *
 *********************************************************************************/
int	zbx_mregexp_sub(const char *string, const char *pattern, const char *output_template, char **out)
{
	const int	flags = 0;

	return regexp_sub(string, pattern, output_template, flags, out);
}
664
665 /*********************************************************************************
666 * *
667 * Function: zbx_iregexp_sub *
668 * *
669 * Purpose: This function is similar to zbx_regexp_sub() with exception that *
670 * case insensitive matches are accepted. *
671 * *
672 *********************************************************************************/
zbx_iregexp_sub(const char * string,const char * pattern,const char * output_template,char ** out)673 int zbx_iregexp_sub(const char *string, const char *pattern, const char *output_template, char **out)
674 {
675 return regexp_sub(string, pattern, output_template, PCRE_CASELESS, out);
676 }
677
678 /******************************************************************************
679 * *
680 * Function: zbx_regexp_clean_expressions *
681 * *
682 * Purpose: frees expression data retrieved by DCget_expressions function or *
683 * prepared with add_regexp_ex() function calls *
684 * *
685 * Parameters: expressions - [IN] a vector of expression data pointers *
686 * *
687 ******************************************************************************/
zbx_regexp_clean_expressions(zbx_vector_ptr_t * expressions)688 void zbx_regexp_clean_expressions(zbx_vector_ptr_t *expressions)
689 {
690 int i;
691
692 for (i = 0; i < expressions->values_num; i++)
693 {
694 zbx_expression_t *regexp = expressions->values[i];
695
696 zbx_free(regexp->name);
697 zbx_free(regexp->expression);
698 zbx_free(regexp);
699 }
700
701 zbx_vector_ptr_clear(expressions);
702 }
703
/* Allocates a new expression from the given attributes (name and expression text are duplicated) */
/* and appends it to the 'regexps' vector. */
void	add_regexp_ex(zbx_vector_ptr_t *regexps, const char *name, const char *expression, int expression_type,
		char exp_delimiter, int case_sensitive)
{
	zbx_expression_t	*entry = zbx_malloc(NULL, sizeof(zbx_expression_t));

	entry->expression_type = expression_type;
	entry->exp_delimiter = exp_delimiter;
	entry->case_sensitive = case_sensitive;

	/* the entry keeps its own copies of both strings */
	entry->name = zbx_strdup(NULL, name);
	entry->expression = zbx_strdup(NULL, expression);

	zbx_vector_ptr_append(regexps, entry);
}
720
721 /**********************************************************************************
722 * *
723 * Function: regexp_match_ex_regsub *
724 * *
725 * Purpose: Test if the string matches regular expression with the specified *
726 * case sensitivity option and allocates output variable to store the *
727 * result if necessary. *
728 * *
729 * Parameters: string - [IN] the string to check *
730 * pattern - [IN] the regular expression *
731 * case_sensitive - [IN] ZBX_IGNORE_CASE - case insensitive match. *
732 * ZBX_CASE_SENSITIVE - case sensitive match. *
733 * output_template - [IN] the output string template. The output *
734 * string is constructed from the template by *
735 * replacing \<n> sequences with the captured *
736 * regexp group. *
737 * If output_template is NULL the whole *
738 * matched string is returned. *
739 * output - [OUT] a reference to the variable where allocated *
740 * memory containing the resulting value *
741 * (substitution) is stored. *
742 * Specify NULL to skip output value creation. *
743 * *
744 * Return value: ZBX_REGEXP_MATCH - the string matches the specified regular *
745 * expression *
746 * ZBX_REGEXP_NO_MATCH - the string does not match the regular *
747 * expression *
748 * FAIL - the string is NULL or the specified *
749 * regular expression is invalid *
750 * *
751 **********************************************************************************/
regexp_match_ex_regsub(const char * string,const char * pattern,int case_sensitive,const char * output_template,char ** output)752 static int regexp_match_ex_regsub(const char *string, const char *pattern, int case_sensitive,
753 const char *output_template, char **output)
754 {
755 int regexp_flags = PCRE_MULTILINE, ret = FAIL;
756
757 if (ZBX_IGNORE_CASE == case_sensitive)
758 regexp_flags |= PCRE_CASELESS;
759
760 if (NULL == output)
761 {
762 if (NULL == zbx_regexp(string, pattern, regexp_flags, &ret))
763 {
764 if (FAIL != ret)
765 ret = ZBX_REGEXP_NO_MATCH;
766 }
767 else
768 ret = ZBX_REGEXP_MATCH;
769 }
770 else
771 {
772 if (SUCCEED == regexp_sub(string, pattern, output_template, regexp_flags, output))
773 {
774 ret = (NULL != *output ? ZBX_REGEXP_MATCH : ZBX_REGEXP_NO_MATCH);
775 }
776 else
777 ret = FAIL;
778 }
779
780 return ret;
781 }
782
783 /**********************************************************************************
784 * *
785 * Function: regexp_match_ex_substring *
786 * *
787 * Purpose: Test if the string contains substring with the specified case *
788 * sensitivity option. *
789 * *
790 * Parameters: string - [IN] the string to check *
791 * pattern - [IN] the substring to search *
792 * case_sensitive - [IN] ZBX_IGNORE_CASE - case insensitive search *
793 * ZBX_CASE_SENSITIVE - case sensitive search *
794 * *
795 * Return value: ZBX_REGEXP_MATCH - string contains the specified substring *
796 * ZBX_REGEXP_NO_MATCH - string does not contain the substring *
797 * *
798 **********************************************************************************/
regexp_match_ex_substring(const char * string,const char * pattern,int case_sensitive)799 static int regexp_match_ex_substring(const char *string, const char *pattern, int case_sensitive)
800 {
801 char *ptr = NULL;
802
803 switch (case_sensitive)
804 {
805 case ZBX_CASE_SENSITIVE:
806 ptr = strstr(string, pattern);
807 break;
808 case ZBX_IGNORE_CASE:
809 ptr = zbx_strcasestr(string, pattern);
810 break;
811 }
812
813 return (NULL != ptr ? ZBX_REGEXP_MATCH : ZBX_REGEXP_NO_MATCH);
814 }
815
816 /**********************************************************************************
817 * *
818 * Function: regexp_match_ex_substring_list *
819 * *
820 * Purpose: Test if the string contains a substring from list with the specified *
821 * delimiter and case sensitivity option. *
822 * *
823 * Parameters: string - [IN] the string to check *
824 * pattern - [IN] the substring list *
825 * case_sensitive - [IN] ZBX_IGNORE_CASE - case insensitive search *
826 * ZBX_CASE_SENSITIVE - case sensitive search *
827 * delimiter - [IN] the delimiter separating items in the *
828 * substring list *
829 * *
830 * Return value: ZBX_REGEXP_MATCH - string contains a substring from the list *
831 * ZBX_REGEXP_NO_MATCH - string does not contain any substrings *
832 * from the list *
833 * *
834 **********************************************************************************/
regexp_match_ex_substring_list(const char * string,char * pattern,int case_sensitive,char delimiter)835 static int regexp_match_ex_substring_list(const char *string, char *pattern, int case_sensitive, char delimiter)
836 {
837 int ret = ZBX_REGEXP_NO_MATCH;
838 char *s, *c;
839
840 for (s = pattern; '\0' != *s && ZBX_REGEXP_MATCH != ret;)
841 {
842 if (NULL != (c = strchr(s, delimiter)))
843 *c = '\0';
844
845 ret = regexp_match_ex_substring(string, s, case_sensitive);
846
847 if (NULL != c)
848 {
849 *c = delimiter;
850 s = ++c;
851 }
852 else
853 break;
854 }
855
856 return ret;
857 }
858
859 /**********************************************************************************
860 * *
861 * Function: regexp_sub_ex *
862 * *
863 * Purpose: Test if the string matches regular expression with the specified *
864 * case sensitivity option and allocates output variable to store the *
865 * result if necessary. *
866 * *
867 * Parameters: regexps - [IN] the global regular expression array *
868 * string - [IN] the string to check *
869 * pattern - [IN] the regular expression or global regular *
870 * expression name (@<global regexp name>). *
871 * case_sensitive - [IN] ZBX_IGNORE_CASE - case insensitive match *
872 * ZBX_CASE_SENSITIVE - case sensitive match *
873 * output_template - [IN] the output string template. For regular *
874 * expressions (type Result is TRUE) output *
875 * string is constructed from the template by *
876 * replacing '\<n>' sequences with the *
877 * captured regexp group. *
878 * If output_template is NULL then the whole *
879 * matched string is returned. *
880 * output - [OUT] a reference to the variable where allocated *
881 * memory containing the resulting value *
882 * (substitution) is stored. *
883 * Specify NULL to skip output value creation. *
884 * *
885 * Return value: ZBX_REGEXP_MATCH - the string matches the specified regular *
886 * expression *
887 * ZBX_REGEXP_NO_MATCH - the string does not match the specified *
888 * regular expression *
889 * FAIL - invalid regular expression *
890 * *
891 * Comments: For regular expressions and global regular expressions with 'Result *
892 * is TRUE' type the 'output_template' substitution result is stored *
893 * into 'output' variable. For other global regular expression types *
894 * the whole string is stored into 'output' variable. *
895 * *
896 **********************************************************************************/
regexp_sub_ex(const zbx_vector_ptr_t * regexps,const char * string,const char * pattern,int case_sensitive,const char * output_template,char ** output)897 int regexp_sub_ex(const zbx_vector_ptr_t *regexps, const char *string, const char *pattern,
898 int case_sensitive, const char *output_template, char **output)
899 {
900 int i, ret = FAIL;
901 char *output_accu; /* accumulator for 'output' when looping over global regexp subexpressions */
902
903 if (NULL == pattern || '\0' == *pattern)
904 {
905 /* always match when no pattern is specified */
906 ret = ZBX_REGEXP_MATCH;
907 goto out;
908 }
909
910 if ('@' != *pattern) /* not a global regexp */
911 {
912 ret = regexp_match_ex_regsub(string, pattern, case_sensitive, output_template, output);
913 goto out;
914 }
915
916 pattern++;
917 output_accu = NULL;
918
919 for (i = 0; i < regexps->values_num; i++) /* loop over global regexp subexpressions */
920 {
921 const zbx_expression_t *regexp = regexps->values[i];
922
923 if (0 != strcmp(regexp->name, pattern))
924 continue;
925
926 switch (regexp->expression_type)
927 {
928 case EXPRESSION_TYPE_TRUE:
929 if (NULL != output)
930 {
931 char *output_tmp = NULL;
932
933 if (ZBX_REGEXP_MATCH == (ret = regexp_match_ex_regsub(string,
934 regexp->expression, regexp->case_sensitive, output_template,
935 &output_tmp)))
936 {
937 zbx_free(output_accu);
938 output_accu = output_tmp;
939 }
940 }
941 else
942 {
943 ret = regexp_match_ex_regsub(string, regexp->expression, regexp->case_sensitive,
944 NULL, NULL);
945 }
946 break;
947 case EXPRESSION_TYPE_FALSE:
948 ret = regexp_match_ex_regsub(string, regexp->expression, regexp->case_sensitive,
949 NULL, NULL);
950 if (FAIL != ret) /* invert output value */
951 ret = (ZBX_REGEXP_MATCH == ret ? ZBX_REGEXP_NO_MATCH : ZBX_REGEXP_MATCH);
952 break;
953 case EXPRESSION_TYPE_INCLUDED:
954 ret = regexp_match_ex_substring(string, regexp->expression, regexp->case_sensitive);
955 break;
956 case EXPRESSION_TYPE_NOT_INCLUDED:
957 ret = regexp_match_ex_substring(string, regexp->expression, regexp->case_sensitive);
958 /* invert output value */
959 ret = (ZBX_REGEXP_MATCH == ret ? ZBX_REGEXP_NO_MATCH : ZBX_REGEXP_MATCH);
960 break;
961 case EXPRESSION_TYPE_ANY_INCLUDED:
962 ret = regexp_match_ex_substring_list(string, regexp->expression, regexp->case_sensitive,
963 regexp->exp_delimiter);
964 break;
965 default:
966 THIS_SHOULD_NEVER_HAPPEN;
967 ret = FAIL;
968 }
969
970 if (FAIL == ret || ZBX_REGEXP_NO_MATCH == ret)
971 {
972 zbx_free(output_accu);
973 break;
974 }
975 }
976
977 if (ZBX_REGEXP_MATCH == ret && NULL != output_accu)
978 {
979 *output = output_accu;
980 return ZBX_REGEXP_MATCH;
981 }
982 out:
983 if (ZBX_REGEXP_MATCH == ret && NULL != output && NULL == *output)
984 {
985 /* Handle output value allocation for global regular expression types */
986 /* that cannot perform output_template substitution (practically */
987 /* all global regular expression types except EXPRESSION_TYPE_TRUE). */
988 size_t offset = 0, size = 0;
989
990 zbx_strcpy_alloc(output, &size, &offset, string);
991 }
992
993 return ret;
994 }
995
/* Match-only variant of regexp_sub_ex(): the same matching is performed, but no output */
/* template is applied and no output value is created.                                  */
int	regexp_match_ex(const zbx_vector_ptr_t *regexps, const char *string, const char *pattern, int case_sensitive)
{
	const char	*no_template = NULL;
	char		**no_output = NULL;

	return regexp_sub_ex(regexps, string, pattern, case_sensitive, no_template, no_output);
}
1000
zbx_global_regexp_exists(const char * name,const zbx_vector_ptr_t * regexps)1001 int zbx_global_regexp_exists(const char *name, const zbx_vector_ptr_t *regexps)
1002 {
1003 int i;
1004
1005 for (i = 0; i < regexps->values_num; i++)
1006 {
1007 const zbx_expression_t *regexp = (const zbx_expression_t *)regexps->values[i];
1008
1009 if (0 == strcmp(regexp->name, name))
1010 return SUCCEED;
1011 }
1012
1013 return FAIL;
1014 }
1015
/**********************************************************************************
 *                                                                                *
 * Function: zbx_regexp_escape_stringsize                                         *
 *                                                                                *
 * Purpose: calculate the length a string will have after regexp escaping         *
 *                                                                                *
 * Parameters: string - [IN] the string to measure, can be NULL                   *
 *                                                                                *
 * Return value: the escaped length without the terminating zero, 0 for a         *
 *               NULL or empty string                                             *
 *                                                                                *
 * Comments: each of . \ + * ? [ ^ ] $ ( ) { } = ! > < | : - # counts as two      *
 *           characters, any other character as one                               *
 *                                                                                *
 **********************************************************************************/
static size_t	zbx_regexp_escape_stringsize(const char *string)
{
	/* characters that get a backslash prepended when the string is escaped */
	const char	*special = ".\\+*?[^]$(){}=!><|:-#";
	size_t		total = 0;

	if (NULL == string)
		return 0;

	while ('\0' != *string)
	{
		total += (NULL != strchr(special, *string) ? 2 : 1);
		string++;
	}

	return total;
}
1069
/**********************************************************************************
 *                                                                                *
 * Function: zbx_regexp_escape_string                                             *
 *                                                                                *
 * Purpose: copy a string prepending '\' to each of the following symbols:        *
 *          . \ + * ? [ ^ ] $ ( ) { } = ! > < | : - #                             *
 *                                                                                *
 * Parameters: p      - [OUT] the buffer for the escaped string                   *
 *             string - [IN] the string to escape                                 *
 *                                                                                *
 * Comments: The output is not zero terminated by this function. The buffer       *
 *           must hold at least zbx_regexp_escape_stringsize(string) bytes.       *
 *                                                                                *
 **********************************************************************************/
static void	zbx_regexp_escape_string(char *p, const char *string)
{
	/* characters that get a backslash prepended */
	const char	*special = ".\\+*?[^]$(){}=!><|:-#";
	const char	*src = string;

	while ('\0' != *src)
	{
		if (NULL != strchr(special, *src))
			*p++ = '\\';

		*p++ = *src++;
	}
}
1120
/**********************************************************************************
 *                                                                                *
 * Function: zbx_regexp_escape                                                    *
 *                                                                                *
 * Purpose: escape symbols that have a special meaning in regular expressions     *
 *                                                                                *
 * Parameters: string - [IN/OUT] the string to escape; it is freed and            *
 *                               replaced with a newly allocated copy             *
 *                                                                                *
 * Comments: A NULL or empty string is left untouched.                            *
 *                                                                                *
 **********************************************************************************/
void	zbx_regexp_escape(char **string)
{
	char	*escaped;
	size_t	escaped_len = zbx_regexp_escape_stringsize(*string);

	/* zero length means NULL or empty input - there is nothing to escape */
	if (0 == escaped_len)
		return;

	escaped = zbx_malloc(NULL, escaped_len + 1);
	zbx_regexp_escape_string(escaped, *string);
	escaped[escaped_len] = '\0';	/* the helper above does not terminate its output */

	zbx_free(*string);
	*string = escaped;
}
1144
/**********************************************************************************
 *                                                                                *
 * Function: zbx_wildcard_minimize                                                *
 *                                                                                *
 * Purpose: collapse every run of consecutive '*' characters into a single        *
 *          '*', modifying the string in place                                    *
 *                                                                                *
 * Parameters: str - [IN/OUT] the wildcard expression to minimize                 *
 *                                                                                *
 **********************************************************************************/
void	zbx_wildcard_minimize(char *str)
{
	const char	*src;
	char		*dst = str;
	char		prev = '\0';	/* the previously read character, '\0' before the first one */

	for (src = str; '\0' != *src; src++)
	{
		char	cur = *src;

		/* an asterisk directly following another asterisk adds nothing - drop it */
		if ('*' == cur && '*' == prev)
			continue;

		prev = cur;
		*dst++ = cur;
	}

	*dst = '\0';
}
1177
/**********************************************************************************
 *                                                                                *
 * Function: zbx_wildcard_match                                                   *
 *                                                                                *
 * Purpose: match a string against a wildcard expression in which '*' stands      *
 *          for any sequence of characters (including an empty one)               *
 *                                                                                *
 * Parameters: value    - [IN] the string to match                                *
 *             wildcard - [IN] the wildcard expression                            *
 *                                                                                *
 * Return value: 1 - the value matches the wildcard                               *
 *               0 - otherwise                                                    *
 *                                                                                *
 * Comments: All characters other than '*' are compared literally.                *
 *                                                                                *
 * Author: Andrejs Tumilovics                                                     *
 *                                                                                *
 **********************************************************************************/
int	zbx_wildcard_match(const char *value, const char *wildcard)
{
	/* positions to resume from when the text after the latest '*' fails to match */
	const char	*retry_value = value, *retry_wildcard = wildcard;

	/* everything before the first '*' has to match character by character */
	for (; '\0' != *value && '*' != *wildcard; value++, wildcard++)
	{
		if (*value != *wildcard)
			return 0;
	}

	/* if the value is not exhausted here, the wildcard is positioned at a '*' */
	while ('\0' != *value)
	{
		if ('*' == *wildcard)
		{
			/* a trailing '*' swallows the rest of the value */
			if ('\0' == *(++wildcard))
				return 1;

			retry_wildcard = wildcard;
			retry_value = value + 1;
			continue;
		}

		if (*value == *wildcard)
		{
			value++;
			wildcard++;
			continue;
		}

		/* mismatch: let the latest '*' consume one more character and try again */
		wildcard = retry_wildcard;
		value = retry_value++;
	}

	/* the value is exhausted - whatever is left of the wildcard may only be asterisks */
	while ('*' == *wildcard)
		wildcard++;

	return ('\0' == *wildcard ? 1 : 0);
}
1233