1 /*
2 * Support for PCRE regex variant
3 *
4 * Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
5 * Copyright (C) 2007-2013 Sourcefire, Inc.
6 *
7 * Authors: Kevin Lin
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
21 * MA 02110-1301, USA.
22 */
23
24 #if HAVE_CONFIG_H
25 #include "clamav-config.h"
26 #endif
27
28 #if HAVE_PCRE
29 #if USING_PCRE2
30 #define PCRE2_CODE_UNIT_WIDTH 8
31 #include <pcre2.h>
32 #else
33 #include <pcre.h>
34 #endif
35
36 #include "clamav.h"
37 #include "others.h"
38 #include "regex_pcre.h"
39
40 #if USING_PCRE2
41 /* NOTE: pcre2 could use mpool through ext */
/**
 * @brief Allocation hook passed to pcre2_general_context_create().
 *
 * @param size  Number of bytes requested.
 * @param ext   Opaque user pointer supplied by the caller of the hook; ignored.
 * @return      The pointer returned by cli_malloc() for this request.
 */
void *cli_pcre_malloc(size_t size, void *ext)
{
    void *block;

    UNUSEDPARAM(ext);

    block = cli_malloc(size);
    return block;
}
47
/**
 * @brief Deallocation hook passed to pcre2_general_context_create().
 *
 * @param ptr  Block to release; handed straight to free().
 * @param ext  Opaque user pointer supplied by the caller of the hook; ignored.
 */
void cli_pcre_free(void *ptr, void *ext)
{
    UNUSEDPARAM(ext);

    free(ptr);
}
53 #endif
54
55 /* cli_pcre_init_internal: redefine pcre_malloc and pcre_free; pcre2 does this during compile */
cli_pcre_init_internal()56 cl_error_t cli_pcre_init_internal()
57 {
58 #if !USING_PCRE2
59 pcre_malloc = cli_malloc;
60 pcre_free = free;
61 pcre_stack_malloc = cli_malloc;
62 pcre_stack_free = free;
63 #endif
64
65 return CL_SUCCESS;
66 }
67
cli_pcre_addoptions(struct cli_pcre_data * pd,const char ** opt,int errout)68 cl_error_t cli_pcre_addoptions(struct cli_pcre_data *pd, const char **opt, int errout)
69 {
70 if (!pd || !opt || !(*opt))
71 return CL_ENULLARG;
72
73 while (**opt != '\0') {
74 switch (**opt) {
75 #if USING_PCRE2
76 case 'i':
77 pd->options |= PCRE2_CASELESS;
78 break;
79 case 's':
80 pd->options |= PCRE2_DOTALL;
81 break;
82 case 'm':
83 pd->options |= PCRE2_MULTILINE;
84 break;
85 case 'x':
86 pd->options |= PCRE2_EXTENDED;
87 break;
88
89 /* these are pcre2 specific... don't work with perl */
90 case 'A':
91 pd->options |= PCRE2_ANCHORED;
92 break;
93 case 'E':
94 pd->options |= PCRE2_DOLLAR_ENDONLY;
95 break;
96 case 'U':
97 pd->options |= PCRE2_UNGREEDY;
98 break;
99 #else
100 case 'i':
101 pd->options |= PCRE_CASELESS;
102 break;
103 case 's':
104 pd->options |= PCRE_DOTALL;
105 break;
106 case 'm':
107 pd->options |= PCRE_MULTILINE;
108 break;
109 case 'x':
110 pd->options |= PCRE_EXTENDED;
111 break;
112
113 /* these are pcre specific... don't work with perl */
114 case 'A':
115 pd->options |= PCRE_ANCHORED;
116 break;
117 case 'E':
118 pd->options |= PCRE_DOLLAR_ENDONLY;
119 break;
120 case 'U':
121 pd->options |= PCRE_UNGREEDY;
122 break;
123 #endif
124 default:
125 if (errout) {
126 cli_errmsg("cli_pcre_addoptions: unknown/extra pcre option encountered %c\n", **opt);
127 return CL_EMALFDB;
128 } else
129 return CL_EPARSE; /* passed to caller to handle */
130 }
131 (*opt)++;
132 }
133
134 return CL_SUCCESS;
135 }
136
137 #if USING_PCRE2
cli_pcre_compile(struct cli_pcre_data * pd,long long unsigned match_limit,long long unsigned match_limit_recursion,unsigned int options,int opt_override)138 cl_error_t cli_pcre_compile(struct cli_pcre_data *pd, long long unsigned match_limit, long long unsigned match_limit_recursion, unsigned int options, int opt_override)
139 {
140 int errornum;
141 PCRE2_SIZE erroffset;
142 pcre2_general_context *gctx;
143 pcre2_compile_context *cctx;
144
145 if (!pd || !pd->expression) {
146 cli_errmsg("cli_pcre_compile: NULL pd or NULL pd->expression\n");
147 return CL_ENULLARG;
148 }
149
150 gctx = pcre2_general_context_create(cli_pcre_malloc, cli_pcre_free, NULL);
151 if (!gctx) {
152 cli_errmsg("cli_pcre_compile: Unable to allocate memory for general context\n");
153 return CL_EMEM;
154 }
155
156 cctx = pcre2_compile_context_create(gctx);
157 if (!cctx) {
158 cli_errmsg("cli_pcre_compile: Unable to allocate memory for compile context\n");
159 pcre2_general_context_free(gctx);
160 return CL_EMEM;
161 }
162
163 /* compile the pcre2 regex last arg is charset, allow for options override */
164 if (opt_override)
165 pd->re = pcre2_compile((PCRE2_SPTR8)pd->expression, PCRE2_ZERO_TERMINATED, options, &errornum, &erroffset, cctx); /* pd->re handled by pcre2 -> call pcre_free() -> calls free() */
166 else
167 pd->re = pcre2_compile((PCRE2_SPTR8)pd->expression, PCRE2_ZERO_TERMINATED, pd->options, &errornum, &erroffset, cctx); /* pd->re handled by pcre2 -> call pcre_free() -> calls free() */
168 if (pd->re == NULL) {
169 PCRE2_UCHAR errmsg[256];
170 pcre2_get_error_message(errornum, errmsg, sizeof(errmsg));
171 cli_errmsg("cli_pcre_compile: PCRE2 compilation failed at offset %llu: %s\n",
172 (long long unsigned)erroffset, errmsg);
173 pcre2_compile_context_free(cctx);
174 pcre2_general_context_free(gctx);
175 return CL_EMALFDB;
176 }
177
178 /* setup matching context and set the match limits */
179 pd->mctx = pcre2_match_context_create(gctx);
180 if (!pd->mctx) {
181 cli_errmsg("cli_pcre_compile: Unable to allocate memory for match context\n");
182 pcre2_compile_context_free(cctx);
183 pcre2_general_context_free(gctx);
184 return CL_EMEM;
185 }
186
187 pcre2_set_match_limit(pd->mctx, match_limit);
188 pcre2_set_recursion_limit(pd->mctx, match_limit_recursion);
189
190 /* non-dynamic allocated fields set by caller */
191 pcre2_compile_context_free(cctx);
192 pcre2_general_context_free(gctx);
193 return CL_SUCCESS;
194 }
195 #else
cli_pcre_compile(struct cli_pcre_data * pd,long long unsigned match_limit,long long unsigned match_limit_recursion,unsigned int options,int opt_override)196 cl_error_t cli_pcre_compile(struct cli_pcre_data *pd, long long unsigned match_limit, long long unsigned match_limit_recursion, unsigned int options, int opt_override)
197 {
198 const char *error;
199 int erroffset;
200
201 if (!pd || !pd->expression) {
202 cli_errmsg("cli_pcre_compile: NULL pd or NULL pd->expression\n");
203 return CL_ENULLARG;
204 }
205
206 /* compile the pcre regex last arg is charset, allow for options override */
207 if (opt_override)
208 pd->re = pcre_compile(pd->expression, options, &error, &erroffset, NULL); /* pd->re handled by pcre -> call pcre_free() -> calls free() */
209 else
210 pd->re = pcre_compile(pd->expression, pd->options, &error, &erroffset, NULL); /* pd->re handled by pcre -> call pcre_free() -> calls free() */
211 if (pd->re == NULL) {
212 cli_errmsg("cli_pcre_compile: PCRE compilation failed at offset %d: %s\n", erroffset, error);
213 return CL_EMALFDB;
214 }
215
216 /* now study it... (section totally not from snort) */
217 pd->ex = pcre_study(pd->re, 0, &error);
218 if (!(pd->ex)) {
219 pd->ex = (pcre_extra *)cli_calloc(1, sizeof(*(pd->ex)));
220 if (!(pd->ex)) {
221 cli_errmsg("cli_pcre_compile: Unable to allocate memory for extra data\n");
222 return CL_EMEM;
223 }
224 }
225
226 /* set the match limits */
227 if (pd->ex->flags & PCRE_EXTRA_MATCH_LIMIT) {
228 pd->ex->match_limit = match_limit;
229 } else {
230 pd->ex->flags |= PCRE_EXTRA_MATCH_LIMIT;
231 pd->ex->match_limit = match_limit;
232 }
233
234 /* set the recursion match limits */
235 #ifdef PCRE_EXTRA_MATCH_LIMIT_RECURSION
236 if (pd->ex->flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) {
237 pd->ex->match_limit_recursion = match_limit_recursion;
238 } else {
239 pd->ex->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
240 pd->ex->match_limit_recursion = match_limit_recursion;
241 }
242 #endif /* PCRE_EXTRA_MATCH_LIMIT_RECURSION */
243
244 /* non-dynamic allocated fields set by caller */
245 return CL_SUCCESS;
246 }
247 #endif
248
cli_pcre_match(struct cli_pcre_data * pd,const unsigned char * buffer,size_t buflen,size_t override_offset,int options,struct cli_pcre_results * results)249 int cli_pcre_match(struct cli_pcre_data *pd, const unsigned char *buffer, size_t buflen, size_t override_offset, int options, struct cli_pcre_results *results)
250 {
251 int rc;
252
253 #if USING_PCRE2
254 PCRE2_SIZE *ovector;
255 size_t startoffset;
256 #else
257 int startoffset;
258 #endif
259
260 /* set the startoffset, override if a value is specified */
261 startoffset = pd->search_offset;
262 if (override_offset != pd->search_offset)
263 startoffset = override_offset;
264
265 /* execute the pcre and return */
266 #if USING_PCRE2
267 rc = pcre2_match(pd->re, buffer, buflen, startoffset, options, results->match_data, pd->mctx);
268 if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) {
269 switch (rc) {
270 case PCRE2_ERROR_CALLOUT:
271 break;
272 case PCRE2_ERROR_NOMEMORY:
273 cli_errmsg("cli_pcre_match: pcre_exec: out of memory\n");
274 results->err = CL_EMEM;
275 break;
276 case PCRE2_ERROR_MATCHLIMIT:
277 cli_dbgmsg("cli_pcre_match: pcre_exec: match limit exceeded\n");
278 break;
279 case PCRE2_ERROR_RECURSIONLIMIT:
280 cli_dbgmsg("cli_pcre_match: pcre_exec: recursive limit exceeded\n");
281 break;
282 default:
283 cli_errmsg("cli_pcre_match: pcre_exec: returned error %d\n", rc);
284 results->err = CL_BREAK;
285 }
286 } else if (rc > 0) {
287 ovector = pcre2_get_ovector_pointer(results->match_data);
288
289 results->match[0] = ovector[0];
290 results->match[1] = ovector[1];
291 } else {
292 results->match[0] = results->match[1] = 0;
293 }
294 #else
295 rc = pcre_exec(pd->re, pd->ex, (const char *)buffer, (int)buflen, (int)startoffset, options, results->ovector, OVECCOUNT);
296 if (rc < 0 && rc != PCRE_ERROR_NOMATCH) {
297 switch (rc) {
298 case PCRE_ERROR_CALLOUT:
299 break;
300 case PCRE_ERROR_NOMEMORY:
301 cli_errmsg("cli_pcre_match: pcre_exec: out of memory\n");
302 results->err = CL_EMEM;
303 break;
304 case PCRE_ERROR_MATCHLIMIT:
305 cli_dbgmsg("cli_pcre_match: pcre_exec: match limit exceeded\n");
306 break;
307 case PCRE_ERROR_RECURSIONLIMIT:
308 cli_dbgmsg("cli_pcre_match: pcre_exec: recursive limit exceeded\n");
309 break;
310 default:
311 cli_errmsg("cli_pcre_match: pcre_exec: returned error %d\n", rc);
312 results->err = CL_BREAK;
313 }
314 } else if (rc > 0) {
315 results->match[0] = results->ovector[0];
316 results->match[1] = results->ovector[1];
317 } else {
318 results->match[0] = results->match[1] = 0;
319 }
320 #endif
321 return rc;
322 }
323
324 #define DISABLE_PCRE_REPORT 0
325 #define MATCH_MAXLEN 1028 /*because lolz*/
326
327 /* TODO: audit this function */
328 #if USING_PCRE2
named_substr_print(const struct cli_pcre_data * pd,const unsigned char * buffer,PCRE2_SIZE * ovector)329 static void named_substr_print(const struct cli_pcre_data *pd, const unsigned char *buffer, PCRE2_SIZE *ovector)
330 #else
331 static void named_substr_print(const struct cli_pcre_data *pd, const unsigned char *buffer, int *ovector)
332 #endif
333 {
334 int i, namecount, trunc;
335
336 #if USING_PCRE2
337 PCRE2_SIZE length, j;
338 #else
339 int length, j;
340 #endif
341
342 unsigned char *tabptr;
343 int name_entry_size;
344 unsigned char *name_table;
345 const char *start;
346 char outstr[2 * MATCH_MAXLEN + 1];
347
348 /* determine if there are named substrings */
349 #if USING_PCRE2
350 (void)pcre2_pattern_info(pd->re, PCRE2_INFO_NAMECOUNT, &namecount);
351 #else
352 (void)pcre_fullinfo(pd->re, pd->ex, PCRE_INFO_NAMECOUNT, &namecount);
353 #endif
354 if (namecount <= 0) {
355 cli_dbgmsg("cli_pcre_report: no named substrings\n");
356 } else {
357 cli_dbgmsg("cli_pcre_report: named substrings\n");
358
359 /* extract named substring translation table */
360 #if USING_PCRE2
361 (void)pcre2_pattern_info(pd->re, PCRE2_INFO_NAMETABLE, &name_table);
362 (void)pcre2_pattern_info(pd->re, PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size);
363 #else
364 (void)pcre_fullinfo(pd->re, pd->ex, PCRE_INFO_NAMETABLE, &name_table);
365 (void)pcre_fullinfo(pd->re, pd->ex, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
366 #endif
367
368 /* print named substring information */
369 tabptr = name_table;
370 for (i = 0; i < namecount; i++) {
371 int n = (tabptr[0] << 8) | tabptr[1];
372
373 start = (const char *)buffer + ovector[2 * n];
374 length = ovector[2 * n + 1] - ovector[2 * n];
375
376 trunc = 0;
377 if (length > MATCH_MAXLEN) {
378 trunc = 1;
379 length = MATCH_MAXLEN;
380 }
381
382 for (j = 0; j < length; ++j)
383 snprintf(outstr + (2 * j), sizeof(outstr) - (2 * j), "%02x", (unsigned int)*(start + j));
384
385 cli_dbgmsg("cli_pcre_report: (%d) %*s: %s%s\n", n, name_entry_size - 3, tabptr + 2,
386 outstr, trunc ? " (trunc)" : "");
387 /*
388 cli_dbgmsg("named_substr: (%d) %*s: %.*s%s\n", n, name_entry_size - 3, tabptr + 2,
389 length, start, trunc ? " (trunc)":"");
390 */
391 tabptr += name_entry_size;
392 }
393 }
394 }
395
396 /* TODO: audit this function */
cli_pcre_report(const struct cli_pcre_data * pd,const unsigned char * buffer,size_t buflen,int rc,struct cli_pcre_results * results)397 void cli_pcre_report(const struct cli_pcre_data *pd, const unsigned char *buffer, size_t buflen, int rc, struct cli_pcre_results *results)
398 {
399 int i, trunc;
400
401 #if USING_PCRE2
402 PCRE2_SIZE length, j;
403 #else
404 int length, j;
405 #endif
406
407 const char *start;
408 char outstr[2 * MATCH_MAXLEN + 1];
409
410 #if USING_PCRE2
411 PCRE2_SIZE *ovector;
412 ovector = pcre2_get_ovector_pointer(results->match_data);
413 #else
414 int *ovector = results->ovector;
415 #endif
416
417 /* print out additional diagnostics if cli_debug_flag is set */
418 if (!DISABLE_PCRE_REPORT) {
419 cli_dbgmsg("\n");
420 #if USING_PCRE2
421 cli_dbgmsg("cli_pcre_report: PCRE2 Execution Report:\n");
422 #else
423 cli_dbgmsg("cli_pcre_report: PCRE Execution Report:\n");
424 #endif
425 cli_dbgmsg("cli_pcre_report: running regex /%s/ returns %d\n", pd->expression, rc);
426 if (rc > 0) {
427 /* print out full-match and capture groups */
428 for (i = 0; i < rc; ++i) {
429 start = (const char *)buffer + ovector[2 * i];
430 length = ovector[2 * i + 1] - ovector[2 * i];
431
432 #ifdef USING_PCRE2
433 if (ovector[2 * i + 1] > buflen) {
434 #else
435 if (ovector[2 * i + 1] > (int)buflen) {
436 #endif
437 cli_warnmsg("cli_pcre_report: reported match goes outside buffer\n");
438 continue;
439 }
440
441 trunc = 0;
442 if (length > MATCH_MAXLEN) {
443 trunc = 1;
444 length = MATCH_MAXLEN;
445 }
446
447 for (j = 0; j < length; ++j)
448 snprintf(outstr + (2 * j), sizeof(outstr) - (2 * j), "%02x", (unsigned int)*(start + j));
449
450 cli_dbgmsg("cli_pcre_report: %d: %s%s\n", i, outstr, trunc ? " (trunc)" : "");
451 //cli_dbgmsg("cli_pcre_report: %d: %.*s%s\n", i, length, start, trunc ? " (trunc)":"");
452 }
453
454 named_substr_print(pd, buffer, ovector);
455 }
456 #if USING_PCRE2
457 else if (rc == 0 || rc == PCRE2_ERROR_NOMATCH) {
458 #else
459 else if (rc == 0 || rc == PCRE_ERROR_NOMATCH) {
460 #endif
461 cli_dbgmsg("cli_pcre_report: no match found\n");
462 } else {
463 cli_dbgmsg("cli_pcre_report: error occurred in pcre_match: %d\n", rc);
464 /* error handled by caller */
465 }
466 cli_dbgmsg("cli_pcre_report: PCRE Execution Report End\n");
467 cli_dbgmsg("\n");
468 }
469 }
470
471 cl_error_t cli_pcre_results_reset(struct cli_pcre_results *results, const struct cli_pcre_data *pd)
472 {
473 results->err = CL_SUCCESS;
474 results->match[0] = results->match[1] = 0;
475 #if USING_PCRE2
476 if (results->match_data)
477 pcre2_match_data_free(results->match_data);
478
479 results->match_data = pcre2_match_data_create_from_pattern(pd->re, NULL);
480 if (!results->match_data)
481 return CL_EMEM;
482 #else
483 memset(results->ovector, 0, OVECCOUNT);
484 #endif
485 return CL_SUCCESS;
486 }
487
/**
 * @brief Release the resources held by a results structure.
 *
 * With PCRE2 the match data block is freed and the pointer cleared, so a
 * later cli_pcre_results_reset() or a repeated free cannot release it twice.
 * The legacy PCRE build holds nothing that needs releasing.
 *
 * @param results  Results structure to clean up.
 */
void cli_pcre_results_free(struct cli_pcre_results *results)
{
#if USING_PCRE2
    if (results->match_data) {
        pcre2_match_data_free(results->match_data);
        results->match_data = NULL;
    }
#else
    (void)results;
#endif
}
495
496 void cli_pcre_free_single(struct cli_pcre_data *pd)
497 {
498 #if USING_PCRE2
499 if (pd->re) {
500 pcre2_code_free(pd->re);
501 pd->re = NULL;
502 }
503
504 if (pd->mctx) {
505 pcre2_match_context_free(pd->mctx);
506 pd->mctx = NULL;
507 }
508 #else
509 if (pd->re) {
510 pcre_free(pd->re);
511 pd->re = NULL;
512 }
513 if (pd->ex) {
514 free(pd->ex);
515 pd->ex = NULL;
516 }
517 #endif
518 if (pd->expression) {
519 free(pd->expression);
520 pd->expression = NULL;
521 }
522 }
523 #endif /* HAVE_PCRE */
524