1 /*
2  *  Support for PCRE regex variant
3  *
4  *  Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
5  *  Copyright (C) 2007-2013 Sourcefire, Inc.
6  *
7  *  Authors: Kevin Lin
8  *
9  *  This program is free software; you can redistribute it and/or modify
10  *  it under the terms of the GNU General Public License version 2 as
11  *  published by the Free Software Foundation.
12  *
13  *  This program is distributed in the hope that it will be useful,
14  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  *  GNU General Public License for more details.
17  *
18  *  You should have received a copy of the GNU General Public License
19  *  along with this program; if not, write to the Free Software
20  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
21  *  MA 02110-1301, USA.
22  */
23 
24 #if HAVE_CONFIG_H
25 #include "clamav-config.h"
26 #endif
27 
28 #if HAVE_PCRE
29 #if USING_PCRE2
30 #define PCRE2_CODE_UNIT_WIDTH 8
31 #include <pcre2.h>
32 #else
33 #include <pcre.h>
34 #endif
35 
36 #include "clamav.h"
37 #include "others.h"
38 #include "regex_pcre.h"
39 
40 #if USING_PCRE2
41 /* NOTE: pcre2 could use mpool through ext */
/**
 * Allocation callback in the shape PCRE2 expects for a general context.
 *
 * @param size  Number of bytes requested.
 * @param ext   Opaque user pointer passed through by PCRE2; not used here.
 * @return      The pointer returned by cli_malloc() for the request.
 */
void *cli_pcre_malloc(size_t size, void *ext)
{
    void *block = cli_malloc(size);

    UNUSEDPARAM(ext);
    return block;
}
47 
/**
 * Release callback in the shape PCRE2 expects for a general context.
 *
 * @param ptr  Block to release; handed straight to free().
 * @param ext  Opaque user pointer passed through by PCRE2; not used here.
 */
void cli_pcre_free(void *ptr, void *ext)
{
    free(ptr);
    UNUSEDPARAM(ext);
}
53 #endif
54 
55 /* cli_pcre_init_internal: redefine pcre_malloc and pcre_free; pcre2 does this during compile */
cli_pcre_init_internal()56 cl_error_t cli_pcre_init_internal()
57 {
58 #if !USING_PCRE2
59     pcre_malloc       = cli_malloc;
60     pcre_free         = free;
61     pcre_stack_malloc = cli_malloc;
62     pcre_stack_free   = free;
63 #endif
64 
65     return CL_SUCCESS;
66 }
67 
cli_pcre_addoptions(struct cli_pcre_data * pd,const char ** opt,int errout)68 cl_error_t cli_pcre_addoptions(struct cli_pcre_data *pd, const char **opt, int errout)
69 {
70     if (!pd || !opt || !(*opt))
71         return CL_ENULLARG;
72 
73     while (**opt != '\0') {
74         switch (**opt) {
75 #if USING_PCRE2
76             case 'i':
77                 pd->options |= PCRE2_CASELESS;
78                 break;
79             case 's':
80                 pd->options |= PCRE2_DOTALL;
81                 break;
82             case 'm':
83                 pd->options |= PCRE2_MULTILINE;
84                 break;
85             case 'x':
86                 pd->options |= PCRE2_EXTENDED;
87                 break;
88 
89                 /* these are pcre2 specific... don't work with perl */
90             case 'A':
91                 pd->options |= PCRE2_ANCHORED;
92                 break;
93             case 'E':
94                 pd->options |= PCRE2_DOLLAR_ENDONLY;
95                 break;
96             case 'U':
97                 pd->options |= PCRE2_UNGREEDY;
98                 break;
99 #else
100             case 'i':
101                 pd->options |= PCRE_CASELESS;
102                 break;
103             case 's':
104                 pd->options |= PCRE_DOTALL;
105                 break;
106             case 'm':
107                 pd->options |= PCRE_MULTILINE;
108                 break;
109             case 'x':
110                 pd->options |= PCRE_EXTENDED;
111                 break;
112 
113                 /* these are pcre specific... don't work with perl */
114             case 'A':
115                 pd->options |= PCRE_ANCHORED;
116                 break;
117             case 'E':
118                 pd->options |= PCRE_DOLLAR_ENDONLY;
119                 break;
120             case 'U':
121                 pd->options |= PCRE_UNGREEDY;
122                 break;
123 #endif
124             default:
125                 if (errout) {
126                     cli_errmsg("cli_pcre_addoptions: unknown/extra pcre option encountered %c\n", **opt);
127                     return CL_EMALFDB;
128                 } else
129                     return CL_EPARSE; /* passed to caller to handle */
130         }
131         (*opt)++;
132     }
133 
134     return CL_SUCCESS;
135 }
136 
137 #if USING_PCRE2
cli_pcre_compile(struct cli_pcre_data * pd,long long unsigned match_limit,long long unsigned match_limit_recursion,unsigned int options,int opt_override)138 cl_error_t cli_pcre_compile(struct cli_pcre_data *pd, long long unsigned match_limit, long long unsigned match_limit_recursion, unsigned int options, int opt_override)
139 {
140     int errornum;
141     PCRE2_SIZE erroffset;
142     pcre2_general_context *gctx;
143     pcre2_compile_context *cctx;
144 
145     if (!pd || !pd->expression) {
146         cli_errmsg("cli_pcre_compile: NULL pd or NULL pd->expression\n");
147         return CL_ENULLARG;
148     }
149 
150     gctx = pcre2_general_context_create(cli_pcre_malloc, cli_pcre_free, NULL);
151     if (!gctx) {
152         cli_errmsg("cli_pcre_compile: Unable to allocate memory for general context\n");
153         return CL_EMEM;
154     }
155 
156     cctx = pcre2_compile_context_create(gctx);
157     if (!cctx) {
158         cli_errmsg("cli_pcre_compile: Unable to allocate memory for compile context\n");
159         pcre2_general_context_free(gctx);
160         return CL_EMEM;
161     }
162 
163     /* compile the pcre2 regex last arg is charset, allow for options override */
164     if (opt_override)
165         pd->re = pcre2_compile((PCRE2_SPTR8)pd->expression, PCRE2_ZERO_TERMINATED, options, &errornum, &erroffset, cctx); /* pd->re handled by pcre2 -> call pcre_free() -> calls free() */
166     else
167         pd->re = pcre2_compile((PCRE2_SPTR8)pd->expression, PCRE2_ZERO_TERMINATED, pd->options, &errornum, &erroffset, cctx); /* pd->re handled by pcre2 -> call pcre_free() -> calls free() */
168     if (pd->re == NULL) {
169         PCRE2_UCHAR errmsg[256];
170         pcre2_get_error_message(errornum, errmsg, sizeof(errmsg));
171         cli_errmsg("cli_pcre_compile: PCRE2 compilation failed at offset %llu: %s\n",
172                    (long long unsigned)erroffset, errmsg);
173         pcre2_compile_context_free(cctx);
174         pcre2_general_context_free(gctx);
175         return CL_EMALFDB;
176     }
177 
178     /* setup matching context and set the match limits */
179     pd->mctx = pcre2_match_context_create(gctx);
180     if (!pd->mctx) {
181         cli_errmsg("cli_pcre_compile: Unable to allocate memory for match context\n");
182         pcre2_compile_context_free(cctx);
183         pcre2_general_context_free(gctx);
184         return CL_EMEM;
185     }
186 
187     pcre2_set_match_limit(pd->mctx, match_limit);
188     pcre2_set_recursion_limit(pd->mctx, match_limit_recursion);
189 
190     /* non-dynamic allocated fields set by caller */
191     pcre2_compile_context_free(cctx);
192     pcre2_general_context_free(gctx);
193     return CL_SUCCESS;
194 }
195 #else
cli_pcre_compile(struct cli_pcre_data * pd,long long unsigned match_limit,long long unsigned match_limit_recursion,unsigned int options,int opt_override)196 cl_error_t cli_pcre_compile(struct cli_pcre_data *pd, long long unsigned match_limit, long long unsigned match_limit_recursion, unsigned int options, int opt_override)
197 {
198     const char *error;
199     int erroffset;
200 
201     if (!pd || !pd->expression) {
202         cli_errmsg("cli_pcre_compile: NULL pd or NULL pd->expression\n");
203         return CL_ENULLARG;
204     }
205 
206     /* compile the pcre regex last arg is charset, allow for options override */
207     if (opt_override)
208         pd->re = pcre_compile(pd->expression, options, &error, &erroffset, NULL); /* pd->re handled by pcre -> call pcre_free() -> calls free() */
209     else
210         pd->re = pcre_compile(pd->expression, pd->options, &error, &erroffset, NULL); /* pd->re handled by pcre -> call pcre_free() -> calls free() */
211     if (pd->re == NULL) {
212         cli_errmsg("cli_pcre_compile: PCRE compilation failed at offset %d: %s\n", erroffset, error);
213         return CL_EMALFDB;
214     }
215 
216     /* now study it... (section totally not from snort) */
217     pd->ex = pcre_study(pd->re, 0, &error);
218     if (!(pd->ex)) {
219         pd->ex = (pcre_extra *)cli_calloc(1, sizeof(*(pd->ex)));
220         if (!(pd->ex)) {
221             cli_errmsg("cli_pcre_compile: Unable to allocate memory for extra data\n");
222             return CL_EMEM;
223         }
224     }
225 
226     /* set the match limits */
227     if (pd->ex->flags & PCRE_EXTRA_MATCH_LIMIT) {
228         pd->ex->match_limit = match_limit;
229     } else {
230         pd->ex->flags |= PCRE_EXTRA_MATCH_LIMIT;
231         pd->ex->match_limit = match_limit;
232     }
233 
234     /* set the recursion match limits */
235 #ifdef PCRE_EXTRA_MATCH_LIMIT_RECURSION
236     if (pd->ex->flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) {
237         pd->ex->match_limit_recursion = match_limit_recursion;
238     } else {
239         pd->ex->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
240         pd->ex->match_limit_recursion = match_limit_recursion;
241     }
242 #endif /* PCRE_EXTRA_MATCH_LIMIT_RECURSION */
243 
244     /* non-dynamic allocated fields set by caller */
245     return CL_SUCCESS;
246 }
247 #endif
248 
cli_pcre_match(struct cli_pcre_data * pd,const unsigned char * buffer,size_t buflen,size_t override_offset,int options,struct cli_pcre_results * results)249 int cli_pcre_match(struct cli_pcre_data *pd, const unsigned char *buffer, size_t buflen, size_t override_offset, int options, struct cli_pcre_results *results)
250 {
251     int rc;
252 
253 #if USING_PCRE2
254     PCRE2_SIZE *ovector;
255     size_t startoffset;
256 #else
257     int startoffset;
258 #endif
259 
260     /* set the startoffset, override if a value is specified */
261     startoffset = pd->search_offset;
262     if (override_offset != pd->search_offset)
263         startoffset = override_offset;
264 
265         /* execute the pcre and return */
266 #if USING_PCRE2
267     rc = pcre2_match(pd->re, buffer, buflen, startoffset, options, results->match_data, pd->mctx);
268     if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) {
269         switch (rc) {
270             case PCRE2_ERROR_CALLOUT:
271                 break;
272             case PCRE2_ERROR_NOMEMORY:
273                 cli_errmsg("cli_pcre_match: pcre_exec: out of memory\n");
274                 results->err = CL_EMEM;
275                 break;
276             case PCRE2_ERROR_MATCHLIMIT:
277                 cli_dbgmsg("cli_pcre_match: pcre_exec: match limit exceeded\n");
278                 break;
279             case PCRE2_ERROR_RECURSIONLIMIT:
280                 cli_dbgmsg("cli_pcre_match: pcre_exec: recursive limit exceeded\n");
281                 break;
282             default:
283                 cli_errmsg("cli_pcre_match: pcre_exec: returned error %d\n", rc);
284                 results->err = CL_BREAK;
285         }
286     } else if (rc > 0) {
287         ovector = pcre2_get_ovector_pointer(results->match_data);
288 
289         results->match[0] = ovector[0];
290         results->match[1] = ovector[1];
291     } else {
292         results->match[0] = results->match[1] = 0;
293     }
294 #else
295     rc = pcre_exec(pd->re, pd->ex, (const char *)buffer, (int)buflen, (int)startoffset, options, results->ovector, OVECCOUNT);
296     if (rc < 0 && rc != PCRE_ERROR_NOMATCH) {
297         switch (rc) {
298             case PCRE_ERROR_CALLOUT:
299                 break;
300             case PCRE_ERROR_NOMEMORY:
301                 cli_errmsg("cli_pcre_match: pcre_exec: out of memory\n");
302                 results->err = CL_EMEM;
303                 break;
304             case PCRE_ERROR_MATCHLIMIT:
305                 cli_dbgmsg("cli_pcre_match: pcre_exec: match limit exceeded\n");
306                 break;
307             case PCRE_ERROR_RECURSIONLIMIT:
308                 cli_dbgmsg("cli_pcre_match: pcre_exec: recursive limit exceeded\n");
309                 break;
310             default:
311                 cli_errmsg("cli_pcre_match: pcre_exec: returned error %d\n", rc);
312                 results->err = CL_BREAK;
313         }
314     } else if (rc > 0) {
315         results->match[0] = results->ovector[0];
316         results->match[1] = results->ovector[1];
317     } else {
318         results->match[0] = results->match[1] = 0;
319     }
320 #endif
321     return rc;
322 }
323 
/* When non-zero, cli_pcre_report() skips its entire diagnostic output. */
#define DISABLE_PCRE_REPORT 0
/* Maximum number of captured bytes hex-dumped per group by the report code;
 * longer captures are cut to this length and tagged " (trunc)". */
#define MATCH_MAXLEN 1028
326 
327 /* TODO: audit this function */
328 #if USING_PCRE2
named_substr_print(const struct cli_pcre_data * pd,const unsigned char * buffer,PCRE2_SIZE * ovector)329 static void named_substr_print(const struct cli_pcre_data *pd, const unsigned char *buffer, PCRE2_SIZE *ovector)
330 #else
331 static void named_substr_print(const struct cli_pcre_data *pd, const unsigned char *buffer, int *ovector)
332 #endif
333 {
334     int i, namecount, trunc;
335 
336 #if USING_PCRE2
337     PCRE2_SIZE length, j;
338 #else
339     int length, j;
340 #endif
341 
342     unsigned char *tabptr;
343     int name_entry_size;
344     unsigned char *name_table;
345     const char *start;
346     char outstr[2 * MATCH_MAXLEN + 1];
347 
348     /* determine if there are named substrings */
349 #if USING_PCRE2
350     (void)pcre2_pattern_info(pd->re, PCRE2_INFO_NAMECOUNT, &namecount);
351 #else
352     (void)pcre_fullinfo(pd->re, pd->ex, PCRE_INFO_NAMECOUNT, &namecount);
353 #endif
354     if (namecount <= 0) {
355         cli_dbgmsg("cli_pcre_report: no named substrings\n");
356     } else {
357         cli_dbgmsg("cli_pcre_report: named substrings\n");
358 
359         /* extract named substring translation table */
360 #if USING_PCRE2
361         (void)pcre2_pattern_info(pd->re, PCRE2_INFO_NAMETABLE, &name_table);
362         (void)pcre2_pattern_info(pd->re, PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size);
363 #else
364         (void)pcre_fullinfo(pd->re, pd->ex, PCRE_INFO_NAMETABLE, &name_table);
365         (void)pcre_fullinfo(pd->re, pd->ex, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
366 #endif
367 
368         /* print named substring information */
369         tabptr = name_table;
370         for (i = 0; i < namecount; i++) {
371             int n = (tabptr[0] << 8) | tabptr[1];
372 
373             start  = (const char *)buffer + ovector[2 * n];
374             length = ovector[2 * n + 1] - ovector[2 * n];
375 
376             trunc = 0;
377             if (length > MATCH_MAXLEN) {
378                 trunc  = 1;
379                 length = MATCH_MAXLEN;
380             }
381 
382             for (j = 0; j < length; ++j)
383                 snprintf(outstr + (2 * j), sizeof(outstr) - (2 * j), "%02x", (unsigned int)*(start + j));
384 
385             cli_dbgmsg("cli_pcre_report: (%d) %*s: %s%s\n", n, name_entry_size - 3, tabptr + 2,
386                        outstr, trunc ? " (trunc)" : "");
387             /*
388             cli_dbgmsg("named_substr:  (%d) %*s: %.*s%s\n", n, name_entry_size - 3, tabptr + 2,
389                        length, start, trunc ? " (trunc)":"");
390             */
391             tabptr += name_entry_size;
392         }
393     }
394 }
395 
396 /* TODO: audit this function */
cli_pcre_report(const struct cli_pcre_data * pd,const unsigned char * buffer,size_t buflen,int rc,struct cli_pcre_results * results)397 void cli_pcre_report(const struct cli_pcre_data *pd, const unsigned char *buffer, size_t buflen, int rc, struct cli_pcre_results *results)
398 {
399     int i, trunc;
400 
401 #if USING_PCRE2
402     PCRE2_SIZE length, j;
403 #else
404     int length, j;
405 #endif
406 
407     const char *start;
408     char outstr[2 * MATCH_MAXLEN + 1];
409 
410 #if USING_PCRE2
411     PCRE2_SIZE *ovector;
412     ovector = pcre2_get_ovector_pointer(results->match_data);
413 #else
414     int *ovector = results->ovector;
415 #endif
416 
417     /* print out additional diagnostics if cli_debug_flag is set */
418     if (!DISABLE_PCRE_REPORT) {
419         cli_dbgmsg("\n");
420 #if USING_PCRE2
421         cli_dbgmsg("cli_pcre_report: PCRE2 Execution Report:\n");
422 #else
423         cli_dbgmsg("cli_pcre_report: PCRE Execution Report:\n");
424 #endif
425         cli_dbgmsg("cli_pcre_report: running regex /%s/ returns %d\n", pd->expression, rc);
426         if (rc > 0) {
427             /* print out full-match and capture groups */
428             for (i = 0; i < rc; ++i) {
429                 start  = (const char *)buffer + ovector[2 * i];
430                 length = ovector[2 * i + 1] - ovector[2 * i];
431 
432 #ifdef USING_PCRE2
433                 if (ovector[2 * i + 1] > buflen) {
434 #else
435                 if (ovector[2 * i + 1] > (int)buflen) {
436 #endif
437                     cli_warnmsg("cli_pcre_report: reported match goes outside buffer\n");
438                     continue;
439                 }
440 
441                 trunc = 0;
442                 if (length > MATCH_MAXLEN) {
443                     trunc  = 1;
444                     length = MATCH_MAXLEN;
445                 }
446 
447                 for (j = 0; j < length; ++j)
448                     snprintf(outstr + (2 * j), sizeof(outstr) - (2 * j), "%02x", (unsigned int)*(start + j));
449 
450                 cli_dbgmsg("cli_pcre_report:  %d: %s%s\n", i, outstr, trunc ? " (trunc)" : "");
451                 //cli_dbgmsg("cli_pcre_report:  %d: %.*s%s\n", i, length, start, trunc ? " (trunc)":"");
452             }
453 
454             named_substr_print(pd, buffer, ovector);
455         }
456 #if USING_PCRE2
457         else if (rc == 0 || rc == PCRE2_ERROR_NOMATCH) {
458 #else
459         else if (rc == 0 || rc == PCRE_ERROR_NOMATCH) {
460 #endif
461             cli_dbgmsg("cli_pcre_report: no match found\n");
462         } else {
463             cli_dbgmsg("cli_pcre_report: error occurred in pcre_match: %d\n", rc);
464             /* error handled by caller */
465         }
466         cli_dbgmsg("cli_pcre_report: PCRE Execution Report End\n");
467         cli_dbgmsg("\n");
468     }
469 }
470 
471 cl_error_t cli_pcre_results_reset(struct cli_pcre_results *results, const struct cli_pcre_data *pd)
472 {
473     results->err      = CL_SUCCESS;
474     results->match[0] = results->match[1] = 0;
475 #if USING_PCRE2
476     if (results->match_data)
477         pcre2_match_data_free(results->match_data);
478 
479     results->match_data = pcre2_match_data_create_from_pattern(pd->re, NULL);
480     if (!results->match_data)
481         return CL_EMEM;
482 #else
483     memset(results->ovector, 0, OVECCOUNT);
484 #endif
485     return CL_SUCCESS;
486 }
487 
/**
 * Release the dynamic resources held by a results structure.
 *
 * With PCRE2 the match data block is freed and the pointer cleared, so a
 * later cli_pcre_results_reset() or a repeated free cannot release the same
 * block twice. With legacy PCRE there is nothing to release.
 *
 * @param results  Results structure to clean up.
 */
void cli_pcre_results_free(struct cli_pcre_results *results)
{
#if USING_PCRE2
    if (results->match_data) {
        pcre2_match_data_free(results->match_data);
        results->match_data = NULL;
    }
#else
    (void)results; /* nothing dynamically allocated for legacy PCRE */
#endif
}
495 
496 void cli_pcre_free_single(struct cli_pcre_data *pd)
497 {
498 #if USING_PCRE2
499     if (pd->re) {
500         pcre2_code_free(pd->re);
501         pd->re = NULL;
502     }
503 
504     if (pd->mctx) {
505         pcre2_match_context_free(pd->mctx);
506         pd->mctx = NULL;
507     }
508 #else
509     if (pd->re) {
510         pcre_free(pd->re);
511         pd->re = NULL;
512     }
513     if (pd->ex) {
514         free(pd->ex);
515         pd->ex = NULL;
516     }
517 #endif
518     if (pd->expression) {
519         free(pd->expression);
520         pd->expression = NULL;
521     }
522 }
523 #endif /* HAVE_PCRE */
524