1 /*
2 * ProFTPD - FTP server daemon
3 * Copyright (c) 1997, 1998 Public Flood Software
4 * Copyright (c) 1999, 2000 MacGyver aka Habeeb J. Dihu <macgyver@tos.net>
5 * Copyright (c) 2001-2021 The ProFTPD Project team
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Suite 500, Boston, MA 02110-1335, USA.
20 *
21 * As a special exemption, Public Flood Software/MacGyver aka Habeeb J. Dihu
22 * and other respective copyright holders give permission to link this program
23 * with OpenSSL, and distribute the resulting executable, without including
24 * the source code for OpenSSL in the source distribution.
25 */
26
27 /* Regex management code. */
28
29 #include "conf.h"
30
31 #ifdef PR_USE_REGEX
32
33 #if defined(PR_USE_PCRE)
34 #include <pcre.h>
35
/* Regex handle layout used when built with PCRE support.  It has fields for
 * both a POSIX compiled regex and a PCRE compiled regex.
 */
struct regexp_rec {
  /* Pool this handle is allocated from; destroyed when the handle is freed */
  pool *regex_pool;

  /* Owning module */
  module *m;

  /* Copy of the original regular expression pattern */
  const char *pattern;

  /* For callers wishing to use POSIX REs */
  regex_t *re;

  /* For callers wishing to use PCRE REs */
  pcre *pcre;

  /* Extra data handed to pcre_exec(): study results and/or match limits */
  pcre_extra *pcre_extra;

  /* Error string last written by pcre_compile()/pcre_study(), if any */
  const char *pcre_errstr;
};
54
/* Default PCRE match limits, stored by pr_regexp_set_limits().  They are used
 * by regexp_exec_pcre() when the caller passes 0 for the corresponding limit;
 * a value of 0 here means no limit is applied.
 */
static unsigned long pcre_match_limit = 0;
static unsigned long pcre_match_limit_recursion = 0;
57
58 #else /* !PR_USE_PCRE */
/* Regex handle layout used when built without PCRE support; only a POSIX
 * compiled regex is carried.
 */
struct regexp_rec {
  /* Pool this handle is allocated from; destroyed when the handle is freed */
  pool *regex_pool;

  /* Owning module */
  module *m;

  /* Copy of the original regular expression pattern */
  const char *pattern;

  /* For callers wishing to use POSIX REs */
  regex_t *re;
};
71
72 #endif /* PR_USE_PCRE */
73
/* Parent pool for the tracking list and for each handle's own pool.  Created
 * on first use by pr_regexp_alloc(), destroyed by regexp_cleanup().
 */
static pool *regexp_pool = NULL;

/* Array of pr_regex_t pointers, one slot per allocated handle.  A slot is
 * set to NULL once its handle has been freed.
 */
static array_header *regexp_list = NULL;

/* Trace channel name passed to pr_trace_msg() throughout this file. */
static const char *trace_channel = "regexp";
78
/* Release everything owned by one regex handle: the PCRE compiled data (in
 * PCRE builds), whatever regcomp(3) attached to the POSIX regex_t, and
 * finally the handle's pool.  The handle itself is allocated from that pool,
 * so `pre` must not be touched once this returns.
 */
static void regexp_free(pr_regex_t *pre) {
  pool *handle_pool = pre->regex_pool;

#if defined(PR_USE_PCRE)
  if (pre->pcre != NULL) {
# if defined(HAVE_PCRE_PCRE_FREE_STUDY)
    pcre_free_study(pre->pcre_extra);
# endif /* HAVE_PCRE_PCRE_FREE_STUDY */
    pcre_free(pre->pcre);

    pre->pcre_extra = NULL;
    pre->pcre = NULL;
  }
#endif /* PR_USE_PCRE */

  if (pre->re != NULL) {
    /* The regex_t storage is pool memory; regfree(3) only drops what
     * regcomp(3) hung off of it.
     */
    regfree(pre->re);
    pre->re = NULL;
  }

  pre->pattern = NULL;
  destroy_pool(handle_pool);
}
100
regexp_cleanup(void)101 static void regexp_cleanup(void) {
102 /* Only perform this cleanup if necessary */
103 if (regexp_pool) {
104 register unsigned int i = 0;
105 pr_regex_t **pres = (pr_regex_t **) regexp_list->elts;
106
107 for (i = 0; i < regexp_list->nelts; i++) {
108 if (pres[i] != NULL) {
109 regexp_free(pres[i]);
110 pres[i] = NULL;
111 }
112 }
113
114 destroy_pool(regexp_pool);
115 regexp_pool = NULL;
116 regexp_list = NULL;
117 }
118 }
119
/* Listener for the "core.exit" event (registered in init_regexp()): frees
 * all tracked regexes.  Neither argument is used.
 */
static void regexp_exit_ev(const void *event_data, void *user_data) {
  (void) event_data;
  (void) user_data;

  regexp_cleanup();
}
124
/* Listener for the "core.restart" event (registered in init_regexp()): frees
 * all tracked regexes.  Neither argument is used.
 */
static void regexp_restart_ev(const void *event_data, void *user_data) {
  (void) event_data;
  (void) user_data;

  regexp_cleanup();
}
129
pr_regexp_alloc(module * m)130 pr_regex_t *pr_regexp_alloc(module *m) {
131 pr_regex_t *pre = NULL;
132 pool *re_pool = NULL;
133
134 /* If no regex-tracking list has been allocated, create one. Register a
135 * cleanup handler for this pool, to free up the data in the list.
136 */
137 if (regexp_pool == NULL) {
138 regexp_pool = make_sub_pool(permanent_pool);
139 pr_pool_tag(regexp_pool, "Regexp Pool");
140 regexp_list = make_array(regexp_pool, 0, sizeof(pr_regex_t *));
141 }
142
143 re_pool = pr_pool_create_sz(regexp_pool, 128);
144 pr_pool_tag(re_pool, "regexp pool");
145
146 pre = pcalloc(re_pool, sizeof(pr_regex_t));
147 pre->regex_pool = re_pool;
148 pre->m = m;
149
150 /* Add this pointer to the array. */
151 *((pr_regex_t **) push_array(regexp_list)) = pre;
152
153 return pre;
154 }
155
/* Free tracked regex handles selected by either criterion:
 *   - `pre`, when non-NULL: that specific handle;
 *   - `m`, when non-NULL: every handle owned by that module.
 * Both may be given at once.  If both are NULL, or nothing has been
 * allocated yet, nothing happens.
 */
void pr_regexp_free(module *m, pr_regex_t *pre) {
  pr_regex_t **slots;
  unsigned int idx;

  if (regexp_list == NULL) {
    return;
  }

  slots = (pr_regex_t **) regexp_list->elts;

  for (idx = 0; idx < regexp_list->nelts; idx++) {
    pr_regex_t *entry = slots[idx];
    int same_handle, same_owner;

    if (entry == NULL) {
      /* Already freed. */
      continue;
    }

    same_handle = (pre != NULL && entry == pre);
    same_owner = (m != NULL && entry->m == m);

    if (!same_handle &&
        !same_owner) {
      continue;
    }

    regexp_free(entry);
    slots[idx] = NULL;
  }
}
178
179 #if defined(PR_USE_PCRE)
/* Compile `pattern` into a PCRE regex stored in `pre`, then study it.
 *
 * `flags` are PCRE compile options (already mapped from POSIX flags by the
 * caller).  If `pre` already holds a PCRE compilation from an earlier call,
 * that compilation and its study data are released first so that reusing a
 * handle does not leak them.
 *
 * Returns 0 on success.  Returns -1 with errno set to EINVAL if `pre` or
 * `pattern` is NULL, and -1 if pcre_compile() rejects the pattern (the PCRE
 * error text is then left in pre->pcre_errstr).  A pcre_study() failure is
 * only traced; it does not fail the compile.
 */
static int regexp_compile_pcre(pr_regex_t *pre, const char *pattern,
    int flags) {
  int err_offset, study_flags = 0;

  if (pre == NULL ||
      pattern == NULL) {
    errno = EINVAL;
    return -1;
  }

  /* Drop any previous compilation held by this handle, in the same way that
   * regexp_free() does, before the pointers are overwritten below.
   */
  if (pre->pcre != NULL) {
# if defined(HAVE_PCRE_PCRE_FREE_STUDY)
    pcre_free_study(pre->pcre_extra);
# endif /* HAVE_PCRE_PCRE_FREE_STUDY */
    pre->pcre_extra = NULL;
    pcre_free(pre->pcre);
    pre->pcre = NULL;
  }

  pr_trace_msg(trace_channel, 9, "compiling pattern '%s' into PCRE regex",
    pattern);
  pre->pattern = pstrdup(pre->regex_pool, pattern);

  pre->pcre = pcre_compile(pattern, flags, &(pre->pcre_errstr), &err_offset,
    NULL);
  if (pre->pcre == NULL) {
    pr_trace_msg(trace_channel, 4,
      "error compiling pattern '%s' into PCRE regex: %s", pattern,
      pre->pcre_errstr);
    return -1;
  }

  /* Study the pattern as well, just in case. */
#ifdef PCRE_STUDY_JIT_COMPILE
  study_flags = PCRE_STUDY_JIT_COMPILE;
#endif /* PCRE_STUDY_JIT_COMPILE */
  pr_trace_msg(trace_channel, 9, "studying pattern '%s' for PCRE extra data",
    pattern);
  pre->pcre_extra = pcre_study(pre->pcre, study_flags, &(pre->pcre_errstr));
  if (pre->pcre_extra == NULL) {
    /* A NULL result with no error string just means there is no study data. */
    if (pre->pcre_errstr != NULL) {
      pr_trace_msg(trace_channel, 4,
        "error studying pattern '%s' for PCRE regex: %s", pattern,
        pre->pcre_errstr);
    }
  }

  return 0;
}
220 #endif /* PR_USE_PCRE */
221
/* Compile `pattern` into a POSIX regex stored in `pre`, replacing any POSIX
 * regex the handle already holds.  REG_EXTENDED is always added to `flags`
 * when the platform defines it.
 *
 * Returns the regcomp(3) result, or -1 with errno set to EINVAL if `pre` or
 * `pattern` is NULL.
 */
int pr_regexp_compile_posix(pr_regex_t *pre, const char *pattern, int flags) {
  regex_t *compiled;

  if (pattern == NULL ||
      pre == NULL) {
    errno = EINVAL;
    return -1;
  }

  /* Release the internals of any earlier compilation first. */
  if (pre->re != NULL) {
    regfree(pre->re);
    pre->re = NULL;
  }

  pr_trace_msg(trace_channel, 9, "compiling pattern '%s' into POSIX regex",
    pattern);
  pre->pattern = pstrdup(pre->regex_pool, pattern);

#if defined(REG_EXTENDED)
  /* Default to modern ("extended") POSIX regular expression syntax. */
  flags |= REG_EXTENDED;
#endif /* REG_EXTENDED */

  compiled = pcalloc(pre->regex_pool, sizeof(regex_t));
  pre->re = compiled;

  return regcomp(compiled, pattern, flags);
}
250
/* Compile `pattern` into `pre` using the preferred engine: PCRE when built
 * with PCRE support, POSIX otherwise.  `flags` are POSIX regcomp(3) flags.
 */
int pr_regexp_compile(pr_regex_t *pre, const char *pattern, int flags) {
#if defined(PR_USE_PCRE)
  /* Only REG_ICASE is translated to its PCRE equivalent; other POSIX flags
   * are not passed on to PCRE.
   */
  int pcre_flags = (flags & REG_ICASE) ? PCRE_CASELESS : 0;

  return regexp_compile_pcre(pre, pattern, pcre_flags);
#else
  return pr_regexp_compile_posix(pre, pattern, flags);
#endif /* PR_USE_PCRE */
}
268
pr_regexp_error(int errcode,const pr_regex_t * pre,char * buf,size_t bufsz)269 size_t pr_regexp_error(int errcode, const pr_regex_t *pre, char *buf,
270 size_t bufsz) {
271 size_t res = 0;
272
273 if (pre == NULL ||
274 buf == NULL ||
275 bufsz == 0) {
276 return 0;
277 }
278
279 #if defined(PR_USE_PCRE)
280 if (pre->pcre_errstr != NULL) {
281 sstrncpy(buf, pre->pcre_errstr, bufsz);
282 return strlen(pre->pcre_errstr) + 1;
283 }
284 #endif /* PR_USE_PCRE */
285
286 if (pre->re != NULL) {
287 /* Make sure the given buffer is always zeroed out first. */
288 memset(buf, '\0', bufsz);
289 res = regerror(errcode, pre->re, buf, bufsz-1);
290 }
291
292 return res;
293 }
294
pr_regexp_get_pattern(const pr_regex_t * pre)295 const char *pr_regexp_get_pattern(const pr_regex_t *pre) {
296 if (pre == NULL) {
297 errno = EINVAL;
298 return NULL;
299 }
300
301 if (pre->pattern == NULL) {
302 errno = ENOENT;
303 return NULL;
304 }
305
306 return pre->pattern;
307 }
308
309 #if defined(PR_USE_PCRE)
/* Run the PCRE regex compiled into `pre` against `text`.
 *
 * If `nmatches` is non-zero and `matches` is non-NULL, up to `nmatches`
 * captured substrings are reported in `matches` as POSIX regmatch_t offsets;
 * entries for groups that did not take part are set to -1/-1.
 *
 * `flags` are passed to pcre_exec() as its options.  `match_limit` and
 * `match_limit_recursion` bound the matching effort; a value of 0 selects the
 * default set via pr_regexp_set_limits(), and if that is 0 too, no limit is
 * applied.  Limits apply to this call only: they are set on a local copy of
 * the handle's pcre_extra data, so the handle keeps holding nothing but what
 * pcre_study() returned (which is what regexp_free() expects to release).
 *
 * Returns 0 on a match, the negative pcre_exec() error code otherwise, or -1
 * with errno set to EINVAL if the handle has no compiled PCRE regex.
 */
static int regexp_exec_pcre(pr_regex_t *pre, const char *text,
    size_t nmatches, regmatch_t *matches, int flags, unsigned long match_limit,
    unsigned long match_limit_recursion) {
  int res, ovector_count = 0, *ovector = NULL;
  size_t text_len;
  pool *tmp_pool = NULL;
  pcre_extra limits_extra, *exec_extra = NULL;

  if (pre->pcre == NULL) {
    errno = EINVAL;
    return -1;
  }

  text_len = strlen(text);

  /* Use the default match limits, if set and if the caller did not
   * explicitly provide limits.
   */
  if (match_limit == 0) {
    match_limit = pcre_match_limit;
  }

  if (match_limit_recursion == 0) {
    match_limit_recursion = pcre_match_limit_recursion;
  }

  /* By default, hand pcre_exec() the study data as-is (possibly NULL). */
  exec_extra = pre->pcre_extra;

  if (match_limit > 0 ||
      match_limit_recursion > 0) {
    /* Build a per-call pcre_extra on the stack, starting from the study data
     * when there is any, and add the limits there.
     */
    if (pre->pcre_extra != NULL) {
      limits_extra = *(pre->pcre_extra);

    } else {
      memset(&limits_extra, 0, sizeof(limits_extra));
    }

    if (match_limit > 0) {
      limits_extra.flags |= PCRE_EXTRA_MATCH_LIMIT;
      limits_extra.match_limit = match_limit;
    }

    if (match_limit_recursion > 0) {
      limits_extra.flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
      limits_extra.match_limit_recursion = match_limit_recursion;
    }

    exec_extra = &limits_extra;
  }

  if (nmatches > 0 &&
      matches != NULL) {
    tmp_pool = make_sub_pool(pre->regex_pool);
    pr_pool_tag(tmp_pool, "regexp tmp pool");

    /* pcre_exec() takes the number of ints in the vector, not the number of
     * matches: each match needs two ints for its offsets, and the final
     * third of the vector is PCRE's own workspace.
     */
    ovector_count = (int) (nmatches * 3);
    ovector = pcalloc(tmp_pool, sizeof(int) * nmatches * 3);
  }

  pr_trace_msg(trace_channel, 9,
    "executing PCRE regex '%s' against subject '%s'",
    pr_regexp_get_pattern(pre), text);
  res = pcre_exec(pre->pcre, exec_extra, text, text_len, 0, flags,
    ovector, ovector_count);

  if (res < 0) {
    if (tmp_pool != NULL) {
      destroy_pool(tmp_pool);
    }

    if (pr_trace_get_level(trace_channel) >= 9) {
      const char *reason = "unknown";

      switch (res) {
        case PCRE_ERROR_NOMATCH:
          reason = "subject did not match pattern";
          break;

        case PCRE_ERROR_NULL:
          reason = "null regex or subject";
          break;

        case PCRE_ERROR_BADOPTION:
          reason = "unsupported options bit";
          break;

        case PCRE_ERROR_BADMAGIC:
          reason = "bad magic number in regex";
          break;

        case PCRE_ERROR_UNKNOWN_OPCODE:
        case PCRE_ERROR_INTERNAL:
          reason = "internal PCRE error or corrupted regex";
          break;

        case PCRE_ERROR_NOMEMORY:
          reason = "not enough memory for backreferences";
          break;

        case PCRE_ERROR_MATCHLIMIT:
          reason = "match limit reached/exceeded";
          break;

        case PCRE_ERROR_RECURSIONLIMIT:
          reason = "match limit recursion reached/exceeded";
          break;

        case PCRE_ERROR_BADUTF8:
          reason = "invalid UTF8 subject used";
          break;

        case PCRE_ERROR_PARTIAL:
          reason = "subject matched only partially; PCRE_PARTIAL flag not used";
          break;

        default:
          break;
      }

      pr_trace_msg(trace_channel, 9,
        "PCRE regex '%s' failed to match subject '%s': %s",
        pr_regexp_get_pattern(pre), text, reason);
    }

    return res;
  }

  pr_trace_msg(trace_channel, 9,
    "PCRE regex '%s' successfully matched subject '%s'",
    pr_regexp_get_pattern(pre), text);

  if (ovector_count > 0) {
    /* Populate the provided POSIX regmatch_t array with the PCRE data. */
    size_t i, npairs;

    /* A result of 0 means the pattern matched but has more groups than the
     * vector has room for; in that case every one of the nmatches pairs was
     * filled in.  Otherwise the result is the number of pairs set.
     */
    npairs = (res == 0) ? nmatches : (size_t) res;
    if (npairs > nmatches) {
      npairs = nmatches;
    }

    for (i = 0; i < npairs; i++) {
      matches[i].rm_so = ovector[i * 2];
      matches[i].rm_eo = ovector[(i * 2) + 1];
    }

    /* Ensure the remaining items are set to proper defaults as well. */
    for (; i < nmatches; i++) {
      matches[i].rm_so = matches[i].rm_eo = -1;
    }
  }

  if (tmp_pool != NULL) {
    destroy_pool(tmp_pool);
  }

  if (matches != NULL &&
      pr_trace_get_level(trace_channel) >= 20) {
    register unsigned int i;

    for (i = 0; i < nmatches; i++) {
      int match_len;
      const char *match_text;

      if (matches[i].rm_so == -1 ||
          matches[i].rm_eo == -1) {
        break;
      }

      match_text = &(text[matches[i].rm_so]);
      match_len = matches[i].rm_eo - matches[i].rm_so;

      pr_trace_msg(trace_channel, 20,
        "PCRE regex '%s' match #%u: %.*s (start %ld, len %d)",
        pr_regexp_get_pattern(pre), i, (int) match_len, match_text,
        (long) matches[i].rm_so, match_len);
    }
  }

  return 0;
}
473 #endif /* PR_USE_PCRE */
474
/* Run the POSIX regex compiled into `pre` against `text` via regexec(3),
 * tracing the outcome (and, at trace level 20, each reported match).
 *
 * `nmatches`, `matches` and `flags` are passed straight to regexec(3).
 *
 * Returns the regexec(3) result (0 on a match), or -1 with errno set to
 * EINVAL if the handle has no compiled POSIX regex.
 */
static int regexp_exec_posix(pr_regex_t *pre, const char *text,
    size_t nmatches, regmatch_t *matches, int flags) {
  int res;

  /* A handle that was never (successfully) compiled has no regex_t to hand
   * to regexec(3); reject it the same way regexp_exec_pcre() rejects a
   * handle without a compiled PCRE regex.
   */
  if (pre->re == NULL) {
    errno = EINVAL;
    return -1;
  }

  pr_trace_msg(trace_channel, 9,
    "executing POSIX regex '%s' against subject '%s'",
    pr_regexp_get_pattern(pre), text);
  res = regexec(pre->re, text, nmatches, matches, flags);
  if (res == 0) {
    pr_trace_msg(trace_channel, 9,
      "POSIX regex '%s' successfully matched subject '%s'",
       pr_regexp_get_pattern(pre), text);

    if (matches != NULL &&
        pr_trace_get_level(trace_channel) >= 20) {
      register unsigned int i;

      for (i = 0; i < nmatches; i++) {
        int match_len;
        const char *match_text;

        /* The first unset entry ends the list of reported matches. */
        if (matches[i].rm_so == -1 ||
            matches[i].rm_eo == -1) {
          break;
        }

        match_text = &(text[matches[i].rm_so]);
        match_len = matches[i].rm_eo - matches[i].rm_so;

        pr_trace_msg(trace_channel, 20,
          "POSIX regex '%s' match #%u: %.*s (start %ld, len %d)",
          pr_regexp_get_pattern(pre), i, (int) match_len, match_text,
          (long) matches[i].rm_so, match_len);
      }
    }

  } else {
    if (pr_trace_get_level(trace_channel) >= 9) {
      const char *reason = "subject did not match pattern";

      /* NOTE: Expectation of `res` values here are mixed when PCRE
       * support, and the <pcreposix.h> header, are involved.
       */

      pr_trace_msg(trace_channel, 9,
        "POSIX regex '%s' failed to match subject '%s': %s (%d)",
         pr_regexp_get_pattern(pre), text, reason, res);
    }
  }

  return res;
}
527
/* Match `text` against the regex held by `pre`.
 *
 * In PCRE builds, a handle with a compiled PCRE regex is run through PCRE
 * (honoring `match_limit` and `match_limit_recursion`); otherwise the POSIX
 * regex is used and the limits are ignored.  `nmatches`, `matches` and
 * `flags` are forwarded to the selected engine.
 *
 * Returns the engine's result, with POSIX REG_NOMATCH translated to -1 so
 * that a failed match is negative for both engines.  Returns -1 with errno
 * set to EINVAL if `pre` or `text` is NULL.
 */
int pr_regexp_exec(pr_regex_t *pre, const char *text, size_t nmatches,
    regmatch_t *matches, int flags, unsigned long match_limit,
    unsigned long match_limit_recursion) {
  int posix_res;

  if (text == NULL ||
      pre == NULL) {
    errno = EINVAL;
    return -1;
  }

#if defined(PR_USE_PCRE)
  if (pre->pcre != NULL) {
    return regexp_exec_pcre(pre, text, nmatches, matches, flags, match_limit,
      match_limit_recursion);
  }
#endif /* PR_USE_PCRE */

  posix_res = regexp_exec_posix(pre, text, nmatches, matches, flags);

  /* PCRE already reports a failed match as a negative value; make the POSIX
   * path do the same.
   */
  return (posix_res == REG_NOMATCH) ? -1 : posix_res;
}
557
/* Record the default PCRE match limits used when a caller of
 * pr_regexp_exec() passes 0 for a limit.  Without PCRE support the values
 * are not stored anywhere.
 *
 * Always returns 0.
 */
int pr_regexp_set_limits(unsigned long match_limit,
    unsigned long match_limit_recursion) {
#if defined(PR_USE_PCRE)
  pcre_match_limit_recursion = match_limit_recursion;
  pcre_match_limit = match_limit;
#else
  /* Nothing to record for POSIX-only builds. */
  (void) match_limit;
  (void) match_limit_recursion;
#endif /* PR_USE_PCRE */

  return 0;
}
568
init_regexp(void)569 void init_regexp(void) {
570
571 /* Register a restart handler for the regexp pool, so that when restarting,
572 * regfree(3) is called on each of the regex_t pointers in a
573 * regex_t-tracking array, thus preventing memory leaks on a long-running
574 * daemon.
575 *
576 * This registration is done here so that it only happens once.
577 */
578 pr_event_register(NULL, "core.restart", regexp_restart_ev, NULL);
579 pr_event_register(NULL, "core.exit", regexp_exit_ev, NULL);
580
581 #if defined(PR_USE_PCRE)
582 pr_log_debug(DEBUG2, "using PCRE %s", pcre_version());
583 #endif /* PR_USE_PCRE */
584 }
585
586 #endif
587