1 /*
2  * ProFTPD - FTP server daemon
3  * Copyright (c) 1997, 1998 Public Flood Software
4  * Copyright (c) 1999, 2000 MacGyver aka Habeeb J. Dihu <macgyver@tos.net>
5  * Copyright (c) 2001-2021 The ProFTPD Project team
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Suite 500, Boston, MA 02110-1335, USA.
20  *
21  * As a special exemption, Public Flood Software/MacGyver aka Habeeb J. Dihu
22  * and other respective copyright holders give permission to link this program
23  * with OpenSSL, and distribute the resulting executable, without including
24  * the source code for OpenSSL in the source distribution.
25  */
26 
27 /* Regex management code. */
28 
29 #include "conf.h"
30 
31 #ifdef PR_USE_REGEX
32 
33 #if defined(PR_USE_PCRE)
34 #include <pcre.h>
35 
/* Backing record for a regex handle when built with PCRE support.  A record
 * is created by pr_regexp_alloc() and may hold a POSIX regex, a PCRE regex,
 * or neither (before any compile call).
 */
struct regexp_rec {
  /* Pool from which this record and its per-regex data (pattern copy,
   * regex_t storage) are allocated; destroyed by regexp_free().
   */
  pool *regex_pool;

  /* Owning module, as given to pr_regexp_alloc().  pr_regexp_free() uses
   * this to release every regex belonging to a module.
   */
  module *m;

  /* Copy of the original regular expression pattern, duplicated into
   * regex_pool at compile time.  NULL until a pattern has been compiled.
   */
  const char *pattern;

  /* For callers wishing to use POSIX REs; set by pr_regexp_compile_posix(). */
  regex_t *re;

  /* For callers wishing to use PCRE REs.  `pcre` is the pcre_compile()
   * result; `pcre_extra` is the extra data handed to pcre_exec() alongside
   * it, and may be NULL.
   */
  pcre *pcre;
  pcre_extra *pcre_extra;

  /* Error text most recently reported by pcre_compile()/pcre_study(); read
   * by pr_regexp_error().
   */
  const char *pcre_errstr;
};
54 
/* Default PCRE match limits, set via pr_regexp_set_limits().  They are used
 * by regexp_exec_pcre() whenever the caller passes 0 for the corresponding
 * per-call limit; a value of 0 here means no limit is applied.
 */
static unsigned long pcre_match_limit = 0;
static unsigned long pcre_match_limit_recursion = 0;
57 
58 #else /* !PR_USE_PCRE */
/* Backing record for a regex handle when built without PCRE support; only
 * POSIX regular expressions are available.
 */
struct regexp_rec {
  /* Pool from which this record and its per-regex data (pattern copy,
   * regex_t storage) are allocated; destroyed by regexp_free().
   */
  pool *regex_pool;

  /* Owning module, as given to pr_regexp_alloc().  pr_regexp_free() uses
   * this to release every regex belonging to a module.
   */
  module *m;

  /* Copy of the original regular expression pattern, duplicated into
   * regex_pool at compile time.  NULL until a pattern has been compiled.
   */
  const char *pattern;

  /* For callers wishing to use POSIX REs; set by pr_regexp_compile_posix(). */
  regex_t *re;
};
71 
72 #endif /* PR_USE_PCRE */
73 
/* Parent pool for all regex handles, and the list tracking every handle
 * handed out by pr_regexp_alloc().  Both are created lazily on the first
 * allocation and reset to NULL by regexp_cleanup().  Entries in the list are
 * set to NULL once the corresponding handle has been freed.
 */
static pool *regexp_pool = NULL;
static array_header *regexp_list = NULL;

/* Trace channel name used for all pr_trace_msg() calls in this file. */
static const char *trace_channel = "regexp";
78 
/* Tear down a single regex handle.
 *
 * Releases the PCRE objects (PCRE builds only), then the contents of the
 * POSIX regex_t, and finally destroys the handle's own pool.  Callers are
 * expected to drop their pointer to `pre` afterwards; the tracking list
 * entries are cleared by the callers in this file.
 */
static void regexp_free(pr_regex_t *pre) {
#if defined(PR_USE_PCRE)
  if (pre->pcre != NULL) {
    /* Study data first, then the compiled pattern it belongs to. */
# if defined(HAVE_PCRE_PCRE_FREE_STUDY)
    pcre_free_study(pre->pcre_extra);
# endif /* HAVE_PCRE_PCRE_FREE_STUDY */
    pre->pcre_extra = NULL;

    pcre_free(pre->pcre);
    pre->pcre = NULL;
  }
#endif /* PR_USE_PCRE */

  if (pre->re != NULL) {
    /* regfree(3) releases what regcomp(3) allocated inside the regex_t; the
     * regex_t storage itself goes away with the pool below.
     */
    regfree(pre->re);
    pre->re = NULL;
  }

  pre->pattern = NULL;

  destroy_pool(pre->regex_pool);
}
100 
regexp_cleanup(void)101 static void regexp_cleanup(void) {
102   /* Only perform this cleanup if necessary */
103   if (regexp_pool) {
104     register unsigned int i = 0;
105     pr_regex_t **pres = (pr_regex_t **) regexp_list->elts;
106 
107     for (i = 0; i < regexp_list->nelts; i++) {
108       if (pres[i] != NULL) {
109         regexp_free(pres[i]);
110         pres[i] = NULL;
111       }
112     }
113 
114     destroy_pool(regexp_pool);
115     regexp_pool = NULL;
116     regexp_list = NULL;
117   }
118 }
119 
/* Handler registered for the "core.exit" event: releases all tracked
 * regexes.  Neither argument is used.
 */
static void regexp_exit_ev(const void *event_data, void *user_data) {
  (void) event_data;
  (void) user_data;

  regexp_cleanup();
}
124 
/* Handler registered for the "core.restart" event: releases all tracked
 * regexes.  Neither argument is used.
 */
static void regexp_restart_ev(const void *event_data, void *user_data) {
  (void) event_data;
  (void) user_data;

  regexp_cleanup();
}
129 
pr_regexp_alloc(module * m)130 pr_regex_t *pr_regexp_alloc(module *m) {
131   pr_regex_t *pre = NULL;
132   pool *re_pool = NULL;
133 
134   /* If no regex-tracking list has been allocated, create one.  Register a
135    * cleanup handler for this pool, to free up the data in the list.
136    */
137   if (regexp_pool == NULL) {
138     regexp_pool = make_sub_pool(permanent_pool);
139     pr_pool_tag(regexp_pool, "Regexp Pool");
140     regexp_list = make_array(regexp_pool, 0, sizeof(pr_regex_t *));
141   }
142 
143   re_pool = pr_pool_create_sz(regexp_pool, 128);
144   pr_pool_tag(re_pool, "regexp pool");
145 
146   pre = pcalloc(re_pool, sizeof(pr_regex_t));
147   pre->regex_pool = re_pool;
148   pre->m = m;
149 
150   /* Add this pointer to the array. */
151   *((pr_regex_t **) push_array(regexp_list)) = pre;
152 
153   return pre;
154 }
155 
/* Release tracked regexes.
 *
 * A tracked regex is freed when it is the given handle `pre` (if non-NULL),
 * or when it is owned by the given module `m` (if non-NULL).  When both
 * arguments are NULL, or when nothing has been allocated yet, nothing is
 * freed.  Freed entries are cleared from the tracking list.
 */
void pr_regexp_free(module *m, pr_regex_t *pre) {
  pr_regex_t **tracked = NULL;
  register unsigned int idx;

  if (regexp_list == NULL) {
    return;
  }

  tracked = (pr_regex_t **) regexp_list->elts;

  for (idx = 0; idx < regexp_list->nelts; idx++) {
    pr_regex_t *candidate = tracked[idx];

    if (candidate == NULL) {
      continue;
    }

    /* Skip entries which are neither the requested handle nor owned by the
     * requested module.
     */
    if ((pre == NULL || candidate != pre) &&
        (m == NULL || candidate->m != m)) {
      continue;
    }

    regexp_free(candidate);
    tracked[idx] = NULL;
  }
}
178 
179 #if defined(PR_USE_PCRE)
/* Compile `pattern` into a PCRE regex stored in `pre`, using the given
 * pcre_compile() option bits, and study the result.
 *
 * Returns 0 on success.  Returns -1 with errno set to EINVAL for NULL
 * arguments, or -1 when pcre_compile() fails (the error text is kept in
 * pre->pcre_errstr for pr_regexp_error()).  A failure to study the pattern
 * is traced but is not treated as an error.
 */
static int regexp_compile_pcre(pr_regex_t *pre, const char *pattern,
    int flags) {
  int error_offset, study_opts = 0;

  if (pre == NULL) {
    errno = EINVAL;
    return -1;
  }

  if (pattern == NULL) {
    errno = EINVAL;
    return -1;
  }

#ifdef PCRE_STUDY_JIT_COMPILE
  /* Ask for JIT compilation during the study step when PCRE offers it. */
  study_opts = PCRE_STUDY_JIT_COMPILE;
#endif /* PCRE_STUDY_JIT_COMPILE */

  pr_trace_msg(trace_channel, 9, "compiling pattern '%s' into PCRE regex",
    pattern);
  pre->pattern = pstrdup(pre->regex_pool, pattern);

  pre->pcre = pcre_compile(pattern, flags, &(pre->pcre_errstr),
    &error_offset, NULL);
  if (pre->pcre == NULL) {
    pr_trace_msg(trace_channel, 4,
      "error compiling pattern '%s' into PCRE regex: %s", pattern,
      pre->pcre_errstr);
    return -1;
  }

  /* Study the pattern as well, just in case. */
  pr_trace_msg(trace_channel, 9, "studying pattern '%s' for PCRE extra data",
    pattern);
  pre->pcre_extra = pcre_study(pre->pcre, study_opts, &(pre->pcre_errstr));

  /* A NULL result without an error string just means there was no extra
   * data to return; only an error string is worth reporting.
   */
  if (pre->pcre_extra == NULL &&
      pre->pcre_errstr != NULL) {
    pr_trace_msg(trace_channel, 4,
      "error studying pattern '%s' for PCRE regex: %s", pattern,
      pre->pcre_errstr);
  }

  return 0;
}
220 #endif /* PR_USE_PCRE */
221 
/* Compile `pattern` into a POSIX regex stored in `pre`.
 *
 * Any POSIX regex previously compiled into `pre` is released first.
 * REG_EXTENDED is always added to `flags` when the platform defines it.
 *
 * Returns the regcomp(3) result (0 on success), or -1 with errno set to
 * EINVAL when either argument is NULL.
 */
int pr_regexp_compile_posix(pr_regex_t *pre, const char *pattern, int flags) {
  if (pre == NULL ||
      pattern == NULL) {
    errno = EINVAL;
    return -1;
  }

  /* Drop the results of any earlier compilation into this handle. */
  if (pre->re != NULL) {
    regfree(pre->re);
    pre->re = NULL;
  }

  pr_trace_msg(trace_channel, 9, "compiling pattern '%s' into POSIX regex",
    pattern);
  pre->pattern = pstrdup(pre->regex_pool, pattern);

#if defined(REG_EXTENDED)
  /* Enable modern ("extended") POSIX regular expressions by default. */
  flags |= REG_EXTENDED;
#endif /* REG_EXTENDED */

  pre->re = pcalloc(pre->regex_pool, sizeof(regex_t));

  return regcomp(pre->re, pattern, flags);
}
250 
/* Compile `pattern` into `pre` using the preferred engine for this build:
 * PCRE when PCRE support is compiled in, POSIX otherwise.
 *
 * `flags` are POSIX regcomp(3) flags.  For PCRE only REG_ICASE is
 * translated (to PCRE_CASELESS); all other flags are ignored there.
 *
 * Returns whatever the engine-specific compile function returns.
 */
int pr_regexp_compile(pr_regex_t *pre, const char *pattern, int flags) {
#if defined(PR_USE_PCRE)
  /* Provide a simple mapping of POSIX regcomp(3) flags to
   * PCRE pcre_compile() flags.  The ProFTPD code tends not to use many
   * of these flags.
   */
  int pcre_opts = (flags & REG_ICASE) ? PCRE_CASELESS : 0;

  return regexp_compile_pcre(pre, pattern, pcre_opts);
#else
  return pr_regexp_compile_posix(pre, pattern, flags);
#endif /* PR_USE_PCRE */
}
268 
pr_regexp_error(int errcode,const pr_regex_t * pre,char * buf,size_t bufsz)269 size_t pr_regexp_error(int errcode, const pr_regex_t *pre, char *buf,
270     size_t bufsz) {
271   size_t res = 0;
272 
273   if (pre == NULL ||
274       buf == NULL ||
275       bufsz == 0) {
276     return 0;
277   }
278 
279 #if defined(PR_USE_PCRE)
280   if (pre->pcre_errstr != NULL) {
281     sstrncpy(buf, pre->pcre_errstr, bufsz);
282     return strlen(pre->pcre_errstr) + 1;
283   }
284 #endif /* PR_USE_PCRE */
285 
286   if (pre->re != NULL) {
287     /* Make sure the given buffer is always zeroed out first. */
288     memset(buf, '\0', bufsz);
289     res = regerror(errcode, pre->re, buf, bufsz-1);
290   }
291 
292   return res;
293 }
294 
pr_regexp_get_pattern(const pr_regex_t * pre)295 const char *pr_regexp_get_pattern(const pr_regex_t *pre) {
296   if (pre == NULL) {
297     errno = EINVAL;
298     return NULL;
299   }
300 
301   if (pre->pattern == NULL) {
302     errno = ENOENT;
303     return NULL;
304   }
305 
306   return pre->pattern;
307 }
308 
309 #if defined(PR_USE_PCRE)
/* Execute the compiled PCRE regex in `pre` against `text`.
 *
 * Parameters:
 *   nmatches, matches  - optional POSIX-style capture array; when both are
 *                        provided, it is filled with the captured offsets
 *                        and unused entries are set to -1.
 *   flags              - option bits passed through to pcre_exec().
 *   match_limit,
 *   match_limit_recursion
 *                      - per-call limits; 0 means "use the default set via
 *                        pr_regexp_set_limits()", and a default of 0 means
 *                        no limit.
 *
 * Returns 0 on a successful match, the negative pcre_exec() error code on
 * failure/no-match, or -1 with errno set to EINVAL if `pre` holds no
 * compiled PCRE regex.
 */
static int regexp_exec_pcre(pr_regex_t *pre, const char *text,
    size_t nmatches, regmatch_t *matches, int flags, unsigned long match_limit,
    unsigned long match_limit_recursion) {
  int res, ovector_count = 0, *ovector = NULL;
  size_t text_len;
  pool *tmp_pool = NULL;
  pcre_extra call_extra, *extra = NULL;

  if (pre->pcre == NULL) {
    errno = EINVAL;
    return -1;
  }

  text_len = strlen(text);

  /* Use the default match limits, if set and if the caller did not
   * explicitly provide limits.
   */
  if (match_limit == 0) {
    match_limit = pcre_match_limit;
  }

  if (match_limit_recursion == 0) {
    match_limit_recursion = pcre_match_limit_recursion;
  }

  extra = pre->pcre_extra;

  if (match_limit > 0 ||
      match_limit_recursion > 0) {
    /* Apply the limits to a per-call copy of the extra data, rather than to
     * pre->pcre_extra itself.  This keeps pre->pcre_extra strictly "what
     * pcre_study() returned" -- the only kind of block that may be handed to
     * pcre_free_study() when the regex is freed -- and keeps one call's
     * limits from silently sticking to later calls.
     */
    if (pre->pcre_extra != NULL) {
      call_extra = *(pre->pcre_extra);

    } else {
      memset(&call_extra, 0, sizeof(call_extra));
    }

    if (match_limit > 0) {
      call_extra.flags |= PCRE_EXTRA_MATCH_LIMIT;
      call_extra.match_limit = match_limit;
    }

    if (match_limit_recursion > 0) {
      call_extra.flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
      call_extra.match_limit_recursion = match_limit_recursion;
    }

    extra = &call_extra;
  }

  if (nmatches > 0 &&
      matches != NULL) {
    tmp_pool = make_sub_pool(pre->regex_pool);
    pr_pool_tag(tmp_pool, "regexp tmp pool");

    /* pcre_exec() expects the size of the vector in ints, not in captures:
     * each capture uses two ints for its offsets, and the final third of the
     * vector is workspace.
     */
    ovector_count = (int) (nmatches * 3);
    ovector = pcalloc(tmp_pool, sizeof(int) * nmatches * 3);
  }

  pr_trace_msg(trace_channel, 9,
    "executing PCRE regex '%s' against subject '%s'",
    pr_regexp_get_pattern(pre), text);
  res = pcre_exec(pre->pcre, extra, text, text_len, 0, flags, ovector,
    ovector_count);

  if (res < 0) {
    if (tmp_pool != NULL) {
      destroy_pool(tmp_pool);
    }

    if (pr_trace_get_level(trace_channel) >= 9) {
      const char *reason = "unknown";

      switch (res) {
        case PCRE_ERROR_NOMATCH:
          reason = "subject did not match pattern";
          break;

        case PCRE_ERROR_NULL:
          reason = "null regex or subject";
          break;

        case PCRE_ERROR_BADOPTION:
          reason = "unsupported options bit";
          break;

        case PCRE_ERROR_BADMAGIC:
          reason = "bad magic number in regex";
          break;

        case PCRE_ERROR_UNKNOWN_OPCODE:
        case PCRE_ERROR_INTERNAL:
          reason = "internal PCRE error or corrupted regex";
          break;

        case PCRE_ERROR_NOMEMORY:
          reason = "not enough memory for backreferences";
          break;

        case PCRE_ERROR_MATCHLIMIT:
          reason = "match limit reached/exceeded";
          break;

        case PCRE_ERROR_RECURSIONLIMIT:
          reason = "match limit recursion reached/exceeded";
          break;

        case PCRE_ERROR_BADUTF8:
          reason = "invalid UTF8 subject used";
          break;

        case PCRE_ERROR_PARTIAL:
          reason = "subject matched only partially; PCRE_PARTIAL flag not used";
          break;

        default:
          break;
      }

      pr_trace_msg(trace_channel, 9,
        "PCRE regex '%s' failed to match subject '%s': %s",
        pr_regexp_get_pattern(pre), text, reason);
    }

    return res;
  }

  pr_trace_msg(trace_channel, 9,
    "PCRE regex '%s' successfully matched subject '%s'",
    pr_regexp_get_pattern(pre), text);

  if (ovector_count > 0) {
    /* Populate the provided POSIX regmatch_t array with the PCRE data. */
    size_t i, filled;

    /* A return value of zero means the match succeeded but there were more
     * captures than the vector could hold; in that case the vector has been
     * filled to capacity, i.e. with nmatches pairs.
     */
    filled = (res == 0) ? nmatches : (size_t) res;
    if (filled > nmatches) {
      filled = nmatches;
    }

    for (i = 0; i < filled; i++) {
      matches[i].rm_so = ovector[i * 2];
      matches[i].rm_eo = ovector[(i * 2) + 1];
    }

    /* Ensure the remaining items are set to proper defaults as well. */
    for (; i < nmatches; i++) {
      matches[i].rm_so = matches[i].rm_eo = -1;
    }
  }

  if (tmp_pool != NULL) {
    destroy_pool(tmp_pool);
  }

  if (matches != NULL &&
      pr_trace_get_level(trace_channel) >= 20) {
    register unsigned int i;

    for (i = 0; i < nmatches; i++) {
      int match_len;
      const char *match_text;

      /* The first unset entry ends the list of reported captures. */
      if (matches[i].rm_so == -1 ||
          matches[i].rm_eo == -1) {
        break;
      }

      match_text = &(text[matches[i].rm_so]);
      match_len = matches[i].rm_eo - matches[i].rm_so;

      pr_trace_msg(trace_channel, 20,
        "PCRE regex '%s' match #%u: %.*s (start %ld, len %d)",
        pr_regexp_get_pattern(pre), i, (int) match_len, match_text,
        (long) matches[i].rm_so, match_len);
    }
  }

  return 0;
}
473 #endif /* PR_USE_PCRE */
474 
/* Execute the compiled POSIX regex in `pre` against `text`.
 *
 * `nmatches`, `matches` and `flags` are passed straight to regexec(3).
 *
 * Returns the regexec(3) result (0 on a match, non-zero otherwise), or -1
 * with errno set to EINVAL if `pre` holds no compiled POSIX regex.
 */
static int regexp_exec_posix(pr_regex_t *pre, const char *text,
    size_t nmatches, regmatch_t *matches, int flags) {
  int res;

  /* A handle which was allocated but never (successfully) given a POSIX
   * regex has nothing to execute; do not hand a NULL regex_t to regexec(3).
   */
  if (pre->re == NULL) {
    errno = EINVAL;
    return -1;
  }

  pr_trace_msg(trace_channel, 9,
    "executing POSIX regex '%s' against subject '%s'",
    pr_regexp_get_pattern(pre), text);
  res = regexec(pre->re, text, nmatches, matches, flags);
  if (res == 0) {
    pr_trace_msg(trace_channel, 9,
      "POSIX regex '%s' successfully matched subject '%s'",
      pr_regexp_get_pattern(pre), text);

    if (matches != NULL &&
        pr_trace_get_level(trace_channel) >= 20) {
      register unsigned int i;

      for (i = 0; i < nmatches; i++) {
        int match_len;
        const char *match_text;

        /* The first unset entry ends the list of reported captures. */
        if (matches[i].rm_so == -1 ||
            matches[i].rm_eo == -1) {
          break;
        }

        match_text = &(text[matches[i].rm_so]);
        match_len = matches[i].rm_eo - matches[i].rm_so;

        pr_trace_msg(trace_channel, 20,
          "POSIX regex '%s' match #%u: %.*s (start %ld, len %d)",
          pr_regexp_get_pattern(pre), i, (int) match_len, match_text,
          (long) matches[i].rm_so, match_len);
      }
    }

  } else {
    if (pr_trace_get_level(trace_channel) >= 9) {
      const char *reason = "subject did not match pattern";

      /* NOTE: Expectation of `res` values here are mixed when PCRE
       * support, and the <pcreposix.h> header, are involved.
       */

      pr_trace_msg(trace_channel, 9,
        "POSIX regex '%s' failed to match subject '%s': %s (%d)",
         pr_regexp_get_pattern(pre), text, reason, res);
    }
  }

  return res;
}
527 
/* Match `text` against the regex held by `pre`.
 *
 * On PCRE builds a compiled PCRE regex is preferred when the handle has
 * one; otherwise the POSIX regex is used.  The match limits are only
 * relevant to the PCRE path.
 *
 * Returns 0 on a match.  Returns -1 with errno set to EINVAL when `pre` or
 * `text` is NULL.  A POSIX REG_NOMATCH result is reported as -1; any other
 * engine result is returned unchanged.
 */
int pr_regexp_exec(pr_regex_t *pre, const char *text, size_t nmatches,
    regmatch_t *matches, int flags, unsigned long match_limit,
    unsigned long match_limit_recursion) {
  int rc;

  if (pre == NULL) {
    errno = EINVAL;
    return -1;
  }

  if (text == NULL) {
    errno = EINVAL;
    return -1;
  }

#if defined(PR_USE_PCRE)
  if (pre->pcre != NULL) {
    return regexp_exec_pcre(pre, text, nmatches, matches, flags, match_limit,
      match_limit_recursion);
  }
#endif /* PR_USE_PCRE */

  rc = regexp_exec_posix(pre, text, nmatches, matches, flags);

  /* Make sure that we return a negative value to indicate a failed match;
   * PCRE already does this.
   */
  return (rc == REG_NOMATCH) ? -1 : rc;
}
557 
/* Set the default PCRE match limits used when a caller of pr_regexp_exec()
 * passes 0 for the per-call limits.  On builds without PCRE support the
 * values are ignored.
 *
 * Always returns 0.
 */
int pr_regexp_set_limits(unsigned long match_limit,
    unsigned long match_limit_recursion) {
#if defined(PR_USE_PCRE)
  pcre_match_limit_recursion = match_limit_recursion;
  pcre_match_limit = match_limit;
#else
  /* No engine in this build makes use of the limits. */
  (void) match_limit;
  (void) match_limit_recursion;
#endif /* PR_USE_PCRE */

  return 0;
}
568 
init_regexp(void)569 void init_regexp(void) {
570 
571   /* Register a restart handler for the regexp pool, so that when restarting,
572    * regfree(3) is called on each of the regex_t pointers in a
573    * regex_t-tracking array, thus preventing memory leaks on a long-running
574    * daemon.
575    *
576    * This registration is done here so that it only happens once.
577    */
578   pr_event_register(NULL, "core.restart", regexp_restart_ev, NULL);
579   pr_event_register(NULL, "core.exit", regexp_exit_ev, NULL);
580 
581 #if defined(PR_USE_PCRE)
582   pr_log_debug(DEBUG2, "using PCRE %s", pcre_version());
583 #endif /* PR_USE_PCRE */
584 }
585 
586 #endif
587