1 /**
2  * @file
3  * Manage regular expressions
4  *
5  * @authors
6  * Copyright (C) 2017 Richard Russon <rich@flatcap.org>
7  * Copyright (C) 2019 Simon Symeonidis <lethaljellybean@gmail.com>
8  *
9  * @copyright
10  * This program is free software: you can redistribute it and/or modify it under
11  * the terms of the GNU General Public License as published by the Free Software
12  * Foundation, either version 2 of the License, or (at your option) any later
13  * version.
14  *
15  * This program is distributed in the hope that it will be useful, but WITHOUT
16  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
18  * details.
19  *
20  * You should have received a copy of the GNU General Public License along with
21  * this program.  If not, see <http://www.gnu.org/licenses/>.
22  */
23 
24 /**
25  * @page mutt_regex Manage regular expressions
26  *
27  * Manage regular expressions.
28  */
29 
30 #include "config.h"
31 #include <ctype.h>
32 #include <stdbool.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include "buffer.h"
38 #include "logging.h"
39 #include "mbyte.h"
40 #include "memory.h"
41 #include "message.h"
42 #include "queue.h"
43 #include "regex3.h"
44 #include "string2.h"
45 
46 /**
47  * mutt_regex_compile - Create an Regex from a string
48  * @param str   Regular expression
49  * @param flags Type flags, e.g. REG_ICASE
50  * @retval ptr New Regex object
51  * @retval NULL Error
52  */
mutt_regex_compile(const char * str,uint16_t flags)53 struct Regex *mutt_regex_compile(const char *str, uint16_t flags)
54 {
55   if (!str || (*str == '\0'))
56     return NULL;
57   struct Regex *rx = mutt_mem_calloc(1, sizeof(struct Regex));
58   rx->pattern = mutt_str_dup(str);
59   rx->regex = mutt_mem_calloc(1, sizeof(regex_t));
60   if (REG_COMP(rx->regex, str, flags) != 0)
61     mutt_regex_free(&rx);
62 
63   return rx;
64 }
65 
66 /**
67  * mutt_regex_new - Create an Regex from a string
68  * @param str   Regular expression
69  * @param flags Type flags, e.g. #DT_REGEX_MATCH_CASE
70  * @param err   Buffer for error messages
71  * @retval ptr New Regex object
72  * @retval NULL Error
73  */
mutt_regex_new(const char * str,uint32_t flags,struct Buffer * err)74 struct Regex *mutt_regex_new(const char *str, uint32_t flags, struct Buffer *err)
75 {
76   if (!str || (*str == '\0'))
77     return NULL;
78 
79   uint16_t rflags = 0;
80   struct Regex *reg = mutt_mem_calloc(1, sizeof(struct Regex));
81 
82   reg->regex = mutt_mem_calloc(1, sizeof(regex_t));
83   reg->pattern = mutt_str_dup(str);
84 
85   /* Should we use smart case matching? */
86   if (((flags & DT_REGEX_MATCH_CASE) == 0) && mutt_mb_is_lower(str))
87     rflags |= REG_ICASE;
88 
89   /* Is a prefix of '!' allowed? */
90   if (((flags & DT_REGEX_ALLOW_NOT) != 0) && (str[0] == '!'))
91   {
92     reg->pat_not = true;
93     str++;
94   }
95 
96   int rc = REG_COMP(reg->regex, str, rflags);
97   if ((rc != 0) && err)
98   {
99     regerror(rc, reg->regex, err->data, err->dsize);
100     mutt_regex_free(&reg);
101     return NULL;
102   }
103 
104   return reg;
105 }
106 
107 /**
108  * mutt_regex_free - Free a Regex object
109  * @param[out] r Regex to free
110  */
mutt_regex_free(struct Regex ** r)111 void mutt_regex_free(struct Regex **r)
112 {
113   if (!r || !*r)
114     return;
115 
116   FREE(&(*r)->pattern);
117   if ((*r)->regex)
118     regfree((*r)->regex);
119   FREE(&(*r)->regex);
120   FREE(r);
121 }
122 
123 /**
124  * mutt_regexlist_add - Compile a regex string and add it to a list
125  * @param rl    RegexList to add to
126  * @param str   String to compile into a regex
127  * @param flags Flags, e.g. REG_ICASE
128  * @param err   Buffer for error messages
129  * @retval 0  Success, Regex compiled and added to the list
130  * @retval -1 Error, see message in 'err'
131  */
mutt_regexlist_add(struct RegexList * rl,const char * str,uint16_t flags,struct Buffer * err)132 int mutt_regexlist_add(struct RegexList *rl, const char *str, uint16_t flags,
133                        struct Buffer *err)
134 {
135   if (!rl || !str || (*str == '\0'))
136     return 0;
137 
138   struct Regex *rx = mutt_regex_compile(str, flags);
139   if (!rx)
140   {
141     mutt_buffer_printf(err, "Bad regex: %s\n", str);
142     return -1;
143   }
144 
145   /* check to make sure the item is not already on this rl */
146   struct RegexNode *np = NULL;
147   STAILQ_FOREACH(np, rl, entries)
148   {
149     if (mutt_istr_equal(rx->pattern, np->regex->pattern))
150       break; /* already on the rl */
151   }
152 
153   if (np)
154   {
155     mutt_regex_free(&rx);
156   }
157   else
158   {
159     np = mutt_regexlist_new();
160     np->regex = rx;
161     STAILQ_INSERT_TAIL(rl, np, entries);
162   }
163 
164   return 0;
165 }
166 
167 /**
168  * mutt_regexlist_free - Free a RegexList object
169  * @param rl RegexList to free
170  */
mutt_regexlist_free(struct RegexList * rl)171 void mutt_regexlist_free(struct RegexList *rl)
172 {
173   if (!rl)
174     return;
175 
176   struct RegexNode *np = NULL, *tmp = NULL;
177   STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
178   {
179     STAILQ_REMOVE(rl, np, RegexNode, entries);
180     mutt_regex_free(&np->regex);
181     FREE(&np);
182   }
183   STAILQ_INIT(rl);
184 }
185 
186 /**
187  * mutt_regexlist_match - Does a string match any Regex in the list?
188  * @param rl  RegexList to match against
189  * @param str String to compare
190  * @retval true String matches one of the Regexes in the list
191  */
mutt_regexlist_match(struct RegexList * rl,const char * str)192 bool mutt_regexlist_match(struct RegexList *rl, const char *str)
193 {
194   if (!rl || !str)
195     return false;
196   struct RegexNode *np = NULL;
197   STAILQ_FOREACH(np, rl, entries)
198   {
199     if (mutt_regex_match(np->regex, str))
200     {
201       mutt_debug(LL_DEBUG5, "%s matches %s\n", str, np->regex->pattern);
202       return true;
203     }
204   }
205 
206   return false;
207 }
208 
209 /**
210  * mutt_regexlist_new - Create a new RegexList
211  * @retval ptr New RegexList object
212  */
mutt_regexlist_new(void)213 struct RegexNode *mutt_regexlist_new(void)
214 {
215   return mutt_mem_calloc(1, sizeof(struct RegexNode));
216 }
217 
218 /**
219  * mutt_regexlist_remove - Remove a Regex from a list
220  * @param rl  RegexList to alter
221  * @param str Pattern to remove from the list
222  * @retval 0  Success, pattern was found and removed from the list
223  * @retval -1 Error, pattern wasn't found
224  *
225  * If the pattern is "*", then all the Regexes are removed.
226  */
mutt_regexlist_remove(struct RegexList * rl,const char * str)227 int mutt_regexlist_remove(struct RegexList *rl, const char *str)
228 {
229   if (!rl || !str)
230     return -1;
231 
232   if (mutt_str_equal("*", str))
233   {
234     mutt_regexlist_free(rl); /* "unCMD *" means delete all current entries */
235     return 0;
236   }
237 
238   int rc = -1;
239   struct RegexNode *np = NULL, *tmp = NULL;
240   STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
241   {
242     if (mutt_istr_equal(str, np->regex->pattern))
243     {
244       STAILQ_REMOVE(rl, np, RegexNode, entries);
245       mutt_regex_free(&np->regex);
246       FREE(&np);
247       rc = 0;
248     }
249   }
250 
251   return rc;
252 }
253 
254 /**
255  * mutt_replacelist_add - Add a pattern and a template to a list
256  * @param rl    ReplaceList to add to
257  * @param pat   Pattern to compile into a regex
258  * @param templ Template string to associate with the pattern
259  * @param err   Buffer for error messages
260  * @retval 0  Success, pattern added to the ReplaceList
261  * @retval -1 Error, see message in 'err'
262  */
mutt_replacelist_add(struct ReplaceList * rl,const char * pat,const char * templ,struct Buffer * err)263 int mutt_replacelist_add(struct ReplaceList *rl, const char *pat,
264                          const char *templ, struct Buffer *err)
265 {
266   if (!rl || !pat || (*pat == '\0') || !templ)
267     return 0;
268 
269   struct Regex *rx = mutt_regex_compile(pat, REG_ICASE);
270   if (!rx)
271   {
272     if (err)
273       mutt_buffer_printf(err, _("Bad regex: %s"), pat);
274     return -1;
275   }
276 
277   /* check to make sure the item is not already on this rl */
278   struct Replace *np = NULL;
279   STAILQ_FOREACH(np, rl, entries)
280   {
281     if (mutt_istr_equal(rx->pattern, np->regex->pattern))
282     {
283       /* Already on the rl. Formerly we just skipped this case, but
284        * now we're supporting removals, which means we're supporting
285        * re-adds conceptually. So we probably want this to imply a
286        * removal, then do an add. We can achieve the removal by freeing
287        * the template, and leaving t pointed at the current item.  */
288       FREE(&np->templ);
289       break;
290     }
291   }
292 
293   /* If np is set, it's pointing into an extant ReplaceList* that we want to
294    * update. Otherwise we want to make a new one to link at the rl's end.  */
295   if (np)
296   {
297     mutt_regex_free(&rx);
298   }
299   else
300   {
301     np = mutt_replacelist_new();
302     np->regex = rx;
303     rx = NULL;
304     STAILQ_INSERT_TAIL(rl, np, entries);
305   }
306 
307   /* Now np is the Replace that we want to modify. It is prepared. */
308   np->templ = mutt_str_dup(templ);
309 
310   /* Find highest match number in template string */
311   np->nmatch = 0;
312   for (const char *p = templ; *p;)
313   {
314     if (*p == '%')
315     {
316       int n = 0;
317       if (mutt_str_atoi(++p, &n) < 0)
318         mutt_debug(LL_DEBUG2, "Invalid match number in replacelist: '%s'\n", p);
319       if (n > np->nmatch)
320         np->nmatch = n;
321       while (*p && isdigit((int) *p))
322         p++;
323     }
324     else
325       p++;
326   }
327 
328   if (np->nmatch > np->regex->regex->re_nsub)
329   {
330     if (err)
331       mutt_buffer_printf(err, "%s", _("Not enough subexpressions for template"));
332     mutt_replacelist_remove(rl, pat);
333     return -1;
334   }
335 
336   np->nmatch++; /* match 0 is always the whole expr */
337   return 0;
338 }
339 
340 /**
341  * mutt_replacelist_apply - Apply replacements to a buffer
342  * @param rl     ReplaceList to apply
343  * @param buf    Buffer for the result
344  * @param buflen Length of the buffer
345  * @param str    String to manipulate
346  * @retval ptr Pointer to 'buf'
347  *
348  * If 'buf' is NULL, a new string will be returned.  It must be freed by the caller.
349  *
350  * @note This function uses a fixed size buffer of 1024 and so should
351  * only be used for visual modifications, such as disp_subj.
352  */
mutt_replacelist_apply(struct ReplaceList * rl,char * buf,size_t buflen,const char * str)353 char *mutt_replacelist_apply(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
354 {
355   static regmatch_t *pmatch = NULL;
356   static size_t nmatch = 0;
357   static char twinbuf[2][1024];
358   int switcher = 0;
359   char *p = NULL;
360   size_t cpysize, tlen;
361   char *src = NULL, *dst = NULL;
362 
363   if (buf && (buflen != 0))
364     buf[0] = '\0';
365 
366   if (!rl || !str || (*str == '\0') || (buf && (buflen == 0)))
367     return buf;
368 
369   twinbuf[0][0] = '\0';
370   twinbuf[1][0] = '\0';
371   src = twinbuf[switcher];
372   dst = src;
373 
374   mutt_str_copy(src, str, 1024);
375 
376   struct Replace *np = NULL;
377   STAILQ_FOREACH(np, rl, entries)
378   {
379     /* If this pattern needs more matches, expand pmatch. */
380     if (np->nmatch > nmatch)
381     {
382       mutt_mem_realloc(&pmatch, np->nmatch * sizeof(regmatch_t));
383       nmatch = np->nmatch;
384     }
385 
386     if (mutt_regex_capture(np->regex, src, np->nmatch, pmatch))
387     {
388       tlen = 0;
389       switcher ^= 1;
390       dst = twinbuf[switcher];
391 
392       mutt_debug(LL_DEBUG5, "%s matches %s\n", src, np->regex->pattern);
393 
394       /* Copy into other twinbuf with substitutions */
395       if (np->templ)
396       {
397         for (p = np->templ; *p && (tlen < 1023);)
398         {
399           if (*p == '%')
400           {
401             p++;
402             if (*p == 'L')
403             {
404               p++;
405               cpysize = MIN(pmatch[0].rm_so, 1023 - tlen);
406               strncpy(&dst[tlen], src, cpysize);
407               tlen += cpysize;
408             }
409             else if (*p == 'R')
410             {
411               p++;
412               cpysize = MIN(strlen(src) - pmatch[0].rm_eo, 1023 - tlen);
413               strncpy(&dst[tlen], &src[pmatch[0].rm_eo], cpysize);
414               tlen += cpysize;
415             }
416             else
417             {
418               long n = strtoul(p, &p, 10);        /* get subst number */
419               while (isdigit((unsigned char) *p)) /* skip subst token */
420                 p++;
421               for (int i = pmatch[n].rm_so; (i < pmatch[n].rm_eo) && (tlen < 1023); i++)
422               {
423                 dst[tlen++] = src[i];
424               }
425             }
426           }
427           else
428             dst[tlen++] = *p++;
429         }
430       }
431       dst[tlen] = '\0';
432       mutt_debug(LL_DEBUG5, "subst %s\n", dst);
433     }
434     src = dst;
435   }
436 
437   if (buf)
438     mutt_str_copy(buf, dst, buflen);
439   else
440     buf = mutt_str_dup(dst);
441   return buf;
442 }
443 
444 /**
445  * mutt_replacelist_free - Free a ReplaceList object
446  * @param rl ReplaceList to free
447  */
mutt_replacelist_free(struct ReplaceList * rl)448 void mutt_replacelist_free(struct ReplaceList *rl)
449 {
450   if (!rl)
451     return;
452 
453   struct Replace *np = NULL, *tmp = NULL;
454   STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
455   {
456     STAILQ_REMOVE(rl, np, Replace, entries);
457     mutt_regex_free(&np->regex);
458     FREE(&np->templ);
459     FREE(&np);
460   }
461 }
462 
463 /**
464  * mutt_replacelist_match - Does a string match a pattern?
465  * @param rl     ReplaceList of patterns
466  * @param str    String to check
467  * @param buf    Buffer to save match
468  * @param buflen Buffer length
469  * @retval true String matches a patterh in the ReplaceList
470  *
471  * Match a string against the patterns defined by the 'spam' command and output
472  * the expanded format into `buf` when there is a match.  If buflen<=0, the
473  * match is performed but the format is not expanded and no assumptions are
474  * made about the value of `buf` so it may be NULL.
475  */
mutt_replacelist_match(struct ReplaceList * rl,char * buf,size_t buflen,const char * str)476 bool mutt_replacelist_match(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
477 {
478   if (!rl || !buf || !str)
479     return false;
480 
481   static regmatch_t *pmatch = NULL;
482   static size_t nmatch = 0;
483   int tlen = 0;
484   char *p = NULL;
485 
486   struct Replace *np = NULL;
487   STAILQ_FOREACH(np, rl, entries)
488   {
489     /* If this pattern needs more matches, expand pmatch. */
490     if (np->nmatch > nmatch)
491     {
492       mutt_mem_realloc(&pmatch, np->nmatch * sizeof(regmatch_t));
493       nmatch = np->nmatch;
494     }
495 
496     /* Does this pattern match? */
497     if (mutt_regex_capture(np->regex, str, (size_t) np->nmatch, pmatch))
498     {
499       mutt_debug(LL_DEBUG5, "%s matches %s\n", str, np->regex->pattern);
500       mutt_debug(LL_DEBUG5, "%d subs\n", (int) np->regex->regex->re_nsub);
501 
502       /* Copy template into buf, with substitutions. */
503       for (p = np->templ; *p && (tlen < (buflen - 1));)
504       {
505         /* backreference to pattern match substring, eg. %1, %2, etc) */
506         if (*p == '%')
507         {
508           char *e = NULL; /* used as pointer to end of integer backreference in strtol() call */
509 
510           p++; /* skip over % char */
511           long n = strtol(p, &e, 10);
512           /* Ensure that the integer conversion succeeded (e!=p) and bounds check.  The upper bound check
513            * should not strictly be necessary since add_to_spam_list() finds the largest value, and
514            * the static array above is always large enough based on that value. */
515           if ((e != p) && (n >= 0) && (n <= np->nmatch) && (pmatch[n].rm_so != -1))
516           {
517             /* copy as much of the substring match as will fit in the output buffer, saving space for
518              * the terminating nul char */
519             int idx;
520             for (idx = pmatch[n].rm_so;
521                  (idx < pmatch[n].rm_eo) && (tlen < (buflen - 1)); idx++)
522             {
523               buf[tlen++] = str[idx];
524             }
525           }
526           p = e; /* skip over the parsed integer */
527         }
528         else
529         {
530           buf[tlen++] = *p++;
531         }
532       }
533       /* tlen should always be less than buflen except when buflen<=0
534        * because the bounds checks in the above code leave room for the
535        * terminal nul char.   This should avoid returning an unterminated
536        * string to the caller.  When buflen<=0 we make no assumption about
537        * the validity of the buf pointer. */
538       if (tlen < buflen)
539       {
540         buf[tlen] = '\0';
541         mutt_debug(LL_DEBUG5, "\"%s\"\n", buf);
542       }
543       return true;
544     }
545   }
546 
547   return false;
548 }
549 
550 /**
551  * mutt_replacelist_new - Create a new ReplaceList
552  * @retval ptr New ReplaceList
553  */
mutt_replacelist_new(void)554 struct Replace *mutt_replacelist_new(void)
555 {
556   return mutt_mem_calloc(1, sizeof(struct Replace));
557 }
558 
559 /**
560  * mutt_replacelist_remove - Remove a pattern from a list
561  * @param rl  ReplaceList to modify
562  * @param pat Pattern to remove
563  * @retval num Matching patterns removed
564  */
mutt_replacelist_remove(struct ReplaceList * rl,const char * pat)565 int mutt_replacelist_remove(struct ReplaceList *rl, const char *pat)
566 {
567   if (!rl || !pat)
568     return 0;
569 
570   int nremoved = 0;
571   struct Replace *np = NULL, *tmp = NULL;
572   STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
573   {
574     if (mutt_str_equal(np->regex->pattern, pat))
575     {
576       STAILQ_REMOVE(rl, np, Replace, entries);
577       mutt_regex_free(&np->regex);
578       FREE(&np->templ);
579       FREE(&np);
580       nremoved++;
581     }
582   }
583 
584   return nremoved;
585 }
586 
587 /**
588  * mutt_regex_capture - Match a regex against a string, with provided options
589  * @param regex   Regex to execute
590  * @param str     String to apply regex on
591  * @param nmatch  Length of matches
592  * @param matches regmatch_t to hold match indices
593  * @retval true  str matches
594  * @retval false str does not match
595  */
mutt_regex_capture(const struct Regex * regex,const char * str,size_t nmatch,regmatch_t matches[])596 bool mutt_regex_capture(const struct Regex *regex, const char *str,
597                         size_t nmatch, regmatch_t matches[])
598 {
599   if (!regex || !str || !regex->regex)
600     return false;
601 
602   int rc = regexec(regex->regex, str, nmatch, matches, 0);
603   return ((rc == 0) ^ regex->pat_not);
604 }
605 
/**
 * mutt_regex_match - Shorthand to mutt_regex_capture()
 * @param regex Regex which is desired to match against
 * @param str   String to search with given regex
 * @retval true  str matches
 * @retval false str does not match
 *
 * No capture information is requested: mutt_regex_capture() is called with
 * zero match slots and a NULL match array.
 */
bool mutt_regex_match(const struct Regex *regex, const char *str)
{
  const size_t no_captures = 0;
  const bool matched = mutt_regex_capture(regex, str, no_captures, NULL);
  return matched;
}
617