1 /**
2 * @file
3 * Manage regular expressions
4 *
5 * @authors
6 * Copyright (C) 2017 Richard Russon <rich@flatcap.org>
7 * Copyright (C) 2019 Simon Symeonidis <lethaljellybean@gmail.com>
8 *
9 * @copyright
10 * This program is free software: you can redistribute it and/or modify it under
11 * the terms of the GNU General Public License as published by the Free Software
12 * Foundation, either version 2 of the License, or (at your option) any later
13 * version.
14 *
15 * This program is distributed in the hope that it will be useful, but WITHOUT
16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
18 * details.
19 *
20 * You should have received a copy of the GNU General Public License along with
21 * this program. If not, see <http://www.gnu.org/licenses/>.
22 */
23
24 /**
25 * @page mutt_regex Manage regular expressions
26 *
27 * Manage regular expressions.
28 */
29
30 #include "config.h"
31 #include <ctype.h>
32 #include <stdbool.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include "buffer.h"
38 #include "logging.h"
39 #include "mbyte.h"
40 #include "memory.h"
41 #include "message.h"
42 #include "queue.h"
43 #include "regex3.h"
44 #include "string2.h"
45
46 /**
47 * mutt_regex_compile - Create an Regex from a string
48 * @param str Regular expression
49 * @param flags Type flags, e.g. REG_ICASE
50 * @retval ptr New Regex object
51 * @retval NULL Error
52 */
mutt_regex_compile(const char * str,uint16_t flags)53 struct Regex *mutt_regex_compile(const char *str, uint16_t flags)
54 {
55 if (!str || (*str == '\0'))
56 return NULL;
57 struct Regex *rx = mutt_mem_calloc(1, sizeof(struct Regex));
58 rx->pattern = mutt_str_dup(str);
59 rx->regex = mutt_mem_calloc(1, sizeof(regex_t));
60 if (REG_COMP(rx->regex, str, flags) != 0)
61 mutt_regex_free(&rx);
62
63 return rx;
64 }
65
66 /**
67 * mutt_regex_new - Create an Regex from a string
68 * @param str Regular expression
69 * @param flags Type flags, e.g. #DT_REGEX_MATCH_CASE
70 * @param err Buffer for error messages
71 * @retval ptr New Regex object
72 * @retval NULL Error
73 */
mutt_regex_new(const char * str,uint32_t flags,struct Buffer * err)74 struct Regex *mutt_regex_new(const char *str, uint32_t flags, struct Buffer *err)
75 {
76 if (!str || (*str == '\0'))
77 return NULL;
78
79 uint16_t rflags = 0;
80 struct Regex *reg = mutt_mem_calloc(1, sizeof(struct Regex));
81
82 reg->regex = mutt_mem_calloc(1, sizeof(regex_t));
83 reg->pattern = mutt_str_dup(str);
84
85 /* Should we use smart case matching? */
86 if (((flags & DT_REGEX_MATCH_CASE) == 0) && mutt_mb_is_lower(str))
87 rflags |= REG_ICASE;
88
89 /* Is a prefix of '!' allowed? */
90 if (((flags & DT_REGEX_ALLOW_NOT) != 0) && (str[0] == '!'))
91 {
92 reg->pat_not = true;
93 str++;
94 }
95
96 int rc = REG_COMP(reg->regex, str, rflags);
97 if ((rc != 0) && err)
98 {
99 regerror(rc, reg->regex, err->data, err->dsize);
100 mutt_regex_free(®);
101 return NULL;
102 }
103
104 return reg;
105 }
106
107 /**
108 * mutt_regex_free - Free a Regex object
109 * @param[out] r Regex to free
110 */
mutt_regex_free(struct Regex ** r)111 void mutt_regex_free(struct Regex **r)
112 {
113 if (!r || !*r)
114 return;
115
116 FREE(&(*r)->pattern);
117 if ((*r)->regex)
118 regfree((*r)->regex);
119 FREE(&(*r)->regex);
120 FREE(r);
121 }
122
123 /**
124 * mutt_regexlist_add - Compile a regex string and add it to a list
125 * @param rl RegexList to add to
126 * @param str String to compile into a regex
127 * @param flags Flags, e.g. REG_ICASE
128 * @param err Buffer for error messages
129 * @retval 0 Success, Regex compiled and added to the list
130 * @retval -1 Error, see message in 'err'
131 */
mutt_regexlist_add(struct RegexList * rl,const char * str,uint16_t flags,struct Buffer * err)132 int mutt_regexlist_add(struct RegexList *rl, const char *str, uint16_t flags,
133 struct Buffer *err)
134 {
135 if (!rl || !str || (*str == '\0'))
136 return 0;
137
138 struct Regex *rx = mutt_regex_compile(str, flags);
139 if (!rx)
140 {
141 mutt_buffer_printf(err, "Bad regex: %s\n", str);
142 return -1;
143 }
144
145 /* check to make sure the item is not already on this rl */
146 struct RegexNode *np = NULL;
147 STAILQ_FOREACH(np, rl, entries)
148 {
149 if (mutt_istr_equal(rx->pattern, np->regex->pattern))
150 break; /* already on the rl */
151 }
152
153 if (np)
154 {
155 mutt_regex_free(&rx);
156 }
157 else
158 {
159 np = mutt_regexlist_new();
160 np->regex = rx;
161 STAILQ_INSERT_TAIL(rl, np, entries);
162 }
163
164 return 0;
165 }
166
167 /**
168 * mutt_regexlist_free - Free a RegexList object
169 * @param rl RegexList to free
170 */
mutt_regexlist_free(struct RegexList * rl)171 void mutt_regexlist_free(struct RegexList *rl)
172 {
173 if (!rl)
174 return;
175
176 struct RegexNode *np = NULL, *tmp = NULL;
177 STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
178 {
179 STAILQ_REMOVE(rl, np, RegexNode, entries);
180 mutt_regex_free(&np->regex);
181 FREE(&np);
182 }
183 STAILQ_INIT(rl);
184 }
185
186 /**
187 * mutt_regexlist_match - Does a string match any Regex in the list?
188 * @param rl RegexList to match against
189 * @param str String to compare
190 * @retval true String matches one of the Regexes in the list
191 */
mutt_regexlist_match(struct RegexList * rl,const char * str)192 bool mutt_regexlist_match(struct RegexList *rl, const char *str)
193 {
194 if (!rl || !str)
195 return false;
196 struct RegexNode *np = NULL;
197 STAILQ_FOREACH(np, rl, entries)
198 {
199 if (mutt_regex_match(np->regex, str))
200 {
201 mutt_debug(LL_DEBUG5, "%s matches %s\n", str, np->regex->pattern);
202 return true;
203 }
204 }
205
206 return false;
207 }
208
209 /**
210 * mutt_regexlist_new - Create a new RegexList
211 * @retval ptr New RegexList object
212 */
mutt_regexlist_new(void)213 struct RegexNode *mutt_regexlist_new(void)
214 {
215 return mutt_mem_calloc(1, sizeof(struct RegexNode));
216 }
217
218 /**
219 * mutt_regexlist_remove - Remove a Regex from a list
220 * @param rl RegexList to alter
221 * @param str Pattern to remove from the list
222 * @retval 0 Success, pattern was found and removed from the list
223 * @retval -1 Error, pattern wasn't found
224 *
225 * If the pattern is "*", then all the Regexes are removed.
226 */
mutt_regexlist_remove(struct RegexList * rl,const char * str)227 int mutt_regexlist_remove(struct RegexList *rl, const char *str)
228 {
229 if (!rl || !str)
230 return -1;
231
232 if (mutt_str_equal("*", str))
233 {
234 mutt_regexlist_free(rl); /* "unCMD *" means delete all current entries */
235 return 0;
236 }
237
238 int rc = -1;
239 struct RegexNode *np = NULL, *tmp = NULL;
240 STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
241 {
242 if (mutt_istr_equal(str, np->regex->pattern))
243 {
244 STAILQ_REMOVE(rl, np, RegexNode, entries);
245 mutt_regex_free(&np->regex);
246 FREE(&np);
247 rc = 0;
248 }
249 }
250
251 return rc;
252 }
253
254 /**
255 * mutt_replacelist_add - Add a pattern and a template to a list
256 * @param rl ReplaceList to add to
257 * @param pat Pattern to compile into a regex
258 * @param templ Template string to associate with the pattern
259 * @param err Buffer for error messages
260 * @retval 0 Success, pattern added to the ReplaceList
261 * @retval -1 Error, see message in 'err'
262 */
mutt_replacelist_add(struct ReplaceList * rl,const char * pat,const char * templ,struct Buffer * err)263 int mutt_replacelist_add(struct ReplaceList *rl, const char *pat,
264 const char *templ, struct Buffer *err)
265 {
266 if (!rl || !pat || (*pat == '\0') || !templ)
267 return 0;
268
269 struct Regex *rx = mutt_regex_compile(pat, REG_ICASE);
270 if (!rx)
271 {
272 if (err)
273 mutt_buffer_printf(err, _("Bad regex: %s"), pat);
274 return -1;
275 }
276
277 /* check to make sure the item is not already on this rl */
278 struct Replace *np = NULL;
279 STAILQ_FOREACH(np, rl, entries)
280 {
281 if (mutt_istr_equal(rx->pattern, np->regex->pattern))
282 {
283 /* Already on the rl. Formerly we just skipped this case, but
284 * now we're supporting removals, which means we're supporting
285 * re-adds conceptually. So we probably want this to imply a
286 * removal, then do an add. We can achieve the removal by freeing
287 * the template, and leaving t pointed at the current item. */
288 FREE(&np->templ);
289 break;
290 }
291 }
292
293 /* If np is set, it's pointing into an extant ReplaceList* that we want to
294 * update. Otherwise we want to make a new one to link at the rl's end. */
295 if (np)
296 {
297 mutt_regex_free(&rx);
298 }
299 else
300 {
301 np = mutt_replacelist_new();
302 np->regex = rx;
303 rx = NULL;
304 STAILQ_INSERT_TAIL(rl, np, entries);
305 }
306
307 /* Now np is the Replace that we want to modify. It is prepared. */
308 np->templ = mutt_str_dup(templ);
309
310 /* Find highest match number in template string */
311 np->nmatch = 0;
312 for (const char *p = templ; *p;)
313 {
314 if (*p == '%')
315 {
316 int n = 0;
317 if (mutt_str_atoi(++p, &n) < 0)
318 mutt_debug(LL_DEBUG2, "Invalid match number in replacelist: '%s'\n", p);
319 if (n > np->nmatch)
320 np->nmatch = n;
321 while (*p && isdigit((int) *p))
322 p++;
323 }
324 else
325 p++;
326 }
327
328 if (np->nmatch > np->regex->regex->re_nsub)
329 {
330 if (err)
331 mutt_buffer_printf(err, "%s", _("Not enough subexpressions for template"));
332 mutt_replacelist_remove(rl, pat);
333 return -1;
334 }
335
336 np->nmatch++; /* match 0 is always the whole expr */
337 return 0;
338 }
339
340 /**
341 * mutt_replacelist_apply - Apply replacements to a buffer
342 * @param rl ReplaceList to apply
343 * @param buf Buffer for the result
344 * @param buflen Length of the buffer
345 * @param str String to manipulate
346 * @retval ptr Pointer to 'buf'
347 *
348 * If 'buf' is NULL, a new string will be returned. It must be freed by the caller.
349 *
350 * @note This function uses a fixed size buffer of 1024 and so should
351 * only be used for visual modifications, such as disp_subj.
352 */
mutt_replacelist_apply(struct ReplaceList * rl,char * buf,size_t buflen,const char * str)353 char *mutt_replacelist_apply(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
354 {
355 static regmatch_t *pmatch = NULL;
356 static size_t nmatch = 0;
357 static char twinbuf[2][1024];
358 int switcher = 0;
359 char *p = NULL;
360 size_t cpysize, tlen;
361 char *src = NULL, *dst = NULL;
362
363 if (buf && (buflen != 0))
364 buf[0] = '\0';
365
366 if (!rl || !str || (*str == '\0') || (buf && (buflen == 0)))
367 return buf;
368
369 twinbuf[0][0] = '\0';
370 twinbuf[1][0] = '\0';
371 src = twinbuf[switcher];
372 dst = src;
373
374 mutt_str_copy(src, str, 1024);
375
376 struct Replace *np = NULL;
377 STAILQ_FOREACH(np, rl, entries)
378 {
379 /* If this pattern needs more matches, expand pmatch. */
380 if (np->nmatch > nmatch)
381 {
382 mutt_mem_realloc(&pmatch, np->nmatch * sizeof(regmatch_t));
383 nmatch = np->nmatch;
384 }
385
386 if (mutt_regex_capture(np->regex, src, np->nmatch, pmatch))
387 {
388 tlen = 0;
389 switcher ^= 1;
390 dst = twinbuf[switcher];
391
392 mutt_debug(LL_DEBUG5, "%s matches %s\n", src, np->regex->pattern);
393
394 /* Copy into other twinbuf with substitutions */
395 if (np->templ)
396 {
397 for (p = np->templ; *p && (tlen < 1023);)
398 {
399 if (*p == '%')
400 {
401 p++;
402 if (*p == 'L')
403 {
404 p++;
405 cpysize = MIN(pmatch[0].rm_so, 1023 - tlen);
406 strncpy(&dst[tlen], src, cpysize);
407 tlen += cpysize;
408 }
409 else if (*p == 'R')
410 {
411 p++;
412 cpysize = MIN(strlen(src) - pmatch[0].rm_eo, 1023 - tlen);
413 strncpy(&dst[tlen], &src[pmatch[0].rm_eo], cpysize);
414 tlen += cpysize;
415 }
416 else
417 {
418 long n = strtoul(p, &p, 10); /* get subst number */
419 while (isdigit((unsigned char) *p)) /* skip subst token */
420 p++;
421 for (int i = pmatch[n].rm_so; (i < pmatch[n].rm_eo) && (tlen < 1023); i++)
422 {
423 dst[tlen++] = src[i];
424 }
425 }
426 }
427 else
428 dst[tlen++] = *p++;
429 }
430 }
431 dst[tlen] = '\0';
432 mutt_debug(LL_DEBUG5, "subst %s\n", dst);
433 }
434 src = dst;
435 }
436
437 if (buf)
438 mutt_str_copy(buf, dst, buflen);
439 else
440 buf = mutt_str_dup(dst);
441 return buf;
442 }
443
444 /**
445 * mutt_replacelist_free - Free a ReplaceList object
446 * @param rl ReplaceList to free
447 */
mutt_replacelist_free(struct ReplaceList * rl)448 void mutt_replacelist_free(struct ReplaceList *rl)
449 {
450 if (!rl)
451 return;
452
453 struct Replace *np = NULL, *tmp = NULL;
454 STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
455 {
456 STAILQ_REMOVE(rl, np, Replace, entries);
457 mutt_regex_free(&np->regex);
458 FREE(&np->templ);
459 FREE(&np);
460 }
461 }
462
463 /**
464 * mutt_replacelist_match - Does a string match a pattern?
465 * @param rl ReplaceList of patterns
466 * @param str String to check
467 * @param buf Buffer to save match
468 * @param buflen Buffer length
469 * @retval true String matches a patterh in the ReplaceList
470 *
471 * Match a string against the patterns defined by the 'spam' command and output
472 * the expanded format into `buf` when there is a match. If buflen<=0, the
473 * match is performed but the format is not expanded and no assumptions are
474 * made about the value of `buf` so it may be NULL.
475 */
mutt_replacelist_match(struct ReplaceList * rl,char * buf,size_t buflen,const char * str)476 bool mutt_replacelist_match(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
477 {
478 if (!rl || !buf || !str)
479 return false;
480
481 static regmatch_t *pmatch = NULL;
482 static size_t nmatch = 0;
483 int tlen = 0;
484 char *p = NULL;
485
486 struct Replace *np = NULL;
487 STAILQ_FOREACH(np, rl, entries)
488 {
489 /* If this pattern needs more matches, expand pmatch. */
490 if (np->nmatch > nmatch)
491 {
492 mutt_mem_realloc(&pmatch, np->nmatch * sizeof(regmatch_t));
493 nmatch = np->nmatch;
494 }
495
496 /* Does this pattern match? */
497 if (mutt_regex_capture(np->regex, str, (size_t) np->nmatch, pmatch))
498 {
499 mutt_debug(LL_DEBUG5, "%s matches %s\n", str, np->regex->pattern);
500 mutt_debug(LL_DEBUG5, "%d subs\n", (int) np->regex->regex->re_nsub);
501
502 /* Copy template into buf, with substitutions. */
503 for (p = np->templ; *p && (tlen < (buflen - 1));)
504 {
505 /* backreference to pattern match substring, eg. %1, %2, etc) */
506 if (*p == '%')
507 {
508 char *e = NULL; /* used as pointer to end of integer backreference in strtol() call */
509
510 p++; /* skip over % char */
511 long n = strtol(p, &e, 10);
512 /* Ensure that the integer conversion succeeded (e!=p) and bounds check. The upper bound check
513 * should not strictly be necessary since add_to_spam_list() finds the largest value, and
514 * the static array above is always large enough based on that value. */
515 if ((e != p) && (n >= 0) && (n <= np->nmatch) && (pmatch[n].rm_so != -1))
516 {
517 /* copy as much of the substring match as will fit in the output buffer, saving space for
518 * the terminating nul char */
519 int idx;
520 for (idx = pmatch[n].rm_so;
521 (idx < pmatch[n].rm_eo) && (tlen < (buflen - 1)); idx++)
522 {
523 buf[tlen++] = str[idx];
524 }
525 }
526 p = e; /* skip over the parsed integer */
527 }
528 else
529 {
530 buf[tlen++] = *p++;
531 }
532 }
533 /* tlen should always be less than buflen except when buflen<=0
534 * because the bounds checks in the above code leave room for the
535 * terminal nul char. This should avoid returning an unterminated
536 * string to the caller. When buflen<=0 we make no assumption about
537 * the validity of the buf pointer. */
538 if (tlen < buflen)
539 {
540 buf[tlen] = '\0';
541 mutt_debug(LL_DEBUG5, "\"%s\"\n", buf);
542 }
543 return true;
544 }
545 }
546
547 return false;
548 }
549
550 /**
551 * mutt_replacelist_new - Create a new ReplaceList
552 * @retval ptr New ReplaceList
553 */
mutt_replacelist_new(void)554 struct Replace *mutt_replacelist_new(void)
555 {
556 return mutt_mem_calloc(1, sizeof(struct Replace));
557 }
558
559 /**
560 * mutt_replacelist_remove - Remove a pattern from a list
561 * @param rl ReplaceList to modify
562 * @param pat Pattern to remove
563 * @retval num Matching patterns removed
564 */
mutt_replacelist_remove(struct ReplaceList * rl,const char * pat)565 int mutt_replacelist_remove(struct ReplaceList *rl, const char *pat)
566 {
567 if (!rl || !pat)
568 return 0;
569
570 int nremoved = 0;
571 struct Replace *np = NULL, *tmp = NULL;
572 STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
573 {
574 if (mutt_str_equal(np->regex->pattern, pat))
575 {
576 STAILQ_REMOVE(rl, np, Replace, entries);
577 mutt_regex_free(&np->regex);
578 FREE(&np->templ);
579 FREE(&np);
580 nremoved++;
581 }
582 }
583
584 return nremoved;
585 }
586
587 /**
588 * mutt_regex_capture - Match a regex against a string, with provided options
589 * @param regex Regex to execute
590 * @param str String to apply regex on
591 * @param nmatch Length of matches
592 * @param matches regmatch_t to hold match indices
593 * @retval true str matches
594 * @retval false str does not match
595 */
mutt_regex_capture(const struct Regex * regex,const char * str,size_t nmatch,regmatch_t matches[])596 bool mutt_regex_capture(const struct Regex *regex, const char *str,
597 size_t nmatch, regmatch_t matches[])
598 {
599 if (!regex || !str || !regex->regex)
600 return false;
601
602 int rc = regexec(regex->regex, str, nmatch, matches, 0);
603 return ((rc == 0) ^ regex->pat_not);
604 }
605
/**
 * mutt_regex_match - Shorthand to mutt_regex_capture()
 * @param regex Regex which is desired to match against
 * @param str   String to search with given regex
 * @retval true  str matches
 * @retval false str does not match
 *
 * Calls mutt_regex_capture() without any storage for match positions.
 */
bool mutt_regex_match(const struct Regex *regex, const char *str)
{
  regmatch_t *no_captures = NULL;
  const size_t num_captures = 0;

  return mutt_regex_capture(regex, str, num_captures, no_captures);
}
617