1 /* comparator.c -- comparator functions
2  * Larry Greenfield
3  * Ken Murchison (rewritten to handle relational ops and non-terminated text)
4  *
5  * Copyright (c) 1994-2008 Carnegie Mellon University.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  *
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  *
19  * 3. The name "Carnegie Mellon University" must not be used to
20  *    endorse or promote products derived from this software without
21  *    prior written permission. For permission or any legal
22  *    details, please contact
23  *      Carnegie Mellon University
24  *      Center for Technology Transfer and Enterprise Creation
25  *      4615 Forbes Avenue
26  *      Suite 302
27  *      Pittsburgh, PA  15213
28  *      (412) 268-7393, fax: (412) 268-7395
29  *      innovation@andrew.cmu.edu
30  *
31  * 4. Redistributions of any form whatsoever must retain the following
32  *    acknowledgment:
33  *    "This product includes software developed by Computing Services
34  *     at Carnegie Mellon University (http://www.cmu.edu/computing/)."
35  *
36  * CARNEGIE MELLON UNIVERSITY DISCLAIMS ALL WARRANTIES WITH REGARD TO
37  * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
38  * AND FITNESS, IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
39  * FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
40  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
41  * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
42  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
43  */
44 
45 #ifdef HAVE_CONFIG_H
46 #include <config.h>
47 #endif
48 
49 #include <stdlib.h>
50 #include <ctype.h>
51 #include <string.h>
52 
53 #include "comparator.h"
54 #include "tree.h"
55 #include "sieve/sieve_interface.h"
56 #include "sieve/sieve.h"
57 #include "bytecode.h"
58 #include "util.h"
59 #include "xmalloc.h"
60 
61 /*!!! uses B_CONTAINS not CONTAINS, etc, only works with bytecode*/
62 
63 typedef int (*compare_t)(const void *, size_t, const void *);
64 
65 /* --- relational comparators --- */
66 
67 /* these are generic wrappers in which 'rock' is the compare function */
68 
rel_eq(const char * text,size_t tlen,const char * pat,void * rock)69 static int rel_eq(const char *text, size_t tlen, const char *pat, void *rock)
70 {
71     compare_t compar = (compare_t) rock;
72 
73     return (compar(text, tlen, pat) == 0);
74 }
75 
rel_ne(const char * text,size_t tlen,const char * pat,void * rock)76 static int rel_ne(const char *text, size_t tlen, const char *pat, void *rock)
77 {
78     compare_t compar = (compare_t) rock;
79 
80     return (compar(text, tlen, pat) != 0);
81 }
82 
rel_gt(const char * text,size_t tlen,const char * pat,void * rock)83 static int rel_gt(const char *text, size_t tlen, const char *pat, void *rock)
84 {
85     compare_t compar = (compare_t) rock;
86 
87     return (compar(text, tlen, pat) > 0);
88 }
89 
rel_ge(const char * text,size_t tlen,const char * pat,void * rock)90 static int rel_ge(const char *text, size_t tlen, const char *pat, void *rock)
91 {
92     compare_t compar = (compare_t) rock;
93 
94     return (compar(text, tlen, pat) >= 0);
95 }
96 
rel_lt(const char * text,size_t tlen,const char * pat,void * rock)97 static int rel_lt(const char *text, size_t tlen, const char *pat, void *rock)
98 {
99     compare_t compar = (compare_t) rock;
100 
101     return (compar(text, tlen, pat) < 0);
102 }
103 
rel_le(const char * text,size_t tlen,const char * pat,void * rock)104 static int rel_le(const char *text, size_t tlen, const char *pat, void *rock)
105 {
106     compare_t compar = (compare_t) rock;
107 
108     return (compar(text, tlen, pat) <= 0);
109 }
110 
111 /* --- i;octet comparators --- */
112 
113 /* just compare the two; pat should be NULL terminated */
octet_cmp_(const char * text,size_t tlen,const char * pat,int casemap)114 static int octet_cmp_(const char *text, size_t tlen,
115                       const char *pat, int casemap)
116 {
117     size_t plen, sl, i;
118     int r = 0;
119 
120     plen = strlen(pat);
121     sl = tlen < plen ? tlen : plen;
122 
123     for (i = 0; !r && i < sl; i++) {
124         r = casemap ? toupper(text[i]) - toupper(pat[i]) : text[i] - pat[i];
125     }
126 
127     if (r == 0)
128         return (tlen - plen);
129     else
130         return r;
131 }
132 
octet_cmp(const char * text,size_t tlen,const char * pat)133 static int octet_cmp(const char *text, size_t tlen, const char *pat)
134 {
135     return octet_cmp_(text, tlen, pat, 0);
136 }
137 
138 /* we implement boyer-moore for hell of it, since this is probably
139  not very useful for sieve */
140 #if 0
141 int boyer_moore(char *text, char *pat)
142 {
143     int i, j; /* indexes */
144     int M = strlen(pat); /* length of pattern */
145     int N = strlen(text); /* length of text */
146     int skip[256]; /* table of how much to skip, based on each character */
147 
148     /* initialize skip table */
149     for (i = 0; i < 256; i++)
150         skip[i] = M;
151     for (i = 0; i < M; i++)
152         skip[(int) pat[i]] = M-i-1;
153 
154     /* look for pat in text */
155     i = j = M-1;
156     do {
157         if (pat[j] == text[i]) {
158             i--;
159             j--;
160         } else {
161             if (M-j > skip[(int) text[i]]) {
162                 i = i + M - j;
163             } else {
164                 i = i + skip[(int) text[i]];
165             }
166             j = M-1;
167         }
168     } while (!((j < 0) || (i >= N)));
169     /* i+1 is the position of the match if i < N */
170     return (i < N) ? 1 : 0;
171 }
172 #endif
173 
174 /* we do a brute force attack */
octet_contains_(const char * text,size_t tlen,const char * pat,int casemap)175 static int octet_contains_(const char *text, size_t tlen,
176                            const char *pat, int casemap)
177 {
178     int N = tlen;
179     int M = strlen(pat);
180     int i, j;
181 
182     i = 0, j = 0;
183     while ((j < M) && (i < N)) {
184         if ((text[i] == pat[j]) ||
185             (casemap && (toupper(text[i]) == toupper(pat[j])))) {
186             i++; j++;
187         } else {
188             i = i - j + 1;
189             j = 0;
190         }
191     }
192 
193     return (j == M); /* we found a match! */
194 }
195 
octet_contains(const char * text,size_t tlen,const char * pat,void * rock)196 static int octet_contains(const char *text, size_t tlen, const char *pat,
197                           void *rock __attribute__((unused)))
198 {
199     return octet_contains_(text, tlen, pat, 0);
200 }
201 
append_var(int var_num,const char * val_start,const char * val_end,strarray_t * match_vars)202 static void append_var(int var_num, const char* val_start, const char* val_end,
203                        strarray_t *match_vars)
204 {
205     char *val = xstrndup(val_start, val_end - val_start);
206     strarray_setm(match_vars, var_num, val);
207 }
208 
octet_matches_(const char * text,size_t tlen,const char * pat,int casemap,strarray_t * match_vars)209 static int octet_matches_(const char *text, size_t tlen,
210                           const char *pat, int casemap, strarray_t *match_vars)
211 {
212     const char *p;
213     const char *t;
214     char c;
215     int var_num;
216     int eaten_chars = 0;
217     const char *val_start = text;
218     strarray_t returned_vars = STRARRAY_INITIALIZER;
219 
220     t = text;
221     p = pat;
222     for (;;) {
223         if (*p == '\0') {
224             /* ran out of pattern */
225             return (!tlen);
226         }
227         c = *p++;
228         switch (c) {
229         case '?':
230             var_num = strarray_append(match_vars, "");
231             val_start = t;
232             if (!tlen) {
233                 return 0;
234             }
235             t++; tlen--;
236             append_var(var_num, val_start, t, match_vars);
237             break;
238         case '*':
239             var_num = strarray_append(match_vars, "");
240             val_start = t;
241             while (*p == '*' || *p == '?') {
242                 if (*p == '?') {
243                     /* eat the character now */
244                     if (!tlen) {
245                         return 0;
246                     }
247                     t++; tlen--; eaten_chars++;
248                 } else {
249                     for (t -= eaten_chars; eaten_chars; eaten_chars--) {
250                         t++;
251                         var_num = strarray_append(match_vars, "");
252                         append_var(var_num, val_start, t, match_vars);
253                         val_start = t;
254                     }
255                     var_num = strarray_append(match_vars, "");
256                     val_start = t;
257                 }
258                 /* coalesce into a single wildcard */
259                 p++;
260             }
261             if (*p == '\0') {
262                 /* wildcard at end of string, any remaining text is ok */
263                 t += tlen;
264                 t -= eaten_chars;
265                 append_var(var_num, val_start, t, match_vars);
266                 for (val_start = t; eaten_chars; eaten_chars--) {
267                     t++;
268                     var_num = strarray_append(match_vars, "");
269                     append_var(var_num, val_start, t, match_vars);
270                     val_start = t;
271                 }
272                 return 1;
273             }
274 
275             while (tlen) {
276                 /* recurse */
277                 if (octet_matches_(t, tlen, p, casemap, &returned_vars)) {
278                     int i;
279                     t -= eaten_chars;
280                     append_var(var_num, val_start, t, match_vars);
281                     for (val_start = t; eaten_chars; eaten_chars--) {
282                         t++;
283                         var_num = strarray_append(match_vars, "");
284                         append_var(var_num, val_start, t, match_vars);
285                         val_start = t;
286                     }
287                     for (i = 0; i < returned_vars.count; i++) {
288                         strarray_append(match_vars, returned_vars.data[i]);
289                     }
290                     strarray_fini(&returned_vars);
291                     return 1;
292                 }
293                 strarray_fini(&returned_vars);
294                 t++; tlen--;
295             }
296             append_var(var_num, val_start, t, match_vars);
297             break;
298         case '\\':
299             c = *p++;
300             /* falls through */
301         default:
302             if ((c == *t) || (casemap && (toupper(c) == toupper(*t)))) {
303                 t++; tlen--;
304             } else {
305                 /* literal char doesn't match */
306                 return 0;
307             }
308         }
309     }
310 
311     /* never reaches */
312 }
313 
octet_matches(const char * text,size_t tlen,const char * pat,void * rock)314 static int octet_matches(const char *text, size_t tlen, const char *pat,
315                          void *rock)
316 {
317     int ret;
318     int needs_free = 0;
319     strarray_t temp = STRARRAY_INITIALIZER;
320     if (rock) {
321         strarray_fini((strarray_t*) rock);
322     } else {
323         rock = &temp;
324         needs_free = 1;
325     }
326     strarray_add((strarray_t*) rock, text);
327     ret = octet_matches_(text, tlen, pat, 0, rock);
328     if (!ret || needs_free) {
329         strarray_fini((strarray_t*) rock);
330     }
331     return ret;
332 }
333 
334 
335 #ifdef ENABLE_REGEX
336 #define MAX_MATCH 9  /* MUST support ${1} through ${9} per RFC 5229 */
337 
octet_regex(const char * text,size_t tlen,const char * pat,void * rock)338 static int octet_regex(const char *text, size_t tlen, const char *pat,
339                        void *rock)
340 {
341     strarray_t *match_vars = (strarray_t *) rock;
342     regmatch_t pm[MAX_MATCH+1];
343     size_t nmatch = 0;
344     int r;
345 
346     if (match_vars) {
347         strarray_fini(match_vars);
348         nmatch = MAX_MATCH+1;
349         memset(&pm, 0, sizeof(pm));
350     }
351 
352 #ifdef REG_STARTEND
353     /* pcre, BSD, some linuxes support this handy trick */
354     pm[0].rm_so = 0;
355     pm[0].rm_eo = tlen;
356     r = !regexec((regex_t *) pat, text, nmatch, pm, REG_STARTEND);
357 #elif defined(HAVE_RX_POSIX_H)
358     /* rx provides regnexec, that will work too */
359     r = !regnexec((regex_t *) pat, text, tlen, nmatch, pm, 0);
360 #else
361     /* regexec() requires a NUL-terminated string, and we have no
362      * guarantee that "text" is one.  Also, it may be only exactly
363      * tlen's length, so we can't safely check.  Always dup. */
364     char *buf = (char *) xstrndup(text, tlen);
365     r = !regexec((regex_t *) pat, buf, nmatch, pm, 0);
366     free(buf);
367 #endif /* REG_STARTEND */
368 
369     if (r) {
370         /* populate match variables */
371         size_t var_num;
372 
373         for (var_num = 0; var_num < nmatch; var_num++) {
374             regmatch_t *m = &pm[var_num];
375 
376             if (m->rm_so < 0) break;
377 
378             append_var(var_num, text + m->rm_so, text + m->rm_eo, match_vars);
379         }
380     }
381     return r;
382 }
383 #endif
384 
385 
386 /* --- i;ascii-casemap comparators --- */
387 
388 
ascii_casemap_cmp(const char * text,size_t tlen,const char * pat)389 static int ascii_casemap_cmp(const char *text, size_t tlen, const char *pat)
390 {
391     return octet_cmp_(text, tlen, pat, 1);
392 }
393 
ascii_casemap_contains(const char * text,size_t tlen,const char * pat,void * rock)394 static int ascii_casemap_contains(const char *text, size_t tlen,
395                                   const char *pat,
396                                   void *rock __attribute__((unused)))
397 {
398     return octet_contains_(text, tlen, pat, 1);
399 }
400 
ascii_casemap_matches(const char * text,size_t tlen,const char * pat,void * rock)401 static int ascii_casemap_matches(const char *text, size_t tlen,
402                                  const char *pat,
403                                  void *rock)
404 {
405     int ret;
406     int needs_free = 0;
407     strarray_t temp = STRARRAY_INITIALIZER;
408     if (rock) {
409         strarray_fini((strarray_t*) rock);
410     } else {
411         rock = &temp;
412         needs_free = 1;
413     }
414     strarray_add((strarray_t*) rock, text);
415     ret = octet_matches_(text, tlen, pat, 1, rock);
416     if (!ret || needs_free) {
417         strarray_fini((strarray_t*) rock);
418     }
419     return ret;
420 }
421 
422 /* i;ascii-numeric; only supports relational tests
423  *
424  *  A \ B    number   not-num
425  *  number   A ? B    A < B
426  *  not-num  A > B    A == B
427  */
428 
429 /* From RFC 2244:
430  *
431  * The i;ascii-numeric comparator interprets strings as decimal
432  * positive integers represented as US-ASCII digits.  All values
433  * which do not begin with a US-ASCII digit are considered equal
434  * with an ordinal value higher than all non-NIL single-valued
435  * attributes.  Otherwise, all US-ASCII digits (octet values
436  * 0x30 to 0x39) are interpreted starting from the beginning of
437  * the string to the first non-digit or the end of the string.
438  */
439 
ascii_numeric_cmp(const char * text,size_t tlen,const char * pat)440 static int ascii_numeric_cmp(const char *text, size_t tlen, const char *pat)
441 {
442     unsigned text_digit_len;
443     unsigned pat_digit_len;
444 
445     if (Uisdigit(*pat)) {
446         if (Uisdigit(*text)) {
447             /* Count how many digits each string has */
448             for (text_digit_len = 0;
449                  tlen-- && Uisdigit(text[text_digit_len]);
450                  text_digit_len++);
451             for (pat_digit_len = 0;
452                  Uisdigit(pat[pat_digit_len]);
453                  pat_digit_len++);
454 
455             if (text_digit_len < pat_digit_len) {
456                 /* Pad "text" with leading 0s */
457                 while (pat_digit_len > text_digit_len) {
458                     /* "text" can only be less or equal to "pat" */
459                     if ('0' < *pat) {
460                         return (-1);
461                     }
462                     pat++;
463                     pat_digit_len--;
464                 }
465             } else if (text_digit_len > pat_digit_len) {
466                 /* Pad "pad" with leading 0s */
467                 while (text_digit_len > pat_digit_len) {
468                     /* "pad" can only be greater or equal to "text" */
469                     if (*text > '0') {
470                         return 1;
471                     }
472                     text++;
473                     text_digit_len--;
474                 }
475             }
476 
477             /* CLAIM: If we here, we have two non-empty digital suffixes
478                of equal length */
479             while (text_digit_len > 0) {
480                 if (*text < *pat) {
481                     return -1;
482                 } else if (*text > *pat) {
483                     return 1;
484                 }
485                 /* Characters are equal, carry on */
486                 text++;
487                 pat++;
488                 text_digit_len--;
489             }
490 
491             return (0);
492         } else {
493             return 1;
494         }
495     } else if (Uisdigit(*text)) {
496         return -1;
497     } else {
498         return 0; /* both not digits */
499     }
500 }
501 
lookup_rel(int relation)502 static comparator_t *lookup_rel(int relation)
503 {
504     comparator_t *ret;
505 
506     ret = NULL;
507     switch (relation)
508       {
509       case B_EQ:
510         ret = &rel_eq;
511         break;
512       case B_NE:
513         ret = &rel_ne;
514         break;
515       case B_GT:
516         ret = &rel_gt;
517         break;
518       case B_GE:
519          ret = &rel_ge;
520          break;
521       case B_LT:
522         ret = &rel_lt;
523         break;
524       case B_LE:
525         ret = &rel_le;
526       }
527 
528     return ret;
529 }
530 
lookup_comp(int comp,int mode,int relation,void ** comprock)531 EXPORTED comparator_t *lookup_comp(int comp, int mode, int relation,
532                           void **comprock)
533 {
534     comparator_t *ret;
535 
536     ret = NULL;
537     *comprock = NULL;
538 #if VERBOSE
539     printf("comp%d mode%d relat%d     \n", comp, mode, relation);
540 #endif
541     switch (comp)
542       {
543       case B_OCTET:
544         switch (mode) {
545           case B_IS:
546             ret = &rel_eq;
547             *comprock = (void **) &octet_cmp;
548             break;
549           case B_CONTAINS:
550             ret = &octet_contains;
551             break;
552           case B_MATCHES:
553             ret = &octet_matches;
554             break;
555 #ifdef ENABLE_REGEX
556           case B_REGEX:
557             ret = &octet_regex;
558             break;
559 #endif
560           case B_VALUE:
561             ret = lookup_rel(relation);
562             *comprock = (void **) &octet_cmp;
563             break;
564         }
565         break; /*end of octet */
566       case B_ASCIICASEMAP:
567         switch (mode) {
568         case B_IS:
569             ret = &rel_eq;
570             *comprock = (void **) &ascii_casemap_cmp;
571             break;
572         case B_CONTAINS:
573             ret = &ascii_casemap_contains;
574             break;
575         case B_MATCHES:
576             ret = &ascii_casemap_matches;
577             break;
578 #ifdef ENABLE_REGEX
579         case B_REGEX:
580             /* the ascii-casemap destinction is made during
581                the compilation of the regex in verify_regex() */
582             ret = &octet_regex;
583             break;
584 #endif
585         case B_VALUE:
586             ret = lookup_rel(relation);
587             *comprock = (void **) &ascii_casemap_cmp;
588             break;
589         }
590         break;/*end of ascii casemap */
591       case B_ASCIINUMERIC:
592         switch (mode) {
593         case B_IS:
594             ret = &rel_eq;
595             *comprock = (void **) &ascii_numeric_cmp;
596             break;
597         case B_COUNT:
598         case B_VALUE:
599             ret = lookup_rel(relation);
600             *comprock = (void **) &ascii_numeric_cmp;
601             break;
602         }
603         break;
604       }
605     return ret;
606 }
607