1 /* comparator.c -- comparator functions
2  * Larry Greenfield
3  * Ken Murchison (rewritten to handle relational ops and non-terminated text)
4  *
5  * Copyright (c) 1994-2008 Carnegie Mellon University.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  *
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  *
19  * 3. The name "Carnegie Mellon University" must not be used to
20  *    endorse or promote products derived from this software without
21  *    prior written permission. For permission or any legal
22  *    details, please contact
23  *      Carnegie Mellon University
24  *      Center for Technology Transfer and Enterprise Creation
25  *      4615 Forbes Avenue
26  *      Suite 302
27  *      Pittsburgh, PA  15213
28  *      (412) 268-7393, fax: (412) 268-7395
29  *      innovation@andrew.cmu.edu
30  *
31  * 4. Redistributions of any form whatsoever must retain the following
32  *    acknowledgment:
33  *    "This product includes software developed by Computing Services
34  *     at Carnegie Mellon University (http://www.cmu.edu/computing/)."
35  *
36  * CARNEGIE MELLON UNIVERSITY DISCLAIMS ALL WARRANTIES WITH REGARD TO
37  * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
38  * AND FITNESS, IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
39  * FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
40  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
41  * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
42  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
43  */
44 
45 #ifdef HAVE_CONFIG_H
46 #include <config.h>
47 #endif
48 
49 #include <stdlib.h>
50 #include <ctype.h>
51 #include <string.h>
52 
53 #include "comparator.h"
54 #include "interp.h"
55 #include "tree.h"
56 #include "sieve/sieve_interface.h"
57 #include "sieve/sieve.h"
58 #include "bytecode.h"
59 #include "util.h"
60 #include "xmalloc.h"
61 
62 /*!!! uses B_CONTAINS not CONTAINS, etc, only works with bytecode*/
63 
64 typedef int (*compare_t)(const void *, size_t, const void *);
65 
66 /* --- relational comparators --- */
67 
68 /* these are generic wrappers in which 'rock' is the compare function */
69 
rel_eq(const char * text,size_t tlen,const char * pat,strarray_t * match_vars,void * rock)70 static int rel_eq(const char *text, size_t tlen, const char *pat,
71                   strarray_t *match_vars __attribute__((unused)),
72                   void *rock)
73 {
74     compare_t compar = (compare_t) rock;
75 
76     return (compar(text, tlen, pat) == 0);
77 }
78 
rel_ne(const char * text,size_t tlen,const char * pat,strarray_t * match_vars,void * rock)79 static int rel_ne(const char *text, size_t tlen, const char *pat,
80                   strarray_t *match_vars __attribute__((unused)),
81                   void *rock)
82 {
83     compare_t compar = (compare_t) rock;
84 
85     return (compar(text, tlen, pat) != 0);
86 }
87 
rel_gt(const char * text,size_t tlen,const char * pat,strarray_t * match_vars,void * rock)88 static int rel_gt(const char *text, size_t tlen, const char *pat,
89                   strarray_t *match_vars __attribute__((unused)),
90                   void *rock)
91 {
92     compare_t compar = (compare_t) rock;
93 
94     return (compar(text, tlen, pat) > 0);
95 }
96 
rel_ge(const char * text,size_t tlen,const char * pat,strarray_t * match_vars,void * rock)97 static int rel_ge(const char *text, size_t tlen, const char *pat,
98                   strarray_t *match_vars __attribute__((unused)),
99                   void *rock)
100 {
101     compare_t compar = (compare_t) rock;
102 
103     return (compar(text, tlen, pat) >= 0);
104 }
105 
rel_lt(const char * text,size_t tlen,const char * pat,strarray_t * match_vars,void * rock)106 static int rel_lt(const char *text, size_t tlen, const char *pat,
107                   strarray_t *match_vars __attribute__((unused)),
108                   void *rock)
109 {
110     compare_t compar = (compare_t) rock;
111 
112     return (compar(text, tlen, pat) < 0);
113 }
114 
rel_le(const char * text,size_t tlen,const char * pat,strarray_t * match_vars,void * rock)115 static int rel_le(const char *text, size_t tlen, const char *pat,
116                   strarray_t *match_vars __attribute__((unused)),
117                   void *rock)
118 {
119     compare_t compar = (compare_t) rock;
120 
121     return (compar(text, tlen, pat) <= 0);
122 }
123 
124 /* --- i;octet comparators --- */
125 
126 /* just compare the two; pat should be NULL terminated */
octet_cmp_(const char * text,size_t tlen,const char * pat,int casemap)127 static int octet_cmp_(const char *text, size_t tlen,
128                       const char *pat, int casemap)
129 {
130     size_t plen, sl, i;
131     int r = 0;
132 
133     plen = strlen(pat);
134     sl = tlen < plen ? tlen : plen;
135 
136     for (i = 0; !r && i < sl; i++) {
137         r = casemap ? toupper(text[i]) - toupper(pat[i]) : text[i] - pat[i];
138     }
139 
140     if (r == 0)
141         return (tlen - plen);
142     else
143         return r;
144 }
145 
octet_cmp(const char * text,size_t tlen,const char * pat)146 static int octet_cmp(const char *text, size_t tlen, const char *pat)
147 {
148     return octet_cmp_(text, tlen, pat, 0);
149 }
150 
151 /* we do a brute force attack */
octet_contains_(const char * text,size_t tlen,const char * pat,int casemap)152 static int octet_contains_(const char *text, size_t tlen,
153                            const char *pat, int casemap)
154 {
155     int N = tlen;
156     int M = strlen(pat);
157     int i, j;
158 
159     i = 0, j = 0;
160     while ((j < M) && (i < N)) {
161         if ((text[i] == pat[j]) ||
162             (casemap && (toupper(text[i]) == toupper(pat[j])))) {
163             i++; j++;
164         } else {
165             i = i - j + 1;
166             j = 0;
167         }
168     }
169 
170     return (j == M); /* we found a match! */
171 }
172 
octet_contains(const char * text,size_t tlen,const char * pat,strarray_t * match_vars,void * rock)173 static int octet_contains(const char *text, size_t tlen, const char *pat,
174                           strarray_t *match_vars __attribute__((unused)),
175                           void *rock __attribute__((unused)))
176 {
177     return octet_contains_(text, tlen, pat, 0);
178 }
179 
new_var(strarray_t * match_vars)180 static int new_var(strarray_t *match_vars)
181 {
182     int size = strarray_size(match_vars);
183 
184     if (size-1 > MAX_MATCH_VARS) return size;
185 
186     return strarray_append(match_vars, "");
187 }
188 
set_var(int var_num,const char * val_start,const char * val_end,strarray_t * match_vars)189 static void set_var(int var_num, const char* val_start, const char* val_end,
190                     strarray_t *match_vars)
191 {
192     if (var_num > MAX_MATCH_VARS) return;
193 
194     char *val = xstrndup(val_start, val_end - val_start);
195     strarray_setm(match_vars, var_num, val);
196 }
197 
append_var(const char * val_start,const char * val_end,strarray_t * match_vars)198 static int append_var(const char* val_start, const char* val_end,
199                        strarray_t *match_vars)
200 {
201     int size = strarray_size(match_vars);
202 
203     if (size-1 > MAX_MATCH_VARS) return size;
204 
205     char *val = xstrndup(val_start, val_end - val_start);
206     return strarray_appendm(match_vars, val);
207 }
208 
octet_matches_(const char * text,size_t tlen,const char * pat,int casemap,strarray_t * match_vars)209 static int octet_matches_(const char *text, size_t tlen,
210                           const char *pat, int casemap, strarray_t *match_vars)
211 {
212     const char *p;
213     const char *t;
214     char c;
215     int var_num;
216     int eaten_chars = 0;
217     const char *val_start = text;
218     strarray_t returned_vars = STRARRAY_INITIALIZER;
219 
220     t = text;
221     p = pat;
222     for (;;) {
223         if (*p == '\0') {
224             /* ran out of pattern */
225             return (!tlen);
226         }
227         c = *p++;
228         switch (c) {
229         case '?':
230             var_num = new_var(match_vars);
231             val_start = t;
232             if (!tlen) {
233                 return 0;
234             }
235             t++; tlen--;
236             set_var(var_num, val_start, t, match_vars);
237             break;
238         case '*':
239             var_num = new_var(match_vars);
240             val_start = t;
241             while (*p == '*' || *p == '?') {
242                 if (*p == '?') {
243                     /* eat the character now */
244                     if (!tlen) {
245                         return 0;
246                     }
247                     t++; tlen--; eaten_chars++;
248                 } else {
249                     for (t -= eaten_chars; eaten_chars; eaten_chars--) {
250                         t++;
251                         var_num = append_var(val_start, t, match_vars);
252                         val_start = t;
253                     }
254                     var_num = new_var(match_vars);
255                     val_start = t;
256                 }
257                 /* coalesce into a single wildcard */
258                 p++;
259             }
260             if (*p == '\0') {
261                 /* wildcard at end of string, any remaining text is ok */
262                 t += tlen;
263                 t -= eaten_chars;
264                 set_var(var_num, val_start, t, match_vars);
265                 for (val_start = t; eaten_chars; eaten_chars--) {
266                     t++;
267                     var_num = append_var(val_start, t, match_vars);
268                     val_start = t;
269                 }
270                 return 1;
271             }
272 
273             while (tlen) {
274                 /* recurse */
275                 if (octet_matches_(t, tlen, p, casemap, &returned_vars)) {
276                     int i;
277                     t -= eaten_chars;
278                     set_var(var_num, val_start, t, match_vars);
279                     for (val_start = t; eaten_chars; eaten_chars--) {
280                         t++;
281                         var_num = append_var(val_start, t, match_vars);
282                         val_start = t;
283                     }
284                     for (i = 0; i < returned_vars.count; i++) {
285                         if (var_num < MAX_MATCH_VARS) {
286                             var_num = strarray_append(match_vars,
287                                                       returned_vars.data[i]);
288                         }
289                     }
290                     strarray_fini(&returned_vars);
291                     return 1;
292                 }
293                 strarray_fini(&returned_vars);
294                 t++; tlen--;
295             }
296             set_var(var_num, val_start, t, match_vars);
297             break;
298         case '\\':
299             c = *p++;
300             /* falls through */
301         default:
302             if ((c == *t) || (casemap && (toupper(c) == toupper(*t)))) {
303                 t++; tlen--;
304             } else {
305                 /* literal char doesn't match */
306                 return 0;
307             }
308         }
309     }
310 
311     /* never reaches */
312 }
313 
octet_matches(const char * text,size_t tlen,const char * pat,strarray_t * match_vars,void * rock)314 static int octet_matches(const char *text, size_t tlen, const char *pat,
315                          strarray_t *match_vars,
316                          void *rock __attribute__((unused)))
317 {
318     int ret;
319     int needs_free = 0;
320     strarray_t temp = STRARRAY_INITIALIZER;
321     if (match_vars) {
322         strarray_fini(match_vars);
323     } else {
324         match_vars = &temp;
325         needs_free = 1;
326     }
327     strarray_add(match_vars, text);
328     ret = octet_matches_(text, tlen, pat, 0, match_vars);
329     if (!ret || needs_free) {
330         strarray_fini(match_vars);
331     }
332     return ret;
333 }
334 
335 
336 #ifdef ENABLE_REGEX
octet_regex(const char * text,size_t tlen,const char * pat,strarray_t * match_vars,void * rock)337 static int octet_regex(const char *text, size_t tlen, const char *pat,
338                        strarray_t *match_vars,
339                        void *rock __attribute__((unused)))
340 {
341     regmatch_t pm[MAX_MATCH_VARS+1];
342     size_t nmatch = 0;
343     int r;
344 
345     if (match_vars) {
346         strarray_fini(match_vars);
347         nmatch = MAX_MATCH_VARS+1;
348         memset(&pm, 0, sizeof(pm));
349     }
350 
351 #ifdef REG_STARTEND
352     /* pcre, BSD, some linuxes support this handy trick */
353     pm[0].rm_so = 0;
354     pm[0].rm_eo = tlen;
355     r = !regexec((regex_t *) pat, text, nmatch, pm, REG_STARTEND);
356 #elif defined(HAVE_RX_POSIX_H)
357     /* rx provides regnexec, that will work too */
358     r = !regnexec((regex_t *) pat, text, tlen, nmatch, pm, 0);
359 #else
360     /* regexec() requires a NUL-terminated string, and we have no
361      * guarantee that "text" is one.  Also, it may be only exactly
362      * tlen's length, so we can't safely check.  Always dup. */
363     char *buf = (char *) xstrndup(text, tlen);
364     r = !regexec((regex_t *) pat, buf, nmatch, pm, 0);
365     free(buf);
366 #endif /* REG_STARTEND */
367 
368     if (r) {
369         /* populate match variables */
370         size_t var_num;
371 
372         for (var_num = 0; var_num < nmatch; var_num++) {
373             regmatch_t *m = &pm[var_num];
374 
375             if (m->rm_so < 0) new_var(match_vars);
376             else append_var(text + m->rm_so, text + m->rm_eo, match_vars);
377         }
378     }
379     return r;
380 }
381 #endif
382 
383 
384 /* --- i;ascii-casemap comparators --- */
385 
386 
ascii_casemap_cmp(const char * text,size_t tlen,const char * pat)387 static int ascii_casemap_cmp(const char *text, size_t tlen, const char *pat)
388 {
389     return octet_cmp_(text, tlen, pat, 1);
390 }
391 
ascii_casemap_contains(const char * text,size_t tlen,const char * pat,strarray_t * match_vars,void * rock)392 static int ascii_casemap_contains(const char *text, size_t tlen,
393                                   const char *pat,
394                                   strarray_t *match_vars __attribute__((unused)),
395                                   void *rock __attribute__((unused)))
396 {
397     return octet_contains_(text, tlen, pat, 1);
398 }
399 
ascii_casemap_matches(const char * text,size_t tlen,const char * pat,strarray_t * match_vars,void * rock)400 static int ascii_casemap_matches(const char *text, size_t tlen,
401                                  const char *pat, strarray_t *match_vars,
402                                  void *rock __attribute__((unused)))
403 {
404     int ret;
405     int needs_free = 0;
406     strarray_t temp = STRARRAY_INITIALIZER;
407     if (match_vars) {
408         strarray_fini(match_vars);
409     } else {
410       match_vars = &temp;
411         needs_free = 1;
412     }
413     strarray_add(match_vars, text);
414     ret = octet_matches_(text, tlen, pat, 1, match_vars);
415     if (!ret || needs_free) {
416         strarray_fini(match_vars);
417     }
418     return ret;
419 }
420 
421 /* i;ascii-numeric; only supports relational tests
422  *
423  *  A \ B    number   not-num
424  *  number   A ? B    A < B
425  *  not-num  A > B    A == B
426  */
427 
428 /* From RFC 2244:
429  *
430  * The i;ascii-numeric comparator interprets strings as decimal
431  * positive integers represented as US-ASCII digits.  All values
432  * which do not begin with a US-ASCII digit are considered equal
433  * with an ordinal value higher than all non-NIL single-valued
434  * attributes.  Otherwise, all US-ASCII digits (octet values
435  * 0x30 to 0x39) are interpreted starting from the beginning of
436  * the string to the first non-digit or the end of the string.
437  */
438 
ascii_numeric_cmp(const char * text,size_t tlen,const char * pat)439 static int ascii_numeric_cmp(const char *text, size_t tlen, const char *pat)
440 {
441     unsigned text_digit_len;
442     unsigned pat_digit_len;
443 
444     if (Uisdigit(*pat)) {
445         if (Uisdigit(*text)) {
446             /* Count how many digits each string has */
447             for (text_digit_len = 0;
448                  tlen-- && Uisdigit(text[text_digit_len]);
449                  text_digit_len++);
450             for (pat_digit_len = 0;
451                  Uisdigit(pat[pat_digit_len]);
452                  pat_digit_len++);
453 
454             if (text_digit_len < pat_digit_len) {
455                 /* Pad "text" with leading 0s */
456                 while (pat_digit_len > text_digit_len) {
457                     /* "text" can only be less or equal to "pat" */
458                     if ('0' < *pat) {
459                         return (-1);
460                     }
461                     pat++;
462                     pat_digit_len--;
463                 }
464             } else if (text_digit_len > pat_digit_len) {
465                 /* Pad "pad" with leading 0s */
466                 while (text_digit_len > pat_digit_len) {
467                     /* "pad" can only be greater or equal to "text" */
468                     if (*text > '0') {
469                         return 1;
470                     }
471                     text++;
472                     text_digit_len--;
473                 }
474             }
475 
476             /* CLAIM: If we here, we have two non-empty digital suffixes
477                of equal length */
478             while (text_digit_len > 0) {
479                 if (*text < *pat) {
480                     return -1;
481                 } else if (*text > *pat) {
482                     return 1;
483                 }
484                 /* Characters are equal, carry on */
485                 text++;
486                 pat++;
487                 text_digit_len--;
488             }
489 
490             return (0);
491         } else {
492             return 1;
493         }
494     } else if (Uisdigit(*text)) {
495         return -1;
496     } else {
497         return 0; /* both not digits */
498     }
499 }
500 
lookup_rel(int relation)501 static comparator_t *lookup_rel(int relation)
502 {
503     comparator_t *ret;
504 
505     ret = NULL;
506     switch (relation)
507       {
508       case B_EQ:
509         ret = &rel_eq;
510         break;
511       case B_NE:
512         ret = &rel_ne;
513         break;
514       case B_GT:
515         ret = &rel_gt;
516         break;
517       case B_GE:
518          ret = &rel_ge;
519          break;
520       case B_LT:
521         ret = &rel_lt;
522         break;
523       case B_LE:
524         ret = &rel_le;
525       }
526 
527     return ret;
528 }
529 
lookup_comp(sieve_interp_t * i,int comp,int mode,int relation,void ** comprock)530 EXPORTED comparator_t *lookup_comp(sieve_interp_t *i, int comp, int mode,
531                                    int relation, void **comprock)
532 {
533     comparator_t *ret;
534 
535     ret = NULL;
536     *comprock = NULL;
537 #if VERBOSE
538     printf("comp%d mode%d relat%d     \n", comp, mode, relation);
539 #endif
540     if (mode == B_LIST) {
541         *comprock = (void **) i->interp_context;
542         return (comparator_t *) i->listcompare;
543     }
544 
545     switch (comp)
546       {
547       case B_OCTET:
548         switch (mode) {
549           case B_IS:
550             ret = &rel_eq;
551             *comprock = (void **) &octet_cmp;
552             break;
553           case B_CONTAINS:
554             ret = &octet_contains;
555             break;
556           case B_MATCHES:
557             ret = &octet_matches;
558             break;
559 #ifdef ENABLE_REGEX
560           case B_REGEX:
561             ret = &octet_regex;
562             break;
563 #endif
564           case B_VALUE:
565             ret = lookup_rel(relation);
566             *comprock = (void **) &octet_cmp;
567             break;
568         }
569         break; /*end of octet */
570       case B_ASCIICASEMAP:
571         switch (mode) {
572         case B_IS:
573             ret = &rel_eq;
574             *comprock = (void **) &ascii_casemap_cmp;
575             break;
576         case B_CONTAINS:
577             ret = &ascii_casemap_contains;
578             break;
579         case B_MATCHES:
580             ret = &ascii_casemap_matches;
581             break;
582 #ifdef ENABLE_REGEX
583         case B_REGEX:
584             /* the ascii-casemap distinction is made during
585                the compilation of the regex in verify_regex() */
586             ret = &octet_regex;
587             break;
588 #endif
589         case B_VALUE:
590             ret = lookup_rel(relation);
591             *comprock = (void **) &ascii_casemap_cmp;
592             break;
593         }
594         break;/*end of ascii casemap */
595       case B_ASCIINUMERIC:
596         switch (mode) {
597         case B_IS:
598             ret = &rel_eq;
599             *comprock = (void **) &ascii_numeric_cmp;
600             break;
601         case B_COUNT:
602         case B_VALUE:
603             ret = lookup_rel(relation);
604             *comprock = (void **) &ascii_numeric_cmp;
605             break;
606         }
607         break;
608       }
609     return ret;
610 }
611