1 /* comparator.c -- comparator functions
2 * Larry Greenfield
3 * Ken Murchison (rewritten to handle relational ops and non-terminated text)
4 *
5 * Copyright (c) 1994-2008 Carnegie Mellon University. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
17 * distribution.
18 *
19 * 3. The name "Carnegie Mellon University" must not be used to
20 * endorse or promote products derived from this software without
21 * prior written permission. For permission or any legal
22 * details, please contact
23 * Carnegie Mellon University
24 * Center for Technology Transfer and Enterprise Creation
25 * 4615 Forbes Avenue
26 * Suite 302
27 * Pittsburgh, PA 15213
28 * (412) 268-7393, fax: (412) 268-7395
29 * innovation@andrew.cmu.edu
30 *
31 * 4. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by Computing Services
34 * at Carnegie Mellon University (http://www.cmu.edu/computing/)."
35 *
36 * CARNEGIE MELLON UNIVERSITY DISCLAIMS ALL WARRANTIES WITH REGARD TO
37 * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
38 * AND FITNESS, IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
39 * FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
40 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
41 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
42 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
43 */
44
45 #ifdef HAVE_CONFIG_H
46 #include <config.h>
47 #endif
48
49 #include <stdlib.h>
50 #include <ctype.h>
51 #include <string.h>
52
53 #include "comparator.h"
54 #include "tree.h"
55 #include "sieve/sieve_interface.h"
56 #include "sieve/sieve.h"
57 #include "bytecode.h"
58 #include "util.h"
59 #include "xmalloc.h"
60
61 /*!!! uses B_CONTAINS not CONTAINS, etc, only works with bytecode*/
62
63 typedef int (*compare_t)(const void *, size_t, const void *);
64
65 /* --- relational comparators --- */
66
67 /* these are generic wrappers in which 'rock' is the compare function */
68
rel_eq(const char * text,size_t tlen,const char * pat,void * rock)69 static int rel_eq(const char *text, size_t tlen, const char *pat, void *rock)
70 {
71 compare_t compar = (compare_t) rock;
72
73 return (compar(text, tlen, pat) == 0);
74 }
75
rel_ne(const char * text,size_t tlen,const char * pat,void * rock)76 static int rel_ne(const char *text, size_t tlen, const char *pat, void *rock)
77 {
78 compare_t compar = (compare_t) rock;
79
80 return (compar(text, tlen, pat) != 0);
81 }
82
rel_gt(const char * text,size_t tlen,const char * pat,void * rock)83 static int rel_gt(const char *text, size_t tlen, const char *pat, void *rock)
84 {
85 compare_t compar = (compare_t) rock;
86
87 return (compar(text, tlen, pat) > 0);
88 }
89
rel_ge(const char * text,size_t tlen,const char * pat,void * rock)90 static int rel_ge(const char *text, size_t tlen, const char *pat, void *rock)
91 {
92 compare_t compar = (compare_t) rock;
93
94 return (compar(text, tlen, pat) >= 0);
95 }
96
rel_lt(const char * text,size_t tlen,const char * pat,void * rock)97 static int rel_lt(const char *text, size_t tlen, const char *pat, void *rock)
98 {
99 compare_t compar = (compare_t) rock;
100
101 return (compar(text, tlen, pat) < 0);
102 }
103
rel_le(const char * text,size_t tlen,const char * pat,void * rock)104 static int rel_le(const char *text, size_t tlen, const char *pat, void *rock)
105 {
106 compare_t compar = (compare_t) rock;
107
108 return (compar(text, tlen, pat) <= 0);
109 }
110
111 /* --- i;octet comparators --- */
112
113 /* just compare the two; pat should be NULL terminated */
octet_cmp_(const char * text,size_t tlen,const char * pat,int casemap)114 static int octet_cmp_(const char *text, size_t tlen,
115 const char *pat, int casemap)
116 {
117 size_t plen, sl, i;
118 int r = 0;
119
120 plen = strlen(pat);
121 sl = tlen < plen ? tlen : plen;
122
123 for (i = 0; !r && i < sl; i++) {
124 r = casemap ? toupper(text[i]) - toupper(pat[i]) : text[i] - pat[i];
125 }
126
127 if (r == 0)
128 return (tlen - plen);
129 else
130 return r;
131 }
132
octet_cmp(const char * text,size_t tlen,const char * pat)133 static int octet_cmp(const char *text, size_t tlen, const char *pat)
134 {
135 return octet_cmp_(text, tlen, pat, 0);
136 }
137
138 /* we implement boyer-moore for hell of it, since this is probably
139 not very useful for sieve */
140 #if 0
141 int boyer_moore(char *text, char *pat)
142 {
143 int i, j; /* indexes */
144 int M = strlen(pat); /* length of pattern */
145 int N = strlen(text); /* length of text */
146 int skip[256]; /* table of how much to skip, based on each character */
147
148 /* initialize skip table */
149 for (i = 0; i < 256; i++)
150 skip[i] = M;
151 for (i = 0; i < M; i++)
152 skip[(int) pat[i]] = M-i-1;
153
154 /* look for pat in text */
155 i = j = M-1;
156 do {
157 if (pat[j] == text[i]) {
158 i--;
159 j--;
160 } else {
161 if (M-j > skip[(int) text[i]]) {
162 i = i + M - j;
163 } else {
164 i = i + skip[(int) text[i]];
165 }
166 j = M-1;
167 }
168 } while (!((j < 0) || (i >= N)));
169 /* i+1 is the position of the match if i < N */
170 return (i < N) ? 1 : 0;
171 }
172 #endif
173
174 /* we do a brute force attack */
octet_contains_(const char * text,size_t tlen,const char * pat,int casemap)175 static int octet_contains_(const char *text, size_t tlen,
176 const char *pat, int casemap)
177 {
178 int N = tlen;
179 int M = strlen(pat);
180 int i, j;
181
182 i = 0, j = 0;
183 while ((j < M) && (i < N)) {
184 if ((text[i] == pat[j]) ||
185 (casemap && (toupper(text[i]) == toupper(pat[j])))) {
186 i++; j++;
187 } else {
188 i = i - j + 1;
189 j = 0;
190 }
191 }
192
193 return (j == M); /* we found a match! */
194 }
195
octet_contains(const char * text,size_t tlen,const char * pat,void * rock)196 static int octet_contains(const char *text, size_t tlen, const char *pat,
197 void *rock __attribute__((unused)))
198 {
199 return octet_contains_(text, tlen, pat, 0);
200 }
201
append_var(int var_num,const char * val_start,const char * val_end,strarray_t * match_vars)202 static void append_var(int var_num, const char* val_start, const char* val_end,
203 strarray_t *match_vars)
204 {
205 char *val = xstrndup(val_start, val_end - val_start);
206 strarray_setm(match_vars, var_num, val);
207 }
208
octet_matches_(const char * text,size_t tlen,const char * pat,int casemap,strarray_t * match_vars)209 static int octet_matches_(const char *text, size_t tlen,
210 const char *pat, int casemap, strarray_t *match_vars)
211 {
212 const char *p;
213 const char *t;
214 char c;
215 int var_num;
216 int eaten_chars = 0;
217 const char *val_start = text;
218 strarray_t returned_vars = STRARRAY_INITIALIZER;
219
220 t = text;
221 p = pat;
222 for (;;) {
223 if (*p == '\0') {
224 /* ran out of pattern */
225 return (!tlen);
226 }
227 c = *p++;
228 switch (c) {
229 case '?':
230 var_num = strarray_append(match_vars, "");
231 val_start = t;
232 if (!tlen) {
233 return 0;
234 }
235 t++; tlen--;
236 append_var(var_num, val_start, t, match_vars);
237 break;
238 case '*':
239 var_num = strarray_append(match_vars, "");
240 val_start = t;
241 while (*p == '*' || *p == '?') {
242 if (*p == '?') {
243 /* eat the character now */
244 if (!tlen) {
245 return 0;
246 }
247 t++; tlen--; eaten_chars++;
248 } else {
249 for (t -= eaten_chars; eaten_chars; eaten_chars--) {
250 t++;
251 var_num = strarray_append(match_vars, "");
252 append_var(var_num, val_start, t, match_vars);
253 val_start = t;
254 }
255 var_num = strarray_append(match_vars, "");
256 val_start = t;
257 }
258 /* coalesce into a single wildcard */
259 p++;
260 }
261 if (*p == '\0') {
262 /* wildcard at end of string, any remaining text is ok */
263 t += tlen;
264 t -= eaten_chars;
265 append_var(var_num, val_start, t, match_vars);
266 for (val_start = t; eaten_chars; eaten_chars--) {
267 t++;
268 var_num = strarray_append(match_vars, "");
269 append_var(var_num, val_start, t, match_vars);
270 val_start = t;
271 }
272 return 1;
273 }
274
275 while (tlen) {
276 /* recurse */
277 if (octet_matches_(t, tlen, p, casemap, &returned_vars)) {
278 int i;
279 t -= eaten_chars;
280 append_var(var_num, val_start, t, match_vars);
281 for (val_start = t; eaten_chars; eaten_chars--) {
282 t++;
283 var_num = strarray_append(match_vars, "");
284 append_var(var_num, val_start, t, match_vars);
285 val_start = t;
286 }
287 for (i = 0; i < returned_vars.count; i++) {
288 strarray_append(match_vars, returned_vars.data[i]);
289 }
290 strarray_fini(&returned_vars);
291 return 1;
292 }
293 strarray_fini(&returned_vars);
294 t++; tlen--;
295 }
296 append_var(var_num, val_start, t, match_vars);
297 break;
298 case '\\':
299 c = *p++;
300 /* falls through */
301 default:
302 if ((c == *t) || (casemap && (toupper(c) == toupper(*t)))) {
303 t++; tlen--;
304 } else {
305 /* literal char doesn't match */
306 return 0;
307 }
308 }
309 }
310
311 /* never reaches */
312 }
313
octet_matches(const char * text,size_t tlen,const char * pat,void * rock)314 static int octet_matches(const char *text, size_t tlen, const char *pat,
315 void *rock)
316 {
317 int ret;
318 int needs_free = 0;
319 strarray_t temp = STRARRAY_INITIALIZER;
320 if (rock) {
321 strarray_fini((strarray_t*) rock);
322 } else {
323 rock = &temp;
324 needs_free = 1;
325 }
326 strarray_add((strarray_t*) rock, text);
327 ret = octet_matches_(text, tlen, pat, 0, rock);
328 if (!ret || needs_free) {
329 strarray_fini((strarray_t*) rock);
330 }
331 return ret;
332 }
333
334
335 #ifdef ENABLE_REGEX
336 #define MAX_MATCH 9 /* MUST support ${1} through ${9} per RFC 5229 */
337
octet_regex(const char * text,size_t tlen,const char * pat,void * rock)338 static int octet_regex(const char *text, size_t tlen, const char *pat,
339 void *rock)
340 {
341 strarray_t *match_vars = (strarray_t *) rock;
342 regmatch_t pm[MAX_MATCH+1];
343 size_t nmatch = 0;
344 int r;
345
346 if (match_vars) {
347 strarray_fini(match_vars);
348 nmatch = MAX_MATCH+1;
349 memset(&pm, 0, sizeof(pm));
350 }
351
352 #ifdef REG_STARTEND
353 /* pcre, BSD, some linuxes support this handy trick */
354 pm[0].rm_so = 0;
355 pm[0].rm_eo = tlen;
356 r = !regexec((regex_t *) pat, text, nmatch, pm, REG_STARTEND);
357 #elif defined(HAVE_RX_POSIX_H)
358 /* rx provides regnexec, that will work too */
359 r = !regnexec((regex_t *) pat, text, tlen, nmatch, pm, 0);
360 #else
361 /* regexec() requires a NUL-terminated string, and we have no
362 * guarantee that "text" is one. Also, it may be only exactly
363 * tlen's length, so we can't safely check. Always dup. */
364 char *buf = (char *) xstrndup(text, tlen);
365 r = !regexec((regex_t *) pat, buf, nmatch, pm, 0);
366 free(buf);
367 #endif /* REG_STARTEND */
368
369 if (r) {
370 /* populate match variables */
371 size_t var_num;
372
373 for (var_num = 0; var_num < nmatch; var_num++) {
374 regmatch_t *m = &pm[var_num];
375
376 if (m->rm_so < 0) break;
377
378 append_var(var_num, text + m->rm_so, text + m->rm_eo, match_vars);
379 }
380 }
381 return r;
382 }
383 #endif
384
385
386 /* --- i;ascii-casemap comparators --- */
387
388
ascii_casemap_cmp(const char * text,size_t tlen,const char * pat)389 static int ascii_casemap_cmp(const char *text, size_t tlen, const char *pat)
390 {
391 return octet_cmp_(text, tlen, pat, 1);
392 }
393
ascii_casemap_contains(const char * text,size_t tlen,const char * pat,void * rock)394 static int ascii_casemap_contains(const char *text, size_t tlen,
395 const char *pat,
396 void *rock __attribute__((unused)))
397 {
398 return octet_contains_(text, tlen, pat, 1);
399 }
400
ascii_casemap_matches(const char * text,size_t tlen,const char * pat,void * rock)401 static int ascii_casemap_matches(const char *text, size_t tlen,
402 const char *pat,
403 void *rock)
404 {
405 int ret;
406 int needs_free = 0;
407 strarray_t temp = STRARRAY_INITIALIZER;
408 if (rock) {
409 strarray_fini((strarray_t*) rock);
410 } else {
411 rock = &temp;
412 needs_free = 1;
413 }
414 strarray_add((strarray_t*) rock, text);
415 ret = octet_matches_(text, tlen, pat, 1, rock);
416 if (!ret || needs_free) {
417 strarray_fini((strarray_t*) rock);
418 }
419 return ret;
420 }
421
422 /* i;ascii-numeric; only supports relational tests
423 *
424 * A \ B number not-num
425 * number A ? B A < B
426 * not-num A > B A == B
427 */
428
429 /* From RFC 2244:
430 *
431 * The i;ascii-numeric comparator interprets strings as decimal
432 * positive integers represented as US-ASCII digits. All values
433 * which do not begin with a US-ASCII digit are considered equal
434 * with an ordinal value higher than all non-NIL single-valued
435 * attributes. Otherwise, all US-ASCII digits (octet values
436 * 0x30 to 0x39) are interpreted starting from the beginning of
437 * the string to the first non-digit or the end of the string.
438 */
439
ascii_numeric_cmp(const char * text,size_t tlen,const char * pat)440 static int ascii_numeric_cmp(const char *text, size_t tlen, const char *pat)
441 {
442 unsigned text_digit_len;
443 unsigned pat_digit_len;
444
445 if (Uisdigit(*pat)) {
446 if (Uisdigit(*text)) {
447 /* Count how many digits each string has */
448 for (text_digit_len = 0;
449 tlen-- && Uisdigit(text[text_digit_len]);
450 text_digit_len++);
451 for (pat_digit_len = 0;
452 Uisdigit(pat[pat_digit_len]);
453 pat_digit_len++);
454
455 if (text_digit_len < pat_digit_len) {
456 /* Pad "text" with leading 0s */
457 while (pat_digit_len > text_digit_len) {
458 /* "text" can only be less or equal to "pat" */
459 if ('0' < *pat) {
460 return (-1);
461 }
462 pat++;
463 pat_digit_len--;
464 }
465 } else if (text_digit_len > pat_digit_len) {
466 /* Pad "pad" with leading 0s */
467 while (text_digit_len > pat_digit_len) {
468 /* "pad" can only be greater or equal to "text" */
469 if (*text > '0') {
470 return 1;
471 }
472 text++;
473 text_digit_len--;
474 }
475 }
476
477 /* CLAIM: If we here, we have two non-empty digital suffixes
478 of equal length */
479 while (text_digit_len > 0) {
480 if (*text < *pat) {
481 return -1;
482 } else if (*text > *pat) {
483 return 1;
484 }
485 /* Characters are equal, carry on */
486 text++;
487 pat++;
488 text_digit_len--;
489 }
490
491 return (0);
492 } else {
493 return 1;
494 }
495 } else if (Uisdigit(*text)) {
496 return -1;
497 } else {
498 return 0; /* both not digits */
499 }
500 }
501
lookup_rel(int relation)502 static comparator_t *lookup_rel(int relation)
503 {
504 comparator_t *ret;
505
506 ret = NULL;
507 switch (relation)
508 {
509 case B_EQ:
510 ret = &rel_eq;
511 break;
512 case B_NE:
513 ret = &rel_ne;
514 break;
515 case B_GT:
516 ret = &rel_gt;
517 break;
518 case B_GE:
519 ret = &rel_ge;
520 break;
521 case B_LT:
522 ret = &rel_lt;
523 break;
524 case B_LE:
525 ret = &rel_le;
526 }
527
528 return ret;
529 }
530
lookup_comp(int comp,int mode,int relation,void ** comprock)531 EXPORTED comparator_t *lookup_comp(int comp, int mode, int relation,
532 void **comprock)
533 {
534 comparator_t *ret;
535
536 ret = NULL;
537 *comprock = NULL;
538 #if VERBOSE
539 printf("comp%d mode%d relat%d \n", comp, mode, relation);
540 #endif
541 switch (comp)
542 {
543 case B_OCTET:
544 switch (mode) {
545 case B_IS:
546 ret = &rel_eq;
547 *comprock = (void **) &octet_cmp;
548 break;
549 case B_CONTAINS:
550 ret = &octet_contains;
551 break;
552 case B_MATCHES:
553 ret = &octet_matches;
554 break;
555 #ifdef ENABLE_REGEX
556 case B_REGEX:
557 ret = &octet_regex;
558 break;
559 #endif
560 case B_VALUE:
561 ret = lookup_rel(relation);
562 *comprock = (void **) &octet_cmp;
563 break;
564 }
565 break; /*end of octet */
566 case B_ASCIICASEMAP:
567 switch (mode) {
568 case B_IS:
569 ret = &rel_eq;
570 *comprock = (void **) &ascii_casemap_cmp;
571 break;
572 case B_CONTAINS:
573 ret = &ascii_casemap_contains;
574 break;
575 case B_MATCHES:
576 ret = &ascii_casemap_matches;
577 break;
578 #ifdef ENABLE_REGEX
579 case B_REGEX:
580 /* the ascii-casemap destinction is made during
581 the compilation of the regex in verify_regex() */
582 ret = &octet_regex;
583 break;
584 #endif
585 case B_VALUE:
586 ret = lookup_rel(relation);
587 *comprock = (void **) &ascii_casemap_cmp;
588 break;
589 }
590 break;/*end of ascii casemap */
591 case B_ASCIINUMERIC:
592 switch (mode) {
593 case B_IS:
594 ret = &rel_eq;
595 *comprock = (void **) &ascii_numeric_cmp;
596 break;
597 case B_COUNT:
598 case B_VALUE:
599 ret = lookup_rel(relation);
600 *comprock = (void **) &ascii_numeric_cmp;
601 break;
602 }
603 break;
604 }
605 return ret;
606 }
607