1 /* search_expr.c -- query tree handling for SEARCH
2  *
3  * Copyright (c) 1994-2012 Carnegie Mellon University.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in
14  *    the documentation and/or other materials provided with the
15  *    distribution.
16  *
17  * 3. The name "Carnegie Mellon University" must not be used to
18  *    endorse or promote products derived from this software without
19  *    prior written permission. For permission or any legal
20  *    details, please contact
21  *      Carnegie Mellon University
22  *      Center for Technology Transfer and Enterprise Creation
23  *      4615 Forbes Avenue
24  *      Suite 302
25  *      Pittsburgh, PA  15213
26  *      (412) 268-7393, fax: (412) 268-7395
27  *      innovation@andrew.cmu.edu
28  *
29  * 4. Redistributions of any form whatsoever must retain the following
30  *    acknowledgment:
31  *    "This product includes software developed by Computing Services
32  *     at Carnegie Mellon University (http://www.cmu.edu/computing/)."
33  *
34  * CARNEGIE MELLON UNIVERSITY DISCLAIMS ALL WARRANTIES WITH REGARD TO
35  * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
36  * AND FITNESS, IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
37  * FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
38  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
39  * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
40  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
41  */
42 
43 #include <config.h>
44 
45 #include <sys/types.h>
46 #include <stdlib.h>
47 #include <syslog.h>
48 #include <string.h>
49 #ifdef HAVE_UNISTD_H
50 #include <unistd.h>
51 #endif
52 
53 #include "assert.h"
54 #include "search_expr.h"
55 #include "index.h"
56 #include "message.h"
57 #include "charset.h"
58 #include "annotate.h"
59 #include "global.h"
60 #include "lsort.h"
61 #include "xstrlcpy.h"
62 #include "xmalloc.h"
63 
64 /* generated headers are not necessarily in current directory */
65 #include "imap/imap_err.h"
66 
67 #define DEBUG 0
68 
69 #if DEBUG
70 static search_expr_t **the_rootp;
71 static search_expr_t *the_focus;
72 #endif
73 static unsigned nnodes = 0;
74 
75 static void split(search_expr_t *e,
76                   void (*cb)(const char *, search_expr_t *, search_expr_t *, void *),
77                   void *rock);
78 
79 /*-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/
80 
append(search_expr_t * parent,search_expr_t * child)81 static search_expr_t *append(search_expr_t *parent, search_expr_t *child)
82 {
83     search_expr_t **tailp;
84 
85     for (tailp = &parent->children ; *tailp ; tailp = &(*tailp)->next)
86         ;
87     *tailp = child;
88     child->next = NULL;
89     child->parent = parent;
90 
91     return child;
92 }
93 
detachp(search_expr_t ** prevp)94 static search_expr_t *detachp(search_expr_t **prevp)
95 {
96     search_expr_t *child = *prevp;
97 
98     if (child) {
99         *prevp = child->next;
100         child->next = NULL;
101         child->parent = NULL;
102     }
103 
104     return child;
105 }
106 
search_expr_detach(search_expr_t * parent,search_expr_t * child)107 EXPORTED void search_expr_detach(search_expr_t *parent, search_expr_t *child)
108 {
109     search_expr_t **prevp;
110 
111     for (prevp = &parent->children ; *prevp && *prevp != child; prevp = &(*prevp)->next)
112         ;
113     detachp(prevp);
114 }
115 
116 /*
117  * Detach the node '*prevp' from the tree, and reparent its children to
118  * '*prevp' parent, preserving '*prevp's location and its children's
119  * order.
120  *
121  * Apparently this operation is called "splat" but I think that's
122  * a damn silly name.
123  */
elide(search_expr_t ** prevp)124 static search_expr_t *elide(search_expr_t **prevp)
125 {
126     search_expr_t *e = *prevp;
127     search_expr_t *child;
128 
129     *prevp = e->children;
130 
131     for (child = e->children ; child ; child = child->next) {
132         child->parent = e->parent;
133         prevp = &child->next;
134     }
135     *prevp = e->next;
136 
137     e->next = NULL;
138     e->children = NULL;
139     e->parent = NULL;
140 
141     return e;
142 }
143 
interpolate(search_expr_t ** prevp,enum search_op op)144 static search_expr_t *interpolate(search_expr_t **prevp, enum search_op op)
145 {
146     search_expr_t *e = search_expr_new(NULL, op);
147 
148     e->parent = (*prevp)->parent;
149     e->children = (*prevp);
150     e->next = (*prevp)->next;
151     (*prevp)->next = NULL;
152     (*prevp)->parent = e;
153     *prevp = e;
154 
155     return e;
156 }
157 
158 /*
159  * Create a new node in a search expression tree, with the given
160  * operation.  If 'parent' is not NULL, the new node is attached as the
161  * last child of 'parent'.  Returns a new node, never returns NULL.
162  */
search_expr_new(search_expr_t * parent,enum search_op op)163 EXPORTED search_expr_t *search_expr_new(search_expr_t *parent, enum search_op op)
164 {
165     search_expr_t *e = xzmalloc(sizeof(search_expr_t));
166     e->op = op;
167     if (parent) append(parent, e);
168     nnodes++;
169     return e;
170 }
171 
complexity_check(int r)172 static int complexity_check(int r)
173 {
174     unsigned max = (unsigned)config_getint(IMAPOPT_SEARCH_NORMALISATION_MAX);
175     return (max && nnodes >= max ? -1 : r);
176 }
177 
178 /*
179  * Append the given search expression tree 'e' to the end of the
180  * node 'parent'.  'e' must not already have a parent.
181  */
search_expr_append(search_expr_t * parent,search_expr_t * e)182 EXPORTED void search_expr_append(search_expr_t *parent, search_expr_t *e)
183 {
184     assert(e->parent == NULL);
185     append(parent, e);
186 }
187 
188 /*
189  * Recursively free a search expression tree including the given node
190  * and all descendent nodes.
191  */
search_expr_free(search_expr_t * e)192 EXPORTED void search_expr_free(search_expr_t *e)
193 {
194     if (!e) return;
195     while (e->children) {
196         search_expr_t *child = e->children;
197         search_expr_detach(e, child);
198         search_expr_free(child);
199     }
200     if (e->attr) {
201         if (e->attr->internalise) e->attr->internalise(NULL, NULL, &e->internalised);
202         if (e->attr->free) e->attr->free(&e->value);
203     }
204     free(e);
205 }
206 
207 /*
208  * Create and return a new search expression tree which is an
209  * exact duplicate of the given tree.
210  */
search_expr_duplicate(const search_expr_t * e)211 EXPORTED search_expr_t *search_expr_duplicate(const search_expr_t *e)
212 {
213     search_expr_t *newe;
214     search_expr_t *child;
215 
216     newe = search_expr_new(NULL, e->op);
217     newe->attr = e->attr;
218     if (newe->attr && newe->attr->duplicate)
219         newe->attr->duplicate(&newe->value, &e->value);
220     else
221         newe->value = e->value;
222 
223     for (child = e->children ; child ; child = child->next)
224         append(newe, search_expr_duplicate(child));
225 
226     return newe;
227 }
228 
229 /*
230  * Apply the given callback to every node in the search expression tree,
231  * in pre-order (i.e. parent before children), as long as the callback
232  * returns zero.  Returns the first non-zero return from the callback
233  * (which is typically an error code).
234  */
search_expr_apply(search_expr_t * e,int (* cb)(search_expr_t *,void *),void * rock)235 EXPORTED int search_expr_apply(search_expr_t *e,
236                                int (*cb)(search_expr_t *, void *),
237                                void *rock)
238 {
239     search_expr_t *child;
240     int r;
241 
242     r = cb(e, rock);
243     if (r) return r;
244 
245     for (child = e->children ; child ; child = child->next) {
246         r = search_expr_apply(child, cb, rock);
247         if (r) break;
248     }
249 
250     return r;
251 }
252 
253 /*-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/
254 
255 static const char *op_strings[] = {
256     "unknown", "true", "false",
257     "lt", "le", "gt", "ge", "match",
258     "fuzzymatch", "and", "or", "not"
259 };
260 
op_as_string(unsigned int op)261 static const char *op_as_string(unsigned int op)
262 {
263     return (op < VECTOR_SIZE(op_strings) ? op_strings[op] : "WTF?");
264 }
265 
serialise(const search_expr_t * e,struct buf * buf)266 static void serialise(const search_expr_t *e, struct buf *buf)
267 {
268     const search_expr_t *child;
269 
270 #if DEBUG
271     if (e == the_focus) buf_putc(buf, '<');
272 #endif
273     buf_putc(buf, '(');
274     buf_appendcstr(buf, op_as_string(e->op));
275     if (e->attr) {
276         buf_putc(buf, ' ');
277         buf_appendcstr(buf, e->attr->name);
278         buf_putc(buf, ' ');
279         if (e->attr->serialise) e->attr->serialise(buf, &e->value);
280     }
281     for (child = e->children ; child ; child = child->next) {
282         buf_putc(buf, ' ');
283         serialise(child, buf);
284     }
285     buf_putc(buf, ')');
286 #if DEBUG
287     if (e == the_focus) buf_putc(buf, '>');
288 #endif
289 }
290 
291 /*
292  * Given an expression tree, return a string which uniquely describes
293  * the tree.  The string is designed to be used as a cache key and for
294  * unit tests, not for human readability.
295  *
296  * Returns a new string which must be free()d by the caller.
297  */
search_expr_serialise(const search_expr_t * e)298 EXPORTED char *search_expr_serialise(const search_expr_t *e)
299 {
300     struct buf buf = BUF_INITIALIZER;
301     serialise(e, &buf);
302     return buf_release(&buf);
303 }
304 
305 /*-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/
306 
getseword(struct protstream * prot,char * buf,int maxlen)307 static int getseword(struct protstream *prot, char *buf, int maxlen)
308 {
309     int c = EOF;
310     int quoted = 0;
311 
312     c = prot_getc(prot);
313     if (c == '"')
314         quoted = 1;
315     else
316         prot_ungetc(c, prot);
317 
318     while (maxlen > 1 &&
319            (c = prot_getc(prot)) != EOF &&
320            (quoted ?
321                (c != '"') :
322                (c != ' ' && c != ')'))) {
323         *buf++ = c;
324         maxlen--;
325     }
326     *buf = '\0';
327     if (quoted && c != EOF)
328         c = prot_getc(prot);
329     return c;
330 }
331 
unserialise(search_expr_t * parent,struct protstream * prot)332 static search_expr_t *unserialise(search_expr_t *parent,
333                                   struct protstream *prot)
334 {
335     int c;
336     search_expr_t *e = NULL;
337     unsigned int op;
338     char tmp[128];
339 
340     c = prot_getc(prot);
341     if (c != '(')
342         goto bad;
343 
344     c = getseword(prot, tmp, sizeof(tmp));
345     if (c != ' ' && c != ')')
346         goto bad;
347 
348     for (op = 0 ; op < VECTOR_SIZE(op_strings) ; op++) {
349         if (!strcmp(tmp, op_strings[op]))
350             break;
351     }
352     if (op == VECTOR_SIZE(op_strings))
353         goto bad;
354 
355     e = search_expr_new(parent, op);
356     if (c == ')')
357         return e;    /* SEOP_TRUE, SEOP_FALSE */
358 
359     switch (op) {
360     case SEOP_AND:
361     case SEOP_OR:
362     case SEOP_NOT:
363         /* parse children */
364         for (;;) {
365             if (unserialise(e, prot) == NULL)
366                 goto bad;
367             c = prot_getc(prot);
368             if (c == ')')
369                 break;
370             if (c != ' ')
371                 goto bad;
372         }
373         break;
374     case SEOP_LT:
375     case SEOP_LE:
376     case SEOP_GT:
377     case SEOP_GE:
378     case SEOP_MATCH:
379     case SEOP_FUZZYMATCH:
380         /* parse attribute */
381         c = getseword(prot, tmp, sizeof(tmp));
382         if (c != ' ')
383             goto bad;
384         e->attr = search_attr_find(tmp);
385         if (e->attr == NULL)
386             goto bad;
387         /* parse value */
388         if (e->attr->unserialise)
389             c = e->attr->unserialise(prot, &e->value);
390         if (c != ')')
391             goto bad;
392         break;
393     default:
394         c = prot_getc(prot);
395         if (c != ')')
396             goto bad;
397         break;
398     }
399 
400     return e;
401 
402 bad:
403     if (e) {
404         e->op = SEOP_UNKNOWN;
405         if (parent == NULL)
406             search_expr_free(e);
407     }
408     return NULL;
409 }
410 
411 /*
412  * Given a string generated by search_expr_serialise(),
413  * parse it and return a new expression tree, or NULL if
414  * there were any errors.  Used mainly for unit tests.
415  */
search_expr_unserialise(const char * s)416 EXPORTED search_expr_t *search_expr_unserialise(const char *s)
417 {
418     struct protstream *prot;
419     search_expr_t *root = NULL;
420 
421     if (!s || !*s) return NULL;
422     prot = prot_readmap(s, strlen(s));
423     root = unserialise(NULL, prot);
424 
425 #if DEBUG
426     if (!root) {
427 #define MAX_CONTEXT 48
428         int off = ((const char *)prot->ptr - s);
429         int len = strlen(s);
430         int context_begin = off - MIN(off, MAX_CONTEXT);
431         int context_end = off + MIN((len-off), MAX_CONTEXT);
432         int i;
433         fputc('\n', stderr);
434         fprintf(stderr, "ERROR: failed to unserialise string at or near:\n");
435         if (context_begin) fputs("...", stderr);
436         fwrite(s+context_begin, 1, context_end-context_begin, stderr);
437         fputc('\n', stderr);
438         if (context_begin) fputs("---", stderr);
439         for (i = off - context_begin - 1 ; i > 0 ; i--)
440             fputc('-', stderr);
441         fputc('^', stderr);
442         fputc('\n', stderr);
443     }
444 #endif
445 
446     prot_free(prot);
447     return root;
448 }
449 
450 /*-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/
451 
452 enum {
453     DNF_OR, DNF_AND, DNF_NOT, DNF_CMP
454 };
455 
456 /* expected depth, in a full tree.  0 is rootmost, 3 is leafmost */
dnf_depth(const search_expr_t * e)457 static int dnf_depth(const search_expr_t *e)
458 {
459     switch (e->op) {
460     case SEOP_TRUE:
461     case SEOP_FALSE:
462     case SEOP_LT:
463     case SEOP_LE:
464     case SEOP_GT:
465     case SEOP_GE:
466     case SEOP_MATCH:
467     case SEOP_FUZZYMATCH:
468         return DNF_CMP;
469     case SEOP_AND:
470         return DNF_AND;
471     case SEOP_OR:
472         return DNF_OR;
473     case SEOP_NOT:
474         return DNF_NOT;
475     default: assert(0); return -1;
476     }
477     return -1;
478 }
479 
has_enough_children(const search_expr_t * e)480 static int has_enough_children(const search_expr_t *e)
481 {
482     const search_expr_t *child;
483     int min;
484     int n = 0;
485 
486     switch (e->op) {
487     case SEOP_OR:
488     case SEOP_AND:
489         min = 2;
490         break;
491     case SEOP_NOT:
492         min = 1;
493         break;
494     default:
495         return 1;
496     }
497 
498     for (child = e->children ; child ; child = child->next)
499         if (++n >= min) return 1;
500     return 0;
501 }
502 
apply_demorgan(search_expr_t ** ep,search_expr_t ** prevp)503 static int apply_demorgan(search_expr_t **ep, search_expr_t **prevp)
504 {
505     search_expr_t *child = *prevp;
506     search_expr_t **grandp;
507 
508     /* NOT nodes have exactly one child */
509     assert(*prevp != NULL);
510     assert((*prevp)->next == NULL);
511 
512     child->op = (child->op == SEOP_AND ? SEOP_OR : SEOP_AND);
513     for (grandp = &child->children ; *grandp ; grandp = &(*grandp)->next)
514         interpolate(grandp, SEOP_NOT);
515     search_expr_free(elide(ep));
516 
517     return complexity_check(1);
518 }
519 
apply_distribution(search_expr_t ** ep,search_expr_t ** prevp)520 static int apply_distribution(search_expr_t **ep, search_expr_t **prevp)
521 {
522     search_expr_t *newor;
523     search_expr_t *or;
524     search_expr_t *and;
525     search_expr_t *orchild;
526     search_expr_t *newand;
527     int r = 1;
528 
529     newor = interpolate(ep, SEOP_OR);
530     and = detachp(&newor->children);
531     or = detachp(prevp);
532 
533     while (complexity_check(r) >= 0) {
534         orchild = detachp(&or->children);
535         if (orchild == NULL) break;
536         newand = search_expr_duplicate(and);
537         append(newand, orchild);
538         append(newor, newand);
539     }
540 
541     search_expr_free(and);
542     search_expr_free(or);
543 
544     return complexity_check(r);
545 }
546 
invert(search_expr_t ** ep,search_expr_t ** prevp)547 static int invert(search_expr_t **ep, search_expr_t **prevp)
548 {
549     if ((*ep)->op == SEOP_NOT)
550         return apply_demorgan(ep, prevp);
551     else
552         return apply_distribution(ep, prevp);
553 }
554 
555 /* combine compatible boolean parent and child nodes */
combine(search_expr_t ** ep,search_expr_t ** prevp)556 static void combine(search_expr_t **ep, search_expr_t **prevp)
557 {
558     switch ((*ep)->op) {
559     case SEOP_NOT:
560         search_expr_free(elide(prevp));
561         search_expr_free(elide(ep));
562         break;
563     case SEOP_AND:
564     case SEOP_OR:
565         search_expr_free(elide(prevp));
566         break;
567     default:
568         break;
569     }
570 }
571 
572 /*
573  * Top-level normalisation step.  Returns 1 if it changed the subtree, 0
574  * if it didn't, and -1 on error (such as exceeding a complexity limit).
575  */
normalise(search_expr_t ** ep)576 static int normalise(search_expr_t **ep)
577 {
578     search_expr_t **prevp;
579     int depth;
580     int changed = -1;
581     int r;
582 
583 restart:
584     changed++;
585 
586 #if DEBUG
587     the_focus = *ep;
588     {
589         char *s = search_expr_serialise(*the_rootp);
590         fprintf(stderr, "normalise: tree=%s\n", s);
591         free(s);
592     }
593 #endif
594 
595     if (!has_enough_children(*ep)) {
596         /* eliminate trivial nodes: AND and ORs with
597          * a single child, NOTs with none */
598         search_expr_free(elide(ep));
599         goto restart;
600     }
601 
602     depth = dnf_depth(*ep);
603     for (prevp = &(*ep)->children ; *prevp ; prevp = &(*prevp)->next)
604     {
605         int child_depth = dnf_depth(*prevp);
606         if (child_depth == depth) {
607             combine(ep, prevp);
608             goto restart;
609         }
610         if (child_depth < depth) {
611             r = invert(ep, prevp);
612             if (r < 0) return -1;
613             goto restart;
614         }
615         r = normalise(prevp);
616         if (r < 0) return -1;
617         if (r > 0) goto restart;
618     }
619 
620     return complexity_check(changed);
621 }
622 
getnext(void * p)623 static void *getnext(void *p)
624 {
625     return ((search_expr_t *)p)->next;
626 }
627 
setnext(void * p,void * next)628 static void setnext(void *p, void *next)
629 {
630     ((search_expr_t *)p)->next = next;
631 }
632 
maxcost(const search_expr_t * e,hashu64_table * costcache)633 static int maxcost(const search_expr_t *e, hashu64_table *costcache)
634 {
635     if (!e) return 0;
636 
637     if (costcache) {
638         intptr_t cost = (intptr_t) hashu64_lookup((uint64_t) e, costcache);
639         assert(cost > INT_MIN && cost < INT_MAX);
640         if (cost) return cost > 0 ? cost : 0;
641     }
642 
643     int cost = e->attr ? e->attr->cost : 0;
644     search_expr_t *child;
645     for (child = e->children ; child ; child = child->next) {
646         int childcost = maxcost(child, costcache);
647         if (childcost > cost) cost = childcost;
648     }
649 
650     if (costcache) {
651         hashu64_insert((uint64_t) e, (void*)((intptr_t)(cost ? cost : -1)), costcache);
652     }
653     return cost;
654 }
655 
compare(void * p1,void * p2,void * calldata)656 static int compare(void *p1, void *p2, void *calldata)
657 {
658     const search_expr_t *e1 = p1;
659     const search_expr_t *e2 = p2;
660     int r = 0;
661 
662     if (!r)
663         r = maxcost(e1, calldata) - maxcost(e2, calldata);
664 
665     if (!r)
666         r = dnf_depth(e2) - dnf_depth(e1);
667 
668     if (!r)
669         r = strcasecmp(e1->attr ? e1->attr->name : "zzz",
670                        e2->attr ? e2->attr->name : "zzz");
671 
672     if (!r)
673         r = (int)e1->op - (int)e2->op;
674 
675     if (!r) {
676         struct buf b1 = BUF_INITIALIZER;
677         struct buf b2 = BUF_INITIALIZER;
678         if (e1->attr && e1->attr->serialise)
679             e1->attr->serialise(&b1, &e1->value);
680         if (e2->attr && e2->attr->serialise)
681             e2->attr->serialise(&b2, &e2->value);
682         r = strcmp(buf_cstring(&b1), buf_cstring(&b2));
683         buf_free(&b1);
684         buf_free(&b2);
685     }
686 
687     if (!r) {
688         if (e1->children || e2->children)
689             r = compare((void *)(e1->children ? e1->children : e1),
690                         (void *)(e2->children ? e2->children : e2),
691                         calldata);
692     }
693 
694     return r;
695 }
696 
sort_children_internal(search_expr_t * e,hashu64_table * costcache)697 static void sort_children_internal(search_expr_t *e, hashu64_table *costcache)
698 {
699     search_expr_t *child;
700 
701     for (child = e->children ; child ; child = child->next)
702         sort_children_internal(child, costcache);
703 
704     e->children = lsort(e->children, getnext, setnext, compare, costcache);
705 }
706 
sort_children(search_expr_t * e)707 static void sort_children(search_expr_t *e)
708 {
709     search_expr_t *child;
710     hashu64_table maxcostcache = HASHU64_TABLE_INITIALIZER;
711     construct_hashu64_table(&maxcostcache, 512, 0);
712     hashu64_table *costcache = &maxcostcache;
713 
714     if (sizeof(uint64_t) < sizeof(search_expr_t*)) {
715         costcache = NULL; // woot?
716     }
717 
718     for (child = e->children ; child ; child = child->next)
719         sort_children_internal(child, costcache);
720 
721     e->children = lsort(e->children, getnext, setnext, compare, costcache);
722 
723     free_hashu64_table(&maxcostcache, NULL);
724 }
725 
726 /*
727  * Reorganise a search expression tree into Disjunctive Normal Form.
728  * This form is useful for picking out cacheable and runnable sub-queries.
729  *
730  * An expression in DNF has a number of constraints:
731  *
732  * - it contains at most one OR node
733  * - if present the OR node is the root
734  * - NOT nodes if present have only comparisons as children
735  * - it contains at most 4 levels of nodes
736  * - nodes have a strict order of types, down from the root
737  *   they are: OR, AND, NOT, comparisons.
738  *
739  * DNF is useful for running queries.  Each of the children of the
740  * root OR node can be run as a separate sub-query, and cached
741  * independently because their results are just accumulated together
742  * without any further processing.  Each of those children is a single
743  * conjuctive clause which can implemented using an index lookup (or a
744  * scan of all messages) followed by a filtering step.  Finally, each of
745  * those conjunctive clauses can be analysed to discover which folders
746  * will need to be opened: no folders, a single specific folder,
747  * all folders, or all folders except some specific folders.
748  *
749  * We also enforce a fixed order on child nodes of any node, so
750  * that all logically equivalent trees are the same shape.  This
751  * helps when constructing a cache key from a tree.  The sorting
752  * criteria are:
753  *
754  * - NOT nodes after un-negated comparison nodes, then
755  * - comparison nodes sorted lexically on attribute, then
756  * - comparison nodes sorted lexically on stringified value
757  *
758  * Note that IMAP search syntax, when translated most directly into an
759  * expression tree, defines trees whose outermost node is always an AND.
760  * Those trees are not in any kind of normal form but more closely
761  * resemble Conjunctive Normal Form than DNF.  Any IMAP search program
762  * containing an OR criterion will require significant juggling to
763  * achieve DNF.
764  *
765  * Takes the root of the tree in *'ep' and returns a possibly reshaped
766  * tree whose root is stored in *'ep'.
767  *
768  * Returns 1 if the subtree was changed, 0 if it wasn't, and -1 on error
769  * (such as exceeding a complexity limit).
770  */
search_expr_normalise(search_expr_t ** ep)771 EXPORTED int search_expr_normalise(search_expr_t **ep)
772 {
773     int r;
774 
775 #if DEBUG
776     the_rootp = ep;
777 #endif
778     r = normalise(ep);
779     sort_children(*ep);
780 #if DEBUG
781     the_rootp = NULL;
782     the_focus = NULL;
783 #endif
784     return r;
785 }
786 
787 /*-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/
788 
internalise(search_expr_t * e,void * rock)789 static int internalise(search_expr_t *e, void *rock)
790 {
791     struct index_state *state = rock;
792     if (e->attr && e->attr->internalise)
793         e->attr->internalise(state, &e->value, &e->internalised);
794     return 0;
795 }
796 
797 /*
798  * Prepare the given expression for use with the given mailbox.
799  */
search_expr_internalise(struct index_state * state,search_expr_t * e)800 EXPORTED void search_expr_internalise(struct index_state *state, search_expr_t *e)
801 {
802     search_expr_apply(e, internalise, state);
803 }
804 
805 /* result:
806  * -1 definitely false (regardless of message)
807  *  0 depends on message
808  * +1 definitely true (regardless of message)
809  */
search_expr_always_same(const search_expr_t * e)810 EXPORTED int search_expr_always_same(const search_expr_t *e)
811 {
812     search_expr_t *child;
813 
814     switch (e->op) {
815     case SEOP_UNKNOWN:
816          assert(0); // fatally bad
817          return 0;
818     case SEOP_TRUE:
819          return 1;
820     case SEOP_FALSE:
821          return -1;
822     case SEOP_LT:
823     case SEOP_LE:
824     case SEOP_GT:
825     case SEOP_GE:
826     case SEOP_FUZZYMATCH:
827     case SEOP_MATCH:
828         return 0;
829     case SEOP_AND:
830         {
831             int res = 1;
832             for (child = e->children ; child ; child = child->next) {
833                 int cres = search_expr_always_same(child);
834                 if (cres == -1) return -1;
835                 if (cres == 0) res = 0; // could still be a definite no
836             }
837             return res;
838         }
839     case SEOP_OR:
840         {
841             int res = -1;
842             for (child = e->children ; child ; child = child->next) {
843                 int cres = search_expr_always_same(child);
844                 if (cres == 1) return 1;
845                 if (cres == 0) res = 0; // could still be a definite yes
846             }
847             return res;
848         }
849     case SEOP_NOT:
850         {
851             assert(e->children);
852             // reverse the result only
853             return 0 - search_expr_always_same(e->children);
854         }
855     }
856     return 0;
857 }
858 
859 /*
860  * Evaluate the given search expression for the given message,
861  * Returns nonzero if the expression is true, 0 otherwise.
862  */
search_expr_evaluate(message_t * m,const search_expr_t * e)863 EXPORTED int search_expr_evaluate(message_t *m, const search_expr_t *e)
864 {
865     search_expr_t *child;
866 
867     switch (e->op) {
868     case SEOP_UNKNOWN: assert(0); return 1;
869     case SEOP_TRUE: return 1;
870     case SEOP_FALSE: return 0;
871     case SEOP_LT:
872         assert(e->attr);
873         assert(e->attr->cmp);
874         return (e->attr->cmp(m, &e->value, e->internalised, e->attr->data1) < 0);
875     case SEOP_LE:
876         assert(e->attr);
877         assert(e->attr->cmp);
878         return (e->attr->cmp(m, &e->value, e->internalised, e->attr->data1) <= 0);
879     case SEOP_GT:
880         assert(e->attr);
881         assert(e->attr->cmp);
882         return (e->attr->cmp(m, &e->value, e->internalised, e->attr->data1) > 0);
883     case SEOP_GE:
884         assert(e->attr);
885         assert(e->attr->cmp);
886         return (e->attr->cmp(m, &e->value, e->internalised, e->attr->data1) >= 0);
887     case SEOP_FUZZYMATCH:
888         /* FUZZYMATCH should never be evaluated, as such nodes are
889          * picked out of the expression during query optimisation and
890          * used to drive search engine lookups.  But the nearest
891          * approximation would be MATCH. */
892     case SEOP_MATCH:
893         assert(e->attr);
894         assert(e->attr->match);
895         return e->attr->match(m, &e->value, e->internalised, e->attr->data1);
896     case SEOP_AND:
897         for (child = e->children ; child ; child = child->next)
898             if (!search_expr_evaluate(m, child))
899                 return 0;
900         return 1;
901     case SEOP_OR:
902         for (child = e->children ; child ; child = child->next)
903             if (search_expr_evaluate(m, child))
904                 return 1;
905         return 0;
906     case SEOP_NOT:
907         assert(e->children);
908         return !search_expr_evaluate(m, e->children);
909     }
910     return 0;
911 }
912 
913 /* ====================================================================== */
914 
uses_attr(search_expr_t * e,void * rock)915 static int uses_attr(search_expr_t *e, void *rock)
916 {
917     const search_attr_t *attr = rock;
918     return (e->attr == attr);
919 }
920 
921 /*
922  * Returns non-zero if any comparison node in the given search
923  * expression tree uses the attribute with the given name.
924  */
search_expr_uses_attr(const search_expr_t * e,const char * name)925 EXPORTED int search_expr_uses_attr(const search_expr_t *e, const char *name)
926 {
927     const search_attr_t *attr = search_attr_find(name);
928 
929     if (!attr) return 0;
930     return search_expr_apply((search_expr_t *)e, uses_attr, (void *)attr);
931 }
932 
933 /* ====================================================================== */
934 
935     /* NOTE: older than 'N' days will be a mutable search of course,
936      * but that fact isn't available down here - we only know the
937      * date range itself, and that isn't mutable.  So if you need
938      * immutable results, you'll need to maintain a fixed date range
939      * up in the higher level */
940 
is_mutable(search_expr_t * e,void * rock)941 static int is_mutable(search_expr_t *e, void *rock __attribute__((unused)))
942 {
943     return (e->attr && (e->attr->flags & SEA_MUTABLE));
944 }
945 
946 /*
947  * Return non-zero if the search expression is mutable, i.e. it might
948  * return a different set of messages if run again, assuming that
949  *
950  * a) no folders covered by the search have received new messages, and
951  * b) uidvalidities of folders covered by the search have not changed.
952  *
953  * Basically, mutable searches are on attributes of a message which are
954  * not derived solely from the message text itself and can be changed after
955  * the message is inserted.  For example: system flags are mutable, the
956  * From: header field is not.
957  */
search_expr_is_mutable(const search_expr_t * e)958 EXPORTED int search_expr_is_mutable(const search_expr_t *e)
959 {
960     return search_expr_apply((search_expr_t *)e, is_mutable, NULL);
961 }
962 
963 /* ====================================================================== */
964 
get_countability(search_expr_t * e,void * rock)965 static int get_countability(search_expr_t *e, void *rock)
966 {
967     unsigned int *maskp = rock;
968 
969     if (e->op == SEOP_TRUE)
970         *maskp |= SEC_EXISTS;
971     else if (e->op == SEOP_FALSE)
972         *maskp |= SEC_EXISTS|SEC_NOT;
973     else if (e->op == SEOP_NOT)
974         *maskp |= SEC_NOT;
975     else if (e->attr) {
976         if (e->attr->get_countability)
977             *maskp |= e->attr->get_countability(&e->value);
978         else
979             *maskp |= SEC_UNCOUNTED;
980     }
981 
982     return 0;
983 }
984 
985 /*
986  * Analyse the search expression to discover how countable the results are
987  * going to be.  By "countable" we mean "predictable from stored state,
988  * without searching every message".  Currently that means
989  *
990  * in message mode:
991  *    - total number of messages
992  *    - number unseen messages
993  *    - number seen messages (by inference)
994  *    - number recent messages
995  *    - number unrecent messages (by inference)
996  * in conversation mode:
997  *    - total number of conversations
998  *    - number of conversations with unseen messages
999  *    - number of conversations with no unseen messages (by inference)
1000  *
1001  * Returns a mask of SEC_* constants (e.g. SEC_SEEN) describing which
1002  * countable attributes are specified by the expression. The special value
1003  * SEC_UNCOUNTED means that at least one uncounted attribute was found.
1004  * Mask values with more than one bit set are effectively uncountable.
1005  *
1006  * Note: the heuristics used here are intended for normalised search
1007  * expressions, and may not work correctly otherwise.  In particular,
1008  * SEC_NOT doesn't do quite what you expect.
1009  */
1010 
search_expr_get_countability(const search_expr_t * e)1011 EXPORTED unsigned int search_expr_get_countability(const search_expr_t *e)
1012 {
1013     unsigned int mask = 0;
1014 
1015     if (!e)
1016         return 0;
1017 
1018     search_expr_apply((search_expr_t *)e, get_countability, &mask);
1019     return mask;
1020 }
1021 
1022 /* ====================================================================== */
1023 
1024 /*
1025  * Neutralise a node: make it always return success.  Useful for
1026  * changing the logic of an expression without reshaping it.
1027  */
search_expr_neutralise(search_expr_t * e)1028 EXPORTED void search_expr_neutralise(search_expr_t *e)
1029 {
1030     /* Leave the children and attribute in place.  This might be
1031      * wrong, but we are only called for MATCH nodes at the moment
1032      * and they seem to be able to tolerate such weirdness. */
1033     e->op = SEOP_TRUE;
1034 }
1035 
1036 /* ====================================================================== */
1037 
is_folder_node(const search_expr_t * e)1038 static int is_folder_node(const search_expr_t *e)
1039 {
1040     return (e->op == SEOP_MATCH &&
1041             e->attr &&
1042             !strcasecmp(e->attr->name, "folder"));
1043 }
1044 
is_indexed_node(const search_expr_t * e)1045 static int is_indexed_node(const search_expr_t *e)
1046 {
1047     if (e->op == SEOP_NOT)
1048         return is_indexed_node(e->children);
1049     return (e->op == SEOP_FUZZYMATCH &&
1050             e->attr &&
1051             e->attr->part != SEARCH_PART_NONE);
1052 }
1053 
is_folder_or_indexed(search_expr_t * e,void * rock)1054 static int is_folder_or_indexed(search_expr_t *e, void *rock __attribute__((unused)))
1055 {
1056     return (is_folder_node(e) || is_indexed_node(e));
1057 }
1058 
1059 /*
1060  * Split a search expression into one or more parts, each of which
1061  * satisfies the earliest of these conditions:
1062  *
1063  *  - contains at least one indexed match node
1064  *      (the callback's 'indexed' is non-NULL), or
1065  *
1066  *  - is limited to exactly one folder by a positive folder match node
1067  *      (the callback's 'mboxname' is non-NULL), or
1068  *
1069  *  - applies to all folders and is not indexed
1070  *      (both the callback's 'mboxname' and 'indexed' are NULL)
1071  *
1072  * Destroys the original expression as a side effect.
1073  *
1074  * The callback function 'cb' is called one or more times with up to two
1075  * expression trees which have just been detached from the original expression
1076  * tree.  Both of these trees will be in DNF and will be at most a
1077  * conjuctive node, i.e. no disjunctions.
1078  *
1079  * The 'indexed' tree, if not NULL, contains all the indexed search terms.
1080  * The 'scan' tree will never be NULL, although it may be a trivial tree
1081  * comprising a single (true) node.  It contains an expression that must be
1082  * matched by every message reported by the index or the folder scan.
1083  *
1084  * The callback is responsible for freeing the expression using
1085  * search_expr_free().  The callback may be called multiple times with
1086  * the same folder/index combination, in which case the expressions should
1087  * be considered logically ORed together.
1088  *
1089  * Does not require the input expression to be normalised, and may
1090  * normalise it during processing.  Expressions passed to the callback
1091  * are always normalised.
1092  */
search_expr_split_by_folder_and_index(search_expr_t * e,void (* cb)(const char *,search_expr_t *,search_expr_t *,void *),void * rock)1093 EXPORTED void search_expr_split_by_folder_and_index(search_expr_t *e,
1094                   void (*cb)(const char *, search_expr_t *, search_expr_t *, void *),
1095                   void *rock)
1096 {
1097     search_expr_t *copy = NULL;
1098 
1099     if (!search_expr_apply(e, is_folder_or_indexed, NULL)) {
1100         /* The expression contains neither a folder match node nor an
1101          * indexable node, which means we can short circuit the whole
1102          * normalisation and splitting process.  This optimisation helps
1103          * us cope with the FM web frontends generating queries with
1104          *
1105          * or or or or to "word" cc "word" bcc "word" from "word" subject "word"
1106          *
1107          * for every "word" the user types into the Search box, by
1108          * avoiding the complexity explosion due to normalising all
1109          * those OR nodes.
1110          *
1111          * But - we still sort it.  */
1112         sort_children(e);
1113         cb(NULL, NULL, e, rock);
1114         return;
1115     }
1116 
1117     copy = search_expr_duplicate(e);
1118     nnodes = 0;
1119     if (search_expr_normalise(&copy) < 0)
1120     {
1121         /* We blew the complexity limit because the expression has too
1122          * many ORs.  Rats.  Give up and scan folders with the original
1123          * expression */
1124         search_expr_free(copy);
1125         cb(NULL, NULL, e, rock);
1126         return;
1127     }
1128 
1129     search_expr_free(e);
1130     split(copy, cb, rock);
1131 }
1132 
split(search_expr_t * e,void (* cb)(const char *,search_expr_t *,search_expr_t *,void *),void * rock)1133 static void split(search_expr_t *e,
1134                   void (*cb)(const char *, search_expr_t *, search_expr_t *, void *),
1135                   void *rock)
1136 {
1137     search_expr_t *child;
1138 
1139     if (e->op == SEOP_OR) {
1140         /* top level node */
1141         while ((child = detachp(&e->children)) != NULL)
1142             split(child, cb, rock);
1143         search_expr_free(e);
1144     }
1145     else if (e->op == SEOP_AND) {
1146         search_expr_t **prevp;
1147         search_expr_t **folder_prevp = NULL;
1148         int nfolders = 0;
1149         int nindexes = 0;
1150 
1151         for (prevp = &e->children ; *prevp ; prevp = &(*prevp)->next) {
1152             if (is_indexed_node(*prevp)) {
1153                 nindexes++;
1154             }
1155             if (is_folder_node(*prevp)) {
1156                 nfolders++;
1157                 folder_prevp = prevp;
1158             }
1159         }
1160         if (nindexes) {
1161             /*
1162              * The presence of indexable fields overrides all other
1163              * considerations; we assume it's easier to consult the
1164              * index and then filter out folders later.  Note that this
1165              * assumption is broken for SQUAT which is per-folder.
1166              *
1167              * We remove the indexed matches from the conjunction and
1168              * build a new conjunction containing only those matches.
1169              * Note that this assumes the search engine does not give
1170              * false positives, which is also broken for SQUAT.
1171              */
1172             search_expr_t *indexed = search_expr_new(NULL, SEOP_AND);
1173 
1174             for (prevp = &e->children ; *prevp ; ) {
1175                 if (is_indexed_node(*prevp))
1176                     append(indexed, detachp(prevp));
1177                 else
1178                     prevp = &(*prevp)->next;
1179             }
1180             search_expr_normalise(&indexed);    /* in case of a trivial AND */
1181             if (!e->children) {
1182                 search_expr_free(e);
1183                 e = search_expr_new(NULL, SEOP_TRUE);
1184             }
1185             else
1186                 search_expr_normalise(&e);
1187             cb(NULL, indexed, e, rock);
1188         }
1189         else if (!nfolders) {
1190             /* No positive folder match: whole expression applies
1191              * to all folders */
1192             cb(NULL, NULL, e, rock);
1193         }
1194         else {
1195             /* One or more folder matches; Extract the first folder
1196              * to split the expression. Any remaining folder matches
1197              * will be evaluated in the subtree. */
1198             child = detachp(folder_prevp);
1199             /* normalise the remaining subtree */
1200             search_expr_normalise(&e);
1201             cb(child->value.s, NULL, e, rock);
1202             search_expr_free(child);
1203         }
1204     }
1205     else if (is_folder_node(e)) {
1206         cb(e->value.s, NULL, search_expr_new(NULL, SEOP_TRUE), rock);
1207         search_expr_free(e);
1208     }
1209     else if (is_indexed_node(e)) {
1210         cb(NULL, e, search_expr_new(NULL, SEOP_TRUE), rock);
1211     }
1212     else {
1213         cb(NULL, NULL, e, rock);
1214     }
1215 }
1216 
1217 /* ====================================================================== */
1218 
search_list_match(message_t * m,const union search_value * v,void * internalised,void * data1)1219 static int search_list_match(message_t *m,
1220                              const union search_value *v __attribute__((unused)),
1221                              void *internalised,
1222                              void *data1)
1223 {
1224     int r;
1225     struct buf buf = BUF_INITIALIZER;
1226     int (*getter)(message_t *, struct buf *) = (int(*)(message_t *, struct buf *))data1;
1227     strarray_t *internal = internalised;
1228 
1229     // XXX - this should never happen
1230 
1231     r = getter(m, &buf);
1232     if (!r && buf.len) {
1233         const char *val = buf_cstring(&buf);
1234         r = (strarray_find(internal, val, 0) >= 0) ? 1 : 0;
1235     }
1236     else
1237         r = 0;
1238     buf_free(&buf);
1239 
1240     return r;
1241 }
1242 
search_list_serialise(struct buf * b,const union search_value * v)1243 static void search_list_serialise(struct buf *b, const union search_value *v)
1244 {
1245     buf_putc(b, '(');
1246     int i;
1247     for (i = 0; i < strarray_size(v->list); i++) {
1248         if (i) buf_putc(b, ' ');
1249         buf_putc(b, '"');
1250         buf_appendcstr(b, strarray_nth(v->list, i));
1251         buf_putc(b, '"');
1252     }
1253     buf_putc(b, ')');
1254 }
1255 
search_list_unserialise(struct protstream * prot,union search_value * v)1256 static int search_list_unserialise(struct protstream *prot, union search_value *v)
1257 {
1258     int c;
1259     char tmp[1024];
1260 
1261     c = prot_getc(prot);
1262     if (c != '(') return EOF;
1263 
1264     v->list = strarray_new();
1265     do {
1266         c = getseword(prot, tmp, sizeof(tmp));
1267         strarray_append(v->list, tmp);
1268     } while (c == ' ');
1269 
1270     if (c != ')') return EOF;
1271 
1272     return prot_getc(prot);
1273 }
1274 
search_list_internalise(struct index_state * state,const union search_value * v,void ** internalisedp)1275 static void search_list_internalise(struct index_state *state __attribute__((unused)),
1276                                       const union search_value *v, void **internalisedp)
1277 {
1278     if (*internalisedp) *internalisedp = NULL;
1279     if (v) *internalisedp = v->list;
1280 }
1281 
search_list_duplicate(union search_value * new,const union search_value * old)1282 static void search_list_duplicate(union search_value *new,
1283                                   const union search_value *old)
1284 {
1285     new->list = strarray_dup(old->list);
1286 }
1287 
search_list_free(union search_value * v)1288 static void search_list_free(union search_value *v)
1289 {
1290     strarray_free(v->list);
1291     v->list = NULL;
1292 }
1293 
1294 /* ====================================================================== */
1295 
1296 struct search_string_internal {
1297     comp_pat *pat;
1298     char *s;
1299 };
1300 
search_string_match(message_t * m,const union search_value * v,void * internalised,void * data1)1301 static int search_string_match(message_t *m,
1302                                 const union search_value *v __attribute__((unused)),
1303                                 void *internalised,
1304                                 void *data1)
1305 {
1306     int r;
1307     struct buf buf = BUF_INITIALIZER;
1308     int (*getter)(message_t *, struct buf *) = (int(*)(message_t *, struct buf *))data1;
1309     struct search_string_internal *internal = internalised;
1310 
1311     r = getter(m, &buf);
1312     if (!r && buf.len)
1313         r = charset_searchstring(internal->s, internal->pat, buf.s, buf.len, charset_flags);
1314     else
1315         r = 0;
1316     buf_free(&buf);
1317 
1318     return r;
1319 }
1320 
search_string_serialise(struct buf * b,const union search_value * v)1321 static void search_string_serialise(struct buf *b, const union search_value *v)
1322 {
1323     buf_printf(b, "\"%s\"", v->s);
1324 }
1325 
search_string_unserialise(struct protstream * prot,union search_value * v)1326 static int search_string_unserialise(struct protstream *prot, union search_value *v)
1327 {
1328     int c;
1329     char tmp[1024];
1330 
1331     c = getseword(prot, tmp, sizeof(tmp));
1332     v->s = xstrdup(tmp);
1333     return c;
1334 }
1335 
search_string_internalise(struct index_state * state,const union search_value * v,void ** internalisedp)1336 static void search_string_internalise(struct index_state *state __attribute__((unused)),
1337                                       const union search_value *v, void **internalisedp)
1338 {
1339     if (*internalisedp) {
1340         struct search_string_internal *internal = *internalisedp;
1341         charset_freepat(internal->pat);
1342         free(internal->s);
1343         free(internal);
1344         *internalisedp = NULL;
1345     }
1346     if (v) {
1347         struct search_string_internal *internal = xzmalloc(sizeof(struct search_string_internal));
1348         charset_t utf8 = charset_lookupname("utf8");
1349         internal->s = charset_convert(v->s, utf8, charset_flags);
1350         internal->pat = charset_compilepat(internal->s);
1351         charset_free(&utf8);
1352         *internalisedp = internal;
1353     }
1354 }
1355 
search_string_duplicate(union search_value * new,const union search_value * old)1356 static void search_string_duplicate(union search_value *new,
1357                                     const union search_value *old)
1358 {
1359     new->s = xstrdup(old->s);
1360 }
1361 
search_string_free(union search_value * v)1362 static void search_string_free(union search_value *v)
1363 {
1364     free(v->s);
1365     v->s = NULL;
1366 }
1367 
1368 /* ====================================================================== */
1369 
search_listid_match(message_t * m,const union search_value * v,void * internalised,void * data1)1370 static int search_listid_match(message_t *m, const union search_value *v,
1371                                void *internalised,
1372                                void *data1 __attribute__((unused)))
1373 {
1374     int r;
1375     struct buf buf = BUF_INITIALIZER;
1376     comp_pat *pat = (comp_pat *)internalised;
1377 
1378     r = message_get_listid(m, &buf);
1379     if (!r) {
1380         r = charset_searchstring(v->s, pat, buf.s, buf.len, charset_flags);
1381         if (r) goto out;    // success
1382     }
1383 
1384     r = message_get_mailinglist(m, &buf);
1385     if (!r) {
1386         r = charset_searchstring(v->s, pat, buf.s, buf.len, charset_flags);
1387         if (r) goto out;    // success
1388     }
1389 
1390     r = 0;  // failure
1391 
1392 out:
1393     buf_free(&buf);
1394     return r;
1395 }
1396 
1397 /* ====================================================================== */
1398 
search_contenttype_match(message_t * m,const union search_value * v,void * internalised,void * data1)1399 static int search_contenttype_match(message_t *m, const union search_value *v,
1400                                     void *internalised,
1401                                     void *data1 __attribute__((unused)))
1402 {
1403     int r;
1404     comp_pat *pat = (comp_pat *)internalised;
1405     strarray_t types = STRARRAY_INITIALIZER;
1406     int i;
1407     char combined[128];
1408 
1409     if (!message_get_leaf_types(m, &types)) {
1410         for (i = 0 ; i < types.count ; i+= 2) {
1411             const char *type = types.data[i];
1412             const char *subtype = types.data[i+1];
1413 
1414             /* match against type */
1415             r = charset_searchstring(v->s, pat, type, strlen(type), charset_flags);
1416             if (r) goto out;    // success
1417 
1418             /* match against subtype */
1419             r = charset_searchstring(v->s, pat, subtype, strlen(subtype), charset_flags);
1420             if (r) goto out;    // success
1421 
1422             /* match against combined type_subtype */
1423             snprintf(combined, sizeof(combined), "%s_%s", type, subtype);
1424             r = charset_searchstring(v->s, pat, combined, strlen(combined), charset_flags);
1425             if (r) goto out;    // success
1426         }
1427     }
1428 
1429     r = 0;  // failure
1430 
1431 out:
1432     strarray_fini(&types);
1433     return r;
1434 }
1435 
1436 /* ====================================================================== */
1437 
search_header_match(message_t * m,const union search_value * v,void * internalised,void * data1)1438 static int search_header_match(message_t *m, const union search_value *v,
1439                                void *internalised,
1440                                void *data1)
1441 {
1442     int r;
1443     struct buf buf = BUF_INITIALIZER;
1444     const char *field = (const char *)data1;
1445     struct search_string_internal *internal = internalised;
1446 
1447     /* XXX MESSAGE_MULTIPLE is not actually implemented, but we still
1448      * XXX search all the values because it always returns all the values!
1449      */
1450     r = message_get_field(m, field,
1451                           MESSAGE_DECODED|MESSAGE_APPEND|MESSAGE_MULTIPLE,
1452                           &buf);
1453     if (!r) {
1454         if (*v->s) {
1455             r = charset_searchstring(internal->s, internal->pat, buf.s, buf.len, charset_flags);
1456         }
1457         else {
1458             /* RFC3501: If the string to search is zero-length, this matches
1459              * all messages that have a header line with the specified
1460              * field-name regardless of the contents. */
1461             r = buf.len ? 1 : 0;
1462         }
1463     }
1464     else
1465         r = 0;
1466     buf_free(&buf);
1467 
1468     return r;
1469 }
1470 
1471 /* ====================================================================== */
1472 
internalise_sequence(const union search_value * v,void ** internalisedp,unsigned maxval)1473 static void internalise_sequence(const union search_value *v,
1474                                  void **internalisedp, unsigned maxval)
1475 {
1476     if (*internalisedp) {
1477         seqset_free(*internalisedp);
1478         *internalisedp = NULL;
1479     }
1480     if (v) {
1481         *internalisedp = seqset_parse(v->s, NULL, maxval);
1482     }
1483 }
1484 
search_msgno_internalise(struct index_state * state,const union search_value * v,void ** internalisedp)1485 static void search_msgno_internalise(struct index_state *state,
1486                                      const union search_value *v, void **internalisedp)
1487 {
1488     internalise_sequence(v, internalisedp, (state ? state->exists : 0));
1489 }
1490 
search_uid_internalise(struct index_state * state,const union search_value * v,void ** internalisedp)1491 static void search_uid_internalise(struct index_state *state,
1492                                    const union search_value *v, void **internalisedp)
1493 {
1494     internalise_sequence(v, internalisedp, (state ? state->last_uid : 0));
1495 }
1496 
search_seq_match(message_t * m,const union search_value * v,void * internalised,void * data1)1497 static int search_seq_match(message_t *m,
1498                             const union search_value *v __attribute__((unused)),
1499                             void *internalised,
1500                             void *data1)
1501 {
1502     struct seqset *seq = internalised;
1503     int r;
1504     uint32_t u;
1505     int (*getter)(message_t *, uint32_t *) = (int(*)(message_t *, uint32_t *))data1;
1506 
1507     r = getter(m, &u);
1508     if (!r)
1509         r = seqset_ismember(seq, u);
1510     else
1511         r = 0;
1512 
1513     return r;
1514 }
1515 
search_seq_serialise(struct buf * b,const union search_value * v)1516 static void search_seq_serialise(struct buf *b, const union search_value *v)
1517 {
1518     buf_appendcstr(b, v->s);
1519 }
1520 
1521 /* ====================================================================== */
1522 
search_flags_match(message_t * m,const union search_value * v,void * internalised,void * data1)1523 static int search_flags_match(message_t *m, const union search_value *v,
1524                               void *internalised __attribute__((unused)),
1525                               void *data1)
1526 {
1527     int r;
1528     uint32_t u;
1529     int (*getter)(message_t *, uint32_t *) = (int(*)(message_t *, uint32_t *))data1;
1530 
1531     r = getter(m, &u);
1532     if (!r)
1533         r = !!(v->u & u);
1534     else
1535         r = 0;
1536 
1537     return r;
1538 }
1539 
search_systemflags_serialise(struct buf * b,const union search_value * v)1540 static void search_systemflags_serialise(struct buf *b, const union search_value *v)
1541 {
1542     if ((v->u & FLAG_ANSWERED))
1543         buf_appendcstr(b, "\\Answered");
1544     if ((v->u & FLAG_FLAGGED))
1545         buf_appendcstr(b, "\\Flagged");
1546     if ((v->u & FLAG_DELETED))
1547         buf_appendcstr(b, "\\Deleted");
1548     if ((v->u & FLAG_DRAFT))
1549         buf_appendcstr(b, "\\Draft");
1550     if ((v->u & FLAG_SEEN))
1551         buf_appendcstr(b, "\\Seen");
1552 }
1553 
search_systemflags_unserialise(struct protstream * prot,union search_value * v)1554 static int search_systemflags_unserialise(struct protstream *prot, union search_value *v)
1555 {
1556     int c;
1557     char tmp[64];
1558 
1559     c = getseword(prot, tmp, sizeof(tmp));
1560 
1561     if (!strcasecmp(tmp, "\\Answered"))
1562         v->u = FLAG_ANSWERED;
1563     else if (!strcasecmp(tmp, "\\Flagged"))
1564         v->u = FLAG_FLAGGED;
1565     else if (!strcasecmp(tmp, "\\Deleted"))
1566         v->u = FLAG_DELETED;
1567     else if (!strcasecmp(tmp, "\\Draft"))
1568         v->u = FLAG_DRAFT;
1569     else if (!strcasecmp(tmp, "\\Seen"))
1570         v->u = FLAG_SEEN;
1571     else
1572         return EOF;
1573     return c;
1574 }
1575 
search_indexflags_serialise(struct buf * b,const union search_value * v)1576 static void search_indexflags_serialise(struct buf *b, const union search_value *v)
1577 {
1578     if ((v->u & MESSAGE_SEEN))
1579         buf_appendcstr(b, "\\Seen");
1580     if ((v->u & MESSAGE_RECENT))
1581         buf_appendcstr(b, "\\Recent");
1582 }
1583 
search_indexflags_unserialise(struct protstream * prot,union search_value * v)1584 static int search_indexflags_unserialise(struct protstream *prot, union search_value *v)
1585 {
1586     int c;
1587     char tmp[64];
1588 
1589     c = getseword(prot, tmp, sizeof(tmp));
1590 
1591     if (!strcasecmp(tmp, "\\Seen"))
1592         v->u = MESSAGE_SEEN;
1593     else if (!strcasecmp(tmp, "\\Recent"))
1594         v->u = MESSAGE_RECENT;
1595     else
1596         return EOF;
1597     return c;
1598 }
1599 
search_indexflags_get_countability(const union search_value * v)1600 unsigned int search_indexflags_get_countability(const union search_value *v)
1601 {
1602     switch (v->u) {
1603     case MESSAGE_SEEN: return SEC_SEEN;
1604     case MESSAGE_RECENT: return SEC_RECENT;
1605     default: return SEC_UNCOUNTED;
1606     }
1607 }
1608 
1609 /* ====================================================================== */
1610 
search_keyword_internalise(struct index_state * state,const union search_value * v,void ** internalisedp)1611 static void search_keyword_internalise(struct index_state *state,
1612                                        const union search_value *v,
1613                                        void **internalisedp)
1614 {
1615     int r;
1616     int num = 0;
1617 
1618     if (state) {
1619         r = mailbox_user_flag(state->mailbox, v->s, &num, /*create*/0);
1620         if (!r)
1621             num++;
1622         else
1623             num = 0;
1624     }
1625     *internalisedp = (void*)(unsigned long)num;
1626 }
1627 
search_keyword_match(message_t * m,const union search_value * v,void * internalised,void * data1)1628 static int search_keyword_match(message_t *m,
1629                                 const union search_value *v __attribute__((unused)),
1630                                 void *internalised,
1631                                 void *data1 __attribute__((unused)))
1632 {
1633     int r;
1634     int num = (int)(unsigned long)internalised;
1635     uint32_t flags[MAX_USER_FLAGS/32];
1636 
1637     if (!num)
1638         return 0;   /* not a valid flag for this mailbox */
1639     num--;
1640 
1641     r = message_get_userflags(m, flags);
1642     if (!r)
1643         r = !!(flags[num/32] & (1<<(num % 32)));
1644     else
1645         r = 0;
1646 
1647     return r;
1648 }
1649 
1650 /* ====================================================================== */
1651 
search_time_t_cmp(message_t * m,const union search_value * v,void * internalised,void * data1)1652 static int search_time_t_cmp(message_t *m, const union search_value *v,
1653                              void *internalised __attribute__((unused)),
1654                              void *data1)
1655 {
1656     int r;
1657     time_t t;
1658     int (*getter)(message_t *, time_t *) = (int(*)(message_t *, time_t *))data1;
1659 
1660     r = getter(m, &t);
1661     if (!r) {
1662         if (t < v->t)
1663             r = -1;
1664         else if (t == v->t)
1665             r = 0;
1666         else
1667             r = 1;
1668     }
1669     else
1670         r = 0;
1671     return r;
1672 }
1673 
search_time_t_match(message_t * m,const union search_value * v,void * internalised,void * data1)1674 static int search_time_t_match(message_t *m, const union search_value *v,
1675                                void *internalised __attribute__((unused)),
1676                                void *data1)
1677 {
1678     int r;
1679     time_t t;
1680     int (*getter)(message_t *, time_t *) = (int(*)(message_t *, time_t *))data1;
1681 
1682     r = getter(m, &t);
1683     if (!r)
1684         r = (v->t == t);
1685     else
1686         r = 0;
1687 
1688     return r;
1689 }
1690 
search_time_t_serialise(struct buf * b,const union search_value * v)1691 static void search_time_t_serialise(struct buf *b, const union search_value *v)
1692 {
1693     buf_printf(b, "%lld", (long long)v->t);
1694 }
1695 
search_time_t_unserialise(struct protstream * prot,union search_value * v)1696 static int search_time_t_unserialise(struct protstream *prot, union search_value *v)
1697 {
1698     int c;
1699     char tmp[32];
1700 
1701     c = getseword(prot, tmp, sizeof(tmp));
1702     v->t = strtoll(tmp, NULL, 10);
1703     return c;
1704 }
1705 
1706 /* ====================================================================== */
1707 
search_uint64_cmp(message_t * m,const union search_value * v,void * internalised,void * data1)1708 static int search_uint64_cmp(message_t *m, const union search_value *v,
1709                              void *internalised __attribute__((unused)),
1710                              void *data1)
1711 {
1712     int r;
1713     uint64_t u;
1714     int (*getter)(message_t *, uint64_t *) = (int(*)(message_t *, uint64_t *))data1;
1715 
1716     r = getter(m, &u);
1717     if (!r) {
1718         if (u < v->u)
1719             r = -1;
1720         else if (u == v->u)
1721             r = 0;
1722         else
1723             r = 1;
1724     }
1725     else
1726         r = 0;
1727     return r;
1728 }
1729 
search_uint64_match(message_t * m,const union search_value * v,void * internalised,void * data1)1730 static int search_uint64_match(message_t *m, const union search_value *v,
1731                                void *internalised __attribute__((unused)),
1732                                void *data1)
1733 {
1734     int r;
1735     uint64_t u;
1736     int (*getter)(message_t *, uint64_t *) = (int(*)(message_t *, uint64_t *))data1;
1737 
1738     r = getter(m, &u);
1739     if (!r)
1740         r = (v->u == u);
1741     else
1742         r = 0;
1743 
1744     return r;
1745 }
1746 
search_uint64_serialise(struct buf * b,const union search_value * v)1747 static void search_uint64_serialise(struct buf *b, const union search_value *v)
1748 {
1749     buf_printf(b, "%llu", (unsigned long long)v->u);
1750 }
1751 
search_uint64_unserialise(struct protstream * prot,union search_value * v)1752 static int search_uint64_unserialise(struct protstream *prot, union search_value *v)
1753 {
1754     int c;
1755     char tmp[32];
1756 
1757     c = getseword(prot, tmp, sizeof(tmp));
1758     v->u = strtoull(tmp, NULL, 10);
1759     return c;
1760 }
1761 
1762 /* ====================================================================== */
1763 
search_cid_serialise(struct buf * b,const union search_value * v)1764 static void search_cid_serialise(struct buf *b, const union search_value *v)
1765 {
1766     buf_appendcstr(b, conversation_id_encode(v->u));
1767 }
1768 
search_cid_unserialise(struct protstream * prot,union search_value * v)1769 static int search_cid_unserialise(struct protstream *prot, union search_value *v)
1770 {
1771     int c;
1772     conversation_id_t cid;
1773     char tmp[32];
1774 
1775     c = getseword(prot, tmp, sizeof(tmp));
1776     if (!conversation_id_decode(&cid, tmp))
1777         return EOF;
1778     v->u = cid;
1779     return c;
1780 }
1781 
1782 /* ====================================================================== */
1783 
search_folder_internalise(struct index_state * state,const union search_value * v,void ** internalisedp)1784 static void search_folder_internalise(struct index_state *state,
1785                                       const union search_value *v,
1786                                       void **internalisedp)
1787 {
1788     if (state)
1789         *internalisedp = (void *)(unsigned long)(!strcmp(state->mailbox->name, v->s));
1790 }
1791 
search_folder_match(message_t * m,const union search_value * v,void * internalised,void * data1)1792 static int search_folder_match(message_t *m __attribute__((unused)),
1793                                const union search_value *v __attribute__((unused)),
1794                                void *internalised,
1795                                void *data1 __attribute__((unused)))
1796 {
1797     return (int)(unsigned long)internalised;
1798 }
1799 
search_folder_get_countability(const union search_value * v)1800 unsigned int search_folder_get_countability(const union search_value *v
1801                                             __attribute__((unused)))
1802 {
1803     return 0;
1804 }
1805 
1806 /* ====================================================================== */
1807 
search_annotation_internalise(struct index_state * state,const union search_value * v,void ** internalisedp)1808 static void search_annotation_internalise(struct index_state *state,
1809                                           const union search_value *v __attribute__((unused)),
1810                                           void **internalisedp)
1811 {
1812     if (state)
1813         *internalisedp = state->mailbox;
1814 }
1815 
1816 struct search_annot_rock {
1817     int result;
1818     const struct buf *match;
1819 };
1820 
_search_annot_match(const struct buf * match,const struct buf * value)1821 static int _search_annot_match(const struct buf *match,
1822                                const struct buf *value)
1823 {
1824     /* These cases are not explicitly defined in RFC5257 */
1825 
1826     /* NIL matches NIL and nothing else */
1827     if (match->s == NULL)
1828         return (value->s == NULL);
1829     if (value->s == NULL)
1830         return 0;
1831 
1832     /* empty matches empty and nothing else */
1833     if (match->len == 0)
1834         return (value->len == 0);
1835     if (value->len == 0)
1836         return 0;
1837 
1838     /* RFC5257 seems to define a simple CONTAINS style search */
1839     return !!memmem(value->s, value->len,
1840                     match->s, match->len);
1841 }
1842 
_search_annot_callback(const char * mboxname,uint32_t uid,const char * entry,struct attvaluelist * attvalues,void * rock)1843 static void _search_annot_callback(const char *mboxname __attribute__((unused)),
1844                                    uint32_t uid __attribute__((unused)),
1845                                    const char *entry __attribute__((unused)),
1846                                    struct attvaluelist *attvalues, void *rock)
1847 {
1848     struct search_annot_rock *sarock = rock;
1849     struct attvaluelist *l;
1850 
1851     for (l = attvalues ; l ; l = l->next) {
1852         if (_search_annot_match(sarock->match, &l->value))
1853             sarock->result = 1;
1854     }
1855 }
1856 
search_emailid_match(message_t * m,const union search_value * v,void * internalised,void * data1)1857 static int search_emailid_match(message_t *m, const union search_value *v,
1858                               void *internalised __attribute__((unused)),
1859                               void *data1 __attribute__((unused)))
1860 {
1861     const struct message_guid *guid = NULL;
1862     char emailid[26];
1863 
1864     int r = message_get_guid(m, &guid);
1865     if (r) return 0;
1866 
1867     emailid[0] = 'M';
1868     memcpy(emailid+1, message_guid_encode(guid), 24);
1869     emailid[25] = '\0';
1870 
1871     return !strcmp(v->s, emailid);
1872 }
1873 
search_threadid_match(message_t * m,const union search_value * v,void * internalised,void * data1)1874 static int search_threadid_match(message_t *m, const union search_value *v,
1875                                  void *internalised __attribute__((unused)),
1876                                  void *data1 __attribute__((unused)))
1877 {
1878     conversation_id_t cid = 0;
1879     char threadid[18];
1880 
1881     int r = message_get_cid(m, &cid);
1882     if (r) return 0;
1883 
1884     threadid[0] = 'T';
1885     memcpy(threadid+1, conversation_id_encode(cid), 16);
1886     threadid[17] = '\0';
1887 
1888     return !strcmp(v->s, threadid);
1889 }
1890 
search_annotation_match(message_t * m,const union search_value * v,void * internalised,void * data1)1891 static int search_annotation_match(message_t *m, const union search_value *v,
1892                                    void *internalised,
1893                                    void *data1 __attribute__((unused)))
1894 {
1895     struct mailbox *mailbox = (struct mailbox *)internalised;
1896     struct searchannot *sa = v->annot;
1897     strarray_t entries = STRARRAY_INITIALIZER;
1898     strarray_t attribs = STRARRAY_INITIALIZER;
1899     annotate_state_t *astate = NULL;
1900     struct search_annot_rock rock;
1901     uint32_t uid;
1902     int r;
1903 
1904     strarray_append(&entries, sa->entry);
1905     strarray_append(&attribs, sa->attrib);
1906 
1907     message_get_uid(m, &uid);
1908 
1909     r = mailbox_get_annotate_state(mailbox, uid, &astate);
1910     if (r) goto out;
1911     annotate_state_set_auth(astate, sa->isadmin,
1912                             sa->userid, sa->auth_state);
1913 
1914     memset(&rock, 0, sizeof(rock));
1915     rock.match = &sa->value;
1916 
1917     r = annotate_state_fetch(astate,
1918                              &entries, &attribs,
1919                              _search_annot_callback, &rock);
1920     if (r >= 0)
1921         r = rock.result;
1922 
1923 out:
1924     strarray_fini(&entries);
1925     strarray_fini(&attribs);
1926     return r;
1927 }
1928 
search_annotation_serialise(struct buf * b,const union search_value * v)1929 static void search_annotation_serialise(struct buf *b, const union search_value *v)
1930 {
1931     buf_printf(b, "(entry \"%s\" attrib \"%s\" value \"%s\")",
1932                 v->annot->entry, v->annot->attrib, buf_cstring(&v->annot->value));
1933 }
1934 
1935 /* Note: this won't be usable for execution as it lacks
1936  * namespace etc pointers.  Nor can it handle binary values. */
search_annotation_unserialise(struct protstream * prot,union search_value * v)1937 static int search_annotation_unserialise(struct protstream *prot, union search_value *v)
1938 {
1939     int c;
1940     char tmp[64];
1941     char entry[1024];
1942     char attrib[1024];
1943     char value[1024];
1944 
1945     c = prot_getc(prot);
1946     if (c != '(') return EOF;
1947 
1948     c = getseword(prot, tmp, sizeof(tmp));
1949     if (c != ' ') return EOF;
1950     if (strcmp(tmp, "entry")) return EOF;
1951     c = getseword(prot, entry, sizeof(entry));
1952     if (c != ' ') return EOF;
1953 
1954     c = getseword(prot, tmp, sizeof(tmp));
1955     if (c != ' ') return EOF;
1956     if (strcmp(tmp, "attrib")) return EOF;
1957     c = getseword(prot, attrib, sizeof(attrib));
1958     if (c != ' ') return EOF;
1959 
1960     c = getseword(prot, tmp, sizeof(tmp));
1961     if (c != ' ') return EOF;
1962     if (strcmp(tmp, "value")) return EOF;
1963     c = getseword(prot, value, sizeof(value));
1964     if (c != ')') return EOF;
1965 
1966     v->annot = (struct searchannot *)xzmalloc(sizeof(struct searchannot));
1967     v->annot->entry = xstrdup(entry);
1968     v->annot->attrib = xstrdup(attrib);
1969     buf_appendcstr(&v->annot->value, value);
1970     buf_cstring(&v->annot->value);
1971 
1972     c = prot_getc(prot);
1973     return c;
1974 }
1975 
search_annotation_duplicate(union search_value * new,const union search_value * old)1976 static void search_annotation_duplicate(union search_value *new,
1977                                         const union search_value *old)
1978 {
1979     new->annot = (struct searchannot *)xmemdup(old->annot, sizeof(*old->annot));
1980     new->annot->entry = xstrdup(new->annot->entry);
1981     new->annot->attrib = xstrdup(new->annot->attrib);
1982     memset(&new->annot->value, 0, sizeof(struct buf));
1983     buf_append(&new->annot->value, &old->annot->value);
1984 }
1985 
search_annotation_free(union search_value * v)1986 static void search_annotation_free(union search_value *v)
1987 {
1988     if (v->annot) {
1989         free(v->annot->entry);
1990         free(v->annot->attrib);
1991         buf_free(&v->annot->value);
1992         free(v->annot);
1993         v->annot = NULL;
1994     }
1995 }
1996 
1997 /* ====================================================================== */
1998 
1999 struct conv_rock {
2000     struct conversations_state *cstate;
2001     int cstate_is_ours;
2002     int num;        /* -1=invalid, 0=\Seen, 1+=index into cstate->counted_flags+1 */
2003 };
2004 
2005 static void conv_rock_new(struct mailbox *mailbox,
2006                           struct conv_rock **rockp);
2007 static void conv_rock_free(struct conv_rock **rockp);
2008 
search_convflags_internalise(struct index_state * state,const union search_value * v,void ** internalisedp)2009 static void search_convflags_internalise(struct index_state *state,
2010                                          const union search_value *v,
2011                                          void **internalisedp)
2012 {
2013     struct conv_rock **rockp = (struct conv_rock **)internalisedp;
2014     struct conv_rock *rock;
2015 
2016     conv_rock_free(rockp);
2017 
2018     if (state) {
2019         conv_rock_new(state->mailbox, rockp);
2020         rock = *rockp;
2021         if (rock->cstate) {
2022             if (!strcasecmp(v->s, "\\Seen")) {
2023                 rock->num = 0;
2024             }
2025             else if (!rock->cstate->counted_flags) {
2026                 rock->num = -1;
2027             }
2028             else {
2029                 rock->num = strarray_find_case(rock->cstate->counted_flags, v->s, 0);
2030                 /* rock->num might be -1 invalid */
2031                 if (rock->num >= 0)
2032                     rock->num++;
2033             }
2034         }
2035     }
2036 }
2037 
search_convflags_match(message_t * m,const union search_value * v,void * internalised,void * data1)2038 static int search_convflags_match(message_t *m,
2039                                   const union search_value *v __attribute__((unused)),
2040                                   void *internalised,
2041                                   void *data1 __attribute__((unused)))
2042 {
2043     struct conv_rock *rock = (struct conv_rock *)internalised;
2044     conversation_id_t cid = NULLCONVERSATION;
2045     conversation_t conv = CONVERSATION_INIT;
2046     int r = 0; /* invalid flag name */
2047 
2048     if (!rock->cstate) return 0;
2049 
2050     message_get_cid(m, &cid);
2051     if (conversation_load_advanced(rock->cstate, cid, &conv, /*flags*/0)) return 0;
2052     if (!conv.exists) return 0;
2053 
2054     if (rock->num == 0)
2055         r = (conv.unseen != conv.exists);
2056     else if (rock->num > 0)
2057         r = !!conv.counts[rock->num-1];
2058 
2059     conversation_fini(&conv);
2060     return r;
2061 }
2062 
search_allconvflags_match(message_t * m,const union search_value * v,void * internalised,void * data1)2063 static int search_allconvflags_match(message_t *m,
2064                                      const union search_value *v __attribute__((unused)),
2065                                      void *internalised,
2066                                      void *data1 __attribute__((unused)))
2067 {
2068     struct conv_rock *rock = (struct conv_rock *)internalised;
2069     conversation_id_t cid = NULLCONVERSATION;
2070     conversation_t conv = CONVERSATION_INIT;
2071     int r = 0; /* invalid flag name */
2072 
2073     if (!rock->cstate) return 0;
2074 
2075     message_get_cid(m, &cid);
2076     if (conversation_load_advanced(rock->cstate, cid, &conv, /*flags*/0)) return 0;
2077     if (!conv.exists) return 0;
2078 
2079     if (rock->num == 0)
2080         r = !conv.unseen;
2081     else if (rock->num > 0)
2082         r = (conv.counts[rock->num-1] == conv.exists);
2083 
2084     conversation_fini(&conv);
2085     return r;
2086 }
2087 
search_convflags_get_countability(const union search_value * v)2088 unsigned int search_convflags_get_countability(const union search_value *v)
2089 {
2090     if (!strcasecmp(v->s, "\\Seen"))
2091         return SEC_CONVSEEN;
2092     return SEC_UNCOUNTED;
2093 }
2094 
search_convmodseq_internalise(struct index_state * state,const union search_value * v,void ** internalisedp)2095 static void search_convmodseq_internalise(struct index_state *state,
2096                                           const union search_value *v __attribute__((unused)),
2097                                           void **internalisedp)
2098 {
2099     struct conv_rock **rockp = (struct conv_rock **)internalisedp;
2100 
2101     conv_rock_free(rockp);
2102 
2103     if (state) {
2104         conv_rock_new(state->mailbox, rockp);
2105     }
2106 }
2107 
search_convmodseq_match(message_t * m,const union search_value * v,void * internalised,void * data1)2108 static int search_convmodseq_match(message_t *m, const union search_value *v,
2109                                    void *internalised,
2110                                    void *data1 __attribute__((unused)))
2111 {
2112     struct conv_rock *rock = (struct conv_rock *)internalised;
2113     conversation_id_t cid = NULLCONVERSATION;
2114     conversation_t conv = CONVERSATION_INIT;
2115     int r;
2116 
2117     if (!rock->cstate) return 0;
2118 
2119     message_get_cid(m, &cid);
2120     if (conversation_load_advanced(rock->cstate, cid, &conv, /*flags*/0)) return 0;
2121     if (!conv.exists) return 0;
2122 
2123     r = (v->u == conv.modseq);
2124 
2125     conversation_fini(&conv);
2126     return r;
2127 }
2128 
conv_rock_new(struct mailbox * mailbox,struct conv_rock ** rockp)2129 static void conv_rock_new(struct mailbox *mailbox,
2130                           struct conv_rock **rockp)
2131 {
2132     struct conv_rock *rock = xzmalloc(sizeof(*rock));
2133 
2134     rock->cstate = conversations_get_mbox(mailbox->name);
2135     if (!rock->cstate) {
2136         if (conversations_open_mbox(mailbox->name, 1/*shared*/, &rock->cstate))
2137             rock->num = -1;         /* invalid */
2138         else
2139             rock->cstate_is_ours = 1;
2140     }
2141 
2142     *rockp = rock;
2143 }
2144 
conv_rock_free(struct conv_rock ** rockp)2145 static void conv_rock_free(struct conv_rock **rockp)
2146 {
2147     struct conv_rock *rock = *rockp;
2148     if (rock) {
2149         if (rock->cstate_is_ours)
2150             conversations_abort(&rock->cstate);
2151         free(rock);
2152         *rockp = NULL;
2153     }
2154 }
2155 
2156 
2157 /* ====================================================================== */
2158 
search_uint32_cmp(message_t * m,const union search_value * v,void * internalised,void * data1)2159 static int search_uint32_cmp(message_t *m, const union search_value *v,
2160                              void *internalised __attribute__((unused)),
2161                              void *data1)
2162 {
2163     int r;
2164     uint32_t u;
2165     int (*getter)(message_t *, uint32_t *) = (int(*)(message_t *, uint32_t *))data1;
2166 
2167     r = getter(m, &u);
2168     if (!r) {
2169         if (u < v->u)
2170             r = -1;
2171         else if (u == v->u)
2172             r = 0;
2173         else
2174             r = 1;
2175     }
2176     else
2177         r = 0;
2178     return r;
2179 }
2180 
search_uint32_match(message_t * m,const union search_value * v,void * internalised,void * data1)2181 static int search_uint32_match(message_t *m, const union search_value *v,
2182                                void *internalised __attribute__((unused)),
2183                                void *data1)
2184 {
2185     int r;
2186     uint32_t u;
2187     int (*getter)(message_t *, uint32_t *) = (int(*)(message_t *, uint32_t *))data1;
2188 
2189     r = getter(m, &u);
2190     if (!r)
2191         r = (v->u == u);
2192     else
2193         r = 0;
2194     return r;
2195 }
2196 
search_uint32_serialise(struct buf * b,const union search_value * v)2197 static void search_uint32_serialise(struct buf *b, const union search_value *v)
2198 {
2199     buf_printf(b, "%u", (uint32_t)v->u);
2200 }
2201 
search_uint32_unserialise(struct protstream * prot,union search_value * v)2202 static int search_uint32_unserialise(struct protstream *prot, union search_value *v)
2203 {
2204     int c;
2205     char tmp[32];
2206 
2207     c = getseword(prot, tmp, sizeof(tmp));
2208     v->u = strtoul(tmp, NULL, 10);
2209     return c;
2210 }
2211 
search_percent_serialise(struct buf * b,const union search_value * v)2212 static void search_percent_serialise(struct buf *b, const union search_value *v)
2213 {
2214     buf_printf(b, "%0.2f", ((float)v->u / 100));
2215 }
2216 
search_percent_unserialise(struct protstream * prot,union search_value * v)2217 static int search_percent_unserialise(struct protstream *prot, union search_value *v)
2218 {
2219     int c;
2220     char tmp[32];
2221 
2222     c = getseword(prot, tmp, sizeof(tmp));
2223     v->u = (int)((atof(tmp) * 100) + 0.5);
2224     return c;
2225 }
2226 
2227 /* ====================================================================== */
2228 
2229 /*
2230  * Search part of a message for a substring.
2231  */
2232 
2233 struct searchmsg_rock
2234 {
2235     const char *substr;
2236     comp_pat *pat;
2237     int skipheader;
2238     int result;
2239 };
2240 
searchmsg_cb(int isbody,charset_t charset,int encoding,const char * type,const char * subtype,const struct param * type_params,const char * disposition,const struct param * disposition_params,const struct message_guid * content_guid,const char * part,struct buf * data,void * rock)2241 static int searchmsg_cb(int isbody, charset_t charset, int encoding,
2242                         const char *type __attribute__((unused)),
2243                         const char *subtype __attribute__((unused)),
2244                         const struct param *type_params __attribute__((unused)),
2245                         const char *disposition __attribute__((unused)),
2246                         const struct param *disposition_params __attribute__((unused)),
2247                         const struct message_guid *content_guid __attribute__((unused)),
2248                         const char *part __attribute__((unused)),
2249                         struct buf *data, void *rock)
2250 {
2251     struct searchmsg_rock *sr = (struct searchmsg_rock *)rock;
2252 
2253     if (!isbody) {
2254         /* header-like */
2255         if (sr->skipheader) {
2256             sr->skipheader = 0; /* Only skip top-level message header */
2257             return 0;
2258         }
2259         sr->result = charset_search_mimeheader(sr->substr, sr->pat,
2260                                                buf_cstring(data), charset_flags);
2261     }
2262     else {
2263         /* body-like */
2264         if (charset == CHARSET_UNKNOWN_CHARSET) return 0;
2265         sr->result = charset_searchfile(sr->substr, sr->pat,
2266                                         data->s, data->len,
2267                                         charset, encoding, charset_flags);
2268     }
2269     if (sr->result) return 1; /* found it, exit early */
2270     return 0;
2271 }
2272 
search_text_match(message_t * m,const union search_value * v,void * internalised,void * data1)2273 static int search_text_match(message_t *m,
2274                              const union search_value *v __attribute__((unused)),
2275                              void *internalised,
2276                              void *data1)
2277 {
2278     struct searchmsg_rock sr;
2279     struct search_string_internal *internal = internalised;
2280 
2281     sr.substr = internal->s;
2282     sr.pat = internal->pat;
2283     sr.skipheader = (int)(unsigned long)data1;
2284     sr.result = 0;
2285     message_foreach_section(m, searchmsg_cb, &sr);
2286     return sr.result;
2287 }
2288 
2289 /* ====================================================================== */
2290 
2291 static hash_table attrs_by_name = HASH_TABLE_INITIALIZER;
2292 
2293 static int search_attr_initialized = 0;
2294 
done_cb(void * rock)2295 static void done_cb(void *rock __attribute__((unused))) {
2296     /* do nothing */
2297 }
2298 
init_internal()2299 static void init_internal() {
2300     if (!search_attr_initialized) {
2301         search_attr_init();
2302         cyrus_modules_add(done_cb, NULL);
2303     }
2304 }
2305 
2306 /*
2307  * Call search_attr_init() before doing any work with search
2308  * expressions.
2309  */
search_attr_init(void)2310 EXPORTED void search_attr_init(void)
2311 {
2312     unsigned int i;
2313 
2314     static const search_attr_t attrs[] = {
2315         {
2316             "bcclist",
2317             SEA_FUZZABLE|SEA_ISLIST,
2318             SEARCH_PART_BCC,
2319             SEARCH_COST_CACHE,
2320             search_list_internalise,
2321             /*cmp*/NULL,
2322             search_list_match,
2323             search_list_serialise,
2324             search_list_unserialise,
2325             /*get_countability*/NULL,
2326             search_list_duplicate,
2327             search_list_free,
2328             (void *)message_get_bcc
2329         },{
2330             "cclist",
2331             SEA_FUZZABLE|SEA_ISLIST,
2332             SEARCH_PART_CC,
2333             SEARCH_COST_CACHE,
2334             search_list_internalise,
2335             /*cmp*/NULL,
2336             search_list_match,
2337             search_list_serialise,
2338             search_list_unserialise,
2339             /*get_countability*/NULL,
2340             search_list_duplicate,
2341             search_list_free,
2342             (void *)message_get_cc
2343         },{
2344             "fromlist",
2345             SEA_FUZZABLE|SEA_ISLIST,
2346             SEARCH_PART_FROM,
2347             SEARCH_COST_CACHE,
2348             search_list_internalise,
2349             /*cmp*/NULL,
2350             search_list_match,
2351             search_list_serialise,
2352             search_list_unserialise,
2353             /*get_countability*/NULL,
2354             search_list_duplicate,
2355             search_list_free,
2356             (void *)message_get_from
2357         },{
2358             "tolist",
2359             SEA_FUZZABLE|SEA_ISLIST,
2360             SEARCH_PART_TO,
2361             SEARCH_COST_CACHE,
2362             search_list_internalise,
2363             /*cmp*/NULL,
2364             search_list_match,
2365             search_list_serialise,
2366             search_list_unserialise,
2367             /*get_countability*/NULL,
2368             search_list_duplicate,
2369             search_list_free,
2370             (void *)message_get_to
2371         },{
2372             "bcc",
2373             SEA_FUZZABLE,
2374             SEARCH_PART_BCC,
2375             SEARCH_COST_CACHE,
2376             search_string_internalise,
2377             /*cmp*/NULL,
2378             search_string_match,
2379             search_string_serialise,
2380             search_string_unserialise,
2381             /*get_countability*/NULL,
2382             search_string_duplicate,
2383             search_string_free,
2384             (void *)message_get_bcc
2385         },{
2386             "cc",
2387             SEA_FUZZABLE,
2388             SEARCH_PART_CC,
2389             SEARCH_COST_CACHE,
2390             search_string_internalise,
2391             /*cmp*/NULL,
2392             search_string_match,
2393             search_string_serialise,
2394             search_string_unserialise,
2395             /*get_countability*/NULL,
2396             search_string_duplicate,
2397             search_string_free,
2398             (void *)message_get_cc
2399         },{
2400             "from",
2401             SEA_FUZZABLE,
2402             SEARCH_PART_FROM,
2403             SEARCH_COST_CACHE,
2404             search_string_internalise,
2405             /*cmp*/NULL,
2406             search_string_match,
2407             search_string_serialise,
2408             search_string_unserialise,
2409             /*get_countability*/NULL,
2410             search_string_duplicate,
2411             search_string_free,
2412             (void *)message_get_from
2413         },{
2414             "message-id",
2415             /*flags*/0,
2416             SEARCH_PART_NONE,
2417             SEARCH_COST_CACHE,
2418             search_string_internalise,
2419             /*cmp*/NULL,
2420             search_string_match,
2421             search_string_serialise,
2422             search_string_unserialise,
2423             /*get_countability*/NULL,
2424             search_string_duplicate,
2425             search_string_free,
2426             (void *)message_get_messageid
2427         },{
2428             "listid",
2429             SEA_FUZZABLE,
2430             SEARCH_PART_LISTID,
2431             SEARCH_COST_CACHE,
2432             search_string_internalise,
2433             /*cmp*/NULL,
2434             search_listid_match,
2435             search_string_serialise,
2436             search_string_unserialise,
2437             /*get_countability*/NULL,
2438             search_string_duplicate,
2439             search_string_free,
2440             NULL
2441         },{
2442             "contenttype",
2443             SEA_FUZZABLE,
2444             SEARCH_PART_TYPE,
2445             SEARCH_COST_CACHE,
2446             search_string_internalise,
2447             /*cmp*/NULL,
2448             search_contenttype_match,
2449             search_string_serialise,
2450             search_string_unserialise,
2451             /*get_countability*/NULL,
2452             search_string_duplicate,
2453             search_string_free,
2454             NULL
2455         },{
2456             "subject",
2457             SEA_FUZZABLE,
2458             SEARCH_PART_SUBJECT,
2459             SEARCH_COST_CACHE,
2460             search_string_internalise,
2461             /*cmp*/NULL,
2462             search_string_match,
2463             search_string_serialise,
2464             search_string_unserialise,
2465             /*get_countability*/NULL,
2466             search_string_duplicate,
2467             search_string_free,
2468             (void *)message_get_subject
2469         },{
2470             "to",
2471             SEA_FUZZABLE,
2472             SEARCH_PART_TO,
2473             SEARCH_COST_CACHE,
2474             search_string_internalise,
2475             /*cmp*/NULL,
2476             search_string_match,
2477             search_string_serialise,
2478             search_string_unserialise,
2479             /*get_countability*/NULL,
2480             search_string_duplicate,
2481             search_string_free,
2482             (void *)message_get_to
2483         },{
2484             "msgno",
2485             SEA_MUTABLE,
2486             SEARCH_PART_NONE,
2487             SEARCH_COST_INDEX,
2488             search_msgno_internalise,
2489             /*cmp*/NULL,
2490             search_seq_match,
2491             search_seq_serialise,
2492             search_string_unserialise,
2493             /*get_countability*/NULL,
2494             search_string_duplicate,
2495             search_string_free,
2496             (void *)message_get_msgno
2497         },{
2498             "uid",
2499             /*flags*/0,
2500             SEARCH_PART_NONE,
2501             SEARCH_COST_INDEX,
2502             search_uid_internalise,
2503             /*cmp*/NULL,
2504             search_seq_match,
2505             search_seq_serialise,
2506             search_string_unserialise,
2507             /*get_countability*/NULL,
2508             search_string_duplicate,
2509             search_string_free,
2510             (void *)message_get_uid
2511         },{
2512             "systemflags",
2513             SEA_MUTABLE,
2514             SEARCH_PART_NONE,
2515             SEARCH_COST_INDEX,
2516             /*internalise*/NULL,
2517             /*cmp*/NULL,
2518             search_flags_match,
2519             search_systemflags_serialise,
2520             search_systemflags_unserialise,
2521             /*get_countability*/NULL,
2522             /*duplicate*/NULL,
2523             /*free*/NULL,
2524             (void *)message_get_systemflags
2525         },{
2526             "indexflags",
2527             SEA_MUTABLE,
2528             SEARCH_PART_NONE,
2529             SEARCH_COST_INDEX,
2530             /*internalise*/NULL,
2531             /*cmp*/NULL,
2532             search_flags_match,
2533             search_indexflags_serialise,
2534             search_indexflags_unserialise,
2535             search_indexflags_get_countability,
2536             /*duplicate*/NULL,
2537             /*free*/NULL,
2538             (void *)message_get_indexflags
2539         },{
2540             "keyword",
2541             SEA_MUTABLE,
2542             SEARCH_PART_NONE,
2543             SEARCH_COST_INDEX,
2544             search_keyword_internalise,
2545             /*cmp*/NULL,
2546             search_keyword_match,
2547             search_string_serialise,
2548             search_string_unserialise,
2549             /*get_countability*/NULL,
2550             search_string_duplicate,
2551             search_string_free,
2552             NULL
2553         },{
2554             "convflags",
2555             SEA_MUTABLE,
2556             SEARCH_PART_NONE,
2557             SEARCH_COST_CONV,
2558             search_convflags_internalise,
2559             /*cmp*/NULL,
2560             search_convflags_match,
2561             search_string_serialise,
2562             search_string_unserialise,
2563             search_convflags_get_countability,
2564             search_string_duplicate,
2565             search_string_free,
2566             NULL
2567         },{
2568             "allconvflags",
2569             SEA_MUTABLE,
2570             SEARCH_PART_NONE,
2571             SEARCH_COST_CONV,
2572             search_convflags_internalise,
2573             /*cmp*/NULL,
2574             search_allconvflags_match,
2575             search_string_serialise,
2576             search_string_unserialise,
2577             search_convflags_get_countability,
2578             search_string_duplicate,
2579             search_string_free,
2580             NULL
2581         },{
2582             "convmodseq",
2583             SEA_MUTABLE,
2584             SEARCH_PART_NONE,
2585             SEARCH_COST_CONV,
2586             search_convmodseq_internalise,
2587             /*cmp*/NULL,
2588             search_convmodseq_match,
2589             search_uint64_serialise,
2590             search_uint64_unserialise,
2591             /*get_countability*/NULL,
2592             /*duplicate*/NULL,
2593             /*free*/NULL,
2594             NULL
2595         },{
2596             "modseq",
2597             SEA_MUTABLE,
2598             SEARCH_PART_NONE,
2599             SEARCH_COST_INDEX,
2600             /*internalise*/NULL,
2601             search_uint64_cmp,
2602             search_uint64_match,
2603             search_uint64_serialise,
2604             search_uint64_unserialise,
2605             /*get_countability*/NULL,
2606             /*duplicate*/NULL,
2607             /*free*/NULL,
2608             (void *)message_get_modseq
2609         },{
2610             "cid",
2611             SEA_MUTABLE,
2612             SEARCH_PART_NONE,
2613             SEARCH_COST_INDEX,
2614             /*internalise*/NULL,
2615             search_uint64_cmp,
2616             search_uint64_match,
2617             search_cid_serialise,
2618             search_cid_unserialise,
2619             /*get_countability*/NULL,
2620             /*duplicate*/NULL,
2621             /*free*/NULL,
2622             (void *)message_get_cid
2623         },{
2624             "emailid",
2625             /* flags */0,
2626             SEARCH_PART_NONE,
2627             SEARCH_COST_INDEX,
2628             /*internalise*/NULL,
2629             /*cmp*/ NULL,
2630             search_emailid_match,
2631             search_string_serialise,
2632             search_string_unserialise,
2633             /*get_countability*/NULL,
2634             search_string_duplicate,
2635             search_string_free,
2636             (void *)NULL
2637         },{
2638             "threadid",
2639             /* flags */0,
2640             SEARCH_PART_NONE,
2641             SEARCH_COST_INDEX,
2642             /*internalise*/NULL,
2643             /*cmp*/ NULL,
2644             search_threadid_match,
2645             search_string_serialise,
2646             search_string_unserialise,
2647             /*get_countability*/NULL,
2648             search_string_duplicate,
2649             search_string_free,
2650             (void *)NULL
2651         },{
2652             "folder",
2653             /*flags*/0,
2654             SEARCH_PART_NONE,
2655             SEARCH_COST_NONE,
2656             search_folder_internalise,
2657             /*cmp*/NULL,
2658             search_folder_match,
2659             search_string_serialise,
2660             search_string_unserialise,
2661             search_folder_get_countability,
2662             search_string_duplicate,
2663             search_string_free,
2664             (void *)NULL
2665         },{
2666             "annotation",
2667             SEA_MUTABLE,
2668             SEARCH_PART_NONE,
2669             SEARCH_COST_ANNOT,
2670             search_annotation_internalise,
2671             /*cmp*/NULL,
2672             search_annotation_match,
2673             search_annotation_serialise,
2674             search_annotation_unserialise,
2675             /*get_countability*/NULL,
2676             search_annotation_duplicate,
2677             search_annotation_free,
2678             (void *)NULL
2679         },{
2680             "size",
2681             /*flags*/0,
2682             SEARCH_PART_NONE,
2683             SEARCH_COST_INDEX,
2684             /*internalise*/NULL,
2685             search_uint32_cmp,
2686             search_uint32_match,
2687             search_uint32_serialise,
2688             search_uint32_unserialise,
2689             /*get_countability*/NULL,
2690             /*duplicate*/NULL,
2691             /*free*/NULL,
2692             (void *)message_get_size
2693         },{
2694             "internaldate",
2695             /*flags*/0,
2696             SEARCH_PART_NONE,
2697             SEARCH_COST_INDEX,
2698             /*internalise*/NULL,
2699             search_time_t_cmp,
2700             search_time_t_match,
2701             search_time_t_serialise,
2702             search_time_t_unserialise,
2703             /*get_countability*/NULL,
2704             /*duplicate*/NULL,
2705             /*free*/NULL,
2706             (void *)message_get_internaldate
2707         },{
2708             "savedate",
2709             /*flags*/0,
2710             SEARCH_PART_NONE,
2711             SEARCH_COST_INDEX,
2712             /*internalise*/NULL,
2713             search_time_t_cmp,
2714             search_time_t_match,
2715             search_time_t_serialise,
2716             search_time_t_unserialise,
2717             /*get_countability*/NULL,
2718             /*duplicate*/NULL,
2719             /*free*/NULL,
2720             (void *)message_get_savedate
2721         },{
2722             "indexversion",
2723             /*flags*/0,
2724             SEARCH_PART_NONE,
2725             SEARCH_COST_NONE,
2726             /*internalise*/NULL,
2727             search_uint32_cmp,
2728             search_uint32_match,
2729             search_uint32_serialise,
2730             search_uint32_unserialise,
2731             /*get_countability*/NULL,
2732             /*duplicate*/NULL,
2733             /*free*/NULL,
2734             (void *)message_get_indexversion
2735         },{
2736             "sentdate",
2737             /*flags*/0,
2738             SEARCH_PART_NONE,
2739             SEARCH_COST_INDEX,
2740             /*internalise*/NULL,
2741             search_time_t_cmp,
2742             search_time_t_match,
2743             search_time_t_serialise,
2744             search_time_t_unserialise,
2745             /*get_countability*/NULL,
2746             /*duplicate*/NULL,
2747             /*free*/NULL,
2748             (void *)message_get_sentdate
2749         },{
2750             "spamscore",
2751             /*flags*/0,
2752             SEARCH_PART_NONE,
2753             SEARCH_COST_INDEX,
2754             /*internalise*/NULL,
2755             search_uint32_cmp,
2756             search_uint32_match,
2757             search_percent_serialise,
2758             search_percent_unserialise,
2759             /*get_countability*/NULL,
2760             /*duplicate*/NULL,
2761             /*free*/NULL,
2762             (void *)message_get_spamscore
2763         },{
2764             "body",
2765             SEA_FUZZABLE,
2766             SEARCH_PART_BODY,
2767             SEARCH_COST_BODY,
2768             search_string_internalise,
2769             /*cmp*/NULL,
2770             search_text_match,
2771             search_string_serialise,
2772             search_string_unserialise,
2773             /*get_countability*/NULL,
2774             search_string_duplicate,
2775             search_string_free,
2776             (void *)1       /* skipheader flag */
2777         },{
2778             "text",
2779             SEA_FUZZABLE,
2780             SEARCH_PART_ANY,
2781             SEARCH_COST_BODY,
2782             search_string_internalise,
2783             /*cmp*/NULL,
2784             search_text_match,
2785             search_string_serialise,
2786             search_string_unserialise,
2787             /*get_countability*/NULL,
2788             search_string_duplicate,
2789             search_string_free,
2790             (void *)0       /* skipheader flag */
2791         },{
2792             "date",
2793             /*flags*/0,
2794             SEARCH_PART_NONE,
2795             SEARCH_COST_INDEX,
2796             /*internalise*/NULL,
2797             search_time_t_cmp,
2798             search_time_t_match,
2799             search_time_t_serialise,
2800             search_time_t_unserialise,
2801             /*get_countability*/NULL,
2802             /*duplicate*/NULL,
2803             /*free*/NULL,
2804             (void *)message_get_gmtime
2805         },{
2806             "location",     /* for iCalendar */
2807             SEA_FUZZABLE,
2808             SEARCH_PART_LOCATION,
2809             SEARCH_COST_BODY,
2810             search_string_internalise,
2811             /*cmp*/NULL,
2812             search_string_match,
2813             search_string_serialise,
2814             search_string_unserialise,
2815             /*get_countability*/NULL,
2816             search_string_duplicate,
2817             search_string_free,
2818             (void *)0
2819         },{
2820             "attachmentname",
2821             SEA_FUZZABLE,
2822             SEARCH_PART_ATTACHMENTNAME,
2823             SEARCH_COST_BODY,
2824             search_string_internalise,
2825             /*cmp*/NULL,
2826             search_string_match,
2827             search_string_serialise,
2828             search_string_unserialise,
2829             /*get_countability*/NULL,
2830             search_string_duplicate,
2831             search_string_free,
2832             (void *)0
2833         },{
2834             "attachmentbody",
2835             SEA_FUZZABLE,
2836             SEARCH_PART_ATTACHMENTBODY,
2837             SEARCH_COST_BODY,
2838             search_string_internalise,
2839             /*cmp*/NULL,
2840             search_text_match,
2841             search_string_serialise,
2842             search_string_unserialise,
2843             /*get_countability*/NULL,
2844             search_string_duplicate,
2845             search_string_free,
2846             (void *)0       /* skipheader flag */
2847         }
2848     };
2849 
2850     construct_hash_table(&attrs_by_name, VECTOR_SIZE(attrs), 0);
2851     for (i = 0 ; i < VECTOR_SIZE(attrs) ; i++)
2852         hash_insert(attrs[i].name, (void *)&attrs[i], &attrs_by_name);
2853 
2854     search_attr_initialized = 1;
2855 }
2856 
2857 /*
2858  * Find and return a search attribute by name.  Used when building
2859  * comparison nodes in a search expression tree.  Name comparison is
2860  * case insensitive.  Returns a pointer to static data or NULL if there
2861  * is no attribute of the given name.
2862  */
search_attr_find(const char * name)2863 EXPORTED const search_attr_t *search_attr_find(const char *name)
2864 {
2865     char tmp[128];
2866 
2867     init_internal();
2868 
2869     strlcpy(tmp, name, sizeof(tmp));
2870     lcase(tmp);
2871     return hash_lookup(tmp, &attrs_by_name);
2872 }
2873 
2874 /*
2875  * Find and return a search attribute for the named header field.  Used
2876  * when building comparison nodes for the HEADER search criterion in a
2877  * search expression tree.  Field name comparison is case insensitive.
2878  * Returns a pointer to internally managed data or NULL if there is no
2879  * attribute of the given name.
2880  */
search_attr_find_field(const char * field)2881 EXPORTED const search_attr_t *search_attr_find_field(const char *field)
2882 {
2883     search_attr_t *attr;
2884     char *key = NULL;
2885     static const search_attr_t proto = {
2886         "name",
2887         SEA_FUZZABLE,
2888         SEARCH_PART_NONE,
2889         SEARCH_COST_NONE,
2890         search_string_internalise,
2891         /*cmp*/NULL,
2892         search_header_match,
2893         search_string_serialise,
2894         search_string_unserialise,
2895         /*get_countability*/NULL,
2896         search_string_duplicate,
2897         search_string_free,
2898         NULL
2899     };
2900 
2901     init_internal();
2902 
2903     /* some header fields can be reduced to search terms */
2904     if (!strcasecmp(field, "bcc") ||
2905         !strcasecmp(field, "cc") ||
2906         !strcasecmp(field, "to") ||
2907         !strcasecmp(field, "from") ||
2908         !strcasecmp(field, "subject") ||
2909         !strcasecmp(field, "message-id"))
2910         return search_attr_find(field);
2911 
2912     key = lcase(strconcat("header:", field, (char *)NULL));
2913     attr = (search_attr_t *)hash_lookup(key, &attrs_by_name);
2914 
2915     if (!attr) {
2916         attr = (search_attr_t *)xzmalloc(sizeof(search_attr_t));
2917         *attr = proto;
2918         attr->name = key;
2919         attr->cost = (mailbox_cached_header(field) != BIT32_MAX)
2920                    ? SEARCH_COST_CACHE : SEARCH_COST_BODY;
2921         attr->part = (config_getswitch(IMAPOPT_SEARCH_INDEX_HEADERS)
2922                         ? SEARCH_PART_HEADERS : -1);
2923         attr->data1 = strchr(key, ':')+1;
2924         hash_insert(attr->name, (void *)attr, &attrs_by_name);
2925         key = NULL;     /* attr takes this over */
2926     }
2927 
2928     free(key);
2929     return attr;
2930 }
2931 
2932 /*
2933  * Return non-zero if the given attribute may be used with a
2934  * SEOP_FUZZYMATCH operation.
2935  */
search_attr_is_fuzzable(const search_attr_t * attr)2936 EXPORTED int search_attr_is_fuzzable(const search_attr_t *attr)
2937 {
2938     return (attr->part != SEARCH_PART_NONE &&
2939             (attr->flags & SEA_FUZZABLE));
2940 }
2941 
search_attr_cost(const search_attr_t * attr)2942 EXPORTED enum search_cost search_attr_cost(const search_attr_t *attr)
2943 {
2944     return attr->cost;
2945 }
2946 
2947