1 /* squatter.c -- SQUAT-based message indexing tool
2  *
3  * Copyright (c) 1994-2012 Carnegie Mellon University.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in
14  *    the documentation and/or other materials provided with the
15  *    distribution.
16  *
17  * 3. The name "Carnegie Mellon University" must not be used to
18  *    endorse or promote products derived from this software without
19  *    prior written permission. For permission or any legal
20  *    details, please contact
21  *      Carnegie Mellon University
22  *      Center for Technology Transfer and Enterprise Creation
23  *      4615 Forbes Avenue
24  *      Suite 302
25  *      Pittsburgh, PA  15213
26  *      (412) 268-7393, fax: (412) 268-7395
27  *      innovation@andrew.cmu.edu
28  *
29  * 4. Redistributions of any form whatsoever must retain the following
30  *    acknowledgment:
31  *    "This product includes software developed by Computing Services
32  *     at Carnegie Mellon University (http://www.cmu.edu/computing/)."
33  *
34  * CARNEGIE MELLON UNIVERSITY DISCLAIMS ALL WARRANTIES WITH REGARD TO
35  * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
36  * AND FITNESS, IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
37  * FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
38  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
39  * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
40  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
41  */
42 
43 /*
44   This is the tool that creates/updates search indexes for Cyrus mailboxes.
45 
  Despite the name, it handles whichever search engine is configured
47   by the 'search_engine' option in imapd.conf.
48 */
49 
50 #include <config.h>
51 
52 #ifdef HAVE_UNISTD_H
53 #include <unistd.h>
54 #endif
55 #include <stdlib.h>
56 #include <stdio.h>
57 #include <sys/stat.h>
58 #include <sys/types.h>
59 #include <sys/poll.h>
60 #include <errno.h>
61 #include <fcntl.h>
62 #include <sysexits.h>
63 #include <syslog.h>
64 #include <string.h>
65 #include <getopt.h>
66 
67 #include "annotate.h"
68 #include "assert.h"
69 #include "bitvector.h"
70 #include "bsearch.h"
71 #include "mboxlist.h"
72 #include "global.h"
73 #include "search_engines.h"
74 #include "sync_log.h"
75 #include "mailbox.h"
76 #include "xmalloc.h"
77 #include "xstrlcpy.h"
78 #include "xstrlcat.h"
79 #include "ptrarray.h"
80 #include "tok.h"
81 #include "acl.h"
82 #include "seen.h"
83 #include "mboxname.h"
84 #include "index.h"
85 #include "message.h"
86 #include "util.h"
87 
88 /* generated headers are not necessarily in current directory */
89 #include "imap/imap_err.h"
90 
91 extern char *optarg;
92 extern int optind;
93 
94 /* current namespace */
95 static struct namespace squat_namespace;
96 
97 static int verbose = 0;
98 static int skip_unmodified = -1;
99 static int incremental_mode = 0;
100 static int xapindexed_mode = 0;
101 static int recursive_flag = 0;
102 static int annotation_flag = 0;
103 static int sleepmicroseconds = 0;
104 static int allow_partials = 0;
105 static int allow_duplicateparts = 0;
106 static int reindex_partials = 0;
107 static int reindex_minlevel = 0;
108 static search_text_receiver_t *rx = NULL;
109 
110 static strarray_t *skip_domains = NULL;
111 static strarray_t *skip_users = NULL;
112 
113 static const char *name_starts_from = NULL;
114 
115 static void shut_down(int code) __attribute__((noreturn));
116 
/*
 * Print the command line synopsis on stderr and terminate the process
 * with exit status EX_USAGE.  This function never returns, regardless
 * of its declared int return type.
 *
 * name: program name to show in the "usage:" line
 */
__attribute__((noreturn)) static int usage(const char *name)
{
    fprintf(stderr,
            "usage: %s [mode] [options] [source]\n"
            "\n"
            "Mode flags: \n"
            "  none                         index [source] (default)\n"
            "  -a, --squat-annot            index [source] using /squat annotations\n"
            "  -r, --recursive              index [source] recursively\n"
            "  -f, --synclog=FILE           index from synclog file\n"
            "  -R, --rolling                start rolling indexer\n"
            "  -z, --compact=TIER           compact to TIER\n"
            "  -l, --list                   list paths\n"
            "  -A, --audit                  report unindexed messages\n"
            "\n"
            "Index mode options:\n"
            "  -i, --incremental            index incrementally\n"
            "  -p, --allow-partials         allow partially indexed messages\n"
            "  -P, --reindex-partials       reindex partially indexed messages (implies -Z)\n"
            "  -L, --reindex-minlevel=LEVEL reindex messages where indexlevel < LEVEL (implies -Z)\n"
            "  -N, --name=NAME              index mailbox names starting with NAME\n"
            "  -S, --sleep=SECONDS          sleep SECONDS between indexing mailboxes\n"
            "  -Z, --internalindex          Xapian: use internal index rather than cyrus.indexed.db\n"
            "  -s, --squat-skip[=DELTA]     skip unmodified mailboxes (requires squat backend)\n"
            "\n"
            "Index sources:\n"
            "  none                         all mailboxes (default)\n"
            "  mailbox...                   index mailboxes\n"
            "  -u, --user=USER...           index mailboxes of USER\n"
            "\n"
            "Rolling indexer options:\n"
            "  -n, --channel=CHANNEL        listen to CHANNEL\n"
            "  -d, --nodaemon               don't background process\n"
            "\n"
            "Compact mode options:\n"
            "  -t, --srctier=TIER,...       compact from TIER\n"
            "  -F, --filter                 filter during compaction\n"
            "  -T, --reindex-tier=TIER,...  reindex TIER\n"
            "  -X, --reindex                reindex during compaction\n"
            "  -o, --copydb                 copy db rather than compacting\n"
            "  -U, --only-upgrade           only compact if re-indexing\n"
            /* the short option is -B; it was previously shown as "--B" */
            "  -B, --skip-locked            skip users that are locked by another process\n"
            "\n"
            "General options:\n"
            "  -v, --verbose                be verbose\n"
            "  -h, --help                   show usage\n",
        name);

    exit(EX_USAGE);
}
167 
168 /* ====================================================================== */
169 
/*
 * Put the process in the background: redirect the standard streams to
 * /dev/null, close every other descriptor, then fork and let the parent
 * exit so that only the child returns to the caller.
 *
 * Exits with status 1 if /dev/null cannot be opened or fork() fails.
 */
static void become_daemon(void)
{
    const int max_fd = getdtablesize();
    int devnull;
    int i;
    pid_t child;

    devnull = open("/dev/null", O_RDWR, 0);
    if (devnull < 0) {
        perror("/dev/null");
        exit(1);
    }

    /* stdin, stdout and stderr all end up on /dev/null */
    dup2(devnull, STDIN_FILENO);
    dup2(devnull, STDOUT_FILENO);
    dup2(devnull, STDERR_FILENO);

    /* everything above the standard streams goes, devnull included */
    i = 3;
    while (i < max_fd) {
        close(i);
        i++;
    }

    child = fork();
    if (child == -1) {
        perror("fork");
        exit(1);
    }
    else if (child != 0) {
        /* we are the parent: our job is done */
        exit(0);
    }
}
197 
should_index(const char * name)198 static int should_index(const char *name)
199 {
200     // skip early users
201     if (strcmpsafe(name, name_starts_from) < 0)
202         return 0;
203 
204     int ret = 1;
205     mbentry_t *mbentry = NULL;
206     mbname_t *mbname = mbname_from_intname(name);
207     /* Skip remote mailboxes */
208     int r = mboxlist_lookup(name, &mbentry, NULL);
209     if (r) {
210         /* Convert internal name to external */
211         char *extname = mboxname_to_external(name, &squat_namespace, NULL);
212         if (verbose) {
213             printf("error looking up %s: %s\n",
214                    extname, error_message(r));
215         }
216         syslog(LOG_INFO, "error looking up %s: %s\n",
217                extname, error_message(r));
218 
219         free(extname);
220         ret = 0;
221         goto done;
222     }
223 
224     // skip remote or not-real mailboxes
225     if (mbentry->mbtype & (MBTYPE_REMOTE|MBTYPE_DELETED|MBTYPE_INTERMEDIATE)) {
226         ret = 0;
227         goto done;
228     }
229 
230     // skip email submissions
231     if (mboxname_issubmissionmailbox(mbentry->name, mbentry->mbtype)) {
232         ret = 0;
233         goto done;
234     }
235 
236     // skip COLLECTION mailboxes (just files)
237     if (mbentry->mbtype & MBTYPE_COLLECTION) {
238         ret = 0;
239         goto done;
240     }
241 
242     // skip deleted mailboxes
243     if (mbname_isdeleted(mbname)) {
244         ret = 0;
245         goto done;
246     }
247 
248     // skip listed domains
249     if (mbname_domain(mbname) && skip_domains &&
250         strarray_find(skip_domains, mbname_domain(mbname), 0) >= 0) {
251         ret = 0;
252         goto done;
253     }
254 
255     // skip listed users
256     if (mbname_userid(mbname) && skip_users &&
257         strarray_find(skip_users, mbname_userid(mbname), 0) >= 0) {
258         ret = 0;
259         goto done;
260     }
261 
262 done:
263     mbname_free(&mbname);
264     mboxlist_entry_free(&mbentry);
265     return ret;
266 }
267 
268 /* ====================================================================== */
269 
270 /* This is called once for each mailbox we're told to index. */
index_one(const char * name,int blocking)271 static int index_one(const char *name, int blocking)
272 {
273     struct mailbox *mailbox = NULL;
274     int r;
275     int flags = SEARCH_UPDATE_BATCH;
276 
277     if (incremental_mode)
278         flags |= SEARCH_UPDATE_INCREMENTAL;
279     if (xapindexed_mode)
280         flags |= SEARCH_UPDATE_XAPINDEXED;
281     if (allow_partials)
282         flags |= SEARCH_UPDATE_ALLOW_PARTIALS;
283     if (reindex_partials)
284         flags |= SEARCH_UPDATE_REINDEX_PARTIALS;
285     if (allow_duplicateparts)
286         flags |= SEARCH_UPDATE_ALLOW_DUPPARTS;
287 
288     /* Convert internal name to external */
289     char *extname = mboxname_to_external(name, &squat_namespace, NULL);
290 
291     /* make sure the mailbox (or an ancestor) has
292        /vendor/cmu/cyrus-imapd/squat set to "true" */
293     if (annotation_flag) {
294         char buf[MAX_MAILBOX_BUFFER] = "", *p;
295         struct buf attrib = BUF_INITIALIZER;
296         int domainlen = 0;
297 
298         if (config_virtdomains && (p = strchr(name, '!')))
299             domainlen = p - name + 1;
300 
301         strlcpy(buf, name, sizeof(buf));
302 
303         /* since mailboxes inherit /vendor/cmu/cyrus-imapd/squat,
304            we need to iterate all the way up to "" (server entry) */
305         while (1) {
306             r = annotatemore_lookup(buf, IMAP_ANNOT_NS "squat", "",
307                                     &attrib);
308 
309             if (r ||                            /* error */
310                 attrib.s ||                     /* found an entry */
311                 !buf[0]) {                      /* done recursing */
312                 break;
313             }
314 
315             p = strrchr(buf, '.');              /* find parent mailbox */
316 
317             if (p && (p - buf > domainlen))     /* don't split subdomain */
318                 *p = '\0';
319             else if (!buf[domainlen])           /* server entry */
320                 buf[0] = '\0';
321             else                                /* domain entry */
322                 buf[domainlen] = '\0';
323         }
324 
325         if (r || !attrib.s || strcasecmp(attrib.s, "true")) {
326             buf_free(&attrib);
327             free(extname);
328             return 0;
329         }
330         buf_free(&attrib);
331     }
332 
333 again:
334     if (blocking)
335         r = mailbox_open_irl(name, &mailbox);
336     else
337         r = mailbox_open_irlnb(name, &mailbox);
338 
339     if (r == IMAP_MAILBOX_LOCKED) {
340         if (verbose) syslog(LOG_INFO, "mailbox %s locked, retrying", extname);
341         free(extname);
342         return r;
343     }
344     if (r) {
345         if (verbose) {
346             printf("error opening %s: %s\n", extname, error_message(r));
347         }
348         syslog(LOG_INFO, "error opening %s: %s\n", extname, error_message(r));
349         free(extname);
350 
351         return r;
352     }
353 
354     syslog(LOG_INFO, "indexing mailbox %s... ", extname);
355     if (verbose > 0) {
356         printf("Indexing mailbox %s... ", extname);
357     }
358 
359     if (skip_unmodified >= 0) {
360         const char *fname = mailbox_meta_fname(mailbox, META_SQUAT);
361         struct stat sbuf;
362         if (!stat(fname, &sbuf) &&
363                 skip_unmodified + mailbox->index_mtime < sbuf.st_mtime) {
364             syslog(LOG_DEBUG, "Squat skipping mailbox %s", extname);
365             if (verbose > 0) {
366                 printf("Skipping mailbox %s\n", extname);
367             }
368             mailbox_close(&mailbox);
369             free(extname);
370             return 0;
371         }
372     }
373 
374     r = search_update_mailbox(rx, mailbox, reindex_minlevel, flags);
375 
376     mailbox_close(&mailbox);
377 
378     /* in non-blocking (rolling) mode, only do one batch per mailbox at
379      * a time for fairness [IRIS-2471].  The squatter will re-insert the
380      * mailbox in the queue */
381     if (blocking && r == IMAP_AGAIN) goto again;
382     free(extname);
383 
384     return r;
385 }
386 
addmbox(const mbentry_t * mbentry,void * rock)387 static int addmbox(const mbentry_t *mbentry, void *rock)
388 {
389     strarray_t *sa = (strarray_t *) rock;
390     strarray_append(sa, mbentry->name);
391     return 0;
392 }
393 
/*
 * Fill `sa` with the internal names of the mailboxes to work on.
 *
 * sa:         array the names are appended to
 * nmboxnames: number of entries in `mboxnames`; 0 selects every mailbox
 *             (recursive_flag must not be set in that case)
 * mboxnames:  user names when `user_mode` is set, otherwise external
 *             mailbox names
 * user_mode:  non-zero expands each entry to that user's mailbox tree;
 *             zero expands each entry to the named mailbox, plus its
 *             children when recursive_flag is set
 *
 * When names were given, `sa` is sorted and deduplicated before returning.
 */
static void expand_mboxnames(strarray_t *sa, int nmboxnames,
                             const char **mboxnames, int user_mode)
{
    int i;

    if (!nmboxnames) {
        assert(!recursive_flag);
        mboxlist_allmbox(NULL, addmbox, sa, 0);
    }

    if (nmboxnames <= 0)
        return;

    for (i = 0; i < nmboxnames; i++) {
        if (user_mode) {
            mboxlist_usermboxtree(mboxnames[i], NULL, addmbox, sa, 0);
        }
        else {
            /* Translate any separators in mailboxname */
            char *intname = mboxname_from_external(mboxnames[i], &squat_namespace, NULL);
            int flags = recursive_flag ? 0 : MBOXTREE_SKIP_CHILDREN;
            mboxlist_mboxtree(intname, addmbox, sa, flags);
            free(intname);
        }
    }

    /* Sort and deduplicate once, after everything has been collected.
     * Doing it after every argument re-sorted the whole array each time
     * and produced the same final contents. */
    strarray_sort(sa, cmpstringp_raw);
    strarray_uniq(sa);
}
422 
do_indexer(const strarray_t * mboxnames)423 static int do_indexer(const strarray_t *mboxnames)
424 {
425     int r = 0;
426     int i;
427 
428     rx = search_begin_update(verbose);
429     if (rx == NULL)
430         return 0;       /* no indexer defined */
431 
432     for (i = 0 ; i < strarray_size(mboxnames) ; i++) {
433         const char *mboxname = strarray_nth(mboxnames, i);
434         if (!should_index(mboxname)) continue;
435         r = index_one(mboxname, /*blocking*/1);
436         if (r == IMAP_MAILBOX_NONEXISTENT)
437             r = 0;
438         if (r == IMAP_MAILBOX_LOCKED)
439             r = 0; /* XXX - try again? */
440         if (r) break;
441         if (sleepmicroseconds)
442             usleep(sleepmicroseconds);
443     }
444 
445     search_end_update(rx);
446 
447     return r;
448 }
449 
/*
 * Feed the textual `query` to the search builder `bx`, one token at a time.
 *
 * Tokens of the form "__begin:OP" / "__end:OP", with OP one of "and",
 * "or" or "not", open and close a boolean group.  Every other token is a
 * match term: without a ':' it matches any part, otherwise it must start
 * with one of the field prefixes in the table below and the text after
 * the ':' is matched against that part.  Keywords are case-insensitive.
 * Match text is run through charset_convert() before being handed to bx.
 *
 * Returns 0 on success, or IMAP_PROTOCOL_ERROR (after logging the
 * offending token) when a token is not understood.
 */
static int squatter_build_query(search_builder_t *bx, const char *query)
{
    /* field prefixes, tried in this order, and the part each selects */
    static const struct {
        const char *prefix;
        int part;
    } fields[] = {
        { "to:",          SEARCH_PART_TO },
        { "from:",        SEARCH_PART_FROM },
        { "cc:",          SEARCH_PART_CC },
        { "bcc:",         SEARCH_PART_BCC },
        { "subject:",     SEARCH_PART_SUBJECT },
        { "listid:",      SEARCH_PART_LISTID },
        { "contenttype:", SEARCH_PART_TYPE },
        { "header:",      SEARCH_PART_HEADERS },
        { "body:",        SEARCH_PART_BODY },
    };
    const size_t nfields = sizeof(fields) / sizeof(fields[0]);

    tok_t tok = TOK_INITIALIZER(query, NULL, 0);
    charset_t utf8 = charset_lookupname("utf-8");
    char *term;
    int r = 0;

    while ((term = tok_next(&tok)) != NULL) {
        int opening = !strncasecmp(term, "__begin:", 8);

        if (opening || !strncasecmp(term, "__end:", 6)) {
            /* boolean group delimiter: the operator follows the colon */
            const char *opname = term + (opening ? 8 : 6);
            int op;

            if (!strcasecmp(opname, "and"))
                op = SEARCH_OP_AND;
            else if (!strcasecmp(opname, "or"))
                op = SEARCH_OP_OR;
            else if (!strcasecmp(opname, "not"))
                op = SEARCH_OP_NOT;
            else
                goto error;

            if (opening)
                bx->begin_boolean(bx, op);
            else
                bx->end_boolean(bx, op);
            continue;
        }

        /* everything else is a ->match() of some kind */
        char *text = strchr(term, ':');
        int part;

        if (text == NULL) {
            /* no field given: match the whole token anywhere */
            part = SEARCH_PART_ANY;
            text = term;
        }
        else {
            size_t k = 0;

            text++;
            while (k < nfields &&
                   strncasecmp(term, fields[k].prefix,
                               strlen(fields[k].prefix)) != 0)
                k++;
            if (k == nfields)
                goto error;
            part = fields[k].part;
        }

        char *converted = charset_convert(text, utf8, charset_flags);
        bx->match(bx, part, converted);
        free(converted);
    }

out:
    charset_free(&utf8);
    tok_fini(&tok);
    return r;

error:
    syslog(LOG_ERR, "bad query expression at \"%s\"", term);
    r = IMAP_PROTOCOL_ERROR;
    goto out;
}
529 
/*
 * Search hit callback: print one hit on stdout.
 *
 * `rock` points to an int.  When it is non-zero only the uid is printed;
 * otherwise the mailbox name, uidvalidity and uid are printed.
 * Always returns 0.
 */
static int print_search_hit(const char *mboxname, uint32_t uidvalidity,
                            uint32_t uid,
                            const strarray_t *partids __attribute__((unused)),
                            void *rock)
{
    const int *singlep = rock;

    if (!*singlep) {
        printf("mailbox %s\nuidvalidity %u\nuid %u\n", mboxname, uidvalidity, uid);
    }
    else {
        printf("uid %u\n", uid);
    }

    return 0;
}
543 
do_list(const strarray_t * mboxnames)544 static int do_list(const strarray_t *mboxnames)
545 {
546     char *prev_userid = NULL;
547     strarray_t files = STRARRAY_INITIALIZER;
548     int i;
549     int r = 0;
550 
551     for (i = 0; i < strarray_size(mboxnames); i++) {
552         const char *mboxname = strarray_nth(mboxnames, i);
553         char *userid = mboxname_to_userid(mboxname);
554         if (!userid) continue;
555 
556         if (!strcmpsafe(prev_userid, userid)) {
557             free(userid);
558             continue;
559         }
560 
561         r = search_list_files(userid, &files);
562         if (r) break;
563 
564         int j;
565         for (j = 0; j < strarray_size(&files); j++) {
566             printf("%s\n", strarray_nth(&files, j));
567         }
568 
569         strarray_truncate(&files, 0);
570 
571         free(prev_userid);
572         prev_userid = userid;
573 
574         if (sleepmicroseconds)
575             usleep(sleepmicroseconds);
576     }
577 
578     strarray_fini(&files);
579     free(prev_userid);
580     return r;
581 }
582 
/*
 * Compact the search databases of `userid`: passes every argument through
 * to search_compact() unchanged and returns its result.
 */
static int compact_mbox(const char *userid, const strarray_t *reindextiers,
                        const strarray_t *srctiers,
                        const char *desttier, int flags)
{
    int r = search_compact(userid, reindextiers, srctiers, desttier, flags);

    return r;
}
589 
do_compact(const strarray_t * mboxnames,const strarray_t * reindextiers,const strarray_t * srctiers,const char * desttier,int flags)590 static int do_compact(const strarray_t *mboxnames, const strarray_t *reindextiers,
591                       const strarray_t *srctiers,
592                       const char *desttier, int flags)
593 {
594     char *prev_userid = NULL;
595     int i;
596 
597     for (i = 0; i < strarray_size(mboxnames); i++) {
598         const char *mboxname = strarray_nth(mboxnames, i);
599         if (!should_index(mboxname)) continue;
600         char *userid = mboxname_to_userid(mboxname);
601         if (!userid) continue;
602 
603         if (!strcmpsafe(prev_userid, userid)) {
604             free(userid);
605             continue;
606         }
607 
608         int retry;
609         for (retry = 1; retry <= 3; retry++) {
610             int r = compact_mbox(userid, reindextiers, srctiers, desttier, flags);
611             if (!r) break;
612             xsyslog(LOG_ERR, "IOERROR: failed to compact",
613                              "userid=<%s> retry=<%d> error=<%s>",
614                              userid, retry, error_message(r));
615         }
616 
617         free(prev_userid);
618         prev_userid = userid;
619 
620         if (sleepmicroseconds)
621             usleep(sleepmicroseconds);
622     }
623 
624     free(prev_userid);
625     return 0;
626 }
627 
do_search(const char * query,int single,const strarray_t * mboxnames)628 static int do_search(const char *query, int single, const strarray_t *mboxnames)
629 {
630     struct mailbox *mailbox = NULL;
631     int i;
632     int r;
633     search_builder_t *bx;
634     int opts = SEARCH_VERBOSE(verbose);
635 
636     if (!single)
637         opts |= SEARCH_MULTIPLE;
638 
639     for (i = 0 ; i < mboxnames->count ; i++) {
640         const char *mboxname = mboxnames->data[i];
641         if (!should_index(mboxname)) continue;
642 
643         r = mailbox_open_irl(mboxname, &mailbox);
644         if (r) {
645             fprintf(stderr, "Cannot open mailbox %s: %s\n",
646                     mboxname, error_message(r));
647             continue;
648         }
649         if (single)
650             printf("mailbox %s\n", mboxname);
651 
652         bx = search_begin_search(mailbox, opts);
653         if (bx) {
654             r = squatter_build_query(bx, query);
655             if (!r)
656                 bx->run(bx, print_search_hit, &single);
657             search_end_search(bx);
658         }
659 
660         mailbox_close(&mailbox);
661     }
662 
663     return 0;
664 }
665 
read_sync_log_items(sync_log_reader_t * slr)666 static strarray_t *read_sync_log_items(sync_log_reader_t *slr)
667 {
668     const char *args[3];
669     strarray_t *mboxnames = strarray_new();
670 
671     while (sync_log_reader_getitem(slr, args) == 0) {
672         if (!strcmp(args[0], "APPEND")) {
673             strarray_append(mboxnames, args[1]);
674         }
675         else if (!strcmp(args[0], "USER"))
676             mboxlist_usermboxtree(args[1], NULL, addmbox, mboxnames, /*flags*/0);
677     }
678 
679     return mboxnames;
680 }
681 
do_synclogfile(const char * synclogfile)682 static int do_synclogfile(const char *synclogfile)
683 {
684     strarray_t *mboxnames = NULL;
685     sync_log_reader_t *slr;
686     int nskipped = 0;
687     int i;
688     int r;
689 
690     slr = sync_log_reader_create_with_filename(synclogfile);
691     r = sync_log_reader_begin(slr);
692     if (r) goto out;
693     mboxnames = read_sync_log_items(slr);
694     sync_log_reader_end(slr);
695 
696     /* sort mboxnames for locality of reference in file processing mode */
697     strarray_sort(mboxnames, cmpstringp_raw);
698     /* and deduplicate */
699     strarray_uniq(mboxnames);
700 
701     signals_poll();
702 
703     /* have some due items in the queue, try to index them */
704     rx = search_begin_update(verbose);
705     if (NULL == rx) {
706         r = 1;
707         goto out;
708     }
709     for (i = 0; i < strarray_size(mboxnames); i++) {
710         const char *mboxname = strarray_nth(mboxnames, i);
711         if (!should_index(mboxname)) continue;
712         if (verbose > 1)
713             syslog(LOG_INFO, "do_synclogfile: indexing %s", mboxname);
714         r = index_one(mboxname, /*blocking*/1);
715         if (r == IMAP_MAILBOX_NONEXISTENT)
716             r = 0;
717         if (r == IMAP_MAILBOX_LOCKED || r == IMAP_AGAIN) {
718             nskipped++;
719             if (nskipped > 10000) {
720                 xsyslog(LOG_ERR, "IOERROR: skipped too many times",
721                                  "mailbox=<%s>", mboxname);
722                 break;
723             }
724             r = 0;
725             /* try again at the end */
726             strarray_append(mboxnames, mboxname);
727         }
728         if (r) {
729             xsyslog(LOG_ERR, "IOERROR: failed to index",
730                              "mailbox=<%s> error=<%s>",
731                              mboxname, error_message(r));
732             break;
733         }
734         if (sleepmicroseconds)
735             usleep(sleepmicroseconds);
736     }
737     search_end_update(rx);
738     rx = NULL;
739 
740 out:
741     strarray_free(mboxnames);
742     sync_log_reader_free(slr);
743     return r;
744 }
745 
do_rolling(const char * channel)746 static void do_rolling(const char *channel)
747 {
748     strarray_t *mboxnames = NULL;
749     sync_log_reader_t *slr;
750     int i;
751     int r;
752 
753     slr = sync_log_reader_create_with_channel(channel);
754 
755     for (;;) {
756         int sig = signals_poll();
757 
758         if (sig == SIGHUP && getenv("CYRUS_ISDAEMON")) {
759             syslog(LOG_DEBUG, "received SIGHUP, shutting down gracefully\n");
760             sync_log_reader_end(slr);
761             shut_down(0);
762         }
763 
764         if (shutdown_file(NULL, 0))
765             shut_down(EX_TEMPFAIL);
766 
767         r = sync_log_reader_begin(slr);
768         if (r) { /* including IMAP_AGAIN */
769             usleep(100000);    /* 1/10th second */
770             continue;
771         }
772 
773         mboxnames = read_sync_log_items(slr);
774 
775         if (mboxnames->count) {
776             /* sort mboxnames for locality of reference in file processing mode */
777             strarray_sort(mboxnames, cmpstringp_raw);
778             /* and deduplicate */
779             strarray_uniq(mboxnames);
780 
781             /* have some due items in the queue, try to index them */
782             rx = search_begin_update(verbose);
783             if (NULL == rx) {
784                 /* XXX if xapian, probably don't have conversations enabled? */
785                 fatal("could not construct search text receiver", EX_CONFIG);
786             }
787             for (i = 0; i < strarray_size(mboxnames); i++) {
788                 const char *mboxname = strarray_nth(mboxnames, i);
789                 if (!should_index(mboxname)) continue;
790                 if (verbose > 1)
791                     syslog(LOG_INFO, "do_rolling: indexing %s", mboxname);
792                 r = index_one(mboxname, /*blocking*/0);
793                 if (r == IMAP_AGAIN || r == IMAP_MAILBOX_LOCKED) {
794                     /* XXX: alternative, just append to strarray_t *mboxnames ... */
795                     sync_log_channel_append(channel, mboxname);
796                 }
797                 else if (r == IMAP_MAILBOX_NONEXISTENT) {
798                     /* should_index() checked for this, but we lost a race.
799                      * not an IOERROR, just annoying!
800                      */
801                     syslog(LOG_DEBUG, "skipping nonexistent mailbox: %s", mboxname);
802                 }
803                 else if (r) {
804                     xsyslog(LOG_ERR, "IOERROR: failed to index and forgetting",
805                                      "mailbox=<%s> error=<%s>",
806                                      mboxname, error_message(r));
807                 }
808                 if (sleepmicroseconds)
809                     usleep(sleepmicroseconds);
810             }
811             search_end_update(rx);
812             rx = NULL;
813         }
814 
815         strarray_free(mboxnames);
816         mboxnames = NULL;
817     }
818 
819     /* XXX - we don't really get here... */
820     strarray_free(mboxnames);
821     sync_log_reader_free(slr);
822 }
823 
/*
 * Audit one mailbox with the current text receiver `rx`, passing
 * `unindexed` to its audit_mailbox() hook to be filled in.
 *
 * The mailbox is opened, then begin_mailbox() is called with
 * SEARCH_UPDATE_AUDIT, then audit_mailbox(); the first failing step skips
 * the remaining ones.  end_mailbox() and mailbox_close() are called in
 * every case, including when the open or begin step failed.
 *
 * Returns the first error of the three steps, or else the result of
 * end_mailbox().
 */
static int audit_one(const char *mboxname, bitvector_t *unindexed)
{
    struct mailbox *mailbox = NULL;
    int end_r;
    int r;

    r = mailbox_open_irl(mboxname, &mailbox);
    if (!r)
        r = rx->begin_mailbox(rx, mailbox, SEARCH_UPDATE_AUDIT);
    if (!r)
        r = rx->audit_mailbox(rx, unindexed);

    end_r = rx->end_mailbox(rx, mailbox);
    mailbox_close(&mailbox);

    return r ? r : end_r;
}
844 
845 
do_audit(const strarray_t * mboxnames)846 static int do_audit(const strarray_t *mboxnames)
847 {
848     rx = search_begin_update(verbose);
849     if (rx == NULL)
850         return 0;       /* no indexer defined */
851 
852     int r = 0;
853     if (!rx->audit_mailbox) {
854         syslog(LOG_ERR, "squatter: indexer does not support audits");
855         r = IMAP_INTERNAL;
856         goto done;
857     }
858 
859     bitvector_t unindexed = BV_INITIALIZER;
860     int i;
861     for (i = 0 ; i < mboxnames->count ; i++) {
862         const char *mboxname = strarray_nth(mboxnames, i);
863         if (!should_index(mboxname)) continue;
864         r = audit_one(mboxname, &unindexed);
865         if (r == IMAP_MAILBOX_NONEXISTENT)
866             r = 0;
867         if (r == IMAP_MAILBOX_LOCKED)
868             r = 0; /* XXX - try again? */
869         if (r) break;
870         if (sleepmicroseconds)
871             usleep(sleepmicroseconds);
872 
873         if (bv_count(&unindexed)) {
874             printf("Unindexed message(s) in %s: ", mboxname);
875             int uid;
876             for (uid = bv_next_set(&unindexed, 0);
877                  uid != -1;
878                  uid = bv_next_set(&unindexed, uid+1)) {
879                 printf("%d ", uid);
880             }
881             printf("\n");
882         }
883         bv_clearall(&unindexed);
884     }
885     bv_fini(&unindexed);
886 
887 done:
888     search_end_update(rx);
889     return r;
890 }
891 
/*
 * Release what the program holds and leave with exit status `code`.
 * Never returns.
 */
static void shut_down(int code)
{
    /* teardown calls, in this order: seen, cyrus, text extractor */
    seen_done();
    cyrus_done();
    index_text_extractor_destroy();

    exit(code);
}
902 
main(int argc,char ** argv)903 int main(int argc, char **argv)
904 {
905     int opt;
906     char *alt_config = NULL;
907     int r = IMAP_NOTFOUND;
908     strarray_t mboxnames = STRARRAY_INITIALIZER;
909     const char *query = NULL;
910     int background = 1;
911     const char *channel = "squatter";
912     const char *synclogfile = NULL;
913     int init_flags = CYRUSINIT_PERROR;
914     int multi_folder = 0;
915     int user_mode = 0;
916     int compact_flags = 0;
917     strarray_t *srctiers = NULL;
918     strarray_t *reindextiers = NULL;
919     const char *desttier = NULL;
920     char *errstr = NULL;
921     enum { UNKNOWN, INDEXER, SEARCH, ROLLING, SYNCLOG,
922            COMPACT, AUDIT, LIST } mode = UNKNOWN;
923 
924     setbuf(stdout, NULL);
925 
926     /* Keep these in alphabetic order */
927     static const char *short_options = "ABC:DFL:N:PRS:T:UXZade:f:hilmn:oprs:t:uvz:";
928 
929     /* Keep these ordered by mode */
930     static struct option long_options[] = {
931         /* audit-mode flags */
932         {"audit",  no_argument, 0, 'A' },
933 
934         /* compact-mode flags */
935         {"copydb", no_argument, 0, 'o' },
936         {"filter", no_argument, 0, 'F' },
937         {"skip-locked", no_argument, 0, 'B' },
938         {"only-upgrade", no_argument, 0, 'U' },
939         {"reindex-tier", required_argument, 0, 'T' },
940         {"srctier", required_argument, 0, 't' },
941         {"compact", required_argument, 0, 'z' },
942 
943         /* index-mode flags */
944         {"index-duplicates", no_argument, 0, 'D' },
945         {"incremental", no_argument, 0, 'i' },
946         {"allow-partials", no_argument, 0, 'p' },
947         {"name", required_argument, 0, 'N' },
948         {"internalindex", no_argument, 0, 'Z' },
949         {"user", no_argument, 0, 'u' },
950         {"reindex", no_argument, 0, 'X' },
951         {"reindex-minlevel", required_argument, 0, 'L' },
952         {"reindex-partials", no_argument, 0, 'P' },
953 
954         /* list-mode flags */
955         {"list", no_argument, 0, 'l' },
956 
957         /* rolling mode */
958         {"rolling", no_argument, 0, 'R' },
959         {"channel", required_argument, 0, 'n' },
960         {"nodaemon", no_argument, 0, 'd' },
961 
962         /* search-mode flags */
963         {"search-multifolder", no_argument, 0, 'm' },
964         {"search-term", required_argument, 0, 'e' },
965 
966         /* squat flags */
967         {"squat-annot", no_argument, 0, 'a' },
968         {"squat-skip", optional_argument, 0, 's' },
969 
970         /* synclog-mode flags */
971         {"synclog", required_argument, 0, 'f' },
972 
973         {"recursive", no_argument, 0, 'r' },
974         {"sleep", required_argument, 0, 'S' },
975 
976         /* misc */
977         {"help", no_argument, 0, 'h' },
978         {"verbose", no_argument, 0, 'v' },
979         // no long form for 'C' option
980 
981         {0, 0, 0, 0 }
982     };
983 
984     while ((opt = getopt_long(argc, argv, short_options, long_options, NULL)) != EOF) {
985         switch (opt) {
986         case 'A':
987             if (mode != UNKNOWN) usage(argv[0]);
988             mode = AUDIT;
989             break;
990 
991         case 'B':
992             compact_flags |= SEARCH_COMPACT_NONBLOCKING;
993             break;
994 
995         case 'C':               /* alt config file */
996             alt_config = optarg;
997             break;
998 
999         case 'F':
1000             compact_flags |= SEARCH_COMPACT_FILTER;
1001             break;
1002 
1003         case 'X':
1004             compact_flags |= SEARCH_COMPACT_REINDEX;
1005             break;
1006 
1007         case 'L':
1008             reindex_minlevel = atoi(optarg);
1009             if (reindex_minlevel < 1 || reindex_minlevel > SEARCH_INDEXLEVEL_MAX) {
1010                 fprintf(stderr, "%s: %s: invalid level argument\n", argv[0], optarg);
1011                 exit(EX_USAGE);
1012             }
1013             xapindexed_mode = 1;
1014             break;
1015 
1016         case 'P':
1017             reindex_partials = 1;
1018             xapindexed_mode = 1;
1019             break;
1020 
1021         case 'Z':
1022             xapindexed_mode = 1;
1023             break;
1024 
1025         case 'p':
1026             allow_partials = 1;
1027             break;
1028 
1029         case 'D':
1030             allow_duplicateparts = 1;
1031             break;
1032 
1033         case 'N':
1034             name_starts_from = optarg;
1035             break;
1036 
1037         case 'R':               /* rolling indexer */
1038             if (mode != UNKNOWN) usage(argv[0]);
1039             mode = ROLLING;
1040             incremental_mode = 1; /* always incremental if rolling */
1041             break;
1042 
1043         case 'l':               /* list paths */
1044             if (mode != UNKNOWN) usage(argv[0]);
1045             mode = LIST;
1046             break;
1047 
1048         case 'S':               /* sleep time in seconds */
1049             sleepmicroseconds = (atof(optarg) * 1000000);
1050             break;
1051 
1052         case 'd':               /* foreground (with -R) */
1053             background = 0;
1054             break;
1055 
1056         /* This option is deliberately undocumented, for testing only */
1057         case 'e':               /* add a search term */
1058             if (mode != UNKNOWN && mode != SEARCH) usage(argv[0]);
1059             query = optarg;
1060             mode = SEARCH;
1061             break;
1062 
1063         case 'f': /* alternate synclogfile used in SYNCLOG mode */
1064             synclogfile = optarg;
1065             mode = SYNCLOG;
1066             break;
1067 
1068         /* This option is deliberately undocumented, for testing only */
1069         case 'm':               /* multi-folder in SEARCH mode */
1070             if (mode != UNKNOWN && mode != SEARCH) usage(argv[0]);
1071             multi_folder = 1;
1072             mode = SEARCH;
1073             break;
1074 
1075         case 'n':               /* sync channel name (with -R) */
1076             channel = optarg;
1077             break;
1078 
1079         case 'o':               /* copy one DB rather than compressing */
1080             compact_flags |= SEARCH_COMPACT_COPYONE;
1081             break;
1082 
1083         case 'U':
1084             compact_flags |= SEARCH_COMPACT_ONLYUPGRADE;
1085             break;
1086 
1087         case 'v':               /* verbose */
1088             verbose++;
1089             break;
1090 
1091         case 'r':               /* recurse */
1092             if (mode != UNKNOWN && mode != INDEXER && mode != AUDIT) usage(argv[0]);
1093             recursive_flag = 1;
1094             if (mode == UNKNOWN) mode = INDEXER;
1095             break;
1096 
1097         case 'i':               /* incremental mode */
1098             incremental_mode = 1;
1099             break;
1100 
1101         case 'a':               /* use /squat annotation */
1102             if (mode != UNKNOWN && mode != INDEXER) usage(argv[0]);
1103             annotation_flag = 1;
1104             mode = INDEXER;
1105             break;
1106 
1107         case 's':
1108             if (mode != UNKNOWN && mode != INDEXER) usage(argv[0]);
1109             if (optarg) {
1110                 char *end;
1111                 long val = strtol(optarg, &end, 10);
1112                 if (val < 0 || val > INT_MAX || *end) {
1113                     usage(argv[0]);
1114                 }
1115                 skip_unmodified = (int) val;
1116             }
1117             else {
1118                 skip_unmodified = 60;
1119             }
1120             mode = INDEXER;
1121             break;
1122 
1123         case 'z':
1124             if (mode != UNKNOWN && mode != COMPACT) usage(argv[0]);
1125             desttier = optarg;
1126             mode = COMPACT;
1127             break;
1128 
1129         case 't':
1130             if (mode != UNKNOWN && mode != COMPACT) usage(argv[0]);
1131             srctiers = strarray_split(optarg, ",", 0);
1132             mode = COMPACT;
1133             break;
1134 
1135         case 'T':
1136             if (mode != UNKNOWN && mode != COMPACT) usage(argv[0]);
1137             reindextiers = strarray_split(optarg, ",", 0);
1138             mode = COMPACT;
1139             break;
1140 
1141         case 'u':
1142             user_mode = 1;
1143             break;
1144 
1145         case 'h':
1146         default:
1147             usage("squatter");
1148         }
1149     }
1150 
1151     if (xapindexed_mode) {
1152         /* we have two different flag types for the two different modes,
1153          * set both of them even though only one will be used */
1154         compact_flags |= SEARCH_COMPACT_XAPINDEXED;
1155     }
1156 
1157     compact_flags |= SEARCH_VERBOSE(verbose);
1158 
1159     if (mode == UNKNOWN)
1160         mode = INDEXER;
1161 
1162     if (mode == COMPACT && (!desttier || !srctiers)) {
1163         /* need both src and dest for compact */
1164         usage("squatter");
1165     }
1166 
1167     cyrus_init(alt_config, "squatter", init_flags, CONFIG_NEED_PARTITION_DATA);
1168 
1169     /* Set namespace -- force standard (internal) */
1170     if ((r = mboxname_init_namespace(&squat_namespace, 1)) != 0) {
1171         fatal(error_message(r), EX_CONFIG);
1172     }
1173 
1174     /* make sure we're correctly configured */
1175     if ((r = search_check_config(&errstr))) {
1176         if (errstr)
1177             fatal(errstr, EX_CONFIG);
1178         else
1179             fatal(error_message(r), EX_CONFIG);
1180     }
1181 
1182     if (mode == ROLLING || mode == SYNCLOG) {
1183         signals_set_shutdown(&shut_down);
1184         signals_add_handlers(0);
1185     }
1186 
1187     index_text_extractor_init(NULL);
1188 
1189     const char *conf;
1190     conf = config_getstring(IMAPOPT_SEARCH_INDEX_SKIP_DOMAINS);
1191     if (conf) skip_domains = strarray_split(conf, " ", STRARRAY_TRIM);
1192     conf = config_getstring(IMAPOPT_SEARCH_INDEX_SKIP_USERS);
1193     if (conf) skip_users = strarray_split(conf, " ", STRARRAY_TRIM);
1194 
1195     switch (mode) {
1196     case UNKNOWN:
1197         break;
1198     case INDEXER:
1199         /* -r requires at least one mailbox */
1200         if (recursive_flag && optind == argc) usage(argv[0]);
1201         expand_mboxnames(&mboxnames, argc-optind, (const char **)argv+optind, user_mode);
1202         syslog(LOG_NOTICE, "indexing mailboxes");
1203         r = do_indexer(&mboxnames);
1204         syslog(LOG_NOTICE, "done indexing mailboxes");
1205         break;
1206     case SEARCH:
1207         if (recursive_flag && optind == argc) usage(argv[0]);
1208         expand_mboxnames(&mboxnames, argc-optind, (const char **)argv+optind, user_mode);
1209         r = do_search(query, !multi_folder, &mboxnames);
1210         break;
1211     case ROLLING:
1212         if (background && !getenv("CYRUS_ISDAEMON"))
1213             become_daemon();
1214         do_rolling(channel);
1215         /* never returns */
1216         break;
1217     case SYNCLOG:
1218         r = do_synclogfile(synclogfile);
1219         break;
1220     case COMPACT:
1221         if (recursive_flag && optind == argc) usage(argv[0]);
1222         expand_mboxnames(&mboxnames, argc-optind, (const char **)argv+optind, user_mode);
1223         r = do_compact(&mboxnames, reindextiers, srctiers, desttier, compact_flags);
1224         break;
1225     case AUDIT:
1226         if (recursive_flag && optind == argc) usage(argv[0]);
1227         expand_mboxnames(&mboxnames, argc-optind, (const char **)argv+optind, user_mode);
1228         r = do_audit(&mboxnames);
1229         break;
1230     case LIST:
1231         if (recursive_flag && optind == argc) usage(argv[0]);
1232         expand_mboxnames(&mboxnames, argc-optind, (const char **)argv+optind, user_mode);
1233         r = do_list(&mboxnames);
1234         break;
1235     }
1236 
1237     strarray_fini(&mboxnames);
1238     shut_down(r ? EX_TEMPFAIL : 0);
1239 }
1240