1 /* squatter.c -- SQUAT-based message indexing tool
2  *
3  * Copyright (c) 1994-2012 Carnegie Mellon University.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in
14  *    the documentation and/or other materials provided with the
15  *    distribution.
16  *
17  * 3. The name "Carnegie Mellon University" must not be used to
18  *    endorse or promote products derived from this software without
19  *    prior written permission. For permission or any legal
20  *    details, please contact
21  *      Carnegie Mellon University
22  *      Center for Technology Transfer and Enterprise Creation
23  *      4615 Forbes Avenue
24  *      Suite 302
25  *      Pittsburgh, PA  15213
26  *      (412) 268-7393, fax: (412) 268-7395
27  *      innovation@andrew.cmu.edu
28  *
29  * 4. Redistributions of any form whatsoever must retain the following
30  *    acknowledgment:
31  *    "This product includes software developed by Computing Services
32  *     at Carnegie Mellon University (http://www.cmu.edu/computing/)."
33  *
34  * CARNEGIE MELLON UNIVERSITY DISCLAIMS ALL WARRANTIES WITH REGARD TO
35  * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
36  * AND FITNESS, IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
37  * FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
38  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
39  * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
40  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
41  */
42 
43 /*
44   This is the tool that creates/updates search indexes for Cyrus mailboxes.
45 
  Despite the name, it handles whichever search engine is configured
  by the 'search_engine' option in imapd.conf.
48 */
49 
50 #include <config.h>
51 
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <stdlib.h>
#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/poll.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <sysexits.h>
#include <syslog.h>
#include <string.h>
65 
66 #include "annotate.h"
67 #include "assert.h"
68 #include "bitvector.h"
69 #include "bsearch.h"
70 #include "mboxlist.h"
71 #include "global.h"
72 #include "search_engines.h"
73 #include "sync_log.h"
74 #include "mailbox.h"
75 #include "xmalloc.h"
76 #include "xstrlcpy.h"
77 #include "xstrlcat.h"
78 #include "ptrarray.h"
79 #include "tok.h"
80 #include "acl.h"
81 #include "seen.h"
82 #include "mboxname.h"
83 #include "index.h"
84 #include "message.h"
85 #include "util.h"
86 
87 /* generated headers are not necessarily in current directory */
88 #include "imap/imap_err.h"
89 
90 extern char *optarg;
91 extern int optind;
92 
93 /* current namespace */
94 static struct namespace squat_namespace;
95 
96 const int SKIP_FUZZ = 60;
97 
98 static int verbose = 0;
99 static int incremental_mode = 0;
100 static int batch_mode = 0;
101 static int xapindexed_mode = 0;
102 static int recursive_flag = 0;
103 static int annotation_flag = 0;
104 static int sleepmicroseconds = 0;
105 static const char *temp_root_dir = NULL;
106 static search_text_receiver_t *rx = NULL;
107 
108 static const char *name_starts_from = NULL;
109 
110 static void shut_down(int code) __attribute__((noreturn));
111 
/*
 * Write the command-line synopsis to stderr and terminate the process with
 * EX_USAGE.
 *
 * name: program name substituted into the first line of the help text.
 *
 * Never returns, in spite of the int return type.
 */
__attribute__((noreturn)) static int usage(const char *name)
{
    /* the only conversion in the help text is the %s for the program name */
    static const char help_text[] =
        "usage: %s [mode] [options] [source]\n"
        "\n"
        "Mode flags: \n"
        "  none        index [source] (default)\n"
        "  -a          index [source] using /squat annotations\n"
        "  -r          index [source] recursively\n"
        "  -f file     index from synclog file\n"
        "  -R          start rolling indexer\n"
        "  -z tier     compact to tier\n"
        "  -l          list paths\n"
        "\n"
        "Index mode options:\n"
        "  -i          index incrementally\n"
        "  -N name     index mailbox names starting with name\n"
        "  -S seconds  sleep seconds between indexing mailboxes\n"
        "  -Z          Xapian: use internal index rather than cyrus.indexed.db\n"
        "\n"
        "Index sources:\n"
        "  none        all mailboxes (default)\n"
        "  mailbox...  index mailboxes\n"
        "  -u user...  index mailboxes of users\n"
        "\n"
        "Rolling indexer options:\n"
        "  -n channel  listen to channel\n"
        "  -d          don't background process\n"
        "\n"
        "Compact mode options:\n"
        "  -t tier...  compact from tiers\n"
        "  -F          filter during compaction\n"
        "  -T dir      use temporary directory dir during compaction\n"
        "  -X          reindex during compaction\n"
        "  -o          copy db rather compacting\n"
        "  -U          only compact if re-indexing\n"
        "\n"
        "General options:\n"
        "  -v          be verbose\n"
        "  -h          show usage\n";

    fprintf(stderr, help_text, name);

    exit(EX_USAGE);
}
156 
157 /* ====================================================================== */
158 
/*
 * Put the process in the background: redirect stdin/stdout/stderr to
 * /dev/null, close every descriptor from 3 up to the descriptor table size,
 * then fork.  The parent exits with status 0 and only the child returns to
 * the caller.  Failure to open /dev/null or to fork ends the process with
 * status 1.
 */
static void become_daemon(void)
{
    const int maxfd = getdtablesize();
    int devnull = open("/dev/null", O_RDWR, 0);
    int fd;
    pid_t child;

    if (devnull < 0) {
        perror("/dev/null");
        exit(1);
    }

    /* the three standard streams all end up on /dev/null */
    dup2(devnull, STDIN_FILENO);
    dup2(devnull, STDOUT_FILENO);
    dup2(devnull, STDERR_FILENO);

    /* drop everything else we inherited; this sweep covers devnull as well */
    fd = 3;
    while (fd < maxfd) {
        close(fd);
        fd++;
    }

    child = fork();
    switch (child) {
    case -1:
        perror("fork");
        exit(1);

    case 0:
        /* child: carry on as the background process */
        return;

    default:
        /* parent */
        exit(0);
    }
}
186 
should_index(const char * name)187 static int should_index(const char *name)
188 {
189     mbentry_t *mbentry = NULL;
190     /* Skip remote mailboxes */
191     int r = mboxlist_lookup(name, &mbentry, NULL);
192     if (r) {
193         /* Convert internal name to external */
194         char *extname = mboxname_to_external(name, &squat_namespace, NULL);
195         if (verbose) {
196             printf("error looking up %s: %s\n",
197                    extname, error_message(r));
198         }
199         syslog(LOG_INFO, "error looking up %s: %s\n",
200                extname, error_message(r));
201 
202         free(extname);
203         return 0;
204     }
205 
206     // skip remote or not-real mailboxes
207     if (mbentry->mbtype & (MBTYPE_REMOTE|MBTYPE_DELETED|MBTYPE_INTERMEDIATE)) {
208         mboxlist_entry_free(&mbentry);
209         return 0;
210     }
211 
212     // skip email submissions
213     if (mboxname_issubmissionmailbox(mbentry->name, mbentry->mbtype)) {
214         mboxlist_entry_free(&mbentry);
215         return 0;
216     }
217 
218     // skip COLLECTION mailboxes (just files)
219     if (mbentry->mbtype & MBTYPE_COLLECTION) {
220         mboxlist_entry_free(&mbentry);
221         return 0;
222     }
223 
224     mboxlist_entry_free(&mbentry);
225     return 1;
226 }
227 
228 /* ====================================================================== */
229 
230 /* This is called once for each mailbox we're told to index. */
index_one(const char * name,int blocking)231 static int index_one(const char *name, int blocking)
232 {
233     struct mailbox *mailbox = NULL;
234     int r;
235     int flags = 0;
236 
237     if (incremental_mode)
238         flags |= SEARCH_UPDATE_INCREMENTAL;
239     if (batch_mode)
240         flags |= SEARCH_UPDATE_BATCH;
241     if (xapindexed_mode)
242         flags |= SEARCH_UPDATE_XAPINDEXED;
243 
244     /* Convert internal name to external */
245     char *extname = mboxname_to_external(name, &squat_namespace, NULL);
246 
247     /* make sure the mailbox (or an ancestor) has
248        /vendor/cmu/cyrus-imapd/squat set to "true" */
249     if (annotation_flag) {
250         char buf[MAX_MAILBOX_BUFFER] = "", *p;
251         struct buf attrib = BUF_INITIALIZER;
252         int domainlen = 0;
253 
254         if (config_virtdomains && (p = strchr(name, '!')))
255             domainlen = p - name + 1;
256 
257         strlcpy(buf, name, sizeof(buf));
258 
259         /* since mailboxes inherit /vendor/cmu/cyrus-imapd/squat,
260            we need to iterate all the way up to "" (server entry) */
261         while (1) {
262             r = annotatemore_lookup(buf, IMAP_ANNOT_NS "squat", "",
263                                     &attrib);
264 
265             if (r ||                            /* error */
266                 attrib.s ||                     /* found an entry */
267                 !buf[0]) {                      /* done recursing */
268                 break;
269             }
270 
271             p = strrchr(buf, '.');              /* find parent mailbox */
272 
273             if (p && (p - buf > domainlen))     /* don't split subdomain */
274                 *p = '\0';
275             else if (!buf[domainlen])           /* server entry */
276                 buf[0] = '\0';
277             else                                /* domain entry */
278                 buf[domainlen] = '\0';
279         }
280 
281         if (r || !attrib.s || strcasecmp(attrib.s, "true")) {
282             buf_free(&attrib);
283             free(extname);
284             return 0;
285         }
286         buf_free(&attrib);
287     }
288 
289 again:
290     if (blocking)
291         r = mailbox_open_irl(name, &mailbox);
292     else
293         r = mailbox_open_irlnb(name, &mailbox);
294 
295     if (r == IMAP_MAILBOX_LOCKED) {
296         if (verbose) syslog(LOG_INFO, "mailbox %s locked, retrying", extname);
297         free(extname);
298         return r;
299     }
300     if (r) {
301         if (verbose) {
302             printf("error opening %s: %s\n", extname, error_message(r));
303         }
304         syslog(LOG_INFO, "error opening %s: %s\n", extname, error_message(r));
305         free(extname);
306 
307         return r;
308     }
309 
310     syslog(LOG_INFO, "indexing mailbox %s... ", extname);
311     if (verbose > 0) {
312         printf("Indexing mailbox %s... ", extname);
313     }
314 
315     r = search_update_mailbox(rx, mailbox, flags);
316 
317     mailbox_close(&mailbox);
318 
319     /* in non-blocking (rolling) mode, only do one batch per mailbox at
320      * a time for fairness [IRIS-2471].  The squatter will re-insert the
321      * mailbox in the queue */
322     if (blocking && r == IMAP_AGAIN) goto again;
323     free(extname);
324 
325     return r;
326 }
327 
addmbox(const mbentry_t * mbentry,void * rock)328 static int addmbox(const mbentry_t *mbentry, void *rock)
329 {
330     strarray_t *sa = (strarray_t *) rock;
331 
332     if (strcmpsafe(mbentry->name, name_starts_from) < 0)
333         return 0;
334     if (mboxname_isdeletedmailbox(mbentry->name, NULL))
335         return 0;
336 
337     strarray_append(sa, mbentry->name);
338     return 0;
339 }
340 
/*
 * Turn the command-line sources into a list of internal mailbox names.
 *
 * sa:         array receiving the names (filled through addmbox()).
 * nmboxnames: number of entries in mboxnames.
 * mboxnames:  the sources given on the command line.
 * user_mode:  nonzero means each source is a user whose mailbox tree is
 *             added; zero means each source is an external mailbox name,
 *             added with its children only when recursive_flag is set.
 *
 * With no sources at all, every mailbox is added; recursive_flag must not
 * be set in that case.
 */
static void expand_mboxnames(strarray_t *sa, int nmboxnames,
                             const char **mboxnames, int user_mode)
{
    const int tree_flags = recursive_flag ? 0 : MBOXTREE_SKIP_CHILDREN;
    int i;

    if (nmboxnames == 0) {
        assert(!recursive_flag);
        mboxlist_allmbox(NULL, addmbox, sa, 0);
        return;
    }

    for (i = 0; i < nmboxnames; i++) {
        const char *source = mboxnames[i];

        if (user_mode) {
            mboxlist_usermboxtree(source, NULL, addmbox, sa, 0);
            continue;
        }

        /* Translate any separators in mailboxname */
        char *intname = mboxname_from_external(source, &squat_namespace, NULL);
        mboxlist_mboxtree(intname, addmbox, sa, tree_flags);
        free(intname);
    }
}
364 
do_indexer(const strarray_t * mboxnames)365 static int do_indexer(const strarray_t *mboxnames)
366 {
367     int r = 0;
368     int i;
369 
370     rx = search_begin_update(verbose);
371     if (rx == NULL)
372         return 0;       /* no indexer defined */
373 
374     for (i = 0 ; i < strarray_size(mboxnames) ; i++) {
375         const char *mboxname = strarray_nth(mboxnames, i);
376         if (!should_index(mboxname)) continue;
377         r = index_one(mboxname, /*blocking*/1);
378         if (r == IMAP_MAILBOX_NONEXISTENT)
379             r = 0;
380         if (r == IMAP_MAILBOX_LOCKED)
381             r = 0; /* XXX - try again? */
382         if (r) break;
383         if (sleepmicroseconds)
384             usleep(sleepmicroseconds);
385     }
386 
387     search_end_update(rx);
388 
389     return r;
390 }
391 
squatter_build_query(search_builder_t * bx,const char * query)392 static int squatter_build_query(search_builder_t *bx, const char *query)
393 {
394     tok_t tok = TOK_INITIALIZER(query, NULL, 0);
395     char *p;
396     char *q;
397     int r = 0;
398     int part;
399     charset_t utf8 = charset_lookupname("utf-8");
400 
401     while ((p = tok_next(&tok))) {
402         if (!strncasecmp(p, "__begin:", 8)) {
403             q = p + 8;
404             if (!strcasecmp(q, "and"))
405                 bx->begin_boolean(bx, SEARCH_OP_AND);
406             else if (!strcasecmp(q, "or"))
407                 bx->begin_boolean(bx, SEARCH_OP_OR);
408             else if (!strcasecmp(q, "not"))
409                 bx->begin_boolean(bx, SEARCH_OP_NOT);
410             else
411                 goto error;
412             continue;
413         }
414         if (!strncasecmp(p, "__end:", 6)) {
415             q = p + 6;
416             if (!strcasecmp(q, "and"))
417                 bx->end_boolean(bx, SEARCH_OP_AND);
418             else if (!strcasecmp(q, "or"))
419                 bx->end_boolean(bx, SEARCH_OP_OR);
420             else if (!strcasecmp(q, "not"))
421                 bx->end_boolean(bx, SEARCH_OP_NOT);
422             else
423                 goto error;
424             continue;
425         }
426 
427         /* everything else is a ->match() of some kind */
428         q = strchr(p, ':');
429         if (q) q++;
430         if (!q) {
431             part = SEARCH_PART_ANY;
432             q = p;
433         }
434         else if (!strncasecmp(p, "to:", 3))
435             part = SEARCH_PART_TO;
436         else if (!strncasecmp(p, "from:", 5))
437             part = SEARCH_PART_FROM;
438         else if (!strncasecmp(p, "cc:", 3))
439             part = SEARCH_PART_CC;
440         else if (!strncasecmp(p, "bcc:", 4))
441             part = SEARCH_PART_BCC;
442         else if (!strncasecmp(p, "subject:", 8))
443             part = SEARCH_PART_SUBJECT;
444         else if (!strncasecmp(p, "listid:", 7))
445             part = SEARCH_PART_LISTID;
446         else if (!strncasecmp(p, "contenttype:", 12))
447             part = SEARCH_PART_TYPE;
448         else if (!strncasecmp(p, "header:", 7))
449             part = SEARCH_PART_HEADERS;
450         else if (!strncasecmp(p, "body:", 5))
451             part = SEARCH_PART_BODY;
452         else
453             goto error;
454 
455         q = charset_convert(q, utf8, charset_flags);
456         bx->match(bx, part, q);
457         free(q);
458     }
459     r = 0;
460 
461 out:
462     charset_free(&utf8);
463     tok_fini(&tok);
464     return r;
465 
466 error:
467     syslog(LOG_ERR, "bad query expression at \"%s\"", p);
468     r = IMAP_PROTOCOL_ERROR;
469     goto out;
470 }
471 
/*
 * Search-hit callback: print one matching message on stdout.
 *
 * rock points at an int.  When that int is nonzero only the uid is printed;
 * otherwise the mailbox name, uidvalidity and uid are all printed.
 * partids is not used.  Always returns 0.
 */
static int print_search_hit(const char *mboxname, uint32_t uidvalidity,
                            uint32_t uid,
                            const strarray_t *partids __attribute__((unused)),
                            void *rock)
{
    const int *single = rock;

    if (!*single)
        printf("mailbox %s\nuidvalidity %u\nuid %u\n", mboxname, uidvalidity, uid);
    else
        printf("uid %u\n", uid);

    return 0;
}
485 
do_list(const strarray_t * mboxnames)486 static int do_list(const strarray_t *mboxnames)
487 {
488     char *prev_userid = NULL;
489     strarray_t files = STRARRAY_INITIALIZER;
490     int i;
491     int r = 0;
492 
493     for (i = 0; i < strarray_size(mboxnames); i++) {
494         const char *mboxname = strarray_nth(mboxnames, i);
495         char *userid = mboxname_to_userid(mboxname);
496         if (!userid) continue;
497 
498         if (!strcmpsafe(prev_userid, userid)) {
499             free(userid);
500             continue;
501         }
502 
503         r = search_list_files(userid, &files);
504         if (r) break;
505 
506         int j;
507         for (j = 0; j < strarray_size(&files); j++) {
508             printf("%s\n", strarray_nth(&files, j));
509         }
510 
511         strarray_truncate(&files, 0);
512 
513         free(prev_userid);
514         prev_userid = userid;
515 
516         if (sleepmicroseconds)
517             usleep(sleepmicroseconds);
518     }
519 
520     strarray_fini(&files);
521     free(prev_userid);
522     return r;
523 }
524 
compact_mbox(const char * userid,const strarray_t * srctiers,const char * desttier,int flags)525 static int compact_mbox(const char *userid, const strarray_t *srctiers,
526                         const char *desttier, int flags)
527 {
528     return search_compact(userid, temp_root_dir, srctiers, desttier, flags);
529 }
530 
do_compact(const strarray_t * mboxnames,const strarray_t * srctiers,const char * desttier,int flags)531 static int do_compact(const strarray_t *mboxnames, const strarray_t *srctiers,
532                       const char *desttier, int flags)
533 {
534     char *prev_userid = NULL;
535     int i;
536 
537     for (i = 0; i < strarray_size(mboxnames); i++) {
538         const char *mboxname = strarray_nth(mboxnames, i);
539         char *userid = mboxname_to_userid(mboxname);
540         if (!userid) continue;
541 
542         if (!strcmpsafe(prev_userid, userid)) {
543             free(userid);
544             continue;
545         }
546 
547         int retry;
548         for (retry = 1; retry <= 3; retry++) {
549             int r = compact_mbox(userid, srctiers, desttier, flags);
550             if (!r) break;
551             syslog(LOG_ERR, "IOERROR: failed to compact %s (%d): %s",
552                    userid, retry, error_message(r));
553         }
554 
555         free(prev_userid);
556         prev_userid = userid;
557 
558         if (sleepmicroseconds)
559             usleep(sleepmicroseconds);
560     }
561 
562     free(prev_userid);
563     return 0;
564 }
565 
do_search(const char * query,int single,const strarray_t * mboxnames)566 static int do_search(const char *query, int single, const strarray_t *mboxnames)
567 {
568     struct mailbox *mailbox = NULL;
569     int i;
570     int r;
571     search_builder_t *bx;
572     int opts = SEARCH_VERBOSE(verbose);
573 
574     if (!single)
575         opts |= SEARCH_MULTIPLE;
576 
577     for (i = 0 ; i < mboxnames->count ; i++) {
578         const char *mboxname = mboxnames->data[i];
579         if (!should_index(mboxname)) continue;
580 
581         r = mailbox_open_irl(mboxname, &mailbox);
582         if (r) {
583             fprintf(stderr, "Cannot open mailbox %s: %s\n",
584                     mboxname, error_message(r));
585             continue;
586         }
587         if (single)
588             printf("mailbox %s\n", mboxname);
589 
590         bx = search_begin_search(mailbox, opts);
591         if (bx) {
592             r = squatter_build_query(bx, query);
593             if (!r)
594                 bx->run(bx, print_search_hit, &single);
595             search_end_search(bx);
596         }
597 
598         mailbox_close(&mailbox);
599     }
600 
601     return 0;
602 }
603 
read_sync_log_items(sync_log_reader_t * slr)604 static strarray_t *read_sync_log_items(sync_log_reader_t *slr)
605 {
606     const char *args[3];
607     strarray_t *mboxnames = strarray_new();
608 
609     while (sync_log_reader_getitem(slr, args) == 0) {
610         if (!strcmp(args[0], "APPEND")) {
611             if (!mboxname_isdeletedmailbox(args[1], NULL))
612                 strarray_add(mboxnames, args[1]);
613         }
614         else if (!strcmp(args[0], "USER"))
615             mboxlist_usermboxtree(args[1], NULL, addmbox, mboxnames, /*flags*/0);
616     }
617 
618     return mboxnames;
619 }
620 
do_synclogfile(const char * synclogfile)621 static int do_synclogfile(const char *synclogfile)
622 {
623     strarray_t *mboxnames = NULL;
624     sync_log_reader_t *slr;
625     int nskipped = 0;
626     int i;
627     int r;
628 
629     slr = sync_log_reader_create_with_filename(synclogfile);
630     r = sync_log_reader_begin(slr);
631     if (r) goto out;
632     mboxnames = read_sync_log_items(slr);
633     sync_log_reader_end(slr);
634 
635     /* sort mboxnames for locality of reference in file processing mode */
636     strarray_sort(mboxnames, cmpstringp_raw);
637 
638     signals_poll();
639 
640     /* have some due items in the queue, try to index them */
641     rx = search_begin_update(verbose);
642     if (NULL == rx) {
643         r = 1;
644         goto out;
645     }
646     for (i = 0; i < strarray_size(mboxnames); i++) {
647         const char *mboxname = strarray_nth(mboxnames, i);
648         if (!should_index(mboxname)) continue;
649         if (verbose > 1)
650             syslog(LOG_INFO, "do_synclogfile: indexing %s", mboxname);
651         r = index_one(mboxname, /*blocking*/1);
652         if (r == IMAP_MAILBOX_NONEXISTENT)
653             r = 0;
654         if (r == IMAP_MAILBOX_LOCKED || r == IMAP_AGAIN) {
655             nskipped++;
656             if (nskipped > 10000) {
657                 syslog(LOG_ERR, "IOERROR: skipped too many times at %s", mboxname);
658                 break;
659             }
660             r = 0;
661             /* try again at the end */
662             strarray_append(mboxnames, mboxname);
663         }
664         if (r) {
665             syslog(LOG_ERR, "IOERROR: failed to index %s: %s",
666                    mboxname, error_message(r));
667             break;
668         }
669         if (sleepmicroseconds)
670             usleep(sleepmicroseconds);
671     }
672     search_end_update(rx);
673     rx = NULL;
674 
675 out:
676     strarray_free(mboxnames);
677     sync_log_reader_free(slr);
678     return r;
679 }
680 
do_rolling(const char * channel)681 static void do_rolling(const char *channel)
682 {
683     strarray_t *mboxnames = NULL;
684     sync_log_reader_t *slr;
685     int i;
686     int r;
687 
688     slr = sync_log_reader_create_with_channel(channel);
689 
690     for (;;) {
691         int sig = signals_poll();
692 
693         if (sig == SIGHUP && getenv("CYRUS_ISDAEMON")) {
694             syslog(LOG_DEBUG, "received SIGHUP, shutting down gracefully\n");
695             sync_log_reader_end(slr);
696             shut_down(0);
697         }
698 
699         if (shutdown_file(NULL, 0))
700             shut_down(EX_TEMPFAIL);
701 
702         r = sync_log_reader_begin(slr);
703         if (r) { /* including IMAP_AGAIN */
704             usleep(100000);    /* 1/10th second */
705             continue;
706         }
707 
708         mboxnames = read_sync_log_items(slr);
709 
710         if (mboxnames->count) {
711             /* have some due items in the queue, try to index them */
712             rx = search_begin_update(verbose);
713             if (NULL == rx) {
714                 /* XXX if xapian, probably don't have conversations enabled? */
715                 fatal("could not construct search text receiver", EX_CONFIG);
716             }
717             for (i = 0; i < strarray_size(mboxnames); i++) {
718                 const char *mboxname = strarray_nth(mboxnames, i);
719                 if (!should_index(mboxname)) continue;
720                 if (verbose > 1)
721                     syslog(LOG_INFO, "do_rolling: indexing %s", mboxname);
722                 r = index_one(mboxname, /*blocking*/0);
723                 if (r == IMAP_AGAIN || r == IMAP_MAILBOX_LOCKED) {
724                     /* XXX: alternative, just append to strarray_t *mboxnames ... */
725                     sync_log_channel_append(channel, mboxname);
726                 }
727                 if (sleepmicroseconds)
728                     usleep(sleepmicroseconds);
729             }
730             search_end_update(rx);
731             rx = NULL;
732         }
733 
734         strarray_free(mboxnames);
735         mboxnames = NULL;
736     }
737 
738     /* XXX - we don't really get here... */
739     strarray_free(mboxnames);
740     sync_log_reader_free(slr);
741 }
742 
/*
 * Audit one mailbox with the current receiver (rx).
 *
 * mboxname:  internal name of the mailbox to open.
 * unindexed: bit vector handed to rx->audit_mailbox(); do_audit() prints
 *            the bits set in it as the uids of unindexed messages.
 *
 * Returns 0 on success.  Otherwise returns the first error from opening the
 * mailbox, beginning the audit, or the audit itself; if none of those
 * failed, returns the result of rx->end_mailbox().
 */
static int audit_one(const char *mboxname, bitvector_t *unindexed)
{
    struct mailbox *mailbox = NULL;
    int r2;
    int r;

    r = mailbox_open_irl(mboxname, &mailbox);
    if (r) {
        /* Nothing was opened or begun, so there is nothing to end or
         * close.  This used to fall through and call end_mailbox() with a
         * NULL mailbox. */
        return r;
    }

    r = rx->begin_mailbox(rx, mailbox, SEARCH_UPDATE_AUDIT);
    if (!r)
        r = rx->audit_mailbox(rx, unindexed);

    /* as before, end_mailbox() runs even when begin_mailbox() failed */
    r2 = rx->end_mailbox(rx, mailbox);
    mailbox_close(&mailbox);

    return r ? r : r2;
}
763 
764 
do_audit(const strarray_t * mboxnames)765 static int do_audit(const strarray_t *mboxnames)
766 {
767     rx = search_begin_update(verbose);
768     if (rx == NULL)
769         return 0;       /* no indexer defined */
770 
771     int r = 0;
772     if (!rx->audit_mailbox) {
773         syslog(LOG_ERR, "squatter: indexer does not support audits");
774         r = IMAP_INTERNAL;
775         goto done;
776     }
777 
778     bitvector_t unindexed = BV_INITIALIZER;
779     int i;
780     for (i = 0 ; i < mboxnames->count ; i++) {
781         const char *mboxname = strarray_nth(mboxnames, i);
782         if (!should_index(mboxname)) continue;
783         r = audit_one(mboxname, &unindexed);
784         if (r == IMAP_MAILBOX_NONEXISTENT)
785             r = 0;
786         if (r == IMAP_MAILBOX_LOCKED)
787             r = 0; /* XXX - try again? */
788         if (r) break;
789         if (sleepmicroseconds)
790             usleep(sleepmicroseconds);
791 
792         if (bv_count(&unindexed)) {
793             printf("Unindexed message(s) in %s: ", mboxname);
794             int uid;
795             for (uid = bv_next_set(&unindexed, 0);
796                  uid != -1;
797                  uid = bv_next_set(&unindexed, uid+1)) {
798                 printf("%d ", uid);
799             }
800             printf("\n");
801         }
802         bv_clearall(&unindexed);
803     }
804     bv_fini(&unindexed);
805 
806 done:
807     search_end_update(rx);
808     return r;
809 }
810 
/*
 * Tear down the subsystems this tool uses and leave the process with exit
 * status `code`.  Never returns.  main() also registers this function with
 * signals_set_shutdown() for the rolling and synclog modes.
 */
static void shut_down(int code)
{
    /* the teardown calls are kept in this exact order */
    seen_done();
    cyrus_done();
    index_text_extractor_destroy();

    exit(code);
}
821 
main(int argc,char ** argv)822 int main(int argc, char **argv)
823 {
824     int opt;
825     char *alt_config = NULL;
826     int r = IMAP_NOTFOUND;
827     strarray_t mboxnames = STRARRAY_INITIALIZER;
828     const char *query = NULL;
829     int background = 1;
830     const char *channel = "squatter";
831     const char *synclogfile = NULL;
832     int init_flags = CYRUSINIT_PERROR;
833     int multi_folder = 0;
834     int user_mode = 0;
835     int compact_flags = 0;
836     strarray_t *srctiers = NULL;
837     const char *desttier = NULL;
838     char *errstr = NULL;
839     enum { UNKNOWN, INDEXER, SEARCH, ROLLING, SYNCLOG,
840            COMPACT, AUDIT, LIST } mode = UNKNOWN;
841 
842     setbuf(stdout, NULL);
843 
844     while ((opt = getopt(argc, argv, "C:N:RUXZT:S:Fde:f:mn:riavAz:t:ouhl")) != EOF) {
845         switch (opt) {
846         case 'A':
847             if (mode != UNKNOWN) usage(argv[0]);
848             mode = AUDIT;
849             break;
850 
851         case 'C':               /* alt config file */
852             alt_config = optarg;
853             break;
854 
855         case 'F':
856             compact_flags |= SEARCH_COMPACT_FILTER;
857             break;
858 
859         case 'X':
860             compact_flags |= SEARCH_COMPACT_REINDEX;
861             break;
862 
863         case 'Z':
864             /* we have two different flag types for the two different modes,
865              * set both of them even though only one will be used */
866             xapindexed_mode = 1;
867             compact_flags |= SEARCH_COMPACT_XAPINDEXED;
868             break;
869 
870         case 'N':
871             name_starts_from = optarg;
872             break;
873 
874         case 'R':               /* rolling indexer */
875             if (mode != UNKNOWN) usage(argv[0]);
876             mode = ROLLING;
877             incremental_mode = 1; /* always incremental if rolling */
878             batch_mode = 1;
879             break;
880 
881         case 'l':               /* list paths */
882             if (mode != UNKNOWN) usage(argv[0]);
883             mode = LIST;
884             break;
885 
886         case 'S':               /* sleep time in seconds */
887             sleepmicroseconds = (atof(optarg) * 1000000);
888             break;
889 
890         case 'T':               /* temporary root directory for search */
891             temp_root_dir = optarg;
892             break;
893 
894         case 'd':               /* foreground (with -R) */
895             background = 0;
896             break;
897 
898         /* This option is deliberately undocumented, for testing only */
899         case 'e':               /* add a search term */
900             if (mode != UNKNOWN && mode != SEARCH) usage(argv[0]);
901             query = optarg;
902             mode = SEARCH;
903             break;
904 
905         case 'f': /* alternate synclogfile used in SYNCLOG mode */
906             synclogfile = optarg;
907             mode = SYNCLOG;
908             break;
909 
910         /* This option is deliberately undocumented, for testing only */
911         case 'm':               /* multi-folder in SEARCH mode */
912             if (mode != UNKNOWN && mode != SEARCH) usage(argv[0]);
913             multi_folder = 1;
914             mode = SEARCH;
915             break;
916 
917         case 'n':               /* sync channel name (with -R) */
918             channel = optarg;
919             break;
920 
921         case 'o':               /* copy one DB rather than compressing */
922             compact_flags |= SEARCH_COMPACT_COPYONE;
923             break;
924 
925         case 'U':
926             compact_flags |= SEARCH_COMPACT_ONLYUPGRADE;
927             break;
928 
929         case 'v':               /* verbose */
930             verbose++;
931             break;
932 
933         case 'r':               /* recurse */
934             if (mode != UNKNOWN && mode != INDEXER && mode != AUDIT) usage(argv[0]);
935             recursive_flag = 1;
936             if (mode == UNKNOWN) mode = INDEXER;
937             break;
938 
939         case 'i':               /* incremental mode */
940             incremental_mode = 1;
941             break;
942 
943         case 'a':               /* use /squat annotation */
944             if (mode != UNKNOWN && mode != INDEXER) usage(argv[0]);
945             annotation_flag = 1;
946             mode = INDEXER;
947             break;
948 
949         case 'z':
950             if (mode != UNKNOWN && mode != COMPACT) usage(argv[0]);
951             desttier = optarg;
952             mode = COMPACT;
953             break;
954 
955         case 't':
956             if (mode != UNKNOWN && mode != COMPACT) usage(argv[0]);
957             srctiers = strarray_split(optarg, ",", 0);
958             mode = COMPACT;
959             break;
960 
961         case 'u':
962             user_mode = 1;
963             break;
964 
965         case 'h':
966         default:
967             usage("squatter");
968         }
969     }
970 
971     compact_flags |= SEARCH_VERBOSE(verbose);
972 
973     if (mode == UNKNOWN)
974         mode = INDEXER;
975 
976     if (mode == COMPACT && (!desttier || !srctiers)) {
977         /* need both src and dest for compact */
978         usage("squatter");
979     }
980 
981     cyrus_init(alt_config, "squatter", init_flags, CONFIG_NEED_PARTITION_DATA);
982 
983     /* Set namespace -- force standard (internal) */
984     if ((r = mboxname_init_namespace(&squat_namespace, 1)) != 0) {
985         fatal(error_message(r), EX_CONFIG);
986     }
987 
988     /* make sure we're correctly configured */
989     if ((r = search_check_config(&errstr))) {
990         if (errstr)
991             fatal(errstr, EX_CONFIG);
992         else
993             fatal(error_message(r), EX_CONFIG);
994     }
995 
996     if (mode == ROLLING || mode == SYNCLOG) {
997         signals_set_shutdown(&shut_down);
998         signals_add_handlers(0);
999     }
1000 
1001     index_text_extractor_init(NULL);
1002 
1003     switch (mode) {
1004     case UNKNOWN:
1005         break;
1006     case INDEXER:
1007         /* -r requires at least one mailbox */
1008         if (recursive_flag && optind == argc) usage(argv[0]);
1009         expand_mboxnames(&mboxnames, argc-optind, (const char **)argv+optind, user_mode);
1010         syslog(LOG_NOTICE, "indexing mailboxes");
1011         r = do_indexer(&mboxnames);
1012         syslog(LOG_NOTICE, "done indexing mailboxes");
1013         break;
1014     case SEARCH:
1015         if (recursive_flag && optind == argc) usage(argv[0]);
1016         expand_mboxnames(&mboxnames, argc-optind, (const char **)argv+optind, user_mode);
1017         r = do_search(query, !multi_folder, &mboxnames);
1018         break;
1019     case ROLLING:
1020         if (background && !getenv("CYRUS_ISDAEMON"))
1021             become_daemon();
1022         do_rolling(channel);
1023         /* never returns */
1024         break;
1025     case SYNCLOG:
1026         r = do_synclogfile(synclogfile);
1027         break;
1028     case COMPACT:
1029         if (recursive_flag && optind == argc) usage(argv[0]);
1030         expand_mboxnames(&mboxnames, argc-optind, (const char **)argv+optind, user_mode);
1031         r = do_compact(&mboxnames, srctiers, desttier, compact_flags);
1032         break;
1033     case AUDIT:
1034         if (recursive_flag && optind == argc) usage(argv[0]);
1035         expand_mboxnames(&mboxnames, argc-optind, (const char **)argv+optind, user_mode);
1036         r = do_audit(&mboxnames);
1037         break;
1038     case LIST:
1039         if (recursive_flag && optind == argc) usage(argv[0]);
1040         expand_mboxnames(&mboxnames, argc-optind, (const char **)argv+optind, user_mode);
1041         r = do_list(&mboxnames);
1042         break;
1043     }
1044 
1045     strarray_fini(&mboxnames);
1046     shut_down(r ? EX_TEMPFAIL : 0);
1047 }
1048