1 /* squatter.c -- SQUAT-based message indexing tool
2 *
3 * Copyright (c) 1994-2012 Carnegie Mellon University. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. The name "Carnegie Mellon University" must not be used to
18 * endorse or promote products derived from this software without
19 * prior written permission. For permission or any legal
20 * details, please contact
21 * Carnegie Mellon University
22 * Center for Technology Transfer and Enterprise Creation
23 * 4615 Forbes Avenue
24 * Suite 302
25 * Pittsburgh, PA 15213
26 * (412) 268-7393, fax: (412) 268-7395
27 * innovation@andrew.cmu.edu
28 *
29 * 4. Redistributions of any form whatsoever must retain the following
30 * acknowledgment:
31 * "This product includes software developed by Computing Services
32 * at Carnegie Mellon University (http://www.cmu.edu/computing/)."
33 *
34 * CARNEGIE MELLON UNIVERSITY DISCLAIMS ALL WARRANTIES WITH REGARD TO
35 * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
36 * AND FITNESS, IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
37 * FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
38 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
39 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
40 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
41 */
42
43 /*
44 This is the tool that creates/updates search indexes for Cyrus mailboxes.
45
   Despite the name, it handles whichever search engine is configured
   by the 'search_engine' option in imapd.conf.
48 */
49
50 #include <config.h>
51
52 #ifdef HAVE_UNISTD_H
53 #include <unistd.h>
54 #endif
55 #include <stdlib.h>
56 #include <stdio.h>
57 #include <sys/stat.h>
58 #include <sys/types.h>
59 #include <sys/poll.h>
60 #include <errno.h>
61 #include <fcntl.h>
62 #include <sysexits.h>
63 #include <syslog.h>
64 #include <string.h>
65 #include <getopt.h>
66
67 #include "annotate.h"
68 #include "assert.h"
69 #include "bitvector.h"
70 #include "bsearch.h"
71 #include "mboxlist.h"
72 #include "global.h"
73 #include "search_engines.h"
74 #include "sync_log.h"
75 #include "mailbox.h"
76 #include "xmalloc.h"
77 #include "xstrlcpy.h"
78 #include "xstrlcat.h"
79 #include "ptrarray.h"
80 #include "tok.h"
81 #include "acl.h"
82 #include "seen.h"
83 #include "mboxname.h"
84 #include "index.h"
85 #include "message.h"
86 #include "util.h"
87
88 /* generated headers are not necessarily in current directory */
89 #include "imap/imap_err.h"
90
/* getopt(3) parser state, consumed by main() */
extern char *optarg;
extern int optind;

/* current namespace */
static struct namespace squat_namespace;

/* verbosity level; main() increments it once per -v */
static int verbose = 0;
/* -s/--squat-skip DELTA: added to the mailbox index mtime before it is
 * compared with the squat file mtime in index_one(); a negative value
 * (the default) disables that check */
static int skip_unmodified = -1;
/* set by -i and by -R; adds SEARCH_UPDATE_INCREMENTAL in index_one() */
static int incremental_mode = 0;
/* set by -Z, -P and -L; adds SEARCH_UPDATE_XAPINDEXED in index_one() and
 * SEARCH_COMPACT_XAPINDEXED in main() */
static int xapindexed_mode = 0;
/* set by -r; expand_mboxnames() then includes child mailboxes */
static int recursive_flag = 0;
/* set by -a; index_one() then requires the squat annotation to be "true" */
static int annotation_flag = 0;
/* -S value converted to microseconds; passed to usleep() between mailboxes */
static int sleepmicroseconds = 0;
/* set by -p; adds SEARCH_UPDATE_ALLOW_PARTIALS */
static int allow_partials = 0;
/* set by -D; adds SEARCH_UPDATE_ALLOW_DUPPARTS */
static int allow_duplicateparts = 0;
/* set by -P; adds SEARCH_UPDATE_REINDEX_PARTIALS */
static int reindex_partials = 0;
/* -L value (1..SEARCH_INDEXLEVEL_MAX); handed to search_update_mailbox() */
static int reindex_minlevel = 0;
/* receiver returned by search_begin_update() for the update in progress */
static search_text_receiver_t *rx = NULL;

/* lists split from the IMAPOPT_SEARCH_INDEX_SKIP_DOMAINS and
 * IMAPOPT_SEARCH_INDEX_SKIP_USERS config options; consulted by
 * should_index() */
static strarray_t *skip_domains = NULL;
static strarray_t *skip_users = NULL;

/* -N value: should_index() rejects mailbox names that compare lower */
static const char *name_starts_from = NULL;

/* defined near the end of the file; also registered as the signal
 * shutdown handler in main() */
static void shut_down(int code) __attribute__((noreturn));

116
/*
 * Print the command-line synopsis to stderr and terminate the process
 * with exit status EX_USAGE.
 *
 * name: program name interpolated into the first line of the synopsis.
 *
 * This function never returns; the int return type is kept only because
 * it is part of the existing signature.
 */
__attribute__((noreturn)) static int usage(const char *name)
{
    fprintf(stderr,
            "usage: %s [mode] [options] [source]\n"
            "\n"
            "Mode flags: \n"
            " none index [source] (default)\n"
            " -a, --squat-annot index [source] using /squat annotations\n"
            " -r, --recursive index [source] recursively\n"
            " -f, --synclog=FILE index from synclog file\n"
            " -R, --rolling start rolling indexer\n"
            " -z, --compact=TIER compact to TIER\n"
            " -l, --list list paths\n"
            " -A, --audit report unindexed messages\n"
            "\n"
            "Index mode options:\n"
            " -i, --incremental index incrementally\n"
            " -p, --allow-partials allow partially indexed messages\n"
            " -P, --reindex-partials reindex partially indexed messages (implies -Z)\n"
            " -L, --reindex-minlevel=LEVEL reindex messages where indexlevel < LEVEL (implies -Z)\n"
            " -N, --name=NAME index mailbox names starting with NAME\n"
            " -S, --sleep=SECONDS sleep SECONDS between indexing mailboxes\n"
            " -Z, --internalindex Xapian: use internal index rather than cyrus.indexed.db\n"
            " -s, --squat-skip[=DELTA] skip unmodified mailboxes (requires squat backend)\n"
            "\n"
            "Index sources:\n"
            " none all mailboxes (default)\n"
            " mailbox... index mailboxes\n"
            " -u, --user=USER... index mailboxes of USER\n"
            "\n"
            "Rolling indexer options:\n"
            " -n, --channel=CHANNEL listen to CHANNEL\n"
            " -d, --nodaemon don't background process\n"
            "\n"
            "Compact mode options:\n"
            " -t, --srctier=TIER,... compact from TIER\n"
            " -F, --filter filter during compaction\n"
            " -T, --reindex-tier=TIER,... reindex TIER\n"
            " -X, --reindex reindex during compaction\n"
            " -o, --copydb copy db rather compacting\n"
            " -U, --only-upgrade only compact if re-indexing\n"
            /* the short option is -B (see short_options in main()) */
            " -B, --skip-locked skip users that are locked by another process\n"
            "\n"
            "General options:\n"
            " -v, --verbose be verbose\n"
            " -h, --help show usage\n",
            name);

    exit(EX_USAGE);
}
167
168 /* ====================================================================== */
169
/*
 * Put the process in the background: redirect stdin, stdout and stderr
 * to /dev/null, close every descriptor from 3 up to the descriptor
 * table size, then fork.  The parent exits with status 0 and only the
 * child returns to the caller.
 *
 * Exits with status 1 when /dev/null cannot be opened or fork() fails.
 */
static void become_daemon(void)
{
    static const int std_fds[] = { STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO };
    const int table_size = getdtablesize();
    int devnull;
    int i;

    devnull = open("/dev/null", O_RDWR, 0);
    if (devnull < 0) {
        perror("/dev/null");
        exit(1);
    }

    for (i = 0; i < 3; i++)
        dup2(devnull, std_fds[i]);

    /* drop everything above stderr; this will close devnull too */
    i = 3;
    while (i < table_size)
        close(i++);

    switch (fork()) {
    case -1:
        perror("fork");
        exit(1);

    case 0:
        /* child: carry on in the caller */
        return;

    default:
        exit(0); /* parent */
    }
}
197
should_index(const char * name)198 static int should_index(const char *name)
199 {
200 // skip early users
201 if (strcmpsafe(name, name_starts_from) < 0)
202 return 0;
203
204 int ret = 1;
205 mbentry_t *mbentry = NULL;
206 mbname_t *mbname = mbname_from_intname(name);
207 /* Skip remote mailboxes */
208 int r = mboxlist_lookup(name, &mbentry, NULL);
209 if (r) {
210 /* Convert internal name to external */
211 char *extname = mboxname_to_external(name, &squat_namespace, NULL);
212 if (verbose) {
213 printf("error looking up %s: %s\n",
214 extname, error_message(r));
215 }
216 syslog(LOG_INFO, "error looking up %s: %s\n",
217 extname, error_message(r));
218
219 free(extname);
220 ret = 0;
221 goto done;
222 }
223
224 // skip remote or not-real mailboxes
225 if (mbentry->mbtype & (MBTYPE_REMOTE|MBTYPE_DELETED|MBTYPE_INTERMEDIATE)) {
226 ret = 0;
227 goto done;
228 }
229
230 // skip email submissions
231 if (mboxname_issubmissionmailbox(mbentry->name, mbentry->mbtype)) {
232 ret = 0;
233 goto done;
234 }
235
236 // skip COLLECTION mailboxes (just files)
237 if (mbentry->mbtype & MBTYPE_COLLECTION) {
238 ret = 0;
239 goto done;
240 }
241
242 // skip deleted mailboxes
243 if (mbname_isdeleted(mbname)) {
244 ret = 0;
245 goto done;
246 }
247
248 // skip listed domains
249 if (mbname_domain(mbname) && skip_domains &&
250 strarray_find(skip_domains, mbname_domain(mbname), 0) >= 0) {
251 ret = 0;
252 goto done;
253 }
254
255 // skip listed users
256 if (mbname_userid(mbname) && skip_users &&
257 strarray_find(skip_users, mbname_userid(mbname), 0) >= 0) {
258 ret = 0;
259 goto done;
260 }
261
262 done:
263 mbname_free(&mbname);
264 mboxlist_entry_free(&mbentry);
265 return ret;
266 }
267
268 /* ====================================================================== */
269
270 /* This is called once for each mailbox we're told to index. */
index_one(const char * name,int blocking)271 static int index_one(const char *name, int blocking)
272 {
273 struct mailbox *mailbox = NULL;
274 int r;
275 int flags = SEARCH_UPDATE_BATCH;
276
277 if (incremental_mode)
278 flags |= SEARCH_UPDATE_INCREMENTAL;
279 if (xapindexed_mode)
280 flags |= SEARCH_UPDATE_XAPINDEXED;
281 if (allow_partials)
282 flags |= SEARCH_UPDATE_ALLOW_PARTIALS;
283 if (reindex_partials)
284 flags |= SEARCH_UPDATE_REINDEX_PARTIALS;
285 if (allow_duplicateparts)
286 flags |= SEARCH_UPDATE_ALLOW_DUPPARTS;
287
288 /* Convert internal name to external */
289 char *extname = mboxname_to_external(name, &squat_namespace, NULL);
290
291 /* make sure the mailbox (or an ancestor) has
292 /vendor/cmu/cyrus-imapd/squat set to "true" */
293 if (annotation_flag) {
294 char buf[MAX_MAILBOX_BUFFER] = "", *p;
295 struct buf attrib = BUF_INITIALIZER;
296 int domainlen = 0;
297
298 if (config_virtdomains && (p = strchr(name, '!')))
299 domainlen = p - name + 1;
300
301 strlcpy(buf, name, sizeof(buf));
302
303 /* since mailboxes inherit /vendor/cmu/cyrus-imapd/squat,
304 we need to iterate all the way up to "" (server entry) */
305 while (1) {
306 r = annotatemore_lookup(buf, IMAP_ANNOT_NS "squat", "",
307 &attrib);
308
309 if (r || /* error */
310 attrib.s || /* found an entry */
311 !buf[0]) { /* done recursing */
312 break;
313 }
314
315 p = strrchr(buf, '.'); /* find parent mailbox */
316
317 if (p && (p - buf > domainlen)) /* don't split subdomain */
318 *p = '\0';
319 else if (!buf[domainlen]) /* server entry */
320 buf[0] = '\0';
321 else /* domain entry */
322 buf[domainlen] = '\0';
323 }
324
325 if (r || !attrib.s || strcasecmp(attrib.s, "true")) {
326 buf_free(&attrib);
327 free(extname);
328 return 0;
329 }
330 buf_free(&attrib);
331 }
332
333 again:
334 if (blocking)
335 r = mailbox_open_irl(name, &mailbox);
336 else
337 r = mailbox_open_irlnb(name, &mailbox);
338
339 if (r == IMAP_MAILBOX_LOCKED) {
340 if (verbose) syslog(LOG_INFO, "mailbox %s locked, retrying", extname);
341 free(extname);
342 return r;
343 }
344 if (r) {
345 if (verbose) {
346 printf("error opening %s: %s\n", extname, error_message(r));
347 }
348 syslog(LOG_INFO, "error opening %s: %s\n", extname, error_message(r));
349 free(extname);
350
351 return r;
352 }
353
354 syslog(LOG_INFO, "indexing mailbox %s... ", extname);
355 if (verbose > 0) {
356 printf("Indexing mailbox %s... ", extname);
357 }
358
359 if (skip_unmodified >= 0) {
360 const char *fname = mailbox_meta_fname(mailbox, META_SQUAT);
361 struct stat sbuf;
362 if (!stat(fname, &sbuf) &&
363 skip_unmodified + mailbox->index_mtime < sbuf.st_mtime) {
364 syslog(LOG_DEBUG, "Squat skipping mailbox %s", extname);
365 if (verbose > 0) {
366 printf("Skipping mailbox %s\n", extname);
367 }
368 mailbox_close(&mailbox);
369 free(extname);
370 return 0;
371 }
372 }
373
374 r = search_update_mailbox(rx, mailbox, reindex_minlevel, flags);
375
376 mailbox_close(&mailbox);
377
378 /* in non-blocking (rolling) mode, only do one batch per mailbox at
379 * a time for fairness [IRIS-2471]. The squatter will re-insert the
380 * mailbox in the queue */
381 if (blocking && r == IMAP_AGAIN) goto again;
382 free(extname);
383
384 return r;
385 }
386
addmbox(const mbentry_t * mbentry,void * rock)387 static int addmbox(const mbentry_t *mbentry, void *rock)
388 {
389 strarray_t *sa = (strarray_t *) rock;
390 strarray_append(sa, mbentry->name);
391 return 0;
392 }
393
/*
 * Turn the command-line sources into a list of internal mailbox names.
 *
 * sa:         output array; names are appended to it.
 * nmboxnames: number of entries in MBOXNAMES.
 * mboxnames:  user ids (when USER_MODE is set) or external mailbox names.
 * user_mode:  nonzero to expand each entry with mboxlist_usermboxtree().
 *
 * With no sources at all, every mailbox reported by mboxlist_allmbox()
 * is appended in the order it is reported.  Otherwise each source is
 * expanded (including children only when -r was given) and the result
 * is sorted with cmpstringp_raw and deduplicated.
 */
static void expand_mboxnames(strarray_t *sa, int nmboxnames,
                             const char **mboxnames, int user_mode)
{
    int i;

    if (!nmboxnames) {
        assert(!recursive_flag);
        mboxlist_allmbox(NULL, addmbox, sa, 0);
        return;
    }

    for (i = 0; i < nmboxnames; i++) {
        if (user_mode) {
            mboxlist_usermboxtree(mboxnames[i], NULL, addmbox, sa, 0);
        }
        else {
            /* Translate any separators in mailboxname */
            char *intname = mboxname_from_external(mboxnames[i], &squat_namespace, NULL);
            int flags = recursive_flag ? 0 : MBOXTREE_SKIP_CHILDREN;
            mboxlist_mboxtree(intname, addmbox, sa, flags);
            free(intname);
        }
    }

    /* Sort and deduplicate once, after all sources have been expanded.
     * This used to be repeated after every source; the final contents
     * are the same, without re-sorting the growing array each time. */
    if (nmboxnames > 0) {
        strarray_sort(sa, cmpstringp_raw);
        strarray_uniq(sa);
    }
}
422
do_indexer(const strarray_t * mboxnames)423 static int do_indexer(const strarray_t *mboxnames)
424 {
425 int r = 0;
426 int i;
427
428 rx = search_begin_update(verbose);
429 if (rx == NULL)
430 return 0; /* no indexer defined */
431
432 for (i = 0 ; i < strarray_size(mboxnames) ; i++) {
433 const char *mboxname = strarray_nth(mboxnames, i);
434 if (!should_index(mboxname)) continue;
435 r = index_one(mboxname, /*blocking*/1);
436 if (r == IMAP_MAILBOX_NONEXISTENT)
437 r = 0;
438 if (r == IMAP_MAILBOX_LOCKED)
439 r = 0; /* XXX - try again? */
440 if (r) break;
441 if (sleepmicroseconds)
442 usleep(sleepmicroseconds);
443 }
444
445 search_end_update(rx);
446
447 return r;
448 }
449
squatter_build_query(search_builder_t * bx,const char * query)450 static int squatter_build_query(search_builder_t *bx, const char *query)
451 {
452 tok_t tok = TOK_INITIALIZER(query, NULL, 0);
453 char *p;
454 char *q;
455 int r = 0;
456 int part;
457 charset_t utf8 = charset_lookupname("utf-8");
458
459 while ((p = tok_next(&tok))) {
460 if (!strncasecmp(p, "__begin:", 8)) {
461 q = p + 8;
462 if (!strcasecmp(q, "and"))
463 bx->begin_boolean(bx, SEARCH_OP_AND);
464 else if (!strcasecmp(q, "or"))
465 bx->begin_boolean(bx, SEARCH_OP_OR);
466 else if (!strcasecmp(q, "not"))
467 bx->begin_boolean(bx, SEARCH_OP_NOT);
468 else
469 goto error;
470 continue;
471 }
472 if (!strncasecmp(p, "__end:", 6)) {
473 q = p + 6;
474 if (!strcasecmp(q, "and"))
475 bx->end_boolean(bx, SEARCH_OP_AND);
476 else if (!strcasecmp(q, "or"))
477 bx->end_boolean(bx, SEARCH_OP_OR);
478 else if (!strcasecmp(q, "not"))
479 bx->end_boolean(bx, SEARCH_OP_NOT);
480 else
481 goto error;
482 continue;
483 }
484
485 /* everything else is a ->match() of some kind */
486 q = strchr(p, ':');
487 if (q) q++;
488 if (!q) {
489 part = SEARCH_PART_ANY;
490 q = p;
491 }
492 else if (!strncasecmp(p, "to:", 3))
493 part = SEARCH_PART_TO;
494 else if (!strncasecmp(p, "from:", 5))
495 part = SEARCH_PART_FROM;
496 else if (!strncasecmp(p, "cc:", 3))
497 part = SEARCH_PART_CC;
498 else if (!strncasecmp(p, "bcc:", 4))
499 part = SEARCH_PART_BCC;
500 else if (!strncasecmp(p, "subject:", 8))
501 part = SEARCH_PART_SUBJECT;
502 else if (!strncasecmp(p, "listid:", 7))
503 part = SEARCH_PART_LISTID;
504 else if (!strncasecmp(p, "contenttype:", 12))
505 part = SEARCH_PART_TYPE;
506 else if (!strncasecmp(p, "header:", 7))
507 part = SEARCH_PART_HEADERS;
508 else if (!strncasecmp(p, "body:", 5))
509 part = SEARCH_PART_BODY;
510 else
511 goto error;
512
513 q = charset_convert(q, utf8, charset_flags);
514 bx->match(bx, part, q);
515 free(q);
516 }
517 r = 0;
518
519 out:
520 charset_free(&utf8);
521 tok_fini(&tok);
522 return r;
523
524 error:
525 syslog(LOG_ERR, "bad query expression at \"%s\"", p);
526 r = IMAP_PROTOCOL_ERROR;
527 goto out;
528 }
529
/*
 * Search-hit callback: print one hit on stdout.
 *
 * ROCK points to an int.  When it is nonzero only the uid is printed;
 * otherwise the mailbox name and uidvalidity are printed as well.
 * Always returns 0.
 */
static int print_search_hit(const char *mboxname, uint32_t uidvalidity,
                            uint32_t uid,
                            const strarray_t *partids __attribute__((unused)),
                            void *rock)
{
    const int *single = rock;

    if (!*single) {
        printf("mailbox %s\nuidvalidity %u\nuid %u\n", mboxname, uidvalidity, uid);
        return 0;
    }

    printf("uid %u\n", uid);
    return 0;
}
543
do_list(const strarray_t * mboxnames)544 static int do_list(const strarray_t *mboxnames)
545 {
546 char *prev_userid = NULL;
547 strarray_t files = STRARRAY_INITIALIZER;
548 int i;
549 int r = 0;
550
551 for (i = 0; i < strarray_size(mboxnames); i++) {
552 const char *mboxname = strarray_nth(mboxnames, i);
553 char *userid = mboxname_to_userid(mboxname);
554 if (!userid) continue;
555
556 if (!strcmpsafe(prev_userid, userid)) {
557 free(userid);
558 continue;
559 }
560
561 r = search_list_files(userid, &files);
562 if (r) break;
563
564 int j;
565 for (j = 0; j < strarray_size(&files); j++) {
566 printf("%s\n", strarray_nth(&files, j));
567 }
568
569 strarray_truncate(&files, 0);
570
571 free(prev_userid);
572 prev_userid = userid;
573
574 if (sleepmicroseconds)
575 usleep(sleepmicroseconds);
576 }
577
578 strarray_fini(&files);
579 free(prev_userid);
580 return r;
581 }
582
/*
 * Compact the search databases of USERID by handing all arguments on
 * to search_compact() unchanged.  Returns whatever search_compact()
 * returns.
 */
static int compact_mbox(const char *userid, const strarray_t *reindextiers,
                        const strarray_t *srctiers,
                        const char *desttier, int flags)
{
    const int r = search_compact(userid, reindextiers, srctiers,
                                 desttier, flags);

    return r;
}
589
do_compact(const strarray_t * mboxnames,const strarray_t * reindextiers,const strarray_t * srctiers,const char * desttier,int flags)590 static int do_compact(const strarray_t *mboxnames, const strarray_t *reindextiers,
591 const strarray_t *srctiers,
592 const char *desttier, int flags)
593 {
594 char *prev_userid = NULL;
595 int i;
596
597 for (i = 0; i < strarray_size(mboxnames); i++) {
598 const char *mboxname = strarray_nth(mboxnames, i);
599 if (!should_index(mboxname)) continue;
600 char *userid = mboxname_to_userid(mboxname);
601 if (!userid) continue;
602
603 if (!strcmpsafe(prev_userid, userid)) {
604 free(userid);
605 continue;
606 }
607
608 int retry;
609 for (retry = 1; retry <= 3; retry++) {
610 int r = compact_mbox(userid, reindextiers, srctiers, desttier, flags);
611 if (!r) break;
612 xsyslog(LOG_ERR, "IOERROR: failed to compact",
613 "userid=<%s> retry=<%d> error=<%s>",
614 userid, retry, error_message(r));
615 }
616
617 free(prev_userid);
618 prev_userid = userid;
619
620 if (sleepmicroseconds)
621 usleep(sleepmicroseconds);
622 }
623
624 free(prev_userid);
625 return 0;
626 }
627
do_search(const char * query,int single,const strarray_t * mboxnames)628 static int do_search(const char *query, int single, const strarray_t *mboxnames)
629 {
630 struct mailbox *mailbox = NULL;
631 int i;
632 int r;
633 search_builder_t *bx;
634 int opts = SEARCH_VERBOSE(verbose);
635
636 if (!single)
637 opts |= SEARCH_MULTIPLE;
638
639 for (i = 0 ; i < mboxnames->count ; i++) {
640 const char *mboxname = mboxnames->data[i];
641 if (!should_index(mboxname)) continue;
642
643 r = mailbox_open_irl(mboxname, &mailbox);
644 if (r) {
645 fprintf(stderr, "Cannot open mailbox %s: %s\n",
646 mboxname, error_message(r));
647 continue;
648 }
649 if (single)
650 printf("mailbox %s\n", mboxname);
651
652 bx = search_begin_search(mailbox, opts);
653 if (bx) {
654 r = squatter_build_query(bx, query);
655 if (!r)
656 bx->run(bx, print_search_hit, &single);
657 search_end_search(bx);
658 }
659
660 mailbox_close(&mailbox);
661 }
662
663 return 0;
664 }
665
read_sync_log_items(sync_log_reader_t * slr)666 static strarray_t *read_sync_log_items(sync_log_reader_t *slr)
667 {
668 const char *args[3];
669 strarray_t *mboxnames = strarray_new();
670
671 while (sync_log_reader_getitem(slr, args) == 0) {
672 if (!strcmp(args[0], "APPEND")) {
673 strarray_append(mboxnames, args[1]);
674 }
675 else if (!strcmp(args[0], "USER"))
676 mboxlist_usermboxtree(args[1], NULL, addmbox, mboxnames, /*flags*/0);
677 }
678
679 return mboxnames;
680 }
681
do_synclogfile(const char * synclogfile)682 static int do_synclogfile(const char *synclogfile)
683 {
684 strarray_t *mboxnames = NULL;
685 sync_log_reader_t *slr;
686 int nskipped = 0;
687 int i;
688 int r;
689
690 slr = sync_log_reader_create_with_filename(synclogfile);
691 r = sync_log_reader_begin(slr);
692 if (r) goto out;
693 mboxnames = read_sync_log_items(slr);
694 sync_log_reader_end(slr);
695
696 /* sort mboxnames for locality of reference in file processing mode */
697 strarray_sort(mboxnames, cmpstringp_raw);
698 /* and deduplicate */
699 strarray_uniq(mboxnames);
700
701 signals_poll();
702
703 /* have some due items in the queue, try to index them */
704 rx = search_begin_update(verbose);
705 if (NULL == rx) {
706 r = 1;
707 goto out;
708 }
709 for (i = 0; i < strarray_size(mboxnames); i++) {
710 const char *mboxname = strarray_nth(mboxnames, i);
711 if (!should_index(mboxname)) continue;
712 if (verbose > 1)
713 syslog(LOG_INFO, "do_synclogfile: indexing %s", mboxname);
714 r = index_one(mboxname, /*blocking*/1);
715 if (r == IMAP_MAILBOX_NONEXISTENT)
716 r = 0;
717 if (r == IMAP_MAILBOX_LOCKED || r == IMAP_AGAIN) {
718 nskipped++;
719 if (nskipped > 10000) {
720 xsyslog(LOG_ERR, "IOERROR: skipped too many times",
721 "mailbox=<%s>", mboxname);
722 break;
723 }
724 r = 0;
725 /* try again at the end */
726 strarray_append(mboxnames, mboxname);
727 }
728 if (r) {
729 xsyslog(LOG_ERR, "IOERROR: failed to index",
730 "mailbox=<%s> error=<%s>",
731 mboxname, error_message(r));
732 break;
733 }
734 if (sleepmicroseconds)
735 usleep(sleepmicroseconds);
736 }
737 search_end_update(rx);
738 rx = NULL;
739
740 out:
741 strarray_free(mboxnames);
742 sync_log_reader_free(slr);
743 return r;
744 }
745
do_rolling(const char * channel)746 static void do_rolling(const char *channel)
747 {
748 strarray_t *mboxnames = NULL;
749 sync_log_reader_t *slr;
750 int i;
751 int r;
752
753 slr = sync_log_reader_create_with_channel(channel);
754
755 for (;;) {
756 int sig = signals_poll();
757
758 if (sig == SIGHUP && getenv("CYRUS_ISDAEMON")) {
759 syslog(LOG_DEBUG, "received SIGHUP, shutting down gracefully\n");
760 sync_log_reader_end(slr);
761 shut_down(0);
762 }
763
764 if (shutdown_file(NULL, 0))
765 shut_down(EX_TEMPFAIL);
766
767 r = sync_log_reader_begin(slr);
768 if (r) { /* including IMAP_AGAIN */
769 usleep(100000); /* 1/10th second */
770 continue;
771 }
772
773 mboxnames = read_sync_log_items(slr);
774
775 if (mboxnames->count) {
776 /* sort mboxnames for locality of reference in file processing mode */
777 strarray_sort(mboxnames, cmpstringp_raw);
778 /* and deduplicate */
779 strarray_uniq(mboxnames);
780
781 /* have some due items in the queue, try to index them */
782 rx = search_begin_update(verbose);
783 if (NULL == rx) {
784 /* XXX if xapian, probably don't have conversations enabled? */
785 fatal("could not construct search text receiver", EX_CONFIG);
786 }
787 for (i = 0; i < strarray_size(mboxnames); i++) {
788 const char *mboxname = strarray_nth(mboxnames, i);
789 if (!should_index(mboxname)) continue;
790 if (verbose > 1)
791 syslog(LOG_INFO, "do_rolling: indexing %s", mboxname);
792 r = index_one(mboxname, /*blocking*/0);
793 if (r == IMAP_AGAIN || r == IMAP_MAILBOX_LOCKED) {
794 /* XXX: alternative, just append to strarray_t *mboxnames ... */
795 sync_log_channel_append(channel, mboxname);
796 }
797 else if (r == IMAP_MAILBOX_NONEXISTENT) {
798 /* should_index() checked for this, but we lost a race.
799 * not an IOERROR, just annoying!
800 */
801 syslog(LOG_DEBUG, "skipping nonexistent mailbox: %s", mboxname);
802 }
803 else if (r) {
804 xsyslog(LOG_ERR, "IOERROR: failed to index and forgetting",
805 "mailbox=<%s> error=<%s>",
806 mboxname, error_message(r));
807 }
808 if (sleepmicroseconds)
809 usleep(sleepmicroseconds);
810 }
811 search_end_update(rx);
812 rx = NULL;
813 }
814
815 strarray_free(mboxnames);
816 mboxnames = NULL;
817 }
818
819 /* XXX - we don't really get here... */
820 strarray_free(mboxnames);
821 sync_log_reader_free(slr);
822 }
823
/*
 * Audit one mailbox: open MBOXNAME and have the current receiver (rx)
 * record unindexed messages in the bit vector UNINDEXED via its
 * audit_mailbox() method.
 *
 * Returns 0 on success, otherwise the first error from opening the
 * mailbox, begin_mailbox() or audit_mailbox(), or failing those the
 * result of end_mailbox().
 */
static int audit_one(const char *mboxname, bitvector_t *unindexed)
{
    struct mailbox *mailbox = NULL;
    int r2, r;

    r = mailbox_open_irl(mboxname, &mailbox);
    if (r) {
        /* Nothing was opened and begin_mailbox() never ran, so there is
         * nothing to end or close.  Previously end_mailbox() was still
         * invoked here with a NULL mailbox. */
        return r;
    }

    r = rx->begin_mailbox(rx, mailbox, SEARCH_UPDATE_AUDIT);
    if (!r)
        r = rx->audit_mailbox(rx, unindexed);

    /* the mailbox is open from here on: always end and close it, but
     * let an earlier error take precedence over end_mailbox()'s result */
    r2 = rx->end_mailbox(rx, mailbox);
    mailbox_close(&mailbox);
    if (!r) r = r2;
    return r;
}
844
845
do_audit(const strarray_t * mboxnames)846 static int do_audit(const strarray_t *mboxnames)
847 {
848 rx = search_begin_update(verbose);
849 if (rx == NULL)
850 return 0; /* no indexer defined */
851
852 int r = 0;
853 if (!rx->audit_mailbox) {
854 syslog(LOG_ERR, "squatter: indexer does not support audits");
855 r = IMAP_INTERNAL;
856 goto done;
857 }
858
859 bitvector_t unindexed = BV_INITIALIZER;
860 int i;
861 for (i = 0 ; i < mboxnames->count ; i++) {
862 const char *mboxname = strarray_nth(mboxnames, i);
863 if (!should_index(mboxname)) continue;
864 r = audit_one(mboxname, &unindexed);
865 if (r == IMAP_MAILBOX_NONEXISTENT)
866 r = 0;
867 if (r == IMAP_MAILBOX_LOCKED)
868 r = 0; /* XXX - try again? */
869 if (r) break;
870 if (sleepmicroseconds)
871 usleep(sleepmicroseconds);
872
873 if (bv_count(&unindexed)) {
874 printf("Unindexed message(s) in %s: ", mboxname);
875 int uid;
876 for (uid = bv_next_set(&unindexed, 0);
877 uid != -1;
878 uid = bv_next_set(&unindexed, uid+1)) {
879 printf("%d ", uid);
880 }
881 printf("\n");
882 }
883 bv_clearall(&unindexed);
884 }
885 bv_fini(&unindexed);
886
887 done:
888 search_end_update(rx);
889 return r;
890 }
891
/*
 * Release process-wide resources and terminate with exit status CODE.
 * Never returns.
 *
 * Teardown runs in the reverse of the initialisation order used by
 * main(), which calls cyrus_init() before index_text_extractor_init():
 * the text extractor is therefore destroyed before cyrus_done().
 * NOTE(review): presumably the extractor relies on services owned by
 * the Cyrus runtime; confirm against index_text_extractor_destroy().
 */
static void shut_down(int code)
{
    seen_done();

    index_text_extractor_destroy();

    cyrus_done();

    exit(code);
}
902
/*
 * Program entry point.
 *
 * Parses the command line into an operating mode plus option flags,
 * initialises the Cyrus runtime, the namespace, the search
 * configuration and the text extractor, then dispatches to the do_*()
 * routine for the selected mode.  When no mode flag is given the mode
 * defaults to INDEXER.
 *
 * Does not return normally: it finishes through shut_down(), with exit
 * status EX_TEMPFAIL when the mode routine returned nonzero and 0
 * otherwise, or through usage()/fatal()/exit() on errors.
 */
int main(int argc, char **argv)
{
    int opt;
    char *alt_config = NULL;
    int r = IMAP_NOTFOUND;
    strarray_t mboxnames = STRARRAY_INITIALIZER;
    const char *query = NULL;
    int background = 1;
    const char *channel = "squatter";
    const char *synclogfile = NULL;
    int init_flags = CYRUSINIT_PERROR;
    int multi_folder = 0;
    int user_mode = 0;
    int compact_flags = 0;
    strarray_t *srctiers = NULL;
    strarray_t *reindextiers = NULL;
    const char *desttier = NULL;
    char *errstr = NULL;
    enum { UNKNOWN, INDEXER, SEARCH, ROLLING, SYNCLOG,
           COMPACT, AUDIT, LIST } mode = UNKNOWN;

    /* make stdout unbuffered */
    setbuf(stdout, NULL);

    /* Keep these in alphabetic order */
    static const char *short_options = "ABC:DFL:N:PRS:T:UXZade:f:hilmn:oprs:t:uvz:";

    /* Keep these ordered by mode */
    static struct option long_options[] = {
        /* audit-mode flags */
        {"audit", no_argument, 0, 'A' },

        /* compact-mode flags */
        {"copydb", no_argument, 0, 'o' },
        {"filter", no_argument, 0, 'F' },
        {"skip-locked", no_argument, 0, 'B' },
        {"only-upgrade", no_argument, 0, 'U' },
        {"reindex-tier", required_argument, 0, 'T' },
        {"srctier", required_argument, 0, 't' },
        {"compact", required_argument, 0, 'z' },

        /* index-mode flags */
        {"index-duplicates", no_argument, 0, 'D' },
        {"incremental", no_argument, 0, 'i' },
        {"allow-partials", no_argument, 0, 'p' },
        {"name", required_argument, 0, 'N' },
        {"internalindex", no_argument, 0, 'Z' },
        {"user", no_argument, 0, 'u' },
        {"reindex", no_argument, 0, 'X' },
        {"reindex-minlevel", required_argument, 0, 'L' },
        {"reindex-partials", no_argument, 0, 'P' },

        /* list-mode flags */
        {"list", no_argument, 0, 'l' },

        /* rolling mode */
        {"rolling", no_argument, 0, 'R' },
        {"channel", required_argument, 0, 'n' },
        {"nodaemon", no_argument, 0, 'd' },

        /* search-mode flags */
        {"search-multifolder", no_argument, 0, 'm' },
        {"search-term", required_argument, 0, 'e' },

        /* squat flags */
        {"squat-annot", no_argument, 0, 'a' },
        {"squat-skip", optional_argument, 0, 's' },

        /* synclog-mode flags */
        {"synclog", required_argument, 0, 'f' },

        {"recursive", no_argument, 0, 'r' },
        {"sleep", required_argument, 0, 'S' },

        /* misc */
        {"help", no_argument, 0, 'h' },
        {"verbose", no_argument, 0, 'v' },
        // no long form for 'C' option

        {0, 0, 0, 0 }
    };

    /* Options are handled in command-line order.  Most mode-selecting
     * options call usage() when a different mode was already chosen. */
    while ((opt = getopt_long(argc, argv, short_options, long_options, NULL)) != EOF) {
        switch (opt) {
        case 'A': /* audit mode */
            if (mode != UNKNOWN) usage(argv[0]);
            mode = AUDIT;
            break;

        case 'B': /* compact: non-blocking (--skip-locked) */
            compact_flags |= SEARCH_COMPACT_NONBLOCKING;
            break;

        case 'C': /* alt config file */
            alt_config = optarg;
            break;

        case 'F': /* compact: filter */
            compact_flags |= SEARCH_COMPACT_FILTER;
            break;

        case 'X': /* compact: reindex */
            compact_flags |= SEARCH_COMPACT_REINDEX;
            break;

        case 'L': /* reindex below this level; also turns on -Z behaviour */
            reindex_minlevel = atoi(optarg);
            if (reindex_minlevel < 1 || reindex_minlevel > SEARCH_INDEXLEVEL_MAX) {
                fprintf(stderr, "%s: %s: invalid level argument\n", argv[0], optarg);
                exit(EX_USAGE);
            }
            xapindexed_mode = 1;
            break;

        case 'P': /* reindex partials; also turns on -Z behaviour */
            reindex_partials = 1;
            xapindexed_mode = 1;
            break;

        case 'Z': /* use the internal index */
            xapindexed_mode = 1;
            break;

        case 'p': /* allow partially indexed messages */
            allow_partials = 1;
            break;

        case 'D': /* --index-duplicates */
            allow_duplicateparts = 1;
            break;

        case 'N': /* lower bound for mailbox names, see should_index() */
            name_starts_from = optarg;
            break;

        case 'R': /* rolling indexer */
            if (mode != UNKNOWN) usage(argv[0]);
            mode = ROLLING;
            incremental_mode = 1; /* always incremental if rolling */
            break;

        case 'l': /* list paths */
            if (mode != UNKNOWN) usage(argv[0]);
            mode = LIST;
            break;

        case 'S': /* sleep time in seconds */
            sleepmicroseconds = (atof(optarg) * 1000000);
            break;

        case 'd': /* foreground (with -R) */
            background = 0;
            break;

        /* This option is deliberately undocumented, for testing only */
        case 'e': /* add a search term */
            if (mode != UNKNOWN && mode != SEARCH) usage(argv[0]);
            query = optarg;
            mode = SEARCH;
            break;

        case 'f': /* alternate synclogfile used in SYNCLOG mode */
            /* note: sets the mode without checking for an earlier one */
            synclogfile = optarg;
            mode = SYNCLOG;
            break;

        /* This option is deliberately undocumented, for testing only */
        case 'm': /* multi-folder in SEARCH mode */
            if (mode != UNKNOWN && mode != SEARCH) usage(argv[0]);
            multi_folder = 1;
            mode = SEARCH;
            break;

        case 'n': /* sync channel name (with -R) */
            channel = optarg;
            break;

        case 'o': /* copy one DB rather than compressing */
            compact_flags |= SEARCH_COMPACT_COPYONE;
            break;

        case 'U': /* compact: only upgrade */
            compact_flags |= SEARCH_COMPACT_ONLYUPGRADE;
            break;

        case 'v': /* verbose */
            verbose++;
            break;

        case 'r': /* recurse */
            if (mode != UNKNOWN && mode != INDEXER && mode != AUDIT) usage(argv[0]);
            recursive_flag = 1;
            if (mode == UNKNOWN) mode = INDEXER;
            break;

        case 'i': /* incremental mode */
            incremental_mode = 1;
            break;

        case 'a': /* use /squat annotation */
            if (mode != UNKNOWN && mode != INDEXER) usage(argv[0]);
            annotation_flag = 1;
            mode = INDEXER;
            break;

        case 's': /* skip unmodified mailboxes */
            if (mode != UNKNOWN && mode != INDEXER) usage(argv[0]);
            if (optarg) {
                /* explicit DELTA: a non-negative decimal that fits an int */
                char *end;
                long val = strtol(optarg, &end, 10);
                if (val < 0 || val > INT_MAX || *end) {
                    usage(argv[0]);
                }
                skip_unmodified = (int) val;
            }
            else {
                /* no DELTA given: default to 60 */
                skip_unmodified = 60;
            }
            mode = INDEXER;
            break;

        case 'z': /* compact to this destination tier */
            if (mode != UNKNOWN && mode != COMPACT) usage(argv[0]);
            desttier = optarg;
            mode = COMPACT;
            break;

        case 't': /* comma-separated source tiers */
            if (mode != UNKNOWN && mode != COMPACT) usage(argv[0]);
            srctiers = strarray_split(optarg, ",", 0);
            mode = COMPACT;
            break;

        case 'T': /* comma-separated tiers to reindex */
            if (mode != UNKNOWN && mode != COMPACT) usage(argv[0]);
            reindextiers = strarray_split(optarg, ",", 0);
            mode = COMPACT;
            break;

        case 'u': /* sources are user ids rather than mailbox names */
            user_mode = 1;
            break;

        case 'h':
        default:
            usage("squatter");
        }
    }

    if (xapindexed_mode) {
        /* we have two different flag types for the two different modes,
         * set both of them even though only one will be used */
        compact_flags |= SEARCH_COMPACT_XAPINDEXED;
    }

    compact_flags |= SEARCH_VERBOSE(verbose);

    /* no mode flag at all means plain indexing */
    if (mode == UNKNOWN)
        mode = INDEXER;

    if (mode == COMPACT && (!desttier || !srctiers)) {
        /* need both src and dest for compact */
        usage("squatter");
    }

    cyrus_init(alt_config, "squatter", init_flags, CONFIG_NEED_PARTITION_DATA);

    /* Set namespace -- force standard (internal) */
    if ((r = mboxname_init_namespace(&squat_namespace, 1)) != 0) {
        fatal(error_message(r), EX_CONFIG);
    }

    /* make sure we're correctly configured */
    if ((r = search_check_config(&errstr))) {
        if (errstr)
            fatal(errstr, EX_CONFIG);
        else
            fatal(error_message(r), EX_CONFIG);
    }

    /* the rolling and synclog modes call signals_poll(), so they get
     * signal handlers with shut_down() as the shutdown hook */
    if (mode == ROLLING || mode == SYNCLOG) {
        signals_set_shutdown(&shut_down);
        signals_add_handlers(0);
    }

    index_text_extractor_init(NULL);

    /* space-separated skip lists from the configuration, used by
     * should_index() */
    const char *conf;
    conf = config_getstring(IMAPOPT_SEARCH_INDEX_SKIP_DOMAINS);
    if (conf) skip_domains = strarray_split(conf, " ", STRARRAY_TRIM);
    conf = config_getstring(IMAPOPT_SEARCH_INDEX_SKIP_USERS);
    if (conf) skip_users = strarray_split(conf, " ", STRARRAY_TRIM);

    /* dispatch; the remaining arguments (argv+optind) are the sources */
    switch (mode) {
    case UNKNOWN:
        break;
    case INDEXER:
        /* -r requires at least one mailbox */
        if (recursive_flag && optind == argc) usage(argv[0]);
        expand_mboxnames(&mboxnames, argc-optind, (const char **)argv+optind, user_mode);
        syslog(LOG_NOTICE, "indexing mailboxes");
        r = do_indexer(&mboxnames);
        syslog(LOG_NOTICE, "done indexing mailboxes");
        break;
    case SEARCH:
        if (recursive_flag && optind == argc) usage(argv[0]);
        expand_mboxnames(&mboxnames, argc-optind, (const char **)argv+optind, user_mode);
        r = do_search(query, !multi_folder, &mboxnames);
        break;
    case ROLLING:
        /* only background ourselves when CYRUS_ISDAEMON is not set */
        if (background && !getenv("CYRUS_ISDAEMON"))
            become_daemon();
        do_rolling(channel);
        /* never returns */
        break;
    case SYNCLOG:
        r = do_synclogfile(synclogfile);
        break;
    case COMPACT:
        if (recursive_flag && optind == argc) usage(argv[0]);
        expand_mboxnames(&mboxnames, argc-optind, (const char **)argv+optind, user_mode);
        r = do_compact(&mboxnames, reindextiers, srctiers, desttier, compact_flags);
        break;
    case AUDIT:
        if (recursive_flag && optind == argc) usage(argv[0]);
        expand_mboxnames(&mboxnames, argc-optind, (const char **)argv+optind, user_mode);
        r = do_audit(&mboxnames);
        break;
    case LIST:
        if (recursive_flag && optind == argc) usage(argv[0]);
        expand_mboxnames(&mboxnames, argc-optind, (const char **)argv+optind, user_mode);
        r = do_list(&mboxnames);
        break;
    }

    strarray_fini(&mboxnames);
    /* any nonzero result is reported as a temporary failure */
    shut_down(r ? EX_TEMPFAIL : 0);
}
1240