1 /* squatter.c -- SQUAT-based message indexing tool
2 *
3 * Copyright (c) 1994-2012 Carnegie Mellon University. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. The name "Carnegie Mellon University" must not be used to
18 * endorse or promote products derived from this software without
19 * prior written permission. For permission or any legal
20 * details, please contact
21 * Carnegie Mellon University
22 * Center for Technology Transfer and Enterprise Creation
23 * 4615 Forbes Avenue
24 * Suite 302
25 * Pittsburgh, PA 15213
26 * (412) 268-7393, fax: (412) 268-7395
27 * innovation@andrew.cmu.edu
28 *
29 * 4. Redistributions of any form whatsoever must retain the following
30 * acknowledgment:
31 * "This product includes software developed by Computing Services
32 * at Carnegie Mellon University (http://www.cmu.edu/computing/)."
33 *
34 * CARNEGIE MELLON UNIVERSITY DISCLAIMS ALL WARRANTIES WITH REGARD TO
35 * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
36 * AND FITNESS, IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
37 * FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
38 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
39 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
40 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
41 */
42
43 /*
44 This is the tool that creates/updates search indexes for Cyrus mailboxes.
45
Despite the name, it handles whichever search engine is configured
47 by the 'search_engine' option in imapd.conf.
48 */
49
50 #include <config.h>
51
52 #ifdef HAVE_UNISTD_H
53 #include <unistd.h>
54 #endif
55 #include <stdlib.h>
56 #include <stdio.h>
57 #include <sys/stat.h>
58 #include <sys/types.h>
59 #include <sys/poll.h>
60 #include <errno.h>
61 #include <fcntl.h>
62 #include <sysexits.h>
63 #include <syslog.h>
64 #include <string.h>
65
66 #include "annotate.h"
67 #include "assert.h"
68 #include "bitvector.h"
69 #include "bsearch.h"
70 #include "mboxlist.h"
71 #include "global.h"
72 #include "search_engines.h"
73 #include "sync_log.h"
74 #include "mailbox.h"
75 #include "xmalloc.h"
76 #include "xstrlcpy.h"
77 #include "xstrlcat.h"
78 #include "ptrarray.h"
79 #include "tok.h"
80 #include "acl.h"
81 #include "seen.h"
82 #include "mboxname.h"
83 #include "index.h"
84 #include "message.h"
85 #include "util.h"
86
87 /* generated headers are not necessarily in current directory */
88 #include "imap/imap_err.h"
89
90 extern char *optarg;
91 extern int optind;
92
93 /* current namespace */
94 static struct namespace squat_namespace;
95
96 const int SKIP_FUZZ = 60;
97
98 static int verbose = 0;
99 static int incremental_mode = 0;
100 static int batch_mode = 0;
101 static int xapindexed_mode = 0;
102 static int recursive_flag = 0;
103 static int annotation_flag = 0;
104 static int sleepmicroseconds = 0;
105 static const char *temp_root_dir = NULL;
106 static search_text_receiver_t *rx = NULL;
107
108 static const char *name_starts_from = NULL;
109
110 static void shut_down(int code) __attribute__((noreturn));
111
/*
 * Write the command line synopsis to stderr and terminate the process
 * with EX_USAGE.  "name" is the program name shown on the first line.
 * Never returns, despite the int return type.
 */
__attribute__((noreturn)) static int usage(const char *name)
{
    /* everything after the first line is fixed text */
    static const char option_summary[] =
        "\n"
        "Mode flags: \n"
        " none index [source] (default)\n"
        " -a index [source] using /squat annotations\n"
        " -r index [source] recursively\n"
        " -f file index from synclog file\n"
        " -R start rolling indexer\n"
        " -z tier compact to tier\n"
        " -l list paths\n"
        "\n"
        "Index mode options:\n"
        " -i index incrementally\n"
        " -N name index mailbox names starting with name\n"
        " -S seconds sleep seconds between indexing mailboxes\n"
        " -Z Xapian: use internal index rather than cyrus.indexed.db\n"
        "\n"
        "Index sources:\n"
        " none all mailboxes (default)\n"
        " mailbox... index mailboxes\n"
        " -u user... index mailboxes of users\n"
        "\n"
        "Rolling indexer options:\n"
        " -n channel listen to channel\n"
        " -d don't background process\n"
        "\n"
        "Compact mode options:\n"
        " -t tier... compact from tiers\n"
        " -F filter during compaction\n"
        " -T dir use temporary directory dir during compaction\n"
        " -X reindex during compaction\n"
        " -o copy db rather compacting\n"
        " -U only compact if re-indexing\n"
        "\n"
        "General options:\n"
        " -v be verbose\n"
        " -h show usage\n";

    fprintf(stderr, "usage: %s [mode] [options] [source]\n", name);
    fputs(option_summary, stderr);

    exit(EX_USAGE);
}
156
157 /* ====================================================================== */
158
/*
 * Put the process in the background: point stdin, stdout and stderr at
 * /dev/null, close every other descriptor below the table size, then
 * fork and let the parent exit so that only the child carries on.
 * Exits with status 1 if /dev/null cannot be opened or the fork fails.
 */
static void become_daemon(void)
{
    int maxfd = getdtablesize();
    int devnull = open("/dev/null", O_RDWR, 0);
    int fd;

    if (devnull < 0) {
        perror("/dev/null");
        exit(1);
    }

    /* the three standard streams all end up on /dev/null */
    dup2(devnull, STDIN_FILENO);
    dup2(devnull, STDOUT_FILENO);
    dup2(devnull, STDERR_FILENO);

    /* drop everything else; this also closes devnull itself */
    fd = 3;
    while (fd < maxfd) {
        close(fd);
        fd++;
    }

    switch (fork()) {
    case -1:
        perror("fork");
        exit(1);

    case 0:
        /* child: return to the caller and keep running */
        break;

    default:
        /* parent */
        exit(0);
    }
}
186
should_index(const char * name)187 static int should_index(const char *name)
188 {
189 mbentry_t *mbentry = NULL;
190 /* Skip remote mailboxes */
191 int r = mboxlist_lookup(name, &mbentry, NULL);
192 if (r) {
193 /* Convert internal name to external */
194 char *extname = mboxname_to_external(name, &squat_namespace, NULL);
195 if (verbose) {
196 printf("error looking up %s: %s\n",
197 extname, error_message(r));
198 }
199 syslog(LOG_INFO, "error looking up %s: %s\n",
200 extname, error_message(r));
201
202 free(extname);
203 return 0;
204 }
205
206 // skip remote or not-real mailboxes
207 if (mbentry->mbtype & (MBTYPE_REMOTE|MBTYPE_DELETED|MBTYPE_INTERMEDIATE)) {
208 mboxlist_entry_free(&mbentry);
209 return 0;
210 }
211
212 // skip email submissions
213 if (mboxname_issubmissionmailbox(mbentry->name, mbentry->mbtype)) {
214 mboxlist_entry_free(&mbentry);
215 return 0;
216 }
217
218 // skip COLLECTION mailboxes (just files)
219 if (mbentry->mbtype & MBTYPE_COLLECTION) {
220 mboxlist_entry_free(&mbentry);
221 return 0;
222 }
223
224 mboxlist_entry_free(&mbentry);
225 return 1;
226 }
227
228 /* ====================================================================== */
229
230 /* This is called once for each mailbox we're told to index. */
index_one(const char * name,int blocking)231 static int index_one(const char *name, int blocking)
232 {
233 struct mailbox *mailbox = NULL;
234 int r;
235 int flags = 0;
236
237 if (incremental_mode)
238 flags |= SEARCH_UPDATE_INCREMENTAL;
239 if (batch_mode)
240 flags |= SEARCH_UPDATE_BATCH;
241 if (xapindexed_mode)
242 flags |= SEARCH_UPDATE_XAPINDEXED;
243
244 /* Convert internal name to external */
245 char *extname = mboxname_to_external(name, &squat_namespace, NULL);
246
247 /* make sure the mailbox (or an ancestor) has
248 /vendor/cmu/cyrus-imapd/squat set to "true" */
249 if (annotation_flag) {
250 char buf[MAX_MAILBOX_BUFFER] = "", *p;
251 struct buf attrib = BUF_INITIALIZER;
252 int domainlen = 0;
253
254 if (config_virtdomains && (p = strchr(name, '!')))
255 domainlen = p - name + 1;
256
257 strlcpy(buf, name, sizeof(buf));
258
259 /* since mailboxes inherit /vendor/cmu/cyrus-imapd/squat,
260 we need to iterate all the way up to "" (server entry) */
261 while (1) {
262 r = annotatemore_lookup(buf, IMAP_ANNOT_NS "squat", "",
263 &attrib);
264
265 if (r || /* error */
266 attrib.s || /* found an entry */
267 !buf[0]) { /* done recursing */
268 break;
269 }
270
271 p = strrchr(buf, '.'); /* find parent mailbox */
272
273 if (p && (p - buf > domainlen)) /* don't split subdomain */
274 *p = '\0';
275 else if (!buf[domainlen]) /* server entry */
276 buf[0] = '\0';
277 else /* domain entry */
278 buf[domainlen] = '\0';
279 }
280
281 if (r || !attrib.s || strcasecmp(attrib.s, "true")) {
282 buf_free(&attrib);
283 free(extname);
284 return 0;
285 }
286 buf_free(&attrib);
287 }
288
289 again:
290 if (blocking)
291 r = mailbox_open_irl(name, &mailbox);
292 else
293 r = mailbox_open_irlnb(name, &mailbox);
294
295 if (r == IMAP_MAILBOX_LOCKED) {
296 if (verbose) syslog(LOG_INFO, "mailbox %s locked, retrying", extname);
297 free(extname);
298 return r;
299 }
300 if (r) {
301 if (verbose) {
302 printf("error opening %s: %s\n", extname, error_message(r));
303 }
304 syslog(LOG_INFO, "error opening %s: %s\n", extname, error_message(r));
305 free(extname);
306
307 return r;
308 }
309
310 syslog(LOG_INFO, "indexing mailbox %s... ", extname);
311 if (verbose > 0) {
312 printf("Indexing mailbox %s... ", extname);
313 }
314
315 r = search_update_mailbox(rx, mailbox, flags);
316
317 mailbox_close(&mailbox);
318
319 /* in non-blocking (rolling) mode, only do one batch per mailbox at
320 * a time for fairness [IRIS-2471]. The squatter will re-insert the
321 * mailbox in the queue */
322 if (blocking && r == IMAP_AGAIN) goto again;
323 free(extname);
324
325 return r;
326 }
327
addmbox(const mbentry_t * mbentry,void * rock)328 static int addmbox(const mbentry_t *mbentry, void *rock)
329 {
330 strarray_t *sa = (strarray_t *) rock;
331
332 if (strcmpsafe(mbentry->name, name_starts_from) < 0)
333 return 0;
334 if (mboxname_isdeletedmailbox(mbentry->name, NULL))
335 return 0;
336
337 strarray_append(sa, mbentry->name);
338 return 0;
339 }
340
/*
 * Turn the sources named on the command line into a list of internal
 * mailbox names appended to "sa" (via addmbox).
 *
 * nmboxnames / mboxnames: the source arguments; none means every
 *                         mailbox on the server
 * user_mode:              nonzero if the arguments are user names,
 *                         zero if they are external mailbox names
 *
 * For mailbox arguments, children are included only when
 * recursive_flag is set.
 */
static void expand_mboxnames(strarray_t *sa, int nmboxnames,
                             const char **mboxnames, int user_mode)
{
    int tree_flags;
    int n;

    if (nmboxnames == 0) {
        /* callers reject -r without an explicit source */
        assert(!recursive_flag);
        mboxlist_allmbox(NULL, addmbox, sa, 0);
        return;
    }

    tree_flags = recursive_flag ? 0 : MBOXTREE_SKIP_CHILDREN;

    for (n = 0; n < nmboxnames; n++) {
        const char *arg = mboxnames[n];
        char *intname;

        if (user_mode) {
            mboxlist_usermboxtree(arg, NULL, addmbox, sa, 0);
            continue;
        }

        /* Translate any separators in mailboxname */
        intname = mboxname_from_external(arg, &squat_namespace, NULL);
        mboxlist_mboxtree(intname, addmbox, sa, tree_flags);
        free(intname);
    }
}
364
do_indexer(const strarray_t * mboxnames)365 static int do_indexer(const strarray_t *mboxnames)
366 {
367 int r = 0;
368 int i;
369
370 rx = search_begin_update(verbose);
371 if (rx == NULL)
372 return 0; /* no indexer defined */
373
374 for (i = 0 ; i < strarray_size(mboxnames) ; i++) {
375 const char *mboxname = strarray_nth(mboxnames, i);
376 if (!should_index(mboxname)) continue;
377 r = index_one(mboxname, /*blocking*/1);
378 if (r == IMAP_MAILBOX_NONEXISTENT)
379 r = 0;
380 if (r == IMAP_MAILBOX_LOCKED)
381 r = 0; /* XXX - try again? */
382 if (r) break;
383 if (sleepmicroseconds)
384 usleep(sleepmicroseconds);
385 }
386
387 search_end_update(rx);
388
389 return r;
390 }
391
squatter_build_query(search_builder_t * bx,const char * query)392 static int squatter_build_query(search_builder_t *bx, const char *query)
393 {
394 tok_t tok = TOK_INITIALIZER(query, NULL, 0);
395 char *p;
396 char *q;
397 int r = 0;
398 int part;
399 charset_t utf8 = charset_lookupname("utf-8");
400
401 while ((p = tok_next(&tok))) {
402 if (!strncasecmp(p, "__begin:", 8)) {
403 q = p + 8;
404 if (!strcasecmp(q, "and"))
405 bx->begin_boolean(bx, SEARCH_OP_AND);
406 else if (!strcasecmp(q, "or"))
407 bx->begin_boolean(bx, SEARCH_OP_OR);
408 else if (!strcasecmp(q, "not"))
409 bx->begin_boolean(bx, SEARCH_OP_NOT);
410 else
411 goto error;
412 continue;
413 }
414 if (!strncasecmp(p, "__end:", 6)) {
415 q = p + 6;
416 if (!strcasecmp(q, "and"))
417 bx->end_boolean(bx, SEARCH_OP_AND);
418 else if (!strcasecmp(q, "or"))
419 bx->end_boolean(bx, SEARCH_OP_OR);
420 else if (!strcasecmp(q, "not"))
421 bx->end_boolean(bx, SEARCH_OP_NOT);
422 else
423 goto error;
424 continue;
425 }
426
427 /* everything else is a ->match() of some kind */
428 q = strchr(p, ':');
429 if (q) q++;
430 if (!q) {
431 part = SEARCH_PART_ANY;
432 q = p;
433 }
434 else if (!strncasecmp(p, "to:", 3))
435 part = SEARCH_PART_TO;
436 else if (!strncasecmp(p, "from:", 5))
437 part = SEARCH_PART_FROM;
438 else if (!strncasecmp(p, "cc:", 3))
439 part = SEARCH_PART_CC;
440 else if (!strncasecmp(p, "bcc:", 4))
441 part = SEARCH_PART_BCC;
442 else if (!strncasecmp(p, "subject:", 8))
443 part = SEARCH_PART_SUBJECT;
444 else if (!strncasecmp(p, "listid:", 7))
445 part = SEARCH_PART_LISTID;
446 else if (!strncasecmp(p, "contenttype:", 12))
447 part = SEARCH_PART_TYPE;
448 else if (!strncasecmp(p, "header:", 7))
449 part = SEARCH_PART_HEADERS;
450 else if (!strncasecmp(p, "body:", 5))
451 part = SEARCH_PART_BODY;
452 else
453 goto error;
454
455 q = charset_convert(q, utf8, charset_flags);
456 bx->match(bx, part, q);
457 free(q);
458 }
459 r = 0;
460
461 out:
462 charset_free(&utf8);
463 tok_fini(&tok);
464 return r;
465
466 error:
467 syslog(LOG_ERR, "bad query expression at \"%s\"", p);
468 r = IMAP_PROTOCOL_ERROR;
469 goto out;
470 }
471
/*
 * Search hit callback: print one hit on stdout.
 *
 * "rock" points to an int that is nonzero for a single-mailbox search.
 * In that case only the uid is printed; otherwise the mailbox name and
 * uidvalidity are printed first.  Always returns 0.
 */
static int print_search_hit(const char *mboxname, uint32_t uidvalidity,
                            uint32_t uid,
                            const strarray_t *partids __attribute__((unused)),
                            void *rock)
{
    const int *single = rock;

    if (!*single)
        printf("mailbox %s\nuidvalidity %u\n", mboxname, uidvalidity);
    printf("uid %u\n", uid);

    return 0;
}
485
do_list(const strarray_t * mboxnames)486 static int do_list(const strarray_t *mboxnames)
487 {
488 char *prev_userid = NULL;
489 strarray_t files = STRARRAY_INITIALIZER;
490 int i;
491 int r = 0;
492
493 for (i = 0; i < strarray_size(mboxnames); i++) {
494 const char *mboxname = strarray_nth(mboxnames, i);
495 char *userid = mboxname_to_userid(mboxname);
496 if (!userid) continue;
497
498 if (!strcmpsafe(prev_userid, userid)) {
499 free(userid);
500 continue;
501 }
502
503 r = search_list_files(userid, &files);
504 if (r) break;
505
506 int j;
507 for (j = 0; j < strarray_size(&files); j++) {
508 printf("%s\n", strarray_nth(&files, j));
509 }
510
511 strarray_truncate(&files, 0);
512
513 free(prev_userid);
514 prev_userid = userid;
515
516 if (sleepmicroseconds)
517 usleep(sleepmicroseconds);
518 }
519
520 strarray_fini(&files);
521 free(prev_userid);
522 return r;
523 }
524
compact_mbox(const char * userid,const strarray_t * srctiers,const char * desttier,int flags)525 static int compact_mbox(const char *userid, const strarray_t *srctiers,
526 const char *desttier, int flags)
527 {
528 return search_compact(userid, temp_root_dir, srctiers, desttier, flags);
529 }
530
do_compact(const strarray_t * mboxnames,const strarray_t * srctiers,const char * desttier,int flags)531 static int do_compact(const strarray_t *mboxnames, const strarray_t *srctiers,
532 const char *desttier, int flags)
533 {
534 char *prev_userid = NULL;
535 int i;
536
537 for (i = 0; i < strarray_size(mboxnames); i++) {
538 const char *mboxname = strarray_nth(mboxnames, i);
539 char *userid = mboxname_to_userid(mboxname);
540 if (!userid) continue;
541
542 if (!strcmpsafe(prev_userid, userid)) {
543 free(userid);
544 continue;
545 }
546
547 int retry;
548 for (retry = 1; retry <= 3; retry++) {
549 int r = compact_mbox(userid, srctiers, desttier, flags);
550 if (!r) break;
551 syslog(LOG_ERR, "IOERROR: failed to compact %s (%d): %s",
552 userid, retry, error_message(r));
553 }
554
555 free(prev_userid);
556 prev_userid = userid;
557
558 if (sleepmicroseconds)
559 usleep(sleepmicroseconds);
560 }
561
562 free(prev_userid);
563 return 0;
564 }
565
do_search(const char * query,int single,const strarray_t * mboxnames)566 static int do_search(const char *query, int single, const strarray_t *mboxnames)
567 {
568 struct mailbox *mailbox = NULL;
569 int i;
570 int r;
571 search_builder_t *bx;
572 int opts = SEARCH_VERBOSE(verbose);
573
574 if (!single)
575 opts |= SEARCH_MULTIPLE;
576
577 for (i = 0 ; i < mboxnames->count ; i++) {
578 const char *mboxname = mboxnames->data[i];
579 if (!should_index(mboxname)) continue;
580
581 r = mailbox_open_irl(mboxname, &mailbox);
582 if (r) {
583 fprintf(stderr, "Cannot open mailbox %s: %s\n",
584 mboxname, error_message(r));
585 continue;
586 }
587 if (single)
588 printf("mailbox %s\n", mboxname);
589
590 bx = search_begin_search(mailbox, opts);
591 if (bx) {
592 r = squatter_build_query(bx, query);
593 if (!r)
594 bx->run(bx, print_search_hit, &single);
595 search_end_search(bx);
596 }
597
598 mailbox_close(&mailbox);
599 }
600
601 return 0;
602 }
603
read_sync_log_items(sync_log_reader_t * slr)604 static strarray_t *read_sync_log_items(sync_log_reader_t *slr)
605 {
606 const char *args[3];
607 strarray_t *mboxnames = strarray_new();
608
609 while (sync_log_reader_getitem(slr, args) == 0) {
610 if (!strcmp(args[0], "APPEND")) {
611 if (!mboxname_isdeletedmailbox(args[1], NULL))
612 strarray_add(mboxnames, args[1]);
613 }
614 else if (!strcmp(args[0], "USER"))
615 mboxlist_usermboxtree(args[1], NULL, addmbox, mboxnames, /*flags*/0);
616 }
617
618 return mboxnames;
619 }
620
do_synclogfile(const char * synclogfile)621 static int do_synclogfile(const char *synclogfile)
622 {
623 strarray_t *mboxnames = NULL;
624 sync_log_reader_t *slr;
625 int nskipped = 0;
626 int i;
627 int r;
628
629 slr = sync_log_reader_create_with_filename(synclogfile);
630 r = sync_log_reader_begin(slr);
631 if (r) goto out;
632 mboxnames = read_sync_log_items(slr);
633 sync_log_reader_end(slr);
634
635 /* sort mboxnames for locality of reference in file processing mode */
636 strarray_sort(mboxnames, cmpstringp_raw);
637
638 signals_poll();
639
640 /* have some due items in the queue, try to index them */
641 rx = search_begin_update(verbose);
642 if (NULL == rx) {
643 r = 1;
644 goto out;
645 }
646 for (i = 0; i < strarray_size(mboxnames); i++) {
647 const char *mboxname = strarray_nth(mboxnames, i);
648 if (!should_index(mboxname)) continue;
649 if (verbose > 1)
650 syslog(LOG_INFO, "do_synclogfile: indexing %s", mboxname);
651 r = index_one(mboxname, /*blocking*/1);
652 if (r == IMAP_MAILBOX_NONEXISTENT)
653 r = 0;
654 if (r == IMAP_MAILBOX_LOCKED || r == IMAP_AGAIN) {
655 nskipped++;
656 if (nskipped > 10000) {
657 syslog(LOG_ERR, "IOERROR: skipped too many times at %s", mboxname);
658 break;
659 }
660 r = 0;
661 /* try again at the end */
662 strarray_append(mboxnames, mboxname);
663 }
664 if (r) {
665 syslog(LOG_ERR, "IOERROR: failed to index %s: %s",
666 mboxname, error_message(r));
667 break;
668 }
669 if (sleepmicroseconds)
670 usleep(sleepmicroseconds);
671 }
672 search_end_update(rx);
673 rx = NULL;
674
675 out:
676 strarray_free(mboxnames);
677 sync_log_reader_free(slr);
678 return r;
679 }
680
do_rolling(const char * channel)681 static void do_rolling(const char *channel)
682 {
683 strarray_t *mboxnames = NULL;
684 sync_log_reader_t *slr;
685 int i;
686 int r;
687
688 slr = sync_log_reader_create_with_channel(channel);
689
690 for (;;) {
691 int sig = signals_poll();
692
693 if (sig == SIGHUP && getenv("CYRUS_ISDAEMON")) {
694 syslog(LOG_DEBUG, "received SIGHUP, shutting down gracefully\n");
695 sync_log_reader_end(slr);
696 shut_down(0);
697 }
698
699 if (shutdown_file(NULL, 0))
700 shut_down(EX_TEMPFAIL);
701
702 r = sync_log_reader_begin(slr);
703 if (r) { /* including IMAP_AGAIN */
704 usleep(100000); /* 1/10th second */
705 continue;
706 }
707
708 mboxnames = read_sync_log_items(slr);
709
710 if (mboxnames->count) {
711 /* have some due items in the queue, try to index them */
712 rx = search_begin_update(verbose);
713 if (NULL == rx) {
714 /* XXX if xapian, probably don't have conversations enabled? */
715 fatal("could not construct search text receiver", EX_CONFIG);
716 }
717 for (i = 0; i < strarray_size(mboxnames); i++) {
718 const char *mboxname = strarray_nth(mboxnames, i);
719 if (!should_index(mboxname)) continue;
720 if (verbose > 1)
721 syslog(LOG_INFO, "do_rolling: indexing %s", mboxname);
722 r = index_one(mboxname, /*blocking*/0);
723 if (r == IMAP_AGAIN || r == IMAP_MAILBOX_LOCKED) {
724 /* XXX: alternative, just append to strarray_t *mboxnames ... */
725 sync_log_channel_append(channel, mboxname);
726 }
727 if (sleepmicroseconds)
728 usleep(sleepmicroseconds);
729 }
730 search_end_update(rx);
731 rx = NULL;
732 }
733
734 strarray_free(mboxnames);
735 mboxnames = NULL;
736 }
737
738 /* XXX - we don't really get here... */
739 strarray_free(mboxnames);
740 sync_log_reader_free(slr);
741 }
742
/*
 * Audit one mailbox with the current search text receiver (rx).
 *
 * mboxname:  internal name of the mailbox to audit
 * unindexed: bit vector handed to rx->audit_mailbox() to receive the
 *            result
 *
 * Returns 0 on success.  If the mailbox cannot be opened, that error is
 * returned straight away and the receiver is not touched.  Otherwise
 * the first error from begin_mailbox(), audit_mailbox() or
 * end_mailbox() is returned.
 */
static int audit_one(const char *mboxname, bitvector_t *unindexed)
{
    struct mailbox *mailbox = NULL;
    int r2;
    int r;

    r = mailbox_open_irl(mboxname, &mailbox);
    if (r) {
        /* Nothing was opened and begin_mailbox() was never called, so
         * there is nothing to end or close.  Callers expect errors such
         * as "nonexistent" or "locked" here; calling end_mailbox() with
         * a NULL mailbox on this path was never intended. */
        return r;
    }

    r = rx->begin_mailbox(rx, mailbox, SEARCH_UPDATE_AUDIT);
    if (!r)
        r = rx->audit_mailbox(rx, unindexed);

    /* always pair the open mailbox with end_mailbox() and a close, but
     * keep the first error we saw */
    r2 = rx->end_mailbox(rx, mailbox);
    mailbox_close(&mailbox);
    if (!r) r = r2;

    return r;
}
763
764
do_audit(const strarray_t * mboxnames)765 static int do_audit(const strarray_t *mboxnames)
766 {
767 rx = search_begin_update(verbose);
768 if (rx == NULL)
769 return 0; /* no indexer defined */
770
771 int r = 0;
772 if (!rx->audit_mailbox) {
773 syslog(LOG_ERR, "squatter: indexer does not support audits");
774 r = IMAP_INTERNAL;
775 goto done;
776 }
777
778 bitvector_t unindexed = BV_INITIALIZER;
779 int i;
780 for (i = 0 ; i < mboxnames->count ; i++) {
781 const char *mboxname = strarray_nth(mboxnames, i);
782 if (!should_index(mboxname)) continue;
783 r = audit_one(mboxname, &unindexed);
784 if (r == IMAP_MAILBOX_NONEXISTENT)
785 r = 0;
786 if (r == IMAP_MAILBOX_LOCKED)
787 r = 0; /* XXX - try again? */
788 if (r) break;
789 if (sleepmicroseconds)
790 usleep(sleepmicroseconds);
791
792 if (bv_count(&unindexed)) {
793 printf("Unindexed message(s) in %s: ", mboxname);
794 int uid;
795 for (uid = bv_next_set(&unindexed, 0);
796 uid != -1;
797 uid = bv_next_set(&unindexed, uid+1)) {
798 printf("%d ", uid);
799 }
800 printf("\n");
801 }
802 bv_clearall(&unindexed);
803 }
804 bv_fini(&unindexed);
805
806 done:
807 search_end_update(rx);
808 return r;
809 }
810
/*
 * Common exit path: run the seen, cyrus and text extractor teardown
 * routines, in that order, then leave the process with exit status
 * "code".  Never returns.
 */
static void shut_down(int code)
{
    seen_done();
    cyrus_done();
    index_text_extractor_destroy();

    exit(code);
}
821
main(int argc,char ** argv)822 int main(int argc, char **argv)
823 {
824 int opt;
825 char *alt_config = NULL;
826 int r = IMAP_NOTFOUND;
827 strarray_t mboxnames = STRARRAY_INITIALIZER;
828 const char *query = NULL;
829 int background = 1;
830 const char *channel = "squatter";
831 const char *synclogfile = NULL;
832 int init_flags = CYRUSINIT_PERROR;
833 int multi_folder = 0;
834 int user_mode = 0;
835 int compact_flags = 0;
836 strarray_t *srctiers = NULL;
837 const char *desttier = NULL;
838 char *errstr = NULL;
839 enum { UNKNOWN, INDEXER, SEARCH, ROLLING, SYNCLOG,
840 COMPACT, AUDIT, LIST } mode = UNKNOWN;
841
842 setbuf(stdout, NULL);
843
844 while ((opt = getopt(argc, argv, "C:N:RUXZT:S:Fde:f:mn:riavAz:t:ouhl")) != EOF) {
845 switch (opt) {
846 case 'A':
847 if (mode != UNKNOWN) usage(argv[0]);
848 mode = AUDIT;
849 break;
850
851 case 'C': /* alt config file */
852 alt_config = optarg;
853 break;
854
855 case 'F':
856 compact_flags |= SEARCH_COMPACT_FILTER;
857 break;
858
859 case 'X':
860 compact_flags |= SEARCH_COMPACT_REINDEX;
861 break;
862
863 case 'Z':
864 /* we have two different flag types for the two different modes,
865 * set both of them even though only one will be used */
866 xapindexed_mode = 1;
867 compact_flags |= SEARCH_COMPACT_XAPINDEXED;
868 break;
869
870 case 'N':
871 name_starts_from = optarg;
872 break;
873
874 case 'R': /* rolling indexer */
875 if (mode != UNKNOWN) usage(argv[0]);
876 mode = ROLLING;
877 incremental_mode = 1; /* always incremental if rolling */
878 batch_mode = 1;
879 break;
880
881 case 'l': /* list paths */
882 if (mode != UNKNOWN) usage(argv[0]);
883 mode = LIST;
884 break;
885
886 case 'S': /* sleep time in seconds */
887 sleepmicroseconds = (atof(optarg) * 1000000);
888 break;
889
890 case 'T': /* temporary root directory for search */
891 temp_root_dir = optarg;
892 break;
893
894 case 'd': /* foreground (with -R) */
895 background = 0;
896 break;
897
898 /* This option is deliberately undocumented, for testing only */
899 case 'e': /* add a search term */
900 if (mode != UNKNOWN && mode != SEARCH) usage(argv[0]);
901 query = optarg;
902 mode = SEARCH;
903 break;
904
905 case 'f': /* alternate synclogfile used in SYNCLOG mode */
906 synclogfile = optarg;
907 mode = SYNCLOG;
908 break;
909
910 /* This option is deliberately undocumented, for testing only */
911 case 'm': /* multi-folder in SEARCH mode */
912 if (mode != UNKNOWN && mode != SEARCH) usage(argv[0]);
913 multi_folder = 1;
914 mode = SEARCH;
915 break;
916
917 case 'n': /* sync channel name (with -R) */
918 channel = optarg;
919 break;
920
921 case 'o': /* copy one DB rather than compressing */
922 compact_flags |= SEARCH_COMPACT_COPYONE;
923 break;
924
925 case 'U':
926 compact_flags |= SEARCH_COMPACT_ONLYUPGRADE;
927 break;
928
929 case 'v': /* verbose */
930 verbose++;
931 break;
932
933 case 'r': /* recurse */
934 if (mode != UNKNOWN && mode != INDEXER && mode != AUDIT) usage(argv[0]);
935 recursive_flag = 1;
936 if (mode == UNKNOWN) mode = INDEXER;
937 break;
938
939 case 'i': /* incremental mode */
940 incremental_mode = 1;
941 break;
942
943 case 'a': /* use /squat annotation */
944 if (mode != UNKNOWN && mode != INDEXER) usage(argv[0]);
945 annotation_flag = 1;
946 mode = INDEXER;
947 break;
948
949 case 'z':
950 if (mode != UNKNOWN && mode != COMPACT) usage(argv[0]);
951 desttier = optarg;
952 mode = COMPACT;
953 break;
954
955 case 't':
956 if (mode != UNKNOWN && mode != COMPACT) usage(argv[0]);
957 srctiers = strarray_split(optarg, ",", 0);
958 mode = COMPACT;
959 break;
960
961 case 'u':
962 user_mode = 1;
963 break;
964
965 case 'h':
966 default:
967 usage("squatter");
968 }
969 }
970
971 compact_flags |= SEARCH_VERBOSE(verbose);
972
973 if (mode == UNKNOWN)
974 mode = INDEXER;
975
976 if (mode == COMPACT && (!desttier || !srctiers)) {
977 /* need both src and dest for compact */
978 usage("squatter");
979 }
980
981 cyrus_init(alt_config, "squatter", init_flags, CONFIG_NEED_PARTITION_DATA);
982
983 /* Set namespace -- force standard (internal) */
984 if ((r = mboxname_init_namespace(&squat_namespace, 1)) != 0) {
985 fatal(error_message(r), EX_CONFIG);
986 }
987
988 /* make sure we're correctly configured */
989 if ((r = search_check_config(&errstr))) {
990 if (errstr)
991 fatal(errstr, EX_CONFIG);
992 else
993 fatal(error_message(r), EX_CONFIG);
994 }
995
996 if (mode == ROLLING || mode == SYNCLOG) {
997 signals_set_shutdown(&shut_down);
998 signals_add_handlers(0);
999 }
1000
1001 index_text_extractor_init(NULL);
1002
1003 switch (mode) {
1004 case UNKNOWN:
1005 break;
1006 case INDEXER:
1007 /* -r requires at least one mailbox */
1008 if (recursive_flag && optind == argc) usage(argv[0]);
1009 expand_mboxnames(&mboxnames, argc-optind, (const char **)argv+optind, user_mode);
1010 syslog(LOG_NOTICE, "indexing mailboxes");
1011 r = do_indexer(&mboxnames);
1012 syslog(LOG_NOTICE, "done indexing mailboxes");
1013 break;
1014 case SEARCH:
1015 if (recursive_flag && optind == argc) usage(argv[0]);
1016 expand_mboxnames(&mboxnames, argc-optind, (const char **)argv+optind, user_mode);
1017 r = do_search(query, !multi_folder, &mboxnames);
1018 break;
1019 case ROLLING:
1020 if (background && !getenv("CYRUS_ISDAEMON"))
1021 become_daemon();
1022 do_rolling(channel);
1023 /* never returns */
1024 break;
1025 case SYNCLOG:
1026 r = do_synclogfile(synclogfile);
1027 break;
1028 case COMPACT:
1029 if (recursive_flag && optind == argc) usage(argv[0]);
1030 expand_mboxnames(&mboxnames, argc-optind, (const char **)argv+optind, user_mode);
1031 r = do_compact(&mboxnames, srctiers, desttier, compact_flags);
1032 break;
1033 case AUDIT:
1034 if (recursive_flag && optind == argc) usage(argv[0]);
1035 expand_mboxnames(&mboxnames, argc-optind, (const char **)argv+optind, user_mode);
1036 r = do_audit(&mboxnames);
1037 break;
1038 case LIST:
1039 if (recursive_flag && optind == argc) usage(argv[0]);
1040 expand_mboxnames(&mboxnames, argc-optind, (const char **)argv+optind, user_mode);
1041 r = do_list(&mboxnames);
1042 break;
1043 }
1044
1045 strarray_fini(&mboxnames);
1046 shut_down(r ? EX_TEMPFAIL : 0);
1047 }
1048