1 /* Copyright (C) 2004 J.F.Dockes
2  *   This program is free software; you can redistribute it and/or modify
3  *   it under the terms of the GNU General Public License as published by
4  *   the Free Software Foundation; either version 2 of the License, or
5  *   (at your option) any later version.
6  *
7  *   This program is distributed in the hope that it will be useful,
8  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
9  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10  *   GNU General Public License for more details.
11  *
12  *   You should have received a copy of the GNU General Public License
13  *   along with this program; if not, write to the
14  *   Free Software Foundation, Inc.,
15  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
16  */
17 #include "autoconfig.h"
18 
19 #include <stdio.h>
20 #include <signal.h>
21 #include <errno.h>
22 #include <fnmatch.h>
23 #ifndef _WIN32
24 #include <sys/time.h>
25 #include <sys/resource.h>
26 #else
27 #include <direct.h>
28 #endif
29 #include "safefcntl.h"
30 #include "safeunistd.h"
31 #include <getopt.h>
32 
33 #include <iostream>
34 #include <list>
35 #include <string>
36 #include <cstdlib>
37 
38 using namespace std;
39 
40 #include "log.h"
41 #include "rclinit.h"
42 #include "indexer.h"
43 #include "smallut.h"
44 #include "chrono.h"
45 #include "pathut.h"
46 #include "rclutil.h"
47 #include "rclmon.h"
48 #include "x11mon.h"
49 #include "cancelcheck.h"
50 #include "checkindexed.h"
51 #include "rcldb.h"
52 #include "readfile.h"
53 #ifndef DISABLE_WEB_INDEXER
54 #include "webqueue.h"
55 #endif
56 #include "recollindex.h"
57 #include "fsindexer.h"
58 #ifndef _WIN32
59 #include "rclionice.h"
60 #endif
61 #include "execmd.h"
62 #include "checkretryfailed.h"
63 #include "circache.h"
64 #include "idxdiags.h"
65 
// Command line options.
// Each single-letter option seen on the command line sets one bit in
// op_flags. Bit 8 (0x100) is not assigned to any option.
static int     op_flags;
#define OPT_C (1 << 0)
#define OPT_c (1 << 1)
#define OPT_d (1 << 2)
#define OPT_D (1 << 3)
#define OPT_E (1 << 4)
#define OPT_e (1 << 5)
#define OPT_f (1 << 6)
#define OPT_h (1 << 7)
#define OPT_i (1 << 9)
#define OPT_K (1 << 10)
#define OPT_k (1 << 11)
#define OPT_l (1 << 12)
#define OPT_m (1 << 13)
#define OPT_n (1 << 14)
#define OPT_P (1 << 15)
#define OPT_p (1 << 16)
#define OPT_R (1 << 17)
#define OPT_r (1 << 18)
#define OPT_S (1 << 19)
#define OPT_s (1 << 20)
#define OPT_w (1 << 21)
#define OPT_x (1 << 22)
#define OPT_Z (1 << 23)
#define OPT_z (1 << 24)

// Values returned by getopt_long() for the options which only have a
// long form. They are chosen outside of the character range used by
// the single-letter options.
#define OPTVAL_WEBCACHE_COMPACT 1000
#define OPTVAL_WEBCACHE_BURST 1001
#define OPTVAL_DIAGS_NOTINDEXED 1002
#define OPTVAL_DIAGS_DIAGSFILE 1003

// Long options table for getopt_long(). The all-zero entry ends the list.
static struct option long_options[] = {
    {"webcache-compact", no_argument,       nullptr, OPTVAL_WEBCACHE_COMPACT},
    {"webcache-burst",   required_argument, nullptr, OPTVAL_WEBCACHE_BURST},
    {"notindexed",       no_argument,       nullptr, OPTVAL_DIAGS_NOTINDEXED},
    {"diagsfile",        required_argument, nullptr, OPTVAL_DIAGS_DIAGSFILE},
    {nullptr, 0, nullptr, 0}
};
105 
106 ReExec *o_reexec;
107 
108 // Globals for atexit cleanup
109 static ConfIndexer *confindexer;
110 
111 // This is set as an atexit routine,
cleanup()112 static void cleanup()
113 {
114     deleteZ(confindexer);
115     IdxDiags::theDiags().flush();
116     recoll_exitready();
117 }
118 
119 // This holds the state of topdirs (exist+nonempty) on indexing
120 // startup. If it changes after a resume from sleep we interrupt the
121 // indexing (the assumption being that a volume has been mounted or
122 // unmounted while we slept). This is not foolproof as the user can
123 // always pull out a removable volume while we work. It just avoids a
124 // harmful purge in a common case.
125 static vector<string> o_topdirs;
126 static vector<bool> o_topdirs_emptiness;
127 
topdirs_state(vector<bool> tdlstate)128 bool topdirs_state(vector<bool> tdlstate)
129 {
130     tdlstate.clear();
131     for (const auto& dir : o_topdirs) {
132         tdlstate.push_back(path_empty(dir));
133     }
134     return true;
135 }
136 
sigcleanup(int sig)137 static void sigcleanup(int sig)
138 {
139     if (sig == RCLSIG_RESUME) {
140         vector<bool> emptiness;
141         topdirs_state(emptiness);
142         if (emptiness != o_topdirs_emptiness) {
143             string msg = "Recollindex: resume: topdirs state changed while "
144                 "we were sleeping\n";
145             cerr << msg;
146             LOGDEB(msg);
147             CancelCheck::instance().setCancel();
148             stopindexing = 1;
149         }
150     } else {
151         cerr << "Recollindex: got signal " << sig << ", registering stop request\n";
152         LOGDEB("Got signal " << sig << ", registering stop request\n");
153         CancelCheck::instance().setCancel();
154         stopindexing = 1;
155     }
156 }
157 
makeIndexerOrExit(RclConfig * config,bool inPlaceReset)158 static void makeIndexerOrExit(RclConfig *config, bool inPlaceReset)
159 {
160     if (!confindexer) {
161         confindexer = new ConfIndexer(config);
162         if (inPlaceReset)
163             confindexer->setInPlaceReset();
164     }
165     if (!confindexer) {
166         cerr << "Cannot create indexer" << endl;
167         exit(1);
168     }
169 }
170 
171 // Adjust IO priority (if available), and also Linux Out-Of-Memory killer badness (idem)
rclIxIonice(const RclConfig * config)172 void rclIxIonice(const RclConfig *config)
173 {
174     PRETEND_USE(config);
175 #ifndef _WIN32
176     string clss, classdata;
177     if (!config->getConfParam("monioniceclass", clss) || clss.empty())
178         clss = "3";
179     // Classdata may be empty (must be for idle class)
180     config->getConfParam("monioniceclassdata", classdata);
181     rclionice(clss, classdata);
182 
183     std::string choompath;
184     if (ExecCmd::which("choom", choompath) && !choompath.empty()) {
185         std::string oomadj = "300";
186         config->getConfParam("oomadj", oomadj);
187         std::string spid = lltodecstr(getpid());
188         ExecCmd cmd;
189         std::string msg;
190         cmd.doexec(choompath, {"-n", oomadj, "-p", spid}, nullptr, &msg);
191         LOGDEB("rclIxIonice: oomadj output: " << msg);
192     }
193 
194 #endif
195 }
196 
setMyPriority(const RclConfig * config)197 static void setMyPriority(const RclConfig *config)
198 {
199     PRETEND_USE(config);
200 #ifndef _WIN32
201     int prio{19};
202     std::string sprio;
203     config->getConfParam("idxniceprio", sprio);
204     if (!sprio.empty()) {
205         prio = atoi(sprio.c_str());
206     }
207     if (setpriority(PRIO_PROCESS, 0, prio) != 0) {
208         LOGINFO("recollindex: can't setpriority(), errno " << errno << "\n");
209     }
210     // Try to ionice. This does not work on all platforms
211     rclIxIonice(config);
212 #endif
213 }
214 
215 
216 class MakeListWalkerCB : public FsTreeWalkerCB {
217 public:
MakeListWalkerCB(list<string> & files,const vector<string> & selpats)218     MakeListWalkerCB(list<string>& files, const vector<string>& selpats)
219         : m_files(files), m_pats(selpats) {}
processone(const string & fn,const struct PathStat *,FsTreeWalker::CbFlag flg)220     virtual FsTreeWalker::Status processone(
221         const string& fn, const struct PathStat *, FsTreeWalker::CbFlag flg) {
222         if (flg== FsTreeWalker::FtwDirEnter || flg == FsTreeWalker::FtwRegular){
223             if (m_pats.empty()) {
224                 m_files.push_back(fn);
225             } else {
226                 for (const auto& pat : m_pats) {
227                     if (fnmatch(pat.c_str(), fn.c_str(), 0) == 0) {
228                         m_files.push_back(fn);
229                         break;
230                     }
231                 }
232             }
233         }
234         return FsTreeWalker::FtwOk;
235     }
236     list<string>& m_files;
237     const vector<string>& m_pats;
238 };
239 
240 // Build a list of things to index, then call purgefiles and/or
241 // indexfiles.  This is basically the same as find xxx | recollindex
242 // -i [-e] without the find (so, simpler but less powerful)
recursive_index(RclConfig * config,const string & top,const vector<string> & selpats)243 bool recursive_index(RclConfig *config, const string& top,
244                      const vector<string>& selpats)
245 {
246     list<string> files;
247     MakeListWalkerCB cb(files, selpats);
248     FsTreeWalker walker;
249     walker.walk(top, cb);
250     bool ret = false;
251     if (op_flags & OPT_e) {
252         if (!(ret = purgefiles(config, files))) {
253             return ret;
254         }
255     }
256     if (!(op_flags & OPT_e) || ((op_flags & OPT_e) &&(op_flags & OPT_i))) {
257         ret = indexfiles(config, files);
258     }
259     return ret;
260 }
261 
262 // Index a list of files. We just call the top indexer method, which
263 // will sort out what belongs to the indexed trees and call the
264 // appropriate indexers.
265 //
266 // This is called either from the command line or from the monitor. In
267 // this case we're called repeatedly in the same process, and the
268 // confindexer is only created once by makeIndexerOrExit (but the db closed and
269 // flushed every time)
indexfiles(RclConfig * config,list<string> & filenames)270 bool indexfiles(RclConfig *config, list<string> &filenames)
271 {
272     if (filenames.empty())
273         return true;
274     makeIndexerOrExit(config, (op_flags & OPT_Z) != 0);
275     // The default is to retry failed files
276     int indexerFlags = ConfIndexer::IxFNone;
277     if (op_flags & OPT_K)
278         indexerFlags |= ConfIndexer::IxFNoRetryFailed;
279     if (op_flags & OPT_f)
280         indexerFlags |= ConfIndexer::IxFIgnoreSkip;
281     if (op_flags & OPT_P) {
282         indexerFlags |= ConfIndexer::IxFDoPurge;
283     }
284     return confindexer->indexFiles(filenames, indexerFlags);
285 }
286 
287 // Delete a list of files. Same comments about call contexts as indexfiles.
purgefiles(RclConfig * config,list<string> & filenames)288 bool purgefiles(RclConfig *config, list<string> &filenames)
289 {
290     if (filenames.empty())
291         return true;
292     makeIndexerOrExit(config, (op_flags & OPT_Z) != 0);
293     return confindexer->purgeFiles(filenames, ConfIndexer::IxFNone);
294 }
295 
296 // Create stemming and spelling databases
createAuxDbs(RclConfig * config)297 bool createAuxDbs(RclConfig *config)
298 {
299     makeIndexerOrExit(config, false);
300 
301     if (!confindexer->createStemmingDatabases())
302         return false;
303 
304     if (!confindexer->createAspellDict())
305         return false;
306 
307     return true;
308 }
309 
310 // Create additional stem database
createstemdb(RclConfig * config,const string & lang)311 static bool createstemdb(RclConfig *config, const string &lang)
312 {
313     makeIndexerOrExit(config, false);
314     return confindexer->createStemDb(lang);
315 }
316 
317 // Check that topdir entries are valid (successful tilde exp + abs
318 // path) or fail.
319 // In addition, topdirs, skippedPaths, daemSkippedPaths entries should
320 // match existing files or directories. Warn if they don't
checktopdirs(RclConfig * config,vector<string> & nonexist)321 static bool checktopdirs(RclConfig *config, vector<string>& nonexist)
322 {
323     if (!config->getConfParam("topdirs", &o_topdirs)) {
324         cerr << "No 'topdirs' parameter in configuration\n";
325         LOGERR("recollindex:No 'topdirs' parameter in configuration\n");
326         return false;
327     }
328 
329     // If a restricted list for real-time monitoring exists check that
330     // all entries are descendants from a topdir
331     vector<string> mondirs;
332     if (config->getConfParam("monitordirs", &mondirs)) {
333         for (const auto& sub : mondirs) {
334             bool found{false};
335             for (const auto& top : o_topdirs) {
336                 if (path_isdesc(top, sub)) {
337                     found = true;
338                     break;
339                 }
340             }
341             if (!found) {
342                 string s("Real time monitoring directory entry " + sub +
343                          " is not part of the topdirs tree\n");
344                 cerr << s;
345                 LOGERR(s);
346                 return false;
347             }
348         }
349     }
350 
351     bool onegood{false};
352     for (auto& dir : o_topdirs) {
353         dir = path_tildexpand(dir);
354         if (!dir.size() || !path_isabsolute(dir)) {
355             if (dir[0] == '~') {
356                 cerr << "Tilde expansion failed: " << dir << endl;
357                 LOGERR("recollindex: tilde expansion failed: " << dir << "\n");
358             } else {
359                 cerr << "Not an absolute path: " << dir << endl;
360                 LOGERR("recollindex: not an absolute path: " << dir << "\n");
361             }
362             return false;
363         }
364         if (!path_exists(dir)) {
365             nonexist.push_back(dir);
366         } else {
367             onegood = true;
368         }
369     }
370     topdirs_state(o_topdirs_emptiness);
371 
372     // We'd like to check skippedPaths too, but these are wildcard
373     // exprs, so reasonably can't
374 
375     return onegood;
376 }
377 
378 
// Absolute path of the running program, set by argstovector() from argv[0].
std::string thisprog;

// Help text printed by Usage(). Which sections are present depends on
// the build options (RCL_MONITOR, DISABLE_X11MON, RCL_USE_ASPELL,
// HAVE_POSIX_FADVISE). The array is terminated by the single ';'
// which follows the last conditional section.
static const char usage [] =
"\n"
"recollindex [-h] \n"
"    Print help\n"
"recollindex [-z|-Z] [-k]\n"
"    Index everything according to configuration file\n"
"    -z : reset database before starting indexing\n"
"    -Z : in place reset: consider all documents as changed. Can also\n"
"         be combined with -i or -r but not -m\n"
"    -k : retry files on which we previously failed\n"
"    --diagsfile <outputpath> : list skipped or otherwise not indexed documents to <outputpath>\n"
"       <outputpath> will be truncated\n"
#ifdef RCL_MONITOR
"recollindex -m [-w <secs>] [-x] [-D] [-C] [-n]\n"
"    Perform real time indexing. Don't become a daemon if -D is set.\n"
"    -w sets number of seconds to wait before starting.\n"
"    -C disables monitoring config for changes/reexecuting.\n"
"    -n disables initial incremental indexing (!and purge!).\n"
#ifndef DISABLE_X11MON
"    -x disables exit on end of x11 session\n"
#endif /* DISABLE_X11MON */
#endif /* RCL_MONITOR */
"recollindex -e [<filepath [path ...]>]\n"
"    Purge data for individual files. No stem database updates.\n"
"    Reads paths on stdin if none is given as argument.\n"
"recollindex -i [-f] [-Z] [<filepath [path ...]>]\n"
"    Index individual files. No database purge or stem database updates\n"
"    Will read paths on stdin if none is given as argument\n"
"    -f : ignore skippedPaths and skippedNames while doing this\n"
"recollindex -r [-K] [-f] [-Z] [-p pattern] <top> \n"
"   Recursive partial reindex. \n"
"     -p : filter file names, multiple instances are allowed, e.g.: \n"
"        -p *.odt -p *.pdf\n"
"     -K : skip previously failed files (they are retried by default)\n"
"recollindex -l\n"
"    List available stemming languages\n"
"recollindex -s <lang>\n"
"    Build stem database for additional language <lang>\n"
"recollindex -E\n"
"    Check configuration file for topdirs and other paths existence\n"
"recollindex --webcache-compact : recover wasted space from the Web cache\n"
"recollindex --webcache-burst <targetdir> : extract entries from the Web cache to the target\n"
"recollindex --notindexed [filepath [filepath ...]] : check if the file arguments are indexed\n"
"   will read file paths from stdin if there are no arguments\n"
#ifdef FUTURE_IMPROVEMENT
"recollindex -W\n"
"    Process the Web queue\n"
#endif
#ifdef RCL_USE_ASPELL
"recollindex -S\n"
"    Build aspell spelling dictionary.\n"
#endif
"Common options:\n"
"    -c <configdir> : specify config directory, overriding $RECOLL_CONFDIR\n"
#if defined(HAVE_POSIX_FADVISE)
"    -d : call fadvise() with the POSIX_FADV_DONTNEED flag on indexed files\n"
"          (avoids trashing the page cache)\n"
#endif
;
440 
Usage()441 static void Usage()
442 {
443     FILE *fp = (op_flags & OPT_h) ? stdout : stderr;
444     fprintf(fp, "%s: Usage: %s", path_getsimple(thisprog).c_str(), usage);
445     fprintf(fp, "Recoll version: %s\n", Rcl::version_string().c_str());
446     exit((op_flags & OPT_h)==0);
447 }
448 
449 static RclConfig *config;
450 
lockorexit(Pidfile * pidfile,RclConfig * config)451 static void lockorexit(Pidfile *pidfile, RclConfig *config)
452 {
453     PRETEND_USE(config);
454     pid_t pid;
455     if ((pid = pidfile->open()) != 0) {
456         if (pid > 0) {
457             cerr << "Can't become exclusive indexer: " << pidfile->getreason()
458                  << ". Return (other pid?): " << pid << endl;
459 #ifndef _WIN32
460             // Have a look at the status file. If the other process is
461             // a monitor we can tell it to start an incremental pass
462             // by touching the configuration file
463             DbIxStatus status;
464             readIdxStatus(config, status);
465             if (status.hasmonitor) {
466                 string cmd("touch ");
467                 string path = path_cat(config->getConfDir(), "recoll.conf");
468                 cmd += path;
469                 int status;
470                 if ((status = system(cmd.c_str()))) {
471                     cerr << cmd << " failed with status " << status << endl;
472                 } else {
473                     cerr << "Monitoring indexer process was notified of "
474                         "indexing request\n";
475                 }
476             }
477 #endif
478         } else {
479             cerr << "Can't become exclusive indexer: " << pidfile->getreason()
480                  << endl;
481         }
482         exit(1);
483     }
484     if (pidfile->write_pid() != 0) {
485         cerr << "Can't become exclusive indexer: " << pidfile->getreason() <<
486             endl;
487         exit(1);
488     }
489 }
490 
491 static string reasonsfile;
492 extern ConfSimple idxreasons;
flushIdxReasons()493 static void flushIdxReasons()
494 {
495     if (reasonsfile.empty())
496         return;
497     if (reasonsfile == "stdout") {
498         idxreasons.write(cout);
499     } else if (reasonsfile == "stderr") {
500         idxreasons.write(std::cerr);
501     } else {
502         ofstream out;
503         try {
504             out.open(reasonsfile, ofstream::out|ofstream::trunc);
505             idxreasons.write(out);
506         } catch (...) {
507             std::cerr << "Could not write reasons file " << reasonsfile << endl;
508             idxreasons.write(std::cerr);
509         }
510     }
511 }
512 
513 // With more recent versions of mingw, we could use -municode to
514 // enable wmain.  Another workaround is to use main, then call
515 // GetCommandLineW and CommandLineToArgvW, to then call wmain(). If
516 // ever we need to build with mingw again.
517 #if defined(_WIN32) && defined(_MSC_VER)
518 #define USE_WMAIN 1
519 #endif
520 
521 #if USE_WMAIN
522 #define WARGTOSTRING(w) wchartoutf8(w)
argstovector(int argc,wchar_t ** argv,vector<string> & storage)523 static vector<const char*> argstovector(int argc, wchar_t **argv, vector<string>& storage)
524 #else
525 #define WARGTOSTRING(w) (w)
526     static vector<const char*> argstovector(int argc, char **argv, vector<string>& storage)
527 #endif
528 {
529     vector<const char *> args(argc+1);
530     storage.resize(argc+1);
531     thisprog = path_absolute(WARGTOSTRING(argv[0]));
532     for (int i = 0; i < argc; i++) {
533         storage[i] = WARGTOSTRING(argv[i]);
534         args[i] = storage[i].c_str();
535     }
536     return args;
537 }
538 
539 
540 // Working directory before we change: it's simpler to change early
541 // but some options need the original for computing absolute paths.
542 static std::string orig_cwd;
543 
544 // A bit of history: it's difficult to pass non-ASCII parameters
545 // (e.g. path names) on the command line under Windows without using
546 // Unicode. It was first thought possible to use a temporary file to
547 // hold the args, and make sure that the path for this would be ASCII,
548 // based on using shortpath(). Unfortunately, this does not work in
549 // all cases, so the second change was to use wmain(). The
550 // args-in-file was removed quite a long time after.
551 #if USE_WMAIN
wmain(int argc,wchar_t * argv[])552 int wmain(int argc, wchar_t *argv[])
553 #else
554 int main(int argc, char *argv[])
555 #endif
556 {
557 #ifndef _WIN32
558     // The reexec struct is used by the daemon to shed memory after
559     // the initial indexing pass and to restart when the configuration
560     // changes
561     o_reexec = new ReExec;
562     o_reexec->init(argc, argv);
563 #endif
564 
565     // Only actually useful on Windows: convert wargs to utf-8 chars
566     vector<string> astore;
567     vector<const char*> args = argstovector(argc, argv, astore);
568 
569     vector<string> selpatterns;
570     int sleepsecs{60};
571     string a_config;
572     int ret;
573     bool webcache_compact{false};
574     bool webcache_burst{false};
575     bool diags_notindexed{false};
576 
577     std::string burstdir;
578     std::string diagsfile;
579     while ((ret = getopt_long(argc, (char *const*)&args[0], "c:CDdEefhikKlmnPp:rR:sS:w:xZz",
580                               long_options, NULL)) != -1) {
581         switch (ret) {
582         case 'c':  op_flags |= OPT_c; a_config = optarg; break;
583 #ifdef RCL_MONITOR
584         case 'C': op_flags |= OPT_C; break;
585         case 'D': op_flags |= OPT_D; break;
586 #endif
587 #if defined(HAVE_POSIX_FADVISE)
588         case 'd': op_flags |= OPT_d; break;
589 #endif
590         case 'E': op_flags |= OPT_E; break;
591         case 'e': op_flags |= OPT_e; break;
592         case 'f': op_flags |= OPT_f; break;
593         case 'h': op_flags |= OPT_h; break;
594         case 'i': op_flags |= OPT_i; break;
595         case 'k': op_flags |= OPT_k; break;
596         case 'K': op_flags |= OPT_K; break;
597         case 'l': op_flags |= OPT_l; break;
598         case 'm': op_flags |= OPT_m; break;
599         case 'n': op_flags |= OPT_n; break;
600         case 'P': op_flags |= OPT_P; break;
601         case 'p': op_flags |= OPT_p; selpatterns.push_back(optarg); break;
602         case 'r': op_flags |= OPT_r; break;
603         case 'R':   op_flags |= OPT_R; reasonsfile = optarg; break;
604         case 's': op_flags |= OPT_s; break;
605 #ifdef RCL_USE_ASPELL
606         case 'S': op_flags |= OPT_S; break;
607 #endif
608         case 'w':   op_flags |= OPT_w;
609             if ((sscanf(optarg, "%d", &sleepsecs)) != 1)
610                 Usage();
611             break;
612         case 'x': op_flags |= OPT_x; break;
613         case 'Z': op_flags |= OPT_Z; break;
614         case 'z': op_flags |= OPT_z; break;
615 
616         case OPTVAL_WEBCACHE_COMPACT: webcache_compact = true; break;
617         case OPTVAL_WEBCACHE_BURST: burstdir = optarg; webcache_burst = true;break;
618         case OPTVAL_DIAGS_NOTINDEXED: diags_notindexed = true;break;
619         case OPTVAL_DIAGS_DIAGSFILE: diagsfile = optarg;break;
620         default: Usage(); break;
621         }
622     }
623     int aremain = argc - optind;
624 
625     if (op_flags & OPT_h)
626         Usage();
627 
628 #ifndef RCL_MONITOR
629     if (op_flags & (OPT_m | OPT_w|OPT_x)) {
630         std::cerr << "-m not available: real-time monitoring was not "
631             "configured in this build\n";
632         exit(1);
633     }
634 #endif
635 
636     if ((op_flags & OPT_z) && (op_flags & (OPT_i|OPT_e|OPT_r)))
637         Usage();
638     if ((op_flags & OPT_Z) && (op_flags & (OPT_m)))
639         Usage();
640     if ((op_flags & OPT_E) && (op_flags & ~(OPT_E|OPT_c))) {
641         Usage();
642     }
643 
644     string reason;
645     int flags = RCLINIT_IDX;
646     if ((op_flags & OPT_m) && !(op_flags&OPT_D)) {
647         flags |= RCLINIT_DAEMON;
648     }
649     config = recollinit(flags, cleanup, sigcleanup, reason, &a_config);
650     if (config == 0 || !config->ok()) {
651         addIdxReason("init", reason);
652         flushIdxReasons();
653         std::cerr << "Configuration problem: " << reason << endl;
654         exit(1);
655     }
656 
657     // Auxiliary, non-index-related things. Avoids having a separate binary.
658     if (webcache_compact || webcache_burst || diags_notindexed) {
659         std::string ccdir = config->getWebcacheDir();
660         std::string reason;
661         if (webcache_compact) {
662             if (!CirCache::compact(ccdir, &reason)) {
663                 std::cerr << "Web cache compact failed: " << reason << "\n";
664                 exit(1);
665             }
666         } else if (webcache_burst) {
667             if (!CirCache::burst(ccdir, burstdir, &reason)) {
668                 std::cerr << "Web cache burst failed: " << reason << "\n";
669                 exit(1);
670             }
671         } else if (diags_notindexed) {
672             std::vector<std::string> filepaths;
673             while (aremain--) {
674                 filepaths.push_back(args[optind++]);
675             }
676             if (!checkindexed(config, filepaths)) {
677                 exit(1);
678             }
679         }
680 
681         exit(0);
682     }
683 
684 #ifndef _WIN32
685     o_reexec->atexit(cleanup);
686 #endif
687 
688     vector<string> nonexist;
689     if (!checktopdirs(config, nonexist)) {
690         std::cerr << "topdirs not set or only contains invalid paths.\n";
691         addIdxReason("init", "topdirs not set or only contains invalid paths.");
692         flushIdxReasons();
693         exit(1);
694     }
695 
696     if (nonexist.size()) {
697         ostream& out = (op_flags & OPT_E) ? cout : cerr;
698         if (!(op_flags & OPT_E)) {
699             cerr << "Warning: invalid paths in topdirs, skippedPaths or "
700                 "daemSkippedPaths:\n";
701         }
702         for (const auto& entry : nonexist) {
703             out << entry << endl;
704         }
705     }
706     if ((op_flags & OPT_E)) {
707         exit(0);
708     }
709 
710     if (op_flags & OPT_l) {
711         if (aremain != 0)
712             Usage();
713         vector<string> stemmers = ConfIndexer::getStemmerNames();
714         for (const auto& stemmer : stemmers) {
715             cout << stemmer << endl;
716         }
717         exit(0);
718     }
719 
720     orig_cwd = path_cwd();
721     string rundir;
722     config->getConfParam("idxrundir", rundir);
723     if (!rundir.empty()) {
724         if (!rundir.compare("tmp")) {
725             rundir = tmplocation();
726         }
727         LOGINFO("recollindex: changing current directory to [" <<rundir<<"]\n");
728         if (!path_chdir(rundir)) {
729             LOGSYSERR("main", "chdir", rundir);
730         }
731     }
732 
733     if (!diagsfile.empty()) {
734         if (!IdxDiags::theDiags().init(diagsfile)) {
735             std::cerr << "Could not initialize diags file " << diagsfile << "\n";
736             LOGERR("recollindex: Could not initialize diags file " << diagsfile << "\n");
737         }
738     }
739     bool rezero((op_flags & OPT_z) != 0);
740     bool inPlaceReset((op_flags & OPT_Z) != 0);
741 
742     // The default is not to retry previously failed files by default.
743     // If -k is set, we do.
744     // If the checker script says so, we do too, except if -K is set.
745     int indexerFlags = ConfIndexer::IxFNoRetryFailed;
746     if (op_flags & OPT_k) {
747         indexerFlags &= ~ConfIndexer::IxFNoRetryFailed;
748     } else {
749         if (op_flags & OPT_K) {
750             indexerFlags |= ConfIndexer::IxFNoRetryFailed;
751         } else {
752             if (checkRetryFailed(config, false)) {
753                 indexerFlags &= ~ConfIndexer::IxFNoRetryFailed;
754             } else {
755                 indexerFlags |= ConfIndexer::IxFNoRetryFailed;
756             }
757         }
758     }
759     if (indexerFlags & ConfIndexer::IxFNoRetryFailed) {
760         LOGDEB("recollindex: files in error will not be retried\n");
761     } else {
762         LOGDEB("recollindex: files in error will be retried\n");
763     }
764 
765 #if defined(HAVE_POSIX_FADVISE)
766     if (op_flags & OPT_d) {
767         indexerFlags |= ConfIndexer::IxFCleanCache;
768     }
769 #endif
770 
771     Pidfile pidfile(config->getPidfile());
772     lockorexit(&pidfile, config);
773 
774     // Log something at LOGINFO to reset the trace file. Else at level
775     // 3 it's not even truncated if all docs are up to date.
776     LOGINFO("recollindex: starting up\n");
777     setMyPriority(config);
778 
779     // Init status updater
780     if (nullptr == statusUpdater(config, op_flags & OPT_x)) {
781         std::cerr << "Could not initialize status updater\n";
782         LOGERR("Could not initialize status updater\n");
783         exit(1);
784     }
785     statusUpdater()->update(DbIxStatus::DBIXS_NONE, "");
786 
787     if (op_flags & OPT_r) {
788         if (aremain != 1)
789             Usage();
790         string top = args[optind++]; aremain--;
791         top = path_canon(top, &orig_cwd);
792         bool status = recursive_index(config, top, selpatterns);
793         if (confindexer && !confindexer->getReason().empty()) {
794             addIdxReason("indexer", confindexer->getReason());
795             cerr << confindexer->getReason() << endl;
796         }
797         flushIdxReasons();
798         exit(status ? 0 : 1);
799     } else if (op_flags & (OPT_i|OPT_e)) {
800         list<string> filenames;
801         if (aremain == 0) {
802             // Read from stdin
803             char line[1024];
804             while (fgets(line, 1023, stdin)) {
805                 string sl(line);
806                 trimstring(sl, "\n\r");
807                 filenames.push_back(sl);
808             }
809         } else {
810             while (aremain--) {
811                 filenames.push_back(args[optind++]);
812             }
813         }
814 
815         // Note that -e and -i may be both set. In this case we first erase,
816         // then index. This is a slightly different from -Z -i because we
817         // warranty that all subdocs are purged.
818         bool status = true;
819         if (op_flags & OPT_e) {
820             status = purgefiles(config, filenames);
821         }
822         if (status && (op_flags & OPT_i)) {
823             status = indexfiles(config, filenames);
824         }
825         if (confindexer && !confindexer->getReason().empty()) {
826             addIdxReason("indexer", confindexer->getReason());
827             cerr << confindexer->getReason() << endl;
828         }
829         flushIdxReasons();
830         exit(status ? 0 : 1);
831     } else if (op_flags & OPT_s) {
832         if (aremain != 1)
833             Usage();
834         string lang = args[optind++]; aremain--;
835         exit(!createstemdb(config, lang));
836 
837 #ifdef RCL_USE_ASPELL
838     } else if (op_flags & OPT_S) {
839         makeIndexerOrExit(config, false);
840         exit(!confindexer->createAspellDict());
841 #endif // ASPELL
842 
843 #ifdef RCL_MONITOR
844     } else if (op_flags & OPT_m) {
845         if (aremain != 0)
846             Usage();
847         statusUpdater()->setMonitor(true);
848         if (!(op_flags&OPT_D)) {
849             LOGDEB("recollindex: daemonizing\n");
850 #ifndef _WIN32
851             if (daemon(0,0) != 0) {
852                 addIdxReason("monitor", "daemon() failed");
853                 cerr << "daemon() failed, errno " << errno << endl;
854                 LOGERR("daemon() failed, errno " << errno << "\n");
855                 flushIdxReasons();
856                 exit(1);
857             }
858 #endif
859         }
860         // Need to rewrite pid, it changed
861         pidfile.write_pid();
862         // Not too sure if I have to redo the nice thing after daemon(),
863         // can't hurt anyway (easier than testing on all platforms...)
864         setMyPriority(config);
865 
866         if (sleepsecs > 0) {
867             LOGDEB("recollindex: sleeping " << sleepsecs << "\n");
868             for (int i = 0; i < sleepsecs; i++) {
869                 sleep(1);
870                 // Check that x11 did not go away while we were sleeping.
871                 if (!(op_flags & OPT_x) && !x11IsAlive()) {
872                     LOGDEB("X11 session went away during initial sleep period\n");
873                     exit(0);
874                 }
875             }
876         }
877 
878         if (!(op_flags & OPT_n)) {
879             makeIndexerOrExit(config, inPlaceReset);
880             LOGDEB("Recollindex: initial indexing pass before monitoring\n");
881             if (!confindexer->index(rezero, ConfIndexer::IxTAll, indexerFlags)
882                 || stopindexing) {
883                 LOGERR("recollindex, initial indexing pass failed, "
884                        "not going into monitor mode\n");
885                 flushIdxReasons();
886                 exit(1);
887             } else {
888                 // Record success of indexing pass with failed files retries.
889                 if (!(indexerFlags & ConfIndexer::IxFNoRetryFailed)) {
890                     checkRetryFailed(config, true);
891                 }
892             }
893             deleteZ(confindexer);
894 #ifndef _WIN32
895             o_reexec->insertArgs(vector<string>(1, "-n"));
896             LOGINFO("recollindex: reexecuting with -n after initial full "
897                     "pass\n");
898             // Note that -n will be inside the reexec when we come
899             // back, but the monitor will explicitly strip it before
900             // starting a config change exec to ensure that we do a
901             // purging pass in this latter case (full restart).
902             o_reexec->reexec();
903 #endif
904         }
905 
906         statusUpdater()->update(DbIxStatus::DBIXS_MONITOR, "");
907 
908         int opts = RCLMON_NONE;
909         if (op_flags & OPT_D)
910             opts |= RCLMON_NOFORK;
911         if (op_flags & OPT_C)
912             opts |= RCLMON_NOCONFCHECK;
913         if (op_flags & OPT_x)
914             opts |= RCLMON_NOX11;
915         bool monret = startMonitor(config, opts);
916         MONDEB(("Monitor returned %d, exiting\n", monret));
917         exit(monret == false);
918 #endif // MONITOR
919 
920     }
921 
922     makeIndexerOrExit(config, inPlaceReset);
923     bool status = confindexer->index(rezero, ConfIndexer::IxTAll, indexerFlags);
924     // Record success of indexing pass with failed files retries.
925     if (status && !(indexerFlags & ConfIndexer::IxFNoRetryFailed)) {
926         checkRetryFailed(config, true);
927     }
928     if (!status)
929         cerr << "Indexing failed" << endl;
930     if (!confindexer->getReason().empty()) {
931         addIdxReason("indexer", confindexer->getReason());
932         cerr << confindexer->getReason() << endl;
933     }
934     statusUpdater()->update(DbIxStatus::DBIXS_DONE, "");
935     flushIdxReasons();
936     return !status;
937 }
938