1 /* Copyright (C) 2004 J.F.Dockes
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the
14 * Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
16 */
17 #include "autoconfig.h"
18
19 #include <stdio.h>
20 #include <signal.h>
21 #include <errno.h>
22 #include <fnmatch.h>
23 #ifndef _WIN32
24 #include <sys/time.h>
25 #include <sys/resource.h>
26 #else
27 #include <direct.h>
28 #endif
29 #include "safefcntl.h"
30 #include "safeunistd.h"
31 #include <getopt.h>
32
33 #include <iostream>
34 #include <list>
35 #include <string>
36 #include <cstdlib>
37
38 using namespace std;
39
40 #include "log.h"
41 #include "rclinit.h"
42 #include "indexer.h"
43 #include "smallut.h"
44 #include "chrono.h"
45 #include "pathut.h"
46 #include "rclutil.h"
47 #include "rclmon.h"
48 #include "x11mon.h"
49 #include "cancelcheck.h"
50 #include "checkindexed.h"
51 #include "rcldb.h"
52 #include "readfile.h"
53 #ifndef DISABLE_WEB_INDEXER
54 #include "webqueue.h"
55 #endif
56 #include "recollindex.h"
57 #include "fsindexer.h"
58 #ifndef _WIN32
59 #include "rclionice.h"
60 #endif
61 #include "execmd.h"
62 #include "checkretryfailed.h"
63 #include "circache.h"
64 #include "idxdiags.h"
65
66 // Command line options
67 static int op_flags;
68 #define OPT_C 0x1
69 #define OPT_c 0x2
70 #define OPT_d 0x4
71 #define OPT_D 0x8
72 #define OPT_E 0x10
73 #define OPT_e 0x20
74 #define OPT_f 0x40
75 #define OPT_h 0x80
76 #define OPT_i 0x200
77 #define OPT_K 0x400
78 #define OPT_k 0x800
79 #define OPT_l 0x1000
80 #define OPT_m 0x2000
81 #define OPT_n 0x4000
82 #define OPT_P 0x8000
83 #define OPT_p 0x10000
84 #define OPT_R 0x20000
85 #define OPT_r 0x40000
86 #define OPT_S 0x80000
87 #define OPT_s 0x100000
88 #define OPT_w 0x200000
89 #define OPT_x 0x400000
90 #define OPT_Z 0x800000
91 #define OPT_z 0x1000000
92
93 #define OPTVAL_WEBCACHE_COMPACT 1000
94 #define OPTVAL_WEBCACHE_BURST 1001
95 #define OPTVAL_DIAGS_NOTINDEXED 1002
96 #define OPTVAL_DIAGS_DIAGSFILE 1003
97
98 static struct option long_options[] = {
99 {"webcache-compact", 0, 0, OPTVAL_WEBCACHE_COMPACT},
100 {"webcache-burst", required_argument, 0, OPTVAL_WEBCACHE_BURST},
101 {"notindexed", 0, 0, OPTVAL_DIAGS_NOTINDEXED},
102 {"diagsfile", required_argument, 0, OPTVAL_DIAGS_DIAGSFILE},
103 {0, 0, 0, 0}
104 };
105
106 ReExec *o_reexec;
107
108 // Globals for atexit cleanup
109 static ConfIndexer *confindexer;
110
111 // This is set as an atexit routine,
cleanup()112 static void cleanup()
113 {
114 deleteZ(confindexer);
115 IdxDiags::theDiags().flush();
116 recoll_exitready();
117 }
118
119 // This holds the state of topdirs (exist+nonempty) on indexing
120 // startup. If it changes after a resume from sleep we interrupt the
121 // indexing (the assumption being that a volume has been mounted or
122 // unmounted while we slept). This is not foolproof as the user can
123 // always pull out a removable volume while we work. It just avoids a
124 // harmful purge in a common case.
125 static vector<string> o_topdirs;
126 static vector<bool> o_topdirs_emptiness;
127
topdirs_state(vector<bool> tdlstate)128 bool topdirs_state(vector<bool> tdlstate)
129 {
130 tdlstate.clear();
131 for (const auto& dir : o_topdirs) {
132 tdlstate.push_back(path_empty(dir));
133 }
134 return true;
135 }
136
sigcleanup(int sig)137 static void sigcleanup(int sig)
138 {
139 if (sig == RCLSIG_RESUME) {
140 vector<bool> emptiness;
141 topdirs_state(emptiness);
142 if (emptiness != o_topdirs_emptiness) {
143 string msg = "Recollindex: resume: topdirs state changed while "
144 "we were sleeping\n";
145 cerr << msg;
146 LOGDEB(msg);
147 CancelCheck::instance().setCancel();
148 stopindexing = 1;
149 }
150 } else {
151 cerr << "Recollindex: got signal " << sig << ", registering stop request\n";
152 LOGDEB("Got signal " << sig << ", registering stop request\n");
153 CancelCheck::instance().setCancel();
154 stopindexing = 1;
155 }
156 }
157
makeIndexerOrExit(RclConfig * config,bool inPlaceReset)158 static void makeIndexerOrExit(RclConfig *config, bool inPlaceReset)
159 {
160 if (!confindexer) {
161 confindexer = new ConfIndexer(config);
162 if (inPlaceReset)
163 confindexer->setInPlaceReset();
164 }
165 if (!confindexer) {
166 cerr << "Cannot create indexer" << endl;
167 exit(1);
168 }
169 }
170
171 // Adjust IO priority (if available), and also Linux Out-Of-Memory killer badness (idem)
rclIxIonice(const RclConfig * config)172 void rclIxIonice(const RclConfig *config)
173 {
174 PRETEND_USE(config);
175 #ifndef _WIN32
176 string clss, classdata;
177 if (!config->getConfParam("monioniceclass", clss) || clss.empty())
178 clss = "3";
179 // Classdata may be empty (must be for idle class)
180 config->getConfParam("monioniceclassdata", classdata);
181 rclionice(clss, classdata);
182
183 std::string choompath;
184 if (ExecCmd::which("choom", choompath) && !choompath.empty()) {
185 std::string oomadj = "300";
186 config->getConfParam("oomadj", oomadj);
187 std::string spid = lltodecstr(getpid());
188 ExecCmd cmd;
189 std::string msg;
190 cmd.doexec(choompath, {"-n", oomadj, "-p", spid}, nullptr, &msg);
191 LOGDEB("rclIxIonice: oomadj output: " << msg);
192 }
193
194 #endif
195 }
196
setMyPriority(const RclConfig * config)197 static void setMyPriority(const RclConfig *config)
198 {
199 PRETEND_USE(config);
200 #ifndef _WIN32
201 int prio{19};
202 std::string sprio;
203 config->getConfParam("idxniceprio", sprio);
204 if (!sprio.empty()) {
205 prio = atoi(sprio.c_str());
206 }
207 if (setpriority(PRIO_PROCESS, 0, prio) != 0) {
208 LOGINFO("recollindex: can't setpriority(), errno " << errno << "\n");
209 }
210 // Try to ionice. This does not work on all platforms
211 rclIxIonice(config);
212 #endif
213 }
214
215
216 class MakeListWalkerCB : public FsTreeWalkerCB {
217 public:
MakeListWalkerCB(list<string> & files,const vector<string> & selpats)218 MakeListWalkerCB(list<string>& files, const vector<string>& selpats)
219 : m_files(files), m_pats(selpats) {}
processone(const string & fn,const struct PathStat *,FsTreeWalker::CbFlag flg)220 virtual FsTreeWalker::Status processone(
221 const string& fn, const struct PathStat *, FsTreeWalker::CbFlag flg) {
222 if (flg== FsTreeWalker::FtwDirEnter || flg == FsTreeWalker::FtwRegular){
223 if (m_pats.empty()) {
224 m_files.push_back(fn);
225 } else {
226 for (const auto& pat : m_pats) {
227 if (fnmatch(pat.c_str(), fn.c_str(), 0) == 0) {
228 m_files.push_back(fn);
229 break;
230 }
231 }
232 }
233 }
234 return FsTreeWalker::FtwOk;
235 }
236 list<string>& m_files;
237 const vector<string>& m_pats;
238 };
239
240 // Build a list of things to index, then call purgefiles and/or
241 // indexfiles. This is basically the same as find xxx | recollindex
242 // -i [-e] without the find (so, simpler but less powerful)
recursive_index(RclConfig * config,const string & top,const vector<string> & selpats)243 bool recursive_index(RclConfig *config, const string& top,
244 const vector<string>& selpats)
245 {
246 list<string> files;
247 MakeListWalkerCB cb(files, selpats);
248 FsTreeWalker walker;
249 walker.walk(top, cb);
250 bool ret = false;
251 if (op_flags & OPT_e) {
252 if (!(ret = purgefiles(config, files))) {
253 return ret;
254 }
255 }
256 if (!(op_flags & OPT_e) || ((op_flags & OPT_e) &&(op_flags & OPT_i))) {
257 ret = indexfiles(config, files);
258 }
259 return ret;
260 }
261
262 // Index a list of files. We just call the top indexer method, which
263 // will sort out what belongs to the indexed trees and call the
264 // appropriate indexers.
265 //
266 // This is called either from the command line or from the monitor. In
267 // this case we're called repeatedly in the same process, and the
268 // confindexer is only created once by makeIndexerOrExit (but the db closed and
269 // flushed every time)
indexfiles(RclConfig * config,list<string> & filenames)270 bool indexfiles(RclConfig *config, list<string> &filenames)
271 {
272 if (filenames.empty())
273 return true;
274 makeIndexerOrExit(config, (op_flags & OPT_Z) != 0);
275 // The default is to retry failed files
276 int indexerFlags = ConfIndexer::IxFNone;
277 if (op_flags & OPT_K)
278 indexerFlags |= ConfIndexer::IxFNoRetryFailed;
279 if (op_flags & OPT_f)
280 indexerFlags |= ConfIndexer::IxFIgnoreSkip;
281 if (op_flags & OPT_P) {
282 indexerFlags |= ConfIndexer::IxFDoPurge;
283 }
284 return confindexer->indexFiles(filenames, indexerFlags);
285 }
286
287 // Delete a list of files. Same comments about call contexts as indexfiles.
purgefiles(RclConfig * config,list<string> & filenames)288 bool purgefiles(RclConfig *config, list<string> &filenames)
289 {
290 if (filenames.empty())
291 return true;
292 makeIndexerOrExit(config, (op_flags & OPT_Z) != 0);
293 return confindexer->purgeFiles(filenames, ConfIndexer::IxFNone);
294 }
295
296 // Create stemming and spelling databases
createAuxDbs(RclConfig * config)297 bool createAuxDbs(RclConfig *config)
298 {
299 makeIndexerOrExit(config, false);
300
301 if (!confindexer->createStemmingDatabases())
302 return false;
303
304 if (!confindexer->createAspellDict())
305 return false;
306
307 return true;
308 }
309
310 // Create additional stem database
createstemdb(RclConfig * config,const string & lang)311 static bool createstemdb(RclConfig *config, const string &lang)
312 {
313 makeIndexerOrExit(config, false);
314 return confindexer->createStemDb(lang);
315 }
316
317 // Check that topdir entries are valid (successful tilde exp + abs
318 // path) or fail.
319 // In addition, topdirs, skippedPaths, daemSkippedPaths entries should
320 // match existing files or directories. Warn if they don't
checktopdirs(RclConfig * config,vector<string> & nonexist)321 static bool checktopdirs(RclConfig *config, vector<string>& nonexist)
322 {
323 if (!config->getConfParam("topdirs", &o_topdirs)) {
324 cerr << "No 'topdirs' parameter in configuration\n";
325 LOGERR("recollindex:No 'topdirs' parameter in configuration\n");
326 return false;
327 }
328
329 // If a restricted list for real-time monitoring exists check that
330 // all entries are descendants from a topdir
331 vector<string> mondirs;
332 if (config->getConfParam("monitordirs", &mondirs)) {
333 for (const auto& sub : mondirs) {
334 bool found{false};
335 for (const auto& top : o_topdirs) {
336 if (path_isdesc(top, sub)) {
337 found = true;
338 break;
339 }
340 }
341 if (!found) {
342 string s("Real time monitoring directory entry " + sub +
343 " is not part of the topdirs tree\n");
344 cerr << s;
345 LOGERR(s);
346 return false;
347 }
348 }
349 }
350
351 bool onegood{false};
352 for (auto& dir : o_topdirs) {
353 dir = path_tildexpand(dir);
354 if (!dir.size() || !path_isabsolute(dir)) {
355 if (dir[0] == '~') {
356 cerr << "Tilde expansion failed: " << dir << endl;
357 LOGERR("recollindex: tilde expansion failed: " << dir << "\n");
358 } else {
359 cerr << "Not an absolute path: " << dir << endl;
360 LOGERR("recollindex: not an absolute path: " << dir << "\n");
361 }
362 return false;
363 }
364 if (!path_exists(dir)) {
365 nonexist.push_back(dir);
366 } else {
367 onegood = true;
368 }
369 }
370 topdirs_state(o_topdirs_emptiness);
371
372 // We'd like to check skippedPaths too, but these are wildcard
373 // exprs, so reasonably can't
374
375 return onegood;
376 }
377
378
379 string thisprog;
380
381 static const char usage [] =
382 "\n"
383 "recollindex [-h] \n"
384 " Print help\n"
385 "recollindex [-z|-Z] [-k]\n"
386 " Index everything according to configuration file\n"
387 " -z : reset database before starting indexing\n"
388 " -Z : in place reset: consider all documents as changed. Can also\n"
389 " be combined with -i or -r but not -m\n"
390 " -k : retry files on which we previously failed\n"
391 " --diagsfile <outputpath> : list skipped or otherwise not indexed documents to <outputpath>\n"
392 " <outputpath> will be truncated\n"
393 #ifdef RCL_MONITOR
394 "recollindex -m [-w <secs>] -x [-D] [-C]\n"
395 " Perform real time indexing. Don't become a daemon if -D is set.\n"
396 " -w sets number of seconds to wait before starting.\n"
397 " -C disables monitoring config for changes/reexecuting.\n"
398 " -n disables initial incremental indexing (!and purge!).\n"
399 #ifndef DISABLE_X11MON
400 " -x disables exit on end of x11 session\n"
401 #endif /* DISABLE_X11MON */
402 #endif /* RCL_MONITOR */
403 "recollindex -e [<filepath [path ...]>]\n"
404 " Purge data for individual files. No stem database updates.\n"
405 " Reads paths on stdin if none is given as argument.\n"
406 "recollindex -i [-f] [-Z] [<filepath [path ...]>]\n"
407 " Index individual files. No database purge or stem database updates\n"
408 " Will read paths on stdin if none is given as argument\n"
409 " -f : ignore skippedPaths and skippedNames while doing this\n"
410 "recollindex -r [-K] [-f] [-Z] [-p pattern] <top> \n"
411 " Recursive partial reindex. \n"
412 " -p : filter file names, multiple instances are allowed, e.g.: \n"
413 " -p *.odt -p *.pdf\n"
414 " -K : skip previously failed files (they are retried by default)\n"
415 "recollindex -l\n"
416 " List available stemming languages\n"
417 "recollindex -s <lang>\n"
418 " Build stem database for additional language <lang>\n"
419 "recollindex -E\n"
420 " Check configuration file for topdirs and other paths existence\n"
421 "recollindex --webcache-compact : recover wasted space from the Web cache\n"
422 "recollindex --webcache-burst <targetdir> : extract entries from the Web cache to the target\n"
423 "recollindex --notindexed [filepath [filepath ...]] : check if the file arguments are indexed\n"
424 " will read file paths from stdin if there are no arguments\n"
425 #ifdef FUTURE_IMPROVEMENT
426 "recollindex -W\n"
427 " Process the Web queue\n"
428 #endif
429 #ifdef RCL_USE_ASPELL
430 "recollindex -S\n"
431 " Build aspell spelling dictionary.>\n"
432 #endif
433 "Common options:\n"
434 " -c <configdir> : specify config directory, overriding $RECOLL_CONFDIR\n"
435 #if defined(HAVE_POSIX_FADVISE)
436 " -d : call fadvise() with the POSIX_FADV_DONTNEED flag on indexed files\n"
437 " (avoids trashing the page cache)\n";
438 #endif
439 ;
440
Usage()441 static void Usage()
442 {
443 FILE *fp = (op_flags & OPT_h) ? stdout : stderr;
444 fprintf(fp, "%s: Usage: %s", path_getsimple(thisprog).c_str(), usage);
445 fprintf(fp, "Recoll version: %s\n", Rcl::version_string().c_str());
446 exit((op_flags & OPT_h)==0);
447 }
448
449 static RclConfig *config;
450
lockorexit(Pidfile * pidfile,RclConfig * config)451 static void lockorexit(Pidfile *pidfile, RclConfig *config)
452 {
453 PRETEND_USE(config);
454 pid_t pid;
455 if ((pid = pidfile->open()) != 0) {
456 if (pid > 0) {
457 cerr << "Can't become exclusive indexer: " << pidfile->getreason()
458 << ". Return (other pid?): " << pid << endl;
459 #ifndef _WIN32
460 // Have a look at the status file. If the other process is
461 // a monitor we can tell it to start an incremental pass
462 // by touching the configuration file
463 DbIxStatus status;
464 readIdxStatus(config, status);
465 if (status.hasmonitor) {
466 string cmd("touch ");
467 string path = path_cat(config->getConfDir(), "recoll.conf");
468 cmd += path;
469 int status;
470 if ((status = system(cmd.c_str()))) {
471 cerr << cmd << " failed with status " << status << endl;
472 } else {
473 cerr << "Monitoring indexer process was notified of "
474 "indexing request\n";
475 }
476 }
477 #endif
478 } else {
479 cerr << "Can't become exclusive indexer: " << pidfile->getreason()
480 << endl;
481 }
482 exit(1);
483 }
484 if (pidfile->write_pid() != 0) {
485 cerr << "Can't become exclusive indexer: " << pidfile->getreason() <<
486 endl;
487 exit(1);
488 }
489 }
490
491 static string reasonsfile;
492 extern ConfSimple idxreasons;
flushIdxReasons()493 static void flushIdxReasons()
494 {
495 if (reasonsfile.empty())
496 return;
497 if (reasonsfile == "stdout") {
498 idxreasons.write(cout);
499 } else if (reasonsfile == "stderr") {
500 idxreasons.write(std::cerr);
501 } else {
502 ofstream out;
503 try {
504 out.open(reasonsfile, ofstream::out|ofstream::trunc);
505 idxreasons.write(out);
506 } catch (...) {
507 std::cerr << "Could not write reasons file " << reasonsfile << endl;
508 idxreasons.write(std::cerr);
509 }
510 }
511 }
512
513 // With more recent versions of mingw, we could use -municode to
514 // enable wmain. Another workaround is to use main, then call
515 // GetCommandLineW and CommandLineToArgvW, to then call wmain(). If
516 // ever we need to build with mingw again.
517 #if defined(_WIN32) && defined(_MSC_VER)
518 #define USE_WMAIN 1
519 #endif
520
521 #if USE_WMAIN
522 #define WARGTOSTRING(w) wchartoutf8(w)
argstovector(int argc,wchar_t ** argv,vector<string> & storage)523 static vector<const char*> argstovector(int argc, wchar_t **argv, vector<string>& storage)
524 #else
525 #define WARGTOSTRING(w) (w)
526 static vector<const char*> argstovector(int argc, char **argv, vector<string>& storage)
527 #endif
528 {
529 vector<const char *> args(argc+1);
530 storage.resize(argc+1);
531 thisprog = path_absolute(WARGTOSTRING(argv[0]));
532 for (int i = 0; i < argc; i++) {
533 storage[i] = WARGTOSTRING(argv[i]);
534 args[i] = storage[i].c_str();
535 }
536 return args;
537 }
538
539
540 // Working directory before we change: it's simpler to change early
541 // but some options need the original for computing absolute paths.
542 static std::string orig_cwd;
543
544 // A bit of history: it's difficult to pass non-ASCII parameters
545 // (e.g. path names) on the command line under Windows without using
546 // Unicode. It was first thought possible to use a temporary file to
547 // hold the args, and make sure that the path for this would be ASCII,
548 // based on using shortpath(). Unfortunately, this does not work in
549 // all cases, so the second change was to use wmain(). The
550 // args-in-file was removed quite a long time after.
551 #if USE_WMAIN
wmain(int argc,wchar_t * argv[])552 int wmain(int argc, wchar_t *argv[])
553 #else
554 int main(int argc, char *argv[])
555 #endif
556 {
557 #ifndef _WIN32
558 // The reexec struct is used by the daemon to shed memory after
559 // the initial indexing pass and to restart when the configuration
560 // changes
561 o_reexec = new ReExec;
562 o_reexec->init(argc, argv);
563 #endif
564
565 // Only actually useful on Windows: convert wargs to utf-8 chars
566 vector<string> astore;
567 vector<const char*> args = argstovector(argc, argv, astore);
568
569 vector<string> selpatterns;
570 int sleepsecs{60};
571 string a_config;
572 int ret;
573 bool webcache_compact{false};
574 bool webcache_burst{false};
575 bool diags_notindexed{false};
576
577 std::string burstdir;
578 std::string diagsfile;
579 while ((ret = getopt_long(argc, (char *const*)&args[0], "c:CDdEefhikKlmnPp:rR:sS:w:xZz",
580 long_options, NULL)) != -1) {
581 switch (ret) {
582 case 'c': op_flags |= OPT_c; a_config = optarg; break;
583 #ifdef RCL_MONITOR
584 case 'C': op_flags |= OPT_C; break;
585 case 'D': op_flags |= OPT_D; break;
586 #endif
587 #if defined(HAVE_POSIX_FADVISE)
588 case 'd': op_flags |= OPT_d; break;
589 #endif
590 case 'E': op_flags |= OPT_E; break;
591 case 'e': op_flags |= OPT_e; break;
592 case 'f': op_flags |= OPT_f; break;
593 case 'h': op_flags |= OPT_h; break;
594 case 'i': op_flags |= OPT_i; break;
595 case 'k': op_flags |= OPT_k; break;
596 case 'K': op_flags |= OPT_K; break;
597 case 'l': op_flags |= OPT_l; break;
598 case 'm': op_flags |= OPT_m; break;
599 case 'n': op_flags |= OPT_n; break;
600 case 'P': op_flags |= OPT_P; break;
601 case 'p': op_flags |= OPT_p; selpatterns.push_back(optarg); break;
602 case 'r': op_flags |= OPT_r; break;
603 case 'R': op_flags |= OPT_R; reasonsfile = optarg; break;
604 case 's': op_flags |= OPT_s; break;
605 #ifdef RCL_USE_ASPELL
606 case 'S': op_flags |= OPT_S; break;
607 #endif
608 case 'w': op_flags |= OPT_w;
609 if ((sscanf(optarg, "%d", &sleepsecs)) != 1)
610 Usage();
611 break;
612 case 'x': op_flags |= OPT_x; break;
613 case 'Z': op_flags |= OPT_Z; break;
614 case 'z': op_flags |= OPT_z; break;
615
616 case OPTVAL_WEBCACHE_COMPACT: webcache_compact = true; break;
617 case OPTVAL_WEBCACHE_BURST: burstdir = optarg; webcache_burst = true;break;
618 case OPTVAL_DIAGS_NOTINDEXED: diags_notindexed = true;break;
619 case OPTVAL_DIAGS_DIAGSFILE: diagsfile = optarg;break;
620 default: Usage(); break;
621 }
622 }
623 int aremain = argc - optind;
624
625 if (op_flags & OPT_h)
626 Usage();
627
628 #ifndef RCL_MONITOR
629 if (op_flags & (OPT_m | OPT_w|OPT_x)) {
630 std::cerr << "-m not available: real-time monitoring was not "
631 "configured in this build\n";
632 exit(1);
633 }
634 #endif
635
636 if ((op_flags & OPT_z) && (op_flags & (OPT_i|OPT_e|OPT_r)))
637 Usage();
638 if ((op_flags & OPT_Z) && (op_flags & (OPT_m)))
639 Usage();
640 if ((op_flags & OPT_E) && (op_flags & ~(OPT_E|OPT_c))) {
641 Usage();
642 }
643
644 string reason;
645 int flags = RCLINIT_IDX;
646 if ((op_flags & OPT_m) && !(op_flags&OPT_D)) {
647 flags |= RCLINIT_DAEMON;
648 }
649 config = recollinit(flags, cleanup, sigcleanup, reason, &a_config);
650 if (config == 0 || !config->ok()) {
651 addIdxReason("init", reason);
652 flushIdxReasons();
653 std::cerr << "Configuration problem: " << reason << endl;
654 exit(1);
655 }
656
657 // Auxiliary, non-index-related things. Avoids having a separate binary.
658 if (webcache_compact || webcache_burst || diags_notindexed) {
659 std::string ccdir = config->getWebcacheDir();
660 std::string reason;
661 if (webcache_compact) {
662 if (!CirCache::compact(ccdir, &reason)) {
663 std::cerr << "Web cache compact failed: " << reason << "\n";
664 exit(1);
665 }
666 } else if (webcache_burst) {
667 if (!CirCache::burst(ccdir, burstdir, &reason)) {
668 std::cerr << "Web cache burst failed: " << reason << "\n";
669 exit(1);
670 }
671 } else if (diags_notindexed) {
672 std::vector<std::string> filepaths;
673 while (aremain--) {
674 filepaths.push_back(args[optind++]);
675 }
676 if (!checkindexed(config, filepaths)) {
677 exit(1);
678 }
679 }
680
681 exit(0);
682 }
683
684 #ifndef _WIN32
685 o_reexec->atexit(cleanup);
686 #endif
687
688 vector<string> nonexist;
689 if (!checktopdirs(config, nonexist)) {
690 std::cerr << "topdirs not set or only contains invalid paths.\n";
691 addIdxReason("init", "topdirs not set or only contains invalid paths.");
692 flushIdxReasons();
693 exit(1);
694 }
695
696 if (nonexist.size()) {
697 ostream& out = (op_flags & OPT_E) ? cout : cerr;
698 if (!(op_flags & OPT_E)) {
699 cerr << "Warning: invalid paths in topdirs, skippedPaths or "
700 "daemSkippedPaths:\n";
701 }
702 for (const auto& entry : nonexist) {
703 out << entry << endl;
704 }
705 }
706 if ((op_flags & OPT_E)) {
707 exit(0);
708 }
709
710 if (op_flags & OPT_l) {
711 if (aremain != 0)
712 Usage();
713 vector<string> stemmers = ConfIndexer::getStemmerNames();
714 for (const auto& stemmer : stemmers) {
715 cout << stemmer << endl;
716 }
717 exit(0);
718 }
719
720 orig_cwd = path_cwd();
721 string rundir;
722 config->getConfParam("idxrundir", rundir);
723 if (!rundir.empty()) {
724 if (!rundir.compare("tmp")) {
725 rundir = tmplocation();
726 }
727 LOGINFO("recollindex: changing current directory to [" <<rundir<<"]\n");
728 if (!path_chdir(rundir)) {
729 LOGSYSERR("main", "chdir", rundir);
730 }
731 }
732
733 if (!diagsfile.empty()) {
734 if (!IdxDiags::theDiags().init(diagsfile)) {
735 std::cerr << "Could not initialize diags file " << diagsfile << "\n";
736 LOGERR("recollindex: Could not initialize diags file " << diagsfile << "\n");
737 }
738 }
739 bool rezero((op_flags & OPT_z) != 0);
740 bool inPlaceReset((op_flags & OPT_Z) != 0);
741
742 // The default is not to retry previously failed files by default.
743 // If -k is set, we do.
744 // If the checker script says so, we do too, except if -K is set.
745 int indexerFlags = ConfIndexer::IxFNoRetryFailed;
746 if (op_flags & OPT_k) {
747 indexerFlags &= ~ConfIndexer::IxFNoRetryFailed;
748 } else {
749 if (op_flags & OPT_K) {
750 indexerFlags |= ConfIndexer::IxFNoRetryFailed;
751 } else {
752 if (checkRetryFailed(config, false)) {
753 indexerFlags &= ~ConfIndexer::IxFNoRetryFailed;
754 } else {
755 indexerFlags |= ConfIndexer::IxFNoRetryFailed;
756 }
757 }
758 }
759 if (indexerFlags & ConfIndexer::IxFNoRetryFailed) {
760 LOGDEB("recollindex: files in error will not be retried\n");
761 } else {
762 LOGDEB("recollindex: files in error will be retried\n");
763 }
764
765 #if defined(HAVE_POSIX_FADVISE)
766 if (op_flags & OPT_d) {
767 indexerFlags |= ConfIndexer::IxFCleanCache;
768 }
769 #endif
770
771 Pidfile pidfile(config->getPidfile());
772 lockorexit(&pidfile, config);
773
774 // Log something at LOGINFO to reset the trace file. Else at level
775 // 3 it's not even truncated if all docs are up to date.
776 LOGINFO("recollindex: starting up\n");
777 setMyPriority(config);
778
779 // Init status updater
780 if (nullptr == statusUpdater(config, op_flags & OPT_x)) {
781 std::cerr << "Could not initialize status updater\n";
782 LOGERR("Could not initialize status updater\n");
783 exit(1);
784 }
785 statusUpdater()->update(DbIxStatus::DBIXS_NONE, "");
786
787 if (op_flags & OPT_r) {
788 if (aremain != 1)
789 Usage();
790 string top = args[optind++]; aremain--;
791 top = path_canon(top, &orig_cwd);
792 bool status = recursive_index(config, top, selpatterns);
793 if (confindexer && !confindexer->getReason().empty()) {
794 addIdxReason("indexer", confindexer->getReason());
795 cerr << confindexer->getReason() << endl;
796 }
797 flushIdxReasons();
798 exit(status ? 0 : 1);
799 } else if (op_flags & (OPT_i|OPT_e)) {
800 list<string> filenames;
801 if (aremain == 0) {
802 // Read from stdin
803 char line[1024];
804 while (fgets(line, 1023, stdin)) {
805 string sl(line);
806 trimstring(sl, "\n\r");
807 filenames.push_back(sl);
808 }
809 } else {
810 while (aremain--) {
811 filenames.push_back(args[optind++]);
812 }
813 }
814
815 // Note that -e and -i may be both set. In this case we first erase,
816 // then index. This is a slightly different from -Z -i because we
817 // warranty that all subdocs are purged.
818 bool status = true;
819 if (op_flags & OPT_e) {
820 status = purgefiles(config, filenames);
821 }
822 if (status && (op_flags & OPT_i)) {
823 status = indexfiles(config, filenames);
824 }
825 if (confindexer && !confindexer->getReason().empty()) {
826 addIdxReason("indexer", confindexer->getReason());
827 cerr << confindexer->getReason() << endl;
828 }
829 flushIdxReasons();
830 exit(status ? 0 : 1);
831 } else if (op_flags & OPT_s) {
832 if (aremain != 1)
833 Usage();
834 string lang = args[optind++]; aremain--;
835 exit(!createstemdb(config, lang));
836
837 #ifdef RCL_USE_ASPELL
838 } else if (op_flags & OPT_S) {
839 makeIndexerOrExit(config, false);
840 exit(!confindexer->createAspellDict());
841 #endif // ASPELL
842
843 #ifdef RCL_MONITOR
844 } else if (op_flags & OPT_m) {
845 if (aremain != 0)
846 Usage();
847 statusUpdater()->setMonitor(true);
848 if (!(op_flags&OPT_D)) {
849 LOGDEB("recollindex: daemonizing\n");
850 #ifndef _WIN32
851 if (daemon(0,0) != 0) {
852 addIdxReason("monitor", "daemon() failed");
853 cerr << "daemon() failed, errno " << errno << endl;
854 LOGERR("daemon() failed, errno " << errno << "\n");
855 flushIdxReasons();
856 exit(1);
857 }
858 #endif
859 }
860 // Need to rewrite pid, it changed
861 pidfile.write_pid();
862 // Not too sure if I have to redo the nice thing after daemon(),
863 // can't hurt anyway (easier than testing on all platforms...)
864 setMyPriority(config);
865
866 if (sleepsecs > 0) {
867 LOGDEB("recollindex: sleeping " << sleepsecs << "\n");
868 for (int i = 0; i < sleepsecs; i++) {
869 sleep(1);
870 // Check that x11 did not go away while we were sleeping.
871 if (!(op_flags & OPT_x) && !x11IsAlive()) {
872 LOGDEB("X11 session went away during initial sleep period\n");
873 exit(0);
874 }
875 }
876 }
877
878 if (!(op_flags & OPT_n)) {
879 makeIndexerOrExit(config, inPlaceReset);
880 LOGDEB("Recollindex: initial indexing pass before monitoring\n");
881 if (!confindexer->index(rezero, ConfIndexer::IxTAll, indexerFlags)
882 || stopindexing) {
883 LOGERR("recollindex, initial indexing pass failed, "
884 "not going into monitor mode\n");
885 flushIdxReasons();
886 exit(1);
887 } else {
888 // Record success of indexing pass with failed files retries.
889 if (!(indexerFlags & ConfIndexer::IxFNoRetryFailed)) {
890 checkRetryFailed(config, true);
891 }
892 }
893 deleteZ(confindexer);
894 #ifndef _WIN32
895 o_reexec->insertArgs(vector<string>(1, "-n"));
896 LOGINFO("recollindex: reexecuting with -n after initial full "
897 "pass\n");
898 // Note that -n will be inside the reexec when we come
899 // back, but the monitor will explicitly strip it before
900 // starting a config change exec to ensure that we do a
901 // purging pass in this latter case (full restart).
902 o_reexec->reexec();
903 #endif
904 }
905
906 statusUpdater()->update(DbIxStatus::DBIXS_MONITOR, "");
907
908 int opts = RCLMON_NONE;
909 if (op_flags & OPT_D)
910 opts |= RCLMON_NOFORK;
911 if (op_flags & OPT_C)
912 opts |= RCLMON_NOCONFCHECK;
913 if (op_flags & OPT_x)
914 opts |= RCLMON_NOX11;
915 bool monret = startMonitor(config, opts);
916 MONDEB(("Monitor returned %d, exiting\n", monret));
917 exit(monret == false);
918 #endif // MONITOR
919
920 }
921
922 makeIndexerOrExit(config, inPlaceReset);
923 bool status = confindexer->index(rezero, ConfIndexer::IxTAll, indexerFlags);
924 // Record success of indexing pass with failed files retries.
925 if (status && !(indexerFlags & ConfIndexer::IxFNoRetryFailed)) {
926 checkRetryFailed(config, true);
927 }
928 if (!status)
929 cerr << "Indexing failed" << endl;
930 if (!confindexer->getReason().empty()) {
931 addIdxReason("indexer", confindexer->getReason());
932 cerr << confindexer->getReason() << endl;
933 }
934 statusUpdater()->update(DbIxStatus::DBIXS_DONE, "");
935 flushIdxReasons();
936 return !status;
937 }
938