1 /* Copyright (C) 2004-2021 J.F.Dockes
2  *   This program is free software; you can redistribute it and/or modify
3  *   it under the terms of the GNU General Public License as published by
4  *   the Free Software Foundation; either version 2 of the License, or
5  *   (at your option) any later version.
6  *
7  *   This program is distributed in the hope that it will be useful,
8  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
9  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10  *   GNU General Public License for more details.
11  *
12  *   You should have received a copy of the GNU General Public License
13  *   along with this program; if not, write to the
14  *   Free Software Foundation, Inc.,
15  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
16  */
17 #ifndef _INDEXER_H_INCLUDED_
18 #define _INDEXER_H_INCLUDED_
19 
20 #include <string>
21 #include <list>
22 #include <map>
23 #include <vector>
24 #include <mutex>
25 
26 #include "rclconfig.h"
27 #include "rcldb.h"
28 #include "rcldoc.h"
29 #include "idxstatus.h"
30 
31 class FsIndexer;
32 class WebQueueIndexer;
33 
34 /**
35  * The top level batch indexing object. Processes the configuration,
36  * then invokes file system walking or other to populate/update the
37  * database(s).
38  */
39 class ConfIndexer {
40 public:
41     enum runStatus {IndexerOk, IndexerError};
42     ConfIndexer(RclConfig *cnf);
43     virtual ~ConfIndexer();
44 
45     // Indexer types. Maybe we'll have something more dynamic one day
46     enum ixType {IxTNone, IxTFs=1, IxTWebQueue=2,
47                  IxTAll = IxTFs | IxTWebQueue};
48     // Misc indexing flags
49     enum IxFlag {IxFNone = 0,
50                  IxFIgnoreSkip = 1, // Ignore skipped lists
51                  IxFNoWeb = 2, // Do not process the web queue.
52                  // First pass: just do the top files so that the user can
53                  // try searching asap.
54                  IxFQuickShallow = 4,
55                  // Do not retry files which previously failed ('+' sigs)
56                  IxFNoRetryFailed = 8,
57                  // Do perform purge pass even if we can't be sure we saw
58                  // all files
59                  IxFDoPurge = 16,
60                  // Evict each indexed file from the page cache.
61                  IxFCleanCache = 32,
62     };
63 
64     /** Run indexers */
65     bool index(bool resetbefore, ixType typestorun, int f = IxFNone);
66 
getReason()67     const string &getReason() {return m_reason;}
68 
69     /** Stemming reset to config: create needed, delete unconfigured */
70     bool createStemmingDatabases();
71 
72     /** Create stem database for given language */
73     bool createStemDb(const string &lang);
74 
75     /** Create misspelling expansion dictionary if aspell i/f is available */
76     bool createAspellDict();
77 
78     /** List possible stemmer names */
79     static vector<string> getStemmerNames();
80 
81     /** Index a list of files. No db cleaning or stemdb updating */
82     bool indexFiles(std::list<std::string> &files, int f = IxFNone);
83 
84     /** Update index for list of documents given as list of docs (out of query)
85      */
86     bool updateDocs(vector<Rcl::Doc> &docs, IxFlag f = IxFNone);
87 
88     /** Purge a list of files. */
89     bool purgeFiles(std::list<std::string> &files, int f = IxFNone);
90 
91     /** Set in place reset mode */
setInPlaceReset()92     void setInPlaceReset() {m_db.setInPlaceReset();}
93 private:
94     RclConfig *m_config;
95     Rcl::Db    m_db;
96     FsIndexer *m_fsindexer{nullptr};
97     bool       m_doweb{false};
98     WebQueueIndexer *m_webindexer{nullptr};
99     string     m_reason;
100 
101     // The first time we index, we do things a bit differently to
102     // avoid user frustration (make at least some results available
103     // fast by using several passes, the first ones to index common
104     // interesting locations).
105     bool runFirstIndexing();
106     bool firstFsIndexingSequence();
107 };
108 
109 #endif /* _INDEXER_H_INCLUDED_ */
110