1 /* Copyright (C) 2004-2021 J.F.Dockes 2 * This program is free software; you can redistribute it and/or modify 3 * it under the terms of the GNU General Public License as published by 4 * the Free Software Foundation; either version 2 of the License, or 5 * (at your option) any later version. 6 * 7 * This program is distributed in the hope that it will be useful, 8 * but WITHOUT ANY WARRANTY; without even the implied warranty of 9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 * GNU General Public License for more details. 11 * 12 * You should have received a copy of the GNU General Public License 13 * along with this program; if not, write to the 14 * Free Software Foundation, Inc., 15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 16 */ 17 #ifndef _INDEXER_H_INCLUDED_ 18 #define _INDEXER_H_INCLUDED_ 19 20 #include <string> 21 #include <list> 22 #include <map> 23 #include <vector> 24 #include <mutex> 25 26 #include "rclconfig.h" 27 #include "rcldb.h" 28 #include "rcldoc.h" 29 #include "idxstatus.h" 30 31 class FsIndexer; 32 class WebQueueIndexer; 33 34 /** 35 * The top level batch indexing object. Processes the configuration, 36 * then invokes file system walking or other to populate/update the 37 * database(s). 38 */ 39 class ConfIndexer { 40 public: 41 enum runStatus {IndexerOk, IndexerError}; 42 ConfIndexer(RclConfig *cnf); 43 virtual ~ConfIndexer(); 44 45 // Indexer types. Maybe we'll have something more dynamic one day 46 enum ixType {IxTNone, IxTFs=1, IxTWebQueue=2, 47 IxTAll = IxTFs | IxTWebQueue}; 48 // Misc indexing flags 49 enum IxFlag {IxFNone = 0, 50 IxFIgnoreSkip = 1, // Ignore skipped lists 51 IxFNoWeb = 2, // Do not process the web queue. 52 // First pass: just do the top files so that the user can 53 // try searching asap. 54 IxFQuickShallow = 4, 55 // Do not retry files which previously failed ('+' sigs) 56 IxFNoRetryFailed = 8, 57 // Do perform purge pass even if we can't be sure we saw 58 // all files 59 IxFDoPurge = 16, 60 // Evict each indexed file from the page cache. 61 IxFCleanCache = 32, 62 }; 63 64 /** Run indexers */ 65 bool index(bool resetbefore, ixType typestorun, int f = IxFNone); 66 getReason()67 const string &getReason() {return m_reason;} 68 69 /** Stemming reset to config: create needed, delete unconfigured */ 70 bool createStemmingDatabases(); 71 72 /** Create stem database for given language */ 73 bool createStemDb(const string &lang); 74 75 /** Create misspelling expansion dictionary if aspell i/f is available */ 76 bool createAspellDict(); 77 78 /** List possible stemmer names */ 79 static vector<string> getStemmerNames(); 80 81 /** Index a list of files. No db cleaning or stemdb updating */ 82 bool indexFiles(std::list<std::string> &files, int f = IxFNone); 83 84 /** Update index for list of documents given as list of docs (out of query) 85 */ 86 bool updateDocs(vector<Rcl::Doc> &docs, IxFlag f = IxFNone); 87 88 /** Purge a list of files. */ 89 bool purgeFiles(std::list<std::string> &files, int f = IxFNone); 90 91 /** Set in place reset mode */ setInPlaceReset()92 void setInPlaceReset() {m_db.setInPlaceReset();} 93 private: 94 RclConfig *m_config; 95 Rcl::Db m_db; 96 FsIndexer *m_fsindexer{nullptr}; 97 bool m_doweb{false}; 98 WebQueueIndexer *m_webindexer{nullptr}; 99 string m_reason; 100 101 // The first time we index, we do things a bit differently to 102 // avoid user frustration (make at least some results available 103 // fast by using several passes, the first ones to index common 104 // interesting locations). 105 bool runFirstIndexing(); 106 bool firstFsIndexingSequence(); 107 }; 108 109 #endif /* _INDEXER_H_INCLUDED_ */ 110