1 /* 2 * Copyright 2005-2021 Fabrice Colin 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 17 */ 18 19 #ifndef _XAPIAN_INDEX_H 20 #define _XAPIAN_INDEX_H 21 22 #include <string> 23 #include <set> 24 #include <map> 25 26 #include "config.h" 27 #include "CJKVTokenizer.h" 28 #include "XapianDatabase.h" 29 #include "IndexInterface.h" 30 31 /// A Xapian-based index. 32 class XapianIndex : public IndexInterface 33 { 34 public: 35 XapianIndex(const std::string &indexName); 36 XapianIndex(const XapianIndex &other); 37 virtual ~XapianIndex(); 38 39 XapianIndex &operator=(const XapianIndex &other); 40 41 /// Returns false if the index couldn't be opened. 42 virtual bool isGood(void) const; 43 44 /// Gets metadata. 45 virtual std::string getMetadata(const std::string &name) const; 46 47 /// Sets metadata. 48 virtual bool setMetadata(const std::string &name, const std::string &value) const; 49 50 /// Gets the index location. 51 virtual std::string getLocation(void) const; 52 53 /// Returns a document's properties. 54 virtual bool getDocumentInfo(unsigned int docId, DocumentInfo &docInfo) const; 55 56 /// Returns a document's terms count. 57 virtual unsigned int getDocumentTermsCount(unsigned int docId) const; 58 59 /// Returns a document's terms. 60 virtual bool getDocumentTerms(unsigned int docId, 61 std::map<unsigned int, std::string> &wordsBuffer) const; 62 63 /// Sets the list of known labels. 64 virtual bool setLabels(const std::set<std::string> &labels, bool resetLabels); 65 66 /// Gets the list of known labels. 67 virtual bool getLabels(std::set<std::string> &labels) const; 68 69 /// Adds a label. 70 virtual bool addLabel(const std::string &name); 71 72 /// Deletes all references to a label. 73 virtual bool deleteLabel(const std::string &name); 74 75 /// Determines whether a document has a label. 76 virtual bool hasLabel(unsigned int docId, const std::string &name) const; 77 78 /// Returns a document's labels. 79 virtual bool getDocumentLabels(unsigned int docId, std::set<std::string> &labels) const; 80 81 /// Sets a document's labels. 82 virtual bool setDocumentLabels(unsigned int docId, const std::set<std::string> &labels, 83 bool resetLabels = true); 84 85 /// Sets documents' labels. 86 virtual bool setDocumentsLabels(const std::set<unsigned int> &docIds, 87 const std::set<std::string> &labels, bool resetLabels = true); 88 89 /// Checks whether the given URL is in the index. 90 virtual unsigned int hasDocument(const std::string &url) const; 91 92 /// Gets terms with the same root. 93 virtual unsigned int getCloseTerms(const std::string &term, std::set<std::string> &suggestions); 94 95 /// Returns the ID of the last document. 96 virtual unsigned int getLastDocumentID(void) const; 97 98 /// Returns the number of documents. 99 virtual unsigned int getDocumentsCount(const std::string &labelName = "") const; 100 101 /// Lists documents. 102 virtual unsigned int listDocuments(std::set<unsigned int> &docIDList, 103 unsigned int maxDocsCount = 0, unsigned int startDoc = 0) const; 104 105 /// Lists documents. 106 virtual bool listDocuments(const std::string &name, std::set<unsigned int> &docIds, 107 NameType type, unsigned int maxDocsCount = 0, unsigned int startDoc = 0) const; 108 109 /// Indexes the given data. 110 virtual bool indexDocument(const Document &doc, const std::set<std::string> &labels, 111 unsigned int &docId); 112 113 /// Updates the given document. 114 virtual bool updateDocument(unsigned int docId, const Document &doc); 115 116 /// Updates a document's properties. 117 virtual bool updateDocumentInfo(unsigned int docId, const DocumentInfo &docInfo); 118 119 /// Unindexes the given document. 120 virtual bool unindexDocument(unsigned int docId); 121 122 /// Unindexes the given document. 123 virtual bool unindexDocument(const std::string &location); 124 125 /// Unindexes documents. 126 virtual bool unindexDocuments(const std::string &name, NameType type); 127 128 /// Unindexes all documents. 129 virtual bool unindexAllDocuments(void); 130 131 /// Flushes recent changes to the disk. 132 virtual bool flush(void); 133 134 /// Reopens the index. 135 virtual bool reopen(void) const; 136 137 /// Resets the index. 138 virtual bool reset(void); 139 140 protected: 141 std::string m_databaseName; 142 bool m_goodIndex; 143 bool m_doSpelling; 144 std::string m_stemLanguage; 145 146 bool listDocumentsWithTerm(const std::string &term, std::set<unsigned int> &docIds, 147 unsigned int maxDocsCount = 0, unsigned int startDoc = 0) const; 148 149 void addPostingsToDocument(const Xapian::Utf8Iterator &itor, Xapian::Document &doc, 150 const Xapian::WritableDatabase &db, const std::string &prefix, 151 bool noStemming, bool &doSpelling, Xapian::termcount &termPos) const; 152 153 void addPostingsToDocument(Dijon::CJKVTokenizer &tokenizer, Xapian::Stem *pStemmer, 154 const std::string &text, Xapian::Document &doc, 155 const Xapian::WritableDatabase &db, const std::string &prefix, 156 bool &doSpelling, Xapian::termcount &termPos) const; 157 158 static void addLabelsToDocument(Xapian::Document &doc, 159 const std::set<std::string> &labels, bool skipInternals); 160 161 void removePostingsFromDocument(const Xapian::Utf8Iterator &itor, Xapian::Document &doc, 162 const Xapian::WritableDatabase &db, const std::string &prefix, 163 bool noStemming, bool &doSpelling) const; 164 165 void addCommonTerms(const DocumentInfo &info, Xapian::Document &doc, 166 const Xapian::WritableDatabase &db, Xapian::termcount &termPos); 167 168 void removeCommonTerms(Xapian::Document &doc, const Xapian::WritableDatabase &db); 169 170 std::string scanDocument(const std::string &suggestedLanguage, 171 const char *pData, off_t dataLength); 172 173 void setDocumentData(const DocumentInfo &info, Xapian::Document &doc, 174 const std::string &language) const; 175 176 bool deleteDocuments(const std::string &term); 177 178 }; 179 180 #endif // _XAPIAN_INDEX_H 181