1 /** @file omenquireinternal.h 2 * @brief Internals 3 */ 4 /* Copyright 1999,2000,2001 BrightStation PLC 5 * Copyright 2001,2002 Ananova Ltd 6 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2014,2015,2016 Olly Betts 7 * Copyright 2009 Lemur Consulting Ltd 8 * Copyright 2011 Action Without Borders 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License as 12 * published by the Free Software Foundation; either version 2 of the 13 * License, or (at your option) any later version. 14 * 15 * This program is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU General Public License for more details. 19 * 20 * You should have received a copy of the GNU General Public License 21 * along with this program; if not, write to the Free Software 22 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 23 * USA 24 */ 25 26 #ifndef OM_HGUARD_OMENQUIREINTERNAL_H 27 #define OM_HGUARD_OMENQUIREINTERNAL_H 28 29 #include "xapian/database.h" 30 #include "xapian/document.h" 31 #include "xapian/enquire.h" 32 #include "xapian/query.h" 33 #include "xapian/keymaker.h" 34 35 #include <algorithm> 36 #include <cmath> 37 #include <map> 38 #include <set> 39 #include <unordered_map> 40 41 #include "weight/weightinternal.h" 42 43 using namespace std; 44 45 class OmExpand; 46 class MultiMatch; 47 48 namespace Xapian { 49 50 class TermIterator; 51 52 namespace Internal { 53 54 /** An item resulting from a query. 55 * This item contains the document id, and the weight calculated for 56 * the document. 57 */ 58 class MSetItem { 59 public: MSetItem(double wt_,Xapian::docid did_)60 MSetItem(double wt_, Xapian::docid did_) 61 : wt(wt_), did(did_), collapse_count(0) {} 62 MSetItem(double wt_,Xapian::docid did_,const string & key_)63 MSetItem(double wt_, Xapian::docid did_, const string &key_) 64 : wt(wt_), did(did_), collapse_key(key_), collapse_count(0) {} 65 MSetItem(double wt_,Xapian::docid did_,const string & key_,Xapian::doccount collapse_count_)66 MSetItem(double wt_, Xapian::docid did_, const string &key_, 67 Xapian::doccount collapse_count_) 68 : wt(wt_), did(did_), collapse_key(key_), 69 collapse_count(collapse_count_) {} 70 swap(MSetItem & o)71 void swap(MSetItem & o) { 72 std::swap(wt, o.wt); 73 std::swap(did, o.did); 74 std::swap(collapse_key, o.collapse_key); 75 std::swap(collapse_count, o.collapse_count); 76 std::swap(sort_key, o.sort_key); 77 } 78 79 /** Weight calculated. */ 80 double wt; 81 82 /** Document id. */ 83 Xapian::docid did; 84 85 /** Value which was used to collapse upon. 86 * 87 * If the collapse option is not being used, this will always 88 * have a null value. 89 * 90 * If the collapse option is in use, this will contain the collapse 91 * key's value for this particular item. If the key is not present 92 * for this item, the value will be a null string. Only one instance 93 * of each key value (apart from the null string) will be present in 94 * the items in the returned Xapian::MSet. 95 */ 96 string collapse_key; 97 98 /** Count of collapses done on collapse_key so far 99 * 100 * This is normally 0, and goes up for each collapse done 101 * It is not necessarily an indication of how many collapses 102 * might be done if an exhaustive match was done 103 */ 104 Xapian::doccount collapse_count; 105 106 /** Used when sorting by value. */ 107 string sort_key; 108 109 /// Return a string describing this object. 110 string get_description() const; 111 }; 112 113 } 114 115 /** Internals of enquire system. 116 * This allows the implementation of Xapian::Enquire to be hidden and reference 117 * counted. 118 */ 119 class Enquire::Internal : public Xapian::Internal::intrusive_base { 120 friend class MSet::Internal; 121 private: 122 /// The database which this enquire object uses. 123 const Xapian::Database db; 124 125 /// The user's query. 126 Query query; 127 128 /// The query length. 129 termcount qlen; 130 131 /// Copy not allowed 132 Internal(const Internal &); 133 /// Assignment not allowed 134 void operator=(const Internal &); 135 136 public: 137 typedef enum { REL, VAL, VAL_REL, REL_VAL } sort_setting; 138 139 Xapian::valueno collapse_key; 140 141 Xapian::doccount collapse_max; 142 143 Xapian::Enquire::docid_order order; 144 145 int percent_cutoff; 146 147 double weight_cutoff; 148 149 Xapian::valueno sort_key; 150 sort_setting sort_by; 151 bool sort_value_forward; 152 153 Xapian::Internal::opt_intrusive_ptr<KeyMaker> sorter; 154 155 double time_limit; 156 157 /** The weight to use for this query. 158 * 159 * This is mutable so that the default BM25Weight object can be 160 * created lazily when first required. 161 */ 162 mutable Weight * weight; 163 164 /// The weighting scheme to use for query expansion. 165 std::string eweightname; 166 167 /// The parameter required for TradWeight query expansion. 168 double expand_k; 169 170 vector<Xapian::Internal::opt_intrusive_ptr<MatchSpy>> spies; 171 172 explicit Internal(const Xapian::Database &databases); 173 ~Internal(); 174 175 /** Request a document from the database. 176 */ 177 void request_doc(const Xapian::Internal::MSetItem &item) const; 178 179 /** Read a previously requested document from the database. 180 */ 181 Xapian::Document read_doc(const Xapian::Internal::MSetItem &item) const; 182 183 Xapian::Document get_document(const Xapian::Internal::MSetItem &item) const; 184 185 void set_query(const Query & query_, termcount qlen_); 186 const Query & get_query() const; 187 MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems, 188 Xapian::doccount check_at_least, 189 const RSet *omrset, 190 const MatchDecider *mdecider) const; 191 192 ESet get_eset(Xapian::termcount maxitems, const RSet & omrset, int flags, 193 const ExpandDecider *edecider, double min_wt) const; 194 195 TermIterator get_matching_terms(Xapian::docid did) const; 196 TermIterator get_matching_terms(const Xapian::MSetIterator &it) const; 197 198 Xapian::doccount get_termfreq(const string &tname) const; 199 200 string get_description() const; 201 }; 202 203 class MSet::Internal : public Xapian::Internal::intrusive_base { 204 public: 205 /// Factor to multiply weights by to convert them to percentages. 206 double percent_factor; 207 208 private: 209 /** The set of documents which have been requested but not yet 210 * collected. 211 */ 212 mutable set<Xapian::doccount> requested_docs; 213 214 /// Cache of documents, indexed by MSet index. 215 mutable map<Xapian::doccount, Xapian::Document> indexeddocs; 216 217 /// Read and cache the documents so far requested. 218 void read_docs() const; 219 220 /// Copy not allowed 221 Internal(const Internal &); 222 /// Assignment not allowed 223 void operator=(const Internal &); 224 225 mutable std::unordered_map<std::string, double> snippet_bg_relevance; 226 227 public: 228 /// Xapian::Enquire reference, for getting documents. 229 Xapian::Internal::intrusive_ptr<const Enquire::Internal> enquire; 230 231 /** Provides the term frequency and weight for each term in the query. */ 232 Xapian::Weight::Internal * stats; 233 234 /// A list of items comprising the (selected part of the) MSet. 235 vector<Xapian::Internal::MSetItem> items; 236 237 /// Rank of first item in MSet. 238 Xapian::doccount firstitem; 239 240 Xapian::doccount matches_lower_bound; 241 242 Xapian::doccount matches_estimated; 243 244 Xapian::doccount matches_upper_bound; 245 246 Xapian::doccount uncollapsed_lower_bound; 247 248 Xapian::doccount uncollapsed_estimated; 249 250 Xapian::doccount uncollapsed_upper_bound; 251 252 double max_possible; 253 254 double max_attained; 255 Internal()256 Internal() 257 : percent_factor(0), 258 stats(NULL), 259 firstitem(0), 260 matches_lower_bound(0), 261 matches_estimated(0), 262 matches_upper_bound(0), 263 uncollapsed_lower_bound(0), 264 uncollapsed_estimated(0), 265 uncollapsed_upper_bound(0), 266 max_possible(0), 267 max_attained(0) {} 268 269 /// Note: destroys parameter items. Internal(Xapian::doccount firstitem_,Xapian::doccount matches_upper_bound_,Xapian::doccount matches_lower_bound_,Xapian::doccount matches_estimated_,Xapian::doccount uncollapsed_upper_bound_,Xapian::doccount uncollapsed_lower_bound_,Xapian::doccount uncollapsed_estimated_,double max_possible_,double max_attained_,vector<Xapian::Internal::MSetItem> & items_,double percent_factor_)270 Internal(Xapian::doccount firstitem_, 271 Xapian::doccount matches_upper_bound_, 272 Xapian::doccount matches_lower_bound_, 273 Xapian::doccount matches_estimated_, 274 Xapian::doccount uncollapsed_upper_bound_, 275 Xapian::doccount uncollapsed_lower_bound_, 276 Xapian::doccount uncollapsed_estimated_, 277 double max_possible_, 278 double max_attained_, 279 vector<Xapian::Internal::MSetItem> &items_, 280 double percent_factor_) 281 : percent_factor(percent_factor_), 282 stats(NULL), 283 firstitem(firstitem_), 284 matches_lower_bound(matches_lower_bound_), 285 matches_estimated(matches_estimated_), 286 matches_upper_bound(matches_upper_bound_), 287 uncollapsed_lower_bound(uncollapsed_lower_bound_), 288 uncollapsed_estimated(uncollapsed_estimated_), 289 uncollapsed_upper_bound(uncollapsed_upper_bound_), 290 max_possible(max_possible_), 291 max_attained(max_attained_) { 292 std::swap(items, items_); 293 } 294 ~Internal()295 ~Internal() { delete stats; } 296 297 /// get a document by index in MSet, via the cache. 298 Xapian::Document get_doc_by_index(Xapian::doccount index) const; 299 300 /// Converts a weight to a percentage weight 301 int convert_to_percent_internal(double wt) const; 302 303 std::string snippet(const std::string & text, size_t length, 304 const Xapian::Stem & stemmer, 305 unsigned flags, 306 const std::string & hi_start, 307 const std::string & hi_end, 308 const std::string & omit) const; 309 310 /// Return a string describing this object. 311 string get_description() const; 312 313 /** Fetch items specified into the document cache. 314 */ 315 void fetch_items(Xapian::doccount first, Xapian::doccount last) const; 316 }; 317 318 class RSet::Internal : public Xapian::Internal::intrusive_base { 319 friend class Xapian::RSet; 320 321 private: 322 /// Items in the relevance set. 323 set<Xapian::docid> items; 324 325 public: get_items()326 const set<Xapian::docid> & get_items() const { return items; } 327 328 /// Return a string describing this object. 329 string get_description() const; 330 }; 331 332 } 333 334 #endif // OM_HGUARD_OMENQUIREINTERNAL_H 335