1 /** @file 2 * @brief N-way OR postlist with wt=max(wt_i) 3 */ 4 /* Copyright (C) 2007,2009,2010,2011,2012,2013 Olly Betts 5 * Copyright (C) 2009 Lemur Consulting Ltd 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License as 9 * published by the Free Software Foundation; either version 2 of the 10 * License, or (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 20 */ 21 22 #ifndef XAPIAN_INCLUDED_MAXPOSTLIST_H 23 #define XAPIAN_INCLUDED_MAXPOSTLIST_H 24 25 #include "multimatch.h" 26 #include "api/postlist.h" 27 #include <algorithm> 28 29 class MultiMatch; 30 31 /// N-way OR postlist with wt=max(wt_i). 32 class MaxPostList : public PostList { 33 /// Don't allow assignment. 34 void operator=(const MaxPostList &); 35 36 /// Don't allow copying. 37 MaxPostList(const MaxPostList &); 38 39 /// The current docid, or zero if we haven't started or are at_end. 40 Xapian::docid did; 41 42 /// The number of sub-postlists. 43 size_t n_kids; 44 45 /// Array of pointers to sub-postlists. 46 PostList ** plist; 47 48 /// Cached answer to get_maxweight. 49 double max_cached; 50 51 /// The number of documents in the database. 52 Xapian::doccount db_size; 53 54 /// Pointer to the matcher object, so we can report pruning. 55 MultiMatch *matcher; 56 57 /// Erase a sub-postlist. erase_sublist(size_t i)58 void erase_sublist(size_t i) { 59 delete plist[i]; 60 --n_kids; 61 for (size_t j = i; j < n_kids; ++j) { 62 plist[j] = plist[j + 1]; 63 } 64 matcher->recalc_maxweight(); 65 } 66 67 public: 68 /** Construct from 2 random-access iterators to a container of PostList*, 69 * a pointer to the matcher, and the document collection size. 70 */ 71 template<class RandomItor> MaxPostList(RandomItor pl_begin,RandomItor pl_end,MultiMatch * matcher_,Xapian::doccount db_size_)72 MaxPostList(RandomItor pl_begin, RandomItor pl_end, 73 MultiMatch * matcher_, Xapian::doccount db_size_) 74 : did(0), n_kids(pl_end - pl_begin), plist(NULL), 75 max_cached(0), db_size(db_size_), matcher(matcher_) 76 { 77 plist = new PostList * [n_kids]; 78 std::copy(pl_begin, pl_end, plist); 79 } 80 81 ~MaxPostList(); 82 83 Xapian::doccount get_termfreq_min() const; 84 85 Xapian::doccount get_termfreq_max() const; 86 87 Xapian::doccount get_termfreq_est() const; 88 89 TermFreqs get_termfreq_est_using_stats( 90 const Xapian::Weight::Internal & stats) const; 91 92 double get_maxweight() const; 93 94 Xapian::docid get_docid() const; 95 96 Xapian::termcount get_doclength() const; 97 98 Xapian::termcount get_unique_terms() const; 99 100 double get_weight() const; 101 102 bool at_end() const; 103 104 double recalc_maxweight(); 105 read_position_list()106 PositionList * read_position_list() { 107 return NULL; 108 } 109 110 PostList* next(double w_min); 111 112 PostList* skip_to(Xapian::docid, double w_min); 113 114 std::string get_description() const; 115 116 /** get_wdf() for MaxPostlist returns the sum of the wdfs of the 117 * sub postlists which match the current docid. 118 * 119 * The wdf isn't really meaningful in many situations, but if the lists 120 * are being combined as a synonym we want the sum of the wdfs, so we do 121 * that in general. 122 */ 123 Xapian::termcount get_wdf() const; 124 125 Xapian::termcount count_matching_subqs() const; 126 }; 127 128 #endif // XAPIAN_INCLUDED_MAXPOSTLIST_H 129