1 /* mergepostlist.cc: merge postlists from different databases
2  *
3  * Copyright 1999,2000,2001 BrightStation PLC
4  * Copyright 2002 Ananova Ltd
5  * Copyright 2002,2003,2004,2006,2008,2009,2011,2015,2016 Olly Betts
6  * Copyright 2007,2009 Lemur Consulting Ltd
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License as
10  * published by the Free Software Foundation; either version 2 of the
11  * License, or (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
21  * USA
22  */
23 
24 #include <config.h>
25 #include "mergepostlist.h"
26 
27 #include "multimatch.h"
28 #include "api/emptypostlist.h"
29 #include "branchpostlist.h"
30 #include "debuglog.h"
31 #include "omassert.h"
32 #include "valuestreamdocument.h"
33 
34 #include "xapian/error.h"
35 
36 // NB don't prune - even with one sublist we still translate docids...
37 
~MergePostList()38 MergePostList::~MergePostList()
39 {
40     LOGCALL_DTOR(MATCH, "MergePostList");
41     std::vector<PostList *>::const_iterator i;
42     for (i = plists.begin(); i != plists.end(); ++i) {
43 	delete *i;
44     }
45 }
46 
47 PostList *
next(double w_min)48 MergePostList::next(double w_min)
49 {
50     LOGCALL(MATCH, PostList *, "MergePostList::next", w_min);
51     LOGVALUE(MATCH, current);
52     if (current == -1) current = 0;
53     while (true) {
54 	// FIXME: should skip over Remote matchers which aren't ready yet
55 	// and come back to them later...
56 	next_handling_prune(plists[current], w_min, matcher);
57 	if (!plists[current]->at_end()) break;
58 	++current;
59 	if (unsigned(current) >= plists.size()) break;
60 	vsdoc.new_subdb(current);
61 	if (matcher) matcher->recalc_maxweight();
62     }
63     LOGVALUE(MATCH, current);
64     RETURN(NULL);
65 }
66 
67 PostList *
skip_to(Xapian::docid did,double w_min)68 MergePostList::skip_to(Xapian::docid did, double w_min)
69 {
70     LOGCALL(MATCH, PostList *, "MergePostList::skip_to", did | w_min);
71     (void)did;
72     (void)w_min;
73     // MergePostList doesn't return documents in docid order, so skip_to
74     // isn't a meaningful operation.
75     throw Xapian::InvalidOperationError("MergePostList doesn't support skip_to");
76 }
77 
78 Xapian::termcount
get_wdf() const79 MergePostList::get_wdf() const
80 {
81     LOGCALL(MATCH, Xapian::termcount, "MergePostList::get_wdf", NO_ARGS);
82     RETURN(plists[current]->get_wdf());
83 }
84 
85 Xapian::doccount
get_termfreq_max() const86 MergePostList::get_termfreq_max() const
87 {
88     LOGCALL(MATCH, Xapian::doccount, "MergePostList::get_termfreq_max", NO_ARGS);
89     // sum of termfreqs for all children
90     Xapian::doccount total = 0;
91     vector<PostList *>::const_iterator i;
92     for (i = plists.begin(); i != plists.end(); ++i) {
93 	total += (*i)->get_termfreq_max();
94     }
95     RETURN(total);
96 }
97 
98 Xapian::doccount
get_termfreq_min() const99 MergePostList::get_termfreq_min() const
100 {
101     LOGCALL(MATCH, Xapian::doccount, "MergePostList::get_termfreq_min", NO_ARGS);
102     // sum of termfreqs for all children
103     Xapian::doccount total = 0;
104     vector<PostList *>::const_iterator i;
105     for (i = plists.begin(); i != plists.end(); ++i) {
106 	total += (*i)->get_termfreq_min();
107     }
108     RETURN(total);
109 }
110 
111 Xapian::doccount
get_termfreq_est() const112 MergePostList::get_termfreq_est() const
113 {
114     LOGCALL(MATCH, Xapian::doccount, "MergePostList::get_termfreq_est", NO_ARGS);
115     // sum of termfreqs for all children
116     Xapian::doccount total = 0;
117     vector<PostList *>::const_iterator i;
118     for (i = plists.begin(); i != plists.end(); ++i) {
119 	total += (*i)->get_termfreq_est();
120     }
121     RETURN(total);
122 }
123 
124 Xapian::docid
get_docid() const125 MergePostList::get_docid() const
126 {
127     LOGCALL(MATCH, Xapian::docid, "MergePostList::get_docid", NO_ARGS);
128     Assert(current != -1);
129     // FIXME: this needs fixing so we can prune plists - see MultiPostlist
130     // for code which does this...
131     RETURN((plists[current]->get_docid() - 1) * plists.size() + current + 1);
132 }
133 
134 double
get_weight() const135 MergePostList::get_weight() const
136 {
137     LOGCALL(MATCH, double, "MergePostList::get_weight", NO_ARGS);
138     Assert(current != -1);
139     RETURN(plists[current]->get_weight());
140 }
141 
142 const string *
get_sort_key() const143 MergePostList::get_sort_key() const
144 {
145     LOGCALL(MATCH, const string *, "MergePostList::get_sort_key", NO_ARGS);
146     Assert(current != -1);
147     RETURN(plists[current]->get_sort_key());
148 }
149 
150 const string *
get_collapse_key() const151 MergePostList::get_collapse_key() const
152 {
153     LOGCALL(MATCH, const string *, "MergePostList::get_collapse_key", NO_ARGS);
154     Assert(current != -1);
155     RETURN(plists[current]->get_collapse_key());
156 }
157 
158 double
get_maxweight() const159 MergePostList::get_maxweight() const
160 {
161     LOGCALL(MATCH, double, "MergePostList::get_maxweight", NO_ARGS);
162     RETURN(w_max);
163 }
164 
165 double
recalc_maxweight()166 MergePostList::recalc_maxweight()
167 {
168     LOGCALL(MATCH, double, "MergePostList::recalc_maxweight", NO_ARGS);
169     w_max = 0;
170     vector<PostList *>::iterator i;
171     for (i = plists.begin(); i != plists.end(); ++i) {
172 	double w = (*i)->recalc_maxweight();
173 	if (w > w_max) w_max = w;
174     }
175     RETURN(w_max);
176 }
177 
178 bool
at_end() const179 MergePostList::at_end() const
180 {
181     LOGCALL(MATCH, bool, "MergePostList::at_end", NO_ARGS);
182     Assert(current != -1);
183     RETURN(unsigned(current) >= plists.size());
184 }
185 
186 string
get_description() const187 MergePostList::get_description() const
188 {
189     string desc = "( Merge ";
190     vector<PostList *>::const_iterator i;
191     for (i = plists.begin(); i != plists.end(); ++i) {
192 	desc += (*i)->get_description() + " ";
193     }
194     return desc + ")";
195 }
196 
197 Xapian::termcount
get_doclength() const198 MergePostList::get_doclength() const
199 {
200     LOGCALL(MATCH, Xapian::termcount, "MergePostList::get_doclength", NO_ARGS);
201     Assert(current != -1);
202     RETURN(plists[current]->get_doclength());
203 }
204 
205 Xapian::termcount
get_unique_terms() const206 MergePostList::get_unique_terms() const
207 {
208     LOGCALL(MATCH, Xapian::termcount, "MergePostList::get_unique_terms", NO_ARGS);
209     Assert(current != -1);
210     RETURN(plists[current]->get_unique_terms());
211 }
212 
213 Xapian::termcount
count_matching_subqs() const214 MergePostList::count_matching_subqs() const
215 {
216     LOGCALL(MATCH, Xapian::termcount, "MergePostList::count_matching_subqs", NO_ARGS);
217     RETURN(plists[current]->count_matching_subqs());
218 }
219