1 /** @file
2  * @brief Return document ids from an external source.
3  */
4 /* Copyright 2008,2009,2010,2011 Olly Betts
5  * Copyright 2009 Lemur Consulting Ltd
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
20  */
21 
22 #include <config.h>
23 
24 #include "externalpostlist.h"
25 
26 #include <xapian/error.h>
27 #include <xapian/postingsource.h>
28 
29 #include "debuglog.h"
30 #include "omassert.h"
31 
32 using namespace std;
33 
ExternalPostList(const Xapian::Database & db,Xapian::PostingSource * source_,double factor_,MultiMatch * matcher,Xapian::doccount shard_index)34 ExternalPostList::ExternalPostList(const Xapian::Database& db,
35 				   Xapian::PostingSource* source_,
36 				   double factor_,
37 				   MultiMatch* matcher,
38 				   Xapian::doccount shard_index)
39     : current(0), factor(factor_)
40 {
41     Assert(source_);
42     Xapian::PostingSource* newsource = source_->clone();
43     if (newsource != NULL) {
44 	source = newsource->release();
45     } else if (shard_index == 0) {
46 	// Allow use of a non-clone-able PostingSource with a non-sharded
47 	// Database.
48 	source = source_;
49     } else {
50 	throw Xapian::InvalidOperationError("PostingSource subclass must "
51 					    "implement clone() to support use "
52 					    "with a sharded database");
53     }
54     source->register_matcher_(static_cast<void*>(matcher));
55     source->init(db);
56 }
57 
58 Xapian::doccount
get_termfreq_min() const59 ExternalPostList::get_termfreq_min() const
60 {
61     Assert(source.get());
62     return source->get_termfreq_min();
63 }
64 
65 Xapian::doccount
get_termfreq_est() const66 ExternalPostList::get_termfreq_est() const
67 {
68     Assert(source.get());
69     return source->get_termfreq_est();
70 }
71 
72 Xapian::doccount
get_termfreq_max() const73 ExternalPostList::get_termfreq_max() const
74 {
75     Assert(source.get());
76     return source->get_termfreq_max();
77 }
78 
79 double
get_maxweight() const80 ExternalPostList::get_maxweight() const
81 {
82     LOGCALL(MATCH, double, "ExternalPostList::get_maxweight", NO_ARGS);
83     // source will be NULL here if we've reached the end.
84     if (source.get() == NULL) RETURN(0.0);
85     if (factor == 0.0) RETURN(0.0);
86     RETURN(factor * source->get_maxweight());
87 }
88 
89 Xapian::docid
get_docid() const90 ExternalPostList::get_docid() const
91 {
92     LOGCALL(MATCH, Xapian::docid, "ExternalPostList::get_docid", NO_ARGS);
93     Assert(current);
94     RETURN(current);
95 }
96 
97 double
get_weight() const98 ExternalPostList::get_weight() const
99 {
100     LOGCALL(MATCH, double, "ExternalPostList::get_weight", NO_ARGS);
101     Assert(source.get());
102     if (factor == 0.0) RETURN(factor);
103     RETURN(factor * source->get_weight());
104 }
105 
106 Xapian::termcount
get_doclength() const107 ExternalPostList::get_doclength() const
108 {
109     Assert(false);
110     return 0;
111 }
112 
113 Xapian::termcount
get_unique_terms() const114 ExternalPostList::get_unique_terms() const
115 {
116     Assert(false);
117     return 0;
118 }
119 
120 double
recalc_maxweight()121 ExternalPostList::recalc_maxweight()
122 {
123     return ExternalPostList::get_maxweight();
124 }
125 
126 PositionList *
read_position_list()127 ExternalPostList::read_position_list()
128 {
129     return NULL;
130 }
131 
132 PostList *
update_after_advance()133 ExternalPostList::update_after_advance() {
134     LOGCALL(MATCH, PostList *, "ExternalPostList::update_after_advance", NO_ARGS);
135     Assert(source.get());
136     if (source->at_end()) {
137 	LOGLINE(MATCH, "ExternalPostList now at end");
138 	source = NULL;
139     } else {
140 	current = source->get_docid();
141     }
142     RETURN(NULL);
143 }
144 
145 PostList *
next(double w_min)146 ExternalPostList::next(double w_min)
147 {
148     LOGCALL(MATCH, PostList *, "ExternalPostList::next", w_min);
149     Assert(source.get());
150     source->next(w_min);
151     RETURN(update_after_advance());
152 }
153 
154 PostList *
skip_to(Xapian::docid did,double w_min)155 ExternalPostList::skip_to(Xapian::docid did, double w_min)
156 {
157     LOGCALL(MATCH, PostList *, "ExternalPostList::skip_to", did | w_min);
158     Assert(source.get());
159     if (did <= current) RETURN(NULL);
160     source->skip_to(did, w_min);
161     RETURN(update_after_advance());
162 }
163 
164 PostList *
check(Xapian::docid did,double w_min,bool & valid)165 ExternalPostList::check(Xapian::docid did, double w_min, bool &valid)
166 {
167     LOGCALL(MATCH, PostList *, "ExternalPostList::check", did | w_min | valid);
168     Assert(source.get());
169     if (did <= current) {
170 	valid = true;
171 	RETURN(NULL);
172     }
173     valid = source->check(did, w_min);
174     if (source->at_end()) {
175 	LOGLINE(MATCH, "ExternalPostList now at end");
176 	source = NULL;
177     } else {
178 	current = valid ? source->get_docid() : current;
179     }
180     RETURN(NULL);
181 }
182 
183 bool
at_end() const184 ExternalPostList::at_end() const
185 {
186     LOGCALL(MATCH, bool, "ExternalPostList::at_end", NO_ARGS);
187     RETURN(source.get() == NULL);
188 }
189 
190 Xapian::termcount
count_matching_subqs() const191 ExternalPostList::count_matching_subqs() const
192 {
193     return 1;
194 }
195 
196 string
get_description() const197 ExternalPostList::get_description() const
198 {
199     string desc = "ExternalPostList(";
200     if (source.get()) desc += source->get_description();
201     desc += ")";
202     return desc;
203 }
204