1 /** @file
2  * @brief Xapian::Query API class
3  */
4 /* Copyright (C) 2011,2012,2013,2015,2016,2017,2018 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License as
8  * published by the Free Software Foundation; either version 2 of the
9  * License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
19  */
20 
21 #include <config.h>
22 
23 #include "xapian/query.h"
24 #include "queryinternal.h"
25 
26 #include <algorithm>
27 
28 #include "debuglog.h"
29 #include "omassert.h"
30 #include "vectortermlist.h"
31 
32 #include "xapian/error.h"
33 
34 using namespace std;
35 
36 namespace Xapian {
37 
// Definitions of the two predefined constant Query objects.
//
// MatchAll is built from an empty term string.  The extra () are needed to
// resolve ambiguity with method declaration: without them the line would
// parse as the declaration of a function named MatchAll.
const Query Query::MatchAll((string()));

// MatchNothing is simply a default-constructed Query.
const Query Query::MatchNothing;
42 
Query(const string & term,Xapian::termcount wqf,Xapian::termpos pos)43 Query::Query(const string & term, Xapian::termcount wqf, Xapian::termpos pos)
44     : internal(new Xapian::Internal::QueryTerm(term, wqf, pos))
45 {
46     LOGCALL_CTOR(API, "Query", term | wqf | pos);
47 }
48 
Query(Xapian::PostingSource * source)49 Query::Query(Xapian::PostingSource * source)
50     : internal(new Xapian::Internal::QueryPostingSource(source))
51 {
52     LOGCALL_CTOR(API, "Query", source);
53 }
54 
Query(double factor,const Xapian::Query & subquery)55 Query::Query(double factor, const Xapian::Query & subquery)
56 {
57     LOGCALL_CTOR(API, "Query", factor | subquery);
58 
59     if (!subquery.empty())
60 	internal = new Xapian::Internal::QueryScaleWeight(factor, subquery);
61 }
62 
Query(op op_,const Xapian::Query & subquery,double factor)63 Query::Query(op op_, const Xapian::Query & subquery, double factor)
64 {
65     LOGCALL_CTOR(API, "Query", op_ | subquery | factor);
66 
67     if (rare(op_ != OP_SCALE_WEIGHT))
68 	throw Xapian::InvalidArgumentError("op must be OP_SCALE_WEIGHT");
69     // If the subquery is MatchNothing then generate Query() which matches
70     // nothing.
71     if (!subquery.internal.get()) return;
72     switch (subquery.internal->get_type()) {
73 	case OP_VALUE_RANGE:
74 	case OP_VALUE_GE:
75 	case OP_VALUE_LE:
76 	    // These operators always return weight 0, so OP_SCALE_WEIGHT has
77 	    // no effect on them.
78 	    internal = subquery.internal;
79 	    return;
80 	default:
81 	    break;
82     }
83     internal = new Xapian::Internal::QueryScaleWeight(factor, subquery);
84 }
85 
Query(op op_,Xapian::valueno slot,const std::string & limit)86 Query::Query(op op_, Xapian::valueno slot, const std::string & limit)
87 {
88     LOGCALL_CTOR(API, "Query", op_ | slot | limit);
89 
90     if (op_ == OP_VALUE_GE) {
91 	if (limit.empty())
92 	    internal = new Xapian::Internal::QueryTerm();
93 	else
94 	    internal = new Xapian::Internal::QueryValueGE(slot, limit);
95     } else if (usual(op_ == OP_VALUE_LE)) {
96 	internal = new Xapian::Internal::QueryValueLE(slot, limit);
97     } else {
98 	throw Xapian::InvalidArgumentError("op must be OP_VALUE_LE or OP_VALUE_GE");
99     }
100 }
101 
Query(op op_,Xapian::valueno slot,const std::string & begin,const std::string & end)102 Query::Query(op op_, Xapian::valueno slot,
103 	     const std::string & begin, const std::string & end)
104 {
105     LOGCALL_CTOR(API, "Query", op_ | slot | begin | end);
106 
107     if (rare(op_ != OP_VALUE_RANGE))
108 	throw Xapian::InvalidArgumentError("op must be OP_VALUE_RANGE");
109     // If begin > end then generate Query() which matches nothing.
110     if (begin.empty()) {
111 	internal = new Xapian::Internal::QueryValueLE(slot, end);
112     } else if (usual(begin <= end)) {
113 	internal = new Xapian::Internal::QueryValueRange(slot, begin, end);
114     }
115 }
116 
Query(op op_,const std::string & pattern,Xapian::termcount max_expansion,int max_type,op combiner)117 Query::Query(op op_,
118 	     const std::string & pattern,
119 	     Xapian::termcount max_expansion,
120 	     int max_type,
121 	     op combiner)
122 {
123     LOGCALL_CTOR(API, "Query", op_ | pattern | max_expansion | max_type | combiner);
124     if (rare(op_ != OP_WILDCARD))
125 	throw Xapian::InvalidArgumentError("op must be OP_WILDCARD");
126     if (rare(combiner != OP_SYNONYM && combiner != OP_MAX && combiner != OP_OR))
127 	throw Xapian::InvalidArgumentError("combiner must be OP_SYNONYM or OP_MAX or OP_OR");
128     internal = new Xapian::Internal::QueryWildcard(pattern,
129 						   max_expansion,
130 						   max_type,
131 						   combiner);
132 }
133 
134 const TermIterator
get_terms_begin() const135 Query::get_terms_begin() const
136 {
137     if (!internal.get())
138 	return TermIterator();
139 
140     vector<pair<Xapian::termpos, string>> terms;
141     internal->gather_terms(static_cast<void*>(&terms));
142     sort(terms.begin(), terms.end());
143 
144     vector<string> v;
145     const string * old_term = NULL;
146     Xapian::termpos old_pos = 0;
147     for (auto && i : terms) {
148 	// Remove duplicates (same term at the same position).
149 	if (old_term && old_pos == i.first && *old_term == i.second)
150 	    continue;
151 
152 	v.push_back(i.second);
153 	old_pos = i.first;
154 	old_term = &(i.second);
155     }
156     return TermIterator(new VectorTermList(v.begin(), v.end()));
157 }
158 
159 const TermIterator
get_unique_terms_begin() const160 Query::get_unique_terms_begin() const
161 {
162     if (!internal.get())
163 	return TermIterator();
164 
165     vector<pair<Xapian::termpos, string>> terms;
166     internal->gather_terms(static_cast<void*>(&terms));
167     sort(terms.begin(), terms.end(), [](
168 		const pair<Xapian::termpos, string>& a,
169 		const pair<Xapian::termpos, string>& b) {
170 	return a.second < b.second;
171     });
172 
173     vector<string> v;
174     const string * old_term = NULL;
175     for (auto && i : terms) {
176 	// Remove duplicate term names.
177 	if (old_term && *old_term == i.second)
178 	    continue;
179 
180 	v.push_back(i.second);
181 	old_term = &(i.second);
182     }
183     return TermIterator(new VectorTermList(v.begin(), v.end()));
184 }
185 
186 Xapian::termcount
get_length() const187 Query::get_length() const XAPIAN_NOEXCEPT
188 {
189     return (internal.get() ? internal->get_length() : 0);
190 }
191 
192 string
serialise() const193 Query::serialise() const
194 {
195     string result;
196     if (internal.get())
197 	internal->serialise(result);
198     return result;
199 }
200 
201 const Query
unserialise(const string & s,const Registry & reg)202 Query::unserialise(const string & s, const Registry & reg)
203 {
204     const char * p = s.data();
205     const char * end = p + s.size();
206     Query::Internal * q = Query::Internal::unserialise(&p, end, reg);
207     AssertEq(p, end);
208     return Query(q);
209 }
210 
211 Xapian::Query::op
get_type() const212 Query::get_type() const XAPIAN_NOEXCEPT
213 {
214     if (!internal.get())
215 	return Xapian::Query::LEAF_MATCH_NOTHING;
216     return internal->get_type();
217 }
218 
219 size_t
get_num_subqueries() const220 Query::get_num_subqueries() const XAPIAN_NOEXCEPT
221 {
222     return internal.get() ? internal->get_num_subqueries() : 0;
223 }
224 
225 const Query
get_subquery(size_t n) const226 Query::get_subquery(size_t n) const
227 {
228     return internal->get_subquery(n);
229 }
230 
231 string
get_description() const232 Query::get_description() const
233 {
234     string desc = "Query(";
235     if (internal.get())
236 	desc += internal->get_description();
237     desc += ")";
238     return desc;
239 }
240 
241 void
init(op op_,size_t n_subqueries,Xapian::termcount parameter)242 Query::init(op op_, size_t n_subqueries, Xapian::termcount parameter)
243 {
244     if (parameter > 0 &&
245 	op_ != OP_NEAR && op_ != OP_PHRASE && op_ != OP_ELITE_SET)
246 	throw InvalidArgumentError("parameter only valid with OP_NEAR, "
247 				   "OP_PHRASE or OP_ELITE_SET");
248 
249     switch (op_) {
250 	case OP_AND:
251 	    internal = new Xapian::Internal::QueryAnd(n_subqueries);
252 	    break;
253 	case OP_OR:
254 	    internal = new Xapian::Internal::QueryOr(n_subqueries);
255 	    break;
256 	case OP_AND_NOT:
257 	    internal = new Xapian::Internal::QueryAndNot(n_subqueries);
258 	    break;
259 	case OP_XOR:
260 	    internal = new Xapian::Internal::QueryXor(n_subqueries);
261 	    break;
262 	case OP_AND_MAYBE:
263 	    internal = new Xapian::Internal::QueryAndMaybe(n_subqueries);
264 	    break;
265 	case OP_FILTER:
266 	    internal = new Xapian::Internal::QueryFilter(n_subqueries);
267 	    break;
268 	case OP_NEAR:
269 	    internal = new Xapian::Internal::QueryNear(n_subqueries,
270 						       parameter);
271 	    break;
272 	case OP_PHRASE:
273 	    internal = new Xapian::Internal::QueryPhrase(n_subqueries,
274 							 parameter);
275 	    break;
276 	case OP_ELITE_SET:
277 	    internal = new Xapian::Internal::QueryEliteSet(n_subqueries,
278 							   parameter);
279 	    break;
280 	case OP_SYNONYM:
281 	    internal = new Xapian::Internal::QuerySynonym(n_subqueries);
282 	    break;
283 	case OP_MAX:
284 	    internal = new Xapian::Internal::QueryMax(n_subqueries);
285 	    break;
286 	default:
287 	    if (op_ == OP_INVALID && n_subqueries == 0) {
288 		internal = new Xapian::Internal::QueryInvalid();
289 		break;
290 	    }
291 	    throw InvalidArgumentError("op not valid with a list of subqueries");
292     }
293 }
294 
295 void
add_subquery(bool positional,const Xapian::Query & subquery)296 Query::add_subquery(bool positional, const Xapian::Query & subquery)
297 {
298     // We could handle this in a type-safe way, but we'd need to at least
299     // declare Xapian::Internal::QueryBranch in the API header, which seems
300     // less desirable than a static_cast<> here.
301     Xapian::Internal::QueryBranch * branch_query =
302 	static_cast<Xapian::Internal::QueryBranch*>(internal.get());
303     Assert(branch_query);
304     if (positional) {
305 	switch (subquery.get_type()) {
306 	    case LEAF_TERM:
307 		break;
308 	    case LEAF_POSTING_SOURCE:
309 	    case LEAF_MATCH_ALL:
310 	    case LEAF_MATCH_NOTHING:
311 		// None of these have positions, so positional operators won't
312 		// match.  Add MatchNothing as that is has special handling in
313 		// AND-like queries to reduce the parent query to MatchNothing,
314 		// which is appropriate in this case.
315 		branch_query->add_subquery(MatchNothing);
316 		return;
317 	    case OP_OR:
318 		// OP_OR is now handled below OP_NEAR and OP_PHRASE.
319 		break;
320 	    default:
321 		throw Xapian::UnimplementedError("OP_NEAR and OP_PHRASE only currently support leaf subqueries");
322 	}
323     }
324     branch_query->add_subquery(subquery);
325 }
326 
327 void
done()328 Query::done()
329 {
330     Xapian::Internal::QueryBranch * branch_query =
331 	static_cast<Xapian::Internal::QueryBranch*>(internal.get());
332     if (branch_query)
333 	internal = branch_query->done();
334 }
335 
336 }
337