1 /////////////////////////////////////////////////////////////////////////////
2 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
5 /////////////////////////////////////////////////////////////////////////////
6
7 #include "LuceneInc.h"
8 #include "MultiTermQuery.h"
9 #include "_MultiTermQuery.h"
10 #include "ConstantScoreQuery.h"
11 #include "MultiTermQueryWrapperFilter.h"
12 #include "QueryWrapperFilter.h"
13 #include "BooleanQuery.h"
14 #include "Term.h"
15 #include "TermQuery.h"
16 #include "TermDocs.h"
17 #include "FilteredTermEnum.h"
18 #include "IndexReader.h"
19 #include "MiscUtils.h"
20
21 namespace Lucene {
22
MultiTermQuery()23 MultiTermQuery::MultiTermQuery() {
24 numberOfTerms = 0;
25 rewriteMethod = CONSTANT_SCORE_AUTO_REWRITE_DEFAULT();
26 }
27
~MultiTermQuery()28 MultiTermQuery::~MultiTermQuery() {
29 }
30
CONSTANT_SCORE_FILTER_REWRITE()31 RewriteMethodPtr MultiTermQuery::CONSTANT_SCORE_FILTER_REWRITE() {
32 static RewriteMethodPtr _CONSTANT_SCORE_FILTER_REWRITE;
33 if (!_CONSTANT_SCORE_FILTER_REWRITE) {
34 _CONSTANT_SCORE_FILTER_REWRITE = newLucene<ConstantScoreFilterRewrite>();
35 CycleCheck::addStatic(_CONSTANT_SCORE_FILTER_REWRITE);
36 }
37 return _CONSTANT_SCORE_FILTER_REWRITE;
38 }
39
SCORING_BOOLEAN_QUERY_REWRITE()40 RewriteMethodPtr MultiTermQuery::SCORING_BOOLEAN_QUERY_REWRITE() {
41 static RewriteMethodPtr _SCORING_BOOLEAN_QUERY_REWRITE;
42 if (!_SCORING_BOOLEAN_QUERY_REWRITE) {
43 _SCORING_BOOLEAN_QUERY_REWRITE = newLucene<ScoringBooleanQueryRewrite>();
44 CycleCheck::addStatic(_SCORING_BOOLEAN_QUERY_REWRITE);
45 }
46 return _SCORING_BOOLEAN_QUERY_REWRITE;
47 }
48
CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE()49 RewriteMethodPtr MultiTermQuery::CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE() {
50 static RewriteMethodPtr _CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE;
51 if (!_CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE) {
52 _CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE = newLucene<ConstantScoreBooleanQueryRewrite>();
53 CycleCheck::addStatic(_CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE);
54 }
55 return _CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE;
56 }
57
CONSTANT_SCORE_AUTO_REWRITE_DEFAULT()58 RewriteMethodPtr MultiTermQuery::CONSTANT_SCORE_AUTO_REWRITE_DEFAULT() {
59 static RewriteMethodPtr _CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
60 if (!_CONSTANT_SCORE_AUTO_REWRITE_DEFAULT) {
61 _CONSTANT_SCORE_AUTO_REWRITE_DEFAULT = newLucene<ConstantScoreAutoRewriteDefault>();
62 CycleCheck::addStatic(_CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);
63 }
64 return _CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
65 }
66
getTotalNumberOfTerms()67 int32_t MultiTermQuery::getTotalNumberOfTerms() {
68 return numberOfTerms;
69 }
70
clearTotalNumberOfTerms()71 void MultiTermQuery::clearTotalNumberOfTerms() {
72 numberOfTerms = 0;
73 }
74
incTotalNumberOfTerms(int32_t inc)75 void MultiTermQuery::incTotalNumberOfTerms(int32_t inc) {
76 numberOfTerms += inc;
77 }
78
rewrite(const IndexReaderPtr & reader)79 QueryPtr MultiTermQuery::rewrite(const IndexReaderPtr& reader) {
80 return rewriteMethod->rewrite(reader, shared_from_this());
81 }
82
getRewriteMethod()83 RewriteMethodPtr MultiTermQuery::getRewriteMethod() {
84 return rewriteMethod;
85 }
86
setRewriteMethod(const RewriteMethodPtr & method)87 void MultiTermQuery::setRewriteMethod(const RewriteMethodPtr& method) {
88 rewriteMethod = method;
89 }
90
// Clones via Query::clone(other) and then copies this query's rewrite method and term
// counter onto the result.
// NOTE(review): the result of Query::clone is assumed to be a MultiTermQuery; the
// downcast is dereferenced without a null check.
LuceneObjectPtr MultiTermQuery::clone(const LuceneObjectPtr& other) {
    LuceneObjectPtr baseCopy = Query::clone(other);
    MultiTermQueryPtr copy(boost::dynamic_pointer_cast<MultiTermQuery>(baseCopy));
    copy->rewriteMethod = this->rewriteMethod;
    copy->numberOfTerms = this->numberOfTerms;
    return copy;
}
98
hashCode()99 int32_t MultiTermQuery::hashCode() {
100 int32_t prime = 31;
101 int32_t result = 1;
102 result = prime * result + MiscUtils::doubleToIntBits(getBoost());
103 result = prime * result;
104 result += rewriteMethod->hashCode();
105 return result;
106 }
107
equals(const LuceneObjectPtr & other)108 bool MultiTermQuery::equals(const LuceneObjectPtr& other) {
109 if (LuceneObject::equals(other)) {
110 return true;
111 }
112 if (!other) {
113 return false;
114 }
115 if (!MiscUtils::equalTypes(shared_from_this(), other)) {
116 return false;
117 }
118 MultiTermQueryPtr otherMultiTermQuery(boost::dynamic_pointer_cast<MultiTermQuery>(other));
119 if (!otherMultiTermQuery) {
120 return false;
121 }
122 if (MiscUtils::doubleToIntBits(getBoost()) != MiscUtils::doubleToIntBits(otherMultiTermQuery->getBoost())) {
123 return false;
124 }
125 if (!rewriteMethod->equals(otherMultiTermQuery->rewriteMethod)) {
126 return false;
127 }
128 return true;
129 }
130
~RewriteMethod()131 RewriteMethod::~RewriteMethod() {
132 }
133
~ConstantScoreFilterRewrite()134 ConstantScoreFilterRewrite::~ConstantScoreFilterRewrite() {
135 }
136
rewrite(const IndexReaderPtr & reader,const MultiTermQueryPtr & query)137 QueryPtr ConstantScoreFilterRewrite::rewrite(const IndexReaderPtr& reader, const MultiTermQueryPtr& query) {
138 QueryPtr result(newLucene<ConstantScoreQuery>(newLucene<MultiTermQueryWrapperFilter>(query)));
139 result->setBoost(query->getBoost());
140 return result;
141 }
142
~ScoringBooleanQueryRewrite()143 ScoringBooleanQueryRewrite::~ScoringBooleanQueryRewrite() {
144 }
145
rewrite(const IndexReaderPtr & reader,const MultiTermQueryPtr & query)146 QueryPtr ScoringBooleanQueryRewrite::rewrite(const IndexReaderPtr& reader, const MultiTermQueryPtr& query) {
147 FilteredTermEnumPtr enumerator(query->getEnum(reader));
148 BooleanQueryPtr result(newLucene<BooleanQuery>(true));
149 int32_t count = 0;
150 LuceneException finally;
151 try {
152 do {
153 TermPtr t(enumerator->term());
154 if (t) {
155 TermQueryPtr tq(newLucene<TermQuery>(t)); // found a match
156 tq->setBoost(query->getBoost() * enumerator->difference()); // set the boost
157 result->add(tq, BooleanClause::SHOULD); // add to query
158 ++count;
159 }
160 } while (enumerator->next());
161 } catch (LuceneException& e) {
162 finally = e;
163 }
164 enumerator->close();
165 finally.throwException();
166 query->incTotalNumberOfTerms(count);
167 return result;
168 }
169
~ConstantScoreBooleanQueryRewrite()170 ConstantScoreBooleanQueryRewrite::~ConstantScoreBooleanQueryRewrite() {
171 }
172
rewrite(const IndexReaderPtr & reader,const MultiTermQueryPtr & query)173 QueryPtr ConstantScoreBooleanQueryRewrite::rewrite(const IndexReaderPtr& reader, const MultiTermQueryPtr& query) {
174 // strip the scores off
175 QueryPtr result(newLucene<ConstantScoreQuery>(newLucene<QueryWrapperFilter>(ScoringBooleanQueryRewrite::rewrite(reader, query))));
176 result->setBoost(query->getBoost());
177 return result;
178 }
179
180 // Defaults derived from rough tests with a 20.0 million doc Wikipedia index. With more than 350 terms
181 // in the query, the filter method is fastest
182 const int32_t ConstantScoreAutoRewrite::DEFAULT_TERM_COUNT_CUTOFF = 350;
183
184 // If the query will hit more than 1 in 1000 of the docs in the index (0.1%), the filter method is fastest
185 const double ConstantScoreAutoRewrite::DEFAULT_DOC_COUNT_PERCENT = 0.1;
186
ConstantScoreAutoRewrite()187 ConstantScoreAutoRewrite::ConstantScoreAutoRewrite() {
188 termCountCutoff = DEFAULT_TERM_COUNT_CUTOFF;
189 docCountPercent = DEFAULT_DOC_COUNT_PERCENT;
190 }
191
~ConstantScoreAutoRewrite()192 ConstantScoreAutoRewrite::~ConstantScoreAutoRewrite() {
193 }
194
setTermCountCutoff(int32_t count)195 void ConstantScoreAutoRewrite::setTermCountCutoff(int32_t count) {
196 termCountCutoff = count;
197 }
198
getTermCountCutoff()199 int32_t ConstantScoreAutoRewrite::getTermCountCutoff() {
200 return termCountCutoff;
201 }
202
setDocCountPercent(double percent)203 void ConstantScoreAutoRewrite::setDocCountPercent(double percent) {
204 docCountPercent = percent;
205 }
206
getDocCountPercent()207 double ConstantScoreAutoRewrite::getDocCountPercent() {
208 return docCountPercent;
209 }
210
rewrite(const IndexReaderPtr & reader,const MultiTermQueryPtr & query)211 QueryPtr ConstantScoreAutoRewrite::rewrite(const IndexReaderPtr& reader, const MultiTermQueryPtr& query) {
212 // Get the enum and start visiting terms. If we exhaust the enum before hitting either of the
213 // cutoffs, we use ConstantBooleanQueryRewrite; else ConstantFilterRewrite
214 Collection<TermPtr> pendingTerms(Collection<TermPtr>::newInstance());
215 int32_t docCountCutoff = (int32_t)((docCountPercent / 100.0) * (double)reader->maxDoc());
216 int32_t termCountLimit = std::min(BooleanQuery::getMaxClauseCount(), termCountCutoff);
217 int32_t docVisitCount = 0;
218
219 FilteredTermEnumPtr enumerator(query->getEnum(reader));
220 QueryPtr result;
221 LuceneException finally;
222 try {
223 while (true) {
224 TermPtr t(enumerator->term());
225 if (t) {
226 pendingTerms.add(t);
227 // Loading the TermInfo from the terms dict here should not be costly, because 1) the
228 // query/filter will load the TermInfo when it runs, and 2) the terms dict has a cache
229 docVisitCount += reader->docFreq(t);
230 }
231
232 if (pendingTerms.size() >= termCountLimit || docVisitCount >= docCountCutoff) {
233 // Too many terms -- make a filter.
234 result = newLucene<ConstantScoreQuery>(newLucene<MultiTermQueryWrapperFilter>(query));
235 result->setBoost(query->getBoost());
236 break;
237 } else if (!enumerator->next()) {
238 // Enumeration is done, and we hit a small enough number of terms and docs -
239 // just make a BooleanQuery, now
240 BooleanQueryPtr bq(newLucene<BooleanQuery>(true));
241 for (Collection<TermPtr>::iterator term = pendingTerms.begin(); term != pendingTerms.end(); ++ term) {
242 TermQueryPtr tq(newLucene<TermQuery>(*term));
243 bq->add(tq, BooleanClause::SHOULD);
244 }
245 // Strip scores
246 result = newLucene<ConstantScoreQuery>(newLucene<QueryWrapperFilter>(bq));
247 result->setBoost(query->getBoost());
248 query->incTotalNumberOfTerms(pendingTerms.size());
249 break;
250 }
251 }
252 } catch (LuceneException& e) {
253 finally = e;
254 }
255 enumerator->close();
256 finally.throwException();
257 return result;
258 }
259
hashCode()260 int32_t ConstantScoreAutoRewrite::hashCode() {
261 int32_t prime = 1279;
262 return (int32_t)(prime * termCountCutoff + MiscUtils::doubleToLongBits(docCountPercent));
263 }
264
equals(const LuceneObjectPtr & other)265 bool ConstantScoreAutoRewrite::equals(const LuceneObjectPtr& other) {
266 if (RewriteMethod::equals(other)) {
267 return true;
268 }
269 if (!other) {
270 return false;
271 }
272 if (!MiscUtils::equalTypes(shared_from_this(), other)) {
273 return false;
274 }
275
276 ConstantScoreAutoRewritePtr otherConstantScoreAutoRewrite(boost::dynamic_pointer_cast<ConstantScoreAutoRewrite>(other));
277 if (!otherConstantScoreAutoRewrite) {
278 return false;
279 }
280
281 if (termCountCutoff != otherConstantScoreAutoRewrite->termCountCutoff) {
282 return false;
283 }
284
285 if (MiscUtils::doubleToLongBits(docCountPercent) != MiscUtils::doubleToLongBits(otherConstantScoreAutoRewrite->docCountPercent)) {
286 return false;
287 }
288
289 return true;
290 }
291
~ConstantScoreAutoRewriteDefault()292 ConstantScoreAutoRewriteDefault::~ConstantScoreAutoRewriteDefault() {
293 }
294
setTermCountCutoff(int32_t count)295 void ConstantScoreAutoRewriteDefault::setTermCountCutoff(int32_t count) {
296 boost::throw_exception(UnsupportedOperationException(L"Please create a private instance"));
297 }
298
setDocCountPercent(double percent)299 void ConstantScoreAutoRewriteDefault::setDocCountPercent(double percent) {
300 boost::throw_exception(UnsupportedOperationException(L"Please create a private instance"));
301 }
302
303 }
304