1 /////////////////////////////////////////////////////////////////////////////
2 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
5 /////////////////////////////////////////////////////////////////////////////
6 
7 #include "LuceneInc.h"
8 #include "MultiTermQuery.h"
9 #include "_MultiTermQuery.h"
10 #include "ConstantScoreQuery.h"
11 #include "MultiTermQueryWrapperFilter.h"
12 #include "QueryWrapperFilter.h"
13 #include "BooleanQuery.h"
14 #include "Term.h"
15 #include "TermQuery.h"
16 #include "TermDocs.h"
17 #include "FilteredTermEnum.h"
18 #include "IndexReader.h"
19 #include "MiscUtils.h"
20 
21 namespace Lucene {
22 
MultiTermQuery()23 MultiTermQuery::MultiTermQuery() {
24     numberOfTerms = 0;
25     rewriteMethod = CONSTANT_SCORE_AUTO_REWRITE_DEFAULT();
26 }
27 
~MultiTermQuery()28 MultiTermQuery::~MultiTermQuery() {
29 }
30 
CONSTANT_SCORE_FILTER_REWRITE()31 RewriteMethodPtr MultiTermQuery::CONSTANT_SCORE_FILTER_REWRITE() {
32     static RewriteMethodPtr _CONSTANT_SCORE_FILTER_REWRITE;
33     if (!_CONSTANT_SCORE_FILTER_REWRITE) {
34         _CONSTANT_SCORE_FILTER_REWRITE = newLucene<ConstantScoreFilterRewrite>();
35         CycleCheck::addStatic(_CONSTANT_SCORE_FILTER_REWRITE);
36     }
37     return _CONSTANT_SCORE_FILTER_REWRITE;
38 }
39 
SCORING_BOOLEAN_QUERY_REWRITE()40 RewriteMethodPtr MultiTermQuery::SCORING_BOOLEAN_QUERY_REWRITE() {
41     static RewriteMethodPtr _SCORING_BOOLEAN_QUERY_REWRITE;
42     if (!_SCORING_BOOLEAN_QUERY_REWRITE) {
43         _SCORING_BOOLEAN_QUERY_REWRITE = newLucene<ScoringBooleanQueryRewrite>();
44         CycleCheck::addStatic(_SCORING_BOOLEAN_QUERY_REWRITE);
45     }
46     return _SCORING_BOOLEAN_QUERY_REWRITE;
47 }
48 
CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE()49 RewriteMethodPtr MultiTermQuery::CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE() {
50     static RewriteMethodPtr _CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE;
51     if (!_CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE) {
52         _CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE = newLucene<ConstantScoreBooleanQueryRewrite>();
53         CycleCheck::addStatic(_CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE);
54     }
55     return _CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE;
56 }
57 
CONSTANT_SCORE_AUTO_REWRITE_DEFAULT()58 RewriteMethodPtr MultiTermQuery::CONSTANT_SCORE_AUTO_REWRITE_DEFAULT() {
59     static RewriteMethodPtr _CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
60     if (!_CONSTANT_SCORE_AUTO_REWRITE_DEFAULT) {
61         _CONSTANT_SCORE_AUTO_REWRITE_DEFAULT = newLucene<ConstantScoreAutoRewriteDefault>();
62         CycleCheck::addStatic(_CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);
63     }
64     return _CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
65 }
66 
getTotalNumberOfTerms()67 int32_t MultiTermQuery::getTotalNumberOfTerms() {
68     return numberOfTerms;
69 }
70 
clearTotalNumberOfTerms()71 void MultiTermQuery::clearTotalNumberOfTerms() {
72     numberOfTerms = 0;
73 }
74 
incTotalNumberOfTerms(int32_t inc)75 void MultiTermQuery::incTotalNumberOfTerms(int32_t inc) {
76     numberOfTerms += inc;
77 }
78 
rewrite(const IndexReaderPtr & reader)79 QueryPtr MultiTermQuery::rewrite(const IndexReaderPtr& reader) {
80     return rewriteMethod->rewrite(reader, shared_from_this());
81 }
82 
getRewriteMethod()83 RewriteMethodPtr MultiTermQuery::getRewriteMethod() {
84     return rewriteMethod;
85 }
86 
setRewriteMethod(const RewriteMethodPtr & method)87 void MultiTermQuery::setRewriteMethod(const RewriteMethodPtr& method) {
88     rewriteMethod = method;
89 }
90 
// Clones via Query::clone and then copies this class's own state (term counter and
// rewrite method) onto the result, which is returned as a LuceneObjectPtr.
LuceneObjectPtr MultiTermQuery::clone(const LuceneObjectPtr& other) {
    MultiTermQueryPtr copy(boost::dynamic_pointer_cast<MultiTermQuery>(Query::clone(other)));
    copy->numberOfTerms = numberOfTerms;
    copy->rewriteMethod = rewriteMethod;
    return copy;
}
98 
hashCode()99 int32_t MultiTermQuery::hashCode() {
100     int32_t prime = 31;
101     int32_t result = 1;
102     result = prime * result + MiscUtils::doubleToIntBits(getBoost());
103     result = prime * result;
104     result += rewriteMethod->hashCode();
105     return result;
106 }
107 
equals(const LuceneObjectPtr & other)108 bool MultiTermQuery::equals(const LuceneObjectPtr& other) {
109     if (LuceneObject::equals(other)) {
110         return true;
111     }
112     if (!other) {
113         return false;
114     }
115     if (!MiscUtils::equalTypes(shared_from_this(), other)) {
116         return false;
117     }
118     MultiTermQueryPtr otherMultiTermQuery(boost::dynamic_pointer_cast<MultiTermQuery>(other));
119     if (!otherMultiTermQuery) {
120         return false;
121     }
122     if (MiscUtils::doubleToIntBits(getBoost()) != MiscUtils::doubleToIntBits(otherMultiTermQuery->getBoost())) {
123         return false;
124     }
125     if (!rewriteMethod->equals(otherMultiTermQuery->rewriteMethod)) {
126         return false;
127     }
128     return true;
129 }
130 
~RewriteMethod()131 RewriteMethod::~RewriteMethod() {
132 }
133 
~ConstantScoreFilterRewrite()134 ConstantScoreFilterRewrite::~ConstantScoreFilterRewrite() {
135 }
136 
rewrite(const IndexReaderPtr & reader,const MultiTermQueryPtr & query)137 QueryPtr ConstantScoreFilterRewrite::rewrite(const IndexReaderPtr& reader, const MultiTermQueryPtr& query) {
138     QueryPtr result(newLucene<ConstantScoreQuery>(newLucene<MultiTermQueryWrapperFilter>(query)));
139     result->setBoost(query->getBoost());
140     return result;
141 }
142 
~ScoringBooleanQueryRewrite()143 ScoringBooleanQueryRewrite::~ScoringBooleanQueryRewrite() {
144 }
145 
rewrite(const IndexReaderPtr & reader,const MultiTermQueryPtr & query)146 QueryPtr ScoringBooleanQueryRewrite::rewrite(const IndexReaderPtr& reader, const MultiTermQueryPtr& query) {
147     FilteredTermEnumPtr enumerator(query->getEnum(reader));
148     BooleanQueryPtr result(newLucene<BooleanQuery>(true));
149     int32_t count = 0;
150     LuceneException finally;
151     try {
152         do {
153             TermPtr t(enumerator->term());
154             if (t) {
155                 TermQueryPtr tq(newLucene<TermQuery>(t)); // found a match
156                 tq->setBoost(query->getBoost() * enumerator->difference()); // set the boost
157                 result->add(tq, BooleanClause::SHOULD); // add to query
158                 ++count;
159             }
160         } while (enumerator->next());
161     } catch (LuceneException& e) {
162         finally = e;
163     }
164     enumerator->close();
165     finally.throwException();
166     query->incTotalNumberOfTerms(count);
167     return result;
168 }
169 
~ConstantScoreBooleanQueryRewrite()170 ConstantScoreBooleanQueryRewrite::~ConstantScoreBooleanQueryRewrite() {
171 }
172 
rewrite(const IndexReaderPtr & reader,const MultiTermQueryPtr & query)173 QueryPtr ConstantScoreBooleanQueryRewrite::rewrite(const IndexReaderPtr& reader, const MultiTermQueryPtr& query) {
174     // strip the scores off
175     QueryPtr result(newLucene<ConstantScoreQuery>(newLucene<QueryWrapperFilter>(ScoringBooleanQueryRewrite::rewrite(reader, query))));
176     result->setBoost(query->getBoost());
177     return result;
178 }
179 
180 // Defaults derived from rough tests with a 20.0 million doc Wikipedia index.  With more than 350 terms
181 // in the query, the filter method is fastest
182 const int32_t ConstantScoreAutoRewrite::DEFAULT_TERM_COUNT_CUTOFF = 350;
183 
184 // If the query will hit more than 1 in 1000 of the docs in the index (0.1%), the filter method is fastest
185 const double ConstantScoreAutoRewrite::DEFAULT_DOC_COUNT_PERCENT = 0.1;
186 
ConstantScoreAutoRewrite()187 ConstantScoreAutoRewrite::ConstantScoreAutoRewrite() {
188     termCountCutoff = DEFAULT_TERM_COUNT_CUTOFF;
189     docCountPercent = DEFAULT_DOC_COUNT_PERCENT;
190 }
191 
~ConstantScoreAutoRewrite()192 ConstantScoreAutoRewrite::~ConstantScoreAutoRewrite() {
193 }
194 
setTermCountCutoff(int32_t count)195 void ConstantScoreAutoRewrite::setTermCountCutoff(int32_t count) {
196     termCountCutoff = count;
197 }
198 
getTermCountCutoff()199 int32_t ConstantScoreAutoRewrite::getTermCountCutoff() {
200     return termCountCutoff;
201 }
202 
setDocCountPercent(double percent)203 void ConstantScoreAutoRewrite::setDocCountPercent(double percent) {
204     docCountPercent = percent;
205 }
206 
getDocCountPercent()207 double ConstantScoreAutoRewrite::getDocCountPercent() {
208     return docCountPercent;
209 }
210 
rewrite(const IndexReaderPtr & reader,const MultiTermQueryPtr & query)211 QueryPtr ConstantScoreAutoRewrite::rewrite(const IndexReaderPtr& reader, const MultiTermQueryPtr& query) {
212     // Get the enum and start visiting terms.  If we exhaust the enum before hitting either of the
213     // cutoffs, we use ConstantBooleanQueryRewrite; else ConstantFilterRewrite
214     Collection<TermPtr> pendingTerms(Collection<TermPtr>::newInstance());
215     int32_t docCountCutoff = (int32_t)((docCountPercent / 100.0) * (double)reader->maxDoc());
216     int32_t termCountLimit = std::min(BooleanQuery::getMaxClauseCount(), termCountCutoff);
217     int32_t docVisitCount = 0;
218 
219     FilteredTermEnumPtr enumerator(query->getEnum(reader));
220     QueryPtr result;
221     LuceneException finally;
222     try {
223         while (true) {
224             TermPtr t(enumerator->term());
225             if (t) {
226                 pendingTerms.add(t);
227                 // Loading the TermInfo from the terms dict here should not be costly, because 1) the
228                 // query/filter will load the TermInfo when it runs, and 2) the terms dict has a cache
229                 docVisitCount += reader->docFreq(t);
230             }
231 
232             if (pendingTerms.size() >= termCountLimit || docVisitCount >= docCountCutoff) {
233                 // Too many terms -- make a filter.
234                 result = newLucene<ConstantScoreQuery>(newLucene<MultiTermQueryWrapperFilter>(query));
235                 result->setBoost(query->getBoost());
236                 break;
237             } else if (!enumerator->next()) {
238                 // Enumeration is done, and we hit a small enough number of terms and docs -
239                 // just make a BooleanQuery, now
240                 BooleanQueryPtr bq(newLucene<BooleanQuery>(true));
241                 for (Collection<TermPtr>::iterator term = pendingTerms.begin(); term != pendingTerms.end(); ++ term) {
242                     TermQueryPtr tq(newLucene<TermQuery>(*term));
243                     bq->add(tq, BooleanClause::SHOULD);
244                 }
245                 // Strip scores
246                 result = newLucene<ConstantScoreQuery>(newLucene<QueryWrapperFilter>(bq));
247                 result->setBoost(query->getBoost());
248                 query->incTotalNumberOfTerms(pendingTerms.size());
249                 break;
250             }
251         }
252     } catch (LuceneException& e) {
253         finally = e;
254     }
255     enumerator->close();
256     finally.throwException();
257     return result;
258 }
259 
hashCode()260 int32_t ConstantScoreAutoRewrite::hashCode() {
261     int32_t prime = 1279;
262     return (int32_t)(prime * termCountCutoff + MiscUtils::doubleToLongBits(docCountPercent));
263 }
264 
equals(const LuceneObjectPtr & other)265 bool ConstantScoreAutoRewrite::equals(const LuceneObjectPtr& other) {
266     if (RewriteMethod::equals(other)) {
267         return true;
268     }
269     if (!other) {
270         return false;
271     }
272     if (!MiscUtils::equalTypes(shared_from_this(), other)) {
273         return false;
274     }
275 
276     ConstantScoreAutoRewritePtr otherConstantScoreAutoRewrite(boost::dynamic_pointer_cast<ConstantScoreAutoRewrite>(other));
277     if (!otherConstantScoreAutoRewrite) {
278         return false;
279     }
280 
281     if (termCountCutoff != otherConstantScoreAutoRewrite->termCountCutoff) {
282         return false;
283     }
284 
285     if (MiscUtils::doubleToLongBits(docCountPercent) != MiscUtils::doubleToLongBits(otherConstantScoreAutoRewrite->docCountPercent)) {
286         return false;
287     }
288 
289     return true;
290 }
291 
~ConstantScoreAutoRewriteDefault()292 ConstantScoreAutoRewriteDefault::~ConstantScoreAutoRewriteDefault() {
293 }
294 
setTermCountCutoff(int32_t count)295 void ConstantScoreAutoRewriteDefault::setTermCountCutoff(int32_t count) {
296     boost::throw_exception(UnsupportedOperationException(L"Please create a private instance"));
297 }
298 
setDocCountPercent(double percent)299 void ConstantScoreAutoRewriteDefault::setDocCountPercent(double percent) {
300     boost::throw_exception(UnsupportedOperationException(L"Please create a private instance"));
301 }
302 
303 }
304