1 /////////////////////////////////////////////////////////////////////////////
2 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
5 /////////////////////////////////////////////////////////////////////////////
6 
7 #ifndef MULTITERMQUERY_H
8 #define MULTITERMQUERY_H
9 
10 #include "Query.h"
11 
12 namespace Lucene {
13 
14 /// An abstract {@link Query} that matches documents containing a subset of terms provided by a {@link
15 /// FilteredTermEnum} enumeration.
16 ///
17 /// This query cannot be used directly; you must subclass it and define {@link #getEnum} to provide a
18 /// {@link FilteredTermEnum} that iterates through the terms to be matched.
19 ///
20 /// NOTE: if the rewrite method passed to {@link #setRewriteMethod} is either {@link
21 /// #CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE} or {@link #SCORING_BOOLEAN_QUERY_REWRITE}, you may encounter a
22 /// {@link BooleanQuery.TooManyClauses} exception during searching, which happens when the number of terms
23 /// to be searched exceeds {@link BooleanQuery#getMaxClauseCount()}.  Passing {@link
24 /// #CONSTANT_SCORE_FILTER_REWRITE} to {@link #setRewriteMethod} instead prevents this.
25 ///
26 /// The recommended rewrite method is {@link #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}: it doesn't spend CPU
27 /// computing unhelpful scores, and it tries to pick the most performant rewrite method given the query.
28 ///
29 /// Note that {@link QueryParser} produces MultiTermQueries using {@link #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}
30 /// by default.
31 class LPPAPI MultiTermQuery : public Query {
32 public:
       /// Constructs the query.  Only declared here; the initial values given to the members are not visible
       /// in this header.
33     MultiTermQuery();
       /// Virtual destructor (declared here, defined out of line).
34     virtual ~MultiTermQuery();
35 
       // Project macro defined outside this header; it is applied to every class declared in this file.
36     LUCENE_CLASS(MultiTermQuery);
37 
38 protected:
       /// Rewrite strategy held by this query.  Presumably the value read and written by
       /// {@link #getRewriteMethod} / {@link #setRewriteMethod} (their definitions are not in this header).
39     RewriteMethodPtr rewriteMethod;
       /// Term counter.  Presumably the value behind {@link #getTotalNumberOfTerms},
       /// {@link #clearTotalNumberOfTerms} and {@link #incTotalNumberOfTerms} -- confirm in the implementation.
40     int32_t numberOfTerms;
41 
42 public:
43     /// A rewrite method that first creates a private Filter, by visiting each term in sequence and marking
44     /// all docs for that term.  Matching documents are assigned a constant score equal to the query's boost.
45     ///
46     /// This method is faster than the BooleanQuery rewrite methods when the number of matched terms or matched
47     /// documents is non-trivial.  Also, it will never hit an errant TooManyClauses exception.
48     ///
49     /// @see #setRewriteMethod
50     static RewriteMethodPtr CONSTANT_SCORE_FILTER_REWRITE();
51 
52     /// A rewrite method that first translates each term into a {@link BooleanClause.Occur#SHOULD} clause in a
53     /// BooleanQuery, and keeps the scores as computed by the query.  Note that typically such scores are
54     /// meaningless to the user, and require non-trivial CPU to compute, so it's almost always better to use
55     /// {@link #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} instead.
56     ///
57     /// NOTE: This rewrite method will hit {@link BooleanQuery.TooManyClauses} if the number of terms exceeds
58     /// {@link BooleanQuery#getMaxClauseCount}.
59     ///
60     /// @see #setRewriteMethod
61     static RewriteMethodPtr SCORING_BOOLEAN_QUERY_REWRITE();
62 
63     /// Like {@link #SCORING_BOOLEAN_QUERY_REWRITE} except scores are not computed.  Instead, each matching
64     /// document receives a constant score equal to the query's boost.
65     ///
66     /// NOTE: This rewrite method will hit TooManyClauses if the number of terms exceeds {@link
67     /// BooleanQuery#getMaxClauseCount}.
68     ///
69     /// @see #setRewriteMethod
70     static RewriteMethodPtr CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE();
71 
72     /// Read-only default instance of {@link ConstantScoreAutoRewrite}, with {@link
73     /// ConstantScoreAutoRewrite#setTermCountCutoff} set to {@link ConstantScoreAutoRewrite#DEFAULT_TERM_COUNT_CUTOFF}
74     /// and {@link ConstantScoreAutoRewrite#setDocCountPercent} set to {@link
75     /// ConstantScoreAutoRewrite#DEFAULT_DOC_COUNT_PERCENT}.  Note that you cannot alter the configuration of
76     /// this instance; you'll need to create a private instance instead.
77     static RewriteMethodPtr CONSTANT_SCORE_AUTO_REWRITE_DEFAULT();
78 
79     /// Return the number of unique terms visited during execution of the query.  If there are many of them,
80     /// you may consider using another query type or reducing the total term count in your index.
81     ///
82     /// This method is not thread safe; be sure to only call it when no query is running!  If you re-use the
83     /// same query instance for another search, be sure to first reset the term counter with {@link
84     /// #clearTotalNumberOfTerms}.
85     ///
86     /// On optimized indexes / no MultiReaders, you get the correct number of unique terms for the whole index.
87     /// Use this number to compare different queries.  For non-optimized indexes this number can also be achieved
88     /// in non-constant-score mode.  In constant-score mode you get the total number of terms seeked for all
89     /// segments / sub-readers.
90     /// @see #clearTotalNumberOfTerms
91     int32_t getTotalNumberOfTerms();
92 
93     /// Resets the counting of unique terms.  Do this before executing the query/filter.
94     /// @see #getTotalNumberOfTerms
95     void clearTotalNumberOfTerms();
96 
       /// Rewrites this query for the given reader.  The definition is not in this header; presumably it
       /// delegates to the configured {@link RewriteMethod} (see {@link #setRewriteMethod}) -- confirm in the
       /// implementation.
       /// @param reader the index reader the query is being rewritten against
       /// @return the rewritten query
97     virtual QueryPtr rewrite(const IndexReaderPtr& reader);
98 
99     /// @see #setRewriteMethod
100     virtual RewriteMethodPtr getRewriteMethod();
101 
102     /// Sets the rewrite method to be used when executing the query.  You can use one of the four core methods,
103     /// or implement your own subclass of {@link RewriteMethod}.
        ///
        /// The four core methods are the ones returned by the static functions declared above:
        /// {@link #CONSTANT_SCORE_FILTER_REWRITE}, {@link #SCORING_BOOLEAN_QUERY_REWRITE},
        /// {@link #CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE} and {@link #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}.
        /// @param method the rewrite method to use
104     virtual void setRewriteMethod(const RewriteMethodPtr& method);
105 
        /// Clone support.  {@code other} defaults to a default-constructed LuceneObjectPtr; its exact role
        /// (presumably an optional pre-allocated target for the copy) is defined by the base class, not here.
106     virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr());
        /// Hash code for this query.  Definition is out of line; which members contribute is not visible here.
107     virtual int32_t hashCode();
        /// Equality comparison against another object.  Definition is out of line; the compared state is not
        /// visible here.
108     virtual bool equals(const LuceneObjectPtr& other);
109 
110 protected:
111     /// Construct the enumeration to be used, expanding the pattern term.
        ///
        /// Pure virtual: every concrete subclass must implement it, which is why this class cannot be
        /// instantiated directly.
        /// @param reader the index reader whose terms are to be enumerated
112     virtual FilteredTermEnumPtr getEnum(const IndexReaderPtr& reader) = 0;
113 
        /// Term-counter update hook taking an increment.  Presumably adds {@code inc} to the counter reported
        /// by {@link #getTotalNumberOfTerms} -- the definition is not in this header.
114     void incTotalNumberOfTerms(int32_t inc);
115 
        // Friend declarations: these classes are granted access to the protected members above (for example
        // getEnum and incTotalNumberOfTerms).  Which members each one actually uses is not visible here.
116     friend class MultiTermQueryWrapperFilter;
117     friend class ScoringBooleanQueryRewrite;
118     friend class ConstantScoreAutoRewrite;
119 };
120 
121 /// Abstract class that defines how the query is rewritten.
122 class LPPAPI RewriteMethod : public LuceneObject {
123 public:
        /// Virtual destructor (declared here, defined out of line).  The class is a polymorphic base: it
        /// declares a pure virtual function and is derived from elsewhere in this header.
124     virtual ~RewriteMethod();
        // Project macro defined outside this header; it is applied to every class declared in this file.
125     LUCENE_CLASS(RewriteMethod);
126 
127 public:
        /// Produces the rewritten form of {@code query} for {@code reader}.
        ///
        /// Pure virtual: each concrete rewrite method supplies its own implementation.
        /// @param reader the index reader the query is being rewritten against
        /// @param query the multi-term query to rewrite
        /// @return the rewritten query
128     virtual QueryPtr rewrite(const IndexReaderPtr& reader, const MultiTermQueryPtr& query) = 0;
129 };
130 
131 /// A rewrite method that tries to pick the best constant-score rewrite method based on term and document
132 /// counts from the query.  If both the number of terms and the number of documents are small enough, then
133 /// {@link MultiTermQuery#CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE} is used.  Otherwise, {@link
134 /// MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE} is used.
135 class LPPAPI ConstantScoreAutoRewrite : public RewriteMethod {
136 public:
        /// Constructs the rewrite method.  Only declared here; presumably it initialises the two thresholds
        /// below from DEFAULT_TERM_COUNT_CUTOFF and DEFAULT_DOC_COUNT_PERCENT -- confirm in the implementation.
137     ConstantScoreAutoRewrite();
        /// Virtual destructor (declared here, defined out of line).
138     virtual ~ConstantScoreAutoRewrite();
139 
        // Project macro defined outside this header; it is applied to every class declared in this file.
140     LUCENE_CLASS(ConstantScoreAutoRewrite);
141 
142 public:
143     // Defaults derived from rough tests with a 20.0 million doc Wikipedia index.  With more than 350 terms
144     // in the query, the filter method is fastest
        // (The constant is only declared here; its value is defined out of line.)
145     static const int32_t DEFAULT_TERM_COUNT_CUTOFF;
146 
147     // If the query will hit more than 1 in 1000 of the docs in the index (0.1%), the filter method is fastest
        // (The constant is only declared here; its value is defined out of line.)
148     static const double DEFAULT_DOC_COUNT_PERCENT;
149 
150 protected:
        /// Term-count threshold; presumably the value behind {@link #setTermCountCutoff} /
        /// {@link #getTermCountCutoff} (definitions are not in this header).
151     int32_t termCountCutoff;
        /// Document-count threshold, expressed as a percentage; presumably the value behind
        /// {@link #setDocCountPercent} / {@link #getDocCountPercent} (definitions are not in this header).
152     double docCountPercent;
153 
154 public:
155     /// If the number of terms in this query is equal to or larger than this setting then {@link
156     /// MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE} is used.
        /// @param count the term-count threshold
157     virtual void setTermCountCutoff(int32_t count);
158 
159     /// @see #setTermCountCutoff
160     virtual int32_t getTermCountCutoff();
161 
162     /// If the number of documents to be visited in the postings exceeds this specified percentage of the
163     /// maxDoc() for the index, then {@link MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE} is used.
164     /// @param percent 0.0 to 100.0
165     virtual void setDocCountPercent(double percent);
166 
167     /// @see #setDocCountPercent
168     virtual double getDocCountPercent();
169 
        /// Implementation of the pure virtual {@link RewriteMethod#rewrite}.  The class-level description says
        /// it chooses between the constant-score boolean rewrite and the constant-score filter rewrite based
        /// on the two thresholds; the selection logic itself is defined out of line.
        /// @param reader the index reader the query is being rewritten against
        /// @param query the multi-term query to rewrite
        /// @return the rewritten query
170     virtual QueryPtr rewrite(const IndexReaderPtr& reader, const MultiTermQueryPtr& query);
171 
        /// Hash code for this rewrite method.  Definition is out of line; presumably derived from the two
        /// thresholds -- confirm in the implementation.
172     virtual int32_t hashCode();
        /// Equality comparison against another object.  Definition is out of line; presumably compares the
        /// two thresholds -- confirm in the implementation.
173     virtual bool equals(const LuceneObjectPtr& other);
174 };
175 
176 }
177 
178 #endif
179