1 /////////////////////////////////////////////////////////////////////////////
2 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
5 /////////////////////////////////////////////////////////////////////////////
6 
7 #ifndef FIELDCACHE_H
8 #define FIELDCACHE_H
9 
10 #include <boost/any.hpp>
11 #include "LuceneObject.h"
12 
13 namespace Lucene {
14 
/// Maintains caches of term values.
///
/// This is the abstract cache interface: it exposes static accessors for the default cache instance and
/// the stock parsers, typed getters that return one value per document for a field, and hooks to inspect
/// or purge the cache.  getCacheEntries(), purgeAllCaches() and purge() are pure virtual, so the class
/// cannot be instantiated directly.
/// @see FieldCacheSanityChecker
class LPPAPI FieldCache {
public:
    virtual ~FieldCache();
    LUCENE_INTERFACE(FieldCache);

public:
    /// Kinds of cached values, named after the value types served by the getters below (byte, int, long,
    /// double, string and string index).  Enumerators are numbered consecutively starting at 1.
    /// NOTE(review): presumably the values reported by FieldCacheEntry::getCacheType() - confirm against
    /// the cache implementation.
    enum CacheType {
        CACHE_BYTE = 1,
        CACHE_INT,
        CACHE_LONG,
        CACHE_DOUBLE,
        CACHE_STRING,
        CACHE_STRING_INDEX
    };

    /// Indicator for StringIndex values in the cache.
    /// NOTE: the value assigned to this constant must not be the same as any of those in SortField
    static const int32_t STRING_INDEX;

public:
    /// The cache used internally by sorting and range query classes.
    static FieldCachePtr DEFAULT();

    /// The default parser for byte values, which are encoded by StringUtils::toInt
    static ByteParserPtr DEFAULT_BYTE_PARSER();

    /// The default parser for int values, which are encoded by StringUtils::toInt
    static IntParserPtr DEFAULT_INT_PARSER();

    /// The default parser for long values, which are encoded by StringUtils::toLong
    static LongParserPtr DEFAULT_LONG_PARSER();

    /// The default parser for double values, which are encoded by StringUtils::toDouble
    static DoubleParserPtr DEFAULT_DOUBLE_PARSER();

    /// A parser instance for int values encoded by {@link NumericUtils#prefixCodedToInt(String)},
    /// eg. when indexed via {@link NumericField}/{@link NumericTokenStream}.
    static IntParserPtr NUMERIC_UTILS_INT_PARSER();

    /// A parser instance for long values encoded by {@link NumericUtils#prefixCodedToLong(String)},
    /// eg. when indexed via {@link NumericField}/{@link NumericTokenStream}.
    static LongParserPtr NUMERIC_UTILS_LONG_PARSER();

    /// A parser instance for double values encoded by {@link NumericUtils},
    /// eg. when indexed via {@link NumericField}/{@link NumericTokenStream}.
    static DoubleParserPtr NUMERIC_UTILS_DOUBLE_PARSER();

    /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in
    /// field as a single byte and returns an array of size reader.maxDoc() of the value each document
    /// has in the given field.
    /// @param reader Used to get field values.
    /// @param field Which field contains the single byte values.
    /// @return The values in the given field for each document.
    virtual Collection<uint8_t> getBytes(const IndexReaderPtr& reader, const String& field);

    /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in
    /// field as bytes and returns an array of size reader.maxDoc() of the value each document has in
    /// the given field.
    /// @param reader Used to get field values.
    /// @param field Which field contains the bytes.
    /// @param parser Computes byte for string values.
    /// @return The values in the given field for each document.
    virtual Collection<uint8_t> getBytes(const IndexReaderPtr& reader, const String& field, const ByteParserPtr& parser);

    /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in
    /// field as integers and returns an array of size reader.maxDoc() of the value each document has in
    /// the given field.
    /// @param reader Used to get field values.
    /// @param field Which field contains the integers.
    /// @return The values in the given field for each document.
    virtual Collection<int32_t> getInts(const IndexReaderPtr& reader, const String& field);

    /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in
    /// field as integers and returns an array of size reader.maxDoc() of the value each document has in
    /// the given field.
    /// @param reader Used to get field values.
    /// @param field Which field contains the integers.
    /// @param parser Computes integer for string values.
    /// @return The values in the given field for each document.
    virtual Collection<int32_t> getInts(const IndexReaderPtr& reader, const String& field, const IntParserPtr& parser);

    /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in
    /// field as longs and returns an array of size reader.maxDoc() of the value each document has in
    /// the given field.
    /// @param reader Used to get field values.
    /// @param field Which field contains the longs.
    /// @return The values in the given field for each document.
    virtual Collection<int64_t> getLongs(const IndexReaderPtr& reader, const String& field);

    /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in
    /// field as longs and returns an array of size reader.maxDoc() of the value each document has in
    /// the given field.
    /// @param reader Used to get field values.
    /// @param field Which field contains the longs.
    /// @param parser Computes long for string values.
    /// @return The values in the given field for each document.
    virtual Collection<int64_t> getLongs(const IndexReaderPtr& reader, const String& field, const LongParserPtr& parser);

    /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in
    /// field as doubles and returns an array of size reader.maxDoc() of the value each document has in
    /// the given field.
    /// @param reader Used to get field values.
    /// @param field Which field contains the doubles.
    /// @return The values in the given field for each document.
    virtual Collection<double> getDoubles(const IndexReaderPtr& reader, const String& field);

    /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in
    /// field as doubles and returns an array of size reader.maxDoc() of the value each document has in
    /// the given field.
    /// @param reader Used to get field values.
    /// @param field Which field contains the doubles.
    /// @param parser Computes double for string values.
    /// @return The values in the given field for each document.
    virtual Collection<double> getDoubles(const IndexReaderPtr& reader, const String& field, const DoubleParserPtr& parser);

    /// Checks the internal cache for an appropriate entry, and if none are found, reads the term values in
    /// field and returns an array of size reader.maxDoc() containing the value each document has in
    /// the given field.
    /// @param reader Used to get field values.
    /// @param field Which field contains the strings.
    /// @return The values in the given field for each document.
    virtual Collection<String> getStrings(const IndexReaderPtr& reader, const String& field);

    /// Checks the internal cache for an appropriate entry, and if none are found reads the term values in
    /// field and returns an array of them in natural order, along with an array telling which element in
    /// the term array each document uses.
    /// @param reader Used to get field values.
    /// @param field Which field contains the strings.
    /// @return Array of terms and index into the array for each document.
    virtual StringIndexPtr getStringIndex(const IndexReaderPtr& reader, const String& field);

    /// Generates an array of CacheEntry objects representing all items currently in the FieldCache.
    /// Pure virtual: must be provided by the concrete cache.
    virtual Collection<FieldCacheEntryPtr> getCacheEntries() = 0;

    /// Instructs the FieldCache to forcibly expunge all entries from the underlying caches.  This is intended
    /// only to be used for test methods as a way to ensure a known base state of the Cache.  It should not be
    /// relied on for "Cache maintenance" in general application code.
    /// Pure virtual: must be provided by the concrete cache.
    virtual void purgeAllCaches() = 0;

    /// Drops all cache entries associated with this reader.  NOTE: this reader must precisely match the reader
    /// that the cache entry is keyed on. If you pass a top-level reader, it usually will have no effect as
    /// Lucene now caches at the segment reader level.
    /// Pure virtual: must be provided by the concrete cache.
    /// @param r Reader whose cache entries should be dropped.
    virtual void purge(const IndexReaderPtr& r) = 0;

    /// If non-null, FieldCacheImpl will warn whenever entries are created that are not sane according to
    /// {@link FieldCacheSanityChecker}.
    /// @param stream Stream that receives the warnings; may be null.
    virtual void setInfoStream(const InfoStreamPtr& stream);

    /// Counterpart of setInfoStream().
    /// NOTE(review): presumably returns the stream last passed to setInfoStream() - confirm in the
    /// implementation.
    /// @see #setInfoStream
    virtual InfoStreamPtr getInfoStream();
};
169 
/// Holder object exposing a single type-erased value as a public member.
/// NOTE(review): presumably stands in for a cache entry whose value is still being created - confirm
/// against the cache implementation, which is not visible from this header.
class LPPAPI CreationPlaceholder : public LuceneObject {
public:
    virtual ~CreationPlaceholder();
    LUCENE_CLASS(CreationPlaceholder);

public:
    /// Type-erased payload; this declaration places no constraint on the stored type.
    boost::any value;
};
178 
/// Stores term text values and document ordering data.
/// Both collections are exposed as public data members.
class LPPAPI StringIndex : public LuceneObject {
public:
    /// @param values Per-document indexes; presumably stored in the order member - confirm in the
    /// implementation.
    /// @param lookup Term values; presumably stored in the lookup member - confirm in the implementation.
    StringIndex(Collection<int32_t> values, Collection<String> lookup);
    virtual ~StringIndex();

    LUCENE_CLASS(StringIndex);

public:
    /// All the term values, in natural order.
    Collection<String> lookup;

    /// For each document, an index into the lookup array.
    Collection<int32_t> order;

public:
    /// Looks up key among the term values.
    /// NOTE(review): presumably a binary search over lookup; the value returned when key is absent is
    /// not visible from this header - confirm in the implementation before relying on it.
    /// @param key Term text to search for.
    /// @return Position information for key as an int32_t.
    int32_t binarySearchLookup(const String& key);
};
197 
/// Marker interface as super-interface to all parsers.  It is used to specify a custom parser to {@link
/// SortField#SortField(String, Parser)}.
///
/// Declares no parsing members of its own; ByteParser, IntParser, LongParser and DoubleParser derive
/// from it and add the typed parse methods.
class LPPAPI Parser : public LuceneObject {
public:
    virtual ~Parser();
    LUCENE_CLASS(Parser);
};
205 
/// Interface to parse bytes from document fields.
/// @see FieldCache#getBytes(IndexReaderPtr, String, ByteParserPtr)
class LPPAPI ByteParser : public Parser {
public:
    virtual ~ByteParser();
    LUCENE_CLASS(ByteParser);

public:
    /// Return a single byte representation of this field's value.
    /// Declared virtual but not pure; the base behaviour is defined outside this header.
    /// @param string Term text to convert.
    /// @return The parsed byte.
    virtual uint8_t parseByte(const String& string);
};
217 
/// Interface to parse ints from document fields.
/// @see FieldCache#getInts(IndexReaderPtr, String, IntParserPtr)
class LPPAPI IntParser : public Parser {
public:
    virtual ~IntParser();
    LUCENE_CLASS(IntParser);

public:
    /// Return an integer representation of this field's value.
    /// Declared virtual but not pure; the base behaviour is defined outside this header.
    /// @param string Term text to convert.
    /// @return The parsed 32-bit integer.
    virtual int32_t parseInt(const String& string);
};
229 
/// Interface to parse longs from document fields.
/// @see FieldCache#getLongs(IndexReaderPtr, String, LongParserPtr)
class LPPAPI LongParser : public Parser {
public:
    virtual ~LongParser();
    LUCENE_CLASS(LongParser);

public:
    /// Return a long representation of this field's value.
    /// Declared virtual but not pure; the base behaviour is defined outside this header.
    /// @param string Term text to convert.
    /// @return The parsed 64-bit integer.
    virtual int64_t parseLong(const String& string);
};
241 
/// Interface to parse doubles from document fields.
/// @see FieldCache#getDoubles(IndexReaderPtr, String, DoubleParserPtr)
class LPPAPI DoubleParser : public Parser {
public:
    virtual ~DoubleParser();
    LUCENE_CLASS(DoubleParser);

public:
    /// Return a double representation of this field's value.
    /// Declared virtual but not pure; the base behaviour is defined outside this header.
    /// @param string Term text to convert.
    /// @return The parsed double.
    virtual double parseDouble(const String& string);
};
253 
/// A unique Identifier/Description for each item in the FieldCache.  Can be useful for logging/debugging.
///
/// All accessors are pure virtual, so concrete entries must be supplied by the cache implementation;
/// only toString() has a declaration that is not pure.
class LPPAPI FieldCacheEntry : public LuceneObject {
public:
    virtual ~FieldCacheEntry();
    LUCENE_CLASS(FieldCacheEntry);

public:
    /// Object identifying the reader this entry belongs to.
    /// NOTE(review): presumably the reader the cache entry is keyed on (see FieldCache::purge) - confirm.
    virtual LuceneObjectPtr getReaderKey() = 0;

    /// Name of the field this entry was built for.
    virtual String getFieldName() = 0;

    /// Kind of value cached by this entry.
    /// NOTE(review): presumably a FieldCache::CacheType value or FieldCache::STRING_INDEX - confirm
    /// against the implementation.
    virtual int32_t getCacheType() = 0;

    /// Type-erased custom data attached to this entry.
    /// NOTE(review): presumably the parser used to build the entry, if any - confirm.
    virtual boost::any getCustom() = 0;

    /// Type-erased cached value; the concrete stored type is not visible from this header.
    virtual boost::any getValue() = 0;

    /// String description of this entry; the format is defined by the implementation.
    virtual String toString();
};
269 
270 }
271 
272 #endif
273