1 /////////////////////////////////////////////////////////////////////////////
2 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
5 /////////////////////////////////////////////////////////////////////////////
6
7 #include "LuceneInc.h"
8 #include "FieldCacheImpl.h"
9 #include "FieldCacheSanityChecker.h"
10 #include "IndexReader.h"
11 #include "InfoStream.h"
12 #include "TermEnum.h"
13 #include "TermDocs.h"
14 #include "Term.h"
15 #include "StringUtils.h"
16 #include "VariantUtils.h"
17
18 namespace Lucene {
19
FieldCacheImpl()20 FieldCacheImpl::FieldCacheImpl() {
21 }
22
~FieldCacheImpl()23 FieldCacheImpl::~FieldCacheImpl() {
24 }
25
initialize()26 void FieldCacheImpl::initialize() {
27 caches = MapStringCache::newInstance();
28 caches.put(CACHE_BYTE, newLucene<ByteCache>(shared_from_this()));
29 caches.put(CACHE_INT, newLucene<IntCache>(shared_from_this()));
30 caches.put(CACHE_LONG, newLucene<LongCache>(shared_from_this()));
31 caches.put(CACHE_DOUBLE, newLucene<DoubleCache>(shared_from_this()));
32 caches.put(CACHE_STRING, newLucene<StringCache>(shared_from_this()));
33 caches.put(CACHE_STRING_INDEX, newLucene<StringIndexCache>(shared_from_this()));
34 }
35
purgeAllCaches()36 void FieldCacheImpl::purgeAllCaches() {
37 initialize();
38 }
39
purge(const IndexReaderPtr & r)40 void FieldCacheImpl::purge(const IndexReaderPtr& r) {
41 for (MapStringCache::iterator cache = caches.begin(); cache != caches.end(); ++cache) {
42 cache->second->purge(r);
43 }
44 }
45
getCacheEntries()46 Collection<FieldCacheEntryPtr> FieldCacheImpl::getCacheEntries() {
47 Collection<FieldCacheEntryPtr> result(Collection<FieldCacheEntryPtr>::newInstance());
48 for (MapStringCache::iterator cache = caches.begin(); cache != caches.end(); ++cache) {
49 for (WeakMapLuceneObjectMapEntryAny::iterator key = cache->second->readerCache.begin(); key != cache->second->readerCache.end(); ++key) {
50 LuceneObjectPtr readerKey(key->first.lock());
51
52 // we've now materialized a hard ref
53 if (readerKey) {
54 for (MapEntryAny::iterator mapEntry = key->second.begin(); mapEntry != key->second.end(); ++mapEntry) {
55 result.add(newLucene<FieldCacheEntryImpl>(readerKey, mapEntry->first->field, cache->first, mapEntry->first->custom, mapEntry->second));
56 }
57 }
58 }
59 }
60 return result;
61 }
62
getBytes(const IndexReaderPtr & reader,const String & field)63 Collection<uint8_t> FieldCacheImpl::getBytes(const IndexReaderPtr& reader, const String& field) {
64 return getBytes(reader, field, ByteParserPtr());
65 }
66
getBytes(const IndexReaderPtr & reader,const String & field,const ByteParserPtr & parser)67 Collection<uint8_t> FieldCacheImpl::getBytes(const IndexReaderPtr& reader, const String& field, const ByteParserPtr& parser) {
68 return VariantUtils::get< Collection<uint8_t> >(caches.get(CACHE_BYTE)->get(reader, newLucene<Entry>(field, parser)));
69 }
70
getInts(const IndexReaderPtr & reader,const String & field)71 Collection<int32_t> FieldCacheImpl::getInts(const IndexReaderPtr& reader, const String& field) {
72 return getInts(reader, field, IntParserPtr());
73 }
74
getInts(const IndexReaderPtr & reader,const String & field,const IntParserPtr & parser)75 Collection<int32_t> FieldCacheImpl::getInts(const IndexReaderPtr& reader, const String& field, const IntParserPtr& parser) {
76 return VariantUtils::get< Collection<int32_t> >(caches.get(CACHE_INT)->get(reader, newLucene<Entry>(field, parser)));
77 }
78
getLongs(const IndexReaderPtr & reader,const String & field)79 Collection<int64_t> FieldCacheImpl::getLongs(const IndexReaderPtr& reader, const String& field) {
80 return getLongs(reader, field, LongParserPtr());
81 }
82
getLongs(const IndexReaderPtr & reader,const String & field,const LongParserPtr & parser)83 Collection<int64_t> FieldCacheImpl::getLongs(const IndexReaderPtr& reader, const String& field, const LongParserPtr& parser) {
84 return VariantUtils::get< Collection<int64_t> >(caches.get(CACHE_LONG)->get(reader, newLucene<Entry>(field, parser)));
85 }
86
getDoubles(const IndexReaderPtr & reader,const String & field)87 Collection<double> FieldCacheImpl::getDoubles(const IndexReaderPtr& reader, const String& field) {
88 return getDoubles(reader, field, DoubleParserPtr());
89 }
90
getDoubles(const IndexReaderPtr & reader,const String & field,const DoubleParserPtr & parser)91 Collection<double> FieldCacheImpl::getDoubles(const IndexReaderPtr& reader, const String& field, const DoubleParserPtr& parser) {
92 return VariantUtils::get< Collection<double> >(caches.get(CACHE_DOUBLE)->get(reader, newLucene<Entry>(field, parser)));
93 }
94
getStrings(const IndexReaderPtr & reader,const String & field)95 Collection<String> FieldCacheImpl::getStrings(const IndexReaderPtr& reader, const String& field) {
96 return VariantUtils::get< Collection<String> >(caches.get(CACHE_STRING)->get(reader, newLucene<Entry>(field, ParserPtr())));
97 }
98
getStringIndex(const IndexReaderPtr & reader,const String & field)99 StringIndexPtr FieldCacheImpl::getStringIndex(const IndexReaderPtr& reader, const String& field) {
100 return VariantUtils::get< StringIndexPtr >(caches.get(CACHE_STRING_INDEX)->get(reader, newLucene<Entry>(field, ParserPtr())));
101 }
102
setInfoStream(const InfoStreamPtr & stream)103 void FieldCacheImpl::setInfoStream(const InfoStreamPtr& stream) {
104 infoStream = stream;
105 }
106
getInfoStream()107 InfoStreamPtr FieldCacheImpl::getInfoStream() {
108 return infoStream;
109 }
110
Entry(const String & field,const boost::any & custom)111 Entry::Entry(const String& field, const boost::any& custom) {
112 this->field = field;
113 this->custom = custom;
114 }
115
~Entry()116 Entry::~Entry() {
117 }
118
equals(const LuceneObjectPtr & other)119 bool Entry::equals(const LuceneObjectPtr& other) {
120 if (LuceneObject::equals(other)) {
121 return true;
122 }
123
124 EntryPtr otherEntry(boost::dynamic_pointer_cast<Entry>(other));
125 if (otherEntry) {
126 if (otherEntry->field == field) {
127 return VariantUtils::equalsType(custom, otherEntry->custom);
128 }
129 }
130 return false;
131 }
132
hashCode()133 int32_t Entry::hashCode() {
134 return StringUtils::hashCode(field) ^ VariantUtils::hashCode(custom);
135 }
136
Cache(const FieldCachePtr & wrapper)137 Cache::Cache(const FieldCachePtr& wrapper) {
138 this->_wrapper = wrapper;
139 this->readerCache = WeakMapLuceneObjectMapEntryAny::newInstance();
140 }
141
~Cache()142 Cache::~Cache() {
143 }
144
purge(const IndexReaderPtr & r)145 void Cache::purge(const IndexReaderPtr& r) {
146 LuceneObjectPtr readerKey(r->getFieldCacheKey());
147 SyncLock cacheLock(&readerCache);
148 readerCache.remove(readerKey);
149 }
150
get(const IndexReaderPtr & reader,const EntryPtr & key)151 boost::any Cache::get(const IndexReaderPtr& reader, const EntryPtr& key) {
152 MapEntryAny innerCache;
153 boost::any value;
154 LuceneObjectPtr readerKey(reader->getFieldCacheKey());
155 {
156 SyncLock cacheLock(&readerCache);
157 innerCache = readerCache.get(readerKey);
158 if (!innerCache) {
159 innerCache = MapEntryAny::newInstance();
160 readerCache.put(readerKey, innerCache);
161 } else if (innerCache.contains(key)) {
162 value = innerCache[key];
163 }
164 if (VariantUtils::isNull(value)) {
165 value = newLucene<CreationPlaceholder>();
166 innerCache.put(key, value);
167 }
168 }
169 if (VariantUtils::typeOf<CreationPlaceholderPtr>(value)) {
170 CreationPlaceholderPtr progress(VariantUtils::get<CreationPlaceholderPtr>(value));
171 SyncLock valueLock(progress);
172 if (VariantUtils::isNull(progress->value)) {
173 progress->value = createValue(reader, key);
174 {
175 SyncLock cacheLock(&readerCache);
176 innerCache.put(key, progress->value);
177 }
178
179 FieldCachePtr wrapper(_wrapper);
180
181 // Only check if key.custom (the parser) is non-null; else, we check twice for a single
182 // call to FieldCache.getXXX
183 if (!VariantUtils::isNull(key->custom) && wrapper) {
184 InfoStreamPtr infoStream(wrapper->getInfoStream());
185 if (infoStream) {
186 printNewInsanity(infoStream, progress->value);
187 }
188 }
189 }
190 return progress->value;
191 }
192 return value;
193 }
194
printNewInsanity(const InfoStreamPtr & infoStream,const boost::any & value)195 void Cache::printNewInsanity(const InfoStreamPtr& infoStream, const boost::any& value) {
196 Collection<InsanityPtr> insanities(FieldCacheSanityChecker::checkSanity(FieldCachePtr(_wrapper)));
197 for (Collection<InsanityPtr>::iterator insanity = insanities.begin(); insanity != insanities.end(); ++insanity) {
198 Collection<FieldCacheEntryPtr> entries((*insanity)->getCacheEntries());
199 for (Collection<FieldCacheEntryPtr>::iterator entry = entries.begin(); entry != entries.end(); ++entry) {
200 if (VariantUtils::equalsType((*entry)->getValue(), value)) {
201 // OK this insanity involves our entry
202 *infoStream << L"WARNING: new FieldCache insanity created\nDetails: " + (*insanity)->toString() << L"\n";
203 break;
204 }
205 }
206 }
207 }
208
ByteCache(const FieldCachePtr & wrapper)209 ByteCache::ByteCache(const FieldCachePtr& wrapper) : Cache(wrapper) {
210 }
211
~ByteCache()212 ByteCache::~ByteCache() {
213 }
214
createValue(const IndexReaderPtr & reader,const EntryPtr & key)215 boost::any ByteCache::createValue(const IndexReaderPtr& reader, const EntryPtr& key) {
216 EntryPtr entry(key);
217 String field(entry->field);
218 ByteParserPtr parser(VariantUtils::get<ByteParserPtr>(entry->custom));
219 if (!parser) {
220 return FieldCachePtr(_wrapper)->getBytes(reader, field, FieldCache::DEFAULT_BYTE_PARSER());
221 }
222 Collection<uint8_t> retArray(Collection<uint8_t>::newInstance(reader->maxDoc()));
223 TermDocsPtr termDocs(reader->termDocs());
224 TermEnumPtr termEnum(reader->terms(newLucene<Term>(field)));
225 LuceneException finally;
226 try {
227 do {
228 TermPtr term(termEnum->term());
229 if (!term || term->field() != field) {
230 break;
231 }
232 uint8_t termval = parser->parseByte(term->text());
233 termDocs->seek(termEnum);
234 while (termDocs->next()) {
235 retArray[termDocs->doc()] = termval;
236 }
237 } while (termEnum->next());
238 } catch (StopFillCacheException&) {
239 } catch (LuceneException& e) {
240 finally = e;
241 }
242 termDocs->close();
243 termEnum->close();
244 finally.throwException();
245 return retArray;
246 }
247
IntCache(const FieldCachePtr & wrapper)248 IntCache::IntCache(const FieldCachePtr& wrapper) : Cache(wrapper) {
249 }
250
~IntCache()251 IntCache::~IntCache() {
252 }
253
createValue(const IndexReaderPtr & reader,const EntryPtr & key)254 boost::any IntCache::createValue(const IndexReaderPtr& reader, const EntryPtr& key) {
255 EntryPtr entry(key);
256 String field(entry->field);
257 IntParserPtr parser(VariantUtils::get<IntParserPtr>(entry->custom));
258 if (!parser) {
259 FieldCachePtr wrapper(_wrapper);
260 boost::any ints;
261 try {
262 ints = wrapper->getInts(reader, field, FieldCache::DEFAULT_INT_PARSER());
263 } catch (NumberFormatException&) {
264 ints = wrapper->getInts(reader, field, FieldCache::NUMERIC_UTILS_INT_PARSER());
265 }
266 return ints;
267 }
268 Collection<int32_t> retArray;
269 TermDocsPtr termDocs(reader->termDocs());
270 TermEnumPtr termEnum(reader->terms(newLucene<Term>(field)));
271 LuceneException finally;
272 try {
273 do {
274 TermPtr term(termEnum->term());
275 if (!term || term->field() != field) {
276 break;
277 }
278 int32_t termval = parser->parseInt(term->text());
279 if (!retArray) { // late init
280 retArray = Collection<int32_t>::newInstance(reader->maxDoc());
281 }
282 termDocs->seek(termEnum);
283 while (termDocs->next()) {
284 retArray[termDocs->doc()] = termval;
285 }
286 } while (termEnum->next());
287 } catch (StopFillCacheException&) {
288 } catch (LuceneException& e) {
289 finally = e;
290 }
291 termDocs->close();
292 termEnum->close();
293 finally.throwException();
294 if (!retArray) { // no values
295 retArray = Collection<int32_t>::newInstance(reader->maxDoc());
296 }
297 return retArray;
298 }
299
LongCache(const FieldCachePtr & wrapper)300 LongCache::LongCache(const FieldCachePtr& wrapper) : Cache(wrapper) {
301 }
302
~LongCache()303 LongCache::~LongCache() {
304 }
305
createValue(const IndexReaderPtr & reader,const EntryPtr & key)306 boost::any LongCache::createValue(const IndexReaderPtr& reader, const EntryPtr& key) {
307 EntryPtr entry(key);
308 String field(entry->field);
309 LongParserPtr parser(VariantUtils::get<LongParserPtr>(entry->custom));
310 if (!parser) {
311 FieldCachePtr wrapper(_wrapper);
312 boost::any longs;
313 try {
314 longs = wrapper->getLongs(reader, field, FieldCache::DEFAULT_LONG_PARSER());
315 } catch (NumberFormatException&) {
316 longs = wrapper->getLongs(reader, field, FieldCache::NUMERIC_UTILS_LONG_PARSER());
317 }
318 return longs;
319 }
320 Collection<int64_t> retArray;
321 TermDocsPtr termDocs(reader->termDocs());
322 TermEnumPtr termEnum(reader->terms(newLucene<Term>(field)));
323 LuceneException finally;
324 try {
325 do {
326 TermPtr term(termEnum->term());
327 if (!term || term->field() != field) {
328 break;
329 }
330 int64_t termval = parser->parseLong(term->text());
331 if (!retArray) { // late init
332 retArray = Collection<int64_t>::newInstance(reader->maxDoc());
333 }
334 termDocs->seek(termEnum);
335 while (termDocs->next()) {
336 retArray[termDocs->doc()] = termval;
337 }
338 } while (termEnum->next());
339 } catch (StopFillCacheException&) {
340 } catch (LuceneException& e) {
341 finally = e;
342 }
343 termDocs->close();
344 termEnum->close();
345 finally.throwException();
346 if (!retArray) { // no values
347 retArray = Collection<int64_t>::newInstance(reader->maxDoc());
348 }
349 return retArray;
350 }
351
DoubleCache(const FieldCachePtr & wrapper)352 DoubleCache::DoubleCache(const FieldCachePtr& wrapper) : Cache(wrapper) {
353 }
354
~DoubleCache()355 DoubleCache::~DoubleCache() {
356 }
357
createValue(const IndexReaderPtr & reader,const EntryPtr & key)358 boost::any DoubleCache::createValue(const IndexReaderPtr& reader, const EntryPtr& key) {
359 EntryPtr entry(key);
360 String field(entry->field);
361 DoubleParserPtr parser(VariantUtils::get<DoubleParserPtr>(entry->custom));
362 if (!parser) {
363 FieldCachePtr wrapper(_wrapper);
364 boost::any doubles;
365 try {
366 doubles = wrapper->getDoubles(reader, field, FieldCache::DEFAULT_DOUBLE_PARSER());
367 } catch (NumberFormatException&) {
368 doubles = wrapper->getDoubles(reader, field, FieldCache::NUMERIC_UTILS_DOUBLE_PARSER());
369 }
370 return doubles;
371 }
372 Collection<double> retArray;
373 TermDocsPtr termDocs(reader->termDocs());
374 TermEnumPtr termEnum(reader->terms(newLucene<Term>(field)));
375 LuceneException finally;
376 try {
377 do {
378 TermPtr term(termEnum->term());
379 if (!term || term->field() != field) {
380 break;
381 }
382 double termval = parser->parseDouble(term->text());
383 if (!retArray) { // late init
384 retArray = Collection<double>::newInstance(reader->maxDoc());
385 }
386 termDocs->seek(termEnum);
387 while (termDocs->next()) {
388 retArray[termDocs->doc()] = termval;
389 }
390 } while (termEnum->next());
391 } catch (StopFillCacheException&) {
392 } catch (LuceneException& e) {
393 finally = e;
394 }
395 termDocs->close();
396 termEnum->close();
397 finally.throwException();
398 if (!retArray) { // no values
399 retArray = Collection<double>::newInstance(reader->maxDoc());
400 }
401 return retArray;
402 }
403
StringCache(const FieldCachePtr & wrapper)404 StringCache::StringCache(const FieldCachePtr& wrapper) : Cache(wrapper) {
405 }
406
~StringCache()407 StringCache::~StringCache() {
408 }
409
createValue(const IndexReaderPtr & reader,const EntryPtr & key)410 boost::any StringCache::createValue(const IndexReaderPtr& reader, const EntryPtr& key) {
411 EntryPtr entry(key);
412 String field(entry->field);
413 Collection<String> retArray(Collection<String>::newInstance(reader->maxDoc()));
414 TermDocsPtr termDocs(reader->termDocs());
415 TermEnumPtr termEnum(reader->terms(newLucene<Term>(field)));
416 LuceneException finally;
417 try {
418 do {
419 TermPtr term(termEnum->term());
420 if (!term || term->field() != field) {
421 break;
422 }
423 String termval(term->text());
424 termDocs->seek(termEnum);
425 while (termDocs->next()) {
426 retArray[termDocs->doc()] = termval;
427 }
428 } while (termEnum->next());
429 } catch (LuceneException& e) {
430 finally = e;
431 }
432 termDocs->close();
433 termEnum->close();
434 finally.throwException();
435 return retArray;
436 }
437
StringIndexCache(const FieldCachePtr & wrapper)438 StringIndexCache::StringIndexCache(const FieldCachePtr& wrapper) : Cache(wrapper) {
439 }
440
~StringIndexCache()441 StringIndexCache::~StringIndexCache() {
442 }
443
createValue(const IndexReaderPtr & reader,const EntryPtr & key)444 boost::any StringIndexCache::createValue(const IndexReaderPtr& reader, const EntryPtr& key) {
445 EntryPtr entry(key);
446 String field(entry->field);
447 Collection<int32_t> retArray(Collection<int32_t>::newInstance(reader->maxDoc()));
448 Collection<String> mterms(Collection<String>::newInstance(reader->maxDoc() + 1));
449 TermDocsPtr termDocs(reader->termDocs());
450 TermEnumPtr termEnum(reader->terms(newLucene<Term>(field)));
451 int32_t t = 0; // current term number
452
453 // an entry for documents that have no terms in this field should a document with no terms be at
454 // top or bottom? This puts them at the top - if it is changed, FieldDocSortedHitQueue needs to
455 // change as well.
456 mterms[t++] = L"";
457
458 LuceneException finally;
459 try {
460 do {
461 TermPtr term(termEnum->term());
462 if (!term || term->field() != field || t >= mterms.size() ) {
463 break;
464 }
465
466 // store term text
467 mterms[t] = term->text();
468
469 termDocs->seek(termEnum);
470 while (termDocs->next()) {
471 retArray[termDocs->doc()] = t;
472 }
473
474 ++t;
475 } while (termEnum->next());
476 } catch (LuceneException& e) {
477 finally = e;
478 }
479 termDocs->close();
480 termEnum->close();
481 finally.throwException();
482
483 if (t == 0) {
484 // if there are no terms, make the term array have a single null entry
485 mterms = Collection<String>::newInstance(1);
486 } else if (t < mterms.size()) {
487 // if there are less terms than documents, trim off the dead array space
488 mterms.resize(t);
489 }
490
491 return newLucene<StringIndex>(retArray, mterms);
492 }
493
FieldCacheEntryImpl(const LuceneObjectPtr & readerKey,const String & fieldName,int32_t cacheType,const boost::any & custom,const boost::any & value)494 FieldCacheEntryImpl::FieldCacheEntryImpl(const LuceneObjectPtr& readerKey, const String& fieldName, int32_t cacheType, const boost::any& custom, const boost::any& value) {
495 this->readerKey = readerKey;
496 this->fieldName = fieldName;
497 this->cacheType = cacheType;
498 this->custom = custom;
499 this->value = value;
500 }
501
~FieldCacheEntryImpl()502 FieldCacheEntryImpl::~FieldCacheEntryImpl() {
503 }
504
getReaderKey()505 LuceneObjectPtr FieldCacheEntryImpl::getReaderKey() {
506 return readerKey;
507 }
508
getFieldName()509 String FieldCacheEntryImpl::getFieldName() {
510 return fieldName;
511 }
512
getCacheType()513 int32_t FieldCacheEntryImpl::getCacheType() {
514 return cacheType;
515 }
516
getCustom()517 boost::any FieldCacheEntryImpl::getCustom() {
518 return custom;
519 }
520
getValue()521 boost::any FieldCacheEntryImpl::getValue() {
522 return value;
523 }
524
525 }
526