1 /////////////////////////////////////////////////////////////////////////////
2 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
5 /////////////////////////////////////////////////////////////////////////////
6 
7 #include "LuceneInc.h"
8 #include "FieldCacheImpl.h"
9 #include "FieldCacheSanityChecker.h"
10 #include "IndexReader.h"
11 #include "InfoStream.h"
12 #include "TermEnum.h"
13 #include "TermDocs.h"
14 #include "Term.h"
15 #include "StringUtils.h"
16 #include "VariantUtils.h"
17 
18 namespace Lucene {
19 
FieldCacheImpl()20 FieldCacheImpl::FieldCacheImpl() {
21 }
22 
~FieldCacheImpl()23 FieldCacheImpl::~FieldCacheImpl() {
24 }
25 
initialize()26 void FieldCacheImpl::initialize() {
27     caches = MapStringCache::newInstance();
28     caches.put(CACHE_BYTE, newLucene<ByteCache>(shared_from_this()));
29     caches.put(CACHE_INT, newLucene<IntCache>(shared_from_this()));
30     caches.put(CACHE_LONG, newLucene<LongCache>(shared_from_this()));
31     caches.put(CACHE_DOUBLE, newLucene<DoubleCache>(shared_from_this()));
32     caches.put(CACHE_STRING, newLucene<StringCache>(shared_from_this()));
33     caches.put(CACHE_STRING_INDEX, newLucene<StringIndexCache>(shared_from_this()));
34 }
35 
purgeAllCaches()36 void FieldCacheImpl::purgeAllCaches() {
37     initialize();
38 }
39 
purge(const IndexReaderPtr & r)40 void FieldCacheImpl::purge(const IndexReaderPtr& r) {
41     for (MapStringCache::iterator cache = caches.begin(); cache != caches.end(); ++cache) {
42         cache->second->purge(r);
43     }
44 }
45 
getCacheEntries()46 Collection<FieldCacheEntryPtr> FieldCacheImpl::getCacheEntries() {
47     Collection<FieldCacheEntryPtr> result(Collection<FieldCacheEntryPtr>::newInstance());
48     for (MapStringCache::iterator cache = caches.begin(); cache != caches.end(); ++cache) {
49         for (WeakMapLuceneObjectMapEntryAny::iterator key = cache->second->readerCache.begin(); key != cache->second->readerCache.end(); ++key) {
50             LuceneObjectPtr readerKey(key->first.lock());
51 
52             // we've now materialized a hard ref
53             if (readerKey) {
54                 for (MapEntryAny::iterator mapEntry = key->second.begin(); mapEntry != key->second.end(); ++mapEntry) {
55                     result.add(newLucene<FieldCacheEntryImpl>(readerKey, mapEntry->first->field, cache->first, mapEntry->first->custom, mapEntry->second));
56                 }
57             }
58         }
59     }
60     return result;
61 }
62 
getBytes(const IndexReaderPtr & reader,const String & field)63 Collection<uint8_t> FieldCacheImpl::getBytes(const IndexReaderPtr& reader, const String& field) {
64     return getBytes(reader, field, ByteParserPtr());
65 }
66 
getBytes(const IndexReaderPtr & reader,const String & field,const ByteParserPtr & parser)67 Collection<uint8_t> FieldCacheImpl::getBytes(const IndexReaderPtr& reader, const String& field, const ByteParserPtr& parser) {
68     return VariantUtils::get< Collection<uint8_t> >(caches.get(CACHE_BYTE)->get(reader, newLucene<Entry>(field, parser)));
69 }
70 
getInts(const IndexReaderPtr & reader,const String & field)71 Collection<int32_t> FieldCacheImpl::getInts(const IndexReaderPtr& reader, const String& field) {
72     return getInts(reader, field, IntParserPtr());
73 }
74 
getInts(const IndexReaderPtr & reader,const String & field,const IntParserPtr & parser)75 Collection<int32_t> FieldCacheImpl::getInts(const IndexReaderPtr& reader, const String& field, const IntParserPtr& parser) {
76     return VariantUtils::get< Collection<int32_t> >(caches.get(CACHE_INT)->get(reader, newLucene<Entry>(field, parser)));
77 }
78 
getLongs(const IndexReaderPtr & reader,const String & field)79 Collection<int64_t> FieldCacheImpl::getLongs(const IndexReaderPtr& reader, const String& field) {
80     return getLongs(reader, field, LongParserPtr());
81 }
82 
getLongs(const IndexReaderPtr & reader,const String & field,const LongParserPtr & parser)83 Collection<int64_t> FieldCacheImpl::getLongs(const IndexReaderPtr& reader, const String& field, const LongParserPtr& parser) {
84     return VariantUtils::get< Collection<int64_t> >(caches.get(CACHE_LONG)->get(reader, newLucene<Entry>(field, parser)));
85 }
86 
getDoubles(const IndexReaderPtr & reader,const String & field)87 Collection<double> FieldCacheImpl::getDoubles(const IndexReaderPtr& reader, const String& field) {
88     return getDoubles(reader, field, DoubleParserPtr());
89 }
90 
getDoubles(const IndexReaderPtr & reader,const String & field,const DoubleParserPtr & parser)91 Collection<double> FieldCacheImpl::getDoubles(const IndexReaderPtr& reader, const String& field, const DoubleParserPtr& parser) {
92     return VariantUtils::get< Collection<double> >(caches.get(CACHE_DOUBLE)->get(reader, newLucene<Entry>(field, parser)));
93 }
94 
getStrings(const IndexReaderPtr & reader,const String & field)95 Collection<String> FieldCacheImpl::getStrings(const IndexReaderPtr& reader, const String& field) {
96     return VariantUtils::get< Collection<String> >(caches.get(CACHE_STRING)->get(reader, newLucene<Entry>(field, ParserPtr())));
97 }
98 
getStringIndex(const IndexReaderPtr & reader,const String & field)99 StringIndexPtr FieldCacheImpl::getStringIndex(const IndexReaderPtr& reader, const String& field) {
100     return VariantUtils::get< StringIndexPtr >(caches.get(CACHE_STRING_INDEX)->get(reader, newLucene<Entry>(field, ParserPtr())));
101 }
102 
setInfoStream(const InfoStreamPtr & stream)103 void FieldCacheImpl::setInfoStream(const InfoStreamPtr& stream) {
104     infoStream = stream;
105 }
106 
getInfoStream()107 InfoStreamPtr FieldCacheImpl::getInfoStream() {
108     return infoStream;
109 }
110 
Entry(const String & field,const boost::any & custom)111 Entry::Entry(const String& field, const boost::any& custom) {
112     this->field = field;
113     this->custom = custom;
114 }
115 
~Entry()116 Entry::~Entry() {
117 }
118 
equals(const LuceneObjectPtr & other)119 bool Entry::equals(const LuceneObjectPtr& other) {
120     if (LuceneObject::equals(other)) {
121         return true;
122     }
123 
124     EntryPtr otherEntry(boost::dynamic_pointer_cast<Entry>(other));
125     if (otherEntry) {
126         if (otherEntry->field == field) {
127             return VariantUtils::equalsType(custom, otherEntry->custom);
128         }
129     }
130     return false;
131 }
132 
hashCode()133 int32_t Entry::hashCode() {
134     return StringUtils::hashCode(field) ^ VariantUtils::hashCode(custom);
135 }
136 
Cache(const FieldCachePtr & wrapper)137 Cache::Cache(const FieldCachePtr& wrapper) {
138     this->_wrapper = wrapper;
139     this->readerCache = WeakMapLuceneObjectMapEntryAny::newInstance();
140 }
141 
~Cache()142 Cache::~Cache() {
143 }
144 
purge(const IndexReaderPtr & r)145 void Cache::purge(const IndexReaderPtr& r) {
146     LuceneObjectPtr readerKey(r->getFieldCacheKey());
147     SyncLock cacheLock(&readerCache);
148     readerCache.remove(readerKey);
149 }
150 
get(const IndexReaderPtr & reader,const EntryPtr & key)151 boost::any Cache::get(const IndexReaderPtr& reader, const EntryPtr& key) {
152     MapEntryAny innerCache;
153     boost::any value;
154     LuceneObjectPtr readerKey(reader->getFieldCacheKey());
155     {
156         SyncLock cacheLock(&readerCache);
157         innerCache = readerCache.get(readerKey);
158         if (!innerCache) {
159             innerCache = MapEntryAny::newInstance();
160             readerCache.put(readerKey, innerCache);
161         } else if (innerCache.contains(key)) {
162             value = innerCache[key];
163         }
164         if (VariantUtils::isNull(value)) {
165             value = newLucene<CreationPlaceholder>();
166             innerCache.put(key, value);
167         }
168     }
169     if (VariantUtils::typeOf<CreationPlaceholderPtr>(value)) {
170         CreationPlaceholderPtr progress(VariantUtils::get<CreationPlaceholderPtr>(value));
171         SyncLock valueLock(progress);
172         if (VariantUtils::isNull(progress->value)) {
173             progress->value = createValue(reader, key);
174             {
175                 SyncLock cacheLock(&readerCache);
176                 innerCache.put(key, progress->value);
177             }
178 
179             FieldCachePtr wrapper(_wrapper);
180 
181             // Only check if key.custom (the parser) is non-null; else, we check twice for a single
182             // call to FieldCache.getXXX
183             if (!VariantUtils::isNull(key->custom) && wrapper) {
184                 InfoStreamPtr infoStream(wrapper->getInfoStream());
185                 if (infoStream) {
186                     printNewInsanity(infoStream, progress->value);
187                 }
188             }
189         }
190         return progress->value;
191     }
192     return value;
193 }
194 
printNewInsanity(const InfoStreamPtr & infoStream,const boost::any & value)195 void Cache::printNewInsanity(const InfoStreamPtr& infoStream, const boost::any& value) {
196     Collection<InsanityPtr> insanities(FieldCacheSanityChecker::checkSanity(FieldCachePtr(_wrapper)));
197     for (Collection<InsanityPtr>::iterator insanity = insanities.begin(); insanity != insanities.end(); ++insanity) {
198         Collection<FieldCacheEntryPtr> entries((*insanity)->getCacheEntries());
199         for (Collection<FieldCacheEntryPtr>::iterator entry = entries.begin(); entry != entries.end(); ++entry) {
200             if (VariantUtils::equalsType((*entry)->getValue(), value)) {
201                 // OK this insanity involves our entry
202                 *infoStream << L"WARNING: new FieldCache insanity created\nDetails: " + (*insanity)->toString() << L"\n";
203                 break;
204             }
205         }
206     }
207 }
208 
ByteCache(const FieldCachePtr & wrapper)209 ByteCache::ByteCache(const FieldCachePtr& wrapper) : Cache(wrapper) {
210 }
211 
~ByteCache()212 ByteCache::~ByteCache() {
213 }
214 
createValue(const IndexReaderPtr & reader,const EntryPtr & key)215 boost::any ByteCache::createValue(const IndexReaderPtr& reader, const EntryPtr& key) {
216     EntryPtr entry(key);
217     String field(entry->field);
218     ByteParserPtr parser(VariantUtils::get<ByteParserPtr>(entry->custom));
219     if (!parser) {
220         return FieldCachePtr(_wrapper)->getBytes(reader, field, FieldCache::DEFAULT_BYTE_PARSER());
221     }
222     Collection<uint8_t> retArray(Collection<uint8_t>::newInstance(reader->maxDoc()));
223     TermDocsPtr termDocs(reader->termDocs());
224     TermEnumPtr termEnum(reader->terms(newLucene<Term>(field)));
225     LuceneException finally;
226     try {
227         do {
228             TermPtr term(termEnum->term());
229             if (!term || term->field() != field) {
230                 break;
231             }
232             uint8_t termval = parser->parseByte(term->text());
233             termDocs->seek(termEnum);
234             while (termDocs->next()) {
235                 retArray[termDocs->doc()] = termval;
236             }
237         } while (termEnum->next());
238     } catch (StopFillCacheException&) {
239     } catch (LuceneException& e) {
240         finally = e;
241     }
242     termDocs->close();
243     termEnum->close();
244     finally.throwException();
245     return retArray;
246 }
247 
IntCache(const FieldCachePtr & wrapper)248 IntCache::IntCache(const FieldCachePtr& wrapper) : Cache(wrapper) {
249 }
250 
~IntCache()251 IntCache::~IntCache() {
252 }
253 
createValue(const IndexReaderPtr & reader,const EntryPtr & key)254 boost::any IntCache::createValue(const IndexReaderPtr& reader, const EntryPtr& key) {
255     EntryPtr entry(key);
256     String field(entry->field);
257     IntParserPtr parser(VariantUtils::get<IntParserPtr>(entry->custom));
258     if (!parser) {
259         FieldCachePtr wrapper(_wrapper);
260         boost::any ints;
261         try {
262             ints = wrapper->getInts(reader, field, FieldCache::DEFAULT_INT_PARSER());
263         } catch (NumberFormatException&) {
264             ints = wrapper->getInts(reader, field, FieldCache::NUMERIC_UTILS_INT_PARSER());
265         }
266         return ints;
267     }
268     Collection<int32_t> retArray;
269     TermDocsPtr termDocs(reader->termDocs());
270     TermEnumPtr termEnum(reader->terms(newLucene<Term>(field)));
271     LuceneException finally;
272     try {
273         do {
274             TermPtr term(termEnum->term());
275             if (!term || term->field() != field) {
276                 break;
277             }
278             int32_t termval = parser->parseInt(term->text());
279             if (!retArray) { // late init
280                 retArray = Collection<int32_t>::newInstance(reader->maxDoc());
281             }
282             termDocs->seek(termEnum);
283             while (termDocs->next()) {
284                 retArray[termDocs->doc()] = termval;
285             }
286         } while (termEnum->next());
287     } catch (StopFillCacheException&) {
288     } catch (LuceneException& e) {
289         finally = e;
290     }
291     termDocs->close();
292     termEnum->close();
293     finally.throwException();
294     if (!retArray) { // no values
295         retArray = Collection<int32_t>::newInstance(reader->maxDoc());
296     }
297     return retArray;
298 }
299 
LongCache(const FieldCachePtr & wrapper)300 LongCache::LongCache(const FieldCachePtr& wrapper) : Cache(wrapper) {
301 }
302 
~LongCache()303 LongCache::~LongCache() {
304 }
305 
createValue(const IndexReaderPtr & reader,const EntryPtr & key)306 boost::any LongCache::createValue(const IndexReaderPtr& reader, const EntryPtr& key) {
307     EntryPtr entry(key);
308     String field(entry->field);
309     LongParserPtr parser(VariantUtils::get<LongParserPtr>(entry->custom));
310     if (!parser) {
311         FieldCachePtr wrapper(_wrapper);
312         boost::any longs;
313         try {
314             longs = wrapper->getLongs(reader, field, FieldCache::DEFAULT_LONG_PARSER());
315         } catch (NumberFormatException&) {
316             longs = wrapper->getLongs(reader, field, FieldCache::NUMERIC_UTILS_LONG_PARSER());
317         }
318         return longs;
319     }
320     Collection<int64_t> retArray;
321     TermDocsPtr termDocs(reader->termDocs());
322     TermEnumPtr termEnum(reader->terms(newLucene<Term>(field)));
323     LuceneException finally;
324     try {
325         do {
326             TermPtr term(termEnum->term());
327             if (!term || term->field() != field) {
328                 break;
329             }
330             int64_t termval = parser->parseLong(term->text());
331             if (!retArray) { // late init
332                 retArray = Collection<int64_t>::newInstance(reader->maxDoc());
333             }
334             termDocs->seek(termEnum);
335             while (termDocs->next()) {
336                 retArray[termDocs->doc()] = termval;
337             }
338         } while (termEnum->next());
339     } catch (StopFillCacheException&) {
340     } catch (LuceneException& e) {
341         finally = e;
342     }
343     termDocs->close();
344     termEnum->close();
345     finally.throwException();
346     if (!retArray) { // no values
347         retArray = Collection<int64_t>::newInstance(reader->maxDoc());
348     }
349     return retArray;
350 }
351 
DoubleCache(const FieldCachePtr & wrapper)352 DoubleCache::DoubleCache(const FieldCachePtr& wrapper) : Cache(wrapper) {
353 }
354 
~DoubleCache()355 DoubleCache::~DoubleCache() {
356 }
357 
createValue(const IndexReaderPtr & reader,const EntryPtr & key)358 boost::any DoubleCache::createValue(const IndexReaderPtr& reader, const EntryPtr& key) {
359     EntryPtr entry(key);
360     String field(entry->field);
361     DoubleParserPtr parser(VariantUtils::get<DoubleParserPtr>(entry->custom));
362     if (!parser) {
363         FieldCachePtr wrapper(_wrapper);
364         boost::any doubles;
365         try {
366             doubles = wrapper->getDoubles(reader, field, FieldCache::DEFAULT_DOUBLE_PARSER());
367         } catch (NumberFormatException&) {
368             doubles = wrapper->getDoubles(reader, field, FieldCache::NUMERIC_UTILS_DOUBLE_PARSER());
369         }
370         return doubles;
371     }
372     Collection<double> retArray;
373     TermDocsPtr termDocs(reader->termDocs());
374     TermEnumPtr termEnum(reader->terms(newLucene<Term>(field)));
375     LuceneException finally;
376     try {
377         do {
378             TermPtr term(termEnum->term());
379             if (!term || term->field() != field) {
380                 break;
381             }
382             double termval = parser->parseDouble(term->text());
383             if (!retArray) { // late init
384                 retArray = Collection<double>::newInstance(reader->maxDoc());
385             }
386             termDocs->seek(termEnum);
387             while (termDocs->next()) {
388                 retArray[termDocs->doc()] = termval;
389             }
390         } while (termEnum->next());
391     } catch (StopFillCacheException&) {
392     } catch (LuceneException& e) {
393         finally = e;
394     }
395     termDocs->close();
396     termEnum->close();
397     finally.throwException();
398     if (!retArray) { // no values
399         retArray = Collection<double>::newInstance(reader->maxDoc());
400     }
401     return retArray;
402 }
403 
StringCache(const FieldCachePtr & wrapper)404 StringCache::StringCache(const FieldCachePtr& wrapper) : Cache(wrapper) {
405 }
406 
~StringCache()407 StringCache::~StringCache() {
408 }
409 
createValue(const IndexReaderPtr & reader,const EntryPtr & key)410 boost::any StringCache::createValue(const IndexReaderPtr& reader, const EntryPtr& key) {
411     EntryPtr entry(key);
412     String field(entry->field);
413     Collection<String> retArray(Collection<String>::newInstance(reader->maxDoc()));
414     TermDocsPtr termDocs(reader->termDocs());
415     TermEnumPtr termEnum(reader->terms(newLucene<Term>(field)));
416     LuceneException finally;
417     try {
418         do {
419             TermPtr term(termEnum->term());
420             if (!term || term->field() != field) {
421                 break;
422             }
423             String termval(term->text());
424             termDocs->seek(termEnum);
425             while (termDocs->next()) {
426                 retArray[termDocs->doc()] = termval;
427             }
428         } while (termEnum->next());
429     } catch (LuceneException& e) {
430         finally = e;
431     }
432     termDocs->close();
433     termEnum->close();
434     finally.throwException();
435     return retArray;
436 }
437 
StringIndexCache(const FieldCachePtr & wrapper)438 StringIndexCache::StringIndexCache(const FieldCachePtr& wrapper) : Cache(wrapper) {
439 }
440 
~StringIndexCache()441 StringIndexCache::~StringIndexCache() {
442 }
443 
createValue(const IndexReaderPtr & reader,const EntryPtr & key)444 boost::any StringIndexCache::createValue(const IndexReaderPtr& reader, const EntryPtr& key) {
445     EntryPtr entry(key);
446     String field(entry->field);
447     Collection<int32_t> retArray(Collection<int32_t>::newInstance(reader->maxDoc()));
448     Collection<String> mterms(Collection<String>::newInstance(reader->maxDoc() + 1));
449     TermDocsPtr termDocs(reader->termDocs());
450     TermEnumPtr termEnum(reader->terms(newLucene<Term>(field)));
451     int32_t t = 0; // current term number
452 
453     // an entry for documents that have no terms in this field should a document with no terms be at
454     // top or bottom?  This puts them at the top - if it is changed, FieldDocSortedHitQueue needs to
455     // change as well.
456     mterms[t++] = L"";
457 
458     LuceneException finally;
459     try {
460         do {
461             TermPtr term(termEnum->term());
462             if (!term || term->field() != field || t >= mterms.size() ) {
463                 break;
464             }
465 
466             // store term text
467             mterms[t] = term->text();
468 
469             termDocs->seek(termEnum);
470             while (termDocs->next()) {
471                 retArray[termDocs->doc()] = t;
472             }
473 
474             ++t;
475         } while (termEnum->next());
476     } catch (LuceneException& e) {
477         finally = e;
478     }
479     termDocs->close();
480     termEnum->close();
481     finally.throwException();
482 
483     if (t == 0) {
484         // if there are no terms, make the term array have a single null entry
485         mterms = Collection<String>::newInstance(1);
486     } else if (t < mterms.size()) {
487         // if there are less terms than documents, trim off the dead array space
488         mterms.resize(t);
489     }
490 
491     return newLucene<StringIndex>(retArray, mterms);
492 }
493 
FieldCacheEntryImpl(const LuceneObjectPtr & readerKey,const String & fieldName,int32_t cacheType,const boost::any & custom,const boost::any & value)494 FieldCacheEntryImpl::FieldCacheEntryImpl(const LuceneObjectPtr& readerKey, const String& fieldName, int32_t cacheType, const boost::any& custom, const boost::any& value) {
495     this->readerKey = readerKey;
496     this->fieldName = fieldName;
497     this->cacheType = cacheType;
498     this->custom = custom;
499     this->value = value;
500 }
501 
~FieldCacheEntryImpl()502 FieldCacheEntryImpl::~FieldCacheEntryImpl() {
503 }
504 
getReaderKey()505 LuceneObjectPtr FieldCacheEntryImpl::getReaderKey() {
506     return readerKey;
507 }
508 
getFieldName()509 String FieldCacheEntryImpl::getFieldName() {
510     return fieldName;
511 }
512 
getCacheType()513 int32_t FieldCacheEntryImpl::getCacheType() {
514     return cacheType;
515 }
516 
getCustom()517 boost::any FieldCacheEntryImpl::getCustom() {
518     return custom;
519 }
520 
getValue()521 boost::any FieldCacheEntryImpl::getValue() {
522     return value;
523 }
524 
525 }
526