1 /////////////////////////////////////////////////////////////////////////////
2 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
5 /////////////////////////////////////////////////////////////////////////////
6
7 #include "LuceneInc.h"
8 #include "SortedTermVectorMapper.h"
9 #include "TermVectorEntry.h"
10
11 namespace Lucene {
12
// Constant passed as the first constructor argument of every TermVectorEntry
// that map() creates (presumably the entry's field name -- confirm against
// the TermVectorEntry constructor).
13 const wchar_t* SortedTermVectorMapper::ALL = L"_ALL_";
14
SortedTermVectorMapper(TermVectorEntryComparator comparator)15 SortedTermVectorMapper::SortedTermVectorMapper(TermVectorEntryComparator comparator) : TermVectorMapper(false, false) {
16 this->storeOffsets = false;
17 this->storePositions = false;
18 this->comparator = comparator;
19 this->currentSet = Collection<TermVectorEntryPtr>::newInstance();
20 this->termToTVE = MapStringTermVectorEntry::newInstance();
21 }
22
SortedTermVectorMapper(bool ignoringPositions,bool ignoringOffsets,TermVectorEntryComparator comparator)23 SortedTermVectorMapper::SortedTermVectorMapper(bool ignoringPositions, bool ignoringOffsets, TermVectorEntryComparator comparator)
24 : TermVectorMapper(ignoringPositions, ignoringPositions) {
25 this->storeOffsets = false;
26 this->storePositions = false;
27 this->comparator = comparator;
28 this->currentSet = Collection<TermVectorEntryPtr>::newInstance();
29 this->termToTVE = MapStringTermVectorEntry::newInstance();
30 }
31
~SortedTermVectorMapper()32 SortedTermVectorMapper::~SortedTermVectorMapper() {
33 }
34
map(const String & term,int32_t frequency,Collection<TermVectorOffsetInfoPtr> offsets,Collection<int32_t> positions)35 void SortedTermVectorMapper::map(const String& term, int32_t frequency, Collection<TermVectorOffsetInfoPtr> offsets, Collection<int32_t> positions) {
36 // We need to combine any previous mentions of the term
37 TermVectorEntryPtr entry(termToTVE.get(term));
38 if (!entry) {
39 entry = newLucene<TermVectorEntry>(ALL, term, frequency, storeOffsets ? offsets : Collection<TermVectorOffsetInfoPtr>(), storePositions ? positions : Collection<int32_t>());
40 termToTVE.put(term, entry);
41
42 if (!currentSet.contains_if(luceneEqualTo<TermVectorEntryPtr>(entry))) {
43 currentSet.insert(std::upper_bound(currentSet.begin(), currentSet.end(), entry, comparator), entry);
44 }
45 } else {
46 entry->setFrequency(entry->getFrequency() + frequency);
47 if (storeOffsets) {
48 Collection<TermVectorOffsetInfoPtr> existingOffsets(entry->getOffsets());
49 // A few diff. cases here: offsets is null, existing offsets is null, both are null, same for positions
50 if (existingOffsets && offsets && !offsets.empty()) {
51 // copy over the existing offsets
52 Collection<TermVectorOffsetInfoPtr> newOffsets(Collection<TermVectorOffsetInfoPtr>::newInstance(existingOffsets.begin(), existingOffsets.end()));
53 newOffsets.addAll(offsets.begin(), offsets.end());
54 entry->setOffsets(newOffsets);
55 } else if (!existingOffsets && offsets && !offsets.empty()) {
56 entry->setOffsets(offsets);
57 }
58 // else leave it alone
59 }
60 if (storePositions) {
61 Collection<int32_t> existingPositions(entry->getPositions());
62 if (existingPositions && positions && !positions.empty()) {
63 Collection<int32_t> newPositions(existingPositions);
64 newPositions.addAll(positions.begin(), positions.end());
65 entry->setPositions(newPositions);
66 } else if (!existingPositions && positions && !positions.empty()) {
67 entry->setPositions(positions);
68 }
69 // else leave it alone
70 }
71 }
72 }
73
setExpectations(const String & field,int32_t numTerms,bool storeOffsets,bool storePositions)74 void SortedTermVectorMapper::setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions) {
75 this->storeOffsets = storeOffsets;
76 this->storePositions = storePositions;
77 }
78
getTermVectorEntrySet()79 Collection<TermVectorEntryPtr> SortedTermVectorMapper::getTermVectorEntrySet() {
80 return currentSet;
81 }
82
83 }
84