1 /////////////////////////////////////////////////////////////////////////////
2 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
5 /////////////////////////////////////////////////////////////////////////////
6
7 #include "LuceneInc.h"
8 #include "SegmentTermDocs.h"
9 #include "SegmentReader.h"
10 #include "_SegmentReader.h"
11 #include "SegmentTermEnum.h"
12 #include "IndexInput.h"
13 #include "TermInfosReader.h"
14 #include "FieldInfos.h"
15 #include "FieldInfo.h"
16 #include "Term.h"
17 #include "TermInfo.h"
18 #include "DefaultSkipListReader.h"
19 #include "BitVector.h"
20 #include "MiscUtils.h"
21
22 namespace Lucene {
23
SegmentTermDocs(const SegmentReaderPtr & parent)24 SegmentTermDocs::SegmentTermDocs(const SegmentReaderPtr& parent) {
25 this->_parent = parent;
26 this->count = 0;
27 this->df = 0;
28 this->_doc = 0;
29 this->_freq = 0;
30 this->freqBasePointer = 0;
31 this->proxBasePointer = 0;
32 this->skipPointer = 0;
33 this->haveSkipped = false;
34 this->currentFieldStoresPayloads = false;
35 this->currentFieldOmitTermFreqAndPositions = false;
36
37 this->_freqStream = boost::dynamic_pointer_cast<IndexInput>(parent->core->freqStream->clone());
38 {
39 SyncLock parentLock(parent);
40 this->deletedDocs = parent->deletedDocs;
41 }
42 this->skipInterval = parent->core->getTermsReader()->getSkipInterval();
43 this->maxSkipLevels = parent->core->getTermsReader()->getMaxSkipLevels();
44 }
45
~SegmentTermDocs()46 SegmentTermDocs::~SegmentTermDocs() {
47 }
48
seek(const TermPtr & term)49 void SegmentTermDocs::seek(const TermPtr& term) {
50 TermInfoPtr ti(SegmentReaderPtr(_parent)->core->getTermsReader()->get(term));
51 seek(ti, term);
52 }
53
seek(const TermEnumPtr & termEnum)54 void SegmentTermDocs::seek(const TermEnumPtr& termEnum) {
55 TermInfoPtr ti;
56 TermPtr term;
57
58 SegmentTermEnumPtr segmentTermEnum(boost::dynamic_pointer_cast<SegmentTermEnum>(termEnum));
59 SegmentReaderPtr parent(_parent);
60
61 // use comparison of fieldinfos to verify that termEnum belongs to the same segment as this SegmentTermDocs
62 if (segmentTermEnum && segmentTermEnum->fieldInfos == parent->core->fieldInfos) { // optimized case
63 term = segmentTermEnum->term();
64 ti = segmentTermEnum->termInfo();
65 } else { // punt case
66 term = termEnum->term();
67 ti = parent->core->getTermsReader()->get(term);
68 }
69
70 seek(ti, term);
71 }
72
seek(const TermInfoPtr & ti,const TermPtr & term)73 void SegmentTermDocs::seek(const TermInfoPtr& ti, const TermPtr& term) {
74 count = 0;
75 FieldInfoPtr fi(SegmentReaderPtr(_parent)->core->fieldInfos->fieldInfo(term->_field));
76 currentFieldOmitTermFreqAndPositions = fi ? fi->omitTermFreqAndPositions : false;
77 currentFieldStoresPayloads = fi ? fi->storePayloads : false;
78 if (!ti) {
79 df = 0;
80 } else {
81 df = ti->docFreq;
82 _doc = 0;
83 freqBasePointer = ti->freqPointer;
84 proxBasePointer = ti->proxPointer;
85 skipPointer = freqBasePointer + ti->skipOffset;
86 _freqStream->seek(freqBasePointer);
87 haveSkipped = false;
88 }
89 }
90
close()91 void SegmentTermDocs::close() {
92 _freqStream->close();
93 if (skipListReader) {
94 skipListReader->close();
95 }
96 }
97
doc()98 int32_t SegmentTermDocs::doc() {
99 return _doc;
100 }
101
freq()102 int32_t SegmentTermDocs::freq() {
103 return _freq;
104 }
105
skippingDoc()106 void SegmentTermDocs::skippingDoc() {
107 }
108
next()109 bool SegmentTermDocs::next() {
110 while (true) {
111 if (count == df) {
112 return false;
113 }
114 int32_t docCode = _freqStream->readVInt();
115
116 if (currentFieldOmitTermFreqAndPositions) {
117 _doc += docCode;
118 _freq = 1;
119 } else {
120 _doc += MiscUtils::unsignedShift(docCode, 1); // shift off low bit
121 if ((docCode & 1) != 0) { // if low bit is set
122 _freq = 1; // freq is one
123 } else {
124 _freq = _freqStream->readVInt(); // else read freq
125 }
126 }
127
128 ++count;
129
130 if (!deletedDocs || !deletedDocs->get(_doc)) {
131 break;
132 }
133 skippingDoc();
134 }
135 return true;
136 }
137
read(Collection<int32_t> docs,Collection<int32_t> freqs)138 int32_t SegmentTermDocs::read(Collection<int32_t> docs, Collection<int32_t> freqs) {
139 int32_t length = docs.size();
140 if (currentFieldOmitTermFreqAndPositions) {
141 return readNoTf(docs, freqs, length);
142 } else {
143 int32_t i = 0;
144 while (i < length && count < df) {
145 // manually inlined call to next() for speed
146 int32_t docCode = _freqStream->readVInt();
147 _doc += MiscUtils::unsignedShift(docCode, 1); // shift off low bit
148 if ((docCode & 1) != 0) { // if low bit is set
149 _freq = 1; // freq is one
150 } else {
151 _freq = _freqStream->readVInt(); // else read freq
152 }
153 ++count;
154
155 if (!deletedDocs || !deletedDocs->get(_doc)) {
156 docs[i] = _doc;
157 freqs[i] = _freq;
158 ++i;
159 }
160 }
161 return i;
162 }
163 }
164
readNoTf(Collection<int32_t> docs,Collection<int32_t> freqs,int32_t length)165 int32_t SegmentTermDocs::readNoTf(Collection<int32_t> docs, Collection<int32_t> freqs, int32_t length) {
166 int32_t i = 0;
167 while (i < length && count < df) {
168 // manually inlined call to next() for speed
169 _doc += _freqStream->readVInt();
170 ++count;
171
172 if (!deletedDocs || !deletedDocs->get(_doc)) {
173 docs[i] = _doc;
174
175 // Hardware freq to 1 when term freqs were not stored in the index
176 freqs[i] = 1;
177 ++i;
178 }
179 }
180 return i;
181 }
182
skipProx(int64_t proxPointer,int32_t payloadLength)183 void SegmentTermDocs::skipProx(int64_t proxPointer, int32_t payloadLength) {
184 }
185
skipTo(int32_t target)186 bool SegmentTermDocs::skipTo(int32_t target) {
187 if (df >= skipInterval) { // optimized case
188 if (!skipListReader) {
189 skipListReader = newLucene<DefaultSkipListReader>(boost::dynamic_pointer_cast<IndexInput>(_freqStream->clone()), maxSkipLevels, skipInterval); // lazily clone
190 }
191
192 if (!haveSkipped) { // lazily initialize skip stream
193 skipListReader->init(skipPointer, freqBasePointer, proxBasePointer, df, currentFieldStoresPayloads);
194 haveSkipped = true;
195 }
196
197 int32_t newCount = skipListReader->skipTo(target);
198 if (newCount > count) {
199 _freqStream->seek(skipListReader->getFreqPointer());
200 skipProx(skipListReader->getProxPointer(), skipListReader->getPayloadLength());
201
202 _doc = skipListReader->getDoc();
203 count = newCount;
204 }
205 }
206
207 // done skipping, now just scan
208 do {
209 if (!next()) {
210 return false;
211 }
212 } while (target > _doc);
213 return true;
214 }
215
freqStream()216 IndexInputPtr SegmentTermDocs::freqStream() {
217 return _freqStream;
218 }
219
freqStream(const IndexInputPtr & freqStream)220 void SegmentTermDocs::freqStream(const IndexInputPtr& freqStream) {
221 _freqStream = freqStream;
222 }
223
224 }
225