1 /////////////////////////////////////////////////////////////////////////////
2 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
5 /////////////////////////////////////////////////////////////////////////////
6 
7 #include "LuceneInc.h"
8 #include "SegmentTermDocs.h"
9 #include "SegmentReader.h"
10 #include "_SegmentReader.h"
11 #include "SegmentTermEnum.h"
12 #include "IndexInput.h"
13 #include "TermInfosReader.h"
14 #include "FieldInfos.h"
15 #include "FieldInfo.h"
16 #include "Term.h"
17 #include "TermInfo.h"
18 #include "DefaultSkipListReader.h"
19 #include "BitVector.h"
20 #include "MiscUtils.h"
21 
22 namespace Lucene {
23 
SegmentTermDocs(const SegmentReaderPtr & parent)24 SegmentTermDocs::SegmentTermDocs(const SegmentReaderPtr& parent) {
25     this->_parent = parent;
26     this->count = 0;
27     this->df = 0;
28     this->_doc = 0;
29     this->_freq = 0;
30     this->freqBasePointer = 0;
31     this->proxBasePointer = 0;
32     this->skipPointer = 0;
33     this->haveSkipped = false;
34     this->currentFieldStoresPayloads = false;
35     this->currentFieldOmitTermFreqAndPositions = false;
36 
37     this->_freqStream = boost::dynamic_pointer_cast<IndexInput>(parent->core->freqStream->clone());
38     {
39         SyncLock parentLock(parent);
40         this->deletedDocs = parent->deletedDocs;
41     }
42     this->skipInterval = parent->core->getTermsReader()->getSkipInterval();
43     this->maxSkipLevels = parent->core->getTermsReader()->getMaxSkipLevels();
44 }
45 
~SegmentTermDocs()46 SegmentTermDocs::~SegmentTermDocs() {
47 }
48 
seek(const TermPtr & term)49 void SegmentTermDocs::seek(const TermPtr& term) {
50     TermInfoPtr ti(SegmentReaderPtr(_parent)->core->getTermsReader()->get(term));
51     seek(ti, term);
52 }
53 
seek(const TermEnumPtr & termEnum)54 void SegmentTermDocs::seek(const TermEnumPtr& termEnum) {
55     TermInfoPtr ti;
56     TermPtr term;
57 
58     SegmentTermEnumPtr segmentTermEnum(boost::dynamic_pointer_cast<SegmentTermEnum>(termEnum));
59     SegmentReaderPtr parent(_parent);
60 
61     // use comparison of fieldinfos to verify that termEnum belongs to the same segment as this SegmentTermDocs
62     if (segmentTermEnum && segmentTermEnum->fieldInfos == parent->core->fieldInfos) { // optimized case
63         term = segmentTermEnum->term();
64         ti = segmentTermEnum->termInfo();
65     } else { // punt case
66         term = termEnum->term();
67         ti = parent->core->getTermsReader()->get(term);
68     }
69 
70     seek(ti, term);
71 }
72 
seek(const TermInfoPtr & ti,const TermPtr & term)73 void SegmentTermDocs::seek(const TermInfoPtr& ti, const TermPtr& term) {
74     count = 0;
75     FieldInfoPtr fi(SegmentReaderPtr(_parent)->core->fieldInfos->fieldInfo(term->_field));
76     currentFieldOmitTermFreqAndPositions = fi ? fi->omitTermFreqAndPositions : false;
77     currentFieldStoresPayloads = fi ? fi->storePayloads : false;
78     if (!ti) {
79         df = 0;
80     } else {
81         df = ti->docFreq;
82         _doc = 0;
83         freqBasePointer = ti->freqPointer;
84         proxBasePointer = ti->proxPointer;
85         skipPointer = freqBasePointer + ti->skipOffset;
86         _freqStream->seek(freqBasePointer);
87         haveSkipped = false;
88     }
89 }
90 
close()91 void SegmentTermDocs::close() {
92     _freqStream->close();
93     if (skipListReader) {
94         skipListReader->close();
95     }
96 }
97 
doc()98 int32_t SegmentTermDocs::doc() {
99     return _doc;
100 }
101 
freq()102 int32_t SegmentTermDocs::freq() {
103     return _freq;
104 }
105 
skippingDoc()106 void SegmentTermDocs::skippingDoc() {
107 }
108 
next()109 bool SegmentTermDocs::next() {
110     while (true) {
111         if (count == df) {
112             return false;
113         }
114         int32_t docCode = _freqStream->readVInt();
115 
116         if (currentFieldOmitTermFreqAndPositions) {
117             _doc += docCode;
118             _freq = 1;
119         } else {
120             _doc += MiscUtils::unsignedShift(docCode, 1); // shift off low bit
121             if ((docCode & 1) != 0) { // if low bit is set
122                 _freq = 1;    // freq is one
123             } else {
124                 _freq = _freqStream->readVInt();    // else read freq
125             }
126         }
127 
128         ++count;
129 
130         if (!deletedDocs || !deletedDocs->get(_doc)) {
131             break;
132         }
133         skippingDoc();
134     }
135     return true;
136 }
137 
read(Collection<int32_t> docs,Collection<int32_t> freqs)138 int32_t SegmentTermDocs::read(Collection<int32_t> docs, Collection<int32_t> freqs) {
139     int32_t length = docs.size();
140     if (currentFieldOmitTermFreqAndPositions) {
141         return readNoTf(docs, freqs, length);
142     } else {
143         int32_t i = 0;
144         while (i < length && count < df) {
145             // manually inlined call to next() for speed
146             int32_t docCode = _freqStream->readVInt();
147             _doc += MiscUtils::unsignedShift(docCode, 1); // shift off low bit
148             if ((docCode & 1) != 0) { // if low bit is set
149                 _freq = 1;    // freq is one
150             } else {
151                 _freq = _freqStream->readVInt();    // else read freq
152             }
153             ++count;
154 
155             if (!deletedDocs || !deletedDocs->get(_doc)) {
156                 docs[i] = _doc;
157                 freqs[i] = _freq;
158                 ++i;
159             }
160         }
161         return i;
162     }
163 }
164 
readNoTf(Collection<int32_t> docs,Collection<int32_t> freqs,int32_t length)165 int32_t SegmentTermDocs::readNoTf(Collection<int32_t> docs, Collection<int32_t> freqs, int32_t length) {
166     int32_t i = 0;
167     while (i < length && count < df) {
168         // manually inlined call to next() for speed
169         _doc += _freqStream->readVInt();
170         ++count;
171 
172         if (!deletedDocs || !deletedDocs->get(_doc)) {
173             docs[i] = _doc;
174 
175             // Hardware freq to 1 when term freqs were not stored in the index
176             freqs[i] = 1;
177             ++i;
178         }
179     }
180     return i;
181 }
182 
skipProx(int64_t proxPointer,int32_t payloadLength)183 void SegmentTermDocs::skipProx(int64_t proxPointer, int32_t payloadLength) {
184 }
185 
skipTo(int32_t target)186 bool SegmentTermDocs::skipTo(int32_t target) {
187     if (df >= skipInterval) { // optimized case
188         if (!skipListReader) {
189             skipListReader = newLucene<DefaultSkipListReader>(boost::dynamic_pointer_cast<IndexInput>(_freqStream->clone()), maxSkipLevels, skipInterval);    // lazily clone
190         }
191 
192         if (!haveSkipped) { // lazily initialize skip stream
193             skipListReader->init(skipPointer, freqBasePointer, proxBasePointer, df, currentFieldStoresPayloads);
194             haveSkipped = true;
195         }
196 
197         int32_t newCount = skipListReader->skipTo(target);
198         if (newCount > count) {
199             _freqStream->seek(skipListReader->getFreqPointer());
200             skipProx(skipListReader->getProxPointer(), skipListReader->getPayloadLength());
201 
202             _doc = skipListReader->getDoc();
203             count = newCount;
204         }
205     }
206 
207     // done skipping, now just scan
208     do {
209         if (!next()) {
210             return false;
211         }
212     } while (target > _doc);
213     return true;
214 }
215 
freqStream()216 IndexInputPtr SegmentTermDocs::freqStream() {
217     return _freqStream;
218 }
219 
freqStream(const IndexInputPtr & freqStream)220 void SegmentTermDocs::freqStream(const IndexInputPtr& freqStream) {
221     _freqStream = freqStream;
222 }
223 
224 }
225