/////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2009-2014 Alan Wright. All rights reserved.
// Distributable under the terms of either the Apache License (Version 2.0)
// or the GNU Lesser General Public License.
/////////////////////////////////////////////////////////////////////////////
6
#include "TestInc.h"
#include "LuceneTestFixture.h"
#include "TestUtils.h"
#include "DefaultSimilarity.h"
#include "StandardAnalyzer.h"
#include "FSDirectory.h"
#include "IndexWriter.h"
#include "Document.h"
#include "Field.h"
#include "MockRAMDirectory.h"
#include "LogDocMergePolicy.h"
#include "WhitespaceAnalyzer.h"
#include "SegmentReader.h"
#include "_SegmentReader.h"
#include "FileUtils.h"

using namespace Lucene;
24
25 class SimilarityOne : public DefaultSimilarity {
26 public:
~SimilarityOne()27 virtual ~SimilarityOne() {
28 }
29
30 public:
lengthNorm(const String & fieldName,int32_t numTokens)31 virtual double lengthNorm(const String& fieldName, int32_t numTokens) {
32 return 1.0;
33 }
34 };
35
36 class IndexReaderCloneNormsTest : public LuceneTestFixture {
37 public:
IndexReaderCloneNormsTest()38 IndexReaderCloneNormsTest() {
39 similarityOne = newLucene<SimilarityOne>();
40 anlzr = newLucene<StandardAnalyzer>(LuceneVersion::LUCENE_CURRENT);
41 numDocNorms = 0;
42 lastNorm = 0.0;
43 normDelta = 0.001;
44 }
45
~IndexReaderCloneNormsTest()46 virtual ~IndexReaderCloneNormsTest() {
47 }
48
49 protected:
50 static const int32_t NUM_FIELDS;
51
52 SimilarityPtr similarityOne;
53 AnalyzerPtr anlzr;
54 int32_t numDocNorms;
55 Collection<double> norms;
56 Collection<double> modifiedNorms;
57 double lastNorm;
58 double normDelta;
59
60 public:
createIndex(const DirectoryPtr & dir)61 void createIndex(const DirectoryPtr& dir) {
62 IndexWriterPtr iw = newLucene<IndexWriter>(dir, anlzr, true, IndexWriter::MaxFieldLengthLIMITED);
63 iw->setMaxBufferedDocs(5);
64 iw->setMergeFactor(3);
65 iw->setSimilarity(similarityOne);
66 iw->setUseCompoundFile(true);
67 iw->close();
68 }
69
createIndex(const DirectoryPtr & dir,bool multiSegment)70 void createIndex(const DirectoryPtr& dir, bool multiSegment) {
71 IndexWriter::unlock(dir);
72 IndexWriterPtr w = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), IndexWriter::MaxFieldLengthLIMITED);
73
74 w->setMergePolicy(newLucene<LogDocMergePolicy>(w));
75
76 for (int32_t i = 0; i < 100; ++i) {
77 w->addDocument(createDocument(i, 4));
78 if (multiSegment && (i % 10) == 0) {
79 w->commit();
80 }
81 }
82
83 if (!multiSegment) {
84 w->optimize();
85 }
86
87 w->close();
88
89 IndexReaderPtr r = IndexReader::open(dir, false);
90 if (multiSegment) {
91 EXPECT_TRUE(r->getSequentialSubReaders().size() > 1);
92 } else {
93 EXPECT_EQ(r->getSequentialSubReaders().size(), 1);
94 }
95 r->close();
96 }
97
createDocument(int32_t n,int32_t numFields)98 DocumentPtr createDocument(int32_t n, int32_t numFields) {
99 StringStream sb;
100 DocumentPtr doc = newLucene<Document>();
101 sb << L"a" << n;
102 doc->add(newLucene<Field>(L"field1", sb.str(), Field::STORE_YES, Field::INDEX_ANALYZED));
103 doc->add(newLucene<Field>(L"fielda", sb.str(), Field::STORE_YES, Field::INDEX_NOT_ANALYZED_NO_NORMS));
104 doc->add(newLucene<Field>(L"fieldb", sb.str(), Field::STORE_YES, Field::INDEX_NO));
105 sb << L" b" << n;
106 for (int32_t i = 1; i < numFields; ++i) {
107 doc->add(newLucene<Field>(L"field" + StringUtils::toString(i + 1), sb.str(), Field::STORE_YES, Field::INDEX_ANALYZED));
108 }
109 return doc;
110 }
111
112 /// try cloning and reopening the norms
doTestNorms(const DirectoryPtr & dir)113 void doTestNorms(const DirectoryPtr& dir) {
114 addDocs(dir, 12, true);
115 IndexReaderPtr ir = IndexReader::open(dir, false);
116 verifyIndex(ir);
117 modifyNormsForF1(ir);
118 IndexReaderPtr irc = boost::dynamic_pointer_cast<IndexReader>(ir->clone());
119 verifyIndex(irc);
120
121 modifyNormsForF1(irc);
122
123 IndexReaderPtr irc3 = boost::dynamic_pointer_cast<IndexReader>(irc->clone());
124 verifyIndex(irc3);
125 modifyNormsForF1(irc3);
126 verifyIndex(irc3);
127 irc3->flush();
128 irc3->close();
129 }
130
modifyNormsForF1(const DirectoryPtr & dir)131 void modifyNormsForF1(const DirectoryPtr& dir) {
132 IndexReaderPtr ir = IndexReader::open(dir, false);
133 modifyNormsForF1(ir);
134 }
135
modifyNormsForF1(const IndexReaderPtr & ir)136 void modifyNormsForF1(const IndexReaderPtr& ir) {
137 int32_t n = ir->maxDoc();
138 for (int32_t i = 0; i < n; i += 3) { // modify for every third doc
139 int32_t k = (i * 3) % modifiedNorms.size();
140 double origNorm = modifiedNorms[i];
141 double newNorm = modifiedNorms[k];
142 modifiedNorms[i] = newNorm;
143 modifiedNorms[k] = origNorm;
144 ir->setNorm(i, L"f1", newNorm);
145 ir->setNorm(k, L"f1", origNorm);
146 }
147 }
148
addDocs(const DirectoryPtr & dir,int32_t ndocs,bool compound)149 void addDocs(const DirectoryPtr& dir, int32_t ndocs, bool compound) {
150 IndexWriterPtr iw = newLucene<IndexWriter>(dir, anlzr, false, IndexWriter::MaxFieldLengthLIMITED);
151 iw->setMaxBufferedDocs(5);
152 iw->setMergeFactor(3);
153 iw->setSimilarity(similarityOne);
154 iw->setUseCompoundFile(compound);
155 for (int32_t i = 0; i < ndocs; ++i) {
156 iw->addDocument(newDoc());
157 }
158 iw->close();
159 }
160
newDoc()161 DocumentPtr newDoc() {
162 DocumentPtr d = newLucene<Document>();
163 double boost = nextNorm();
164 for (int32_t i = 0; i < 10; ++i) {
165 FieldPtr f = newLucene<Field>(L"f" + StringUtils::toString(i), L"v" + StringUtils::toString(i), Field::STORE_NO, Field::INDEX_NOT_ANALYZED);
166 f->setBoost(boost);
167 d->add(f);
168 }
169 return d;
170 }
171
nextNorm()172 double nextNorm() {
173 double norm = lastNorm + normDelta;
174 do {
175 double norm1 = Similarity::decodeNorm(Similarity::encodeNorm(norm));
176 if (norm1 > lastNorm) {
177 norm = norm1;
178 break;
179 }
180 norm += normDelta;
181 } while (true);
182 norms.add(numDocNorms, norm);
183 modifiedNorms.add(numDocNorms, norm);
184 ++numDocNorms;
185 // there's a limit to how many distinct values can be stored in a single byte
186 lastNorm = (norm > 10 ? 0 : norm);
187 return norm;
188 }
189
verifyIndex(const DirectoryPtr & dir)190 void verifyIndex(const DirectoryPtr& dir) {
191 IndexReaderPtr ir = IndexReader::open(dir, false);
192 verifyIndex(ir);
193 ir->close();
194 }
195
verifyIndex(const IndexReaderPtr & ir)196 void verifyIndex(const IndexReaderPtr& ir) {
197 for (int32_t i = 0; i < NUM_FIELDS; ++i) {
198 String field = L"f" + StringUtils::toString(i);
199 ByteArray b = ir->norms(field);
200 EXPECT_EQ(numDocNorms, b.size());
201 Collection<double> storedNorms = (i == 1 ? modifiedNorms : norms);
202 for (int32_t j = 0; j < b.size(); ++j) {
203 double norm = Similarity::decodeNorm(b[j]);
204 double norm1 = storedNorms[j];
205 EXPECT_EQ(norm, norm1); // 0.000001 ??
206 }
207 }
208 }
209 };
210
211 const int32_t IndexReaderCloneNormsTest::NUM_FIELDS = 10;
212
213 /// Test that norms values are preserved as the index is maintained. Including separate norms.
214 /// Including merging indexes with separate norms. Including optimize.
TEST_F(IndexReaderCloneNormsTest,testNorms)215 TEST_F(IndexReaderCloneNormsTest, testNorms) {
216 // test with a single index: index1
217 String indexDir1(FileUtils::joinPath(getTempDir(), L"lucenetestindex1"));
218 DirectoryPtr dir1 = FSDirectory::open(indexDir1);
219 IndexWriter::unlock(dir1);
220
221 norms = Collection<double>::newInstance();
222 modifiedNorms = Collection<double>::newInstance();
223
224 createIndex(dir1);
225 doTestNorms(dir1);
226
227 // test with a single index: index2
228 Collection<double> norms1 = norms;
229 Collection<double> modifiedNorms1 = modifiedNorms;
230 int32_t numDocNorms1 = numDocNorms;
231
232 norms = Collection<double>::newInstance();
233 modifiedNorms = Collection<double>::newInstance();
234 numDocNorms = 0;
235
236 String indexDir2(FileUtils::joinPath(getTempDir(), L"lucenetestindex2"));
237 DirectoryPtr dir2 = FSDirectory::open(indexDir2);
238
239 createIndex(dir2);
240 doTestNorms(dir2);
241
242 // add index1 and index2 to a third index: index3
243 String indexDir3(FileUtils::joinPath(getTempDir(), L"lucenetestindex3"));
244 DirectoryPtr dir3 = FSDirectory::open(indexDir3);
245
246 createIndex(dir3);
247 IndexWriterPtr iw = newLucene<IndexWriter>(dir3, anlzr, false, IndexWriter::MaxFieldLengthLIMITED);
248 iw->setMaxBufferedDocs(5);
249 iw->setMergeFactor(3);
250 iw->addIndexesNoOptimize(newCollection<DirectoryPtr>(dir1, dir2));
251 iw->optimize();
252 iw->close();
253
254 norms1.addAll(norms.begin(), norms.end());
255 norms = norms1;
256 modifiedNorms1.addAll(modifiedNorms.begin(), modifiedNorms.end());
257 modifiedNorms = modifiedNorms1;
258 numDocNorms += numDocNorms1;
259
260 // test with index3
261 verifyIndex(dir3);
262 doTestNorms(dir3);
263
264 // now with optimize
265 iw = newLucene<IndexWriter>(dir3, anlzr, false, IndexWriter::MaxFieldLengthLIMITED);
266 iw->setMaxBufferedDocs(5);
267 iw->setMergeFactor(3);
268 iw->optimize();
269 iw->close();
270 verifyIndex(dir3);
271
272 dir1->close();
273 dir2->close();
274 dir3->close();
275 }
276
/// Test that closing the original reader releases its reference on the
/// shared norms bytes, leaving the clone's reference intact.
TEST_F(IndexReaderCloneNormsTest, testNormsClose) {
    DirectoryPtr dir1 = newLucene<MockRAMDirectory>();
    createIndex(dir1, false);
    SegmentReaderPtr reader1 = SegmentReader::getOnlySegmentReader(dir1);
    reader1->norms(L"field1"); // force the norms for field1 to load
    NormPtr r1norm = reader1->_norms.get(L"field1");
    SegmentReaderRefPtr r1BytesRef = r1norm->bytesRef();
    SegmentReaderPtr reader2 = boost::dynamic_pointer_cast<SegmentReader>(reader1->clone());
    // the clone shares the norm bytes, so the ref count is now 2
    EXPECT_EQ(2, r1norm->bytesRef()->refCount());
    reader1->close();
    // closing reader1 drops its reference; only the clone's remains
    EXPECT_EQ(1, r1BytesRef->refCount());
    reader2->norms(L"field1");
    reader2->close();
    dir1->close();
}
292
/// Test reference counting of shared norm bytes across a chain of clones:
/// clones share the bytes (ref count grows) until one of them edits a norm,
/// at which point that clone gets a private copy (ref count 1).
TEST_F(IndexReaderCloneNormsTest, testNormsRefCounting) {
    DirectoryPtr dir1 = newLucene<MockRAMDirectory>();
    createIndex(dir1, false);

    IndexReaderPtr reader1 = IndexReader::open(dir1, false);

    IndexReaderPtr reader2C = boost::dynamic_pointer_cast<IndexReader>(reader1->clone());
    SegmentReaderPtr segmentReader2C = SegmentReader::getOnlySegmentReader(reader2C);
    segmentReader2C->norms(L"field1"); // load the norms for the field
    NormPtr reader2CNorm = segmentReader2C->_norms.get(L"field1");
    EXPECT_EQ(2, reader2CNorm->bytesRef()->refCount());

    IndexReaderPtr reader3C = boost::dynamic_pointer_cast<IndexReader>(reader2C->clone());
    SegmentReaderPtr segmentReader3C = SegmentReader::getOnlySegmentReader(reader3C);
    NormPtr reader3CCNorm = segmentReader3C->_norms.get(L"field1");
    EXPECT_EQ(3, reader3CCNorm->bytesRef()->refCount());

    // edit a norm and the refcount should be 1
    IndexReaderPtr reader4C = boost::dynamic_pointer_cast<IndexReader>(reader3C->clone());
    SegmentReaderPtr segmentReader4C = SegmentReader::getOnlySegmentReader(reader4C);
    EXPECT_EQ(4, reader3CCNorm->bytesRef()->refCount());
    reader4C->setNorm(5, L"field1", 0.33);

    // updating via reader3C must now fail: reader4C holds the write lock
    try {
        reader3C->setNorm(1, L"field1", 0.99);
    } catch (LockObtainFailedException& e) {
        EXPECT_TRUE(check_exception(LuceneException::LockObtainFailed)(e));
    }

    // norm values should be different
    EXPECT_NE(Similarity::decodeNorm(segmentReader3C->norms(L"field1")[5]), Similarity::decodeNorm(segmentReader4C->norms(L"field1")[5]));
    NormPtr reader4CCNorm = segmentReader4C->_norms.get(L"field1");
    // reader4C copied-on-write: the shared bytes lost one reference and
    // reader4C's private copy has exactly one
    EXPECT_EQ(3, reader3CCNorm->bytesRef()->refCount());
    EXPECT_EQ(1, reader4CCNorm->bytesRef()->refCount());

    IndexReaderPtr reader5C = boost::dynamic_pointer_cast<IndexReader>(reader4C->clone());
    SegmentReaderPtr segmentReader5C = SegmentReader::getOnlySegmentReader(reader5C);
    NormPtr reader5CCNorm = segmentReader5C->_norms.get(L"field1");
    reader5C->setNorm(5, L"field1", 0.7);
    EXPECT_EQ(1, reader5CCNorm->bytesRef()->refCount());

    reader5C->close();
    reader4C->close();
    reader3C->close();
    reader2C->close();
    reader1->close();
    dir1->close();
}
342