1 /////////////////////////////////////////////////////////////////////////////
2 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
5 /////////////////////////////////////////////////////////////////////////////
6 
7 #include "TestInc.h"
8 #include "LuceneTestFixture.h"
9 #include "TestUtils.h"
10 #include "DefaultSimilarity.h"
11 #include "StandardAnalyzer.h"
12 #include "FSDirectory.h"
13 #include "IndexWriter.h"
14 #include "Document.h"
15 #include "Field.h"
16 #include "MockRAMDirectory.h"
17 #include "LogDocMergePolicy.h"
18 #include "WhitespaceAnalyzer.h"
19 #include "SegmentReader.h"
20 #include "_SegmentReader.h"
21 #include "FileUtils.h"
22 
23 using namespace Lucene;
24 
25 class SimilarityOne : public DefaultSimilarity {
26 public:
~SimilarityOne()27     virtual ~SimilarityOne() {
28     }
29 
30 public:
lengthNorm(const String & fieldName,int32_t numTokens)31     virtual double lengthNorm(const String& fieldName, int32_t numTokens) {
32         return 1.0;
33     }
34 };
35 
36 class IndexReaderCloneNormsTest : public LuceneTestFixture {
37 public:
IndexReaderCloneNormsTest()38     IndexReaderCloneNormsTest() {
39         similarityOne = newLucene<SimilarityOne>();
40         anlzr = newLucene<StandardAnalyzer>(LuceneVersion::LUCENE_CURRENT);
41         numDocNorms = 0;
42         lastNorm = 0.0;
43         normDelta = 0.001;
44     }
45 
~IndexReaderCloneNormsTest()46     virtual ~IndexReaderCloneNormsTest() {
47     }
48 
49 protected:
50     static const int32_t NUM_FIELDS;
51 
52     SimilarityPtr similarityOne;
53     AnalyzerPtr anlzr;
54     int32_t numDocNorms;
55     Collection<double> norms;
56     Collection<double> modifiedNorms;
57     double lastNorm;
58     double normDelta;
59 
60 public:
createIndex(const DirectoryPtr & dir)61     void createIndex(const DirectoryPtr& dir) {
62         IndexWriterPtr iw = newLucene<IndexWriter>(dir, anlzr, true, IndexWriter::MaxFieldLengthLIMITED);
63         iw->setMaxBufferedDocs(5);
64         iw->setMergeFactor(3);
65         iw->setSimilarity(similarityOne);
66         iw->setUseCompoundFile(true);
67         iw->close();
68     }
69 
createIndex(const DirectoryPtr & dir,bool multiSegment)70     void createIndex(const DirectoryPtr& dir, bool multiSegment) {
71         IndexWriter::unlock(dir);
72         IndexWriterPtr w = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), IndexWriter::MaxFieldLengthLIMITED);
73 
74         w->setMergePolicy(newLucene<LogDocMergePolicy>(w));
75 
76         for (int32_t i = 0; i < 100; ++i) {
77             w->addDocument(createDocument(i, 4));
78             if (multiSegment && (i % 10) == 0) {
79                 w->commit();
80             }
81         }
82 
83         if (!multiSegment) {
84             w->optimize();
85         }
86 
87         w->close();
88 
89         IndexReaderPtr r = IndexReader::open(dir, false);
90         if (multiSegment) {
91             EXPECT_TRUE(r->getSequentialSubReaders().size() > 1);
92         } else {
93             EXPECT_EQ(r->getSequentialSubReaders().size(), 1);
94         }
95         r->close();
96     }
97 
createDocument(int32_t n,int32_t numFields)98     DocumentPtr createDocument(int32_t n, int32_t numFields) {
99         StringStream sb;
100         DocumentPtr doc = newLucene<Document>();
101         sb << L"a" << n;
102         doc->add(newLucene<Field>(L"field1", sb.str(), Field::STORE_YES, Field::INDEX_ANALYZED));
103         doc->add(newLucene<Field>(L"fielda", sb.str(), Field::STORE_YES, Field::INDEX_NOT_ANALYZED_NO_NORMS));
104         doc->add(newLucene<Field>(L"fieldb", sb.str(), Field::STORE_YES, Field::INDEX_NO));
105         sb << L" b" << n;
106         for (int32_t i = 1; i < numFields; ++i) {
107             doc->add(newLucene<Field>(L"field" + StringUtils::toString(i + 1), sb.str(), Field::STORE_YES, Field::INDEX_ANALYZED));
108         }
109         return doc;
110     }
111 
112     /// try cloning and reopening the norms
doTestNorms(const DirectoryPtr & dir)113     void doTestNorms(const DirectoryPtr& dir) {
114         addDocs(dir, 12, true);
115         IndexReaderPtr ir = IndexReader::open(dir, false);
116         verifyIndex(ir);
117         modifyNormsForF1(ir);
118         IndexReaderPtr irc = boost::dynamic_pointer_cast<IndexReader>(ir->clone());
119         verifyIndex(irc);
120 
121         modifyNormsForF1(irc);
122 
123         IndexReaderPtr irc3 = boost::dynamic_pointer_cast<IndexReader>(irc->clone());
124         verifyIndex(irc3);
125         modifyNormsForF1(irc3);
126         verifyIndex(irc3);
127         irc3->flush();
128         irc3->close();
129     }
130 
modifyNormsForF1(const DirectoryPtr & dir)131     void modifyNormsForF1(const DirectoryPtr& dir) {
132         IndexReaderPtr ir = IndexReader::open(dir, false);
133         modifyNormsForF1(ir);
134     }
135 
modifyNormsForF1(const IndexReaderPtr & ir)136     void modifyNormsForF1(const IndexReaderPtr& ir) {
137         int32_t n = ir->maxDoc();
138         for (int32_t i = 0; i < n; i += 3) { // modify for every third doc
139             int32_t k = (i * 3) % modifiedNorms.size();
140             double origNorm = modifiedNorms[i];
141             double newNorm = modifiedNorms[k];
142             modifiedNorms[i] = newNorm;
143             modifiedNorms[k] = origNorm;
144             ir->setNorm(i, L"f1", newNorm);
145             ir->setNorm(k, L"f1", origNorm);
146         }
147     }
148 
addDocs(const DirectoryPtr & dir,int32_t ndocs,bool compound)149     void addDocs(const DirectoryPtr& dir, int32_t ndocs, bool compound) {
150         IndexWriterPtr iw = newLucene<IndexWriter>(dir, anlzr, false, IndexWriter::MaxFieldLengthLIMITED);
151         iw->setMaxBufferedDocs(5);
152         iw->setMergeFactor(3);
153         iw->setSimilarity(similarityOne);
154         iw->setUseCompoundFile(compound);
155         for (int32_t i = 0; i < ndocs; ++i) {
156             iw->addDocument(newDoc());
157         }
158         iw->close();
159     }
160 
newDoc()161     DocumentPtr newDoc() {
162         DocumentPtr d = newLucene<Document>();
163         double boost = nextNorm();
164         for (int32_t i = 0; i < 10; ++i) {
165             FieldPtr f = newLucene<Field>(L"f" + StringUtils::toString(i), L"v" + StringUtils::toString(i), Field::STORE_NO, Field::INDEX_NOT_ANALYZED);
166             f->setBoost(boost);
167             d->add(f);
168         }
169         return d;
170     }
171 
nextNorm()172     double nextNorm() {
173         double norm = lastNorm + normDelta;
174         do {
175             double norm1 = Similarity::decodeNorm(Similarity::encodeNorm(norm));
176             if (norm1 > lastNorm) {
177                 norm = norm1;
178                 break;
179             }
180             norm += normDelta;
181         } while (true);
182         norms.add(numDocNorms, norm);
183         modifiedNorms.add(numDocNorms, norm);
184         ++numDocNorms;
185         // there's a limit to how many distinct values can be stored in a single byte
186         lastNorm = (norm > 10 ? 0 : norm);
187         return norm;
188     }
189 
verifyIndex(const DirectoryPtr & dir)190     void verifyIndex(const DirectoryPtr& dir) {
191         IndexReaderPtr ir = IndexReader::open(dir, false);
192         verifyIndex(ir);
193         ir->close();
194     }
195 
verifyIndex(const IndexReaderPtr & ir)196     void verifyIndex(const IndexReaderPtr& ir) {
197         for (int32_t i = 0; i < NUM_FIELDS; ++i) {
198             String field = L"f" + StringUtils::toString(i);
199             ByteArray b = ir->norms(field);
200             EXPECT_EQ(numDocNorms, b.size());
201             Collection<double> storedNorms = (i == 1 ? modifiedNorms : norms);
202             for (int32_t j = 0; j < b.size(); ++j) {
203                 double norm = Similarity::decodeNorm(b[j]);
204                 double norm1 = storedNorms[j];
205                 EXPECT_EQ(norm, norm1); // 0.000001 ??
206             }
207         }
208     }
209 };
210 
211 const int32_t IndexReaderCloneNormsTest::NUM_FIELDS = 10;
212 
213 /// Test that norms values are preserved as the index is maintained.  Including separate norms.
214 /// Including merging indexes with separate norms. Including optimize.
TEST_F(IndexReaderCloneNormsTest,testNorms)215 TEST_F(IndexReaderCloneNormsTest, testNorms) {
216     // test with a single index: index1
217     String indexDir1(FileUtils::joinPath(getTempDir(), L"lucenetestindex1"));
218     DirectoryPtr dir1 = FSDirectory::open(indexDir1);
219     IndexWriter::unlock(dir1);
220 
221     norms = Collection<double>::newInstance();
222     modifiedNorms = Collection<double>::newInstance();
223 
224     createIndex(dir1);
225     doTestNorms(dir1);
226 
227     // test with a single index: index2
228     Collection<double> norms1 = norms;
229     Collection<double> modifiedNorms1 = modifiedNorms;
230     int32_t numDocNorms1 = numDocNorms;
231 
232     norms = Collection<double>::newInstance();
233     modifiedNorms = Collection<double>::newInstance();
234     numDocNorms = 0;
235 
236     String indexDir2(FileUtils::joinPath(getTempDir(), L"lucenetestindex2"));
237     DirectoryPtr dir2 = FSDirectory::open(indexDir2);
238 
239     createIndex(dir2);
240     doTestNorms(dir2);
241 
242     // add index1 and index2 to a third index: index3
243     String indexDir3(FileUtils::joinPath(getTempDir(), L"lucenetestindex3"));
244     DirectoryPtr dir3 = FSDirectory::open(indexDir3);
245 
246     createIndex(dir3);
247     IndexWriterPtr iw = newLucene<IndexWriter>(dir3, anlzr, false, IndexWriter::MaxFieldLengthLIMITED);
248     iw->setMaxBufferedDocs(5);
249     iw->setMergeFactor(3);
250     iw->addIndexesNoOptimize(newCollection<DirectoryPtr>(dir1, dir2));
251     iw->optimize();
252     iw->close();
253 
254     norms1.addAll(norms.begin(), norms.end());
255     norms = norms1;
256     modifiedNorms1.addAll(modifiedNorms.begin(), modifiedNorms.end());
257     modifiedNorms = modifiedNorms1;
258     numDocNorms += numDocNorms1;
259 
260     // test with index3
261     verifyIndex(dir3);
262     doTestNorms(dir3);
263 
264     // now with optimize
265     iw = newLucene<IndexWriter>(dir3, anlzr, false, IndexWriter::MaxFieldLengthLIMITED);
266     iw->setMaxBufferedDocs(5);
267     iw->setMergeFactor(3);
268     iw->optimize();
269     iw->close();
270     verifyIndex(dir3);
271 
272     dir1->close();
273     dir2->close();
274     dir3->close();
275 }
276 
/// After cloning a segment reader the norm bytes of "field1" are expected to have a
/// reference count of 2; closing the original reader is expected to drop it to 1.
TEST_F(IndexReaderCloneNormsTest, testNormsClose) {
    DirectoryPtr directory = newLucene<MockRAMDirectory>();
    createIndex(directory, false);

    SegmentReaderPtr original = SegmentReader::getOnlySegmentReader(directory);
    original->norms(L"field1");
    NormPtr originalNorm = original->_norms.get(L"field1");
    // keep a handle on the bytes reference so it can be inspected after the reader is closed
    SegmentReaderRefPtr originalBytesRef = originalNorm->bytesRef();

    SegmentReaderPtr cloned = boost::dynamic_pointer_cast<SegmentReader>(original->clone());
    EXPECT_EQ(2, originalNorm->bytesRef()->refCount());

    original->close();
    EXPECT_EQ(1, originalBytesRef->refCount());

    cloned->norms(L"field1");
    cloned->close();
    directory->close();
}
292 
/// Follows the reference count of the "field1" norm bytes through a chain of reader
/// clones: each clone is expected to add one reference, and a clone that edits a norm is
/// expected to end up with its own bytes (reference count 1).
TEST_F(IndexReaderCloneNormsTest, testNormsRefCounting) {
    DirectoryPtr dir1 = newLucene<MockRAMDirectory>();
    createIndex(dir1, false);

    IndexReaderPtr reader1 = IndexReader::open(dir1, false);

    IndexReaderPtr reader2C = boost::dynamic_pointer_cast<IndexReader>(reader1->clone());
    SegmentReaderPtr segmentReader2C = SegmentReader::getOnlySegmentReader(reader2C);
    segmentReader2C->norms(L"field1"); // load the norms for the field
    NormPtr reader2CNorm = segmentReader2C->_norms.get(L"field1");
    EXPECT_EQ(2, reader2CNorm->bytesRef()->refCount());

    IndexReaderPtr reader3C = boost::dynamic_pointer_cast<IndexReader>(reader2C->clone());
    SegmentReaderPtr segmentReader3C = SegmentReader::getOnlySegmentReader(reader3C);
    NormPtr reader3CCNorm = segmentReader3C->_norms.get(L"field1");
    EXPECT_EQ(3, reader3CCNorm->bytesRef()->refCount());

    // edit a norm and the refcount should be 1
    IndexReaderPtr reader4C = boost::dynamic_pointer_cast<IndexReader>(reader3C->clone());
    SegmentReaderPtr segmentReader4C = SegmentReader::getOnlySegmentReader(reader4C);
    EXPECT_EQ(4, reader3CCNorm->bytesRef()->refCount());
    reader4C->setNorm(5, L"field1", 0.33);

    // updating reader3C must now fail with LockObtainFailed (presumably because reader4C
    // holds the write lock after its setNorm - confirm against IndexReader)
    try {
        reader3C->setNorm(1, L"field1", 0.99);
        // reaching this line means no exception was thrown, which must not pass silently
        ADD_FAILURE() << "did not hit expected exception";
    } catch (LockObtainFailedException& e) {
        EXPECT_TRUE(check_exception(LuceneException::LockObtainFailed)(e));
    }

    // norm values should be different
    EXPECT_NE(Similarity::decodeNorm(segmentReader3C->norms(L"field1")[5]), Similarity::decodeNorm(segmentReader4C->norms(L"field1")[5]));
    NormPtr reader4CCNorm = segmentReader4C->_norms.get(L"field1");
    EXPECT_EQ(3, reader3CCNorm->bytesRef()->refCount());
    EXPECT_EQ(1, reader4CCNorm->bytesRef()->refCount());

    // a clone of the edited reader that edits a norm itself is also expected to hold
    // its own bytes
    IndexReaderPtr reader5C = boost::dynamic_pointer_cast<IndexReader>(reader4C->clone());
    SegmentReaderPtr segmentReader5C = SegmentReader::getOnlySegmentReader(reader5C);
    NormPtr reader5CCNorm = segmentReader5C->_norms.get(L"field1");
    reader5C->setNorm(5, L"field1", 0.7);
    EXPECT_EQ(1, reader5CCNorm->bytesRef()->refCount());

    reader5C->close();
    reader4C->close();
    reader3C->close();
    reader2C->close();
    reader1->close();
    dir1->close();
}
342