1 /////////////////////////////////////////////////////////////////////////////
2 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
5 /////////////////////////////////////////////////////////////////////////////
6
7 #include "TestInc.h"
8 #include "LuceneTestFixture.h"
9 #include "IndexWriter.h"
10 #include "IndexReader.h"
11 #include "WhitespaceAnalyzer.h"
12 #include "LogDocMergePolicy.h"
13 #include "Field.h"
14 #include "Document.h"
15 #include "Term.h"
16 #include "TermDocs.h"
17 #include "PhraseQuery.h"
18 #include "MockRAMDirectory.h"
19 #include "LogByteSizeMergePolicy.h"
20 #include "SerialMergeScheduler.h"
21 #include "SegmentInfo.h"
22
using namespace Lucene;

// Test fixture alias: LuceneTestFixture supplies the common per-test
// setup/teardown shared by all Lucene++ test suites.
typedef LuceneTestFixture AddIndexesNoOptimizeTest;
26
newWriter(const DirectoryPtr & dir,bool create)27 static IndexWriterPtr newWriter(const DirectoryPtr& dir, bool create) {
28 IndexWriterPtr writer = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), create, IndexWriter::MaxFieldLengthUNLIMITED);
29 writer->setMergePolicy(newLucene<LogDocMergePolicy>(writer));
30 return writer;
31 }
32
addDocs(const IndexWriterPtr & writer,int32_t numDocs)33 static void addDocs(const IndexWriterPtr& writer, int32_t numDocs) {
34 for (int32_t i = 0; i < numDocs; ++i) {
35 DocumentPtr doc = newLucene<Document>();
36 doc->add(newLucene<Field>(L"content", L"aaa", Field::STORE_NO, Field::INDEX_ANALYZED));
37 writer->addDocument(doc);
38 }
39 }
40
addDocs2(const IndexWriterPtr & writer,int32_t numDocs)41 static void addDocs2(const IndexWriterPtr& writer, int32_t numDocs) {
42 for (int32_t i = 0; i < numDocs; ++i) {
43 DocumentPtr doc = newLucene<Document>();
44 doc->add(newLucene<Field>(L"content", L"bbb", Field::STORE_NO, Field::INDEX_ANALYZED));
45 writer->addDocument(doc);
46 }
47 }
48
verifyNumDocs(const DirectoryPtr & dir,int32_t numDocs)49 static void verifyNumDocs(const DirectoryPtr& dir, int32_t numDocs) {
50 IndexReaderPtr reader = IndexReader::open(dir, true);
51 EXPECT_EQ(reader->maxDoc(), numDocs);
52 EXPECT_EQ(reader->numDocs(), numDocs);
53 reader->close();
54 }
55
verifyTermDocs(const DirectoryPtr & dir,const TermPtr & term,int32_t numDocs)56 static void verifyTermDocs(const DirectoryPtr& dir, const TermPtr& term, int32_t numDocs) {
57 IndexReaderPtr reader = IndexReader::open(dir, true);
58 TermDocsPtr termDocs = reader->termDocs(term);
59 int32_t count = 0;
60 while (termDocs->next()) {
61 ++count;
62 }
63 EXPECT_EQ(count, numDocs);
64 reader->close();
65 }
66
setUpDirs(const DirectoryPtr & dir,const DirectoryPtr & aux)67 static void setUpDirs(const DirectoryPtr& dir, const DirectoryPtr& aux) {
68 IndexWriterPtr writer;
69
70 writer = newWriter(dir, true);
71 writer->setMaxBufferedDocs(1000);
72 // add 1000 documents in 1 segment
73 addDocs(writer, 1000);
74 EXPECT_EQ(1000, writer->maxDoc());
75 EXPECT_EQ(1, writer->getSegmentCount());
76 writer->close();
77
78 writer = newWriter(aux, true);
79 writer->setUseCompoundFile(false); // use one without a compound file
80 writer->setMaxBufferedDocs(100);
81 writer->setMergeFactor(10);
82 // add 30 documents in 3 segments
83 for (int32_t i = 0; i < 3; ++i) {
84 addDocs(writer, 10);
85 writer->close();
86 writer = newWriter(aux, false);
87 writer->setUseCompoundFile(false); // use one without a compound file
88 writer->setMaxBufferedDocs(100);
89 writer->setMergeFactor(10);
90 }
91 EXPECT_EQ(30, writer->maxDoc());
92 EXPECT_EQ(3, writer->getSegmentCount());
93 writer->close();
94 }
95
// Basic addIndexesNoOptimize behavior: merge auxiliary indexes into a main
// index several times and verify document counts and term frequencies.
TEST_F(AddIndexesNoOptimizeTest, testSimpleCase) {
    // main directory
    DirectoryPtr dir = newLucene<RAMDirectory>();
    // two auxiliary directories
    DirectoryPtr aux = newLucene<RAMDirectory>();
    DirectoryPtr aux2 = newLucene<RAMDirectory>();

    IndexWriterPtr writer = newWriter(dir, true);
    // add 100 documents
    addDocs(writer, 100);
    EXPECT_EQ(writer->maxDoc(), 100);
    writer->close();

    writer = newWriter(aux, true);
    writer->setUseCompoundFile(false); // use one without a compound file
    // add 40 documents in separate files
    addDocs(writer, 40);
    EXPECT_EQ(writer->maxDoc(), 40);
    writer->close();

    writer = newWriter(aux2, true);
    // add 50 "bbb" documents in compound files
    addDocs2(writer, 50);
    EXPECT_EQ(writer->maxDoc(), 50);
    writer->close();

    // test doc count before segments are merged
    writer = newWriter(dir, false);
    EXPECT_EQ(writer->maxDoc(), 100);
    writer->addIndexesNoOptimize(newCollection<DirectoryPtr>(aux, aux2));
    EXPECT_EQ(writer->maxDoc(), 190); // 100 + 40 + 50
    writer->close();

    // make sure the old index is correct
    verifyNumDocs(aux, 40);

    // make sure the new index is correct
    verifyNumDocs(dir, 190);

    // now add another set in.
    DirectoryPtr aux3 = newLucene<RAMDirectory>();
    writer = newWriter(aux3, true);
    // add 40 documents
    addDocs(writer, 40);
    EXPECT_EQ(writer->maxDoc(), 40);
    writer->close();

    // test doc count before segments are merged/index is optimized
    writer = newWriter(dir, false);
    EXPECT_EQ(writer->maxDoc(), 190);
    writer->addIndexesNoOptimize(newCollection<DirectoryPtr>(aux3));
    EXPECT_EQ(writer->maxDoc(), 230); // 190 + 40
    writer->close();

    // make sure the new index is correct
    verifyNumDocs(dir, 230);

    // 180 "aaa" docs = 100 (dir) + 40 (aux) + 40 (aux3)
    verifyTermDocs(dir, newLucene<Term>(L"content", L"aaa"), 180);

    // 50 "bbb" docs all came from aux2
    verifyTermDocs(dir, newLucene<Term>(L"content", L"bbb"), 50);

    // now optimize it.
    writer = newWriter(dir, false);
    writer->optimize();
    writer->close();

    // make sure the new index is correct after optimizing
    verifyNumDocs(dir, 230);

    verifyTermDocs(dir, newLucene<Term>(L"content", L"aaa"), 180);

    verifyTermDocs(dir, newLucene<Term>(L"content", L"bbb"), 50);

    // now add a single document
    DirectoryPtr aux4 = newLucene<RAMDirectory>();
    writer = newWriter(aux4, true);
    addDocs2(writer, 1);
    writer->close();

    writer = newWriter(dir, false);
    EXPECT_EQ(writer->maxDoc(), 230);
    writer->addIndexesNoOptimize(newCollection<DirectoryPtr>(aux4));
    EXPECT_EQ(writer->maxDoc(), 231);
    writer->close();

    verifyNumDocs(dir, 231);

    verifyTermDocs(dir, newLucene<Term>(L"content", L"bbb"), 51);
}
185
// Pending deletes created AFTER addIndexesNoOptimize must survive optimize/commit.
TEST_F(AddIndexesNoOptimizeTest, testWithPendingDeletes) {
    DirectoryPtr dir = newLucene<RAMDirectory>();   // main directory
    DirectoryPtr aux = newLucene<RAMDirectory>();   // auxiliary directory

    setUpDirs(dir, aux);
    IndexWriterPtr w = newWriter(dir, false);
    w->addIndexesNoOptimize(newCollection<DirectoryPtr>(aux));

    // Add 10 docs, then replace each of them once, producing 10 pending deletes
    for (int32_t n = 0; n < 20; ++n) {
        DocumentPtr d = newLucene<Document>();
        d->add(newLucene<Field>(L"id", StringUtils::toString(n % 10), Field::STORE_NO, Field::INDEX_NOT_ANALYZED));
        d->add(newLucene<Field>(L"content", L"bbb " + StringUtils::toString(n), Field::STORE_NO, Field::INDEX_ANALYZED));
        w->updateDocument(newLucene<Term>(L"id", StringUtils::toString(n % 10)), d);
    }

    // Delete one of the 10 surviving docs, leaving 9
    PhraseQueryPtr phrase = newLucene<PhraseQuery>();
    phrase->add(newLucene<Term>(L"content", L"bbb"));
    phrase->add(newLucene<Term>(L"content", L"14"));
    w->deleteDocuments(phrase);

    w->optimize();
    w->commit();

    verifyNumDocs(dir, 1039);
    verifyTermDocs(dir, newLucene<Term>(L"content", L"aaa"), 1030);
    verifyTermDocs(dir, newLucene<Term>(L"content", L"bbb"), 9);

    w->close();
    dir->close();
    aux->close();
}
221
// Same as testWithPendingDeletes, but the deletes are already pending when
// addIndexesNoOptimize runs.
TEST_F(AddIndexesNoOptimizeTest, testWithPendingDeletes2) {
    DirectoryPtr dir = newLucene<RAMDirectory>();   // main directory
    DirectoryPtr aux = newLucene<RAMDirectory>();   // auxiliary directory

    setUpDirs(dir, aux);
    IndexWriterPtr w = newWriter(dir, false);

    // Add 10 docs, then replace each of them once, producing 10 pending deletes
    for (int32_t n = 0; n < 20; ++n) {
        DocumentPtr d = newLucene<Document>();
        d->add(newLucene<Field>(L"id", StringUtils::toString(n % 10), Field::STORE_NO, Field::INDEX_NOT_ANALYZED));
        d->add(newLucene<Field>(L"content", L"bbb " + StringUtils::toString(n), Field::STORE_NO, Field::INDEX_ANALYZED));
        w->updateDocument(newLucene<Term>(L"id", StringUtils::toString(n % 10)), d);
    }

    w->addIndexesNoOptimize(newCollection<DirectoryPtr>(aux));

    // Delete one of the 10 surviving docs, leaving 9
    PhraseQueryPtr phrase = newLucene<PhraseQuery>();
    phrase->add(newLucene<Term>(L"content", L"bbb"));
    phrase->add(newLucene<Term>(L"content", L"14"));
    w->deleteDocuments(phrase);

    w->optimize();
    w->commit();

    verifyNumDocs(dir, 1039);
    verifyTermDocs(dir, newLucene<Term>(L"content", L"aaa"), 1030);
    verifyTermDocs(dir, newLucene<Term>(L"content", L"bbb"), 9);

    w->close();
    dir->close();
    aux->close();
}
258
// Same scenario again, with addIndexesNoOptimize running after both the
// updates and the explicit delete are pending.
TEST_F(AddIndexesNoOptimizeTest, testWithPendingDeletes3) {
    DirectoryPtr dir = newLucene<RAMDirectory>();   // main directory
    DirectoryPtr aux = newLucene<RAMDirectory>();   // auxiliary directory

    setUpDirs(dir, aux);
    IndexWriterPtr w = newWriter(dir, false);

    // Add 10 docs, then replace each of them once, producing 10 pending deletes
    for (int32_t n = 0; n < 20; ++n) {
        DocumentPtr d = newLucene<Document>();
        d->add(newLucene<Field>(L"id", StringUtils::toString(n % 10), Field::STORE_NO, Field::INDEX_NOT_ANALYZED));
        d->add(newLucene<Field>(L"content", L"bbb " + StringUtils::toString(n), Field::STORE_NO, Field::INDEX_ANALYZED));
        w->updateDocument(newLucene<Term>(L"id", StringUtils::toString(n % 10)), d);
    }

    // Delete one of the 10 surviving docs, leaving 9
    PhraseQueryPtr phrase = newLucene<PhraseQuery>();
    phrase->add(newLucene<Term>(L"content", L"bbb"));
    phrase->add(newLucene<Term>(L"content", L"14"));
    w->deleteDocuments(phrase);

    w->addIndexesNoOptimize(newCollection<DirectoryPtr>(aux));

    w->optimize();
    w->commit();

    verifyNumDocs(dir, 1039);
    verifyTermDocs(dir, newLucene<Term>(L"content", L"aaa"), 1030);
    verifyTermDocs(dir, newLucene<Term>(L"content", L"bbb"), 9);

    w->close();
    dir->close();
    aux->close();
}
295
// Passing the destination directory itself to addIndexesNoOptimize must be
// rejected and must leave the destination index untouched.
TEST_F(AddIndexesNoOptimizeTest, testAddSelf) {
    DirectoryPtr dir = newLucene<RAMDirectory>();   // main directory
    DirectoryPtr aux = newLucene<RAMDirectory>();   // auxiliary directory

    IndexWriterPtr w = newWriter(dir, true);
    // add 100 documents
    addDocs(w, 100);
    EXPECT_EQ(100, w->maxDoc());
    w->close();

    w = newWriter(aux, true);
    w->setUseCompoundFile(false); // use one without a compound file
    w->setMaxBufferedDocs(1000);
    // add 40 documents in separate files
    addDocs(w, 40);
    w->close();
    // recreate aux from scratch with 100 documents
    w = newWriter(aux, true);
    w->setUseCompoundFile(false); // use one without a compound file
    w->setMaxBufferedDocs(1000);
    addDocs(w, 100);
    w->close();

    w = newWriter(dir, false);
    try {
        w->addIndexesNoOptimize(newCollection<DirectoryPtr>(aux, dir));
    } catch (LuceneException& e) {
        EXPECT_TRUE(check_exception(LuceneException::IllegalArgument)(e)); // cannot add self
    }
    EXPECT_EQ(100, w->maxDoc());

    w->close();

    // make sure the index is unchanged
    verifyNumDocs(dir, 100);

    dir->close();
    aux->close();
}
336
// Copied segments land after the existing ones; verify the resulting segment
// layout when no tail segments need merging.
TEST_F(AddIndexesNoOptimizeTest, testNoTailSegments) {
    DirectoryPtr dir = newLucene<RAMDirectory>();   // main directory (1000 docs)
    DirectoryPtr aux = newLucene<RAMDirectory>();   // auxiliary directory (30 docs)

    setUpDirs(dir, aux);

    IndexWriterPtr w = newWriter(dir, false);
    w->setMaxBufferedDocs(10);
    w->setMergeFactor(4);
    addDocs(w, 10);

    w->addIndexesNoOptimize(newCollection<DirectoryPtr>(aux));

    // 1000 + 10 + 30 documents in total
    EXPECT_EQ(1040, w->maxDoc());
    EXPECT_EQ(2, w->getSegmentCount());
    EXPECT_EQ(1000, w->getDocCount(0));
    w->close();

    // make sure the index is correct
    verifyNumDocs(dir, 1040);

    dir->close();
    aux->close();
}
363
// Verify the segment layout when incoming segments are merged rather than
// copied verbatim.
TEST_F(AddIndexesNoOptimizeTest, testNoCopySegments) {
    DirectoryPtr dir = newLucene<RAMDirectory>();   // main directory (1000 docs)
    DirectoryPtr aux = newLucene<RAMDirectory>();   // auxiliary directory (30 docs)

    setUpDirs(dir, aux);

    IndexWriterPtr w = newWriter(dir, false);
    w->setMaxBufferedDocs(9);
    w->setMergeFactor(4);
    addDocs(w, 2);

    w->addIndexesNoOptimize(newCollection<DirectoryPtr>(aux));

    // 1000 + 2 + 30 documents in total
    EXPECT_EQ(1032, w->maxDoc());
    EXPECT_EQ(2, w->getSegmentCount());
    EXPECT_EQ(1000, w->getDocCount(0));
    w->close();

    // make sure the index is correct
    verifyNumDocs(dir, 1032);

    dir->close();
    aux->close();
}
390
// Adding the same content twice (original aux plus a RAMDirectory copy of it)
// without triggering a merge of the copied segments.
TEST_F(AddIndexesNoOptimizeTest, testNoMergeAfterCopy) {
    DirectoryPtr dir = newLucene<RAMDirectory>();   // main directory (1000 docs)
    DirectoryPtr aux = newLucene<RAMDirectory>();   // auxiliary directory (30 docs)

    setUpDirs(dir, aux);

    IndexWriterPtr w = newWriter(dir, false);
    w->setMaxBufferedDocs(10);
    w->setMergeFactor(4);

    w->addIndexesNoOptimize(newCollection<DirectoryPtr>(aux, newLucene<RAMDirectory>(aux)));

    // 1000 + 30 + 30 documents in total
    EXPECT_EQ(1060, w->maxDoc());
    EXPECT_EQ(1000, w->getDocCount(0));
    w->close();

    // make sure the index is correct
    verifyNumDocs(dir, 1060);

    dir->close();
    aux->close();
}
415
// Deleted documents in the source index must be dropped when its segments are
// merged into the destination.
TEST_F(AddIndexesNoOptimizeTest, testMergeAfterCopy) {
    DirectoryPtr dir = newLucene<RAMDirectory>();   // main directory (1000 docs)
    DirectoryPtr aux = newLucene<RAMDirectory>();   // auxiliary directory (30 docs)

    setUpDirs(dir, aux);

    // delete docs 0..19 from aux, leaving 10 of its 30 documents live
    IndexReaderPtr r = IndexReader::open(aux, false);
    for (int32_t docId = 0; docId < 20; ++docId) {
        r->deleteDocument(docId);
    }
    EXPECT_EQ(10, r->numDocs());
    r->close();

    IndexWriterPtr w = newWriter(dir, false);
    w->setMaxBufferedDocs(4);
    w->setMergeFactor(4);

    w->addIndexesNoOptimize(newCollection<DirectoryPtr>(aux, newLucene<RAMDirectory>(aux)));

    // 1000 + 10 + 10 live documents in total
    EXPECT_EQ(1020, w->maxDoc());
    EXPECT_EQ(1000, w->getDocCount(0));
    w->close();

    // make sure the index is correct
    verifyNumDocs(dir, 1020);

    dir->close();
    aux->close();
}
447
// Merge two auxiliary indexes, each carrying deletions, into the main index
// and verify the final doc counts.
TEST_F(AddIndexesNoOptimizeTest, testMoreMerges) {
    // main directory
    DirectoryPtr dir = newLucene<RAMDirectory>();
    // auxiliary directories
    DirectoryPtr aux = newLucene<RAMDirectory>();
    DirectoryPtr aux2 = newLucene<RAMDirectory>();

    setUpDirs(dir, aux);

    // copy aux (30 docs in 3 segments) into aux2 without merging the segments
    IndexWriterPtr writer = newWriter(aux2, true);
    writer->setMaxBufferedDocs(100);
    writer->setMergeFactor(10);

    writer->addIndexesNoOptimize(newCollection<DirectoryPtr>(aux));

    EXPECT_EQ(30, writer->maxDoc());
    EXPECT_EQ(3, writer->getSegmentCount());
    writer->close();

    // delete 27 of the 30 docs in aux, leaving 3 live
    IndexReaderPtr reader = IndexReader::open(aux, false);
    for (int32_t i = 0; i < 27; ++i) {
        reader->deleteDocument(i);
    }
    EXPECT_EQ(3, reader->numDocs());
    reader->close();

    // delete 8 of the 30 docs in aux2, leaving 22 live
    reader = IndexReader::open(aux2, false);
    for (int32_t i = 0; i < 8; ++i) {
        reader->deleteDocument(i);
    }
    EXPECT_EQ(22, reader->numDocs());
    reader->close();

    writer = newWriter(dir, false);
    writer->setMaxBufferedDocs(6);
    writer->setMergeFactor(4);

    writer->addIndexesNoOptimize(newCollection<DirectoryPtr>(aux, aux2));

    // 1000 + 3 + 22 live documents in total
    EXPECT_EQ(1025, writer->maxDoc());
    EXPECT_EQ(1000, writer->getDocCount(0));
    writer->close();

    // make sure the index is correct
    verifyNumDocs(dir, 1025);

    dir->close();
    aux->close();
    aux2->close(); // fix: aux2 was previously left unclosed, unlike dir and aux
}
497
// Regression test: closing a writer after addIndexesNoOptimize, with a
// byte-size merge policy and serial merge scheduler, must not hang.
TEST_F(AddIndexesNoOptimizeTest, testHangOnClose) {
    DirectoryPtr dir = newLucene<MockRAMDirectory>();
    IndexWriterPtr writer = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
    writer->setMergePolicy(newLucene<LogByteSizeMergePolicy>(writer));
    writer->setMaxBufferedDocs(5);
    writer->setUseCompoundFile(false);
    writer->setMergeFactor(100);

    // indexed + stored doc carrying term vectors with positions and offsets
    DocumentPtr doc = newLucene<Document>();
    doc->add(newLucene<Field>(L"content", L"aaa bbb ccc ddd eee fff ggg hhh iii", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS));

    for (int32_t i = 0; i < 60; ++i) {
        writer->addDocument(doc);
    }
    writer->setMaxBufferedDocs(200);
    // stored-only doc (not indexed), with the same field repeated four times
    DocumentPtr doc2 = newLucene<Document>();

    doc2->add(newLucene<Field>(L"content", L"aaa bbb ccc ddd eee fff ggg hhh iii", Field::STORE_YES, Field::INDEX_NO));
    doc2->add(newLucene<Field>(L"content", L"aaa bbb ccc ddd eee fff ggg hhh iii", Field::STORE_YES, Field::INDEX_NO));
    doc2->add(newLucene<Field>(L"content", L"aaa bbb ccc ddd eee fff ggg hhh iii", Field::STORE_YES, Field::INDEX_NO));
    doc2->add(newLucene<Field>(L"content", L"aaa bbb ccc ddd eee fff ggg hhh iii", Field::STORE_YES, Field::INDEX_NO));

    for (int32_t i = 0; i < 60; ++i) {
        writer->addDocument(doc2);
    }
    writer->close();

    // merge the first index into a second one with an aggressive (tiny
    // minimum-merge-size) byte-size merge policy and a serial scheduler
    DirectoryPtr dir2 = newLucene<MockRAMDirectory>();
    writer = newLucene<IndexWriter>(dir2, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
    LogByteSizeMergePolicyPtr lmp = newLucene<LogByteSizeMergePolicy>(writer);
    lmp->setMinMergeMB(0.0001);
    writer->setMergePolicy(lmp);
    writer->setMergeFactor(4);
    writer->setUseCompoundFile(false);
    writer->setMergeScheduler(newLucene<SerialMergeScheduler>());
    writer->addIndexesNoOptimize(newCollection<DirectoryPtr>(dir));
    // the test passes if this close() returns instead of hanging
    writer->close();
    dir->close();
    dir2->close();
}
538
// The destination writer's compound-file setting must be respected when tail
// segments are copied from another index.
TEST_F(AddIndexesNoOptimizeTest, testTargetCFS) {
    DirectoryPtr source = newLucene<RAMDirectory>();
    IndexWriterPtr w = newWriter(source, true);
    w->setUseCompoundFile(false); // source index uses plain files
    addDocs(w, 1);
    w->close();

    DirectoryPtr dest = newLucene<RAMDirectory>();
    w = newWriter(dest, true);
    w->setUseCompoundFile(true);  // destination wants compound files
    w->addIndexesNoOptimize(newCollection<DirectoryPtr>(source));
    EXPECT_TRUE(w->newestSegment()->getUseCompoundFile());
    w->close();
}
554