1 /////////////////////////////////////////////////////////////////////////////
2 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
5 /////////////////////////////////////////////////////////////////////////////
6
7 #include "TestInc.h"
8 #include "LuceneTestFixture.h"
9 #include "TestUtils.h"
10 #include "SimpleFSDirectory.h"
11 #include "_SimpleFSDirectory.h"
12 #include "IndexOutput.h"
13 #include "IndexInput.h"
14 #include "CompoundFileWriter.h"
15 #include "CompoundFileReader.h"
16 #include "Random.h"
17 #include "MiscUtils.h"
18 #include "FileUtils.h"
19
20 using namespace Lucene;
21
22 class CompoundFileTest : public LuceneTestFixture {
23 public:
CompoundFileTest()24 CompoundFileTest() {
25 indexDir = FileUtils::joinPath(getTempDir(), L"testIndex");
26 FileUtils::removeDirectory(indexDir);
27
28 // use a simple FSDir here, to be sure to have SimpleFSInputs
29 dir = newLucene<SimpleFSDirectory>(indexDir);
30 }
31
~CompoundFileTest()32 virtual ~CompoundFileTest() {
33 dir->close();
34 FileUtils::removeDirectory(indexDir);
35 }
36
37 protected:
38 String indexDir;
39 DirectoryPtr dir;
40
41 public:
42 /// Creates a file of the specified size with random data.
createRandomFile(const DirectoryPtr & dir,const String & name,int32_t size)43 void createRandomFile(const DirectoryPtr& dir, const String& name, int32_t size) {
44 IndexOutputPtr os = dir->createOutput(name);
45 RandomPtr r = newLucene<Random>();
46 for (int32_t i = 0; i < size; ++i) {
47 os->writeByte((uint8_t)r->nextInt(256));
48 }
49 os->close();
50 }
51
createSequenceFile(const DirectoryPtr & dir,const String & name,uint8_t start,int32_t size)52 void createSequenceFile(const DirectoryPtr& dir, const String& name, uint8_t start, int32_t size) {
53 IndexOutputPtr os = dir->createOutput(name);
54 for (int32_t i = 0; i < size; ++i) {
55 os->writeByte(start);
56 ++start;
57 }
58 os->close();
59 }
60
checkSameStreams(const IndexInputPtr & expected,const IndexInputPtr & test)61 void checkSameStreams(const IndexInputPtr& expected, const IndexInputPtr& test) {
62 EXPECT_TRUE(expected);
63 EXPECT_TRUE(test);
64 EXPECT_EQ(expected->length(), test->length());
65 EXPECT_EQ(expected->getFilePointer(), test->getFilePointer());
66
67 ByteArray expectedBuffer(ByteArray::newInstance(512));
68 ByteArray testBuffer(ByteArray::newInstance(expectedBuffer.size()));
69
70 int64_t remainder = expected->length() - expected->getFilePointer();
71 while (remainder > 0) {
72 int32_t readLen = std::min((int32_t)remainder, expectedBuffer.size());
73 expected->readBytes(expectedBuffer.get(), 0, readLen);
74 test->readBytes(testBuffer.get(), 0, readLen);
75 checkEqualArrays(expectedBuffer, testBuffer, 0, readLen);
76 remainder -= readLen;
77 }
78 }
79
checkSameStreams(const IndexInputPtr & expected,const IndexInputPtr & actual,int64_t seekTo)80 void checkSameStreams(const IndexInputPtr& expected, const IndexInputPtr& actual, int64_t seekTo) {
81 if (seekTo >= 0 && seekTo < (int64_t)expected->length()) {
82 expected->seek(seekTo);
83 actual->seek(seekTo);
84 checkSameStreams(expected, actual);
85 }
86 }
87
checkSameSeekBehavior(const IndexInputPtr & expected,const IndexInputPtr & actual)88 void checkSameSeekBehavior(const IndexInputPtr& expected, const IndexInputPtr& actual) {
89 // seek to 0
90 int64_t point = 0;
91 checkSameStreams(expected, actual, point);
92
93 // seek to middle
94 point = expected->length() / 2l;
95 checkSameStreams(expected, actual, point);
96
97 // seek to end - 2
98 point = expected->length() - 2;
99 checkSameStreams(expected, actual, point);
100
101 // seek to end - 1
102 point = expected->length() - 1;
103 checkSameStreams(expected, actual, point);
104
105 // seek to the end
106 point = expected->length();
107 checkSameStreams(expected, actual, point);
108
109 // seek past end
110 point = expected->length() + 1;
111 checkSameStreams(expected, actual, point);
112 }
113
checkEqualArrays(ByteArray expected,ByteArray test,int32_t start,int32_t length)114 void checkEqualArrays(ByteArray expected, ByteArray test, int32_t start, int32_t length) {
115 EXPECT_TRUE(expected);
116 EXPECT_TRUE(test);
117 for (int32_t i = start; i < length; ++i) {
118 EXPECT_EQ(expected[i], test[i]);
119 }
120 }
121
122 /// Setup a larger compound file with a number of components, each of which is a sequential file (so that we can
123 /// easily tell that we are reading in the right byte). The methods sets up 20 files - f0 to f19, the size of each
124 /// file is 1000 bytes.
setUpLarger()125 void setUpLarger() {
126 CompoundFileWriterPtr cw = newLucene<CompoundFileWriter>(dir, L"f.comp");
127 for (int32_t i = 0; i < 20; ++i) {
128 createSequenceFile(dir, L"f" + StringUtils::toString(i), 0, 2000);
129 cw->addFile(L"f" + StringUtils::toString(i));
130 }
131 cw->close();
132 }
133
isCSIndexInputOpen(const IndexInputPtr & is)134 bool isCSIndexInputOpen(const IndexInputPtr& is) {
135 if (MiscUtils::typeOf<CSIndexInput>(is)) {
136 CSIndexInputPtr cis = boost::dynamic_pointer_cast<CSIndexInput>(is);
137 return isSimpleFSIndexInputOpen(cis->base);
138 } else {
139 return false;
140 }
141 }
142
isSimpleFSIndexInputOpen(const IndexInputPtr & is)143 bool isSimpleFSIndexInputOpen(const IndexInputPtr& is) {
144 if (MiscUtils::typeOf<SimpleFSIndexInput>(is)) {
145 SimpleFSIndexInputPtr fis = boost::dynamic_pointer_cast<SimpleFSIndexInput>(is);
146 return fis->isValid();
147 } else {
148 return false;
149 }
150 }
151 };
152
153 /// This test creates compound file based on a single file. Files of different sizes are tested: 0, 1, 10, 100 bytes.
TEST_F(CompoundFileTest,testSingleFile)154 TEST_F(CompoundFileTest, testSingleFile) {
155 IntArray data(IntArray::newInstance(4));
156 data[0] = 0;
157 data[1] = 1;
158 data[2] = 10;
159 data[3] = 100;
160 for (int32_t i = 0; i < data.size(); ++i) {
161 String name = L"t" + StringUtils::toString(data[i]);
162 createSequenceFile(dir, name, 0, data[i]);
163 CompoundFileWriterPtr csw = newLucene<CompoundFileWriter>(dir, name + L".cfs");
164 csw->addFile(name);
165 csw->close();
166
167 CompoundFileReaderPtr csr = newLucene<CompoundFileReader>(dir, name + L".cfs");
168 IndexInputPtr expected = dir->openInput(name);
169 IndexInputPtr actual = csr->openInput(name);
170 checkSameStreams(expected, actual);
171 checkSameSeekBehavior(expected, actual);
172 expected->close();
173 actual->close();
174 csr->close();
175 }
176 }
177
178 /// This test creates compound file based on two files.
TEST_F(CompoundFileTest,testTwoFiles)179 TEST_F(CompoundFileTest, testTwoFiles) {
180 createSequenceFile(dir, L"d1", 0, 15);
181 createSequenceFile(dir, L"d2", 0, 114);
182
183 CompoundFileWriterPtr csw = newLucene<CompoundFileWriter>(dir, L"d.csf");
184 csw->addFile(L"d1");
185 csw->addFile(L"d2");
186 csw->close();
187
188 CompoundFileReaderPtr csr = newLucene<CompoundFileReader>(dir, L"d.csf");
189 IndexInputPtr expected = dir->openInput(L"d1");
190 IndexInputPtr actual = csr->openInput(L"d1");
191 checkSameStreams(expected, actual);
192 checkSameSeekBehavior(expected, actual);
193 expected->close();
194 actual->close();
195
196 expected = dir->openInput(L"d2");
197 actual = csr->openInput(L"d2");
198 checkSameStreams(expected, actual);
199 checkSameSeekBehavior(expected, actual);
200 expected->close();
201 actual->close();
202 csr->close();
203 }
204
205 /// This test creates a compound file based on a large number of files of various length. The file content is generated randomly.
206 /// The sizes range from 0 to 1Mb. Some of the sizes are selected to test the buffering logic in the file reading code.
207 /// For this the chunk variable is set to the length of the buffer used internally by the compound file logic.
TEST_F(CompoundFileTest,testRandomFiles)208 TEST_F(CompoundFileTest, testRandomFiles) {
209 // Setup the test segment
210 String segment = L"test";
211 int32_t chunk = 1024; // internal buffer size used by the stream
212 createRandomFile(dir, segment + L".zero", 0);
213 createRandomFile(dir, segment + L".one", 1);
214 createRandomFile(dir, segment + L".ten", 10);
215 createRandomFile(dir, segment + L".hundred", 100);
216 createRandomFile(dir, segment + L".big1", chunk);
217 createRandomFile(dir, segment + L".big2", chunk - 1);
218 createRandomFile(dir, segment + L".big3", chunk + 1);
219 createRandomFile(dir, segment + L".big4", 3 * chunk);
220 createRandomFile(dir, segment + L".big5", 3 * chunk - 1);
221 createRandomFile(dir, segment + L".big6", 3 * chunk + 1);
222 createRandomFile(dir, segment + L".big7", 1000 * chunk);
223
224 // Setup extraneous files
225 createRandomFile(dir, L"onetwothree", 100);
226 createRandomFile(dir, segment + L".notIn", 50);
227 createRandomFile(dir, segment + L".notIn2", 51);
228
229 // Now test
230 CompoundFileWriterPtr csw = newLucene<CompoundFileWriter>(dir, L"test.cfs");
231
232 Collection<String> data(Collection<String>::newInstance());
233 data.add(L".zero");
234 data.add(L".one");
235 data.add(L".ten");
236 data.add(L".hundred");
237 data.add(L".big1");
238 data.add(L".big2");
239 data.add(L".big3");
240 data.add(L".big4");
241 data.add(L".big5");
242 data.add(L".big6");
243 data.add(L".big7");
244
245 for (Collection<String>::iterator name = data.begin(); name != data.end(); ++name) {
246 csw->addFile(segment + *name);
247 }
248 csw->close();
249
250 CompoundFileReaderPtr csr = newLucene<CompoundFileReader>(dir, L"test.cfs");
251 for (Collection<String>::iterator name = data.begin(); name != data.end(); ++name) {
252 IndexInputPtr check = dir->openInput(segment + *name);
253 IndexInputPtr test = csr->openInput(segment + *name);
254 checkSameStreams(check, test);
255 checkSameSeekBehavior(check, test);
256 test->close();
257 check->close();
258 }
259 csr->close();
260 }
261
/// Reads from a plain directory input after it has been closed. The first reads after close()
/// are tolerated (see the notes below); a read after seeking to position 1099 is required to
/// throw a LuceneException of type IO.
TEST_F(CompoundFileTest, testReadAfterClose) {
    // Setup the test file - we need more than 1024 bytes
    IndexOutputPtr os = dir->createOutput(L"test");
    for (int32_t i = 0; i < 2000; ++i) {
        os->writeByte((uint8_t)i);
    }
    os->close();

    IndexInputPtr in = dir->openInput(L"test");

    // This read primes the buffer in IndexInput
    in->readByte();

    // Close the file
    in->close();

    // ERROR: this call should fail, but succeeds because the buffer is still filled
    in->readByte();

    // ERROR: this call should fail, but succeeds for some reason as well
    in->seek(1099);

    try {
        in->readByte();
        // Without this the test would pass silently when no exception is thrown at all.
        ADD_FAILURE() << "expected readByte() on a closed input to throw an IO exception";
    } catch (LuceneException& e) {
        EXPECT_TRUE(check_exception(LuceneException::IO)(e));
    }
}
290
/// Opens a component of the compound file, clones it, and checks which of the streams still
/// report an open underlying file after closing the first stream, the compound reader and the
/// clone, in that order.
TEST_F(CompoundFileTest, testClonedStreamsClosing) {
    setUpLarger();

    CompoundFileReaderPtr reader = newLucene<CompoundFileReader>(dir, L"f.comp");

    // plain file used as the reference
    IndexInputPtr reference = dir->openInput(L"f11");

    // this test only works for FSIndexInput
    EXPECT_TRUE(MiscUtils::typeOf<SimpleFSIndexInput>(reference));
    EXPECT_TRUE(isSimpleFSIndexInputOpen(reference));

    IndexInputPtr original = reader->openInput(L"f11");
    EXPECT_TRUE(isCSIndexInputOpen(original));

    IndexInputPtr cloned = boost::dynamic_pointer_cast<IndexInput>(original->clone());
    EXPECT_TRUE(isCSIndexInputOpen(cloned));

    // both the original and its clone must match the reference
    checkSameStreams(reference, original);
    reference->seek(0);
    checkSameStreams(reference, cloned);

    // Closing the first stream does not close the underlying file: that only happens when the
    // compound reader itself is closed
    original->close();
    EXPECT_TRUE(isCSIndexInputOpen(original));

    // The following should really fail since we couldn't expect to access a file once close has been called
    // on it (regardless of buffering and/or clone magic)
    reference->seek(0);
    cloned->seek(0);
    checkSameStreams(reference, cloned);

    // Closing the compound reader closes the file below both streams
    reader->close();
    EXPECT_FALSE(isCSIndexInputOpen(original));
    EXPECT_FALSE(isCSIndexInputOpen(cloned));

    // The following may also fail since the compound stream is closed
    reference->seek(0);
    cloned->seek(0);

    // Finally close the clone as well
    cloned->close();
    reference->seek(0);
    cloned->seek(0);

    reference->close();
}
339
340 /// This test opens two files from a compound stream and verifies that their file positions are independent of each other.
TEST_F(CompoundFileTest,testRandomAccess)341 TEST_F(CompoundFileTest, testRandomAccess) {
342 setUpLarger();
343
344 CompoundFileReaderPtr cr = newLucene<CompoundFileReader>(dir, L"f.comp");
345
346 // Open two files
347 IndexInputPtr e1 = dir->openInput(L"f11");
348 IndexInputPtr e2 = dir->openInput(L"f3");
349
350 IndexInputPtr a1 = cr->openInput(L"f11");
351 IndexInputPtr a2 = dir->openInput(L"f3");
352
353 // Seek the first pair
354 e1->seek(100);
355 a1->seek(100);
356 EXPECT_EQ(100, e1->getFilePointer());
357 EXPECT_EQ(100, a1->getFilePointer());
358 uint8_t be1 = e1->readByte();
359 uint8_t ba1 = a1->readByte();
360 EXPECT_EQ(be1, ba1);
361
362 // Now seek the second pair
363 e2->seek(1027);
364 a2->seek(1027);
365 EXPECT_EQ(1027, e2->getFilePointer());
366 EXPECT_EQ(1027, a2->getFilePointer());
367 uint8_t be2 = e2->readByte();
368 uint8_t ba2 = a2->readByte();
369 EXPECT_EQ(be2, ba2);
370
371 // Now make sure the first one didn't move
372 EXPECT_EQ(101, e1->getFilePointer());
373 EXPECT_EQ(101, a1->getFilePointer());
374 be1 = e1->readByte();
375 ba1 = a1->readByte();
376 EXPECT_EQ(be1, ba1);
377
378 // Now more the first one again, past the buffer length
379 e1->seek(1910);
380 a1->seek(1910);
381 EXPECT_EQ(1910, e1->getFilePointer());
382 EXPECT_EQ(1910, a1->getFilePointer());
383 be1 = e1->readByte();
384 ba1 = a1->readByte();
385 EXPECT_EQ(be1, ba1);
386
387 // Now make sure the second set didn't move
388 EXPECT_EQ(1028, e2->getFilePointer());
389 EXPECT_EQ(1028, a2->getFilePointer());
390 be2 = e2->readByte();
391 ba2 = a2->readByte();
392 EXPECT_EQ(be2, ba2);
393
394 // Move the second set back, again cross the buffer size
395 e2->seek(17);
396 a2->seek(17);
397 EXPECT_EQ(17, e2->getFilePointer());
398 EXPECT_EQ(17, a2->getFilePointer());
399 be2 = e2->readByte();
400 ba2 = a2->readByte();
401 EXPECT_EQ(be2, ba2);
402
403 // Finally, make sure the first set didn't move
404 // Now make sure the first one didn't move
405 EXPECT_EQ(1911, e1->getFilePointer());
406 EXPECT_EQ(1911, a1->getFilePointer());
407 be1 = e1->readByte();
408 ba1 = a1->readByte();
409 EXPECT_EQ(be1, ba1);
410
411 e1->close();
412 e2->close();
413 a1->close();
414 a2->close();
415 cr->close();
416 }
417
418 /// This test opens two files from a compound stream and verifies that their file positions are independent of each other.
TEST_F(CompoundFileTest,testRandomAccessClones)419 TEST_F(CompoundFileTest, testRandomAccessClones) {
420 setUpLarger();
421
422 CompoundFileReaderPtr cr = newLucene<CompoundFileReader>(dir, L"f.comp");
423
424 // Open two files
425 IndexInputPtr e1 = cr->openInput(L"f11");
426 IndexInputPtr e2 = cr->openInput(L"f3");
427
428 IndexInputPtr a1 = boost::dynamic_pointer_cast<IndexInput>(e1->clone());
429 IndexInputPtr a2 = boost::dynamic_pointer_cast<IndexInput>(e2->clone());
430
431 // Seek the first pair
432 e1->seek(100);
433 a1->seek(100);
434 EXPECT_EQ(100, e1->getFilePointer());
435 EXPECT_EQ(100, a1->getFilePointer());
436 uint8_t be1 = e1->readByte();
437 uint8_t ba1 = a1->readByte();
438 EXPECT_EQ(be1, ba1);
439
440 // Now seek the second pair
441 e2->seek(1027);
442 a2->seek(1027);
443 EXPECT_EQ(1027, e2->getFilePointer());
444 EXPECT_EQ(1027, a2->getFilePointer());
445 uint8_t be2 = e2->readByte();
446 uint8_t ba2 = a2->readByte();
447 EXPECT_EQ(be2, ba2);
448
449 // Now make sure the first one didn't move
450 EXPECT_EQ(101, e1->getFilePointer());
451 EXPECT_EQ(101, a1->getFilePointer());
452 be1 = e1->readByte();
453 ba1 = a1->readByte();
454 EXPECT_EQ(be1, ba1);
455
456 // Now more the first one again, past the buffer length
457 e1->seek(1910);
458 a1->seek(1910);
459 EXPECT_EQ(1910, e1->getFilePointer());
460 EXPECT_EQ(1910, a1->getFilePointer());
461 be1 = e1->readByte();
462 ba1 = a1->readByte();
463 EXPECT_EQ(be1, ba1);
464
465 // Now make sure the second set didn't move
466 EXPECT_EQ(1028, e2->getFilePointer());
467 EXPECT_EQ(1028, a2->getFilePointer());
468 be2 = e2->readByte();
469 ba2 = a2->readByte();
470 EXPECT_EQ(be2, ba2);
471
472 // Move the second set back, again cross the buffer size
473 e2->seek(17);
474 a2->seek(17);
475 EXPECT_EQ(17, e2->getFilePointer());
476 EXPECT_EQ(17, a2->getFilePointer());
477 be2 = e2->readByte();
478 ba2 = a2->readByte();
479 EXPECT_EQ(be2, ba2);
480
481 // Finally, make sure the first set didn't move
482 // Now make sure the first one didn't move
483 EXPECT_EQ(1911, e1->getFilePointer());
484 EXPECT_EQ(1911, a1->getFilePointer());
485 be1 = e1->readByte();
486 ba1 = a1->readByte();
487 EXPECT_EQ(be1, ba1);
488
489 e1->close();
490 e2->close();
491 a1->close();
492 a2->close();
493 cr->close();
494 }
495
/// Opening a component that was never added to the compound file must throw a LuceneException
/// of type IO.
TEST_F(CompoundFileTest, testFileNotFound) {
    setUpLarger();

    CompoundFileReaderPtr cr = newLucene<CompoundFileReader>(dir, L"f.comp");
    IndexInputPtr e1;

    // Try to open a component that does not exist in the compound file
    try {
        e1 = cr->openInput(L"bogus");
        // Without this the test would pass silently when no exception is thrown at all.
        ADD_FAILURE() << "expected openInput() of a missing component to throw an IO exception";
    } catch (LuceneException& e) {
        EXPECT_TRUE(check_exception(LuceneException::IO)(e));
    }

    cr->close();
}
511
/// Reading beyond the end of a compound file component must throw a LuceneException of type IO,
/// both for a single-byte read at the end and for a bulk read that starts before the end but
/// asks for more bytes than remain.
TEST_F(CompoundFileTest, testReadPastEOF) {
    setUpLarger();

    CompoundFileReaderPtr cr = newLucene<CompoundFileReader>(dir, L"f.comp");
    IndexInputPtr is = cr->openInput(L"f2");

    // consume the last 10 bytes so the stream is positioned exactly at its end
    is->seek(is->length() - 10);
    ByteArray b(ByteArray::newInstance(100));
    is->readBytes(b.get(), 0, 10);

    try {
        is->readByte();
        // Without this the test would pass silently when no exception is thrown at all.
        ADD_FAILURE() << "expected readByte() at end of file to throw an IO exception";
    } catch (LuceneException& e) {
        EXPECT_TRUE(check_exception(LuceneException::IO)(e));
    }

    // only 10 bytes remain from here, so asking for 50 has to run past the end
    is->seek(is->length() - 10);

    try {
        is->readBytes(b.get(), 0, 50);
        ADD_FAILURE() << "expected readBytes() past end of file to throw an IO exception";
    } catch (LuceneException& e) {
        EXPECT_TRUE(check_exception(LuceneException::IO)(e));
    }

    is->close();
    cr->close();
}
539
540 /// This test that writes larger than the size of the buffer output will correctly increment the file pointer.
TEST_F(CompoundFileTest,testLargeWrites)541 TEST_F(CompoundFileTest, testLargeWrites) {
542 IndexOutputPtr os = dir->createOutput(L"testBufferStart.txt");
543 RandomPtr r = newLucene<Random>();
544
545 ByteArray largeBuf(ByteArray::newInstance(2048));
546 for (int32_t i = 0; i < largeBuf.size(); ++i) {
547 largeBuf[i] = (uint8_t)r->nextInt(256);
548 }
549
550 int64_t currentPos = os->getFilePointer();
551 os->writeBytes(largeBuf.get(), largeBuf.size());
552
553 EXPECT_EQ(currentPos + largeBuf.size(), os->getFilePointer());
554
555 os->close();
556 }
557