1 /////////////////////////////////////////////////////////////////////////////
2 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
5 /////////////////////////////////////////////////////////////////////////////
6 
7 #include "TestInc.h"
8 #include "LuceneTestFixture.h"
9 #include "TestUtils.h"
10 #include "SimpleFSDirectory.h"
11 #include "_SimpleFSDirectory.h"
12 #include "IndexOutput.h"
13 #include "IndexInput.h"
14 #include "CompoundFileWriter.h"
15 #include "CompoundFileReader.h"
16 #include "Random.h"
17 #include "MiscUtils.h"
18 #include "FileUtils.h"
19 
20 using namespace Lucene;
21 
22 class CompoundFileTest : public LuceneTestFixture {
23 public:
CompoundFileTest()24     CompoundFileTest() {
25         indexDir = FileUtils::joinPath(getTempDir(), L"testIndex");
26         FileUtils::removeDirectory(indexDir);
27 
28         // use a simple FSDir here, to be sure to have SimpleFSInputs
29         dir = newLucene<SimpleFSDirectory>(indexDir);
30     }
31 
~CompoundFileTest()32     virtual ~CompoundFileTest() {
33         dir->close();
34         FileUtils::removeDirectory(indexDir);
35     }
36 
37 protected:
38     String indexDir;
39     DirectoryPtr dir;
40 
41 public:
42     /// Creates a file of the specified size with random data.
createRandomFile(const DirectoryPtr & dir,const String & name,int32_t size)43     void createRandomFile(const DirectoryPtr& dir, const String& name, int32_t size) {
44         IndexOutputPtr os = dir->createOutput(name);
45         RandomPtr r = newLucene<Random>();
46         for (int32_t i = 0; i < size; ++i) {
47             os->writeByte((uint8_t)r->nextInt(256));
48         }
49         os->close();
50     }
51 
createSequenceFile(const DirectoryPtr & dir,const String & name,uint8_t start,int32_t size)52     void createSequenceFile(const DirectoryPtr& dir, const String& name, uint8_t start, int32_t size) {
53         IndexOutputPtr os = dir->createOutput(name);
54         for (int32_t i = 0; i < size; ++i) {
55             os->writeByte(start);
56             ++start;
57         }
58         os->close();
59     }
60 
checkSameStreams(const IndexInputPtr & expected,const IndexInputPtr & test)61     void checkSameStreams(const IndexInputPtr& expected, const IndexInputPtr& test) {
62         EXPECT_TRUE(expected);
63         EXPECT_TRUE(test);
64         EXPECT_EQ(expected->length(), test->length());
65         EXPECT_EQ(expected->getFilePointer(), test->getFilePointer());
66 
67         ByteArray expectedBuffer(ByteArray::newInstance(512));
68         ByteArray testBuffer(ByteArray::newInstance(expectedBuffer.size()));
69 
70         int64_t remainder = expected->length() - expected->getFilePointer();
71         while (remainder > 0) {
72             int32_t readLen = std::min((int32_t)remainder, expectedBuffer.size());
73             expected->readBytes(expectedBuffer.get(), 0, readLen);
74             test->readBytes(testBuffer.get(), 0, readLen);
75             checkEqualArrays(expectedBuffer, testBuffer, 0, readLen);
76             remainder -= readLen;
77         }
78     }
79 
checkSameStreams(const IndexInputPtr & expected,const IndexInputPtr & actual,int64_t seekTo)80     void checkSameStreams(const IndexInputPtr& expected, const IndexInputPtr& actual, int64_t seekTo) {
81         if (seekTo >= 0 && seekTo < (int64_t)expected->length()) {
82             expected->seek(seekTo);
83             actual->seek(seekTo);
84             checkSameStreams(expected, actual);
85         }
86     }
87 
checkSameSeekBehavior(const IndexInputPtr & expected,const IndexInputPtr & actual)88     void checkSameSeekBehavior(const IndexInputPtr& expected, const IndexInputPtr& actual) {
89         // seek to 0
90         int64_t point = 0;
91         checkSameStreams(expected, actual, point);
92 
93         // seek to middle
94         point = expected->length() / 2l;
95         checkSameStreams(expected, actual, point);
96 
97         // seek to end - 2
98         point = expected->length() - 2;
99         checkSameStreams(expected, actual, point);
100 
101         // seek to end - 1
102         point = expected->length() - 1;
103         checkSameStreams(expected, actual, point);
104 
105         // seek to the end
106         point = expected->length();
107         checkSameStreams(expected, actual, point);
108 
109         // seek past end
110         point = expected->length() + 1;
111         checkSameStreams(expected, actual, point);
112     }
113 
checkEqualArrays(ByteArray expected,ByteArray test,int32_t start,int32_t length)114     void checkEqualArrays(ByteArray expected, ByteArray test, int32_t start, int32_t length) {
115         EXPECT_TRUE(expected);
116         EXPECT_TRUE(test);
117         for (int32_t i = start; i < length; ++i) {
118             EXPECT_EQ(expected[i], test[i]);
119         }
120     }
121 
122     /// Setup a larger compound file with a number of components, each of which is a sequential file (so that we can
123     /// easily tell that we are reading in the right byte). The methods sets up 20 files - f0 to f19, the size of each
124     /// file is 1000 bytes.
setUpLarger()125     void setUpLarger() {
126         CompoundFileWriterPtr cw = newLucene<CompoundFileWriter>(dir, L"f.comp");
127         for (int32_t i = 0; i < 20; ++i) {
128             createSequenceFile(dir, L"f" + StringUtils::toString(i), 0, 2000);
129             cw->addFile(L"f" + StringUtils::toString(i));
130         }
131         cw->close();
132     }
133 
isCSIndexInputOpen(const IndexInputPtr & is)134     bool isCSIndexInputOpen(const IndexInputPtr& is) {
135         if (MiscUtils::typeOf<CSIndexInput>(is)) {
136             CSIndexInputPtr cis = boost::dynamic_pointer_cast<CSIndexInput>(is);
137             return isSimpleFSIndexInputOpen(cis->base);
138         } else {
139             return false;
140         }
141     }
142 
isSimpleFSIndexInputOpen(const IndexInputPtr & is)143     bool isSimpleFSIndexInputOpen(const IndexInputPtr& is) {
144         if (MiscUtils::typeOf<SimpleFSIndexInput>(is)) {
145             SimpleFSIndexInputPtr fis = boost::dynamic_pointer_cast<SimpleFSIndexInput>(is);
146             return fis->isValid();
147         } else {
148             return false;
149         }
150     }
151 };
152 
153 /// This test creates compound file based on a single file.  Files of different sizes are tested: 0, 1, 10, 100 bytes.
TEST_F(CompoundFileTest,testSingleFile)154 TEST_F(CompoundFileTest, testSingleFile) {
155     IntArray data(IntArray::newInstance(4));
156     data[0] = 0;
157     data[1] = 1;
158     data[2] = 10;
159     data[3] = 100;
160     for (int32_t i = 0; i < data.size(); ++i) {
161         String name = L"t" + StringUtils::toString(data[i]);
162         createSequenceFile(dir, name, 0, data[i]);
163         CompoundFileWriterPtr csw = newLucene<CompoundFileWriter>(dir, name + L".cfs");
164         csw->addFile(name);
165         csw->close();
166 
167         CompoundFileReaderPtr csr = newLucene<CompoundFileReader>(dir, name + L".cfs");
168         IndexInputPtr expected = dir->openInput(name);
169         IndexInputPtr actual = csr->openInput(name);
170         checkSameStreams(expected, actual);
171         checkSameSeekBehavior(expected, actual);
172         expected->close();
173         actual->close();
174         csr->close();
175     }
176 }
177 
178 /// This test creates compound file based on two files.
TEST_F(CompoundFileTest,testTwoFiles)179 TEST_F(CompoundFileTest, testTwoFiles) {
180     createSequenceFile(dir, L"d1", 0, 15);
181     createSequenceFile(dir, L"d2", 0, 114);
182 
183     CompoundFileWriterPtr csw = newLucene<CompoundFileWriter>(dir, L"d.csf");
184     csw->addFile(L"d1");
185     csw->addFile(L"d2");
186     csw->close();
187 
188     CompoundFileReaderPtr csr = newLucene<CompoundFileReader>(dir, L"d.csf");
189     IndexInputPtr expected = dir->openInput(L"d1");
190     IndexInputPtr actual = csr->openInput(L"d1");
191     checkSameStreams(expected, actual);
192     checkSameSeekBehavior(expected, actual);
193     expected->close();
194     actual->close();
195 
196     expected = dir->openInput(L"d2");
197     actual = csr->openInput(L"d2");
198     checkSameStreams(expected, actual);
199     checkSameSeekBehavior(expected, actual);
200     expected->close();
201     actual->close();
202     csr->close();
203 }
204 
205 /// This test creates a compound file based on a large number of files of various length. The file content is generated randomly.
206 /// The sizes range from 0 to 1Mb.  Some of the sizes are selected to test the buffering logic in the file reading code.
207 /// For this the chunk variable is set to the length of the buffer used internally by the compound file logic.
TEST_F(CompoundFileTest,testRandomFiles)208 TEST_F(CompoundFileTest, testRandomFiles) {
209     // Setup the test segment
210     String segment = L"test";
211     int32_t chunk = 1024; // internal buffer size used by the stream
212     createRandomFile(dir, segment + L".zero", 0);
213     createRandomFile(dir, segment + L".one", 1);
214     createRandomFile(dir, segment + L".ten", 10);
215     createRandomFile(dir, segment + L".hundred", 100);
216     createRandomFile(dir, segment + L".big1", chunk);
217     createRandomFile(dir, segment + L".big2", chunk - 1);
218     createRandomFile(dir, segment + L".big3", chunk + 1);
219     createRandomFile(dir, segment + L".big4", 3 * chunk);
220     createRandomFile(dir, segment + L".big5", 3 * chunk - 1);
221     createRandomFile(dir, segment + L".big6", 3 * chunk + 1);
222     createRandomFile(dir, segment + L".big7", 1000 * chunk);
223 
224     // Setup extraneous files
225     createRandomFile(dir, L"onetwothree", 100);
226     createRandomFile(dir, segment + L".notIn", 50);
227     createRandomFile(dir, segment + L".notIn2", 51);
228 
229     // Now test
230     CompoundFileWriterPtr csw = newLucene<CompoundFileWriter>(dir, L"test.cfs");
231 
232     Collection<String> data(Collection<String>::newInstance());
233     data.add(L".zero");
234     data.add(L".one");
235     data.add(L".ten");
236     data.add(L".hundred");
237     data.add(L".big1");
238     data.add(L".big2");
239     data.add(L".big3");
240     data.add(L".big4");
241     data.add(L".big5");
242     data.add(L".big6");
243     data.add(L".big7");
244 
245     for (Collection<String>::iterator name = data.begin(); name != data.end(); ++name) {
246         csw->addFile(segment + *name);
247     }
248     csw->close();
249 
250     CompoundFileReaderPtr csr = newLucene<CompoundFileReader>(dir, L"test.cfs");
251     for (Collection<String>::iterator name = data.begin(); name != data.end(); ++name) {
252         IndexInputPtr check = dir->openInput(segment + *name);
253         IndexInputPtr test = csr->openInput(segment + *name);
254         checkSameStreams(check, test);
255         checkSameSeekBehavior(check, test);
256         test->close();
257         check->close();
258     }
259     csr->close();
260 }
261 
/// Checks that reading from a closed input eventually fails with an IO exception: after seeking away from the
/// data that was buffered before the close, the next read is expected to throw.
TEST_F(CompoundFileTest, testReadAfterClose) {
    // Setup the test file - we need more than 1024 bytes
    IndexOutputPtr os = dir->createOutput(L"test");
    for (int32_t i = 0; i < 2000; ++i) {
        os->writeByte((uint8_t)i);
    }
    os->close();

    IndexInputPtr in = dir->openInput(L"test");

    // This read primes the buffer in IndexInput
    in->readByte();

    // Close the file
    in->close();

    // ERROR: this call should fail, but succeeds because the buffer is still filled
    in->readByte();

    // ERROR: this call should fail, but succeeds for some reason as well
    in->seek(1099);

    try {
        in->readByte();
        // Without this the test would pass vacuously when no exception is raised
        ADD_FAILURE() << "expected readByte() on a closed input to throw an IO exception";
    } catch (LuceneException& e) {
        EXPECT_TRUE(check_exception(LuceneException::IO)(e));
    }
}
290 
/// Walks through the life cycle of a compound stream and its clone: both report as open until the compound
/// reader itself is closed, independent of close() calls on the individual streams.
TEST_F(CompoundFileTest, testClonedStreamsClosing) {
    setUpLarger();

    CompoundFileReaderPtr reader = newLucene<CompoundFileReader>(dir, L"f.comp");

    // Reference stream, read straight from the directory
    IndexInputPtr plain = dir->openInput(L"f11");

    // this test only works for FSIndexInput
    EXPECT_TRUE(MiscUtils::typeOf<SimpleFSIndexInput>(plain));
    EXPECT_TRUE(isSimpleFSIndexInputOpen(plain));

    IndexInputPtr original = reader->openInput(L"f11");
    EXPECT_TRUE(isCSIndexInputOpen(original));

    IndexInputPtr copy = boost::dynamic_pointer_cast<IndexInput>(original->clone());
    EXPECT_TRUE(isCSIndexInputOpen(copy));

    // Both the original compound stream and its clone must match the reference
    checkSameStreams(plain, original);
    plain->seek(0);
    checkSameStreams(plain, copy);

    // Closing the first stream leaves it open underneath - that only changes once the reader is closed
    original->close();
    EXPECT_TRUE(isCSIndexInputOpen(original));

    // The following should really fail since we couldn't expect to access a file once close has been called
    // on it (regardless of buffering and/or clone magic)
    plain->seek(0);
    copy->seek(0);
    checkSameStreams(plain, copy); // basic clone two/2

    // Closing the compound reader closes both compound streams
    reader->close();
    EXPECT_FALSE(isCSIndexInputOpen(original));
    EXPECT_FALSE(isCSIndexInputOpen(copy));

    // The following may also fail since the compound stream is closed
    plain->seek(0);
    copy->seek(0);

    // Finally close the clone as well
    copy->close();
    plain->seek(0);
    copy->seek(0);

    plain->close();
}
339 
340 /// This test opens two files from a compound stream and verifies that their file positions are independent of each other.
TEST_F(CompoundFileTest,testRandomAccess)341 TEST_F(CompoundFileTest, testRandomAccess) {
342     setUpLarger();
343 
344     CompoundFileReaderPtr cr = newLucene<CompoundFileReader>(dir, L"f.comp");
345 
346     // Open two files
347     IndexInputPtr e1 = dir->openInput(L"f11");
348     IndexInputPtr e2 = dir->openInput(L"f3");
349 
350     IndexInputPtr a1 = cr->openInput(L"f11");
351     IndexInputPtr a2 = dir->openInput(L"f3");
352 
353     // Seek the first pair
354     e1->seek(100);
355     a1->seek(100);
356     EXPECT_EQ(100, e1->getFilePointer());
357     EXPECT_EQ(100, a1->getFilePointer());
358     uint8_t be1 = e1->readByte();
359     uint8_t ba1 = a1->readByte();
360     EXPECT_EQ(be1, ba1);
361 
362     // Now seek the second pair
363     e2->seek(1027);
364     a2->seek(1027);
365     EXPECT_EQ(1027, e2->getFilePointer());
366     EXPECT_EQ(1027, a2->getFilePointer());
367     uint8_t be2 = e2->readByte();
368     uint8_t ba2 = a2->readByte();
369     EXPECT_EQ(be2, ba2);
370 
371     // Now make sure the first one didn't move
372     EXPECT_EQ(101, e1->getFilePointer());
373     EXPECT_EQ(101, a1->getFilePointer());
374     be1 = e1->readByte();
375     ba1 = a1->readByte();
376     EXPECT_EQ(be1, ba1);
377 
378     // Now more the first one again, past the buffer length
379     e1->seek(1910);
380     a1->seek(1910);
381     EXPECT_EQ(1910, e1->getFilePointer());
382     EXPECT_EQ(1910, a1->getFilePointer());
383     be1 = e1->readByte();
384     ba1 = a1->readByte();
385     EXPECT_EQ(be1, ba1);
386 
387     // Now make sure the second set didn't move
388     EXPECT_EQ(1028, e2->getFilePointer());
389     EXPECT_EQ(1028, a2->getFilePointer());
390     be2 = e2->readByte();
391     ba2 = a2->readByte();
392     EXPECT_EQ(be2, ba2);
393 
394     // Move the second set back, again cross the buffer size
395     e2->seek(17);
396     a2->seek(17);
397     EXPECT_EQ(17, e2->getFilePointer());
398     EXPECT_EQ(17, a2->getFilePointer());
399     be2 = e2->readByte();
400     ba2 = a2->readByte();
401     EXPECT_EQ(be2, ba2);
402 
403     // Finally, make sure the first set didn't move
404     // Now make sure the first one didn't move
405     EXPECT_EQ(1911, e1->getFilePointer());
406     EXPECT_EQ(1911, a1->getFilePointer());
407     be1 = e1->readByte();
408     ba1 = a1->readByte();
409     EXPECT_EQ(be1, ba1);
410 
411     e1->close();
412     e2->close();
413     a1->close();
414     a2->close();
415     cr->close();
416 }
417 
418 /// This test opens two files from a compound stream and verifies that their file positions are independent of each other.
TEST_F(CompoundFileTest,testRandomAccessClones)419 TEST_F(CompoundFileTest, testRandomAccessClones) {
420     setUpLarger();
421 
422     CompoundFileReaderPtr cr = newLucene<CompoundFileReader>(dir, L"f.comp");
423 
424     // Open two files
425     IndexInputPtr e1 = cr->openInput(L"f11");
426     IndexInputPtr e2 = cr->openInput(L"f3");
427 
428     IndexInputPtr a1 = boost::dynamic_pointer_cast<IndexInput>(e1->clone());
429     IndexInputPtr a2 = boost::dynamic_pointer_cast<IndexInput>(e2->clone());
430 
431     // Seek the first pair
432     e1->seek(100);
433     a1->seek(100);
434     EXPECT_EQ(100, e1->getFilePointer());
435     EXPECT_EQ(100, a1->getFilePointer());
436     uint8_t be1 = e1->readByte();
437     uint8_t ba1 = a1->readByte();
438     EXPECT_EQ(be1, ba1);
439 
440     // Now seek the second pair
441     e2->seek(1027);
442     a2->seek(1027);
443     EXPECT_EQ(1027, e2->getFilePointer());
444     EXPECT_EQ(1027, a2->getFilePointer());
445     uint8_t be2 = e2->readByte();
446     uint8_t ba2 = a2->readByte();
447     EXPECT_EQ(be2, ba2);
448 
449     // Now make sure the first one didn't move
450     EXPECT_EQ(101, e1->getFilePointer());
451     EXPECT_EQ(101, a1->getFilePointer());
452     be1 = e1->readByte();
453     ba1 = a1->readByte();
454     EXPECT_EQ(be1, ba1);
455 
456     // Now more the first one again, past the buffer length
457     e1->seek(1910);
458     a1->seek(1910);
459     EXPECT_EQ(1910, e1->getFilePointer());
460     EXPECT_EQ(1910, a1->getFilePointer());
461     be1 = e1->readByte();
462     ba1 = a1->readByte();
463     EXPECT_EQ(be1, ba1);
464 
465     // Now make sure the second set didn't move
466     EXPECT_EQ(1028, e2->getFilePointer());
467     EXPECT_EQ(1028, a2->getFilePointer());
468     be2 = e2->readByte();
469     ba2 = a2->readByte();
470     EXPECT_EQ(be2, ba2);
471 
472     // Move the second set back, again cross the buffer size
473     e2->seek(17);
474     a2->seek(17);
475     EXPECT_EQ(17, e2->getFilePointer());
476     EXPECT_EQ(17, a2->getFilePointer());
477     be2 = e2->readByte();
478     ba2 = a2->readByte();
479     EXPECT_EQ(be2, ba2);
480 
481     // Finally, make sure the first set didn't move
482     // Now make sure the first one didn't move
483     EXPECT_EQ(1911, e1->getFilePointer());
484     EXPECT_EQ(1911, a1->getFilePointer());
485     be1 = e1->readByte();
486     ba1 = a1->readByte();
487     EXPECT_EQ(be1, ba1);
488 
489     e1->close();
490     e2->close();
491     a1->close();
492     a2->close();
493     cr->close();
494 }
495 
/// Checks that asking the compound reader for a file that was never added fails with an IO exception.
TEST_F(CompoundFileTest, testFileNotFound) {
    setUpLarger();

    CompoundFileReaderPtr cr = newLucene<CompoundFileReader>(dir, L"f.comp");
    IndexInputPtr e1;

    // Try to open a file that is not part of the compound file
    try {
        e1 = cr->openInput(L"bogus");
        // Without this the test would pass vacuously when no exception is raised
        ADD_FAILURE() << "expected openInput() of a missing sub-file to throw an IO exception";
    } catch (LuceneException& e) {
        EXPECT_TRUE(check_exception(LuceneException::IO)(e));
    }

    // Only reached with a stream when the open unexpectedly succeeded - do not leave it open
    if (e1) {
        e1->close();
    }

    cr->close();
}
511 
/// Checks that reading beyond the end of a file inside a compound file fails with an IO exception, both for a
/// single byte read and for a block read that starts inside the file but extends past its end.
TEST_F(CompoundFileTest, testReadPastEOF) {
    setUpLarger();

    CompoundFileReaderPtr cr = newLucene<CompoundFileReader>(dir, L"f.comp");
    IndexInputPtr is = cr->openInput(L"f2");

    // Consume exactly the last 10 bytes, leaving the stream positioned at its end
    is->seek(is->length() - 10);
    ByteArray b(ByteArray::newInstance(100));
    is->readBytes(b.get(), 0, 10);

    try {
        is->readByte();
        // Without this the test would pass vacuously when no exception is raised
        ADD_FAILURE() << "expected readByte() at the end of the file to throw an IO exception";
    } catch (LuceneException& e) {
        EXPECT_TRUE(check_exception(LuceneException::IO)(e));
    }

    // Only 10 bytes are left from here, so asking for 50 has to run past the end
    is->seek(is->length() - 10);

    try {
        is->readBytes(b.get(), 0, 50);
        ADD_FAILURE() << "expected readBytes() past the end of the file to throw an IO exception";
    } catch (LuceneException& e) {
        EXPECT_TRUE(check_exception(LuceneException::IO)(e));
    }

    is->close();
    cr->close();
}
539 
540 /// This test that writes larger than the size of the buffer output will correctly increment the file pointer.
TEST_F(CompoundFileTest,testLargeWrites)541 TEST_F(CompoundFileTest, testLargeWrites) {
542     IndexOutputPtr os = dir->createOutput(L"testBufferStart.txt");
543     RandomPtr r = newLucene<Random>();
544 
545     ByteArray largeBuf(ByteArray::newInstance(2048));
546     for (int32_t i = 0; i < largeBuf.size(); ++i) {
547         largeBuf[i] = (uint8_t)r->nextInt(256);
548     }
549 
550     int64_t currentPos = os->getFilePointer();
551     os->writeBytes(largeBuf.get(), largeBuf.size());
552 
553     EXPECT_EQ(currentPos + largeBuf.size(), os->getFilePointer());
554 
555     os->close();
556 }
557