1 /*
2  * Copyright (C) 2020 Veloman Yunkan
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License as
6  * published by the Free Software Foundation; either version 2 of the
7  * License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful, but
10  * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
11  * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
12  * NON-INFRINGEMENT.  See the GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
17  *
18  */
19 
#include <zim/zim.h>
#include <zim/file.h>

#include <stdexcept>

#include "tools.h"
#include "../src/fs.h"

#include "gtest/gtest.h"
27 
28 namespace
29 {
30 
31 using zim::unittests::TempFile;
32 
// Ordered collection of (label, value) string pairs describing the
// parameters of the test case currently being executed.
using TestContextImpl = std::vector<std::pair<std::string, std::string> >;

// Thin wrapper around TestContextImpl; being a distinct type lets it have
// its own stream output operator.
struct TestContext : public TestContextImpl {
  // Builds the context from a braced list of {label, value} pairs,
  // preserving their order.
  TestContext(const std::initializer_list<value_type>& entries)
    : TestContextImpl(entries.begin(), entries.end())
  {}
};
39 
operator <<(std::ostream & out,const TestContext & ctx)40 std::ostream& operator<<(std::ostream& out, const TestContext& ctx)
41 {
42   out << "Test context:\n";
43   for ( const auto& kv : ctx )
44     out << "\t" << kv.first << ": " << kv.second << "\n";
45   out << std::endl;
46   return out;
47 }
48 
// Returns the raw bytes (96 in total) of a ZIM file that contains no
// articles and no clusters: 80 bytes of header fields followed by a
// 16-byte md5sum.
std::string
emptyZimFileContent()
{
  // A 4-byte field holding zero.
  const std::string zeroWord(4, '\0');

  // An 8-byte position field whose first byte is 0x50 (80) and whose other
  // bytes are zero. 80 is the combined size of the fields that precede the
  // md5sum below.
  const std::string pos80 = std::string(1, '\x50') + std::string(7, '\0');

  std::string content("ZIM\x04");              // Magic
  content.append(1, '\x05').append(3, '\0');   // Version
  content.append(16, '\0');                    // uuid
  content += zeroWord;                         // article count
  content += zeroWord;                         // cluster count
  content += pos80;                            // url ptr pos
  content += pos80;                            // title ptr pos
  content += pos80;                            // cluster ptr pos
  content += pos80;                            // mimelist ptr pos
  content += zeroWord;                         // main page index
  content += zeroWord;                         // layout page index
  content += pos80;                            // checksum pos
  content += "\x8a\xbb\xad\x98\x64\xd5\x48\xb2\xb9\x71\xab\x30\xed\x29\xa4\x01"; // md5sum
  return content;
}
68 
69 std::unique_ptr<TempFile>
makeTempFile(const char * name,const std::string & content)70 makeTempFile(const char* name, const std::string& content)
71 {
72   std::unique_ptr<TempFile> p(new TempFile(name));
73   write(p->fd(), &content[0], content.size());
74   p->close();
75   return p;
76 }
77 
78 
// Opening a file made of an optional ZIM magic prefix followed by 0 to 90
// copies (in steps of 10) of a single byte value must throw
// std::runtime_error, whatever the combination.
TEST(ZimFile, openingAnInvalidZimFileFails)
{
  const char* const prefixes[] = { "ZIM\x04", "" };
  const unsigned char fillers[] = {0x00, 0x01, 0x11, 0x30, 0xFF};
  for ( const std::string prefix : prefixes ) {
    const char* const prefixPresent = prefix.empty() ? "no" : "yes";
    for ( const unsigned char filler : fillers ) {
      for ( int length = 0; length < 100; length += 10 ) {
        const std::string zimfileContent = prefix + std::string(length, filler);
        const auto tmpfile = makeTempFile("invalid_zim_file", zimfileContent);

        // Reported only when the expectation below fails.
        const TestContext ctx{
                {"prefix", prefixPresent },
                {"byte", std::to_string(filler) },
                {"count", std::to_string(length) }
        };
        EXPECT_THROW( zim::File(tmpfile->path()), std::runtime_error ) << ctx;
      }
    }
  }
}
99 
// A well-formed ZIM file without any content must open without throwing
// and must pass verify().
TEST(ZimFile, openingAnEmptyZimFileSucceeds)
{
  const std::string content = emptyZimFileContent();
  const auto tmpfile = makeTempFile("empty_zim_file", content);

  zim::File emptyZim(tmpfile->path());
  ASSERT_TRUE(emptyZim.verify());
}
107 
// Tells whether `offset` lies outside the two byte ranges of the header
// that the nastyEmptyZimFile test leaves untouched:
//   [6, 24)  - minor version or uuid
//   [64, 72) - page or layout index
// Returns false for offsets inside either range and true for all others.
bool isNastyOffset(int offset) {
  const bool inMinorVersionOrUuid  = 6 <= offset && offset < 24;
  const bool inPageOrLayoutIndex   = 64 <= offset && offset < 72;
  return !(inMinorVersionOrUuid || inPageOrLayoutIndex);
}
117 
// For every "nasty" offset within the first 80 bytes of an otherwise valid
// empty ZIM file, overwriting that single byte with 0xff must make opening
// the file throw std::runtime_error.
TEST(ZimFile, nastyEmptyZimFile)
{
  const std::string pristine = emptyZimFileContent();
  for ( int offset = 0; offset < 80; ++offset ) {
    if ( !isNastyOffset(offset) )
      continue;

    std::string corrupted(pristine);
    corrupted[offset] = '\xff';
    const auto tmpfile = makeTempFile("wrong_checksum_empty_zim_file", corrupted);

    // Reported only when the expectation below fails.
    const TestContext ctx{ {"offset", std::to_string(offset) } };
    EXPECT_THROW( zim::File(tmpfile->path()), std::runtime_error ) << ctx;
  }
}
131 
// Altering one byte of the trailing md5sum (byte 85 of the file) must not
// prevent the file from being opened, but verify() must then return false.
TEST(ZimFile, wrongChecksumInEmptyZimFile)
{
  std::string tampered = emptyZimFileContent();
  tampered[85] = '\xff';
  const auto tmpfile = makeTempFile("wrong_checksum_empty_zim_file", tampered);

  zim::File tamperedZim(tmpfile->path());
  ASSERT_FALSE(tamperedZim.verify());
}
141 
// Each ZIM file listed below (looked up in the "data" directory) must open
// without throwing and must pass verify().
TEST(ZimFile, openRealZimFile)
{
  const char* const zimfiles[] = {
    "small.zim",
    "wikibooks_be_all_nopic_2017-02.zim",
    "wikibooks_be_all_nopic_2017-02_splitted.zim",
    "wikipedia_en_climate_change_nopic_2020-01.zim"
  };

  for ( const std::string fname : zimfiles ) {
    const std::string path = zim::DEFAULTFS::join("data", fname);
    const TestContext ctx{ {"path", path } };

    std::unique_ptr<zim::File> opened;
    EXPECT_NO_THROW( opened.reset(new zim::File(path)) ) << ctx;

    // Nothing to verify if the file could not even be opened.
    if ( !opened )
      continue;

    EXPECT_TRUE( opened->verify() ) << ctx;
  }
}
161 
// Scope guard redirecting std::cerr into an in-memory buffer.
//
// While an instance is alive, everything written to std::cerr is collected
// in the instance instead; the destructor reinstalls the stream buffer that
// std::cerr had when the instance was constructed. The text collected so
// far is obtained by converting the object to std::string.
class CapturedStderr
{
  std::ostringstream captured;           // receives the redirected output
  std::streambuf* const originalBuffer;  // std::cerr's buffer to reinstall

public:
  CapturedStderr()
    : originalBuffer(std::cerr.rdbuf())
  {
    std::cerr.rdbuf(captured.rdbuf());
  }

  // Copying would make two objects restore the same stream buffer.
  CapturedStderr(const CapturedStderr&) = delete;

  ~CapturedStderr()
  {
    std::cerr.rdbuf(originalBuffer);
  }

  // Returns a copy of everything captured up to now.
  operator std::string() const
  {
    return captured.str();
  }
};
182 
// Expects that zim::validate() rejects the file at `zimpath` when every
// integrity check except CHECKSUM is enabled, and that the text written to
// std::cerr during the validation equals `expected_stderror_text` exactly.
// The expansion is a brace-enclosed block, so its helper variables do not
// leak into the calling scope.
#define EXPECT_BROKEN_ZIMFILE(zimpath, expected_stderror_text)            \
  {                                                                       \
    zim::IntegrityCheckList allButChecksum;                               \
    allButChecksum.set();                                                 \
    allButChecksum.reset(size_t(zim::IntegrityCheck::CHECKSUM));          \
    CapturedStderr capturedOutput;                                        \
    EXPECT_FALSE(zim::validate(zimpath, allButChecksum));                 \
    EXPECT_EQ(expected_stderror_text, std::string(capturedOutput));       \
  }
192 
// zim::validate() must accept the valid sample file with all checks
// enabled, and must reject each deliberately damaged sample file while
// printing exactly the expected diagnostic.
TEST(ZimFile, validate)
{
  zim::IntegrityCheckList everyCheck;
  everyCheck.set();

  ASSERT_TRUE(zim::validate("./data/small.zim", everyCheck));

  // File too short to hold a header
  EXPECT_BROKEN_ZIMFILE("./data/invalid.smaller_than_header.zim",
                        "zim-file is too small to contain a header\n");

  // Header fields locating the tables
  EXPECT_BROKEN_ZIMFILE("./data/invalid.outofbounds_urlptrpos.zim",
                        "Dirent pointer table outside (or not fully inside) ZIM file.\n");

  EXPECT_BROKEN_ZIMFILE("./data/invalid.outofbounds_titleptrpos.zim",
                        "Title index table outside (or not fully inside) ZIM file.\n");

  EXPECT_BROKEN_ZIMFILE("./data/invalid.outofbounds_clusterptrpos.zim",
                        "Cluster pointer table outside (or not fully inside) ZIM file.\n");

  EXPECT_BROKEN_ZIMFILE("./data/invalid.invalid_mimelistpos.zim",
                        "mimelistPos must be 80.\n");

  EXPECT_BROKEN_ZIMFILE("./data/invalid.invalid_checksumpos.zim",
                        "Checksum position is not valid\n");

  // Entries of the dirent pointer table
  EXPECT_BROKEN_ZIMFILE("./data/invalid.outofbounds_first_direntptr.zim",
                        "Invalid dirent pointer\n");

  EXPECT_BROKEN_ZIMFILE("./data/invalid.outofbounds_last_direntptr.zim",
                        "Invalid dirent pointer\n");

  // Entries of the title index
  EXPECT_BROKEN_ZIMFILE("./data/invalid.outofbounds_first_title_entry.zim",
                        "Invalid title index entry\n");

  EXPECT_BROKEN_ZIMFILE("./data/invalid.outofbounds_last_title_entry.zim",
                        "Invalid title index entry\n");

  // Entries of the cluster pointer table
  EXPECT_BROKEN_ZIMFILE("./data/invalid.outofbounds_first_clusterptr.zim",
                        "Invalid cluster pointer\n");
}
255 
// Opens the same ZIM content twice - once stored as a single file and once
// as a multi-part ("splitted") file - and checks that both expose identical
// global properties and identical articles.
TEST(ZimFile, multipart)
{
  const zim::File zimfile1("./data/wikibooks_be_all_nopic_2017-02.zim");
  const zim::File zimfile2("./data/wikibooks_be_all_nopic_2017-02_splitted.zim");
  ASSERT_FALSE(zimfile1.is_multiPart());
  ASSERT_TRUE (zimfile2.is_multiPart());

  EXPECT_EQ(zimfile1.getFilesize(), zimfile2.getFilesize());
  EXPECT_EQ(zimfile1.getCountClusters(), zimfile2.getCountClusters());
  EXPECT_EQ(zimfile1.getNamespaces(), zimfile2.getNamespaces());

  ASSERT_EQ(zimfile1.getCountArticles(), zimfile2.getCountArticles());

  ASSERT_EQ(118, zimfile1.getCountArticles()); // ==> below loop is not a noop

  // Both files are const, so the article count cannot change while looping.
  const auto articleCount = zimfile1.getCountArticles();
  for ( zim::article_index_type i = 0; i < articleCount; ++i ) {
    zim::Article article1 = zimfile1.getArticle(i);
    zim::Article article2 = zimfile2.getArticle(i);
    ASSERT_EQ(i, article1.getIndex());
    ASSERT_EQ(i, article2.getIndex());
    ASSERT_EQ(article1.getClusterNumber(), article2.getClusterNumber());
    ASSERT_EQ(article1.getOffset(), article2.getOffset());
    ASSERT_EQ(article1.getParameter(), article2.getParameter());
    ASSERT_EQ(article1.getTitle(), article2.getTitle());
    ASSERT_EQ(article1.getUrl(), article2.getUrl());
    ASSERT_EQ(article1.getLongUrl(), article2.getLongUrl());
    ASSERT_EQ(article1.getLibraryMimeType(), article2.getLibraryMimeType());
    ASSERT_EQ(article1.isRedirect(), article2.isRedirect());
    ASSERT_EQ(article1.isLinktarget(), article2.isLinktarget());
    ASSERT_EQ(article1.isDeleted(), article2.isDeleted());
    ASSERT_EQ(article1.getNamespace(), article2.getNamespace());
    ASSERT_EQ(article1.getArticleSize(), article2.getArticleSize());
    ASSERT_EQ(article1.getData(), article2.getData());

    // Pages are compared only for entries that are neither redirects,
    // link targets nor deleted. (The condition used to test isLinktarget()
    // twice and never isDeleted().)
    const bool isRegularArticle = !article1.isRedirect()
                               && !article1.isLinktarget()
                               && !article1.isDeleted();
    if ( isRegularArticle ) {
      ASSERT_EQ(article1.getPage(true, 5), article2.getPage(true, 5));
      ASSERT_EQ(article1.getPage(false, 5), article2.getPage(false, 5));
    }

    ASSERT_EQ(zimfile1.getArticleByTitle(i).getIndex(),
              zimfile2.getArticleByTitle(i).getIndex()
    );
    ASSERT_EQ(zimfile1.getArticleByClusterOrder(i).getIndex(),
              zimfile2.getArticleByClusterOrder(i).getIndex()
    );
  }
}
300 
301 } // unnamed namespace
302