1 /*
2 * Copyright (C) 2020 Veloman Yunkan
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of the
7 * License, or (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
11 * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
12 * NON-INFRINGEMENT. See the GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 *
18 */
19
#include <stdexcept>

#include <zim/zim.h>
#include <zim/file.h>

#include "tools.h"
#include "../src/fs.h"

#include "gtest/gtest.h"
27
28 namespace
29 {
30
31 using zim::unittests::TempFile;
32
// A test context is an ordered list of (label, value) string pairs. It is
// streamed into assertion failure messages so that a failing iteration of a
// parameterized loop can be identified.
using TestContextImpl = std::vector<std::pair<std::string, std::string> >;
struct TestContext : TestContextImpl {
  // Allows writing `TestContext ctx{ {"label", "value"}, ... }`.
  // The entries are stored in the order in which they are listed.
  TestContext(const std::initializer_list<value_type>& il)
    : TestContextImpl(il.begin(), il.end())
  {}
};
39
operator <<(std::ostream & out,const TestContext & ctx)40 std::ostream& operator<<(std::ostream& out, const TestContext& ctx)
41 {
42 out << "Test context:\n";
43 for ( const auto& kv : ctx )
44 out << "\t" << kv.first << ": " << kv.second << "\n";
45 out << std::endl;
46 return out;
47 }
48
// Returns the raw bytes of a ZIM file that contains no articles and no
// clusters: the header fields (80 bytes in total) immediately followed by a
// 16-byte md5sum, i.e. 96 bytes overall.
std::string
emptyZimFileContent()
{
  // Little-endian encodings that occur several times in the header.
  const std::string zero32(4, '\0');                            // 32-bit value 0
  const std::string offset80 = '\x50' + std::string(7, '\0');   // 64-bit value 80

  std::string content;
  content += "ZIM\x04";                       // Magic
  content += '\x05' + std::string(3, '\0');   // Version
  content += std::string(16, '\0');           // uuid
  content += zero32;                          // article count
  content += zero32;                          // cluster count
  // All pointer fields refer to offset 80, which is the first byte past the
  // header fields (and thus also the position of the md5sum below).
  content += offset80;                        // url ptr pos
  content += offset80;                        // title ptr pos
  content += offset80;                        // cluster ptr pos
  content += offset80;                        // mimelist ptr pos
  content += zero32;                          // main page index
  content += zero32;                          // layout page index
  content += offset80;                        // checksum pos
  content += "\x8a\xbb\xad\x98\x64\xd5\x48\xb2\xb9\x71\xab\x30\xed\x29\xa4\x01"; // md5sum
  return content;
}
68
69 std::unique_ptr<TempFile>
makeTempFile(const char * name,const std::string & content)70 makeTempFile(const char* name, const std::string& content)
71 {
72 std::unique_ptr<TempFile> p(new TempFile(name));
73 write(p->fd(), &content[0], content.size());
74 p->close();
75 return p;
76 }
77
78
// Opening garbage must throw std::runtime_error. The garbage files consist of
// an optional ZIM magic number followed by 0, 10, ..., 90 copies of a single
// filler byte.
TEST(ZimFile, openingAnInvalidZimFileFails)
{
  const char* const magicPrefixes[] = { "ZIM\x04", "" };
  const unsigned char fillerBytes[] = {0x00, 0x01, 0x11, 0x30, 0xFF};
  for ( const std::string magic : magicPrefixes ) {
    const char* const hasMagic = magic.empty() ? "no" : "yes";
    for ( const unsigned char filler : fillerBytes ) {
      for ( int length = 0; length < 100; length += 10 ) {
        // Identifies the failing combination in the assertion message
        const TestContext ctx{
          {"prefix", hasMagic },
          {"byte",   std::to_string(filler) },
          {"count",  std::to_string(length) }
        };
        const std::string garbage = magic + std::string(length, filler);
        const auto tmpfile = makeTempFile("invalid_zim_file", garbage);

        EXPECT_THROW( zim::File(tmpfile->path()), std::runtime_error ) << ctx;
      }
    }
  }
}
99
// The minimal article-less ZIM file produced by emptyZimFileContent() must
// open without an exception and pass verify().
TEST(ZimFile, openingAnEmptyZimFileSucceeds)
{
  const std::string content = emptyZimFileContent();
  const auto tmpfile = makeTempFile("empty_zim_file", content);

  zim::File zimfile(tmpfile->path());
  ASSERT_TRUE(zimfile.verify());
}
107
// Tells whether `offset` lies outside the two header ranges that are exempt
// from the corruption test (nastyEmptyZimFile): bytes [6, 24) holding the
// minor version and the uuid, and bytes [64, 72) holding the main page and
// layout page indexes. Every other offset is reported as "nasty".
bool isNastyOffset(int offset) {
  const bool inMinorVersionOrUuid = (offset >= 6) && (offset < 24);
  const bool inPageOrLayoutIndex = (offset >= 64) && (offset < 72);
  return !(inMinorVersionOrUuid || inPageOrLayoutIndex);
}
117
// Overwrites, one at a time, each of the first 80 bytes of an empty ZIM file
// with 0xff and expects opening the damaged file to throw. Offsets that are
// not flagged by isNastyOffset() are skipped.
TEST(ZimFile, nastyEmptyZimFile)
{
  const std::string validContent = emptyZimFileContent();
  for ( int pos = 0; pos < 80; ++pos ) {
    if ( !isNastyOffset(pos) )
      continue;

    const TestContext ctx{ {"offset", std::to_string(pos) } };
    std::string damagedContent = validContent;
    damagedContent[pos] = '\xff';
    const auto tmpfile = makeTempFile("wrong_checksum_empty_zim_file", damagedContent);
    EXPECT_THROW( zim::File(tmpfile->path()), std::runtime_error ) << ctx;
  }
}
131
// Damages a byte of the stored md5sum (offset 85 lies within the last 16
// bytes of the 96-byte content built by emptyZimFileContent()). Such a file
// must still open, but verify() has to report a mismatch.
TEST(ZimFile, wrongChecksumInEmptyZimFile)
{
  std::string damagedContent = emptyZimFileContent();
  damagedContent[85] = '\xff';
  const auto tmpfile = makeTempFile("wrong_checksum_empty_zim_file", damagedContent);

  zim::File zimfile(tmpfile->path());
  ASSERT_FALSE(zimfile.verify());
}
141
// Every sample ZIM file in the "data" directory must open without throwing
// and pass verify().
TEST(ZimFile, openRealZimFile)
{
  const char* const sampleFiles[] = {
    "small.zim",
    "wikibooks_be_all_nopic_2017-02.zim",
    "wikibooks_be_all_nopic_2017-02_splitted.zim",
    "wikipedia_en_climate_change_nopic_2020-01.zim"
  };

  for ( const std::string fname : sampleFiles ) {
    const std::string path = zim::DEFAULTFS::join("data", fname);
    const TestContext ctx{ {"path", path } };

    std::unique_ptr<zim::File> opened;
    EXPECT_NO_THROW( opened.reset(new zim::File(path)) ) << ctx;
    if ( !opened )
      continue; // opening failed (already reported above); nothing to verify

    EXPECT_TRUE( opened->verify() ) << ctx;
  }
}
161
// Scope guard that redirects std::cerr into an in-memory buffer.
//
// While an object of this class is alive everything written to std::cerr is
// collected in the object instead; the destructor reinstalls the stream
// buffer that std::cerr was using before. The text collected so far is
// obtained by converting the object to std::string.
class CapturedStderr
{
  std::ostringstream captured;          // receives the redirected output
  std::streambuf* const originalBuf;    // std::cerr's buffer before redirection

public:
  // rdbuf(newbuf) installs `newbuf` and returns the buffer it replaces.
  // `captured` is declared before `originalBuf`, hence it is already
  // constructed when this initializer runs.
  CapturedStderr()
    : originalBuf(std::cerr.rdbuf(captured.rdbuf()))
  {}

  CapturedStderr(const CapturedStderr&) = delete;

  ~CapturedStderr()
  {
    std::cerr.rdbuf(originalBuf);
  }

  operator std::string() const
  {
    return captured.str();
  }
};
182
// Expects zim::validate() to reject the ZIM file at `zimpath` and to print
// exactly `expected_stderror_text` on std::cerr while doing so.
//
// The check list has all of its flags set and then the CHECKSUM flag cleared
// (presumably so that the structural checks are exercised on samples whose
// checksum was not recomputed - TODO confirm). std::cerr is captured only for
// the duration of the macro body.
//
// The body is wrapped in do { ... } while ( false ) so that an invocation
// followed by a semicolon forms exactly one statement and therefore remains
// valid inside an unbraced if/else.
#define EXPECT_BROKEN_ZIMFILE(zimpath, expected_stderror_text)    \
  do {                                                            \
    zim::IntegrityCheckList checksToRun;                          \
    checksToRun.set();                                            \
    checksToRun.reset(size_t(zim::IntegrityCheck::CHECKSUM));     \
    CapturedStderr stderror;                                      \
    EXPECT_FALSE(zim::validate(zimpath, checksToRun));            \
    EXPECT_EQ(expected_stderror_text, std::string(stderror));     \
  } while ( false )
192
// zim::validate() must accept a sound ZIM file and must reject each of the
// deliberately broken samples with the exact diagnostic expected on stderr.
TEST(ZimFile, validate)
{
  // A sound file passes with all flags of the check list set.
  zim::IntegrityCheckList allChecks;
  allChecks.set();
  ASSERT_TRUE(zim::validate("./data/small.zim", allChecks));

  // File shorter than a header
  EXPECT_BROKEN_ZIMFILE("./data/invalid.smaller_than_header.zim",
                        "zim-file is too small to contain a header\n");

  // Bad table/section positions in the header
  EXPECT_BROKEN_ZIMFILE("./data/invalid.outofbounds_urlptrpos.zim",
                        "Dirent pointer table outside (or not fully inside) ZIM file.\n");
  EXPECT_BROKEN_ZIMFILE("./data/invalid.outofbounds_titleptrpos.zim",
                        "Title index table outside (or not fully inside) ZIM file.\n");
  EXPECT_BROKEN_ZIMFILE("./data/invalid.outofbounds_clusterptrpos.zim",
                        "Cluster pointer table outside (or not fully inside) ZIM file.\n");
  EXPECT_BROKEN_ZIMFILE("./data/invalid.invalid_mimelistpos.zim",
                        "mimelistPos must be 80.\n");
  EXPECT_BROKEN_ZIMFILE("./data/invalid.invalid_checksumpos.zim",
                        "Checksum position is not valid\n");

  // Bad entries inside the dirent pointer table
  EXPECT_BROKEN_ZIMFILE("./data/invalid.outofbounds_first_direntptr.zim",
                        "Invalid dirent pointer\n");
  EXPECT_BROKEN_ZIMFILE("./data/invalid.outofbounds_last_direntptr.zim",
                        "Invalid dirent pointer\n");

  // Bad entries inside the title index
  EXPECT_BROKEN_ZIMFILE("./data/invalid.outofbounds_first_title_entry.zim",
                        "Invalid title index entry\n");
  EXPECT_BROKEN_ZIMFILE("./data/invalid.outofbounds_last_title_entry.zim",
                        "Invalid title index entry\n");

  // Bad entry inside the cluster pointer table
  EXPECT_BROKEN_ZIMFILE("./data/invalid.outofbounds_first_clusterptr.zim",
                        "Invalid cluster pointer\n");
}
255
// Opens the same ZIM archive once as a single file and once in its split
// form and checks that both expose identical file-level properties and
// identical articles.
TEST(ZimFile, multipart)
{
  const zim::File zimfile1("./data/wikibooks_be_all_nopic_2017-02.zim");
  const zim::File zimfile2("./data/wikibooks_be_all_nopic_2017-02_splitted.zim");
  ASSERT_FALSE(zimfile1.is_multiPart());
  ASSERT_TRUE (zimfile2.is_multiPart());

  EXPECT_EQ(zimfile1.getFilesize(), zimfile2.getFilesize());
  EXPECT_EQ(zimfile1.getCountClusters(), zimfile2.getCountClusters());
  EXPECT_EQ(zimfile1.getNamespaces(), zimfile2.getNamespaces());

  // Fatal on mismatch: the loop below indexes both files with the same range.
  ASSERT_EQ(zimfile1.getCountArticles(), zimfile2.getCountArticles());

  ASSERT_EQ(118, zimfile1.getCountArticles()); // ==> below loop is not a noop
  for ( zim::article_index_type i = 0; i < zimfile1.getCountArticles(); ++i ) {
    zim::Article article1 = zimfile1.getArticle(i);
    zim::Article article2 = zimfile2.getArticle(i);
    ASSERT_EQ(i, article1.getIndex());
    ASSERT_EQ(i, article2.getIndex());
    ASSERT_EQ(article1.getClusterNumber(), article2.getClusterNumber());
    ASSERT_EQ(article1.getOffset(), article2.getOffset());
    ASSERT_EQ(article1.getParameter(), article2.getParameter());
    ASSERT_EQ(article1.getTitle(), article2.getTitle());
    ASSERT_EQ(article1.getUrl(), article2.getUrl());
    ASSERT_EQ(article1.getLongUrl(), article2.getLongUrl());
    ASSERT_EQ(article1.getLibraryMimeType(), article2.getLibraryMimeType());
    ASSERT_EQ(article1.isRedirect(), article2.isRedirect());
    ASSERT_EQ(article1.isLinktarget(), article2.isLinktarget());
    ASSERT_EQ(article1.isDeleted(), article2.isDeleted());
    ASSERT_EQ(article1.getNamespace(), article2.getNamespace());
    ASSERT_EQ(article1.getArticleSize(), article2.getArticleSize());
    ASSERT_EQ(article1.getData(), article2.getData());
    // Pages are compared for regular articles only. The three flags were
    // asserted equal for both articles above, so testing article1 suffices.
    // (The condition used to test isLinktarget() twice and never isDeleted().)
    if ( !article1.isRedirect() && !article1.isLinktarget() && !article1.isDeleted() ) {
      ASSERT_EQ(article1.getPage(true, 5), article2.getPage(true, 5));
      ASSERT_EQ(article1.getPage(false, 5), article2.getPage(false, 5));
    }
    // The title-ordered and cluster-ordered views must map position i to the
    // same article in both files.
    ASSERT_EQ(zimfile1.getArticleByTitle(i).getIndex(),
              zimfile2.getArticleByTitle(i).getIndex()
    );
    ASSERT_EQ(zimfile1.getArticleByClusterOrder(i).getIndex(),
              zimfile2.getArticleByClusterOrder(i).getIndex()
    );
  }
}
300
301 } // unnamed namespace
302