1 /*
2 * Copyright (C) 2009 Tommi Maekitalo
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of the
7 * License, or (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
11 * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
12 * NON-INFRINGEMENT. See the GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 *
18 */
19
20 #include <algorithm>
21 #include <cstdio>
22 #include <cstring>
23 #include <fstream>
24 #include <memory>
25 #include <sstream>
26 #include <stdexcept>
27 #if defined(_MSC_VER)
28 # include <BaseTsd.h>
29 typedef SSIZE_T ssize_t;
30 #else
31 # include <unistd.h>
32 #endif
33
34 #ifdef _WIN32
35 #include <windows.h>
36 #include <fcntl.h>
37 #include <sys/types.h>
38 #include <sys/stat.h>
39 #include <io.h>
40 #include <fileapi.h>
41 #undef min
42 #undef max
43 #endif
44
45 #include "gtest/gtest.h"
46
47 #include <zim/zim.h>
48
49 #include "../src/buffer.h"
50 #include "../src/cluster.h"
51 #include "../src/file_part.h"
52 #include "../src/file_compound.h"
53 #include "../src/buffer_reader.h"
54 #include "../src/writer/cluster.h"
55 #include "../src/endian_tools.h"
56 #include "../src/config.h"
57
58 #include "tools.h"
59
60 namespace
61 {
62
63 using zim::unittests::TempFile;
64 using zim::unittests::write_to_buffer;
65
// A freshly constructed writer cluster holds no blob; after three addData()
// calls it reports three blobs, each with the size of the data it was given.
TEST(ClusterTest, create_cluster)
{
  const std::string payloads[] = {
    "123456789012345678901234567890",
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
    "abcdefghijklmnopqrstuvwxyz",
  };
  const unsigned int payloadCount = sizeof(payloads) / sizeof(payloads[0]);

  zim::writer::Cluster writerCluster(zim::zimcompNone);

  // Nothing has been added yet.
  ASSERT_EQ(writerCluster.count().v, 0U);

  for (const std::string& payload : payloads) {
    writerCluster.addData(payload.data(), zim::zsize_t(payload.size()));
  }

  ASSERT_EQ(writerCluster.count().v, 3U);
  for (unsigned int idx = 0; idx < payloadCount; ++idx) {
    ASSERT_EQ(writerCluster.getBlobSize(zim::blob_index_t(idx)).v, payloads[idx].size());
  }
}
85
// Serializes an uncompressed cluster holding three blobs with
// write_to_buffer() and reads it back with zim::Cluster::read().
//
// Checks the cluster metadata (compression, extended flag, blob count), the
// size of every blob and -- like the Lzma/Zstd round-trip tests of this file
// -- the content of every blob.
TEST(ClusterTest, read_write_cluster)
{
  zim::writer::Cluster cluster(zim::zimcompNone);

  std::string blob0("123456789012345678901234567890");
  std::string blob1("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
  std::string blob2("abcdefghijklmnop vwxyz");

  cluster.addData(blob0.data(), zim::zsize_t(blob0.size()));
  cluster.addData(blob1.data(), zim::zsize_t(blob1.size()));
  cluster.addData(blob2.data(), zim::zsize_t(blob2.size()));

  cluster.close();
  auto buffer = write_to_buffer(cluster);
  const auto cluster2shptr = zim::Cluster::read(zim::BufferReader(buffer), zim::offset_t(0));
  zim::Cluster& cluster2 = *cluster2shptr;

  // Metadata of the cluster read back.
  ASSERT_EQ(cluster2.getCompression(), zim::zimcompNone);
  ASSERT_EQ(cluster2.isExtended, false);
  ASSERT_EQ(cluster2.count().v, 3U);

  // Blob sizes.
  ASSERT_EQ(cluster2.getBlobSize(zim::blob_index_t(0)).v, blob0.size());
  ASSERT_EQ(cluster2.getBlobSize(zim::blob_index_t(1)).v, blob1.size());
  ASSERT_EQ(cluster2.getBlobSize(zim::blob_index_t(2)).v, blob2.size());

  // Blob content: matching sizes alone would not detect corrupted or
  // swapped data.
  ASSERT_EQ(blob0, std::string(cluster2.getBlob(zim::blob_index_t(0))));
  ASSERT_EQ(blob1, std::string(cluster2.getBlob(zim::blob_index_t(1))));
  ASSERT_EQ(blob2, std::string(cluster2.getBlob(zim::blob_index_t(2))));
}
109
// Three zero-length blobs written to an uncompressed cluster must come back
// as three zero-length blobs after a write_to_buffer() / Cluster::read()
// round trip.
TEST(ClusterTest, read_write_empty)
{
  const unsigned int emptyBlobCount = 3;

  zim::writer::Cluster writerCluster(zim::zimcompNone);
  for (unsigned int idx = 0; idx < emptyBlobCount; ++idx) {
    // No data pointer and a size of zero: an empty blob.
    writerCluster.addData(nullptr, zim::zsize_t(0));
  }
  writerCluster.close();

  auto serialized = write_to_buffer(writerCluster);
  const auto readBack = zim::Cluster::read(zim::BufferReader(serialized), zim::offset_t(0));

  ASSERT_EQ(readBack->getCompression(), zim::zimcompNone);
  ASSERT_EQ(readBack->isExtended, false);
  ASSERT_EQ(readBack->count().v, 3U);
  for (unsigned int idx = 0; idx < emptyBlobCount; ++idx) {
    ASSERT_EQ(readBack->getBlobSize(zim::blob_index_t(idx)).v, 0U);
  }
}
129
// Round trip of a three-blob cluster created with zim::zimcompLzma: the
// cluster read back must report the same compression, must not be extended,
// and must return every blob with its original size and content.
TEST(ClusterTest, read_write_clusterLzma)
{
  const std::string payloads[] = {
    "123456789012345678901234567890",
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
    "abcdefghijklmnopqrstuvwxyz",
  };
  const unsigned int payloadCount = sizeof(payloads) / sizeof(payloads[0]);

  zim::writer::Cluster writerCluster(zim::zimcompLzma);
  for (const std::string& payload : payloads) {
    writerCluster.addData(payload.data(), zim::zsize_t(payload.size()));
  }
  writerCluster.close();

  auto serialized = write_to_buffer(writerCluster);
  const auto readBack = zim::Cluster::read(zim::BufferReader(serialized), zim::offset_t(0));

  ASSERT_EQ(readBack->isExtended, false);
  ASSERT_EQ(readBack->count().v, 3U);
  ASSERT_EQ(readBack->getCompression(), zim::zimcompLzma);

  // All sizes first, then all contents.
  for (unsigned int idx = 0; idx < payloadCount; ++idx) {
    ASSERT_EQ(readBack->getBlobSize(zim::blob_index_t(idx)).v, payloads[idx].size());
  }
  for (unsigned int idx = 0; idx < payloadCount; ++idx) {
    ASSERT_EQ(payloads[idx], std::string(readBack->getBlob(zim::blob_index_t(idx))));
  }
}
156
// Round trip of a three-blob cluster created with zim::zimcompZstd: the
// cluster read back must report the same compression, must not be extended,
// and must return every blob with its original size and content.
TEST(ClusterTest, read_write_clusterZstd)
{
  const std::string payloads[] = {
    "123456789012345678901234567890",
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
    "abcdefghijklmnopqrstuvwxyz",
  };
  const unsigned int payloadCount = sizeof(payloads) / sizeof(payloads[0]);

  zim::writer::Cluster writerCluster(zim::zimcompZstd);
  for (const std::string& payload : payloads) {
    writerCluster.addData(payload.data(), zim::zsize_t(payload.size()));
  }
  writerCluster.close();

  auto serialized = write_to_buffer(writerCluster);
  const auto readBack = zim::Cluster::read(zim::BufferReader(serialized), zim::offset_t(0));

  ASSERT_EQ(readBack->isExtended, false);
  ASSERT_EQ(readBack->count().v, 3U);
  ASSERT_EQ(readBack->getCompression(), zim::zimcompZstd);

  // All sizes first, then all contents.
  for (unsigned int idx = 0; idx < payloadCount; ++idx) {
    ASSERT_EQ(readBack->getBlobSize(zim::blob_index_t(idx)).v, payloads[idx].size());
  }
  for (unsigned int idx = 0; idx < payloadCount; ++idx) {
    ASSERT_EQ(payloads[idx], std::string(readBack->getBlob(zim::blob_index_t(idx))));
  }
}
183
184 #if !defined(__APPLE__)
// Round-trips an uncompressed cluster whose last blob is larger than 4 GiB
// through the writer and the reader, and checks that both sides report the
// cluster as extended.
//
// The test returns early (and is therefore reported as passed) on 32-bit
// builds, when the SKIP_BIG_MEMORY_TEST environment variable is "1", or when
// one of the big allocations throws std::bad_alloc.
TEST(ClusterTest, read_write_extended_cluster)
{
  // zim::writer doesn't support 32-bit architectures.
  if (SIZE_MAX == UINT32_MAX) {
    return;
  }

  const char* const skipBigMemoryTest = std::getenv("SKIP_BIG_MEMORY_TEST");
  if (skipBigMemoryTest != nullptr && std::string(skipBigMemoryTest) == "1") {
    return;
  }

  // MEM = 0
  std::string blob0("123456789012345678901234567890");
  std::string blob1("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
  std::string blob2("abcdefghijklmnopqrstuvwxyz");
  zim::size_type bigger_than_4g = 1024LL*1024LL*1024LL*4LL+1024LL;

  auto buffer = zim::Buffer::makeBuffer(nullptr, zim::zsize_t(0));
  {
    // Owned by a unique_ptr so that the huge buffer is released on every
    // exit path, including the early return performed by a failing ASSERT_*.
    std::unique_ptr<char[]> blob3;
    try {
      // Plain new[] on purpose: the content is irrelevant, and
      // std::make_unique<char[]> would value-initialize (write to) all of it.
      blob3.reset(new char[bigger_than_4g]);
      // MEM = 4GiB
    } catch (const std::bad_alloc&) {
      // Not enough memory, we cannot test a cluster bigger than 4 GiB :(
      return;
    }

    zim::writer::Cluster cluster(zim::zimcompNone);
    cluster.addData(blob0.data(), zim::zsize_t(blob0.size()));
    cluster.addData(blob1.data(), zim::zsize_t(blob1.size()));
    cluster.addData(blob2.data(), zim::zsize_t(blob2.size()));
    try {
      cluster.addData(blob3.get(), zim::zsize_t(bigger_than_4g));
      // MEM = 8GiB
    } catch (const std::bad_alloc&) {
      // Not enough memory, we cannot test a cluster bigger than 4 GiB :(
      return;
    }
    ASSERT_EQ(cluster.is_extended(), true);

    // Release the source buffer before serializing to keep the peak memory
    // usage down.
    blob3.reset();
    // MEM = 4GiB

    cluster.close();
    buffer = write_to_buffer(cluster);
  }

  const auto cluster2shptr = zim::Cluster::read(zim::BufferReader(buffer), zim::offset_t(0));
  zim::Cluster& cluster2 = *cluster2shptr;
  ASSERT_EQ(cluster2.isExtended, true);
  ASSERT_EQ(cluster2.count().v, 4U);
  ASSERT_EQ(cluster2.getCompression(), zim::zimcompNone);
  ASSERT_EQ(cluster2.getBlobSize(zim::blob_index_t(0)).v, blob0.size());
  ASSERT_EQ(cluster2.getBlobSize(zim::blob_index_t(1)).v, blob1.size());
  ASSERT_EQ(cluster2.getBlobSize(zim::blob_index_t(2)).v, blob2.size());
  ASSERT_EQ(cluster2.getBlobSize(zim::blob_index_t(3)).v, bigger_than_4g);
  // Only the small blobs are compared by content; the big one was never
  // initialized.
  ASSERT_EQ(blob0, std::string(cluster2.getBlob(zim::blob_index_t(0))));
  ASSERT_EQ(blob1, std::string(cluster2.getBlob(zim::blob_index_t(1))));
  ASSERT_EQ(blob2, std::string(cluster2.getBlob(zim::blob_index_t(2))));
}
249 #endif
250
// Hand-writes an uncompressed, extended cluster into a temporary file and
// checks that zim::Cluster::read() parses it through a FileReader.
//
// File layout produced below:
//   - 1 byte of cluster information (0x11, read back below as "extended"
//     and zimcompNone);
//   - five little-endian 64-bit offsets, the first one being the size of the
//     offset table itself;
//   - three small blobs;
//   - a fourth blob larger than 4 GiB, created by seeking forward and
//     writing only its last byte.
TEST(ClusterTest, read_extended_cluster)
{
  // Closes the stream on every exit path, including the early return
  // performed by a failing ASSERT_*.
  struct FileCloser {
    void operator()(std::FILE* stream) const { std::fclose(stream); }
  };
  const std::unique_ptr<std::FILE, FileCloser> tmpStream(std::tmpfile());
  ASSERT_TRUE(tmpStream != nullptr) << "std::tmpfile() failed";
  const int fd = fileno(tmpStream.get());
  ASSERT_NE(fd, -1);

  // Writes `size` bytes to the temporary file; true when all were written.
  const auto writeAll = [fd](const char* data, size_t size) {
    return write(fd, data, size) == static_cast<ssize_t>(size);
  };
  // Writes `value` as a little-endian 64-bit integer.
  const auto writeOffset = [&writeAll](zim::offset_type value) {
    char out_buf[sizeof(uint64_t)];
    zim::toLittleEndian(value, out_buf);
    return writeAll(out_buf, sizeof(out_buf));
  };

  std::string blob0("123456789012345678901234567890");
  std::string blob1("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
  std::string blob2("abcdefghijklmnopqrstuvwxyz");

  zim::size_type bigger_than_4g = 1024LL*1024LL*1024LL*4LL+1024LL;

  // Cluster information byte.
  const char clusterInfo = 0x11;
  ASSERT_TRUE(writeAll(&clusterInfo, 1));

  // Offset table: start of each of the four blobs, then the end of the last.
  zim::offset_type offset = 5*sizeof(uint64_t);
  ASSERT_TRUE(writeOffset(offset));

  offset += blob0.size();
  ASSERT_TRUE(writeOffset(offset));

  offset += blob1.size();
  ASSERT_TRUE(writeOffset(offset));

  offset += blob2.size();
  ASSERT_TRUE(writeOffset(offset));

  offset += bigger_than_4g;
  ASSERT_TRUE(writeOffset(offset));

  // Blob data.
  ASSERT_TRUE(writeAll(blob0.c_str(), blob0.size()));
  ASSERT_TRUE(writeAll(blob1.c_str(), blob1.size()));
  ASSERT_TRUE(writeAll(blob2.c_str(), blob2.size()));

  // Fourth blob: skip all but its last byte instead of writing 4 GiB.
#ifdef _WIN32
# define LSEEK _lseeki64
#else
# define LSEEK lseek
#endif
  const auto seekResult = LSEEK(fd, bigger_than_4g-1, SEEK_CUR);
#undef LSEEK
  ASSERT_NE(seekResult, -1);

  const char lastByte = '\0';
  ASSERT_TRUE(writeAll(&lastByte, 1));
  ASSERT_EQ(std::fflush(tmpStream.get()), 0);

  // NOTE(review): FileCompound presumably takes ownership of filePart --
  // confirm against file_compound.h.
  auto filePart = new zim::FilePart<>(fd);
  auto fileCompound = std::shared_ptr<zim::FileCompound>(new zim::FileCompound(filePart));
  const auto cluster2shptr = zim::Cluster::read(zim::FileReader(fileCompound), zim::offset_t(0));
  zim::Cluster& cluster2 = *cluster2shptr;
  ASSERT_EQ(cluster2.isExtended, true);
  ASSERT_EQ(cluster2.count().v, 4U);
  ASSERT_EQ(cluster2.getCompression(), zim::zimcompNone);
  ASSERT_EQ(cluster2.getBlobSize(zim::blob_index_t(0)).v, blob0.size());
  ASSERT_EQ(cluster2.getBlobSize(zim::blob_index_t(1)).v, blob1.size());
  ASSERT_EQ(cluster2.getBlobSize(zim::blob_index_t(2)).v, blob2.size());
  ASSERT_EQ(cluster2.getBlobSize(zim::blob_index_t(3)).v, bigger_than_4g);

  ASSERT_EQ(blob0, std::string(cluster2.getBlob(zim::blob_index_t(0))));
  ASSERT_EQ(blob1, std::string(cluster2.getBlob(zim::blob_index_t(1))));
  ASSERT_EQ(blob2, std::string(cluster2.getBlob(zim::blob_index_t(2))));

  const zim::Blob b = cluster2.getBlob(zim::blob_index_t(3));
  if (SIZE_MAX == UINT32_MAX) {
    // On 32-bit builds this test expects an empty blob for the >4 GiB entry.
    ASSERT_EQ(b.data(), nullptr);
    ASSERT_EQ(b.size(), 0U);
  } else {
    ASSERT_EQ(b.size(), bigger_than_4g);
  }
}
337
338
339 } // namespace
340