1 /*
2  * Copyright (C) 2009 Tommi Maekitalo
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License as
6  * published by the Free Software Foundation; either version 2 of the
7  * License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful, but
10  * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
11  * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
12  * NON-INFRINGEMENT.  See the GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
17  *
18  */
19 
20 #include <algorithm>
21 #include <cstdio>
22 #include <cstring>
23 #include <fstream>
24 #include <memory>
25 #include <sstream>
26 #include <stdexcept>
27 #if defined(_MSC_VER)
28 # include <BaseTsd.h>
29   typedef SSIZE_T ssize_t;
30 #else
31 # include <unistd.h>
32 #endif
33 
34 #ifdef _WIN32
35 #include <windows.h>
36 #include <fcntl.h>
37 #include <sys/types.h>
38 #include <sys/stat.h>
39 #include <io.h>
40 #include <fileapi.h>
41 #undef min
42 #undef max
43 #endif
44 
45 #include "gtest/gtest.h"
46 
47 #include <zim/zim.h>
48 
49 #include "../src/buffer.h"
50 #include "../src/cluster.h"
51 #include "../src/file_part.h"
52 #include "../src/file_compound.h"
53 #include "../src/buffer_reader.h"
54 #include "../src/writer/cluster.h"
55 #include "../src/endian_tools.h"
56 #include "../src/config.h"
57 
58 #include "tools.h"
59 
60 namespace
61 {
62 
63 using zim::unittests::TempFile;
64 using zim::unittests::write_to_buffer;
65 
// A freshly constructed writer cluster must report zero blobs; after three
// addData() calls it must report three blobs, each with the size of the data
// that was added at that position.
TEST(ClusterTest, create_cluster)
{
  const unsigned int kBlobCount = 3;
  const std::string payloads[kBlobCount] = {
    "123456789012345678901234567890",
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
    "abcdefghijklmnopqrstuvwxyz",
  };

  zim::writer::Cluster cluster(zim::zimcompNone);
  ASSERT_EQ(cluster.count().v, 0U);

  // Feed the payloads in index order so blob N corresponds to payloads[N].
  for (const std::string& payload : payloads) {
    cluster.addData(payload.data(), zim::zsize_t(payload.size()));
  }
  ASSERT_EQ(cluster.count().v, 3U);

  for (unsigned int i = 0; i < kBlobCount; ++i) {
    ASSERT_EQ(cluster.getBlobSize(zim::blob_index_t(i)).v, payloads[i].size());
  }
}
85 
// Round-trips an uncompressed cluster: three blobs are added to a writer
// cluster, the cluster is serialized with write_to_buffer() and parsed back
// with zim::Cluster::read(). The parsed cluster must report the same
// compression, a non-extended layout, the same blob count and sizes, and
// return the same bytes for every blob.
TEST(ClusterTest, read_write_cluster)
{
  zim::writer::Cluster cluster(zim::zimcompNone);

  std::string blob0("123456789012345678901234567890");
  std::string blob1("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
  // NOTE(review): unlike the sibling tests this literal is not the full
  // lowercase alphabet; it is kept byte-for-byte - confirm it is intentional.
  std::string blob2("abcdefghijklmnop vwxyz");

  cluster.addData(blob0.data(), zim::zsize_t(blob0.size()));
  cluster.addData(blob1.data(), zim::zsize_t(blob1.size()));
  cluster.addData(blob2.data(), zim::zsize_t(blob2.size()));

  cluster.close();
  auto buffer = write_to_buffer(cluster);
  const auto cluster2shptr = zim::Cluster::read(zim::BufferReader(buffer), zim::offset_t(0));
  zim::Cluster& cluster2 = *cluster2shptr;
  ASSERT_EQ(cluster2.getCompression(), zim::zimcompNone);
  ASSERT_EQ(cluster2.isExtended, false);
  ASSERT_EQ(cluster2.count().v, 3U);
  ASSERT_EQ(cluster2.getBlobSize(zim::blob_index_t(0)).v, blob0.size());
  ASSERT_EQ(cluster2.getBlobSize(zim::blob_index_t(1)).v, blob1.size());
  ASSERT_EQ(cluster2.getBlobSize(zim::blob_index_t(2)).v, blob2.size());

  // Matching sizes alone would not catch corrupted or shifted payload bytes,
  // so compare the contents too, as the compressed round-trip tests do.
  ASSERT_EQ(blob0, std::string(cluster2.getBlob(zim::blob_index_t(0))));
  ASSERT_EQ(blob1, std::string(cluster2.getBlob(zim::blob_index_t(1))));
  ASSERT_EQ(blob2, std::string(cluster2.getBlob(zim::blob_index_t(2))));
}
109 
// Round-trips an uncompressed cluster made of three zero-length blobs (added
// with a null data pointer) and checks that the parsed cluster still reports
// three blobs, each of size zero.
TEST(ClusterTest, read_write_empty)
{
  const unsigned int kBlobCount = 3;
  const char* const no_data = nullptr;

  zim::writer::Cluster cluster(zim::zimcompNone);
  for (unsigned int i = 0; i < kBlobCount; ++i) {
    cluster.addData(no_data, zim::zsize_t(0));
  }
  cluster.close();

  auto serialized = write_to_buffer(cluster);
  const auto reloaded = zim::Cluster::read(zim::BufferReader(serialized), zim::offset_t(0));
  zim::Cluster& parsed = *reloaded;

  ASSERT_EQ(parsed.getCompression(), zim::zimcompNone);
  ASSERT_EQ(parsed.isExtended, false);
  ASSERT_EQ(parsed.count().v, 3U);
  for (unsigned int i = 0; i < kBlobCount; ++i) {
    ASSERT_EQ(parsed.getBlobSize(zim::blob_index_t(i)).v, 0U);
  }
}
129 
// Round-trips a cluster written with zim::zimcompLzma: the parsed cluster
// must be non-extended, hold three blobs, report Lzma compression, and give
// back the original size and bytes of every blob.
TEST(ClusterTest, read_write_clusterLzma)
{
  const unsigned int kBlobCount = 3;
  const std::string payloads[kBlobCount] = {
    "123456789012345678901234567890",
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
    "abcdefghijklmnopqrstuvwxyz",
  };

  zim::writer::Cluster writer(zim::zimcompLzma);
  for (const std::string& payload : payloads) {
    writer.addData(payload.data(), zim::zsize_t(payload.size()));
  }
  writer.close();

  auto serialized = write_to_buffer(writer);
  const auto reloaded = zim::Cluster::read(zim::BufferReader(serialized), zim::offset_t(0));
  zim::Cluster& parsed = *reloaded;

  ASSERT_EQ(parsed.isExtended, false);
  ASSERT_EQ(parsed.count().v, 3U);
  ASSERT_EQ(parsed.getCompression(), zim::zimcompLzma);

  // All sizes are verified before any content is fetched.
  for (unsigned int i = 0; i < kBlobCount; ++i) {
    ASSERT_EQ(parsed.getBlobSize(zim::blob_index_t(i)).v, payloads[i].size());
  }
  for (unsigned int i = 0; i < kBlobCount; ++i) {
    ASSERT_EQ(payloads[i], std::string(parsed.getBlob(zim::blob_index_t(i))));
  }
}
156 
// Round-trips a cluster written with zim::zimcompZstd: the parsed cluster
// must be non-extended, hold three blobs, report Zstd compression, and give
// back the original size and bytes of every blob.
TEST(ClusterTest, read_write_clusterZstd)
{
  const unsigned int kBlobCount = 3;
  const std::string payloads[kBlobCount] = {
    "123456789012345678901234567890",
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
    "abcdefghijklmnopqrstuvwxyz",
  };

  zim::writer::Cluster writer(zim::zimcompZstd);
  for (const std::string& payload : payloads) {
    writer.addData(payload.data(), zim::zsize_t(payload.size()));
  }
  writer.close();

  auto serialized = write_to_buffer(writer);
  const auto reloaded = zim::Cluster::read(zim::BufferReader(serialized), zim::offset_t(0));
  zim::Cluster& parsed = *reloaded;

  ASSERT_EQ(parsed.isExtended, false);
  ASSERT_EQ(parsed.count().v, 3U);
  ASSERT_EQ(parsed.getCompression(), zim::zimcompZstd);

  // All sizes are verified before any content is fetched.
  for (unsigned int i = 0; i < kBlobCount; ++i) {
    ASSERT_EQ(parsed.getBlobSize(zim::blob_index_t(i)).v, payloads[i].size());
  }
  for (unsigned int i = 0; i < kBlobCount; ++i) {
    ASSERT_EQ(payloads[i], std::string(parsed.getBlob(zim::blob_index_t(i))));
  }
}
183 
184 #if !defined(__APPLE__)
TEST(ClusterTest,read_write_extended_cluster)185 TEST(ClusterTest, read_write_extended_cluster)
186 {
187   //zim::writer doesn't suport 32 bits architectures.
188   if (SIZE_MAX == UINT32_MAX) {
189     return;
190   }
191 
192   char* SKIP_BIG_MEMORY_TEST = std::getenv("SKIP_BIG_MEMORY_TEST");
193   if (SKIP_BIG_MEMORY_TEST != nullptr && std::string(SKIP_BIG_MEMORY_TEST) == "1") {
194     return;
195   }
196 
197   // MEM = 0
198   std::string blob0("123456789012345678901234567890");
199   std::string blob1("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
200   std::string blob2("abcdefghijklmnopqrstuvwxyz");
201   zim::size_type bigger_than_4g = 1024LL*1024LL*1024LL*4LL+1024LL;
202 
203   auto buffer = zim::Buffer::makeBuffer(nullptr, zim::zsize_t(0));
204   {
205     char* blob3 = nullptr;
206     try {
207       blob3 = new char[bigger_than_4g];
208       // MEM = 4GiB
209     } catch (std::bad_alloc& e) {
210       // Not enough memory, we cannot test cluster bigger than 4Go :(
211       return;
212     }
213 
214     {
215       zim::writer::Cluster cluster(zim::zimcompNone);
216       cluster.addData(blob0.data(), zim::zsize_t(blob0.size()));
217       cluster.addData(blob1.data(), zim::zsize_t(blob1.size()));
218       cluster.addData(blob2.data(), zim::zsize_t(blob2.size()));
219       try {
220         cluster.addData(blob3, zim::zsize_t(bigger_than_4g));
221         // MEM = 8GiB
222       } catch (std::bad_alloc& e) {
223         // Not enough memory, we cannot test cluster bigger than 4Go :(
224         delete[] blob3;
225         return;
226       }
227       ASSERT_EQ(cluster.is_extended(), true);
228 
229       delete[] blob3;
230       // MEM = 4GiB
231 
232       cluster.close();
233       buffer = write_to_buffer(cluster);
234     }
235   }
236   const auto cluster2shptr = zim::Cluster::read(zim::BufferReader(buffer), zim::offset_t(0));
237   zim::Cluster& cluster2 = *cluster2shptr;
238   ASSERT_EQ(cluster2.isExtended, true);
239   ASSERT_EQ(cluster2.count().v, 4U);
240   ASSERT_EQ(cluster2.getCompression(), zim::zimcompNone);
241   ASSERT_EQ(cluster2.getBlobSize(zim::blob_index_t(0)).v, blob0.size());
242   ASSERT_EQ(cluster2.getBlobSize(zim::blob_index_t(1)).v, blob1.size());
243   ASSERT_EQ(cluster2.getBlobSize(zim::blob_index_t(2)).v, blob2.size());
244   ASSERT_EQ(cluster2.getBlobSize(zim::blob_index_t(3)).v, bigger_than_4g);
245   ASSERT_EQ(blob0, std::string(cluster2.getBlob(zim::blob_index_t(0))));
246   ASSERT_EQ(blob1, std::string(cluster2.getBlob(zim::blob_index_t(1))));
247   ASSERT_EQ(blob2, std::string(cluster2.getBlob(zim::blob_index_t(2))));
248 }
249 #endif
250 
TEST(ClusterTest,read_extended_cluster)251 TEST(ClusterTest, read_extended_cluster)
252 {
253   std::FILE* tmpfile = std::tmpfile();
254   int fd = fileno(tmpfile);
255   ssize_t bytes_written;
256 
257   std::string blob0("123456789012345678901234567890");
258   std::string blob1("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
259   std::string blob2("abcdefghijklmnopqrstuvwxyz");
260 
261   zim::size_type bigger_than_4g = 1024LL*1024LL*1024LL*4LL+1024LL;
262 
263   zim::offset_type offset = 5*sizeof(uint64_t);
264 
265   char a = 0x11;
266   bytes_written = write(fd, &a, 1);
267 
268   char out_buf[sizeof(uint64_t)];
269 
270   zim::toLittleEndian(offset, out_buf);
271   bytes_written = write(fd, out_buf, sizeof(uint64_t));
272 
273   offset += blob0.size();
274   zim::toLittleEndian(offset, out_buf);
275   bytes_written = write(fd, out_buf, sizeof(uint64_t));
276 
277   offset += blob1.size();
278   zim::toLittleEndian(offset, out_buf);
279   bytes_written = write(fd, out_buf, sizeof(uint64_t));
280 
281   offset += blob2.size();
282   zim::toLittleEndian(offset, out_buf);
283   bytes_written = write(fd, out_buf, sizeof(uint64_t));
284 
285   offset += bigger_than_4g;
286   zim::toLittleEndian(offset, out_buf);
287   bytes_written = write(fd, out_buf, sizeof(uint64_t));
288 
289   bytes_written = write(fd, blob0.c_str(), blob0.size());
290   ASSERT_EQ(bytes_written, (ssize_t)blob0.size());
291 
292   bytes_written = write(fd, blob1.c_str(), blob1.size());
293   ASSERT_EQ(bytes_written, (ssize_t)blob1.size());
294 
295   bytes_written = write(fd, blob2.c_str(), blob2.size());
296   ASSERT_EQ(bytes_written, (ssize_t)blob2.size());
297 
298 #ifdef _WIN32
299 # define LSEEK _lseeki64
300 #else
301 # define LSEEK lseek
302 #endif
303   LSEEK(fd , bigger_than_4g-1, SEEK_CUR);
304 #undef LSEEK
305 //  std::fseek(tmpfile, bigger_than_4g-1, SEEK_CUR);
306   a = '\0';
307   bytes_written = write(fd, &a, 1);
308   fflush(tmpfile);
309 
310   auto filePart = new zim::FilePart<>(fileno(tmpfile));
311   auto fileCompound = std::shared_ptr<zim::FileCompound>(new zim::FileCompound(filePart));
312   const auto cluster2shptr = zim::Cluster::read(zim::FileReader(fileCompound), zim::offset_t(0));
313   zim::Cluster& cluster2 = *cluster2shptr;
314   ASSERT_EQ(cluster2.isExtended, true);
315   ASSERT_EQ(cluster2.count().v, 4U);
316   ASSERT_EQ(cluster2.getCompression(), zim::zimcompNone);
317   ASSERT_EQ(cluster2.getBlobSize(zim::blob_index_t(0)).v, blob0.size());
318   ASSERT_EQ(cluster2.getBlobSize(zim::blob_index_t(1)).v, blob1.size());
319   ASSERT_EQ(cluster2.getBlobSize(zim::blob_index_t(2)).v, blob2.size());
320   ASSERT_EQ(cluster2.getBlobSize(zim::blob_index_t(3)).v, bigger_than_4g);
321 
322 
323   ASSERT_EQ(blob0, std::string(cluster2.getBlob(zim::blob_index_t(0))));
324   ASSERT_EQ(blob1, std::string(cluster2.getBlob(zim::blob_index_t(1))));
325   ASSERT_EQ(blob2, std::string(cluster2.getBlob(zim::blob_index_t(2))));
326 
327   const zim::Blob b = cluster2.getBlob(zim::blob_index_t(3));
328   if (SIZE_MAX == UINT32_MAX) {
329     ASSERT_EQ(b.data(), nullptr);
330     ASSERT_EQ(b.size(), 0U);
331   } else {
332     ASSERT_EQ(b.size(), bigger_than_4g);
333   }
334 
335   fclose(tmpfile);
336 }
337 
338 
339 }  // namespace
340