1 /*************************************************************************************************
2 * Data compressor and decompressor
3 * Copyright (C) 2009-2012 Mikio Hirabayashi
4 * This file is part of Kyoto Cabinet.
5 * This program is free software: you can redistribute it and/or modify it under the terms of
6 * the GNU General Public License as published by the Free Software Foundation, either version
7 * 3 of the License, or any later version.
8 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
9 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
10 * See the GNU General Public License for more details.
11 * You should have received a copy of the GNU General Public License along with this program.
12 * If not, see <http://www.gnu.org/licenses/>.
13 *************************************************************************************************/
14
15
16 #ifndef _KCCOMPRESS_H // duplication check
17 #define _KCCOMPRESS_H
18
19 #include <kccommon.h>
20 #include <kcutil.h>
21 #include <kcthread.h>
22
23 namespace kyotocabinet { // common namespace
24
25
/**
 * Interface of data compression and decompression.
 * @note Both operations return a region allocated with the new[] operator; the caller owns it
 * and must release it with the delete[] operator.
 */
class Compressor {
 public:
  /**
   * Destructor.
   * @note It is virtual so that implementations can be destroyed through this interface.
   */
  virtual ~Compressor() {}
  /**
   * Compress a serial data.
   * @param buf the input buffer.
   * @param size the size of the input buffer.
   * @param sp the pointer to the variable into which the size of the region of the return
   * value is assigned.
   * @return the pointer to the result data, or NULL on failure.
   * @note Because the region of the return value is allocated with the new[] operator, it
   * should be released with the delete[] operator when it is no longer in use.
   */
  virtual char* compress(const void* buf, size_t size, size_t* sp) = 0;
  /**
   * Decompress a serial data.
   * @param buf the input buffer.
   * @param size the size of the input buffer.
   * @param sp the pointer to the variable into which the size of the region of the return
   * value is assigned.
   * @return the pointer to the result data, or NULL on failure.
   * @note Because an additional zero code is appended at the end of the region of the return
   * value, the return value can be treated as a C-style string.  Because the region of the
   * return value is allocated with the new[] operator, it should be released with the
   * delete[] operator when it is no longer in use.
   */
  virtual char* decompress(const void* buf, size_t size, size_t* sp) = 0;
};
60
61
/**
 * ZLIB compressor.
 * @note All members are static; the class only groups the ZLIB related functions.
 */
class ZLIB {
 public:
  /**
   * Compression modes.
   */
  enum Mode {
    RAW,                                 ///< without any checksum
    DEFLATE,                             ///< with Adler32 checksum
    GZIP                                 ///< with CRC32 checksum and various meta data
  };
  /**
   * Compress a serial data.
   * @param buf the input buffer.
   * @param size the size of the input buffer.
   * @param sp the pointer to the variable into which the size of the region of the return
   * value is assigned.
   * @param mode the compression mode.
   * @return the pointer to the result data, or NULL on failure.
   * @note Because the region of the return value is allocated with the new[] operator, it
   * should be released with the delete[] operator when it is no longer in use.
   */
  static char* compress(const void* buf, size_t size, size_t* sp, Mode mode = RAW);
  /**
   * Decompress a serial data.
   * @param buf the input buffer.
   * @param size the size of the input buffer.
   * @param sp the pointer to the variable into which the size of the region of the return
   * value is assigned.
   * @param mode the compression mode.
   * @return the pointer to the result data, or NULL on failure.
   * @note Because an additional zero code is appended at the end of the region of the return
   * value, the return value can be treated as a C-style string.  Because the region of the
   * return value is allocated with the new[] operator, it should be released with the
   * delete[] operator when it is no longer in use.
   */
  static char* decompress(const void* buf, size_t size, size_t* sp, Mode mode = RAW);
  /**
   * Calculate the CRC32 checksum of a serial data.
   * @param buf the input buffer.
   * @param size the size of the input buffer.
   * @param seed the cyclic seed value.
   * @return the CRC32 checksum.
   */
  static uint32_t calculate_crc(const void* buf, size_t size, uint32_t seed = 0);
};
110
111
/**
 * LZO compressor.
 * @note All members are static; the class only groups the LZO related functions.
 */
class LZO {
 public:
  /**
   * Compression modes.
   */
  enum Mode {
    RAW,                                 ///< without any checksum
    CRC                                  ///< with CRC32 checksum
  };
  /**
   * Compress a serial data.
   * @param buf the input buffer.
   * @param size the size of the input buffer.
   * @param sp the pointer to the variable into which the size of the region of the return
   * value is assigned.
   * @param mode the compression mode.
   * @return the pointer to the result data, or NULL on failure.
   * @note Because the region of the return value is allocated with the new[] operator, it
   * should be released with the delete[] operator when it is no longer in use.
   */
  static char* compress(const void* buf, size_t size, size_t* sp, Mode mode = RAW);
  /**
   * Decompress a serial data.
   * @param buf the input buffer.
   * @param size the size of the input buffer.
   * @param sp the pointer to the variable into which the size of the region of the return
   * value is assigned.
   * @param mode the compression mode.
   * @return the pointer to the result data, or NULL on failure.
   * @note Because an additional zero code is appended at the end of the region of the return
   * value, the return value can be treated as a C-style string.  Because the region of the
   * return value is allocated with the new[] operator, it should be released with the
   * delete[] operator when it is no longer in use.
   */
  static char* decompress(const void* buf, size_t size, size_t* sp, Mode mode = RAW);
  /**
   * Calculate the CRC32 checksum of a serial data.
   * @param buf the input buffer.
   * @param size the size of the input buffer.
   * @param seed the cyclic seed value.
   * @return the CRC32 checksum.
   */
  static uint32_t calculate_crc(const void* buf, size_t size, uint32_t seed = 0);
};
159
160
/**
 * LZMA compressor.
 * @note All members are static; the class only groups the LZMA related functions.
 */
class LZMA {
 public:
  /**
   * Compression modes.
   */
  enum Mode {
    RAW,                                 ///< without any checksum
    CRC,                                 ///< with CRC32 checksum
    SHA                                  ///< with SHA256 checksum
  };
  /**
   * Compress a serial data.
   * @param buf the input buffer.
   * @param size the size of the input buffer.
   * @param sp the pointer to the variable into which the size of the region of the return
   * value is assigned.
   * @param mode the compression mode.
   * @return the pointer to the result data, or NULL on failure.
   * @note Because the region of the return value is allocated with the new[] operator, it
   * should be released with the delete[] operator when it is no longer in use.
   */
  static char* compress(const void* buf, size_t size, size_t* sp, Mode mode = RAW);
  /**
   * Decompress a serial data.
   * @param buf the input buffer.
   * @param size the size of the input buffer.
   * @param sp the pointer to the variable into which the size of the region of the return
   * value is assigned.
   * @param mode the compression mode.
   * @return the pointer to the result data, or NULL on failure.
   * @note Because an additional zero code is appended at the end of the region of the return
   * value, the return value can be treated as a C-style string.  Because the region of the
   * return value is allocated with the new[] operator, it should be released with the
   * delete[] operator when it is no longer in use.
   */
  static char* decompress(const void* buf, size_t size, size_t* sp, Mode mode = RAW);
  /**
   * Calculate the CRC32 checksum of a serial data.
   * @param buf the input buffer.
   * @param size the size of the input buffer.
   * @param seed the cyclic seed value.
   * @return the CRC32 checksum.
   */
  static uint32_t calculate_crc(const void* buf, size_t size, uint32_t seed = 0);
};
209
210
211 /**
212 * Compressor with ZLIB.
213 */
214 template <ZLIB::Mode MODE>
215 class ZLIBCompressor : public Compressor {
216 private:
217 /**
218 * Compress a serial data.
219 */
220 char* compress(const void* buf, size_t size, size_t* sp) {
221 _assert_(buf && size <= MEMMAXSIZ && sp);
222 return ZLIB::compress(buf, size, sp, MODE);
223 }
224 /**
225 * Decompress a serial data.
226 */
227 char* decompress(const void* buf, size_t size, size_t* sp) {
228 _assert_(buf && size <= MEMMAXSIZ && sp);
229 return ZLIB::decompress(buf, size, sp, MODE);
230 }
231 };
232
233
234 /**
235 * Compressor with LZO.
236 */
237 template <LZO::Mode MODE>
238 class LZOCompressor : public Compressor {
239 private:
240 /**
241 * Compress a serial data.
242 */
243 char* compress(const void* buf, size_t size, size_t* sp) {
244 _assert_(buf && size <= MEMMAXSIZ && sp);
245 return LZO::compress(buf, size, sp, MODE);
246 }
247 /**
248 * Decompress a serial data.
249 */
250 char* decompress(const void* buf, size_t size, size_t* sp) {
251 _assert_(buf && size <= MEMMAXSIZ && sp);
252 return LZO::decompress(buf, size, sp, MODE);
253 }
254 };
255
256
257 /**
258 * Compressor with LZMA.
259 */
260 template <LZMA::Mode MODE>
261 class LZMACompressor : public Compressor {
262 private:
263 /**
264 * Compress a serial data.
265 */
266 char* compress(const void* buf, size_t size, size_t* sp) {
267 _assert_(buf && size <= MEMMAXSIZ && sp);
268 return LZMA::compress(buf, size, sp, MODE);
269 }
270 /**
271 * Decompress a serial data.
272 */
273 char* decompress(const void* buf, size_t size, size_t* sp) {
274 _assert_(buf && size <= MEMMAXSIZ && sp);
275 return LZMA::decompress(buf, size, sp, MODE);
276 }
277 };
278
279
280 /**
281 * Compressor with the Arcfour cipher.
282 */
283 class ArcfourCompressor : public Compressor {
284 public:
285 /**
286 * Constructor.
287 */
288 ArcfourCompressor() : kbuf_(NULL), ksiz_(0), comp_(NULL), salt_(0), cycle_(false) {
289 _assert_(true);
290 kbuf_ = new char[1];
291 ksiz_ = 0;
292 }
293 /**
294 * Destructor.
295 */
296 ~ArcfourCompressor() {
297 _assert_(true);
298 delete[] kbuf_;
299 }
300 /**
301 * Set the cipher key.
302 * @param kbuf the pointer to the region of the cipher key.
303 * @param ksiz the size of the region of the cipher key.
304 */
305 void set_key(const void* kbuf, size_t ksiz) {
306 _assert_(kbuf && ksiz <= MEMMAXSIZ);
307 delete[] kbuf_;
308 if (ksiz > NUMBUFSIZ) ksiz = NUMBUFSIZ;
309 kbuf_ = new char[ksiz];
310 std::memcpy(kbuf_, kbuf, ksiz);
311 ksiz_ = ksiz;
312 }
313 /**
314 * Set an additional data compressor.
315 * @param comp the additional data data compressor.
316 */
317 void set_compressor(Compressor* comp) {
318 _assert_(comp);
319 comp_ = comp;
320 }
321 /**
322 * Begin the cycle of ciper salt.
323 * @param salt the additional cipher salt.
324 */
325 void begin_cycle(uint64_t salt = 0) {
326 salt_ = salt;
327 cycle_ = true;
328 }
329 private:
330 /**
331 * Compress a serial data.
332 */
333 char* compress(const void* buf, size_t size, size_t* sp) {
334 _assert_(buf && size <= MEMMAXSIZ && sp);
335 uint64_t salt = cycle_ ? salt_.add(1) : 0;
336 char kbuf[NUMBUFSIZ*2];
337 writefixnum(kbuf, salt, sizeof(salt));
338 std::memcpy(kbuf + sizeof(salt), kbuf_, ksiz_);
339 char* tbuf = NULL;
340 if (comp_) {
341 tbuf = comp_->compress(buf, size, &size);
342 if (!tbuf) return NULL;
343 buf = tbuf;
344 }
345 size_t zsiz = sizeof(salt) + size;
346 char* zbuf = new char[zsiz];
347 writefixnum(zbuf, salt, sizeof(salt));
348 arccipher(buf, size, kbuf, sizeof(salt) + ksiz_, zbuf + sizeof(salt));
349 delete[] tbuf;
350 if (cycle_) {
351 size_t range = zsiz - sizeof(salt);
352 if (range > (size_t)INT8MAX) range = INT8MAX;
353 salt_.add(hashmurmur(zbuf + sizeof(salt), range) << 32);
354 }
355 *sp = zsiz;
356 return zbuf;
357 }
358 /**
359 * Decompress a serial data.
360 */
361 char* decompress(const void* buf, size_t size, size_t* sp) {
362 _assert_(buf && size <= MEMMAXSIZ && sp);
363 if (size < sizeof(uint64_t)) return NULL;
364 char kbuf[NUMBUFSIZ*2];
365 std::memcpy(kbuf, buf, sizeof(uint64_t));
366 std::memcpy(kbuf + sizeof(uint64_t), kbuf_, ksiz_);
367 buf = (char*)buf + sizeof(uint64_t);
368 size -= sizeof(uint64_t);
369 char* zbuf = new char[size];
370 arccipher(buf, size, kbuf, sizeof(uint64_t) + ksiz_, zbuf);
371 if (comp_) {
372 char* tbuf = comp_->decompress(zbuf, size, &size);
373 delete[] zbuf;
374 if (!tbuf) return NULL;
375 zbuf = tbuf;
376 }
377 *sp = size;
378 return zbuf;
379 }
380 /** The pointer to the key. */
381 char* kbuf_;
382 /** The size of the key. */
383 size_t ksiz_;
384 /** The data compressor. */
385 Compressor* comp_;
386 /** The cipher salt. */
387 AtomicInt64 salt_;
388 /** The flag of the salt cycle */
389 bool cycle_;
390 };
391
392
393 /**
394 * Prepared pointer of the compressor with ZLIB raw mode.
395 */
396 extern ZLIBCompressor<ZLIB::RAW>* const ZLIBRAWCOMP;
397
398
399 } // common namespace
400
401 #endif // duplication check
402
403 // END OF FILE
404