1 /*************************************************************************************************
2  * Data compressor and decompressor
3  *                                                      Copyright (C) 2009-2012 Mikio Hirabayashi
4  * This file is part of Kyoto Cabinet.
5  * This program is free software: you can redistribute it and/or modify it under the terms of
6  * the GNU General Public License as published by the Free Software Foundation, either version
7  * 3 of the License, or any later version.
8  * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
9  * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
10  * See the GNU General Public License for more details.
11  * You should have received a copy of the GNU General Public License along with this program.
12  * If not, see <http://www.gnu.org/licenses/>.
13  *************************************************************************************************/
14 
15 
16 #ifndef _KCCOMPRESS_H                    // duplication check
17 #define _KCCOMPRESS_H
18 
19 #include <kccommon.h>
20 #include <kcutil.h>
21 #include <kcthread.h>
22 
23 namespace kyotocabinet {                 // common namespace
24 
25 
/**
 * Interface of data compression and decompression.
 * @note Both operations hand back a region allocated with the new[] operator.  The caller owns
 * that region and releases it with the delete[] operator.
 */
class Compressor {
 public:
  /**
   * Destructor.
   * @note It is virtual so that an implementation can be destroyed through a pointer to this
   * interface.
   */
  virtual ~Compressor() {}
  /**
   * Compress a serial data.
   * @param buf the input buffer.
   * @param size the size of the input buffer.
   * @param sp the pointer to the variable into which the size of the region of the return
   * value is assigned.
   * @return the pointer to the result data, or NULL on failure.
   * @note Because the region of the return value is allocated with the new[] operator, it
   * should be released with the delete[] operator when it is no longer in use.
   */
  virtual char* compress(const void* buf, size_t size, size_t* sp) = 0;
  /**
   * Decompress a serial data.
   * @param buf the input buffer.
   * @param size the size of the input buffer.
   * @param sp the pointer to the variable into which the size of the region of the return
   * value is assigned.
   * @return the pointer to the result data, or NULL on failure.
   * @note Because an additional zero code is appended at the end of the region of the return
   * value, the return value can be treated as a C-style string.  Because the region of the
   * return value is allocated with the new[] operator, it should be released with the
   * delete[] operator when it is no longer in use.
   */
  virtual char* decompress(const void* buf, size_t size, size_t* sp) = 0;
};
60 
61 
/**
 * ZLIB compressor.
 * @note Every member is static; the class only groups the ZLIB related functions and the
 * enumeration of their modes.
 */
class ZLIB {
 public:
  /**
   * Compression modes.
   */
  enum Mode {
    RAW,                                 ///< without any checksum
    DEFLATE,                             ///< with Adler32 checksum
    GZIP                                 ///< with CRC32 checksum and various meta data
  };
  /**
   * Compress a serial data.
   * @param buf the input buffer.
   * @param size the size of the input buffer.
   * @param sp the pointer to the variable into which the size of the region of the return
   * value is assigned.
   * @param mode the compression mode.  It defaults to RAW.
   * @return the pointer to the result data, or NULL on failure.
   * @note Because the region of the return value is allocated with the new[] operator, it
   * should be released with the delete[] operator when it is no longer in use.
   */
  static char* compress(const void* buf, size_t size, size_t* sp, Mode mode = RAW);
  /**
   * Decompress a serial data.
   * @param buf the input buffer.
   * @param size the size of the input buffer.
   * @param sp the pointer to the variable into which the size of the region of the return
   * value is assigned.
   * @param mode the compression mode.  It defaults to RAW.
   * @return the pointer to the result data, or NULL on failure.
   * @note Because an additional zero code is appended at the end of the region of the return
   * value, the return value can be treated as a C-style string.  Because the region of the
   * return value is allocated with the new[] operator, it should be released with the
   * delete[] operator when it is no longer in use.
   */
  static char* decompress(const void* buf, size_t size, size_t* sp, Mode mode = RAW);
  /**
   * Calculate the CRC32 checksum of a serial data.
   * @param buf the input buffer.
   * @param size the size of the input buffer.
   * @param seed the cyclic seed value.  It defaults to 0.
   * @return the CRC32 checksum.
   */
  static uint32_t calculate_crc(const void* buf, size_t size, uint32_t seed = 0);
};
110 
111 
/**
 * LZO compressor.
 * @note Every member is static; the class only groups the LZO related functions and the
 * enumeration of their modes.
 */
class LZO {
 public:
  /**
   * Compression modes.
   */
  enum Mode {
    RAW,                                 ///< without any checksum
    CRC                                  ///< with CRC32 checksum
  };
  /**
   * Compress a serial data.
   * @param buf the region to be compressed.
   * @param size the number of bytes in the input region.
   * @param sp the pointer to the variable which receives the size of the returned region.
   * @param mode the compression mode.  It defaults to RAW.
   * @return the pointer to the result data, or NULL on failure.
   * @note The returned region is allocated with the new[] operator, so the caller should
   * release it with the delete[] operator when it is no longer in use.
   */
  static char* compress(const void* buf, size_t size, size_t* sp, Mode mode = RAW);
  /**
   * Decompress a serial data.
   * @param buf the region to be decompressed.
   * @param size the number of bytes in the input region.
   * @param sp the pointer to the variable which receives the size of the returned region.
   * @param mode the compression mode.  It defaults to RAW.
   * @return the pointer to the result data, or NULL on failure.
   * @note An additional zero code is appended at the end of the returned region, so the
   * return value can be treated as a C-style string.  The returned region is allocated with
   * the new[] operator, so the caller should release it with the delete[] operator when it is
   * no longer in use.
   */
  static char* decompress(const void* buf, size_t size, size_t* sp, Mode mode = RAW);
  /**
   * Calculate the CRC32 checksum of a serial data.
   * @param buf the region to be summed.
   * @param size the number of bytes in the input region.
   * @param seed the cyclic seed value.  It defaults to 0.
   * @return the CRC32 checksum.
   */
  static uint32_t calculate_crc(const void* buf, size_t size, uint32_t seed = 0);
};
159 
160 
/**
 * LZMA compressor.
 * @note Every member is static; the class only groups the LZMA related functions and the
 * enumeration of their modes.
 */
class LZMA {
 public:
  /**
   * Compression modes.
   */
  enum Mode {
    RAW,                                 ///< without any checksum
    CRC,                                 ///< with CRC32 checksum
    SHA                                  ///< with SHA256 checksum
  };
  /**
   * Compress a serial data.
   * @param buf the region to be compressed.
   * @param size the number of bytes in the input region.
   * @param sp the pointer to the variable which receives the size of the returned region.
   * @param mode the compression mode.  It defaults to RAW.
   * @return the pointer to the result data, or NULL on failure.
   * @note The returned region is allocated with the new[] operator, so the caller should
   * release it with the delete[] operator when it is no longer in use.
   */
  static char* compress(const void* buf, size_t size, size_t* sp, Mode mode = RAW);
  /**
   * Decompress a serial data.
   * @param buf the region to be decompressed.
   * @param size the number of bytes in the input region.
   * @param sp the pointer to the variable which receives the size of the returned region.
   * @param mode the compression mode.  It defaults to RAW.
   * @return the pointer to the result data, or NULL on failure.
   * @note An additional zero code is appended at the end of the returned region, so the
   * return value can be treated as a C-style string.  The returned region is allocated with
   * the new[] operator, so the caller should release it with the delete[] operator when it is
   * no longer in use.
   */
  static char* decompress(const void* buf, size_t size, size_t* sp, Mode mode = RAW);
  /**
   * Calculate the CRC32 checksum of a serial data.
   * @param buf the region to be summed.
   * @param size the number of bytes in the input region.
   * @param seed the cyclic seed value.  It defaults to 0.
   * @return the CRC32 checksum.
   */
  static uint32_t calculate_crc(const void* buf, size_t size, uint32_t seed = 0);
};
209 
210 
211 /**
212  * Compressor with ZLIB.
213  */
214 template <ZLIB::Mode MODE>
215 class ZLIBCompressor : public Compressor {
216  private:
217   /**
218    * Compress a serial data.
219    */
220   char* compress(const void* buf, size_t size, size_t* sp) {
221     _assert_(buf && size <= MEMMAXSIZ && sp);
222     return ZLIB::compress(buf, size, sp, MODE);
223   }
224   /**
225    * Decompress a serial data.
226    */
227   char* decompress(const void* buf, size_t size, size_t* sp) {
228     _assert_(buf && size <= MEMMAXSIZ && sp);
229     return ZLIB::decompress(buf, size, sp, MODE);
230   }
231 };
232 
233 
234 /**
235  * Compressor with LZO.
236  */
237 template <LZO::Mode MODE>
238 class LZOCompressor : public Compressor {
239  private:
240   /**
241    * Compress a serial data.
242    */
243   char* compress(const void* buf, size_t size, size_t* sp) {
244     _assert_(buf && size <= MEMMAXSIZ && sp);
245     return LZO::compress(buf, size, sp, MODE);
246   }
247   /**
248    * Decompress a serial data.
249    */
250   char* decompress(const void* buf, size_t size, size_t* sp) {
251     _assert_(buf && size <= MEMMAXSIZ && sp);
252     return LZO::decompress(buf, size, sp, MODE);
253   }
254 };
255 
256 
257 /**
258  * Compressor with LZMA.
259  */
260 template <LZMA::Mode MODE>
261 class LZMACompressor : public Compressor {
262  private:
263   /**
264    * Compress a serial data.
265    */
266   char* compress(const void* buf, size_t size, size_t* sp) {
267     _assert_(buf && size <= MEMMAXSIZ && sp);
268     return LZMA::compress(buf, size, sp, MODE);
269   }
270   /**
271    * Decompress a serial data.
272    */
273   char* decompress(const void* buf, size_t size, size_t* sp) {
274     _assert_(buf && size <= MEMMAXSIZ && sp);
275     return LZMA::decompress(buf, size, sp, MODE);
276   }
277 };
278 
279 
280 /**
281  * Compressor with the Arcfour cipher.
282  */
283 class ArcfourCompressor : public Compressor {
284  public:
285   /**
286    * Constructor.
287    */
288   ArcfourCompressor() : kbuf_(NULL), ksiz_(0), comp_(NULL), salt_(0), cycle_(false) {
289     _assert_(true);
290     kbuf_ = new char[1];
291     ksiz_ = 0;
292   }
293   /**
294    * Destructor.
295    */
296   ~ArcfourCompressor() {
297     _assert_(true);
298     delete[] kbuf_;
299   }
300   /**
301    * Set the cipher key.
302    * @param kbuf the pointer to the region of the cipher key.
303    * @param ksiz the size of the region of the cipher key.
304    */
305   void set_key(const void* kbuf, size_t ksiz) {
306     _assert_(kbuf && ksiz <= MEMMAXSIZ);
307     delete[] kbuf_;
308     if (ksiz > NUMBUFSIZ) ksiz = NUMBUFSIZ;
309     kbuf_ = new char[ksiz];
310     std::memcpy(kbuf_, kbuf, ksiz);
311     ksiz_ = ksiz;
312   }
313   /**
314    * Set an additional data compressor.
315    * @param comp the additional data data compressor.
316    */
317   void set_compressor(Compressor* comp) {
318     _assert_(comp);
319     comp_ = comp;
320   }
321   /**
322    * Begin the cycle of ciper salt.
323    * @param salt the additional cipher salt.
324    */
325   void begin_cycle(uint64_t salt = 0) {
326     salt_ = salt;
327     cycle_ = true;
328   }
329  private:
330   /**
331    * Compress a serial data.
332    */
333   char* compress(const void* buf, size_t size, size_t* sp) {
334     _assert_(buf && size <= MEMMAXSIZ && sp);
335     uint64_t salt = cycle_ ? salt_.add(1) : 0;
336     char kbuf[NUMBUFSIZ*2];
337     writefixnum(kbuf, salt, sizeof(salt));
338     std::memcpy(kbuf + sizeof(salt), kbuf_, ksiz_);
339     char* tbuf = NULL;
340     if (comp_) {
341       tbuf = comp_->compress(buf, size, &size);
342       if (!tbuf) return NULL;
343       buf = tbuf;
344     }
345     size_t zsiz = sizeof(salt) + size;
346     char* zbuf = new char[zsiz];
347     writefixnum(zbuf, salt, sizeof(salt));
348     arccipher(buf, size, kbuf, sizeof(salt) + ksiz_, zbuf + sizeof(salt));
349     delete[] tbuf;
350     if (cycle_) {
351       size_t range = zsiz - sizeof(salt);
352       if (range > (size_t)INT8MAX) range = INT8MAX;
353       salt_.add(hashmurmur(zbuf + sizeof(salt), range) << 32);
354     }
355     *sp = zsiz;
356     return zbuf;
357   }
358   /**
359    * Decompress a serial data.
360    */
361   char* decompress(const void* buf, size_t size, size_t* sp) {
362     _assert_(buf && size <= MEMMAXSIZ && sp);
363     if (size < sizeof(uint64_t)) return NULL;
364     char kbuf[NUMBUFSIZ*2];
365     std::memcpy(kbuf, buf, sizeof(uint64_t));
366     std::memcpy(kbuf + sizeof(uint64_t), kbuf_, ksiz_);
367     buf = (char*)buf + sizeof(uint64_t);
368     size -= sizeof(uint64_t);
369     char* zbuf = new char[size];
370     arccipher(buf, size, kbuf, sizeof(uint64_t) + ksiz_, zbuf);
371     if (comp_) {
372       char* tbuf = comp_->decompress(zbuf, size, &size);
373       delete[] zbuf;
374       if (!tbuf) return NULL;
375       zbuf = tbuf;
376     }
377     *sp = size;
378     return zbuf;
379   }
380   /** The pointer to the key. */
381   char* kbuf_;
382   /** The size of the key. */
383   size_t ksiz_;
384   /** The data compressor. */
385   Compressor* comp_;
386   /** The cipher salt. */
387   AtomicInt64 salt_;
388   /** The flag of the salt cycle */
389   bool cycle_;
390 };
391 
392 
393 /**
394  * Prepared pointer of the compressor with ZLIB raw mode.
395  */
396 extern ZLIBCompressor<ZLIB::RAW>* const ZLIBRAWCOMP;
397 
398 
399 }                                        // common namespace
400 
401 #endif                                   // duplication check
402 
403 // END OF FILE
404