1 /**
2  * @file   compression_filter.h
3  *
4  * @section LICENSE
5  *
6  * The MIT License
7  *
8  * @copyright Copyright (c) 2017-2021 TileDB, Inc.
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a copy
11  * of this software and associated documentation files (the "Software"), to deal
12  * in the Software without restriction, including without limitation the rights
13  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14  * copies of the Software, and to permit persons to whom the Software is
15  * furnished to do so, subject to the following conditions:
16  *
17  * The above copyright notice and this permission notice shall be included in
18  * all copies or substantial portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26  * THE SOFTWARE.
27  *
28  * @section DESCRIPTION
29  *
30  * This file declares class CompressionFilter.
31  */
32 
33 #ifndef TILEDB_COMPRESSION_FILTER_H
34 #define TILEDB_COMPRESSION_FILTER_H
35 
36 #include "tiledb/common/status.h"
37 #include "tiledb/sm/filter/filter.h"
38 
// NOTE(review): this using-directive sits at header scope, so every file that
// includes this header also gets the names of tiledb::common made visible.
// Presumably the unqualified `Status` used in the declarations below relies
// on it -- confirm before narrowing or removing it.
using namespace tiledb::common;
40 
41 namespace tiledb {
42 namespace sm {
43 
/**
 * Opaque declaration of the compressor enumeration (fixed underlying type
 * uint8_t). The enumerators are not listed here; they are defined elsewhere.
 */
enum class Compressor : uint8_t;
45 
46 /**
47  * A filter that compresses/decompresses its input data. The FilterBuffer input
48  * to a filter may contain multiple buffers. Each input buffer is termed a
49  * "part", and is compressed separately by this filter. Input metadata is
50  * compressed as well.
51  *
52  * The forward (compress) output metadata has the format:
53  *   uint32_t - Number of compressed metadata parts
54  *   uint32_t - Number of compressed data parts
55  *   metadata_part0
56  *   ...
57  *   metadata_partN
58  *   data_part0
59  *   ...
60  *   data_partN
61  * Where each metadata_part/data_part has the format:
62  *   uint32_t - part uncompressed length
63  *   uint32_t - part compressed length
64  *
65  * The forward output data format is just the concatenated compressed bytes:
66  *   uint8_t[] - metadata_part0's array of compressed bytes
67  *   ...
68  *   uint8_t[] - metadata_partN's array of compressed bytes
69  *   uint8_t[] - data_part0's array of compressed bytes
70  *   ...
71  *   uint8_t[] - data_partN's array of compressed bytes
72  *
73  * The reverse (decompress) output format is simply:
74  *   uint8_t[] - Array of uncompressed bytes
75  */
76 class CompressionFilter : public Filter {
77  public:
78   /**
79    * Constructor.
80    *
81    * @param compressor Compressor to use
82    * @param level Compression level to use
83    */
84   CompressionFilter(Compressor compressor, int level);
85 
86   /**
87    * Constructor.
88    *
89    * @param compressor Compressor to use
90    * @param level Compression level to use
91    */
92   CompressionFilter(FilterType compressor, int level);
93 
94   /** Return the compressor used by this filter instance. */
95   Compressor compressor() const;
96 
97   /** Return the compression level used by this filter instance. */
98   int compression_level() const;
99 
100   /** Dumps the filter details in ASCII format in the selected output. */
101   void dump(FILE* out) const override;
102 
103   /**
104    * Compress the given input into the given output.
105    */
106   Status run_forward(
107       FilterBuffer* input_metadata,
108       FilterBuffer* input,
109       FilterBuffer* output_metadata,
110       FilterBuffer* output) const override;
111 
112   /**
113    * Decompress the given input into the given output.
114    */
115   Status run_reverse(
116       FilterBuffer* input_metadata,
117       FilterBuffer* input,
118       FilterBuffer* output_metadata,
119       FilterBuffer* output,
120       const Config& config) const override;
121 
122   /** Set the compressor used by this filter instance. */
123   void set_compressor(Compressor compressor);
124 
125   /** Set the compression level used by this filter instance. */
126   void set_compression_level(int compressor_level);
127 
128  private:
129   /** The compressor. */
130   Compressor compressor_;
131 
132   /** The compression level. */
133   int level_;
134 
135   /** Returns a new clone of this filter. */
136   CompressionFilter* clone_impl() const override;
137 
138   /** Helper function to compress a single contiguous buffer (part). */
139   Status compress_part(
140       ConstBuffer* part, Buffer* output, FilterBuffer* output_metadata) const;
141 
142   /** Return the FilterType corresponding to the given Compressor. */
143   static FilterType compressor_to_filter(Compressor compressor);
144 
145   /**
146    * Helper function to decompress a single contiguous buffer (part), appending
147    * onto the single output buffer.
148    */
149   Status decompress_part(
150       FilterBuffer* input, Buffer* output, FilterBuffer* input_metadata) const;
151 
152   /** Deserializes this filter's metadata from the given buffer. */
153   Status deserialize_impl(ConstBuffer* buff) override;
154 
155   /** Gets an option from this filter. */
156   Status get_option_impl(FilterOption option, void* value) const override;
157 
158   /** Return the Compressor corresponding to the given FilterType. */
159   static Compressor filter_to_compressor(FilterType type);
160 
161   /** Computes the compression overhead on nbytes of the input data. */
162   uint64_t overhead(uint64_t nbytes) const;
163 
164   /** Sets an option on this filter. */
165   Status set_option_impl(FilterOption option, const void* value) override;
166 
167   /** Serializes this filter's metadata to the given buffer. */
168   Status serialize_impl(Buffer* buff) const override;
169 };
170 
171 }  // namespace sm
172 }  // namespace tiledb
173 
174 #endif  // TILEDB_COMPRESSION_FILTER_H
175