1 /**
2 * @file dd_compressor.cc
3 *
4 * @section LICENSE
5 *
6 * The MIT License
7 *
8 * @copyright Copyright (c) 2017-2021 TileDB, Inc.
9 *
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
16 *
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 * THE SOFTWARE.
27 *
28 * @section DESCRIPTION
29 *
30 * This file implements the double delta compressor class.
31 */
32
33 #include "tiledb/sm/compressors/dd_compressor.h"
34 #include "tiledb/common/logger.h"
35 #include "tiledb/sm/buffer/buffer.h"
36 #include "tiledb/sm/enums/datatype.h"
37
38 /* ****************************** */
39 /* MACROS */
40 /* ****************************** */
41
42 #define ABS(x) ((x) < 0) ? uint64_t(-(x)) : uint64_t(x)
43
44 using namespace tiledb::common;
45
46 namespace tiledb {
47 namespace sm {
48
49 const uint64_t DoubleDelta::OVERHEAD = 17;
50
51 /* ****************************** */
52 /* API */
53 /* ****************************** */
54
compress(Datatype type,ConstBuffer * input_buffer,Buffer * output_buffer)55 Status DoubleDelta::compress(
56 Datatype type, ConstBuffer* input_buffer, Buffer* output_buffer) {
57 switch (type) {
58 case Datatype::INT8:
59 return DoubleDelta::compress<int8_t>(input_buffer, output_buffer);
60 case Datatype::UINT8:
61 return DoubleDelta::compress<uint8_t>(input_buffer, output_buffer);
62 case Datatype::INT16:
63 return DoubleDelta::compress<int16_t>(input_buffer, output_buffer);
64 case Datatype::UINT16:
65 return DoubleDelta::compress<uint16_t>(input_buffer, output_buffer);
66 case Datatype::INT32:
67 return DoubleDelta::compress<int>(input_buffer, output_buffer);
68 case Datatype::UINT32:
69 return DoubleDelta::compress<uint32_t>(input_buffer, output_buffer);
70 case Datatype::INT64:
71 return DoubleDelta::compress<int64_t>(input_buffer, output_buffer);
72 case Datatype::UINT64:
73 return DoubleDelta::compress<uint64_t>(input_buffer, output_buffer);
74 case Datatype::CHAR:
75 return DoubleDelta::compress<char>(input_buffer, output_buffer);
76 case Datatype::DATETIME_YEAR:
77 case Datatype::DATETIME_MONTH:
78 case Datatype::DATETIME_WEEK:
79 case Datatype::DATETIME_DAY:
80 case Datatype::DATETIME_HR:
81 case Datatype::DATETIME_MIN:
82 case Datatype::DATETIME_SEC:
83 case Datatype::DATETIME_MS:
84 case Datatype::DATETIME_US:
85 case Datatype::DATETIME_NS:
86 case Datatype::DATETIME_PS:
87 case Datatype::DATETIME_FS:
88 case Datatype::DATETIME_AS:
89 case Datatype::TIME_HR:
90 case Datatype::TIME_MIN:
91 case Datatype::TIME_SEC:
92 case Datatype::TIME_MS:
93 case Datatype::TIME_US:
94 case Datatype::TIME_NS:
95 case Datatype::TIME_PS:
96 case Datatype::TIME_FS:
97 case Datatype::TIME_AS:
98 return DoubleDelta::compress<int64_t>(input_buffer, output_buffer);
99 case Datatype::STRING_ASCII:
100 case Datatype::STRING_UTF8:
101 case Datatype::STRING_UTF16:
102 case Datatype::STRING_UTF32:
103 case Datatype::STRING_UCS2:
104 case Datatype::STRING_UCS4:
105 case Datatype::ANY:
106 return DoubleDelta::compress<uint8_t>(input_buffer, output_buffer);
107 case Datatype::FLOAT32:
108 case Datatype::FLOAT64:
109 return LOG_STATUS(Status::CompressionError(
110 "Cannot compress tile with DoubleDelta; Float "
111 "datatypes are not supported"));
112 }
113
114 assert(false);
115 return LOG_STATUS(Status::CompressionError(
116 "Cannot compress tile with DoubleDelta; Not supported datatype"));
117 }
118
decompress(Datatype type,ConstBuffer * input_buffer,PreallocatedBuffer * output_buffer)119 Status DoubleDelta::decompress(
120 Datatype type,
121 ConstBuffer* input_buffer,
122 PreallocatedBuffer* output_buffer) {
123 switch (type) {
124 case Datatype::INT8:
125 return DoubleDelta::decompress<int8_t>(input_buffer, output_buffer);
126 case Datatype::UINT8:
127 return DoubleDelta::decompress<uint8_t>(input_buffer, output_buffer);
128 case Datatype::INT16:
129 return DoubleDelta::decompress<int16_t>(input_buffer, output_buffer);
130 case Datatype::UINT16:
131 return DoubleDelta::decompress<uint16_t>(input_buffer, output_buffer);
132 case Datatype::INT32:
133 return DoubleDelta::decompress<int>(input_buffer, output_buffer);
134 case Datatype::UINT32:
135 return DoubleDelta::decompress<uint32_t>(input_buffer, output_buffer);
136 case Datatype::INT64:
137 return DoubleDelta::decompress<int64_t>(input_buffer, output_buffer);
138 case Datatype::UINT64:
139 return DoubleDelta::decompress<uint64_t>(input_buffer, output_buffer);
140 case Datatype::CHAR:
141 return DoubleDelta::decompress<char>(input_buffer, output_buffer);
142 case Datatype::DATETIME_YEAR:
143 case Datatype::DATETIME_MONTH:
144 case Datatype::DATETIME_WEEK:
145 case Datatype::DATETIME_DAY:
146 case Datatype::DATETIME_HR:
147 case Datatype::DATETIME_MIN:
148 case Datatype::DATETIME_SEC:
149 case Datatype::DATETIME_MS:
150 case Datatype::DATETIME_US:
151 case Datatype::DATETIME_NS:
152 case Datatype::DATETIME_PS:
153 case Datatype::DATETIME_FS:
154 case Datatype::DATETIME_AS:
155 case Datatype::TIME_HR:
156 case Datatype::TIME_MIN:
157 case Datatype::TIME_SEC:
158 case Datatype::TIME_MS:
159 case Datatype::TIME_US:
160 case Datatype::TIME_NS:
161 case Datatype::TIME_PS:
162 case Datatype::TIME_FS:
163 case Datatype::TIME_AS:
164 return DoubleDelta::decompress<int64_t>(input_buffer, output_buffer);
165 case Datatype::STRING_ASCII:
166 case Datatype::STRING_UTF8:
167 case Datatype::STRING_UTF16:
168 case Datatype::STRING_UTF32:
169 case Datatype::STRING_UCS2:
170 case Datatype::STRING_UCS4:
171 case Datatype::ANY:
172 return DoubleDelta::decompress<uint8_t>(input_buffer, output_buffer);
173 case Datatype::FLOAT32:
174 case Datatype::FLOAT64:
175 return LOG_STATUS(Status::CompressionError(
176 "Cannot decompress tile with DoubleDelta; Float "
177 "datatypes are not supported"));
178 }
179
180 assert(false);
181 return LOG_STATUS(Status::CompressionError(
182 "Cannot decompress tile with DoubleDelta; Not supported datatype"));
183 }
184
overhead(uint64_t nbytes)185 uint64_t DoubleDelta::overhead(uint64_t nbytes) {
186 // DoubleDelta has a fixed size overhead
187 (void)nbytes;
188 return DoubleDelta::OVERHEAD;
189 }
190
191 /* ****************************** */
192 /* PRIVATE METHODS */
193 /* ****************************** */
194
195 template <class T>
compress(ConstBuffer * input_buffer,Buffer * output_buffer)196 Status DoubleDelta::compress(ConstBuffer* input_buffer, Buffer* output_buffer) {
197 // Calculate number of values and handle trivial case
198 uint64_t value_size = sizeof(T);
199 uint64_t num = input_buffer->size() / value_size;
200 assert(num > 0 && (input_buffer->size() % value_size == 0));
201
202 // Calculate bitsize (ignoring the sign bit)
203 auto in = (T*)input_buffer->data();
204 unsigned int bitsize;
205 RETURN_NOT_OK(compute_bitsize(in, num, &bitsize));
206 assert(bitsize <= std::numeric_limits<uint8_t>::max());
207 auto bitsize_c = static_cast<uint8_t>(bitsize);
208
209 // Write bitsize and number of values
210 RETURN_NOT_OK(output_buffer->write(&bitsize_c, sizeof(uint8_t)));
211 RETURN_NOT_OK(output_buffer->write(&num, sizeof(uint64_t)));
212
213 // Trivial case - no compression
214 if (bitsize >= sizeof(T) * 8 - 1) {
215 RETURN_NOT_OK(output_buffer->write(in, input_buffer->size()));
216 return Status::Ok();
217 }
218
219 // Write first value
220 RETURN_NOT_OK(output_buffer->write(&in[0], value_size));
221 if (num == 1)
222 return Status::Ok();
223
224 // Write second value
225 RETURN_NOT_OK(output_buffer->write(&in[1], value_size));
226 if (num == 2)
227 return Status::Ok();
228
229 // Write double deltas
230 int64_t prev_delta = int64_t(in[1]) - int64_t(in[0]);
231 int bit_in_chunk = 63; // Leftmost bit (MSB)
232 uint64_t chunk = 0;
233 for (uint64_t i = 2; i < num; ++i) {
234 int64_t cur_delta = int64_t(in[i]) - int64_t(in[i - 1]);
235 int64_t dd = cur_delta - prev_delta;
236 RETURN_NOT_OK(
237 write_double_delta(output_buffer, dd, bitsize, &chunk, &bit_in_chunk));
238 prev_delta = cur_delta;
239 }
240
241 // Write whatever is left in the chunk
242 if (bit_in_chunk < 63)
243 RETURN_NOT_OK(output_buffer->write(&chunk, sizeof(uint64_t)));
244
245 return Status::Ok();
246 }
247
248 template <class T>
compute_bitsize(T * in,uint64_t num,unsigned int * bitsize)249 Status DoubleDelta::compute_bitsize(
250 T* in, uint64_t num, unsigned int* bitsize) {
251 // Find maximum absolute double delta
252 *bitsize = 0;
253 int64_t max = 0;
254 if (num <= 2) {
255 *bitsize = 0;
256 return Status::Ok();
257 }
258 int64_t prev_delta = int64_t(in[1]) - int64_t(in[0]);
259 char delta_out_of_bounds = 0;
260 for (uint64_t i = 2; i < num; ++i) {
261 int64_t cur_delta = int64_t(in[i]) - int64_t(in[i - 1]);
262 int64_t dd = cur_delta - prev_delta;
263 delta_out_of_bounds |= (char)(cur_delta < 0 && prev_delta > 0 && dd > 0);
264 delta_out_of_bounds |= (char)(cur_delta > 0 && prev_delta < 0 && dd < 0);
265 max = std::max(std::abs(dd), max);
266 prev_delta = cur_delta;
267 }
268 // Handle error
269 if (delta_out_of_bounds) {
270 return LOG_STATUS(
271 Status::CompressionError("Cannot compress with DoubleDelta; Some "
272 "negative double delta is out of bounds"));
273 }
274 // Calculate bitsize of the maximum absolute double delta
275 do {
276 ++(*bitsize);
277 max >>= 1;
278 } while (max);
279 return Status::Ok();
280 }
281
282 template <class T>
decompress(ConstBuffer * input_buffer,PreallocatedBuffer * output_buffer)283 Status DoubleDelta::decompress(
284 ConstBuffer* input_buffer, PreallocatedBuffer* output_buffer) {
285 // Read bitsize and number of values
286 uint8_t bitsize_c = 0;
287 uint64_t num = 0;
288 uint64_t value_size = sizeof(T);
289 RETURN_NOT_OK(input_buffer->read(&bitsize_c, sizeof(uint8_t)));
290 RETURN_NOT_OK(input_buffer->read(&num, sizeof(uint64_t)));
291 auto bitsize = static_cast<unsigned int>(bitsize_c);
292 auto out = (T*)output_buffer->cur_data();
293
294 // Trivial case - no compression
295 if (bitsize >= sizeof(T) * 8 - 1) {
296 RETURN_NOT_OK(output_buffer->write(
297 input_buffer->cur_data(), input_buffer->nbytes_left_to_read()));
298 return Status::Ok();
299 }
300
301 // Read first value
302 T value;
303 RETURN_NOT_OK(input_buffer->read(&value, value_size));
304 RETURN_NOT_OK(output_buffer->write(&value, value_size));
305 if (num == 1)
306 return Status::Ok();
307
308 // Read second value
309 RETURN_NOT_OK(input_buffer->read(&value, value_size));
310 RETURN_NOT_OK(output_buffer->write(&value, value_size));
311 if (num == 2)
312 return Status::Ok();
313
314 // Read first chunk
315 uint64_t chunk;
316 RETURN_NOT_OK(input_buffer->read(&chunk, sizeof(uint64_t)));
317 int bit_in_chunk = 63; // Leftmost bit (MSB)
318
319 // Decompress rest of the values
320 int64_t dd;
321 for (uint64_t i = 2; i < num; ++i) {
322 RETURN_NOT_OK(
323 read_double_delta(input_buffer, &dd, bitsize, &chunk, &bit_in_chunk));
324 value = (T)(dd + 2 * (int64_t)out[i - 1] - (int64_t)out[i - 2]);
325 RETURN_NOT_OK(output_buffer->write(&value, value_size));
326 }
327
328 return Status::Ok();
329 }
330
read_double_delta(ConstBuffer * buff,int64_t * double_delta,int bitsize,uint64_t * chunk,int * bit_in_chunk)331 Status DoubleDelta::read_double_delta(
332 ConstBuffer* buff,
333 int64_t* double_delta,
334 int bitsize,
335 uint64_t* chunk,
336 int* bit_in_chunk) {
337 // Read sign
338 uint64_t to_and = 1;
339 to_and <<= (*bit_in_chunk);
340 int sign = (((*chunk) & to_and) == 0) ? 1 : -1;
341 --(*bit_in_chunk);
342
343 // Read chunk and reset
344 if (*bit_in_chunk < 0) {
345 RETURN_NOT_OK(buff->read(chunk, sizeof(uint64_t)));
346 *bit_in_chunk = 63;
347 }
348
349 // Read double delta
350 int bits_left_to_read = bitsize;
351 int bits_to_read_from_chunk = std::min(*bit_in_chunk + 1, bits_left_to_read);
352 uint64_t tmp_chunk;
353 int bit_in_dd = bitsize - 1;
354 *double_delta = 0;
355 while (bits_left_to_read > 0) {
356 // Read from chunk
357 if (bits_to_read_from_chunk > 0) {
358 tmp_chunk = (((*chunk) << (63 - *bit_in_chunk)) >> (63 - bit_in_dd));
359 (*double_delta) |= tmp_chunk;
360 bit_in_dd -= bits_to_read_from_chunk;
361 (*bit_in_chunk) -= bits_to_read_from_chunk;
362 bits_left_to_read -= bits_to_read_from_chunk;
363 }
364
365 // Read chunk and reset
366 if (*bit_in_chunk < 0 && buff->offset() != buff->size()) {
367 RETURN_NOT_OK(buff->read(chunk, sizeof(uint64_t)));
368 *bit_in_chunk = 63;
369 bits_to_read_from_chunk = std::min(*bit_in_chunk + 1, bits_left_to_read);
370 }
371 }
372
373 // Set sign
374 (*double_delta) *= sign;
375
376 return Status::Ok();
377 }
378
write_double_delta(Buffer * buff,int64_t double_delta,int bitsize,uint64_t * chunk,int * bit_in_chunk)379 Status DoubleDelta::write_double_delta(
380 Buffer* buff,
381 int64_t double_delta,
382 int bitsize,
383 uint64_t* chunk,
384 int* bit_in_chunk) {
385 // Write sign
386 uint64_t to_or = (double_delta < 0) ? 1 : 0;
387 to_or <<= (*bit_in_chunk);
388 (*chunk) |= to_or;
389 --(*bit_in_chunk);
390
391 // Write chunk and reset
392 if (*bit_in_chunk < 0) {
393 RETURN_NOT_OK(buff->write(chunk, sizeof(uint64_t)));
394 *bit_in_chunk = 63;
395 *chunk = 0;
396 }
397
398 // Write rest of bits
399 int bits_left_to_write = bitsize;
400 int bit_in_dd = bitsize - 1;
401 int bits_to_fill_in_chunk = std::min((*bit_in_chunk) + 1, bits_left_to_write);
402 uint64_t abs_dd = ABS(double_delta);
403 uint64_t tmp_abs_dd;
404
405 while (bits_left_to_write > 0) {
406 if (bits_to_fill_in_chunk > 0) {
407 tmp_abs_dd = (abs_dd << (63 - bit_in_dd));
408 tmp_abs_dd >>= (63 - (*bit_in_chunk));
409 (*chunk) |= tmp_abs_dd;
410 (*bit_in_chunk) -= bits_to_fill_in_chunk;
411 bit_in_dd -= bits_to_fill_in_chunk;
412 bits_left_to_write -= bits_to_fill_in_chunk;
413 }
414
415 // Write chunk and reset
416 if (*bit_in_chunk < 0) {
417 RETURN_NOT_OK(buff->write(chunk, sizeof(uint64_t)));
418 *bit_in_chunk = 63;
419 *chunk = 0;
420 bits_to_fill_in_chunk = std::min((*bit_in_chunk) + 1, bits_left_to_write);
421 }
422 }
423
424 return Status::Ok();
425 }
426
427 // Explicit template instantiations
428
429 template Status DoubleDelta::compress<char>(
430 ConstBuffer* input_buffer, Buffer* output_buffer);
431 template Status DoubleDelta::compress<int8_t>(
432 ConstBuffer* input_buffer, Buffer* output_buffer);
433 template Status DoubleDelta::compress<uint8_t>(
434 ConstBuffer* input_buffer, Buffer* output_buffer);
435 template Status DoubleDelta::compress<int16_t>(
436 ConstBuffer* input_buffer, Buffer* output_buffer);
437 template Status DoubleDelta::compress<uint16_t>(
438 ConstBuffer* input_buffer, Buffer* output_buffer);
439 template Status DoubleDelta::compress<int>(
440 ConstBuffer* input_buffer, Buffer* output_buffer);
441 template Status DoubleDelta::compress<uint32_t>(
442 ConstBuffer* input_buffer, Buffer* output_buffer);
443 template Status DoubleDelta::compress<int64_t>(
444 ConstBuffer* input_buffer, Buffer* output_buffer);
445 template Status DoubleDelta::compress<uint64_t>(
446 ConstBuffer* input_buffer, Buffer* output_buffer);
447
448 template Status DoubleDelta::decompress<char>(
449 ConstBuffer* input_buffer, PreallocatedBuffer* output_buffer);
450 template Status DoubleDelta::decompress<int8_t>(
451 ConstBuffer* input_buffer, PreallocatedBuffer* output_buffer);
452 template Status DoubleDelta::decompress<uint8_t>(
453 ConstBuffer* input_buffer, PreallocatedBuffer* output_buffer);
454 template Status DoubleDelta::decompress<int16_t>(
455 ConstBuffer* input_buffer, PreallocatedBuffer* output_buffer);
456 template Status DoubleDelta::decompress<uint16_t>(
457 ConstBuffer* input_buffer, PreallocatedBuffer* output_buffer);
458 template Status DoubleDelta::decompress<int>(
459 ConstBuffer* input_buffer, PreallocatedBuffer* output_buffer);
460 template Status DoubleDelta::decompress<uint32_t>(
461 ConstBuffer* input_buffer, PreallocatedBuffer* output_buffer);
462 template Status DoubleDelta::decompress<int64_t>(
463 ConstBuffer* input_buffer, PreallocatedBuffer* output_buffer);
464 template Status DoubleDelta::decompress<uint64_t>(
465 ConstBuffer* input_buffer, PreallocatedBuffer* output_buffer);
466
467 }; // namespace sm
468 } // namespace tiledb
469