1 /*
2 * Copyright (C) 2018-present Francesc Alted, Aleix Alcacer.
3 * Copyright (C) 2019-present Blosc Development team <blosc@blosc.org>
4 * All rights reserved.
5 *
6 * This source code is licensed under both the BSD-style license (found in the
7 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
8 * in the COPYING file in the root directory of this source tree).
9 * You may select, at your option, one of the above-listed licenses.
10 */
11
12 /** @file caterva.h
13 * @brief Caterva header file.
14 *
15 * This file contains Caterva public API and the structures needed to use it.
16 * @author Blosc Development team <blosc@blosc.org>
17 */
18
19 #ifndef CATERVA_CATERVA_H_
20 #define CATERVA_CATERVA_H_
21
22 #include <blosc2.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25
26 /* Version numbers */
27 #define CATERVA_VERSION_MAJOR 0 /* for major interface/format changes */
28 #define CATERVA_VERSION_MINOR 5 /* for minor interface/format changes */
29 #define CATERVA_VERSION_RELEASE 1 /* for tweaks, bug-fixes, or development */
30
31 #define CATERVA_VERSION_STRING "0.5.1.dev0" /* string version. Sync with above! */
32 #define CATERVA_VERSION_DATE "2021-07-13" /* date version */
33
34 /* Error handling */
/* Return codes. CATERVA_SUCCEED is 0; every error code is a distinct
 * positive value so that callers can tell failures apart. */
#define CATERVA_SUCCEED 0
#define CATERVA_ERR_INVALID_ARGUMENT 1
#define CATERVA_ERR_BLOSC_FAILED 2
#define CATERVA_ERR_CONTAINER_FILLED 3
#define CATERVA_ERR_INVALID_STORAGE 4
#define CATERVA_ERR_NULL_POINTER 5
/* Previously 5, which collided with CATERVA_ERR_NULL_POINTER. */
#define CATERVA_ERR_INVALID_INDEX 6
42
43
44 /* Tracing macros */
/**
 * Print a trace message to stderr, but only when the CATERVA_TRACE
 * environment variable is set (its value is not inspected).
 *
 * @param cat Category label; it is stringized and printed between brackets.
 * @param msg printf-style format. It must be a string literal because it is
 *            concatenated with the surrounding format at compile time.
 * @param ... Arguments consumed by @p msg.
 *
 * The message is prefixed with the category, source file and line number.
 * `##__VA_ARGS__` is a GNU extension (accepted by GCC and Clang) that drops
 * the preceding comma when no variadic arguments are passed.
 */
#define CATERVA_TRACE(cat, msg, ...)                                           \
  do {                                                                         \
    const char *caterva_trace_env_ = getenv("CATERVA_TRACE");                  \
    if (caterva_trace_env_ != NULL) {                                          \
      fprintf(stderr, "[%s] - %s:%d\n " msg "\n", #cat, __FILE__, __LINE__,    \
              ##__VA_ARGS__);                                                  \
    }                                                                          \
  } while (0)

/** Trace a message under the "error" category (see CATERVA_TRACE). */
#define CATERVA_TRACE_ERROR(fmt, ...) CATERVA_TRACE(error, fmt, ##__VA_ARGS__)
/** Trace a message under the "warning" category (see CATERVA_TRACE). */
#define CATERVA_TRACE_WARNING(fmt, ...) CATERVA_TRACE(warning, fmt, ##__VA_ARGS__)
54
/**
 * Propagate a caterva error code to the caller.
 *
 * Evaluates @p rc exactly once. When the result differs from
 * CATERVA_SUCCEED, the matching message is traced and the enclosing
 * function returns that code; otherwise execution continues.
 * Only usable inside functions that return an int error code.
 */
#define CATERVA_ERROR(rc)                                   \
  do {                                                      \
    const int caterva_rc_ = (rc);                           \
    if (caterva_rc_ == CATERVA_SUCCEED) {                   \
      break;                                                \
    }                                                       \
    CATERVA_TRACE_ERROR("%s", print_error(caterva_rc_));    \
    return caterva_rc_;                                     \
  } while (0)
64
/**
 * Return CATERVA_ERR_NULL_POINTER from the enclosing function when
 * @p pointer is NULL, tracing the matching message first.
 *
 * The message is looked up only on the failure path, so a non-NULL pointer
 * costs a single comparison. Only usable inside functions that return an
 * int error code.
 */
#define CATERVA_ERROR_NULL(pointer)                                 \
  do {                                                              \
    if ((pointer) == NULL) {                                        \
      char *error_msg = print_error(CATERVA_ERR_NULL_POINTER);      \
      CATERVA_TRACE_ERROR("%s", error_msg);                         \
      return CATERVA_ERR_NULL_POINTER;                              \
    }                                                               \
  } while (0)
73
/* Explicitly discard a value so it does not trigger "unused" warnings. */
#define CATERVA_UNUSED_PARAM(x) ((void) (x))

/* Marks a declaration as possibly unused. Expands to the GNU attribute when
 * __GNUC__ is defined and to nothing otherwise. */
#if defined(__GNUC__)
#  define CATERVA_ATTRIBUTE_UNUSED __attribute__((unused))
#else
#  define CATERVA_ATTRIBUTE_UNUSED
#endif
80
81 static char *print_error(int rc) CATERVA_ATTRIBUTE_UNUSED;
print_error(int rc)82 static char *print_error(int rc) {
83 switch (rc) {
84 case CATERVA_ERR_INVALID_STORAGE:
85 return "Invalid storage";
86 case CATERVA_ERR_NULL_POINTER:
87 return "Pointer is null";
88 case CATERVA_ERR_BLOSC_FAILED:
89 return "Blosc failed";
90 case CATERVA_ERR_INVALID_ARGUMENT:
91 return "Invalid argument";
92 default:
93 return "Unknown error";
94 }
95 }
96
/* The version for metalayer format; starts from 0 and it must not exceed 127 */
#define CATERVA_METALAYER_VERSION 0

/* The maximum number of dimensions for caterva arrays. It also fixes the
 * length of every shape array declared in this header. */
#define CATERVA_MAX_DIM 8

/* The maximum number of metalayers for caterva arrays: one fewer than the
 * Blosc2 limit. NOTE(review): presumably one Blosc2 slot is reserved for
 * caterva's own metadata -- confirm against the implementation. */
#define CATERVA_MAX_METALAYERS (BLOSC2_MAX_METALAYERS - 1)
105
106 /**
107 * @brief Configuration parameters used to create a caterva context.
108 */
109 typedef struct {
110 void *(*alloc)(size_t);
111 //!< The memory allocation function used internally.
112 void (*free)(void *);
113 //!< The memory release function used internally.
114 uint8_t compcodec;
115 //!< Defines the codec used in compression.
116 uint8_t compmeta;
117 //!< The metadata for the compressor codec.
118 uint8_t complevel;
119 //!< Determines the compression level used in Blosc.
120 int32_t splitmode;
121 //!< Whether the blocks should be split or not.
122 int usedict;
123 //!< Indicates whether a dictionary is used to compress data or not.
124 int16_t nthreads;
125 //!< Determines the maximum number of threads that can be used.
126 uint8_t filters[BLOSC2_MAX_FILTERS];
127 //!< Defines the filters used in compression.
128 uint8_t filtersmeta[BLOSC2_MAX_FILTERS];
129 //!< Indicates the meta filters used in Blosc.
130 blosc2_prefilter_fn prefilter;
131 //!< Defines the function that is applied to the data before compressing it.
132 blosc2_prefilter_params *pparams;
133 //!< Indicates the parameters of the prefilter function.
134 blosc2_btune *udbtune;
135 //!< Indicates user-defined parameters for btune.
136 } caterva_config_t;
137
138 /**
139 * @brief The default configuration parameters used in caterva.
140 */
141 static const caterva_config_t CATERVA_CONFIG_DEFAULTS = {.alloc = malloc,
142 .free = free,
143 .compcodec = BLOSC_ZSTD,
144 .compmeta = 0,
145 .complevel = 5,
146 .splitmode = BLOSC_AUTO_SPLIT,
147 .usedict = 0,
148 .nthreads = 1,
149 .filters = {0, 0, 0, 0, 0, BLOSC_SHUFFLE},
150 .filtersmeta = {0, 0, 0, 0, 0, 0},
151 .prefilter = NULL,
152 .pparams = NULL,
153 .udbtune = NULL,
154 };
155
156 /**
157 * @brief Context for caterva arrays that specifies the functions used to manage memory and
158 * the compression/decompression parameters used in Blosc.
159 */
160 typedef struct {
161 caterva_config_t *cfg;
162 //!< The configuration paramters.
163 } caterva_ctx_t;
164
165
/**
 * @brief The data a metalayer needs in order to be stored on an array.
 */
typedef struct {
  /** The name of the metalayer. */
  char *name;
  /** The serialized data to store. */
  uint8_t *sdata;
  /** The size of the serialized data. */
  int32_t size;
} caterva_metalayer_t;
177
178 /**
179 * @brief The storage properties for an array backed by a Blosc super-chunk.
180 */
181 typedef struct {
182 int32_t chunkshape[CATERVA_MAX_DIM];
183 //!< The shape of each chunk of Blosc.
184 int32_t blockshape[CATERVA_MAX_DIM];
185 //!< The shape of each block of Blosc.
186 bool sequencial;
187 //!< Flag to indicate if the super-chunk is stored sequentially or sparsely.
188 char *urlpath;
189 //!< The super-chunk name. If @p urlpath is not @p NULL, the super-chunk will be stored on
190 //!< disk.
191 caterva_metalayer_t metalayers[CATERVA_MAX_METALAYERS];
192 //!< List with the metalayers desired.
193 int32_t nmetalayers;
194 //!< The number of metalayers.
195 } caterva_storage_t;
196
197 /**
198 * @brief General parameters needed for the creation of a caterva array.
199 */
200 typedef struct {
201 uint8_t itemsize;
202 //!< The size of each item of the array.
203 int64_t shape[CATERVA_MAX_DIM];
204 //!< The array shape.
205 uint8_t ndim;
206 //!< The array dimensions.
207 } caterva_params_t;
208
/**
 * @brief An *optional* cache holding a single chunk.
 *
 * A chunk is copied into this cache when it is needed, so that a later request
 * for the same chunk can be served from the cache instead of recovering the
 * chunk again.
 */
struct chunk_cache_s {
  /** The chunk data. */
  uint8_t *data;
  /** The chunk number in cache. If @p nchunk equals to -1, it means that the
   *  cache is empty. */
  int32_t nchunk;
};
221
222 /**
223 * @brief A multidimensional array of data that can be compressed.
224 */
225 typedef struct {
226 caterva_config_t *cfg;
227 //!< Array configuration.
228 blosc2_schunk *sc;
229 //!< Pointer to a Blosc super-chunk
230 int64_t shape[CATERVA_MAX_DIM];
231 //!< Shape of original data.
232 int32_t chunkshape[CATERVA_MAX_DIM];
233 //!< Shape of each chunk.
234 int64_t extshape[CATERVA_MAX_DIM];
235 //!< Shape of padded data.
236 int32_t blockshape[CATERVA_MAX_DIM];
237 //!< Shape of each block.
238 int64_t extchunkshape[CATERVA_MAX_DIM];
239 //!< Shape of padded chunk.
240 int64_t nitems;
241 //!< Number of items in original data.
242 int32_t chunknitems;
243 //!< Number of items in each chunk.
244 int64_t extnitems;
245 //!< Number of items in padded data.
246 int32_t blocknitems;
247 //!< Number of items in each block.
248 int64_t extchunknitems;
249 //!< Number of items in a padded chunk.
250 uint8_t ndim;
251 //!< Data dimensions.
252 uint8_t itemsize;
253 //!< Size of each item.
254 int64_t nchunks;
255 //!< Number of chunks in the array.
256 struct chunk_cache_s chunk_cache;
257 //!< A partition cache.
258 } caterva_array_t;
259
260 /**
261 * @brief Create a context for caterva.
262 *
263 * @param cfg The configuration parameters needed for the context creation.
264 * @param ctx The memory pointer where the context will be created.
265 *
266 * @return An error code.
267 */
268 int caterva_ctx_new(caterva_config_t *cfg, caterva_ctx_t **ctx);
269
270 /**
271 * @brief Free a context.
272 *
 * @param ctx The context to be freed.
274 *
275 * @return An error code.
276 */
277 int caterva_ctx_free(caterva_ctx_t **ctx);
278
279 /**
280 * @brief Create an empty array.
281 *
282 * @param ctx The caterva context to be used.
283 * @param params The general params of the array desired.
284 * @param storage The storage params of the array desired.
285 * @param array The memory pointer where the array will be created.
286 *
287 * @return An error code.
288 */
289 int caterva_empty(caterva_ctx_t *ctx, caterva_params_t *params,
290 caterva_storage_t *storage, caterva_array_t **array);
291
292
293 /**
 * @brief Create an array, with zero being used as the default value for
 * uninitialized portions of the array.
296 *
297 * @param ctx The caterva context to be used.
298 * @param params The general params of the array.
299 * @param storage The storage params of the array.
300 * @param array The memory pointer where the array will be created.
301 *
302 * @return An error code.
303 */
304 int caterva_zeros(caterva_ctx_t *ctx, caterva_params_t *params,
305 caterva_storage_t *storage, caterva_array_t **array);
306
307
308 /**
 * @brief Create an array, with @p fill_value being used as the default value for
 * uninitialized portions of the array.
311 *
312 * @param ctx The caterva context to be used.
313 * @param params The general params of the array.
314 * @param storage The storage params of the array.
315 * @param fill_value Default value for uninitialized portions of the array.
316 * @param array The memory pointer where the array will be created.
317 *
318 * @return An error code.
319 */
320 int caterva_full(caterva_ctx_t *ctx, caterva_params_t *params,
321 caterva_storage_t *storage, void *fill_value, caterva_array_t **array);
322 /**
323 * @brief Free an array.
324 *
325 * @param ctx The caterva context to be used.
326 * @param array The memory pointer where the array is placed.
327 *
328 * @return An error code.
329 */
330 int caterva_free(caterva_ctx_t *ctx, caterva_array_t **array);
331
332 /**
 * @brief Append a chunk to a caterva array (until it is completely filled).
334 *
335 * @param ctx The caterva context to be used.
336 * @param array The caterva array.
337 * @param chunk The buffer where the chunk data is stored.
338 * @param chunksize Size (in bytes) of the buffer.
339 *
340 * @return An error code.
341 */
342 int caterva_append(caterva_ctx_t *ctx, caterva_array_t *array, void *chunk,
343 int64_t chunksize);
344
345 /**
346 * @brief Create a caterva array from a super-chunk. It can only be used if the array
347 * is backed by a blosc super-chunk.
348 *
349 * @param ctx The caterva context to be used.
350 * @param schunk The blosc super-chunk where the caterva array is stored.
351 * @param array The memory pointer where the array will be created.
352 *
353 * @return An error code.
354 */
355 int
356 caterva_from_schunk(caterva_ctx_t *ctx, blosc2_schunk *schunk, caterva_array_t **array);
357
358 /**
359 * @brief Create a caterva array from a serialized super-chunk. It can only be used if the array
360 * is backed by a blosc super-chunk.
361 *
362 * @param ctx The caterva context to be used.
363 * @param serial_schunk The serialized super-chunk where the caterva array is stored.
364 * @param len The size (in bytes) of the serialized super-chunk.
365 * @param array The memory pointer where the array will be created.
366 *
367 * @return An error code.
368 */
369 int caterva_from_serial_schunk(caterva_ctx_t *ctx, uint8_t *serial_schunk, int64_t len,
370 caterva_array_t **array);
371
372 /**
373 * @brief Read a caterva array from disk.
374 *
375 * @param ctx The caterva context to be used.
376 * @param urlpath The urlpath of the caterva array on disk.
377 * @param array The memory pointer where the array will be created.
378 *
379 * @return An error code.
380 */
381 int caterva_open(caterva_ctx_t *ctx, const char *urlpath, caterva_array_t **array);
382
383 /**
384 * @brief Save caterva array into a specific urlpath.
385 *
386 * @param ctx The context to be used.
387 * @param array The array to be saved.
388 * @param urlpath The urlpath where the array will be stored.
389 *
390 * @return An error code.
391 */
392 int caterva_save(caterva_ctx_t *ctx, caterva_array_t *array, char *urlpath);
393
394 /**
395 * @brief Create a caterva array from the data stored in a buffer.
396 *
397 * @param ctx The caterva context to be used.
398 * @param buffer The buffer where source data is stored.
399 * @param buffersize The size (in bytes) of the buffer.
400 * @param params The general params of the array desired.
401 * @param storage The storage params of the array desired.
402 * @param array The memory pointer where the array will be created.
403 *
404 * @return An error code.
405 */
406 int caterva_from_buffer(caterva_ctx_t *ctx, void *buffer, int64_t buffersize,
407 caterva_params_t *params, caterva_storage_t *storage,
408 caterva_array_t **array);
409
410 /**
411 * @brief Extract the data into a C buffer from a caterva array.
412 *
413 * @param ctx The caterva context to be used.
414 * @param array The caterva array.
415 * @param buffer The buffer where the data will be stored.
416 * @param buffersize Size (in bytes) of the buffer.
417 *
418 * @return An error code.
419 */
420 int caterva_to_buffer(caterva_ctx_t *ctx, caterva_array_t *array, void *buffer,
421 int64_t buffersize);
422
423 /**
424 * @brief Get a slice from an array and store it into a new array.
425 *
426 * @param ctx The caterva context to be used.
427 * @param src The array from which the slice will be extracted
428 * @param start The coordinates where the slice will begin.
429 * @param stop The coordinates where the slice will end.
430 * @param storage The storage params of the array desired.
431 * @param array The memory pointer where the array will be created.
432 *
433 * @return An error code.
434 */
435 int caterva_get_slice(caterva_ctx_t *ctx, caterva_array_t *src, int64_t *start,
436 int64_t *stop, caterva_storage_t *storage, caterva_array_t **array);
437
438 /**
439 * @brief Squeeze a caterva array
440 *
 * This function removes selected single-dimensional entries from the shape of a
 * caterva array.
443 *
444 * @param ctx The caterva context to be used.
445 * @param array The caterva array.
446 * @param index Indexes of the single-dimensional entries to remove.
447 *
448 * @return An error code
449 */
450 int caterva_squeeze_index(caterva_ctx_t *ctx, caterva_array_t *array,
451 bool *index);
452
453 /**
454 * @brief Squeeze a caterva array
455 *
 * This function removes single-dimensional entries from the shape of a caterva array.
457 *
458 * @param ctx The caterva context to be used.
459 * @param array The caterva array.
460 *
461 * @return An error code
462 */
463 int caterva_squeeze(caterva_ctx_t *ctx, caterva_array_t *array);
464
465 /**
466 * @brief Get a slice from an array and store it into a C buffer.
467 *
468 * @param ctx The caterva context to be used.
469 * @param array The array from which the slice will be extracted.
470 * @param start The coordinates where the slice will begin.
471 * @param stop The coordinates where the slice will end.
472 * @param buffershape The shape of the buffer.
473 * @param buffer The buffer where the data will be stored.
474 * @param buffersize The size (in bytes) of the buffer.
475 *
476 * @return An error code.
477 */
478 int caterva_get_slice_buffer(caterva_ctx_t *ctx, caterva_array_t *array,
479 int64_t *start, int64_t *stop,
480 void *buffer, int64_t *buffershape, int64_t buffersize);
481
482 /**
483 * @brief Set a slice into a caterva array from a C buffer.
484 *
485 * @param ctx The caterva context to be used.
486 * @param buffer The buffer where the slice data is.
487 * @param buffersize The size (in bytes) of the buffer.
488 * @param start The coordinates where the slice will begin.
489 * @param stop The coordinates where the slice will end.
490 * @param buffershape The shape of the buffer.
491 * @param array The caterva array where the slice will be set
492 *
493 * @return An error code.
494 */
495 int caterva_set_slice_buffer(caterva_ctx_t *ctx,
496 void *buffer, int64_t *buffershape, int64_t buffersize,
497 int64_t *start, int64_t *stop, caterva_array_t *array);
498
499 /**
500 * @brief Make a copy of the array data. The copy is done into a new caterva array.
501 *
502 * @param ctx The caterva context to be used.
503 * @param src The array from which data is copied.
504 * @param storage The storage params of the array desired.
505 * @param array The memory pointer where the array will be created.
506 *
507 * @return An error code
508 */
509 int caterva_copy(caterva_ctx_t *ctx, caterva_array_t *src, caterva_storage_t *storage,
510 caterva_array_t **array);
511
512
513 /**
514 * @brief Remove a Caterva file from the file system. Both backends are supported.
515 *
516 * @param ctx The caterva context to be used.
517 * @param urlpath The urlpath of the array to be removed.
518 *
519 * @return An error code
520 */
521 int caterva_remove(caterva_ctx_t *ctx, char *urlpath);
522
523
524 /**
525 * @brief Add a vl-metalayer to the Caterva array.
526 *
527 * @param ctx The context to be used.
528 * @param array The array where the metalayer will be added.
 * @param vlmeta The vl-metalayer to add.
530 *
531 * @return An error code
532 */
533 int caterva_vlmeta_add(caterva_ctx_t *ctx, caterva_array_t *array, caterva_metalayer_t *vlmeta);
534
535
536 /**
537 *
538 * @brief Get a vl-metalayer from a Caterva array.
539 *
540 * @param ctx The context to be used.
 * @param array The array from which the vl-metalayer will be read.
542 * @param name The vl-metalayer name.
543 * @param vlmeta Pointer to the metalayer where the data will be stored.
544 *
545 * @warning The contents of `vlmeta` are allocated inside the function.
546 * Therefore, they must be released with a `free`.
547 *
548 * @return An error code
549 */
550 int caterva_vlmeta_get(caterva_ctx_t *ctx, caterva_array_t *array,
551 const char *name, caterva_metalayer_t *vlmeta);
552
553 /**
554 * @brief Check if a vl-metalayer exists or not.
555 *
556 * @param ctx The context to be used.
557 * @param array The array where the check will be done.
558 * @param name The name of the vl-metalayer to check.
559 * @param exists Pointer where the result will be stored.
560 *
561 * @return An error code
562 */
563 int caterva_vlmeta_exists(caterva_ctx_t *ctx, caterva_array_t *array,
564 const char *name, bool *exists);
565
566 /**
567 * @brief Update a vl-metalayer content in a Caterva array.
568 *
569 * @param ctx The context to be used.
570 * @param array The array where the vl-metalayer will be updated.
571 * @param vlmeta The vl-metalayer to update.
572 *
573 * @return An error code
574 */
575 int caterva_vlmeta_update(caterva_ctx_t *ctx, caterva_array_t *array,
576 caterva_metalayer_t *vlmeta);
577
578 /**
579 *
580 * @brief Get a metalayer from a Caterva array.
581 *
582 * @param ctx The context to be used.
 * @param array The array from which the metalayer will be read.
 * @param name The metalayer name.
585 * @param meta Pointer to the metalayer where the data will be stored.
586 *
587 * @warning The contents of `meta` are allocated inside the function.
588 * Therefore, they must be released with a `free`.
589 *
590 * @return An error code
591 */
592 int caterva_meta_get(caterva_ctx_t *ctx, caterva_array_t *array,
593 const char *name, caterva_metalayer_t *meta);
594
595 /**
596 * @brief Check if a metalayer exists or not.
597 *
598 * @param ctx The context to be used.
599 * @param array The array where the check will be done.
600 * @param name The name of the metalayer to check.
601 * @param exists Pointer where the result will be stored.
602 *
603 * @return An error code
604 */
605 int caterva_meta_exists(caterva_ctx_t *ctx, caterva_array_t *array,
606 const char *name, bool *exists);
607
608 /**
609 * @brief Update a metalayer content in a Caterva array.
610 *
611 * @param ctx The context to be used.
612 * @param array The array where the metalayer will be updated.
613 * @param meta The metalayer to update.
614 *
615 * @return An error code
616 */
617 int caterva_meta_update(caterva_ctx_t *ctx, caterva_array_t *array,
618 caterva_metalayer_t *meta);
619
620 #endif // CATERVA_CATERVA_H_
621