1 /*
2  * Copyright (C) 2018-present Francesc Alted, Aleix Alcacer.
3  * Copyright (C) 2019-present Blosc Development team <blosc@blosc.org>
4  * All rights reserved.
5  *
6  * This source code is licensed under both the BSD-style license (found in the
7  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
8  * in the COPYING file in the root directory of this source tree).
9  * You may select, at your option, one of the above-listed licenses.
10  */
11 
12 /** @file caterva.h
13  * @brief Caterva header file.
14  *
15  * This file contains Caterva public API and the structures needed to use it.
16  * @author Blosc Development team <blosc@blosc.org>
17  */
18 
19 #ifndef CATERVA_CATERVA_H_
20 #define CATERVA_CATERVA_H_
21 
22 #include <blosc2.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 
26 /* Version numbers */
27 #define CATERVA_VERSION_MAJOR 0         /* for major interface/format changes  */
28 #define CATERVA_VERSION_MINOR 5         /* for minor interface/format changes  */
29 #define CATERVA_VERSION_RELEASE 1       /* for tweaks, bug-fixes, or development */
30 
31 #define CATERVA_VERSION_STRING "0.5.1.dev0" /* string version. Sync with above! */
32 #define CATERVA_VERSION_DATE "2021-07-13"  /* date version */
33 
/* Error handling */

/*
 * Status codes returned by the Caterva API. CATERVA_SUCCEED is zero and every
 * failure has its own distinct non-zero value, so callers and print_error()
 * can tell the failures apart.
 */
#define CATERVA_SUCCEED 0
#define CATERVA_ERR_INVALID_ARGUMENT 1
#define CATERVA_ERR_BLOSC_FAILED 2
#define CATERVA_ERR_CONTAINER_FILLED 3
#define CATERVA_ERR_INVALID_STORAGE 4
#define CATERVA_ERR_NULL_POINTER 5
#define CATERVA_ERR_INVALID_INDEX 6


/* Tracing macros */

/*
 * Write a diagnostic to stderr when the CATERVA_TRACE environment variable is
 * set (its value is not inspected); otherwise do nothing. The variadic
 * arguments are only evaluated when tracing is enabled.
 *
 *   cat  Bare token naming the category; it is stringified into the "[cat]" tag.
 *   msg  printf-style format. It must be a string literal because it is
 *        concatenated with the location prefix at compile time.
 *   ...  Arguments consumed by msg.
 *
 * The local variable has a long, project-prefixed name so that it cannot
 * capture an identifier used in the caller's arguments, and so that it stays
 * out of the namespace reserved for the implementation (leading "__").
 */
#define CATERVA_TRACE(cat, msg, ...)                                          \
    do {                                                                      \
        const char *caterva_trace_env_ = getenv("CATERVA_TRACE");             \
        if (caterva_trace_env_ != NULL) {                                     \
            fprintf(stderr, "[%s] - %s:%d\n    " msg "\n", #cat, __FILE__,    \
                    __LINE__, ##__VA_ARGS__);                                 \
        }                                                                     \
    } while (0)

#define CATERVA_TRACE_ERROR(fmt, ...) CATERVA_TRACE(error, fmt, ##__VA_ARGS__)
#define CATERVA_TRACE_WARNING(fmt, ...) CATERVA_TRACE(warning, fmt, ##__VA_ARGS__)

/*
 * Evaluate `rc` exactly once. If it is not CATERVA_SUCCEED, trace the matching
 * message and return the code from the ENCLOSING function, which therefore
 * has to return int.
 */
#define CATERVA_ERROR(rc)                                                     \
    do {                                                                      \
        int caterva_rc_ = (rc);                                               \
        if (caterva_rc_ != CATERVA_SUCCEED) {                                 \
            CATERVA_TRACE_ERROR("%s", print_error(caterva_rc_));              \
            return caterva_rc_;                                               \
        }                                                                     \
    } while (0)

/*
 * If `pointer` is NULL, trace the null-pointer message and return
 * CATERVA_ERR_NULL_POINTER from the ENCLOSING function (which has to return
 * int). No local is declared before `pointer` is evaluated, so the check
 * always tests the caller's expression, whatever its name is.
 */
#define CATERVA_ERROR_NULL(pointer)                                           \
    do {                                                                      \
        if ((pointer) == NULL) {                                              \
            CATERVA_TRACE_ERROR("%s", print_error(CATERVA_ERR_NULL_POINTER)); \
            return CATERVA_ERR_NULL_POINTER;                                  \
        }                                                                     \
    } while (0)

/* Silence "unused parameter" warnings for `x`. */
#define CATERVA_UNUSED_PARAM(x) ((void) (x))

/* Marks a declaration as possibly unused; expands to nothing outside GNU C. */
#ifdef __GNUC__
#define CATERVA_ATTRIBUTE_UNUSED __attribute__((unused))
#else
#define CATERVA_ATTRIBUTE_UNUSED
#endif

/**
 * @brief Map a Caterva error code to a human-readable message.
 *
 * @param rc One of the CATERVA_ERR_* codes.
 *
 * @return A pointer to a string literal; it must not be modified nor freed.
 * Codes without a dedicated message (CATERVA_SUCCEED included) yield
 * "Unknown error".
 */
static char *print_error(int rc) CATERVA_ATTRIBUTE_UNUSED;
static char *print_error(int rc) {
    switch (rc) {
        case CATERVA_ERR_INVALID_ARGUMENT:
            return "Invalid argument";
        case CATERVA_ERR_BLOSC_FAILED:
            return "Blosc failed";
        case CATERVA_ERR_CONTAINER_FILLED:
            return "Container is filled";
        case CATERVA_ERR_INVALID_STORAGE:
            return "Invalid storage";
        case CATERVA_ERR_NULL_POINTER:
            return "Pointer is null";
        case CATERVA_ERR_INVALID_INDEX:
            return "Invalid index";
        default:
            return "Unknown error";
    }
}
96 
97 /* The version for metalayer format; starts from 0 and it must not exceed 127 */
98 #define CATERVA_METALAYER_VERSION 0
99 
100 /* The maximum number of dimensions for caterva arrays */
101 #define CATERVA_MAX_DIM 8
102 
103 /* The maximum number of metalayers for caterva arrays */
104 #define CATERVA_MAX_METALAYERS (BLOSC2_MAX_METALAYERS - 1)
105 
106 /**
107  * @brief Configuration parameters used to create a caterva context.
108  */
109 typedef struct {
110     void *(*alloc)(size_t);
111     //!< The memory allocation function used internally.
112     void (*free)(void *);
113     //!< The memory release function used internally.
114     uint8_t compcodec;
115     //!< Defines the codec used in compression.
116     uint8_t compmeta;
117     //!< The metadata for the compressor codec.
118     uint8_t complevel;
119     //!< Determines the compression level used in Blosc.
120     int32_t splitmode;
121     //!< Whether the blocks should be split or not.
122     int usedict;
123     //!< Indicates whether a dictionary is used to compress data or not.
124     int16_t nthreads;
125     //!< Determines the maximum number of threads that can be used.
126     uint8_t filters[BLOSC2_MAX_FILTERS];
127     //!< Defines the filters used in compression.
128     uint8_t filtersmeta[BLOSC2_MAX_FILTERS];
129     //!< Indicates the meta filters used in Blosc.
130     blosc2_prefilter_fn prefilter;
131     //!< Defines the function that is applied to the data before compressing it.
132     blosc2_prefilter_params *pparams;
133     //!< Indicates the parameters of the prefilter function.
134     blosc2_btune *udbtune;
135     //!< Indicates user-defined parameters for btune.
136 } caterva_config_t;
137 
138 /**
139  * @brief The default configuration parameters used in caterva.
140  */
141 static const caterva_config_t CATERVA_CONFIG_DEFAULTS = {.alloc = malloc,
142                                                          .free = free,
143                                                          .compcodec = BLOSC_ZSTD,
144                                                          .compmeta = 0,
145                                                          .complevel = 5,
146                                                          .splitmode = BLOSC_AUTO_SPLIT,
147                                                          .usedict = 0,
148                                                          .nthreads = 1,
149                                                          .filters = {0, 0, 0, 0, 0, BLOSC_SHUFFLE},
150                                                          .filtersmeta = {0, 0, 0, 0, 0, 0},
151                                                          .prefilter = NULL,
152                                                          .pparams = NULL,
153                                                          .udbtune = NULL,
154                                                          };
155 
156 /**
157  * @brief Context for caterva arrays that specifies the functions used to manage memory and
158  * the compression/decompression parameters used in Blosc.
159  */
160 typedef struct {
161     caterva_config_t *cfg;
162     //!< The configuration paramters.
163 } caterva_ctx_t;
164 
165 
/**
 * @brief The metalayer data needed to store it on an array.
 */
typedef struct {
    char *name;  //!< The name of the metalayer.
    uint8_t *sdata;  //!< The serialized data to store.
    int32_t size;  //!< The size of the serialized data.
} caterva_metalayer_t;
177 
178 /**
179  * @brief The storage properties for an array backed by a Blosc super-chunk.
180  */
181 typedef struct {
182     int32_t chunkshape[CATERVA_MAX_DIM];
183     //!< The shape of each chunk of Blosc.
184     int32_t blockshape[CATERVA_MAX_DIM];
185     //!< The shape of each block of Blosc.
186     bool sequencial;
187     //!< Flag to indicate if the super-chunk is stored sequentially or sparsely.
188     char *urlpath;
189     //!< The super-chunk name. If @p urlpath is not @p NULL, the super-chunk will be stored on
190     //!< disk.
191     caterva_metalayer_t metalayers[CATERVA_MAX_METALAYERS];
192     //!< List with the metalayers desired.
193     int32_t nmetalayers;
194     //!< The number of metalayers.
195 } caterva_storage_t;
196 
197 /**
198  * @brief General parameters needed for the creation of a caterva array.
199  */
200 typedef struct {
201     uint8_t itemsize;
202     //!< The size of each item of the array.
203     int64_t shape[CATERVA_MAX_DIM];
204     //!< The array shape.
205     uint8_t ndim;
206     //!< The array dimensions.
207 } caterva_params_t;
208 
/**
 * @brief An *optional* cache for a single chunk.
 *
 * When a chunk is needed, it is copied into this cache. In this way, if the same chunk is
 * needed again afterwards, it is not necessary to recover it because it is already in the
 * cache.
 */
struct chunk_cache_s {
    uint8_t *data;  //!< The chunk data.
    int32_t nchunk;
    //!< The chunk number in cache. If @p nchunk equals -1, the cache is empty.
};
221 
222 /**
223  * @brief A multidimensional array of data that can be compressed.
224  */
225 typedef struct {
226     caterva_config_t *cfg;
227     //!< Array configuration.
228     blosc2_schunk *sc;
229     //!< Pointer to a Blosc super-chunk
230     int64_t shape[CATERVA_MAX_DIM];
231     //!< Shape of original data.
232     int32_t chunkshape[CATERVA_MAX_DIM];
233     //!< Shape of each chunk.
234     int64_t extshape[CATERVA_MAX_DIM];
235     //!< Shape of padded data.
236     int32_t blockshape[CATERVA_MAX_DIM];
237     //!< Shape of each block.
238     int64_t extchunkshape[CATERVA_MAX_DIM];
239     //!< Shape of padded chunk.
240     int64_t nitems;
241     //!< Number of items in original data.
242     int32_t chunknitems;
243     //!< Number of items in each chunk.
244     int64_t extnitems;
245     //!< Number of items in padded data.
246     int32_t blocknitems;
247     //!< Number of items in each block.
248     int64_t extchunknitems;
249     //!< Number of items in a padded chunk.
250     uint8_t ndim;
251     //!< Data dimensions.
252     uint8_t itemsize;
253     //!< Size of each item.
254     int64_t nchunks;
255     //!< Number of chunks in the array.
256     struct chunk_cache_s chunk_cache;
257     //!< A partition cache.
258 } caterva_array_t;
259 
260 /**
261  * @brief Create a context for caterva.
262  *
263  * @param cfg The configuration parameters needed for the context creation.
264  * @param ctx The memory pointer where the context will be created.
265  *
266  * @return An error code.
267  */
268 int caterva_ctx_new(caterva_config_t *cfg, caterva_ctx_t **ctx);
269 
270 /**
271  * @brief Free a context.
272  *
 * @param ctx The context to be freed.
274  *
275  * @return An error code.
276  */
277 int caterva_ctx_free(caterva_ctx_t **ctx);
278 
279 /**
280  * @brief Create an empty array.
281  *
282  * @param ctx The caterva context to be used.
283  * @param params The general params of the array desired.
284  * @param storage The storage params of the array desired.
285  * @param array The memory pointer where the array will be created.
286  *
287  * @return An error code.
288  */
289 int caterva_empty(caterva_ctx_t *ctx, caterva_params_t *params,
290                   caterva_storage_t *storage, caterva_array_t **array);
291 
292 
293 /**
 * @brief Create an array, with zero being used as the default value for
295  * uninitialized portions of the array.
296  *
297  * @param ctx The caterva context to be used.
298  * @param params The general params of the array.
299  * @param storage The storage params of the array.
300  * @param array The memory pointer where the array will be created.
301  *
302  * @return An error code.
303  */
304 int caterva_zeros(caterva_ctx_t *ctx, caterva_params_t *params,
305                   caterva_storage_t *storage, caterva_array_t **array);
306 
307 
308 /**
 * @brief Create an array, with @p fill_value being used as the default value for
310  * uninitialized portions of the array.
311  *
312  * @param ctx The caterva context to be used.
313  * @param params The general params of the array.
314  * @param storage The storage params of the array.
315  * @param fill_value Default value for uninitialized portions of the array.
316  * @param array The memory pointer where the array will be created.
317  *
318  * @return An error code.
319  */
320 int caterva_full(caterva_ctx_t *ctx, caterva_params_t *params,
321                  caterva_storage_t *storage, void *fill_value, caterva_array_t **array);
322 /**
323  * @brief Free an array.
324  *
325  * @param ctx The caterva context to be used.
326  * @param array The memory pointer where the array is placed.
327  *
328  * @return An error code.
329  */
330 int caterva_free(caterva_ctx_t *ctx, caterva_array_t **array);
331 
332 /**
 * @brief Append a chunk to a caterva array (until it is completely filled).
334  *
335  * @param ctx The caterva context to be used.
336  * @param array The caterva array.
337  * @param chunk The buffer where the chunk data is stored.
338  * @param chunksize Size (in bytes) of the buffer.
339  *
340  * @return An error code.
341  */
342 int caterva_append(caterva_ctx_t *ctx, caterva_array_t *array, void *chunk,
343                    int64_t chunksize);
344 
345 /**
346  * @brief Create a caterva array from a super-chunk. It can only be used if the array
347  * is backed by a blosc super-chunk.
348  *
349  * @param ctx The caterva context to be used.
350  * @param schunk The blosc super-chunk where the caterva array is stored.
351  * @param array The memory pointer where the array will be created.
352  *
353  * @return An error code.
354  */
355 int
356 caterva_from_schunk(caterva_ctx_t *ctx, blosc2_schunk *schunk, caterva_array_t **array);
357 
358 /**
359  * @brief Create a caterva array from a serialized super-chunk. It can only be used if the array
360  * is backed by a blosc super-chunk.
361  *
362  * @param ctx The caterva context to be used.
363  * @param serial_schunk The serialized super-chunk where the caterva array is stored.
364  * @param len The size (in bytes) of the serialized super-chunk.
365  * @param array The memory pointer where the array will be created.
366  *
367  * @return An error code.
368  */
369 int caterva_from_serial_schunk(caterva_ctx_t *ctx, uint8_t *serial_schunk, int64_t len,
370                                caterva_array_t **array);
371 
372 /**
373  * @brief Read a caterva array from disk.
374  *
375  * @param ctx The caterva context to be used.
376  * @param urlpath The urlpath of the caterva array on disk.
377  * @param array The memory pointer where the array will be created.
378  *
379  * @return An error code.
380  */
381 int caterva_open(caterva_ctx_t *ctx, const char *urlpath, caterva_array_t **array);
382 
383 /**
384  * @brief Save caterva array into a specific urlpath.
385  *
386  * @param ctx The context to be used.
387  * @param array The array to be saved.
388  * @param urlpath The urlpath where the array will be stored.
389  *
390  * @return An error code.
391  */
392 int caterva_save(caterva_ctx_t *ctx, caterva_array_t *array, char *urlpath);
393 
394 /**
395  * @brief Create a caterva array from the data stored in a buffer.
396  *
397  * @param ctx The caterva context to be used.
398  * @param buffer The buffer where source data is stored.
399  * @param buffersize The size (in bytes) of the buffer.
400  * @param params The general params of the array desired.
401  * @param storage The storage params of the array desired.
402  * @param array The memory pointer where the array will be created.
403  *
404  * @return An error code.
405  */
406 int caterva_from_buffer(caterva_ctx_t *ctx, void *buffer, int64_t buffersize,
407                         caterva_params_t *params, caterva_storage_t *storage,
408                         caterva_array_t **array);
409 
410 /**
411  * @brief Extract the data into a C buffer from a caterva array.
412  *
413  * @param ctx The caterva context to be used.
414  * @param array The caterva array.
415  * @param buffer The buffer where the data will be stored.
416  * @param buffersize Size (in bytes) of the buffer.
417  *
418  * @return An error code.
419  */
420 int caterva_to_buffer(caterva_ctx_t *ctx, caterva_array_t *array, void *buffer,
421                       int64_t buffersize);
422 
423 /**
424  * @brief Get a slice from an array and store it into a new array.
425  *
426  * @param ctx The caterva context to be used.
427  * @param src The array from which the slice will be extracted
428  * @param start The coordinates where the slice will begin.
429  * @param stop The coordinates where the slice will end.
430  * @param storage The storage params of the array desired.
431  * @param array The memory pointer where the array will be created.
432  *
433  * @return An error code.
434  */
435 int caterva_get_slice(caterva_ctx_t *ctx, caterva_array_t *src, int64_t *start,
436                       int64_t *stop, caterva_storage_t *storage, caterva_array_t **array);
437 
438 /**
439  * @brief Squeeze a caterva array
440  *
 * This function removes selected single-dimensional entries from the shape of a
 * caterva array.
443  *
444  * @param ctx The caterva context to be used.
445  * @param array The caterva array.
446  * @param index Indexes of the single-dimensional entries to remove.
447  *
448  * @return An error code
449  */
450 int caterva_squeeze_index(caterva_ctx_t *ctx, caterva_array_t *array,
451                           bool *index);
452 
453 /**
454  * @brief Squeeze a caterva array
455  *
 * This function removes single-dimensional entries from the shape of a caterva array.
457  *
458  * @param ctx The caterva context to be used.
459  * @param array The caterva array.
460  *
461  * @return An error code
462  */
463 int caterva_squeeze(caterva_ctx_t *ctx, caterva_array_t *array);
464 
465 /**
466  * @brief Get a slice from an array and store it into a C buffer.
467  *
468  * @param ctx The caterva context to be used.
469  * @param array The array from which the slice will be extracted.
470  * @param start The coordinates where the slice will begin.
471  * @param stop The coordinates where the slice will end.
472  * @param buffershape The shape of the buffer.
473  * @param buffer The buffer where the data will be stored.
474  * @param buffersize The size (in bytes) of the buffer.
475  *
476  * @return An error code.
477  */
478 int caterva_get_slice_buffer(caterva_ctx_t *ctx, caterva_array_t *array,
479                              int64_t *start, int64_t *stop,
480                              void *buffer, int64_t *buffershape, int64_t buffersize);
481 
482 /**
483  * @brief Set a slice into a caterva array from a C buffer.
484  *
485  * @param ctx The caterva context to be used.
486  * @param buffer The buffer where the slice data is.
487  * @param buffersize The size (in bytes) of the buffer.
488  * @param start The coordinates where the slice will begin.
489  * @param stop The coordinates where the slice will end.
490  * @param buffershape The shape of the buffer.
491  * @param array The caterva array where the slice will be set
492  *
493  * @return An error code.
494  */
495 int caterva_set_slice_buffer(caterva_ctx_t *ctx,
496                              void *buffer, int64_t *buffershape, int64_t buffersize,
497                              int64_t *start, int64_t *stop, caterva_array_t *array);
498 
499 /**
500  * @brief Make a copy of the array data. The copy is done into a new caterva array.
501  *
502  * @param ctx The caterva context to be used.
503  * @param src The array from which data is copied.
504  * @param storage The storage params of the array desired.
505  * @param array The memory pointer where the array will be created.
506  *
507  * @return An error code
508  */
509 int caterva_copy(caterva_ctx_t *ctx, caterva_array_t *src, caterva_storage_t *storage,
510                  caterva_array_t **array);
511 
512 
513 /**
514  * @brief Remove a Caterva file from the file system. Both backends are supported.
515  *
516  * @param ctx The caterva context to be used.
517  * @param urlpath The urlpath of the array to be removed.
518  *
519  * @return An error code
520  */
521 int caterva_remove(caterva_ctx_t *ctx, char *urlpath);
522 
523 
524 /**
525  * @brief Add a vl-metalayer to the Caterva array.
526  *
527  * @param ctx The context to be used.
528  * @param array The array where the metalayer will be added.
 * @param vlmeta The vl-metalayer to add.
530  *
531  * @return An error code
532  */
533 int caterva_vlmeta_add(caterva_ctx_t *ctx, caterva_array_t *array, caterva_metalayer_t *vlmeta);
534 
535 
536 /**
537  *
538  * @brief Get a vl-metalayer from a Caterva array.
539  *
540  * @param ctx The context to be used.
 * @param array The array from which the vl-metalayer will be read.
542  * @param name The vl-metalayer name.
543  * @param vlmeta Pointer to the metalayer where the data will be stored.
544  *
545  * @warning The contents of `vlmeta` are allocated inside the function.
546  * Therefore, they must be released with a `free`.
547  *
548  * @return An error code
549  */
550 int caterva_vlmeta_get(caterva_ctx_t *ctx, caterva_array_t *array,
551                        const char *name, caterva_metalayer_t *vlmeta);
552 
553 /**
554  * @brief Check if a vl-metalayer exists or not.
555  *
556  * @param ctx The context to be used.
557  * @param array The array where the check will be done.
558  * @param name The name of the vl-metalayer to check.
559  * @param exists Pointer where the result will be stored.
560  *
561  * @return An error code
562  */
563 int caterva_vlmeta_exists(caterva_ctx_t *ctx, caterva_array_t *array,
564                           const char *name, bool *exists);
565 
566 /**
567  * @brief Update a vl-metalayer content in a Caterva array.
568  *
569  * @param ctx The context to be used.
570  * @param array The array where the vl-metalayer will be updated.
571  * @param vlmeta The vl-metalayer to update.
572  *
573  * @return An error code
574  */
575 int caterva_vlmeta_update(caterva_ctx_t *ctx, caterva_array_t *array,
576                           caterva_metalayer_t *vlmeta);
577 
578 /**
579  *
580  * @brief Get a metalayer from a Caterva array.
581  *
582  * @param ctx The context to be used.
 * @param array The array from which the metalayer will be read.
 * @param name The metalayer name.
585  * @param meta Pointer to the metalayer where the data will be stored.
586  *
587  * @warning The contents of `meta` are allocated inside the function.
588  * Therefore, they must be released with a `free`.
589  *
590  * @return An error code
591  */
592 int caterva_meta_get(caterva_ctx_t *ctx, caterva_array_t *array,
593                        const char *name, caterva_metalayer_t *meta);
594 
595 /**
596  * @brief Check if a metalayer exists or not.
597  *
598  * @param ctx The context to be used.
599  * @param array The array where the check will be done.
600  * @param name The name of the metalayer to check.
601  * @param exists Pointer where the result will be stored.
602  *
603  * @return An error code
604  */
605 int caterva_meta_exists(caterva_ctx_t *ctx, caterva_array_t *array,
606                           const char *name, bool *exists);
607 
608 /**
609  * @brief Update a metalayer content in a Caterva array.
610  *
611  * @param ctx The context to be used.
612  * @param array The array where the metalayer will be updated.
613  * @param meta The metalayer to update.
614  *
615  * @return An error code
616  */
617 int caterva_meta_update(caterva_ctx_t *ctx, caterva_array_t *array,
618                           caterva_metalayer_t *meta);
619 
620 #endif  // CATERVA_CATERVA_H_
621