1 /*
2     Copyright (C) 2010-2016  Francesc Alted
3     http://blosc.org
4     License: MIT (see LICENSE.txt)
5 
6     Filter program that allows the use of the Blosc filter in HDF5.
7 
8     This is based on the LZF filter interface (http://h5py.alfven.org)
9     by Andrew Collette.
10 
11 */
12 
13 
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <errno.h>
18 #include "hdf5.h"
19 #include "blosc_filter.h"
20 
/* PUSH_ERR(func, minor, fmt, ...): push a printf-style message onto the
   default HDF5 error stack (H5E_DEFAULT).  The entry is tagged with the
   current __FILE__/__LINE__, the caller-supplied function name `func`,
   error class H5E_ERR_CLS, major code H5E_PLINE and minor code `minor`.

   Three variants exist only because compilers differ in how they treat the
   comma that precedes an empty __VA_ARGS__. */
#if defined(__GNUC__)
/* GNU extension: "##" removes the preceding comma when no variadic
   arguments are supplied. */
#define PUSH_ERR(func, minor, str, ...)                                   \
    H5Epush(H5E_DEFAULT, __FILE__, func, __LINE__,                        \
            H5E_ERR_CLS, H5E_PLINE, minor, str, ##__VA_ARGS__)
#elif defined(_MSC_VER)
/* Relies on the compiler's own handling of an empty __VA_ARGS__. */
#define PUSH_ERR(func, minor, str, ...)                                   \
    H5Epush(H5E_DEFAULT, __FILE__, func, __LINE__,                        \
            H5E_ERR_CLS, H5E_PLINE, minor, str, __VA_ARGS__)
#else
/* Portable fallback: the format string travels inside __VA_ARGS__, so
   there is never a dangling comma.  Compiler-supported approaches are
   preferred when available. */
#define PUSH_ERR(func, minor, ...)                                        \
    H5Epush(H5E_DEFAULT, __FILE__, func, __LINE__,                        \
            H5E_ERR_CLS, H5E_PLINE, minor, __VA_ARGS__)
#endif  /* defined(__GNUC__) */

/* GET_FILTER: forwards its seven arguments unchanged to
   H5Pget_filter_by_id() and appends NULL as the eighth argument. */
#define GET_FILTER(plist, id, flags, nelmts, values, namelen, name)       \
    H5Pget_filter_by_id(plist, id, flags, nelmts, values, namelen, name, NULL)
32 
33 
34 size_t blosc_filter(unsigned flags, size_t cd_nelmts,
35                     const unsigned cd_values[], size_t nbytes,
36                     size_t *buf_size, void **buf);
37 
38 herr_t blosc_set_local(hid_t dcpl, hid_t type, hid_t space);
39 
40 
41 /* Register the filter, passing on the HDF5 return value */
register_blosc(char ** version,char ** date)42 int register_blosc(char **version, char **date){
43 
44     int retval;
45 
46     H5Z_class_t filter_class = {
47         H5Z_CLASS_T_VERS,
48         (H5Z_filter_t)(FILTER_BLOSC),
49         1, 1,
50         "blosc",
51         NULL,
52         (H5Z_set_local_func_t)(blosc_set_local),
53         (H5Z_func_t)(blosc_filter)
54     };
55 
56     retval = H5Zregister(&filter_class);
57     if(retval<0){
58         PUSH_ERR("register_blosc", H5E_CANTREGISTER, "Can't register Blosc filter");
59     }
60     *version = strdup(BLOSC_VERSION_STRING);
61     *date = strdup(BLOSC_VERSION_DATE);
62     return 1; /* lib is available */
63 }
64 
65 /*  Filter setup.  Records the following inside the DCPL:
66 
67     1. If version information is not present, set slots 0 and 1 to the filter
68        revision and Blosc version, respectively.
69 
70     2. Compute the type size in bytes and store it in slot 2.
71 
72     3. Compute the chunk size in bytes and store it in slot 3.
73 */
blosc_set_local(hid_t dcpl,hid_t type,hid_t space)74 herr_t blosc_set_local(hid_t dcpl, hid_t type, hid_t space){
75 
76     int ndims;
77     int i;
78     herr_t r;
79 
80     unsigned int typesize, basetypesize;
81     unsigned int bufsize;
82     hsize_t chunkdims[32];
83     unsigned int flags;
84     size_t nelements = 8;
85     unsigned int values[] = {0,0,0,0,0,0,0,0};
86     hid_t super_type;
87     H5T_class_t classt;
88 
89     r = GET_FILTER(dcpl, FILTER_BLOSC, &flags, &nelements, values, 0, NULL);
90     if(r<0) return -1;
91 
92     if(nelements < 4) nelements = 4;  /* First 4 slots reserved. */
93 
94     /* Set Blosc info in first two slots */
95     values[0] = FILTER_BLOSC_VERSION;
96     values[1] = BLOSC_VERSION_FORMAT;
97 
98     ndims = H5Pget_chunk(dcpl, 32, chunkdims);
99     if(ndims<0) return -1;
100     if(ndims>32){
101         PUSH_ERR("blosc_set_local", H5E_CALLBACK, "Chunk rank exceeds limit");
102         return -1;
103     }
104 
105     typesize = H5Tget_size(type);
106     if (typesize==0) return -1;
107     /* Get the size of the base type, even for ARRAY types */
108     classt = H5Tget_class(type);
109     if (classt == H5T_ARRAY) {
110       /* Get the array base component */
111       super_type = H5Tget_super(type);
112       basetypesize = H5Tget_size(super_type);
113       /* Release resources */
114       H5Tclose(super_type);
115     }
116     else {
117       basetypesize = typesize;
118     }
119 
120     /* Limit large typesizes (they are pretty inneficient to shuffle
121        and, in addition, Blosc does not handle typesizes larger than
122        256 bytes). */
123     if (basetypesize > BLOSC_MAX_TYPESIZE) basetypesize = 1;
124     values[2] = basetypesize;
125 
126     /* Get the size of the chunk */
127     bufsize = typesize;
128     for (i=0; i<ndims; i++) {
129         bufsize *= chunkdims[i];
130     }
131     values[3] = bufsize;
132 
133 #ifdef BLOSC_DEBUG
134     fprintf(stderr, "Blosc: Computed buffer size %d\n", bufsize);
135 #endif
136 
137     r = H5Pmodify_filter(dcpl, FILTER_BLOSC, flags, nelements, values);
138     if(r<0) return -1;
139 
140     return 1;
141 }
142 
143 
144 /* The filter function */
blosc_filter(unsigned flags,size_t cd_nelmts,const unsigned cd_values[],size_t nbytes,size_t * buf_size,void ** buf)145 size_t blosc_filter(unsigned flags, size_t cd_nelmts,
146                     const unsigned cd_values[], size_t nbytes,
147                     size_t *buf_size, void **buf){
148 
149     void* outbuf = NULL;
150     int status = 0;                /* Return code from Blosc routines */
151     size_t typesize;
152     size_t outbuf_size;
153     int clevel = 5;                /* Compression level default */
154     int doshuffle = 1;             /* Shuffle default */
155     int compcode;                  /* Blosc compressor */
156     int code;
157     char *compname = "blosclz";    /* The compressor by default */
158     char *complist;
159     char errmsg[256];
160 
161     /* Filter params that are always set */
162     typesize = cd_values[2];      /* The datatype size */
163     outbuf_size = cd_values[3];   /* Precomputed buffer guess */
164     /* Optional params */
165     if (cd_nelmts >= 5) {
166         clevel = cd_values[4];        /* The compression level */
167     }
168     if (cd_nelmts >= 6) {
169         doshuffle = cd_values[5];  /* BLOSC_SHUFFLE, BLOSC_BITSHUFFLE */
170 	/* bitshuffle is only meant for production in >= 1.8.0 */
171 #if ( (BLOSC_VERSION_MAJOR <= 1) && (BLOSC_VERSION_MINOR < 8) )
172 	if (doshuffle == BLOSC_BITSHUFFLE) {
173 	  PUSH_ERR("blosc_filter", H5E_CALLBACK,
174 		   "this Blosc library version is not supported.  Please update to >= 1.8");
175 	  goto failed;
176 	}
177 #endif
178     }
179     if (cd_nelmts >= 7) {
180         compcode = cd_values[6];     /* The Blosc compressor used */
181 	/* Check that we actually have support for the compressor code */
182         complist = blosc_list_compressors();
183 	code = blosc_compcode_to_compname(compcode, &compname);
184 	if (code == -1) {
185             PUSH_ERR("blosc_filter", H5E_CALLBACK,
186                      "this Blosc library does not have support for "
187                      "the '%s' compressor, but only for: %s",
188                      compname, complist);
189             goto failed;
190 	}
191     }
192 
193     /* We're compressing */
194     if(!(flags & H5Z_FLAG_REVERSE)){
195 
196         /* Allocate an output buffer exactly as long as the input data; if
197            the result is larger, we simply return 0.  The filter is flagged
198            as optional, so HDF5 marks the chunk as uncompressed and
199            proceeds.
200         */
201 
202         outbuf_size = (*buf_size);
203 
204 #ifdef BLOSC_DEBUG
205         fprintf(stderr, "Blosc: Compress %zd chunk w/buffer %zd\n",
206 		nbytes, outbuf_size);
207 #endif
208 
209         outbuf = malloc(outbuf_size);
210 
211         if (outbuf == NULL){
212             PUSH_ERR("blosc_filter", H5E_CALLBACK,
213                      "Can't allocate compression buffer");
214             goto failed;
215         }
216 
217         blosc_set_compressor(compname);
218         status = blosc_compress(clevel, doshuffle, typesize, nbytes,
219                                 *buf, outbuf, nbytes);
220         if (status < 0) {
221           PUSH_ERR("blosc_filter", H5E_CALLBACK, "Blosc compression error");
222           goto failed;
223         }
224 
225     /* We're decompressing */
226     } else {
227         /* declare dummy variables */
228         size_t cbytes, blocksize;
229 
230         free(outbuf);
231 
232         /* Extract the exact outbuf_size from the buffer header.
233          *
234          * NOTE: the guess value got from "cd_values" corresponds to the
235          * uncompressed chunk size but it should not be used in a general
236          * cases since other filters in the pipeline can modify the buffere
237          *  size.
238          */
239         blosc_cbuffer_sizes(*buf, &outbuf_size, &cbytes, &blocksize);
240 
241 #ifdef BLOSC_DEBUG
242         fprintf(stderr, "Blosc: Decompress %zd chunk w/buffer %zd\n", nbytes, outbuf_size);
243 #endif
244 
245         outbuf = malloc(outbuf_size);
246 
247         if(outbuf == NULL){
248           PUSH_ERR("blosc_filter", H5E_CALLBACK, "Can't allocate decompression buffer");
249           goto failed;
250         }
251 
252         status = blosc_decompress(*buf, outbuf, outbuf_size);
253         if(status <= 0){    /* decompression failed */
254           PUSH_ERR("blosc_filter", H5E_CALLBACK, "Blosc decompression error");
255           goto failed;
256         } /* if !status */
257 
258     } /* compressing vs decompressing */
259 
260     if(status != 0){
261         free(*buf);
262         *buf = outbuf;
263         *buf_size = outbuf_size;
264         return status;  /* Size of compressed/decompressed data */
265     }
266 
267  failed:
268     free(outbuf);
269     return 0;
270 
271 } /* End filter function */
272