1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2  * Copyright by The HDF Group.                                               *
3  * Copyright by the Board of Trustees of the University of Illinois.         *
4  * All rights reserved.                                                      *
5  *                                                                           *
6  * This file is part of HDF5.  The full HDF5 copyright notice, including     *
7  * terms governing use, modification, and redistribution, is contained in    *
8  * the COPYING file, which can be found at the root of the source code       *
9  * distribution tree, or in https://support.hdfgroup.org/ftp/HDF5/releases.  *
10  * If you do not have access to either file, you may request a copy from     *
11  * help@hdfgroup.org.                                                        *
12  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
13 
14 #include "h5repack.h"
15 #include "h5tools.h"
16 #include "h5tools_utils.h"
17 
18 /*-------------------------------------------------------------------------
19  * Function: parse_filter
20  *
21  * Purpose: read filter information
22  *
23  * Return: a list of names, the number of names and its compression type
24  *
25  * <name of filter> can be:
26  *  GZIP, to apply the HDF5 GZIP filter (GZIP compression)
27  *  SZIP, to apply the HDF5 SZIP filter (SZIP compression)
28  *  SHUF, to apply the HDF5 shuffle filter
29  *  FLET, to apply the HDF5 checksum filter
30  *  NBIT, to apply the HDF5 NBIT filter (NBIT compression)
31  *  SOFF, to apply the HDF5 scale+offset filter (compression)
32  *  UD, to apply a User Defined filter k,m,n1[,…,nm]
33  *  NONE, to remove the filter
34  *
35  * Examples:
36  * "GZIP=6"
37  * "A,B:NONE"
38  *-------------------------------------------------------------------------
39  */
parse_filter(const char * str,unsigned * n_objs,filter_info_t * filt,pack_opt_t * options,int * is_glb)40 obj_list_t* parse_filter(const char *str, unsigned *n_objs, filter_info_t *filt,
41         pack_opt_t *options, int *is_glb) {
42     size_t      i, m, u;
43     char        c;
44     size_t      len = HDstrlen(str);
45     int         f, k, l, p, q, end_obj = -1, no_param = 0;
46     unsigned    j, n;
47     char        sobj[MAX_NC_NAME];
48     char        scomp[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
49     char        stype[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
50     char        smask[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
51     obj_list_t* obj_list = NULL;
52     unsigned    pixels_per_block;
53 
54     /* initialize compression  info */
55     HDmemset(filt, 0, sizeof(filter_info_t));
56     *is_glb = 0;
57 
58     /* check for the end of object list and number of objects */
59     for (i = 0, n = 0; i < len; i++) {
60         c = str[i];
61         if (c == ':') {
62             end_obj = (int) i;
63             break;
64         }
65         if (c == ',')
66             n++;
67     }
68     n++;
69 
70     /* Check for missing : */
71     if (end_obj == -1) {
72         /* apply to all objects */
73         options->all_filter = 1;
74         *is_glb = 1;
75         *n_objs = 1;
76     }
77     else
78         *n_objs = n;
79 
80     obj_list = (obj_list_t *) HDmalloc(n * sizeof(obj_list_t));
81     if (obj_list == NULL) {
82         error_msg("could not allocate object list\n");
83         return NULL;
84     }
85 
86     /* get object list */
87     if (end_obj > 0)
88         for (j = 0, k = 0, n = 0; j < (unsigned) end_obj; j++, k++) {
89             c = str[j];
90             sobj[k] = c;
91             if (c == ',' || j == (unsigned) (end_obj - 1)) {
92                 if (c == ',')
93                     sobj[k] = '\0';
94                 else
95                     sobj[k + 1] = '\0';
96 
97                 HDstrcpy(obj_list[n].obj, sobj);
98                 HDmemset(sobj, 0, sizeof(sobj));
99                 n++;
100                 k = -1;
101             }
102         }
103     /* nothing after : */
104     if (end_obj + 1 == (int) len) {
105         if (obj_list)
106             HDfree(obj_list);
107         error_msg("input Error: Invalid compression type in <%s>\n", str);
108         HDexit(EXIT_FAILURE);
109     }
110 
111     /* get filter additional parameters */
112     m = 0;
113     for (i = (size_t)(end_obj + 1), k = 0, j = 0; i < len; i++, k++) {
114         c = str[i];
115         scomp[k] = c;
116         if (c == '=' || i == len - 1) {
117             if (c == '=') { /*one more parameter */
118                 scomp[k] = '\0'; /*cut space */
119                 /*-------------------------------------------------------------------------
120                 * H5Z_FILTER_SZIP
121                 * szip has the format SZIP=<pixels per block,coding>
122                 * pixels per block is a even number in 2-32 and coding method is 'EC' or 'NN'
123                 * example SZIP=8,NN
124                 *-------------------------------------------------------------------------
125                 */
126                 if (HDstrcmp(scomp, "SZIP") == 0) {
127                     l = -1; /* mask index check */
128                     for (m = 0, u = i + 1; u < len; u++, m++) {
129                         if (str[u] == ',') {
130                             stype[m] = '\0'; /* end digit of szip */
131                             l = 0; /* start EC or NN search */
132                             u++; /* skip ',' */
133                         }
134                         c = str[u];
135                         if (!HDisdigit(c) && l == -1) {
136                             if (obj_list)
137                                 HDfree(obj_list);
138                             error_msg("compression parameter not digit in <%s>\n", str);
139                             HDexit(EXIT_FAILURE);
140                         }
141                         if (l == -1)
142                             stype[m] = c;
143                         else {
144                             smask[l] = c;
145                             l++;
146                             if (l == 2) {
147                                 smask[l] = '\0';
148                                 i = len - 1; /* end */
149                                 if (HDstrcmp(smask,"NN") == 0)
150                                     filt->cd_values[j++] = H5_SZIP_NN_OPTION_MASK;
151                                 else if (HDstrcmp(smask,"EC") == 0)
152                                     filt->cd_values[j++] = H5_SZIP_EC_OPTION_MASK;
153                                 else {
154                                     error_msg("szip mask must be 'NN' or 'EC' \n");
155                                     HDexit(EXIT_FAILURE);
156                                 }
157                             }
158                         }
159                     } /* u */
160                 } /*if */
161 
162                 /*-------------------------------------------------------------------------
163                 * H5Z_FILTER_SCALEOFFSET
164                 * scaleoffset has the format SOFF=<scale_factor,scale_type>
165                 * scale_type can be
166                 *   integer datatype, H5Z_SO_INT (IN)
167                 *   float datatype using D-scaling method, H5Z_SO_FLOAT_DSCALE  (DS)
168                 *   float datatype using E-scaling method, H5Z_SO_FLOAT_ESCALE  (ES) , not yet implemented
169                 * for integer datatypes, scale_factor denotes Minimum Bits
170                 * for float datatypes, scale_factor denotes decimal scale factor
171                 *  examples
172                 *  SOFF=31,IN
173                 *  SOFF=3,DF
174                 *-------------------------------------------------------------------------
175                 */
176                 else if (HDstrcmp(scomp, "SOFF") == 0) {
177                     l = -1; /* mask index check */
178                     for (m = 0, u = i + 1; u < len; u++, m++) {
179                         if (str[u] == ',') {
180                             stype[m] = '\0'; /* end digit */
181                             l = 0; /* start 'IN' , 'DS', or 'ES' search */
182                             u++; /* skip ',' */
183                         }
184                         c = str[u];
185                         if (!HDisdigit(c) && l == -1) {
186                             if (obj_list)
187                                 HDfree(obj_list);
188                             error_msg("compression parameter is not a digit in <%s>\n", str);
189                             HDexit(EXIT_FAILURE);
190                         }
191                         if (l == -1)
192                             stype[m] = c;
193                         else {
194                             smask[l] = c;
195                             l++;
196                             if (l == 2) {
197                                 smask[l] = '\0';
198                                 i = len - 1; /* end */
199                                 if (HDstrcmp(smask,"IN") == 0)
200                                     filt->cd_values[j++] = H5Z_SO_INT;
201                                 else if (HDstrcmp(smask, "DS") == H5Z_SO_FLOAT_DSCALE)
202                                     filt->cd_values[j++] = H5Z_SO_FLOAT_DSCALE;
203                                 else {
204                                     error_msg("scale type must be 'IN' or 'DS' \n");
205                                     HDexit(EXIT_FAILURE);
206                                 }
207                             }
208                         }
209                     } /* u */
210                 } /*if */
211 
212                 /*-------------------------------------------------------------------------
213                 * User Defined
214                 *   has the format UD=<filter_number,filter_flag,cd_value_count,value_1[,value_2,...,value_N]>
215                 *  BZIP2 example
216                 *  UD=307,0,1,9
217                 *-------------------------------------------------------------------------
218                 */
219                 else if (HDstrcmp(scomp, "UD") == 0) {
220                     l = -1; /* filter number index check */
221                     f = -1; /* filter flag index check */
222                     p = -1; /* CD_VAL count check */
223                     for (m = 0, q = 0, u = i + 1; u < len; u++, m++, q++) {
224                         if (str[u] == ',') {
225                             stype[q] = '\0'; /* end digit */
226                             if (l == -1) {
227                                 filt->filtn = HDatoi(stype);
228                                 l = 0;
229                             }
230                             else if (f == -1) {
231                                 filt->filt_flag = HDstrtoul(stype, NULL, 0);
232                                 f = 0;
233                             }
234                             else if (p == -1) {
235                                 filt->cd_nelmts = HDstrtoull(stype, NULL, 0);
236                                 p = 0;
237                             }
238                             else {
239                                 filt->cd_values[j++] = (unsigned)HDstrtoul(stype, NULL, 0);
240                             }
241                             q = 0;
242                             u++; /* skip ',' */
243                         }
244                         c = str[u];
245                         if (!HDisdigit(c) && l == -1) {
246                             if (obj_list)
247                                 HDfree(obj_list);
248                             error_msg("filter number parameter is not a digit in <%s>\n", str);
249                             HDexit(EXIT_FAILURE);
250                         }
251                         else if (!HDisdigit(c) && f == -1) {
252                             if (obj_list)
253                                 HDfree(obj_list);
254                             error_msg("filter flag parameter is not a digit in <%s>\n", str);
255                             HDexit(EXIT_FAILURE);
256                         }
257                         stype[q] = c;
258                     } /* for u */
259                     stype[q] = '\0';
260                 } /*if */
261 
262                 /*-------------------------------------------------------------------------
263                 * all other filters
264                 *-------------------------------------------------------------------------
265                 */
266                 else {
267                     /* here we could have 1 or 2 digits  */
268                     for (m = 0, u = i + 1; u < len; u++, m++) {
269                         c = str[u];
270                         if (!HDisdigit(c)) {
271                             if (obj_list)
272                                 HDfree(obj_list);
273                             error_msg("compression parameter is not a digit in <%s>\n", str);
274                             HDexit(EXIT_FAILURE);
275                         }
276                         stype[m] = c;
277                     } /* u */
278 
279                     stype[m] = '\0';
280                 } /*if */
281 
282                 filt->cd_values[j++] = (unsigned) HDstrtoul(stype, NULL, 0);
283                 if(filt->cd_nelmts == 0)
284                     j = 0;
285                 i += m; /* jump */
286             }
287             else if (i == len - 1) { /*no more parameters */
288                 scomp[k + 1] = '\0';
289                 no_param = 1;
290             }
291 
292             /*-------------------------------------------------------------------------
293             * translate from string to filter symbol
294             *-------------------------------------------------------------------------
295             */
296 
297             /*-------------------------------------------------------------------------
298             * H5Z_FILTER_NONE
299             *-------------------------------------------------------------------------
300             */
301             if (HDstrcmp(scomp, "NONE") == 0) {
302                 filt->filtn = H5Z_FILTER_NONE;
303                 filt->cd_nelmts = 0;
304             }
305 
306             /*-------------------------------------------------------------------------
307             * H5Z_FILTER_DEFLATE
308             *-------------------------------------------------------------------------
309             */
310             else if (HDstrcmp(scomp, "GZIP") == 0) {
311                 filt->filtn = H5Z_FILTER_DEFLATE;
312                 filt->cd_nelmts = 1;
313                 if (no_param) { /*no more parameters, GZIP must have parameter */
314                     if (obj_list)
315                         HDfree(obj_list);
316                     error_msg("missing compression parameter in <%s>\n", str);
317                     HDexit(EXIT_FAILURE);
318                 }
319             }
320 
321             /*-------------------------------------------------------------------------
322             * H5Z_FILTER_SZIP
323             *-------------------------------------------------------------------------
324             */
325             else if (HDstrcmp(scomp, "SZIP") == 0) {
326                 filt->filtn = H5Z_FILTER_SZIP;
327                 filt->cd_nelmts = 2;
328                 if (no_param) { /*no more parameters, SZIP must have parameter */
329                     if (obj_list)
330                         HDfree(obj_list);
331                     error_msg("missing compression parameter in <%s>\n", str);
332                     HDexit(EXIT_FAILURE);
333                 }
334             }
335 
336             /*-------------------------------------------------------------------------
337             * H5Z_FILTER_SHUFFLE
338             *-------------------------------------------------------------------------
339             */
340             else if (HDstrcmp(scomp, "SHUF") == 0) {
341                 filt->filtn = H5Z_FILTER_SHUFFLE;
342                 filt->cd_nelmts = 0;
343                 if (m > 0) { /*shuffle does not have parameter */
344                     if (obj_list)
345                         HDfree(obj_list);
346                     error_msg("extra parameter in SHUF <%s>\n", str);
347                     HDexit(EXIT_FAILURE);
348                 }
349             }
350             /*-------------------------------------------------------------------------
351             * H5Z_FILTER_FLETCHER32
352             *-------------------------------------------------------------------------
353             */
354             else if (HDstrcmp(scomp, "FLET") == 0) {
355                 filt->filtn = H5Z_FILTER_FLETCHER32;
356                 filt->cd_nelmts = 0;
357                 if (m > 0) { /*shuffle does not have parameter */
358                     if (obj_list)
359                         HDfree(obj_list);
360                     error_msg("extra parameter in FLET <%s>\n", str);
361                     HDexit(EXIT_FAILURE);
362                 }
363             }
364             /*-------------------------------------------------------------------------
365             * H5Z_FILTER_NBIT
366             *-------------------------------------------------------------------------
367             */
368             else if (HDstrcmp(scomp, "NBIT") == 0) {
369                 filt->filtn = H5Z_FILTER_NBIT;
370                 filt->cd_nelmts = 0;
371                 if (m > 0) { /*nbit does not have parameter */
372                     if (obj_list)
373                         HDfree(obj_list);
374                     error_msg("extra parameter in NBIT <%s>\n", str);
375                     HDexit(EXIT_FAILURE);
376                 }
377             }
378             /*-------------------------------------------------------------------------
379             * H5Z_FILTER_SCALEOFFSET
380             *-------------------------------------------------------------------------
381             */
382             else if (HDstrcmp(scomp, "SOFF") == 0) {
383                 filt->filtn = H5Z_FILTER_SCALEOFFSET;
384                 filt->cd_nelmts = 2;
385                 if (no_param) { /*no more parameters, SOFF must have parameter */
386                     if (obj_list)
387                         HDfree(obj_list);
388                     error_msg("missing compression parameter in <%s>\n", str);
389                     HDexit(EXIT_FAILURE);
390                 }
391             }
392             /*-------------------------------------------------------------------------
393             * User Defined Filter
394             *-------------------------------------------------------------------------
395             */
396             else if (HDstrcmp(scomp, "UD") == 0) {
397                 /* parameters does not match count */
398                 if (filt->cd_nelmts != j) {
399                     if (obj_list)
400                         HDfree(obj_list);
401                     error_msg("incorrect number of compression parameters in <%s>\n", str);
402                     HDexit(EXIT_FAILURE);
403                 }
404             }
405             else {
406                 if (obj_list)
407                     HDfree(obj_list);
408                 error_msg("invalid filter type in <%s>\n", str);
409                 HDexit(EXIT_FAILURE);
410             }
411             break;
412         }
413     } /*i*/
414 
415     /*-------------------------------------------------------------------------
416     * check valid parameters
417     *-------------------------------------------------------------------------
418     */
419 
420     switch (filt->filtn) {
421     /*-------------------------------------------------------------------------
422     * H5Z_FILTER_DEFLATE
423     *-------------------------------------------------------------------------
424     */
425     case H5Z_FILTER_DEFLATE:
426         if (filt->cd_values[0] > 9) {
427             if (obj_list)
428                 HDfree(obj_list);
429             error_msg("invalid compression parameter in <%s>\n", str);
430             HDexit(EXIT_FAILURE);
431         }
432         break;
433         /*-------------------------------------------------------------------------
434         * H5Z_FILTER_SZIP
435         *-------------------------------------------------------------------------
436         */
437     case H5Z_FILTER_SZIP:
438         pixels_per_block = filt->cd_values[0];
439         if ((pixels_per_block % 2) == 1) {
440             if (obj_list)
441                 HDfree(obj_list);
442             error_msg("pixels_per_block is not even in <%s>\n", str);
443             HDexit(EXIT_FAILURE);
444         }
445         if (pixels_per_block > H5_SZIP_MAX_PIXELS_PER_BLOCK) {
446             if (obj_list)
447                 HDfree(obj_list);
448             error_msg("pixels_per_block is too large in <%s>\n", str);
449             HDexit(EXIT_FAILURE);
450         }
451         if ((HDstrcmp(smask,"NN") != 0) && (HDstrcmp(smask,"EC") != 0)) {
452             if (obj_list)
453                 HDfree(obj_list);
454             error_msg("szip mask must be 'NN' or 'EC' \n");
455             HDexit(EXIT_FAILURE);
456         }
457         break;
458     default:
459         break;
460     };
461 
462     return obj_list;
463 }
464 
465 
466 /*-------------------------------------------------------------------------
467  * Function: parse_layout
468  *
469  * Purpose: read layout info
470  *
471  * Return: a list of names, the number of names and its chunking info for
472  *  chunked. NULL, on error
473  * the layout type can be:
474  *  CHUNK, to apply chunking layout
475  *  CONTI, to apply contiguous layout
476  *  COMPA, to apply compact layout
477  *
478  * Example:
479  * "AA,B,CDE:CHUNK=10X10"
480  *
481  * Programmer: Pedro Vicente, pvn@ncsa.uiuc.edu
482  *
483  * Date: December 30, 2003
484  *
485  *-------------------------------------------------------------------------
486  */
parse_layout(const char * str,unsigned * n_objs,pack_info_t * pack,pack_opt_t * options)487 obj_list_t* parse_layout(const char *str, unsigned *n_objs, pack_info_t *pack, /* info about layout needed */
488 pack_opt_t *options) {
489     obj_list_t* obj_list = NULL;
490     unsigned    i, j, n;
491     char        c;
492     size_t      len = HDstrlen(str);
493     int         k, end_obj = -1, c_index;
494     char        sobj[MAX_NC_NAME];
495     char        sdim[10];
496     char        slayout[10];
497 
498     HDmemset(sdim, '\0', sizeof(sdim));
499     HDmemset(sobj, '\0', sizeof(sobj));
500     HDmemset(slayout, '\0', sizeof(slayout));
501 
502     /* check for the end of object list and number of objects */
503     for (i = 0, n = 0; i < len; i++) {
504         c = str[i];
505         if (c == ':')
506             end_obj = (int) i;
507         if (c == ',')
508             n++;
509     }
510 
511     if (end_obj == -1) { /* missing : chunk all */
512         options->all_layout = 1;
513     }
514 
515     n++;
516     obj_list = (obj_list_t*) HDmalloc(n * sizeof(obj_list_t));
517     if (obj_list == NULL) {
518         error_msg("could not allocate object list\n");
519         return NULL;
520     }
521     *n_objs = n;
522 
523     /* get object list */
524     if (end_obj > 0)
525         for (j = 0, k = 0, n = 0; j < (unsigned) end_obj; j++, k++) {
526             c = str[j];
527             sobj[k] = c;
528             if (c == ',' || j == (unsigned) (end_obj - 1)) {
529                 if (c == ',')
530                     sobj[k] = '\0';
531                 else
532                     sobj[k + 1] = '\0';
533                 HDstrcpy(obj_list[n].obj, sobj);
534                 HDmemset(sobj, 0, sizeof(sobj));
535                 n++;
536                 k = -1;
537             }
538         }
539 
540     /* nothing after : */
541     if (end_obj + 1 == (int) len) {
542         if (obj_list)
543             HDfree(obj_list);
544         error_msg("in parse layout, no characters after : in <%s>\n", str);
545         HDexit(EXIT_FAILURE);
546     }
547 
548     /* get layout info */
549     for (j = (unsigned) (end_obj + 1), n = 0; n <= 5; j++, n++) {
550         if (n == 5) {
551             slayout[n] = '\0'; /*cut string */
552             if (HDstrcmp(slayout, "COMPA") == 0)
553                 pack->layout = H5D_COMPACT;
554             else if (HDstrcmp(slayout, "CONTI") == 0)
555                 pack->layout = H5D_CONTIGUOUS;
556             else if (HDstrcmp(slayout, "CHUNK") == 0)
557                 pack->layout = H5D_CHUNKED;
558             else {
559                 error_msg("in parse layout, not a valid layout in <%s>\n", str);
560                 HDexit(EXIT_FAILURE);
561             }
562         }
563         else {
564             c = str[j];
565             slayout[n] = c;
566         }
567     } /* j */
568 
569     if (pack->layout == H5D_CHUNKED) {
570         /*-------------------------------------------------------------------------
571         * get chunk info
572         *-------------------------------------------------------------------------
573         */
574         k = 0;
575         if (j > len) {
576             if (obj_list)
577                 HDfree(obj_list);
578             error_msg("in parse layout,  <%s> Chunk dimensions missing\n", str);
579             HDexit(EXIT_FAILURE);
580         }
581 
582         for (i = j, c_index = 0; i < len; i++) {
583             c = str[i];
584             sdim[k] = c;
585             k++; /*increment sdim index */
586 
587             if (!HDisdigit(c) && c != 'x' && c != 'N' && c != 'O' && c != 'N' && c != 'E') {
588                 if (obj_list)
589                     HDfree(obj_list);
590                 error_msg("in parse layout, <%s> Not a valid character in <%s>\n", sdim, str);
591                 HDexit(EXIT_FAILURE);
592             }
593 
594             if (c == 'x' || i == len - 1) {
595                 if (c == 'x') {
596                     sdim[k - 1] = '\0';
597                     k = 0;
598                     pack->chunk.chunk_lengths[c_index] = HDstrtoull(sdim, NULL, 0);
599                     if (pack->chunk.chunk_lengths[c_index] == 0) {
600                         if (obj_list)
601                             HDfree(obj_list);
602                         error_msg("in parse layout, <%s> conversion to number in <%s>\n", sdim, str);
603                         HDexit(EXIT_FAILURE);
604                     }
605                     c_index++;
606                 }
607                 else if (i == len - 1) { /*no more parameters */
608                     sdim[k] = '\0';
609                     k = 0;
610                     if (HDstrcmp(sdim,"NONE") == 0) {
611                         pack->chunk.rank = -2;
612                     }
613                     else {
614                         pack->chunk.chunk_lengths[c_index] = HDstrtoull(sdim, NULL, 0);
615                         if (pack->chunk.chunk_lengths[c_index] == 0) {
616                             if (obj_list)
617                                 HDfree(obj_list);
618                             error_msg("in parse layout, <%s> conversion to number in <%s>\n", sdim, str);
619                             HDexit(EXIT_FAILURE);
620                         }
621                         pack->chunk.rank = c_index + 1;
622                     }
623                 } /*if */
624             } /*if c=='x' || i==len-1 */
625         } /*i*/
626     } /*H5D_CHUNKED*/
627 
628     return obj_list;
629 }
630