1 /** \file
2   This header file contains the definitions of structs used to hold
3   netCDF file metadata in memory.
4 
5   Copyright 2005-2011 University Corporation for Atmospheric
6   Research/Unidata.
7 */
8 
9 #ifndef _NC4INTERNAL_
10 #define _NC4INTERNAL_
11 
12 #include "config.h"
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <ctype.h>
16 #include <stdarg.h>
17 #include <string.h>
18 #include <vtk_hdf5.h>
19 
20 
21 #include "ncdimscale.h"
22 #include "nc_logging.h"
23 
24 #ifdef USE_PARALLEL
25 #include "netcdf_par.h"
26 #endif /* USE_PARALLEL */
27 #include "xxxnetcdf.h"
28 #include "netcdf_f.h"
29 
30 /* Always needed */
31 #include "nc.h"
32 
33 #ifdef USE_HDF4
34 #include <mfhdf.h>
35 #endif
36 
/* Masks and shift that split a 32-bit id into its upper and lower 16 bits.
 * NOTE(review): presumably the upper half carries the file id and the lower
 * half the group id of an ncid -- confirm against the callers. */
#define FILE_ID_MASK    (0xffff0000)
#define GRP_ID_MASK     (0x0000ffff)
#define ID_SHIFT        (16)
40 
/* Two-valued selector between a "get" and a "put". */
typedef enum {
    GET = 0,
    PUT = 1
} NC_PG_T;

/* Kind of metadata object: variable, dimension or attribute. */
typedef enum {
    VAR = 0,
    DIM = 1,
    ATT = 2
} NC_OBJ_T;
43 
/* Ten more than NC_MAX_NAME.
 * NOTE(review): presumably the extra room is for a name prefix added when
 * storing objects in HDF5 -- confirm. */
#define NC_MAX_HDF5_NAME (NC_MAX_NAME + 10)

/* NOTE(review): presumably the failure return of the netCDF v2 API -- confirm. */
#define NC_V2_ERR (-1)

/* Name given to the root group. */
#define NC_GROUP_NAME "/"

/* 2^20. */
#define MEGABYTE 1048576
51 
/*
 * Numeric limits of the external (on-disk) representation.
 * The X_SHRT_* / X_USHRT_* spellings are aliases that follow the naming
 * used by limits.h.
 */

/* 8-bit */
#define X_SCHAR_MIN     (-128)
#define X_SCHAR_MAX     127
#define X_UCHAR_MAX     255U

/* 16-bit */
#define X_SHORT_MIN     (-32768)
#define X_SHRT_MIN      X_SHORT_MIN
#define X_SHORT_MAX     32767
#define X_SHRT_MAX      X_SHORT_MAX
#define X_USHORT_MAX    65535U
#define X_USHRT_MAX     X_USHORT_MAX

/* 32-bit; the external "long" has the same range as the external int */
#define X_INT_MIN       (-2147483647-1)
#define X_INT_MAX       2147483647
#define X_LONG_MIN      X_INT_MIN
#define X_LONG_MAX      X_INT_MAX
#define X_UINT_MAX      4294967295U

/* 64-bit */
#define X_INT64_MIN     (-9223372036854775807LL-1LL)
#define X_INT64_MAX     9223372036854775807LL
#define X_UINT64_MAX    18446744073709551615ULL

/* Single precision; the Windows build spells the literal with one more
 * digit. */
#ifdef WIN32
#define X_FLOAT_MAX     3.402823466e+38f
#else
#define X_FLOAT_MAX     3.40282347e+38f
#endif /* WIN32 */
#define X_FLOAT_MIN     (-X_FLOAT_MAX)

/* Double precision */
#define X_DOUBLE_MAX    1.7976931348623157e+308
#define X_DOUBLE_MIN    (-X_DOUBLE_MAX)
80 
/* Constants related to creating chunked datasets in HDF5. */
#define NC_HDF5_UNLIMITED_DIMSIZE   (0)
#define NC_HDF5_CHUNKSIZE_FACTOR    (10)
#define NC_HDF5_MIN_CHUNK_SIZE      (2)
85 
#define NC_EMPTY_SCALE "NC_EMPTY_SCALE"

/* Attribute added to handle multidimensional coordinate variables, and
 * the length reserved for it (five times NC_MAX_NAME). */
#define COORDINATES "_Netcdf4Coordinates"
#define COORDINATES_LEN (NC_MAX_NAME * 5)

/* Used when the user defines a non-coordinate variable that has the
 * same name as a dimension. */
#define NON_COORD_PREPEND "_nc4_non_coord_"

/* When the HDF5 root group carries an attribute of this name, the file
 * must follow strict netCDF classic format rules. */
#define NC3_STRICT_ATT_NAME "_nc3_strict"

/* When a dimscale variable carries this attribute, its value is used as
 * the netCDF dimid. */
#define NC_DIMID_ATT_NAME "_Netcdf4Dimid"
104 
/* Boolean type; exists to make flag members and parameters easier to read. */
typedef enum {
    NC_FALSE = 0,
    NC_TRUE = 1
} nc_bool_t;
107 
/* Forward declaration; the struct itself is defined later in this header. */
struct NCFILEINFO;

/* Link pair of a generic doubly-linked list.  The structs in this header
 * that take part in such a list embed it as their first member. */
typedef struct NC_LIST_NODE
{
    void *next;
    void *prev;
} NC_LIST_NODE_T;
117 
118 /* This is a struct to handle the dim metadata. */
119 typedef struct NC_DIM_INFO
120 {
121    NC_LIST_NODE_T l;            /* Use generic doubly-linked list (must be first) */
122    char *name;
123    size_t len;
124    uint32_t hash;
125    int dimid;
126    nc_bool_t unlimited;         /* True if the dimension is unlimited */
127    nc_bool_t extended;          /* True if the dimension needs to be extended */
128    nc_bool_t too_long;          /* True if len is too big to fit in local size_t. */
129    hid_t hdf_dimscaleid;
130    HDF5_OBJID_T hdf5_objid;
131    struct NC_VAR_INFO *coord_var; /* The coord var, if it exists. */
132 } NC_DIM_INFO_T;
133 
134 typedef struct NC_ATT_INFO
135 {
136    NC_LIST_NODE_T l;            /* Use generic doubly-linked list (must be first) */
137    char *name;
138    int len;
139    nc_bool_t dirty;             /* True if attribute modified */
140    nc_bool_t created;           /* True if attribute already created */
141    nc_type nc_typeid;           /* netCDF type of attribute's data */
142    hid_t native_hdf_typeid;     /* Native HDF5 datatype for attribute's data */
143    int attnum;
144    void *data;
145    nc_vlen_t *vldata; /* only used for vlen */
146    char **stdata; /* only for string type. */
147 } NC_ATT_INFO_T;
148 
149 /* This is a struct to handle the var metadata. */
150 typedef struct NC_VAR_INFO
151 {
152    char *name;
153    char *hdf5_name; /* used if different from name */
154    int ndims;
155    int *dimids;
156    NC_DIM_INFO_T **dim;
157    int varid;
158    int natts;
159    uint32_t hash;
160    nc_bool_t is_new_var;        /* True if variable is newly created */
161    nc_bool_t was_coord_var;     /* True if variable was a coordinate var, but either the dim or var has been renamed */
162    nc_bool_t became_coord_var;  /* True if variable _became_ a coordinate var, because either the dim or var has been renamed */
163    nc_bool_t fill_val_changed;  /* True if variable's fill value changes after it has been created */
164    nc_bool_t attr_dirty;        /* True if variable's attributes are dirty and should be rewritten */
165    nc_bool_t created;           /* Variable has already been created (_not_ that it was just created) */
166    nc_bool_t written_to;        /* True if variable has data written to it */
167    struct NC_TYPE_INFO *type_info;
168    hid_t hdf_datasetid;
169    NC_ATT_INFO_T *att;
170    nc_bool_t no_fill;           /* True if no fill value is defined for var */
171    void *fill_value;
172    size_t *chunksizes;
173    nc_bool_t contiguous;        /* True if variable is stored contiguously in HDF5 file */
174    int parallel_access;         /* Type of parallel access for I/O on variable (collective or independent) */
175    nc_bool_t dimscale;          /* True if var is a dimscale */
176    nc_bool_t *dimscale_attached;        /* Array of flags that are true if dimscale is attached for that dim index */
177    HDF5_OBJID_T *dimscale_hdf5_objids;
178    nc_bool_t deflate;           /* True if var has deflate filter applied */
179    int deflate_level;
180    nc_bool_t shuffle;           /* True if var has shuffle filter applied */
181    nc_bool_t fletcher32;        /* True if var has fletcher32 filter applied */
182    nc_bool_t szip;              /* True if var has szip filter applied */
183    int options_mask;
184    int pixels_per_block;
185    size_t chunk_cache_size, chunk_cache_nelems;
186    float chunk_cache_preemption;
187 #ifdef USE_HDF4
188    /* Stuff below is for hdf4 files. */
189    int sdsid;
190    int hdf4_data_type;
191 #endif /* USE_HDF4 */
192    /* Stuff below for diskless data files. */
193    void *diskless_data;
194 } NC_VAR_INFO_T;
195 
196 typedef struct NC_FIELD_INFO
197 {
198    NC_LIST_NODE_T l;            /* Use generic doubly-linked list (must be first) */
199    nc_type nc_typeid;
200    hid_t hdf_typeid;
201    hid_t native_hdf_typeid;
202    size_t offset;
203    char *name;
204    int fieldid;                 /* ID (index?) of field */
205    int ndims;
206    int *dim_size;
207 } NC_FIELD_INFO_T;
208 
209 typedef struct NC_ENUM_MEMBER_INFO
210 {
211    NC_LIST_NODE_T l;            /* Use generic doubly-linked list (must be first) */
212    char *name;
213    void *value;
214 } NC_ENUM_MEMBER_INFO_T;
215 
216 typedef struct NC_TYPE_INFO
217 {
218    NC_LIST_NODE_T l;            /* Use generic doubly-linked list (must be first) */
219    char *name;
220    nc_type nc_typeid;           /* netCDF type ID, equivalent to a pre-defined type
221                                  * for atomic types, but a dynamically
222                                  * defined value for user-defined types (stored
223                                  * as named datatypes in the HDF5 file).
224                                  */
225    unsigned rc;                 /* Ref. count of objects using this type */
226    hid_t hdf_typeid;            /* HDF5 type ID, in the file */
227    hid_t native_hdf_typeid;     /* HDF5 type ID, in memory */
228    int endianness;              /* What endianness for the type? */
229                                 /* (Set for integer types as well as "complex"
230                                  *  types, like compound/enum/vlen, used for the
231                                  *  endianness of the fields and/or base type)
232                                  */
233    size_t size;                 /* Size of the type in memory, in bytes */
234    nc_bool_t committed;         /* True when datatype is committed in the file */
235    nc_type nc_type_class;       /* NC_VLEN, NC_COMPOUND, NC_OPAQUE, or NC_ENUM
236                                  * NOTE: NC_INT is used for all integer types,
237                                  *      NC_FLOAT is used for all floating-point
238                                  *      types, and NC_STRING is also used for
239                                  *      fixed- and variable-length strings.
240                                  *      (NC_CHAR is used for characters though)
241                                  *
242                                  *      This is somewhat redundant with the
243                                  *      nc_type field, but allows the code to
244                                  *      have a single location to look at for
245                                  *      the "kind" of a type.
246                                  */
247 
248    /* Information for each type or class */
249    union {
250       struct {
251          int num_members;
252          NC_ENUM_MEMBER_INFO_T *enum_member;
253          nc_type base_nc_typeid;
254          hid_t base_hdf_typeid;
255       } e;                      /* Enum */
256       struct {
257          int num_fields;
258          NC_FIELD_INFO_T *field;
259       } c;                      /* Compound */
260       struct {
261          nc_type base_nc_typeid;
262          hid_t base_hdf_typeid;
263       } v;                      /* Variable-length */
264    } u;                         /* Union of structs, for each type/class */
265 } NC_TYPE_INFO_T;
266 
267 typedef struct NC_VAR_ARRAY_T {
268 	size_t nalloc;		/* number allocated >= nelems */
269 	size_t nelems;		/* length of the array */
270 	NC_VAR_INFO_T **value;
271 } NC_VAR_ARRAY_T;
272 
273 /* This holds information for one group. Groups reproduce with
274  * parthenogenesis. */
275 typedef struct NC_GRP_INFO
276 {
277    NC_LIST_NODE_T l;            /* Use generic doubly-linked list (must be first) */
278    char *name;
279    hid_t hdf_grpid;
280    int nc_grpid;
281    struct NC_HDF5_FILE_INFO *nc4_info;
282    struct NC_GRP_INFO *parent;
283    struct NC_GRP_INFO *children;
284    NC_VAR_ARRAY_T vars;
285    NC_DIM_INFO_T *dim;
286    NC_ATT_INFO_T *att;
287    NC_TYPE_INFO_T *type;
288    int nvars;
289    int natts;
290 } NC_GRP_INFO_T;
291 
/* Flag values for the cmode member of NC_HDF5_FILE_INFO_T, which is
 * defined below. */
#define NC_CREAT    2       /* in create phase, cleared by ncendef */
#define NC_INDEF    8       /* in define mode, cleared by ncendef */
#define NC_NSYNC    0x10    /* synchronise numrecs on change */
#define NC_HSYNC    0x20    /* synchronise whole header on change */
#define NC_NDIRTY   0x40    /* numrecs has changed */
#define NC_HDIRTY   0x80    /* header info has changed */
300 
301 
302 /* This is the metadata we need to keep track of for each
303    netcdf-4/HDF5 file. */
304 typedef struct  NC_HDF5_FILE_INFO
305 {
306    NC* controller;
307    hid_t hdfid;
308 #ifdef USE_PARALLEL4
309    MPI_Comm comm;    /* Copy of MPI Communicator used to open the file */
310    MPI_Info info;    /* Copy of MPI Information Object used to open the file */
311 #endif
312    int flags;
313    int cmode;
314    nc_bool_t parallel;          /* True if file is open for parallel access */
315    nc_bool_t redef;             /* True if redefining an existing file */
316    int fill_mode;               /* Fill mode for vars - Unused internally currently */
317    nc_bool_t no_write;          /* true if nc_open has mode NC_NOWRITE. */
318    NC_GRP_INFO_T *root_grp;
319    short next_nc_grpid;
320    NC_TYPE_INFO_T *type;
321    int next_typeid;
322    int next_dimid;
323 #ifdef USE_HDF4
324    nc_bool_t hdf4;              /* True for HDF4 file */
325    int sdid;
326 #endif /* USE_HDF4 */
327    struct NCFILEINFO* fileinfo;
328 } NC_HDF5_FILE_INFO_T;
329 
330 
/* Hashes `length` bytes starting at `key`; the definition lives in
 * lookup3.c. */
uint32_t hash_fast(const void *key, size_t length);
333 
334 /* These functions convert between netcdf and HDF5 types. */
335 int nc4_get_typelen_mem(NC_HDF5_FILE_INFO_T *h5, nc_type xtype,
336 			int is_long, size_t *len);
337 int nc4_convert_type(const void *src, void *dest,
338 		     const nc_type src_type, const nc_type dest_type,
339 		     const size_t len, int *range_error,
340 		     const void *fill_value, int strict_nc3, int src_long,
341 		     int dest_long);
342 
343 /* These functions do HDF5 things. */
344 int rec_detach_scales(NC_GRP_INFO_T *grp, int dimid, hid_t dimscaleid);
345 int rec_reattach_scales(NC_GRP_INFO_T *grp, int dimid, hid_t dimscaleid);
346 int nc4_open_var_grp2(NC_GRP_INFO_T *grp, int varid, hid_t *dataset);
347 int nc4_put_vara(NC *nc, int ncid, int varid, const size_t *startp,
348 		 const size_t *countp, nc_type xtype, int is_long, void *op);
349 int nc4_get_vara(NC *nc, int ncid, int varid, const size_t *startp,
350 		 const size_t *countp, nc_type xtype, int is_long, void *op);
351 int nc4_rec_match_dimscales(NC_GRP_INFO_T *grp);
352 int nc4_rec_detect_need_to_preserve_dimids(NC_GRP_INFO_T *grp, nc_bool_t *bad_coord_orderp);
353 int nc4_rec_write_metadata(NC_GRP_INFO_T *grp, nc_bool_t bad_coord_order);
354 int nc4_rec_write_groups_types(NC_GRP_INFO_T *grp);
355 int nc4_enddef_netcdf4_file(NC_HDF5_FILE_INFO_T *h5);
356 int nc4_reopen_dataset(NC_GRP_INFO_T *grp, NC_VAR_INFO_T *var);
357 int nc4_adjust_var_cache(NC_GRP_INFO_T *grp, NC_VAR_INFO_T * var);
358 
359 /* The following functions manipulate the in-memory linked list of
360    metadata, without using HDF calls. */
361 int nc4_find_nc_grp_h5(int ncid, NC **nc, NC_GRP_INFO_T **grp,
362 		       NC_HDF5_FILE_INFO_T **h5);
363 int nc4_find_grp_h5(int ncid, NC_GRP_INFO_T **grp, NC_HDF5_FILE_INFO_T **h5);
364 int nc4_find_nc4_grp(int ncid, NC_GRP_INFO_T **grp);
365 NC_GRP_INFO_T *nc4_find_nc_grp(int ncid);
366 NC_GRP_INFO_T *nc4_rec_find_grp(NC_GRP_INFO_T *start_grp, int target_nc_grpid);
367 NC *nc4_find_nc_file(int ncid, NC_HDF5_FILE_INFO_T**);
368 int nc4_find_dim(NC_GRP_INFO_T *grp, int dimid, NC_DIM_INFO_T **dim, NC_GRP_INFO_T **dim_grp);
369 int nc4_find_var(NC_GRP_INFO_T *grp, const char *name, NC_VAR_INFO_T **var);
370 int nc4_find_dim_len(NC_GRP_INFO_T *grp, int dimid, size_t **len);
371 int nc4_find_type(const NC_HDF5_FILE_INFO_T *h5, int typeid1, NC_TYPE_INFO_T **type);
372 NC_TYPE_INFO_T *nc4_rec_find_nc_type(const NC_GRP_INFO_T *start_grp, nc_type target_nc_typeid);
373 NC_TYPE_INFO_T *nc4_rec_find_hdf_type(NC_GRP_INFO_T *start_grp, hid_t target_hdf_typeid);
374 NC_TYPE_INFO_T *nc4_rec_find_named_type(NC_GRP_INFO_T *start_grp, char *name);
375 NC_TYPE_INFO_T *nc4_rec_find_equal_type(NC_GRP_INFO_T *start_grp, int ncid1, NC_TYPE_INFO_T *type);
376 int nc4_find_nc_att(int ncid, int varid, const char *name, int attnum,
377 		    NC_ATT_INFO_T **att);
378 int nc4_find_g_var_nc(NC *nc, int ncid, int varid,
379 		      NC_GRP_INFO_T **grp, NC_VAR_INFO_T **var);
380 int nc4_find_grp_att(NC_GRP_INFO_T *grp, int varid, const char *name, int attnum,
381 		     NC_ATT_INFO_T **att);
382 int nc4_get_hdf_typeid(NC_HDF5_FILE_INFO_T *h5, nc_type xtype,
383 		       hid_t *hdf_typeid, int endianness);
384 int nc4_get_typeclass(const NC_HDF5_FILE_INFO_T *h5, nc_type xtype,
385                       int *type_class);
386 
387 /* Free various types */
388 int nc4_type_free(NC_TYPE_INFO_T *type);
389 
390 /* These list functions add and delete vars, atts. */
391 int nc4_nc4f_list_add(NC *nc, const char *path, int mode);
392 int nc4_var_add(NC_VAR_INFO_T **var);
393 int nc4_var_del(NC_VAR_INFO_T *var);
394 int nc4_dim_list_add(NC_DIM_INFO_T **list, NC_DIM_INFO_T **dim);
395 int nc4_dim_list_del(NC_DIM_INFO_T **list, NC_DIM_INFO_T *dim);
396 int nc4_att_list_add(NC_ATT_INFO_T **list, NC_ATT_INFO_T **att);
397 int nc4_type_list_add(NC_GRP_INFO_T *grp, size_t size, const char *name,
398                   NC_TYPE_INFO_T **type);
399 int nc4_field_list_add(NC_FIELD_INFO_T **list, int fieldid, const char *name,
400 		       size_t offset, hid_t field_hdf_typeid, hid_t native_typeid,
401 		       nc_type xtype, int ndims, const int *dim_sizesp);
402 void nc4_file_list_del(NC *nc);
403 int nc4_att_list_del(NC_ATT_INFO_T **list, NC_ATT_INFO_T *att);
404 int nc4_grp_list_add(NC_GRP_INFO_T **list, int new_nc_grpid, NC_GRP_INFO_T *parent_grp,
405 		     NC *nc, char *name, NC_GRP_INFO_T **grp);
406 int nc4_rec_grp_del(NC_GRP_INFO_T **list, NC_GRP_INFO_T *grp);
407 int nc4_enum_member_add(NC_ENUM_MEMBER_INFO_T **list, size_t size,
408 			const char *name, const void *value);
409 
410 /* Break & reform coordinate variables */
411 int nc4_break_coord_var(NC_GRP_INFO_T *grp, NC_VAR_INFO_T *coord_var, NC_DIM_INFO_T *dim);
412 int nc4_reform_coord_var(NC_GRP_INFO_T *grp, NC_VAR_INFO_T *coord_var, NC_DIM_INFO_T *dim);
413 
414 /* Check and normalize names. */
415 int NC_check_name(const char *name);
416 int nc4_check_name(const char *name, char *norm_name);
417 int nc4_normalize_name(const char *name, char *norm_name);
418 int nc4_check_dup_name(NC_GRP_INFO_T *grp, char *norm_name);
419 
/* HDF5 initialization: a flag variable and the initializer function,
 * both defined elsewhere. */
extern int nc4_hdf5_initialized;
void nc4_hdf5_initialize(void);
423 
/* Only declared when LOGGING is defined (configure option
   --enable-logging); prints info about the metadata to stderr. */
#ifdef LOGGING
int log_metadata_nc(NC *nc);
#endif /* LOGGING */
429 
/* Accessors for the dispatchdata member of an NC:
 * NC4_DATA reads it as an NC_HDF5_FILE_INFO_T pointer,
 * NC4_DATA_SET stores a pointer into it as void*. */
#define NC4_DATA(nc)            ((NC_HDF5_FILE_INFO_T*)(nc)->dispatchdata)
#define NC4_DATA_SET(nc,data)   ((nc)->dispatchdata = (void*)(data))
433 
/* Reserved attributes: the lists are defined elsewhere, the individual
 * names are spelled out here. */
extern const char *NC_RESERVED_VARATT_LIST[];
extern const char *NC_RESERVED_ATT_LIST[];
extern const char *NC_RESERVED_SPECIAL_LIST[];

#define NC_ATT_REFERENCE_LIST   "REFERENCE_LIST"
#define NC_ATT_CLASS            "CLASS"
#define NC_ATT_DIMENSION_LIST   "DIMENSION_LIST"
#define NC_ATT_NAME             "NAME"
#define NC_ATT_COORDINATES      COORDINATES     /* defined earlier in this header */
#define NC_ATT_FORMAT           "_Format"
444 
/**************************************************/
/**
State information captured for netcdf4 files:
1. Global: netcdf library version
2. Global: hdf5 library version
3. Per file: superblock version
4. Per file: was it created by netcdf-4?
5. Per file: _NCProperties attribute
*/

/* The properties attribute: its name, format version and separator. */
#define NCPROPS             "_NCProperties"
#define NCPROPS_VERSION     (1)
#define NCPROPSSEP          '|'

/* Property keys currently in use. */
#define NCPVERSION          "version"       /* of the properties format */
#define NCPHDF5LIBVERSION   "hdf5libversion"
#define NCPNCLIBVERSION     "netcdflibversion"

/* Other hidden attributes. */
#define ISNETCDF4ATT        "_IsNetcdf4"
#define SUPERBLOCKATT       "_SuperblockVersion"
467 
468 struct NCFILEINFO {
469     int superblockversion;
470     /* Following is filled from NCPROPS attribute or from global version */
471     struct NCPROPINFO {
472         int version; /* 0 => not defined */
473         char hdf5ver[NC_MAX_NAME+1];
474         char netcdfver[NC_MAX_NAME+1];
475     } propattr;
476 };
477 
/* Property values shared across files; defined elsewhere. */
extern struct NCPROPINFO globalpropinfo;

/* Implemented in libsrc4/ncinfo.c (per the original annotations). */
extern int NC4_fileinfo_init(void);
extern int NC4_get_fileinfo(struct NC_HDF5_FILE_INFO *info,
                            struct NCPROPINFO *init);
extern int NC4_put_propattr(struct NC_HDF5_FILE_INFO *info);

extern int NC4_buildpropinfo(struct NCPROPINFO *info, char **propdatap);

/* Implemented in libsrc4/nc4hdf.c (per the original annotations). */
extern int NC4_hdf5get_libversion(unsigned *major, unsigned *minor,
                                  unsigned *release);
extern int NC4_hdf5get_superblock(struct NC_HDF5_FILE_INFO *h5, int *idp);
extern int NC4_isnetcdf4(struct NC_HDF5_FILE_INFO *h5);
488 
#endif /* _NC4INTERNAL_ */
490