1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
6 //
7 // File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
8 //                    Luke Shulenburger, lshulen@sandia.gov, Sandia National Laboratories
9 //
10 // File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
11 //////////////////////////////////////////////////////////////////////////////////////
12 
13 
14 #ifndef QMCPLUSPLUS_HDF5_ARCHIVE_H
15 #define QMCPLUSPLUS_HDF5_ARCHIVE_H
16 
#include <config.h>
#include "hdf_datatype.h"
#include "hdf_dataspace.h"
#include "hdf_dataproxy.h"
#if defined(HAVE_LIBHDF5)
#include "hdf_pete.h"
#include "hdf_stl.h"
#include "hdf_hyperslab.h"
//#include "hdf_double_hyperslab.h"
#endif
#include <array>
#include <bitset>
#include <stack>
#include <stdexcept>
#include <string>
#include <vector>
#ifdef HAVE_MPI
#include "mpi3/communicator.hpp"
#endif
32 
33 class Communicate;
34 
35 namespace qmcplusplus
36 {
37 /** class to handle hdf file
38  */
39 class hdf_archive
40 {
41 private:
42   enum
43   {
44     IS_PARALLEL = 0,
45     IS_MASTER,
46     NOIO
47   };
48   static const hid_t is_closed = -1;
49   /** bitset of the io mode
50    * Mode[IS_PARALLEL] : true, if parallel
51    * Mode[IS_MASTER] : true, if the node is master
52    * Mode[NOIO] : true, if I/O is not performed
53    */
54   std::bitset<4> Mode;
55   ///file id
56   hid_t file_id;
57   ///access id
58   hid_t access_id;
59   ///transfer property
60   hid_t xfer_plist;
61   ///error type
62   H5E_auto2_t err_func;
63   ///error handling
64   void* client_data;
65   ///FILO to handle H5Group
66   std::stack<hid_t> group_id;
67 
68 public:
69   ///Public pointer to communicator. Ugly. Relation between  MPI, hdf_archive, and other classed to be rethought.
70   Communicate* myComm;
71 
72   /** constructor
73    * @param c communicator
74    * @param request_pio turns on parallel I/O,
75    *        if true and PHDF5 is available, hdf_archive is in parallel collective IO mode
76    *        if true and PHDF5 is not available, hdf_archive is in master-only IO mode
77    *        if false, hdf_archive is in independent IO mode
78    */
79   template<class Comm = Communicate*>
file_id(is_closed)80   hdf_archive(Comm c, bool request_pio = false) : file_id(is_closed), access_id(H5P_DEFAULT), xfer_plist(H5P_DEFAULT)
81   {
82     H5Eget_auto2(H5E_DEFAULT, &err_func, &client_data);
83     H5Eset_auto2(H5E_DEFAULT, NULL, NULL);
84     set_access_plist(request_pio, c);
85   }
86 
hdf_archive()87   hdf_archive() : file_id(is_closed), access_id(H5P_DEFAULT), xfer_plist(H5P_DEFAULT)
88   {
89     H5Eget_auto2(H5E_DEFAULT, &err_func, &client_data);
90     H5Eset_auto2(H5E_DEFAULT, NULL, NULL);
91     set_access_plist();
92   }
93   ///destructor
94   ~hdf_archive();
95 
96   ///set the access property
97   void set_access_plist(bool request_pio, Communicate* comm);
98 #ifdef HAVE_MPI
99   void set_access_plist(bool request_pio, boost::mpi3::communicator& comm);
100 #endif
101   void set_access_plist();
102 
103   ///return true if parallel i/o
is_parallel()104   inline bool is_parallel() const { return Mode[IS_PARALLEL]; }
105 
106   ///return true if master in parallel i/o
is_master()107   inline bool is_master() const { return Mode[IS_MASTER]; }
108 
109   ///return file_id. should be only be used for connecting to old codes when porting
getFileID()110   hid_t getFileID() const { return file_id; }
111 
112   /** create a file
113    * @param fname name of hdf5 file
114    * @param flags i/o mode
115    * @return true, if creation is successful
116    */
117   bool create(const std::string& fname, unsigned flags = H5F_ACC_TRUNC);
118 
119   /** open a file
120    * @param fname name of hdf5 file
121    * @param flags i/o mode
122    * @return file_id, if open is successful
123    */
124   bool open(const std::string& fname, unsigned flags = H5F_ACC_RDWR);
125 
126   ///close all the open groups and file
127   void close();
128 
129   ///flush a file
flush()130   inline void flush()
131   {
132     if (file_id != is_closed)
133       H5Fflush(file_id, H5F_SCOPE_LOCAL);
134   }
135 
136   ///return true if the file is closed
closed()137   inline bool closed() { return file_id == is_closed; }
138 
139   /** check if aname is a group
140    * @param aname group's name
141    * @return true, if aname exists and it is a group
142    */
143   bool is_group(const std::string& aname);
144 
145   /** return the top of the group stack
146    */
top()147   inline hid_t top() const { return group_id.empty() ? is_closed : group_id.top(); }
148 
149   /** check if any groups are open
150    *  group stack will have entries if so
151    *  @return true if any groups are open
152    */
open_groups()153   inline bool open_groups() { return group_id.empty(); }
154 
155   /** push a group to the group stack
156    * @param gname name of the group
157    * @param createit if true, group is create when missing
158    */
159   hid_t push(const std::string& gname, bool createit = true);
160 
pop()161   inline void pop()
162   {
163     if (file_id == is_closed || group_id.empty())
164       return;
165     hid_t g = group_id.top();
166     group_id.pop();
167     H5Gclose(g);
168   }
169 
170   /** read the shape of multidimensional filespace from the group aname
171    * this function can be used to query dataset for preparing containers.
172    * The dimensions contributed by T is excluded.
173    * See how exactly user dimensions are calculated in getDataShape function definition.
174    * @return true if successful
175    */
176   template<typename T>
getShape(const std::string & aname,std::vector<int> & sizes_out)177   bool getShape(const std::string& aname, std::vector<int>& sizes_out)
178   {
179     if (Mode[NOIO])
180       return true;
181     hid_t p = group_id.empty() ? file_id : group_id.top();
182     return getDataShape<T>(p, aname, sizes_out);
183   }
184 
185   /** write the data to the group aname and return status
186    * use write() for inbuilt error checking
187    * @return true if successful
188    */
189   template<typename T>
writeEntry(T & data,const std::string & aname)190   bool writeEntry(T& data, const std::string& aname)
191   {
192     if (Mode[NOIO])
193       return true;
194     if (!(Mode[IS_PARALLEL] || Mode[IS_MASTER]))
195       throw std::runtime_error("Only write data in parallel or by master but not every rank!");
196     hid_t p = group_id.empty() ? file_id : group_id.top();
197     h5data_proxy<T> e(data);
198     return e.write(p, aname, xfer_plist);
199   }
200 
201   /** write the data to the group aname and check status
202    * runtime error is issued on I/O error
203    */
204   template<typename T>
write(T & data,const std::string & aname)205   void write(T& data, const std::string& aname)
206   {
207     if (!writeEntry(data, aname))
208     {
209       throw std::runtime_error("HDF5 write failure in hdf_archive::write " + aname);
210     }
211   }
212 
213   /** write the container data with a specific shape and check status
214    * @param data container, linear storage required.
215    * @param shape shape on the hdf file
216    * @param aname dataset name in the file
217    * runtime error is issued on I/O error
218    */
219   template<typename T, typename IT, std::size_t RANK>
writeSlabReshaped(T & data,const std::array<IT,RANK> & shape,const std::string & aname)220   void writeSlabReshaped(T& data, const std::array<IT, RANK>& shape, const std::string& aname)
221   {
222     std::array<hsize_t, RANK> globals, counts, offsets;
223     for(int dim = 0; dim < RANK; dim++)
224     {
225       globals[dim] = static_cast<hsize_t>(shape[dim]);
226       counts[dim] = static_cast<hsize_t>(shape[dim]);
227       offsets[dim] = 0;
228     }
229 
230     hyperslab_proxy<T, RANK> pxy(data, globals, counts, offsets);
231     write(pxy, aname);
232   }
233 
234   /** read the data from the group aname and return status
235    * use read() for inbuilt error checking
236    * @return true if successful
237    */
238   template<typename T>
readEntry(T & data,const std::string & aname)239   bool readEntry(T& data, const std::string& aname)
240   {
241     if (Mode[NOIO])
242       return true;
243     hid_t p = group_id.empty() ? file_id : group_id.top();
244     h5data_proxy<T> e(data);
245     return e.read(p, aname, xfer_plist);
246   }
247 
248   /** read the data from the group aname and check status
249    * runtime error is issued on I/O error
250    */
251   template<typename T>
read(T & data,const std::string & aname)252   void read(T& data, const std::string& aname)
253   {
254     if (!readEntry(data, aname))
255     {
256       throw std::runtime_error("HDF5 read failure in hdf_archive::read " + aname);
257     }
258   }
259 
260   /** read file dataset with a specific shape into a container and check status
261    * @param data container, linear storage required.
262    * @param shape shape on the hdf file
263    * @param aname dataset name in the file
264    * runtime error is issued on I/O error
265    */
266   template<typename T, typename IT, std::size_t RANK>
readSlabReshaped(T & data,const std::array<IT,RANK> & shape,const std::string & aname)267   void readSlabReshaped(T& data, const std::array<IT, RANK>& shape, const std::string& aname)
268   {
269     std::array<hsize_t, RANK> globals, counts, offsets;
270     for(int dim = 0; dim < RANK; dim++)
271     {
272       globals[dim] = static_cast<hsize_t>(shape[dim]);
273       counts[dim] = static_cast<hsize_t>(shape[dim]);
274       offsets[dim] = 0;
275     }
276 
277     hyperslab_proxy<T, RANK> pxy(data, globals, counts, offsets);
278     read(pxy, aname);
279   }
280 
281   /** read a portion of the data from the group aname and check status
282    * runtime error is issued on I/O error
283    *
284    * note the readSpec array must have dimensionality corresponding to the dataset,
285    * values for a dimension must be [0,num_entries-1] for that dimension to specify
286    * which value to hold and a -1 to grab all elements from that dimension
287    * for example, if the dataset was [5,2,6] and the vector contained (2,1,-1),
288    * this would grab 6 elements corresponding to [2,1,:]
289    */
290   template<typename T, typename IT, std::size_t RANK>
readSlabSelection(T & data,const std::array<IT,RANK> & readSpec,const std::string & aname)291   void readSlabSelection(T& data, const std::array<IT, RANK>& readSpec, const std::string& aname)
292   {
293     std::array<hsize_t, RANK> globals, counts, offsets;
294     for(int dim = 0; dim < RANK; dim++)
295     {
296       globals[dim] = 0;
297       if (readSpec[dim] < 0)
298       {
299         counts[dim] = 0;
300         offsets[dim] = 0;
301       }
302       else
303       {
304         counts[dim] = 1;
305         offsets[dim] = static_cast<hsize_t>(readSpec[dim]);
306       }
307     }
308 
309     hyperslab_proxy<T, RANK> pxy(data, globals, counts, offsets);
310     read(pxy, aname);
311   }
312 
unlink(const std::string & aname)313   inline void unlink(const std::string& aname)
314   {
315     if (Mode[NOIO])
316       return;
317     hid_t p       = group_id.empty() ? file_id : group_id.top();
318     herr_t status = H5Gunlink(p, aname.c_str());
319   }
320 };
321 
322 } // namespace qmcplusplus
323 #endif
324