#include "MUQ/Utilities/HDF5/HDF5File.h"

#include <cassert>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
5
6 using namespace muq::Utilities;
7
HDF5File(std::string const & filename_)8 HDF5File::HDF5File(std::string const& filename_){
9
10 // make sure the file is not open
11 assert(fileID<0);
12
13 // create (or open) the file
14 Open(filename_);
15 }
16
~HDF5File()17 HDF5File::~HDF5File() {
18 // close the file
19 Close();
20
21 // make sure the file is closed
22 assert(fileID<=0);
23 }
24
DoesFileExist(const std::string & name) const25 bool HDF5File::DoesFileExist(const std::string& name) const {
26 std::ifstream f(name.c_str());
27 return f.good();
28 }
29
Open(std::string const & filename_)30 void HDF5File::Open(std::string const& filename_) {
31
32 if( fileID>=0 ) { // if a file is already open ...
33 // ... close it.
34 Close();
35 }
36
37 // save the file name;
38 filename = filename_;
39
40 // Set up file access property list with parallel I/O access
41 hid_t plist_id = H5Pcreate(H5P_FILE_ACCESS);
42
43 if( DoesFileExist(filename) ){ // if the file exists ...
44 // ... open it.
45 fileID = H5Fopen(filename.c_str(), H5F_ACC_RDWR, plist_id);
46 } else { // if the file does not exist ...
47 // ... create it.
48 fileID = H5Fcreate(filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, plist_id);
49 }
50
51 // close the property list
52 H5Pclose(plist_id);
53
54 // make sure the file is open
55 assert(fileID>=0);
56 }
57
Close()58 void HDF5File::Close() {
59
60 if( fileID<0 ) { // if the file is already closed ...
61 // ... do nothing
62 return;
63 }
64
65 // flush the file
66 FlushFile();
67
68 // close the file
69 H5Fclose(fileID);
70
71 // set the file ID to something invalid
72 fileID = -1;
73 filename = "";
74 }
75
Copy(std::string const & dstName,std::shared_ptr<HDF5File> srcFile,std::string const & srcName)76 void HDF5File::Copy(std::string const& dstName, std::shared_ptr<HDF5File> srcFile, std::string const& srcName)
77 {
78
79 // make sure both files are open
80 assert(fileID>0);
81 assert(srcFile->fileID>0);
82
83 herr_t err;
84 err = H5Ocopy(srcFile->fileID, srcName.c_str(), fileID, dstName.c_str(), H5P_DEFAULT, H5P_DEFAULT);
85
86 if(err<0)
87 {
88 std::cerr << "WARNING: HDF5 could not copy " << srcName << " to " << dstName << std::endl;
89 };
90
91 }
92
DoesGroupExist(std::string const & name) const93 bool HDF5File::DoesGroupExist(std::string const& name) const {
94
95 // if the group is the root, return true
96 if( (name.compare("/")==0) || (name.compare("")==0) || (name.compare("/.")==0) ) {
97 return true;
98 }
99
100 // make sure the file is open
101 assert(fileID>0);
102
103 // get the group path and the path to it's parent
104 std::string parentPath = GetParentPath(name);
105
106 // recursivly check if the parent exists and make sure the current group exists
107 return DoesGroupExist(parentPath) && (H5Lexists(fileID, name.c_str(), H5P_DEFAULT)>0);
108 }
109
DoesDataSetExist(std::string const & name) const110 bool HDF5File::DoesDataSetExist(std::string const& name) const {
111
112 // make sure the file is open
113 assert(fileID>0);
114
115 // get the group path and the path to it's parent
116 std::string parentPath = GetParentPath(name);
117
118 // recursivly check if the parent group exists and make sure the current data set exists
119 return DoesGroupExist(parentPath) && (H5Lexists(fileID, name.c_str(), H5P_DEFAULT) > 0);
120 }
121
GetDataSetSize(std::string const name) const122 Eigen::VectorXi HDF5File::GetDataSetSize(std::string const name) const {
123
124 // make sure the file is open
125 assert(fileID>0);
126
127 if( !DoesDataSetExist(name) ) { // if the data set does not exist ...
128 // return an empty vector.
129 return Eigen::VectorXi();
130 }
131
132 // make sure the file is open
133 assert(fileID>0);
134
135 // open the data
136 hid_t dataset = H5Dopen2(fileID, name.c_str(), H5P_DEFAULT);
137
138 // get the id for the dataspace of the dataset
139 hid_t space_id = H5Dget_space(dataset);
140
141 // get the dimensionality of the dataspace
142 int rank = H5Sget_simple_extent_ndims(space_id);
143
144 // get the dataspace dimension size and the max. size
145 hsize_t* dims = (hsize_t*)malloc(rank*sizeof(hsize_t));
146 hsize_t* max_dims = (hsize_t*)malloc(rank*sizeof(hsize_t));
147 H5Sget_simple_extent_dims(space_id, dims, max_dims);
148
149 // close the dataspace and the dataset
150 H5Sclose(space_id);
151 H5Dclose(dataset);
152
153 // convert the dimensionality into an Eigen::VectorXi
154 Eigen::VectorXi output(rank);
155 for( int i=0; i<rank; ++i ) {
156 output(i) = dims[i];
157 }
158
159 // free the memory
160 free(dims);
161 free(max_dims);
162
163 // return the dimensionality
164 return output;
165 }
166
CreateGroup(std::string const & name)167 void HDF5File::CreateGroup(std::string const& name) {
168
169 // make sure the file is open
170 assert(fileID>0);
171
172 if( (DoesGroupExist(name))||(name.compare("")==0)||(name.compare("/")==0) ) { return; }
173
174 // get the group path and the path to it's parent
175 std::string parentPath = GetParentPath(name);
176
177 // make sure the parent exists by recursively creating it
178 if(!DoesGroupExist(parentPath))
179 CreateGroup(parentPath);
180
181 // create the group
182 hid_t newgroup = H5Gcreate2(fileID, name.c_str(), H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
183
184 // close the group
185 H5Gclose(newgroup);
186 }
187
WriteStringAttribute(std::string const & datasetName,std::string const & attributeName,std::string const & attribute)188 void HDF5File::WriteStringAttribute(std::string const& datasetName,
189 std::string const& attributeName,
190 std::string const& attribute)
191 {
192 // make sure the file is open
193 assert(fileID>0);
194
195 // Create the group or dataset in necessary
196 if( !DoesDataSetExist(datasetName) || !DoesGroupExist(datasetName) )
197 CreateGroup(datasetName);
198
199 // write the attribute
200 H5LTset_attribute_string(fileID, datasetName.c_str(), attributeName.c_str(), attribute.c_str());
201 }
202
GetStringAttribute(std::string const & datasetName,std::string const & attributeName) const203 std::string HDF5File::GetStringAttribute(std::string const& datasetName, std::string const& attributeName) const {
204 /*#if MUQ_MPI==1
205 std::unique_ptr<mpi::communicator> worldComm(new mpi::communicator);
206
207 assert(worldComm->rank()==write);
208 #endif*/
209
210 // make sure the file is open
211 assert(fileID>0);
212
213 // make sure the dataset exists
214 assert(DoesDataSetExist(datasetName) || DoesGroupExist(datasetName));
215
216 // get the string attribute
217 char tempStr[256];
218 H5LTget_attribute_string(fileID, datasetName.c_str(), attributeName.c_str(), tempStr);
219
220 // return it as a strng
221 return std::string(tempStr);
222 }
223
FlushFile()224 void HDF5File::FlushFile() {
225 if( fileID>0 ) { // if the file is open ...
226 // flush it.
227 H5Fflush(fileID, H5F_SCOPE_GLOBAL);
228 }
229 }
230
231 struct DataFileInfo {
DataFileInfoDataFileInfo232 DataFileInfo(std::shared_ptr<HDF5File> const& hdf5file) : hdf5file(hdf5file) {}
233
234 const std::shared_ptr<HDF5File> hdf5file;
235 };
236
CopyObjectToGlobalFile(hid_t o_id,const char * name,const H5O_info_t * info,void * op_data)237 herr_t CopyObjectToGlobalFile(hid_t o_id, const char *name, const H5O_info_t *info, void *op_data) {
238 std::string nameBuffer(name);
239 std::string fullGroupName = "/" + nameBuffer;
240
241 // get the file we are copying into
242 DataFileInfo* fileInfo = static_cast<DataFileInfo*>(op_data);
243
244 if( info->type==H5O_TYPE_DATASET ) { // data sets
245 if( !fileInfo->hdf5file->DoesDataSetExist(fullGroupName) ) { // if the data set does not exist ...
246 // ... copy it over
247 H5Ocopy(o_id, name, fileInfo->hdf5file->fileID, fullGroupName.c_str(), H5P_DEFAULT, H5P_DEFAULT);
248 }
249 } else if( info->type == H5O_TYPE_GROUP ) { // groups
250 if( !fileInfo->hdf5file->DoesGroupExist(fullGroupName) ) { // if the group does not exist ...
251 // ... copy it over.
252 H5Ocopy(o_id, name, fileInfo->hdf5file->fileID, fullGroupName.c_str(), H5P_DEFAULT, H5P_DEFAULT);
253 }
254 }
255
256 return 0;
257 }
258
MergeFile(std::shared_ptr<HDF5File> const & otherFile)259 void HDF5File::MergeFile(std::shared_ptr<HDF5File> const& otherFile) {
260
261 // make sure the other file is open
262 assert(otherFile->fileID>0);
263
264 // make sure this file is open
265 assert(fileID>0);
266
267 // open the root group in the other file
268 const std::string rootGroupName = "/";
269 const hid_t otherRootGroup = H5Gopen2(otherFile->fileID, rootGroupName.c_str(), H5P_DEFAULT);
270
271 auto dataInfo = std::make_shared<DataFileInfo>(shared_from_this());
272
273 // copy the file
274 const herr_t status = H5Ovisit(otherRootGroup, H5_INDEX_NAME, H5_ITER_NATIVE, &CopyObjectToGlobalFile, static_cast<void*>(dataInfo.get()));
275
276 assert(status >= 0);
277
278 // close the other file's root group
279 H5Gclose(otherRootGroup);
280 }
281
282
IsDataSet(std::string const & name) const283 bool HDF5File::IsDataSet(std::string const& name) const
284 {
285
286 if(!DoesDataSetExist(name))
287 return false;
288
289 herr_t status;
290 H5O_info_t info;
291
292 status = H5Oget_info_by_name(fileID, name.c_str(), &info, H5P_DEFAULT);
293
294 if(status<0)
295 return false;
296
297 return info.type == H5O_TYPE_DATASET;
298 }
299
IsGroup(std::string const & name) const300 bool HDF5File::IsGroup(std::string const& name) const
301 {
302
303 if(!DoesGroupExist(name))
304 return false;
305
306 herr_t status;
307 H5O_info_t info;
308
309 status = H5Oget_info_by_name(fileID, name.c_str(), &info, H5P_DEFAULT);
310
311 if(status<0)
312 return false;
313
314 return info.type == H5O_TYPE_GROUP;
315 }
316
GetChildren(std::string base) const317 std::vector<std::string> HDF5File::GetChildren(std::string base) const
318 {
319 // Make sure the HDF5 file is open
320 assert(fileID>0);
321
322 if(IsDataSet(base))
323 return std::vector<std::string>();
324
325 // Make sure the group exists
326 assert(DoesGroupExist(base));
327
328 // open the group
329 hid_t gid = H5Gopen2(fileID, base.c_str(), H5P_DEFAULT);
330
331 char name[1024];
332 ssize_t len;
333 hsize_t nobj;
334
335 // get the number of objects in this group
336 herr_t status = H5Gget_num_objs(gid, &nobj);
337
338 // Intialize the vector of strings
339 std::vector<std::string> output(nobj);
340
341 // Fill in the output vector
342 for(int i = 0; i < nobj; i++)
343 {
344 len = H5Gget_objname_by_idx(gid, (hsize_t)i, name, (size_t)1024);
345 output.at(i) = std::string(name,name + len);
346 }
347
348 return output;
349
350 };
351