1 #include "MUQ/Utilities/HDF5/HDF5File.h"
2 
3 #include<iostream>
4 #include<fstream>
5 
6 using namespace muq::Utilities;
7 
HDF5File(std::string const & filename_)8 HDF5File::HDF5File(std::string const& filename_){
9 
10   // make sure the file is not open
11   assert(fileID<0);
12 
13   // create (or open) the file
14   Open(filename_);
15 }
16 
~HDF5File()17 HDF5File::~HDF5File() {
18   // close the file
19   Close();
20 
21   // make sure the file is closed
22   assert(fileID<=0);
23 }
24 
DoesFileExist(const std::string & name) const25 bool HDF5File::DoesFileExist(const std::string& name) const {
26   std::ifstream f(name.c_str());
27   return f.good();
28 }
29 
Open(std::string const & filename_)30 void HDF5File::Open(std::string const& filename_) {
31 
32     if( fileID>=0 ) { // if a file is already open ...
33       // ... close it.
34       Close();
35     }
36 
37     // save the file name;
38     filename = filename_;
39 
40     // Set up file access property list with parallel I/O access
41     hid_t plist_id = H5Pcreate(H5P_FILE_ACCESS);
42 
43     if( DoesFileExist(filename) ){ // if the file exists ...
44       // ... open it.
45       fileID = H5Fopen(filename.c_str(), H5F_ACC_RDWR, plist_id);
46     } else { // if the file does not exist ...
47       // ... create it.
48       fileID = H5Fcreate(filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, plist_id);
49     }
50 
51     // close the property list
52     H5Pclose(plist_id);
53 
54     // make sure the file is open
55     assert(fileID>=0);
56 }
57 
Close()58 void HDF5File::Close() {
59 
60     if( fileID<0 ) { // if the file is already closed ...
61       // ... do nothing
62       return;
63     }
64 
65     // flush the file
66     FlushFile();
67 
68     // close the file
69     H5Fclose(fileID);
70 
71     // set the file ID to something invalid
72     fileID = -1;
73     filename = "";
74 }
75 
Copy(std::string const & dstName,std::shared_ptr<HDF5File> srcFile,std::string const & srcName)76 void HDF5File::Copy(std::string const& dstName, std::shared_ptr<HDF5File> srcFile, std::string const& srcName)
77 {
78 
79   // make sure both files are open
80   assert(fileID>0);
81   assert(srcFile->fileID>0);
82 
83   herr_t err;
84   err = H5Ocopy(srcFile->fileID, srcName.c_str(), fileID, dstName.c_str(), H5P_DEFAULT, H5P_DEFAULT);
85 
86   if(err<0)
87   {
88     std::cerr << "WARNING: HDF5 could not copy " << srcName << " to " << dstName << std::endl;
89   };
90 
91 }
92 
DoesGroupExist(std::string const & name) const93 bool HDF5File::DoesGroupExist(std::string const& name) const {
94 
95   // if the group is the root, return true
96   if( (name.compare("/")==0) || (name.compare("")==0) || (name.compare("/.")==0) ) {
97     return true;
98   }
99 
100   // make sure the file is open
101   assert(fileID>0);
102 
103   // get the group path and the path to it's parent
104   std::string parentPath = GetParentPath(name);
105 
106   // recursivly check if the parent exists and make sure the current group exists
107   return DoesGroupExist(parentPath) && (H5Lexists(fileID, name.c_str(), H5P_DEFAULT)>0);
108 }
109 
DoesDataSetExist(std::string const & name) const110 bool HDF5File::DoesDataSetExist(std::string const& name) const {
111 
112   // make sure the file is open
113   assert(fileID>0);
114 
115   // get the group path and the path to it's parent
116   std::string parentPath = GetParentPath(name);
117 
118   // recursivly check if the parent group exists and make sure the current data set exists
119   return DoesGroupExist(parentPath) && (H5Lexists(fileID, name.c_str(), H5P_DEFAULT) > 0);
120 }
121 
GetDataSetSize(std::string const name) const122 Eigen::VectorXi HDF5File::GetDataSetSize(std::string const name) const {
123 
124   // make sure the file is open
125   assert(fileID>0);
126 
127   if( !DoesDataSetExist(name) ) { // if the data set does not exist ...
128     // return an empty vector.
129     return Eigen::VectorXi();
130   }
131 
132   // make sure the file is open
133   assert(fileID>0);
134 
135   // open the data
136   hid_t dataset = H5Dopen2(fileID, name.c_str(), H5P_DEFAULT);
137 
138   // get the id for the dataspace of the dataset
139   hid_t space_id = H5Dget_space(dataset);
140 
141   // get the dimensionality of the dataspace
142   int rank = H5Sget_simple_extent_ndims(space_id);
143 
144   // get the dataspace dimension size and the max. size
145   hsize_t* dims = (hsize_t*)malloc(rank*sizeof(hsize_t));
146   hsize_t* max_dims = (hsize_t*)malloc(rank*sizeof(hsize_t));
147   H5Sget_simple_extent_dims(space_id, dims, max_dims);
148 
149   // close the dataspace and the dataset
150   H5Sclose(space_id);
151   H5Dclose(dataset);
152 
153   // convert the dimensionality into an Eigen::VectorXi
154   Eigen::VectorXi output(rank);
155   for( int i=0; i<rank; ++i ) {
156     output(i) = dims[i];
157   }
158 
159   // free the memory
160   free(dims);
161   free(max_dims);
162 
163   // return the dimensionality
164   return output;
165 }
166 
CreateGroup(std::string const & name)167 void HDF5File::CreateGroup(std::string const& name) {
168 
169     // make sure the file is open
170     assert(fileID>0);
171 
172     if( (DoesGroupExist(name))||(name.compare("")==0)||(name.compare("/")==0) ) { return; }
173 
174     // get the group path and the path to it's parent
175     std::string parentPath = GetParentPath(name);
176 
177     // make sure the parent exists by recursively creating it
178     if(!DoesGroupExist(parentPath))
179 	CreateGroup(parentPath);
180 
181     // create the group
182     hid_t newgroup = H5Gcreate2(fileID, name.c_str(), H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
183 
184     // close the group
185     H5Gclose(newgroup);
186 }
187 
WriteStringAttribute(std::string const & datasetName,std::string const & attributeName,std::string const & attribute)188 void HDF5File::WriteStringAttribute(std::string const& datasetName,
189 				    std::string const& attributeName,
190 				    std::string const& attribute)
191 {
192     // make sure the file is open
193     assert(fileID>0);
194 
195     // Create the group or dataset in necessary
196     if( !DoesDataSetExist(datasetName) || !DoesGroupExist(datasetName) )
197 	CreateGroup(datasetName);
198 
199     // write the attribute
200     H5LTset_attribute_string(fileID, datasetName.c_str(), attributeName.c_str(), attribute.c_str());
201 }
202 
GetStringAttribute(std::string const & datasetName,std::string const & attributeName) const203 std::string HDF5File::GetStringAttribute(std::string const& datasetName, std::string const& attributeName) const {
204   /*#if MUQ_MPI==1
205   std::unique_ptr<mpi::communicator> worldComm(new mpi::communicator);
206 
207   assert(worldComm->rank()==write);
208   #endif*/
209 
210   // make sure the file is open
211   assert(fileID>0);
212 
213   // make sure the dataset exists
214   assert(DoesDataSetExist(datasetName) || DoesGroupExist(datasetName));
215 
216   // get the string attribute
217   char tempStr[256];
218   H5LTget_attribute_string(fileID, datasetName.c_str(), attributeName.c_str(), tempStr);
219 
220   // return it as a strng
221   return std::string(tempStr);
222 }
223 
FlushFile()224 void HDF5File::FlushFile() {
225   if( fileID>0 ) { // if the file is open ...
226     // flush it.
227     H5Fflush(fileID, H5F_SCOPE_GLOBAL);
228   }
229 }
230 
231 struct DataFileInfo {
DataFileInfoDataFileInfo232     DataFileInfo(std::shared_ptr<HDF5File> const& hdf5file) : hdf5file(hdf5file) {}
233 
234     const std::shared_ptr<HDF5File> hdf5file;
235 };
236 
CopyObjectToGlobalFile(hid_t o_id,const char * name,const H5O_info_t * info,void * op_data)237 herr_t CopyObjectToGlobalFile(hid_t o_id, const char *name, const H5O_info_t *info, void *op_data) {
238     std::string nameBuffer(name);
239     std::string fullGroupName = "/" + nameBuffer;
240 
241     // get the file we are copying into
242     DataFileInfo* fileInfo = static_cast<DataFileInfo*>(op_data);
243 
244     if( info->type==H5O_TYPE_DATASET ) {  // data sets
245 	if( !fileInfo->hdf5file->DoesDataSetExist(fullGroupName) ) { // if the data set does not exist ...
246 	    // ... copy it over
247 	    H5Ocopy(o_id, name, fileInfo->hdf5file->fileID, fullGroupName.c_str(), H5P_DEFAULT, H5P_DEFAULT);
248 	}
249     } else if( info->type == H5O_TYPE_GROUP ) { // groups
250 	if( !fileInfo->hdf5file->DoesGroupExist(fullGroupName) ) { // if the group does not exist ...
251 	    // ... copy it over.
252 	    H5Ocopy(o_id, name, fileInfo->hdf5file->fileID, fullGroupName.c_str(), H5P_DEFAULT, H5P_DEFAULT);
253 	}
254     }
255 
256     return 0;
257 }
258 
MergeFile(std::shared_ptr<HDF5File> const & otherFile)259 void HDF5File::MergeFile(std::shared_ptr<HDF5File> const& otherFile) {
260 
261     // make sure the other file is open
262     assert(otherFile->fileID>0);
263 
264     // make sure this file is open
265     assert(fileID>0);
266 
267     // open the root group in the other file
268     const std::string rootGroupName = "/";
269     const hid_t otherRootGroup = H5Gopen2(otherFile->fileID, rootGroupName.c_str(), H5P_DEFAULT);
270 
271     auto dataInfo = std::make_shared<DataFileInfo>(shared_from_this());
272 
273     // copy the file
274     const herr_t status = H5Ovisit(otherRootGroup, H5_INDEX_NAME, H5_ITER_NATIVE, &CopyObjectToGlobalFile, static_cast<void*>(dataInfo.get()));
275 
276     assert(status >= 0);
277 
278     // close the other file's root group
279     H5Gclose(otherRootGroup);
280 }
281 
282 
IsDataSet(std::string const & name) const283 bool HDF5File::IsDataSet(std::string const& name) const
284 {
285 
286     if(!DoesDataSetExist(name))
287 	return false;
288 
289     herr_t status;
290     H5O_info_t info;
291 
292     status = H5Oget_info_by_name(fileID, name.c_str(), &info, H5P_DEFAULT);
293 
294     if(status<0)
295 	return false;
296 
297     return info.type == H5O_TYPE_DATASET;
298 }
299 
IsGroup(std::string const & name) const300 bool HDF5File::IsGroup(std::string const& name) const
301 {
302 
303     if(!DoesGroupExist(name))
304 	return false;
305 
306     herr_t status;
307     H5O_info_t info;
308 
309     status = H5Oget_info_by_name(fileID, name.c_str(), &info, H5P_DEFAULT);
310 
311     if(status<0)
312 	return false;
313 
314     return info.type == H5O_TYPE_GROUP;
315 }
316 
GetChildren(std::string base) const317 std::vector<std::string> HDF5File::GetChildren(std::string base) const
318 {
319     // Make sure the HDF5 file is open
320     assert(fileID>0);
321 
322     if(IsDataSet(base))
323 	return std::vector<std::string>();
324 
325     // Make sure the group exists
326     assert(DoesGroupExist(base));
327 
328     // open the group
329     hid_t gid = H5Gopen2(fileID, base.c_str(), H5P_DEFAULT);
330 
331     char name[1024];
332     ssize_t len;
333     hsize_t nobj;
334 
335     // get the number of objects in this group
336     herr_t status = H5Gget_num_objs(gid, &nobj);
337 
338     // Intialize the vector of strings
339     std::vector<std::string> output(nobj);
340 
341     // Fill in the output vector
342     for(int i = 0; i < nobj; i++)
343     {
344 	len = H5Gget_objname_by_idx(gid, (hsize_t)i, name, (size_t)1024);
345 	output.at(i) = std::string(name,name + len);
346     }
347 
348     return output;
349 
350 };
351