1 /*****************************************************************************/
2 /*                                    XDMF                                   */
3 /*                       eXtensible Data Model and Format                    */
4 /*                                                                           */
5 /*  Id : XdmfHDF5ControllerDSM.cpp                                           */
6 /*                                                                           */
7 /*  Author:                                                                  */
8 /*     Kenneth Leiter                                                        */
9 /*     kenneth.leiter@arl.army.mil                                           */
10 /*     US Army Research Laboratory                                           */
11 /*     Aberdeen Proving Ground, MD                                           */
12 /*                                                                           */
13 /*     Copyright @ 2011 US Army Research Laboratory                          */
14 /*     All Rights Reserved                                                   */
15 /*     See Copyright.txt for details                                         */
16 /*                                                                           */
17 /*     This software is distributed WITHOUT ANY WARRANTY; without            */
18 /*     even the implied warranty of MERCHANTABILITY or FITNESS               */
19 /*     FOR A PARTICULAR PURPOSE.  See the above copyright notice             */
20 /*     for more information.                                                 */
21 /*                                                                           */
22 /*****************************************************************************/
23 
24 #ifdef XDMF_BUILD_DSM_THREADS
25   #include <H5FDdsm.h>
26   #include <H5FDdsmManager.h>
27   #include <H5FDdsmBuffer.h>
28 #endif
29 #include <H5public.h>
30 #include <hdf5.h>
31 #include "XdmfArray.hpp"
32 #include "XdmfArrayType.hpp"
33 #include "XdmfHDF5ControllerDSM.hpp"
34 #include "XdmfDSMManager.hpp"
35 #include "XdmfDSMBuffer.hpp"
36 #include "XdmfDSMCommMPI.hpp"
37 #include "XdmfDSMDriver.hpp"
38 #include "XdmfError.hpp"
39 
40 #ifdef XDMF_BUILD_DSM_THREADS
41 
42 shared_ptr<XdmfHDF5ControllerDSM>
New(const std::string & hdf5FilePath,const std::string & dataSetPath,const shared_ptr<const XdmfArrayType> type,const std::vector<unsigned int> & start,const std::vector<unsigned int> & stride,const std::vector<unsigned int> & dimensions,const std::vector<unsigned int> & datspaceDimensions,H5FDdsmBuffer * const dsmBuffer)43 XdmfHDF5ControllerDSM::New(const std::string & hdf5FilePath,
44                            const std::string & dataSetPath,
45                            const shared_ptr<const XdmfArrayType> type,
46                            const std::vector<unsigned int> & start,
47                            const std::vector<unsigned int> & stride,
48                            const std::vector<unsigned int> & dimensions,
49                            const std::vector<unsigned int> & datspaceDimensions,
50                            H5FDdsmBuffer * const dsmBuffer)
51 {
52   shared_ptr<XdmfHDF5ControllerDSM>
53     p(new XdmfHDF5ControllerDSM(hdf5FilePath,
54                                 dataSetPath,
55                                 type,
56                                 start,
57                                 stride,
58                                 dimensions,
59                                 datspaceDimensions,
60                                 dsmBuffer));
61   return p;
62 }
63 
64 shared_ptr<XdmfHDF5ControllerDSM>
New(const std::string & hdf5FilePath,const std::string & dataSetPath,const shared_ptr<const XdmfArrayType> type,const std::vector<unsigned int> & start,const std::vector<unsigned int> & stride,const std::vector<unsigned int> & dimensions,const std::vector<unsigned int> & datspaceDimensions,MPI_Comm comm,unsigned int bufferSize)65 XdmfHDF5ControllerDSM::New(const std::string & hdf5FilePath,
66                            const std::string & dataSetPath,
67                            const shared_ptr<const XdmfArrayType> type,
68                            const std::vector<unsigned int> & start,
69                            const std::vector<unsigned int> & stride,
70                            const std::vector<unsigned int> & dimensions,
71                            const std::vector<unsigned int> & datspaceDimensions,
72                            MPI_Comm comm,
73                            unsigned int bufferSize)
74 {
75   shared_ptr<XdmfHDF5ControllerDSM>
76     p(new XdmfHDF5ControllerDSM(hdf5FilePath,
77                                 dataSetPath,
78                                 type,
79                                 start,
80                                 stride,
81                                 dimensions,
82                                 datspaceDimensions,
83                                 comm,
84                                 bufferSize));
85   return p;
86 }
87 
88 #endif
89 
90 // Server/ nonthreaded versions
91 shared_ptr<XdmfHDF5ControllerDSM>
New(const std::string & hdf5FilePath,const std::string & dataSetPath,const shared_ptr<const XdmfArrayType> type,const std::vector<unsigned int> & start,const std::vector<unsigned int> & stride,const std::vector<unsigned int> & dimensions,const std::vector<unsigned int> & datspaceDimensions,XdmfDSMBuffer * const dsmBuffer)92 XdmfHDF5ControllerDSM::New(const std::string & hdf5FilePath,
93                            const std::string & dataSetPath,
94                            const shared_ptr<const XdmfArrayType> type,
95                            const std::vector<unsigned int> & start,
96                            const std::vector<unsigned int> & stride,
97                            const std::vector<unsigned int> & dimensions,
98                            const std::vector<unsigned int> & datspaceDimensions,
99                            XdmfDSMBuffer * const dsmBuffer)
100 {
101   shared_ptr<XdmfHDF5ControllerDSM>
102     p(new XdmfHDF5ControllerDSM(hdf5FilePath,
103                                 dataSetPath,
104                                 type,
105                                 start,
106                                 stride,
107                                 dimensions,
108                                 datspaceDimensions,
109                                 dsmBuffer));
110   return p;
111 }
112 
113 shared_ptr<XdmfHDF5ControllerDSM>
New(const std::string & hdf5FilePath,const std::string & dataSetPath,const shared_ptr<const XdmfArrayType> type,const std::vector<unsigned int> & start,const std::vector<unsigned int> & stride,const std::vector<unsigned int> & dimensions,const std::vector<unsigned int> & datspaceDimensions,MPI_Comm comm,unsigned int bufferSize,int startCoreIndex,int endCoreIndex)114 XdmfHDF5ControllerDSM::New(const std::string & hdf5FilePath,
115                            const std::string & dataSetPath,
116                            const shared_ptr<const XdmfArrayType> type,
117                            const std::vector<unsigned int> & start,
118                            const std::vector<unsigned int> & stride,
119                            const std::vector<unsigned int> & dimensions,
120                            const std::vector<unsigned int> & datspaceDimensions,
121                            MPI_Comm comm,
122                            unsigned int bufferSize,
123                            int startCoreIndex,
124                            int endCoreIndex)
125 {
126   shared_ptr<XdmfHDF5ControllerDSM>
127     p(new XdmfHDF5ControllerDSM(hdf5FilePath,
128                                 dataSetPath,
129                                 type,
130                                 start,
131                                 stride,
132                                 dimensions,
133                                 datspaceDimensions,
134                                 comm,
135                                 bufferSize,
136                                 startCoreIndex,
137                                 endCoreIndex));
138   return p;
139 }
140 
141 #ifdef XDMF_BUILD_DSM_THREADS
142 
XdmfHDF5ControllerDSM(const std::string & hdf5FilePath,const std::string & dataSetPath,const shared_ptr<const XdmfArrayType> type,const std::vector<unsigned int> & start,const std::vector<unsigned int> & stride,const std::vector<unsigned int> & dimensions,const std::vector<unsigned int> & dataspaceDimensions,H5FDdsmBuffer * const dsmBuffer)143 XdmfHDF5ControllerDSM::XdmfHDF5ControllerDSM(const std::string & hdf5FilePath,
144                                              const std::string & dataSetPath,
145                                              const shared_ptr<const XdmfArrayType> type,
146                                              const std::vector<unsigned int> & start,
147                                              const std::vector<unsigned int> & stride,
148                                              const std::vector<unsigned int> & dimensions,
149                                              const std::vector<unsigned int> & dataspaceDimensions,
150                                              H5FDdsmBuffer * const dsmBuffer) :
151   XdmfHDF5Controller(hdf5FilePath,
152                      dataSetPath,
153                      type,
154                      start,
155                      stride,
156                      dimensions,
157                      dataspaceDimensions),
158   mDSMManager(NULL),
159   mDSMBuffer(dsmBuffer),
160   mDSMServerBuffer(NULL),
161   mDSMServerManager(NULL),
162   mWorkerComm(MPI_COMM_NULL),
163   mServerMode(false)
164 {
165 }
166 
XdmfHDF5ControllerDSM(const std::string & hdf5FilePath,const std::string & dataSetPath,const shared_ptr<const XdmfArrayType> type,const std::vector<unsigned int> & start,const std::vector<unsigned int> & stride,const std::vector<unsigned int> & dimensions,const std::vector<unsigned int> & dataspaceDimensions,MPI_Comm comm,unsigned int bufferSize)167 XdmfHDF5ControllerDSM::XdmfHDF5ControllerDSM(const std::string & hdf5FilePath,
168                                              const std::string & dataSetPath,
169                                              const shared_ptr<const XdmfArrayType> type,
170                                              const std::vector<unsigned int> & start,
171                                              const std::vector<unsigned int> & stride,
172                                              const std::vector<unsigned int> & dimensions,
173                                              const std::vector<unsigned int> & dataspaceDimensions,
174                                              MPI_Comm comm,
175                                              unsigned int bufferSize) :
176   XdmfHDF5Controller(hdf5FilePath,
177                      dataSetPath,
178                      type,
179                      start,
180                      stride,
181                      dimensions,
182                      dataspaceDimensions),
183   mDSMServerBuffer(NULL),
184   mDSMServerManager(NULL),
185   mWorkerComm(MPI_COMM_NULL),
186   mServerMode(false)
187 
188 {
189   H5FDdsmManager * newManager = new H5FDdsmManager();
190   newManager->SetMpiComm(comm);
191   newManager->SetLocalBufferSizeMBytes(bufferSize);
192   newManager->SetIsStandAlone(H5FD_DSM_TRUE);
193   newManager->Create();
194 
195   H5FD_dsm_set_manager(newManager);
196 
197   H5FD_dsm_set_options(H5FD_DSM_LOCK_ASYNCHRONOUS);
198 
199   H5FDdsmBuffer * newBuffer = newManager->GetDsmBuffer();
200 
201   mDSMManager = newManager;
202   mDSMBuffer = newBuffer;
203 }
204 
205 #endif
206 
XdmfHDF5ControllerDSM(const std::string & hdf5FilePath,const std::string & dataSetPath,const shared_ptr<const XdmfArrayType> type,const std::vector<unsigned int> & start,const std::vector<unsigned int> & stride,const std::vector<unsigned int> & dimensions,const std::vector<unsigned int> & dataspaceDimensions,XdmfDSMBuffer * const dsmBuffer)207 XdmfHDF5ControllerDSM::XdmfHDF5ControllerDSM(const std::string & hdf5FilePath,
208                                              const std::string & dataSetPath,
209                                              const shared_ptr<const XdmfArrayType> type,
210                                              const std::vector<unsigned int> & start,
211                                              const std::vector<unsigned int> & stride,
212                                              const std::vector<unsigned int> & dimensions,
213                                              const std::vector<unsigned int> & dataspaceDimensions,
214                                              XdmfDSMBuffer * const dsmBuffer) :
215   XdmfHDF5Controller(hdf5FilePath,
216                      dataSetPath,
217                      type,
218                      start,
219                      stride,
220                      dimensions,
221                      dataspaceDimensions),
222 #ifdef XDMF_BUILD_DSM_THREADS
223   mDSMManager(NULL),
224   mDSMBuffer(NULL),
225 #endif
226   mDSMServerBuffer(dsmBuffer),
227   mDSMServerManager(NULL),
228   mServerMode(true)
229 {
230   mWorkerComm = mDSMServerBuffer->GetComm()->GetIntraComm();
231   if (xdmf_dsm_get_manager() == NULL) {
232     mDSMServerManager = new XdmfDSMManager();
233     mDSMServerManager->SetLocalBufferSizeMBytes(mDSMServerBuffer->GetLength());
234     mDSMServerManager->SetInterCommType(XDMF_DSM_COMM_MPI);
235     mDSMServerManager->SetIsServer(false);
236     mDSMServerManager->SetMpiComm(mDSMServerBuffer->GetComm()->GetIntraComm());
237     mDSMServerManager->SetDsmBuffer(mDSMServerBuffer);
238     XDMF_dsm_set_manager(mDSMServerManager);
239   }
240   else {
241     static_cast<XdmfDSMManager *>(xdmf_dsm_get_manager())->SetDsmBuffer(mDSMServerBuffer);
242   }
243 }
244 
XdmfHDF5ControllerDSM(const std::string & hdf5FilePath,const std::string & dataSetPath,const shared_ptr<const XdmfArrayType> type,const std::vector<unsigned int> & start,const std::vector<unsigned int> & stride,const std::vector<unsigned int> & dimensions,const std::vector<unsigned int> & dataspaceDimensions,MPI_Comm comm,unsigned int bufferSize,int startCoreIndex,int endCoreIndex)245 XdmfHDF5ControllerDSM::XdmfHDF5ControllerDSM(const std::string & hdf5FilePath,
246                                              const std::string & dataSetPath,
247                                              const shared_ptr<const XdmfArrayType> type,
248                                              const std::vector<unsigned int> & start,
249                                              const std::vector<unsigned int> & stride,
250                                              const std::vector<unsigned int> & dimensions,
251                                              const std::vector<unsigned int> & dataspaceDimensions,
252                                              MPI_Comm comm,
253                                              unsigned int bufferSize,
254                                              int startCoreIndex,
255                                              int endCoreIndex) :
256   XdmfHDF5Controller(hdf5FilePath,
257                      dataSetPath,
258                      type,
259                      start,
260                      stride,
261                      dimensions,
262                      dataspaceDimensions),
263 #ifdef XDMF_BUILD_DSM_THREADS
264   mDSMBuffer(NULL),
265   mDSMManager(NULL),
266 #endif
267   mServerMode(true)
268 
269 {
270 
271   int rank, size;
272 
273   MPI_Comm_size(comm, &size);
274   MPI_Comm_rank(comm, &rank);
275 
276   // Negative values will be changed to maximum range
277   if (startCoreIndex < 0) {
278     startCoreIndex = 0;
279   }
280   if (endCoreIndex < 0) {
281     endCoreIndex = size - 1;
282   }
283 
284   // Ensure start index is less than end index
285   if (startCoreIndex > endCoreIndex) {
286     int tempholder = startCoreIndex;
287     startCoreIndex = endCoreIndex;
288     endCoreIndex = tempholder;
289   }
290 
291   MPI_Comm serverComm;
292 
293   MPI_Group workers, dsmgroup, serversplit, servergroup;
294 
295   int * ServerIds = (int *)calloc((3), sizeof(int));
296   unsigned int index = 0;
297   for(int i=startCoreIndex ; i <= endCoreIndex ; ++i) {
298     ServerIds[index++] = i;
299   }
300 
301   MPI_Comm_group(comm, &serversplit);
302   MPI_Group_incl(serversplit, index, ServerIds, &servergroup);
303   MPI_Comm_create(comm, servergroup, &serverComm);
304   MPI_Comm_group(comm, &dsmgroup);
305   MPI_Group_excl(dsmgroup, index, ServerIds, &workers);
306   MPI_Comm_create(comm, workers, &mWorkerComm);
307   cfree(ServerIds);
308 
309   // Create the manager
310 
311   mDSMServerManager = new XdmfDSMManager();
312 
313   mDSMServerManager->SetLocalBufferSizeMBytes(bufferSize);
314   mDSMServerManager->SetInterCommType(XDMF_DSM_COMM_MPI);
315 
316   if (rank >= startCoreIndex && rank <= endCoreIndex) {
317     mDSMServerManager->SetMpiComm(serverComm);
318     mDSMServerManager->Create();
319   }
320   else {
321     mDSMServerManager->SetMpiComm(mWorkerComm);
322     mDSMServerManager->SetIsServer(false);
323     mDSMServerManager->Create(startCoreIndex, endCoreIndex);
324   }
325 
326   XDMF_dsm_set_manager(mDSMServerManager);
327 
328   mDSMServerBuffer = mDSMServerManager->GetDsmBuffer();
329 
330   mDSMServerBuffer->GetComm()->DupInterComm(comm);
331   mDSMServerBuffer->SetIsConnected(true);
332 
333   if (startCoreIndex < size) {
334     if (rank >= startCoreIndex && rank <= endCoreIndex) {
335       mDSMServerManager->GetDsmBuffer()->ReceiveInfo();
336     }
337     else {
338       mDSMServerManager->GetDsmBuffer()->SendInfo();
339     }
340   }
341 
342   MPI_Barrier(comm);
343 
344   // Loop needs to be started before anything can be done to the file
345   // since the service is what sets up the file
346 
347   if (rank < startCoreIndex || rank > endCoreIndex) {
348     // Turn off the server designation
349     mDSMServerBuffer->SetIsServer(false);
350     // If this is set to false then the buffer will attempt to
351     // connect to the intercomm for DSM stuff
352     mDSMServerManager->SetIsServer(false);
353   }
354   else {
355     // On cores where memory is set up, start the service loop
356     // This should iterate infinitely until a value to end the loop is passed
357     int returnOpCode;
358     mDSMServerBuffer->BufferServiceLoop(&returnOpCode);
359   }
360 }
361 
~XdmfHDF5ControllerDSM()362 XdmfHDF5ControllerDSM::~XdmfHDF5ControllerDSM()
363 {
364 }
365 
deleteManager()366 void XdmfHDF5ControllerDSM::deleteManager()
367 {
368 #ifdef XDMF_BUILD_DSM_THREADS
369   if (mDSMManager != NULL) {
370     delete mDSMManager;
371   }
372 #endif
373   if (mDSMServerManager != NULL) {
374     delete mDSMServerManager;
375   }
376 }
377 
getName() const378 std::string XdmfHDF5ControllerDSM::getName() const
379 {
380   return "HDFDSM";
381 }
382 
383 #ifdef XDMF_BUILD_DSM_THREADS
384 
getBuffer()385 H5FDdsmBuffer * XdmfHDF5ControllerDSM::getBuffer()
386 {
387   return mDSMBuffer;
388 }
389 
getManager()390 H5FDdsmManager * XdmfHDF5ControllerDSM::getManager()
391 {
392   return mDSMManager;
393 }
394 
395 #endif
396 
getServerBuffer()397 XdmfDSMBuffer * XdmfHDF5ControllerDSM::getServerBuffer()
398 {
399   return mDSMServerBuffer;
400 }
401 
getServerManager()402 XdmfDSMManager * XdmfHDF5ControllerDSM::getServerManager()
403 {
404   return mDSMServerManager;
405 }
406 
getServerMode() const407 bool XdmfHDF5ControllerDSM::getServerMode() const
408 {
409   return mServerMode;
410 }
411 
getWorkerComm()412 MPI_Comm XdmfHDF5ControllerDSM::getWorkerComm()
413 {
414   MPI_Comm returnComm = MPI_COMM_NULL;
415   if (mWorkerComm != MPI_COMM_NULL) {
416     MPI_Comm_dup(mWorkerComm, &returnComm);
417   }
418   return returnComm;
419 }
420 
setManager(XdmfDSMManager * newManager)421 void XdmfHDF5ControllerDSM::setManager(XdmfDSMManager * newManager)
422 {
423   XdmfDSMBuffer * newBuffer = newManager->GetDsmBuffer();
424   mDSMServerManager = newManager;
425   mDSMServerBuffer = newBuffer;
426 }
427 
428 #ifdef XDMF_BUILD_DSM_THREADS
429 
setManager(H5FDdsmManager * newManager)430 void XdmfHDF5ControllerDSM::setManager(H5FDdsmManager * newManager)
431 {
432   H5FDdsmBuffer * newBuffer = newManager->GetDsmBuffer();
433   mDSMManager = newManager;
434   mDSMBuffer = newBuffer;
435 }
436 
437 #endif
438 
setBuffer(XdmfDSMBuffer * newBuffer)439 void XdmfHDF5ControllerDSM::setBuffer(XdmfDSMBuffer * newBuffer)
440 {
441   mDSMServerBuffer = newBuffer;
442 }
443 
444 #ifdef XDMF_BUILD_DSM_THREADS
445 
setBuffer(H5FDdsmBuffer * newBuffer)446 void XdmfHDF5ControllerDSM::setBuffer(H5FDdsmBuffer * newBuffer)
447 {
448   mDSMBuffer = newBuffer;
449 }
450 
451 #endif
452 
setServerMode(bool newMode)453 void XdmfHDF5ControllerDSM::setServerMode(bool newMode)
454 {
455   mServerMode = newMode;
456 }
457 
setWorkerComm(MPI_Comm comm)458 void XdmfHDF5ControllerDSM::setWorkerComm(MPI_Comm comm)
459 {
460   int status;
461 #ifndef OPEN_MPI
462   if (mWorkerComm != MPI_COMM_NULL) {
463     status = MPI_Comm_free(&mWorkerComm);
464     if (status != MPI_SUCCESS) {
465       XdmfError::message(XdmfError::FATAL, "Failed to disconnect Comm");
466     }
467   }
468 #endif
469   if (comm != MPI_COMM_NULL) {
470     status = MPI_Comm_dup(comm, &mWorkerComm);
471     if (status != MPI_SUCCESS) {
472       XdmfError::message(XdmfError::FATAL, "Failed to duplicate Comm");
473     }
474   }
475   mDSMServerBuffer->GetComm()->DupComm(comm);
476 }
477 
stopDSM()478 void XdmfHDF5ControllerDSM::stopDSM()
479 {
480   if (mServerMode) {
481     // Send manually
482     for (int i = mDSMServerBuffer->GetStartServerId();
483          i <= mDSMServerBuffer->GetEndServerId();
484          ++i) {
485       mDSMServerBuffer->SendCommandHeader(XDMF_DSM_OPCODE_DONE, i, 0, 0, XDMF_DSM_INTER_COMM);
486     }
487   }
488   else {
489     XdmfError::message(XdmfError::FATAL, "Error: Stopping DSM manually only available in server mode.");
490   }
491 }
492 
restartDSM()493 void XdmfHDF5ControllerDSM::restartDSM()
494 {
495   if (mServerMode) {
496     if (mDSMServerBuffer->GetComm()->GetInterId() >=
497           mDSMServerBuffer->GetStartServerId() &&
498         mDSMServerBuffer->GetComm()->GetInterId() <=
499           mDSMServerBuffer->GetEndServerId()) {
500       int returnOpCode;
501       mDSMServerBuffer->BufferServiceLoop(&returnOpCode);
502     }
503   }
504   else {
505     XdmfError::message(XdmfError::FATAL, "Error: Restarting DSM only available in server mode.");
506   }
507 }
508 
read(XdmfArray * const array)509 void XdmfHDF5ControllerDSM::read(XdmfArray * const array)
510 {
511   // Set file access property list for DSM
512   hid_t fapl = H5Pcreate(H5P_FILE_ACCESS);
513 
514   // Use DSM driver
515   if (mServerMode) {
516     if (mWorkerComm != MPI_COMM_NULL) {
517       XDMFH5Pset_fapl_dsm(fapl, mWorkerComm, mDSMServerBuffer, 0);
518     }
519   }
520   else {
521 #ifdef XDMF_BUILD_DSM_THREADS
522     H5Pset_fapl_dsm(fapl, MPI_COMM_WORLD, mDSMBuffer, 0);
523 #else
524     XdmfError::message(XdmfError::FATAL, "Error: Threaded DSM not enabled");
525 #endif
526   }
527 
528   // Read from DSM Buffer
529   XdmfHDF5Controller::read(array, fapl);
530 
531   // Close file access property list
532   H5Pclose(fapl);
533 }
534