1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2  * Copyright by The HDF Group.                                               *
3  * Copyright by the Board of Trustees of the University of Illinois.         *
4  * All rights reserved.                                                      *
5  *                                                                           *
6  * This file is part of HDF5.  The full HDF5 copyright notice, including     *
7  * terms governing use, modification, and redistribution, is contained in    *
8  * the COPYING file, which can be found at the root of the source code       *
9  * distribution tree, or in https://support.hdfgroup.org/ftp/HDF5/releases.  *
10  * If you do not have access to either file, you may request a copy from     *
11  * help@hdfgroup.org.                                                        *
12  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
13 
14 /*
15  * Programmer:  Robb Matzke <matzke@llnl.gov>
16  *              Thursday, July 29, 1999
17  *
18  * Purpose:    This is the MPI-2 I/O driver.
19  *
20  */
21 
22 #include "H5FDdrvr_module.h" /* This source code file is part of the H5FD driver module */
23 
24 
25 #include "H5private.h"        /* Generic Functions            */
26 #include "H5CXprivate.h"        /* API Contexts                         */
27 #include "H5Dprivate.h"        /* Dataset functions            */
28 #include "H5Eprivate.h"        /* Error handling              */
29 #include "H5Fprivate.h"        /* File access                */
30 #include "H5FDprivate.h"    /* File drivers                */
31 #include "H5FDmpi.h"            /* MPI-based file drivers        */
32 #include "H5Iprivate.h"        /* IDs                      */
33 #include "H5MMprivate.h"    /* Memory management            */
34 #include "H5Pprivate.h"         /* Property lists                       */
35 
36 #ifdef H5_HAVE_PARALLEL
37 
38 /*
39  * The driver identification number, initialized at runtime if H5_HAVE_PARALLEL
40  * is defined. This allows applications to still have the H5FD_MPIO
41  * "constants" in their source code.
42  */
43 static hid_t H5FD_MPIO_g = 0;
44 
45 /* Whether to allow collective I/O operations */
46 /* (Value can be set from environment variable also) */
47 hbool_t H5FD_mpi_opt_types_g = TRUE;
48 
49 /*
50  * The view is set to this value
51  */
52 static char H5FD_mpi_native_g[] = "native";
53 
54 /*
55  * The description of a file belonging to this driver.
56  * The EOF value is only used just after the file is opened in order for the
57  * library to determine whether the file is empty, truncated, or okay. The MPIO
58  * driver doesn't bother to keep it updated since it's an expensive operation.
59  */
60 typedef struct H5FD_mpio_t {
61     H5FD_t    pub;        /*public stuff, must be first        */
62     MPI_File    f;        /*MPIO file handle            */
63     MPI_Comm    comm;        /*communicator                */
64     MPI_Info    info;        /*file information            */
65     int         mpi_rank;       /* This process's rank                  */
66     int         mpi_size;       /* Total number of processes            */
67     haddr_t    eof;        /*end-of-file marker            */
68     haddr_t    eoa;        /*end-of-address marker            */
69     haddr_t    last_eoa;    /* Last known end-of-address marker    */
70     haddr_t    local_eof;    /* Local end-of-file address for each process */
71 } H5FD_mpio_t;
72 
73 /* Private Prototypes */
74 
75 /* Callbacks */
76 static herr_t H5FD_mpio_term(void);
77 static void *H5FD_mpio_fapl_get(H5FD_t *_file);
78 static void *H5FD_mpio_fapl_copy(const void *_old_fa);
79 static herr_t H5FD_mpio_fapl_free(void *_fa);
80 static H5FD_t *H5FD_mpio_open(const char *name, unsigned flags, hid_t fapl_id,
81                 haddr_t maxaddr);
82 static herr_t H5FD_mpio_close(H5FD_t *_file);
83 static herr_t H5FD_mpio_query(const H5FD_t *_f1, unsigned long *flags);
84 static haddr_t H5FD_mpio_get_eoa(const H5FD_t *_file, H5FD_mem_t type);
85 static herr_t H5FD_mpio_set_eoa(H5FD_t *_file, H5FD_mem_t type, haddr_t addr);
86 static haddr_t H5FD_mpio_get_eof(const H5FD_t *_file, H5FD_mem_t type);
87 static herr_t  H5FD_mpio_get_handle(H5FD_t *_file, hid_t fapl, void** file_handle);
88 static herr_t H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
89             size_t size, void *buf);
90 static herr_t H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
91             size_t size, const void *buf);
92 static herr_t H5FD_mpio_flush(H5FD_t *_file, hid_t dxpl_id, hbool_t closing);
93 static herr_t H5FD_mpio_truncate(H5FD_t *_file, hid_t dxpl_id, hbool_t closing);
94 static int H5FD_mpio_mpi_rank(const H5FD_t *_file);
95 static int H5FD_mpio_mpi_size(const H5FD_t *_file);
96 static MPI_Comm H5FD_mpio_communicator(const H5FD_t *_file);
97 static herr_t  H5FD_mpio_get_info(H5FD_t *_file, void** mpi_info);
98 
99 /* The MPIO file driver information */
100 static const H5FD_class_mpi_t H5FD_mpio_g = {
101     {   /* Start of superclass information */
102     "mpio",                    /*name            */
103     HADDR_MAX,                    /*maxaddr        */
104     H5F_CLOSE_SEMI,                /* fc_degree        */
105     H5FD_mpio_term,                             /*terminate             */
106     NULL,                    /*sb_size        */
107     NULL,                    /*sb_encode        */
108     NULL,                    /*sb_decode        */
109     sizeof(H5FD_mpio_fapl_t),            /*fapl_size        */
110     H5FD_mpio_fapl_get,                /*fapl_get        */
111     H5FD_mpio_fapl_copy,            /*fapl_copy        */
112     H5FD_mpio_fapl_free,             /*fapl_free        */
113     0,                                /*dxpl_size        */
114     NULL,                    /*dxpl_copy        */
115     NULL,                    /*dxpl_free        */
116     H5FD_mpio_open,                /*open            */
117     H5FD_mpio_close,                /*close            */
118     NULL,                    /*cmp            */
119     H5FD_mpio_query,                        /*query            */
120     NULL,                    /*get_type_map        */
121     NULL,                    /*alloc            */
122     NULL,                    /*free            */
123     H5FD_mpio_get_eoa,                /*get_eoa        */
124     H5FD_mpio_set_eoa,                 /*set_eoa        */
125     H5FD_mpio_get_eof,                /*get_eof        */
126     H5FD_mpio_get_handle,                       /*get_handle            */
127     H5FD_mpio_read,                /*read            */
128     H5FD_mpio_write,                /*write            */
129     H5FD_mpio_flush,                /*flush            */
130     H5FD_mpio_truncate,                /*truncate        */
131     NULL,                                       /*lock                  */
132     NULL,                                       /*unlock                */
133     H5FD_FLMAP_DICHOTOMY                        /*fl_map                */
134     },  /* End of superclass information */
135     H5FD_mpio_mpi_rank,                         /*get_rank              */
136     H5FD_mpio_mpi_size,                         /*get_size              */
137     H5FD_mpio_communicator,                     /*get_comm              */
138     H5FD_mpio_get_info                          /*get_info              */
139 };
140 
#ifdef H5FDmpio_DEBUG
/* Flags to control debug actions in this driver.
 * The table has one counter per character code and is meant to be indexed
 * by characters; every counter starts at zero.
 *
 * 'c' show result of MPI_Get_count after read
 * 'r' show read offset and size
 * 't' trace function entry and exit
 * 'w' show write offset and size
 */
static int H5FD_mpio_Debug[256] = {0};
#endif
160 
161 
162 /*--------------------------------------------------------------------------
163 NAME
164    H5FD__init_package -- Initialize interface-specific information
165 USAGE
166     herr_t H5FD__init_package()
167 RETURNS
168     Non-negative on success/Negative on failure
169 DESCRIPTION
170     Initializes any interface-specific data or routines.  (Just calls
171     H5FD_mpio_init currently).
172 
173 --------------------------------------------------------------------------*/
174 static herr_t
H5FD__init_package(void)175 H5FD__init_package(void)
176 {
177     herr_t ret_value = SUCCEED;
178 
179     FUNC_ENTER_STATIC
180 
181     if(H5FD_mpio_init() < 0)
182         HGOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "unable to initialize mpio VFD")
183 
184 done:
185     FUNC_LEAVE_NOAPI(ret_value)
186 } /* H5FD__init_package() */
187 
188 
189 /*-------------------------------------------------------------------------
190  * Function:    H5FD_mpio_init
191  *
192  * Purpose:    Initialize this driver by registering the driver with the
193  *        library.
194  *
195  * Return:    Success:    The driver ID for the mpio driver.
196  *        Failure:    Negative.
197  *
198  * Programmer:    Robb Matzke
199  *              Thursday, August 5, 1999
200  *
201  *-------------------------------------------------------------------------
202  */
203 hid_t
H5FD_mpio_init(void)204 H5FD_mpio_init(void)
205 {
206 #ifdef H5FDmpio_DEBUG
207     static int H5FD_mpio_Debug_inited = 0;
208 #endif /* H5FDmpio_DEBUG */
209     const char *s;              /* String for environment variables */
210     hid_t ret_value;            /* Return value */
211 
212     FUNC_ENTER_NOAPI(FAIL)
213 
214     /* Register the MPI-IO VFD, if it isn't already */
215     if(H5I_VFL != H5I_get_type(H5FD_MPIO_g))
216         H5FD_MPIO_g = H5FD_register((const H5FD_class_t *)&H5FD_mpio_g, sizeof(H5FD_class_mpi_t), FALSE);
217 
218     /* Allow MPI buf-and-file-type optimizations? */
219     s = HDgetenv("HDF5_MPI_OPT_TYPES");
220     if(s && HDisdigit(*s))
221         H5FD_mpi_opt_types_g = (hbool_t)HDstrtol(s, NULL, 0);
222 
223 #ifdef H5FDmpio_DEBUG
224     if(!H5FD_mpio_Debug_inited) {
225         /* Retrieve MPI-IO debugging environment variable */
226         s = HDgetenv("H5FD_mpio_Debug");
227         if(s) {
228             /* Set debug mask */
229         while(*s) {
230         H5FD_mpio_Debug[(int)*s]++;
231         s++;
232         } /* end while */
233         } /* end if */
234     H5FD_mpio_Debug_inited++;
235     } /* end if */
236 #endif /* H5FDmpio_DEBUG */
237 
238     /* Set return value */
239     ret_value = H5FD_MPIO_g;
240 
241 done:
242     FUNC_LEAVE_NOAPI(ret_value)
243 } /* end H5FD_mpio_init() */
244 
245 
246 /*---------------------------------------------------------------------------
247  * Function:    H5FD_mpio_term
248  *
249  * Purpose:    Shut down the VFD
250  *
251  * Returns:     Non-negative on success or negative on failure
252  *
253  * Programmer:  Quincey Koziol
254  *              Friday, Jan 30, 2004
255  *
256  *---------------------------------------------------------------------------
257  */
258 static herr_t
H5FD_mpio_term(void)259 H5FD_mpio_term(void)
260 {
261     FUNC_ENTER_NOAPI_NOINIT_NOERR
262 
263     /* Reset VFL ID */
264     H5FD_MPIO_g=0;
265 
266     FUNC_LEAVE_NOAPI(SUCCEED)
267 } /* end H5FD_mpio_term() */
268 
269 
270 /*-------------------------------------------------------------------------
271  * Function:    H5Pset_fapl_mpio
272  *
273  * Purpose:    Store the user supplied MPIO communicator comm and info in
274  *        the file access property list FAPL_ID which can then be used
275  *        to create and/or open the file.  This function is available
276  *        only in the parallel HDF5 library and is not collective.
277  *
278  *        comm is the MPI communicator to be used for file open as
279  *        defined in MPI_FILE_OPEN of MPI-2. This function makes a
280  *        duplicate of comm. Any modification to comm after this function
281  *        call returns has no effect on the access property list.
282  *
283  *        info is the MPI Info object to be used for file open as
284  *        defined in MPI_FILE_OPEN of MPI-2. This function makes a
285  *        duplicate of info. Any modification to info after this
286  *        function call returns has no effect on the access property
287  *        list.
288  *
289  *              If fapl_id has previously set comm and info values, they
290  *              will be replaced and the old communicator and Info object
291  *              are freed.
292  *
293  * Return:    Success:    Non-negative
294  *
295  *         Failure:    Negative
296  *
297  * Programmer:    Albert Cheng
298  *        Feb 3, 1998
299  *
300  *-------------------------------------------------------------------------
301  */
302 herr_t
H5Pset_fapl_mpio(hid_t fapl_id,MPI_Comm comm,MPI_Info info)303 H5Pset_fapl_mpio(hid_t fapl_id, MPI_Comm comm, MPI_Info info)
304 {
305     H5FD_mpio_fapl_t    fa;
306     H5P_genplist_t *plist;      /* Property list pointer */
307     herr_t ret_value;
308 
309     FUNC_ENTER_API(FAIL)
310     H5TRACE3("e", "iMcMi", fapl_id, comm, info);
311 
312     if(fapl_id == H5P_DEFAULT)
313         HGOTO_ERROR(H5E_PLIST, H5E_BADVALUE, FAIL, "can't set values in default property list")
314 
315     /* Check arguments */
316     if(NULL == (plist = H5P_object_verify(fapl_id, H5P_FILE_ACCESS)))
317         HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a file access list")
318     if(MPI_COMM_NULL == comm)
319     HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a valid communicator")
320 
321     /* Initialize driver specific properties */
322     fa.comm = comm;
323     fa.info = info;
324 
325     /* duplication is done during driver setting. */
326     ret_value = H5P_set_driver(plist, H5FD_MPIO, &fa);
327 
328 done:
329     FUNC_LEAVE_API(ret_value)
330 } /* H5Pset_fapl_mpio() */
331 
332 
333 /*-------------------------------------------------------------------------
334  * Function:    H5Pget_fapl_mpio
335  *
336  * Purpose:    If the file access property list is set to the H5FD_MPIO
337  *        driver then this function returns duplicates of the MPI
338  *        communicator and Info object stored through the comm and
339  *        info pointers.  It is the responsibility of the application
340  *        to free the returned communicator and Info object.
341  *
342  * Return:    Success:    Non-negative with the communicator and
343  *                Info object returned through the comm and
344  *                info arguments if non-null. Since they are
345  *                duplicates of the stored objects, future
346  *                modifications to the access property list do
347  *                not affect them and it is the responsibility
348  *                of the application to free them.
349  *
350  *         Failure:    Negative
351  *
352  * Programmer:    Robb Matzke
353  *        Thursday, February 26, 1998
354  *
355  *-------------------------------------------------------------------------
356  */
357 herr_t
H5Pget_fapl_mpio(hid_t fapl_id,MPI_Comm * comm,MPI_Info * info)358 H5Pget_fapl_mpio(hid_t fapl_id, MPI_Comm *comm/*out*/, MPI_Info *info/*out*/)
359 {
360     H5P_genplist_t *plist;      /* Property list pointer */
361     const H5FD_mpio_fapl_t *fa;       /* MPIO fapl info */
362     MPI_Comm    comm_tmp = MPI_COMM_NULL;
363     hbool_t     comm_copied = FALSE;    /* MPI Comm has been duplicated */
364     int        mpi_code;        /* MPI return code */
365     herr_t      ret_value = SUCCEED;    /* Return value */
366 
367     FUNC_ENTER_API(FAIL)
368     H5TRACE3("e", "ixx", fapl_id, comm, info);
369 
370     if(NULL == (plist = H5P_object_verify(fapl_id, H5P_FILE_ACCESS)))
371         HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a file access list")
372     if(H5FD_MPIO != H5P_peek_driver(plist))
373         HGOTO_ERROR(H5E_PLIST, H5E_BADVALUE, FAIL, "incorrect VFL driver")
374     if(NULL == (fa = (const H5FD_mpio_fapl_t *)H5P_peek_driver_info(plist)))
375         HGOTO_ERROR(H5E_PLIST, H5E_BADVALUE, FAIL, "bad VFL driver info")
376 
377     /* Store the duplicated communicator in a temporary variable for error */
378     /* recovery in case the INFO duplication fails. */
379     if(comm) {
380     if(MPI_SUCCESS != (mpi_code = MPI_Comm_dup(fa->comm, &comm_tmp)))
381         HMPI_GOTO_ERROR(FAIL, "MPI_Comm_dup failed", mpi_code)
382         comm_copied = TRUE;
383     } /* end if */
384 
385     if(info) {
386     if(MPI_INFO_NULL != fa->info) {
387         if(MPI_SUCCESS != (mpi_code = MPI_Info_dup(fa->info, info)))
388         HMPI_GOTO_ERROR(FAIL, "MPI_Info_dup failed", mpi_code)
389     } /* end if */
390         else
391         /* do not dup it */
392         *info = MPI_INFO_NULL;
393     } /* end if */
394 
395     /* Store the copied communicator, now that the Info object has been
396      *  successfully copied.
397      */
398     if(comm)
399         *comm = comm_tmp;
400 
401 done:
402     if(ret_value < 0)
403     /* need to free anything created here */
404     if(comm_copied)
405         MPI_Comm_free(&comm_tmp);
406 
407     FUNC_LEAVE_API(ret_value)
408 } /* end H5Pget_fapl_mpio() */
409 
410 
411 /*-------------------------------------------------------------------------
412  * Function:    H5Pset_dxpl_mpio
413  *
414  * Purpose:    Set the data transfer property list DXPL_ID to use transfer
415  *        mode XFER_MODE. The property list can then be used to control
416  *        the I/O transfer mode during data I/O operations. The valid
417  *        transfer modes are:
418  *
419  *         H5FD_MPIO_INDEPENDENT:
420  *            Use independent I/O access (the default).
421  *
422  *         H5FD_MPIO_COLLECTIVE:
423  *            Use collective I/O access.
424  *
425  * Return:    Success:    Non-negative
426  *         Failure:    Negative
427  *
428  * Programmer:    Albert Cheng
429  *        April 2, 1998
430  *
431  *-------------------------------------------------------------------------
432  */
433 herr_t
H5Pset_dxpl_mpio(hid_t dxpl_id,H5FD_mpio_xfer_t xfer_mode)434 H5Pset_dxpl_mpio(hid_t dxpl_id, H5FD_mpio_xfer_t xfer_mode)
435 {
436     H5P_genplist_t *plist;      /* Property list pointer */
437     herr_t ret_value = SUCCEED; /* Return value */
438 
439     FUNC_ENTER_API(FAIL)
440     H5TRACE2("e", "iDt", dxpl_id, xfer_mode);
441 
442     if(dxpl_id == H5P_DEFAULT)
443         HGOTO_ERROR(H5E_PLIST, H5E_BADVALUE, FAIL, "can't set values in default property list")
444 
445     /* Check arguments */
446     if(NULL == (plist = H5P_object_verify(dxpl_id, H5P_DATASET_XFER)))
447         HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a dxpl")
448     if(H5FD_MPIO_INDEPENDENT != xfer_mode && H5FD_MPIO_COLLECTIVE != xfer_mode)
449         HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "incorrect xfer_mode")
450 
451     /* Set the transfer mode */
452     if(H5P_set(plist, H5D_XFER_IO_XFER_MODE_NAME, &xfer_mode) < 0)
453         HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "unable to set value")
454 
455 done:
456     FUNC_LEAVE_API(ret_value)
457 } /* end H5Pset_dxpl_mpio() */
458 
459 
460 /*-------------------------------------------------------------------------
461  * Function:    H5Pget_dxpl_mpio
462  *
463  * Purpose:    Queries the transfer mode current set in the data transfer
464  *        property list DXPL_ID. This is not collective.
465  *
466  * Return:    Success:    Non-negative, with the transfer mode returned
467  *                through the XFER_MODE argument if it is
468  *                non-null.
469  *
470  *         Failure:    Negative
471  *
472  * Programmer:    Albert Cheng
473  *        April 2, 1998
474  *
475  *-------------------------------------------------------------------------
476  */
477 herr_t
H5Pget_dxpl_mpio(hid_t dxpl_id,H5FD_mpio_xfer_t * xfer_mode)478 H5Pget_dxpl_mpio(hid_t dxpl_id, H5FD_mpio_xfer_t *xfer_mode/*out*/)
479 {
480     H5P_genplist_t *plist;              /* Property list pointer */
481     herr_t      ret_value = SUCCEED;    /* Return value */
482 
483     FUNC_ENTER_API(FAIL)
484     H5TRACE2("e", "ix", dxpl_id, xfer_mode);
485 
486     if(NULL == (plist = H5P_object_verify(dxpl_id, H5P_DATASET_XFER)))
487         HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a dxpl")
488 
489     /* Get the transfer mode */
490     if(xfer_mode)
491         if(H5P_get(plist, H5D_XFER_IO_XFER_MODE_NAME, xfer_mode) < 0)
492             HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "unable to get value")
493 
494 done:
495     FUNC_LEAVE_API(ret_value)
496 } /* end H5Pget_dxpl_mpio() */
497 
498 
499 /*-------------------------------------------------------------------------
500  * Function:    H5Pset_dxpl_mpio_collective_opt
501  *
502  * Purpose:    To set a flag to choose linked chunk I/O or multi-chunk I/O
503  *        without involving decision-making inside HDF5
504  *
505  * Note:    The library will do linked chunk I/O or multi-chunk I/O without
506  *        involving communications for decision-making process.
507  *        The library won't behave as it asks for only when we find
508  *        that the low-level MPI-IO package doesn't support this.
509  *
510  * Return:    Success:    Non-negative
511  *         Failure:    Negative
512  *
513  * Programmer:    Kent Yang
514  *        ? ?, ?
515  *
516  *-------------------------------------------------------------------------
517  */
518 herr_t
H5Pset_dxpl_mpio_collective_opt(hid_t dxpl_id,H5FD_mpio_collective_opt_t opt_mode)519 H5Pset_dxpl_mpio_collective_opt(hid_t dxpl_id, H5FD_mpio_collective_opt_t opt_mode)
520 {
521     H5P_genplist_t *plist;      /* Property list pointer */
522     herr_t ret_value = SUCCEED; /* Return value */
523 
524     FUNC_ENTER_API(FAIL)
525     H5TRACE2("e", "iDc", dxpl_id, opt_mode);
526 
527     if(dxpl_id == H5P_DEFAULT)
528         HGOTO_ERROR(H5E_PLIST, H5E_BADVALUE, FAIL, "can't set values in default property list")
529 
530     /* Check arguments */
531     if(NULL == (plist = H5P_object_verify(dxpl_id, H5P_DATASET_XFER)))
532         HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a dxpl")
533 
534     /* Set the transfer mode */
535     if(H5P_set(plist, H5D_XFER_MPIO_COLLECTIVE_OPT_NAME, &opt_mode) < 0)
536         HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "unable to set value")
537 
538 done:
539     FUNC_LEAVE_API(ret_value)
540 } /* end H5Pset_dxpl_mpio_collective_opt() */
541 
542 
543 /*-------------------------------------------------------------------------
544  * Function:    H5Pset_dxpl_mpio_chunk_opt
545  *
546  * Purpose:    To set a flag to choose linked chunk I/O or multi-chunk I/O
547  *        without involving decision-making inside HDF5
548  *
549  * Note:    The library will do linked chunk I/O or multi-chunk I/O without
550  *        involving communications for decision-making process.
551  *        The library won't behave as it asks for only when we find
552  *        that the low-level MPI-IO package doesn't support this.
553  *
554  * Return:    Success:    Non-negative
555  *         Failure:    Negative
556  *
557  * Programmer:    Kent Yang
558  *        ? ?, ?
559  *
560  *-------------------------------------------------------------------------
561  */
562 herr_t
H5Pset_dxpl_mpio_chunk_opt(hid_t dxpl_id,H5FD_mpio_chunk_opt_t opt_mode)563 H5Pset_dxpl_mpio_chunk_opt(hid_t dxpl_id, H5FD_mpio_chunk_opt_t opt_mode)
564 {
565     H5P_genplist_t *plist;      /* Property list pointer */
566     herr_t ret_value = SUCCEED; /* Return value */
567 
568     FUNC_ENTER_API(FAIL)
569     H5TRACE2("e", "iDh", dxpl_id, opt_mode);
570 
571     if(dxpl_id == H5P_DEFAULT)
572         HGOTO_ERROR(H5E_PLIST, H5E_BADVALUE, FAIL, "can't set values in default property list")
573 
574     /* Check arguments */
575     if(NULL == (plist = H5P_object_verify(dxpl_id, H5P_DATASET_XFER)))
576         HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a dxpl")
577 
578     /* Set the transfer mode */
579     if(H5P_set(plist, H5D_XFER_MPIO_CHUNK_OPT_HARD_NAME, &opt_mode) < 0)
580         HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "unable to set value")
581 
582 done:
583     FUNC_LEAVE_API(ret_value)
584 } /* end H5Pset_dxpl_mpio_chunk_opt() */
585 
586 
587 /*-------------------------------------------------------------------------
588  * Function:    H5Pset_dxpl_mpio_chunk_opt_num
589  *
590  * Purpose:    To set a threshold for doing linked chunk IO
591  *
592  * Note:    If the number is greater than the threshold set by the user,
593  *        the library will do linked chunk I/O; otherwise, I/O will be
594  *        done for every chunk.
595  *
596  * Return:    Success:    Non-negative
597  *         Failure:    Negative
598  *
599  * Programmer:    Kent Yang
600  *        ? ?, ?
601  *
602  *-------------------------------------------------------------------------
603  */
604 herr_t
H5Pset_dxpl_mpio_chunk_opt_num(hid_t dxpl_id,unsigned num_chunk_per_proc)605 H5Pset_dxpl_mpio_chunk_opt_num(hid_t dxpl_id, unsigned num_chunk_per_proc)
606 {
607     H5P_genplist_t *plist;      /* Property list pointer */
608     herr_t ret_value = SUCCEED; /* Return value */
609 
610     FUNC_ENTER_API(FAIL)
611     H5TRACE2("e", "iIu", dxpl_id, num_chunk_per_proc);
612 
613     if(dxpl_id == H5P_DEFAULT)
614         HGOTO_ERROR(H5E_PLIST, H5E_BADVALUE, FAIL, "can't set values in default property list")
615 
616     /* Check arguments */
617     if(NULL == (plist = H5P_object_verify(dxpl_id, H5P_DATASET_XFER)))
618         HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a dxpl")
619 
620     /* Set the transfer mode */
621     if(H5P_set(plist, H5D_XFER_MPIO_CHUNK_OPT_NUM_NAME, &num_chunk_per_proc) < 0)
622         HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "unable to set value")
623 
624 done:
625     FUNC_LEAVE_API(ret_value)
626 } /* end H5Pset_dxpl_mpio_chunk_opt_num() */
627 
628 
629 /*-------------------------------------------------------------------------
630  * Function:    H5Pset_dxpl_mpio_chunk_opt_ratio
631  *
632  * Purpose:    To set a threshold for doing collective I/O for each chunk
633  *
634  * Note:    The library will calculate the percentage of the number of
635  *        process holding selections at each chunk. If that percentage
636  *        of number of process in the individual chunk is greater than
637  *        the threshold set by the user, the library will do collective
638  *        chunk I/O for this chunk; otherwise, independent I/O will be
639  *        done for this chunk.
640  *
641  * Return:    Success:    Non-negative
642  *         Failure:    Negative
643  *
644  * Programmer:    Kent Yang
645  *        ? ?, ?
646  *
647  *-------------------------------------------------------------------------
648  */
649 herr_t
H5Pset_dxpl_mpio_chunk_opt_ratio(hid_t dxpl_id,unsigned percent_num_proc_per_chunk)650 H5Pset_dxpl_mpio_chunk_opt_ratio(hid_t dxpl_id, unsigned percent_num_proc_per_chunk)
651 {
652     H5P_genplist_t *plist;      /* Property list pointer */
653     herr_t ret_value = SUCCEED; /* Return value */
654 
655     FUNC_ENTER_API(FAIL)
656     H5TRACE2("e", "iIu", dxpl_id, percent_num_proc_per_chunk);
657 
658     if(dxpl_id == H5P_DEFAULT)
659         HGOTO_ERROR(H5E_PLIST, H5E_BADVALUE, FAIL, "can't set values in default property list")
660 
661     /* Check arguments */
662     if(NULL == (plist = H5P_object_verify(dxpl_id, H5P_DATASET_XFER)))
663         HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a dxpl")
664 
665     /* Set the transfer mode */
666     if(H5P_set(plist, H5D_XFER_MPIO_CHUNK_OPT_RATIO_NAME, &percent_num_proc_per_chunk) < 0)
667         HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "unable to set value")
668 
669 done:
670     FUNC_LEAVE_API(ret_value)
671 } /* end H5Pset_dxpl_mpio_chunk_opt_ratio() */
672 
673 
674 /*-------------------------------------------------------------------------
675  * Function:    H5FD_mpio_fapl_get
676  *
677  * Purpose:    Returns a file access property list which could be used to
678  *        create another file the same as this one.
679  *
680  * Return:    Success:    Ptr to new file access property list with all
681  *                fields copied from the file pointer.
682  *
683  *        Failure:    NULL
684  *
685  * Programmer:    Robb Matzke
686  *              Friday, August 13, 1999
687  *
688  *-------------------------------------------------------------------------
689  */
690 static void *
H5FD_mpio_fapl_get(H5FD_t * _file)691 H5FD_mpio_fapl_get(H5FD_t *_file)
692 {
693     H5FD_mpio_t        *file = (H5FD_mpio_t*)_file;
694     H5FD_mpio_fapl_t    *fa = NULL;
695     void      *ret_value;       /* Return value */
696 
697     FUNC_ENTER_NOAPI_NOINIT
698 
699     HDassert(file);
700     HDassert(H5FD_MPIO == file->pub.driver_id);
701 
702     if(NULL == (fa = (H5FD_mpio_fapl_t *)H5MM_calloc(sizeof(H5FD_mpio_fapl_t))))
703         HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, NULL, "memory allocation failed")
704 
705     /* Duplicate communicator and Info object. */
706     if(FAIL == H5FD_mpi_comm_info_dup(file->comm, file->info, &fa->comm, &fa->info))
707     HGOTO_ERROR(H5E_INTERNAL, H5E_CANTCOPY, NULL, "Communicator/Info duplicate failed")
708 
709     /* Set return value */
710     ret_value = fa;
711 
712 done:
713     FUNC_LEAVE_NOAPI(ret_value)
714 }
715 
716 
717 /*-------------------------------------------------------------------------
718  * Function:    H5FD_mpio_fapl_copy
719  *
720  * Purpose:    Copies the mpio-specific file access properties.
721  *
722  * Return:    Success:    Ptr to a new property list
723  *
724  *        Failure:    NULL
725  *
726  * Programmer:    Albert Cheng
727  *              Jan  8, 2003
728  *
729  *-------------------------------------------------------------------------
730  */
731 static void *
H5FD_mpio_fapl_copy(const void * _old_fa)732 H5FD_mpio_fapl_copy(const void *_old_fa)
733 {
734     void        *ret_value = NULL;
735     const H5FD_mpio_fapl_t *old_fa = (const H5FD_mpio_fapl_t*)_old_fa;
736     H5FD_mpio_fapl_t    *new_fa = NULL;
737 
738     FUNC_ENTER_NOAPI_NOINIT
739 #ifdef H5FDmpio_DEBUG
740 if (H5FD_mpio_Debug[(int)'t'])
741 HDfprintf(stderr, "enter H5FD_mpio_fapl_copy\n");
742 #endif
743 
744     if(NULL == (new_fa = (H5FD_mpio_fapl_t *)H5MM_malloc(sizeof(H5FD_mpio_fapl_t))))
745         HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, NULL, "memory allocation failed")
746 
747     /* Copy the general information */
748     HDmemcpy(new_fa, old_fa, sizeof(H5FD_mpio_fapl_t));
749 
750     /* Duplicate communicator and Info object. */
751     if(FAIL == H5FD_mpi_comm_info_dup(old_fa->comm, old_fa->info, &new_fa->comm, &new_fa->info))
752     HGOTO_ERROR(H5E_INTERNAL, H5E_CANTCOPY, NULL, "Communicator/Info duplicate failed")
753     ret_value = new_fa;
754 
755 done:
756     if (NULL == ret_value){
757     /* cleanup */
758     if (new_fa)
759         H5MM_xfree(new_fa);
760     }
761 
762 #ifdef H5FDmpio_DEBUG
763 if (H5FD_mpio_Debug[(int)'t'])
764 HDfprintf(stderr, "leaving H5FD_mpio_fapl_copy\n");
765 #endif
766     FUNC_LEAVE_NOAPI(ret_value)
767 } /* end H5FD_mpio_fapl_copy() */
768 
769 
770 /*-------------------------------------------------------------------------
771  * Function:    H5FD_mpio_fapl_free
772  *
773  * Purpose:    Frees the mpio-specific file access properties.
774  *
775  * Return:    Success:    0
776  *
777  *        Failure:    -1
778  *
779  * Programmer:    Albert Cheng
780  *              Jan  8, 2003
781  *
782  * Modifications:
783  *
784  *-------------------------------------------------------------------------
785  */
786 static herr_t
H5FD_mpio_fapl_free(void * _fa)787 H5FD_mpio_fapl_free(void *_fa)
788 {
789     herr_t        ret_value = SUCCEED;
790     H5FD_mpio_fapl_t    *fa = (H5FD_mpio_fapl_t*)_fa;
791 
792     FUNC_ENTER_NOAPI_NOINIT_NOERR
793 #ifdef H5FDmpio_DEBUG
794 if (H5FD_mpio_Debug[(int)'t'])
795 HDfprintf(stderr, "in H5FD_mpio_fapl_free\n");
796 #endif
797     HDassert(fa);
798 
799     /* Free the internal communicator and INFO object */
800     HDassert(MPI_COMM_NULL!=fa->comm);
801     H5FD_mpi_comm_info_free(&fa->comm, &fa->info);
802     H5MM_xfree(fa);
803 
804 #ifdef H5FDmpio_DEBUG
805 if (H5FD_mpio_Debug[(int)'t'])
806 HDfprintf(stderr, "leaving H5FD_mpio_fapl_free\n");
807 #endif
808     FUNC_LEAVE_NOAPI(ret_value)
809 } /* end H5FD_mpio_fapl_free() */
810 
811 
812 /*-------------------------------------------------------------------------
813  * Function:    H5FD_set_mpio_atomicity
814  *
815  * Purpose:    Sets the atomicity mode
816  *
817  * Return:    Success:    Non-negative
818  *
819  *         Failure:    Negative
820  *
821  * Programmer:    Mohamad Chaarawi
822  *        Feb 14, 2012
823  *
824  *-------------------------------------------------------------------------
825  */
826 herr_t
H5FD_set_mpio_atomicity(H5FD_t * _file,hbool_t flag)827 H5FD_set_mpio_atomicity(H5FD_t *_file, hbool_t flag)
828 {
829     H5FD_mpio_t *file = (H5FD_mpio_t*)_file;
830     int          mpi_code;               /* MPI return code */
831     int          temp_flag;
832     herr_t       ret_value = SUCCEED;
833 
834     FUNC_ENTER_NOAPI_NOINIT
835 
836 #ifdef H5FDmpio_DEBUG
837     if (H5FD_mpio_Debug[(int)'t'])
838         HDfprintf(stdout, "Entering H5FD_set_mpio_atomicity\n");
839 #endif
840 
841     if (FALSE == flag)
842         temp_flag = 0;
843     else
844         temp_flag = 1;
845 
846     /* set atomicity value */
847     if (MPI_SUCCESS != (mpi_code=MPI_File_set_atomicity(file->f, temp_flag)))
848         HMPI_GOTO_ERROR(FAIL, "MPI_File_set_atomicity", mpi_code)
849 
850 done:
851 #ifdef H5FDmpio_DEBUG
852     if (H5FD_mpio_Debug[(int)'t'])
853         HDfprintf(stdout, "Leaving H5FD_set_mpio_atomicity\n");
854 #endif
855     FUNC_LEAVE_NOAPI(ret_value)
856 }
857 
858 
859 /*-------------------------------------------------------------------------
860  * Function:    H5FD_get_mpio_atomicity
861  *
862  * Purpose:    Returns the atomicity mode
863  *
864  * Return:    Success:    Non-negative
865  *
866  *         Failure:    Negative
867  *
868  * Programmer:    Mohamad Chaarawi
869  *        Feb 14, 2012
870  *
871  *-------------------------------------------------------------------------
872  */
873 herr_t
H5FD_get_mpio_atomicity(H5FD_t * _file,hbool_t * flag)874 H5FD_get_mpio_atomicity(H5FD_t *_file, hbool_t *flag)
875 {
876     H5FD_mpio_t *file = (H5FD_mpio_t*)_file;
877     int          mpi_code;               /* MPI return code */
878     int          temp_flag;
879     herr_t       ret_value = SUCCEED;
880 
881     FUNC_ENTER_NOAPI_NOINIT
882 
883 #ifdef H5FDmpio_DEBUG
884     if (H5FD_mpio_Debug[(int)'t'])
885         HDfprintf(stdout, "Entering H5FD_get_mpio_atomicity\n");
886 #endif
887 
888     /* get atomicity value */
889     if (MPI_SUCCESS != (mpi_code=MPI_File_get_atomicity(file->f, &temp_flag)))
890         HMPI_GOTO_ERROR(FAIL, "MPI_File_get_atomicity", mpi_code)
891 
892     if (0 != temp_flag)
893         *flag = TRUE;
894     else
895         *flag = FALSE;
896 
897 done:
898 #ifdef H5FDmpio_DEBUG
899     if (H5FD_mpio_Debug[(int)'t'])
900         HDfprintf(stdout, "Leaving H5FD_get_mpio_atomicity\n");
901 #endif
902     FUNC_LEAVE_NOAPI(ret_value)
903 }
904 
905 
906 /*-------------------------------------------------------------------------
907  * Function:    H5FD_mpio_open
908  *
909  * Purpose:     Opens a file with name NAME.  The FLAGS are a bit field with
910  *        purpose similar to the second argument of open(2) and which
911  *        are defined in H5Fpublic.h. The file access property list
 *        FAPL_ID contains the driver properties and MAXADDR
913  *        is the largest address which this file will be expected to
914  *        access.  This is collective.
915  *
916  * Return:      Success:        A new file pointer.
917  *
918  *              Failure:        NULL
919  *
920  * Programmer:
921  *              January 30, 1998
922  *
923  *-------------------------------------------------------------------------
924  */
925 static H5FD_t *
H5FD_mpio_open(const char * name,unsigned flags,hid_t fapl_id,haddr_t H5_ATTR_UNUSED maxaddr)926 H5FD_mpio_open(const char *name, unsigned flags, hid_t fapl_id,
927         haddr_t H5_ATTR_UNUSED maxaddr)
928 {
929     H5FD_mpio_t            *file=NULL;
930     MPI_File            fh;
931     unsigned                    file_opened=0;  /* Flag to indicate that the file was successfully opened */
932     int                mpi_amode;
933     int                mpi_rank;       /* MPI rank of this process */
934     int                mpi_size;       /* Total number of MPI processes */
935     int                mpi_code;    /* mpi return code */
936     MPI_Offset            size;
937     const H5FD_mpio_fapl_t    *fa = NULL;
938     H5FD_mpio_fapl_t        _fa;
939     H5P_genplist_t *plist;      /* Property list pointer */
940     MPI_Comm                    comm_dup = MPI_COMM_NULL;
941     MPI_Info                    info_dup = MPI_INFO_NULL;
942     H5FD_t            *ret_value;     /* Return value */
943 
944     FUNC_ENTER_NOAPI_NOINIT
945 
946 #ifdef H5FDmpio_DEBUG
947     if (H5FD_mpio_Debug[(int)'t']) {
948         HDfprintf(stdout, "Entering H5FD_mpio_open(name=\"%s\", flags=0x%x, "
949         "fapl_id=%d, maxaddr=%lu)\n", name, flags, (int)fapl_id, (unsigned long)maxaddr);
950     }
951 #endif
952 
953     /* Obtain a pointer to mpio-specific file access properties */
954     if(NULL == (plist = H5P_object_verify(fapl_id, H5P_FILE_ACCESS)))
955         HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, NULL, "not a file access property list")
956     if(H5P_FILE_ACCESS_DEFAULT == fapl_id || H5FD_MPIO != H5P_peek_driver(plist)) {
957     _fa.comm = MPI_COMM_SELF; /*default*/
958     _fa.info = MPI_INFO_NULL; /*default*/
959     fa = &_fa;
960     } /* end if */
961     else {
962         if(NULL == (fa = (const H5FD_mpio_fapl_t *)H5P_peek_driver_info(plist)))
963         HGOTO_ERROR(H5E_PLIST, H5E_BADVALUE, NULL, "bad VFL driver info")
964     } /* end else */
965 
966     /* Duplicate communicator and Info object for use by this file. */
967     if(FAIL == H5FD_mpi_comm_info_dup(fa->comm, fa->info, &comm_dup, &info_dup))
968     HGOTO_ERROR(H5E_INTERNAL, H5E_CANTCOPY, NULL, "Communicator/Info duplicate failed")
969 
970     /* convert HDF5 flags to MPI-IO flags */
971     /* some combinations are illegal; let MPI-IO figure it out */
972     mpi_amode  = (flags & H5F_ACC_RDWR) ? MPI_MODE_RDWR : MPI_MODE_RDONLY;
973     if(flags & H5F_ACC_CREAT)
974         mpi_amode |= MPI_MODE_CREATE;
975     if(flags & H5F_ACC_EXCL)
976         mpi_amode |= MPI_MODE_EXCL;
977 
978 #ifdef H5FDmpio_DEBUG
979     /* Check for debug commands in the info parameter */
980     {
981         if(MPI_INFO_NULL != info_dup) {
982             char debug_str[128];
983             int flag;
984 
985             MPI_Info_get(fa->info, H5F_MPIO_DEBUG_KEY, sizeof(debug_str) - 1, debug_str, &flag);
986             if(flag) {
987                 int i;
988 
989                 HDfprintf(stdout, "H5FD_mpio debug flags = '%s'\n", debug_str);
990                 for(i = 0; debug_str[i]/*end of string*/ && i < 128/*just in case*/; ++i)
991                     H5FD_mpio_Debug[(int)debug_str[i]] = 1;
992             }
993         }
994     }
995 #endif
996 
997     if(MPI_SUCCESS != (mpi_code = MPI_File_open(comm_dup, name, mpi_amode, info_dup, &fh)))
998         HMPI_GOTO_ERROR(NULL, "MPI_File_open failed", mpi_code)
999     file_opened=1;
1000 
1001     /* Get the MPI rank of this process and the total number of processes */
1002     if (MPI_SUCCESS != (mpi_code=MPI_Comm_rank (comm_dup, &mpi_rank)))
1003         HMPI_GOTO_ERROR(NULL, "MPI_Comm_rank failed", mpi_code)
1004     if (MPI_SUCCESS != (mpi_code=MPI_Comm_size (comm_dup, &mpi_size)))
1005         HMPI_GOTO_ERROR(NULL, "MPI_Comm_size failed", mpi_code)
1006 
1007     /* Build the return value and initialize it */
1008     if(NULL == (file = (H5FD_mpio_t *)H5MM_calloc(sizeof(H5FD_mpio_t))))
1009         HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, NULL, "memory allocation failed")
1010     file->f = fh;
1011     file->comm = comm_dup;
1012     file->info = info_dup;
1013     file->mpi_rank = mpi_rank;
1014     file->mpi_size = mpi_size;
1015 
1016     /* Only processor p0 will get the filesize and broadcast it. */
1017     if (mpi_rank == 0) {
1018         if (MPI_SUCCESS != (mpi_code=MPI_File_get_size(fh, &size)))
1019             HMPI_GOTO_ERROR(NULL, "MPI_File_get_size failed", mpi_code)
1020     } /* end if */
1021 
1022     /* Broadcast file size */
1023     if(MPI_SUCCESS != (mpi_code = MPI_Bcast(&size, (int)sizeof(MPI_Offset), MPI_BYTE, 0, comm_dup)))
1024         HMPI_GOTO_ERROR(NULL, "MPI_Bcast failed", mpi_code)
1025 
1026     /* Determine if the file should be truncated */
1027     if(size && (flags & H5F_ACC_TRUNC)) {
1028         if (MPI_SUCCESS != (mpi_code=MPI_File_set_size(fh, (MPI_Offset)0)))
1029             HMPI_GOTO_ERROR(NULL, "MPI_File_set_size failed", mpi_code)
1030 
1031         /* Don't let any proc return until all have truncated the file. */
1032         if (MPI_SUCCESS!= (mpi_code=MPI_Barrier(comm_dup)))
1033             HMPI_GOTO_ERROR(NULL, "MPI_Barrier failed", mpi_code)
1034 
1035         /* File is zero size now */
1036         size = 0;
1037     } /* end if */
1038 
1039     /* Set the size of the file (from library's perspective) */
1040     file->eof = H5FD_mpi_MPIOff_to_haddr(size);
1041     file->local_eof = file->eof;
1042 
1043     /* Set return value */
1044     ret_value=(H5FD_t*)file;
1045 
1046 done:
1047     if(ret_value==NULL) {
1048         if(file_opened)
1049             MPI_File_close(&fh);
1050     if (MPI_COMM_NULL != comm_dup)
1051         MPI_Comm_free(&comm_dup);
1052     if (MPI_INFO_NULL != info_dup)
1053         MPI_Info_free(&info_dup);
1054     if (file)
1055         H5MM_xfree(file);
1056     } /* end if */
1057 
1058 #ifdef H5FDmpio_DEBUG
1059     if (H5FD_mpio_Debug[(int)'t'])
1060         HDfprintf(stdout, "Leaving H5FD_mpio_open\n" );
1061 #endif
1062     FUNC_LEAVE_NOAPI(ret_value)
1063 } /* end H5FD_mpio_open() */
1064 
1065 
1066 /*-------------------------------------------------------------------------
1067  * Function:    H5FD_mpio_close
1068  *
1069  * Purpose:     Closes a file.  This is collective.
1070  *
1071  * Return:      Success:    Non-negative
1072  *
1073  *         Failure:    Negative
1074  *
1075  * Programmer:  Unknown
1076  *              January 30, 1998
1077  *
1078  * Modifications:
1079  *         Robb Matzke, 1998-02-18
1080  *        Added the ACCESS_PARMS argument.
1081  *
1082  *         Robb Matzke, 1999-08-06
1083  *        Modified to work with the virtual file layer.
1084  *
1085  *         Albert Cheng, 2003-04-17
1086  *        Free the communicator stored.
1087  *-------------------------------------------------------------------------
1088  */
1089 static herr_t
H5FD_mpio_close(H5FD_t * _file)1090 H5FD_mpio_close(H5FD_t *_file)
1091 {
1092     H5FD_mpio_t    *file = (H5FD_mpio_t*)_file;
1093     int        mpi_code;            /* MPI return code */
1094     herr_t      ret_value=SUCCEED;      /* Return value */
1095 
1096     FUNC_ENTER_NOAPI_NOINIT
1097 
1098 #ifdef H5FDmpio_DEBUG
1099     if (H5FD_mpio_Debug[(int)'t'])
1100         HDfprintf(stdout, "Entering H5FD_mpio_close\n");
1101 #endif
1102     HDassert(file);
1103     HDassert(H5FD_MPIO==file->pub.driver_id);
1104 
1105     /* MPI_File_close sets argument to MPI_FILE_NULL */
1106     if (MPI_SUCCESS != (mpi_code=MPI_File_close(&(file->f)/*in,out*/)))
1107         HMPI_GOTO_ERROR(FAIL, "MPI_File_close failed", mpi_code)
1108 
1109     /* Clean up other stuff */
1110     H5FD_mpi_comm_info_free(&file->comm, &file->info);
1111     H5MM_xfree(file);
1112 
1113 done:
1114 #ifdef H5FDmpio_DEBUG
1115     if (H5FD_mpio_Debug[(int)'t'])
1116         HDfprintf(stdout, "Leaving H5FD_mpio_close\n");
1117 #endif
1118     FUNC_LEAVE_NOAPI(ret_value)
1119 }
1120 
1121 
1122 /*-------------------------------------------------------------------------
1123  * Function:    H5FD_mpio_query
1124  *
1125  * Purpose:    Set the flags that this VFL driver is capable of supporting.
1126  *              (listed in H5FDpublic.h)
1127  *
1128  * Return:    Success:    non-negative
1129  *
1130  *        Failure:    negative
1131  *
1132  * Programmer:    Quincey Koziol
1133  *              Friday, August 25, 2000
1134  *
1135  * Modifications:
1136  *
1137  *        John Mainzer -- 9/21/05
1138  *        Modified code to turn off the
1139  *        H5FD_FEAT_ACCUMULATE_METADATA_WRITE flag.
1140  *              With the movement of
1141  *        all cache writes to process 0, this flag has become
1142  *        problematic in PHDF5.
1143  *
1144  *-------------------------------------------------------------------------
1145  */
1146 static herr_t
H5FD_mpio_query(const H5FD_t H5_ATTR_UNUSED * _file,unsigned long * flags)1147 H5FD_mpio_query(const H5FD_t H5_ATTR_UNUSED *_file, unsigned long *flags /* out */)
1148 {
1149     FUNC_ENTER_NOAPI_NOINIT_NOERR
1150 
1151     /* Set the VFL feature flags that this driver supports */
1152     if(flags) {
1153         *flags=0;
1154         *flags |= H5FD_FEAT_AGGREGATE_METADATA;     /* OK to aggregate metadata allocations                             */
1155         *flags |= H5FD_FEAT_AGGREGATE_SMALLDATA;    /* OK to aggregate "small" raw data allocations                     */
1156         *flags |= H5FD_FEAT_HAS_MPI;                /* This driver uses MPI                                             */
1157         *flags |= H5FD_FEAT_ALLOCATE_EARLY;         /* Allocate space early instead of late                             */
1158         *flags |= H5FD_FEAT_DEFAULT_VFD_COMPATIBLE; /* VFD creates a file which can be opened with the default VFD      */
1159     } /* end if */
1160 
1161     FUNC_LEAVE_NOAPI(SUCCEED)
1162 }
1163 
1164 
1165 /*-------------------------------------------------------------------------
1166  * Function:    H5FD_mpio_get_eoa
1167  *
1168  * Purpose:    Gets the end-of-address marker for the file. The EOA marker
1169  *        is the first address past the last byte allocated in the
1170  *        format address space.
1171  *
1172  * Return:    Success:    The end-of-address marker.
1173  *
1174  *        Failure:    HADDR_UNDEF
1175  *
1176  * Programmer:    Robb Matzke
1177  *              Friday, August  6, 1999
1178  *
1179  * Modifications:
1180  *              Raymond Lu
1181  *              21 Dec. 2006
1182  *              Added the parameter TYPE.  It's only used for MULTI driver.
1183  *
1184  *-------------------------------------------------------------------------
1185  */
1186 static haddr_t
H5FD_mpio_get_eoa(const H5FD_t * _file,H5FD_mem_t H5_ATTR_UNUSED type)1187 H5FD_mpio_get_eoa(const H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type)
1188 {
1189     const H5FD_mpio_t    *file = (const H5FD_mpio_t*)_file;
1190 
1191     FUNC_ENTER_NOAPI_NOINIT_NOERR
1192 
1193     HDassert(file);
1194     HDassert(H5FD_MPIO==file->pub.driver_id);
1195 
1196     FUNC_LEAVE_NOAPI(file->eoa)
1197 }
1198 
1199 
1200 /*-------------------------------------------------------------------------
1201  * Function:    H5FD_mpio_set_eoa
1202  *
1203  * Purpose:    Set the end-of-address marker for the file. This function is
1204  *        called shortly after an existing HDF5 file is opened in order
1205  *        to tell the driver where the end of the HDF5 data is located.
1206  *
1207  * Return:    Success:    0
1208  *
1209  *        Failure:    -1
1210  *
1211  * Programmer:    Robb Matzke
1212  *              Friday, August 6, 1999
1213  *
1214  * Modifications:
1215  *              Raymond Lu
1216  *              21 Dec. 2006
1217  *              Added the parameter TYPE.  It's only used for MULTI driver.
1218  *
1219  *-------------------------------------------------------------------------
1220  */
1221 static herr_t
H5FD_mpio_set_eoa(H5FD_t * _file,H5FD_mem_t H5_ATTR_UNUSED type,haddr_t addr)1222 H5FD_mpio_set_eoa(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type, haddr_t addr)
1223 {
1224     H5FD_mpio_t    *file = (H5FD_mpio_t*)_file;
1225 
1226     FUNC_ENTER_NOAPI_NOINIT_NOERR
1227 
1228     HDassert(file);
1229     HDassert(H5FD_MPIO==file->pub.driver_id);
1230 
1231     file->eoa = addr;
1232 
1233     FUNC_LEAVE_NOAPI(SUCCEED)
1234 }
1235 
1236 
1237 /*-------------------------------------------------------------------------
1238  * Function:    H5FD_mpio_get_eof
1239  *
1240  * Purpose:    Gets the end-of-file marker for the file. The EOF marker
1241  *        is the real size of the file.
1242  *
1243  *        The MPIO driver doesn't bother keeping this field updated
1244  *        since that's a relatively expensive operation. Fortunately
1245  *        the library only needs the EOF just after the file is opened
1246  *        in order to determine whether the file is empty, truncated,
1247  *        or okay.  Therefore, any MPIO I/O function will set its value
1248  *        to HADDR_UNDEF which is the error return value of this
1249  *        function.
1250  *
1251  *              Keeping the EOF updated (during write calls) is expensive
1252  *              because any process may extend the physical end of the
1253  *              file. -QAK
1254  *
 * Return:    Success:    The end-of-file marker.
1256  *
1257  *        Failure:    HADDR_UNDEF
1258  *
1259  * Programmer:    Robb Matzke
1260  *              Friday, August  6, 1999
1261  *
1262  * Modifications:
1263  *
1264  *-------------------------------------------------------------------------
1265  */
1266 static haddr_t
H5FD_mpio_get_eof(const H5FD_t * _file,H5FD_mem_t H5_ATTR_UNUSED type)1267 H5FD_mpio_get_eof(const H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type)
1268 {
1269     const H5FD_mpio_t    *file = (const H5FD_mpio_t*)_file;
1270 
1271     FUNC_ENTER_NOAPI_NOINIT_NOERR
1272 
1273     HDassert(file);
1274     HDassert(H5FD_MPIO==file->pub.driver_id);
1275 
1276     FUNC_LEAVE_NOAPI(file->eof)
1277 }
1278 
1279 
1280 /*-------------------------------------------------------------------------
1281  * Function:       H5FD_mpio_get_handle
1282  *
1283  * Purpose:        Returns the file handle of MPIO file driver.
1284  *
1285  * Returns:        Non-negative if succeed or negative if fails.
1286  *
1287  * Programmer:     Raymond Lu
1288  *                 Sept. 16, 2002
1289  *
1290  * Modifications:
1291  *
1292  *-------------------------------------------------------------------------
1293 */
1294 static herr_t
H5FD_mpio_get_handle(H5FD_t * _file,hid_t H5_ATTR_UNUSED fapl,void ** file_handle)1295 H5FD_mpio_get_handle(H5FD_t *_file, hid_t H5_ATTR_UNUSED fapl, void** file_handle)
1296 {
1297     H5FD_mpio_t         *file = (H5FD_mpio_t *)_file;
1298     herr_t              ret_value = SUCCEED;
1299 
1300     FUNC_ENTER_NOAPI_NOINIT
1301 
1302     if(!file_handle)
1303         HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "file handle not valid")
1304 
1305     *file_handle = &(file->f);
1306 
1307 done:
1308     FUNC_LEAVE_NOAPI(ret_value)
1309 }
1310 
1311 
1312 /*-------------------------------------------------------------------------
1313  * Function:       H5FD_mpio_get_info
1314  *
1315  * Purpose:        Returns the file info of MPIO file driver.
1316  *
1317  * Returns:        Non-negative if succeed or negative if fails.
1318  *
1319  * Programmer:     John Mainzer
1320  *                 April 4, 2017
1321  *
1322  * Modifications:
1323  *
1324  *-------------------------------------------------------------------------
1325 */
1326 static herr_t
H5FD_mpio_get_info(H5FD_t * _file,void ** mpi_info)1327 H5FD_mpio_get_info(H5FD_t *_file, void** mpi_info)
1328 {
1329     H5FD_mpio_t         *file = (H5FD_mpio_t *)_file;
1330     herr_t              ret_value = SUCCEED;
1331 
1332     FUNC_ENTER_NOAPI_NOINIT
1333 
1334     if(!mpi_info)
1335         HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "mpi info not valid")
1336 
1337     *mpi_info = &(file->info);
1338 
1339 done:
1340     FUNC_LEAVE_NOAPI(ret_value)
1341 
1342 } /* H5FD_mpio_get_info() */
1343 
1344 
1345 /*-------------------------------------------------------------------------
1346  * Function:    H5FD_mpio_read
1347  *
1348  * Purpose:    Reads SIZE bytes of data from FILE beginning at address ADDR
1349  *        into buffer BUF according to data transfer properties in
1350  *        DXPL_ID using potentially complex file and buffer types to
1351  *        effect the transfer.
1352  *
1353  *        Reading past the end of the MPI file returns zeros instead of
1354  *        failing.  MPI is able to coalesce requests from different
1355  *        processes (collective or independent).
1356  *
1357  * Return:    Success:    Zero. Result is stored in caller-supplied
1358  *                buffer BUF.
1359  *
1360  *        Failure:    -1, Contents of buffer BUF are undefined.
1361  *
1362  * Programmer:    rky, 1998-01-30
1363  *
1364  * Modifications:
1365  *         Robb Matzke, 1998-02-18
1366  *        Added the ACCESS_PARMS argument.
1367  *
1368  *         rky, 1998-04-10
1369  *        Call independent or collective MPI read, based on
1370  *        ACCESS_PARMS.
1371  *
1372  *         Albert Cheng, 1998-06-01
1373  *        Added XFER_MODE to control independent or collective MPI
1374  *        read.
1375  *
1376  *         rky, 1998-08-16
1377  *        Use BTYPE, FTYPE, and DISP from access parms. The guts of
1378  *        H5FD_mpio_read and H5FD_mpio_write should be replaced by a
1379  *        single dual-purpose routine.
1380  *
1381  *         Robb Matzke, 1999-04-21
1382  *        Changed XFER_MODE to XFER_PARMS for all H5F_*_read()
1383  *        callbacks.
1384  *
1385  *         Robb Matzke, 1999-07-28
1386  *        The ADDR argument is passed by value.
1387  *
1388  *         Robb Matzke, 1999-08-06
1389  *        Modified to work with the virtual file layer.
1390  *
1391  *        Quincey Koziol,  2002-05-14
1392  *        Only call MPI_Get_count if we can use MPI_BYTE for the MPI type
1393  *              for the I/O transfer.  Someday we might include code to decode
1394  *              the MPI type used for more complicated transfers and call
1395  *              MPI_Get_count all the time.
1396  *
1397  *              Quincey Koziol - 2002/06/17
1398  *              Removed 'disp' parameter from H5FD_mpio_setup routine and use
1399  *              the address of the dataset in MPI_File_set_view() calls, as
1400  *              necessary.
1401  *
1402  *              Quincey Koziol - 2002/06/24
1403  *              Removed "lazy" MPI_File_set_view() calls, since they would fail
1404  *              if the first I/O was a collective I/O using MPI derived types
1405  *              and the next I/O was an independent I/O.
1406  *
1407  *              Quincey Koziol - 2003/10/22-31
1408  *              Restructured code massively, straightening out logic and finally
1409  *              getting the bytes_read stuff working.
1410  *
1411  *-------------------------------------------------------------------------
1412  */
1413 static herr_t
H5FD_mpio_read(H5FD_t * _file,H5FD_mem_t H5_ATTR_UNUSED type,hid_t H5_ATTR_UNUSED dxpl_id,haddr_t addr,size_t size,void * buf)1414 H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type,
1415     hid_t H5_ATTR_UNUSED dxpl_id, haddr_t addr, size_t size, void *buf/*out*/)
1416 {
1417     H5FD_mpio_t *file = (H5FD_mpio_t*)_file;
1418     MPI_Offset  mpi_off;
1419     MPI_Status  mpi_stat;       /* Status from I/O operation */
1420     int         mpi_code;       /* mpi return code */
1421     MPI_Datatype buf_type = MPI_BYTE;      /* MPI description of the selection in memory */
1422     int         size_i;         /* Integer copy of 'size' to read */
1423 #if MPI_VERSION >= 3
1424     MPI_Count   bytes_read = 0; /* Number of bytes read in */
1425     MPI_Count   type_size;      /* MPI datatype used for I/O's size */
1426     MPI_Count   io_size;        /* Actual number of bytes requested */
1427     MPI_Count   n;
1428 #else
1429     int         bytes_read = 0; /* Number of bytes read in */
1430     int         type_size;      /* MPI datatype used for I/O's size */
1431     int         io_size;        /* Actual number of bytes requested */
1432     int         n;
1433 #endif
1434     hbool_t     use_view_this_time = FALSE;
1435     hbool_t     rank0_bcast = FALSE; /* If read-with-rank0-and-bcast flag was used */
1436     herr_t      ret_value = SUCCEED;
1437 
1438     FUNC_ENTER_NOAPI_NOINIT
1439 
1440 #ifdef H5FDmpio_DEBUG
1441     if(H5FD_mpio_Debug[(int)'t'])
1442         HDfprintf(stdout, "%s: Entering\n", FUNC);
1443 #endif
1444 
1445     /* Sanity checks */
1446     HDassert(file);
1447     HDassert(H5FD_MPIO==file->pub.driver_id);
1448     HDassert(buf);
1449 
1450     /* Portably initialize MPI status variable */
1451     HDmemset(&mpi_stat,0,sizeof(MPI_Status));
1452 
1453     /* some numeric conversions */
1454     if(H5FD_mpi_haddr_to_MPIOff(addr, &mpi_off/*out*/) < 0)
1455         HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from haddr to MPI off")
1456     size_i = (int)size;
1457     if((hsize_t)size_i != size)
1458         HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from size to size_i")
1459 
1460 #ifdef H5FDmpio_DEBUG
1461     if(H5FD_mpio_Debug[(int)'r'])
1462         HDfprintf(stdout, "%s: mpi_off = %ld  size_i = %d\n", FUNC, (long)mpi_off, size_i);
1463 #endif
1464 
1465     /* Only look for MPI views for raw data transfers */
1466     if(type == H5FD_MEM_DRAW) {
1467         H5FD_mpio_xfer_t xfer_mode;   /* I/O transfer mode */
1468 
1469         /* Get the transfer mode from the API context */
1470         if(H5CX_get_io_xfer_mode(&xfer_mode) < 0)
1471             HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "can't get MPI-I/O transfer mode")
1472 
1473         /*
1474          * Set up for a fancy xfer using complex types, or single byte block. We
1475          * wouldn't need to rely on the use_view field if MPI semantics allowed
1476          * us to test that btype=ftype=MPI_BYTE (or even MPI_TYPE_NULL, which
1477          * could mean "use MPI_BYTE" by convention).
1478          */
1479         if(xfer_mode == H5FD_MPIO_COLLECTIVE) {
1480             MPI_Datatype file_type;
1481 
1482             /* Remember that views are used */
1483             use_view_this_time = TRUE;
1484 
1485             /* Prepare for a full-blown xfer using btype, ftype, and disp */
1486             if(H5CX_get_mpi_coll_datatypes(&buf_type, &file_type) < 0)
1487                 HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "can't get MPI-I/O datatypes")
1488 
1489             /*
1490              * Set the file view when we are using MPI derived types
1491              */
1492             if(MPI_SUCCESS != (mpi_code = MPI_File_set_view(file->f, mpi_off, MPI_BYTE, file_type, H5FD_mpi_native_g, file->info)))
1493                 HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code)
1494 
1495             /* When using types, use the address as the displacement for
1496              * MPI_File_set_view and reset the address for the read to zero
1497              */
1498             mpi_off = 0;
1499         } /* end if */
1500     } /* end if */
1501 
1502     /* Read the data. */
1503     if(use_view_this_time) {
1504         H5FD_mpio_collective_opt_t coll_opt_mode;
1505 
1506 #ifdef H5FDmpio_DEBUG
1507         if(H5FD_mpio_Debug[(int)'r'])
1508             HDfprintf(stdout, "%s: using MPIO collective mode\n", FUNC);
1509 #endif
1510         /* Get the collective_opt property to check whether the application wants to do IO individually. */
1511         if(H5CX_get_mpio_coll_opt(&coll_opt_mode) < 0)
1512             HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "can't get MPI-I/O collective_op property")
1513 
1514         if(coll_opt_mode == H5FD_MPIO_COLLECTIVE_IO) {
1515 #ifdef H5FDmpio_DEBUG
1516             if(H5FD_mpio_Debug[(int)'r'])
1517                 HDfprintf(stdout, "%s: doing MPI collective IO\n", FUNC);
1518 #endif
1519             /* Check whether we should read from rank 0 and broadcast to other ranks */
1520             if(H5CX_get_mpio_rank0_bcast()) {
1521 #ifdef H5FDmpio_DEBUG
1522                 if(H5FD_mpio_Debug[(int)'r'])
1523                     HDfprintf(stdout, "%s: doing read-rank0-and-MPI_Bcast\n", FUNC);
1524 #endif
1525                 /* Indicate path we've taken */
1526                 rank0_bcast = TRUE;
1527 
1528                 /* Read on rank 0 Bcast to other ranks */
1529                 if(file->mpi_rank == 0)
1530                     if(MPI_SUCCESS != (mpi_code = MPI_File_read_at(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat)))
1531                         HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mpi_code)
1532                 if(MPI_SUCCESS != (mpi_code = MPI_Bcast(buf, size_i, buf_type, 0, file->comm)))
1533                     HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code)
1534             } /* end if */
1535             else
1536                 if(MPI_SUCCESS != (mpi_code = MPI_File_read_at_all(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat)))
1537                     HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at_all failed", mpi_code)
1538         } /* end if */
1539         else {
1540 #ifdef H5FDmpio_DEBUG
1541             if(H5FD_mpio_Debug[(int)'r'])
1542                 HDfprintf(stdout, "%s: doing MPI independent IO\n", FUNC);
1543 #endif
1544 
1545             if(MPI_SUCCESS != (mpi_code = MPI_File_read_at(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat)))
1546                 HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mpi_code)
1547         } /* end else */
1548 
1549         /*
1550          * Reset the file view when we used MPI derived types
1551          */
1552         if(MPI_SUCCESS != (mpi_code = MPI_File_set_view(file->f, (MPI_Offset)0, MPI_BYTE, MPI_BYTE, H5FD_mpi_native_g, file->info)))
1553             HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code)
1554     } /* end if */
1555     else
1556         if(MPI_SUCCESS != (mpi_code = MPI_File_read_at(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat)))
1557             HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mpi_code)
1558 
1559     /* Only retrieve bytes read if this rank _actually_ participated in I/O */
1560     if(!rank0_bcast || (rank0_bcast && file->mpi_rank == 0) ) {
1561         /* How many bytes were actually read? */
1562 #if MPI_VERSION >= 3
1563         if(MPI_SUCCESS != (mpi_code = MPI_Get_elements_x(&mpi_stat, buf_type, &bytes_read)))
1564 #else
1565         if(MPI_SUCCESS != (mpi_code = MPI_Get_elements(&mpi_stat, MPI_BYTE, &bytes_read)))
1566 #endif
1567             HMPI_GOTO_ERROR(FAIL, "MPI_Get_elements failed", mpi_code)
1568     } /* end if */
1569 
1570     /* If the rank0-bcast feature was used, broadcast the # of bytes read to
1571      * other ranks, which didn't perform any I/O.
1572      */
1573     /* NOTE: This could be optimized further to be combined with the broadcast
1574      *          of the data.  (QAK - 2019/1/2)
1575      */
1576     if(rank0_bcast)
1577         if(MPI_SUCCESS != MPI_Bcast(&bytes_read, 1, MPI_LONG_LONG, 0, file->comm))
1578             HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", 0)
1579 
1580     /* Get the type's size */
1581 #if MPI_VERSION >= 3
1582     if(MPI_SUCCESS != (mpi_code = MPI_Type_size_x(buf_type, &type_size)))
1583 #else
1584     if(MPI_SUCCESS != (mpi_code = MPI_Type_size(buf_type, &type_size)))
1585 #endif
1586         HMPI_GOTO_ERROR(FAIL, "MPI_Type_size failed", mpi_code)
1587 
1588     /* Compute the actual number of bytes requested */
1589     io_size = type_size * size_i;
1590 
1591     /* Check for read failure */
1592     if(bytes_read < 0 || bytes_read > io_size)
1593         HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "file read failed")
1594 
1595     /*
1596      * This gives us zeroes beyond end of physical MPI file.
1597      */
1598     if((n = (io_size - bytes_read)) > 0)
1599         HDmemset((char*)buf+bytes_read, 0, (size_t)n);
1600 
1601 done:
1602 #ifdef H5FDmpio_DEBUG
1603     if(H5FD_mpio_Debug[(int)'t'])
1604        HDfprintf(stdout, "%s: Leaving\n", FUNC);
1605 #endif
1606 
1607     FUNC_LEAVE_NOAPI(ret_value)
1608 }
1609 
1610 
1611 /*-------------------------------------------------------------------------
1612  * Function:    H5FD_mpio_write
1613  *
1614  * Purpose:    Writes SIZE bytes of data to FILE beginning at address ADDR
1615  *        from buffer BUF according to data transfer properties in
1616  *        DXPL_ID using potentially complex file and buffer types to
1617  *        effect the transfer.
1618  *
1619  *        MPI is able to coalesce requests from different processes
1620  *        (collective and independent).
1621  *
 * Return:    Success:    SUCCEED.  The file's cached EOF is set to
 *                HADDR_UNDEF and its local EOF is advanced when the
 *                write extends past it.
 *
 *        Failure:    FAIL
1627  *
1628  * Programmer:    Unknown
1629  *              January 30, 1998
1630  *
1631  * Modifications:
1632  *        rky, 1998-08-28
1633  *        If the file->allsame flag is set, we assume that all the
1634  *        procs in the relevant MPI communicator will write identical
1635  *        data at identical offsets in the file, so only proc 0 will
1636  *        write, and all other procs will wait for p0 to finish. This
1637  *        is useful for writing metadata, for example. Note that we
1638  *        don't _check_ that the data is identical. Also, the mechanism
1639  *        we use to eliminate the redundant writes is by requiring a
1640  *        call to H5FD_mpio_tas_allsame before the write, which is
1641  *        rather klugey. Would it be better to pass a parameter to
1642  *        low-level writes like H5F_block_write and H5F_low_write,
1643  *        instead?  Or...??? Also, when I created this mechanism I
1644  *        wanted to minimize the difference in behavior between the old
1645  *        way of doing things (i.e., all procs write) and the new way,
1646  *        so the writes are eliminated at the very lowest level, here
1647  *        in H5FD_mpio_write. It may be better to rethink that, and
1648  *        short-circuit the writes at a higher level (e.g., at the
1649  *        points in the code where H5FD_mpio_tas_allsame is called).
1650  *
1651  *
1652  *         Robb Matzke, 1998-02-18
1653  *        Added the ACCESS_PARMS argument.
1654  *
1655  *         rky, 1998-04-10
1656  *        Call independent or collective MPI write, based on
1657  *        ACCESS_PARMS.
1658  *
1659  *         rky, 1998-04-24
1660  *        Removed redundant write from H5FD_mpio_write.
1661  *
1662  *         Albert Cheng, 1998-06-01
1663  *        Added XFER_MODE to control independent or collective MPI
1664  *        write.
1665  *
1666  *         rky, 1998-08-16
1667  *        Use BTYPE, FTYPE, and DISP from access parms. The guts of
1668  *        H5FD_mpio_read and H5FD_mpio_write should be replaced by a
1669  *        single dual-purpose routine.
1670  *
1671  *         rky, 1998-08-28
1672  *        Added ALLSAME parameter to make all but proc 0 skip the
1673  *        actual write.
1674  *
1675  *         Robb Matzke, 1999-04-21
1676  *        Changed XFER_MODE to XFER_PARMS for all H5FD_*_write()
1677  *        callbacks.
1678  *
1679  *         Robb Matzke, 1999-07-28
1680  *        The ADDR argument is passed by value.
1681  *
1682  *         Robb Matzke, 1999-08-06
1683  *        Modified to work with the virtual file layer.
1684  *
1685  *        Albert Cheng, 1999-12-19
1686  *        When only-p0-write-allsame-data, p0 Bcasts the
1687  *        ret_value to other processes.  This prevents
1688  *        a racing condition (that other processes try to
1689  *        read the file before p0 finishes writing) and also
1690  *        allows all processes to report the same ret_value.
1691  *
1692  *        Kim Yates, Pat Weidhaas,  2000-09-26
1693  *        Move block of coding where only p0 writes after the
1694  *              MPI_File_set_view call.
1695  *
1696  *        Quincey Koziol,  2002-05-10
1697  *        Instead of always writing metadata from process 0, spread the
1698  *              burden among all the processes by using a round-robin rotation
1699  *              scheme.
1700  *
1701  *        Quincey Koziol,  2002-05-10
1702  *        Removed allsame code, keying off the type parameter instead.
1703  *
1704  *        Quincey Koziol,  2002-05-14
1705  *        Only call MPI_Get_count if we can use MPI_BYTE for the MPI type
1706  *              for the I/O transfer.  Someday we might include code to decode
1707  *              the MPI type used for more complicated transfers and call
1708  *              MPI_Get_count all the time.
1709  *
1710  *              Quincey Koziol - 2002/06/17
1711  *              Removed 'disp' parameter from H5FD_mpio_setup routine and use
1712  *              the address of the dataset in MPI_File_set_view() calls, as
1713  *              necessary.
1714  *
1715  *              Quincey Koziol - 2002/06/24
1716  *              Removed "lazy" MPI_File_set_view() calls, since they would fail
1717  *              if the first I/O was a collective I/O using MPI derived types
1718  *              and the next I/O was an independent I/O.
1719  *
1720  *              Quincey Koziol - 2002/07/18
1721  *              Added "block_before_meta_write" dataset transfer flag, which
1722  *              is set during writes from a metadata cache flush and indicates
1723  *              that all the processes must sync up before (one of them)
1724  *              writing metadata.
1725  *
1726  *              Quincey Koziol - 2003/10/22-31
1727  *              Restructured code massively, straightening out logic and finally
1728  *              getting the bytes_written stuff working.
1729  *
1730  *-------------------------------------------------------------------------
1731  */
1732 static herr_t
H5FD_mpio_write(H5FD_t * _file,H5FD_mem_t type,hid_t H5_ATTR_UNUSED dxpl_id,haddr_t addr,size_t size,const void * buf)1733 H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t H5_ATTR_UNUSED dxpl_id,
1734     haddr_t addr, size_t size, const void *buf)
1735 {
1736     H5FD_mpio_t            *file = (H5FD_mpio_t*)_file;
1737     MPI_Offset              mpi_off;
1738     MPI_Status          mpi_stat;       /* Status from I/O operation */
1739     MPI_Datatype        buf_type = MPI_BYTE;      /* MPI description of the selection in memory */
1740     int                    mpi_code;    /* MPI return code */
1741 #if MPI_VERSION >= 3
1742     MPI_Count                 bytes_written;
1743     MPI_Count                   type_size;      /* MPI datatype used for I/O's size */
1744     MPI_Count                   io_size;        /* Actual number of bytes requested */
1745 #else
1746     int                         bytes_written;
1747     int                         type_size;      /* MPI datatype used for I/O's size */
1748     int                         io_size;        /* Actual number of bytes requested */
1749 #endif
1750     int                         size_i;
1751     hbool_t            use_view_this_time = FALSE;
1752     H5FD_mpio_xfer_t            xfer_mode;   /* I/O transfer mode */
1753     herr_t                  ret_value = SUCCEED;
1754 
1755     FUNC_ENTER_NOAPI_NOINIT
1756 
1757 #ifdef H5FDmpio_DEBUG
1758     if (H5FD_mpio_Debug[(int)'t'])
1759         HDfprintf(stdout, "Entering H5FD_mpio_write\n" );
1760 #endif
1761     HDassert(file);
1762     HDassert(H5FD_MPIO==file->pub.driver_id);
1763     HDassert(buf);
1764 
1765     /* Verify that no data is written when between MPI_Barrier()s during file flush */
1766     HDassert(!H5CX_get_mpi_file_flushing());
1767 
1768     /* Portably initialize MPI status variable */
1769     HDmemset(&mpi_stat, 0, sizeof(MPI_Status));
1770 
1771     /* some numeric conversions */
1772     if(H5FD_mpi_haddr_to_MPIOff(addr, &mpi_off) < 0)
1773         HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from haddr to MPI off")
1774     size_i = (int)size;
1775     if((hsize_t)size_i != size)
1776         HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from size to size_i")
1777 
1778 #ifdef H5FDmpio_DEBUG
1779     if(H5FD_mpio_Debug[(int)'w'])
1780         HDfprintf(stdout, "in H5FD_mpio_write  mpi_off=%ld  size_i=%d\n", (long)mpi_off, size_i);
1781 #endif
1782 
1783     /* Get the transfer mode from the API context */
1784     if(H5CX_get_io_xfer_mode(&xfer_mode) < 0)
1785         HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "can't get MPI-I/O transfer mode")
1786 
1787     /*
1788      * Set up for a fancy xfer using complex types, or single byte block. We
1789      * wouldn't need to rely on the use_view field if MPI semantics allowed
1790      * us to test that btype=ftype=MPI_BYTE (or even MPI_TYPE_NULL, which
1791      * could mean "use MPI_BYTE" by convention).
1792      */
1793     if(xfer_mode == H5FD_MPIO_COLLECTIVE) {
1794         MPI_Datatype        file_type;
1795 
1796         /* Remember that views are used */
1797         use_view_this_time = TRUE;
1798 
1799         /* Prepare for a full-blown xfer using btype, ftype, and disp */
1800         if(H5CX_get_mpi_coll_datatypes(&buf_type, &file_type) < 0)
1801             HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "can't get MPI-I/O datatypes")
1802 
1803         /*
1804          * Set the file view when we are using MPI derived types
1805          */
1806         if(MPI_SUCCESS != (mpi_code = MPI_File_set_view(file->f, mpi_off, MPI_BYTE, file_type, H5FD_mpi_native_g, file->info)))
1807             HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code)
1808 
1809         /* When using types, use the address as the displacement for
1810          * MPI_File_set_view and reset the address for the read to zero
1811          */
1812         mpi_off = 0;
1813     } /* end if */
1814 
1815     /* Write the data. */
1816     if(use_view_this_time) {
1817         H5FD_mpio_collective_opt_t coll_opt_mode;
1818 
1819 #ifdef H5FDmpio_DEBUG
1820         if(H5FD_mpio_Debug[(int)'t'])
1821             HDfprintf(stdout, "H5FD_mpio_write: using MPIO collective mode\n");
1822 #endif
1823 
1824         /* Get the collective_opt property to check whether the application wants to do IO individually. */
1825         if(H5CX_get_mpio_coll_opt(&coll_opt_mode) < 0)
1826             HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "can't get MPI-I/O collective_op property")
1827 
1828         if(coll_opt_mode == H5FD_MPIO_COLLECTIVE_IO) {
1829 #ifdef H5FDmpio_DEBUG
1830             if(H5FD_mpio_Debug[(int)'t'])
1831                 HDfprintf(stdout, "H5FD_mpio_write: doing MPI collective IO\n");
1832 #endif
1833             if(MPI_SUCCESS != (mpi_code = MPI_File_write_at_all(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat)))
1834                 HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at_all failed", mpi_code)
1835         } /* end if */
1836         else {
1837             if(type != H5FD_MEM_DRAW)
1838                 HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "Metadata Coll opt property should be collective at this point")
1839 #ifdef H5FDmpio_DEBUG
1840             if(H5FD_mpio_Debug[(int)'t'])
1841                 HDfprintf(stdout, "H5FD_mpio_write: doing MPI independent IO\n");
1842 #endif
1843             if(MPI_SUCCESS != (mpi_code = MPI_File_write_at(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat)))
1844                 HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at failed", mpi_code)
1845         } /* end else */
1846 
1847         /* Reset the file view when we used MPI derived types */
1848         if(MPI_SUCCESS != (mpi_code = MPI_File_set_view(file->f, (MPI_Offset)0, MPI_BYTE, MPI_BYTE, H5FD_mpi_native_g,  file->info)))
1849             HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code)
1850     } else {
1851         if(MPI_SUCCESS != (mpi_code = MPI_File_write_at(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat)))
1852             HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at failed", mpi_code)
1853     }
1854 
1855     /* How many bytes were actually written? */
1856 #if MPI_VERSION >= 3
1857     if(MPI_SUCCESS != (mpi_code = MPI_Get_elements_x(&mpi_stat, buf_type, &bytes_written)))
1858 #else
1859     if(MPI_SUCCESS != (mpi_code = MPI_Get_elements(&mpi_stat, MPI_BYTE, &bytes_written)))
1860 #endif
1861         HMPI_GOTO_ERROR(FAIL, "MPI_Get_elements failed", mpi_code)
1862 
1863     /* Get the type's size */
1864 #if MPI_VERSION >= 3
1865     if(MPI_SUCCESS != (mpi_code = MPI_Type_size_x(buf_type, &type_size)))
1866 #else
1867     if(MPI_SUCCESS != (mpi_code = MPI_Type_size(buf_type, &type_size)))
1868 #endif
1869         HMPI_GOTO_ERROR(FAIL, "MPI_Type_size failed", mpi_code)
1870 
1871     /* Compute the actual number of bytes requested */
1872     io_size = type_size * size_i;
1873 
1874     /* Check for write failure */
1875     if(bytes_written != io_size)
1876         HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "file write failed")
1877 
1878     /* Each process will keep track of its perceived EOF value locally, and
1879      * ultimately we will reduce this value to the maximum amongst all
1880      * processes, but until then keep the actual eof at HADDR_UNDEF just in
1881      * case something bad happens before that point. (rather have a value
1882      * we know is wrong sitting around rather than one that could only
1883      * potentially be wrong.) */
1884     file->eof = HADDR_UNDEF;
1885 
1886     if(bytes_written && ((bytes_written + addr) > file->local_eof))
1887         file->local_eof = addr + bytes_written;
1888 
1889 done:
1890 #ifdef H5FDmpio_DEBUG
1891     if(H5FD_mpio_Debug[(int)'t'])
1892         HDfprintf(stdout, "proc %d: Leaving H5FD_mpio_write with ret_value=%d\n",
1893         file->mpi_rank, ret_value );
1894 #endif
1895     FUNC_LEAVE_NOAPI(ret_value)
1896 } /* end H5FD_mpio_write() */
1897 
1898 
1899 /*-------------------------------------------------------------------------
1900  * Function:    H5FD_mpio_flush
1901  *
1902  * Purpose:     Makes sure that all data is on disk.  This is collective.
1903  *
1904  * Return:      Success:    Non-negative
1905  *
1906  *         Failure:    Negative
1907  *
1908  * Programmer:  Robb Matzke
1909  *              January 30, 1998
1910  *
1911  *-------------------------------------------------------------------------
1912  */
1913 static herr_t
H5FD_mpio_flush(H5FD_t * _file,hid_t H5_ATTR_UNUSED dxpl_id,hbool_t closing)1914 H5FD_mpio_flush(H5FD_t *_file, hid_t H5_ATTR_UNUSED dxpl_id, hbool_t closing)
1915 {
1916     H5FD_mpio_t        *file = (H5FD_mpio_t*)_file;
1917     int            mpi_code;    /* mpi return code */
1918     herr_t              ret_value = SUCCEED;
1919 
1920     FUNC_ENTER_NOAPI_NOINIT
1921 
1922 #ifdef H5FDmpio_DEBUG
1923     if(H5FD_mpio_Debug[(int)'t'])
1924         HDfprintf(stdout, "Entering %s\n", FUNC);
1925 #endif
1926     HDassert(file);
1927     HDassert(H5FD_MPIO == file->pub.driver_id);
1928 
1929     /* Only sync the file if we are not going to immediately close it */
1930     if(!closing)
1931         if(MPI_SUCCESS != (mpi_code = MPI_File_sync(file->f)))
1932             HMPI_GOTO_ERROR(FAIL, "MPI_File_sync failed", mpi_code)
1933 
1934 done:
1935 #ifdef H5FDmpio_DEBUG
1936     if(H5FD_mpio_Debug[(int)'t'])
1937         HDfprintf(stdout, "Leaving %s\n", FUNC);
1938 #endif
1939 
1940     FUNC_LEAVE_NOAPI(ret_value)
1941 } /* end H5FD_mpio_flush() */
1942 
1943 
1944 /*-------------------------------------------------------------------------
1945  * Function:    H5FD_mpio_truncate
1946  *
1947  * Purpose:     Make certain the file's size matches it's allocated size
1948  *
1949  *              This is a little sticky in the mpio case, as it is not
1950  *              easy for us to track the current EOF by extracting it from
1951  *              write calls.
1952  *
1953  *              Instead, we first check to see if the eoa has changed since
1954  *              the last call to this function.  If it has, we call
1955  *              MPI_File_get_size() to determine the current EOF, and
1956  *              only call MPI_File_set_size() if this value disagrees
1957  *              with the current eoa.
1958  *
1959  * Return:      Success:    Non-negative
1960  *         Failure:    Negative
1961  *
1962  * Programmer:  Quincey Koziol
1963  *              January 31, 2008
1964  *
1965  * Changes:     Heavily reworked to avoid unnecessary MPI_File_set_size()
1966  *              calls.  The hope is that these calls are superfluous in the
1967  *              typical case, allowing us to avoid truncates most of the
1968  *              time.
1969  *
1970  *              The basic idea is to query the file system to get the
1971  *              current eof, and only truncate if the file systems
1972  *              conception of the eof disagrees with our eoa.
1973  *
1974  *                                                 JRM -- 10/27/17
1975  *
1976  *-------------------------------------------------------------------------
1977  */
1978 static herr_t
H5FD_mpio_truncate(H5FD_t * _file,hid_t H5_ATTR_UNUSED dxpl_id,hbool_t H5_ATTR_UNUSED closing)1979 H5FD_mpio_truncate(H5FD_t *_file, hid_t H5_ATTR_UNUSED dxpl_id, hbool_t H5_ATTR_UNUSED closing)
1980 {
1981     H5FD_mpio_t        *file = (H5FD_mpio_t*)_file;
1982     herr_t              ret_value = SUCCEED;
1983 
1984     FUNC_ENTER_NOAPI_NOINIT
1985 
1986 #ifdef H5FDmpio_DEBUG
1987     if(H5FD_mpio_Debug[(int)'t'])
1988         HDfprintf(stdout, "Entering %s\n", FUNC);
1989 #endif
1990     HDassert(file);
1991     HDassert(H5FD_MPIO == file->pub.driver_id);
1992 
1993     if(!H5F_addr_eq(file->eoa, file->last_eoa)) {
1994         int             mpi_code;       /* mpi return code */
1995         MPI_Offset      size;
1996         MPI_Offset      needed_eof;
1997 
1998         /* In principle, it is possible for the size returned by the
1999          * call to MPI_File_get_size() to depend on whether writes from
2000          * all proceeses have completed at the time process 0 makes the
2001          * call.
2002          *
2003          * In practice, most (all?) truncate calls will come after a barrier
2004          * and with no interviening writes to the file (with the possible
2005          * exception of sueprblock / superblock extension message updates).
2006          *
2007          * Check the "MPI file closing" flag in the API context to determine
2008          * if we can skip the barrier.
2009          */
2010         if(!H5CX_get_mpi_file_flushing())
2011             if(MPI_SUCCESS != (mpi_code = MPI_Barrier(file->comm)))
2012                 HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code)
2013 
2014         /* Only processor p0 will get the filesize and broadcast it. */
2015         /* (Note that throwing an error here will cause non-rank 0 processes
2016          *      to hang in following Bcast.  -QAK, 3/17/2018)
2017          */
2018         if(0 == file->mpi_rank)
2019             if(MPI_SUCCESS != (mpi_code = MPI_File_get_size(file->f, &size)))
2020                 HMPI_GOTO_ERROR(FAIL, "MPI_File_get_size failed", mpi_code)
2021 
2022         /* Broadcast file size */
2023         if(MPI_SUCCESS != (mpi_code = MPI_Bcast(&size, (int)sizeof(MPI_Offset), MPI_BYTE, 0, file->comm)))
2024             HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code)
2025 
2026         if(H5FD_mpi_haddr_to_MPIOff(file->eoa, &needed_eof) < 0)
2027             HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "cannot convert from haddr_t to MPI_Offset")
2028 
2029         /* eoa != eof.  Set eof to eoa */
2030         if(size != needed_eof) {
2031             /* Extend the file's size */
2032             if(MPI_SUCCESS != (mpi_code = MPI_File_set_size(file->f, needed_eof)))
2033                 HMPI_GOTO_ERROR(FAIL, "MPI_File_set_size failed", mpi_code)
2034 
2035             /* In general, we must wait until all processes have finished
2036              * the truncate before any process can continue, since it is
2037              * possible that a process would write at the end of the
2038              * file, and this write would be discarded by the truncate.
2039              *
2040              * While this is an issue for a user initiated flush, it may
2041              * not be an issue at file close.  If so, we may be able to
2042              * optimize out the following barrier in that case.
2043              */
2044             if(MPI_SUCCESS != (mpi_code = MPI_Barrier(file->comm)))
2045                 HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code)
2046         } /* end if */
2047 
2048         /* Update the 'last' eoa value */
2049         file->last_eoa = file->eoa;
2050     } /* end if */
2051 
2052 done:
2053 #ifdef H5FDmpio_DEBUG
2054     if(H5FD_mpio_Debug[(int)'t'])
2055         HDfprintf(stdout, "Leaving %s\n", FUNC);
2056 #endif
2057 
2058     FUNC_LEAVE_NOAPI(ret_value)
2059 } /* end H5FD_mpio_truncate() */
2060 
2061 
2062 /*-------------------------------------------------------------------------
2063  * Function:    H5FD_mpio_mpi_rank
2064  *
2065  * Purpose:    Returns the MPI rank for a process
2066  *
2067  * Return:    Success: non-negative
2068  *        Failure: negative
2069  *
2070  * Programmer:    Quincey Koziol
2071  *              Thursday, May 16, 2002
2072  *
2073  * Modifications:
2074  *
2075  *-------------------------------------------------------------------------
2076  */
2077 static int
H5FD_mpio_mpi_rank(const H5FD_t * _file)2078 H5FD_mpio_mpi_rank(const H5FD_t *_file)
2079 {
2080     const H5FD_mpio_t    *file = (const H5FD_mpio_t*)_file;
2081 
2082     FUNC_ENTER_NOAPI_NOINIT_NOERR
2083 
2084     HDassert(file);
2085     HDassert(H5FD_MPIO==file->pub.driver_id);
2086 
2087     FUNC_LEAVE_NOAPI(file->mpi_rank)
2088 } /* end H5FD_mpio_mpi_rank() */
2089 
2090 
2091 /*-------------------------------------------------------------------------
2092  * Function:    H5FD_mpio_mpi_size
2093  *
2094  * Purpose:    Returns the number of MPI processes
2095  *
2096  * Return:    Success: non-negative
2097  *        Failure: negative
2098  *
2099  * Programmer:    Quincey Koziol
2100  *              Thursday, May 16, 2002
2101  *
2102  * Modifications:
2103  *
2104  *-------------------------------------------------------------------------
2105  */
2106 static int
H5FD_mpio_mpi_size(const H5FD_t * _file)2107 H5FD_mpio_mpi_size(const H5FD_t *_file)
2108 {
2109     const H5FD_mpio_t    *file = (const H5FD_mpio_t*)_file;
2110 
2111     FUNC_ENTER_NOAPI_NOINIT_NOERR
2112 
2113     HDassert(file);
2114     HDassert(H5FD_MPIO==file->pub.driver_id);
2115 
2116     FUNC_LEAVE_NOAPI(file->mpi_size)
2117 } /* end H5FD_mpio_mpi_size() */
2118 
2119 
2120 /*-------------------------------------------------------------------------
2121  * Function:    H5FD_mpio_communicator
2122  *
2123  * Purpose:    Returns the MPI communicator for the file.
2124  *
2125  * Return:    Success:    The communicator
2126  *
2127  *        Failure:    NULL
2128  *
2129  * Programmer:    Robb Matzke
2130  *              Monday, August  9, 1999
2131  *
2132  * Modifications:
2133  *
2134  *-------------------------------------------------------------------------
2135  */
2136 static MPI_Comm
H5FD_mpio_communicator(const H5FD_t * _file)2137 H5FD_mpio_communicator(const H5FD_t *_file)
2138 {
2139     const H5FD_mpio_t    *file = (const H5FD_mpio_t*)_file;
2140 
2141     FUNC_ENTER_NOAPI_NOINIT_NOERR
2142 
2143     HDassert(file);
2144     HDassert(H5FD_MPIO==file->pub.driver_id);
2145 
2146     FUNC_LEAVE_NOAPI(file->comm)
2147 } /* end H5FD_mpio_communicator() */
2148 
2149 #endif /* H5_HAVE_PARALLEL */
2150 
2151