/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 * Copyright by The HDF Group.                                               *
 * Copyright by the Board of Trustees of the University of Illinois.         *
 * All rights reserved.                                                      *
 *                                                                           *
 * This file is part of HDF5.  The full HDF5 copyright notice, including     *
 * terms governing use, modification, and redistribution, is contained in    *
 * the COPYING file, which can be found at the root of the source code       *
 * distribution tree, or in https://www.hdfgroup.org/licenses.               *
 * If you do not have access to either file, you may request a copy from     *
 * help@hdfgroup.org.                                                        *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

/*
 * Programmer:  rky 980813
 *
 * Purpose:     Create MPI data types for HDF5 selections.
 *
 */

/****************/
/* Module Setup */
/****************/

#include "H5Smodule.h" /* This source code file is part of the H5S module */

/***********/
/* Headers */
/***********/
#include "H5private.h"   /* Generic Functions			*/
#include "H5Dprivate.h"  /* Datasets				*/
#include "H5Eprivate.h"  /* Error handling		  	*/
#include "H5FLprivate.h" /* Free Lists				*/
#include "H5MMprivate.h" /* Memory management                    */
#include "H5Spkg.h"      /* Dataspaces 				*/
#include "H5VMprivate.h" /* Vector and array functions		*/

#ifdef H5_HAVE_PARALLEL

/****************/
/* Local Macros */
/****************/
#define H5S_MPIO_INITIAL_ALLOC_COUNT 256
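/* Note: This is the starting size of the displacement & block length buffers
 * used when walking an irregular hyperslab span tree (see H5S__obtain_datatype);
 * the buffers are doubled whenever they fill up.
 */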

/*******************/
/* Local Variables */
/*******************/

/******************/
/* Local Typedefs */
/******************/

/* Node in linked list of MPI data types created during traversal of irregular hyperslab selection */
typedef struct H5S_mpio_mpitype_node_t {
    MPI_Datatype                    type; /* MPI Datatype */
    struct H5S_mpio_mpitype_node_t *next; /* Pointer to next node in list */
} H5S_mpio_mpitype_node_t;

/* List to track MPI data types generated during traversal of irregular hyperslab selection */
typedef struct H5S_mpio_mpitype_list_t {
    H5S_mpio_mpitype_node_t *head; /* Pointer to head of list */
    H5S_mpio_mpitype_node_t *tail; /* Pointer to tail of list */
} H5S_mpio_mpitype_list_t;

/********************/
/* Local Prototypes */
/********************/
static herr_t H5S__mpio_all_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type, int *count,
                                 hbool_t *is_derived_type);
static herr_t H5S__mpio_none_type(MPI_Datatype *new_type, int *count, hbool_t *is_derived_type);
static herr_t H5S__mpio_create_point_datatype(size_t elmt_size, hsize_t num_points, MPI_Aint *disp,
                                              MPI_Datatype *new_type);
static herr_t H5S__mpio_point_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type, int *count,
                                   hbool_t *is_derived_type, hbool_t do_permute, hsize_t **permute_map,
                                   hbool_t *is_permuted);
static herr_t H5S__mpio_permute_type(const H5S_t *space, size_t elmt_size, hsize_t **permute_map,
                                     MPI_Datatype *new_type, int *count, hbool_t *is_derived_type);
static herr_t H5S__mpio_reg_hyper_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type,
                                       int *count, hbool_t *is_derived_type);
static herr_t H5S__mpio_span_hyper_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type,
                                        int *count, hbool_t *is_derived_type);
static herr_t H5S__release_datatype(H5S_mpio_mpitype_list_t *type_list);
static herr_t H5S__obtain_datatype(H5S_hyper_span_info_t *spans, const hsize_t *down, size_t elmt_size,
                                   const MPI_Datatype *elmt_type, MPI_Datatype *span_type,
                                   H5S_mpio_mpitype_list_t *type_list, unsigned op_info_i, uint64_t op_gen);

/*****************************/
/* Library Private Variables */
/*****************************/

/*********************/
/* Package Variables */
/*********************/

/* Declare a free list to manage the H5S_mpio_mpitype_node_t struct */
H5FL_DEFINE_STATIC(H5S_mpio_mpitype_node_t);

/*-------------------------------------------------------------------------
 * Function:	H5S__mpio_all_type
 *
 * Purpose:	Translate an HDF5 "all" selection into an MPI type.
 *
 * Return:	Non-negative on success, negative on failure.
 *
 * Outputs:	*new_type	  the MPI type corresponding to the selection
 *		*count		  how many objects of the new_type in selection
 *				  (useful if this is the buffer type for xfer)
 *		*is_derived_type  0 if MPI primitive type, 1 if derived
 *
 * Programmer:	rky 980813
 *
 *-------------------------------------------------------------------------
 */
static herr_t
H5S__mpio_all_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type, int *count,
                   hbool_t *is_derived_type)
{
    hsize_t  total_bytes;
    hssize_t snelmts;             /* Total number of elmts	(signed) */
    hsize_t  nelmts;              /* Total number of elmts	*/
    hsize_t  bigio_count;         /* Transition point to create derived type */
    herr_t   ret_value = SUCCEED; /* Return value */

    FUNC_ENTER_STATIC

    /* Check args */
    HDassert(space);

    /* Just treat the entire extent as a block of bytes */
    if ((snelmts = (hssize_t)H5S_GET_EXTENT_NPOINTS(space)) < 0)
        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "src dataspace has invalid selection")
    H5_CHECKED_ASSIGN(nelmts, hsize_t, snelmts, hssize_t);

    total_bytes = (hsize_t)elmt_size * nelmts;
    bigio_count = H5_mpi_get_bigio_count();
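    /* 'bigio_count' is the largest element/byte count that is passed directly
     * to an MPI routine as a 32 bit int; selections larger than this are
     * wrapped in a "large" derived datatype instead (the exact threshold
     * comes from H5_mpi_get_bigio_count()).
     */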

    /* Verify that the size can be expressed as a 32 bit integer */
    if (bigio_count >= total_bytes) {
        /* fill in the return values */
        *new_type = MPI_BYTE;
        H5_CHECKED_ASSIGN(*count, int, total_bytes, hsize_t);
        *is_derived_type = FALSE;
    }
    else {
        /* Create a LARGE derived datatype for this transfer */
        if (H5_mpio_create_large_type(total_bytes, 0, MPI_BYTE, new_type) < 0)
            HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,
                        "couldn't create a large datatype from the all selection")
        *count           = 1;
        *is_derived_type = TRUE;
    }

done:
    FUNC_LEAVE_NOAPI(ret_value)
} /* H5S__mpio_all_type() */

/*-------------------------------------------------------------------------
 * Function:	H5S__mpio_none_type
 *
 * Purpose:	Translate an HDF5 "none" selection into an MPI type.
 *
 * Return:	Non-negative on success, negative on failure.
 *
 * Outputs:	*new_type	  the MPI type corresponding to the selection
 *		*count		  how many objects of the new_type in selection
 *				  (useful if this is the buffer type for xfer)
 *		*is_derived_type  0 if MPI primitive type, 1 if derived
 *
 * Programmer:	Quincey Koziol, October 29, 2002
 *
 *-------------------------------------------------------------------------
 */
static herr_t
H5S__mpio_none_type(MPI_Datatype *new_type, int *count, hbool_t *is_derived_type)
{
    FUNC_ENTER_STATIC_NOERR

    /* fill in the return values */
    *new_type        = MPI_BYTE;
    *count           = 0;
    *is_derived_type = FALSE;

    FUNC_LEAVE_NOAPI(SUCCEED)
} /* H5S__mpio_none_type() */

/*-------------------------------------------------------------------------
 * Function:	H5S__mpio_create_point_datatype
 *
 * Purpose:	Create a derived datatype for point selections.
 *
 * Return:	Non-negative on success, negative on failure.
 *
 * Outputs:	*new_type	  the MPI type corresponding to the selection
 *
 * Programmer:	Mohamad Chaarawi
 *
 *-------------------------------------------------------------------------
 */
static herr_t
H5S__mpio_create_point_datatype(size_t elmt_size, hsize_t num_points, MPI_Aint *disp, MPI_Datatype *new_type)
{
    MPI_Datatype  elmt_type;                 /* MPI datatype for individual element */
    hbool_t       elmt_type_created = FALSE; /* Whether the element MPI datatype was created */
    int *         inner_blocks      = NULL;  /* Arrays for MPI datatypes when "large" datatype needed */
    MPI_Aint *    inner_disps       = NULL;
    MPI_Datatype *inner_types       = NULL;
#if MPI_VERSION < 3
    int *   blocks = NULL; /* Array of block sizes for MPI hindexed create call */
    hsize_t u;             /* Local index variable */
#endif
    hsize_t bigio_count;         /* Transition point to create derived type */
    int     mpi_code;            /* MPI error code */
    herr_t  ret_value = SUCCEED; /* Return value */

    FUNC_ENTER_STATIC

    /* Create an MPI datatype for an element */
    if (MPI_SUCCESS != (mpi_code = MPI_Type_contiguous((int)elmt_size, MPI_BYTE, &elmt_type)))
        HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
    elmt_type_created = TRUE;

    bigio_count = H5_mpi_get_bigio_count();

    /* Check whether standard or BIGIO processing will be employed */
    if (bigio_count >= num_points) {
#if MPI_VERSION >= 3
        /* Create an MPI datatype for the whole point selection */
        if (MPI_SUCCESS !=
            (mpi_code = MPI_Type_create_hindexed_block((int)num_points, 1, disp, elmt_type, new_type)))
            HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed_block failed", mpi_code)
#else
        /* Allocate block sizes for MPI datatype call */
        if (NULL == (blocks = (int *)H5MM_malloc(sizeof(int) * num_points)))
            HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of blocks")

        for (u = 0; u < num_points; u++)
            blocks[u] = 1;

        /* Create an MPI datatype for the whole point selection */
        if (MPI_SUCCESS !=
            (mpi_code = MPI_Type_create_hindexed((int)num_points, blocks, disp, elmt_type, new_type)))
            HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code)
#endif

        /* Commit MPI datatype for later use */
        if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_type)))
            HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
    }
    else {
        /* Use LARGE_DATATYPE::
         * We'll create an hindexed_block type for each full bigio_count's worth
         * of points, then combine those (and any remaining points) into a single
         * large struct datatype.
         */
        int     total_types, i;
        int     remaining_points;
        int     num_big_types;
        hsize_t leftover;

        /* Calculate how many Big MPI datatypes are needed to represent the buffer */
        num_big_types = (int)(num_points / bigio_count);

        leftover = (hsize_t)num_points - (hsize_t)num_big_types * (hsize_t)bigio_count;
        H5_CHECKED_ASSIGN(remaining_points, int, leftover, hsize_t);

        total_types = (int)(remaining_points) ? (num_big_types + 1) : num_big_types;
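        /* For example, with bigio_count = 2^30 (the actual value comes from
         * H5_mpi_get_bigio_count()) and a hypothetical num_points = 3 * 2^30 + 100,
         * this yields num_big_types = 3, remaining_points = 100, and
         * total_types = 4.
         */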

        /* Allocate array of MPI derived types needed */
        if (NULL == (inner_types = (MPI_Datatype *)H5MM_malloc((sizeof(MPI_Datatype) * (size_t)total_types))))
            HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of inner datatypes")

        if (NULL == (inner_blocks = (int *)H5MM_malloc(sizeof(int) * (size_t)total_types)))
            HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of inner blocks")

        if (NULL == (inner_disps = (MPI_Aint *)H5MM_malloc(sizeof(MPI_Aint) * (size_t)total_types)))
            HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of inner displacements")

#if MPI_VERSION < 3
        /* Allocate block sizes for MPI datatype call */
        if (NULL == (blocks = (int *)H5MM_malloc(sizeof(int) * bigio_count)))
            HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of blocks")

        for (u = 0; u < bigio_count; u++)
            blocks[u] = 1;
#endif

        for (i = 0; i < num_big_types; i++) {
#if MPI_VERSION >= 3
            if (MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed_block((int)bigio_count, 1,
                                                                          &disp[(hsize_t)i * bigio_count],
                                                                          elmt_type, &inner_types[i])))
                HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed_block failed", mpi_code);
#else
            if (MPI_SUCCESS !=
                (mpi_code = MPI_Type_create_hindexed((int)bigio_count, blocks, &disp[i * bigio_count],
                                                     elmt_type, &inner_types[i])))
                HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code)
#endif
            inner_blocks[i] = 1;
            inner_disps[i]  = 0;
        } /* end for */

        if (remaining_points) {
#if MPI_VERSION >= 3
            if (MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed_block(
                                    remaining_points, 1, &disp[(hsize_t)num_big_types * bigio_count],
                                    elmt_type, &inner_types[num_big_types])))
                HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed_block failed", mpi_code);
#else
            if (MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed((int)remaining_points, blocks,
                                                                    &disp[num_big_types * bigio_count],
                                                                    elmt_type, &inner_types[num_big_types])))
                HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code)
#endif
            inner_blocks[num_big_types] = 1;
            inner_disps[num_big_types]  = 0;
        }

        if (MPI_SUCCESS != (mpi_code = MPI_Type_create_struct(total_types, inner_blocks, inner_disps,
                                                              inner_types, new_type)))
            HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code);

        for (i = 0; i < total_types; i++)
            MPI_Type_free(&inner_types[i]);

        /* Commit MPI datatype for later use */
        if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_type)))
            HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
    } /* end else */

done:
    if (elmt_type_created)
        MPI_Type_free(&elmt_type);
#if MPI_VERSION < 3
    if (blocks)
        H5MM_free(blocks);
#endif
    if (inner_types)
        H5MM_free(inner_types);
    if (inner_blocks)
        H5MM_free(inner_blocks);
    if (inner_disps)
        H5MM_free(inner_disps);

    FUNC_LEAVE_NOAPI(ret_value)
} /* H5S__mpio_create_point_datatype() */

/*-------------------------------------------------------------------------
 * Function:	H5S__mpio_point_type
 *
 * Purpose:	Translate an HDF5 "point" selection into an MPI type.
 *              Create a permutation array to handle out-of-order point selections.
 *
 * Return:	Non-negative on success, negative on failure.
 *
 * Outputs:	*new_type	  the MPI type corresponding to the selection
 *		*count		  how many objects of the new_type in selection
 *				  (useful if this is the buffer type for xfer)
 *		*is_derived_type  0 if MPI primitive type, 1 if derived
 *              *permute_map      the permutation of the displacements to create
 *                                the MPI_Datatype
 *              *is_permuted      1 if the displacements are permuted, 0 if not
 *
 * Programmer:	Mohamad Chaarawi
 *
 *-------------------------------------------------------------------------
 */
static herr_t
H5S__mpio_point_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type, int *count,
                     hbool_t *is_derived_type, hbool_t do_permute, hsize_t **permute, hbool_t *is_permuted)
{
    MPI_Aint *      disp = NULL;         /* Datatype displacement for each point */
    H5S_pnt_node_t *curr = NULL;         /* Current point being operated on from the selection */
    hssize_t        snum_points;         /* Signed number of elements in selection */
    hsize_t         num_points;          /* Number of points in the selection */
    hsize_t         u;                   /* Local index variable */
    herr_t          ret_value = SUCCEED; /* Return value */

    FUNC_ENTER_STATIC

    /* Check args */
    HDassert(space);

    /* Get the total number of points selected */
    if ((snum_points = (hssize_t)H5S_GET_SELECT_NPOINTS(space)) < 0)
        HGOTO_ERROR(H5E_DATASPACE, H5E_CANTCOUNT, FAIL, "can't get number of elements selected")
    num_points = (hsize_t)snum_points;

    /* Allocate array for element displacements */
    if (NULL == (disp = (MPI_Aint *)H5MM_malloc(sizeof(MPI_Aint) * num_points)))
        HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of displacements")

    /* Allocate array for element permutation - returned to caller */
    if (do_permute)
        if (NULL == (*permute = (hsize_t *)H5MM_malloc(sizeof(hsize_t) * num_points)))
            HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate permutation array")

    /* Iterate through list of elements */
    curr = space->select.sel_info.pnt_lst->head;
    for (u = 0; u < num_points; u++) {
        /* Calculate the displacement of the current point */
        hsize_t disp_tmp = H5VM_array_offset(space->extent.rank, space->extent.size, curr->pnt);
        if (disp_tmp > LONG_MAX) /* Maximum value of type long */
            HGOTO_ERROR(H5E_DATASET, H5E_BADVALUE, FAIL, "disp overflow")
        disp[u] = (MPI_Aint)disp_tmp;
        disp[u] *= (MPI_Aint)elmt_size;

        /* This is a File Space used to set the file view, so adjust the displacements
         * to have them monotonically non-decreasing.
         * Generate the permutation array by indicating, for each point being selected,
         * the position to which it will be shifted in the sorted displacement order.
         * Example: Suppose 4 points are selected with the following displacements:
         * Pt 1: disp=6 ; Pt 2: disp=3 ; Pt 3: disp=0 ; Pt 4: disp=4
         * The permute map to sort the displacements in order will be:
         * point 1: map[0] = 4 (num_points), indicating that this point is not moved
         *                      (1st point selected)
         * point 2: map[1] = 0, indicating that this point is moved to the first position,
         *                      since disp_pt1(6) > disp_pt2(3)
         * point 3: map[2] = 0, move to position 0, because it has the lowest disp among
         *                      the points selected so far.
         * point 4: map[3] = 2, move to the 2nd position since point 1 has a higher disp,
         *                      but points 2 and 3 have lower displacements.
         */
        if (do_permute) {
            if (u > 0 && disp[u] < disp[u - 1]) {
                hsize_t s = 0, l = u, m = u / 2;

                *is_permuted = TRUE;
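
                /* Binary-search the already-sorted disp[0..u-1] for the
                 * position m where disp[u] should be inserted */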
                do {
                    if (disp[u] > disp[m])
                        s = m + 1;
                    else if (disp[u] < disp[m])
                        l = m;
                    else
                        break;
                    m = s + ((l - s) / 2);
                } while (s < l);

                if (m < u) {
                    MPI_Aint temp;

                    temp = disp[u];
                    HDmemmove(disp + m + 1, disp + m, (u - m) * sizeof(MPI_Aint));
                    disp[m] = temp;
                } /* end if */
                (*permute)[u] = m;
            } /* end if */
            else
                (*permute)[u] = num_points;
        } /* end if */
        /* this is a memory space, and no permutation is necessary to create
           the derived datatype */
        else {
            ; /* do nothing */
        }     /* end else */

        /* get the next point */
        curr = curr->next;
    } /* end for */

    /* Create the MPI datatype for the set of element displacements */
    if (H5S__mpio_create_point_datatype(elmt_size, num_points, disp, new_type) < 0)
        HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't create an MPI Datatype from point selection")

    /* Set values about MPI datatype created */
    *count           = 1;
    *is_derived_type = TRUE;

done:
    if (NULL != disp)
        H5MM_free(disp);

    /* Release the permutation buffer, if it wasn't used */
    if (!(*is_permuted) && (*permute)) {
        H5MM_free(*permute);
        *permute = NULL;
    } /* end if */

    FUNC_LEAVE_NOAPI(ret_value)
} /* H5S__mpio_point_type() */

/*-------------------------------------------------------------------------
 * Function:	H5S__mpio_permute_type
 *
 * Purpose:	Translate an HDF5 "all/hyper/point" selection into an MPI type,
 *              while applying the permutation map. This function is called if
 *              the file space selection is permuted due to out-of-order point
 *              selection and so the memory datatype has to be permuted using the
 *              permutation map created by the file selection.
 *
 * Note:	This routine is called from H5S_mpio_space_type(), which is
 *              called first for the file dataspace (creating the permutation
 *              map) and then for the memory dataspace (which applies that
 *              map here).
 *
 * Return:	Non-negative on success, negative on failure.
 *
 * Outputs:	*new_type	  the MPI type corresponding to the selection
 *		*count		  how many objects of the new_type in selection
 *				  (useful if this is the buffer type for xfer)
 *		*is_derived_type  0 if MPI primitive type, 1 if derived
 *
 * Programmer:	Mohamad Chaarawi
 *
 *-------------------------------------------------------------------------
 */
static herr_t
H5S__mpio_permute_type(const H5S_t *space, size_t elmt_size, hsize_t **permute, MPI_Datatype *new_type,
                       int *count, hbool_t *is_derived_type)
{
    MPI_Aint *     disp = NULL;           /* Datatype displacement for each point */
    H5S_sel_iter_t sel_iter;              /* Selection iteration info */
    hbool_t        sel_iter_init = FALSE; /* Selection iteration info has been initialized */
    hssize_t       snum_points;           /* Signed number of elements in selection */
    hsize_t        num_points;            /* Number of points in the selection */
    size_t         max_elem;              /* Maximum number of elements allowed in sequences */
    hsize_t        u;                     /* Local index variable */
    herr_t         ret_value = SUCCEED;   /* Return value */

    FUNC_ENTER_STATIC

    /* Check args */
    HDassert(space);

    /* Get the total number of points selected */
    if ((snum_points = (hssize_t)H5S_GET_SELECT_NPOINTS(space)) < 0)
        HGOTO_ERROR(H5E_DATASPACE, H5E_CANTCOUNT, FAIL, "can't get number of elements selected")
    num_points = (hsize_t)snum_points;

    /* Allocate array to store point displacements */
    if (NULL == (disp = (MPI_Aint *)H5MM_malloc(sizeof(MPI_Aint) * num_points)))
        HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of displacements")

    /* Initialize selection iterator */
    if (H5S_select_iter_init(&sel_iter, space, elmt_size, 0) < 0)
        HGOTO_ERROR(H5E_DATASPACE, H5E_CANTINIT, FAIL, "unable to initialize selection iterator")
    sel_iter_init = TRUE; /* Selection iteration info has been initialized */

    /* Set the number of elements to iterate over */
    H5_CHECKED_ASSIGN(max_elem, size_t, num_points, hsize_t);

    /* Loop, while elements left in selection */
    u = 0;
    while (max_elem > 0) {
        hsize_t off[H5D_IO_VECTOR_SIZE]; /* Array to store sequence offsets */
        size_t  len[H5D_IO_VECTOR_SIZE]; /* Array to store sequence lengths */
        size_t  nelem;                   /* Number of elements used in sequences */
        size_t  nseq;                    /* Number of sequences generated */
        size_t  curr_seq;                /* Current sequence being worked on */

        /* Get the sequences of bytes */
        if (H5S_SELECT_ITER_GET_SEQ_LIST(&sel_iter, (size_t)H5D_IO_VECTOR_SIZE, max_elem, &nseq, &nelem, off,
                                         len) < 0)
            HGOTO_ERROR(H5E_DATASPACE, H5E_UNSUPPORTED, FAIL, "sequence length generation failed")

        /* Loop, while sequences left to process */
        for (curr_seq = 0; curr_seq < nseq; curr_seq++) {
            hsize_t curr_off; /* Current offset within sequence */
            size_t  curr_len; /* Length of bytes left to process in sequence */

            /* Get the current offset */
            curr_off = off[curr_seq];

            /* Get the number of bytes in sequence */
            curr_len = len[curr_seq];

            /* Loop, while bytes left in sequence */
            while (curr_len > 0) {
                /* Set the displacement of the current point */
                if (curr_off > LONG_MAX)
                    HGOTO_ERROR(H5E_DATASET, H5E_BADVALUE, FAIL, "curr_off overflow")
                disp[u] = (MPI_Aint)curr_off;

                /* This is a memory displacement, so for each point selected,
                 * apply the map that was generated by the file selection */
                if ((*permute)[u] != num_points) {
                    MPI_Aint temp = disp[u];

                    HDmemmove(disp + (*permute)[u] + 1, disp + (*permute)[u],
                              (u - (*permute)[u]) * sizeof(MPI_Aint));
                    disp[(*permute)[u]] = temp;
                } /* end if */
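                /* (This mirrors the insertion done while building the map from
                 * the file selection: entries in [(*permute)[u], u) shift up one
                 * slot so the memory displacements end up in the same order as
                 * the sorted file displacements.) */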

                /* Advance to next element */
                u++;

                /* Increment offset in dataspace */
                curr_off += elmt_size;

                /* Decrement number of bytes left in sequence */
                curr_len -= elmt_size;
            } /* end while */
        }     /* end for */

        /* Decrement number of elements left to process */
        max_elem -= nelem;
    } /* end while */

    /* Create the MPI datatype for the set of element displacements */
    if (H5S__mpio_create_point_datatype(elmt_size, num_points, disp, new_type) < 0)
        HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't create an MPI Datatype from point selection")

    /* Set values about MPI datatype created */
    *count           = 1;
    *is_derived_type = TRUE;

done:
    /* Release selection iterator */
    if (sel_iter_init)
        if (H5S_SELECT_ITER_RELEASE(&sel_iter) < 0)
            HDONE_ERROR(H5E_DATASPACE, H5E_CANTRELEASE, FAIL, "unable to release selection iterator")

    /* Free memory */
    if (disp)
        H5MM_free(disp);
    if (*permute) {
        H5MM_free(*permute);
        *permute = NULL;
    } /* end if */

    FUNC_LEAVE_NOAPI(ret_value)
} /* H5S__mpio_permute_type() */

/*-------------------------------------------------------------------------
 * Function:	H5S__mpio_reg_hyper_type
 *
 * Purpose:	Translate a regular HDF5 hyperslab selection into an MPI type.
 *
 * Return:	Non-negative on success, negative on failure.
 *
 * Outputs:	*new_type	  the MPI type corresponding to the selection
 *		*count		  how many objects of the new_type in selection
 *				  (useful if this is the buffer type for xfer)
 *		*is_derived_type  0 if MPI primitive type, 1 if derived
 *
 * Programmer:	rky 980813
 *
 *-------------------------------------------------------------------------
 */
static herr_t
H5S__mpio_reg_hyper_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type, int *count,
                         hbool_t *is_derived_type)
{
    H5S_sel_iter_t sel_iter;              /* Selection iteration info */
    hbool_t        sel_iter_init = FALSE; /* Selection iteration info has been initialized */

    struct dim { /* less hassle than malloc/free & ilk */
        hssize_t start;
        hsize_t  strid;
        hsize_t  block;
        hsize_t  xtent;
        hsize_t  count;
    } d[H5S_MAX_RANK];

    hsize_t          bigio_count; /* Transition point to create derived type */
    hsize_t          offset[H5S_MAX_RANK];
    hsize_t          max_xtent[H5S_MAX_RANK];
    H5S_hyper_dim_t *diminfo; /* [rank] */
    unsigned         rank;
    MPI_Datatype     inner_type, outer_type;
    MPI_Aint         extent_len, start_disp, new_extent;
    MPI_Aint         lb;       /* Needed as an argument for MPI_Type_get_extent */
    unsigned         u;        /* Local index variable */
    int              i;        /* Local index variable */
    int              mpi_code; /* MPI return code */
    herr_t           ret_value = SUCCEED;

    FUNC_ENTER_STATIC

    /* Check args */
    HDassert(space);
    HDassert(sizeof(MPI_Aint) >= sizeof(elmt_size));

    bigio_count = H5_mpi_get_bigio_count();
    /* Initialize selection iterator */
    if (H5S_select_iter_init(&sel_iter, space, elmt_size, 0) < 0)
        HGOTO_ERROR(H5E_DATASPACE, H5E_CANTINIT, FAIL, "unable to initialize selection iterator")
    sel_iter_init = TRUE; /* Selection iteration info has been initialized */

    /* Abbreviate args */
    diminfo = sel_iter.u.hyp.diminfo;
    HDassert(diminfo);

    /* Make a local copy of the dimension info so we can operate on it */

    /* Check if this is a "flattened" regular hyperslab selection */
    if (sel_iter.u.hyp.iter_rank != 0 && sel_iter.u.hyp.iter_rank < space->extent.rank) {
        /* Flattened selection */
        rank = sel_iter.u.hyp.iter_rank;
#ifdef H5S_DEBUG
        if (H5DEBUG(S))
            HDfprintf(H5DEBUG(S), "%s: Flattened selection\n", FUNC);
#endif
        for (u = 0; u < rank; ++u) {
            H5_CHECK_OVERFLOW(diminfo[u].start, hsize_t, hssize_t)
            d[u].start = (hssize_t)diminfo[u].start + sel_iter.u.hyp.sel_off[u];
            d[u].strid = diminfo[u].stride;
            d[u].block = diminfo[u].block;
            d[u].count = diminfo[u].count;
            d[u].xtent = sel_iter.u.hyp.size[u];

#ifdef H5S_DEBUG
            if (H5DEBUG(S)) {
                HDfprintf(H5DEBUG(S),
                          "%s: start=%" PRIdHSIZE "  stride=%" PRIuHSIZE "  count=%" PRIuHSIZE
                          "  block=%" PRIuHSIZE "  xtent=%" PRIuHSIZE,
                          FUNC, d[u].start, d[u].strid, d[u].count, d[u].block, d[u].xtent);
                if (u == 0)
                    HDfprintf(H5DEBUG(S), "  rank=%u\n", rank);
                else
                    HDfprintf(H5DEBUG(S), "\n");
            }
#endif

            /* Sanity check */
            HDassert(d[u].block > 0);
            HDassert(d[u].count > 0);
            HDassert(d[u].xtent > 0);
        } /* end for */
    }     /* end if */
    else {
        /* Non-flattened selection */
        rank = space->extent.rank;
#ifdef H5S_DEBUG
        if (H5DEBUG(S))
            HDfprintf(H5DEBUG(S), "%s: Non-flattened selection\n", FUNC);
#endif
        for (u = 0; u < rank; ++u) {
            H5_CHECK_OVERFLOW(diminfo[u].start, hsize_t, hssize_t)
            d[u].start = (hssize_t)diminfo[u].start + space->select.offset[u];
            d[u].strid = diminfo[u].stride;
            d[u].block = diminfo[u].block;
            d[u].count = diminfo[u].count;
            d[u].xtent = space->extent.size[u];

#ifdef H5S_DEBUG
            if (H5DEBUG(S)) {
                HDfprintf(H5DEBUG(S),
                          "%s: start=%" PRIdHSIZE "  stride=%" PRIuHSIZE "  count=%" PRIuHSIZE
                          "  block=%" PRIuHSIZE "  xtent=%" PRIuHSIZE,
                          FUNC, d[u].start, d[u].strid, d[u].count, d[u].block, d[u].xtent);
                if (u == 0)
                    HDfprintf(H5DEBUG(S), "  rank=%u\n", rank);
                else
                    HDfprintf(H5DEBUG(S), "\n");
            }
#endif

            /* Sanity check */
            HDassert(d[u].block > 0);
            HDassert(d[u].count > 0);
            HDassert(d[u].xtent > 0);
        } /* end for */
    }     /* end else */

    /**********************************************************************
        Compute array "offset[rank]" which gives the offsets for a multi-
        dimensional array with dimensions "d[i].xtent" (i=0,1,...,rank-1).
    **********************************************************************/
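    /* For example, with rank == 3 the code below produces:
     *   offset[2] = 1;                        max_xtent[2] = d[2].xtent;
     *   offset[1] = d[2].xtent;               max_xtent[1] = d[1].xtent * d[2].xtent;
     *   offset[0] = d[1].xtent * d[2].xtent;  max_xtent[0] = d[0].xtent * d[1].xtent * d[2].xtent;
     */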
    offset[rank - 1]    = 1;
    max_xtent[rank - 1] = d[rank - 1].xtent;
#ifdef H5S_DEBUG
    if (H5DEBUG(S)) {
        i = ((int)rank) - 1;
        HDfprintf(H5DEBUG(S), " offset[%2d]=%" PRIuHSIZE "; max_xtent[%2d]=%" PRIuHSIZE "\n", i, offset[i], i,
                  max_xtent[i]);
    }
#endif
    for (i = ((int)rank) - 2; i >= 0; --i) {
        offset[i]    = offset[i + 1] * d[i + 1].xtent;
        max_xtent[i] = max_xtent[i + 1] * d[i].xtent;
#ifdef H5S_DEBUG
        if (H5DEBUG(S))
            HDfprintf(H5DEBUG(S), " offset[%2d]=%" PRIuHSIZE "; max_xtent[%2d]=%" PRIuHSIZE "\n", i,
                      offset[i], i, max_xtent[i]);
#endif
    } /* end for */

    /*  Create a type covering the selected hyperslab.
     *  Multidimensional dataspaces are stored in row-major order.
     *  The type is built from the inside out, going from the
     *  fastest-changing (i.e., inner) dimension to the slowest (outer).
     */

/*******************************************************
 *  Construct contig type for inner contig dims:
 *******************************************************/
#ifdef H5S_DEBUG
    if (H5DEBUG(S)) {
        HDfprintf(H5DEBUG(S), "%s: Making contig type %zu MPI_BYTEs\n", FUNC, elmt_size);
        for (i = ((int)rank) - 1; i >= 0; --i)
            HDfprintf(H5DEBUG(S), "d[%d].xtent=%" PRIuHSIZE "\n", i, d[i].xtent);
    }
#endif

    /* LARGE_DATATYPE::
     * Check if the number of elements to form the inner type fits into a 32 bit integer.
     * If yes then just create the inner type with MPI_Type_contiguous.
     * Otherwise create a compound datatype by iterating as many times as needed
     * for the inner type to be created.
     */
    if (bigio_count >= elmt_size) {
        /* Use a single MPI datatype that has a 32 bit size */
        if (MPI_SUCCESS != (mpi_code = MPI_Type_contiguous((int)elmt_size, MPI_BYTE, &inner_type)))
            HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
    }
    else
        /* Create the compound datatype for this operation (> 2GB) */
        if (H5_mpio_create_large_type(elmt_size, 0, MPI_BYTE, &inner_type) < 0)
        HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,
                    "couldn't create a large inner datatype in hyper selection")

    /*******************************************************
     *  Construct the type by walking the hyperslab dims
     *  from the inside out:
     *******************************************************/
    for (i = ((int)rank) - 1; i >= 0; --i) {
#ifdef H5S_DEBUG
        if (H5DEBUG(S))
            HDfprintf(H5DEBUG(S),
                      "%s: Dimension i=%d \n"
                      "start=%" PRIdHSIZE " count=%" PRIuHSIZE " block=%" PRIuHSIZE " stride=%" PRIuHSIZE
                      ", xtent=%" PRIuHSIZE " max_xtent=%" PRIuHSIZE "\n",
                      FUNC, i, d[i].start, d[i].count, d[i].block, d[i].strid, d[i].xtent, max_xtent[i]);
#endif

#ifdef H5S_DEBUG
        if (H5DEBUG(S))
            HDfprintf(H5DEBUG(S), "%s: i=%d  Making vector-type \n", FUNC, i);
#endif
        /****************************************
         * Build vector type of the selection.
         ****************************************/
        if (bigio_count >= d[i].count && bigio_count >= d[i].block && bigio_count >= d[i].strid) {
            /* All the parameters fit into 32 bit integers so create the vector type normally */
            mpi_code = MPI_Type_vector((int)(d[i].count), /* count */
                                       (int)(d[i].block), /* blocklength */
                                       (int)(d[i].strid), /* stride */
                                       inner_type,        /* old type */
                                       &outer_type);      /* new type */

            MPI_Type_free(&inner_type);
            if (mpi_code != MPI_SUCCESS)
                HMPI_GOTO_ERROR(FAIL, "couldn't create MPI vector type", mpi_code)
        }
        else {
            /* Things get a bit more complicated and require LARGE_DATATYPE processing.
             * There are two MPI datatypes that need to be created:
             *   1) an internal contiguous block; and
             *   2) a collection of elements where an element is a contiguous block (1).
             * Remember that the input arguments to the MPI-IO functions use integer
             * values to represent element counts.  We ARE allowed, however, in the
             * more recent MPI implementations to use constructed datatypes whereby
             * the total number of bytes in a transfer could be:
             *   (2GB-1) * number_of_blocks * the_datatype_extent.
             */

            MPI_Aint     stride_in_bytes, inner_extent;
            MPI_Datatype block_type;

            /* Create a contiguous datatype inner_type x number of BLOCKS.
             * Again we need to check that the number of BLOCKS can fit into
             * a 32 bit integer */
            if (bigio_count < d[i].block) {
                if (H5_mpio_create_large_type(d[i].block, 0, inner_type, &block_type) < 0)
                    HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,
                                "couldn't create a large block datatype in hyper selection")
            }
            else if (MPI_SUCCESS !=
                     (mpi_code = MPI_Type_contiguous((int)d[i].block, inner_type, &block_type)))
                HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)

            /* As of version 4.0, OpenMPI now turns off MPI-1 API calls by default,
             * so we're using the MPI-2 version even though we don't need the lb
             * value.
             */
            {
                MPI_Aint unused_lb_arg;
                MPI_Type_get_extent(inner_type, &unused_lb_arg, &inner_extent);
            }
            stride_in_bytes = inner_extent * (MPI_Aint)d[i].strid;
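            /* (MPI_Type_create_hvector takes its stride in bytes, so the element
             * stride for this dimension is scaled by the extent of one inner
             * element type) */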

            /* If the element count is larger than what a 32 bit integer can hold,
             * we call the large type creation function to handle that
             */
            if (bigio_count < d[i].count) {
                if (H5_mpio_create_large_type(d[i].count, stride_in_bytes, block_type, &outer_type) < 0)
                    HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,
                                "couldn't create a large outer datatype in hyper selection")
            }
            /* otherwise a regular create_hvector will do */
            else if (MPI_SUCCESS != (mpi_code = MPI_Type_create_hvector((int)d[i].count, /* count */
                                                                        1,               /* blocklength */
                                                                        stride_in_bytes, /* stride in bytes */
                                                                        block_type,      /* old type */
                                                                        &outer_type)))   /* new type */
                HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code)

            MPI_Type_free(&block_type);
            MPI_Type_free(&inner_type);
        } /* end else */

        /****************************************
         *  Then build the dimension type as (start, vector type, xtent).
         ****************************************/

        /* Calculate start and extent values of this dimension */
        /* Check for overflow before casting to MPI_Aint */
        if (d[i].start > LONG_MAX || offset[i] > LONG_MAX || elmt_size > LONG_MAX)
            HGOTO_ERROR(H5E_DATASET, H5E_BADVALUE, FAIL, "result overflow")
        start_disp = (MPI_Aint)d[i].start * (MPI_Aint)offset[i] * (MPI_Aint)elmt_size;
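        /* (start_disp is the byte offset of this dimension's first selected
         * element: starting index (d[i].start) times elements per index step
         * (offset[i]) times bytes per element (elmt_size)) */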

        if (max_xtent[i] > LONG_MAX)
            HGOTO_ERROR(H5E_DATASET, H5E_BADVALUE, FAIL, "max_xtent overflow")
        new_extent = (MPI_Aint)elmt_size * (MPI_Aint)max_xtent[i];
        if (MPI_SUCCESS != (mpi_code = MPI_Type_get_extent(outer_type, &lb, &extent_len)))
            HMPI_GOTO_ERROR(FAIL, "MPI_Type_get_extent failed", mpi_code)

        /*************************************************
         *  Restructure this datatype ("outer_type")
         *  so that it still starts at 0, but its extent
         *  is the full extent in this dimension.
         *************************************************/
        if (start_disp > 0 || extent_len < new_extent) {
            MPI_Datatype interm_type;
            int          block_len = 1;

            HDassert(0 == lb);

            mpi_code = MPI_Type_create_hindexed(1, &block_len, &start_disp, outer_type, &interm_type);
            MPI_Type_free(&outer_type);
            if (mpi_code != MPI_SUCCESS)
                HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code)

            mpi_code = MPI_Type_create_resized(interm_type, lb, new_extent, &inner_type);
            MPI_Type_free(&interm_type);
            if (mpi_code != MPI_SUCCESS)
                HMPI_GOTO_ERROR(FAIL, "couldn't resize MPI vector type", mpi_code)
        } /* end if */
        else
            inner_type = outer_type;
    } /* end for */
      /******************************************
       *  End of loop, walking through dimensions.
       *******************************************/

    /* At this point inner_type is actually the outermost type, even for a 0-trip loop */
    *new_type = inner_type;
    if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_type)))
        HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)

    /* fill in the remaining return values */
    *count           = 1; /* only have to move one of these suckers! */
    *is_derived_type = TRUE;

done:
    /* Release selection iterator */
    if (sel_iter_init)
        if (H5S_SELECT_ITER_RELEASE(&sel_iter) < 0)
            HDONE_ERROR(H5E_DATASPACE, H5E_CANTRELEASE, FAIL, "unable to release selection iterator")

#ifdef H5S_DEBUG
    if (H5DEBUG(S))
        HDfprintf(H5DEBUG(S), "Leave %s, count=%d  is_derived_type=%s\n", FUNC, *count,
                  (*is_derived_type) ? "TRUE" : "FALSE");
#endif
    FUNC_LEAVE_NOAPI(ret_value)
} /* end H5S__mpio_reg_hyper_type() */

/*-------------------------------------------------------------------------
 * Function:	H5S__mpio_span_hyper_type
 *
 * Purpose:	Translate an HDF5 irregular hyperslab selection into an
 *              MPI type.
 *
 * Return:	Non-negative on success, negative on failure.
 *
 * Outputs:	*new_type	  the MPI type corresponding to the selection
 *		*count		  how many objects of the new_type in selection
 *				  (useful if this is the buffer type for xfer)
 *		*is_derived_type  0 if MPI primitive type, 1 if derived
 *
 * Programmer:  kyang
 *
 *-------------------------------------------------------------------------
 */
static herr_t
H5S__mpio_span_hyper_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type, int *count,
                          hbool_t *is_derived_type)
{
    H5S_mpio_mpitype_list_t type_list;                    /* List to track MPI data types created */
    MPI_Datatype            elmt_type;                    /* MPI datatype for an element */
    hbool_t                 elmt_type_is_derived = FALSE; /* Whether the element type has been created */
    MPI_Datatype            span_type;                    /* MPI datatype for overall span tree */
    hsize_t                 bigio_count;                  /* Transition point to create derived type */
    hsize_t                 down[H5S_MAX_RANK];           /* 'down' sizes for each dimension */
    uint64_t                op_gen;                       /* Operation generation value */
    int                     mpi_code;                     /* MPI return code */
    herr_t                  ret_value = SUCCEED;          /* Return value */

    FUNC_ENTER_STATIC

    /* Check args */
    HDassert(space);
    HDassert(space->extent.size);
    HDassert(space->select.sel_info.hslab->span_lst);
    HDassert(space->select.sel_info.hslab->span_lst->head);

    bigio_count = H5_mpi_get_bigio_count();
    /* Create the base type for an element */
    if (bigio_count >= elmt_size) {
        if (MPI_SUCCESS != (mpi_code = MPI_Type_contiguous((int)elmt_size, MPI_BYTE, &elmt_type)))
            HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
    }
    else if (H5_mpio_create_large_type(elmt_size, 0, MPI_BYTE, &elmt_type) < 0)
        HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,
                    "couldn't create a large element datatype in span_hyper selection")
    elmt_type_is_derived = TRUE;

    /* Compute 'down' sizes for each dimension */
    H5VM_array_down(space->extent.rank, space->extent.size, down);
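    /* ('down[i]' is the number of elements spanned by a unit step in dimension
     * i; e.g., for a 3-D extent {D0, D1, D2}, down would be {D1 * D2, D2, 1}.) */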
1018 
1019     /* Acquire an operation generation value for creating MPI datatypes */
1020     op_gen = H5S__hyper_get_op_gen();
1021 
1022     /* Obtain derived MPI data type */
1023     /* Always use op_info[0] since we own this op_info, so there can be no
1024      * simultaneous operations */
1025     type_list.head = type_list.tail = NULL;
1026     if (H5S__obtain_datatype(space->select.sel_info.hslab->span_lst, down, elmt_size, &elmt_type, &span_type,
1027                              &type_list, 0, op_gen) < 0)
1028         HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't obtain MPI derived data type")
1029     if (MPI_SUCCESS != (mpi_code = MPI_Type_dup(span_type, new_type)))
1030         HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
1031     if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_type)))
1032         HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
1033 
1034     /* Release MPI data types generated during span tree traversal */
1035     if (H5S__release_datatype(&type_list) < 0)
1036         HGOTO_ERROR(H5E_DATASPACE, H5E_CANTRELEASE, FAIL, "couldn't release MPI derived data type")
1037 
1038     /* fill in the remaining return values */
1039     *count           = 1;
1040     *is_derived_type = TRUE;
1041 
1042 done:
1043     /* Release resources */
1044     if (elmt_type_is_derived)
1045         if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&elmt_type)))
1046             HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
1047 
1048     FUNC_LEAVE_NOAPI(ret_value)
1049 } /* end H5S__mpio_span_hyper_type() */
1050 
1051 /*-------------------------------------------------------------------------
1052  * Function:	H5S__release_datatype
1053  *
1054  * Purpose:	Release the MPI derived datatypes for span-tree hyperslab selection
1055  *
1056  * Return:	Non-negative on success, negative on failure.
1057  *
1058  * Programmer:	Quincey Koziol, February 2, 2019
1059  *
1060  *-------------------------------------------------------------------------
1061  */
1062 static herr_t
H5S__release_datatype(H5S_mpio_mpitype_list_t * type_list)1063 H5S__release_datatype(H5S_mpio_mpitype_list_t *type_list)
1064 {
1065     H5S_mpio_mpitype_node_t *curr;                /* Pointer to head of list */
1066     herr_t                   ret_value = SUCCEED; /* Return value */
1067 
1068     FUNC_ENTER_STATIC
1069 
1070     /* Sanity check */
1071     HDassert(type_list);
1072 
1073     /* Iterate over the list, freeing the MPI data types */
1074     curr = type_list->head;
1075     while (curr) {
1076         H5S_mpio_mpitype_node_t *next;     /* Pointer to next node in list */
1077         int                      mpi_code; /* MPI return status code */
1078 
1079         /* Release the MPI data type for this span tree */
1080         if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&curr->type)))
1081             HMPI_GOTO_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
1082 
1083         /* Get pointer to next node in list */
1084         next = curr->next;
1085 
1086         /* Free the current node */
1087         curr = H5FL_FREE(H5S_mpio_mpitype_node_t, curr);
1088 
1089         /* Advance to next node */
1090         curr = next;
1091     } /* end while */
1092 
1093 done:
1094     FUNC_LEAVE_NOAPI(ret_value)
1095 } /* end H5S__release_datatype() */
1096 
1097 /*-------------------------------------------------------------------------
1098  * Function:	H5S__obtain_datatype
1099  *
1100  * Purpose:	Obtain an MPI derived datatype for span-tree hyperslab selection
1101  *
1102  * Return:	Non-negative on success, negative on failure.
1103  *
1104  * Outputs:	*span_type	 the MPI type corresponding to the selection
1105  *
1106  * Programmer:  kyang
1107  *
1108  *-------------------------------------------------------------------------
1109  */
1110 static herr_t
H5S__obtain_datatype(H5S_hyper_span_info_t * spans,const hsize_t * down,size_t elmt_size,const MPI_Datatype * elmt_type,MPI_Datatype * span_type,H5S_mpio_mpitype_list_t * type_list,unsigned op_info_i,uint64_t op_gen)1111 H5S__obtain_datatype(H5S_hyper_span_info_t *spans, const hsize_t *down, size_t elmt_size,
1112                      const MPI_Datatype *elmt_type, MPI_Datatype *span_type,
1113                      H5S_mpio_mpitype_list_t *type_list, unsigned op_info_i, uint64_t op_gen)
1114 {
1115     H5S_hyper_span_t *span;                  /* Hyperslab span to iterate with */
1116     hsize_t           bigio_count;           /* Transition point to create derived type */
1117     size_t            alloc_count       = 0; /* Number of span tree nodes allocated at this level */
1118     size_t            outercount        = 0; /* Number of span tree nodes at this level */
1119     MPI_Datatype *    inner_type        = NULL;
1120     hbool_t           inner_types_freed = FALSE; /* Whether the inner_type MPI datatypes have been freed */
1121     int *             blocklen          = NULL;
1122     MPI_Aint *        disp              = NULL;
1123     size_t            u;                   /* Local index variable */
1124     int               mpi_code;            /* MPI return status code */
1125     herr_t            ret_value = SUCCEED; /* Return value */
1126 
1127     FUNC_ENTER_STATIC
1128 
1129     /* Sanity check */
1130     HDassert(spans);
1131     HDassert(type_list);
1132 
1133     bigio_count = H5_mpi_get_bigio_count();
1134     /* Check if we've visited this span tree before */
1135     if (spans->op_info[op_info_i].op_gen != op_gen) {
1136         H5S_mpio_mpitype_node_t *type_node; /* Pointer to new node in MPI data type list */
1137 
1138         /* Allocate the initial displacement & block length buffers */
1139         alloc_count = H5S_MPIO_INITIAL_ALLOC_COUNT;
1140         if (NULL == (disp = (MPI_Aint *)H5MM_malloc(alloc_count * sizeof(MPI_Aint))))
1141             HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of displacements")
1142         if (NULL == (blocklen = (int *)H5MM_malloc(alloc_count * sizeof(int))))
1143             HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of block lengths")
1144 
1145         /* If this is the fastest changing dimension, it is the base case for derived datatype. */
1146         span = spans->head;
1147         if (NULL == span->down) {
1148             hbool_t large_block = FALSE; /* Wether the block length is larger than 32 bit integer */

            outercount = 0;
            while (span) {
                hsize_t nelmts; /* # of elements covered by current span */

                /* Check if we need to increase the size of the buffers */
                if (outercount >= alloc_count) {
                    MPI_Aint *tmp_disp;     /* Temporary pointer to new displacement buffer */
                    int *     tmp_blocklen; /* Temporary pointer to new block length buffer */

                    /* Double the allocation count */
                    alloc_count *= 2;

                    /* Re-allocate the buffers */
                    if (NULL == (tmp_disp = (MPI_Aint *)H5MM_realloc(disp, alloc_count * sizeof(MPI_Aint))))
                        HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL,
                                    "can't allocate array of displacements")
                    disp = tmp_disp;
                    if (NULL == (tmp_blocklen = (int *)H5MM_realloc(blocklen, alloc_count * sizeof(int))))
                        HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL,
                                    "can't allocate array of block lengths")
                    blocklen = tmp_blocklen;
                } /* end if */

                /* Compute the number of elements to attempt in this span */
                nelmts = (span->high - span->low) + 1;

                /* Store displacement & block length */
                disp[outercount] = (MPI_Aint)elmt_size * (MPI_Aint)span->low;
                H5_CHECK_OVERFLOW(nelmts, hsize_t, int)
                blocklen[outercount] = (int)nelmts;

                if (bigio_count < (hsize_t)blocklen[outercount])
                    large_block = TRUE; /* at least one block type is large, so set this flag to true */

                span = span->next;
                outercount++;
            } /* end while */

            /* Everything fits into integers, so cast them and use hindexed */
            if (bigio_count >= outercount && large_block == FALSE) {
                if (MPI_SUCCESS !=
                    (mpi_code = MPI_Type_create_hindexed((int)outercount, blocklen, disp, *elmt_type,
                                                         &spans->op_info[op_info_i].u.down_type)))
                    HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code)
            }      /* end if */
            else { /* LARGE_DATATYPE:: Something doesn't fit into a 32 bit integer */
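                /* The blocks must be combined one at a time with
                 * MPI_Type_create_struct (using absolute byte displacements),
                 * since at least one block length exceeds the 32-bit int range
                 * that MPI_Type_create_hindexed accepts.
                 */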
                for (u = 0; u < outercount; u++) {
                    MPI_Datatype temp_type = MPI_DATATYPE_NULL;

                    /* create the block type from elmt_type while checking the 32 bit int limit */
                    if ((hsize_t)(blocklen[u]) > bigio_count) {
                        if (H5_mpio_create_large_type((hsize_t)blocklen[u], 0, *elmt_type, &temp_type) < 0)
                            HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,
                                        "couldn't create a large element datatype in span_hyper selection")
                    } /* end if */
                    else if (MPI_SUCCESS !=
                             (mpi_code = MPI_Type_contiguous((int)blocklen[u], *elmt_type, &temp_type)))
                        HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)

                    /* Combine the current datatype that is created with this current block type */
                    if (0 == u) /* first iteration, there is no combined datatype yet */
                        spans->op_info[op_info_i].u.down_type = temp_type;
                    else {
                        int          bl[2] = {1, 1};
                        MPI_Aint     ds[2] = {disp[u - 1], disp[u]};
                        MPI_Datatype dt[2] = {spans->op_info[op_info_i].u.down_type, temp_type};

                        if (MPI_SUCCESS != (mpi_code = MPI_Type_create_struct(
                                                2,                                        /* count */
                                                bl,                                       /* blocklengths */
                                                ds,                                       /* displacements in bytes */
                                                dt,                                       /* old types */
                                                &spans->op_info[op_info_i].u.down_type))) /* new type */
                            HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code)

                        /* Release previous temporary datatype */
                        if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&temp_type)))
                            HMPI_GOTO_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
                    } /* end else */
                }     /* end for */
            }         /* end else (LARGE_DATATYPE::) */
        }             /* end if */
        else {
            MPI_Aint stride; /* Distance between inner MPI datatypes */

            if (NULL == (inner_type = (MPI_Datatype *)H5MM_malloc(alloc_count * sizeof(MPI_Datatype))))
                HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of inner MPI datatypes")

            /* Calculate the total bytes of the lower dimension */
            stride = (MPI_Aint)(*down) * (MPI_Aint)elmt_size;
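            /* For illustration: in a 2-D dataset of down[0] columns,
             * consecutive rows of the selection are down[0] * elmt_size bytes
             * apart, so this stride carries the inner datatype from one row
             * to the next.
             */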

            /* Loop over span nodes */
            outercount = 0;
            while (span) {
                MPI_Datatype down_type; /* Temporary MPI datatype for a span tree node's children */
                hsize_t      nelmts;    /* # of elements covered by current span */

                /* Check if we need to increase the size of the buffers */
                if (outercount >= alloc_count) {
                    MPI_Aint *    tmp_disp;       /* Temporary pointer to new displacement buffer */
                    int *         tmp_blocklen;   /* Temporary pointer to new block length buffer */
                    MPI_Datatype *tmp_inner_type; /* Temporary pointer to inner MPI datatype buffer */

                    /* Double the allocation count */
                    alloc_count *= 2;

                    /* Re-allocate the buffers */
                    if (NULL == (tmp_disp = (MPI_Aint *)H5MM_realloc(disp, alloc_count * sizeof(MPI_Aint))))
                        HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL,
                                    "can't allocate array of displacements")
                    disp = tmp_disp;
                    if (NULL == (tmp_blocklen = (int *)H5MM_realloc(blocklen, alloc_count * sizeof(int))))
                        HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL,
                                    "can't allocate array of block lengths")
                    blocklen = tmp_blocklen;
                    if (NULL == (tmp_inner_type = (MPI_Datatype *)H5MM_realloc(
                                     inner_type, alloc_count * sizeof(MPI_Datatype))))
                        HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL,
                                    "can't allocate array of inner MPI datatypes")
                    inner_type = tmp_inner_type;
                } /* end if */

                /* The displacement is in bytes and must account for the lower
                 * dimensions: scale this span's starting index by the stride
                 * of one step in this dimension.
                 */
                disp[outercount]     = (MPI_Aint)span->low * stride;
                blocklen[outercount] = 1;

                /* Generate MPI datatype for next dimension down */
                if (H5S__obtain_datatype(span->down, down + 1, elmt_size, elmt_type, &down_type, type_list,
                                         op_info_i, op_gen) < 0)
                    HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't obtain MPI derived data type")

                /* Compute the number of elements to attempt in this span */
                nelmts = (span->high - span->low) + 1;

                /* Build the MPI datatype for this node */
                H5_CHECK_OVERFLOW(nelmts, hsize_t, int)
                if (MPI_SUCCESS != (mpi_code = MPI_Type_create_hvector((int)nelmts, 1, stride, down_type,
                                                                       &inner_type[outercount])))
                    HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code)

                span = span->next;
                outercount++;
            } /* end while */

            /* Build the struct datatype that combines all of the inner vector types */
            H5_CHECK_OVERFLOW(outercount, size_t, int)
            if (MPI_SUCCESS != (mpi_code = MPI_Type_create_struct((int)outercount, blocklen, disp, inner_type,
                                                                  &spans->op_info[op_info_i].u.down_type)))
                HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code)

            /* Release inner node types */
            for (u = 0; u < outercount; u++)
                if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&inner_type[u])))
                    HMPI_GOTO_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
            inner_types_freed = TRUE;
        } /* end else */

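        /* The datatype built for this span tree is cached in the tree itself
         * (so shared subtrees are converted only once) and also recorded in
         * type_list, so the caller can free every intermediate type after the
         * topmost datatype has been committed.
         */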
        /* Allocate space for the MPI data type list node */
        if (NULL == (type_node = H5FL_MALLOC(H5S_mpio_mpitype_node_t)))
            HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate MPI data type list node")

        /* Set up MPI type node */
        type_node->type = spans->op_info[op_info_i].u.down_type;
        type_node->next = NULL;

        /* Add MPI type node to list */
        if (type_list->head == NULL)
            type_list->head = type_list->tail = type_node;
        else {
            type_list->tail->next = type_node;
            type_list->tail       = type_node;
        } /* end else */

        /* Remember that we've visited this span tree */
        spans->op_info[op_info_i].op_gen = op_gen;
    } /* end if */

    /* Return MPI data type for span tree */
    *span_type = spans->op_info[op_info_i].u.down_type;

done:
    /* General cleanup */
    if (inner_type != NULL) {
        if (!inner_types_freed)
            for (u = 0; u < outercount; u++)
                if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&inner_type[u])))
                    HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
        H5MM_free(inner_type);
    } /* end if */
    if (blocklen != NULL)
        H5MM_free(blocklen);
    if (disp != NULL)
        H5MM_free(disp);

    FUNC_LEAVE_NOAPI(ret_value)
} /* end H5S__obtain_datatype() */
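
/* Illustrative sketch (not part of the build; all values are made up): for a
 * two-row irregular selection, the recursion above boils down to roughly the
 * MPI calls below -- an hindexed type over the element type at the leaf
 * (fastest-varying) dimension, then an hvector one level up.
 */
#if 0
{
    MPI_Datatype elmt_type   = MPI_BYTE;          /* elmt_size == 1 for this example */
    MPI_Datatype row_type    = MPI_DATATYPE_NULL;
    MPI_Datatype span_type   = MPI_DATATYPE_NULL;
    int          blocklen[2] = {3, 2};            /* two runs in the row: 3 and 2 elements */
    MPI_Aint     disp[2]     = {0, 5};            /* byte offsets of the two runs */
    MPI_Aint     stride      = 10;                /* bytes per full row: down[0] * elmt_size */

    /* Leaf level: one hindexed type describes all runs within a row */
    MPI_Type_create_hindexed(2, blocklen, disp, elmt_type, &row_type);

    /* One level up: repeat the row type for each of the 2 selected rows */
    MPI_Type_create_hvector(2, 1, stride, row_type, &span_type);

    MPI_Type_commit(&span_type);
    /* ... use span_type for I/O, then MPI_Type_free() both types ... */
}
#endif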

/*-------------------------------------------------------------------------
 * Function:	H5S_mpio_space_type
 *
 * Purpose:	Translate an HDF5 dataspace selection into an MPI type.
 *		Handles 'none', 'all', point, and hyperslab selections.
 *
 * Return:	Non-negative on success, negative on failure.
 *
 * Outputs:	*new_type	  the MPI type corresponding to the selection
 *		*count		  how many objects of the new_type in selection
 *				  (useful if this is the buffer type for xfer)
 *		*is_derived_type  0 if MPI primitive type, 1 if derived
 *
 * Programmer:	rky 980813
 *
 *-------------------------------------------------------------------------
 */
herr_t
H5S_mpio_space_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type, int *count,
                    hbool_t *is_derived_type, hbool_t do_permute, hsize_t **permute_map, hbool_t *is_permuted)
{
    herr_t ret_value = SUCCEED; /* Return value */

    FUNC_ENTER_NOAPI_NOINIT

    /* Check args */
    HDassert(space);
    HDassert(elmt_size);

    /* Create MPI type based on the kind of selection */
    switch (H5S_GET_EXTENT_TYPE(space)) {
        case H5S_NULL:
        case H5S_SCALAR:
        case H5S_SIMPLE:
            /* If the file space has been permuted previously due to
             * out-of-order point selection, then permute this selection which
             * should be a memory selection to match the file space permutation.
             */
            if (TRUE == *is_permuted) {
                switch (H5S_GET_SELECT_TYPE(space)) {
                    case H5S_SEL_NONE:
                        if (H5S__mpio_none_type(new_type, count, is_derived_type) < 0)
                            HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,
                                        "couldn't convert 'none' selection to MPI type")
                        break;

                    case H5S_SEL_ALL:
                    case H5S_SEL_POINTS:
                    case H5S_SEL_HYPERSLABS:
                        /* Sanity check */
                        HDassert(!do_permute);

                        if (H5S__mpio_permute_type(space, elmt_size, permute_map, new_type, count,
                                                   is_derived_type) < 0)
                            HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,
                                        "couldn't convert permuted selection to MPI type")
                        break;

                    case H5S_SEL_ERROR:
                    case H5S_SEL_N:
                    default:
                        HDassert("unknown selection type" && 0);
                        break;
                } /* end switch */
            }     /* end if */
            /* the file space is not permuted, so do a regular selection */
            else {
                switch (H5S_GET_SELECT_TYPE(space)) {
                    case H5S_SEL_NONE:
                        if (H5S__mpio_none_type(new_type, count, is_derived_type) < 0)
                            HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,
                                        "couldn't convert 'none' selection to MPI type")
                        break;

                    case H5S_SEL_ALL:
                        if (H5S__mpio_all_type(space, elmt_size, new_type, count, is_derived_type) < 0)
                            HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,
                                        "couldn't convert 'all' selection to MPI type")
                        break;

                    case H5S_SEL_POINTS:
                        if (H5S__mpio_point_type(space, elmt_size, new_type, count, is_derived_type,
                                                 do_permute, permute_map, is_permuted) < 0)
                            HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,
                                        "couldn't convert 'point' selection to MPI type")
                        break;

                    case H5S_SEL_HYPERSLABS:
                        if (H5S_SELECT_IS_REGULAR(space) == TRUE) {
                            if (H5S__mpio_reg_hyper_type(space, elmt_size, new_type, count, is_derived_type) <
                                0)
                                HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,
                                            "couldn't convert regular 'hyperslab' selection to MPI type")
                        } /* end if */
                        else if (H5S__mpio_span_hyper_type(space, elmt_size, new_type, count,
                                                           is_derived_type) < 0)
                            HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,
                                        "couldn't convert irregular 'hyperslab' selection to MPI type")
                        break;

                    case H5S_SEL_ERROR:
                    case H5S_SEL_N:
                    default:
                        HDassert("unknown selection type" && 0);
                        break;
                } /* end switch */
            }     /* end else */
            break;

        case H5S_NO_CLASS:
        default:
            HDassert("unknown dataspace type" && 0);
            break;
    } /* end switch */

done:
    FUNC_LEAVE_NOAPI(ret_value)
} /* end H5S_mpio_space_type() */
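
/* Hypothetical caller sketch (not part of the build): a parallel I/O layer
 * would typically translate the file selection first (with do_permute set, so
 * an out-of-order point selection can record its permutation), then the
 * memory selection, and hand the resulting types to MPI-IO.  The variables
 * fh, disp, buf, file_space, mem_space and elmt_size are assumed to exist.
 */
#if 0
{
    MPI_Datatype mpi_file_type, mpi_buf_type;
    int          mpi_file_count, mpi_buf_count;
    hbool_t      mpi_file_type_is_derived, mpi_buf_type_is_derived;
    hbool_t      is_permuted = FALSE;
    hsize_t *    permute_map = NULL;

    /* File selection first, memory selection second */
    H5S_mpio_space_type(file_space, elmt_size, &mpi_file_type, &mpi_file_count,
                        &mpi_file_type_is_derived, TRUE, &permute_map, &is_permuted);
    H5S_mpio_space_type(mem_space, elmt_size, &mpi_buf_type, &mpi_buf_count,
                        &mpi_buf_type_is_derived, FALSE, &permute_map, &is_permuted);

    /* Describe the file layout with the file type, then do collective I/O */
    MPI_File_set_view(fh, disp, MPI_BYTE, mpi_file_type, "native", MPI_INFO_NULL);
    MPI_File_read_at_all(fh, 0, buf, mpi_buf_count, mpi_buf_type, MPI_STATUS_IGNORE);

    /* ... reset the view and free any derived types afterwards ... */
}
#endif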

#endif /* H5_HAVE_PARALLEL */