1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Copyright by The HDF Group. *
3 * Copyright by the Board of Trustees of the University of Illinois. *
4 * All rights reserved. *
5 * *
6 * This file is part of HDF5. The full HDF5 copyright notice, including *
7 * terms governing use, modification, and redistribution, is contained in *
8 * the COPYING file, which can be found at the root of the source code *
9 * distribution tree, or in https://www.hdfgroup.org/licenses. *
10 * If you do not have access to either file, you may request a copy from *
11 * help@hdfgroup.org. *
12 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
13
14 /*
15 * Programmer: rky 980813
16 *
17 * Purpose: Create MPI data types for HDF5 selections.
18 *
19 */
20
21 /****************/
22 /* Module Setup */
23 /****************/
24
25 #include "H5Smodule.h" /* This source code file is part of the H5S module */
26
27 /***********/
28 /* Headers */
29 /***********/
30 #include "H5private.h" /* Generic Functions */
31 #include "H5Dprivate.h" /* Datasets */
32 #include "H5Eprivate.h" /* Error handling */
33 #include "H5FLprivate.h" /* Free Lists */
34 #include "H5MMprivate.h" /* Memory management */
35 #include "H5Spkg.h" /* Dataspaces */
36 #include "H5VMprivate.h" /* Vector and array functions */
37
38 #ifdef H5_HAVE_PARALLEL
39
40 /****************/
41 /* Local Macros */
42 /****************/
43 #define H5S_MPIO_INITIAL_ALLOC_COUNT 256
44
45 /*******************/
46 /* Local Variables */
47 /*******************/
48
49 /******************/
50 /* Local Typedefs */
51 /******************/
52
/* Node in linked list of MPI data types created during traversal of irregular hyperslab selection */
typedef struct H5S_mpio_mpitype_node_t {
    MPI_Datatype                    type; /* MPI datatype generated for one portion of the selection */
    struct H5S_mpio_mpitype_node_t *next; /* Pointer to next node in list (NULL at the tail) */
} H5S_mpio_mpitype_node_t;

/* List to track MPI data types generated during traversal of irregular hyperslab selection */
typedef struct H5S_mpio_mpitype_list_t {
    H5S_mpio_mpitype_node_t *head; /* Pointer to head of list (NULL when the list is empty) */
    H5S_mpio_mpitype_node_t *tail; /* Pointer to tail of list, kept for O(1) appends */
} H5S_mpio_mpitype_list_t;
64
65 /********************/
66 /* Local Prototypes */
67 /********************/
68 static herr_t H5S__mpio_all_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type, int *count,
69 hbool_t *is_derived_type);
70 static herr_t H5S__mpio_none_type(MPI_Datatype *new_type, int *count, hbool_t *is_derived_type);
71 static herr_t H5S__mpio_create_point_datatype(size_t elmt_size, hsize_t num_points, MPI_Aint *disp,
72 MPI_Datatype *new_type);
73 static herr_t H5S__mpio_point_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type, int *count,
74 hbool_t *is_derived_type, hbool_t do_permute, hsize_t **permute_map,
75 hbool_t *is_permuted);
76 static herr_t H5S__mpio_permute_type(const H5S_t *space, size_t elmt_size, hsize_t **permute_map,
77 MPI_Datatype *new_type, int *count, hbool_t *is_derived_type);
78 static herr_t H5S__mpio_reg_hyper_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type,
79 int *count, hbool_t *is_derived_type);
80 static herr_t H5S__mpio_span_hyper_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type,
81 int *count, hbool_t *is_derived_type);
82 static herr_t H5S__release_datatype(H5S_mpio_mpitype_list_t *type_list);
83 static herr_t H5S__obtain_datatype(H5S_hyper_span_info_t *spans, const hsize_t *down, size_t elmt_size,
84 const MPI_Datatype *elmt_type, MPI_Datatype *span_type,
85 H5S_mpio_mpitype_list_t *type_list, unsigned op_info_i, uint64_t op_gen);
86
87 /*****************************/
88 /* Library Private Variables */
89 /*****************************/
90
91 /*********************/
92 /* Package Variables */
93 /*********************/
94
/* Declare a free list to manage the H5S_mpio_mpitype_node_t struct
 * (file-scope allocator for the nodes tracking MPI types built while
 * traversing irregular hyperslab selections) */
H5FL_DEFINE_STATIC(H5S_mpio_mpitype_node_t);
97
98 /*-------------------------------------------------------------------------
99 * Function: H5S__mpio_all_type
100 *
101 * Purpose: Translate an HDF5 "all" selection into an MPI type.
102 *
103 * Return: Non-negative on success, negative on failure.
104 *
105 * Outputs: *new_type the MPI type corresponding to the selection
106 * *count how many objects of the new_type in selection
107 * (useful if this is the buffer type for xfer)
108 * *is_derived_type 0 if MPI primitive type, 1 if derived
109 *
110 * Programmer: rky 980813
111 *
112 *-------------------------------------------------------------------------
113 */
114 static herr_t
H5S__mpio_all_type(const H5S_t * space,size_t elmt_size,MPI_Datatype * new_type,int * count,hbool_t * is_derived_type)115 H5S__mpio_all_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type, int *count,
116 hbool_t *is_derived_type)
117 {
118 hsize_t total_bytes;
119 hssize_t snelmts; /* Total number of elmts (signed) */
120 hsize_t nelmts; /* Total number of elmts */
121 hsize_t bigio_count; /* Transition point to create derived type */
122 herr_t ret_value = SUCCEED; /* Return value */
123
124 FUNC_ENTER_STATIC
125
126 /* Check args */
127 HDassert(space);
128
129 /* Just treat the entire extent as a block of bytes */
130 if ((snelmts = (hssize_t)H5S_GET_EXTENT_NPOINTS(space)) < 0)
131 HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "src dataspace has invalid selection")
132 H5_CHECKED_ASSIGN(nelmts, hsize_t, snelmts, hssize_t);
133
134 total_bytes = (hsize_t)elmt_size * nelmts;
135 bigio_count = H5_mpi_get_bigio_count();
136
137 /* Verify that the size can be expressed as a 32 bit integer */
138 if (bigio_count >= total_bytes) {
139 /* fill in the return values */
140 *new_type = MPI_BYTE;
141 H5_CHECKED_ASSIGN(*count, int, total_bytes, hsize_t);
142 *is_derived_type = FALSE;
143 }
144 else {
145 /* Create a LARGE derived datatype for this transfer */
146 if (H5_mpio_create_large_type(total_bytes, 0, MPI_BYTE, new_type) < 0)
147 HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,
148 "couldn't create a large datatype from the all selection")
149 *count = 1;
150 *is_derived_type = TRUE;
151 }
152
153 done:
154 FUNC_LEAVE_NOAPI(ret_value)
155 } /* H5S__mpio_all_type() */
156
157 /*-------------------------------------------------------------------------
158 * Function: H5S__mpio_none_type
159 *
160 * Purpose: Translate an HDF5 "none" selection into an MPI type.
161 *
162 * Return: Non-negative on success, negative on failure.
163 *
164 * Outputs: *new_type the MPI type corresponding to the selection
165 * *count how many objects of the new_type in selection
166 * (useful if this is the buffer type for xfer)
167 * *is_derived_type 0 if MPI primitive type, 1 if derived
168 *
169 * Programmer: Quincey Koziol, October 29, 2002
170 *
171 *-------------------------------------------------------------------------
172 */
173 static herr_t
H5S__mpio_none_type(MPI_Datatype * new_type,int * count,hbool_t * is_derived_type)174 H5S__mpio_none_type(MPI_Datatype *new_type, int *count, hbool_t *is_derived_type)
175 {
176 FUNC_ENTER_STATIC_NOERR
177
178 /* fill in the return values */
179 *new_type = MPI_BYTE;
180 *count = 0;
181 *is_derived_type = FALSE;
182
183 FUNC_LEAVE_NOAPI(SUCCEED)
184 } /* H5S__mpio_none_type() */
185
186 /*-------------------------------------------------------------------------
187 * Function: H5S__mpio_create_point_datatype
188 *
189 * Purpose: Create a derived datatype for point selections.
190 *
191 * Return: Non-negative on success, negative on failure.
192 *
193 * Outputs: *new_type the MPI type corresponding to the selection
194 *
195 * Programmer: Mohamad Chaarawi
196 *
197 *-------------------------------------------------------------------------
198 */
199 static herr_t
H5S__mpio_create_point_datatype(size_t elmt_size,hsize_t num_points,MPI_Aint * disp,MPI_Datatype * new_type)200 H5S__mpio_create_point_datatype(size_t elmt_size, hsize_t num_points, MPI_Aint *disp, MPI_Datatype *new_type)
201 {
202 MPI_Datatype elmt_type; /* MPI datatype for individual element */
203 hbool_t elmt_type_created = FALSE; /* Whether the element MPI datatype was created */
204 int * inner_blocks = NULL; /* Arrays for MPI datatypes when "large" datatype needed */
205 MPI_Aint * inner_disps = NULL;
206 MPI_Datatype *inner_types = NULL;
207 #if MPI_VERSION < 3
208 int * blocks = NULL; /* Array of block sizes for MPI hindexed create call */
209 hsize_t u; /* Local index variable */
210 #endif
211 hsize_t bigio_count; /* Transition point to create derived type */
212 int mpi_code; /* MPI error code */
213 herr_t ret_value = SUCCEED; /* Return value */
214
215 FUNC_ENTER_STATIC
216
217 /* Create an MPI datatype for an element */
218 if (MPI_SUCCESS != (mpi_code = MPI_Type_contiguous((int)elmt_size, MPI_BYTE, &elmt_type)))
219 HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
220 elmt_type_created = TRUE;
221
222 bigio_count = H5_mpi_get_bigio_count();
223
224 /* Check whether standard or BIGIO processing will be employeed */
225 if (bigio_count >= num_points) {
226 #if MPI_VERSION >= 3
227 /* Create an MPI datatype for the whole point selection */
228 if (MPI_SUCCESS !=
229 (mpi_code = MPI_Type_create_hindexed_block((int)num_points, 1, disp, elmt_type, new_type)))
230 HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_indexed_block failed", mpi_code)
231 #else
232 /* Allocate block sizes for MPI datatype call */
233 if (NULL == (blocks = (int *)H5MM_malloc(sizeof(int) * num_points)))
234 HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of blocks")
235
236 for (u = 0; u < num_points; u++)
237 blocks[u] = 1;
238
239 /* Create an MPI datatype for the whole point selection */
240 if (MPI_SUCCESS !=
241 (mpi_code = MPI_Type_create_hindexed((int)num_points, blocks, disp, elmt_type, new_type)))
242 HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code)
243 #endif
244
245 /* Commit MPI datatype for later use */
246 if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_type)))
247 HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
248 }
249 else {
250 /* use LARGE_DATATYPE::
251 * We'll create an hindexed_block type for every 2G point count and then combine
252 * those and any remaining points into a single large datatype.
253 */
254 int total_types, i;
255 int remaining_points;
256 int num_big_types;
257 hsize_t leftover;
258
259 /* Calculate how many Big MPI datatypes are needed to represent the buffer */
260 num_big_types = (int)(num_points / bigio_count);
261
262 leftover = (hsize_t)num_points - (hsize_t)num_big_types * (hsize_t)bigio_count;
263 H5_CHECKED_ASSIGN(remaining_points, int, leftover, hsize_t);
264
265 total_types = (int)(remaining_points) ? (num_big_types + 1) : num_big_types;
266
267 /* Allocate array if MPI derived types needed */
268 if (NULL == (inner_types = (MPI_Datatype *)H5MM_malloc((sizeof(MPI_Datatype) * (size_t)total_types))))
269 HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of blocks")
270
271 if (NULL == (inner_blocks = (int *)H5MM_malloc(sizeof(int) * (size_t)total_types)))
272 HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of blocks")
273
274 if (NULL == (inner_disps = (MPI_Aint *)H5MM_malloc(sizeof(MPI_Aint) * (size_t)total_types)))
275 HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of blocks")
276
277 #if MPI_VERSION < 3
278 /* Allocate block sizes for MPI datatype call */
279 if (NULL == (blocks = (int *)H5MM_malloc(sizeof(int) * bigio_count)))
280 HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of blocks")
281
282 for (u = 0; u < bigio_count; u++)
283 blocks[u] = 1;
284 #endif
285
286 for (i = 0; i < num_big_types; i++) {
287 #if MPI_VERSION >= 3
288 if (MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed_block((int)bigio_count, 1,
289 &disp[(hsize_t)i * bigio_count],
290 elmt_type, &inner_types[i])))
291 HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed_block failed", mpi_code);
292 #else
293 if (MPI_SUCCESS !=
294 (mpi_code = MPI_Type_create_hindexed((int)bigio_count, blocks, &disp[i * bigio_count],
295 elmt_type, &inner_types[i])))
296 HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code)
297 #endif
298 inner_blocks[i] = 1;
299 inner_disps[i] = 0;
300 } /* end for*/
301
302 if (remaining_points) {
303 #if MPI_VERSION >= 3
304 if (MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed_block(
305 remaining_points, 1, &disp[(hsize_t)num_big_types * bigio_count],
306 elmt_type, &inner_types[num_big_types])))
307 HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed_block failed", mpi_code);
308 #else
309 if (MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed((int)remaining_points, blocks,
310 &disp[num_big_types * bigio_count],
311 elmt_type, &inner_types[num_big_types])))
312 HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code)
313 #endif
314 inner_blocks[num_big_types] = 1;
315 inner_disps[num_big_types] = 0;
316 }
317
318 if (MPI_SUCCESS != (mpi_code = MPI_Type_create_struct(total_types, inner_blocks, inner_disps,
319 inner_types, new_type)))
320 HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct", mpi_code);
321
322 for (i = 0; i < total_types; i++)
323 MPI_Type_free(&inner_types[i]);
324
325 /* Commit MPI datatype for later use */
326 if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_type)))
327 HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
328 } /* end else */
329
330 done:
331 if (elmt_type_created)
332 MPI_Type_free(&elmt_type);
333 #if MPI_VERSION < 3
334 if (blocks)
335 H5MM_free(blocks);
336 #endif
337 if (inner_types)
338 H5MM_free(inner_types);
339 if (inner_blocks)
340 H5MM_free(inner_blocks);
341 if (inner_disps)
342 H5MM_free(inner_disps);
343
344 FUNC_LEAVE_NOAPI(ret_value)
345 } /* H5S__mpio_create_point_datatype() */
346
/*-------------------------------------------------------------------------
 * Function:    H5S__mpio_point_type
 *
 * Purpose:     Translate an HDF5 "point" selection into an MPI type.
 *              Create a permutation array to handle out-of-order point
 *              selections.
 *
 * Return:      Non-negative on success, negative on failure.
 *
 * Outputs:     *new_type         the MPI type corresponding to the selection
 *              *count            how many objects of the new_type in selection
 *                                (useful if this is the buffer type for xfer)
 *              *is_derived_type  0 if MPI primitive type, 1 if derived
 *              *permute_map      the permutation of the displacements to create
 *                                the MPI_Datatype
 *              *is_permuted      1 (TRUE) if the displacements had to be
 *                                permuted into order, 0 (FALSE) if they were
 *                                already monotonically non-decreasing
 *                                (the original header stated the opposite;
 *                                the code sets TRUE when a permutation occurs)
 *
 * Programmer:  Mohamad Chaarawi
 *
 *-------------------------------------------------------------------------
 */
static herr_t
H5S__mpio_point_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type, int *count,
                     hbool_t *is_derived_type, hbool_t do_permute, hsize_t **permute, hbool_t *is_permuted)
{
    MPI_Aint *      disp = NULL;         /* Datatype displacement for each point*/
    H5S_pnt_node_t *curr = NULL;         /* Current point being operated on in from the selection */
    hssize_t        snum_points;         /* Signed number of elements in selection */
    hsize_t         num_points;          /* Number of points in the selection */
    hsize_t         u;                   /* Local index variable */
    herr_t          ret_value = SUCCEED; /* Return value */

    FUNC_ENTER_STATIC

    /* Check args */
    HDassert(space);

    /* Get the total number of points selected */
    if ((snum_points = (hssize_t)H5S_GET_SELECT_NPOINTS(space)) < 0)
        HGOTO_ERROR(H5E_DATASPACE, H5E_CANTCOUNT, FAIL, "can't get number of elements selected")
    num_points = (hsize_t)snum_points;

    /* Allocate array for element displacements */
    if (NULL == (disp = (MPI_Aint *)H5MM_malloc(sizeof(MPI_Aint) * num_points)))
        HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of displacements")

    /* Allocate array for element permutation - returned to caller */
    if (do_permute)
        if (NULL == (*permute = (hsize_t *)H5MM_malloc(sizeof(hsize_t) * num_points)))
            HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate permutation array")

    /* Iterate through list of elements */
    curr = space->select.sel_info.pnt_lst->head;
    for (u = 0; u < num_points; u++) {
        /* Calculate the displacement of the current point (in elements,
         * then scaled to bytes below) */
        hsize_t disp_tmp = H5VM_array_offset(space->extent.rank, space->extent.size, curr->pnt);
        if (disp_tmp > LONG_MAX) /* Maximum value of type long */
            HGOTO_ERROR(H5E_DATASET, H5E_BADVALUE, FAIL, "disp overflow")
        disp[u] = (MPI_Aint)disp_tmp;
        disp[u] *= (MPI_Aint)elmt_size;

        /* This is a File Space used to set the file view, so adjust the
         * displacements to have them monotonically non-decreasing.
         * Generate the permutation array by recording, for each selected
         * point, the position it is shifted to in the sorted displacement
         * array (or num_points as a sentinel when the point stays put).
         * Example: suppose 4 points are selected with displacements
         *     Pt 1: disp=6 ; Pt 2: disp=3 ; Pt 3: disp=0 ; Pt 4: disp=4
         * The permute map to sort the displacements in order will be:
         *     point 1: map[0] = num_points (sentinel), indicating that this
         *              point is not moved (1st point selected)
         *     point 2: map[1] = 0, indicating that this point is moved to
         *              the first position, since disp_pt1(6) > disp_pt2(3)
         *     point 3: map[2] = 0, move to position 0, because it has the
         *              lowest disp among the points selected so far
         *     point 4: map[3] = 2, move to the 2nd position since point 1
         *              has a higher disp, but points 2 and 3 have lower
         *              displacements
         */
        if (do_permute) {
            /* Out of order relative to the previous point: binary-search
             * the insertion position among the already-sorted prefix */
            if (u > 0 && disp[u] < disp[u - 1]) {
                hsize_t s = 0, l = u, m = u / 2; /* search bounds and midpoint */

                *is_permuted = TRUE;
                do {
                    if (disp[u] > disp[m])
                        s = m + 1;
                    else if (disp[u] < disp[m])
                        l = m;
                    else
                        break;
                    m = s + ((l - s) / 2);
                } while (s < l);

                /* Shift the sorted prefix up by one and insert disp[u] at m */
                if (m < u) {
                    MPI_Aint temp;

                    temp = disp[u];
                    HDmemmove(disp + m + 1, disp + m, (u - m) * sizeof(MPI_Aint));
                    disp[m] = temp;
                } /* end if */
                (*permute)[u] = m;
            } /* end if */
            else
                /* Already in order: record the "not moved" sentinel */
                (*permute)[u] = num_points;
        } /* end if */
        /* this is a memory space, and no permutation is necessary to create
           the derived datatype */
        else {
            ; /* do nothing */
        } /* end else */

        /* get the next point */
        curr = curr->next;
    } /* end for */

    /* Create the MPI datatype for the set of element displacements */
    if (H5S__mpio_create_point_datatype(elmt_size, num_points, disp, new_type) < 0)
        HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't create an MPI Datatype from point selection")

    /* Set values about MPI datatype created */
    *count           = 1;
    *is_derived_type = TRUE;

done:
    if (NULL != disp)
        H5MM_free(disp);

    /* Release the permutation buffer, if it wasn't used.
     * NOTE(review): this dereferences *permute even when do_permute is
     * FALSE — presumably callers always pass a valid permute pointer;
     * confirm against H5S_mpio_space_type() */
    if (!(*is_permuted) && (*permute)) {
        H5MM_free(*permute);
        *permute = NULL;
    } /* end if */

    FUNC_LEAVE_NOAPI(ret_value)
} /* H5S__mpio_point_type() */
479
/*-------------------------------------------------------------------------
 * Function:    H5S__mpio_permute_type
 *
 * Purpose:     Translate an HDF5 "all/hyper/point" selection into an MPI type,
 *              while applying the permutation map. This function is called if
 *              the file space selection is permuted due to out-of-order point
 *              selection and so the memory datatype has to be permuted using the
 *              permutation map created by the file selection.
 *
 * Note:        This routine is called from H5S_mpio_space_type(), which is
 *              called first for the file dataspace and creates the permutation
 *              map; this routine then applies (and releases) that map for the
 *              memory dataspace.  (Original comment was truncated here — the
 *              completion above is inferred from the code: *permute is consumed
 *              and freed below; confirm against H5S_mpio_space_type().)
 *
 * Return:      Non-negative on success, negative on failure.
 *
 * Outputs:     *new_type         the MPI type corresponding to the selection
 *              *count            how many objects of the new_type in selection
 *                                (useful if this is the buffer type for xfer)
 *              *is_derived_type  0 if MPI primitive type, 1 if derived
 *
 * Programmer:  Mohamad Chaarawi
 *
 *-------------------------------------------------------------------------
 */
static herr_t
H5S__mpio_permute_type(const H5S_t *space, size_t elmt_size, hsize_t **permute, MPI_Datatype *new_type,
                       int *count, hbool_t *is_derived_type)
{
    MPI_Aint *     disp = NULL;           /* Datatype displacement for each point*/
    H5S_sel_iter_t sel_iter;              /* Selection iteration info */
    hbool_t        sel_iter_init = FALSE; /* Selection iteration info has been initialized */
    hssize_t       snum_points;           /* Signed number of elements in selection */
    hsize_t        num_points;            /* Number of points in the selection */
    size_t         max_elem;              /* Maximum number of elements allowed in sequences */
    hsize_t        u;                     /* Local index variable */
    herr_t         ret_value = SUCCEED;   /* Return value */

    FUNC_ENTER_STATIC

    /* Check args */
    HDassert(space);

    /* Get the total number of points selected */
    if ((snum_points = (hssize_t)H5S_GET_SELECT_NPOINTS(space)) < 0)
        HGOTO_ERROR(H5E_DATASPACE, H5E_CANTCOUNT, FAIL, "can't get number of elements selected")
    num_points = (hsize_t)snum_points;

    /* Allocate array to store point displacements */
    if (NULL == (disp = (MPI_Aint *)H5MM_malloc(sizeof(MPI_Aint) * num_points)))
        HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of displacements")

    /* Initialize selection iterator */
    if (H5S_select_iter_init(&sel_iter, space, elmt_size, 0) < 0)
        HGOTO_ERROR(H5E_DATASPACE, H5E_CANTINIT, FAIL, "unable to initialize selection iterator")
    sel_iter_init = TRUE; /* Selection iteration info has been initialized */

    /* Set the number of elements to iterate over */
    H5_CHECKED_ASSIGN(max_elem, size_t, num_points, hsize_t);

    /* Loop, while elements left in selection */
    u = 0;
    while (max_elem > 0) {
        hsize_t off[H5D_IO_VECTOR_SIZE]; /* Array to store sequence offsets */
        size_t  len[H5D_IO_VECTOR_SIZE]; /* Array to store sequence lengths */
        size_t  nelem;                   /* Number of elements used in sequences */
        size_t  nseq;                    /* Number of sequences generated */
        size_t  curr_seq;                /* Current sequence being worked on */

        /* Get the sequences of bytes */
        if (H5S_SELECT_ITER_GET_SEQ_LIST(&sel_iter, (size_t)H5D_IO_VECTOR_SIZE, max_elem, &nseq, &nelem, off,
                                         len) < 0)
            HGOTO_ERROR(H5E_DATASPACE, H5E_UNSUPPORTED, FAIL, "sequence length generation failed")

        /* Loop, while sequences left to process */
        for (curr_seq = 0; curr_seq < nseq; curr_seq++) {
            hsize_t curr_off; /* Current offset within sequence */
            size_t  curr_len; /* Length of bytes left to process in sequence */

            /* Get the current offset */
            curr_off = off[curr_seq];

            /* Get the number of bytes in sequence */
            curr_len = len[curr_seq];

            /* Loop, while bytes left in sequence, emitting one displacement
             * per element of the sequence */
            while (curr_len > 0) {
                /* Set the displacement of the current point */
                if (curr_off > LONG_MAX) /* Guard the cast to MPI_Aint below */
                    HGOTO_ERROR(H5E_DATASET, H5E_BADVALUE, FAIL, "curr_off overflow")
                disp[u] = (MPI_Aint)curr_off;

                /* This is a memory displacement, so for each point selected,
                 * apply the map that was generated by the file selection.
                 * A map value of num_points is the "not moved" sentinel;
                 * otherwise insert disp[u] at the mapped position, shifting
                 * the intervening entries up by one. */
                if ((*permute)[u] != num_points) {
                    MPI_Aint temp = disp[u];

                    HDmemmove(disp + (*permute)[u] + 1, disp + (*permute)[u],
                              (u - (*permute)[u]) * sizeof(MPI_Aint));
                    disp[(*permute)[u]] = temp;
                } /* end if */

                /* Advance to next element */
                u++;

                /* Increment offset in dataspace */
                curr_off += elmt_size;

                /* Decrement number of bytes left in sequence */
                curr_len -= elmt_size;
            } /* end while */
        }     /* end for */

        /* Decrement number of elements left to process */
        max_elem -= nelem;
    } /* end while */

    /* Create the MPI datatype for the set of element displacements */
    if (H5S__mpio_create_point_datatype(elmt_size, num_points, disp, new_type) < 0)
        HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't create an MPI Datatype from point selection")

    /* Set values about MPI datatype created */
    *count           = 1;
    *is_derived_type = TRUE;

done:
    /* Release selection iterator */
    if (sel_iter_init)
        if (H5S_SELECT_ITER_RELEASE(&sel_iter) < 0)
            HDONE_ERROR(H5E_DATASPACE, H5E_CANTRELEASE, FAIL, "unable to release selection iterator")

    /* Free memory (the permutation map is consumed here and not returned) */
    if (disp)
        H5MM_free(disp);
    if (*permute) {
        H5MM_free(*permute);
        *permute = NULL;
    } /* end if */

    FUNC_LEAVE_NOAPI(ret_value)
} /* H5S__mpio_permute_type() */
619
620 /*-------------------------------------------------------------------------
621 * Function: H5S__mpio_reg_hyper_type
622 *
623 * Purpose: Translate a regular HDF5 hyperslab selection into an MPI type.
624 *
625 * Return: Non-negative on success, negative on failure.
626 *
627 * Outputs: *new_type the MPI type corresponding to the selection
628 * *count how many objects of the new_type in selection
629 * (useful if this is the buffer type for xfer)
630 * *is_derived_type 0 if MPI primitive type, 1 if derived
631 *
632 * Programmer: rky 980813
633 *
634 *-------------------------------------------------------------------------
635 */
636 static herr_t
H5S__mpio_reg_hyper_type(const H5S_t * space,size_t elmt_size,MPI_Datatype * new_type,int * count,hbool_t * is_derived_type)637 H5S__mpio_reg_hyper_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type, int *count,
638 hbool_t *is_derived_type)
639 {
640 H5S_sel_iter_t sel_iter; /* Selection iteration info */
641 hbool_t sel_iter_init = FALSE; /* Selection iteration info has been initialized */
642
643 struct dim { /* less hassle than malloc/free & ilk */
644 hssize_t start;
645 hsize_t strid;
646 hsize_t block;
647 hsize_t xtent;
648 hsize_t count;
649 } d[H5S_MAX_RANK];
650
651 hsize_t bigio_count; /* Transition point to create derived type */
652 hsize_t offset[H5S_MAX_RANK];
653 hsize_t max_xtent[H5S_MAX_RANK];
654 H5S_hyper_dim_t *diminfo; /* [rank] */
655 unsigned rank;
656 MPI_Datatype inner_type, outer_type;
657 MPI_Aint extent_len, start_disp, new_extent;
658 MPI_Aint lb; /* Needed as an argument for MPI_Type_get_extent */
659 unsigned u; /* Local index variable */
660 int i; /* Local index variable */
661 int mpi_code; /* MPI return code */
662 herr_t ret_value = SUCCEED;
663
664 FUNC_ENTER_STATIC
665
666 /* Check args */
667 HDassert(space);
668 HDassert(sizeof(MPI_Aint) >= sizeof(elmt_size));
669
670 bigio_count = H5_mpi_get_bigio_count();
671 /* Initialize selection iterator */
672 if (H5S_select_iter_init(&sel_iter, space, elmt_size, 0) < 0)
673 HGOTO_ERROR(H5E_DATASPACE, H5E_CANTINIT, FAIL, "unable to initialize selection iterator")
674 sel_iter_init = TRUE; /* Selection iteration info has been initialized */
675
676 /* Abbreviate args */
677 diminfo = sel_iter.u.hyp.diminfo;
678 HDassert(diminfo);
679
680 /* Make a local copy of the dimension info so we can operate with them */
681
682 /* Check if this is a "flattened" regular hyperslab selection */
683 if (sel_iter.u.hyp.iter_rank != 0 && sel_iter.u.hyp.iter_rank < space->extent.rank) {
684 /* Flattened selection */
685 rank = sel_iter.u.hyp.iter_rank;
686 #ifdef H5S_DEBUG
687 if (H5DEBUG(S))
688 HDfprintf(H5DEBUG(S), "%s: Flattened selection\n", FUNC);
689 #endif
690 for (u = 0; u < rank; ++u) {
691 H5_CHECK_OVERFLOW(diminfo[u].start, hsize_t, hssize_t)
692 d[u].start = (hssize_t)diminfo[u].start + sel_iter.u.hyp.sel_off[u];
693 d[u].strid = diminfo[u].stride;
694 d[u].block = diminfo[u].block;
695 d[u].count = diminfo[u].count;
696 d[u].xtent = sel_iter.u.hyp.size[u];
697
698 #ifdef H5S_DEBUG
699 if (H5DEBUG(S)) {
700 HDfprintf(H5DEBUG(S),
701 "%s: start=%" PRIdHSIZE " stride=%" PRIuHSIZE " count=%" PRIuHSIZE
702 " block=%" PRIuHSIZE " xtent=%" PRIuHSIZE,
703 FUNC, d[u].start, d[u].strid, d[u].count, d[u].block, d[u].xtent);
704 if (u == 0)
705 HDfprintf(H5DEBUG(S), " rank=%u\n", rank);
706 else
707 HDfprintf(H5DEBUG(S), "\n");
708 }
709 #endif
710
711 /* Sanity check */
712 HDassert(d[u].block > 0);
713 HDassert(d[u].count > 0);
714 HDassert(d[u].xtent > 0);
715 } /* end for */
716 } /* end if */
717 else {
718 /* Non-flattened selection */
719 rank = space->extent.rank;
720 #ifdef H5S_DEBUG
721 if (H5DEBUG(S))
722 HDfprintf(H5DEBUG(S), "%s: Non-flattened selection\n", FUNC);
723 #endif
724 for (u = 0; u < rank; ++u) {
725 H5_CHECK_OVERFLOW(diminfo[u].start, hsize_t, hssize_t)
726 d[u].start = (hssize_t)diminfo[u].start + space->select.offset[u];
727 d[u].strid = diminfo[u].stride;
728 d[u].block = diminfo[u].block;
729 d[u].count = diminfo[u].count;
730 d[u].xtent = space->extent.size[u];
731
732 #ifdef H5S_DEBUG
733 if (H5DEBUG(S)) {
734 HDfprintf(H5DEBUG(S),
735 "%s: start=%" PRIdHSIZE " stride=%" PRIuHSIZE " count=%" PRIuHSIZE
736 " block=%" PRIuHSIZE " xtent=%" PRIuHSIZE,
737 FUNC, d[u].start, d[u].strid, d[u].count, d[u].block, d[u].xtent);
738 if (u == 0)
739 HDfprintf(H5DEBUG(S), " rank=%u\n", rank);
740 else
741 HDfprintf(H5DEBUG(S), "\n");
742 }
743 #endif
744
745 /* Sanity check */
746 HDassert(d[u].block > 0);
747 HDassert(d[u].count > 0);
748 HDassert(d[u].xtent > 0);
749 } /* end for */
750 } /* end else */
751
752 /**********************************************************************
753 Compute array "offset[rank]" which gives the offsets for a multi-
754 dimensional array with dimensions "d[i].xtent" (i=0,1,...,rank-1).
755 **********************************************************************/
756 offset[rank - 1] = 1;
757 max_xtent[rank - 1] = d[rank - 1].xtent;
758 #ifdef H5S_DEBUG
759 if (H5DEBUG(S)) {
760 i = ((int)rank) - 1;
761 HDfprintf(H5DEBUG(S), " offset[%2d]=%" PRIuHSIZE "; max_xtent[%2d]=%" PRIuHSIZE "\n", i, offset[i], i,
762 max_xtent[i]);
763 }
764 #endif
765 for (i = ((int)rank) - 2; i >= 0; --i) {
766 offset[i] = offset[i + 1] * d[i + 1].xtent;
767 max_xtent[i] = max_xtent[i + 1] * d[i].xtent;
768 #ifdef H5S_DEBUG
769 if (H5DEBUG(S))
770 HDfprintf(H5DEBUG(S), " offset[%2d]=%" PRIuHSIZE "; max_xtent[%2d]=%" PRIuHSIZE "\n", i,
771 offset[i], i, max_xtent[i]);
772 #endif
773 } /* end for */
774
775 /* Create a type covering the selected hyperslab.
776 * Multidimensional dataspaces are stored in row-major order.
777 * The type is built from the inside out, going from the
778 * fastest-changing (i.e., inner) dimension * to the slowest (outer).
779 */
780
781 /*******************************************************
782 * Construct contig type for inner contig dims:
783 *******************************************************/
784 #ifdef H5S_DEBUG
785 if (H5DEBUG(S)) {
786 HDfprintf(H5DEBUG(S), "%s: Making contig type %zu MPI_BYTEs\n", FUNC, elmt_size);
787 for (i = ((int)rank) - 1; i >= 0; --i)
788 HDfprintf(H5DEBUG(S), "d[%d].xtent=%" PRIuHSIZE "\n", i, d[i].xtent);
789 }
790 #endif
791
792 /* LARGE_DATATYPE::
793 * Check if the number of elements to form the inner type fits into a 32 bit integer.
794 * If yes then just create the innertype with MPI_Type_contiguous.
795 * Otherwise create a compound datatype by iterating as many times as needed
796 * for the innertype to be created.
797 */
798 if (bigio_count >= elmt_size) {
799 /* Use a single MPI datatype that has a 32 bit size */
800 if (MPI_SUCCESS != (mpi_code = MPI_Type_contiguous((int)elmt_size, MPI_BYTE, &inner_type)))
801 HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
802 }
803 else
804 /* Create the compound datatype for this operation (> 2GB) */
805 if (H5_mpio_create_large_type(elmt_size, 0, MPI_BYTE, &inner_type) < 0)
806 HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,
807 "couldn't create a large inner datatype in hyper selection")
808
809 /*******************************************************
810 * Construct the type by walking the hyperslab dims
811 * from the inside out:
812 *******************************************************/
813 for (i = ((int)rank) - 1; i >= 0; --i) {
814 #ifdef H5S_DEBUG
815 if (H5DEBUG(S))
816 HDfprintf(H5DEBUG(S),
817 "%s: Dimension i=%d \n"
818 "start=%" PRIdHSIZE " count=%" PRIuHSIZE " block=%" PRIuHSIZE " stride=%" PRIuHSIZE
819 ", xtent=%" PRIuHSIZE " max_xtent=%" PRIuHSIZE "\n",
820 FUNC, i, d[i].start, d[i].count, d[i].block, d[i].strid, d[i].xtent, max_xtent[i]);
821 #endif
822
823 #ifdef H5S_DEBUG
824 if (H5DEBUG(S))
825 HDfprintf(H5DEBUG(S), "%s: i=%d Making vector-type \n", FUNC, i);
826 #endif
827 /****************************************
828 * Build vector type of the selection.
829 ****************************************/
830 if (bigio_count >= d[i].count && bigio_count >= d[i].block && bigio_count >= d[i].strid) {
831 /* All the parameters fit into 32 bit integers so create the vector type normally */
832 mpi_code = MPI_Type_vector((int)(d[i].count), /* count */
833 (int)(d[i].block), /* blocklength */
834 (int)(d[i].strid), /* stride */
835 inner_type, /* old type */
836 &outer_type); /* new type */
837
838 MPI_Type_free(&inner_type);
839 if (mpi_code != MPI_SUCCESS)
840 HMPI_GOTO_ERROR(FAIL, "couldn't create MPI vector type", mpi_code)
841 }
842 else {
843 /* Things get a bit more complicated and require LARGE_DATATYPE processing
844 * There are two MPI datatypes that need to be created:
845 * 1) an internal contiguous block; and
846 * 2) a collection of elements where an element is a contiguous block(1).
847 * Remember that the input arguments to the MPI-IO functions use integer
848 * values to represent element counts. We ARE allowed however, in the
849 * more recent MPI implementations to use constructed datatypes whereby
850 * the total number of bytes in a transfer could be :
851 * (2GB-1)number_of_blocks * the_datatype_extent.
852 */
853
854 MPI_Aint stride_in_bytes, inner_extent;
855 MPI_Datatype block_type;
856
857 /* Create a contiguous datatype inner_type x number of BLOCKS.
858 * Again we need to check that the number of BLOCKS can fit into
859 * a 32 bit integer */
860 if (bigio_count < d[i].block) {
861 if (H5_mpio_create_large_type(d[i].block, 0, inner_type, &block_type) < 0)
862 HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,
863 "couldn't create a large block datatype in hyper selection")
864 }
865 else if (MPI_SUCCESS !=
866 (mpi_code = MPI_Type_contiguous((int)d[i].block, inner_type, &block_type)))
867 HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
868
869 /* As of version 4.0, OpenMPI now turns off MPI-1 API calls by default,
870 * so we're using the MPI-2 version even though we don't need the lb
871 * value.
872 */
873 {
874 MPI_Aint unused_lb_arg;
875 MPI_Type_get_extent(inner_type, &unused_lb_arg, &inner_extent);
876 }
877 stride_in_bytes = inner_extent * (MPI_Aint)d[i].strid;
878
879 /* If the element count is larger than what a 32 bit integer can hold,
880 * we call the large type creation function to handle that
881 */
882 if (bigio_count < d[i].count) {
883 if (H5_mpio_create_large_type(d[i].count, stride_in_bytes, block_type, &outer_type) < 0)
884 HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,
885 "couldn't create a large outer datatype in hyper selection")
886 }
887 /* otherwise a regular create_hvector will do */
888 else if (MPI_SUCCESS != (mpi_code = MPI_Type_create_hvector((int)d[i].count, /* count */
889 1, /* blocklength */
890 stride_in_bytes, /* stride in bytes*/
891 block_type, /* old type */
892 &outer_type))) /* new type */
893 HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code)
894
895 MPI_Type_free(&block_type);
896 MPI_Type_free(&inner_type);
897 } /* end else */
898
899 /****************************************
900 * Then build the dimension type as (start, vector type, xtent).
901 ****************************************/
902
903 /* Calculate start and extent values of this dimension */
904 /* Check if value overflow to cast to type MPI_Aint */
905 if (d[i].start > LONG_MAX || offset[i] > LONG_MAX || elmt_size > LONG_MAX)
906 HGOTO_ERROR(H5E_DATASET, H5E_BADVALUE, FAIL, "result overflow")
907 start_disp = (MPI_Aint)d[i].start * (MPI_Aint)offset[i] * (MPI_Aint)elmt_size;
908
909 if (max_xtent[i] > LONG_MAX)
910 HGOTO_ERROR(H5E_DATASET, H5E_BADVALUE, FAIL, "max_xtent overflow")
911 new_extent = (MPI_Aint)elmt_size * (MPI_Aint)max_xtent[i];
912 if (MPI_SUCCESS != (mpi_code = MPI_Type_get_extent(outer_type, &lb, &extent_len)))
913 HMPI_GOTO_ERROR(FAIL, "MPI_Type_get_extent failed", mpi_code)
914
915 /*************************************************
916 * Restructure this datatype ("outer_type")
917 * so that it still starts at 0, but its extent
918 * is the full extent in this dimension.
919 *************************************************/
920 if (start_disp > 0 || extent_len < new_extent) {
921 MPI_Datatype interm_type;
922 int block_len = 1;
923
924 HDassert(0 == lb);
925
926 mpi_code = MPI_Type_create_hindexed(1, &block_len, &start_disp, outer_type, &interm_type);
927 MPI_Type_free(&outer_type);
928 if (mpi_code != MPI_SUCCESS)
929 HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code)
930
931 mpi_code = MPI_Type_create_resized(interm_type, lb, new_extent, &inner_type);
932 MPI_Type_free(&interm_type);
933 if (mpi_code != MPI_SUCCESS)
934 HMPI_GOTO_ERROR(FAIL, "couldn't resize MPI vector type", mpi_code)
935 } /* end if */
936 else
937 inner_type = outer_type;
938 } /* end for */
939 /******************************************
940 * End of loop, walking through dimensions.
941 *******************************************/
942
943 /* At this point inner_type is actually the outermost type, even for 0-trip loop */
944 *new_type = inner_type;
945 if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_type)))
946 HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
947
948 /* fill in the remaining return values */
949 *count = 1; /* only have to move one of these suckers! */
950 *is_derived_type = TRUE;
951
952 done:
953 /* Release selection iterator */
954 if (sel_iter_init)
955 if (H5S_SELECT_ITER_RELEASE(&sel_iter) < 0)
956 HDONE_ERROR(H5E_DATASPACE, H5E_CANTRELEASE, FAIL, "unable to release selection iterator")
957
958 #ifdef H5S_DEBUG
959 if (H5DEBUG(S))
960 HDfprintf(H5DEBUG(S), "Leave %s, count=%d is_derived_type=%s\n", FUNC, *count,
961 (*is_derived_type) ? "TRUE" : "FALSE");
962 #endif
963 FUNC_LEAVE_NOAPI(ret_value)
964 } /* end H5S__mpio_reg_hyper_type() */
965
966 /*-------------------------------------------------------------------------
967 * Function: H5S__mpio_span_hyper_type
968 *
 * Purpose:     Translate an HDF5 irregular hyperslab selection into an
 *              MPI type.
971 *
972 * Return: Non-negative on success, negative on failure.
973 *
974 * Outputs: *new_type the MPI type corresponding to the selection
975 * *count how many objects of the new_type in selection
976 * (useful if this is the buffer type for xfer)
977 * *is_derived_type 0 if MPI primitive type, 1 if derived
978 *
979 * Programmer: kyang
980 *
981 *-------------------------------------------------------------------------
982 */
983 static herr_t
H5S__mpio_span_hyper_type(const H5S_t * space,size_t elmt_size,MPI_Datatype * new_type,int * count,hbool_t * is_derived_type)984 H5S__mpio_span_hyper_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type, int *count,
985 hbool_t *is_derived_type)
986 {
987 H5S_mpio_mpitype_list_t type_list; /* List to track MPI data types created */
988 MPI_Datatype elmt_type; /* MPI datatype for an element */
989 hbool_t elmt_type_is_derived = FALSE; /* Whether the element type has been created */
990 MPI_Datatype span_type; /* MPI datatype for overall span tree */
991 hsize_t bigio_count; /* Transition point to create derived type */
992 hsize_t down[H5S_MAX_RANK]; /* 'down' sizes for each dimension */
993 uint64_t op_gen; /* Operation generation value */
994 int mpi_code; /* MPI return code */
995 herr_t ret_value = SUCCEED; /* Return value */
996
997 FUNC_ENTER_STATIC
998
999 /* Check args */
1000 HDassert(space);
1001 HDassert(space->extent.size);
1002 HDassert(space->select.sel_info.hslab->span_lst);
1003 HDassert(space->select.sel_info.hslab->span_lst->head);
1004
1005 bigio_count = H5_mpi_get_bigio_count();
1006 /* Create the base type for an element */
1007 if (bigio_count >= elmt_size) {
1008 if (MPI_SUCCESS != (mpi_code = MPI_Type_contiguous((int)elmt_size, MPI_BYTE, &elmt_type)))
1009 HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
1010 }
1011 else if (H5_mpio_create_large_type(elmt_size, 0, MPI_BYTE, &elmt_type) < 0)
1012 HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,
1013 "couldn't create a large element datatype in span_hyper selection")
1014 elmt_type_is_derived = TRUE;
1015
1016 /* Compute 'down' sizes for each dimension */
1017 H5VM_array_down(space->extent.rank, space->extent.size, down);
1018
1019 /* Acquire an operation generation value for creating MPI datatypes */
1020 op_gen = H5S__hyper_get_op_gen();
1021
1022 /* Obtain derived MPI data type */
1023 /* Always use op_info[0] since we own this op_info, so there can be no
1024 * simultaneous operations */
1025 type_list.head = type_list.tail = NULL;
1026 if (H5S__obtain_datatype(space->select.sel_info.hslab->span_lst, down, elmt_size, &elmt_type, &span_type,
1027 &type_list, 0, op_gen) < 0)
1028 HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't obtain MPI derived data type")
1029 if (MPI_SUCCESS != (mpi_code = MPI_Type_dup(span_type, new_type)))
1030 HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
1031 if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_type)))
1032 HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
1033
1034 /* Release MPI data types generated during span tree traversal */
1035 if (H5S__release_datatype(&type_list) < 0)
1036 HGOTO_ERROR(H5E_DATASPACE, H5E_CANTRELEASE, FAIL, "couldn't release MPI derived data type")
1037
1038 /* fill in the remaining return values */
1039 *count = 1;
1040 *is_derived_type = TRUE;
1041
1042 done:
1043 /* Release resources */
1044 if (elmt_type_is_derived)
1045 if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&elmt_type)))
1046 HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
1047
1048 FUNC_LEAVE_NOAPI(ret_value)
1049 } /* end H5S__mpio_span_hyper_type() */
1050
1051 /*-------------------------------------------------------------------------
1052 * Function: H5S__release_datatype
1053 *
1054 * Purpose: Release the MPI derived datatypes for span-tree hyperslab selection
1055 *
1056 * Return: Non-negative on success, negative on failure.
1057 *
1058 * Programmer: Quincey Koziol, February 2, 2019
1059 *
1060 *-------------------------------------------------------------------------
1061 */
1062 static herr_t
H5S__release_datatype(H5S_mpio_mpitype_list_t * type_list)1063 H5S__release_datatype(H5S_mpio_mpitype_list_t *type_list)
1064 {
1065 H5S_mpio_mpitype_node_t *curr; /* Pointer to head of list */
1066 herr_t ret_value = SUCCEED; /* Return value */
1067
1068 FUNC_ENTER_STATIC
1069
1070 /* Sanity check */
1071 HDassert(type_list);
1072
1073 /* Iterate over the list, freeing the MPI data types */
1074 curr = type_list->head;
1075 while (curr) {
1076 H5S_mpio_mpitype_node_t *next; /* Pointer to next node in list */
1077 int mpi_code; /* MPI return status code */
1078
1079 /* Release the MPI data type for this span tree */
1080 if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&curr->type)))
1081 HMPI_GOTO_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
1082
1083 /* Get pointer to next node in list */
1084 next = curr->next;
1085
1086 /* Free the current node */
1087 curr = H5FL_FREE(H5S_mpio_mpitype_node_t, curr);
1088
1089 /* Advance to next node */
1090 curr = next;
1091 } /* end while */
1092
1093 done:
1094 FUNC_LEAVE_NOAPI(ret_value)
1095 } /* end H5S__release_datatype() */
1096
1097 /*-------------------------------------------------------------------------
1098 * Function: H5S__obtain_datatype
1099 *
1100 * Purpose: Obtain an MPI derived datatype for span-tree hyperslab selection
1101 *
1102 * Return: Non-negative on success, negative on failure.
1103 *
1104 * Outputs: *span_type the MPI type corresponding to the selection
1105 *
1106 * Programmer: kyang
1107 *
1108 *-------------------------------------------------------------------------
1109 */
static herr_t
H5S__obtain_datatype(H5S_hyper_span_info_t *spans, const hsize_t *down, size_t elmt_size,
                     const MPI_Datatype *elmt_type, MPI_Datatype *span_type,
                     H5S_mpio_mpitype_list_t *type_list, unsigned op_info_i, uint64_t op_gen)
{
    H5S_hyper_span_t *span;                      /* Hyperslab span to iterate with */
    hsize_t           bigio_count;               /* Transition point to create derived type */
    size_t            alloc_count = 0;           /* Number of span tree nodes allocated at this level */
    size_t            outercount  = 0;           /* Number of span tree nodes at this level */
    MPI_Datatype *    inner_type  = NULL;        /* Per-span MPI datatypes for the recursive (non-leaf) case */
    hbool_t           inner_types_freed = FALSE; /* Whether the inner_type MPI datatypes have been freed */
    int *             blocklen          = NULL;  /* Block lengths for hindexed/struct construction */
    MPI_Aint *        disp              = NULL;  /* Byte displacements for hindexed/struct construction */
    size_t            u;                         /* Local index variable */
    int               mpi_code;                  /* MPI return status code */
    herr_t            ret_value = SUCCEED;       /* Return value */

    FUNC_ENTER_STATIC

    /* Sanity check */
    HDassert(spans);
    HDassert(type_list);

    bigio_count = H5_mpi_get_bigio_count();
    /* Check if we've visited this span tree before: span trees can be shared
     * between dimensions, so a matching op_gen means the MPI datatype for this
     * subtree was already built during this operation and is cached in
     * spans->op_info[op_info_i].u.down_type. */
    if (spans->op_info[op_info_i].op_gen != op_gen) {
        H5S_mpio_mpitype_node_t *type_node; /* Pointer to new node in MPI data type list */

        /* Allocate the initial displacement & block length buffers */
        alloc_count = H5S_MPIO_INITIAL_ALLOC_COUNT;
        if (NULL == (disp = (MPI_Aint *)H5MM_malloc(alloc_count * sizeof(MPI_Aint))))
            HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of displacements")
        if (NULL == (blocklen = (int *)H5MM_malloc(alloc_count * sizeof(int))))
            HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of block lengths")

        /* If this is the fastest changing dimension, it is the base case for derived datatype. */
        span = spans->head;
        if (NULL == span->down) {
            hbool_t large_block = FALSE; /* Wether the block length is larger than 32 bit integer */

            /* Leaf level: gather one (displacement, block length) pair per span */
            outercount = 0;
            while (span) {
                hsize_t nelmts; /* # of elements covered by current span */

                /* Check if we need to increase the size of the buffers */
                if (outercount >= alloc_count) {
                    MPI_Aint *tmp_disp;     /* Temporary pointer to new displacement buffer */
                    int *     tmp_blocklen; /* Temporary pointer to new block length buffer */

                    /* Double the allocation count */
                    alloc_count *= 2;

                    /* Re-allocate the buffers */
                    if (NULL == (tmp_disp = (MPI_Aint *)H5MM_realloc(disp, alloc_count * sizeof(MPI_Aint))))
                        HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL,
                                    "can't allocate array of displacements")
                    disp = tmp_disp;
                    if (NULL == (tmp_blocklen = (int *)H5MM_realloc(blocklen, alloc_count * sizeof(int))))
                        HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL,
                                    "can't allocate array of block lengths")
                    blocklen = tmp_blocklen;
                } /* end if */

                /* Compute the number of elements to attempt in this span */
                nelmts = (span->high - span->low) + 1;

                /* Store displacement & block length (displacement is in bytes) */
                disp[outercount] = (MPI_Aint)elmt_size * (MPI_Aint)span->low;
                H5_CHECK_OVERFLOW(nelmts, hsize_t, int)
                blocklen[outercount] = (int)nelmts;

                if (bigio_count < (hsize_t)blocklen[outercount])
                    large_block = TRUE; /* at least one block type is large, so set this flag to true */

                span = span->next;
                outercount++;
            } /* end while */

            /* Everything fits into integers, so cast them and use hindexed */
            if (bigio_count >= outercount && large_block == FALSE) {
                if (MPI_SUCCESS !=
                    (mpi_code = MPI_Type_create_hindexed((int)outercount, blocklen, disp, *elmt_type,
                                                         &spans->op_info[op_info_i].u.down_type)))
                    HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code)
            } /* end if */
            else { /* LARGE_DATATYPE:: Something doesn't fit into a 32 bit integer */
                /* Build the type incrementally: each block becomes a contiguous
                 * type, combined pairwise with the running result via
                 * MPI_Type_create_struct. */
                for (u = 0; u < outercount; u++) {
                    MPI_Datatype temp_type = MPI_DATATYPE_NULL;

                    /* create the block type from elmt_type while checking the 32 bit int limit */
                    if ((hsize_t)(blocklen[u]) > bigio_count) {
                        if (H5_mpio_create_large_type((hsize_t)blocklen[u], 0, *elmt_type, &temp_type) < 0)
                            HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,
                                        "couldn't create a large element datatype in span_hyper selection")
                    } /* end if */
                    else if (MPI_SUCCESS !=
                             (mpi_code = MPI_Type_contiguous((int)blocklen[u], *elmt_type, &temp_type)))
                        HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)

                    /* Combine the current datatype that is created with this current block type */
                    if (0 == u) /* first iteration, there is no combined datatype yet */
                        spans->op_info[op_info_i].u.down_type = temp_type;
                    else {
                        int          bl[2] = {1, 1};
                        MPI_Aint     ds[2] = {disp[u - 1], disp[u]};
                        MPI_Datatype dt[2] = {spans->op_info[op_info_i].u.down_type, temp_type};

                        /* NOTE(review): the previous combined type (dt[0]) is
                         * overwritten by the new struct type without an
                         * MPI_Type_free — looks like a leak of intermediate
                         * types; confirm against MPI datatype ownership rules. */
                        if (MPI_SUCCESS != (mpi_code = MPI_Type_create_struct(
                                                2,                                        /* count */
                                                bl,                                       /* blocklength */
                                                ds,                                       /* stride in bytes*/
                                                dt,                                       /* old type */
                                                &spans->op_info[op_info_i].u.down_type))) /* new type */
                            HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code)

                        /* Release previous temporary datatype */
                        if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&temp_type)))
                            HMPI_GOTO_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
                    } /* end else */
                }     /* end for */
            }         /* end else (LARGE_DATATYPE::) */
        }             /* end if */
        else {
            /* Interior level: recurse for each span's subtree, then assemble
             * the per-span types into one struct type for this level. */
            MPI_Aint stride; /* Distance between inner MPI datatypes */

            if (NULL == (inner_type = (MPI_Datatype *)H5MM_malloc(alloc_count * sizeof(MPI_Datatype))))
                HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of inner MPI datatypes")

            /* Calculate the total bytes of the lower dimension */
            stride = (MPI_Aint)(*down) * (MPI_Aint)elmt_size;

            /* Loop over span nodes */
            outercount = 0;
            while (span) {
                MPI_Datatype down_type; /* Temporary MPI datatype for a span tree node's children */
                hsize_t      nelmts;    /* # of elements covered by current span */

                /* Check if we need to increase the size of the buffers */
                if (outercount >= alloc_count) {
                    MPI_Aint *    tmp_disp;       /* Temporary pointer to new displacement buffer */
                    int *         tmp_blocklen;   /* Temporary pointer to new block length buffer */
                    MPI_Datatype *tmp_inner_type; /* Temporary pointer to inner MPI datatype buffer */

                    /* Double the allocation count */
                    alloc_count *= 2;

                    /* Re-allocate the buffers */
                    if (NULL == (tmp_disp = (MPI_Aint *)H5MM_realloc(disp, alloc_count * sizeof(MPI_Aint))))
                        HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL,
                                    "can't allocate array of displacements")
                    disp = tmp_disp;
                    if (NULL == (tmp_blocklen = (int *)H5MM_realloc(blocklen, alloc_count * sizeof(int))))
                        HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL,
                                    "can't allocate array of block lengths")
                    blocklen = tmp_blocklen;
                    if (NULL == (tmp_inner_type = (MPI_Datatype *)H5MM_realloc(
                                     inner_type, alloc_count * sizeof(MPI_Datatype))))
                        HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL,
                                    "can't allocate array of inner MPI datatypes")
                    inner_type = tmp_inner_type;
                } /* end if */

                /* Displacement should be in byte and should have dimension information */
                /* First using MPI Type vector to build derived data type for this span only */
                /* Need to calculate the disp in byte for this dimension. */
                disp[outercount]     = (MPI_Aint)span->low * stride;
                blocklen[outercount] = 1;

                /* Generate MPI datatype for next dimension down (may hit the
                 * op_gen cache if the subtree is shared) */
                if (H5S__obtain_datatype(span->down, down + 1, elmt_size, elmt_type, &down_type, type_list,
                                         op_info_i, op_gen) < 0)
                    HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't obtain MPI derived data type")

                /* Compute the number of elements to attempt in this span */
                nelmts = (span->high - span->low) + 1;

                /* Build the MPI datatype for this node: nelmts copies of the
                 * child type, spaced one row of the lower dimensions apart */
                H5_CHECK_OVERFLOW(nelmts, hsize_t, int)
                if (MPI_SUCCESS != (mpi_code = MPI_Type_create_hvector((int)nelmts, 1, stride, down_type,
                                                                       &inner_type[outercount])))
                    HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code)

                span = span->next;
                outercount++;
            } /* end while */

            /* Building the whole vector datatype */
            H5_CHECK_OVERFLOW(outercount, size_t, int)
            if (MPI_SUCCESS != (mpi_code = MPI_Type_create_struct((int)outercount, blocklen, disp, inner_type,
                                                                  &spans->op_info[op_info_i].u.down_type)))
                HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code)

            /* Release inner node types (the struct type above holds its own
             * reference to their contents) */
            for (u = 0; u < outercount; u++)
                if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&inner_type[u])))
                    HMPI_GOTO_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
            inner_types_freed = TRUE;
        } /* end else */

        /* Allocate space for the MPI data type list node */
        if (NULL == (type_node = H5FL_MALLOC(H5S_mpio_mpitype_node_t)))
            HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate MPI data type list node")

        /* Set up MPI type node: ownership of the cached type is handed to the
         * list, which the caller releases via H5S__release_datatype() */
        type_node->type = spans->op_info[op_info_i].u.down_type;
        type_node->next = NULL;

        /* Add MPI type node to list */
        if (type_list->head == NULL)
            type_list->head = type_list->tail = type_node;
        else {
            type_list->tail->next = type_node;
            type_list->tail       = type_node;
        } /* end else */

        /* Remember that we've visited this span tree */
        spans->op_info[op_info_i].op_gen = op_gen;
    } /* end else */

    /* Return MPI data type for span tree */
    *span_type = spans->op_info[op_info_i].u.down_type;

done:
    /* General cleanup */
    if (inner_type != NULL) {
        if (!inner_types_freed)
            for (u = 0; u < outercount; u++)
                if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&inner_type[u])))
                    HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
        H5MM_free(inner_type);
    } /* end if */
    if (blocklen != NULL)
        H5MM_free(blocklen);
    if (disp != NULL)
        H5MM_free(disp);

    FUNC_LEAVE_NOAPI(ret_value)
} /* end H5S__obtain_datatype() */
1348
1349 /*-------------------------------------------------------------------------
1350 * Function: H5S_mpio_space_type
1351 *
1352 * Purpose: Translate an HDF5 dataspace selection into an MPI type.
1353 * Currently handle only hyperslab and "all" selections.
1354 *
1355 * Return: Non-negative on success, negative on failure.
1356 *
1357 * Outputs: *new_type the MPI type corresponding to the selection
1358 * *count how many objects of the new_type in selection
1359 * (useful if this is the buffer type for xfer)
1360 * *is_derived_type 0 if MPI primitive type, 1 if derived
1361 *
1362 * Programmer: rky 980813
1363 *
1364 *-------------------------------------------------------------------------
1365 */
1366 herr_t
H5S_mpio_space_type(const H5S_t * space,size_t elmt_size,MPI_Datatype * new_type,int * count,hbool_t * is_derived_type,hbool_t do_permute,hsize_t ** permute_map,hbool_t * is_permuted)1367 H5S_mpio_space_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type, int *count,
1368 hbool_t *is_derived_type, hbool_t do_permute, hsize_t **permute_map, hbool_t *is_permuted)
1369 {
1370 herr_t ret_value = SUCCEED; /* Return value */
1371
1372 FUNC_ENTER_NOAPI_NOINIT
1373
1374 /* Check args */
1375 HDassert(space);
1376 HDassert(elmt_size);
1377
1378 /* Create MPI type based on the kind of selection */
1379 switch (H5S_GET_EXTENT_TYPE(space)) {
1380 case H5S_NULL:
1381 case H5S_SCALAR:
1382 case H5S_SIMPLE:
1383 /* If the file space has been permuted previously due to
1384 * out-of-order point selection, then permute this selection which
1385 * should be a memory selection to match the file space permutation.
1386 */
1387 if (TRUE == *is_permuted) {
1388 switch (H5S_GET_SELECT_TYPE(space)) {
1389 case H5S_SEL_NONE:
1390 if (H5S__mpio_none_type(new_type, count, is_derived_type) < 0)
1391 HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,
1392 "couldn't convert 'none' selection to MPI type")
1393 break;
1394
1395 case H5S_SEL_ALL:
1396 case H5S_SEL_POINTS:
1397 case H5S_SEL_HYPERSLABS:
1398 /* Sanity check */
1399 HDassert(!do_permute);
1400
1401 if (H5S__mpio_permute_type(space, elmt_size, permute_map, new_type, count,
1402 is_derived_type) < 0)
1403 HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,
1404 "couldn't convert 'all' selection to MPI type")
1405 break;
1406
1407 case H5S_SEL_ERROR:
1408 case H5S_SEL_N:
1409 default:
1410 HDassert("unknown selection type" && 0);
1411 break;
1412 } /* end switch */
1413 } /* end if */
1414 /* the file space is not permuted, so do a regular selection */
1415 else {
1416 switch (H5S_GET_SELECT_TYPE(space)) {
1417 case H5S_SEL_NONE:
1418 if (H5S__mpio_none_type(new_type, count, is_derived_type) < 0)
1419 HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,
1420 "couldn't convert 'none' selection to MPI type")
1421 break;
1422
1423 case H5S_SEL_ALL:
1424 if (H5S__mpio_all_type(space, elmt_size, new_type, count, is_derived_type) < 0)
1425 HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,
1426 "couldn't convert 'all' selection to MPI type")
1427 break;
1428
1429 case H5S_SEL_POINTS:
1430 if (H5S__mpio_point_type(space, elmt_size, new_type, count, is_derived_type,
1431 do_permute, permute_map, is_permuted) < 0)
1432 HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,
1433 "couldn't convert 'point' selection to MPI type")
1434 break;
1435
1436 case H5S_SEL_HYPERSLABS:
1437 if ((H5S_SELECT_IS_REGULAR(space) == TRUE)) {
1438 if (H5S__mpio_reg_hyper_type(space, elmt_size, new_type, count, is_derived_type) <
1439 0)
1440 HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,
1441 "couldn't convert regular 'hyperslab' selection to MPI type")
1442 } /* end if */
1443 else if (H5S__mpio_span_hyper_type(space, elmt_size, new_type, count,
1444 is_derived_type) < 0)
1445 HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,
1446 "couldn't convert irregular 'hyperslab' selection to MPI type")
1447 break;
1448
1449 case H5S_SEL_ERROR:
1450 case H5S_SEL_N:
1451 default:
1452 HDassert("unknown selection type" && 0);
1453 break;
1454 } /* end switch */
1455 } /* end else */
1456 break;
1457
1458 case H5S_NO_CLASS:
1459 default:
1460 HDassert("unknown dataspace type" && 0);
1461 break;
1462 } /* end switch */
1463
1464 done:
1465 FUNC_LEAVE_NOAPI(ret_value)
1466 } /* end H5S_mpio_space_type() */
1467
1468 #endif /* H5_HAVE_PARALLEL */
1469