1 /*
2 * Copyright (C) by Argonne National Laboratory
3 * See COPYRIGHT in top-level directory
4 */
5
6 #include "adio.h"
7 #include "adio_extern.h"
8
9 #ifdef MPL_USE_DBG_LOGGING
10 #define FLATTEN_DEBUG 1
11 #endif
12
flatlist_node_new(MPI_Datatype datatype,MPI_Count count)13 static ADIOI_Flatlist_node *flatlist_node_new(MPI_Datatype datatype, MPI_Count count)
14 {
15 ADIOI_Flatlist_node *flat;
16 flat = ADIOI_Malloc(sizeof(ADIOI_Flatlist_node));
17
18 flat->type = datatype;
19 flat->blocklens = NULL;
20 flat->indices = NULL;
21 flat->lb_idx = flat->ub_idx = -1;
22 flat->refct = 1;
23 flat->count = count;
24 flat->flag = 0;
25
26 flat->blocklens = (ADIO_Offset *) ADIOI_Calloc(flat->count * 2, sizeof(ADIO_Offset));
27 flat->indices = flat->blocklens + flat->count;
28 return flat;
29 }
30
31 /*
32 * I don't really expect this to ever trigger, but without the below safety
33 * valve, the design relies on the Count function coming out >= whatever
34 * the Flatten function comes up with. There are enough differences between
35 * the two that it's hard to be positive this will always be true. So every
36 * time something's added to flat's arrays, let's make sure they're big enough
37 * and re-alloc if not.
38 */
/* Ensure flat's blocklens/indices arrays can hold an entry at slot "idx";
 * if not, reallocate them ~25% larger (plus slack), copy the old contents,
 * and update flat->count to the new capacity.
 *
 * Fix vs. original: the new capacity was computed as
 * "int new_count = (flat->count * 1.25 + 4);" — floating-point arithmetic
 * truncated into an int, even though flat->count is an MPI_Count (64-bit).
 * For large counts that overflows/truncates.  The integer expression
 * below (count + count/4 + 4) yields the identical value for every
 * non-negative count, with no FP and no narrowing.  A guard also makes
 * sure idx always fits in the grown arrays even if a caller skips ahead. */
static void flatlist_node_grow(ADIOI_Flatlist_node * flat, int idx)
{
    if (idx >= flat->count) {
        ADIO_Offset *new_blocklens;
        ADIO_Offset *new_indices;
        /* grow by ~25% plus a little slack, in MPI_Count arithmetic */
        MPI_Count new_count = flat->count + flat->count / 4 + 4;
        if (new_count <= idx)   /* caller skipped ahead; make sure idx fits */
            new_count = (MPI_Count) idx + 1;
        new_blocklens = (ADIO_Offset *) ADIOI_Calloc(new_count * 2, sizeof(ADIO_Offset));
        new_indices = new_blocklens + new_count;
        if (flat->count) {
            memcpy(new_blocklens, flat->blocklens, flat->count * sizeof(ADIO_Offset));
            memcpy(new_indices, flat->indices, flat->count * sizeof(ADIO_Offset));
            /* indices shares the blocklens allocation, so free only blocklens */
            ADIOI_Free(flat->blocklens);
        }
        flat->blocklens = new_blocklens;
        flat->indices = new_indices;
        flat->count = new_count;
    }
}
57
58 void ADIOI_Optimize_flattened(ADIOI_Flatlist_node * flat_type);
59 /* flatten datatype and add it to Flatlist */
ADIOI_Flatten_datatype(MPI_Datatype datatype)60 ADIOI_Flatlist_node *ADIOI_Flatten_datatype(MPI_Datatype datatype)
61 {
62 MPI_Count flat_count, curr_index = 0;
63 int is_contig, flag;
64 ADIOI_Flatlist_node *flat;
65
66 if (ADIOI_Flattened_type_keyval == MPI_KEYVAL_INVALID) {
67 /* ADIOI_End_call will take care of cleanup */
68 MPI_Type_create_keyval(ADIOI_Flattened_type_copy,
69 ADIOI_Flattened_type_delete, &ADIOI_Flattened_type_keyval, NULL);
70 }
71
72 /* check if necessary to flatten. */
73
74 /* has it already been flattened? */
75 MPI_Type_get_attr(datatype, ADIOI_Flattened_type_keyval, &flat, &flag);
76 if (flag) {
77 #ifdef FLATTEN_DEBUG
78 DBG_FPRINTF(stderr, "ADIOI_Flatten_datatype:: found datatype %#X\n", datatype);
79 #endif
80 return flat;
81 }
82
83 /* is it entirely contiguous? */
84 ADIOI_Datatype_iscontig(datatype, &is_contig);
85
86 #ifdef FLATTEN_DEBUG
87 DBG_FPRINTF(stderr, "ADIOI_Flatten_datatype:: is_contig %#X\n", is_contig);
88 #endif
89 /* it would be great if ADIOI_Count_contiguous_blocks and the rest of the
90 * flattening code operated on the built-in named types, but
91 * it recursively processes types, stopping when it hits a named type. So
92 * we will do the little bit of work that named types require right here,
93 * and avoid touching the scary flattening code. */
94
95 if (is_contig)
96 flat_count = 1;
97 else {
98 flat_count = ADIOI_Count_contiguous_blocks(datatype, &curr_index);
99 }
100 /* flatten and add to datatype */
101 flat = flatlist_node_new(datatype, flat_count);
102 if (is_contig) {
103 MPI_Type_size_x(datatype, &(flat->blocklens[0]));
104 flat->indices[0] = 0;
105 } else {
106
107 curr_index = 0;
108 ADIOI_Flatten(datatype, flat, 0, &curr_index);
109 #ifdef FLATTEN_DEBUG
110 DBG_FPRINTF(stderr, "ADIOI_Flatten_datatype:: ADIOI_Flatten\n");
111 #endif
112
113 /*
114 * Setting flat->count to curr_index, since curr_index is the most fundamentally
115 * correct updated value that represents what's in the indices/blocklens arrays.
116 * It would be nice if the counter function and the flatten function were in sync,
117 * but the numerous cases that decrement flat->count in the flatten function show
118 * that syncing them is a hack, and as long as the counter doesn't under-count
119 * it's good enough.
120 */
121 flat->count = curr_index;
122
123 ADIOI_Optimize_flattened(flat);
124 /* debug */
125 #ifdef FLATTEN_DEBUG
126 {
127 int i;
128 for (i = 0; i < flat->count; i++) {
129 DBG_FPRINTF(stderr,
130 "ADIOI_Flatten_datatype:: i %#X, blocklens %#llX, indices %#llX\n", i,
131 flat->blocklens[i], flat->indices[i]);
132 }
133 }
134 #endif
135 }
136 MPI_Type_set_attr(datatype, ADIOI_Flattened_type_keyval, flat);
137 return flat;
138
139 }
140
/* ADIOI_Flatten()
 *
 * Recursively decodes "datatype" (via MPI_Type_get_envelope /
 * MPI_Type_get_contents) and appends one (offset, length) pair per
 * contiguous block to flat->indices / flat->blocklens, starting at array
 * slot *curr_index.  st_offset is the byte displacement of this (sub)type
 * within the outermost type being flattened; *curr_index is advanced past
 * the entries written.
 *
 * Assumption: input datatype is not a basic!!!!
 */
void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node * flat,
                   ADIO_Offset st_offset, MPI_Count * curr_index)
{
    int k, m, n, is_hindexed_block = 0;
    int lb_updated = 0;
    int combiner, old_combiner, old_is_contig;
    int nints, nadds, ntypes, old_nints, old_nadds, old_ntypes;
    /* By using ADIO_Offset we preserve +/- sign and
     * avoid >2G integer arithmetic problems */
    ADIO_Offset top_count;
    MPI_Count i, j, old_size, prev_index, basic_num, num, nonzeroth;
    MPI_Aint old_extent;        /* Assume extents are non-negative */
    int *ints;
    MPI_Aint *adds;             /* Make no assumptions about +/- sign on these */
    MPI_Datatype *types;
    MPI_Type_get_envelope(datatype, &nints, &nadds, &ntypes, &combiner);
    ints = (int *) ADIOI_Malloc((nints + 1) * sizeof(int));
    adds = (MPI_Aint *) ADIOI_Malloc((nadds + 1) * sizeof(MPI_Aint));
    types = (MPI_Datatype *) ADIOI_Malloc((ntypes + 1) * sizeof(MPI_Datatype));
    /* ints/adds/types now hold the constructor arguments of "datatype" */
    MPI_Type_get_contents(datatype, nints, nadds, ntypes, ints, adds, types);

#ifdef FLATTEN_DEBUG
    DBG_FPRINTF(stderr, "ADIOI_Flatten:: st_offset %#llX, curr_index %#llX\n", st_offset,
                *curr_index);
    DBG_FPRINTF(stderr, "ADIOI_Flatten:: nints %#X, nadds %#X, ntypes %#X\n", nints, nadds, ntypes);
    for (i = 0; i < nints; ++i) {
        DBG_FPRINTF(stderr, "ADIOI_Flatten:: ints[%lld]=%#X\n", (long long) i, ints[i]);
    }
    for (i = 0; i < nadds; ++i) {
        DBG_FPRINTF(stderr, "ADIOI_Flatten:: adds[%lld]=" MPI_AINT_FMT_HEX_SPEC "\n",
                    (long long) i, adds[i]);
    }
    for (i = 0; i < ntypes; ++i) {
        DBG_FPRINTF(stderr, "ADIOI_Flatten:: types[%lld]=%#llX\n", (long long) i,
                    (unsigned long long) (unsigned long) types[i]);
    }
#endif
    /* Chapter 4, page 83: when processing datatypes, note this item from the
     * standard:
     Most datatype constructors have replication count or block length
     arguments. Allowed values are non-negative integers. If the value is
     zero, no elements are generated in the type map and there is no effect
     on datatype bounds or extent. */

    switch (combiner) {
#ifdef MPIIMPL_HAVE_MPI_COMBINER_DUP
        case MPI_COMBINER_DUP:
            /* a dup contributes exactly what its underlying type contributes */
#ifdef FLATTEN_DEBUG
            DBG_FPRINTF(stderr, "ADIOI_Flatten:: MPI_COMBINER_DUP\n");
#endif
            MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, &old_ntypes, &old_combiner);
            ADIOI_Datatype_iscontig(types[0], &old_is_contig);
            if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
                ADIOI_Flatten(types[0], flat, st_offset, curr_index);
            break;
#endif
#ifdef MPIIMPL_HAVE_MPI_COMBINER_SUBARRAY
        case MPI_COMBINER_SUBARRAY:
            {
                int dims = ints[0];
                MPI_Datatype stype;
#ifdef FLATTEN_DEBUG
                DBG_FPRINTF(stderr, "ADIOI_Flatten:: MPI_COMBINER_SUBARRAY\n");
#endif

                /* build the equivalent derived type and flatten that instead */
                ADIO_Type_create_subarray(dims, &ints[1],       /* sizes */
                                          &ints[dims + 1],      /* subsizes */
                                          &ints[2 * dims + 1],  /* starts */
                                          ints[3 * dims + 1],   /* order */
                                          types[0],     /* type */
                                          &stype);
                ADIOI_Flatten(stype, flat, st_offset, curr_index);
                MPI_Type_free(&stype);
            }
            break;
#endif
#ifdef MPIIMPL_HAVE_MPI_COMBINER_DARRAY
        case MPI_COMBINER_DARRAY:
            {
                int dims = ints[2];
                MPI_Datatype dtype;
#ifdef FLATTEN_DEBUG
                DBG_FPRINTF(stderr, "ADIOI_Flatten:: MPI_COMBINER_DARRAY\n");
#endif

                /* build the equivalent derived type and flatten that instead */
                ADIO_Type_create_darray(ints[0],        /* size */
                                        ints[1],        /* rank */
                                        dims, &ints[3], /* gsizes */
                                        &ints[dims + 3],        /* distribs */
                                        &ints[2 * dims + 3],    /* dargs */
                                        &ints[3 * dims + 3],    /* psizes */
                                        ints[4 * dims + 3],     /* order */
                                        types[0], &dtype);
#ifdef FLATTEN_DEBUG
                DBG_FPRINTF(stderr,
                            "ADIOI_Flatten:: MPI_COMBINER_DARRAY <ADIOI_Flatten(dtype, flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX, st_offset %#llX, curr_index %#llX);\n",
                            0, flat->indices[0], 0, flat->blocklens[0], st_offset, *curr_index);
#endif
                ADIOI_Flatten(dtype, flat, st_offset, curr_index);
#ifdef FLATTEN_DEBUG
                DBG_FPRINTF(stderr,
                            "ADIOI_Flatten:: MPI_COMBINER_DARRAY >ADIOI_Flatten(dtype, flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX, st_offset %#llX, curr_index %#llX);\n",
                            0, flat->indices[0], 0, flat->blocklens[0], st_offset, *curr_index);
#endif
                MPI_Type_free(&dtype);
            }
            break;
#endif
        case MPI_COMBINER_CONTIGUOUS:
#ifdef FLATTEN_DEBUG
            DBG_FPRINTF(stderr, "ADIOI_Flatten:: MPI_COMBINER_CONTIGUOUS\n");
#endif
            top_count = ints[0];
            MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, &old_ntypes, &old_combiner);
            ADIOI_Datatype_iscontig(types[0], &old_is_contig);

            /* if *curr_index does not move, the recursion below was skipped:
             * the inner type was basic or contiguous */
            prev_index = *curr_index;
            if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
                ADIOI_Flatten(types[0], flat, st_offset, curr_index);

            if (prev_index == *curr_index) {
                /* simplest case, made up of basic or contiguous types */
                j = *curr_index;
                flatlist_node_grow(flat, j);
                flat->indices[j] = st_offset;
                MPI_Type_size_x(types[0], &old_size);
                flat->blocklens[j] = top_count * old_size;
#ifdef FLATTEN_DEBUG
                DBG_FPRINTF(stderr,
                            "ADIOI_Flatten:: simple flat->indices[%#llX] %#llX, flat->blocklens[%#llX] %#llX\n",
                            j, flat->indices[j], j, flat->blocklens[j]);
#endif
                (*curr_index)++;
            } else {
                /* made up of noncontiguous derived types */
                j = *curr_index;
                num = *curr_index - prev_index;

                /* The noncontiguous types have to be replicated count times */
                MPI_Type_extent(types[0], &old_extent);
                for (m = 1; m < top_count; m++) {
                    for (i = 0; i < num; i++) {
                        flatlist_node_grow(flat, j);
                        /* copy the previous replica's entry, shifted by one extent */
                        flat->indices[j] =
                            flat->indices[j - num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
                        flat->blocklens[j] = flat->blocklens[j - num];
#ifdef FLATTEN_DEBUG
                        DBG_FPRINTF(stderr,
                                    "ADIOI_Flatten:: derived flat->indices[%#llX] %#llX, flat->blocklens[%#llX] %#llX\n",
                                    j, flat->indices[j], j, flat->blocklens[j]);
#endif
                        j++;
                    }
                }
                *curr_index = j;
            }
            break;

        case MPI_COMBINER_VECTOR:
#ifdef FLATTEN_DEBUG
            DBG_FPRINTF(stderr, "ADIOI_Flatten:: MPI_COMBINER_VECTOR\n");
#endif
            top_count = ints[0];
            MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, &old_ntypes, &old_combiner);
            ADIOI_Datatype_iscontig(types[0], &old_is_contig);

            prev_index = *curr_index;
            if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
                ADIOI_Flatten(types[0], flat, st_offset, curr_index);

            if (prev_index == *curr_index) {
                /* simplest case, vector of basic or contiguous types */
                /* By using ADIO_Offset we preserve +/- sign and
                 * avoid >2G integer arithmetic problems */
                ADIO_Offset blocklength = ints[1], stride = ints[2];
                j = *curr_index;
                flatlist_node_grow(flat, j);
                flat->indices[j] = st_offset;
                MPI_Type_size_x(types[0], &old_size);
                flat->blocklens[j] = blocklength * old_size;
                /* note: stride is in units of the inner type's size here */
                for (i = j + 1; i < j + top_count; i++) {
                    flatlist_node_grow(flat, i);
                    flat->indices[i] = flat->indices[i - 1] + stride * old_size;
                    flat->blocklens[i] = flat->blocklens[j];
                }
                *curr_index = i;
            } else {
                /* vector of noncontiguous derived types */
                /* By using ADIO_Offset we preserve +/- sign and
                 * avoid >2G integer arithmetic problems */
                ADIO_Offset blocklength = ints[1], stride = ints[2];

                j = *curr_index;
                num = *curr_index - prev_index;

                /* The noncontiguous types have to be replicated blocklen times
                 * and then strided. Replicate the first one. */
                MPI_Type_extent(types[0], &old_extent);
                for (m = 1; m < blocklength; m++) {
                    for (i = 0; i < num; i++) {
                        flatlist_node_grow(flat, j);
                        flat->indices[j] =
                            flat->indices[j - num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
                        flat->blocklens[j] = flat->blocklens[j - num];
                        j++;
                    }
                }
                *curr_index = j;

                /* Now repeat with strides. */
                num = *curr_index - prev_index;
                for (i = 1; i < top_count; i++) {
                    for (m = 0; m < num; m++) {
                        flatlist_node_grow(flat, j);
                        flat->indices[j] =
                            flat->indices[j - num] + stride * ADIOI_AINT_CAST_TO_OFFSET old_extent;
                        flat->blocklens[j] = flat->blocklens[j - num];
                        j++;
                    }
                }
                *curr_index = j;
            }
            break;

        case MPI_COMBINER_HVECTOR:
        case MPI_COMBINER_HVECTOR_INTEGER:
            /* like VECTOR, but the stride (adds[0]) is in bytes */
#ifdef FLATTEN_DEBUG
            DBG_FPRINTF(stderr, "ADIOI_Flatten:: MPI_COMBINER_HVECTOR_INTEGER\n");
#endif
            top_count = ints[0];
            MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, &old_ntypes, &old_combiner);
            ADIOI_Datatype_iscontig(types[0], &old_is_contig);

            prev_index = *curr_index;
            if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
                ADIOI_Flatten(types[0], flat, st_offset, curr_index);

            if (prev_index == *curr_index) {
                /* simplest case, vector of basic or contiguous types */
                /* By using ADIO_Offset we preserve +/- sign and
                 * avoid >2G integer arithmetic problems */
                ADIO_Offset blocklength = ints[1];
                j = *curr_index;
                flatlist_node_grow(flat, j);
                flat->indices[j] = st_offset;
                MPI_Type_size_x(types[0], &old_size);
                flat->blocklens[j] = blocklength * old_size;
                for (i = j + 1; i < j + top_count; i++) {
                    flatlist_node_grow(flat, i);
                    flat->indices[i] = flat->indices[i - 1] + adds[0];
                    flat->blocklens[i] = flat->blocklens[j];
                }
                *curr_index = i;
            } else {
                /* vector of noncontiguous derived types */
                /* By using ADIO_Offset we preserve +/- sign and
                 * avoid >2G integer arithmetic problems */
                ADIO_Offset blocklength = ints[1];

                j = *curr_index;
                num = *curr_index - prev_index;

                /* The noncontiguous types have to be replicated blocklen times
                 * and then strided. Replicate the first one. */
                MPI_Type_extent(types[0], &old_extent);
                for (m = 1; m < blocklength; m++) {
                    for (i = 0; i < num; i++) {
                        flatlist_node_grow(flat, j);
                        flat->indices[j] =
                            flat->indices[j - num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
                        flat->blocklens[j] = flat->blocklens[j - num];
                        j++;
                    }
                }
                *curr_index = j;

                /* Now repeat with strides. */
                num = *curr_index - prev_index;
                for (i = 1; i < top_count; i++) {
                    for (m = 0; m < num; m++) {
                        flatlist_node_grow(flat, j);
                        flat->indices[j] = flat->indices[j - num] + adds[0];
                        flat->blocklens[j] = flat->blocklens[j - num];
                        j++;
                    }
                }
                *curr_index = j;
            }
            break;

        case MPI_COMBINER_INDEXED:
#ifdef FLATTEN_DEBUG
            DBG_FPRINTF(stderr, "ADIOI_Flatten:: MPI_COMBINER_INDEXED\n");
#endif
            top_count = ints[0];
            /* ints layout: [0]=count, [1..count]=blocklens,
             * [count+1..2*count]=displacements (in units of old_extent) */
            MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, &old_ntypes, &old_combiner);
            ADIOI_Datatype_iscontig(types[0], &old_is_contig);
            MPI_Type_extent(types[0], &old_extent);

            prev_index = *curr_index;
            if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig)) {
                /* By using ADIO_Offset we preserve +/- sign and
                 * avoid >2G integer arithmetic problems */
                ADIO_Offset stride = ints[top_count + 1];
                ADIOI_Flatten(types[0], flat,
                              st_offset + stride * ADIOI_AINT_CAST_TO_OFFSET old_extent,
                              curr_index);
            }

            if (prev_index == *curr_index) {
                /* simplest case, indexed type made up of basic or contiguous types */
                j = *curr_index;
                /* nonzeroth tracks the output slot, skipping zero-length blocks */
                for (i = j, nonzeroth = i; i < j + top_count; i++) {
                    /* By using ADIO_Offset we preserve +/- sign and
                     * avoid >2G integer arithmetic problems */
                    ADIO_Offset blocklength = ints[1 + i - j], stride = ints[top_count + 1 + i - j];
                    if (blocklength > 0) {
                        flatlist_node_grow(flat, nonzeroth);
                        flat->indices[nonzeroth] =
                            st_offset + stride * ADIOI_AINT_CAST_TO_OFFSET old_extent;
                        flat->blocklens[nonzeroth] =
                            blocklength * ADIOI_AINT_CAST_TO_OFFSET old_extent;
                        nonzeroth++;
                    }
                }
                *curr_index = nonzeroth;
            } else {
                /* indexed type made up of noncontiguous derived types */

                j = *curr_index;
                num = *curr_index - prev_index;
                basic_num = num;

                /* The noncontiguous types have to be replicated blocklens[i] times
                 * and then strided. Replicate the first one. */
                for (m = 1; m < ints[1]; m++) {
                    for (i = 0, nonzeroth = j; i < num; i++) {
                        if (flat->blocklens[j - num] > 0) {
                            flatlist_node_grow(flat, nonzeroth);
                            flat->indices[nonzeroth] =
                                flat->indices[nonzeroth - num] +
                                ADIOI_AINT_CAST_TO_OFFSET old_extent;
                            flat->blocklens[nonzeroth] = flat->blocklens[nonzeroth - num];
                            j++;
                            nonzeroth++;
                        }
                    }
                }
                *curr_index = j;

                /* Now repeat with strides. */
                for (i = 1; i < top_count; i++) {
                    num = *curr_index - prev_index;
                    prev_index = *curr_index;
                    for (m = 0, nonzeroth = j; m < basic_num; m++) {
                        /* By using ADIO_Offset we preserve +/- sign and
                         * avoid >2G integer arithmetic problems */
                        ADIO_Offset stride = ints[top_count + 1 + i] - ints[top_count + i];
                        if (flat->blocklens[j - num] > 0) {
                            flatlist_node_grow(flat, nonzeroth);
                            flat->indices[nonzeroth] =
                                flat->indices[j - num] +
                                stride * ADIOI_AINT_CAST_TO_OFFSET old_extent;
                            flat->blocklens[nonzeroth] = flat->blocklens[j - num];
                            j++;
                            nonzeroth++;
                        }
                    }
                    *curr_index = j;
                    for (m = 1; m < ints[1 + i]; m++) {
                        for (k = 0, nonzeroth = j; k < basic_num; k++) {
                            if (flat->blocklens[j - basic_num] > 0) {
                                flatlist_node_grow(flat, nonzeroth);
                                flat->indices[nonzeroth] =
                                    flat->indices[j - basic_num] +
                                    ADIOI_AINT_CAST_TO_OFFSET old_extent;
                                flat->blocklens[nonzeroth] = flat->blocklens[j - basic_num];
                                j++;
                                nonzeroth++;
                            }
                        }
                    }
                    *curr_index = j;
                }
            }
            break;

#if defined HAVE_DECL_MPI_COMBINER_HINDEXED_BLOCK && HAVE_DECL_MPI_COMBINER_HINDEXED_BLOCK
        case MPI_COMBINER_HINDEXED_BLOCK:
            is_hindexed_block = 1;
            /* deliberate fall-through */
            MPL_FALLTHROUGH;
#endif
        case MPI_COMBINER_INDEXED_BLOCK:
            /* both block variants share this code; is_hindexed_block selects
             * byte displacements (adds[]) vs. extent-scaled ones (ints[]) */
#ifdef FLATTEN_DEBUG
            DBG_FPRINTF(stderr, "ADIOI_Flatten:: MPI_COMBINER_INDEXED_BLOCK\n");
#endif
            top_count = ints[0];
            MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, &old_ntypes, &old_combiner);
            ADIOI_Datatype_iscontig(types[0], &old_is_contig);
            MPI_Type_extent(types[0], &old_extent);

            prev_index = *curr_index;
            if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig)) {
                /* By using ADIO_Offset we preserve +/- sign and
                 * avoid >2G integer arithmetic problems */
                ADIO_Offset stride = ints[1 + 1];
                if (is_hindexed_block) {
                    ADIOI_Flatten(types[0], flat, st_offset + adds[0], curr_index);
                } else {
                    ADIOI_Flatten(types[0], flat,
                                  st_offset + stride * ADIOI_AINT_CAST_TO_OFFSET old_extent,
                                  curr_index);
                }
            }

            if (prev_index == *curr_index) {
                /* simplest case, indexed type made up of basic or contiguous types */
                j = *curr_index;
                for (i = j; i < j + top_count; i++) {
                    /* By using ADIO_Offset we preserve +/- sign and
                     * avoid >2G integer arithmetic problems */
                    ADIO_Offset blocklength = ints[1];
                    if (is_hindexed_block) {
                        flatlist_node_grow(flat, i);
                        flat->indices[i] = st_offset + adds[i - j];
                    } else {
                        ADIO_Offset stride = ints[1 + 1 + i - j];
                        flatlist_node_grow(flat, i);
                        flat->indices[i] = st_offset +
                            stride * ADIOI_AINT_CAST_TO_OFFSET old_extent;
                    }
                    flat->blocklens[i] = blocklength * ADIOI_AINT_CAST_TO_OFFSET old_extent;
                }
                *curr_index = i;
            } else {
                /* vector of noncontiguous derived types */

                j = *curr_index;
                num = *curr_index - prev_index;

                /* The noncontiguous types have to be replicated blocklens[i] times
                 * and then strided. Replicate the first one. */
                for (m = 1; m < ints[1]; m++) {
                    for (i = 0; i < num; i++) {
                        if (is_hindexed_block) {
                            /* this is the one place the hindexed case uses the
                             * extent of a type */
                            MPI_Type_extent(types[0], &old_extent);
                        }
                        flatlist_node_grow(flat, j);
                        flat->indices[j] = flat->indices[j - num] +
                            ADIOI_AINT_CAST_TO_OFFSET old_extent;
                        flat->blocklens[j] = flat->blocklens[j - num];
                        j++;
                    }
                }
                *curr_index = j;

                /* Now repeat with strides. */
                num = *curr_index - prev_index;
                for (i = 1; i < top_count; i++) {
                    for (m = 0; m < num; m++) {
                        if (is_hindexed_block) {
                            flatlist_node_grow(flat, j);
                            /* delta between consecutive byte displacements */
                            flat->indices[j] = flat->indices[j - num] + adds[i] - adds[i - 1];
                        } else {
                            /* By using ADIO_Offset we preserve +/- sign and
                             * avoid >2G integer arithmetic problems */
                            ADIO_Offset stride = ints[2 + i] - ints[1 + i];
                            flatlist_node_grow(flat, j);
                            flat->indices[j] = flat->indices[j - num] +
                                stride * ADIOI_AINT_CAST_TO_OFFSET old_extent;
                        }
                        flat->blocklens[j] = flat->blocklens[j - num];
                        j++;
                    }
                }
                *curr_index = j;
            }
            break;

        case MPI_COMBINER_HINDEXED:
        case MPI_COMBINER_HINDEXED_INTEGER:
            /* like INDEXED, but displacements (adds[]) are in bytes */
#ifdef FLATTEN_DEBUG
            DBG_FPRINTF(stderr, "ADIOI_Flatten:: MPI_COMBINER_HINDEXED_INTEGER\n");
#endif
            top_count = ints[0];
            MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, &old_ntypes, &old_combiner);
            ADIOI_Datatype_iscontig(types[0], &old_is_contig);

            prev_index = *curr_index;
            if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig)) {
                ADIOI_Flatten(types[0], flat, st_offset + adds[0], curr_index);
            }

            if (prev_index == *curr_index) {
                /* simplest case, indexed type made up of basic or contiguous types */
                j = *curr_index;
                MPI_Type_size_x(types[0], &old_size);
                for (i = j, nonzeroth = j; i < j + top_count; i++) {
                    if (ints[1 + i - j] > 0) {
                        /* By using ADIO_Offset we preserve +/- sign and
                         * avoid >2G integer arithmetic problems */
                        ADIO_Offset blocklength = ints[1 + i - j];
                        flatlist_node_grow(flat, nonzeroth);
                        flat->indices[nonzeroth] = st_offset + adds[i - j];
                        flat->blocklens[nonzeroth] = blocklength * old_size;
                        nonzeroth++;
                    }
                }
                *curr_index = nonzeroth;
            } else {
                /* indexed type made up of noncontiguous derived types */

                j = *curr_index;
                num = *curr_index - prev_index;
                basic_num = num;

                /* The noncontiguous types have to be replicated blocklens[i] times
                 * and then strided. Replicate the first one. */
                MPI_Type_extent(types[0], &old_extent);
                for (m = 1; m < ints[1]; m++) {
                    for (i = 0, nonzeroth = j; i < num; i++) {
                        if (flat->blocklens[j - num] > 0) {
                            flatlist_node_grow(flat, nonzeroth);
                            flat->indices[nonzeroth] =
                                flat->indices[j - num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
                            flat->blocklens[nonzeroth] = flat->blocklens[j - num];
                            j++;
                            nonzeroth++;
                        }
                    }
                }
                *curr_index = j;

                /* Now repeat with strides. */
                for (i = 1; i < top_count; i++) {
                    num = *curr_index - prev_index;
                    prev_index = *curr_index;
                    for (m = 0, nonzeroth = j; m < basic_num; m++) {
                        if (flat->blocklens[j - num] > 0) {
                            flatlist_node_grow(flat, nonzeroth);
                            flat->indices[nonzeroth] =
                                flat->indices[j - num] + adds[i] - adds[i - 1];
                            flat->blocklens[nonzeroth] = flat->blocklens[j - num];
                            j++;
                            nonzeroth++;
                        }
                    }
                    *curr_index = j;
                    for (m = 1; m < ints[1 + i]; m++) {
                        for (k = 0, nonzeroth = j; k < basic_num; k++) {
                            if (flat->blocklens[j - basic_num] > 0) {
                                flatlist_node_grow(flat, nonzeroth);
                                flat->indices[nonzeroth] =
                                    flat->indices[j - basic_num] +
                                    ADIOI_AINT_CAST_TO_OFFSET old_extent;
                                flat->blocklens[nonzeroth] = flat->blocklens[j - basic_num];
                                j++;
                                nonzeroth++;
                            }
                        }
                    }
                    *curr_index = j;
                }
            }
            break;

        case MPI_COMBINER_STRUCT:
        case MPI_COMBINER_STRUCT_INTEGER:
            /* each member is flattened in turn at its byte displacement adds[n] */
#ifdef FLATTEN_DEBUG
            DBG_FPRINTF(stderr, "ADIOI_Flatten:: MPI_COMBINER_STRUCT_INTEGER\n");
#endif
            top_count = ints[0];
            for (n = 0; n < top_count; n++) {
                MPI_Type_get_envelope(types[n], &old_nints, &old_nadds, &old_ntypes, &old_combiner);
                ADIOI_Datatype_iscontig(types[n], &old_is_contig);

                prev_index = *curr_index;
                if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
                    ADIOI_Flatten(types[n], flat, st_offset + adds[n], curr_index);

                if (prev_index == *curr_index) {
                    /* simplest case, current type is basic or contiguous types */
                    /* By using ADIO_Offset we preserve +/- sign and
                     * avoid >2G integer arithmetic problems */
                    /* MPI_LB/MPI_UB members are recorded even with zero length
                     * so the bound markers are preserved */
                    if (ints[1 + n] > 0 || types[n] == MPI_LB || types[n] == MPI_UB) {
                        ADIO_Offset blocklength = ints[1 + n];
                        j = *curr_index;
                        flatlist_node_grow(flat, j);
                        flat->indices[j] = st_offset + adds[n];
                        MPI_Type_size_x(types[n], &old_size);
                        flat->blocklens[j] = blocklength * old_size;
                        if (types[n] == MPI_LB)
                            flat->lb_idx = j;
                        if (types[n] == MPI_UB)
                            flat->ub_idx = j;
#ifdef FLATTEN_DEBUG
                        DBG_FPRINTF(stderr,
                                    "ADIOI_Flatten:: simple adds[%#X] " MPI_AINT_FMT_HEX_SPEC
                                    ", flat->indices[%#llX] %#llX, flat->blocklens[%#llX] %#llX\n",
                                    n, adds[n], j, flat->indices[j], j, flat->blocklens[j]);
#endif
                        (*curr_index)++;
                    }
                } else {
                    /* current type made up of noncontiguous derived types */

                    j = *curr_index;
                    num = *curr_index - prev_index;

                    /* The current type has to be replicated blocklens[n] times */
                    MPI_Type_extent(types[n], &old_extent);
                    for (m = 1; m < ints[1 + n]; m++) {
                        for (i = 0; i < num; i++) {
                            flatlist_node_grow(flat, j);
                            flat->indices[j] =
                                flat->indices[j - num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
                            flat->blocklens[j] = flat->blocklens[j - num];
#ifdef FLATTEN_DEBUG
                            DBG_FPRINTF(stderr,
                                        "ADIOI_Flatten:: simple old_extent " MPI_AINT_FMT_HEX_SPEC
                                        ", flat->indices[%#llX] %#llX, flat->blocklens[%#llX] %#llX\n",
                                        old_extent, j, flat->indices[j], j, flat->blocklens[j]);
#endif
                            j++;
                        }
                    }
                    *curr_index = j;
                }
            }
            break;

        case MPI_COMBINER_RESIZED:
#ifdef FLATTEN_DEBUG
            DBG_FPRINTF(stderr, "ADIOI_Flatten:: MPI_COMBINER_RESIZED\n");
#endif

            /* This is done similar to a type_struct with an lb, datatype, ub */

            /* handle the Lb */
            j = *curr_index;
            /* when we process resized types, we (recursively) process the lower
             * bound, the type being resized, then the upper bound. In the
             * resized-of-resized case, we might find ourselves updating the upper
             * bound based on the inner type, but the lower bound based on the
             * upper type. check both lb and ub to prevent mixing updates */
            if (flat->lb_idx == -1 && flat->ub_idx == -1) {
                flatlist_node_grow(flat, j);
                flat->indices[j] = st_offset + adds[0];
                /* this zero-length blocklens[] element, unlike elsewhere in the
                 * flattening code, is correct and is used to indicate a lower bound
                 * marker */
                flat->blocklens[j] = 0;
                flat->lb_idx = *curr_index;
                lb_updated = 1;

#ifdef FLATTEN_DEBUG
                DBG_FPRINTF(stderr,
                            "ADIOI_Flatten:: simple adds[%#X] " MPI_AINT_FMT_HEX_SPEC
                            ", flat->indices[%#llX] %#llX, flat->blocklens[%#llX] %#llX\n", 0,
                            adds[0], j, flat->indices[j], j, flat->blocklens[j]);
#endif

                (*curr_index)++;
            } else {
                /* skipped over this chunk because something else higher-up in the
                 * type construction set this for us already */
                st_offset -= adds[0];
            }

            /* handle the datatype */

            MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, &old_ntypes, &old_combiner);
            ADIOI_Datatype_iscontig(types[0], &old_is_contig);

            if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig)) {
                ADIOI_Flatten(types[0], flat, st_offset + adds[0], curr_index);
            } else {
                /* current type is basic or contiguous */
                j = *curr_index;
                flatlist_node_grow(flat, j);
                flat->indices[j] = st_offset;
                MPI_Type_size_x(types[0], &old_size);
                flat->blocklens[j] = old_size;

#ifdef FLATTEN_DEBUG
                DBG_FPRINTF(stderr,
                            "ADIOI_Flatten:: simple adds[%#X] " MPI_AINT_FMT_HEX_SPEC
                            ", flat->indices[%#llX] %#llX, flat->blocklens[%#llX] %#llX\n", 0,
                            adds[0], j, flat->indices[j], j, flat->blocklens[j]);
#endif

                (*curr_index)++;
            }

            /* take care of the extent as a UB */
            /* see note above about mixing updates for why we check lb and ub */
            if ((flat->lb_idx == -1 && flat->ub_idx == -1) || lb_updated) {
                j = *curr_index;
                flatlist_node_grow(flat, j);
                flat->indices[j] = st_offset + adds[0] + adds[1];
                /* again, zero-element ok: an upper-bound marker explicitly set by the
                 * constructor of this resized type */
                flat->blocklens[j] = 0;
                flat->ub_idx = *curr_index;
            } else {
                /* skipped over this chunk because something else higher-up in the
                 * type construction set this for us already */
                (*curr_index)--;
            }

#ifdef FLATTEN_DEBUG
            DBG_FPRINTF(stderr,
                        "ADIOI_Flatten:: simple adds[%#X] " MPI_AINT_FMT_HEX_SPEC
                        ", flat->indices[%#llX] %#llX, flat->blocklens[%#llX] %#llX\n", 1, adds[1],
                        j, flat->indices[j], j, flat->blocklens[j]);
#endif

            (*curr_index)++;

            break;

        default:
            /* TODO: FIXME (requires changing prototypes to return errors...) */
            DBG_FPRINTF(stderr, "Error: Unsupported datatype passed to ADIOI_Flatten\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* free any derived types handed back by MPI_Type_get_contents; named
     * (built-in) types must not be freed */
    for (i = 0; i < ntypes; i++) {
        MPI_Type_get_envelope(types[i], &old_nints, &old_nadds, &old_ntypes, &old_combiner);
        if (old_combiner != MPI_COMBINER_NAMED)
            MPI_Type_free(types + i);
    }

    ADIOI_Free(ints);
    ADIOI_Free(adds);
    ADIOI_Free(types);

#ifdef FLATTEN_DEBUG
    DBG_FPRINTF(stderr, "ADIOI_Flatten:: return st_offset %#llX, curr_index %#llX\n", st_offset,
                *curr_index);
#endif

}
891
892 /********************************************************/
893
894 /* ADIOI_Count_contiguous_blocks
895 *
896 * Returns number of contiguous blocks in type, and also updates
897 * curr_index to reflect the space for the additional blocks.
898 *
899 * ASSUMES THAT TYPE IS NOT A BASIC!!!
900 */
MPI_Count ADIOI_Count_contiguous_blocks(MPI_Datatype datatype, MPI_Count * curr_index)
{
    int i, n;
    MPI_Count count = 0, prev_index, num, basic_num;
    int top_count, combiner, old_combiner, old_is_contig;
    int nints, nadds, ntypes, old_nints, old_nadds, old_ntypes;
    int *ints;
    MPI_Aint *adds;             /* Make no assumptions about +/- sign on these */
    MPI_Datatype *types;

    /* Decode this derived type: 'combiner' identifies the constructor that
     * built it, and ints/adds/types receive the constructor's arguments.
     * The +1 padding keeps the allocations nonzero even when a count is 0. */
    MPI_Type_get_envelope(datatype, &nints, &nadds, &ntypes, &combiner);
    ints = (int *) ADIOI_Malloc((nints + 1) * sizeof(int));
    adds = (MPI_Aint *) ADIOI_Malloc((nadds + 1) * sizeof(MPI_Aint));
    types = (MPI_Datatype *) ADIOI_Malloc((ntypes + 1) * sizeof(MPI_Datatype));
    MPI_Type_get_contents(datatype, nints, nadds, ntypes, ints, adds, types);

    /* In every case below: recurse only when the inner type is itself a
     * noncontiguous derived type; a named (basic) or contiguous inner type
     * contributes exactly one block.  'count' accumulates the number of
     * contiguous blocks; *curr_index is advanced in lock-step so the
     * Flatten pass knows how much array space each subtree consumes. */
    switch (combiner) {
#ifdef MPIIMPL_HAVE_MPI_COMBINER_DUP
        case MPI_COMBINER_DUP:
            /* A dup contributes whatever its base type contributes. */
            MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, &old_ntypes, &old_combiner);
            ADIOI_Datatype_iscontig(types[0], &old_is_contig);
            if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
                count = ADIOI_Count_contiguous_blocks(types[0], curr_index);
            else {
                count = 1;
                (*curr_index)++;
            }
            break;
#endif
#ifdef MPIIMPL_HAVE_MPI_COMBINER_SUBARRAY
        case MPI_COMBINER_SUBARRAY:
            {
                /* Rebuild the subarray as an equivalent derived type and
                 * count that instead; avoids duplicating subarray layout
                 * logic here. */
                int dims = ints[0];
                MPI_Datatype stype;

                ADIO_Type_create_subarray(dims, &ints[1],       /* sizes */
                                          &ints[dims + 1],      /* subsizes */
                                          &ints[2 * dims + 1],  /* starts */
                                          ints[3 * dims + 1],   /* order */
                                          types[0],     /* type */
                                          &stype);
                count = ADIOI_Count_contiguous_blocks(stype, curr_index);
                /* curr_index will have already been updated; just pass
                 * count back up.
                 */
                MPI_Type_free(&stype);

            }
            break;
#endif
#ifdef MPIIMPL_HAVE_MPI_COMBINER_DARRAY
        case MPI_COMBINER_DARRAY:
            {
                /* Same approach as subarray: expand the darray into a
                 * concrete derived type and count that. */
                int dims = ints[2];
                MPI_Datatype dtype;

                ADIO_Type_create_darray(ints[0],        /* size */
                                        ints[1],        /* rank */
                                        dims, &ints[3], /* gsizes */
                                        &ints[dims + 3],        /* distribs */
                                        &ints[2 * dims + 3],    /* dargs */
                                        &ints[3 * dims + 3],    /* psizes */
                                        ints[4 * dims + 3],     /* order */
                                        types[0], &dtype);
                count = ADIOI_Count_contiguous_blocks(dtype, curr_index);
                /* curr_index will have already been updated; just pass
                 * count back up.
                 */
                MPI_Type_free(&dtype);
            }
            break;
#endif
        case MPI_COMBINER_CONTIGUOUS:
            top_count = ints[0];
            MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, &old_ntypes, &old_combiner);
            ADIOI_Datatype_iscontig(types[0], &old_is_contig);

            prev_index = *curr_index;
            if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
                count = ADIOI_Count_contiguous_blocks(types[0], curr_index);
            else
                count = 1;

            if (prev_index == *curr_index)
                /* simplest case, made up of basic or contiguous types */
                (*curr_index)++;
            else {
                /* made up of noncontiguous derived types */
                /* 'num' = blocks one inner copy produced; replicate for
                 * each of the top_count copies. */
                num = *curr_index - prev_index;
                count *= top_count;
                *curr_index += (top_count - 1) * num;
            }
            break;

        case MPI_COMBINER_VECTOR:
        case MPI_COMBINER_HVECTOR:
        case MPI_COMBINER_HVECTOR_INTEGER:
            top_count = ints[0];
            MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, &old_ntypes, &old_combiner);
            ADIOI_Datatype_iscontig(types[0], &old_is_contig);

            prev_index = *curr_index;
            if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
                count = ADIOI_Count_contiguous_blocks(types[0], curr_index);
            else
                count = 1;

            if (prev_index == *curr_index) {
                /* simplest case, vector of basic or contiguous types */
                count = top_count;
                *curr_index += count;
            } else {
                /* vector of noncontiguous derived types */
                num = *curr_index - prev_index;

                /* The noncontiguous types have to be replicated blocklen times
                 * and then strided.  ints[1] is the vector blocklength. */
                count *= ints[1] * top_count;

                /* First one */
                *curr_index += (ints[1] - 1) * num;

                /* Now repeat with strides. */
                num = *curr_index - prev_index;
                *curr_index += (top_count - 1) * num;
            }
            break;

        case MPI_COMBINER_INDEXED:
        case MPI_COMBINER_HINDEXED:
        case MPI_COMBINER_HINDEXED_INTEGER:
            top_count = ints[0];
            MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, &old_ntypes, &old_combiner);
            ADIOI_Datatype_iscontig(types[0], &old_is_contig);

            prev_index = *curr_index;
            if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
                count = ADIOI_Count_contiguous_blocks(types[0], curr_index);
            else
                count = 1;

            if (prev_index == *curr_index) {
                /* simplest case, indexed type made up of basic or contiguous types */
                count = top_count;
                *curr_index += count;
            } else {
                /* indexed type made up of noncontiguous derived types */
                basic_num = *curr_index - prev_index;

                /* The noncontiguous types have to be replicated blocklens[i] times
                 * and then strided.  ints[1..top_count] are the per-entry
                 * blocklengths; the first entry was handled above. */
                *curr_index += (ints[1] - 1) * basic_num;
                count *= ints[1];

                /* Now repeat with strides. */
                for (i = 1; i < top_count; i++) {
                    count += ints[1 + i] * basic_num;
                    *curr_index += ints[1 + i] * basic_num;
                }
            }
            break;

#if defined HAVE_DECL_MPI_COMBINER_HINDEXED_BLOCK && HAVE_DECL_MPI_COMBINER_HINDEXED_BLOCK
        case MPI_COMBINER_HINDEXED_BLOCK:
#endif
        case MPI_COMBINER_INDEXED_BLOCK:
            top_count = ints[0];
            MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, &old_ntypes, &old_combiner);
            ADIOI_Datatype_iscontig(types[0], &old_is_contig);

            prev_index = *curr_index;
            if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
                count = ADIOI_Count_contiguous_blocks(types[0], curr_index);
            else
                count = 1;

            if (prev_index == *curr_index) {
                /* simplest case, indexed type made up of basic or contiguous types */
                count = top_count;
                *curr_index += count;
            } else {
                /* indexed type made up of noncontiguous derived types */
                basic_num = *curr_index - prev_index;

                /* The noncontiguous types have to be replicated blocklens[i] times
                 * and then strided.  All entries share the single
                 * blocklength ints[1], so no per-entry loop is needed. */
                *curr_index += (ints[1] - 1) * basic_num;
                count *= ints[1];

                /* Now repeat with strides. */
                *curr_index += (top_count - 1) * count;
                count *= top_count;
            }
            break;

        case MPI_COMBINER_STRUCT:
        case MPI_COMBINER_STRUCT_INTEGER:
            /* Sum the contributions of each struct member, each replicated
             * by its own blocklength ints[1 + n]. */
            top_count = ints[0];
            count = 0;
            for (n = 0; n < top_count; n++) {
                MPI_Type_get_envelope(types[n], &old_nints, &old_nadds, &old_ntypes, &old_combiner);
                ADIOI_Datatype_iscontig(types[n], &old_is_contig);

                prev_index = *curr_index;
                if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
                    count += ADIOI_Count_contiguous_blocks(types[n], curr_index);

                if (prev_index == *curr_index) {
                    /* simplest case, current type is basic or contiguous types */
                    count++;
                    (*curr_index)++;
                } else {
                    /* current type made up of noncontiguous derived types */
                    /* The current type has to be replicated blocklens[n] times */

                    num = *curr_index - prev_index;
                    count += (ints[1 + n] - 1) * num;
                    (*curr_index) += (ints[1 + n] - 1) * num;
                }
            }
            break;

        case MPI_COMBINER_RESIZED:
            /* treat it as a struct with lb, type, ub */

            /* add 2 for lb and ub */
            (*curr_index) += 2;
            count += 2;

            /* add for datatype */
            MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, &old_ntypes, &old_combiner);
            ADIOI_Datatype_iscontig(types[0], &old_is_contig);

            if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig)) {
                count += ADIOI_Count_contiguous_blocks(types[0], curr_index);
            } else {
                /* basic or contiguous type */
                count++;
                (*curr_index)++;
            }
            break;

        default:
            /* TODO: FIXME */
            DBG_FPRINTF(stderr,
                        "Error: Unsupported datatype passed to ADIOI_Count_contiguous_blocks, combiner = %d\n",
                        combiner);
            MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* Release the datatype handles that MPI_Type_get_contents returned;
     * named (predefined) types must not be freed. */
    for (i = 0; i < ntypes; i++) {
        MPI_Type_get_envelope(types[i], &old_nints, &old_nadds, &old_ntypes, &old_combiner);
        if (old_combiner != MPI_COMBINER_NAMED)
            MPI_Type_free(types + i);
    }

    ADIOI_Free(ints);
    ADIOI_Free(adds);
    ADIOI_Free(types);
    return count;
}
1162
1163
1164 /****************************************************************/
1165
1166 /* ADIOI_Optimize_flattened()
1167 *
1168 * Scans the blocks of a flattened type and merges adjacent blocks
1169 * together, resulting in a shorter blocklist (and thus fewer
1170 * contiguous operations).
1171 *
1172 * NOTE: a further optimization would be to remove zero length blocks. However,
1173 * the first and last blocks must remain as zero length first or last block
1174 * indicates UB and LB. Furthermore, once the "zero length blocklen" fix
1175 * went in, the flattened representation should no longer have zero-length
1176 * blocks except for UB and LB markers.
1177 */
void ADIOI_Optimize_flattened(ADIOI_Flatlist_node * flat_type)
{
    int cur, prev, out, merged;
    ADIO_Offset *new_lens, *new_offs;
    ADIO_Offset *lens = flat_type->blocklens;
    ADIO_Offset *offs = flat_type->indices;

    /* Pass 1: count how many blocks would remain after merging adjacent
     * regions, and set the NEGATIVE/DECREASE/OVERLAP flags while scanning. */
    merged = 1;
    prev = -1;  /* index of the previous non-zero-length block, or -1 */
    for (cur = 0; cur < flat_type->count; cur++) {
        if (cur + 1 < flat_type->count && offs[cur] + lens[cur] != offs[cur + 1])
            merged++;

        /* any negative displacement with real data taints the whole type */
        if (lens[cur] > 0 && offs[cur] < 0)
            flat_type->flag |= ADIOI_TYPE_NEGATIVE;

        if (lens[cur] == 0)     /* zero-length blocks don't affect ordering checks */
            continue;
        if (prev < 0) {
            prev = cur;         /* remember the first non-zero-length block */
            continue;
        }

        /* displacements must be monotonically nondecreasing ... */
        if (offs[prev] > offs[cur])
            flat_type->flag |= ADIOI_TYPE_DECREASE;
        /* ... and regions must not overlap */
        if (offs[prev] + lens[prev] > offs[cur])
            flat_type->flag |= ADIOI_TYPE_OVERLAP;

        prev = cur;
    }

    /* nothing merges: keep the existing arrays */
    if (merged == flat_type->count)
        return;

    /* single allocation: blocklens first, indices right behind them */
    new_lens = (ADIO_Offset *) ADIOI_Calloc(merged * 2, sizeof(ADIO_Offset));
    new_offs = new_lens + merged;

    /* Pass 2: emit the merged blocklist. */
    new_lens[0] = lens[0];
    new_offs[0] = offs[0];
    out = 0;
    for (cur = 0; cur + 1 < flat_type->count; cur++) {
        if (offs[cur] + lens[cur] == offs[cur + 1]) {
            /* next block continues the current one: extend in place */
            new_lens[out] += lens[cur + 1];
        } else {
            out++;
            new_offs[out] = offs[cur + 1];
            new_lens[out] = lens[cur + 1];
        }
    }

    flat_type->count = merged;
    /* indices shared the blocklens allocation, so one free releases both */
    ADIOI_Free(flat_type->blocklens);
    flat_type->blocklens = new_lens;
    flat_type->indices = new_offs;
}
1240
/* Keyval used to cache a flattened representation (ADIOI_Flatlist_node *)
 * directly on the datatype; created lazily in ADIOI_Flatten_and_find(). */
int ADIOI_Flattened_type_keyval = MPI_KEYVAL_INVALID;
1242
/* Attribute-copy callback for ADIOI_Flattened_type_keyval: the duplicated
 * datatype shares the same flattened representation, so just take another
 * reference on the node and hand it to the new datatype. */
int ADIOI_Flattened_type_copy(MPI_Datatype oldtype,
                              int type_keyval, void *extra_state, void *attribute_val_in,
                              void *attribute_val_out, int *flag)
{
    ADIOI_Flatlist_node *flat = (ADIOI_Flatlist_node *) attribute_val_in;

    if (flat != NULL)
        flat->refct++;

    *(ADIOI_Flatlist_node **) attribute_val_out = flat;
    *flag = 1;  /* attribute copied to new communicator */
    return MPI_SUCCESS;
}
1254
/* Attribute-delete callback for ADIOI_Flattened_type_keyval: drop one
 * reference and free the flattened node when the last reference is gone. */
int ADIOI_Flattened_type_delete(MPI_Datatype datatype,
                                int type_keyval, void *attribute_val, void *extra_state)
{
    ADIOI_Flatlist_node *flat = (ADIOI_Flatlist_node *) attribute_val;

    ADIOI_Assert(flat != NULL);

    if (--flat->refct <= 0) {
        /* indices points into the blocklens allocation, so a single free
         * releases both arrays */
        ADIOI_Free(flat->blocklens);
        ADIOI_Free(flat);
    }

    return MPI_SUCCESS;
}
1269
ADIOI_Flatten_and_find(MPI_Datatype datatype)1270 ADIOI_Flatlist_node *ADIOI_Flatten_and_find(MPI_Datatype datatype)
1271 {
1272 ADIOI_Flatlist_node *node;
1273 int flag = 0;
1274
1275 if (ADIOI_Flattened_type_keyval == MPI_KEYVAL_INVALID) {
1276 /* ADIOI_End_call will take care of cleanup */
1277 MPI_Type_create_keyval(ADIOI_Flattened_type_copy,
1278 ADIOI_Flattened_type_delete, &ADIOI_Flattened_type_keyval, NULL);
1279 }
1280
1281 MPI_Type_get_attr(datatype, ADIOI_Flattened_type_keyval, &node, &flag);
1282 if (flag == 0) {
1283 node = ADIOI_Flatten_datatype(datatype);
1284 }
1285
1286 return node;
1287 }
1288