1 /*
2  * Copyright (C) by Argonne National Laboratory
3  *     See COPYRIGHT in top-level directory
4  */
5 
6 #include "adio.h"
7 #include "adio_extern.h"
8 
9 #ifdef MPL_USE_DBG_LOGGING
10 #define FLATTEN_DEBUG 1
11 #endif
12 
flatlist_node_new(MPI_Datatype datatype,MPI_Count count)13 static ADIOI_Flatlist_node *flatlist_node_new(MPI_Datatype datatype, MPI_Count count)
14 {
15     ADIOI_Flatlist_node *flat;
16     flat = ADIOI_Malloc(sizeof(ADIOI_Flatlist_node));
17 
18     flat->type = datatype;
19     flat->blocklens = NULL;
20     flat->indices = NULL;
21     flat->lb_idx = flat->ub_idx = -1;
22     flat->refct = 1;
23     flat->count = count;
24     flat->flag = 0;
25 
26     flat->blocklens = (ADIO_Offset *) ADIOI_Calloc(flat->count * 2, sizeof(ADIO_Offset));
27     flat->indices = flat->blocklens + flat->count;
28     return flat;
29 }
30 
31 /*
32  * I don't really expect this to ever trigger, but without the below safety
33  * valve, the design relies on the Count function coming out >= whatever
34  * the Flatten function comes up with.  There are enough differences between
35  * the two that it's hard to be positive this will always be true.  So every
36  * time something's added to flat's arrays, let's make sure they're big enough
37  * and re-alloc if not.
38  */
/* Ensure flat's blocklens/indices arrays can hold an entry at position
 * 'idx'; grow (re-allocate and copy) them if not.  See the comment above:
 * this is a safety valve in case the Count pass under-counts relative to
 * the Flatten pass.
 *
 * Fix: the previous code computed the new size as
 *     int new_count = (flat->count * 1.25 + 4);
 * which both truncates an MPI_Count (potentially 64-bit) into an int and
 * detours through floating point.  Use MPI_Count integer arithmetic
 * instead; count + count/4 + 4 equals floor(count * 1.25 + 4) for all
 * non-negative counts, so the growth policy is unchanged. */
static void flatlist_node_grow(ADIOI_Flatlist_node * flat, int idx)
{
    if (idx >= flat->count) {
        ADIO_Offset *new_blocklens;
        ADIO_Offset *new_indices;
        /* grow ~25% plus a small constant, in MPI_Count precision */
        MPI_Count new_count = flat->count + flat->count / 4 + 4;
        /* single allocation holds both arrays, mirroring flatlist_node_new */
        new_blocklens = (ADIO_Offset *) ADIOI_Calloc(new_count * 2, sizeof(ADIO_Offset));
        new_indices = new_blocklens + new_count;
        if (flat->count) {
            memcpy(new_blocklens, flat->blocklens, flat->count * sizeof(ADIO_Offset));
            memcpy(new_indices, flat->indices, flat->count * sizeof(ADIO_Offset));
            /* indices is an alias into the blocklens buffer; free only once */
            ADIOI_Free(flat->blocklens);
        }
        flat->blocklens = new_blocklens;
        flat->indices = new_indices;
        flat->count = new_count;
    }
}
57 
58 void ADIOI_Optimize_flattened(ADIOI_Flatlist_node * flat_type);
59 /* flatten datatype and add it to Flatlist */
/* Flatten 'datatype' into an ADIOI_Flatlist_node (parallel arrays of byte
 * offsets and block lengths), cache it as an attribute on the datatype via
 * ADIOI_Flattened_type_keyval, and return it.  A repeat call with the same
 * datatype returns the cached node without re-flattening. */
ADIOI_Flatlist_node *ADIOI_Flatten_datatype(MPI_Datatype datatype)
{
    MPI_Count flat_count, curr_index = 0;
    int is_contig, flag;
    ADIOI_Flatlist_node *flat;

    if (ADIOI_Flattened_type_keyval == MPI_KEYVAL_INVALID) {
        /* first use: create the keyval used to attach flattened
         * representations to datatypes.
         * ADIOI_End_call will take care of cleanup */
        MPI_Type_create_keyval(ADIOI_Flattened_type_copy,
                               ADIOI_Flattened_type_delete, &ADIOI_Flattened_type_keyval, NULL);
    }

    /* check if necessary to flatten. */

    /* has it already been flattened? */
    MPI_Type_get_attr(datatype, ADIOI_Flattened_type_keyval, &flat, &flag);
    if (flag) {
#ifdef FLATTEN_DEBUG
        DBG_FPRINTF(stderr, "ADIOI_Flatten_datatype:: found datatype %#X\n", datatype);
#endif
        return flat;
    }

    /* is it entirely contiguous? */
    ADIOI_Datatype_iscontig(datatype, &is_contig);

#ifdef FLATTEN_DEBUG
    DBG_FPRINTF(stderr, "ADIOI_Flatten_datatype:: is_contig %#X\n", is_contig);
#endif
    /* it would be great if ADIOI_Count_contiguous_blocks and the rest of the
     * flattening code operated on the built-in named types, but
     * it recursively processes types, stopping when it hits a named type. So
     * we will do the little bit of work that named types require right here,
     * and avoid touching the scary flattening code. */

    if (is_contig)
        flat_count = 1;         /* one (offset, length) pair covers it all */
    else {
        flat_count = ADIOI_Count_contiguous_blocks(datatype, &curr_index);
    }
    /* flatten and add to datatype */
    flat = flatlist_node_new(datatype, flat_count);
    if (is_contig) {
        /* single block: the whole type's size at offset 0.
         * NOTE(review): blocklens is ADIO_Offset* while MPI_Type_size_x
         * expects MPI_Count*; this relies on the two types being layout
         * compatible in this build -- confirm against adio.h typedefs. */
        MPI_Type_size_x(datatype, &(flat->blocklens[0]));
        flat->indices[0] = 0;
    } else {

        curr_index = 0;
        ADIOI_Flatten(datatype, flat, 0, &curr_index);
#ifdef FLATTEN_DEBUG
        DBG_FPRINTF(stderr, "ADIOI_Flatten_datatype:: ADIOI_Flatten\n");
#endif

/*
 * Setting flat->count to curr_index, since curr_index is the most fundamentally
 * correct updated value that represents what's in the indices/blocklens arrays.
 * It would be nice if the counter function and the flatten function were in sync,
 * but the numerous cases that decrement flat->count in the flatten function show
 * that syncing them is a hack, and as long as the counter doesn't under-count
 * it's good enough.
 */
        flat->count = curr_index;

        ADIOI_Optimize_flattened(flat);
/* debug */
#ifdef FLATTEN_DEBUG
        {
            int i;
            for (i = 0; i < flat->count; i++) {
                DBG_FPRINTF(stderr,
                            "ADIOI_Flatten_datatype:: i %#X, blocklens %#llX, indices %#llX\n", i,
                            flat->blocklens[i], flat->indices[i]);
            }
        }
#endif
    }
    /* cache the result on the datatype so the next call is a lookup */
    MPI_Type_set_attr(datatype, ADIOI_Flattened_type_keyval, flat);
    return flat;

}
140 
141 /* ADIOI_Flatten()
142  *
143  * Assumption: input datatype is not a basic!!!!
144  */
ADIOI_Flatten(MPI_Datatype datatype,ADIOI_Flatlist_node * flat,ADIO_Offset st_offset,MPI_Count * curr_index)145 void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node * flat,
146                    ADIO_Offset st_offset, MPI_Count * curr_index)
147 {
148     int k, m, n, is_hindexed_block = 0;
149     int lb_updated = 0;
150     int combiner, old_combiner, old_is_contig;
151     int nints, nadds, ntypes, old_nints, old_nadds, old_ntypes;
152     /* By using ADIO_Offset we preserve +/- sign and
153      * avoid >2G integer arithmetic problems */
154     ADIO_Offset top_count;
155     MPI_Count i, j, old_size, prev_index, basic_num, num, nonzeroth;
156     MPI_Aint old_extent;        /* Assume extents are non-negative */
157     int *ints;
158     MPI_Aint *adds;             /* Make no assumptions about +/- sign on these */
159     MPI_Datatype *types;
160     MPI_Type_get_envelope(datatype, &nints, &nadds, &ntypes, &combiner);
161     ints = (int *) ADIOI_Malloc((nints + 1) * sizeof(int));
162     adds = (MPI_Aint *) ADIOI_Malloc((nadds + 1) * sizeof(MPI_Aint));
163     types = (MPI_Datatype *) ADIOI_Malloc((ntypes + 1) * sizeof(MPI_Datatype));
164     MPI_Type_get_contents(datatype, nints, nadds, ntypes, ints, adds, types);
165 
166 #ifdef FLATTEN_DEBUG
167     DBG_FPRINTF(stderr, "ADIOI_Flatten:: st_offset %#llX, curr_index %#llX\n", st_offset,
168                 *curr_index);
169     DBG_FPRINTF(stderr, "ADIOI_Flatten:: nints %#X, nadds %#X, ntypes %#X\n", nints, nadds, ntypes);
170     for (i = 0; i < nints; ++i) {
171         DBG_FPRINTF(stderr, "ADIOI_Flatten:: ints[%lld]=%#X\n", (long long) i, ints[i]);
172     }
173     for (i = 0; i < nadds; ++i) {
174         DBG_FPRINTF(stderr, "ADIOI_Flatten:: adds[%lld]=" MPI_AINT_FMT_HEX_SPEC "\n",
175                     (long long) i, adds[i]);
176     }
177     for (i = 0; i < ntypes; ++i) {
178         DBG_FPRINTF(stderr, "ADIOI_Flatten:: types[%lld]=%#llX\n", (long long) i,
179                     (unsigned long long) (unsigned long) types[i]);
180     }
181 #endif
182     /* Chapter 4, page 83: when processing datatypes, note this item from the
183      * standard:
184      Most datatype constructors have replication count or block length
185      arguments.  Allowed values are non-negative integers. If the value is
186      zero, no elements are generated in the type map and there is no effect
187      on datatype bounds or extent.  */
188 
189     switch (combiner) {
190 #ifdef MPIIMPL_HAVE_MPI_COMBINER_DUP
191         case MPI_COMBINER_DUP:
192 #ifdef FLATTEN_DEBUG
193             DBG_FPRINTF(stderr, "ADIOI_Flatten:: MPI_COMBINER_DUP\n");
194 #endif
195             MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, &old_ntypes, &old_combiner);
196             ADIOI_Datatype_iscontig(types[0], &old_is_contig);
197             if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
198                 ADIOI_Flatten(types[0], flat, st_offset, curr_index);
199             break;
200 #endif
201 #ifdef MPIIMPL_HAVE_MPI_COMBINER_SUBARRAY
202         case MPI_COMBINER_SUBARRAY:
203             {
204                 int dims = ints[0];
205                 MPI_Datatype stype;
206 #ifdef FLATTEN_DEBUG
207                 DBG_FPRINTF(stderr, "ADIOI_Flatten:: MPI_COMBINER_SUBARRAY\n");
208 #endif
209 
210                 ADIO_Type_create_subarray(dims, &ints[1],       /* sizes */
211                                           &ints[dims + 1],      /* subsizes */
212                                           &ints[2 * dims + 1],  /* starts */
213                                           ints[3 * dims + 1],   /* order */
214                                           types[0],     /* type */
215                                           &stype);
216                 ADIOI_Flatten(stype, flat, st_offset, curr_index);
217                 MPI_Type_free(&stype);
218             }
219             break;
220 #endif
221 #ifdef MPIIMPL_HAVE_MPI_COMBINER_DARRAY
222         case MPI_COMBINER_DARRAY:
223             {
224                 int dims = ints[2];
225                 MPI_Datatype dtype;
226 #ifdef FLATTEN_DEBUG
227                 DBG_FPRINTF(stderr, "ADIOI_Flatten:: MPI_COMBINER_DARRAY\n");
228 #endif
229 
230                 ADIO_Type_create_darray(ints[0],        /* size */
231                                         ints[1],        /* rank */
232                                         dims, &ints[3], /* gsizes */
233                                         &ints[dims + 3],        /* distribs */
234                                         &ints[2 * dims + 3],    /* dargs */
235                                         &ints[3 * dims + 3],    /* psizes */
236                                         ints[4 * dims + 3],     /* order */
237                                         types[0], &dtype);
238 #ifdef FLATTEN_DEBUG
239                 DBG_FPRINTF(stderr,
240                             "ADIOI_Flatten:: MPI_COMBINER_DARRAY <ADIOI_Flatten(dtype, flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX, st_offset %#llX, curr_index %#llX);\n",
241                             0, flat->indices[0], 0, flat->blocklens[0], st_offset, *curr_index);
242 #endif
243                 ADIOI_Flatten(dtype, flat, st_offset, curr_index);
244 #ifdef FLATTEN_DEBUG
245                 DBG_FPRINTF(stderr,
246                             "ADIOI_Flatten:: MPI_COMBINER_DARRAY >ADIOI_Flatten(dtype, flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX, st_offset %#llX, curr_index %#llX);\n",
247                             0, flat->indices[0], 0, flat->blocklens[0], st_offset, *curr_index);
248 #endif
249                 MPI_Type_free(&dtype);
250             }
251             break;
252 #endif
253         case MPI_COMBINER_CONTIGUOUS:
254 #ifdef FLATTEN_DEBUG
255             DBG_FPRINTF(stderr, "ADIOI_Flatten:: MPI_COMBINER_CONTIGUOUS\n");
256 #endif
257             top_count = ints[0];
258             MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, &old_ntypes, &old_combiner);
259             ADIOI_Datatype_iscontig(types[0], &old_is_contig);
260 
261             prev_index = *curr_index;
262             if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
263                 ADIOI_Flatten(types[0], flat, st_offset, curr_index);
264 
265             if (prev_index == *curr_index) {
266 /* simplest case, made up of basic or contiguous types */
267                 j = *curr_index;
268                 flatlist_node_grow(flat, j);
269                 flat->indices[j] = st_offset;
270                 MPI_Type_size_x(types[0], &old_size);
271                 flat->blocklens[j] = top_count * old_size;
272 #ifdef FLATTEN_DEBUG
273                 DBG_FPRINTF(stderr,
274                             "ADIOI_Flatten:: simple flat->indices[%#llX] %#llX, flat->blocklens[%#llX] %#llX\n",
275                             j, flat->indices[j], j, flat->blocklens[j]);
276 #endif
277                 (*curr_index)++;
278             } else {
279 /* made up of noncontiguous derived types */
280                 j = *curr_index;
281                 num = *curr_index - prev_index;
282 
283 /* The noncontiguous types have to be replicated count times */
284                 MPI_Type_extent(types[0], &old_extent);
285                 for (m = 1; m < top_count; m++) {
286                     for (i = 0; i < num; i++) {
287                         flatlist_node_grow(flat, j);
288                         flat->indices[j] =
289                             flat->indices[j - num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
290                         flat->blocklens[j] = flat->blocklens[j - num];
291 #ifdef FLATTEN_DEBUG
292                         DBG_FPRINTF(stderr,
293                                     "ADIOI_Flatten:: derived flat->indices[%#llX] %#llX, flat->blocklens[%#llX] %#llX\n",
294                                     j, flat->indices[j], j, flat->blocklens[j]);
295 #endif
296                         j++;
297                     }
298                 }
299                 *curr_index = j;
300             }
301             break;
302 
303         case MPI_COMBINER_VECTOR:
304 #ifdef FLATTEN_DEBUG
305             DBG_FPRINTF(stderr, "ADIOI_Flatten:: MPI_COMBINER_VECTOR\n");
306 #endif
307             top_count = ints[0];
308             MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, &old_ntypes, &old_combiner);
309             ADIOI_Datatype_iscontig(types[0], &old_is_contig);
310 
311             prev_index = *curr_index;
312             if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
313                 ADIOI_Flatten(types[0], flat, st_offset, curr_index);
314 
315             if (prev_index == *curr_index) {
316 /* simplest case, vector of basic or contiguous types */
317                 /* By using ADIO_Offset we preserve +/- sign and
318                  * avoid >2G integer arithmetic problems */
319                 ADIO_Offset blocklength = ints[1], stride = ints[2];
320                 j = *curr_index;
321                 flatlist_node_grow(flat, j);
322                 flat->indices[j] = st_offset;
323                 MPI_Type_size_x(types[0], &old_size);
324                 flat->blocklens[j] = blocklength * old_size;
325                 for (i = j + 1; i < j + top_count; i++) {
326                     flatlist_node_grow(flat, i);
327                     flat->indices[i] = flat->indices[i - 1] + stride * old_size;
328                     flat->blocklens[i] = flat->blocklens[j];
329                 }
330                 *curr_index = i;
331             } else {
332 /* vector of noncontiguous derived types */
333                 /* By using ADIO_Offset we preserve +/- sign and
334                  * avoid >2G integer arithmetic problems */
335                 ADIO_Offset blocklength = ints[1], stride = ints[2];
336 
337                 j = *curr_index;
338                 num = *curr_index - prev_index;
339 
340 /* The noncontiguous types have to be replicated blocklen times
341    and then strided. Replicate the first one. */
342                 MPI_Type_extent(types[0], &old_extent);
343                 for (m = 1; m < blocklength; m++) {
344                     for (i = 0; i < num; i++) {
345                         flatlist_node_grow(flat, j);
346                         flat->indices[j] =
347                             flat->indices[j - num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
348                         flat->blocklens[j] = flat->blocklens[j - num];
349                         j++;
350                     }
351                 }
352                 *curr_index = j;
353 
354 /* Now repeat with strides. */
355                 num = *curr_index - prev_index;
356                 for (i = 1; i < top_count; i++) {
357                     for (m = 0; m < num; m++) {
358                         flatlist_node_grow(flat, j);
359                         flat->indices[j] =
360                             flat->indices[j - num] + stride * ADIOI_AINT_CAST_TO_OFFSET old_extent;
361                         flat->blocklens[j] = flat->blocklens[j - num];
362                         j++;
363                     }
364                 }
365                 *curr_index = j;
366             }
367             break;
368 
369         case MPI_COMBINER_HVECTOR:
370         case MPI_COMBINER_HVECTOR_INTEGER:
371 #ifdef FLATTEN_DEBUG
372             DBG_FPRINTF(stderr, "ADIOI_Flatten:: MPI_COMBINER_HVECTOR_INTEGER\n");
373 #endif
374             top_count = ints[0];
375             MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, &old_ntypes, &old_combiner);
376             ADIOI_Datatype_iscontig(types[0], &old_is_contig);
377 
378             prev_index = *curr_index;
379             if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
380                 ADIOI_Flatten(types[0], flat, st_offset, curr_index);
381 
382             if (prev_index == *curr_index) {
383 /* simplest case, vector of basic or contiguous types */
384                 /* By using ADIO_Offset we preserve +/- sign and
385                  * avoid >2G integer arithmetic problems */
386                 ADIO_Offset blocklength = ints[1];
387                 j = *curr_index;
388                 flatlist_node_grow(flat, j);
389                 flat->indices[j] = st_offset;
390                 MPI_Type_size_x(types[0], &old_size);
391                 flat->blocklens[j] = blocklength * old_size;
392                 for (i = j + 1; i < j + top_count; i++) {
393                     flatlist_node_grow(flat, i);
394                     flat->indices[i] = flat->indices[i - 1] + adds[0];
395                     flat->blocklens[i] = flat->blocklens[j];
396                 }
397                 *curr_index = i;
398             } else {
399 /* vector of noncontiguous derived types */
400                 /* By using ADIO_Offset we preserve +/- sign and
401                  * avoid >2G integer arithmetic problems */
402                 ADIO_Offset blocklength = ints[1];
403 
404                 j = *curr_index;
405                 num = *curr_index - prev_index;
406 
407 /* The noncontiguous types have to be replicated blocklen times
408    and then strided. Replicate the first one. */
409                 MPI_Type_extent(types[0], &old_extent);
410                 for (m = 1; m < blocklength; m++) {
411                     for (i = 0; i < num; i++) {
412                         flatlist_node_grow(flat, j);
413                         flat->indices[j] =
414                             flat->indices[j - num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
415                         flat->blocklens[j] = flat->blocklens[j - num];
416                         j++;
417                     }
418                 }
419                 *curr_index = j;
420 
421 /* Now repeat with strides. */
422                 num = *curr_index - prev_index;
423                 for (i = 1; i < top_count; i++) {
424                     for (m = 0; m < num; m++) {
425                         flatlist_node_grow(flat, j);
426                         flat->indices[j] = flat->indices[j - num] + adds[0];
427                         flat->blocklens[j] = flat->blocklens[j - num];
428                         j++;
429                     }
430                 }
431                 *curr_index = j;
432             }
433             break;
434 
435         case MPI_COMBINER_INDEXED:
436 #ifdef FLATTEN_DEBUG
437             DBG_FPRINTF(stderr, "ADIOI_Flatten:: MPI_COMBINER_INDEXED\n");
438 #endif
439             top_count = ints[0];
440             MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, &old_ntypes, &old_combiner);
441             ADIOI_Datatype_iscontig(types[0], &old_is_contig);
442             MPI_Type_extent(types[0], &old_extent);
443 
444             prev_index = *curr_index;
445             if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig)) {
446                 /* By using ADIO_Offset we preserve +/- sign and
447                  * avoid >2G integer arithmetic problems */
448                 ADIO_Offset stride = ints[top_count + 1];
449                 ADIOI_Flatten(types[0], flat,
450                               st_offset + stride * ADIOI_AINT_CAST_TO_OFFSET old_extent,
451                               curr_index);
452             }
453 
454             if (prev_index == *curr_index) {
455 /* simplest case, indexed type made up of basic or contiguous types */
456                 j = *curr_index;
457                 for (i = j, nonzeroth = i; i < j + top_count; i++) {
458                     /* By using ADIO_Offset we preserve +/- sign and
459                      * avoid >2G integer arithmetic problems */
460                     ADIO_Offset blocklength = ints[1 + i - j], stride = ints[top_count + 1 + i - j];
461                     if (blocklength > 0) {
462                         flatlist_node_grow(flat, nonzeroth);
463                         flat->indices[nonzeroth] =
464                             st_offset + stride * ADIOI_AINT_CAST_TO_OFFSET old_extent;
465                         flat->blocklens[nonzeroth] =
466                             blocklength * ADIOI_AINT_CAST_TO_OFFSET old_extent;
467                         nonzeroth++;
468                     }
469                 }
470                 *curr_index = nonzeroth;
471             } else {
472 /* indexed type made up of noncontiguous derived types */
473 
474                 j = *curr_index;
475                 num = *curr_index - prev_index;
476                 basic_num = num;
477 
478 /* The noncontiguous types have to be replicated blocklens[i] times
479    and then strided. Replicate the first one. */
480                 for (m = 1; m < ints[1]; m++) {
481                     for (i = 0, nonzeroth = j; i < num; i++) {
482                         if (flat->blocklens[j - num] > 0) {
483                             flatlist_node_grow(flat, nonzeroth);
484                             flat->indices[nonzeroth] =
485                                 flat->indices[nonzeroth - num] +
486                                 ADIOI_AINT_CAST_TO_OFFSET old_extent;
487                             flat->blocklens[nonzeroth] = flat->blocklens[nonzeroth - num];
488                             j++;
489                             nonzeroth++;
490                         }
491                     }
492                 }
493                 *curr_index = j;
494 
495 /* Now repeat with strides. */
496                 for (i = 1; i < top_count; i++) {
497                     num = *curr_index - prev_index;
498                     prev_index = *curr_index;
499                     for (m = 0, nonzeroth = j; m < basic_num; m++) {
500                         /* By using ADIO_Offset we preserve +/- sign and
501                          * avoid >2G integer arithmetic problems */
502                         ADIO_Offset stride = ints[top_count + 1 + i] - ints[top_count + i];
503                         if (flat->blocklens[j - num] > 0) {
504                             flatlist_node_grow(flat, nonzeroth);
505                             flat->indices[nonzeroth] =
506                                 flat->indices[j - num] +
507                                 stride * ADIOI_AINT_CAST_TO_OFFSET old_extent;
508                             flat->blocklens[nonzeroth] = flat->blocklens[j - num];
509                             j++;
510                             nonzeroth++;
511                         }
512                     }
513                     *curr_index = j;
514                     for (m = 1; m < ints[1 + i]; m++) {
515                         for (k = 0, nonzeroth = j; k < basic_num; k++) {
516                             if (flat->blocklens[j - basic_num] > 0) {
517                                 flatlist_node_grow(flat, nonzeroth);
518                                 flat->indices[nonzeroth] =
519                                     flat->indices[j - basic_num] +
520                                     ADIOI_AINT_CAST_TO_OFFSET old_extent;
521                                 flat->blocklens[nonzeroth] = flat->blocklens[j - basic_num];
522                                 j++;
523                                 nonzeroth++;
524                             }
525                         }
526                     }
527                     *curr_index = j;
528                 }
529             }
530             break;
531 
532 #if defined HAVE_DECL_MPI_COMBINER_HINDEXED_BLOCK && HAVE_DECL_MPI_COMBINER_HINDEXED_BLOCK
533         case MPI_COMBINER_HINDEXED_BLOCK:
534             is_hindexed_block = 1;
535             /* deliberate fall-through */
536             MPL_FALLTHROUGH;
537 #endif
538         case MPI_COMBINER_INDEXED_BLOCK:
539 #ifdef FLATTEN_DEBUG
540             DBG_FPRINTF(stderr, "ADIOI_Flatten:: MPI_COMBINER_INDEXED_BLOCK\n");
541 #endif
542             top_count = ints[0];
543             MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, &old_ntypes, &old_combiner);
544             ADIOI_Datatype_iscontig(types[0], &old_is_contig);
545             MPI_Type_extent(types[0], &old_extent);
546 
547             prev_index = *curr_index;
548             if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig)) {
549                 /* By using ADIO_Offset we preserve +/- sign and
550                  * avoid >2G integer arithmetic problems */
551                 ADIO_Offset stride = ints[1 + 1];
552                 if (is_hindexed_block) {
553                     ADIOI_Flatten(types[0], flat, st_offset + adds[0], curr_index);
554                 } else {
555                     ADIOI_Flatten(types[0], flat,
556                                   st_offset + stride * ADIOI_AINT_CAST_TO_OFFSET old_extent,
557                                   curr_index);
558                 }
559             }
560 
561             if (prev_index == *curr_index) {
562 /* simplest case, indexed type made up of basic or contiguous types */
563                 j = *curr_index;
564                 for (i = j; i < j + top_count; i++) {
565                     /* By using ADIO_Offset we preserve +/- sign and
566                      * avoid >2G integer arithmetic problems */
567                     ADIO_Offset blocklength = ints[1];
568                     if (is_hindexed_block) {
569                         flatlist_node_grow(flat, i);
570                         flat->indices[i] = st_offset + adds[i - j];
571                     } else {
572                         ADIO_Offset stride = ints[1 + 1 + i - j];
573                         flatlist_node_grow(flat, i);
574                         flat->indices[i] = st_offset +
575                             stride * ADIOI_AINT_CAST_TO_OFFSET old_extent;
576                     }
577                     flat->blocklens[i] = blocklength * ADIOI_AINT_CAST_TO_OFFSET old_extent;
578                 }
579                 *curr_index = i;
580             } else {
581 /* vector of noncontiguous derived types */
582 
583                 j = *curr_index;
584                 num = *curr_index - prev_index;
585 
586 /* The noncontiguous types have to be replicated blocklens[i] times
587    and then strided. Replicate the first one. */
588                 for (m = 1; m < ints[1]; m++) {
589                     for (i = 0; i < num; i++) {
590                         if (is_hindexed_block) {
591                             /* this is the one place the hindexed case uses the
592                              * extent of a type */
593                             MPI_Type_extent(types[0], &old_extent);
594                         }
595                         flatlist_node_grow(flat, j);
596                         flat->indices[j] = flat->indices[j - num] +
597                             ADIOI_AINT_CAST_TO_OFFSET old_extent;
598                         flat->blocklens[j] = flat->blocklens[j - num];
599                         j++;
600                     }
601                 }
602                 *curr_index = j;
603 
604 /* Now repeat with strides. */
605                 num = *curr_index - prev_index;
606                 for (i = 1; i < top_count; i++) {
607                     for (m = 0; m < num; m++) {
608                         if (is_hindexed_block) {
609                             flatlist_node_grow(flat, j);
610                             flat->indices[j] = flat->indices[j - num] + adds[i] - adds[i - 1];
611                         } else {
612                             /* By using ADIO_Offset we preserve +/- sign and
613                              * avoid >2G integer arithmetic problems */
614                             ADIO_Offset stride = ints[2 + i] - ints[1 + i];
615                             flatlist_node_grow(flat, j);
616                             flat->indices[j] = flat->indices[j - num] +
617                                 stride * ADIOI_AINT_CAST_TO_OFFSET old_extent;
618                         }
619                         flat->blocklens[j] = flat->blocklens[j - num];
620                         j++;
621                     }
622                 }
623                 *curr_index = j;
624             }
625             break;
626 
627         case MPI_COMBINER_HINDEXED:
628         case MPI_COMBINER_HINDEXED_INTEGER:
629 #ifdef FLATTEN_DEBUG
630             DBG_FPRINTF(stderr, "ADIOI_Flatten:: MPI_COMBINER_HINDEXED_INTEGER\n");
631 #endif
632             top_count = ints[0];
633             MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, &old_ntypes, &old_combiner);
634             ADIOI_Datatype_iscontig(types[0], &old_is_contig);
635 
636             prev_index = *curr_index;
637             if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig)) {
638                 ADIOI_Flatten(types[0], flat, st_offset + adds[0], curr_index);
639             }
640 
641             if (prev_index == *curr_index) {
642 /* simplest case, indexed type made up of basic or contiguous types */
643                 j = *curr_index;
644                 MPI_Type_size_x(types[0], &old_size);
645                 for (i = j, nonzeroth = j; i < j + top_count; i++) {
646                     if (ints[1 + i - j] > 0) {
647                         /* By using ADIO_Offset we preserve +/- sign and
648                          * avoid >2G integer arithmetic problems */
649                         ADIO_Offset blocklength = ints[1 + i - j];
650                         flatlist_node_grow(flat, nonzeroth);
651                         flat->indices[nonzeroth] = st_offset + adds[i - j];
652                         flat->blocklens[nonzeroth] = blocklength * old_size;
653                         nonzeroth++;
654                     }
655                 }
656                 *curr_index = nonzeroth;
657             } else {
658 /* indexed type made up of noncontiguous derived types */
659 
660                 j = *curr_index;
661                 num = *curr_index - prev_index;
662                 basic_num = num;
663 
664 /* The noncontiguous types have to be replicated blocklens[i] times
665    and then strided. Replicate the first one. */
666                 MPI_Type_extent(types[0], &old_extent);
667                 for (m = 1; m < ints[1]; m++) {
668                     for (i = 0, nonzeroth = j; i < num; i++) {
669                         if (flat->blocklens[j - num] > 0) {
670                             flatlist_node_grow(flat, nonzeroth);
671                             flat->indices[nonzeroth] =
672                                 flat->indices[j - num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
673                             flat->blocklens[nonzeroth] = flat->blocklens[j - num];
674                             j++;
675                             nonzeroth++;
676                         }
677                     }
678                 }
679                 *curr_index = j;
680 
681 /* Now repeat with strides. */
682                 for (i = 1; i < top_count; i++) {
683                     num = *curr_index - prev_index;
684                     prev_index = *curr_index;
685                     for (m = 0, nonzeroth = j; m < basic_num; m++) {
686                         if (flat->blocklens[j - num] > 0) {
687                             flatlist_node_grow(flat, nonzeroth);
688                             flat->indices[nonzeroth] =
689                                 flat->indices[j - num] + adds[i] - adds[i - 1];
690                             flat->blocklens[nonzeroth] = flat->blocklens[j - num];
691                             j++;
692                             nonzeroth++;
693                         }
694                     }
695                     *curr_index = j;
696                     for (m = 1; m < ints[1 + i]; m++) {
697                         for (k = 0, nonzeroth = j; k < basic_num; k++) {
698                             if (flat->blocklens[j - basic_num] > 0) {
699                                 flatlist_node_grow(flat, nonzeroth);
700                                 flat->indices[nonzeroth] =
701                                     flat->indices[j - basic_num] +
702                                     ADIOI_AINT_CAST_TO_OFFSET old_extent;
703                                 flat->blocklens[nonzeroth] = flat->blocklens[j - basic_num];
704                                 j++;
705                                 nonzeroth++;
706                             }
707                         }
708                     }
709                     *curr_index = j;
710                 }
711             }
712             break;
713 
714         case MPI_COMBINER_STRUCT:
715         case MPI_COMBINER_STRUCT_INTEGER:
716 #ifdef FLATTEN_DEBUG
717             DBG_FPRINTF(stderr, "ADIOI_Flatten:: MPI_COMBINER_STRUCT_INTEGER\n");
718 #endif
719             top_count = ints[0];
720             for (n = 0; n < top_count; n++) {
721                 MPI_Type_get_envelope(types[n], &old_nints, &old_nadds, &old_ntypes, &old_combiner);
722                 ADIOI_Datatype_iscontig(types[n], &old_is_contig);
723 
724                 prev_index = *curr_index;
725                 if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
726                     ADIOI_Flatten(types[n], flat, st_offset + adds[n], curr_index);
727 
728                 if (prev_index == *curr_index) {
729 /* simplest case, current type is basic or contiguous types */
730                     /* By using ADIO_Offset we preserve +/- sign and
731                      * avoid >2G integer arithmetic problems */
732                     if (ints[1 + n] > 0 || types[n] == MPI_LB || types[n] == MPI_UB) {
733                         ADIO_Offset blocklength = ints[1 + n];
734                         j = *curr_index;
735                         flatlist_node_grow(flat, j);
736                         flat->indices[j] = st_offset + adds[n];
737                         MPI_Type_size_x(types[n], &old_size);
738                         flat->blocklens[j] = blocklength * old_size;
739                         if (types[n] == MPI_LB)
740                             flat->lb_idx = j;
741                         if (types[n] == MPI_UB)
742                             flat->ub_idx = j;
743 #ifdef FLATTEN_DEBUG
744                         DBG_FPRINTF(stderr,
745                                     "ADIOI_Flatten:: simple adds[%#X] " MPI_AINT_FMT_HEX_SPEC
746                                     ", flat->indices[%#llX] %#llX, flat->blocklens[%#llX] %#llX\n",
747                                     n, adds[n], j, flat->indices[j], j, flat->blocklens[j]);
748 #endif
749                         (*curr_index)++;
750                     }
751                 } else {
752 /* current type made up of noncontiguous derived types */
753 
754                     j = *curr_index;
755                     num = *curr_index - prev_index;
756 
757 /* The current type has to be replicated blocklens[n] times */
758                     MPI_Type_extent(types[n], &old_extent);
759                     for (m = 1; m < ints[1 + n]; m++) {
760                         for (i = 0; i < num; i++) {
761                             flatlist_node_grow(flat, j);
762                             flat->indices[j] =
763                                 flat->indices[j - num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
764                             flat->blocklens[j] = flat->blocklens[j - num];
765 #ifdef FLATTEN_DEBUG
766                             DBG_FPRINTF(stderr,
767                                         "ADIOI_Flatten:: simple old_extent " MPI_AINT_FMT_HEX_SPEC
768                                         ", flat->indices[%#llX] %#llX, flat->blocklens[%#llX] %#llX\n",
769                                         old_extent, j, flat->indices[j], j, flat->blocklens[j]);
770 #endif
771                             j++;
772                         }
773                     }
774                     *curr_index = j;
775                 }
776             }
777             break;
778 
779         case MPI_COMBINER_RESIZED:
780 #ifdef FLATTEN_DEBUG
781             DBG_FPRINTF(stderr, "ADIOI_Flatten:: MPI_COMBINER_RESIZED\n");
782 #endif
783 
784             /* This is done similar to a type_struct with an lb, datatype, ub */
785 
786             /* handle the Lb */
787             j = *curr_index;
788             /* when we process resized types, we (recursively) process the lower
789              * bound, the type being resized, then the upper bound.  In the
790              * resized-of-resized case, we might find ourselves updating the upper
791              * bound based on the inner type, but the lower bound based on the
792              * upper type.  check both lb and ub to prevent mixing updates */
793             if (flat->lb_idx == -1 && flat->ub_idx == -1) {
794                 flatlist_node_grow(flat, j);
795                 flat->indices[j] = st_offset + adds[0];
796                 /* this zero-length blocklens[] element, unlike elsewhere in the
797                  * flattening code, is correct and is used to indicate a lower bound
798                  * marker */
799                 flat->blocklens[j] = 0;
800                 flat->lb_idx = *curr_index;
801                 lb_updated = 1;
802 
803 #ifdef FLATTEN_DEBUG
804                 DBG_FPRINTF(stderr,
805                             "ADIOI_Flatten:: simple adds[%#X] " MPI_AINT_FMT_HEX_SPEC
806                             ", flat->indices[%#llX] %#llX, flat->blocklens[%#llX] %#llX\n", 0,
807                             adds[0], j, flat->indices[j], j, flat->blocklens[j]);
808 #endif
809 
810                 (*curr_index)++;
811             } else {
812                 /* skipped over this chunk because something else higher-up in the
813                  * type construction set this for us already */
814                 st_offset -= adds[0];
815             }
816 
817             /* handle the datatype */
818 
819             MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, &old_ntypes, &old_combiner);
820             ADIOI_Datatype_iscontig(types[0], &old_is_contig);
821 
822             if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig)) {
823                 ADIOI_Flatten(types[0], flat, st_offset + adds[0], curr_index);
824             } else {
825                 /* current type is basic or contiguous */
826                 j = *curr_index;
827                 flatlist_node_grow(flat, j);
828                 flat->indices[j] = st_offset;
829                 MPI_Type_size_x(types[0], &old_size);
830                 flat->blocklens[j] = old_size;
831 
832 #ifdef FLATTEN_DEBUG
833                 DBG_FPRINTF(stderr,
834                             "ADIOI_Flatten:: simple adds[%#X] " MPI_AINT_FMT_HEX_SPEC
835                             ", flat->indices[%#llX] %#llX, flat->blocklens[%#llX] %#llX\n", 0,
836                             adds[0], j, flat->indices[j], j, flat->blocklens[j]);
837 #endif
838 
839                 (*curr_index)++;
840             }
841 
842             /* take care of the extent as a UB */
843             /* see note above about mixing updates for why we check lb and ub */
844             if ((flat->lb_idx == -1 && flat->ub_idx == -1) || lb_updated) {
845                 j = *curr_index;
846                 flatlist_node_grow(flat, j);
847                 flat->indices[j] = st_offset + adds[0] + adds[1];
848                 /* again, zero-element ok: an upper-bound marker explicitly set by the
849                  * constructor of this resized type */
850                 flat->blocklens[j] = 0;
851                 flat->ub_idx = *curr_index;
852             } else {
853                 /* skipped over this chunk because something else higher-up in the
854                  * type construction set this for us already */
855                 (*curr_index)--;
856             }
857 
858 #ifdef FLATTEN_DEBUG
859             DBG_FPRINTF(stderr,
860                         "ADIOI_Flatten:: simple adds[%#X] " MPI_AINT_FMT_HEX_SPEC
861                         ", flat->indices[%#llX] %#llX, flat->blocklens[%#llX] %#llX\n", 1, adds[1],
862                         j, flat->indices[j], j, flat->blocklens[j]);
863 #endif
864 
865             (*curr_index)++;
866 
867             break;
868 
869         default:
870             /* TODO: FIXME (requires changing prototypes to return errors...) */
871             DBG_FPRINTF(stderr, "Error: Unsupported datatype passed to ADIOI_Flatten\n");
872             MPI_Abort(MPI_COMM_WORLD, 1);
873     }
874 
875     for (i = 0; i < ntypes; i++) {
876         MPI_Type_get_envelope(types[i], &old_nints, &old_nadds, &old_ntypes, &old_combiner);
877         if (old_combiner != MPI_COMBINER_NAMED)
878             MPI_Type_free(types + i);
879     }
880 
881     ADIOI_Free(ints);
882     ADIOI_Free(adds);
883     ADIOI_Free(types);
884 
885 #ifdef FLATTEN_DEBUG
886     DBG_FPRINTF(stderr, "ADIOI_Flatten:: return st_offset %#llX, curr_index %#llX\n", st_offset,
887                 *curr_index);
888 #endif
889 
890 }
891 
892 /********************************************************/
893 
894 /* ADIOI_Count_contiguous_blocks
895  *
896  * Returns number of contiguous blocks in type, and also updates
897  * curr_index to reflect the space for the additional blocks.
898  *
899  * ASSUMES THAT TYPE IS NOT A BASIC!!!
900  */
MPI_Count ADIOI_Count_contiguous_blocks(MPI_Datatype datatype, MPI_Count * curr_index)
{
    int i, n;
    MPI_Count count = 0, prev_index, num, basic_num;
    int top_count, combiner, old_combiner, old_is_contig;
    int nints, nadds, ntypes, old_nints, old_nadds, old_ntypes;
    int *ints;
    MPI_Aint *adds;             /* Make no assumptions about +/- sign on these */
    MPI_Datatype *types;

    /* Decode how this datatype was constructed: 'combiner' identifies the
     * MPI type constructor; ints/adds/types receive its original arguments.
     * NOTE(review): the counts computed per-combiner below must stay >= the
     * number of entries ADIOI_Flatten() emits for the same combiner (see the
     * safety-valve comment at flatlist_node_grow); keep the two in sync. */
    MPI_Type_get_envelope(datatype, &nints, &nadds, &ntypes, &combiner);
    ints = (int *) ADIOI_Malloc((nints + 1) * sizeof(int));
    adds = (MPI_Aint *) ADIOI_Malloc((nadds + 1) * sizeof(MPI_Aint));
    types = (MPI_Datatype *) ADIOI_Malloc((ntypes + 1) * sizeof(MPI_Datatype));
    MPI_Type_get_contents(datatype, nints, nadds, ntypes, ints, adds, types);

    switch (combiner) {
#ifdef MPIIMPL_HAVE_MPI_COMBINER_DUP
        case MPI_COMBINER_DUP:
            /* a dup contributes exactly what its base type contributes */
            MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, &old_ntypes, &old_combiner);
            ADIOI_Datatype_iscontig(types[0], &old_is_contig);
            if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
                count = ADIOI_Count_contiguous_blocks(types[0], curr_index);
            else {
                count = 1;
                (*curr_index)++;
            }
            break;
#endif
#ifdef MPIIMPL_HAVE_MPI_COMBINER_SUBARRAY
        case MPI_COMBINER_SUBARRAY:
            {
                /* expand the subarray into an equivalent derived type and
                 * count that instead */
                int dims = ints[0];
                MPI_Datatype stype;

                ADIO_Type_create_subarray(dims, &ints[1],       /* sizes */
                                          &ints[dims + 1],      /* subsizes */
                                          &ints[2 * dims + 1],  /* starts */
                                          ints[3 * dims + 1],   /* order */
                                          types[0],     /* type */
                                          &stype);
                count = ADIOI_Count_contiguous_blocks(stype, curr_index);
                /* curr_index will have already been updated; just pass
                 * count back up.
                 */
                MPI_Type_free(&stype);

            }
            break;
#endif
#ifdef MPIIMPL_HAVE_MPI_COMBINER_DARRAY
        case MPI_COMBINER_DARRAY:
            {
                /* expand this process's piece of the distributed array into
                 * an equivalent derived type and count that instead */
                int dims = ints[2];
                MPI_Datatype dtype;

                ADIO_Type_create_darray(ints[0],        /* size */
                                        ints[1],        /* rank */
                                        dims, &ints[3], /* gsizes */
                                        &ints[dims + 3],        /* distribs */
                                        &ints[2 * dims + 3],    /* dargs */
                                        &ints[3 * dims + 3],    /* psizes */
                                        ints[4 * dims + 3],     /* order */
                                        types[0], &dtype);
                count = ADIOI_Count_contiguous_blocks(dtype, curr_index);
                /* curr_index will have already been updated; just pass
                 * count back up.
                 */
                MPI_Type_free(&dtype);
            }
            break;
#endif
        case MPI_COMBINER_CONTIGUOUS:
            top_count = ints[0];
            MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, &old_ntypes, &old_combiner);
            ADIOI_Datatype_iscontig(types[0], &old_is_contig);

            prev_index = *curr_index;
            if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
                count = ADIOI_Count_contiguous_blocks(types[0], curr_index);
            else
                count = 1;

            if (prev_index == *curr_index)
/* simplest case, made up of basic or contiguous types */
                (*curr_index)++;
            else {
/* made up of noncontiguous derived types */
                /* each of the top_count replications repeats the 'num' blocks
                 * the recursive call just accounted for */
                num = *curr_index - prev_index;
                count *= top_count;
                *curr_index += (top_count - 1) * num;
            }
            break;

        case MPI_COMBINER_VECTOR:
        case MPI_COMBINER_HVECTOR:
        case MPI_COMBINER_HVECTOR_INTEGER:
            top_count = ints[0];
            MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, &old_ntypes, &old_combiner);
            ADIOI_Datatype_iscontig(types[0], &old_is_contig);

            prev_index = *curr_index;
            if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
                count = ADIOI_Count_contiguous_blocks(types[0], curr_index);
            else
                count = 1;

            if (prev_index == *curr_index) {
/* simplest case, vector of basic or contiguous types */
                count = top_count;
                *curr_index += count;
            } else {
/* vector of noncontiguous derived types */
                num = *curr_index - prev_index;

/* The noncontiguous types have to be replicated blocklen times
   and then strided. */
                count *= ints[1] * top_count;

/* First one */
                *curr_index += (ints[1] - 1) * num;

/* Now repeat with strides. */
                num = *curr_index - prev_index;
                *curr_index += (top_count - 1) * num;
            }
            break;

        case MPI_COMBINER_INDEXED:
        case MPI_COMBINER_HINDEXED:
        case MPI_COMBINER_HINDEXED_INTEGER:
            top_count = ints[0];
            MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, &old_ntypes, &old_combiner);
            ADIOI_Datatype_iscontig(types[0], &old_is_contig);

            prev_index = *curr_index;
            if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
                count = ADIOI_Count_contiguous_blocks(types[0], curr_index);
            else
                count = 1;

            if (prev_index == *curr_index) {
/* simplest case, indexed type made up of basic or contiguous types */
                count = top_count;
                *curr_index += count;
            } else {
/* indexed type made up of noncontiguous derived types */
                basic_num = *curr_index - prev_index;

/* The noncontiguous types have to be replicated blocklens[i] times
   and then strided. */
                /* ints[1 + i] is blocklens[i]; the first block's replication
                 * is folded into count here, the rest in the loop below */
                *curr_index += (ints[1] - 1) * basic_num;
                count *= ints[1];

/* Now repeat with strides. */
                for (i = 1; i < top_count; i++) {
                    count += ints[1 + i] * basic_num;
                    *curr_index += ints[1 + i] * basic_num;
                }
            }
            break;

#if defined HAVE_DECL_MPI_COMBINER_HINDEXED_BLOCK && HAVE_DECL_MPI_COMBINER_HINDEXED_BLOCK
        case MPI_COMBINER_HINDEXED_BLOCK:
#endif
        case MPI_COMBINER_INDEXED_BLOCK:
            top_count = ints[0];
            MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, &old_ntypes, &old_combiner);
            ADIOI_Datatype_iscontig(types[0], &old_is_contig);

            prev_index = *curr_index;
            if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
                count = ADIOI_Count_contiguous_blocks(types[0], curr_index);
            else
                count = 1;

            if (prev_index == *curr_index) {
/* simplest case, indexed type made up of basic or contiguous types */
                count = top_count;
                *curr_index += count;
            } else {
/* indexed type made up of noncontiguous derived types */
                basic_num = *curr_index - prev_index;

/* The noncontiguous types have to be replicated blocklens[i] times
   and then strided. */
                /* all blocks share the single blocklength ints[1] */
                *curr_index += (ints[1] - 1) * basic_num;
                count *= ints[1];

/* Now repeat with strides. */
                *curr_index += (top_count - 1) * count;
                count *= top_count;
            }
            break;

        case MPI_COMBINER_STRUCT:
        case MPI_COMBINER_STRUCT_INTEGER:
            top_count = ints[0];
            count = 0;
            /* sum the contribution of each member type, replicated by its
             * own blocklength ints[1 + n] */
            for (n = 0; n < top_count; n++) {
                MPI_Type_get_envelope(types[n], &old_nints, &old_nadds, &old_ntypes, &old_combiner);
                ADIOI_Datatype_iscontig(types[n], &old_is_contig);

                prev_index = *curr_index;
                if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
                    count += ADIOI_Count_contiguous_blocks(types[n], curr_index);

                if (prev_index == *curr_index) {
/* simplest case, current type is basic or contiguous types */
                    count++;
                    (*curr_index)++;
                } else {
/* current type made up of noncontiguous derived types */
/* The current type has to be replicated blocklens[n] times */

                    num = *curr_index - prev_index;
                    count += (ints[1 + n] - 1) * num;
                    (*curr_index) += (ints[1 + n] - 1) * num;
                }
            }
            break;

        case MPI_COMBINER_RESIZED:
            /* treat it as a struct with lb, type, ub */

            /* add 2 for lb and ub */
            (*curr_index) += 2;
            count += 2;

            /* add for datatype */
            MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, &old_ntypes, &old_combiner);
            ADIOI_Datatype_iscontig(types[0], &old_is_contig);

            if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig)) {
                count += ADIOI_Count_contiguous_blocks(types[0], curr_index);
            } else {
                /* basic or contiguous type */
                count++;
                (*curr_index)++;
            }
            break;

        default:
            /* TODO: FIXME */
            DBG_FPRINTF(stderr,
                        "Error: Unsupported datatype passed to ADIOI_Count_contiguous_blocks, combiner = %d\n",
                        combiner);
            MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* release the type references handed back by MPI_Type_get_contents */
    for (i = 0; i < ntypes; i++) {
        MPI_Type_get_envelope(types[i], &old_nints, &old_nadds, &old_ntypes, &old_combiner);
        if (old_combiner != MPI_COMBINER_NAMED)
            MPI_Type_free(types + i);
    }

    ADIOI_Free(ints);
    ADIOI_Free(adds);
    ADIOI_Free(types);
    return count;
}
1162 
1163 
1164 /****************************************************************/
1165 
1166 /* ADIOI_Optimize_flattened()
1167  *
1168  * Scans the blocks of a flattened type and merges adjacent blocks
1169  * together, resulting in a shorter blocklist (and thus fewer
1170  * contiguous operations).
1171  *
 * NOTE: a further optimization would be to remove zero-length blocks. However,
 * the first and last blocks must remain: a zero-length first or last block
 * indicates the LB or UB marker.  Furthermore, once the "zero length blocklen" fix
1175  * went in, the flattened representation should no longer have zero-length
1176  * blocks except for UB and LB markers.
1177  */
void ADIOI_Optimize_flattened(ADIOI_Flatlist_node * flat_type)
{
    int cur, prev_nz, out, merged_count;
    ADIO_Offset *new_lens;
    ADIO_Offset *new_idx;

    /* Pass 1: count how many blocks would survive merging, and classify the
     * layout (negative offsets, decreasing offsets, overlapping regions). */
    merged_count = 1;
    prev_nz = -1;       /* index of the previous non-zero-length block */
    for (cur = 0; cur < flat_type->count; cur++) {
        /* a gap between this block's end and the next block's start means
         * the two cannot be merged */
        if (cur < flat_type->count - 1 &&
            (flat_type->indices[cur] + flat_type->blocklens[cur] != flat_type->indices[cur + 1]))
            merged_count++;

        /* flag any negative displacement on a real (non-marker) block */
        if (flat_type->blocklens[cur] > 0 && flat_type->indices[cur] < 0)
            flat_type->flag |= ADIOI_TYPE_NEGATIVE;

        /* zero-length blocks (LB/UB markers) don't participate in the
         * ordering/overlap checks */
        if (flat_type->blocklens[cur] == 0)
            continue;

        if (prev_nz != -1) {
            /* non-monotonic displacements? */
            if (flat_type->indices[prev_nz] > flat_type->indices[cur])
                flat_type->flag |= ADIOI_TYPE_DECREASE;

            /* previous block extends past the start of this one? */
            if (flat_type->indices[prev_nz] + flat_type->blocklens[prev_nz] >
                flat_type->indices[cur])
                flat_type->flag |= ADIOI_TYPE_OVERLAP;
        }
        prev_nz = cur;
    }

    /* nothing to merge: keep the existing lists */
    if (merged_count == flat_type->count)
        return;

    /* single allocation holds both arrays, matching flatlist_node_new() */
    new_lens = (ADIO_Offset *) ADIOI_Calloc(merged_count * 2, sizeof(ADIO_Offset));
    new_idx = new_lens + merged_count;

    /* Pass 2: copy blocks, coalescing each run of adjacent ones */
    new_lens[0] = flat_type->blocklens[0];
    new_idx[0] = flat_type->indices[0];
    out = 0;
    for (cur = 0; cur < (flat_type->count - 1); cur++) {
        if (flat_type->indices[cur] + flat_type->blocklens[cur] == flat_type->indices[cur + 1]) {
            /* contiguous: fold the next block into the current output block */
            new_lens[out] += flat_type->blocklens[cur + 1];
        } else {
            out++;
            new_idx[out] = flat_type->indices[cur + 1];
            new_lens[out] = flat_type->blocklens[cur + 1];
        }
    }

    flat_type->count = merged_count;
    ADIOI_Free(flat_type->blocklens);   /* frees indices too (shared alloc) */
    flat_type->blocklens = new_lens;
    flat_type->indices = new_idx;
    return;
}
1240 
/* keyval used to cache a flattened representation as an attribute on the
 * datatype itself; created lazily in ADIOI_Flatten_and_find() */
int ADIOI_Flattened_type_keyval = MPI_KEYVAL_INVALID;
1242 
ADIOI_Flattened_type_copy(MPI_Datatype oldtype,int type_keyval,void * extra_state,void * attribute_val_in,void * attribute_val_out,int * flag)1243 int ADIOI_Flattened_type_copy(MPI_Datatype oldtype,
1244                               int type_keyval, void *extra_state, void *attribute_val_in,
1245                               void *attribute_val_out, int *flag)
1246 {
1247     ADIOI_Flatlist_node *node = (ADIOI_Flatlist_node *) attribute_val_in;
1248     if (node != NULL)
1249         node->refct++;
1250     *(ADIOI_Flatlist_node **) attribute_val_out = node;
1251     *flag = 1;  /* attribute copied to new communicator */
1252     return MPI_SUCCESS;
1253 }
1254 
ADIOI_Flattened_type_delete(MPI_Datatype datatype,int type_keyval,void * attribute_val,void * extra_state)1255 int ADIOI_Flattened_type_delete(MPI_Datatype datatype,
1256                                 int type_keyval, void *attribute_val, void *extra_state)
1257 {
1258     ADIOI_Flatlist_node *node = (ADIOI_Flatlist_node *) attribute_val;
1259     ADIOI_Assert(node != NULL);
1260     node->refct--;
1261 
1262     if (node->refct <= 0) {
1263         ADIOI_Free(node->blocklens);
1264         ADIOI_Free(node);
1265     }
1266 
1267     return MPI_SUCCESS;
1268 }
1269 
ADIOI_Flatten_and_find(MPI_Datatype datatype)1270 ADIOI_Flatlist_node *ADIOI_Flatten_and_find(MPI_Datatype datatype)
1271 {
1272     ADIOI_Flatlist_node *node;
1273     int flag = 0;
1274 
1275     if (ADIOI_Flattened_type_keyval == MPI_KEYVAL_INVALID) {
1276         /* ADIOI_End_call will take care of cleanup */
1277         MPI_Type_create_keyval(ADIOI_Flattened_type_copy,
1278                                ADIOI_Flattened_type_delete, &ADIOI_Flattened_type_keyval, NULL);
1279     }
1280 
1281     MPI_Type_get_attr(datatype, ADIOI_Flattened_type_keyval, &node, &flag);
1282     if (flag == 0) {
1283         node = ADIOI_Flatten_datatype(datatype);
1284     }
1285 
1286     return node;
1287 }
1288