1 /* -*- Mode: C; c-basic-offset:4 ; -*- */
2 /*
3 * Copyright (C) 1997 University of Chicago.
4 * See COPYRIGHT notice in top-level directory.
5 */
6
7 #include "adio.h"
8 #include "adio_extern.h"
9 /* #ifdef MPISGI
10 #include "mpisgi2.h"
11 #endif */
12
13 #ifdef USE_DBG_LOGGING
14 #define FLATTEN_DEBUG 1
15 #endif
16
17 void ADIOI_Optimize_flattened(ADIOI_Flatlist_node *flat_type);
18 /* flatten datatype and add it to Flatlist */
ADIOI_Flatten_datatype(MPI_Datatype datatype)19 void ADIOI_Flatten_datatype(MPI_Datatype datatype)
20 {
21 #ifdef HAVE_MPIR_TYPE_FLATTEN
22 MPI_Aint flatten_idx;
23 #endif
24 int curr_index=0, is_contig;
25 ADIOI_Flatlist_node *flat, *prev=0;
26
27 /* check if necessary to flatten. */
28
29 /* is it entirely contiguous? */
30 ADIOI_Datatype_iscontig(datatype, &is_contig);
31 #ifdef FLATTEN_DEBUG
32 DBG_FPRINTF(stderr,"ADIOI_Flatten_datatype:: is_contig %#X\n",is_contig);
33 #endif
34 if (is_contig) return;
35
36 /* has it already been flattened? */
37 flat = ADIOI_Flatlist;
38 while (flat) {
39 if (flat->type == datatype) {
40 #ifdef FLATTEN_DEBUG
41 DBG_FPRINTF(stderr,"ADIOI_Flatten_datatype:: found datatype %#X\n", datatype);
42 #endif
43 return;
44 }
45 else {
46 prev = flat;
47 flat = flat->next;
48 }
49 }
50
51 /* flatten and add to the list */
52 flat = prev;
53 flat->next = (ADIOI_Flatlist_node *)ADIOI_Malloc(sizeof(ADIOI_Flatlist_node));
54 flat = flat->next;
55
56 flat->type = datatype;
57 flat->next = NULL;
58 flat->blocklens = NULL;
59 flat->indices = NULL;
60
61 flat->count = ADIOI_Count_contiguous_blocks(datatype, &curr_index);
62 #ifdef FLATTEN_DEBUG
63 DBG_FPRINTF(stderr,"ADIOI_Flatten_datatype:: count %#X, cur_idx = %#X\n",flat->count,curr_index);
64 #endif
65 /* DBG_FPRINTF(stderr, "%d\n", flat->count);*/
66
67 if (flat->count) {
68 flat->blocklens = (ADIO_Offset *) ADIOI_Malloc(flat->count * sizeof(ADIO_Offset));
69 flat->indices = (ADIO_Offset *) ADIOI_Malloc(flat->count * sizeof(ADIO_Offset));
70 }
71
72 curr_index = 0;
73 #ifdef HAVE_MPIR_TYPE_FLATTEN
74 flatten_idx = (MPI_Aint) flat->count;
75 MPIR_Type_flatten(datatype, flat->indices, flat->blocklens, &flatten_idx);
76 #ifdef FLATTEN_DEBUG
77 DBG_FPRINTF(stderr,"ADIOI_Flatten_datatype:: MPIR_Type_flatten\n");
78 #endif
79 #else
80 ADIOI_Flatten(datatype, flat, 0, &curr_index);
81 #ifdef FLATTEN_DEBUG
82 DBG_FPRINTF(stderr,"ADIOI_Flatten_datatype:: ADIOI_Flatten\n");
83 #endif
84
85 ADIOI_Optimize_flattened(flat);
86 #endif
87 /* debug */
88 #ifdef FLATTEN_DEBUG
89 {
90 int i;
91 for (i=0; i<flat->count; i++)
92 DBG_FPRINTF(stderr,"ADIOI_Flatten_datatype:: i %#X, blocklens %#llX, indices %#llX\n",
93 i,
94 flat->blocklens[i],
95 flat->indices[i]
96 );
97 }
98 #endif
99
100 }
101
/* ADIOI_Flatten()
 *
 * Recursively decodes "datatype" via MPI_Type_get_envelope /
 * MPI_Type_get_contents and appends its contiguous pieces to the
 * flat->indices / flat->blocklens arrays, starting at *curr_index and
 * with every file offset shifted by st_offset.  *curr_index is advanced
 * past the entries written.  The arrays must already be sized by a prior
 * ADIOI_Count_contiguous_blocks() pass, whose per-combiner accounting
 * mirrors the writes done here exactly.
 *
 * Assumption: input datatype is not a basic!!!!
 */
void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat,
                   ADIO_Offset st_offset, int *curr_index)
{
    int i, j, k, m, n, num, basic_num, prev_index;
    int combiner, old_combiner, old_is_contig;
    int nints, nadds, ntypes, old_nints, old_nadds, old_ntypes;
    /* By using ADIO_Offset we preserve +/- sign and
       avoid >2G integer arithmetic problems */
    ADIO_Offset top_count;
    /* By using unsigned we avoid >2G integer arithmetic problems */
    unsigned old_size;
    MPI_Aint old_extent;        /* Assume extents are non-negative */
    int *ints;
    MPI_Aint *adds;             /* Make no assumptions about +/- sign on these */
    MPI_Datatype *types;
    /* pull apart the datatype one level: counts of ints/aints/types,
       plus the combiner that built it */
    MPI_Type_get_envelope(datatype, &nints, &nadds, &ntypes, &combiner);
    ints = (int *) ADIOI_Malloc((nints+1)*sizeof(int));
    adds = (MPI_Aint *) ADIOI_Malloc((nadds+1)*sizeof(MPI_Aint));
    types = (MPI_Datatype *) ADIOI_Malloc((ntypes+1)*sizeof(MPI_Datatype));
    MPI_Type_get_contents(datatype, nints, nadds, ntypes, ints, adds, types);

#ifdef FLATTEN_DEBUG
    DBG_FPRINTF(stderr,"ADIOI_Flatten:: st_offset %#llX, curr_index %#X\n",st_offset,*curr_index);
    DBG_FPRINTF(stderr,"ADIOI_Flatten:: nints %#X, nadds %#X, ntypes %#X\n",nints, nadds, ntypes);
    for(i=0; i< nints; ++i)
    {
        DBG_FPRINTF(stderr,"ADIOI_Flatten:: ints[%d]=%#X\n",i,ints[i]);
    }
    for(i=0; i< nadds; ++i)
    {
        DBG_FPRINTF(stderr,"ADIOI_Flatten:: adds[%d]="MPI_AINT_FMT_HEX_SPEC"\n",i,adds[i]);
    }
    for(i=0; i< ntypes; ++i)
    {
        DBG_FPRINTF(stderr,"ADIOI_Flatten:: types[%d]=%#llX\n",i,(unsigned long long)(unsigned long)types[i]);
    }
#endif
    /* each case reads the combiner-specific layout of ints[]/adds[]/types[]
       defined by the MPI standard's "decoding a datatype" tables */
    switch (combiner) {
#ifdef MPIIMPL_HAVE_MPI_COMBINER_DUP
    case MPI_COMBINER_DUP:
        /* a dup contributes exactly what its inner type contributes */
#ifdef FLATTEN_DEBUG
        DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_DUP\n");
#endif
        MPI_Type_get_envelope(types[0], &old_nints, &old_nadds,
                              &old_ntypes, &old_combiner);
        ADIOI_Datatype_iscontig(types[0], &old_is_contig);
        if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
            ADIOI_Flatten(types[0], flat, st_offset, curr_index);
        break;
#endif
#ifdef MPIIMPL_HAVE_MPI_COMBINER_SUBARRAY
    case MPI_COMBINER_SUBARRAY:
        {
            /* rebuild the subarray as an equivalent derived type and
               recurse on that */
            int dims = ints[0];
            MPI_Datatype stype;
#ifdef FLATTEN_DEBUG
            DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_SUBARRAY\n");
#endif

            ADIO_Type_create_subarray(dims,
                                      &ints[1],        /* sizes */
                                      &ints[dims+1],   /* subsizes */
                                      &ints[2*dims+1], /* starts */
                                      ints[3*dims+1],  /* order */
                                      types[0],        /* type */
                                      &stype);
            ADIOI_Flatten(stype, flat, st_offset, curr_index);
            MPI_Type_free(&stype);
        }
        break;
#endif
#ifdef MPIIMPL_HAVE_MPI_COMBINER_DARRAY
    case MPI_COMBINER_DARRAY:
        {
            /* rebuild the distributed array as an equivalent derived type
               and recurse on that */
            int dims = ints[2];
            MPI_Datatype dtype;
#ifdef FLATTEN_DEBUG
            DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_DARRAY\n");
#endif

            ADIO_Type_create_darray(ints[0],         /* size */
                                    ints[1],         /* rank */
                                    dims,
                                    &ints[3],        /* gsizes */
                                    &ints[dims+3],   /* distribs */
                                    &ints[2*dims+3], /* dargs */
                                    &ints[3*dims+3], /* psizes */
                                    ints[4*dims+3],  /* order */
                                    types[0],
                                    &dtype);
#ifdef FLATTEN_DEBUG
            DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_DARRAY <ADIOI_Flatten(dtype, flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX, st_offset %#llX, curr_index %#X);\n",
                        0, flat->indices[0], 0, flat->blocklens[0], st_offset, *curr_index);
#endif
            ADIOI_Flatten(dtype, flat, st_offset, curr_index);
#ifdef FLATTEN_DEBUG
            DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_DARRAY >ADIOI_Flatten(dtype, flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX, st_offset %#llX, curr_index %#X);\n",
                        0, flat->indices[0], 0, flat->blocklens[0], st_offset, *curr_index);
#endif
            MPI_Type_free(&dtype);
        }
        break;
#endif
    case MPI_COMBINER_CONTIGUOUS:
        /* ints[0] = count; types[0] = inner type */
#ifdef FLATTEN_DEBUG
        DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_CONTIGUOUS\n");
#endif
        top_count = ints[0];
        MPI_Type_get_envelope(types[0], &old_nints, &old_nadds,
                              &old_ntypes, &old_combiner);
        ADIOI_Datatype_iscontig(types[0], &old_is_contig);

        /* flatten the inner type first (if it is itself noncontiguous);
           whether *curr_index moved tells us which case we are in */
        prev_index = *curr_index;
        if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
            ADIOI_Flatten(types[0], flat, st_offset, curr_index);

        if (prev_index == *curr_index) {
            /* simplest case, made up of basic or contiguous types: one
               single block of top_count * size bytes */
            j = *curr_index;
            flat->indices[j] = st_offset;
            MPI_Type_size(types[0], (int*)&old_size);
            flat->blocklens[j] = top_count * old_size;
#ifdef FLATTEN_DEBUG
            DBG_FPRINTF(stderr,"ADIOI_Flatten:: simple flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX\n",j, flat->indices[j], j, flat->blocklens[j]);
#endif
            (*curr_index)++;
        }
        else {
            /* made up of noncontiguous derived types */
            j = *curr_index;
            num = *curr_index - prev_index;

            /* The noncontiguous types have to be replicated count times:
               each copy is the previous one shifted by one extent */
            MPI_Type_extent(types[0], &old_extent);
            for (m=1; m<top_count; m++) {
                for (i=0; i<num; i++) {
                    flat->indices[j] = flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
                    flat->blocklens[j] = flat->blocklens[j-num];
#ifdef FLATTEN_DEBUG
                    DBG_FPRINTF(stderr,"ADIOI_Flatten:: derived flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX\n",j, flat->indices[j], j, flat->blocklens[j]);
#endif
                    j++;
                }
            }
            *curr_index = j;
        }
        break;

    case MPI_COMBINER_VECTOR:
        /* ints[0] = count, ints[1] = blocklength, ints[2] = stride
           (in units of the inner type's extent) */
#ifdef FLATTEN_DEBUG
        DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_VECTOR\n");
#endif
        top_count = ints[0];
        MPI_Type_get_envelope(types[0], &old_nints, &old_nadds,
                              &old_ntypes, &old_combiner);
        ADIOI_Datatype_iscontig(types[0], &old_is_contig);

        prev_index = *curr_index;
        if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
            ADIOI_Flatten(types[0], flat, st_offset, curr_index);

        if (prev_index == *curr_index) {
            /* simplest case, vector of basic or contiguous types */
            /* By using ADIO_Offset we preserve +/- sign and
               avoid >2G integer arithmetic problems */
            ADIO_Offset blocklength = ints[1], stride = ints[2];
            j = *curr_index;
            flat->indices[j] = st_offset;
            MPI_Type_size(types[0], (int*)&old_size);
            flat->blocklens[j] = blocklength * old_size;
            /* one block per vector element, spaced stride*size apart */
            for (i=j+1; i<j+top_count; i++) {
                flat->indices[i] = flat->indices[i-1] + stride * old_size;
                flat->blocklens[i] = flat->blocklens[j];
            }
            *curr_index = i;
        }
        else {
            /* vector of noncontiguous derived types */
            /* By using ADIO_Offset we preserve +/- sign and
               avoid >2G integer arithmetic problems */
            ADIO_Offset blocklength = ints[1], stride = ints[2];

            j = *curr_index;
            num = *curr_index - prev_index;

            /* The noncontiguous types have to be replicated blocklen times
               and then strided. Replicate the first one. */
            MPI_Type_extent(types[0], &old_extent);
            for (m=1; m<blocklength; m++) {
                for (i=0; i<num; i++) {
                    flat->indices[j] = flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
                    flat->blocklens[j] = flat->blocklens[j-num];
                    j++;
                }
            }
            *curr_index = j;

            /* Now repeat with strides. */
            num = *curr_index - prev_index;
            for (i=1; i<top_count; i++) {
                for (m=0; m<num; m++) {
                    flat->indices[j] = flat->indices[j-num] + stride * ADIOI_AINT_CAST_TO_OFFSET old_extent;
                    flat->blocklens[j] = flat->blocklens[j-num];
                    j++;
                }
            }
            *curr_index = j;
        }
        break;

    case MPI_COMBINER_HVECTOR:
    case MPI_COMBINER_HVECTOR_INTEGER:
        /* same as vector, but the stride adds[0] is in bytes */
#ifdef FLATTEN_DEBUG
        DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_HVECTOR_INTEGER\n");
#endif
        top_count = ints[0];
        MPI_Type_get_envelope(types[0], &old_nints, &old_nadds,
                              &old_ntypes, &old_combiner);
        ADIOI_Datatype_iscontig(types[0], &old_is_contig);

        prev_index = *curr_index;
        if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
            ADIOI_Flatten(types[0], flat, st_offset, curr_index);

        if (prev_index == *curr_index) {
            /* simplest case, vector of basic or contiguous types */
            /* By using ADIO_Offset we preserve +/- sign and
               avoid >2G integer arithmetic problems */
            ADIO_Offset blocklength = ints[1];
            j = *curr_index;
            flat->indices[j] = st_offset;
            MPI_Type_size(types[0], (int*)&old_size);
            flat->blocklens[j] = blocklength * old_size;
            for (i=j+1; i<j+top_count; i++) {
                flat->indices[i] = flat->indices[i-1] + adds[0];
                flat->blocklens[i] = flat->blocklens[j];
            }
            *curr_index = i;
        }
        else {
            /* vector of noncontiguous derived types */
            /* By using ADIO_Offset we preserve +/- sign and
               avoid >2G integer arithmetic problems */
            ADIO_Offset blocklength = ints[1];

            j = *curr_index;
            num = *curr_index - prev_index;

            /* The noncontiguous types have to be replicated blocklen times
               and then strided. Replicate the first one. */
            MPI_Type_extent(types[0], &old_extent);
            for (m=1; m<blocklength; m++) {
                for (i=0; i<num; i++) {
                    flat->indices[j] = flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
                    flat->blocklens[j] = flat->blocklens[j-num];
                    j++;
                }
            }
            *curr_index = j;

            /* Now repeat with strides. */
            num = *curr_index - prev_index;
            for (i=1; i<top_count; i++) {
                for (m=0; m<num; m++) {
                    flat->indices[j] = flat->indices[j-num] + adds[0];
                    flat->blocklens[j] = flat->blocklens[j-num];
                    j++;
                }
            }
            *curr_index = j;
        }
        break;

    case MPI_COMBINER_INDEXED:
        /* ints[0] = count; ints[1..count] = blocklengths;
           ints[count+1..2*count] = displacements (in extents) */
#ifdef FLATTEN_DEBUG
        DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_INDEXED\n");
#endif
        top_count = ints[0];
        MPI_Type_get_envelope(types[0], &old_nints, &old_nadds,
                              &old_ntypes, &old_combiner);
        ADIOI_Datatype_iscontig(types[0], &old_is_contig);
        MPI_Type_extent(types[0], &old_extent);

        prev_index = *curr_index;
        if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
        {
            /* By using ADIO_Offset we preserve +/- sign and
               avoid >2G integer arithmetic problems */
            ADIO_Offset stride = ints[top_count+1];
            /* flatten the inner type at the first block's displacement */
            ADIOI_Flatten(types[0], flat,
                          st_offset+stride* ADIOI_AINT_CAST_TO_OFFSET old_extent, curr_index);
        }

        if (prev_index == *curr_index) {
            /* simplest case, indexed type made up of basic or contiguous types */
            j = *curr_index;
            for (i=j; i<j+top_count; i++) {
                /* By using ADIO_Offset we preserve +/- sign and
                   avoid >2G integer arithmetic problems */
                ADIO_Offset blocklength = ints[1+i-j], stride = ints[top_count+1+i-j];
                flat->indices[i] = st_offset + stride* ADIOI_AINT_CAST_TO_OFFSET old_extent;
                flat->blocklens[i] = blocklength* ADIOI_AINT_CAST_TO_OFFSET old_extent;
            }
            *curr_index = i;
        }
        else {
            /* indexed type made up of noncontiguous derived types */

            j = *curr_index;
            num = *curr_index - prev_index;
            basic_num = num;

            /* The noncontiguous types have to be replicated blocklens[i] times
               and then strided. Replicate the first one. */
            for (m=1; m<ints[1]; m++) {
                for (i=0; i<num; i++) {
                    flat->indices[j] = flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
                    flat->blocklens[j] = flat->blocklens[j-num];
                    j++;
                }
            }
            *curr_index = j;

            /* Now repeat with strides. */
            for (i=1; i<top_count; i++) {
                num = *curr_index - prev_index;
                prev_index = *curr_index;
                for (m=0; m<basic_num; m++) {
                    /* By using ADIO_Offset we preserve +/- sign and
                       avoid >2G integer arithmetic problems */
                    /* shift the previous block's entries by the delta
                       between consecutive displacements */
                    ADIO_Offset stride = ints[top_count+1+i]-ints[top_count+i];
                    flat->indices[j] = flat->indices[j-num] + stride* ADIOI_AINT_CAST_TO_OFFSET old_extent;
                    flat->blocklens[j] = flat->blocklens[j-num];
                    j++;
                }
                *curr_index = j;
                /* then replicate within the block blocklens[i] times */
                for (m=1; m<ints[1+i]; m++) {
                    for (k=0; k<basic_num; k++) {
                        flat->indices[j] = flat->indices[j-basic_num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
                        flat->blocklens[j] = flat->blocklens[j-basic_num];
                        j++;
                    }
                }
                *curr_index = j;
            }
        }
        break;

    /* FIXME: using the same code as indexed_block for
     * hindexed_block doesn't look correct.  Needs to be carefully
     * looked into. */
    case MPIX_COMBINER_HINDEXED_BLOCK:
    case MPI_COMBINER_INDEXED_BLOCK:
        /* ints[0] = count, ints[1] = blocklength,
           ints[2..count+1] = displacements (in extents) */
#ifdef FLATTEN_DEBUG
        DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_INDEXED_BLOCK\n");
#endif
        top_count = ints[0];
        MPI_Type_get_envelope(types[0], &old_nints, &old_nadds,
                              &old_ntypes, &old_combiner);
        ADIOI_Datatype_iscontig(types[0], &old_is_contig);
        MPI_Type_extent(types[0], &old_extent);

        prev_index = *curr_index;
        if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
        {
            /* By using ADIO_Offset we preserve +/- sign and
               avoid >2G integer arithmetic problems */
            ADIO_Offset stride = ints[1+1];
            ADIOI_Flatten(types[0], flat,
                          st_offset+stride* ADIOI_AINT_CAST_TO_OFFSET old_extent, curr_index);
        }

        if (prev_index == *curr_index) {
            /* simplest case, indexed type made up of basic or contiguous types */
            j = *curr_index;
            for (i=j; i<j+top_count; i++) {
                /* By using ADIO_Offset we preserve +/- sign and
                   avoid >2G integer arithmetic problems */
                ADIO_Offset blocklength = ints[1], stride = ints[1+1+i-j];
                flat->indices[i] = st_offset + stride* ADIOI_AINT_CAST_TO_OFFSET old_extent;
                flat->blocklens[i] = blocklength* ADIOI_AINT_CAST_TO_OFFSET old_extent;
            }
            *curr_index = i;
        }
        else {
            /* vector of noncontiguous derived types */

            j = *curr_index;
            num = *curr_index - prev_index;

            /* The noncontiguous types have to be replicated blocklens[i] times
               and then strided. Replicate the first one. */
            for (m=1; m<ints[1]; m++) {
                for (i=0; i<num; i++) {
                    flat->indices[j] = flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
                    flat->blocklens[j] = flat->blocklens[j-num];
                    j++;
                }
            }
            *curr_index = j;

            /* Now repeat with strides. */
            num = *curr_index - prev_index;
            for (i=1; i<top_count; i++) {
                for (m=0; m<num; m++) {
                    /* By using ADIO_Offset we preserve +/- sign and
                       avoid >2G integer arithmetic problems */
                    ADIO_Offset stride = ints[2+i]-ints[1+i];
                    flat->indices[j] = flat->indices[j-num] + stride* ADIOI_AINT_CAST_TO_OFFSET old_extent;
                    flat->blocklens[j] = flat->blocklens[j-num];
                    j++;
                }
            }
            *curr_index = j;
        }
        break;

    case MPI_COMBINER_HINDEXED:
    case MPI_COMBINER_HINDEXED_INTEGER:
        /* ints[0] = count; ints[1..count] = blocklengths;
           adds[0..count-1] = byte displacements */
#ifdef FLATTEN_DEBUG
        DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_HINDEXED_INTEGER\n");
#endif
        top_count = ints[0];
        MPI_Type_get_envelope(types[0], &old_nints, &old_nadds,
                              &old_ntypes, &old_combiner);
        ADIOI_Datatype_iscontig(types[0], &old_is_contig);

        prev_index = *curr_index;
        if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
        {
            ADIOI_Flatten(types[0], flat, st_offset+adds[0], curr_index);
        }

        if (prev_index == *curr_index) {
            /* simplest case, indexed type made up of basic or contiguous types */
            j = *curr_index;
            MPI_Type_size(types[0], (int*)&old_size);
            for (i=j; i<j+top_count; i++) {
                /* By using ADIO_Offset we preserve +/- sign and
                   avoid >2G integer arithmetic problems */
                ADIO_Offset blocklength = ints[1+i-j];
                flat->indices[i] = st_offset + adds[i-j];
                flat->blocklens[i] = blocklength*old_size;
            }
            *curr_index = i;
        }
        else {
            /* indexed type made up of noncontiguous derived types */

            j = *curr_index;
            num = *curr_index - prev_index;
            basic_num = num;

            /* The noncontiguous types have to be replicated blocklens[i] times
               and then strided. Replicate the first one. */
            MPI_Type_extent(types[0], &old_extent);
            for (m=1; m<ints[1]; m++) {
                for (i=0; i<num; i++) {
                    flat->indices[j] = flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
                    flat->blocklens[j] = flat->blocklens[j-num];
                    j++;
                }
            }
            *curr_index = j;

            /* Now repeat with strides. */
            for (i=1; i<top_count; i++) {
                num = *curr_index - prev_index;
                prev_index = *curr_index;
                for (m=0; m<basic_num; m++) {
                    /* shift by the delta between consecutive byte
                       displacements */
                    flat->indices[j] = flat->indices[j-num] + adds[i] - adds[i-1];
                    flat->blocklens[j] = flat->blocklens[j-num];
                    j++;
                }
                *curr_index = j;
                for (m=1; m<ints[1+i]; m++) {
                    for (k=0; k<basic_num; k++) {
                        flat->indices[j] = flat->indices[j-basic_num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
                        flat->blocklens[j] = flat->blocklens[j-basic_num];
                        j++;
                    }
                }
                *curr_index = j;
            }
        }
        break;

    case MPI_COMBINER_STRUCT:
    case MPI_COMBINER_STRUCT_INTEGER:
        /* ints[0] = count; ints[1..count] = blocklengths;
           adds[0..count-1] = byte displacements; types[0..count-1] */
#ifdef FLATTEN_DEBUG
        DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_STRUCT_INTEGER\n");
#endif
        top_count = ints[0];
        /* flatten each member in turn at its own displacement */
        for (n=0; n<top_count; n++) {
            MPI_Type_get_envelope(types[n], &old_nints, &old_nadds,
                                  &old_ntypes, &old_combiner);
            ADIOI_Datatype_iscontig(types[n], &old_is_contig);

            prev_index = *curr_index;
            if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
                ADIOI_Flatten(types[n], flat, st_offset+adds[n], curr_index);

            if (prev_index == *curr_index) {
                /* simplest case, current type is basic or contiguous types */
                /* By using ADIO_Offset we preserve +/- sign and
                   avoid >2G integer arithmetic problems */
                ADIO_Offset blocklength = ints[1+n];
                j = *curr_index;
                flat->indices[j] = st_offset + adds[n];
                MPI_Type_size(types[n], (int*)&old_size);
                flat->blocklens[j] = blocklength * old_size;
#ifdef FLATTEN_DEBUG
                DBG_FPRINTF(stderr,"ADIOI_Flatten:: simple adds[%#X] "MPI_AINT_FMT_HEX_SPEC", flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX\n",n,adds[n],j, flat->indices[j], j, flat->blocklens[j]);
#endif
                (*curr_index)++;
            }
            else {
                /* current type made up of noncontiguous derived types */

                j = *curr_index;
                num = *curr_index - prev_index;

                /* The current type has to be replicated blocklens[n] times */
                MPI_Type_extent(types[n], &old_extent);
                for (m=1; m<ints[1+n]; m++) {
                    for (i=0; i<num; i++) {
                        flat->indices[j] = flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
                        flat->blocklens[j] = flat->blocklens[j-num];
#ifdef FLATTEN_DEBUG
                        DBG_FPRINTF(stderr,"ADIOI_Flatten:: simple old_extent "MPI_AINT_FMT_HEX_SPEC", flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX\n",old_extent,j, flat->indices[j], j, flat->blocklens[j]);
#endif
                        j++;
                    }
                }
                *curr_index = j;
            }
        }
        break;

    case MPI_COMBINER_RESIZED:
        /* adds[0] = new lb, adds[1] = new extent */
#ifdef FLATTEN_DEBUG
        DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_RESIZED\n");
#endif

        /* This is done similar to a type_struct with an lb, datatype, ub */

        /* handle the Lb: a zero-length marker block at the new lower bound */
        j = *curr_index;
        flat->indices[j] = st_offset + adds[0];
        flat->blocklens[j] = 0;

#ifdef FLATTEN_DEBUG
        DBG_FPRINTF(stderr,"ADIOI_Flatten:: simple adds[%#X] "MPI_AINT_FMT_HEX_SPEC", flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX\n",0,adds[0],j, flat->indices[j], j, flat->blocklens[j]);
#endif

        (*curr_index)++;

        /* handle the datatype */

        MPI_Type_get_envelope(types[0], &old_nints, &old_nadds,
                              &old_ntypes, &old_combiner);
        ADIOI_Datatype_iscontig(types[0], &old_is_contig);

        if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig)) {
            ADIOI_Flatten(types[0], flat, st_offset+adds[0], curr_index);
        }
        else {
            /* current type is basic or contiguous */
            j = *curr_index;
            flat->indices[j] = st_offset;
            MPI_Type_size(types[0], (int*)&old_size);
            flat->blocklens[j] = old_size;

#ifdef FLATTEN_DEBUG
            DBG_FPRINTF(stderr,"ADIOI_Flatten:: simple adds[%#X] "MPI_AINT_FMT_HEX_SPEC", flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX\n",0,adds[0],j, flat->indices[j], j, flat->blocklens[j]);
#endif

            (*curr_index)++;
        }

        /* take care of the extent as a UB: zero-length marker at lb+extent */
        j = *curr_index;
        flat->indices[j] = st_offset + adds[0] + adds[1];
        flat->blocklens[j] = 0;

#ifdef FLATTEN_DEBUG
        DBG_FPRINTF(stderr,"ADIOI_Flatten:: simple adds[%#X] "MPI_AINT_FMT_HEX_SPEC", flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX\n",1,adds[1],j, flat->indices[j], j, flat->blocklens[j]);
#endif

        (*curr_index)++;

        break;

    default:
        /* TODO: FIXME (requires changing prototypes to return errors...) */
        DBG_FPRINTF(stderr, "Error: Unsupported datatype passed to ADIOI_Flatten\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

#ifndef MPISGI
    /* There is a bug in SGI's impl. of MPI_Type_get_contents. It doesn't
       return new datatypes. Therefore no need to free. */
    for (i=0; i<ntypes; i++) {
        MPI_Type_get_envelope(types[i], &old_nints, &old_nadds, &old_ntypes,
                              &old_combiner);
        if (old_combiner != MPI_COMBINER_NAMED) MPI_Type_free(types+i);
    }
#endif

    ADIOI_Free(ints);
    ADIOI_Free(adds);
    ADIOI_Free(types);

#ifdef FLATTEN_DEBUG
    DBG_FPRINTF(stderr,"ADIOI_Flatten:: return st_offset %#llX, curr_index %#X\n",st_offset,*curr_index);
#endif

}
724
725 /********************************************************/
726
/* ADIOI_Count_contiguous_blocks
 *
 * Returns number of contiguous blocks in type, and also updates
 * curr_index to reflect the space for the additional blocks.
 *
 * This is the sizing pass that precedes ADIOI_Flatten(): the per-combiner
 * arithmetic here must match the number of (offset, length) entries that
 * ADIOI_Flatten() later writes for the same combiner.
 *
 * ASSUMES THAT TYPE IS NOT A BASIC!!!
 */
int ADIOI_Count_contiguous_blocks(MPI_Datatype datatype, int *curr_index)
{
#ifdef HAVE_MPIR_TYPE_GET_CONTIG_BLOCKS
    /* MPICH2 can get us this value without all the envelope/contents calls */
    int blks;
    MPIR_Type_get_contig_blocks(datatype, &blks);
    *curr_index = blks;
    return blks;
#else
    int count=0, i, n, num, basic_num, prev_index;
    int top_count, combiner, old_combiner, old_is_contig;
    int nints, nadds, ntypes, old_nints, old_nadds, old_ntypes;
    int *ints;
    MPI_Aint *adds;             /* Make no assumptions about +/- sign on these */
    MPI_Datatype *types;

    /* decode one level of the datatype, as in ADIOI_Flatten() */
    MPI_Type_get_envelope(datatype, &nints, &nadds, &ntypes, &combiner);
    ints = (int *) ADIOI_Malloc((nints+1)*sizeof(int));
    adds = (MPI_Aint *) ADIOI_Malloc((nadds+1)*sizeof(MPI_Aint));
    types = (MPI_Datatype *) ADIOI_Malloc((ntypes+1)*sizeof(MPI_Datatype));
    MPI_Type_get_contents(datatype, nints, nadds, ntypes, ints, adds, types);

    switch (combiner) {
#ifdef MPIIMPL_HAVE_MPI_COMBINER_DUP
    case MPI_COMBINER_DUP:
        /* a dup counts exactly what its inner type counts */
        MPI_Type_get_envelope(types[0], &old_nints, &old_nadds,
                              &old_ntypes, &old_combiner);
        ADIOI_Datatype_iscontig(types[0], &old_is_contig);
        if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
            count = ADIOI_Count_contiguous_blocks(types[0], curr_index);
        else {
            count = 1;
            (*curr_index)++;
        }
        break;
#endif
#ifdef MPIIMPL_HAVE_MPI_COMBINER_SUBARRAY
    case MPI_COMBINER_SUBARRAY:
        {
            /* rebuild as an equivalent derived type and count that */
            int dims = ints[0];
            MPI_Datatype stype;

            ADIO_Type_create_subarray(dims,
                                      &ints[1],        /* sizes */
                                      &ints[dims+1],   /* subsizes */
                                      &ints[2*dims+1], /* starts */
                                      ints[3*dims+1],  /* order */
                                      types[0],        /* type */
                                      &stype);
            count = ADIOI_Count_contiguous_blocks(stype, curr_index);
            /* curr_index will have already been updated; just pass
             * count back up.
             */
            MPI_Type_free(&stype);

        }
        break;
#endif
#ifdef MPIIMPL_HAVE_MPI_COMBINER_DARRAY
    case MPI_COMBINER_DARRAY:
        {
            /* rebuild as an equivalent derived type and count that */
            int dims = ints[2];
            MPI_Datatype dtype;

            ADIO_Type_create_darray(ints[0],         /* size */
                                    ints[1],         /* rank */
                                    dims,
                                    &ints[3],        /* gsizes */
                                    &ints[dims+3],   /* distribs */
                                    &ints[2*dims+3], /* dargs */
                                    &ints[3*dims+3], /* psizes */
                                    ints[4*dims+3],  /* order */
                                    types[0],
                                    &dtype);
            count = ADIOI_Count_contiguous_blocks(dtype, curr_index);
            /* curr_index will have already been updated; just pass
             * count back up.
             */
            MPI_Type_free(&dtype);
        }
        break;
#endif
    case MPI_COMBINER_CONTIGUOUS:
        top_count = ints[0];
        MPI_Type_get_envelope(types[0], &old_nints, &old_nadds,
                              &old_ntypes, &old_combiner);
        ADIOI_Datatype_iscontig(types[0], &old_is_contig);

        /* count the inner type first; whether *curr_index moved tells
           us if it was itself noncontiguous */
        prev_index = *curr_index;
        if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
            count = ADIOI_Count_contiguous_blocks(types[0], curr_index);
        else count = 1;

        if (prev_index == *curr_index)
            /* simplest case, made up of basic or contiguous types */
            (*curr_index)++;
        else {
            /* made up of noncontiguous derived types: each of top_count
               copies repeats the inner type's num entries */
            num = *curr_index - prev_index;
            count *= top_count;
            *curr_index += (top_count - 1)*num;
        }
        break;

    case MPI_COMBINER_VECTOR:
    case MPI_COMBINER_HVECTOR:
    case MPI_COMBINER_HVECTOR_INTEGER:
        top_count = ints[0];
        MPI_Type_get_envelope(types[0], &old_nints, &old_nadds,
                              &old_ntypes, &old_combiner);
        ADIOI_Datatype_iscontig(types[0], &old_is_contig);

        prev_index = *curr_index;
        if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
            count = ADIOI_Count_contiguous_blocks(types[0], curr_index);
        else count = 1;

        if (prev_index == *curr_index) {
            /* simplest case, vector of basic or contiguous types:
               one block per vector element */
            count = top_count;
            *curr_index += count;
        }
        else {
            /* vector of noncontiguous derived types */
            num = *curr_index - prev_index;

            /* The noncontiguous types have to be replicated blocklen times
               and then strided. */
            count *= ints[1] * top_count;

            /* First one */
            *curr_index += (ints[1] - 1)*num;

            /* Now repeat with strides. */
            num = *curr_index - prev_index;
            *curr_index += (top_count - 1)*num;
        }
        break;

    case MPI_COMBINER_INDEXED:
    case MPI_COMBINER_HINDEXED:
    case MPI_COMBINER_HINDEXED_INTEGER:
        top_count = ints[0];
        MPI_Type_get_envelope(types[0], &old_nints, &old_nadds,
                              &old_ntypes, &old_combiner);
        ADIOI_Datatype_iscontig(types[0], &old_is_contig);

        prev_index = *curr_index;
        if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
            count = ADIOI_Count_contiguous_blocks(types[0], curr_index);
        else count = 1;

        if (prev_index == *curr_index) {
            /* simplest case, indexed type made up of basic or contiguous types */
            count = top_count;
            *curr_index += count;
        }
        else {
            /* indexed type made up of noncontiguous derived types */
            basic_num = *curr_index - prev_index;

            /* The noncontiguous types have to be replicated blocklens[i] times
               and then strided. */
            *curr_index += (ints[1]-1) * basic_num;
            count *= ints[1];

            /* Now repeat with strides: each remaining block i contributes
               blocklens[i] copies of the basic entries */
            for (i=1; i<top_count; i++) {
                count += ints[1+i] * basic_num;
                *curr_index += ints[1+i] * basic_num;
            }
        }
        break;

    case MPI_COMBINER_INDEXED_BLOCK:
        top_count = ints[0];
        MPI_Type_get_envelope(types[0], &old_nints, &old_nadds,
                              &old_ntypes, &old_combiner);
        ADIOI_Datatype_iscontig(types[0], &old_is_contig);

        prev_index = *curr_index;
        if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
            count = ADIOI_Count_contiguous_blocks(types[0], curr_index);
        else count = 1;

        if (prev_index == *curr_index) {
            /* simplest case, indexed type made up of basic or contiguous types */
            count = top_count;
            *curr_index += count;
        }
        else {
            /* indexed type made up of noncontiguous derived types */
            basic_num = *curr_index - prev_index;

            /* The noncontiguous types have to be replicated blocklens[i] times
               and then strided. */
            *curr_index += (ints[1]-1) * basic_num;
            count *= ints[1];

            /* Now repeat with strides: all blocks share one blocklength */
            *curr_index += (top_count-1) * count;
            count *= top_count;
        }
        break;

    case MPI_COMBINER_STRUCT:
    case MPI_COMBINER_STRUCT_INTEGER:
        top_count = ints[0];
        count = 0;
        /* sum the contributions of each member */
        for (n=0; n<top_count; n++) {
            MPI_Type_get_envelope(types[n], &old_nints, &old_nadds,
                                  &old_ntypes, &old_combiner);
            ADIOI_Datatype_iscontig(types[n], &old_is_contig);

            prev_index = *curr_index;
            if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
                count += ADIOI_Count_contiguous_blocks(types[n], curr_index);

            if (prev_index == *curr_index) {
                /* simplest case, current type is basic or contiguous types */
                count++;
                (*curr_index)++;
            }
            else {
                /* current type made up of noncontiguous derived types */
                /* The current type has to be replicated blocklens[n] times */

                num = *curr_index - prev_index;
                count += (ints[1+n]-1)*num;
                (*curr_index) += (ints[1+n]-1)*num;
            }
        }
        break;

    case MPI_COMBINER_RESIZED:
        /* treat it as a struct with lb, type, ub */

        /* add 2 for lb and ub (zero-length marker blocks in the flat rep) */
        (*curr_index) += 2;
        count += 2;

        /* add for datatype */
        MPI_Type_get_envelope(types[0], &old_nints, &old_nadds,
                              &old_ntypes, &old_combiner);
        ADIOI_Datatype_iscontig(types[0], &old_is_contig);

        if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig)) {
            count += ADIOI_Count_contiguous_blocks(types[0], curr_index);
        }
        else {
            /* basic or contiguous type */
            count++;
            (*curr_index)++;
        }
        break;

    default:
        /* TODO: FIXME */
        DBG_FPRINTF(stderr, "Error: Unsupported datatype passed to ADIOI_Count_contiguous_blocks, combiner = %d\n", combiner);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

#ifndef MPISGI
    /* There is a bug in SGI's impl. of MPI_Type_get_contents. It doesn't
       return new datatypes. Therefore no need to free. */
    for (i=0; i<ntypes; i++) {
        MPI_Type_get_envelope(types[i], &old_nints, &old_nadds, &old_ntypes,
                              &old_combiner);
        if (old_combiner != MPI_COMBINER_NAMED) MPI_Type_free(types+i);
    }
#endif

    ADIOI_Free(ints);
    ADIOI_Free(adds);
    ADIOI_Free(types);
    return count;
#endif /* HAVE_MPIR_TYPE_GET_CONTIG_BLOCKS */
}
1012
/* removezeros() makes a second pass over the
 * flattened type, knocking out zero-length blocks, but leaves the first and
 * last blocks alone (they mark the LB and UB) */
1016
static void removezeros(ADIOI_Flatlist_node *flat_type)
{
    int src, dst, kept;
    ADIO_Offset *new_blocklens;
    ADIO_Offset *new_indices;

    /* Short-circuit: with two or fewer entries there is nothing to drop --
     * either both blocks carry data, or one of them is the LB/UB marker. */
    if (flat_type->count <= 2) return;

    /* Count the entries that survive.  The two endpoints always survive:
     * a zero-length first or last block marks the LB or UB. */
    kept = 2;
    for (src = 1; src < flat_type->count - 1; src++) {
	if (flat_type->blocklens[src] != 0)
	    kept++;
    }
    /* Every interior block is nonzero: nothing to compact. */
    if (kept == flat_type->count) return;

    new_blocklens = (ADIO_Offset *) ADIOI_Malloc(kept * sizeof(ADIO_Offset));
    new_indices = (ADIO_Offset *) ADIOI_Malloc(kept * sizeof(ADIO_Offset));

    /* Copy the first entry unconditionally (possible LB marker). */
    new_blocklens[0] = flat_type->blocklens[0];
    new_indices[0] = flat_type->indices[0];
    dst = 1;
    /* Compact the interior, skipping zero-length blocks. */
    for (src = 1; src < flat_type->count - 1; src++) {
	if (flat_type->blocklens[src] != 0) {
	    new_indices[dst] = flat_type->indices[src];
	    new_blocklens[dst] = flat_type->blocklens[src];
	    dst++;
	}
    }
    /* Copy the last entry unconditionally (possible UB marker). */
    new_indices[dst] = flat_type->indices[flat_type->count - 1];
    new_blocklens[dst] = flat_type->blocklens[flat_type->count - 1];

    /* Install the compacted lists in place of the old ones. */
    flat_type->count = kept;
    ADIOI_Free(flat_type->blocklens);
    ADIOI_Free(flat_type->indices);
    flat_type->blocklens = new_blocklens;
    flat_type->indices = new_indices;
    return;
}
1060
1061 /****************************************************************/
1062
1063 /* ADIOI_Optimize_flattened()
1064 *
1065 * Scans the blocks of a flattened type and merges adjacent blocks
1066 * together, resulting in a shorter blocklist (and thus fewer
1067 * contiguous operations).
1068 *
 * NOTE: zero-length blocks are removed in a second pass (removezeros), but
 * the first and last blocks are always kept, since a zero-length first or
 * last block marks the LB or UB.
1072 *
1073 */
void ADIOI_Optimize_flattened(ADIOI_Flatlist_node *flat_type)
{
    int idx, out, merged;
    ADIO_Offset *merged_blocklens;
    ADIO_Offset *merged_indices;

    /* First pass: count how many blocks remain after coalescing runs of
     * blocks where one ends exactly where the next begins. */
    merged = 1;
    for (idx = 0; idx < (flat_type->count - 1); idx++) {
	if ((flat_type->indices[idx] + flat_type->blocklens[idx] !=
	     flat_type->indices[idx + 1]))
	    merged++;
    }

    /* No adjacent blocks touch: the list is already minimal. */
    if (merged == flat_type->count) return;

    merged_blocklens = (ADIO_Offset *) ADIOI_Malloc(merged * sizeof(ADIO_Offset));
    merged_indices = (ADIO_Offset *) ADIOI_Malloc(merged * sizeof(ADIO_Offset));

    /* Second pass: build the coalesced lists.  Each touching successor is
     * folded into the current output block; a gap starts a new one. */
    merged_blocklens[0] = flat_type->blocklens[0];
    merged_indices[0] = flat_type->indices[0];
    out = 0;
    for (idx = 0; idx < (flat_type->count - 1); idx++) {
	if ((flat_type->indices[idx] + flat_type->blocklens[idx] ==
	     flat_type->indices[idx + 1])) {
	    /* contiguous with the current output block: extend it */
	    merged_blocklens[out] += flat_type->blocklens[idx + 1];
	} else {
	    /* gap: begin a new output block */
	    out++;
	    merged_indices[out] = flat_type->indices[idx + 1];
	    merged_blocklens[out] = flat_type->blocklens[idx + 1];
	}
    }

    /* Replace the old lists with the shorter merged ones. */
    flat_type->count = merged;
    ADIOI_Free(flat_type->blocklens);
    ADIOI_Free(flat_type->indices);
    flat_type->blocklens = merged_blocklens;
    flat_type->indices = merged_indices;

    /* Follow-up pass: drop zero-length interior blocks. */
    removezeros(flat_type);
    return;
}
1117
/* ADIOI_Delete_flattened()
 *
 * Removes the flattened representation of 'datatype' from the global
 * ADIOI_Flatlist (if present), freeing its block/offset arrays and the
 * node itself.  A datatype that was never flattened is a no-op.
 */
void ADIOI_Delete_flattened(MPI_Datatype datatype)
{
    ADIOI_Flatlist_node *flat, *prev;

    /* walk the list looking for this datatype, remembering the
     * predecessor so the node can be unlinked */
    prev = flat = ADIOI_Flatlist;
    while (flat && (flat->type != datatype)) {
	prev = flat;
	flat = flat->next;
    }
    if (flat) {
	if (flat == ADIOI_Flatlist) {
	    /* matched the head node: advance the list head.  The old code
	     * left ADIOI_Flatlist pointing at the freed node in this case
	     * (prev == flat, so prev->next = flat->next was a self-write). */
	    ADIOI_Flatlist = flat->next;
	}
	else {
	    prev->next = flat->next;
	}
	/* blocklens/indices may be NULL for an empty flattening; ADIOI_Free
	 * is not assumed to be NULL-safe, so keep the guards */
	if (flat->blocklens) ADIOI_Free(flat->blocklens);
	if (flat->indices) ADIOI_Free(flat->indices);
	ADIOI_Free(flat);
    }
}
1134