1 /* -*- Mode: C; c-basic-offset:4 ; -*- */
2
3 /*
4 * (C) 2001 by Argonne National Laboratory.
5 * See COPYRIGHT in top-level directory.
6 */
7
8 #include <stdlib.h>
9 #include <limits.h>
10
11 #include "./dataloop.h"
12
/* Forward declaration: this file-local helper is defined later in this
 * file, but Dataloop_create() calls it (for MPI_COMBINER_NAMED types)
 * before that definition appears.
 */
static void DLOOP_Dataloop_create_named(MPI_Datatype type,
                                        DLOOP_Dataloop **dlp_p,
                                        int *dlsz_p,
                                        int *dldepth_p,
                                        int flag);
18
PREPEND_PREFIX(Dataloop_create)19 void PREPEND_PREFIX(Dataloop_create)(MPI_Datatype type,
20 DLOOP_Dataloop **dlp_p,
21 int *dlsz_p,
22 int *dldepth_p,
23 int flag)
24 {
25 int i;
26 int err;
27
28 int nr_ints, nr_aints, nr_types, combiner;
29 MPI_Datatype *types;
30 int *ints;
31 MPI_Aint *aints;
32
33 DLOOP_Dataloop *old_dlp;
34 int old_dlsz, old_dldepth;
35
36 int dummy1, dummy2, dummy3, type0_combiner, ndims;
37 MPI_Datatype tmptype;
38
39 MPI_Aint stride;
40 MPI_Aint *disps;
41
42 MPIR_Type_get_envelope_impl(type, &nr_ints, &nr_aints, &nr_types, &combiner);
43
44 /* some named types do need dataloops; handle separately. */
45 if (combiner == MPI_COMBINER_NAMED) {
46 DLOOP_Dataloop_create_named(type, dlp_p, dlsz_p, dldepth_p, flag);
47 return;
48 }
49 else if (combiner == MPI_COMBINER_F90_REAL ||
50 combiner == MPI_COMBINER_F90_COMPLEX ||
51 combiner == MPI_COMBINER_F90_INTEGER)
52 {
53 MPI_Datatype f90basetype;
54 DLOOP_Handle_get_basic_type_macro(type, f90basetype);
55 PREPEND_PREFIX(Dataloop_create_contiguous)(1 /* count */,
56 f90basetype,
57 dlp_p, dlsz_p,
58 dldepth_p,
59 flag);
60 return;
61 }
62
63 /* Q: should we also check for "hasloop", or is the COMBINER
64 * check above enough to weed out everything that wouldn't
65 * have a loop?
66 */
67 DLOOP_Handle_get_loopptr_macro(type, old_dlp, flag);
68 if (old_dlp != NULL) {
69 /* dataloop already created; just return it. */
70 *dlp_p = old_dlp;
71 DLOOP_Handle_get_loopsize_macro(type, *dlsz_p, flag);
72 DLOOP_Handle_get_loopdepth_macro(type, *dldepth_p, flag);
73 return;
74 }
75
76 PREPEND_PREFIX(Type_access_contents)(type, &ints, &aints, &types);
77
78 /* first check for zero count on types where that makes sense */
79 switch(combiner) {
80 case MPI_COMBINER_CONTIGUOUS:
81 case MPI_COMBINER_VECTOR:
82 case MPI_COMBINER_HVECTOR_INTEGER:
83 case MPI_COMBINER_HVECTOR:
84 case MPI_COMBINER_INDEXED_BLOCK:
85 case MPIX_COMBINER_HINDEXED_BLOCK:
86 case MPI_COMBINER_INDEXED:
87 case MPI_COMBINER_HINDEXED_INTEGER:
88 case MPI_COMBINER_HINDEXED:
89 case MPI_COMBINER_STRUCT_INTEGER:
90 case MPI_COMBINER_STRUCT:
91 if (ints[0] == 0) {
92 PREPEND_PREFIX(Dataloop_create_contiguous)(0,
93 MPI_INT,
94 dlp_p,
95 dlsz_p,
96 dldepth_p,
97 flag);
98 goto clean_exit;
99 }
100 break;
101 default:
102 break;
103 }
104
105 /* recurse, processing types "below" this one before processing
106 * this one, if those type don't already have dataloops.
107 *
108 * note: in the struct case below we'll handle any additional
109 * types "below" the current one.
110 */
111 MPIR_Type_get_envelope_impl(types[0], &dummy1, &dummy2, &dummy3, &type0_combiner);
112 if (type0_combiner != MPI_COMBINER_NAMED)
113 {
114 DLOOP_Handle_get_loopptr_macro(types[0], old_dlp, flag);
115 if (old_dlp == NULL)
116 {
117 /* no dataloop already present; create and store one */
118 PREPEND_PREFIX(Dataloop_create)(types[0],
119 &old_dlp,
120 &old_dlsz,
121 &old_dldepth,
122 flag);
123
124 DLOOP_Handle_set_loopptr_macro(types[0], old_dlp, flag);
125 DLOOP_Handle_set_loopsize_macro(types[0], old_dlsz, flag);
126 DLOOP_Handle_set_loopdepth_macro(types[0], old_dldepth, flag);
127 }
128 else {
129 DLOOP_Handle_get_loopsize_macro(types[0], old_dlsz, flag);
130 DLOOP_Handle_get_loopdepth_macro(types[0], old_dldepth, flag);
131 }
132 }
133
134 switch(combiner)
135 {
136 case MPI_COMBINER_DUP:
137 if (type0_combiner != MPI_COMBINER_NAMED) {
138 PREPEND_PREFIX(Dataloop_dup)(old_dlp, old_dlsz, dlp_p);
139 *dlsz_p = old_dlsz;
140 *dldepth_p = old_dldepth;
141 }
142 else {
143 PREPEND_PREFIX(Dataloop_create_contiguous)(1,
144 types[0],
145 dlp_p, dlsz_p,
146 dldepth_p,
147 flag);
148 }
149 break;
150 case MPI_COMBINER_RESIZED:
151 if (type0_combiner != MPI_COMBINER_NAMED) {
152 PREPEND_PREFIX(Dataloop_dup)(old_dlp, old_dlsz, dlp_p);
153 *dlsz_p = old_dlsz;
154 *dldepth_p = old_dldepth;
155 }
156 else {
157 PREPEND_PREFIX(Dataloop_create_contiguous)(1,
158 types[0],
159 dlp_p, dlsz_p,
160 dldepth_p,
161 flag);
162
163 (*dlp_p)->el_extent = aints[1]; /* extent */
164 }
165 break;
166 case MPI_COMBINER_CONTIGUOUS:
167 PREPEND_PREFIX(Dataloop_create_contiguous)(ints[0] /* count */,
168 types[0] /* oldtype */,
169 dlp_p, dlsz_p,
170 dldepth_p,
171 flag);
172 break;
173 case MPI_COMBINER_VECTOR:
174 PREPEND_PREFIX(Dataloop_create_vector)(ints[0] /* count */,
175 ints[1] /* blklen */,
176 ints[2] /* stride */,
177 0 /* stride not bytes */,
178 types[0] /* oldtype */,
179 dlp_p, dlsz_p, dldepth_p,
180 flag);
181 break;
182 case MPI_COMBINER_HVECTOR_INTEGER:
183 case MPI_COMBINER_HVECTOR:
184 /* fortran hvector has integer stride in bytes */
185 if (combiner == MPI_COMBINER_HVECTOR_INTEGER) {
186 stride = (MPI_Aint) ints[2];
187 }
188 else {
189 stride = aints[0];
190 }
191
192 PREPEND_PREFIX(Dataloop_create_vector)(ints[0] /* count */,
193 ints[1] /* blklen */,
194 stride,
195 1 /* stride in bytes */,
196 types[0] /* oldtype */,
197 dlp_p, dlsz_p, dldepth_p,
198 flag);
199 break;
200 case MPI_COMBINER_INDEXED_BLOCK:
201 PREPEND_PREFIX(Dataloop_create_blockindexed)(ints[0] /* count */,
202 ints[1] /* blklen */,
203 &ints[2] /* disps */,
204 0 /* disp not bytes */,
205 types[0] /* oldtype */,
206 dlp_p, dlsz_p,
207 dldepth_p,
208 flag);
209 break;
210 case MPIX_COMBINER_HINDEXED_BLOCK:
211 disps = (MPI_Aint *) DLOOP_Malloc(ints[0] * sizeof(MPI_Aint));
212 for (i = 0; i < ints[0]; i++)
213 disps[i] = aints[i];
214 PREPEND_PREFIX(Dataloop_create_blockindexed)(ints[0] /* count */,
215 ints[1] /* blklen */,
216 disps /* disps */,
217 1 /* disp is bytes */,
218 types[0] /* oldtype */,
219 dlp_p, dlsz_p,
220 dldepth_p,
221 flag);
222 DLOOP_Free(disps);
223 break;
224 case MPI_COMBINER_INDEXED:
225 PREPEND_PREFIX(Dataloop_create_indexed)(ints[0] /* count */,
226 &ints[1] /* blklens */,
227 &ints[ints[0]+1] /* disp */,
228 0 /* disp not in bytes */,
229 types[0] /* oldtype */,
230 dlp_p, dlsz_p, dldepth_p,
231 flag);
232 break;
233 case MPI_COMBINER_HINDEXED_INTEGER:
234 case MPI_COMBINER_HINDEXED:
235 if (combiner == MPI_COMBINER_HINDEXED_INTEGER) {
236 disps = (MPI_Aint *) DLOOP_Malloc(ints[0] * sizeof(MPI_Aint));
237
238 for (i=0; i < ints[0]; i++) {
239 disps[i] = (MPI_Aint) ints[ints[0] + 1 + i];
240 }
241 }
242 else {
243 disps = aints;
244 }
245
246 PREPEND_PREFIX(Dataloop_create_indexed)(ints[0] /* count */,
247 &ints[1] /* blklens */,
248 disps,
249 1 /* disp in bytes */,
250 types[0] /* oldtype */,
251 dlp_p, dlsz_p, dldepth_p,
252 flag);
253
254 if (combiner == MPI_COMBINER_HINDEXED_INTEGER) {
255 DLOOP_Free(disps);
256 }
257
258 break;
259 case MPI_COMBINER_STRUCT_INTEGER:
260 case MPI_COMBINER_STRUCT:
261 for (i = 1; i < ints[0]; i++) {
262 int type_combiner;
263 MPIR_Type_get_envelope_impl(types[i], &dummy1, &dummy2, &dummy3, &type_combiner);
264
265 if (type_combiner != MPI_COMBINER_NAMED) {
266 DLOOP_Handle_get_loopptr_macro(types[i], old_dlp, flag);
267 if (old_dlp == NULL)
268 {
269 PREPEND_PREFIX(Dataloop_create)(types[i],
270 &old_dlp,
271 &old_dlsz,
272 &old_dldepth,
273 flag);
274
275 DLOOP_Handle_set_loopptr_macro(types[i], old_dlp,
276 flag);
277 DLOOP_Handle_set_loopsize_macro(types[i], old_dlsz,
278 flag);
279 DLOOP_Handle_set_loopdepth_macro(types[i], old_dldepth,
280 flag);
281 }
282 }
283 }
284 if (combiner == MPI_COMBINER_STRUCT_INTEGER) {
285 disps = (MPI_Aint *) DLOOP_Malloc(ints[0] * sizeof(MPI_Aint));
286
287 for (i=0; i < ints[0]; i++) {
288 disps[i] = (MPI_Aint) ints[ints[0] + 1 + i];
289 }
290 }
291 else {
292 disps = aints;
293 }
294
295 err = PREPEND_PREFIX(Dataloop_create_struct)(ints[0] /* count */,
296 &ints[1] /* blklens */,
297 disps,
298 types /* oldtype array */,
299 dlp_p, dlsz_p, dldepth_p,
300 flag);
301 /* TODO if/when this function returns error codes, propagate this failure instead */
302 DLOOP_Assert(0 == err);
303 /* if (err) return err; */
304
305 if (combiner == MPI_COMBINER_STRUCT_INTEGER) {
306 DLOOP_Free(disps);
307 }
308 break;
309 case MPI_COMBINER_SUBARRAY:
310 ndims = ints[0];
311 PREPEND_PREFIX(Type_convert_subarray)(ndims,
312 &ints[1] /* sizes */,
313 &ints[1+ndims] /* subsizes */,
314 &ints[1+2*ndims] /* starts */,
315 ints[1+3*ndims] /* order */,
316 types[0],
317 &tmptype);
318
319 PREPEND_PREFIX(Dataloop_create)(tmptype,
320 dlp_p,
321 dlsz_p,
322 dldepth_p,
323 flag);
324
325 MPIR_Type_free_impl(&tmptype);
326 break;
327 case MPI_COMBINER_DARRAY:
328 ndims = ints[2];
329 PREPEND_PREFIX(Type_convert_darray)(ints[0] /* size */,
330 ints[1] /* rank */,
331 ndims,
332 &ints[3] /* gsizes */,
333 &ints[3+ndims] /*distribs */,
334 &ints[3+2*ndims] /* dargs */,
335 &ints[3+3*ndims] /* psizes */,
336 ints[3+4*ndims] /* order */,
337 types[0],
338 &tmptype);
339
340 PREPEND_PREFIX(Dataloop_create)(tmptype,
341 dlp_p,
342 dlsz_p,
343 dldepth_p,
344 flag);
345
346 MPIR_Type_free_impl(&tmptype);
347 break;
348 default:
349 DLOOP_Assert(0);
350 break;
351 }
352
353 clean_exit:
354
355 PREPEND_PREFIX(Type_release_contents)(type, &ints, &aints, &types);
356
357 /* for now we just leave the intermediate dataloops in place.
358 * could remove them to save space if we wanted.
359 */
360
361 return;
362 }
363
364 /*@
365 DLOOP_Dataloop_create_named - create a dataloop for a "named" type
366 if necessary.
367
368 "named" types are ones for which MPI_Type_get_envelope() returns a
369 combiner of MPI_COMBINER_NAMED. some types that fit this category,
370 such as MPI_SHORT_INT, have multiple elements with potential gaps
371 and padding. these types need dataloops for correct processing.
372 @*/
DLOOP_Dataloop_create_named(MPI_Datatype type,DLOOP_Dataloop ** dlp_p,int * dlsz_p,int * dldepth_p,int flag)373 static void DLOOP_Dataloop_create_named(MPI_Datatype type,
374 DLOOP_Dataloop **dlp_p,
375 int *dlsz_p,
376 int *dldepth_p,
377 int flag)
378 {
379 DLOOP_Dataloop *dlp;
380
381 /* special case: pairtypes need dataloops too.
382 *
383 * note: not dealing with MPI_2INT because size == extent
384 * in all cases for that type.
385 *
386 * note: MPICH2 always precreates these, so we will never call
387 * Dataloop_create_pairtype() from here in the MPICH2
388 * case.
389 */
390 if (type == MPI_FLOAT_INT || type == MPI_DOUBLE_INT ||
391 type == MPI_LONG_INT || type == MPI_SHORT_INT ||
392 type == MPI_LONG_DOUBLE_INT)
393 {
394 DLOOP_Handle_get_loopptr_macro(type, dlp, flag);
395 if (dlp != NULL) {
396 /* dataloop already created; just return it. */
397 *dlp_p = dlp;
398 DLOOP_Handle_get_loopsize_macro(type, *dlsz_p, flag);
399 DLOOP_Handle_get_loopdepth_macro(type, *dldepth_p, flag);
400 }
401 else {
402 PREPEND_PREFIX(Dataloop_create_pairtype)(type,
403 dlp_p,
404 dlsz_p,
405 dldepth_p,
406 flag);
407 }
408 return;
409 }
410 /* no other combiners need dataloops; exit. */
411 else {
412 *dlp_p = NULL;
413 *dlsz_p = 0;
414 *dldepth_p = 0;
415 return;
416 }
417 }
418