1 /* -*- Mode: C; c-basic-offset:4 ; -*- */
2 
3 /*
4  *  (C) 2001 by Argonne National Laboratory.
5  *      See COPYRIGHT in top-level directory.
6  */
7 
8 #include <stdlib.h>
9 #include <limits.h>
10 
11 #include "./dataloop.h"
12 
13 static void DLOOP_Dataloop_create_named(MPI_Datatype type,
14 					DLOOP_Dataloop **dlp_p,
15 					int *dlsz_p,
16 					int *dldepth_p,
17 					int flag);
18 
PREPEND_PREFIX(Dataloop_create)19 void PREPEND_PREFIX(Dataloop_create)(MPI_Datatype type,
20 				     DLOOP_Dataloop **dlp_p,
21 				     int *dlsz_p,
22 				     int *dldepth_p,
23 				     int flag)
24 {
25     int i;
26     int err;
27 
28     int nr_ints, nr_aints, nr_types, combiner;
29     MPI_Datatype *types;
30     int *ints;
31     MPI_Aint *aints;
32 
33     DLOOP_Dataloop *old_dlp;
34     int old_dlsz, old_dldepth;
35 
36     int dummy1, dummy2, dummy3, type0_combiner, ndims;
37     MPI_Datatype tmptype;
38 
39     MPI_Aint stride;
40     MPI_Aint *disps;
41 
42     MPIR_Type_get_envelope_impl(type, &nr_ints, &nr_aints, &nr_types, &combiner);
43 
44     /* some named types do need dataloops; handle separately. */
45     if (combiner == MPI_COMBINER_NAMED) {
46 	DLOOP_Dataloop_create_named(type, dlp_p, dlsz_p, dldepth_p, flag);
47 	return;
48     }
49     else if (combiner == MPI_COMBINER_F90_REAL ||
50              combiner == MPI_COMBINER_F90_COMPLEX ||
51              combiner == MPI_COMBINER_F90_INTEGER)
52     {
53         MPI_Datatype f90basetype;
54         DLOOP_Handle_get_basic_type_macro(type, f90basetype);
55         PREPEND_PREFIX(Dataloop_create_contiguous)(1 /* count */,
56                                                    f90basetype,
57                                                    dlp_p, dlsz_p,
58                                                    dldepth_p,
59                                                    flag);
60         return;
61     }
62 
63     /* Q: should we also check for "hasloop", or is the COMBINER
64      *    check above enough to weed out everything that wouldn't
65      *    have a loop?
66      */
67     DLOOP_Handle_get_loopptr_macro(type, old_dlp, flag);
68     if (old_dlp != NULL) {
69 	/* dataloop already created; just return it. */
70 	*dlp_p = old_dlp;
71 	DLOOP_Handle_get_loopsize_macro(type, *dlsz_p, flag);
72 	DLOOP_Handle_get_loopdepth_macro(type, *dldepth_p, flag);
73 	return;
74     }
75 
76     PREPEND_PREFIX(Type_access_contents)(type, &ints, &aints, &types);
77 
78     /* first check for zero count on types where that makes sense */
79     switch(combiner) {
80 	case MPI_COMBINER_CONTIGUOUS:
81 	case MPI_COMBINER_VECTOR:
82 	case MPI_COMBINER_HVECTOR_INTEGER:
83 	case MPI_COMBINER_HVECTOR:
84 	case MPI_COMBINER_INDEXED_BLOCK:
85 	case MPIX_COMBINER_HINDEXED_BLOCK:
86 	case MPI_COMBINER_INDEXED:
87 	case MPI_COMBINER_HINDEXED_INTEGER:
88 	case MPI_COMBINER_HINDEXED:
89 	case MPI_COMBINER_STRUCT_INTEGER:
90 	case MPI_COMBINER_STRUCT:
91 	    if (ints[0] == 0) {
92 		PREPEND_PREFIX(Dataloop_create_contiguous)(0,
93 							   MPI_INT,
94 							   dlp_p,
95 							   dlsz_p,
96 							   dldepth_p,
97 							   flag);
98 		goto clean_exit;
99 	    }
100 	    break;
101 	default:
102 	    break;
103     }
104 
105     /* recurse, processing types "below" this one before processing
106      * this one, if those type don't already have dataloops.
107      *
108      * note: in the struct case below we'll handle any additional
109      *       types "below" the current one.
110      */
111     MPIR_Type_get_envelope_impl(types[0], &dummy1, &dummy2, &dummy3, &type0_combiner);
112     if (type0_combiner != MPI_COMBINER_NAMED)
113     {
114 	DLOOP_Handle_get_loopptr_macro(types[0], old_dlp, flag);
115 	if (old_dlp == NULL)
116 	{
117 	    /* no dataloop already present; create and store one */
118 	    PREPEND_PREFIX(Dataloop_create)(types[0],
119 					    &old_dlp,
120 					    &old_dlsz,
121 					    &old_dldepth,
122 					    flag);
123 
124 	    DLOOP_Handle_set_loopptr_macro(types[0], old_dlp, flag);
125 	    DLOOP_Handle_set_loopsize_macro(types[0], old_dlsz, flag);
126 	    DLOOP_Handle_set_loopdepth_macro(types[0], old_dldepth, flag);
127 	}
128 	else {
129 	    DLOOP_Handle_get_loopsize_macro(types[0], old_dlsz, flag);
130 	    DLOOP_Handle_get_loopdepth_macro(types[0], old_dldepth, flag);
131 	}
132     }
133 
134     switch(combiner)
135     {
136 	case MPI_COMBINER_DUP:
137 	    if (type0_combiner != MPI_COMBINER_NAMED) {
138 		PREPEND_PREFIX(Dataloop_dup)(old_dlp, old_dlsz, dlp_p);
139 		*dlsz_p    = old_dlsz;
140 		*dldepth_p = old_dldepth;
141 	    }
142 	    else {
143 		PREPEND_PREFIX(Dataloop_create_contiguous)(1,
144 							   types[0],
145 							   dlp_p, dlsz_p,
146 							   dldepth_p,
147 							   flag);
148 	    }
149 	    break;
150 	case MPI_COMBINER_RESIZED:
151 	    if (type0_combiner != MPI_COMBINER_NAMED) {
152 		PREPEND_PREFIX(Dataloop_dup)(old_dlp, old_dlsz, dlp_p);
153 		*dlsz_p    = old_dlsz;
154 		*dldepth_p = old_dldepth;
155 	    }
156 	    else {
157 		PREPEND_PREFIX(Dataloop_create_contiguous)(1,
158 							   types[0],
159 							   dlp_p, dlsz_p,
160 							   dldepth_p,
161 							   flag);
162 
163 		(*dlp_p)->el_extent = aints[1]; /* extent */
164 	    }
165 	    break;
166 	case MPI_COMBINER_CONTIGUOUS:
167 	    PREPEND_PREFIX(Dataloop_create_contiguous)(ints[0] /* count */,
168 						       types[0] /* oldtype */,
169 						       dlp_p, dlsz_p,
170 						       dldepth_p,
171 						       flag);
172 	    break;
173 	case MPI_COMBINER_VECTOR:
174 	    PREPEND_PREFIX(Dataloop_create_vector)(ints[0] /* count */,
175 						   ints[1] /* blklen */,
176 						   ints[2] /* stride */,
177 						   0 /* stride not bytes */,
178 						   types[0] /* oldtype */,
179 						   dlp_p, dlsz_p, dldepth_p,
180 						   flag);
181 	    break;
182 	case MPI_COMBINER_HVECTOR_INTEGER:
183 	case MPI_COMBINER_HVECTOR:
184 	    /* fortran hvector has integer stride in bytes */
185 	    if (combiner == MPI_COMBINER_HVECTOR_INTEGER) {
186 		stride = (MPI_Aint) ints[2];
187 	    }
188 	    else {
189 		stride = aints[0];
190 	    }
191 
192 	    PREPEND_PREFIX(Dataloop_create_vector)(ints[0] /* count */,
193 						   ints[1] /* blklen */,
194 						   stride,
195 						   1 /* stride in bytes */,
196 						   types[0] /* oldtype */,
197 						   dlp_p, dlsz_p, dldepth_p,
198 						   flag);
199 	    break;
200 	case MPI_COMBINER_INDEXED_BLOCK:
201 	    PREPEND_PREFIX(Dataloop_create_blockindexed)(ints[0] /* count */,
202 							 ints[1] /* blklen */,
203 							 &ints[2] /* disps */,
204 							 0 /* disp not bytes */,
205 							 types[0] /* oldtype */,
206 							 dlp_p, dlsz_p,
207 							 dldepth_p,
208 							 flag);
209 	    break;
210 	case MPIX_COMBINER_HINDEXED_BLOCK:
211             disps = (MPI_Aint *) DLOOP_Malloc(ints[0] * sizeof(MPI_Aint));
212             for (i = 0; i < ints[0]; i++)
213                 disps[i] = aints[i];
214 	    PREPEND_PREFIX(Dataloop_create_blockindexed)(ints[0] /* count */,
215 							 ints[1] /* blklen */,
216 							 disps /* disps */,
217 							 1 /* disp is bytes */,
218 							 types[0] /* oldtype */,
219 							 dlp_p, dlsz_p,
220 							 dldepth_p,
221 							 flag);
222             DLOOP_Free(disps);
223 	    break;
224 	case MPI_COMBINER_INDEXED:
225 	    PREPEND_PREFIX(Dataloop_create_indexed)(ints[0] /* count */,
226 						    &ints[1] /* blklens */,
227 						    &ints[ints[0]+1] /* disp */,
228 						    0 /* disp not in bytes */,
229 						    types[0] /* oldtype */,
230 						    dlp_p, dlsz_p, dldepth_p,
231 						    flag);
232 	    break;
233 	case MPI_COMBINER_HINDEXED_INTEGER:
234 	case MPI_COMBINER_HINDEXED:
235 	    if (combiner == MPI_COMBINER_HINDEXED_INTEGER) {
236 		disps = (MPI_Aint *) DLOOP_Malloc(ints[0] * sizeof(MPI_Aint));
237 
238 		for (i=0; i < ints[0]; i++) {
239 		    disps[i] = (MPI_Aint) ints[ints[0] + 1 + i];
240 		}
241 	    }
242 	    else {
243 		disps = aints;
244 	    }
245 
246 	    PREPEND_PREFIX(Dataloop_create_indexed)(ints[0] /* count */,
247 						    &ints[1] /* blklens */,
248 						    disps,
249 						    1 /* disp in bytes */,
250 						    types[0] /* oldtype */,
251 						    dlp_p, dlsz_p, dldepth_p,
252 						    flag);
253 
254 	    if (combiner == MPI_COMBINER_HINDEXED_INTEGER) {
255 		DLOOP_Free(disps);
256 	    }
257 
258 	    break;
259 	case MPI_COMBINER_STRUCT_INTEGER:
260 	case MPI_COMBINER_STRUCT:
261 	    for (i = 1; i < ints[0]; i++) {
262 		int type_combiner;
263 		MPIR_Type_get_envelope_impl(types[i], &dummy1, &dummy2, &dummy3, &type_combiner);
264 
265 		if (type_combiner != MPI_COMBINER_NAMED) {
266 		    DLOOP_Handle_get_loopptr_macro(types[i], old_dlp, flag);
267 		    if (old_dlp == NULL)
268 		    {
269 			PREPEND_PREFIX(Dataloop_create)(types[i],
270 							&old_dlp,
271 							&old_dlsz,
272 							&old_dldepth,
273 							flag);
274 
275 			DLOOP_Handle_set_loopptr_macro(types[i], old_dlp,
276 						       flag);
277 			DLOOP_Handle_set_loopsize_macro(types[i], old_dlsz,
278 							flag);
279 			DLOOP_Handle_set_loopdepth_macro(types[i], old_dldepth,
280 							 flag);
281 		    }
282 		}
283 	    }
284 	    if (combiner == MPI_COMBINER_STRUCT_INTEGER) {
285 		disps = (MPI_Aint *) DLOOP_Malloc(ints[0] * sizeof(MPI_Aint));
286 
287 		for (i=0; i < ints[0]; i++) {
288 		    disps[i] = (MPI_Aint) ints[ints[0] + 1 + i];
289 		}
290 	    }
291 	    else {
292 		disps = aints;
293 	    }
294 
295             err = PREPEND_PREFIX(Dataloop_create_struct)(ints[0] /* count */,
296                                                          &ints[1] /* blklens */,
297                                                          disps,
298                                                          types /* oldtype array */,
299                                                          dlp_p, dlsz_p, dldepth_p,
300                                                          flag);
301             /* TODO if/when this function returns error codes, propagate this failure instead */
302             DLOOP_Assert(0 == err);
303             /* if (err) return err; */
304 
305 	    if (combiner == MPI_COMBINER_STRUCT_INTEGER) {
306 		DLOOP_Free(disps);
307 	    }
308 	    break;
309 	case MPI_COMBINER_SUBARRAY:
310 	    ndims = ints[0];
311 	    PREPEND_PREFIX(Type_convert_subarray)(ndims,
312 						  &ints[1] /* sizes */,
313 						  &ints[1+ndims] /* subsizes */,
314 						  &ints[1+2*ndims] /* starts */,
315 						  ints[1+3*ndims] /* order */,
316 						  types[0],
317 						  &tmptype);
318 
319 	    PREPEND_PREFIX(Dataloop_create)(tmptype,
320 					    dlp_p,
321 					    dlsz_p,
322 					    dldepth_p,
323 					    flag);
324 
325 	    MPIR_Type_free_impl(&tmptype);
326 	    break;
327 	case MPI_COMBINER_DARRAY:
328 	    ndims = ints[2];
329 	    PREPEND_PREFIX(Type_convert_darray)(ints[0] /* size */,
330 						ints[1] /* rank */,
331 						ndims,
332 						&ints[3] /* gsizes */,
333 						&ints[3+ndims] /*distribs */,
334 						&ints[3+2*ndims] /* dargs */,
335 						&ints[3+3*ndims] /* psizes */,
336 						ints[3+4*ndims] /* order */,
337 						types[0],
338 						&tmptype);
339 
340 	    PREPEND_PREFIX(Dataloop_create)(tmptype,
341 					    dlp_p,
342 					    dlsz_p,
343 					    dldepth_p,
344 					    flag);
345 
346 	    MPIR_Type_free_impl(&tmptype);
347 	    break;
348 	default:
349 	    DLOOP_Assert(0);
350 	    break;
351     }
352 
353  clean_exit:
354 
355     PREPEND_PREFIX(Type_release_contents)(type, &ints, &aints, &types);
356 
357     /* for now we just leave the intermediate dataloops in place.
358      * could remove them to save space if we wanted.
359      */
360 
361     return;
362 }
363 
364 /*@
365   DLOOP_Dataloop_create_named - create a dataloop for a "named" type
366   if necessary.
367 
368   "named" types are ones for which MPI_Type_get_envelope() returns a
369   combiner of MPI_COMBINER_NAMED. some types that fit this category,
370   such as MPI_SHORT_INT, have multiple elements with potential gaps
371   and padding. these types need dataloops for correct processing.
372 @*/
DLOOP_Dataloop_create_named(MPI_Datatype type,DLOOP_Dataloop ** dlp_p,int * dlsz_p,int * dldepth_p,int flag)373 static void DLOOP_Dataloop_create_named(MPI_Datatype type,
374 					DLOOP_Dataloop **dlp_p,
375 					int *dlsz_p,
376 					int *dldepth_p,
377 					int flag)
378 {
379     DLOOP_Dataloop *dlp;
380 
381     /* special case: pairtypes need dataloops too.
382      *
383      * note: not dealing with MPI_2INT because size == extent
384      *       in all cases for that type.
385      *
386      * note: MPICH2 always precreates these, so we will never call
387      *       Dataloop_create_pairtype() from here in the MPICH2
388      *       case.
389      */
390     if (type == MPI_FLOAT_INT || type == MPI_DOUBLE_INT ||
391 	type == MPI_LONG_INT || type == MPI_SHORT_INT ||
392 	type == MPI_LONG_DOUBLE_INT)
393     {
394 	DLOOP_Handle_get_loopptr_macro(type, dlp, flag);
395 	if (dlp != NULL) {
396 	    /* dataloop already created; just return it. */
397 	    *dlp_p = dlp;
398 	    DLOOP_Handle_get_loopsize_macro(type, *dlsz_p, flag);
399 	    DLOOP_Handle_get_loopdepth_macro(type, *dldepth_p, flag);
400 	}
401 	else {
402 	    PREPEND_PREFIX(Dataloop_create_pairtype)(type,
403 						     dlp_p,
404 						     dlsz_p,
405 						     dldepth_p,
406 						     flag);
407 	}
408 	return;
409     }
410     /* no other combiners need dataloops; exit. */
411     else {
412 	*dlp_p = NULL;
413 	*dlsz_p = 0;
414 	*dldepth_p = 0;
415 	return;
416     }
417 }
418