/* -*- Mode: C; c-basic-offset:4 ; -*- */

/*
 *  (C) 2001 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

#include <stdio.h>

#include "./dataloop.h"

static void DLOOP_Type_blockindexed_array_copy(DLOOP_Count count,
					       const void *disp_array,
					       DLOOP_Offset *out_disp_array,
					       int dispinbytes,
					       DLOOP_Offset old_extent);

/*@
   Dataloop_create_blockindexed - create blockindexed dataloop

   Arguments:
+  int count
.  int blocklength
.  void *displacement_array (array of either MPI_Aints or ints)
.  int displacement_in_bytes (boolean)
.  MPI_Datatype old_type
.  DLOOP_Dataloop **output_dataloop_ptr
.  int output_dataloop_size
.  int output_dataloop_depth
-  int flag

.N Errors
.N Returns 0 on success, -1 on failure.
@*/
int PREPEND_PREFIX(Dataloop_create_blockindexed)(int icount,
						 int iblklen,
						 const void *disp_array,
						 int dispinbytes,
						 DLOOP_Type oldtype,
						 DLOOP_Dataloop **dlp_p,
						 int *dlsz_p,
						 int *dldepth_p,
						 int flag)
{
    int err, is_builtin, is_vectorizable = 1;
    int i, new_loop_sz, old_loop_depth;

    DLOOP_Count contig_count, count, blklen;
    DLOOP_Offset old_extent, eff_disp0, eff_disp1, last_stride;
    DLOOP_Dataloop *new_dlp;

    count  = (DLOOP_Count) icount; /* avoid subsequent casting */
    blklen = (DLOOP_Count) iblklen;

    /* if count or blklen is zero, handle with contig code; call it an int */
    if (count == 0 || blklen == 0)
    {
	err = PREPEND_PREFIX(Dataloop_create_contiguous)(0,
							 MPI_INT,
							 dlp_p,
							 dlsz_p,
							 dldepth_p,
							 flag);
	return err;
    }

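    /* builtin (basic) types carry no dataloop of their own, so they are
     * treated as leaves: the type size serves as the extent and the old
     * loop depth is 0.
     */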
    is_builtin = (DLOOP_Handle_hasloop_macro(oldtype)) ? 0 : 1;

    if (is_builtin)
    {
	DLOOP_Handle_get_size_macro(oldtype, old_extent);
	old_loop_depth = 0;
    }
    else
    {
	DLOOP_Handle_get_extent_macro(oldtype, old_extent);
	DLOOP_Handle_get_loopdepth_macro(oldtype, old_loop_depth, flag);
    }

    contig_count = PREPEND_PREFIX(Type_blockindexed_count_contig)(count,
                                                                  blklen,
                                                                  disp_array,
                                                                  dispinbytes,
                                                                  old_extent);

    /* optimization:
     *
     * if contig_count == 1 and block starts at displacement 0,
     * store it as a contiguous rather than a blockindexed dataloop.
     */
    if ((contig_count == 1) &&
	((!dispinbytes && ((int *) disp_array)[0] == 0) ||
	 (dispinbytes && ((MPI_Aint *) disp_array)[0] == 0)))
    {
	err = PREPEND_PREFIX(Dataloop_create_contiguous)(icount * iblklen,
							 oldtype,
							 dlp_p,
							 dlsz_p,
							 dldepth_p,
							 flag);
	return err;
    }

    /* optimization:
     *
     * if contig_count == 1 store it as a blockindexed with one
     * element rather than as a lot of individual blocks.
     */
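    /* For example (illustrative values): count = 4, blklen = 2, and
     * displacements 1, 3, 5, 7 (in units of oldtype) describe a single
     * contiguous region that does not start at displacement 0, so it
     * collapses to count = 1, blklen = 8.
     */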
    if (contig_count == 1)
    {
	/* adjust count and blklen and drop through */
	blklen *= count;
	count = 1;
	iblklen *= icount;
	icount = 1;
    }

    /* optimization:
     *
     * if displacements start at zero and result in a fixed stride,
     * store it as a vector rather than a blockindexed dataloop.
     */
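    /* For example (illustrative values): blklen = 2, displacements 0, 4, 8
     * (in units of oldtype), and old_extent = 4 give effective byte
     * displacements 0, 16, and 32, a fixed stride of 16 bytes, so the type
     * is stored as a vector dataloop below.
     */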
    eff_disp0 = (dispinbytes) ? ((DLOOP_Offset) ((MPI_Aint *) disp_array)[0]) :
	(((DLOOP_Offset) ((int *) disp_array)[0]) * old_extent);

    if (count > 1 && eff_disp0 == (DLOOP_Offset) 0)
    {
	eff_disp1 = (dispinbytes) ?
	    ((DLOOP_Offset) ((MPI_Aint *) disp_array)[1]) :
	    (((DLOOP_Offset) ((int *) disp_array)[1]) * old_extent);
	last_stride = eff_disp1 - eff_disp0;

	for (i=2; i < count; i++) {
	    eff_disp0 = eff_disp1;
	    eff_disp1 = (dispinbytes) ?
		((DLOOP_Offset) ((MPI_Aint *) disp_array)[i]) :
		(((DLOOP_Offset) ((int *) disp_array)[i]) * old_extent);
	    if (eff_disp1 - eff_disp0 != last_stride) {
		is_vectorizable = 0;
		break;
	    }
	}
	if (is_vectorizable)
	{
	    err = PREPEND_PREFIX(Dataloop_create_vector)(count,
							 blklen,
							 last_stride,
							 1, /* strideinbytes */
							 oldtype,
							 dlp_p,
							 dlsz_p,
							 dldepth_p,
							 flag);
	    return err;
	}
    }

    /* TODO: optimization:
     *
     * if displacements result in a fixed stride, but first displacement
     * is not zero, store it as a blockindexed (blklen == 1) of a vector.
     */

    /* TODO: optimization:
     *
     * if a blockindexed of a contig, absorb the contig into the blocklen
     * parameter and keep the same overall depth
     */

    /* otherwise, store it as a blockindexed dataloop */

    /* Q: HOW CAN WE TELL IF IT IS WORTH IT TO STORE AS AN
     * INDEXED WITH FEWER CONTIG BLOCKS (IF CONTIG_COUNT IS SMALL)?
     */

    if (is_builtin)
    {
	PREPEND_PREFIX(Dataloop_alloc)(DLOOP_KIND_BLOCKINDEXED,
				       count,
				       &new_dlp,
				       &new_loop_sz);
	/* --BEGIN ERROR HANDLING-- */
	if (!new_dlp) return -1;
	/* --END ERROR HANDLING-- */

	new_dlp->kind = DLOOP_KIND_BLOCKINDEXED | DLOOP_FINAL_MASK;

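	/* an all-bytes dataloop describes the data as raw MPI_BYTEs, so
	 * the block length is scaled from elements to bytes and the
	 * element size/extent become 1.
	 */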
	if (flag == DLOOP_DATALOOP_ALL_BYTES)
	{
	    blklen            *= old_extent;
	    new_dlp->el_size   = 1;
	    new_dlp->el_extent = 1;
	    new_dlp->el_type   = MPI_BYTE;
	}
	else
	{
	    new_dlp->el_size   = old_extent;
	    new_dlp->el_extent = old_extent;
	    new_dlp->el_type   = oldtype;
	}
    }
    else
    {
	DLOOP_Dataloop *old_loop_ptr = NULL;
	int old_loop_sz = 0;

	DLOOP_Handle_get_loopptr_macro(oldtype, old_loop_ptr, flag);
	DLOOP_Handle_get_loopsize_macro(oldtype, old_loop_sz, flag);

	PREPEND_PREFIX(Dataloop_alloc_and_copy)(DLOOP_KIND_BLOCKINDEXED,
						count,
						old_loop_ptr,
						old_loop_sz,
						&new_dlp,
						&new_loop_sz);
	/* --BEGIN ERROR HANDLING-- */
	if (!new_dlp) return -1;
	/* --END ERROR HANDLING-- */

	new_dlp->kind = DLOOP_KIND_BLOCKINDEXED;

	DLOOP_Handle_get_size_macro(oldtype, new_dlp->el_size);
	DLOOP_Handle_get_extent_macro(oldtype, new_dlp->el_extent);
	DLOOP_Handle_get_basic_type_macro(oldtype, new_dlp->el_type);
    }

    new_dlp->loop_params.bi_t.count     = count;
    new_dlp->loop_params.bi_t.blocksize = blklen;

    /* copy in displacement parameters
     *
     * regardless of dispinbytes, we store displacements in bytes in the loop.
     */
    DLOOP_Type_blockindexed_array_copy(count,
				       disp_array,
				       new_dlp->loop_params.bi_t.offset_array,
				       dispinbytes,
				       old_extent);

    *dlp_p     = new_dlp;
    *dlsz_p    = new_loop_sz;
    *dldepth_p = old_loop_depth + 1;

    return 0;
}
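
/* Illustrative call (not from this file; the flag value and surrounding
 * declarations are assumptions for the sketch):
 *
 *     MPI_Aint disps[3] = {0, 16, 40};
 *     DLOOP_Dataloop *dl;
 *     int dlsz, dldepth, err;
 *
 *     err = PREPEND_PREFIX(Dataloop_create_blockindexed)(
 *               3, 2, disps, 1, MPI_DOUBLE, &dl, &dlsz, &dldepth, flag);
 *
 * Three blocks of two doubles at byte displacements 0, 16, and 40 are
 * neither fully contiguous nor equally strided, so the call above would
 * fall through to the blockindexed case and store the byte offsets in
 * offset_array.
 */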

/* DLOOP_Type_blockindexed_array_copy
 *
 * Unlike the indexed version, this one does not compact adjacent
 * blocks, because that would really mess up the blockindexed type!
 */
static void DLOOP_Type_blockindexed_array_copy(DLOOP_Count count,
					       const void *in_disp_array,
					       DLOOP_Offset *out_disp_array,
					       int dispinbytes,
					       DLOOP_Offset old_extent)
{
    int i;
    if (!dispinbytes)
    {
	for (i=0; i < count; i++)
	{
	    out_disp_array[i] =
		((DLOOP_Offset) ((int *) in_disp_array)[i]) * old_extent;
	}
    }
    else
    {
	for (i=0; i < count; i++)
	{
	    out_disp_array[i] =
		((DLOOP_Offset) ((MPI_Aint *) in_disp_array)[i]);
	}
    }
    return;
}

DLOOP_Count PREPEND_PREFIX(Type_blockindexed_count_contig)(DLOOP_Count count,
                                                           DLOOP_Count blklen,
                                                           const void *disp_array,
                                                           int dispinbytes,
                                                           DLOOP_Offset old_extent)
{
    int i, contig_count = 1;

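    /* count contiguous regions: a new region begins whenever a block does
     * not start exactly where the previous block ends.
     */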
    if (!dispinbytes)
    {
	/* displacements come from the MPI type and are of type int */
	DLOOP_Offset cur_tdisp = (DLOOP_Offset) ((int *) disp_array)[0];

	for (i=1; i < count; i++)
	{
	    DLOOP_Offset next_tdisp = (DLOOP_Offset) ((int *) disp_array)[i];

	    if (cur_tdisp + (DLOOP_Offset) blklen != next_tdisp)
	    {
		contig_count++;
	    }
	    cur_tdisp = next_tdisp;
	}
    }
    else
    {
	/* displacements come from the MPI type and are of type MPI_Aint */
	DLOOP_Offset cur_bdisp = (DLOOP_Offset) ((MPI_Aint *) disp_array)[0];

	for (i=1; i < count; i++)
	{
	    DLOOP_Offset next_bdisp =
		(DLOOP_Offset) ((MPI_Aint *) disp_array)[i];

	    if (cur_bdisp + (DLOOP_Offset) blklen * old_extent != next_bdisp)
	    {
		contig_count++;
	    }
	    cur_bdisp = next_bdisp;
	}
    }
    return contig_count;
}