1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2 /*
3  * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
4  *                         University Research and Technology
5  *                         Corporation.  All rights reserved.
6  * Copyright (c) 2004-2015 The University of Tennessee and The University
7  *                         of Tennessee Research Foundation.  All rights
8  *                         reserved.
9  * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
10  *                         University of Stuttgart.  All rights reserved.
11  * Copyright (c) 2004-2006 The Regents of the University of California.
12  *                         All rights reserved.
13  * Copyright (c) 2009      Sun Microsystems, Inc. All rights reserved.
14  * Copyright (c) 2009      Oak Ridge National Labs.  All rights reserved.
15  * Copyright (c) 2010      Cisco Systems, Inc.  All rights reserved.
16  * Copyright (c) 2016      Los Alamos National Security, LLC. All rights
17  *                         reserved.
18  * Copyright (c) 2017      IBM Corporation. All rights reserved.
19  * $COPYRIGHT$
20  *
21  * Additional copyrights may follow
22  *
23  * $HEADER$
24  */
25 
26 #include "ompi_config.h"
27 
28 #include <stddef.h>
29 
30 #include "ompi/datatype/ompi_datatype.h"
31 
32 static int
block(const int * gsize_array,int dim,int ndims,int nprocs,int rank,int darg,int order,ptrdiff_t orig_extent,ompi_datatype_t * type_old,ompi_datatype_t ** type_new,ptrdiff_t * st_offset)33 block(const int *gsize_array, int dim, int ndims, int nprocs,
34       int rank, int darg, int order, ptrdiff_t orig_extent,
35       ompi_datatype_t *type_old, ompi_datatype_t **type_new,
36       ptrdiff_t *st_offset)
37 {
38     int blksize, global_size, mysize, i, j, rc, start_loop, step;
39     ptrdiff_t stride, disps[2];
40 
41     global_size = gsize_array[dim];
42 
43     if (darg == MPI_DISTRIBUTE_DFLT_DARG)
44         blksize = (global_size + nprocs - 1) / nprocs;
45     else {
46         blksize = darg;
47     }
48 
49     j = global_size - blksize*rank;
50     mysize = blksize < j ? blksize : j;
51     if (mysize < 0) mysize = 0;
52 
53     if (MPI_ORDER_C == order) {
54         start_loop = ndims - 1 ; step = -1;
55     } else {
56         start_loop = 0 ; step = 1;
57     }
58 
59     stride = orig_extent;
60     if (dim == start_loop) {
61         rc = ompi_datatype_create_contiguous(mysize, type_old, type_new);
62         if (OMPI_SUCCESS != rc) return rc;
63     } else {
64         for (i = start_loop ; i != dim ; i += step) {
65             stride *= gsize_array[i];
66         }
67         rc = ompi_datatype_create_hvector(mysize, 1, stride, type_old, type_new);
68         if (OMPI_SUCCESS != rc) return rc;
69     }
70 
71     *st_offset = blksize * rank;
72     /* in terms of no. of elements of type oldtype in this dimension */
73     if (mysize == 0) *st_offset = 0;
74 
75     /* need to set the UB for block-cyclic to work */
76     disps[0] = 0;         disps[1] = orig_extent;
77     if (order == MPI_ORDER_FORTRAN) {
78         for(i=0; i<=dim; i++) {
79             disps[1] *= gsize_array[i];
80         }
81     } else {
82         for(i=ndims-1; i>=dim; i--) {
83             disps[1] *= gsize_array[i];
84         }
85     }
86     rc = opal_datatype_resize( &(*type_new)->super, disps[0], disps[1] );
87     if (OMPI_SUCCESS != rc) return rc;
88 
89     return OMPI_SUCCESS;
90 }
91 
92 
93 static int
cyclic(const int * gsize_array,int dim,int ndims,int nprocs,int rank,int darg,int order,ptrdiff_t orig_extent,ompi_datatype_t * type_old,ompi_datatype_t ** type_new,ptrdiff_t * st_offset)94 cyclic(const int *gsize_array, int dim, int ndims, int nprocs,
95        int rank, int darg, int order, ptrdiff_t orig_extent,
96        ompi_datatype_t* type_old, ompi_datatype_t **type_new,
97        ptrdiff_t *st_offset)
98 {
99     int blksize, i, blklens[2], st_index, end_index, local_size, rem, count, rc;
100     ptrdiff_t stride, disps[2];
101     ompi_datatype_t *type_tmp, *types[2];
102 
103     if (darg == MPI_DISTRIBUTE_DFLT_DARG) {
104         blksize = 1;
105     } else {
106         blksize = darg;
107     }
108 
109     st_index = rank * blksize;
110     end_index = gsize_array[dim] - 1;
111 
112     if (end_index < st_index) {
113         local_size = 0;
114     } else {
115         local_size = ((end_index - st_index + 1)/(nprocs*blksize))*blksize;
116         rem = (end_index - st_index + 1) % (nprocs*blksize);
117         local_size += rem < blksize ? rem : blksize;
118     }
119 
120     count = local_size / blksize;
121     rem = local_size % blksize;
122 
123     stride = nprocs*blksize*orig_extent;
124     if (order == MPI_ORDER_FORTRAN) {
125         for (i=0; i<dim; i++) {
126             stride *= gsize_array[i];
127         }
128     } else {
129         for (i=ndims-1; i>dim; i--) {
130             stride *= gsize_array[i];
131         }
132     }
133 
134     rc = ompi_datatype_create_hvector(count, blksize, stride, type_old, type_new);
135     if (OMPI_SUCCESS != rc) return rc;
136 
137     if (rem) {
138         /* if the last block is of size less than blksize, include
139            it separately using MPI_Type_struct */
140 
141         types  [0] = *type_new; types  [1] = type_old;
142         disps  [0] = 0;         disps  [1] = count*stride;
143         blklens[0] = 1;         blklens[1] = rem;
144 
145         rc = ompi_datatype_create_struct(2, blklens, disps, types, &type_tmp);
146         ompi_datatype_destroy(type_new);
147         /* even in error condition, need to destroy type_new, so check
148            for error after destroy. */
149         if (OMPI_SUCCESS != rc) return rc;
150         *type_new = type_tmp;
151     }
152 
153     /* need to set the UB for block-cyclic to work */
154     disps[0] = 0;         disps[1] = orig_extent;
155     if (order == MPI_ORDER_FORTRAN) {
156         for(i=0; i<=dim; i++) {
157             disps[1] *= gsize_array[i];
158         }
159     } else {
160         for(i=ndims-1; i>=dim; i--) {
161             disps[1] *= gsize_array[i];
162         }
163     }
164     rc = opal_datatype_resize( &(*type_new)->super, disps[0], disps[1] );
165     if (OMPI_SUCCESS != rc) return rc;
166 
167     *st_offset = rank * blksize;
168     /* in terms of no. of elements of type oldtype in this dimension */
169     if (local_size == 0) *st_offset = 0;
170 
171     return OMPI_SUCCESS;
172 }
173 
ompi_datatype_create_darray(int size,int rank,int ndims,int const * gsize_array,int const * distrib_array,int const * darg_array,int const * psize_array,int order,const ompi_datatype_t * oldtype,ompi_datatype_t ** newtype)174 int32_t ompi_datatype_create_darray(int size,
175                                     int rank,
176                                     int ndims,
177                                     int const* gsize_array,
178                                     int const* distrib_array,
179                                     int const* darg_array,
180                                     int const* psize_array,
181                                     int order,
182                                     const ompi_datatype_t* oldtype,
183                                     ompi_datatype_t** newtype)
184 {
185     ompi_datatype_t *lastType;
186     ptrdiff_t orig_extent, *st_offsets = NULL;
187     int i, start_loop, end_loop, step;
188     int *coords = NULL, rc = OMPI_SUCCESS;
189     ptrdiff_t displs[2], tmp_size = 1;
190 
191     /* speedy corner case */
192     if (ndims < 1) {
193         /* Don't just return MPI_DATATYPE_NULL as that can't be
194            MPI_TYPE_FREE()ed, and that seems bad */
195         return ompi_datatype_duplicate( &ompi_mpi_datatype_null.dt, newtype);
196     }
197 
198     rc = ompi_datatype_type_extent(oldtype, &orig_extent);
199     if (MPI_SUCCESS != rc) goto cleanup;
200 
201     /* calculate position in grid using row-major ordering */
202     {
203         int tmp_rank = rank, procs = size;
204 
205         coords = (int *) malloc(ndims * sizeof(int));
206         displs[1] = orig_extent;
207         for (i = 0 ; i < ndims ; i++) {
208             procs = procs / psize_array[i];
209             coords[i] = tmp_rank / procs;
210             tmp_rank = tmp_rank % procs;
211             /* compute the upper bound of the datatype, including all dimensions */
212             displs[1] *= gsize_array[i];
213         }
214     }
215 
216     st_offsets = (ptrdiff_t *) malloc(ndims * sizeof(ptrdiff_t));
217 
218     /* duplicate type to here to 1) deal with constness without
219        casting and 2) eliminate need to for conditional destroy below.
220        Lame, yes.  But cleaner code all around. */
221     rc = ompi_datatype_duplicate(oldtype, &lastType);
222     if (OMPI_SUCCESS != rc) goto cleanup;
223 
224     /* figure out ordering issues */
225     if (MPI_ORDER_C == order) {
226         start_loop = ndims - 1 ; step = -1; end_loop = -1;
227     } else {
228         start_loop = 0 ; step = 1; end_loop = ndims;
229     }
230 
231     /* Build up array */
232     for (i = start_loop; i != end_loop; i += step) {
233         int nprocs, tmp_rank;
234 
235         switch(distrib_array[i]) {
236         case MPI_DISTRIBUTE_BLOCK:
237             rc = block(gsize_array, i, ndims, psize_array[i], coords[i],
238                        darg_array[i], order, orig_extent,
239                        lastType, newtype, st_offsets+i);
240             break;
241         case MPI_DISTRIBUTE_CYCLIC:
242             rc = cyclic(gsize_array, i, ndims, psize_array[i], coords[i],
243                         darg_array[i], order, orig_extent,
244                         lastType, newtype, st_offsets+i);
245             break;
246         case MPI_DISTRIBUTE_NONE:
247             /* treat it as a block distribution on 1 process */
248             if (order == MPI_ORDER_C) {
249                 nprocs = psize_array[i]; tmp_rank = coords[i];
250             } else {
251                 nprocs = 1; tmp_rank = 0;
252             }
253 
254             rc = block(gsize_array, i, ndims, nprocs, tmp_rank,
255                        MPI_DISTRIBUTE_DFLT_DARG, order, orig_extent,
256                        lastType, newtype, st_offsets+i);
257             break;
258         default:
259             rc = MPI_ERR_ARG;
260         }
261         ompi_datatype_destroy(&lastType);
262         /* need to destroy the old type even in error condition, so
263            don't check return code from above until after cleanup. */
264         if (MPI_SUCCESS != rc) goto cleanup;
265         lastType = *newtype;
266     }
267 
268     /**
269      * We need to shift the content (useful data) of the datatype, so
270      * we need to force the displacement to be moved. Therefore, we
271      * cannot use resize as it will only set the soft lb and ub
272      * markers without moving the data. Instead, we have to create a
273      * new data, and insert the last_Type with the correct
274      * displacement.
275      */
276     displs[0] = st_offsets[start_loop];
277     for (i = start_loop + step; i != end_loop; i += step) {
278         tmp_size *= gsize_array[i - step];
279         displs[0] += tmp_size * st_offsets[i];
280     }
281     displs[0] *= orig_extent;
282 
283     *newtype = ompi_datatype_create(lastType->super.desc.used);
284     rc = ompi_datatype_add(*newtype, lastType, 1, displs[0], displs[1]);
285     ompi_datatype_destroy(&lastType);
286     /* need to destroy the old type even in error condition, so
287        don't check return code from above until after cleanup. */
288     if (MPI_SUCCESS != rc) {
289         ompi_datatype_destroy (newtype);
290     } else {
291         (void) opal_datatype_resize( &(*newtype)->super, 0, displs[1]);
292     }
293 
294  cleanup:
295     free(st_offsets);
296     free(coords);
297     return rc;
298 }
299