1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2 /*
3 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
4 * University Research and Technology
5 * Corporation. All rights reserved.
6 * Copyright (c) 2004-2017 The University of Tennessee and The University
7 * of Tennessee Research Foundation. All rights
8 * reserved.
9 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
10 * University of Stuttgart. All rights reserved.
11 * Copyright (c) 2004-2005 The Regents of the University of California.
12 * All rights reserved.
13 * Copyright (c) 2013 Los Alamos National Security, LLC. All rights
14 * reserved.
15 * Copyright (c) 2015-2016 Research Organization for Information Science
16 * and Technology (RIST). All rights reserved.
17 * $COPYRIGHT$
18 *
19 * Additional copyrights may follow
20 *
21 * $HEADER$
22 */
23
24 #include "ompi_config.h"
25
26 #include "mpi.h"
27 #include "ompi/constants.h"
28 #include "ompi/datatype/ompi_datatype.h"
29 #include "ompi/communicator/communicator.h"
30 #include "ompi/mca/coll/coll.h"
31 #include "ompi/mca/coll/base/coll_tags.h"
32 #include "ompi/mca/pml/pml.h"
33 #include "ompi/mca/coll/base/coll_base_functions.h"
34 #include "coll_base_topo.h"
35 #include "coll_base_util.h"
36
37 int
ompi_coll_base_scatter_intra_binomial(const void * sbuf,int scount,struct ompi_datatype_t * sdtype,void * rbuf,int rcount,struct ompi_datatype_t * rdtype,int root,struct ompi_communicator_t * comm,mca_coll_base_module_t * module)38 ompi_coll_base_scatter_intra_binomial( const void *sbuf, int scount,
39 struct ompi_datatype_t *sdtype,
40 void *rbuf, int rcount,
41 struct ompi_datatype_t *rdtype,
42 int root,
43 struct ompi_communicator_t *comm,
44 mca_coll_base_module_t *module)
45 {
46 int line = -1, i, rank, vrank, size, total_send = 0, err;
47 char *ptmp, *tempbuf = NULL;
48 ompi_coll_tree_t* bmtree;
49 MPI_Status status;
50 mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module;
51 mca_coll_base_comm_t *data = base_module->base_data;
52 ptrdiff_t sextent, rextent, ssize, rsize, sgap = 0, rgap = 0;
53
54
55 size = ompi_comm_size(comm);
56 rank = ompi_comm_rank(comm);
57
58 OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
59 "ompi_coll_base_scatter_intra_binomial rank %d", rank));
60
61 /* create the binomial tree */
62 COLL_BASE_UPDATE_IN_ORDER_BMTREE( comm, base_module, root );
63 bmtree = data->cached_in_order_bmtree;
64
65 ompi_datatype_type_extent(rdtype, &rextent);
66
67 rsize = opal_datatype_span(&rdtype->super, (int64_t)rcount * size, &rgap);
68
69 vrank = (rank - root + size) % size;
70 ptmp = (char *) rbuf; /* by default suppose leaf nodes, just use rbuf */
71
72 if (rank == root) {
73 ompi_datatype_type_extent(sdtype, &sextent);
74 ssize = opal_datatype_span(&sdtype->super, (int64_t)scount * size, &sgap);
75 if (0 == root) {
76 /* root on 0, just use the send buffer */
77 ptmp = (char *) sbuf;
78 if (rbuf != MPI_IN_PLACE) {
79 /* local copy to rbuf */
80 err = ompi_datatype_sndrcv(sbuf, scount, sdtype,
81 rbuf, rcount, rdtype);
82 if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
83 }
84 } else {
85 /* root is not on 0, allocate temp buffer for send */
86 tempbuf = (char *) malloc(ssize);
87 if (NULL == tempbuf) {
88 err = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl;
89 }
90 ptmp = tempbuf - sgap;
91
92 /* and rotate data so they will eventually in the right place */
93 err = ompi_datatype_copy_content_same_ddt(sdtype, (ptrdiff_t)scount * (ptrdiff_t)(size - root),
94 ptmp, (char *) sbuf + sextent * (ptrdiff_t)root * (ptrdiff_t)scount);
95 if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
96
97
98 err = ompi_datatype_copy_content_same_ddt(sdtype, (ptrdiff_t)scount * (ptrdiff_t)root,
99 ptmp + sextent * (ptrdiff_t)scount * (ptrdiff_t)(size - root), (char *)sbuf);
100 if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
101
102 if (rbuf != MPI_IN_PLACE) {
103 /* local copy to rbuf */
104 err = ompi_datatype_sndrcv(ptmp, scount, sdtype,
105 rbuf, rcount, rdtype);
106 if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
107 }
108 }
109 total_send = scount;
110 } else if (!(vrank % 2)) {
111 /* non-root, non-leaf nodes, allocte temp buffer for recv
112 * the most we need is rcount*size/2 */
113 tempbuf = (char *) malloc(rsize);
114 if (NULL == tempbuf) {
115 err= OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl;
116 }
117 ptmp = tempbuf - rgap;
118
119 sdtype = rdtype;
120 scount = rcount;
121 sextent = rextent;
122 total_send = scount;
123 }
124
125 if (!(vrank % 2)) {
126 if (rank != root) {
127 /* recv from parent on non-root */
128 err = MCA_PML_CALL(recv(ptmp, (ptrdiff_t)rcount * (ptrdiff_t)size, rdtype, bmtree->tree_prev,
129 MCA_COLL_BASE_TAG_SCATTER, comm, &status));
130 if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
131 /* local copy to rbuf */
132 err = ompi_datatype_sndrcv(ptmp, scount, sdtype,
133 rbuf, rcount, rdtype);
134 if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
135 }
136 /* send to children on all non-leaf */
137 for (i = 0; i < bmtree->tree_nextsize; i++) {
138 size_t mycount = 0;
139 int vkid;
140 /* figure out how much data I have to send to this child */
141 vkid = (bmtree->tree_next[i] - root + size) % size;
142 mycount = vkid - vrank;
143 if( (int)mycount > (size - vkid) )
144 mycount = size - vkid;
145 mycount *= scount;
146
147 err = MCA_PML_CALL(send(ptmp + (ptrdiff_t)total_send * sextent, mycount, sdtype,
148 bmtree->tree_next[i],
149 MCA_COLL_BASE_TAG_SCATTER,
150 MCA_PML_BASE_SEND_STANDARD, comm));
151 if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
152
153 total_send += mycount;
154 }
155
156 if (NULL != tempbuf)
157 free(tempbuf);
158 } else {
159 /* recv from parent on leaf nodes */
160 err = MCA_PML_CALL(recv(ptmp, rcount, rdtype, bmtree->tree_prev,
161 MCA_COLL_BASE_TAG_SCATTER, comm, &status));
162 if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
163 }
164
165 return MPI_SUCCESS;
166
167 err_hndl:
168 if (NULL != tempbuf)
169 free(tempbuf);
170
171 OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d",
172 __FILE__, line, err, rank));
173 (void)line; // silence compiler warning
174 return err;
175 }
176
177 /*
178 * Linear functions are copied from the BASIC coll module
179 * they do not segment the message and are simple implementations
180 * but for some small number of nodes and/or small data sizes they
181 * are just as fast as base/tree based segmenting operations
182 * and as such may be selected by the decision functions
183 * These are copied into this module due to the way we select modules
184 * in V1. i.e. in V2 we will handle this differently and so will not
185 * have to duplicate code.
186 * JPG following the examples from other coll_base implementations. Dec06.
187 */
188
189 /* copied function (with appropriate renaming) starts here */
190 /*
191 * scatter_intra
192 *
193 * Function: - basic scatter operation
194 * Accepts: - same arguments as MPI_Scatter()
195 * Returns: - MPI_SUCCESS or error code
196 */
197 int
ompi_coll_base_scatter_intra_basic_linear(const void * sbuf,int scount,struct ompi_datatype_t * sdtype,void * rbuf,int rcount,struct ompi_datatype_t * rdtype,int root,struct ompi_communicator_t * comm,mca_coll_base_module_t * module)198 ompi_coll_base_scatter_intra_basic_linear(const void *sbuf, int scount,
199 struct ompi_datatype_t *sdtype,
200 void *rbuf, int rcount,
201 struct ompi_datatype_t *rdtype,
202 int root,
203 struct ompi_communicator_t *comm,
204 mca_coll_base_module_t *module)
205 {
206 int i, rank, size, err;
207 ptrdiff_t incr;
208 char *ptmp;
209
210 /* Initialize */
211
212 rank = ompi_comm_rank(comm);
213 size = ompi_comm_size(comm);
214
215 /* If not root, receive data. */
216
217 if (rank != root) {
218 err = MCA_PML_CALL(recv(rbuf, rcount, rdtype, root,
219 MCA_COLL_BASE_TAG_SCATTER,
220 comm, MPI_STATUS_IGNORE));
221 return err;
222 }
223
224 /* I am the root, loop sending data. */
225
226 err = ompi_datatype_type_extent(sdtype, &incr);
227 if (OMPI_SUCCESS != err) {
228 return OMPI_ERROR;
229 }
230
231 incr *= scount;
232 for (i = 0, ptmp = (char *) sbuf; i < size; ++i, ptmp += incr) {
233
234 /* simple optimization */
235
236 if (i == rank) {
237 if (MPI_IN_PLACE != rbuf) {
238 err =
239 ompi_datatype_sndrcv(ptmp, scount, sdtype, rbuf, rcount,
240 rdtype);
241 }
242 } else {
243 err = MCA_PML_CALL(send(ptmp, scount, sdtype, i,
244 MCA_COLL_BASE_TAG_SCATTER,
245 MCA_PML_BASE_SEND_STANDARD, comm));
246 }
247 if (MPI_SUCCESS != err) {
248 return err;
249 }
250 }
251
252 /* All done */
253
254 return MPI_SUCCESS;
255 }
256
257
258 /* copied function (with appropriate renaming) ends here */
259