1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2 /*
3  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
4  *                         University Research and Technology
5  *                         Corporation.  All rights reserved.
6  * Copyright (c) 2004-2017 The University of Tennessee and The University
7  *                         of Tennessee Research Foundation.  All rights
8  *                         reserved.
9  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
10  *                         University of Stuttgart.  All rights reserved.
11  * Copyright (c) 2004-2005 The Regents of the University of California.
12  *                         All rights reserved.
13  * Copyright (c) 2013      Los Alamos National Security, LLC. All rights
14  *                         reserved.
15  * Copyright (c) 2015-2016 Research Organization for Information Science
16  *                         and Technology (RIST). All rights reserved.
17  * $COPYRIGHT$
18  *
19  * Additional copyrights may follow
20  *
21  * $HEADER$
22  */
23 
24 #include "ompi_config.h"
25 
26 #include "mpi.h"
27 #include "ompi/constants.h"
28 #include "ompi/datatype/ompi_datatype.h"
29 #include "ompi/communicator/communicator.h"
30 #include "ompi/mca/coll/coll.h"
31 #include "ompi/mca/coll/base/coll_tags.h"
32 #include "ompi/mca/pml/pml.h"
33 #include "ompi/mca/coll/base/coll_base_functions.h"
34 #include "coll_base_topo.h"
35 #include "coll_base_util.h"
36 
37 int
ompi_coll_base_scatter_intra_binomial(const void * sbuf,int scount,struct ompi_datatype_t * sdtype,void * rbuf,int rcount,struct ompi_datatype_t * rdtype,int root,struct ompi_communicator_t * comm,mca_coll_base_module_t * module)38 ompi_coll_base_scatter_intra_binomial( const void *sbuf, int scount,
39                                        struct ompi_datatype_t *sdtype,
40                                        void *rbuf, int rcount,
41                                        struct ompi_datatype_t *rdtype,
42                                        int root,
43                                        struct ompi_communicator_t *comm,
44                                        mca_coll_base_module_t *module)
45 {
46     int line = -1, i, rank, vrank, size, total_send = 0, err;
47     char *ptmp, *tempbuf = NULL;
48     ompi_coll_tree_t* bmtree;
49     MPI_Status status;
50     mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module;
51     mca_coll_base_comm_t *data = base_module->base_data;
52     ptrdiff_t sextent, rextent, ssize, rsize, sgap = 0, rgap = 0;
53 
54 
55     size = ompi_comm_size(comm);
56     rank = ompi_comm_rank(comm);
57 
58     OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
59                  "ompi_coll_base_scatter_intra_binomial rank %d", rank));
60 
61     /* create the binomial tree */
62     COLL_BASE_UPDATE_IN_ORDER_BMTREE( comm, base_module, root );
63     bmtree = data->cached_in_order_bmtree;
64 
65     ompi_datatype_type_extent(rdtype, &rextent);
66 
67     rsize = opal_datatype_span(&rdtype->super, (int64_t)rcount * size, &rgap);
68 
69     vrank = (rank - root + size) % size;
70     ptmp = (char *) rbuf;  /* by default suppose leaf nodes, just use rbuf */
71 
72     if (rank == root) {
73         ompi_datatype_type_extent(sdtype, &sextent);
74         ssize = opal_datatype_span(&sdtype->super, (int64_t)scount * size, &sgap);
75         if (0 == root) {
76             /* root on 0, just use the send buffer */
77             ptmp = (char *) sbuf;
78             if (rbuf != MPI_IN_PLACE) {
79                 /* local copy to rbuf */
80                 err = ompi_datatype_sndrcv(sbuf, scount, sdtype,
81                                            rbuf, rcount, rdtype);
82                 if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
83             }
84         } else {
85             /* root is not on 0, allocate temp buffer for send */
86             tempbuf = (char *) malloc(ssize);
87             if (NULL == tempbuf) {
88                 err = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl;
89             }
90             ptmp = tempbuf - sgap;
91 
92             /* and rotate data so they will eventually in the right place */
93             err = ompi_datatype_copy_content_same_ddt(sdtype, (ptrdiff_t)scount * (ptrdiff_t)(size - root),
94                                                       ptmp, (char *) sbuf + sextent * (ptrdiff_t)root * (ptrdiff_t)scount);
95             if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
96 
97 
98             err = ompi_datatype_copy_content_same_ddt(sdtype, (ptrdiff_t)scount * (ptrdiff_t)root,
99                                                       ptmp + sextent * (ptrdiff_t)scount * (ptrdiff_t)(size - root), (char *)sbuf);
100             if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
101 
102             if (rbuf != MPI_IN_PLACE) {
103                 /* local copy to rbuf */
104                 err = ompi_datatype_sndrcv(ptmp, scount, sdtype,
105                                            rbuf, rcount, rdtype);
106                 if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
107             }
108         }
109         total_send = scount;
110     } else if (!(vrank % 2)) {
111         /* non-root, non-leaf nodes, allocte temp buffer for recv
112          * the most we need is rcount*size/2 */
113         tempbuf = (char *) malloc(rsize);
114         if (NULL == tempbuf) {
115             err= OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl;
116         }
117         ptmp = tempbuf - rgap;
118 
119         sdtype = rdtype;
120         scount = rcount;
121         sextent = rextent;
122         total_send = scount;
123     }
124 
125     if (!(vrank % 2)) {
126         if (rank != root) {
127             /* recv from parent on non-root */
128             err = MCA_PML_CALL(recv(ptmp, (ptrdiff_t)rcount * (ptrdiff_t)size, rdtype, bmtree->tree_prev,
129                                     MCA_COLL_BASE_TAG_SCATTER, comm, &status));
130             if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
131             /* local copy to rbuf */
132             err = ompi_datatype_sndrcv(ptmp, scount, sdtype,
133                                        rbuf, rcount, rdtype);
134             if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
135         }
136         /* send to children on all non-leaf */
137         for (i = 0; i < bmtree->tree_nextsize; i++) {
138             size_t mycount = 0;
139             int vkid;
140             /* figure out how much data I have to send to this child */
141             vkid = (bmtree->tree_next[i] - root + size) % size;
142             mycount = vkid - vrank;
143             if( (int)mycount > (size - vkid) )
144                 mycount = size - vkid;
145             mycount *= scount;
146 
147             err = MCA_PML_CALL(send(ptmp + (ptrdiff_t)total_send * sextent, mycount, sdtype,
148                                     bmtree->tree_next[i],
149                                     MCA_COLL_BASE_TAG_SCATTER,
150                                     MCA_PML_BASE_SEND_STANDARD, comm));
151             if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
152 
153             total_send += mycount;
154         }
155 
156         if (NULL != tempbuf)
157             free(tempbuf);
158     } else {
159         /* recv from parent on leaf nodes */
160         err = MCA_PML_CALL(recv(ptmp, rcount, rdtype, bmtree->tree_prev,
161                                 MCA_COLL_BASE_TAG_SCATTER, comm, &status));
162         if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
163     }
164 
165     return MPI_SUCCESS;
166 
167  err_hndl:
168     if (NULL != tempbuf)
169         free(tempbuf);
170 
171     OPAL_OUTPUT((ompi_coll_base_framework.framework_output,  "%s:%4d\tError occurred %d, rank %2d",
172                  __FILE__, line, err, rank));
173     (void)line;  // silence compiler warning
174     return err;
175 }
176 
/*
 * Linear functions are copied from the BASIC coll module.
 * They do not segment the message and are simple implementations,
 * but for a small number of nodes and/or small data sizes they
 * are just as fast as the base/tree-based segmenting operations,
 * and as such may be selected by the decision functions.
 * They are copied into this module because of the way modules are
 * selected in V1; in V2 this will be handled differently, so the
 * code will not have to be duplicated.
 * JPG following the examples from other coll_base implementations. Dec06.
 */
188 
189 /* copied function (with appropriate renaming) starts here */
190 /*
191  *	scatter_intra
192  *
193  *	Function:	- basic scatter operation
194  *	Accepts:	- same arguments as MPI_Scatter()
195  *	Returns:	- MPI_SUCCESS or error code
196  */
197 int
ompi_coll_base_scatter_intra_basic_linear(const void * sbuf,int scount,struct ompi_datatype_t * sdtype,void * rbuf,int rcount,struct ompi_datatype_t * rdtype,int root,struct ompi_communicator_t * comm,mca_coll_base_module_t * module)198 ompi_coll_base_scatter_intra_basic_linear(const void *sbuf, int scount,
199                                           struct ompi_datatype_t *sdtype,
200                                           void *rbuf, int rcount,
201                                           struct ompi_datatype_t *rdtype,
202                                           int root,
203                                           struct ompi_communicator_t *comm,
204                                           mca_coll_base_module_t *module)
205 {
206     int i, rank, size, err;
207     ptrdiff_t incr;
208     char *ptmp;
209 
210     /* Initialize */
211 
212     rank = ompi_comm_rank(comm);
213     size = ompi_comm_size(comm);
214 
215     /* If not root, receive data. */
216 
217     if (rank != root) {
218         err = MCA_PML_CALL(recv(rbuf, rcount, rdtype, root,
219                                 MCA_COLL_BASE_TAG_SCATTER,
220                                 comm, MPI_STATUS_IGNORE));
221         return err;
222     }
223 
224     /* I am the root, loop sending data. */
225 
226     err = ompi_datatype_type_extent(sdtype, &incr);
227     if (OMPI_SUCCESS != err) {
228         return OMPI_ERROR;
229     }
230 
231     incr *= scount;
232     for (i = 0, ptmp = (char *) sbuf; i < size; ++i, ptmp += incr) {
233 
234         /* simple optimization */
235 
236         if (i == rank) {
237             if (MPI_IN_PLACE != rbuf) {
238                 err =
239                     ompi_datatype_sndrcv(ptmp, scount, sdtype, rbuf, rcount,
240                                          rdtype);
241             }
242         } else {
243             err = MCA_PML_CALL(send(ptmp, scount, sdtype, i,
244                                     MCA_COLL_BASE_TAG_SCATTER,
245                                     MCA_PML_BASE_SEND_STANDARD, comm));
246         }
247         if (MPI_SUCCESS != err) {
248             return err;
249         }
250     }
251 
252     /* All done */
253 
254     return MPI_SUCCESS;
255 }
256 
257 
258 /* copied function (with appropriate renaming) ends here */
259