1 /*
2  * Copyright (C) by Argonne National Laboratory
3  *     See COPYRIGHT in top-level directory
4  */
5 
6 #include "mpiimpl.h"
7 
/* Algorithm: Bruck's
 *
 * This algorithm is from the IEEE TPDS Nov 97 paper by Jehoshua Bruck
 * et al.  It is a variant of the dissemination algorithm for barrier.
 * It takes ceiling(lg p) steps.
 *
 * Cost = lgp.alpha + n.((p-1)/p).beta
 * where n is the total size of data gathered on each process.
 */
/*
 * Add the operations of a Bruck-style allgather to schedule `s`.
 *
 * sendbuf/sendcount/sendtype  local contribution; when sendbuf is
 *                             MPI_IN_PLACE the contribution is read from
 *                             recvbuf at block index `rank` instead.
 * recvbuf/recvcount/recvtype  destination; it is written as comm_size
 *                             consecutive blocks of recvcount elements of
 *                             recvtype (block stride = recvcount * extent).
 * comm_ptr                    supplies local_size and rank.
 * s                           schedule that receives the copy/send/recv
 *                             operations.
 *
 * Returns mpi_errno: MPI_SUCCESS unless one of the called routines/macros
 * reports an error, in which case MPIR_SCHED_CHKPMEM_REAP(s) is run before
 * returning.  (MPIR_ERR_CHECK / MPIR_SCHED_BARRIER / the CHKPMEM macros are
 * defined elsewhere; they are presumed to branch to fn_fail on error.)
 *
 * All byte counts and offsets are computed in MPI_Aint.  recvcount, rank and
 * comm_size are plain ints, and products such as recvcount * comm_size can
 * exceed INT_MAX for large messages, so every such product is widened before
 * multiplying.
 */
int MPIR_Iallgather_intra_sched_brucks(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
                                       void *recvbuf, int recvcount, MPI_Datatype recvtype,
                                       MPIR_Comm * comm_ptr, MPIR_Sched_t s)
{
    int mpi_errno = MPI_SUCCESS;
    int pof2, rem, src, dst;
    int rank, comm_size;
    MPI_Aint recvtype_extent, recvtype_sz;
    MPI_Aint block_bytes;       /* packed size of one rank's contribution */
    MPI_Aint block_extent;      /* stride of one block inside recvbuf */
    MPI_Aint curr_bytes;        /* bytes currently accumulated in tmp_buf */
    MPI_Aint rem_bytes;
    MPI_Aint tmp_bytes;
    void *tmp_buf = NULL;
    MPIR_SCHED_CHKPMEM_DECL(1);

    comm_size = comm_ptr->local_size;
    rank = comm_ptr->rank;

    MPIR_Datatype_get_extent_macro(recvtype, recvtype_extent);
    MPIR_Datatype_get_size_macro(recvtype, recvtype_sz);

    /* widen before multiplying so the products cannot overflow int */
    block_bytes = (MPI_Aint) recvcount *recvtype_sz;
    block_extent = (MPI_Aint) recvcount *recvtype_extent;

    /* allocate a temporary buffer holding comm_size packed blocks; the size
     * is precomputed so the macro is handed a plain variable */
    tmp_bytes = comm_size * block_bytes;
    MPIR_SCHED_CHKPMEM_MALLOC(tmp_buf, void *, tmp_bytes, mpi_errno, "tmp_buf", MPL_MEM_BUFFER);

    /* copy local data to the top of tmp_buf (as packed bytes) */
    if (sendbuf != MPI_IN_PLACE) {
        mpi_errno = MPIR_Sched_copy(sendbuf, sendcount, sendtype,
                                    tmp_buf, block_bytes, MPI_BYTE, s);
    } else {
        mpi_errno = MPIR_Sched_copy((char *) recvbuf + rank * block_extent, recvcount, recvtype,
                                    tmp_buf, block_bytes, MPI_BYTE, s);
    }
    MPIR_ERR_CHECK(mpi_errno);
    MPIR_SCHED_BARRIER(s);

    /* do the first \floor(\lg p) steps: in each step send everything gathered
     * so far to rank - pof2 and append the same amount from rank + pof2 */
    curr_bytes = block_bytes;
    for (pof2 = 1; pof2 <= comm_size / 2; pof2 *= 2) {
        src = (rank + pof2) % comm_size;
        dst = (rank - pof2 + comm_size) % comm_size;

        mpi_errno = MPIR_Sched_send(tmp_buf, curr_bytes, MPI_BYTE, dst, comm_ptr, s);
        MPIR_ERR_CHECK(mpi_errno);
        /* logically sendrecv, so no barrier here */
        mpi_errno = MPIR_Sched_recv((char *) tmp_buf + curr_bytes, curr_bytes, MPI_BYTE,
                                    src, comm_ptr, s);
        MPIR_ERR_CHECK(mpi_errno);
        MPIR_SCHED_BARRIER(s);

        curr_bytes *= 2;
    }

    /* if comm_size is not a power of two, one more step is needed to move
     * the remaining (comm_size - pof2) blocks */
    rem = comm_size - pof2;
    if (rem) {
        rem_bytes = rem * block_bytes;
        src = (rank + pof2) % comm_size;
        dst = (rank - pof2 + comm_size) % comm_size;

        mpi_errno = MPIR_Sched_send(tmp_buf, rem_bytes, MPI_BYTE, dst, comm_ptr, s);
        MPIR_ERR_CHECK(mpi_errno);
        /* logically sendrecv, so no barrier here */
        mpi_errno = MPIR_Sched_recv((char *) tmp_buf + curr_bytes, rem_bytes, MPI_BYTE,
                                    src, comm_ptr, s);
        MPIR_ERR_CHECK(mpi_errno);
        MPIR_SCHED_BARRIER(s);
    }

    /* Rotate blocks in tmp_buf down by (rank) blocks and store
     * result in recvbuf: the first (comm_size - rank) blocks of tmp_buf go
     * to recvbuf starting at block index rank ... */
    mpi_errno = MPIR_Sched_copy(tmp_buf, (comm_size - rank) * block_bytes, MPI_BYTE,
                                (char *) recvbuf + rank * block_extent,
                                (MPI_Aint) (comm_size - rank) * recvcount, recvtype, s);
    MPIR_ERR_CHECK(mpi_errno);
    MPIR_SCHED_BARRIER(s);

    /* ... and the trailing rank blocks wrap around to the start of recvbuf */
    if (rank) {
        mpi_errno = MPIR_Sched_copy((char *) tmp_buf + (comm_size - rank) * block_bytes,
                                    rank * block_bytes, MPI_BYTE,
                                    recvbuf, (MPI_Aint) rank * recvcount, recvtype, s);
        MPIR_ERR_CHECK(mpi_errno);
    }

    MPIR_SCHED_CHKPMEM_COMMIT(s);
  fn_exit:
    return mpi_errno;
  fn_fail:
    MPIR_SCHED_CHKPMEM_REAP(s);
    goto fn_exit;
}
112