1 /*
2 * Copyright (C) by Argonne National Laboratory
3 * See COPYRIGHT in top-level directory
4 */
5
6 #include "mpiimpl.h"
7
8 /* Algorithm: Bruck's
9 *
10 * This algorithm is from the IEEE TPDS Nov 97 paper by Jehoshua Bruck
11 * et al. It is a variant of the dissemination algorithm for barrier.
12 * It takes ceiling(lg p) steps.
13 *
14 * Cost = lgp.alpha + n.((p-1)/p).beta
15 * where n is total size of data gathered on each process.
16 */
/*
 * Add the operations of a Bruck-style allgather to schedule `s`.
 *
 * Parameters:
 *   sendbuf, sendcount, sendtype - local contribution; when sendbuf is
 *       MPI_IN_PLACE the contribution is read from this rank's block of
 *       recvbuf instead.
 *   recvbuf, recvcount, recvtype - destination buffer; block i is placed at
 *       offset i * recvcount * extent(recvtype).
 *   comm_ptr - communicator; local_size and rank are read from it.
 *   s        - schedule the copy/send/recv operations are appended to.
 *
 * Returns MPI_SUCCESS, or the error code of the first scheduling call or
 * allocation that failed (after reaping the temporary buffer).
 *
 * All byte sizes and offsets are computed in MPI_Aint.  The operands are
 * plain ints, and multiplying them together before widening can overflow
 * for large counts or communicators.
 * NOTE(review): this assumes the count parameters of MPIR_Sched_copy/send/recv
 * are at least MPI_Aint wide -- confirm against their prototypes.
 */
int MPIR_Iallgather_intra_sched_brucks(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
                                       void *recvbuf, int recvcount, MPI_Datatype recvtype,
                                       MPIR_Comm * comm_ptr, MPIR_Sched_t s)
{
    int mpi_errno = MPI_SUCCESS;
    int pof2, rem, src, dst;
    int rank, comm_size;
    MPI_Aint recvtype_extent, recvtype_sz;
    MPI_Aint block_bytes;       /* packed size of one rank's contribution */
    MPI_Aint block_extent;      /* stride between blocks in recvbuf */
    MPI_Aint curr_bytes;        /* bytes of tmp_buf filled so far */
    MPI_Aint total_bytes;       /* packed size of the full result */
    void *tmp_buf = NULL;
    MPIR_SCHED_CHKPMEM_DECL(1);

    comm_size = comm_ptr->local_size;
    rank = comm_ptr->rank;

    MPIR_Datatype_get_extent_macro(recvtype, recvtype_extent);
    MPIR_Datatype_get_size_macro(recvtype, recvtype_sz);

    /* widen before multiplying so the products cannot overflow int */
    block_bytes = (MPI_Aint) recvcount * recvtype_sz;
    block_extent = (MPI_Aint) recvcount * recvtype_extent;
    total_bytes = (MPI_Aint) comm_size * block_bytes;

    /* allocate a temporary (packed) buffer large enough for all blocks */
    MPIR_SCHED_CHKPMEM_MALLOC(tmp_buf, void *, total_bytes, mpi_errno, "tmp_buf", MPL_MEM_BUFFER);

    /* copy local data to the top of tmp_buf */
    if (sendbuf != MPI_IN_PLACE) {
        mpi_errno = MPIR_Sched_copy(sendbuf, sendcount, sendtype,
                                    tmp_buf, block_bytes, MPI_BYTE, s);
        MPIR_ERR_CHECK(mpi_errno);
        MPIR_SCHED_BARRIER(s);
    } else {
        mpi_errno = MPIR_Sched_copy((char *) recvbuf + rank * block_extent,
                                    recvcount, recvtype, tmp_buf, block_bytes, MPI_BYTE, s);
        MPIR_ERR_CHECK(mpi_errno);
        MPIR_SCHED_BARRIER(s);
    }

    /* do the first \floor(\lg p) steps: each step doubles the amount of
     * data held at the top of tmp_buf */
    curr_bytes = block_bytes;
    pof2 = 1;
    while (pof2 <= comm_size / 2) {
        src = (rank + pof2) % comm_size;
        dst = (rank - pof2 + comm_size) % comm_size;

        mpi_errno = MPIR_Sched_send(tmp_buf, curr_bytes, MPI_BYTE, dst, comm_ptr, s);
        MPIR_ERR_CHECK(mpi_errno);
        /* logically sendrecv, so no barrier here */
        mpi_errno = MPIR_Sched_recv((char *) tmp_buf + curr_bytes,
                                    curr_bytes, MPI_BYTE, src, comm_ptr, s);
        MPIR_ERR_CHECK(mpi_errno);
        MPIR_SCHED_BARRIER(s);

        curr_bytes *= 2;
        pof2 *= 2;
    }

    /* if comm_size is not a power of two, one more step is needed to
     * exchange the remaining (comm_size - pof2) blocks */
    rem = comm_size - pof2;
    if (rem) {
        MPI_Aint rem_bytes = rem * block_bytes;

        src = (rank + pof2) % comm_size;
        dst = (rank - pof2 + comm_size) % comm_size;

        mpi_errno = MPIR_Sched_send(tmp_buf, rem_bytes, MPI_BYTE, dst, comm_ptr, s);
        MPIR_ERR_CHECK(mpi_errno);
        /* logically sendrecv, so no barrier here */
        mpi_errno = MPIR_Sched_recv((char *) tmp_buf + curr_bytes,
                                    rem_bytes, MPI_BYTE, src, comm_ptr, s);
        MPIR_ERR_CHECK(mpi_errno);
        MPIR_SCHED_BARRIER(s);
    }

    /* Rotate blocks in tmp_buf down by (rank) blocks and store
     * result in recvbuf: the first (comm_size - rank) blocks of tmp_buf
     * go to recvbuf blocks [rank, comm_size) ... */
    mpi_errno = MPIR_Sched_copy(tmp_buf, (comm_size - rank) * block_bytes, MPI_BYTE,
                                (char *) recvbuf + rank * block_extent,
                                (MPI_Aint) (comm_size - rank) * recvcount, recvtype, s);
    MPIR_ERR_CHECK(mpi_errno);
    MPIR_SCHED_BARRIER(s);

    /* ... and the trailing (rank) blocks go to recvbuf blocks [0, rank) */
    if (rank) {
        mpi_errno = MPIR_Sched_copy((char *) tmp_buf + (comm_size - rank) * block_bytes,
                                    rank * block_bytes, MPI_BYTE,
                                    recvbuf, (MPI_Aint) rank * recvcount, recvtype, s);
        MPIR_ERR_CHECK(mpi_errno);
    }

    MPIR_SCHED_CHKPMEM_COMMIT(s);
  fn_exit:
    return mpi_errno;
  fn_fail:
    MPIR_SCHED_CHKPMEM_REAP(s);
    goto fn_exit;
}
112