/*-
 * Copyright (c) 2012-2017 Ilya Kaliman
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
26
27 #ifdef EFP_USE_MPI
28 #include <mpi.h>
29 #endif
30
31 #ifdef _OPENMP
32 #include <omp.h>
33 #endif
34
35 #include "balance.h"
36 #include "private.h"
37
38 #ifdef EFP_USE_MPI
/*
 * Shared dispatcher state for dynamic work distribution on rank 0.
 * total   - number of work units (fragments) to hand out
 * range[] - half-open [begin, end) interval of the most recently
 *           issued chunk; both start at 0 before any work is given out
 */
struct master {
	int total, range[2];
};

/* Number of work units handed out per request. */
#define MPI_CHUNK_SIZE 16
44
45 static int
master_get_work(struct master * master,int range[2])46 master_get_work(struct master *master, int range[2])
47 {
48 #ifdef _OPENMP
49 #pragma omp critical
50 #endif
51 {
52 master->range[0] = master->range[1];
53 master->range[1] += MPI_CHUNK_SIZE;
54
55 if (master->range[1] > master->total)
56 master->range[1] = master->total;
57
58 range[0] = master->range[0];
59 range[1] = master->range[1];
60 }
61 return range[1] > range[0];
62 }
63
64 static void
master_on_master(struct master * master)65 master_on_master(struct master *master)
66 {
67 MPI_Status status;
68 int size, range[2];
69
70 MPI_Comm_size(MPI_COMM_WORLD, &size);
71
72 while (master_get_work(master, range)) {
73 MPI_Recv(NULL, 0, MPI_INT, MPI_ANY_SOURCE, 0,
74 MPI_COMM_WORLD, &status);
75 MPI_Send(range, 2, MPI_INT, status.MPI_SOURCE, 0,
76 MPI_COMM_WORLD);
77 }
78
79 range[0] = range[1] = -1;
80
81 for (int i = 1; i < size; i++) {
82 MPI_Recv(NULL, 0, MPI_INT, MPI_ANY_SOURCE, 0,
83 MPI_COMM_WORLD, &status);
84 MPI_Send(range, 2, MPI_INT, status.MPI_SOURCE, 0,
85 MPI_COMM_WORLD);
86 }
87 }
88
89 static void
slave_on_master(struct master * master,struct efp * efp,work_fn fn,void * data)90 slave_on_master(struct master *master, struct efp *efp, work_fn fn, void *data)
91 {
92 int range[2];
93
94 while (master_get_work(master, range))
95 fn(efp, range[0], range[1], data);
96 }
97
#ifndef _OPENMP
/* Serial fallback: without OpenMP there is exactly one thread, number 0,
 * so do_master() always takes the master_on_master() branch. */
static int
omp_get_thread_num(void)
{
	return 0;
}
#endif /* _OPENMP */
105
106 static void
do_master(struct efp * efp,work_fn fn,void * data)107 do_master(struct efp *efp, work_fn fn, void *data)
108 {
109 struct master master;
110
111 master.total = (int)efp->n_frag;
112 master.range[0] = master.range[1] = 0;
113
114 #ifdef _OPENMP
115 #pragma omp parallel
116 #endif
117 {
118 if (omp_get_thread_num() == 0)
119 master_on_master(&master);
120 else
121 slave_on_master(&master, efp, fn, data);
122 }
123 }
124
125 static void
do_slave(struct efp * efp,work_fn fn,void * data)126 do_slave(struct efp *efp, work_fn fn, void *data)
127 {
128 int range[2];
129
130 for (;;) {
131 MPI_Send(NULL, 0, MPI_INT, 0, 0, MPI_COMM_WORLD);
132 MPI_Recv(range, 2, MPI_INT, 0, 0, MPI_COMM_WORLD,
133 MPI_STATUS_IGNORE);
134
135 if (range[0] == -1 ||
136 range[1] == -1)
137 break;
138
139 fn(efp, range[0], range[1], data);
140 }
141 }
142 #endif /* EFP_USE_MPI */
143
/*
 * Sum the n-element array x element-wise across all MPI ranks, in place.
 * In a build without MPI support this is a no-op.
 */
void
efp_allreduce(double *x, size_t n)
{
#ifdef EFP_USE_MPI
	MPI_Allreduce(MPI_IN_PLACE, x, (int)n, MPI_DOUBLE, MPI_SUM,
	    MPI_COMM_WORLD);
#else
	(void)x;
	(void)n;
#endif
}
155
156 void
efp_balance_work(struct efp * efp,work_fn fn,void * data)157 efp_balance_work(struct efp *efp, work_fn fn, void *data)
158 {
159 #ifdef EFP_USE_MPI
160 int rank, size;
161
162 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
163 MPI_Comm_size(MPI_COMM_WORLD, &size);
164
165 if (size == 1)
166 fn(efp, 0, efp->n_frag, data);
167 else {
168 MPI_Barrier(MPI_COMM_WORLD);
169
170 if (rank == 0)
171 do_master(efp, fn, data);
172 else
173 do_slave(efp, fn, data);
174
175 MPI_Barrier(MPI_COMM_WORLD);
176 }
177 #else
178 fn(efp, 0, efp->n_frag, data);
179 #endif
180 }
181