1 /******************************************************************************
2 * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3 * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4 *
5 * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6 ******************************************************************************/
7
8 #include "_hypre_parcsr_mv.h"
9
10 /*==========================================================================*/
11
12 #ifdef HYPRE_USING_PERSISTENT_COMM
13 static CommPkgJobType getJobTypeOf(HYPRE_Int job)
14 {
15 CommPkgJobType job_type = HYPRE_COMM_PKG_JOB_COMPLEX;
16 switch (job)
17 {
18 case 1:
19 job_type = HYPRE_COMM_PKG_JOB_COMPLEX;
20 break;
21 case 2:
22 job_type = HYPRE_COMM_PKG_JOB_COMPLEX_TRANSPOSE;
23 break;
24 case 11:
25 job_type = HYPRE_COMM_PKG_JOB_INT;
26 break;
27 case 12:
28 job_type = HYPRE_COMM_PKG_JOB_INT_TRANSPOSE;
29 break;
30 case 21:
31 job_type = HYPRE_COMM_PKG_JOB_BIGINT;
32 break;
33 case 22:
34 job_type = HYPRE_COMM_PKG_JOB_BIGINT_TRANSPOSE;
35 break;
36 } // switch (job)
37
38 return job_type;
39 }
40
41 /**
42 * Send and receive buffers are allocated internally;
43 * the CommHandle owns them.
44 */
45 hypre_ParCSRPersistentCommHandle*
46 hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, hypre_ParCSRCommPkg *comm_pkg )
47 {
48 HYPRE_Int i;
49 size_t num_bytes_send, num_bytes_recv;
50
51 hypre_ParCSRPersistentCommHandle *comm_handle = hypre_CTAlloc(hypre_ParCSRPersistentCommHandle, 1, HYPRE_MEMORY_HOST);
52
53 CommPkgJobType job_type = getJobTypeOf(job);
54
55 HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
56 HYPRE_Int num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
57 MPI_Comm comm = hypre_ParCSRCommPkgComm(comm_pkg);
58
59 HYPRE_Int num_requests = num_sends + num_recvs;
60 hypre_MPI_Request *requests = hypre_CTAlloc(hypre_MPI_Request, num_requests, HYPRE_MEMORY_HOST);
61
62 hypre_ParCSRCommHandleNumRequests(comm_handle) = num_requests;
63 hypre_ParCSRCommHandleRequests(comm_handle) = requests;
64
65 void *send_buff = NULL, *recv_buff = NULL;
66
67 switch (job_type)
68 {
69 case HYPRE_COMM_PKG_JOB_COMPLEX:
70 num_bytes_send = sizeof(HYPRE_Complex) * hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends);
71 num_bytes_recv = sizeof(HYPRE_Complex) * hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs);
72 send_buff = hypre_TAlloc(HYPRE_Complex, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), HYPRE_MEMORY_HOST);
73 recv_buff = hypre_TAlloc(HYPRE_Complex, hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), HYPRE_MEMORY_HOST);
74 for (i = 0; i < num_recvs; ++i)
75 {
76 HYPRE_Int ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i);
77 HYPRE_Int vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i);
78 HYPRE_Int vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start;
79 hypre_MPI_Recv_init( (HYPRE_Complex *)recv_buff + vec_start, vec_len, HYPRE_MPI_COMPLEX,
80 ip, 0, comm, requests + i );
81 }
82 for (i = 0; i < num_sends; ++i)
83 {
84 HYPRE_Int ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i);
85 HYPRE_Int vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
86 HYPRE_Int vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start;
87 hypre_MPI_Send_init( (HYPRE_Complex *)send_buff + vec_start, vec_len, HYPRE_MPI_COMPLEX,
88 ip, 0, comm, requests + num_recvs + i );
89 }
90 break;
91
92 case HYPRE_COMM_PKG_JOB_COMPLEX_TRANSPOSE:
93 num_bytes_recv = sizeof(HYPRE_Complex) * hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends);
94 num_bytes_send = sizeof(HYPRE_Complex) * hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs);
95 recv_buff = hypre_TAlloc(HYPRE_Complex, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), HYPRE_MEMORY_HOST);
96 send_buff = hypre_TAlloc(HYPRE_Complex, hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), HYPRE_MEMORY_HOST);
97 for (i = 0; i < num_sends; ++i)
98 {
99 HYPRE_Int ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i);
100 HYPRE_Int vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
101 HYPRE_Int vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start;
102 hypre_MPI_Recv_init( (HYPRE_Complex *)recv_buff + vec_start, vec_len, HYPRE_MPI_COMPLEX,
103 ip, 0, comm, requests + i );
104 }
105 for (i = 0; i < num_recvs; ++i)
106 {
107 HYPRE_Int ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i);
108 HYPRE_Int vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i);
109 HYPRE_Int vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start;
110 hypre_MPI_Send_init( (HYPRE_Complex *)send_buff + vec_start, vec_len, HYPRE_MPI_COMPLEX,
111 ip, 0, comm, requests + num_sends + i );
112 }
113 break;
114
115 case HYPRE_COMM_PKG_JOB_INT:
116 num_bytes_send = sizeof(HYPRE_Int) * hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends);
117 num_bytes_recv = sizeof(HYPRE_Int) * hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs);
118 send_buff = hypre_TAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), HYPRE_MEMORY_HOST);
119 recv_buff = hypre_TAlloc(HYPRE_Int, hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), HYPRE_MEMORY_HOST);
120 for (i = 0; i < num_recvs; ++i)
121 {
122 HYPRE_Int ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i);
123 HYPRE_Int vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i);
124 HYPRE_Int vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start;
125 hypre_MPI_Recv_init( (HYPRE_Int *)recv_buff + vec_start, vec_len, HYPRE_MPI_INT,
126 ip, 0, comm, requests + i );
127 }
128 for (i = 0; i < num_sends; ++i)
129 {
130 HYPRE_Int ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i);
131 HYPRE_Int vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
132 HYPRE_Int vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start;
133 hypre_MPI_Send_init( (HYPRE_Int *)send_buff + vec_start, vec_len, HYPRE_MPI_INT,
134 ip, 0, comm, requests + num_recvs + i );
135 }
136 break;
137
138 case HYPRE_COMM_PKG_JOB_INT_TRANSPOSE:
139 num_bytes_recv = sizeof(HYPRE_Int) * hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends);
140 num_bytes_send = sizeof(HYPRE_Int) * hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs);
141 recv_buff = hypre_TAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), HYPRE_MEMORY_HOST);
142 send_buff = hypre_TAlloc(HYPRE_Int, hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), HYPRE_MEMORY_HOST);
143 for (i = 0; i < num_sends; ++i)
144 {
145 HYPRE_Int ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i);
146 HYPRE_Int vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
147 HYPRE_Int vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start;
148 hypre_MPI_Recv_init( (HYPRE_Int *)recv_buff + vec_start, vec_len, HYPRE_MPI_INT,
149 ip, 0, comm, requests + i );
150 }
151 for (i = 0; i < num_recvs; ++i)
152 {
153 HYPRE_Int ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i);
154 HYPRE_Int vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i);
155 HYPRE_Int vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start;
156 hypre_MPI_Send_init( (HYPRE_Int *)send_buff + vec_start, vec_len, HYPRE_MPI_INT,
157 ip, 0, comm, requests + num_sends + i );
158 }
159 break;
160
161 case HYPRE_COMM_PKG_JOB_BIGINT:
162 num_bytes_send = sizeof(HYPRE_BigInt) * hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends);
163 num_bytes_recv = sizeof(HYPRE_BigInt) * hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs);
164 send_buff = hypre_TAlloc(HYPRE_BigInt, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), HYPRE_MEMORY_HOST);
165 recv_buff = hypre_TAlloc(HYPRE_BigInt, hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), HYPRE_MEMORY_HOST);
166 for (i = 0; i < num_recvs; ++i)
167 {
168 HYPRE_Int ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i);
169 HYPRE_Int vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i);
170 HYPRE_Int vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start;
171 hypre_MPI_Recv_init( (HYPRE_BigInt *)recv_buff + (HYPRE_BigInt)vec_start, vec_len, HYPRE_MPI_BIG_INT,
172 ip, 0, comm, requests + i );
173 }
174 for (i = 0; i < num_sends; ++i)
175 {
176 HYPRE_Int ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i);
177 HYPRE_Int vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
178 HYPRE_Int vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start;
179 hypre_MPI_Send_init( (HYPRE_BigInt *)send_buff + (HYPRE_BigInt)vec_start, vec_len, HYPRE_MPI_BIG_INT,
180 ip, 0, comm, requests + num_recvs + i);
181 }
182 break;
183
184 case HYPRE_COMM_PKG_JOB_BIGINT_TRANSPOSE:
185 num_bytes_recv = sizeof(HYPRE_BigInt) * hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends);
186 num_bytes_send = sizeof(HYPRE_BigInt) * hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs);
187 recv_buff = hypre_TAlloc(HYPRE_BigInt, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), HYPRE_MEMORY_HOST);
188 send_buff = hypre_TAlloc(HYPRE_BigInt, hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), HYPRE_MEMORY_HOST);
189 for (i = 0; i < num_sends; ++i)
190 {
191 HYPRE_Int ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i);
192 HYPRE_Int vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
193 HYPRE_Int vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start;
194 hypre_MPI_Recv_init( (HYPRE_BigInt *)recv_buff + (HYPRE_BigInt)vec_start, vec_len, HYPRE_MPI_BIG_INT,
195 ip, 0, comm, requests + i );
196 }
197 for (i = 0; i < num_recvs; ++i)
198 {
199 HYPRE_Int ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i);
200 HYPRE_Int vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i);
201 HYPRE_Int vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start;
202
203 hypre_MPI_Send_init( (HYPRE_BigInt *)send_buff + (HYPRE_BigInt)vec_start, vec_len, HYPRE_MPI_BIG_INT,
204 ip, 0, comm, requests + num_sends + i);
205 }
206 break;
207 default:
208 hypre_assert(1 == 0);
209 break;
210 } // switch (job_type)
211
212 hypre_ParCSRCommHandleRecvDataBuffer(comm_handle) = recv_buff;
213 hypre_ParCSRCommHandleSendDataBuffer(comm_handle) = send_buff;
214 hypre_ParCSRCommHandleNumSendBytes(comm_handle) = num_bytes_send;
215 hypre_ParCSRCommHandleNumRecvBytes(comm_handle) = num_bytes_recv;
216
217 return ( comm_handle );
218 }
219
220
221 hypre_ParCSRPersistentCommHandle*
222 hypre_ParCSRCommPkgGetPersistentCommHandle( HYPRE_Int job, hypre_ParCSRCommPkg *comm_pkg )
223 {
224 CommPkgJobType type = getJobTypeOf(job);
225 if (!comm_pkg->persistent_comm_handles[type])
226 {
227 /* data is owned by persistent comm handle */
228 comm_pkg->persistent_comm_handles[type] = hypre_ParCSRPersistentCommHandleCreate(job, comm_pkg);
229 }
230
231 return comm_pkg->persistent_comm_handles[type];
232 }
233
234 void
235 hypre_ParCSRPersistentCommHandleDestroy( hypre_ParCSRPersistentCommHandle *comm_handle )
236 {
237 hypre_TFree(hypre_ParCSRCommHandleSendDataBuffer(comm_handle), HYPRE_MEMORY_HOST);
238 hypre_TFree(hypre_ParCSRCommHandleRecvDataBuffer(comm_handle), HYPRE_MEMORY_HOST);
239 hypre_TFree(comm_handle->requests, HYPRE_MEMORY_HOST);
240 hypre_TFree(comm_handle, HYPRE_MEMORY_HOST);
241 }
242
243 void hypre_ParCSRPersistentCommHandleStart( hypre_ParCSRPersistentCommHandle *comm_handle,
244 HYPRE_MemoryLocation send_memory_location,
245 void *send_data )
246 {
247 hypre_ParCSRCommHandleSendData(comm_handle) = send_data;
248 hypre_ParCSRCommHandleSendMemoryLocation(comm_handle) = send_memory_location;
249
250 if (hypre_ParCSRCommHandleNumRequests(comm_handle) > 0)
251 {
252 hypre_TMemcpy( hypre_ParCSRCommHandleSendDataBuffer(comm_handle),
253 send_data,
254 char,
255 hypre_ParCSRCommHandleNumSendBytes(comm_handle),
256 HYPRE_MEMORY_HOST,
257 send_memory_location );
258
259 HYPRE_Int ret = hypre_MPI_Startall(hypre_ParCSRCommHandleNumRequests(comm_handle),
260 hypre_ParCSRCommHandleRequests(comm_handle));
261 if (hypre_MPI_SUCCESS != ret)
262 {
263 hypre_error_w_msg(HYPRE_ERROR_GENERIC,"MPI error\n");
264 /*hypre_printf("MPI error %d in %s (%s, line %u)\n", ret, __FUNCTION__, __FILE__, __LINE__);*/
265 }
266 }
267 }
268
269 void hypre_ParCSRPersistentCommHandleWait( hypre_ParCSRPersistentCommHandle *comm_handle,
270 HYPRE_MemoryLocation recv_memory_location,
271 void *recv_data )
272 {
273 hypre_ParCSRCommHandleRecvData(comm_handle) = recv_data;
274 hypre_ParCSRCommHandleRecvMemoryLocation(comm_handle) = recv_memory_location;
275
276 if (hypre_ParCSRCommHandleNumRequests(comm_handle) > 0)
277 {
278 HYPRE_Int ret = hypre_MPI_Waitall(hypre_ParCSRCommHandleNumRequests(comm_handle),
279 hypre_ParCSRCommHandleRequests(comm_handle),
280 hypre_MPI_STATUSES_IGNORE);
281 if (hypre_MPI_SUCCESS != ret)
282 {
283 hypre_error_w_msg(HYPRE_ERROR_GENERIC,"MPI error\n");
284 /*hypre_printf("MPI error %d in %s (%s, line %u)\n", ret, __FUNCTION__, __FILE__, __LINE__);*/
285 }
286
287 hypre_TMemcpy(recv_data,
288 hypre_ParCSRCommHandleRecvDataBuffer(comm_handle),
289 char,
290 hypre_ParCSRCommHandleNumRecvBytes(comm_handle),
291 recv_memory_location,
292 HYPRE_MEMORY_HOST);
293 }
294 }
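/*
 * Illustrative usage sketch (not part of the library): driving a persistent
 * Matvec-style exchange (job == 1).  The handle is cached inside comm_pkg by
 * hypre_ParCSRCommPkgGetPersistentCommHandle, so repeated exchanges reuse the
 * same persistent MPI requests.  The arrays `send_vals` and `recv_vals` are
 * hypothetical caller-owned buffers sized by send_map_starts[num_sends] and
 * recv_vec_starts[num_recvs], respectively.
 *
 *    hypre_ParCSRPersistentCommHandle *handle =
 *       hypre_ParCSRCommPkgGetPersistentCommHandle(1, comm_pkg);
 *
 *    hypre_ParCSRPersistentCommHandleStart(handle, HYPRE_MEMORY_HOST, send_vals);
 *    ... overlap purely local work here ...
 *    hypre_ParCSRPersistentCommHandleWait(handle, HYPRE_MEMORY_HOST, recv_vals);
 */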
295 #endif // HYPRE_USING_PERSISTENT_COMM
296
297 hypre_ParCSRCommHandle*
298 hypre_ParCSRCommHandleCreate ( HYPRE_Int job,
299 hypre_ParCSRCommPkg *comm_pkg,
300 void *send_data,
301 void *recv_data )
302 {
303 return hypre_ParCSRCommHandleCreate_v2(job, comm_pkg, HYPRE_MEMORY_HOST, send_data,
304 HYPRE_MEMORY_HOST, recv_data);
305 }
306
307 hypre_ParCSRCommHandle*
308 hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job,
309 hypre_ParCSRCommPkg *comm_pkg,
310 HYPRE_MemoryLocation send_memory_location,
311 void *send_data_in,
312 HYPRE_MemoryLocation recv_memory_location,
313 void *recv_data_in )
314 {
315 HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
316 HYPRE_Int num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
317 MPI_Comm comm = hypre_ParCSRCommPkgComm(comm_pkg);
318 HYPRE_Int num_send_bytes = 0;
319 HYPRE_Int num_recv_bytes = 0;
320 hypre_ParCSRCommHandle *comm_handle;
321 HYPRE_Int num_requests;
322 hypre_MPI_Request *requests;
323 HYPRE_Int i, j;
324 HYPRE_Int my_id, num_procs;
325 HYPRE_Int ip, vec_start, vec_len;
326 void *send_data;
327 void *recv_data;
328
329 /*--------------------------------------------------------------------
330 * hypre_ParCSRCommHandleCreate(_v2) sets up a communication handle,
331 * posts receives and initiates sends. It always requires num_sends,
332 * num_recvs, recv_procs and send_procs to be set in comm_pkg.
333 * There are different options for job:
334 * job = 1 : initializes the communication exchange for the parts
335 * of the vector needed to perform a Matvec; it requires send_data
336 * and recv_data to be HYPRE_Complex arrays, and recv_vec_starts and
337 * send_map_starts need to be set in comm_pkg.
338 * job = 2 : initializes the communication exchange for the parts
339 * of the vector needed to perform a MatvecT; it requires send_data
340 * and recv_data to be HYPRE_Complex arrays, and recv_vec_starts and
341 * send_map_starts need to be set in comm_pkg.
342 * job = 11: similar to job = 1, but exchanges data of type HYPRE_Int (not HYPRE_Complex),
343 * requires send_data and recv_data to be ints
344 * recv_vec_starts and send_map_starts need to be set in comm_pkg.
345 * job = 12: similar to job = 2, but exchanges data of type HYPRE_Int (not HYPRE_Complex),
346 * requires send_data and recv_data to be ints
347 * recv_vec_starts and send_map_starts need to be set in comm_pkg.
348 * job = 21: similar to job = 1, but exchanges data of type HYPRE_BigInt (not HYPRE_Complex),
349 * requires send_data and recv_data to be HYPRE_BigInt arrays,
350 * recv_vec_starts and send_map_starts need to be set in comm_pkg.
351 * job = 22: similar to job = 2, but exchanges data of type HYPRE_BigInt (not HYPRE_Complex),
352 * requires send_data and recv_data to be HYPRE_BigInt arrays,
353 * recv_vec_starts and send_map_starts need to be set in comm_pkg.
354 * default: ignores send_data and recv_data, requires send_mpi_types
355 * and recv_mpi_types to be set in comm_pkg.
356 * datatypes need to point to absolute
357 * addresses, e.g. generated using hypre_MPI_Address .
358 *--------------------------------------------------------------------*/
359 #ifndef HYPRE_WITH_GPU_AWARE_MPI
360 switch (job)
361 {
362 case 1:
363 num_send_bytes = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends) * sizeof(HYPRE_Complex);
364 num_recv_bytes = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs) * sizeof(HYPRE_Complex);
365 break;
366 case 2:
367 num_send_bytes = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs) * sizeof(HYPRE_Complex);
368 num_recv_bytes = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends) * sizeof(HYPRE_Complex);
369 break;
370 case 11:
371 num_send_bytes = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends) * sizeof(HYPRE_Int);
372 num_recv_bytes = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs) * sizeof(HYPRE_Int);
373 break;
374 case 12:
375 num_send_bytes = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs) * sizeof(HYPRE_Int);
376 num_recv_bytes = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends) * sizeof(HYPRE_Int);
377 break;
378 case 21:
379 num_send_bytes = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends) * sizeof(HYPRE_BigInt);
380 num_recv_bytes = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs) * sizeof(HYPRE_BigInt);
381 break;
382 case 22:
383 num_send_bytes = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs) * sizeof(HYPRE_BigInt);
384 num_recv_bytes = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends) * sizeof(HYPRE_BigInt);
385 break;
386 }
387
388 hypre_MemoryLocation act_send_memory_location = hypre_GetActualMemLocation(send_memory_location);
389
390 if ( act_send_memory_location == hypre_MEMORY_DEVICE || act_send_memory_location == hypre_MEMORY_UNIFIED )
391 {
392 //send_data = _hypre_TAlloc(char, num_send_bytes, hypre_MEMORY_HOST_PINNED);
393 send_data = hypre_TAlloc(char, num_send_bytes, HYPRE_MEMORY_HOST);
394 hypre_TMemcpy(send_data, send_data_in, char, num_send_bytes, HYPRE_MEMORY_HOST, HYPRE_MEMORY_DEVICE);
395 }
396 else
397 {
398 send_data = send_data_in;
399 }
400
401 hypre_MemoryLocation act_recv_memory_location = hypre_GetActualMemLocation(recv_memory_location);
402
403 if ( act_recv_memory_location == hypre_MEMORY_DEVICE || act_recv_memory_location == hypre_MEMORY_UNIFIED )
404 {
405 //recv_data = hypre_TAlloc(char, num_recv_bytes, hypre_MEMORY_HOST_PINNED);
406 recv_data = hypre_TAlloc(char, num_recv_bytes, HYPRE_MEMORY_HOST);
407 }
408 else
409 {
410 recv_data = recv_data_in;
411 }
412 #else /* #ifndef HYPRE_WITH_GPU_AWARE_MPI */
413 send_data = send_data_in;
414 recv_data = recv_data_in;
415 // TODO RL: it seems that we need to sync the CUDA stream before doing GPU-GPU MPI.
416 // Need to check the MPI documentation on whether this is actually true
417 hypre_SyncCudaComputeStream(hypre_handle());
418 #endif
419
420 num_requests = num_sends + num_recvs;
421 requests = hypre_CTAlloc(hypre_MPI_Request, num_requests, HYPRE_MEMORY_HOST);
422
423 hypre_MPI_Comm_size(comm, &num_procs);
424 hypre_MPI_Comm_rank(comm, &my_id);
425
426 j = 0;
427 switch (job)
428 {
429 case 1:
430 {
431 HYPRE_Complex *d_send_data = (HYPRE_Complex *) send_data;
432 HYPRE_Complex *d_recv_data = (HYPRE_Complex *) recv_data;
433 for (i = 0; i < num_recvs; i++)
434 {
435 ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i);
436 vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i);
437 vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i+1)-vec_start;
438 hypre_MPI_Irecv(&d_recv_data[vec_start], vec_len, HYPRE_MPI_COMPLEX,
439 ip, 0, comm, &requests[j++]);
440 }
441 for (i = 0; i < num_sends; i++)
442 {
443 ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i);
444 vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
445 vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1)-vec_start;
446 hypre_MPI_Isend(&d_send_data[vec_start], vec_len, HYPRE_MPI_COMPLEX,
447 ip, 0, comm, &requests[j++]);
448 }
449 break;
450 }
451 case 2:
452 {
453 HYPRE_Complex *d_send_data = (HYPRE_Complex *) send_data;
454 HYPRE_Complex *d_recv_data = (HYPRE_Complex *) recv_data;
455 for (i = 0; i < num_sends; i++)
456 {
457 ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i);
458 vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
459 vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1) - vec_start;
460 hypre_MPI_Irecv(&d_recv_data[vec_start], vec_len, HYPRE_MPI_COMPLEX,
461 ip, 0, comm, &requests[j++]);
462 }
463 for (i = 0; i < num_recvs; i++)
464 {
465 ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i);
466 vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i);
467 vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i+1)-vec_start;
468 hypre_MPI_Isend(&d_send_data[vec_start], vec_len, HYPRE_MPI_COMPLEX,
469 ip, 0, comm, &requests[j++]);
470 }
471 break;
472 }
473 case 11:
474 {
475 HYPRE_Int *i_send_data = (HYPRE_Int *) send_data;
476 HYPRE_Int *i_recv_data = (HYPRE_Int *) recv_data;
477 for (i = 0; i < num_recvs; i++)
478 {
479 ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i);
480 vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i);
481 vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i+1)-vec_start;
482 hypre_MPI_Irecv(&i_recv_data[vec_start], vec_len, HYPRE_MPI_INT,
483 ip, 0, comm, &requests[j++]);
484 }
485 for (i = 0; i < num_sends; i++)
486 {
487 ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i);
488 vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
489 vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1)-vec_start;
490 hypre_MPI_Isend(&i_send_data[vec_start], vec_len, HYPRE_MPI_INT,
491 ip, 0, comm, &requests[j++]);
492 }
493 break;
494 }
495 case 12:
496 {
497 HYPRE_Int *i_send_data = (HYPRE_Int *) send_data;
498 HYPRE_Int *i_recv_data = (HYPRE_Int *) recv_data;
499 for (i = 0; i < num_sends; i++)
500 {
501 ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i);
502 vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
503 vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1) - vec_start;
504 hypre_MPI_Irecv(&i_recv_data[vec_start], vec_len, HYPRE_MPI_INT,
505 ip, 0, comm, &requests[j++]);
506 }
507 for (i = 0; i < num_recvs; i++)
508 {
509 ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i);
510 vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i);
511 vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i+1)-vec_start;
512 hypre_MPI_Isend(&i_send_data[vec_start], vec_len, HYPRE_MPI_INT,
513 ip, 0, comm, &requests[j++]);
514 }
515 break;
516 }
517 case 21:
518 {
519 HYPRE_BigInt *i_send_data = (HYPRE_BigInt *) send_data;
520 HYPRE_BigInt *i_recv_data = (HYPRE_BigInt *) recv_data;
521 for (i = 0; i < num_recvs; i++)
522 {
523 ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i);
524 vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i);
525 vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i+1)-vec_start;
526 hypre_MPI_Irecv(&i_recv_data[vec_start], vec_len, HYPRE_MPI_BIG_INT,
527 ip, 0, comm, &requests[j++]);
528 }
529 for (i = 0; i < num_sends; i++)
530 {
531 vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
532 vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1)-vec_start;
533 ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i);
534 hypre_MPI_Isend(&i_send_data[vec_start], vec_len, HYPRE_MPI_BIG_INT,
535 ip, 0, comm, &requests[j++]);
536 }
537 break;
538 }
539 case 22:
540 {
541 HYPRE_BigInt *i_send_data = (HYPRE_BigInt *) send_data;
542 HYPRE_BigInt *i_recv_data = (HYPRE_BigInt *) recv_data;
543 for (i = 0; i < num_sends; i++)
544 {
545 vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
546 vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1) - vec_start;
547 ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i);
548 hypre_MPI_Irecv(&i_recv_data[vec_start], vec_len, HYPRE_MPI_BIG_INT,
549 ip, 0, comm, &requests[j++]);
550 }
551 for (i = 0; i < num_recvs; i++)
552 {
553 ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i);
554 vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i);
555 vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i+1)-vec_start;
556 hypre_MPI_Isend(&i_send_data[vec_start], vec_len, HYPRE_MPI_BIG_INT,
557 ip, 0, comm, &requests[j++]);
558 }
559 break;
560 }
561 }
562 /*--------------------------------------------------------------------
563 * set up comm_handle and return
564 *--------------------------------------------------------------------*/
565
566 comm_handle = hypre_CTAlloc(hypre_ParCSRCommHandle, 1, HYPRE_MEMORY_HOST);
567
568 hypre_ParCSRCommHandleCommPkg(comm_handle) = comm_pkg;
569 hypre_ParCSRCommHandleSendMemoryLocation(comm_handle) = send_memory_location;
570 hypre_ParCSRCommHandleRecvMemoryLocation(comm_handle) = recv_memory_location;
571 hypre_ParCSRCommHandleNumSendBytes(comm_handle) = num_send_bytes;
572 hypre_ParCSRCommHandleNumRecvBytes(comm_handle) = num_recv_bytes;
573 hypre_ParCSRCommHandleSendData(comm_handle) = send_data_in;
574 hypre_ParCSRCommHandleRecvData(comm_handle) = recv_data_in;
575 hypre_ParCSRCommHandleSendDataBuffer(comm_handle) = send_data;
576 hypre_ParCSRCommHandleRecvDataBuffer(comm_handle) = recv_data;
577 hypre_ParCSRCommHandleNumRequests(comm_handle) = num_requests;
578 hypre_ParCSRCommHandleRequests(comm_handle) = requests;
579
580 return ( comm_handle );
581 }
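/*
 * Illustrative sketch (hypothetical device arrays d_send_buf / d_recv_buf):
 * with the _v2 interface the caller states where each buffer lives.  When
 * HYPRE is built without GPU-aware MPI, the routine above stages device data
 * through temporary host buffers, and hypre_ParCSRCommHandleDestroy copies the
 * received data back to the device before freeing the staging buffers.
 *
 *    hypre_ParCSRCommHandle *handle =
 *       hypre_ParCSRCommHandleCreate_v2(1, comm_pkg,
 *                                       HYPRE_MEMORY_DEVICE, d_send_buf,
 *                                       HYPRE_MEMORY_DEVICE, d_recv_buf);
 *    ... overlap local work ...
 *    hypre_ParCSRCommHandleDestroy(handle);
 */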
582
583 HYPRE_Int
584 hypre_ParCSRCommHandleDestroy( hypre_ParCSRCommHandle *comm_handle )
585 {
586 if ( comm_handle == NULL )
587 {
588 return hypre_error_flag;
589 }
590
591 if (hypre_ParCSRCommHandleNumRequests(comm_handle))
592 {
593 hypre_MPI_Status *status0;
594 status0 = hypre_CTAlloc(hypre_MPI_Status,
595 hypre_ParCSRCommHandleNumRequests(comm_handle), HYPRE_MEMORY_HOST);
596 hypre_MPI_Waitall(hypre_ParCSRCommHandleNumRequests(comm_handle),
597 hypre_ParCSRCommHandleRequests(comm_handle), status0);
598 hypre_TFree(status0, HYPRE_MEMORY_HOST);
599 }
600
601 #ifndef HYPRE_WITH_GPU_AWARE_MPI
602 hypre_MemoryLocation act_send_memory_location = hypre_GetActualMemLocation(hypre_ParCSRCommHandleSendMemoryLocation(comm_handle));
603 if ( act_send_memory_location == hypre_MEMORY_DEVICE || act_send_memory_location == hypre_MEMORY_UNIFIED )
604 {
605 //hypre_HostPinnedFree(hypre_ParCSRCommHandleSendDataBuffer(comm_handle));
606 hypre_TFree(hypre_ParCSRCommHandleSendDataBuffer(comm_handle), HYPRE_MEMORY_HOST);
607 }
608
609 hypre_MemoryLocation act_recv_memory_location = hypre_GetActualMemLocation(hypre_ParCSRCommHandleRecvMemoryLocation(comm_handle));
610 if ( act_recv_memory_location == hypre_MEMORY_DEVICE || act_recv_memory_location == hypre_MEMORY_UNIFIED )
611 {
612 hypre_TMemcpy( hypre_ParCSRCommHandleRecvData(comm_handle),
613 hypre_ParCSRCommHandleRecvDataBuffer(comm_handle),
614 char,
615 hypre_ParCSRCommHandleNumRecvBytes(comm_handle),
616 HYPRE_MEMORY_DEVICE,
617 HYPRE_MEMORY_HOST );
618
619 //hypre_HostPinnedFree(hypre_ParCSRCommHandleRecvDataBuffer(comm_handle));
620 hypre_TFree(hypre_ParCSRCommHandleRecvDataBuffer(comm_handle), HYPRE_MEMORY_HOST);
621 }
622 #endif
623
624 hypre_TFree(hypre_ParCSRCommHandleRequests(comm_handle), HYPRE_MEMORY_HOST);
625 hypre_TFree(comm_handle, HYPRE_MEMORY_HOST);
626
627 return hypre_error_flag;
628 }
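/*
 * Illustrative sketch (not part of the library): a one-shot job == 1 halo
 * exchange on the host, in the style of a Matvec.  `x_local_data` and
 * `x_ext_data` are hypothetical caller-owned arrays; hypre_ParCSRCommHandleDestroy
 * waits on the outstanding requests before releasing the handle.
 *
 *    HYPRE_Int      num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
 *    HYPRE_Int      num_elmts = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends);
 *    HYPRE_Complex *send_buf  = hypre_TAlloc(HYPRE_Complex, num_elmts, HYPRE_MEMORY_HOST);
 *    HYPRE_Int      i;
 *
 *    // gather the owned entries that neighbors need, in send_map_elmts order
 *    for (i = 0; i < num_elmts; i++)
 *    {
 *       send_buf[i] = x_local_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg, i)];
 *    }
 *
 *    hypre_ParCSRCommHandle *handle =
 *       hypre_ParCSRCommHandleCreate(1, comm_pkg, send_buf, x_ext_data);
 *    ... overlap the purely local part of the computation ...
 *    hypre_ParCSRCommHandleDestroy(handle);
 *    hypre_TFree(send_buf, HYPRE_MEMORY_HOST);
 */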
629
630
631 /* hypre_ParCSRCommPkgCreate_core does all the communications and computations for
632 hypre_MatCommPkgCreate ( hypre_ParCSRMatrix *A) and
633 hypre_BoolMatCommPkgCreate ( hypre_ParCSRBooleanMatrix *A). To support both
634 data types, it has hardly any data structures other than HYPRE_Int*.
635 */
636
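/*
 * Worked example (hypothetical data, three ranks): on rank 1 the outputs of
 * hypre_ParCSRCommPkgCreate_core might look like
 *
 *    num_recvs       = 2             num_sends       = 1
 *    recv_procs      = {0, 2}        send_procs      = {2}
 *    recv_vec_starts = {0, 3, 5}     send_map_starts = {0, 4}
 *                                    send_map_elmts  = {0, 1, 5, 7}
 *
 * i.e. rank 1 receives 3 off-diagonal entries from rank 0 and 2 from rank 2,
 * and sends the entries of its local columns 0, 1, 5 and 7 to rank 2.
 * recv_vec_starts and send_map_starts are CSR-like offset arrays of length
 * num_recvs+1 and num_sends+1; send_map_elmts stores local column indices
 * (global index minus first_col_diag).
 */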
637 void
638 hypre_ParCSRCommPkgCreate_core(
639 /* input args: */
640 MPI_Comm comm,
641 HYPRE_BigInt *col_map_offd,
642 HYPRE_BigInt first_col_diag,
643 HYPRE_BigInt *col_starts,
644 HYPRE_Int num_cols_diag,
645 HYPRE_Int num_cols_offd,
646 /* pointers to output args: */
647 HYPRE_Int *p_num_recvs,
648 HYPRE_Int **p_recv_procs,
649 HYPRE_Int **p_recv_vec_starts,
650 HYPRE_Int *p_num_sends,
651 HYPRE_Int **p_send_procs,
652 HYPRE_Int **p_send_map_starts,
653 HYPRE_Int **p_send_map_elmts
654 )
655 {
656 HYPRE_Int i, j;
657 HYPRE_Int num_procs, my_id, proc_num, num_elmts;
658 HYPRE_Int local_info;
659 HYPRE_BigInt offd_col;
660 HYPRE_BigInt *big_buf_data = NULL;
661 HYPRE_Int *proc_mark, *proc_add, *tmp, *recv_buf, *displs, *info;
662 /* outputs: */
663 HYPRE_Int num_recvs, *recv_procs, *recv_vec_starts;
664 HYPRE_Int num_sends, *send_procs, *send_map_starts, *send_map_elmts;
665 HYPRE_Int ip, vec_start, vec_len, num_requests;
666
667 hypre_MPI_Request *requests;
668 hypre_MPI_Status *status;
669
670 hypre_MPI_Comm_size(comm, &num_procs);
671 hypre_MPI_Comm_rank(comm, &my_id);
672
673 proc_mark = hypre_CTAlloc(HYPRE_Int, num_procs, HYPRE_MEMORY_HOST);
674 proc_add = hypre_CTAlloc(HYPRE_Int, num_procs, HYPRE_MEMORY_HOST);
675 info = hypre_CTAlloc(HYPRE_Int, num_procs, HYPRE_MEMORY_HOST);
676
677 /* ----------------------------------------------------------------------
678 * determine which processors to receive from (set proc_mark) and num_recvs;
679 * at the end of the loop proc_mark[i] contains the rank of the i-th receive
680 * processor and proc_add[i] the number of elements to be received from it
681 * ---------------------------------------------------------------------*/
682
683 for (i=0; i < num_procs; i++)
684 proc_add[i] = 0;
685
686 proc_num = 0;
687 if (num_cols_offd) offd_col = col_map_offd[0];
688 num_recvs=0;
689 j = 0;
690 for (i=0; i < num_cols_offd; i++)
691 {
692 if (num_cols_diag)
693 proc_num = hypre_min(num_procs-1, (HYPRE_Int)(offd_col / (HYPRE_BigInt)num_cols_diag));
694 while (col_starts[proc_num] > offd_col )
695 proc_num = proc_num-1;
696 while (col_starts[proc_num+1]-1 < offd_col )
697 proc_num = proc_num+1;
698 proc_mark[num_recvs] = proc_num;
699 j = i;
700 while (col_starts[proc_num+1] > offd_col)
701 {
702 proc_add[num_recvs]++;
703 if (j < num_cols_offd-1)
704 {
705 j++;
706 offd_col = col_map_offd[j];
707 }
708 else
709 {
710 j++;
711 offd_col = col_starts[num_procs];
712 }
713 }
714 num_recvs++;
715 if (j < num_cols_offd) i = j-1;
716 else i=j;
717 }
718
719 local_info = 2*num_recvs;
720
721 hypre_MPI_Allgather(&local_info, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, comm);
722
723 /* ----------------------------------------------------------------------
724 * generate information to be sent: tmp contains, for each receive processor,
725 * its rank followed by the number of elements to be received from it
726 * (two entries per processor, in this order)
727 * ---------------------------------------------------------------------*/
728
729 displs = hypre_CTAlloc(HYPRE_Int, num_procs+1, HYPRE_MEMORY_HOST);
730 displs[0] = 0;
731 for (i=1; i < num_procs+1; i++)
732 displs[i] = displs[i-1]+info[i-1];
733 recv_buf = hypre_CTAlloc(HYPRE_Int, displs[num_procs], HYPRE_MEMORY_HOST);
734
735 recv_procs = NULL;
736 tmp = NULL;
737 if (num_recvs)
738 {
739 recv_procs = hypre_CTAlloc(HYPRE_Int, num_recvs, HYPRE_MEMORY_HOST);
740 tmp = hypre_CTAlloc(HYPRE_Int, local_info, HYPRE_MEMORY_HOST);
741 }
742 recv_vec_starts = hypre_CTAlloc(HYPRE_Int, num_recvs+1, HYPRE_MEMORY_HOST);
743
744 j = 0;
745 if (num_recvs) recv_vec_starts[0] = 0;
746 for (i=0; i < num_recvs; i++)
747 {
748 num_elmts = proc_add[i];
749 recv_procs[i] = proc_mark[i];
750 recv_vec_starts[i+1] = recv_vec_starts[i]+num_elmts;
751 tmp[j++] = proc_mark[i];
752 tmp[j++] = num_elmts;
753 }
754
755 hypre_MPI_Allgatherv(tmp,local_info,HYPRE_MPI_INT,recv_buf,info,
756 displs,HYPRE_MPI_INT,comm);
757
758 /* ----------------------------------------------------------------------
759 * determine num_sends and number of elements to be sent
760 * ---------------------------------------------------------------------*/
761
762 num_sends = 0;
763 num_elmts = 0;
764 proc_add[0] = 0;
765 for (i=0; i < num_procs; i++)
766 {
767 j = displs[i];
768 while ( j < displs[i+1])
769 {
770 if (recv_buf[j++] == my_id)
771 {
772 proc_mark[num_sends] = i;
773 num_sends++;
774 proc_add[num_sends] = proc_add[num_sends-1]+recv_buf[j];
775 break;
776 }
777 j++;
778 }
779 }
780
781 /* ----------------------------------------------------------------------
782 * determine send_procs and the actual elements to be sent (in send_map_elmts)
783 * and send_map_starts, whose i-th entry points to the beginning of the
784 * elements to be sent to proc. i
785 * ---------------------------------------------------------------------*/
786
787 send_procs = NULL;
788 send_map_elmts = NULL;
789
790 if (num_sends)
791 {
792 send_procs = hypre_CTAlloc(HYPRE_Int, num_sends, HYPRE_MEMORY_HOST);
793 send_map_elmts = hypre_CTAlloc(HYPRE_Int, proc_add[num_sends], HYPRE_MEMORY_HOST);
794 big_buf_data = hypre_CTAlloc(HYPRE_BigInt, proc_add[num_sends], HYPRE_MEMORY_HOST);
795 }
796 send_map_starts = hypre_CTAlloc(HYPRE_Int, num_sends+1, HYPRE_MEMORY_HOST);
797 num_requests = num_recvs+num_sends;
798 if (num_requests)
799 {
800 requests = hypre_CTAlloc(hypre_MPI_Request, num_requests, HYPRE_MEMORY_HOST);
801 status = hypre_CTAlloc(hypre_MPI_Status, num_requests, HYPRE_MEMORY_HOST);
802 }
803
804 if (num_sends) send_map_starts[0] = 0;
805 for (i=0; i < num_sends; i++)
806 {
807 send_map_starts[i+1] = proc_add[i+1];
808 send_procs[i] = proc_mark[i];
809 }
810
811 j=0;
812 for (i=0; i < num_sends; i++)
813 {
814 vec_start = send_map_starts[i];
815 vec_len = send_map_starts[i+1] - vec_start;
816 ip = send_procs[i];
817 hypre_MPI_Irecv(&big_buf_data[vec_start], vec_len, HYPRE_MPI_BIG_INT,
818 ip, 0, comm, &requests[j++]);
819 }
820 for (i=0; i < num_recvs; i++)
821 {
822 vec_start = recv_vec_starts[i];
823 vec_len = recv_vec_starts[i+1] - vec_start;
824 ip = recv_procs[i];
825 hypre_MPI_Isend(&col_map_offd[vec_start], vec_len, HYPRE_MPI_BIG_INT,
826 ip, 0, comm, &requests[j++]);
827 }
828
829 if (num_requests)
830 {
831 hypre_MPI_Waitall(num_requests, requests, status);
832 hypre_TFree(requests, HYPRE_MEMORY_HOST);
833 hypre_TFree(status, HYPRE_MEMORY_HOST);
834 }
835
836 if (num_sends)
837 {
838 for (i=0; i<send_map_starts[num_sends]; i++)
839 send_map_elmts[i] = (HYPRE_Int)(big_buf_data[i]-first_col_diag);
840 }
841
842 hypre_TFree(proc_add, HYPRE_MEMORY_HOST);
843 hypre_TFree(proc_mark, HYPRE_MEMORY_HOST);
844 hypre_TFree(tmp, HYPRE_MEMORY_HOST);
845 hypre_TFree(recv_buf, HYPRE_MEMORY_HOST);
846 hypre_TFree(displs, HYPRE_MEMORY_HOST);
847 hypre_TFree(info, HYPRE_MEMORY_HOST);
848 hypre_TFree(big_buf_data, HYPRE_MEMORY_HOST);
849
850 /* finish up with the hand-coded call-by-reference... */
851 *p_num_recvs = num_recvs;
852 *p_recv_procs = recv_procs;
853 *p_recv_vec_starts = recv_vec_starts;
854 *p_num_sends = num_sends;
855 *p_send_procs = send_procs;
856 *p_send_map_starts = send_map_starts;
857 *p_send_map_elmts = send_map_elmts;
858 }
859
860
861 HYPRE_Int
862 hypre_ParCSRCommPkgCreate
863 (
864 /* inputs */
865 MPI_Comm comm,
866 HYPRE_BigInt *col_map_offd,
867 HYPRE_BigInt first_col_diag,
868 HYPRE_BigInt *col_starts,
869 HYPRE_Int num_cols_diag,
870 HYPRE_Int num_cols_offd,
871 /* output */
872 hypre_ParCSRCommPkg *comm_pkg
873 )
874 {
875 HYPRE_Int num_sends;
876 HYPRE_Int *send_procs;
877 HYPRE_Int *send_map_starts;
878 HYPRE_Int *send_map_elmts;
879
880 HYPRE_Int num_recvs;
881 HYPRE_Int *recv_procs;
882 HYPRE_Int *recv_vec_starts;
883
884 hypre_ParCSRCommPkgCreate_core
885 ( comm, col_map_offd, first_col_diag, col_starts,
886 num_cols_diag, num_cols_offd,
887 &num_recvs, &recv_procs, &recv_vec_starts,
888 &num_sends, &send_procs, &send_map_starts,
889 &send_map_elmts );
890
891 hypre_ParCSRCommPkgComm (comm_pkg) = comm;
892 hypre_ParCSRCommPkgNumRecvs (comm_pkg) = num_recvs;
893 hypre_ParCSRCommPkgRecvProcs (comm_pkg) = recv_procs;
894 hypre_ParCSRCommPkgRecvVecStarts(comm_pkg) = recv_vec_starts;
895 hypre_ParCSRCommPkgNumSends (comm_pkg) = num_sends;
896 hypre_ParCSRCommPkgSendProcs (comm_pkg) = send_procs;
897 hypre_ParCSRCommPkgSendMapStarts(comm_pkg) = send_map_starts;
898 hypre_ParCSRCommPkgSendMapElmts (comm_pkg) = send_map_elmts;
899
900 return hypre_error_flag;
901 }
902
903 /* ----------------------------------------------------------------------
904 * hypre_MatvecCommPkgCreate
905 * generates the comm_pkg for A;
906 * if no assumed partition of A exists yet, one is created first
907 * (and owned by A) using hypre_ParCSRMatrixCreateAssumedPartition
908 * ---------------------------------------------------------------------*/
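/*
 * Illustrative sketch (not part of the library): the usual caller-side pattern
 * is to build the comm_pkg lazily, right before the first operation that needs
 * a halo exchange.
 *
 *    hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
 *    if (!comm_pkg)
 *    {
 *       hypre_MatvecCommPkgCreate(A);
 *       comm_pkg = hypre_ParCSRMatrixCommPkg(A);
 *    }
 */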
909
910 HYPRE_Int
911 hypre_MatvecCommPkgCreate ( hypre_ParCSRMatrix *A )
912 {
913 MPI_Comm comm = hypre_ParCSRMatrixComm(A);
914 hypre_IJAssumedPart *apart = hypre_ParCSRMatrixAssumedPartition(A);
915 hypre_ParCSRCommPkg *comm_pkg;
916
917 HYPRE_BigInt first_col_diag = hypre_ParCSRMatrixFirstColDiag(A);
918 HYPRE_BigInt *col_map_offd = hypre_ParCSRMatrixColMapOffd(A);
919 HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(A));
920 HYPRE_BigInt global_num_cols = hypre_ParCSRMatrixGlobalNumCols(A);
921
922 HYPRE_ANNOTATE_FUNC_BEGIN;
923
924 /* Create the assumed partition if it does not exist; the matrix owns it */
925 if (apart == NULL)
926 {
927 hypre_ParCSRMatrixCreateAssumedPartition(A);
928 hypre_ParCSRMatrixOwnsAssumedPartition(A) = 1;
929 apart = hypre_ParCSRMatrixAssumedPartition(A);
930 }
931
932 /*-----------------------------------------------------------
933 * setup commpkg
934 *----------------------------------------------------------*/
935 comm_pkg = hypre_CTAlloc(hypre_ParCSRCommPkg, 1, HYPRE_MEMORY_HOST);
936 hypre_ParCSRMatrixCommPkg(A) = comm_pkg;
937 hypre_ParCSRCommPkgCreateApart( comm, col_map_offd, first_col_diag,
938 num_cols_offd, global_num_cols,
939 apart,
940 comm_pkg );
941
942 HYPRE_ANNOTATE_FUNC_END;
943
944 return hypre_error_flag;
945 }
946
947 HYPRE_Int
948 hypre_MatvecCommPkgDestroy( hypre_ParCSRCommPkg *comm_pkg )
949 {
950 #ifdef HYPRE_USING_PERSISTENT_COMM
951 HYPRE_Int i;
952 for (i = HYPRE_COMM_PKG_JOB_COMPLEX; i < NUM_OF_COMM_PKG_JOB_TYPE; ++i)
953 {
954 if (comm_pkg->persistent_comm_handles[i])
955 {
956 hypre_ParCSRPersistentCommHandleDestroy(comm_pkg->persistent_comm_handles[i]);
957 }
958 }
959 #endif
960
961 if (hypre_ParCSRCommPkgNumSends(comm_pkg))
962 {
963 hypre_TFree(hypre_ParCSRCommPkgSendProcs(comm_pkg), HYPRE_MEMORY_HOST);
964 hypre_TFree(hypre_ParCSRCommPkgSendMapElmts(comm_pkg), HYPRE_MEMORY_HOST);
965 hypre_TFree(hypre_ParCSRCommPkgDeviceSendMapElmts(comm_pkg), HYPRE_MEMORY_DEVICE);
966 }
967 hypre_TFree(hypre_ParCSRCommPkgSendMapStarts(comm_pkg), HYPRE_MEMORY_HOST);
968 /* if (hypre_ParCSRCommPkgSendMPITypes(comm_pkg))
969 hypre_TFree(hypre_ParCSRCommPkgSendMPITypes(comm_pkg), HYPRE_MEMORY_HOST); */
970 if (hypre_ParCSRCommPkgNumRecvs(comm_pkg))
971 {
972 hypre_TFree(hypre_ParCSRCommPkgRecvProcs(comm_pkg), HYPRE_MEMORY_HOST);
973 }
974 hypre_TFree(hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), HYPRE_MEMORY_HOST);
975 /* if (hypre_ParCSRCommPkgRecvMPITypes(comm_pkg))
976 hypre_TFree(hypre_ParCSRCommPkgRecvMPITypes(comm_pkg), HYPRE_MEMORY_HOST); */
977
978 #if defined(HYPRE_USING_GPU)
979 hypre_TFree(hypre_ParCSRCommPkgTmpData(comm_pkg), HYPRE_MEMORY_DEVICE);
980 hypre_TFree(hypre_ParCSRCommPkgBufData(comm_pkg), HYPRE_MEMORY_DEVICE);
981 //_hypre_TFree(hypre_ParCSRCommPkgTmpData(comm_pkg), hypre_MEMORY_DEVICE);
982 //_hypre_TFree(hypre_ParCSRCommPkgBufData(comm_pkg), hypre_MEMORY_DEVICE);
983 hypre_TFree(hypre_ParCSRCommPkgWorkSpace(comm_pkg), HYPRE_MEMORY_DEVICE);
984 #endif
985
986 hypre_TFree(comm_pkg, HYPRE_MEMORY_HOST);
987
988 return hypre_error_flag;
989 }
990
991 /* AHB 11/06 : alternate to the extend function below - creates a
992 * second comm pkg based on indices - this makes it easier to use the
993 * global partition
994 * RL: renamed and moved it here
995 */
996 HYPRE_Int
997 hypre_ParCSRFindExtendCommPkg(MPI_Comm comm,
998 HYPRE_BigInt global_num,
999 HYPRE_BigInt my_first,
1000 HYPRE_Int local_num,
1001 HYPRE_BigInt *starts,
1002 hypre_IJAssumedPart *apart,
1003 HYPRE_Int indices_len,
1004 HYPRE_BigInt *indices,
1005 hypre_ParCSRCommPkg **extend_comm_pkg)
1006 {
1007 /*-----------------------------------------------------------
1008 * setup commpkg
1009 *----------------------------------------------------------*/
1010 hypre_ParCSRCommPkg *new_comm_pkg = hypre_CTAlloc(hypre_ParCSRCommPkg, 1, HYPRE_MEMORY_HOST);
1011 *extend_comm_pkg = new_comm_pkg;
1012
1013 hypre_assert(apart != NULL);
1014 hypre_ParCSRCommPkgCreateApart ( comm, indices, my_first, indices_len, global_num, apart,
1015 new_comm_pkg );
1016
1017 return hypre_error_flag;
1018 }
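/*
 * Illustrative sketch (hypothetical variables num_indices / my_indices, and
 * assuming A already has an assumed partition): building a communication
 * package for an arbitrary list of global column indices needed by this rank,
 * rather than the off-diagonal columns of A.
 *
 *    hypre_ParCSRCommPkg *ext_comm_pkg = NULL;
 *    hypre_ParCSRFindExtendCommPkg(hypre_ParCSRMatrixComm(A),
 *                                  hypre_ParCSRMatrixGlobalNumCols(A),
 *                                  hypre_ParCSRMatrixFirstColDiag(A),
 *                                  hypre_CSRMatrixNumCols(hypre_ParCSRMatrixDiag(A)),
 *                                  hypre_ParCSRMatrixColStarts(A),
 *                                  hypre_ParCSRMatrixAssumedPartition(A),
 *                                  num_indices, my_indices,
 *                                  &ext_comm_pkg);
 *    ... use ext_comm_pkg with hypre_ParCSRCommHandleCreate ...
 *    hypre_MatvecCommPkgDestroy(ext_comm_pkg);
 */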
1019
1020
1021 HYPRE_Int
1022 hypre_BuildCSRMatrixMPIDataType( HYPRE_Int num_nonzeros,
1023 HYPRE_Int num_rows,
1024 HYPRE_Complex *a_data,
1025 HYPRE_Int *a_i,
1026 HYPRE_Int *a_j,
1027 hypre_MPI_Datatype *csr_matrix_datatype )
1028 {
1029 HYPRE_Int block_lens[3];
1030 hypre_MPI_Aint displ[3];
1031 hypre_MPI_Datatype types[3];
1032
1033 block_lens[0] = num_nonzeros;
1034 block_lens[1] = num_rows+1;
1035 block_lens[2] = num_nonzeros;
1036
1037 types[0] = HYPRE_MPI_COMPLEX;
1038 types[1] = HYPRE_MPI_INT;
1039 types[2] = HYPRE_MPI_INT;
1040
1041 hypre_MPI_Address(a_data, &displ[0]);
1042 hypre_MPI_Address(a_i, &displ[1]);
1043 hypre_MPI_Address(a_j, &displ[2]);
1044 hypre_MPI_Type_struct(3,block_lens,displ,types,csr_matrix_datatype);
1045 hypre_MPI_Type_commit(csr_matrix_datatype);
1046
1047 return hypre_error_flag;
1048 }
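/*
 * Illustrative sketch (hypothetical A_local, dest, tag; assumes an MPI build
 * where MPI_BOTTOM is available): sending an entire local CSR block with the
 * derived datatype built above.  Because the type is constructed from absolute
 * addresses (hypre_MPI_Address), the send passes MPI_BOTTOM as the buffer.
 *
 *    hypre_MPI_Datatype csr_type;
 *    hypre_BuildCSRMatrixMPIDataType(hypre_CSRMatrixNumNonzeros(A_local),
 *                                    hypre_CSRMatrixNumRows(A_local),
 *                                    hypre_CSRMatrixData(A_local),
 *                                    hypre_CSRMatrixI(A_local),
 *                                    hypre_CSRMatrixJ(A_local),
 *                                    &csr_type);
 *    hypre_MPI_Send(MPI_BOTTOM, 1, csr_type, dest, tag, comm);
 *    hypre_MPI_Type_free(&csr_type);
 */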
1049
1050 HYPRE_Int
1051 hypre_BuildCSRJDataType( HYPRE_Int num_nonzeros,
1052 HYPRE_Complex *a_data,
1053 HYPRE_Int *a_j,
1054 hypre_MPI_Datatype *csr_jdata_datatype )
1055 {
1056 HYPRE_Int block_lens[2];
1057 hypre_MPI_Aint displs[2];
1058 hypre_MPI_Datatype types[2];
1059
1060 block_lens[0] = num_nonzeros;
1061 block_lens[1] = num_nonzeros;
1062
1063 types[0] = HYPRE_MPI_COMPLEX;
1064 types[1] = HYPRE_MPI_INT;
1065
1066 hypre_MPI_Address(a_data, &displs[0]);
1067 hypre_MPI_Address(a_j, &displs[1]);
1068
1069 hypre_MPI_Type_struct(2,block_lens,displs,types,csr_jdata_datatype);
1070 hypre_MPI_Type_commit(csr_jdata_datatype);
1071
1072 return hypre_error_flag;
1073 }
1074