/******************************************************************************
 * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
 * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
 *
 * SPDX-License-Identifier: (Apache-2.0 OR MIT)
 ******************************************************************************/

#include "_hypre_parcsr_mv.h"

/*==========================================================================*/

#ifdef HYPRE_USING_PERSISTENT_COMM
static CommPkgJobType getJobTypeOf(HYPRE_Int job)
{
   CommPkgJobType job_type = HYPRE_COMM_PKG_JOB_COMPLEX;
   switch (job)
   {
      case  1:
         job_type = HYPRE_COMM_PKG_JOB_COMPLEX;
         break;
      case  2:
         job_type = HYPRE_COMM_PKG_JOB_COMPLEX_TRANSPOSE;
         break;
      case  11:
         job_type = HYPRE_COMM_PKG_JOB_INT;
         break;
      case  12:
         job_type = HYPRE_COMM_PKG_JOB_INT_TRANSPOSE;
         break;
      case  21:
         job_type = HYPRE_COMM_PKG_JOB_BIGINT;
         break;
      case  22:
         job_type = HYPRE_COMM_PKG_JOB_BIGINT_TRANSPOSE;
         break;
   } // switch (job)

   return job_type;
}

/**
 * The send and receive buffers are allocated internally and owned by the
 * persistent CommHandle; the caller passes its own send/recv arrays to the
 * Start/Wait routines, which copy into and out of these buffers.
 */
hypre_ParCSRPersistentCommHandle*
hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, hypre_ParCSRCommPkg *comm_pkg )
{
   HYPRE_Int i;
   size_t num_bytes_send = 0, num_bytes_recv = 0;

   hypre_ParCSRPersistentCommHandle *comm_handle = hypre_CTAlloc(hypre_ParCSRPersistentCommHandle, 1, HYPRE_MEMORY_HOST);

   CommPkgJobType job_type = getJobTypeOf(job);

   HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
   HYPRE_Int num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
   MPI_Comm  comm      = hypre_ParCSRCommPkgComm(comm_pkg);

   HYPRE_Int num_requests = num_sends + num_recvs;
   hypre_MPI_Request *requests = hypre_CTAlloc(hypre_MPI_Request, num_requests, HYPRE_MEMORY_HOST);

   hypre_ParCSRCommHandleNumRequests(comm_handle) = num_requests;
   hypre_ParCSRCommHandleRequests(comm_handle)    = requests;

   void *send_buff = NULL, *recv_buff = NULL;

   switch (job_type)
   {
      case HYPRE_COMM_PKG_JOB_COMPLEX:
         num_bytes_send = sizeof(HYPRE_Complex) * hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends);
         num_bytes_recv = sizeof(HYPRE_Complex) * hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs);
         send_buff = hypre_TAlloc(HYPRE_Complex, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), HYPRE_MEMORY_HOST);
         recv_buff = hypre_TAlloc(HYPRE_Complex, hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), HYPRE_MEMORY_HOST);
         for (i = 0; i < num_recvs; ++i)
         {
            HYPRE_Int ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i);
            HYPRE_Int vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i);
            HYPRE_Int vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start;
            hypre_MPI_Recv_init( (HYPRE_Complex *)recv_buff + vec_start, vec_len, HYPRE_MPI_COMPLEX,
                                 ip, 0, comm, requests + i );
         }
         for (i = 0; i < num_sends; ++i)
         {
            HYPRE_Int ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i);
            HYPRE_Int vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
            HYPRE_Int vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start;
            hypre_MPI_Send_init( (HYPRE_Complex *)send_buff + vec_start, vec_len, HYPRE_MPI_COMPLEX,
                                 ip, 0, comm, requests + num_recvs + i );
         }
         break;

      case HYPRE_COMM_PKG_JOB_COMPLEX_TRANSPOSE:
         num_bytes_recv = sizeof(HYPRE_Complex) * hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends);
         num_bytes_send = sizeof(HYPRE_Complex) * hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs);
         recv_buff = hypre_TAlloc(HYPRE_Complex, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), HYPRE_MEMORY_HOST);
         send_buff = hypre_TAlloc(HYPRE_Complex, hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), HYPRE_MEMORY_HOST);
         for (i = 0; i < num_sends; ++i)
         {
            HYPRE_Int ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i);
            HYPRE_Int vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
            HYPRE_Int vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start;
            hypre_MPI_Recv_init( (HYPRE_Complex *)recv_buff + vec_start, vec_len, HYPRE_MPI_COMPLEX,
                                 ip, 0, comm, requests + i );
         }
         for (i = 0; i < num_recvs; ++i)
         {
            HYPRE_Int ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i);
            HYPRE_Int vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i);
            HYPRE_Int vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start;
            hypre_MPI_Send_init( (HYPRE_Complex *)send_buff + vec_start, vec_len, HYPRE_MPI_COMPLEX,
                                 ip, 0, comm, requests + num_sends + i );
         }
         break;

      case HYPRE_COMM_PKG_JOB_INT:
         num_bytes_send = sizeof(HYPRE_Int) * hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends);
         num_bytes_recv = sizeof(HYPRE_Int) * hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs);
         send_buff = hypre_TAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), HYPRE_MEMORY_HOST);
         recv_buff = hypre_TAlloc(HYPRE_Int, hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), HYPRE_MEMORY_HOST);
         for (i = 0; i < num_recvs; ++i)
         {
            HYPRE_Int ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i);
            HYPRE_Int vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i);
            HYPRE_Int vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start;
            hypre_MPI_Recv_init( (HYPRE_Int *)recv_buff + vec_start, vec_len, HYPRE_MPI_INT,
                                 ip, 0, comm, requests + i );
         }
         for (i = 0; i < num_sends; ++i)
         {
            HYPRE_Int ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i);
            HYPRE_Int vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
            HYPRE_Int vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start;
            hypre_MPI_Send_init( (HYPRE_Int *)send_buff + vec_start, vec_len, HYPRE_MPI_INT,
                                 ip, 0, comm, requests + num_recvs + i );
         }
         break;

      case HYPRE_COMM_PKG_JOB_INT_TRANSPOSE:
         num_bytes_recv = sizeof(HYPRE_Int) * hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends);
         num_bytes_send = sizeof(HYPRE_Int) * hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs);
         recv_buff = hypre_TAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), HYPRE_MEMORY_HOST);
         send_buff = hypre_TAlloc(HYPRE_Int, hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), HYPRE_MEMORY_HOST);
         for (i = 0; i < num_sends; ++i)
         {
            HYPRE_Int ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i);
            HYPRE_Int vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
            HYPRE_Int vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start;
            hypre_MPI_Recv_init( (HYPRE_Int *)recv_buff + vec_start, vec_len, HYPRE_MPI_INT,
                                 ip, 0, comm, requests + i );
         }
         for (i = 0; i < num_recvs; ++i)
         {
            HYPRE_Int ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i);
            HYPRE_Int vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i);
            HYPRE_Int vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start;
            hypre_MPI_Send_init( (HYPRE_Int *)send_buff + vec_start, vec_len, HYPRE_MPI_INT,
                                 ip, 0, comm, requests + num_sends + i );
         }
         break;

      case HYPRE_COMM_PKG_JOB_BIGINT:
         num_bytes_send = sizeof(HYPRE_BigInt) * hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends);
         num_bytes_recv = sizeof(HYPRE_BigInt) * hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs);
         send_buff = hypre_TAlloc(HYPRE_BigInt, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), HYPRE_MEMORY_HOST);
         recv_buff = hypre_TAlloc(HYPRE_BigInt, hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), HYPRE_MEMORY_HOST);
         for (i = 0; i < num_recvs; ++i)
         {
            HYPRE_Int ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i);
            HYPRE_Int vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i);
            HYPRE_Int vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start;
            hypre_MPI_Recv_init( (HYPRE_BigInt *)recv_buff + (HYPRE_BigInt)vec_start, vec_len, HYPRE_MPI_BIG_INT,
                                 ip, 0, comm, requests + i );
         }
         for (i = 0; i < num_sends; ++i)
         {
            HYPRE_Int ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i);
            HYPRE_Int vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
            HYPRE_Int vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start;
            hypre_MPI_Send_init( (HYPRE_BigInt *)send_buff + (HYPRE_BigInt)vec_start, vec_len, HYPRE_MPI_BIG_INT,
                                 ip, 0, comm, requests + num_recvs + i);
         }
         break;

      case HYPRE_COMM_PKG_JOB_BIGINT_TRANSPOSE:
         num_bytes_recv = sizeof(HYPRE_BigInt) * hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends);
         num_bytes_send = sizeof(HYPRE_BigInt) * hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs);
         recv_buff = hypre_TAlloc(HYPRE_BigInt, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), HYPRE_MEMORY_HOST);
         send_buff = hypre_TAlloc(HYPRE_BigInt, hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), HYPRE_MEMORY_HOST);
         for (i = 0; i < num_sends; ++i)
         {
            HYPRE_Int ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i);
            HYPRE_Int vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
            HYPRE_Int vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start;
            hypre_MPI_Recv_init( (HYPRE_BigInt *)recv_buff + (HYPRE_BigInt)vec_start, vec_len, HYPRE_MPI_BIG_INT,
                                 ip, 0, comm, requests + i );
         }
         for (i = 0; i < num_recvs; ++i)
         {
            HYPRE_Int ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i);
            HYPRE_Int vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i);
            HYPRE_Int vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start;

            hypre_MPI_Send_init( (HYPRE_BigInt *)send_buff + (HYPRE_BigInt)vec_start, vec_len, HYPRE_MPI_BIG_INT,
                                 ip, 0, comm, requests + num_sends + i);
         }
         break;
      default:
         hypre_assert(1 == 0);
         break;
   } // switch (job_type)

   hypre_ParCSRCommHandleRecvDataBuffer(comm_handle) = recv_buff;
   hypre_ParCSRCommHandleSendDataBuffer(comm_handle) = send_buff;
   hypre_ParCSRCommHandleNumSendBytes(comm_handle)   = num_bytes_send;
   hypre_ParCSRCommHandleNumRecvBytes(comm_handle)   = num_bytes_recv;

   return ( comm_handle );
}


hypre_ParCSRPersistentCommHandle*
hypre_ParCSRCommPkgGetPersistentCommHandle( HYPRE_Int job, hypre_ParCSRCommPkg *comm_pkg )
{
   CommPkgJobType type = getJobTypeOf(job);
   if (!comm_pkg->persistent_comm_handles[type])
   {
      /* data is owned by persistent comm handle */
      comm_pkg->persistent_comm_handles[type] = hypre_ParCSRPersistentCommHandleCreate(job, comm_pkg);
   }

   return comm_pkg->persistent_comm_handles[type];
}

void
hypre_ParCSRPersistentCommHandleDestroy( hypre_ParCSRPersistentCommHandle *comm_handle )
{
   hypre_TFree(hypre_ParCSRCommHandleSendDataBuffer(comm_handle), HYPRE_MEMORY_HOST);
   hypre_TFree(hypre_ParCSRCommHandleRecvDataBuffer(comm_handle), HYPRE_MEMORY_HOST);
   hypre_TFree(comm_handle->requests, HYPRE_MEMORY_HOST);
   hypre_TFree(comm_handle, HYPRE_MEMORY_HOST);
}

void hypre_ParCSRPersistentCommHandleStart( hypre_ParCSRPersistentCommHandle *comm_handle,
                                            HYPRE_MemoryLocation              send_memory_location,
                                            void                             *send_data )
{
   hypre_ParCSRCommHandleSendData(comm_handle) = send_data;
   hypre_ParCSRCommHandleSendMemoryLocation(comm_handle) = send_memory_location;

   if (hypre_ParCSRCommHandleNumRequests(comm_handle) > 0)
   {
      hypre_TMemcpy( hypre_ParCSRCommHandleSendDataBuffer(comm_handle),
                     send_data,
                     char,
                     hypre_ParCSRCommHandleNumSendBytes(comm_handle),
                     HYPRE_MEMORY_HOST,
                     send_memory_location );

      HYPRE_Int ret = hypre_MPI_Startall(hypre_ParCSRCommHandleNumRequests(comm_handle),
                                         hypre_ParCSRCommHandleRequests(comm_handle));
      if (hypre_MPI_SUCCESS != ret)
      {
         hypre_error_w_msg(HYPRE_ERROR_GENERIC, "MPI error\n");
         /*hypre_printf("MPI error %d in %s (%s, line %u)\n", ret, __FUNCTION__, __FILE__, __LINE__);*/
      }
   }
}

void hypre_ParCSRPersistentCommHandleWait( hypre_ParCSRPersistentCommHandle *comm_handle,
                                           HYPRE_MemoryLocation              recv_memory_location,
                                           void                             *recv_data )
{
   hypre_ParCSRCommHandleRecvData(comm_handle) = recv_data;
   hypre_ParCSRCommHandleRecvMemoryLocation(comm_handle) = recv_memory_location;

   if (hypre_ParCSRCommHandleNumRequests(comm_handle) > 0)
   {
      HYPRE_Int ret = hypre_MPI_Waitall(hypre_ParCSRCommHandleNumRequests(comm_handle),
                                        hypre_ParCSRCommHandleRequests(comm_handle),
                                        hypre_MPI_STATUSES_IGNORE);
      if (hypre_MPI_SUCCESS != ret)
      {
         hypre_error_w_msg(HYPRE_ERROR_GENERIC, "MPI error\n");
         /*hypre_printf("MPI error %d in %s (%s, line %u)\n", ret, __FUNCTION__, __FILE__, __LINE__);*/
      }

      hypre_TMemcpy(recv_data,
                    hypre_ParCSRCommHandleRecvDataBuffer(comm_handle),
                    char,
                    hypre_ParCSRCommHandleNumRecvBytes(comm_handle),
                    recv_memory_location,
                    HYPRE_MEMORY_HOST);
   }
}
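
/* Illustrative usage sketch (not part of the library): one way a caller might
 * use the persistent interface above for a job-1 (Matvec) halo exchange of
 * host-resident HYPRE_Complex data.  The arrays `x_send_vals` and
 * `x_offd_vals` are hypothetical and assumed to be sized by
 * hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends) and
 * hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), respectively. */
#if 0
static void
example_persistent_halo_exchange( hypre_ParCSRCommPkg *comm_pkg,
                                  HYPRE_Complex       *x_send_vals,
                                  HYPRE_Complex       *x_offd_vals )
{
   /* The handle (with its internal buffers) is created once per job type and
      cached in comm_pkg; repeated calls return the cached handle. */
   hypre_ParCSRPersistentCommHandle *handle =
      hypre_ParCSRCommPkgGetPersistentCommHandle(1, comm_pkg);

   /* Copy send values into the handle's internal buffer and start all
      persistent requests. */
   hypre_ParCSRPersistentCommHandleStart(handle, HYPRE_MEMORY_HOST, x_send_vals);

   /* ... independent local work can overlap with the communication here ... */

   /* Wait for completion and copy received values out of the internal buffer. */
   hypre_ParCSRPersistentCommHandleWait(handle, HYPRE_MEMORY_HOST, x_offd_vals);

   /* No per-call destroy: the cached handle is freed together with comm_pkg
      in hypre_MatvecCommPkgDestroy. */
}
#endif
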
#endif // HYPRE_USING_PERSISTENT_COMM

hypre_ParCSRCommHandle*
hypre_ParCSRCommHandleCreate ( HYPRE_Int            job,
                               hypre_ParCSRCommPkg *comm_pkg,
                               void                *send_data,
                               void                *recv_data )
{
   return hypre_ParCSRCommHandleCreate_v2(job, comm_pkg, HYPRE_MEMORY_HOST, send_data,
                                          HYPRE_MEMORY_HOST, recv_data);
}

hypre_ParCSRCommHandle*
hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int            job,
                                  hypre_ParCSRCommPkg *comm_pkg,
                                  HYPRE_MemoryLocation send_memory_location,
                                  void                *send_data_in,
                                  HYPRE_MemoryLocation recv_memory_location,
                                  void                *recv_data_in )
{
   HYPRE_Int                  num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
   HYPRE_Int                  num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
   MPI_Comm                   comm      = hypre_ParCSRCommPkgComm(comm_pkg);
   HYPRE_Int                  num_send_bytes = 0;
   HYPRE_Int                  num_recv_bytes = 0;
   hypre_ParCSRCommHandle    *comm_handle;
   HYPRE_Int                  num_requests;
   hypre_MPI_Request         *requests;
   HYPRE_Int                  i, j;
   HYPRE_Int                  my_id, num_procs;
   HYPRE_Int                  ip, vec_start, vec_len;
   void                      *send_data;
   void                      *recv_data;

   /*--------------------------------------------------------------------
    * This routine sets up a communication handle, posts receives and
    * initiates sends. It always requires num_sends, num_recvs, recv_procs
    * and send_procs to be set in comm_pkg.
    * There are different options for job:
    * job = 1 : is used to initialize a communication exchange for the parts
    *           of a vector needed to perform a Matvec; it requires send_data
    *           and recv_data to be of type HYPRE_Complex; recv_vec_starts
    *           and send_map_starts need to be set in comm_pkg.
    * job = 2 : is used to initialize a communication exchange for the parts
    *           of a vector needed to perform a MatvecT; it requires send_data
    *           and recv_data to be of type HYPRE_Complex; recv_vec_starts
    *           and send_map_starts need to be set in comm_pkg.
    * job = 11: similar to job = 1, but exchanges data of type HYPRE_Int
    *           (not HYPRE_Complex); requires send_data and recv_data to be
    *           of type HYPRE_Int; recv_vec_starts and send_map_starts need
    *           to be set in comm_pkg.
    * job = 12: similar to job = 2, but exchanges data of type HYPRE_Int
    *           (not HYPRE_Complex); requires send_data and recv_data to be
    *           of type HYPRE_Int; recv_vec_starts and send_map_starts need
    *           to be set in comm_pkg.
    * job = 21: similar to job = 1, but exchanges data of type HYPRE_BigInt
    *           (not HYPRE_Complex); requires send_data and recv_data to be
    *           of type HYPRE_BigInt; recv_vec_starts and send_map_starts
    *           need to be set in comm_pkg.
    * job = 22: similar to job = 2, but exchanges data of type HYPRE_BigInt
    *           (not HYPRE_Complex); requires send_data and recv_data to be
    *           of type HYPRE_BigInt; recv_vec_starts and send_map_starts
    *           need to be set in comm_pkg.
    * default : ignores send_data and recv_data and requires send_mpi_types
    *           and recv_mpi_types to be set in comm_pkg; the datatypes need
    *           to point to absolute addresses, e.g. generated using
    *           hypre_MPI_Address.
    * An illustrative, non-normative usage sketch follows this function.
    *--------------------------------------------------------------------*/
#ifndef HYPRE_WITH_GPU_AWARE_MPI
   switch (job)
   {
      case 1:
         num_send_bytes = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends) * sizeof(HYPRE_Complex);
         num_recv_bytes = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs) * sizeof(HYPRE_Complex);
         break;
      case 2:
         num_send_bytes = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs) * sizeof(HYPRE_Complex);
         num_recv_bytes = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends) * sizeof(HYPRE_Complex);
         break;
      case 11:
         num_send_bytes = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends) * sizeof(HYPRE_Int);
         num_recv_bytes = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs) * sizeof(HYPRE_Int);
         break;
      case 12:
         num_send_bytes = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs) * sizeof(HYPRE_Int);
         num_recv_bytes = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends) * sizeof(HYPRE_Int);
         break;
      case 21:
         num_send_bytes = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends) * sizeof(HYPRE_BigInt);
         num_recv_bytes = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs) * sizeof(HYPRE_BigInt);
         break;
      case 22:
         num_send_bytes = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs) * sizeof(HYPRE_BigInt);
         num_recv_bytes = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends) * sizeof(HYPRE_BigInt);
         break;
   }

   hypre_MemoryLocation act_send_memory_location = hypre_GetActualMemLocation(send_memory_location);

   if ( act_send_memory_location == hypre_MEMORY_DEVICE || act_send_memory_location == hypre_MEMORY_UNIFIED )
   {
      //send_data = _hypre_TAlloc(char, num_send_bytes, hypre_MEMORY_HOST_PINNED);
      send_data = hypre_TAlloc(char, num_send_bytes, HYPRE_MEMORY_HOST);
      hypre_TMemcpy(send_data, send_data_in, char, num_send_bytes, HYPRE_MEMORY_HOST, HYPRE_MEMORY_DEVICE);
   }
   else
   {
      send_data = send_data_in;
   }

   hypre_MemoryLocation act_recv_memory_location = hypre_GetActualMemLocation(recv_memory_location);

   if ( act_recv_memory_location == hypre_MEMORY_DEVICE || act_recv_memory_location == hypre_MEMORY_UNIFIED )
   {
      //recv_data = hypre_TAlloc(char, num_recv_bytes, hypre_MEMORY_HOST_PINNED);
      recv_data = hypre_TAlloc(char, num_recv_bytes, HYPRE_MEMORY_HOST);
   }
   else
   {
      recv_data = recv_data_in;
   }
#else /* #ifndef HYPRE_WITH_GPU_AWARE_MPI */
   send_data = send_data_in;
   recv_data = recv_data_in;
   // TODO RL: it seems that we need to sync the CUDA stream before doing GPU-GPU MPI.
   // Need to check MPI documentation whether this is actually true
   hypre_SyncCudaComputeStream(hypre_handle());
#endif

   num_requests = num_sends + num_recvs;
   requests = hypre_CTAlloc(hypre_MPI_Request, num_requests, HYPRE_MEMORY_HOST);

   hypre_MPI_Comm_size(comm, &num_procs);
   hypre_MPI_Comm_rank(comm, &my_id);

   j = 0;
   switch (job)
   {
      case  1:
      {
         HYPRE_Complex *d_send_data = (HYPRE_Complex *) send_data;
         HYPRE_Complex *d_recv_data = (HYPRE_Complex *) recv_data;
         for (i = 0; i < num_recvs; i++)
         {
            ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i);
            vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i);
            vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i+1)-vec_start;
            hypre_MPI_Irecv(&d_recv_data[vec_start], vec_len, HYPRE_MPI_COMPLEX,
                            ip, 0, comm, &requests[j++]);
         }
         for (i = 0; i < num_sends; i++)
         {
            ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i);
            vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
            vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1)-vec_start;
            hypre_MPI_Isend(&d_send_data[vec_start], vec_len, HYPRE_MPI_COMPLEX,
                            ip, 0, comm, &requests[j++]);
         }
         break;
      }
      case  2:
      {
         HYPRE_Complex *d_send_data = (HYPRE_Complex *) send_data;
         HYPRE_Complex *d_recv_data = (HYPRE_Complex *) recv_data;
         for (i = 0; i < num_sends; i++)
         {
            ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i);
            vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
            vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1) - vec_start;
            hypre_MPI_Irecv(&d_recv_data[vec_start], vec_len, HYPRE_MPI_COMPLEX,
                            ip, 0, comm, &requests[j++]);
         }
         for (i = 0; i < num_recvs; i++)
         {
            ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i);
            vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i);
            vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i+1)-vec_start;
            hypre_MPI_Isend(&d_send_data[vec_start], vec_len, HYPRE_MPI_COMPLEX,
                            ip, 0, comm, &requests[j++]);
         }
         break;
      }
      case  11:
      {
         HYPRE_Int *i_send_data = (HYPRE_Int *) send_data;
         HYPRE_Int *i_recv_data = (HYPRE_Int *) recv_data;
         for (i = 0; i < num_recvs; i++)
         {
            ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i);
            vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i);
            vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i+1)-vec_start;
            hypre_MPI_Irecv(&i_recv_data[vec_start], vec_len, HYPRE_MPI_INT,
                            ip, 0, comm, &requests[j++]);
         }
         for (i = 0; i < num_sends; i++)
         {
            ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i);
            vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
            vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1)-vec_start;
            hypre_MPI_Isend(&i_send_data[vec_start], vec_len, HYPRE_MPI_INT,
                            ip, 0, comm, &requests[j++]);
         }
         break;
      }
      case  12:
      {
         HYPRE_Int *i_send_data = (HYPRE_Int *) send_data;
         HYPRE_Int *i_recv_data = (HYPRE_Int *) recv_data;
         for (i = 0; i < num_sends; i++)
         {
            ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i);
            vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
            vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1) - vec_start;
            hypre_MPI_Irecv(&i_recv_data[vec_start], vec_len, HYPRE_MPI_INT,
                            ip, 0, comm, &requests[j++]);
         }
         for (i = 0; i < num_recvs; i++)
         {
            ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i);
            vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i);
            vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i+1)-vec_start;
            hypre_MPI_Isend(&i_send_data[vec_start], vec_len, HYPRE_MPI_INT,
                            ip, 0, comm, &requests[j++]);
         }
         break;
      }
      case  21:
      {
         HYPRE_BigInt *i_send_data = (HYPRE_BigInt *) send_data;
         HYPRE_BigInt *i_recv_data = (HYPRE_BigInt *) recv_data;
         for (i = 0; i < num_recvs; i++)
         {
            ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i);
            vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i);
            vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i+1)-vec_start;
            hypre_MPI_Irecv(&i_recv_data[vec_start], vec_len, HYPRE_MPI_BIG_INT,
                            ip, 0, comm, &requests[j++]);
         }
         for (i = 0; i < num_sends; i++)
         {
            vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
            vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1)-vec_start;
            ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i);
            hypre_MPI_Isend(&i_send_data[vec_start], vec_len, HYPRE_MPI_BIG_INT,
                            ip, 0, comm, &requests[j++]);
         }
         break;
      }
      case  22:
      {
         HYPRE_BigInt *i_send_data = (HYPRE_BigInt *) send_data;
         HYPRE_BigInt *i_recv_data = (HYPRE_BigInt *) recv_data;
         for (i = 0; i < num_sends; i++)
         {
            vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
            vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1) - vec_start;
            ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i);
            hypre_MPI_Irecv(&i_recv_data[vec_start], vec_len, HYPRE_MPI_BIG_INT,
                            ip, 0, comm, &requests[j++]);
         }
         for (i = 0; i < num_recvs; i++)
         {
            ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i);
            vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i);
            vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i+1)-vec_start;
            hypre_MPI_Isend(&i_send_data[vec_start], vec_len, HYPRE_MPI_BIG_INT,
                            ip, 0, comm, &requests[j++]);
         }
         break;
      }
   }
   /*--------------------------------------------------------------------
    * set up comm_handle and return
    *--------------------------------------------------------------------*/

   comm_handle = hypre_CTAlloc(hypre_ParCSRCommHandle,  1, HYPRE_MEMORY_HOST);

   hypre_ParCSRCommHandleCommPkg(comm_handle)            = comm_pkg;
   hypre_ParCSRCommHandleSendMemoryLocation(comm_handle) = send_memory_location;
   hypre_ParCSRCommHandleRecvMemoryLocation(comm_handle) = recv_memory_location;
   hypre_ParCSRCommHandleNumSendBytes(comm_handle)       = num_send_bytes;
   hypre_ParCSRCommHandleNumRecvBytes(comm_handle)       = num_recv_bytes;
   hypre_ParCSRCommHandleSendData(comm_handle)           = send_data_in;
   hypre_ParCSRCommHandleRecvData(comm_handle)           = recv_data_in;
   hypre_ParCSRCommHandleSendDataBuffer(comm_handle)     = send_data;
   hypre_ParCSRCommHandleRecvDataBuffer(comm_handle)     = recv_data;
   hypre_ParCSRCommHandleNumRequests(comm_handle)        = num_requests;
   hypre_ParCSRCommHandleRequests(comm_handle)           = requests;

   return ( comm_handle );
}
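
/* Illustrative usage sketch (not part of the library): a job-1 (Matvec)
 * exchange through the non-persistent interface above.  The hypothetical
 * arrays `x_send_vals` and `x_offd_vals` are assumed to live in
 * HYPRE_MEMORY_HOST and to be sized by the send_map_starts / recv_vec_starts
 * of comm_pkg. */
#if 0
static void
example_matvec_halo_exchange( hypre_ParCSRCommPkg *comm_pkg,
                              HYPRE_Complex       *x_send_vals,
                              HYPRE_Complex       *x_offd_vals )
{
   /* Posts the non-blocking receives and sends for job 1 and returns. */
   hypre_ParCSRCommHandle *handle =
      hypre_ParCSRCommHandleCreate(1, comm_pkg, x_send_vals, x_offd_vals);

   /* ... independent local work can overlap with the communication here ... */

   /* Waits on all outstanding requests and frees the handle. */
   hypre_ParCSRCommHandleDestroy(handle);
}
#endif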

HYPRE_Int
hypre_ParCSRCommHandleDestroy( hypre_ParCSRCommHandle *comm_handle )
{
   if ( comm_handle == NULL )
   {
      return hypre_error_flag;
   }

   if (hypre_ParCSRCommHandleNumRequests(comm_handle))
   {
      hypre_MPI_Status *status0;
      status0 = hypre_CTAlloc(hypre_MPI_Status,
                              hypre_ParCSRCommHandleNumRequests(comm_handle), HYPRE_MEMORY_HOST);
      hypre_MPI_Waitall(hypre_ParCSRCommHandleNumRequests(comm_handle),
                        hypre_ParCSRCommHandleRequests(comm_handle), status0);
      hypre_TFree(status0, HYPRE_MEMORY_HOST);
   }

#ifndef HYPRE_WITH_GPU_AWARE_MPI
   hypre_MemoryLocation act_send_memory_location = hypre_GetActualMemLocation(hypre_ParCSRCommHandleSendMemoryLocation(comm_handle));
   if ( act_send_memory_location == hypre_MEMORY_DEVICE || act_send_memory_location == hypre_MEMORY_UNIFIED )
   {
      //hypre_HostPinnedFree(hypre_ParCSRCommHandleSendDataBuffer(comm_handle));
      hypre_TFree(hypre_ParCSRCommHandleSendDataBuffer(comm_handle), HYPRE_MEMORY_HOST);
   }

   hypre_MemoryLocation act_recv_memory_location = hypre_GetActualMemLocation(hypre_ParCSRCommHandleRecvMemoryLocation(comm_handle));
   if ( act_recv_memory_location == hypre_MEMORY_DEVICE || act_recv_memory_location == hypre_MEMORY_UNIFIED )
   {
      hypre_TMemcpy( hypre_ParCSRCommHandleRecvData(comm_handle),
                     hypre_ParCSRCommHandleRecvDataBuffer(comm_handle),
                     char,
                     hypre_ParCSRCommHandleNumRecvBytes(comm_handle),
                     HYPRE_MEMORY_DEVICE,
                     HYPRE_MEMORY_HOST );

      //hypre_HostPinnedFree(hypre_ParCSRCommHandleRecvDataBuffer(comm_handle));
      hypre_TFree(hypre_ParCSRCommHandleRecvDataBuffer(comm_handle), HYPRE_MEMORY_HOST);
   }
#endif

   hypre_TFree(hypre_ParCSRCommHandleRequests(comm_handle), HYPRE_MEMORY_HOST);
   hypre_TFree(comm_handle, HYPRE_MEMORY_HOST);

   return hypre_error_flag;
}


/* hypre_ParCSRCommPkgCreate_core does all the communications and computations
   for hypre_MatCommPkgCreate ( hypre_ParCSRMatrix *A) and
   hypre_BoolMatCommPkgCreate ( hypre_ParCSRBooleanMatrix *A). To support both
   data types, it has hardly any data structures other than HYPRE_Int*.
*/

void
hypre_ParCSRCommPkgCreate_core(
   /* input args: */
   MPI_Comm   comm,
   HYPRE_BigInt *col_map_offd,
   HYPRE_BigInt  first_col_diag,
   HYPRE_BigInt *col_starts,
   HYPRE_Int  num_cols_diag,
   HYPRE_Int  num_cols_offd,
   /* pointers to output args: */
   HYPRE_Int  *p_num_recvs,
   HYPRE_Int **p_recv_procs,
   HYPRE_Int **p_recv_vec_starts,
   HYPRE_Int  *p_num_sends,
   HYPRE_Int **p_send_procs,
   HYPRE_Int **p_send_map_starts,
   HYPRE_Int **p_send_map_elmts
   )
{
   HYPRE_Int    i, j;
   HYPRE_Int    num_procs, my_id, proc_num, num_elmts;
   HYPRE_Int    local_info;
   HYPRE_BigInt offd_col;
   HYPRE_BigInt *big_buf_data = NULL;
   HYPRE_Int    *proc_mark, *proc_add, *tmp, *recv_buf, *displs, *info;
   /* outputs: */
   HYPRE_Int  num_recvs, *recv_procs, *recv_vec_starts;
   HYPRE_Int  num_sends, *send_procs, *send_map_starts, *send_map_elmts;
   HYPRE_Int  ip, vec_start, vec_len, num_requests;

   hypre_MPI_Request *requests;
   hypre_MPI_Status *status;

   hypre_MPI_Comm_size(comm, &num_procs);
   hypre_MPI_Comm_rank(comm, &my_id);

   proc_mark = hypre_CTAlloc(HYPRE_Int,  num_procs, HYPRE_MEMORY_HOST);
   proc_add = hypre_CTAlloc(HYPRE_Int,  num_procs, HYPRE_MEMORY_HOST);
   info = hypre_CTAlloc(HYPRE_Int,  num_procs, HYPRE_MEMORY_HOST);

   /* ----------------------------------------------------------------------
    * determine which processors to receive from (set proc_mark) and num_recvs,
    * at the end of the loop proc_mark[i] contains the number of elements to be
    * received from Proc. i
    * ---------------------------------------------------------------------*/

   for (i=0; i < num_procs; i++)
      proc_add[i] = 0;

   proc_num = 0;
   if (num_cols_offd) offd_col = col_map_offd[0];
   num_recvs=0;
   j = 0;
   for (i=0; i < num_cols_offd; i++)
   {
      if (num_cols_diag)
         proc_num = hypre_min(num_procs-1, (HYPRE_Int)(offd_col / (HYPRE_BigInt)num_cols_diag));
      while (col_starts[proc_num] > offd_col )
         proc_num = proc_num-1;
      while (col_starts[proc_num+1]-1 < offd_col )
         proc_num = proc_num+1;
      proc_mark[num_recvs] = proc_num;
      j = i;
      while (col_starts[proc_num+1] > offd_col)
      {
         proc_add[num_recvs]++;
         if (j < num_cols_offd-1)
         {
            j++;
            offd_col = col_map_offd[j];
         }
         else
         {
            j++;
            offd_col = col_starts[num_procs];
         }
      }
      num_recvs++;
      if (j < num_cols_offd) i = j-1;
      else i=j;
   }

   local_info = 2*num_recvs;

   hypre_MPI_Allgather(&local_info, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, comm);

   /* ----------------------------------------------------------------------
    * generate information to be sent: tmp contains for each recv_proc:
    * id of recv_procs, number of elements to be received for this processor,
    * indices of elements (in this order)
    * ---------------------------------------------------------------------*/

   displs = hypre_CTAlloc(HYPRE_Int,  num_procs+1, HYPRE_MEMORY_HOST);
   displs[0] = 0;
   for (i=1; i < num_procs+1; i++)
      displs[i] = displs[i-1]+info[i-1];
   recv_buf = hypre_CTAlloc(HYPRE_Int,  displs[num_procs], HYPRE_MEMORY_HOST);

   recv_procs = NULL;
   tmp = NULL;
   if (num_recvs)
   {
      recv_procs = hypre_CTAlloc(HYPRE_Int,  num_recvs, HYPRE_MEMORY_HOST);
      tmp = hypre_CTAlloc(HYPRE_Int,  local_info, HYPRE_MEMORY_HOST);
   }
   recv_vec_starts = hypre_CTAlloc(HYPRE_Int,  num_recvs+1, HYPRE_MEMORY_HOST);

   j = 0;
   if (num_recvs) recv_vec_starts[0] = 0;
   for (i=0; i < num_recvs; i++)
   {
      num_elmts = proc_add[i];
      recv_procs[i] = proc_mark[i];
      recv_vec_starts[i+1] = recv_vec_starts[i]+num_elmts;
      tmp[j++] = proc_mark[i];
      tmp[j++] = num_elmts;
   }

   hypre_MPI_Allgatherv(tmp,local_info,HYPRE_MPI_INT,recv_buf,info,
                        displs,HYPRE_MPI_INT,comm);

   /* ----------------------------------------------------------------------
    * determine num_sends and number of elements to be sent
    * ---------------------------------------------------------------------*/

   num_sends = 0;
   num_elmts = 0;
   proc_add[0] = 0;
   for (i=0; i < num_procs; i++)
   {
      j = displs[i];
      while ( j < displs[i+1])
      {
         if (recv_buf[j++] == my_id)
         {
            proc_mark[num_sends] = i;
            num_sends++;
            proc_add[num_sends] = proc_add[num_sends-1]+recv_buf[j];
            break;
         }
         j++;
      }
   }

   /* ----------------------------------------------------------------------
    * determine send_procs and actual elements to be send (in send_map_elmts)
    * and send_map_starts whose i-th entry points to the beginning of the
    * elements to be send to proc. i
    * ---------------------------------------------------------------------*/

   send_procs = NULL;
   send_map_elmts = NULL;

   if (num_sends)
   {
      send_procs = hypre_CTAlloc(HYPRE_Int,  num_sends, HYPRE_MEMORY_HOST);
      send_map_elmts = hypre_CTAlloc(HYPRE_Int,  proc_add[num_sends], HYPRE_MEMORY_HOST);
      big_buf_data = hypre_CTAlloc(HYPRE_BigInt,  proc_add[num_sends], HYPRE_MEMORY_HOST);
   }
   send_map_starts = hypre_CTAlloc(HYPRE_Int,  num_sends+1, HYPRE_MEMORY_HOST);
   num_requests = num_recvs+num_sends;
   if (num_requests)
   {
      requests = hypre_CTAlloc(hypre_MPI_Request,  num_requests, HYPRE_MEMORY_HOST);
      status = hypre_CTAlloc(hypre_MPI_Status,  num_requests, HYPRE_MEMORY_HOST);
   }

   if (num_sends) send_map_starts[0] = 0;
   for (i=0; i < num_sends; i++)
   {
      send_map_starts[i+1] = proc_add[i+1];
      send_procs[i] = proc_mark[i];
   }

   j=0;
   for (i=0; i < num_sends; i++)
   {
      vec_start = send_map_starts[i];
      vec_len = send_map_starts[i+1] - vec_start;
      ip = send_procs[i];
      hypre_MPI_Irecv(&big_buf_data[vec_start], vec_len, HYPRE_MPI_BIG_INT,
                      ip, 0, comm, &requests[j++]);
   }
   for (i=0; i < num_recvs; i++)
   {
      vec_start = recv_vec_starts[i];
      vec_len = recv_vec_starts[i+1] - vec_start;
      ip = recv_procs[i];
      hypre_MPI_Isend(&col_map_offd[vec_start], vec_len, HYPRE_MPI_BIG_INT,
                      ip, 0, comm, &requests[j++]);
   }

   if (num_requests)
   {
      hypre_MPI_Waitall(num_requests, requests, status);
      hypre_TFree(requests, HYPRE_MEMORY_HOST);
      hypre_TFree(status, HYPRE_MEMORY_HOST);
   }

   if (num_sends)
   {
      for (i=0; i<send_map_starts[num_sends]; i++)
         send_map_elmts[i] = (HYPRE_Int)(big_buf_data[i]-first_col_diag);
   }

   hypre_TFree(proc_add, HYPRE_MEMORY_HOST);
   hypre_TFree(proc_mark, HYPRE_MEMORY_HOST);
   hypre_TFree(tmp, HYPRE_MEMORY_HOST);
   hypre_TFree(recv_buf, HYPRE_MEMORY_HOST);
   hypre_TFree(displs, HYPRE_MEMORY_HOST);
   hypre_TFree(info, HYPRE_MEMORY_HOST);
   hypre_TFree(big_buf_data, HYPRE_MEMORY_HOST);

   /* finish up with the hand-coded call-by-reference... */
   *p_num_recvs = num_recvs;
   *p_recv_procs = recv_procs;
   *p_recv_vec_starts = recv_vec_starts;
   *p_num_sends = num_sends;
   *p_send_procs = send_procs;
   *p_send_map_starts = send_map_starts;
   *p_send_map_elmts = send_map_elmts;
}


HYPRE_Int
hypre_ParCSRCommPkgCreate
(
   /* inputs */
   MPI_Comm   comm,
   HYPRE_BigInt *col_map_offd,
   HYPRE_BigInt  first_col_diag,
   HYPRE_BigInt *col_starts,
   HYPRE_Int  num_cols_diag,
   HYPRE_Int  num_cols_offd,
   /* output */
   hypre_ParCSRCommPkg *comm_pkg
)
{
   HYPRE_Int  num_sends;
   HYPRE_Int *send_procs;
   HYPRE_Int *send_map_starts;
   HYPRE_Int *send_map_elmts;

   HYPRE_Int  num_recvs;
   HYPRE_Int *recv_procs;
   HYPRE_Int *recv_vec_starts;

   hypre_ParCSRCommPkgCreate_core
      ( comm, col_map_offd, first_col_diag, col_starts,
        num_cols_diag, num_cols_offd,
        &num_recvs, &recv_procs, &recv_vec_starts,
        &num_sends, &send_procs, &send_map_starts,
        &send_map_elmts );

   hypre_ParCSRCommPkgComm         (comm_pkg) = comm;
   hypre_ParCSRCommPkgNumRecvs     (comm_pkg) = num_recvs;
   hypre_ParCSRCommPkgRecvProcs    (comm_pkg) = recv_procs;
   hypre_ParCSRCommPkgRecvVecStarts(comm_pkg) = recv_vec_starts;
   hypre_ParCSRCommPkgNumSends     (comm_pkg) = num_sends;
   hypre_ParCSRCommPkgSendProcs    (comm_pkg) = send_procs;
   hypre_ParCSRCommPkgSendMapStarts(comm_pkg) = send_map_starts;
   hypre_ParCSRCommPkgSendMapElmts (comm_pkg) = send_map_elmts;

   return hypre_error_flag;
}
/* ----------------------------------------------------------------------
 * hypre_MatvecCommPkgCreate
 * generates the comm_pkg for A
 * if the matrix has no assumed partition yet, the routine creates one
 * and A becomes its owner
 * ---------------------------------------------------------------------*/

HYPRE_Int
hypre_MatvecCommPkgCreate ( hypre_ParCSRMatrix *A )
{
   MPI_Comm             comm  = hypre_ParCSRMatrixComm(A);
   hypre_IJAssumedPart *apart = hypre_ParCSRMatrixAssumedPartition(A);
   hypre_ParCSRCommPkg *comm_pkg;

   HYPRE_BigInt         first_col_diag  = hypre_ParCSRMatrixFirstColDiag(A);
   HYPRE_BigInt        *col_map_offd    = hypre_ParCSRMatrixColMapOffd(A);
   HYPRE_Int            num_cols_offd   = hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(A));
   HYPRE_BigInt         global_num_cols = hypre_ParCSRMatrixGlobalNumCols(A);

   HYPRE_ANNOTATE_FUNC_BEGIN;

   /* Create the assumed partition if it does not exist yet; A then owns it */
   if (apart == NULL)
   {
      hypre_ParCSRMatrixCreateAssumedPartition(A);
      hypre_ParCSRMatrixOwnsAssumedPartition(A) = 1;
      apart = hypre_ParCSRMatrixAssumedPartition(A);
   }

   /*-----------------------------------------------------------
    * setup commpkg
    *----------------------------------------------------------*/
   comm_pkg = hypre_CTAlloc(hypre_ParCSRCommPkg, 1, HYPRE_MEMORY_HOST);
   hypre_ParCSRMatrixCommPkg(A) = comm_pkg;
   hypre_ParCSRCommPkgCreateApart( comm, col_map_offd, first_col_diag,
                                   num_cols_offd, global_num_cols,
                                   apart,
                                   comm_pkg );

   HYPRE_ANNOTATE_FUNC_END;

   return hypre_error_flag;
}
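
/* Illustrative sketch (not part of the library): the typical check-and-create
 * pattern used before a parallel Matvec, assuming the matrix `A` already has
 * its diag and offd parts set up. */
#if 0
static void
example_ensure_comm_pkg( hypre_ParCSRMatrix *A )
{
   if (hypre_ParCSRMatrixCommPkg(A) == NULL)
   {
      /* Builds the comm_pkg (creating the assumed partition if needed) and
         attaches it to A; it is later freed through hypre_MatvecCommPkgDestroy
         when A is destroyed. */
      hypre_MatvecCommPkgCreate(A);
   }
}
#endif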

HYPRE_Int
hypre_MatvecCommPkgDestroy( hypre_ParCSRCommPkg *comm_pkg )
{
#ifdef HYPRE_USING_PERSISTENT_COMM
   HYPRE_Int i;
   for (i = HYPRE_COMM_PKG_JOB_COMPLEX; i < NUM_OF_COMM_PKG_JOB_TYPE; ++i)
   {
      if (comm_pkg->persistent_comm_handles[i])
      {
         hypre_ParCSRPersistentCommHandleDestroy(comm_pkg->persistent_comm_handles[i]);
      }
   }
#endif

   if (hypre_ParCSRCommPkgNumSends(comm_pkg))
   {
      hypre_TFree(hypre_ParCSRCommPkgSendProcs(comm_pkg), HYPRE_MEMORY_HOST);
      hypre_TFree(hypre_ParCSRCommPkgSendMapElmts(comm_pkg), HYPRE_MEMORY_HOST);
      hypre_TFree(hypre_ParCSRCommPkgDeviceSendMapElmts(comm_pkg), HYPRE_MEMORY_DEVICE);
   }
   hypre_TFree(hypre_ParCSRCommPkgSendMapStarts(comm_pkg), HYPRE_MEMORY_HOST);
   /* if (hypre_ParCSRCommPkgSendMPITypes(comm_pkg))
      hypre_TFree(hypre_ParCSRCommPkgSendMPITypes(comm_pkg), HYPRE_MEMORY_HOST); */
   if (hypre_ParCSRCommPkgNumRecvs(comm_pkg))
   {
      hypre_TFree(hypre_ParCSRCommPkgRecvProcs(comm_pkg), HYPRE_MEMORY_HOST);
   }
   hypre_TFree(hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), HYPRE_MEMORY_HOST);
   /* if (hypre_ParCSRCommPkgRecvMPITypes(comm_pkg))
      hypre_TFree(hypre_ParCSRCommPkgRecvMPITypes(comm_pkg), HYPRE_MEMORY_HOST); */

#if defined(HYPRE_USING_GPU)
   hypre_TFree(hypre_ParCSRCommPkgTmpData(comm_pkg),   HYPRE_MEMORY_DEVICE);
   hypre_TFree(hypre_ParCSRCommPkgBufData(comm_pkg),   HYPRE_MEMORY_DEVICE);
   //_hypre_TFree(hypre_ParCSRCommPkgTmpData(comm_pkg), hypre_MEMORY_DEVICE);
   //_hypre_TFree(hypre_ParCSRCommPkgBufData(comm_pkg), hypre_MEMORY_DEVICE);
   hypre_TFree(hypre_ParCSRCommPkgWorkSpace(comm_pkg), HYPRE_MEMORY_DEVICE);
#endif

   hypre_TFree(comm_pkg, HYPRE_MEMORY_HOST);

   return hypre_error_flag;
}

/* AHB 11/06 : alternate to the extend function below - creates a
 * second comm pkg based on indices - this makes it easier to use the
 * global partition
 * RL: renamed and moved it here
 */
HYPRE_Int
hypre_ParCSRFindExtendCommPkg(MPI_Comm              comm,
                              HYPRE_BigInt          global_num,
                              HYPRE_BigInt          my_first,
                              HYPRE_Int             local_num,
                              HYPRE_BigInt         *starts,
                              hypre_IJAssumedPart  *apart,
                              HYPRE_Int             indices_len,
                              HYPRE_BigInt         *indices,
                              hypre_ParCSRCommPkg **extend_comm_pkg)
{
   /*-----------------------------------------------------------
    * setup commpkg
    *----------------------------------------------------------*/
   hypre_ParCSRCommPkg *new_comm_pkg = hypre_CTAlloc(hypre_ParCSRCommPkg, 1, HYPRE_MEMORY_HOST);
   *extend_comm_pkg = new_comm_pkg;

   hypre_assert(apart != NULL);
   hypre_ParCSRCommPkgCreateApart ( comm, indices, my_first, indices_len, global_num, apart,
                                    new_comm_pkg );

   return hypre_error_flag;
}


HYPRE_Int
hypre_BuildCSRMatrixMPIDataType( HYPRE_Int num_nonzeros,
                                 HYPRE_Int num_rows,
                                 HYPRE_Complex *a_data,
                                 HYPRE_Int *a_i,
                                 HYPRE_Int *a_j,
                                 hypre_MPI_Datatype *csr_matrix_datatype )
{
   HYPRE_Int            block_lens[3];
   hypre_MPI_Aint       displ[3];
   hypre_MPI_Datatype   types[3];

   block_lens[0] = num_nonzeros;
   block_lens[1] = num_rows+1;
   block_lens[2] = num_nonzeros;

   types[0] = HYPRE_MPI_COMPLEX;
   types[1] = HYPRE_MPI_INT;
   types[2] = HYPRE_MPI_INT;

   hypre_MPI_Address(a_data, &displ[0]);
   hypre_MPI_Address(a_i, &displ[1]);
   hypre_MPI_Address(a_j, &displ[2]);
   hypre_MPI_Type_struct(3, block_lens, displ, types, csr_matrix_datatype);
   hypre_MPI_Type_commit(csr_matrix_datatype);

   return hypre_error_flag;
}
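
/* Illustrative sketch (not part of the library): because the datatype built
 * above is based on absolute addresses (hypre_MPI_Address), a matching send
 * is assumed to pass MPI_BOTTOM as the buffer; the receiver would build its
 * own datatype over its receive arrays in the same way.  The wrapper calls
 * used here are assumed to mirror the hypre_MPI_* wrappers above. */
#if 0
static void
example_send_csr_struct( HYPRE_Int num_nonzeros, HYPRE_Int num_rows,
                         HYPRE_Complex *a_data, HYPRE_Int *a_i, HYPRE_Int *a_j,
                         HYPRE_Int dest, MPI_Comm comm )
{
   hypre_MPI_Datatype csr_type;

   hypre_BuildCSRMatrixMPIDataType(num_nonzeros, num_rows,
                                   a_data, a_i, a_j, &csr_type);

   /* One message carries a_data, a_i and a_j through the struct datatype. */
   hypre_MPI_Send(MPI_BOTTOM, 1, csr_type, dest, 0, comm);

   hypre_MPI_Type_free(&csr_type);
}
#endif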

HYPRE_Int
hypre_BuildCSRJDataType( HYPRE_Int num_nonzeros,
                         HYPRE_Complex *a_data,
                         HYPRE_Int *a_j,
                         hypre_MPI_Datatype *csr_jdata_datatype )
{
   HYPRE_Int          block_lens[2];
   hypre_MPI_Aint     displs[2];
   hypre_MPI_Datatype types[2];

   block_lens[0] = num_nonzeros;
   block_lens[1] = num_nonzeros;

   types[0] = HYPRE_MPI_COMPLEX;
   types[1] = HYPRE_MPI_INT;

   hypre_MPI_Address(a_data, &displs[0]);
   hypre_MPI_Address(a_j, &displs[1]);

   hypre_MPI_Type_struct(2, block_lens, displs, types, csr_jdata_datatype);
   hypre_MPI_Type_commit(csr_jdata_datatype);

   return hypre_error_flag;
}