/******************************************************************************
 * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
 * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
 *
 * SPDX-License-Identifier: (Apache-2.0 OR MIT)
 ******************************************************************************/

#include "_hypre_parcsr_ls.h"

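/*--------------------------------------------------------------------------
 * hypre_GetCommPkgRTFromCommPkgA
 *
 * Builds the communication package of the restriction operator RT by pruning
 * the communication package of A: only the neighbors of A that actually
 * contribute off-processor columns of RT are kept, and RT's off-diagonal
 * column map is translated to global coarse indices via fine_to_coarse.
 *
 * Minimal usage sketch (the names RT, A, fine_to_coarse and tmp_map_offd are
 * assumed to come from the surrounding setup, e.g. the RAP phase):
 *
 *    hypre_GetCommPkgRTFromCommPkgA(RT, A, fine_to_coarse, tmp_map_offd);
 *    comm_pkg_RT = hypre_ParCSRMatrixCommPkg(RT);
 *--------------------------------------------------------------------------*/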
HYPRE_Int
hypre_GetCommPkgRTFromCommPkgA( hypre_ParCSRMatrix *RT,
                                hypre_ParCSRMatrix *A,
                                HYPRE_Int *fine_to_coarse,
                                HYPRE_Int *tmp_map_offd)
{
   MPI_Comm comm = hypre_ParCSRMatrixComm(RT);
   hypre_ParCSRCommPkg *comm_pkg_A = hypre_ParCSRMatrixCommPkg(A);
   hypre_ParCSRCommHandle *comm_handle;
   HYPRE_Int num_recvs_A = hypre_ParCSRCommPkgNumRecvs(comm_pkg_A);
   HYPRE_Int *recv_procs_A = hypre_ParCSRCommPkgRecvProcs(comm_pkg_A);
   HYPRE_Int *recv_vec_starts_A = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg_A);
   HYPRE_Int num_sends_A = hypre_ParCSRCommPkgNumSends(comm_pkg_A);
   HYPRE_Int *send_procs_A = hypre_ParCSRCommPkgSendProcs(comm_pkg_A);
   HYPRE_Int *send_map_starts_A = hypre_ParCSRCommPkgSendMapStarts(comm_pkg_A);

   hypre_ParCSRCommPkg *comm_pkg;
   HYPRE_Int num_recvs_RT;
   HYPRE_Int *recv_procs_RT;
   HYPRE_Int *recv_vec_starts_RT;
   HYPRE_Int num_sends_RT;
   HYPRE_Int *send_procs_RT;
   HYPRE_Int *send_map_starts_RT;
   HYPRE_Int *send_map_elmts_RT;

   HYPRE_BigInt *col_map_offd_RT = hypre_ParCSRMatrixColMapOffd(RT);
   HYPRE_Int num_cols_offd_RT = hypre_CSRMatrixNumCols( hypre_ParCSRMatrixOffd(RT));
   HYPRE_BigInt first_col_diag = hypre_ParCSRMatrixFirstColDiag(RT);
   HYPRE_Int n_fine = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(A));
   HYPRE_Int num_cols_A_offd = hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(A));
   HYPRE_BigInt *fine_to_coarse_offd = NULL;
   HYPRE_BigInt *big_buf_data = NULL;
   HYPRE_BigInt *send_big_elmts = NULL;
   HYPRE_BigInt my_first_cpt;

   HYPRE_Int i, j;
   HYPRE_Int vec_len, vec_start;
   HYPRE_Int num_procs, my_id;
   HYPRE_Int ierr = 0;
   HYPRE_Int num_requests;
   HYPRE_Int offd_col, proc_num;
   HYPRE_Int num_threads = hypre_NumThreads();
   HYPRE_Int size, rest, ns, ne, start;
   HYPRE_Int index;

   HYPRE_Int *proc_mark;
   HYPRE_Int *change_array;
   HYPRE_Int *coarse_counter;
   HYPRE_Int coarse_shift;

   hypre_MPI_Request *requests;
   hypre_MPI_Status *status;

   hypre_MPI_Comm_size(comm,&num_procs);
   hypre_MPI_Comm_rank(comm,&my_id);

   /*--------------------------------------------------------------------------
    * determine num_recvs, recv_procs and recv_vec_starts for RT
    *--------------------------------------------------------------------------*/

   proc_mark = hypre_CTAlloc(HYPRE_Int,  num_recvs_A, HYPRE_MEMORY_HOST);

   for (i=0; i < num_recvs_A; i++)
      proc_mark[i] = 0;

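   /* tmp_map_offd maps RT's off-diagonal columns to (assumed sorted)
      off-diagonal column indices of A; walk A's receive ranges and count,
      per neighbor, how many of those columns RT actually uses */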
   proc_num = 0;
   num_recvs_RT = 0;
   if (num_cols_offd_RT)
   {
      for (i=0; i < num_recvs_A; i++)
      {
         for (j=recv_vec_starts_A[i]; j<recv_vec_starts_A[i+1]; j++)
         {
            offd_col = tmp_map_offd[proc_num];
            if (offd_col == j)
            {
               proc_mark[i]++;
               proc_num++;
               if (proc_num == num_cols_offd_RT) break;
            }
         }
         if (proc_mark[i]) num_recvs_RT++;
         if (proc_num == num_cols_offd_RT) break;
      }
   }

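   /* set up buffers to translate local fine_to_coarse indices into global
      coarse column numbers for the columns A sends to its neighbors */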
   fine_to_coarse_offd = hypre_CTAlloc(HYPRE_BigInt, num_cols_A_offd, HYPRE_MEMORY_HOST);
   big_buf_data = hypre_CTAlloc(HYPRE_BigInt, send_map_starts_A[num_sends_A], HYPRE_MEMORY_HOST);
   coarse_counter = hypre_CTAlloc(HYPRE_Int, num_threads, HYPRE_MEMORY_HOST);

   my_first_cpt = hypre_ParCSRMatrixColStarts(RT)[0];

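   /* partition the fine rows over threads: each thread gets a contiguous
      block [ns, ne), with the first (n_fine % num_threads) threads taking
      one extra row, and shifts its block of fine_to_coarse entries by the
      coarse_counter offset of the preceding thread */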
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i,j,ns,ne,size,rest,coarse_shift) HYPRE_SMP_SCHEDULE
#endif
   for (j = 0; j < num_threads; j++)
   {
      coarse_shift = 0;
      if (j > 0) coarse_shift = coarse_counter[j-1];
      size = n_fine/num_threads;
      rest = n_fine - size*num_threads;
      if (j < rest)
      {
         ns = j*size+j;
         ne = (j+1)*size+j+1;
      }
      else
      {
         ns = j*size+rest;
         ne = (j+1)*size+rest;
      }
      for (i = ns; i < ne; i++)
         fine_to_coarse[i] += coarse_shift;
   }

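   /* pack the global coarse index of every row that A sends to a neighbor,
      then exchange along A's communication pattern; afterwards
      fine_to_coarse_offd holds the global coarse index of each
      off-processor column of A */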
   index = 0;
   for (i = 0; i < num_sends_A; i++)
   {
      start = hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, i);
      for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, i+1); j++)
         big_buf_data[index++] = my_first_cpt+
            (HYPRE_BigInt)fine_to_coarse[hypre_ParCSRCommPkgSendMapElmt(comm_pkg_A,j)];
   }

   comm_handle = hypre_ParCSRCommHandleCreate( 21, comm_pkg_A, big_buf_data,
         fine_to_coarse_offd);

   hypre_ParCSRCommHandleDestroy(comm_handle);

   for (i=0; i < num_cols_offd_RT; i++)
      col_map_offd_RT[i] = fine_to_coarse_offd[tmp_map_offd[i]];

   hypre_TFree(big_buf_data, HYPRE_MEMORY_HOST);
   hypre_TFree(fine_to_coarse_offd, HYPRE_MEMORY_HOST);
   hypre_TFree(coarse_counter, HYPRE_MEMORY_HOST);
   //hypre_TFree(tmp_map_offd, HYPRE_MEMORY_HOST);

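   /* keep only the neighbors from which RT actually receives; proc_mark[i]
      gives the number of entries received from recv_procs_A[i] */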
   recv_procs_RT = hypre_CTAlloc(HYPRE_Int, num_recvs_RT, HYPRE_MEMORY_HOST);
   recv_vec_starts_RT = hypre_CTAlloc(HYPRE_Int,  num_recvs_RT+1, HYPRE_MEMORY_HOST);

   j = 0;
   recv_vec_starts_RT[0] = 0;
   for (i=0; i < num_recvs_A; i++)
   {
      if (proc_mark[i])
      {
         recv_procs_RT[j] = recv_procs_A[i];
         recv_vec_starts_RT[j+1] = recv_vec_starts_RT[j]+proc_mark[i];
         j++;
      }
   }

   /*--------------------------------------------------------------------------
    * send the number of entries kept (proc_mark) to recv_procs_A and
    * receive change_array from send_procs_A
    *--------------------------------------------------------------------------*/

   num_requests = num_recvs_A+num_sends_A;
   requests = hypre_CTAlloc(hypre_MPI_Request,  num_requests, HYPRE_MEMORY_HOST);
   status = hypre_CTAlloc(hypre_MPI_Status,  num_requests, HYPRE_MEMORY_HOST);

   change_array = hypre_CTAlloc(HYPRE_Int,  num_sends_A, HYPRE_MEMORY_HOST);

   j = 0;
   for (i=0; i < num_sends_A; i++)
      hypre_MPI_Irecv(&change_array[i],1,HYPRE_MPI_INT,send_procs_A[i],0,comm,
            &requests[j++]);

   for (i=0; i < num_recvs_A; i++)
      hypre_MPI_Isend(&proc_mark[i],1,HYPRE_MPI_INT,recv_procs_A[i],0,comm,
            &requests[j++]);

   hypre_MPI_Waitall(num_requests,requests,status);

   hypre_TFree(proc_mark, HYPRE_MEMORY_HOST);

   /*--------------------------------------------------------------------------
    * if change_array[i] is 0, omit send_procs_A[i] in send_procs_RT
    *--------------------------------------------------------------------------*/

   num_sends_RT = 0;
   for (i=0; i < num_sends_A; i++)
      if (change_array[i])
      {
         num_sends_RT++;
      }

   send_procs_RT = hypre_CTAlloc(HYPRE_Int,  num_sends_RT, HYPRE_MEMORY_HOST);
   send_map_starts_RT = hypre_CTAlloc(HYPRE_Int,  num_sends_RT+1, HYPRE_MEMORY_HOST);

   j = 0;
   send_map_starts_RT[0] = 0;
   for (i=0; i < num_sends_A; i++)
   {
      if (change_array[i])
      {
         send_procs_RT[j] = send_procs_A[i];
         send_map_starts_RT[j+1] = send_map_starts_RT[j]+change_array[i];
         j++;
      }
   }

   /*--------------------------------------------------------------------------
    * generate send_map_elmts
    *--------------------------------------------------------------------------*/

   send_map_elmts_RT = hypre_CTAlloc(HYPRE_Int, send_map_starts_RT[num_sends_RT], HYPRE_MEMORY_HOST);
   send_big_elmts = hypre_CTAlloc(HYPRE_BigInt, send_map_starts_RT[num_sends_RT], HYPRE_MEMORY_HOST);

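   /* reverse the communication pattern: each receiving process sends back
      the global column indices it needs, and the owning process converts
      them to local indices by subtracting first_col_diag */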
   j = 0;
   for (i=0; i < num_sends_RT; i++)
   {
      vec_start = send_map_starts_RT[i];
      vec_len = send_map_starts_RT[i+1]-vec_start;
      hypre_MPI_Irecv(&send_big_elmts[vec_start],vec_len,HYPRE_MPI_BIG_INT,
            send_procs_RT[i],0,comm,&requests[j++]);
   }

   for (i=0; i < num_recvs_RT; i++)
   {
      vec_start = recv_vec_starts_RT[i];
      vec_len = recv_vec_starts_RT[i+1] - vec_start;
      hypre_MPI_Isend(&col_map_offd_RT[vec_start],vec_len,HYPRE_MPI_BIG_INT,
            recv_procs_RT[i],0,comm,&requests[j++]);
   }

   hypre_MPI_Waitall(j,requests,status);

   for (i=0; i < send_map_starts_RT[num_sends_RT]; i++)
      send_map_elmts_RT[i] = (HYPRE_Int)(send_big_elmts[i]-first_col_diag);

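   /* assemble the new communication package and attach it to RT */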
   comm_pkg = hypre_CTAlloc(hypre_ParCSRCommPkg, 1, HYPRE_MEMORY_HOST);

   hypre_ParCSRCommPkgComm(comm_pkg) = comm;
   hypre_ParCSRCommPkgNumSends(comm_pkg) = num_sends_RT;
   hypre_ParCSRCommPkgNumRecvs(comm_pkg) = num_recvs_RT;
   hypre_ParCSRCommPkgSendProcs(comm_pkg) = send_procs_RT;
   hypre_ParCSRCommPkgRecvProcs(comm_pkg) = recv_procs_RT;
   hypre_ParCSRCommPkgRecvVecStarts(comm_pkg) = recv_vec_starts_RT;
   hypre_ParCSRCommPkgSendMapStarts(comm_pkg) = send_map_starts_RT;
   hypre_ParCSRCommPkgSendMapElmts(comm_pkg) = send_map_elmts_RT;

   hypre_TFree(status, HYPRE_MEMORY_HOST);
   hypre_TFree(requests, HYPRE_MEMORY_HOST);
   hypre_TFree(send_big_elmts, HYPRE_MEMORY_HOST);

   hypre_ParCSRMatrixCommPkg(RT) = comm_pkg;
   hypre_TFree(change_array, HYPRE_MEMORY_HOST);

   return ierr;
}

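/*--------------------------------------------------------------------------
 * hypre_GenerateSendMapAndCommPkg
 *
 * Given the receive side of a communication pattern (num_recvs, recv_procs,
 * recv_vec_starts) and the matching send processes, determines
 * send_map_starts and send_map_elmts by exchanging the needed global column
 * indices, assembles the communication package, and attaches it to A.
 *
 * Sketch of a call, assuming the receive-side arrays have already been
 * derived from the off-diagonal column map of A:
 *
 *    hypre_GenerateSendMapAndCommPkg(comm, num_sends, num_recvs,
 *                                    recv_procs, send_procs,
 *                                    recv_vec_starts, A);
 *--------------------------------------------------------------------------*/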
HYPRE_Int
hypre_GenerateSendMapAndCommPkg(MPI_Comm comm, HYPRE_Int num_sends, HYPRE_Int num_recvs,
                                HYPRE_Int *recv_procs, HYPRE_Int *send_procs,
                                HYPRE_Int *recv_vec_starts, hypre_ParCSRMatrix *A)
{
   HYPRE_Int *send_map_starts;
   HYPRE_Int *send_map_elmts;
   HYPRE_Int i, j;
   HYPRE_Int num_requests = num_sends+num_recvs;
   hypre_MPI_Request *requests;
   hypre_MPI_Status *status;
   HYPRE_Int vec_len, vec_start;
   hypre_ParCSRCommPkg *comm_pkg;
   HYPRE_BigInt *col_map_offd = hypre_ParCSRMatrixColMapOffd(A);
   HYPRE_BigInt first_col_diag = hypre_ParCSRMatrixFirstColDiag(A);
   HYPRE_BigInt *send_big_elmts = NULL;

   /*--------------------------------------------------------------------------
    * generate send_map_starts and send_map_elmts
    *--------------------------------------------------------------------------*/
   requests = hypre_CTAlloc(hypre_MPI_Request, num_requests, HYPRE_MEMORY_HOST);
   status = hypre_CTAlloc(hypre_MPI_Status, num_requests, HYPRE_MEMORY_HOST);
   send_map_starts = hypre_CTAlloc(HYPRE_Int,  num_sends+1, HYPRE_MEMORY_HOST);
   j = 0;
   for (i=0; i < num_sends; i++)
      hypre_MPI_Irecv(&send_map_starts[i+1],1,HYPRE_MPI_INT,send_procs[i],0,comm,
            &requests[j++]);

   for (i=0; i < num_recvs; i++)
   {
      vec_len = recv_vec_starts[i+1] - recv_vec_starts[i];
      hypre_MPI_Isend(&vec_len,1,HYPRE_MPI_INT, recv_procs[i],0,comm,&requests[j++]);
   }

   hypre_MPI_Waitall(j,requests,status);

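   /* turn the received per-neighbor counts into offsets (prefix sum) */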
   send_map_starts[0] = 0;
   for (i=0; i < num_sends; i++)
      send_map_starts[i+1] += send_map_starts[i];

   send_map_elmts = hypre_CTAlloc(HYPRE_Int, send_map_starts[num_sends], HYPRE_MEMORY_HOST);
   send_big_elmts = hypre_CTAlloc(HYPRE_BigInt, send_map_starts[num_sends], HYPRE_MEMORY_HOST);

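   /* receivers send the global column indices they need; the owners store
      them and convert to local column indices relative to first_col_diag */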
   j = 0;
   for (i=0; i < num_sends; i++)
   {
      vec_start = send_map_starts[i];
      vec_len = send_map_starts[i+1]-vec_start;
      hypre_MPI_Irecv(&send_big_elmts[vec_start],vec_len,HYPRE_MPI_BIG_INT,
            send_procs[i],0,comm,&requests[j++]);
   }

   for (i=0; i < num_recvs; i++)
   {
      vec_start = recv_vec_starts[i];
      vec_len = recv_vec_starts[i+1] - vec_start;
      hypre_MPI_Isend(&col_map_offd[vec_start],vec_len,HYPRE_MPI_BIG_INT,
            recv_procs[i],0,comm,&requests[j++]);
   }

   hypre_MPI_Waitall(j,requests,status);

   for (i=0; i < send_map_starts[num_sends]; i++)
      send_map_elmts[i] = (HYPRE_Int)(send_big_elmts[i]-first_col_diag);

   comm_pkg = hypre_CTAlloc(hypre_ParCSRCommPkg, 1, HYPRE_MEMORY_HOST);

   hypre_ParCSRCommPkgComm(comm_pkg) = comm;
   hypre_ParCSRCommPkgNumSends(comm_pkg) = num_sends;
   hypre_ParCSRCommPkgNumRecvs(comm_pkg) = num_recvs;
   hypre_ParCSRCommPkgSendProcs(comm_pkg) = send_procs;
   hypre_ParCSRCommPkgRecvProcs(comm_pkg) = recv_procs;
   hypre_ParCSRCommPkgRecvVecStarts(comm_pkg) = recv_vec_starts;
   hypre_ParCSRCommPkgSendMapStarts(comm_pkg) = send_map_starts;
   hypre_ParCSRCommPkgSendMapElmts(comm_pkg) = send_map_elmts;

   hypre_TFree(status, HYPRE_MEMORY_HOST);
   hypre_TFree(requests, HYPRE_MEMORY_HOST);
   hypre_TFree(send_big_elmts, HYPRE_MEMORY_HOST);

   hypre_ParCSRMatrixCommPkg(A) = comm_pkg;
   return 0;
}