/******************************************************************************
 * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
 * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
 *
 * SPDX-License-Identifier: (Apache-2.0 OR MIT)
 ******************************************************************************/

#include "_hypre_parcsr_mv.h"
#include "_hypre_utilities.h"

/*--------------------------------------------------------------------------
 * hypre_ParCSRMatMat : multiplies two ParCSRMatrices A and B and returns
 * the product in ParCSRMatrix C.
 * Note that C does not own the partitionings, since its row_starts
 * is owned by A and col_starts by B.
 *--------------------------------------------------------------------------*/
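
/* Example usage (a minimal sketch; assumes A and B are assembled,
 * dimensionally compatible ParCSR matrices):
 *
 *    hypre_ParCSRMatrix *C = hypre_ParCSRMatMat(A, B);
 *    ... use C ...
 *    hypre_ParCSRMatrixDestroy(C);
 */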

hypre_ParCSRMatrix*
hypre_ParCSRMatMatHost( hypre_ParCSRMatrix  *A,
                        hypre_ParCSRMatrix  *B )
{
   MPI_Comm         comm = hypre_ParCSRMatrixComm(A);

   hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);

   HYPRE_BigInt    *row_starts_A = hypre_ParCSRMatrixRowStarts(A);
   HYPRE_Int        num_cols_diag_A = hypre_CSRMatrixNumCols(A_diag);
   HYPRE_Int        num_rows_diag_A = hypre_CSRMatrixNumRows(A_diag);

   hypre_CSRMatrix *B_diag = hypre_ParCSRMatrixDiag(B);
   hypre_CSRMatrix *B_offd = hypre_ParCSRMatrixOffd(B);
   HYPRE_BigInt    *col_map_offd_B = hypre_ParCSRMatrixColMapOffd(B);

   HYPRE_BigInt     first_col_diag_B = hypre_ParCSRMatrixFirstColDiag(B);
   HYPRE_BigInt     last_col_diag_B;
   HYPRE_BigInt    *col_starts_B = hypre_ParCSRMatrixColStarts(B);
   HYPRE_Int        num_rows_diag_B = hypre_CSRMatrixNumRows(B_diag);
   HYPRE_Int        num_cols_diag_B = hypre_CSRMatrixNumCols(B_diag);
   HYPRE_Int        num_cols_offd_B = hypre_CSRMatrixNumCols(B_offd);

   hypre_ParCSRMatrix *C;
   HYPRE_BigInt    *col_map_offd_C = NULL;
   HYPRE_Int       *map_B_to_C = NULL;

   hypre_CSRMatrix *C_diag = NULL;
   hypre_CSRMatrix *C_offd = NULL;

   HYPRE_Int        num_cols_offd_C = 0;

   hypre_CSRMatrix *Bs_ext;
   hypre_CSRMatrix *Bext_diag;
   hypre_CSRMatrix *Bext_offd;

   hypre_CSRMatrix *AB_diag;
   hypre_CSRMatrix *AB_offd;
   HYPRE_Int        AB_offd_num_nonzeros;
   HYPRE_Int       *AB_offd_j;
   hypre_CSRMatrix *ABext_diag;
   hypre_CSRMatrix *ABext_offd;

   HYPRE_BigInt     n_rows_A, n_cols_A;
   HYPRE_BigInt     n_rows_B, n_cols_B;
   HYPRE_Int        cnt, i;
   HYPRE_Int        num_procs;
   HYPRE_Int        my_id;

   n_rows_A = hypre_ParCSRMatrixGlobalNumRows(A);
   n_cols_A = hypre_ParCSRMatrixGlobalNumCols(A);
   n_rows_B = hypre_ParCSRMatrixGlobalNumRows(B);
   n_cols_B = hypre_ParCSRMatrixGlobalNumCols(B);

   if (n_cols_A != n_rows_B || num_cols_diag_A != num_rows_diag_B)
   {
      hypre_error_in_arg(1);
      hypre_printf(" Error! Incompatible matrix dimensions!\n");
      return NULL;
   }

   /*-----------------------------------------------------------------------
    *  Extract B_ext, i.e. the portion of B that is stored on neighbor procs
    *  and needed locally for the matrix-matrix product
    *-----------------------------------------------------------------------*/

   hypre_MPI_Comm_size(comm, &num_procs);
   hypre_MPI_Comm_rank(comm, &my_id);
   last_col_diag_B = first_col_diag_B + num_cols_diag_B - 1;

   if (num_procs > 1)
   {
      /*---------------------------------------------------------------------
       * If there exists no CommPkg for A, a CommPkg is generated using
       * equally load balanced partitionings within
       * hypre_ParCSRMatrixExtractBExt
       *--------------------------------------------------------------------*/

      /* contains communication, which should be explicitly included
         to allow for overlap */
      Bs_ext = hypre_ParCSRMatrixExtractBExt(B, A, 1);

      hypre_CSRMatrixSplit(Bs_ext, first_col_diag_B, last_col_diag_B,
                           num_cols_offd_B, col_map_offd_B,
                           &num_cols_offd_C, &col_map_offd_C,
                           &Bext_diag, &Bext_offd);

      hypre_CSRMatrixDestroy(Bs_ext);

      /* These are local and could be overlapped with communication */
      AB_diag = hypre_CSRMatrixMultiplyHost(A_diag, B_diag);
      AB_offd = hypre_CSRMatrixMultiplyHost(A_diag, B_offd);

      /* These require data from other processes */
      ABext_diag = hypre_CSRMatrixMultiplyHost(A_offd, Bext_diag);
      ABext_offd = hypre_CSRMatrixMultiplyHost(A_offd, Bext_offd);

      hypre_CSRMatrixDestroy(Bext_diag);
      hypre_CSRMatrixDestroy(Bext_offd);

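      /* Build map_B_to_C: col_map_offd_B is a sorted subsequence of
         col_map_offd_C, so each off-diagonal column of B can be located
         in C's off-diagonal column map by a single merge-style pass. */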
      if (num_cols_offd_B)
      {
         HYPRE_Int i;
         map_B_to_C = hypre_CTAlloc(HYPRE_Int, num_cols_offd_B, HYPRE_MEMORY_HOST);

         cnt = 0;
         for (i = 0; i < num_cols_offd_C; i++)
         {
            if (col_map_offd_C[i] == col_map_offd_B[cnt])
            {
               map_B_to_C[cnt++] = i;
               if (cnt == num_cols_offd_B)
               {
                  break;
               }
            }
         }
      }
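
      /* Relabel AB_offd's column indices from B's off-diagonal numbering
         to C's off-diagonal numbering (a no-op when B has no offd columns,
         since AB_offd then has no nonzeros). */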
      AB_offd_num_nonzeros = hypre_CSRMatrixNumNonzeros(AB_offd);
      AB_offd_j = hypre_CSRMatrixJ(AB_offd);
      for (i = 0; i < AB_offd_num_nonzeros; i++)
      {
         AB_offd_j[i] = map_B_to_C[AB_offd_j[i]];
      }

      if (num_cols_offd_B)
      {
         hypre_TFree(map_B_to_C, HYPRE_MEMORY_HOST);
      }

      hypre_CSRMatrixNumCols(AB_diag) = num_cols_diag_B;
      hypre_CSRMatrixNumCols(ABext_diag) = num_cols_diag_B;
      hypre_CSRMatrixNumCols(AB_offd) = num_cols_offd_C;
      hypre_CSRMatrixNumCols(ABext_offd) = num_cols_offd_C;
      C_diag = hypre_CSRMatrixAdd(1.0, AB_diag, 1.0, ABext_diag);
      C_offd = hypre_CSRMatrixAdd(1.0, AB_offd, 1.0, ABext_offd);

      hypre_CSRMatrixDestroy(AB_diag);
      hypre_CSRMatrixDestroy(ABext_diag);
      hypre_CSRMatrixDestroy(AB_offd);
      hypre_CSRMatrixDestroy(ABext_offd);
   }
   else
   {
      C_diag = hypre_CSRMatrixMultiplyHost(A_diag, B_diag);
      C_offd = hypre_CSRMatrixCreate(num_rows_diag_A, 0, 0);
      hypre_CSRMatrixInitialize_v2(C_offd, 0, hypre_CSRMatrixMemoryLocation(C_diag));
   }

   /*-----------------------------------------------------------------------
    *  Create C and replace the placeholder diag and offd blocks allocated
    *  by hypre_ParCSRMatrixCreate with the computed C_diag and C_offd.
    *-----------------------------------------------------------------------*/

   C = hypre_ParCSRMatrixCreate(comm, n_rows_A, n_cols_B, row_starts_A,
                                col_starts_B, num_cols_offd_C,
                                C_diag->num_nonzeros, C_offd->num_nonzeros);

   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(C));
   hypre_ParCSRMatrixDiag(C) = C_diag;

   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(C));
   hypre_ParCSRMatrixOffd(C) = C_offd;

   if (num_cols_offd_C)
   {
      hypre_ParCSRMatrixColMapOffd(C) = col_map_offd_C;
   }

   return C;
}

hypre_ParCSRMatrix*
hypre_ParCSRMatMat( hypre_ParCSRMatrix  *A,
                    hypre_ParCSRMatrix  *B )
{
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
   hypre_GpuProfilingPushRange("Mat-Mat");
#endif

   hypre_ParCSRMatrix *C = NULL;

#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
   HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy2( hypre_ParCSRMatrixMemoryLocation(A),
                                                      hypre_ParCSRMatrixMemoryLocation(B) );

   if (exec == HYPRE_EXEC_DEVICE)
   {
      C = hypre_ParCSRMatMatDevice(A, B);
   }
   else
#endif
   {
      C = hypre_ParCSRMatMatHost(A, B);
   }

#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
   hypre_GpuProfilingPopRange();
#endif

   return C;
}

/*--------------------------------------------------------------------------
 * hypre_ParCSRTMatMatKT : multiplies the two ParCSRMatrices transpose(A)
 * and B and returns the product in ParCSRMatrix C.
 * Note that C does not own the partitionings, since its row_starts is
 * taken from the col_starts of A and its col_starts from B.
 *--------------------------------------------------------------------------*/
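
/* Example usage (a minimal sketch): form C = A^T * B and keep the
 * transposes of A's blocks (A->diagT, A->offdT) for later reuse:
 *
 *    hypre_ParCSRMatrix *C = hypre_ParCSRTMatMatKT(A, B, 1);
 */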

hypre_ParCSRMatrix*
hypre_ParCSRTMatMatKTHost( hypre_ParCSRMatrix  *A,
                           hypre_ParCSRMatrix  *B,
                           HYPRE_Int            keep_transpose )
{
   MPI_Comm             comm       = hypre_ParCSRMatrixComm(A);
   hypre_ParCSRCommPkg *comm_pkg_A = NULL;

   hypre_CSRMatrix *A_diag  = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix *A_offd  = hypre_ParCSRMatrixOffd(A);
   hypre_CSRMatrix *B_diag  = hypre_ParCSRMatrixDiag(B);
   hypre_CSRMatrix *B_offd  = hypre_ParCSRMatrixOffd(B);
   hypre_CSRMatrix *AT_diag = NULL;

   HYPRE_Int num_rows_diag_A  = hypre_CSRMatrixNumRows(A_diag);
   HYPRE_Int num_cols_diag_A  = hypre_CSRMatrixNumCols(A_diag);
   HYPRE_Int num_rows_diag_B  = hypre_CSRMatrixNumRows(B_diag);
   HYPRE_Int num_cols_diag_B  = hypre_CSRMatrixNumCols(B_diag);
   HYPRE_Int num_cols_offd_B  = hypre_CSRMatrixNumCols(B_offd);
   HYPRE_BigInt first_col_diag_B = hypre_ParCSRMatrixFirstColDiag(B);

   HYPRE_BigInt *col_map_offd_B = hypre_ParCSRMatrixColMapOffd(B);

   HYPRE_BigInt *col_starts_A = hypre_ParCSRMatrixColStarts(A);
   HYPRE_BigInt *col_starts_B = hypre_ParCSRMatrixColStarts(B);

   hypre_ParCSRMatrix *C;
   hypre_CSRMatrix *C_diag = NULL;
   hypre_CSRMatrix *C_offd = NULL;

   HYPRE_BigInt *col_map_offd_C = NULL;
   HYPRE_Int    *map_B_to_C;
   HYPRE_BigInt  first_col_diag_C;
   HYPRE_BigInt  last_col_diag_C;
   HYPRE_Int     num_cols_offd_C = 0;

   HYPRE_BigInt n_rows_A, n_cols_A;
   HYPRE_BigInt n_rows_B, n_cols_B;
   HYPRE_Int j_indx, cnt;
   HYPRE_Int num_procs, my_id;

   n_rows_A = hypre_ParCSRMatrixGlobalNumRows(A);
   n_cols_A = hypre_ParCSRMatrixGlobalNumCols(A);
   n_rows_B = hypre_ParCSRMatrixGlobalNumRows(B);
   n_cols_B = hypre_ParCSRMatrixGlobalNumCols(B);

   hypre_MPI_Comm_size(comm, &num_procs);
   hypre_MPI_Comm_rank(comm, &my_id);

   if (n_rows_A != n_rows_B || num_rows_diag_A != num_rows_diag_B)
   {
      hypre_error_w_msg(HYPRE_ERROR_GENERIC, " Error! Incompatible matrix dimensions!\n");
      return NULL;
   }

   /*if (num_cols_diag_A == num_cols_diag_B) allsquare = 1;*/

   hypre_CSRMatrixTranspose(A_diag, &AT_diag, 1);

   if (num_procs == 1)
   {
      C_diag = hypre_CSRMatrixMultiplyHost(AT_diag, B_diag);
      C_offd = hypre_CSRMatrixCreate(num_cols_diag_A, 0, 0);
      hypre_CSRMatrixInitialize_v2(C_offd, 0, hypre_CSRMatrixMemoryLocation(C_diag));
      hypre_CSRMatrixNumRownnz(C_offd) = 0;
      if (keep_transpose)
      {
         A->diagT = AT_diag;
      }
      else
      {
         hypre_CSRMatrixDestroy(AT_diag);
      }
   }
   else
   {
      hypre_CSRMatrix *AT_offd    = NULL;
      hypre_CSRMatrix *C_tmp_diag = NULL;
      hypre_CSRMatrix *C_tmp_offd = NULL;
      hypre_CSRMatrix *C_int      = NULL;
      hypre_CSRMatrix *C_ext      = NULL;
      hypre_CSRMatrix *C_ext_diag = NULL;
      hypre_CSRMatrix *C_ext_offd = NULL;
      hypre_CSRMatrix *C_int_diag = NULL;
      hypre_CSRMatrix *C_int_offd = NULL;

      HYPRE_Int  i;
      HYPRE_Int *C_tmp_offd_i;
      HYPRE_Int *C_tmp_offd_j;
      HYPRE_Int *send_map_elmts_A;
      void      *request;

      hypre_CSRMatrixTranspose(A_offd, &AT_offd, 1);

      C_int_diag = hypre_CSRMatrixMultiplyHost(AT_offd, B_diag);
      C_int_offd = hypre_CSRMatrixMultiplyHost(AT_offd, B_offd);

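      /* Temporarily hang C_int_diag/C_int_offd off of B so that
         hypre_MergeDiagAndOffd can assemble them into a single CSR
         matrix (C_int) using B's column partitioning and offd map. */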
      hypre_ParCSRMatrixDiag(B) = C_int_diag;
      hypre_ParCSRMatrixOffd(B) = C_int_offd;

      C_int = hypre_MergeDiagAndOffd(B);

      hypre_ParCSRMatrixDiag(B) = B_diag;
      hypre_ParCSRMatrixOffd(B) = B_offd;

      if (!hypre_ParCSRMatrixCommPkg(A))
      {
         hypre_MatvecCommPkgCreate(A);
      }
      comm_pkg_A = hypre_ParCSRMatrixCommPkg(A);

      /* contains communication; should be explicitly included to allow for overlap */
      hypre_ExchangeExternalRowsInit(C_int, comm_pkg_A, &request);
      C_ext = hypre_ExchangeExternalRowsWait(request);

      hypre_CSRMatrixDestroy(C_int);
      hypre_CSRMatrixDestroy(C_int_diag);
      hypre_CSRMatrixDestroy(C_int_offd);

      C_tmp_diag = hypre_CSRMatrixMultiplyHost(AT_diag, B_diag);
      C_tmp_offd = hypre_CSRMatrixMultiplyHost(AT_diag, B_offd);

      if (keep_transpose)
      {
         A->diagT = AT_diag;
         A->offdT = AT_offd;
      }
      else
      {
         hypre_CSRMatrixDestroy(AT_diag);
         hypre_CSRMatrixDestroy(AT_offd);
      }

      /*-----------------------------------------------------------------------
       *  Add contents of C_ext to C_tmp_diag and C_tmp_offd
       *  to obtain C_diag and C_offd
       *-----------------------------------------------------------------------*/

      /* split C_ext into a local part C_ext_diag and a nonlocal part C_ext_offd;
         also generate a new col_map_offd and adjust column indices accordingly */
      first_col_diag_C = first_col_diag_B;
      last_col_diag_C = first_col_diag_B + num_cols_diag_B - 1;

      if (C_ext)
      {
         hypre_CSRMatrixSplit(C_ext, first_col_diag_C, last_col_diag_C,
                              num_cols_offd_B, col_map_offd_B, &num_cols_offd_C, &col_map_offd_C,
                              &C_ext_diag, &C_ext_offd);

         hypre_CSRMatrixDestroy(C_ext);
         C_ext = NULL;
      }

      C_tmp_offd_i = hypre_CSRMatrixI(C_tmp_offd);
      C_tmp_offd_j = hypre_CSRMatrixJ(C_tmp_offd);

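      /* Map B's off-diagonal columns into C's off-diagonal column map
         (col_map_offd_B is a sorted subsequence of col_map_offd_C) and
         relabel C_tmp_offd's column indices accordingly. */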
      if (num_cols_offd_B)
      {
         map_B_to_C = hypre_CTAlloc(HYPRE_Int, num_cols_offd_B, HYPRE_MEMORY_HOST);

         cnt = 0;
         for (i = 0; i < num_cols_offd_C; i++)
         {
            if (col_map_offd_C[i] == col_map_offd_B[cnt])
            {
               map_B_to_C[cnt++] = i;
               if (cnt == num_cols_offd_B)
               {
                  break;
               }
            }
         }
         for (i = 0; i < C_tmp_offd_i[hypre_CSRMatrixNumRows(C_tmp_offd)]; i++)
         {
            j_indx = C_tmp_offd_j[i];
            C_tmp_offd_j[i] = map_B_to_C[j_indx];
         }
         hypre_TFree(map_B_to_C, HYPRE_MEMORY_HOST);
      }

      /*-----------------------------------------------------------------------
       *  Compute C_diag = C_tmp_diag + C_ext_diag
       *  and     C_offd = C_tmp_offd + C_ext_offd
       *-----------------------------------------------------------------------*/
      send_map_elmts_A = hypre_ParCSRCommPkgSendMapElmts(comm_pkg_A);
      C_diag = hypre_CSRMatrixAddPartial(C_tmp_diag, C_ext_diag, send_map_elmts_A);
      hypre_CSRMatrixNumCols(C_tmp_offd) = num_cols_offd_C;
      C_offd = hypre_CSRMatrixAddPartial(C_tmp_offd, C_ext_offd, send_map_elmts_A);

      hypre_CSRMatrixDestroy(C_tmp_diag);
      hypre_CSRMatrixDestroy(C_tmp_offd);
      hypre_CSRMatrixDestroy(C_ext_diag);
      hypre_CSRMatrixDestroy(C_ext_offd);
   }

   C = hypre_ParCSRMatrixCreate(comm, n_cols_A, n_cols_B, col_starts_A, col_starts_B,
                                num_cols_offd_C, C_diag->num_nonzeros, C_offd->num_nonzeros);

   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(C));
   hypre_ParCSRMatrixDiag(C) = C_diag;

   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(C));
   hypre_ParCSRMatrixOffd(C) = C_offd;

   hypre_ParCSRMatrixColMapOffd(C) = col_map_offd_C;

   return C;
}

hypre_ParCSRMatrix*
hypre_ParCSRTMatMatKT( hypre_ParCSRMatrix  *A,
                       hypre_ParCSRMatrix  *B,
                       HYPRE_Int            keep_transpose )
{
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
   hypre_GpuProfilingPushRange("Mat-T-Mat");
#endif

   hypre_ParCSRMatrix *C = NULL;

#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
   HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy2( hypre_ParCSRMatrixMemoryLocation(A),
                                                      hypre_ParCSRMatrixMemoryLocation(B) );

   if (exec == HYPRE_EXEC_DEVICE)
   {
      C = hypre_ParCSRTMatMatKTDevice(A, B, keep_transpose);
   }
   else
#endif
   {
      C = hypre_ParCSRTMatMatKTHost(A, B, keep_transpose);
   }

#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
   hypre_GpuProfilingPopRange();
#endif

   return C;
}

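/*--------------------------------------------------------------------------
 * hypre_ParCSRTMatMat : computes C = transpose(A) * B without storing the
 * transpose of A
 *--------------------------------------------------------------------------*/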
hypre_ParCSRMatrix*
hypre_ParCSRTMatMat( hypre_ParCSRMatrix  *A,
                     hypre_ParCSRMatrix  *B )
{
   return hypre_ParCSRTMatMatKT(A, B, 0);
}

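/*--------------------------------------------------------------------------
 * hypre_ParCSRMatrixRAPKT : computes the triple product
 * C = transpose(R) * A * P. When keep_transpose is nonzero, the transposes
 * of R's diagonal and off-diagonal blocks are kept in R->diagT and
 * R->offdT for later reuse.
 *--------------------------------------------------------------------------*/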
hypre_ParCSRMatrix*
hypre_ParCSRMatrixRAPKTHost( hypre_ParCSRMatrix *R,
                             hypre_ParCSRMatrix *A,
                             hypre_ParCSRMatrix *P,
                             HYPRE_Int           keep_transpose )
{
   MPI_Comm         comm = hypre_ParCSRMatrixComm(A);

   hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);

   HYPRE_BigInt    *row_starts_A = hypre_ParCSRMatrixRowStarts(A);
   HYPRE_Int        num_rows_diag_A = hypre_CSRMatrixNumRows(A_diag);
   HYPRE_Int        num_cols_diag_A = hypre_CSRMatrixNumCols(A_diag);
   HYPRE_Int        num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd);

   hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P);
   hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P);
   HYPRE_BigInt    *col_map_offd_P = hypre_ParCSRMatrixColMapOffd(P);

   HYPRE_BigInt     first_col_diag_P = hypre_ParCSRMatrixFirstColDiag(P);
   HYPRE_BigInt    *col_starts_P = hypre_ParCSRMatrixColStarts(P);
   HYPRE_Int        num_rows_diag_P = hypre_CSRMatrixNumRows(P_diag);
   HYPRE_Int        num_cols_diag_P = hypre_CSRMatrixNumCols(P_diag);
   HYPRE_Int        num_cols_offd_P = hypre_CSRMatrixNumCols(P_offd);

   hypre_ParCSRMatrix *Q;
   HYPRE_BigInt       *col_map_offd_Q = NULL;
   HYPRE_Int          *map_P_to_Q = NULL;

   hypre_CSRMatrix *Q_diag = NULL;
   hypre_CSRMatrix *Q_offd = NULL;

   HYPRE_Int        num_cols_offd_Q = 0;

   hypre_CSRMatrix *Ps_ext;
   hypre_CSRMatrix *Pext_diag;
   hypre_CSRMatrix *Pext_offd;

   hypre_CSRMatrix *AP_diag;
   hypre_CSRMatrix *AP_offd;
   HYPRE_Int        AP_offd_num_nonzeros;
   HYPRE_Int       *AP_offd_j;
   hypre_CSRMatrix *APext_diag;
   hypre_CSRMatrix *APext_offd;

   hypre_ParCSRCommPkg *comm_pkg_R = hypre_ParCSRMatrixCommPkg(R);

   hypre_CSRMatrix *R_diag = hypre_ParCSRMatrixDiag(R);
   hypre_CSRMatrix *RT_diag = NULL;

   hypre_CSRMatrix *R_offd = hypre_ParCSRMatrixOffd(R);

   HYPRE_Int    num_rows_diag_R = hypre_CSRMatrixNumRows(R_diag);
   HYPRE_Int    num_cols_diag_R = hypre_CSRMatrixNumCols(R_diag);
   HYPRE_Int    num_cols_offd_R = hypre_CSRMatrixNumCols(R_offd);

   HYPRE_BigInt *col_starts_R = hypre_ParCSRMatrixColStarts(R);

   hypre_ParCSRMatrix *C;
   HYPRE_BigInt       *col_map_offd_C = NULL;
   HYPRE_Int          *map_Q_to_C;

   hypre_CSRMatrix *C_diag = NULL;
   hypre_CSRMatrix *C_offd = NULL;

   HYPRE_BigInt     first_col_diag_C;
   HYPRE_BigInt     last_col_diag_C;

   HYPRE_Int        num_cols_offd_C = 0;

   HYPRE_Int        j_indx;

   HYPRE_BigInt     n_rows_R, n_cols_R;
   HYPRE_Int        num_procs, my_id;
   HYPRE_BigInt     n_rows_A, n_cols_A;
   HYPRE_BigInt     n_rows_P, n_cols_P;
   HYPRE_Int        cnt, i;

   n_rows_R = hypre_ParCSRMatrixGlobalNumRows(R);
   n_cols_R = hypre_ParCSRMatrixGlobalNumCols(R);
   n_rows_A = hypre_ParCSRMatrixGlobalNumRows(A);
   n_cols_A = hypre_ParCSRMatrixGlobalNumCols(A);
   n_rows_P = hypre_ParCSRMatrixGlobalNumRows(P);
   n_cols_P = hypre_ParCSRMatrixGlobalNumCols(P);

   hypre_MPI_Comm_size(comm, &num_procs);
   hypre_MPI_Comm_rank(comm, &my_id);

   if ( n_rows_R != n_rows_A || num_rows_diag_R != num_rows_diag_A ||
        n_cols_A != n_rows_P || num_cols_diag_A != num_rows_diag_P )
   {
      hypre_error_w_msg(HYPRE_ERROR_GENERIC, " Error! Incompatible matrix dimensions!\n");
      return NULL;
   }

   /*hypre_CSRMatrixTranspose(R_diag, &RT_diag, 1);*/

   if (num_procs > 1)
   {
      HYPRE_BigInt     last_col_diag_P;
      hypre_CSRMatrix *RT_offd = NULL;
      hypre_CSRMatrix *C_tmp_diag = NULL;
      hypre_CSRMatrix *C_tmp_offd = NULL;
      hypre_CSRMatrix *C_int = NULL;
      hypre_CSRMatrix *C_ext = NULL;
      hypre_CSRMatrix *C_ext_diag = NULL;
      hypre_CSRMatrix *C_ext_offd = NULL;
      hypre_CSRMatrix *C_int_diag = NULL;
      hypre_CSRMatrix *C_int_offd = NULL;

      HYPRE_Int   *C_tmp_offd_i;
      HYPRE_Int   *C_tmp_offd_j;

      HYPRE_Int   *send_map_elmts_R;
      void        *request;

      /*---------------------------------------------------------------------
       * If there exists no CommPkg for A, a CommPkg is generated using
       * equally load balanced partitionings within
       * hypre_ParCSRMatrixExtractBExt
       *--------------------------------------------------------------------*/

      /* contains communication, which should be explicitly included
         to allow for overlap */
      Ps_ext = hypre_ParCSRMatrixExtractBExt(P, A, 1);

      if (num_cols_offd_A)
      {
         last_col_diag_P = first_col_diag_P + num_cols_diag_P - 1;
         hypre_CSRMatrixSplit(Ps_ext, first_col_diag_P, last_col_diag_P,
                              num_cols_offd_P, col_map_offd_P,
                              &num_cols_offd_Q, &col_map_offd_Q,
                              &Pext_diag, &Pext_offd);

         /* These require data from other processes */
         APext_diag = hypre_CSRMatrixMultiplyHost(A_offd, Pext_diag);
         APext_offd = hypre_CSRMatrixMultiplyHost(A_offd, Pext_offd);

         hypre_CSRMatrixDestroy(Pext_diag);
         hypre_CSRMatrixDestroy(Pext_offd);
      }
      else
      {
         num_cols_offd_Q = num_cols_offd_P;
         col_map_offd_Q = hypre_CTAlloc(HYPRE_BigInt, num_cols_offd_Q, HYPRE_MEMORY_HOST);
         for (i = 0; i < num_cols_offd_P; i++)
         {
            col_map_offd_Q[i] = col_map_offd_P[i];
         }
      }
      hypre_CSRMatrixDestroy(Ps_ext);

      /* These are local and could be overlapped with communication */
      AP_diag = hypre_CSRMatrixMultiplyHost(A_diag, P_diag);

      if (num_cols_offd_P)
      {
         HYPRE_Int i;
         AP_offd = hypre_CSRMatrixMultiplyHost(A_diag, P_offd);
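
         /* The external rows of P introduced new off-diagonal columns,
            so relabel AP_offd's column indices from P's off-diagonal
            numbering into Q's. */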
         if (num_cols_offd_Q > num_cols_offd_P)
         {
            map_P_to_Q = hypre_CTAlloc(HYPRE_Int, num_cols_offd_P, HYPRE_MEMORY_HOST);

            cnt = 0;
            for (i = 0; i < num_cols_offd_Q; i++)
            {
               if (col_map_offd_Q[i] == col_map_offd_P[cnt])
               {
                  map_P_to_Q[cnt++] = i;
                  if (cnt == num_cols_offd_P)
                  {
                     break;
                  }
               }
            }
            AP_offd_num_nonzeros = hypre_CSRMatrixNumNonzeros(AP_offd);
            AP_offd_j = hypre_CSRMatrixJ(AP_offd);
            for (i = 0; i < AP_offd_num_nonzeros; i++)
            {
               AP_offd_j[i] = map_P_to_Q[AP_offd_j[i]];
            }

            hypre_TFree(map_P_to_Q, HYPRE_MEMORY_HOST);
            hypre_CSRMatrixNumCols(AP_offd) = num_cols_offd_Q;
         }
      }

      if (num_cols_offd_A) /* num_cols_offd_A is the number of rows of Pext_diag */
      {
         Q_diag = hypre_CSRMatrixAdd(1.0, AP_diag, 1.0, APext_diag);
         hypre_CSRMatrixDestroy(AP_diag);
         hypre_CSRMatrixDestroy(APext_diag);
      }
      else
      {
         Q_diag = AP_diag;
      }

      if (num_cols_offd_P && num_cols_offd_A)
      {
         Q_offd = hypre_CSRMatrixAdd(1.0, AP_offd, 1.0, APext_offd);
         hypre_CSRMatrixDestroy(APext_offd);
         hypre_CSRMatrixDestroy(AP_offd);
      }
      else if (num_cols_offd_A)
      {
         Q_offd = APext_offd;
      }
      else if (num_cols_offd_P)
      {
         Q_offd = AP_offd;
      }
      else
      {
         Q_offd = hypre_CSRMatrixClone(A_offd, 1);
      }

      Q = hypre_ParCSRMatrixCreate(comm, n_rows_A, n_cols_P, row_starts_A,
                                   col_starts_P, num_cols_offd_Q,
                                   Q_diag->num_nonzeros, Q_offd->num_nonzeros);

      hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(Q));
      hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(Q));
      hypre_ParCSRMatrixDiag(Q) = Q_diag;
      hypre_ParCSRMatrixOffd(Q) = Q_offd;
      hypre_ParCSRMatrixColMapOffd(Q) = col_map_offd_Q;

      hypre_CSRMatrixTranspose(R_diag, &RT_diag, 1);
      C_tmp_diag = hypre_CSRMatrixMultiplyHost(RT_diag, Q_diag);
      if (num_cols_offd_Q)
      {
         C_tmp_offd = hypre_CSRMatrixMultiplyHost(RT_diag, Q_offd);
      }
      else
      {
         C_tmp_offd = hypre_CSRMatrixClone(Q_offd, 1);
         hypre_CSRMatrixNumRows(C_tmp_offd) = num_cols_diag_R;
      }

      if (keep_transpose)
      {
         R->diagT = RT_diag;
      }
      else
      {
         hypre_CSRMatrixDestroy(RT_diag);
      }

      if (num_cols_offd_R)
      {
         hypre_CSRMatrixTranspose(R_offd, &RT_offd, 1);
         C_int_diag = hypre_CSRMatrixMultiplyHost(RT_offd, Q_diag);
         C_int_offd = hypre_CSRMatrixMultiplyHost(RT_offd, Q_offd);

         hypre_ParCSRMatrixDiag(Q) = C_int_diag;
         hypre_ParCSRMatrixOffd(Q) = C_int_offd;
         C_int = hypre_MergeDiagAndOffd(Q);
         hypre_ParCSRMatrixDiag(Q) = Q_diag;
         hypre_ParCSRMatrixOffd(Q) = Q_offd;
      }
      else
      {
         C_int = hypre_CSRMatrixCreate(0, 0, 0);
         hypre_CSRMatrixInitialize(C_int);
      }

      /* contains communication; should be explicitly included to allow for overlap */
      hypre_ExchangeExternalRowsInit(C_int, comm_pkg_R, &request);
      C_ext = hypre_ExchangeExternalRowsWait(request);

      hypre_CSRMatrixDestroy(C_int);
      if (num_cols_offd_R)
      {
         hypre_CSRMatrixDestroy(C_int_diag);
         hypre_CSRMatrixDestroy(C_int_offd);
         if (keep_transpose)
         {
            R->offdT = RT_offd;
         }
         else
         {
            hypre_CSRMatrixDestroy(RT_offd);
         }
      }

      /*-----------------------------------------------------------------------
       *  Add contents of C_ext to C_tmp_diag and C_tmp_offd
       *  to obtain C_diag and C_offd
       *-----------------------------------------------------------------------*/

      /* split C_ext into a local part C_ext_diag and a nonlocal part C_ext_offd;
         also generate a new col_map_offd and adjust column indices accordingly */

      if (C_ext)
      {
         first_col_diag_C = first_col_diag_P;
         last_col_diag_C = first_col_diag_P + num_cols_diag_P - 1;

         hypre_CSRMatrixSplit(C_ext, first_col_diag_C, last_col_diag_C,
                              num_cols_offd_Q, col_map_offd_Q, &num_cols_offd_C, &col_map_offd_C,
                              &C_ext_diag, &C_ext_offd);

         hypre_CSRMatrixDestroy(C_ext);
         C_ext = NULL;
         /*if (C_ext_offd->num_nonzeros == 0) C_ext_offd->num_cols = 0;*/
      }

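      /* Relabel C_tmp_offd's column indices from Q's off-diagonal numbering
         into C's (col_map_offd_Q is a sorted subsequence of col_map_offd_C). */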
      if (num_cols_offd_Q && C_tmp_offd->num_cols)
      {
         C_tmp_offd_i = hypre_CSRMatrixI(C_tmp_offd);
         C_tmp_offd_j = hypre_CSRMatrixJ(C_tmp_offd);

         map_Q_to_C = hypre_CTAlloc(HYPRE_Int, num_cols_offd_Q, HYPRE_MEMORY_HOST);

         cnt = 0;
         for (i = 0; i < num_cols_offd_C; i++)
         {
            if (col_map_offd_C[i] == col_map_offd_Q[cnt])
            {
               map_Q_to_C[cnt++] = i;
               if (cnt == num_cols_offd_Q)
               {
                  break;
               }
            }
         }
         for (i = 0; i < C_tmp_offd_i[hypre_CSRMatrixNumRows(C_tmp_offd)]; i++)
         {
            j_indx = C_tmp_offd_j[i];
            C_tmp_offd_j[i] = map_Q_to_C[j_indx];
         }
         hypre_TFree(map_Q_to_C, HYPRE_MEMORY_HOST);
      }
      hypre_CSRMatrixNumCols(C_tmp_offd) = num_cols_offd_C;
      hypre_ParCSRMatrixDestroy(Q);

      /*-----------------------------------------------------------------------
       *  Compute C_diag = C_tmp_diag + C_ext_diag
       *  and     C_offd = C_tmp_offd + C_ext_offd
       *-----------------------------------------------------------------------*/
      send_map_elmts_R = hypre_ParCSRCommPkgSendMapElmts(comm_pkg_R);
      if (C_ext_diag)
      {
         C_diag = hypre_CSRMatrixAddPartial(C_tmp_diag, C_ext_diag, send_map_elmts_R);
         hypre_CSRMatrixDestroy(C_tmp_diag);
         hypre_CSRMatrixDestroy(C_ext_diag);
      }
      else
      {
         C_diag = C_tmp_diag;
      }
      if (C_ext_offd)
      {
         C_offd = hypre_CSRMatrixAddPartial(C_tmp_offd, C_ext_offd, send_map_elmts_R);
         hypre_CSRMatrixDestroy(C_tmp_offd);
         hypre_CSRMatrixDestroy(C_ext_offd);
      }
      else
      {
         C_offd = C_tmp_offd;
      }
   }
   else
   {
      Q_diag = hypre_CSRMatrixMultiplyHost(A_diag, P_diag);
      hypre_CSRMatrixTranspose(R_diag, &RT_diag, 1);
      C_diag = hypre_CSRMatrixMultiplyHost(RT_diag, Q_diag);
      C_offd = hypre_CSRMatrixCreate(num_cols_diag_R, 0, 0);
      hypre_CSRMatrixInitialize_v2(C_offd, 0, hypre_CSRMatrixMemoryLocation(C_diag));
      if (keep_transpose)
      {
         R->diagT = RT_diag;
      }
      else
      {
         hypre_CSRMatrixDestroy(RT_diag);
      }
      hypre_CSRMatrixDestroy(Q_diag);
   }

   C = hypre_ParCSRMatrixCreate(comm, n_cols_R, n_cols_P, col_starts_R,
                                col_starts_P, num_cols_offd_C, 0, 0);

   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(C));
   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(C));
   hypre_ParCSRMatrixDiag(C) = C_diag;

   if (C_offd)
   {
      hypre_ParCSRMatrixOffd(C) = C_offd;
   }
   else
   {
      C_offd = hypre_CSRMatrixCreate(num_cols_diag_R, 0, 0);
      hypre_CSRMatrixInitialize(C_offd);
      hypre_ParCSRMatrixOffd(C) = C_offd;
   }

   hypre_ParCSRMatrixColMapOffd(C) = col_map_offd_C;

   if (num_procs > 1)
   {
      /* hypre_GenerateRAPCommPkg(RAP, A); */
      hypre_MatvecCommPkgCreate(C);
   }

   return C;
}

hypre_ParCSRMatrix*
hypre_ParCSRMatrixRAPKT( hypre_ParCSRMatrix  *R,
                         hypre_ParCSRMatrix  *A,
                         hypre_ParCSRMatrix  *P,
                         HYPRE_Int            keep_transpose )
{
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
   hypre_GpuProfilingPushRange("TripleMat-RAP");
#endif

   hypre_ParCSRMatrix *C = NULL;

#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
   HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy2( hypre_ParCSRMatrixMemoryLocation(R),
                                                      hypre_ParCSRMatrixMemoryLocation(A) );

   if (exec == HYPRE_EXEC_DEVICE)
   {
      C = hypre_ParCSRMatrixRAPKTDevice(R, A, P, keep_transpose);
   }
   else
#endif
   {
      C = hypre_ParCSRMatrixRAPKTHost(R, A, P, keep_transpose);
   }

#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
   hypre_GpuProfilingPopRange();
#endif

   return C;
}

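/*--------------------------------------------------------------------------
 * hypre_ParCSRMatrixRAP : computes C = transpose(R) * A * P without
 * keeping the transpose of R
 *--------------------------------------------------------------------------*/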
hypre_ParCSRMatrix*
hypre_ParCSRMatrixRAP( hypre_ParCSRMatrix *R,
                       hypre_ParCSRMatrix *A,
                       hypre_ParCSRMatrix *P )
{
   return hypre_ParCSRMatrixRAPKT(R, A, P, 0);
}
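
/* Example usage (a minimal sketch): form a Galerkin coarse-grid operator
 * from an interpolation P, passing P itself as the (to-be-transposed)
 * restriction argument, i.e. Ac = P^T * A * P:
 *
 *    hypre_ParCSRMatrix *Ac = hypre_ParCSRMatrixRAP(P, A, P);
 */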

/*--------------------------------------------------------------------------
 * OLD NOTES:
 * Sketch of John's code to build RAP
 *
 * Uses two integer arrays icg and ifg as marker arrays
 *
 *  icg needs to be of size n_fine; size of ia.
 *     A negative value of icg(i) indicates i is an f-point; otherwise
 *     icg(i) converts from fine- to coarse-grid orderings.
 *     Note that I believe the code assumes that if i<j and both are
 *     c-points, then icg(i) < icg(j).
 *  ifg needs to be of size n_coarse; size of irap
 *     I don't think it has meaning as either input or output.
 *
 * In the code, both the interpolation and restriction operator
 * are stored row-wise in the array b. If i is an f-point,
 * ib(i) points to the row of the interpolation operator for point
 * i. If i is a c-point, ib(i) points to the row of the restriction
 * operator for point i.
 *
 * In the CSR storage for rap, it is guaranteed that the rows will
 * be ordered (i.e. ic < jc -> irap(ic) < irap(jc)), but I don't
 * think there is a guarantee that the entries within a row will
 * be ordered in any way, except that the diagonal entry comes first.
 *
 * As structured now, the code requires that the size of rap be
 * predicted up front. To avoid this, one could execute the code
 * twice; the first time would only keep track of icg, ifg and ka.
 * Then you would know how much memory to allocate for rap and jrap.
 * The second time would fill in these arrays. Actually, you might
 * be able to include the filling in of jrap in the first pass;
 * just overestimate its size (it is an integer array) and cut it
 * back before the second time through. This would avoid some if-tests
 * in the second pass.
 *
 * Questions
 *            1) parallel (PETSc) version?
 *            2) what if we don't store R row-wise and don't
 *               even want to store a copy of it in this form
 *               temporarily?
 *--------------------------------------------------------------------------*/