1 /******************************************************************************
2 * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3 * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4 *
5 * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6 ******************************************************************************/
7
8 #include "_hypre_parcsr_mv.h"
9
10 #include "_hypre_utilities.h"
11 #include "../parcsr_mv/_hypre_parcsr_mv.h"
12
13 /*--------------------------------------------------------------------------
14 * hypre_ParCSRMatMat : multiplies two ParCSRMatrices A and B and returns
15 * the product in ParCSRMatrix C
16 * Note that C does not own the partitionings since its row_starts
17 * is owned by A and col_starts by B.
18 *--------------------------------------------------------------------------*/
19
20 hypre_ParCSRMatrix*
hypre_ParCSRMatMatHost(hypre_ParCSRMatrix * A,hypre_ParCSRMatrix * B)21 hypre_ParCSRMatMatHost( hypre_ParCSRMatrix *A,
22 hypre_ParCSRMatrix *B )
23 {
24 MPI_Comm comm = hypre_ParCSRMatrixComm(A);
25
26 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
27
28 hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
29
30 HYPRE_BigInt *row_starts_A = hypre_ParCSRMatrixRowStarts(A);
31 HYPRE_Int num_cols_diag_A = hypre_CSRMatrixNumCols(A_diag);
32 HYPRE_Int num_rows_diag_A = hypre_CSRMatrixNumRows(A_diag);
33
34 hypre_CSRMatrix *B_diag = hypre_ParCSRMatrixDiag(B);
35
36 hypre_CSRMatrix *B_offd = hypre_ParCSRMatrixOffd(B);
37 HYPRE_BigInt *col_map_offd_B = hypre_ParCSRMatrixColMapOffd(B);
38
39 HYPRE_BigInt first_col_diag_B = hypre_ParCSRMatrixFirstColDiag(B);
40 HYPRE_BigInt last_col_diag_B;
41 HYPRE_BigInt *col_starts_B = hypre_ParCSRMatrixColStarts(B);
42 HYPRE_Int num_rows_diag_B = hypre_CSRMatrixNumRows(B_diag);
43 HYPRE_Int num_cols_diag_B = hypre_CSRMatrixNumCols(B_diag);
44 HYPRE_Int num_cols_offd_B = hypre_CSRMatrixNumCols(B_offd);
45
46 hypre_ParCSRMatrix *C;
47 HYPRE_BigInt *col_map_offd_C = NULL;
48 HYPRE_Int *map_B_to_C=NULL;
49
50 hypre_CSRMatrix *C_diag = NULL;
51
52 hypre_CSRMatrix *C_offd = NULL;
53
54 HYPRE_Int num_cols_offd_C = 0;
55
56 hypre_CSRMatrix *Bs_ext;
57
58 hypre_CSRMatrix *Bext_diag;
59
60 hypre_CSRMatrix *Bext_offd;
61
62 hypre_CSRMatrix *AB_diag;
63 hypre_CSRMatrix *AB_offd;
64 HYPRE_Int AB_offd_num_nonzeros;
65 HYPRE_Int *AB_offd_j;
66 hypre_CSRMatrix *ABext_diag;
67 hypre_CSRMatrix *ABext_offd;
68
69 HYPRE_BigInt n_rows_A, n_cols_A;
70 HYPRE_BigInt n_rows_B, n_cols_B;
71 HYPRE_Int cnt, i;
72 HYPRE_Int num_procs;
73 HYPRE_Int my_id;
74
75 n_rows_A = hypre_ParCSRMatrixGlobalNumRows(A);
76 n_cols_A = hypre_ParCSRMatrixGlobalNumCols(A);
77 n_rows_B = hypre_ParCSRMatrixGlobalNumRows(B);
78 n_cols_B = hypre_ParCSRMatrixGlobalNumCols(B);
79
80 if (n_cols_A != n_rows_B || num_cols_diag_A != num_rows_diag_B)
81 {
82 hypre_error_in_arg(1);
83 hypre_printf(" Error! Incompatible matrix dimensions!\n");
84 return NULL;
85 }
86
87 /*-----------------------------------------------------------------------
88 * Extract B_ext, i.e. portion of B that is stored on neighbor procs
89 * and needed locally for matrix matrix product
90 *-----------------------------------------------------------------------*/
91
92 hypre_MPI_Comm_size(comm, &num_procs);
93 hypre_MPI_Comm_rank(comm, &my_id);
94 last_col_diag_B = first_col_diag_B + num_cols_diag_B -1;
95
96 if (num_procs > 1)
97 {
98 /*---------------------------------------------------------------------
99 * If there exists no CommPkg for A, a CommPkg is generated using
100 * equally load balanced partitionings within
101 * hypre_ParCSRMatrixExtractBExt
102 *--------------------------------------------------------------------*/
103 Bs_ext = hypre_ParCSRMatrixExtractBExt(B, A, 1); /* contains communication
104 which should be explicitly included to allow for overlap */
105
106
107 hypre_CSRMatrixSplit(Bs_ext, first_col_diag_B, last_col_diag_B, num_cols_offd_B, col_map_offd_B,
108 &num_cols_offd_C, &col_map_offd_C, &Bext_diag, &Bext_offd);
109
110 hypre_CSRMatrixDestroy(Bs_ext);
111
112 /* These are local and could be overlapped with communication */
113 AB_diag = hypre_CSRMatrixMultiplyHost(A_diag, B_diag);
114 AB_offd = hypre_CSRMatrixMultiplyHost(A_diag, B_offd);
115
116 /* These require data from other processes */
117 ABext_diag = hypre_CSRMatrixMultiplyHost(A_offd, Bext_diag);
118 ABext_offd = hypre_CSRMatrixMultiplyHost(A_offd, Bext_offd);
119
120 hypre_CSRMatrixDestroy(Bext_diag);
121 hypre_CSRMatrixDestroy(Bext_offd);
122
123 if (num_cols_offd_B)
124 {
125 HYPRE_Int i;
126 map_B_to_C = hypre_CTAlloc(HYPRE_Int, num_cols_offd_B, HYPRE_MEMORY_HOST);
127
128 cnt = 0;
129 for (i = 0; i < num_cols_offd_C; i++)
130 {
131 if (col_map_offd_C[i] == col_map_offd_B[cnt])
132 {
133 map_B_to_C[cnt++] = i;
134 if (cnt == num_cols_offd_B)
135 {
136 break;
137 }
138 }
139 }
140 }
141 AB_offd_num_nonzeros = hypre_CSRMatrixNumNonzeros(AB_offd);
142 AB_offd_j = hypre_CSRMatrixJ(AB_offd);
143 for (i=0; i < AB_offd_num_nonzeros; i++)
144 {
145 AB_offd_j[i] = map_B_to_C[AB_offd_j[i]];
146 }
147
148 if (num_cols_offd_B)
149 {
150 hypre_TFree(map_B_to_C, HYPRE_MEMORY_HOST);
151 }
152
153 hypre_CSRMatrixNumCols(AB_diag) = num_cols_diag_B;
154 hypre_CSRMatrixNumCols(ABext_diag) = num_cols_diag_B;
155 hypre_CSRMatrixNumCols(AB_offd) = num_cols_offd_C;
156 hypre_CSRMatrixNumCols(ABext_offd) = num_cols_offd_C;
157 C_diag = hypre_CSRMatrixAdd(1.0, AB_diag, 1.0, ABext_diag);
158 C_offd = hypre_CSRMatrixAdd(1.0, AB_offd, 1.0, ABext_offd);
159
160 hypre_CSRMatrixDestroy(AB_diag);
161 hypre_CSRMatrixDestroy(ABext_diag);
162 hypre_CSRMatrixDestroy(AB_offd);
163 hypre_CSRMatrixDestroy(ABext_offd);
164 }
165 else
166 {
167 C_diag = hypre_CSRMatrixMultiplyHost(A_diag, B_diag);
168 C_offd = hypre_CSRMatrixCreate(num_rows_diag_A, 0, 0);
169 hypre_CSRMatrixInitialize_v2(C_offd, 0, hypre_CSRMatrixMemoryLocation(C_diag));
170 }
171
172 /*-----------------------------------------------------------------------
173 * Allocate C_diag_data and C_diag_j arrays.
174 * Allocate C_offd_data and C_offd_j arrays.
175 *-----------------------------------------------------------------------*/
176
177 C = hypre_ParCSRMatrixCreate(comm, n_rows_A, n_cols_B, row_starts_A,
178 col_starts_B, num_cols_offd_C,
179 C_diag->num_nonzeros, C_offd->num_nonzeros);
180
181 hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(C));
182 hypre_ParCSRMatrixDiag(C) = C_diag;
183
184 hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(C));
185 hypre_ParCSRMatrixOffd(C) = C_offd;
186
187 if (num_cols_offd_C)
188 {
189 hypre_ParCSRMatrixColMapOffd(C) = col_map_offd_C;
190 }
191
192 return C;
193 }
194
195 hypre_ParCSRMatrix*
hypre_ParCSRMatMat(hypre_ParCSRMatrix * A,hypre_ParCSRMatrix * B)196 hypre_ParCSRMatMat( hypre_ParCSRMatrix *A,
197 hypre_ParCSRMatrix *B )
198 {
199 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
200 hypre_GpuProfilingPushRange("Mat-Mat");
201 #endif
202
203 hypre_ParCSRMatrix *C = NULL;
204
205 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
206 HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy2( hypre_ParCSRMatrixMemoryLocation(A),
207 hypre_ParCSRMatrixMemoryLocation(B) );
208
209 if (exec == HYPRE_EXEC_DEVICE)
210 {
211 C = hypre_ParCSRMatMatDevice(A,B);
212 }
213 else
214 #endif
215 {
216 C = hypre_ParCSRMatMatHost(A,B);
217 }
218
219 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
220 hypre_GpuProfilingPopRange();
221 #endif
222
223 return C;
224 }
225
/*--------------------------------------------------------------------------
 * hypre_ParCSRTMatMatKT : multiplies two ParCSRMatrices transpose(A) and B
 * and returns the product in ParCSRMatrix C.
 * If keep_transpose is nonzero, the transposed blocks of A are cached on
 * A->diagT (and A->offdT in the parallel case) instead of being destroyed.
 * Note that C does not own the partitionings: its row_starts comes from
 * A's col_starts and its col_starts from B's col_starts.
 *--------------------------------------------------------------------------*/

hypre_ParCSRMatrix*
hypre_ParCSRTMatMatKTHost( hypre_ParCSRMatrix *A,
                           hypre_ParCSRMatrix *B,
                           HYPRE_Int keep_transpose)
{
   MPI_Comm comm = hypre_ParCSRMatrixComm(A);
   hypre_ParCSRCommPkg *comm_pkg_A = NULL;

   hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
   hypre_CSRMatrix *B_diag = hypre_ParCSRMatrixDiag(B);
   hypre_CSRMatrix *B_offd = hypre_ParCSRMatrixOffd(B);
   hypre_CSRMatrix *AT_diag = NULL;

   HYPRE_Int num_rows_diag_A = hypre_CSRMatrixNumRows(A_diag);
   HYPRE_Int num_cols_diag_A = hypre_CSRMatrixNumCols(A_diag);
   HYPRE_Int num_rows_diag_B = hypre_CSRMatrixNumRows(B_diag);
   HYPRE_Int num_cols_diag_B = hypre_CSRMatrixNumCols(B_diag);
   HYPRE_Int num_cols_offd_B = hypre_CSRMatrixNumCols(B_offd);
   HYPRE_BigInt first_col_diag_B = hypre_ParCSRMatrixFirstColDiag(B);

   HYPRE_BigInt *col_map_offd_B = hypre_ParCSRMatrixColMapOffd(B);

   HYPRE_BigInt *col_starts_A = hypre_ParCSRMatrixColStarts(A);
   HYPRE_BigInt *col_starts_B = hypre_ParCSRMatrixColStarts(B);

   hypre_ParCSRMatrix *C;
   hypre_CSRMatrix *C_diag = NULL;
   hypre_CSRMatrix *C_offd = NULL;

   HYPRE_BigInt *col_map_offd_C = NULL;
   HYPRE_Int *map_B_to_C;
   HYPRE_BigInt first_col_diag_C;
   HYPRE_BigInt last_col_diag_C;
   HYPRE_Int num_cols_offd_C = 0;

   HYPRE_BigInt n_rows_A, n_cols_A;
   HYPRE_BigInt n_rows_B, n_cols_B;
   HYPRE_Int j_indx, cnt;
   HYPRE_Int num_procs, my_id;

   n_rows_A = hypre_ParCSRMatrixGlobalNumRows(A);
   n_cols_A = hypre_ParCSRMatrixGlobalNumCols(A);
   n_rows_B = hypre_ParCSRMatrixGlobalNumRows(B);
   n_cols_B = hypre_ParCSRMatrixGlobalNumCols(B);

   hypre_MPI_Comm_size(comm,&num_procs);
   hypre_MPI_Comm_rank(comm, &my_id);

   /* A^T*B requires A and B to share the same row space,
      both globally and locally */
   if (n_rows_A != n_rows_B || num_rows_diag_A != num_rows_diag_B)
   {
      hypre_error_w_msg(HYPRE_ERROR_GENERIC," Error! Incompatible matrix dimensions!\n");
      return NULL;
   }

   /*if (num_cols_diag_A == num_cols_diag_B) allsquare = 1;*/

   hypre_CSRMatrixTranspose(A_diag, &AT_diag, 1);

   if (num_procs == 1)
   {
      /* Sequential case: C = A_diag^T * B_diag with an empty offd part */
      C_diag = hypre_CSRMatrixMultiplyHost(AT_diag, B_diag);
      C_offd = hypre_CSRMatrixCreate(num_cols_diag_A, 0, 0);
      hypre_CSRMatrixInitialize_v2(C_offd, 0, hypre_CSRMatrixMemoryLocation(C_diag));
      hypre_CSRMatrixNumRownnz(C_offd) = 0;
      if (keep_transpose)
      {
         /* Cache A_diag^T on A for later reuse */
         A->diagT = AT_diag;
      }
      else
      {
         hypre_CSRMatrixDestroy(AT_diag);
      }
   }
   else
   {
      hypre_CSRMatrix *AT_offd = NULL;
      hypre_CSRMatrix *C_tmp_diag = NULL;
      hypre_CSRMatrix *C_tmp_offd = NULL;
      hypre_CSRMatrix *C_int = NULL;
      hypre_CSRMatrix *C_ext = NULL;
      hypre_CSRMatrix *C_ext_diag = NULL;
      hypre_CSRMatrix *C_ext_offd = NULL;
      hypre_CSRMatrix *C_int_diag = NULL;
      hypre_CSRMatrix *C_int_offd = NULL;

      HYPRE_Int i;
      HYPRE_Int *C_tmp_offd_i;
      HYPRE_Int *C_tmp_offd_j;
      HYPRE_Int *send_map_elmts_A;
      void *request;

      hypre_CSRMatrixTranspose(A_offd, &AT_offd, 1);

      /* Contribution of this process to rows of C owned by other
         processes: C_int = A_offd^T * [B_diag B_offd] */
      C_int_diag = hypre_CSRMatrixMultiplyHost(AT_offd, B_diag);
      C_int_offd = hypre_CSRMatrixMultiplyHost(AT_offd, B_offd);

      /* Temporarily hang C_int_diag/C_int_offd on B so that
         hypre_MergeDiagAndOffd can merge them using B's column map;
         B's own blocks are restored right after */
      hypre_ParCSRMatrixDiag(B) = C_int_diag;
      hypre_ParCSRMatrixOffd(B) = C_int_offd;

      C_int = hypre_MergeDiagAndOffd(B);

      hypre_ParCSRMatrixDiag(B) = B_diag;
      hypre_ParCSRMatrixOffd(B) = B_offd;

      /* Make sure A has a CommPkg before exchanging rows */
      if (!hypre_ParCSRMatrixCommPkg(A))
      {
         hypre_MatvecCommPkgCreate(A);
      }
      comm_pkg_A = hypre_ParCSRMatrixCommPkg(A);

      /* contains communication; should be explicitly included to allow for overlap */
      hypre_ExchangeExternalRowsInit(C_int, comm_pkg_A, &request);
      C_ext = hypre_ExchangeExternalRowsWait(request);

      hypre_CSRMatrixDestroy(C_int);
      hypre_CSRMatrixDestroy(C_int_diag);
      hypre_CSRMatrixDestroy(C_int_offd);

      /* Local contribution: C_tmp = A_diag^T * [B_diag B_offd] */
      C_tmp_diag = hypre_CSRMatrixMultiplyHost(AT_diag, B_diag);
      C_tmp_offd = hypre_CSRMatrixMultiplyHost(AT_diag, B_offd);

      if (keep_transpose)
      {
         /* Cache A_diag^T on A for later reuse */
         A->diagT = AT_diag;
      }
      else
      {
         hypre_CSRMatrixDestroy(AT_diag);
      }

      if (keep_transpose)
      {
         /* Cache A_offd^T on A for later reuse */
         A->offdT = AT_offd;
      }
      else
      {
         hypre_CSRMatrixDestroy(AT_offd);
      }

      /*-----------------------------------------------------------------------
       * Add contents of C_ext to C_tmp_diag and C_tmp_offd
       * to obtain C_diag and C_offd
       *-----------------------------------------------------------------------*/

      /* split C_ext in local C_ext_diag and nonlocal part C_ext_offd,
         also generate new col_map_offd and adjust column indices accordingly */
      first_col_diag_C = first_col_diag_B;
      last_col_diag_C = first_col_diag_B + num_cols_diag_B - 1;

      if (C_ext)
      {
         hypre_CSRMatrixSplit(C_ext, first_col_diag_C, last_col_diag_C,
                              num_cols_offd_B, col_map_offd_B, &num_cols_offd_C, &col_map_offd_C,
                              &C_ext_diag, &C_ext_offd);

         hypre_CSRMatrixDestroy(C_ext);
         C_ext = NULL;
      }

      C_tmp_offd_i = hypre_CSRMatrixI(C_tmp_offd);
      C_tmp_offd_j = hypre_CSRMatrixJ(C_tmp_offd);

      if (num_cols_offd_B)
      {
         /* Build the map from B's offd column numbering to C's offd
            numbering; both column maps are sorted, so one sweep suffices */
         map_B_to_C = hypre_CTAlloc(HYPRE_Int, num_cols_offd_B, HYPRE_MEMORY_HOST);

         cnt = 0;
         for (i=0; i < num_cols_offd_C; i++)
         {
            if (col_map_offd_C[i] == col_map_offd_B[cnt])
            {
               map_B_to_C[cnt++] = i;
               if (cnt == num_cols_offd_B)
               {
                  break;
               }
            }
         }
         /* Renumber C_tmp_offd's column indices into C's offd numbering */
         for (i=0; i < C_tmp_offd_i[hypre_CSRMatrixNumRows(C_tmp_offd)]; i++)
         {
            j_indx = C_tmp_offd_j[i];
            C_tmp_offd_j[i] = map_B_to_C[j_indx];
         }
         hypre_TFree(map_B_to_C, HYPRE_MEMORY_HOST);
      }

      /*-----------------------------------------------------------------------
       * Need to compute C_diag = C_tmp_diag + C_ext_diag
       * and C_offd = C_tmp_offd + C_ext_offd !!!!
       *-----------------------------------------------------------------------*/
      /* NOTE(review): if C_ext came back NULL above, C_ext_diag/C_ext_offd
         are NULL here; presumably AddPartial tolerates a NULL second
         operand or C_ext is always non-NULL -- confirm */
      send_map_elmts_A = hypre_ParCSRCommPkgSendMapElmts(comm_pkg_A);
      C_diag = hypre_CSRMatrixAddPartial(C_tmp_diag, C_ext_diag, send_map_elmts_A);
      hypre_CSRMatrixNumCols(C_tmp_offd) = num_cols_offd_C;
      C_offd = hypre_CSRMatrixAddPartial(C_tmp_offd, C_ext_offd, send_map_elmts_A);

      hypre_CSRMatrixDestroy(C_tmp_diag);
      hypre_CSRMatrixDestroy(C_tmp_offd);
      hypre_CSRMatrixDestroy(C_ext_diag);
      hypre_CSRMatrixDestroy(C_ext_offd);
   }

   /* Create C and replace its placeholder diag/offd blocks with the
      computed ones */
   C = hypre_ParCSRMatrixCreate(comm, n_cols_A, n_cols_B, col_starts_A, col_starts_B,
                                num_cols_offd_C, C_diag->num_nonzeros, C_offd->num_nonzeros);

   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(C));
   hypre_ParCSRMatrixDiag(C) = C_diag;

   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(C));
   hypre_ParCSRMatrixOffd(C) = C_offd;

   hypre_ParCSRMatrixColMapOffd(C) = col_map_offd_C;

   return C;
}
448
449 hypre_ParCSRMatrix*
hypre_ParCSRTMatMatKT(hypre_ParCSRMatrix * A,hypre_ParCSRMatrix * B,HYPRE_Int keep_transpose)450 hypre_ParCSRTMatMatKT( hypre_ParCSRMatrix *A,
451 hypre_ParCSRMatrix *B,
452 HYPRE_Int keep_transpose)
453 {
454 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
455 hypre_GpuProfilingPushRange("Mat-T-Mat");
456 #endif
457
458 hypre_ParCSRMatrix *C = NULL;
459
460 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
461 HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy2( hypre_ParCSRMatrixMemoryLocation(A),
462 hypre_ParCSRMatrixMemoryLocation(B) );
463
464 if (exec == HYPRE_EXEC_DEVICE)
465 {
466 C = hypre_ParCSRTMatMatKTDevice(A, B, keep_transpose);
467 }
468 else
469 #endif
470 {
471 C = hypre_ParCSRTMatMatKTHost(A, B, keep_transpose);
472 }
473
474 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
475 hypre_GpuProfilingPopRange();
476 #endif
477
478 return C;
479 }
480
481 hypre_ParCSRMatrix*
hypre_ParCSRTMatMat(hypre_ParCSRMatrix * A,hypre_ParCSRMatrix * B)482 hypre_ParCSRTMatMat( hypre_ParCSRMatrix *A,
483 hypre_ParCSRMatrix *B)
484 {
485 return hypre_ParCSRTMatMatKT( A, B, 0);
486 }
487
/*--------------------------------------------------------------------------
 * hypre_ParCSRMatrixRAPKTHost : host (CPU) computation of the triple
 * product C = R^T * A * P (the blocks of R are transposed locally; see
 * RT_diag/RT_offd below). An intermediate product Q = A*P is formed first,
 * then C = R^T * Q.
 * If keep_transpose is nonzero, the transposed blocks of R are cached on
 * R->diagT (and R->offdT) instead of being destroyed.
 * Returns NULL (and records an error) when dimensions are incompatible.
 *--------------------------------------------------------------------------*/

hypre_ParCSRMatrix*
hypre_ParCSRMatrixRAPKTHost( hypre_ParCSRMatrix *R,
                             hypre_ParCSRMatrix *A,
                             hypre_ParCSRMatrix *P,
                             HYPRE_Int keep_transpose )
{
   MPI_Comm comm = hypre_ParCSRMatrixComm(A);

   hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);

   HYPRE_BigInt *row_starts_A = hypre_ParCSRMatrixRowStarts(A);
   HYPRE_Int num_rows_diag_A = hypre_CSRMatrixNumRows(A_diag);
   HYPRE_Int num_cols_diag_A = hypre_CSRMatrixNumCols(A_diag);
   HYPRE_Int num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd);

   hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P);
   hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P);
   HYPRE_BigInt *col_map_offd_P = hypre_ParCSRMatrixColMapOffd(P);

   HYPRE_BigInt first_col_diag_P = hypre_ParCSRMatrixFirstColDiag(P);
   HYPRE_BigInt *col_starts_P = hypre_ParCSRMatrixColStarts(P);
   HYPRE_Int num_rows_diag_P = hypre_CSRMatrixNumRows(P_diag);
   HYPRE_Int num_cols_diag_P = hypre_CSRMatrixNumCols(P_diag);
   HYPRE_Int num_cols_offd_P = hypre_CSRMatrixNumCols(P_offd);

   /* Q = A*P, the intermediate product */
   hypre_ParCSRMatrix *Q;
   HYPRE_BigInt *col_map_offd_Q = NULL;
   HYPRE_Int *map_P_to_Q=NULL;

   hypre_CSRMatrix *Q_diag = NULL;
   hypre_CSRMatrix *Q_offd = NULL;

   HYPRE_Int num_cols_offd_Q = 0;

   hypre_CSRMatrix *Ps_ext;
   hypre_CSRMatrix *Pext_diag;
   hypre_CSRMatrix *Pext_offd;

   hypre_CSRMatrix *AP_diag;
   hypre_CSRMatrix *AP_offd;
   HYPRE_Int AP_offd_num_nonzeros;
   HYPRE_Int *AP_offd_j;
   hypre_CSRMatrix *APext_diag;
   hypre_CSRMatrix *APext_offd;

   hypre_ParCSRCommPkg *comm_pkg_R = hypre_ParCSRMatrixCommPkg(R);

   hypre_CSRMatrix *R_diag = hypre_ParCSRMatrixDiag(R);
   hypre_CSRMatrix *RT_diag = NULL;

   hypre_CSRMatrix *R_offd = hypre_ParCSRMatrixOffd(R);

   HYPRE_Int num_rows_diag_R = hypre_CSRMatrixNumRows(R_diag);
   HYPRE_Int num_cols_diag_R = hypre_CSRMatrixNumCols(R_diag);
   HYPRE_Int num_cols_offd_R = hypre_CSRMatrixNumCols(R_offd);

   HYPRE_BigInt *col_starts_R = hypre_ParCSRMatrixColStarts(R);

   hypre_ParCSRMatrix *C;
   HYPRE_BigInt *col_map_offd_C = NULL;
   HYPRE_Int *map_Q_to_C;

   hypre_CSRMatrix *C_diag = NULL;

   HYPRE_BigInt first_col_diag_C;
   HYPRE_BigInt last_col_diag_C;

   hypre_CSRMatrix *C_offd = NULL;

   HYPRE_Int num_cols_offd_C = 0;

   HYPRE_Int j_indx;

   HYPRE_BigInt n_rows_R, n_cols_R;
   HYPRE_Int num_procs, my_id;
   HYPRE_BigInt n_rows_A, n_cols_A;
   HYPRE_BigInt n_rows_P, n_cols_P;
   HYPRE_Int cnt, i;

   n_rows_R = hypre_ParCSRMatrixGlobalNumRows(R);
   n_cols_R = hypre_ParCSRMatrixGlobalNumCols(R);
   n_rows_A = hypre_ParCSRMatrixGlobalNumRows(A);
   n_cols_A = hypre_ParCSRMatrixGlobalNumCols(A);
   n_rows_P = hypre_ParCSRMatrixGlobalNumRows(P);
   n_cols_P = hypre_ParCSRMatrixGlobalNumCols(P);

   hypre_MPI_Comm_size(comm,&num_procs);
   hypre_MPI_Comm_rank(comm, &my_id);

   /* R^T*A*P requires R to share A's row space, and A's column space to
      match P's row space, both globally and locally */
   if ( n_rows_R != n_rows_A || num_rows_diag_R != num_rows_diag_A ||
        n_cols_A != n_rows_P || num_cols_diag_A != num_rows_diag_P )
   {
      hypre_error_w_msg(HYPRE_ERROR_GENERIC," Error! Incompatible matrix dimensions!\n");
      return NULL;
   }


   /*hypre_CSRMatrixTranspose(R_diag, &RT_diag, 1);*/

   if (num_procs > 1)
   {
      HYPRE_BigInt last_col_diag_P;
      hypre_CSRMatrix *RT_offd = NULL;
      hypre_CSRMatrix *C_tmp_diag = NULL;
      hypre_CSRMatrix *C_tmp_offd = NULL;
      hypre_CSRMatrix *C_int = NULL;
      hypre_CSRMatrix *C_ext = NULL;
      hypre_CSRMatrix *C_ext_diag = NULL;
      hypre_CSRMatrix *C_ext_offd = NULL;
      hypre_CSRMatrix *C_int_diag = NULL;
      hypre_CSRMatrix *C_int_offd = NULL;

      HYPRE_Int *C_tmp_offd_i;
      HYPRE_Int *C_tmp_offd_j;

      HYPRE_Int *send_map_elmts_R;
      void *request;
      /*---------------------------------------------------------------------
       * If there exists no CommPkg for A, a CommPkg is generated using
       * equally load balanced partitionings within
       * hypre_ParCSRMatrixExtractBExt
       *--------------------------------------------------------------------*/
      Ps_ext = hypre_ParCSRMatrixExtractBExt(P, A, 1); /* contains communication
                                                          which should be explicitly
                                                          included to allow for overlap */
      if (num_cols_offd_A)
      {
         /* Split the external rows of P into local (diag) and nonlocal
            (offd) parts; this also produces Q's offd column map */
         last_col_diag_P = first_col_diag_P + num_cols_diag_P -1;
         hypre_CSRMatrixSplit(Ps_ext, first_col_diag_P, last_col_diag_P, num_cols_offd_P, col_map_offd_P,
                              &num_cols_offd_Q, &col_map_offd_Q, &Pext_diag, &Pext_offd);
         /* These require data from other processes */
         APext_diag = hypre_CSRMatrixMultiplyHost(A_offd, Pext_diag);
         APext_offd = hypre_CSRMatrixMultiplyHost(A_offd, Pext_offd);

         hypre_CSRMatrixDestroy(Pext_diag);
         hypre_CSRMatrixDestroy(Pext_offd);
      }
      else
      {
         /* A has no offd part: Q's offd columns are exactly P's */
         num_cols_offd_Q = num_cols_offd_P;
         col_map_offd_Q = hypre_CTAlloc(HYPRE_BigInt, num_cols_offd_Q, HYPRE_MEMORY_HOST);
         for (i=0; i < num_cols_offd_P; i++)
         {
            col_map_offd_Q[i] = col_map_offd_P[i];
         }
      }
      hypre_CSRMatrixDestroy(Ps_ext);
      /* These are local and could be overlapped with communication */
      AP_diag = hypre_CSRMatrixMultiplyHost(A_diag, P_diag);

      if (num_cols_offd_P)
      {
         HYPRE_Int i;
         AP_offd = hypre_CSRMatrixMultiplyHost(A_diag, P_offd);
         if (num_cols_offd_Q > num_cols_offd_P)
         {
            /* Q gained offd columns beyond P's: renumber AP_offd's column
               indices from P's offd numbering to Q's (both maps sorted) */
            map_P_to_Q = hypre_CTAlloc(HYPRE_Int,num_cols_offd_P, HYPRE_MEMORY_HOST);

            cnt = 0;
            for (i=0; i < num_cols_offd_Q; i++)
            {
               if (col_map_offd_Q[i] == col_map_offd_P[cnt])
               {
                  map_P_to_Q[cnt++] = i;
                  if (cnt == num_cols_offd_P)
                  {
                     break;
                  }
               }
            }
            AP_offd_num_nonzeros = hypre_CSRMatrixNumNonzeros(AP_offd);
            AP_offd_j = hypre_CSRMatrixJ(AP_offd);
            for (i=0; i < AP_offd_num_nonzeros; i++)
            {
               AP_offd_j[i] = map_P_to_Q[AP_offd_j[i]];
            }

            hypre_TFree(map_P_to_Q, HYPRE_MEMORY_HOST);
            hypre_CSRMatrixNumCols(AP_offd) = num_cols_offd_Q;
         }
      }

      if (num_cols_offd_A) /* number of rows for Pext_diag */
      {
         /* Q_diag = A_diag*P_diag + A_offd*Pext_diag */
         Q_diag = hypre_CSRMatrixAdd(1.0, AP_diag, 1.0, APext_diag);
         hypre_CSRMatrixDestroy(AP_diag);
         hypre_CSRMatrixDestroy(APext_diag);
      }
      else
      {
         Q_diag = AP_diag;
      }

      /* Assemble Q_offd from whichever of AP_offd/APext_offd exist */
      if (num_cols_offd_P && num_cols_offd_A)
      {
         Q_offd = hypre_CSRMatrixAdd(1.0, AP_offd, 1.0, APext_offd);
         hypre_CSRMatrixDestroy(APext_offd);
         hypre_CSRMatrixDestroy(AP_offd);
      }
      else if (num_cols_offd_A)
      {
         Q_offd = APext_offd;
      }
      else if (num_cols_offd_P)
      {
         Q_offd = AP_offd;
      }
      else
      {
         /* Neither contribution exists: clone A's (empty) offd block */
         Q_offd = hypre_CSRMatrixClone(A_offd, 1);
      }

      /* Create Q and replace its placeholder blocks with the computed ones */
      Q = hypre_ParCSRMatrixCreate(comm, n_rows_A, n_cols_P, row_starts_A,
                                   col_starts_P, num_cols_offd_Q,
                                   Q_diag->num_nonzeros, Q_offd->num_nonzeros);

      hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(Q));
      hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(Q));
      hypre_ParCSRMatrixDiag(Q) = Q_diag;
      hypre_ParCSRMatrixOffd(Q) = Q_offd;
      hypre_ParCSRMatrixColMapOffd(Q) = col_map_offd_Q;

      /* Local contribution: C_tmp = R_diag^T * [Q_diag Q_offd] */
      hypre_CSRMatrixTranspose(R_diag, &RT_diag, 1);
      C_tmp_diag = hypre_CSRMatrixMultiplyHost(RT_diag, Q_diag);
      if (num_cols_offd_Q)
      {
         C_tmp_offd = hypre_CSRMatrixMultiplyHost(RT_diag, Q_offd);
      }
      else
      {
         /* Q_offd is empty; clone it for shape and fix the row count */
         C_tmp_offd = hypre_CSRMatrixClone(Q_offd, 1);
         hypre_CSRMatrixNumRows(C_tmp_offd) = num_cols_diag_R;
      }

      if (keep_transpose)
      {
         /* Cache R_diag^T on R for later reuse */
         R->diagT = RT_diag;
      }
      else
      {
         hypre_CSRMatrixDestroy(RT_diag);
      }

      if (num_cols_offd_R)
      {
         /* Contribution of this process to rows of C owned by other
            processes: C_int = R_offd^T * [Q_diag Q_offd], merged while the
            C_int blocks are temporarily hung on Q (restored right after) */
         hypre_CSRMatrixTranspose(R_offd, &RT_offd, 1);
         C_int_diag = hypre_CSRMatrixMultiplyHost(RT_offd, Q_diag);
         C_int_offd = hypre_CSRMatrixMultiplyHost(RT_offd, Q_offd);

         hypre_ParCSRMatrixDiag(Q) = C_int_diag;
         hypre_ParCSRMatrixOffd(Q) = C_int_offd;
         C_int = hypre_MergeDiagAndOffd(Q);
         hypre_ParCSRMatrixDiag(Q) = Q_diag;
         hypre_ParCSRMatrixOffd(Q) = Q_offd;
      }
      else
      {
         /* Nothing to send: empty placeholder keeps the exchange uniform */
         C_int = hypre_CSRMatrixCreate(0,0,0);
         hypre_CSRMatrixInitialize(C_int);
      }

      /* contains communication; should be explicitly included to allow for overlap */
      /* NOTE(review): comm_pkg_R was read at entry and no CommPkg is
         created here (unlike the A^T*B routine above) -- presumably R's
         CommPkg always exists at this point; confirm with callers */
      hypre_ExchangeExternalRowsInit(C_int, comm_pkg_R, &request);
      C_ext = hypre_ExchangeExternalRowsWait(request);

      hypre_CSRMatrixDestroy(C_int);
      if (num_cols_offd_R)
      {
         hypre_CSRMatrixDestroy(C_int_diag);
         hypre_CSRMatrixDestroy(C_int_offd);
         if (keep_transpose)
         {
            /* Cache R_offd^T on R for later reuse */
            R->offdT = RT_offd;
         }
         else
         {
            hypre_CSRMatrixDestroy(RT_offd);
         }
      }

      /*-----------------------------------------------------------------------
       * Add contents of C_ext to C_tmp_diag and C_tmp_offd
       * to obtain C_diag and C_offd
       *-----------------------------------------------------------------------*/

      /* split C_ext in local C_ext_diag and nonlocal part C_ext_offd,
         also generate new col_map_offd and adjust column indices accordingly */

      if (C_ext)
      {
         first_col_diag_C = first_col_diag_P;
         last_col_diag_C = first_col_diag_P + num_cols_diag_P - 1;

         hypre_CSRMatrixSplit(C_ext, first_col_diag_C, last_col_diag_C,
                              num_cols_offd_Q, col_map_offd_Q, &num_cols_offd_C, &col_map_offd_C,
                              &C_ext_diag, &C_ext_offd);

         hypre_CSRMatrixDestroy(C_ext);
         C_ext = NULL;
         /*if (C_ext_offd->num_nonzeros == 0) C_ext_offd->num_cols = 0;*/
      }

      if (num_cols_offd_Q && C_tmp_offd->num_cols)
      {
         C_tmp_offd_i = hypre_CSRMatrixI(C_tmp_offd);
         C_tmp_offd_j = hypre_CSRMatrixJ(C_tmp_offd);

         /* Map Q's offd column numbering into C's offd numbering (both
            column maps are sorted), then renumber C_tmp_offd in place */
         map_Q_to_C = hypre_CTAlloc(HYPRE_Int,num_cols_offd_Q, HYPRE_MEMORY_HOST);

         cnt = 0;
         for (i=0; i < num_cols_offd_C; i++)
         {
            if (col_map_offd_C[i] == col_map_offd_Q[cnt])
            {
               map_Q_to_C[cnt++] = i;
               if (cnt == num_cols_offd_Q)
               {
                  break;
               }
            }
         }
         for (i=0; i < C_tmp_offd_i[hypre_CSRMatrixNumRows(C_tmp_offd)]; i++)
         {
            j_indx = C_tmp_offd_j[i];
            C_tmp_offd_j[i] = map_Q_to_C[j_indx];
         }
         hypre_TFree(map_Q_to_C, HYPRE_MEMORY_HOST);
      }
      hypre_CSRMatrixNumCols(C_tmp_offd) = num_cols_offd_C;
      /* Q (with Q_diag, Q_offd and col_map_offd_Q) is no longer needed */
      hypre_ParCSRMatrixDestroy(Q);

      /*-----------------------------------------------------------------------
       * Need to compute C_diag = C_tmp_diag + C_ext_diag
       * and C_offd = C_tmp_offd + C_ext_offd !!!!
       *-----------------------------------------------------------------------*/
      send_map_elmts_R = hypre_ParCSRCommPkgSendMapElmts(comm_pkg_R);
      if (C_ext_diag)
      {
         C_diag = hypre_CSRMatrixAddPartial(C_tmp_diag, C_ext_diag, send_map_elmts_R);
         hypre_CSRMatrixDestroy(C_tmp_diag);
         hypre_CSRMatrixDestroy(C_ext_diag);
      }
      else
      {
         C_diag = C_tmp_diag;
      }
      if (C_ext_offd)
      {
         C_offd = hypre_CSRMatrixAddPartial(C_tmp_offd, C_ext_offd, send_map_elmts_R);
         hypre_CSRMatrixDestroy(C_tmp_offd);
         hypre_CSRMatrixDestroy(C_ext_offd);
      }
      else
      {
         C_offd = C_tmp_offd;
      }
   }
   else
   {
      /* Sequential case: C = R_diag^T * (A_diag * P_diag), empty offd */
      Q_diag = hypre_CSRMatrixMultiplyHost(A_diag, P_diag);
      hypre_CSRMatrixTranspose(R_diag, &RT_diag, 1);
      C_diag = hypre_CSRMatrixMultiplyHost(RT_diag, Q_diag);
      C_offd = hypre_CSRMatrixCreate(num_cols_diag_R, 0, 0);
      hypre_CSRMatrixInitialize_v2(C_offd, 0, hypre_CSRMatrixMemoryLocation(C_diag));
      if (keep_transpose)
      {
         /* Cache R_diag^T on R for later reuse */
         R->diagT = RT_diag;
      }
      else
      {
         hypre_CSRMatrixDestroy(RT_diag);
      }
      hypre_CSRMatrixDestroy(Q_diag);
   }

   /* Create C and install the computed blocks */
   C = hypre_ParCSRMatrixCreate(comm, n_cols_R, n_cols_P, col_starts_R,
                                col_starts_P, num_cols_offd_C, 0, 0);

   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(C));
   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(C));
   hypre_ParCSRMatrixDiag(C) = C_diag;

   if (C_offd)
   {
      hypre_ParCSRMatrixOffd(C) = C_offd;
   }
   else
   {
      /* No offd block was produced; install an empty one */
      C_offd = hypre_CSRMatrixCreate(num_cols_diag_R, 0, 0);
      hypre_CSRMatrixInitialize(C_offd);
      hypre_ParCSRMatrixOffd(C) = C_offd;
   }

   hypre_ParCSRMatrixColMapOffd(C) = col_map_offd_C;

   if (num_procs > 1)
   {
      /* hypre_GenerateRAPCommPkg(RAP, A); */
      hypre_MatvecCommPkgCreate(C);
   }

   return C;
}
896
897 hypre_ParCSRMatrix*
hypre_ParCSRMatrixRAPKT(hypre_ParCSRMatrix * R,hypre_ParCSRMatrix * A,hypre_ParCSRMatrix * P,HYPRE_Int keep_transpose)898 hypre_ParCSRMatrixRAPKT( hypre_ParCSRMatrix *R,
899 hypre_ParCSRMatrix *A,
900 hypre_ParCSRMatrix *P,
901 HYPRE_Int keep_transpose)
902 {
903 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
904 hypre_GpuProfilingPushRange("TripleMat-RAP");
905 #endif
906
907 hypre_ParCSRMatrix *C = NULL;
908
909 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
910 HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy2( hypre_ParCSRMatrixMemoryLocation(R),
911 hypre_ParCSRMatrixMemoryLocation(A) );
912
913 if (exec == HYPRE_EXEC_DEVICE)
914 {
915 C = hypre_ParCSRMatrixRAPKTDevice(R, A, P, keep_transpose);
916 }
917 else
918 #endif
919 {
920 C = hypre_ParCSRMatrixRAPKTHost(R, A, P, keep_transpose);
921 }
922
923 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
924 hypre_GpuProfilingPopRange();
925 #endif
926
927 return C;
928 }
929
930 hypre_ParCSRMatrix*
hypre_ParCSRMatrixRAP(hypre_ParCSRMatrix * R,hypre_ParCSRMatrix * A,hypre_ParCSRMatrix * P)931 hypre_ParCSRMatrixRAP( hypre_ParCSRMatrix *R,
932 hypre_ParCSRMatrix *A,
933 hypre_ParCSRMatrix *P )
934 {
935 return hypre_ParCSRMatrixRAPKT(R, A, P, 0);
936 }
937
938 /*--------------------------------------------------------------------------
939 * OLD NOTES:
940 * Sketch of John's code to build RAP
941 *
942 * Uses two integer arrays icg and ifg as marker arrays
943 *
944 * icg needs to be of size n_fine; size of ia.
945 * A negative value of icg(i) indicates i is a f-point, otherwise
 * icg(i) converts from fine to coarse grid orderings.
 * Note that I believe the code assumes that if i<j and both are
 * c-points, then icg(i) < icg(j).
949 * ifg needs to be of size n_coarse; size of irap
950 * I don't think it has meaning as either input or output.
951 *
952 * In the code, both the interpolation and restriction operator
 * are stored row-wise in the array b. If i is an f-point,
 * ib(i) points to the row of the interpolation operator for point
 * i. If i is a c-point, ib(i) points to the row of the restriction
 * operator for point i.
957 *
958 * In the CSR storage for rap, its guaranteed that the rows will
959 * be ordered ( i.e. ic<jc -> irap(ic) < irap(jc)) but I don't
960 * think there is a guarantee that the entries within a row will
961 * be ordered in any way except that the diagonal entry comes first.
962 *
963 * As structured now, the code requires that the size of rap be
964 * predicted up front. To avoid this, one could execute the code
965 * twice, the first time would only keep track of icg ,ifg and ka.
966 * Then you would know how much memory to allocate for rap and jrap.
967 * The second time would fill in these arrays. Actually you might
968 * be able to include the filling in of jrap into the first pass;
969 * just overestimate its size (its an integer array) and cut it
970 * back before the second time through. This would avoid some if tests
971 * in the second pass.
972 *
973 * Questions
974 * 1) parallel (PetSc) version?
975 * 2) what if we don't store R row-wise and don't
976 * even want to store a copy of it in this form
977 * temporarily?
978 *--------------------------------------------------------------------------*/
979