1 /******************************************************************************
2 * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3 * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4 *
5 * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6 ******************************************************************************/
7
8 #include "_hypre_utilities.h"
9 #include "_hypre_parcsr_mv.h"
10 #include "_hypre_lapack.h"
11 #include "_hypre_blas.h"
12
13 /*--------------------------------------------------------------------------
14 * hypre_ParMatmul_RowSizes:
15 *
16 * Computes sizes of C rows. Formerly part of hypre_ParMatmul but removed
17 * so it can also be used for multiplication of Boolean matrices.
18 *
19 * Arrays computed: C_diag_i, C_offd_i.
20 *
21 * Arrays needed: (17, all HYPRE_Int*)
22 * rownnz_A,
23 * A_diag_i, A_diag_j,
24 * A_offd_i, A_offd_j,
25 * B_diag_i, B_diag_j,
26 * B_offd_i, B_offd_j,
27 * B_ext_i, B_ext_j,
28 * col_map_offd_B, col_map_offd_B,
29 * B_offd_i, B_offd_j,
30 * B_ext_i, B_ext_j.
31 *
32 * Scalars computed: C_diag_size, C_offd_size.
33 *
34 * Scalars needed:
35 * num_rownnz_A, num_rows_diag_A, num_cols_offd_A, allsquare,
36 * first_col_diag_B, num_cols_diag_B, num_cols_offd_B, num_cols_offd_C
37 *--------------------------------------------------------------------------*/
38
void
hypre_ParMatmul_RowSizes( HYPRE_MemoryLocation  memory_location,
                          HYPRE_Int           **C_diag_i,
                          HYPRE_Int           **C_offd_i,
                          HYPRE_Int            *rownnz_A,
                          HYPRE_Int            *A_diag_i,
                          HYPRE_Int            *A_diag_j,
                          HYPRE_Int            *A_offd_i,
                          HYPRE_Int            *A_offd_j,
                          HYPRE_Int            *B_diag_i,
                          HYPRE_Int            *B_diag_j,
                          HYPRE_Int            *B_offd_i,
                          HYPRE_Int            *B_offd_j,
                          HYPRE_Int            *B_ext_diag_i,
                          HYPRE_Int            *B_ext_diag_j,
                          HYPRE_Int            *B_ext_offd_i,
                          HYPRE_Int            *B_ext_offd_j,
                          HYPRE_Int            *map_B_to_C,
                          HYPRE_Int            *C_diag_size,
                          HYPRE_Int            *C_offd_size,
                          HYPRE_Int             num_rownnz_A,
                          HYPRE_Int             num_rows_diag_A,
                          HYPRE_Int             num_cols_offd_A,
                          HYPRE_Int             allsquare,
                          HYPRE_Int             num_cols_diag_B,
                          HYPRE_Int             num_cols_offd_B,
                          HYPRE_Int             num_cols_offd_C )
{
   /* Per-thread nonzero counters, combined across threads after the
      symbolic pass to turn thread-local counts into global row offsets */
   HYPRE_Int *jj_count_diag_array;
   HYPRE_Int *jj_count_offd_array;

   HYPRE_Int  start_indexing = 0; /* start indexing for C_data at 0 */
   HYPRE_Int  num_threads = hypre_NumThreads();

   /* Row pointer arrays of C (diag and offd parts); CTAlloc zero-fills,
      so untouched entries start at 0 */
   *C_diag_i = hypre_CTAlloc(HYPRE_Int, num_rows_diag_A+1, memory_location);
   *C_offd_i = hypre_CTAlloc(HYPRE_Int, num_rows_diag_A+1, memory_location);

   jj_count_diag_array = hypre_CTAlloc(HYPRE_Int, num_threads, HYPRE_MEMORY_HOST);
   jj_count_offd_array = hypre_CTAlloc(HYPRE_Int, num_threads, HYPRE_MEMORY_HOST);

   /*-----------------------------------------------------------------------
    * Loop over rows of A
    *-----------------------------------------------------------------------*/

#ifdef HYPRE_USING_OPENMP
#pragma omp parallel
#endif
   {
      HYPRE_Int *B_marker = NULL;
      HYPRE_Int  jj_row_begin_diag, jj_count_diag;
      HYPRE_Int  jj_row_begin_offd, jj_count_offd;
      HYPRE_Int  i1, ii1, i2, i3, jj2, jj3;
      HYPRE_Int  size, rest, num_threads;
      HYPRE_Int  ii, ns, ne;

      /* Partition the num_rownnz_A rows evenly among the active threads:
         the first 'rest' threads handle (size+1) rows [ns, ne), the others
         handle 'size' rows */
      num_threads = hypre_NumActiveThreads();
      size = num_rownnz_A/num_threads;
      rest = num_rownnz_A - size*num_threads;

      ii = hypre_GetThreadNum();
      if (ii < rest)
      {
         ns = ii*size+ii;
         ne = (ii+1)*size+ii+1;
      }
      else
      {
         ns = ii*size+rest;
         ne = (ii+1)*size+rest;
      }
      jj_count_diag = start_indexing;
      jj_count_offd = start_indexing;

      /* B_marker[c] holds the count value at which column c of C was last
         seen; comparing it against the current row's starting count tells
         whether c has already been counted for this row, so the marker never
         needs to be reset between rows. Columns [0, num_cols_diag_B) are
         diag columns of C; the next num_cols_offd_C slots are offd columns. */
      if (num_cols_diag_B || num_cols_offd_C)
      {
         B_marker = hypre_CTAlloc(HYPRE_Int, num_cols_diag_B + num_cols_offd_C, HYPRE_MEMORY_HOST);
      }

      for (i1 = 0; i1 < num_cols_diag_B + num_cols_offd_C; i1++)
      {
         B_marker[i1] = -1;
      }

      for (i1 = ns; i1 < ne; i1++)
      {
         jj_row_begin_diag = jj_count_diag;
         jj_row_begin_offd = jj_count_offd;
         if (rownnz_A)
         {
            /* i1 indexes the compressed list of nonzero rows of A */
            ii1 = rownnz_A[i1];
         }
         else
         {
            ii1 = i1;

            /*--------------------------------------------------------------------
             * Set marker for diagonal entry, C_{i1,i1} (for square matrices).
             *--------------------------------------------------------------------*/

            if (allsquare)
            {
               B_marker[i1] = jj_count_diag;
               jj_count_diag++;
            }
         }

         /*-----------------------------------------------------------------
          * Loop over entries in row ii1 of A_offd.
          *-----------------------------------------------------------------*/

         if (num_cols_offd_A)
         {
            for (jj2 = A_offd_i[ii1]; jj2 < A_offd_i[ii1+1]; jj2++)
            {
               i2 = A_offd_j[jj2];

               /*-----------------------------------------------------------
                * Loop over entries in row i2 of B_ext.
                *-----------------------------------------------------------*/

               for (jj3 = B_ext_offd_i[i2]; jj3 < B_ext_offd_i[i2+1]; jj3++)
               {
                  /* offset by num_cols_diag_B: offd columns live in the
                     second segment of B_marker */
                  i3 = num_cols_diag_B+B_ext_offd_j[jj3];

                  /*--------------------------------------------------------
                   * Check B_marker to see that C_{ii1,i3} has not already
                   * been accounted for. If it has not, mark it and increment
                   * counter.
                   *--------------------------------------------------------*/

                  if (B_marker[i3] < jj_row_begin_offd)
                  {
                     B_marker[i3] = jj_count_offd;
                     jj_count_offd++;
                  }
               }

               for (jj3 = B_ext_diag_i[i2]; jj3 < B_ext_diag_i[i2+1]; jj3++)
               {
                  i3 = B_ext_diag_j[jj3];

                  if (B_marker[i3] < jj_row_begin_diag)
                  {
                     B_marker[i3] = jj_count_diag;
                     jj_count_diag++;
                  }
               }
            }
         }

         /*-----------------------------------------------------------------
          * Loop over entries in row ii1 of A_diag.
          *-----------------------------------------------------------------*/

         for (jj2 = A_diag_i[ii1]; jj2 < A_diag_i[ii1+1]; jj2++)
         {
            i2 = A_diag_j[jj2];

            /*-----------------------------------------------------------
             * Loop over entries in row i2 of B_diag.
             *-----------------------------------------------------------*/

            for (jj3 = B_diag_i[i2]; jj3 < B_diag_i[i2+1]; jj3++)
            {
               i3 = B_diag_j[jj3];

               /*--------------------------------------------------------
                * Check B_marker to see that C_{ii1,i3} has not already
                * been accounted for. If it has not, mark it and increment
                * counter.
                *--------------------------------------------------------*/

               if (B_marker[i3] < jj_row_begin_diag)
               {
                  B_marker[i3] = jj_count_diag;
                  jj_count_diag++;
               }
            }

            /*-----------------------------------------------------------
             * Loop over entries in row i2 of B_offd.
             *-----------------------------------------------------------*/

            if (num_cols_offd_B)
            {
               for (jj3 = B_offd_i[i2]; jj3 < B_offd_i[i2+1]; jj3++)
               {
                  /* map_B_to_C renumbers B's offd columns into C's offd
                     column space */
                  i3 = num_cols_diag_B+map_B_to_C[B_offd_j[jj3]];

                  /*--------------------------------------------------------
                   * Check B_marker to see that C_{ii1,i3} has not already
                   * been accounted for. If it has not, mark it and increment
                   * counter.
                   *--------------------------------------------------------*/

                  if (B_marker[i3] < jj_row_begin_offd)
                  {
                     B_marker[i3] = jj_count_offd;
                     jj_count_offd++;
                  }
               }
            }
         }

         /*--------------------------------------------------------------------
          * Set C_diag_i and C_offd_i for this row.
          *--------------------------------------------------------------------*/

         (*C_diag_i)[ii1] = jj_row_begin_diag;
         (*C_offd_i)[ii1] = jj_row_begin_offd;
      }

      /* Publish thread-local totals so every thread can compute its
         prefix sum after the barrier */
      jj_count_diag_array[ii] = jj_count_diag;
      jj_count_offd_array[ii] = jj_count_offd;

      hypre_TFree(B_marker, HYPRE_MEMORY_HOST);
#ifdef HYPRE_USING_OPENMP
#pragma omp barrier
#endif

      /* Correct diag_i and offd_i - phase 1:
         each thread (ii > 0) shifts its rows' offsets by the total counts
         of all preceding threads; thread 0 instead writes the global totals
         into the last entry of the row pointer arrays */
      if (ii)
      {
         jj_count_diag = jj_count_diag_array[0];
         jj_count_offd = jj_count_offd_array[0];
         for (i1 = 1; i1 < ii; i1++)
         {
            jj_count_diag += jj_count_diag_array[i1];
            jj_count_offd += jj_count_offd_array[i1];
         }

         for (i1 = ns; i1 < ne; i1++)
         {
            ii1 = rownnz_A ? rownnz_A[i1] : i1;
            (*C_diag_i)[ii1] += jj_count_diag;
            (*C_offd_i)[ii1] += jj_count_offd;
         }
      }
      else
      {
         (*C_diag_i)[num_rows_diag_A] = 0;
         (*C_offd_i)[num_rows_diag_A] = 0;
         for (i1 = 0; i1 < num_threads; i1++)
         {
            (*C_diag_i)[num_rows_diag_A] += jj_count_diag_array[i1];
            (*C_offd_i)[num_rows_diag_A] += jj_count_offd_array[i1];
         }
      }

      /* Correct diag_i and offd_i - phase 2:
         when the compressed rownnz_A list is used, rows of A with no
         nonzeros were skipped above; fill their row pointers with the next
         nonempty row's offset so the CSR row pointers stay valid
         (empty rows get zero-length spans) */
      if (rownnz_A != NULL)
      {
#ifdef HYPRE_USING_OPENMP
#pragma omp barrier
#endif
         for (i1 = ns; i1 < (ne-1); i1++)
         {
            for (ii1 = rownnz_A[i1] + 1; ii1 < rownnz_A[i1+1]; ii1++)
            {
               (*C_diag_i)[ii1] = (*C_diag_i)[rownnz_A[i1+1]];
               (*C_offd_i)[ii1] = (*C_offd_i)[rownnz_A[i1+1]];
            }
         }

         /* Last chunk of this thread: the gap extends either to the first
            row of the next thread's range or, for the last thread, to the
            end of the matrix */
         if (ii < (num_threads - 1))
         {
            for (ii1 = rownnz_A[ne-1] + 1; ii1 < rownnz_A[ne]; ii1++)
            {
               (*C_diag_i)[ii1] = (*C_diag_i)[rownnz_A[ne]];
               (*C_offd_i)[ii1] = (*C_offd_i)[rownnz_A[ne]];
            }
         }
         else
         {
            for (ii1 = rownnz_A[ne-1] + 1; ii1 < num_rows_diag_A; ii1++)
            {
               (*C_diag_i)[ii1] = (*C_diag_i)[num_rows_diag_A];
               (*C_offd_i)[ii1] = (*C_offd_i)[num_rows_diag_A];
            }
         }
      }
   } /* end parallel loop */

   /* Total nonzero counts of C's diag and offd parts */
   *C_diag_size = (*C_diag_i)[num_rows_diag_A];
   *C_offd_size = (*C_offd_i)[num_rows_diag_A];

#ifdef HYPRE_DEBUG
   HYPRE_Int i;

   /* Sanity check: row pointers must be nondecreasing */
   for (i = 0; i < num_rows_diag_A; i++)
   {
      hypre_assert((*C_diag_i)[i] <= (*C_diag_i)[i+1]);
      hypre_assert((*C_offd_i)[i] <= (*C_offd_i)[i+1]);
   }
#endif

   hypre_TFree(jj_count_diag_array, HYPRE_MEMORY_HOST);
   hypre_TFree(jj_count_offd_array, HYPRE_MEMORY_HOST);

   /* End of First Pass */
}
339
340 /*--------------------------------------------------------------------------
341 * hypre_ParMatmul:
342 *
343 * Multiplies two ParCSRMatrices A and B and returns the product in
344 * ParCSRMatrix C.
345 *--------------------------------------------------------------------------*/
346
347 hypre_ParCSRMatrix*
hypre_ParMatmul( hypre_ParCSRMatrix *A,
                 hypre_ParCSRMatrix *B )
350 {
351 #ifdef HYPRE_PROFILE
352 hypre_profile_times[HYPRE_TIMER_ID_MATMUL] -= hypre_MPI_Wtime();
353 #endif
354
355 /* ParCSRMatrix A */
356 MPI_Comm comm = hypre_ParCSRMatrixComm(A);
357 HYPRE_BigInt nrows_A = hypre_ParCSRMatrixGlobalNumRows(A);
358 HYPRE_BigInt ncols_A = hypre_ParCSRMatrixGlobalNumCols(A);
359 HYPRE_BigInt *row_starts_A = hypre_ParCSRMatrixRowStarts(A);
360 HYPRE_Int num_rownnz_A;
361 HYPRE_Int *rownnz_A = NULL;
362
363 /* ParCSRMatrix B */
364 HYPRE_BigInt nrows_B = hypre_ParCSRMatrixGlobalNumRows(B);
365 HYPRE_BigInt ncols_B = hypre_ParCSRMatrixGlobalNumCols(B);
366 HYPRE_BigInt first_col_diag_B = hypre_ParCSRMatrixFirstColDiag(B);
367 HYPRE_BigInt *col_starts_B = hypre_ParCSRMatrixColStarts(B);
368 HYPRE_BigInt last_col_diag_B;
369
370 /* A_diag */
371 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
372 HYPRE_Complex *A_diag_data = hypre_CSRMatrixData(A_diag);
373 HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag);
374 HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag);
375 HYPRE_Int *A_diag_ir = hypre_CSRMatrixRownnz(A_diag);
376 HYPRE_Int num_rownnz_diag_A = hypre_CSRMatrixNumRownnz(A_diag);
377 HYPRE_Int num_rows_diag_A = hypre_CSRMatrixNumRows(A_diag);
378 HYPRE_Int num_cols_diag_A = hypre_CSRMatrixNumCols(A_diag);
379
380 /* A_offd */
381 hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
382 HYPRE_Complex *A_offd_data = hypre_CSRMatrixData(A_offd);
383 HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd);
384 HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd);
385 HYPRE_Int *A_offd_ir = hypre_CSRMatrixRownnz(A_offd);
386 HYPRE_Int num_rownnz_offd_A = hypre_CSRMatrixNumRownnz(A_offd);
387 HYPRE_Int num_rows_offd_A = hypre_CSRMatrixNumRows(A_offd);
388 HYPRE_Int num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd);
389
390 /* B_diag */
391 hypre_CSRMatrix *B_diag = hypre_ParCSRMatrixDiag(B);
392 HYPRE_Complex *B_diag_data = hypre_CSRMatrixData(B_diag);
393 HYPRE_Int *B_diag_i = hypre_CSRMatrixI(B_diag);
394 HYPRE_Int *B_diag_j = hypre_CSRMatrixJ(B_diag);
395 HYPRE_Int num_rows_diag_B = hypre_CSRMatrixNumRows(B_diag);
396 HYPRE_Int num_cols_diag_B = hypre_CSRMatrixNumCols(B_diag);
397
398 /* B_offd */
399 hypre_CSRMatrix *B_offd = hypre_ParCSRMatrixOffd(B);
400 HYPRE_BigInt *col_map_offd_B = hypre_ParCSRMatrixColMapOffd(B);
401 HYPRE_Complex *B_offd_data = hypre_CSRMatrixData(B_offd);
402 HYPRE_Int *B_offd_i = hypre_CSRMatrixI(B_offd);
403 HYPRE_Int *B_offd_j = hypre_CSRMatrixJ(B_offd);
404 HYPRE_Int num_cols_offd_B = hypre_CSRMatrixNumCols(B_offd);
405
406 /* ParCSRMatrix C */
407 hypre_ParCSRMatrix *C;
408 HYPRE_BigInt *col_map_offd_C;
409 HYPRE_Int *map_B_to_C = NULL;
410
411 /* C_diag */
412 hypre_CSRMatrix *C_diag;
413 HYPRE_Complex *C_diag_data;
414 HYPRE_Int *C_diag_i;
415 HYPRE_Int *C_diag_j;
416 HYPRE_Int C_offd_size;
417 HYPRE_Int num_cols_offd_C = 0;
418
419 /* C_offd */
420 hypre_CSRMatrix *C_offd;
421 HYPRE_Complex *C_offd_data = NULL;
422 HYPRE_Int *C_offd_i = NULL;
423 HYPRE_Int *C_offd_j = NULL;
424 HYPRE_Int C_diag_size;
425
426 /* Bs_ext */
427 hypre_CSRMatrix *Bs_ext;
428 HYPRE_Complex *Bs_ext_data;
429 HYPRE_Int *Bs_ext_i;
430 HYPRE_BigInt *Bs_ext_j;
431 HYPRE_Complex *B_ext_diag_data;
432 HYPRE_Int *B_ext_diag_i;
433 HYPRE_Int *B_ext_diag_j;
434 HYPRE_Int B_ext_diag_size;
435 HYPRE_Complex *B_ext_offd_data;
436 HYPRE_Int *B_ext_offd_i;
437 HYPRE_Int *B_ext_offd_j;
438 HYPRE_BigInt *B_big_offd_j = NULL;
439 HYPRE_Int B_ext_offd_size;
440
441 HYPRE_Int allsquare = 0;
442 HYPRE_Int num_procs;
443 HYPRE_Int *my_diag_array;
444 HYPRE_Int *my_offd_array;
445 HYPRE_Int max_num_threads;
446
447 HYPRE_Complex zero = 0.0;
448
449 HYPRE_MemoryLocation memory_location_A = hypre_ParCSRMatrixMemoryLocation(A);
450 HYPRE_MemoryLocation memory_location_B = hypre_ParCSRMatrixMemoryLocation(B);
451
452 /* RL: TODO cannot guarantee, maybe should never assert
453 hypre_assert(memory_location_A == memory_location_B);
454 */
455
456 /* RL: in the case of A=H, B=D, or A=D, B=H, let C = D,
457 * not sure if this is the right thing to do.
458 * Also, need something like this in other places
459 * TODO */
460 HYPRE_MemoryLocation memory_location_C = hypre_max(memory_location_A, memory_location_B);
461
462 HYPRE_ANNOTATE_FUNC_BEGIN;
463
464 max_num_threads = hypre_NumThreads();
465 my_diag_array = hypre_CTAlloc(HYPRE_Int, max_num_threads, HYPRE_MEMORY_HOST);
466 my_offd_array = hypre_CTAlloc(HYPRE_Int, max_num_threads, HYPRE_MEMORY_HOST);
467
468 if (ncols_A != nrows_B || num_cols_diag_A != num_rows_diag_B)
469 {
470 hypre_error_w_msg(HYPRE_ERROR_GENERIC," Error! Incompatible matrix dimensions!\n");
471
472 HYPRE_ANNOTATE_FUNC_END;
473 return NULL;
474 }
475
476 /* if C=A*B is square globally and locally, then C_diag should be square also */
477 if ( num_rows_diag_A == num_cols_diag_B && nrows_A == ncols_B )
478 {
479 allsquare = 1;
480 }
481
482 /* Set rownnz of A */
483 if (num_rownnz_diag_A != num_rows_diag_A &&
484 num_rownnz_offd_A != num_rows_offd_A )
485 {
486 hypre_MergeOrderedArrays(num_rownnz_diag_A, A_diag_ir,
487 num_rownnz_offd_A, A_offd_ir,
488 &num_rownnz_A, &rownnz_A);
489 }
490 else
491 {
492 num_rownnz_A = hypre_max(num_rows_diag_A, num_rows_offd_A);
493 }
494
495 /*-----------------------------------------------------------------------
496 * Extract B_ext, i.e. portion of B that is stored on neighbor procs
497 * and needed locally for matrix matrix product
498 *-----------------------------------------------------------------------*/
499
500 hypre_MPI_Comm_size(comm, &num_procs);
501
502 #ifdef HYPRE_PROFILE
503 hypre_profile_times[HYPRE_TIMER_ID_RENUMBER_COLIDX] -= hypre_MPI_Wtime();
504 #endif
505
506 if (num_procs > 1)
507 {
508 /*---------------------------------------------------------------------
509 * If there exists no CommPkg for A, a CommPkg is generated using
510 * equally load balanced partitionings within
511 * hypre_ParCSRMatrixExtractBExt
512 *--------------------------------------------------------------------*/
513 Bs_ext = hypre_ParCSRMatrixExtractBExt(B,A,1);
514 Bs_ext_data = hypre_CSRMatrixData(Bs_ext);
515 Bs_ext_i = hypre_CSRMatrixI(Bs_ext);
516 Bs_ext_j = hypre_CSRMatrixBigJ(Bs_ext);
517 }
518 B_ext_diag_i = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A+1, HYPRE_MEMORY_HOST);
519 B_ext_offd_i = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A+1, HYPRE_MEMORY_HOST);
520 B_ext_diag_size = 0;
521 B_ext_offd_size = 0;
522 last_col_diag_B = first_col_diag_B + (HYPRE_BigInt) num_cols_diag_B - 1;
523
524 #ifdef HYPRE_CONCURRENT_HOPSCOTCH
525 hypre_UnorderedBigIntSet set;
526
527 #pragma omp parallel
528 {
529 HYPRE_Int size, rest, ii;
530 HYPRE_Int ns, ne;
531 HYPRE_Int i1, i, j;
532 HYPRE_Int my_offd_size, my_diag_size;
533 HYPRE_Int cnt_offd, cnt_diag;
534 HYPRE_Int num_threads = hypre_NumActiveThreads();
535
536 size = num_cols_offd_A/num_threads;
537 rest = num_cols_offd_A - size*num_threads;
538 ii = hypre_GetThreadNum();
539 if (ii < rest)
540 {
541 ns = ii*size+ii;
542 ne = (ii+1)*size+ii+1;
543 }
544 else
545 {
546 ns = ii*size+rest;
547 ne = (ii+1)*size+rest;
548 }
549
550 my_diag_size = 0;
551 my_offd_size = 0;
552 for (i = ns; i < ne; i++)
553 {
554 B_ext_diag_i[i] = my_diag_size;
555 B_ext_offd_i[i] = my_offd_size;
556 for (j = Bs_ext_i[i]; j < Bs_ext_i[i+1]; j++)
557 {
558 if (Bs_ext_j[j] < first_col_diag_B ||
559 Bs_ext_j[j] > last_col_diag_B)
560 {
561 my_offd_size++;
562 }
563 else
564 {
565 my_diag_size++;
566 }
567 }
568 }
569 my_diag_array[ii] = my_diag_size;
570 my_offd_array[ii] = my_offd_size;
571
572 #pragma omp barrier
573
574 if (ii)
575 {
576 my_diag_size = my_diag_array[0];
577 my_offd_size = my_offd_array[0];
578 for (i1 = 1; i1 < ii; i1++)
579 {
580 my_diag_size += my_diag_array[i1];
581 my_offd_size += my_offd_array[i1];
582 }
583
584 for (i1 = ns; i1 < ne; i1++)
585 {
586 B_ext_diag_i[i1] += my_diag_size;
587 B_ext_offd_i[i1] += my_offd_size;
588 }
589 }
590 else
591 {
592 B_ext_diag_size = 0;
593 B_ext_offd_size = 0;
594 for (i1 = 0; i1 < num_threads; i1++)
595 {
596 B_ext_diag_size += my_diag_array[i1];
597 B_ext_offd_size += my_offd_array[i1];
598 }
599 B_ext_diag_i[num_cols_offd_A] = B_ext_diag_size;
600 B_ext_offd_i[num_cols_offd_A] = B_ext_offd_size;
601
602 if (B_ext_diag_size)
603 {
604 B_ext_diag_j = hypre_CTAlloc(HYPRE_Int, B_ext_diag_size, HYPRE_MEMORY_HOST);
605 B_ext_diag_data = hypre_CTAlloc(HYPRE_Complex, B_ext_diag_size, HYPRE_MEMORY_HOST);
606 }
607 if (B_ext_offd_size)
608 {
609 B_ext_offd_j = hypre_CTAlloc(HYPRE_Int, B_ext_offd_size, HYPRE_MEMORY_HOST);
610 B_big_offd_j = hypre_CTAlloc(HYPRE_BigInt, B_ext_offd_size, HYPRE_MEMORY_HOST);
611 B_ext_offd_data = hypre_CTAlloc(HYPRE_Complex, B_ext_offd_size, HYPRE_MEMORY_HOST);
612 }
613 hypre_UnorderedBigIntSetCreate(&set, B_ext_offd_size + num_cols_offd_B, 16*hypre_NumThreads());
614 }
615
616
617 #pragma omp barrier
618
619 cnt_offd = B_ext_offd_i[ns];
620 cnt_diag = B_ext_diag_i[ns];
621 for (i = ns; i < ne; i++)
622 {
623 for (j = Bs_ext_i[i]; j < Bs_ext_i[i+1]; j++)
624 {
625 if (Bs_ext_j[j] < first_col_diag_B ||
626 Bs_ext_j[j] > last_col_diag_B)
627 {
628 hypre_UnorderedBigIntSetPut(&set, Bs_ext_j[j]);
629 B_big_offd_j[cnt_offd] = Bs_ext_j[j];
630 //Bs_ext_j[cnt_offd] = Bs_ext_j[j];
631 B_ext_offd_data[cnt_offd++] = Bs_ext_data[j];
632 }
633 else
634 {
635 B_ext_diag_j[cnt_diag] = (HYPRE_Int)(Bs_ext_j[j] - first_col_diag_B);
636 B_ext_diag_data[cnt_diag++] = Bs_ext_data[j];
637 }
638 }
639 }
640
641 HYPRE_Int i_begin, i_end;
642 hypre_GetSimpleThreadPartition(&i_begin, &i_end, num_cols_offd_B);
643 for (i = i_begin; i < i_end; i++)
644 {
645 hypre_UnorderedBigIntSetPut(&set, col_map_offd_B[i]);
646 }
647 } /* omp parallel */
648
649 col_map_offd_C = hypre_UnorderedBigIntSetCopyToArray(&set, &num_cols_offd_C);
650 hypre_UnorderedBigIntSetDestroy(&set);
651 hypre_UnorderedBigIntMap col_map_offd_C_inverse;
652 hypre_big_sort_and_create_inverse_map(col_map_offd_C,
653 num_cols_offd_C,
654 &col_map_offd_C,
655 &col_map_offd_C_inverse);
656
657 HYPRE_Int i, j;
658 #pragma omp parallel for private(j) HYPRE_SMP_SCHEDULE
659 for (i = 0; i < num_cols_offd_A; i++)
660 {
661 for (j = B_ext_offd_i[i]; j < B_ext_offd_i[i+1]; j++)
662 {
663 //B_ext_offd_j[j] = hypre_UnorderedIntMapGet(&col_map_offd_C_inverse, B_ext_offd_j[j]);
664 B_ext_offd_j[j] = hypre_UnorderedBigIntMapGet(&col_map_offd_C_inverse, B_big_offd_j[j]);
665 }
666 }
667
668 if (num_cols_offd_C)
669 {
670 hypre_UnorderedBigIntMapDestroy(&col_map_offd_C_inverse);
671 }
672
673 hypre_TFree(my_diag_array, HYPRE_MEMORY_HOST);
674 hypre_TFree(my_offd_array, HYPRE_MEMORY_HOST);
675
676 if (num_cols_offd_B)
677 {
678 HYPRE_Int i;
679 map_B_to_C = hypre_CTAlloc(HYPRE_Int, num_cols_offd_B, HYPRE_MEMORY_HOST);
680
681 #pragma omp parallel private(i)
682 {
683 HYPRE_Int i_begin, i_end;
684 hypre_GetSimpleThreadPartition(&i_begin, &i_end, num_cols_offd_C);
685
686 HYPRE_Int cnt;
687 if (i_end > i_begin)
688 {
689 cnt = hypre_BigLowerBound(col_map_offd_B,
690 col_map_offd_B + (HYPRE_BigInt)num_cols_offd_B,
691 col_map_offd_C[i_begin]) - col_map_offd_B;
692 }
693
694 for (i = i_begin; i < i_end && cnt < num_cols_offd_B; i++)
695 {
696 if (col_map_offd_C[i] == col_map_offd_B[cnt])
697 {
698 map_B_to_C[cnt++] = i;
699 }
700 }
701 }
702 }
703 if (num_procs > 1)
704 {
705 hypre_CSRMatrixDestroy(Bs_ext);
706 Bs_ext = NULL;
707 }
708
709 #else /* !HYPRE_CONCURRENT_HOPSCOTCH */
710
711 HYPRE_BigInt *temp;
712 #ifdef HYPRE_USING_OPENMP
713 #pragma omp parallel
714 #endif
715 {
716 HYPRE_Int size, rest, ii;
717 HYPRE_Int ns, ne;
718 HYPRE_Int i1, i, j;
719 HYPRE_Int my_offd_size, my_diag_size;
720 HYPRE_Int cnt_offd, cnt_diag;
721
722 HYPRE_Int num_threads = hypre_NumActiveThreads();
723
724 size = num_cols_offd_A/num_threads;
725 rest = num_cols_offd_A - size*num_threads;
726 ii = hypre_GetThreadNum();
727 if (ii < rest)
728 {
729 ns = ii*size+ii;
730 ne = (ii+1)*size+ii+1;
731 }
732 else
733 {
734 ns = ii*size+rest;
735 ne = (ii+1)*size+rest;
736 }
737
738 my_diag_size = 0;
739 my_offd_size = 0;
740 for (i = ns; i < ne; i++)
741 {
742 B_ext_diag_i[i] = my_diag_size;
743 B_ext_offd_i[i] = my_offd_size;
744 for (j = Bs_ext_i[i]; j < Bs_ext_i[i+1]; j++)
745 {
746 if (Bs_ext_j[j] < first_col_diag_B ||
747 Bs_ext_j[j] > last_col_diag_B)
748 {
749 my_offd_size++;
750 }
751 else
752 {
753 my_diag_size++;
754 }
755 }
756 }
757 my_diag_array[ii] = my_diag_size;
758 my_offd_array[ii] = my_offd_size;
759
760 #ifdef HYPRE_USING_OPENMP
761 #pragma omp barrier
762 #endif
763
764 if (ii)
765 {
766 my_diag_size = my_diag_array[0];
767 my_offd_size = my_offd_array[0];
768 for (i1 = 1; i1 < ii; i1++)
769 {
770 my_diag_size += my_diag_array[i1];
771 my_offd_size += my_offd_array[i1];
772 }
773
774 for (i1 = ns; i1 < ne; i1++)
775 {
776 B_ext_diag_i[i1] += my_diag_size;
777 B_ext_offd_i[i1] += my_offd_size;
778 }
779 }
780 else
781 {
782 B_ext_diag_size = 0;
783 B_ext_offd_size = 0;
784 for (i1 = 0; i1 < num_threads; i1++)
785 {
786 B_ext_diag_size += my_diag_array[i1];
787 B_ext_offd_size += my_offd_array[i1];
788 }
789 B_ext_diag_i[num_cols_offd_A] = B_ext_diag_size;
790 B_ext_offd_i[num_cols_offd_A] = B_ext_offd_size;
791
792 if (B_ext_diag_size)
793 {
794 B_ext_diag_j = hypre_CTAlloc(HYPRE_Int, B_ext_diag_size, HYPRE_MEMORY_HOST);
795 B_ext_diag_data = hypre_CTAlloc(HYPRE_Complex, B_ext_diag_size, HYPRE_MEMORY_HOST);
796 }
797
798 if (B_ext_offd_size)
799 {
800 B_ext_offd_j = hypre_CTAlloc(HYPRE_Int, B_ext_offd_size, HYPRE_MEMORY_HOST);
801 B_big_offd_j = hypre_CTAlloc(HYPRE_BigInt, B_ext_offd_size, HYPRE_MEMORY_HOST);
802 B_ext_offd_data = hypre_CTAlloc(HYPRE_Complex, B_ext_offd_size, HYPRE_MEMORY_HOST);
803 }
804
805 if (B_ext_offd_size || num_cols_offd_B)
806 {
807 temp = hypre_CTAlloc(HYPRE_BigInt, B_ext_offd_size+num_cols_offd_B, HYPRE_MEMORY_HOST);
808 }
809 }
810
811 #ifdef HYPRE_USING_OPENMP
812 #pragma omp barrier
813 #endif
814
815 cnt_offd = B_ext_offd_i[ns];
816 cnt_diag = B_ext_diag_i[ns];
817 for (i = ns; i < ne; i++)
818 {
819 for (j = Bs_ext_i[i]; j < Bs_ext_i[i+1]; j++)
820 {
821 if (Bs_ext_j[j] < first_col_diag_B ||
822 Bs_ext_j[j] > last_col_diag_B)
823 {
824 temp[cnt_offd] = Bs_ext_j[j];
825 B_big_offd_j[cnt_offd] = Bs_ext_j[j];
826 //Bs_ext_j[cnt_offd] = Bs_ext_j[j];
827 B_ext_offd_data[cnt_offd++] = Bs_ext_data[j];
828 }
829 else
830 {
831 B_ext_diag_j[cnt_diag] = (HYPRE_Int)(Bs_ext_j[j] - first_col_diag_B);
832 B_ext_diag_data[cnt_diag++] = Bs_ext_data[j];
833 }
834 }
835 }
836
837 #ifdef HYPRE_USING_OPENMP
838 #pragma omp barrier
839 #endif
840
841 if (ii == 0)
842 {
843 HYPRE_Int cnt;
844
845 if (num_procs > 1)
846 {
847 hypre_CSRMatrixDestroy(Bs_ext);
848 Bs_ext = NULL;
849 }
850
851 cnt = 0;
852 if (B_ext_offd_size || num_cols_offd_B)
853 {
854 cnt = B_ext_offd_size;
855 for (i = 0; i < num_cols_offd_B; i++)
856 {
857 temp[cnt++] = col_map_offd_B[i];
858 }
859
860 if (cnt)
861 {
862 HYPRE_BigInt value;
863
864 hypre_BigQsort0(temp, 0, cnt-1);
865 num_cols_offd_C = 1;
866 value = temp[0];
867 for (i = 1; i < cnt; i++)
868 {
869 if (temp[i] > value)
870 {
871 value = temp[i];
872 temp[num_cols_offd_C++] = value;
873 }
874 }
875 }
876
877 if (num_cols_offd_C)
878 {
879 col_map_offd_C = hypre_CTAlloc(HYPRE_BigInt, num_cols_offd_C, HYPRE_MEMORY_HOST);
880 }
881
882 for (i = 0; i < num_cols_offd_C; i++)
883 {
884 col_map_offd_C[i] = temp[i];
885 }
886
887 hypre_TFree(temp, HYPRE_MEMORY_HOST);
888 }
889 }
890
891
892 #ifdef HYPRE_USING_OPENMP
893 #pragma omp barrier
894 #endif
895
896 for (i = ns; i < ne; i++)
897 {
898 for (j = B_ext_offd_i[i]; j < B_ext_offd_i[i+1]; j++)
899 {
900 B_ext_offd_j[j] = hypre_BigBinarySearch(col_map_offd_C, B_big_offd_j[j],
901 //B_ext_offd_j[j] = hypre_BigBinarySearch(col_map_offd_C, Bs_ext_j[j],
902 num_cols_offd_C);
903 }
904 }
905
906 } /* end parallel region */
907 hypre_TFree(B_big_offd_j, HYPRE_MEMORY_HOST);
908
909 hypre_TFree(my_diag_array, HYPRE_MEMORY_HOST);
910 hypre_TFree(my_offd_array, HYPRE_MEMORY_HOST);
911
912 if (num_cols_offd_B)
913 {
914 HYPRE_Int i, cnt;
915 map_B_to_C = hypre_CTAlloc(HYPRE_Int, num_cols_offd_B, HYPRE_MEMORY_HOST);
916
917 cnt = 0;
918 for (i = 0; i < num_cols_offd_C; i++)
919 {
920 if (col_map_offd_C[i] == col_map_offd_B[cnt])
921 {
922 map_B_to_C[cnt++] = i;
923 if (cnt == num_cols_offd_B) break;
924 }
925 }
926 }
927
928 #endif /* !HYPRE_CONCURRENT_HOPSCOTCH */
929
930 #ifdef HYPRE_PROFILE
931 hypre_profile_times[HYPRE_TIMER_ID_RENUMBER_COLIDX] += hypre_MPI_Wtime();
932 #endif
933
934 HYPRE_ANNOTATE_REGION_BEGIN("%s", "First pass");
935 hypre_ParMatmul_RowSizes(memory_location_C, &C_diag_i, &C_offd_i,
936 rownnz_A, A_diag_i, A_diag_j,
937 A_offd_i, A_offd_j,
938 B_diag_i, B_diag_j,
939 B_offd_i, B_offd_j,
940 B_ext_diag_i, B_ext_diag_j,
941 B_ext_offd_i, B_ext_offd_j, map_B_to_C,
942 &C_diag_size, &C_offd_size,
943 num_rownnz_A, num_rows_diag_A, num_cols_offd_A,
944 allsquare, num_cols_diag_B, num_cols_offd_B,
945 num_cols_offd_C);
946 HYPRE_ANNOTATE_REGION_END("%s", "First pass");
947
948 /*-----------------------------------------------------------------------
949 * Allocate C_diag_data and C_diag_j arrays.
950 * Allocate C_offd_data and C_offd_j arrays.
951 *-----------------------------------------------------------------------*/
952
953 last_col_diag_B = first_col_diag_B + (HYPRE_BigInt)num_cols_diag_B - 1;
954 C_diag_data = hypre_CTAlloc(HYPRE_Complex, C_diag_size, memory_location_C);
955 C_diag_j = hypre_CTAlloc(HYPRE_Int, C_diag_size, memory_location_C);
956 if (C_offd_size)
957 {
958 C_offd_data = hypre_CTAlloc(HYPRE_Complex, C_offd_size, memory_location_C);
959 C_offd_j = hypre_CTAlloc(HYPRE_Int, C_offd_size, memory_location_C);
960 }
961
962 /*-----------------------------------------------------------------------
963 * Second Pass: Fill in C_diag_data and C_diag_j.
964 * Second Pass: Fill in C_offd_data and C_offd_j.
965 *-----------------------------------------------------------------------*/
966
967 HYPRE_ANNOTATE_REGION_BEGIN("%s", "Second pass");
968
969 #ifdef HYPRE_USING_OPENMP
970 #pragma omp parallel
971 #endif
972 {
973 HYPRE_Int *B_marker = NULL;
974 HYPRE_Int ns, ne, size, rest, ii;
975 HYPRE_Int i1, ii1, i2, i3, jj2, jj3;
976 HYPRE_Int jj_row_begin_diag, jj_count_diag;
977 HYPRE_Int jj_row_begin_offd, jj_count_offd;
978 HYPRE_Int num_threads;
979 HYPRE_Complex a_entry; /*, a_b_product;*/
980
981 num_threads = hypre_NumActiveThreads();
982 size = num_rownnz_A/num_threads;
983 rest = num_rownnz_A - size*num_threads;
984
985 ii = hypre_GetThreadNum();
986 if (ii < rest)
987 {
988 ns = ii*size+ii;
989 ne = (ii+1)*size+ii+1;
990 }
991 else
992 {
993 ns = ii*size+rest;
994 ne = (ii+1)*size+rest;
995 }
996 jj_count_diag = C_diag_i[rownnz_A ? rownnz_A[ns] : ns];
997 jj_count_offd = C_offd_i[rownnz_A ? rownnz_A[ns] : ns];
998
999 if (num_cols_diag_B || num_cols_offd_C)
1000 {
1001 B_marker = hypre_CTAlloc(HYPRE_Int, num_cols_diag_B + num_cols_offd_C,
1002 HYPRE_MEMORY_HOST);
1003 for (i1 = 0; i1 < num_cols_diag_B + num_cols_offd_C; i1++)
1004 {
1005 B_marker[i1] = -1;
1006 }
1007 }
1008
1009 /*-----------------------------------------------------------------------
1010 * Loop over interior c-points.
1011 *-----------------------------------------------------------------------*/
1012 for (i1 = ns; i1 < ne; i1++)
1013 {
1014 jj_row_begin_diag = jj_count_diag;
1015 jj_row_begin_offd = jj_count_offd;
1016 if (rownnz_A)
1017 {
1018 ii1 = rownnz_A[i1];
1019 }
1020 else
1021 {
1022 ii1 = i1;
1023
1024 /*--------------------------------------------------------------------
1025 * Create diagonal entry, C_{i1,i1}
1026 *--------------------------------------------------------------------*/
1027
1028 if (allsquare)
1029 {
1030 B_marker[i1] = jj_count_diag;
1031 C_diag_data[jj_count_diag] = zero;
1032 C_diag_j[jj_count_diag] = i1;
1033 jj_count_diag++;
1034 }
1035 }
1036
1037 /*-----------------------------------------------------------------
1038 * Loop over entries in row i1 of A_offd.
1039 *-----------------------------------------------------------------*/
1040
1041 if (num_cols_offd_A)
1042 {
1043 for (jj2 = A_offd_i[ii1]; jj2 < A_offd_i[ii1+1]; jj2++)
1044 {
1045 i2 = A_offd_j[jj2];
1046 a_entry = A_offd_data[jj2];
1047
1048 /*-----------------------------------------------------------
1049 * Loop over entries in row i2 of B_ext.
1050 *-----------------------------------------------------------*/
1051
1052 for (jj3 = B_ext_offd_i[i2]; jj3 < B_ext_offd_i[i2+1]; jj3++)
1053 {
1054 i3 = num_cols_diag_B+B_ext_offd_j[jj3];
1055
1056 /*--------------------------------------------------------
1057 * Check B_marker to see that C_{ii1,i3} has not already
1058 * been accounted for. If it has not, create a new entry.
1059 * If it has, add new contribution.
1060 *--------------------------------------------------------*/
1061
1062 if (B_marker[i3] < jj_row_begin_offd)
1063 {
1064 B_marker[i3] = jj_count_offd;
1065 C_offd_data[jj_count_offd] = a_entry*B_ext_offd_data[jj3];
1066 C_offd_j[jj_count_offd] = i3-num_cols_diag_B;
1067 jj_count_offd++;
1068 }
1069 else
1070 {
1071 C_offd_data[B_marker[i3]] += a_entry*B_ext_offd_data[jj3];
1072 }
1073 }
1074 for (jj3 = B_ext_diag_i[i2]; jj3 < B_ext_diag_i[i2+1]; jj3++)
1075 {
1076 i3 = B_ext_diag_j[jj3];
1077 if (B_marker[i3] < jj_row_begin_diag)
1078 {
1079 B_marker[i3] = jj_count_diag;
1080 C_diag_data[jj_count_diag] = a_entry*B_ext_diag_data[jj3];
1081 C_diag_j[jj_count_diag] = i3;
1082 jj_count_diag++;
1083 }
1084 else
1085 {
1086 C_diag_data[B_marker[i3]] += a_entry*B_ext_diag_data[jj3];
1087 }
1088 }
1089 }
1090 }
1091
1092 /*-----------------------------------------------------------------
1093 * Loop over entries in row ii1 of A_diag.
1094 *-----------------------------------------------------------------*/
1095
1096 for (jj2 = A_diag_i[ii1]; jj2 < A_diag_i[ii1+1]; jj2++)
1097 {
1098 i2 = A_diag_j[jj2];
1099 a_entry = A_diag_data[jj2];
1100
1101 /*-----------------------------------------------------------
1102 * Loop over entries in row i2 of B_diag.
1103 *-----------------------------------------------------------*/
1104
1105 for (jj3 = B_diag_i[i2]; jj3 < B_diag_i[i2+1]; jj3++)
1106 {
1107 i3 = B_diag_j[jj3];
1108
1109 /*--------------------------------------------------------
1110 * Check B_marker to see that C_{ii1,i3} has not already
1111 * been accounted for. If it has not, create a new entry.
1112 * If it has, add new contribution.
1113 *--------------------------------------------------------*/
1114
1115 if (B_marker[i3] < jj_row_begin_diag)
1116 {
1117 B_marker[i3] = jj_count_diag;
1118 C_diag_data[jj_count_diag] = a_entry*B_diag_data[jj3];
1119 C_diag_j[jj_count_diag] = i3;
1120 jj_count_diag++;
1121 }
1122 else
1123 {
1124 C_diag_data[B_marker[i3]] += a_entry*B_diag_data[jj3];
1125 }
1126 }
1127 if (num_cols_offd_B)
1128 {
1129 for (jj3 = B_offd_i[i2]; jj3 < B_offd_i[i2+1]; jj3++)
1130 {
1131 i3 = num_cols_diag_B+map_B_to_C[B_offd_j[jj3]];
1132
1133 /*--------------------------------------------------------
1134 * Check B_marker to see that C_{ii1,i3} has not already
1135 * been accounted for. If it has not, create a new entry.
1136 * If it has, add new contribution.
1137 *--------------------------------------------------------*/
1138
1139 if (B_marker[i3] < jj_row_begin_offd)
1140 {
1141 B_marker[i3] = jj_count_offd;
1142 C_offd_data[jj_count_offd] = a_entry*B_offd_data[jj3];
1143 C_offd_j[jj_count_offd] = i3-num_cols_diag_B;
1144 jj_count_offd++;
1145 }
1146 else
1147 {
1148 C_offd_data[B_marker[i3]] += a_entry*B_offd_data[jj3];
1149 }
1150 }
1151 }
1152 }
1153 }
1154
1155 hypre_TFree(B_marker, HYPRE_MEMORY_HOST);
1156 } /*end parallel region */
1157 HYPRE_ANNOTATE_REGION_END("%s", "Second pass");
1158
1159 C = hypre_ParCSRMatrixCreate(comm, nrows_A, ncols_B, row_starts_A,
1160 col_starts_B, num_cols_offd_C,
1161 C_diag_size, C_offd_size);
1162
1163 C_diag = hypre_ParCSRMatrixDiag(C);
1164 hypre_CSRMatrixData(C_diag) = C_diag_data;
1165 hypre_CSRMatrixI(C_diag) = C_diag_i;
1166 hypre_CSRMatrixJ(C_diag) = C_diag_j;
1167 hypre_CSRMatrixSetRownnz(C_diag);
1168
1169 C_offd = hypre_ParCSRMatrixOffd(C);
1170 hypre_CSRMatrixI(C_offd) = C_offd_i;
1171 hypre_ParCSRMatrixOffd(C) = C_offd;
1172 if (num_cols_offd_C)
1173 {
1174 hypre_CSRMatrixData(C_offd) = C_offd_data;
1175 hypre_CSRMatrixJ(C_offd) = C_offd_j;
1176 hypre_ParCSRMatrixColMapOffd(C) = col_map_offd_C;
1177 }
1178 hypre_CSRMatrixSetRownnz(C_offd);
1179
1180 hypre_CSRMatrixMemoryLocation(C_diag) = memory_location_C;
1181 hypre_CSRMatrixMemoryLocation(C_offd) = memory_location_C;
1182
1183 /*-----------------------------------------------------------------------
1184 * Free various arrays
1185 *-----------------------------------------------------------------------*/
1186 hypre_TFree(B_ext_diag_i, HYPRE_MEMORY_HOST);
1187 if (B_ext_diag_size)
1188 {
1189 hypre_TFree(B_ext_diag_j, HYPRE_MEMORY_HOST);
1190 hypre_TFree(B_ext_diag_data, HYPRE_MEMORY_HOST);
1191 }
1192 hypre_TFree(B_ext_offd_i, HYPRE_MEMORY_HOST);
1193 if (B_ext_offd_size)
1194 {
1195 hypre_TFree(B_ext_offd_j, HYPRE_MEMORY_HOST);
1196 hypre_TFree(B_ext_offd_data, HYPRE_MEMORY_HOST);
1197 }
1198 if (num_cols_offd_B)
1199 {
1200 hypre_TFree(map_B_to_C, HYPRE_MEMORY_HOST);
1201 }
1202 hypre_TFree(rownnz_A, HYPRE_MEMORY_HOST);
1203
1204 #ifdef HYPRE_PROFILE
1205 hypre_profile_times[HYPRE_TIMER_ID_MATMUL] += hypre_MPI_Wtime();
1206 #endif
1207
1208 HYPRE_ANNOTATE_FUNC_END;
1209
1210 return C;
1211 }
1212
/* The following code was formerly part of hypre_ParCSRMatrixExtractBExt,
   but it was factored out into this function so that it can also be used
   by a corresponding routine for Boolean matrices.

   JSP: to allow overlapping communication with computation, it returns
   comm_handle_idx and comm_handle_data. Before accessing the received
   B_ext index/data arrays, both handles should be destroyed (and the
   send_data buffers contained in the comm handles freed).
*/
1221
void hypre_ParCSRMatrixExtractBExt_Arrays_Overlap(
   HYPRE_Int ** pB_ext_i,        /* out: row pointer of B_ext (fully evaluated on return) */
   HYPRE_BigInt ** pB_ext_j,     /* out: global column indices (valid only after *comm_handle_idx completes) */
   HYPRE_Complex ** pB_ext_data, /* out: coefficients (valid only after *comm_handle_data completes; data != 0) */
   HYPRE_BigInt ** pB_ext_row_map, /* out: global row numbers of B_ext rows (find_row_map != 0) */
   HYPRE_Int * num_nonzeros,     /* out: total number of nonzeros in B_ext */
   HYPRE_Int data,               /* 1: also communicate matrix coefficients */
   HYPRE_Int find_row_map,       /* 1: also build the global row map of B_ext */
   MPI_Comm comm,
   hypre_ParCSRCommPkg * comm_pkg,
   HYPRE_Int num_cols_B,
   HYPRE_Int num_recvs,
   HYPRE_Int num_sends,
   HYPRE_BigInt first_col_diag,
   HYPRE_BigInt * row_starts,
   HYPRE_Int * recv_vec_starts,
   HYPRE_Int * send_map_starts,
   HYPRE_Int * send_map_elmts,
   HYPRE_Int * diag_i,
   HYPRE_Int * diag_j,
   HYPRE_Int * offd_i,
   HYPRE_Int * offd_j,
   HYPRE_BigInt * col_map_offd,
   HYPRE_Real * diag_data,
   HYPRE_Real * offd_data,
   hypre_ParCSRCommHandle **comm_handle_idx,
   hypre_ParCSRCommHandle **comm_handle_data,
   HYPRE_Int *CF_marker, HYPRE_Int *CF_marker_offd,
   HYPRE_Int skip_fine, /* 1 if only coarse points (CF_marker >= 0) are needed */
   HYPRE_Int skip_same_sign /* 1 if only entries whose sign differs from the
                               sign of the row's first diagonal entry are kept
                               (diagonal entries themselves are never sent) */
   // extended based long range interpolation: skip_fine = 1, skip_same_sign = 0 for S matrix, skip_fine = 1, skip_same_sign = 1 for A matrix
   // other interpolation: skip_fine = 0, skip_same_sign = 0
   )
{
   /* Builds B_ext: the rows of B owned by other processors that are needed
    * locally (communication pattern taken from comm_pkg).  Only the row
    * pointer B_ext_i is complete on return; the j (and optionally data)
    * exchanges are started here and returned through comm_handle_idx /
    * comm_handle_data so the caller can overlap them with other work.
    * The caller must destroy those handles (and free the send buffers they
    * contain) before reading B_ext_j / B_ext_data. */
   hypre_ParCSRCommHandle *comm_handle, *row_map_comm_handle = NULL;
   hypre_ParCSRCommPkg *tmp_comm_pkg;   /* comm pkg with per-nonzero (jdata) offsets */
   HYPRE_Int *B_int_i;                  /* send-side row sizes / row pointer */
   HYPRE_BigInt *B_int_j;               /* send-side global column indices */
   HYPRE_Int *B_ext_i;
   HYPRE_BigInt * B_ext_j;
   HYPRE_Complex * B_ext_data;
   HYPRE_Complex * B_int_data;
   HYPRE_BigInt * B_int_row_map;
   HYPRE_BigInt * B_ext_row_map;
   HYPRE_Int num_procs, my_id;
   HYPRE_Int *jdata_recv_vec_starts;    /* recv offsets measured in nonzeros */
   HYPRE_Int *jdata_send_map_starts;    /* send offsets measured in nonzeros */

   HYPRE_Int i, j, k;
   HYPRE_Int start_index;
   /*HYPRE_Int jrow;*/
   HYPRE_Int num_rows_B_ext;
   HYPRE_Int *prefix_sum_workspace;

   hypre_MPI_Comm_size(comm,&num_procs);
   hypre_MPI_Comm_rank(comm,&my_id);

   /* global index of this processor's first row (used to globalize row numbers) */
   HYPRE_BigInt first_row_index = row_starts[0];

   num_rows_B_ext = recv_vec_starts[num_recvs];
   if ( num_rows_B_ext < 0 ) { /* no B_ext, no communication */
      *pB_ext_i = NULL;
      *pB_ext_j = NULL;
      if ( data ) *pB_ext_data = NULL;
      if ( find_row_map ) *pB_ext_row_map = NULL;
      *num_nonzeros = 0;
      return;
   };
   B_int_i = hypre_CTAlloc(HYPRE_Int, send_map_starts[num_sends]+1, HYPRE_MEMORY_HOST);
   B_ext_i = hypre_CTAlloc(HYPRE_Int, num_rows_B_ext+1, HYPRE_MEMORY_HOST);
   *pB_ext_i = B_ext_i;
   if ( find_row_map ) {
      B_int_row_map = hypre_CTAlloc( HYPRE_BigInt, send_map_starts[num_sends]+1 , HYPRE_MEMORY_HOST);
      B_ext_row_map = hypre_CTAlloc( HYPRE_BigInt, num_rows_B_ext+1 , HYPRE_MEMORY_HOST);
      *pB_ext_row_map = B_ext_row_map;
   };

   /*--------------------------------------------------------------------------
    * generate B_int_i through adding number of row-elements of offd and diag
    * for corresponding rows. B_int_i[j+1] contains the number of elements of
    * a row j (which is determined through send_map_elmts)
    *--------------------------------------------------------------------------*/

   jdata_send_map_starts = hypre_CTAlloc(HYPRE_Int, num_sends+1, HYPRE_MEMORY_HOST);
   jdata_recv_vec_starts = hypre_CTAlloc(HYPRE_Int, num_recvs+1, HYPRE_MEMORY_HOST);
   jdata_send_map_starts[0] = B_int_i[0] = 0;

   /*HYPRE_Int prefix_sum_workspace[(hypre_NumThreads() + 1)*num_sends];*/
   prefix_sum_workspace = hypre_TAlloc(HYPRE_Int, (hypre_NumThreads() + 1)*num_sends, HYPRE_MEMORY_HOST);

#ifdef HYPRE_USING_OPENMP
#pragma omp parallel private(i,j,k)
#endif
   {
      /* First pass (threaded): count, per thread and per send target, how many
       * entries survive the skip_fine / skip_same_sign filters. */
      /*HYPRE_Int counts[num_sends];*/
      HYPRE_Int *counts;
      counts = hypre_TAlloc(HYPRE_Int, num_sends, HYPRE_MEMORY_HOST);
      for (i=0; i < num_sends; i++)
      {
         HYPRE_Int j_begin, j_end;
         /* split the rows sent to target i among the threads */
         hypre_GetSimpleThreadPartition(&j_begin, &j_end, send_map_starts[i + 1] - send_map_starts[i]);
         j_begin += send_map_starts[i];
         j_end += send_map_starts[i];

         HYPRE_Int count = 0;
         if (skip_fine && skip_same_sign)
         {
            for (j = j_begin; j < j_end; j++)
            {
               HYPRE_Int jrow = send_map_elmts[j];
               HYPRE_Int len = 0;

               /* keep only entries whose sign is opposite to the row's first
                * diagonal entry; diag entries also require a coarse column */
               if (diag_data[diag_i[jrow]] >= 0)
               {
                  for (k = diag_i[jrow] + 1; k < diag_i[jrow + 1]; k++)
                  {
                     if (diag_data[k] < 0 && CF_marker[diag_j[k]] >= 0) len++;
                  }
                  for (k = offd_i[jrow]; k < offd_i[jrow + 1]; k++)
                  {
                     if (offd_data[k] < 0) len++;
                  }
               }
               else
               {
                  for (k = diag_i[jrow] + 1; k < diag_i[jrow + 1]; k++)
                  {
                     if (diag_data[k] > 0 && CF_marker[diag_j[k]] >= 0) len++;
                  }
                  for (k = offd_i[jrow]; k < offd_i[jrow + 1]; k++)
                  {
                     if (offd_data[k] > 0) len++;
                  }
               }

               B_int_i[j + 1] = len;
               count += len;
            }
         }
         else if (skip_fine)
         {
            /* keep only columns marked coarse (CF_marker >= 0) */
            for (j = j_begin; j < j_end; j++)
            {
               HYPRE_Int jrow = send_map_elmts[j];
               HYPRE_Int len = 0;

               for (k = diag_i[jrow]; k < diag_i[jrow + 1]; k++)
               {
                  if (CF_marker[diag_j[k]] >= 0) len++;
               }
               for (k = offd_i[jrow]; k < offd_i[jrow + 1]; k++)
               {
                  if (CF_marker_offd[offd_j[k]] >= 0) len++;
               }

               B_int_i[j + 1] = len;
               count += len;
            }
         }
         else
         {
            /* no filtering: whole row (diag + offd) is sent */
            for (j = j_begin; j < j_end; j++)
            {
               HYPRE_Int jrow = send_map_elmts[j];
               HYPRE_Int len = diag_i[jrow + 1] - diag_i[jrow];
               len += offd_i[jrow + 1] - offd_i[jrow];
               B_int_i[j + 1] = len;
               count += len;
            }
         }

         if (find_row_map)
         {
            /* record the global row number of each sent row */
            for (j = j_begin; j < j_end; j++)
            {
               HYPRE_Int jrow = send_map_elmts[j];
               B_int_row_map[j] = (HYPRE_BigInt)jrow + first_row_index;
            }
         }

         counts[i] = count;
      }

      /* Turn the per-thread counts into offsets: afterwards counts[i] is this
       * thread's starting offset within send block i, and
       * jdata_send_map_starts[i+1] holds the total nnz sent to target i
       * (still to be accumulated across sends below). */
      hypre_prefix_sum_multiple(counts, jdata_send_map_starts + 1, num_sends, prefix_sum_workspace);

#ifdef HYPRE_USING_OPENMP
#pragma omp master
#endif
      {
         /* accumulate per-send totals into global send offsets */
         for (i = 1; i < num_sends; i++)
         {
            jdata_send_map_starts[i + 1] += jdata_send_map_starts[i];
         }

         /*--------------------------------------------------------------------------
          * initialize communication: exchange row sizes (and, if requested,
          * global row numbers) while the threads wait at the barrier
          *--------------------------------------------------------------------------*/

         comm_handle = hypre_ParCSRCommHandleCreate(11,comm_pkg,
                                                    &B_int_i[1],&(B_ext_i[1]) );
         if ( find_row_map )
         {
            /* scatter/gather B_int row numbers to form array of B_ext row numbers */
            row_map_comm_handle = hypre_ParCSRCommHandleCreate
               (21,comm_pkg, B_int_row_map, B_ext_row_map );
         }

         B_int_j = hypre_TAlloc(HYPRE_BigInt, jdata_send_map_starts[num_sends], HYPRE_MEMORY_HOST);
         if (data) B_int_data = hypre_TAlloc(HYPRE_Complex, jdata_send_map_starts[num_sends], HYPRE_MEMORY_HOST);
      }
#ifdef HYPRE_USING_OPENMP
#pragma omp barrier
#endif

      /* Second pass (threaded): fill the send buffers B_int_j / B_int_data
       * with the same filtering logic as the counting pass. */
      for (i = 0; i < num_sends; i++)
      {
         HYPRE_Int j_begin, j_end;
         hypre_GetSimpleThreadPartition(&j_begin, &j_end, send_map_starts[i + 1] - send_map_starts[i]);
         j_begin += send_map_starts[i];
         j_end += send_map_starts[i];

         /* this thread's write position in the send buffers */
         HYPRE_Int count = counts[i] + jdata_send_map_starts[i];

         if (data)
         {
            if (skip_same_sign && skip_fine)
            {
               for (j = j_begin; j < j_end; j++)
               {
                  HYPRE_Int jrow = send_map_elmts[j];
                  /*HYPRE_Int count_begin = count;*/

                  if (diag_data[diag_i[jrow]] >= 0)
                  {
                     for (k = diag_i[jrow] + 1; k < diag_i[jrow + 1]; k++)
                     {
                        if (diag_data[k] < 0 && CF_marker[diag_j[k]] >= 0)
                        {
                           /* globalize diag column index */
                           B_int_j[count] = (HYPRE_BigInt)diag_j[k]+first_col_diag;
                           B_int_data[count] = diag_data[k];
                           count++;
                        }
                     }
                     for (k = offd_i[jrow]; k < offd_i[jrow + 1]; k++)
                     {
                        HYPRE_Int c = offd_j[k];
                        HYPRE_BigInt c_global = col_map_offd[c];
                        if (offd_data[k] < 0)
                        {
                           B_int_j[count] = c_global;
                           B_int_data[count] = offd_data[k];
                           count++;
                        }
                     }
                  }
                  else
                  {
                     for (k = diag_i[jrow] + 1; k < diag_i[jrow + 1]; k++)
                     {
                        if (diag_data[k] > 0 && CF_marker[diag_j[k]] >= 0)
                        {
                           B_int_j[count] = (HYPRE_BigInt)diag_j[k]+first_col_diag;
                           B_int_data[count] = diag_data[k];
                           count++;
                        }
                     }
                     for (k = offd_i[jrow]; k < offd_i[jrow + 1]; k++)
                     {
                        HYPRE_Int c = offd_j[k];
                        HYPRE_BigInt c_global = col_map_offd[c];
                        if (offd_data[k] > 0)
                        {
                           B_int_j[count] = c_global;
                           B_int_data[count] = offd_data[k];
                           count++;
                        }
                     }
                  }
               }
            }
            else
            {
               /* NOTE(review): when data != 0 and skip_fine == 1 the counting
                * pass filters by CF_marker but this branch copies whole rows;
                * callers appear to use data=1 only with both skips or neither
                * -- confirm before relying on (data=1, skip_fine=1) alone. */
               for (j = j_begin; j < j_end; ++j) {
                  HYPRE_Int jrow = send_map_elmts[j];
                  for (k = diag_i[jrow]; k < diag_i[jrow+1]; k++)
                  {
                     B_int_j[count] = (HYPRE_BigInt)diag_j[k]+first_col_diag;
                     B_int_data[count] = diag_data[k];
                     count++;
                  }
                  for (k = offd_i[jrow]; k < offd_i[jrow+1]; k++)
                  {
                     B_int_j[count] = col_map_offd[offd_j[k]];
                     B_int_data[count] = offd_data[k];
                     count++;
                  }
               }
            }
         } // data
         else
         {
            if (skip_fine)
            {
               for (j = j_begin; j < j_end; j++)
               {
                  HYPRE_Int jrow = send_map_elmts[j];
                  for (k = diag_i[jrow]; k < diag_i[jrow + 1]; k++)
                  {
                     if (CF_marker[diag_j[k]] >= 0)
                     {
                        B_int_j[count] = (HYPRE_BigInt)diag_j[k] + first_col_diag;
                        count++;
                     }
                  }
                  for (k = offd_i[jrow]; k < offd_i[jrow + 1]; k++)
                  {
                     if (CF_marker_offd[offd_j[k]] >= 0)
                     {
                        B_int_j[count] = col_map_offd[offd_j[k]];
                        count++;
                     }
                  }
               }
            }
            else
            {
               for (j = j_begin; j < j_end; ++j) {
                  HYPRE_Int jrow = send_map_elmts[j];
                  for (k = diag_i[jrow]; k < diag_i[jrow+1]; k++)
                  {
                     B_int_j[count] = (HYPRE_BigInt)diag_j[k]+first_col_diag;
                     count++;
                  }
                  for (k = offd_i[jrow]; k < offd_i[jrow+1]; k++)
                  {
                     B_int_j[count] = col_map_offd[offd_j[k]];
                     count++;
                  }
               }
            }
         } // !data
      } /* for each send target */
      hypre_TFree(counts, HYPRE_MEMORY_HOST);
   } /* omp parallel. JSP: this takes most of time in this function */
   hypre_TFree(prefix_sum_workspace, HYPRE_MEMORY_HOST);

   /* Build a temporary comm pkg whose send/recv offsets are measured in
    * nonzeros (jdata_*) rather than rows, for the j/data exchanges. */
   tmp_comm_pkg = hypre_CTAlloc(hypre_ParCSRCommPkg, 1, HYPRE_MEMORY_HOST);
   hypre_ParCSRCommPkgComm(tmp_comm_pkg) = comm;
   hypre_ParCSRCommPkgNumSends(tmp_comm_pkg) = num_sends;
   hypre_ParCSRCommPkgNumRecvs(tmp_comm_pkg) = num_recvs;
   hypre_ParCSRCommPkgSendProcs(tmp_comm_pkg) =
      hypre_ParCSRCommPkgSendProcs(comm_pkg);
   hypre_ParCSRCommPkgRecvProcs(tmp_comm_pkg) =
      hypre_ParCSRCommPkgRecvProcs(comm_pkg);
   hypre_ParCSRCommPkgSendMapStarts(tmp_comm_pkg) = jdata_send_map_starts;

   /* wait for the row-size exchange started in the master section */
   hypre_ParCSRCommHandleDestroy(comm_handle);
   comm_handle = NULL;

   /*--------------------------------------------------------------------------
    * after communication exchange B_ext_i[j+1] contains the number of elements
    * of a row j !
    * evaluate B_ext_i and compute *num_nonzeros for B_ext
    *--------------------------------------------------------------------------*/

   for (i = 0; i < num_recvs; i++)
   {
      for (j = recv_vec_starts[i]; j < recv_vec_starts[i+1]; j++)
      {
         B_ext_i[j+1] += B_ext_i[j];
      }
   }

   *num_nonzeros = B_ext_i[num_rows_B_ext];

   *pB_ext_j = hypre_TAlloc(HYPRE_BigInt, *num_nonzeros, HYPRE_MEMORY_HOST);
   B_ext_j = *pB_ext_j;
   if (data)
   {
      *pB_ext_data = hypre_TAlloc(HYPRE_Complex, *num_nonzeros, HYPRE_MEMORY_HOST);
      B_ext_data = *pB_ext_data;
   }

   /* recv offsets in nonzeros, derived from the scanned row pointer */
   for (i = 0; i < num_recvs; i++)
   {
      start_index = B_ext_i[recv_vec_starts[i]];
      *num_nonzeros = B_ext_i[recv_vec_starts[i+1]]-start_index;
      jdata_recv_vec_starts[i+1] = B_ext_i[recv_vec_starts[i+1]];
   }

   hypre_ParCSRCommPkgRecvVecStarts(tmp_comm_pkg) = jdata_recv_vec_starts;

   /* start the overlapped j (and data) exchanges; the caller finishes them */
   *comm_handle_idx = hypre_ParCSRCommHandleCreate(21,tmp_comm_pkg,B_int_j,B_ext_j);
   if (data)
   {
      *comm_handle_data = hypre_ParCSRCommHandleCreate(1,tmp_comm_pkg,B_int_data,
                                                       B_ext_data);
   }

   if (row_map_comm_handle)
   {
      hypre_ParCSRCommHandleDestroy(row_map_comm_handle);
      row_map_comm_handle = NULL;
   }

   hypre_TFree(jdata_send_map_starts, HYPRE_MEMORY_HOST);
   hypre_TFree(jdata_recv_vec_starts, HYPRE_MEMORY_HOST);
   hypre_TFree(tmp_comm_pkg, HYPRE_MEMORY_HOST);
   hypre_TFree(B_int_i, HYPRE_MEMORY_HOST);
   if ( find_row_map ) hypre_TFree(B_int_row_map, HYPRE_MEMORY_HOST);

   /* end generic part */
}
1635
hypre_ParCSRMatrixExtractBExt_Arrays(HYPRE_Int ** pB_ext_i,HYPRE_BigInt ** pB_ext_j,HYPRE_Complex ** pB_ext_data,HYPRE_BigInt ** pB_ext_row_map,HYPRE_Int * num_nonzeros,HYPRE_Int data,HYPRE_Int find_row_map,MPI_Comm comm,hypre_ParCSRCommPkg * comm_pkg,HYPRE_Int num_cols_B,HYPRE_Int num_recvs,HYPRE_Int num_sends,HYPRE_BigInt first_col_diag,HYPRE_BigInt * row_starts,HYPRE_Int * recv_vec_starts,HYPRE_Int * send_map_starts,HYPRE_Int * send_map_elmts,HYPRE_Int * diag_i,HYPRE_Int * diag_j,HYPRE_Int * offd_i,HYPRE_Int * offd_j,HYPRE_BigInt * col_map_offd,HYPRE_Real * diag_data,HYPRE_Real * offd_data)1636 void hypre_ParCSRMatrixExtractBExt_Arrays(
1637 HYPRE_Int ** pB_ext_i,
1638 HYPRE_BigInt ** pB_ext_j,
1639 HYPRE_Complex ** pB_ext_data,
1640 HYPRE_BigInt ** pB_ext_row_map,
1641 HYPRE_Int * num_nonzeros,
1642 HYPRE_Int data,
1643 HYPRE_Int find_row_map,
1644 MPI_Comm comm,
1645 hypre_ParCSRCommPkg * comm_pkg,
1646 HYPRE_Int num_cols_B,
1647 HYPRE_Int num_recvs,
1648 HYPRE_Int num_sends,
1649 HYPRE_BigInt first_col_diag,
1650 HYPRE_BigInt * row_starts,
1651 HYPRE_Int * recv_vec_starts,
1652 HYPRE_Int * send_map_starts,
1653 HYPRE_Int * send_map_elmts,
1654 HYPRE_Int * diag_i,
1655 HYPRE_Int * diag_j,
1656 HYPRE_Int * offd_i,
1657 HYPRE_Int * offd_j,
1658 HYPRE_BigInt * col_map_offd,
1659 HYPRE_Real * diag_data,
1660 HYPRE_Real * offd_data
1661 )
1662 {
1663 hypre_ParCSRCommHandle *comm_handle_idx, *comm_handle_data;
1664
1665 hypre_ParCSRMatrixExtractBExt_Arrays_Overlap(
1666 pB_ext_i, pB_ext_j, pB_ext_data, pB_ext_row_map, num_nonzeros,
1667 data, find_row_map, comm, comm_pkg, num_cols_B, num_recvs, num_sends,
1668 first_col_diag, row_starts, recv_vec_starts, send_map_starts, send_map_elmts,
1669 diag_i, diag_j, offd_i, offd_j, col_map_offd, diag_data, offd_data,
1670 &comm_handle_idx, &comm_handle_data,
1671 NULL, NULL,
1672 0, 0);
1673
1674 HYPRE_Int *send_idx = (HYPRE_Int *)comm_handle_idx->send_data;
1675 hypre_ParCSRCommHandleDestroy(comm_handle_idx);
1676 hypre_TFree(send_idx, HYPRE_MEMORY_HOST);
1677
1678 if (data)
1679 {
1680 HYPRE_Real *send_data = (HYPRE_Real *)comm_handle_data->send_data;
1681 hypre_ParCSRCommHandleDestroy(comm_handle_data);
1682 hypre_TFree(send_data, HYPRE_MEMORY_HOST);
1683 }
1684 }
1685
1686 /*--------------------------------------------------------------------------
1687 * hypre_ParCSRMatrixExtractBExt : extracts rows from B which are located on
1688 * other processors and needed for multiplication with A locally. The rows
1689 * are returned as CSRMatrix.
1690 *--------------------------------------------------------------------------*/
1691
1692 hypre_CSRMatrix *
hypre_ParCSRMatrixExtractBExt_Overlap(hypre_ParCSRMatrix * B,hypre_ParCSRMatrix * A,HYPRE_Int data,hypre_ParCSRCommHandle ** comm_handle_idx,hypre_ParCSRCommHandle ** comm_handle_data,HYPRE_Int * CF_marker,HYPRE_Int * CF_marker_offd,HYPRE_Int skip_fine,HYPRE_Int skip_same_sign)1693 hypre_ParCSRMatrixExtractBExt_Overlap( hypre_ParCSRMatrix *B,
1694 hypre_ParCSRMatrix *A,
1695 HYPRE_Int data,
1696 hypre_ParCSRCommHandle **comm_handle_idx,
1697 hypre_ParCSRCommHandle **comm_handle_data,
1698 HYPRE_Int *CF_marker, HYPRE_Int *CF_marker_offd,
1699 HYPRE_Int skip_fine, HYPRE_Int skip_same_sign )
1700 {
1701 MPI_Comm comm = hypre_ParCSRMatrixComm(B);
1702 HYPRE_BigInt first_col_diag = hypre_ParCSRMatrixFirstColDiag(B);
1703 /*HYPRE_Int first_row_index = hypre_ParCSRMatrixFirstRowIndex(B);*/
1704 HYPRE_BigInt *col_map_offd = hypre_ParCSRMatrixColMapOffd(B);
1705
1706 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
1707 HYPRE_Int num_recvs;
1708 HYPRE_Int *recv_vec_starts;
1709 HYPRE_Int num_sends;
1710 HYPRE_Int *send_map_starts;
1711 HYPRE_Int *send_map_elmts;
1712
1713 hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(B);
1714
1715 HYPRE_Int *diag_i = hypre_CSRMatrixI(diag);
1716 HYPRE_Int *diag_j = hypre_CSRMatrixJ(diag);
1717 HYPRE_Real *diag_data = hypre_CSRMatrixData(diag);
1718
1719 hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(B);
1720
1721 HYPRE_Int *offd_i = hypre_CSRMatrixI(offd);
1722 HYPRE_Int *offd_j = hypre_CSRMatrixJ(offd);
1723 HYPRE_Real *offd_data = hypre_CSRMatrixData(offd);
1724
1725 HYPRE_Int num_cols_B, num_nonzeros;
1726 HYPRE_Int num_rows_B_ext;
1727
1728 hypre_CSRMatrix *B_ext;
1729
1730 HYPRE_Int *B_ext_i;
1731 HYPRE_BigInt *B_ext_j;
1732 HYPRE_Complex *B_ext_data;
1733 HYPRE_BigInt *idummy;
1734
1735 /*---------------------------------------------------------------------
1736 * If there exists no CommPkg for A, a CommPkg is generated using
1737 * equally load balanced partitionings
1738 *--------------------------------------------------------------------*/
1739
1740 if (!hypre_ParCSRMatrixCommPkg(A))
1741 {
1742 hypre_MatvecCommPkgCreate(A);
1743 }
1744
1745 comm_pkg = hypre_ParCSRMatrixCommPkg(A);
1746 num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
1747 recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
1748 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
1749 send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
1750 send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg);
1751
1752 num_cols_B = hypre_ParCSRMatrixGlobalNumCols(B);
1753 num_rows_B_ext = recv_vec_starts[num_recvs];
1754
1755 hypre_ParCSRMatrixExtractBExt_Arrays_Overlap
1756 ( &B_ext_i, &B_ext_j, &B_ext_data, &idummy,
1757 &num_nonzeros,
1758 data, 0, comm, comm_pkg,
1759 num_cols_B, num_recvs, num_sends,
1760 first_col_diag, B->row_starts,
1761 recv_vec_starts, send_map_starts, send_map_elmts,
1762 diag_i, diag_j, offd_i, offd_j, col_map_offd,
1763 diag_data, offd_data,
1764 comm_handle_idx, comm_handle_data,
1765 CF_marker, CF_marker_offd,
1766 skip_fine, skip_same_sign
1767 );
1768
1769 B_ext = hypre_CSRMatrixCreate(num_rows_B_ext,num_cols_B,num_nonzeros);
1770 hypre_CSRMatrixMemoryLocation(B_ext) = HYPRE_MEMORY_HOST;
1771 hypre_CSRMatrixI(B_ext) = B_ext_i;
1772 hypre_CSRMatrixBigJ(B_ext) = B_ext_j;
1773 if (data) hypre_CSRMatrixData(B_ext) = B_ext_data;
1774
1775 return B_ext;
1776 }
1777
/*--------------------------------------------------------------------------
 * hypre_ParCSRMatrixExtractBExt : extracts rows from B which are located
 * on other processors and needed for multiplication with A locally. The
 * rows are returned as a CSRMatrix with global (BigJ) column indices; all
 * communication is completed before returning.
 *--------------------------------------------------------------------------*/
hypre_CSRMatrix *
hypre_ParCSRMatrixExtractBExt( hypre_ParCSRMatrix *B,
                               hypre_ParCSRMatrix *A,
                               HYPRE_Int want_data )
{
#if 0
   /* Legacy path kept for reference: start the overlapped exchange, then
    * immediately complete both comm handles and free their send buffers. */
   hypre_ParCSRCommHandle *comm_handle_idx, *comm_handle_data;

   hypre_CSRMatrix *B_ext = hypre_ParCSRMatrixExtractBExt_Overlap(B, A, want_data, &comm_handle_idx, &comm_handle_data, NULL, NULL, 0, 0);

   HYPRE_Int *send_idx = (HYPRE_Int *)comm_handle_idx->send_data;
   hypre_ParCSRCommHandleDestroy(comm_handle_idx);
   hypre_TFree(send_idx, HYPRE_MEMORY_HOST);

   if (want_data)
   {
      HYPRE_Real *send_data = (HYPRE_Real *)comm_handle_data->send_data;
      hypre_ParCSRCommHandleDestroy(comm_handle_data);
      hypre_TFree(send_data, HYPRE_MEMORY_HOST);
   }
#else
   /* Both halves of B are expected to live in the same memory space. */
   hypre_assert( hypre_CSRMatrixMemoryLocation(hypre_ParCSRMatrixDiag(B)) ==
                 hypre_CSRMatrixMemoryLocation(hypre_ParCSRMatrixOffd(B)) );

   hypre_CSRMatrix *B_ext;
   void *request;

   /* ensure A has a comm pkg describing which rows of B are needed */
   if (!hypre_ParCSRMatrixCommPkg(A))
   {
      hypre_MatvecCommPkgCreate(A);
   }

   /* init/wait pair: start the external-row fetch, then block until done */
   hypre_ParcsrGetExternalRowsInit(B,
                                   hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(A)),
                                   hypre_ParCSRMatrixColMapOffd(A),
                                   hypre_ParCSRMatrixCommPkg(A),
                                   want_data,
                                   &request);

   B_ext = hypre_ParcsrGetExternalRowsWait(request);
#endif

   return B_ext;
}
1822
1823 /*--------------------------------------------------------------------------
1824 * hypre_ParCSRMatrixTransposeHost
1825 *--------------------------------------------------------------------------*/
1826
1827 HYPRE_Int
hypre_ParCSRMatrixTransposeHost(hypre_ParCSRMatrix * A,hypre_ParCSRMatrix ** AT_ptr,HYPRE_Int data)1828 hypre_ParCSRMatrixTransposeHost( hypre_ParCSRMatrix *A,
1829 hypre_ParCSRMatrix **AT_ptr,
1830 HYPRE_Int data )
1831 {
1832 MPI_Comm comm = hypre_ParCSRMatrixComm(A);
1833 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
1834 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
1835 hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
1836 HYPRE_Int num_cols = hypre_ParCSRMatrixNumCols(A);
1837 HYPRE_BigInt first_row_index = hypre_ParCSRMatrixFirstRowIndex(A);
1838 HYPRE_BigInt *row_starts = hypre_ParCSRMatrixRowStarts(A);
1839 HYPRE_BigInt *col_starts = hypre_ParCSRMatrixColStarts(A);
1840
1841 HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(A_offd);
1842 HYPRE_Int num_sends, num_recvs, num_cols_offd_AT;
1843 HYPRE_Int i, j, k, index, counter, j_row;
1844 HYPRE_BigInt value;
1845
1846 hypre_ParCSRMatrix *AT;
1847 hypre_CSRMatrix *AT_diag;
1848 hypre_CSRMatrix *AT_offd;
1849 hypre_CSRMatrix *AT_tmp;
1850
1851 HYPRE_BigInt first_row_index_AT, first_col_diag_AT;
1852 HYPRE_Int local_num_rows_AT, local_num_cols_AT;
1853
1854 HYPRE_Int *AT_tmp_i;
1855 HYPRE_Int *AT_tmp_j;
1856 HYPRE_BigInt *AT_big_j = NULL;
1857 HYPRE_Complex *AT_tmp_data;
1858
1859 HYPRE_Int *AT_buf_i;
1860 HYPRE_BigInt *AT_buf_j;
1861 HYPRE_Complex *AT_buf_data;
1862
1863 HYPRE_Int *AT_offd_i;
1864 HYPRE_Int *AT_offd_j;
1865 HYPRE_Complex *AT_offd_data;
1866 HYPRE_BigInt *col_map_offd_AT;
1867 HYPRE_BigInt row_starts_AT[2];
1868 HYPRE_BigInt col_starts_AT[2];
1869
1870 HYPRE_Int num_procs, my_id;
1871
1872 HYPRE_Int *recv_procs, *send_procs;
1873 HYPRE_Int *recv_vec_starts;
1874 HYPRE_Int *send_map_starts;
1875 HYPRE_Int *send_map_elmts;
1876 HYPRE_Int *tmp_recv_vec_starts;
1877 HYPRE_Int *tmp_send_map_starts;
1878 hypre_ParCSRCommPkg *tmp_comm_pkg;
1879 hypre_ParCSRCommHandle *comm_handle;
1880
1881 hypre_MPI_Comm_size(comm,&num_procs);
1882 hypre_MPI_Comm_rank(comm,&my_id);
1883
1884 num_cols_offd_AT = 0;
1885 counter = 0;
1886 AT_offd_j = NULL;
1887 AT_offd_data = NULL;
1888 col_map_offd_AT = NULL;
1889
1890 HYPRE_MemoryLocation memory_location = hypre_ParCSRMatrixMemoryLocation(A);
1891
1892 /*---------------------------------------------------------------------
1893 * If there exists no CommPkg for A, a CommPkg is generated using
1894 * equally load balanced partitionings
1895 *--------------------------------------------------------------------*/
1896
1897 if (!comm_pkg)
1898 {
1899 hypre_MatvecCommPkgCreate(A);
1900 comm_pkg = hypre_ParCSRMatrixCommPkg(A);
1901 }
1902
1903 if (num_procs > 1)
1904 {
1905 hypre_CSRMatrixTranspose (A_offd, &AT_tmp, data);
1906
1907 AT_tmp_i = hypre_CSRMatrixI(AT_tmp);
1908 AT_tmp_j = hypre_CSRMatrixJ(AT_tmp);
1909 if (data)
1910 {
1911 AT_tmp_data = hypre_CSRMatrixData(AT_tmp);
1912 }
1913
1914 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
1915 num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
1916 recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg);
1917 send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg);
1918 recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
1919 send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
1920 send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg);
1921
1922 AT_buf_i = hypre_CTAlloc(HYPRE_Int, send_map_starts[num_sends], HYPRE_MEMORY_HOST);
1923 if (AT_tmp_i[num_cols_offd])
1924 {
1925 AT_big_j = hypre_CTAlloc(HYPRE_BigInt, AT_tmp_i[num_cols_offd], HYPRE_MEMORY_HOST);
1926 }
1927
1928 for (i = 0; i < AT_tmp_i[num_cols_offd]; i++)
1929 {
1930 //AT_tmp_j[i] += first_row_index;
1931 AT_big_j[i] = (HYPRE_BigInt)AT_tmp_j[i]+first_row_index;
1932 }
1933
1934 for (i = 0; i < num_cols_offd; i++)
1935 {
1936 AT_tmp_i[i] = AT_tmp_i[i+1]-AT_tmp_i[i];
1937 }
1938
1939 comm_handle = hypre_ParCSRCommHandleCreate(12, comm_pkg, AT_tmp_i, AT_buf_i);
1940 }
1941
1942 hypre_CSRMatrixTranspose(A_diag, &AT_diag, data);
1943
1944 AT_offd_i = hypre_CTAlloc(HYPRE_Int, num_cols+1, memory_location);
1945
1946 if (num_procs > 1)
1947 {
1948 hypre_ParCSRCommHandleDestroy(comm_handle);
1949 comm_handle = NULL;
1950
1951 tmp_send_map_starts = hypre_CTAlloc(HYPRE_Int, num_sends+1, HYPRE_MEMORY_HOST);
1952 tmp_recv_vec_starts = hypre_CTAlloc(HYPRE_Int, num_recvs+1, HYPRE_MEMORY_HOST);
1953
1954 tmp_send_map_starts[0] = send_map_starts[0];
1955 for (i = 0; i < num_sends; i++)
1956 {
1957 tmp_send_map_starts[i+1] = tmp_send_map_starts[i];
1958 for (j = send_map_starts[i]; j < send_map_starts[i+1]; j++)
1959 {
1960 tmp_send_map_starts[i+1] += AT_buf_i[j];
1961 AT_offd_i[send_map_elmts[j]+1] += AT_buf_i[j];
1962 }
1963 }
1964 for (i = 0; i < num_cols; i++)
1965 {
1966 AT_offd_i[i+1] += AT_offd_i[i];
1967 }
1968
1969 tmp_recv_vec_starts[0] = recv_vec_starts[0];
1970 for (i = 0; i < num_recvs; i++)
1971 {
1972 tmp_recv_vec_starts[i+1] = tmp_recv_vec_starts[i];
1973 for (j = recv_vec_starts[i]; j < recv_vec_starts[i+1]; j++)
1974 {
1975 tmp_recv_vec_starts[i+1] += AT_tmp_i[j];
1976 }
1977 }
1978
1979 tmp_comm_pkg = hypre_CTAlloc(hypre_ParCSRCommPkg, 1, HYPRE_MEMORY_HOST);
1980 hypre_ParCSRCommPkgComm(tmp_comm_pkg) = comm;
1981 hypre_ParCSRCommPkgNumSends(tmp_comm_pkg) = num_sends;
1982 hypre_ParCSRCommPkgNumRecvs(tmp_comm_pkg) = num_recvs;
1983 hypre_ParCSRCommPkgRecvProcs(tmp_comm_pkg) = recv_procs;
1984 hypre_ParCSRCommPkgSendProcs(tmp_comm_pkg) = send_procs;
1985 hypre_ParCSRCommPkgRecvVecStarts(tmp_comm_pkg) = tmp_recv_vec_starts;
1986 hypre_ParCSRCommPkgSendMapStarts(tmp_comm_pkg) = tmp_send_map_starts;
1987
1988 AT_buf_j = hypre_CTAlloc(HYPRE_BigInt, tmp_send_map_starts[num_sends], HYPRE_MEMORY_HOST);
1989 comm_handle = hypre_ParCSRCommHandleCreate(22, tmp_comm_pkg, AT_big_j,
1990 AT_buf_j);
1991 hypre_ParCSRCommHandleDestroy(comm_handle);
1992 comm_handle = NULL;
1993 hypre_TFree(AT_big_j, HYPRE_MEMORY_HOST);
1994
1995 if (data)
1996 {
1997 AT_buf_data = hypre_CTAlloc(HYPRE_Complex, tmp_send_map_starts[num_sends], HYPRE_MEMORY_HOST);
1998 comm_handle = hypre_ParCSRCommHandleCreate(2,tmp_comm_pkg,AT_tmp_data,
1999 AT_buf_data);
2000 hypre_ParCSRCommHandleDestroy(comm_handle);
2001 comm_handle = NULL;
2002 }
2003
2004 hypre_TFree(tmp_recv_vec_starts, HYPRE_MEMORY_HOST);
2005 hypre_TFree(tmp_send_map_starts, HYPRE_MEMORY_HOST);
2006 hypre_TFree(tmp_comm_pkg, HYPRE_MEMORY_HOST);
2007 hypre_CSRMatrixDestroy(AT_tmp);
2008
2009 if (AT_offd_i[num_cols])
2010 {
2011 AT_offd_j = hypre_CTAlloc(HYPRE_Int, AT_offd_i[num_cols], memory_location);
2012 AT_big_j = hypre_CTAlloc(HYPRE_BigInt, AT_offd_i[num_cols], HYPRE_MEMORY_HOST);
2013 if (data)
2014 {
2015 AT_offd_data = hypre_CTAlloc(HYPRE_Complex, AT_offd_i[num_cols], memory_location);
2016 }
2017 }
2018 else
2019 {
2020 AT_offd_j = NULL;
2021 AT_offd_data = NULL;
2022 }
2023
2024 counter = 0;
2025 for (i = 0; i < num_sends; i++)
2026 {
2027 for (j = send_map_starts[i]; j < send_map_starts[i+1]; j++)
2028 {
2029 j_row = send_map_elmts[j];
2030 index = AT_offd_i[j_row];
2031 for (k = 0; k < AT_buf_i[j]; k++)
2032 {
2033 if (data)
2034 {
2035 AT_offd_data[index] = AT_buf_data[counter];
2036 }
2037 AT_big_j[index++] = AT_buf_j[counter++];
2038 }
2039 AT_offd_i[j_row] = index;
2040 }
2041 }
2042 for (i = num_cols; i > 0; i--)
2043 {
2044 AT_offd_i[i] = AT_offd_i[i-1];
2045 }
2046 AT_offd_i[0] = 0;
2047
2048 if (counter)
2049 {
2050 hypre_BigQsort0(AT_buf_j,0,counter-1);
2051 num_cols_offd_AT = 1;
2052 value = AT_buf_j[0];
2053 for (i = 1; i < counter; i++)
2054 {
2055 if (value < AT_buf_j[i])
2056 {
2057 AT_buf_j[num_cols_offd_AT++] = AT_buf_j[i];
2058 value = AT_buf_j[i];
2059 }
2060 }
2061 }
2062
2063 if (num_cols_offd_AT)
2064 {
2065 col_map_offd_AT = hypre_CTAlloc(HYPRE_BigInt, num_cols_offd_AT, HYPRE_MEMORY_HOST);
2066 }
2067 else
2068 {
2069 col_map_offd_AT = NULL;
2070 }
2071
2072 for (i = 0; i < num_cols_offd_AT; i++)
2073 {
2074 col_map_offd_AT[i] = AT_buf_j[i];
2075 }
2076 hypre_TFree(AT_buf_i, HYPRE_MEMORY_HOST);
2077 hypre_TFree(AT_buf_j, HYPRE_MEMORY_HOST);
2078 if (data)
2079 {
2080 hypre_TFree(AT_buf_data, HYPRE_MEMORY_HOST);
2081 }
2082
2083 for (i = 0; i < counter; i++)
2084 {
2085 AT_offd_j[i] = hypre_BigBinarySearch(col_map_offd_AT,AT_big_j[i],
2086 num_cols_offd_AT);
2087 }
2088 hypre_TFree(AT_big_j, HYPRE_MEMORY_HOST);
2089 }
2090
2091 AT_offd = hypre_CSRMatrixCreate(num_cols, num_cols_offd_AT, counter);
2092 hypre_CSRMatrixMemoryLocation(AT_offd) = memory_location;
2093 hypre_CSRMatrixI(AT_offd) = AT_offd_i;
2094 hypre_CSRMatrixJ(AT_offd) = AT_offd_j;
2095 hypre_CSRMatrixData(AT_offd) = AT_offd_data;
2096
2097 for (i = 0; i < 2; i++)
2098 {
2099 row_starts_AT[i] = col_starts[i];
2100 col_starts_AT[i] = row_starts[i];
2101 }
2102
2103 first_row_index_AT = row_starts_AT[0];
2104 first_col_diag_AT = col_starts_AT[0];
2105
2106 local_num_rows_AT = (HYPRE_Int)(row_starts_AT[1]-first_row_index_AT );
2107 local_num_cols_AT = (HYPRE_Int)(col_starts_AT[1]-first_col_diag_AT);
2108
2109 AT = hypre_CTAlloc(hypre_ParCSRMatrix, 1, HYPRE_MEMORY_HOST);
2110 hypre_ParCSRMatrixComm(AT) = comm;
2111 hypre_ParCSRMatrixDiag(AT) = AT_diag;
2112 hypre_ParCSRMatrixOffd(AT) = AT_offd;
2113 hypre_ParCSRMatrixGlobalNumRows(AT) = hypre_ParCSRMatrixGlobalNumCols(A);
2114 hypre_ParCSRMatrixGlobalNumCols(AT) = hypre_ParCSRMatrixGlobalNumRows(A);
2115 hypre_ParCSRMatrixRowStarts(AT)[0] = row_starts_AT[0];
2116 hypre_ParCSRMatrixRowStarts(AT)[1] = row_starts_AT[1];
2117 hypre_ParCSRMatrixColStarts(AT)[0] = col_starts_AT[0];
2118 hypre_ParCSRMatrixColStarts(AT)[1] = col_starts_AT[1];
2119 hypre_ParCSRMatrixColMapOffd(AT) = col_map_offd_AT;
2120
2121 hypre_ParCSRMatrixFirstRowIndex(AT) = first_row_index_AT;
2122 hypre_ParCSRMatrixFirstColDiag(AT) = first_col_diag_AT;
2123
2124 hypre_ParCSRMatrixLastRowIndex(AT) = first_row_index_AT + local_num_rows_AT - 1;
2125 hypre_ParCSRMatrixLastColDiag(AT) = first_col_diag_AT + local_num_cols_AT - 1;
2126
2127 hypre_ParCSRMatrixOwnsData(AT) = 1;
2128 hypre_ParCSRMatrixCommPkg(AT) = NULL;
2129 hypre_ParCSRMatrixCommPkgT(AT) = NULL;
2130
2131 hypre_ParCSRMatrixRowindices(AT) = NULL;
2132 hypre_ParCSRMatrixRowvalues(AT) = NULL;
2133 hypre_ParCSRMatrixGetrowactive(AT) = 0;
2134
2135 hypre_ParCSRMatrixOwnsAssumedPartition(AT) = 1;
2136
2137 *AT_ptr = AT;
2138
2139 return hypre_error_flag;
2140 }
2141
2142 /*--------------------------------------------------------------------------
2143 * hypre_ParCSRMatrixTranspose
2144 *--------------------------------------------------------------------------*/
2145
2146 HYPRE_Int
hypre_ParCSRMatrixTranspose(hypre_ParCSRMatrix * A,hypre_ParCSRMatrix ** AT_ptr,HYPRE_Int data)2147 hypre_ParCSRMatrixTranspose( hypre_ParCSRMatrix *A,
2148 hypre_ParCSRMatrix **AT_ptr,
2149 HYPRE_Int data )
2150 {
2151 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
2152 hypre_GpuProfilingPushRange("ParCSRMatrixTranspose");
2153 #endif
2154
2155 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
2156 HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(A) );
2157
2158 if (exec == HYPRE_EXEC_DEVICE)
2159 {
2160 hypre_ParCSRMatrixTransposeDevice(A, AT_ptr, data);
2161 }
2162 else
2163 #endif
2164 {
2165 hypre_ParCSRMatrixTransposeHost(A, AT_ptr, data);
2166 }
2167
2168 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
2169 hypre_GpuProfilingPopRange();
2170 #endif
2171
2172 return hypre_error_flag;
2173 }
2174
/* -----------------------------------------------------------------------------
 * Generate a parallel spanning tree (used by the Maxwell equation solver).
 * G_csr is the node-to-edge connectivity matrix.
 * ----------------------------------------------------------------------------- */
2179
void
hypre_ParCSRMatrixGenSpanningTree( hypre_ParCSRMatrix *G_csr,
                                   HYPRE_Int          **indices,
                                   HYPRE_Int          G_type )
{
   /* Builds a spanning tree of the edge graph described by G_csr and returns
    * the selected edge indices through *indices.  The output array is
    * allocated here (caller frees); indices[0] holds the number of tree
    * edges and indices[1..tree_size] the edge numbers.
    *
    * G_type == 0 means G_csr is already node-to-edge; otherwise G_csr is
    * edge-to-node and a node-to-edge copy is built locally first.
    *
    * NOTE(review): the routine is collective (Allgather/Allgatherv below),
    * so every rank in the communicator must call it. */

   HYPRE_BigInt nrows_G, ncols_G;
   HYPRE_Int *G_diag_i, *G_diag_j, *GT_diag_mat, i, j, k, edge;
   HYPRE_Int *nodes_marked, *edges_marked, *queue, queue_tail, queue_head, node;
   HYPRE_Int mypid, nprocs, n_children, *children, nsends, *send_procs, *recv_cnts;
   HYPRE_Int nrecvs, *recv_procs, n_proc_array, *proc_array, *pgraph_i, *pgraph_j;
   HYPRE_Int parent, proc, proc2, node2, found, *t_indices, tree_size, *T_diag_i;
   HYPRE_Int *T_diag_j, *counts, offset;
   MPI_Comm comm;
   hypre_ParCSRCommPkg *comm_pkg;
   hypre_CSRMatrix *G_diag;

   /* fetch G matrix (G_type = 0 ==> node to edge) */

   if (G_type == 0)
   {
      /* G is already node-to-edge: rows = nodes, cols = edges */
      nrows_G = hypre_ParCSRMatrixGlobalNumRows(G_csr);
      ncols_G = hypre_ParCSRMatrixGlobalNumCols(G_csr);
      G_diag = hypre_ParCSRMatrixDiag(G_csr);
      G_diag_i = hypre_CSRMatrixI(G_diag);
      G_diag_j = hypre_CSRMatrixJ(G_diag);
   }
   else
   {
      /* G is edge-to-node: build an explicit node-to-edge transpose of the
       * local diag block.  T_diag_* is the original (edge-to-node) CSR. */
      nrows_G = hypre_ParCSRMatrixGlobalNumCols(G_csr);
      ncols_G = hypre_ParCSRMatrixGlobalNumRows(G_csr);
      G_diag = hypre_ParCSRMatrixDiag(G_csr);
      T_diag_i = hypre_CSRMatrixI(G_diag);
      T_diag_j = hypre_CSRMatrixJ(G_diag);
      /* count entries per node (per transposed row) */
      counts = hypre_TAlloc(HYPRE_Int, nrows_G , HYPRE_MEMORY_HOST);
      for (i = 0; i < nrows_G; i++) counts[i] = 0;
      for (i = 0; i < T_diag_i[ncols_G]; i++) counts[T_diag_j[i]]++;
      G_diag_i = hypre_TAlloc(HYPRE_Int, (nrows_G+1) , HYPRE_MEMORY_HOST);
      G_diag_j = hypre_TAlloc(HYPRE_Int, T_diag_i[ncols_G] , HYPRE_MEMORY_HOST);
      G_diag_i[0] = 0;
      for (i = 1; i <= nrows_G; i++) G_diag_i[i] = G_diag_i[i-1] + counts[i-1];
      /* scatter: G_diag_i[k] is used (and advanced) as the insertion cursor
       * for row k during this pass, so the row pointers get shifted ... */
      for (i = 0; i < ncols_G; i++)
      {
         for (j = T_diag_i[i]; j < T_diag_i[i+1]; j++)
         {
            k = T_diag_j[j];
            offset = G_diag_i[k]++;
            G_diag_j[offset] = i;
         }
      }
      /* ... and are rebuilt from the counts here */
      G_diag_i[0] = 0;
      for (i = 1; i <= nrows_G; i++)
      {
         G_diag_i[i] = G_diag_i[i-1] + counts[i-1];
      }
      hypre_TFree(counts, HYPRE_MEMORY_HOST);
   }

   /* form G transpose in special form (2 nodes per edge max) */

   /* GT_diag_mat[2*e] and GT_diag_mat[2*e+1] hold the (at most two) node
    * endpoints of edge e; -1 marks an unused slot */
   GT_diag_mat = hypre_TAlloc(HYPRE_Int, 2 * ncols_G , HYPRE_MEMORY_HOST);
   for (i = 0; i < 2 * ncols_G; i++) GT_diag_mat[i] = -1;
   for (i = 0; i < nrows_G; i++)
   {
      for (j = G_diag_i[i]; j < G_diag_i[i+1]; j++)
      {
         edge = G_diag_j[j];
         if (GT_diag_mat[edge*2] == -1) GT_diag_mat[edge*2] = i;
         else GT_diag_mat[edge*2+1] = i;
      }
   }

   /* BFS on the local matrix graph to find tree */

   nodes_marked = hypre_TAlloc(HYPRE_Int, nrows_G , HYPRE_MEMORY_HOST);
   edges_marked = hypre_TAlloc(HYPRE_Int, ncols_G , HYPRE_MEMORY_HOST);
   for (i = 0; i < nrows_G; i++) nodes_marked[i] = 0;
   for (i = 0; i < ncols_G; i++) edges_marked[i] = 0;
   queue = hypre_TAlloc(HYPRE_Int, nrows_G , HYPRE_MEMORY_HOST);
   queue_head = 0;
   queue_tail = 1;
   queue[0] = 0;          /* start traversal from local node 0 */
   nodes_marked[0] = 1;
   /* NOTE(review): elements are pushed and popped at queue_tail, so this is
    * actually a depth-first (stack) traversal; a spanning tree results
    * either way. */
   while ((queue_tail-queue_head) > 0)
   {
      node = queue[queue_tail-1];
      queue_tail--;
      for (i = G_diag_i[node]; i < G_diag_i[node+1]; i++)
      {
         edge = G_diag_j[i];
         if (edges_marked[edge] == 0)
         {
            /* only consider edges with both endpoints present locally */
            if (GT_diag_mat[2*edge+1] != -1)
            {
               node2 = GT_diag_mat[2*edge];
               if (node2 == node) node2 = GT_diag_mat[2*edge+1];
               if (nodes_marked[node2] == 0)
               {
                  nodes_marked[node2] = 1;
                  edges_marked[edge] = 1;   /* edge joins the tree */
                  queue[queue_tail] = node2;
                  queue_tail++;
               }
            }
         }
      }
   }
   hypre_TFree(nodes_marked, HYPRE_MEMORY_HOST);
   hypre_TFree(queue, HYPRE_MEMORY_HOST);
   hypre_TFree(GT_diag_mat, HYPRE_MEMORY_HOST);

   /* fetch the communication information from the matrix comm package */

   comm = hypre_ParCSRMatrixComm(G_csr);
   hypre_MPI_Comm_rank(comm, &mypid);
   hypre_MPI_Comm_size(comm, &nprocs);
   comm_pkg = hypre_ParCSRMatrixCommPkg(G_csr);
   /* NOTE(review): guard only builds the comm package for the single-process
    * case; for nprocs > 1 a NULL comm_pkg would be dereferenced below —
    * callers appear expected to have created it already.  Confirm. */
   if (nprocs == 1 && comm_pkg == NULL)
   {

      hypre_MatvecCommPkgCreate((hypre_ParCSRMatrix *) G_csr);

      comm_pkg = hypre_ParCSRMatrixCommPkg(G_csr);
   }

   /* construct processor graph based on node-edge connection */
   /* (local edges connected to neighbor processor nodes) */

   n_children = 0;
   nrecvs = nsends = 0;
   if (nprocs > 1)
   {
      nsends = hypre_ParCSRCommPkgNumSends(comm_pkg);
      send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg);
      nrecvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
      recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg);
      proc_array = NULL;
      if ((nsends+nrecvs) > 0)
      {
         /* merge send and recv neighbor lists and deduplicate */
         n_proc_array = 0;
         proc_array = hypre_TAlloc(HYPRE_Int, (nsends+nrecvs) , HYPRE_MEMORY_HOST);
         for (i = 0; i < nsends; i++) proc_array[i] = send_procs[i];
         for (i = 0; i < nrecvs; i++) proc_array[nsends+i] = recv_procs[i];
         hypre_qsort0(proc_array, 0, nsends+nrecvs-1);
         n_proc_array = 1;
         for (i = 1; i < nrecvs+nsends; i++)
            if (proc_array[i] != proc_array[n_proc_array])
               proc_array[n_proc_array++] = proc_array[i];
      }
      /* gather every rank's neighbor list into a global processor graph
       * in CSR form (pgraph_i, pgraph_j) */
      pgraph_i = hypre_TAlloc(HYPRE_Int, (nprocs+1) , HYPRE_MEMORY_HOST);
      recv_cnts = hypre_TAlloc(HYPRE_Int, nprocs , HYPRE_MEMORY_HOST);
      hypre_MPI_Allgather(&n_proc_array, 1, HYPRE_MPI_INT, recv_cnts, 1,
                          HYPRE_MPI_INT, comm);
      pgraph_i[0] = 0;
      for (i = 1; i <= nprocs; i++)
         pgraph_i[i] = pgraph_i[i-1] + recv_cnts[i-1];
      pgraph_j = hypre_TAlloc(HYPRE_Int, pgraph_i[nprocs] , HYPRE_MEMORY_HOST);
      hypre_MPI_Allgatherv(proc_array, n_proc_array, HYPRE_MPI_INT, pgraph_j,
                           recv_cnts, pgraph_i, HYPRE_MPI_INT, comm);
      hypre_TFree(recv_cnts, HYPRE_MEMORY_HOST);

      /* BFS on the processor graph to determine parent and children */

      /* nodes_marked[p] records the parent of processor p (root = proc 0) */
      nodes_marked = hypre_TAlloc(HYPRE_Int, nprocs , HYPRE_MEMORY_HOST);
      for (i = 0; i < nprocs; i++) nodes_marked[i] = -1;
      queue = hypre_TAlloc(HYPRE_Int, nprocs , HYPRE_MEMORY_HOST);
      queue_head = 0;
      queue_tail = 1;
      node = 0;
      queue[0] = node;
      while ((queue_tail-queue_head) > 0)
      {
         proc = queue[queue_tail-1];
         queue_tail--;
         for (i = pgraph_i[proc]; i < pgraph_i[proc+1]; i++)
         {
            proc2 = pgraph_j[i];
            if (nodes_marked[proc2] < 0)
            {
               nodes_marked[proc2] = proc;
               queue[queue_tail] = proc2;
               queue_tail++;
            }
         }
      }
      /* NOTE(review): 'parent' is only assigned here; when nprocs == 1 the
       * loops that read it below never execute, so it stays unread. */
      parent = nodes_marked[mypid];
      n_children = 0;
      for (i = 0; i < nprocs; i++) if (nodes_marked[i] == mypid) n_children++;
      if (n_children == 0) {n_children = 0; children = NULL;}
      else
      {
         children = hypre_TAlloc(HYPRE_Int, n_children , HYPRE_MEMORY_HOST);
         n_children = 0;
         for (i = 0; i < nprocs; i++)
            if (nodes_marked[i] == mypid) children[n_children++] = i;
      }
      hypre_TFree(nodes_marked, HYPRE_MEMORY_HOST);
      hypre_TFree(queue, HYPRE_MEMORY_HOST);
      hypre_TFree(pgraph_i, HYPRE_MEMORY_HOST);
      hypre_TFree(pgraph_j, HYPRE_MEMORY_HOST);
   }

   /* first, connection with my parent : if the edge in my parent *
    * is incident to one of my nodes, then my parent will mark it */

   found = 0;
   for (i = 0; i < nrecvs; i++)
   {
      proc = hypre_ParCSRCommPkgRecvProc(comm_pkg, i);
      if (proc == parent)
      {
         found = 1;
         break;
      }
   }

   /* but if all the edges connected to my parent are on my side, *
    * then I will just pick one of them as tree edge               */

   if (found == 0)
   {
      for (i = 0; i < nsends; i++)
      {
         proc = hypre_ParCSRCommPkgSendProc(comm_pkg, i);
         if (proc == parent)
         {
            /* mark the first edge sent to the parent as a tree edge */
            k = hypre_ParCSRCommPkgSendMapStart(comm_pkg,i);
            edge = hypre_ParCSRCommPkgSendMapElmt(comm_pkg,k);
            edges_marked[edge] = 1;
            break;
         }
      }
   }

   /* next, if my processor has an edge incident on one node in my *
    * child, put this edge on the tree. But if there is no such    *
    * edge, then I will assume my child will pick up an edge       */

   for (j = 0; j < n_children; j++)
   {
      proc = children[j];
      for (i = 0; i < nsends; i++)
      {
         proc2 = hypre_ParCSRCommPkgSendProc(comm_pkg, i);
         if (proc == proc2)
         {
            k = hypre_ParCSRCommPkgSendMapStart(comm_pkg,i);
            edge = hypre_ParCSRCommPkgSendMapElmt(comm_pkg,k);
            edges_marked[edge] = 1;
            break;
         }
      }
   }
   if (n_children > 0)
   {
      hypre_TFree(children, HYPRE_MEMORY_HOST);
   }

   /* count the size of the tree */

   tree_size = 0;
   for (i = 0; i < ncols_G; i++)
      if (edges_marked[i] == 1) tree_size++;
   /* output layout: t_indices[0] = count, t_indices[1..] = edge numbers */
   t_indices = hypre_TAlloc(HYPRE_Int, (tree_size+1) , HYPRE_MEMORY_HOST);
   t_indices[0] = tree_size;
   tree_size = 1;
   for (i = 0; i < ncols_G; i++)
      if (edges_marked[i] == 1) t_indices[tree_size++] = i;
   (*indices) = t_indices;
   hypre_TFree(edges_marked, HYPRE_MEMORY_HOST);
   if (G_type != 0)
   {
      /* free the locally built node-to-edge copy */
      hypre_TFree(G_diag_i, HYPRE_MEMORY_HOST);
      hypre_TFree(G_diag_j, HYPRE_MEMORY_HOST);
   }
}
2455
2456 /* -----------------------------------------------------------------------------
2457 * extract submatrices based on given indices
2458 * ----------------------------------------------------------------------------- */
2459
/* Splits A_csr into the 2x2 block form [A11 A12; A21 A22] induced by the
 * row/column index set given in indices2 (indices2[0] = count, the indices
 * follow).  Block 1 corresponds to the listed indices, block 2 to the
 * complement.  The four matrices are returned in (*submatrices)[0..3];
 * the caller must have allocated the submatrices array and later owns the
 * returned matrices.  Only the sequential (nprocs == 1) case is supported;
 * the routine aborts otherwise. */
void hypre_ParCSRMatrixExtractSubmatrices( hypre_ParCSRMatrix *A_csr,
                                           HYPRE_Int *indices2,
                                           hypre_ParCSRMatrix ***submatrices )
{
   HYPRE_Int    nrows_A, nindices, *indices, *A_diag_i, *A_diag_j, mypid, nprocs;
   HYPRE_Int    i, j, k, *proc_offsets1, *proc_offsets2, *exp_indices;
   HYPRE_BigInt *itmp_array;
   HYPRE_Int    nnz11, nnz12, nnz21, nnz22, col, ncols_offd, nnz_offd, nnz_diag;
   HYPRE_Int    nrows, nnz;
   HYPRE_BigInt global_nrows, global_ncols, *row_starts, *col_starts;
   HYPRE_Int    *diag_i, *diag_j, row, *offd_i;
   HYPRE_Complex *A_diag_a, *diag_a;
   hypre_ParCSRMatrix *A11_csr, *A12_csr, *A21_csr, *A22_csr;
   hypre_CSRMatrix    *A_diag, *diag, *offd;
   MPI_Comm           comm;

   /* -----------------------------------------------------
    * first make sure the incoming indices are in order
    * ----------------------------------------------------- */

   nindices = indices2[0];
   indices  = &(indices2[1]);
   /* NOTE(review): sorts the caller's array in place */
   hypre_qsort0(indices, 0, nindices-1);

   /* -----------------------------------------------------
    * fetch matrix information
    * ----------------------------------------------------- */

   nrows_A  = (HYPRE_Int) hypre_ParCSRMatrixGlobalNumRows(A_csr);
   A_diag   = hypre_ParCSRMatrixDiag(A_csr);
   A_diag_i = hypre_CSRMatrixI(A_diag);
   A_diag_j = hypre_CSRMatrixJ(A_diag);
   A_diag_a = hypre_CSRMatrixData(A_diag);
   comm     = hypre_ParCSRMatrixComm(A_csr);
   hypre_MPI_Comm_rank(comm, &mypid);
   hypre_MPI_Comm_size(comm, &nprocs);
   if (nprocs > 1)
   {
      /* parallel case not implemented; hard abort rather than error return */
      hypre_error_w_msg(HYPRE_ERROR_GENERIC,"ExtractSubmatrices: cannot handle nprocs > 1 yet.\n");
      exit(1);
   }

   /* -----------------------------------------------------
    * compute new matrix dimensions
    * ----------------------------------------------------- */

   /* proc_offsets1[p] = global offset of the "selected" rows on proc p;
    * proc_offsets2[p] = offset of the complement rows */
   proc_offsets1 = hypre_TAlloc(HYPRE_Int, (nprocs+1) , HYPRE_MEMORY_HOST);
   proc_offsets2 = hypre_TAlloc(HYPRE_Int, (nprocs+1) , HYPRE_MEMORY_HOST);
   hypre_MPI_Allgather(&nindices, 1, HYPRE_MPI_INT, proc_offsets1, 1,
                       HYPRE_MPI_INT, comm);
   /* exclusive prefix sum of the gathered counts */
   k = 0;
   for (i = 0; i < nprocs; i++)
   {
      j = proc_offsets1[i];
      proc_offsets1[i] = k;
      k += j;
   }
   proc_offsets1[nprocs] = k;
   itmp_array = hypre_ParCSRMatrixRowStarts(A_csr);
   for (i = 0; i <= nprocs; i++)
   {
      proc_offsets2[i] = itmp_array[i] - proc_offsets1[i];
   }

   /* -----------------------------------------------------
    * assign id's to row and col for later processing
    * ----------------------------------------------------- */

   /* exp_indices[i] >= 0  : row i is selected, value = its new block-1 index
    * exp_indices[i] == -k-1: row i is in the complement with block-2 index k */
   exp_indices = hypre_TAlloc(HYPRE_Int, nrows_A , HYPRE_MEMORY_HOST);
   for (i = 0; i < nrows_A; i++) exp_indices[i] = -1;
   for (i = 0; i < nindices; i++)
   {
      if (exp_indices[indices[i]] == -1) exp_indices[indices[i]] = i;
      else
      {
         /* duplicate index in the input list.
          * NOTE(review): the %d %d specifiers are never substituted —
          * hypre_error_w_msg takes no varargs; message prints literally. */
         hypre_error_w_msg(HYPRE_ERROR_GENERIC,"ExtractSubmatrices: wrong index %d %d\n");
         exit(1);
      }
   }
   k = 0;
   for (i = 0; i < nrows_A; i++)
   {
      if (exp_indices[i] < 0)
      {
         exp_indices[i] = - k - 1;
         k++;
      }
   }

   /* -----------------------------------------------------
    * compute number of nonzeros for each block
    * ----------------------------------------------------- */

   nnz11 = nnz12 = nnz21 = nnz22 = 0;
   for (i = 0; i < nrows_A; i++)
   {
      if (exp_indices[i] >= 0)
      {
         for (j = A_diag_i[i]; j < A_diag_i[i+1]; j++)
         {
            col = A_diag_j[j];
            if (exp_indices[col] >= 0) nnz11++;
            else                       nnz12++;
         }
      }
      else
      {
         for (j = A_diag_i[i]; j < A_diag_i[i+1]; j++)
         {
            col = A_diag_j[j];
            if (exp_indices[col] >= 0) nnz21++;
            else                       nnz22++;
         }
      }
   }

   /* -----------------------------------------------------
    * create A11 matrix (assume sequential for the moment)
    * ----------------------------------------------------- */

   ncols_offd = 0;
   nnz_offd   = 0;
   nnz_diag   = nnz11;
   /* This case is not yet implemented! */
   global_nrows = 0;
   global_ncols = 0;
   row_starts = NULL;
   col_starts = NULL;
   A11_csr = hypre_ParCSRMatrixCreate(comm, global_nrows, global_ncols,
                                      row_starts, col_starts, ncols_offd,
                                      nnz_diag, nnz_offd);
   nrows = nindices;
   /* build the diag CSR of A11; ownership of these arrays transfers to the
    * CSR struct below */
   diag_i = hypre_CTAlloc(HYPRE_Int, nrows+1, HYPRE_MEMORY_HOST);
   diag_j = hypre_CTAlloc(HYPRE_Int, nnz_diag, HYPRE_MEMORY_HOST);
   diag_a = hypre_CTAlloc(HYPRE_Complex, nnz_diag, HYPRE_MEMORY_HOST);
   nnz = 0;
   row = 0;
   diag_i[0] = 0;
   for (i = 0; i < nrows_A; i++)
   {
      if (exp_indices[i] >= 0)
      {
         for (j = A_diag_i[i]; j < A_diag_i[i+1]; j++)
         {
            col = A_diag_j[j];
            if (exp_indices[col] >= 0)
            {
               diag_j[nnz] = exp_indices[col];   /* renumber to block-1 ids */
               diag_a[nnz++] = A_diag_a[j];
            }
         }
         row++;
         diag_i[row] = nnz;
      }
   }
   diag = hypre_ParCSRMatrixDiag(A11_csr);
   hypre_CSRMatrixI(diag)    = diag_i;
   hypre_CSRMatrixJ(diag)    = diag_j;
   hypre_CSRMatrixData(diag) = diag_a;

   /* empty offd part (sequential run has no off-processor columns) */
   offd_i = hypre_CTAlloc(HYPRE_Int, nrows+1, HYPRE_MEMORY_HOST);
   for (i = 0; i <= nrows; i++) offd_i[i] = 0;
   offd = hypre_ParCSRMatrixOffd(A11_csr);
   hypre_CSRMatrixI(offd)    = offd_i;
   hypre_CSRMatrixJ(offd)    = NULL;
   hypre_CSRMatrixData(offd) = NULL;

   /* -----------------------------------------------------
    * create A12 matrix (assume sequential for the moment)
    * ----------------------------------------------------- */

   ncols_offd = 0;
   nnz_offd   = 0;
   nnz_diag   = nnz12;
   global_nrows = (HYPRE_BigInt)proc_offsets1[nprocs];
   global_ncols = (HYPRE_BigInt)proc_offsets2[nprocs];
   /* row_starts/col_starts are copied by hypre_ParCSRMatrixCreate and freed
    * again after the call */
   row_starts = hypre_CTAlloc(HYPRE_BigInt, nprocs+1, HYPRE_MEMORY_HOST);
   col_starts = hypre_CTAlloc(HYPRE_BigInt, nprocs+1, HYPRE_MEMORY_HOST);
   for (i = 0; i <= nprocs; i++)
   {
      row_starts[i] = (HYPRE_BigInt)proc_offsets1[i];
      col_starts[i] = (HYPRE_BigInt)proc_offsets2[i];
   }
   A12_csr = hypre_ParCSRMatrixCreate(comm, global_nrows, global_ncols,
                                      row_starts, col_starts, ncols_offd,
                                      nnz_diag, nnz_offd);
   nrows = nindices;
   diag_i = hypre_CTAlloc(HYPRE_Int, nrows+1, HYPRE_MEMORY_HOST);
   diag_j = hypre_CTAlloc(HYPRE_Int, nnz_diag, HYPRE_MEMORY_HOST);
   diag_a = hypre_CTAlloc(HYPRE_Complex, nnz_diag, HYPRE_MEMORY_HOST);
   nnz = 0;
   row = 0;
   diag_i[0] = 0;
   for (i = 0; i < nrows_A; i++)
   {
      if (exp_indices[i] >= 0)
      {
         for (j = A_diag_i[i]; j < A_diag_i[i+1]; j++)
         {
            col = A_diag_j[j];
            if (exp_indices[col] < 0)
            {
               /* complement columns are stored as -k-1; decode to k */
               diag_j[nnz] = - exp_indices[col] - 1;
               diag_a[nnz++] = A_diag_a[j];
            }
         }
         row++;
         diag_i[row] = nnz;
      }
   }

   /* sanity check: the fill pass must not exceed the counted nnz12 */
   if (nnz > nnz_diag)
   {
      hypre_assert(0);
      hypre_error(HYPRE_ERROR_GENERIC);
   }

   diag = hypre_ParCSRMatrixDiag(A12_csr);
   hypre_CSRMatrixI(diag)    = diag_i;
   hypre_CSRMatrixJ(diag)    = diag_j;
   hypre_CSRMatrixData(diag) = diag_a;

   offd_i = hypre_CTAlloc(HYPRE_Int, nrows+1, HYPRE_MEMORY_HOST);
   for (i = 0; i <= nrows; i++) offd_i[i] = 0;
   offd = hypre_ParCSRMatrixOffd(A12_csr);
   hypre_CSRMatrixI(offd)    = offd_i;
   hypre_CSRMatrixJ(offd)    = NULL;
   hypre_CSRMatrixData(offd) = NULL;
   hypre_TFree(row_starts, HYPRE_MEMORY_HOST);
   hypre_TFree(col_starts, HYPRE_MEMORY_HOST);

   /* -----------------------------------------------------
    * create A21 matrix (assume sequential for the moment)
    * ----------------------------------------------------- */

   ncols_offd = 0;
   nnz_offd   = 0;
   nnz_diag   = nnz21;
   global_nrows = (HYPRE_BigInt)proc_offsets2[nprocs];
   global_ncols = (HYPRE_BigInt)proc_offsets1[nprocs];
   row_starts = hypre_CTAlloc(HYPRE_BigInt, nprocs+1, HYPRE_MEMORY_HOST);
   col_starts = hypre_CTAlloc(HYPRE_BigInt, nprocs+1, HYPRE_MEMORY_HOST);
   for (i = 0; i <= nprocs; i++)
   {
      row_starts[i] = (HYPRE_BigInt)proc_offsets2[i];
      col_starts[i] = (HYPRE_BigInt)proc_offsets1[i];
   }
   A21_csr = hypre_ParCSRMatrixCreate(comm, global_nrows, global_ncols,
                                      row_starts, col_starts, ncols_offd,
                                      nnz_diag, nnz_offd);
   nrows = nrows_A - nindices;   /* complement rows */
   diag_i = hypre_CTAlloc(HYPRE_Int, nrows+1, HYPRE_MEMORY_HOST);
   diag_j = hypre_CTAlloc(HYPRE_Int, nnz_diag, HYPRE_MEMORY_HOST);
   diag_a = hypre_CTAlloc(HYPRE_Complex, nnz_diag, HYPRE_MEMORY_HOST);
   nnz = 0;
   row = 0;
   diag_i[0] = 0;
   for (i = 0; i < nrows_A; i++)
   {
      if (exp_indices[i] < 0)
      {
         for (j = A_diag_i[i]; j < A_diag_i[i+1]; j++)
         {
            col = A_diag_j[j];
            if (exp_indices[col] >= 0)
            {
               diag_j[nnz] = exp_indices[col];
               diag_a[nnz++] = A_diag_a[j];
            }
         }
         row++;
         diag_i[row] = nnz;
      }
   }
   diag = hypre_ParCSRMatrixDiag(A21_csr);
   hypre_CSRMatrixI(diag)    = diag_i;
   hypre_CSRMatrixJ(diag)    = diag_j;
   hypre_CSRMatrixData(diag) = diag_a;

   offd_i = hypre_CTAlloc(HYPRE_Int, nrows+1, HYPRE_MEMORY_HOST);
   for (i = 0; i <= nrows; i++) offd_i[i] = 0;
   offd = hypre_ParCSRMatrixOffd(A21_csr);
   hypre_CSRMatrixI(offd)    = offd_i;
   hypre_CSRMatrixJ(offd)    = NULL;
   hypre_CSRMatrixData(offd) = NULL;
   hypre_TFree(row_starts, HYPRE_MEMORY_HOST);
   hypre_TFree(col_starts, HYPRE_MEMORY_HOST);

   /* -----------------------------------------------------
    * create A22 matrix (assume sequential for the moment)
    * ----------------------------------------------------- */

   ncols_offd = 0;
   nnz_offd   = 0;
   nnz_diag   = nnz22;
   global_nrows = (HYPRE_BigInt)proc_offsets2[nprocs];
   global_ncols = (HYPRE_BigInt)proc_offsets2[nprocs];
   row_starts = hypre_CTAlloc(HYPRE_BigInt, nprocs+1, HYPRE_MEMORY_HOST);
   col_starts = hypre_CTAlloc(HYPRE_BigInt, nprocs+1, HYPRE_MEMORY_HOST);
   for (i = 0; i <= nprocs; i++)
   {
      row_starts[i] = (HYPRE_BigInt)proc_offsets2[i];
      col_starts[i] = (HYPRE_BigInt)proc_offsets2[i];
   }
   A22_csr = hypre_ParCSRMatrixCreate(comm, global_nrows, global_ncols,
                                      row_starts, col_starts, ncols_offd,
                                      nnz_diag, nnz_offd);
   nrows = nrows_A - nindices;
   diag_i = hypre_CTAlloc(HYPRE_Int, nrows+1, HYPRE_MEMORY_HOST);
   diag_j = hypre_CTAlloc(HYPRE_Int, nnz_diag, HYPRE_MEMORY_HOST);
   diag_a = hypre_CTAlloc(HYPRE_Complex, nnz_diag, HYPRE_MEMORY_HOST);
   nnz = 0;
   row = 0;
   diag_i[0] = 0;
   for (i = 0; i < nrows_A; i++)
   {
      if (exp_indices[i] < 0)
      {
         for (j = A_diag_i[i]; j < A_diag_i[i+1]; j++)
         {
            col = A_diag_j[j];
            if (exp_indices[col] < 0)
            {
               diag_j[nnz] = - exp_indices[col] - 1;
               diag_a[nnz++] = A_diag_a[j];
            }
         }
         row++;
         diag_i[row] = nnz;
      }
   }
   diag = hypre_ParCSRMatrixDiag(A22_csr);
   hypre_CSRMatrixI(diag)    = diag_i;
   hypre_CSRMatrixJ(diag)    = diag_j;
   hypre_CSRMatrixData(diag) = diag_a;

   offd_i = hypre_CTAlloc(HYPRE_Int, nrows+1, HYPRE_MEMORY_HOST);
   for (i = 0; i <= nrows; i++) offd_i[i] = 0;
   offd = hypre_ParCSRMatrixOffd(A22_csr);
   hypre_CSRMatrixI(offd)    = offd_i;
   hypre_CSRMatrixJ(offd)    = NULL;
   hypre_CSRMatrixData(offd) = NULL;
   hypre_TFree(row_starts, HYPRE_MEMORY_HOST);
   hypre_TFree(col_starts, HYPRE_MEMORY_HOST);

   /* -----------------------------------------------------
    * hand the matrices back to the caller and clean up
    * ----------------------------------------------------- */

   (*submatrices)[0] = A11_csr;
   (*submatrices)[1] = A12_csr;
   (*submatrices)[2] = A21_csr;
   (*submatrices)[3] = A22_csr;
   hypre_TFree(proc_offsets1, HYPRE_MEMORY_HOST);
   hypre_TFree(proc_offsets2, HYPRE_MEMORY_HOST);
   hypre_TFree(exp_indices, HYPRE_MEMORY_HOST);
}
2817
2818 /* -----------------------------------------------------------------------------
2819 * extract submatrices of a rectangular matrix
2820 * ----------------------------------------------------------------------------- */
2821
hypre_ParCSRMatrixExtractRowSubmatrices(hypre_ParCSRMatrix * A_csr,HYPRE_Int * indices2,hypre_ParCSRMatrix *** submatrices)2822 void hypre_ParCSRMatrixExtractRowSubmatrices( hypre_ParCSRMatrix *A_csr,
2823 HYPRE_Int *indices2,
2824 hypre_ParCSRMatrix ***submatrices )
2825 {
2826 HYPRE_Int nrows_A, nindices, *indices, *A_diag_i, *A_diag_j, mypid, nprocs;
2827 HYPRE_Int i, j, k, *proc_offsets1, *proc_offsets2, *exp_indices;
2828 HYPRE_Int nnz11, nnz21, col, ncols_offd, nnz_offd, nnz_diag;
2829 HYPRE_Int *A_offd_i, *A_offd_j;
2830 HYPRE_Int nrows, nnz;
2831 HYPRE_BigInt global_nrows, global_ncols, *row_starts, *col_starts, *itmp_array;
2832 HYPRE_Int *diag_i, *diag_j, row, *offd_i, *offd_j, nnz11_offd, nnz21_offd;
2833 HYPRE_Complex *A_diag_a, *diag_a, *offd_a;
2834 hypre_ParCSRMatrix *A11_csr, *A21_csr;
2835 hypre_CSRMatrix *A_diag, *diag, *A_offd, *offd;
2836 MPI_Comm comm;
2837
2838 /* -----------------------------------------------------
2839 * first make sure the incoming indices are in order
2840 * ----------------------------------------------------- */
2841
2842 nindices = indices2[0];
2843 indices = &(indices2[1]);
2844 hypre_qsort0(indices, 0, nindices-1);
2845
2846 /* -----------------------------------------------------
2847 * fetch matrix information
2848 * ----------------------------------------------------- */
2849
2850 nrows_A = (HYPRE_Int)hypre_ParCSRMatrixGlobalNumRows(A_csr);
2851 A_diag = hypre_ParCSRMatrixDiag(A_csr);
2852 A_diag_i = hypre_CSRMatrixI(A_diag);
2853 A_diag_j = hypre_CSRMatrixJ(A_diag);
2854 A_diag_a = hypre_CSRMatrixData(A_diag);
2855 A_offd = hypre_ParCSRMatrixOffd(A_csr);
2856 A_offd_i = hypre_CSRMatrixI(A_offd);
2857 A_offd_j = hypre_CSRMatrixJ(A_offd);
2858 comm = hypre_ParCSRMatrixComm(A_csr);
2859 hypre_MPI_Comm_rank(comm, &mypid);
2860 hypre_MPI_Comm_size(comm, &nprocs);
2861
2862 /* -----------------------------------------------------
2863 * compute new matrix dimensions
2864 * ----------------------------------------------------- */
2865
2866 proc_offsets1 = hypre_TAlloc(HYPRE_Int, (nprocs+1) , HYPRE_MEMORY_HOST);
2867 proc_offsets2 = hypre_TAlloc(HYPRE_Int, (nprocs+1) , HYPRE_MEMORY_HOST);
2868 hypre_MPI_Allgather(&nindices, 1, HYPRE_MPI_INT, proc_offsets1, 1,
2869 HYPRE_MPI_INT, comm);
2870 k = 0;
2871 for (i = 0; i < nprocs; i++)
2872 {
2873 j = proc_offsets1[i];
2874 proc_offsets1[i] = k;
2875 k += j;
2876 }
2877 proc_offsets1[nprocs] = k;
2878 itmp_array = hypre_ParCSRMatrixRowStarts(A_csr);
2879 for (i = 0; i <= nprocs; i++)
2880 proc_offsets2[i] = (HYPRE_Int)(itmp_array[i] - proc_offsets1[i]);
2881
2882 /* -----------------------------------------------------
2883 * assign id's to row and col for later processing
2884 * ----------------------------------------------------- */
2885
2886 exp_indices = hypre_TAlloc(HYPRE_Int, nrows_A , HYPRE_MEMORY_HOST);
2887 for (i = 0; i < nrows_A; i++) exp_indices[i] = -1;
2888 for (i = 0; i < nindices; i++)
2889 {
2890 if (exp_indices[indices[i]] == -1) exp_indices[indices[i]] = i;
2891 else
2892 {
2893 hypre_error_w_msg(HYPRE_ERROR_GENERIC,"ExtractRowSubmatrices: wrong index %d %d\n");
2894 exit(1);
2895 }
2896 }
2897 k = 0;
2898 for (i = 0; i < nrows_A; i++)
2899 {
2900 if (exp_indices[i] < 0)
2901 {
2902 exp_indices[i] = - k - 1;
2903 k++;
2904 }
2905 }
2906
2907 /* -----------------------------------------------------
2908 * compute number of nonzeros for each block
2909 * ----------------------------------------------------- */
2910
2911 nnz11 = nnz21 = nnz11_offd = nnz21_offd = 0;
2912 for (i = 0; i < nrows_A; i++)
2913 {
2914 if (exp_indices[i] >= 0)
2915 {
2916 for (j = A_diag_i[i]; j < A_diag_i[i+1]; j++)
2917 {
2918 col = A_diag_j[j];
2919 if (exp_indices[col] >= 0) nnz11++;
2920 }
2921 nnz11_offd += A_offd_i[i+1] - A_offd_i[i];
2922 }
2923 else
2924 {
2925 for (j = A_diag_i[i]; j < A_diag_i[i+1]; j++)
2926 {
2927 col = A_diag_j[j];
2928 if (exp_indices[col] < 0) nnz21++;
2929 }
2930 nnz21_offd += A_offd_i[i+1] - A_offd_i[i];
2931 }
2932 }
2933
2934 /* -----------------------------------------------------
2935 * create A11 matrix (assume sequential for the moment)
2936 * ----------------------------------------------------- */
2937
2938 ncols_offd = hypre_CSRMatrixNumCols(hypre_ParCSRMatrixDiag(A_csr));
2939 nnz_diag = nnz11;
2940 nnz_offd = nnz11_offd;
2941
2942 global_nrows = (HYPRE_BigInt)proc_offsets1[nprocs];
2943 itmp_array = hypre_ParCSRMatrixColStarts(A_csr);
2944 global_ncols = itmp_array[nprocs];
2945 row_starts = hypre_CTAlloc(HYPRE_BigInt, nprocs+1, HYPRE_MEMORY_HOST);
2946 col_starts = hypre_CTAlloc(HYPRE_BigInt, nprocs+1, HYPRE_MEMORY_HOST);
2947 for (i = 0; i <= nprocs; i++)
2948 {
2949 row_starts[i] = (HYPRE_BigInt)proc_offsets1[i];
2950 col_starts[i] = itmp_array[i];
2951 }
2952 A11_csr = hypre_ParCSRMatrixCreate(comm, global_nrows, global_ncols,
2953 row_starts, col_starts, ncols_offd,
2954 nnz_diag, nnz_offd);
2955 nrows = nindices;
2956 diag_i = hypre_CTAlloc(HYPRE_Int, nrows+1, HYPRE_MEMORY_HOST);
2957 diag_j = hypre_CTAlloc(HYPRE_Int, nnz_diag, HYPRE_MEMORY_HOST);
2958 diag_a = hypre_CTAlloc(HYPRE_Complex, nnz_diag, HYPRE_MEMORY_HOST);
2959 nnz = 0;
2960 row = 0;
2961 diag_i[0] = 0;
2962 for (i = 0; i < nrows_A; i++)
2963 {
2964 if (exp_indices[i] >= 0)
2965 {
2966 for (j = A_diag_i[i]; j < A_diag_i[i+1]; j++)
2967 {
2968 col = A_diag_j[j];
2969 if (exp_indices[col] >= 0)
2970 {
2971 diag_j[nnz] = exp_indices[col];
2972 diag_a[nnz++] = A_diag_a[j];
2973 }
2974 }
2975 row++;
2976 diag_i[row] = nnz;
2977 }
2978 }
2979 diag = hypre_ParCSRMatrixDiag(A11_csr);
2980 hypre_CSRMatrixI(diag) = diag_i;
2981 hypre_CSRMatrixJ(diag) = diag_j;
2982 hypre_CSRMatrixData(diag) = diag_a;
2983
2984 offd_i = hypre_CTAlloc(HYPRE_Int, nrows+1, HYPRE_MEMORY_HOST);
2985 offd_j = hypre_CTAlloc(HYPRE_Int, nnz_offd, HYPRE_MEMORY_HOST);
2986 offd_a = hypre_CTAlloc(HYPRE_Complex, nnz_offd, HYPRE_MEMORY_HOST);
2987 nnz = 0;
2988 row = 0;
2989 offd_i[0] = 0;
2990 for (i = 0; i < nrows_A; i++)
2991 {
2992 if (exp_indices[i] >= 0)
2993 {
2994 for (j = A_offd_i[i]; j < A_offd_i[i+1]; j++)
2995 {
2996 offd_j[nnz] = A_offd_j[j];
2997 offd_a[nnz++] = A_diag_a[j];
2998 }
2999 row++;
3000 offd_i[row] = nnz;
3001 }
3002 }
3003 offd = hypre_ParCSRMatrixOffd(A11_csr);
3004 hypre_CSRMatrixI(offd) = offd_i;
3005 hypre_CSRMatrixJ(offd) = offd_j;
3006 hypre_CSRMatrixData(offd) = offd_a;
3007 hypre_TFree(row_starts, HYPRE_MEMORY_HOST);
3008 hypre_TFree(col_starts, HYPRE_MEMORY_HOST);
3009
3010 /* -----------------------------------------------------
3011 * create A21 matrix
3012 * ----------------------------------------------------- */
3013
3014 ncols_offd = hypre_CSRMatrixNumCols(hypre_ParCSRMatrixDiag(A_csr));
3015 nnz_offd = nnz21_offd;
3016 nnz_diag = nnz21;
3017 global_nrows = (HYPRE_BigInt)proc_offsets2[nprocs];
3018 itmp_array = hypre_ParCSRMatrixColStarts(A_csr);
3019 global_ncols = itmp_array[nprocs];
3020 row_starts = hypre_CTAlloc(HYPRE_BigInt, nprocs+1, HYPRE_MEMORY_HOST);
3021 col_starts = hypre_CTAlloc(HYPRE_BigInt, nprocs+1, HYPRE_MEMORY_HOST);
3022 for (i = 0; i <= nprocs; i++)
3023 {
3024 row_starts[i] = (HYPRE_BigInt)proc_offsets2[i];
3025 col_starts[i] = itmp_array[i];
3026 }
3027 A21_csr = hypre_ParCSRMatrixCreate(comm, global_nrows, global_ncols,
3028 row_starts, col_starts, ncols_offd,
3029 nnz_diag, nnz_offd);
3030 nrows = nrows_A - nindices;
3031 diag_i = hypre_CTAlloc(HYPRE_Int, nrows+1, HYPRE_MEMORY_HOST);
3032 diag_j = hypre_CTAlloc(HYPRE_Int, nnz_diag, HYPRE_MEMORY_HOST);
3033 diag_a = hypre_CTAlloc(HYPRE_Complex, nnz_diag, HYPRE_MEMORY_HOST);
3034 nnz = 0;
3035 row = 0;
3036 diag_i[0] = 0;
3037 for (i = 0; i < nrows_A; i++)
3038 {
3039 if (exp_indices[i] < 0)
3040 {
3041 for (j = A_diag_i[i]; j < A_diag_i[i+1]; j++)
3042 {
3043 diag_j[nnz] = A_diag_j[j];
3044 diag_a[nnz++] = A_diag_a[j];
3045 }
3046 row++;
3047 diag_i[row] = nnz;
3048 }
3049 }
3050 diag = hypre_ParCSRMatrixDiag(A21_csr);
3051 hypre_CSRMatrixI(diag) = diag_i;
3052 hypre_CSRMatrixJ(diag) = diag_j;
3053 hypre_CSRMatrixData(diag) = diag_a;
3054
3055 offd_i = hypre_CTAlloc(HYPRE_Int, nrows+1, HYPRE_MEMORY_HOST);
3056 offd_j = hypre_CTAlloc(HYPRE_Int, nnz_offd, HYPRE_MEMORY_HOST);
3057 offd_a = hypre_CTAlloc(HYPRE_Complex, nnz_offd, HYPRE_MEMORY_HOST);
3058 nnz = 0;
3059 row = 0;
3060 offd_i[0] = 0;
3061 for (i = 0; i < nrows_A; i++)
3062 {
3063 if (exp_indices[i] < 0)
3064 {
3065 for (j = A_offd_i[i]; j < A_offd_i[i+1]; j++)
3066 {
3067 offd_j[nnz] = A_offd_j[j];
3068 offd_a[nnz++] = A_diag_a[j];
3069 }
3070 row++;
3071 offd_i[row] = nnz;
3072 }
3073 }
3074 offd = hypre_ParCSRMatrixOffd(A21_csr);
3075 hypre_CSRMatrixI(offd) = offd_i;
3076 hypre_CSRMatrixJ(offd) = offd_j;
3077 hypre_CSRMatrixData(offd) = offd_a;
3078 hypre_TFree(row_starts, HYPRE_MEMORY_HOST);
3079 hypre_TFree(col_starts, HYPRE_MEMORY_HOST);
3080
3081 /* -----------------------------------------------------
3082 * hand the matrices back to the caller and clean up
3083 * ----------------------------------------------------- */
3084
3085 (*submatrices)[0] = A11_csr;
3086 (*submatrices)[1] = A21_csr;
3087 hypre_TFree(proc_offsets1, HYPRE_MEMORY_HOST);
3088 hypre_TFree(proc_offsets2, HYPRE_MEMORY_HOST);
3089 hypre_TFree(exp_indices, HYPRE_MEMORY_HOST);
3090 }
3091
3092 /* -----------------------------------------------------------------------------
3093 * return the sum of all local elements of the matrix
3094 * ----------------------------------------------------------------------------- */
3095
hypre_ParCSRMatrixLocalSumElts(hypre_ParCSRMatrix * A)3096 HYPRE_Complex hypre_ParCSRMatrixLocalSumElts( hypre_ParCSRMatrix * A )
3097 {
3098 hypre_CSRMatrix * A_diag = hypre_ParCSRMatrixDiag( A );
3099 hypre_CSRMatrix * A_offd = hypre_ParCSRMatrixOffd( A );
3100
3101 return hypre_CSRMatrixSumElts(A_diag) + hypre_CSRMatrixSumElts(A_offd);
3102 }
3103
3104 /*--------------------------------------------------------------------------
3105 * hypre_ParCSRMatrixMatAminvDB
3106 * computes C = (A - inv(D)B) where D is a diagonal matrix
3107 * Note: Data structure of A is expected to be a subset of data structure of B!
3108 *--------------------------------------------------------------------------*/
3109
HYPRE_Int
hypre_ParCSRMatrixAminvDB( hypre_ParCSRMatrix *A,
                           hypre_ParCSRMatrix *B,
                           HYPRE_Complex *d,
                           hypre_ParCSRMatrix **C_ptr)
{
   /* Computes C = (A - inv(D)*B) where D is the diagonal matrix whose local
    * entries are supplied in d (one value per local row of B).
    *
    * C is built as a clone of B and filled in place, so A's sparsity pattern
    * must be a subset of B's (see the note above this function).
    * NOTE(review): that precondition is assumed, not checked here.
    *
    * Returns hypre_error_flag; the result matrix is handed back in *C_ptr.
    * Ownership of C transfers to the caller. */
   MPI_Comm comm = hypre_ParCSRMatrixComm(B);
   hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
   hypre_ParCSRMatrix *C = NULL;
   HYPRE_Int num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd);

   hypre_ParCSRCommPkg *comm_pkg_B = hypre_ParCSRMatrixCommPkg(B);
   hypre_CSRMatrix *B_diag = hypre_ParCSRMatrixDiag(B);
   hypre_CSRMatrix *B_offd = hypre_ParCSRMatrixOffd(B);
   HYPRE_Int num_cols_offd_B = hypre_CSRMatrixNumCols(B_offd);
   HYPRE_Int num_sends_B, num_recvs_B;
   HYPRE_Int i, j, cnt;

   HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag);
   HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag);
   HYPRE_Complex *A_diag_data = hypre_CSRMatrixData(A_diag);
   HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd);
   HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd);
   HYPRE_Complex *A_offd_data = hypre_CSRMatrixData(A_offd);
   HYPRE_BigInt *col_map_offd_A = hypre_ParCSRMatrixColMapOffd(A);

   HYPRE_Int num_rows = hypre_CSRMatrixNumRows(B_diag);
   HYPRE_Int *B_diag_i = hypre_CSRMatrixI(B_diag);
   HYPRE_Int *B_diag_j = hypre_CSRMatrixJ(B_diag);
   HYPRE_Complex *B_diag_data = hypre_CSRMatrixData(B_diag);
   HYPRE_Int *B_offd_i = hypre_CSRMatrixI(B_offd);
   HYPRE_Int *B_offd_j = hypre_CSRMatrixJ(B_offd);
   HYPRE_Complex *B_offd_data = hypre_CSRMatrixData(B_offd);
   HYPRE_BigInt *col_map_offd_B = hypre_ParCSRMatrixColMapOffd(B);

   hypre_CSRMatrix *C_diag = NULL;
   hypre_CSRMatrix *C_offd = NULL;
   HYPRE_Int *C_diag_i = NULL;
   HYPRE_Int *C_diag_j = NULL;
   HYPRE_Complex *C_diag_data = NULL;
   HYPRE_Int *C_offd_i = NULL;
   HYPRE_Int *C_offd_j = NULL;
   HYPRE_Complex *C_offd_data = NULL;

   HYPRE_Int num_procs, my_id;

   HYPRE_Int *recv_procs_B;
   HYPRE_Int *send_procs_B;
   HYPRE_Int *recv_vec_starts_B;
   HYPRE_Int *send_map_starts_B;
   HYPRE_Int *send_map_elmts_B;
   hypre_ParCSRCommPkg *comm_pkg_C;
   HYPRE_Int *recv_procs_C;
   HYPRE_Int *send_procs_C;
   HYPRE_Int *recv_vec_starts_C;
   HYPRE_Int *send_map_starts_C;
   HYPRE_Int *send_map_elmts_C;
   /* map_to_B[k] = index into B's offd columns of A's k-th offd column;
    * only allocated/used when A has off-diagonal columns */
   HYPRE_Int *map_to_B;

   /*HYPRE_Int *C_diag_array;
     HYPRE_Int *C_offd_array;*/
   HYPRE_Complex *D_tmp;   /* per-row reciprocals 1/d[i] */
   HYPRE_Int size, rest, num_threads, ii;

   hypre_MPI_Comm_size(comm,&num_procs);
   hypre_MPI_Comm_rank(comm,&my_id);

   num_threads = hypre_NumThreads();
   /*C_diag_array = hypre_CTAlloc(HYPRE_Int, num_threads);
     C_offd_array = hypre_CTAlloc(HYPRE_Int, num_threads, HYPRE_MEMORY_HOST);*/

   /*---------------------------------------------------------------------
    * If there exists no CommPkg for B, a CommPkg is generated
    *--------------------------------------------------------------------*/

   if (!comm_pkg_B)
   {
      hypre_MatvecCommPkgCreate(B);
      comm_pkg_B = hypre_ParCSRMatrixCommPkg(B);
   }

   /* C inherits B's structure (row/col partitioning, sparsity pattern);
    * its values are (re)computed below.
    * NOTE(review): Clone's second argument presumably controls whether data
    * values are copied (0 = structure only) -- confirm against the Clone API. */
   C = hypre_ParCSRMatrixClone(B, 0);
   /*hypre_ParCSRMatrixInitialize(C);*/

   C_diag = hypre_ParCSRMatrixDiag(C);
   C_diag_i = hypre_CSRMatrixI(C_diag);
   C_diag_j = hypre_CSRMatrixJ(C_diag);
   C_diag_data = hypre_CSRMatrixData(C_diag);
   C_offd = hypre_ParCSRMatrixOffd(C);
   C_offd_i = hypre_CSRMatrixI(C_offd);
   C_offd_j = hypre_CSRMatrixJ(C_offd);
   C_offd_data = hypre_CSRMatrixData(C_offd);

   /* Static row partition across threads: the first 'rest' threads get
    * (size+1) rows each, the remainder get 'size' rows each. */
   size = num_rows/num_threads;
   rest = num_rows - size*num_threads;

   D_tmp = hypre_CTAlloc(HYPRE_Complex, num_rows, HYPRE_MEMORY_HOST);

   if (num_cols_offd_A)
   {
      /* Build map from A's offd column indices to B's offd column indices.
       * Both col maps are sorted ascending, and every column of A's offd
       * must also appear in B's offd (subset assumption), so a single
       * forward sweep suffices. */
      map_to_B = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A, HYPRE_MEMORY_HOST);
      cnt = 0;
      for (i = 0; i < num_cols_offd_A; i++)
      {
         while (col_map_offd_B[cnt] < col_map_offd_A[i])
         {
            cnt++;
         }
         map_to_B[i] = cnt;
         cnt++;
      }
   }

#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(ii, i, j)
#endif
   for (ii=0; ii < num_threads; ii++)
   {
      HYPRE_Int *A_marker = NULL;
      HYPRE_Int ns, ne, A_col, num_cols, nmax;
      /* Compute this thread's half-open row range [ns, ne). */
      if (ii < rest)
      {
         ns = ii*size+ii;
         ne = (ii+1)*size+ii+1;
      }
      else
      {
         ns = ii*size+rest;
         ne = (ii+1)*size+rest;
      }
      /* One marker array reused for both the diag pass (indexed by diag
       * column, up to num_rows) and the offd pass (indexed by offd column,
       * up to num_cols_offd_B) -- sized for the larger of the two. */
      nmax = hypre_max(num_rows, num_cols_offd_B);
      A_marker = hypre_CTAlloc(HYPRE_Int, nmax, HYPRE_MEMORY_HOST);

      for (i = 0; i < num_rows; i++)
      {
         A_marker[i] = -1;
      }

      /* Each thread fills the reciprocals for its own rows only. */
      for (i = ns; i < ne; i++)
      {
         D_tmp[i] = 1.0/d[i];
      }

      /* Diag part: for each row, first copy A's entries into C, then
       * subtract D^{-1}*B's entries.  A_marker records, per column, the
       * position in C_diag where that column was placed for the current
       * row; a marker value >= C_diag_i[i] means "already seen in row i",
       * in which case we accumulate instead of appending.  Since C is a
       * clone of B, C_diag_i gives exactly B's row pattern offsets. */
      num_cols = C_diag_i[ns];
      for (i = ns; i < ne; i++)
      {
         for (j = A_diag_i[i]; j < A_diag_i[i+1]; j++)
         {
            A_col = A_diag_j[j];
            if (A_marker[A_col] < C_diag_i[i])
            {
               A_marker[A_col] = num_cols;
               C_diag_j[num_cols] = A_col;
               C_diag_data[num_cols] = A_diag_data[j];
               num_cols++;
            }
            else
            {
               C_diag_data[A_marker[A_col]] += A_diag_data[j];
            }
         }
         for (j = B_diag_i[i]; j < B_diag_i[i+1]; j++)
         {
            A_col = B_diag_j[j];
            if (A_marker[A_col] < C_diag_i[i])
            {
               A_marker[A_col] = num_cols;
               C_diag_j[num_cols] = A_col;
               C_diag_data[num_cols] = -D_tmp[i]*B_diag_data[j];
               num_cols++;
            }
            else
            {
               C_diag_data[A_marker[A_col]] -= D_tmp[i]*B_diag_data[j];
            }
         }
      }

      /* Reset markers for the offd pass (indexed by B/C offd columns). */
      for (i = 0; i < num_cols_offd_B; i++)
      {
         A_marker[i] = -1;
      }

      /* Offd part: same merge scheme, with A's offd columns translated to
       * B's offd numbering via map_to_B.
       * NOTE(review): the "already seen" test compares against B_offd_i[i]
       * rather than C_offd_i[i]; these are identical here because C is a
       * clone of B. */
      num_cols = C_offd_i[ns];
      for (i = ns; i < ne; i++)
      {
         for (j = A_offd_i[i]; j < A_offd_i[i+1]; j++)
         {
            A_col = map_to_B[A_offd_j[j]];
            if (A_marker[A_col] < B_offd_i[i])
            {
               A_marker[A_col] = num_cols;
               C_offd_j[num_cols] = A_col;
               C_offd_data[num_cols] = A_offd_data[j];
               num_cols++;
            }
            else
            {
               C_offd_data[A_marker[A_col]] += A_offd_data[j];
            }
         }
         for (j = B_offd_i[i]; j < B_offd_i[i+1]; j++)
         {
            A_col = B_offd_j[j];
            if (A_marker[A_col] < B_offd_i[i])
            {
               A_marker[A_col] = num_cols;
               C_offd_j[num_cols] = A_col;
               C_offd_data[num_cols] = -D_tmp[i]*B_offd_data[j];
               num_cols++;
            }
            else
            {
               C_offd_data[A_marker[A_col]] -= D_tmp[i]*B_offd_data[j];
            }
         }
      }
      hypre_TFree(A_marker, HYPRE_MEMORY_HOST);

   } /* end parallel region */

   /*for (i=0; i < num_cols_offd_B; i++)
     col_map_offd_C[i] = col_map_offd_B[i]; */

   /* C has the same communication pattern as B, but C must own its own
    * comm package, so build a deep copy of B's. */
   num_sends_B = hypre_ParCSRCommPkgNumSends(comm_pkg_B);
   num_recvs_B = hypre_ParCSRCommPkgNumRecvs(comm_pkg_B);
   recv_procs_B = hypre_ParCSRCommPkgRecvProcs(comm_pkg_B);
   recv_vec_starts_B = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg_B);
   send_procs_B = hypre_ParCSRCommPkgSendProcs(comm_pkg_B);
   send_map_starts_B = hypre_ParCSRCommPkgSendMapStarts(comm_pkg_B);
   send_map_elmts_B = hypre_ParCSRCommPkgSendMapElmts(comm_pkg_B);

   recv_procs_C = hypre_CTAlloc(HYPRE_Int, num_recvs_B, HYPRE_MEMORY_HOST);
   recv_vec_starts_C = hypre_CTAlloc(HYPRE_Int, num_recvs_B+1, HYPRE_MEMORY_HOST);
   send_procs_C = hypre_CTAlloc(HYPRE_Int, num_sends_B, HYPRE_MEMORY_HOST);
   send_map_starts_C = hypre_CTAlloc(HYPRE_Int, num_sends_B+1, HYPRE_MEMORY_HOST);
   send_map_elmts_C = hypre_CTAlloc(HYPRE_Int, send_map_starts_B[num_sends_B], HYPRE_MEMORY_HOST);

   for (i=0; i < num_recvs_B; i++)
      recv_procs_C[i] = recv_procs_B[i];
   for (i=0; i < num_recvs_B+1; i++)
      recv_vec_starts_C[i] = recv_vec_starts_B[i];
   for (i=0; i < num_sends_B; i++)
      send_procs_C[i] = send_procs_B[i];
   for (i=0; i < num_sends_B+1; i++)
      send_map_starts_C[i] = send_map_starts_B[i];
   for (i=0; i < send_map_starts_B[num_sends_B]; i++)
      send_map_elmts_C[i] = send_map_elmts_B[i];

   comm_pkg_C = hypre_CTAlloc(hypre_ParCSRCommPkg, 1, HYPRE_MEMORY_HOST);
   hypre_ParCSRCommPkgComm(comm_pkg_C) = comm;
   hypre_ParCSRCommPkgNumRecvs(comm_pkg_C) = num_recvs_B;
   hypre_ParCSRCommPkgRecvProcs(comm_pkg_C) = recv_procs_C;
   hypre_ParCSRCommPkgRecvVecStarts(comm_pkg_C) = recv_vec_starts_C;
   hypre_ParCSRCommPkgNumSends(comm_pkg_C) = num_sends_B;
   hypre_ParCSRCommPkgSendProcs(comm_pkg_C) = send_procs_C;
   hypre_ParCSRCommPkgSendMapStarts(comm_pkg_C) = send_map_starts_C;
   hypre_ParCSRCommPkgSendMapElmts(comm_pkg_C) = send_map_elmts_C;

   hypre_ParCSRMatrixCommPkg(C) = comm_pkg_C;

   /* Clean up temporaries; map_to_B exists only when A has offd columns. */
   hypre_TFree(D_tmp, HYPRE_MEMORY_HOST);
   if (num_cols_offd_A) hypre_TFree(map_to_B, HYPRE_MEMORY_HOST);

   *C_ptr = C;

   return (hypre_error_flag);
}
3379
3380 /*--------------------------------------------------------------------------
3381 * hypre_ParTMatmul:
3382 *
3383 * Multiplies two ParCSRMatrices transpose(A) and B and returns
3384 * the product in ParCSRMatrix C
3385 *
3386 * Note that C does not own the partitionings since its row_starts
3387 * is owned by A and col_starts by B.
3388 *--------------------------------------------------------------------------*/
3389
3390 hypre_ParCSRMatrix*
hypre_ParTMatmul(hypre_ParCSRMatrix * A,hypre_ParCSRMatrix * B)3391 hypre_ParTMatmul( hypre_ParCSRMatrix *A,
3392 hypre_ParCSRMatrix *B)
3393 {
3394 MPI_Comm comm = hypre_ParCSRMatrixComm(A);
3395 hypre_ParCSRCommPkg *comm_pkg_A = hypre_ParCSRMatrixCommPkg(A);
3396
3397 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
3398 hypre_CSRMatrix *AT_diag = NULL;
3399
3400 hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
3401 hypre_CSRMatrix *AT_offd = NULL;
3402
3403 HYPRE_Int num_rows_diag_A = hypre_CSRMatrixNumRows(A_diag);
3404 HYPRE_Int num_cols_diag_A = hypre_CSRMatrixNumCols(A_diag);
3405
3406 hypre_CSRMatrix *B_diag = hypre_ParCSRMatrixDiag(B);
3407
3408 hypre_CSRMatrix *B_offd = hypre_ParCSRMatrixOffd(B);
3409 HYPRE_BigInt *col_map_offd_B = hypre_ParCSRMatrixColMapOffd(B);
3410
3411 HYPRE_BigInt first_col_diag_B = hypre_ParCSRMatrixFirstColDiag(B);
3412 HYPRE_BigInt *col_starts_A = hypre_ParCSRMatrixColStarts(A);
3413 HYPRE_BigInt *col_starts_B = hypre_ParCSRMatrixColStarts(B);
3414 HYPRE_Int num_rows_diag_B = hypre_CSRMatrixNumRows(B_diag);
3415 HYPRE_Int num_cols_diag_B = hypre_CSRMatrixNumCols(B_diag);
3416 HYPRE_Int num_cols_offd_B = hypre_CSRMatrixNumCols(B_offd);
3417
3418 hypre_ParCSRMatrix *C;
3419 HYPRE_BigInt *col_map_offd_C = NULL;
3420 HYPRE_Int *map_B_to_C;
3421
3422 hypre_CSRMatrix *C_diag = NULL;
3423 hypre_CSRMatrix *C_tmp_diag = NULL;
3424
3425 HYPRE_Complex *C_diag_data = NULL;
3426 HYPRE_Int *C_diag_i = NULL;
3427 HYPRE_Int *C_diag_j = NULL;
3428 HYPRE_BigInt first_col_diag_C;
3429 HYPRE_BigInt last_col_diag_C;
3430
3431 hypre_CSRMatrix *C_offd = NULL;
3432 hypre_CSRMatrix *C_tmp_offd = NULL;
3433 hypre_CSRMatrix *C_int = NULL;
3434 hypre_CSRMatrix *C_ext = NULL;
3435 HYPRE_Int *C_ext_i;
3436 HYPRE_BigInt *C_ext_j;
3437 HYPRE_Complex *C_ext_data;
3438 HYPRE_Int *C_ext_diag_i;
3439 HYPRE_Int *C_ext_diag_j;
3440 HYPRE_Complex *C_ext_diag_data;
3441 HYPRE_Int *C_ext_offd_i;
3442 HYPRE_Int *C_ext_offd_j;
3443 HYPRE_Complex *C_ext_offd_data;
3444 HYPRE_Int C_ext_size = 0;
3445 HYPRE_Int C_ext_diag_size = 0;
3446 HYPRE_Int C_ext_offd_size = 0;
3447
3448 HYPRE_Int *C_tmp_diag_i;
3449 HYPRE_Int *C_tmp_diag_j;
3450 HYPRE_Complex *C_tmp_diag_data;
3451 HYPRE_Int *C_tmp_offd_i;
3452 HYPRE_Int *C_tmp_offd_j;
3453 HYPRE_Complex *C_tmp_offd_data;
3454
3455 HYPRE_Complex *C_offd_data=NULL;
3456 HYPRE_Int *C_offd_i=NULL;
3457 HYPRE_Int *C_offd_j=NULL;
3458
3459 HYPRE_BigInt *temp;
3460 HYPRE_Int *send_map_starts_A;
3461 HYPRE_Int *send_map_elmts_A;
3462 HYPRE_Int num_sends_A;
3463
3464 HYPRE_Int num_cols_offd_C = 0;
3465
3466 HYPRE_Int *P_marker;
3467
3468 HYPRE_Int i, j;
3469 HYPRE_Int i1, j_indx;
3470
3471 HYPRE_BigInt nrows_A, ncols_A;
3472 HYPRE_BigInt nrows_B, ncols_B;
3473 /*HYPRE_Int allsquare = 0;*/
3474 HYPRE_Int cnt, cnt_offd, cnt_diag;
3475 HYPRE_BigInt value;
3476 HYPRE_Int num_procs, my_id;
3477 HYPRE_Int max_num_threads;
3478 HYPRE_Int *C_diag_array = NULL;
3479 HYPRE_Int *C_offd_array = NULL;
3480
3481 HYPRE_BigInt first_row_index, first_col_diag;
3482 HYPRE_Int local_num_rows, local_num_cols;
3483
3484 nrows_A = hypre_ParCSRMatrixGlobalNumRows(A);
3485 ncols_A = hypre_ParCSRMatrixGlobalNumCols(A);
3486 nrows_B = hypre_ParCSRMatrixGlobalNumRows(B);
3487 ncols_B = hypre_ParCSRMatrixGlobalNumCols(B);
3488
3489 hypre_MPI_Comm_size(comm,&num_procs);
3490 hypre_MPI_Comm_rank(comm, &my_id);
3491 max_num_threads = hypre_NumThreads();
3492
3493 if (nrows_A != nrows_B || num_rows_diag_A != num_rows_diag_B)
3494 {
3495 hypre_error_w_msg(HYPRE_ERROR_GENERIC," Error! Incompatible matrix dimensions!\n");
3496 return NULL;
3497 }
3498
3499 HYPRE_MemoryLocation memory_location_A = hypre_ParCSRMatrixMemoryLocation(A);
3500 HYPRE_MemoryLocation memory_location_B = hypre_ParCSRMatrixMemoryLocation(B);
3501
3502 /* RL: TODO cannot guarantee, maybe should never assert
3503 hypre_assert(memory_location_A == memory_location_B);
3504 */
3505
3506 /* RL: in the case of A=H, B=D, or A=D, B=H, let C = D,
3507 * not sure if this is the right thing to do.
3508 * Also, need something like this in other places
3509 * TODO */
3510 HYPRE_MemoryLocation memory_location_C = hypre_max(memory_location_A, memory_location_B);
3511
3512 /*if (num_cols_diag_A == num_cols_diag_B) allsquare = 1;*/
3513
3514 /*---------------------------------------------------------------------
3515 * If there exists no CommPkg for A, a CommPkg is generated using
3516 * equally load balanced partitionings
3517 *--------------------------------------------------------------------*/
3518
3519 HYPRE_ANNOTATE_FUNC_BEGIN;
3520
3521 if (!comm_pkg_A)
3522 {
3523 hypre_MatvecCommPkgCreate(A);
3524 comm_pkg_A = hypre_ParCSRMatrixCommPkg(A);
3525 }
3526
3527 hypre_CSRMatrixTranspose(A_diag, &AT_diag, 1);
3528 hypre_CSRMatrixTranspose(A_offd, &AT_offd, 1);
3529
3530 C_tmp_diag = hypre_CSRMatrixMultiply(AT_diag, B_diag);
3531 C_ext_size = 0;
3532 if (num_procs > 1)
3533 {
3534 hypre_CSRMatrix *C_int_diag;
3535 hypre_CSRMatrix *C_int_offd;
3536 void *request;
3537
3538 C_tmp_offd = hypre_CSRMatrixMultiply(AT_diag, B_offd);
3539 C_int_diag = hypre_CSRMatrixMultiply(AT_offd, B_diag);
3540 C_int_offd = hypre_CSRMatrixMultiply(AT_offd, B_offd);
3541 hypre_ParCSRMatrixDiag(B) = C_int_diag;
3542 hypre_ParCSRMatrixOffd(B) = C_int_offd;
3543 C_int = hypre_MergeDiagAndOffd(B);
3544 hypre_ParCSRMatrixDiag(B) = B_diag;
3545 hypre_ParCSRMatrixOffd(B) = B_offd;
3546 hypre_ExchangeExternalRowsInit(C_int, comm_pkg_A, &request);
3547 C_ext = hypre_ExchangeExternalRowsWait(request);
3548 C_ext_i = hypre_CSRMatrixI(C_ext);
3549 C_ext_j = hypre_CSRMatrixBigJ(C_ext);
3550 C_ext_data = hypre_CSRMatrixData(C_ext);
3551 C_ext_size = C_ext_i[hypre_CSRMatrixNumRows(C_ext)];
3552
3553 hypre_CSRMatrixDestroy(C_int);
3554 hypre_CSRMatrixDestroy(C_int_diag);
3555 hypre_CSRMatrixDestroy(C_int_offd);
3556 }
3557 else
3558 {
3559 C_tmp_offd = hypre_CSRMatrixCreate(num_cols_diag_A, 0, 0);
3560 hypre_CSRMatrixInitialize(C_tmp_offd);
3561 hypre_CSRMatrixNumRownnz(C_tmp_offd) = 0;
3562 }
3563 hypre_CSRMatrixDestroy(AT_diag);
3564 hypre_CSRMatrixDestroy(AT_offd);
3565
3566 /*-----------------------------------------------------------------------
3567 * Add contents of C_ext to C_tmp_diag and C_tmp_offd
3568 * to obtain C_diag and C_offd
3569 *-----------------------------------------------------------------------*/
3570
3571 /* check for new nonzero columns in C_offd generated through C_ext */
3572
3573 first_col_diag_C = first_col_diag_B;
3574 last_col_diag_C = first_col_diag_B + (HYPRE_BigInt)num_cols_diag_B - 1;
3575
3576 C_tmp_diag_i = hypre_CSRMatrixI(C_tmp_diag);
3577 if (C_ext_size || num_cols_offd_B)
3578 {
3579 HYPRE_Int C_ext_num_rows;
3580
3581 num_sends_A = hypre_ParCSRCommPkgNumSends(comm_pkg_A);
3582 send_map_starts_A = hypre_ParCSRCommPkgSendMapStarts(comm_pkg_A);
3583 send_map_elmts_A = hypre_ParCSRCommPkgSendMapElmts(comm_pkg_A);
3584 C_ext_num_rows = send_map_starts_A[num_sends_A];
3585
3586 C_ext_diag_i = hypre_CTAlloc(HYPRE_Int, C_ext_num_rows+1, HYPRE_MEMORY_HOST);
3587 C_ext_offd_i = hypre_CTAlloc(HYPRE_Int, C_ext_num_rows+1, HYPRE_MEMORY_HOST);
3588 temp = hypre_CTAlloc(HYPRE_BigInt, C_ext_size+num_cols_offd_B, HYPRE_MEMORY_HOST);
3589 C_ext_diag_size = 0;
3590 C_ext_offd_size = 0;
3591 for (i = 0; i < C_ext_num_rows; i++)
3592 {
3593 for (j = C_ext_i[i]; j < C_ext_i[i+1]; j++)
3594 {
3595 if (C_ext_j[j] < first_col_diag_C ||
3596 C_ext_j[j] > last_col_diag_C)
3597 {
3598 temp[C_ext_offd_size++] = C_ext_j[j];
3599 }
3600 else
3601 {
3602 C_ext_diag_size++;
3603 }
3604 }
3605 C_ext_diag_i[i+1] = C_ext_diag_size;
3606 C_ext_offd_i[i+1] = C_ext_offd_size;
3607 }
3608 cnt = C_ext_offd_size;
3609 for (i = 0; i < num_cols_offd_B; i++)
3610 {
3611 temp[cnt++] = col_map_offd_B[i];
3612 }
3613
3614 if (cnt)
3615 {
3616 hypre_BigQsort0(temp,0,cnt-1);
3617 value = temp[0];
3618 num_cols_offd_C = 1;
3619 for (i = 1; i < cnt; i++)
3620 {
3621 if (temp[i] > value)
3622 {
3623 value = temp[i];
3624 temp[num_cols_offd_C++] = value;
3625 }
3626 }
3627 }
3628
3629 if (num_cols_offd_C)
3630 {
3631 col_map_offd_C = hypre_CTAlloc(HYPRE_BigInt, num_cols_offd_C, HYPRE_MEMORY_HOST);
3632 }
3633 for (i = 0; i < num_cols_offd_C; i++)
3634 {
3635 col_map_offd_C[i] = temp[i];
3636 }
3637
3638 hypre_TFree(temp, HYPRE_MEMORY_HOST);
3639
3640 if (C_ext_diag_size)
3641 {
3642 C_ext_diag_j = hypre_CTAlloc(HYPRE_Int, C_ext_diag_size, HYPRE_MEMORY_HOST);
3643 C_ext_diag_data = hypre_CTAlloc(HYPRE_Complex, C_ext_diag_size, HYPRE_MEMORY_HOST);
3644 }
3645 if (C_ext_offd_size)
3646 {
3647 C_ext_offd_j = hypre_CTAlloc(HYPRE_Int, C_ext_offd_size, HYPRE_MEMORY_HOST);
3648 C_ext_offd_data = hypre_CTAlloc(HYPRE_Complex, C_ext_offd_size, HYPRE_MEMORY_HOST);
3649 }
3650
3651 C_tmp_diag_j = hypre_CSRMatrixJ(C_tmp_diag);
3652 C_tmp_diag_data = hypre_CSRMatrixData(C_tmp_diag);
3653
3654 C_tmp_offd_i = hypre_CSRMatrixI(C_tmp_offd);
3655 C_tmp_offd_j = hypre_CSRMatrixJ(C_tmp_offd);
3656 C_tmp_offd_data = hypre_CSRMatrixData(C_tmp_offd);
3657
3658 cnt_offd = 0;
3659 cnt_diag = 0;
3660 for (i = 0; i < C_ext_num_rows; i++)
3661 {
3662 for (j = C_ext_i[i]; j < C_ext_i[i+1]; j++)
3663 {
3664 if (C_ext_j[j] < first_col_diag_C ||
3665 C_ext_j[j] > last_col_diag_C)
3666 {
3667 C_ext_offd_j[cnt_offd] = hypre_BigBinarySearch(col_map_offd_C,
3668 C_ext_j[j],
3669 num_cols_offd_C);
3670 C_ext_offd_data[cnt_offd++] = C_ext_data[j];
3671 }
3672 else
3673 {
3674 C_ext_diag_j[cnt_diag] = (HYPRE_Int)(C_ext_j[j] - first_col_diag_C);
3675 C_ext_diag_data[cnt_diag++] = C_ext_data[j];
3676 }
3677 }
3678 }
3679 }
3680
3681 if (C_ext)
3682 {
3683 hypre_CSRMatrixDestroy(C_ext);
3684 C_ext = NULL;
3685 }
3686
3687 if (num_cols_offd_B)
3688 {
3689 map_B_to_C = hypre_CTAlloc(HYPRE_Int, num_cols_offd_B, HYPRE_MEMORY_HOST);
3690
3691 cnt = 0;
3692 for (i = 0; i < num_cols_offd_C; i++)
3693 {
3694 if (col_map_offd_C[i] == col_map_offd_B[cnt])
3695 {
3696 map_B_to_C[cnt++] = i;
3697 if (cnt == num_cols_offd_B) break;
3698 }
3699 }
3700 for (i = 0; i < hypre_CSRMatrixI(C_tmp_offd)[hypre_CSRMatrixNumRows(C_tmp_offd)]; i++)
3701 {
3702 j_indx = C_tmp_offd_j[i];
3703 C_tmp_offd_j[i] = map_B_to_C[j_indx];
3704 }
3705 }
3706
3707 /*-----------------------------------------------------------------------
3708 * Need to compute:
3709 * C_diag = C_tmp_diag + C_ext_diag
3710 * C_offd = C_tmp_offd + C_ext_offd
3711 *
3712 * First generate structure
3713 *-----------------------------------------------------------------------*/
3714
3715 if (C_ext_size || num_cols_offd_B)
3716 {
3717 C_diag_i = hypre_CTAlloc(HYPRE_Int, num_cols_diag_A+1, memory_location_C);
3718 C_offd_i = hypre_CTAlloc(HYPRE_Int, num_cols_diag_A+1, memory_location_C);
3719
3720 C_diag_array = hypre_CTAlloc(HYPRE_Int, max_num_threads, HYPRE_MEMORY_HOST);
3721 C_offd_array = hypre_CTAlloc(HYPRE_Int, max_num_threads, HYPRE_MEMORY_HOST);
3722
3723 #ifdef HYPRE_USING_OPENMP
3724 #pragma omp parallel
3725 #endif
3726 {
3727 HYPRE_Int *B_marker = NULL;
3728 HYPRE_Int *B_marker_offd = NULL;
3729 HYPRE_Int ik, jk, j1, j2, jcol;
3730 HYPRE_Int ns, ne, ii, nnz_d, nnz_o;
3731 HYPRE_Int rest, size;
3732 HYPRE_Int num_threads = hypre_NumActiveThreads();
3733
3734 size = num_cols_diag_A/num_threads;
3735 rest = num_cols_diag_A - size*num_threads;
3736 ii = hypre_GetThreadNum();
3737 if (ii < rest)
3738 {
3739 ns = ii*size+ii;
3740 ne = (ii+1)*size+ii+1;
3741 }
3742 else
3743 {
3744 ns = ii*size+rest;
3745 ne = (ii+1)*size+rest;
3746 }
3747
3748 B_marker = hypre_CTAlloc(HYPRE_Int, num_cols_diag_B, HYPRE_MEMORY_HOST);
3749 B_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd_C, HYPRE_MEMORY_HOST);
3750
3751 for (ik = 0; ik < num_cols_diag_B; ik++)
3752 {
3753 B_marker[ik] = -1;
3754 }
3755
3756 for (ik = 0; ik < num_cols_offd_C; ik++)
3757 {
3758 B_marker_offd[ik] = -1;
3759 }
3760
3761 nnz_d = 0;
3762 nnz_o = 0;
3763 for (ik = ns; ik < ne; ik++)
3764 {
3765 for (jk = C_tmp_diag_i[ik]; jk < C_tmp_diag_i[ik+1]; jk++)
3766 {
3767 jcol = C_tmp_diag_j[jk];
3768 B_marker[jcol] = ik;
3769 nnz_d++;
3770 }
3771
3772 for (jk = C_tmp_offd_i[ik]; jk < C_tmp_offd_i[ik+1]; jk++)
3773 {
3774 jcol = C_tmp_offd_j[jk];
3775 B_marker_offd[jcol] = ik;
3776 nnz_o++;
3777 }
3778
3779 for (jk = 0; jk < num_sends_A; jk++)
3780 {
3781 for (j1 = send_map_starts_A[jk]; j1 < send_map_starts_A[jk+1]; j1++)
3782 {
3783 if (send_map_elmts_A[j1] == ik)
3784 {
3785 for (j2 = C_ext_diag_i[j1]; j2 < C_ext_diag_i[j1+1]; j2++)
3786 {
3787 jcol = C_ext_diag_j[j2];
3788 if (B_marker[jcol] < ik)
3789 {
3790 B_marker[jcol] = ik;
3791 nnz_d++;
3792 }
3793 }
3794 for (j2 = C_ext_offd_i[j1]; j2 < C_ext_offd_i[j1+1]; j2++)
3795 {
3796 jcol = C_ext_offd_j[j2];
3797 if (B_marker_offd[jcol] < ik)
3798 {
3799 B_marker_offd[jcol] = ik;
3800 nnz_o++;
3801 }
3802 }
3803 break;
3804 }
3805 }
3806 }
3807 C_diag_array[ii] = nnz_d;
3808 C_offd_array[ii] = nnz_o;
3809 }
3810 #ifdef HYPRE_USING_OPENMP
3811 #pragma omp barrier
3812 #endif
3813
3814 if (ii == 0)
3815 {
3816 nnz_d = 0;
3817 nnz_o = 0;
3818 for (ik = 0; ik < num_threads-1; ik++)
3819 {
3820 C_diag_array[ik+1] += C_diag_array[ik];
3821 C_offd_array[ik+1] += C_offd_array[ik];
3822 }
3823 nnz_d = C_diag_array[num_threads-1];
3824 nnz_o = C_offd_array[num_threads-1];
3825 C_diag_i[num_cols_diag_A] = nnz_d;
3826 C_offd_i[num_cols_diag_A] = nnz_o;
3827
3828 C_diag = hypre_CSRMatrixCreate(num_cols_diag_A, num_cols_diag_A, nnz_d);
3829 C_offd = hypre_CSRMatrixCreate(num_cols_diag_A, num_cols_offd_C, nnz_o);
3830 hypre_CSRMatrixI(C_diag) = C_diag_i;
3831 hypre_CSRMatrixInitialize_v2(C_diag, 0, memory_location_C);
3832 C_diag_j = hypre_CSRMatrixJ(C_diag);
3833 C_diag_data = hypre_CSRMatrixData(C_diag);
3834 hypre_CSRMatrixI(C_offd) = C_offd_i;
3835 hypre_CSRMatrixInitialize_v2(C_offd, 0, memory_location_C);
3836 C_offd_j = hypre_CSRMatrixJ(C_offd);
3837 C_offd_data = hypre_CSRMatrixData(C_offd);
3838 }
3839 #ifdef HYPRE_USING_OPENMP
3840 #pragma omp barrier
3841 #endif
3842
3843 /*-----------------------------------------------------------------------
3844 * Need to compute C_diag = C_tmp_diag + C_ext_diag
3845 * and C_offd = C_tmp_offd + C_ext_offd !!!!
3846 * Now fill in values
3847 *-----------------------------------------------------------------------*/
3848
3849 for (ik = 0; ik < num_cols_diag_B; ik++)
3850 {
3851 B_marker[ik] = -1;
3852 }
3853
3854 for (ik = 0; ik < num_cols_offd_C; ik++)
3855 {
3856 B_marker_offd[ik] = -1;
3857 }
3858
3859 /*-----------------------------------------------------------------------
3860 * Populate matrices
3861 *-----------------------------------------------------------------------*/
3862
3863 nnz_d = 0;
3864 nnz_o = 0;
3865 if (ii)
3866 {
3867 nnz_d = C_diag_array[ii-1];
3868 nnz_o = C_offd_array[ii-1];
3869 }
3870 for (ik = ns; ik < ne; ik++)
3871 {
3872 C_diag_i[ik] = nnz_d;
3873 C_offd_i[ik] = nnz_o;
3874 for (jk = C_tmp_diag_i[ik]; jk < C_tmp_diag_i[ik+1]; jk++)
3875 {
3876 jcol = C_tmp_diag_j[jk];
3877 C_diag_j[nnz_d] = jcol;
3878 C_diag_data[nnz_d] = C_tmp_diag_data[jk];
3879 B_marker[jcol] = nnz_d;
3880 nnz_d++;
3881 }
3882
3883 for (jk = C_tmp_offd_i[ik]; jk < C_tmp_offd_i[ik+1]; jk++)
3884 {
3885 jcol = C_tmp_offd_j[jk];
3886 C_offd_j[nnz_o] = jcol;
3887 C_offd_data[nnz_o] = C_tmp_offd_data[jk];
3888 B_marker_offd[jcol] = nnz_o;
3889 nnz_o++;
3890 }
3891
3892 for (jk = 0; jk < num_sends_A; jk++)
3893 {
3894 for (j1 = send_map_starts_A[jk]; j1 < send_map_starts_A[jk+1]; j1++)
3895 {
3896 if (send_map_elmts_A[j1] == ik)
3897 {
3898 for (j2 = C_ext_diag_i[j1]; j2 < C_ext_diag_i[j1+1]; j2++)
3899 {
3900 jcol = C_ext_diag_j[j2];
3901 if (B_marker[jcol] < C_diag_i[ik])
3902 {
3903 C_diag_j[nnz_d] = jcol;
3904 C_diag_data[nnz_d] = C_ext_diag_data[j2];
3905 B_marker[jcol] = nnz_d;
3906 nnz_d++;
3907 }
3908 else
3909 {
3910 C_diag_data[B_marker[jcol]] += C_ext_diag_data[j2];
3911 }
3912 }
3913 for (j2 = C_ext_offd_i[j1]; j2 < C_ext_offd_i[j1+1]; j2++)
3914 {
3915 jcol = C_ext_offd_j[j2];
3916 if (B_marker_offd[jcol] < C_offd_i[ik])
3917 {
3918 C_offd_j[nnz_o] = jcol;
3919 C_offd_data[nnz_o] = C_ext_offd_data[j2];
3920 B_marker_offd[jcol] = nnz_o;
3921 nnz_o++;
3922 }
3923 else
3924 {
3925 C_offd_data[B_marker_offd[jcol]] += C_ext_offd_data[j2];
3926 }
3927 }
3928 break;
3929 }
3930 }
3931 }
3932 }
3933 hypre_TFree(B_marker, HYPRE_MEMORY_HOST);
3934 hypre_TFree(B_marker_offd, HYPRE_MEMORY_HOST);
3935
3936 } /*end parallel region */
3937
3938 hypre_TFree(C_diag_array, HYPRE_MEMORY_HOST);
3939 hypre_TFree(C_offd_array, HYPRE_MEMORY_HOST);
3940 }
3941
3942 /*C = hypre_ParCSRMatrixCreate(comm, ncols_A, ncols_B, col_starts_A,
3943 col_starts_B, num_cols_offd_C, nnz_diag, nnz_offd);
3944
3945 hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(C));
3946 hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(C)); */
3947 /* row_starts[0] is start of local rows. row_starts[1] is start of next
3948 processor's rows */
3949 first_row_index = col_starts_A[0];
3950 local_num_rows = (HYPRE_Int)(col_starts_A[1]-first_row_index );
3951 first_col_diag = col_starts_B[0];
3952 local_num_cols = (HYPRE_Int)(col_starts_B[1]-first_col_diag);
3953
3954 C = hypre_CTAlloc(hypre_ParCSRMatrix, 1, HYPRE_MEMORY_HOST);
3955 hypre_ParCSRMatrixComm(C) = comm;
3956 hypre_ParCSRMatrixGlobalNumRows(C) = ncols_A;
3957 hypre_ParCSRMatrixGlobalNumCols(C) = ncols_B;
3958 hypre_ParCSRMatrixFirstRowIndex(C) = first_row_index;
3959 hypre_ParCSRMatrixFirstColDiag(C) = first_col_diag;
3960 hypre_ParCSRMatrixLastRowIndex(C) = first_row_index + (HYPRE_BigInt)local_num_rows - 1;
3961 hypre_ParCSRMatrixLastColDiag(C) = first_col_diag + (HYPRE_BigInt)local_num_cols - 1;
3962 hypre_ParCSRMatrixColMapOffd(C) = NULL;
3963 hypre_ParCSRMatrixAssumedPartition(C) = NULL;
3964 hypre_ParCSRMatrixCommPkg(C) = NULL;
3965 hypre_ParCSRMatrixCommPkgT(C) = NULL;
3966
3967 /* C row/col starts*/
3968 hypre_ParCSRMatrixRowStarts(C)[0] = col_starts_A[0];
3969 hypre_ParCSRMatrixRowStarts(C)[1] = col_starts_A[1];
3970 hypre_ParCSRMatrixColStarts(C)[0] = col_starts_B[0];
3971 hypre_ParCSRMatrixColStarts(C)[1] = col_starts_B[1];
3972
3973 /* set defaults */
3974 hypre_ParCSRMatrixOwnsData(C) = 1;
3975 hypre_ParCSRMatrixRowindices(C) = NULL;
3976 hypre_ParCSRMatrixRowvalues(C) = NULL;
3977 hypre_ParCSRMatrixGetrowactive(C) = 0;
3978
3979 if (C_diag)
3980 {
3981 hypre_CSRMatrixSetRownnz(C_diag);
3982 hypre_ParCSRMatrixDiag(C) = C_diag;
3983 }
3984 else
3985 {
3986 hypre_ParCSRMatrixDiag(C) = C_tmp_diag;
3987 }
3988
3989 if (C_offd)
3990 {
3991 hypre_CSRMatrixSetRownnz(C_offd);
3992 hypre_ParCSRMatrixOffd(C) = C_offd;
3993 }
3994 else
3995 {
3996 hypre_ParCSRMatrixOffd(C) = C_tmp_offd;
3997 }
3998
3999 hypre_CSRMatrixMemoryLocation(hypre_ParCSRMatrixDiag(C)) = memory_location_C;
4000 hypre_CSRMatrixMemoryLocation(hypre_ParCSRMatrixOffd(C)) = memory_location_C;
4001
4002 if (num_cols_offd_C)
4003 {
4004 HYPRE_Int jj_count_offd, nnz_offd;
4005 HYPRE_BigInt *new_col_map_offd_C = NULL;
4006
4007 P_marker = hypre_CTAlloc(HYPRE_Int, num_cols_offd_C, HYPRE_MEMORY_HOST);
4008 for (i = 0; i < num_cols_offd_C; i++)
4009 {
4010 P_marker[i] = -1;
4011 }
4012
4013 jj_count_offd = 0;
4014 nnz_offd = C_offd_i[num_cols_diag_A];
4015 for (i = 0; i < nnz_offd; i++)
4016 {
4017 i1 = C_offd_j[i];
4018 if (P_marker[i1])
4019 {
4020 P_marker[i1] = 0;
4021 jj_count_offd++;
4022 }
4023 }
4024
4025 if (jj_count_offd < num_cols_offd_C)
4026 {
4027 new_col_map_offd_C = hypre_CTAlloc(HYPRE_BigInt, jj_count_offd, HYPRE_MEMORY_HOST);
4028 jj_count_offd = 0;
4029 for (i = 0; i < num_cols_offd_C; i++)
4030 {
4031 if (!P_marker[i])
4032 {
4033 P_marker[i] = jj_count_offd;
4034 new_col_map_offd_C[jj_count_offd++] = col_map_offd_C[i];
4035 }
4036 }
4037
4038 for (i = 0; i < nnz_offd; i++)
4039 {
4040 i1 = C_offd_j[i];
4041 C_offd_j[i] = P_marker[i1];
4042 }
4043
4044 num_cols_offd_C = jj_count_offd;
4045 hypre_TFree(col_map_offd_C, HYPRE_MEMORY_HOST);
4046 col_map_offd_C = new_col_map_offd_C;
4047 hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(C)) = num_cols_offd_C;
4048 }
4049 hypre_TFree(P_marker, HYPRE_MEMORY_HOST);
4050 }
4051
4052 hypre_ParCSRMatrixColMapOffd(C) = col_map_offd_C;
4053
4054 /*-----------------------------------------------------------------------
4055 * Free various arrays
4056 *-----------------------------------------------------------------------*/
4057 if (C_ext_size || num_cols_offd_B)
4058 {
4059 hypre_TFree(C_ext_diag_i, HYPRE_MEMORY_HOST);
4060 hypre_TFree(C_ext_offd_i, HYPRE_MEMORY_HOST);
4061 }
4062
4063 if (C_ext_diag_size)
4064 {
4065 hypre_TFree(C_ext_diag_j, HYPRE_MEMORY_HOST);
4066 hypre_TFree(C_ext_diag_data, HYPRE_MEMORY_HOST);
4067 }
4068
4069 if (C_ext_offd_size)
4070 {
4071 hypre_TFree(C_ext_offd_j, HYPRE_MEMORY_HOST);
4072 hypre_TFree(C_ext_offd_data, HYPRE_MEMORY_HOST);
4073 }
4074
4075 if (num_cols_offd_B)
4076 {
4077 hypre_TFree(map_B_to_C, HYPRE_MEMORY_HOST);
4078 }
4079
4080 if (C_diag)
4081 {
4082 hypre_CSRMatrixDestroy(C_tmp_diag);
4083 }
4084
4085 if (C_offd)
4086 {
4087 hypre_CSRMatrixDestroy(C_tmp_offd);
4088 }
4089
4090 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
4091 if ( hypre_GetExecPolicy2(memory_location_A, memory_location_B) == HYPRE_EXEC_DEVICE )
4092 {
4093 hypre_CSRMatrixMoveDiagFirstDevice(hypre_ParCSRMatrixDiag(C));
4094 hypre_SyncCudaComputeStream(hypre_handle());
4095 }
4096 #endif
4097
4098 HYPRE_ANNOTATE_FUNC_END;
4099
4100 return C;
4101 }
4102
4103 /*--------------------------------------------------------------------------
4104 * hypre_ParvecBdiagInvScal
4105 *--------------------------------------------------------------------------*/
4106
4107 HYPRE_Int
hypre_ParvecBdiagInvScal(hypre_ParVector * b,HYPRE_Int blockSize,hypre_ParVector ** bs,hypre_ParCSRMatrix * A)4108 hypre_ParvecBdiagInvScal( hypre_ParVector *b,
4109 HYPRE_Int blockSize,
4110 hypre_ParVector **bs,
4111 hypre_ParCSRMatrix *A)
4112 {
4113 MPI_Comm comm = hypre_ParCSRMatrixComm(b);
4114 HYPRE_Int num_procs, my_id;
4115 hypre_MPI_Comm_rank(comm, &my_id);
4116 hypre_MPI_Comm_size(comm, &num_procs);
4117
4118 HYPRE_Int i, j, s, block_start, block_end;
4119 HYPRE_BigInt nrow_global = hypre_ParVectorGlobalSize(b);
4120 HYPRE_BigInt first_row = hypre_ParVectorFirstIndex(b);
4121 HYPRE_BigInt last_row = hypre_ParVectorLastIndex(b);
4122 HYPRE_BigInt end_row = last_row + 1; /* one past-the-last */
4123 HYPRE_BigInt first_row_block = first_row / (HYPRE_BigInt)(blockSize) * (HYPRE_BigInt)blockSize;
4124 HYPRE_BigInt end_row_block = hypre_min( (last_row / (HYPRE_BigInt)blockSize + 1) * (HYPRE_BigInt)blockSize, nrow_global );
4125
4126 hypre_assert(blockSize == A->bdiag_size);
4127 HYPRE_Complex *bdiaginv = A->bdiaginv;
4128 hypre_ParCSRCommPkg *comm_pkg = A->bdiaginv_comm_pkg;
4129
4130 HYPRE_Complex *dense = bdiaginv;
4131
4132 //for (i=first_row_block; i < end_row; i+=blockSize) ;
4133 //printf("===[%d %d), [ %d %d ) %d === \n", first_row, end_row, first_row_block, end_row_block, i);
4134
4135 /* local vector of b */
4136 hypre_Vector *b_local = hypre_ParVectorLocalVector(b);
4137 HYPRE_Complex *b_local_data = hypre_VectorData(b_local);
4138 /* number of sends (#procs) */
4139 HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
4140 /* number of rows to send */
4141 HYPRE_Int num_rows_send = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends);
4142 /* number of recvs (#procs) */
4143 HYPRE_Int num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
4144 /* number of rows to recv */
4145 HYPRE_Int num_rows_recv = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs);
4146 hypre_ParCSRCommHandle *comm_handle;
4147
4148 HYPRE_BigInt *part = hypre_TAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST);
4149 hypre_TMemcpy(part, hypre_ParVectorPartitioning(b), HYPRE_BigInt, 2,
4150 HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST);
4151
4152 hypre_ParVector *bnew = hypre_ParVectorCreate( hypre_ParVectorComm(b),
4153 hypre_ParVectorGlobalSize(b), part );
4154 hypre_ParVectorInitialize(bnew);
4155 hypre_Vector *bnew_local = hypre_ParVectorLocalVector(bnew);
4156 HYPRE_Complex *bnew_local_data = hypre_VectorData(bnew_local);
4157
4158 /* send and recv b */
4159 HYPRE_Complex *send_b = hypre_TAlloc(HYPRE_Complex, num_rows_send, HYPRE_MEMORY_HOST);
4160 HYPRE_Complex *recv_b = hypre_TAlloc(HYPRE_Complex, num_rows_recv, HYPRE_MEMORY_HOST);
4161
4162 for (i = 0; i < num_rows_send; i++)
4163 {
4164 j = hypre_ParCSRCommPkgSendMapElmt(comm_pkg, i);
4165 send_b[i] = b_local_data[j];
4166 }
4167 comm_handle = hypre_ParCSRCommHandleCreate(1, comm_pkg, send_b, recv_b);
4168 /* ... */
4169 hypre_ParCSRCommHandleDestroy(comm_handle);
4170
4171 for (block_start = first_row_block; block_start < end_row_block; block_start += blockSize)
4172 {
4173 HYPRE_BigInt big_i;
4174 block_end = hypre_min(block_start + (HYPRE_BigInt)blockSize, nrow_global);
4175 s = (HYPRE_Int)(block_end - block_start);
4176 for (big_i = block_start; big_i < block_end; big_i++)
4177 {
4178 if (big_i < first_row || big_i >= end_row)
4179 {
4180 continue;
4181 }
4182
4183 HYPRE_Int local_i = (HYPRE_Int)(big_i - first_row);
4184 HYPRE_Int block_i = (HYPRE_Int)(big_i - block_start);
4185
4186 bnew_local_data[local_i] = 0.0;
4187
4188 for (j = 0; j < s; j++)
4189 {
4190 HYPRE_BigInt global_rid = block_start + (HYPRE_BigInt)j;
4191 HYPRE_Complex val = dense[block_i + j*blockSize];
4192 if (val == 0.0)
4193 {
4194 continue;
4195 }
4196 if (global_rid >= first_row && global_rid < end_row)
4197 {
4198 HYPRE_Int rid = (HYPRE_Int)(global_rid - first_row);
4199 bnew_local_data[local_i] += val * b_local_data[rid];
4200 }
4201 else
4202 {
4203 HYPRE_Int rid;
4204
4205 if (global_rid < first_row)
4206 {
4207 rid = (HYPRE_Int)(global_rid - first_row_block);
4208 }
4209 else
4210 {
4211 rid = (HYPRE_Int)(first_row - first_row_block + global_rid - end_row);
4212 }
4213 bnew_local_data[local_i] += val * recv_b[rid];
4214 }
4215 }
4216 }
4217 dense += blockSize * blockSize;
4218 }
4219
4220 hypre_TFree(send_b, HYPRE_MEMORY_HOST);
4221 hypre_TFree(recv_b, HYPRE_MEMORY_HOST);
4222 *bs = bnew;
4223
4224 return hypre_error_flag;
4225 }
4226
/**
 * @brief Compute As = B^{-1}*A, where B is the block diagonal of A
 * @param[in]  A         input ParCSR matrix; on return its inverted diagonal
 *                       blocks are cached in A->bdiaginv (with block size in
 *                       A->bdiag_size) for later use by hypre_ParvecBdiagInvScal
 * @param[in]  blockSize block size of the block-diagonal matrix B
 * @param[out] As        the scaled matrix B^{-1}*A (newly allocated)
 * @return hypre_error_flag
 * @warning only supports globally square matrices (N_ROW == N_COL)
 */
HYPRE_Int
hypre_ParcsrBdiagInvScal( hypre_ParCSRMatrix  *A,
                          HYPRE_Int            blockSize,
                          hypre_ParCSRMatrix **As)
{
   MPI_Comm comm = hypre_ParCSRMatrixComm(A);
   HYPRE_Int num_procs, my_id;
   hypre_MPI_Comm_rank(comm, &my_id);
   hypre_MPI_Comm_size(comm, &num_procs);

   HYPRE_Int i, j, k, s;
   HYPRE_BigInt block_start, block_end;
   /* diag part of A */
   hypre_CSRMatrix *A_diag   = hypre_ParCSRMatrixDiag(A);
   HYPRE_Real      *A_diag_a = hypre_CSRMatrixData(A_diag);
   HYPRE_Int       *A_diag_i = hypre_CSRMatrixI(A_diag);
   HYPRE_Int       *A_diag_j = hypre_CSRMatrixJ(A_diag);
   /* off-diag part of A */
   hypre_CSRMatrix *A_offd   = hypre_ParCSRMatrixOffd(A);
   HYPRE_Real      *A_offd_a = hypre_CSRMatrixData(A_offd);
   HYPRE_Int       *A_offd_i = hypre_CSRMatrixI(A_offd);
   HYPRE_Int       *A_offd_j = hypre_CSRMatrixJ(A_offd);

   HYPRE_Int     num_cols_A_offd = hypre_CSRMatrixNumCols(A_offd);
   HYPRE_BigInt *col_map_offd_A  = hypre_ParCSRMatrixColMapOffd(A);


   HYPRE_Int    nrow_local = hypre_CSRMatrixNumRows(A_diag);
   HYPRE_BigInt first_row  = hypre_ParCSRMatrixFirstRowIndex(A);
   HYPRE_BigInt last_row   = hypre_ParCSRMatrixLastRowIndex(A);
   HYPRE_BigInt end_row    = first_row + (HYPRE_BigInt)nrow_local; /* one past-the-last */

   HYPRE_Int    ncol_local = hypre_CSRMatrixNumCols(A_diag);
   HYPRE_BigInt first_col  = hypre_ParCSRMatrixFirstColDiag(A);
   /* HYPRE_Int last_col = hypre_ParCSRMatrixLastColDiag(A); */
   HYPRE_BigInt end_col    = first_col + (HYPRE_BigInt)ncol_local;

   HYPRE_BigInt  nrow_global = hypre_ParCSRMatrixGlobalNumRows(A);
   HYPRE_BigInt  ncol_global = hypre_ParCSRMatrixGlobalNumCols(A);
   HYPRE_BigInt *row_starts  = hypre_ParCSRMatrixRowStarts(A);
   void         *request;

   /* if square globally and locally */
   HYPRE_Int square2 = (nrow_global == ncol_global) && (nrow_local == ncol_local) &&
                       (first_row == first_col);

   if (nrow_global != ncol_global)
   {
      hypre_printf("hypre_ParcsrBdiagInvScal: only support N_ROW == N_COL\n");
      return hypre_error_flag;
   }

   /* in block diagonals, row range of the blocks this proc span */
   HYPRE_BigInt first_row_block = first_row / (HYPRE_BigInt)blockSize * (HYPRE_BigInt)blockSize;
   HYPRE_BigInt end_row_block   = hypre_min( (last_row / (HYPRE_BigInt)blockSize + 1) * (HYPRE_BigInt)blockSize, nrow_global );
   HYPRE_Int    num_blocks      = (HYPRE_Int)(last_row / (HYPRE_BigInt)blockSize + 1 - first_row / (HYPRE_BigInt)blockSize);

   //for (i=first_row_block; i < end_row; i+=blockSize) ;
   //printf("===[%d %d), [ %d %d ) %d === \n", first_row, end_row, first_row_block, end_row_block, i);
   //return 0;

   /* number of external rows: rows in [first_row_block, end_row_block) that
    * are NOT owned by this proc and must be fetched from neighbors */
   HYPRE_Int num_ext_rows = (HYPRE_Int)(end_row_block - first_row_block - (end_row - first_row));
   HYPRE_BigInt *ext_indices;
   HYPRE_Int A_ext_nnz;

   hypre_CSRMatrix *A_ext   = NULL;
   HYPRE_Complex   *A_ext_a = NULL;
   HYPRE_Int       *A_ext_i = NULL;
   HYPRE_BigInt    *A_ext_j = NULL;

   /* storage for all inverted dense blocks (kept in A->bdiaginv on return);
    * `dense' walks it one blockSize*blockSize block at a time */
   HYPRE_Real *dense_all = hypre_CTAlloc(HYPRE_Complex, num_blocks*blockSize*blockSize, HYPRE_MEMORY_HOST);
   HYPRE_Real *dense     = dense_all;
   HYPRE_Int  *IPIV      = hypre_TAlloc(HYPRE_Int, blockSize, HYPRE_MEMORY_HOST);
   HYPRE_Complex *dgetri_work  = NULL;
   HYPRE_Int      dgetri_lwork = -1, lapack_info;

   HYPRE_Int     num_cols_A_offd_new;
   HYPRE_BigInt *col_map_offd_A_new;
   HYPRE_BigInt  big_i;
   HYPRE_Int    *offd2new = NULL;
   HYPRE_Int    *marker_diag, *marker_newoffd;

   HYPRE_Int nnz_diag = A_diag_i[nrow_local];
   HYPRE_Int nnz_offd = A_offd_i[nrow_local];
   HYPRE_Int nnz_diag_new = 0, nnz_offd_new = 0;
   HYPRE_Int     *A_diag_i_new, *A_diag_j_new, *A_offd_i_new, *A_offd_j_new;
   HYPRE_Complex *A_diag_a_new, *A_offd_a_new;
   /* heuristic initial capacities; grown on demand below */
   HYPRE_Int nnz_diag_alloc = 2 * nnz_diag;
   HYPRE_Int nnz_offd_alloc = 2 * nnz_offd;

   A_diag_i_new = hypre_CTAlloc(HYPRE_Int,     nrow_local + 1, HYPRE_MEMORY_HOST);
   A_diag_j_new = hypre_CTAlloc(HYPRE_Int,     nnz_diag_alloc, HYPRE_MEMORY_HOST);
   A_diag_a_new = hypre_CTAlloc(HYPRE_Complex, nnz_diag_alloc, HYPRE_MEMORY_HOST);
   A_offd_i_new = hypre_CTAlloc(HYPRE_Int,     nrow_local + 1, HYPRE_MEMORY_HOST);
   A_offd_j_new = hypre_CTAlloc(HYPRE_Int,     nnz_offd_alloc, HYPRE_MEMORY_HOST);
   A_offd_a_new = hypre_CTAlloc(HYPRE_Complex, nnz_offd_alloc, HYPRE_MEMORY_HOST);

   hypre_ParCSRMatrix *Anew;
   hypre_CSRMatrix    *Anew_diag;
   hypre_CSRMatrix    *Anew_offd;

   /* drop threshold, relative to the block's Frobenius norm
    * (2.2e-16 ~ double-precision machine epsilon) */
   HYPRE_Real eps = 2.2e-16;

   /* Start with extracting the external rows */
   HYPRE_BigInt *ext_offd;
   ext_indices = hypre_CTAlloc(HYPRE_BigInt, num_ext_rows, HYPRE_MEMORY_HOST);
   j = 0;
   /* external rows below our range come first ... */
   for (big_i = first_row_block; big_i < first_row; big_i++)
   {
      ext_indices[j++] = big_i;
   }
   /* ... then those above it; code below relies on this ordering when
    * computing offsets into A_ext */
   for (big_i = end_row; big_i < end_row_block; big_i++)
   {
      ext_indices[j++] = big_i;
   }

   hypre_assert(j == num_ext_rows);

   /* create CommPkg for external rows */
   hypre_ParCSRFindExtendCommPkg(comm, nrow_global, first_row, nrow_local, row_starts,
                                 hypre_ParCSRMatrixAssumedPartition(A),
                                 num_ext_rows, ext_indices, &A->bdiaginv_comm_pkg);

   hypre_ParcsrGetExternalRowsInit(A, num_ext_rows, ext_indices, A->bdiaginv_comm_pkg, 1, &request);
   A_ext = hypre_ParcsrGetExternalRowsWait(request);

   hypre_TFree(ext_indices, HYPRE_MEMORY_HOST);

   A_ext_i   = hypre_CSRMatrixI(A_ext);
   A_ext_j   = hypre_CSRMatrixBigJ(A_ext);
   A_ext_a   = hypre_CSRMatrixData(A_ext);
   A_ext_nnz = A_ext_i[num_ext_rows];
   ext_offd  = hypre_CTAlloc(HYPRE_BigInt, A_ext_nnz, HYPRE_MEMORY_HOST);

   /* find the offd indices in A_ext */
   for (i = 0, j = 0; i < A_ext_nnz; i++)
   {
      /* global index */
      HYPRE_BigInt cid = A_ext_j[i];
      /* keep the offd indices */
      if (cid < first_col || cid >= end_col)
      {
         ext_offd[j++] = cid;
      }
   }
   /* remove duplicates after sorting (TODO better ways?) */
   hypre_BigQsort0(ext_offd, 0, j-1);
   for (i = 0, k = 0; i < j; i++)
   {
      if (i == 0 || ext_offd[i] != ext_offd[i-1])
      {
         ext_offd[k++] = ext_offd[i];
      }
   }
   /* union these `k' new indices into col_map_offd_A */
   col_map_offd_A_new = hypre_CTAlloc(HYPRE_BigInt, num_cols_A_offd + k, HYPRE_MEMORY_HOST);
   if (k)
   {
      /* map offd to offd_new */
      offd2new = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);
   }
   hypre_union2(num_cols_A_offd, col_map_offd_A, k, ext_offd,
                &num_cols_A_offd_new, col_map_offd_A_new, offd2new, NULL);
   hypre_TFree(ext_offd, HYPRE_MEMORY_HOST);
   /*
    * adjust column indices in A_ext
    */
   for (i = 0; i < A_ext_nnz; i++)
   {
      HYPRE_BigInt cid = A_ext_j[i];
      if (cid < first_col || cid >= end_col)
      {
         j = hypre_BigBinarySearch(col_map_offd_A_new, cid, num_cols_A_offd_new);
         /* searching must succeed */
         hypre_assert(j >= 0 && j < num_cols_A_offd_new);
         /* trick: save ncol_local + j back, so diag (< ncol_local) and offd
          * (>= ncol_local) columns can be told apart later */
         A_ext_j[i] = ncol_local + j;
      }
      else
      {
         /* save local index: [0, ncol_local-1] */
         A_ext_j[i] = cid - first_col;
      }
   }

   /* marker for diag: marker_diag[col] >= row-start position means `col'
    * already has an entry in the current output row */
   marker_diag = hypre_TAlloc(HYPRE_Int, ncol_local, HYPRE_MEMORY_HOST);
   for (i = 0; i < ncol_local; i++)
   {
      marker_diag[i] = -1;
   }
   /* marker for newoffd */
   marker_newoffd = hypre_TAlloc(HYPRE_Int, num_cols_A_offd_new, HYPRE_MEMORY_HOST);
   for (i = 0; i < num_cols_A_offd_new; i++)
   {
      marker_newoffd[i] = -1;
   }

   /* outer most loop for blocks */
   for (block_start = first_row_block; block_start < end_row_block; block_start += (HYPRE_BigInt)blockSize)
   {
      HYPRE_BigInt big_i;
      block_end = hypre_min(block_start + (HYPRE_BigInt)blockSize, nrow_global);
      s = (HYPRE_Int)(block_end - block_start); /* actual block size (last block may be short) */

      /* 1. fill the dense block diag matrix */
      for (big_i = block_start; big_i < block_end; big_i++)
      {
         /* row index in this block */
         HYPRE_Int block_i = (HYPRE_Int)(big_i - block_start);

         /* row index i: it can be local or external */
         if (big_i >= first_row && big_i < end_row)
         {
            /* is a local row */
            j = (HYPRE_Int)(big_i - first_row);
            for (k = A_diag_i[j]; k < A_diag_i[j+1]; k++)
            {
               HYPRE_BigInt cid = (HYPRE_BigInt)A_diag_j[k] + first_col;
               if (cid >= block_start && cid < block_end)
               {
                  /* dense blocks are stored column-major */
                  dense[block_i + (HYPRE_Int)(cid-block_start)*blockSize] = A_diag_a[k];
               }
            }
            if (num_cols_A_offd)
            {
               for (k = A_offd_i[j]; k < A_offd_i[j+1]; k++)
               {
                  HYPRE_BigInt cid = col_map_offd_A[A_offd_j[k]];
                  if (cid >= block_start && cid < block_end)
                  {
                     dense[block_i + (HYPRE_Int)(cid-block_start)*blockSize] = A_offd_a[k];
                  }
               }
            }
         }
         else
         {
            /* is an external row: compute its offset in A_ext
             * (rows below first_row come first, then rows at/after end_row) */
            if (big_i < first_row)
            {
               j = (HYPRE_Int)(big_i - first_row_block);
            }
            else
            {
               j = (HYPRE_Int)(first_row - first_row_block + big_i - end_row);
            }
            for (k = A_ext_i[j]; k < A_ext_i[j+1]; k++)
            {
               HYPRE_BigInt cid = A_ext_j[k];
               /* recover the global index (see the ncol_local+j trick above) */
               cid = cid < (HYPRE_BigInt)ncol_local ? cid + first_col : col_map_offd_A_new[cid-ncol_local];
               if (cid >= block_start && cid < block_end)
               {
                  dense[block_i + (HYPRE_Int)(cid-block_start)*blockSize] = A_ext_a[k];
               }
            }
         }
      }

      /* 2. invert the dense matrix: LU factorize, then form the inverse */
      hypre_dgetrf(&s, &s, dense, &blockSize, IPIV, &lapack_info);

      hypre_assert(lapack_info == 0);

      if (lapack_info == 0)
      {
         HYPRE_Int query = -1;
         HYPRE_Real lwork_opt;
         /* query the optimal size of work (LAPACK lwork = -1 convention) */
         hypre_dgetri(&s, dense, &blockSize, IPIV, &lwork_opt, &query, &lapack_info);

         hypre_assert(lapack_info == 0);

         if (lwork_opt > dgetri_lwork)
         {
            dgetri_lwork = lwork_opt;
            dgetri_work = hypre_TReAlloc(dgetri_work, HYPRE_Complex, dgetri_lwork, HYPRE_MEMORY_HOST);
         }

         hypre_dgetri(&s, dense, &blockSize, IPIV, dgetri_work, &dgetri_lwork, &lapack_info);

         hypre_assert(lapack_info == 0);
      }

      /* filter out *zeros*: drop entries tiny relative to ||block||_F */
      HYPRE_Real Fnorm = 0.0;
      for (i = 0; i < s; i++)
      {
         for (j = 0; j < s; j++)
         {
            HYPRE_Complex t = dense[j+i*blockSize];
            Fnorm += t * t;
         }
      }

      Fnorm = sqrt(Fnorm);

      for (i = 0; i < s; i++)
      {
         for (j = 0; j < s; j++)
         {
            if ( hypre_abs(dense[j+i*blockSize]) < eps * Fnorm )
            {
               dense[j+i*blockSize] = 0.0;
            }
         }
      }

      /* 3. premultiplication: one-pass dynamic allocation */
      for (big_i = block_start; big_i < block_end; big_i++)
      {
         /* starting points of this row in j */
         HYPRE_Int diag_i_start = nnz_diag_new;
         HYPRE_Int offd_i_start = nnz_offd_new;

         /* compute a new row with global index 'i' and local index 'local_i' */
         HYPRE_Int local_i = (HYPRE_Int)(big_i - first_row);
         /* row index in this block */
         HYPRE_Int block_i = (HYPRE_Int)(big_i - block_start);

         /* only produce rows owned by this proc */
         if (big_i < first_row || big_i >= end_row)
         {
            continue;
         }

         /* if square^2: reserve the first space in diag part to the diag entry */
         if (square2)
         {
            marker_diag[local_i] = nnz_diag_new;
            if (nnz_diag_new == nnz_diag_alloc)
            {
               nnz_diag_alloc = nnz_diag_alloc * 2 + 1;
               A_diag_j_new = hypre_TReAlloc(A_diag_j_new, HYPRE_Int,     nnz_diag_alloc, HYPRE_MEMORY_HOST);
               A_diag_a_new = hypre_TReAlloc(A_diag_a_new, HYPRE_Complex, nnz_diag_alloc, HYPRE_MEMORY_HOST);
            }
            A_diag_j_new[nnz_diag_new] = local_i;
            A_diag_a_new[nnz_diag_new] = 0.0;
            nnz_diag_new ++;
         }

         /* combine s rows: new row = sum_j dense(block_i,j) * row(block_start+j) */
         for (j = 0; j < s; j++)
         {
            /* row to combine: global row id */
            HYPRE_BigInt global_rid = block_start + (HYPRE_BigInt)j;
            /* the multiplier */
            HYPRE_Complex val = dense[block_i + j*blockSize];

            if (val == 0.0)
            {
               continue;
            }

            if (global_rid >= first_row && global_rid < end_row)
            {
               /* this row is local */
               HYPRE_Int rid = (HYPRE_Int)(global_rid - first_row);
               HYPRE_Int ii;

               for (ii = A_diag_i[rid]; ii < A_diag_i[rid+1]; ii++)
               {
                  HYPRE_Int     col = A_diag_j[ii];
                  HYPRE_Complex vv  = A_diag_a[ii];

                  if (marker_diag[col] < diag_i_start)
                  {
                     /* this col has not been seen before, create new entry */
                     marker_diag[col] = nnz_diag_new;
                     if (nnz_diag_new == nnz_diag_alloc)
                     {
                        nnz_diag_alloc = nnz_diag_alloc * 2 + 1;
                        A_diag_j_new = hypre_TReAlloc(A_diag_j_new, HYPRE_Int,     nnz_diag_alloc, HYPRE_MEMORY_HOST);
                        A_diag_a_new = hypre_TReAlloc(A_diag_a_new, HYPRE_Complex, nnz_diag_alloc, HYPRE_MEMORY_HOST);
                     }
                     A_diag_j_new[nnz_diag_new] = col;
                     A_diag_a_new[nnz_diag_new] = val * vv;
                     nnz_diag_new ++;
                  }
                  else
                  {
                     /* existing entry, update */
                     HYPRE_Int p = marker_diag[col];

                     hypre_assert(A_diag_j_new[p] == col);

                     A_diag_a_new[p] += val * vv;
                  }
               }

               for (ii = A_offd_i[rid]; ii < A_offd_i[rid+1]; ii++)
               {
                  HYPRE_Int col = A_offd_j[ii];
                  /* use the mapper to map to new offd */
                  HYPRE_Int     col_new = offd2new ? offd2new[col] : col;
                  HYPRE_Complex vv      = A_offd_a[ii];

                  if (marker_newoffd[col_new] < offd_i_start)
                  {
                     /* this col has not been seen before, create new entry */
                     marker_newoffd[col_new] = nnz_offd_new;
                     if (nnz_offd_new == nnz_offd_alloc)
                     {
                        nnz_offd_alloc = nnz_offd_alloc * 2 + 1;
                        A_offd_j_new = hypre_TReAlloc(A_offd_j_new, HYPRE_Int,     nnz_offd_alloc, HYPRE_MEMORY_HOST);
                        A_offd_a_new = hypre_TReAlloc(A_offd_a_new, HYPRE_Complex, nnz_offd_alloc, HYPRE_MEMORY_HOST);
                     }
                     A_offd_j_new[nnz_offd_new] = col_new;
                     A_offd_a_new[nnz_offd_new] = val * vv;
                     nnz_offd_new ++;
                  }
                  else
                  {
                     /* existing entry, update */
                     HYPRE_Int p = marker_newoffd[col_new];

                     hypre_assert(A_offd_j_new[p] == col_new);

                     A_offd_a_new[p] += val * vv;
                  }
               }
            }
            else
            {
               /* this is an external row: go to A_ext */
               HYPRE_Int rid, ii;

               if (global_rid < first_row)
               {
                  rid = (HYPRE_Int)(global_rid - first_row_block);
               }
               else
               {
                  rid = (HYPRE_Int)(first_row - first_row_block + global_rid - end_row);
               }

               for (ii = A_ext_i[rid]; ii < A_ext_i[rid+1]; ii++)
               {
                  HYPRE_Int     col = (HYPRE_Int)A_ext_j[ii];
                  HYPRE_Complex vv  = A_ext_a[ii];

                  if (col < ncol_local)
                  {
                     /* in diag part */
                     if (marker_diag[col] < diag_i_start)
                     {
                        /* this col has not been seen before, create new entry */
                        marker_diag[col] = nnz_diag_new;
                        if (nnz_diag_new == nnz_diag_alloc)
                        {
                           nnz_diag_alloc = nnz_diag_alloc * 2 + 1;
                           A_diag_j_new = hypre_TReAlloc(A_diag_j_new, HYPRE_Int,     nnz_diag_alloc, HYPRE_MEMORY_HOST);
                           A_diag_a_new = hypre_TReAlloc(A_diag_a_new, HYPRE_Complex, nnz_diag_alloc, HYPRE_MEMORY_HOST);
                        }
                        A_diag_j_new[nnz_diag_new] = col;
                        A_diag_a_new[nnz_diag_new] = val * vv;
                        nnz_diag_new ++;
                     }
                     else
                     {
                        /* existing entry, update */
                        HYPRE_Int p = marker_diag[col];

                        hypre_assert(A_diag_j_new[p] == col);

                        A_diag_a_new[p] += val * vv;
                     }
                  }
                  else
                  {
                     /* in offd part */
                     col -= ncol_local;

                     if (marker_newoffd[col] < offd_i_start)
                     {
                        /* this col has not been seen before, create new entry */
                        marker_newoffd[col] = nnz_offd_new;
                        if (nnz_offd_new == nnz_offd_alloc)
                        {
                           nnz_offd_alloc = nnz_offd_alloc * 2 + 1;
                           A_offd_j_new = hypre_TReAlloc(A_offd_j_new, HYPRE_Int,     nnz_offd_alloc, HYPRE_MEMORY_HOST);
                           A_offd_a_new = hypre_TReAlloc(A_offd_a_new, HYPRE_Complex, nnz_offd_alloc, HYPRE_MEMORY_HOST);
                        }
                        A_offd_j_new[nnz_offd_new] = col;
                        A_offd_a_new[nnz_offd_new] = val * vv;
                        nnz_offd_new ++;
                     }
                     else
                     {
                        /* existing entry, update */
                        HYPRE_Int p = marker_newoffd[col];

                        hypre_assert(A_offd_j_new[p] == col);

                        A_offd_a_new[p] += val * vv;
                     }
                  }
               }
            }
         }

         /* done for row local_i */
         A_diag_i_new[local_i + 1] = nnz_diag_new;
         A_offd_i_new[local_i + 1] = nnz_offd_new;
      } /* for i, each row */

      /* advance to the next stored dense block */
      dense += blockSize * blockSize;
   } /* for each block */

   /* done with all rows */
   /* resize properly */
   A_diag_j_new = hypre_TReAlloc(A_diag_j_new, HYPRE_Int,     nnz_diag_new, HYPRE_MEMORY_HOST);
   A_diag_a_new = hypre_TReAlloc(A_diag_a_new, HYPRE_Complex, nnz_diag_new, HYPRE_MEMORY_HOST);
   A_offd_j_new = hypre_TReAlloc(A_offd_j_new, HYPRE_Int,     nnz_offd_new, HYPRE_MEMORY_HOST);
   A_offd_a_new = hypre_TReAlloc(A_offd_a_new, HYPRE_Complex, nnz_offd_new, HYPRE_MEMORY_HOST);

   /* readjust col_map_offd_new: compress out offd columns never referenced */
   for (i = 0; i < num_cols_A_offd_new; i++)
   {
      marker_newoffd[i] = -1;
   }
   for (i = 0; i < nnz_offd_new; i++)
   {
      j = A_offd_j_new[i];
      if (marker_newoffd[j] == -1)
      {
         marker_newoffd[j] = 1;
      }
   }
   for (i = 0, j = 0; i < num_cols_A_offd_new; i++)
   {
      if (marker_newoffd[i] == 1)
      {
         col_map_offd_A_new[j] = col_map_offd_A_new[i];
         marker_newoffd[i] = j++;
      }
   }
   num_cols_A_offd_new = j;

   for (i = 0; i < nnz_offd_new; i++)
   {
      j = marker_newoffd[A_offd_j_new[i]];
      hypre_assert(j >= 0 && j < num_cols_A_offd_new);
      A_offd_j_new[i] = j;
   }

   /* Now, we should have everything of Parcsr matrix As */
   Anew = hypre_ParCSRMatrixCreate(comm,
                                   nrow_global,
                                   ncol_global,
                                   hypre_ParCSRMatrixRowStarts(A),
                                   hypre_ParCSRMatrixColStarts(A),
                                   num_cols_A_offd_new,
                                   nnz_diag_new,
                                   nnz_offd_new);

   Anew_diag = hypre_ParCSRMatrixDiag(Anew);
   hypre_CSRMatrixData(Anew_diag) = A_diag_a_new;
   hypre_CSRMatrixI(Anew_diag)    = A_diag_i_new;
   hypre_CSRMatrixJ(Anew_diag)    = A_diag_j_new;

   Anew_offd = hypre_ParCSRMatrixOffd(Anew);
   hypre_CSRMatrixData(Anew_offd) = A_offd_a_new;
   hypre_CSRMatrixI(Anew_offd)    = A_offd_i_new;
   hypre_CSRMatrixJ(Anew_offd)    = A_offd_j_new;

   hypre_ParCSRMatrixColMapOffd(Anew) = col_map_offd_A_new;

   hypre_ParCSRMatrixSetNumNonzeros(Anew);
   hypre_ParCSRMatrixDNumNonzeros(Anew) = (HYPRE_Real) hypre_ParCSRMatrixNumNonzeros(Anew);
   //printf("nnz_diag %d --> %d, nnz_offd %d --> %d\n", nnz_diag, nnz_diag_new, nnz_offd, nnz_offd_new);

   /* create CommPkg of Anew */
   hypre_MatvecCommPkgCreate(Anew);

   *As = Anew;

   /*
   if (bdiaginv)
   {
      *bdiaginv = dense_all;
   }
   else
   {
      hypre_TFree(dense_all, HYPRE_MEMORY_HOST);
   }
   */
   /* save diagonal blocks in A (consumed later by hypre_ParvecBdiagInvScal) */
   A->bdiag_size = blockSize;
   A->bdiaginv = dense_all;

   /* free workspace */
   hypre_TFree(IPIV, HYPRE_MEMORY_HOST);
   hypre_TFree(dgetri_work, HYPRE_MEMORY_HOST);
   hypre_TFree(marker_diag, HYPRE_MEMORY_HOST);
   hypre_TFree(marker_newoffd, HYPRE_MEMORY_HOST);
   hypre_TFree(offd2new, HYPRE_MEMORY_HOST);
   hypre_CSRMatrixDestroy(A_ext);

   return hypre_error_flag;
}
4838
4839 /*--------------------------------------------------------------------------
4840 * hypre_ParcsrGetExternalRowsInit
4841 *--------------------------------------------------------------------------*/
4842
4843 HYPRE_Int
hypre_ParcsrGetExternalRowsInit(hypre_ParCSRMatrix * A,HYPRE_Int indices_len,HYPRE_BigInt * indices,hypre_ParCSRCommPkg * comm_pkg,HYPRE_Int want_data,void ** request_ptr)4844 hypre_ParcsrGetExternalRowsInit( hypre_ParCSRMatrix *A,
4845 HYPRE_Int indices_len,
4846 HYPRE_BigInt *indices,
4847 hypre_ParCSRCommPkg *comm_pkg,
4848 HYPRE_Int want_data,
4849 void **request_ptr)
4850 {
4851 HYPRE_Int i, j, k;
4852 HYPRE_Int num_sends, num_rows_send, num_nnz_send, *send_i,
4853 num_recvs, num_rows_recv, num_nnz_recv, *recv_i,
4854 *send_jstarts, *recv_jstarts, *send_i_offset;
4855 HYPRE_BigInt *send_j, *recv_j;
4856 HYPRE_Complex *send_a = NULL, *recv_a = NULL;
4857 hypre_ParCSRCommPkg *comm_pkg_j;
4858 hypre_ParCSRCommHandle *comm_handle, *comm_handle_j, *comm_handle_a;
4859 /* HYPRE_Int global_num_rows = hypre_ParCSRMatrixGlobalNumRows(A); */
4860 /* diag part of A */
4861 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
4862 HYPRE_Real *A_diag_a = hypre_CSRMatrixData(A_diag);
4863 HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag);
4864 HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag);
4865 /* HYPRE_Int local_num_rows = hypre_CSRMatrixNumRows(A_diag); */
4866 /* off-diag part of A */
4867 hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
4868 HYPRE_Real *A_offd_a = hypre_CSRMatrixData(A_offd);
4869 HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd);
4870 HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd);
4871 /* HYPRE_BigInt *row_starts = hypre_ParCSRMatrixRowStarts(A); */
4872 /* HYPRE_BigInt first_row = hypre_ParCSRMatrixFirstRowIndex(A); */
4873 HYPRE_BigInt first_col = hypre_ParCSRMatrixFirstColDiag(A);
4874 HYPRE_BigInt *col_map_offd_A = hypre_ParCSRMatrixColMapOffd(A);
4875 MPI_Comm comm = hypre_ParCSRMatrixComm(A);
4876 HYPRE_Int num_procs;
4877 HYPRE_Int my_id;
4878 void **vrequest;
4879 hypre_CSRMatrix *A_ext;
4880
4881 hypre_MPI_Comm_size(comm, &num_procs);
4882 hypre_MPI_Comm_rank(comm, &my_id);
4883
4884 /* number of sends (#procs) */
4885 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
4886 /* number of rows to send */
4887 num_rows_send = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends);
4888 /* number of recvs (#procs) */
4889 num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
4890 /* number of rows to recv */
4891 num_rows_recv = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs);
4892
4893 /* must be true if indices contains proper offd indices */
4894 hypre_assert(indices_len == num_rows_recv);
4895
4896 /* send_i/recv_i:
4897 * the arrays to send and recv: we first send and recv the row lengths */
4898 send_i = hypre_TAlloc(HYPRE_Int, num_rows_send, HYPRE_MEMORY_HOST);
4899 recv_i = hypre_CTAlloc(HYPRE_Int, num_rows_recv + 1, HYPRE_MEMORY_HOST);
4900 /* fill the send array with row lengths */
4901 for (i = 0, num_nnz_send = 0; i < num_rows_send; i++)
4902 {
4903 /* j: row index to send */
4904 j = hypre_ParCSRCommPkgSendMapElmt(comm_pkg, i);
4905 send_i[i] = A_diag_i[j+1] - A_diag_i[j] + A_offd_i[j+1] - A_offd_i[j];
4906 num_nnz_send += send_i[i];
4907 }
4908
4909 /* send this array out: note the shift in recv_i by one (async) */
4910 comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, send_i, recv_i+1);
4911
4912 /* prepare data to send out. overlap with the above commmunication */
4913 send_j = hypre_TAlloc(HYPRE_BigInt, num_nnz_send, HYPRE_MEMORY_HOST);
4914 if (want_data)
4915 {
4916 send_a = hypre_TAlloc(HYPRE_Complex, num_nnz_send, HYPRE_MEMORY_HOST);
4917 }
4918
4919 send_i_offset = hypre_TAlloc(HYPRE_Int, num_rows_send + 1, HYPRE_MEMORY_HOST);
4920 send_i_offset[0] = 0;
4921 hypre_TMemcpy(send_i_offset + 1, send_i, HYPRE_Int, num_rows_send,
4922 HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST);
4923 /* prefix sum. TODO: OMP parallelization */
4924 for (i = 1; i <= num_rows_send; i++)
4925 {
4926 send_i_offset[i] += send_i_offset[i-1];
4927 }
4928 hypre_assert(send_i_offset[num_rows_send] == num_nnz_send);
4929
4930 /* pointers to each proc in send_j */
4931 send_jstarts = hypre_TAlloc(HYPRE_Int, num_sends + 1, HYPRE_MEMORY_HOST);
4932 #ifdef HYPRE_USING_OPENMP
4933 #pragma omp parallel for HYPRE_SMP_SCHEDULE
4934 #endif
4935 for (i = 0; i <= num_sends; i++)
4936 {
4937 send_jstarts[i] = send_i_offset[hypre_ParCSRCommPkgSendMapStart(comm_pkg, i)];
4938 }
4939 hypre_assert(send_jstarts[num_sends] == num_nnz_send);
4940
4941 /* fill the CSR matrix: j and a */
4942 #ifdef HYPRE_USING_OPENMP
4943 #pragma omp parallel for HYPRE_SMP_SCHEDULE private(i,j,k)
4944 #endif
4945 for (i = 0; i < num_rows_send; i++)
4946 {
4947 HYPRE_Int i1 = send_i_offset[i];
4948 j = hypre_ParCSRCommPkgSendMapElmt(comm_pkg, i);
4949 /* open row j and fill ja and a to send */
4950 for (k = A_diag_i[j]; k < A_diag_i[j+1]; k++)
4951 {
4952 send_j[i1] = first_col + A_diag_j[k];
4953 if (want_data)
4954 {
4955 send_a[i1] = A_diag_a[k];
4956 }
4957 i1++;
4958 }
4959 if (num_procs > 1)
4960 {
4961 for (k = A_offd_i[j]; k < A_offd_i[j+1]; k++)
4962 {
4963 send_j[i1] = col_map_offd_A[A_offd_j[k]];
4964 if (want_data)
4965 {
4966 send_a[i1] = A_offd_a[k];
4967 }
4968 i1++;
4969 }
4970 }
4971 hypre_assert(send_i_offset[i+1] == i1);
4972 }
4973
4974 /* finish the above communication: send_i/recv_i */
4975 hypre_ParCSRCommHandleDestroy(comm_handle);
4976
4977 /* adjust recv_i to ptrs */
4978 for (i = 1; i <= num_rows_recv; i++)
4979 {
4980 recv_i[i] += recv_i[i-1];
4981 }
4982 num_nnz_recv = recv_i[num_rows_recv];
4983 recv_j = hypre_CTAlloc(HYPRE_BigInt, num_nnz_recv, HYPRE_MEMORY_HOST);
4984 if (want_data)
4985 {
4986 recv_a = hypre_CTAlloc(HYPRE_Complex, num_nnz_recv, HYPRE_MEMORY_HOST);
4987 }
4988 recv_jstarts = hypre_CTAlloc(HYPRE_Int, num_recvs + 1, HYPRE_MEMORY_HOST);
4989 for (i = 1; i <= num_recvs; i++)
4990 {
4991 j = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i);
4992 recv_jstarts[i] = recv_i[j];
4993 }
4994
4995 /* ready to send and recv: create a communication package for data */
4996 comm_pkg_j = hypre_CTAlloc(hypre_ParCSRCommPkg, 1, HYPRE_MEMORY_HOST);
4997 hypre_ParCSRCommPkgComm (comm_pkg_j) = comm;
4998 hypre_ParCSRCommPkgNumSends (comm_pkg_j) = num_sends;
4999 hypre_ParCSRCommPkgSendProcs (comm_pkg_j) = hypre_ParCSRCommPkgSendProcs(comm_pkg);
5000 hypre_ParCSRCommPkgSendMapStarts(comm_pkg_j) = send_jstarts;
5001 hypre_ParCSRCommPkgNumRecvs (comm_pkg_j) = num_recvs;
5002 hypre_ParCSRCommPkgRecvProcs (comm_pkg_j) = hypre_ParCSRCommPkgRecvProcs(comm_pkg);
5003 hypre_ParCSRCommPkgRecvVecStarts(comm_pkg_j) = recv_jstarts;
5004
5005 /* init communication */
5006 /* ja */
5007 comm_handle_j = hypre_ParCSRCommHandleCreate(21, comm_pkg_j, send_j, recv_j);
5008 if (want_data)
5009 {
5010 /* a */
5011 comm_handle_a = hypre_ParCSRCommHandleCreate(1, comm_pkg_j, send_a, recv_a);
5012 }
5013 else
5014 {
5015 comm_handle_a = NULL;
5016 }
5017
5018 /* create A_ext */
5019 A_ext = hypre_CSRMatrixCreate(num_rows_recv, hypre_ParCSRMatrixGlobalNumCols(A), num_nnz_recv);
5020 hypre_CSRMatrixMemoryLocation(A_ext) = HYPRE_MEMORY_HOST;
5021 hypre_CSRMatrixI (A_ext) = recv_i;
5022 hypre_CSRMatrixBigJ(A_ext) = recv_j;
5023 hypre_CSRMatrixData(A_ext) = recv_a;
5024
5025 /* output */
5026 vrequest = hypre_TAlloc(void *, 4, HYPRE_MEMORY_HOST);
5027 vrequest[0] = (void *) comm_handle_j;
5028 vrequest[1] = (void *) comm_handle_a;
5029 vrequest[2] = (void *) A_ext;
5030 vrequest[3] = (void *) comm_pkg_j;
5031
5032 *request_ptr = (void *) vrequest;
5033
5034 /* free */
5035 hypre_TFree(send_i, HYPRE_MEMORY_HOST);
5036 hypre_TFree(send_i_offset, HYPRE_MEMORY_HOST);
5037
5038 return hypre_error_flag;
5039 }
5040
5041 hypre_CSRMatrix*
hypre_ParcsrGetExternalRowsWait(void * vrequest)5042 hypre_ParcsrGetExternalRowsWait(void *vrequest)
5043 {
5044 void **request = (void **) vrequest;
5045
5046 hypre_ParCSRCommHandle *comm_handle_j = (hypre_ParCSRCommHandle *) request[0];
5047 hypre_ParCSRCommHandle *comm_handle_a = (hypre_ParCSRCommHandle *) request[1];
5048 hypre_CSRMatrix *A_ext = (hypre_CSRMatrix *) request[2];
5049 hypre_ParCSRCommPkg *comm_pkg_j = (hypre_ParCSRCommPkg *) request[3];
5050 HYPRE_BigInt *send_j = (HYPRE_BigInt *) hypre_ParCSRCommHandleSendData(comm_handle_j);
5051
5052 if (comm_handle_a)
5053 {
5054 HYPRE_Complex *send_a = (HYPRE_Complex *) hypre_ParCSRCommHandleSendData(comm_handle_a);
5055 hypre_ParCSRCommHandleDestroy(comm_handle_a);
5056 hypre_TFree(send_a, HYPRE_MEMORY_HOST);
5057 }
5058
5059 hypre_ParCSRCommHandleDestroy(comm_handle_j);
5060 hypre_TFree(send_j, HYPRE_MEMORY_HOST);
5061
5062 hypre_TFree(hypre_ParCSRCommPkgSendMapStarts(comm_pkg_j), HYPRE_MEMORY_HOST);
5063 hypre_TFree(hypre_ParCSRCommPkgRecvVecStarts(comm_pkg_j), HYPRE_MEMORY_HOST);
5064 hypre_TFree(comm_pkg_j, HYPRE_MEMORY_HOST);
5065
5066 hypre_TFree(request, HYPRE_MEMORY_HOST);
5067
5068 return A_ext;
5069 }
5070
5071 /*--------------------------------------------------------------------------
5072 * hypre_ParCSRMatrixAdd: performs C = alpha*A + beta*B
5073 *
5074 * A and B are assumed to have the same row and column partitionings
5075 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_ParCSRMatrixAddHost( HYPRE_Complex alpha,
                           hypre_ParCSRMatrix *A,
                           HYPRE_Complex beta,
                           hypre_ParCSRMatrix *B,
                           hypre_ParCSRMatrix **C_ptr )
{
   /* Host (CPU) implementation of C = alpha*A + beta*B for ParCSR matrices.
    * A and B must share row and column partitionings; *C_ptr receives a
    * newly created matrix that owns its diag/offd parts and col map. */

   /* ParCSRMatrix data */
   MPI_Comm comm = hypre_ParCSRMatrixComm(A);
   HYPRE_BigInt num_rows_A = hypre_ParCSRMatrixGlobalNumRows(A);
   HYPRE_BigInt num_cols_A = hypre_ParCSRMatrixGlobalNumCols(A);
   /* HYPRE_BigInt num_rows_B = hypre_ParCSRMatrixGlobalNumRows(B); */
   /* HYPRE_BigInt num_cols_B = hypre_ParCSRMatrixGlobalNumCols(B); */

   /* diag part of A */
   hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
   HYPRE_Int *rownnz_diag_A = hypre_CSRMatrixRownnz(A_diag);
   HYPRE_Int num_rownnz_diag_A = hypre_CSRMatrixNumRownnz(A_diag);
   HYPRE_Int num_rows_diag_A = hypre_CSRMatrixNumRows(A_diag);
   HYPRE_Int num_cols_diag_A = hypre_CSRMatrixNumCols(A_diag);

   /* off-diag part of A */
   hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
   HYPRE_Int *rownnz_offd_A = hypre_CSRMatrixRownnz(A_offd);
   HYPRE_Int num_rownnz_offd_A = hypre_CSRMatrixNumRownnz(A_offd);
   HYPRE_Int num_rows_offd_A = hypre_CSRMatrixNumRows(A_offd);
   HYPRE_Int num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd);
   HYPRE_BigInt *col_map_offd_A = hypre_ParCSRMatrixColMapOffd(A);
   HYPRE_Int *A2C_offd;   /* maps A's offd columns to C's offd columns */

   /* diag part of B */
   hypre_CSRMatrix *B_diag = hypre_ParCSRMatrixDiag(B);
   HYPRE_Int *rownnz_diag_B = hypre_CSRMatrixRownnz(B_diag);
   HYPRE_Int num_rownnz_diag_B = hypre_CSRMatrixNumRownnz(B_diag);
   HYPRE_Int num_rows_diag_B = hypre_CSRMatrixNumRows(B_diag);
   /* HYPRE_Int num_cols_diag_B = hypre_CSRMatrixNumCols(B_diag); */

   /* off-diag part of B */
   hypre_CSRMatrix *B_offd = hypre_ParCSRMatrixOffd(B);
   HYPRE_Int *rownnz_offd_B = hypre_CSRMatrixRownnz(B_offd);
   HYPRE_Int num_rownnz_offd_B = hypre_CSRMatrixNumRownnz(B_offd);
   HYPRE_Int num_rows_offd_B = hypre_CSRMatrixNumRows(B_offd);
   HYPRE_Int num_cols_offd_B = hypre_CSRMatrixNumCols(B_offd);
   HYPRE_BigInt *col_map_offd_B = hypre_ParCSRMatrixColMapOffd(B);
   HYPRE_Int *B2C_offd;   /* maps B's offd columns to C's offd columns */

   /* C data */
   hypre_ParCSRMatrix *C;
   hypre_CSRMatrix *C_diag;
   hypre_CSRMatrix *C_offd;
   HYPRE_BigInt *col_map_offd_C;
   HYPRE_Int *C_diag_i, *C_offd_i;
   HYPRE_Int *rownnz_diag_C = NULL;
   HYPRE_Int *rownnz_offd_C = NULL;
   HYPRE_Int num_rownnz_diag_C;
   HYPRE_Int num_rownnz_offd_C;
   HYPRE_Int num_rows_diag_C = num_rows_diag_A;
   HYPRE_Int num_cols_diag_C = num_cols_diag_A;
   HYPRE_Int num_rows_offd_C = num_rows_offd_A;
   /* upper bound; the true count is computed by hypre_union2 below */
   HYPRE_Int num_cols_offd_C = num_cols_offd_A + num_cols_offd_B;
   HYPRE_Int *twspace;   /* per-thread workspace for the two-pass CSR add */

   HYPRE_MemoryLocation memory_location_A = hypre_ParCSRMatrixMemoryLocation(A);
   HYPRE_MemoryLocation memory_location_B = hypre_ParCSRMatrixMemoryLocation(B);

   /* RL: TODO cannot guarantee, maybe should never assert
   hypre_assert(memory_location_A == memory_location_B);
   */

   /* RL: in the case of A=H, B=D, or A=D, B=H, let C = D,
    * not sure if this is the right thing to do.
    * Also, need something like this in other places
    * TODO */
   HYPRE_MemoryLocation memory_location_C = hypre_max(memory_location_A, memory_location_B);

   HYPRE_ANNOTATE_FUNC_BEGIN;

   /* Allocate memory */
   twspace = hypre_TAlloc(HYPRE_Int, hypre_NumThreads(), HYPRE_MEMORY_HOST);
   C_diag_i = hypre_CTAlloc(HYPRE_Int, num_rows_diag_A + 1, memory_location_C);
   C_offd_i = hypre_CTAlloc(HYPRE_Int, num_rows_offd_A + 1, memory_location_C);
   col_map_offd_C = hypre_TAlloc(HYPRE_BigInt, num_cols_offd_C, HYPRE_MEMORY_HOST);

   /* Compute num_cols_offd_C, A2C_offd, and B2C_offd:
    * col_map_offd_C becomes the sorted union of A's and B's offd col maps */
   A2C_offd = hypre_TAlloc(HYPRE_Int, num_cols_offd_A, HYPRE_MEMORY_HOST);
   B2C_offd = hypre_TAlloc(HYPRE_Int, num_cols_offd_B, HYPRE_MEMORY_HOST);
   hypre_union2(num_cols_offd_A, col_map_offd_A,
                num_cols_offd_B, col_map_offd_B,
                &num_cols_offd_C, col_map_offd_C,
                A2C_offd, B2C_offd);

   /* Set nonzero rows data of diag_C: C's nonzero-row set is the merge
    * (union) of A's and B's, and is only worth computing when both inputs
    * actually store a sparse rownnz array */
   num_rownnz_diag_C = num_rows_diag_A;
   if ((num_rownnz_diag_A < num_rows_diag_A) &&
       (num_rownnz_diag_B < num_rows_diag_B))
   {
      hypre_MergeOrderedArrays( num_rownnz_diag_A, rownnz_diag_A,
                                num_rownnz_diag_B, rownnz_diag_B,
                                &num_rownnz_diag_C, &rownnz_diag_C);
   }

   /* Set nonzero rows data of offd_C */
   num_rownnz_offd_C = num_rows_offd_A;
   if ((num_rownnz_offd_A < num_rows_offd_A) &&
       (num_rownnz_offd_B < num_rows_offd_B))
   {
      hypre_MergeOrderedArrays( num_rownnz_offd_A, rownnz_offd_A,
                                num_rownnz_offd_B, rownnz_offd_B,
                                &num_rownnz_offd_C, &rownnz_offd_C);
   }

   /* Set diag_C */
#ifdef HYPRE_USING_OPENMP
   #pragma omp parallel
#endif
   {
      HYPRE_Int ii, num_threads;
      HYPRE_Int size, rest, ns, ne;
      HYPRE_Int *marker_diag;
      HYPRE_Int *marker_offd;

      ii = hypre_GetThreadNum();
      num_threads = hypre_NumActiveThreads();

      /*-----------------------------------------------------------------------
       * Compute C_diag = alpha*A_diag + beta*B_diag
       *-----------------------------------------------------------------------*/

      /* [ns, ne): this thread's range of nonzero rows; the first `rest`
       * threads each take one extra row to cover the remainder */
      size = num_rownnz_diag_C/num_threads;
      rest = num_rownnz_diag_C - size*num_threads;
      if (ii < rest)
      {
         ns = ii*size+ii;
         ne = (ii+1)*size+ii+1;
      }
      else
      {
         ns = ii*size+rest;
         ne = (ii+1)*size+rest;
      }

      /* first pass counts nnz per row and builds C_diag; second pass fills
       * column indices and values. marker_diag tracks seen columns per row. */
      marker_diag = hypre_TAlloc(HYPRE_Int, num_cols_diag_A, HYPRE_MEMORY_HOST);
      hypre_CSRMatrixAddFirstPass(ns, ne, twspace, marker_diag,
                                  NULL, NULL, A_diag, B_diag,
                                  num_rows_diag_C, num_rownnz_diag_C,
                                  num_cols_diag_C, rownnz_diag_C,
                                  memory_location_C, C_diag_i, &C_diag);
      hypre_CSRMatrixAddSecondPass(ns, ne, twspace, marker_diag,
                                   NULL, NULL, rownnz_diag_C,
                                   alpha, beta, A_diag, B_diag, C_diag);
      hypre_TFree(marker_diag, HYPRE_MEMORY_HOST);

      /*-----------------------------------------------------------------------
       * Compute C_offd = alpha*A_offd + beta*B_offd
       *-----------------------------------------------------------------------*/

      size = num_rownnz_offd_C/num_threads;
      rest = num_rownnz_offd_C - size*num_threads;
      if (ii < rest)
      {
         ns = ii*size+ii;
         ne = (ii+1)*size+ii+1;
      }
      else
      {
         ns = ii*size+rest;
         ne = (ii+1)*size+rest;
      }

      /* same two-pass scheme, but columns are remapped into C's offd
       * numbering through A2C_offd/B2C_offd */
      marker_offd = hypre_TAlloc(HYPRE_Int, num_cols_offd_C, HYPRE_MEMORY_HOST);
      hypre_CSRMatrixAddFirstPass(ns, ne, twspace, marker_offd,
                                  A2C_offd, B2C_offd, A_offd, B_offd,
                                  num_rows_offd_C, num_rownnz_offd_C,
                                  num_cols_offd_C, rownnz_offd_C,
                                  memory_location_C, C_offd_i, &C_offd);
      hypre_CSRMatrixAddSecondPass(ns, ne, twspace, marker_offd,
                                   A2C_offd, B2C_offd, rownnz_offd_C,
                                   alpha, beta, A_offd, B_offd, C_offd);
      hypre_TFree(marker_offd, HYPRE_MEMORY_HOST);
   } /* end of omp parallel region */

   /* Free memory */
   /* NOTE(review): rownnz_diag_C/rownnz_offd_C are not freed here —
    * presumably ownership passes to C_diag/C_offd inside AddFirstPass;
    * confirm against hypre_CSRMatrixAddFirstPass */
   hypre_TFree(twspace, HYPRE_MEMORY_HOST);
   hypre_TFree(A2C_offd, HYPRE_MEMORY_HOST);
   hypre_TFree(B2C_offd, HYPRE_MEMORY_HOST);

   /* Create ParCSRMatrix C: create with the right sizes, then replace the
    * default diag/offd parts with the ones built above */
   C = hypre_ParCSRMatrixCreate(comm,
                                num_rows_A,
                                num_cols_A,
                                hypre_ParCSRMatrixRowStarts(A),
                                hypre_ParCSRMatrixColStarts(A),
                                num_cols_offd_C,
                                hypre_CSRMatrixNumNonzeros(C_diag),
                                hypre_CSRMatrixNumNonzeros(C_offd));

   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(C));
   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(C));
   hypre_ParCSRMatrixDiag(C) = C_diag;
   hypre_ParCSRMatrixOffd(C) = C_offd;
   hypre_ParCSRMatrixColMapOffd(C) = col_map_offd_C;
   hypre_ParCSRMatrixSetNumNonzeros(C);
   hypre_ParCSRMatrixDNumNonzeros(C) = (HYPRE_Real) hypre_ParCSRMatrixNumNonzeros(C);

   /* create CommPkg of C */
   hypre_MatvecCommPkgCreate(C);

   *C_ptr = C;

   HYPRE_ANNOTATE_FUNC_END;

   return hypre_error_flag;
}
5289
5290 HYPRE_Int
hypre_ParCSRMatrixAdd(HYPRE_Complex alpha,hypre_ParCSRMatrix * A,HYPRE_Complex beta,hypre_ParCSRMatrix * B,hypre_ParCSRMatrix ** C_ptr)5291 hypre_ParCSRMatrixAdd( HYPRE_Complex alpha,
5292 hypre_ParCSRMatrix *A,
5293 HYPRE_Complex beta,
5294 hypre_ParCSRMatrix *B,
5295 hypre_ParCSRMatrix **C_ptr )
5296 {
5297 hypre_assert(hypre_ParCSRMatrixGlobalNumRows(A) == hypre_ParCSRMatrixGlobalNumRows(B));
5298 hypre_assert(hypre_ParCSRMatrixGlobalNumCols(A) == hypre_ParCSRMatrixGlobalNumCols(B));
5299 hypre_assert(hypre_ParCSRMatrixNumRows(A) == hypre_ParCSRMatrixNumRows(B));
5300 hypre_assert(hypre_ParCSRMatrixNumCols(A) == hypre_ParCSRMatrixNumCols(B));
5301
5302 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
5303 if ( hypre_GetExecPolicy2( hypre_ParCSRMatrixMemoryLocation(A),
5304 hypre_ParCSRMatrixMemoryLocation(B) ) == HYPRE_EXEC_DEVICE )
5305 {
5306 hypre_ParCSRMatrixAddDevice(alpha, A, beta, B, C_ptr);
5307 }
5308 else
5309 #endif
5310 {
5311 hypre_ParCSRMatrixAddHost(alpha, A, beta, B, C_ptr);
5312 }
5313
5314 return hypre_error_flag;
5315 }
5316
5317 /*--------------------------------------------------------------------------
5318 * hypre_ParCSRMatrixFnorm
5319 *--------------------------------------------------------------------------*/
5320
5321 HYPRE_Real
hypre_ParCSRMatrixFnorm(hypre_ParCSRMatrix * A)5322 hypre_ParCSRMatrixFnorm( hypre_ParCSRMatrix *A )
5323 {
5324 MPI_Comm comm = hypre_ParCSRMatrixComm(A);
5325 HYPRE_Real f_diag, f_offd, local_result, result;
5326
5327 f_diag = hypre_CSRMatrixFnorm(hypre_ParCSRMatrixDiag(A));
5328 f_offd = hypre_CSRMatrixFnorm(hypre_ParCSRMatrixOffd(A));
5329 local_result = f_diag * f_diag + f_offd * f_offd;
5330
5331 hypre_MPI_Allreduce(&local_result, &result, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm);
5332
5333 return sqrt(result);
5334 }
5335
5336 /*--------------------------------------------------------------------------
5337 * hypre_ParCSRMatrixInfNorm
5338 *
5339 * Computes the infinity norm of A:
5340 *
5341 * norm = max_{i} sum_{j} |A_{ij}|
5342 *--------------------------------------------------------------------------*/
5343
HYPRE_Int
hypre_ParCSRMatrixInfNorm( hypre_ParCSRMatrix *A,
                           HYPRE_Real *norm )
{
   /* Infinity norm of A: norm = max_i sum_j |A_ij|. Each rank computes the
    * max absolute row sum over its local rows (diag + offd parts), then the
    * global max is taken with an all-reduce. */
   MPI_Comm comm = hypre_ParCSRMatrixComm(A);

   /* diag part of A */
   hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
   HYPRE_Complex *A_diag_a = hypre_CSRMatrixData(A_diag);
   HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag);
   HYPRE_Int num_rows_diag_A = hypre_CSRMatrixNumRows(A_diag);

   /* off-diag part of A */
   hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
   HYPRE_Complex *A_offd_a = hypre_CSRMatrixData(A_offd);
   HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd);

   /* Local variables */
   HYPRE_Int i, j;
   HYPRE_Real maxsum = 0.0;
   HYPRE_Real rowsum;

#ifdef _MSC_VER
   /* NOTE(review): MSVC's OpenMP support presumably lacks reduction(max:),
    * so the max-reduction is emulated with per-thread maxima combined in a
    * critical section — confirm against the supported OpenMP version */
#ifdef HYPRE_USING_OPENMP
   #pragma omp parallel private(i,j,rowsum)
#endif
   {
      HYPRE_Real maxsum_local;

      maxsum_local = 0.0;
#ifdef HYPRE_USING_OPENMP
      #pragma omp for HYPRE_SMP_SCHEDULE
#endif
      for (i = 0; i < num_rows_diag_A; i++)
      {
         /* absolute row sum over both the diag and offd parts of row i */
         rowsum = 0.0;
         for (j = A_diag_i[i]; j < A_diag_i[i+1]; j++)
         {
            rowsum += hypre_cabs(A_diag_a[j]);
         }
         for (j = A_offd_i[i]; j < A_offd_i[i+1]; j++)
         {
            rowsum += hypre_cabs(A_offd_a[j]);
         }

         maxsum_local = hypre_max(maxsum_local, rowsum);
      }

      /* fold this thread's max into the shared result */
#ifdef HYPRE_USING_OPENMP
      #pragma omp critical
#endif
      {
         maxsum = hypre_max(maxsum, maxsum_local);
      }
   }
#else
   /* non-MSVC path: use OpenMP's native max reduction */
#ifdef HYPRE_USING_OPENMP
   #pragma omp parallel for private(i,j,rowsum) reduction(max:maxsum) HYPRE_SMP_SCHEDULE
#endif
   for (i = 0; i < num_rows_diag_A; i++)
   {
      rowsum = 0.0;
      for (j = A_diag_i[i]; j < A_diag_i[i+1]; j++)
      {
         rowsum += hypre_cabs(A_diag_a[j]);
      }
      for (j = A_offd_i[i]; j < A_offd_i[i+1]; j++)
      {
         rowsum += hypre_cabs(A_offd_a[j]);
      }

      maxsum = hypre_max(maxsum, rowsum);
   }
#endif

   /* global max over all ranks */
   hypre_MPI_Allreduce(&maxsum, norm, 1, HYPRE_MPI_REAL, hypre_MPI_MAX, comm);

   return hypre_error_flag;
}
5423
5424 /*--------------------------------------------------------------------------
5425 * hypre_ExchangeExternalRowsInit
5426 *--------------------------------------------------------------------------*/
5427
HYPRE_Int
hypre_ExchangeExternalRowsInit( hypre_CSRMatrix *B_ext,
                                hypre_ParCSRCommPkg *comm_pkg_A,
                                void **request_ptr)
{
   /* Start the (asynchronous) exchange of external rows: each rank sends the
    * rows of B_ext it received earlier back along the REVERSE direction of
    * comm_pkg_A, assembling them into B_int. Communication is completed by
    * hypre_ExchangeExternalRowsWait, which receives *request_ptr. */
   MPI_Comm comm = hypre_ParCSRCommPkgComm(comm_pkg_A);
   HYPRE_Int num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg_A);
   HYPRE_Int *recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg_A);
   HYPRE_Int *recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg_A);
   HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg_A);
   HYPRE_Int *send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg_A);
   HYPRE_Int *send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg_A);

   HYPRE_Int num_elmts_send = send_map_starts[num_sends];
   HYPRE_Int num_elmts_recv = recv_vec_starts[num_recvs];

   /* B_ext may be NULL (e.g. on a single rank); guard all accesses */
   HYPRE_Int *B_ext_i = B_ext ? hypre_CSRMatrixI(B_ext) : NULL;
   HYPRE_BigInt *B_ext_j = B_ext ? hypre_CSRMatrixBigJ(B_ext) : NULL;
   HYPRE_Complex *B_ext_data = B_ext ? hypre_CSRMatrixData(B_ext) : NULL;
   HYPRE_Int B_ext_ncols = B_ext ? hypre_CSRMatrixNumCols(B_ext) : 0;
   HYPRE_Int B_ext_nrows = B_ext ? hypre_CSRMatrixNumRows(B_ext) : 0;
   HYPRE_Int *B_ext_rownnz = hypre_CTAlloc(HYPRE_Int, B_ext_nrows, HYPRE_MEMORY_HOST);

   hypre_assert(num_elmts_recv == B_ext_nrows);

   /* output matrix: B_int has one row per element this rank originally sent */
   hypre_CSRMatrix *B_int;
   HYPRE_Int B_int_nrows = num_elmts_send;
   HYPRE_Int B_int_ncols = B_ext_ncols;
   HYPRE_Int *B_int_i = hypre_TAlloc(HYPRE_Int, B_int_nrows + 1, HYPRE_MEMORY_HOST);
   HYPRE_BigInt *B_int_j = NULL;
   HYPRE_Complex *B_int_data = NULL;
   HYPRE_Int B_int_nnz;

   hypre_ParCSRCommHandle *comm_handle, *comm_handle_j, *comm_handle_a;
   hypre_ParCSRCommPkg *comm_pkg_j;

   HYPRE_Int *jdata_recv_vec_starts;
   HYPRE_Int *jdata_send_map_starts;

   HYPRE_Int i;
   HYPRE_Int num_procs;
   void **vrequest;

   hypre_MPI_Comm_size(comm, &num_procs);

   jdata_send_map_starts = hypre_TAlloc(HYPRE_Int, num_sends+1, HYPRE_MEMORY_HOST);

   /*--------------------------------------------------------------------------
    * B_ext_rownnz contains the number of elements of row j
    * (to be determined through send_map_elmnts on the receiving end)
    *--------------------------------------------------------------------------*/
   for (i = 0; i < B_ext_nrows; i++)
   {
      B_ext_rownnz[i] = B_ext_i[i+1] - B_ext_i[i];
   }

   /*--------------------------------------------------------------------------
    * initialize communication: send/recv the row nnz
    * (note the use of comm_pkg_A, mode 12, as in transpose matvec)
    *--------------------------------------------------------------------------*/
   comm_handle = hypre_ParCSRCommHandleCreate(12, comm_pkg_A, B_ext_rownnz, B_int_i + 1);

   /* nnz offsets per recv-neighbor in B_ext (used as SEND starts below) */
   jdata_recv_vec_starts = hypre_TAlloc(HYPRE_Int, num_recvs + 1, HYPRE_MEMORY_HOST);
   jdata_recv_vec_starts[0] = 0;
   for (i = 1; i <= num_recvs; i++)
   {
      jdata_recv_vec_starts[i] = B_ext_i[recv_vec_starts[i]];
   }

   /* build the data-exchange comm package: it runs in the reverse direction
    * of comm_pkg_A, so the send and recv roles (counts and proc lists) are
    * deliberately swapped */
   comm_pkg_j = hypre_CTAlloc(hypre_ParCSRCommPkg, 1, HYPRE_MEMORY_HOST);
   hypre_ParCSRCommPkgComm(comm_pkg_j) = comm;
   hypre_ParCSRCommPkgNumSends(comm_pkg_j) = num_recvs;
   hypre_ParCSRCommPkgNumRecvs(comm_pkg_j) = num_sends;
   hypre_ParCSRCommPkgSendProcs(comm_pkg_j) = recv_procs;
   hypre_ParCSRCommPkgRecvProcs(comm_pkg_j) = send_procs;

   /* finish the row-nnz exchange before the prefix sum below reads B_int_i */
   hypre_ParCSRCommHandleDestroy(comm_handle);

   /*--------------------------------------------------------------------------
    * compute B_int: row nnz to row ptrs (in-place prefix sum)
    *--------------------------------------------------------------------------*/
   B_int_i[0] = 0;
   for (i = 1; i <= B_int_nrows; i++)
   {
      B_int_i[i] += B_int_i[i-1];
   }

   B_int_nnz = B_int_i[B_int_nrows];

   B_int_j = hypre_TAlloc(HYPRE_BigInt, B_int_nnz, HYPRE_MEMORY_HOST);
   B_int_data = hypre_TAlloc(HYPRE_Complex, B_int_nnz, HYPRE_MEMORY_HOST);

   /* nnz offsets per send-neighbor in B_int (used as RECV starts below) */
   for (i = 0; i <= num_sends; i++)
   {
      jdata_send_map_starts[i] = B_int_i[send_map_starts[i]];
   }

   /* note the order of send/recv is reversed */
   hypre_ParCSRCommPkgRecvVecStarts(comm_pkg_j) = jdata_send_map_starts;
   hypre_ParCSRCommPkgSendMapStarts(comm_pkg_j) = jdata_recv_vec_starts;

   /* send/recv CSR rows: 1 = HYPRE_Complex data, 21 = HYPRE_BigInt indices */
   comm_handle_a = hypre_ParCSRCommHandleCreate( 1, comm_pkg_j, B_ext_data, B_int_data);
   comm_handle_j = hypre_ParCSRCommHandleCreate(21, comm_pkg_j, B_ext_j, B_int_j);

   /* create CSR: B_int takes ownership of the i/j/data arrays */
   B_int = hypre_CSRMatrixCreate(B_int_nrows, B_int_ncols, B_int_nnz);
   hypre_CSRMatrixMemoryLocation(B_int) = HYPRE_MEMORY_HOST;
   hypre_CSRMatrixI(B_int) = B_int_i;
   hypre_CSRMatrixBigJ(B_int) = B_int_j;
   hypre_CSRMatrixData(B_int) = B_int_data;

   /* output: package the pending handles, result matrix, and temporary comm
    * package for hypre_ExchangeExternalRowsWait */
   vrequest = hypre_TAlloc(void *, 4, HYPRE_MEMORY_HOST);
   vrequest[0] = (void *) comm_handle_j;
   vrequest[1] = (void *) comm_handle_a;
   vrequest[2] = (void *) B_int;
   vrequest[3] = (void *) comm_pkg_j;

   *request_ptr = (void *) vrequest;

   hypre_TFree(B_ext_rownnz, HYPRE_MEMORY_HOST);

   return hypre_error_flag;
}
5554
5555 /*--------------------------------------------------------------------------
5556 * hypre_ExchangeExternalRowsWait
5557 *--------------------------------------------------------------------------*/
5558
5559 hypre_CSRMatrix*
hypre_ExchangeExternalRowsWait(void * vrequest)5560 hypre_ExchangeExternalRowsWait(void *vrequest)
5561 {
5562 void **request = (void **) vrequest;
5563
5564 hypre_ParCSRCommHandle *comm_handle_j = (hypre_ParCSRCommHandle *) request[0];
5565 hypre_ParCSRCommHandle *comm_handle_a = (hypre_ParCSRCommHandle *) request[1];
5566 hypre_CSRMatrix *B_int = (hypre_CSRMatrix *) request[2];
5567 hypre_ParCSRCommPkg *comm_pkg_j = (hypre_ParCSRCommPkg *) request[3];
5568
5569 /* communication done */
5570 hypre_ParCSRCommHandleDestroy(comm_handle_a);
5571 hypre_ParCSRCommHandleDestroy(comm_handle_j);
5572
5573 hypre_TFree(hypre_ParCSRCommPkgSendMapStarts(comm_pkg_j), HYPRE_MEMORY_HOST);
5574 hypre_TFree(hypre_ParCSRCommPkgRecvVecStarts(comm_pkg_j), HYPRE_MEMORY_HOST);
5575 hypre_TFree(comm_pkg_j, HYPRE_MEMORY_HOST);
5576
5577 hypre_TFree(request, HYPRE_MEMORY_HOST);
5578
5579 return B_int;
5580 }
5581
5582
5583 /*--------------------------------------------------------------------------
5584 * hypre_ParCSRMatrixExtractSubmatrixFC
5585 *
5586 * extract submatrix A_{FF}, A_{FC}, A_{CF} or A_{CC}
5587 * char job[2] = "FF", "FC", "CF" or "CC"
5588 *--------------------------------------------------------------------------*/
5589
5590 HYPRE_Int
hypre_ParCSRMatrixExtractSubmatrixFC(hypre_ParCSRMatrix * A,HYPRE_Int * CF_marker,HYPRE_BigInt * cpts_starts_in,const char * job,hypre_ParCSRMatrix ** B_ptr,HYPRE_Real strength_thresh)5591 hypre_ParCSRMatrixExtractSubmatrixFC( hypre_ParCSRMatrix *A,
5592 HYPRE_Int *CF_marker,
5593 HYPRE_BigInt *cpts_starts_in,
5594 const char *job,
5595 hypre_ParCSRMatrix **B_ptr,
5596 HYPRE_Real strength_thresh)
5597 {
5598 MPI_Comm comm = hypre_ParCSRMatrixComm(A);
5599 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
5600 hypre_ParCSRCommHandle *comm_handle;
5601
5602 /* diag part of A */
5603 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
5604 HYPRE_Complex *A_diag_a = hypre_CSRMatrixData(A_diag);
5605 HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag);
5606 HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag);
5607 /* off-diag part of A */
5608 hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
5609 HYPRE_Complex *A_offd_a = hypre_CSRMatrixData(A_offd);
5610 HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd);
5611 HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd);
5612
5613 HYPRE_Int num_cols_A_offd = hypre_CSRMatrixNumCols(A_offd);
5614 //HYPRE_Int *col_map_offd_A = hypre_ParCSRMatrixColMapOffd(A);
5615
5616 hypre_ParCSRMatrix *B;
5617 hypre_CSRMatrix *B_diag, *B_offd;
5618 HYPRE_Real *B_maxel_row;
5619 HYPRE_Int *B_diag_i, *B_diag_j, *B_offd_i, *B_offd_j;
5620 HYPRE_Complex *B_diag_a, *B_offd_a;
5621 HYPRE_Int num_cols_B_offd;
5622 HYPRE_BigInt *col_map_offd_B;
5623
5624 HYPRE_Int i, j, k, k1, k2;
5625 HYPRE_BigInt B_nrow_global, B_ncol_global;
5626 HYPRE_Int A_nlocal, B_nrow_local, B_ncol_local,
5627 B_nnz_diag, B_nnz_offd;
5628 HYPRE_BigInt total_global_fpts, total_global_cpts, *fpts_starts, *cpts_starts;
5629 HYPRE_Int nf_local, nc_local;
5630 HYPRE_BigInt big_nf_local;
5631 HYPRE_Int row_set, col_set;
5632 HYPRE_BigInt *B_row_starts, *B_col_starts, B_first_col;
5633 HYPRE_Int my_id, num_procs;
5634 HYPRE_Int *sub_idx_diag;
5635 HYPRE_BigInt *sub_idx_offd;
5636 HYPRE_Int num_sends;
5637 HYPRE_BigInt *send_buf_data;
5638
5639 /* MPI size and rank*/
5640 hypre_MPI_Comm_size(comm, &num_procs);
5641 hypre_MPI_Comm_rank(comm, &my_id);
5642
5643 row_set = job[0] == 'F' ? -1 : 1;
5644 col_set = job[1] == 'F' ? -1 : 1;
5645
5646 A_nlocal = hypre_CSRMatrixNumRows(A_diag);
5647
5648 /*-------------- global number of C points and local C points
5649 * assuming cpts_starts is given */
5650 if (row_set == 1 || col_set == 1)
5651 {
5652 /* copy cpts_starts first */
5653 HYPRE_Int len;
5654 len = 2;
5655 cpts_starts = hypre_TAlloc(HYPRE_BigInt, len, HYPRE_MEMORY_HOST);
5656 hypre_TMemcpy(cpts_starts, cpts_starts_in, HYPRE_BigInt, len,
5657 HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST);
5658
5659 if (my_id == (num_procs -1))
5660 {
5661 total_global_cpts = cpts_starts[1];
5662 }
5663 hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs-1, comm);
5664 nc_local = (HYPRE_Int)(cpts_starts[1] - cpts_starts[0]);
5665 }
5666
5667 /*-------------- global number of F points, local F points, and F starts */
5668 if (row_set == -1 || col_set == -1)
5669 {
5670 nf_local = 0;
5671 for (i = 0; i < A_nlocal; i++)
5672 {
5673 if (CF_marker[i] < 0)
5674 {
5675 nf_local++;
5676 }
5677 }
5678 big_nf_local = (HYPRE_BigInt) nf_local;
5679 fpts_starts = hypre_TAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST);
5680 hypre_MPI_Scan(&big_nf_local, fpts_starts+1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm);
5681 fpts_starts[0] = fpts_starts[1] - nf_local;
5682 if (my_id == num_procs - 1)
5683 {
5684 total_global_fpts = fpts_starts[1];
5685 }
5686 hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs-1, comm);
5687 }
5688
5689 if (row_set == -1 && col_set == -1)
5690 {
5691 /* FF */
5692 B_nrow_local = nf_local;
5693 B_ncol_local = nf_local;
5694 B_nrow_global = total_global_fpts;
5695 B_ncol_global = total_global_fpts;
5696
5697 B_row_starts = B_col_starts = fpts_starts;
5698 }
5699 else if (row_set == -1 && col_set == 1)
5700 {
5701 /* FC */
5702 B_nrow_local = nf_local;
5703 B_ncol_local = nc_local;
5704 B_nrow_global = total_global_fpts;
5705 B_ncol_global = total_global_cpts;
5706
5707 B_row_starts = fpts_starts;
5708 B_col_starts = cpts_starts;
5709 }
5710 else if (row_set == 1 && col_set == -1)
5711 {
5712 /* CF */
5713 B_nrow_local = nc_local;
5714 B_ncol_local = nf_local;
5715 B_nrow_global = total_global_cpts;
5716 B_ncol_global = total_global_fpts;
5717
5718 B_row_starts = cpts_starts;
5719 B_col_starts = fpts_starts;
5720 }
5721 else
5722 {
5723 /* CC */
5724 B_nrow_local = nc_local;
5725 B_ncol_local = nc_local;
5726 B_nrow_global = total_global_cpts;
5727 B_ncol_global = total_global_cpts;
5728
5729 B_row_starts = B_col_starts = cpts_starts;
5730 }
5731
5732 /* global index of my first col */
5733 B_first_col = B_col_starts[0];
5734
5735 /* sub_idx_diag: [local] mapping from F+C to F/C, if not selected, be -1 */
5736 sub_idx_diag = hypre_TAlloc(HYPRE_Int, A_nlocal, HYPRE_MEMORY_HOST);
5737 for (i = 0, k = 0; i < A_nlocal; i++)
5738 {
5739 HYPRE_Int CF_i = CF_marker[i] > 0 ? 1 : -1;
5740 if (CF_i == col_set)
5741 {
5742 sub_idx_diag[i] = k++;
5743 }
5744 else
5745 {
5746 sub_idx_diag[i] = -1;
5747 }
5748 }
5749
5750 hypre_assert(k == B_ncol_local);
5751
5752 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
5753 send_buf_data = hypre_TAlloc(HYPRE_BigInt, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends),
5754 HYPRE_MEMORY_HOST);
5755 k = 0;
5756 for (i = 0; i < num_sends; i++)
5757 {
5758 /* start pos of elements sent to send_proc[i] */
5759 HYPRE_Int si = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
5760 HYPRE_Int ei = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1);
5761 /* loop through all elems to send_proc[i] */
5762 for (j = si; j < ei; j++)
5763 {
5764 /* j1: local idx */
5765 HYPRE_BigInt j1 = sub_idx_diag[hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j)];
5766 if (j1 != -1)
5767 {
5768 /* adjust j1 to B global idx */
5769 j1 += B_first_col;
5770 }
5771 send_buf_data[k++] = j1;
5772 }
5773 }
5774
5775 hypre_assert(k == hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));
5776
5777 /* recv buffer */
5778 sub_idx_offd = hypre_TAlloc(HYPRE_BigInt, num_cols_A_offd, HYPRE_MEMORY_HOST);
5779 /* create a handle to start communication. 11: for integer */
5780 comm_handle = hypre_ParCSRCommHandleCreate(21, comm_pkg, send_buf_data, sub_idx_offd);
5781 /* destroy the handle to finish communication */
5782 hypre_ParCSRCommHandleDestroy(comm_handle);
5783
5784 for (i = 0, num_cols_B_offd = 0; i < num_cols_A_offd; i++)
5785 {
5786 if (sub_idx_offd[i] != -1)
5787 {
5788 num_cols_B_offd ++;
5789 }
5790 }
5791 col_map_offd_B = hypre_TAlloc(HYPRE_BigInt, num_cols_B_offd, HYPRE_MEMORY_HOST);
5792 for (i = 0, k = 0; i < num_cols_A_offd; i++)
5793 {
5794 if (sub_idx_offd[i] != -1)
5795 {
5796 col_map_offd_B[k] = sub_idx_offd[i];
5797 sub_idx_offd[i] = k++;
5798 }
5799 }
5800
5801 hypre_assert(k == num_cols_B_offd);
5802
5803 /* count nnz and set ia */
5804 B_nnz_diag = B_nnz_offd = 0;
5805 B_maxel_row = hypre_TAlloc(HYPRE_Real, B_nrow_local, HYPRE_MEMORY_HOST);
5806 B_diag_i = hypre_TAlloc(HYPRE_Int, B_nrow_local+1, HYPRE_MEMORY_HOST);
5807 B_offd_i = hypre_TAlloc(HYPRE_Int, B_nrow_local+1, HYPRE_MEMORY_HOST);
5808 B_diag_i[0] = B_offd_i[0] = 0;
5809
5810 for (i = 0, k = 0; i < A_nlocal; i++)
5811 {
5812 HYPRE_Int CF_i = CF_marker[i] > 0 ? 1 : -1;
5813 if (CF_i != row_set)
5814 {
5815 continue;
5816 }
5817 k++;
5818
5819 // Get max abs-value element of this row
5820 HYPRE_Real temp_max = 0;
5821 if (strength_thresh > 0) {
5822 for (j = A_diag_i[i]+1; j < A_diag_i[i+1]; j++)
5823 {
5824 if (hypre_cabs(A_diag_a[j]) > temp_max)
5825 {
5826 temp_max = hypre_cabs(A_diag_a[j]);
5827 }
5828 }
5829 for (j = A_offd_i[i]; j < A_offd_i[i+1]; j++)
5830 {
5831 if (hypre_cabs(A_offd_a[j]) > temp_max)
5832 {
5833 temp_max = hypre_cabs(A_offd_a[j]);
5834 }
5835 }
5836 }
5837 B_maxel_row[k-1] = temp_max;
5838
5839 // add one for diagonal element
5840 j = A_diag_i[i];
5841 if (sub_idx_diag[A_diag_j[j]] != -1)
5842 {
5843 B_nnz_diag++;
5844 }
5845
5846 // Count nnzs larger than tolerance times max row element
5847 for (j = A_diag_i[i]+1; j < A_diag_i[i+1]; j++)
5848 {
5849 if ( (sub_idx_diag[A_diag_j[j]] != -1) &&
5850 (hypre_cabs(A_diag_a[j]) > (strength_thresh*temp_max)) )
5851 {
5852 B_nnz_diag++;
5853 }
5854 }
5855 for (j = A_offd_i[i]; j < A_offd_i[i+1]; j++)
5856 {
5857 if ( (sub_idx_offd[A_offd_j[j]] != -1) &&
5858 (hypre_cabs(A_offd_a[j]) > (strength_thresh*temp_max)) )
5859 {
5860 B_nnz_offd++;
5861 }
5862 }
5863 B_diag_i[k] = B_nnz_diag;
5864 B_offd_i[k] = B_nnz_offd;
5865 }
5866
5867 hypre_assert(k == B_nrow_local);
5868
5869 B_diag_j = hypre_TAlloc(HYPRE_Int, B_nnz_diag, HYPRE_MEMORY_HOST);
5870 B_diag_a = hypre_TAlloc(HYPRE_Complex, B_nnz_diag, HYPRE_MEMORY_HOST);
5871 B_offd_j = hypre_TAlloc(HYPRE_Int, B_nnz_offd, HYPRE_MEMORY_HOST);
5872 B_offd_a = hypre_TAlloc(HYPRE_Complex, B_nnz_offd, HYPRE_MEMORY_HOST);
5873
5874 for (i = 0, k=0, k1 = 0, k2 = 0; i < A_nlocal; i++)
5875 {
5876 HYPRE_Int CF_i = CF_marker[i] > 0 ? 1 : -1;
5877 if (CF_i != row_set)
5878 {
5879 continue;
5880 }
5881 HYPRE_Real maxel = B_maxel_row[k];
5882 k++;
5883
5884 for (j = A_diag_i[i]; j < A_diag_i[i+1]; j++)
5885 {
5886 HYPRE_Int j1 = sub_idx_diag[A_diag_j[j]];
5887 if ( (j1 != -1) && ( (hypre_cabs(A_diag_a[j]) > (strength_thresh*maxel)) || j==A_diag_i[i] ) )
5888 {
5889 B_diag_j[k1] = j1;
5890 B_diag_a[k1] = A_diag_a[j];
5891 k1++;
5892 }
5893 }
5894 for (j = A_offd_i[i]; j < A_offd_i[i+1]; j++)
5895 {
5896 HYPRE_Int j1 = sub_idx_offd[A_offd_j[j]];
5897 if ((j1 != -1) && (hypre_cabs(A_offd_a[j]) > (strength_thresh*maxel)))
5898 {
5899 hypre_assert(j1 >= 0 && j1 < num_cols_B_offd);
5900 B_offd_j[k2] = j1;
5901 B_offd_a[k2] = A_offd_a[j];
5902 k2++;
5903 }
5904 }
5905 }
5906
5907 hypre_assert(k1 == B_nnz_diag && k2 == B_nnz_offd);
5908
5909 /* ready to create B = A(rowset, colset) */
5910 B = hypre_ParCSRMatrixCreate(comm,
5911 B_nrow_global,
5912 B_ncol_global,
5913 B_row_starts,
5914 B_col_starts,
5915 num_cols_B_offd,
5916 B_nnz_diag,
5917 B_nnz_offd);
5918
5919 B_diag = hypre_ParCSRMatrixDiag(B);
5920 hypre_CSRMatrixMemoryLocation(B_diag) = HYPRE_MEMORY_HOST;
5921 hypre_CSRMatrixData(B_diag) = B_diag_a;
5922 hypre_CSRMatrixI(B_diag) = B_diag_i;
5923 hypre_CSRMatrixJ(B_diag) = B_diag_j;
5924
5925 B_offd = hypre_ParCSRMatrixOffd(B);
5926 hypre_CSRMatrixMemoryLocation(B_offd) = HYPRE_MEMORY_HOST;
5927 hypre_CSRMatrixData(B_offd) = B_offd_a;
5928 hypre_CSRMatrixI(B_offd) = B_offd_i;
5929 hypre_CSRMatrixJ(B_offd) = B_offd_j;
5930
5931 hypre_ParCSRMatrixColMapOffd(B) = col_map_offd_B;
5932
5933 hypre_ParCSRMatrixSetNumNonzeros(B);
5934 hypre_ParCSRMatrixDNumNonzeros(B) = (HYPRE_Real) hypre_ParCSRMatrixNumNonzeros(B);
5935
5936 hypre_MatvecCommPkgCreate(B);
5937
5938 *B_ptr = B;
5939
5940 if (B_row_starts == B_col_starts)
5941 {
5942 hypre_TFree(B_row_starts, HYPRE_MEMORY_HOST);
5943 }
5944 else
5945 {
5946 hypre_TFree(B_row_starts, HYPRE_MEMORY_HOST);
5947 hypre_TFree(B_col_starts, HYPRE_MEMORY_HOST);
5948 }
5949 hypre_TFree(B_maxel_row, HYPRE_MEMORY_HOST);
5950 hypre_TFree(send_buf_data, HYPRE_MEMORY_HOST);
5951 hypre_TFree(sub_idx_diag, HYPRE_MEMORY_HOST);
5952 hypre_TFree(sub_idx_offd, HYPRE_MEMORY_HOST);
5953
5954 return hypre_error_flag;
5955 }
5956
/* drop, in place, the entries of A that are not on the diagonal and whose
 * magnitude is smaller than:
 *    type 0:  tol (TODO — currently falls through to the infinity-norm branch)
 *    type 1:  tol*(1-norm of row)
 *    type 2:  tol*(2-norm of row)
 *    type -1: tol*(infinity norm of row)
 * The diag and offd CSR arrays are compacted in place, col_map_offd is shrunk
 * to the surviving columns, and the matvec comm package is rebuilt.
 * NOTE(review): assumes tol > 0 — the public wrapper
 * hypre_ParCSRMatrixDropSmallEntries enforces this; confirm before calling
 * this routine directly. */
HYPRE_Int
hypre_ParCSRMatrixDropSmallEntriesHost( hypre_ParCSRMatrix *A,
                                        HYPRE_Real tol,
                                        HYPRE_Int type)
{
   HYPRE_Int i, j, k, nnz_diag, nnz_offd, A_diag_i_i, A_offd_i_i;

   MPI_Comm comm = hypre_ParCSRMatrixComm(A);
   /* diag part of A */
   hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
   HYPRE_Real *A_diag_a = hypre_CSRMatrixData(A_diag);
   HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag);
   HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag);
   /* off-diag part of A */
   hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
   HYPRE_Real *A_offd_a = hypre_CSRMatrixData(A_offd);
   HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd);
   HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd);

   HYPRE_Int num_cols_A_offd = hypre_CSRMatrixNumCols(A_offd);
   HYPRE_BigInt *col_map_offd_A = hypre_ParCSRMatrixColMapOffd(A);
   /* marker_offd[c] becomes nonzero iff offd column c keeps an entry */
   HYPRE_Int *marker_offd = NULL;

   HYPRE_BigInt first_row = hypre_ParCSRMatrixFirstRowIndex(A);
   HYPRE_Int nrow_local = hypre_CSRMatrixNumRows(A_diag);
   HYPRE_Int my_id, num_procs;
   /* MPI size and rank*/
   hypre_MPI_Comm_size(comm, &num_procs);
   hypre_MPI_Comm_rank(comm, &my_id);

   marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);

   /* nnz_diag / nnz_offd are the write positions for the compacted arrays.
    * A_diag_i_i / A_offd_i_i hold the start of the CURRENT row in the
    * ORIGINAL index arrays, because A_*_i[i+1] is overwritten with the
    * compacted count at the bottom of each iteration. */
   nnz_diag = nnz_offd = A_diag_i_i = A_offd_i_i = 0;
   for (i = 0; i < nrow_local; i++)
   {
      /* compute row norm (over diag and, in parallel runs, offd entries) */
      HYPRE_Real row_nrm = 0.0;
      for (j = A_diag_i_i; j < A_diag_i[i+1]; j++)
      {
         HYPRE_Complex v = A_diag_a[j];
         if (type == 1)
         {
            row_nrm += fabs(v);
         }
         else if (type == 2)
         {
            row_nrm += v*v;
         }
         else
         {
            row_nrm = hypre_max(row_nrm, fabs(v));
         }
      }
      if (num_procs > 1)
      {
         /* single-rank runs have an empty offd part, so it is skipped */
         for (j = A_offd_i_i; j < A_offd_i[i+1]; j++)
         {
            HYPRE_Complex v = A_offd_a[j];
            if (type == 1)
            {
               row_nrm += fabs(v);
            }
            else if (type == 2)
            {
               row_nrm += v*v;
            }
            else
            {
               row_nrm = hypre_max(row_nrm, fabs(v));
            }
         }
      }

      if (type == 2)
      {
         row_nrm = sqrt(row_nrm);
      }

      /* drop small entries based on tol and row norm */
      for (j = A_diag_i_i; j < A_diag_i[i+1]; j++)
      {
         HYPRE_Int col = A_diag_j[j];
         HYPRE_Complex val = A_diag_a[j];
         /* diagonal entries (i == col) are always kept */
         if (i == col || fabs(val) >= tol * row_nrm)
         {
            A_diag_j[nnz_diag] = col;
            A_diag_a[nnz_diag] = val;
            nnz_diag ++;
         }
      }
      if (num_procs > 1)
      {
         for (j = A_offd_i_i; j < A_offd_i[i+1]; j++)
         {
            HYPRE_Int col = A_offd_j[j];
            HYPRE_Complex val = A_offd_a[j];
            /* in normal cases: diagonal entry should not
             * appear in A_offd (but this can still be possible) */
            if (i + first_row == col_map_offd_A[col] || fabs(val) >= tol * row_nrm)
            {
               if (0 == marker_offd[col])
               {
                  marker_offd[col] = 1;
               }
               A_offd_j[nnz_offd] = col;
               A_offd_a[nnz_offd] = val;
               nnz_offd ++;
            }
         }
      }
      /* save the original start of row i+1 BEFORE clobbering it with the
       * compacted row pointer */
      A_diag_i_i = A_diag_i[i+1];
      A_offd_i_i = A_offd_i[i+1];
      A_diag_i[i+1] = nnz_diag;
      A_offd_i[i+1] = nnz_offd;
   }

   hypre_CSRMatrixNumNonzeros(A_diag) = nnz_diag;
   hypre_CSRMatrixNumNonzeros(A_offd) = nnz_offd;
   hypre_ParCSRMatrixSetNumNonzeros(A);
   hypre_ParCSRMatrixDNumNonzeros(A) = (HYPRE_Real) hypre_ParCSRMatrixNumNonzeros(A);

   /* compact col_map_offd to the surviving columns; marker_offd is
    * repurposed as an old -> new offd column index map.  Unmarked slots
    * keep the value 0 but are never looked up below, since every entry
    * referencing them has been dropped. */
   for (i = 0, k = 0; i < num_cols_A_offd; i++)
   {
      if (marker_offd[i])
      {
         col_map_offd_A[k] = col_map_offd_A[i];
         marker_offd[i] = k++;
      }
   }
   /* num_cols_A_offd = k; */
   hypre_CSRMatrixNumCols(A_offd) = k;
   /* relabel surviving offd entries with the new column indices */
   for (i = 0; i < nnz_offd; i++)
   {
      A_offd_j[i] = marker_offd[A_offd_j[i]];
   }

   /* the offd column set changed, so the comm package must be rebuilt */
   if ( hypre_ParCSRMatrixCommPkg(A) )
   {
      hypre_MatvecCommPkgDestroy( hypre_ParCSRMatrixCommPkg(A) );
   }
   hypre_MatvecCommPkgCreate(A);

   hypre_TFree(marker_offd, HYPRE_MEMORY_HOST);

   return hypre_error_flag;
}
6108
6109 /* drop the entries that are not on the diagonal and smaller than
6110 * type 0: tol
6111 * type 1: tol*(1-norm of row)
6112 * type 2: tol*(2-norm of row)
6113 * type -1: tol*(infinity norm of row)
6114 * NOTE: some type options above unavailable on either host or device */
6115 HYPRE_Int
hypre_ParCSRMatrixDropSmallEntries(hypre_ParCSRMatrix * A,HYPRE_Real tol,HYPRE_Int type)6116 hypre_ParCSRMatrixDropSmallEntries( hypre_ParCSRMatrix *A,
6117 HYPRE_Real tol,
6118 HYPRE_Int type)
6119 {
6120 if (tol <= 0.0)
6121 {
6122 return hypre_error_flag;
6123 }
6124
6125 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
6126 hypre_GpuProfilingPushRange("ParCSRMatrixDropSmallEntries");
6127 #endif
6128
6129 HYPRE_Int ierr = 0;
6130
6131 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
6132 HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(A) );
6133
6134 if (exec == HYPRE_EXEC_DEVICE)
6135 {
6136 ierr = hypre_ParCSRMatrixDropSmallEntriesDevice(A, tol, type);
6137 }
6138 else
6139 #endif
6140 {
6141 ierr = hypre_ParCSRMatrixDropSmallEntriesHost(A, tol, type);
6142 }
6143
6144 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
6145 hypre_GpuProfilingPopRange();
6146 #endif
6147
6148 return ierr;
6149 }
6150
6151 /* Scale ParCSR matrix A = scalar * A
6152 * A: the target CSR matrix
6153 * scalar: real number
6154 */
6155 HYPRE_Int
hypre_ParCSRMatrixScale(hypre_ParCSRMatrix * A,HYPRE_Complex scalar)6156 hypre_ParCSRMatrixScale(hypre_ParCSRMatrix *A,
6157 HYPRE_Complex scalar)
6158 {
6159 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
6160 hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
6161
6162 hypre_CSRMatrixScale(A_diag, scalar);
6163 hypre_CSRMatrixScale(A_offd, scalar);
6164
6165 return hypre_error_flag;
6166 }
6167
6168 /*--------------------------------------------------------------------------
6169 * hypre_ParCSRMatrixReorder:
6170 *
6171 * Reorders the column and data arrays of a the diagonal component of a square
6172 * ParCSR matrix, such that the first entry in each row is the diagonal one.
6173 *--------------------------------------------------------------------------*/
6174
6175 HYPRE_Int
hypre_ParCSRMatrixReorder(hypre_ParCSRMatrix * A)6176 hypre_ParCSRMatrixReorder(hypre_ParCSRMatrix *A)
6177 {
6178 HYPRE_BigInt nrows_A = hypre_ParCSRMatrixGlobalNumRows(A);
6179 HYPRE_BigInt ncols_A = hypre_ParCSRMatrixGlobalNumCols(A);
6180 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
6181
6182 if (nrows_A != ncols_A)
6183 {
6184 hypre_error_w_msg(HYPRE_ERROR_GENERIC," Error! Matrix should be square!\n");
6185 return hypre_error_flag;
6186 }
6187
6188 hypre_CSRMatrixReorder(A_diag);
6189
6190 return hypre_error_flag;
6191 }
6192