1 /******************************************************************************
2 * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3 * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4 *
5 * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6 ******************************************************************************/
7
8 #include <HYPRE_config.h>
9 #include "_hypre_utilities.h"
10 #include "par_csr_block_matrix.h"
11 #include "../parcsr_mv/_hypre_parcsr_mv.h"
12
/*--------------------------------------------------------------------------
 * used in RAP function - block size must be an argument because RAP_int may
 * be NULL
 *--------------------------------------------------------------------------*/
17
hypre_CSRBlockMatrix *
hypre_ExchangeRAPBlockData(hypre_CSRBlockMatrix *RAP_int,
                           hypre_ParCSRCommPkg *comm_pkg_RT, HYPRE_Int block_size)
{
   /* Exchange the "external" part of the triple product R*A*P between
    * processors.  RAP_int holds the rows of RAP that this processor computed
    * for coarse points owned by neighbors (one row per element of the receive
    * lists of comm_pkg_RT).  Those rows are shipped back to their owners by
    * reusing comm_pkg_RT with the roles of senders and receivers swapped, and
    * the rows received from neighbors are returned as a new CSRBlockMatrix
    * RAP_ext (one row per send-map element of comm_pkg_RT).
    *
    * block_size must be passed explicitly because RAP_int may be NULL on
    * processors with num_recvs == 0 (see commented-out line below).
    *
    * Returns: newly created RAP_ext; the caller owns it and must destroy it.
    * Side effect: RAP_int's row-pointer array is converted in place from
    * offsets to per-row counts (RAP_int is expected to be discarded after
    * this call). */

   HYPRE_Int *RAP_int_i;
   HYPRE_BigInt *RAP_int_j = NULL;
   HYPRE_Complex *RAP_int_data = NULL;
   HYPRE_Int num_cols = 0;

   MPI_Comm comm = hypre_ParCSRCommPkgComm(comm_pkg_RT);
   HYPRE_Int num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg_RT);
   HYPRE_Int *recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg_RT);
   HYPRE_Int *recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg_RT);
   HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg_RT);
   HYPRE_Int *send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg_RT);
   HYPRE_Int *send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg_RT);

   /* HYPRE_Int block_size = hypre_CSRBlockMatrixBlockSize(RAP_int); */

   hypre_CSRBlockMatrix *RAP_ext;

   HYPRE_Int *RAP_ext_i;
   HYPRE_BigInt *RAP_ext_j = NULL;
   HYPRE_Complex *RAP_ext_data = NULL;

   hypre_ParCSRCommHandle *comm_handle = NULL;
   hypre_ParCSRCommPkg *tmp_comm_pkg;

   HYPRE_Int *jdata_recv_vec_starts;
   HYPRE_Int *jdata_send_map_starts;

   HYPRE_Int num_rows;
   HYPRE_Int num_nonzeros;
   HYPRE_Int i, j, bnnz;
   HYPRE_Int num_procs, my_id;

   hypre_MPI_Comm_size(comm,&num_procs);
   hypre_MPI_Comm_rank(comm,&my_id);

   /* number of scalar entries in one dense block */
   bnnz = block_size * block_size;

   RAP_ext_i = hypre_CTAlloc(HYPRE_Int, send_map_starts[num_sends]+1, HYPRE_MEMORY_HOST);
   jdata_recv_vec_starts = hypre_CTAlloc(HYPRE_Int, num_recvs+1, HYPRE_MEMORY_HOST);
   jdata_send_map_starts = hypre_CTAlloc(HYPRE_Int, num_sends+1, HYPRE_MEMORY_HOST);

   /*--------------------------------------------------------------------------
    * recompute RAP_int_i so that RAP_int_i[j+1] contains the number of
    * elements of row j (to be determined through send_map_elmnts on the
    * receiving end)
    *--------------------------------------------------------------------------*/

   if (num_recvs)
   {
      /* RAP_int may be NULL when this rank receives nothing, so it is only
         dereferenced inside this guard */
      RAP_int_i = hypre_CSRBlockMatrixI(RAP_int);
      RAP_int_j = hypre_CSRBlockMatrixBigJ(RAP_int);
      RAP_int_data = hypre_CSRBlockMatrixData(RAP_int);
      num_cols = hypre_CSRBlockMatrixNumCols(RAP_int);
   }
   /* per-neighbor offsets into RAP_int's j/data arrays (captured while
      RAP_int_i still holds cumulative offsets) */
   jdata_recv_vec_starts[0] = 0;
   for (i=0; i < num_recvs; i++)
      jdata_recv_vec_starts[i+1] = RAP_int_i[recv_vec_starts[i+1]];

   /* convert RAP_int_i in place from cumulative offsets to per-row counts;
      walking backwards keeps each RAP_int_i[j-1] valid when it is read */
   for (i=num_recvs; i > 0; i--)
      for (j = recv_vec_starts[i]; j > recv_vec_starts[i-1]; j--)
         RAP_int_i[j] -= RAP_int_i[j-1];

   /*--------------------------------------------------------------------------
    * initialize communication
    *--------------------------------------------------------------------------*/

   /* job 12 exchanges HYPRE_Int data in the reverse direction of comm_pkg_RT:
      the per-row counts computed above travel from the recv side to the send
      side, landing in RAP_ext_i[1..]; NULL buffers cover ranks that only
      participate on one side */
   if (num_recvs && num_sends)
      comm_handle = hypre_ParCSRCommHandleCreate(12,comm_pkg_RT,
                                                 &RAP_int_i[1], &RAP_ext_i[1]);
   else if (num_recvs)
      comm_handle = hypre_ParCSRCommHandleCreate(12,comm_pkg_RT,
                                                 &RAP_int_i[1], NULL);
   else if (num_sends)
      comm_handle = hypre_ParCSRCommHandleCreate(12,comm_pkg_RT,
                                                 NULL, &RAP_ext_i[1]);

   /* build a temporary comm package that is comm_pkg_RT with senders and
      receivers swapped; the variable-length starts arrays are filled in
      below once the row counts are known.
      NOTE(review): send_map_elmts is never set in tmp_comm_pkg -- presumably
      the comm-handle routines used here only need the starts arrays; confirm
      against hypre_ParCSRCommHandleCreate. */
   tmp_comm_pkg = hypre_CTAlloc(hypre_ParCSRCommPkg, 1, HYPRE_MEMORY_HOST);
   hypre_ParCSRCommPkgComm(tmp_comm_pkg) = comm;
   hypre_ParCSRCommPkgNumSends(tmp_comm_pkg) = num_recvs;
   hypre_ParCSRCommPkgNumRecvs(tmp_comm_pkg) = num_sends;
   hypre_ParCSRCommPkgSendProcs(tmp_comm_pkg) = recv_procs;
   hypre_ParCSRCommPkgRecvProcs(tmp_comm_pkg) = send_procs;

   /* wait for the row-count exchange to complete before using RAP_ext_i */
   hypre_ParCSRCommHandleDestroy(comm_handle);
   comm_handle = NULL;

   /*--------------------------------------------------------------------------
    * compute num_nonzeros for RAP_ext
    *--------------------------------------------------------------------------*/

   /* prefix-sum the received per-row counts into proper row offsets */
   for (i=0; i < num_sends; i++)
      for (j = send_map_starts[i]; j < send_map_starts[i+1]; j++)
         RAP_ext_i[j+1] += RAP_ext_i[j];

   num_rows = send_map_starts[num_sends];
   num_nonzeros = RAP_ext_i[num_rows];
   if (num_nonzeros)
   {
      RAP_ext_j = hypre_CTAlloc(HYPRE_BigInt, num_nonzeros, HYPRE_MEMORY_HOST);
      RAP_ext_data = hypre_CTAlloc(HYPRE_Complex, num_nonzeros*bnnz, HYPRE_MEMORY_HOST);
   }

   /* per-neighbor offsets into the j/data arrays about to be received */
   for (i=0; i < num_sends+1; i++)
   {
      jdata_send_map_starts[i] = RAP_ext_i[send_map_starts[i]];
   }

   /* install the nonzero-granularity starts; note the swap again: what is
      "recv" for tmp_comm_pkg corresponds to the send side of comm_pkg_RT */
   hypre_ParCSRCommPkgRecvVecStarts(tmp_comm_pkg) = jdata_send_map_starts;
   hypre_ParCSRCommPkgSendMapStarts(tmp_comm_pkg) = jdata_recv_vec_starts;

   /* exchange the dense block values (bnnz scalars per nonzero) */
   comm_handle = hypre_ParCSRBlockCommHandleCreate(1, bnnz, tmp_comm_pkg,
                                                   (void *) RAP_int_data, (void *) RAP_ext_data);
   hypre_ParCSRBlockCommHandleDestroy(comm_handle);
   comm_handle = NULL;

   /* job 21 exchanges the HYPRE_BigInt column indices through tmp_comm_pkg */
   comm_handle = hypre_ParCSRCommHandleCreate(21,tmp_comm_pkg,RAP_int_j,
                                              RAP_ext_j);
   RAP_ext = hypre_CSRBlockMatrixCreate(block_size,num_rows,num_cols,
                                        num_nonzeros);

   /* hand ownership of the assembled arrays to RAP_ext */
   hypre_CSRBlockMatrixI(RAP_ext) = RAP_ext_i;
   if (num_nonzeros)
   {
      hypre_CSRBlockMatrixBigJ(RAP_ext) = RAP_ext_j;
      hypre_CSRBlockMatrixData(RAP_ext) = RAP_ext_data;
   }

   /* wait for the column-index exchange before the temporary comm package
      (whose starts arrays the handle references) is freed */
   hypre_ParCSRCommHandleDestroy(comm_handle);
   comm_handle = NULL;

   hypre_TFree(jdata_recv_vec_starts, HYPRE_MEMORY_HOST);
   hypre_TFree(jdata_send_map_starts, HYPRE_MEMORY_HOST);
   hypre_TFree(tmp_comm_pkg, HYPRE_MEMORY_HOST);

   return RAP_ext;
}
158
159 /*--------------------------------------------------------------------------
160 * hypre_BoomerAMGBuildCoarseOperator
161 *--------------------------------------------------------------------------*/
162
163 HYPRE_Int
hypre_ParCSRBlockMatrixRAP(hypre_ParCSRBlockMatrix * RT,hypre_ParCSRBlockMatrix * A,hypre_ParCSRBlockMatrix * P,hypre_ParCSRBlockMatrix ** RAP_ptr)164 hypre_ParCSRBlockMatrixRAP(hypre_ParCSRBlockMatrix *RT,
165 hypre_ParCSRBlockMatrix *A,
166 hypre_ParCSRBlockMatrix *P,
167 hypre_ParCSRBlockMatrix **RAP_ptr )
168
169 {
170 MPI_Comm comm = hypre_ParCSRBlockMatrixComm(A);
171
172 hypre_CSRBlockMatrix *RT_diag = hypre_ParCSRBlockMatrixDiag(RT);
173 hypre_CSRBlockMatrix *RT_offd = hypre_ParCSRBlockMatrixOffd(RT);
174 HYPRE_Int num_cols_offd_RT = hypre_CSRBlockMatrixNumCols(RT_offd);
175 HYPRE_Int num_rows_offd_RT = hypre_CSRBlockMatrixNumRows(RT_offd);
176 hypre_ParCSRCommPkg *comm_pkg_RT = hypre_ParCSRBlockMatrixCommPkg(RT);
177 HYPRE_Int num_recvs_RT = 0;
178 HYPRE_Int num_sends_RT = 0;
179 HYPRE_Int *send_map_starts_RT;
180 HYPRE_Int *send_map_elmts_RT;
181
182 hypre_CSRBlockMatrix *A_diag = hypre_ParCSRBlockMatrixDiag(A);
183
184 HYPRE_Complex *A_diag_data = hypre_CSRBlockMatrixData(A_diag);
185 HYPRE_Int *A_diag_i = hypre_CSRBlockMatrixI(A_diag);
186 HYPRE_Int *A_diag_j = hypre_CSRBlockMatrixJ(A_diag);
187 HYPRE_Int block_size = hypre_CSRBlockMatrixBlockSize(A_diag);
188
189 hypre_CSRBlockMatrix *A_offd = hypre_ParCSRBlockMatrixOffd(A);
190
191 HYPRE_Complex *A_offd_data = hypre_CSRBlockMatrixData(A_offd);
192 HYPRE_Int *A_offd_i = hypre_CSRBlockMatrixI(A_offd);
193 HYPRE_Int *A_offd_j = hypre_CSRBlockMatrixJ(A_offd);
194
195 HYPRE_Int num_cols_diag_A = hypre_CSRBlockMatrixNumCols(A_diag);
196 HYPRE_Int num_cols_offd_A = hypre_CSRBlockMatrixNumCols(A_offd);
197
198 hypre_CSRBlockMatrix *P_diag = hypre_ParCSRBlockMatrixDiag(P);
199
200 HYPRE_Complex *P_diag_data = hypre_CSRBlockMatrixData(P_diag);
201 HYPRE_Int *P_diag_i = hypre_CSRBlockMatrixI(P_diag);
202 HYPRE_Int *P_diag_j = hypre_CSRBlockMatrixJ(P_diag);
203
204 hypre_CSRBlockMatrix *P_offd = hypre_ParCSRBlockMatrixOffd(P);
205 HYPRE_BigInt *col_map_offd_P = hypre_ParCSRBlockMatrixColMapOffd(P);
206
207 HYPRE_Complex *P_offd_data = hypre_CSRBlockMatrixData(P_offd);
208 HYPRE_Int *P_offd_i = hypre_CSRBlockMatrixI(P_offd);
209 HYPRE_Int *P_offd_j = hypre_CSRBlockMatrixJ(P_offd);
210
211 HYPRE_BigInt first_col_diag_P = hypre_ParCSRBlockMatrixFirstColDiag(P);
212 HYPRE_BigInt last_col_diag_P;
213 HYPRE_Int num_cols_diag_P = hypre_CSRBlockMatrixNumCols(P_diag);
214 HYPRE_Int num_cols_offd_P = hypre_CSRBlockMatrixNumCols(P_offd);
215 HYPRE_BigInt *coarse_partitioning = hypre_ParCSRBlockMatrixColStarts(P);
216 HYPRE_BigInt *row_starts, *col_starts;
217
218 hypre_ParCSRBlockMatrix *RAP;
219 HYPRE_BigInt *col_map_offd_RAP;
220
221 hypre_CSRBlockMatrix *RAP_int = NULL;
222
223 HYPRE_Complex *RAP_int_data;
224 HYPRE_Int *RAP_int_i;
225 HYPRE_BigInt *RAP_int_j;
226
227 hypre_CSRBlockMatrix *RAP_ext;
228
229 HYPRE_Complex *RAP_ext_data;
230 HYPRE_Int *RAP_ext_i;
231 HYPRE_BigInt *RAP_ext_j;
232
233 hypre_CSRBlockMatrix *RAP_diag;
234
235 HYPRE_Complex *RAP_diag_data;
236 HYPRE_Int *RAP_diag_i;
237 HYPRE_Int *RAP_diag_j;
238
239 hypre_CSRBlockMatrix *RAP_offd;
240
241 HYPRE_Complex *RAP_offd_data;
242 HYPRE_Int *RAP_offd_i;
243 HYPRE_Int *RAP_offd_j;
244
245 HYPRE_Int RAP_size;
246 HYPRE_Int RAP_ext_size;
247 HYPRE_Int RAP_diag_size;
248 HYPRE_Int RAP_offd_size;
249 HYPRE_Int P_ext_diag_size;
250 HYPRE_Int P_ext_offd_size;
251 HYPRE_Int first_col_diag_RAP;
252 HYPRE_Int last_col_diag_RAP;
253 HYPRE_Int num_cols_offd_RAP = 0;
254
255 hypre_CSRBlockMatrix *R_diag;
256
257 HYPRE_Complex *R_diag_data;
258 HYPRE_Int *R_diag_i;
259 HYPRE_Int *R_diag_j;
260
261 hypre_CSRBlockMatrix *R_offd;
262
263 HYPRE_Complex *R_offd_data;
264 HYPRE_Int *R_offd_i;
265 HYPRE_Int *R_offd_j;
266
267 hypre_CSRBlockMatrix *Ps_ext;
268
269 HYPRE_Complex *Ps_ext_data;
270 HYPRE_Int *Ps_ext_i;
271 HYPRE_BigInt *Ps_ext_j;
272
273 HYPRE_Complex *P_ext_diag_data;
274 HYPRE_Int *P_ext_diag_i;
275 HYPRE_Int *P_ext_diag_j;
276
277 HYPRE_Complex *P_ext_offd_data;
278 HYPRE_Int *P_ext_offd_i;
279 HYPRE_Int *P_ext_offd_j;
280
281 HYPRE_BigInt *col_map_offd_Pext;
282 HYPRE_Int *map_P_to_Pext;
283 HYPRE_Int *map_P_to_RAP;
284 HYPRE_Int *map_Pext_to_RAP;
285
286 HYPRE_Int *P_marker;
287 HYPRE_Int **P_mark_array;
288 HYPRE_Int **A_mark_array;
289 HYPRE_Int *A_marker;
290 HYPRE_BigInt *temp = NULL;
291
292 HYPRE_Int n_coarse;
293 HYPRE_Int num_cols_offd_Pext = 0;
294
295 HYPRE_Int ic, i, j, k, bnnz, kk;
296 HYPRE_Int i1, i2, i3, ii, ns, ne, size, rest;
297 HYPRE_Int cnt, cnt_offd, cnt_diag;
298 HYPRE_Int jj1, jj2, jj3, jcol;
299 HYPRE_BigInt value;
300
301 HYPRE_Int *jj_count, *jj_cnt_diag, *jj_cnt_offd;
302 HYPRE_Int jj_counter, jj_count_diag, jj_count_offd;
303 HYPRE_Int jj_row_begining, jj_row_begin_diag, jj_row_begin_offd;
304 HYPRE_Int start_indexing = 0; /* start indexing for RAP_data at 0 */
305 HYPRE_Int num_nz_cols_A;
306 HYPRE_Int num_procs;
307 HYPRE_Int num_threads, ind;
308
309 HYPRE_Complex *r_entries;
310 HYPRE_Complex *r_a_products;
311 HYPRE_Complex *r_a_p_products;
312
313 HYPRE_Complex zero = 0.0;
314
315 /*-----------------------------------------------------------------------
316 * Copy ParCSRBlockMatrix RT into CSRBlockMatrix R so that we have
317 * row-wise access to restriction .
318 *-----------------------------------------------------------------------*/
319
320 hypre_MPI_Comm_size(comm,&num_procs);
321 /* num_threads = hypre_NumThreads(); */
322 num_threads = 1;
323
324 bnnz = block_size * block_size;
325 r_a_products = hypre_TAlloc(HYPRE_Complex, bnnz , HYPRE_MEMORY_HOST);
326 r_a_p_products = hypre_TAlloc(HYPRE_Complex, bnnz , HYPRE_MEMORY_HOST);
327
328 if (comm_pkg_RT)
329 {
330 num_recvs_RT = hypre_ParCSRCommPkgNumRecvs(comm_pkg_RT);
331 num_sends_RT = hypre_ParCSRCommPkgNumSends(comm_pkg_RT);
332 send_map_starts_RT =hypre_ParCSRCommPkgSendMapStarts(comm_pkg_RT);
333 send_map_elmts_RT = hypre_ParCSRCommPkgSendMapElmts(comm_pkg_RT);
334 }
335
336 hypre_CSRBlockMatrixTranspose(RT_diag,&R_diag,1);
337 if (num_cols_offd_RT)
338 {
339 hypre_CSRBlockMatrixTranspose(RT_offd,&R_offd,1);
340 R_offd_data = hypre_CSRBlockMatrixData(R_offd);
341 R_offd_i = hypre_CSRBlockMatrixI(R_offd);
342 R_offd_j = hypre_CSRBlockMatrixJ(R_offd);
343 }
344
345 /*-----------------------------------------------------------------------
346 * Access the CSR vectors for R. Also get sizes of fine and
347 * coarse grids.
348 *-----------------------------------------------------------------------*/
349
350 R_diag_data = hypre_CSRBlockMatrixData(R_diag);
351 R_diag_i = hypre_CSRBlockMatrixI(R_diag);
352 R_diag_j = hypre_CSRBlockMatrixJ(R_diag);
353
354 n_coarse = hypre_ParCSRBlockMatrixGlobalNumCols(P);
355 num_nz_cols_A = num_cols_diag_A + num_cols_offd_A;
356
357 /*-----------------------------------------------------------------------
358 * Generate Ps_ext, i.e. portion of P that is stored on neighbor procs
359 * and needed locally for triple matrix product
360 *-----------------------------------------------------------------------*/
361
362 if (num_procs > 1)
363 {
364 Ps_ext = hypre_ParCSRBlockMatrixExtractBExt(P,A,1);
365 Ps_ext_data = hypre_CSRBlockMatrixData(Ps_ext);
366 Ps_ext_i = hypre_CSRBlockMatrixI(Ps_ext);
367 Ps_ext_j = hypre_CSRBlockMatrixBigJ(Ps_ext);
368 }
369
370 P_ext_diag_i = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A+1, HYPRE_MEMORY_HOST);
371 P_ext_offd_i = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A+1, HYPRE_MEMORY_HOST);
372 P_ext_diag_size = 0;
373 P_ext_offd_size = 0;
374 last_col_diag_P = first_col_diag_P + (HYPRE_BigInt)num_cols_diag_P - 1;
375
376 for (i=0; i < num_cols_offd_A; i++)
377 {
378 for (j=Ps_ext_i[i]; j < Ps_ext_i[i+1]; j++)
379 if (Ps_ext_j[j] < first_col_diag_P || Ps_ext_j[j] > last_col_diag_P)
380 P_ext_offd_size++;
381 else
382 P_ext_diag_size++;
383 P_ext_diag_i[i+1] = P_ext_diag_size;
384 P_ext_offd_i[i+1] = P_ext_offd_size;
385 }
386
387 if (P_ext_diag_size)
388 {
389 P_ext_diag_j = hypre_CTAlloc(HYPRE_Int, P_ext_diag_size, HYPRE_MEMORY_HOST);
390 P_ext_diag_data = hypre_CTAlloc(HYPRE_Complex, P_ext_diag_size*bnnz, HYPRE_MEMORY_HOST);
391 }
392 if (P_ext_offd_size)
393 {
394 P_ext_offd_j = hypre_CTAlloc(HYPRE_Int, P_ext_offd_size, HYPRE_MEMORY_HOST);
395 P_ext_offd_data = hypre_CTAlloc(HYPRE_Complex, P_ext_offd_size*bnnz, HYPRE_MEMORY_HOST);
396 }
397
398 cnt_offd = 0;
399 cnt_diag = 0;
400 cnt = 0;
401 for (i=0; i < num_cols_offd_A; i++)
402 {
403 for (j=Ps_ext_i[i]; j < Ps_ext_i[i+1]; j++)
404 if (Ps_ext_j[j] < first_col_diag_P || Ps_ext_j[j] > last_col_diag_P)
405 {
406 Ps_ext_j[cnt_offd] = Ps_ext_j[j];
407 for (kk = 0; kk < bnnz; kk++)
408 P_ext_offd_data[cnt_offd*bnnz+kk] = Ps_ext_data[j*bnnz+kk];
409 cnt_offd++;
410 }
411 else
412 {
413 P_ext_diag_j[cnt_diag] = (HYPRE_Int)(Ps_ext_j[j] - first_col_diag_P);
414 for (kk = 0; kk < bnnz; kk++)
415 P_ext_diag_data[cnt_diag*bnnz+kk] = Ps_ext_data[j*bnnz+kk];
416 cnt_diag++;
417 }
418 }
419 if (P_ext_offd_size || num_cols_offd_P)
420 {
421 temp = hypre_CTAlloc(HYPRE_BigInt, P_ext_offd_size+num_cols_offd_P, HYPRE_MEMORY_HOST);
422 for (i=0; i < P_ext_offd_size; i++)
423 temp[i] = Ps_ext_j[i];
424 cnt = P_ext_offd_size;
425 for (i=0; i < num_cols_offd_P; i++)
426 temp[cnt++] = col_map_offd_P[i];
427 }
428 if (cnt)
429 {
430 hypre_BigQsort0(temp, 0, cnt-1);
431
432 num_cols_offd_Pext = 1;
433 value = temp[0];
434 for (i=1; i < cnt; i++)
435 {
436 if (temp[i] > value)
437 {
438 value = temp[i];
439 temp[num_cols_offd_Pext++] = value;
440 }
441 }
442 }
443
444 if (num_cols_offd_Pext)
445 col_map_offd_Pext = hypre_CTAlloc(HYPRE_BigInt, num_cols_offd_Pext, HYPRE_MEMORY_HOST);
446
447 for (i=0; i < num_cols_offd_Pext; i++)
448 col_map_offd_Pext[i] = temp[i];
449
450 if (P_ext_offd_size || num_cols_offd_P)
451 hypre_TFree(temp, HYPRE_MEMORY_HOST);
452
453 for (i=0 ; i < P_ext_offd_size; i++)
454 P_ext_offd_j[i] = hypre_BigBinarySearch(col_map_offd_Pext,
455 Ps_ext_j[i],
456 num_cols_offd_Pext);
457 if (num_cols_offd_P)
458 {
459 map_P_to_Pext = hypre_CTAlloc(HYPRE_Int, num_cols_offd_P, HYPRE_MEMORY_HOST);
460
461 cnt = 0;
462 for (i=0; i < num_cols_offd_Pext; i++)
463 if (col_map_offd_Pext[i] == col_map_offd_P[cnt])
464 {
465 map_P_to_Pext[cnt++] = i;
466 if (cnt == num_cols_offd_P) break;
467 }
468 }
469
470 if (num_procs > 1)
471 {
472 hypre_CSRBlockMatrixDestroy(Ps_ext);
473 Ps_ext = NULL;
474 }
475
476 /*-----------------------------------------------------------------------
477 * First Pass: Determine size of RAP_int and set up RAP_int_i if there
478 * are more than one processor and nonzero elements in R_offd
479 *-----------------------------------------------------------------------*/
480
481 P_mark_array = hypre_CTAlloc(HYPRE_Int *, num_threads, HYPRE_MEMORY_HOST);
482 A_mark_array = hypre_CTAlloc(HYPRE_Int *, num_threads, HYPRE_MEMORY_HOST);
483
484 if (num_cols_offd_RT)
485 {
486 jj_count = hypre_CTAlloc(HYPRE_Int, num_threads, HYPRE_MEMORY_HOST);
487
488 for (ii = 0; ii < num_threads; ii++)
489 {
490 size = num_cols_offd_RT/num_threads;
491 rest = num_cols_offd_RT - size*num_threads;
492 if (ii < rest)
493 {
494 ns = ii*size+ii;
495 ne = (ii+1)*size+ii+1;
496 }
497 else
498 {
499 ns = ii*size+rest;
500 ne = (ii+1)*size+rest;
501 }
502
503 /*--------------------------------------------------------------------
504 * Allocate marker arrays.
505 *--------------------------------------------------------------------*/
506
507 if (num_cols_offd_Pext || num_cols_diag_P)
508 {
509 P_mark_array[ii] = hypre_CTAlloc(HYPRE_Int, num_cols_diag_P+num_cols_offd_Pext, HYPRE_MEMORY_HOST);
510 P_marker = P_mark_array[ii];
511 }
512 A_mark_array[ii] = hypre_CTAlloc(HYPRE_Int, num_nz_cols_A, HYPRE_MEMORY_HOST);
513 A_marker = A_mark_array[ii];
514
515 /*--------------------------------------------------------------------
516 * Initialize some stuff.
517 *--------------------------------------------------------------------*/
518
519 jj_counter = start_indexing;
520 for (ic = 0; ic < num_cols_diag_P+num_cols_offd_Pext; ic++)
521 {
522 P_marker[ic] = -1;
523 }
524 for (i = 0; i < num_nz_cols_A; i++)
525 {
526 A_marker[i] = -1;
527 }
528
529 /*--------------------------------------------------------------------
530 * Loop over exterior c-points
531 *--------------------------------------------------------------------*/
532
533 for (ic = ns; ic < ne; ic++)
534 {
535
536 jj_row_begining = jj_counter;
537
538 /*-----------------------------------------------------------------
539 * Loop over entries in row ic of R_offd.
540 *-----------------------------------------------------------------*/
541
542 for (jj1 = R_offd_i[ic]; jj1 < R_offd_i[ic+1]; jj1++)
543 {
544 i1 = R_offd_j[jj1];
545
546 /*--------------------------------------------------------------
547 * Loop over entries in row i1 of A_offd.
548 *--------------------------------------------------------------*/
549
550 for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++)
551 {
552 i2 = A_offd_j[jj2];
553
554 /*-----------------------------------------------------------
555 * Check A_marker to see if point i2 has been previously
556 * visited. New entries in RAP only occur from unmarked points.
557 *-----------------------------------------------------------*/
558
559 if (A_marker[i2] != ic)
560 {
561
562 /*--------------------------------------------------------
563 * Mark i2 as visited.
564 *--------------------------------------------------------*/
565
566 A_marker[i2] = ic;
567
568 /*--------------------------------------------------------
569 * Loop over entries in row i2 of P_ext.
570 *--------------------------------------------------------*/
571
572 for (jj3=P_ext_diag_i[i2]; jj3 < P_ext_diag_i[i2+1]; jj3++)
573 {
574 i3 = P_ext_diag_j[jj3];
575
576 /*-----------------------------------------------------
577 * Check P_marker to see that RAP_{ic,i3} has not
578 * been accounted for. If it has not, mark it and
579 * increment counter.
580 *-----------------------------------------------------*/
581
582 if (P_marker[i3] < jj_row_begining)
583 {
584 P_marker[i3] = jj_counter;
585 jj_counter++;
586 }
587 }
588 for (jj3=P_ext_offd_i[i2]; jj3 < P_ext_offd_i[i2+1]; jj3++)
589 {
590 i3 = P_ext_offd_j[jj3] + num_cols_diag_P;
591
592 /*-----------------------------------------------------
593 * Check P_marker to see that RAP_{ic,i3} has not
594 * been accounted for. If it has not, mark it and
595 * increment counter.
596 *-----------------------------------------------------*/
597
598 if (P_marker[i3] < jj_row_begining)
599 {
600 P_marker[i3] = jj_counter;
601 jj_counter++;
602 }
603 }
604 }
605 }
606 /*--------------------------------------------------------------
607 * Loop over entries in row i1 of A_diag.
608 *--------------------------------------------------------------*/
609
610 for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++)
611 {
612 i2 = A_diag_j[jj2];
613
614 /*-----------------------------------------------------------
615 * Check A_marker to see if point i2 has been previously
616 * visited. New entries in RAP only occur from unmarked
617 * points.
618 *-----------------------------------------------------------*/
619
620 if (A_marker[i2+num_cols_offd_A] != ic)
621 {
622
623 /*--------------------------------------------------------
624 * Mark i2 as visited.
625 *--------------------------------------------------------*/
626
627 A_marker[i2+num_cols_offd_A] = ic;
628
629 /*--------------------------------------------------------
630 * Loop over entries in row i2 of P_diag.
631 *--------------------------------------------------------*/
632
633 for (jj3 = P_diag_i[i2]; jj3 < P_diag_i[i2+1]; jj3++)
634 {
635 i3 = P_diag_j[jj3];
636
637 /*-----------------------------------------------------
638 * Check P_marker to see that RAP_{ic,i3} has not
639 * been accounted for. If it has not, mark it and
640 * increment counter.
641 *-----------------------------------------------------*/
642
643 if (P_marker[i3] < jj_row_begining)
644 {
645 P_marker[i3] = jj_counter;
646 jj_counter++;
647 }
648 }
649
650 /*--------------------------------------------------------
651 * Loop over entries in row i2 of P_offd.
652 *--------------------------------------------------------*/
653
654 for (jj3 = P_offd_i[i2]; jj3 < P_offd_i[i2+1]; jj3++)
655 {
656 i3 = map_P_to_Pext[P_offd_j[jj3]] + num_cols_diag_P;
657
658 /*-----------------------------------------------------
659 * Check P_marker to see that RAP_{ic,i3} has not
660 * been accounted for. If it has not, mark it and
661 * increment counter.
662 *-----------------------------------------------------*/
663
664 if (P_marker[i3] < jj_row_begining)
665 {
666 P_marker[i3] = jj_counter;
667 jj_counter++;
668 }
669 }
670 }
671 }
672 }
673 }
674 jj_count[ii] = jj_counter;
675 }
676
677 /*-----------------------------------------------------------------------
678 * Allocate RAP_int_data and RAP_int_j arrays.
679 *-----------------------------------------------------------------------*/
680
681 for (i = 0; i < num_threads-1; i++) jj_count[i+1] += jj_count[i];
682
683 RAP_size = jj_count[num_threads-1];
684 RAP_int_i = hypre_CTAlloc(HYPRE_Int, num_cols_offd_RT+1, HYPRE_MEMORY_HOST);
685 RAP_int_data = hypre_CTAlloc(HYPRE_Complex, RAP_size*bnnz, HYPRE_MEMORY_HOST);
686 RAP_int_j = hypre_CTAlloc(HYPRE_BigInt, RAP_size, HYPRE_MEMORY_HOST);
687 RAP_int_i[num_cols_offd_RT] = RAP_size;
688
689 /*-----------------------------------------------------------------------
690 * Second Pass: Fill in RAP_int_data and RAP_int_j.
691 *-----------------------------------------------------------------------*/
692
693 for (ii = 0; ii < num_threads; ii++)
694 {
695 size = num_cols_offd_RT/num_threads;
696 rest = num_cols_offd_RT - size*num_threads;
697 if (ii < rest)
698 {
699 ns = ii*size+ii;
700 ne = (ii+1)*size+ii+1;
701 }
702 else
703 {
704 ns = ii*size+rest;
705 ne = (ii+1)*size+rest;
706 }
707
708 /*--------------------------------------------------------------------
709 * Initialize some stuff.
710 *--------------------------------------------------------------------*/
711
712 if (num_cols_offd_Pext || num_cols_diag_P)
713 P_marker = P_mark_array[ii];
714 A_marker = A_mark_array[ii];
715
716 jj_counter = start_indexing;
717 if (ii > 0) jj_counter = jj_count[ii-1];
718
719 for (ic = 0; ic < num_cols_diag_P+num_cols_offd_Pext; ic++)
720 {
721 P_marker[ic] = -1;
722 }
723 for (i = 0; i < num_nz_cols_A; i++)
724 {
725 A_marker[i] = -1;
726 }
727
728 /*--------------------------------------------------------------------
729 * Loop over exterior c-points.
730 *--------------------------------------------------------------------*/
731
732 for (ic = ns; ic < ne; ic++)
733 {
734 jj_row_begining = jj_counter;
735 RAP_int_i[ic] = jj_counter;
736
737 /*-----------------------------------------------------------------
738 * Loop over entries in row ic of R_offd.
739 *-----------------------------------------------------------------*/
740
741 for (jj1 = R_offd_i[ic]; jj1 < R_offd_i[ic+1]; jj1++)
742 {
743 i1 = R_offd_j[jj1];
744 r_entries = &(R_offd_data[jj1*bnnz]);
745
746 /*--------------------------------------------------------------
747 * Loop over entries in row i1 of A_offd.
748 *--------------------------------------------------------------*/
749
750 for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++)
751 {
752 i2 = A_offd_j[jj2];
753 hypre_CSRBlockMatrixBlockMultAdd(r_entries,
754 &(A_offd_data[jj2*bnnz]), zero,
755 r_a_products, block_size);
756
757 /*-----------------------------------------------------------
758 * Check A_marker to see if point i2 has been previously
759 * visited.New entries in RAP only occur from unmarked points.
760 *-----------------------------------------------------------*/
761
762 if (A_marker[i2] != ic)
763 {
764 /*--------------------------------------------------------
765 * Mark i2 as visited.
766 *--------------------------------------------------------*/
767
768 A_marker[i2] = ic;
769
770 /*--------------------------------------------------------
771 * Loop over entries in row i2 of P_ext.
772 *--------------------------------------------------------*/
773
774 for (jj3=P_ext_diag_i[i2]; jj3 < P_ext_diag_i[i2+1]; jj3++)
775 {
776 i3 = P_ext_diag_j[jj3];
777 hypre_CSRBlockMatrixBlockMultAdd(r_a_products,
778 &(P_ext_diag_data[jj3*bnnz]), zero,
779 r_a_p_products, block_size);
780
781 /*-----------------------------------------------------
782 * Check P_marker to see that RAP_{ic,i3} has not
783 * been accounted for. If it has not, create a new
784 * entry. If it has, add new contribution.
785 *-----------------------------------------------------*/
786
787 if (P_marker[i3] < jj_row_begining)
788 {
789 P_marker[i3] = jj_counter;
790 for (kk = 0; kk < bnnz; kk++)
791 RAP_int_data[jj_counter*bnnz+kk] =
792 r_a_p_products[kk];
793 RAP_int_j[jj_counter] = i3 + first_col_diag_P;
794 jj_counter++;
795 }
796 else
797 {
798 for (kk = 0; kk < bnnz; kk++)
799 RAP_int_data[P_marker[i3]*bnnz+kk] +=
800 r_a_p_products[kk];
801 }
802 }
803 for (jj3=P_ext_offd_i[i2]; jj3 < P_ext_offd_i[i2+1]; jj3++)
804 {
805 i3 = P_ext_offd_j[jj3] + num_cols_diag_P;
806 hypre_CSRBlockMatrixBlockMultAdd(r_a_products,
807 &(P_ext_offd_data[jj3*bnnz]), zero,
808 r_a_p_products, block_size);
809
810 /*--------------------------------------------------
811 * Check P_marker to see that RAP_{ic,i3} has not
812 * been accounted for. If it has not, create a new
813 * entry. If it has, add new contribution.
814 *--------------------------------------------------*/
815
816 if (P_marker[i3] < jj_row_begining)
817 {
818 P_marker[i3] = jj_counter;
819 for (kk = 0; kk < bnnz; kk++)
820 RAP_int_data[jj_counter*bnnz+kk] =
821 r_a_p_products[kk];
822 RAP_int_j[jj_counter]
823 = col_map_offd_Pext[i3-num_cols_diag_P];
824 jj_counter++;
825 }
826 else
827 {
828 for (kk = 0; kk < bnnz; kk++)
829 RAP_int_data[P_marker[i3]*bnnz+kk] +=
830 r_a_p_products[kk];
831 }
832 }
833 }
834
835 /*-----------------------------------------------------------
836 * If i2 is previously visited ( A_marker[12]=ic ) it yields
837 * no new entries in RAP and can just add new contributions.
838 *-----------------------------------------------------------*/
839
840 else
841 {
842 for (jj3=P_ext_diag_i[i2]; jj3 < P_ext_diag_i[i2+1]; jj3++)
843 {
844 i3 = P_ext_diag_j[jj3];
845 hypre_CSRBlockMatrixBlockMultAdd(r_a_products,
846 &(P_ext_diag_data[jj3*bnnz]), zero,
847 r_a_p_products, block_size);
848 for (kk = 0; kk < bnnz; kk++)
849 RAP_int_data[P_marker[i3]*bnnz+kk] +=
850 r_a_p_products[kk];
851 }
852 for (jj3=P_ext_offd_i[i2]; jj3 < P_ext_offd_i[i2+1]; jj3++)
853 {
854 i3 = P_ext_offd_j[jj3] + num_cols_diag_P;
855 hypre_CSRBlockMatrixBlockMultAdd(r_a_products,
856 &(P_ext_offd_data[jj3*bnnz]), zero,
857 r_a_p_products, block_size);
858 ind = P_marker[i3] * bnnz;
859 for (kk = 0; kk < bnnz; kk++)
860 RAP_int_data[ind++] += r_a_p_products[kk];
861 }
862 }
863 }
864
865 /*--------------------------------------------------------------
866 * Loop over entries in row i1 of A_diag.
867 *--------------------------------------------------------------*/
868
869 for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++)
870 {
871 i2 = A_diag_j[jj2];
872 hypre_CSRBlockMatrixBlockMultAdd(r_entries,
873 &(A_diag_data[jj2*bnnz]), zero, r_a_products,
874 block_size);
875
876 /*-----------------------------------------------------------
877 * Check A_marker to see if point i2 has been previously
878 * visited. New entries in RAP only occur from unmarked points.
879 *-----------------------------------------------------------*/
880
881 if (A_marker[i2+num_cols_offd_A] != ic)
882 {
883
884 /*--------------------------------------------------------
885 * Mark i2 as visited.
886 *--------------------------------------------------------*/
887
888 A_marker[i2+num_cols_offd_A] = ic;
889
890 /*--------------------------------------------------------
891 * Loop over entries in row i2 of P_diag.
892 *--------------------------------------------------------*/
893
894 for (jj3 = P_diag_i[i2]; jj3 < P_diag_i[i2+1]; jj3++)
895 {
896 i3 = P_diag_j[jj3];
897 hypre_CSRBlockMatrixBlockMultAdd(r_a_products,
898 &(P_diag_data[jj3*bnnz]), zero,
899 r_a_p_products, block_size);
900
901 /*-----------------------------------------------------
902 * Check P_marker to see that RAP_{ic,i3} has not
903 * been accounted for. If it has not, create a new
904 * entry. If it has, add new contribution.
905 *-----------------------------------------------------*/
906
907 if (P_marker[i3] < jj_row_begining)
908 {
909 P_marker[i3] = jj_counter;
910 ind = jj_counter * bnnz;
911 for (kk = 0; kk < bnnz; kk++)
912 RAP_int_data[ind++] = r_a_p_products[kk];
913 RAP_int_j[jj_counter] = (HYPRE_BigInt)i3 + first_col_diag_P;
914 jj_counter++;
915 }
916 else
917 {
918 ind = P_marker[i3] * bnnz;
919 for (kk = 0; kk < bnnz; kk++)
920 RAP_int_data[ind++] += r_a_p_products[kk];
921 }
922 }
923 for (jj3 = P_offd_i[i2]; jj3 < P_offd_i[i2+1]; jj3++)
924 {
925 i3 = map_P_to_Pext[P_offd_j[jj3]] + num_cols_diag_P;
926 hypre_CSRBlockMatrixBlockMultAdd(r_a_products,
927 &(P_offd_data[jj3*bnnz]), zero,
928 r_a_p_products, block_size);
929
930 /*-----------------------------------------------------
931 * Check P_marker to see that RAP_{ic,i3} has not
932 * been accounted for. If it has not, create a new
933 * entry. If it has, add new contribution.
934 *-----------------------------------------------------*/
935
936 if (P_marker[i3] < jj_row_begining)
937 {
938 P_marker[i3] = jj_counter;
939 ind = jj_counter * bnnz;
940 for (kk = 0; kk < bnnz; kk++)
941 RAP_int_data[ind++] = r_a_p_products[kk];
942 RAP_int_j[jj_counter] =
943 col_map_offd_Pext[i3-num_cols_diag_P];
944 jj_counter++;
945 }
946 else
947 {
948 ind = P_marker[i3] * bnnz;
949 for (kk = 0; kk < bnnz; kk++)
950 RAP_int_data[ind++] += r_a_p_products[kk];
951 }
952 }
953 }
954
955 /*-----------------------------------------------------------
956 * If i2 is previously visited ( A_marker[12]=ic ) it yields
957 * no new entries in RAP and can just add new contributions.
958 *-----------------------------------------------------------*/
959
960 else
961 {
962 for (jj3 = P_diag_i[i2]; jj3 < P_diag_i[i2+1]; jj3++)
963 {
964 i3 = P_diag_j[jj3];
965 hypre_CSRBlockMatrixBlockMultAdd(r_a_products,
966 &(P_diag_data[jj3*bnnz]), zero,
967 r_a_p_products, block_size);
968 ind = P_marker[i3] * bnnz;
969 for (kk = 0; kk < bnnz; kk++)
970 RAP_int_data[ind++] += r_a_p_products[kk];
971 }
972 for (jj3 = P_offd_i[i2]; jj3 < P_offd_i[i2+1]; jj3++)
973 {
974 i3 = map_P_to_Pext[P_offd_j[jj3]] + num_cols_diag_P;
975 hypre_CSRBlockMatrixBlockMultAdd(r_a_products,
976 &(P_offd_data[jj3*bnnz]), zero,
977 r_a_p_products, block_size);
978 ind = P_marker[i3] * bnnz;
979 for (kk = 0; kk < bnnz; kk++)
980 RAP_int_data[ind++] += r_a_p_products[kk];
981 }
982 }
983 }
984 }
985 }
986 if (num_cols_offd_Pext || num_cols_diag_P)
987 hypre_TFree(P_mark_array[ii], HYPRE_MEMORY_HOST);
988 hypre_TFree(A_mark_array[ii], HYPRE_MEMORY_HOST);
989 }
990
991 RAP_int = hypre_CSRBlockMatrixCreate(block_size,num_cols_offd_RT,
992 num_rows_offd_RT,RAP_size);
993 hypre_CSRBlockMatrixI(RAP_int) = RAP_int_i;
994 hypre_CSRBlockMatrixBigJ(RAP_int) = RAP_int_j;
995 hypre_CSRBlockMatrixData(RAP_int) = RAP_int_data;
996 hypre_TFree(jj_count, HYPRE_MEMORY_HOST);
997 }
998
999 RAP_ext_size = 0;
1000 if (num_sends_RT || num_recvs_RT)
1001 {
1002 RAP_ext = hypre_ExchangeRAPBlockData(RAP_int,comm_pkg_RT, block_size);
1003 RAP_ext_i = hypre_CSRBlockMatrixI(RAP_ext);
1004 RAP_ext_j = hypre_CSRBlockMatrixBigJ(RAP_ext);
1005 RAP_ext_data = hypre_CSRBlockMatrixData(RAP_ext);
1006 RAP_ext_size = RAP_ext_i[hypre_CSRBlockMatrixNumRows(RAP_ext)];
1007 }
1008 if (num_cols_offd_RT)
1009 {
1010 hypre_CSRBlockMatrixDestroy(RAP_int);
1011 RAP_int = NULL;
1012 }
1013
1014 RAP_diag_i = hypre_CTAlloc(HYPRE_Int, num_cols_diag_P+1, HYPRE_MEMORY_HOST);
1015 RAP_offd_i = hypre_CTAlloc(HYPRE_Int, num_cols_diag_P+1, HYPRE_MEMORY_HOST);
1016
1017 first_col_diag_RAP = first_col_diag_P;
1018 last_col_diag_RAP = first_col_diag_P + (HYPRE_BigInt)num_cols_diag_P - 1;
1019
1020 /*-----------------------------------------------------------------------
1021 * check for new nonzero columns in RAP_offd generated through RAP_ext
1022 *-----------------------------------------------------------------------*/
1023
1024 if (RAP_ext_size || num_cols_offd_Pext)
1025 {
1026 temp = hypre_CTAlloc(HYPRE_BigInt, RAP_ext_size+num_cols_offd_Pext, HYPRE_MEMORY_HOST);
1027 cnt = 0;
1028 for (i=0; i < RAP_ext_size; i++)
1029 if (RAP_ext_j[i] < first_col_diag_RAP
1030 || RAP_ext_j[i] > last_col_diag_RAP)
1031 temp[cnt++] = RAP_ext_j[i];
1032 for (i=0; i < num_cols_offd_Pext; i++)
1033 temp[cnt++] = col_map_offd_Pext[i];
1034
1035 if (cnt)
1036 {
1037 hypre_BigQsort0(temp,0,cnt-1);
1038 value = temp[0];
1039 num_cols_offd_RAP = 1;
1040 for (i=1; i < cnt; i++)
1041 {
1042 if (temp[i] > value)
1043 {
1044 value = temp[i];
1045 temp[num_cols_offd_RAP++] = value;
1046 }
1047 }
1048 }
1049
1050 /* now evaluate col_map_offd_RAP */
1051 if (num_cols_offd_RAP)
1052 col_map_offd_RAP = hypre_CTAlloc(HYPRE_BigInt, num_cols_offd_RAP, HYPRE_MEMORY_HOST);
1053
1054 for (i=0 ; i < num_cols_offd_RAP; i++)
1055 col_map_offd_RAP[i] = temp[i];
1056
1057 hypre_TFree(temp, HYPRE_MEMORY_HOST);
1058 }
1059
1060 if (num_cols_offd_P)
1061 {
1062 map_P_to_RAP = hypre_CTAlloc(HYPRE_Int, num_cols_offd_P, HYPRE_MEMORY_HOST);
1063
1064 cnt = 0;
1065 for (i=0; i < num_cols_offd_RAP; i++)
1066 if (col_map_offd_RAP[i] == col_map_offd_P[cnt])
1067 {
1068 map_P_to_RAP[cnt++] = i;
1069 if (cnt == num_cols_offd_P) break;
1070 }
1071 }
1072
1073 if (num_cols_offd_Pext)
1074 {
1075 map_Pext_to_RAP = hypre_CTAlloc(HYPRE_Int, num_cols_offd_Pext, HYPRE_MEMORY_HOST);
1076
1077 cnt = 0;
1078 for (i=0; i < num_cols_offd_RAP; i++)
1079 if (col_map_offd_RAP[i] == col_map_offd_Pext[cnt])
1080 {
1081 map_Pext_to_RAP[cnt++] = i;
1082 if (cnt == num_cols_offd_Pext) break;
1083 }
1084 }
1085
1086 /*-----------------------------------------------------------------------
1087 * Convert RAP_ext column indices
1088 *-----------------------------------------------------------------------*/
1089
1090 for (i=0; i < RAP_ext_size; i++)
1091 if (RAP_ext_j[i] < first_col_diag_RAP
1092 || RAP_ext_j[i] > last_col_diag_RAP)
1093 RAP_ext_j[i] = (HYPRE_BigInt)(num_cols_diag_P)
1094 + hypre_BigBinarySearch(col_map_offd_RAP,
1095 RAP_ext_j[i],num_cols_offd_RAP);
1096 else
1097 RAP_ext_j[i] -= first_col_diag_RAP;
1098
1099 /*-----------------------------------------------------------------------
1100 * Initialize some stuff.
1101 *-----------------------------------------------------------------------*/
1102
1103 jj_cnt_diag = hypre_CTAlloc(HYPRE_Int, num_threads, HYPRE_MEMORY_HOST);
1104 jj_cnt_offd = hypre_CTAlloc(HYPRE_Int, num_threads, HYPRE_MEMORY_HOST);
1105
1106 for (ii = 0; ii < num_threads; ii++)
1107 {
1108 size = num_cols_diag_P/num_threads;
1109 rest = num_cols_diag_P - size*num_threads;
1110 if (ii < rest)
1111 {
1112 ns = ii*size+ii;
1113 ne = (ii+1)*size+ii+1;
1114 }
1115 else
1116 {
1117 ns = ii*size+rest;
1118 ne = (ii+1)*size+rest;
1119 }
1120
1121 P_mark_array[ii] = hypre_CTAlloc(HYPRE_Int, num_cols_diag_P+num_cols_offd_RAP, HYPRE_MEMORY_HOST);
1122 A_mark_array[ii] = hypre_CTAlloc(HYPRE_Int, num_nz_cols_A, HYPRE_MEMORY_HOST);
1123 P_marker = P_mark_array[ii];
1124 A_marker = A_mark_array[ii];
1125 jj_count_diag = start_indexing;
1126 jj_count_offd = start_indexing;
1127
1128 for (ic = 0; ic < num_cols_diag_P+num_cols_offd_RAP; ic++)
1129 {
1130 P_marker[ic] = -1;
1131 }
1132 for (i = 0; i < num_nz_cols_A; i++)
1133 {
1134 A_marker[i] = -1;
1135 }
1136
1137 /*-----------------------------------------------------------------------
1138 * Loop over interior c-points.
1139 *-----------------------------------------------------------------------*/
1140
1141 for (ic = ns; ic < ne; ic++)
1142 {
1143
1144 /*--------------------------------------------------------------------
1145 * Set marker for diagonal entry, RAP_{ic,ic}. and for all points
1146 * being added to row ic of RAP_diag and RAP_offd through RAP_ext
1147 *--------------------------------------------------------------------*/
1148
1149 P_marker[ic] = jj_count_diag;
1150 jj_row_begin_diag = jj_count_diag;
1151 jj_row_begin_offd = jj_count_offd;
1152 jj_count_diag++;
1153
1154 for (i=0; i < num_sends_RT; i++)
1155 for (j = send_map_starts_RT[i]; j < send_map_starts_RT[i+1]; j++)
1156 if (send_map_elmts_RT[j] == ic)
1157 {
1158 for (k=RAP_ext_i[j]; k < RAP_ext_i[j+1]; k++)
1159 {
1160 jcol = (HYPRE_Int)RAP_ext_j[k];
1161 if (jcol < num_cols_diag_P)
1162 {
1163 if (P_marker[jcol] < jj_row_begin_diag)
1164 {
1165 P_marker[jcol] = jj_count_diag;
1166 jj_count_diag++;
1167 }
1168 }
1169 else
1170 {
1171 if (P_marker[jcol] < jj_row_begin_offd)
1172 {
1173 P_marker[jcol] = jj_count_offd;
1174 jj_count_offd++;
1175 }
1176 }
1177 }
1178 break;
1179 }
1180
1181 /*-----------------------------------------------------------------
1182 * Loop over entries in row ic of R_diag.
1183 *-----------------------------------------------------------------*/
1184
1185 for (jj1 = R_diag_i[ic]; jj1 < R_diag_i[ic+1]; jj1++)
1186 {
1187 i1 = R_diag_j[jj1];
1188
1189 /*-----------------------------------------------------------------
1190 * Loop over entries in row i1 of A_offd.
1191 *-----------------------------------------------------------------*/
1192
1193 if (num_cols_offd_A)
1194 {
1195 for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++)
1196 {
1197 i2 = A_offd_j[jj2];
1198
1199 /*-----------------------------------------------------------
1200 * Check A_marker to see if point i2 has been previously
1201 * visited.New entries in RAP only occur from unmarked points.
1202 *-----------------------------------------------------------*/
1203
1204 if (A_marker[i2] != ic)
1205 {
1206 /*--------------------------------------------------------
1207 * Mark i2 as visited.
1208 *--------------------------------------------------------*/
1209
1210 A_marker[i2] = ic;
1211
1212 /*--------------------------------------------------------
1213 * Loop over entries in row i2 of P_ext.
1214 *--------------------------------------------------------*/
1215
1216 for (jj3=P_ext_diag_i[i2]; jj3 < P_ext_diag_i[i2+1]; jj3++)
1217 {
1218 i3 = P_ext_diag_j[jj3];
1219
1220 /*-----------------------------------------------------
1221 * Check P_marker to see that RAP_{ic,i3} has not
1222 * been accounted for. If it has not, mark it and
1223 * increment counter.
1224 *-----------------------------------------------------*/
1225
1226 if (P_marker[i3] < jj_row_begin_diag)
1227 {
1228 P_marker[i3] = jj_count_diag;
1229 jj_count_diag++;
1230 }
1231 }
1232 for (jj3=P_ext_offd_i[i2]; jj3 < P_ext_offd_i[i2+1]; jj3++)
1233 {
1234 i3 = map_Pext_to_RAP[P_ext_offd_j[jj3]]+num_cols_diag_P;
1235
1236 /*-----------------------------------------------------
1237 * Check P_marker to see that RAP_{ic,i3} has not
1238 * been accounted for. If it has not, mark it and
1239 * increment counter.
1240 *-----------------------------------------------------*/
1241
1242 if (P_marker[i3] < jj_row_begin_offd)
1243 {
1244 P_marker[i3] = jj_count_offd;
1245 jj_count_offd++;
1246 }
1247 }
1248 }
1249 }
1250 }
1251
1252 /*-----------------------------------------------------------------
1253 * Loop over entries in row i1 of A_diag.
1254 *-----------------------------------------------------------------*/
1255
1256 for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++)
1257 {
1258 i2 = A_diag_j[jj2];
1259
1260 /*--------------------------------------------------------------
1261 * Check A_marker to see if point i2 has been previously
1262 * visited. New entries in RAP only occur from unmarked points.
1263 *--------------------------------------------------------------*/
1264
1265 if (A_marker[i2+num_cols_offd_A] != ic)
1266 {
1267
1268 /*-----------------------------------------------------------
1269 * Mark i2 as visited.
1270 *-----------------------------------------------------------*/
1271
1272 A_marker[i2+num_cols_offd_A] = ic;
1273
1274 /*-----------------------------------------------------------
1275 * Loop over entries in row i2 of P_diag.
1276 *-----------------------------------------------------------*/
1277
1278 for (jj3 = P_diag_i[i2]; jj3 < P_diag_i[i2+1]; jj3++)
1279 {
1280 i3 = P_diag_j[jj3];
1281
1282 /*--------------------------------------------------------
1283 * Check P_marker to see that RAP_{ic,i3} has not already
1284 * been accounted for. If it has not, mark it and increment
1285 * counter.
1286 *--------------------------------------------------------*/
1287
1288 if (P_marker[i3] < jj_row_begin_diag)
1289 {
1290 P_marker[i3] = jj_count_diag;
1291 jj_count_diag++;
1292 }
1293 }
1294
1295 /*-----------------------------------------------------------
1296 * Loop over entries in row i2 of P_offd.
1297 *-----------------------------------------------------------*/
1298
1299 if (num_cols_offd_P)
1300 {
1301 for (jj3 = P_offd_i[i2]; jj3 < P_offd_i[i2+1]; jj3++)
1302 {
1303 i3 = map_P_to_RAP[P_offd_j[jj3]] + num_cols_diag_P;
1304
1305 /*-----------------------------------------------------
1306 * Check P_marker to see that RAP_{ic,i3} has not
1307 * been accounted for. If it has not, mark it and
1308 * increment counter.
1309 *-----------------------------------------------------*/
1310
1311 if (P_marker[i3] < jj_row_begin_offd)
1312 {
1313 P_marker[i3] = jj_count_offd;
1314 jj_count_offd++;
1315 }
1316 }
1317 }
1318 }
1319 }
1320 }
1321
1322 /*--------------------------------------------------------------------
1323 * Set RAP_diag_i and RAP_offd_i for this row.
1324 *--------------------------------------------------------------------*/
1325 }
1326 jj_cnt_diag[ii] = jj_count_diag;
1327 jj_cnt_offd[ii] = jj_count_offd;
1328 }
1329
1330 for (i=0; i < num_threads-1; i++)
1331 {
1332 jj_cnt_diag[i+1] += jj_cnt_diag[i];
1333 jj_cnt_offd[i+1] += jj_cnt_offd[i];
1334 }
1335
1336 jj_count_diag = jj_cnt_diag[num_threads-1];
1337 jj_count_offd = jj_cnt_offd[num_threads-1];
1338
1339 RAP_diag_i[num_cols_diag_P] = jj_count_diag;
1340 RAP_offd_i[num_cols_diag_P] = jj_count_offd;
1341
1342 /*-----------------------------------------------------------------------
1343 * Allocate RAP_diag_data and RAP_diag_j arrays.
1344 * Allocate RAP_offd_data and RAP_offd_j arrays.
1345 *-----------------------------------------------------------------------*/
1346
1347 RAP_diag_size = jj_count_diag;
1348 if (RAP_diag_size)
1349 {
1350 RAP_diag_data = hypre_CTAlloc(HYPRE_Complex, RAP_diag_size*bnnz, HYPRE_MEMORY_HOST);
1351 RAP_diag_j = hypre_CTAlloc(HYPRE_Int, RAP_diag_size, HYPRE_MEMORY_HOST);
1352 }
1353
1354 RAP_offd_size = jj_count_offd;
1355 if (RAP_offd_size)
1356 {
1357 RAP_offd_data = hypre_CTAlloc(HYPRE_Complex, RAP_offd_size*bnnz, HYPRE_MEMORY_HOST);
1358 RAP_offd_j = hypre_CTAlloc(HYPRE_Int, RAP_offd_size, HYPRE_MEMORY_HOST);
1359 }
1360
1361 if (RAP_offd_size == 0 && num_cols_offd_RAP != 0)
1362 {
1363 num_cols_offd_RAP = 0;
1364 hypre_TFree(col_map_offd_RAP, HYPRE_MEMORY_HOST);
1365 }
1366
1367 /*-----------------------------------------------------------------------
1368 * Second Pass: Fill in RAP_diag_data and RAP_diag_j.
1369 * Second Pass: Fill in RAP_offd_data and RAP_offd_j.
1370 *-----------------------------------------------------------------------*/
1371
1372 for (ii = 0; ii < num_threads; ii++)
1373 {
1374 size = num_cols_diag_P/num_threads;
1375 rest = num_cols_diag_P - size*num_threads;
1376 if (ii < rest)
1377 {
1378 ns = ii*size+ii;
1379 ne = (ii+1)*size+ii+1;
1380 }
1381 else
1382 {
1383 ns = ii*size+rest;
1384 ne = (ii+1)*size+rest;
1385 }
1386
1387 /*-----------------------------------------------------------------------
1388 * Initialize some stuff.
1389 *-----------------------------------------------------------------------*/
1390
1391 P_marker = P_mark_array[ii];
1392 A_marker = A_mark_array[ii];
1393 for (ic = 0; ic < num_cols_diag_P+num_cols_offd_RAP; ic++)
1394 {
1395 P_marker[ic] = -1;
1396 }
1397 for (i = 0; i < num_nz_cols_A ; i++)
1398 {
1399 A_marker[i] = -1;
1400 }
1401
1402 jj_count_diag = start_indexing;
1403 jj_count_offd = start_indexing;
1404 if (ii > 0)
1405 {
1406 jj_count_diag = jj_cnt_diag[ii-1];
1407 jj_count_offd = jj_cnt_offd[ii-1];
1408 }
1409
1410 /*-----------------------------------------------------------------------
1411 * Loop over interior c-points.
1412 *-----------------------------------------------------------------------*/
1413
1414 for (ic = ns; ic < ne; ic++)
1415 {
1416
1417 /*--------------------------------------------------------------------
1418 * Create diagonal entry, RAP_{ic,ic} and add entries of RAP_ext
1419 *--------------------------------------------------------------------*/
1420
1421 P_marker[ic] = jj_count_diag;
1422 jj_row_begin_diag = jj_count_diag;
1423 jj_row_begin_offd = jj_count_offd;
1424 RAP_diag_i[ic] = jj_row_begin_diag;
1425 RAP_offd_i[ic] = jj_row_begin_offd;
1426 ind = jj_count_diag * bnnz;
1427 for (kk = 0; kk < bnnz; kk++)
1428 RAP_diag_data[ind++] = zero;
1429 RAP_diag_j[jj_count_diag] = ic;
1430 jj_count_diag++;
1431
1432 for (i=0; i < num_sends_RT; i++)
1433 for (j = send_map_starts_RT[i]; j < send_map_starts_RT[i+1]; j++)
1434 if (send_map_elmts_RT[j] == ic)
1435 {
1436 for (k=RAP_ext_i[j]; k < RAP_ext_i[j+1]; k++)
1437 {
1438 jcol = (HYPRE_Int)RAP_ext_j[k];
1439 if (jcol < num_cols_diag_P)
1440 {
1441 if (P_marker[jcol] < jj_row_begin_diag)
1442 {
1443 P_marker[jcol] = jj_count_diag;
1444 ind = jj_count_diag * bnnz;
1445 for (kk = 0; kk < bnnz; kk++)
1446 RAP_diag_data[ind++] = RAP_ext_data[k*bnnz+kk];
1447 RAP_diag_j[jj_count_diag] = jcol;
1448 jj_count_diag++;
1449 }
1450 else
1451 {
1452 ind = P_marker[jcol] * bnnz;
1453 for (kk = 0; kk < bnnz; kk++)
1454 RAP_diag_data[ind++] += RAP_ext_data[k*bnnz+kk];
1455 }
1456 }
1457 else
1458 {
1459 if (P_marker[jcol] < jj_row_begin_offd)
1460 {
1461 P_marker[jcol] = jj_count_offd;
1462 ind = jj_count_offd * bnnz;
1463 for (kk = 0; kk < bnnz; kk++)
1464 RAP_offd_data[ind++] = RAP_ext_data[k*bnnz+kk];
1465 RAP_offd_j[jj_count_offd]
1466 = jcol-num_cols_diag_P;
1467 jj_count_offd++;
1468 }
1469 else
1470 {
1471 ind = P_marker[jcol] * bnnz;
1472 for (kk = 0; kk < bnnz; kk++)
1473 RAP_offd_data[ind++] += RAP_ext_data[k*bnnz+kk];
1474 }
1475 }
1476 }
1477 break;
1478 }
1479
1480 /*--------------------------------------------------------------------
1481 * Loop over entries in row ic of R_diag.
1482 *--------------------------------------------------------------------*/
1483
1484 for (jj1 = R_diag_i[ic]; jj1 < R_diag_i[ic+1]; jj1++)
1485 {
1486 i1 = R_diag_j[jj1];
1487 r_entries = &(R_diag_data[jj1*bnnz]);
1488
1489 /*-----------------------------------------------------------------
1490 * Loop over entries in row i1 of A_offd.
1491 *-----------------------------------------------------------------*/
1492
1493 if (num_cols_offd_A)
1494 {
1495 for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++)
1496 {
1497 i2 = A_offd_j[jj2];
1498 hypre_CSRBlockMatrixBlockMultAdd(r_entries,
1499 &(A_offd_data[jj2*bnnz]), zero, r_a_products,
1500 block_size);
1501
1502 /*-----------------------------------------------------------
1503 * Check A_marker to see if point i2 has been previously
1504 * visited.New entries in RAP only occur from unmarked points.
1505 *-----------------------------------------------------------*/
1506
1507 if (A_marker[i2] != ic)
1508 {
1509 /*--------------------------------------------------------
1510 * Mark i2 as visited.
1511 *--------------------------------------------------------*/
1512
1513 A_marker[i2] = ic;
1514
1515 /*--------------------------------------------------------
1516 * Loop over entries in row i2 of P_ext.
1517 *--------------------------------------------------------*/
1518
1519 for (jj3=P_ext_diag_i[i2]; jj3 < P_ext_diag_i[i2+1]; jj3++)
1520 {
1521 i3 = P_ext_diag_j[jj3];
1522 hypre_CSRBlockMatrixBlockMultAdd(r_a_products,
1523 &(P_ext_diag_data[jj3*bnnz]), zero,
1524 r_a_p_products, block_size);
1525
1526 /*-----------------------------------------------------
1527 * Check P_marker to see that RAP_{ic,i3} has not
1528 * been accounted for. If it has not, create a new
1529 * entry. If it has, add new contribution.
1530 *-----------------------------------------------------*/
1531
1532 if (P_marker[i3] < jj_row_begin_diag)
1533 {
1534 P_marker[i3] = jj_count_diag;
1535 ind = jj_count_diag * bnnz;
1536 for (kk = 0; kk < bnnz; kk++)
1537 RAP_diag_data[ind++] = r_a_p_products[kk];
1538 RAP_diag_j[jj_count_diag] = i3;
1539 jj_count_diag++;
1540 }
1541 else
1542 {
1543 ind = P_marker[i3] * bnnz;
1544 for (kk = 0; kk < bnnz; kk++)
1545 RAP_diag_data[ind++] += r_a_p_products[kk];
1546 }
1547 }
1548 for (jj3=P_ext_offd_i[i2]; jj3 < P_ext_offd_i[i2+1]; jj3++)
1549 {
1550 i3 = map_Pext_to_RAP[P_ext_offd_j[jj3]]+num_cols_diag_P;
1551 hypre_CSRBlockMatrixBlockMultAdd(r_a_products,
1552 &(P_ext_offd_data[jj3*bnnz]),
1553 zero, r_a_p_products, block_size);
1554
1555 /*-----------------------------------------------------
1556 * Check P_marker to see that RAP_{ic,i3} has not
1557 * been accounted for. If it has not, create a new
1558 * entry. If it has, add new contribution.
1559 *-----------------------------------------------------*/
1560 if (P_marker[i3] < jj_row_begin_offd)
1561 {
1562 P_marker[i3] = jj_count_offd;
1563 ind = jj_count_offd * bnnz;
1564 for (kk = 0; kk < bnnz; kk++)
1565 RAP_offd_data[ind++] = r_a_p_products[kk];
1566 RAP_offd_j[jj_count_offd] = i3 - num_cols_diag_P;
1567 jj_count_offd++;
1568 }
1569 else
1570 {
1571 ind = P_marker[i3] * bnnz;
1572 for (kk = 0; kk < bnnz; kk++)
1573 RAP_offd_data[ind++] += r_a_p_products[kk];
1574 }
1575 }
1576 }
1577
1578 /*-----------------------------------------------------------
1579 * If i2 is previously visited ( A_marker[12]=ic ) it yields
1580 * no new entries in RAP and can just add new contributions.
1581 *-----------------------------------------------------------*/
1582 else
1583 {
1584 for (jj3=P_ext_diag_i[i2]; jj3 < P_ext_diag_i[i2+1]; jj3++)
1585 {
1586 i3 = P_ext_diag_j[jj3];
1587 hypre_CSRBlockMatrixBlockMultAdd(r_a_products,
1588 &(P_ext_diag_data[jj3*bnnz]), zero,
1589 r_a_p_products, block_size);
1590 ind = P_marker[i3] * bnnz;
1591 for (kk = 0; kk < bnnz; kk++)
1592 RAP_diag_data[ind++] += r_a_p_products[kk];
1593 }
1594 for (jj3=P_ext_offd_i[i2]; jj3 < P_ext_offd_i[i2+1]; jj3++)
1595 {
1596 i3=map_Pext_to_RAP[P_ext_offd_j[jj3]] + num_cols_diag_P;
1597 hypre_CSRBlockMatrixBlockMultAdd(r_a_products,
1598 &(P_ext_offd_data[jj3*bnnz]),
1599 zero, r_a_p_products, block_size);
1600 ind = P_marker[i3] * bnnz;
1601 for (kk = 0; kk < bnnz; kk++)
1602 RAP_offd_data[ind++] += r_a_p_products[kk];
1603 }
1604 }
1605 }
1606 }
1607
1608 /*-----------------------------------------------------------------
1609 * Loop over entries in row i1 of A_diag.
1610 *-----------------------------------------------------------------*/
1611
1612 for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++)
1613 {
1614 i2 = A_diag_j[jj2];
1615 hypre_CSRBlockMatrixBlockMultAdd(r_entries,
1616 &(A_diag_data[jj2*bnnz]),
1617 zero, r_a_products, block_size);
1618
1619 /*--------------------------------------------------------------
1620 * Check A_marker to see if point i2 has been previously
1621 * visited. New entries in RAP only occur from unmarked points.
1622 *--------------------------------------------------------------*/
1623
1624 if (A_marker[i2+num_cols_offd_A] != ic)
1625 {
1626
1627 /*-----------------------------------------------------------
1628 * Mark i2 as visited.
1629 *-----------------------------------------------------------*/
1630
1631 A_marker[i2+num_cols_offd_A] = ic;
1632
1633 /*-----------------------------------------------------------
1634 * Loop over entries in row i2 of P_diag.
1635 *-----------------------------------------------------------*/
1636
1637 for (jj3 = P_diag_i[i2]; jj3 < P_diag_i[i2+1]; jj3++)
1638 {
1639 i3 = P_diag_j[jj3];
1640 hypre_CSRBlockMatrixBlockMultAdd(r_a_products,
1641 &(P_diag_data[jj3*bnnz]),
1642 zero, r_a_p_products, block_size);
1643
1644 /*--------------------------------------------------------
1645 * Check P_marker to see that RAP_{ic,i3} has not already
1646 * been accounted for. If it has not, create a new entry.
1647 * If it has, add new contribution.
1648 *--------------------------------------------------------*/
1649
1650 if (P_marker[i3] < jj_row_begin_diag)
1651 {
1652 P_marker[i3] = jj_count_diag;
1653 ind = jj_count_diag * bnnz;
1654 for (kk = 0; kk < bnnz; kk++)
1655 RAP_diag_data[ind++] = r_a_p_products[kk];
1656 RAP_diag_j[jj_count_diag] = P_diag_j[jj3];
1657 jj_count_diag++;
1658 }
1659 else
1660 {
1661 ind = P_marker[i3] * bnnz;
1662 for (kk = 0; kk < bnnz; kk++)
1663 RAP_diag_data[ind++] += r_a_p_products[kk];
1664 }
1665 }
1666 if (num_cols_offd_P)
1667 {
1668 for (jj3 = P_offd_i[i2]; jj3 < P_offd_i[i2+1]; jj3++)
1669 {
1670 i3 = map_P_to_RAP[P_offd_j[jj3]] + num_cols_diag_P;
1671 hypre_CSRBlockMatrixBlockMultAdd(r_a_products,
1672 &(P_offd_data[jj3*bnnz]),
1673 zero, r_a_p_products, block_size);
1674
1675 /*-----------------------------------------------------
1676 * Check P_marker to see that RAP_{ic,i3} has not already
1677 * been accounted for. If it has not, create a new entry.
1678 * If it has, add new contribution.
1679 *-----------------------------------------------------*/
1680
1681 if (P_marker[i3] < jj_row_begin_offd)
1682 {
1683 P_marker[i3] = jj_count_offd;
1684 ind = jj_count_offd * bnnz;
1685 for (kk = 0; kk < bnnz; kk++)
1686 RAP_offd_data[ind++] = r_a_p_products[kk];
1687 RAP_offd_j[jj_count_offd] = i3 - num_cols_diag_P;
1688 jj_count_offd++;
1689 }
1690 else
1691 {
1692 ind = P_marker[i3] * bnnz;
1693 for (kk = 0; kk < bnnz; kk++)
1694 RAP_offd_data[ind++] += r_a_p_products[kk];
1695 }
1696 }
1697 }
1698 }
1699
1700 /*--------------------------------------------------------------
1701 * If i2 is previously visited ( A_marker[12]=ic ) it yields
1702 * no new entries in RAP and can just add new contributions.
1703 *--------------------------------------------------------------*/
1704
1705 else
1706 {
1707 for (jj3 = P_diag_i[i2]; jj3 < P_diag_i[i2+1]; jj3++)
1708 {
1709 i3 = P_diag_j[jj3];
1710 hypre_CSRBlockMatrixBlockMultAdd(r_a_products,
1711 &(P_diag_data[jj3*bnnz]),
1712 zero, r_a_p_products, block_size);
1713 ind = P_marker[i3] * bnnz;
1714 for (kk = 0; kk < bnnz; kk++)
1715 RAP_diag_data[ind++] += r_a_p_products[kk];
1716 }
1717 if (num_cols_offd_P)
1718 {
1719 for (jj3 = P_offd_i[i2]; jj3 < P_offd_i[i2+1]; jj3++)
1720 {
1721 i3 = map_P_to_RAP[P_offd_j[jj3]] + num_cols_diag_P;
1722 hypre_CSRBlockMatrixBlockMultAdd(r_a_products,
1723 &(P_offd_data[jj3*bnnz]),
1724 zero, r_a_p_products, block_size);
1725 ind = P_marker[i3] * bnnz;
1726 for (kk = 0; kk < bnnz; kk++)
1727 RAP_offd_data[ind++] += r_a_p_products[kk];
1728 }
1729 }
1730 }
1731 }
1732 }
1733 }
1734 hypre_TFree(P_mark_array[ii], HYPRE_MEMORY_HOST);
1735 hypre_TFree(A_mark_array[ii], HYPRE_MEMORY_HOST);
1736 }
1737
1738
1739 row_starts = hypre_CTAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST);
1740 col_starts = hypre_CTAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST);
1741 for (i = 0; i <= 1; i++)
1742 {
1743 row_starts[i] = col_starts[i] = coarse_partitioning[i];
1744 }
1745
1746 RAP = hypre_ParCSRBlockMatrixCreate(comm, block_size, n_coarse, n_coarse,
1747 row_starts, col_starts,
1748 num_cols_offd_RAP, RAP_diag_size, RAP_offd_size);
1749
1750 RAP_diag = hypre_ParCSRBlockMatrixDiag(RAP);
1751 hypre_CSRBlockMatrixI(RAP_diag) = RAP_diag_i;
1752 if (RAP_diag_size)
1753 {
1754 hypre_CSRBlockMatrixData(RAP_diag) = RAP_diag_data;
1755 hypre_CSRBlockMatrixJ(RAP_diag) = RAP_diag_j;
1756 }
1757
1758 RAP_offd = hypre_ParCSRBlockMatrixOffd(RAP);
1759 hypre_CSRBlockMatrixI(RAP_offd) = RAP_offd_i;
1760 if (num_cols_offd_RAP)
1761 {
1762 hypre_CSRBlockMatrixData(RAP_offd) = RAP_offd_data;
1763 hypre_CSRBlockMatrixJ(RAP_offd) = RAP_offd_j;
1764 hypre_ParCSRBlockMatrixColMapOffd(RAP) = col_map_offd_RAP;
1765 }
1766 if (num_procs > 1)
1767 {
1768 hypre_BlockMatvecCommPkgCreate(RAP);
1769 }
1770
1771 *RAP_ptr = RAP;
1772
1773 /*-----------------------------------------------------------------------
1774 * Free R, P_ext and marker arrays.
1775 *-----------------------------------------------------------------------*/
1776
1777 hypre_CSRBlockMatrixDestroy(R_diag);
1778 R_diag = NULL;
1779
1780 if (num_cols_offd_RT)
1781 {
1782 hypre_CSRBlockMatrixDestroy(R_offd);
1783 R_offd = NULL;
1784 }
1785
1786 if (num_sends_RT || num_recvs_RT)
1787 {
1788 hypre_CSRBlockMatrixDestroy(RAP_ext);
1789 RAP_ext = NULL;
1790 }
1791 hypre_TFree(P_mark_array, HYPRE_MEMORY_HOST);
1792 hypre_TFree(A_mark_array, HYPRE_MEMORY_HOST);
1793 hypre_TFree(P_ext_diag_i, HYPRE_MEMORY_HOST);
1794 hypre_TFree(P_ext_offd_i, HYPRE_MEMORY_HOST);
1795 hypre_TFree(jj_cnt_diag, HYPRE_MEMORY_HOST);
1796 hypre_TFree(jj_cnt_offd, HYPRE_MEMORY_HOST);
1797 if (num_cols_offd_P)
1798 {
1799 hypre_TFree(map_P_to_Pext, HYPRE_MEMORY_HOST);
1800 hypre_TFree(map_P_to_RAP, HYPRE_MEMORY_HOST);
1801 }
1802 if (num_cols_offd_Pext)
1803 {
1804 hypre_TFree(col_map_offd_Pext, HYPRE_MEMORY_HOST);
1805 hypre_TFree(map_Pext_to_RAP, HYPRE_MEMORY_HOST);
1806 }
1807 if (P_ext_diag_size)
1808 {
1809 hypre_TFree(P_ext_diag_data, HYPRE_MEMORY_HOST);
1810 hypre_TFree(P_ext_diag_j, HYPRE_MEMORY_HOST);
1811 }
1812 if (P_ext_offd_size)
1813 {
1814 hypre_TFree(P_ext_offd_data, HYPRE_MEMORY_HOST);
1815 hypre_TFree(P_ext_offd_j, HYPRE_MEMORY_HOST);
1816 }
1817
1818 hypre_TFree(r_a_products,HYPRE_MEMORY_HOST);
1819 hypre_TFree(r_a_p_products,HYPRE_MEMORY_HOST);
1820 hypre_TFree(row_starts,HYPRE_MEMORY_HOST);
1821 hypre_TFree(col_starts,HYPRE_MEMORY_HOST);
1822
1823 return hypre_error_flag;
1824 }
1825