1 /******************************************************************************
2  * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3  * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4  *
5  * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6  ******************************************************************************/
7 
8 #include <HYPRE_config.h>
9 #include "_hypre_utilities.h"
10 #include "par_csr_block_matrix.h"
11 #include "../parcsr_mv/_hypre_parcsr_mv.h"
12 
13 /*--------------------------------------------------------------------------
14  * used in RAP function - block size must be an argument because RAP_int may
15  * by NULL
16  *--------------------------------------------------------------------------*/
17 
18 hypre_CSRBlockMatrix *
hypre_ExchangeRAPBlockData(hypre_CSRBlockMatrix * RAP_int,hypre_ParCSRCommPkg * comm_pkg_RT,HYPRE_Int block_size)19 hypre_ExchangeRAPBlockData(hypre_CSRBlockMatrix *RAP_int,
20                            hypre_ParCSRCommPkg *comm_pkg_RT, HYPRE_Int block_size)
21 {
22    HYPRE_Int     *RAP_int_i;
23    HYPRE_BigInt  *RAP_int_j = NULL;
24    HYPRE_Complex *RAP_int_data = NULL;
25    HYPRE_Int     num_cols = 0;
26 
27    MPI_Comm comm = hypre_ParCSRCommPkgComm(comm_pkg_RT);
28    HYPRE_Int num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg_RT);
29    HYPRE_Int *recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg_RT);
30    HYPRE_Int *recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg_RT);
31    HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg_RT);
32    HYPRE_Int *send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg_RT);
33    HYPRE_Int *send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg_RT);
34 
35 /*   HYPRE_Int block_size = hypre_CSRBlockMatrixBlockSize(RAP_int); */
36 
37    hypre_CSRBlockMatrix *RAP_ext;
38 
39    HYPRE_Int     *RAP_ext_i;
40    HYPRE_BigInt  *RAP_ext_j = NULL;
41    HYPRE_Complex *RAP_ext_data = NULL;
42 
43    hypre_ParCSRCommHandle *comm_handle = NULL;
44    hypre_ParCSRCommPkg *tmp_comm_pkg;
45 
46    HYPRE_Int *jdata_recv_vec_starts;
47    HYPRE_Int *jdata_send_map_starts;
48 
49    HYPRE_Int num_rows;
50    HYPRE_Int num_nonzeros;
51    HYPRE_Int i, j, bnnz;
52    HYPRE_Int num_procs, my_id;
53 
54    hypre_MPI_Comm_size(comm,&num_procs);
55    hypre_MPI_Comm_rank(comm,&my_id);
56 
57    bnnz = block_size * block_size;
58 
59    RAP_ext_i = hypre_CTAlloc(HYPRE_Int,  send_map_starts[num_sends]+1, HYPRE_MEMORY_HOST);
60    jdata_recv_vec_starts = hypre_CTAlloc(HYPRE_Int,  num_recvs+1, HYPRE_MEMORY_HOST);
61    jdata_send_map_starts = hypre_CTAlloc(HYPRE_Int,  num_sends+1, HYPRE_MEMORY_HOST);
62 
63 /*--------------------------------------------------------------------------
64  * recompute RAP_int_i so that RAP_int_i[j+1] contains the number of
65  * elements of row j (to be determined through send_map_elmnts on the
66  * receiving end)
67  *--------------------------------------------------------------------------*/
68 
69    if (num_recvs)
70    {
71       RAP_int_i = hypre_CSRBlockMatrixI(RAP_int);
72       RAP_int_j = hypre_CSRBlockMatrixBigJ(RAP_int);
73       RAP_int_data = hypre_CSRBlockMatrixData(RAP_int);
74       num_cols = hypre_CSRBlockMatrixNumCols(RAP_int);
75    }
76    jdata_recv_vec_starts[0] = 0;
77    for (i=0; i < num_recvs; i++)
78       jdata_recv_vec_starts[i+1] = RAP_int_i[recv_vec_starts[i+1]];
79 
80    for (i=num_recvs; i > 0; i--)
81       for (j = recv_vec_starts[i]; j > recv_vec_starts[i-1]; j--)
82          RAP_int_i[j] -= RAP_int_i[j-1];
83 
84 /*--------------------------------------------------------------------------
85  * initialize communication
86  *--------------------------------------------------------------------------*/
87 
88    if (num_recvs && num_sends)
89       comm_handle = hypre_ParCSRCommHandleCreate(12,comm_pkg_RT,
90                                                  &RAP_int_i[1], &RAP_ext_i[1]);
91    else if (num_recvs)
92       comm_handle = hypre_ParCSRCommHandleCreate(12,comm_pkg_RT,
93                                                  &RAP_int_i[1], NULL);
94    else if (num_sends)
95       comm_handle = hypre_ParCSRCommHandleCreate(12,comm_pkg_RT,
96                                                  NULL, &RAP_ext_i[1]);
97 
98    tmp_comm_pkg = hypre_CTAlloc(hypre_ParCSRCommPkg,  1, HYPRE_MEMORY_HOST);
99    hypre_ParCSRCommPkgComm(tmp_comm_pkg) = comm;
100    hypre_ParCSRCommPkgNumSends(tmp_comm_pkg) = num_recvs;
101    hypre_ParCSRCommPkgNumRecvs(tmp_comm_pkg) = num_sends;
102    hypre_ParCSRCommPkgSendProcs(tmp_comm_pkg) = recv_procs;
103    hypre_ParCSRCommPkgRecvProcs(tmp_comm_pkg) = send_procs;
104 
105    hypre_ParCSRCommHandleDestroy(comm_handle);
106    comm_handle = NULL;
107 
108 /*--------------------------------------------------------------------------
109  * compute num_nonzeros for RAP_ext
110  *--------------------------------------------------------------------------*/
111 
112    for (i=0; i < num_sends; i++)
113       for (j = send_map_starts[i]; j < send_map_starts[i+1]; j++)
114          RAP_ext_i[j+1] += RAP_ext_i[j];
115 
116    num_rows = send_map_starts[num_sends];
117    num_nonzeros = RAP_ext_i[num_rows];
118    if (num_nonzeros)
119    {
120       RAP_ext_j = hypre_CTAlloc(HYPRE_BigInt,  num_nonzeros, HYPRE_MEMORY_HOST);
121       RAP_ext_data = hypre_CTAlloc(HYPRE_Complex,  num_nonzeros*bnnz, HYPRE_MEMORY_HOST);
122    }
123 
124    for (i=0; i < num_sends+1; i++)
125    {
126       jdata_send_map_starts[i] = RAP_ext_i[send_map_starts[i]];
127    }
128 
129    hypre_ParCSRCommPkgRecvVecStarts(tmp_comm_pkg) = jdata_send_map_starts;
130    hypre_ParCSRCommPkgSendMapStarts(tmp_comm_pkg) = jdata_recv_vec_starts;
131 
132    comm_handle = hypre_ParCSRBlockCommHandleCreate(1, bnnz, tmp_comm_pkg,
133                                                    (void *) RAP_int_data, (void *) RAP_ext_data);
134    hypre_ParCSRBlockCommHandleDestroy(comm_handle);
135    comm_handle = NULL;
136 
137    comm_handle = hypre_ParCSRCommHandleCreate(21,tmp_comm_pkg,RAP_int_j,
138                                               RAP_ext_j);
139    RAP_ext = hypre_CSRBlockMatrixCreate(block_size,num_rows,num_cols,
140                                         num_nonzeros);
141 
142    hypre_CSRBlockMatrixI(RAP_ext) = RAP_ext_i;
143    if (num_nonzeros)
144    {
145       hypre_CSRBlockMatrixBigJ(RAP_ext) = RAP_ext_j;
146       hypre_CSRBlockMatrixData(RAP_ext) = RAP_ext_data;
147    }
148 
149    hypre_ParCSRCommHandleDestroy(comm_handle);
150    comm_handle = NULL;
151 
152    hypre_TFree(jdata_recv_vec_starts, HYPRE_MEMORY_HOST);
153    hypre_TFree(jdata_send_map_starts, HYPRE_MEMORY_HOST);
154    hypre_TFree(tmp_comm_pkg, HYPRE_MEMORY_HOST);
155 
156    return RAP_ext;
157 }
158 
159 /*--------------------------------------------------------------------------
160  * hypre_BoomerAMGBuildCoarseOperator
161  *--------------------------------------------------------------------------*/
162 
163 HYPRE_Int
hypre_ParCSRBlockMatrixRAP(hypre_ParCSRBlockMatrix * RT,hypre_ParCSRBlockMatrix * A,hypre_ParCSRBlockMatrix * P,hypre_ParCSRBlockMatrix ** RAP_ptr)164 hypre_ParCSRBlockMatrixRAP(hypre_ParCSRBlockMatrix  *RT,
165                            hypre_ParCSRBlockMatrix  *A,
166                            hypre_ParCSRBlockMatrix  *P,
167                            hypre_ParCSRBlockMatrix **RAP_ptr )
168 
169 {
170    MPI_Comm        comm = hypre_ParCSRBlockMatrixComm(A);
171 
172    hypre_CSRBlockMatrix *RT_diag = hypre_ParCSRBlockMatrixDiag(RT);
173    hypre_CSRBlockMatrix *RT_offd = hypre_ParCSRBlockMatrixOffd(RT);
174    HYPRE_Int             num_cols_offd_RT = hypre_CSRBlockMatrixNumCols(RT_offd);
175    HYPRE_Int             num_rows_offd_RT = hypre_CSRBlockMatrixNumRows(RT_offd);
176    hypre_ParCSRCommPkg   *comm_pkg_RT = hypre_ParCSRBlockMatrixCommPkg(RT);
177    HYPRE_Int             num_recvs_RT = 0;
178    HYPRE_Int             num_sends_RT = 0;
179    HYPRE_Int             *send_map_starts_RT;
180    HYPRE_Int             *send_map_elmts_RT;
181 
182    hypre_CSRBlockMatrix *A_diag = hypre_ParCSRBlockMatrixDiag(A);
183 
184    HYPRE_Complex         *A_diag_data = hypre_CSRBlockMatrixData(A_diag);
185    HYPRE_Int             *A_diag_i = hypre_CSRBlockMatrixI(A_diag);
186    HYPRE_Int             *A_diag_j = hypre_CSRBlockMatrixJ(A_diag);
187    HYPRE_Int             block_size = hypre_CSRBlockMatrixBlockSize(A_diag);
188 
189    hypre_CSRBlockMatrix *A_offd = hypre_ParCSRBlockMatrixOffd(A);
190 
191    HYPRE_Complex         *A_offd_data = hypre_CSRBlockMatrixData(A_offd);
192    HYPRE_Int             *A_offd_i = hypre_CSRBlockMatrixI(A_offd);
193    HYPRE_Int             *A_offd_j = hypre_CSRBlockMatrixJ(A_offd);
194 
195    HYPRE_Int  num_cols_diag_A = hypre_CSRBlockMatrixNumCols(A_diag);
196    HYPRE_Int  num_cols_offd_A = hypre_CSRBlockMatrixNumCols(A_offd);
197 
198    hypre_CSRBlockMatrix *P_diag = hypre_ParCSRBlockMatrixDiag(P);
199 
200    HYPRE_Complex         *P_diag_data = hypre_CSRBlockMatrixData(P_diag);
201    HYPRE_Int             *P_diag_i = hypre_CSRBlockMatrixI(P_diag);
202    HYPRE_Int             *P_diag_j = hypre_CSRBlockMatrixJ(P_diag);
203 
204    hypre_CSRBlockMatrix *P_offd = hypre_ParCSRBlockMatrixOffd(P);
205    HYPRE_BigInt          *col_map_offd_P = hypre_ParCSRBlockMatrixColMapOffd(P);
206 
207    HYPRE_Complex         *P_offd_data = hypre_CSRBlockMatrixData(P_offd);
208    HYPRE_Int             *P_offd_i = hypre_CSRBlockMatrixI(P_offd);
209    HYPRE_Int             *P_offd_j = hypre_CSRBlockMatrixJ(P_offd);
210 
211    HYPRE_BigInt  first_col_diag_P = hypre_ParCSRBlockMatrixFirstColDiag(P);
212    HYPRE_BigInt  last_col_diag_P;
213    HYPRE_Int  num_cols_diag_P = hypre_CSRBlockMatrixNumCols(P_diag);
214    HYPRE_Int  num_cols_offd_P = hypre_CSRBlockMatrixNumCols(P_offd);
215    HYPRE_BigInt *coarse_partitioning = hypre_ParCSRBlockMatrixColStarts(P);
216    HYPRE_BigInt *row_starts, *col_starts;
217 
218    hypre_ParCSRBlockMatrix *RAP;
219    HYPRE_BigInt            *col_map_offd_RAP;
220 
221    hypre_CSRBlockMatrix *RAP_int = NULL;
222 
223    HYPRE_Complex         *RAP_int_data;
224    HYPRE_Int             *RAP_int_i;
225    HYPRE_BigInt          *RAP_int_j;
226 
227    hypre_CSRBlockMatrix *RAP_ext;
228 
229    HYPRE_Complex         *RAP_ext_data;
230    HYPRE_Int             *RAP_ext_i;
231    HYPRE_BigInt          *RAP_ext_j;
232 
233    hypre_CSRBlockMatrix *RAP_diag;
234 
235    HYPRE_Complex         *RAP_diag_data;
236    HYPRE_Int             *RAP_diag_i;
237    HYPRE_Int             *RAP_diag_j;
238 
239    hypre_CSRBlockMatrix *RAP_offd;
240 
241    HYPRE_Complex         *RAP_offd_data;
242    HYPRE_Int             *RAP_offd_i;
243    HYPRE_Int             *RAP_offd_j;
244 
245    HYPRE_Int              RAP_size;
246    HYPRE_Int              RAP_ext_size;
247    HYPRE_Int              RAP_diag_size;
248    HYPRE_Int              RAP_offd_size;
249    HYPRE_Int              P_ext_diag_size;
250    HYPRE_Int              P_ext_offd_size;
251    HYPRE_Int              first_col_diag_RAP;
252    HYPRE_Int              last_col_diag_RAP;
253    HYPRE_Int              num_cols_offd_RAP = 0;
254 
255    hypre_CSRBlockMatrix *R_diag;
256 
257    HYPRE_Complex         *R_diag_data;
258    HYPRE_Int             *R_diag_i;
259    HYPRE_Int             *R_diag_j;
260 
261    hypre_CSRBlockMatrix *R_offd;
262 
263    HYPRE_Complex         *R_offd_data;
264    HYPRE_Int             *R_offd_i;
265    HYPRE_Int             *R_offd_j;
266 
267    hypre_CSRBlockMatrix *Ps_ext;
268 
269    HYPRE_Complex         *Ps_ext_data;
270    HYPRE_Int             *Ps_ext_i;
271    HYPRE_BigInt          *Ps_ext_j;
272 
273    HYPRE_Complex         *P_ext_diag_data;
274    HYPRE_Int             *P_ext_diag_i;
275    HYPRE_Int             *P_ext_diag_j;
276 
277    HYPRE_Complex         *P_ext_offd_data;
278    HYPRE_Int             *P_ext_offd_i;
279    HYPRE_Int             *P_ext_offd_j;
280 
281    HYPRE_BigInt          *col_map_offd_Pext;
282    HYPRE_Int             *map_P_to_Pext;
283    HYPRE_Int             *map_P_to_RAP;
284    HYPRE_Int             *map_Pext_to_RAP;
285 
286    HYPRE_Int             *P_marker;
287    HYPRE_Int            **P_mark_array;
288    HYPRE_Int            **A_mark_array;
289    HYPRE_Int             *A_marker;
290    HYPRE_BigInt          *temp = NULL;
291 
292    HYPRE_Int              n_coarse;
293    HYPRE_Int              num_cols_offd_Pext = 0;
294 
295    HYPRE_Int              ic, i, j, k, bnnz, kk;
296    HYPRE_Int              i1, i2, i3, ii, ns, ne, size, rest;
297    HYPRE_Int              cnt, cnt_offd, cnt_diag;
298    HYPRE_Int              jj1, jj2, jj3, jcol;
299    HYPRE_BigInt           value;
300 
301    HYPRE_Int             *jj_count, *jj_cnt_diag, *jj_cnt_offd;
302    HYPRE_Int              jj_counter, jj_count_diag, jj_count_offd;
303    HYPRE_Int              jj_row_begining, jj_row_begin_diag, jj_row_begin_offd;
304    HYPRE_Int              start_indexing = 0; /* start indexing for RAP_data at 0 */
305    HYPRE_Int              num_nz_cols_A;
306    HYPRE_Int              num_procs;
307    HYPRE_Int              num_threads, ind;
308 
309    HYPRE_Complex          *r_entries;
310    HYPRE_Complex          *r_a_products;
311    HYPRE_Complex          *r_a_p_products;
312 
313    HYPRE_Complex          zero = 0.0;
314 
315    /*-----------------------------------------------------------------------
316     *  Copy ParCSRBlockMatrix RT into CSRBlockMatrix R so that we have
317     *  row-wise access to restriction .
318     *-----------------------------------------------------------------------*/
319 
320    hypre_MPI_Comm_size(comm,&num_procs);
321    /* num_threads = hypre_NumThreads(); */
322    num_threads = 1;
323 
324    bnnz = block_size * block_size;
325    r_a_products = hypre_TAlloc(HYPRE_Complex, bnnz , HYPRE_MEMORY_HOST);
326    r_a_p_products = hypre_TAlloc(HYPRE_Complex, bnnz , HYPRE_MEMORY_HOST);
327 
328    if (comm_pkg_RT)
329    {
330       num_recvs_RT = hypre_ParCSRCommPkgNumRecvs(comm_pkg_RT);
331       num_sends_RT = hypre_ParCSRCommPkgNumSends(comm_pkg_RT);
332       send_map_starts_RT =hypre_ParCSRCommPkgSendMapStarts(comm_pkg_RT);
333       send_map_elmts_RT = hypre_ParCSRCommPkgSendMapElmts(comm_pkg_RT);
334    }
335 
336    hypre_CSRBlockMatrixTranspose(RT_diag,&R_diag,1);
337    if (num_cols_offd_RT)
338    {
339       hypre_CSRBlockMatrixTranspose(RT_offd,&R_offd,1);
340       R_offd_data = hypre_CSRBlockMatrixData(R_offd);
341       R_offd_i    = hypre_CSRBlockMatrixI(R_offd);
342       R_offd_j    = hypre_CSRBlockMatrixJ(R_offd);
343    }
344 
345    /*-----------------------------------------------------------------------
346     *  Access the CSR vectors for R. Also get sizes of fine and
347     *  coarse grids.
348     *-----------------------------------------------------------------------*/
349 
350    R_diag_data = hypre_CSRBlockMatrixData(R_diag);
351    R_diag_i    = hypre_CSRBlockMatrixI(R_diag);
352    R_diag_j    = hypre_CSRBlockMatrixJ(R_diag);
353 
354    n_coarse = hypre_ParCSRBlockMatrixGlobalNumCols(P);
355    num_nz_cols_A = num_cols_diag_A + num_cols_offd_A;
356 
357    /*-----------------------------------------------------------------------
358     *  Generate Ps_ext, i.e. portion of P that is stored on neighbor procs
359     *  and needed locally for triple matrix product
360     *-----------------------------------------------------------------------*/
361 
362    if (num_procs > 1)
363    {
364       Ps_ext = hypre_ParCSRBlockMatrixExtractBExt(P,A,1);
365       Ps_ext_data = hypre_CSRBlockMatrixData(Ps_ext);
366       Ps_ext_i    = hypre_CSRBlockMatrixI(Ps_ext);
367       Ps_ext_j    = hypre_CSRBlockMatrixBigJ(Ps_ext);
368    }
369 
370    P_ext_diag_i = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A+1, HYPRE_MEMORY_HOST);
371    P_ext_offd_i = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A+1, HYPRE_MEMORY_HOST);
372    P_ext_diag_size = 0;
373    P_ext_offd_size = 0;
374    last_col_diag_P = first_col_diag_P + (HYPRE_BigInt)num_cols_diag_P - 1;
375 
376    for (i=0; i < num_cols_offd_A; i++)
377    {
378       for (j=Ps_ext_i[i]; j < Ps_ext_i[i+1]; j++)
379          if (Ps_ext_j[j] < first_col_diag_P || Ps_ext_j[j] > last_col_diag_P)
380             P_ext_offd_size++;
381          else
382             P_ext_diag_size++;
383       P_ext_diag_i[i+1] = P_ext_diag_size;
384       P_ext_offd_i[i+1] = P_ext_offd_size;
385    }
386 
387    if (P_ext_diag_size)
388    {
389       P_ext_diag_j = hypre_CTAlloc(HYPRE_Int,  P_ext_diag_size, HYPRE_MEMORY_HOST);
390       P_ext_diag_data = hypre_CTAlloc(HYPRE_Complex,  P_ext_diag_size*bnnz, HYPRE_MEMORY_HOST);
391    }
392    if (P_ext_offd_size)
393    {
394       P_ext_offd_j = hypre_CTAlloc(HYPRE_Int,  P_ext_offd_size, HYPRE_MEMORY_HOST);
395       P_ext_offd_data = hypre_CTAlloc(HYPRE_Complex,  P_ext_offd_size*bnnz, HYPRE_MEMORY_HOST);
396    }
397 
398    cnt_offd = 0;
399    cnt_diag = 0;
400    cnt = 0;
401    for (i=0; i < num_cols_offd_A; i++)
402    {
403       for (j=Ps_ext_i[i]; j < Ps_ext_i[i+1]; j++)
404          if (Ps_ext_j[j] < first_col_diag_P || Ps_ext_j[j] > last_col_diag_P)
405          {
406             Ps_ext_j[cnt_offd] = Ps_ext_j[j];
407             for (kk = 0; kk < bnnz; kk++)
408                P_ext_offd_data[cnt_offd*bnnz+kk] = Ps_ext_data[j*bnnz+kk];
409             cnt_offd++;
410          }
411          else
412          {
413             P_ext_diag_j[cnt_diag] = (HYPRE_Int)(Ps_ext_j[j] - first_col_diag_P);
414             for (kk = 0; kk < bnnz; kk++)
415                P_ext_diag_data[cnt_diag*bnnz+kk] = Ps_ext_data[j*bnnz+kk];
416             cnt_diag++;
417          }
418    }
419    if (P_ext_offd_size || num_cols_offd_P)
420    {
421       temp = hypre_CTAlloc(HYPRE_BigInt,  P_ext_offd_size+num_cols_offd_P, HYPRE_MEMORY_HOST);
422       for (i=0; i < P_ext_offd_size; i++)
423          temp[i] = Ps_ext_j[i];
424       cnt = P_ext_offd_size;
425       for (i=0; i < num_cols_offd_P; i++)
426          temp[cnt++] = col_map_offd_P[i];
427    }
428    if (cnt)
429    {
430       hypre_BigQsort0(temp, 0, cnt-1);
431 
432       num_cols_offd_Pext = 1;
433       value = temp[0];
434       for (i=1; i < cnt; i++)
435       {
436          if (temp[i] > value)
437          {
438             value = temp[i];
439             temp[num_cols_offd_Pext++] = value;
440          }
441       }
442    }
443 
444    if (num_cols_offd_Pext)
445       col_map_offd_Pext = hypre_CTAlloc(HYPRE_BigInt, num_cols_offd_Pext, HYPRE_MEMORY_HOST);
446 
447    for (i=0; i < num_cols_offd_Pext; i++)
448       col_map_offd_Pext[i] = temp[i];
449 
450    if (P_ext_offd_size || num_cols_offd_P)
451       hypre_TFree(temp, HYPRE_MEMORY_HOST);
452 
453    for (i=0 ; i < P_ext_offd_size; i++)
454       P_ext_offd_j[i] = hypre_BigBinarySearch(col_map_offd_Pext,
455                                            Ps_ext_j[i],
456                                            num_cols_offd_Pext);
457    if (num_cols_offd_P)
458    {
459       map_P_to_Pext = hypre_CTAlloc(HYPRE_Int, num_cols_offd_P, HYPRE_MEMORY_HOST);
460 
461       cnt = 0;
462       for (i=0; i < num_cols_offd_Pext; i++)
463          if (col_map_offd_Pext[i] == col_map_offd_P[cnt])
464          {
465             map_P_to_Pext[cnt++] = i;
466             if (cnt == num_cols_offd_P) break;
467          }
468    }
469 
470    if (num_procs > 1)
471    {
472       hypre_CSRBlockMatrixDestroy(Ps_ext);
473       Ps_ext = NULL;
474    }
475 
476    /*-----------------------------------------------------------------------
477     *  First Pass: Determine size of RAP_int and set up RAP_int_i if there
478     *  are more than one processor and nonzero elements in R_offd
479     *-----------------------------------------------------------------------*/
480 
481    P_mark_array = hypre_CTAlloc(HYPRE_Int *,  num_threads, HYPRE_MEMORY_HOST);
482    A_mark_array = hypre_CTAlloc(HYPRE_Int *,  num_threads, HYPRE_MEMORY_HOST);
483 
484    if (num_cols_offd_RT)
485    {
486       jj_count = hypre_CTAlloc(HYPRE_Int,  num_threads, HYPRE_MEMORY_HOST);
487 
488       for (ii = 0; ii < num_threads; ii++)
489       {
490          size = num_cols_offd_RT/num_threads;
491          rest = num_cols_offd_RT - size*num_threads;
492          if (ii < rest)
493          {
494             ns = ii*size+ii;
495             ne = (ii+1)*size+ii+1;
496          }
497          else
498          {
499             ns = ii*size+rest;
500             ne = (ii+1)*size+rest;
501          }
502 
503          /*--------------------------------------------------------------------
504           *  Allocate marker arrays.
505           *--------------------------------------------------------------------*/
506 
507          if (num_cols_offd_Pext || num_cols_diag_P)
508          {
509             P_mark_array[ii] = hypre_CTAlloc(HYPRE_Int, num_cols_diag_P+num_cols_offd_Pext, HYPRE_MEMORY_HOST);
510             P_marker = P_mark_array[ii];
511          }
512          A_mark_array[ii] = hypre_CTAlloc(HYPRE_Int,  num_nz_cols_A, HYPRE_MEMORY_HOST);
513          A_marker = A_mark_array[ii];
514 
515          /*--------------------------------------------------------------------
516           *  Initialize some stuff.
517           *--------------------------------------------------------------------*/
518 
519          jj_counter = start_indexing;
520          for (ic = 0; ic < num_cols_diag_P+num_cols_offd_Pext; ic++)
521          {
522             P_marker[ic] = -1;
523          }
524          for (i = 0; i < num_nz_cols_A; i++)
525          {
526             A_marker[i] = -1;
527          }
528 
529          /*--------------------------------------------------------------------
530           *  Loop over exterior c-points
531           *--------------------------------------------------------------------*/
532 
533          for (ic = ns; ic < ne; ic++)
534          {
535 
536             jj_row_begining = jj_counter;
537 
538             /*-----------------------------------------------------------------
539              *  Loop over entries in row ic of R_offd.
540              *-----------------------------------------------------------------*/
541 
542             for (jj1 = R_offd_i[ic]; jj1 < R_offd_i[ic+1]; jj1++)
543             {
544                i1  = R_offd_j[jj1];
545 
546                /*--------------------------------------------------------------
547                 *  Loop over entries in row i1 of A_offd.
548                 *--------------------------------------------------------------*/
549 
550                for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++)
551                {
552                   i2 = A_offd_j[jj2];
553 
554                   /*-----------------------------------------------------------
555                    *  Check A_marker to see if point i2 has been previously
556                    *  visited. New entries in RAP only occur from unmarked points.
557                    *-----------------------------------------------------------*/
558 
559                   if (A_marker[i2] != ic)
560                   {
561 
562                      /*--------------------------------------------------------
563                       *  Mark i2 as visited.
564                       *--------------------------------------------------------*/
565 
566                      A_marker[i2] = ic;
567 
568                      /*--------------------------------------------------------
569                       *  Loop over entries in row i2 of P_ext.
570                       *--------------------------------------------------------*/
571 
572                      for (jj3=P_ext_diag_i[i2]; jj3 < P_ext_diag_i[i2+1]; jj3++)
573                      {
574                         i3 = P_ext_diag_j[jj3];
575 
576                         /*-----------------------------------------------------
577                          *  Check P_marker to see that RAP_{ic,i3} has not
578                          *  been accounted for. If it has not, mark it and
579                          *  increment counter.
580                          *-----------------------------------------------------*/
581 
582                         if (P_marker[i3] < jj_row_begining)
583                         {
584                            P_marker[i3] = jj_counter;
585                            jj_counter++;
586                         }
587                      }
588                      for (jj3=P_ext_offd_i[i2]; jj3 < P_ext_offd_i[i2+1]; jj3++)
589                      {
590                         i3 = P_ext_offd_j[jj3] + num_cols_diag_P;
591 
592                         /*-----------------------------------------------------
593                          *  Check P_marker to see that RAP_{ic,i3} has not
594                          *  been accounted for. If it has not, mark it and
595                          *  increment counter.
596                          *-----------------------------------------------------*/
597 
598                         if (P_marker[i3] < jj_row_begining)
599                         {
600                            P_marker[i3] = jj_counter;
601                            jj_counter++;
602                         }
603                      }
604                   }
605                }
606                /*--------------------------------------------------------------
607                 *  Loop over entries in row i1 of A_diag.
608                 *--------------------------------------------------------------*/
609 
610                for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++)
611                {
612                   i2 = A_diag_j[jj2];
613 
614                   /*-----------------------------------------------------------
615                    *  Check A_marker to see if point i2 has been previously
616                    *  visited. New entries in RAP only occur from unmarked
617                    * points.
618                    *-----------------------------------------------------------*/
619 
620                   if (A_marker[i2+num_cols_offd_A] != ic)
621                   {
622 
623                      /*--------------------------------------------------------
624                       *  Mark i2 as visited.
625                       *--------------------------------------------------------*/
626 
627                      A_marker[i2+num_cols_offd_A] = ic;
628 
629                      /*--------------------------------------------------------
630                       *  Loop over entries in row i2 of P_diag.
631                       *--------------------------------------------------------*/
632 
633                      for (jj3 = P_diag_i[i2]; jj3 < P_diag_i[i2+1]; jj3++)
634                      {
635                         i3 = P_diag_j[jj3];
636 
637                         /*-----------------------------------------------------
638                          *  Check P_marker to see that RAP_{ic,i3} has not
639                          *  been accounted for. If it has not, mark it and
640                          *  increment counter.
641                          *-----------------------------------------------------*/
642 
643                         if (P_marker[i3] < jj_row_begining)
644                         {
645                            P_marker[i3] = jj_counter;
646                            jj_counter++;
647                         }
648                      }
649 
650                      /*--------------------------------------------------------
651                       *  Loop over entries in row i2 of P_offd.
652                       *--------------------------------------------------------*/
653 
654                      for (jj3 = P_offd_i[i2]; jj3 < P_offd_i[i2+1]; jj3++)
655                      {
656                         i3 = map_P_to_Pext[P_offd_j[jj3]] + num_cols_diag_P;
657 
658                         /*-----------------------------------------------------
659                          *  Check P_marker to see that RAP_{ic,i3} has not
660                          *  been accounted for. If it has not, mark it and
661                          *  increment counter.
662                          *-----------------------------------------------------*/
663 
664                         if (P_marker[i3] < jj_row_begining)
665                         {
666                            P_marker[i3] = jj_counter;
667                            jj_counter++;
668                         }
669                      }
670                   }
671                }
672             }
673          }
674          jj_count[ii] = jj_counter;
675       }
676 
677       /*-----------------------------------------------------------------------
678        *  Allocate RAP_int_data and RAP_int_j arrays.
679        *-----------------------------------------------------------------------*/
680 
681       for (i = 0; i < num_threads-1; i++) jj_count[i+1] += jj_count[i];
682 
683       RAP_size = jj_count[num_threads-1];
684       RAP_int_i = hypre_CTAlloc(HYPRE_Int,  num_cols_offd_RT+1, HYPRE_MEMORY_HOST);
685       RAP_int_data = hypre_CTAlloc(HYPRE_Complex,  RAP_size*bnnz, HYPRE_MEMORY_HOST);
686       RAP_int_j    = hypre_CTAlloc(HYPRE_BigInt,  RAP_size, HYPRE_MEMORY_HOST);
687       RAP_int_i[num_cols_offd_RT] = RAP_size;
688 
689       /*-----------------------------------------------------------------------
690        *  Second Pass: Fill in RAP_int_data and RAP_int_j.
691        *-----------------------------------------------------------------------*/
692 
693       for (ii = 0; ii < num_threads; ii++)
694       {
695          size = num_cols_offd_RT/num_threads;
696          rest = num_cols_offd_RT - size*num_threads;
697          if (ii < rest)
698          {
699             ns = ii*size+ii;
700             ne = (ii+1)*size+ii+1;
701          }
702          else
703          {
704             ns = ii*size+rest;
705             ne = (ii+1)*size+rest;
706          }
707 
708          /*--------------------------------------------------------------------
709           *  Initialize some stuff.
710           *--------------------------------------------------------------------*/
711 
712          if (num_cols_offd_Pext || num_cols_diag_P)
713             P_marker = P_mark_array[ii];
714          A_marker = A_mark_array[ii];
715 
716          jj_counter = start_indexing;
717          if (ii > 0) jj_counter = jj_count[ii-1];
718 
719          for (ic = 0; ic < num_cols_diag_P+num_cols_offd_Pext; ic++)
720          {
721             P_marker[ic] = -1;
722          }
723          for (i = 0; i < num_nz_cols_A; i++)
724          {
725             A_marker[i] = -1;
726          }
727 
728          /*--------------------------------------------------------------------
729           *  Loop over exterior c-points.
730           *--------------------------------------------------------------------*/
731 
732          for (ic = ns; ic < ne; ic++)
733          {
734             jj_row_begining = jj_counter;
735             RAP_int_i[ic] = jj_counter;
736 
737             /*-----------------------------------------------------------------
738              *  Loop over entries in row ic of R_offd.
739              *-----------------------------------------------------------------*/
740 
741             for (jj1 = R_offd_i[ic]; jj1 < R_offd_i[ic+1]; jj1++)
742             {
743                i1  = R_offd_j[jj1];
744                r_entries = &(R_offd_data[jj1*bnnz]);
745 
746                /*--------------------------------------------------------------
747                 *  Loop over entries in row i1 of A_offd.
748                 *--------------------------------------------------------------*/
749 
750                for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++)
751                {
752                   i2 = A_offd_j[jj2];
753                   hypre_CSRBlockMatrixBlockMultAdd(r_entries,
754                                                    &(A_offd_data[jj2*bnnz]), zero,
755                                                    r_a_products, block_size);
756 
757                   /*-----------------------------------------------------------
758                    *  Check A_marker to see if point i2 has been previously
759                    *  visited.New entries in RAP only occur from unmarked points.
760                    *-----------------------------------------------------------*/
761 
762                   if (A_marker[i2] != ic)
763                   {
764                      /*--------------------------------------------------------
765                       *  Mark i2 as visited.
766                       *--------------------------------------------------------*/
767 
768                      A_marker[i2] = ic;
769 
770                      /*--------------------------------------------------------
771                       *  Loop over entries in row i2 of P_ext.
772                       *--------------------------------------------------------*/
773 
774                      for (jj3=P_ext_diag_i[i2]; jj3 < P_ext_diag_i[i2+1]; jj3++)
775                      {
776                         i3 = P_ext_diag_j[jj3];
777                         hypre_CSRBlockMatrixBlockMultAdd(r_a_products,
778                                                          &(P_ext_diag_data[jj3*bnnz]), zero,
779                                                          r_a_p_products, block_size);
780 
781                         /*-----------------------------------------------------
782                          *  Check P_marker to see that RAP_{ic,i3} has not
783                          *  been accounted for. If it has not, create a new
784                          *  entry. If it has, add new contribution.
785                          *-----------------------------------------------------*/
786 
787                         if (P_marker[i3] < jj_row_begining)
788                         {
789                            P_marker[i3] = jj_counter;
790                            for (kk = 0; kk < bnnz; kk++)
791                               RAP_int_data[jj_counter*bnnz+kk] =
792                                  r_a_p_products[kk];
793                            RAP_int_j[jj_counter] = i3 + first_col_diag_P;
794                            jj_counter++;
795                         }
796                         else
797                         {
798                            for (kk = 0; kk < bnnz; kk++)
799                               RAP_int_data[P_marker[i3]*bnnz+kk] +=
800                                  r_a_p_products[kk];
801                         }
802                      }
803                      for (jj3=P_ext_offd_i[i2]; jj3 < P_ext_offd_i[i2+1]; jj3++)
804                      {
805                         i3 = P_ext_offd_j[jj3] + num_cols_diag_P;
806                         hypre_CSRBlockMatrixBlockMultAdd(r_a_products,
807                                                          &(P_ext_offd_data[jj3*bnnz]), zero,
808                                                          r_a_p_products, block_size);
809 
810                         /*--------------------------------------------------
811                          *  Check P_marker to see that RAP_{ic,i3} has not
812                          *  been accounted for. If it has not, create a new
813                          *  entry. If it has, add new contribution.
814                          *--------------------------------------------------*/
815 
816                         if (P_marker[i3] < jj_row_begining)
817                         {
818                            P_marker[i3] = jj_counter;
819                            for (kk = 0; kk < bnnz; kk++)
820                               RAP_int_data[jj_counter*bnnz+kk] =
821                                  r_a_p_products[kk];
822                            RAP_int_j[jj_counter]
823                               = col_map_offd_Pext[i3-num_cols_diag_P];
824                            jj_counter++;
825                         }
826                         else
827                         {
828                            for (kk = 0; kk < bnnz; kk++)
829                               RAP_int_data[P_marker[i3]*bnnz+kk] +=
830                                  r_a_p_products[kk];
831                         }
832                      }
833                   }
834 
835                   /*-----------------------------------------------------------
836                    *  If i2 is previously visited ( A_marker[12]=ic ) it yields
837                    *  no new entries in RAP and can just add new contributions.
838                    *-----------------------------------------------------------*/
839 
840                   else
841                   {
842                      for (jj3=P_ext_diag_i[i2]; jj3 < P_ext_diag_i[i2+1]; jj3++)
843                      {
844                         i3 = P_ext_diag_j[jj3];
845                         hypre_CSRBlockMatrixBlockMultAdd(r_a_products,
846                                                          &(P_ext_diag_data[jj3*bnnz]), zero,
847                                                          r_a_p_products, block_size);
848                         for (kk = 0; kk < bnnz; kk++)
849                            RAP_int_data[P_marker[i3]*bnnz+kk] +=
850                               r_a_p_products[kk];
851                      }
852                      for (jj3=P_ext_offd_i[i2]; jj3 < P_ext_offd_i[i2+1]; jj3++)
853                      {
854                         i3 = P_ext_offd_j[jj3] + num_cols_diag_P;
855                         hypre_CSRBlockMatrixBlockMultAdd(r_a_products,
856                                                          &(P_ext_offd_data[jj3*bnnz]), zero,
857                                                          r_a_p_products, block_size);
858                         ind = P_marker[i3] * bnnz;
859                         for (kk = 0; kk < bnnz; kk++)
860                            RAP_int_data[ind++] += r_a_p_products[kk];
861                      }
862                   }
863                }
864 
865                /*--------------------------------------------------------------
866                 *  Loop over entries in row i1 of A_diag.
867                 *--------------------------------------------------------------*/
868 
869                for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++)
870                {
871                   i2 = A_diag_j[jj2];
872                   hypre_CSRBlockMatrixBlockMultAdd(r_entries,
873                                                    &(A_diag_data[jj2*bnnz]), zero, r_a_products,
874                                                    block_size);
875 
876                   /*-----------------------------------------------------------
877                    *  Check A_marker to see if point i2 has been previously
878                    *  visited. New entries in RAP only occur from unmarked points.
879                    *-----------------------------------------------------------*/
880 
881                   if (A_marker[i2+num_cols_offd_A] != ic)
882                   {
883 
884                      /*--------------------------------------------------------
885                       *  Mark i2 as visited.
886                       *--------------------------------------------------------*/
887 
888                      A_marker[i2+num_cols_offd_A] = ic;
889 
890                      /*--------------------------------------------------------
891                       *  Loop over entries in row i2 of P_diag.
892                       *--------------------------------------------------------*/
893 
894                      for (jj3 = P_diag_i[i2]; jj3 < P_diag_i[i2+1]; jj3++)
895                      {
896                         i3 = P_diag_j[jj3];
897                         hypre_CSRBlockMatrixBlockMultAdd(r_a_products,
898                                                          &(P_diag_data[jj3*bnnz]), zero,
899                                                          r_a_p_products, block_size);
900 
901                         /*-----------------------------------------------------
902                          *  Check P_marker to see that RAP_{ic,i3} has not
903                          *  been accounted for. If it has not, create a new
904                          *  entry. If it has, add new contribution.
905                          *-----------------------------------------------------*/
906 
907                         if (P_marker[i3] < jj_row_begining)
908                         {
909                            P_marker[i3] = jj_counter;
910                            ind = jj_counter * bnnz;
911                            for (kk = 0; kk < bnnz; kk++)
912                               RAP_int_data[ind++] = r_a_p_products[kk];
913                            RAP_int_j[jj_counter] = (HYPRE_BigInt)i3 + first_col_diag_P;
914                            jj_counter++;
915                         }
916                         else
917                         {
918                            ind = P_marker[i3] * bnnz;
919                            for (kk = 0; kk < bnnz; kk++)
920                               RAP_int_data[ind++] += r_a_p_products[kk];
921                         }
922                      }
923                      for (jj3 = P_offd_i[i2]; jj3 < P_offd_i[i2+1]; jj3++)
924                      {
925                         i3 = map_P_to_Pext[P_offd_j[jj3]] + num_cols_diag_P;
926                         hypre_CSRBlockMatrixBlockMultAdd(r_a_products,
927                                                          &(P_offd_data[jj3*bnnz]), zero,
928                                                          r_a_p_products, block_size);
929 
930                         /*-----------------------------------------------------
931                          *  Check P_marker to see that RAP_{ic,i3} has not
932                          *  been accounted for. If it has not, create a new
933                          *  entry. If it has, add new contribution.
934                          *-----------------------------------------------------*/
935 
936                         if (P_marker[i3] < jj_row_begining)
937                         {
938                            P_marker[i3] = jj_counter;
939                            ind = jj_counter * bnnz;
940                            for (kk = 0; kk < bnnz; kk++)
941                               RAP_int_data[ind++] = r_a_p_products[kk];
942                            RAP_int_j[jj_counter] =
943                               col_map_offd_Pext[i3-num_cols_diag_P];
944                            jj_counter++;
945                         }
946                         else
947                         {
948                            ind = P_marker[i3] * bnnz;
949                            for (kk = 0; kk < bnnz; kk++)
950                               RAP_int_data[ind++] += r_a_p_products[kk];
951                         }
952                      }
953                   }
954 
955                   /*-----------------------------------------------------------
956                    *  If i2 is previously visited ( A_marker[12]=ic ) it yields
957                    *  no new entries in RAP and can just add new contributions.
958                    *-----------------------------------------------------------*/
959 
960                   else
961                   {
962                      for (jj3 = P_diag_i[i2]; jj3 < P_diag_i[i2+1]; jj3++)
963                      {
964                         i3 = P_diag_j[jj3];
965                         hypre_CSRBlockMatrixBlockMultAdd(r_a_products,
966                                                          &(P_diag_data[jj3*bnnz]), zero,
967                                                          r_a_p_products, block_size);
968                         ind = P_marker[i3] * bnnz;
969                         for (kk = 0; kk < bnnz; kk++)
970                            RAP_int_data[ind++] += r_a_p_products[kk];
971                      }
972                      for (jj3 = P_offd_i[i2]; jj3 < P_offd_i[i2+1]; jj3++)
973                      {
974                         i3 = map_P_to_Pext[P_offd_j[jj3]] + num_cols_diag_P;
975                         hypre_CSRBlockMatrixBlockMultAdd(r_a_products,
976                                                          &(P_offd_data[jj3*bnnz]), zero,
977                                                          r_a_p_products, block_size);
978                         ind = P_marker[i3] * bnnz;
979                         for (kk = 0; kk < bnnz; kk++)
980                            RAP_int_data[ind++] += r_a_p_products[kk];
981                      }
982                   }
983                }
984             }
985          }
986          if (num_cols_offd_Pext || num_cols_diag_P)
987             hypre_TFree(P_mark_array[ii], HYPRE_MEMORY_HOST);
988          hypre_TFree(A_mark_array[ii], HYPRE_MEMORY_HOST);
989       }
990 
991       RAP_int = hypre_CSRBlockMatrixCreate(block_size,num_cols_offd_RT,
992                                            num_rows_offd_RT,RAP_size);
993       hypre_CSRBlockMatrixI(RAP_int) = RAP_int_i;
994       hypre_CSRBlockMatrixBigJ(RAP_int) = RAP_int_j;
995       hypre_CSRBlockMatrixData(RAP_int) = RAP_int_data;
996       hypre_TFree(jj_count, HYPRE_MEMORY_HOST);
997    }
998 
999    RAP_ext_size = 0;
1000    if (num_sends_RT || num_recvs_RT)
1001    {
1002       RAP_ext = hypre_ExchangeRAPBlockData(RAP_int,comm_pkg_RT, block_size);
1003       RAP_ext_i = hypre_CSRBlockMatrixI(RAP_ext);
1004       RAP_ext_j = hypre_CSRBlockMatrixBigJ(RAP_ext);
1005       RAP_ext_data = hypre_CSRBlockMatrixData(RAP_ext);
1006       RAP_ext_size = RAP_ext_i[hypre_CSRBlockMatrixNumRows(RAP_ext)];
1007    }
1008    if (num_cols_offd_RT)
1009    {
1010       hypre_CSRBlockMatrixDestroy(RAP_int);
1011       RAP_int = NULL;
1012    }
1013 
1014    RAP_diag_i = hypre_CTAlloc(HYPRE_Int,  num_cols_diag_P+1, HYPRE_MEMORY_HOST);
1015    RAP_offd_i = hypre_CTAlloc(HYPRE_Int,  num_cols_diag_P+1, HYPRE_MEMORY_HOST);
1016 
1017    first_col_diag_RAP = first_col_diag_P;
1018    last_col_diag_RAP = first_col_diag_P + (HYPRE_BigInt)num_cols_diag_P - 1;
1019 
1020    /*-----------------------------------------------------------------------
1021     *  check for new nonzero columns in RAP_offd generated through RAP_ext
1022     *-----------------------------------------------------------------------*/
1023 
1024    if (RAP_ext_size || num_cols_offd_Pext)
1025    {
1026       temp = hypre_CTAlloc(HYPRE_BigInt, RAP_ext_size+num_cols_offd_Pext, HYPRE_MEMORY_HOST);
1027       cnt = 0;
1028       for (i=0; i < RAP_ext_size; i++)
1029          if (RAP_ext_j[i] < first_col_diag_RAP
1030              || RAP_ext_j[i] > last_col_diag_RAP)
1031             temp[cnt++] = RAP_ext_j[i];
1032       for (i=0; i < num_cols_offd_Pext; i++)
1033          temp[cnt++] = col_map_offd_Pext[i];
1034 
1035       if (cnt)
1036       {
1037          hypre_BigQsort0(temp,0,cnt-1);
1038          value = temp[0];
1039          num_cols_offd_RAP = 1;
1040          for (i=1; i < cnt; i++)
1041          {
1042             if (temp[i] > value)
1043             {
1044                value = temp[i];
1045                temp[num_cols_offd_RAP++] = value;
1046             }
1047          }
1048       }
1049 
1050       /* now evaluate col_map_offd_RAP */
1051       if (num_cols_offd_RAP)
1052          col_map_offd_RAP = hypre_CTAlloc(HYPRE_BigInt,  num_cols_offd_RAP, HYPRE_MEMORY_HOST);
1053 
1054       for (i=0 ; i < num_cols_offd_RAP; i++)
1055          col_map_offd_RAP[i] = temp[i];
1056 
1057       hypre_TFree(temp, HYPRE_MEMORY_HOST);
1058    }
1059 
1060    if (num_cols_offd_P)
1061    {
1062       map_P_to_RAP = hypre_CTAlloc(HYPRE_Int, num_cols_offd_P, HYPRE_MEMORY_HOST);
1063 
1064       cnt = 0;
1065       for (i=0; i < num_cols_offd_RAP; i++)
1066          if (col_map_offd_RAP[i] == col_map_offd_P[cnt])
1067          {
1068             map_P_to_RAP[cnt++] = i;
1069             if (cnt == num_cols_offd_P) break;
1070          }
1071    }
1072 
1073    if (num_cols_offd_Pext)
1074    {
1075       map_Pext_to_RAP = hypre_CTAlloc(HYPRE_Int, num_cols_offd_Pext, HYPRE_MEMORY_HOST);
1076 
1077       cnt = 0;
1078       for (i=0; i < num_cols_offd_RAP; i++)
1079          if (col_map_offd_RAP[i] == col_map_offd_Pext[cnt])
1080          {
1081             map_Pext_to_RAP[cnt++] = i;
1082             if (cnt == num_cols_offd_Pext) break;
1083          }
1084    }
1085 
1086    /*-----------------------------------------------------------------------
1087     *  Convert RAP_ext column indices
1088     *-----------------------------------------------------------------------*/
1089 
1090    for (i=0; i < RAP_ext_size; i++)
1091       if (RAP_ext_j[i] < first_col_diag_RAP
1092           || RAP_ext_j[i] > last_col_diag_RAP)
1093          RAP_ext_j[i] = (HYPRE_BigInt)(num_cols_diag_P)
1094             + hypre_BigBinarySearch(col_map_offd_RAP,
1095                                  RAP_ext_j[i],num_cols_offd_RAP);
1096       else
1097          RAP_ext_j[i] -= first_col_diag_RAP;
1098 
1099    /*-----------------------------------------------------------------------
1100     *  Initialize some stuff.
1101     *-----------------------------------------------------------------------*/
1102 
1103    jj_cnt_diag = hypre_CTAlloc(HYPRE_Int,  num_threads, HYPRE_MEMORY_HOST);
1104    jj_cnt_offd = hypre_CTAlloc(HYPRE_Int,  num_threads, HYPRE_MEMORY_HOST);
1105 
1106    for (ii = 0; ii < num_threads; ii++)
1107    {
1108       size = num_cols_diag_P/num_threads;
1109       rest = num_cols_diag_P - size*num_threads;
1110       if (ii < rest)
1111       {
1112          ns = ii*size+ii;
1113          ne = (ii+1)*size+ii+1;
1114       }
1115       else
1116       {
1117          ns = ii*size+rest;
1118          ne = (ii+1)*size+rest;
1119       }
1120 
1121       P_mark_array[ii] = hypre_CTAlloc(HYPRE_Int,  num_cols_diag_P+num_cols_offd_RAP, HYPRE_MEMORY_HOST);
1122       A_mark_array[ii] = hypre_CTAlloc(HYPRE_Int,  num_nz_cols_A, HYPRE_MEMORY_HOST);
1123       P_marker = P_mark_array[ii];
1124       A_marker = A_mark_array[ii];
1125       jj_count_diag = start_indexing;
1126       jj_count_offd = start_indexing;
1127 
1128       for (ic = 0; ic < num_cols_diag_P+num_cols_offd_RAP; ic++)
1129       {
1130          P_marker[ic] = -1;
1131       }
1132       for (i = 0; i < num_nz_cols_A; i++)
1133       {
1134          A_marker[i] = -1;
1135       }
1136 
1137       /*-----------------------------------------------------------------------
1138        *  Loop over interior c-points.
1139        *-----------------------------------------------------------------------*/
1140 
1141       for (ic = ns; ic < ne; ic++)
1142       {
1143 
1144          /*--------------------------------------------------------------------
1145           *  Set marker for diagonal entry, RAP_{ic,ic}. and for all points
1146           *  being added to row ic of RAP_diag and RAP_offd through RAP_ext
1147           *--------------------------------------------------------------------*/
1148 
1149          P_marker[ic] = jj_count_diag;
1150          jj_row_begin_diag = jj_count_diag;
1151          jj_row_begin_offd = jj_count_offd;
1152          jj_count_diag++;
1153 
1154          for (i=0; i < num_sends_RT; i++)
1155             for (j = send_map_starts_RT[i]; j < send_map_starts_RT[i+1]; j++)
1156                if (send_map_elmts_RT[j] == ic)
1157                {
1158                   for (k=RAP_ext_i[j]; k < RAP_ext_i[j+1]; k++)
1159                   {
1160                      jcol = (HYPRE_Int)RAP_ext_j[k];
1161                      if (jcol < num_cols_diag_P)
1162                      {
1163                         if (P_marker[jcol] < jj_row_begin_diag)
1164                         {
1165                            P_marker[jcol] = jj_count_diag;
1166                            jj_count_diag++;
1167                         }
1168                      }
1169                      else
1170                      {
1171                         if (P_marker[jcol] < jj_row_begin_offd)
1172                         {
1173                            P_marker[jcol] = jj_count_offd;
1174                            jj_count_offd++;
1175                         }
1176                      }
1177                   }
1178                   break;
1179                }
1180 
1181          /*-----------------------------------------------------------------
1182           *  Loop over entries in row ic of R_diag.
1183           *-----------------------------------------------------------------*/
1184 
1185          for (jj1 = R_diag_i[ic]; jj1 < R_diag_i[ic+1]; jj1++)
1186          {
1187             i1  = R_diag_j[jj1];
1188 
1189             /*-----------------------------------------------------------------
1190              *  Loop over entries in row i1 of A_offd.
1191              *-----------------------------------------------------------------*/
1192 
1193             if (num_cols_offd_A)
1194             {
1195                for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++)
1196                {
1197                   i2 = A_offd_j[jj2];
1198 
1199                   /*-----------------------------------------------------------
1200                    *  Check A_marker to see if point i2 has been previously
1201                    *  visited.New entries in RAP only occur from unmarked points.
1202                    *-----------------------------------------------------------*/
1203 
1204                   if (A_marker[i2] != ic)
1205                   {
1206                      /*--------------------------------------------------------
1207                       *  Mark i2 as visited.
1208                       *--------------------------------------------------------*/
1209 
1210                      A_marker[i2] = ic;
1211 
1212                      /*--------------------------------------------------------
1213                       *  Loop over entries in row i2 of P_ext.
1214                       *--------------------------------------------------------*/
1215 
1216                      for (jj3=P_ext_diag_i[i2]; jj3 < P_ext_diag_i[i2+1]; jj3++)
1217                      {
1218                         i3 = P_ext_diag_j[jj3];
1219 
1220                         /*-----------------------------------------------------
1221                          *  Check P_marker to see that RAP_{ic,i3} has not
1222                          *  been accounted for. If it has not, mark it and
1223                          *  increment counter.
1224                          *-----------------------------------------------------*/
1225 
1226                         if (P_marker[i3] < jj_row_begin_diag)
1227                         {
1228                            P_marker[i3] = jj_count_diag;
1229                            jj_count_diag++;
1230                         }
1231                      }
1232                      for (jj3=P_ext_offd_i[i2]; jj3 < P_ext_offd_i[i2+1]; jj3++)
1233                      {
1234                         i3 = map_Pext_to_RAP[P_ext_offd_j[jj3]]+num_cols_diag_P;
1235 
1236                         /*-----------------------------------------------------
1237                          *  Check P_marker to see that RAP_{ic,i3} has not
1238                          *  been accounted for. If it has not, mark it and
1239                          *  increment counter.
1240                          *-----------------------------------------------------*/
1241 
1242                         if (P_marker[i3] < jj_row_begin_offd)
1243                         {
1244                            P_marker[i3] = jj_count_offd;
1245                            jj_count_offd++;
1246                         }
1247                      }
1248                   }
1249                }
1250             }
1251 
1252             /*-----------------------------------------------------------------
1253              *  Loop over entries in row i1 of A_diag.
1254              *-----------------------------------------------------------------*/
1255 
1256             for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++)
1257             {
1258                i2 = A_diag_j[jj2];
1259 
1260                /*--------------------------------------------------------------
1261                 *  Check A_marker to see if point i2 has been previously
1262                 *  visited. New entries in RAP only occur from unmarked points.
1263                 *--------------------------------------------------------------*/
1264 
1265                if (A_marker[i2+num_cols_offd_A] != ic)
1266                {
1267 
1268                   /*-----------------------------------------------------------
1269                    *  Mark i2 as visited.
1270                    *-----------------------------------------------------------*/
1271 
1272                   A_marker[i2+num_cols_offd_A] = ic;
1273 
1274                   /*-----------------------------------------------------------
1275                    *  Loop over entries in row i2 of P_diag.
1276                    *-----------------------------------------------------------*/
1277 
1278                   for (jj3 = P_diag_i[i2]; jj3 < P_diag_i[i2+1]; jj3++)
1279                   {
1280                      i3 = P_diag_j[jj3];
1281 
1282                      /*--------------------------------------------------------
1283                       *  Check P_marker to see that RAP_{ic,i3} has not already
1284                       *  been accounted for. If it has not, mark it and increment
1285                       *  counter.
1286                       *--------------------------------------------------------*/
1287 
1288                      if (P_marker[i3] < jj_row_begin_diag)
1289                      {
1290                         P_marker[i3] = jj_count_diag;
1291                         jj_count_diag++;
1292                      }
1293                   }
1294 
1295                   /*-----------------------------------------------------------
1296                    *  Loop over entries in row i2 of P_offd.
1297                    *-----------------------------------------------------------*/
1298 
1299                   if (num_cols_offd_P)
1300                   {
1301                      for (jj3 = P_offd_i[i2]; jj3 < P_offd_i[i2+1]; jj3++)
1302                      {
1303                         i3 = map_P_to_RAP[P_offd_j[jj3]] + num_cols_diag_P;
1304 
1305                         /*-----------------------------------------------------
1306                          *  Check P_marker to see that RAP_{ic,i3} has not
1307                          *  been accounted for. If it has not, mark it and
1308                          *  increment counter.
1309                          *-----------------------------------------------------*/
1310 
1311                         if (P_marker[i3] < jj_row_begin_offd)
1312                         {
1313                            P_marker[i3] = jj_count_offd;
1314                            jj_count_offd++;
1315                         }
1316                      }
1317                   }
1318                }
1319             }
1320          }
1321 
1322          /*--------------------------------------------------------------------
1323           * Set RAP_diag_i and RAP_offd_i for this row.
1324           *--------------------------------------------------------------------*/
1325       }
1326       jj_cnt_diag[ii] = jj_count_diag;
1327       jj_cnt_offd[ii] = jj_count_offd;
1328    }
1329 
1330    for (i=0; i < num_threads-1; i++)
1331    {
1332       jj_cnt_diag[i+1] += jj_cnt_diag[i];
1333       jj_cnt_offd[i+1] += jj_cnt_offd[i];
1334    }
1335 
1336    jj_count_diag = jj_cnt_diag[num_threads-1];
1337    jj_count_offd = jj_cnt_offd[num_threads-1];
1338 
1339    RAP_diag_i[num_cols_diag_P] = jj_count_diag;
1340    RAP_offd_i[num_cols_diag_P] = jj_count_offd;
1341 
1342    /*-----------------------------------------------------------------------
1343     *  Allocate RAP_diag_data and RAP_diag_j arrays.
1344     *  Allocate RAP_offd_data and RAP_offd_j arrays.
1345     *-----------------------------------------------------------------------*/
1346 
1347    RAP_diag_size = jj_count_diag;
1348    if (RAP_diag_size)
1349    {
1350       RAP_diag_data = hypre_CTAlloc(HYPRE_Complex,  RAP_diag_size*bnnz, HYPRE_MEMORY_HOST);
1351       RAP_diag_j    = hypre_CTAlloc(HYPRE_Int,  RAP_diag_size, HYPRE_MEMORY_HOST);
1352    }
1353 
1354    RAP_offd_size = jj_count_offd;
1355    if (RAP_offd_size)
1356    {
1357       RAP_offd_data = hypre_CTAlloc(HYPRE_Complex,  RAP_offd_size*bnnz, HYPRE_MEMORY_HOST);
1358       RAP_offd_j    = hypre_CTAlloc(HYPRE_Int,  RAP_offd_size, HYPRE_MEMORY_HOST);
1359    }
1360 
1361    if (RAP_offd_size == 0 && num_cols_offd_RAP != 0)
1362    {
1363       num_cols_offd_RAP = 0;
1364       hypre_TFree(col_map_offd_RAP, HYPRE_MEMORY_HOST);
1365    }
1366 
1367    /*-----------------------------------------------------------------------
1368     *  Second Pass: Fill in RAP_diag_data and RAP_diag_j.
1369     *  Second Pass: Fill in RAP_offd_data and RAP_offd_j.
1370     *-----------------------------------------------------------------------*/
1371 
1372    for (ii = 0; ii < num_threads; ii++)
1373    {
1374       size = num_cols_diag_P/num_threads;
1375       rest = num_cols_diag_P - size*num_threads;
1376       if (ii < rest)
1377       {
1378          ns = ii*size+ii;
1379          ne = (ii+1)*size+ii+1;
1380       }
1381       else
1382       {
1383          ns = ii*size+rest;
1384          ne = (ii+1)*size+rest;
1385       }
1386 
1387       /*-----------------------------------------------------------------------
1388        *  Initialize some stuff.
1389        *-----------------------------------------------------------------------*/
1390 
1391       P_marker = P_mark_array[ii];
1392       A_marker = A_mark_array[ii];
1393       for (ic = 0; ic < num_cols_diag_P+num_cols_offd_RAP; ic++)
1394       {
1395          P_marker[ic] = -1;
1396       }
1397       for (i = 0; i < num_nz_cols_A ; i++)
1398       {
1399          A_marker[i] = -1;
1400       }
1401 
1402       jj_count_diag = start_indexing;
1403       jj_count_offd = start_indexing;
1404       if (ii > 0)
1405       {
1406          jj_count_diag = jj_cnt_diag[ii-1];
1407          jj_count_offd = jj_cnt_offd[ii-1];
1408       }
1409 
1410       /*-----------------------------------------------------------------------
1411        *  Loop over interior c-points.
1412        *-----------------------------------------------------------------------*/
1413 
1414       for (ic = ns; ic < ne; ic++)
1415       {
1416 
1417          /*--------------------------------------------------------------------
1418           *  Create diagonal entry, RAP_{ic,ic} and add entries of RAP_ext
1419           *--------------------------------------------------------------------*/
1420 
1421          P_marker[ic] = jj_count_diag;
1422          jj_row_begin_diag = jj_count_diag;
1423          jj_row_begin_offd = jj_count_offd;
1424          RAP_diag_i[ic] = jj_row_begin_diag;
1425          RAP_offd_i[ic] = jj_row_begin_offd;
1426          ind = jj_count_diag * bnnz;
1427          for (kk = 0; kk < bnnz; kk++)
1428             RAP_diag_data[ind++] = zero;
1429          RAP_diag_j[jj_count_diag] = ic;
1430          jj_count_diag++;
1431 
1432          for (i=0; i < num_sends_RT; i++)
1433             for (j = send_map_starts_RT[i]; j < send_map_starts_RT[i+1]; j++)
1434                if (send_map_elmts_RT[j] == ic)
1435                {
1436                   for (k=RAP_ext_i[j]; k < RAP_ext_i[j+1]; k++)
1437                   {
1438                      jcol = (HYPRE_Int)RAP_ext_j[k];
1439                      if (jcol < num_cols_diag_P)
1440                      {
1441                         if (P_marker[jcol] < jj_row_begin_diag)
1442                         {
1443                            P_marker[jcol] = jj_count_diag;
1444                            ind = jj_count_diag * bnnz;
1445                            for (kk = 0; kk < bnnz; kk++)
1446                               RAP_diag_data[ind++] = RAP_ext_data[k*bnnz+kk];
1447                            RAP_diag_j[jj_count_diag] = jcol;
1448                            jj_count_diag++;
1449                         }
1450                         else
1451                         {
1452                            ind = P_marker[jcol] * bnnz;
1453                            for (kk = 0; kk < bnnz; kk++)
1454                               RAP_diag_data[ind++] += RAP_ext_data[k*bnnz+kk];
1455                         }
1456                      }
1457                      else
1458                      {
1459                         if (P_marker[jcol] < jj_row_begin_offd)
1460                         {
1461                            P_marker[jcol] = jj_count_offd;
1462                            ind = jj_count_offd * bnnz;
1463                            for (kk = 0; kk < bnnz; kk++)
1464                               RAP_offd_data[ind++] = RAP_ext_data[k*bnnz+kk];
1465                            RAP_offd_j[jj_count_offd]
1466                               = jcol-num_cols_diag_P;
1467                            jj_count_offd++;
1468                         }
1469                         else
1470                         {
1471                            ind = P_marker[jcol] * bnnz;
1472                            for (kk = 0; kk < bnnz; kk++)
1473                               RAP_offd_data[ind++] += RAP_ext_data[k*bnnz+kk];
1474                         }
1475                      }
1476                   }
1477                   break;
1478                }
1479 
1480          /*--------------------------------------------------------------------
1481           *  Loop over entries in row ic of R_diag.
1482           *--------------------------------------------------------------------*/
1483 
1484          for (jj1 = R_diag_i[ic]; jj1 < R_diag_i[ic+1]; jj1++)
1485          {
1486             i1  = R_diag_j[jj1];
1487             r_entries = &(R_diag_data[jj1*bnnz]);
1488 
1489             /*-----------------------------------------------------------------
1490              *  Loop over entries in row i1 of A_offd.
1491              *-----------------------------------------------------------------*/
1492 
1493             if (num_cols_offd_A)
1494             {
1495                for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++)
1496                {
1497                   i2 = A_offd_j[jj2];
1498                   hypre_CSRBlockMatrixBlockMultAdd(r_entries,
1499                                                    &(A_offd_data[jj2*bnnz]), zero, r_a_products,
1500                                                    block_size);
1501 
1502                   /*-----------------------------------------------------------
1503                    *  Check A_marker to see if point i2 has been previously
1504                    *  visited.New entries in RAP only occur from unmarked points.
1505                    *-----------------------------------------------------------*/
1506 
1507                   if (A_marker[i2] != ic)
1508                   {
1509                      /*--------------------------------------------------------
1510                       *  Mark i2 as visited.
1511                       *--------------------------------------------------------*/
1512 
1513                      A_marker[i2] = ic;
1514 
1515                      /*--------------------------------------------------------
1516                       *  Loop over entries in row i2 of P_ext.
1517                       *--------------------------------------------------------*/
1518 
1519                      for (jj3=P_ext_diag_i[i2]; jj3 < P_ext_diag_i[i2+1]; jj3++)
1520                      {
1521                         i3 = P_ext_diag_j[jj3];
1522                         hypre_CSRBlockMatrixBlockMultAdd(r_a_products,
1523                                                          &(P_ext_diag_data[jj3*bnnz]), zero,
1524                                                          r_a_p_products, block_size);
1525 
1526                         /*-----------------------------------------------------
1527                          *  Check P_marker to see that RAP_{ic,i3} has not
1528                          *  been accounted for. If it has not, create a new
1529                          *  entry. If it has, add new contribution.
1530                          *-----------------------------------------------------*/
1531 
1532                         if (P_marker[i3] < jj_row_begin_diag)
1533                         {
1534                            P_marker[i3] = jj_count_diag;
1535                            ind = jj_count_diag * bnnz;
1536                            for (kk = 0; kk < bnnz; kk++)
1537                               RAP_diag_data[ind++] = r_a_p_products[kk];
1538                            RAP_diag_j[jj_count_diag] = i3;
1539                            jj_count_diag++;
1540                         }
1541                         else
1542                         {
1543                            ind = P_marker[i3] * bnnz;
1544                            for (kk = 0; kk < bnnz; kk++)
1545                               RAP_diag_data[ind++] += r_a_p_products[kk];
1546                         }
1547                      }
1548                      for (jj3=P_ext_offd_i[i2]; jj3 < P_ext_offd_i[i2+1]; jj3++)
1549                      {
1550                         i3 = map_Pext_to_RAP[P_ext_offd_j[jj3]]+num_cols_diag_P;
1551                         hypre_CSRBlockMatrixBlockMultAdd(r_a_products,
1552                                                          &(P_ext_offd_data[jj3*bnnz]),
1553                                                          zero, r_a_p_products, block_size);
1554 
1555                         /*-----------------------------------------------------
1556                          *  Check P_marker to see that RAP_{ic,i3} has not
1557                          *  been accounted for. If it has not, create a new
1558                          *  entry. If it has, add new contribution.
1559                          *-----------------------------------------------------*/
1560                         if (P_marker[i3] < jj_row_begin_offd)
1561                         {
1562                            P_marker[i3] = jj_count_offd;
1563                            ind = jj_count_offd * bnnz;
1564                            for (kk = 0; kk < bnnz; kk++)
1565                               RAP_offd_data[ind++] = r_a_p_products[kk];
1566                            RAP_offd_j[jj_count_offd] = i3 - num_cols_diag_P;
1567                            jj_count_offd++;
1568                         }
1569                         else
1570                         {
1571                            ind = P_marker[i3] * bnnz;
1572                            for (kk = 0; kk < bnnz; kk++)
1573                               RAP_offd_data[ind++] += r_a_p_products[kk];
1574                         }
1575                      }
1576                   }
1577 
1578                   /*-----------------------------------------------------------
1579                    *  If i2 is previously visited ( A_marker[12]=ic ) it yields
1580                    *  no new entries in RAP and can just add new contributions.
1581                    *-----------------------------------------------------------*/
1582                   else
1583                   {
1584                      for (jj3=P_ext_diag_i[i2]; jj3 < P_ext_diag_i[i2+1]; jj3++)
1585                      {
1586                         i3 = P_ext_diag_j[jj3];
1587                         hypre_CSRBlockMatrixBlockMultAdd(r_a_products,
1588                                                          &(P_ext_diag_data[jj3*bnnz]), zero,
1589                                                          r_a_p_products, block_size);
1590                         ind = P_marker[i3] * bnnz;
1591                         for (kk = 0; kk < bnnz; kk++)
1592                            RAP_diag_data[ind++] += r_a_p_products[kk];
1593                      }
1594                      for (jj3=P_ext_offd_i[i2]; jj3 < P_ext_offd_i[i2+1]; jj3++)
1595                      {
1596                         i3=map_Pext_to_RAP[P_ext_offd_j[jj3]] + num_cols_diag_P;
1597                         hypre_CSRBlockMatrixBlockMultAdd(r_a_products,
1598                                                          &(P_ext_offd_data[jj3*bnnz]),
1599                                                          zero, r_a_p_products, block_size);
1600                         ind = P_marker[i3] * bnnz;
1601                         for (kk = 0; kk < bnnz; kk++)
1602                            RAP_offd_data[ind++] += r_a_p_products[kk];
1603                      }
1604                   }
1605                }
1606             }
1607 
1608             /*-----------------------------------------------------------------
1609              *  Loop over entries in row i1 of A_diag.
1610              *-----------------------------------------------------------------*/
1611 
1612             for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++)
1613             {
1614                i2 = A_diag_j[jj2];
1615                hypre_CSRBlockMatrixBlockMultAdd(r_entries,
1616                                                 &(A_diag_data[jj2*bnnz]),
1617                                                 zero, r_a_products, block_size);
1618 
1619                /*--------------------------------------------------------------
1620                 *  Check A_marker to see if point i2 has been previously
1621                 *  visited. New entries in RAP only occur from unmarked points.
1622                 *--------------------------------------------------------------*/
1623 
1624                if (A_marker[i2+num_cols_offd_A] != ic)
1625                {
1626 
1627                   /*-----------------------------------------------------------
1628                    *  Mark i2 as visited.
1629                    *-----------------------------------------------------------*/
1630 
1631                   A_marker[i2+num_cols_offd_A] = ic;
1632 
1633                   /*-----------------------------------------------------------
1634                    *  Loop over entries in row i2 of P_diag.
1635                    *-----------------------------------------------------------*/
1636 
1637                   for (jj3 = P_diag_i[i2]; jj3 < P_diag_i[i2+1]; jj3++)
1638                   {
1639                      i3 = P_diag_j[jj3];
1640                      hypre_CSRBlockMatrixBlockMultAdd(r_a_products,
1641                                                       &(P_diag_data[jj3*bnnz]),
1642                                                       zero, r_a_p_products, block_size);
1643 
1644                      /*--------------------------------------------------------
1645                       *  Check P_marker to see that RAP_{ic,i3} has not already
1646                       *  been accounted for. If it has not, create a new entry.
1647                       *  If it has, add new contribution.
1648                       *--------------------------------------------------------*/
1649 
1650                      if (P_marker[i3] < jj_row_begin_diag)
1651                      {
1652                         P_marker[i3] = jj_count_diag;
1653                         ind = jj_count_diag * bnnz;
1654                         for (kk = 0; kk < bnnz; kk++)
1655                            RAP_diag_data[ind++] = r_a_p_products[kk];
1656                         RAP_diag_j[jj_count_diag] = P_diag_j[jj3];
1657                         jj_count_diag++;
1658                      }
1659                      else
1660                      {
1661                         ind = P_marker[i3] * bnnz;
1662                         for (kk = 0; kk < bnnz; kk++)
1663                            RAP_diag_data[ind++] += r_a_p_products[kk];
1664                      }
1665                   }
1666                   if (num_cols_offd_P)
1667                   {
1668                      for (jj3 = P_offd_i[i2]; jj3 < P_offd_i[i2+1]; jj3++)
1669                      {
1670                         i3 = map_P_to_RAP[P_offd_j[jj3]] + num_cols_diag_P;
1671                         hypre_CSRBlockMatrixBlockMultAdd(r_a_products,
1672                                                          &(P_offd_data[jj3*bnnz]),
1673                                                          zero, r_a_p_products, block_size);
1674 
1675                         /*-----------------------------------------------------
1676                          *  Check P_marker to see that RAP_{ic,i3} has not already
1677                          *  been accounted for. If it has not, create a new entry.
1678                          *  If it has, add new contribution.
1679                          *-----------------------------------------------------*/
1680 
1681                         if (P_marker[i3] < jj_row_begin_offd)
1682                         {
1683                            P_marker[i3] = jj_count_offd;
1684                            ind = jj_count_offd * bnnz;
1685                            for (kk = 0; kk < bnnz; kk++)
1686                               RAP_offd_data[ind++] = r_a_p_products[kk];
1687                            RAP_offd_j[jj_count_offd] = i3 - num_cols_diag_P;
1688                            jj_count_offd++;
1689                         }
1690                         else
1691                         {
1692                            ind = P_marker[i3] * bnnz;
1693                            for (kk = 0; kk < bnnz; kk++)
1694                               RAP_offd_data[ind++] += r_a_p_products[kk];
1695                         }
1696                      }
1697                   }
1698                }
1699 
1700                /*--------------------------------------------------------------
1701                 *  If i2 is previously visited ( A_marker[12]=ic ) it yields
1702                 *  no new entries in RAP and can just add new contributions.
1703                 *--------------------------------------------------------------*/
1704 
1705                else
1706                {
1707                   for (jj3 = P_diag_i[i2]; jj3 < P_diag_i[i2+1]; jj3++)
1708                   {
1709                      i3 = P_diag_j[jj3];
1710                      hypre_CSRBlockMatrixBlockMultAdd(r_a_products,
1711                                                       &(P_diag_data[jj3*bnnz]),
1712                                                       zero, r_a_p_products, block_size);
1713                      ind = P_marker[i3] * bnnz;
1714                      for (kk = 0; kk < bnnz; kk++)
1715                         RAP_diag_data[ind++] += r_a_p_products[kk];
1716                   }
1717                   if (num_cols_offd_P)
1718                   {
1719                      for (jj3 = P_offd_i[i2]; jj3 < P_offd_i[i2+1]; jj3++)
1720                      {
1721                         i3 = map_P_to_RAP[P_offd_j[jj3]] + num_cols_diag_P;
1722                         hypre_CSRBlockMatrixBlockMultAdd(r_a_products,
1723                                                          &(P_offd_data[jj3*bnnz]),
1724                                                          zero, r_a_p_products, block_size);
1725                         ind = P_marker[i3] * bnnz;
1726                         for (kk = 0; kk < bnnz; kk++)
1727                            RAP_offd_data[ind++] += r_a_p_products[kk];
1728                      }
1729                   }
1730                }
1731             }
1732          }
1733       }
1734       hypre_TFree(P_mark_array[ii], HYPRE_MEMORY_HOST);
1735       hypre_TFree(A_mark_array[ii], HYPRE_MEMORY_HOST);
1736    }
1737 
1738 
1739    row_starts = hypre_CTAlloc(HYPRE_BigInt,  2, HYPRE_MEMORY_HOST);
1740    col_starts = hypre_CTAlloc(HYPRE_BigInt,  2, HYPRE_MEMORY_HOST);
1741    for (i = 0; i <= 1; i++)
1742    {
1743       row_starts[i] = col_starts[i] = coarse_partitioning[i];
1744    }
1745 
1746    RAP = hypre_ParCSRBlockMatrixCreate(comm, block_size, n_coarse, n_coarse,
1747                                        row_starts, col_starts,
1748                                        num_cols_offd_RAP, RAP_diag_size, RAP_offd_size);
1749 
1750    RAP_diag = hypre_ParCSRBlockMatrixDiag(RAP);
1751    hypre_CSRBlockMatrixI(RAP_diag) = RAP_diag_i;
1752    if (RAP_diag_size)
1753    {
1754       hypre_CSRBlockMatrixData(RAP_diag) = RAP_diag_data;
1755       hypre_CSRBlockMatrixJ(RAP_diag) = RAP_diag_j;
1756    }
1757 
1758    RAP_offd = hypre_ParCSRBlockMatrixOffd(RAP);
1759    hypre_CSRBlockMatrixI(RAP_offd) = RAP_offd_i;
1760    if (num_cols_offd_RAP)
1761    {
1762       hypre_CSRBlockMatrixData(RAP_offd) = RAP_offd_data;
1763       hypre_CSRBlockMatrixJ(RAP_offd) = RAP_offd_j;
1764       hypre_ParCSRBlockMatrixColMapOffd(RAP) = col_map_offd_RAP;
1765    }
1766    if (num_procs > 1)
1767    {
1768       hypre_BlockMatvecCommPkgCreate(RAP);
1769    }
1770 
1771    *RAP_ptr = RAP;
1772 
1773    /*-----------------------------------------------------------------------
1774     *  Free R, P_ext and marker arrays.
1775     *-----------------------------------------------------------------------*/
1776 
1777    hypre_CSRBlockMatrixDestroy(R_diag);
1778    R_diag = NULL;
1779 
1780    if (num_cols_offd_RT)
1781    {
1782       hypre_CSRBlockMatrixDestroy(R_offd);
1783       R_offd = NULL;
1784    }
1785 
1786    if (num_sends_RT || num_recvs_RT)
1787    {
1788       hypre_CSRBlockMatrixDestroy(RAP_ext);
1789       RAP_ext = NULL;
1790    }
1791    hypre_TFree(P_mark_array, HYPRE_MEMORY_HOST);
1792    hypre_TFree(A_mark_array, HYPRE_MEMORY_HOST);
1793    hypre_TFree(P_ext_diag_i, HYPRE_MEMORY_HOST);
1794    hypre_TFree(P_ext_offd_i, HYPRE_MEMORY_HOST);
1795    hypre_TFree(jj_cnt_diag, HYPRE_MEMORY_HOST);
1796    hypre_TFree(jj_cnt_offd, HYPRE_MEMORY_HOST);
1797    if (num_cols_offd_P)
1798    {
1799       hypre_TFree(map_P_to_Pext, HYPRE_MEMORY_HOST);
1800       hypre_TFree(map_P_to_RAP, HYPRE_MEMORY_HOST);
1801    }
1802    if (num_cols_offd_Pext)
1803    {
1804       hypre_TFree(col_map_offd_Pext, HYPRE_MEMORY_HOST);
1805       hypre_TFree(map_Pext_to_RAP, HYPRE_MEMORY_HOST);
1806    }
1807    if (P_ext_diag_size)
1808    {
1809       hypre_TFree(P_ext_diag_data, HYPRE_MEMORY_HOST);
1810       hypre_TFree(P_ext_diag_j, HYPRE_MEMORY_HOST);
1811    }
1812    if (P_ext_offd_size)
1813    {
1814       hypre_TFree(P_ext_offd_data, HYPRE_MEMORY_HOST);
1815       hypre_TFree(P_ext_offd_j, HYPRE_MEMORY_HOST);
1816    }
1817 
1818    hypre_TFree(r_a_products,HYPRE_MEMORY_HOST);
1819    hypre_TFree(r_a_p_products,HYPRE_MEMORY_HOST);
1820    hypre_TFree(row_starts,HYPRE_MEMORY_HOST);
1821    hypre_TFree(col_starts,HYPRE_MEMORY_HOST);
1822 
1823    return hypre_error_flag;
1824 }
1825