1 /******************************************************************************
2  * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3  * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4  *
5  * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6  ******************************************************************************/
7 
8 #include "_hypre_utilities.h"
9 #include "_hypre_parcsr_mv.h"
10 #include "_hypre_lapack.h"
11 #include "_hypre_blas.h"
12 
13 /* -----------------------------------------------------------------------------
14  * generate AFF or AFC
15  * ----------------------------------------------------------------------------- */
16 
17 HYPRE_Int
hypre_ParCSRMatrixGenerateFFFC(hypre_ParCSRMatrix * A,HYPRE_Int * CF_marker,HYPRE_BigInt * cpts_starts,hypre_ParCSRMatrix * S,hypre_ParCSRMatrix ** A_FC_ptr,hypre_ParCSRMatrix ** A_FF_ptr)18 hypre_ParCSRMatrixGenerateFFFC( hypre_ParCSRMatrix  *A,
19                                 HYPRE_Int           *CF_marker,
20                                 HYPRE_BigInt        *cpts_starts,
21                                 hypre_ParCSRMatrix  *S,
22                                 hypre_ParCSRMatrix **A_FC_ptr,
23                                 hypre_ParCSRMatrix **A_FF_ptr)
24 {
25    MPI_Comm                 comm     = hypre_ParCSRMatrixComm(A);
26    HYPRE_MemoryLocation memory_location_P = hypre_ParCSRMatrixMemoryLocation(A);
27    hypre_ParCSRCommPkg     *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
28    hypre_ParCSRCommHandle  *comm_handle;
29 
30    /* diag part of A */
31    hypre_CSRMatrix    *A_diag   = hypre_ParCSRMatrixDiag(A);
32    HYPRE_Complex      *A_diag_data = hypre_CSRMatrixData(A_diag);
33    HYPRE_Int          *A_diag_i = hypre_CSRMatrixI(A_diag);
34    HYPRE_Int          *A_diag_j = hypre_CSRMatrixJ(A_diag);
35    /* off-diag part of A */
36    hypre_CSRMatrix    *A_offd   = hypre_ParCSRMatrixOffd(A);
37    HYPRE_Complex      *A_offd_data = hypre_CSRMatrixData(A_offd);
38    HYPRE_Int          *A_offd_i = hypre_CSRMatrixI(A_offd);
39    HYPRE_Int          *A_offd_j = hypre_CSRMatrixJ(A_offd);
40 
41    HYPRE_Int           n_fine = hypre_CSRMatrixNumRows(A_diag);
42    HYPRE_Int           num_cols_A_offd = hypre_CSRMatrixNumCols(A_offd);
43 
44    /* diag part of S */
45    hypre_CSRMatrix    *S_diag   = hypre_ParCSRMatrixDiag(S);
46    HYPRE_Int          *S_diag_i = hypre_CSRMatrixI(S_diag);
47    HYPRE_Int          *S_diag_j = hypre_CSRMatrixJ(S_diag);
48    /* off-diag part of S */
49    hypre_CSRMatrix    *S_offd   = hypre_ParCSRMatrixOffd(S);
50    HYPRE_Int          *S_offd_i = hypre_CSRMatrixI(S_offd);
51    HYPRE_Int          *S_offd_j = hypre_CSRMatrixJ(S_offd);
52 
53    hypre_ParCSRMatrix *A_FC;
54    hypre_CSRMatrix    *A_FC_diag, *A_FC_offd;
55    HYPRE_Int          *A_FC_diag_i, *A_FC_diag_j, *A_FC_offd_i, *A_FC_offd_j=NULL;
56    HYPRE_Complex      *A_FC_diag_data, *A_FC_offd_data=NULL;
57    HYPRE_Int           num_cols_offd_A_FC;
58    HYPRE_BigInt       *col_map_offd_A_FC = NULL;
59 
60    hypre_ParCSRMatrix *A_FF;
61    hypre_CSRMatrix    *A_FF_diag, *A_FF_offd;
62    HYPRE_Int          *A_FF_diag_i, *A_FF_diag_j, *A_FF_offd_i, *A_FF_offd_j;
63    HYPRE_Complex      *A_FF_diag_data, *A_FF_offd_data;
64    HYPRE_Int           num_cols_offd_A_FF;
65    HYPRE_BigInt       *col_map_offd_A_FF = NULL;
66 
67    HYPRE_Int          *fine_to_coarse;
68    HYPRE_Int          *fine_to_fine;
69    HYPRE_Int          *fine_to_coarse_offd = NULL;
70    HYPRE_Int          *fine_to_fine_offd = NULL;
71 
72    HYPRE_Int           i, j, jj;
73    HYPRE_Int           startc, index;
74    HYPRE_Int           cpt, fpt, row;
75    HYPRE_Int          *CF_marker_offd = NULL, *marker_offd=NULL;
76    HYPRE_Int          *int_buf_data = NULL;
77    HYPRE_BigInt       *big_convert;
78    HYPRE_BigInt       *big_convert_offd = NULL;
79    HYPRE_BigInt       *big_buf_data = NULL;
80 
81    HYPRE_BigInt        total_global_fpts, total_global_cpts, *fpts_starts;
82    HYPRE_Int           my_id, num_procs, num_sends;
83    HYPRE_Int           d_count_FF, d_count_FC, o_count_FF, o_count_FC;
84    HYPRE_Int           n_Fpts;
85    HYPRE_Int          *cpt_array, *fpt_array;
86    HYPRE_Int           start, stop;
87    HYPRE_Int           num_threads;
88 
89    num_threads = hypre_NumThreads();
90 
91    /* MPI size and rank*/
92    hypre_MPI_Comm_size(comm, &num_procs);
93    hypre_MPI_Comm_rank(comm, &my_id);
94 
95    fine_to_coarse = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
96    fine_to_fine = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
97    big_convert = hypre_CTAlloc(HYPRE_BigInt, n_fine, HYPRE_MEMORY_HOST);
98 
99    cpt_array = hypre_CTAlloc(HYPRE_Int, num_threads+1, HYPRE_MEMORY_HOST);
100    fpt_array = hypre_CTAlloc(HYPRE_Int, num_threads+1, HYPRE_MEMORY_HOST);
101 #ifdef HYPRE_USING_OPENMP
102 #pragma omp parallel private(i,j,jj,start,stop,row,cpt,fpt,d_count_FC,d_count_FF,o_count_FC,o_count_FF)
103 #endif
104    {
105       HYPRE_Int my_thread_num = hypre_GetThreadNum();
106 
107       start = (n_fine/num_threads)*my_thread_num;
108       if (my_thread_num == num_threads-1)
109       {
110          stop = n_fine;
111       }
112       else
113       {
114          stop = (n_fine/num_threads)*(my_thread_num+1);
115       }
116       for (i=start; i < stop; i++)
117       {
118          if (CF_marker[i] > 0)
119          {
120             cpt_array[my_thread_num+1]++;
121          }
122          else
123          {
124             fpt_array[my_thread_num+1]++;
125          }
126       }
127 
128 #ifdef HYPRE_USING_OPENMP
129 #pragma omp barrier
130 #endif
131       if (my_thread_num == 0)
132       {
133          for (i=1; i < num_threads; i++)
134          {
135             cpt_array[i+1] += cpt_array[i];
136             fpt_array[i+1] += fpt_array[i];
137          }
138       }
139 #ifdef HYPRE_USING_OPENMP
140 #pragma omp barrier
141 #endif
142 
143       cpt = cpt_array[my_thread_num];
144       fpt = fpt_array[my_thread_num];
145       for (i=start; i < stop; i++)
146       {
147          if (CF_marker[i] > 0)
148          {
149             fine_to_coarse[i] = cpt++;
150             fine_to_fine[i] = -1;
151          }
152          else
153          {
154             fine_to_fine[i] = fpt++;
155             fine_to_coarse[i] = -1;
156          }
157       }
158 #ifdef HYPRE_USING_OPENMP
159 #pragma omp barrier
160 #endif
161 
162       if (my_thread_num == 0)
163       {
164          HYPRE_BigInt big_Fpts;
165          n_Fpts = fpt_array[num_threads];
166          big_Fpts = n_Fpts;
167 
168          fpts_starts = hypre_CTAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST);
169          hypre_MPI_Scan(&big_Fpts, fpts_starts+1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm);
170          fpts_starts[0] = fpts_starts[1] - big_Fpts;
171          if (my_id == num_procs - 1)
172          {
173             total_global_fpts = fpts_starts[1];
174             total_global_cpts = cpts_starts[1];
175          }
176          hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs-1, comm);
177          hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs-1, comm);
178       }
179 #ifdef HYPRE_USING_OPENMP
180 #pragma omp barrier
181 #endif
182 
183       for (i=start; i < stop; i++)
184       {
185          if (CF_marker[i] > 0)
186          {
187             big_convert[i] = (HYPRE_BigInt)fine_to_coarse[i] + cpts_starts[0];
188          }
189          else
190          {
191             big_convert[i] = (HYPRE_BigInt)fine_to_fine[i] + fpts_starts[0];
192          }
193       }
194 
195 #ifdef HYPRE_USING_OPENMP
196 #pragma omp barrier
197 #endif
198       if (my_thread_num == 0)
199       {
200          if (num_cols_A_offd)
201          {
202             CF_marker_offd = hypre_CTAlloc(HYPRE_Int,  num_cols_A_offd, HYPRE_MEMORY_HOST);
203             big_convert_offd = hypre_CTAlloc(HYPRE_BigInt,  num_cols_A_offd, HYPRE_MEMORY_HOST);
204             fine_to_coarse_offd = hypre_CTAlloc(HYPRE_Int,  num_cols_A_offd, HYPRE_MEMORY_HOST);
205             fine_to_fine_offd = hypre_CTAlloc(HYPRE_Int,  num_cols_A_offd, HYPRE_MEMORY_HOST);
206          }
207          index = 0;
208          num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
209          int_buf_data = hypre_CTAlloc(HYPRE_Int,  hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), HYPRE_MEMORY_HOST);
210          big_buf_data = hypre_CTAlloc(HYPRE_BigInt,  hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), HYPRE_MEMORY_HOST);
211          for (i = 0; i < num_sends; i++)
212          {
213             startc = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
214             for (j = startc; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
215             {
216                int_buf_data[index] = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
217                big_buf_data[index++] = big_convert[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
218             }
219          }
220 
221          comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg, int_buf_data, CF_marker_offd);
222 
223          hypre_ParCSRCommHandleDestroy(comm_handle);
224 
225          comm_handle = hypre_ParCSRCommHandleCreate( 21, comm_pkg, big_buf_data, big_convert_offd);
226 
227          hypre_ParCSRCommHandleDestroy(comm_handle);
228 
229          marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);
230          for (i = 0; i < n_fine; i++)
231          {
232             if (CF_marker[i] < 0)
233             {
234                for (j = S_offd_i[i]; j < S_offd_i[i+1]; j++)
235                {
236                   marker_offd[S_offd_j[j]] = 1;
237                }
238             }
239          }
240 
241          num_cols_offd_A_FC = 0;
242          num_cols_offd_A_FF = 0;
243          if (num_cols_A_offd)
244          {
245             for (i=0; i < num_cols_A_offd; i++)
246             {
247                if (CF_marker_offd[i] > 0 && marker_offd[i] > 0)
248                {
249                   fine_to_coarse_offd[i] = num_cols_offd_A_FC++;
250                   fine_to_fine_offd[i] = -1;
251                }
252                else if (CF_marker_offd[i] < 0 && marker_offd[i] > 0)
253                {
254                   fine_to_fine_offd[i] = num_cols_offd_A_FF++;
255                   fine_to_coarse_offd[i] = -1;
256                }
257             }
258 
259             col_map_offd_A_FF = hypre_TAlloc(HYPRE_BigInt, num_cols_offd_A_FF, HYPRE_MEMORY_HOST);
260             col_map_offd_A_FC = hypre_TAlloc(HYPRE_BigInt, num_cols_offd_A_FC, HYPRE_MEMORY_HOST);
261 
262             cpt = 0;
263             fpt = 0;
264             for (i=0; i < num_cols_A_offd; i++)
265             {
266                if (CF_marker_offd[i] > 0 && marker_offd[i] > 0)
267                {
268                   col_map_offd_A_FC[cpt++] = big_convert_offd[i];
269                }
270                else if (CF_marker_offd[i] < 0 && marker_offd[i] > 0)
271                {
272                   col_map_offd_A_FF[fpt++] = big_convert_offd[i];
273                }
274             }
275          }
276 
277          A_FF_diag_i = hypre_CTAlloc(HYPRE_Int,n_Fpts+1, memory_location_P);
278          A_FC_diag_i = hypre_CTAlloc(HYPRE_Int,n_Fpts+1, memory_location_P);
279          A_FF_offd_i = hypre_CTAlloc(HYPRE_Int,n_Fpts+1, memory_location_P);
280          A_FC_offd_i = hypre_CTAlloc(HYPRE_Int,n_Fpts+1, memory_location_P);
281       }
282 
283 #ifdef HYPRE_USING_OPENMP
284 #pragma omp barrier
285 #endif
286       d_count_FC = 0;
287       d_count_FF = 0;
288       o_count_FC = 0;
289       o_count_FF = 0;
290       row = fpt_array[my_thread_num];
291       for (i=start; i < stop; i++)
292       {
293          if (CF_marker[i] < 0)
294          {
295             row++;
296             d_count_FF++; /* account for diagonal element */
297             for (j = S_diag_i[i]; j < S_diag_i[i+1]; j++)
298             {
299                jj = S_diag_j[j];
300                if (CF_marker[jj] > 0)
301                   d_count_FC++;
302                else
303                   d_count_FF++;
304             }
305             A_FF_diag_i[row] = d_count_FF;
306             A_FC_diag_i[row] = d_count_FC;
307             for (j = S_offd_i[i]; j < S_offd_i[i+1]; j++)
308             {
309                jj = S_offd_j[j];
310                if (CF_marker_offd[jj] > 0)
311                   o_count_FC++;
312                else
313                   o_count_FF++;
314             }
315             A_FF_offd_i[row] = o_count_FF;
316             A_FC_offd_i[row] = o_count_FC;
317          }
318       }
319 
320 #ifdef HYPRE_USING_OPENMP
321 #pragma omp barrier
322 #endif
323       if (my_thread_num == 0)
324       {
325          HYPRE_Int fpt2;
326          for (i=1; i<num_threads+1; i++)
327          {
328             fpt = fpt_array[i];
329             fpt2 = fpt_array[i-1];
330 
331             if (fpt == fpt2)
332             {
333                continue;
334             }
335 
336             A_FC_diag_i[fpt] += A_FC_diag_i[fpt2];
337             A_FF_diag_i[fpt] += A_FF_diag_i[fpt2];
338             A_FC_offd_i[fpt] += A_FC_offd_i[fpt2];
339             A_FF_offd_i[fpt] += A_FF_offd_i[fpt2];
340          }
341          row = fpt_array[num_threads];
342          d_count_FC = A_FC_diag_i[row];
343          d_count_FF = A_FF_diag_i[row];
344          o_count_FC = A_FC_offd_i[row];
345          o_count_FF = A_FF_offd_i[row];
346          A_FF_diag_j = hypre_CTAlloc(HYPRE_Int,d_count_FF, memory_location_P);
347          A_FC_diag_j = hypre_CTAlloc(HYPRE_Int,d_count_FC, memory_location_P);
348          A_FF_offd_j = hypre_CTAlloc(HYPRE_Int,o_count_FF, memory_location_P);
349          A_FC_offd_j = hypre_CTAlloc(HYPRE_Int,o_count_FC, memory_location_P);
350          A_FF_diag_data = hypre_CTAlloc(HYPRE_Real,d_count_FF, memory_location_P);
351          A_FC_diag_data = hypre_CTAlloc(HYPRE_Real,d_count_FC, memory_location_P);
352          A_FF_offd_data = hypre_CTAlloc(HYPRE_Real,o_count_FF, memory_location_P);
353          A_FC_offd_data = hypre_CTAlloc(HYPRE_Real,o_count_FC, memory_location_P);
354       }
355 
356 #ifdef HYPRE_USING_OPENMP
357 #pragma omp barrier
358 #endif
359       row = fpt_array[my_thread_num];
360       d_count_FC = A_FC_diag_i[row];
361       d_count_FF = A_FF_diag_i[row];
362       o_count_FC = A_FC_offd_i[row];
363       o_count_FF = A_FF_offd_i[row];
364       for (i=start; i < stop; i++)
365       {
366          if (CF_marker[i] < 0)
367          {
368             HYPRE_Int jS, jA;
369             row++;
370             jA = A_diag_i[i];
371             A_FF_diag_j[d_count_FF] = fine_to_fine[A_diag_j[jA]];
372             A_FF_diag_data[d_count_FF++] = A_diag_data[jA++];
373             for (j = S_diag_i[i]; j < S_diag_i[i+1]; j++)
374             {
375                jA = A_diag_i[i]+1;
376                jS = S_diag_j[j];
377                while (A_diag_j[jA] != jS) jA++;
378                if (CF_marker[S_diag_j[j]] > 0)
379                {
380                   A_FC_diag_j[d_count_FC] = fine_to_coarse[A_diag_j[jA]];
381                   A_FC_diag_data[d_count_FC++] = A_diag_data[jA++];
382                }
383                else
384                {
385                   A_FF_diag_j[d_count_FF] = fine_to_fine[A_diag_j[jA]];
386                   A_FF_diag_data[d_count_FF++] = A_diag_data[jA++];
387                }
388             }
389             A_FF_diag_i[row] = d_count_FF;
390             A_FC_diag_i[row] = d_count_FC;
391             for (j = S_offd_i[i]; j < S_offd_i[i+1]; j++)
392             {
393                jA = A_offd_i[i];
394                jS = S_offd_j[j];
395                while (jS != A_offd_j[jA]) jA++;
396                if (CF_marker_offd[S_offd_j[j]] > 0)
397                {
398                   A_FC_offd_j[o_count_FC] = fine_to_coarse_offd[A_offd_j[jA]];
399                   A_FC_offd_data[o_count_FC++] = A_offd_data[jA++];
400                }
401                else
402                {
403                   A_FF_offd_j[o_count_FF] = fine_to_fine_offd[A_offd_j[jA]];
404                   A_FF_offd_data[o_count_FF++] = A_offd_data[jA++];
405                }
406             }
407             A_FF_offd_i[row] = o_count_FF;
408             A_FC_offd_i[row] = o_count_FC;
409          }
410       }
411    } /*end parallel region */
412 
413    A_FC = hypre_ParCSRMatrixCreate(comm,
414             total_global_fpts,
415             total_global_cpts,
416             fpts_starts,
417             cpts_starts,
418             num_cols_offd_A_FC,
419             A_FC_diag_i[n_Fpts],
420             A_FC_offd_i[n_Fpts]);
421 
422    A_FF = hypre_ParCSRMatrixCreate(comm,
423             total_global_fpts,
424             total_global_fpts,
425             fpts_starts,
426             fpts_starts,
427             num_cols_offd_A_FF,
428             A_FF_diag_i[n_Fpts],
429             A_FF_offd_i[n_Fpts]);
430 
431    A_FC_diag = hypre_ParCSRMatrixDiag(A_FC);
432    hypre_CSRMatrixData(A_FC_diag) = A_FC_diag_data;
433    hypre_CSRMatrixI(A_FC_diag) = A_FC_diag_i;
434    hypre_CSRMatrixJ(A_FC_diag) = A_FC_diag_j;
435    A_FC_offd = hypre_ParCSRMatrixOffd(A_FC);
436    hypre_CSRMatrixData(A_FC_offd) = A_FC_offd_data;
437    hypre_CSRMatrixI(A_FC_offd) = A_FC_offd_i;
438    hypre_CSRMatrixJ(A_FC_offd) = A_FC_offd_j;
439    hypre_ParCSRMatrixColMapOffd(A_FC) = col_map_offd_A_FC;
440 
441    hypre_CSRMatrixMemoryLocation(A_FC_diag) = memory_location_P;
442    hypre_CSRMatrixMemoryLocation(A_FC_offd) = memory_location_P;
443 
444    A_FF_diag = hypre_ParCSRMatrixDiag(A_FF);
445    hypre_CSRMatrixData(A_FF_diag) = A_FF_diag_data;
446    hypre_CSRMatrixI(A_FF_diag) = A_FF_diag_i;
447    hypre_CSRMatrixJ(A_FF_diag) = A_FF_diag_j;
448    A_FF_offd = hypre_ParCSRMatrixOffd(A_FF);
449    hypre_CSRMatrixData(A_FF_offd) = A_FF_offd_data;
450    hypre_CSRMatrixI(A_FF_offd) = A_FF_offd_i;
451    hypre_CSRMatrixJ(A_FF_offd) = A_FF_offd_j;
452    hypre_ParCSRMatrixColMapOffd(A_FF) = col_map_offd_A_FF;
453 
454    hypre_CSRMatrixMemoryLocation(A_FF_diag) = memory_location_P;
455    hypre_CSRMatrixMemoryLocation(A_FF_offd) = memory_location_P;
456 
457    hypre_TFree(fine_to_coarse, HYPRE_MEMORY_HOST);
458    hypre_TFree(fine_to_fine, HYPRE_MEMORY_HOST);
459    hypre_TFree(big_convert, HYPRE_MEMORY_HOST);
460    hypre_TFree(fine_to_coarse_offd, HYPRE_MEMORY_HOST);
461    hypre_TFree(fine_to_fine_offd, HYPRE_MEMORY_HOST);
462    hypre_TFree(big_convert_offd, HYPRE_MEMORY_HOST);
463    hypre_TFree(CF_marker_offd, HYPRE_MEMORY_HOST);
464    hypre_TFree(int_buf_data, HYPRE_MEMORY_HOST);
465    hypre_TFree(big_buf_data, HYPRE_MEMORY_HOST);
466    hypre_TFree(marker_offd, HYPRE_MEMORY_HOST);
467    hypre_TFree(cpt_array, HYPRE_MEMORY_HOST);
468    hypre_TFree(fpt_array, HYPRE_MEMORY_HOST);
469    hypre_TFree(fpts_starts, HYPRE_MEMORY_HOST);
470 
471    *A_FC_ptr = A_FC;
472    *A_FF_ptr = A_FF;
473 
474    return hypre_error_flag;
475 }
476 
477 /* -----------------------------------------------------------------------------
478  * generate AFF, AFC, for 2 stage extended interpolation
479  * ----------------------------------------------------------------------------- */
480 
481 HYPRE_Int
hypre_ParCSRMatrixGenerateFFFC3(hypre_ParCSRMatrix * A,HYPRE_Int * CF_marker,HYPRE_BigInt * cpts_starts,hypre_ParCSRMatrix * S,hypre_ParCSRMatrix ** A_FC_ptr,hypre_ParCSRMatrix ** A_FF_ptr)482 hypre_ParCSRMatrixGenerateFFFC3( hypre_ParCSRMatrix  *A,
483                                  HYPRE_Int           *CF_marker,
484                                  HYPRE_BigInt        *cpts_starts,
485                                  hypre_ParCSRMatrix  *S,
486                                  hypre_ParCSRMatrix **A_FC_ptr,
487                                  hypre_ParCSRMatrix **A_FF_ptr)
488 {
489    MPI_Comm                 comm     = hypre_ParCSRMatrixComm(A);
490    HYPRE_MemoryLocation memory_location_P = hypre_ParCSRMatrixMemoryLocation(A);
491    hypre_ParCSRCommPkg     *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
492    hypre_ParCSRCommHandle  *comm_handle;
493 
494    /* diag part of A */
495    hypre_CSRMatrix    *A_diag   = hypre_ParCSRMatrixDiag(A);
496    HYPRE_Complex      *A_diag_data = hypre_CSRMatrixData(A_diag);
497    HYPRE_Int          *A_diag_i = hypre_CSRMatrixI(A_diag);
498    HYPRE_Int          *A_diag_j = hypre_CSRMatrixJ(A_diag);
499    /* off-diag part of A */
500    hypre_CSRMatrix    *A_offd   = hypre_ParCSRMatrixOffd(A);
501    HYPRE_Complex      *A_offd_data = hypre_CSRMatrixData(A_offd);
502    HYPRE_Int          *A_offd_i = hypre_CSRMatrixI(A_offd);
503    HYPRE_Int          *A_offd_j = hypre_CSRMatrixJ(A_offd);
504 
505    HYPRE_Int           n_fine = hypre_CSRMatrixNumRows(A_diag);
506    HYPRE_Int           num_cols_A_offd = hypre_CSRMatrixNumCols(A_offd);
507 
508    /* diag part of S */
509    hypre_CSRMatrix    *S_diag   = hypre_ParCSRMatrixDiag(S);
510    HYPRE_Int          *S_diag_i = hypre_CSRMatrixI(S_diag);
511    HYPRE_Int          *S_diag_j = hypre_CSRMatrixJ(S_diag);
512    /* off-diag part of S */
513    hypre_CSRMatrix    *S_offd   = hypre_ParCSRMatrixOffd(S);
514    HYPRE_Int          *S_offd_i = hypre_CSRMatrixI(S_offd);
515    HYPRE_Int          *S_offd_j = hypre_CSRMatrixJ(S_offd);
516 
517    hypre_ParCSRMatrix *A_FC;
518    hypre_CSRMatrix    *A_FC_diag, *A_FC_offd;
519    HYPRE_Int          *A_FC_diag_i, *A_FC_diag_j, *A_FC_offd_i, *A_FC_offd_j=NULL;
520    HYPRE_Complex      *A_FC_diag_data, *A_FC_offd_data=NULL;
521    HYPRE_Int           num_cols_offd_A_FC;
522    HYPRE_BigInt       *col_map_offd_A_FC = NULL;
523 
524    hypre_ParCSRMatrix *A_FF;
525    hypre_CSRMatrix    *A_FF_diag, *A_FF_offd;
526    HYPRE_Int          *A_FF_diag_i, *A_FF_diag_j, *A_FF_offd_i, *A_FF_offd_j;
527    HYPRE_Complex      *A_FF_diag_data, *A_FF_offd_data;
528    HYPRE_Int           num_cols_offd_A_FF;
529    HYPRE_BigInt       *col_map_offd_A_FF = NULL;
530 
531    HYPRE_Int          *fine_to_coarse;
532    HYPRE_Int          *fine_to_fine;
533    HYPRE_Int          *fine_to_coarse_offd = NULL;
534    HYPRE_Int          *fine_to_fine_offd = NULL;
535 
536    HYPRE_Int           i, j, jj;
537    HYPRE_Int           startc, index;
538    HYPRE_Int           cpt, fpt, new_fpt, row, rowc;
539    HYPRE_Int          *CF_marker_offd = NULL;
540    HYPRE_Int          *int_buf_data = NULL;
541    HYPRE_BigInt       *big_convert;
542    HYPRE_BigInt       *big_convert_offd = NULL;
543    HYPRE_BigInt       *big_buf_data = NULL;
544 
545    HYPRE_BigInt        total_global_fpts, total_global_cpts, total_global_new_fpts;
546    HYPRE_BigInt       *fpts_starts, *new_fpts_starts;
547    HYPRE_Int           my_id, num_procs, num_sends;
548    HYPRE_Int           d_count_FF, d_count_FC, o_count_FF, o_count_FC;
549    HYPRE_Int           n_Fpts;
550    HYPRE_Int           n_new_Fpts;
551    HYPRE_Int          *cpt_array, *fpt_array, *new_fpt_array;
552    HYPRE_Int           start, stop;
553    HYPRE_Int           num_threads;
554 
555    num_threads = hypre_NumThreads();
556 
557    /* MPI size and rank*/
558    hypre_MPI_Comm_size(comm, &num_procs);
559    hypre_MPI_Comm_rank(comm, &my_id);
560 
561    fine_to_coarse = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
562    fine_to_fine = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
563    big_convert = hypre_CTAlloc(HYPRE_BigInt, n_fine, HYPRE_MEMORY_HOST);
564 
565    cpt_array = hypre_CTAlloc(HYPRE_Int, num_threads+1, HYPRE_MEMORY_HOST);
566    fpt_array = hypre_CTAlloc(HYPRE_Int, num_threads+1, HYPRE_MEMORY_HOST);
567    new_fpt_array = hypre_CTAlloc(HYPRE_Int, num_threads+1, HYPRE_MEMORY_HOST);
568 #ifdef HYPRE_USING_OPENMP
569 #pragma omp parallel private(i,j,jj,start,stop,row,rowc,cpt,new_fpt,fpt,d_count_FC,d_count_FF,o_count_FC,o_count_FF)
570 #endif
571    {
572       HYPRE_Int my_thread_num = hypre_GetThreadNum();
573 
574       start = (n_fine/num_threads)*my_thread_num;
575       if (my_thread_num == num_threads-1)
576       {
577          stop = n_fine;
578       }
579       else
580       {
581          stop = (n_fine/num_threads)*(my_thread_num+1);
582       }
583       for (i=start; i < stop; i++)
584       {
585          if (CF_marker[i] > 0)
586          {
587             cpt_array[my_thread_num+1]++;
588          }
589          else if (CF_marker[i] == -2)
590          {
591             new_fpt_array[my_thread_num+1]++;
592             fpt_array[my_thread_num+1]++;
593          }
594          else
595          {
596             fpt_array[my_thread_num+1]++;
597          }
598       }
599 
600 #ifdef HYPRE_USING_OPENMP
601 #pragma omp barrier
602 #endif
603       if (my_thread_num == 0)
604       {
605          for (i=1; i < num_threads; i++)
606          {
607             cpt_array[i+1] += cpt_array[i];
608             fpt_array[i+1] += fpt_array[i];
609             new_fpt_array[i+1] += new_fpt_array[i];
610          }
611       }
612 #ifdef HYPRE_USING_OPENMP
613 #pragma omp barrier
614 #endif
615 
616       cpt = cpt_array[my_thread_num];
617       fpt = fpt_array[my_thread_num];
618       for (i=start; i < stop; i++)
619       {
620          if (CF_marker[i] > 0)
621          {
622             fine_to_coarse[i] = cpt++;
623             fine_to_fine[i] = -1;
624          }
625          else
626          {
627             fine_to_fine[i] = fpt++;
628             fine_to_coarse[i] = -1;
629          }
630       }
631 #ifdef HYPRE_USING_OPENMP
632 #pragma omp barrier
633 #endif
634 
635       if (my_thread_num == 0)
636       {
637          HYPRE_BigInt big_Fpts, big_new_Fpts;
638          n_Fpts = fpt_array[num_threads];
639          n_new_Fpts = new_fpt_array[num_threads];
640          big_Fpts = n_Fpts;
641          big_new_Fpts = n_new_Fpts;
642 
643          fpts_starts = hypre_CTAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST);
644          new_fpts_starts = hypre_CTAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST);
645          hypre_MPI_Scan(&big_Fpts, fpts_starts+1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm);
646          hypre_MPI_Scan(&big_new_Fpts, new_fpts_starts+1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm);
647          fpts_starts[0] = fpts_starts[1] - big_Fpts;
648          new_fpts_starts[0] = new_fpts_starts[1] - big_new_Fpts;
649          if (my_id == num_procs - 1)
650          {
651             total_global_new_fpts = new_fpts_starts[1];
652             total_global_fpts = fpts_starts[1];
653             total_global_cpts = cpts_starts[1];
654          }
655          hypre_MPI_Bcast(&total_global_new_fpts, 1, HYPRE_MPI_BIG_INT, num_procs-1, comm);
656          hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs-1, comm);
657          hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs-1, comm);
658       }
659 #ifdef HYPRE_USING_OPENMP
660 #pragma omp barrier
661 #endif
662 
663       for (i=start; i < stop; i++)
664       {
665          if (CF_marker[i] > 0)
666          {
667             big_convert[i] = (HYPRE_BigInt)fine_to_coarse[i] + cpts_starts[0];
668          }
669          else
670          {
671             big_convert[i] = (HYPRE_BigInt)fine_to_fine[i] + fpts_starts[0];
672          }
673       }
674 
675 #ifdef HYPRE_USING_OPENMP
676 #pragma omp barrier
677 #endif
678       if (my_thread_num == 0)
679       {
680          if (num_cols_A_offd)
681          {
682             CF_marker_offd = hypre_CTAlloc(HYPRE_Int,  num_cols_A_offd, HYPRE_MEMORY_HOST);
683             big_convert_offd = hypre_CTAlloc(HYPRE_BigInt,  num_cols_A_offd, HYPRE_MEMORY_HOST);
684             fine_to_coarse_offd = hypre_CTAlloc(HYPRE_Int,  num_cols_A_offd, HYPRE_MEMORY_HOST);
685             fine_to_fine_offd = hypre_CTAlloc(HYPRE_Int,  num_cols_A_offd, HYPRE_MEMORY_HOST);
686          }
687          index = 0;
688          num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
689          int_buf_data = hypre_CTAlloc(HYPRE_Int,  hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), HYPRE_MEMORY_HOST);
690          big_buf_data = hypre_CTAlloc(HYPRE_BigInt,  hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), HYPRE_MEMORY_HOST);
691          for (i = 0; i < num_sends; i++)
692          {
693             startc = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
694             for (j = startc; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
695             {
696                int_buf_data[index] = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
697                big_buf_data[index++] = big_convert[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
698             }
699          }
700 
701          comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg, int_buf_data, CF_marker_offd);
702 
703          hypre_ParCSRCommHandleDestroy(comm_handle);
704 
705          comm_handle = hypre_ParCSRCommHandleCreate( 21, comm_pkg, big_buf_data, big_convert_offd);
706 
707          hypre_ParCSRCommHandleDestroy(comm_handle);
708 
709          num_cols_offd_A_FC = 0;
710          num_cols_offd_A_FF = 0;
711          if (num_cols_A_offd)
712          {
713             for (i=0; i < num_cols_A_offd; i++)
714             {
715                if (CF_marker_offd[i] > 0)
716                {
717                   fine_to_coarse_offd[i] = num_cols_offd_A_FC++;
718                   fine_to_fine_offd[i] = -1;
719                }
720                else
721                {
722                   fine_to_fine_offd[i] = num_cols_offd_A_FF++;
723                   fine_to_coarse_offd[i] = -1;
724                }
725             }
726 
727             col_map_offd_A_FF = hypre_TAlloc(HYPRE_BigInt, num_cols_offd_A_FF, HYPRE_MEMORY_HOST);
728             col_map_offd_A_FC = hypre_TAlloc(HYPRE_BigInt, num_cols_offd_A_FC, HYPRE_MEMORY_HOST);
729 
730             cpt = 0;
731             fpt = 0;
732             for (i=0; i < num_cols_A_offd; i++)
733             {
734                if (CF_marker_offd[i] > 0)
735                {
736                   col_map_offd_A_FC[cpt++] = big_convert_offd[i];
737                }
738                else
739                {
740                    col_map_offd_A_FF[fpt++] = big_convert_offd[i];
741                }
742             }
743          }
744 
745          A_FF_diag_i = hypre_CTAlloc(HYPRE_Int,n_new_Fpts+1, memory_location_P);
746          A_FC_diag_i = hypre_CTAlloc(HYPRE_Int,n_Fpts+1, memory_location_P);
747          A_FF_offd_i = hypre_CTAlloc(HYPRE_Int,n_new_Fpts+1, memory_location_P);
748          A_FC_offd_i = hypre_CTAlloc(HYPRE_Int,n_Fpts+1, memory_location_P);
749       }
750 
751 #ifdef HYPRE_USING_OPENMP
752 #pragma omp barrier
753 #endif
754       d_count_FC = 0;
755       d_count_FF = 0;
756       o_count_FC = 0;
757       o_count_FF = 0;
758       row = new_fpt_array[my_thread_num];
759       rowc = fpt_array[my_thread_num];
760       for (i=start; i < stop; i++)
761       {
762          if (CF_marker[i] == -2)
763          {
764             row++;
765             rowc++;
766             d_count_FF++; /* account for diagonal element */
767             for (j = S_diag_i[i]; j < S_diag_i[i+1]; j++)
768             {
769                jj = S_diag_j[j];
770                if (CF_marker[jj] > 0)
771                   d_count_FC++;
772                else
773                   d_count_FF++;
774             }
775             A_FF_diag_i[row] = d_count_FF;
776             A_FC_diag_i[rowc] = d_count_FC;
777             for (j = S_offd_i[i]; j < S_offd_i[i+1]; j++)
778             {
779                jj = S_offd_j[j];
780                if (CF_marker_offd[jj] > 0)
781                   o_count_FC++;
782                else
783                   o_count_FF++;
784             }
785             A_FF_offd_i[row] = o_count_FF;
786             A_FC_offd_i[rowc] = o_count_FC;
787          }
788          else if (CF_marker[i] < 0)
789          {
790             rowc++;
791             for (j = S_diag_i[i]; j < S_diag_i[i+1]; j++)
792             {
793                jj = S_diag_j[j];
794                if (CF_marker[jj] > 0)
795                   d_count_FC++;
796             }
797             A_FC_diag_i[rowc] = d_count_FC;
798             for (j = S_offd_i[i]; j < S_offd_i[i+1]; j++)
799             {
800                jj = S_offd_j[j];
801                if (CF_marker_offd[jj] > 0)
802                   o_count_FC++;
803             }
804             A_FC_offd_i[rowc] = o_count_FC;
805          }
806       }
807 
808 #ifdef HYPRE_USING_OPENMP
809 #pragma omp barrier
810 #endif
811       if (my_thread_num == 0)
812       {
813          HYPRE_Int fpt2, new_fpt2;
814          for (i=1; i<num_threads+1; i++)
815          {
816             fpt = fpt_array[i];
817             new_fpt = new_fpt_array[i];
818             fpt2 = fpt_array[i-1];
819             new_fpt2 = new_fpt_array[i-1];
820             if (new_fpt != new_fpt2)
821             {
822                A_FF_diag_i[new_fpt] += A_FF_diag_i[new_fpt2];
823                A_FF_offd_i[new_fpt] += A_FF_offd_i[new_fpt2];
824             }
825             if (fpt != fpt2)
826             {
827                A_FC_diag_i[fpt] += A_FC_diag_i[fpt2];
828                A_FC_offd_i[fpt] += A_FC_offd_i[fpt2];
829             }
830          }
831          row = new_fpt_array[num_threads];
832          rowc = fpt_array[num_threads];
833          d_count_FC = A_FC_diag_i[rowc];
834          d_count_FF = A_FF_diag_i[row];
835          o_count_FC = A_FC_offd_i[rowc];
836          o_count_FF = A_FF_offd_i[row];
837          A_FF_diag_j = hypre_CTAlloc(HYPRE_Int,d_count_FF, memory_location_P);
838          A_FC_diag_j = hypre_CTAlloc(HYPRE_Int,d_count_FC, memory_location_P);
839          A_FF_offd_j = hypre_CTAlloc(HYPRE_Int,o_count_FF, memory_location_P);
840          A_FC_offd_j = hypre_CTAlloc(HYPRE_Int,o_count_FC, memory_location_P);
841          A_FF_diag_data = hypre_CTAlloc(HYPRE_Real,d_count_FF, memory_location_P);
842          A_FC_diag_data = hypre_CTAlloc(HYPRE_Real,d_count_FC, memory_location_P);
843          A_FF_offd_data = hypre_CTAlloc(HYPRE_Real,o_count_FF, memory_location_P);
844          A_FC_offd_data = hypre_CTAlloc(HYPRE_Real,o_count_FC, memory_location_P);
845       }
846 
847 #ifdef HYPRE_USING_OPENMP
848 #pragma omp barrier
849 #endif
850       row = new_fpt_array[my_thread_num];
851       rowc = fpt_array[my_thread_num];
852       d_count_FC = A_FC_diag_i[rowc];
853       d_count_FF = A_FF_diag_i[row];
854       o_count_FC = A_FC_offd_i[rowc];
855       o_count_FF = A_FF_offd_i[row];
856       for (i=start; i < stop; i++)
857       {
858          if (CF_marker[i] == -2)
859          {
860             HYPRE_Int jS, jA;
861             row++;
862             rowc++;
863             jA = A_diag_i[i];
864             A_FF_diag_j[d_count_FF] = fine_to_fine[A_diag_j[jA]];
865             A_FF_diag_data[d_count_FF++] = A_diag_data[jA++];
866             for (j = S_diag_i[i]; j < S_diag_i[i+1]; j++)
867             {
868                jA = A_diag_i[i]+1;
869                jS = S_diag_j[j];
870                while (A_diag_j[jA] != jS) jA++;
871                if (CF_marker[S_diag_j[j]] > 0)
872                {
873                   A_FC_diag_j[d_count_FC] = fine_to_coarse[A_diag_j[jA]];
874                   A_FC_diag_data[d_count_FC++] = A_diag_data[jA++];
875                }
876                else
877                {
878                   A_FF_diag_j[d_count_FF] = fine_to_fine[A_diag_j[jA]];
879                   A_FF_diag_data[d_count_FF++] = A_diag_data[jA++];
880                }
881             }
882             A_FF_diag_i[row] = d_count_FF;
883             A_FC_diag_i[rowc] = d_count_FC;
884             for (j = S_offd_i[i]; j < S_offd_i[i+1]; j++)
885             {
886                jA = A_offd_i[i];
887                jS = S_offd_j[j];
888                while (jS != A_offd_j[jA]) jA++;
889                if (CF_marker_offd[S_offd_j[j]] > 0)
890                {
891                   A_FC_offd_j[o_count_FC] = fine_to_coarse_offd[A_offd_j[jA]];
892                   A_FC_offd_data[o_count_FC++] = A_offd_data[jA++];
893                }
894                else
895                {
896                   A_FF_offd_j[o_count_FF] = fine_to_fine_offd[A_offd_j[jA]];
897                   A_FF_offd_data[o_count_FF++] = A_offd_data[jA++];
898                }
899             }
900             A_FF_offd_i[row] = o_count_FF;
901             A_FC_offd_i[rowc] = o_count_FC;
902          }
903          else if (CF_marker[i] < 0)
904          {
905             HYPRE_Int jS, jA;
906             rowc++;
907             for (j = S_diag_i[i]; j < S_diag_i[i+1]; j++)
908             {
909                jA = A_diag_i[i]+1;
910                jS = S_diag_j[j];
911                while (A_diag_j[jA] != jS) jA++;
912                if (CF_marker[S_diag_j[j]] > 0)
913                {
914                   A_FC_diag_j[d_count_FC] = fine_to_coarse[A_diag_j[jA]];
915                   A_FC_diag_data[d_count_FC++] = A_diag_data[jA++];
916                }
917             }
918             A_FC_diag_i[rowc] = d_count_FC;
919             for (j = S_offd_i[i]; j < S_offd_i[i+1]; j++)
920             {
921                jA = A_offd_i[i];
922                jS = S_offd_j[j];
923                while (jS != A_offd_j[jA]) jA++;
924                if (CF_marker_offd[S_offd_j[j]] > 0)
925                {
926                   A_FC_offd_j[o_count_FC] = fine_to_coarse_offd[A_offd_j[jA]];
927                   A_FC_offd_data[o_count_FC++] = A_offd_data[jA++];
928                }
929             }
930             A_FC_offd_i[rowc] = o_count_FC;
931          }
932       }
933    } /*end parallel region */
934 
935    A_FC = hypre_ParCSRMatrixCreate(comm,
936             total_global_fpts,
937             total_global_cpts,
938             fpts_starts,
939             cpts_starts,
940             num_cols_offd_A_FC,
941             A_FC_diag_i[n_Fpts],
942             A_FC_offd_i[n_Fpts]);
943 
944    A_FF = hypre_ParCSRMatrixCreate(comm,
945             total_global_new_fpts,
946             total_global_fpts,
947             new_fpts_starts,
948             fpts_starts,
949             num_cols_offd_A_FF,
950             A_FF_diag_i[n_new_Fpts],
951             A_FF_offd_i[n_new_Fpts]);
952 
953    A_FC_diag = hypre_ParCSRMatrixDiag(A_FC);
954    hypre_CSRMatrixData(A_FC_diag) = A_FC_diag_data;
955    hypre_CSRMatrixI(A_FC_diag) = A_FC_diag_i;
956    hypre_CSRMatrixJ(A_FC_diag) = A_FC_diag_j;
957    A_FC_offd = hypre_ParCSRMatrixOffd(A_FC);
958    hypre_CSRMatrixData(A_FC_offd) = A_FC_offd_data;
959    hypre_CSRMatrixI(A_FC_offd) = A_FC_offd_i;
960    hypre_CSRMatrixJ(A_FC_offd) = A_FC_offd_j;
961    hypre_ParCSRMatrixColMapOffd(A_FC) = col_map_offd_A_FC;
962 
963    hypre_CSRMatrixMemoryLocation(A_FC_diag) = memory_location_P;
964    hypre_CSRMatrixMemoryLocation(A_FC_offd) = memory_location_P;
965 
966    A_FF_diag = hypre_ParCSRMatrixDiag(A_FF);
967    hypre_CSRMatrixData(A_FF_diag) = A_FF_diag_data;
968    hypre_CSRMatrixI(A_FF_diag) = A_FF_diag_i;
969    hypre_CSRMatrixJ(A_FF_diag) = A_FF_diag_j;
970    A_FF_offd = hypre_ParCSRMatrixOffd(A_FF);
971    hypre_CSRMatrixData(A_FF_offd) = A_FF_offd_data;
972    hypre_CSRMatrixI(A_FF_offd) = A_FF_offd_i;
973    hypre_CSRMatrixJ(A_FF_offd) = A_FF_offd_j;
974    hypre_ParCSRMatrixColMapOffd(A_FF) = col_map_offd_A_FF;
975 
976    hypre_CSRMatrixMemoryLocation(A_FF_diag) = memory_location_P;
977    hypre_CSRMatrixMemoryLocation(A_FF_offd) = memory_location_P;
978 
979    hypre_TFree(fine_to_coarse, HYPRE_MEMORY_HOST);
980    hypre_TFree(fine_to_fine, HYPRE_MEMORY_HOST);
981    hypre_TFree(big_convert, HYPRE_MEMORY_HOST);
982    hypre_TFree(fine_to_coarse_offd, HYPRE_MEMORY_HOST);
983    hypre_TFree(fine_to_fine_offd, HYPRE_MEMORY_HOST);
984    hypre_TFree(big_convert_offd, HYPRE_MEMORY_HOST);
985    hypre_TFree(CF_marker_offd, HYPRE_MEMORY_HOST);
986    hypre_TFree(int_buf_data, HYPRE_MEMORY_HOST);
987    hypre_TFree(big_buf_data, HYPRE_MEMORY_HOST);
988 
989    hypre_TFree(cpt_array, HYPRE_MEMORY_HOST);
990    hypre_TFree(fpt_array, HYPRE_MEMORY_HOST);
991    hypre_TFree(new_fpt_array, HYPRE_MEMORY_HOST);
992    hypre_TFree(fpts_starts, HYPRE_MEMORY_HOST);
993    hypre_TFree(new_fpts_starts, HYPRE_MEMORY_HOST);
994 
995    *A_FC_ptr = A_FC;
996    *A_FF_ptr = A_FF;
997 
998    return hypre_error_flag;
999 }
1000 /* -----------------------------------------------------------------------------
1001  * generate AFF, AFC, AFFC for 2 stage extended+i(e)interpolation
1002  * ----------------------------------------------------------------------------- */
1003 
1004 HYPRE_Int
hypre_ParCSRMatrixGenerateFFFCD3(hypre_ParCSRMatrix * A,HYPRE_Int * CF_marker,HYPRE_BigInt * cpts_starts,hypre_ParCSRMatrix * S,hypre_ParCSRMatrix ** A_FC_ptr,hypre_ParCSRMatrix ** A_FF_ptr,HYPRE_Real ** D_lambda_ptr)1005 hypre_ParCSRMatrixGenerateFFFCD3( hypre_ParCSRMatrix *A,
1006                                   HYPRE_Int           *CF_marker,
1007                                   HYPRE_BigInt        *cpts_starts,
1008                                   hypre_ParCSRMatrix  *S,
1009                                   hypre_ParCSRMatrix **A_FC_ptr,
1010                                   hypre_ParCSRMatrix **A_FF_ptr,
1011                                   HYPRE_Real         **D_lambda_ptr)
1012 {
1013    MPI_Comm                 comm     = hypre_ParCSRMatrixComm(A);
1014    HYPRE_MemoryLocation memory_location_P = hypre_ParCSRMatrixMemoryLocation(A);
1015    hypre_ParCSRCommPkg     *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
1016    hypre_ParCSRCommHandle  *comm_handle;
1017 
1018    /* diag part of A */
1019    hypre_CSRMatrix    *A_diag   = hypre_ParCSRMatrixDiag(A);
1020    HYPRE_Complex      *A_diag_data = hypre_CSRMatrixData(A_diag);
1021    HYPRE_Int          *A_diag_i = hypre_CSRMatrixI(A_diag);
1022    HYPRE_Int          *A_diag_j = hypre_CSRMatrixJ(A_diag);
1023    /* off-diag part of A */
1024    hypre_CSRMatrix    *A_offd   = hypre_ParCSRMatrixOffd(A);
1025    HYPRE_Complex      *A_offd_data = hypre_CSRMatrixData(A_offd);
1026    HYPRE_Int          *A_offd_i = hypre_CSRMatrixI(A_offd);
1027    HYPRE_Int          *A_offd_j = hypre_CSRMatrixJ(A_offd);
1028 
1029    HYPRE_Int           n_fine = hypre_CSRMatrixNumRows(A_diag);
1030    HYPRE_Int           num_cols_A_offd = hypre_CSRMatrixNumCols(A_offd);
1031 
1032    /* diag part of S */
1033    hypre_CSRMatrix    *S_diag   = hypre_ParCSRMatrixDiag(S);
1034    HYPRE_Int          *S_diag_i = hypre_CSRMatrixI(S_diag);
1035    HYPRE_Int          *S_diag_j = hypre_CSRMatrixJ(S_diag);
1036    /* off-diag part of S */
1037    hypre_CSRMatrix    *S_offd   = hypre_ParCSRMatrixOffd(S);
1038    HYPRE_Int          *S_offd_i = hypre_CSRMatrixI(S_offd);
1039    HYPRE_Int          *S_offd_j = hypre_CSRMatrixJ(S_offd);
1040 
1041    HYPRE_Real         *D_lambda;
1042    hypre_ParCSRMatrix *A_FC;
1043    hypre_CSRMatrix    *A_FC_diag, *A_FC_offd;
1044    HYPRE_Int          *A_FC_diag_i, *A_FC_diag_j, *A_FC_offd_i, *A_FC_offd_j=NULL;
1045    HYPRE_Complex      *A_FC_diag_data, *A_FC_offd_data=NULL;
1046    HYPRE_Int           num_cols_offd_A_FC;
1047    HYPRE_BigInt       *col_map_offd_A_FC = NULL;
1048 
1049    hypre_ParCSRMatrix *A_FF;
1050    hypre_CSRMatrix    *A_FF_diag, *A_FF_offd;
1051    HYPRE_Int          *A_FF_diag_i, *A_FF_diag_j, *A_FF_offd_i, *A_FF_offd_j;
1052    HYPRE_Complex      *A_FF_diag_data, *A_FF_offd_data;
1053    HYPRE_Int           num_cols_offd_A_FF;
1054    HYPRE_BigInt       *col_map_offd_A_FF = NULL;
1055 
1056    HYPRE_Int          *fine_to_coarse;
1057    HYPRE_Int          *fine_to_fine;
1058    HYPRE_Int          *fine_to_coarse_offd = NULL;
1059    HYPRE_Int          *fine_to_fine_offd = NULL;
1060 
1061    HYPRE_Int           i, j, jj;
1062    HYPRE_Int           startc, index;
1063    HYPRE_Int           cpt, fpt, new_fpt, row, rowc;
1064    HYPRE_Int          *CF_marker_offd = NULL;
1065    HYPRE_Int          *int_buf_data = NULL;
1066    HYPRE_BigInt       *big_convert;
1067    HYPRE_BigInt       *big_convert_offd = NULL;
1068    HYPRE_BigInt       *big_buf_data = NULL;
1069 
1070    HYPRE_BigInt        total_global_fpts, total_global_cpts, total_global_new_fpts;
1071    HYPRE_BigInt       *fpts_starts, *new_fpts_starts;
1072    HYPRE_Int           my_id, num_procs, num_sends;
1073    HYPRE_Int           d_count_FF, d_count_FC, o_count_FF, o_count_FC;
1074    HYPRE_Int           n_Fpts;
1075    HYPRE_Int           n_new_Fpts;
1076    HYPRE_Int          *cpt_array, *fpt_array, *new_fpt_array;
1077    HYPRE_Int           start, stop;
1078    HYPRE_Int           num_threads;
1079 
1080    num_threads = hypre_NumThreads();
1081 
1082    /* MPI size and rank*/
1083    hypre_MPI_Comm_size(comm, &num_procs);
1084    hypre_MPI_Comm_rank(comm, &my_id);
1085 
1086    fine_to_coarse = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
1087    fine_to_fine = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
1088    big_convert = hypre_CTAlloc(HYPRE_BigInt, n_fine, HYPRE_MEMORY_HOST);
1089 
1090    cpt_array = hypre_CTAlloc(HYPRE_Int, num_threads+1, HYPRE_MEMORY_HOST);
1091    fpt_array = hypre_CTAlloc(HYPRE_Int, num_threads+1, HYPRE_MEMORY_HOST);
1092    new_fpt_array = hypre_CTAlloc(HYPRE_Int, num_threads+1, HYPRE_MEMORY_HOST);
1093 #ifdef HYPRE_USING_OPENMP
1094 #pragma omp parallel private(i,j,jj,start,stop,row,rowc,cpt,new_fpt,fpt,d_count_FC,d_count_FF,o_count_FC,o_count_FF)
1095 #endif
1096    {
1097       HYPRE_Int my_thread_num = hypre_GetThreadNum();
1098 
1099       start = (n_fine/num_threads)*my_thread_num;
1100       if (my_thread_num == num_threads-1)
1101       {
1102          stop = n_fine;
1103       }
1104       else
1105       {
1106          stop = (n_fine/num_threads)*(my_thread_num+1);
1107       }
1108       for (i=start; i < stop; i++)
1109       {
1110          if (CF_marker[i] > 0)
1111          {
1112             cpt_array[my_thread_num+1]++;
1113          }
1114          else if (CF_marker[i] == -2)
1115          {
1116             new_fpt_array[my_thread_num+1]++;
1117             fpt_array[my_thread_num+1]++;
1118          }
1119          else
1120          {
1121             fpt_array[my_thread_num+1]++;
1122          }
1123       }
1124 
1125 #ifdef HYPRE_USING_OPENMP
1126 #pragma omp barrier
1127 #endif
1128       if (my_thread_num == 0)
1129       {
1130          for (i=1; i < num_threads; i++)
1131          {
1132             cpt_array[i+1] += cpt_array[i];
1133             fpt_array[i+1] += fpt_array[i];
1134             new_fpt_array[i+1] += new_fpt_array[i];
1135          }
1136       }
1137 #ifdef HYPRE_USING_OPENMP
1138 #pragma omp barrier
1139 #endif
1140 
1141       cpt = cpt_array[my_thread_num];
1142       fpt = fpt_array[my_thread_num];
1143       for (i=start; i < stop; i++)
1144       {
1145          if (CF_marker[i] > 0)
1146          {
1147             fine_to_coarse[i] = cpt++;
1148             fine_to_fine[i] = -1;
1149          }
1150          else
1151          {
1152             fine_to_fine[i] = fpt++;
1153             fine_to_coarse[i] = -1;
1154          }
1155       }
1156 #ifdef HYPRE_USING_OPENMP
1157 #pragma omp barrier
1158 #endif
1159 
1160       if (my_thread_num == 0)
1161       {
1162          HYPRE_BigInt big_Fpts, big_new_Fpts;
1163          n_Fpts = fpt_array[num_threads];
1164          n_new_Fpts = new_fpt_array[num_threads];
1165          big_Fpts = n_Fpts;
1166          big_new_Fpts = n_new_Fpts;
1167 
1168          fpts_starts = hypre_CTAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST);
1169          new_fpts_starts = hypre_CTAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST);
1170          hypre_MPI_Scan(&big_Fpts, fpts_starts+1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm);
1171          hypre_MPI_Scan(&big_new_Fpts, new_fpts_starts+1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm);
1172          fpts_starts[0] = fpts_starts[1] - big_Fpts;
1173          new_fpts_starts[0] = new_fpts_starts[1] - big_new_Fpts;
1174          if (my_id == num_procs - 1)
1175          {
1176             total_global_new_fpts = new_fpts_starts[1];
1177             total_global_fpts = fpts_starts[1];
1178             total_global_cpts = cpts_starts[1];
1179          }
1180          hypre_MPI_Bcast(&total_global_new_fpts, 1, HYPRE_MPI_BIG_INT, num_procs-1, comm);
1181          hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs-1, comm);
1182          hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs-1, comm);
1183       }
1184 #ifdef HYPRE_USING_OPENMP
1185 #pragma omp barrier
1186 #endif
1187 
1188       for (i=start; i < stop; i++)
1189       {
1190          if (CF_marker[i] > 0)
1191          {
1192             big_convert[i] = (HYPRE_BigInt)fine_to_coarse[i] + cpts_starts[0];
1193          }
1194          else
1195          {
1196             big_convert[i] = (HYPRE_BigInt)fine_to_fine[i] + fpts_starts[0];
1197          }
1198       }
1199 
1200 #ifdef HYPRE_USING_OPENMP
1201 #pragma omp barrier
1202 #endif
1203       if (my_thread_num == 0)
1204       {
1205          if (num_cols_A_offd)
1206          {
1207             CF_marker_offd = hypre_CTAlloc(HYPRE_Int,  num_cols_A_offd, HYPRE_MEMORY_HOST);
1208             big_convert_offd = hypre_CTAlloc(HYPRE_BigInt,  num_cols_A_offd, HYPRE_MEMORY_HOST);
1209             fine_to_coarse_offd = hypre_CTAlloc(HYPRE_Int,  num_cols_A_offd, HYPRE_MEMORY_HOST);
1210             fine_to_fine_offd = hypre_CTAlloc(HYPRE_Int,  num_cols_A_offd, HYPRE_MEMORY_HOST);
1211          }
1212          index = 0;
1213          num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
1214          int_buf_data = hypre_CTAlloc(HYPRE_Int,  hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), HYPRE_MEMORY_HOST);
1215          big_buf_data = hypre_CTAlloc(HYPRE_BigInt,  hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), HYPRE_MEMORY_HOST);
1216          for (i = 0; i < num_sends; i++)
1217          {
1218             startc = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
1219             for (j = startc; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
1220             {
1221                int_buf_data[index] = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
1222                big_buf_data[index++] = big_convert[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
1223             }
1224          }
1225 
1226          comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg, int_buf_data, CF_marker_offd);
1227 
1228          hypre_ParCSRCommHandleDestroy(comm_handle);
1229 
1230          comm_handle = hypre_ParCSRCommHandleCreate( 21, comm_pkg, big_buf_data, big_convert_offd);
1231 
1232          hypre_ParCSRCommHandleDestroy(comm_handle);
1233 
1234          num_cols_offd_A_FC = 0;
1235          num_cols_offd_A_FF = 0;
1236          if (num_cols_A_offd)
1237          {
1238             for (i=0; i < num_cols_A_offd; i++)
1239             {
1240                if (CF_marker_offd[i] > 0)
1241                {
1242                   fine_to_coarse_offd[i] = num_cols_offd_A_FC++;
1243                   fine_to_fine_offd[i] = -1;
1244                }
1245                else
1246                {
1247                   fine_to_fine_offd[i] = num_cols_offd_A_FF++;
1248                   fine_to_coarse_offd[i] = -1;
1249                }
1250             }
1251 
1252             col_map_offd_A_FF = hypre_TAlloc(HYPRE_BigInt, num_cols_offd_A_FF, HYPRE_MEMORY_HOST);
1253             col_map_offd_A_FC = hypre_TAlloc(HYPRE_BigInt, num_cols_offd_A_FC, HYPRE_MEMORY_HOST);
1254 
1255             cpt = 0;
1256             fpt = 0;
1257             for (i=0; i < num_cols_A_offd; i++)
1258             {
1259                if (CF_marker_offd[i] > 0)
1260                {
1261                   col_map_offd_A_FC[cpt++] = big_convert_offd[i];
1262                }
1263                else
1264                {
1265                    col_map_offd_A_FF[fpt++] = big_convert_offd[i];
1266                }
1267             }
1268          }
1269 
1270          A_FF_diag_i = hypre_CTAlloc(HYPRE_Int,n_new_Fpts+1, memory_location_P);
1271          A_FC_diag_i = hypre_CTAlloc(HYPRE_Int,n_Fpts+1, memory_location_P);
1272          A_FF_offd_i = hypre_CTAlloc(HYPRE_Int,n_new_Fpts+1, memory_location_P);
1273          A_FC_offd_i = hypre_CTAlloc(HYPRE_Int,n_Fpts+1, memory_location_P);
1274          D_lambda = hypre_CTAlloc(HYPRE_Real, n_Fpts, memory_location_P);
1275       }
1276 
1277 #ifdef HYPRE_USING_OPENMP
1278 #pragma omp barrier
1279 #endif
1280       d_count_FC = 0;
1281       d_count_FF = 0;
1282       o_count_FC = 0;
1283       o_count_FF = 0;
1284       row = new_fpt_array[my_thread_num];
1285       rowc = fpt_array[my_thread_num];
1286       for (i=start; i < stop; i++)
1287       {
1288          if (CF_marker[i] == -2)
1289          {
1290             row++;
1291             rowc++;
1292             d_count_FF++; /* account for diagonal element */
1293             for (j = S_diag_i[i]; j < S_diag_i[i+1]; j++)
1294             {
1295                jj = S_diag_j[j];
1296                if (CF_marker[jj] > 0)
1297                   d_count_FC++;
1298                else
1299                   d_count_FF++;
1300             }
1301             A_FF_diag_i[row] = d_count_FF;
1302             A_FC_diag_i[rowc] = d_count_FC;
1303             for (j = S_offd_i[i]; j < S_offd_i[i+1]; j++)
1304             {
1305                jj = S_offd_j[j];
1306                if (CF_marker_offd[jj] > 0)
1307                   o_count_FC++;
1308                else
1309                   o_count_FF++;
1310             }
1311             A_FF_offd_i[row] = o_count_FF;
1312             A_FC_offd_i[rowc] = o_count_FC;
1313          }
1314          else if (CF_marker[i] < 0)
1315          {
1316             rowc++;
1317             for (j = S_diag_i[i]; j < S_diag_i[i+1]; j++)
1318             {
1319                jj = S_diag_j[j];
1320                if (CF_marker[jj] > 0)
1321                   d_count_FC++;
1322             }
1323             A_FC_diag_i[rowc] = d_count_FC;
1324             for (j = S_offd_i[i]; j < S_offd_i[i+1]; j++)
1325             {
1326                jj = S_offd_j[j];
1327                if (CF_marker_offd[jj] > 0)
1328                   o_count_FC++;
1329             }
1330             A_FC_offd_i[rowc] = o_count_FC;
1331          }
1332       }
1333 
1334 #ifdef HYPRE_USING_OPENMP
1335 #pragma omp barrier
1336 #endif
1337       if (my_thread_num == 0)
1338       {
1339          HYPRE_Int fpt2, new_fpt2;
1340          for (i=1; i<num_threads+1; i++)
1341          {
1342             fpt = fpt_array[i];
1343             new_fpt = new_fpt_array[i];
1344             fpt2 = fpt_array[i-1];
1345             new_fpt2 = new_fpt_array[i-1];
1346             if (fpt != fpt2)
1347             {
1348                A_FC_diag_i[fpt] += A_FC_diag_i[fpt2];
1349                A_FC_offd_i[fpt] += A_FC_offd_i[fpt2];
1350             }
1351             if (new_fpt != new_fpt2)
1352             {
1353                A_FF_diag_i[new_fpt] += A_FF_diag_i[new_fpt2];
1354                A_FF_offd_i[new_fpt] += A_FF_offd_i[new_fpt2];
1355             }
1356          }
1357          row = new_fpt_array[num_threads];
1358          rowc = fpt_array[num_threads];
1359          d_count_FC = A_FC_diag_i[rowc];
1360          d_count_FF = A_FF_diag_i[row];
1361          o_count_FC = A_FC_offd_i[rowc];
1362          o_count_FF = A_FF_offd_i[row];
1363          A_FF_diag_j = hypre_CTAlloc(HYPRE_Int,d_count_FF, memory_location_P);
1364          A_FC_diag_j = hypre_CTAlloc(HYPRE_Int,d_count_FC, memory_location_P);
1365          A_FF_offd_j = hypre_CTAlloc(HYPRE_Int,o_count_FF, memory_location_P);
1366          A_FC_offd_j = hypre_CTAlloc(HYPRE_Int,o_count_FC, memory_location_P);
1367          A_FF_diag_data = hypre_CTAlloc(HYPRE_Real,d_count_FF, memory_location_P);
1368          A_FC_diag_data = hypre_CTAlloc(HYPRE_Real,d_count_FC, memory_location_P);
1369          A_FF_offd_data = hypre_CTAlloc(HYPRE_Real,o_count_FF, memory_location_P);
1370          A_FC_offd_data = hypre_CTAlloc(HYPRE_Real,o_count_FC, memory_location_P);
1371       }
1372 
1373 #ifdef HYPRE_USING_OPENMP
1374 #pragma omp barrier
1375 #endif
1376       row = new_fpt_array[my_thread_num];
1377       rowc = fpt_array[my_thread_num];
1378       d_count_FC = A_FC_diag_i[rowc];
1379       d_count_FF = A_FF_diag_i[row];
1380       o_count_FC = A_FC_offd_i[rowc];
1381       o_count_FF = A_FF_offd_i[row];
1382       for (i=start; i < stop; i++)
1383       {
1384          if (CF_marker[i] == -2)
1385          {
1386             HYPRE_Int jS, jA;
1387             HYPRE_Real sum = 0;
1388             row++;
1389             jA = A_diag_i[i];
1390             A_FF_diag_j[d_count_FF] = fine_to_fine[A_diag_j[jA]];
1391             A_FF_diag_data[d_count_FF++] = A_diag_data[jA++];
1392             for (j = S_diag_i[i]; j < S_diag_i[i+1]; j++)
1393             {
1394                jA = A_diag_i[i]+1;
1395                jS = S_diag_j[j];
1396                while (A_diag_j[jA] != jS) jA++;
1397                if (CF_marker[S_diag_j[j]] > 0)
1398                {
1399                   A_FC_diag_j[d_count_FC] = fine_to_coarse[A_diag_j[jA]];
1400                   A_FC_diag_data[d_count_FC++] = A_diag_data[jA++];
1401                }
1402                else
1403                {
1404                   sum += 1;
1405                   D_lambda[rowc] += A_diag_data[jA];
1406                   A_FF_diag_j[d_count_FF] = fine_to_fine[A_diag_j[jA]];
1407                   A_FF_diag_data[d_count_FF++] = A_diag_data[jA++];
1408                }
1409             }
1410             for (j = S_offd_i[i]; j < S_offd_i[i+1]; j++)
1411             {
1412                jA = A_offd_i[i];
1413                jS = S_offd_j[j];
1414                while (jS != A_offd_j[jA]) jA++;
1415                if (CF_marker_offd[S_offd_j[j]] > 0)
1416                {
1417                   A_FC_offd_j[o_count_FC] = fine_to_coarse_offd[A_offd_j[jA]];
1418                   A_FC_offd_data[o_count_FC++] = A_offd_data[jA++];
1419                }
1420                else
1421                {
1422                   sum += 1;
1423                   D_lambda[rowc] += A_offd_data[jA];
1424                   A_FF_offd_j[o_count_FF] = fine_to_fine_offd[A_offd_j[jA]];
1425                   A_FF_offd_data[o_count_FF++] = A_offd_data[jA++];
1426                }
1427             }
1428             if (sum) D_lambda[rowc] = D_lambda[rowc]/sum;
1429             rowc++;
1430             A_FF_diag_i[row] = d_count_FF;
1431             A_FC_diag_i[rowc] = d_count_FC;
1432             A_FF_offd_i[row] = o_count_FF;
1433             A_FC_offd_i[rowc] = o_count_FC;
1434          }
1435          else if (CF_marker[i] < 0)
1436          {
1437             HYPRE_Int jS, jA;
1438             HYPRE_Real sum = 0;
1439             for (j = S_diag_i[i]; j < S_diag_i[i+1]; j++)
1440             {
1441                jA = A_diag_i[i]+1;
1442                jS = S_diag_j[j];
1443                while (A_diag_j[jA] != jS) jA++;
1444                if (CF_marker[S_diag_j[j]] > 0)
1445                {
1446                   A_FC_diag_j[d_count_FC] = fine_to_coarse[A_diag_j[jA]];
1447                   A_FC_diag_data[d_count_FC++] = A_diag_data[jA++];
1448                }
1449                else
1450                {
1451                   sum += 1;
1452                   D_lambda[rowc] += A_diag_data[jA];
1453                }
1454             }
1455             for (j = S_offd_i[i]; j < S_offd_i[i+1]; j++)
1456             {
1457                jA = A_offd_i[i];
1458                jS = S_offd_j[j];
1459                while (jS != A_offd_j[jA]) jA++;
1460                if (CF_marker_offd[S_offd_j[j]] > 0)
1461                {
1462                   A_FC_offd_j[o_count_FC] = fine_to_coarse_offd[A_offd_j[jA]];
1463                   A_FC_offd_data[o_count_FC++] = A_offd_data[jA++];
1464                }
1465                else
1466                {
1467                   sum += 1;
1468                   D_lambda[rowc] += A_offd_data[jA];
1469                }
1470             }
1471             if (sum) D_lambda[rowc] = D_lambda[rowc]/sum;
1472             rowc++;
1473             A_FC_diag_i[rowc] = d_count_FC;
1474             A_FC_offd_i[rowc] = o_count_FC;
1475          }
1476       }
1477    } /*end parallel region */
1478 
1479    A_FC = hypre_ParCSRMatrixCreate(comm,
1480             total_global_fpts,
1481             total_global_cpts,
1482             fpts_starts,
1483             cpts_starts,
1484             num_cols_offd_A_FC,
1485             A_FC_diag_i[n_Fpts],
1486             A_FC_offd_i[n_Fpts]);
1487 
1488    A_FF = hypre_ParCSRMatrixCreate(comm,
1489             total_global_new_fpts,
1490             total_global_fpts,
1491             new_fpts_starts,
1492             fpts_starts,
1493             num_cols_offd_A_FF,
1494             A_FF_diag_i[n_new_Fpts],
1495             A_FF_offd_i[n_new_Fpts]);
1496 
1497    A_FC_diag = hypre_ParCSRMatrixDiag(A_FC);
1498    hypre_CSRMatrixData(A_FC_diag) = A_FC_diag_data;
1499    hypre_CSRMatrixI(A_FC_diag) = A_FC_diag_i;
1500    hypre_CSRMatrixJ(A_FC_diag) = A_FC_diag_j;
1501    A_FC_offd = hypre_ParCSRMatrixOffd(A_FC);
1502    hypre_CSRMatrixData(A_FC_offd) = A_FC_offd_data;
1503    hypre_CSRMatrixI(A_FC_offd) = A_FC_offd_i;
1504    hypre_CSRMatrixJ(A_FC_offd) = A_FC_offd_j;
1505    hypre_ParCSRMatrixColMapOffd(A_FC) = col_map_offd_A_FC;
1506 
1507    hypre_CSRMatrixMemoryLocation(A_FC_diag) = memory_location_P;
1508    hypre_CSRMatrixMemoryLocation(A_FC_offd) = memory_location_P;
1509 
1510    A_FF_diag = hypre_ParCSRMatrixDiag(A_FF);
1511    hypre_CSRMatrixData(A_FF_diag) = A_FF_diag_data;
1512    hypre_CSRMatrixI(A_FF_diag) = A_FF_diag_i;
1513    hypre_CSRMatrixJ(A_FF_diag) = A_FF_diag_j;
1514    A_FF_offd = hypre_ParCSRMatrixOffd(A_FF);
1515    hypre_CSRMatrixData(A_FF_offd) = A_FF_offd_data;
1516    hypre_CSRMatrixI(A_FF_offd) = A_FF_offd_i;
1517    hypre_CSRMatrixJ(A_FF_offd) = A_FF_offd_j;
1518    hypre_ParCSRMatrixColMapOffd(A_FF) = col_map_offd_A_FF;
1519 
1520    hypre_CSRMatrixMemoryLocation(A_FF_diag) = memory_location_P;
1521    hypre_CSRMatrixMemoryLocation(A_FF_offd) = memory_location_P;
1522 
1523    hypre_TFree(fine_to_coarse, HYPRE_MEMORY_HOST);
1524    hypre_TFree(fine_to_fine, HYPRE_MEMORY_HOST);
1525    hypre_TFree(big_convert, HYPRE_MEMORY_HOST);
1526    hypre_TFree(fine_to_coarse_offd, HYPRE_MEMORY_HOST);
1527    hypre_TFree(fine_to_fine_offd, HYPRE_MEMORY_HOST);
1528    hypre_TFree(big_convert_offd, HYPRE_MEMORY_HOST);
1529    hypre_TFree(CF_marker_offd, HYPRE_MEMORY_HOST);
1530    hypre_TFree(int_buf_data, HYPRE_MEMORY_HOST);
1531    hypre_TFree(big_buf_data, HYPRE_MEMORY_HOST);
1532 
1533    hypre_TFree(cpt_array, HYPRE_MEMORY_HOST);
1534    hypre_TFree(fpt_array, HYPRE_MEMORY_HOST);
1535    hypre_TFree(new_fpt_array, HYPRE_MEMORY_HOST);
1536    hypre_TFree(fpts_starts, HYPRE_MEMORY_HOST);
1537    hypre_TFree(new_fpts_starts, HYPRE_MEMORY_HOST);
1538 
1539    *A_FC_ptr = A_FC;
1540    *A_FF_ptr = A_FF;
1541    *D_lambda_ptr = D_lambda;
1542 
1543    return hypre_error_flag;
1544 }
1545