1 /******************************************************************************
2  * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3  * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4  *
5  * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6  ******************************************************************************/
7 
8 #include "_hypre_parcsr_ls.h"
9 
10 /*--------------------------------------------------------------------------
11  * hypre_ParAMGBuildModMultipass
12  * This routine implements Stuben's direct interpolation with multiple passes.
13  * expressed with matrix matrix multiplications
14  *--------------------------------------------------------------------------*/
15 
16 HYPRE_Int
hypre_BoomerAMGBuildModMultipassHost(hypre_ParCSRMatrix * A,HYPRE_Int * CF_marker,hypre_ParCSRMatrix * S,HYPRE_BigInt * num_cpts_global,HYPRE_Real trunc_factor,HYPRE_Int P_max_elmts,HYPRE_Int interp_type,HYPRE_Int num_functions,HYPRE_Int * dof_func,hypre_ParCSRMatrix ** P_ptr)17 hypre_BoomerAMGBuildModMultipassHost( hypre_ParCSRMatrix  *A,
18                                       HYPRE_Int           *CF_marker,
19                                       hypre_ParCSRMatrix  *S,
20                                       HYPRE_BigInt        *num_cpts_global,
21                                       HYPRE_Real           trunc_factor,
22                                       HYPRE_Int            P_max_elmts,
23                                       HYPRE_Int            interp_type,
24                                       HYPRE_Int            num_functions,
25                                       HYPRE_Int           *dof_func,
26                                       hypre_ParCSRMatrix **P_ptr )
27 {
28 #ifdef HYPRE_PROFILE
29    hypre_profile_times[HYPRE_TIMER_ID_MULTIPASS_INTERP] -= hypre_MPI_Wtime();
30 #endif
31 
32    MPI_Comm                comm = hypre_ParCSRMatrixComm(A);
33    hypre_ParCSRCommPkg    *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
34    hypre_ParCSRCommHandle *comm_handle;
35 
36    hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
37    HYPRE_Int       *A_diag_i = hypre_CSRMatrixI(A_diag);
38    HYPRE_Int       *A_diag_j = hypre_CSRMatrixJ(A_diag);
39    HYPRE_Real      *A_diag_data = hypre_CSRMatrixData(A_diag);
40    HYPRE_Int        n_fine = hypre_CSRMatrixNumRows(A_diag);
41 
42    hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
43    HYPRE_Int       *A_offd_i = hypre_CSRMatrixI(A_offd);
44    HYPRE_Int       *A_offd_j = hypre_CSRMatrixJ(A_offd);
45    HYPRE_Real      *A_offd_data = hypre_CSRMatrixData(A_offd);
46 
47    HYPRE_Int        num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd);
48 
49    hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S);
50    HYPRE_Int       *S_diag_i = hypre_CSRMatrixI(S_diag);
51    HYPRE_Int       *S_diag_j = hypre_CSRMatrixJ(S_diag);
52 
53    hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd(S);
54    HYPRE_Int       *S_offd_i = hypre_CSRMatrixI(S_offd);
55    HYPRE_Int       *S_offd_j = hypre_CSRMatrixJ(S_offd);
56 
57    hypre_ParCSRMatrix **Pi;
58    hypre_ParCSRMatrix *P;
59    hypre_CSRMatrix *P_diag;
60    HYPRE_Real      *P_diag_data;
61    HYPRE_Int       *P_diag_i; /*at first counter of nonzero cols for each row,
62                                       finally will be pointer to start of row */
63    HYPRE_Int       *P_diag_j;
64 
65    hypre_CSRMatrix *P_offd;
66    HYPRE_Real      *P_offd_data = NULL;
67    HYPRE_Int       *P_offd_i; /*at first counter of nonzero cols for each row,
68                                       finally will be pointer to start of row */
69    HYPRE_Int       *P_offd_j = NULL;
70    HYPRE_BigInt    *col_map_offd_P = NULL;
71    HYPRE_Int        num_cols_offd_P = 0;
72 
73    HYPRE_Int        num_sends = 0;
74    HYPRE_Int       *int_buf_data = NULL;
75 
76    HYPRE_Int       *fine_to_coarse;
77    HYPRE_Int       *points_left;
78    HYPRE_Int       *pass_marker;
79    HYPRE_Int       *pass_marker_offd = NULL;
80    HYPRE_Int       *pass_order;
81    HYPRE_Int       *pass_starts;
82 
83    HYPRE_Int        i, j, i1, i2, j1;
84    HYPRE_Int        num_passes, p, remaining;
85    HYPRE_Int        global_remaining;
86    HYPRE_Int        cnt, cnt_old, cnt_rem, current_pass;
87    HYPRE_Int        startc, index;
88 
89    HYPRE_BigInt     total_global_cpts;
90    HYPRE_Int        my_id, num_procs;
91    HYPRE_Int        P_offd_size = 0;
92 
93    HYPRE_Int       *dof_func_offd = NULL;
94    HYPRE_Real      *row_sums = NULL;
95 
96    /* MPI size and rank*/
97    hypre_MPI_Comm_size(comm, &num_procs);
98    hypre_MPI_Comm_rank(comm, &my_id);
99 
100    if (num_procs > 1)
101    {
102       if (my_id == num_procs - 1)
103       {
104          total_global_cpts = num_cpts_global[1];
105       }
106       hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs-1, comm);
107    }
108    else
109    {
110       total_global_cpts = num_cpts_global[1];
111    }
112    /* Generate pass marker array */
113 
114    pass_marker = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
115    /* contains pass numbers for each variable according to original order */
116    pass_order = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
117    /* contains row numbers according to new order, pass 1 followed by pass 2 etc */
118    fine_to_coarse = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
119    /* reverse of pass_order, keeps track where original numbers go */
120    points_left = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
121    /* contains row numbers of remaining points, auxiliary */
122    pass_starts = hypre_CTAlloc(HYPRE_Int, 10, HYPRE_MEMORY_HOST);
123    /* contains beginning for each pass in pass_order field, assume no more than 10 passes */
124 
125    P_diag_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, HYPRE_MEMORY_HOST);
126    P_offd_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, HYPRE_MEMORY_HOST);
127 
128    cnt = 0;
129    remaining = 0;
130    for (i=0; i < n_fine; i++)
131    {
132       if (CF_marker[i] == 1)
133       {
134          pass_marker[i] = 1;
135          P_diag_i[i+1] = 1;
136          P_offd_i[i+1] = 0;
137          fine_to_coarse[i] = cnt;
138          pass_order[cnt++] = i;
139       }
140       else
141       {
142          points_left[remaining++] = i;
143       }
144    }
145    pass_starts[0] = 0;
146    pass_starts[1] = cnt;
147 
148    if (num_functions > 1)
149    {
150       dof_func_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A, HYPRE_MEMORY_HOST);
151       index = 0;
152       num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
153       int_buf_data = hypre_CTAlloc(HYPRE_Int,  hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), HYPRE_MEMORY_HOST);
154       for (i = 0; i < num_sends; i++)
155       {
156          startc = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
157          for (j = startc; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
158          {
159             int_buf_data[index++] = dof_func[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
160          }
161       }
162 
163       comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg, int_buf_data, dof_func_offd);
164 
165       hypre_ParCSRCommHandleDestroy(comm_handle);
166 
167       hypre_TFree(int_buf_data, HYPRE_MEMORY_HOST);
168    }
169 
170    if (num_procs > 1)
171    {
172       pass_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A, HYPRE_MEMORY_HOST);
173       index = 0;
174       num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
175       int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), HYPRE_MEMORY_HOST);
176       for (i = 0; i < num_sends; i++)
177       {
178          startc = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
179          for (j = startc; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
180          {
181             int_buf_data[index++] = pass_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
182          }
183       }
184 
185       comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data, pass_marker_offd);
186 
187       hypre_ParCSRCommHandleDestroy(comm_handle);
188    }
189    current_pass = 1;
190    num_passes = 1;
191    /* color points according to pass number */
192    hypre_MPI_Allreduce(&remaining, &global_remaining, 1, HYPRE_MPI_INT, hypre_MPI_MAX, comm);
193    while (global_remaining > 0)
194    {
195       HYPRE_Int remaining_pts = remaining;
196       cnt_rem = 0;
197       for (i=0; i < remaining_pts; i++)
198       {
199          i1 = points_left[i];
200          cnt_old = cnt;
201          for (j=S_diag_i[i1]; j < S_diag_i[i1+1]; j++)
202          {
203             j1 = S_diag_j[j];
204             if (pass_marker[j1] == current_pass)
205             {
206                pass_marker[i1] = current_pass + 1;
207                pass_order[cnt++] = i1;
208                remaining--;
209                break;
210             }
211          }
212          if (cnt == cnt_old)
213          {
214             for (j=S_offd_i[i1]; j < S_offd_i[i1+1]; j++)
215             {
216                j1 = S_offd_j[j];
217                if (pass_marker_offd[j1] == current_pass)
218                {
219                   pass_marker[i1] = current_pass + 1;
220                   pass_order[cnt++] = i1;
221                   remaining--;
222                   break;
223                }
224             }
225          }
226          if (cnt == cnt_old)
227          {
228             points_left[cnt_rem++] = i1;
229          }
230       }
231       remaining = cnt_rem;
232       current_pass++;
233       num_passes++;
234       if (num_passes > 9)
235       {
236          hypre_error_w_msg(HYPRE_ERROR_GENERIC," Warning!!! too many passes! out of range!\n");
237          break;
238       }
239       pass_starts[num_passes] = cnt;
240       /* update pass_marker_offd */
241       index = 0;
242       if (num_procs > 1)
243       {
244          for (i = 0; i < num_sends; i++)
245          {
246             startc = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
247             for (j = startc; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
248             {
249                int_buf_data[index++] = pass_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
250             }
251          }
252          comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data, pass_marker_offd);
253 
254          hypre_ParCSRCommHandleDestroy(comm_handle);
255       }
256       hypre_MPI_Allreduce(&remaining, &global_remaining, 1, HYPRE_MPI_INT, hypre_MPI_MAX, comm);
257    }
258    hypre_TFree(int_buf_data, HYPRE_MEMORY_HOST);
259    hypre_TFree(points_left, HYPRE_MEMORY_HOST);
260 
261    /* generate row sum of weak points and C-points to be ignored */
262 
263    row_sums = hypre_CTAlloc(HYPRE_Real, n_fine, HYPRE_MEMORY_HOST);
264    if (num_functions >  1)
265    {
266       for (i=0; i < n_fine; i++)
267       {
268          if (CF_marker[i] < 0)
269          {
270             for (j = A_diag_i[i]+1; j < A_diag_i[i+1]; j++)
271             {
272                if (dof_func[i] == dof_func[A_diag_j[j]])
273                {
274                   row_sums[i] += A_diag_data[j];
275                }
276             }
277             for (j = A_offd_i[i]; j < A_offd_i[i+1]; j++)
278             {
279                if (dof_func[i] == dof_func_offd[A_offd_j[j]])
280                {
281                    row_sums[i] += A_offd_data[j];
282                }
283             }
284          }
285       }
286    }
287    else
288    {
289       for (i=0; i < n_fine; i++)
290       {
291          if (CF_marker[i] < 0)
292          {
293             for (j = A_diag_i[i]+1; j < A_diag_i[i+1]; j++)
294             {
295                row_sums[i] += A_diag_data[j];
296             }
297             for (j = A_offd_i[i]; j < A_offd_i[i+1]; j++)
298             {
299                row_sums[i] += A_offd_data[j];
300             }
301          }
302       }
303    }
304 
305    Pi = hypre_CTAlloc(hypre_ParCSRMatrix*, num_passes, HYPRE_MEMORY_HOST);
306    hypre_GenerateMultipassPi(A, S, num_cpts_global, &pass_order[pass_starts[1]], pass_marker,
307                              pass_marker_offd, pass_starts[2]-pass_starts[1], 1, row_sums, &Pi[0]);
308    if (interp_type == 8)
309    {
310       for (i=1; i<num_passes-1; i++)
311       {
312          hypre_ParCSRMatrix *Q;
313          HYPRE_BigInt *c_pts_starts = hypre_ParCSRMatrixRowStarts(Pi[i-1]);
314          hypre_GenerateMultipassPi(A, S, c_pts_starts, &pass_order[pass_starts[i+1]], pass_marker,
315                              pass_marker_offd, pass_starts[i+2]-pass_starts[i+1], i+1, row_sums, &Q);
316          Pi[i] = hypre_ParMatmul(Q, Pi[i-1]);
317          hypre_ParCSRMatrixDestroy(Q);
318       }
319    }
320    else if (interp_type == 9)
321    {
322       for (i=1; i<num_passes-1; i++)
323       {
324          HYPRE_BigInt *c_pts_starts = hypre_ParCSRMatrixRowStarts(Pi[i-1]);
325          hypre_GenerateMultiPi(A, S, Pi[i-1], c_pts_starts, &pass_order[pass_starts[i+1]], pass_marker,
326                              pass_marker_offd, pass_starts[i+2]-pass_starts[i+1], i+1,
327                              num_functions, dof_func, dof_func_offd, &Pi[i]);
328       }
329    }
330 
331    /* p pulate P_diag_i[i+1] with nnz of i-th row */
332    for (i = 0; i < num_passes-1; i++)
333    {
334       HYPRE_Int *Pi_diag_i = hypre_CSRMatrixI(hypre_ParCSRMatrixDiag(Pi[i]));
335       HYPRE_Int *Pi_offd_i = hypre_CSRMatrixI(hypre_ParCSRMatrixOffd(Pi[i]));
336       j1 = 0;
337       for (j=pass_starts[i+1]; j < pass_starts[i+2]; j++)
338       {
339          i1 = pass_order[j];
340          P_diag_i[i1+1] = Pi_diag_i[j1+1]-Pi_diag_i[j1];
341          P_offd_i[i1+1] = Pi_offd_i[j1+1]-Pi_offd_i[j1];
342          j1++;
343       }
344    }
345 
346    for (i=0; i < n_fine; i++)
347    {
348       P_diag_i[i+1] += P_diag_i[i];
349       P_offd_i[i+1] += P_offd_i[i];
350    }
351 
352    P_diag_j = hypre_CTAlloc(HYPRE_Int, P_diag_i[n_fine], HYPRE_MEMORY_HOST);
353    P_diag_data = hypre_CTAlloc(HYPRE_Real, P_diag_i[n_fine], HYPRE_MEMORY_HOST);
354    P_offd_j = hypre_CTAlloc(HYPRE_Int, P_offd_i[n_fine], HYPRE_MEMORY_HOST);
355    P_offd_data = hypre_CTAlloc(HYPRE_Real, P_offd_i[n_fine], HYPRE_MEMORY_HOST);
356 
357    /* insert weights for coarse points */
358    for (i=0; i < pass_starts[1]; i++)
359    {
360       i1 = pass_order[i];
361       j = P_diag_i[i1];
362       P_diag_j[j] = fine_to_coarse[i1];
363       P_diag_data[j] = 1.0;
364    }
365 
366    /* generate col_map_offd_P by combining all col_map_offd_Pi
367     * and reompute indices if needed */
368 
369    /* insert remaining weights */
370    for (p=0; p < num_passes-1; p++)
371    {
372       HYPRE_Int *Pi_diag_i = hypre_CSRMatrixI(hypre_ParCSRMatrixDiag(Pi[p]));
373       HYPRE_Int *Pi_offd_i = hypre_CSRMatrixI(hypre_ParCSRMatrixOffd(Pi[p]));
374       HYPRE_Int *Pi_diag_j = hypre_CSRMatrixJ(hypre_ParCSRMatrixDiag(Pi[p]));
375       HYPRE_Int *Pi_offd_j = hypre_CSRMatrixJ(hypre_ParCSRMatrixOffd(Pi[p]));
376       HYPRE_Real *Pi_diag_data = hypre_CSRMatrixData(hypre_ParCSRMatrixDiag(Pi[p]));
377       HYPRE_Real *Pi_offd_data = hypre_CSRMatrixData(hypre_ParCSRMatrixOffd(Pi[p]));
378       j1 = 0;
379       for (i = pass_starts[p+1]; i < pass_starts[p+2]; i++)
380       {
381          i1 = pass_order[i];
382          i2 = Pi_diag_i[j1];
383          for (j = P_diag_i[i1]; j < P_diag_i[i1+1]; j++)
384          {
385             P_diag_j[j] = Pi_diag_j[i2];
386             P_diag_data[j] = Pi_diag_data[i2++];
387          }
388          i2 = Pi_offd_i[j1];
389          for (j = P_offd_i[i1]; j < P_offd_i[i1+1]; j++)
390          {
391             P_offd_j[j] = Pi_offd_j[i2];
392             P_offd_data[j] = Pi_offd_data[i2++];
393          }
394          j1++;
395       }
396    }
397    /* Note that col indices in P_offd_j probably not consistent,
398       this gets fixed after truncation */
399 
400    P = hypre_ParCSRMatrixCreate(comm,
401                                 hypre_ParCSRMatrixGlobalNumRows(A),
402                                 total_global_cpts,
403                                 hypre_ParCSRMatrixRowStarts(A),
404                                 num_cpts_global,
405                                 num_cols_offd_P,
406                                 P_diag_i[n_fine],
407                                 P_offd_i[n_fine]);
408    P_diag = hypre_ParCSRMatrixDiag(P);
409    hypre_CSRMatrixData(P_diag) = P_diag_data;
410    hypre_CSRMatrixI(P_diag) = P_diag_i;
411    hypre_CSRMatrixJ(P_diag) = P_diag_j;
412    P_offd = hypre_ParCSRMatrixOffd(P);
413    hypre_CSRMatrixData(P_offd) = P_offd_data;
414    hypre_CSRMatrixI(P_offd) = P_offd_i;
415    hypre_CSRMatrixJ(P_offd) = P_offd_j;
416 
417    /* Compress P, removing coefficients smaller than trunc_factor * Max */
418 
419    if (trunc_factor != 0.0 || P_max_elmts > 0)
420    {
421       hypre_BoomerAMGInterpTruncation(P, trunc_factor, P_max_elmts);
422       P_diag_data = hypre_CSRMatrixData(P_diag);
423       P_diag_i = hypre_CSRMatrixI(P_diag);
424       P_diag_j = hypre_CSRMatrixJ(P_diag);
425       P_offd_data = hypre_CSRMatrixData(P_offd);
426       P_offd_i = hypre_CSRMatrixI(P_offd);
427       P_offd_j = hypre_CSRMatrixJ(P_offd);
428    }
429 
430    num_cols_offd_P = 0;
431    P_offd_size = P_offd_i[n_fine];
432    if (P_offd_size)
433    {
434       HYPRE_BigInt *tmp_P_offd_j = hypre_CTAlloc(HYPRE_BigInt, P_offd_size, HYPRE_MEMORY_HOST);
435       HYPRE_BigInt *big_P_offd_j = hypre_CTAlloc(HYPRE_BigInt, P_offd_size, HYPRE_MEMORY_HOST);
436       for (p=0; p < num_passes-1; p++)
437       {
438          HYPRE_BigInt *col_map_offd_Pi = hypre_ParCSRMatrixColMapOffd(Pi[p]);
439          for (i = pass_starts[p+1]; i < pass_starts[p+2]; i++)
440          {
441             i1 = pass_order[i];
442             for (j = P_offd_i[i1]; j < P_offd_i[i1+1]; j++)
443             {
444                big_P_offd_j[j] = col_map_offd_Pi[P_offd_j[j]];
445             }
446          }
447       }
448 
449       for (i=0; i < P_offd_size; i++)
450       {
451          tmp_P_offd_j[i] = big_P_offd_j[i];
452       }
453 
454       hypre_BigQsort0(tmp_P_offd_j, 0, P_offd_size-1);
455 
456       num_cols_offd_P = 1;
457       for (i=0; i < P_offd_size-1; i++)
458       {
459           if (tmp_P_offd_j[i+1] > tmp_P_offd_j[i])
460           {
461              tmp_P_offd_j[num_cols_offd_P++] = tmp_P_offd_j[i+1];
462           }
463       }
464 
465       col_map_offd_P = hypre_CTAlloc(HYPRE_BigInt, num_cols_offd_P, HYPRE_MEMORY_HOST);
466 
467       for (i=0; i < num_cols_offd_P; i++)
468       {
469          col_map_offd_P[i] = tmp_P_offd_j[i];
470       }
471 
472       for (i=0; i < P_offd_size; i++)
473       {
474          P_offd_j[i] = hypre_BigBinarySearch(col_map_offd_P,
475                big_P_offd_j[i],
476                num_cols_offd_P);
477       }
478       hypre_TFree(tmp_P_offd_j, HYPRE_MEMORY_HOST);
479       hypre_TFree(big_P_offd_j, HYPRE_MEMORY_HOST);
480    }
481 
482    for (i=0; i < num_passes-1; i++)
483    {
484       hypre_ParCSRMatrixDestroy(Pi[i]);
485    }
486    hypre_TFree (Pi, HYPRE_MEMORY_HOST);
487    hypre_TFree (pass_marker, HYPRE_MEMORY_HOST);
488    hypre_TFree (pass_marker_offd, HYPRE_MEMORY_HOST);
489    hypre_TFree (pass_order, HYPRE_MEMORY_HOST);
490    hypre_TFree (pass_starts, HYPRE_MEMORY_HOST);
491    hypre_TFree (fine_to_coarse, HYPRE_MEMORY_HOST);
492    hypre_TFree (dof_func_offd, HYPRE_MEMORY_HOST);
493    hypre_TFree (row_sums, HYPRE_MEMORY_HOST);
494 
495    for (i=0; i < n_fine; i++)
496    {
497       if (CF_marker[i] == -3) CF_marker[i] = -1;
498    }
499 
500    hypre_ParCSRMatrixColMapOffd(P) = col_map_offd_P;
501    hypre_CSRMatrixNumCols(P_offd) = num_cols_offd_P;
502 
503    hypre_MatvecCommPkgCreate(P);
504 
505    *P_ptr = P;
506 
507    return hypre_error_flag;
508 
509 }
510 
511 
512 HYPRE_Int
hypre_GenerateMultipassPi(hypre_ParCSRMatrix * A,hypre_ParCSRMatrix * S,HYPRE_BigInt * c_pts_starts,HYPRE_Int * pass_order,HYPRE_Int * pass_marker,HYPRE_Int * pass_marker_offd,HYPRE_Int num_points,HYPRE_Int color,HYPRE_Real * row_sums,hypre_ParCSRMatrix ** P_ptr)513 hypre_GenerateMultipassPi( hypre_ParCSRMatrix  *A,
514                            hypre_ParCSRMatrix  *S,
515                            HYPRE_BigInt        *c_pts_starts,
516                            HYPRE_Int           *pass_order, /* array containing row numbers of rows in A and S to be considered */
517                            HYPRE_Int           *pass_marker,
518                            HYPRE_Int           *pass_marker_offd,
519                            HYPRE_Int            num_points,
520                            HYPRE_Int            color,
521                            HYPRE_Real          *row_sums,
522                            hypre_ParCSRMatrix **P_ptr )
523 {
524    MPI_Comm                comm = hypre_ParCSRMatrixComm(A);
525    hypre_ParCSRCommPkg    *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
526    hypre_ParCSRCommHandle *comm_handle;
527 
528    hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
529    HYPRE_Real      *A_diag_data = hypre_CSRMatrixData(A_diag);
530    HYPRE_Int       *A_diag_i = hypre_CSRMatrixI(A_diag);
531    HYPRE_Int       *A_diag_j = hypre_CSRMatrixJ(A_diag);
532    HYPRE_Int        n_fine = hypre_CSRMatrixNumRows(A_diag);
533 
534    hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
535    HYPRE_Real      *A_offd_data = hypre_CSRMatrixData(A_offd);
536    HYPRE_Int       *A_offd_i = hypre_CSRMatrixI(A_offd);
537    HYPRE_Int       *A_offd_j = hypre_CSRMatrixJ(A_offd);
538    HYPRE_Int        num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd);
539 
540    hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S);
541    HYPRE_Int       *S_diag_i = hypre_CSRMatrixI(S_diag);
542    HYPRE_Int       *S_diag_j = hypre_CSRMatrixJ(S_diag);
543 
544    hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd(S);
545    HYPRE_Int       *S_offd_i = hypre_CSRMatrixI(S_offd);
546    HYPRE_Int       *S_offd_j = hypre_CSRMatrixJ(S_offd);
547    HYPRE_BigInt    *col_map_offd_P = NULL;
548    HYPRE_Int        num_cols_offd_P;
549    HYPRE_Int        nnz_diag, nnz_offd;
550    HYPRE_Int        n_cpts, i, j, i1, j1, j2;
551    HYPRE_Int        startc, index;
552    HYPRE_Int        cpt, cnt_diag, cnt_offd;
553 
554    hypre_ParCSRMatrix *P;
555    hypre_CSRMatrix *P_diag;
556    HYPRE_Real      *P_diag_data;
557    HYPRE_Int       *P_diag_i; /*at first counter of nonzero cols for each row,
558                                       finally will be pointer to start of row */
559    HYPRE_Int       *P_diag_j;
560 
561    hypre_CSRMatrix *P_offd;
562    HYPRE_Real      *P_offd_data = NULL;
563    HYPRE_Int       *P_offd_i; /*at first counter of nonzero cols for each row,
564                                       finally will be pointer to start of row */
565    HYPRE_Int       *P_offd_j = NULL;
566    HYPRE_Int       *fine_to_coarse;
567    HYPRE_Int       *fine_to_coarse_offd = NULL;
568    HYPRE_BigInt    *f_pts_starts = NULL;
569    HYPRE_Int        my_id, num_procs;
570    HYPRE_BigInt     total_global_fpts;
571    HYPRE_BigInt     total_global_cpts;
572    HYPRE_BigInt    *big_convert;
573    HYPRE_BigInt    *big_convert_offd = NULL;
574    HYPRE_BigInt    *big_buf_data = NULL;
575    HYPRE_Int        num_sends;
576    HYPRE_Real      *row_sum_C;
577 
578    /* MPI size and rank*/
579    hypre_MPI_Comm_size(comm, &num_procs);
580    hypre_MPI_Comm_rank(comm, &my_id);
581 
582    /* define P matrices */
583 
584    P_diag_i = hypre_CTAlloc(HYPRE_Int, num_points+1, HYPRE_MEMORY_HOST);
585    P_offd_i = hypre_CTAlloc(HYPRE_Int, num_points+1, HYPRE_MEMORY_HOST);
586    fine_to_coarse = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
587 
588    /* fill P */
589 
590    n_cpts = 0;
591    for (i=0; i < n_fine; i++)
592    {
593       if (pass_marker[i] == color)
594       {
595          fine_to_coarse[i] = n_cpts++;
596       }
597       else
598       {
599          fine_to_coarse[i] = -1;
600       }
601    }
602 
603    if (num_procs > 1)
604    {
605       HYPRE_BigInt big_Fpts;
606       big_Fpts = num_points;
607 
608       f_pts_starts = hypre_CTAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST);
609       hypre_MPI_Scan(&big_Fpts, f_pts_starts+1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm);
610       f_pts_starts[0] = f_pts_starts[1] - big_Fpts;
611       if (my_id == num_procs - 1)
612       {
613          total_global_fpts = f_pts_starts[1];
614          total_global_cpts = c_pts_starts[1];
615       }
616       hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs-1, comm);
617       hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs-1, comm);
618    }
619    else
620    {
621       f_pts_starts = hypre_CTAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST);
622       f_pts_starts[0] = 0;
623       f_pts_starts[1] = num_points;
624       total_global_fpts = f_pts_starts[1];
625       total_global_cpts = c_pts_starts[1];
626    }
627 
628    {
629       big_convert = hypre_CTAlloc(HYPRE_BigInt, n_fine, HYPRE_MEMORY_HOST);
630       for (i=0; i < n_fine; i++)
631       {
632          if (pass_marker[i] == color)
633          {
634             big_convert[i] = (HYPRE_BigInt)fine_to_coarse[i] + c_pts_starts[0];
635          }
636       }
637 
638       num_cols_offd_P = 0;
639       if (num_procs > 1)
640       {
641          big_convert_offd = hypre_CTAlloc(HYPRE_BigInt, num_cols_offd_A, HYPRE_MEMORY_HOST);
642          fine_to_coarse_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A, HYPRE_MEMORY_HOST);
643          index = 0;
644          num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
645          big_buf_data = hypre_CTAlloc(HYPRE_BigInt, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), HYPRE_MEMORY_HOST);
646          for (i = 0; i < num_sends; i++)
647          {
648             startc = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
649             for (j = startc; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
650             {
651                big_buf_data[index++] = big_convert[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
652             }
653          }
654 
655          comm_handle = hypre_ParCSRCommHandleCreate( 21, comm_pkg, big_buf_data, big_convert_offd);
656 
657          hypre_ParCSRCommHandleDestroy(comm_handle);
658 
659          num_cols_offd_P = 0;
660          for (i=0; i < num_cols_offd_A; i++)
661          {
662             if (pass_marker_offd[i] == color)
663             {
664                fine_to_coarse_offd[i] = num_cols_offd_P++;
665             }
666          }
667 
668          col_map_offd_P = hypre_CTAlloc(HYPRE_BigInt, num_cols_offd_P, HYPRE_MEMORY_HOST);
669 
670          cpt = 0;
671          for (i=0; i < num_cols_offd_A; i++)
672          {
673             if (pass_marker_offd[i] == color)
674             {
675                col_map_offd_P[cpt++] = big_convert_offd[i];
676             }
677          }
678       }
679    }
680 
681    /* generate P_diag_i and P_offd_i */
682    nnz_diag = 0;
683    nnz_offd = 0;
684    for (i=0; i < num_points; i++)
685    {
686       i1 = pass_order[i];
687       for (j=S_diag_i[i1]; j < S_diag_i[i1+1]; j++)
688       {
689          j1 = S_diag_j[j];
690          if (pass_marker[j1] == color)
691          {
692              P_diag_i[i+1]++;
693              nnz_diag++;
694          }
695       }
696       for (j=S_offd_i[i1]; j < S_offd_i[i1+1]; j++)
697       {
698          j1 = S_offd_j[j];
699          if (pass_marker_offd[j1] == color)
700          {
701              P_offd_i[i+1]++;
702              nnz_offd++;
703          }
704       }
705    }
706 
707    for (i=1; i < num_points+1; i++)
708    {
709       P_diag_i[i] += P_diag_i[i-1];
710       P_offd_i[i] += P_offd_i[i-1];
711    }
712 
713    P_diag_j = hypre_CTAlloc(HYPRE_Int, nnz_diag, HYPRE_MEMORY_HOST);
714    P_diag_data = hypre_CTAlloc(HYPRE_Real, nnz_diag, HYPRE_MEMORY_HOST);
715    P_offd_j = hypre_CTAlloc(HYPRE_Int, nnz_offd, HYPRE_MEMORY_HOST);
716    P_offd_data = hypre_CTAlloc(HYPRE_Real, nnz_offd, HYPRE_MEMORY_HOST);
717 
718    cnt_diag = 0;
719    cnt_offd = 0;
720    for (i=0; i < num_points; i++)
721    {
722       i1 = pass_order[i];
723       j2 = A_diag_i[i1];
724       for (j = S_diag_i[i1]; j < S_diag_i[i1+1]; j++)
725       {
726          j1 = S_diag_j[j];
727          while (A_diag_j[j2] != j1) j2++;
728          if (pass_marker[j1] == color && A_diag_j[j2] == j1)
729          {
730             P_diag_j[cnt_diag] = fine_to_coarse[j1];
731             P_diag_data[cnt_diag++] = A_diag_data[j2];
732          }
733       }
734       j2 = A_offd_i[i1];
735       for (j = S_offd_i[i1]; j < S_offd_i[i1+1]; j++)
736       {
737          j1 = S_offd_j[j];
738          while (A_offd_j[j2] != j1) j2++;
739          if (pass_marker_offd[j1] == color && A_offd_j[j2] == j1)
740          {
741             P_offd_j[cnt_offd] = fine_to_coarse_offd[j1];
742             P_offd_data[cnt_offd++] = A_offd_data[j2];
743          }
744       }
745    }
746 
747    //row_sums = hypre_CTAlloc(HYPRE_Real, num_points, HYPRE_MEMORY_HOST);
748    row_sum_C = hypre_CTAlloc(HYPRE_Real, num_points, HYPRE_MEMORY_HOST);
749    for (i=0; i < num_points; i++)
750    {
751       HYPRE_Real diagonal, value;
752       i1 = pass_order[i];
753       diagonal = A_diag_data[A_diag_i[i1]];
754       /*for (j=A_diag_i[i1]+1; j < A_diag_i[i1+1]; j++)
755       {
756          row_sums[i] += A_diag_data[j];
757       }
758       for (j=A_offd_i[i1]; j < A_offd_i[i1+1]; j++)
759       {
760          row_sums[i] += A_offd_data[j];
761       }*/
762       for (j = P_diag_i[i]; j < P_diag_i[i+1]; j++)
763       {
764           row_sum_C[i] += P_diag_data[j];
765       }
766       for (j = P_offd_i[i]; j < P_offd_i[i+1]; j++)
767       {
768            row_sum_C[i] += P_offd_data[j];
769       }
770       value = row_sum_C[i]*diagonal;
771       if (value != 0)
772       {
773          row_sums[i1] /= value;
774       }
775       for (j = P_diag_i[i]; j < P_diag_i[i+1]; j++)
776       {
777          P_diag_data[j] = -P_diag_data[j]*row_sums[i1];
778       }
779       for (j = P_offd_i[i]; j < P_offd_i[i+1]; j++)
780       {
781          P_offd_data[j] = -P_offd_data[j]*row_sums[i1];
782       }
783    }
784 
785 
786    P = hypre_ParCSRMatrixCreate(comm,
787                                 total_global_fpts,
788                                 total_global_cpts,
789                                 f_pts_starts,
790                                 c_pts_starts,
791                                 num_cols_offd_P,
792                                 P_diag_i[num_points],
793                                 P_offd_i[num_points]);
794 
795    P_diag = hypre_ParCSRMatrixDiag(P);
796    hypre_CSRMatrixData(P_diag) = P_diag_data;
797    hypre_CSRMatrixI(P_diag) = P_diag_i;
798    hypre_CSRMatrixJ(P_diag) = P_diag_j;
799    P_offd = hypre_ParCSRMatrixOffd(P);
800    hypre_CSRMatrixData(P_offd) = P_offd_data;
801    hypre_CSRMatrixI(P_offd) = P_offd_i;
802    hypre_CSRMatrixJ(P_offd) = P_offd_j;
803    hypre_ParCSRMatrixColMapOffd(P) = col_map_offd_P;
804 
805    hypre_CSRMatrixMemoryLocation(P_diag) = HYPRE_MEMORY_HOST;
806    hypre_CSRMatrixMemoryLocation(P_offd) = HYPRE_MEMORY_HOST;
807 
808    /* free stuff */
809    hypre_TFree(fine_to_coarse, HYPRE_MEMORY_HOST);
810    hypre_TFree(fine_to_coarse_offd, HYPRE_MEMORY_HOST);
811    //hypre_TFree(row_sums, HYPRE_MEMORY_HOST);
812    hypre_TFree(row_sum_C, HYPRE_MEMORY_HOST);
813    hypre_TFree(big_convert, HYPRE_MEMORY_HOST);
814    hypre_TFree(big_convert_offd, HYPRE_MEMORY_HOST);
815    hypre_TFree(big_buf_data, HYPRE_MEMORY_HOST);
816    hypre_TFree(f_pts_starts, HYPRE_MEMORY_HOST);
817 
818    hypre_MatvecCommPkgCreate(P);
819    *P_ptr = P;
820 
821    return hypre_error_flag;
822 }
823 
824 HYPRE_Int
hypre_GenerateMultiPi(hypre_ParCSRMatrix * A,hypre_ParCSRMatrix * S,hypre_ParCSRMatrix * P,HYPRE_BigInt * c_pts_starts,HYPRE_Int * pass_order,HYPRE_Int * pass_marker,HYPRE_Int * pass_marker_offd,HYPRE_Int num_points,HYPRE_Int color,HYPRE_Int num_functions,HYPRE_Int * dof_func,HYPRE_Int * dof_func_offd,hypre_ParCSRMatrix ** Pi_ptr)825 hypre_GenerateMultiPi( hypre_ParCSRMatrix  *A,
826                        hypre_ParCSRMatrix  *S,
827                        hypre_ParCSRMatrix  *P,
828                        HYPRE_BigInt        *c_pts_starts,
829                        HYPRE_Int           *pass_order, /* array containing row numbers of rows in A and S to be considered */
830                        HYPRE_Int           *pass_marker,
831                        HYPRE_Int           *pass_marker_offd,
832                        HYPRE_Int            num_points,
833                        HYPRE_Int            color,
834                        HYPRE_Int            num_functions,
835                        HYPRE_Int           *dof_func,
836                        HYPRE_Int           *dof_func_offd,
837                        hypre_ParCSRMatrix **Pi_ptr )
838 {
839    MPI_Comm                comm = hypre_ParCSRMatrixComm(A);
840    hypre_ParCSRCommPkg    *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
841    hypre_ParCSRCommHandle *comm_handle;
842 
843    hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
844    HYPRE_Real      *A_diag_data = hypre_CSRMatrixData(A_diag);
845    HYPRE_Int       *A_diag_i = hypre_CSRMatrixI(A_diag);
846    HYPRE_Int       *A_diag_j = hypre_CSRMatrixJ(A_diag);
847    HYPRE_Int        n_fine = hypre_CSRMatrixNumRows(A_diag);
848 
849    hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
850    HYPRE_Real      *A_offd_data = hypre_CSRMatrixData(A_offd);
851    HYPRE_Int       *A_offd_i = hypre_CSRMatrixI(A_offd);
852    HYPRE_Int       *A_offd_j = hypre_CSRMatrixJ(A_offd);
853    HYPRE_Int        num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd);
854 
855    hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S);
856    HYPRE_Int       *S_diag_i = hypre_CSRMatrixI(S_diag);
857    HYPRE_Int       *S_diag_j = hypre_CSRMatrixJ(S_diag);
858 
859    hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd(S);
860    HYPRE_Int       *S_offd_i = hypre_CSRMatrixI(S_offd);
861    HYPRE_Int       *S_offd_j = hypre_CSRMatrixJ(S_offd);
862    HYPRE_BigInt    *col_map_offd_Q = NULL;
863    HYPRE_Int        num_cols_offd_Q;
864 
865    hypre_ParCSRMatrix *Pi;
866    hypre_CSRMatrix *Pi_diag;
867    HYPRE_Int       *Pi_diag_i;
868    HYPRE_Real      *Pi_diag_data;
869 
870    hypre_CSRMatrix *Pi_offd;
871    HYPRE_Int       *Pi_offd_i;
872    HYPRE_Real      *Pi_offd_data;
873 
874    HYPRE_Int        nnz_diag, nnz_offd;
875    HYPRE_Int        n_cpts, i, j, i1, j1, j2;
876    HYPRE_Int        startc, index;
877    HYPRE_Int        cpt, cnt_diag, cnt_offd;
878 
879    hypre_ParCSRMatrix *Q;
880    hypre_CSRMatrix *Q_diag;
881    HYPRE_Real      *Q_diag_data;
882    HYPRE_Int       *Q_diag_i; /*at first counter of nonzero cols for each row,
883                                       finally will be pointer to start of row */
884    HYPRE_Int       *Q_diag_j;
885 
886    hypre_CSRMatrix *Q_offd;
887    HYPRE_Real      *Q_offd_data = NULL;
888    HYPRE_Int       *Q_offd_i; /*at first counter of nonzero cols for each row,
889                                       finally will be pointer to start of row */
890    HYPRE_Int       *Q_offd_j = NULL;
891    HYPRE_Int       *fine_to_coarse;
892    HYPRE_Int       *fine_to_coarse_offd = NULL;
893    HYPRE_BigInt    *f_pts_starts = NULL;
894    HYPRE_Int        my_id, num_procs;
895    HYPRE_BigInt     total_global_fpts;
896    HYPRE_BigInt     total_global_cpts;
897    HYPRE_BigInt    *big_convert;
898    HYPRE_BigInt    *big_convert_offd = NULL;
899    HYPRE_BigInt    *big_buf_data = NULL;
900    HYPRE_Int        num_sends;
901    //HYPRE_Real      *row_sums;
902    HYPRE_Real      *row_sums_C;
903    HYPRE_Real      *w_row_sum;
904 
905    /* MPI size and rank*/
906    hypre_MPI_Comm_size(comm, &num_procs);
907    hypre_MPI_Comm_rank(comm, &my_id);
908 
909    /* define P matrices */
910 
911    Q_diag_i = hypre_CTAlloc(HYPRE_Int, num_points+1, HYPRE_MEMORY_HOST);
912    Q_offd_i = hypre_CTAlloc(HYPRE_Int, num_points+1, HYPRE_MEMORY_HOST);
913    fine_to_coarse = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
914 
915    /* fill P */
916 
917    n_cpts = 0;
918    for (i=0; i < n_fine; i++)
919    {
920       if (pass_marker[i] == color)
921       {
922          fine_to_coarse[i] = n_cpts++;
923       }
924       else
925       {
926          fine_to_coarse[i] = -1;
927       }
928    }
929 
930    if (num_procs > 1)
931    {
932       HYPRE_BigInt big_Fpts;
933       big_Fpts = num_points;
934 
935       f_pts_starts = hypre_CTAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST);
936       hypre_MPI_Scan(&big_Fpts, f_pts_starts+1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm);
937       f_pts_starts[0] = f_pts_starts[1] - big_Fpts;
938       if (my_id == num_procs - 1)
939       {
940          total_global_fpts = f_pts_starts[1];
941          total_global_cpts = c_pts_starts[1];
942       }
943       hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs-1, comm);
944       hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs-1, comm);
945    }
946    else
947    {
948       f_pts_starts = hypre_CTAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST);
949       f_pts_starts[0] = 0;
950       f_pts_starts[1] = num_points;
951       total_global_fpts = f_pts_starts[1];
952       total_global_cpts = c_pts_starts[1];
953    }
954 
955    {
956       big_convert = hypre_CTAlloc(HYPRE_BigInt, n_fine, HYPRE_MEMORY_HOST);
957       for (i=0; i < n_fine; i++)
958       {
959          if (pass_marker[i] == color)
960          {
961             big_convert[i] = (HYPRE_BigInt)fine_to_coarse[i] + c_pts_starts[0];
962          }
963       }
964 
965       num_cols_offd_Q = 0;
966       if (num_procs > 1)
967       {
968          big_convert_offd = hypre_CTAlloc(HYPRE_BigInt, num_cols_offd_A, HYPRE_MEMORY_HOST);
969          fine_to_coarse_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A, HYPRE_MEMORY_HOST);
970          index = 0;
971          num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
972          big_buf_data = hypre_CTAlloc(HYPRE_BigInt,  hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), HYPRE_MEMORY_HOST);
973          for (i = 0; i < num_sends; i++)
974          {
975             startc = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
976             for (j = startc; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
977             {
978                big_buf_data[index++] = big_convert[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
979             }
980          }
981 
982          comm_handle = hypre_ParCSRCommHandleCreate( 21, comm_pkg, big_buf_data, big_convert_offd);
983 
984          hypre_ParCSRCommHandleDestroy(comm_handle);
985 
986          num_cols_offd_Q = 0;
987          for (i=0; i < num_cols_offd_A; i++)
988          {
989             if (pass_marker_offd[i] == color)
990             {
991                fine_to_coarse_offd[i] = num_cols_offd_Q++;
992             }
993          }
994 
995          col_map_offd_Q = hypre_CTAlloc(HYPRE_BigInt, num_cols_offd_Q, HYPRE_MEMORY_HOST);
996 
997          cpt = 0;
998          for (i=0; i < num_cols_offd_A; i++)
999          {
1000             if (pass_marker_offd[i] == color)
1001             {
1002                col_map_offd_Q[cpt++] = big_convert_offd[i];
1003             }
1004          }
1005       }
1006    }
1007 
1008    /* generate Q_diag_i and Q_offd_i */
1009    nnz_diag = 0;
1010    nnz_offd = 0;
1011    for (i=0; i < num_points; i++)
1012    {
1013       i1 = pass_order[i];
1014       for (j=S_diag_i[i1]; j < S_diag_i[i1+1]; j++)
1015       {
1016          j1 = S_diag_j[j];
1017          if (pass_marker[j1] == color)
1018          {
1019              Q_diag_i[i+1]++;
1020              nnz_diag++;
1021          }
1022       }
1023       for (j=S_offd_i[i1]; j < S_offd_i[i1+1]; j++)
1024       {
1025          j1 = S_offd_j[j];
1026          if (pass_marker_offd[j1] == color)
1027          {
1028              Q_offd_i[i+1]++;
1029              nnz_offd++;
1030          }
1031       }
1032    }
1033 
1034    for (i=1; i < num_points+1; i++)
1035    {
1036       Q_diag_i[i] += Q_diag_i[i-1];
1037       Q_offd_i[i] += Q_offd_i[i-1];
1038    }
1039 
1040    Q_diag_j = hypre_CTAlloc(HYPRE_Int, nnz_diag, HYPRE_MEMORY_HOST);
1041    Q_diag_data = hypre_CTAlloc(HYPRE_Real, nnz_diag, HYPRE_MEMORY_HOST);
1042    Q_offd_j = hypre_CTAlloc(HYPRE_Int, nnz_offd, HYPRE_MEMORY_HOST);
1043    Q_offd_data = hypre_CTAlloc(HYPRE_Real, nnz_offd, HYPRE_MEMORY_HOST);
1044    w_row_sum = hypre_CTAlloc(HYPRE_Real, num_points, HYPRE_MEMORY_HOST);
1045 
1046    cnt_diag = 0;
1047    cnt_offd = 0;
1048    if (num_functions > 1)
1049    {
1050       for (i=0; i < num_points; i++)
1051       {
1052          i1 = pass_order[i];
1053          j2 = A_diag_i[i1]+1;
1054          //if (w_row_minus) w_row_sum[i] = -w_row_minus[i1];
1055          for (j = S_diag_i[i1]; j < S_diag_i[i1+1]; j++)
1056          {
1057             j1 = S_diag_j[j];
1058             while (A_diag_j[j2] != j1)
1059             {
1060                if (dof_func[i1] == dof_func[A_diag_j[j2]])
1061                {
1062                   w_row_sum[i] += A_diag_data[j2];
1063                }
1064                j2++;
1065             }
1066             if (pass_marker[j1] == color && A_diag_j[j2] == j1)
1067             {
1068                Q_diag_j[cnt_diag] = fine_to_coarse[j1];
1069                Q_diag_data[cnt_diag++] = A_diag_data[j2++];
1070             }
1071             else
1072             {
1073                if (dof_func[i1] == dof_func[A_diag_j[j2]])
1074                {
1075                   w_row_sum[i] += A_diag_data[j2];
1076                }
1077                j2++;
1078             }
1079          }
1080          while (j2 < A_diag_i[i1+1])
1081          {
1082             if (dof_func[i1] == dof_func[A_diag_j[j2]])
1083             {
1084                w_row_sum[i] += A_diag_data[j2];
1085             }
1086             j2++;
1087          }
1088          j2 = A_offd_i[i1];
1089          for (j = S_offd_i[i1]; j < S_offd_i[i1+1]; j++)
1090          {
1091             j1 = S_offd_j[j];
1092             while (A_offd_j[j2] != j1)
1093             {
1094                if (dof_func[i1] == dof_func_offd[A_offd_j[j2]])
1095                {
1096                   w_row_sum[i] += A_offd_data[j2];
1097                }
1098                j2++;
1099             }
1100             if (pass_marker_offd[j1] == color && A_offd_j[j2] == j1)
1101             {
1102                Q_offd_j[cnt_offd] = fine_to_coarse_offd[j1];
1103                Q_offd_data[cnt_offd++] = A_offd_data[j2++];
1104             }
1105             else
1106             {
1107                if (dof_func[i1] == dof_func_offd[A_offd_j[j2]])
1108                {
1109                   w_row_sum[i] += A_offd_data[j2];
1110                }
1111                j2++;
1112             }
1113          }
1114          while (j2 < A_offd_i[i1+1])
1115          {
1116             if (dof_func[i1] == dof_func_offd[A_offd_j[j2]])
1117             {
1118                w_row_sum[i] += A_offd_data[j2];
1119             }
1120             j2++;
1121          }
1122       }
1123    }
1124    else
1125    {
1126       for (i=0; i < num_points; i++)
1127       {
1128          i1 = pass_order[i];
1129          j2 = A_diag_i[i1]+1;
1130          for (j = S_diag_i[i1]; j < S_diag_i[i1+1]; j++)
1131          {
1132             j1 = S_diag_j[j];
1133             while (A_diag_j[j2] != j1)
1134             {
1135                w_row_sum[i] += A_diag_data[j2];
1136                j2++;
1137             }
1138             if (pass_marker[j1] == color && A_diag_j[j2] == j1)
1139             {
1140                Q_diag_j[cnt_diag] = fine_to_coarse[j1];
1141                Q_diag_data[cnt_diag++] = A_diag_data[j2++];
1142             }
1143             else
1144             {
1145                w_row_sum[i] += A_diag_data[j2];
1146                j2++;
1147             }
1148          }
1149          while (j2 < A_diag_i[i1+1])
1150          {
1151             w_row_sum[i] += A_diag_data[j2];
1152             j2++;
1153          }
1154          j2 = A_offd_i[i1];
1155          for (j = S_offd_i[i1]; j < S_offd_i[i1+1]; j++)
1156          {
1157             j1 = S_offd_j[j];
1158             while (A_offd_j[j2] != j1)
1159             {
1160                w_row_sum[i] += A_offd_data[j2];
1161                j2++;
1162             }
1163             if (pass_marker_offd[j1] == color && A_offd_j[j2] == j1)
1164             {
1165                Q_offd_j[cnt_offd] = fine_to_coarse_offd[j1];
1166                Q_offd_data[cnt_offd++] = A_offd_data[j2++];
1167             }
1168             else
1169             {
1170                w_row_sum[i] += A_offd_data[j2];
1171                j2++;
1172             }
1173          }
1174          while (j2 < A_offd_i[i1+1])
1175          {
1176             w_row_sum[i] += A_offd_data[j2];
1177             j2++;
1178          }
1179       }
1180    }
1181 
1182    Q = hypre_ParCSRMatrixCreate(comm,
1183                                 total_global_fpts,
1184                                 total_global_cpts,
1185                                 f_pts_starts,
1186                                 c_pts_starts,
1187                                 num_cols_offd_Q,
1188                                 Q_diag_i[num_points],
1189                                 Q_offd_i[num_points]);
1190 
1191    Q_diag = hypre_ParCSRMatrixDiag(Q);
1192    hypre_CSRMatrixData(Q_diag) = Q_diag_data;
1193    hypre_CSRMatrixI(Q_diag) = Q_diag_i;
1194    hypre_CSRMatrixJ(Q_diag) = Q_diag_j;
1195    Q_offd = hypre_ParCSRMatrixOffd(Q);
1196    hypre_CSRMatrixData(Q_offd) = Q_offd_data;
1197    hypre_CSRMatrixI(Q_offd) = Q_offd_i;
1198    hypre_CSRMatrixJ(Q_offd) = Q_offd_j;
1199    hypre_ParCSRMatrixColMapOffd(Q) = col_map_offd_Q;
1200 
1201    hypre_CSRMatrixMemoryLocation(Q_diag) = HYPRE_MEMORY_HOST;
1202    hypre_CSRMatrixMemoryLocation(Q_offd) = HYPRE_MEMORY_HOST;
1203 
1204    /* free stuff */
1205    hypre_TFree(fine_to_coarse, HYPRE_MEMORY_HOST);
1206    hypre_TFree(fine_to_coarse_offd, HYPRE_MEMORY_HOST);
1207    hypre_TFree(big_convert, HYPRE_MEMORY_HOST);
1208    hypre_TFree(big_convert_offd, HYPRE_MEMORY_HOST);
1209    hypre_TFree(big_buf_data, HYPRE_MEMORY_HOST);
1210 
1211    hypre_MatvecCommPkgCreate(Q);
1212 
1213    Pi = hypre_ParMatmul(Q, P);
1214 
1215    Pi_diag = hypre_ParCSRMatrixDiag(Pi);
1216    Pi_diag_data = hypre_CSRMatrixData(Pi_diag);
1217    Pi_diag_i = hypre_CSRMatrixI(Pi_diag);
1218    Pi_offd = hypre_ParCSRMatrixOffd(Pi);
1219    Pi_offd_data = hypre_CSRMatrixData(Pi_offd);
1220    Pi_offd_i = hypre_CSRMatrixI(Pi_offd);
1221 
1222    row_sums_C = hypre_CTAlloc(HYPRE_Real, num_points, HYPRE_MEMORY_HOST);
1223    for (i=0; i < num_points; i++)
1224    {
1225       HYPRE_Real diagonal, value;
1226       i1 = pass_order[i];
1227       diagonal = A_diag_data[A_diag_i[i1]];
1228       for (j=Pi_diag_i[i]; j < Pi_diag_i[i+1]; j++)
1229       {
1230          row_sums_C[i] += Pi_diag_data[j];
1231       }
1232       for (j=Pi_offd_i[i]; j < Pi_offd_i[i+1]; j++)
1233       {
1234          row_sums_C[i] += Pi_offd_data[j];
1235       }
1236       value = row_sums_C[i]*diagonal;
1237       row_sums_C[i] += w_row_sum[i];
1238       if (value != 0)
1239       {
1240          row_sums_C[i] /= value;
1241       }
1242       for (j = Pi_diag_i[i]; j < Pi_diag_i[i+1]; j++)
1243       {
1244          Pi_diag_data[j] = -Pi_diag_data[j]*row_sums_C[i];
1245       }
1246       for (j = Pi_offd_i[i]; j < Pi_offd_i[i+1]; j++)
1247       {
1248          Pi_offd_data[j] = -Pi_offd_data[j]*row_sums_C[i];
1249       }
1250    }
1251 
1252    hypre_ParCSRMatrixDestroy(Q);
1253    //hypre_TFree(row_sums, HYPRE_MEMORY_HOST);
1254    hypre_TFree(row_sums_C, HYPRE_MEMORY_HOST);
1255    hypre_TFree(w_row_sum, HYPRE_MEMORY_HOST);
1256    hypre_TFree(f_pts_starts, HYPRE_MEMORY_HOST);
1257 
1258    *Pi_ptr = Pi;
1259 
1260    return hypre_error_flag;
1261 }
1262 
1263 
1264 HYPRE_Int
hypre_BoomerAMGBuildModMultipass(hypre_ParCSRMatrix * A,HYPRE_Int * CF_marker,hypre_ParCSRMatrix * S,HYPRE_BigInt * num_cpts_global,HYPRE_Real trunc_factor,HYPRE_Int P_max_elmts,HYPRE_Int interp_type,HYPRE_Int num_functions,HYPRE_Int * dof_func,hypre_ParCSRMatrix ** P_ptr)1265 hypre_BoomerAMGBuildModMultipass( hypre_ParCSRMatrix  *A,
1266                                   HYPRE_Int           *CF_marker,
1267                                   hypre_ParCSRMatrix  *S,
1268                                   HYPRE_BigInt        *num_cpts_global,
1269                                   HYPRE_Real           trunc_factor,
1270                                   HYPRE_Int            P_max_elmts,
1271                                   HYPRE_Int            interp_type,
1272                                   HYPRE_Int            num_functions,
1273                                   HYPRE_Int           *dof_func,
1274                                   hypre_ParCSRMatrix **P_ptr )
1275 {
1276 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
1277    hypre_GpuProfilingPushRange("ModMultipass");
1278 #endif
1279 
1280    HYPRE_Int ierr = 0;
1281 
1282 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
1283    HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy2( hypre_ParCSRMatrixMemoryLocation(A),
1284                                                       hypre_ParCSRMatrixMemoryLocation(S) );
1285    if (exec == HYPRE_EXEC_DEVICE)
1286    {
1287       ierr = hypre_BoomerAMGBuildModMultipassDevice( A, CF_marker, S, num_cpts_global,
1288                                                      trunc_factor, P_max_elmts,
1289                                                      interp_type, num_functions,
1290                                                      dof_func, P_ptr);
1291    }
1292    else
1293 #endif
1294    {
1295       ierr = hypre_BoomerAMGBuildModMultipassHost( A, CF_marker, S, num_cpts_global,
1296                                                    trunc_factor, P_max_elmts,
1297                                                    interp_type, num_functions,
1298                                                    dof_func, P_ptr);
1299    }
1300 
1301 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
1302    hypre_GpuProfilingPopRange();
1303 #endif
1304 
1305    return ierr;
1306 }
1307 
1308