1 /******************************************************************************
2  * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3  * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4  *
5  * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6  ******************************************************************************/
7 
8 #include "_hypre_parcsr_ls.h"
9 #include "float.h"
10 #include "ams.h"
11 #include "ads.h"
12 #include "_hypre_utilities.hpp"
13 
14 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
15 __global__ void hypreCUDAKernel_AMSComputePi_copy1(HYPRE_Int nnz, HYPRE_Int dim, HYPRE_Int *j_in, HYPRE_Int *j_out);
16 __global__ void hypreCUDAKernel_AMSComputePi_copy2(HYPRE_Int nrows, HYPRE_Int dim, HYPRE_Int *i_in, HYPRE_Real *data_in, HYPRE_Real *Gx_data, HYPRE_Real *Gy_data, HYPRE_Real *Gz_data, HYPRE_Real *data_out);
17 __global__ void hypreCUDAKernel_AMSComputePixyz_copy(HYPRE_Int nrows, HYPRE_Int dim, HYPRE_Int *i_in, HYPRE_Real *data_in, HYPRE_Real *Gx_data, HYPRE_Real *Gy_data, HYPRE_Real *Gz_data, HYPRE_Real *data_x_out, HYPRE_Real *data_y_out, HYPRE_Real *data_z_out );
18 #endif
19 
20 /*--------------------------------------------------------------------------
21  * hypre_ADSCreate
22  *
23  * Allocate the ADS solver structure.
24  *--------------------------------------------------------------------------*/
25 
hypre_ADSCreate()26 void * hypre_ADSCreate()
27 {
28    hypre_ADSData *ads_data;
29 
30    ads_data = hypre_CTAlloc(hypre_ADSData, 1, HYPRE_MEMORY_HOST);
31 
32    /* Default parameters */
33 
34    ads_data -> maxit = 20;             /* perform at most 20 iterations */
35    ads_data -> tol = 1e-6;             /* convergence tolerance */
36    ads_data -> print_level = 1;        /* print residual norm at each step */
37    ads_data -> cycle_type = 1;         /* a 3-level multiplicative solver */
38    ads_data -> A_relax_type = 2;       /* offd-l1-scaled GS */
39    ads_data -> A_relax_times = 1;      /* one relaxation sweep */
40    ads_data -> A_relax_weight = 1.0;   /* damping parameter */
41    ads_data -> A_omega = 1.0;          /* SSOR coefficient */
42    ads_data -> A_cheby_order = 2;      /* Cheby: order (1 -4 are vaild) */
43    ads_data -> A_cheby_fraction = 0.3; /* Cheby: fraction of spectrum to smooth */
44 
45    ads_data -> B_C_cycle_type = 11;    /* a 5-level multiplicative solver */
46    ads_data -> B_C_coarsen_type = 10;  /* HMIS coarsening */
47    ads_data -> B_C_agg_levels = 1;     /* Levels of aggressive coarsening */
48    ads_data -> B_C_relax_type = 3;     /* hybrid G-S/Jacobi */
49    ads_data -> B_C_theta = 0.25;       /* strength threshold */
50    ads_data -> B_C_interp_type = 0;    /* interpolation type */
51    ads_data -> B_C_Pmax = 0;           /* max nonzero elements in interp. rows */
52    ads_data -> B_Pi_coarsen_type = 10; /* HMIS coarsening */
53    ads_data -> B_Pi_agg_levels = 1;    /* Levels of aggressive coarsening */
54    ads_data -> B_Pi_relax_type = 3;    /* hybrid G-S/Jacobi */
55    ads_data -> B_Pi_theta = 0.25;      /* strength threshold */
56    ads_data -> B_Pi_interp_type = 0;   /* interpolation type */
57    ads_data -> B_Pi_Pmax = 0;          /* max nonzero elements in interp. rows */
58 
59    /* The rest of the fields are initialized using the Set functions */
60 
61    ads_data -> A     = NULL;
62    ads_data -> C     = NULL;
63    ads_data -> A_C   = NULL;
64    ads_data -> B_C   = 0;
65    ads_data -> Pi    = NULL;
66    ads_data -> A_Pi  = NULL;
67    ads_data -> B_Pi  = 0;
68    ads_data -> Pix    = NULL;
69    ads_data -> Piy    = NULL;
70    ads_data -> Piz    = NULL;
71    ads_data -> A_Pix  = NULL;
72    ads_data -> A_Piy  = NULL;
73    ads_data -> A_Piz  = NULL;
74    ads_data -> B_Pix  = 0;
75    ads_data -> B_Piy  = 0;
76    ads_data -> B_Piz  = 0;
77    ads_data -> G     = NULL;
78    ads_data -> x     = NULL;
79    ads_data -> y     = NULL;
80    ads_data -> z     = NULL;
81    ads_data -> zz  = NULL;
82 
83    ads_data -> r0  = NULL;
84    ads_data -> g0  = NULL;
85    ads_data -> r1  = NULL;
86    ads_data -> g1  = NULL;
87    ads_data -> r2  = NULL;
88    ads_data -> g2  = NULL;
89 
90    ads_data -> A_l1_norms = NULL;
91    ads_data -> A_max_eig_est = 0;
92    ads_data -> A_min_eig_est = 0;
93 
94    ads_data -> owns_Pi = 1;
95    ads_data -> ND_Pi   = NULL;
96    ads_data -> ND_Pix  = NULL;
97    ads_data -> ND_Piy  = NULL;
98    ads_data -> ND_Piz  = NULL;
99 
100    return (void *) ads_data;
101 }
102 
103 /*--------------------------------------------------------------------------
104  * hypre_ADSDestroy
105  *
106  * Deallocate the ADS solver structure. Note that the input data (given
107  * through the Set functions) is not destroyed.
108  *--------------------------------------------------------------------------*/
109 
hypre_ADSDestroy(void * solver)110 HYPRE_Int hypre_ADSDestroy(void *solver)
111 {
112    hypre_ADSData *ads_data = (hypre_ADSData *) solver;
113 
114    if (!ads_data)
115    {
116       hypre_error_in_arg(1);
117       return hypre_error_flag;
118    }
119 
120    if (ads_data -> A_C)
121       hypre_ParCSRMatrixDestroy(ads_data -> A_C);
122    if (ads_data -> B_C)
123       HYPRE_AMSDestroy(ads_data -> B_C);
124 
125    if (ads_data -> owns_Pi && ads_data -> Pi)
126       hypre_ParCSRMatrixDestroy(ads_data -> Pi);
127    if (ads_data -> A_Pi)
128       hypre_ParCSRMatrixDestroy(ads_data -> A_Pi);
129    if (ads_data -> B_Pi)
130       HYPRE_BoomerAMGDestroy(ads_data -> B_Pi);
131 
132    if (ads_data -> owns_Pi && ads_data -> Pix)
133       hypre_ParCSRMatrixDestroy(ads_data -> Pix);
134    if (ads_data -> A_Pix)
135       hypre_ParCSRMatrixDestroy(ads_data -> A_Pix);
136    if (ads_data -> B_Pix)
137       HYPRE_BoomerAMGDestroy(ads_data -> B_Pix);
138    if (ads_data -> owns_Pi && ads_data -> Piy)
139       hypre_ParCSRMatrixDestroy(ads_data -> Piy);
140    if (ads_data -> A_Piy)
141       hypre_ParCSRMatrixDestroy(ads_data -> A_Piy);
142    if (ads_data -> B_Piy)
143       HYPRE_BoomerAMGDestroy(ads_data -> B_Piy);
144    if (ads_data -> owns_Pi && ads_data -> Piz)
145       hypre_ParCSRMatrixDestroy(ads_data -> Piz);
146    if (ads_data -> A_Piz)
147       hypre_ParCSRMatrixDestroy(ads_data -> A_Piz);
148    if (ads_data -> B_Piz)
149       HYPRE_BoomerAMGDestroy(ads_data -> B_Piz);
150 
151    if (ads_data -> r0)
152       hypre_ParVectorDestroy(ads_data -> r0);
153    if (ads_data -> g0)
154       hypre_ParVectorDestroy(ads_data -> g0);
155    if (ads_data -> r1)
156       hypre_ParVectorDestroy(ads_data -> r1);
157    if (ads_data -> g1)
158       hypre_ParVectorDestroy(ads_data -> g1);
159    if (ads_data -> r2)
160       hypre_ParVectorDestroy(ads_data -> r2);
161    if (ads_data -> g2)
162       hypre_ParVectorDestroy(ads_data -> g2);
163    if (ads_data -> zz)
164       hypre_ParVectorDestroy(ads_data -> zz);
165 
166    hypre_SeqVectorDestroy(ads_data -> A_l1_norms);
167 
168    /* C, G, x, y and z are not destroyed */
169 
170    if (ads_data)
171       hypre_TFree(ads_data, HYPRE_MEMORY_HOST);
172 
173    return hypre_error_flag;
174 }
175 
176 /*--------------------------------------------------------------------------
177  * hypre_ADSSetDiscreteCurl
178  *
179  * Set the discrete curl matrix C.
180  * This function should be called before hypre_ADSSetup()!
181  *--------------------------------------------------------------------------*/
182 
hypre_ADSSetDiscreteCurl(void * solver,hypre_ParCSRMatrix * C)183 HYPRE_Int hypre_ADSSetDiscreteCurl(void *solver,
184                                    hypre_ParCSRMatrix *C)
185 {
186    hypre_ADSData *ads_data = (hypre_ADSData *) solver;
187    ads_data -> C = C;
188    return hypre_error_flag;
189 }
190 
191 /*--------------------------------------------------------------------------
192  * hypre_ADSSetDiscreteGradient
193  *
194  * Set the discrete gradient matrix G.
195  * This function should be called before hypre_ADSSetup()!
196  *--------------------------------------------------------------------------*/
197 
hypre_ADSSetDiscreteGradient(void * solver,hypre_ParCSRMatrix * G)198 HYPRE_Int hypre_ADSSetDiscreteGradient(void *solver,
199                                        hypre_ParCSRMatrix *G)
200 {
201    hypre_ADSData *ads_data = (hypre_ADSData *) solver;
202    ads_data -> G = G;
203    return hypre_error_flag;
204 }
205 
206 /*--------------------------------------------------------------------------
207  * hypre_ADSSetCoordinateVectors
208  *
209  * Set the x, y and z coordinates of the vertices in the mesh.
210  * This function should be called before hypre_ADSSetup()!
211  *--------------------------------------------------------------------------*/
212 
hypre_ADSSetCoordinateVectors(void * solver,hypre_ParVector * x,hypre_ParVector * y,hypre_ParVector * z)213 HYPRE_Int hypre_ADSSetCoordinateVectors(void *solver,
214                                         hypre_ParVector *x,
215                                         hypre_ParVector *y,
216                                         hypre_ParVector *z)
217 {
218    hypre_ADSData *ads_data = (hypre_ADSData *) solver;
219    ads_data -> x = x;
220    ads_data -> y = y;
221    ads_data -> z = z;
222    return hypre_error_flag;
223 }
224 
225 /*--------------------------------------------------------------------------
226  * hypre_ADSSetInterpolations
227  *
228  * Set the (components of) the Raviart-Thomas (RT_Pi) and the Nedelec (ND_Pi)
229  * interpolation matrices.
230  *
231  * This function is generally intended to be used only for high-order H(div)
232  * discretizations (in the lowest order case, these matrices are constructed
233  * internally in ADS from the discreet gradient and curl matrices and the
234  * coordinates of the vertices), though it can also be used in the lowest-order
235  * case or for other types of discretizations.
236  *
237  * By definition, RT_Pi and ND_Pi are the matrix representations of the linear
238  * operators that interpolate (high-order) vector nodal finite elements into the
239  * (high-order) Raviart-Thomas and Nedelec spaces. The component matrices are
240  * defined in both cases as Pix phi = Pi (phi,0,0) and similarly for Piy and
241  * Piz. Note that all these operators depend on the choice of the basis and
242  * degrees of freedom in the high-order spaces.
243  *
244  * The column numbering of RT_Pi and ND_Pi should be node-based, i.e. the x/y/z
245  * components of the first node (vertex or high-order dof) should be listed
246  * first, followed by the x/y/z components of the second node and so on (see the
247  * documentation of HYPRE_BoomerAMGSetDofFunc).
248  *
249  * If used, this function should be called before hypre_ADSSetup() and there is
250  * no need to provide the vertex coordinates. Furthermore, only one of the sets
251  * {RT_Pi} and {RT_Pix,RT_Piy,RT_Piz} needs to be specified (though it is OK to
252  * provide both).  If RT_Pix is NULL, then scalar Pi-based ADS cycles, i.e.
253  * those with cycle_type > 10, will be unavailable. Similarly, ADS cycles based
254  * on monolithic Pi (cycle_type < 10) require that RT_Pi is not NULL. The same
255  * restrictions hold for the sets {ND_Pi} and {ND_Pix,ND_Piy,ND_Piz} -- only one
256  * of them needs to be specified, and the availability of each enables different
257  * AMS cycle type options for the subspace solve.
258  *--------------------------------------------------------------------------*/
259 
hypre_ADSSetInterpolations(void * solver,hypre_ParCSRMatrix * RT_Pi,hypre_ParCSRMatrix * RT_Pix,hypre_ParCSRMatrix * RT_Piy,hypre_ParCSRMatrix * RT_Piz,hypre_ParCSRMatrix * ND_Pi,hypre_ParCSRMatrix * ND_Pix,hypre_ParCSRMatrix * ND_Piy,hypre_ParCSRMatrix * ND_Piz)260 HYPRE_Int hypre_ADSSetInterpolations(void *solver,
261                                      hypre_ParCSRMatrix *RT_Pi,
262                                      hypre_ParCSRMatrix *RT_Pix,
263                                      hypre_ParCSRMatrix *RT_Piy,
264                                      hypre_ParCSRMatrix *RT_Piz,
265                                      hypre_ParCSRMatrix *ND_Pi,
266                                      hypre_ParCSRMatrix *ND_Pix,
267                                      hypre_ParCSRMatrix *ND_Piy,
268                                      hypre_ParCSRMatrix *ND_Piz)
269 {
270    hypre_ADSData *ads_data = (hypre_ADSData *) solver;
271    ads_data -> Pi = RT_Pi;
272    ads_data -> Pix = RT_Pix;
273    ads_data -> Piy = RT_Piy;
274    ads_data -> Piz = RT_Piz;
275    ads_data -> ND_Pi = ND_Pi;
276    ads_data -> ND_Pix = ND_Pix;
277    ads_data -> ND_Piy = ND_Piy;
278    ads_data -> ND_Piz = ND_Piz;
279    ads_data -> owns_Pi = 0;
280    return hypre_error_flag;
281 }
282 
283 /*--------------------------------------------------------------------------
284  * hypre_ADSSetMaxIter
285  *
286  * Set the maximum number of iterations in the auxiliary-space method.
287  * The default value is 20. To use the ADS solver as a preconditioner,
288  * set maxit to 1, tol to 0.0 and print_level to 0.
289  *--------------------------------------------------------------------------*/
290 
hypre_ADSSetMaxIter(void * solver,HYPRE_Int maxit)291 HYPRE_Int hypre_ADSSetMaxIter(void *solver,
292                               HYPRE_Int maxit)
293 {
294    hypre_ADSData *ads_data = (hypre_ADSData *) solver;
295    ads_data -> maxit = maxit;
296    return hypre_error_flag;
297 }
298 
299 /*--------------------------------------------------------------------------
300  * hypre_ADSSetTol
301  *
302  * Set the convergence tolerance (if the method is used as a solver).
303  * The default value is 1e-6.
304  *--------------------------------------------------------------------------*/
305 
hypre_ADSSetTol(void * solver,HYPRE_Real tol)306 HYPRE_Int hypre_ADSSetTol(void *solver,
307                           HYPRE_Real tol)
308 {
309    hypre_ADSData *ads_data = (hypre_ADSData *) solver;
310    ads_data -> tol = tol;
311    return hypre_error_flag;
312 }
313 
314 /*--------------------------------------------------------------------------
315  * hypre_ADSSetCycleType
316  *
317  * Choose which three-level solver to use. Possible values are:
318  *
319  *   1 = 3-level multipl. solver (01210)      <-- small solution time
320  *   2 = 3-level additive solver (0+1+2)
321  *   3 = 3-level multipl. solver (02120)
322  *   4 = 3-level additive solver (010+2)
323  *   5 = 3-level multipl. solver (0102010)    <-- small solution time
324  *   6 = 3-level additive solver (1+020)
325  *   7 = 3-level multipl. solver (0201020)    <-- small number of iterations
326  *   8 = 3-level additive solver (0(1+2)0)    <-- small solution time
327  *   9 = 3-level multipl. solver (01210) with discrete divergence
328  *  11 = 5-level multipl. solver (013454310)  <-- small solution time, memory
329  *  12 = 5-level additive solver (0+1+3+4+5)
330  *  13 = 5-level multipl. solver (034515430)  <-- small solution time, memory
331  *  14 = 5-level additive solver (01(3+4+5)10)
332  *
333  * The default value is 1.
334  *--------------------------------------------------------------------------*/
335 
hypre_ADSSetCycleType(void * solver,HYPRE_Int cycle_type)336 HYPRE_Int hypre_ADSSetCycleType(void *solver,
337                                 HYPRE_Int cycle_type)
338 {
339    hypre_ADSData *ads_data = (hypre_ADSData *) solver;
340    ads_data -> cycle_type = cycle_type;
341    return hypre_error_flag;
342 }
343 
344 /*--------------------------------------------------------------------------
345  * hypre_ADSSetPrintLevel
346  *
347  * Control how much information is printed during the solution iterations.
348  * The defaut values is 1 (print residual norm at each step).
349  *--------------------------------------------------------------------------*/
350 
hypre_ADSSetPrintLevel(void * solver,HYPRE_Int print_level)351 HYPRE_Int hypre_ADSSetPrintLevel(void *solver,
352                                  HYPRE_Int print_level)
353 {
354    hypre_ADSData *ads_data = (hypre_ADSData *) solver;
355    ads_data -> print_level = print_level;
356    return hypre_error_flag;
357 }
358 
359 /*--------------------------------------------------------------------------
360  * hypre_ADSSetSmoothingOptions
361  *
362  * Set relaxation parameters for A. Default values: 2, 1, 1.0, 1.0.
363  *--------------------------------------------------------------------------*/
364 
hypre_ADSSetSmoothingOptions(void * solver,HYPRE_Int A_relax_type,HYPRE_Int A_relax_times,HYPRE_Real A_relax_weight,HYPRE_Real A_omega)365 HYPRE_Int hypre_ADSSetSmoothingOptions(void *solver,
366                                        HYPRE_Int A_relax_type,
367                                        HYPRE_Int A_relax_times,
368                                        HYPRE_Real A_relax_weight,
369                                        HYPRE_Real A_omega)
370 {
371    hypre_ADSData *ads_data = (hypre_ADSData *) solver;
372    ads_data -> A_relax_type = A_relax_type;
373    ads_data -> A_relax_times = A_relax_times;
374    ads_data -> A_relax_weight = A_relax_weight;
375    ads_data -> A_omega = A_omega;
376    return hypre_error_flag;
377 }
378 
379 /*--------------------------------------------------------------------------
380  * hypre_ADSSetChebySmoothingOptions
381  *
382  * Set parameters for Chebyshev relaxation. Default values: 2, 0.3.
383  *--------------------------------------------------------------------------*/
384 
hypre_ADSSetChebySmoothingOptions(void * solver,HYPRE_Int A_cheby_order,HYPRE_Int A_cheby_fraction)385 HYPRE_Int hypre_ADSSetChebySmoothingOptions(void *solver,
386                                             HYPRE_Int A_cheby_order,
387                                             HYPRE_Int A_cheby_fraction)
388 {
389    hypre_ADSData *ads_data = (hypre_ADSData *) solver;
390    ads_data -> A_cheby_order =  A_cheby_order;
391    ads_data -> A_cheby_fraction =  A_cheby_fraction;
392 
393    return hypre_error_flag;
394 }
395 
396 /*--------------------------------------------------------------------------
397  * hypre_ADSSetAMSOptions
398  *
399  * Set AMS parameters for B_C. Default values: 11, 10, 1, 3, 0.25, 0, 0.
400  *
401  * Note that B_C_cycle_type should be greater than 10, unless the high-order
402  * interface of hypre_ADSSetInterpolations is being used!
403  *--------------------------------------------------------------------------*/
404 
hypre_ADSSetAMSOptions(void * solver,HYPRE_Int B_C_cycle_type,HYPRE_Int B_C_coarsen_type,HYPRE_Int B_C_agg_levels,HYPRE_Int B_C_relax_type,HYPRE_Real B_C_theta,HYPRE_Int B_C_interp_type,HYPRE_Int B_C_Pmax)405 HYPRE_Int hypre_ADSSetAMSOptions(void *solver,
406                                  HYPRE_Int B_C_cycle_type,
407                                  HYPRE_Int B_C_coarsen_type,
408                                  HYPRE_Int B_C_agg_levels,
409                                  HYPRE_Int B_C_relax_type,
410                                  HYPRE_Real B_C_theta,
411                                  HYPRE_Int B_C_interp_type,
412                                  HYPRE_Int B_C_Pmax)
413 {
414    hypre_ADSData *ads_data = (hypre_ADSData *) solver;
415    ads_data -> B_C_cycle_type = B_C_cycle_type;
416    ads_data -> B_C_coarsen_type = B_C_coarsen_type;
417    ads_data -> B_C_agg_levels = B_C_agg_levels;
418    ads_data -> B_C_relax_type = B_C_relax_type;
419    ads_data -> B_C_theta = B_C_theta;
420    ads_data -> B_C_interp_type = B_C_interp_type;
421    ads_data -> B_C_Pmax = B_C_Pmax;
422 
423    return hypre_error_flag;
424 }
425 
426 /*--------------------------------------------------------------------------
427  * hypre_ADSSetAMGOptions
428  *
429  * Set AMG parameters for B_Pi. Default values: 10, 1, 3, 0.25, 0, 0.
430  *--------------------------------------------------------------------------*/
431 
hypre_ADSSetAMGOptions(void * solver,HYPRE_Int B_Pi_coarsen_type,HYPRE_Int B_Pi_agg_levels,HYPRE_Int B_Pi_relax_type,HYPRE_Real B_Pi_theta,HYPRE_Int B_Pi_interp_type,HYPRE_Int B_Pi_Pmax)432 HYPRE_Int hypre_ADSSetAMGOptions(void *solver,
433                                  HYPRE_Int B_Pi_coarsen_type,
434                                  HYPRE_Int B_Pi_agg_levels,
435                                  HYPRE_Int B_Pi_relax_type,
436                                  HYPRE_Real B_Pi_theta,
437                                  HYPRE_Int B_Pi_interp_type,
438                                  HYPRE_Int B_Pi_Pmax)
439 {
440    hypre_ADSData *ads_data = (hypre_ADSData *) solver;
441    ads_data -> B_Pi_coarsen_type = B_Pi_coarsen_type;
442    ads_data -> B_Pi_agg_levels = B_Pi_agg_levels;
443    ads_data -> B_Pi_relax_type = B_Pi_relax_type;
444    ads_data -> B_Pi_theta = B_Pi_theta;
445    ads_data -> B_Pi_interp_type = B_Pi_interp_type;
446    ads_data -> B_Pi_Pmax = B_Pi_Pmax;
447    return hypre_error_flag;
448 }
449 
450 /*--------------------------------------------------------------------------
451  * hypre_ADSComputePi
452  *
453  * Construct the Pi interpolation matrix, which maps the space of vector
454  * linear finite elements to the space of face finite elements.
455  *
456  * The construction is based on the fact that Pi = [Pi_x, Pi_y, Pi_z], where
457  * each block has the same sparsity structure as C*G, with entries that can be
458  * computed from the vectors RT100, RT010 and RT001.
459  *
460  * We assume a constant number of vertices per face (no prisms or pyramids).
461  *--------------------------------------------------------------------------*/
462 
hypre_ADSComputePi(hypre_ParCSRMatrix * A,hypre_ParCSRMatrix * C,hypre_ParCSRMatrix * G,hypre_ParVector * x,hypre_ParVector * y,hypre_ParVector * z,hypre_ParCSRMatrix * PiNDx,hypre_ParCSRMatrix * PiNDy,hypre_ParCSRMatrix * PiNDz,hypre_ParCSRMatrix ** Pi_ptr)463 HYPRE_Int hypre_ADSComputePi(hypre_ParCSRMatrix *A,
464                              hypre_ParCSRMatrix *C,
465                              hypre_ParCSRMatrix *G,
466                              hypre_ParVector *x,
467                              hypre_ParVector *y,
468                              hypre_ParVector *z,
469                              hypre_ParCSRMatrix *PiNDx,
470                              hypre_ParCSRMatrix *PiNDy,
471                              hypre_ParCSRMatrix *PiNDz,
472                              hypre_ParCSRMatrix **Pi_ptr)
473 {
474 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
475    HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(A) );
476 #endif
477 
478    hypre_ParCSRMatrix *Pi;
479 
480    /* Compute the representations of the coordinate vectors, RT100, RT010 and
481       RT001, in the Raviart-Thomas space, by observing that the RT coordinates
482       of (1,0,0) = -curl (0,z,0) are given by C*PiNDy*z, etc. (We ignore the
483       minus sign since it is irrelevant for the coarse-grid correction.) */
484    hypre_ParVector *RT100, *RT010, *RT001;
485    {
486       hypre_ParVector *PiNDlin = hypre_ParVectorInRangeOf(PiNDx);
487 
488       RT100 = hypre_ParVectorInRangeOf(C);
489       hypre_ParCSRMatrixMatvec(1.0, PiNDy, z, 0.0, PiNDlin);
490       hypre_ParCSRMatrixMatvec(1.0, C, PiNDlin, 0.0, RT100);
491       RT010 = hypre_ParVectorInRangeOf(C);
492       hypre_ParCSRMatrixMatvec(1.0, PiNDz, x, 0.0, PiNDlin);
493       hypre_ParCSRMatrixMatvec(1.0, C, PiNDlin, 0.0, RT010);
494       RT001 = hypre_ParVectorInRangeOf(C);
495       hypre_ParCSRMatrixMatvec(1.0, PiNDx, y, 0.0, PiNDlin);
496       hypre_ParCSRMatrixMatvec(1.0, C, PiNDlin, 0.0, RT001);
497 
498       hypre_ParVectorDestroy(PiNDlin);
499    }
500 
501    /* Compute Pi = [Pi_x, Pi_y, Pi_z] */
502    {
503       HYPRE_Int i, j, d;
504 
505       HYPRE_Real *RT100_data = hypre_VectorData(hypre_ParVectorLocalVector(RT100));
506       HYPRE_Real *RT010_data = hypre_VectorData(hypre_ParVectorLocalVector(RT010));
507       HYPRE_Real *RT001_data = hypre_VectorData(hypre_ParVectorLocalVector(RT001));
508 
509       /* Each component of Pi has the sparsity pattern of the topological
510          face-to-vertex matrix. */
511       hypre_ParCSRMatrix *F2V;
512 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
513       if (exec == HYPRE_EXEC_DEVICE)
514       {
515          F2V = hypre_ParCSRMatMat(C, G);
516       }
517       else
518 #endif
519       {
520          F2V = hypre_ParMatmul(C, G);
521       }
522 
523       /* Create the parallel interpolation matrix */
524       {
525          MPI_Comm comm = hypre_ParCSRMatrixComm(F2V);
526          HYPRE_BigInt global_num_rows = hypre_ParCSRMatrixGlobalNumRows(F2V);
527          HYPRE_BigInt global_num_cols = 3*hypre_ParCSRMatrixGlobalNumCols(F2V);
528          HYPRE_BigInt *row_starts = hypre_ParCSRMatrixRowStarts(F2V);
529          HYPRE_BigInt *col_starts;
530          HYPRE_Int col_starts_size;
531          HYPRE_Int num_cols_offd = 3*hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(F2V));
532          HYPRE_Int num_nonzeros_diag = 3*hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixDiag(F2V));
533          HYPRE_Int num_nonzeros_offd = 3*hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixOffd(F2V));
534          HYPRE_BigInt *col_starts_F2V = hypre_ParCSRMatrixColStarts(F2V);
535          col_starts_size = 2;
536          col_starts = hypre_TAlloc(HYPRE_BigInt, col_starts_size, HYPRE_MEMORY_HOST);
537          for (i = 0; i < col_starts_size; i++)
538             col_starts[i] = 3 * col_starts_F2V[i];
539 
540          Pi = hypre_ParCSRMatrixCreate(comm,
541                                        global_num_rows,
542                                        global_num_cols,
543                                        row_starts,
544                                        col_starts,
545                                        num_cols_offd,
546                                        num_nonzeros_diag,
547                                        num_nonzeros_offd);
548 
549          hypre_ParCSRMatrixOwnsData(Pi) = 1;
550          hypre_ParCSRMatrixInitialize(Pi);
551       }
552 
553       /* Fill-in the diagonal part */
554       {
555          hypre_CSRMatrix *F2V_diag = hypre_ParCSRMatrixDiag(F2V);
556          HYPRE_Int *F2V_diag_I = hypre_CSRMatrixI(F2V_diag);
557          HYPRE_Int *F2V_diag_J = hypre_CSRMatrixJ(F2V_diag);
558 
559          HYPRE_Int F2V_diag_nrows = hypre_CSRMatrixNumRows(F2V_diag);
560          HYPRE_Int F2V_diag_nnz = hypre_CSRMatrixNumNonzeros(F2V_diag);
561 
562          hypre_CSRMatrix *Pi_diag = hypre_ParCSRMatrixDiag(Pi);
563          HYPRE_Int *Pi_diag_I = hypre_CSRMatrixI(Pi_diag);
564          HYPRE_Int *Pi_diag_J = hypre_CSRMatrixJ(Pi_diag);
565          HYPRE_Real *Pi_diag_data = hypre_CSRMatrixData(Pi_diag);
566 
567 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
568          if (exec == HYPRE_EXEC_DEVICE)
569          {
570             HYPRE_THRUST_CALL( transform,
571                                F2V_diag_I,
572                                F2V_diag_I + F2V_diag_nrows + 1,
573                                Pi_diag_I,
574                                3 * _1 );
575 
576             dim3 bDim = hypre_GetDefaultCUDABlockDimension();
577             dim3 gDim = hypre_GetDefaultCUDAGridDimension(F2V_diag_nnz, "thread", bDim);
578 
579             HYPRE_CUDA_LAUNCH( hypreCUDAKernel_AMSComputePi_copy1, gDim, bDim,
580                                F2V_diag_nnz, 3, F2V_diag_J, Pi_diag_J );
581 
582             gDim = hypre_GetDefaultCUDAGridDimension(F2V_diag_nrows, "warp", bDim);
583 
584             HYPRE_CUDA_LAUNCH( hypreCUDAKernel_AMSComputePi_copy2, gDim, bDim,
585                                F2V_diag_nrows, 3, F2V_diag_I, NULL, RT100_data, RT010_data, RT001_data,
586                                Pi_diag_data );
587          }
588          else
589 #endif
590          {
591             for (i = 0; i < F2V_diag_nrows+1; i++)
592                Pi_diag_I[i] = 3 * F2V_diag_I[i];
593 
594             for (i = 0; i < F2V_diag_nnz; i++)
595                for (d = 0; d < 3; d++)
596                   Pi_diag_J[3*i+d] = 3*F2V_diag_J[i]+d;
597 
598             for (i = 0; i < F2V_diag_nrows; i++)
599                for (j = F2V_diag_I[i]; j < F2V_diag_I[i+1]; j++)
600                {
601                   *Pi_diag_data++ = RT100_data[i];
602                   *Pi_diag_data++ = RT010_data[i];
603                   *Pi_diag_data++ = RT001_data[i];
604                }
605          }
606       }
607 
608       /* Fill-in the off-diagonal part */
609       {
610          hypre_CSRMatrix *F2V_offd = hypre_ParCSRMatrixOffd(F2V);
611          HYPRE_Int *F2V_offd_I = hypre_CSRMatrixI(F2V_offd);
612          HYPRE_Int *F2V_offd_J = hypre_CSRMatrixJ(F2V_offd);
613 
614          HYPRE_Int F2V_offd_nrows = hypre_CSRMatrixNumRows(F2V_offd);
615          HYPRE_Int F2V_offd_ncols = hypre_CSRMatrixNumCols(F2V_offd);
616          HYPRE_Int F2V_offd_nnz = hypre_CSRMatrixNumNonzeros(F2V_offd);
617 
618          hypre_CSRMatrix *Pi_offd = hypre_ParCSRMatrixOffd(Pi);
619          HYPRE_Int *Pi_offd_I = hypre_CSRMatrixI(Pi_offd);
620          HYPRE_Int *Pi_offd_J = hypre_CSRMatrixJ(Pi_offd);
621          HYPRE_Real *Pi_offd_data = hypre_CSRMatrixData(Pi_offd);
622 
623          HYPRE_BigInt *F2V_cmap = hypre_ParCSRMatrixColMapOffd(F2V);
624          HYPRE_BigInt *Pi_cmap = hypre_ParCSRMatrixColMapOffd(Pi);
625 
626 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
627          if (exec == HYPRE_EXEC_DEVICE)
628          {
629             if (F2V_offd_ncols)
630             {
631                HYPRE_THRUST_CALL( transform,
632                                   F2V_offd_I,
633                                   F2V_offd_I + F2V_offd_nrows + 1,
634                                   Pi_offd_I,
635                                   3 * _1 );
636             }
637 
638             dim3 bDim = hypre_GetDefaultCUDABlockDimension();
639             dim3 gDim = hypre_GetDefaultCUDAGridDimension(F2V_offd_nnz, "thread", bDim);
640 
641             HYPRE_CUDA_LAUNCH( hypreCUDAKernel_AMSComputePi_copy1, gDim, bDim,
642                                F2V_offd_nnz, 3, F2V_offd_J, Pi_offd_J );
643 
644             gDim = hypre_GetDefaultCUDAGridDimension(F2V_offd_nrows, "warp", bDim);
645 
646             HYPRE_CUDA_LAUNCH( hypreCUDAKernel_AMSComputePi_copy2, gDim, bDim,
647                                F2V_offd_nrows, 3, F2V_offd_I, NULL, RT100_data, RT010_data, RT001_data,
648                                Pi_offd_data );
649          }
650          else
651 #endif
652          {
653             if (F2V_offd_ncols)
654                for (i = 0; i < F2V_offd_nrows+1; i++)
655                   Pi_offd_I[i] = 3 * F2V_offd_I[i];
656 
657             for (i = 0; i < F2V_offd_nnz; i++)
658                for (d = 0; d < 3; d++)
659                   Pi_offd_J[3*i+d] = 3*F2V_offd_J[i]+d;
660 
661             for (i = 0; i < F2V_offd_nrows; i++)
662                for (j = F2V_offd_I[i]; j < F2V_offd_I[i+1]; j++)
663                {
664                   *Pi_offd_data++ = RT100_data[i];
665                   *Pi_offd_data++ = RT010_data[i];
666                   *Pi_offd_data++ = RT001_data[i];
667                }
668          }
669 
670          for (i = 0; i < F2V_offd_ncols; i++)
671             for (d = 0; d < 3; d++)
672                Pi_cmap[3*i+d] = 3*F2V_cmap[i]+(HYPRE_BigInt)d;
673       }
674 
675       hypre_ParCSRMatrixDestroy(F2V);
676    }
677 
678    hypre_ParVectorDestroy(RT100);
679    hypre_ParVectorDestroy(RT010);
680    hypre_ParVectorDestroy(RT001);
681 
682    *Pi_ptr = Pi;
683 
684    return hypre_error_flag;
685 }
686 
687 /*--------------------------------------------------------------------------
688  * hypre_ADSComputePixyz
689  *
690  * Construct the components Pix, Piy, Piz of the interpolation matrix Pi, which
691  * maps the space of vector linear finite elements to the space of face finite
692  * elements.
693  *
694  * The construction is based on the fact that each component has the same
695  * sparsity structure as the matrix C*G, with entries that can be computed from
696  * the vectors RT100, RT010 and RT001.
697  *
698  * We assume a constant number of vertices per face (no prisms or pyramids).
699  *--------------------------------------------------------------------------*/
700 
hypre_ADSComputePixyz(hypre_ParCSRMatrix * A,hypre_ParCSRMatrix * C,hypre_ParCSRMatrix * G,hypre_ParVector * x,hypre_ParVector * y,hypre_ParVector * z,hypre_ParCSRMatrix * PiNDx,hypre_ParCSRMatrix * PiNDy,hypre_ParCSRMatrix * PiNDz,hypre_ParCSRMatrix ** Pix_ptr,hypre_ParCSRMatrix ** Piy_ptr,hypre_ParCSRMatrix ** Piz_ptr)701 HYPRE_Int hypre_ADSComputePixyz(hypre_ParCSRMatrix *A,
702                                 hypre_ParCSRMatrix *C,
703                                 hypre_ParCSRMatrix *G,
704                                 hypre_ParVector *x,
705                                 hypre_ParVector *y,
706                                 hypre_ParVector *z,
707                                 hypre_ParCSRMatrix *PiNDx,
708                                 hypre_ParCSRMatrix *PiNDy,
709                                 hypre_ParCSRMatrix *PiNDz,
710                                 hypre_ParCSRMatrix **Pix_ptr,
711                                 hypre_ParCSRMatrix **Piy_ptr,
712                                 hypre_ParCSRMatrix **Piz_ptr)
713 {
714    hypre_ParCSRMatrix *Pix, *Piy, *Piz;
715 
716 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
717    HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(A) );
718 #endif
719 
720    /* Compute the representations of the coordinate vectors, RT100, RT010 and
721       RT001, in the Raviart-Thomas space, by observing that the RT coordinates
722       of (1,0,0) = -curl (0,z,0) are given by C*PiNDy*z, etc. (We ignore the
723       minus sign since it is irrelevant for the coarse-grid correction.) */
724    hypre_ParVector *RT100, *RT010, *RT001;
725    {
726       hypre_ParVector *PiNDlin = hypre_ParVectorInRangeOf(PiNDx);
727 
728       RT100 = hypre_ParVectorInRangeOf(C);
729       hypre_ParCSRMatrixMatvec(1.0, PiNDy, z, 0.0, PiNDlin);
730       hypre_ParCSRMatrixMatvec(1.0, C, PiNDlin, 0.0, RT100);
731       RT010 = hypre_ParVectorInRangeOf(C);
732       hypre_ParCSRMatrixMatvec(1.0, PiNDz, x, 0.0, PiNDlin);
733       hypre_ParCSRMatrixMatvec(1.0, C, PiNDlin, 0.0, RT010);
734       RT001 = hypre_ParVectorInRangeOf(C);
735       hypre_ParCSRMatrixMatvec(1.0, PiNDx, y, 0.0, PiNDlin);
736       hypre_ParCSRMatrixMatvec(1.0, C, PiNDlin, 0.0, RT001);
737 
738       hypre_ParVectorDestroy(PiNDlin);
739    }
740 
741    /* Compute Pix, Piy, Piz */
742    {
743       HYPRE_Int i, j;
744 
745       HYPRE_Real *RT100_data = hypre_VectorData(hypre_ParVectorLocalVector(RT100));
746       HYPRE_Real *RT010_data = hypre_VectorData(hypre_ParVectorLocalVector(RT010));
747       HYPRE_Real *RT001_data = hypre_VectorData(hypre_ParVectorLocalVector(RT001));
748 
749       /* Each component of Pi has the sparsity pattern of the topological
750          face-to-vertex matrix. */
751       hypre_ParCSRMatrix *F2V;
752 
753 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
754       if (exec == HYPRE_EXEC_DEVICE)
755       {
756          F2V = hypre_ParCSRMatMat(C, G);
757       }
758       else
759 #endif
760       {
761          F2V = hypre_ParMatmul(C, G);
762       }
763 
764       /* Create the components of the parallel interpolation matrix */
765       {
766          MPI_Comm comm = hypre_ParCSRMatrixComm(F2V);
767          HYPRE_BigInt global_num_rows = hypre_ParCSRMatrixGlobalNumRows(F2V);
768          HYPRE_BigInt global_num_cols = hypre_ParCSRMatrixGlobalNumCols(F2V);
769          HYPRE_BigInt *row_starts = hypre_ParCSRMatrixRowStarts(F2V);
770          HYPRE_BigInt *col_starts = hypre_ParCSRMatrixColStarts(F2V);
771          HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(F2V));
772          HYPRE_Int num_nonzeros_diag = hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixDiag(F2V));
773          HYPRE_Int num_nonzeros_offd = hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixOffd(F2V));
774 
775          Pix = hypre_ParCSRMatrixCreate(comm,
776                                         global_num_rows,
777                                         global_num_cols,
778                                         row_starts,
779                                         col_starts,
780                                         num_cols_offd,
781                                         num_nonzeros_diag,
782                                         num_nonzeros_offd);
783          hypre_ParCSRMatrixOwnsData(Pix) = 1;
784          hypre_ParCSRMatrixInitialize(Pix);
785 
786          Piy = hypre_ParCSRMatrixCreate(comm,
787                                         global_num_rows,
788                                         global_num_cols,
789                                         row_starts,
790                                         col_starts,
791                                         num_cols_offd,
792                                         num_nonzeros_diag,
793                                         num_nonzeros_offd);
794          hypre_ParCSRMatrixOwnsData(Piy) = 1;
795          hypre_ParCSRMatrixInitialize(Piy);
796 
797          Piz = hypre_ParCSRMatrixCreate(comm,
798                                         global_num_rows,
799                                         global_num_cols,
800                                         row_starts,
801                                         col_starts,
802                                         num_cols_offd,
803                                         num_nonzeros_diag,
804                                         num_nonzeros_offd);
805          hypre_ParCSRMatrixOwnsData(Piz) = 1;
806          hypre_ParCSRMatrixInitialize(Piz);
807       }
808 
809       /* Fill-in the diagonal part */
810       {
811          hypre_CSRMatrix *F2V_diag = hypre_ParCSRMatrixDiag(F2V);
812          HYPRE_Int *F2V_diag_I = hypre_CSRMatrixI(F2V_diag);
813          HYPRE_Int *F2V_diag_J = hypre_CSRMatrixJ(F2V_diag);
814 
815          HYPRE_Int F2V_diag_nrows = hypre_CSRMatrixNumRows(F2V_diag);
816          HYPRE_Int F2V_diag_nnz = hypre_CSRMatrixNumNonzeros(F2V_diag);
817 
818          hypre_CSRMatrix *Pix_diag = hypre_ParCSRMatrixDiag(Pix);
819          HYPRE_Int *Pix_diag_I = hypre_CSRMatrixI(Pix_diag);
820          HYPRE_Int *Pix_diag_J = hypre_CSRMatrixJ(Pix_diag);
821          HYPRE_Real *Pix_diag_data = hypre_CSRMatrixData(Pix_diag);
822 
823          hypre_CSRMatrix *Piy_diag = hypre_ParCSRMatrixDiag(Piy);
824          HYPRE_Int *Piy_diag_I = hypre_CSRMatrixI(Piy_diag);
825          HYPRE_Int *Piy_diag_J = hypre_CSRMatrixJ(Piy_diag);
826          HYPRE_Real *Piy_diag_data = hypre_CSRMatrixData(Piy_diag);
827 
828          hypre_CSRMatrix *Piz_diag = hypre_ParCSRMatrixDiag(Piz);
829          HYPRE_Int *Piz_diag_I = hypre_CSRMatrixI(Piz_diag);
830          HYPRE_Int *Piz_diag_J = hypre_CSRMatrixJ(Piz_diag);
831          HYPRE_Real *Piz_diag_data = hypre_CSRMatrixData(Piz_diag);
832 
833 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
834          if (exec == HYPRE_EXEC_DEVICE)
835          {
836             HYPRE_THRUST_CALL( copy_n,
837                                thrust::make_zip_iterator(thrust::make_tuple(F2V_diag_I, F2V_diag_I, F2V_diag_I)),
838                                F2V_diag_nrows + 1,
839                                thrust::make_zip_iterator(thrust::make_tuple(Pix_diag_I, Piy_diag_I, Piz_diag_I)) );
840 
841             HYPRE_THRUST_CALL( copy_n,
842                                thrust::make_zip_iterator(thrust::make_tuple(F2V_diag_J, F2V_diag_J, F2V_diag_J)),
843                                F2V_diag_nnz,
844                                thrust::make_zip_iterator(thrust::make_tuple(Pix_diag_J, Piy_diag_J, Piz_diag_J)) );
845 
846             dim3 bDim = hypre_GetDefaultCUDABlockDimension();
847             dim3 gDim = hypre_GetDefaultCUDAGridDimension(F2V_diag_nrows, "warp", bDim);
848 
849             HYPRE_CUDA_LAUNCH( hypreCUDAKernel_AMSComputePixyz_copy, gDim, bDim,
850                                F2V_diag_nrows, 3, F2V_diag_I, NULL, RT100_data, RT010_data, RT001_data,
851                                Pix_diag_data, Piy_diag_data, Piz_diag_data );
852          }
853          else
854 #endif
855          {
856             for (i = 0; i < F2V_diag_nrows+1; i++)
857             {
858                Pix_diag_I[i] = F2V_diag_I[i];
859                Piy_diag_I[i] = F2V_diag_I[i];
860                Piz_diag_I[i] = F2V_diag_I[i];
861             }
862 
863             for (i = 0; i < F2V_diag_nnz; i++)
864             {
865                Pix_diag_J[i] = F2V_diag_J[i];
866                Piy_diag_J[i] = F2V_diag_J[i];
867                Piz_diag_J[i] = F2V_diag_J[i];
868             }
869 
870             for (i = 0; i < F2V_diag_nrows; i++)
871                for (j = F2V_diag_I[i]; j < F2V_diag_I[i+1]; j++)
872                {
873                   *Pix_diag_data++ = RT100_data[i];
874                   *Piy_diag_data++ = RT010_data[i];
875                   *Piz_diag_data++ = RT001_data[i];
876                }
877          }
878       }
879 
880       /* Fill-in the off-diagonal part */
881       {
882          hypre_CSRMatrix *F2V_offd = hypre_ParCSRMatrixOffd(F2V);
883          HYPRE_Int *F2V_offd_I = hypre_CSRMatrixI(F2V_offd);
884          HYPRE_Int *F2V_offd_J = hypre_CSRMatrixJ(F2V_offd);
885 
886          HYPRE_Int F2V_offd_nrows = hypre_CSRMatrixNumRows(F2V_offd);
887          HYPRE_Int F2V_offd_ncols = hypre_CSRMatrixNumCols(F2V_offd);
888          HYPRE_Int F2V_offd_nnz = hypre_CSRMatrixNumNonzeros(F2V_offd);
889 
890          hypre_CSRMatrix *Pix_offd = hypre_ParCSRMatrixOffd(Pix);
891          HYPRE_Int *Pix_offd_I = hypre_CSRMatrixI(Pix_offd);
892          HYPRE_Int *Pix_offd_J = hypre_CSRMatrixJ(Pix_offd);
893          HYPRE_Real *Pix_offd_data = hypre_CSRMatrixData(Pix_offd);
894 
895          hypre_CSRMatrix *Piy_offd = hypre_ParCSRMatrixOffd(Piy);
896          HYPRE_Int *Piy_offd_I = hypre_CSRMatrixI(Piy_offd);
897          HYPRE_Int *Piy_offd_J = hypre_CSRMatrixJ(Piy_offd);
898          HYPRE_Real *Piy_offd_data = hypre_CSRMatrixData(Piy_offd);
899 
900          hypre_CSRMatrix *Piz_offd = hypre_ParCSRMatrixOffd(Piz);
901          HYPRE_Int *Piz_offd_I = hypre_CSRMatrixI(Piz_offd);
902          HYPRE_Int *Piz_offd_J = hypre_CSRMatrixJ(Piz_offd);
903          HYPRE_Real *Piz_offd_data = hypre_CSRMatrixData(Piz_offd);
904 
905          HYPRE_BigInt *F2V_cmap = hypre_ParCSRMatrixColMapOffd(F2V);
906          HYPRE_BigInt *Pix_cmap = hypre_ParCSRMatrixColMapOffd(Pix);
907          HYPRE_BigInt *Piy_cmap = hypre_ParCSRMatrixColMapOffd(Piy);
908          HYPRE_BigInt *Piz_cmap = hypre_ParCSRMatrixColMapOffd(Piz);
909 
910 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
911          if (exec == HYPRE_EXEC_DEVICE)
912          {
913             if (F2V_offd_ncols)
914             {
915                HYPRE_THRUST_CALL( copy_n,
916                                   thrust::make_zip_iterator(thrust::make_tuple(F2V_offd_I, F2V_offd_I, F2V_offd_I)),
917                                   F2V_offd_nrows + 1,
918                                   thrust::make_zip_iterator(thrust::make_tuple(Pix_offd_I, Piy_offd_I, Piz_offd_I)) );
919             }
920 
921             HYPRE_THRUST_CALL( copy_n,
922                                thrust::make_zip_iterator(thrust::make_tuple(F2V_offd_J, F2V_offd_J, F2V_offd_J)),
923                                F2V_offd_nnz,
924                                thrust::make_zip_iterator(thrust::make_tuple(Pix_offd_J, Piy_offd_J, Piz_offd_J)) );
925 
926             dim3 bDim = hypre_GetDefaultCUDABlockDimension();
927             dim3 gDim = hypre_GetDefaultCUDAGridDimension(F2V_offd_nrows, "warp", bDim);
928 
929             HYPRE_CUDA_LAUNCH( hypreCUDAKernel_AMSComputePixyz_copy, gDim, bDim,
930                                F2V_offd_nrows, 3, F2V_offd_I, NULL, RT100_data, RT010_data, RT001_data,
931                                Pix_offd_data, Piy_offd_data, Piz_offd_data );
932          }
933          else
934 #endif
935          {
936             if (F2V_offd_ncols)
937                for (i = 0; i < F2V_offd_nrows+1; i++)
938                {
939                   Pix_offd_I[i] = F2V_offd_I[i];
940                   Piy_offd_I[i] = F2V_offd_I[i];
941                   Piz_offd_I[i] = F2V_offd_I[i];
942                }
943 
944             for (i = 0; i < F2V_offd_nnz; i++)
945             {
946                Pix_offd_J[i] = F2V_offd_J[i];
947                Piy_offd_J[i] = F2V_offd_J[i];
948                Piz_offd_J[i] = F2V_offd_J[i];
949             }
950 
951             for (i = 0; i < F2V_offd_nrows; i++)
952                for (j = F2V_offd_I[i]; j < F2V_offd_I[i+1]; j++)
953                {
954                   *Pix_offd_data++ = RT100_data[i];
955                   *Piy_offd_data++ = RT010_data[i];
956                   *Piz_offd_data++ = RT001_data[i];
957                }
958          }
959 
960          for (i = 0; i < F2V_offd_ncols; i++)
961          {
962             Pix_cmap[i] = F2V_cmap[i];
963             Piy_cmap[i] = F2V_cmap[i];
964             Piz_cmap[i] = F2V_cmap[i];
965          }
966       }
967 
968       if (HYPRE_AssumedPartitionCheck())
969          hypre_ParCSRMatrixDestroy(F2V);
970       else
971          hypre_ParCSRBooleanMatrixDestroy((hypre_ParCSRBooleanMatrix*)F2V);
972    }
973 
974    hypre_ParVectorDestroy(RT100);
975    hypre_ParVectorDestroy(RT010);
976    hypre_ParVectorDestroy(RT001);
977 
978    *Pix_ptr = Pix;
979    *Piy_ptr = Piy;
980    *Piz_ptr = Piz;
981 
982    return hypre_error_flag;
983 }
984 
985 /*--------------------------------------------------------------------------
986  * hypre_ADSSetup
987  *
988  * Construct the ADS solver components.
989  *
990  * The following functions need to be called before hypre_ADSSetup():
991  * - hypre_ADSSetDiscreteCurl()
992  * - hypre_ADSSetDiscreteGradient()
993  * - hypre_ADSSetCoordinateVectors()
994  *--------------------------------------------------------------------------*/
995 
hypre_ADSSetup(void * solver,hypre_ParCSRMatrix * A,hypre_ParVector * b,hypre_ParVector * x)996 HYPRE_Int hypre_ADSSetup(void *solver,
997                          hypre_ParCSRMatrix *A,
998                          hypre_ParVector *b,
999                          hypre_ParVector *x)
1000 {
1001 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
1002    HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(A) );
1003 #endif
1004 
1005    hypre_ADSData *ads_data = (hypre_ADSData *) solver;
1006    hypre_AMSData *ams_data;
1007 
1008    ads_data -> A = A;
1009 
1010    /* Make sure that the first entry in each row is the diagonal one. */
1011    /* hypre_CSRMatrixReorder(hypre_ParCSRMatrixDiag(ads_data -> A)); */
1012 
1013    /* Compute the l1 norm of the rows of A */
1014    if (ads_data -> A_relax_type >= 1 && ads_data -> A_relax_type <= 4)
1015    {
1016       HYPRE_Real *l1_norm_data = NULL;
1017 
1018       hypre_ParCSRComputeL1Norms(ads_data -> A, ads_data -> A_relax_type, NULL, &l1_norm_data);
1019 
1020       ads_data -> A_l1_norms = hypre_SeqVectorCreate(hypre_ParCSRMatrixNumRows(ads_data -> A));
1021       hypre_VectorData(ads_data -> A_l1_norms) = l1_norm_data;
1022       hypre_SeqVectorInitialize_v2(ads_data -> A_l1_norms, hypre_ParCSRMatrixMemoryLocation(ads_data -> A));
1023    }
1024 
1025    /* Chebyshev? */
1026    if (ads_data -> A_relax_type == 16)
1027    {
1028       hypre_ParCSRMaxEigEstimateCG(ads_data->A, 1, 10,
1029                                    &ads_data->A_max_eig_est,
1030                                    &ads_data->A_min_eig_est);
1031    }
1032 
1033    /* Create the AMS solver on the range of C^T */
1034    {
1035       HYPRE_AMSCreate(&ads_data -> B_C);
1036       HYPRE_AMSSetDimension(ads_data -> B_C, 3);
1037 
1038       /* B_C is a preconditioner */
1039       HYPRE_AMSSetMaxIter(ads_data -> B_C, 1);
1040       HYPRE_AMSSetTol(ads_data -> B_C, 0.0);
1041       HYPRE_AMSSetPrintLevel(ads_data -> B_C, 0);
1042 
1043       HYPRE_AMSSetCycleType(ads_data -> B_C, ads_data -> B_C_cycle_type);
1044       HYPRE_AMSSetDiscreteGradient(ads_data -> B_C,
1045                                    (HYPRE_ParCSRMatrix) ads_data -> G);
1046 
1047       if (ads_data -> ND_Pi == NULL && ads_data -> ND_Pix == NULL)
1048       {
1049          if (ads_data -> B_C_cycle_type < 10)
1050          {
1051             hypre_error_w_msg(HYPRE_ERROR_GENERIC,
1052                               "Unsupported AMS cycle type in ADS!");
1053          }
1054          HYPRE_AMSSetCoordinateVectors(ads_data -> B_C,
1055                                        (HYPRE_ParVector) ads_data -> x,
1056                                        (HYPRE_ParVector) ads_data -> y,
1057                                        (HYPRE_ParVector) ads_data -> z);
1058       }
1059       else
1060       {
1061          if ((ads_data -> B_C_cycle_type < 10 && ads_data -> ND_Pi == NULL) ||
1062              (ads_data -> B_C_cycle_type > 10 && ads_data -> ND_Pix == NULL))
1063          {
1064             hypre_error_w_msg(HYPRE_ERROR_GENERIC,
1065                               "Unsupported AMS cycle type in ADS!");
1066          }
1067          HYPRE_AMSSetInterpolations(ads_data -> B_C,
1068                                     (HYPRE_ParCSRMatrix) ads_data -> ND_Pi,
1069                                     (HYPRE_ParCSRMatrix) ads_data -> ND_Pix,
1070                                     (HYPRE_ParCSRMatrix) ads_data -> ND_Piy,
1071                                     (HYPRE_ParCSRMatrix) ads_data -> ND_Piz);
1072       }
1073 
1074       /* beta=0 in the subspace */
1075       HYPRE_AMSSetBetaPoissonMatrix(ads_data -> B_C, NULL);
1076 
1077       /* Reuse A's relaxation parameters for A_C */
1078       HYPRE_AMSSetSmoothingOptions(ads_data -> B_C,
1079                                    ads_data -> A_relax_type,
1080                                    ads_data -> A_relax_times,
1081                                    ads_data -> A_relax_weight,
1082                                    ads_data -> A_omega);
1083 
1084       HYPRE_AMSSetAlphaAMGOptions(ads_data -> B_C, ads_data -> B_C_coarsen_type,
1085                                   ads_data -> B_C_agg_levels, ads_data -> B_C_relax_type,
1086                                   ads_data -> B_C_theta, ads_data -> B_C_interp_type,
1087                                   ads_data -> B_C_Pmax);
1088       /* No need to call HYPRE_AMSSetBetaAMGOptions */
1089 
1090       /* Construct the coarse space matrix by RAP */
1091       if (!ads_data -> A_C)
1092       {
1093          if (!hypre_ParCSRMatrixCommPkg(ads_data -> C))
1094          {
1095             hypre_MatvecCommPkgCreate(ads_data -> C);
1096          }
1097 
1098          if (!hypre_ParCSRMatrixCommPkg(ads_data -> A))
1099          {
1100             hypre_MatvecCommPkgCreate(ads_data -> A);
1101          }
1102 
1103 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
1104          if (exec == HYPRE_EXEC_DEVICE)
1105          {
1106             ads_data -> A_C = hypre_ParCSRMatrixRAPKT(ads_data -> C,
1107                                                       ads_data -> A,
1108                                                       ads_data -> C, 1);
1109          }
1110          else
1111 #endif
1112          {
1113             hypre_BoomerAMGBuildCoarseOperator(ads_data -> C,
1114                                                ads_data -> A,
1115                                                ads_data -> C,
1116                                                &ads_data -> A_C);
1117          }
1118 
1119          /* Make sure that A_C has no zero rows (this can happen if beta is zero
1120             in part of the domain). */
1121          hypre_ParCSRMatrixFixZeroRows(ads_data -> A_C);
1122       }
1123 
1124       HYPRE_AMSSetup(ads_data -> B_C, (HYPRE_ParCSRMatrix)ads_data -> A_C, 0, 0);
1125    }
1126 
1127    ams_data = (hypre_AMSData *) ads_data -> B_C;
1128 
1129    if (ads_data -> Pi == NULL && ads_data -> Pix == NULL)
1130    {
1131       if (ads_data -> cycle_type > 10)
1132          /* Construct Pi{x,y,z} instead of Pi = [Pix,Piy,Piz] */
1133          hypre_ADSComputePixyz(ads_data -> A,
1134                                ads_data -> C,
1135                                ads_data -> G,
1136                                ads_data -> x,
1137                                ads_data -> y,
1138                                ads_data -> z,
1139                                ams_data -> Pix,
1140                                ams_data -> Piy,
1141                                ams_data -> Piz,
1142                                &ads_data -> Pix,
1143                                &ads_data -> Piy,
1144                                &ads_data -> Piz);
1145       else
1146          /* Construct the Pi interpolation matrix */
1147          hypre_ADSComputePi(ads_data -> A,
1148                             ads_data -> C,
1149                             ads_data -> G,
1150                             ads_data -> x,
1151                             ads_data -> y,
1152                             ads_data -> z,
1153                             ams_data -> Pix,
1154                             ams_data -> Piy,
1155                             ams_data -> Piz,
1156                             &ads_data -> Pi);
1157    }
1158 
1159    if (ads_data -> cycle_type > 10)
1160    /* Create the AMG solvers on the range of Pi{x,y,z}^T */
1161    {
1162       HYPRE_BoomerAMGCreate(&ads_data -> B_Pix);
1163       HYPRE_BoomerAMGSetCoarsenType(ads_data -> B_Pix, ads_data -> B_Pi_coarsen_type);
1164       HYPRE_BoomerAMGSetAggNumLevels(ads_data -> B_Pix, ads_data -> B_Pi_agg_levels);
1165       HYPRE_BoomerAMGSetRelaxType(ads_data -> B_Pix, ads_data -> B_Pi_relax_type);
1166       HYPRE_BoomerAMGSetNumSweeps(ads_data -> B_Pix, 1);
1167       HYPRE_BoomerAMGSetMaxLevels(ads_data -> B_Pix, 25);
1168       HYPRE_BoomerAMGSetTol(ads_data -> B_Pix, 0.0);
1169       HYPRE_BoomerAMGSetMaxIter(ads_data -> B_Pix, 1);
1170       HYPRE_BoomerAMGSetStrongThreshold(ads_data -> B_Pix, ads_data -> B_Pi_theta);
1171       HYPRE_BoomerAMGSetInterpType(ads_data -> B_Pix, ads_data -> B_Pi_interp_type);
1172       HYPRE_BoomerAMGSetPMaxElmts(ads_data -> B_Pix, ads_data -> B_Pi_Pmax);
1173 
1174       HYPRE_BoomerAMGCreate(&ads_data -> B_Piy);
1175       HYPRE_BoomerAMGSetCoarsenType(ads_data -> B_Piy, ads_data -> B_Pi_coarsen_type);
1176       HYPRE_BoomerAMGSetAggNumLevels(ads_data -> B_Piy, ads_data -> B_Pi_agg_levels);
1177       HYPRE_BoomerAMGSetRelaxType(ads_data -> B_Piy, ads_data -> B_Pi_relax_type);
1178       HYPRE_BoomerAMGSetNumSweeps(ads_data -> B_Piy, 1);
1179       HYPRE_BoomerAMGSetMaxLevels(ads_data -> B_Piy, 25);
1180       HYPRE_BoomerAMGSetTol(ads_data -> B_Piy, 0.0);
1181       HYPRE_BoomerAMGSetMaxIter(ads_data -> B_Piy, 1);
1182       HYPRE_BoomerAMGSetStrongThreshold(ads_data -> B_Piy, ads_data -> B_Pi_theta);
1183       HYPRE_BoomerAMGSetInterpType(ads_data -> B_Piy, ads_data -> B_Pi_interp_type);
1184       HYPRE_BoomerAMGSetPMaxElmts(ads_data -> B_Piy, ads_data -> B_Pi_Pmax);
1185 
1186       HYPRE_BoomerAMGCreate(&ads_data -> B_Piz);
1187       HYPRE_BoomerAMGSetCoarsenType(ads_data -> B_Piz, ads_data -> B_Pi_coarsen_type);
1188       HYPRE_BoomerAMGSetAggNumLevels(ads_data -> B_Piz, ads_data -> B_Pi_agg_levels);
1189       HYPRE_BoomerAMGSetRelaxType(ads_data -> B_Piz, ads_data -> B_Pi_relax_type);
1190       HYPRE_BoomerAMGSetNumSweeps(ads_data -> B_Piz, 1);
1191       HYPRE_BoomerAMGSetMaxLevels(ads_data -> B_Piz, 25);
1192       HYPRE_BoomerAMGSetTol(ads_data -> B_Piz, 0.0);
1193       HYPRE_BoomerAMGSetMaxIter(ads_data -> B_Piz, 1);
1194       HYPRE_BoomerAMGSetStrongThreshold(ads_data -> B_Piz, ads_data -> B_Pi_theta);
1195       HYPRE_BoomerAMGSetInterpType(ads_data -> B_Piz, ads_data -> B_Pi_interp_type);
1196       HYPRE_BoomerAMGSetPMaxElmts(ads_data -> B_Piz, ads_data -> B_Pi_Pmax);
1197 
1198       /* Don't use exact solve on the coarsest level (matrices may be singular) */
1199       HYPRE_BoomerAMGSetCycleRelaxType(ads_data -> B_Pix,
1200                                        ads_data -> B_Pi_relax_type, 3);
1201       HYPRE_BoomerAMGSetCycleRelaxType(ads_data -> B_Piy,
1202                                        ads_data -> B_Pi_relax_type, 3);
1203       HYPRE_BoomerAMGSetCycleRelaxType(ads_data -> B_Piz,
1204                                        ads_data -> B_Pi_relax_type, 3);
1205 
1206       /* Construct the coarse space matrices by RAP */
1207       if (!hypre_ParCSRMatrixCommPkg(ads_data -> Pix))
1208       {
1209          hypre_MatvecCommPkgCreate(ads_data -> Pix);
1210       }
1211 
1212 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
1213       if (exec == HYPRE_EXEC_DEVICE)
1214       {
1215          ads_data -> A_Pix = hypre_ParCSRMatrixRAPKT(ads_data -> Pix,
1216                                                      ads_data -> A,
1217                                                      ads_data -> Pix, 1);
1218       }
1219       else
1220 #endif
1221       {
1222          hypre_BoomerAMGBuildCoarseOperator(ads_data -> Pix,
1223                                             ads_data -> A,
1224                                             ads_data -> Pix,
1225                                             &ads_data -> A_Pix);
1226       }
1227 
1228       HYPRE_BoomerAMGSetup(ads_data -> B_Pix,
1229                            (HYPRE_ParCSRMatrix)ads_data -> A_Pix,
1230                            NULL, NULL);
1231 
1232       if (!hypre_ParCSRMatrixCommPkg(ads_data -> Piy))
1233       {
1234          hypre_MatvecCommPkgCreate(ads_data -> Piy);
1235       }
1236 
1237 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
1238       if (exec == HYPRE_EXEC_DEVICE)
1239       {
1240          ads_data -> A_Piy = hypre_ParCSRMatrixRAPKT(ads_data -> Piy,
1241                                                      ads_data -> A,
1242                                                      ads_data -> Piy, 1);
1243       }
1244       else
1245 #endif
1246       {
1247          hypre_BoomerAMGBuildCoarseOperator(ads_data -> Piy,
1248                                             ads_data -> A,
1249                                             ads_data -> Piy,
1250                                             &ads_data -> A_Piy);
1251       }
1252 
1253       HYPRE_BoomerAMGSetup(ads_data -> B_Piy,
1254                            (HYPRE_ParCSRMatrix)ads_data -> A_Piy,
1255                            NULL, NULL);
1256 
1257       if (!hypre_ParCSRMatrixCommPkg(ads_data -> Piz))
1258       {
1259          hypre_MatvecCommPkgCreate(ads_data -> Piz);
1260       }
1261 
1262 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
1263       if (exec == HYPRE_EXEC_DEVICE)
1264       {
1265          ads_data -> A_Piz = hypre_ParCSRMatrixRAPKT(ads_data -> Piz,
1266                                                      ads_data -> A,
1267                                                      ads_data -> Piz, 1);
1268       }
1269       else
1270 #endif
1271       {
1272          hypre_BoomerAMGBuildCoarseOperator(ads_data -> Piz,
1273                                             ads_data -> A,
1274                                             ads_data -> Piz,
1275                                             &ads_data -> A_Piz);
1276       }
1277 
1278       HYPRE_BoomerAMGSetup(ads_data -> B_Piz,
1279                            (HYPRE_ParCSRMatrix)ads_data -> A_Piz,
1280                            NULL, NULL);
1281    }
1282    else
1283    /* Create the AMG solver on the range of Pi^T */
1284    {
1285       HYPRE_BoomerAMGCreate(&ads_data -> B_Pi);
1286       HYPRE_BoomerAMGSetCoarsenType(ads_data -> B_Pi, ads_data -> B_Pi_coarsen_type);
1287       HYPRE_BoomerAMGSetAggNumLevels(ads_data -> B_Pi, ads_data -> B_Pi_agg_levels);
1288       HYPRE_BoomerAMGSetRelaxType(ads_data -> B_Pi, ads_data -> B_Pi_relax_type);
1289       HYPRE_BoomerAMGSetNumSweeps(ads_data -> B_Pi, 1);
1290       HYPRE_BoomerAMGSetMaxLevels(ads_data -> B_Pi, 25);
1291       HYPRE_BoomerAMGSetTol(ads_data -> B_Pi, 0.0);
1292       HYPRE_BoomerAMGSetMaxIter(ads_data -> B_Pi, 1);
1293       HYPRE_BoomerAMGSetStrongThreshold(ads_data -> B_Pi, ads_data -> B_Pi_theta);
1294       HYPRE_BoomerAMGSetInterpType(ads_data -> B_Pi, ads_data -> B_Pi_interp_type);
1295       HYPRE_BoomerAMGSetPMaxElmts(ads_data -> B_Pi, ads_data -> B_Pi_Pmax);
1296 
1297       /* Don't use exact solve on the coarsest level (matrix may be singular) */
1298       HYPRE_BoomerAMGSetCycleRelaxType(ads_data -> B_Pi,
1299                                        ads_data -> B_Pi_relax_type,
1300                                        3);
1301 
1302       /* Construct the coarse space matrix by RAP and notify BoomerAMG that this
1303          is a 3 x 3 block system. */
1304       if (!ads_data -> A_Pi)
1305       {
1306          if (!hypre_ParCSRMatrixCommPkg(ads_data -> Pi))
1307          {
1308             hypre_MatvecCommPkgCreate(ads_data -> Pi);
1309          }
1310 
1311          if (!hypre_ParCSRMatrixCommPkg(ads_data -> A))
1312          {
1313             hypre_MatvecCommPkgCreate(ads_data -> A);
1314          }
1315 
1316 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
1317          if (exec == HYPRE_EXEC_DEVICE)
1318          {
1319             ads_data -> A_Pi = hypre_ParCSRMatrixRAPKT(ads_data -> Pi,
1320                                                        ads_data -> A,
1321                                                        ads_data -> Pi, 1);
1322          }
1323          else
1324 #endif
1325          {
1326             hypre_BoomerAMGBuildCoarseOperator(ads_data -> Pi,
1327                                                ads_data -> A,
1328                                                ads_data -> Pi,
1329                                                &ads_data -> A_Pi);
1330          }
1331 
1332          HYPRE_BoomerAMGSetNumFunctions(ads_data -> B_Pi, 3);
1333          /* HYPRE_BoomerAMGSetNodal(ads_data -> B_Pi, 1); */
1334       }
1335 
1336       HYPRE_BoomerAMGSetup(ads_data -> B_Pi,
1337                            (HYPRE_ParCSRMatrix)ads_data -> A_Pi,
1338                            NULL, NULL);
1339    }
1340 
1341    /* Allocate temporary vectors */
1342    ads_data -> r0 = hypre_ParVectorInRangeOf(ads_data -> A);
1343    ads_data -> g0 = hypre_ParVectorInRangeOf(ads_data -> A);
1344    if (ads_data -> A_C)
1345    {
1346       ads_data -> r1 = hypre_ParVectorInRangeOf(ads_data -> A_C);
1347       ads_data -> g1 = hypre_ParVectorInRangeOf(ads_data -> A_C);
1348    }
1349    if (ads_data -> cycle_type > 10)
1350    {
1351       ads_data -> r2 = hypre_ParVectorInDomainOf(ads_data -> Pix);
1352       ads_data -> g2 = hypre_ParVectorInDomainOf(ads_data -> Pix);
1353    }
1354    else
1355    {
1356       ads_data -> r2 = hypre_ParVectorInDomainOf(ads_data -> Pi);
1357       ads_data -> g2 = hypre_ParVectorInDomainOf(ads_data -> Pi);
1358    }
1359 
1360    return hypre_error_flag;
1361 }
1362 
1363 /*--------------------------------------------------------------------------
1364  * hypre_ADSSolve
1365  *
1366  * Solve the system A x = b.
1367  *--------------------------------------------------------------------------*/
1368 
hypre_ADSSolve(void * solver,hypre_ParCSRMatrix * A,hypre_ParVector * b,hypre_ParVector * x)1369 HYPRE_Int hypre_ADSSolve(void *solver,
1370                          hypre_ParCSRMatrix *A,
1371                          hypre_ParVector *b,
1372                          hypre_ParVector *x)
1373 {
1374    hypre_ADSData *ads_data = (hypre_ADSData *) solver;
1375 
1376    HYPRE_Int i, my_id = -1;
1377    HYPRE_Real r0_norm, r_norm, b_norm, relative_resid = 0, old_resid;
1378 
1379    char cycle[30];
1380    hypre_ParCSRMatrix *Ai[5], *Pi[5];
1381    HYPRE_Solver Bi[5];
1382    HYPRE_PtrToSolverFcn HBi[5];
1383    hypre_ParVector *ri[5], *gi[5];
1384    HYPRE_Int needZ = 0;
1385 
1386    hypre_ParVector *z = ads_data -> zz;
1387 
1388    Ai[0] = ads_data -> A_C;    Pi[0] = ads_data -> C;
1389    Ai[1] = ads_data -> A_Pi;   Pi[1] = ads_data -> Pi;
1390    Ai[2] = ads_data -> A_Pix;  Pi[2] = ads_data -> Pix;
1391    Ai[3] = ads_data -> A_Piy;  Pi[3] = ads_data -> Piy;
1392    Ai[4] = ads_data -> A_Piz;  Pi[4] = ads_data -> Piz;
1393 
1394    Bi[0] = ads_data -> B_C;    HBi[0] = (HYPRE_PtrToSolverFcn) hypre_AMSSolve;
1395    Bi[1] = ads_data -> B_Pi;   HBi[1] = (HYPRE_PtrToSolverFcn) hypre_BoomerAMGBlockSolve;
1396    Bi[2] = ads_data -> B_Pix;  HBi[2] = (HYPRE_PtrToSolverFcn) hypre_BoomerAMGSolve;
1397    Bi[3] = ads_data -> B_Piy;  HBi[3] = (HYPRE_PtrToSolverFcn) hypre_BoomerAMGSolve;
1398    Bi[4] = ads_data -> B_Piz;  HBi[4] = (HYPRE_PtrToSolverFcn) hypre_BoomerAMGSolve;
1399 
1400    ri[0] = ads_data -> r1;     gi[0] = ads_data -> g1;
1401    ri[1] = ads_data -> r2;     gi[1] = ads_data -> g2;
1402    ri[2] = ads_data -> r2;     gi[2] = ads_data -> g2;
1403    ri[3] = ads_data -> r2;     gi[3] = ads_data -> g2;
1404    ri[4] = ads_data -> r2;     gi[4] = ads_data -> g2;
1405 
1406 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
1407    HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(A) );
1408 #endif
1409 
1410    /* may need to create an additional temporary vector for relaxation */
1411 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
1412    if (exec == HYPRE_EXEC_DEVICE)
1413    {
1414       needZ = ads_data -> A_relax_type == 2 || ads_data -> A_relax_type == 4 || ads_data -> A_relax_type == 16;
1415    }
1416    else
1417 #endif
1418    {
1419       needZ = hypre_NumThreads() > 1 || ads_data -> A_relax_type == 16;
1420    }
1421 
1422    if (needZ && !z)
1423    {
1424       z = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(A),
1425                                 hypre_ParCSRMatrixGlobalNumRows(A),
1426                                 hypre_ParCSRMatrixRowStarts(A));
1427       hypre_ParVectorInitialize(z);
1428       ads_data -> zz = z;
1429    }
1430 
1431    if (ads_data -> print_level > 0)
1432       hypre_MPI_Comm_rank(hypre_ParCSRMatrixComm(A), &my_id);
1433 
1434    switch (ads_data -> cycle_type)
1435    {
1436       case 1:
1437       default:
1438          hypre_sprintf(cycle,"%s","01210");
1439          break;
1440       case 2:
1441          hypre_sprintf(cycle,"%s","(0+1+2)");
1442          break;
1443       case 3:
1444          hypre_sprintf(cycle,"%s","02120");
1445          break;
1446       case 4:
1447          hypre_sprintf(cycle,"%s","(010+2)");
1448          break;
1449       case 5:
1450          hypre_sprintf(cycle,"%s","0102010");
1451          break;
1452       case 6:
1453          hypre_sprintf(cycle,"%s","(020+1)");
1454          break;
1455       case 7:
1456          hypre_sprintf(cycle,"%s","0201020");
1457          break;
1458       case 8:
1459          hypre_sprintf(cycle,"%s","0(+1+2)0");
1460          break;
1461       case 9:
1462          hypre_sprintf(cycle,"%s","01210");
1463          break;
1464       case 11:
1465          hypre_sprintf(cycle,"%s","013454310");
1466          break;
1467       case 12:
1468          hypre_sprintf(cycle,"%s","(0+1+3+4+5)");
1469          break;
1470       case 13:
1471          hypre_sprintf(cycle,"%s","034515430");
1472          break;
1473       case 14:
1474          hypre_sprintf(cycle,"%s","01(+3+4+5)10");
1475          break;
1476    }
1477 
1478    for (i = 0; i < ads_data -> maxit; i++)
1479    {
1480       /* Compute initial residual norms */
1481       if (ads_data -> maxit > 1 && i == 0)
1482       {
1483          hypre_ParVectorCopy(b, ads_data -> r0);
1484          hypre_ParCSRMatrixMatvec(-1.0, ads_data -> A, x, 1.0, ads_data -> r0);
1485          r_norm = sqrt(hypre_ParVectorInnerProd(ads_data -> r0,ads_data -> r0));
1486          r0_norm = r_norm;
1487          b_norm = sqrt(hypre_ParVectorInnerProd(b, b));
1488          if (b_norm)
1489             relative_resid = r_norm / b_norm;
1490          else
1491             relative_resid = r_norm;
1492          if (my_id == 0 && ads_data -> print_level > 0)
1493          {
1494             hypre_printf("                                            relative\n");
1495             hypre_printf("               residual        factor       residual\n");
1496             hypre_printf("               --------        ------       --------\n");
1497             hypre_printf("    Initial    %e                 %e\n",
1498                          r_norm, relative_resid);
1499          }
1500       }
1501 
1502       /* Apply the preconditioner */
1503       hypre_ParCSRSubspacePrec(ads_data -> A,
1504                                ads_data -> A_relax_type,
1505                                ads_data -> A_relax_times,
1506                                ads_data -> A_l1_norms ? hypre_VectorData(ads_data -> A_l1_norms) : NULL,
1507                                ads_data -> A_relax_weight,
1508                                ads_data -> A_omega,
1509                                ads_data -> A_max_eig_est,
1510                                ads_data -> A_min_eig_est,
1511                                ads_data -> A_cheby_order,
1512                                ads_data -> A_cheby_fraction,
1513                                Ai, Bi, HBi, Pi, ri, gi,
1514                                b, x,
1515                                ads_data -> r0,
1516                                ads_data -> g0,
1517                                cycle,
1518                                z);
1519 
1520       /* Compute new residual norms */
1521       if (ads_data -> maxit > 1)
1522       {
1523          old_resid = r_norm;
1524          hypre_ParVectorCopy(b, ads_data -> r0);
1525          hypre_ParCSRMatrixMatvec(-1.0, ads_data -> A, x, 1.0, ads_data -> r0);
1526          r_norm = sqrt(hypre_ParVectorInnerProd(ads_data -> r0,ads_data -> r0));
1527          if (b_norm)
1528             relative_resid = r_norm / b_norm;
1529          else
1530             relative_resid = r_norm;
1531          if (my_id == 0 && ads_data -> print_level > 0)
1532             hypre_printf("    Cycle %2d   %e    %f     %e \n",
1533                          i+1, r_norm, r_norm / old_resid, relative_resid);
1534       }
1535 
1536       if (relative_resid < ads_data -> tol)
1537       {
1538          i++;
1539          break;
1540       }
1541    }
1542 
1543    if (my_id == 0 && ads_data -> print_level > 0 && ads_data -> maxit > 1)
1544       hypre_printf("\n\n Average Convergence Factor = %f\n\n",
1545                    pow((r_norm/r0_norm),(1.0/(HYPRE_Real) i)));
1546 
1547    ads_data -> num_iterations = i;
1548    ads_data -> rel_resid_norm = relative_resid;
1549 
1550    if (ads_data -> num_iterations == ads_data -> maxit && ads_data -> tol > 0.0)
1551       hypre_error(HYPRE_ERROR_CONV);
1552 
1553    return hypre_error_flag;
1554 }
1555 
1556 /*--------------------------------------------------------------------------
1557  * hypre_ADSGetNumIterations
1558  *
1559  * Get the number of ADS iterations.
1560  *--------------------------------------------------------------------------*/
1561 
hypre_ADSGetNumIterations(void * solver,HYPRE_Int * num_iterations)1562 HYPRE_Int hypre_ADSGetNumIterations(void *solver,
1563                                     HYPRE_Int *num_iterations)
1564 {
1565    hypre_ADSData *ads_data = (hypre_ADSData *) solver;
1566    *num_iterations = ads_data -> num_iterations;
1567    return hypre_error_flag;
1568 }
1569 
1570 /*--------------------------------------------------------------------------
1571  * hypre_ADSGetFinalRelativeResidualNorm
1572  *
1573  * Get the final relative residual norm in ADS.
1574  *--------------------------------------------------------------------------*/
1575 
hypre_ADSGetFinalRelativeResidualNorm(void * solver,HYPRE_Real * rel_resid_norm)1576 HYPRE_Int hypre_ADSGetFinalRelativeResidualNorm(void *solver,
1577                                                 HYPRE_Real *rel_resid_norm)
1578 {
1579    hypre_ADSData *ads_data = (hypre_ADSData *) solver;
1580    *rel_resid_norm = ads_data -> rel_resid_norm;
1581    return hypre_error_flag;
1582 }
1583