1 /******************************************************************************
2 * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3 * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4 *
5 * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6 ******************************************************************************/
7
8 #include "_hypre_parcsr_ls.h"
9 #include "float.h"
10 #include "ams.h"
11 #include "ads.h"
12 #include "_hypre_utilities.hpp"
13
14 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
15 __global__ void hypreCUDAKernel_AMSComputePi_copy1(HYPRE_Int nnz, HYPRE_Int dim, HYPRE_Int *j_in, HYPRE_Int *j_out);
16 __global__ void hypreCUDAKernel_AMSComputePi_copy2(HYPRE_Int nrows, HYPRE_Int dim, HYPRE_Int *i_in, HYPRE_Real *data_in, HYPRE_Real *Gx_data, HYPRE_Real *Gy_data, HYPRE_Real *Gz_data, HYPRE_Real *data_out);
17 __global__ void hypreCUDAKernel_AMSComputePixyz_copy(HYPRE_Int nrows, HYPRE_Int dim, HYPRE_Int *i_in, HYPRE_Real *data_in, HYPRE_Real *Gx_data, HYPRE_Real *Gy_data, HYPRE_Real *Gz_data, HYPRE_Real *data_x_out, HYPRE_Real *data_y_out, HYPRE_Real *data_z_out );
18 #endif
19
20 /*--------------------------------------------------------------------------
21 * hypre_ADSCreate
22 *
23 * Allocate the ADS solver structure.
24 *--------------------------------------------------------------------------*/
25
hypre_ADSCreate()26 void * hypre_ADSCreate()
27 {
28 hypre_ADSData *ads_data;
29
30 ads_data = hypre_CTAlloc(hypre_ADSData, 1, HYPRE_MEMORY_HOST);
31
32 /* Default parameters */
33
34 ads_data -> maxit = 20; /* perform at most 20 iterations */
35 ads_data -> tol = 1e-6; /* convergence tolerance */
36 ads_data -> print_level = 1; /* print residual norm at each step */
37 ads_data -> cycle_type = 1; /* a 3-level multiplicative solver */
38 ads_data -> A_relax_type = 2; /* offd-l1-scaled GS */
39 ads_data -> A_relax_times = 1; /* one relaxation sweep */
40 ads_data -> A_relax_weight = 1.0; /* damping parameter */
41 ads_data -> A_omega = 1.0; /* SSOR coefficient */
42 ads_data -> A_cheby_order = 2; /* Cheby: order (1 -4 are vaild) */
43 ads_data -> A_cheby_fraction = 0.3; /* Cheby: fraction of spectrum to smooth */
44
45 ads_data -> B_C_cycle_type = 11; /* a 5-level multiplicative solver */
46 ads_data -> B_C_coarsen_type = 10; /* HMIS coarsening */
47 ads_data -> B_C_agg_levels = 1; /* Levels of aggressive coarsening */
48 ads_data -> B_C_relax_type = 3; /* hybrid G-S/Jacobi */
49 ads_data -> B_C_theta = 0.25; /* strength threshold */
50 ads_data -> B_C_interp_type = 0; /* interpolation type */
51 ads_data -> B_C_Pmax = 0; /* max nonzero elements in interp. rows */
52 ads_data -> B_Pi_coarsen_type = 10; /* HMIS coarsening */
53 ads_data -> B_Pi_agg_levels = 1; /* Levels of aggressive coarsening */
54 ads_data -> B_Pi_relax_type = 3; /* hybrid G-S/Jacobi */
55 ads_data -> B_Pi_theta = 0.25; /* strength threshold */
56 ads_data -> B_Pi_interp_type = 0; /* interpolation type */
57 ads_data -> B_Pi_Pmax = 0; /* max nonzero elements in interp. rows */
58
59 /* The rest of the fields are initialized using the Set functions */
60
61 ads_data -> A = NULL;
62 ads_data -> C = NULL;
63 ads_data -> A_C = NULL;
64 ads_data -> B_C = 0;
65 ads_data -> Pi = NULL;
66 ads_data -> A_Pi = NULL;
67 ads_data -> B_Pi = 0;
68 ads_data -> Pix = NULL;
69 ads_data -> Piy = NULL;
70 ads_data -> Piz = NULL;
71 ads_data -> A_Pix = NULL;
72 ads_data -> A_Piy = NULL;
73 ads_data -> A_Piz = NULL;
74 ads_data -> B_Pix = 0;
75 ads_data -> B_Piy = 0;
76 ads_data -> B_Piz = 0;
77 ads_data -> G = NULL;
78 ads_data -> x = NULL;
79 ads_data -> y = NULL;
80 ads_data -> z = NULL;
81 ads_data -> zz = NULL;
82
83 ads_data -> r0 = NULL;
84 ads_data -> g0 = NULL;
85 ads_data -> r1 = NULL;
86 ads_data -> g1 = NULL;
87 ads_data -> r2 = NULL;
88 ads_data -> g2 = NULL;
89
90 ads_data -> A_l1_norms = NULL;
91 ads_data -> A_max_eig_est = 0;
92 ads_data -> A_min_eig_est = 0;
93
94 ads_data -> owns_Pi = 1;
95 ads_data -> ND_Pi = NULL;
96 ads_data -> ND_Pix = NULL;
97 ads_data -> ND_Piy = NULL;
98 ads_data -> ND_Piz = NULL;
99
100 return (void *) ads_data;
101 }
102
103 /*--------------------------------------------------------------------------
104 * hypre_ADSDestroy
105 *
106 * Deallocate the ADS solver structure. Note that the input data (given
107 * through the Set functions) is not destroyed.
108 *--------------------------------------------------------------------------*/
109
hypre_ADSDestroy(void * solver)110 HYPRE_Int hypre_ADSDestroy(void *solver)
111 {
112 hypre_ADSData *ads_data = (hypre_ADSData *) solver;
113
114 if (!ads_data)
115 {
116 hypre_error_in_arg(1);
117 return hypre_error_flag;
118 }
119
120 if (ads_data -> A_C)
121 hypre_ParCSRMatrixDestroy(ads_data -> A_C);
122 if (ads_data -> B_C)
123 HYPRE_AMSDestroy(ads_data -> B_C);
124
125 if (ads_data -> owns_Pi && ads_data -> Pi)
126 hypre_ParCSRMatrixDestroy(ads_data -> Pi);
127 if (ads_data -> A_Pi)
128 hypre_ParCSRMatrixDestroy(ads_data -> A_Pi);
129 if (ads_data -> B_Pi)
130 HYPRE_BoomerAMGDestroy(ads_data -> B_Pi);
131
132 if (ads_data -> owns_Pi && ads_data -> Pix)
133 hypre_ParCSRMatrixDestroy(ads_data -> Pix);
134 if (ads_data -> A_Pix)
135 hypre_ParCSRMatrixDestroy(ads_data -> A_Pix);
136 if (ads_data -> B_Pix)
137 HYPRE_BoomerAMGDestroy(ads_data -> B_Pix);
138 if (ads_data -> owns_Pi && ads_data -> Piy)
139 hypre_ParCSRMatrixDestroy(ads_data -> Piy);
140 if (ads_data -> A_Piy)
141 hypre_ParCSRMatrixDestroy(ads_data -> A_Piy);
142 if (ads_data -> B_Piy)
143 HYPRE_BoomerAMGDestroy(ads_data -> B_Piy);
144 if (ads_data -> owns_Pi && ads_data -> Piz)
145 hypre_ParCSRMatrixDestroy(ads_data -> Piz);
146 if (ads_data -> A_Piz)
147 hypre_ParCSRMatrixDestroy(ads_data -> A_Piz);
148 if (ads_data -> B_Piz)
149 HYPRE_BoomerAMGDestroy(ads_data -> B_Piz);
150
151 if (ads_data -> r0)
152 hypre_ParVectorDestroy(ads_data -> r0);
153 if (ads_data -> g0)
154 hypre_ParVectorDestroy(ads_data -> g0);
155 if (ads_data -> r1)
156 hypre_ParVectorDestroy(ads_data -> r1);
157 if (ads_data -> g1)
158 hypre_ParVectorDestroy(ads_data -> g1);
159 if (ads_data -> r2)
160 hypre_ParVectorDestroy(ads_data -> r2);
161 if (ads_data -> g2)
162 hypre_ParVectorDestroy(ads_data -> g2);
163 if (ads_data -> zz)
164 hypre_ParVectorDestroy(ads_data -> zz);
165
166 hypre_SeqVectorDestroy(ads_data -> A_l1_norms);
167
168 /* C, G, x, y and z are not destroyed */
169
170 if (ads_data)
171 hypre_TFree(ads_data, HYPRE_MEMORY_HOST);
172
173 return hypre_error_flag;
174 }
175
176 /*--------------------------------------------------------------------------
177 * hypre_ADSSetDiscreteCurl
178 *
179 * Set the discrete curl matrix C.
180 * This function should be called before hypre_ADSSetup()!
181 *--------------------------------------------------------------------------*/
182
hypre_ADSSetDiscreteCurl(void * solver,hypre_ParCSRMatrix * C)183 HYPRE_Int hypre_ADSSetDiscreteCurl(void *solver,
184 hypre_ParCSRMatrix *C)
185 {
186 hypre_ADSData *ads_data = (hypre_ADSData *) solver;
187 ads_data -> C = C;
188 return hypre_error_flag;
189 }
190
191 /*--------------------------------------------------------------------------
192 * hypre_ADSSetDiscreteGradient
193 *
194 * Set the discrete gradient matrix G.
195 * This function should be called before hypre_ADSSetup()!
196 *--------------------------------------------------------------------------*/
197
hypre_ADSSetDiscreteGradient(void * solver,hypre_ParCSRMatrix * G)198 HYPRE_Int hypre_ADSSetDiscreteGradient(void *solver,
199 hypre_ParCSRMatrix *G)
200 {
201 hypre_ADSData *ads_data = (hypre_ADSData *) solver;
202 ads_data -> G = G;
203 return hypre_error_flag;
204 }
205
206 /*--------------------------------------------------------------------------
207 * hypre_ADSSetCoordinateVectors
208 *
209 * Set the x, y and z coordinates of the vertices in the mesh.
210 * This function should be called before hypre_ADSSetup()!
211 *--------------------------------------------------------------------------*/
212
hypre_ADSSetCoordinateVectors(void * solver,hypre_ParVector * x,hypre_ParVector * y,hypre_ParVector * z)213 HYPRE_Int hypre_ADSSetCoordinateVectors(void *solver,
214 hypre_ParVector *x,
215 hypre_ParVector *y,
216 hypre_ParVector *z)
217 {
218 hypre_ADSData *ads_data = (hypre_ADSData *) solver;
219 ads_data -> x = x;
220 ads_data -> y = y;
221 ads_data -> z = z;
222 return hypre_error_flag;
223 }
224
225 /*--------------------------------------------------------------------------
226 * hypre_ADSSetInterpolations
227 *
228 * Set the (components of) the Raviart-Thomas (RT_Pi) and the Nedelec (ND_Pi)
229 * interpolation matrices.
230 *
231 * This function is generally intended to be used only for high-order H(div)
232 * discretizations (in the lowest order case, these matrices are constructed
233 * internally in ADS from the discreet gradient and curl matrices and the
234 * coordinates of the vertices), though it can also be used in the lowest-order
235 * case or for other types of discretizations.
236 *
237 * By definition, RT_Pi and ND_Pi are the matrix representations of the linear
238 * operators that interpolate (high-order) vector nodal finite elements into the
239 * (high-order) Raviart-Thomas and Nedelec spaces. The component matrices are
240 * defined in both cases as Pix phi = Pi (phi,0,0) and similarly for Piy and
241 * Piz. Note that all these operators depend on the choice of the basis and
242 * degrees of freedom in the high-order spaces.
243 *
244 * The column numbering of RT_Pi and ND_Pi should be node-based, i.e. the x/y/z
245 * components of the first node (vertex or high-order dof) should be listed
246 * first, followed by the x/y/z components of the second node and so on (see the
247 * documentation of HYPRE_BoomerAMGSetDofFunc).
248 *
249 * If used, this function should be called before hypre_ADSSetup() and there is
250 * no need to provide the vertex coordinates. Furthermore, only one of the sets
251 * {RT_Pi} and {RT_Pix,RT_Piy,RT_Piz} needs to be specified (though it is OK to
252 * provide both). If RT_Pix is NULL, then scalar Pi-based ADS cycles, i.e.
253 * those with cycle_type > 10, will be unavailable. Similarly, ADS cycles based
254 * on monolithic Pi (cycle_type < 10) require that RT_Pi is not NULL. The same
255 * restrictions hold for the sets {ND_Pi} and {ND_Pix,ND_Piy,ND_Piz} -- only one
256 * of them needs to be specified, and the availability of each enables different
257 * AMS cycle type options for the subspace solve.
258 *--------------------------------------------------------------------------*/
259
hypre_ADSSetInterpolations(void * solver,hypre_ParCSRMatrix * RT_Pi,hypre_ParCSRMatrix * RT_Pix,hypre_ParCSRMatrix * RT_Piy,hypre_ParCSRMatrix * RT_Piz,hypre_ParCSRMatrix * ND_Pi,hypre_ParCSRMatrix * ND_Pix,hypre_ParCSRMatrix * ND_Piy,hypre_ParCSRMatrix * ND_Piz)260 HYPRE_Int hypre_ADSSetInterpolations(void *solver,
261 hypre_ParCSRMatrix *RT_Pi,
262 hypre_ParCSRMatrix *RT_Pix,
263 hypre_ParCSRMatrix *RT_Piy,
264 hypre_ParCSRMatrix *RT_Piz,
265 hypre_ParCSRMatrix *ND_Pi,
266 hypre_ParCSRMatrix *ND_Pix,
267 hypre_ParCSRMatrix *ND_Piy,
268 hypre_ParCSRMatrix *ND_Piz)
269 {
270 hypre_ADSData *ads_data = (hypre_ADSData *) solver;
271 ads_data -> Pi = RT_Pi;
272 ads_data -> Pix = RT_Pix;
273 ads_data -> Piy = RT_Piy;
274 ads_data -> Piz = RT_Piz;
275 ads_data -> ND_Pi = ND_Pi;
276 ads_data -> ND_Pix = ND_Pix;
277 ads_data -> ND_Piy = ND_Piy;
278 ads_data -> ND_Piz = ND_Piz;
279 ads_data -> owns_Pi = 0;
280 return hypre_error_flag;
281 }
282
283 /*--------------------------------------------------------------------------
284 * hypre_ADSSetMaxIter
285 *
286 * Set the maximum number of iterations in the auxiliary-space method.
287 * The default value is 20. To use the ADS solver as a preconditioner,
288 * set maxit to 1, tol to 0.0 and print_level to 0.
289 *--------------------------------------------------------------------------*/
290
hypre_ADSSetMaxIter(void * solver,HYPRE_Int maxit)291 HYPRE_Int hypre_ADSSetMaxIter(void *solver,
292 HYPRE_Int maxit)
293 {
294 hypre_ADSData *ads_data = (hypre_ADSData *) solver;
295 ads_data -> maxit = maxit;
296 return hypre_error_flag;
297 }
298
299 /*--------------------------------------------------------------------------
300 * hypre_ADSSetTol
301 *
302 * Set the convergence tolerance (if the method is used as a solver).
303 * The default value is 1e-6.
304 *--------------------------------------------------------------------------*/
305
hypre_ADSSetTol(void * solver,HYPRE_Real tol)306 HYPRE_Int hypre_ADSSetTol(void *solver,
307 HYPRE_Real tol)
308 {
309 hypre_ADSData *ads_data = (hypre_ADSData *) solver;
310 ads_data -> tol = tol;
311 return hypre_error_flag;
312 }
313
314 /*--------------------------------------------------------------------------
315 * hypre_ADSSetCycleType
316 *
317 * Choose which three-level solver to use. Possible values are:
318 *
319 * 1 = 3-level multipl. solver (01210) <-- small solution time
320 * 2 = 3-level additive solver (0+1+2)
321 * 3 = 3-level multipl. solver (02120)
322 * 4 = 3-level additive solver (010+2)
323 * 5 = 3-level multipl. solver (0102010) <-- small solution time
324 * 6 = 3-level additive solver (1+020)
325 * 7 = 3-level multipl. solver (0201020) <-- small number of iterations
326 * 8 = 3-level additive solver (0(1+2)0) <-- small solution time
327 * 9 = 3-level multipl. solver (01210) with discrete divergence
328 * 11 = 5-level multipl. solver (013454310) <-- small solution time, memory
329 * 12 = 5-level additive solver (0+1+3+4+5)
330 * 13 = 5-level multipl. solver (034515430) <-- small solution time, memory
331 * 14 = 5-level additive solver (01(3+4+5)10)
332 *
333 * The default value is 1.
334 *--------------------------------------------------------------------------*/
335
hypre_ADSSetCycleType(void * solver,HYPRE_Int cycle_type)336 HYPRE_Int hypre_ADSSetCycleType(void *solver,
337 HYPRE_Int cycle_type)
338 {
339 hypre_ADSData *ads_data = (hypre_ADSData *) solver;
340 ads_data -> cycle_type = cycle_type;
341 return hypre_error_flag;
342 }
343
344 /*--------------------------------------------------------------------------
345 * hypre_ADSSetPrintLevel
346 *
347 * Control how much information is printed during the solution iterations.
348 * The defaut values is 1 (print residual norm at each step).
349 *--------------------------------------------------------------------------*/
350
hypre_ADSSetPrintLevel(void * solver,HYPRE_Int print_level)351 HYPRE_Int hypre_ADSSetPrintLevel(void *solver,
352 HYPRE_Int print_level)
353 {
354 hypre_ADSData *ads_data = (hypre_ADSData *) solver;
355 ads_data -> print_level = print_level;
356 return hypre_error_flag;
357 }
358
359 /*--------------------------------------------------------------------------
360 * hypre_ADSSetSmoothingOptions
361 *
362 * Set relaxation parameters for A. Default values: 2, 1, 1.0, 1.0.
363 *--------------------------------------------------------------------------*/
364
hypre_ADSSetSmoothingOptions(void * solver,HYPRE_Int A_relax_type,HYPRE_Int A_relax_times,HYPRE_Real A_relax_weight,HYPRE_Real A_omega)365 HYPRE_Int hypre_ADSSetSmoothingOptions(void *solver,
366 HYPRE_Int A_relax_type,
367 HYPRE_Int A_relax_times,
368 HYPRE_Real A_relax_weight,
369 HYPRE_Real A_omega)
370 {
371 hypre_ADSData *ads_data = (hypre_ADSData *) solver;
372 ads_data -> A_relax_type = A_relax_type;
373 ads_data -> A_relax_times = A_relax_times;
374 ads_data -> A_relax_weight = A_relax_weight;
375 ads_data -> A_omega = A_omega;
376 return hypre_error_flag;
377 }
378
379 /*--------------------------------------------------------------------------
380 * hypre_ADSSetChebySmoothingOptions
381 *
382 * Set parameters for Chebyshev relaxation. Default values: 2, 0.3.
383 *--------------------------------------------------------------------------*/
384
hypre_ADSSetChebySmoothingOptions(void * solver,HYPRE_Int A_cheby_order,HYPRE_Int A_cheby_fraction)385 HYPRE_Int hypre_ADSSetChebySmoothingOptions(void *solver,
386 HYPRE_Int A_cheby_order,
387 HYPRE_Int A_cheby_fraction)
388 {
389 hypre_ADSData *ads_data = (hypre_ADSData *) solver;
390 ads_data -> A_cheby_order = A_cheby_order;
391 ads_data -> A_cheby_fraction = A_cheby_fraction;
392
393 return hypre_error_flag;
394 }
395
396 /*--------------------------------------------------------------------------
397 * hypre_ADSSetAMSOptions
398 *
399 * Set AMS parameters for B_C. Default values: 11, 10, 1, 3, 0.25, 0, 0.
400 *
401 * Note that B_C_cycle_type should be greater than 10, unless the high-order
402 * interface of hypre_ADSSetInterpolations is being used!
403 *--------------------------------------------------------------------------*/
404
hypre_ADSSetAMSOptions(void * solver,HYPRE_Int B_C_cycle_type,HYPRE_Int B_C_coarsen_type,HYPRE_Int B_C_agg_levels,HYPRE_Int B_C_relax_type,HYPRE_Real B_C_theta,HYPRE_Int B_C_interp_type,HYPRE_Int B_C_Pmax)405 HYPRE_Int hypre_ADSSetAMSOptions(void *solver,
406 HYPRE_Int B_C_cycle_type,
407 HYPRE_Int B_C_coarsen_type,
408 HYPRE_Int B_C_agg_levels,
409 HYPRE_Int B_C_relax_type,
410 HYPRE_Real B_C_theta,
411 HYPRE_Int B_C_interp_type,
412 HYPRE_Int B_C_Pmax)
413 {
414 hypre_ADSData *ads_data = (hypre_ADSData *) solver;
415 ads_data -> B_C_cycle_type = B_C_cycle_type;
416 ads_data -> B_C_coarsen_type = B_C_coarsen_type;
417 ads_data -> B_C_agg_levels = B_C_agg_levels;
418 ads_data -> B_C_relax_type = B_C_relax_type;
419 ads_data -> B_C_theta = B_C_theta;
420 ads_data -> B_C_interp_type = B_C_interp_type;
421 ads_data -> B_C_Pmax = B_C_Pmax;
422
423 return hypre_error_flag;
424 }
425
426 /*--------------------------------------------------------------------------
427 * hypre_ADSSetAMGOptions
428 *
429 * Set AMG parameters for B_Pi. Default values: 10, 1, 3, 0.25, 0, 0.
430 *--------------------------------------------------------------------------*/
431
hypre_ADSSetAMGOptions(void * solver,HYPRE_Int B_Pi_coarsen_type,HYPRE_Int B_Pi_agg_levels,HYPRE_Int B_Pi_relax_type,HYPRE_Real B_Pi_theta,HYPRE_Int B_Pi_interp_type,HYPRE_Int B_Pi_Pmax)432 HYPRE_Int hypre_ADSSetAMGOptions(void *solver,
433 HYPRE_Int B_Pi_coarsen_type,
434 HYPRE_Int B_Pi_agg_levels,
435 HYPRE_Int B_Pi_relax_type,
436 HYPRE_Real B_Pi_theta,
437 HYPRE_Int B_Pi_interp_type,
438 HYPRE_Int B_Pi_Pmax)
439 {
440 hypre_ADSData *ads_data = (hypre_ADSData *) solver;
441 ads_data -> B_Pi_coarsen_type = B_Pi_coarsen_type;
442 ads_data -> B_Pi_agg_levels = B_Pi_agg_levels;
443 ads_data -> B_Pi_relax_type = B_Pi_relax_type;
444 ads_data -> B_Pi_theta = B_Pi_theta;
445 ads_data -> B_Pi_interp_type = B_Pi_interp_type;
446 ads_data -> B_Pi_Pmax = B_Pi_Pmax;
447 return hypre_error_flag;
448 }
449
450 /*--------------------------------------------------------------------------
451 * hypre_ADSComputePi
452 *
453 * Construct the Pi interpolation matrix, which maps the space of vector
454 * linear finite elements to the space of face finite elements.
455 *
456 * The construction is based on the fact that Pi = [Pi_x, Pi_y, Pi_z], where
457 * each block has the same sparsity structure as C*G, with entries that can be
458 * computed from the vectors RT100, RT010 and RT001.
459 *
460 * We assume a constant number of vertices per face (no prisms or pyramids).
461 *--------------------------------------------------------------------------*/
462
hypre_ADSComputePi(hypre_ParCSRMatrix * A,hypre_ParCSRMatrix * C,hypre_ParCSRMatrix * G,hypre_ParVector * x,hypre_ParVector * y,hypre_ParVector * z,hypre_ParCSRMatrix * PiNDx,hypre_ParCSRMatrix * PiNDy,hypre_ParCSRMatrix * PiNDz,hypre_ParCSRMatrix ** Pi_ptr)463 HYPRE_Int hypre_ADSComputePi(hypre_ParCSRMatrix *A,
464 hypre_ParCSRMatrix *C,
465 hypre_ParCSRMatrix *G,
466 hypre_ParVector *x,
467 hypre_ParVector *y,
468 hypre_ParVector *z,
469 hypre_ParCSRMatrix *PiNDx,
470 hypre_ParCSRMatrix *PiNDy,
471 hypre_ParCSRMatrix *PiNDz,
472 hypre_ParCSRMatrix **Pi_ptr)
473 {
474 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
475 HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(A) );
476 #endif
477
478 hypre_ParCSRMatrix *Pi;
479
480 /* Compute the representations of the coordinate vectors, RT100, RT010 and
481 RT001, in the Raviart-Thomas space, by observing that the RT coordinates
482 of (1,0,0) = -curl (0,z,0) are given by C*PiNDy*z, etc. (We ignore the
483 minus sign since it is irrelevant for the coarse-grid correction.) */
484 hypre_ParVector *RT100, *RT010, *RT001;
485 {
486 hypre_ParVector *PiNDlin = hypre_ParVectorInRangeOf(PiNDx);
487
488 RT100 = hypre_ParVectorInRangeOf(C);
489 hypre_ParCSRMatrixMatvec(1.0, PiNDy, z, 0.0, PiNDlin);
490 hypre_ParCSRMatrixMatvec(1.0, C, PiNDlin, 0.0, RT100);
491 RT010 = hypre_ParVectorInRangeOf(C);
492 hypre_ParCSRMatrixMatvec(1.0, PiNDz, x, 0.0, PiNDlin);
493 hypre_ParCSRMatrixMatvec(1.0, C, PiNDlin, 0.0, RT010);
494 RT001 = hypre_ParVectorInRangeOf(C);
495 hypre_ParCSRMatrixMatvec(1.0, PiNDx, y, 0.0, PiNDlin);
496 hypre_ParCSRMatrixMatvec(1.0, C, PiNDlin, 0.0, RT001);
497
498 hypre_ParVectorDestroy(PiNDlin);
499 }
500
501 /* Compute Pi = [Pi_x, Pi_y, Pi_z] */
502 {
503 HYPRE_Int i, j, d;
504
505 HYPRE_Real *RT100_data = hypre_VectorData(hypre_ParVectorLocalVector(RT100));
506 HYPRE_Real *RT010_data = hypre_VectorData(hypre_ParVectorLocalVector(RT010));
507 HYPRE_Real *RT001_data = hypre_VectorData(hypre_ParVectorLocalVector(RT001));
508
509 /* Each component of Pi has the sparsity pattern of the topological
510 face-to-vertex matrix. */
511 hypre_ParCSRMatrix *F2V;
512 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
513 if (exec == HYPRE_EXEC_DEVICE)
514 {
515 F2V = hypre_ParCSRMatMat(C, G);
516 }
517 else
518 #endif
519 {
520 F2V = hypre_ParMatmul(C, G);
521 }
522
523 /* Create the parallel interpolation matrix */
524 {
525 MPI_Comm comm = hypre_ParCSRMatrixComm(F2V);
526 HYPRE_BigInt global_num_rows = hypre_ParCSRMatrixGlobalNumRows(F2V);
527 HYPRE_BigInt global_num_cols = 3*hypre_ParCSRMatrixGlobalNumCols(F2V);
528 HYPRE_BigInt *row_starts = hypre_ParCSRMatrixRowStarts(F2V);
529 HYPRE_BigInt *col_starts;
530 HYPRE_Int col_starts_size;
531 HYPRE_Int num_cols_offd = 3*hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(F2V));
532 HYPRE_Int num_nonzeros_diag = 3*hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixDiag(F2V));
533 HYPRE_Int num_nonzeros_offd = 3*hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixOffd(F2V));
534 HYPRE_BigInt *col_starts_F2V = hypre_ParCSRMatrixColStarts(F2V);
535 col_starts_size = 2;
536 col_starts = hypre_TAlloc(HYPRE_BigInt, col_starts_size, HYPRE_MEMORY_HOST);
537 for (i = 0; i < col_starts_size; i++)
538 col_starts[i] = 3 * col_starts_F2V[i];
539
540 Pi = hypre_ParCSRMatrixCreate(comm,
541 global_num_rows,
542 global_num_cols,
543 row_starts,
544 col_starts,
545 num_cols_offd,
546 num_nonzeros_diag,
547 num_nonzeros_offd);
548
549 hypre_ParCSRMatrixOwnsData(Pi) = 1;
550 hypre_ParCSRMatrixInitialize(Pi);
551 }
552
553 /* Fill-in the diagonal part */
554 {
555 hypre_CSRMatrix *F2V_diag = hypre_ParCSRMatrixDiag(F2V);
556 HYPRE_Int *F2V_diag_I = hypre_CSRMatrixI(F2V_diag);
557 HYPRE_Int *F2V_diag_J = hypre_CSRMatrixJ(F2V_diag);
558
559 HYPRE_Int F2V_diag_nrows = hypre_CSRMatrixNumRows(F2V_diag);
560 HYPRE_Int F2V_diag_nnz = hypre_CSRMatrixNumNonzeros(F2V_diag);
561
562 hypre_CSRMatrix *Pi_diag = hypre_ParCSRMatrixDiag(Pi);
563 HYPRE_Int *Pi_diag_I = hypre_CSRMatrixI(Pi_diag);
564 HYPRE_Int *Pi_diag_J = hypre_CSRMatrixJ(Pi_diag);
565 HYPRE_Real *Pi_diag_data = hypre_CSRMatrixData(Pi_diag);
566
567 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
568 if (exec == HYPRE_EXEC_DEVICE)
569 {
570 HYPRE_THRUST_CALL( transform,
571 F2V_diag_I,
572 F2V_diag_I + F2V_diag_nrows + 1,
573 Pi_diag_I,
574 3 * _1 );
575
576 dim3 bDim = hypre_GetDefaultCUDABlockDimension();
577 dim3 gDim = hypre_GetDefaultCUDAGridDimension(F2V_diag_nnz, "thread", bDim);
578
579 HYPRE_CUDA_LAUNCH( hypreCUDAKernel_AMSComputePi_copy1, gDim, bDim,
580 F2V_diag_nnz, 3, F2V_diag_J, Pi_diag_J );
581
582 gDim = hypre_GetDefaultCUDAGridDimension(F2V_diag_nrows, "warp", bDim);
583
584 HYPRE_CUDA_LAUNCH( hypreCUDAKernel_AMSComputePi_copy2, gDim, bDim,
585 F2V_diag_nrows, 3, F2V_diag_I, NULL, RT100_data, RT010_data, RT001_data,
586 Pi_diag_data );
587 }
588 else
589 #endif
590 {
591 for (i = 0; i < F2V_diag_nrows+1; i++)
592 Pi_diag_I[i] = 3 * F2V_diag_I[i];
593
594 for (i = 0; i < F2V_diag_nnz; i++)
595 for (d = 0; d < 3; d++)
596 Pi_diag_J[3*i+d] = 3*F2V_diag_J[i]+d;
597
598 for (i = 0; i < F2V_diag_nrows; i++)
599 for (j = F2V_diag_I[i]; j < F2V_diag_I[i+1]; j++)
600 {
601 *Pi_diag_data++ = RT100_data[i];
602 *Pi_diag_data++ = RT010_data[i];
603 *Pi_diag_data++ = RT001_data[i];
604 }
605 }
606 }
607
608 /* Fill-in the off-diagonal part */
609 {
610 hypre_CSRMatrix *F2V_offd = hypre_ParCSRMatrixOffd(F2V);
611 HYPRE_Int *F2V_offd_I = hypre_CSRMatrixI(F2V_offd);
612 HYPRE_Int *F2V_offd_J = hypre_CSRMatrixJ(F2V_offd);
613
614 HYPRE_Int F2V_offd_nrows = hypre_CSRMatrixNumRows(F2V_offd);
615 HYPRE_Int F2V_offd_ncols = hypre_CSRMatrixNumCols(F2V_offd);
616 HYPRE_Int F2V_offd_nnz = hypre_CSRMatrixNumNonzeros(F2V_offd);
617
618 hypre_CSRMatrix *Pi_offd = hypre_ParCSRMatrixOffd(Pi);
619 HYPRE_Int *Pi_offd_I = hypre_CSRMatrixI(Pi_offd);
620 HYPRE_Int *Pi_offd_J = hypre_CSRMatrixJ(Pi_offd);
621 HYPRE_Real *Pi_offd_data = hypre_CSRMatrixData(Pi_offd);
622
623 HYPRE_BigInt *F2V_cmap = hypre_ParCSRMatrixColMapOffd(F2V);
624 HYPRE_BigInt *Pi_cmap = hypre_ParCSRMatrixColMapOffd(Pi);
625
626 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
627 if (exec == HYPRE_EXEC_DEVICE)
628 {
629 if (F2V_offd_ncols)
630 {
631 HYPRE_THRUST_CALL( transform,
632 F2V_offd_I,
633 F2V_offd_I + F2V_offd_nrows + 1,
634 Pi_offd_I,
635 3 * _1 );
636 }
637
638 dim3 bDim = hypre_GetDefaultCUDABlockDimension();
639 dim3 gDim = hypre_GetDefaultCUDAGridDimension(F2V_offd_nnz, "thread", bDim);
640
641 HYPRE_CUDA_LAUNCH( hypreCUDAKernel_AMSComputePi_copy1, gDim, bDim,
642 F2V_offd_nnz, 3, F2V_offd_J, Pi_offd_J );
643
644 gDim = hypre_GetDefaultCUDAGridDimension(F2V_offd_nrows, "warp", bDim);
645
646 HYPRE_CUDA_LAUNCH( hypreCUDAKernel_AMSComputePi_copy2, gDim, bDim,
647 F2V_offd_nrows, 3, F2V_offd_I, NULL, RT100_data, RT010_data, RT001_data,
648 Pi_offd_data );
649 }
650 else
651 #endif
652 {
653 if (F2V_offd_ncols)
654 for (i = 0; i < F2V_offd_nrows+1; i++)
655 Pi_offd_I[i] = 3 * F2V_offd_I[i];
656
657 for (i = 0; i < F2V_offd_nnz; i++)
658 for (d = 0; d < 3; d++)
659 Pi_offd_J[3*i+d] = 3*F2V_offd_J[i]+d;
660
661 for (i = 0; i < F2V_offd_nrows; i++)
662 for (j = F2V_offd_I[i]; j < F2V_offd_I[i+1]; j++)
663 {
664 *Pi_offd_data++ = RT100_data[i];
665 *Pi_offd_data++ = RT010_data[i];
666 *Pi_offd_data++ = RT001_data[i];
667 }
668 }
669
670 for (i = 0; i < F2V_offd_ncols; i++)
671 for (d = 0; d < 3; d++)
672 Pi_cmap[3*i+d] = 3*F2V_cmap[i]+(HYPRE_BigInt)d;
673 }
674
675 hypre_ParCSRMatrixDestroy(F2V);
676 }
677
678 hypre_ParVectorDestroy(RT100);
679 hypre_ParVectorDestroy(RT010);
680 hypre_ParVectorDestroy(RT001);
681
682 *Pi_ptr = Pi;
683
684 return hypre_error_flag;
685 }
686
687 /*--------------------------------------------------------------------------
688 * hypre_ADSComputePixyz
689 *
690 * Construct the components Pix, Piy, Piz of the interpolation matrix Pi, which
691 * maps the space of vector linear finite elements to the space of face finite
692 * elements.
693 *
694 * The construction is based on the fact that each component has the same
695 * sparsity structure as the matrix C*G, with entries that can be computed from
696 * the vectors RT100, RT010 and RT001.
697 *
698 * We assume a constant number of vertices per face (no prisms or pyramids).
699 *--------------------------------------------------------------------------*/
700
hypre_ADSComputePixyz(hypre_ParCSRMatrix * A,hypre_ParCSRMatrix * C,hypre_ParCSRMatrix * G,hypre_ParVector * x,hypre_ParVector * y,hypre_ParVector * z,hypre_ParCSRMatrix * PiNDx,hypre_ParCSRMatrix * PiNDy,hypre_ParCSRMatrix * PiNDz,hypre_ParCSRMatrix ** Pix_ptr,hypre_ParCSRMatrix ** Piy_ptr,hypre_ParCSRMatrix ** Piz_ptr)701 HYPRE_Int hypre_ADSComputePixyz(hypre_ParCSRMatrix *A,
702 hypre_ParCSRMatrix *C,
703 hypre_ParCSRMatrix *G,
704 hypre_ParVector *x,
705 hypre_ParVector *y,
706 hypre_ParVector *z,
707 hypre_ParCSRMatrix *PiNDx,
708 hypre_ParCSRMatrix *PiNDy,
709 hypre_ParCSRMatrix *PiNDz,
710 hypre_ParCSRMatrix **Pix_ptr,
711 hypre_ParCSRMatrix **Piy_ptr,
712 hypre_ParCSRMatrix **Piz_ptr)
713 {
714 hypre_ParCSRMatrix *Pix, *Piy, *Piz;
715
716 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
717 HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(A) );
718 #endif
719
720 /* Compute the representations of the coordinate vectors, RT100, RT010 and
721 RT001, in the Raviart-Thomas space, by observing that the RT coordinates
722 of (1,0,0) = -curl (0,z,0) are given by C*PiNDy*z, etc. (We ignore the
723 minus sign since it is irrelevant for the coarse-grid correction.) */
724 hypre_ParVector *RT100, *RT010, *RT001;
725 {
726 hypre_ParVector *PiNDlin = hypre_ParVectorInRangeOf(PiNDx);
727
728 RT100 = hypre_ParVectorInRangeOf(C);
729 hypre_ParCSRMatrixMatvec(1.0, PiNDy, z, 0.0, PiNDlin);
730 hypre_ParCSRMatrixMatvec(1.0, C, PiNDlin, 0.0, RT100);
731 RT010 = hypre_ParVectorInRangeOf(C);
732 hypre_ParCSRMatrixMatvec(1.0, PiNDz, x, 0.0, PiNDlin);
733 hypre_ParCSRMatrixMatvec(1.0, C, PiNDlin, 0.0, RT010);
734 RT001 = hypre_ParVectorInRangeOf(C);
735 hypre_ParCSRMatrixMatvec(1.0, PiNDx, y, 0.0, PiNDlin);
736 hypre_ParCSRMatrixMatvec(1.0, C, PiNDlin, 0.0, RT001);
737
738 hypre_ParVectorDestroy(PiNDlin);
739 }
740
741 /* Compute Pix, Piy, Piz */
742 {
743 HYPRE_Int i, j;
744
745 HYPRE_Real *RT100_data = hypre_VectorData(hypre_ParVectorLocalVector(RT100));
746 HYPRE_Real *RT010_data = hypre_VectorData(hypre_ParVectorLocalVector(RT010));
747 HYPRE_Real *RT001_data = hypre_VectorData(hypre_ParVectorLocalVector(RT001));
748
749 /* Each component of Pi has the sparsity pattern of the topological
750 face-to-vertex matrix. */
751 hypre_ParCSRMatrix *F2V;
752
753 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
754 if (exec == HYPRE_EXEC_DEVICE)
755 {
756 F2V = hypre_ParCSRMatMat(C, G);
757 }
758 else
759 #endif
760 {
761 F2V = hypre_ParMatmul(C, G);
762 }
763
764 /* Create the components of the parallel interpolation matrix */
765 {
766 MPI_Comm comm = hypre_ParCSRMatrixComm(F2V);
767 HYPRE_BigInt global_num_rows = hypre_ParCSRMatrixGlobalNumRows(F2V);
768 HYPRE_BigInt global_num_cols = hypre_ParCSRMatrixGlobalNumCols(F2V);
769 HYPRE_BigInt *row_starts = hypre_ParCSRMatrixRowStarts(F2V);
770 HYPRE_BigInt *col_starts = hypre_ParCSRMatrixColStarts(F2V);
771 HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(F2V));
772 HYPRE_Int num_nonzeros_diag = hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixDiag(F2V));
773 HYPRE_Int num_nonzeros_offd = hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixOffd(F2V));
774
775 Pix = hypre_ParCSRMatrixCreate(comm,
776 global_num_rows,
777 global_num_cols,
778 row_starts,
779 col_starts,
780 num_cols_offd,
781 num_nonzeros_diag,
782 num_nonzeros_offd);
783 hypre_ParCSRMatrixOwnsData(Pix) = 1;
784 hypre_ParCSRMatrixInitialize(Pix);
785
786 Piy = hypre_ParCSRMatrixCreate(comm,
787 global_num_rows,
788 global_num_cols,
789 row_starts,
790 col_starts,
791 num_cols_offd,
792 num_nonzeros_diag,
793 num_nonzeros_offd);
794 hypre_ParCSRMatrixOwnsData(Piy) = 1;
795 hypre_ParCSRMatrixInitialize(Piy);
796
797 Piz = hypre_ParCSRMatrixCreate(comm,
798 global_num_rows,
799 global_num_cols,
800 row_starts,
801 col_starts,
802 num_cols_offd,
803 num_nonzeros_diag,
804 num_nonzeros_offd);
805 hypre_ParCSRMatrixOwnsData(Piz) = 1;
806 hypre_ParCSRMatrixInitialize(Piz);
807 }
808
809 /* Fill-in the diagonal part */
810 {
811 hypre_CSRMatrix *F2V_diag = hypre_ParCSRMatrixDiag(F2V);
812 HYPRE_Int *F2V_diag_I = hypre_CSRMatrixI(F2V_diag);
813 HYPRE_Int *F2V_diag_J = hypre_CSRMatrixJ(F2V_diag);
814
815 HYPRE_Int F2V_diag_nrows = hypre_CSRMatrixNumRows(F2V_diag);
816 HYPRE_Int F2V_diag_nnz = hypre_CSRMatrixNumNonzeros(F2V_diag);
817
818 hypre_CSRMatrix *Pix_diag = hypre_ParCSRMatrixDiag(Pix);
819 HYPRE_Int *Pix_diag_I = hypre_CSRMatrixI(Pix_diag);
820 HYPRE_Int *Pix_diag_J = hypre_CSRMatrixJ(Pix_diag);
821 HYPRE_Real *Pix_diag_data = hypre_CSRMatrixData(Pix_diag);
822
823 hypre_CSRMatrix *Piy_diag = hypre_ParCSRMatrixDiag(Piy);
824 HYPRE_Int *Piy_diag_I = hypre_CSRMatrixI(Piy_diag);
825 HYPRE_Int *Piy_diag_J = hypre_CSRMatrixJ(Piy_diag);
826 HYPRE_Real *Piy_diag_data = hypre_CSRMatrixData(Piy_diag);
827
828 hypre_CSRMatrix *Piz_diag = hypre_ParCSRMatrixDiag(Piz);
829 HYPRE_Int *Piz_diag_I = hypre_CSRMatrixI(Piz_diag);
830 HYPRE_Int *Piz_diag_J = hypre_CSRMatrixJ(Piz_diag);
831 HYPRE_Real *Piz_diag_data = hypre_CSRMatrixData(Piz_diag);
832
833 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
834 if (exec == HYPRE_EXEC_DEVICE)
835 {
836 HYPRE_THRUST_CALL( copy_n,
837 thrust::make_zip_iterator(thrust::make_tuple(F2V_diag_I, F2V_diag_I, F2V_diag_I)),
838 F2V_diag_nrows + 1,
839 thrust::make_zip_iterator(thrust::make_tuple(Pix_diag_I, Piy_diag_I, Piz_diag_I)) );
840
841 HYPRE_THRUST_CALL( copy_n,
842 thrust::make_zip_iterator(thrust::make_tuple(F2V_diag_J, F2V_diag_J, F2V_diag_J)),
843 F2V_diag_nnz,
844 thrust::make_zip_iterator(thrust::make_tuple(Pix_diag_J, Piy_diag_J, Piz_diag_J)) );
845
846 dim3 bDim = hypre_GetDefaultCUDABlockDimension();
847 dim3 gDim = hypre_GetDefaultCUDAGridDimension(F2V_diag_nrows, "warp", bDim);
848
849 HYPRE_CUDA_LAUNCH( hypreCUDAKernel_AMSComputePixyz_copy, gDim, bDim,
850 F2V_diag_nrows, 3, F2V_diag_I, NULL, RT100_data, RT010_data, RT001_data,
851 Pix_diag_data, Piy_diag_data, Piz_diag_data );
852 }
853 else
854 #endif
855 {
856 for (i = 0; i < F2V_diag_nrows+1; i++)
857 {
858 Pix_diag_I[i] = F2V_diag_I[i];
859 Piy_diag_I[i] = F2V_diag_I[i];
860 Piz_diag_I[i] = F2V_diag_I[i];
861 }
862
863 for (i = 0; i < F2V_diag_nnz; i++)
864 {
865 Pix_diag_J[i] = F2V_diag_J[i];
866 Piy_diag_J[i] = F2V_diag_J[i];
867 Piz_diag_J[i] = F2V_diag_J[i];
868 }
869
870 for (i = 0; i < F2V_diag_nrows; i++)
871 for (j = F2V_diag_I[i]; j < F2V_diag_I[i+1]; j++)
872 {
873 *Pix_diag_data++ = RT100_data[i];
874 *Piy_diag_data++ = RT010_data[i];
875 *Piz_diag_data++ = RT001_data[i];
876 }
877 }
878 }
879
880 /* Fill-in the off-diagonal part */
881 {
882 hypre_CSRMatrix *F2V_offd = hypre_ParCSRMatrixOffd(F2V);
883 HYPRE_Int *F2V_offd_I = hypre_CSRMatrixI(F2V_offd);
884 HYPRE_Int *F2V_offd_J = hypre_CSRMatrixJ(F2V_offd);
885
886 HYPRE_Int F2V_offd_nrows = hypre_CSRMatrixNumRows(F2V_offd);
887 HYPRE_Int F2V_offd_ncols = hypre_CSRMatrixNumCols(F2V_offd);
888 HYPRE_Int F2V_offd_nnz = hypre_CSRMatrixNumNonzeros(F2V_offd);
889
890 hypre_CSRMatrix *Pix_offd = hypre_ParCSRMatrixOffd(Pix);
891 HYPRE_Int *Pix_offd_I = hypre_CSRMatrixI(Pix_offd);
892 HYPRE_Int *Pix_offd_J = hypre_CSRMatrixJ(Pix_offd);
893 HYPRE_Real *Pix_offd_data = hypre_CSRMatrixData(Pix_offd);
894
895 hypre_CSRMatrix *Piy_offd = hypre_ParCSRMatrixOffd(Piy);
896 HYPRE_Int *Piy_offd_I = hypre_CSRMatrixI(Piy_offd);
897 HYPRE_Int *Piy_offd_J = hypre_CSRMatrixJ(Piy_offd);
898 HYPRE_Real *Piy_offd_data = hypre_CSRMatrixData(Piy_offd);
899
900 hypre_CSRMatrix *Piz_offd = hypre_ParCSRMatrixOffd(Piz);
901 HYPRE_Int *Piz_offd_I = hypre_CSRMatrixI(Piz_offd);
902 HYPRE_Int *Piz_offd_J = hypre_CSRMatrixJ(Piz_offd);
903 HYPRE_Real *Piz_offd_data = hypre_CSRMatrixData(Piz_offd);
904
905 HYPRE_BigInt *F2V_cmap = hypre_ParCSRMatrixColMapOffd(F2V);
906 HYPRE_BigInt *Pix_cmap = hypre_ParCSRMatrixColMapOffd(Pix);
907 HYPRE_BigInt *Piy_cmap = hypre_ParCSRMatrixColMapOffd(Piy);
908 HYPRE_BigInt *Piz_cmap = hypre_ParCSRMatrixColMapOffd(Piz);
909
910 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
911 if (exec == HYPRE_EXEC_DEVICE)
912 {
913 if (F2V_offd_ncols)
914 {
915 HYPRE_THRUST_CALL( copy_n,
916 thrust::make_zip_iterator(thrust::make_tuple(F2V_offd_I, F2V_offd_I, F2V_offd_I)),
917 F2V_offd_nrows + 1,
918 thrust::make_zip_iterator(thrust::make_tuple(Pix_offd_I, Piy_offd_I, Piz_offd_I)) );
919 }
920
921 HYPRE_THRUST_CALL( copy_n,
922 thrust::make_zip_iterator(thrust::make_tuple(F2V_offd_J, F2V_offd_J, F2V_offd_J)),
923 F2V_offd_nnz,
924 thrust::make_zip_iterator(thrust::make_tuple(Pix_offd_J, Piy_offd_J, Piz_offd_J)) );
925
926 dim3 bDim = hypre_GetDefaultCUDABlockDimension();
927 dim3 gDim = hypre_GetDefaultCUDAGridDimension(F2V_offd_nrows, "warp", bDim);
928
929 HYPRE_CUDA_LAUNCH( hypreCUDAKernel_AMSComputePixyz_copy, gDim, bDim,
930 F2V_offd_nrows, 3, F2V_offd_I, NULL, RT100_data, RT010_data, RT001_data,
931 Pix_offd_data, Piy_offd_data, Piz_offd_data );
932 }
933 else
934 #endif
935 {
936 if (F2V_offd_ncols)
937 for (i = 0; i < F2V_offd_nrows+1; i++)
938 {
939 Pix_offd_I[i] = F2V_offd_I[i];
940 Piy_offd_I[i] = F2V_offd_I[i];
941 Piz_offd_I[i] = F2V_offd_I[i];
942 }
943
944 for (i = 0; i < F2V_offd_nnz; i++)
945 {
946 Pix_offd_J[i] = F2V_offd_J[i];
947 Piy_offd_J[i] = F2V_offd_J[i];
948 Piz_offd_J[i] = F2V_offd_J[i];
949 }
950
951 for (i = 0; i < F2V_offd_nrows; i++)
952 for (j = F2V_offd_I[i]; j < F2V_offd_I[i+1]; j++)
953 {
954 *Pix_offd_data++ = RT100_data[i];
955 *Piy_offd_data++ = RT010_data[i];
956 *Piz_offd_data++ = RT001_data[i];
957 }
958 }
959
960 for (i = 0; i < F2V_offd_ncols; i++)
961 {
962 Pix_cmap[i] = F2V_cmap[i];
963 Piy_cmap[i] = F2V_cmap[i];
964 Piz_cmap[i] = F2V_cmap[i];
965 }
966 }
967
968 if (HYPRE_AssumedPartitionCheck())
969 hypre_ParCSRMatrixDestroy(F2V);
970 else
971 hypre_ParCSRBooleanMatrixDestroy((hypre_ParCSRBooleanMatrix*)F2V);
972 }
973
974 hypre_ParVectorDestroy(RT100);
975 hypre_ParVectorDestroy(RT010);
976 hypre_ParVectorDestroy(RT001);
977
978 *Pix_ptr = Pix;
979 *Piy_ptr = Piy;
980 *Piz_ptr = Piz;
981
982 return hypre_error_flag;
983 }
984
985 /*--------------------------------------------------------------------------
986 * hypre_ADSSetup
987 *
988 * Construct the ADS solver components.
989 *
990 * The following functions need to be called before hypre_ADSSetup():
991 * - hypre_ADSSetDiscreteCurl()
992 * - hypre_ADSSetDiscreteGradient()
993 * - hypre_ADSSetCoordinateVectors()
994 *--------------------------------------------------------------------------*/
995
hypre_ADSSetup(void * solver,hypre_ParCSRMatrix * A,hypre_ParVector * b,hypre_ParVector * x)996 HYPRE_Int hypre_ADSSetup(void *solver,
997 hypre_ParCSRMatrix *A,
998 hypre_ParVector *b,
999 hypre_ParVector *x)
1000 {
1001 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
1002 HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(A) );
1003 #endif
1004
1005 hypre_ADSData *ads_data = (hypre_ADSData *) solver;
1006 hypre_AMSData *ams_data;
1007
1008 ads_data -> A = A;
1009
1010 /* Make sure that the first entry in each row is the diagonal one. */
1011 /* hypre_CSRMatrixReorder(hypre_ParCSRMatrixDiag(ads_data -> A)); */
1012
1013 /* Compute the l1 norm of the rows of A */
1014 if (ads_data -> A_relax_type >= 1 && ads_data -> A_relax_type <= 4)
1015 {
1016 HYPRE_Real *l1_norm_data = NULL;
1017
1018 hypre_ParCSRComputeL1Norms(ads_data -> A, ads_data -> A_relax_type, NULL, &l1_norm_data);
1019
1020 ads_data -> A_l1_norms = hypre_SeqVectorCreate(hypre_ParCSRMatrixNumRows(ads_data -> A));
1021 hypre_VectorData(ads_data -> A_l1_norms) = l1_norm_data;
1022 hypre_SeqVectorInitialize_v2(ads_data -> A_l1_norms, hypre_ParCSRMatrixMemoryLocation(ads_data -> A));
1023 }
1024
1025 /* Chebyshev? */
1026 if (ads_data -> A_relax_type == 16)
1027 {
1028 hypre_ParCSRMaxEigEstimateCG(ads_data->A, 1, 10,
1029 &ads_data->A_max_eig_est,
1030 &ads_data->A_min_eig_est);
1031 }
1032
1033 /* Create the AMS solver on the range of C^T */
1034 {
1035 HYPRE_AMSCreate(&ads_data -> B_C);
1036 HYPRE_AMSSetDimension(ads_data -> B_C, 3);
1037
1038 /* B_C is a preconditioner */
1039 HYPRE_AMSSetMaxIter(ads_data -> B_C, 1);
1040 HYPRE_AMSSetTol(ads_data -> B_C, 0.0);
1041 HYPRE_AMSSetPrintLevel(ads_data -> B_C, 0);
1042
1043 HYPRE_AMSSetCycleType(ads_data -> B_C, ads_data -> B_C_cycle_type);
1044 HYPRE_AMSSetDiscreteGradient(ads_data -> B_C,
1045 (HYPRE_ParCSRMatrix) ads_data -> G);
1046
1047 if (ads_data -> ND_Pi == NULL && ads_data -> ND_Pix == NULL)
1048 {
1049 if (ads_data -> B_C_cycle_type < 10)
1050 {
1051 hypre_error_w_msg(HYPRE_ERROR_GENERIC,
1052 "Unsupported AMS cycle type in ADS!");
1053 }
1054 HYPRE_AMSSetCoordinateVectors(ads_data -> B_C,
1055 (HYPRE_ParVector) ads_data -> x,
1056 (HYPRE_ParVector) ads_data -> y,
1057 (HYPRE_ParVector) ads_data -> z);
1058 }
1059 else
1060 {
1061 if ((ads_data -> B_C_cycle_type < 10 && ads_data -> ND_Pi == NULL) ||
1062 (ads_data -> B_C_cycle_type > 10 && ads_data -> ND_Pix == NULL))
1063 {
1064 hypre_error_w_msg(HYPRE_ERROR_GENERIC,
1065 "Unsupported AMS cycle type in ADS!");
1066 }
1067 HYPRE_AMSSetInterpolations(ads_data -> B_C,
1068 (HYPRE_ParCSRMatrix) ads_data -> ND_Pi,
1069 (HYPRE_ParCSRMatrix) ads_data -> ND_Pix,
1070 (HYPRE_ParCSRMatrix) ads_data -> ND_Piy,
1071 (HYPRE_ParCSRMatrix) ads_data -> ND_Piz);
1072 }
1073
1074 /* beta=0 in the subspace */
1075 HYPRE_AMSSetBetaPoissonMatrix(ads_data -> B_C, NULL);
1076
1077 /* Reuse A's relaxation parameters for A_C */
1078 HYPRE_AMSSetSmoothingOptions(ads_data -> B_C,
1079 ads_data -> A_relax_type,
1080 ads_data -> A_relax_times,
1081 ads_data -> A_relax_weight,
1082 ads_data -> A_omega);
1083
1084 HYPRE_AMSSetAlphaAMGOptions(ads_data -> B_C, ads_data -> B_C_coarsen_type,
1085 ads_data -> B_C_agg_levels, ads_data -> B_C_relax_type,
1086 ads_data -> B_C_theta, ads_data -> B_C_interp_type,
1087 ads_data -> B_C_Pmax);
1088 /* No need to call HYPRE_AMSSetBetaAMGOptions */
1089
1090 /* Construct the coarse space matrix by RAP */
1091 if (!ads_data -> A_C)
1092 {
1093 if (!hypre_ParCSRMatrixCommPkg(ads_data -> C))
1094 {
1095 hypre_MatvecCommPkgCreate(ads_data -> C);
1096 }
1097
1098 if (!hypre_ParCSRMatrixCommPkg(ads_data -> A))
1099 {
1100 hypre_MatvecCommPkgCreate(ads_data -> A);
1101 }
1102
1103 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
1104 if (exec == HYPRE_EXEC_DEVICE)
1105 {
1106 ads_data -> A_C = hypre_ParCSRMatrixRAPKT(ads_data -> C,
1107 ads_data -> A,
1108 ads_data -> C, 1);
1109 }
1110 else
1111 #endif
1112 {
1113 hypre_BoomerAMGBuildCoarseOperator(ads_data -> C,
1114 ads_data -> A,
1115 ads_data -> C,
1116 &ads_data -> A_C);
1117 }
1118
1119 /* Make sure that A_C has no zero rows (this can happen if beta is zero
1120 in part of the domain). */
1121 hypre_ParCSRMatrixFixZeroRows(ads_data -> A_C);
1122 }
1123
1124 HYPRE_AMSSetup(ads_data -> B_C, (HYPRE_ParCSRMatrix)ads_data -> A_C, 0, 0);
1125 }
1126
1127 ams_data = (hypre_AMSData *) ads_data -> B_C;
1128
1129 if (ads_data -> Pi == NULL && ads_data -> Pix == NULL)
1130 {
1131 if (ads_data -> cycle_type > 10)
1132 /* Construct Pi{x,y,z} instead of Pi = [Pix,Piy,Piz] */
1133 hypre_ADSComputePixyz(ads_data -> A,
1134 ads_data -> C,
1135 ads_data -> G,
1136 ads_data -> x,
1137 ads_data -> y,
1138 ads_data -> z,
1139 ams_data -> Pix,
1140 ams_data -> Piy,
1141 ams_data -> Piz,
1142 &ads_data -> Pix,
1143 &ads_data -> Piy,
1144 &ads_data -> Piz);
1145 else
1146 /* Construct the Pi interpolation matrix */
1147 hypre_ADSComputePi(ads_data -> A,
1148 ads_data -> C,
1149 ads_data -> G,
1150 ads_data -> x,
1151 ads_data -> y,
1152 ads_data -> z,
1153 ams_data -> Pix,
1154 ams_data -> Piy,
1155 ams_data -> Piz,
1156 &ads_data -> Pi);
1157 }
1158
1159 if (ads_data -> cycle_type > 10)
1160 /* Create the AMG solvers on the range of Pi{x,y,z}^T */
1161 {
1162 HYPRE_BoomerAMGCreate(&ads_data -> B_Pix);
1163 HYPRE_BoomerAMGSetCoarsenType(ads_data -> B_Pix, ads_data -> B_Pi_coarsen_type);
1164 HYPRE_BoomerAMGSetAggNumLevels(ads_data -> B_Pix, ads_data -> B_Pi_agg_levels);
1165 HYPRE_BoomerAMGSetRelaxType(ads_data -> B_Pix, ads_data -> B_Pi_relax_type);
1166 HYPRE_BoomerAMGSetNumSweeps(ads_data -> B_Pix, 1);
1167 HYPRE_BoomerAMGSetMaxLevels(ads_data -> B_Pix, 25);
1168 HYPRE_BoomerAMGSetTol(ads_data -> B_Pix, 0.0);
1169 HYPRE_BoomerAMGSetMaxIter(ads_data -> B_Pix, 1);
1170 HYPRE_BoomerAMGSetStrongThreshold(ads_data -> B_Pix, ads_data -> B_Pi_theta);
1171 HYPRE_BoomerAMGSetInterpType(ads_data -> B_Pix, ads_data -> B_Pi_interp_type);
1172 HYPRE_BoomerAMGSetPMaxElmts(ads_data -> B_Pix, ads_data -> B_Pi_Pmax);
1173
1174 HYPRE_BoomerAMGCreate(&ads_data -> B_Piy);
1175 HYPRE_BoomerAMGSetCoarsenType(ads_data -> B_Piy, ads_data -> B_Pi_coarsen_type);
1176 HYPRE_BoomerAMGSetAggNumLevels(ads_data -> B_Piy, ads_data -> B_Pi_agg_levels);
1177 HYPRE_BoomerAMGSetRelaxType(ads_data -> B_Piy, ads_data -> B_Pi_relax_type);
1178 HYPRE_BoomerAMGSetNumSweeps(ads_data -> B_Piy, 1);
1179 HYPRE_BoomerAMGSetMaxLevels(ads_data -> B_Piy, 25);
1180 HYPRE_BoomerAMGSetTol(ads_data -> B_Piy, 0.0);
1181 HYPRE_BoomerAMGSetMaxIter(ads_data -> B_Piy, 1);
1182 HYPRE_BoomerAMGSetStrongThreshold(ads_data -> B_Piy, ads_data -> B_Pi_theta);
1183 HYPRE_BoomerAMGSetInterpType(ads_data -> B_Piy, ads_data -> B_Pi_interp_type);
1184 HYPRE_BoomerAMGSetPMaxElmts(ads_data -> B_Piy, ads_data -> B_Pi_Pmax);
1185
1186 HYPRE_BoomerAMGCreate(&ads_data -> B_Piz);
1187 HYPRE_BoomerAMGSetCoarsenType(ads_data -> B_Piz, ads_data -> B_Pi_coarsen_type);
1188 HYPRE_BoomerAMGSetAggNumLevels(ads_data -> B_Piz, ads_data -> B_Pi_agg_levels);
1189 HYPRE_BoomerAMGSetRelaxType(ads_data -> B_Piz, ads_data -> B_Pi_relax_type);
1190 HYPRE_BoomerAMGSetNumSweeps(ads_data -> B_Piz, 1);
1191 HYPRE_BoomerAMGSetMaxLevels(ads_data -> B_Piz, 25);
1192 HYPRE_BoomerAMGSetTol(ads_data -> B_Piz, 0.0);
1193 HYPRE_BoomerAMGSetMaxIter(ads_data -> B_Piz, 1);
1194 HYPRE_BoomerAMGSetStrongThreshold(ads_data -> B_Piz, ads_data -> B_Pi_theta);
1195 HYPRE_BoomerAMGSetInterpType(ads_data -> B_Piz, ads_data -> B_Pi_interp_type);
1196 HYPRE_BoomerAMGSetPMaxElmts(ads_data -> B_Piz, ads_data -> B_Pi_Pmax);
1197
1198 /* Don't use exact solve on the coarsest level (matrices may be singular) */
1199 HYPRE_BoomerAMGSetCycleRelaxType(ads_data -> B_Pix,
1200 ads_data -> B_Pi_relax_type, 3);
1201 HYPRE_BoomerAMGSetCycleRelaxType(ads_data -> B_Piy,
1202 ads_data -> B_Pi_relax_type, 3);
1203 HYPRE_BoomerAMGSetCycleRelaxType(ads_data -> B_Piz,
1204 ads_data -> B_Pi_relax_type, 3);
1205
1206 /* Construct the coarse space matrices by RAP */
1207 if (!hypre_ParCSRMatrixCommPkg(ads_data -> Pix))
1208 {
1209 hypre_MatvecCommPkgCreate(ads_data -> Pix);
1210 }
1211
1212 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
1213 if (exec == HYPRE_EXEC_DEVICE)
1214 {
1215 ads_data -> A_Pix = hypre_ParCSRMatrixRAPKT(ads_data -> Pix,
1216 ads_data -> A,
1217 ads_data -> Pix, 1);
1218 }
1219 else
1220 #endif
1221 {
1222 hypre_BoomerAMGBuildCoarseOperator(ads_data -> Pix,
1223 ads_data -> A,
1224 ads_data -> Pix,
1225 &ads_data -> A_Pix);
1226 }
1227
1228 HYPRE_BoomerAMGSetup(ads_data -> B_Pix,
1229 (HYPRE_ParCSRMatrix)ads_data -> A_Pix,
1230 NULL, NULL);
1231
1232 if (!hypre_ParCSRMatrixCommPkg(ads_data -> Piy))
1233 {
1234 hypre_MatvecCommPkgCreate(ads_data -> Piy);
1235 }
1236
1237 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
1238 if (exec == HYPRE_EXEC_DEVICE)
1239 {
1240 ads_data -> A_Piy = hypre_ParCSRMatrixRAPKT(ads_data -> Piy,
1241 ads_data -> A,
1242 ads_data -> Piy, 1);
1243 }
1244 else
1245 #endif
1246 {
1247 hypre_BoomerAMGBuildCoarseOperator(ads_data -> Piy,
1248 ads_data -> A,
1249 ads_data -> Piy,
1250 &ads_data -> A_Piy);
1251 }
1252
1253 HYPRE_BoomerAMGSetup(ads_data -> B_Piy,
1254 (HYPRE_ParCSRMatrix)ads_data -> A_Piy,
1255 NULL, NULL);
1256
1257 if (!hypre_ParCSRMatrixCommPkg(ads_data -> Piz))
1258 {
1259 hypre_MatvecCommPkgCreate(ads_data -> Piz);
1260 }
1261
1262 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
1263 if (exec == HYPRE_EXEC_DEVICE)
1264 {
1265 ads_data -> A_Piz = hypre_ParCSRMatrixRAPKT(ads_data -> Piz,
1266 ads_data -> A,
1267 ads_data -> Piz, 1);
1268 }
1269 else
1270 #endif
1271 {
1272 hypre_BoomerAMGBuildCoarseOperator(ads_data -> Piz,
1273 ads_data -> A,
1274 ads_data -> Piz,
1275 &ads_data -> A_Piz);
1276 }
1277
1278 HYPRE_BoomerAMGSetup(ads_data -> B_Piz,
1279 (HYPRE_ParCSRMatrix)ads_data -> A_Piz,
1280 NULL, NULL);
1281 }
1282 else
1283 /* Create the AMG solver on the range of Pi^T */
1284 {
1285 HYPRE_BoomerAMGCreate(&ads_data -> B_Pi);
1286 HYPRE_BoomerAMGSetCoarsenType(ads_data -> B_Pi, ads_data -> B_Pi_coarsen_type);
1287 HYPRE_BoomerAMGSetAggNumLevels(ads_data -> B_Pi, ads_data -> B_Pi_agg_levels);
1288 HYPRE_BoomerAMGSetRelaxType(ads_data -> B_Pi, ads_data -> B_Pi_relax_type);
1289 HYPRE_BoomerAMGSetNumSweeps(ads_data -> B_Pi, 1);
1290 HYPRE_BoomerAMGSetMaxLevels(ads_data -> B_Pi, 25);
1291 HYPRE_BoomerAMGSetTol(ads_data -> B_Pi, 0.0);
1292 HYPRE_BoomerAMGSetMaxIter(ads_data -> B_Pi, 1);
1293 HYPRE_BoomerAMGSetStrongThreshold(ads_data -> B_Pi, ads_data -> B_Pi_theta);
1294 HYPRE_BoomerAMGSetInterpType(ads_data -> B_Pi, ads_data -> B_Pi_interp_type);
1295 HYPRE_BoomerAMGSetPMaxElmts(ads_data -> B_Pi, ads_data -> B_Pi_Pmax);
1296
1297 /* Don't use exact solve on the coarsest level (matrix may be singular) */
1298 HYPRE_BoomerAMGSetCycleRelaxType(ads_data -> B_Pi,
1299 ads_data -> B_Pi_relax_type,
1300 3);
1301
1302 /* Construct the coarse space matrix by RAP and notify BoomerAMG that this
1303 is a 3 x 3 block system. */
1304 if (!ads_data -> A_Pi)
1305 {
1306 if (!hypre_ParCSRMatrixCommPkg(ads_data -> Pi))
1307 {
1308 hypre_MatvecCommPkgCreate(ads_data -> Pi);
1309 }
1310
1311 if (!hypre_ParCSRMatrixCommPkg(ads_data -> A))
1312 {
1313 hypre_MatvecCommPkgCreate(ads_data -> A);
1314 }
1315
1316 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
1317 if (exec == HYPRE_EXEC_DEVICE)
1318 {
1319 ads_data -> A_Pi = hypre_ParCSRMatrixRAPKT(ads_data -> Pi,
1320 ads_data -> A,
1321 ads_data -> Pi, 1);
1322 }
1323 else
1324 #endif
1325 {
1326 hypre_BoomerAMGBuildCoarseOperator(ads_data -> Pi,
1327 ads_data -> A,
1328 ads_data -> Pi,
1329 &ads_data -> A_Pi);
1330 }
1331
1332 HYPRE_BoomerAMGSetNumFunctions(ads_data -> B_Pi, 3);
1333 /* HYPRE_BoomerAMGSetNodal(ads_data -> B_Pi, 1); */
1334 }
1335
1336 HYPRE_BoomerAMGSetup(ads_data -> B_Pi,
1337 (HYPRE_ParCSRMatrix)ads_data -> A_Pi,
1338 NULL, NULL);
1339 }
1340
1341 /* Allocate temporary vectors */
1342 ads_data -> r0 = hypre_ParVectorInRangeOf(ads_data -> A);
1343 ads_data -> g0 = hypre_ParVectorInRangeOf(ads_data -> A);
1344 if (ads_data -> A_C)
1345 {
1346 ads_data -> r1 = hypre_ParVectorInRangeOf(ads_data -> A_C);
1347 ads_data -> g1 = hypre_ParVectorInRangeOf(ads_data -> A_C);
1348 }
1349 if (ads_data -> cycle_type > 10)
1350 {
1351 ads_data -> r2 = hypre_ParVectorInDomainOf(ads_data -> Pix);
1352 ads_data -> g2 = hypre_ParVectorInDomainOf(ads_data -> Pix);
1353 }
1354 else
1355 {
1356 ads_data -> r2 = hypre_ParVectorInDomainOf(ads_data -> Pi);
1357 ads_data -> g2 = hypre_ParVectorInDomainOf(ads_data -> Pi);
1358 }
1359
1360 return hypre_error_flag;
1361 }
1362
1363 /*--------------------------------------------------------------------------
1364 * hypre_ADSSolve
1365 *
1366 * Solve the system A x = b.
1367 *--------------------------------------------------------------------------*/
1368
hypre_ADSSolve(void * solver,hypre_ParCSRMatrix * A,hypre_ParVector * b,hypre_ParVector * x)1369 HYPRE_Int hypre_ADSSolve(void *solver,
1370 hypre_ParCSRMatrix *A,
1371 hypre_ParVector *b,
1372 hypre_ParVector *x)
1373 {
1374 hypre_ADSData *ads_data = (hypre_ADSData *) solver;
1375
1376 HYPRE_Int i, my_id = -1;
1377 HYPRE_Real r0_norm, r_norm, b_norm, relative_resid = 0, old_resid;
1378
1379 char cycle[30];
1380 hypre_ParCSRMatrix *Ai[5], *Pi[5];
1381 HYPRE_Solver Bi[5];
1382 HYPRE_PtrToSolverFcn HBi[5];
1383 hypre_ParVector *ri[5], *gi[5];
1384 HYPRE_Int needZ = 0;
1385
1386 hypre_ParVector *z = ads_data -> zz;
1387
1388 Ai[0] = ads_data -> A_C; Pi[0] = ads_data -> C;
1389 Ai[1] = ads_data -> A_Pi; Pi[1] = ads_data -> Pi;
1390 Ai[2] = ads_data -> A_Pix; Pi[2] = ads_data -> Pix;
1391 Ai[3] = ads_data -> A_Piy; Pi[3] = ads_data -> Piy;
1392 Ai[4] = ads_data -> A_Piz; Pi[4] = ads_data -> Piz;
1393
1394 Bi[0] = ads_data -> B_C; HBi[0] = (HYPRE_PtrToSolverFcn) hypre_AMSSolve;
1395 Bi[1] = ads_data -> B_Pi; HBi[1] = (HYPRE_PtrToSolverFcn) hypre_BoomerAMGBlockSolve;
1396 Bi[2] = ads_data -> B_Pix; HBi[2] = (HYPRE_PtrToSolverFcn) hypre_BoomerAMGSolve;
1397 Bi[3] = ads_data -> B_Piy; HBi[3] = (HYPRE_PtrToSolverFcn) hypre_BoomerAMGSolve;
1398 Bi[4] = ads_data -> B_Piz; HBi[4] = (HYPRE_PtrToSolverFcn) hypre_BoomerAMGSolve;
1399
1400 ri[0] = ads_data -> r1; gi[0] = ads_data -> g1;
1401 ri[1] = ads_data -> r2; gi[1] = ads_data -> g2;
1402 ri[2] = ads_data -> r2; gi[2] = ads_data -> g2;
1403 ri[3] = ads_data -> r2; gi[3] = ads_data -> g2;
1404 ri[4] = ads_data -> r2; gi[4] = ads_data -> g2;
1405
1406 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
1407 HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(A) );
1408 #endif
1409
1410 /* may need to create an additional temporary vector for relaxation */
1411 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
1412 if (exec == HYPRE_EXEC_DEVICE)
1413 {
1414 needZ = ads_data -> A_relax_type == 2 || ads_data -> A_relax_type == 4 || ads_data -> A_relax_type == 16;
1415 }
1416 else
1417 #endif
1418 {
1419 needZ = hypre_NumThreads() > 1 || ads_data -> A_relax_type == 16;
1420 }
1421
1422 if (needZ && !z)
1423 {
1424 z = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(A),
1425 hypre_ParCSRMatrixGlobalNumRows(A),
1426 hypre_ParCSRMatrixRowStarts(A));
1427 hypre_ParVectorInitialize(z);
1428 ads_data -> zz = z;
1429 }
1430
1431 if (ads_data -> print_level > 0)
1432 hypre_MPI_Comm_rank(hypre_ParCSRMatrixComm(A), &my_id);
1433
1434 switch (ads_data -> cycle_type)
1435 {
1436 case 1:
1437 default:
1438 hypre_sprintf(cycle,"%s","01210");
1439 break;
1440 case 2:
1441 hypre_sprintf(cycle,"%s","(0+1+2)");
1442 break;
1443 case 3:
1444 hypre_sprintf(cycle,"%s","02120");
1445 break;
1446 case 4:
1447 hypre_sprintf(cycle,"%s","(010+2)");
1448 break;
1449 case 5:
1450 hypre_sprintf(cycle,"%s","0102010");
1451 break;
1452 case 6:
1453 hypre_sprintf(cycle,"%s","(020+1)");
1454 break;
1455 case 7:
1456 hypre_sprintf(cycle,"%s","0201020");
1457 break;
1458 case 8:
1459 hypre_sprintf(cycle,"%s","0(+1+2)0");
1460 break;
1461 case 9:
1462 hypre_sprintf(cycle,"%s","01210");
1463 break;
1464 case 11:
1465 hypre_sprintf(cycle,"%s","013454310");
1466 break;
1467 case 12:
1468 hypre_sprintf(cycle,"%s","(0+1+3+4+5)");
1469 break;
1470 case 13:
1471 hypre_sprintf(cycle,"%s","034515430");
1472 break;
1473 case 14:
1474 hypre_sprintf(cycle,"%s","01(+3+4+5)10");
1475 break;
1476 }
1477
1478 for (i = 0; i < ads_data -> maxit; i++)
1479 {
1480 /* Compute initial residual norms */
1481 if (ads_data -> maxit > 1 && i == 0)
1482 {
1483 hypre_ParVectorCopy(b, ads_data -> r0);
1484 hypre_ParCSRMatrixMatvec(-1.0, ads_data -> A, x, 1.0, ads_data -> r0);
1485 r_norm = sqrt(hypre_ParVectorInnerProd(ads_data -> r0,ads_data -> r0));
1486 r0_norm = r_norm;
1487 b_norm = sqrt(hypre_ParVectorInnerProd(b, b));
1488 if (b_norm)
1489 relative_resid = r_norm / b_norm;
1490 else
1491 relative_resid = r_norm;
1492 if (my_id == 0 && ads_data -> print_level > 0)
1493 {
1494 hypre_printf(" relative\n");
1495 hypre_printf(" residual factor residual\n");
1496 hypre_printf(" -------- ------ --------\n");
1497 hypre_printf(" Initial %e %e\n",
1498 r_norm, relative_resid);
1499 }
1500 }
1501
1502 /* Apply the preconditioner */
1503 hypre_ParCSRSubspacePrec(ads_data -> A,
1504 ads_data -> A_relax_type,
1505 ads_data -> A_relax_times,
1506 ads_data -> A_l1_norms ? hypre_VectorData(ads_data -> A_l1_norms) : NULL,
1507 ads_data -> A_relax_weight,
1508 ads_data -> A_omega,
1509 ads_data -> A_max_eig_est,
1510 ads_data -> A_min_eig_est,
1511 ads_data -> A_cheby_order,
1512 ads_data -> A_cheby_fraction,
1513 Ai, Bi, HBi, Pi, ri, gi,
1514 b, x,
1515 ads_data -> r0,
1516 ads_data -> g0,
1517 cycle,
1518 z);
1519
1520 /* Compute new residual norms */
1521 if (ads_data -> maxit > 1)
1522 {
1523 old_resid = r_norm;
1524 hypre_ParVectorCopy(b, ads_data -> r0);
1525 hypre_ParCSRMatrixMatvec(-1.0, ads_data -> A, x, 1.0, ads_data -> r0);
1526 r_norm = sqrt(hypre_ParVectorInnerProd(ads_data -> r0,ads_data -> r0));
1527 if (b_norm)
1528 relative_resid = r_norm / b_norm;
1529 else
1530 relative_resid = r_norm;
1531 if (my_id == 0 && ads_data -> print_level > 0)
1532 hypre_printf(" Cycle %2d %e %f %e \n",
1533 i+1, r_norm, r_norm / old_resid, relative_resid);
1534 }
1535
1536 if (relative_resid < ads_data -> tol)
1537 {
1538 i++;
1539 break;
1540 }
1541 }
1542
1543 if (my_id == 0 && ads_data -> print_level > 0 && ads_data -> maxit > 1)
1544 hypre_printf("\n\n Average Convergence Factor = %f\n\n",
1545 pow((r_norm/r0_norm),(1.0/(HYPRE_Real) i)));
1546
1547 ads_data -> num_iterations = i;
1548 ads_data -> rel_resid_norm = relative_resid;
1549
1550 if (ads_data -> num_iterations == ads_data -> maxit && ads_data -> tol > 0.0)
1551 hypre_error(HYPRE_ERROR_CONV);
1552
1553 return hypre_error_flag;
1554 }
1555
1556 /*--------------------------------------------------------------------------
1557 * hypre_ADSGetNumIterations
1558 *
1559 * Get the number of ADS iterations.
1560 *--------------------------------------------------------------------------*/
1561
hypre_ADSGetNumIterations(void * solver,HYPRE_Int * num_iterations)1562 HYPRE_Int hypre_ADSGetNumIterations(void *solver,
1563 HYPRE_Int *num_iterations)
1564 {
1565 hypre_ADSData *ads_data = (hypre_ADSData *) solver;
1566 *num_iterations = ads_data -> num_iterations;
1567 return hypre_error_flag;
1568 }
1569
1570 /*--------------------------------------------------------------------------
1571 * hypre_ADSGetFinalRelativeResidualNorm
1572 *
1573 * Get the final relative residual norm in ADS.
1574 *--------------------------------------------------------------------------*/
1575
hypre_ADSGetFinalRelativeResidualNorm(void * solver,HYPRE_Real * rel_resid_norm)1576 HYPRE_Int hypre_ADSGetFinalRelativeResidualNorm(void *solver,
1577 HYPRE_Real *rel_resid_norm)
1578 {
1579 hypre_ADSData *ads_data = (hypre_ADSData *) solver;
1580 *rel_resid_norm = ads_data -> rel_resid_norm;
1581 return hypre_error_flag;
1582 }
1583