1 /******************************************************************************
2 * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3 * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4 *
5 * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6 ******************************************************************************/
7
8 #include "seq_mv.h"
9 #include "_hypre_parcsr_ls.h"
10 #include "_hypre_utilities.hpp"
11
12 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
13
14 HYPRE_Int
hypre_BoomerAMGRelaxHybridGaussSeidelDevice(hypre_ParCSRMatrix * A,hypre_ParVector * f,HYPRE_Int * cf_marker,HYPRE_Int relax_points,HYPRE_Real relax_weight,HYPRE_Real omega,HYPRE_Real * l1_norms,hypre_ParVector * u,hypre_ParVector * Vtemp,hypre_ParVector * Ztemp,HYPRE_Int GS_order,HYPRE_Int Symm)15 hypre_BoomerAMGRelaxHybridGaussSeidelDevice( hypre_ParCSRMatrix *A,
16 hypre_ParVector *f,
17 HYPRE_Int *cf_marker,
18 HYPRE_Int relax_points,
19 HYPRE_Real relax_weight,
20 HYPRE_Real omega,
21 HYPRE_Real *l1_norms,
22 hypre_ParVector *u,
23 hypre_ParVector *Vtemp,
24 hypre_ParVector *Ztemp,
25 HYPRE_Int GS_order,
26 HYPRE_Int Symm )
27 {
28 /* Vtemp, Ztemp have the fine-grid size. Create two shell vectors that have the correct size */
29 hypre_ParVector *w1 = hypre_ParVectorCloneShallow(f);
30 hypre_ParVector *w2 = hypre_ParVectorCloneShallow(u);
31 hypre_VectorData(hypre_ParVectorLocalVector(w1)) = hypre_VectorData(hypre_ParVectorLocalVector(Vtemp));
32 hypre_VectorData(hypre_ParVectorLocalVector(w2)) = hypre_VectorData(hypre_ParVectorLocalVector(Ztemp));
33
34 if (Symm)
35 {
36 /* V = f - A*u */
37 hypre_ParCSRMatrixMatvecOutOfPlace(-1.0, A, u, 1.0, f, w1);
38
39 /* Z = L^{-1}*V */
40 hypre_CSRMatrixTriLowerUpperSolveDevice('L', hypre_ParCSRMatrixDiag(A), l1_norms,
41 hypre_ParVectorLocalVector(w1), hypre_ParVectorLocalVector(w2));
42
43 /* u = u + w*Z */
44 hypre_ParVectorAxpy(relax_weight, w2, u);
45
46 /* Note: only update V from local change of u, i.e., V = V - w*A_diag*Z_local */
47 hypre_CSRMatrixMatvec(-relax_weight, hypre_ParCSRMatrixDiag(A), hypre_ParVectorLocalVector(w2),
48 1.0, hypre_ParVectorLocalVector(w1));
49
50 /* Z = U^{-1}*V */
51 hypre_CSRMatrixTriLowerUpperSolveDevice('U', hypre_ParCSRMatrixDiag(A), l1_norms,
52 hypre_ParVectorLocalVector(w1), hypre_ParVectorLocalVector(w2));
53
54 /* u = u + w*Z */
55 hypre_ParVectorAxpy(relax_weight, w2, u);
56 }
57 else
58 {
59 const char uplo = GS_order > 0 ? 'L' : 'U';
60 /* V = f - A*u */
61 hypre_ParCSRMatrixMatvecOutOfPlace(-1.0, A, u, 1.0, f, w1);
62
63 /* Z = L^{-1}*V or Z = U^{-1}*V */
64 hypre_CSRMatrixTriLowerUpperSolveDevice(uplo, hypre_ParCSRMatrixDiag(A), l1_norms,
65 hypre_ParVectorLocalVector(w1), hypre_ParVectorLocalVector(w2));
66
67 /* u = u + w*Z */
68 hypre_ParVectorAxpy(relax_weight, w2, u);
69 }
70
71 hypre_ParVectorDestroy(w1);
72 hypre_ParVectorDestroy(w2);
73
74 return hypre_error_flag;
75 }
76
77 HYPRE_Int
hypre_BoomerAMGRelaxTwoStageGaussSeidelDevice(hypre_ParCSRMatrix * A,hypre_ParVector * f,HYPRE_Real relax_weight,HYPRE_Real omega,hypre_ParVector * u,hypre_ParVector * r,hypre_ParVector * z,HYPRE_Int num_inner_iters)78 hypre_BoomerAMGRelaxTwoStageGaussSeidelDevice ( hypre_ParCSRMatrix *A,
79 hypre_ParVector *f,
80 HYPRE_Real relax_weight,
81 HYPRE_Real omega,
82 hypre_ParVector *u,
83 hypre_ParVector *r,
84 hypre_ParVector *z,
85 HYPRE_Int num_inner_iters)
86 {
87 hypre_GpuProfilingPushRange("BoomerAMGRelaxTwoStageGaussSeidelDevice");
88
89 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
90 HYPRE_Int num_rows = hypre_CSRMatrixNumRows(A_diag);
91 HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag);
92 HYPRE_Complex *A_diag_data = hypre_CSRMatrixData(A_diag);
93 hypre_Vector *u_local = hypre_ParVectorLocalVector(u);
94 hypre_Vector *r_local = hypre_ParVectorLocalVector(r);
95 hypre_Vector *z_local = hypre_ParVectorLocalVector(z);
96 HYPRE_Complex *u_data = hypre_VectorData(u_local);
97 HYPRE_Complex *r_data = hypre_VectorData(r_local);
98 HYPRE_Complex *z_data = hypre_VectorData(z_local);
99 HYPRE_Int zsize = hypre_VectorSize(z_local);
100 HYPRE_Int rsize = hypre_VectorSize(r_local);
101 HYPRE_Complex multiplier = 1.0;
102 HYPRE_Int i;
103
104 hypre_ParCSRMatrixMatvecOutOfPlace(-relax_weight, A, u, relax_weight, f, r);
105
106 hypreDevice_DiagScaleVector(num_rows, A_diag_i, A_diag_data, r_data, 0.0, z_data);
107
108 // set this so that axpy works out properly. Reset later.
109 hypre_VectorSize(z_local) = rsize;
110
111 // 1) u = u + z
112 hypre_SeqVectorAxpy(multiplier, z_local, u_local);
113 multiplier *= -1.0;
114
115 for (i = 0; i < num_inner_iters; ++i)
116 {
117 // 2) r = Lz
118 hypre_CSRMatrixSpMVDevice(1.0, A_diag, z_local, 0.0, r_local, -2);
119 // 3) z = r/D, u = u + m*z
120 hypreDevice_DiagScaleVector2(num_rows, A_diag_i, A_diag_data, r_data, multiplier, z_data, u_data);
121 multiplier *= -1.0;
122 }
123
124 // reset this
125 hypre_VectorSize(z_local) = zsize;
126
127 hypre_GpuProfilingPopRange();
128
129 return 0;
130 }
131
132 #endif /* #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) */
133
134