1 /******************************************************************************
2 * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3 * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4 *
5 * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6 ******************************************************************************/
7
8 /******************************************************************************
9 *
10 * Chebyshev setup and solve
11 *
12 *****************************************************************************/
13
14 #include "_hypre_parcsr_ls.h"
15 #include "_hypre_parcsr_mv.h"
16 #include "float.h"
17
18
19 /******************************************************************************
20
21 Chebyshev relaxation
22
23
24 Can specify order 1-4 (this is the order of the resid polynomial)- here we
25 explicitly code the coefficients (instead of
26 iteratively determining)
27
28
29 variant 0: standard chebyshev
30 this is rlx 11 if scale = 0, and 16 if scale == 1
31
32 variant 1: modified cheby: T(t)* f(t) where f(t) = (1-b/t)
33 this is rlx 15 if scale = 0, and 17 if scale == 1
34
35 ratio indicates the percentage of the whole spectrum to use (so .5
36 means half, and .1 means 10percent)
37
38
39 *******************************************************************************/
40
41 /**
42 * @brief Setups of coefficients (and optional diagonal scaling elements) for
43 * Chebyshev relaxation
44 *
45 * Will calculate ds_ptr on device/host depending on where A is located
46 *
47 * @param[in] A Matrix for which to seteup
48 * @param[in] max_eig Maximum eigenvalue
49 * @param[in] min_eig Maximum eigenvalue
50 * @param[in] fraction Fraction used to calculate lower bound
51 * @param[in] order Polynomial order to use [1,4]
52 * @param[in] scale Whether or not to scale by the diagonal
53 * @param[in] variant Whether or not to use a variant of Chebyshev (0 standard, 1 variant)
54 * @param[out] coefs_ptr *coefs_ptr will be allocated to contain coefficients of the polynomial
55 * @param[out] ds_ptr *ds_ptr will be allocated to allow scaling by the diagonal
56 */
57 HYPRE_Int
hypre_ParCSRRelax_Cheby_Setup(hypre_ParCSRMatrix * A,HYPRE_Real max_eig,HYPRE_Real min_eig,HYPRE_Real fraction,HYPRE_Int order,HYPRE_Int scale,HYPRE_Int variant,HYPRE_Real ** coefs_ptr,HYPRE_Real ** ds_ptr)58 hypre_ParCSRRelax_Cheby_Setup(hypre_ParCSRMatrix *A, /* matrix to relax with */
59 HYPRE_Real max_eig,
60 HYPRE_Real min_eig,
61 HYPRE_Real fraction,
62 HYPRE_Int order, /* polynomial order */
63 HYPRE_Int scale, /* scale by diagonal?*/
64 HYPRE_Int variant,
65 HYPRE_Real **coefs_ptr,
66 HYPRE_Real **ds_ptr) /* initial/updated approximation */
67 {
68 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
69 HYPRE_Real theta, delta;
70 HYPRE_Real den;
71 HYPRE_Real upper_bound = 0.0, lower_bound = 0.0;
72 HYPRE_Int num_rows = hypre_CSRMatrixNumRows(A_diag);
73 HYPRE_Real *coefs = NULL;
74 HYPRE_Int cheby_order;
75 HYPRE_Real *ds_data = NULL;
76
77 /* u = u + p(A)r */
78 if (order > 4)
79 {
80 order = 4;
81 }
82
83 if (order < 1)
84 {
85 order = 1;
86 }
87
88 coefs = hypre_CTAlloc(HYPRE_Real, order+1, HYPRE_MEMORY_HOST);
89 /* we are using the order of p(A) */
90 cheby_order = order - 1;
91
92 if (min_eig >= 0.0)
93 {
94 /* make sure we are large enough - Adams et al. 2003 */
95 upper_bound = max_eig * 1.1;
96 /* lower_bound = max_eig/fraction; */
97 lower_bound = (upper_bound - min_eig) * fraction + min_eig;
98 }
99 else if (max_eig <= 0.0)
100 {
101 upper_bound = min_eig * 1.1;
102 lower_bound = max_eig - (max_eig - upper_bound) * fraction;
103 }
104
105 /* theta and delta */
106 theta = (upper_bound + lower_bound)/2;
107 delta = (upper_bound - lower_bound)/2;
108
109 if (variant == 1)
110 {
111 switch ( cheby_order ) /* these are the corresponding cheby polynomials: u = u_o + s(A)r_0 - so order is
112 one less that resid poly: r(t) = 1 - t*s(t) */
113 {
114 case 0:
115 coefs[0] = 1.0/theta;
116
117 break;
118
119 case 1: /* (del - t + 2*th)/(th^2 + del*th) */
120 den = (theta*theta + delta*theta);
121
122 coefs[0] = (delta + 2*theta)/den;
123 coefs[1] = -1.0/den;
124
125 break;
126
127 case 2: /* (4*del*th - del^2 - t*(2*del + 6*th) + 2*t^2 + 6*th^2)/(2*del*th^2 - del^2*th - del^3 + 2*th^3)*/
128 den = 2*delta*theta*theta - delta*delta*theta - pow(delta,3) + 2*pow(theta,3);
129
130 coefs[0] = (4*delta*theta - pow(delta,2) + 6*pow(theta,2))/den;
131 coefs[1] = -(2*delta + 6*theta)/den;
132 coefs[2] = 2/den;
133
134 break;
135
136 case 3: /* -(6*del^2*th - 12*del*th^2 - t^2*(4*del + 16*th) + t*(12*del*th - 3*del^2 + 24*th^2) + 3*del^3 + 4*t^3 - 16*th^3)/(4*del*th^3 - 3*del^2*th^2 - 3*del^3*th + 4*th^4)*/
137 den = - (4*delta*pow(theta,3) - 3*pow(delta,2)*pow(theta,2) - 3*pow(delta,3)*theta + 4*pow(theta,4) );
138
139 coefs[0] = (6*pow(delta,2)*theta - 12*delta*pow(theta,2) + 3*pow(delta,3) - 16*pow(theta,3) )/den;
140 coefs[1] = (12*delta*theta - 3*pow(delta,2) + 24*pow(theta,2))/den;
141 coefs[2] = -( 4*delta + 16*theta)/den;
142 coefs[3] = 4/den;
143
144 break;
145 }
146 }
147
148 else /* standard chebyshev */
149 {
150
151 switch ( cheby_order ) /* these are the corresponding cheby polynomials: u = u_o + s(A)r_0 - so order is
152 one less thatn resid poly: r(t) = 1 - t*s(t) */
153 {
154 case 0:
155 coefs[0] = 1.0/theta;
156 break;
157
158 case 1: /* ( 2*t - 4*th)/(del^2 - 2*th^2) */
159 den = delta*delta - 2*theta*theta;
160
161 coefs[0] = -4*theta/den;
162 coefs[1] = 2/den;
163
164 break;
165
166 case 2: /* (3*del^2 - 4*t^2 + 12*t*th - 12*th^2)/(3*del^2*th - 4*th^3)*/
167 den = 3*(delta*delta)*theta - 4*(theta*theta*theta);
168
169 coefs[0] = (3*delta*delta - 12 *theta*theta)/den;
170 coefs[1] = 12*theta/den;
171 coefs[2] = -4/den;
172
173 break;
174
175 case 3: /*(t*(8*del^2 - 48*th^2) - 16*del^2*th + 32*t^2*th - 8*t^3 + 32*th^3)/(del^4 - 8*del^2*th^2 + 8*th^4)*/
176 den = pow(delta,4) - 8*delta*delta*theta*theta + 8*pow(theta,4);
177
178 coefs[0] = (32*pow(theta,3)- 16*delta*delta*theta)/den;
179 coefs[1] = (8*delta*delta - 48*theta*theta)/den;
180 coefs[2] = 32*theta/den;
181 coefs[3] = -8/den;
182
183 break;
184 }
185 }
186 *coefs_ptr = coefs;
187
188 if (scale)
189 {
190 /*grab 1/sqrt(abs(diagonal)) */
191 ds_data = hypre_CTAlloc(HYPRE_Real, num_rows, hypre_ParCSRMatrixMemoryLocation(A));
192 hypre_CSRMatrixExtractDiagonal(hypre_ParCSRMatrixDiag(A), ds_data, 4);
193 } /* end of scaling code */
194 *ds_ptr = ds_data;
195
196 return hypre_error_flag;
197 }
198
199 /**
200 * @brief Solve using a chebyshev polynomial on the host
201 *
202 * @param[in] A Matrix to relax with
203 * @param[in] f right-hand side
204 * @param[in] ds_data Diagonal information
205 * @param[in] coefs Polynomial coefficients
206 * @param[in] order Order of the polynomial
207 * @param[in] scale Whether or not to scale by diagonal
208 * @param[in] scale Whether or not to use a variant
209 * @param[in,out] u Initial/updated approximation
210 * @param[in] v Temp vector
211 * @param[in] r Temp Vector
212 * @param[in] orig_u_vec Temp Vector
213 * @param[in] tmp Temp Vector
214 */
215 HYPRE_Int
hypre_ParCSRRelax_Cheby_SolveHost(hypre_ParCSRMatrix * A,hypre_ParVector * f,HYPRE_Real * ds_data,HYPRE_Real * coefs,HYPRE_Int order,HYPRE_Int scale,HYPRE_Int variant,hypre_ParVector * u,hypre_ParVector * v,hypre_ParVector * r,hypre_ParVector * orig_u_vec,hypre_ParVector * tmp_vec)216 hypre_ParCSRRelax_Cheby_SolveHost(hypre_ParCSRMatrix *A, /* matrix to relax with */
217 hypre_ParVector *f, /* right-hand side */
218 HYPRE_Real *ds_data,
219 HYPRE_Real *coefs,
220 HYPRE_Int order, /* polynomial order */
221 HYPRE_Int scale, /* scale by diagonal?*/
222 HYPRE_Int variant,
223 hypre_ParVector *u, /* initial/updated approximation */
224 hypre_ParVector *v, /* temporary vector */
225 hypre_ParVector *r, /* another vector */
226 hypre_ParVector *orig_u_vec, /*another temp vector */
227 hypre_ParVector *tmp_vec) /*a potential temp vector */
228 {
229 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
230 HYPRE_Real *u_data = hypre_VectorData(hypre_ParVectorLocalVector(u));
231 HYPRE_Real *f_data = hypre_VectorData(hypre_ParVectorLocalVector(f));
232 HYPRE_Real *v_data = hypre_VectorData(hypre_ParVectorLocalVector(v));
233
234 HYPRE_Real *r_data = hypre_VectorData(hypre_ParVectorLocalVector(r));
235
236 HYPRE_Int i, j;
237 HYPRE_Int num_rows = hypre_CSRMatrixNumRows(A_diag);
238
239 HYPRE_Real mult;
240 HYPRE_Real *orig_u;
241
242 HYPRE_Int cheby_order;
243
244 HYPRE_Real *tmp_data;
245
246
247 /* u = u + p(A)r */
248
249 if (order > 4)
250 order = 4;
251 if (order < 1)
252 order = 1;
253
254 /* we are using the order of p(A) */
255 cheby_order = order -1;
256
257 hypre_assert(hypre_VectorSize(hypre_ParVectorLocalVector(orig_u_vec)) >= num_rows);
258 orig_u = hypre_VectorData(hypre_ParVectorLocalVector(orig_u_vec));
259
260 if (!scale)
261 {
262 /* get residual: r = f - A*u */
263 hypre_ParVectorCopy(f, r);
264 hypre_ParCSRMatrixMatvec(-1.0, A, u, 1.0, r);
265
266 /* o = u; u = r .* coef */
267 for ( i = 0; i < num_rows; i++ )
268 {
269 orig_u[i] = u_data[i];
270 u_data[i] = r_data[i] * coefs[cheby_order];
271 }
272 for (i = cheby_order - 1; i >= 0; i-- )
273 {
274 hypre_ParCSRMatrixMatvec(1.0, A, u, 0.0, v);
275 mult = coefs[i];
276 /* u = mult * r + v */
277 #ifdef HYPRE_USING_OPENMP
278 #pragma omp parallel for private(j) HYPRE_SMP_SCHEDULE
279 #endif
280 for ( j = 0; j < num_rows; j++ )
281 {
282 u_data[j] = mult * r_data[j] + v_data[j];
283 }
284 }
285
286 /* u = o + u */
287 #ifdef HYPRE_USING_OPENMP
288 #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
289 #endif
290 for ( i = 0; i < num_rows; i++ )
291 {
292 u_data[i] = orig_u[i] + u_data[i];
293 }
294 }
295 else /* scaling! */
296 {
297
298 /*grab 1/sqrt(diagonal) */
299 tmp_data = hypre_VectorData(hypre_ParVectorLocalVector(tmp_vec));
300
301 /* get ds_data and get scaled residual: r = D^(-1/2)f -
302 * D^(-1/2)A*u */
303
304 hypre_ParCSRMatrixMatvec(-1.0, A, u, 0.0, tmp_vec);
305 /* r = ds .* (f + tmp) */
306 #ifdef HYPRE_USING_OPENMP
307 #pragma omp parallel for private(j) HYPRE_SMP_SCHEDULE
308 #endif
309 for ( j = 0; j < num_rows; j++ )
310 {
311 r_data[j] = ds_data[j] * (f_data[j] + tmp_data[j]);
312 }
313
314 /* save original u, then start
315 the iteration by multiplying r by the cheby coef.*/
316
317 /* o = u; u = r * coef */
318 #ifdef HYPRE_USING_OPENMP
319 #pragma omp parallel for private(j) HYPRE_SMP_SCHEDULE
320 #endif
321 for ( j = 0; j < num_rows; j++ )
322 {
323 orig_u[j] = u_data[j]; /* orig, unscaled u */
324
325 u_data[j] = r_data[j] * coefs[cheby_order];
326 }
327
328 /* now do the other coefficients */
329 for (i = cheby_order - 1; i >= 0; i-- )
330 {
331 /* v = D^(-1/2)AD^(-1/2)u */
332 /* tmp = ds .* u */
333 #ifdef HYPRE_USING_OPENMP
334 #pragma omp parallel for private(j) HYPRE_SMP_SCHEDULE
335 #endif
336 for ( j = 0; j < num_rows; j++ )
337 {
338 tmp_data[j] = ds_data[j] * u_data[j];
339 }
340 hypre_ParCSRMatrixMatvec(1.0, A, tmp_vec, 0.0, v);
341
342 /* u_new = coef*r + v*/
343 mult = coefs[i];
344
345 /* u = coef * r + ds .* v */
346 #ifdef HYPRE_USING_OPENMP
347 #pragma omp parallel for private(j) HYPRE_SMP_SCHEDULE
348 #endif
349 for ( j = 0; j < num_rows; j++ )
350 {
351 u_data[j] = mult * r_data[j] + ds_data[j]*v_data[j];
352 }
353
354 } /* end of cheby_order loop */
355
356 /* now we have to scale u_data before adding it to u_orig*/
357
358 /* u = orig_u + ds .* u */
359 #ifdef HYPRE_USING_OPENMP
360 #pragma omp parallel for private(j) HYPRE_SMP_SCHEDULE
361 #endif
362 for ( j = 0; j < num_rows; j++ )
363 {
364 u_data[j] = orig_u[j] + ds_data[j]*u_data[j];
365 }
366
367 }/* end of scaling code */
368
369 return hypre_error_flag;
370 }
371
372 /**
373 * @brief Solve using a chebyshev polynomial
374 *
375 * Determines whether to solve on host or device
376 *
377 * @param[in] A Matrix to relax with
378 * @param[in] f right-hand side
379 * @param[in] ds_data Diagonal information
380 * @param[in] coefs Polynomial coefficients
381 * @param[in] order Order of the polynomial
382 * @param[in] scale Whether or not to scale by diagonal
383 * @param[in] scale Whether or not to use a variant
384 * @param[in,out] u Initial/updated approximation
385 * @param[out] v Temp vector
386 * @param[out] r Temp Vector
387 * @param[out] orig_u_vec Temp Vector
388 * @param[out] tmp_vec Temp Vector
389 */
390 HYPRE_Int
hypre_ParCSRRelax_Cheby_Solve(hypre_ParCSRMatrix * A,hypre_ParVector * f,HYPRE_Real * ds_data,HYPRE_Real * coefs,HYPRE_Int order,HYPRE_Int scale,HYPRE_Int variant,hypre_ParVector * u,hypre_ParVector * v,hypre_ParVector * r,hypre_ParVector * orig_u_vec,hypre_ParVector * tmp_vec)391 hypre_ParCSRRelax_Cheby_Solve(hypre_ParCSRMatrix *A, /* matrix to relax with */
392 hypre_ParVector *f, /* right-hand side */
393 HYPRE_Real *ds_data,
394 HYPRE_Real *coefs,
395 HYPRE_Int order, /* polynomial order */
396 HYPRE_Int scale, /* scale by diagonal?*/
397 HYPRE_Int variant,
398 hypre_ParVector *u, /* initial/updated approximation */
399 hypre_ParVector *v, /* temporary vector */
400 hypre_ParVector *r, /*another temp vector */
401 hypre_ParVector *orig_u_vec, /*another temp vector */
402 hypre_ParVector *tmp_vec) /*another temp vector */
403 {
404 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
405 hypre_GpuProfilingPushRange("ParCSRRelaxChebySolve");
406 #endif
407 HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1(hypre_ParCSRMatrixMemoryLocation(A));
408 HYPRE_Int ierr = 0;
409
410 if (exec == HYPRE_EXEC_HOST)
411 {
412 ierr = hypre_ParCSRRelax_Cheby_SolveHost(A, f, ds_data, coefs, order, scale, variant, u, v, r, orig_u_vec, tmp_vec);
413 }
414 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
415 else
416 {
417 ierr = hypre_ParCSRRelax_Cheby_SolveDevice(A, f, ds_data, coefs, order, scale, variant, u, v, r, orig_u_vec, tmp_vec);
418 }
419 #endif
420
421 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
422 hypre_GpuProfilingPopRange();
423 #endif
424 return ierr;
425 }
426