1 /******************************************************************************
2  * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3  * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4  *
5  * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6  ******************************************************************************/
7 
8 /******************************************************************************
9  *
10  * Chebyshev setup and solve
11  *
12  *****************************************************************************/
13 
14 #include "_hypre_parcsr_ls.h"
15 #include "_hypre_parcsr_mv.h"
16 #include "float.h"
17 
18 
19 /******************************************************************************
20 
21 Chebyshev relaxation
22 
23 
24 Can specify order 1-4 (this is the order of the resid polynomial)- here we
25 explicitly code the coefficients (instead of
26 iteratively determining)
27 
28 
29 variant 0: standard chebyshev
30 this is rlx 11 if scale = 0, and 16 if scale == 1
31 
32 variant 1: modified cheby: T(t)* f(t) where f(t) = (1-b/t)
33 this is rlx 15 if scale = 0, and 17 if scale == 1
34 
35 ratio indicates the percentage of the whole spectrum to use (so .5
36 means half, and .1 means 10percent)
37 
38 
39 *******************************************************************************/
40 
41 /**
42  * @brief Setups of coefficients (and optional diagonal scaling elements) for
43  * Chebyshev relaxation
44  *
45  * Will calculate ds_ptr on device/host depending on where A is located
46  *
47  * @param[in] A Matrix for which to seteup
48  * @param[in] max_eig Maximum eigenvalue
49  * @param[in] min_eig Maximum eigenvalue
50  * @param[in] fraction Fraction used to calculate lower bound
51  * @param[in] order Polynomial order to use [1,4]
52  * @param[in] scale Whether or not to scale by the diagonal
53  * @param[in] variant Whether or not to use a variant of Chebyshev (0 standard, 1 variant)
54  * @param[out] coefs_ptr *coefs_ptr will be allocated to contain coefficients of the polynomial
55  * @param[out] ds_ptr *ds_ptr will be allocated to allow scaling by the diagonal
56  */
57 HYPRE_Int
hypre_ParCSRRelax_Cheby_Setup(hypre_ParCSRMatrix * A,HYPRE_Real max_eig,HYPRE_Real min_eig,HYPRE_Real fraction,HYPRE_Int order,HYPRE_Int scale,HYPRE_Int variant,HYPRE_Real ** coefs_ptr,HYPRE_Real ** ds_ptr)58 hypre_ParCSRRelax_Cheby_Setup(hypre_ParCSRMatrix *A,         /* matrix to relax with */
59                               HYPRE_Real          max_eig,
60                               HYPRE_Real          min_eig,
61                               HYPRE_Real          fraction,
62                               HYPRE_Int           order,     /* polynomial order */
63                               HYPRE_Int           scale,     /* scale by diagonal?*/
64                               HYPRE_Int           variant,
65                               HYPRE_Real        **coefs_ptr,
66                               HYPRE_Real        **ds_ptr)    /* initial/updated approximation */
67 {
68    hypre_CSRMatrix *A_diag       = hypre_ParCSRMatrixDiag(A);
69    HYPRE_Real       theta, delta;
70    HYPRE_Real       den;
71    HYPRE_Real       upper_bound = 0.0, lower_bound = 0.0;
72    HYPRE_Int        num_rows     = hypre_CSRMatrixNumRows(A_diag);
73    HYPRE_Real      *coefs        = NULL;
74    HYPRE_Int        cheby_order;
75    HYPRE_Real      *ds_data = NULL;
76 
77    /* u = u + p(A)r */
78    if (order > 4)
79    {
80       order = 4;
81    }
82 
83    if (order < 1)
84    {
85       order = 1;
86    }
87 
88    coefs = hypre_CTAlloc(HYPRE_Real, order+1, HYPRE_MEMORY_HOST);
89    /* we are using the order of p(A) */
90    cheby_order = order - 1;
91 
92    if (min_eig >= 0.0)
93    {
94       /* make sure we are large enough - Adams et al. 2003 */
95       upper_bound = max_eig * 1.1;
96       /* lower_bound = max_eig/fraction; */
97       lower_bound = (upper_bound - min_eig) * fraction + min_eig;
98    }
99    else if (max_eig <= 0.0)
100    {
101       upper_bound = min_eig * 1.1;
102       lower_bound = max_eig - (max_eig - upper_bound) * fraction;
103    }
104 
105    /* theta and delta */
106    theta = (upper_bound + lower_bound)/2;
107    delta = (upper_bound - lower_bound)/2;
108 
109    if (variant == 1)
110    {
111       switch ( cheby_order ) /* these are the corresponding cheby polynomials: u = u_o + s(A)r_0  - so order is
112                                 one less that  resid poly: r(t) = 1 - t*s(t) */
113       {
114          case 0:
115             coefs[0] = 1.0/theta;
116 
117             break;
118 
119          case 1:  /* (del - t + 2*th)/(th^2 + del*th) */
120             den = (theta*theta + delta*theta);
121 
122             coefs[0] = (delta + 2*theta)/den;
123             coefs[1] = -1.0/den;
124 
125             break;
126 
127          case 2:  /* (4*del*th - del^2 - t*(2*del + 6*th) + 2*t^2 + 6*th^2)/(2*del*th^2 - del^2*th - del^3 + 2*th^3)*/
128             den = 2*delta*theta*theta - delta*delta*theta - pow(delta,3) + 2*pow(theta,3);
129 
130             coefs[0] = (4*delta*theta - pow(delta,2) +  6*pow(theta,2))/den;
131             coefs[1] = -(2*delta + 6*theta)/den;
132             coefs[2] =  2/den;
133 
134             break;
135 
136          case 3: /* -(6*del^2*th - 12*del*th^2 - t^2*(4*del + 16*th) + t*(12*del*th - 3*del^2 + 24*th^2) + 3*del^3 + 4*t^3 - 16*th^3)/(4*del*th^3 - 3*del^2*th^2 - 3*del^3*th + 4*th^4)*/
137             den = - (4*delta*pow(theta,3) - 3*pow(delta,2)*pow(theta,2) - 3*pow(delta,3)*theta + 4*pow(theta,4) );
138 
139             coefs[0] = (6*pow(delta,2)*theta - 12*delta*pow(theta,2) + 3*pow(delta,3) - 16*pow(theta,3)   )/den;
140             coefs[1] = (12*delta*theta - 3*pow(delta,2) + 24*pow(theta,2))/den;
141             coefs[2] =  -( 4*delta + 16*theta)/den;
142             coefs[3] = 4/den;
143 
144             break;
145       }
146    }
147 
148    else /* standard chebyshev */
149    {
150 
151       switch ( cheby_order ) /* these are the corresponding cheby polynomials: u = u_o + s(A)r_0  - so order is
152                                 one less thatn resid poly: r(t) = 1 - t*s(t) */
153       {
154          case 0:
155             coefs[0] = 1.0/theta;
156             break;
157 
158          case 1:  /* (  2*t - 4*th)/(del^2 - 2*th^2) */
159             den = delta*delta - 2*theta*theta;
160 
161             coefs[0] = -4*theta/den;
162             coefs[1] = 2/den;
163 
164             break;
165 
166          case 2: /* (3*del^2 - 4*t^2 + 12*t*th - 12*th^2)/(3*del^2*th - 4*th^3)*/
167             den = 3*(delta*delta)*theta - 4*(theta*theta*theta);
168 
169             coefs[0] = (3*delta*delta - 12 *theta*theta)/den;
170             coefs[1] = 12*theta/den;
171             coefs[2] = -4/den;
172 
173             break;
174 
175          case 3: /*(t*(8*del^2 - 48*th^2) - 16*del^2*th + 32*t^2*th - 8*t^3 + 32*th^3)/(del^4 - 8*del^2*th^2 + 8*th^4)*/
176             den = pow(delta,4) - 8*delta*delta*theta*theta + 8*pow(theta,4);
177 
178             coefs[0] = (32*pow(theta,3)- 16*delta*delta*theta)/den;
179             coefs[1] = (8*delta*delta - 48*theta*theta)/den;
180             coefs[2] = 32*theta/den;
181             coefs[3] = -8/den;
182 
183             break;
184       }
185    }
186    *coefs_ptr = coefs;
187 
188    if (scale)
189    {
190       /*grab 1/sqrt(abs(diagonal)) */
191       ds_data = hypre_CTAlloc(HYPRE_Real, num_rows, hypre_ParCSRMatrixMemoryLocation(A));
192       hypre_CSRMatrixExtractDiagonal(hypre_ParCSRMatrixDiag(A), ds_data, 4);
193    } /* end of scaling code */
194    *ds_ptr = ds_data;
195 
196    return hypre_error_flag;
197 }
198 
199 /**
200  * @brief Solve using a chebyshev polynomial on the host
201  *
202  * @param[in] A Matrix to relax with
203  * @param[in] f right-hand side
204  * @param[in] ds_data Diagonal information
205  * @param[in] coefs Polynomial coefficients
206  * @param[in] order Order of the polynomial
207  * @param[in] scale Whether or not to scale by diagonal
208  * @param[in] scale Whether or not to use a variant
209  * @param[in,out] u Initial/updated approximation
210  * @param[in] v Temp vector
211  * @param[in] r Temp Vector
212  * @param[in] orig_u_vec Temp Vector
213  * @param[in] tmp Temp Vector
214  */
215 HYPRE_Int
hypre_ParCSRRelax_Cheby_SolveHost(hypre_ParCSRMatrix * A,hypre_ParVector * f,HYPRE_Real * ds_data,HYPRE_Real * coefs,HYPRE_Int order,HYPRE_Int scale,HYPRE_Int variant,hypre_ParVector * u,hypre_ParVector * v,hypre_ParVector * r,hypre_ParVector * orig_u_vec,hypre_ParVector * tmp_vec)216 hypre_ParCSRRelax_Cheby_SolveHost(hypre_ParCSRMatrix *A, /* matrix to relax with */
217                                   hypre_ParVector    *f, /* right-hand side */
218                                   HYPRE_Real         *ds_data,
219                                   HYPRE_Real         *coefs,
220                                   HYPRE_Int           order, /* polynomial order */
221                                   HYPRE_Int           scale, /* scale by diagonal?*/
222                                   HYPRE_Int           variant,
223                                   hypre_ParVector    *u, /* initial/updated approximation */
224                                   hypre_ParVector    *v, /* temporary vector */
225                                   hypre_ParVector    *r, /* another vector */
226                                   hypre_ParVector    *orig_u_vec, /*another temp vector */
227                                   hypre_ParVector    *tmp_vec) /*a potential temp vector */
228 {
229    hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
230    HYPRE_Real *u_data = hypre_VectorData(hypre_ParVectorLocalVector(u));
231    HYPRE_Real *f_data = hypre_VectorData(hypre_ParVectorLocalVector(f));
232    HYPRE_Real *v_data = hypre_VectorData(hypre_ParVectorLocalVector(v));
233 
234    HYPRE_Real  *r_data = hypre_VectorData(hypre_ParVectorLocalVector(r));
235 
236    HYPRE_Int i, j;
237    HYPRE_Int num_rows = hypre_CSRMatrixNumRows(A_diag);
238 
239    HYPRE_Real mult;
240    HYPRE_Real *orig_u;
241 
242    HYPRE_Int cheby_order;
243 
244    HYPRE_Real  *tmp_data;
245 
246 
247    /* u = u + p(A)r */
248 
249    if (order > 4)
250       order = 4;
251    if (order < 1)
252       order = 1;
253 
254    /* we are using the order of p(A) */
255    cheby_order = order -1;
256 
257    hypre_assert(hypre_VectorSize(hypre_ParVectorLocalVector(orig_u_vec)) >= num_rows);
258    orig_u = hypre_VectorData(hypre_ParVectorLocalVector(orig_u_vec));
259 
260    if (!scale)
261    {
262       /* get residual: r = f - A*u */
263       hypre_ParVectorCopy(f, r);
264       hypre_ParCSRMatrixMatvec(-1.0, A, u, 1.0, r);
265 
266       /* o = u; u = r .* coef */
267       for ( i = 0; i < num_rows; i++ )
268       {
269          orig_u[i] = u_data[i];
270          u_data[i] = r_data[i] * coefs[cheby_order];
271       }
272       for (i = cheby_order - 1; i >= 0; i-- )
273       {
274          hypre_ParCSRMatrixMatvec(1.0, A, u, 0.0, v);
275          mult = coefs[i];
276          /* u = mult * r + v */
277 #ifdef HYPRE_USING_OPENMP
278 #pragma omp parallel for private(j) HYPRE_SMP_SCHEDULE
279 #endif
280          for ( j = 0; j < num_rows; j++ )
281          {
282             u_data[j] = mult * r_data[j] + v_data[j];
283          }
284       }
285 
286       /* u = o + u */
287 #ifdef HYPRE_USING_OPENMP
288 #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
289 #endif
290       for ( i = 0; i < num_rows; i++ )
291       {
292          u_data[i] = orig_u[i] + u_data[i];
293       }
294    }
295    else /* scaling! */
296    {
297 
298       /*grab 1/sqrt(diagonal) */
299       tmp_data = hypre_VectorData(hypre_ParVectorLocalVector(tmp_vec));
300 
301     /* get ds_data and get scaled residual: r = D^(-1/2)f -
302        * D^(-1/2)A*u */
303 
304       hypre_ParCSRMatrixMatvec(-1.0, A, u, 0.0, tmp_vec);
305       /* r = ds .* (f + tmp) */
306 #ifdef HYPRE_USING_OPENMP
307 #pragma omp parallel for private(j) HYPRE_SMP_SCHEDULE
308 #endif
309       for ( j = 0; j < num_rows; j++ )
310       {
311          r_data[j] = ds_data[j] * (f_data[j] + tmp_data[j]);
312       }
313 
314       /* save original u, then start
315          the iteration by multiplying r by the cheby coef.*/
316 
317       /* o = u;  u = r * coef */
318 #ifdef HYPRE_USING_OPENMP
319 #pragma omp parallel for private(j) HYPRE_SMP_SCHEDULE
320 #endif
321       for ( j = 0; j < num_rows; j++ )
322       {
323          orig_u[j] = u_data[j]; /* orig, unscaled u */
324 
325          u_data[j] = r_data[j] * coefs[cheby_order];
326       }
327 
328       /* now do the other coefficients */
329       for (i = cheby_order - 1; i >= 0; i-- )
330       {
331          /* v = D^(-1/2)AD^(-1/2)u */
332          /* tmp = ds .* u */
333 #ifdef HYPRE_USING_OPENMP
334 #pragma omp parallel for private(j) HYPRE_SMP_SCHEDULE
335 #endif
336          for ( j = 0; j < num_rows; j++ )
337          {
338             tmp_data[j]  =  ds_data[j] * u_data[j];
339          }
340          hypre_ParCSRMatrixMatvec(1.0, A, tmp_vec, 0.0, v);
341 
342          /* u_new = coef*r + v*/
343          mult = coefs[i];
344 
345          /* u = coef * r + ds .* v */
346 #ifdef HYPRE_USING_OPENMP
347 #pragma omp parallel for private(j) HYPRE_SMP_SCHEDULE
348 #endif
349          for ( j = 0; j < num_rows; j++ )
350          {
351             u_data[j] = mult * r_data[j] + ds_data[j]*v_data[j];
352          }
353 
354       } /* end of cheby_order loop */
355 
356       /* now we have to scale u_data before adding it to u_orig*/
357 
358       /* u = orig_u + ds .* u */
359 #ifdef HYPRE_USING_OPENMP
360 #pragma omp parallel for private(j) HYPRE_SMP_SCHEDULE
361 #endif
362       for ( j = 0; j < num_rows; j++ )
363       {
364          u_data[j] = orig_u[j] + ds_data[j]*u_data[j];
365       }
366 
367    }/* end of scaling code */
368 
369    return hypre_error_flag;
370 }
371 
372 /**
373  * @brief Solve using a chebyshev polynomial
374  *
375  * Determines whether to solve on host or device
376  *
377  * @param[in] A Matrix to relax with
378  * @param[in] f right-hand side
379  * @param[in] ds_data Diagonal information
380  * @param[in] coefs Polynomial coefficients
381  * @param[in] order Order of the polynomial
382  * @param[in] scale Whether or not to scale by diagonal
383  * @param[in] scale Whether or not to use a variant
384  * @param[in,out] u Initial/updated approximation
385  * @param[out] v Temp vector
386  * @param[out] r Temp Vector
387  * @param[out] orig_u_vec Temp Vector
388  * @param[out] tmp_vec Temp Vector
389  */
390 HYPRE_Int
hypre_ParCSRRelax_Cheby_Solve(hypre_ParCSRMatrix * A,hypre_ParVector * f,HYPRE_Real * ds_data,HYPRE_Real * coefs,HYPRE_Int order,HYPRE_Int scale,HYPRE_Int variant,hypre_ParVector * u,hypre_ParVector * v,hypre_ParVector * r,hypre_ParVector * orig_u_vec,hypre_ParVector * tmp_vec)391 hypre_ParCSRRelax_Cheby_Solve(hypre_ParCSRMatrix *A, /* matrix to relax with */
392                               hypre_ParVector    *f, /* right-hand side */
393                               HYPRE_Real         *ds_data,
394                               HYPRE_Real         *coefs,
395                               HYPRE_Int           order, /* polynomial order */
396                               HYPRE_Int           scale, /* scale by diagonal?*/
397                               HYPRE_Int           variant,
398                               hypre_ParVector    *u, /* initial/updated approximation */
399                               hypre_ParVector    *v, /* temporary vector */
400                               hypre_ParVector    *r, /*another temp vector */
401                               hypre_ParVector    *orig_u_vec, /*another temp vector */
402                               hypre_ParVector    *tmp_vec) /*another temp vector */
403 {
404 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
405    hypre_GpuProfilingPushRange("ParCSRRelaxChebySolve");
406 #endif
407    HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1(hypre_ParCSRMatrixMemoryLocation(A));
408    HYPRE_Int             ierr = 0;
409 
410    if (exec == HYPRE_EXEC_HOST)
411    {
412       ierr = hypre_ParCSRRelax_Cheby_SolveHost(A, f, ds_data, coefs, order, scale, variant, u, v, r, orig_u_vec, tmp_vec);
413    }
414 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
415    else
416    {
417       ierr = hypre_ParCSRRelax_Cheby_SolveDevice(A, f, ds_data, coefs, order, scale, variant, u, v, r, orig_u_vec, tmp_vec);
418    }
419 #endif
420 
421 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
422    hypre_GpuProfilingPopRange();
423 #endif
424    return ierr;
425 }
426