1 /******************************************************************************
2  * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3  * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4  *
5  * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6  ******************************************************************************/
7 
8 /******************************************************************************
9  *
10  * a few more relaxation schemes: Chebychev, FCF-Jacobi, CG  -
11  * these do not go through the CF interface (hypre_BoomerAMGRelaxIF)
12  *
13  *****************************************************************************/
14 
15 #include "_hypre_parcsr_ls.h"
16 #include "float.h"
17 
18 /******************************************************************************
19  *
20  * use Gershgorin discs to estimate smallest and largest eigenvalues
21  * A is assumed to be symmetric
22  * For SPD matrix, it returns [0, max_eig = max (aii + ri)],
23  *                 ri is radius of disc centered at a_ii
24  * For SND matrix, it returns [min_eig = min (aii - ri), 0]
25  *
26  * scale > 0: compute eigen estimate of D^{-1/2}*A*D^{-1/2}, where
27  *            D = diag(A) for SPD matrix, D = -diag(A) for SND
28  *
29  * scale = 1: The algorithm is performed on D^{-1}*A, since it
30  *            has the same eigenvalues as D^{-1/2}*A*D^{-1/2}
31  * scale = 2: The algorithm is performed on D^{-1/2}*A*D^{-1/2} (TODO)
32  *
33  *****************************************************************************/
34 HYPRE_Int
hypre_ParCSRMaxEigEstimateHost(hypre_ParCSRMatrix * A,HYPRE_Int scale,HYPRE_Real * max_eig,HYPRE_Real * min_eig)35 hypre_ParCSRMaxEigEstimateHost( hypre_ParCSRMatrix *A,       /* matrix to relax with */
36                             HYPRE_Int           scale,   /* scale by diagonal?   */
37                             HYPRE_Real         *max_eig,
38                             HYPRE_Real         *min_eig )
39 {
40    HYPRE_Int   A_num_rows  = hypre_ParCSRMatrixNumRows(A);
41    HYPRE_Int  *A_diag_i    = hypre_CSRMatrixI(hypre_ParCSRMatrixDiag(A));
42    HYPRE_Int  *A_diag_j    = hypre_CSRMatrixJ(hypre_ParCSRMatrixDiag(A));
43    HYPRE_Int  *A_offd_i    = hypre_CSRMatrixI(hypre_ParCSRMatrixOffd(A));
44    HYPRE_Real *A_diag_data = hypre_CSRMatrixData(hypre_ParCSRMatrixDiag(A));
45    HYPRE_Real *A_offd_data = hypre_CSRMatrixData(hypre_ParCSRMatrixOffd(A));
46    HYPRE_Real *diag        = NULL;
47    HYPRE_Int   i, j;
48    HYPRE_Real  e_max, e_min;
49    HYPRE_Real  send_buf[2], recv_buf[2];
50 
51    HYPRE_MemoryLocation memory_location = hypre_ParCSRMatrixMemoryLocation(A);
52 
53    if (scale > 1)
54    {
55       diag = hypre_TAlloc(HYPRE_Real, A_num_rows, memory_location);
56    }
57 
58    for (i = 0; i < A_num_rows; i++)
59    {
60       HYPRE_Real a_ii = 0.0, r_i = 0.0, lower, upper;
61 
62       for (j = A_diag_i[i]; j < A_diag_i[i+1]; j++)
63       {
64          if (A_diag_j[j] == i)
65          {
66             a_ii = A_diag_data[j];
67          }
68          else
69          {
70             r_i += hypre_abs(A_diag_data[j]);
71          }
72       }
73 
74       for (j = A_offd_i[i]; j < A_offd_i[i+1]; j++)
75       {
76          r_i += hypre_abs(A_offd_data[j]);
77       }
78 
79       lower = a_ii - r_i;
80       upper = a_ii + r_i;
81 
82       if (scale == 1)
83       {
84          lower /= hypre_abs(a_ii);
85          upper /= hypre_abs(a_ii);
86       }
87 
88       if (i)
89       {
90          e_max = hypre_max(e_max, upper);
91          e_min = hypre_min(e_min, lower);
92       }
93       else
94       {
95          e_max = upper;
96          e_min = lower;
97       }
98    }
99 
100    send_buf[0] = -e_min;
101    send_buf[1] =  e_max;
102 
103    /* get e_min e_max across procs */
104    hypre_MPI_Allreduce(send_buf, recv_buf, 2, HYPRE_MPI_REAL, hypre_MPI_MAX, hypre_ParCSRMatrixComm(A));
105 
106    e_min = -recv_buf[0];
107    e_max =  recv_buf[1];
108 
109    /* return */
110    if ( hypre_abs(e_min) > hypre_abs(e_max) )
111    {
112       *min_eig = e_min;
113       *max_eig = hypre_min(0.0, e_max);
114    }
115    else
116    {
117       *min_eig = hypre_max(e_min, 0.0);
118       *max_eig = e_max;
119    }
120 
121    hypre_TFree(diag, memory_location);
122 
123    return hypre_error_flag;
124 }
125 
126 /**
127  * @brief Estimates the max eigenvalue using infinity norm. Will determine
128  * whether or not to use host or device internally
129  *
130  * @param[in] A Matrix to relax with
131  * @param[in] to scale by diagonal
132  * @param[out] Maximum eigenvalue
133  */
134 HYPRE_Int
hypre_ParCSRMaxEigEstimate(hypre_ParCSRMatrix * A,HYPRE_Int scale,HYPRE_Real * max_eig,HYPRE_Real * min_eig)135 hypre_ParCSRMaxEigEstimate(hypre_ParCSRMatrix *A, /* matrix to relax with */
136                            HYPRE_Int scale, /* scale by diagonal?*/
137                            HYPRE_Real *max_eig,
138                            HYPRE_Real *min_eig)
139 {
140 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
141    hypre_GpuProfilingPushRange("ParCSRMaxEigEstimate");
142 #endif
143    HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(A) );
144    HYPRE_Int ierr = 0;
145    if (exec == HYPRE_EXEC_HOST)
146    {
147       ierr = hypre_ParCSRMaxEigEstimateHost(A,scale,max_eig,min_eig);
148    }
149 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
150    else
151    {
152       ierr = hypre_ParCSRMaxEigEstimateDevice(A,scale,max_eig,min_eig);
153    }
154 #endif
155 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
156    hypre_GpuProfilingPopRange();
157 #endif
158    return ierr;
159 }
160 
161 /**
162  *  @brief Uses CG to get the eigenvalue estimate. Will determine whether to use
163  *  host or device internally
164  *
165  *  @param[in] A Matrix to relax with
166  *  @param[in] scale Gets the eigenvalue est of D^{-1/2} A D^{-1/2}
167  *  @param[in] max_iter Maximum number of iterations for CG
168  *  @param[out] max_eig Estimated max eigenvalue
169  *  @param[out] min_eig Estimated min eigenvalue
170  */
171 HYPRE_Int
hypre_ParCSRMaxEigEstimateCG(hypre_ParCSRMatrix * A,HYPRE_Int scale,HYPRE_Int max_iter,HYPRE_Real * max_eig,HYPRE_Real * min_eig)172 hypre_ParCSRMaxEigEstimateCG(hypre_ParCSRMatrix *A,     /* matrix to relax with */
173                              HYPRE_Int           scale, /* scale by diagonal?*/
174                              HYPRE_Int           max_iter,
175                              HYPRE_Real         *max_eig,
176                              HYPRE_Real         *min_eig)
177 {
178 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
179    hypre_GpuProfilingPushRange("ParCSRMaxEigEstimateCG");
180 #endif
181    HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1(hypre_ParCSRMatrixMemoryLocation(A));
182    HYPRE_Int             ierr = 0;
183    if (exec == HYPRE_EXEC_HOST)
184    {
185       ierr = hypre_ParCSRMaxEigEstimateCGHost(A, scale, max_iter, max_eig, min_eig);
186    }
187 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
188    else
189    {
190       ierr = hypre_ParCSRMaxEigEstimateCGDevice(A, scale, max_iter, max_eig, min_eig);
191    }
192 #endif
193 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
194    hypre_GpuProfilingPopRange();
195 #endif
196    return ierr;
197 }
198 
199 /**
200  *  @brief Uses CG to get the eigenvalue estimate on the host
201  *
202  *  @param[in] A Matrix to relax with
203  *  @param[in] scale Gets the eigenvalue est of D^{-1/2} A D^{-1/2}
204  *  @param[in] max_iter Maximum number of iterations for CG
205  *  @param[out] max_eig Estimated max eigenvalue
206  *  @param[out] min_eig Estimated min eigenvalue
207  */
208 HYPRE_Int
hypre_ParCSRMaxEigEstimateCGHost(hypre_ParCSRMatrix * A,HYPRE_Int scale,HYPRE_Int max_iter,HYPRE_Real * max_eig,HYPRE_Real * min_eig)209 hypre_ParCSRMaxEigEstimateCGHost( hypre_ParCSRMatrix *A,     /* matrix to relax with */
210                                  HYPRE_Int           scale, /* scale by diagonal?*/
211                                  HYPRE_Int           max_iter,
212                                  HYPRE_Real         *max_eig,
213                                  HYPRE_Real         *min_eig )
214 {
215    HYPRE_Int i, j, err;
216    hypre_ParVector *p;
217    hypre_ParVector *s;
218    hypre_ParVector *r;
219    hypre_ParVector *ds;
220    hypre_ParVector *u;
221 
222    HYPRE_Real *tridiag = NULL;
223    HYPRE_Real *trioffd = NULL;
224 
225    HYPRE_Real lambda_max ;
226    HYPRE_Real beta, gamma = 0.0, alpha, sdotp, gamma_old, alphainv;
227    HYPRE_Real lambda_min;
228    HYPRE_Real *s_data, *p_data, *ds_data, *u_data;
229    HYPRE_Int local_size = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(A));
230 
231    /* check the size of A - don't iterate more than the size */
232    HYPRE_BigInt size = hypre_ParCSRMatrixGlobalNumRows(A);
233 
234    if (size < (HYPRE_BigInt) max_iter)
235    {
236       max_iter = (HYPRE_Int) size;
237    }
238 
239    /* create some temp vectors: p, s, r , ds, u*/
240    r = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(A),
241                              hypre_ParCSRMatrixGlobalNumRows(A),
242                              hypre_ParCSRMatrixRowStarts(A));
243    hypre_ParVectorInitialize(r);
244 
245    p = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(A),
246                              hypre_ParCSRMatrixGlobalNumRows(A),
247                              hypre_ParCSRMatrixRowStarts(A));
248    hypre_ParVectorInitialize(p);
249 
250    s = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(A),
251                              hypre_ParCSRMatrixGlobalNumRows(A),
252                              hypre_ParCSRMatrixRowStarts(A));
253    hypre_ParVectorInitialize(s);
254 
255    ds = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(A),
256                               hypre_ParCSRMatrixGlobalNumRows(A),
257                               hypre_ParCSRMatrixRowStarts(A));
258    hypre_ParVectorInitialize(ds);
259 
260    u = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(A),
261                              hypre_ParCSRMatrixGlobalNumRows(A),
262                              hypre_ParCSRMatrixRowStarts(A));
263    hypre_ParVectorInitialize(u);
264 
265    /* point to local data */
266    s_data = hypre_VectorData(hypre_ParVectorLocalVector(s));
267    p_data = hypre_VectorData(hypre_ParVectorLocalVector(p));
268    ds_data = hypre_VectorData(hypre_ParVectorLocalVector(ds));
269    u_data = hypre_VectorData(hypre_ParVectorLocalVector(u));
270 
271    /* make room for tri-diag matrix */
272    tridiag = hypre_CTAlloc(HYPRE_Real, max_iter+1, HYPRE_MEMORY_HOST);
273    trioffd = hypre_CTAlloc(HYPRE_Real, max_iter+1, HYPRE_MEMORY_HOST);
274    for (i=0; i < max_iter + 1; i++)
275    {
276       tridiag[i] = 0;
277       trioffd[i] = 0;
278    }
279 
280    /* set residual to random */
281    hypre_ParVectorSetRandomValues(r,1);
282 
283    if (scale)
284    {
285       hypre_CSRMatrixExtractDiagonal(hypre_ParCSRMatrixDiag(A), ds_data, 4);
286    }
287    else
288    {
289       /* set ds to 1 */
290       hypre_ParVectorSetConstantValues(ds,1.0);
291    }
292 
293    /* gamma = <r,Cr> */
294    gamma = hypre_ParVectorInnerProd(r,p);
295 
296    /* for the initial filling of the tridiag matrix */
297    beta = 1.0;
298 
299    i = 0;
300    while (i < max_iter)
301    {
302       /* s = C*r */
303       /* TO DO:  C = diag scale */
304       hypre_ParVectorCopy(r, s);
305 
306       /*gamma = <r,Cr> */
307       gamma_old = gamma;
308       gamma = hypre_ParVectorInnerProd(r,s);
309 
310       if (i==0)
311       {
312          beta = 1.0;
313          /* p_0 = C*r */
314          hypre_ParVectorCopy(s, p);
315       }
316       else
317       {
318          /* beta = gamma / gamma_old */
319          beta = gamma / gamma_old;
320 
321          /* p = s + beta p */
322 #ifdef HYPRE_USING_OPENMP
323 #pragma omp parallel for private(j) HYPRE_SMP_SCHEDULE
324 #endif
325          for (j=0; j < local_size; j++)
326          {
327             p_data[j] = s_data[j] + beta*p_data[j];
328          }
329       }
330 
331       if (scale)
332       {
333          /* s = D^{-1/2}A*D^{-1/2}*p */
334          for (j = 0; j < local_size; j++)
335          {
336             u_data[j] = ds_data[j] * p_data[j];
337          }
338          hypre_ParCSRMatrixMatvec(1.0, A, u, 0.0, s);
339          for (j = 0; j < local_size; j++)
340          {
341             s_data[j] = ds_data[j] * s_data[j];
342          }
343       }
344       else
345       {
346          /* s = A*p */
347          hypre_ParCSRMatrixMatvec(1.0, A, p, 0.0, s);
348       }
349 
350       /* <s,p> */
351       sdotp =  hypre_ParVectorInnerProd(s,p);
352 
353       /* alpha = gamma / <s,p> */
354       alpha = gamma/sdotp;
355 
356       /* get tridiagonal matrix */
357       alphainv = 1.0/alpha;
358 
359       tridiag[i+1] = alphainv;
360       tridiag[i] *= beta;
361       tridiag[i] += alphainv;
362 
363       trioffd[i+1] = alphainv;
364       trioffd[i] *= sqrt(beta);
365 
366       /* x = x + alpha*p */
367       /* don't need */
368 
369       /* r = r - alpha*s */
370       hypre_ParVectorAxpy( -alpha, s, r);
371 
372       i++;
373    }
374 
375    /* eispack routine - eigenvalues return in tridiag and ordered*/
376    hypre_LINPACKcgtql1(&i,tridiag,trioffd,&err);
377 
378    lambda_max = tridiag[i-1];
379    lambda_min = tridiag[0];
380    /* hypre_printf("linpack max eig est = %g\n", lambda_max);*/
381    /* hypre_printf("linpack min eig est = %g\n", lambda_min);*/
382 
383    hypre_TFree(tridiag, HYPRE_MEMORY_HOST);
384    hypre_TFree(trioffd, HYPRE_MEMORY_HOST);
385 
386    hypre_ParVectorDestroy(r);
387    hypre_ParVectorDestroy(s);
388    hypre_ParVectorDestroy(p);
389    hypre_ParVectorDestroy(ds);
390    hypre_ParVectorDestroy(u);
391 
392    /* return */
393    *max_eig = lambda_max;
394    *min_eig = lambda_min;
395 
396    return hypre_error_flag;
397 }
398 
399 /******************************************************************************
400 Chebyshev relaxation
401 
402 Can specify order 1-4 (this is the order of the resid polynomial)- here we
403 explicitly code the coefficients (instead of iteratively determining)
404 
405 variant 0: standard chebyshev
406 this is rlx 11 if scale = 0, and 16 if scale == 1
407 
408 variant 1: modified cheby: T(t)* f(t) where f(t) = (1-b/t)
409 this is rlx 15 if scale = 0, and 17 if scale == 1
410 
411 ratio indicates the percentage of the whole spectrum to use (so .5
412 means half, and .1 means 10percent)
413 *******************************************************************************/
414 
415 HYPRE_Int
hypre_ParCSRRelax_Cheby(hypre_ParCSRMatrix * A,hypre_ParVector * f,HYPRE_Real max_eig,HYPRE_Real min_eig,HYPRE_Real fraction,HYPRE_Int order,HYPRE_Int scale,HYPRE_Int variant,hypre_ParVector * u,hypre_ParVector * v,hypre_ParVector * r)416 hypre_ParCSRRelax_Cheby(hypre_ParCSRMatrix *A, /* matrix to relax with */
417                         hypre_ParVector    *f, /* right-hand side */
418                         HYPRE_Real          max_eig,
419                         HYPRE_Real          min_eig,
420                         HYPRE_Real          fraction,
421                         HYPRE_Int           order, /* polynomial order */
422                         HYPRE_Int           scale, /* scale by diagonal?*/
423                         HYPRE_Int           variant,
424                         hypre_ParVector    *u, /* initial/updated approximation */
425                         hypre_ParVector    *v, /* temporary vector */
426                         hypre_ParVector    *r /*another temp vector */)
427 {
428    HYPRE_Real *coefs   = NULL;
429    HYPRE_Real *ds_data = NULL;
430 
431    hypre_ParVector *tmp_vec    = NULL;
432    hypre_ParVector *orig_u_vec = NULL;
433 
434    hypre_ParCSRRelax_Cheby_Setup(A, max_eig, min_eig, fraction, order, scale, variant, &coefs, &ds_data);
435 
436    orig_u_vec = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(A),
437                                       hypre_ParCSRMatrixGlobalNumRows(A),
438                                       hypre_ParCSRMatrixRowStarts(A));
439    hypre_ParVectorInitialize_v2(orig_u_vec, hypre_ParCSRMatrixMemoryLocation(A));
440 
441    if (scale)
442    {
443       tmp_vec = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(A),
444                                       hypre_ParCSRMatrixGlobalNumRows(A),
445                                       hypre_ParCSRMatrixRowStarts(A));
446       hypre_ParVectorInitialize_v2(tmp_vec, hypre_ParCSRMatrixMemoryLocation(A));
447    }
448    hypre_ParCSRRelax_Cheby_Solve(A, f, ds_data, coefs, order, scale, variant, u, v, r, orig_u_vec, tmp_vec);
449 
450    hypre_TFree(ds_data, hypre_ParCSRMatrixMemoryLocation(A));
451    hypre_TFree(coefs, HYPRE_MEMORY_HOST);
452    hypre_ParVectorDestroy(orig_u_vec);
453    hypre_ParVectorDestroy(tmp_vec);
454 
455    return hypre_error_flag;
456 }
457 
458 /*--------------------------------------------------------------------------
459  * CG Smoother
460  *--------------------------------------------------------------------------*/
461 
462 HYPRE_Int
hypre_ParCSRRelax_CG(HYPRE_Solver solver,hypre_ParCSRMatrix * A,hypre_ParVector * f,hypre_ParVector * u,HYPRE_Int num_its)463 hypre_ParCSRRelax_CG( HYPRE_Solver        solver,
464                       hypre_ParCSRMatrix *A,
465                       hypre_ParVector    *f,
466                       hypre_ParVector    *u,
467                       HYPRE_Int           num_its)
468 {
469 
470    HYPRE_PCGSetMaxIter(solver, num_its); /* max iterations */
471    HYPRE_PCGSetTol(solver, 0.0); /* max iterations */
472    HYPRE_ParCSRPCGSolve(solver, (HYPRE_ParCSRMatrix)A, (HYPRE_ParVector)f, (HYPRE_ParVector)u);
473 
474 #if 0
475    {
476       HYPRE_Int myid;
477       HYPRE_Int num_iterations;
478       HYPRE_Real final_res_norm;
479 
480       hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid);
481       HYPRE_PCGGetNumIterations(solver, &num_iterations);
482       HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm);
483       if (myid ==0)
484       {
485          hypre_printf("            -----CG PCG Iterations = %d\n", num_iterations);
486          hypre_printf("            -----CG PCG Final Relative Residual Norm = %e\n", final_res_norm);
487       }
488     }
489 #endif
490 
491    return hypre_error_flag;
492 }
493 
494 
495 /* tql1.f --
496 
497   this is the eispack translation - from Barry Smith in Petsc
498 
499   Note that this routine always uses real numbers (not complex) even
500   if the underlying matrix is Hermitian. This is because the Lanczos
501   process applied to Hermitian matrices always produces a real,
502   symmetric tridiagonal matrix.
503 */
504 
505 HYPRE_Int
hypre_LINPACKcgtql1(HYPRE_Int * n,HYPRE_Real * d,HYPRE_Real * e,HYPRE_Int * ierr)506 hypre_LINPACKcgtql1(HYPRE_Int *n,HYPRE_Real *d,HYPRE_Real *e,HYPRE_Int *ierr)
507 {
508    /* System generated locals */
509    HYPRE_Int  i__1,i__2;
510    HYPRE_Real d__1,d__2,c_b10 = 1.0;
511 
512    /* Local variables */
513    HYPRE_Real c,f,g,h;
514    HYPRE_Int  i,j,l,m;
515    HYPRE_Real p,r,s,c2,c3 = 0.0;
516    HYPRE_Int  l1,l2;
517    HYPRE_Real s2 = 0.0;
518    HYPRE_Int  ii;
519    HYPRE_Real dl1,el1;
520    HYPRE_Int  mml;
521    HYPRE_Real tst1,tst2;
522 
523    /*     THIS SUBROUTINE IS A TRANSLATION OF THE ALGOL PROCEDURE TQL1, */
524    /*     NUM. MATH. 11, 293-306(1968) BY BOWDLER, MARTIN, REINSCH, AND */
525    /*     WILKINSON. */
526    /*     HANDBOOK FOR AUTO. COMP., VOL.II-LINEAR ALGEBRA, 227-240(1971). */
527 
528    /*     THIS SUBROUTINE FINDS THE EIGENVALUES OF A SYMMETRIC */
529    /*     TRIDIAGONAL MATRIX BY THE QL METHOD. */
530 
531    /*     ON INPUT */
532 
533    /*        N IS THE ORDER OF THE MATRIX. */
534 
535    /*        D CONTAINS THE DIAGONAL ELEMENTS OF THE INPUT MATRIX. */
536 
537    /*        E CONTAINS THE SUBDIAGONAL ELEMENTS OF THE INPUT MATRIX */
538    /*          IN ITS LAST N-1 POSITIONS.  E(1) IS ARBITRARY. */
539 
540    /*      ON OUTPUT */
541 
542    /*        D CONTAINS THE EIGENVALUES IN ASCENDING ORDER.  IF AN */
543    /*          ERROR EXIT IS MADE, THE EIGENVALUES ARE CORRECT AND */
544    /*          ORDERED FOR INDICES 1,2,...IERR-1, BUT MAY NOT BE */
545    /*          THE SMALLEST EIGENVALUES. */
546 
547    /*        E HAS BEEN DESTROYED. */
548 
549    /*        IERR IS SET TO */
550    /*          ZERO       FOR NORMAL RETURN, */
551    /*          J          IF THE J-TH EIGENVALUE HAS NOT BEEN */
552    /*                     DETERMINED AFTER 30 ITERATIONS. */
553 
554    /*     CALLS CGPTHY FOR  DSQRT(A*A + B*B) . */
555 
556    /*     QUESTIONS AND COMMENTS SHOULD BE DIRECTED TO BURTON S. GARBOW, */
557    /*     MATHEMATICS AND COMPUTER SCIENCE DIV, ARGONNE NATIONAL LABORATORY
558    */
559 
560    /*     THIS VERSION DATED AUGUST 1983. */
561 
562    /*     ------------------------------------------------------------------
563    */
564    HYPRE_Real ds;
565 
566    --e;
567    --d;
568 
569    *ierr = 0;
570    if (*n == 1)
571    {
572       goto L1001;
573    }
574 
575    i__1 = *n;
576    for (i = 2; i <= i__1; ++i)
577    {
578       e[i - 1] = e[i];
579    }
580 
581    f = 0.;
582    tst1 = 0.;
583    e[*n] = 0.;
584 
585    i__1 = *n;
586    for (l = 1; l <= i__1; ++l)
587    {
588       j = 0;
589       h = (d__1 = d[l],fabs(d__1)) + (d__2 = e[l],fabs(d__2));
590       if (tst1 < h)
591       {
592          tst1 = h;
593       }
594       /*     .......... LOOK FOR SMALL SUB-DIAGONAL ELEMENT .......... */
595       i__2 = *n;
596       for (m = l; m <= i__2; ++m) {
597          tst2 = tst1 + (d__1 = e[m],fabs(d__1));
598          if (tst2 == tst1)
599          {
600             goto L120;
601          }
602          /*     .......... E(N) IS ALWAYS ZERO,SO THERE IS NO EXIT */
603          /*                THROUGH THE BOTTOM OF THE LOOP .......... */
604       }
605 L120:
606       if (m == l)
607       {
608          goto L210;
609       }
610 L130:
611       if (j == 30)
612       {
613          goto L1000;
614       }
615       ++j;
616       /*     .......... FORM SHIFT .......... */
617       l1 = l + 1;
618       l2 = l1 + 1;
619       g = d[l];
620       p = (d[l1] - g) / (e[l] * 2.);
621       r = hypre_LINPACKcgpthy(&p,&c_b10);
622       ds = 1.0; if (p < 0.0) ds = -1.0;
623       d[l] = e[l] / (p + ds*r);
624       d[l1] = e[l] * (p + ds*r);
625       dl1 = d[l1];
626       h = g - d[l];
627       if (l2 > *n)
628       {
629          goto L145;
630       }
631 
632       i__2 = *n;
633       for (i = l2; i <= i__2; ++i) {
634          d[i] -= h;
635       }
636 
637 L145:
638       f += h;
639       /*     .......... QL TRANSFORMATION .......... */
640       p = d[m];
641       c = 1.;
642       c2 = c;
643       el1 = e[l1];
644       s = 0.;
645       mml = m - l;
646       /*     .......... FOR I=M-1 STEP -1 UNTIL L DO -- .......... */
647       i__2 = mml;
648       for (ii = 1; ii <= i__2; ++ii)
649       {
650          c3 = c2;
651          c2 = c;
652          s2 = s;
653          i = m - ii;
654          g = c * e[i];
655          h = c * p;
656          r = hypre_LINPACKcgpthy(&p,&e[i]);
657          e[i + 1] = s * r;
658          s = e[i] / r;
659          c = p / r;
660          p = c * d[i] - s * g;
661          d[i + 1] = h + s * (c * g + s * d[i]);
662       }
663 
664       p = -s * s2 * c3 * el1 * e[l] / dl1;
665       e[l] = s * p;
666       d[l] = c * p;
667       tst2 = tst1 + (d__1 = e[l],fabs(d__1));
668       if (tst2 > tst1)
669       {
670          goto L130;
671       }
672 L210:
673       p = d[l] + f;
674       /*     .......... ORDER EIGENVALUES .......... */
675       if (l == 1) {
676          goto L250;
677       }
678       /*     .......... FOR I=L STEP -1 UNTIL 2 DO -- .......... */
679       i__2 = l;
680       for (ii = 2; ii <= i__2; ++ii)
681       {
682          i = l + 2 - ii;
683          if (p >= d[i - 1])
684          {
685             goto L270;
686          }
687          d[i] = d[i - 1];
688       }
689 
690 L250:
691       i = 1;
692 L270:
693       d[i] = p;
694    }
695 
696    goto L1001;
697    /*     .......... SET ERROR -- NO CONVERGENCE TO AN */
698    /*                EIGENVALUE AFTER 30 ITERATIONS .......... */
699 L1000:
700    *ierr = l;
701 L1001:
702    return 0;
703 
704 } /* cgtql1_ */
705 
706 HYPRE_Real
hypre_LINPACKcgpthy(HYPRE_Real * a,HYPRE_Real * b)707 hypre_LINPACKcgpthy(HYPRE_Real *a,HYPRE_Real *b)
708 {
709    /* System generated locals */
710    HYPRE_Real ret_val,d__1,d__2,d__3;
711 
712    /* Local variables */
713    HYPRE_Real p,r,s,t,u;
714 
715    /*     FINDS DSQRT(A**2+B**2) WITHOUT OVERFLOW OR DESTRUCTIVE UNDERFLOW */
716 
717 
718    /* Computing MAX */
719    d__1 = fabs(*a),d__2 = fabs(*b);
720    p = hypre_max(d__1,d__2);
721    if (!p)
722    {
723       goto L20;
724    }
725    /* Computing MIN */
726    d__2 = fabs(*a),d__3 = fabs(*b);
727    /* Computing 2nd power */
728    d__1 = hypre_min(d__2,d__3) / p;
729    r = d__1 * d__1;
730 L10:
731    t = r + 4.;
732    if (t == 4.)
733    {
734       goto L20;
735    }
736    s = r / t;
737    u = s * 2. + 1.;
738    p = u * p;
739    /* Computing 2nd power */
740    d__1 = s / u;
741    r = d__1 * d__1 * r;
742    goto L10;
743 L20:
744    ret_val = p;
745 
746    return ret_val;
747 } /* cgpthy_ */
748