1 subroutine qmr_real(rtdb, nlen, g_work, niter, tol, ierr, 2 $ unknowns, matvec, xyz, ldebug) 3c 4c Fancy QMR algorithm. Will document later. 5c use constants 6 implicit None 7#include "errquit.fh" 8#include "inp.fh" 9#include "rtdb.fh" 10#include "stdio.fh" 11#include "nwc_const.fh" 12#include "mafdecls.fh" 13#include "global.fh" 14#include "testutil.fh" 15#include "dimqm_constants.fh" 16c 17c Input Variables 18 integer rtdb 19 integer nlen 20 integer g_work(6) 21 integer niter 22 double precision tol 23 integer ierr 24 integer g_unknowns 25 external matvec 26 double precision xyz(3,*) 27 double precision unknowns(nlen) 28 logical ldebug 29c 30c Local variables 31 integer maxiter 32 double precision d_n, e_n 33 double precision sinn, cosn 34 double precision rhs_n, rhs_n_plus1 35 double precision l_n, l_n_plus1 36 double precision res_0, res_n, res_n_minus1, res_n_plus1 37 double precision omegamax, omega 38 double precision res_nrm, res_nrm_upper_bound, nrm_check 39 double precision nrm_tol, min_tol, max_tol 40 double precision scv, scpn, scs 41 double precision u_n_minus1, dtmp 42 integer i 43 integer id 44 double precision time 45c 46c BLAS/LAPACK routines 47 external dlamch 48 double precision dlamch 49 double precision dnrm2 50 external dnrm2 51 external drotg 52 call ga_sync() 53c 54c Get node ID 55 id = ga_nodeid() 56 if(id.eq.0.and.ldebug) then 57 write(LuOut,*) "Start QMR real routine" 58 call util_flush(LuOut) 59 end if 60c Check tolerances of the machine 61 nrm_tol = dlamch('E') * TEN 62 min_tol = SQRT(SQRT(dlamch('S'))) 63 max_tol = ONE / min_tol 64 if (tol <= ZERO) tol = SQRT(dlamch('E')) 65 if(ldebug) then 66 if(id .eq. 0) write(LuOut,*) "Tol: ", tol 67 end if 68c 69c Zero work arrays 70 do i = 1, 6 71 call ga_zero(g_work(i)) 72 end do 73c 74c Initalize by copying input field to work 2 and 3 75 call ga_put(g_work(2), 1, nLen, 1, 1, unknowns, 1) 76 call ga_copy(g_work(2), g_work(3)) 77 call ga_sync() 78c 79c Initial residual 80c res_0 = dnrm2(nlen,unknowns,1) 81 res_0 = SQRT(ga_ddot(g_work(3), g_work(3))) 82c 83c If already converged, exit here 84 if((tol >= ONE) .or. (res_0 <= tol)) then 85 niter = 0 86 tol = res_0 87 return 88 end if 89c 90c Initialize the variables 91 maxiter = niter 92 scv = res_0 93 e_n = ONE 94 cosn = ONE 95 res_nrm = ONE 96 scpn = ONE 97 scs = ZERO 98 sinn = ZERO 99 l_n_plus1 = ZERO 100 omega = ONE 101 rhs_n = omega * res_0 102 omegamax = ONE / omega 103c 104c Begin the algorithm 105 do niter = 1, maxiter 106 time = util_timer() 107c 108c Check if E_n is nonsingular 109 if(e_n == ZERO) then 110 ierr = 4 111 return 112 end if 113c 114c Compute scale factor for the vector w_{n} 115c Check for invariant subspaces, and scale the vectors if needed. 116 ierr = 0 117 if (scpn * scv < nrm_tol) ierr = 5 118 if (ierr /= 0) return ! A-invarient subspace 119c 120 d_n = ga_ddot(g_work(3), g_work(3)) / (scv**2) 121 if((scv >= max_tol) .or. (scv <= min_tol)) then 122 dtmp = ONE / scv 123 call ga_scale(g_work(3), dtmp) 124 scv = ONE 125 end if 126 scv = ONE / scv 127c 128c Build the vector p_n 129 u_n_minus1 = d_n * l_n_plus1 / e_n 130 dtmp = u_n_minus1 * scpn / scv 131 call ga_add(ONE, g_work(3), -dtmp, g_work(4), g_work(4)) 132 scpn = scv 133c 134c Check if D_n is nonsingular 135 if(d_n == ZERO) then 136 ierr = 4 137 return 138 end if 139c 140c Multiply current residual by the matrix 141 call matvec(rtdb, nlen, g_work(4), g_work(6), xyz, ldebug) 142c 143c Compute p_n^T A p_n 144 e_n = ga_ddot(g_work(4), g_work(6)) * scpn**2 145c 146c Build the vector v_{n+1} 147 l_n = e_n / d_n 148 call ga_add(ONE, g_work(6), -l_n, g_work(3), g_work(3)) 149c 150c Compute the scale factor for v_{n+1} 151 scv = SQRT(ga_ddot(g_work(3), g_work(3))) 152 l_n_plus1 = scpn * scv 153c 154c The QMR code starts here. 155c Multiply the new column by the previous omeags 156c Get the next scaling factor omega(i) and update omegamax 157 res_n = omega * l_n 158 omega = ONE 159 res_n_plus1 = omega * l_n_plus1 160 omegamax = MAX(omegamax, ONE/omega) 161c 162c Apply the previous rotation 163 res_n_minus1 = sinn * res_n 164 res_n = cosn * res_n 165c 166c Compute and apply the rotation for the last element 167 call drotg(res_n, res_n_plus1, cosn, sinn) 168c 169c Apply the new rotation to the right-hand side vector 170 rhs_n_plus1 = -sinn * rhs_n 171 rhs_n = cosn * rhs_n 172c 173c Compute the next search direction s_i 174 dtmp = res_n_minus1 * scs / scpn 175c g_work(:,5) = g_work(:,4) + -dtmp * g_work(:,5) 176 call ga_add(ONE, g_work(4), -dtmp, g_work(5), g_work(5)) 177c 178c Compute the new QMR iterate, then scale the search direction 179 scs = scpn / res_n 180 dtmp = scs * rhs_n 181c g_work(:,1) = g_work(:,1) + dtmp * g_work(:,5) 182 call ga_add(ONE, g_work(1), dtmp, g_work(5), g_work(1)) 183 if((ABS(scs) >= max_tol) .or. (ABS(scs) <= min_tol)) then 184c g_work(:,5) = scs * g_work(:,5) 185 call ga_scale(g_work(5), scs) 186 scs = ONE 187 end if 188c 189c Compute the residual norm upper bound 190c If the scaled upper bound is within one order of magnitude of 191c the targer convergence norm, compute the true residual norm. 192 rhs_n = rhs_n_plus1 193 res_nrm_upper_bound = 194 $ SQRT(REAL(niter+1))*omegamax*ABS(rhs_n_plus1)/res_0 195 nrm_check = res_nrm_upper_bound 196 if((res_nrm_upper_bound/tol <= TEN).or.(niter >= maxiter)) then 197c Multiply the current residual by the matrix 198 call matvec(rtdb, nlen, g_work(1), g_work(6), xyz, ldebug) 199c g_work(:,6) = g_work(:,2) - g_work(:,6) 200 call ga_add(ONE, g_work(2), -ONE, g_work(6), g_work(6)) 201 call ga_sync() 202 res_nrm = SQRT(ga_ddot(g_work(6), g_work(6))) / res_0 203 call ga_sync() 204 nrm_check = res_nrm 205 if(id.eq.0.and.ldebug) write(LuOut,*) "Res_nrm: ", res_nrm 206 else 207 if(id.eq.0.and.ldebug) then 208 write(LuOut,*) "Res_nrm_upper_bound:", res_nrm_upper_bound 209 end if 210 end if 211 time = util_timer() - time 212 if(id.eq.0.and.ldebug) then 213 write(LuOut,*) "Iteration", niter 214 write(LuOut,*) "Time (s):", time 215 call util_flush(LuOut) 216 end if 217c 218c Check for convergece or termination. Stop if: 219c 1. algorithm converged; 220c 2. there is an error condition; 221c 3. the residual norm upper bound is smaller than the computed 222c residual norm by a factor of at least 100; 223c 4. algorithm exceeded the iterations limit 224 if(res_nrm <= tol) then 225 ierr = 0 226 exit 227 else if(ierr /= 0) then 228 exit 229 else if(res_nrm_upper_bound < nrm_check / HUNDRED) then 230 ierr = 3 231 exit 232 end if 233 call ga_sync() 234 end do 235c 236c Put proper values into output variables 237 if (niter > maxiter) ierr = 3 238 tol = res_nrm 239 call ga_get(g_work(1), 1, nLen, 1, 1, unknowns, 1) 240 if(id.eq.0.and.ldebug) write(LuOut,*) 241 $ "End QMR Routine" 242 end subroutine qmr_real 243