1 subroutine qmr_complex(rtdb, nlen, g_work, niter, tol, ierr, 2 $ unknowns, matvec, xyz, ldebug) 3c 4c Fancy QMR algorithm. Will document later. 5 implicit None 6#include "errquit.fh" 7#include "inp.fh" 8#include "rtdb.fh" 9#include "stdio.fh" 10#include "nwc_const.fh" 11#include "mafdecls.fh" 12#include "global.fh" 13#include "testutil.fh" 14#include "dimqm_constants.fh" 15c 16c Input Variables 17 integer rtdb 18 integer nlen 19 integer g_work(7) 20 integer niter 21 double precision tol 22 integer ierr 23 integer g_unknowns 24 external matvec 25 double precision xyz(3,*) 26 double complex unknowns(nlen) 27 logical ldebug 28c 29c Local variables 30 integer maxiter 31 complex*16 d_n, e_n, l_n 32 complex*16 sinn, scs 33 complex*16 rhs_n, rhs_n_plus1 34 complex*16 res_n, res_n_minus1, res_n_plus1 35 complex*16 u_n_minus1, dtmp 36 double precision l_n_plus1 37 double precision omegamax, omega, cosn, res_0 38 double precision res_nrm, res_nrm_upper_bound, nrm_check 39 double precision nrm_tol, min_tol, max_tol 40 double precision scv, scpn, tmp 41 integer i 42 integer id 43 double precision time 44c 45c BLAS/LAPACK routines 46 external dlamch 47 double precision dlamch 48 double precision dnrm2 49 external dnrm2 50 external zrotg 51 call ga_sync() 52c 53c Check for valid input 54 ierr = 0 55 if(nlen < 1 .or. niter < 1) then 56 ierr = 2 57 return 58 end if 59c 60c Get node ID 61 id = ga_nodeid() 62 if(id.eq.0.and.ldebug) then 63 write(LuOut,*) "Start QMR complex routine" 64 call util_flush(LuOut) 65 end if 66c Check tolerances of the machine 67c Not sure if this is needed. 68 nrm_tol = dlamch('E') * TEN 69 min_tol = SQRT(SQRT(dlamch('S'))) 70c write(luout,*) "mintol:", min_tol 71c min_tol = 1.0d-30 72 max_tol = ONE / min_tol 73 if (tol <= ZERO) tol = SQRT(dlamch('E')) 74 if(ldebug) then 75 if(id .eq. 0) write(LuOut,*) "Tol: ", tol 76 end if 77c 78c Zero work arrays 79 do i = 1, 7 80 call ga_zero(g_work(i)) 81 end do 82c 83c Initalize by placing input field into work 2 and 3 84 call ga_put(g_work(2), 1, nLen, 1, 1, unknowns, 1) 85 call ga_copy(g_work(2), g_work(3)) 86 call ga_sync() 87c 88c Initial residual 89c res_0 = dnrm2(nlen,unknowns,1) 90 call ga_conjg(g_work(3), g_work(7)) 91 res_0 = SQRT(REAL(ga_zdot(g_work(7), g_work(3)), KIND=8)) 92c 93c If already converged, exit here 94 if((tol >= ONE) .or. (res_0 <= tol )) then 95 niter = 0 96 tol = res_0 97 return 98 end if 99c 100c Initialize the variables 101 maxiter = niter 102 scv = res_0 103 e_n = ONE_C 104 cosn = ONE 105 res_nrm = ONE 106 scpn = ONE 107 scs = ZERO_C 108 sinn = ZERO_C 109 l_n_plus1 = ZERO 110 omega = ONE 111 rhs_n = omega * res_0 112 omegamax = ONE / omega 113c 114c Begin the algorithm 115 do niter = 1, maxiter 116 time = util_timer() 117c 118c Check if E_n is nonsingular 119 if(ABS(e_n) == ZERO) then 120 ierr = 3 121 return 122 end if 123c 124c Compute scale factor for the vector w_{n} 125c Check for invariant subspaces, and scale the vectors if needed. 126 ierr = 0 127 if (scpn * scv < nrm_tol) then 128 ierr = 5 ! A-invarient subspace 129 return 130 end if 131c 132 d_n = ga_zdot(g_work(3), g_work(3)) 133 d_n = d_n / (scv**2) 134 if((scv >= max_tol) .or. (scv <= min_tol)) then 135 dtmp = ONE_C / scv 136 call ga_scale(g_work(3), dtmp) 137 scv = ONE 138 end if 139 scv = ONE / scv 140c 141c Build the vector p_n 142 u_n_minus1 = d_n * l_n_plus1 / e_n 143 dtmp = u_n_minus1 * scpn / scv 144 call ga_add(ONE_C, g_work(3), -dtmp, g_work(4), g_work(4)) 145 scpn = scv 146c 147c Check if D_n is nonsingular 148 if(ABS(d_n) == ZERO) then 149 ierr = 4 150 return 151 end if 152c 153c Multiply current residual by the matrix 154 call matvec(rtdb, nlen, g_work(4), g_work(6), xyz, ldebug) 155c 156c Compute p_n^T A p_n 157 e_n = ga_zdot(g_work(4), g_work(6)) 158 e_n = e_n * scpn**2 159c 160c Build the vector v_{n+1} 161 l_n = e_n / d_n 162 call ga_add(ONE_C, g_work(6), -l_n, g_work(3), g_work(3)) 163 call ga_sync() 164c 165c Compute the scale factor for v_{n+1} 166 call ga_conjg(g_work(3), g_work(7)) 167 scv = SQRT(REAL(ga_zdot(g_work(7), g_work(3)), KIND=8)) 168 l_n_plus1 = scpn * scv 169c 170c The QMR code starts here. 171c Multiply the new column by the previous omeags 172c Get the next scaling factor omega(i) and update omegamax 173 res_n = omega * l_n 174 omega = ONE 175 res_n_plus1 = omega * l_n_plus1 176 omegamax = MAX(omegamax, ONE/omega) 177c 178c Apply the previous rotation 179 res_n_minus1 = sinn * res_n 180 res_n = cosn * res_n 181c 182c Compute and apply the rotation for the last element 183 call zrotg(res_n, res_n_plus1, cosn, sinn) 184c 185c Apply the new rotation to the right-hand side vector 186 rhs_n_plus1 = -CONJG(sinn) * rhs_n 187 rhs_n = cosn * rhs_n 188c 189c Compute the next search direction s_i 190 dtmp = res_n_minus1 * scs / scpn 191c g_work(:,5) = g_work(:,4) + -dtmp * g_work(:,5) 192 call ga_add(ONE_C, g_work(4), -dtmp, g_work(5), g_work(5)) 193c 194c Compute the new QMR iterate, then scale the search direction 195 scs = scpn / res_n 196 dtmp = scs * rhs_n 197c g_work(:,1) = g_work(:,1) + dtmp * g_work(:,5) 198 call ga_add(ONE_C, g_work(1), dtmp, g_work(5), g_work(1)) 199 if((ABS(scs) >= max_tol) .or. (ABS(scs) <= min_tol)) then 200c g_work(:,5) = scs * g_work(:,5) 201 call ga_scale(g_work(5), scs) 202 scs = ONE_C 203 end if 204c 205c Compute the residual norm upper bound 206c If the scaled upper bound is within one order of magnitude of 207c the targer convergence norm, compute the true residual norm. 208 rhs_n = rhs_n_plus1 209 res_nrm_upper_bound = 210 $ SQRT(REAL(niter+1, KIND=8))*omegamax*ABS(rhs_n_plus1)/res_0 211 nrm_check = res_nrm_upper_bound 212 if((res_nrm_upper_bound/tol <= TEN).or.(niter >= maxiter)) then 213c Multiply the current residual by the matrix 214 call matvec(rtdb, nlen, g_work(1), g_work(6), xyz, ldebug) 215 call ga_add(ONE_C, g_work(2), -ONE, g_work(6), g_work(6)) 216 call ga_sync() 217 call ga_conjg(g_work(6), g_work(7)) 218 res_nrm = SQRT(REAL(ga_zdot(g_work(7), g_work(6)),KIND=8)) 219 res_nrm = res_nrm / res_0 220 call ga_sync() 221 nrm_check = res_nrm 222 if(id.eq.0.and.ldebug) write(LuOut,*) "Res_nrm: ", res_nrm 223 else 224 if(id.eq.0.and.ldebug) then 225 write(LuOut,*) "Res_nrm_upper_bound:", res_nrm_upper_bound 226 end if 227 end if 228 time = util_timer() - time 229 if(id.eq.0.and.ldebug) then 230 write(LuOut,*) "Iteration", niter 231 write(LuOut,*) "Time (s):", time 232 write(luout,*) "" 233 call util_flush(LuOut) 234 end if 235c 236c Check for convergece or termination. Stop if: 237c 1. algorithm converged; 238c 2. there is an error condition; 239c 3. the residual norm upper bound is smaller than the computed 240c residual norm by a factor of at least 100; 241c 4. algorithm exceeded the iterations limit 242 if(res_nrm <= tol) then 243 ierr = 0 244 exit 245 else if(ierr /= 0) then 246 exit 247 else if(res_nrm_upper_bound < nrm_check / HUNDRED) then 248 ierr = 3 249 exit 250 end if 251 call ga_sync() 252 end do 253c 254c Put proper values into output variables 255 if (niter > maxiter) ierr = 3 256 tol = res_nrm 257c call ga_copy(g_work(1), g_unknowns) 258 call ga_get(g_work(1), 1, nLen, 1, 1, unknowns, 1) 259 if(id.eq.0.and.ldebug) write(LuOut,*) 260 $ "End QMR Routine" 261 end subroutine qmr_complex 262