1 subroutine qmr_seed_real(rtdb, nlen, g_work, niter, tol, ierr, 2 $ unknowns, matvec, xyz, ldebug, dir) 3c 4c Fancy QMR algorithm. Will document later. 5c use constants 6 implicit None 7#include "errquit.fh" 8#include "inp.fh" 9#include "rtdb.fh" 10#include "stdio.fh" 11#include "nwc_const.fh" 12#include "mafdecls.fh" 13#include "global.fh" 14#include "testutil.fh" 15#include "dimqm_constants.fh" 16c 17c Input Variables 18 integer rtdb 19 integer nlen 20 integer g_work(6) 21 integer niter 22 double precision tol 23 integer ierr 24 integer g_unknowns 25 external matvec 26 double precision xyz(3,*) 27 double precision unknowns(nlen) 28 logical ldebug 29 character dir 30c 31c Local variables 32 integer maxiter 33 double precision d_n, e_n 34 double precision sinn, cosn 35 double precision rhs_n, rhs_n_plus1 36 double precision l_n, l_n_plus1 37 double precision res_0, res_n, res_n_minus1, res_n_plus1 38 double precision omegamax, omega 39 double precision res_nrm, res_nrm_upper_bound, nrm_check 40 double precision nrm_tol, min_tol, max_tol 41 double precision scv, scpn, scs 42 double precision u_n_minus1, dtmp 43 integer i 44 integer id, ld, k_seed, l_seed 45 integer ilo, ihi, jlo, jhi 46 double precision time 47 logical stat 48c 49c BLAS/LAPACK routines 50 external dlamch 51 double precision dlamch 52 double precision dnrm2 53 external dnrm2 54 external drotg 55 call ga_sync() 56c 57c Get node ID 58 id = ga_nodeid() 59 if(id.eq.0.and.ldebug) then 60 write(LuOut,*) "Start seed QMR routine" 61 call util_flush(LuOut) 62 end if 63c Check tolerances of the machine 64 nrm_tol = dlamch('E') * TEN 65 min_tol = SQRT(SQRT(dlamch('S'))) 66 max_tol = ONE / min_tol 67 if (tol <= ZERO) tol = SQRT(dlamch('E')) 68 if(ldebug) then 69 if(id .eq. 0) write(LuOut,*) "Tol: ", tol 70 end if 71c 72c Zero work arrays 73 do i = 1, 6 74 call ga_zero(g_work(i)) 75 end do 76 if(id.eq.0) then 77 if(.not.ma_push_get(mt_dbl, nlen, 'qmr:seedtemp', l_seed, 78 $ k_seed)) 79 $ call errquit('qmr real malloc k_seed failed', 1, MA_ERR) 80 call dfill(nlen, ZERO, dbl_mb(k_seed), 1) 81 end if 82 83c 84c Initalize by copying input field to work 2 and 3 85 call ga_put(g_work(2), 1, nLen, 1, 1, unknowns, 1) 86 call ga_copy(g_work(2), g_work(3)) 87 call ga_sync() 88c 89c Initial residual 90c res_0 = dnrm2(nlen,unknowns,1) 91 res_0 = SQRT(ga_ddot(g_work(3), g_work(3))) 92c 93c If already converged, exit here 94 if((tol >= ONE) .or. (res_0 <= tol)) then 95 niter = 0 96 tol = res_0 97 if(.not.rtdb_put(rtdb,'seed:k'//dir, mt_int, 1, niter)) 98 $ call errquit('put seed:k failed', 1, RTDB_ERR) 99 if(id.eq.0) then 100 stat = ma_pop_stack(l_seed) 101 end if 102 return 103 end if 104c 105c Initialize the variables 106 maxiter = niter 107 scv = res_0 108 e_n = ONE 109 cosn = ONE 110 res_nrm = ONE 111 scpn = ONE 112 scs = ZERO 113 sinn = ZERO 114 l_n_plus1 = ZERO 115 omega = ONE 116 rhs_n = omega * res_0 117 omegamax = ONE / omega 118c 119c Begin the algorithm 120 do niter = 1, maxiter 121 time = util_timer() 122c 123c Check if E_n is nonsingular 124 if(e_n == ZERO) then 125 ierr = 4 126 return 127 end if 128c 129c Compute scale factor for the vector w_{n} 130c Check for invariant subspaces, and scale the vectors if needed. 131 ierr = 0 132 if (scpn * scv < nrm_tol) ierr = 5 133 if (ierr /= 0) return ! A-invarient subspace 134c 135 d_n = ga_ddot(g_work(3), g_work(3)) / (scv**2) 136 if((scv >= max_tol) .or. (scv <= min_tol)) then 137 dtmp = ONE / scv 138 call ga_scale(g_work(3), dtmp) 139 scv = ONE 140 end if 141 scv = ONE / scv 142c 143c Build the vector p_n 144 u_n_minus1 = d_n * l_n_plus1 / e_n 145 dtmp = u_n_minus1 * scpn / scv 146 call ga_add(ONE, g_work(3), -dtmp, g_work(4), g_work(4)) 147 scpn = scv 148c 149c Check if D_n is nonsingular 150 if(d_n == ZERO) then 151 ierr = 4 152 return 153 end if 154c 155c Multiply current residual by the matrix 156 call matvec(rtdb, nlen, g_work(4), g_work(6), xyz, ldebug) 157c 158c Compute p_n^T A p_n 159 e_n = ga_ddot(g_work(4), g_work(6)) * scpn**2 160c 161c Build the vector v_{n+1} 162 l_n = e_n / d_n 163 call ga_add(ONE, g_work(6), -l_n, g_work(3), g_work(3)) 164c 165c Compute the scale factor for v_{n+1} 166 scv = SQRT(ga_ddot(g_work(3), g_work(3))) 167 l_n_plus1 = scpn * scv 168c 169c The QMR code starts here. 170c Multiply the new column by the previous omeags 171c Get the next scaling factor omega(i) and update omegamax 172 res_n = omega * l_n 173 omega = ONE 174 res_n_plus1 = omega * l_n_plus1 175 omegamax = MAX(omegamax, ONE/omega) 176c 177c Apply the previous rotation 178 res_n_minus1 = sinn * res_n 179 res_n = cosn * res_n 180c 181c Compute and apply the rotation for the last element 182 call drotg(res_n, res_n_plus1, cosn, sinn) 183c Save rotations and vector for seeding 184 if(id.eq.0) then 185c write(luout,*) "Saving data" 186 stat = rtdb_parallel(.false.) 187 if(.not.rtdb_put(rtdb,'seed:sin'//CHAR(niter)//dir, 188 $ mt_dbl, 1,sinn)) 189 $ call errquit('put sinn failed', niter, RTDB_ERR) 190 if(.not.rtdb_put(rtdb,'seed:cos'//CHAR(niter)//dir, 191 $ mt_dbl, 1,cosn)) 192 $ call errquit('put cosn failed', niter, RTDB_ERR) 193 if(.not.rtdb_put(rtdb,'seed:scp'//CHAR(niter)//dir, 194 $ mt_dbl, 1,scpn)) 195 $ call errquit('put scp failed', niter, RTDB_ERR) 196 call ga_get(g_work(3), 1, nlen, 1, 1, dbl_mb(k_seed), 1) 197 if(.not.rtdb_put(rtdb,'seed:v'//CHAR(niter)//dir, 198 $ mt_dbl, nlen, dbl_mb(k_seed))) 199 $ call errquit('put v failed', niter, RTDB_ERR) 200c call ga_distribution(g_work(3), id, ilo, ihi, jlo, jhi) 201c call ga_access(g_work(3), ilo, ihi, jlo, jhi, k_seed, ld) 202c if(.not.rtdb_put(rtdb,'seed:v'//CHAR(niter)//dir, 203c $ mt_dbl, nlen, dbl_mb(k_seed))) 204c $ call errquit('put v failed', niter, RTDB_ERR) 205c call ga_release(g_work(3), ilo, ihi, jlo, jhi) 206 stat = rtdb_parallel(.true.) 207c write(luout,*) "Data saved" 208 end if 209 call ga_sync() 210c 211c Apply the new rotation to the right-hand side vector 212 rhs_n_plus1 = -sinn * rhs_n 213 rhs_n = cosn * rhs_n 214c 215c Compute the next search direction s_i 216 dtmp = res_n_minus1 * scs / scpn 217c g_work(:,5) = g_work(:,4) + -dtmp * g_work(:,5) 218 call ga_add(ONE, g_work(4), -dtmp, g_work(5), g_work(5)) 219c 220c Compute the new QMR iterate, then scale the search direction 221 scs = scpn / res_n 222 dtmp = scs * rhs_n 223c 224c Save search direction for seeding 225 if(id.eq.0) then 226c write(luout,*) "Saving data 2" 227 stat = rtdb_parallel(.false.) 228 if(.not.rtdb_put(rtdb,'seed:scs'//CHAR(niter)//dir, 229 $ mt_dbl, 1, scs)) 230 $ call errquit('put scs failed', niter, RTDB_ERR) 231 232 call ga_get(g_work(5), 1, nlen, 1, 1, dbl_mb(k_seed), 1) 233 if(.not.rtdb_put(rtdb,'seed:s'//CHAR(niter)//dir, 234 $ mt_dbl, nlen, dbl_mb(k_seed))) 235 $ call errquit('put s failed', niter, RTDB_ERR) 236c write(luout,*) "Data saved 2" 237c call util_flush(luout) 238 stat = rtdb_parallel(.true.) 239 end if 240 call ga_sync() 241c call ga_access(g_work(5), 1, nlen, 1, 1, k_seed, ld) 242c if(.not.rtdb_put(rtdb,'seed:s'//CHAR(niter)//dir, 243c $ mt_dbl, nlen, dbl_mb(k_seed))) 244c $ call errquit('put s failed', niter, RTDB_ERR) 245c call ga_release(g_work(5), 1, nlen, 1, 1) 246c 247c g_work(:,1) = g_work(:,1) + dtmp * g_work(:,5) 248 call ga_add(ONE, g_work(1), dtmp, g_work(5), g_work(1)) 249 if((ABS(scs) >= max_tol) .or. (ABS(scs) <= min_tol)) then 250c g_work(:,5) = scs * g_work(:,5) 251 call ga_scale(g_work(5), scs) 252 scs = ONE 253 end if 254c write(luout,*) "updated iterate" 255c 256c Compute the residual norm upper bound 257c If the scaled upper bound is within one order of magnitude of 258c the targer convergence norm, compute the true residual norm. 259 rhs_n = rhs_n_plus1 260 res_nrm_upper_bound = 261 $ SQRT(REAL(niter+1))*omegamax*ABS(rhs_n_plus1)/res_0 262 nrm_check = res_nrm_upper_bound 263 if((res_nrm_upper_bound/tol <= TEN).or.(niter >= maxiter)) then 264c Multiply the current residual by the matrix 265 call matvec(rtdb, nlen, g_work(1), g_work(6), xyz, ldebug) 266c g_work(:,6) = g_work(:,2) - g_work(:,6) 267 call ga_add(ONE, g_work(2), -ONE, g_work(6), g_work(6)) 268 call ga_sync() 269 res_nrm = SQRT(ga_ddot(g_work(6), g_work(6))) / res_0 270 call ga_sync() 271 nrm_check = res_nrm 272 if(ldebug) write(LuOut,*) "Res_nrm: ", res_nrm 273 else 274 if(ldebug) then 275 write(LuOut,*) "Res_nrm_upper_bound:", res_nrm_upper_bound 276 end if 277 end if 278 time = util_timer() - time 279 if(ldebug) then 280 write(LuOut,*) "Iteration", niter 281 write(LuOut,*) "Time (s):", time 282 call util_flush(LuOut) 283 end if 284c 285c Check for convergece or termination. Stop if: 286c 1. algorithm converged; 287c 2. there is an error condition; 288c 3. the residual norm upper bound is smaller than the computed 289c residual norm by a factor of at least 100; 290c 4. algorithm exceeded the iterations limit 291 if(res_nrm <= tol) then 292 ierr = 0 293 exit 294 else if(ierr /= 0) then 295 exit 296 else if(res_nrm_upper_bound < nrm_check / HUNDRED) then 297 ierr = 3 298 exit 299 end if 300 call ga_sync() 301 end do 302c 303c Put proper values into output variables 304 if (niter > maxiter) ierr = 3 305 tol = res_nrm 306 call ga_get(g_work(1), 1, nLen, 1, 1, unknowns, 1) 307c 308c Save number of iterations for projection 309 if(.not.rtdb_put(rtdb,'seed:k'//dir, mt_int, 1, niter)) 310 $ call errquit('put seed:k failed', 1, RTDB_ERR) 311c 312c Destroy memory 313 if(id.eq.0) then 314 stat = ma_pop_stack(l_seed) 315 end if 316 call ga_sync() 317 if(id.eq.0.and.ldebug) write(LuOut,*) 318 $ "End seed QMR Routine" 319 end subroutine qmr_seed_real 320