1 subroutine seeded_qmr_real_augment(rtdb, nlen, g_work, niter, tol, 2 $ ierr, unknowns, matvec, xyz, ldebug, muold) 3c 4c Fancy QMR algorithm. Will document later. 5c use constants 6 implicit None 7#include "errquit.fh" 8#include "inp.fh" 9#include "rtdb.fh" 10#include "stdio.fh" 11#include "nwc_const.fh" 12#include "mafdecls.fh" 13#include "global.fh" 14#include "testutil.fh" 15#include "dimqm_constants.fh" 16c 17c Input Variables 18 integer rtdb 19 integer nlen 20 integer g_work(7) 21 integer niter 22 double precision tol 23 integer ierr 24 integer g_unknowns 25 external matvec 26 double precision xyz(3,*) 27 double precision unknowns(nlen) 28 logical ldebug 29 double precision muold(nlen) 30c 31c Local variables 32 integer maxiter 33 double precision d_n, e_n 34 double precision sinn, cosn 35 double precision rhs_n, rhs_n_plus1 36 double precision l_n, l_n_plus1 37 double precision res_0, res_n, res_n_minus1, res_n_plus1 38 double precision omegamax, omega 39 double precision res_nrm, res_nrm_upper_bound, nrm_check 40 double precision nrm_tol, min_tol, max_tol 41 double precision scv, scpn, scs 42 double precision u_n_minus1, dtmp 43 integer i, j, k 44 integer id, ld, k_seed, g_seed(2) 45 double precision time 46 integer k_work, lda 47 double precision g_n 48 logical stat 49c 50c BLAS/LAPACK routines 51 external dlamch 52 double precision dlamch 53 double precision dnrm2 54 external dnrm2 55 external drotg 56 call ga_sync() 57c 58c Get node ID 59 id = ga_nodeid() 60 if(id.eq.0.and.ldebug) then 61 write(LuOut,*) "Start QMR routine" 62 call util_flush(LuOut) 63 end if 64c Check tolerances of the machine 65c Not sure if this is needed. 66 nrm_tol = dlamch('E') * TEN 67 min_tol = SQRT(SQRT(dlamch('S'))) 68 max_tol = ONE / min_tol 69 if (tol <= ZERO) tol = SQRT(dlamch('E')) 70 if(ldebug) then 71 if(id .eq. 0) write(LuOut,*) "Tol: ", tol 72 end if 73c 74c Zero work arrays 75 do i = 1, 7 76 call ga_zero(g_work(i)) 77 end do 78c 79c Create and zero seed arrays 80 if(.not. ga_create(mt_dbl, nlen, 1, 'g_seed1', nlen, -1, 81 $ g_seed(1))) 82 $ call errquit('g_seed1: ga create failed',0,GA_ERR) 83 if(.not. ga_create(mt_dbl, nlen, 1, 'g_seed2', nlen, -1, 84 $ g_seed(2))) 85 $ call errquit('g_seed2: ga create failed',0,GA_ERR) 86 do i = 1, 2 87 call ga_zero(g_seed(i)) 88 end do 89 90c 91c Initalize by copying input field to work 2 and 3 92c call ga_copy(g_unknowns, g_work(2)) 93c call ga_copy(g_unknowns, g_work(3)) 94c write(LuOut,*) nLen 95 call ga_put(g_work(2), 1, nLen, 1, 1, unknowns, 1) 96c Use old dipoles as initial guess 97c call ga_put(g_work(1), 1, nLen, 1, 1, muold, 1) 98c Determine initial residual 99 call matvec(rtdb, nlen, g_work(1), g_work(3), xyz, ldebug) 100 call ga_add(ONE, g_work(2), -ONE, g_work(3), g_work(3)) 101c Save the initial residual vector in work(7) 102 call ga_copy(g_work(3), g_work(7)) 103 call ga_sync() 104c call ga_print(g_work(2)) 105c call ga_print(g_work(3)) 106c 107c Initial residual 108c res_0 = dnrm2(nlen,unknowns,1) 109 res_0 = SQRT(ga_ddot(g_work(3), g_work(3))) 110 write(luout,*) "res_0:", res_0 111c 112c If already converged, exit here 113 if((tol >= ONE) .or. (res_0 <= tol)) then 114 niter = 0 115 tol = res_0 116 call ga_get(g_work(1), 1, nLen, 1, 1, unknowns, 1) 117 return 118 end if 119c Pull in how many seed vectors we have thus far 120 if(.not.rtdb_get(rtdb,'seed:k', mt_int, 1, k)) 121 $ call errquit('get seed:k failed', 1, RTDB_ERR) 122c 123c Now project the seed space onto the iterate and residual 124 rhs_n = res_0 125 cosn = ONE 126 sinn = ZERO 127 if (ldebug) write(luout,*) "Projecting", k, "steps" 128 do j = 1, k 129c Pull in scalars 130 if(.not.rtdb_get(rtdb,'seed:sin'//CHAR(j), mt_dbl, 1, sinn)) 131 $ call errquit('get seed sinn failed', j, RTDB_ERR) 132 if(.not.rtdb_get(rtdb,'seed:cos'//CHAR(j), mt_dbl, 1, cosn)) 133 $ call errquit('get seed cosn failed', j, RTDB_ERR) 134 if(.not.rtdb_get(rtdb,'seed:scp'//CHAR(j), mt_dbl, 1, scpn)) 135 $ call errquit('get seed scpn failed', j, RTDB_ERR) 136 if(.not.rtdb_get(rtdb,'seed:scs'//CHAR(j), mt_dbl, 1, scs)) 137 $ call errquit('get seed scsn failed', j, RTDB_ERR) 138c Pull in arrays 139 call ga_access(g_seed(1), 1, nlen, 1, 1, k_seed, ld) 140 if(.not.rtdb_get(rtdb,'seed:v'//CHAR(j), mt_dbl, nlen, 141 $ dbl_mb(k_seed))) 142 $ call errquit('get seed work(3) failed', j, RTDB_ERR) 143 call ga_release_update(g_seed(1), 1, nlen, 1, 1) 144 call ga_access(g_seed(2), 1, nlen, 1, 1, k_seed, ld) 145 if(.not.rtdb_get(rtdb,'seed:s'//CHAR(j), mt_dbl, nlen, 146 $ dbl_mb(k_seed))) 147 $ call errquit('get seed s failed', j, RTDB_ERR) 148 call ga_release_update(g_seed(2), 1, nlen, 1, 1) 149 g_n = ga_ddot(g_work(7), g_seed(1)) * scpn 150 rhs_n_plus1 = -sinn * rhs_n + cosn * g_n 151 rhs_n = cosn * rhs_n + sinn * g_n 152 dtmp = scs * rhs_n 153 call ga_add(ONE, g_work(1), dtmp, g_seed(2), g_work(1)) 154 rhs_n = rhs_n_plus1 155 end do 156 call matvec(rtdb, nlen, g_work(1), g_work(3), xyz, ldebug) 157 call ga_add(ONE, g_work(2), -ONE, g_work(3), g_work(3)) 158 dtmp = SQRT(ga_ddot(g_work(3), g_work(3))) 159c res_0 = SQRT(ga_ddot(g_work(3), g_work(3))) 160c 161c Initialize the variables 162 maxiter = niter 163 scv = dtmp 164 e_n = ONE 165 cosn = ONE 166 res_nrm = dtmp/res_0 167 scpn = ONE 168 scs = ZERO 169 sinn = ZERO 170 l_n_plus1 = ZERO 171 omega = ONE 172 rhs_n = omega * dtmp 173 omegamax = ONE / omega 174 g_n = ZERO 175 if (ldebug) write(luout,*) "projected res_nrm:", res_nrm 176c 177c Begin the algorithm 178 do niter = 1, maxiter 179 k = k + 1 180 time = util_timer() 181c 182c Check if E_n is nonsingular 183 if(e_n == ZERO) then 184 ierr = 4 185 return 186 end if 187c 188c Compute scale factor for the vector w_{n} 189c Check for invariant subspaces, and scale the vectors if needed. 190 ierr = 0 191 if (scpn * scv < nrm_tol) ierr = 5 192 if (ierr /= 0) return ! A-invarient subspace 193c 194 d_n = ga_ddot(g_work(3), g_work(3)) / (scv**2) 195 if((scv >= max_tol) .or. (scv <= min_tol)) then 196 dtmp = ONE / scv 197 call ga_scale(g_work(3), dtmp) 198 scv = ONE 199 end if 200 scv = ONE / scv 201c 202c Build the vector p_n 203 u_n_minus1 = d_n * l_n_plus1 / e_n 204 dtmp = u_n_minus1 * scpn / scv 205 call ga_add(ONE, g_work(3), -dtmp, g_work(4), g_work(4)) 206 scpn = scv 207c 208c Check if D_n is nonsingular 209 if(d_n == ZERO) then 210 ierr = 4 211 return 212 end if 213c 214c Multiply current residual by the matrix 215 call matvec(rtdb, nlen, g_work(4), g_work(6), xyz, ldebug) 216c 217c Compute p_n^T A p_n 218 e_n = ga_ddot(g_work(4), g_work(6)) * scpn**2 219c 220c Build the vector v_{n+1} 221 l_n = e_n / d_n 222 call ga_add(ONE, g_work(6), -l_n, g_work(3), g_work(3)) 223c 224c Compute the scale factor for v_{n+1} 225 scv = SQRT(ga_ddot(g_work(3), g_work(3))) 226 l_n_plus1 = scpn * scv 227c 228c The QMR code starts here. 229c Multiply the new column by the previous omeags 230c Get the next scaling factor omega(i) and update omegamax 231 res_n = omega * l_n 232 omega = ONE 233 res_n_plus1 = omega * l_n_plus1 234 omegamax = MAX(omegamax, ONE/omega) 235c 236c Apply the previous rotation 237 res_n_minus1 = sinn * res_n 238 res_n = cosn * res_n 239c 240c Compute and apply the rotation for the last element 241 call drotg(res_n, res_n_plus1, cosn, sinn) 242c Save rotations and vector for seeding 243 if(.not.rtdb_put(rtdb,'seed:sin'//CHAR(k), mt_dbl, 1, sinn)) 244 $ call errquit('put sinn failed', k, RTDB_ERR) 245 if(.not.rtdb_put(rtdb,'seed:cos'//CHAR(k), mt_dbl, 1, cosn)) 246 $ call errquit('put cosn failed', k, RTDB_ERR) 247 if(.not.rtdb_put(rtdb,'seed:scp'//CHAR(k), mt_dbl, 1, scpn)) 248 $ call errquit('put scp failed', k, RTDB_ERR) 249 call ga_access(g_work(3), 1, nlen, 1, 1, k_seed, ld) 250 if(.not.rtdb_put(rtdb,'seed:v'//CHAR(k), mt_dbl, nlen, 251 $ dbl_mb(k_seed))) 252 $ call errquit('put v failed', k, RTDB_ERR) 253 call ga_release(g_work(3), 1, nlen, 1, 1) 254c 255c Apply the new rotation to the right-hand side vector 256 rhs_n_plus1 = -sinn * rhs_n 257 rhs_n = cosn * rhs_n 258c 259c Compute the next search direction s_i 260 dtmp = res_n_minus1 * scs / scpn 261c g_work(:,5) = g_work(:,4) + -dtmp * g_work(:,5) 262 call ga_add(ONE, g_work(4), -dtmp, g_work(5), g_work(5)) 263c 264c Compute the new QMR iterate, then scale the search direction 265 scs = scpn / res_n 266 dtmp = scs * rhs_n 267c 268c Save search direction for seeding 269 call ga_access(g_work(5), 1, nlen, 1, 1, k_seed, ld) 270 if(.not.rtdb_put(rtdb,'seed:s'//CHAR(k), mt_dbl, nlen, 271 $ dbl_mb(k_seed))) 272 $ call errquit('put s failed', k, RTDB_ERR) 273 call ga_release(g_work(5), 1, nlen, 1, 1) 274 if(.not.rtdb_put(rtdb,'seed:scs'//CHAR(k), mt_dbl, 1, scs)) 275 $ call errquit('put scs failed', k, RTDB_ERR) 276c 277c g_work(:,1) = g_work(:,1) + dtmp * g_work(:,5) 278 call ga_add(ONE, g_work(1), dtmp, g_work(5), g_work(1)) 279 if((ABS(scs) >= max_tol) .or. (ABS(scs) <= min_tol)) then 280c g_work(:,5) = scs * g_work(:,5) 281 call ga_scale(g_work(5), scs) 282 scs = ONE 283 end if 284c 285c Compute the residual norm upper bound 286c If the scaled upper bound is within one order of magnitude of 287c the targer convergence norm, compute the true residual norm. 288 rhs_n = rhs_n_plus1 289 res_nrm_upper_bound = 290 $ SQRT(REAL(niter+1))*omegamax*ABS(rhs_n_plus1)/res_0 291 nrm_check = res_nrm_upper_bound 292c if((res_nrm_upper_bound/tol <= TEN).or.(niter >= maxiter)) then 293c Multiply the current residual by the matrix 294 call matvec(rtdb, nlen, g_work(1), g_work(6), xyz, ldebug) 295c g_work(:,6) = g_work(:,2) - g_work(:,6) 296 call ga_add(ONE, g_work(2), -ONE, g_work(6), g_work(6)) 297 call ga_sync() 298 res_nrm = SQRT(ga_ddot(g_work(6), g_work(6))) / res_0 299 call ga_sync() 300 nrm_check = res_nrm 301 if(id.eq.0.and.ldebug) write(LuOut,*) "Res_nrm: ", res_nrm 302 if((res_nrm_upper_bound/tol <= TEN).or.(niter >= maxiter)) then 303 nrm_check = res_nrm 304 else 305 if(id.eq.0.and.ldebug) then 306 write(LuOut,*) "Res_nrm_upper_bound:", res_nrm_upper_bound 307 end if 308 end if 309 time = util_timer() - time 310 if(id.eq.0.and.ldebug) then 311 write(LuOut,*) "Iteration", niter 312 write(LuOut,*) "Time (s):", time 313 call util_flush(LuOut) 314 end if 315c 316c Check for convergece or termination. Stop if: 317c 1. algorithm converged; 318c 2. there is an error condition; 319c 3. the residual norm upper bound is smaller than the computed 320c residual norm by a factor of at least 100; 321c 4. algorithm exceeded the iterations limit 322 if(res_nrm <= tol) then 323 ierr = 0 324 exit 325 else if(ierr /= 0) then 326 exit 327 else if(res_nrm_upper_bound < nrm_check / HUNDRED) then 328 ierr = 3 329 exit 330 end if 331c call ga_get(g_work(1), 1, nLen, 1, 1, unknowns, 1) 332c write(luout,*) "END QMR DIPOLES" 333c write(luout,*) unknowns 334 call ga_sync() 335 end do 336c 337c Put proper values into output variables 338 if (niter > maxiter) ierr = 3 339 tol = res_nrm 340c call ga_copy(g_work(1), g_unknowns) 341 call ga_get(g_work(1), 1, nLen, 1, 1, unknowns, 1) 342 if(.not.rtdb_put(rtdb,'seed:k', mt_int, 1, k)) 343 $ call errquit('put seed:k failed', 1, RTDB_ERR) 344c do i = 1, 2 345c stat = ga_destroy(g_seed(i)) 346c end do 347 if(id.eq.0.and.ldebug) write(LuOut,*) 348 $ "End QMR Routine" 349 end subroutine seeded_qmr_real_augment 350