1 subroutine seeded_qmr_complex(rtdb, nlen, g_work, niter, tol, 2 $ ierr, unknowns, matvec, xyz, 3 $ ldebug, dir) 4c 5c Fancy QMR algorithm. Will document later. 6 implicit None 7#include "errquit.fh" 8#include "inp.fh" 9#include "rtdb.fh" 10#include "stdio.fh" 11#include "nwc_const.fh" 12#include "mafdecls.fh" 13#include "global.fh" 14#include "testutil.fh" 15#include "dimqm_constants.fh" 16#include "tcgmsg.fh" 17#include "msgids.fh" 18c 19c Input Variables 20 integer rtdb 21 integer nlen 22 integer g_work(7) 23 integer niter 24 double precision tol 25 integer ierr 26 integer g_unknowns 27 external matvec 28 double precision xyz(3,*) 29 double complex unknowns(nlen) 30 logical ldebug 31 character dir 32c 33c Local variables 34 integer maxiter 35 complex*16 d_n, e_n, l_n 36 complex*16 sinn, scs 37 complex*16 rhs_n, rhs_n_plus1 38 complex*16 res_n, res_n_minus1, res_n_plus1 39 complex*16 u_n_minus1, dtmp 40 double precision l_n_plus1 41 double precision omegamax, omega, cosn, res_0 42 double precision res_nrm, res_nrm_upper_bound, nrm_check 43 double precision nrm_tol, min_tol, max_tol 44 double precision scv, scpn, tmp 45 integer i, j, k 46 integer id, ld, k_seed, l_seed 47 double precision time 48 complex*16 g_n 49 logical stat 50c 51c BLAS/LAPACK routines 52 external dlamch 53 double precision dlamch 54 double precision dnrm2 55 external dnrm2 56 external zrotg 57 call ga_sync() 58c 59c Check for valid input 60 ierr = 0 61 if(nlen < 1 .or. niter < 1) then 62 ierr = 2 63 return 64 end if 65c 66c Get node ID 67 id = ga_nodeid() 68 if(id.eq.0.and.ldebug) then 69 write(LuOut,*) "Start non-seed QMR complex routine" 70 call util_flush(LuOut) 71 end if 72c Check tolerances of the machine 73c Not sure if this is needed. 74 nrm_tol = dlamch('E') * TEN 75 min_tol = SQRT(SQRT(dlamch('S'))) 76c write(luout,*) "mintol:", min_tol 77c min_tol = 1.0d-30 78 max_tol = ONE / min_tol 79 if (tol <= ZERO) tol = SQRT(dlamch('E')) 80 if(ldebug) then 81 if(id .eq. 0) write(LuOut,*) "Tol: ", tol 82 end if 83c 84c Zero work arrays 85 do i = 1, 7 86 call ga_zero(g_work(i)) 87 end do 88 if(id.eq.0) then 89 if(.not.ma_push_get(mt_dcpl, nlen, 'seed:tmp', l_seed, k_seed)) 90 $ call errquit('seeded temp malloc failed', 9, MA_ERR) 91 end if 92c 93c Initalize by placing input field into work 2 and 3 94 call ga_put(g_work(2), 1, nLen, 1, 1, unknowns, 1) 95 call ga_copy(g_work(2), g_work(3)) 96c Save the inital residual vector in work(6) 97 call ga_copy(g_work(3), g_work(6)) 98 call ga_sync() 99c 100c Initial residual 101c res_0 = dnrm2(nlen,unknowns,1) 102 call ga_conjg(g_work(3), g_work(7)) 103 res_0 = SQRT(REAL(ga_zdot(g_work(7), g_work(3)), KIND=8)) 104 if (ldebug) write(luout,*) "res0:", res_0 105c 106c If already converged, exit here 107 if((tol >= ONE) .or. (res_0 <= tol )) then 108 niter = 0 109 tol = res_0 110 if(id.eq.0) then 111 stat = ma_pop_stack(l_seed) 112 end if 113 return 114 end if 115c Pull in how many seed vectors we have 116 if(.not.rtdb_get(rtdb,'seed:k'//dir//'c', mt_int, 1, k)) 117 $ k = 0 118c $ call errquit('get seed:k failed', 1, RTDB_ERR) 119c 120c Now project the seed space onto the iterate and residual 121 rhs_n = res_0 122 if(id.eq.0.and.ldebug) write(luout,*) "Projecting", k, "steps" 123 do j = 1, k 124c Pull in scalars 125 if(id.eq.0) then 126 stat = rtdb_parallel(.false.) 127 if(.not.rtdb_get(rtdb,'seed:sin'//CHAR(j)//dir//'c', 128 $ mt_dcpl, 1, sinn)) 129 $ call errquit('get seed sinn failed', j, RTDB_ERR) 130 if(.not.rtdb_get(rtdb,'seed:cos'//CHAR(j)//dir//'c', 131 $ mt_dbl, 1, cosn)) 132 $ call errquit('get seed cosn failed', j, RTDB_ERR) 133 if(.not.rtdb_get(rtdb,'seed:scp'//CHAR(j)//dir//'c', 134 $ mt_dbl, 1, scpn)) 135 $ call errquit('get seed scpn failed', j, RTDB_ERR) 136 if(.not.rtdb_get(rtdb,'seed:scs'//CHAR(j)//dir//'c', 137 $ mt_dcpl, 1, scs)) 138 $ call errquit('get seed scs failed', j, RTDB_ERR) 139c Pull in arrays 140c call ga_access(g_work(4), 1, nlen, 1, 1, k_seed, ld) 141 if(.not.rtdb_get(rtdb,'seed:v'//CHAR(j)//dir//'c', 142 $ mt_dcpl, nlen, dcpl_mb(k_seed))) 143 $ call errquit('get seed work(3) failed', j, RTDB_ERR) 144 call ga_put(g_work(4), 1, nlen, 1, 1, dcpl_mb(k_seed), 1) 145c call ga_release_update(g_work(4), 1, nlen, 1, 1) 146c call ga_access(g_work(5), 1, nlen, 1, 1, k_seed, ld) 147 if(.not.rtdb_get(rtdb,'seed:s'//CHAR(j)//dir//'c', 148 $ mt_dcpl, nlen, dcpl_mb(k_seed))) 149 $ call errquit('get seed s failed', j, RTDB_ERR) 150 call ga_put(g_work(5), 1, nlen, 1, 1, dcpl_mb(k_seed), 1) 151c call ga_release_update(g_work(5), 1, nlen, 1, 1) 152 stat = rtdb_parallel(.true.) 153 end if 154 call ga_brdcst(msg_dimqm, sinn, mdtob(2), 0) 155 call ga_brdcst(msg_dimqm, cosn, mdtob(1), 0) 156 call ga_brdcst(msg_dimqm, scpn, mdtob(1), 0) 157 call ga_brdcst(msg_dimqm, scs, mdtob(2), 0) 158 call ga_sync() 159 g_n = ga_zdot(g_work(6), g_work(4)) * scpn 160 rhs_n_plus1 = -CONJG(sinn) * rhs_n + cosn * g_n 161 rhs_n = cosn * rhs_n + sinn * g_n 162 dtmp = scs * rhs_n 163 call ga_add(ONE_C, g_work(1), dtmp, g_work(5), g_work(1)) 164 rhs_n = rhs_n_plus1 165 end do 166 call matvec(rtdb, nlen, g_work(1), g_work(3), xyz, ldebug) 167 call ga_add(ONE_C, g_work(2), -ONE_C, g_work(3), g_work(3)) 168 call ga_conjg(g_work(3), g_work(7)) 169 dtmp = SQRT(REAL(ga_zdot(g_work(7), g_work(3)), KIND=8)) 170c 171c Initialize the variables 172 maxiter = niter 173 scv = REAL(dtmp) 174 e_n = ONE_C 175 cosn = ONE 176 res_nrm = REAL(dtmp)/res_0 177 scpn = ONE 178 scs = ZERO_C 179 sinn = ZERO_C 180 l_n_plus1 = ZERO 181 omega = ONE 182 rhs_n = omega * dtmp 183 omegamax = ONE / omega 184c 185c Re-zero work arrays used in seeding 186 if(id.eq.0 .and. ldebug) 187 $ write(luout,*) "Projected res_0:", REAL(dtmp) 188 call ga_zero(g_work(4)) 189 call ga_zero(g_work(5)) 190c 191c Begin the algorithm 192 do niter = 1, maxiter 193 time = util_timer() 194c 195c Check if E_n is nonsingular 196 if(ABS(e_n) == ZERO) then 197 ierr = 3 198 return 199 end if 200c 201c Compute scale factor for the vector w_{n} 202c Check for invariant subspaces, and scale the vectors if needed. 203 ierr = 0 204 if (scpn * scv < nrm_tol) then 205 ierr = 5 ! A-invarient subspace 206 return 207 end if 208c 209 d_n = ga_zdot(g_work(3), g_work(3)) 210 d_n = d_n / (scv**2) 211 if((scv >= max_tol) .or. (scv <= min_tol)) then 212 dtmp = ONE_C / scv 213 call ga_scale(g_work(3), dtmp) 214 scv = ONE 215 end if 216 scv = ONE / scv 217c 218c Build the vector p_n 219 u_n_minus1 = d_n * l_n_plus1 / e_n 220 dtmp = u_n_minus1 * scpn / scv 221 call ga_add(ONE_C, g_work(3), -dtmp, g_work(4), g_work(4)) 222 scpn = scv 223c 224c Check if D_n is nonsingular 225 if(ABS(d_n) == ZERO) then 226 ierr = 4 227 return 228 end if 229c 230c Multiply current residual by the matrix 231 call matvec(rtdb, nlen, g_work(4), g_work(6), xyz, ldebug) 232c 233c Compute p_n^T A p_n 234 e_n = ga_zdot(g_work(4), g_work(6)) 235 e_n = e_n * scpn**2 236c 237c Build the vector v_{n+1} 238 l_n = e_n / d_n 239 call ga_add(ONE_C, g_work(6), -l_n, g_work(3), g_work(3)) 240 call ga_sync() 241c 242c Compute the scale factor for v_{n+1} 243 call ga_conjg(g_work(3), g_work(7)) 244 scv = SQRT(REAL(ga_zdot(g_work(7), g_work(3)), KIND=8)) 245 l_n_plus1 = scpn * scv 246c 247c The QMR code starts here. 248c Multiply the new column by the previous omeags 249c Get the next scaling factor omega(i) and update omegamax 250 res_n = omega * l_n 251 omega = ONE 252 res_n_plus1 = omega * l_n_plus1 253 omegamax = MAX(omegamax, ONE/omega) 254c 255c Apply the previous rotation 256 res_n_minus1 = sinn * res_n 257 res_n = cosn * res_n 258c 259c Compute and apply the rotation for the last element 260 call zrotg(res_n, res_n_plus1, cosn, sinn) 261c 262c Apply the new rotation to the right-hand side vector 263 rhs_n_plus1 = -CONJG(sinn) * rhs_n 264 rhs_n = cosn * rhs_n 265c 266c Compute the next search direction s_i 267 dtmp = res_n_minus1 * scs / scpn 268c g_work(:,5) = g_work(:,4) + -dtmp * g_work(:,5) 269 call ga_add(ONE_C, g_work(4), -dtmp, g_work(5), g_work(5)) 270c 271c Compute the new QMR iterate, then scale the search direction 272 scs = scpn / res_n 273 dtmp = scs * rhs_n 274c g_work(:,1) = g_work(:,1) + dtmp * g_work(:,5) 275 call ga_add(ONE_C, g_work(1), dtmp, g_work(5), g_work(1)) 276 if((ABS(scs) >= max_tol) .or. (ABS(scs) <= min_tol)) then 277c g_work(:,5) = scs * g_work(:,5) 278 call ga_scale(g_work(5), scs) 279 scs = ONE_C 280 end if 281c 282c Compute the residual norm upper bound 283c If the scaled upper bound is within one order of magnitude of 284c the targer convergence norm, compute the true residual norm. 285 rhs_n = rhs_n_plus1 286 res_nrm_upper_bound = 287 $ SQRT(REAL(niter+1, KIND=8))*omegamax*ABS(rhs_n_plus1)/res_0 288 nrm_check = res_nrm_upper_bound 289 if((res_nrm_upper_bound/tol <= TEN).or.(niter >= maxiter)) then 290c Multiply the current residual by the matrix 291 call matvec(rtdb, nlen, g_work(1), g_work(6), xyz, ldebug) 292 call ga_add(ONE_C, g_work(2), -ONE, g_work(6), g_work(6)) 293 call ga_sync() 294 call ga_conjg(g_work(6), g_work(7)) 295 res_nrm = SQRT(REAL(ga_zdot(g_work(7), g_work(6)),KIND=8)) 296 res_nrm = res_nrm / res_0 297 call ga_sync() 298 nrm_check = res_nrm 299 if(id.eq.0.and.ldebug) write(LuOut,*) "Res_nrm: ", res_nrm 300 else 301 if(id.eq.0.and.ldebug) then 302 write(LuOut,*) "Res_nrm_upper_bound:", res_nrm_upper_bound 303 end if 304 end if 305 time = util_timer() - time 306 if(id.eq.0.and.ldebug) then 307 write(LuOut,*) "Iteration", niter 308 write(LuOut,*) "Time (s):", time 309 write(luout,*) "" 310 call util_flush(LuOut) 311 end if 312c 313c Check for convergece or termination. Stop if: 314c 1. algorithm converged; 315c 2. there is an error condition; 316c 3. the residual norm upper bound is smaller than the computed 317c residual norm by a factor of at least 100; 318c 4. algorithm exceeded the iterations limit 319 if(res_nrm <= tol) then 320 ierr = 0 321 exit 322 else if(ierr /= 0) then 323 exit 324 else if(res_nrm_upper_bound < nrm_check / HUNDRED) then 325 ierr = 3 326 exit 327 end if 328 call ga_sync() 329 end do 330c 331c Put proper values into output variables 332 if (niter > maxiter) ierr = 3 333 tol = res_nrm 334 call ga_get(g_work(1), 1, nLen, 1, 1, unknowns, 1) 335 if(id.eq.0) then 336 stat = ma_pop_stack(l_seed) 337 end if 338 if(id.eq.0.and.ldebug) write(LuOut,*) 339 $ "End non-seed QMR Complex Routine" 340 end subroutine seeded_qmr_complex 341