1      subroutine seeded_qmr_real_augment(rtdb, nlen, g_work, niter, tol,
2     $                     ierr, unknowns, matvec, xyz, ldebug, muold)
3c
4c     Fancy QMR algorithm.  Will document later.
5c      use constants
6      implicit None
7#include "errquit.fh"
8#include "inp.fh"
9#include "rtdb.fh"
10#include "stdio.fh"
11#include "nwc_const.fh"
12#include "mafdecls.fh"
13#include "global.fh"
14#include "testutil.fh"
15#include "dimqm_constants.fh"
16c
17c     Input Variables
18      integer rtdb
19      integer nlen
20      integer g_work(7)
21      integer niter
22      double precision tol
23      integer ierr
24      integer g_unknowns
25      external matvec
26      double precision xyz(3,*)
27      double precision unknowns(nlen)
28      logical ldebug
29      double precision muold(nlen)
30c
31c     Local variables
32      integer maxiter
33      double precision d_n, e_n
34      double precision sinn, cosn
35      double precision rhs_n, rhs_n_plus1
36      double precision l_n, l_n_plus1
37      double precision res_0, res_n, res_n_minus1, res_n_plus1
38      double precision omegamax, omega
39      double precision res_nrm, res_nrm_upper_bound, nrm_check
40      double precision nrm_tol, min_tol, max_tol
41      double precision scv, scpn, scs
42      double precision u_n_minus1, dtmp
43      integer i, j, k
44      integer id, ld, k_seed, g_seed(2)
45      double precision time
46      integer k_work, lda
47      double precision g_n
48      logical stat
49c
50c     BLAS/LAPACK routines
51      external          dlamch
52      double precision  dlamch
53      double precision dnrm2
54      external dnrm2
55      external drotg
56      call ga_sync()
57c
58c     Get node ID
59      id = ga_nodeid()
60      if(id.eq.0.and.ldebug) then
61        write(LuOut,*) "Start QMR routine"
62        call util_flush(LuOut)
63      end if
64c     Check tolerances of the machine
65c     Not sure if this is needed.
66      nrm_tol = dlamch('E') * TEN
67      min_tol = SQRT(SQRT(dlamch('S')))
68      max_tol = ONE / min_tol
69      if (tol <= ZERO) tol = SQRT(dlamch('E'))
70      if(ldebug) then
71        if(id .eq. 0) write(LuOut,*) "Tol: ", tol
72      end if
73c
74c     Zero work arrays
75      do i = 1, 7
76        call ga_zero(g_work(i))
77      end do
78c
79c     Create and zero seed arrays
80      if(.not. ga_create(mt_dbl, nlen, 1, 'g_seed1', nlen, -1,
81     $  g_seed(1)))
82     $  call errquit('g_seed1: ga create failed',0,GA_ERR)
83      if(.not. ga_create(mt_dbl, nlen, 1, 'g_seed2', nlen, -1,
84     $  g_seed(2)))
85     $  call errquit('g_seed2: ga create failed',0,GA_ERR)
86      do i = 1, 2
87        call ga_zero(g_seed(i))
88      end do
89
90c
91c     Initalize by copying input field to work 2 and 3
92c      call ga_copy(g_unknowns, g_work(2))
93c      call ga_copy(g_unknowns, g_work(3))
94c      write(LuOut,*) nLen
95      call ga_put(g_work(2), 1, nLen, 1, 1, unknowns, 1)
96c   Use old dipoles as initial guess
97c      call ga_put(g_work(1), 1, nLen, 1, 1, muold, 1)
98c   Determine initial residual
99      call matvec(rtdb, nlen, g_work(1), g_work(3), xyz, ldebug)
100      call ga_add(ONE, g_work(2), -ONE, g_work(3), g_work(3))
101c   Save the initial residual vector in work(7)
102      call ga_copy(g_work(3), g_work(7))
103      call ga_sync()
104c      call ga_print(g_work(2))
105c      call ga_print(g_work(3))
106c
107c     Initial residual
108c      res_0 = dnrm2(nlen,unknowns,1)
109      res_0 = SQRT(ga_ddot(g_work(3), g_work(3)))
110      write(luout,*) "res_0:", res_0
111c
112c     If already converged, exit here
113      if((tol >= ONE) .or. (res_0 <= tol)) then
114        niter = 0
115        tol = res_0
116        call ga_get(g_work(1), 1, nLen, 1, 1, unknowns, 1)
117        return
118      end if
119c     Pull in how many seed vectors we have thus far
120      if(.not.rtdb_get(rtdb,'seed:k', mt_int, 1, k))
121     $      call errquit('get seed:k failed', 1, RTDB_ERR)
122c
123c     Now project the seed space onto the iterate and residual
124      rhs_n     = res_0
125      cosn      = ONE
126      sinn      = ZERO
127      if (ldebug) write(luout,*) "Projecting", k, "steps"
128      do j = 1, k
129c       Pull in scalars
130        if(.not.rtdb_get(rtdb,'seed:sin'//CHAR(j), mt_dbl, 1, sinn))
131     $      call errquit('get seed sinn failed', j, RTDB_ERR)
132        if(.not.rtdb_get(rtdb,'seed:cos'//CHAR(j), mt_dbl, 1, cosn))
133     $      call errquit('get seed cosn failed', j, RTDB_ERR)
134        if(.not.rtdb_get(rtdb,'seed:scp'//CHAR(j), mt_dbl, 1, scpn))
135     $      call errquit('get seed scpn failed', j, RTDB_ERR)
136        if(.not.rtdb_get(rtdb,'seed:scs'//CHAR(j), mt_dbl, 1, scs))
137     $      call errquit('get seed scsn failed', j, RTDB_ERR)
138c       Pull in arrays
139        call ga_access(g_seed(1), 1, nlen, 1, 1, k_seed, ld)
140        if(.not.rtdb_get(rtdb,'seed:v'//CHAR(j), mt_dbl, nlen,
141     $               dbl_mb(k_seed)))
142     $      call errquit('get seed work(3) failed', j, RTDB_ERR)
143        call ga_release_update(g_seed(1), 1, nlen, 1, 1)
144        call ga_access(g_seed(2), 1, nlen, 1, 1, k_seed, ld)
145        if(.not.rtdb_get(rtdb,'seed:s'//CHAR(j), mt_dbl, nlen,
146     $               dbl_mb(k_seed)))
147     $      call errquit('get seed s failed', j, RTDB_ERR)
148        call ga_release_update(g_seed(2), 1, nlen, 1, 1)
149        g_n = ga_ddot(g_work(7), g_seed(1)) * scpn
150        rhs_n_plus1 = -sinn * rhs_n + cosn * g_n
151        rhs_n       =  cosn * rhs_n + sinn * g_n
152        dtmp = scs * rhs_n
153        call ga_add(ONE, g_work(1), dtmp, g_seed(2), g_work(1))
154        rhs_n = rhs_n_plus1
155      end do
156      call matvec(rtdb, nlen, g_work(1), g_work(3), xyz, ldebug)
157      call ga_add(ONE, g_work(2), -ONE, g_work(3), g_work(3))
158      dtmp = SQRT(ga_ddot(g_work(3), g_work(3)))
159c      res_0 = SQRT(ga_ddot(g_work(3), g_work(3)))
160c
161c     Initialize the variables
162      maxiter   = niter
163      scv       = dtmp
164      e_n       = ONE
165      cosn      = ONE
166      res_nrm   = dtmp/res_0
167      scpn      = ONE
168      scs       = ZERO
169      sinn      = ZERO
170      l_n_plus1 = ZERO
171      omega     = ONE
172      rhs_n     = omega * dtmp
173      omegamax  = ONE / omega
174      g_n       = ZERO
175      if (ldebug) write(luout,*) "projected res_nrm:", res_nrm
176c
177c     Begin the algorithm
178      do niter = 1, maxiter
179        k = k + 1
180        time = util_timer()
181c
182c       Check if E_n is nonsingular
183        if(e_n == ZERO) then
184          ierr = 4
185          return
186        end if
187c
188c       Compute scale factor for the vector w_{n}
189c       Check for invariant subspaces, and scale the vectors if needed.
190        ierr = 0
191        if (scpn * scv < nrm_tol) ierr = 5
192        if (ierr /= 0) return ! A-invarient subspace
193c
194        d_n = ga_ddot(g_work(3), g_work(3)) / (scv**2)
195        if((scv >= max_tol) .or. (scv <= min_tol)) then
196          dtmp = ONE / scv
197          call ga_scale(g_work(3), dtmp)
198          scv = ONE
199        end if
200        scv = ONE / scv
201c
202c       Build the vector p_n
203        u_n_minus1 = d_n * l_n_plus1 / e_n
204        dtmp       = u_n_minus1 * scpn / scv
205        call ga_add(ONE, g_work(3), -dtmp, g_work(4), g_work(4))
206        scpn = scv
207c
208c       Check if D_n is nonsingular
209        if(d_n == ZERO) then
210          ierr = 4
211          return
212        end if
213c
214c       Multiply current residual by the matrix
215        call matvec(rtdb, nlen, g_work(4), g_work(6), xyz, ldebug)
216c
217c       Compute p_n^T A p_n
218        e_n = ga_ddot(g_work(4), g_work(6)) * scpn**2
219c
220c       Build the vector v_{n+1}
221        l_n = e_n / d_n
222        call ga_add(ONE, g_work(6), -l_n, g_work(3), g_work(3))
223c
224c       Compute the scale factor for v_{n+1}
225        scv = SQRT(ga_ddot(g_work(3), g_work(3)))
226        l_n_plus1 = scpn * scv
227c
228c       The QMR code starts here.
229c       Multiply the new column by the previous omeags
230c       Get the next scaling factor omega(i) and update omegamax
231        res_n       = omega * l_n
232        omega       = ONE
233        res_n_plus1 = omega * l_n_plus1
234        omegamax    = MAX(omegamax, ONE/omega)
235c
236c       Apply the previous rotation
237        res_n_minus1 = sinn * res_n
238        res_n        = cosn * res_n
239c
240c       Compute and apply the rotation for the last element
241        call drotg(res_n, res_n_plus1, cosn, sinn)
242c       Save rotations and vector for seeding
243        if(.not.rtdb_put(rtdb,'seed:sin'//CHAR(k), mt_dbl, 1, sinn))
244     $      call errquit('put sinn failed', k, RTDB_ERR)
245        if(.not.rtdb_put(rtdb,'seed:cos'//CHAR(k), mt_dbl, 1, cosn))
246     $      call errquit('put cosn failed', k, RTDB_ERR)
247        if(.not.rtdb_put(rtdb,'seed:scp'//CHAR(k), mt_dbl, 1, scpn))
248     $      call errquit('put scp failed', k, RTDB_ERR)
249        call ga_access(g_work(3), 1, nlen, 1, 1, k_seed, ld)
250        if(.not.rtdb_put(rtdb,'seed:v'//CHAR(k), mt_dbl, nlen,
251     $          dbl_mb(k_seed)))
252     $      call errquit('put v failed', k, RTDB_ERR)
253        call ga_release(g_work(3), 1, nlen, 1, 1)
254c
255c       Apply the new rotation to the right-hand side vector
256        rhs_n_plus1 = -sinn * rhs_n
257        rhs_n       =  cosn * rhs_n
258c
259c       Compute the next search direction s_i
260        dtmp = res_n_minus1 * scs / scpn
261c       g_work(:,5) = g_work(:,4) + -dtmp * g_work(:,5)
262        call ga_add(ONE, g_work(4), -dtmp, g_work(5), g_work(5))
263c
264c       Compute the new QMR iterate, then scale the search direction
265        scs  = scpn / res_n
266        dtmp = scs * rhs_n
267c
268c       Save search direction for seeding
269        call ga_access(g_work(5), 1, nlen, 1, 1, k_seed, ld)
270        if(.not.rtdb_put(rtdb,'seed:s'//CHAR(k), mt_dbl, nlen,
271     $          dbl_mb(k_seed)))
272     $      call errquit('put s failed', k, RTDB_ERR)
273        call ga_release(g_work(5), 1, nlen, 1, 1)
274        if(.not.rtdb_put(rtdb,'seed:scs'//CHAR(k), mt_dbl, 1, scs))
275     $      call errquit('put scs failed', k, RTDB_ERR)
276c
277c       g_work(:,1) = g_work(:,1) + dtmp * g_work(:,5)
278        call ga_add(ONE, g_work(1), dtmp, g_work(5), g_work(1))
279        if((ABS(scs) >= max_tol) .or. (ABS(scs) <= min_tol)) then
280c         g_work(:,5) = scs * g_work(:,5)
281          call ga_scale(g_work(5), scs)
282          scs = ONE
283        end if
284c
285c       Compute the residual norm upper bound
286c       If the scaled upper bound is within one order of magnitude of
287c       the targer convergence norm, compute the true residual norm.
288        rhs_n = rhs_n_plus1
289        res_nrm_upper_bound =
290     $    SQRT(REAL(niter+1))*omegamax*ABS(rhs_n_plus1)/res_0
291        nrm_check = res_nrm_upper_bound
292c        if((res_nrm_upper_bound/tol <= TEN).or.(niter >= maxiter)) then
293c         Multiply the current residual by the matrix
294           call matvec(rtdb, nlen, g_work(1), g_work(6), xyz, ldebug)
295c          g_work(:,6) = g_work(:,2) - g_work(:,6)
296           call ga_add(ONE, g_work(2), -ONE, g_work(6), g_work(6))
297           call ga_sync()
298           res_nrm = SQRT(ga_ddot(g_work(6), g_work(6))) / res_0
299           call ga_sync()
300           nrm_check = res_nrm
301           if(id.eq.0.and.ldebug) write(LuOut,*) "Res_nrm: ", res_nrm
302        if((res_nrm_upper_bound/tol <= TEN).or.(niter >= maxiter)) then
303           nrm_check = res_nrm
304        else
305          if(id.eq.0.and.ldebug) then
306            write(LuOut,*) "Res_nrm_upper_bound:", res_nrm_upper_bound
307          end if
308        end if
309        time = util_timer() - time
310        if(id.eq.0.and.ldebug) then
311          write(LuOut,*) "Iteration", niter
312          write(LuOut,*) "Time (s):", time
313          call util_flush(LuOut)
314        end if
315c
316c       Check for convergece or termination.  Stop if:
317c         1. algorithm converged;
318c         2. there is an error condition;
319c         3. the residual norm upper bound is smaller than the computed
320c            residual norm by a factor of at least 100;
321c         4. algorithm exceeded the iterations limit
322        if(res_nrm <= tol) then
323          ierr = 0
324          exit
325        else if(ierr /= 0) then
326          exit
327        else if(res_nrm_upper_bound < nrm_check / HUNDRED) then
328          ierr = 3
329          exit
330        end if
331c        call ga_get(g_work(1), 1, nLen, 1, 1, unknowns, 1)
332c        write(luout,*) "END QMR DIPOLES"
333c        write(luout,*) unknowns
334        call ga_sync()
335      end do
336c
337c     Put proper values into output variables
338      if (niter > maxiter) ierr = 3
339      tol = res_nrm
340c      call ga_copy(g_work(1), g_unknowns)
341      call ga_get(g_work(1), 1, nLen, 1, 1, unknowns, 1)
342      if(.not.rtdb_put(rtdb,'seed:k', mt_int, 1, k))
343     $  call errquit('put seed:k failed', 1, RTDB_ERR)
344c      do i = 1, 2
345c        stat =  ga_destroy(g_seed(i))
346c      end do
347      if(id.eq.0.and.ldebug) write(LuOut,*)
348     $   "End QMR Routine"
349      end subroutine seeded_qmr_real_augment
350