1      subroutine qmr_real(rtdb, nlen, g_work, niter, tol, ierr,
2     $                    unknowns, matvec, xyz, ldebug)
3c
4c     Fancy QMR algorithm.  Will document later.
5c      use constants
6      implicit None
7#include "errquit.fh"
8#include "inp.fh"
9#include "rtdb.fh"
10#include "stdio.fh"
11#include "nwc_const.fh"
12#include "mafdecls.fh"
13#include "global.fh"
14#include "testutil.fh"
15#include "dimqm_constants.fh"
16c
17c     Input Variables
18      integer rtdb
19      integer nlen
20      integer g_work(6)
21      integer niter
22      double precision tol
23      integer ierr
24      integer g_unknowns
25      external matvec
26      double precision xyz(3,*)
27      double precision unknowns(nlen)
28      logical ldebug
29c
30c     Local variables
31      integer maxiter
32      double precision d_n, e_n
33      double precision sinn, cosn
34      double precision rhs_n, rhs_n_plus1
35      double precision l_n, l_n_plus1
36      double precision res_0, res_n, res_n_minus1, res_n_plus1
37      double precision omegamax, omega
38      double precision res_nrm, res_nrm_upper_bound, nrm_check
39      double precision nrm_tol, min_tol, max_tol
40      double precision scv, scpn, scs
41      double precision u_n_minus1, dtmp
42      integer i
43      integer id
44      double precision time
45c
46c     BLAS/LAPACK routines
47      external          dlamch
48      double precision  dlamch
49      double precision dnrm2
50      external dnrm2
51      external drotg
52      call ga_sync()
53c
54c     Get node ID
55      id = ga_nodeid()
56      if(id.eq.0.and.ldebug) then
57        write(LuOut,*) "Start QMR real routine"
58        call util_flush(LuOut)
59      end if
60c     Check tolerances of the machine
61      nrm_tol = dlamch('E') * TEN
62      min_tol = SQRT(SQRT(dlamch('S')))
63      max_tol = ONE / min_tol
64      if (tol <= ZERO) tol = SQRT(dlamch('E'))
65      if(ldebug) then
66        if(id .eq. 0) write(LuOut,*) "Tol: ", tol
67      end if
68c
69c     Zero work arrays
70      do i = 1, 6
71        call ga_zero(g_work(i))
72      end do
73c
74c     Initalize by copying input field to work 2 and 3
75      call ga_put(g_work(2), 1, nLen, 1, 1, unknowns, 1)
76      call ga_copy(g_work(2), g_work(3))
77      call ga_sync()
78c
79c     Initial residual
80c      res_0 = dnrm2(nlen,unknowns,1)
81      res_0 = SQRT(ga_ddot(g_work(3), g_work(3)))
82c
83c     If already converged, exit here
84      if((tol >= ONE) .or. (res_0 <= tol)) then
85        niter = 0
86        tol = res_0
87        return
88      end if
89c
90c     Initialize the variables
91      maxiter   = niter
92      scv       = res_0
93      e_n       = ONE
94      cosn      = ONE
95      res_nrm   = ONE
96      scpn      = ONE
97      scs       = ZERO
98      sinn      = ZERO
99      l_n_plus1 = ZERO
100      omega     = ONE
101      rhs_n     = omega * res_0
102      omegamax  = ONE / omega
103c
104c     Begin the algorithm
105      do niter = 1, maxiter
106        time = util_timer()
107c
108c       Check if E_n is nonsingular
109        if(e_n == ZERO) then
110          ierr = 4
111          return
112        end if
113c
114c       Compute scale factor for the vector w_{n}
115c       Check for invariant subspaces, and scale the vectors if needed.
116        ierr = 0
117        if (scpn * scv < nrm_tol) ierr = 5
118        if (ierr /= 0) return ! A-invarient subspace
119c
120        d_n = ga_ddot(g_work(3), g_work(3)) / (scv**2)
121        if((scv >= max_tol) .or. (scv <= min_tol)) then
122          dtmp = ONE / scv
123          call ga_scale(g_work(3), dtmp)
124          scv = ONE
125        end if
126        scv = ONE / scv
127c
128c       Build the vector p_n
129        u_n_minus1 = d_n * l_n_plus1 / e_n
130        dtmp       = u_n_minus1 * scpn / scv
131        call ga_add(ONE, g_work(3), -dtmp, g_work(4), g_work(4))
132        scpn = scv
133c
134c       Check if D_n is nonsingular
135        if(d_n == ZERO) then
136          ierr = 4
137          return
138        end if
139c
140c       Multiply current residual by the matrix
141        call matvec(rtdb, nlen, g_work(4), g_work(6), xyz, ldebug)
142c
143c       Compute p_n^T A p_n
144        e_n = ga_ddot(g_work(4), g_work(6)) * scpn**2
145c
146c       Build the vector v_{n+1}
147        l_n = e_n / d_n
148        call ga_add(ONE, g_work(6), -l_n, g_work(3), g_work(3))
149c
150c       Compute the scale factor for v_{n+1}
151        scv = SQRT(ga_ddot(g_work(3), g_work(3)))
152        l_n_plus1 = scpn * scv
153c
154c       The QMR code starts here.
155c       Multiply the new column by the previous omeags
156c       Get the next scaling factor omega(i) and update omegamax
157        res_n       = omega * l_n
158        omega       = ONE
159        res_n_plus1 = omega * l_n_plus1
160        omegamax    = MAX(omegamax, ONE/omega)
161c
162c       Apply the previous rotation
163        res_n_minus1 = sinn * res_n
164        res_n        = cosn * res_n
165c
166c       Compute and apply the rotation for the last element
167        call drotg(res_n, res_n_plus1, cosn, sinn)
168c
169c       Apply the new rotation to the right-hand side vector
170        rhs_n_plus1 = -sinn * rhs_n
171        rhs_n       =  cosn * rhs_n
172c
173c       Compute the next search direction s_i
174        dtmp = res_n_minus1 * scs / scpn
175c       g_work(:,5) = g_work(:,4) + -dtmp * g_work(:,5)
176        call ga_add(ONE, g_work(4), -dtmp, g_work(5), g_work(5))
177c
178c       Compute the new QMR iterate, then scale the search direction
179        scs  = scpn / res_n
180        dtmp = scs * rhs_n
181c       g_work(:,1) = g_work(:,1) + dtmp * g_work(:,5)
182        call ga_add(ONE, g_work(1), dtmp, g_work(5), g_work(1))
183        if((ABS(scs) >= max_tol) .or. (ABS(scs) <= min_tol)) then
184c         g_work(:,5) = scs * g_work(:,5)
185          call ga_scale(g_work(5), scs)
186          scs = ONE
187        end if
188c
189c       Compute the residual norm upper bound
190c       If the scaled upper bound is within one order of magnitude of
191c       the targer convergence norm, compute the true residual norm.
192        rhs_n = rhs_n_plus1
193        res_nrm_upper_bound =
194     $    SQRT(REAL(niter+1))*omegamax*ABS(rhs_n_plus1)/res_0
195        nrm_check = res_nrm_upper_bound
196        if((res_nrm_upper_bound/tol <= TEN).or.(niter >= maxiter)) then
197c         Multiply the current residual by the matrix
198           call matvec(rtdb, nlen, g_work(1), g_work(6), xyz, ldebug)
199c          g_work(:,6) = g_work(:,2) - g_work(:,6)
200           call ga_add(ONE, g_work(2), -ONE, g_work(6), g_work(6))
201           call ga_sync()
202           res_nrm = SQRT(ga_ddot(g_work(6), g_work(6))) / res_0
203           call ga_sync()
204           nrm_check = res_nrm
205           if(id.eq.0.and.ldebug) write(LuOut,*) "Res_nrm: ", res_nrm
206        else
207          if(id.eq.0.and.ldebug) then
208            write(LuOut,*) "Res_nrm_upper_bound:", res_nrm_upper_bound
209          end if
210        end if
211        time = util_timer() - time
212        if(id.eq.0.and.ldebug) then
213          write(LuOut,*) "Iteration", niter
214          write(LuOut,*) "Time (s):", time
215          call util_flush(LuOut)
216        end if
217c
218c       Check for convergece or termination.  Stop if:
219c         1. algorithm converged;
220c         2. there is an error condition;
221c         3. the residual norm upper bound is smaller than the computed
222c            residual norm by a factor of at least 100;
223c         4. algorithm exceeded the iterations limit
224        if(res_nrm <= tol) then
225          ierr = 0
226          exit
227        else if(ierr /= 0) then
228          exit
229        else if(res_nrm_upper_bound < nrm_check / HUNDRED) then
230          ierr = 3
231          exit
232        end if
233        call ga_sync()
234      end do
235c
236c     Put proper values into output variables
237      if (niter > maxiter) ierr = 3
238      tol = res_nrm
239      call ga_get(g_work(1), 1, nLen, 1, 1, unknowns, 1)
240      if(id.eq.0.and.ldebug) write(LuOut,*)
241     $   "End QMR Routine"
242      end subroutine qmr_real
243