1      subroutine qmr_complex(rtdb, nlen, g_work, niter, tol, ierr,
2     $                    unknowns, matvec, xyz, ldebug)
3c
4c     Fancy QMR algorithm.  Will document later.
5      implicit None
6#include "errquit.fh"
7#include "inp.fh"
8#include "rtdb.fh"
9#include "stdio.fh"
10#include "nwc_const.fh"
11#include "mafdecls.fh"
12#include "global.fh"
13#include "testutil.fh"
14#include "dimqm_constants.fh"
15c
16c     Input Variables
17      integer rtdb
18      integer nlen
19      integer g_work(7)
20      integer niter
21      double precision tol
22      integer ierr
23      integer g_unknowns
24      external matvec
25      double precision xyz(3,*)
26      double complex unknowns(nlen)
27      logical ldebug
28c
29c     Local variables
30      integer maxiter
31      complex*16 d_n, e_n, l_n
32      complex*16 sinn, scs
33      complex*16 rhs_n, rhs_n_plus1
34      complex*16  res_n, res_n_minus1, res_n_plus1
35      complex*16 u_n_minus1, dtmp
36      double precision l_n_plus1
37      double precision omegamax, omega, cosn, res_0
38      double precision res_nrm, res_nrm_upper_bound, nrm_check
39      double precision nrm_tol, min_tol, max_tol
40      double precision scv, scpn, tmp
41      integer i
42      integer id
43      double precision time
44c
45c     BLAS/LAPACK routines
46      external          dlamch
47      double precision  dlamch
48      double precision dnrm2
49      external dnrm2
50      external zrotg
51      call ga_sync()
52c
53c     Check for valid input
54      ierr = 0
55      if(nlen < 1 .or. niter < 1) then
56        ierr = 2
57        return
58      end if
59c
60c     Get node ID
61      id = ga_nodeid()
62      if(id.eq.0.and.ldebug) then
63        write(LuOut,*) "Start QMR complex routine"
64        call util_flush(LuOut)
65      end if
66c     Check tolerances of the machine
67c     Not sure if this is needed.
68      nrm_tol = dlamch('E') * TEN
69      min_tol = SQRT(SQRT(dlamch('S')))
70c      write(luout,*) "mintol:", min_tol
71c      min_tol = 1.0d-30
72      max_tol = ONE / min_tol
73      if (tol <= ZERO) tol = SQRT(dlamch('E'))
74      if(ldebug) then
75        if(id .eq. 0) write(LuOut,*) "Tol: ", tol
76      end if
77c
78c     Zero work arrays
79      do i = 1, 7
80        call ga_zero(g_work(i))
81      end do
82c
83c     Initalize by placing input field into work 2 and 3
84      call ga_put(g_work(2), 1, nLen, 1, 1, unknowns, 1)
85      call ga_copy(g_work(2), g_work(3))
86      call ga_sync()
87c
88c     Initial residual
89c      res_0 = dnrm2(nlen,unknowns,1)
90      call ga_conjg(g_work(3), g_work(7))
91      res_0 = SQRT(REAL(ga_zdot(g_work(7), g_work(3)), KIND=8))
92c
93c     If already converged, exit here
94      if((tol >= ONE) .or. (res_0 <= tol )) then
95        niter = 0
96        tol = res_0
97        return
98      end if
99c
100c     Initialize the variables
101      maxiter   = niter
102      scv       = res_0
103      e_n       = ONE_C
104      cosn      = ONE
105      res_nrm   = ONE
106      scpn      = ONE
107      scs       = ZERO_C
108      sinn      = ZERO_C
109      l_n_plus1 = ZERO
110      omega     = ONE
111      rhs_n     = omega * res_0
112      omegamax  = ONE / omega
113c
114c     Begin the algorithm
115      do niter = 1, maxiter
116        time = util_timer()
117c
118c       Check if E_n is nonsingular
119        if(ABS(e_n) == ZERO) then
120          ierr = 3
121          return
122        end if
123c
124c       Compute scale factor for the vector w_{n}
125c       Check for invariant subspaces, and scale the vectors if needed.
126        ierr = 0
127        if (scpn * scv < nrm_tol) then
128          ierr = 5 ! A-invarient subspace
129          return
130        end if
131c
132        d_n = ga_zdot(g_work(3), g_work(3))
133        d_n = d_n / (scv**2)
134        if((scv >= max_tol) .or. (scv <= min_tol)) then
135          dtmp = ONE_C / scv
136          call ga_scale(g_work(3), dtmp)
137          scv = ONE
138        end if
139        scv = ONE / scv
140c
141c       Build the vector p_n
142        u_n_minus1 = d_n * l_n_plus1 / e_n
143        dtmp       = u_n_minus1 * scpn / scv
144        call ga_add(ONE_C, g_work(3), -dtmp, g_work(4), g_work(4))
145        scpn = scv
146c
147c       Check if D_n is nonsingular
148        if(ABS(d_n) == ZERO) then
149          ierr = 4
150          return
151        end if
152c
153c       Multiply current residual by the matrix
154        call matvec(rtdb, nlen, g_work(4), g_work(6), xyz, ldebug)
155c
156c       Compute p_n^T A p_n
157        e_n = ga_zdot(g_work(4), g_work(6))
158        e_n = e_n * scpn**2
159c
160c       Build the vector v_{n+1}
161        l_n = e_n / d_n
162        call ga_add(ONE_C, g_work(6), -l_n, g_work(3), g_work(3))
163        call ga_sync()
164c
165c       Compute the scale factor for v_{n+1}
166        call ga_conjg(g_work(3), g_work(7))
167        scv = SQRT(REAL(ga_zdot(g_work(7), g_work(3)), KIND=8))
168        l_n_plus1 = scpn * scv
169c
170c       The QMR code starts here.
171c       Multiply the new column by the previous omeags
172c       Get the next scaling factor omega(i) and update omegamax
173        res_n       = omega * l_n
174        omega       = ONE
175        res_n_plus1 = omega * l_n_plus1
176        omegamax    = MAX(omegamax, ONE/omega)
177c
178c       Apply the previous rotation
179        res_n_minus1 = sinn * res_n
180        res_n        = cosn * res_n
181c
182c       Compute and apply the rotation for the last element
183        call zrotg(res_n, res_n_plus1, cosn, sinn)
184c
185c       Apply the new rotation to the right-hand side vector
186        rhs_n_plus1 = -CONJG(sinn) * rhs_n
187        rhs_n       =  cosn * rhs_n
188c
189c       Compute the next search direction s_i
190        dtmp = res_n_minus1 * scs / scpn
191c       g_work(:,5) = g_work(:,4) + -dtmp * g_work(:,5)
192        call ga_add(ONE_C, g_work(4), -dtmp, g_work(5), g_work(5))
193c
194c       Compute the new QMR iterate, then scale the search direction
195        scs  = scpn / res_n
196        dtmp = scs * rhs_n
197c       g_work(:,1) = g_work(:,1) + dtmp * g_work(:,5)
198        call ga_add(ONE_C, g_work(1), dtmp, g_work(5), g_work(1))
199        if((ABS(scs) >= max_tol) .or. (ABS(scs) <= min_tol)) then
200c         g_work(:,5) = scs * g_work(:,5)
201          call ga_scale(g_work(5), scs)
202          scs = ONE_C
203        end if
204c
205c       Compute the residual norm upper bound
206c       If the scaled upper bound is within one order of magnitude of
207c       the targer convergence norm, compute the true residual norm.
208        rhs_n = rhs_n_plus1
209        res_nrm_upper_bound =
210     $    SQRT(REAL(niter+1, KIND=8))*omegamax*ABS(rhs_n_plus1)/res_0
211        nrm_check = res_nrm_upper_bound
212        if((res_nrm_upper_bound/tol <= TEN).or.(niter >= maxiter)) then
213c         Multiply the current residual by the matrix
214           call matvec(rtdb, nlen, g_work(1), g_work(6), xyz, ldebug)
215           call ga_add(ONE_C, g_work(2), -ONE, g_work(6), g_work(6))
216           call ga_sync()
217           call ga_conjg(g_work(6), g_work(7))
218           res_nrm = SQRT(REAL(ga_zdot(g_work(7), g_work(6)),KIND=8))
219           res_nrm = res_nrm / res_0
220           call ga_sync()
221           nrm_check = res_nrm
222           if(id.eq.0.and.ldebug) write(LuOut,*) "Res_nrm: ", res_nrm
223        else
224          if(id.eq.0.and.ldebug) then
225            write(LuOut,*) "Res_nrm_upper_bound:", res_nrm_upper_bound
226          end if
227        end if
228        time = util_timer() - time
229        if(id.eq.0.and.ldebug) then
230          write(LuOut,*) "Iteration", niter
231          write(LuOut,*) "Time (s):", time
232          write(luout,*) ""
233          call util_flush(LuOut)
234        end if
235c
236c       Check for convergece or termination.  Stop if:
237c         1. algorithm converged;
238c         2. there is an error condition;
239c         3. the residual norm upper bound is smaller than the computed
240c            residual norm by a factor of at least 100;
241c         4. algorithm exceeded the iterations limit
242        if(res_nrm <= tol) then
243          ierr = 0
244          exit
245        else if(ierr /= 0) then
246          exit
247        else if(res_nrm_upper_bound < nrm_check / HUNDRED) then
248          ierr = 3
249          exit
250        end if
251        call ga_sync()
252      end do
253c
254c     Put proper values into output variables
255      if (niter > maxiter) ierr = 3
256      tol = res_nrm
257c      call ga_copy(g_work(1), g_unknowns)
258      call ga_get(g_work(1), 1, nLen, 1, 1, unknowns, 1)
259      if(id.eq.0.and.ldebug) write(LuOut,*)
260     $   "End QMR Routine"
261      end subroutine qmr_complex
262