1      subroutine qmr_seed_real(rtdb, nlen, g_work, niter, tol, ierr,
2     $                    unknowns, matvec, xyz, ldebug, dir)
3c
4c     Fancy QMR algorithm.  Will document later.
5c      use constants
6      implicit None
7#include "errquit.fh"
8#include "inp.fh"
9#include "rtdb.fh"
10#include "stdio.fh"
11#include "nwc_const.fh"
12#include "mafdecls.fh"
13#include "global.fh"
14#include "testutil.fh"
15#include "dimqm_constants.fh"
16c
17c     Input Variables
18      integer rtdb
19      integer nlen
20      integer g_work(6)
21      integer niter
22      double precision tol
23      integer ierr
24      integer g_unknowns
25      external matvec
26      double precision xyz(3,*)
27      double precision unknowns(nlen)
28      logical ldebug
29      character dir
30c
31c     Local variables
32      integer maxiter
33      double precision d_n, e_n
34      double precision sinn, cosn
35      double precision rhs_n, rhs_n_plus1
36      double precision l_n, l_n_plus1
37      double precision res_0, res_n, res_n_minus1, res_n_plus1
38      double precision omegamax, omega
39      double precision res_nrm, res_nrm_upper_bound, nrm_check
40      double precision nrm_tol, min_tol, max_tol
41      double precision scv, scpn, scs
42      double precision u_n_minus1, dtmp
43      integer i
44      integer id, ld, k_seed, l_seed
45      integer ilo, ihi, jlo, jhi
46      double precision time
47      logical stat
48c
49c     BLAS/LAPACK routines
50      external          dlamch
51      double precision  dlamch
52      double precision dnrm2
53      external dnrm2
54      external drotg
55      call ga_sync()
56c
57c     Get node ID
58      id = ga_nodeid()
59      if(id.eq.0.and.ldebug) then
60        write(LuOut,*) "Start seed QMR routine"
61        call util_flush(LuOut)
62      end if
63c     Check tolerances of the machine
64      nrm_tol = dlamch('E') * TEN
65      min_tol = SQRT(SQRT(dlamch('S')))
66      max_tol = ONE / min_tol
67      if (tol <= ZERO) tol = SQRT(dlamch('E'))
68      if(ldebug) then
69        if(id .eq. 0) write(LuOut,*) "Tol: ", tol
70      end if
71c
72c     Zero work arrays
73      do i = 1, 6
74        call ga_zero(g_work(i))
75      end do
76      if(id.eq.0) then
77        if(.not.ma_push_get(mt_dbl, nlen, 'qmr:seedtemp', l_seed,
78     $                      k_seed))
79     $      call errquit('qmr real malloc k_seed failed', 1, MA_ERR)
80        call dfill(nlen, ZERO, dbl_mb(k_seed), 1)
81      end if
82
83c
84c     Initalize by copying input field to work 2 and 3
85      call ga_put(g_work(2), 1, nLen, 1, 1, unknowns, 1)
86      call ga_copy(g_work(2), g_work(3))
87      call ga_sync()
88c
89c     Initial residual
90c     res_0 = dnrm2(nlen,unknowns,1)
91      res_0 = SQRT(ga_ddot(g_work(3), g_work(3)))
92c
93c     If already converged, exit here
94      if((tol >= ONE) .or. (res_0 <= tol)) then
95        niter = 0
96        tol = res_0
97        if(.not.rtdb_put(rtdb,'seed:k'//dir, mt_int, 1, niter))
98     $    call errquit('put seed:k failed', 1, RTDB_ERR)
99        if(id.eq.0) then
100          stat = ma_pop_stack(l_seed)
101        end if
102        return
103      end if
104c
105c     Initialize the variables
106      maxiter   = niter
107      scv       = res_0
108      e_n       = ONE
109      cosn      = ONE
110      res_nrm   = ONE
111      scpn      = ONE
112      scs       = ZERO
113      sinn      = ZERO
114      l_n_plus1 = ZERO
115      omega     = ONE
116      rhs_n     = omega * res_0
117      omegamax  = ONE / omega
118c
119c     Begin the algorithm
120      do niter = 1, maxiter
121        time = util_timer()
122c
123c       Check if E_n is nonsingular
124        if(e_n == ZERO) then
125          ierr = 4
126          return
127        end if
128c
129c       Compute scale factor for the vector w_{n}
130c       Check for invariant subspaces, and scale the vectors if needed.
131        ierr = 0
132        if (scpn * scv < nrm_tol) ierr = 5
133        if (ierr /= 0) return ! A-invarient subspace
134c
135        d_n = ga_ddot(g_work(3), g_work(3)) / (scv**2)
136        if((scv >= max_tol) .or. (scv <= min_tol)) then
137          dtmp = ONE / scv
138          call ga_scale(g_work(3), dtmp)
139          scv = ONE
140        end if
141        scv = ONE / scv
142c
143c       Build the vector p_n
144        u_n_minus1 = d_n * l_n_plus1 / e_n
145        dtmp       = u_n_minus1 * scpn / scv
146        call ga_add(ONE, g_work(3), -dtmp, g_work(4), g_work(4))
147        scpn = scv
148c
149c       Check if D_n is nonsingular
150        if(d_n == ZERO) then
151          ierr = 4
152          return
153        end if
154c
155c       Multiply current residual by the matrix
156        call matvec(rtdb, nlen, g_work(4), g_work(6), xyz, ldebug)
157c
158c       Compute p_n^T A p_n
159        e_n = ga_ddot(g_work(4), g_work(6)) * scpn**2
160c
161c       Build the vector v_{n+1}
162        l_n = e_n / d_n
163        call ga_add(ONE, g_work(6), -l_n, g_work(3), g_work(3))
164c
165c       Compute the scale factor for v_{n+1}
166        scv = SQRT(ga_ddot(g_work(3), g_work(3)))
167        l_n_plus1 = scpn * scv
168c
169c       The QMR code starts here.
170c       Multiply the new column by the previous omeags
171c       Get the next scaling factor omega(i) and update omegamax
172        res_n       = omega * l_n
173        omega       = ONE
174        res_n_plus1 = omega * l_n_plus1
175        omegamax    = MAX(omegamax, ONE/omega)
176c
177c       Apply the previous rotation
178        res_n_minus1 = sinn * res_n
179        res_n        = cosn * res_n
180c
181c       Compute and apply the rotation for the last element
182        call drotg(res_n, res_n_plus1, cosn, sinn)
183c       Save rotations and vector for seeding
184        if(id.eq.0) then
185c          write(luout,*) "Saving data"
186          stat = rtdb_parallel(.false.)
187          if(.not.rtdb_put(rtdb,'seed:sin'//CHAR(niter)//dir,
188     $                     mt_dbl, 1,sinn))
189     $      call errquit('put sinn failed', niter, RTDB_ERR)
190          if(.not.rtdb_put(rtdb,'seed:cos'//CHAR(niter)//dir,
191     $                     mt_dbl, 1,cosn))
192     $      call errquit('put cosn failed', niter, RTDB_ERR)
193          if(.not.rtdb_put(rtdb,'seed:scp'//CHAR(niter)//dir,
194     $                     mt_dbl, 1,scpn))
195     $      call errquit('put scp failed', niter, RTDB_ERR)
196          call ga_get(g_work(3), 1, nlen, 1, 1, dbl_mb(k_seed), 1)
197          if(.not.rtdb_put(rtdb,'seed:v'//CHAR(niter)//dir,
198     $                     mt_dbl, nlen, dbl_mb(k_seed)))
199     $      call errquit('put v failed', niter, RTDB_ERR)
200c        call ga_distribution(g_work(3), id, ilo, ihi, jlo, jhi)
201c        call ga_access(g_work(3), ilo, ihi, jlo, jhi, k_seed, ld)
202c        if(.not.rtdb_put(rtdb,'seed:v'//CHAR(niter)//dir,
203c     $                   mt_dbl, nlen, dbl_mb(k_seed)))
204c     $      call errquit('put v failed', niter, RTDB_ERR)
205c        call ga_release(g_work(3), ilo, ihi, jlo, jhi)
206          stat = rtdb_parallel(.true.)
207c          write(luout,*) "Data saved"
208        end if
209        call ga_sync()
210c
211c       Apply the new rotation to the right-hand side vector
212        rhs_n_plus1 = -sinn * rhs_n
213        rhs_n       =  cosn * rhs_n
214c
215c       Compute the next search direction s_i
216        dtmp = res_n_minus1 * scs / scpn
217c       g_work(:,5) = g_work(:,4) + -dtmp * g_work(:,5)
218        call ga_add(ONE, g_work(4), -dtmp, g_work(5), g_work(5))
219c
220c       Compute the new QMR iterate, then scale the search direction
221        scs  = scpn / res_n
222        dtmp = scs * rhs_n
223c
224c       Save search direction for seeding
225        if(id.eq.0) then
226c          write(luout,*) "Saving data 2"
227          stat = rtdb_parallel(.false.)
228          if(.not.rtdb_put(rtdb,'seed:scs'//CHAR(niter)//dir,
229     $                   mt_dbl, 1, scs))
230     $      call errquit('put scs failed', niter, RTDB_ERR)
231
232          call ga_get(g_work(5), 1, nlen, 1, 1, dbl_mb(k_seed), 1)
233          if(.not.rtdb_put(rtdb,'seed:s'//CHAR(niter)//dir,
234     $                     mt_dbl, nlen, dbl_mb(k_seed)))
235     $      call errquit('put s failed', niter, RTDB_ERR)
236c          write(luout,*) "Data saved 2"
237c          call util_flush(luout)
238          stat = rtdb_parallel(.true.)
239        end if
240        call ga_sync()
241c        call ga_access(g_work(5), 1, nlen, 1, 1, k_seed, ld)
242c        if(.not.rtdb_put(rtdb,'seed:s'//CHAR(niter)//dir,
243c     $                   mt_dbl, nlen, dbl_mb(k_seed)))
244c     $      call errquit('put s failed', niter, RTDB_ERR)
245c        call ga_release(g_work(5), 1, nlen, 1, 1)
246c
247c       g_work(:,1) = g_work(:,1) + dtmp * g_work(:,5)
248        call ga_add(ONE, g_work(1), dtmp, g_work(5), g_work(1))
249        if((ABS(scs) >= max_tol) .or. (ABS(scs) <= min_tol)) then
250c         g_work(:,5) = scs * g_work(:,5)
251          call ga_scale(g_work(5), scs)
252          scs = ONE
253        end if
254c        write(luout,*) "updated iterate"
255c
256c       Compute the residual norm upper bound
257c       If the scaled upper bound is within one order of magnitude of
258c       the targer convergence norm, compute the true residual norm.
259        rhs_n = rhs_n_plus1
260        res_nrm_upper_bound =
261     $    SQRT(REAL(niter+1))*omegamax*ABS(rhs_n_plus1)/res_0
262        nrm_check = res_nrm_upper_bound
263        if((res_nrm_upper_bound/tol <= TEN).or.(niter >= maxiter)) then
264c         Multiply the current residual by the matrix
265           call matvec(rtdb, nlen, g_work(1), g_work(6), xyz, ldebug)
266c          g_work(:,6) = g_work(:,2) - g_work(:,6)
267           call ga_add(ONE, g_work(2), -ONE, g_work(6), g_work(6))
268           call ga_sync()
269           res_nrm = SQRT(ga_ddot(g_work(6), g_work(6))) / res_0
270           call ga_sync()
271           nrm_check = res_nrm
272           if(ldebug) write(LuOut,*) "Res_nrm: ", res_nrm
273        else
274          if(ldebug) then
275            write(LuOut,*) "Res_nrm_upper_bound:", res_nrm_upper_bound
276          end if
277        end if
278        time = util_timer() - time
279        if(ldebug) then
280          write(LuOut,*) "Iteration", niter
281          write(LuOut,*) "Time (s):", time
282          call util_flush(LuOut)
283        end if
284c
285c       Check for convergece or termination.  Stop if:
286c         1. algorithm converged;
287c         2. there is an error condition;
288c         3. the residual norm upper bound is smaller than the computed
289c            residual norm by a factor of at least 100;
290c         4. algorithm exceeded the iterations limit
291        if(res_nrm <= tol) then
292          ierr = 0
293          exit
294        else if(ierr /= 0) then
295          exit
296        else if(res_nrm_upper_bound < nrm_check / HUNDRED) then
297          ierr = 3
298          exit
299        end if
300        call ga_sync()
301      end do
302c
303c     Put proper values into output variables
304      if (niter > maxiter) ierr = 3
305      tol = res_nrm
306      call ga_get(g_work(1), 1, nLen, 1, 1, unknowns, 1)
307c
308c     Save number of iterations for projection
309      if(.not.rtdb_put(rtdb,'seed:k'//dir, mt_int, 1, niter))
310     $  call errquit('put seed:k failed', 1, RTDB_ERR)
311c
312c     Destroy memory
313      if(id.eq.0) then
314        stat = ma_pop_stack(l_seed)
315      end if
316      call ga_sync()
317      if(id.eq.0.and.ldebug) write(LuOut,*)
318     $   "End seed QMR Routine"
319      end subroutine qmr_seed_real
320