1      subroutine seeded_qmr_complex(rtdb, nlen, g_work, niter, tol,
2     $                              ierr, unknowns, matvec, xyz,
3     $                              ldebug, dir)
4c
5c     Fancy QMR algorithm.  Will document later.
6      implicit None
7#include "errquit.fh"
8#include "inp.fh"
9#include "rtdb.fh"
10#include "stdio.fh"
11#include "nwc_const.fh"
12#include "mafdecls.fh"
13#include "global.fh"
14#include "testutil.fh"
15#include "dimqm_constants.fh"
16#include "tcgmsg.fh"
17#include "msgids.fh"
18c
19c     Input Variables
20      integer rtdb
21      integer nlen
22      integer g_work(7)
23      integer niter
24      double precision tol
25      integer ierr
26      integer g_unknowns
27      external matvec
28      double precision xyz(3,*)
29      double complex unknowns(nlen)
30      logical ldebug
31      character dir
32c
33c     Local variables
34      integer maxiter
35      complex*16 d_n, e_n, l_n
36      complex*16 sinn, scs
37      complex*16 rhs_n, rhs_n_plus1
38      complex*16  res_n, res_n_minus1, res_n_plus1
39      complex*16 u_n_minus1, dtmp
40      double precision l_n_plus1
41      double precision omegamax, omega, cosn, res_0
42      double precision res_nrm, res_nrm_upper_bound, nrm_check
43      double precision nrm_tol, min_tol, max_tol
44      double precision scv, scpn, tmp
45      integer i, j, k
46      integer id, ld, k_seed, l_seed
47      double precision time
48      complex*16 g_n
49      logical stat
50c
51c     BLAS/LAPACK routines
52      external          dlamch
53      double precision  dlamch
54      double precision dnrm2
55      external dnrm2
56      external zrotg
57      call ga_sync()
58c
59c     Check for valid input
60      ierr = 0
61      if(nlen < 1 .or. niter < 1) then
62        ierr = 2
63        return
64      end if
65c
66c     Get node ID
67      id = ga_nodeid()
68      if(id.eq.0.and.ldebug) then
69        write(LuOut,*) "Start non-seed QMR complex routine"
70        call util_flush(LuOut)
71      end if
72c     Check tolerances of the machine
73c     Not sure if this is needed.
74      nrm_tol = dlamch('E') * TEN
75      min_tol = SQRT(SQRT(dlamch('S')))
76c      write(luout,*) "mintol:", min_tol
77c      min_tol = 1.0d-30
78      max_tol = ONE / min_tol
79      if (tol <= ZERO) tol = SQRT(dlamch('E'))
80      if(ldebug) then
81        if(id .eq. 0) write(LuOut,*) "Tol: ", tol
82      end if
83c
84c     Zero work arrays
85      do i = 1, 7
86        call ga_zero(g_work(i))
87      end do
88      if(id.eq.0) then
89        if(.not.ma_push_get(mt_dcpl, nlen, 'seed:tmp', l_seed, k_seed))
90     $    call errquit('seeded temp malloc failed', 9, MA_ERR)
91      end if
92c
93c     Initalize by placing input field into work 2 and 3
94      call ga_put(g_work(2), 1, nLen, 1, 1, unknowns, 1)
95      call ga_copy(g_work(2), g_work(3))
96c     Save the inital residual vector in work(6)
97      call ga_copy(g_work(3), g_work(6))
98      call ga_sync()
99c
100c     Initial residual
101c      res_0 = dnrm2(nlen,unknowns,1)
102      call ga_conjg(g_work(3), g_work(7))
103      res_0 = SQRT(REAL(ga_zdot(g_work(7), g_work(3)), KIND=8))
104      if (ldebug) write(luout,*) "res0:", res_0
105c
106c     If already converged, exit here
107      if((tol >= ONE) .or. (res_0 <= tol )) then
108        niter = 0
109        tol = res_0
110        if(id.eq.0) then
111          stat = ma_pop_stack(l_seed)
112        end if
113        return
114      end if
115c     Pull in how many seed vectors we have
116      if(.not.rtdb_get(rtdb,'seed:k'//dir//'c', mt_int, 1, k))
117     $      k = 0
118c     $      call errquit('get seed:k failed', 1, RTDB_ERR)
119c
120c     Now project the seed space onto the iterate and residual
121      rhs_n = res_0
122      if(id.eq.0.and.ldebug) write(luout,*) "Projecting", k, "steps"
123      do j = 1, k
124c       Pull in scalars
125        if(id.eq.0) then
126          stat = rtdb_parallel(.false.)
127          if(.not.rtdb_get(rtdb,'seed:sin'//CHAR(j)//dir//'c',
128     $                   mt_dcpl, 1, sinn))
129     $      call errquit('get seed sinn failed', j, RTDB_ERR)
130          if(.not.rtdb_get(rtdb,'seed:cos'//CHAR(j)//dir//'c',
131     $                   mt_dbl, 1, cosn))
132     $      call errquit('get seed cosn failed', j, RTDB_ERR)
133          if(.not.rtdb_get(rtdb,'seed:scp'//CHAR(j)//dir//'c',
134     $                   mt_dbl, 1, scpn))
135     $      call errquit('get seed scpn failed', j, RTDB_ERR)
136          if(.not.rtdb_get(rtdb,'seed:scs'//CHAR(j)//dir//'c',
137     $                   mt_dcpl, 1, scs))
138     $      call errquit('get seed scs failed', j, RTDB_ERR)
139c       Pull in arrays
140c        call ga_access(g_work(4), 1, nlen, 1, 1, k_seed, ld)
141          if(.not.rtdb_get(rtdb,'seed:v'//CHAR(j)//dir//'c',
142     $                   mt_dcpl, nlen, dcpl_mb(k_seed)))
143     $      call errquit('get seed work(3) failed', j, RTDB_ERR)
144          call ga_put(g_work(4), 1, nlen, 1, 1, dcpl_mb(k_seed), 1)
145c        call ga_release_update(g_work(4), 1, nlen, 1, 1)
146c        call ga_access(g_work(5), 1, nlen, 1, 1, k_seed, ld)
147          if(.not.rtdb_get(rtdb,'seed:s'//CHAR(j)//dir//'c',
148     $                   mt_dcpl, nlen, dcpl_mb(k_seed)))
149     $      call errquit('get seed s failed', j, RTDB_ERR)
150          call ga_put(g_work(5), 1, nlen, 1, 1, dcpl_mb(k_seed), 1)
151c        call ga_release_update(g_work(5), 1, nlen, 1, 1)
152          stat = rtdb_parallel(.true.)
153        end if
154        call ga_brdcst(msg_dimqm, sinn, mdtob(2), 0)
155        call ga_brdcst(msg_dimqm, cosn, mdtob(1), 0)
156        call ga_brdcst(msg_dimqm, scpn, mdtob(1), 0)
157        call ga_brdcst(msg_dimqm, scs,  mdtob(2), 0)
158        call ga_sync()
159        g_n = ga_zdot(g_work(6), g_work(4)) * scpn
160        rhs_n_plus1 = -CONJG(sinn) * rhs_n + cosn * g_n
161        rhs_n       =  cosn * rhs_n + sinn * g_n
162        dtmp = scs * rhs_n
163        call ga_add(ONE_C, g_work(1), dtmp, g_work(5), g_work(1))
164        rhs_n = rhs_n_plus1
165      end do
166      call matvec(rtdb, nlen, g_work(1), g_work(3), xyz, ldebug)
167      call ga_add(ONE_C, g_work(2), -ONE_C, g_work(3), g_work(3))
168      call ga_conjg(g_work(3), g_work(7))
169      dtmp = SQRT(REAL(ga_zdot(g_work(7), g_work(3)), KIND=8))
170c
171c     Initialize the variables
172      maxiter   = niter
173      scv       = REAL(dtmp)
174      e_n       = ONE_C
175      cosn      = ONE
176      res_nrm   = REAL(dtmp)/res_0
177      scpn      = ONE
178      scs       = ZERO_C
179      sinn      = ZERO_C
180      l_n_plus1 = ZERO
181      omega     = ONE
182      rhs_n     = omega * dtmp
183      omegamax  = ONE / omega
184c
185c     Re-zero work arrays used in seeding
186      if(id.eq.0 .and. ldebug)
187     $   write(luout,*) "Projected res_0:", REAL(dtmp)
188      call ga_zero(g_work(4))
189      call ga_zero(g_work(5))
190c
191c     Begin the algorithm
192      do niter = 1, maxiter
193        time = util_timer()
194c
195c       Check if E_n is nonsingular
196        if(ABS(e_n) == ZERO) then
197          ierr = 3
198          return
199        end if
200c
201c       Compute scale factor for the vector w_{n}
202c       Check for invariant subspaces, and scale the vectors if needed.
203        ierr = 0
204        if (scpn * scv < nrm_tol) then
205          ierr = 5 ! A-invarient subspace
206          return
207        end if
208c
209        d_n = ga_zdot(g_work(3), g_work(3))
210        d_n = d_n / (scv**2)
211        if((scv >= max_tol) .or. (scv <= min_tol)) then
212          dtmp = ONE_C / scv
213          call ga_scale(g_work(3), dtmp)
214          scv = ONE
215        end if
216        scv = ONE / scv
217c
218c       Build the vector p_n
219        u_n_minus1 = d_n * l_n_plus1 / e_n
220        dtmp       = u_n_minus1 * scpn / scv
221        call ga_add(ONE_C, g_work(3), -dtmp, g_work(4), g_work(4))
222        scpn = scv
223c
224c       Check if D_n is nonsingular
225        if(ABS(d_n) == ZERO) then
226          ierr = 4
227          return
228        end if
229c
230c       Multiply current residual by the matrix
231        call matvec(rtdb, nlen, g_work(4), g_work(6), xyz, ldebug)
232c
233c       Compute p_n^T A p_n
234        e_n = ga_zdot(g_work(4), g_work(6))
235        e_n = e_n * scpn**2
236c
237c       Build the vector v_{n+1}
238        l_n = e_n / d_n
239        call ga_add(ONE_C, g_work(6), -l_n, g_work(3), g_work(3))
240        call ga_sync()
241c
242c       Compute the scale factor for v_{n+1}
243        call ga_conjg(g_work(3), g_work(7))
244        scv = SQRT(REAL(ga_zdot(g_work(7), g_work(3)), KIND=8))
245        l_n_plus1 = scpn * scv
246c
247c       The QMR code starts here.
248c       Multiply the new column by the previous omeags
249c       Get the next scaling factor omega(i) and update omegamax
250        res_n       = omega * l_n
251        omega       = ONE
252        res_n_plus1 = omega * l_n_plus1
253        omegamax    = MAX(omegamax, ONE/omega)
254c
255c       Apply the previous rotation
256        res_n_minus1 = sinn * res_n
257        res_n        = cosn * res_n
258c
259c       Compute and apply the rotation for the last element
260        call zrotg(res_n, res_n_plus1, cosn, sinn)
261c
262c       Apply the new rotation to the right-hand side vector
263        rhs_n_plus1 = -CONJG(sinn) * rhs_n
264        rhs_n       =  cosn * rhs_n
265c
266c       Compute the next search direction s_i
267        dtmp = res_n_minus1 * scs / scpn
268c       g_work(:,5) = g_work(:,4) + -dtmp * g_work(:,5)
269        call ga_add(ONE_C, g_work(4), -dtmp, g_work(5), g_work(5))
270c
271c       Compute the new QMR iterate, then scale the search direction
272        scs  = scpn / res_n
273        dtmp = scs * rhs_n
274c       g_work(:,1) = g_work(:,1) + dtmp * g_work(:,5)
275        call ga_add(ONE_C, g_work(1), dtmp, g_work(5), g_work(1))
276        if((ABS(scs) >= max_tol) .or. (ABS(scs) <= min_tol)) then
277c         g_work(:,5) = scs * g_work(:,5)
278          call ga_scale(g_work(5), scs)
279          scs = ONE_C
280        end if
281c
282c       Compute the residual norm upper bound
283c       If the scaled upper bound is within one order of magnitude of
284c       the targer convergence norm, compute the true residual norm.
285        rhs_n = rhs_n_plus1
286        res_nrm_upper_bound =
287     $    SQRT(REAL(niter+1, KIND=8))*omegamax*ABS(rhs_n_plus1)/res_0
288        nrm_check = res_nrm_upper_bound
289        if((res_nrm_upper_bound/tol <= TEN).or.(niter >= maxiter)) then
290c         Multiply the current residual by the matrix
291           call matvec(rtdb, nlen, g_work(1), g_work(6), xyz, ldebug)
292           call ga_add(ONE_C, g_work(2), -ONE, g_work(6), g_work(6))
293           call ga_sync()
294           call ga_conjg(g_work(6), g_work(7))
295           res_nrm = SQRT(REAL(ga_zdot(g_work(7), g_work(6)),KIND=8))
296           res_nrm = res_nrm / res_0
297           call ga_sync()
298           nrm_check = res_nrm
299           if(id.eq.0.and.ldebug) write(LuOut,*) "Res_nrm: ", res_nrm
300        else
301          if(id.eq.0.and.ldebug) then
302            write(LuOut,*) "Res_nrm_upper_bound:", res_nrm_upper_bound
303          end if
304        end if
305        time = util_timer() - time
306        if(id.eq.0.and.ldebug) then
307          write(LuOut,*) "Iteration", niter
308          write(LuOut,*) "Time (s):", time
309          write(luout,*) ""
310          call util_flush(LuOut)
311        end if
312c
313c       Check for convergece or termination.  Stop if:
314c         1. algorithm converged;
315c         2. there is an error condition;
316c         3. the residual norm upper bound is smaller than the computed
317c            residual norm by a factor of at least 100;
318c         4. algorithm exceeded the iterations limit
319        if(res_nrm <= tol) then
320          ierr = 0
321          exit
322        else if(ierr /= 0) then
323          exit
324        else if(res_nrm_upper_bound < nrm_check / HUNDRED) then
325          ierr = 3
326          exit
327        end if
328        call ga_sync()
329      end do
330c
331c     Put proper values into output variables
332      if (niter > maxiter) ierr = 3
333      tol = res_nrm
334      call ga_get(g_work(1), 1, nLen, 1, 1, unknowns, 1)
335      if(id.eq.0) then
336        stat = ma_pop_stack(l_seed)
337      end if
338      if(id.eq.0.and.ldebug) write(LuOut,*)
339     $   "End non-seed QMR Complex Routine"
340      end subroutine seeded_qmr_complex
341