1c $Id$
2C>
3C> \defgroup kain Krylov subspace Accelerated Inexact Newton method (KAIN)
4C>
5C> \ingroup kain
6C> @{
7C>
8C> \file ga_it2.F
9C> \brief Implementation of Krylov-subspace nonlinear equation solver
10C>
11C> This files provides an implementation of the KAIN method to solve
12C> linear and nonlinear systems of equations [1]. The solvers
13C> `ga_kain` and `ga_lkain` are general purpose routines. They require
14C> a routine to calculate a matrix-vector product and a routine
15C> to apply a preconditioner as arguments.
16C>
17C> [1] R.J. Harrison,
18C>     <i>"Krylov Subspace Accelerated Inexact Newton Method for Linear
19C>     and Nonlinear Systems",</i>
20C>     Journal of Computational Chemistry (2004) <b>25</b>, pp 328-334,
21C>     DOI: <a href="https://doi.org/10.1002/jcc.10108">10.1002/jcc.10108</a>
22C>
23C> @}
24c
25C> \ingroup kain
26C> @{
27C>
28C> \brief A trivial matrix-vector product routine
29C>
30C> This routine is simply there to test the code for system solvers.
31C> The matrix-vector product it calculates is simply a product involving
32C> an explicitly stored matrix. This matrix is passed through a common
33C> block.
34C>
35      subroutine test_product(acc,g_x, g_Ax)
36      implicit none
37#include "errquit.fh"
38      double precision acc !< [Input] Accuracy (not used here)
39      integer g_x  !< [Input] Global array with vector
40      integer g_Ax !< [Output] Global array with matrix-vector product
41c
42      integer g_a, g_b
43      common /testme/ g_A, g_B
44      integer n, nvec, typex, typeax
45      double complex one, zero
46c
47      one  = cmplx(1.0d0,0.0d0)
48      zero = cmplx(0.0d0,0.0d0)
49c
50      call ga_inquire(g_Ax, typeax, n, nvec)
51      call ga_inquire(g_x, typex, n, nvec)
52      if (typex.ne.typeax)
53     $  call errquit("test_product: g_x not same type as g_Ax",
54     $               typex,UERR)
55      call ga_zero(g_Ax)
56      call ga_dgemm('n', 'n', n, nvec, n, one, g_A, g_x, zero, g_Ax)
57c
58      end
59C>
60C> \brief A trivial non-linear matrix-vector product routine
61C>
62C> This routine is simply there to test the code for system solvers.
63C> The matrix-vector product it calculates is simply a product involving
64C> an explicitly stored matrix and it adds another vector to achieve
65C> something non-linear. The additional matrix and vector are passed
66C> through a common block.
67C>
68      subroutine test_nlproduct(acc,g_x, g_Ax)
69      implicit none
70#include "errquit.fh"
71      double precision acc
72      integer g_x, g_Ax
73c
74      integer g_a, g_b
75      common /testme/ g_A, g_B
76      integer n, nvec, typex, typeax, typeb
77      double complex mone, one, zero
78c
79      mone = cmplx(-1.0d0,0.0d0)
80      one  = cmplx(1.0d0,0.0d0)
81      zero = cmplx(0.0d0,0.0d0)
82c
83      call ga_inquire(g_b, typeb, n, nvec)
84      call ga_inquire(g_Ax, typeax, n, nvec)
85      call ga_inquire(g_x, typex, n, nvec)
86      if (typex.ne.typeax)
87     $  call errquit("test_nlproduct: g_x not same type as g_Ax",
88     $               typex,UERR)
89      if (typex.ne.typeb)
90     $  call errquit("test_nlproduct: g_x not same type as g_b",
91     $               typex,UERR)
92      call ga_zero(g_Ax)
93      call ga_dgemm('n', 'n', n, nvec, n, one, g_A, g_x, zero, g_Ax)
94      call ga_add(one, g_AX, mone, g_B, g_AX)
95c
96      end
97C>
98C> \brief A trivial preconditioner
99C>
100C> This routine is simply there to test the code for system solvers.
101C> It simply scales the elements of a vector by
102C> \f{eqnarray*}{
103C>    x_i = \frac{x_i}{i - \epsilon}
104C> \f}
105C> where \f$ \epsilon \f$ is the shift.
106C>
107      subroutine test_precond(g_x,shift)
108      implicit none
109#include "mafdecls.fh"
110#include "errquit.fh"
111#include "global.fh"
112      integer g_x
113      double precision shift
114      integer n, nvec, type
115      integer i, ivec
116      double precision x
117      double complex zx
118c
119      call ga_inquire(g_x, type, n, nvec)
120      if (ga_nodeid() .eq. 0) then
121         if (type.eq.MT_DBL) then
122            do ivec = 1, nvec
123               do i = 1, n
124                  call ga_get(g_x, i, i, ivec, ivec, x, 1)
125                  x = x / (dble(i) - shift)
126                  call ga_put(g_x, i, i, ivec, ivec, x, 1)
127               end do
128            end do
129         else if (type.eq.MT_DCPL) then
130            do ivec = 1, nvec
131               do i = 1, n
132                  call ga_get(g_x, i, i, ivec, ivec, zx, 1)
133                  zx = zx / (dble(i) - shift)
134                  call ga_put(g_x, i, i, ivec, ivec, zx, 1)
135               end do
136            end do
137         else
138            call errquit('test_precond: illegal type',type,UERR)
139         endif
140      end if
141      call ga_sync()
142c
143      end
144C>
145C> \brief test driver for ga_lkain
146C>
147C> This routine drives a standard test of the ga_lkain
148C> and the ga_kain solver.
149C> Because the test is fixed the subroutine takes no inputs
150C> and it prints a summary on standard output.
151C>
152      subroutine ga_lkain_test()
153      implicit none
154#include "errquit.fh"
155#include "mafdecls.fh"
156#include "global.fh"
157      integer n, nvec, maxsub, maxiter
158c     parameter (n=300, nvec=1)
159      parameter (n=10, nvec=1)
160      double complex angle
161      integer g_x
162      double precision util_random
163      external test_precond, test_product, test_nlproduct, util_random
164c
165      integer g_tx, g_ta, g_tb
166      integer g_a, g_b
167      common /testme/ g_a, g_b
168c
169      integer i
170      logical  ga_copy_dz
171      external ga_copy_dz
172****      integer info
173****      double precision a(n,n), b(n), w(n)
174c
175      maxsub = 4*nvec
176      maxiter = 100
177c
178      if (.not. ga_create(MT_DBL, n, nvec, 'testx', 0, 0, g_x))
179     $     call errquit('test kain', 1, GA_ERR)
180      if (.not. ga_create(MT_DBL, n, nvec, 'testb', 0, 0, g_b))
181     $     call errquit('test kain', 2, GA_ERR)
182      if (.not. ga_create(MT_DBL, n, n, 'testA', 0, 0, g_A))
183     $     call errquit('test kain', 3, GA_ERR)
184c
185      call ga_ran_fill(g_A, 1, n, 1, n)
186      call ga_ran_fill(g_b, 1, n, 1, nvec)
187      if (ga_nodeid() .eq. 0) then
188         do i = 1, n
189            call ga_put(g_a, i, i, i, i, 0.5*dble(i), 1)
190         end do
191      end if
192      call ga_sync()
193c
194      call ga_copy(g_b, g_x)
195      call test_precond(g_x,0.0d0)
196      call util_flush(6)
197      call ga_print(g_b)
198      call ga_print(g_x)
199      call util_flush(6)
200c
201****      call ga_get(g_a, 1, n, 1, n, a, n)
202****      call ga_get(g_b, 1, n, 1, nvec, b, n)
203****      call dgesv(n, nvec, a, n, w, b, n, info)
204****      write(6,*) ' info ', info
205****      call ga_put(g_x, 1, n, 1, nvec, b, n)
206c
207C     This should have something other than zero
208      call ga_lkain(0,g_x, g_b,test_product,test_precond,1d-6,maxsub,
209     $     maxiter,.true.,.true.)
210      call util_flush(6)
211      call ga_print(g_x)
212      call util_flush(6)
213      call ga_summarize(0)
214      call ma_summarize_allocated_blocks()
215c
216      write(6,*)
217      write(6,*)
218      write(6,*)
219      write(6,*) ' DOING NON LINEAR '
220      write(6,*)
221      write(6,*)
222      write(6,*)
223      call ga_copy(g_b, g_x)
224      call test_precond(g_x,0.0d0)
225      maxsub = 10
226c
227      call ga_kain(g_x,
228     $     test_nlproduct, test_precond,
229     $     1d-6,
230     $     10.0d0, 10.0d0,
231     $     maxsub, maxiter,
232     $     .true.)
233c
234      call util_flush(6)
235      call ga_print(g_x)
236      call util_flush(6)
237      call ga_summarize(0)
238      call ma_summarize_allocated_blocks()
239c
240c     Same but now COMPLEX
241c
242      g_tx = g_x
243      g_tb = g_b
244      g_tA = g_A
245      maxsub = 4*nvec
246      maxiter = 100
247      if (.not. ga_create(MT_DCPL, n, nvec, 'testx', 0, 0, g_x))
248     $     call errquit('test kain', 1, GA_ERR)
249      if (.not. ga_create(MT_DCPL, n, nvec, 'testb', 0, 0, g_b))
250     $     call errquit('test kain', 2, GA_ERR)
251      if (.not. ga_create(MT_DCPL, n, n, 'testA', 0, 0, g_A))
252     $     call errquit('test kain', 3, GA_ERR)
253c
254c     call ga_ran_fill(g_A, 1, n, 1, n)
255c     call ga_ran_fill(g_b, 1, n, 1, nvec)
256c     if (ga_nodeid() .eq. 0) then
257c        do i = 1, n
258c           call ga_put(g_a, i, i, i, i, 0.5*cmplx(dble(i)), 1)
259c        end do
260c     end if
261      if (.not. ga_copy_dz(g_tb,g_b))
262     +  call errquit('test kain: copy g_b failed',0,GA_ERR)
263      if (.not. ga_copy_dz(g_tA,g_A))
264     +  call errquit('test kain: copy g_A failed',0,GA_ERR)
265      call ga_sync()
266      angle = cmplx(1.0d0,util_random(0))
267      angle = angle/abs(angle)
268      call ga_scale(g_b,angle)
269      angle = cmplx(1.0d0,util_random(0))
270      angle = angle/abs(angle)
271      call ga_scale(g_A,angle)
272c
273      if (.not. ga_destroy(g_tx))
274     $     call errquit('test kain', 1, GA_ERR)
275      if (.not. ga_destroy(g_tb))
276     $     call errquit('test kain', 2, GA_ERR)
277      if (.not. ga_destroy(g_tA))
278     $     call errquit('test kain', 3, GA_ERR)
279c
280      call ga_copy(g_b, g_x)
281      call test_precond(g_x,0.0d0)
282      call util_flush(6)
283      call ga_print(g_b)
284      call ga_print(g_x)
285      call util_flush(6)
286c
287****      call ga_get(g_a, 1, n, 1, n, a, n)
288****      call ga_get(g_b, 1, n, 1, nvec, b, n)
289****      call dgesv(n, nvec, a, n, w, b, n, info)
290****      write(6,*) ' info ', info
291****      call ga_put(g_x, 1, n, 1, nvec, b, n)
292c
293C     This should have something other than zero
294      call ga_lkain(0,g_x, g_b,test_product,test_precond,1d-6,maxsub,
295     $     maxiter,.true.,.true.)
296      call util_flush(6)
297      call ga_print(g_x)
298      call util_flush(6)
299      call ga_summarize(0)
300      call ma_summarize_allocated_blocks()
301c
302      write(6,*)
303      write(6,*)
304      write(6,*)
305      write(6,*) ' DOING NON LINEAR '
306      write(6,*)
307      write(6,*)
308      write(6,*)
309      call ga_copy(g_b, g_x)
310      call test_precond(g_x,0.0d0)
311      maxsub = 10
312c
313      call ga_kain(g_x,
314     $     test_nlproduct, test_precond,
315     $     1d-6,
316     $     10.0d0, 10.0d0,
317     $     maxsub, maxiter,
318     $     .true.)
319c
320      call util_flush(6)
321      call ga_print(g_x)
322      call util_flush(6)
323      call ga_summarize(0)
324      call ma_summarize_allocated_blocks()
325      call errquit('done',0, MEM_ERR)
326c
327      if (.not. ga_destroy(g_x))
328     $     call errquit('test kain', 1, GA_ERR)
329      if (.not. ga_destroy(g_b))
330     $     call errquit('test kain', 2, GA_ERR)
331      if (.not. ga_destroy(g_A))
332     $     call errquit('test kain', 3, GA_ERR)
333c
334      end
335C>
336C> \brief The Linear System Solver
337C>
338C> The routine solves a linear system of equations using a Krylov
339C> subspace method:
340C> \f{eqnarray*}{
341C>   F(x) &=& b \\\\
342C>   F(x) &=& Ax
343C> \f}
344C> The Right-Hand Side (RHS) matrix and the solution
345C> matrix store the data pertaining to a single linear system in
346C> columns. Using multiple columns the solver can solve multiple
347C> equations simultaneously. Internally one Krylov subspace is used
348C> for all systems of equations, and subspace vectors contribute to
349C> the solutions of all equations.
350C>
351C> An special argument `odiff` is provided that allows one to use a
352C> difference strategy for the required matrix-vector products. I.e.
353C> rather than computing
354C> \f{eqnarray*}{
355C>     y &=& Ax_i
356C> \f}
357C> each iteration one uses a formulation that requires
358C> \f{eqnarray*}{
359C>     y' &=& A(x_i-x_{i-1})
360C> \f}
361C> As \f$ x \f$ converges \f$ x_i - x_{i-1} \f$ approaches \f$ 0 \f$.
362C> This may be exploited in the matrix-vector product to achieve
363C> considerable savings.
364C>
365C> Furthermore the routine takes two subroutines as arguments. One
366C> provides the matrix-vector product and has the interface
367C> ~~~~
368C> subroutine product(acc,g_x,g_Ax)
369C> ~~~~
370C> which evaluates
371C> \f{eqnarray*}{
372C>   g_{Ax} &=& A g_x
373C> \f}
374C> where
375C>
376C> - acc: the accuracy required for the matrix-vector elements
377C>
378C> - g_x: is a global array containing the input vectors in columns
379C>
380C> - g_Ax: is a global array that will contain the matrix-vector product
381C>   results in columns
382C>
383C> Note that `ga_lkain` may pass global arrays with a different number
384C> of vectors than the number of linear systems being solved. Hence
385C> the product routine should check the actual dimension of g_x and
386C> g_Ax.
387C>
388C> The second subroutine provides a preconditioner and has the
389C> interface
390C> ~~~~
391C> subroutine precond(g_x,shift)
392C> ~~~~
393C> where
394C>
395C> - g_x: is a global array holding a set of vectors that will have
396C>   the preconditioner applied to them
397C>
398C> - shift: is a shift to ensure the preconditioner is non-singular
399C>
400C> Note that the number of vectors may also here be different from the
401C> number of linear systems being solved. Hence the dimension of g_x
402C> should be tested. Finally, the shift is not used in this linear
403C> system solver. The argument is provided to ensure that the same
404C> preconditioner can be used in eigenvalue solvers where the shift is
405C> used.
406C>
407      subroutine ga_lkain(rtdb,g_x, g_b, product, precond,
408     $     tol, mmaxsub, maxiter, odiff, oprint)
409      implicit none
410#include "errquit.fh"
411#include "mafdecls.fh"
412#include "global.fh"
413#include "util.fh"
414#include "rtdb.fh"
415#include "stdio.fh"
416c
417      integer g_x          !< [Input/Output] Initial guess/solution
418      integer g_b          !< [Input] Right-hand side vectors
419      external product     !< [Input] product routine
420      external precond     !< [Input] preconditioner routine
421      double precision tol !< [Input] convergence threshold
422      integer mmaxsub      !< [Input] maximum subspace dimension
423      integer maxiter      !< [Input] maximum no. of iterations
424      logical odiff        !< [Input] use differences in product
425      logical oprint       !< [Input] print flag
426      integer rtdb         !< [Input] the RTDB handle
427c
428c     Solves the linear equations A(X)-B=0 for multiple vectors.
429c
430c     call product(acc,g_x, g_Ax)
431c     . acc is the accuracy trequired for each element of the product
432c     . g_x contains the vectors and g_Ax should be filled
433c     .     with the product vectors.  The no. of vectors (columns) in
434c     . g_x might differ from the no. of vectors input to ga_lkain().
435c
436c     call precond(g_x,shift)
437c     . apply preconditioning directly to the vectors in g_x with the
438c     . optional shift (not used here but used by the diagonalizer)
439c
440c     On input g_x should contain an initial guess.  It returns the
441c     solution.
442c
443c     maxsub should be at least 3*nvec and can be beneficially increased
444c     to about 10*nvec.
445c
446c     Needs to be extended to store the sub-space vectors out-of-core
447c     at least while the product() routine is being executed.
448c
449      integer dimi, dimj
450      integer iter, n, nvec, nsub, isub, typex, typeb, maxsub
451      integer g_y, g_Ay, g_Ax, g_r, g_a, g_bb, g_c, g_xold, g_Axold
452      double precision rmax, acc, ga_svd_tol
453      double complex zero, one, mone
454      logical converged
455      logical odebug
456      character*255 filestub,filesoln
457c
458      logical  file_write_ga, file_read_ga
459      external file_write_ga, file_read_ga
460c
461      logical solver_restart
462      external solver_restart
463c
464      logical do_restart
465c
466      zero = cmplx(0.0d0,0.0d0)
467      one  = cmplx(1.0d0,0.0d0)
468      mone = cmplx(-1.0d0,0.0d0)
469c
470      odebug = util_print('debug lsolve', print_never) .and.
471     $     ga_nodeid().eq.0
472      if (.not.rtdb_get(rtdb,'cphf:acc',mt_dbl,1,acc)) acc=1d-4*tol
473      call ga_inquire(g_b, typeb, dimi, dimj)
474      call ga_inquire(g_x, typex, n, nvec)
475      if (typeb.ne.typex)
476     $  call errquit("ga_lkain: solution and right-hand-side vectors not
477     $ of same type",0,UERR)
478      maxsub = mmaxsub          ! So don't modify input scalar arg
479      if (maxsub .lt. 3*nvec) maxsub = 3*nvec
480      maxsub = (maxsub/nvec)*nvec
481c
482      if (.not.rtdb_get(rtdb,'cphf:ga_svd_tol',mt_dbl,1,
483     &                  ga_svd_tol)) then
484c       See comment just before the ga_svd_solve_seq call to
485c       understand these choices.
486        if ((100*maxsub).lt.n) then
487          ga_svd_tol = 1d-7
488        else
489          ga_svd_tol = 1d-14
490        endif
491      endif
492c
493      if (oprint .and. ga_nodeid().eq.0) then
494         write(6,1) n, nvec, maxsub, maxiter, tol, util_wallsec()
495 1       format(//,'Iterative solution of linear equations',/,
496     $        '  No. of variables', i9,/,
497     $        '  No. of equations', i9,/,
498     $        '  Maximum subspace', i9,/,
499     $        '        Iterations', i9,/,
500     $        '       Convergence', 1p,d9.1,/,
501     $        '        Start time', 0p,f9.1,/)
502         call util_flush(6)
503      end if
504c
505      if (.not. ga_create(typex, n, maxsub, 'lkain: Y',
506     $     0, 0, g_y))
507     $     call errquit('lkain: failed allocating subspace', maxsub,
508     &       GA_ERR)
509      if (.not. ga_create(typex, n, maxsub, 'lkain: Ay',
510     $     0, 0, g_Ay))
511     $     call errquit('lkain: failed allocating subspace2', maxsub,
512     &       GA_ERR)
513      if (.not. ga_create(typex, n, nvec, 'lkain: Ax',
514     $     0, 0, g_Ax))
515     $     call errquit('lkain: failed allocating subspace3', nvec,
516     &       GA_ERR)
517      if (.not. ga_create(typex, n, nvec, 'lkain: r',
518     $     0, 0, g_r))
519     $     call errquit('lkain: failed allocating subspace4', nvec,
520     &       GA_ERR)
521      if (odiff) then
522         if (.not. ga_create(typex, n, nvec, 'lkain: xold',
523     $        0, 0, g_xold))
524     $        call errquit('lkain: failed allocating subspace5', nvec,
525     &       GA_ERR)
526         if (.not. ga_create(typex, n, nvec, 'lkain: Axold',
527     $        0, 0, g_Axold))
528     $        call errquit('lkain: failed allocating subspace6', nvec,
529     &       GA_ERR)
530         call ga_zero(g_xold)
531         call ga_zero(g_Axold)
532      end if
533      call ga_zero(g_y)
534      call ga_zero(g_Ay)
535      call ga_zero(g_Ax)
536      call ga_zero(g_r)
537      call ga_sync()
538c
539c     Solution file
540c
541      if (.not. rtdb_cget(rtdb, 'solver:filestub', 1, filestub))
542     &       filestub = 'lkain_soln'
543      if (.not. rtdb_cget(rtdb, 'solver:filesoln', 1, filesoln))
544     &       filesoln = 'lkain_soln'
545#if 0
546      call util_file_name(filestub,.false.,.false.,filesoln)
547#else
548      call cphf_fname(filestub,filesoln)
549#endif
550c     if (ga_nodeid().eq.0) write(luout,*) "ga_lkain filestub:",filestub
551c     if (ga_nodeid().eq.0) write(luout,*) "ga_lkain filesoln:",filesoln
552c
553c     Check if this is a restart
554c
555      if (solver_restart(rtdb)) then
556        do_restart = .true.
557        if(.not.file_read_ga(filesoln,g_x)) do_restart = .false.
558        if (do_restart) then
559          if (ga_nodeid().eq.0)
560     &     write(luout,*) "Restarting solution from: ", filesoln
561        else
562          if (ga_nodeid().eq.0)
563     &     write(luout,*) "Error in restart solution: ", filesoln
564          goto 50
565        end if  ! do_restart
566      end if  ! solver_restart
567c
568 50   continue
569c
570      if (oprint .and. ga_nodeid().eq.0) then
571         write(6,2)
572         call util_flush(6)
573 2       format(/
574     $        '   iter   nsub   residual    time',/,
575     $        '   ----  ------  --------  ---------')
576      end if
577      nsub = 0
578      converged = .false.
579      do iter = 1, maxiter
580         if (odiff) then
581            call ga_add(one, g_x, mone, g_xold,  g_x)
582         end if
583         call product(acc,g_x, g_Ax)
584         if (odiff) then
585            call ga_add(one, g_Ax, one, g_Axold, g_Ax)
586            call ga_add(one, g_x,  one, g_xold,  g_x)
587            call ga_copy(g_x, g_xold)
588            call ga_copy(g_Ax, g_Axold)
589         end if
590         call ga_zero(g_r)
591         call ga_sync()
592         call ga_add(one, g_b, mone, g_Ax, g_r) ! The residual
593         call ga_sync()
594         call ga_maxelt(g_r, rmax)
595c
596         if (.not. rtdb_put(rtdb, 'lkain:rmax', mt_dbl, 1, rmax))
597     $     call errquit('ga_lkain: rmax put failed', 1, RTDB_ERR)
598c
599         if (oprint .and. ga_nodeid().eq.0) then
600            write(6,3) iter, nsub+nvec, rmax, util_wallsec()
601            call util_flush(6)
602 3          format(' ', i5, i7, 3x,1p,d9.2,0p,f10.1)
603         end if
604c
605c        Check convergence
606c
607         if (rmax .lt. tol) then
608            converged = .true.
609            goto 100
610         end if
611         call precond(g_Ax,0.0d0)
612         call precond(g_r,0.0d0)
613         call ga_sync()
614c
615c        Copy the vectors to the subspace work area
616c
617         call ga_copy_patch('n',
618     $        g_Ax, 1, n, 1, nvec,
619     $        g_Ay, 1, n, nsub+1, nsub+nvec)
620         call ga_copy_patch('n',
621     $        g_x, 1, n, 1, nvec,
622     $        g_y, 1, n, nsub+1, nsub+nvec)
623         nsub = nsub + nvec
624c
625c        Form and solve the subspace equations using SVD in order
626c        to manage near linear dependence in the subspace.
627c
628         if (.not. ga_create(typex, nsub, nsub, 'lkain: A', 0, 0, g_a))
629     $        call errquit('lkain: allocating g_a?', nsub, GA_ERR)
630         if (.not. ga_create(typex, nsub, nvec, 'lkain: B', 0, 0,g_bb))
631     $        call errquit('lkain: allocating g_bb?', nsub, GA_ERR)
632         if (.not. ga_create(typex, nsub, nvec, 'lkain: C', 0, 0, g_c))
633     $        call errquit('lkain: allocating g_c?', nsub, GA_ERR)
634         call ga_zero(g_a)
635         call ga_zero(g_bb)
636         call ga_zero(g_c)
637         call ga_sync()
638         call ga_dgemm('c','n',nsub,nsub,n,one,g_y,g_Ay,zero,g_a)
639         call ga_sync()
640         call ga_dgemm('c','n',nsub,nvec,n,one,g_y,g_r,zero,g_bb)
641         call ga_sync()
642         if (odebug) then
643           call util_flush(6)
644           call ga_print(g_a)
645           call ga_print(g_bb)
646           call util_flush(6)
647         endif
648c
649c     The threshold used here should reflect the accuracy in the
650c     products.  If very accurate products are used, then there is big
651c     advantage for small cases (maxsub close to n) in using a very
652c     small threshold in the SVD solve (e.g., 1e-14), but for more
653c     realistic examples (maxsub << n) there is only a little
654c     advantage and in the precence of real noise in the products
655c     screening with a realistic threshold is important.
656c
657         call ga_svd_solve_seq(g_a,g_bb,g_c,ga_svd_tol)
658         if (odebug) then
659           call util_flush(6)
660           call ga_print(g_c)
661           call util_flush(6)
662         endif
663c
664c     Form and add the correction, in parts, onto the solution
665c
666         call ga_sync()
667         call ga_dgemm('n','n',n,nvec,nsub,mone,g_Ay,g_c,one,g_r)
668         if (odebug) then
669            call util_flush(6)
670            write(6,*) ' The update in the complement '
671            call util_flush(6)
672            call ga_print(g_r)
673         end if
674         call ga_sync()
675         call ga_add(one, g_r, one, g_x, g_x)
676         call ga_sync()
677         call ga_dgemm('n','n',n,nvec,nsub,one,g_y,g_c,zero,g_r)
678         if (odebug) then
679            call util_flush(6)
680            write(6,*) ' The update in the subspace '
681            call util_flush(6)
682            call ga_print(g_r)
683         end if
684         call ga_sync()
685         call ga_add(one, g_r, one, g_x, g_x)
686         call ga_sync()
687c
688c        Save intermediate solution
689c
690         if(.not.file_write_ga(filesoln,g_x)) call errquit
691     $     ('ga_lkain:could not write solution',1, DISK_ERR)
692c
693         if (.not. ga_destroy(g_a)) call errquit('lkain: a',0, GA_ERR)
694         if (.not. ga_destroy(g_bb))call errquit('lkain: b',0, GA_ERR)
695         if (.not. ga_destroy(g_c)) call errquit('lkain: c',0, GA_ERR)
696c
697c     Reduce the subspace as necessary
698c
699         if (nsub .eq. maxsub) then
700            do isub = nvec+1, maxsub, nvec
701               call ga_copy_patch('n',
702     $              g_Ay, 1, n, isub, isub+nvec-1,
703     $              g_Ax, 1, n, 1, nvec)
704               call ga_copy_patch('n',
705     $              g_Ax, 1, n, 1, nvec,
706     $              g_Ay, 1, n, isub-nvec, isub-1)
707c
708               call ga_copy_patch('n',
709     $              g_y, 1, n, isub, isub+nvec-1,
710     $              g_Ax, 1, n, 1, nvec)
711               call ga_copy_patch('n',
712     $              g_Ax, 1, n, 1, nvec,
713     $              g_y, 1, n, isub-nvec, isub-1)
714            end do
715            nsub = nsub - nvec
716            call ga_sync()
717         end if
718c
719      end do
720 100  continue
721c
722      if (odiff) then
723         if (.not. ga_destroy(g_xold)) call errquit('lkain: destroy',1,
724     &       GA_ERR)
725         if (.not. ga_destroy(g_Axold)) call errquit('lkain: destroy',2,
726     &       GA_ERR)
727      end if
728      if (.not. ga_destroy(g_Ax)) call errquit('lkain: destroy',20,
729     &       GA_ERR)
730      if (.not. ga_destroy(g_Ay)) call errquit('lkain: destroy',3,
731     &       GA_ERR)
732      if (.not. ga_destroy(g_y)) call errquit('lkain: destroy',4,
733     &       GA_ERR)
734      if (.not. ga_destroy(g_r)) call errquit('lkain: destroy',5,
735     &       GA_ERR)
736c
737      if (.not. converged) call errquit('lkain: not converged',0,
738     &       CALC_ERR)
739c
740      end
741C>
742C> \brief The Non-Linear System Solver
743C>
744C> The routine solves a non-linear system of equations using a Krylov
745C> subspace method.
746C> \f{eqnarray*}{
747C>   F(x) &=& 0
748C> \f}
749C> For linear systems the vector function in the above equation would
750C> be \f$ F(x) = Ax - b \f$ but this routine also allows for non-linear
751C> vector functions.
752C> The Right-Hand Side (RHS) matrix and the solution
753C> matrix store the data pertaining to a single linear system in
754C> columns. Using multiple columns the solver can solve multiple
755C> equations simultaneously. Internally one Krylov subspace is used
756C> for all systems of equations, and subspace vectors contribute to
757C> the solutions of all equations.
758C>
759C> Furthermore the routine takes two subroutines as arguments. One
760C> provides the vector residual and has the interface
761C> ~~~~
762C> subroutine residual(acc,g_x,g_Ax)
763C> ~~~~
764C> where
765C>
766C> - acc: the accuracy required for the matrix-vector elements
767C>
768C> - g_x: is a global array containing the input vectors in columns
769C>
770C> - g_Ax: is a global array that will contain the residual-vector
771C>   results in columns
772C>
773C> Note that `ga_lkain` may pass global arrays with a different number
774C> of vectors than the number of linear systems being solved. Hence
775C> the product routine should check the actual dimension of g_x and
776C> g_Ax.
777C>
778C> The second subroutine provides a preconditioner and has the
779C> interface
780C> ~~~~
781C> subroutine precond(g_x,shift)
782C> ~~~~
783C> where
784C>
785C> - g_x: is a global array holding a set of vectors that will have
786C>   the preconditioner applied to them
787C>
788C> - shift: is a shift to ensure the preconditioner is non-singular
789C>
790C> Note that the number of vectors may also here be different from the
791C> number of linear systems being solved. Hence the dimension of g_x
792C> should be tested. Finally, the shift is not used in this linear
793C> system solver. The argument is provided to ensure that the same
794C> preconditioner can be used in eigenvalue solvers where the shift is
795C> used.
796C>
797      subroutine ga_kain(
798     $     g_x,
799     $     residual, precond,
800     $     tol,
801     $     trustmin, trustmax,
802     $     maxsub, maxiter,
803     $     oprint)
804      implicit none
805#include "errquit.fh"
806#include "mafdecls.fh"
807#include "global.fh"
808#include "util.fh"
809c
810      integer g_x               !< [Input/Output] Initial guess/solution
811      external residual         !< [Input] residual routine
812      external precond          !< [Input] preconditioner routine
813      double precision tol      !< [Input] convergence threshold
814      double precision trustmin !< [Input] range to constrain trust radius
815      double precision trustmax !< [Input] range to constrain trust radius
816      integer maxsub            !< [Input] maximum subspace dimension
817      integer maxiter           !< [Input] maximum no. of iterations
818      logical oprint            !< [Input] print flag
819c
820c     Solves the non-linear equations f(X)=0 for multiple vectors.
821c
822c     call residual(acc, g_x, g_Ax)
823c     . acc is the accuracy trequired for each element of the residual
824c     . g_x contains the vectors and g_Ax should be filled
825c     .     with the residual vectors.  The no. of vectors (columns) in
826c     . g_x might differ from the no. of vectors input to ga_kain().
827c
828c     call precond(g_x,shift)
829c     . apply preconditioning directly to the vectors in g_x with the
830c     . optional shift (not used here but used by the diagonalizer)
831c
832c     On input g_x should contain an initial guess.  It returns the
833c     solution.
834c
835      integer maxmaxsub
836      parameter (maxmaxsub = 20)
837      integer iter, n, nvec, nsub, isub, jsub, typex
838      integer g_y, g_Ay, g_Ax, g_delta, g_a, g_b, g_c
839      integer mdtob_nsub1
840      double precision rmax, acc, trust
841      double precision a(maxmaxsub,maxmaxsub), b(maxmaxsub),
842     $                 c(maxmaxsub), csum
843      double complex za(maxmaxsub,maxmaxsub), zb(maxmaxsub),
844     $               zc(maxmaxsub), zcsum
845      double complex zero, one, mone
846      logical converged
847      logical odebug
848c
849      zero = cmplx(0.0d0,0.0d0)
850      one  = cmplx(1.0d0,0.0d0)
851      mone = cmplx(-1.0d0,0.0d0)
852c
853      trust = trustmin
854cold      acc = 0.01d0*tol
855      acc = 0.0001d0*tol
856      if (maxsub .gt. maxmaxsub) maxsub = maxmaxsub
857      odebug = util_print('debug lsolve', print_never) .and.
858     $     ga_nodeid().eq.0
859c
860      call ga_inquire(g_x, typex, n, nvec)
861      if (nvec .ne. 1) call errquit('kain: nvec?', nvec, GA_ERR)
862c
863      if (oprint .and. ga_nodeid().eq.0) then
864         write(6,1) n, maxsub, tol, trustmin, trustmax, util_wallsec()
865 1       format(//,'Iterative solution of non-linear equations',/,
866     $        '  No. of variables', i9,/,
867     $        '  Maximum subspace', i9,/,
868     $        '       Convergence', 1p,d9.1,/,
869     $        '     Trust min/max', 0p,2f6.2,
870     $        '        Start time', 0p,f9.1,/)
871         call util_flush(6)
872      end if
873c
874      if (.not. ga_create(typex, n, maxsub, 'kain: Y',
875     $     0, 0, g_y))
876     $     call errquit('kain: failed allocating subspace', maxsub,
877     &       GA_ERR)
878      if (.not. ga_create(typex, n, maxsub, 'kain: Ay',
879     $     0, 0, g_Ay))
880     $     call errquit('kain: failed allocating subspace2', maxsub,
881     &       GA_ERR)
882      if (.not. ga_create(typex, n, 1, 'kain: Ax',
883     $     0, 0, g_Ax))
884     $     call errquit('kain: failed allocating subspace3', 1, GA_ERR)
885      call ga_zero(g_y)
886      call ga_zero(g_Ay)
887c
888      if (oprint .and. ga_nodeid().eq.0) then
889         write(6,2)
890 2       format(/
891     $        '   iter   nsub   residual    time',/,
892     $        '   ----  ------  --------  ---------')
893      end if
894      nsub = 0
895      converged = .false.
896      do iter = 1, maxiter
897         call ga_zero(g_Ax)
898         call residual(acc, g_x, g_Ax)
899         call ga_maxelt(g_Ax, rmax)
900         if (oprint .and. ga_nodeid().eq.0) then
901            write(6,3) iter, nsub+1, rmax, util_wallsec()
902 3          format(' ', i5, i7, 3x,1p,d9.2,0p,f10.1)
903         end if
904         if (rmax .lt. tol) then
905            converged = .true.
906            goto 100
907         end if
908c
909c     Copy the vectors to the subspace work area
910c
911         call precond(g_Ax,0.0d0)
912         call ga_copy_patch('n',
913     $        g_Ax, 1, n, 1, 1,
914     $        g_Ay, 1, n, nsub+1, nsub+1)
915         call ga_copy_patch('n',
916     $        g_x, 1, n, 1, 1,
917     $        g_y, 1, n, nsub+1, nsub+1)
918         nsub = nsub + 1
919c
920c     Not converged ... make the update
921c
922         g_delta = g_Ax         ! A reminder that these two are aliased
923         call ga_scale(g_delta, mone)
924c
925         if (iter .gt. 1) then
926c
927c     Form the reduced space matrix and RHS
928c
929           if (typex.eq.MT_DBL) then
930             call ga_local_mdot(n,nsub,nsub,a,maxmaxsub,g_y,g_Ay)
931             do isub = 1, nsub-1
932                b(isub) = -(a(isub,nsub) - a(nsub,nsub))
933             end do
934             do isub = 1, nsub-1
935                do jsub = 1, nsub-1
936                   a(isub,jsub) = a(isub,jsub)
937     $                  - a(nsub,jsub) - a(isub,nsub) + a(nsub,nsub)
938                end do
939             end do
940           else if (typex.eq.MT_DCPL) then
941             call ga_local_zmdot(n,nsub,nsub,za,maxmaxsub,g_y,g_Ay)
942             do isub = 1, nsub-1
943                zb(isub) = -(za(isub,nsub) - za(nsub,nsub))
944             end do
945             do isub = 1, nsub-1
946                do jsub = 1, nsub-1
947                   za(isub,jsub) = za(isub,jsub)
948     $                  - za(nsub,jsub) - za(isub,nsub) + za(nsub,nsub)
949                end do
950             end do
951           else
952             call errquit("ga_kain: illegal type",typex,UERR)
953           endif
954c
955c     Solve the subspace equations (lazily using existing GA routine)
956c
957            if (.not. ga_create(typex,nsub-1,nsub-1,'kain: A',
958     $           nsub-1,nsub-1,g_a))
959     $           call errquit('kain: allocating g_a?', nsub, GA_ERR)
960            if (.not. ga_create(typex,nsub-1,1,'kain: B',nsub-1,1,g_b))
961     $           call errquit('kain: allocating g_bb?', nsub, GA_ERR)
962            if (.not. ga_create(typex,nsub-1,1,'kain: C',nsub-1,1,g_c))
963     $           call errquit('kain: allocating g_c?', nsub, GA_ERR)
964            if (ga_nodeid() .eq. 0) then
965              if (typex.eq.MT_DBL) then
966                call ga_put(g_a, 1, nsub-1, 1, nsub-1, a, maxmaxsub)
967                call ga_put(g_b, 1, nsub-1, 1, 1, b, 1)
968              else if (typex.eq.MT_DCPL) then
969                call ga_put(g_a, 1, nsub-1, 1, nsub-1, za, maxmaxsub)
970                call ga_put(g_b, 1, nsub-1, 1, 1, zb, 1)
971              else
972                call errquit("ga_kain: illegal type",typex,UERR)
973              endif
974            end if
975            call ga_sync
976c
977            call ga_svd_solve_seq(g_a,g_b,g_c,1d-14)
978c
979            if (odebug) then
980              call util_flush(6)
981              call ga_print(g_c)
982              call util_flush(6)
983           endif
984           mdtob_nsub1=MA_sizeof(MT_DBL,nsub-1,MT_BYTE)
985            if (typex.eq.MT_DBL) then
986              if (ga_nodeid() .eq. 0)
987     $           call ga_get(g_c, 1, nsub-1, 1, 1, c, 1)
988              call ga_brdcst(1, c, mdtob_nsub1, 0)
989              write(6,*) ' KAIN SUBSPACE COEFFS'
990              call output(c, 1, nsub-1, 1, 1, nsub-1, 1, 1)
991            else if (typex.eq.MT_DCPL) then
992              if (ga_nodeid() .eq. 0)
993     $           call ga_get(g_c, 1, nsub-1, 1, 1, zc, 1)
994              call ga_brdcst(1, zc, 2*mdtob_nsub1, 0)
995              write(6,*) ' KAIN SUBSPACE COEFFS'
996              call zoutput(zc, 1, nsub-1, 1, 1, nsub-1, 1, 1)
997            endif
998            call ga_sync
999            if (.not. ga_destroy(g_a)) call errquit('kain: a',0, GA_ERR)
1000            if (.not. ga_destroy(g_b)) call errquit('kain: b',0, GA_ERR)
1001            if (.not. ga_destroy(g_c)) call errquit('kain: c',0, GA_ERR)
1002c
1003c     Form the correction
1004c
1005            if (typex.eq.MT_DBL) then
1006              csum = 0.0d0
1007              do isub = 1, nsub-1
1008                 csum = csum + c(isub)
1009                 call ga_add_patch( c(isub),  g_y, 1, n, isub, isub,
1010     $                one, g_delta, 1, n, 1, 1, g_delta, 1, n, 1, 1)
1011                 call ga_add_patch(-c(isub), g_Ay, 1, n, isub, isub,
1012     $                one, g_delta, 1, n, 1, 1, g_delta, 1, n, 1, 1)
1013              end do
1014              call ga_add_patch(-csum,  g_y, 1, n, nsub, nsub,
1015     $             one, g_delta, 1, n, 1, 1, g_delta, 1, n, 1, 1)
1016              call ga_add_patch( csum,  g_Ay, 1, n, nsub, nsub,
1017     $             one, g_delta, 1, n, 1, 1, g_delta, 1, n, 1, 1)
1018            else if (typex.eq.MT_DCPL) then
1019              zcsum = 0.0d0
1020              do isub = 1, nsub-1
1021                 zcsum = zcsum + zc(isub)
1022                 call ga_add_patch( zc(isub),  g_y, 1, n, isub, isub,
1023     $                one, g_delta, 1, n, 1, 1, g_delta, 1, n, 1, 1)
1024                 call ga_add_patch(-zc(isub), g_Ay, 1, n, isub, isub,
1025     $                one, g_delta, 1, n, 1, 1, g_delta, 1, n, 1, 1)
1026              end do
1027              call ga_add_patch(-zcsum,  g_y, 1, n, nsub, nsub,
1028     $             one, g_delta, 1, n, 1, 1, g_delta, 1, n, 1, 1)
1029              call ga_add_patch( zcsum,  g_Ay, 1, n, nsub, nsub,
1030     $             one, g_delta, 1, n, 1, 1, g_delta, 1, n, 1, 1)
1031            endif
1032         endif
1033c
1034c     Step restriction
1035c
1036         call ga_maxelt(g_delta, rmax)
1037         if (rmax .gt. trust) then
1038            if (oprint) write(6,*) ' RESTRICTION ', rmax, trust
1039            call ga_scale(g_delta, trust/rmax)
1040         end if
1041c
1042         call ga_add(one, g_delta, one, g_x, g_x)
1043c
1044c     Reduce the subspace as necessary (note g_delta=g_Ax destroyed)
1045c
1046         if (nsub .eq. maxsub) then
1047            do isub = 2, maxsub
1048               call ga_copy_patch('n',
1049     $              g_Ay, 1, n, isub, isub,
1050     $              g_Ax, 1, n, 1, 1)
1051               call ga_copy_patch('n',
1052     $              g_Ax, 1, n, 1, 1,
1053     $              g_Ay, 1, n, isub-1, isub-1)
1054c
1055               call ga_copy_patch('n',
1056     $              g_y, 1, n, isub, isub,
1057     $              g_Ax, 1, n, 1, 1)
1058               call ga_copy_patch('n',
1059     $              g_Ax, 1, n, 1, 1,
1060     $              g_y, 1, n, isub-1, isub-1)
1061            end do
1062            nsub = nsub - 1
1063         end if
1064c
1065      end do
1066 100  continue
1067c
1068      if (.not. ga_destroy(g_Ax)) call errquit('kain: destroy',20,
1069     &       GA_ERR)
1070      if (.not. ga_destroy(g_Ay)) call errquit('kain: destroy',3,
1071     &       GA_ERR)
1072      if (.not. ga_destroy(g_y)) call errquit('kain: destroy',4, GA_ERR)
1073c
1074      if (.not. converged) call errquit('kain: not converged',0,
1075     &       CALC_ERR)
1076c
1077      end
1078C>
1079C> \brief A direct linear system solver
1080C>
1081C> Solve for X from the linear equations
1082C> \f{eqnarray*}{
1083C>    A*X &=& B
1084C> \f}
1085C> or more explicitly
1086C> \f{eqnarray*}{
1087C>    A(m,n)*X(n,nvec) = B(m,nvec)
1088C> \f}
1089C> Where \f$ A \f$ is a general real matrix (not necessarily square, or
1090C> symmetric, or full rank) and \f$ X \f$ and \f$ B \f$ are matrices with one or more
1091C> columns representing the solutions and right hand sides.  Singular
1092C> values of \f$ A \f$ less than \f$ tol\f$  are neglected. \f$ X \f$ is returned.
1093C>
1094C> If the SVD of \f$ A \f$ is \f$ U*values*VT \f$, then the solution
1095C> is of the form
1096C> \f{eqnarray*}{
1097C>    V*(1/values)*UT*B
1098C> \f}
1099C> where the reciprocal of `values` less than `tol` are neglected.
1100C
1101      subroutine ga_svd_solve_seq(g_a, g_b, g_x, tol)
1102      implicit none
1103#include "errquit.fh"
1104#include "global.fh"
1105#include "mafdecls.fh"
1106#include "util.fh"
1107      integer g_a !< [Input] the matrix stored explicitly
1108      integer g_b !< [Input] the right-hand-sides
1109      integer g_x !< [Input/Output] the guess/solution
1110      double precision tol !< [Input] the tolerance
1111c
1112c     Solve for X from the linear equations
1113c
1114c     A*X = B
1115c
1116c     A(m,n)*X(n,nvec) = B(m,nvec)
1117c
1118c     Where A is a general real matrix (not necessarily square, or
1119c     symmetric, or full rank) and X and B are matrices with one or more
1120c     columns representing the solutions and right hand sides.  Singular
1121c     values of A less than tol are neglected.  X is returned.
1122c
1123c     If the SVD of A is U*values*VT, then the solution
1124c     is of the form
1125c
1126c     V*(1/values)*UT*B
1127c
1128c     where the reciprocal of values less than tol are neglected.
1129c
1130      integer m,n,nn,type,nvec,nsing,l_val, k_val,g_u,g_vt,i,g_tmp
1131      double complex one, zero
1132      logical oprint
1133c
1134      oprint = util_print('debug svdsolve', print_high) .and.
1135     $     ga_nodeid().eq.0
1136c
1137      one  = cmplx(1.0d0,0.0d0)
1138      zero = cmplx(0.0d0,0.0d0)
1139c
1140      call ga_inquire(g_a, type, m, n)
1141      call ga_inquire(g_b, type, nn, nvec)
1142      if (nn .ne. n) call errquit('gasvdsol: b does not conform',nn,
1143     &       GA_ERR)
1144      nsing = min(m,n)
1145      if (.not. ma_push_get(MT_DBL, nsing, 'gasvdsol', l_val, k_val))
1146     $     call errquit('gasvdsol: val',nsing, MA_ERR)
1147      if (.not. ga_create(type,m,nsing,'gasvd',0,0,g_u))
1148     $     call errquit('gasvdsol: u',m*nsing, GA_ERR)
1149      if (.not. ga_create(type,nsing,n,'gasvd',0,0,g_vt))
1150     $     call errquit('gasvdsol: u',nsing*n, GA_ERR)
1151      if (.not. ga_create(type,nsing,nvec,'gasvd',0,0,g_tmp))
1152     $     call errquit('gasvdsol: tmp',nsing*nvec, GA_ERR)
1153      call ga_zero(g_tmp)
1154c
1155      call ga_svd_seq(g_a, g_u,g_vt,dbl_mb(k_val))
1156c
1157      do i = 0, nsing-1
1158        if (dbl_mb(k_val+i) .lt. tol) then
1159          if (ga_nodeid() .eq. 0 .and. oprint) then
1160            write(6,*) ' neglecting ', i+1, dbl_mb(k_val+i)
1161          endif
1162          dbl_mb(k_val+i) = 0.0d0
1163        else
1164          dbl_mb(k_val+i) = 1.0d0/dbl_mb(k_val+i)
1165        end if
1166      end do
1167c
1168      call ga_dgemm('c','n',nsing,nvec,m,one,g_u,g_b,zero,g_tmp)
1169      call ga_scale_lh(g_tmp,dbl_mb(k_val))
1170      call ga_zero(g_x)
1171      call ga_dgemm('c','n',n,nvec,nsing,one,g_vt,g_tmp,zero,g_x)
1172c
1173      if (.not. ga_destroy(g_tmp)) call errquit('gasvdsol: des',1,
1174     &       GA_ERR)
1175      if (.not. ga_destroy(g_u)) call errquit('gasvdsol: des',2,
1176     &       GA_ERR)
1177      if (.not. ga_destroy(g_vt)) call errquit('gasvdsol: des',3,
1178     &       GA_ERR)
1179      if (.not. ma_pop_stack(l_val)) call errquit('gasvdsol: pop',4,
1180     &       GA_ERR)
1181c
1182      end
1183C>
1184C> \brief Perform SVD on rectangular matrix
1185C>
1186      subroutine ga_svd_seq(g_a, g_u, g_vt, values)
1187      implicit none
1188#include "errquit.fh"
1189#include "global.fh"
1190#include "mafdecls.fh"
1191      integer g_a, g_u, g_vt
1192      double precision values(*)
1193c
1194c     Perform SVD on rectangular matrix
1195c
1196c     nsing = min(n,m)
1197c     g_a(m,n)      --- input matrix
1198c     g_u(m,nsing)  --- left singular vectors (output)
1199c     g_vt(nsing,n) --- right singular vectors transposed (output)
1200c     values(nsing) --- singular values (output)
1201c
1202c     A = U*values*VT
1203c
1204c     A possible parallel algorithm is to diagonalize ATA to get
1205c     V and AAT to get U --- both have values**2 as eigenvalues.
1206c
1207      integer n, m, type, l_a, k_a, l_u, k_u, l_vt, k_vt,
1208     $     l_work, k_work, lwork, info, nsing
1209      integer l_rwork, k_rwork
1210c
1211      call ga_inquire(g_a, type, m, n)
1212      nsing = min(m,n)
1213      if (ga_nodeid() .eq. 0) then
1214         lwork = 10*max(m,n)
1215         if (.not. ma_push_get(type, m*n, 'gasvd', l_a, k_a))
1216     $        call errquit('gasvd: a',m*n, MA_ERR)
1217         if (.not. ma_push_get(type, m*nsing, 'gasvd', l_u, k_u))
1218     $        call errquit('gasvd: u',m*nsing, MA_ERR)
1219         if (.not. ma_push_get(type, nsing*n, 'gasvd', l_vt, k_vt))
1220     $        call errquit('gasvd: vt',nsing*n, MA_ERR)
1221         if (.not. ma_push_get(type, lwork, 'gasvd', l_work, k_work))
1222     $        call errquit('gasvd: work',lwork, MA_ERR)
1223         if (type.eq.MT_DCPL) then
1224            if (.not. ma_push_get(MT_DBL, lwork, 'gasvd',
1225     $                            l_rwork, k_rwork))
1226     $           call errquit('gasvd: work',lwork, MA_ERR)
1227         endif
1228c
1229         if (type.eq.MT_DBL) then
1230c
1231           call ga_get(g_a, 1, m, 1, n, dbl_mb(k_a), m)
1232c
1233           call dgesvd('s','s',m,n,dbl_mb(k_a),m,values,
1234     $          dbl_mb(k_u),m,dbl_mb(k_vt),nsing,
1235     $          dbl_mb(k_work),lwork,info)
1236           if (info .ne. 0) call errquit('gasvd:d: failed',info,MEM_ERR)
1237c
1238           call ga_put(g_u,  1, n,     1, nsing, dbl_mb(k_u),  n)
1239           call ga_put(g_vt, 1, nsing, 1, m,     dbl_mb(k_vt), n)
1240c
1241         else if (type.eq.MT_DCPL) then
1242c
1243           call ga_get(g_a, 1, m, 1, n, dcpl_mb(k_a), m)
1244c
1245           call zgesvd('s','s',m,n,dcpl_mb(k_a),m,values,
1246     $          dcpl_mb(k_u),m,dcpl_mb(k_vt),nsing,
1247     $          dcpl_mb(k_work),lwork,dbl_mb(k_rwork),info)
1248           if (info .ne. 0) call errquit('gasvd:z: failed',info,MEM_ERR)
1249c
1250           call ga_put(g_u,  1, n,     1, nsing, dcpl_mb(k_u),  n)
1251           call ga_put(g_vt, 1, nsing, 1, m,     dcpl_mb(k_vt), n)
1252c
1253         else
1254c
1255           call errquit('gasvd: illegal data type',type,UERR)
1256c
1257         endif
1258c
1259         if (.not. ma_chop_stack(l_a)) call errquit('gasvd ma',0,
1260     &       MA_ERR)
1261      end if
1262      call ga_sync()
1263      call ga_brdcst(1,values,n*8,0)
1264      call ga_sync()
1265c
1266      end
1267c
1268      logical function solver_restart(rtdb)
1269c
1270      implicit none
1271c
1272#include "errquit.fh"
1273#include "mafdecls.fh"
1274#include "global.fh"
1275#include "util.fh"
1276#include "rtdb.fh"
1277#include "stdio.fh"
1278c
1279      integer rtdb
1280c
1281      integer restr
1282c
1283c     Check for the restart flag
1284c
1285      solver_restart = .false.
1286      if (.not.rtdb_get(rtdb,'solver:restart',mt_int,1,restr))
1287     &     restr= 0
1288      if (restr.gt.0) solver_restart = .true.
1289c
1290      return
1291      end
1292c
1293c     some parameters for the cphf solver (ga_lkain())
1294c
1295      subroutine solver_setup(rtdb,restr)
1296c
1297      implicit none
1298#include "errquit.fh"
1299c
1300#include "global.fh"
1301#include "hess_info.fh"
1302#include "mafdecls.fh"
1303#include "rtdb.fh"
1304#include "nwc_const.fh"
1305#include "msgids.fh"
1306#include "stdio.fh"
1307#include "util.fh"
1308#include "inp.fh"
1309c
1310      integer rtdb
1311      integer restr
1312c
1313      character*256 cphf_sol
1314c
1315c     set up the files for the cphf solution
1316c
1317      call cphf_fname('cphf_sol',cphf_sol)
1318      if (.not. rtdb_cput(rtdb, 'solver:filestub', 1, 'cphf_sol'))
1319     *  call errquit('solver_setup: file stub',555,RTDB_ERR)
1320      if (.not. rtdb_cput(rtdb, 'solver:filesoln', 1,
1321     C     cphf_sol(1:inp_strlen(cphf_sol))))
1322     *  call errquit('solver_setup: intermediate solution file',555,
1323     &       RTDB_ERR)
1324      if (.not. rtdb_put(rtdb, 'solver:restart', MT_int, 1, restr))
1325     *  call errquit('solver_setup: solver restart flag',555, RTDB_ERR)
1326c
1327      return
1328      end
1329c
1330C> @}
1331