1!
2! Copyright (C) 2001-2018 Quantum ESPRESSO group
3! This file is distributed under the terms of the
4! GNU General Public License. See the file `License'
5! in the root directory of the present distribution,
6! or http://www.gnu.org/copyleft/gpl.txt .
7!
8!
9!----------------------------------------------------------------------
10subroutine cgsolve_all (ch_psi, cg_psi, e, d0psi, dpsi, h_diag, &
11     ndmx, ndim, ethr, ik, kter, conv_root, anorm, nbnd, npol)
12  !----------------------------------------------------------------------
13  !
14  !     iterative solution of the linear systems (i=1,nbnd):
15  !
16  !                 ( H - e_i + Q ) * dpsi_i = d0psi_i                      (1)
17  !
18  !     where H is a complex hermitean matrix, e_i is a real scalar, Q is a
19  !     projector on occupied states, dpsi_i and d0psi_ are complex vectors
20  !
21  !     on input:
22  !                 ch_psi   EXTERNAL  name of a subroutine:
23  !                          Calculates  (H-e+Q)*psi products.
24  !                          Vectors psi and psip should be dimensioned
25  !                          (ndmx,nbnd)
26  !
27  !                 cg_psi   EXTERNAL  name of a subroutine:
28  !                          which calculates (h-e)^-1 * psi, with
29  !                          some approximation, e.g. (diag(h)-e)
30  !
31  !                 e        real     unperturbed eigenvalue.
32  !
33  !                 dpsi     contains an estimate of the solution
34  !                          vector.
35  !
36  !                 d0psi    contains the right hand side vector
37  !                          of the system.
38  !
39  !                 ndmx     integer row dimension of dpsi, ecc.
40  !
41  !                 ndim     integer actual row dimension of dpsi
42  !
43  !                 ethr     real     convergence threshold. solution
44  !                          improvement is stopped when the error in
45  !                          eq (1), defined as l.h.s. - r.h.s., becomes
46  !                          less than ethr in norm.
47  !
48  !     on output:  dpsi     contains the refined estimate of the
49  !                          solution vector.
50  !
51  !                 d0psi    is corrupted on exit
52  !
53  !   revised (extensively)       6 Apr 1997 by A. Dal Corso & F. Mauri
54  !   revised (to reduce memory) 29 May 2004 by S. de Gironcoli
55  !
56  USE kinds,          ONLY : DP
57  USE mp_bands,       ONLY : intra_bgrp_comm, inter_bgrp_comm, use_bgrp_in_hpsi
58  USE mp,             ONLY : mp_sum, mp_barrier
59  USE control_flags,  ONLY : gamma_only
60  USE gvect,          ONLY : gstart
61
62  implicit none
63  !
64  !   first the I/O variables
65  !
66  integer :: ndmx, & ! input: the maximum dimension of the vectors
67             ndim, & ! input: the actual dimension of the vectors
68             kter, & ! output: counter on iterations
69             nbnd, & ! input: the number of bands
70             npol, & ! input: number of components of the wavefunctions
71             ik      ! input: the k point
72
73  real(DP) :: &
74             e(nbnd), & ! input: the actual eigenvalue
75             anorm,   & ! output: the norm of the error in the solution
76             h_diag(ndmx*npol,nbnd), & ! input: an estimate of ( H - \epsilon )
77             ethr       ! input: the required precision
78
79  complex(DP) :: &
80             dpsi (ndmx*npol, nbnd), & ! output: the solution of the linear syst
81             d0psi (ndmx*npol, nbnd)   ! input: the known term
82
83  logical :: conv_root ! output: if true the root is converged
84  external ch_psi      ! input: the routine computing ch_psi
85  external cg_psi      ! input: the routine computing cg_psi
86  !
87  !  here the local variables
88  !
89  integer, parameter :: maxter = 400
90  ! the maximum number of iterations
91  integer :: iter, ibnd, ibnd_, lbnd
92  ! counters on iteration, bands
93  integer , allocatable :: conv (:)
94  ! if 1 the root is converged
95
96  complex(DP), allocatable :: g (:,:), t (:,:), h (:,:), hold (:,:)
97  !  the gradient of psi
98  !  the preconditioned gradient
99  !  the delta gradient
100  !  the conjugate gradient
101  !  work space
102  complex(DP) ::  dcgamma, dclambda
103  !  the ratio between rho
104  !  step length
105  REAL(kind=dp), EXTERNAL :: ddot
106  !  the scalar product
107  real(DP), allocatable :: rho (:), rhoold (:), eu (:), a(:), c(:)
108  ! the residue
109  ! auxiliary for h_diag
110  real(DP) :: kter_eff
111  ! account the number of iterations with b
112  ! coefficient of quadratic form
113
114  ! bgrp parallelization auxiliary variables
115  INTEGER :: n_start, n_end, my_nbnd
116  logical :: lsave_use_bgrp_in_hpsi
117  !
118  call start_clock ('cgsolve')
119
120  call divide (inter_bgrp_comm,nbnd,n_start,n_end)
121  my_nbnd = n_end - n_start + 1
122
123  ! allocate workspace (bgrp distributed)
124  allocate ( conv(nbnd) )
125  allocate ( g(ndmx*npol,my_nbnd), t(ndmx*npol,my_nbnd), h(ndmx*npol,my_nbnd), &
126             hold(ndmx*npol,my_nbnd) )
127  allocate ( a(my_nbnd), c(my_nbnd) )
128  allocate ( rho(my_nbnd), rhoold(my_nbnd) )
129  allocate ( eu(my_nbnd) )
130  !      WRITE( stdout,*) g,t,h,hold
131
132  kter_eff = 0.d0 ; conv (1:nbnd) = 0
133
134  g=(0.d0,0.d0); t=(0.d0,0.d0); h=(0.d0,0.d0); hold=(0.d0,0.d0)
135
136  ! bgrp parallelization is done outside h_psi/s_psi. set use_bgrp_in_hpsi temporarily to false
137  lsave_use_bgrp_in_hpsi = use_bgrp_in_hpsi ; use_bgrp_in_hpsi = .false.
138
139  do iter = 1, maxter
140     !
141     !    compute the gradient. can reuse information from previous step
142     !
143     if (iter == 1) then
144        call ch_psi (ndim, dpsi(1,n_start), g, e(n_start), ik, my_nbnd)
145        do ibnd = n_start, n_end ; ibnd_ = ibnd - n_start + 1
146           call zaxpy (ndim, (-1.d0,0.d0), d0psi(1,ibnd), 1, g(1,ibnd_), 1)
147        enddo
148        IF (npol==2) THEN
149           do ibnd = n_start, n_end ; ibnd_ = ibnd - n_start + 1
150              call zaxpy (ndim, (-1.d0,0.d0), d0psi(ndmx+1,ibnd), 1, g(ndmx+1,ibnd_), 1)
151           enddo
152        END IF
153     endif
154     !
155     !    compute preconditioned residual vector and convergence check
156     !
157     lbnd = 0
158     do ibnd = n_start, n_end ;  ibnd_ = ibnd - n_start + 1
159        if (conv (ibnd) .eq.0) then
160           lbnd = lbnd+1
161           call zcopy (ndmx*npol, g (1, ibnd_), 1, h (1, ibnd_), 1)
162           call cg_psi(ndmx, ndim, 1, h(1,ibnd_), h_diag(1,ibnd) )
163
164           IF (gamma_only) THEN
165              rho(lbnd)=2.0d0*ddot(2*ndmx*npol,h(1,ibnd_),1,g(1,ibnd_),1)
166              IF(gstart==2) THEN
167                 rho(lbnd)=rho(lbnd)-DBLE(h(1,ibnd_))*DBLE(g(1,ibnd_))
168              ENDIF
169           ELSE
170              rho(lbnd) = ddot (2*ndmx*npol, h(1,ibnd_), 1, g(1,ibnd_), 1)
171           ENDIF
172        endif
173     enddo
174     kter_eff = kter_eff + DBLE (lbnd) / DBLE (nbnd)
175     call mp_sum( rho(1:lbnd), intra_bgrp_comm )
176     do ibnd = n_end, n_start, -1 ; ibnd_ = ibnd - n_start + 1
177        if (conv(ibnd).eq.0) then
178           rho(ibnd_)=rho(lbnd)
179           lbnd = lbnd -1
180           anorm = sqrt (rho (ibnd_) )
181           !write(6,*) 'anorm', ibnd, anorm ; FLUSH(6)
182           if (anorm.lt.ethr) conv (ibnd) = 1
183        endif
184     enddo
185!
186     conv_root = .true.
187     do ibnd = n_start, n_end
188        conv_root = conv_root.and. (conv (ibnd) .eq.1)
189     enddo
190     if (conv_root) goto 100
191     !
192     !        compute the step direction h. Conjugate it to previous step
193     !
194     lbnd = 0
195     do ibnd = n_start, n_end ; ibnd_ = ibnd - n_start + 1
196        if (conv (ibnd) .eq.0) then
197!
198!          change sign to h
199!
200           call dscal (2 * ndmx * npol, - 1.d0, h (1, ibnd_), 1)
201           if (iter.ne.1) then
202              dcgamma = rho (ibnd_) / rhoold (ibnd_)
203              call zaxpy (ndmx*npol, dcgamma, hold (1, ibnd_), 1, h (1, ibnd_), 1)
204           endif
205
206!
207! here hold is used as auxiliary vector in order to efficiently compute t = A*h
208! it is later set to the current (becoming old) value of h
209!
210           lbnd = lbnd+1
211           call zcopy (ndmx*npol, h (1, ibnd_), 1, hold (1, lbnd), 1)
212           eu (lbnd) = e (ibnd)
213        endif
214     enddo
215     !
216     !        compute t = A*h
217     !
218     call ch_psi (ndim, hold, t, eu, ik, lbnd)
219     !
220     !        compute the coefficients a and c for the line minimization
221     !        compute step length lambda
222     lbnd=0
223     do ibnd = n_start, n_end ; ibnd_ = ibnd - n_start + 1
224        if (conv (ibnd) .eq.0) then
225           lbnd=lbnd+1
226           IF (gamma_only) THEN
227              a(lbnd) = 2.0d0*ddot(2*ndmx*npol,h(1,ibnd_),1,g(1,ibnd_),1)
228              c(lbnd) = 2.0d0*ddot(2*ndmx*npol,h(1,ibnd_),1,t(1,lbnd),1)
229              IF (gstart == 2) THEN
230                 a(lbnd)=a(lbnd)-DBLE(h(1,ibnd_))*DBLE(g(1,ibnd_))
231                 c(lbnd)=c(lbnd)-DBLE(h(1,ibnd_))*DBLE(t(1,lbnd))
232              ENDIF
233           ELSE
234              a(lbnd) = ddot (2*ndmx*npol, h(1,ibnd_), 1, g(1,ibnd_), 1)
235              c(lbnd) = ddot (2*ndmx*npol, h(1,ibnd_), 1, t(1,lbnd), 1)
236           ENDIF
237        end if
238     end do
239     call mp_sum(  a(1:lbnd), intra_bgrp_comm )
240     call mp_sum(  c(1:lbnd), intra_bgrp_comm )
241     lbnd=0
242     do ibnd = n_start, n_end ; ibnd_ = ibnd - n_start + 1
243        if (conv (ibnd) .eq.0) then
244           lbnd=lbnd+1
245           dclambda = CMPLX( - a(lbnd) / c(lbnd), 0.d0,kind=DP)
246           !
247           !    move to new position
248           !
249           call zaxpy (ndmx*npol, dclambda, h(1,ibnd_), 1, dpsi(1,ibnd), 1)
250           !
251           !    update to get the gradient
252           !
253           !g=g+lam
254           call zaxpy (ndmx*npol, dclambda, t(1,lbnd), 1, g(1,ibnd_), 1)
255           !
256           !    save current (now old) h and rho for later use
257           !
258           call zcopy (ndmx*npol, h(1,ibnd_), 1, hold(1,ibnd_), 1)
259           rhoold (ibnd_) = rho (ibnd_)
260        endif
261     enddo
262  enddo
263100 continue
264  ! deallocate workspace not needed anymore
265  deallocate (eu) ; deallocate (rho, rhoold) ; deallocate (a,c) ; deallocate (g, t, h, hold)
266
267  ! wait for all bgrp to complete their task
268  CALL mp_barrier( inter_bgrp_comm )
269
270  ! check if all root converged across all bgrp
271  call mp_sum( conv, inter_bgrp_comm )
272  conv_root = .true.
273  do ibnd = 1, nbnd
274     conv_root = conv_root.and. (conv (ibnd) .eq.1)
275  enddo
276  deallocate (conv)
277
278  ! collect the result
279  if (n_start > 1 ) dpsi(:, 1:n_start-1) = (0.d0,0.d0) ; if (n_end < nbnd) dpsi(:, n_end+1:nbnd) = (0.d0,0.d0)
280  call mp_sum( dpsi, inter_bgrp_comm )
281
282  call mp_sum( kter_eff, inter_bgrp_comm )
283  kter = kter_eff
284
285  ! restore the value of use_bgrp_in_hpsi to its saved value
286  use_bgrp_in_hpsi = lsave_use_bgrp_in_hpsi
287
288
289  call stop_clock ('cgsolve')
290  return
291end subroutine cgsolve_all
292