1!
2! GNU General Public License. See the file `License'
3! in the root directory of the present distribution,
4! or http://www.gnu.org/copyleft/gpl.txt .
5!
6!
7!----------------------------------------------------------------------
8subroutine ccgsolve_all (ch_psi, ccg_psi, e, d0psi, dpsi, h_diag, &
9     ndmx, ndim, ethr, ik, kter, conv_root, anorm, nbnd, npol, freq_c)
10  !----------------------------------------------------------------------
11  !
12  !     iterative solution of the linear systems (i=1,nbnd):
13  !
14  !                 ( H - e_i + Q ) * dpsi_i = d0psi_i                      (1)
15  !
16  !     where H is a complex hermitean matrix, e_i is a real scalar, Q is a
17  !     projector on occupied states, dpsi_i and d0psi_ are complex vectors
18  !
19  !     on input:
20  !                 ch_psi   EXTERNAL  name of a subroutine:
21  !                          Calculates  (H-e+Q)*psi products.
22  !                          Vectors psi and psip should be dimensioned
23  !                          (ndmx,nbnd)
24  !
25  !                 cg_psi   EXTERNAL  name of a subroutine:
26  !                          which calculates (h-e)^-1 * psi, with
27  !                          some approximation, e.g. (diag(h)-e)
28  !
29  !                 e        real     unperturbed eigenvalue.
30  !
31  !                 dpsi     contains an estimate of the solution
32  !                          vector.
33  !
34  !                 d0psi    contains the right hand side vector
35  !                          of the system.
36  !
37  !                 ndmx     integer row dimension of dpsi, ecc.
38  !
39  !                 ndim     integer actual row dimension of dpsi
40  !
41  !                 ethr     real     convergence threshold. solution
42  !                          improvement is stopped when the error in
43  !                          eq (1), defined as l.h.s. - r.h.s., becomes
44  !                          less than ethr in norm.
45  !
46  !     on output:  dpsi     contains the refined estimate of the
47  !                          solution vector.
48  !
49  !                 d0psi    is corrupted on exit
50  !
51  !   revised (extensively)       6 Apr 1997 by A. Dal Corso & F. Mauri
52  !   revised (to reduce memory) 29 May 2004 by S. de Gironcoli
53  !
54  USE kinds,          ONLY : DP
55  USE mp_bands,       ONLY : intra_bgrp_comm, inter_bgrp_comm, use_bgrp_in_hpsi
56  USE mp,             ONLY : mp_sum, mp_barrier
57  USE control_flags,  ONLY : gamma_only
58  USE gvect,          ONLY : gstart
59
60  implicit none
61  !
62  !   first the I/O variables
63  !
64  integer :: ndmx, & ! input: the maximum dimension of the vectors
65             ndim, & ! input: the actual dimension of the vectors
66             kter, & ! output: counter on iterations
67             nbnd, & ! input: the number of bands
68             npol, & ! input: number of components of the wavefunctions
69             ik      ! input: the k point
70
71  real(DP) :: &
72             e(nbnd), & ! input: the actual eigenvalue
73             anorm,   & ! output: the norm of the error in the solution
74             ethr       ! input: the required precision
75
76  complex(DP) :: &
77             dpsi (ndmx*npol, nbnd), & ! output: the solution of the linear syst
78             h_diag(ndmx*npol,nbnd), & ! input: an estimate of ( H - \epsilon -w)
79                                       ! w is complex
80             freq_c                , & ! complex frequency
81             d0psi (ndmx*npol, nbnd)   ! input: the known term
82
83  logical :: conv_root ! output: if true the root is converged
84  external ch_psi      ! input: the routine computing ch_psi
85  external ccg_psi      ! input: the routine computing cg_psi
86  !
87  !  here the local variables
88  !
89  integer, parameter :: maxter = 1000
90  ! the maximum number of iterations
91  integer :: iter, ibnd, ibnd_, lbnd
92  ! counters on iteration, bands
93  integer , allocatable :: conv (:)
94  ! if 1 the root is converged
95
96  complex(DP), allocatable :: g (:,:),  t (:,:),  h (:,:),  hold (:,:)
97  COMPLEX(DP), ALLOCATABLE :: gs (:,:), ts (:,:), hs (:,:), hsold (:,:)
98  !  the gradient of psi
99  !  the preconditioned gradient
100  !  the delta gradient
101  !  the conjugate gradient
102  !  work space
103  complex(DP) ::  dcgamma, dcgamma1, dclambda, dclambda1
104  !  the ratio between rho
105  !  step length
106  complex(DP), external :: zdotc
107  REAL(kind=dp), EXTERNAL :: ddot
108  !  the scalar product
109  real(DP), allocatable    :: eu (:)
110  complex(DP), allocatable :: rho (:), rhoold (:), euc (:), a(:), c(:)
111  ! the residue
112  ! auxiliary for h_diag
113  real(DP) :: kter_eff
114  ! account the number of iterations with b
115  ! coefficient of quadratic form
116  integer, allocatable :: indb(:)
117
118  ! bgrp parallelization auxiliary variables
119  INTEGER :: n_start, n_end, my_nbnd
120  logical :: lsave_use_bgrp_in_hpsi
121  !
122  call start_clock ('ccgsolve')
123
124  call divide (inter_bgrp_comm,nbnd,n_start,n_end)
125  my_nbnd = n_end - n_start + 1
126
127!  my_nbnd =nbnd
128
129  ! allocate workspace (bgrp distributed)
130  allocate ( conv(nbnd) )
131  allocate ( g(ndmx*npol,my_nbnd), t(ndmx*npol,my_nbnd), h(ndmx*npol,my_nbnd), &
132             hold(ndmx*npol,my_nbnd) )
133  allocate ( gs(ndmx*npol,my_nbnd), ts(ndmx*npol,my_nbnd), hs(ndmx*npol,my_nbnd), &
134             hsold(ndmx*npol,my_nbnd) )
135  allocate ( a(my_nbnd), c(my_nbnd) )
136  allocate ( rho(my_nbnd), rhoold(my_nbnd) )
137  allocate ( eu(my_nbnd) )
138  allocate ( euc(my_nbnd) )
139  allocate ( indb(my_nbnd) )
140  !      WRITE( stdout,*) g,t,h,hold
141
142  kter_eff = 0.d0 ; conv (1:nbnd) = 0
143
144  g=(0.d0,0.d0); t=(0.d0,0.d0); h=(0.d0,0.d0); hold=(0.d0,0.d0)
145  gs=(0.d0,0.d0); ts=(0.d0,0.d0); hs=(0.d0,0.d0); hsold=(0.d0,0.d0)
146
147  rho= (0.0d0, 0.0d0); rhoold=(0.0d0,0.d0)
148
149  ! bgrp parallelization is done outside h_psi/s_psi. set use_bgrp_in_hpsi temporarily to false
150  lsave_use_bgrp_in_hpsi = use_bgrp_in_hpsi ; use_bgrp_in_hpsi = .false.
151
152  do ibnd = 1, nbnd
153     indb(ibnd) = ibnd
154  enddo
155
156  eu = e
157  do iter = 1, maxter
158     !
159     !    compute the gradient. can reuse information from previous step
160     !
161     if (iter == 1) then
162        do ibnd = n_start, n_end
163           euc(ibnd) = CMPLX(e(indb(ibnd))+DREAL(freq_c), DIMAG(freq_c), KIND=DP)
164        ENDDO
165
166        call ch_psi (ndim, dpsi, g, euc, ik, my_nbnd)
167
168        do ibnd = n_start, n_end ; ibnd_ = ibnd - n_start + 1
169           call zaxpy (ndim, (-1.d0,0.d0), d0psi(1,ibnd), 1, g(1,ibnd_), 1)
170        enddo
171        IF (npol==2) THEN
172           do ibnd = n_start, n_end ; ibnd_ = ibnd - n_start + 1
173              call zaxpy (ndim, (-1.d0,0.d0), d0psi(ndmx+1,ibnd), 1, g(ndmx+1,ibnd_), 1)
174           enddo
175        END IF
176        gs(:,:) = CONJG(g(:,:))
177     endif
178     !
179     !    compute preconditioned residual vector and convergence check
180     !
181     lbnd = 0
182     do ibnd = n_start, n_end ;  ibnd_ = ibnd - n_start + 1
183        if (conv (ibnd) .eq.0) then
184           lbnd = lbnd+1
185           call zcopy (ndmx*npol, g (1, ibnd_), 1, h (1, ibnd_), 1)
186           call zcopy (ndmx*npol, gs (1, ibnd_), 1, hs (1, ibnd_), 1)
187
188           call ccg_psi(ndmx, ndim, 1, h(1,ibnd_), h_diag(1,ibnd), 1 )
189           call ccg_psi(ndmx, ndim, 1, hs(1,ibnd_), h_diag(1,ibnd), -1 )
190
191           IF (gamma_only) THEN
192              rho(lbnd)=2.0d0*ddot(2*ndmx*npol,h(1,ibnd_),1,g(1,ibnd_),1)
193              IF(gstart==2) THEN
194                 rho(lbnd)=rho(lbnd)-DBLE(h(1,ibnd_))*DBLE(g(1,ibnd_))
195              ENDIF
196           ELSE
197              rho(lbnd) = zdotc (ndmx*npol, hs(1,ibnd_), 1, g(1,ibnd_), 1)
198           ENDIF
199
200        endif
201     enddo
202     kter_eff = kter_eff + DBLE (lbnd) / DBLE (nbnd)
203     call mp_sum( rho(1:lbnd), intra_bgrp_comm )
204     do ibnd = n_end, n_start, -1 ; ibnd_ = ibnd - n_start + 1
205        if (conv(ibnd).eq.0) then
206           rho(ibnd_)=rho(lbnd)
207           lbnd = lbnd -1
208           anorm = sqrt ( abs (rho (ibnd_)) )
209           if (anorm.lt.ethr) conv (ibnd) = 1
210        endif
211     enddo
212     !
213     conv_root = .true.
214     do ibnd = n_start, n_end
215        conv_root = conv_root.and. (conv (ibnd) .eq.1)
216     enddo
217     if (conv_root) goto 100
218     !
219     !        compute the step direction h. Conjugate it to previous step
220     !
221     lbnd = 0
222     do ibnd = n_start, n_end ; ibnd_ = ibnd - n_start + 1
223        if (conv (ibnd) .eq.0) then
224           !
225           ! change sign to h and hs
226           !
227           call dscal (2 * ndmx * npol, - 1.d0, h (1, ibnd_), 1)
228           call dscal (2 * ndmx * npol, - 1.d0, hs (1, ibnd_), 1)
229
230           if (iter.ne.1) then
231              dcgamma = rho (ibnd_) / rhoold (ibnd_)
232              dcgamma1 = CONJG(dcgamma)
233
234              call zaxpy (ndmx*npol, dcgamma, hold (1, ibnd_), 1, h (1, ibnd_), 1)
235              CALL zaxpy (ndmx*npol, dcgamma1, hsold (1, ibnd_), 1, hs (1, ibnd_), 1)
236
237           endif
238
239           !
240           ! here hold is used as auxiliary vector in order to efficiently compute t = A*h
241           ! it is later set to the current (becoming old) value of h
242           !
243           lbnd = lbnd+1
244           call zcopy (ndmx*npol, h (1, ibnd_), 1, hold (1, lbnd), 1)
245           CALL zcopy (ndmx*npol, hs (1, ibnd_), 1, hsold (1, lbnd), 1)
246
247           indb (lbnd) = ibnd
248        endif
249     enddo
250
251     !
252     !        compute t = A*h and  ts= A^+ * h
253     !
254
255     DO ibnd=1,lbnd
256        euc(ibnd) = CMPLX(e(indb(ibnd))+DREAL(freq_c), DIMAG(freq_c), KIND=DP)
257     ENDDO
258
259     call ch_psi (ndim, hold, t, euc, ik, lbnd)
260
261     call ch_psi (ndim, hsold, ts, conjg(euc), ik, lbnd)
262
263     !
264     !        compute the coefficients a and c for the line minimization
265     !        compute step length lambda
266     !
267
268     lbnd=0
269     do ibnd = n_start, n_end ; ibnd_ = ibnd - n_start + 1
270        if (conv (ibnd) .eq.0) then
271           lbnd=lbnd+1
272
273           IF (gamma_only) THEN
274              a(lbnd) = 2.0d0*ddot(2*ndmx*npol,hs(1,ibnd_),1,g(1,ibnd_),1)
275              c(lbnd) = 2.0d0*ddot(2*ndmx*npol,hs(1,ibnd_),1,t(1,lbnd),1)
276              IF (gstart == 2) THEN
277                 a(lbnd)=a(lbnd)-DBLE(hs(1,ibnd_))*DBLE(g(1,ibnd_))
278                 c(lbnd)=c(lbnd)-DBLE(hs(1,ibnd_))*DBLE(t(1,lbnd))
279
280              ENDIF
281           ELSE
282              a(lbnd) = zdotc (ndmx*npol, hs(1,ibnd_), 1, g(1,ibnd_), 1)
283              c(lbnd) = zdotc (ndmx*npol, hs(1,ibnd_), 1, t(1,lbnd), 1)
284
285           ENDIF
286
287
288        end if
289     end do
290
291     !
292     call mp_sum(  a(1:lbnd), intra_bgrp_comm )
293     call mp_sum(  c(1:lbnd), intra_bgrp_comm )
294     lbnd=0
295     do ibnd = n_start, n_end ; ibnd_ = ibnd - n_start + 1
296        if (conv (ibnd) .eq.0) then
297           lbnd=lbnd+1
298           dclambda = - a(lbnd) / c(lbnd)
299           dclambda1 = CONJG(dclambda)
300           !
301           !    move to new position
302           !
303
304           call zaxpy (ndmx*npol, dclambda, h(1,ibnd_), 1, dpsi(1,ibnd), 1)
305
306           !
307           !    update to get the gradient
308           !
309           !g=g+lam
310           call zaxpy (ndmx*npol, dclambda, t(1,lbnd), 1, g(1,ibnd_), 1)
311           CALL zaxpy (ndmx*npol, dclambda1, ts(1,lbnd), 1, gs(1,ibnd_), 1)
312
313           !
314           !    save current (now old) h and rho for later use
315           !
316           call zcopy (ndmx*npol, h(1,ibnd_), 1, hold(1,ibnd_), 1)
317           CALL zcopy (ndmx*npol, hs(1,ibnd_), 1, hsold(1,ibnd_), 1)
318
319           rhoold (ibnd_) = rho (ibnd_)
320
321        endif
322     enddo
323  enddo
324
325100 continue
326  ! deallocate workspace not needed anymore
327  deallocate (eu) ; deallocate (rho, rhoold) ; deallocate (a,c) ; deallocate (g, t, h, hold)
328  deallocate (euc)
329
330  ! wait for all bgrp to complete their task
331  CALL mp_barrier( inter_bgrp_comm )
332
333  ! check if all root converged across all bgrp
334  call mp_sum( conv, inter_bgrp_comm )
335  conv_root = .true.
336  do ibnd = 1, nbnd
337     conv_root = conv_root.and. (conv (ibnd) .eq.1)
338  enddo
339  deallocate (conv)
340
341  ! collect the result
342  if (n_start > 1 ) dpsi(:, 1:n_start-1) = (0.d0,0.d0) ; if (n_end < nbnd) dpsi(:, n_end+1:nbnd) = (0.d0,0.d0)
343  call mp_sum( dpsi, inter_bgrp_comm )
344
345  call mp_sum( kter_eff, inter_bgrp_comm )
346  kter = kter_eff
347
348  ! restore the value of use_bgrp_in_hpsi to its saved value
349  use_bgrp_in_hpsi = lsave_use_bgrp_in_hpsi
350
351
352  call stop_clock ('ccgsolve')
353  return
354end subroutine ccgsolve_all
355