1! 2! Copyright (C) 2001-2018 Quantum ESPRESSO group 3! This file is distributed under the terms of the 4! GNU General Public License. See the file `License' 5! in the root directory of the present distribution, 6! or http://www.gnu.org/copyleft/gpl.txt . 7! 8! 9!---------------------------------------------------------------------- 10subroutine cgsolve_all (ch_psi, cg_psi, e, d0psi, dpsi, h_diag, & 11 ndmx, ndim, ethr, ik, kter, conv_root, anorm, nbnd, npol) 12 !---------------------------------------------------------------------- 13 ! 14 ! iterative solution of the linear systems (i=1,nbnd): 15 ! 16 ! ( H - e_i + Q ) * dpsi_i = d0psi_i (1) 17 ! 18 ! where H is a complex hermitean matrix, e_i is a real scalar, Q is a 19 ! projector on occupied states, dpsi_i and d0psi_ are complex vectors 20 ! 21 ! on input: 22 ! ch_psi EXTERNAL name of a subroutine: 23 ! Calculates (H-e+Q)*psi products. 24 ! Vectors psi and psip should be dimensioned 25 ! (ndmx,nbnd) 26 ! 27 ! cg_psi EXTERNAL name of a subroutine: 28 ! which calculates (h-e)^-1 * psi, with 29 ! some approximation, e.g. (diag(h)-e) 30 ! 31 ! e real unperturbed eigenvalue. 32 ! 33 ! dpsi contains an estimate of the solution 34 ! vector. 35 ! 36 ! d0psi contains the right hand side vector 37 ! of the system. 38 ! 39 ! ndmx integer row dimension of dpsi, ecc. 40 ! 41 ! ndim integer actual row dimension of dpsi 42 ! 43 ! ethr real convergence threshold. solution 44 ! improvement is stopped when the error in 45 ! eq (1), defined as l.h.s. - r.h.s., becomes 46 ! less than ethr in norm. 47 ! 48 ! on output: dpsi contains the refined estimate of the 49 ! solution vector. 50 ! 51 ! d0psi is corrupted on exit 52 ! 53 ! revised (extensively) 6 Apr 1997 by A. Dal Corso & F. Mauri 54 ! revised (to reduce memory) 29 May 2004 by S. de Gironcoli 55 ! 56 USE kinds, ONLY : DP 57 USE mp_bands, ONLY : intra_bgrp_comm, inter_bgrp_comm, use_bgrp_in_hpsi 58 USE mp, ONLY : mp_sum, mp_barrier 59 USE control_flags, ONLY : gamma_only 60 USE gvect, ONLY : gstart 61 62 implicit none 63 ! 64 ! first the I/O variables 65 ! 66 integer :: ndmx, & ! input: the maximum dimension of the vectors 67 ndim, & ! input: the actual dimension of the vectors 68 kter, & ! output: counter on iterations 69 nbnd, & ! input: the number of bands 70 npol, & ! input: number of components of the wavefunctions 71 ik ! input: the k point 72 73 real(DP) :: & 74 e(nbnd), & ! input: the actual eigenvalue 75 anorm, & ! output: the norm of the error in the solution 76 h_diag(ndmx*npol,nbnd), & ! input: an estimate of ( H - \epsilon ) 77 ethr ! input: the required precision 78 79 complex(DP) :: & 80 dpsi (ndmx*npol, nbnd), & ! output: the solution of the linear syst 81 d0psi (ndmx*npol, nbnd) ! input: the known term 82 83 logical :: conv_root ! output: if true the root is converged 84 external ch_psi ! input: the routine computing ch_psi 85 external cg_psi ! input: the routine computing cg_psi 86 ! 87 ! here the local variables 88 ! 89 integer, parameter :: maxter = 400 90 ! the maximum number of iterations 91 integer :: iter, ibnd, ibnd_, lbnd 92 ! counters on iteration, bands 93 integer , allocatable :: conv (:) 94 ! if 1 the root is converged 95 96 complex(DP), allocatable :: g (:,:), t (:,:), h (:,:), hold (:,:) 97 ! the gradient of psi 98 ! the preconditioned gradient 99 ! the delta gradient 100 ! the conjugate gradient 101 ! work space 102 complex(DP) :: dcgamma, dclambda 103 ! the ratio between rho 104 ! step length 105 REAL(kind=dp), EXTERNAL :: ddot 106 ! the scalar product 107 real(DP), allocatable :: rho (:), rhoold (:), eu (:), a(:), c(:) 108 ! the residue 109 ! auxiliary for h_diag 110 real(DP) :: kter_eff 111 ! account the number of iterations with b 112 ! coefficient of quadratic form 113 114 ! bgrp parallelization auxiliary variables 115 INTEGER :: n_start, n_end, my_nbnd 116 logical :: lsave_use_bgrp_in_hpsi 117 ! 118 call start_clock ('cgsolve') 119 120 call divide (inter_bgrp_comm,nbnd,n_start,n_end) 121 my_nbnd = n_end - n_start + 1 122 123 ! allocate workspace (bgrp distributed) 124 allocate ( conv(nbnd) ) 125 allocate ( g(ndmx*npol,my_nbnd), t(ndmx*npol,my_nbnd), h(ndmx*npol,my_nbnd), & 126 hold(ndmx*npol,my_nbnd) ) 127 allocate ( a(my_nbnd), c(my_nbnd) ) 128 allocate ( rho(my_nbnd), rhoold(my_nbnd) ) 129 allocate ( eu(my_nbnd) ) 130 ! WRITE( stdout,*) g,t,h,hold 131 132 kter_eff = 0.d0 ; conv (1:nbnd) = 0 133 134 g=(0.d0,0.d0); t=(0.d0,0.d0); h=(0.d0,0.d0); hold=(0.d0,0.d0) 135 136 ! bgrp parallelization is done outside h_psi/s_psi. set use_bgrp_in_hpsi temporarily to false 137 lsave_use_bgrp_in_hpsi = use_bgrp_in_hpsi ; use_bgrp_in_hpsi = .false. 138 139 do iter = 1, maxter 140 ! 141 ! compute the gradient. can reuse information from previous step 142 ! 143 if (iter == 1) then 144 call ch_psi (ndim, dpsi(1,n_start), g, e(n_start), ik, my_nbnd) 145 do ibnd = n_start, n_end ; ibnd_ = ibnd - n_start + 1 146 call zaxpy (ndim, (-1.d0,0.d0), d0psi(1,ibnd), 1, g(1,ibnd_), 1) 147 enddo 148 IF (npol==2) THEN 149 do ibnd = n_start, n_end ; ibnd_ = ibnd - n_start + 1 150 call zaxpy (ndim, (-1.d0,0.d0), d0psi(ndmx+1,ibnd), 1, g(ndmx+1,ibnd_), 1) 151 enddo 152 END IF 153 endif 154 ! 155 ! compute preconditioned residual vector and convergence check 156 ! 157 lbnd = 0 158 do ibnd = n_start, n_end ; ibnd_ = ibnd - n_start + 1 159 if (conv (ibnd) .eq.0) then 160 lbnd = lbnd+1 161 call zcopy (ndmx*npol, g (1, ibnd_), 1, h (1, ibnd_), 1) 162 call cg_psi(ndmx, ndim, 1, h(1,ibnd_), h_diag(1,ibnd) ) 163 164 IF (gamma_only) THEN 165 rho(lbnd)=2.0d0*ddot(2*ndmx*npol,h(1,ibnd_),1,g(1,ibnd_),1) 166 IF(gstart==2) THEN 167 rho(lbnd)=rho(lbnd)-DBLE(h(1,ibnd_))*DBLE(g(1,ibnd_)) 168 ENDIF 169 ELSE 170 rho(lbnd) = ddot (2*ndmx*npol, h(1,ibnd_), 1, g(1,ibnd_), 1) 171 ENDIF 172 endif 173 enddo 174 kter_eff = kter_eff + DBLE (lbnd) / DBLE (nbnd) 175 call mp_sum( rho(1:lbnd), intra_bgrp_comm ) 176 do ibnd = n_end, n_start, -1 ; ibnd_ = ibnd - n_start + 1 177 if (conv(ibnd).eq.0) then 178 rho(ibnd_)=rho(lbnd) 179 lbnd = lbnd -1 180 anorm = sqrt (rho (ibnd_) ) 181 !write(6,*) 'anorm', ibnd, anorm ; FLUSH(6) 182 if (anorm.lt.ethr) conv (ibnd) = 1 183 endif 184 enddo 185! 186 conv_root = .true. 187 do ibnd = n_start, n_end 188 conv_root = conv_root.and. (conv (ibnd) .eq.1) 189 enddo 190 if (conv_root) goto 100 191 ! 192 ! compute the step direction h. Conjugate it to previous step 193 ! 194 lbnd = 0 195 do ibnd = n_start, n_end ; ibnd_ = ibnd - n_start + 1 196 if (conv (ibnd) .eq.0) then 197! 198! change sign to h 199! 200 call dscal (2 * ndmx * npol, - 1.d0, h (1, ibnd_), 1) 201 if (iter.ne.1) then 202 dcgamma = rho (ibnd_) / rhoold (ibnd_) 203 call zaxpy (ndmx*npol, dcgamma, hold (1, ibnd_), 1, h (1, ibnd_), 1) 204 endif 205 206! 207! here hold is used as auxiliary vector in order to efficiently compute t = A*h 208! it is later set to the current (becoming old) value of h 209! 210 lbnd = lbnd+1 211 call zcopy (ndmx*npol, h (1, ibnd_), 1, hold (1, lbnd), 1) 212 eu (lbnd) = e (ibnd) 213 endif 214 enddo 215 ! 216 ! compute t = A*h 217 ! 218 call ch_psi (ndim, hold, t, eu, ik, lbnd) 219 ! 220 ! compute the coefficients a and c for the line minimization 221 ! compute step length lambda 222 lbnd=0 223 do ibnd = n_start, n_end ; ibnd_ = ibnd - n_start + 1 224 if (conv (ibnd) .eq.0) then 225 lbnd=lbnd+1 226 IF (gamma_only) THEN 227 a(lbnd) = 2.0d0*ddot(2*ndmx*npol,h(1,ibnd_),1,g(1,ibnd_),1) 228 c(lbnd) = 2.0d0*ddot(2*ndmx*npol,h(1,ibnd_),1,t(1,lbnd),1) 229 IF (gstart == 2) THEN 230 a(lbnd)=a(lbnd)-DBLE(h(1,ibnd_))*DBLE(g(1,ibnd_)) 231 c(lbnd)=c(lbnd)-DBLE(h(1,ibnd_))*DBLE(t(1,lbnd)) 232 ENDIF 233 ELSE 234 a(lbnd) = ddot (2*ndmx*npol, h(1,ibnd_), 1, g(1,ibnd_), 1) 235 c(lbnd) = ddot (2*ndmx*npol, h(1,ibnd_), 1, t(1,lbnd), 1) 236 ENDIF 237 end if 238 end do 239 call mp_sum( a(1:lbnd), intra_bgrp_comm ) 240 call mp_sum( c(1:lbnd), intra_bgrp_comm ) 241 lbnd=0 242 do ibnd = n_start, n_end ; ibnd_ = ibnd - n_start + 1 243 if (conv (ibnd) .eq.0) then 244 lbnd=lbnd+1 245 dclambda = CMPLX( - a(lbnd) / c(lbnd), 0.d0,kind=DP) 246 ! 247 ! move to new position 248 ! 249 call zaxpy (ndmx*npol, dclambda, h(1,ibnd_), 1, dpsi(1,ibnd), 1) 250 ! 251 ! update to get the gradient 252 ! 253 !g=g+lam 254 call zaxpy (ndmx*npol, dclambda, t(1,lbnd), 1, g(1,ibnd_), 1) 255 ! 256 ! save current (now old) h and rho for later use 257 ! 258 call zcopy (ndmx*npol, h(1,ibnd_), 1, hold(1,ibnd_), 1) 259 rhoold (ibnd_) = rho (ibnd_) 260 endif 261 enddo 262 enddo 263100 continue 264 ! deallocate workspace not needed anymore 265 deallocate (eu) ; deallocate (rho, rhoold) ; deallocate (a,c) ; deallocate (g, t, h, hold) 266 267 ! wait for all bgrp to complete their task 268 CALL mp_barrier( inter_bgrp_comm ) 269 270 ! check if all root converged across all bgrp 271 call mp_sum( conv, inter_bgrp_comm ) 272 conv_root = .true. 273 do ibnd = 1, nbnd 274 conv_root = conv_root.and. (conv (ibnd) .eq.1) 275 enddo 276 deallocate (conv) 277 278 ! collect the result 279 if (n_start > 1 ) dpsi(:, 1:n_start-1) = (0.d0,0.d0) ; if (n_end < nbnd) dpsi(:, n_end+1:nbnd) = (0.d0,0.d0) 280 call mp_sum( dpsi, inter_bgrp_comm ) 281 282 call mp_sum( kter_eff, inter_bgrp_comm ) 283 kter = kter_eff 284 285 ! restore the value of use_bgrp_in_hpsi to its saved value 286 use_bgrp_in_hpsi = lsave_use_bgrp_in_hpsi 287 288 289 call stop_clock ('cgsolve') 290 return 291end subroutine cgsolve_all 292