1! 2! GNU General Public License. See the file `License' 3! in the root directory of the present distribution, 4! or http://www.gnu.org/copyleft/gpl.txt . 5! 6! 7!---------------------------------------------------------------------- 8subroutine ccgsolve_all (ch_psi, ccg_psi, e, d0psi, dpsi, h_diag, & 9 ndmx, ndim, ethr, ik, kter, conv_root, anorm, nbnd, npol, freq_c) 10 !---------------------------------------------------------------------- 11 ! 12 ! iterative solution of the linear systems (i=1,nbnd): 13 ! 14 ! ( H - e_i + Q ) * dpsi_i = d0psi_i (1) 15 ! 16 ! where H is a complex hermitean matrix, e_i is a real scalar, Q is a 17 ! projector on occupied states, dpsi_i and d0psi_ are complex vectors 18 ! 19 ! on input: 20 ! ch_psi EXTERNAL name of a subroutine: 21 ! Calculates (H-e+Q)*psi products. 22 ! Vectors psi and psip should be dimensioned 23 ! (ndmx,nbnd) 24 ! 25 ! cg_psi EXTERNAL name of a subroutine: 26 ! which calculates (h-e)^-1 * psi, with 27 ! some approximation, e.g. (diag(h)-e) 28 ! 29 ! e real unperturbed eigenvalue. 30 ! 31 ! dpsi contains an estimate of the solution 32 ! vector. 33 ! 34 ! d0psi contains the right hand side vector 35 ! of the system. 36 ! 37 ! ndmx integer row dimension of dpsi, ecc. 38 ! 39 ! ndim integer actual row dimension of dpsi 40 ! 41 ! ethr real convergence threshold. solution 42 ! improvement is stopped when the error in 43 ! eq (1), defined as l.h.s. - r.h.s., becomes 44 ! less than ethr in norm. 45 ! 46 ! on output: dpsi contains the refined estimate of the 47 ! solution vector. 48 ! 49 ! d0psi is corrupted on exit 50 ! 51 ! revised (extensively) 6 Apr 1997 by A. Dal Corso & F. Mauri 52 ! revised (to reduce memory) 29 May 2004 by S. de Gironcoli 53 ! 54 USE kinds, ONLY : DP 55 USE mp_bands, ONLY : intra_bgrp_comm, inter_bgrp_comm, use_bgrp_in_hpsi 56 USE mp, ONLY : mp_sum, mp_barrier 57 USE control_flags, ONLY : gamma_only 58 USE gvect, ONLY : gstart 59 60 implicit none 61 ! 62 ! first the I/O variables 63 ! 64 integer :: ndmx, & ! input: the maximum dimension of the vectors 65 ndim, & ! input: the actual dimension of the vectors 66 kter, & ! output: counter on iterations 67 nbnd, & ! input: the number of bands 68 npol, & ! input: number of components of the wavefunctions 69 ik ! input: the k point 70 71 real(DP) :: & 72 e(nbnd), & ! input: the actual eigenvalue 73 anorm, & ! output: the norm of the error in the solution 74 ethr ! input: the required precision 75 76 complex(DP) :: & 77 dpsi (ndmx*npol, nbnd), & ! output: the solution of the linear syst 78 h_diag(ndmx*npol,nbnd), & ! input: an estimate of ( H - \epsilon -w) 79 ! w is complex 80 freq_c , & ! complex frequency 81 d0psi (ndmx*npol, nbnd) ! input: the known term 82 83 logical :: conv_root ! output: if true the root is converged 84 external ch_psi ! input: the routine computing ch_psi 85 external ccg_psi ! input: the routine computing cg_psi 86 ! 87 ! here the local variables 88 ! 89 integer, parameter :: maxter = 1000 90 ! the maximum number of iterations 91 integer :: iter, ibnd, ibnd_, lbnd 92 ! counters on iteration, bands 93 integer , allocatable :: conv (:) 94 ! if 1 the root is converged 95 96 complex(DP), allocatable :: g (:,:), t (:,:), h (:,:), hold (:,:) 97 COMPLEX(DP), ALLOCATABLE :: gs (:,:), ts (:,:), hs (:,:), hsold (:,:) 98 ! the gradient of psi 99 ! the preconditioned gradient 100 ! the delta gradient 101 ! the conjugate gradient 102 ! work space 103 complex(DP) :: dcgamma, dcgamma1, dclambda, dclambda1 104 ! the ratio between rho 105 ! step length 106 complex(DP), external :: zdotc 107 REAL(kind=dp), EXTERNAL :: ddot 108 ! the scalar product 109 real(DP), allocatable :: eu (:) 110 complex(DP), allocatable :: rho (:), rhoold (:), euc (:), a(:), c(:) 111 ! the residue 112 ! auxiliary for h_diag 113 real(DP) :: kter_eff 114 ! account the number of iterations with b 115 ! coefficient of quadratic form 116 integer, allocatable :: indb(:) 117 118 ! bgrp parallelization auxiliary variables 119 INTEGER :: n_start, n_end, my_nbnd 120 logical :: lsave_use_bgrp_in_hpsi 121 ! 122 call start_clock ('ccgsolve') 123 124 call divide (inter_bgrp_comm,nbnd,n_start,n_end) 125 my_nbnd = n_end - n_start + 1 126 127! my_nbnd =nbnd 128 129 ! allocate workspace (bgrp distributed) 130 allocate ( conv(nbnd) ) 131 allocate ( g(ndmx*npol,my_nbnd), t(ndmx*npol,my_nbnd), h(ndmx*npol,my_nbnd), & 132 hold(ndmx*npol,my_nbnd) ) 133 allocate ( gs(ndmx*npol,my_nbnd), ts(ndmx*npol,my_nbnd), hs(ndmx*npol,my_nbnd), & 134 hsold(ndmx*npol,my_nbnd) ) 135 allocate ( a(my_nbnd), c(my_nbnd) ) 136 allocate ( rho(my_nbnd), rhoold(my_nbnd) ) 137 allocate ( eu(my_nbnd) ) 138 allocate ( euc(my_nbnd) ) 139 allocate ( indb(my_nbnd) ) 140 ! WRITE( stdout,*) g,t,h,hold 141 142 kter_eff = 0.d0 ; conv (1:nbnd) = 0 143 144 g=(0.d0,0.d0); t=(0.d0,0.d0); h=(0.d0,0.d0); hold=(0.d0,0.d0) 145 gs=(0.d0,0.d0); ts=(0.d0,0.d0); hs=(0.d0,0.d0); hsold=(0.d0,0.d0) 146 147 rho= (0.0d0, 0.0d0); rhoold=(0.0d0,0.d0) 148 149 ! bgrp parallelization is done outside h_psi/s_psi. set use_bgrp_in_hpsi temporarily to false 150 lsave_use_bgrp_in_hpsi = use_bgrp_in_hpsi ; use_bgrp_in_hpsi = .false. 151 152 do ibnd = 1, nbnd 153 indb(ibnd) = ibnd 154 enddo 155 156 eu = e 157 do iter = 1, maxter 158 ! 159 ! compute the gradient. can reuse information from previous step 160 ! 161 if (iter == 1) then 162 do ibnd = n_start, n_end 163 euc(ibnd) = CMPLX(e(indb(ibnd))+DREAL(freq_c), DIMAG(freq_c), KIND=DP) 164 ENDDO 165 166 call ch_psi (ndim, dpsi, g, euc, ik, my_nbnd) 167 168 do ibnd = n_start, n_end ; ibnd_ = ibnd - n_start + 1 169 call zaxpy (ndim, (-1.d0,0.d0), d0psi(1,ibnd), 1, g(1,ibnd_), 1) 170 enddo 171 IF (npol==2) THEN 172 do ibnd = n_start, n_end ; ibnd_ = ibnd - n_start + 1 173 call zaxpy (ndim, (-1.d0,0.d0), d0psi(ndmx+1,ibnd), 1, g(ndmx+1,ibnd_), 1) 174 enddo 175 END IF 176 gs(:,:) = CONJG(g(:,:)) 177 endif 178 ! 179 ! compute preconditioned residual vector and convergence check 180 ! 181 lbnd = 0 182 do ibnd = n_start, n_end ; ibnd_ = ibnd - n_start + 1 183 if (conv (ibnd) .eq.0) then 184 lbnd = lbnd+1 185 call zcopy (ndmx*npol, g (1, ibnd_), 1, h (1, ibnd_), 1) 186 call zcopy (ndmx*npol, gs (1, ibnd_), 1, hs (1, ibnd_), 1) 187 188 call ccg_psi(ndmx, ndim, 1, h(1,ibnd_), h_diag(1,ibnd), 1 ) 189 call ccg_psi(ndmx, ndim, 1, hs(1,ibnd_), h_diag(1,ibnd), -1 ) 190 191 IF (gamma_only) THEN 192 rho(lbnd)=2.0d0*ddot(2*ndmx*npol,h(1,ibnd_),1,g(1,ibnd_),1) 193 IF(gstart==2) THEN 194 rho(lbnd)=rho(lbnd)-DBLE(h(1,ibnd_))*DBLE(g(1,ibnd_)) 195 ENDIF 196 ELSE 197 rho(lbnd) = zdotc (ndmx*npol, hs(1,ibnd_), 1, g(1,ibnd_), 1) 198 ENDIF 199 200 endif 201 enddo 202 kter_eff = kter_eff + DBLE (lbnd) / DBLE (nbnd) 203 call mp_sum( rho(1:lbnd), intra_bgrp_comm ) 204 do ibnd = n_end, n_start, -1 ; ibnd_ = ibnd - n_start + 1 205 if (conv(ibnd).eq.0) then 206 rho(ibnd_)=rho(lbnd) 207 lbnd = lbnd -1 208 anorm = sqrt ( abs (rho (ibnd_)) ) 209 if (anorm.lt.ethr) conv (ibnd) = 1 210 endif 211 enddo 212 ! 213 conv_root = .true. 214 do ibnd = n_start, n_end 215 conv_root = conv_root.and. (conv (ibnd) .eq.1) 216 enddo 217 if (conv_root) goto 100 218 ! 219 ! compute the step direction h. Conjugate it to previous step 220 ! 221 lbnd = 0 222 do ibnd = n_start, n_end ; ibnd_ = ibnd - n_start + 1 223 if (conv (ibnd) .eq.0) then 224 ! 225 ! change sign to h and hs 226 ! 227 call dscal (2 * ndmx * npol, - 1.d0, h (1, ibnd_), 1) 228 call dscal (2 * ndmx * npol, - 1.d0, hs (1, ibnd_), 1) 229 230 if (iter.ne.1) then 231 dcgamma = rho (ibnd_) / rhoold (ibnd_) 232 dcgamma1 = CONJG(dcgamma) 233 234 call zaxpy (ndmx*npol, dcgamma, hold (1, ibnd_), 1, h (1, ibnd_), 1) 235 CALL zaxpy (ndmx*npol, dcgamma1, hsold (1, ibnd_), 1, hs (1, ibnd_), 1) 236 237 endif 238 239 ! 240 ! here hold is used as auxiliary vector in order to efficiently compute t = A*h 241 ! it is later set to the current (becoming old) value of h 242 ! 243 lbnd = lbnd+1 244 call zcopy (ndmx*npol, h (1, ibnd_), 1, hold (1, lbnd), 1) 245 CALL zcopy (ndmx*npol, hs (1, ibnd_), 1, hsold (1, lbnd), 1) 246 247 indb (lbnd) = ibnd 248 endif 249 enddo 250 251 ! 252 ! compute t = A*h and ts= A^+ * h 253 ! 254 255 DO ibnd=1,lbnd 256 euc(ibnd) = CMPLX(e(indb(ibnd))+DREAL(freq_c), DIMAG(freq_c), KIND=DP) 257 ENDDO 258 259 call ch_psi (ndim, hold, t, euc, ik, lbnd) 260 261 call ch_psi (ndim, hsold, ts, conjg(euc), ik, lbnd) 262 263 ! 264 ! compute the coefficients a and c for the line minimization 265 ! compute step length lambda 266 ! 267 268 lbnd=0 269 do ibnd = n_start, n_end ; ibnd_ = ibnd - n_start + 1 270 if (conv (ibnd) .eq.0) then 271 lbnd=lbnd+1 272 273 IF (gamma_only) THEN 274 a(lbnd) = 2.0d0*ddot(2*ndmx*npol,hs(1,ibnd_),1,g(1,ibnd_),1) 275 c(lbnd) = 2.0d0*ddot(2*ndmx*npol,hs(1,ibnd_),1,t(1,lbnd),1) 276 IF (gstart == 2) THEN 277 a(lbnd)=a(lbnd)-DBLE(hs(1,ibnd_))*DBLE(g(1,ibnd_)) 278 c(lbnd)=c(lbnd)-DBLE(hs(1,ibnd_))*DBLE(t(1,lbnd)) 279 280 ENDIF 281 ELSE 282 a(lbnd) = zdotc (ndmx*npol, hs(1,ibnd_), 1, g(1,ibnd_), 1) 283 c(lbnd) = zdotc (ndmx*npol, hs(1,ibnd_), 1, t(1,lbnd), 1) 284 285 ENDIF 286 287 288 end if 289 end do 290 291 ! 292 call mp_sum( a(1:lbnd), intra_bgrp_comm ) 293 call mp_sum( c(1:lbnd), intra_bgrp_comm ) 294 lbnd=0 295 do ibnd = n_start, n_end ; ibnd_ = ibnd - n_start + 1 296 if (conv (ibnd) .eq.0) then 297 lbnd=lbnd+1 298 dclambda = - a(lbnd) / c(lbnd) 299 dclambda1 = CONJG(dclambda) 300 ! 301 ! move to new position 302 ! 303 304 call zaxpy (ndmx*npol, dclambda, h(1,ibnd_), 1, dpsi(1,ibnd), 1) 305 306 ! 307 ! update to get the gradient 308 ! 309 !g=g+lam 310 call zaxpy (ndmx*npol, dclambda, t(1,lbnd), 1, g(1,ibnd_), 1) 311 CALL zaxpy (ndmx*npol, dclambda1, ts(1,lbnd), 1, gs(1,ibnd_), 1) 312 313 ! 314 ! save current (now old) h and rho for later use 315 ! 316 call zcopy (ndmx*npol, h(1,ibnd_), 1, hold(1,ibnd_), 1) 317 CALL zcopy (ndmx*npol, hs(1,ibnd_), 1, hsold(1,ibnd_), 1) 318 319 rhoold (ibnd_) = rho (ibnd_) 320 321 endif 322 enddo 323 enddo 324 325100 continue 326 ! deallocate workspace not needed anymore 327 deallocate (eu) ; deallocate (rho, rhoold) ; deallocate (a,c) ; deallocate (g, t, h, hold) 328 deallocate (euc) 329 330 ! wait for all bgrp to complete their task 331 CALL mp_barrier( inter_bgrp_comm ) 332 333 ! check if all root converged across all bgrp 334 call mp_sum( conv, inter_bgrp_comm ) 335 conv_root = .true. 336 do ibnd = 1, nbnd 337 conv_root = conv_root.and. (conv (ibnd) .eq.1) 338 enddo 339 deallocate (conv) 340 341 ! collect the result 342 if (n_start > 1 ) dpsi(:, 1:n_start-1) = (0.d0,0.d0) ; if (n_end < nbnd) dpsi(:, n_end+1:nbnd) = (0.d0,0.d0) 343 call mp_sum( dpsi, inter_bgrp_comm ) 344 345 call mp_sum( kter_eff, inter_bgrp_comm ) 346 kter = kter_eff 347 348 ! restore the value of use_bgrp_in_hpsi to its saved value 349 use_bgrp_in_hpsi = lsave_use_bgrp_in_hpsi 350 351 352 call stop_clock ('ccgsolve') 353 return 354end subroutine ccgsolve_all 355