1! --- 2! Copyright (C) 1996-2016 The SIESTA group 3! This file is distributed under the terms of the 4! GNU General Public License: see COPYING in the top directory 5! or http://www.gnu.org/copyleft/gpl.txt . 6! See Docs/Contributors.txt for a list of contributors. 7! --- 8module m_dminim 9 10use fdf, only : fdf_boolean, fdf_integer, fdf_get, fdf_physical 11use files, only : slabel 12use parallel, only : ProcessorY, BlockSize, Node, Nodes 13use precision, only : dp 14use siesta_options, only : fixspin 15use sys, only : die 16#ifdef MPI 17use mpi_siesta, only : mpi_integer, mpi_double_precision, mpi_comm_world, mpi_sum, mpi_status_size 18use parallelsubs, only : GetNodeOrbs, GlobalToLocalOrb, WhichNodeOrb 19use parallelsubs, only : set_BlockSizeDefault 20#endif 21 22implicit none 23 24external :: io_assign, io_close 25 26!**** PRIVATE ***********************************! 27 28private 29 30type multispin 31 real(dp), allocatable :: mtrx(:,:) 32end type multispin 33 34real(dp), parameter :: Pi=3.141592653589793238462643383279502884197_dp 35 36logical, save :: LongOut ! print detailed output? 37logical, save :: UseCholesky ! use Cholesky factorization? 38logical, save :: UseSparse ! use sparse algebra? 39logical, save :: WriteCoeffs ! write WF coeffs. at the end of each MD iteration? 40logical, save :: ReadCoeffs ! read WF coeffs. at the start of the calculation? 41logical, save :: FirstCall(1:2)=.true. ! first time minim_cg is called? (up/down spin) 42#ifdef MPI 43logical, save :: Use2D ! use 2D data decomposition? 44#endif 45 46integer, save :: N_occ_loc_1D(1:2) ! num. of WFs (local 1D) (up/down spin) 47integer, save :: h_dim_loc_1D ! num. of AOs (local 1D) 48integer, save :: N_occ_loc(1:2,1:2) ! num. of WFs (local 1D or 2D) (up/down spin) 49integer, save :: h_dim_loc(1:2) ! num. of AOs (local 1D or 2D) (up/down spin) 50integer, save :: N_occ_diff ! difference between num. of WFs for up/down spin 51#ifdef MPI 52integer, save :: BlockSize_c ! ScaLAPACK blocking factor for WFs 53integer, save :: desc1(1:9) ! descriptor for operator matrix in AO basis (1D or 2D) 54integer, save :: desc2(1:9,1:2) ! descriptor for operator matrix in WF basis (1D or 2D) (up/down spin) 55integer, save :: desc3(1:9,1:2) ! descriptor for WF coeffs. matrix (1D or 2D) (up/down spin) 56integer, allocatable :: numh_recv(:) ! num. of nonzero elements of each row of sparse matrices 57integer, allocatable :: listhptr_recv(:) ! pointer to start of row in listh 58integer, allocatable :: listh_recv(:) ! list of nonzero elements of each row of sparse matrices 59integer, allocatable :: h_dim_l2g_recv(:) ! local-to-global index transform for AOs 60integer, allocatable, save :: h_dim_l2g(:) ! local-to-global index transform for AOs 61#endif 62 63#ifdef MPI 64real(dp), allocatable :: As_recv(:) ! work matrix 65#endif 66 67type(multispin), save :: c(1:2) ! WF coeffs. matrix (after Cholesky fact.) (up/down spin) 68type(multispin), save :: c_orig(1:2) ! WF coeffs. matrix (before Cholesky fact.) (up/down spin) 69 70!**** PUBLIC ************************************! 71 72public :: dminim 73 74!************************************************! 75 76contains 77 78!================================================! 79! use the orbital minimization method (OMM) to ! 80! solve the eigenvalue problem (double precision ! 81! routine, for Gamma point-only calculations) ! 82!================================================! 83subroutine dminim(CalcE,PreviousCallDiagon,iscf,istp,nbasis,nspin,h_dim,nhmax,numh,listhptr,listh,d_sparse,eta,qs,h_sparse,& 84 s_sparse,t_sparse) 85 86 use densematrix, only : psi, allocDenseMatrix, resetDenseMatrix 87 implicit none 88 89 !**** INPUT ***********************************! 90 91 logical, intent(in) :: CalcE ! Calculate the energy-density matrix from the existing coeffs.? 92 logical, intent(in) :: PreviousCallDiagon ! Previous SCF iteration solved by diagonalization? 93 94 integer, intent(in) :: iscf ! SCF iteration num. 95 integer, intent(in) :: istp ! MD iteration num. 96 integer, intent(in) :: nbasis ! dimension of numh and listhptr 97 integer, intent(in) :: nspin ! num. of spins 98 integer, intent(in) :: h_dim ! num. of AOs (global) 99 integer, intent(in) :: nhmax ! first dimension of listh and sparse matrices 100 integer, intent(in) :: numh(1:nbasis) ! num. of nonzero elements of each row of sparse matrices 101 integer, intent(in) :: listhptr(1:nbasis) ! pointer to start of row in listh 102 integer, intent(in) :: listh(1:nhmax) ! list of nonzero elements of each row of sparse matrices 103 104 real(dp), intent(in) :: qs(1:2) ! num. of electrons per spin 105 real(dp), intent(in) :: eta(1:2) ! chemical potential for Kim functional 106 real(dp), intent(in), optional :: h_sparse(1:nhmax,1:nspin) ! hamiltonian matrix (sparse) 107 real(dp), intent(in), optional :: t_sparse(1:nhmax) ! kinetic energy matrix (sparse) 108 real(dp), intent(in), optional :: s_sparse(1:nhmax) ! overlap matrix (sparse) 109 110 !**** OUTPUT **********************************! 111 112 real(dp), intent(out) :: d_sparse(1:nhmax,1:nspin) ! (energy-)density matrix (sparse) 113 114 !**** LOCAL ***********************************! 115 116 logical :: UpdatePrecon ! update the preconditioner? 117 logical :: UsePrecon ! use the preconditioner? 118 logical :: UpdateSparseComm ! update nhmax_max? 119 120 integer :: ispin 121 integer :: io 122 integer :: jo 123 integer :: j 124 integer :: k 125 integer :: ind 126 integer, save :: N_occ(1:2) ! num. of WFs (global) (up/down spin) 127 integer, save :: precon_default ! default number of SCF steps for which to use preconditioning 128 integer, save :: precon_first_step ! number of SCF steps for which to use preconditioning (first MD step) 129 integer, save :: last_precon_update=0 ! istp value of last preconditioner update 130 integer, save :: last_call(1:2)=0 ! iscf and istp value of last module call 131#ifdef MPI 132 integer :: BlockSize_c_default 133#endif 134 135 real(dp), allocatable :: h_dense1D(:,:) ! hamiltonian matrix (dense 1D) 136 real(dp), allocatable :: t_dense1D(:,:) ! kinetic energy matrix (dense 1D) 137 real(dp), allocatable :: s_dense1D(:,:) ! overlap matrix (dense 1D) 138 real(dp), allocatable :: d_dense1D(:,:) ! (energy-)density matrix (dense 1D) 139 140 !**********************************************! 141 142 call timer('dmin',1) 143 144 if (all(FirstCall(1:2))) then 145 146 LongOut=fdf_boolean('OMM.LongOutput',.false.) 147 148 UseCholesky=fdf_boolean('OMM.UseCholesky',.true.) 149 UseSparse=fdf_boolean('OMM.UseSparse',.false.) 150 if (UseSparse .and. UseCholesky) call die('ERROR: sparse algebra not compatible with Cholesky factorization!') 151 152 WriteCoeffs=fdf_boolean('OMM.WriteCoeffs',.false.) 153 ReadCoeffs=fdf_boolean('OMM.ReadCoeffs',.false.) 154 155 if (.not. UseCholesky) then 156 precon_default=fdf_integer('OMM.Precon',-1) 157 precon_first_step=fdf_integer('OMM.PreconFirstStep',precon_default) 158 end if 159 160 if (nspin==1) then 161 N_occ(1)=nint(0.5_dp*qs(1)) 162 if (abs(N_occ(1)-0.5_dp*qs(1))>1.0d-10) call die('ERROR: only integer number of electrons per spin allowed!') 163 else 164 if (.not. fixspin) call die('ERROR: OMM for spin unpolarized calculations only supports fixed spin!') 165 do ispin=1,nspin 166 N_occ(ispin)=nint(qs(ispin)) 167 if (abs(N_occ(ispin)-qs(ispin))>1.0d-10) call die('ERROR: only integer number of electrons per spin allowed!') 168 end do 169 N_occ_diff=N_occ(1)-N_occ(2) 170 end if 171 172 h_dim_loc_1D=nbasis 173 174#ifdef MPI 175 Use2D=fdf_boolean('OMM.Use2D',.true.) 176 if (UseSparse .and. Use2D) call die('ERROR: sparse algebra not compatible with 2D data decomposition!') 177 178 ! calculate the ScaLAPACK blocking factor for distributing the WF coeffs. matrix 179 call set_blocksizedefault(Nodes,N_occ(1),BlockSize_c_default) 180 BlockSize_c=fdf_integer('OMM.BlockSize',BlockSize_c_default) 181#endif 182 183 end if 184 185 if (.not. UseSparse) allocate(d_dense1D(1:h_dim,1:h_dim_loc_1D)) 186 187 if (CalcE) then 188 189 UsePrecon=.false. 190 UpdatePrecon=.false. 191 UpdateSparseComm=.false. 192 193 else 194 195 if (.not. UseSparse) allocate(h_dense1D(1:h_dim,1:h_dim_loc_1D)) 196 197 ! decide whether preconditioning should be used in this minimization call 198 if (UseCholesky) then 199 UsePrecon=.false. 200 UpdatePrecon=.false. 201 else 202 if (istp==1) then 203 if ((iscf<=precon_first_step) .or. (precon_first_step<0)) then 204 UsePrecon=.true. 205 else 206 UsePrecon=.false. 207 end if 208 else 209 if ((iscf<=precon_default) .or. (precon_default<0)) then 210 UsePrecon=.true. 211 else 212 UsePrecon=.false. 213 end if 214 end if 215 end if 216 217 ! if this is the first time the module is called for this MD step, convert the new overlap 218 ! matrix from sparse to dense 219 if (UseSparse) then 220 if (istp/=last_call(2)) then 221 UpdateSparseComm=.true. 222 else 223 UpdateSparseComm=.false. 224 end if 225 if (UsePrecon .and. (istp/=last_precon_update)) then 226 allocate(s_dense1D(1:h_dim_loc_1D,1:h_dim)) 227 s_dense1D=0.0_dp 228 do io=1,h_dim_loc_1D 229 do j=1,numh(io) 230 ind=listhptr(io)+j 231 jo=listh(ind) 232 s_dense1D(io,jo)=s_dense1D(io,jo)+s_sparse(ind) 233 end do 234 end do 235 end if 236 else 237 if (istp/=last_call(2)) then 238 allocate(s_dense1D(1:h_dim,1:h_dim_loc_1D)) 239 s_dense1D=0.0_dp 240 do io=1,h_dim_loc_1D 241 do j=1,numh(io) 242 ind=listhptr(io)+j 243 jo=listh(ind) 244 s_dense1D(jo,io)=s_dense1D(jo,io)+s_sparse(ind) 245 end do 246 end do 247 end if 248 end if 249 250 ! if this is the first time we are using preconditioning for this MD step, convert also the new 251 ! kinetic energy matrix from sparse to dense 252 if (UsePrecon) then 253 if (istp/=last_precon_update) then 254 if (UseSparse) then 255 allocate(t_dense1D(1:h_dim_loc_1D,1:h_dim)) 256 t_dense1D=0.0_dp 257 do io=1,h_dim_loc_1D 258 do j=1,numh(io) 259 ind=listhptr(io)+j 260 jo=listh(ind) 261 t_dense1D(io,jo)=t_dense1D(io,jo)+t_sparse(ind) 262 end do 263 end do 264 else 265 allocate(t_dense1D(1:h_dim,1:h_dim_loc_1D)) 266 t_dense1D=0.0_dp 267 do io=1,h_dim_loc_1D 268 do j=1,numh(io) 269 ind=listhptr(io)+j 270 jo=listh(ind) 271 t_dense1D(jo,io)=t_dense1D(jo,io)+t_sparse(ind) 272 end do 273 end do 274 end if 275 UpdatePrecon=.true. 276 last_precon_update=istp 277 else 278 UpdatePrecon=.false. 279 end if 280 else 281 UpdatePrecon=.false. 282 end if 283 284 end if 285 286 ! The psi-array *must* be allocated even if it is not accessed when 287 ! PreviousCallDiagon is .false. or else compilers might flag as an 288 ! error the unallocated 'psi' dummy argument in some of the routines 289 ! below. 290 ! 291 ! This call will keep the full 'psi' when needed (i.e., when 292 ! PreviousCallDiagon is .true. and 'psi' holds the eigenvectors 293 ! from a previous call to diagon that did not deallocate 'psi') 294 ! and allocate a minimal version when 'psi' has been deallocated 295 ! In order to achieve the former, the "shrink=.false." option in 296 ! allocDenseMatrix is essential. 297 call allocDenseMatrix(1, 1, 1) 298 299 do ispin=1,nspin 300 301 if (.not. calcE) then 302 ! convert the hamiltonian matrix from sparse to dense (and shift the eingevalue spectrum 303 ! w.r.t. the chemical potential reference) 304 if (.not. UseSparse) then 305 h_dense1D=0.0_dp 306 do io=1,h_dim_loc_1D 307 do j=1,numh(io) 308 ind=listhptr(io)+j 309 jo=listh(ind) 310 h_dense1D(jo,io)=h_dense1D(jo,io)+h_sparse(ind,ispin)-eta(ispin)*s_sparse(ind) 311 end do 312 end do 313 end if 314 end if 315 316 ! call the routine to perform the energy minimization 317 if (UseSparse) then 318 if (CalcE) then 319 call minim_cg_sparse(nhmax,numh,listhptr,listh,CalcE,PreviousCallDiagon,iscf,h_dim,N_occ(ispin),eta(ispin),& 320 psi,nspin,ispin,UpdatePrecon,UsePrecon,UpdateSparseComm,d_sparse(1:nhmax,ispin)) 321 else 322 if (allocated(s_dense1D)) then 323 if (allocated(t_dense1D)) then 324 call minim_cg_sparse(nhmax,numh,listhptr,listh,CalcE,PreviousCallDiagon,iscf,h_dim,N_occ(ispin),eta(ispin),& 325 psi,nspin,ispin,UpdatePrecon,UsePrecon,UpdateSparseComm,d_sparse(1:nhmax,ispin),& 326 h_sparse(1:nhmax,ispin)-eta(ispin)*s_sparse(1:nhmax),s_sparse,s_dense1D,t_dense1D) 327 else 328 call minim_cg_sparse(nhmax,numh,listhptr,listh,CalcE,PreviousCallDiagon,iscf,h_dim,N_occ(ispin),eta(ispin),& 329 psi,nspin,ispin,UpdatePrecon,UsePrecon,UpdateSparseComm,d_sparse(1:nhmax,ispin),& 330 h_sparse(1:nhmax,ispin)-eta(ispin)*s_sparse(1:nhmax),s_sparse,s_dense1D) 331 end if 332 else 333 if (allocated(t_dense1D)) then 334 call minim_cg_sparse(nhmax,numh,listhptr,listh,CalcE,PreviousCallDiagon,iscf,h_dim,N_occ(ispin),eta(ispin),& 335 psi,nspin,ispin,UpdatePrecon,UsePrecon,UpdateSparseComm,d_sparse(1:nhmax,ispin),& 336 h_sparse(1:nhmax,ispin)-eta(ispin)*s_sparse(1:nhmax),s_sparse,t_dense1D=t_dense1D) 337 else 338 call minim_cg_sparse(nhmax,numh,listhptr,listh,CalcE,PreviousCallDiagon,iscf,h_dim,N_occ(ispin),eta(ispin),& 339 psi,nspin,ispin,UpdatePrecon,UsePrecon,UpdateSparseComm,d_sparse(1:nhmax,ispin),& 340 h_sparse(1:nhmax,ispin)-eta(ispin)*s_sparse(1:nhmax),s_sparse) 341 end if 342 end if 343 end if 344 else 345 if (CalcE) then 346 call minim_cg(CalcE,PreviousCallDiagon,iscf,h_dim,N_occ(ispin),eta(ispin),psi,nspin,ispin,UpdatePrecon,UsePrecon,d_dense1D) 347 else 348 if (allocated(s_dense1D)) then 349 if (allocated(t_dense1D)) then 350 call minim_cg(CalcE,PreviousCallDiagon,iscf,h_dim,N_occ(ispin),eta(ispin),psi,nspin,ispin,UpdatePrecon,UsePrecon,& 351 d_dense1D,h_dense1D,s_dense1D,t_dense1D) 352 else 353 call minim_cg(CalcE,PreviousCallDiagon,iscf,h_dim,N_occ(ispin),eta(ispin),psi,nspin,ispin,UpdatePrecon,UsePrecon,& 354 d_dense1D,h_dense1D,s_dense1D) 355 end if 356 else 357 if (allocated(t_dense1D)) then 358 call minim_cg(CalcE,PreviousCallDiagon,iscf,h_dim,N_occ(ispin),eta(ispin),psi,nspin,ispin,UpdatePrecon,UsePrecon,& 359 d_dense1D,h_dense1D,t_dense1D=t_dense1D) 360 else 361 call minim_cg(CalcE,PreviousCallDiagon,iscf,h_dim,N_occ(ispin),eta(ispin),psi,nspin,ispin,UpdatePrecon,UsePrecon,& 362 d_dense1D,h_dense1D) 363 end if 364 end if 365 end if 366 end if 367 368 ! convert the (energy-)density matrix from dense to sparse 369 if (.not. UseSparse) then 370 do io=1,h_dim_loc_1D 371 do j=1,numh(io) 372 ind=listhptr(io)+j 373 jo=listh(ind) 374 d_sparse(ind,ispin)=d_dense1D(jo,io) 375 end do 376 end do 377 end if 378 379 end do 380 381 if (.not. CalcE) then 382 if (allocated(t_dense1D)) deallocate(t_dense1D) 383 if (allocated(s_dense1D)) deallocate(s_dense1D) 384 if (.not. UseSparse) deallocate(h_dense1D) 385 end if 386 if (.not. UseSparse) deallocate(d_dense1D) 387 388 last_call(1:2)=(/iscf,istp/) 389 390 ! Clean-up dense matrices holding seed eigenvectors 391 call resetDenseMatrix() 392 393 call timer('dmin',2) 394 395 end subroutine dminim 396 397!================================================! 398! minimize the Kim functional by conjugate ! 399! gradients (dense routine) ! 400!================================================! 401subroutine minim_cg(CalcE,PreviousCallDiagon,iscf,h_dim,N_occ,eta,psi,nspin,ispin,UpdatePrecon,UsePrecon,d_dense1D,h_dense1D,& 402 s_dense1D,t_dense1D) 403 implicit none 404 405 !**** INPUT ***********************************! 406 407 logical, intent(in) :: CalcE ! calculate the energy-density matrix from the existing coeffs.? 408 logical, intent(in) :: PreviousCallDiagon ! Previous SCF iteration solved by diagonalization? 409 logical, intent(in) :: UpdatePrecon ! update the preconditioner? 410 logical, intent(in) :: UsePrecon ! use the preconditioner? 411 412 integer, intent(in) :: iscf ! SCF iteration num. 413 integer, intent(in) :: h_dim ! num. of AOs (global) 414 integer, intent(in) :: N_occ ! num. of WFs (global) 415 integer, intent(in) :: nspin ! num. of spins 416 integer, intent(in) :: ispin ! up/down spin 417 418 real(dp), intent(in) :: eta ! chemical potential for Kim functional 419 real(dp), intent(in) :: psi(1:h_dim,1:h_dim_loc_1D,1:nspin) ! eigenvectors from diagonalization 420 real(dp), intent(in), optional :: h_dense1D(:,:) ! hamiltonian matrix in AO basis (dense 1D) 421 real(dp), intent(in), optional :: s_dense1D(:,:) ! overlap matrix in AO basis (dense 1D) 422 real(dp), intent(in), optional :: t_dense1D(:,:) ! kinetic energy matrix in AO basis (dense 1D) 423 424 !**** OUTPUT **********************************! 425 426 real(dp), intent(out) :: d_dense1D(:,:) ! (energy-)density matrix in AO basis (dense 1D) 427 428 !**** LOCAL ***********************************! 429 430 character(len=100) :: WF_COEFFS_filename 431#ifdef MPI 432 character(len=5) :: Node_name 433#endif 434 435 logical :: new_s 436 logical :: conv 437 logical :: ls_conv 438 logical :: ls_fail 439 logical :: ReadCoeffs2 440 441 integer :: i 442 integer :: j 443 integer :: k 444 integer :: l 445 integer :: m 446 integer :: n 447 integer :: info 448 integer :: icg ! CG step num. 449 integer :: n_step_max=100 ! max. num. steps for CG minimization 450 integer :: lwork 451 integer, allocatable :: ipiv(:) 452#ifdef MPI 453 integer :: liwork 454 integer :: mpi_status(1:mpi_status_size) ! MPI status 455 integer, save :: ictxt ! handle for main BLACS context (1D or 2D) 456 integer, save :: ictxt_1D ! handle for additional BLACS context (1D) 457 integer, save :: ictxt_1D_T ! handle for additional BLACS context (1D transposed) 458 integer, save :: desc1_1D(1:9) ! descriptor for operator matrix in AO basis (1D) 459 integer, save :: desc3_1D_T(1:9,1:2) ! descriptor for WF coeffs. matrix (1D transposed) 460 integer, allocatable :: iwork(:) 461 integer, external :: numroc 462#else 463 integer, external :: ilaenv 464#endif 465 466 real(dp) :: rn 467 real(dp) :: rn2 468 real(dp) :: E_diff 469 real(dp) :: TrQS 470 real(dp) :: lambda 471 real(dp) :: lambda_n 472 real(dp) :: lambda_d 473 real(dp) :: lambda_n_tot 474 real(dp) :: lambda_d_tot 475 real(dp) :: E_OMM ! OMM functional energy 476 real(dp) :: E_OMM_old ! OMM functional energy at previous step 477 real(dp) :: coeff(0:4) ! coeffs. of the quartic equation 478 real(dp), save :: x_min(1:2) ! position of minimum 479 real(dp), save :: t_precon_scale ! kinetic energy scale for the preconditioning 480 real(dp), save :: cg_tol ! convergence tolerance of CG minimization 481 real(dp), allocatable :: work1(:,:) ! work matrix 482 real(dp), allocatable :: Sd(:,:) ! g^T*s*g 483 real(dp), allocatable :: Sdd(:,:) ! g^T*s*g 484 real(dp), allocatable :: g(:,:) ! gradient 485 real(dp), allocatable :: g_p(:,:) ! gradient at previous step 486 real(dp), allocatable :: pg(:,:) ! preconditioned gradient 487 real(dp), allocatable :: pg_p(:,:) ! preconditioned gradient at previous step 488 real(dp), allocatable :: d(:,:) ! conjugate search direction 489 real(dp), allocatable :: hc(:,:) ! h*c 490 real(dp), allocatable :: hg(:,:) ! h*g 491 real(dp), allocatable :: h_dense(:,:) ! hamiltonian matrix in AO basis (dense 2D or Cholesky transformed) 492 real(dp), allocatable, save :: s_dense(:,:) ! overlap matrix in AO basis (dense 1D or 2D) 493 real(dp), allocatable, save :: p_dense(:,:) ! preconditioning matrix in AO basis (dense 1D or 2D) 494 real(dp), external :: ddot 495#ifdef MPI 496 real(dp) :: Tr_loc 497 real(dp), save :: scale_Cholesky ! Cholesky factorization eigenvalue scaling factor 498 real(dp), allocatable :: d_dense2D(:,:) ! (energy-)density matrix in AO basis (dense 2D) 499 real(dp), allocatable :: work2(:,:) ! work matrix 500 real(dp), allocatable :: work3(:,:) ! work matrix 501#endif 502 503 type(multispin), save :: H(1:2) ! hamiltonian matrix in WF basis 504 type(multispin), save :: S(1:2) ! overlap matrix in WF basis 505 type(multispin), save :: Hd(1:2) ! g^T*h*c 506 type(multispin), save :: Hdd(1:2) ! g^T*h*g 507 type(multispin), save :: sc(1:2) ! s*c 508 type(multispin), save :: sg(1:2) ! s*g 509 type(multispin), save :: twoI(1:2) ! identity matrix x2 510 type(multispin), save :: cd(1:2) ! work matrix 511 512 !**********************************************! 513 514 call timer('m_cg',1) 515 516 ! if this is the first time the minimization module is called, several things need to be done 517 ! (detailed below) 518 if (FirstCall(ispin)) then 519 520#ifdef MPI 521 if (all(FirstCall(1:2))) then 522 523 ! initialize the BLACS process grids 524 if (Use2D) then 525 call blacs_get(-1,0,ictxt_1D) 526 call blacs_gridinit(ictxt_1D,'C',1,Nodes) 527 call blacs_get(ictxt_1D,10,ictxt) 528 call blacs_gridinit(ictxt,'C',processorY,Nodes/processorY) 529 call blacs_get(ictxt,10,ictxt_1D_T) 530 call blacs_gridinit(ictxt_1D_T,'C',Nodes,1) 531 else 532 call blacs_get(-1,0,ictxt) 533 call blacs_gridinit(ictxt,'C',1,Nodes) 534 call blacs_get(ictxt,10,ictxt_1D_T) 535 call blacs_gridinit(ictxt_1D_T,'C',Nodes,1) 536 end if 537 538 end if 539 540 ! calculate the local dimensions of the AO and WF matrices 541 call blacs_gridinfo(ictxt_1D_T,i,j,k,l) 542 N_occ_loc_1D(ispin)=numroc(N_occ,BlockSize_c,k,0,Nodes) 543 if (Use2D) then 544 call blacs_gridinfo(ictxt,i,j,k,l) 545 h_dim_loc(1)=numroc(h_dim,BlockSize,k,0,processorY) 546 h_dim_loc(2)=numroc(h_dim,BlockSize,l,0,Nodes/processorY) 547 N_occ_loc(1,ispin)=numroc(N_occ,BlockSize_c,k,0,processorY) 548 N_occ_loc(2,ispin)=numroc(N_occ,BlockSize_c,l,0,Nodes/processorY) 549 else 550 h_dim_loc(1)=h_dim 551 h_dim_loc(2)=h_dim_loc_1D 552 N_occ_loc(1,ispin)=N_occ 553 N_occ_loc(2,ispin)=N_occ_loc_1D(ispin) 554 end if 555 556 ! initialize the matrix descriptors 557 if (Use2D) then 558 call descinit(desc1_1D,h_dim,h_dim,BlockSize,BlockSize,0,0,ictxt_1D,h_dim,info) 559 if (info/=0) call die('ERROR: desc1_1D setup has failed in minim!') 560 end if 561 call descinit(desc1,h_dim,h_dim,BlockSize,BlockSize,0,0,ictxt,h_dim_loc(1),info) 562 if (info/=0) call die('ERROR: desc1 setup has failed in minim!') 563 call descinit(desc2(1:9,ispin),N_occ,N_occ,BlockSize_c,BlockSize_c,0,0,ictxt,N_occ_loc(1,ispin),info) 564 if (info/=0) call die('ERROR: desc2 setup has failed in minim!') 565 call descinit(desc3(1:9,ispin),N_occ,h_dim,BlockSize_c,BlockSize,0,0,ictxt,N_occ_loc(1,ispin),info) 566 if (info/=0) call die('ERROR: desc3 setup has failed in minim!') 567 call descinit(desc3_1D_T(1:9,ispin),N_occ,h_dim,BlockSize_c,BlockSize,0,0,ictxt_1D_T,N_occ_loc_1D(ispin),info) 568 if (info/=0) call die('ERROR: desc3_1D_T setup has failed in minim!') 569#else 570 N_occ_loc_1D(ispin)=N_occ 571 h_dim_loc(1)=h_dim 572 h_dim_loc(2)=h_dim 573 N_occ_loc(1,ispin)=N_occ 574 N_occ_loc(2,ispin)=N_occ 575#endif 576 577 ! allocate the WF coeffs. matrix 578 allocate(c(ispin)%mtrx(1:N_occ_loc(1,ispin),1:h_dim_loc(2))) 579 if (UseCholesky) allocate(c_orig(ispin)%mtrx(1:N_occ_loc(1,ispin),1:h_dim_loc(2))) 580 581 ! if this is the first SCF step, then we need to initialize the WF coeffs. matrix with random 582 ! numbers between -0.5 and 0.5 (normalize at the end to avoid instabilities), unless we are 583 ! reading them from file 584 if (.not. PreviousCallDiagon) then 585 if (ReadCoeffs) then 586#ifdef MPI 587 write(Node_name,'(i5)') Node 588 if (nspin==1) then 589 WF_COEFFS_filename=trim(slabel)//'.WF_COEFFS.'//trim(adjustl(Node_name)) 590 else 591 if (ispin==1) then 592 WF_COEFFS_filename=trim(slabel)//'.WF_COEFFS_UP.'//trim(adjustl(Node_name)) 593 else if (ispin==2) then 594 WF_COEFFS_filename=trim(slabel)//'.WF_COEFFS_DOWN.'//trim(adjustl(Node_name)) 595 end if 596 end if 597#else 598 if (nspin==1) then 599 WF_COEFFS_filename=trim(slabel)//'.WF_COEFFS' 600 else 601 if (ispin==1) then 602 WF_COEFFS_filename=trim(slabel)//'.WF_COEFFS_UP' 603 else if (ispin==2) then 604 WF_COEFFS_filename=trim(slabel)//'.WF_COEFFS_DOWN' 605 end if 606 end if 607#endif 608 inquire(file=trim(WF_COEFFS_filename),exist=ReadCoeffs2) 609 else 610 ReadCoeffs2=.false. 611 end if 612 if (ReadCoeffs2) then 613 call io_assign(i) 614 open(i,file=trim(WF_COEFFS_filename),form='unformatted',status='old',action='read') 615 if (UseCholesky) then 616 read(i) c_orig(ispin)%mtrx 617 else 618 read(i) c(ispin)%mtrx 619 end if 620 call io_close(i) 621 else 622 if ((ispin==1) .or. (N_occ_diff/=0)) then 623 call rand_init 624 do i=1,h_dim_loc(2) 625 do j=1,N_occ_loc(1,ispin) 626 call random_number(rn) 627 call random_number(rn2) 628 c(ispin)%mtrx(j,i)=sign(0.5_dp*rn,rn2-0.5_dp) 629 end do 630 end do 631 c(ispin)%mtrx=1.0d-2*c(ispin)%mtrx/sqrt(real(h_dim,dp)) 632 else 633 c(2)%mtrx=c(1)%mtrx 634 end if 635 end if 636 end if 637 638 if (all(FirstCall(1:2))) then 639 t_precon_scale=fdf_physical('OMM.TPreconScale',10.0_dp,'Ry') 640 cg_tol=fdf_get('OMM.RelTol',1.0d-9) 641 end if 642 643 end if 644 645 ! if the previous SCF iteration was solved by diagonalization, then we take the lowest N_occ 646 ! eigenfunctions as our initial guess, but we must take care to distribute them properly amongst 647 ! the MPI processes for parallel runs 648 if (PreviousCallDiagon) then 649#ifdef MPI 650 allocate(work1(1:N_occ_loc_1D(ispin),1:h_dim)) 651 allocate(work2(1:h_dim,1)) 652 ! i: receiving node 653 ! j: local orbital num. on receiving node 654 ! k: global orbital num. 655 ! l: sending node 656 ! m: local orbital num. on sending node 657 k=0 658 do i=0,Nodes-1 659 n=N_occ_loc_1D(ispin) 660 call mpi_bcast(n,1,mpi_integer,i,mpi_comm_world,info) 661 do j=1,n 662 k=k+1 663 call WhichNodeOrb(k,Nodes,l) 664 call GlobalToLocalOrb(k,l,Nodes,m) 665 if (Node==l) then 666 if (Node==i) then 667 work1(j,1:h_dim)=psi(1:h_dim,m,ispin) 668 else 669 call mpi_send(psi(1,m,ispin),h_dim,mpi_double_precision,i,k,mpi_comm_world,info) 670 end if 671 else if (Node==i) then 672 call mpi_recv(work2(1,1),h_dim,mpi_double_precision,l,k,mpi_comm_world,mpi_status,info) 673 work1(j,1:h_dim)=work2(1:h_dim,1) 674 end if 675 end do 676 end do 677 deallocate(work2) 678 call pdgemr2d(N_occ,h_dim,work1,1,1,desc3_1D_T(1:9,ispin),c(ispin)%mtrx,1,1,desc3(1:9,ispin),ictxt) 679 deallocate(work1) 680#else 681 do i=1,N_occ 682 c(ispin)%mtrx(i,1:h_dim)=psi(1:h_dim,i,ispin) 683 end do 684#endif 685 end if 686 687#ifdef MPI 688 if (Use2D) allocate(d_dense2D(1:h_dim_loc(1),1:h_dim_loc(2))) 689#endif 690 691 if (CalcE) then 692 693 ! calculate the energy-density matrix: e=c^T*[(2*I-S)*(H+eta*S)]*c 694#ifdef MPI 695 call pdgeadd('T',N_occ,N_occ,x_min(ispin),Hd(ispin)%mtrx,1,1,desc2(1:9,ispin),1.0_dp,H(ispin)%mtrx,1,1,desc2(1:9,ispin)) 696#else 697 do k=1,N_occ 698 do l=1,N_occ 699 H(ispin)%mtrx(k,l)=H(ispin)%mtrx(k,l)+x_min(ispin)*Hd(ispin)%mtrx(l,k) 700 end do 701 end do 702#endif 703 H(ispin)%mtrx=H(ispin)%mtrx+x_min(ispin)*Hd(ispin)%mtrx+x_min(ispin)**2*Hdd(ispin)%mtrx 704 allocate(work1(1:N_occ_loc(1,ispin),1:h_dim_loc(2))) 705 if (UseCholesky) then 706#ifdef MPI 707 if (Use2D) then 708 call calc_densmat(h_dim,N_occ,ispin,H(ispin)%mtrx+eta*S(ispin)%mtrx,c_orig(ispin)%mtrx,d_dense2D,work1,cd(ispin)%mtrx) 709 else 710 call calc_densmat(h_dim,N_occ,ispin,H(ispin)%mtrx+eta*S(ispin)%mtrx,c_orig(ispin)%mtrx,d_dense1D,work1,cd(ispin)%mtrx) 711 end if 712#else 713 call calc_densmat(h_dim,N_occ,ispin,H(ispin)%mtrx+eta*S(ispin)%mtrx,c_orig(ispin)%mtrx,d_dense1D,work1,cd(ispin)%mtrx) 714#endif 715 else 716#ifdef MPI 717 if (Use2D) then 718 call calc_densmat(h_dim,N_occ,ispin,H(ispin)%mtrx+eta*S(ispin)%mtrx,c(ispin)%mtrx,d_dense2D,work1,cd(ispin)%mtrx) 719 else 720 call calc_densmat(h_dim,N_occ,ispin,H(ispin)%mtrx+eta*S(ispin)%mtrx,c(ispin)%mtrx,d_dense1D,work1,cd(ispin)%mtrx) 721 end if 722#else 723 call calc_densmat(h_dim,N_occ,ispin,H(ispin)%mtrx+eta*S(ispin)%mtrx,c(ispin)%mtrx,d_dense1D,work1,cd(ispin)%mtrx) 724#endif 725 end if 726#ifdef MPI 727 if (Use2D) call pdgemr2d(h_dim,h_dim,d_dense2D,1,1,desc1,d_dense1D,1,1,desc1_1D,ictxt) 728#endif 729 if (nspin==1) d_dense1D=2.0_dp*d_dense1D 730 731 if (WriteCoeffs) then 732 call io_assign(i) 733#ifdef MPI 734 write(Node_name,'(i5)') Node 735 if (nspin==1) then 736 WF_COEFFS_filename=trim(slabel)//'.WF_COEFFS.'//trim(adjustl(Node_name)) 737 else 738 if (ispin==1) then 739 WF_COEFFS_filename=trim(slabel)//'.WF_COEFFS_UP.'//trim(adjustl(Node_name)) 740 else if (ispin==2) then 741 WF_COEFFS_filename=trim(slabel)//'.WF_COEFFS_DOWN.'//trim(adjustl(Node_name)) 742 end if 743 end if 744#else 745 if (nspin==1) then 746 WF_COEFFS_filename=trim(slabel)//'.WF_COEFFS' 747 else 748 if (ispin==1) then 749 WF_COEFFS_filename=trim(slabel)//'.WF_COEFFS_UP' 750 else if (ispin==2) then 751 WF_COEFFS_filename=trim(slabel)//'.WF_COEFFS_DOWN' 752 end if 753 end if 754#endif 755 open(i,file=trim(WF_COEFFS_filename),form='unformatted',status='replace',action='write') 756 if (UseCholesky) then 757 write(i) c_orig(ispin)%mtrx 758 else 759 write(i) c(ispin)%mtrx 760 end if 761 call io_close(i) 762 end if 763 764 deallocate(work1) 765 deallocate(cd(ispin)%mtrx) 766 if (allocated(p_dense)) deallocate(p_dense) 767 if (.not. UseCholesky) then 768 deallocate(sg(ispin)%mtrx) 769 deallocate(sc(ispin)%mtrx) 770 end if 771 if (allocated(s_dense)) deallocate(s_dense) 772 deallocate(S(ispin)%mtrx) 773 deallocate(Hdd(ispin)%mtrx) 774 deallocate(Hd(ispin)%mtrx) 775 deallocate(H(ispin)%mtrx) 776#ifdef MPI 777 if (Use2D) deallocate(d_dense2D) 778#endif 779 780 call timer('m_cg',2) 781 782 return 783 784 end if 785 786 if (.not. allocated(H(ispin)%mtrx)) allocate(H(ispin)%mtrx(1:N_occ_loc(1,ispin),1:N_occ_loc(2,ispin))) 787 if (.not. allocated(Hd(ispin)%mtrx)) allocate(Hd(ispin)%mtrx(1:N_occ_loc(1,ispin),1:N_occ_loc(2,ispin))) 788 if (.not. allocated(Hdd(ispin)%mtrx)) allocate(Hdd(ispin)%mtrx(1:N_occ_loc(1,ispin),1:N_occ_loc(2,ispin))) 789 if (.not. allocated(S(ispin)%mtrx)) then 790 allocate(S(ispin)%mtrx(1:N_occ_loc(1,ispin),1:N_occ_loc(2,ispin))) 791 if (.not. allocated(s_dense)) then 792 allocate(s_dense(1:h_dim_loc(1),1:h_dim_loc(2))) 793#ifdef MPI 794 if (Use2D) then 795 call pdgemr2d(h_dim,h_dim,s_dense1D,1,1,desc1_1D,s_dense,1,1,desc1,ictxt) 796 else 797 s_dense=s_dense1D 798 end if 799#else 800 s_dense=s_dense1D 801#endif 802 if (UseCholesky) then 803#ifdef MPI 804 call pdpotrf('U',h_dim,s_dense,1,1,desc1,info) 805 if (info/=0) call die('ERROR: pdpotrf has failed in minim!') 806#else 807 call dpotrf('U',h_dim,s_dense,h_dim,info) 808 if (info/=0) call die('ERROR: dpotrf has failed in minim!') 809#endif 810 end if 811 end if 812 new_s=.true. 813 else 814 new_s=.false. 815 end if 816 if (.not. UseCholesky) then 817 if (.not. allocated(sc(ispin)%mtrx)) allocate(sc(ispin)%mtrx(1:N_occ_loc(1,ispin),1:h_dim_loc(2))) 818 if (.not. allocated(sg(ispin)%mtrx)) allocate(sg(ispin)%mtrx(1:N_occ_loc(1,ispin),1:h_dim_loc(2))) 819 end if 820 821 if (UseCholesky) then 822 if ((new_s .and. (.not. FirstCall(ispin))) .or. & 823 PreviousCallDiagon .or. & 824 (ReadCoeffs .and. FirstCall(ispin))) then 825 if (.not. PreviousCallDiagon) c(ispin)%mtrx=c_orig(ispin)%mtrx 826#ifdef MPI 827 call pdtrmm('R','U','T','N',N_occ,h_dim,1.0_dp,s_dense,1,1,desc1,c(ispin)%mtrx,1,1,desc3(1:9,ispin)) 828#else 829 call dtrmm('R','U','T','N',N_occ,h_dim,1.0_dp,s_dense,h_dim,c(ispin)%mtrx,N_occ) 830#endif 831 end if 832 end if 833 834#ifdef MPI 835 if (UseCholesky .or. Use2D) then 836 allocate(h_dense(1:h_dim_loc(1),1:h_dim_loc(2))) 837 if (Use2D) then 838 call pdgemr2d(h_dim,h_dim,h_dense1D,1,1,desc1_1D,h_dense,1,1,desc1,ictxt) 839 else 840 h_dense=h_dense1D 841 end if 842 end if 843#else 844 if (UseCholesky) then 845 allocate(h_dense(1:h_dim_loc(1),1:h_dim_loc(2))) 846 h_dense=h_dense1D 847 end if 848#endif 849 if (UseCholesky) then 850#ifdef MPI 851 call pdsygst(1,'U',h_dim,h_dense,1,1,desc1,s_dense,1,1,desc1,scale_Cholesky,info) 852 if (info/=0) call die('ERROR: pdsygst has failed in minim!') 853 allocate(work1(1:h_dim_loc(1),1:h_dim_loc(2))) 854 allocate(work2(1:h_dim_loc(1),1:h_dim_loc(2))) 855 allocate(work3(1:h_dim_loc(1),1:h_dim_loc(2))) 856 call pdtran(h_dim,h_dim,1.0_dp,h_dense,1,1,desc1,0.0_dp,work1,1,1,desc1) 857 work2=0.0_dp 858 call pdlaset('U',h_dim,h_dim,1.0_dp,0.5_dp,work2,1,1,desc1) 859 work3=0.0_dp 860 call pdlaset('L',h_dim,h_dim,1.0_dp,0.5_dp,work3,1,1,desc1) 861 h_dense=work2*h_dense+work3*work1 862 deallocate(work3) 863 deallocate(work2) 864 deallocate(work1) 865#else 866 call dsygst(1,'U',h_dim,h_dense,h_dim,s_dense,h_dim,info) 867 if (info/=0) call die('ERROR: dsygst has failed in minim!') 868 do i=1,h_dim-1 869 do j=i+1,h_dim 870 h_dense(j,i)=h_dense(i,j) 871 end do 872 end do 873#endif 874 end if 875 876 ! calculate the preconditioning matrix (s+t/tau)^-1 877 if (UpdatePrecon .and. (ispin==1)) then 878 if (.not. allocated(p_dense)) allocate(p_dense(1:h_dim_loc(1),1:h_dim_loc(2))) 879#ifdef MPI 880 if (Use2D) then 881 call pdgemr2d(h_dim,h_dim,t_dense1D,1,1,desc1_1D,p_dense,1,1,desc1,ictxt) 882 else 883 p_dense=t_dense1D 884 end if 885#else 886 p_dense=t_dense1D 887#endif 888 p_dense=s_dense+p_dense/t_precon_scale 889#ifdef MPI 890 allocate(ipiv(1:h_dim_loc(1)+BlockSize)) 891 call pdgetrf(h_dim,h_dim,p_dense,1,1,desc1,ipiv,info) 892 if (info/=0) call die('ERROR: pdgetrf has failed in minim!') 893 allocate(work1(1:1,1:1)) 894 allocate(iwork(1:1)) 895 call pdgetri(h_dim,p_dense,1,1,desc1,ipiv,work1,-1,iwork,-1,info) 896 if (info/=0) call die('ERROR: pdgetri has failed in minim!') 897 liwork=iwork(1) 898 deallocate(iwork) 899 lwork=work1(1,1) 900 deallocate(work1) 901 allocate(work1(1:lwork,1:1)) 902 allocate(iwork(1:liwork)) 903 call pdgetri(h_dim,p_dense,1,1,desc1,ipiv,work1,lwork,iwork,liwork,info) 904 if (info/=0) call die('ERROR: pdgetri has failed in minim!') 905 deallocate(iwork) 906 deallocate(work1) 907 deallocate(ipiv) 908#else 909 allocate(ipiv(1:h_dim)) 910 lwork=h_dim*ilaenv(1,'dsytrf','U',h_dim,-1,-1,-1) 911 allocate(work1(1:lwork,1:1)) 912 call dsytrf('U',h_dim,p_dense,h_dim,ipiv,work1,lwork,info) 913 if (info/=0) call die('ERROR: dsytrf has failed in minim!') 914 deallocate(work1) 915 allocate(work1(1:h_dim,1:1)) 916 call dsytri('U',h_dim,p_dense,h_dim,ipiv,work1,info) 917 if (info/=0) call die('ERROR: dsytri has failed in minim!') 918 deallocate(work1) 919 deallocate(ipiv) 920 do i=1,h_dim-1 921 do j=i+1,h_dim 922 p_dense(j,i)=p_dense(i,j) 923 end do 924 end do 925#endif 926 end if 927 928 allocate(Sd(1:N_occ_loc(1,ispin),1:N_occ_loc(2,ispin))) 929 allocate(Sdd(1:N_occ_loc(1,ispin),1:N_occ_loc(2,ispin))) 930 allocate(g(1:N_occ_loc(1,ispin),1:h_dim_loc(2))) 931 allocate(g_p(1:N_occ_loc(1,ispin),1:h_dim_loc(2))) 932 if (UsePrecon) then 933 allocate(pg(1:N_occ_loc(1,ispin),1:h_dim_loc(2))) 934 allocate(pg_p(1:N_occ_loc(1,ispin),1:h_dim_loc(2))) 935 end if 936 allocate(d(1:N_occ_loc(1,ispin),1:h_dim_loc(2))) 937 allocate(hc(1:N_occ_loc(1,ispin),1:h_dim_loc(2))) 938 allocate(hg(1:N_occ_loc(1,ispin),1:h_dim_loc(2))) 939 allocate(work1(1:N_occ_loc(1,ispin),1:N_occ_loc(2,ispin))) 940 941 ! first we calculate the energy and gradient for our initial guess, with the following steps: 942 ! -calculate the hamiltonian in WF basis: H=c^T*h*c 943 if (allocated(h_dense)) then 944 call calc_A(h_dim,N_occ,ispin,h_dense,c(ispin)%mtrx,H(ispin)%mtrx,hc) 945 else 946 call calc_A(h_dim,N_occ,ispin,h_dense1D,c(ispin)%mtrx,H(ispin)%mtrx,hc) 947 end if 948 ! -calculate the overlap matrix in WF basis: S=c^T*s*c 949 if (UseCholesky) then 950#ifdef MPI 951 if (new_s .or. PreviousCallDiagon) call pdgemm('N','T',N_occ,N_occ,h_dim,1.0_dp,c(ispin)%mtrx,1,1,desc3(1:9,ispin),& 952 c(ispin)%mtrx,1,1,desc3(1:9,ispin),0.0_dp,S(ispin)%mtrx,1,1,desc2(1:9,ispin)) 953#else 954 if (new_s .or. PreviousCallDiagon) call dgemm('N','T',N_occ,N_occ,h_dim,1.0_dp,c(ispin)%mtrx,N_occ,c(ispin)%mtrx,N_occ,0.0_dp,& 955 S(ispin)%mtrx,N_occ) 956#endif 957 else 958 if (new_s .or. PreviousCallDiagon) then 959 call calc_A(h_dim,N_occ,ispin,s_dense,c(ispin)%mtrx,S(ispin)%mtrx,sc(ispin)%mtrx) 960 else 961 sc(ispin)%mtrx=sc(ispin)%mtrx+x_min(ispin)*sg(ispin)%mtrx 962 end if 963 end if 964 ! -calculate the gradient: g=2*(2*h*c-s*c*H-h*c*S) 965 ! (note that we *reuse* h*c and s*c contained in hc and sc from the previous call to calc_A) 966 if (UseCholesky) then 967 call calc_grad(h_dim,N_occ,ispin,H(ispin)%mtrx,S(ispin)%mtrx,g,hc,c(ispin)%mtrx) 968 else 969 call calc_grad(h_dim,N_occ,ispin,H(ispin)%mtrx,S(ispin)%mtrx,g,hc,sc(ispin)%mtrx) 970 end if 971 ! -calculate the preconditioned gradient by premultiplying g by (s+t/tau)^-1 972 if (UsePrecon) then 973#ifdef MPI 974 call pdgemm('N','N',N_occ,h_dim,h_dim,1.0_dp,g,1,1,desc3(1:9,ispin),p_dense,1,1,desc1,0.0_dp,pg,1,1,desc3(1:9,ispin)) 975#else 976 call dgemm('N','N',N_occ,h_dim,h_dim,1.0_dp,g,N_occ,p_dense,h_dim,0.0_dp,pg,N_occ) 977#endif 978 end if 979 ! -calculate the additional matrices: 980 ! Hd=g^T*h*c 981 ! Sd=g^T*s*c 982 ! Hdd=g^T*h*g 983 ! Sdd=g^T*s*g 984 ! (again, h*c has already been calculated, although h*g has not) 985 ! and, finally, the coeffs. of the quartic line search equation in the direction g 986 ! (the energy at c is given by the zeroth-order coeff. c(0)) 987 if (UsePrecon) then 988#ifdef MPI 989 call pdgemm('N','T',N_occ,N_occ,h_dim,1.0_dp,hc,1,1,desc3(1:9,ispin),pg,1,1,desc3(1:9,ispin),0.0_dp,Hd(ispin)%mtrx,1,1,& 990 desc2(1:9,ispin)) 991 call pdgemm('N','T',N_occ,N_occ,h_dim,1.0_dp,sc(ispin)%mtrx,1,1,desc3(1:9,ispin),pg,1,1,desc3(1:9,ispin),0.0_dp,Sd,1,1,& 992 desc2(1:9,ispin)) 993#else 994 call dgemm('N','T',N_occ,N_occ,h_dim,1.0_dp,hc,N_occ,pg,N_occ,0.0_dp,Hd(ispin)%mtrx,N_occ) 995 call dgemm('N','T',N_occ,N_occ,h_dim,1.0_dp,sc(ispin)%mtrx,N_occ,pg,N_occ,0.0_dp,Sd,N_occ) 996#endif 997 else 998#ifdef MPI 999 call pdgemm('N','T',N_occ,N_occ,h_dim,1.0_dp,hc,1,1,desc3(1:9,ispin),g,1,1,desc3(1:9,ispin),0.0_dp,Hd(ispin)%mtrx,1,1,& 1000 desc2(1:9,ispin)) 1001#else 1002 call dgemm('N','T',N_occ,N_occ,h_dim,1.0_dp,hc,N_occ,g,N_occ,0.0_dp,Hd(ispin)%mtrx,N_occ) 1003#endif 1004 if (UseCholesky) then 1005#ifdef MPI 1006 call pdgemm('N','T',N_occ,N_occ,h_dim,1.0_dp,c(ispin)%mtrx,1,1,desc3(1:9,ispin),g,1,1,desc3(1:9,ispin),0.0_dp,Sd,1,1,& 1007 desc2(1:9,ispin)) 1008#else 1009 call dgemm('N','T',N_occ,N_occ,h_dim,1.0_dp,c(ispin)%mtrx,N_occ,g,N_occ,0.0_dp,Sd,N_occ) 1010#endif 1011 else 1012#ifdef MPI 1013 call pdgemm('N','T',N_occ,N_occ,h_dim,1.0_dp,sc(ispin)%mtrx,1,1,desc3(1:9,ispin),g,1,1,desc3(1:9,ispin),0.0_dp,Sd,1,1,& 1014 desc2(1:9,ispin)) 1015#else 1016 call dgemm('N','T',N_occ,N_occ,h_dim,1.0_dp,sc(ispin)%mtrx,N_occ,g,N_occ,0.0_dp,Sd,N_occ) 1017#endif 1018 end if 1019 end if 1020 if (UsePrecon) then 1021 if (allocated(h_dense)) then 1022 call calc_A(h_dim,N_occ,ispin,h_dense,pg,Hdd(ispin)%mtrx,hg) 1023 else 1024 call calc_A(h_dim,N_occ,ispin,h_dense1D,pg,Hdd(ispin)%mtrx,hg) 1025 end if 1026 call calc_A(h_dim,N_occ,ispin,s_dense,pg,Sdd,sg(ispin)%mtrx) 1027 else 1028 if (allocated(h_dense)) then 1029 call calc_A(h_dim,N_occ,ispin,h_dense,g,Hdd(ispin)%mtrx,hg) 1030 else 1031 call calc_A(h_dim,N_occ,ispin,h_dense1D,g,Hdd(ispin)%mtrx,hg) 1032 end if 1033 if (UseCholesky) then 1034#ifdef MPI 1035 call pdgemm('N','T',N_occ,N_occ,h_dim,1.0_dp,g,1,1,desc3(1:9,ispin),g,1,1,desc3(1:9,ispin),0.0_dp,Sdd,1,1,desc2(1:9,ispin)) 1036#else 1037 call dgemm('N','T',N_occ,N_occ,h_dim,1.0_dp,g,N_occ,g,N_occ,0.0_dp,Sdd,N_occ) 1038#endif 1039 else 1040 call calc_A(h_dim,N_occ,ispin,s_dense,g,Sdd,sg(ispin)%mtrx) 1041 end if 1042 end if 1043 call calc_coeff(h_dim,N_occ,ispin,H(ispin)%mtrx,S(ispin)%mtrx,Hd(ispin)%mtrx,Sd,Hdd(ispin)%mtrx,Sdd,coeff,work1) 1044 E_OMM=coeff(0) 1045 1046 ! this is the main loop of the CG algorithm. We perform a series of line minimizations, with the 1047 ! gradient g at each new step being modified to obtain the search direction d 1048 if (Node==0) then 1049 if (ispin==1) then 1050 print'(a)', '+---------------------------------------------+' 1051 if (UseCholesky) then 1052 print'(a)', '| OMM (Cholesky factorization) |' 1053 else if (UsePrecon) then 1054 print'(a)', '| OMM (preconditioning) |' 1055 else 1056 print'(a)', '| OMM |' 1057 end if 1058 print'(a)', '+---------------------------------------------+' 1059 end if 1060 if (nspin==2) then 1061 if (ispin==1) then 1062 print'(a)', '| up spin |' 1063 else 1064 print'(a)', '| down spin |' 1065 end if 1066 print'(a)', '+---------------------------------------------+' 1067 end if 1068 if (LongOut) print'(a)', '| E_OMM E_diff |' 1069 end if 1070 conv=.false. 1071 d=0.0_dp 1072 icg=0 1073 do i=1,n_step_max 1074 lambda=0.0_dp 1075 do j=1,h_dim*N_occ-1 1076 if (UsePrecon) then 1077 d=pg+lambda*d 1078 else 1079 d=g+lambda*d 1080 end if 1081 g_p=g 1082 if (UsePrecon) pg_p=pg 1083 E_OMM_old=E_OMM 1084 ! if this is not the first CG step, we have to recalculate Hd, Sd, Hdd, Sdd, and the coeffs. 1085 if (icg>0) then 1086#ifdef MPI 1087 call pdgemm('N','T',N_occ,N_occ,h_dim,1.0_dp,hc,1,1,desc3(1:9,ispin),d,1,1,desc3(1:9,ispin),0.0_dp,Hd(ispin)%mtrx,1,1,& 1088 desc2(1:9,ispin)) 1089#else 1090 call dgemm('N','T',N_occ,N_occ,h_dim,1.0_dp,hc,N_occ,d,N_occ,0.0_dp,Hd(ispin)%mtrx,N_occ) 1091#endif 1092 if (UseCholesky) then 1093#ifdef MPI 1094 call pdgemm('N','T',N_occ,N_occ,h_dim,1.0_dp,c(ispin)%mtrx,1,1,desc3(1:9,ispin),d,1,1,desc3(1:9,ispin),0.0_dp,Sd,1,1,& 1095 desc2(1:9,ispin)) 1096#else 1097 call dgemm('N','T',N_occ,N_occ,h_dim,1.0_dp,c(ispin)%mtrx,N_occ,d,N_occ,0.0_dp,Sd,N_occ) 1098#endif 1099 else 1100#ifdef MPI 1101 call pdgemm('N','T',N_occ,N_occ,h_dim,1.0_dp,sc(ispin)%mtrx,1,1,desc3(1:9,ispin),d,1,1,desc3(1:9,ispin),0.0_dp,Sd,1,1,& 1102 desc2(1:9,ispin)) 1103#else 1104 call dgemm('N','T',N_occ,N_occ,h_dim,1.0_dp,sc(ispin)%mtrx,N_occ,d,N_occ,0.0_dp,Sd,N_occ) 1105#endif 1106 end if 1107 if (allocated(h_dense)) then 1108 call calc_A(h_dim,N_occ,ispin,h_dense,d,Hdd(ispin)%mtrx,hg) 1109 else 1110 call calc_A(h_dim,N_occ,ispin,h_dense1D,d,Hdd(ispin)%mtrx,hg) 1111 end if 1112 if (UseCholesky) then 1113#ifdef MPI 1114 call pdgemm('N','T',N_occ,N_occ,h_dim,1.0_dp,d,1,1,desc3(1:9,ispin),d,1,1,desc3(1:9,ispin),0.0_dp,Sdd,1,1,& 1115 desc2(1:9,ispin)) 1116#else 1117 call dgemm('N','T',N_occ,N_occ,h_dim,1.0_dp,d,N_occ,d,N_occ,0.0_dp,Sdd,N_occ) 1118#endif 1119 else 1120 call calc_A(h_dim,N_occ,ispin,s_dense,d,Sdd,sg(ispin)%mtrx) 1121 end if 1122 call calc_coeff(h_dim,N_occ,ispin,H(ispin)%mtrx,S(ispin)%mtrx,Hd(ispin)%mtrx,Sd,Hdd(ispin)%mtrx,Sdd,coeff,work1) 1123 end if 1124 ! using the coeffs. calculated anlytically, we can find the minimum of the functional in the 1125 ! search direction, and calculate the energy at that minimum 1126 call solve_quartic(coeff(0:4),x_min(ispin),ls_fail) 1127 ! in certain regions of the coeffs. space the line search gives no minimum--this occurs when there 1128 ! are positive eigenvalues in the eigenspecturm which are significantly occupied by our coeffs. 1129 ! matrix; the only known cure, unfortunately, is to scale down the entire matrix, thus returning to 1130 ! a safe region of the coeffs. space. 1131 if (ls_fail) then 1132 if (Node==0) print'(a)', '| WARNING: Rescaling coefficients! |' 1133 E_OMM=3.0*E_OMM 1134 c(ispin)%mtrx=0.5_dp*c(ispin)%mtrx 1135 ls_conv=.false. 1136 else 1137 ! if the line search is successful, move to the minimum 1138 E_OMM=coeff(4)*x_min(ispin)**4+& 1139 coeff(3)*x_min(ispin)**3+& 1140 coeff(2)*x_min(ispin)**2+& 1141 coeff(1)*x_min(ispin)+& 1142 coeff(0) 1143 c(ispin)%mtrx=c(ispin)%mtrx+x_min(ispin)*d 1144 ls_conv=.true. 1145 end if 1146 ! recalculate S at the minimum (or for the rescaled coeffs.) 1147 if (ls_fail) then 1148 S(ispin)%mtrx=0.25_dp*S(ispin)%mtrx 1149 else 1150#ifdef MPI 1151 call pdgeadd('T',N_occ,N_occ,x_min(ispin),Sd,1,1,desc2(1:9,ispin),1.0_dp,S(ispin)%mtrx,1,1,desc2(1:9,ispin)) 1152#else 1153 do k=1,N_occ 1154 do l=1,N_occ 1155 S(ispin)%mtrx(k,l)=S(ispin)%mtrx(k,l)+x_min(ispin)*Sd(l,k) 1156 end do 1157 end do 1158#endif 1159 S(ispin)%mtrx=S(ispin)%mtrx+x_min(ispin)*Sd+x_min(ispin)**2*Sdd 1160 end if 1161 E_diff=2.0_dp*abs((E_OMM-E_OMM_old)/(E_OMM+E_OMM_old)) 1162 if ((Node==0) .and. LongOut) print'(a,2(1x,i5),2(1x,es15.7e3),1x,a)', '|', i, j, E_OMM, E_diff, '|' 1163 icg=icg+1 1164 if (E_diff<=cg_tol) then 1165 conv=.true. 1166 exit 1167 end if 1168 ! recalculate H at the minimum (or for the rescaled coeffs.) 1169 if (ls_fail) then 1170 H(ispin)%mtrx=0.25_dp*H(ispin)%mtrx 1171 else 1172#ifdef MPI 1173 call pdgeadd('T',N_occ,N_occ,x_min(ispin),Hd(ispin)%mtrx,1,1,desc2(1:9,ispin),1.0_dp,H(ispin)%mtrx,1,1,desc2(1:9,ispin)) 1174#else 1175 do k=1,N_occ 1176 do l=1,N_occ 1177 H(ispin)%mtrx(k,l)=H(ispin)%mtrx(k,l)+x_min(ispin)*Hd(ispin)%mtrx(l,k) 1178 end do 1179 end do 1180#endif 1181 H(ispin)%mtrx=H(ispin)%mtrx+x_min(ispin)*Hd(ispin)%mtrx+x_min(ispin)**2*Hdd(ispin)%mtrx 1182 end if 1183 ! recalculate g at the minimum (or for the rescaled coeffs.) 1184 if (ls_fail) then 1185 hc=0.5_dp*hc 1186 if (.not. UseCholesky) sc(ispin)%mtrx=0.5_dp*sc(ispin)%mtrx 1187 g=g_p+1.5_dp*hc 1188 else 1189 hc=hc+x_min(ispin)*hg 1190 if (UseCholesky) then 1191 call calc_grad(h_dim,N_occ,ispin,H(ispin)%mtrx,S(ispin)%mtrx,g,hc,c(ispin)%mtrx) 1192 else 1193 sc(ispin)%mtrx=sc(ispin)%mtrx+x_min(ispin)*sg(ispin)%mtrx 1194 call calc_grad(h_dim,N_occ,ispin,H(ispin)%mtrx,S(ispin)%mtrx,g,hc,sc(ispin)%mtrx) 1195 end if 1196 end if 1197 if (UsePrecon) then 1198#ifdef MPI 1199 call pdgemm('N','N',N_occ,h_dim,h_dim,1.0_dp,g,1,1,desc3(1:9,ispin),p_dense,1,1,desc1,0.0_dp,pg,1,1,desc3(1:9,ispin)) 1200#else 1201 call dgemm('N','N',N_occ,h_dim,h_dim,1.0_dp,g,N_occ,p_dense,h_dim,0.0_dp,pg,N_occ) 1202#endif 1203 end if 1204 if (ls_conv) then 1205 if (UsePrecon) then 1206 lambda_n=ddot(N_occ_loc(1,ispin)*h_dim_loc(2),pg,1,g-g_p,1) 1207 lambda_d=ddot(N_occ_loc(1,ispin)*h_dim_loc(2),pg_p,1,g_p,1) 1208 else 1209 lambda_n=ddot(N_occ_loc(1,ispin)*h_dim_loc(2),g,1,g-g_p,1) 1210 lambda_d=ddot(N_occ_loc(1,ispin)*h_dim_loc(2),g_p,1,g_p,1) 1211 end if 1212#ifdef MPI 1213 call mpi_allreduce(lambda_n,lambda_n_tot,1,mpi_double_precision,mpi_sum,mpi_comm_world,info) 1214 call mpi_allreduce(lambda_d,lambda_d_tot,1,mpi_double_precision,mpi_sum,mpi_comm_world,info) 1215 lambda=lambda_n_tot/lambda_d_tot 1216#else 1217 lambda=lambda_n/lambda_d 1218#endif 1219 else 1220 exit 1221 end if 1222 end do 1223 if (conv) exit 1224 end do 1225 if (i>n_step_max) then 1226 if (Node==0) print'(a)', '| WARNING: OMM failed to converge! |' 1227 end if 1228 if ((Node==0) .and. LongOut) print'(a)', '+---------------------------------------------+' 1229 1230 deallocate(work1) 1231 deallocate(hg) 1232 deallocate(hc) 1233 deallocate(d) 1234 if (UsePrecon) then 1235 deallocate(pg_p) 1236 deallocate(pg) 1237 end if 1238 deallocate(g_p) 1239 deallocate(g) 1240 deallocate(Sdd) 1241 deallocate(Sd) 1242 if (allocated(h_dense)) deallocate(h_dense) 1243 1244 ! calculate the density matrix: d=c*(2*I-S)*c^T 1245 if (.not. allocated(twoI(ispin)%mtrx)) then 1246 allocate(twoI(ispin)%mtrx(1:N_occ_loc(1,ispin),1:N_occ_loc(2,ispin))) 1247#ifdef MPI 1248 call pdlaset('A',N_occ,N_occ,0.0_dp,2.0_dp,twoI(ispin)%mtrx,1,1,desc2(1:9,ispin)) 1249#else 1250 twoI(ispin)%mtrx=0.0_dp 1251 do i=1,N_occ 1252 twoI(ispin)%mtrx(i,i)=2.0_dp 1253 end do 1254#endif 1255 end if 1256 if (.not. allocated(cd(ispin)%mtrx)) allocate(cd(ispin)%mtrx(1:N_occ_loc(1,ispin),1:h_dim_loc(2))) 1257 if (UseCholesky) then 1258 c_orig(ispin)%mtrx=c(ispin)%mtrx 1259#ifdef MPI 1260 call pdtrsm('R','U','T','N',N_occ,h_dim,1.0_dp,s_dense,1,1,desc1,c_orig(ispin)%mtrx,1,1,desc3(1:9,ispin)) 1261 if (Use2D) then 1262 call calc_densmat(h_dim,N_occ,ispin,twoI(ispin)%mtrx-S(ispin)%mtrx,c_orig(ispin)%mtrx,d_dense2D,cd(ispin)%mtrx) 1263 else 1264 call calc_densmat(h_dim,N_occ,ispin,twoI(ispin)%mtrx-S(ispin)%mtrx,c_orig(ispin)%mtrx,d_dense1D,cd(ispin)%mtrx) 1265 end if 1266#else 1267 call dtrsm('R','U','T','N',N_occ,h_dim,1.0_dp,s_dense,h_dim,c_orig(ispin)%mtrx,N_occ) 1268 call calc_densmat(h_dim,N_occ,ispin,twoI(ispin)%mtrx-S(ispin)%mtrx,c_orig(ispin)%mtrx,d_dense1D,cd(ispin)%mtrx) 1269#endif 1270 else 1271#ifdef MPI 1272 if (Use2D) then 1273 call calc_densmat(h_dim,N_occ,ispin,twoI(ispin)%mtrx-S(ispin)%mtrx,c(ispin)%mtrx,d_dense2D,cd(ispin)%mtrx) 1274 else 1275 call calc_densmat(h_dim,N_occ,ispin,twoI(ispin)%mtrx-S(ispin)%mtrx,c(ispin)%mtrx,d_dense1D,cd(ispin)%mtrx) 1276 end if 1277#else 1278 call calc_densmat(h_dim,N_occ,ispin,twoI(ispin)%mtrx-S(ispin)%mtrx,c(ispin)%mtrx,d_dense1D,cd(ispin)%mtrx) 1279#endif 1280 end if 1281#ifdef MPI 1282 if (Use2D) then 1283 call pdgemr2d(h_dim,h_dim,d_dense2D,1,1,desc1,d_dense1D,1,1,desc1_1D,ictxt) 1284 deallocate(d_dense2D) 1285 end if 1286#endif 1287 if (nspin==1) d_dense1D=2.0_dp*d_dense1D 1288 1289 ! calculate the trace of S to make sure we are occupying the right number of eigenstates in our 1290 ! solution 1291#ifdef MPI 1292 Tr_loc=ddot(N_occ_loc(1,ispin)*N_occ_loc(2,ispin),twoI(ispin)%mtrx-S(ispin)%mtrx,1,S(ispin)%mtrx,1) 1293 call mpi_allreduce(Tr_loc,TrQS,1,mpi_double_precision,mpi_sum,mpi_comm_world,info) 1294#else 1295 TrQS=ddot(N_occ*N_occ,twoI(ispin)%mtrx-S(ispin)%mtrx,1,S(ispin)%mtrx,1) 1296#endif 1297 if (Node==0) then 1298 if (nspin==1) then 1299 print'(a,i5,a)', '| minim: icg = ', icg, ' |' 1300 print'(a,f13.7,a)', '| minim: 2*Tr[(2*I-S)*S] = ', 2.0_dp*TrQS, ' |' 1301 else 1302 print'(a,i5,a)', '| minim: icg = ', icg, ' |' 1303 print'(a,f13.7,a)', '| minim: Tr[(2*I-S)*S] = ', TrQS, ' |' 1304 end if 1305 print'(a)', '+---------------------------------------------+' 1306 end if 1307 1308 if (FirstCall(ispin)) FirstCall(ispin)=.false. 1309 1310 call timer('m_cg',2) 1311 1312end subroutine minim_cg 1313 1314!================================================! 1315! minimize the Kim functional by conjugate ! 1316! gradients (sparse routine) ! 1317!================================================! 1318subroutine minim_cg_sparse(nhmax,numh,listhptr,listh,CalcE,PreviousCallDiagon,iscf,h_dim,N_occ,eta,psi,nspin,ispin,& 1319 UpdatePrecon,UsePrecon,UpdateSparseComm,d_sparse,h_sparse,s_sparse,s_dense1D,t_dense1D) 1320 implicit none 1321 1322 !**** INPUT ***********************************! 1323 1324 logical, intent(in) :: CalcE ! calculate the energy-density matrix from the existing coeffs.? 1325 logical, intent(in) :: PreviousCallDiagon ! Previous SCF iteration solved by diagonalization? 1326 logical, intent(in) :: UpdatePrecon ! update the preconditioner? 1327 logical, intent(in) :: UsePrecon ! use the preconditioner? 1328 logical, intent(in) :: UpdateSparseComm ! update nhmax_max? 1329 1330 integer, intent(in) :: nhmax ! first dimension of listh and sparse matrices 1331 integer, intent(in) :: numh(:) ! num. of nonzero elements of each row of sparse matrices 1332 integer, intent(in) :: listhptr(:) ! pointer to start of row in listh 1333 integer, intent(in) :: listh(:) ! list of nonzero elements of each row of sparse matrices 1334 integer, intent(in) :: iscf ! SCF iteration num. 1335 integer, intent(in) :: h_dim ! num. of AOs (global) 1336 integer, intent(in) :: N_occ ! num. of WFs (global) 1337 integer, intent(in) :: nspin ! num. of spins 1338 integer, intent(in) :: ispin ! up/down spin 1339 1340 real(dp), intent(in) :: eta ! chemical potential for Kim functional 1341 real(dp), intent(in) :: psi(1:h_dim,1:h_dim_loc_1D,1:nspin) ! eigenvectors from diagonalization 1342 real(dp), intent(in), optional :: h_sparse(:) ! hamiltonian matrix in AO basis (sparse) 1343 real(dp), intent(in), optional :: s_sparse(:) ! overlap matrix in AO basis (sparse) 1344 real(dp), intent(in), optional :: s_dense1D(:,:) ! overlap matrix in AO basis (dense 1D) 1345 real(dp), intent(in), optional :: t_dense1D(:,:) ! kinetic energy matrix in AO basis (dense 1D) 1346 1347 !**** INOUT ***********************************! 1348 1349 real(dp), intent(inout) :: d_sparse(:) ! (energy-)density matrix in AO basis (sparse) 1350 1351 !**** LOCAL ***********************************! 1352 1353 character(len=100) :: WF_COEFFS_filename 1354#ifdef MPI 1355 character(len=5) :: Node_name 1356#endif 1357 1358 logical :: new_s 1359 logical :: conv 1360 logical :: ls_conv 1361 logical :: ls_fail 1362 logical :: ReadCoeffs2 1363 1364 integer :: i 1365 integer :: j 1366 integer :: k 1367 integer :: l 1368 integer :: m 1369 integer :: n 1370 integer :: info 1371 integer :: icg ! CG step num. 1372 integer :: n_step_max=100 ! max. num. steps for CG minimization 1373 integer :: lwork 1374 integer, allocatable :: ipiv(:) 1375#ifdef MPI 1376 integer :: liwork 1377 integer :: mpi_status(1:mpi_status_size) ! MPI status 1378 integer, save :: ictxt ! handle for main BLACS context (1D) 1379 integer, save :: nhmax_max 1380 integer, save :: h_dim_loc_max 1381 integer, allocatable :: iwork(:) 1382 integer, external :: numroc 1383#else 1384 integer, external :: ilaenv 1385#endif 1386 1387 real(dp) :: rn 1388 real(dp) :: rn2 1389 real(dp) :: E_diff 1390 real(dp) :: TrQS 1391 real(dp) :: lambda 1392 real(dp) :: lambda_n 1393 real(dp) :: lambda_d 1394 real(dp) :: lambda_n_tot 1395 real(dp) :: lambda_d_tot 1396 real(dp) :: E_OMM ! OMM functional energy 1397 real(dp) :: E_OMM_old ! OMM functional energy at previous step 1398 real(dp) :: coeff(0:4) ! coeffs. of the quartic equation 1399 real(dp), save :: x_min(1:2) ! position of minimum 1400 real(dp), save :: t_precon_scale ! kinetic energy scale for the preconditioning 1401 real(dp), save :: cg_tol ! convergence tolerance of CG minimization 1402 real(dp), allocatable :: work1(:,:) ! work matrix 1403 real(dp), allocatable :: Sd(:,:) ! g^T*s*g 1404 real(dp), allocatable :: Sdd(:,:) ! g^T*s*g 1405 real(dp), allocatable :: g(:,:) ! gradient 1406 real(dp), allocatable :: g_p(:,:) ! gradient at previous step 1407 real(dp), allocatable :: pg(:,:) ! preconditioned gradient 1408 real(dp), allocatable :: pg_p(:,:) ! preconditioned gradient at previous step 1409 real(dp), allocatable :: d(:,:) ! conjugate search direction 1410 real(dp), allocatable :: hc(:,:) ! h*c 1411 real(dp), allocatable :: hg(:,:) ! h*g 1412 real(dp), allocatable, save :: p_dense1D(:,:) ! preconditioning matrix in AO basis (dense 1D) 1413 real(dp), external :: ddot 1414#ifdef MPI 1415 real(dp) :: Tr_loc 1416 real(dp), allocatable :: work2(:,:) ! work matrix 1417 real(dp), allocatable :: work3(:,:) ! work matrix 1418#endif 1419 1420 type(multispin), save :: H(1:2) ! hamiltonian matrix in WF basis 1421 type(multispin), save :: S(1:2) ! overlap matrix in WF basis 1422 type(multispin), save :: Hd(1:2) ! g^T*h*c 1423 type(multispin), save :: Hdd(1:2) ! g^T*h*g 1424 type(multispin), save :: sc(1:2) ! s*c 1425 type(multispin), save :: sg(1:2) ! s*g 1426 type(multispin), save :: twoI(1:2) ! identity matrix x2 1427 type(multispin), save :: cd(1:2) ! work matrix 1428 1429 !**********************************************! 1430 1431 call timer('m_cg',1) 1432 1433 ! if this is the first time the minimization module is called, several things need to be done 1434 ! (detailed below) 1435 if (FirstCall(ispin)) then 1436 1437#ifdef MPI 1438 if (all(FirstCall(1:2))) then 1439 1440 ! calculate the local-to-global index transformations needed for the sparse matrix-matrix 1441 ! multiplications 1442 allocate(h_dim_l2g(1:h_dim_loc_1D)) 1443 j=0 1444 k=0 1445 l=0 1446 do i=1,h_dim 1447 k=k+1 1448 if (j==Node) then 1449 l=l+1 1450 h_dim_l2g(l)=i 1451 end if 1452 if (k==BlockSize) then 1453 k=0 1454 j=j+1 1455 if (j==Nodes) j=0 1456 end if 1457 end do 1458 1459 ! find the largest value of h_dim_loc over all MPI processes needed for the sparse matrix 1460 ! multiplications 1461 h_dim_loc_max=0 1462 do i=0,Nodes-1 1463 if (i==Node) then 1464 k=h_dim_loc_1D 1465 end if 1466 call mpi_bcast(j,1,mpi_integer,i,mpi_comm_world,info) 1467 call mpi_bcast(k,1,mpi_integer,i,mpi_comm_world,info) 1468 if (k>h_dim_loc_max) h_dim_loc_max=k 1469 end do 1470 1471 ! initialize the BLACS process grids 1472 call blacs_get(-1,0,ictxt) 1473 call blacs_gridinit(ictxt,'C',Nodes,1) 1474 1475 end if 1476 1477 ! calculate the local dimensions of the AO and WF matrices 1478 call blacs_gridinfo(ictxt,i,j,k,l) 1479 N_occ_loc_1D(ispin)=numroc(N_occ,BlockSize_c,k,0,Nodes) 1480 h_dim_loc(1)=h_dim_loc_1D 1481 h_dim_loc(2)=h_dim 1482 N_occ_loc(1,ispin)=N_occ_loc_1D(ispin) 1483 N_occ_loc(2,ispin)=N_occ 1484 1485 ! initialize the matrix descriptors 1486 call descinit(desc1,h_dim,h_dim,BlockSize,BlockSize,0,0,ictxt,h_dim_loc(1),info) 1487 if (info/=0) call die('ERROR: desc1 setup has failed in minim!') 1488 call descinit(desc2(1:9,ispin),N_occ,N_occ,BlockSize_c,BlockSize_c,0,0,ictxt,N_occ_loc(1,ispin),info) 1489 if (info/=0) call die('ERROR: desc2 setup has failed in minim!') 1490 call descinit(desc3(1:9,ispin),N_occ,h_dim,BlockSize_c,BlockSize,0,0,ictxt,N_occ_loc(1,ispin),info) 1491 if (info/=0) call die('ERROR: desc3 setup has failed in minim!') 1492#else 1493 N_occ_loc_1D(ispin)=N_occ 1494 h_dim_loc(1)=h_dim 1495 h_dim_loc(2)=h_dim 1496 N_occ_loc(1,ispin)=N_occ 1497 N_occ_loc(2,ispin)=N_occ 1498#endif 1499 1500 ! allocate the WF coeffs. matrix 1501 allocate(c(ispin)%mtrx(1:N_occ_loc(1,ispin),1:h_dim_loc(2))) 1502 1503 ! if this is the first SCF step, then we need to initialize the WF coeffs. matrix with random 1504 ! numbers between -0.5 and 0.5 (normalize at the end to avoid instabilities), unless we are 1505 ! reading them from file 1506 if (.not. PreviousCallDiagon) then 1507 if (ReadCoeffs) then 1508#ifdef MPI 1509 write(Node_name,'(i5)') Node 1510 if (nspin==1) then 1511 WF_COEFFS_filename=trim(slabel)//'.WF_COEFFS.'//trim(adjustl(Node_name)) 1512 else 1513 if (ispin==1) then 1514 WF_COEFFS_filename=trim(slabel)//'.WF_COEFFS_UP.'//trim(adjustl(Node_name)) 1515 else if (ispin==2) then 1516 WF_COEFFS_filename=trim(slabel)//'.WF_COEFFS_DOWN.'//trim(adjustl(Node_name)) 1517 end if 1518 end if 1519#else 1520 if (nspin==1) then 1521 WF_COEFFS_filename=trim(slabel)//'.WF_COEFFS' 1522 else 1523 if (ispin==1) then 1524 WF_COEFFS_filename=trim(slabel)//'.WF_COEFFS_UP' 1525 else if (ispin==2) then 1526 WF_COEFFS_filename=trim(slabel)//'.WF_COEFFS_DOWN' 1527 end if 1528 end if 1529#endif 1530 inquire(file=trim(WF_COEFFS_filename),exist=ReadCoeffs2) 1531 else 1532 ReadCoeffs2=.false. 1533 end if 1534 if (ReadCoeffs2) then 1535 call io_assign(i) 1536 open(i,file=trim(WF_COEFFS_filename),form='unformatted',status='old',action='read') 1537 read(i) c(ispin)%mtrx 1538 call io_close(i) 1539 else 1540 if ((ispin==1) .or. (N_occ_diff/=0)) then 1541 call rand_init 1542 do i=1,h_dim_loc(2) 1543 do j=1,N_occ_loc(1,ispin) 1544 call random_number(rn) 1545 call random_number(rn2) 1546 c(ispin)%mtrx(j,i)=sign(0.5_dp*rn,rn2-0.5_dp) 1547 end do 1548 end do 1549 c(ispin)%mtrx=1.0d-2*c(ispin)%mtrx/sqrt(real(h_dim,dp)) 1550 else 1551 c(2)%mtrx=c(1)%mtrx 1552 end if 1553 end if 1554 end if 1555 1556 if (all(FirstCall(1:2))) then 1557 t_precon_scale=fdf_physical('OMM.TPreconScale',10.0_dp,'Ry') 1558 cg_tol=fdf_get('OMM.RelTol',1.0d-9) 1559 end if 1560 1561 end if 1562 1563 ! if the previous SCF iteration was solved by diagonalization, then we take the lowest N_occ 1564 ! eigenfunctions as our initial guess, but we must take care to distribute them properly amongst 1565 ! the MPI processes for parallel runs 1566 if (PreviousCallDiagon) then 1567#ifdef MPI 1568 allocate(work2(1:h_dim,1)) 1569 ! i: receiving node 1570 ! j: local orbital num. on receiving node 1571 ! k: global orbital num. 1572 ! l: sending node 1573 ! m: local orbital num. on sending node 1574 k=0 1575 do i=0,Nodes-1 1576 n=N_occ_loc_1D(ispin) 1577 call mpi_bcast(n,1,mpi_integer,i,mpi_comm_world,info) 1578 do j=1,n 1579 k=k+1 1580 call WhichNodeOrb(k,Nodes,l) 1581 call GlobalToLocalOrb(k,l,Nodes,m) 1582 if (Node==l) then 1583 if (Node==i) then 1584 c(ispin)%mtrx(j,1:h_dim)=psi(1:h_dim,m,ispin) 1585 else 1586 call mpi_send(psi(1,m,ispin),h_dim,mpi_double_precision,i,k,mpi_comm_world,info) 1587 end if 1588 else if (Node==i) then 1589 call mpi_recv(work2(1,1),h_dim,mpi_double_precision,l,k,mpi_comm_world,mpi_status,info) 1590 c(ispin)%mtrx(j,1:h_dim)=work2(1:h_dim,1) 1591 end if 1592 end do 1593 end do 1594 deallocate(work2) 1595#else 1596 do i=1,N_occ 1597 c(ispin)%mtrx(i,1:h_dim)=psi(1:h_dim,i,ispin) 1598 end do 1599#endif 1600 end if 1601 1602#ifdef MPI 1603 ! find the largest value of nhmax over all MPI processes needed for the sparse matrix 1604 ! multiplications 1605 if (UpdateSparseComm) then 1606 nhmax_max=0 1607 do i=0,Nodes-1 1608 if (i==Node) then 1609 j=nhmax 1610 end if 1611 call mpi_bcast(j,1,mpi_integer,i,mpi_comm_world,info) 1612 call mpi_bcast(k,1,mpi_integer,i,mpi_comm_world,info) 1613 if (j>nhmax_max) nhmax_max=j 1614 end do 1615 end if 1616 1617 allocate(numh_recv(1:h_dim_loc_max)) 1618 allocate(listhptr_recv(1:h_dim_loc_max)) 1619 allocate(listh_recv(1:nhmax_max)) 1620 allocate(h_dim_l2g_recv(1:h_dim_loc_max)) 1621 allocate(As_recv(1:nhmax_max)) 1622#endif 1623 1624 if (CalcE) then 1625 1626 ! calculate the energy-density matrix: e=c^T*[(2*I-S)*(H+eta*S)]*c 1627#ifdef MPI 1628 call pdgeadd('T',N_occ,N_occ,x_min(ispin),Hd(ispin)%mtrx,1,1,desc2(1:9,ispin),1.0_dp,H(ispin)%mtrx,1,1,desc2(1:9,ispin)) 1629#else 1630 do k=1,N_occ 1631 do l=1,N_occ 1632 H(ispin)%mtrx(k,l)=H(ispin)%mtrx(k,l)+x_min(ispin)*Hd(ispin)%mtrx(l,k) 1633 end do 1634 end do 1635#endif 1636 H(ispin)%mtrx=H(ispin)%mtrx+x_min(ispin)*Hd(ispin)%mtrx+x_min(ispin)**2*Hdd(ispin)%mtrx 1637 allocate(work1(1:N_occ_loc(1,ispin),1:h_dim_loc(2))) 1638 call calc_densmat_sparse(h_dim,N_occ,ispin,nhmax,numh,listhptr,listh,H(ispin)%mtrx+eta*S(ispin)%mtrx,c(ispin)%mtrx,& 1639 d_sparse,work1,cd(ispin)%mtrx) 1640 if (nspin==1) d_sparse=2.0_dp*d_sparse 1641 1642 if (WriteCoeffs) then 1643 call io_assign(i) 1644#ifdef MPI 1645 write(Node_name,'(i5)') Node 1646 if (nspin==1) then 1647 WF_COEFFS_filename=trim(slabel)//'.WF_COEFFS.'//trim(adjustl(Node_name)) 1648 else 1649 if (ispin==1) then 1650 WF_COEFFS_filename=trim(slabel)//'.WF_COEFFS_UP.'//trim(adjustl(Node_name)) 1651 else if (ispin==2) then 1652 WF_COEFFS_filename=trim(slabel)//'.WF_COEFFS_DOWN.'//trim(adjustl(Node_name)) 1653 end if 1654 end if 1655#else 1656 if (nspin==1) then 1657 WF_COEFFS_filename=trim(slabel)//'.WF_COEFFS' 1658 else 1659 if (ispin==1) then 1660 WF_COEFFS_filename=trim(slabel)//'.WF_COEFFS_UP' 1661 else if (ispin==2) then 1662 WF_COEFFS_filename=trim(slabel)//'.WF_COEFFS_DOWN' 1663 end if 1664 end if 1665#endif 1666 open(i,file=trim(WF_COEFFS_filename),form='unformatted',status='replace',action='write') 1667 write(i) c(ispin)%mtrx 1668 call io_close(i) 1669 end if 1670 1671 deallocate(work1) 1672 deallocate(cd(ispin)%mtrx) 1673 if (allocated(p_dense1D)) deallocate(p_dense1D) 1674 deallocate(sg(ispin)%mtrx) 1675 deallocate(sc(ispin)%mtrx) 1676 deallocate(S(ispin)%mtrx) 1677 deallocate(Hdd(ispin)%mtrx) 1678 deallocate(Hd(ispin)%mtrx) 1679 deallocate(H(ispin)%mtrx) 1680 1681#ifdef MPI 1682 deallocate(As_recv) 1683 deallocate(h_dim_l2g_recv) 1684 deallocate(listh_recv) 1685 deallocate(listhptr_recv) 1686 deallocate(numh_recv) 1687#endif 1688 1689 call timer('m_cg',2) 1690 1691 return 1692 1693 end if 1694 1695 if (.not. allocated(H(ispin)%mtrx)) allocate(H(ispin)%mtrx(1:N_occ_loc(1,ispin),1:N_occ_loc(2,ispin))) 1696 if (.not. allocated(Hd(ispin)%mtrx)) allocate(Hd(ispin)%mtrx(1:N_occ_loc(1,ispin),1:N_occ_loc(2,ispin))) 1697 if (.not. allocated(Hdd(ispin)%mtrx)) allocate(Hdd(ispin)%mtrx(1:N_occ_loc(1,ispin),1:N_occ_loc(2,ispin))) 1698 if (.not. allocated(S(ispin)%mtrx)) then 1699 allocate(S(ispin)%mtrx(1:N_occ_loc(1,ispin),1:N_occ_loc(2,ispin))) 1700 new_s=.true. 1701 else 1702 new_s=.false. 1703 end if 1704 if (.not. allocated(sc(ispin)%mtrx)) allocate(sc(ispin)%mtrx(1:N_occ_loc(1,ispin),1:h_dim_loc(2))) 1705 if (.not. allocated(sg(ispin)%mtrx)) allocate(sg(ispin)%mtrx(1:N_occ_loc(1,ispin),1:h_dim_loc(2))) 1706 1707 ! calculate the preconditioning matrix (s+t/tau)^-1 1708 if (UpdatePrecon .and. (ispin==1)) then 1709 if (.not. allocated(p_dense1D)) allocate(p_dense1D(1:h_dim_loc(1),1:h_dim_loc(2))) 1710 p_dense1D=s_dense1D+t_dense1D/t_precon_scale 1711#ifdef MPI 1712 allocate(ipiv(1:h_dim_loc(1)+BlockSize)) 1713 call pdgetrf(h_dim,h_dim,p_dense1D,1,1,desc1,ipiv,info) 1714 if (info/=0) then 1715print*, info 1716call die('ERROR: pdgetrf has failed in minim!') 1717end if 1718 allocate(work1(1:1,1:1)) 1719 allocate(iwork(1:1)) 1720 call pdgetri(h_dim,p_dense1D,1,1,desc1,ipiv,work1,-1,iwork,-1,info) 1721 if (info/=0) call die('ERROR: pdgetri has failed in minim!') 1722 liwork=iwork(1) 1723 deallocate(iwork) 1724 lwork=work1(1,1) 1725 deallocate(work1) 1726 allocate(work1(1:lwork,1:1)) 1727 allocate(iwork(1:liwork)) 1728 call pdgetri(h_dim,p_dense1D,1,1,desc1,ipiv,work1,lwork,iwork,liwork,info) 1729 if (info/=0) call die('ERROR: pdgetri has failed in minim!') 1730 deallocate(iwork) 1731 deallocate(work1) 1732 deallocate(ipiv) 1733#else 1734 allocate(ipiv(1:h_dim)) 1735 lwork=h_dim*ilaenv(1,'dsytrf','U',h_dim,-1,-1,-1) 1736 allocate(work1(1:lwork,1:1)) 1737 call dsytrf('U',h_dim,p_dense1D,h_dim,ipiv,work1,lwork,info) 1738 if (info/=0) call die('ERROR: dsytrf has failed in minim!') 1739 deallocate(work1) 1740 allocate(work1(1:h_dim,1:1)) 1741 call dsytri('U',h_dim,p_dense1D,h_dim,ipiv,work1,info) 1742 if (info/=0) call die('ERROR: dsytri has failed in minim!') 1743 deallocate(work1) 1744 deallocate(ipiv) 1745 do i=1,h_dim-1 1746 do j=i+1,h_dim 1747 p_dense1D(j,i)=p_dense1D(i,j) 1748 end do 1749 end do 1750#endif 1751 end if 1752 1753 allocate(Sd(1:N_occ_loc(1,ispin),1:N_occ_loc(2,ispin))) 1754 allocate(Sdd(1:N_occ_loc(1,ispin),1:N_occ_loc(2,ispin))) 1755 allocate(g(1:N_occ_loc(1,ispin),1:h_dim_loc(2))) 1756 allocate(g_p(1:N_occ_loc(1,ispin),1:h_dim_loc(2))) 1757 if (UsePrecon) then 1758 allocate(pg(1:N_occ_loc(1,ispin),1:h_dim_loc(2))) 1759 allocate(pg_p(1:N_occ_loc(1,ispin),1:h_dim_loc(2))) 1760 end if 1761 allocate(d(1:N_occ_loc(1,ispin),1:h_dim_loc(2))) 1762 allocate(hc(1:N_occ_loc(1,ispin),1:h_dim_loc(2))) 1763 allocate(hg(1:N_occ_loc(1,ispin),1:h_dim_loc(2))) 1764 allocate(work1(1:N_occ_loc(1,ispin),1:N_occ_loc(2,ispin))) 1765 1766 ! first we calculate the energy and gradient for our initial guess, with the following steps: 1767 ! -calculate the hamiltonian in WF basis: H=c^T*h*c 1768 call calc_A_sparse(h_dim,N_occ,ispin,nhmax,numh,listhptr,listh,h_sparse,c(ispin)%mtrx,H(ispin)%mtrx,hc) 1769 ! -calculate the overlap matrix in WF basis: S=c^T*s*c 1770 if (new_s .or. PreviousCallDiagon) then 1771 call calc_A_sparse(h_dim,N_occ,ispin,nhmax,numh,listhptr,listh,s_sparse,c(ispin)%mtrx,S(ispin)%mtrx,sc(ispin)%mtrx) 1772 else 1773 sc(ispin)%mtrx=sc(ispin)%mtrx+x_min(ispin)*sg(ispin)%mtrx 1774 end if 1775 ! -calculate the gradient: g=2*(2*h*c-s*c*H-h*c*S) 1776 ! (note that we *reuse* h*c and s*c contained in hc and sc from the previous call to calc_A) 1777 call calc_grad(h_dim,N_occ,ispin,H(ispin)%mtrx,S(ispin)%mtrx,g,hc,sc(ispin)%mtrx) 1778 ! -calculate the preconditioned gradient by premultiplying g by (s+t/tau)^-1 1779 if (UsePrecon) then 1780#ifdef MPI 1781 call pdgemm('N','N',N_occ,h_dim,h_dim,1.0_dp,g,1,1,desc3(1:9,ispin),p_dense1D,1,1,desc1,0.0_dp,pg,1,1,desc3(1:9,ispin)) 1782#else 1783 call dgemm('N','N',N_occ,h_dim,h_dim,1.0_dp,g,N_occ,p_dense1D,h_dim,0.0_dp,pg,N_occ) 1784#endif 1785 end if 1786 ! -calculate the additional matrices: 1787 ! Hd=g^T*h*c 1788 ! Sd=g^T*s*c 1789 ! Hdd=g^T*h*g 1790 ! Sdd=g^T*s*g 1791 ! (again, h*c has already been calculated, although h*g has not) 1792 ! and, finally, the coeffs. of the quartic line search equation in the direction g 1793 ! (the energy at c is given by the zeroth-order coeff. c(0)) 1794 if (UsePrecon) then 1795#ifdef MPI 1796 call pdgemm('N','T',N_occ,N_occ,h_dim,1.0_dp,hc,1,1,desc3(1:9,ispin),pg,1,1,desc3(1:9,ispin),0.0_dp,Hd(ispin)%mtrx,1,1,& 1797 desc2(1:9,ispin)) 1798 call pdgemm('N','T',N_occ,N_occ,h_dim,1.0_dp,sc(ispin)%mtrx,1,1,desc3(1:9,ispin),pg,1,1,desc3(1:9,ispin),0.0_dp,Sd,1,1,& 1799 desc2(1:9,ispin)) 1800#else 1801 call dgemm('N','T',N_occ,N_occ,h_dim,1.0_dp,hc,N_occ,pg,N_occ,0.0_dp,Hd(ispin)%mtrx,N_occ) 1802 call dgemm('N','T',N_occ,N_occ,h_dim,1.0_dp,sc(ispin)%mtrx,N_occ,pg,N_occ,0.0_dp,Sd,N_occ) 1803#endif 1804 else 1805#ifdef MPI 1806 call pdgemm('N','T',N_occ,N_occ,h_dim,1.0_dp,hc,1,1,desc3(1:9,ispin),g,1,1,desc3(1:9,ispin),0.0_dp,Hd(ispin)%mtrx,1,1,& 1807 desc2(1:9,ispin)) 1808#else 1809 call dgemm('N','T',N_occ,N_occ,h_dim,1.0_dp,hc,N_occ,g,N_occ,0.0_dp,Hd(ispin)%mtrx,N_occ) 1810#endif 1811#ifdef MPI 1812 call pdgemm('N','T',N_occ,N_occ,h_dim,1.0_dp,sc(ispin)%mtrx,1,1,desc3(1:9,ispin),g,1,1,desc3(1:9,ispin),0.0_dp,Sd,1,1,& 1813 desc2(1:9,ispin)) 1814#else 1815 call dgemm('N','T',N_occ,N_occ,h_dim,1.0_dp,sc(ispin)%mtrx,N_occ,g,N_occ,0.0_dp,Sd,N_occ) 1816#endif 1817 end if 1818 if (UsePrecon) then 1819 call calc_A_sparse(h_dim,N_occ,ispin,nhmax,numh,listhptr,listh,h_sparse,pg,Hdd(ispin)%mtrx,hg) 1820 call calc_A_sparse(h_dim,N_occ,ispin,nhmax,numh,listhptr,listh,s_sparse,pg,Sdd,sg(ispin)%mtrx) 1821 else 1822 call calc_A_sparse(h_dim,N_occ,ispin,nhmax,numh,listhptr,listh,h_sparse,g,Hdd(ispin)%mtrx,hg) 1823 call calc_A_sparse(h_dim,N_occ,ispin,nhmax,numh,listhptr,listh,s_sparse,g,Sdd,sg(ispin)%mtrx) 1824 end if 1825 call calc_coeff(h_dim,N_occ,ispin,H(ispin)%mtrx,S(ispin)%mtrx,Hd(ispin)%mtrx,Sd,Hdd(ispin)%mtrx,Sdd,coeff,work1) 1826 E_OMM=coeff(0) 1827 1828 ! this is the main loop of the CG algorithm. We perform a series of line minimizations, with the 1829 ! gradient g at each new step being modified to obtain the search direction d 1830 if (Node==0) then 1831 if (ispin==1) then 1832 print'(a)', '+---------------------------------------------+' 1833 if (UsePrecon) then 1834 print'(a)', '| OMM (sparse algebra+preconditioning) |' 1835 else 1836 print'(a)', '| OMM (sparse algebra) |' 1837 end if 1838 print'(a)', '+---------------------------------------------+' 1839 end if 1840 if (nspin==2) then 1841 if (ispin==1) then 1842 print'(a)', '| up spin |' 1843 else 1844 print'(a)', '| down spin |' 1845 end if 1846 print'(a)', '+---------------------------------------------+' 1847 end if 1848 if (LongOut) print'(a)', '| E_OMM E_diff |' 1849 end if 1850 conv=.false. 1851 d=0.0_dp 1852 icg=0 1853 do i=1,n_step_max 1854 lambda=0.0_dp 1855 do j=1,h_dim*N_occ-1 1856 if (UsePrecon) then 1857 d=pg+lambda*d 1858 else 1859 d=g+lambda*d 1860 end if 1861 g_p=g 1862 if (UsePrecon) pg_p=pg 1863 E_OMM_old=E_OMM 1864 ! if this is not the first CG step, we have to recalculate Hd, Sd, Hdd, Sdd, and the coeffs. 1865 if (icg>0) then 1866#ifdef MPI 1867 call pdgemm('N','T',N_occ,N_occ,h_dim,1.0_dp,hc,1,1,desc3(1:9,ispin),d,1,1,desc3(1:9,ispin),0.0_dp,Hd(ispin)%mtrx,1,1,& 1868 desc2(1:9,ispin)) 1869 call pdgemm('N','T',N_occ,N_occ,h_dim,1.0_dp,sc(ispin)%mtrx,1,1,desc3(1:9,ispin),d,1,1,desc3(1:9,ispin),0.0_dp,Sd,1,1,& 1870 desc2(1:9,ispin)) 1871#else 1872 call dgemm('N','T',N_occ,N_occ,h_dim,1.0_dp,hc,N_occ,d,N_occ,0.0_dp,Hd(ispin)%mtrx,N_occ) 1873 call dgemm('N','T',N_occ,N_occ,h_dim,1.0_dp,sc(ispin)%mtrx,N_occ,d,N_occ,0.0_dp,Sd,N_occ) 1874#endif 1875 call calc_A_sparse(h_dim,N_occ,ispin,nhmax,numh,listhptr,listh,h_sparse,d,Hdd(ispin)%mtrx,hg) 1876 call calc_A_sparse(h_dim,N_occ,ispin,nhmax,numh,listhptr,listh,s_sparse,d,Sdd,sg(ispin)%mtrx) 1877 call calc_coeff(h_dim,N_occ,ispin,H(ispin)%mtrx,S(ispin)%mtrx,Hd(ispin)%mtrx,Sd,Hdd(ispin)%mtrx,Sdd,coeff,work1) 1878 end if 1879 ! using the coeffs. calculated anlytically, we can find the minimum of the functional in the 1880 ! search direction, and calculate the energy at that minimum 1881 call solve_quartic(coeff(0:4),x_min(ispin),ls_fail) 1882 ! in certain regions of the coeffs. space the line search gives no minimum--this occurs when there 1883 ! are positive eigenvalues in the eigenspecturm which are significantly occupied by our coeffs. 1884 ! matrix; the only known cure, unfortunately, is to scale down the entire matrix, thus returning to 1885 ! a safe region of the coeffs. space. 1886 if (ls_fail) then 1887 if (Node==0) print'(a)', '| WARNING: Rescaling coefficients! |' 1888 E_OMM=3.0*E_OMM 1889 c(ispin)%mtrx=0.5_dp*c(ispin)%mtrx 1890 ls_conv=.false. 1891 else 1892 ! if the line search is successful, move to the minimum 1893 E_OMM=coeff(4)*x_min(ispin)**4+& 1894 coeff(3)*x_min(ispin)**3+& 1895 coeff(2)*x_min(ispin)**2+& 1896 coeff(1)*x_min(ispin)+& 1897 coeff(0) 1898 c(ispin)%mtrx=c(ispin)%mtrx+x_min(ispin)*d 1899 ls_conv=.true. 1900 end if 1901 ! recalculate S at the minimum (or for the rescaled coeffs.) 1902 if (ls_fail) then 1903 S(ispin)%mtrx=0.25_dp*S(ispin)%mtrx 1904 else 1905#ifdef MPI 1906 call pdgeadd('T',N_occ,N_occ,x_min(ispin),Sd,1,1,desc2(1:9,ispin),1.0_dp,S(ispin)%mtrx,1,1,desc2(1:9,ispin)) 1907#else 1908 do k=1,N_occ 1909 do l=1,N_occ 1910 S(ispin)%mtrx(k,l)=S(ispin)%mtrx(k,l)+x_min(ispin)*Sd(l,k) 1911 end do 1912 end do 1913#endif 1914 S(ispin)%mtrx=S(ispin)%mtrx+x_min(ispin)*Sd+x_min(ispin)**2*Sdd 1915 end if 1916 E_diff=2.0_dp*abs((E_OMM-E_OMM_old)/(E_OMM+E_OMM_old)) 1917 if ((Node==0) .and. LongOut) print'(a,2(1x,i5),2(1x,es15.7e3),1x,a)', '|', i, j, E_OMM, E_diff, '|' 1918 icg=icg+1 1919 if (E_diff<=cg_tol) then 1920 conv=.true. 1921 exit 1922 end if 1923 ! recalculate H at the minimum (or for the rescaled coeffs.) 1924 if (ls_fail) then 1925 H(ispin)%mtrx=0.25_dp*H(ispin)%mtrx 1926 else 1927#ifdef MPI 1928 call pdgeadd('T',N_occ,N_occ,x_min(ispin),Hd(ispin)%mtrx,1,1,desc2(1:9,ispin),1.0_dp,H(ispin)%mtrx,1,1,desc2(1:9,ispin)) 1929#else 1930 do k=1,N_occ 1931 do l=1,N_occ 1932 H(ispin)%mtrx(k,l)=H(ispin)%mtrx(k,l)+x_min(ispin)*Hd(ispin)%mtrx(l,k) 1933 end do 1934 end do 1935#endif 1936 H(ispin)%mtrx=H(ispin)%mtrx+x_min(ispin)*Hd(ispin)%mtrx+x_min(ispin)**2*Hdd(ispin)%mtrx 1937 end if 1938 ! recalculate g at the minimum (or for the rescaled coeffs.) 1939 if (ls_fail) then 1940 hc=0.5_dp*hc 1941 sc(ispin)%mtrx=0.5_dp*sc(ispin)%mtrx 1942 g=g_p+1.5_dp*hc 1943 else 1944 hc=hc+x_min(ispin)*hg 1945 sc(ispin)%mtrx=sc(ispin)%mtrx+x_min(ispin)*sg(ispin)%mtrx 1946 call calc_grad(h_dim,N_occ,ispin,H(ispin)%mtrx,S(ispin)%mtrx,g,hc,sc(ispin)%mtrx) 1947 end if 1948 if (UsePrecon) then 1949#ifdef MPI 1950 call pdgemm('N','N',N_occ,h_dim,h_dim,1.0_dp,g,1,1,desc3(1:9,ispin),p_dense1D,1,1,desc1,0.0_dp,pg,1,1,desc3(1:9,ispin)) 1951#else 1952 call dgemm('N','N',N_occ,h_dim,h_dim,1.0_dp,g,N_occ,p_dense1D,h_dim,0.0_dp,pg,N_occ) 1953#endif 1954 end if 1955 if (ls_conv) then 1956 if (UsePrecon) then 1957 lambda_n=ddot(N_occ_loc(1,ispin)*h_dim_loc(2),pg,1,g-g_p,1) 1958 lambda_d=ddot(N_occ_loc(1,ispin)*h_dim_loc(2),pg_p,1,g_p,1) 1959 else 1960 lambda_n=ddot(N_occ_loc(1,ispin)*h_dim_loc(2),g,1,g-g_p,1) 1961 lambda_d=ddot(N_occ_loc(1,ispin)*h_dim_loc(2),g_p,1,g_p,1) 1962 end if 1963#ifdef MPI 1964 call mpi_allreduce(lambda_n,lambda_n_tot,1,mpi_double_precision,mpi_sum,mpi_comm_world,info) 1965 call mpi_allreduce(lambda_d,lambda_d_tot,1,mpi_double_precision,mpi_sum,mpi_comm_world,info) 1966 lambda=lambda_n_tot/lambda_d_tot 1967#else 1968 lambda=lambda_n/lambda_d 1969#endif 1970 else 1971 exit 1972 end if 1973 end do 1974 if (conv) exit 1975 end do 1976 if (i>n_step_max) then 1977 if (Node==0) print'(a)', '| WARNING: OMM failed to converge! |' 1978 end if 1979 if ((Node==0) .and. LongOut) print'(a)', '+---------------------------------------------+' 1980 1981 deallocate(work1) 1982 deallocate(hg) 1983 deallocate(hc) 1984 deallocate(d) 1985 if (UsePrecon) then 1986 deallocate(pg_p) 1987 deallocate(pg) 1988 end if 1989 deallocate(g_p) 1990 deallocate(g) 1991 deallocate(Sdd) 1992 deallocate(Sd) 1993 1994 ! calculate the density matrix: d=c*(2*I-S)*c^T 1995 if (.not. allocated(twoI(ispin)%mtrx)) then 1996 allocate(twoI(ispin)%mtrx(1:N_occ_loc(1,ispin),1:N_occ_loc(2,ispin))) 1997#ifdef MPI 1998 call pdlaset('A',N_occ,N_occ,0.0_dp,2.0_dp,twoI(ispin)%mtrx,1,1,desc2(1:9,ispin)) 1999#else 2000 twoI(ispin)%mtrx=0.0_dp 2001 do i=1,N_occ 2002 twoI(ispin)%mtrx(i,i)=2.0_dp 2003 end do 2004#endif 2005 end if 2006 if (.not. allocated(cd(ispin)%mtrx)) allocate(cd(ispin)%mtrx(1:N_occ_loc(1,ispin),1:h_dim_loc(2))) 2007 call calc_densmat_sparse(h_dim,N_occ,ispin,nhmax,numh,listhptr,listh,twoI(ispin)%mtrx-S(ispin)%mtrx,c(ispin)%mtrx,& 2008 d_sparse,cd(ispin)%mtrx) 2009 if (nspin==1) d_sparse=2.0_dp*d_sparse 2010 2011#ifdef MPI 2012 deallocate(As_recv) 2013 deallocate(h_dim_l2g_recv) 2014 deallocate(listh_recv) 2015 deallocate(listhptr_recv) 2016 deallocate(numh_recv) 2017#endif 2018 2019 ! calculate the trace of S to make sure we are occupying the right number of eigenstates in our 2020 ! solution 2021#ifdef MPI 2022 Tr_loc=ddot(N_occ_loc(1,ispin)*N_occ_loc(2,ispin),twoI(ispin)%mtrx-S(ispin)%mtrx,1,S(ispin)%mtrx,1) 2023 call mpi_allreduce(Tr_loc,TrQS,1,mpi_double_precision,mpi_sum,mpi_comm_world,info) 2024#else 2025 TrQS=ddot(N_occ*N_occ,twoI(ispin)%mtrx-S(ispin)%mtrx,1,S(ispin)%mtrx,1) 2026#endif 2027 if (Node==0) then 2028 if (nspin==1) then 2029 print'(a,i5,a)', '| minim: icg = ', icg, ' |' 2030 print'(a,f13.7,a)', '| minim: 2*Tr[(2*I-S)*S] = ', 2.0_dp*TrQS, ' |' 2031 else 2032 print'(a,i5,a)', '| minim: icg = ', icg, ' |' 2033 print'(a,f13.7,a)', '| minim: Tr[(2*I-S)*S] = ', TrQS, ' |' 2034 end if 2035 print'(a)', '+---------------------------------------------+' 2036 end if 2037 2038 if (FirstCall(ispin)) FirstCall(ispin)=.false. 2039 2040 call timer('m_cg',2) 2041 2042end subroutine minim_cg_sparse 2043 2044!================================================! 2045! calculate the coeffs. of the quartic line ! 2046! search equation from three energy points and ! 2047! two gradient points ! 2048!================================================! 2049subroutine fit_quartic(x,y,g,c) 2050 implicit none 2051 2052 !**** INPUT ***********************************! 2053 2054 real(dp), intent(in) :: x(1:3) ! three x-points {x_i} 2055 real(dp), intent(in) :: y(1:3) ! y(x_i) at the three points 2056 real(dp), intent(in) :: g(1:2) ! (dy/dx)|x_i at the three points 2057 2058 !**** OUTPUT **********************************! 2059 2060 real(dp), intent(out) :: c(0:4) ! coeffs. of the quartic equation 2061 2062 !**********************************************! 2063 2064 !call timer('m_solve_quartic',1) 2065 2066 ! the following expressions for the coeffs. were produced automatically using Maple 12 2067 c(4)=(x(3)**3*x(2)*g(1)-3*x(1)*x(2)**2*y(1)+3*y(3)*x(1)*x(2)**2+x(1)**2*x(2)**2*g(1)+x(3)*x(2)**3*& 2068 g(1)+2*x(1)**2*x(3)**2*g(2)-3*x(2)*x(3)**2*y(1)+3*y(2)*x(1)**2*x(2)-x(3)**3*x(1)*g(1)+x(3)**3*& 2069 x(2)*g(2)-x(2)**2*x(3)**2*g(2)-x(1)**2*x(2)**2*g(2)-2*x(2)**2*x(3)**2*g(1)+3*x(2)*x(3)**2*& 2070 y(2)+x(1)**2*x(3)**2*g(1)-x(1)*x(2)**3*g(1)-3*x(1)*x(3)**2*y(1)-x(3)**3*x(1)*g(2)+3*x(1)*& 2071 x(3)**2*y(2)+x(2)*g(2)*x(1)**3-3*y(3)*x(1)**2*x(2)-x(3)*g(2)*x(1)**3+6*x(1)*x(3)*x(2)*y(1)+2*& 2072 x(1)*x(3)*x(2)**2*g(2)+x(1)*x(3)**2*x(2)*g(1)-x(1)*x(3)**2*x(2)*g(2)-2*x(1)**2*x(3)*x(2)*g(1)-& 2073 x(1)**2*x(3)*x(2)*g(2)+x(1)*x(3)*x(2)**2*g(1)-6*x(1)*x(3)*x(2)*y(2)+2*x(3)**3*y(1)-2*x(3)**3*& 2074 y(2)+x(2)**3*y(1)+y(3)*x(1)**3-y(3)*x(2)**3-y(2)*x(1)**3)/(-2*x(3)**3*x(1)**4+x(3)**4*x(1)**3-& 2075 x(1)**2*x(2)**5-x(3)**4*x(2)**3-x(2)**5*x(3)**2-3*x(1)**4*x(2)**3+2*x(3)**3*x(2)**4+x(1)**5*& 2076 x(3)**2+3*x(2)**4*x(1)**3+4*x(3)**3*x(2)*x(1)**3-4*x(3)**3*x(1)*x(2)**3+2*x(1)*x(3)*x(2)**5+4*& 2077 x(1)**4*x(3)*x(2)**2+8*x(1)**2*x(3)**2*x(2)**3+x(1)**4*x(3)**2*x(2)-x(1)*x(3)**2*x(2)**4-2*& 2078 x(1)**5*x(3)*x(2)-4*x(1)**2*x(3)*x(2)**4+x(1)**5*x(2)**2-8*x(2)**2*x(3)**2*x(1)**3-3*x(3)**4*& 2079 x(1)**2*x(2)+3*x(3)**4*x(1)*x(2)**2) 2080 2081 c(3)=-(-x(1)*g(1)+2*c(4)*x(1)**4-x(1)*g(2)+4*x(1)*c(4)*x(2)**3+x(2)*g(1)-4*x(2)*c(4)*x(1)**3+x(2)*& 2082 g(2)-2*c(4)*x(2)**4+2*y(1)-2*y(2))/(x(1)**3+3*x(1)*x(2)**2-3*x(2)*x(1)**2-x(2)**3) 2083 2084 c(2)=-(-y(2)+c(4)*x(2)**4+c(3)*x(2)**3+x(2)*g(1)-4*x(2)*c(4)*x(1)**3-3*x(2)*c(3)*x(1)**2+y(1)+3*& 2085 c(4)*x(1)**4+2*c(3)*x(1)**3-x(1)*g(1))/(x(1)**2-2*x(1)*x(2)+x(2)**2) 2086 2087 c(1)=g(1)-4*c(4)*x(1)**3-3*c(3)*x(1)**2-2*c(2)*x(1) 2088 2089 c(0)=y(1)-c(4)*x(1)**4-c(3)*x(1)**3-c(2)*x(1)**2-c(1)*x(1) 2090 2091 !if (Node==0) print*, 'f(x)=',c(4),'*x**4+',c(3),'*x**3+',c(2),'*x**2+',c(1),'*x+',c(0) 2092 2093 !call timer('m_fit_quartic',2) 2094 2095end subroutine fit_quartic 2096 2097!================================================! 2098! find the minimum for the quartic line search ! 2099! equation ! 2100!================================================! 2101subroutine solve_quartic(c,x_min,fail) 2102 implicit none 2103 2104 !**** INPUT ***********************************! 2105 2106 real(dp), intent(in) :: c(0:4) ! coeffs. of the quartic equation 2107 2108 !**** OUTPUT **********************************! 2109 2110 logical, intent(out) :: fail ! did we fail to find a minimum? 2111 2112 real(dp), intent(out) :: x_min ! position of minimum 2113 2114 !**** LOCAL ***********************************! 2115 2116 integer :: i 2117 integer :: x_order(1:3) 2118 2119 real(dp) :: t(1:3) 2120 real(dp) :: z(1:3) 2121 real(dp) :: a 2122 real(dp) :: b 2123 real(dp) :: d 2124 real(dp) :: Q 2125 real(dp) :: R 2126 real(dp) :: theta 2127 real(dp) :: S 2128 real(dp) :: U 2129 2130 !**********************************************! 2131 2132 !call timer('m_solve_quartic',1) 2133 2134 fail=.false. 2135 2136 !if (c(4)<0.0_dp) then 2137 ! if (Node==0) print*, '#WARNING: Function is unbounded!' 2138 ! !stop 2139 !end if 2140 2141 ! in order to find the minimum of the quartic equation, we have to solve a cubic equation; the 2142 ! following method is taken from Numerical Recipes 2143 a=3.0_dp*c(3)/(4.0_dp*c(4)) 2144 b=2.0_dp*c(2)/(4.0_dp*c(4)) 2145 if ((abs(b)>=1.0d11) .or. (abs(c(4))<=1.0d-11)) then 2146 !if (Node==0) print*, '#WARNING: Function is quadratic!' 2147 x_min=-0.5_dp*c(1)/c(2) 2148 return 2149 end if 2150 d=c(1)/(4.0_dp*c(4)) 2151 2152 Q=(a**2-3.0_dp*b)/9.0_dp 2153 R=(2.0_dp*a**3-9.0_dp*a*b+27.0_dp*d)/54.0_dp 2154 if (R**2<Q**3) then 2155 theta=acos(R/sqrt(Q**3)) 2156 t(1)=-2.0_dp*sqrt(Q)*cos(theta/3.0_dp)-a/3.0_dp 2157 t(2)=-2.0_dp*sqrt(Q)*cos((theta+2.0_dp*Pi)/3.0_dp)-a/3.0_dp 2158 t(3)=-2.0_dp*sqrt(Q)*cos((theta-2.0_dp*Pi)/3.0_dp)-a/3.0_dp 2159 z(1:3)=c(4)*t(1:3)**4+c(3)*t(1:3)**3+c(2)*t(1:3)**2+c(1)*t(1:3)+c(0) 2160 if (c(4)>0.0_dp) then 2161 if (all(z(1)>=z(2:3))) then 2162 x_order(1:3)=(/1,2,3/) 2163 else if (z(2)>z(3)) then 2164 x_order(1:3)=(/2,3,1/) 2165 else 2166 x_order(1:3)=(/3,1,2/) 2167 end if 2168 if ((0.0_dp<=t(x_order(1))) .and. (t(x_order(2))<=t(x_order(1)))) then 2169 x_min=t(x_order(2)) 2170 else 2171 x_min=t(x_order(3)) 2172 end if 2173 else 2174 if (all(z(1)<=z(2:3))) then 2175 x_min=t(1) 2176 else if (z(2)<z(3)) then 2177 x_min=t(2) 2178 else 2179 x_min=t(3) 2180 end if 2181 end if 2182 else 2183 S=-sign(1.0_dp,R)*(abs(R)+sqrt(R**2-Q**3))**(1.0_dp/3.0_dp) 2184 if (S==0.0_dp) then 2185 U=0.0_dp 2186 else 2187 U=Q/S 2188 end if 2189 x_min=(S+U)-(a/3.0_dp) 2190 if (c(4)<0.0_dp) fail=.true. 2191 end if 2192 2193 !call timer('m_solve_quartic',2) 2194 2195end subroutine solve_quartic 2196 2197!================================================! 2198! calculate the gradient of the Kim functional: ! 2199! g=2*(2*h*C-s*C*H-h*C*S) ! 2200!================================================! 2201subroutine calc_grad(h_dim,N_occ,ispin,H,S,grad,hc,sc) 2202 implicit none 2203 2204 !**** INPUT ***********************************! 2205 2206 integer, intent(in) :: h_dim ! num. of AOs (global) 2207 integer, intent(in) :: N_occ ! num. of WFs (global) 2208 integer, intent(in) :: ispin ! up/down spin (1/2) 2209 2210 real(dp), intent(in) :: H(:,:) ! hamiltonian matrix in WF basis 2211 real(dp), intent(in) :: S(:,:) ! overlap matrix in WF basis 2212 2213 !**** INOUT ***********************************! 2214 2215 real(dp), intent(inout) :: grad(:,:) ! gradient of Kim functional 2216 real(dp), intent(inout) :: hc(:,:) ! h*c 2217 real(dp), intent(inout) :: sc(:,:) ! s*c 2218 2219 !**********************************************! 2220 2221 call timer('m_calc_grad',1) 2222 2223 grad=4.0_dp*hc 2224 2225#ifdef MPI 2226 call pdgemm('N','N',N_occ,h_dim,N_occ,-2.0_dp,S,1,1,desc2(1:9,ispin),hc,1,1,desc3(1:9,ispin),1.0_dp,grad,1,1,desc3(1:9,ispin)) 2227 call pdgemm('N','N',N_occ,h_dim,N_occ,-2.0_dp,H,1,1,desc2(1:9,ispin),sc,1,1,desc3(1:9,ispin),1.0_dp,grad,1,1,desc3(1:9,ispin)) 2228#else 2229 call dgemm('N','N',N_occ,h_dim,N_occ,-2.0_dp,S,N_occ,hc,N_occ,1.0_dp,grad,N_occ) 2230 call dgemm('N','N',N_occ,h_dim,N_occ,-2.0_dp,H,N_occ,sc,N_occ,1.0_dp,grad,N_occ) 2231#endif 2232 2233 call timer('m_calc_grad',2) 2234 2235end subroutine calc_grad 2236 2237!================================================! 2238! calculate operator matrix in WF basis (dense ! 2239! routine) ! 2240!================================================! 2241subroutine calc_A(h_dim,N_occ,ispin,Ap,c,A,Apc) 2242 implicit none 2243 2244 !**** INPUT ***********************************! 2245 2246 integer, intent(in) :: h_dim ! num. of AOs (global) 2247 integer, intent(in) :: N_occ ! num. of WFs (global) 2248 integer, intent(in) :: ispin ! up/down spin (1/2) 2249 2250 real(dp), intent(in) :: Ap(:,:) ! operator matrix in AO basis 2251 real(dp), intent(in) :: c(:,:) ! WF coeffs. matrix 2252 2253 !**** INOUT ***********************************! 2254 2255 real(dp), intent(inout) :: A(:,:) ! operator matrix in WF basis 2256 real(dp), intent(inout) :: Apc(:,:) ! work matrix 2257 2258 !**********************************************! 2259 2260 call timer('m_calc_A',1) 2261 2262#ifdef MPI 2263 call pdgemm('N','N',N_occ,h_dim,h_dim,1.0_dp,c, 1,1,desc3(1:9,ispin),Ap,1,1,desc1, 0.0_dp,Apc,1,1,desc3(1:9,ispin)) 2264 call pdgemm('N','T',N_occ,N_occ,h_dim,1.0_dp,Apc,1,1,desc3(1:9,ispin),c, 1,1,desc3(1:9,ispin),0.0_dp,A, 1,1,desc2(1:9,ispin)) 2265#else 2266 call dgemm('N','N',N_occ,h_dim,h_dim,1.0_dp,c, N_occ,Ap,h_dim,0.0_dp,Apc,N_occ) 2267 call dgemm('N','T',N_occ,N_occ,h_dim,1.0_dp,Apc,N_occ,c, N_occ,0.0_dp,A, N_occ) 2268#endif 2269 2270 call timer('m_calc_A',2) 2271 2272end subroutine calc_A 2273 2274!================================================! 2275! calculate operator matrix in WF basis (sparse ! 2276! routine) ! 2277!================================================! 2278subroutine calc_A_sparse(h_dim,N_occ,ispin,nhmax,numh,listhptr,listh,As,c,A,Asc) 2279 implicit none 2280 2281 !**** INPUT ***********************************! 2282 2283 integer, intent(in) :: h_dim ! num. of AOs (global) 2284 integer, intent(in) :: N_occ ! num. of WFs (global) 2285 integer, intent(in) :: ispin ! up/down spin (1/2) 2286 integer, intent(in) :: nhmax ! first dimension of listh and sparse matrices 2287 integer, intent(in) :: numh(:) ! num. of nonzero elements of each row of sparse matrices 2288 integer, intent(in) :: listhptr(:) ! pointer to start of row in listh 2289 integer, intent(in) :: listh(:) ! list of nonzero elements of each row of sparse matrices 2290 2291 real(dp), intent(in) :: As(:) ! operator matrix in AO basis (sparse) 2292 real(dp), intent(in) :: c(:,:) ! WF coeffs. matrix 2293 2294 !**** INOUT ***********************************! 2295 2296 real(dp), intent(inout) :: A(:,:) ! operator matrix in WF basis 2297 real(dp), intent(inout) :: Asc(:,:) ! work matrix 2298 2299 !**** LOCAL ***********************************! 2300 2301 integer :: i 2302 integer :: j 2303 integer :: l 2304#ifdef MPI 2305 integer :: n_comm 2306 integer :: info 2307 integer :: h_dim_loc_recv 2308 integer :: nhmax_recv 2309#endif 2310 2311 !**********************************************! 2312 2313 call timer('m_calc_A',1) 2314 2315#ifdef MPI 2316 Asc=0.0_dp 2317 do n_comm=0,Nodes-1 2318 if (n_comm==Node) then 2319 h_dim_loc_recv=h_dim_loc_1D 2320 nhmax_recv=nhmax 2321 numh_recv(1:h_dim_loc_1D)=numh(1:h_dim_loc_1D) 2322 listhptr_recv(1:h_dim_loc_1D)=listhptr(1:h_dim_loc_1D) 2323 listh_recv(1:nhmax)=listh(1:nhmax) 2324 As_recv(1:nhmax)=As(1:nhmax) 2325 h_dim_l2g_recv(1:h_dim_loc_1D)=h_dim_l2g(1:h_dim_loc_1D) 2326 end if 2327 call mpi_bcast(h_dim_loc_recv, 1, mpi_integer, n_comm,mpi_comm_world,info) 2328 call mpi_bcast(nhmax_recv, 1, mpi_integer, n_comm,mpi_comm_world,info) 2329 call mpi_bcast(numh_recv(1), h_dim_loc_recv, mpi_integer, n_comm,mpi_comm_world,info) 2330 call mpi_bcast(listhptr_recv(1), h_dim_loc_recv, mpi_integer, n_comm,mpi_comm_world,info) 2331 call mpi_bcast(listh_recv(1), nhmax_recv, mpi_integer, n_comm,mpi_comm_world,info) 2332 call mpi_bcast(As_recv(1), nhmax_recv, mpi_double_precision,n_comm,mpi_comm_world,info) 2333 call mpi_bcast(h_dim_l2g_recv(1),h_dim_loc_recv, mpi_integer, n_comm,mpi_comm_world,info) 2334 if (n_comm==Node) then 2335 do i=1,h_dim_loc_1D 2336 do j=1,numh(i) 2337 l=listhptr(i)+j 2338 Asc(:,h_dim_l2g(i))=Asc(:,h_dim_l2g(i))+As(l)*c(:,listh(l)) 2339 end do 2340 end do 2341 else 2342 do i=1,h_dim_loc_recv 2343 do j=1,numh_recv(i) 2344 l=listhptr_recv(i)+j 2345 Asc(:,h_dim_l2g_recv(i))=Asc(:,h_dim_l2g_recv(i))+As_recv(l)*c(:,listh_recv(l)) 2346 end do 2347 end do 2348 end if 2349 end do 2350 call pdgemm('N','T',N_occ,N_occ,h_dim,1.0_dp,Asc,1,1,desc3(1:9,ispin),c,1,1,desc3(1:9,ispin),0.0_dp,A,1,1,desc2(1:9,ispin)) 2351#else 2352 Asc=0.0_dp 2353 do i=1,h_dim 2354 do j=1,numh(i) 2355 l=listhptr(i)+j 2356 Asc(:,i)=Asc(:,i)+As(l)*c(:,listh(l)) 2357 end do 2358 end do 2359 call dgemm('N','T',N_occ,N_occ,h_dim,1.0_dp,Asc,N_occ,c,N_occ,0.0_dp,A,N_occ) 2360#endif 2361 2362 call timer('m_calc_A',2) 2363 2364end subroutine calc_A_sparse 2365 2366!================================================! 2367! calculate operator matrix in AO basis (dense ! 2368! routine) ! 2369!================================================! 2370subroutine calc_densmat(h_dim,N_occ,ispin,A,c1,Ap,cA,c2) 2371 implicit none 2372 2373 !**** INPUT ***********************************! 2374 2375 integer, intent(in) :: h_dim ! num. of AOs (global) 2376 integer, intent(in) :: N_occ ! num. of WFs (global) 2377 integer, intent(in) :: ispin ! up/down spin (1/2) 2378 2379 real(dp), intent(in) :: A(:,:) ! Operator matrix in WF basis 2380 real(dp), intent(in) :: c1(:,:) ! WF coeffs. matrix 2381 real(dp), intent(in), optional :: c2(:,:) ! pre-multiplied WF coeffs. matrix 2382 2383 !**** INOUT ***********************************! 2384 2385 real(dp), intent(inout) :: Ap(:,:) ! operator matrix in orbital basis 2386 real(dp), intent(inout) :: cA(:,:) ! work matrix 2387 2388 !**********************************************! 2389 2390 call timer('m_calc_densmat',1) 2391 2392#ifdef MPI 2393 if (present(c2)) then 2394 call pdgemm('N','N',N_occ,h_dim,N_occ,1.0_dp,A,1,1,desc2(1:9,ispin),c2,1,1,desc3(1:9,ispin),0.0_dp,cA,1,1,desc3(1:9,ispin)) 2395 else 2396 call pdgemm('N','N',N_occ,h_dim,N_occ,1.0_dp,A,1,1,desc2(1:9,ispin),c1,1,1,desc3(1:9,ispin),0.0_dp,cA,1,1,desc3(1:9,ispin)) 2397 end if 2398 call pdgemm('T','N',h_dim,h_dim,N_occ,1.0_dp,c1,1,1,desc3(1:9,ispin),cA,1,1,desc3(1:9,ispin),0.0_dp,Ap,1,1,desc1) 2399#else 2400 if (present(c2)) then 2401 call dgemm('N','N',N_occ,h_dim,N_occ,1.0_dp,A,N_occ,c2,N_occ,0.0_dp,cA,N_occ) 2402 else 2403 call dgemm('N','N',N_occ,h_dim,N_occ,1.0_dp,A,N_occ,c1,N_occ,0.0_dp,cA,N_occ) 2404 end if 2405 call dgemm('T','N',h_dim,h_dim,N_occ,1.0_dp,c1,N_occ,cA,N_occ,0.0_dp,Ap,h_dim) 2406#endif 2407 2408 call timer('m_calc_densmat',2) 2409 2410end subroutine calc_densmat 2411 2412!================================================! 2413! calculate operator matrix in AO basis (sparse ! 2414! routine) ! 2415!================================================! 2416subroutine calc_densmat_sparse(h_dim,N_occ,ispin,nhmax,numh,listhptr,listh,A,c1,As,cA,c2) 2417 implicit none 2418 2419 !**** INPUT ***********************************! 2420 2421 integer, intent(in) :: h_dim ! num. of AOs (global) 2422 integer, intent(in) :: N_occ ! num. of WFs (global) 2423 integer, intent(in) :: ispin ! up/down spin (1/2) 2424 integer, intent(in) :: nhmax ! first dimension of listh and sparse matrices 2425 integer, intent(in) :: numh(:) ! num. of nonzero elements of each row of sparse matrices 2426 integer, intent(in) :: listhptr(:) ! pointer to start of row in listh 2427 integer, intent(in) :: listh(:) ! list of nonzero elements of each row of sparse matrices 2428 2429 real(dp), intent(in) :: A(:,:) ! operator matrix in WF basis 2430 real(dp), intent(in) :: c1(:,:) ! WF coeffs. matrix 2431 real(dp), intent(in), optional :: c2(:,:) ! pre-multiplied WF coeffs. matrix 2432 2433 !**** OUTPUT **********************************! 2434 2435 real(dp), intent(out) :: As(:) ! operator matrix in AO basis (sparse) 2436 2437 !**** INOUT ***********************************! 2438 2439 real(dp), intent(inout) :: cA(:,:) ! work matrix 2440 2441 !**** LOCAL ***********************************! 2442 2443 integer :: i 2444 integer :: j 2445 integer :: k 2446 integer :: l 2447#ifdef MPI 2448 integer :: n_comm 2449 integer :: info 2450 integer :: h_dim_loc_recv 2451 integer :: nhmax_recv 2452#endif 2453 2454 !**********************************************! 2455 2456 call timer('m_calc_densmat',1) 2457 2458#ifdef MPI 2459 if (present(c2)) then 2460 call pdgemm('N','N',N_occ,h_dim,N_occ,1.0_dp,A,1,1,desc2(1:9,ispin),c2,1,1,desc3(1:9,ispin),0.0_dp,cA,1,1,desc3(1:9,ispin)) 2461 else 2462 call pdgemm('N','N',N_occ,h_dim,N_occ,1.0_dp,A,1,1,desc2(1:9,ispin),c1,1,1,desc3(1:9,ispin),0.0_dp,cA,1,1,desc3(1:9,ispin)) 2463 end if 2464 do n_comm=0,Nodes-1 2465 if (n_comm==Node) then 2466 h_dim_loc_recv=h_dim_loc_1D 2467 nhmax_recv=nhmax 2468 numh_recv(1:h_dim_loc_1D)=numh(1:h_dim_loc_1D) 2469 listhptr_recv(1:h_dim_loc_1D)=listhptr(1:h_dim_loc_1D) 2470 listh_recv(1:nhmax)=listh(1:nhmax) 2471 h_dim_l2g_recv(1:h_dim_loc_1D)=h_dim_l2g(1:h_dim_loc_1D) 2472 end if 2473 call mpi_bcast(h_dim_loc_recv, 1, mpi_integer,n_comm,mpi_comm_world,info) 2474 call mpi_bcast(nhmax_recv, 1, mpi_integer,n_comm,mpi_comm_world,info) 2475 call mpi_bcast(numh_recv(1), h_dim_loc_recv, mpi_integer,n_comm,mpi_comm_world,info) 2476 call mpi_bcast(listhptr_recv(1), h_dim_loc_recv, mpi_integer,n_comm,mpi_comm_world,info) 2477 call mpi_bcast(listh_recv(1), nhmax_recv, mpi_integer,n_comm,mpi_comm_world,info) 2478 call mpi_bcast(h_dim_l2g_recv(1),h_dim_loc_recv, mpi_integer,n_comm,mpi_comm_world,info) 2479 As_recv=0.0_dp 2480 do i=1,h_dim_loc_recv 2481 do j=1,numh_recv(i) 2482 l=listhptr_recv(i)+j 2483 do k=1,N_occ_loc_1D(ispin) 2484 As_recv(l)=As_recv(l)+cA(k,listh_recv(l))*c1(k,h_dim_l2g_recv(i)) 2485 end do 2486 end do 2487 end do 2488 call mpi_reduce(As_recv(1),As(1),nhmax_recv,mpi_double_precision,mpi_sum,n_comm,mpi_comm_world,info) 2489 end do 2490#else 2491 if (present(c2)) then 2492 call dgemm('N','N',N_occ,h_dim,N_occ,1.0_dp,A,N_occ,c2,N_occ,0.0_dp,cA,N_occ) 2493 else 2494 call dgemm('N','N',N_occ,h_dim,N_occ,1.0_dp,A,N_occ,c1,N_occ,0.0_dp,cA,N_occ) 2495 end if 2496 As=0.0_dp 2497 do i=1,h_dim 2498 do j=1,numh(i) 2499 l=listhptr(i)+j 2500 do k=1,N_occ 2501 As(l)=As(l)+cA(k,listh(l))*c1(k,i) 2502 end do 2503 end do 2504 end do 2505#endif 2506 2507 call timer('m_calc_densmat',2) 2508 2509end subroutine calc_densmat_sparse 2510 2511!================================================! 2512! calculate coeffs. of the quartic line search ! 2513! equation using analytical expressions ! 2514!================================================! 2515subroutine calc_coeff(h_dim,N_occ,ispin,H,S,Hd,Sd,Hdd,Sdd,coeff,SdT) 2516 implicit none 2517 2518 !**** INPUT ***********************************! 2519 2520 integer, intent(in) :: h_dim ! num. of AOs (global) 2521 integer, intent(in) :: N_occ ! num. of WFs (global) 2522 integer, intent(in) :: ispin ! up/down spin (1/2) 2523 2524 real(dp), intent(in) :: H(:,:) ! hamiltonian matrix in WF basis 2525 real(dp), intent(in) :: S(:,:) ! overlap matrix in WF basis 2526 real(dp), intent(in) :: Hd(:,:) ! g^T*h*c 2527 real(dp), intent(in) :: Sd(:,:) ! g^T*s*c 2528 real(dp), intent(in) :: Hdd(:,:) ! g^T*h*g 2529 real(dp), intent(in) :: Sdd(:,:) ! g^T*h*g 2530 2531 !**** INOUT ***********************************! 2532 2533 real(dp), intent(inout) :: coeff(0:4) ! coeffs. of the quartic equation 2534 real(dp), intent(inout) :: SdT(:,:) ! work matrix 2535 2536 !**** LOCAL ***********************************! 2537 2538#ifdef MPI 2539 integer :: info 2540#else 2541 integer :: i, j 2542#endif 2543 2544 real(dp) :: Tr_loc 2545 real(dp) :: TrH 2546 real(dp) :: TrHS 2547 real(dp) :: TrHd 2548 real(dp) :: TrHdS 2549 real(dp) :: TrHSd 2550 real(dp) :: TrHdd 2551 real(dp) :: TrHddS 2552 real(dp) :: TrHSdd 2553 real(dp) :: TrHdSd 2554 real(dp) :: TrHdSdT 2555 real(dp) :: TrHddSd 2556 real(dp) :: TrHdSdd 2557 real(dp) :: TrHddSdd 2558 real(dp), external :: ddot 2559#ifdef MPI 2560 real(dp), external :: pdlatra 2561#endif 2562 2563 !**********************************************! 2564 2565 call timer('m_calc_coeff',1) 2566 2567#ifdef MPI 2568 TrH=pdlatra(N_occ,H,1,1,desc2(1:9,ispin)) 2569 Tr_loc=ddot(N_occ_loc(1,ispin)*N_occ_loc(2,ispin),H,1,S,1) 2570 call mpi_allreduce(Tr_loc,TrHS,1,mpi_double_precision,mpi_sum,mpi_comm_world,info) 2571 2572 TrHd=pdlatra(N_occ,Hd,1,1,desc2(1:9,ispin)) 2573 Tr_loc=ddot(N_occ_loc(1,ispin)*N_occ_loc(2,ispin),Hd,1,S,1) 2574 call mpi_allreduce(Tr_loc,TrHdS,1,mpi_double_precision,mpi_sum,mpi_comm_world,info) 2575 Tr_loc=ddot(N_occ_loc(1,ispin)*N_occ_loc(2,ispin),H,1,Sd,1) 2576 call mpi_allreduce(Tr_loc,TrHSd,1,mpi_double_precision,mpi_sum,mpi_comm_world,info) 2577 2578 TrHdd=pdlatra(N_occ,Hdd,1,1,desc2(1:9,ispin)) 2579 Tr_loc=ddot(N_occ_loc(1,ispin)*N_occ_loc(2,ispin),Hdd,1,S,1) 2580 call mpi_allreduce(Tr_loc,TrHddS,1,mpi_double_precision,mpi_sum,mpi_comm_world,info) 2581 Tr_loc=ddot(N_occ_loc(1,ispin)*N_occ_loc(2,ispin),H,1,Sdd,1) 2582 call mpi_allreduce(Tr_loc,TrHSdd,1,mpi_double_precision,mpi_sum,mpi_comm_world,info) 2583 call pdtran(N_occ,N_occ,1.0_dp,Sd,1,1,desc2(1:9,ispin),0.0_dp,SdT,1,1,desc2(1:9,ispin)) 2584 Tr_loc=ddot(N_occ_loc(1,ispin)*N_occ_loc(2,ispin),Hd,1,SdT,1) 2585 call mpi_allreduce(Tr_loc,TrHdSd,1,mpi_double_precision,mpi_sum,mpi_comm_world,info) 2586 Tr_loc=ddot(N_occ_loc(1,ispin)*N_occ_loc(2,ispin),Hd,1,Sd,1) 2587 call mpi_allreduce(Tr_loc,TrHdSdT,1,mpi_double_precision,mpi_sum,mpi_comm_world,info) 2588 2589 Tr_loc=ddot(N_occ_loc(1,ispin)*N_occ_loc(2,ispin),Hdd,1,Sd,1) 2590 call mpi_allreduce(Tr_loc,TrHddSd,1,mpi_double_precision,mpi_sum,mpi_comm_world,info) 2591 Tr_loc=ddot(N_occ_loc(1,ispin)*N_occ_loc(2,ispin),Hd,1,Sdd,1) 2592 call mpi_allreduce(Tr_loc,TrHdSdd,1,mpi_double_precision,mpi_sum,mpi_comm_world,info) 2593 2594 Tr_loc=ddot(N_occ_loc(1,ispin)*N_occ_loc(2,ispin),Hdd,1,Sdd,1) 2595 call mpi_allreduce(Tr_loc,TrHddSdd,1,mpi_double_precision,mpi_sum,mpi_comm_world,info) 2596#else 2597 TrH=0.0_dp 2598 do i=1,N_occ 2599 TrH=TrH+H(i,i) 2600 end do 2601 TrHS=ddot(N_occ*N_occ,H,1,S,1) 2602 2603 TrHd=0.0_dp 2604 do i=1,N_occ 2605 TrHd=TrHd+Hd(i,i) 2606 end do 2607 TrHdS=ddot(N_occ*N_occ,Hd,1,S,1) 2608 TrHSd=ddot(N_occ*N_occ,H,1,Sd,1) 2609 2610 TrHdd=0.0_dp 2611 do i=1,N_occ 2612 TrHdd=TrHdd+Hdd(i,i) 2613 end do 2614 TrHddS=ddot(N_occ*N_occ,Hdd,1,S,1) 2615 TrHSdd=ddot(N_occ*N_occ,H,1,Sdd,1) 2616 do i=1,N_occ 2617 do j=1,N_occ 2618 SdT(j,i)=Sd(i,j) 2619 end do 2620 end do 2621 TrHdSd=ddot(N_occ*N_occ,Hd,1,SdT,1) 2622 TrHdSdT=ddot(N_occ*N_occ,Hd,1,Sd,1) 2623 2624 TrHddSd=ddot(N_occ*N_occ,Hdd,1,Sd,1) 2625 TrHdSdd=ddot(N_occ*N_occ,Hd,1,Sdd,1) 2626 2627 TrHddSdd=ddot(N_occ*N_occ,Hdd,1,Sdd,1) 2628#endif 2629 2630 coeff(0)=2.0_dp*TrH-TrHS 2631 coeff(1)=2.0_dp*(2.0_dp*TrHd-TrHdS-TrHSd) 2632 coeff(2)=2.0_dp*(TrHdd-TrHdSd-TrHdSdT)-TrHddS-TrHSdd 2633 coeff(3)=-2.0_dp*(TrHddSd+TrHdSdd) 2634 coeff(4)=-TrHddSdd 2635 2636 call timer('m_calc_coeff',2) 2637 2638end subroutine calc_coeff 2639 2640!================================================! 2641! random number generator ! 2642! -initialize with: ! 2643! call rand_init() ! 2644! -generate new number with: ! 2645! call random_numer(rn) ! 2646! where where rn is a real(dp) variable ! 2647!================================================! 2648subroutine rand_init 2649 implicit none 2650 2651 !**** LOCAL ***********************************! 2652 2653 character(10) :: system_time 2654 2655 integer :: i 2656 integer :: rand_size 2657 integer, allocatable :: rand_seed(:) 2658 2659 real(dp) :: rtime 2660 real(dp) :: rn 2661 2662 !**********************************************! 2663 2664 call random_seed(size=rand_size) 2665 allocate(rand_seed(1:rand_size)) 2666 call date_and_time(time=system_time) 2667 read (system_time,*) rtime 2668 !rand_seed=(Node+1)*int(rtime*1000.0_dp) 2669 rand_seed=(Node+1)*123456 2670 call random_seed(put=rand_seed) 2671 deallocate(rand_seed) 2672 2673 do i=1,10000 2674 call random_number(rn) 2675 end do 2676 2677end subroutine rand_init 2678 2679end module m_dminim 2680