1! --- 2! Copyright (C) 1996-2016 The SIESTA group 3! This file is distributed under the terms of the 4! GNU General Public License: see COPYING in the top directory 5! or http://www.gnu.org/copyleft/gpl.txt . 6! See Docs/Contributors.txt for a list of contributors. 7! --- 8 subroutine overkkneig( kvector, bvector, nuo, nuotot, psiatk, psinei, & 9 & maxnh, delkmat, nbandsocc_loc, nbandsocc, Mkb ) 10 11 12 use precision, only: dp ! Real double precision type 13 use parallel, only: Nodes ! Total number of Nodes 14 use parallel, only: Node ! Local Node 15 use parallel, only: IONode ! Input/output node 16 use atomlist, only: iaorb ! Pointer to atom to which 17 ! orbital belongs 18 use atomlist, only: indxuo ! Index of equivalent orbital 19 ! in unit cell 20 use sparse_matrices, only: numh ! Number of nonzero element of 21 ! each row of the 22 ! hamiltonian matrix 23 use sparse_matrices, only: listh ! Nonzero hamiltonian-matrix 24 ! elements 25 use sparse_matrices, only: listhptr ! Pointer to start of each row 26 ! of the hamiltonian matrix 27 use sparse_matrices, only: xijo ! Vectors between orbital 28 ! centers (sparse) 29 use siesta_geom, only: xa ! Atomic positions 30 use alloc, only: re_alloc ! Allocatation routines 31 use alloc, only: de_alloc ! Deallocatation routines 32 33#ifdef MPI 34 use parallelsubs, only: LocalToGlobalOrb ! Converts an orbital index 35 ! in the local frame 36 ! to the global frame 37 use parallelsubs, only: GetNodeOrbs ! Calculates the number of 38 ! orbitals stored on the 39 ! local Node. 40 use m_orderbands, only: which_band_in_node ! Given a node and a 41 ! local index, 42 ! this array gives the 43 ! global index of the band 44 ! stored there 45 use m_orderbands, only: sequential_index_included_bands 46 ! Sequential number of the 47 ! bands included for 48 ! wannierization 49 ! (the bands are listed 50 ! in order of incremental 51 ! energy) 52 53 use mpi_siesta 54#endif 55 56 implicit none 57 58 real(dp), intent(in) :: kvector(3) ! Wave vector for which the 59 ! Overlap matrix between 60 ! the periodic part of the 61 ! wave functions will be 62 ! computed 63 real(dp), intent(in) :: bvector(3) ! Vector that connects a 64 ! given k-point with its 65 ! neighbour (in Bohr^-1) 66 integer, intent(in) :: nuo ! Number of orbitals in local 67 ! node 68 ! NOTE: Running in parallel 69 ! this is core dependent 70 ! Sum_{cores} no_l = no_u 71 integer, intent(in) :: nuotot ! Number of orbitals in the 72 ! unit cell. 73 complex(dp), intent(in) :: psiatk(nuotot,nuo) ! Coefficients of the 74 ! eigenvector at the k-point 75 ! First index: orbital 76 ! Second index: band 77 complex(dp), intent(in) :: psinei(nuotot,nuo) ! Coefficients of the 78 ! eigenvector at the 79 ! neighbour k-point 80 ! First index: orbital 81 ! Second index: band 82! Note that, when running in parallel, the information known by each local node 83! for psiatk, and psinei is: each local node knows all the coefficients 84! for only a few nuo bands. 85 integer, intent(in) :: maxnh ! Maximum number of orbitals 86 ! interacting 87 ! NOTE: In parallel runs, 88 ! maxnh changes from node to 89 ! node 90 complex(dp), intent(in) :: delkmat(maxnh) ! Matrix elements of a plane 91 ! wave 92 integer, intent(in) :: nbandsocc_loc ! Number of ocuppied bands 93 ! in the local node 94 integer, intent(in) :: nbandsocc ! Number of occupied bands 95 complex(dp), intent(out) :: Mkb(nbandsocc,nbandsocc) 96 ! Overlap matrix between the 97 ! the periodic part of the 98 99! Internal variables 100 integer :: imu ! Counter for the loop on orbit 101 integer :: inu ! Counter for the loop on orbit 102 integer :: nband ! Counter for the loop on bands 103 integer :: mband ! Counter for the loop on bands 104 integer :: jneig ! Counter for the loop on neigb 105 integer :: ia ! Atomic index 106 integer :: ind ! Index for the neighbour 107 ! orbital in the list 108 integer :: jo ! Neighbour orbital in the 109 ! supercell 110 real(dp) :: kxmu ! Dot product between the 111 ! b vector and the atomic 112 ! position. 113 ! See the term in the 114 ! bracket in the right hand 115 ! side of Eq. (5) of the 116 ! paper by 117 ! D. Sanchez-Portal et al. 118 ! Fundamental Physics for 119 ! Ferroelectrics 120 ! (AIP Conf. Proc. Vol 535) 121 ! ed R. Cohen (Melville, AIP) 122 ! pp 111-120 (2000). 123 real(dp) :: kxij ! Dot product between the 124 ! wave vector k and the 125 ! relative position of the 126 ! two orbitals (see the 127 ! exponential right after the 128 ! summatory in Eq. (5) of 129 ! the paper by 130 ! D. Sanchez-Portal et al. 131 ! Fundamental Physics for 132 ! Ferroelectrics 133 ! (AIP Conf. Proc. Vol 535) 134 ! ed R. Cohen (Melville, AIP) 135 ! pp 111-120 (2000). 136 137 complex(dp) :: eibr ! Exponential exp(i kxmu ) 138 complex(dp) :: eikr ! Exponential exp(i kxij ) 139 complex(dp) :: pipj ! Product of the coefficients 140 ! of the wave functions 141 complex(dp), dimension(:,:), pointer :: aux => null() 142 complex(dp), dimension(:,:), pointer :: aux2 => null() 143 144 integer :: imu_global ! Global index of the atom orbi 145#ifdef MPI 146 integer :: MPIerror 147 integer :: inode ! Counter for the loop on nodes 148 integer :: norb_max_loc ! Maximum number of atomic 149 ! orbitals that will be 150 ! stored on any node 151 integer :: norb_loc ! Number of atomic orbitals 152 ! on the local node 153 integer :: moccband_global ! Global index of the 154 ! occupied band 155 integer :: moccband_sequential ! Global index of the 156 ! occupied band in sequential 157 ! notation 158 integer :: noccband_global ! Global index of the 159 ! occupied band 160 integer :: noccband_sequential ! Global index of the 161 ! occupied band in sequential 162 ! notation 163 complex(dp), dimension(:,:), pointer :: auxtmp => null() ! Temporal arrays used to 164 complex(dp), dimension(:,:), pointer :: aux2loc => null() ! broadcast auxiliary 165 ! matrices 166#endif 167 168 169! Start time counter 170 call timer( 'overkkneig', 1 ) 171 172! Initialize Mkb 173 Mkb(:,:) = cmplx(0.0_dp,0.0_dp,kind=dp) 174 175!! For debugging 176! write(6,'(a,i5,3f12.5)') & 177! & 'overkkneig: Node, kvector =', Node, kvector 178! write(6,'(a,i5,3f12.5)') & 179! & 'overkkneig: Node, bvector =', Node, bvector 180! write(6,'(a,2i5)') & 181! & 'overkkneig: Node, maxnh =', Node, maxnh 182! if( Node .eq. 1 ) then 183!! if( IOnode ) then 184! do nband = 1, nuo 185! do imu = 1, nuotot 186! write(6,'(a,2i5,2f12.5)') & 187! & 'overkkneig: nband, imu, coeff = ', imu, nband, psiatk(imu,nband) 188! enddo 189! enddo 190! endif 191!! End debugging 192 193 194! Compute the auxiliary matrix aux, defined as: 195! \sum_{\vec{R}_{l}} 196! exp^{i \vec{k} \cdot \left(\vec{R}_{\mu}-\vec{R}_{\nu}-\vec{R}_{l} \right) } 197! \int d \vec{r} \phi_{\nu} \left(\vec{r}-\vec{R}_{\nu}-\vec{R}_{l} \right) 198! exp^{- i \vec{b} \left(\vec{r}-\vec{R}_{\mu} \right) } 199! \phi_{\mu} \left(\vec{r}-\vec{R}_{\mu} \right) 200! See, for instance, Eq. (5) of the paper by D. Sanchez-Portal et al. 201! Fundamental Physics for Ferroelectrics (AIP Conf. Proc. Vol 535) 202! ed R. Cohen (Melville, AIP) pp 111-120 203! In aux, the second index refer to the orbital in the unit cell 204! (\mu in the previous notation), 205! while the first index refer to the neighbour orbital (\nu) in the 206! previous notation. 207 208! Allocate local memory. 209! The second dimension of aux is equal to the number of orbitals 210! in the local node, nuo, since that is the maximum number of 211! orbitals mu (in the previous equation) that can be computed locally 212 call re_alloc( aux, 1, nuotot, 1, nuo, name='aux', routine='overkkneig') 213 214 aux(:,:) = cmplx(0.0_dp,0.0_dp,kind=dp) 215 216! Loop on all the atomic orbitals known by the local node 217 do imu = 1, nuo 218! Identify the global index of the orbital 219#ifdef MPI 220 call LocalToGlobalOrb( imu, Node, Nodes, imu_global) 221!! For debugging 222! write(6,'(a,3i5)')' overkkneig: Node, imu, imu_global = ', & 223! & Node, imu, imu_global 224!! End debugging 225#else 226 imu_global = imu 227#endif 228! For each atomic orbital, find the atom to which that orbital belongs... 229 ia = iaorb( imu_global ) 230! ... the position of the atom and the dot product between the vector 231! connecting neighbour k-points, b, and the position of the atom. 232! See the term in the bracket in the right hand side of Eq. (5) of the paper 233! D. Sanchez-Portal et al. 234! Fundamental Physics for Ferroelectrics (AIP Conf. Proc. Vol 535) 235! ed R. Cohen (Melville, AIP) pp 111-120, 2000 236 kxmu = bvector(1) * xa(1,ia) + & 237 & bvector(2) * xa(2,ia) + & 238 & bvector(3) * xa(3,ia) 239 eibr = cmplx( dcos(kxmu), dsin(kxmu), kind=dp ) 240 do jneig = 1, numh( imu ) 241 ind = listhptr(imu) + jneig 242 jo = listh(ind) 243 inu = indxuo(jo) 244! Compute the dot product between the wave vector k and the relative 245! position of the two orbitals (see the exponential right after the 246! summatory in Eq. (5) of the paper by D. Sanchez-Portal et al. 247! Fundamental Physics for Ferroelectrics (AIP Conf. Proc. Vol 535) 248! ed R. Cohen (Melville, AIP) pp 111-120 (2000). 249 kxij = kvector(1) * xijo(1,ind) + & 250 & kvector(2) * xijo(2,ind) + & 251 & kvector(3) * xijo(3,ind) 252! The origin for the relative position vector is taken on 253! the atom in the unit cell. 254! In the formula for overlap matrix at neighbor k-points, 255! See, for instance, Eq. (5) of the paper by D. Sanchez-Portal et al. 256! Fundamental Physics for Ferroelectrics (AIP Conf. Proc. Vol 535) 257! ed R. Cohen (Melville, AIP) pp 111-120 (2000). 258! just the opposite vector appears. We have to change the sign 259! of the phase factor. 260 kxij = -1.0_dp * kxij 261 eikr = cmplx( dcos(kxij), dsin(kxij), kind=dp ) 262! imu runs on the local orbitals on this node, 263! so it has to be the second index in aux 264! (remember that the second dimension in aux has been allocated with 265! nuo = no_l) 266 aux(inu,imu) = aux(inu,imu) + eikr * delkmat(ind) * eibr 267 enddo ! End loop on neighbour orbitals 268 enddo ! End loop on orbitals in the unit cell local to this node 269 270!! For debugging 271!! if( IOnode ) then 272! if( Node .eq. 1 ) then 273! do imu = 1, nuotot 274! do inu = 1, nuotot 275! write(6,'(a,3i5,2f12.5)') & 276! & 'overkkneig: Node, imu, inu, aux = ', & 277! & Node, imu, inu, aux(inu,imu) 278! enddo 279! enddo 280! endif 281!! End debugging 282 283! Compute \sum_{\mu} c_{\mu,m} \left( \vec{k} + \vec{b} \right) aux_{\nu,\mu} 284! and store it in an auxiliary matrix 285 286! Allocate local memory 287 call re_alloc( aux2, 1, nbandsocc, 1, nuotot, & 288 & name='aux2', routine='overkkneig' ) 289 290 aux2(:,:) = cmplx(0.0_dp,0.0_dp,kind=dp) 291 292#ifdef MPI 293 294! Compute the number of atomic orbitals stored on Node 0 295! We assume that this is the maximum number of orbitals that will be stored 296! on any node. 297 call GetNodeOrbs( nuotot, 0, Nodes, norb_max_loc) 298 299!! For debugging 300! write(6,'(a,4i5)')' overkkneig: Node, nuotot, Nodes, norb_max_loc = ', & 301! & Node, nuotot, Nodes, norb_max_loc 302!! End debugging 303 304! Allocate the temporal variable that will be used to broadcast 305! the auxiliary matrix aux (see comments for its definition above) 306! to all the other nodes 307! auxtmp follows the same structure as aux: 308! First index: all the atomic orbitals in the unit cell (nuotot) 309! Second index: maximum number of atomic orbitals stored in a local node 310 call re_alloc( auxtmp, 1, nuotot, 1, norb_max_loc, & 311 & name='auxtmp', routine='overkkneig' ) 312 auxtmp(:,:) = cmplx(0.0_dp,0.0_dp,kind=dp) 313 314! Loop on all the nodes: 315! This is required because a given Node, for instance Node = 0, 316! knows the coefficients of the wave function at a neighbour k-point 317! for all the atomic orbitals (mu = 1, nuotot), 318! while it only knows the matrix aux for some of the matrix orbitals (mu=1,nuo). 319! To compute the sum on mu we have to take to the Node = 0 the full 320! aux matrix from the rest of the nodes. 321 322 do inode = 0, Nodes-1 323 324! Compute the number of occupied bands stored on Node inode 325 call GetNodeOrbs( nuotot, inode, Nodes, norb_loc ) 326 327! Copy the auxiliary matrix for inode to a temporal variable 328 if( Node .eq. inode ) then 329 do imu = 1, norb_loc 330 do inu = 1, nuotot 331 auxtmp(inu,imu) = aux(inu,imu) 332 enddo 333 enddo 334 endif 335 336! Broadcast the auxiliary matrix from node inode to all the other nodes 337 call MPI_Bcast( auxtmp(1,1), nuotot*norb_loc, & 338 & MPI_double_complex, inode, MPI_Comm_World, MPIerror ) 339 340! Loop on the occupied bands stored on the local node (Node) 341 do mband = 1, nbandsocc_loc 342 343! Identify the global index of the occupied band 344 moccband_global = which_band_in_node(Node,mband) 345 moccband_sequential = sequential_index_included_bands(moccband_global) 346!! For debugging 347! write(6,'(a,4i5)') & 348! & ' overkkisig: Node, mband, moccband_global, sequential = ', & 349! & Node, mband, moccband_global, moccband_sequential 350!! End debugging 351 352! Loop on all the atomic orbitals of the unit cell 353! As parallelized now, a given node knows 354! the coefficients of psinei for all the atomic orbitals 355! of nuo = no_l bands 356 357 do inu = 1, nuotot 358! Perform the sum on mu, following the notation of Eq. (5) in the 359! paper by D. Sanchez-Portal et al. 360! Fundamental Physics for Ferroelectrics (AIP Conf. Proc. Vol 535) (2000) 361 do imu = 1, norb_loc 362! Identify the global index of the occupied band 363 call LocalToGlobalOrb( imu, inode, Nodes, imu_global ) 364!! For debugging 365! write(6,'(a,4i5)')' overkkneig: Node, inu, imu, imu_global = ', & 366! & Node, inu, imu, imu_global 367!! End debugging 368 aux2(moccband_sequential,inu) = aux2(moccband_sequential,inu) + & 369 & psinei(imu_global,mband) * auxtmp(inu,imu) 370 enddo ! End loop on the sumatory on mu 371 enddo ! End loop on atomic orbitals in the unit cell (nuotot) 372 enddo ! End loop on local occupied bands (mband) 373 enddo ! End loop on nodes (inode) 374 375! Allocate workspace array for global reduction 376 call re_alloc( aux2loc, 1, nbandsocc, 1, nuotot, & 377 & name='aux2loc', routine='overkkneig' ) 378! Global reduction of aux2loc matrix 379 aux2loc(:,:) = cmplx(0.0_dp,0.0_dp,kind=dp) 380 call MPI_AllReduce( aux2(1,1), aux2loc(1,1), nbandsocc*nuotot, & 381 & MPI_double_complex,MPI_sum,MPI_Comm_World,MPIerror ) 382 aux2(:,:) = aux2loc(:,:) 383#else 384 do mband = 1, nbandsocc 385 do inu = 1, nuotot 386 do imu = 1, nuotot 387 aux2(mband,inu) = aux2(mband,inu) + & 388 & psinei(imu,mband) * aux(inu,imu) 389 enddo 390 enddo 391 enddo 392#endif 393 394!! For debugging 395! if( IOnode ) then 396!! if( Node .eq. 1 ) then 397! do nband = 1, nuo 398! do inu = 1, nuotot 399! write(6,'(a,3i5,2f12.5)') & 400! & 'overkkneig: Node, inu, nband, psinei = ', & 401! & Node, inu, nband, psinei(inu,nband) 402! enddo 403! enddo 404! 405! do inu = 1, nuotot 406! do nband = 1, nbandsocc 407! write(6,'(a,3i5,2f12.5)') & 408! & 'overkkneig: Node, inu, nband, aux2 = ', & 409! & Node, inu, nband, aux2(nband,inu) 410! enddo 411! enddo 412! endif 413!! End debugging 414 415 do nband = 1, nbandsocc_loc 416 do mband = 1, nbandsocc 417 do inu = 1, nuotot 418#ifdef MPI 419! Identify the global index of the occupied band 420 noccband_global = which_band_in_node(Node,nband) 421 noccband_sequential = sequential_index_included_bands(noccband_global) 422 Mkb(noccband_sequential,mband) = Mkb(noccband_sequential,mband) + & 423 conjg(psiatk(inu,nband)) * aux2(mband,inu) 424#else 425 Mkb(nband,mband) = Mkb(nband,mband) + & 426 conjg(psiatk(inu,nband)) * aux2(mband,inu) 427#endif 428 enddo 429 enddo 430 enddo 431 432#ifdef MPI 433! Allocate workspace array for global reduction 434 call re_alloc( aux2loc, 1, nbandsocc, 1, nbandsocc, & 435 & name='aux2loc', routine='overkkneig' ) 436! Global reduction of aux2loc matrix 437 aux2loc(:,:) = cmplx(0.0_dp,0.0_dp,kind=dp) 438 call MPI_AllReduce( Mkb(1,1), aux2loc(1,1), nbandsocc*nbandsocc, & 439 & MPI_double_complex,MPI_sum,MPI_Comm_World,MPIerror ) 440 Mkb(:,:) = aux2loc(:,:) 441#endif 442 443!! For debugging 444! if( IOnode ) then 445!! if( Node .eq. 1 ) then 446! do inu = 1, nuotot 447! do nband = 1, nuo 448! write(6,'(a,3i5,2f12.5)') & 449! & 'overkkneig: Node, inu, nband, psiatk = ', & 450! & Node, inu, nband, psiatk(inu,nband) 451! enddo 452! enddo 453! 454! do nband = 1, nbandsocc 455! do mband = 1, nbandsocc 456! write(6,'(a,3i5,2f12.5)') & 457! & 'overkkneig: Node, nband, mband, Mkb = ', & 458! & Node, nband, mband, Mkb(nband,mband) 459! enddo 460! enddo 461! endif 462!! End debugging 463 464 call de_alloc( aux, name='aux', routine="overkkneig" ) 465 call de_alloc( aux2, name='aux2', routine="overkkneig" ) 466#ifdef MPI 467 call de_alloc( auxtmp, name='auxtmp', routine="overkkneig" ) 468 call de_alloc( aux2loc, name='aux2loc', routine="overkkneig" ) 469#endif 470 471! End time counter 472 call timer( 'overkkneig', 2 ) 473 474 end subroutine overkkneig 475 476