1! ---
2! Copyright (C) 1996-2016	The SIESTA group
3!  This file is distributed under the terms of the
4!  GNU General Public License: see COPYING in the top directory
5!  or http://www.gnu.org/copyleft/gpl.txt .
6! See Docs/Contributors.txt for a list of contributors.
7! ---
8  subroutine overkkneig( kvector, bvector, nuo, nuotot, psiatk, psinei,   &
9 &                       maxnh, delkmat, nbandsocc_loc, nbandsocc, Mkb )
10
11
12  use precision,            only: dp             ! Real double precision type
13  use parallel,             only: Nodes          ! Total number of Nodes
14  use parallel,             only: Node           ! Local Node
15  use parallel,             only: IONode         ! Input/output node
16  use atomlist,             only: iaorb          ! Pointer to atom to which
17                                                 !   orbital belongs
18  use atomlist,             only: indxuo         ! Index of equivalent orbital
19                                                 !   in unit cell
20  use sparse_matrices,      only: numh           ! Number of nonzero element of
21                                                 !   each row of the
22                                                 !   hamiltonian matrix
23  use sparse_matrices,      only: listh          ! Nonzero hamiltonian-matrix
24                                                 !   elements
25  use sparse_matrices,      only: listhptr       ! Pointer to start of each row
26                                                 !   of the hamiltonian matrix
27  use sparse_matrices,      only: xijo           ! Vectors between orbital
28                                                 !   centers (sparse)
29  use siesta_geom,          only: xa             ! Atomic positions
30  use alloc,                only: re_alloc       ! Allocatation routines
31  use alloc,                only: de_alloc       ! Deallocatation routines
32
33#ifdef MPI
34  use parallelsubs,         only: LocalToGlobalOrb ! Converts an orbital index
35                                                 !   in the local frame
36                                                 !   to the global frame
37  use parallelsubs,         only: GetNodeOrbs    ! Calculates the number of
38                                                 !   orbitals stored on the
39                                                 !   local Node.
40  use m_orderbands,       only: which_band_in_node  ! Given a node and a
41                                                    !   local index,
42                                                    !   this array gives the
43                                                    !   global index of the band
44                                                    !   stored there
45  use m_orderbands,       only: sequential_index_included_bands
46                                                    ! Sequential number of the
47                                                    !   bands included for
48                                                    !   wannierization
49                                                    !   (the bands are listed
50                                                    !   in order of incremental
51                                                    !   energy)
52
53  use mpi_siesta
54#endif
55
56  implicit none
57
58  real(dp),    intent(in)  :: kvector(3)         ! Wave vector for which the
59                                                 !   Overlap matrix between
60                                                 !   the periodic part of the
61                                                 !   wave functions will be
62                                                 !   computed
63  real(dp),    intent(in)  :: bvector(3)         ! Vector that connects a
64                                                 !   given k-point with its
65                                                 !   neighbour (in Bohr^-1)
66  integer,     intent(in)  :: nuo                ! Number of orbitals in local
67                                                 !   node
68                                                 ! NOTE: Running in parallel
69                                                 !   this is core dependent
70                                                 !   Sum_{cores} no_l = no_u
71  integer,     intent(in)  :: nuotot             ! Number of orbitals in the
72                                                 !   unit cell.
73  complex(dp), intent(in)  :: psiatk(nuotot,nuo) ! Coefficients of the
74                                                 !    eigenvector at the k-point
75                                                 !    First  index: orbital
76                                                 !   Second index: band
77  complex(dp), intent(in)  :: psinei(nuotot,nuo) ! Coefficients of the
78                                                 !   eigenvector at the
79                                                 !   neighbour k-point
80                                                 !   First  index: orbital
81                                                 !   Second index: band
82! Note that, when running in parallel, the information known by each local node
83! for psiatk, and psinei is: each local node knows all the coefficients
84! for only a few nuo bands.
85  integer,     intent(in)  :: maxnh              ! Maximum number of orbitals
86                                                 !   interacting
87                                                 !   NOTE: In parallel runs,
88                                                 !   maxnh changes from node to
89                                                 !   node
90  complex(dp), intent(in)  :: delkmat(maxnh)     ! Matrix elements of a plane
91                                                 !   wave
92  integer,     intent(in)  :: nbandsocc_loc      ! Number of ocuppied bands
93                                                 !   in the local node
94  integer,     intent(in)  :: nbandsocc          ! Number of occupied bands
95  complex(dp), intent(out) :: Mkb(nbandsocc,nbandsocc)
96                                                 ! Overlap matrix between the
97                                                 !   the periodic part of the
98
99! Internal variables
100  integer     :: imu                             ! Counter for the loop on orbit
101  integer     :: inu                             ! Counter for the loop on orbit
102  integer     :: nband                           ! Counter for the loop on bands
103  integer     :: mband                           ! Counter for the loop on bands
104  integer     :: jneig                           ! Counter for the loop on neigb
105  integer     :: ia                              ! Atomic index
106  integer     :: ind                             ! Index for the neighbour
107                                                 !   orbital in the list
108  integer     :: jo                              ! Neighbour orbital in the
109                                                 !   supercell
110  real(dp)    :: kxmu                            ! Dot product between the
111                                                 !   b vector and the atomic
112                                                 !   position.
113                                                 !   See the term in the
114                                                 !   bracket in the right hand
115                                                 !   side of Eq. (5) of the
116                                                 !   paper by
117                                                 !   D. Sanchez-Portal et al.
118                                                 !   Fundamental Physics for
119                                                 !   Ferroelectrics
120                                                 !   (AIP Conf. Proc. Vol 535)
121                                                 !   ed R. Cohen (Melville, AIP)
122                                                 !   pp 111-120 (2000).
123  real(dp)    :: kxij                            ! Dot product between the
124                                                 !   wave vector k and the
125                                                 !   relative position of the
126                                                 !   two orbitals (see the
127                                                 !   exponential right after the
128                                                 !   summatory in Eq. (5) of
129                                                 !   the paper by
130                                                 !   D. Sanchez-Portal et al.
131                                                 !   Fundamental Physics for
132                                                 !   Ferroelectrics
133                                                 !   (AIP Conf. Proc. Vol 535)
134                                                 !   ed R. Cohen (Melville, AIP)
135                                                 !   pp 111-120 (2000).
136
137  complex(dp) :: eibr                            ! Exponential exp(i kxmu )
138  complex(dp) :: eikr                            ! Exponential exp(i kxij )
139  complex(dp) :: pipj                            ! Product of the coefficients
140                                                 !   of the wave functions
141  complex(dp), dimension(:,:), pointer :: aux => null()
142  complex(dp), dimension(:,:), pointer :: aux2 => null()
143
144  integer     :: imu_global                      ! Global index of the atom orbi
145#ifdef MPI
146  integer     :: MPIerror
147  integer     :: inode                           ! Counter for the loop on nodes
148  integer     :: norb_max_loc                    ! Maximum number of atomic
149                                                 !   orbitals that will be
150                                                 !   stored on any node
151  integer     :: norb_loc                        ! Number of atomic orbitals
152                                                 !   on the local node
153  integer     :: moccband_global                 ! Global index of the
154                                                 !   occupied band
155  integer     :: moccband_sequential             ! Global index of the
156                                                 !   occupied band in sequential
157                                                 !   notation
158  integer     :: noccband_global                 ! Global index of the
159                                                 !   occupied band
160  integer     :: noccband_sequential             ! Global index of the
161                                                 !   occupied band in sequential
162                                                 !   notation
163  complex(dp), dimension(:,:), pointer :: auxtmp => null() ! Temporal arrays used to
164  complex(dp), dimension(:,:), pointer :: aux2loc => null() !   broadcast auxiliary
165                                                 !   matrices
166#endif
167
168
169! Start time counter
170  call timer( 'overkkneig', 1 )
171
172! Initialize Mkb
173  Mkb(:,:) = cmplx(0.0_dp,0.0_dp,kind=dp)
174
175!!     For debugging
176!  write(6,'(a,i5,3f12.5)')    &
177! &  'overkkneig: Node, kvector =', Node, kvector
178!  write(6,'(a,i5,3f12.5)')    &
179! &  'overkkneig: Node, bvector =', Node, bvector
180!  write(6,'(a,2i5)')    &
181! &  'overkkneig: Node, maxnh =', Node, maxnh
182!      if( Node .eq. 1 ) then
183!!      if( IOnode ) then
184!        do nband = 1, nuo
185!          do imu = 1, nuotot
186!            write(6,'(a,2i5,2f12.5)')         &
187! &            'overkkneig: nband, imu, coeff = ', imu, nband, psiatk(imu,nband)
188!          enddo
189!        enddo
190!      endif
191!!     End debugging
192
193
194! Compute the auxiliary matrix aux, defined as:
195! \sum_{\vec{R}_{l}}
196! exp^{i \vec{k} \cdot \left(\vec{R}_{\mu}-\vec{R}_{\nu}-\vec{R}_{l} \right) }
197! \int d \vec{r} \phi_{\nu} \left(\vec{r}-\vec{R}_{\nu}-\vec{R}_{l} \right)
198! exp^{- i \vec{b} \left(\vec{r}-\vec{R}_{\mu} \right) }
199! \phi_{\mu} \left(\vec{r}-\vec{R}_{\mu} \right)
200! See, for instance, Eq. (5) of the paper by D. Sanchez-Portal et al.
201! Fundamental Physics for Ferroelectrics (AIP Conf. Proc. Vol 535)
202! ed R. Cohen (Melville, AIP) pp 111-120
203! In aux, the second index refer to the orbital in the unit cell
204! (\mu in the previous notation),
205! while the first index refer to the neighbour orbital (\nu) in the
206! previous notation.
207
208! Allocate local memory.
209! The second dimension of aux is equal to the number of orbitals
210! in the local node, nuo, since that is the maximum number of
211! orbitals mu (in the previous equation) that can be computed locally
212  call re_alloc( aux, 1, nuotot, 1, nuo, name='aux', routine='overkkneig')
213
214  aux(:,:)  = cmplx(0.0_dp,0.0_dp,kind=dp)
215
216! Loop on all the atomic orbitals known by the local node
217  do imu = 1, nuo
218!   Identify the global index of the orbital
219#ifdef MPI
220    call LocalToGlobalOrb( imu, Node, Nodes, imu_global)
221!!   For debugging
222!    write(6,'(a,3i5)')' overkkneig: Node, imu, imu_global = ',  &
223! &                                  Node, imu, imu_global
224!!   End debugging
225#else
226    imu_global = imu
227#endif
228!   For each atomic orbital, find the atom to which that orbital belongs...
229    ia = iaorb( imu_global )
230!   ... the position of the atom and the dot product between the vector
231!   connecting neighbour k-points, b, and the position of the atom.
232!   See the term in the bracket in the right hand side of Eq. (5) of the paper
233!   D. Sanchez-Portal et al.
234!   Fundamental Physics for Ferroelectrics (AIP Conf. Proc. Vol 535)
235!   ed R. Cohen (Melville, AIP) pp 111-120, 2000
236    kxmu = bvector(1) * xa(1,ia) +      &
237 &         bvector(2) * xa(2,ia) +      &
238 &         bvector(3) * xa(3,ia)
239    eibr = cmplx( dcos(kxmu), dsin(kxmu), kind=dp )
240    do jneig = 1, numh( imu )
241      ind = listhptr(imu) + jneig
242      jo  = listh(ind)
243      inu = indxuo(jo)
244!     Compute the dot product between the wave vector k and the relative
245!     position of the two orbitals (see the exponential right after the
246!     summatory in Eq. (5) of the paper by D. Sanchez-Portal et al.
247!     Fundamental Physics for Ferroelectrics (AIP Conf. Proc. Vol 535)
248!     ed R. Cohen (Melville, AIP) pp 111-120 (2000).
249      kxij = kvector(1) * xijo(1,ind) + &
250 &           kvector(2) * xijo(2,ind) + &
251 &           kvector(3) * xijo(3,ind)
252!     The origin for the relative position vector is taken on
253!     the atom in the unit cell.
254!     In the formula for overlap matrix at neighbor k-points,
255!     See, for instance, Eq. (5) of the paper by D. Sanchez-Portal et al.
256!     Fundamental Physics for Ferroelectrics (AIP Conf. Proc. Vol 535)
257!     ed R. Cohen (Melville, AIP) pp 111-120 (2000).
258!     just the opposite vector appears. We have to change the sign
259!     of the phase factor.
260      kxij = -1.0_dp * kxij
261      eikr = cmplx( dcos(kxij), dsin(kxij), kind=dp )
262!     imu runs on the local orbitals on this node,
263!     so it has to be the second index in aux
264!     (remember that the second dimension in aux has been allocated with
265!     nuo = no_l)
266      aux(inu,imu) = aux(inu,imu) + eikr * delkmat(ind) * eibr
267    enddo   ! End loop on neighbour orbitals
268  enddo     ! End loop on orbitals in the unit cell local to this node
269
270!! For debugging
271!!  if( IOnode ) then
272!  if( Node .eq. 1 ) then
273!    do imu = 1, nuotot
274!      do inu = 1, nuotot
275!        write(6,'(a,3i5,2f12.5)')                   &
276! &        'overkkneig: Node, imu, inu, aux = ',     &
277! &                     Node, imu, inu, aux(inu,imu)
278!      enddo
279!    enddo
280!  endif
281!! End debugging
282
283! Compute \sum_{\mu} c_{\mu,m} \left( \vec{k} + \vec{b} \right) aux_{\nu,\mu}
284! and store it in an auxiliary matrix
285
286! Allocate local memory
287  call re_alloc( aux2, 1, nbandsocc, 1, nuotot,      &
288 &               name='aux2', routine='overkkneig' )
289
290  aux2(:,:)  = cmplx(0.0_dp,0.0_dp,kind=dp)
291
292#ifdef MPI
293
294! Compute the number of atomic orbitals stored on Node 0
295! We assume that this is the maximum number of orbitals that will be stored
296! on any node.
297  call GetNodeOrbs( nuotot, 0, Nodes, norb_max_loc)
298
299!! For debugging
300!  write(6,'(a,4i5)')' overkkneig: Node, nuotot, Nodes, norb_max_loc = ', &
301! &                                Node, nuotot, Nodes, norb_max_loc
302!! End debugging
303
304! Allocate the temporal variable that will be used to broadcast
305! the auxiliary matrix aux (see comments for its definition above)
306! to all the other nodes
307! auxtmp follows the same structure as aux:
308! First index: all the atomic orbitals in the unit cell (nuotot)
309! Second index: maximum number of atomic orbitals stored in a local node
310  call re_alloc( auxtmp, 1, nuotot, 1, norb_max_loc, &
311 &               name='auxtmp', routine='overkkneig' )
312  auxtmp(:,:)  = cmplx(0.0_dp,0.0_dp,kind=dp)
313
314! Loop on all the nodes:
315! This is required because a given Node, for instance Node = 0,
316! knows the coefficients of the wave function at a neighbour k-point
317! for all the atomic orbitals (mu = 1, nuotot),
318! while it only knows the matrix aux for some of the matrix orbitals (mu=1,nuo).
319! To compute the sum on mu we have to take to the Node = 0 the full
320! aux matrix from the rest of the nodes.
321
322  do inode = 0, Nodes-1
323
324!   Compute the number of occupied bands stored on Node inode
325    call GetNodeOrbs( nuotot, inode, Nodes, norb_loc )
326
327!   Copy the auxiliary matrix for inode to a temporal variable
328    if( Node .eq. inode ) then
329      do imu = 1, norb_loc
330        do inu = 1, nuotot
331          auxtmp(inu,imu) = aux(inu,imu)
332        enddo
333      enddo
334    endif
335
336!   Broadcast the auxiliary matrix from node inode to all the other nodes
337    call MPI_Bcast( auxtmp(1,1), nuotot*norb_loc, &
338 &                  MPI_double_complex, inode, MPI_Comm_World, MPIerror )
339
340!   Loop on the occupied bands stored on the local node (Node)
341    do mband = 1, nbandsocc_loc
342
343!     Identify the global index of the occupied band
344       moccband_global     = which_band_in_node(Node,mband)
345       moccband_sequential = sequential_index_included_bands(moccband_global)
346!!      For debugging
347!        write(6,'(a,4i5)')                                            &
348! &        ' overkkisig: Node, mband, moccband_global, sequential = ', &
349! &                      Node, mband, moccband_global, moccband_sequential
350!!      End debugging
351
352!     Loop on all the atomic orbitals of the unit cell
353!     As parallelized now, a given node knows
354!     the coefficients of psinei for all the atomic orbitals
355!     of nuo = no_l bands
356
357      do inu = 1, nuotot
358!       Perform the sum on mu, following the notation of Eq. (5) in the
359!       paper by D. Sanchez-Portal et al.
360!       Fundamental Physics for Ferroelectrics (AIP Conf. Proc. Vol 535) (2000)
361        do imu = 1, norb_loc
362!         Identify the global index of the occupied band
363          call LocalToGlobalOrb( imu, inode, Nodes, imu_global )
364!!         For debugging
365!          write(6,'(a,4i5)')' overkkneig: Node, inu, imu, imu_global = ',  &
366! &                                        Node, inu, imu, imu_global
367!!         End debugging
368          aux2(moccband_sequential,inu) = aux2(moccband_sequential,inu) +  &
369 &            psinei(imu_global,mband) * auxtmp(inu,imu)
370        enddo   ! End loop on the sumatory on mu
371      enddo     ! End loop on atomic orbitals in the unit cell (nuotot)
372    enddo       ! End loop on local occupied bands (mband)
373  enddo         ! End loop on nodes (inode)
374
375! Allocate workspace array for global reduction
376  call re_alloc( aux2loc, 1, nbandsocc, 1, nuotot,      &
377 &               name='aux2loc', routine='overkkneig' )
378! Global reduction of aux2loc matrix
379  aux2loc(:,:) = cmplx(0.0_dp,0.0_dp,kind=dp)
380  call MPI_AllReduce( aux2(1,1), aux2loc(1,1), nbandsocc*nuotot, &
381 &                    MPI_double_complex,MPI_sum,MPI_Comm_World,MPIerror )
382  aux2(:,:) = aux2loc(:,:)
383#else
384  do mband = 1, nbandsocc
385    do inu = 1, nuotot
386      do imu = 1, nuotot
387        aux2(mband,inu) = aux2(mband,inu) +    &
388 &                        psinei(imu,mband) * aux(inu,imu)
389      enddo
390    enddo
391  enddo
392#endif
393
394!! For debugging
395!  if( IOnode ) then
396!!  if( Node .eq. 1 ) then
397!    do nband = 1, nuo
398!      do inu = 1, nuotot
399!        write(6,'(a,3i5,2f12.5)')                   &
400! &        'overkkneig: Node, inu, nband, psinei = ',     &
401! &                     Node, inu, nband, psinei(inu,nband)
402!      enddo
403!    enddo
404!
405!    do inu = 1, nuotot
406!      do nband = 1, nbandsocc
407!        write(6,'(a,3i5,2f12.5)')                   &
408! &        'overkkneig: Node, inu, nband, aux2 = ',     &
409! &                     Node, inu, nband, aux2(nband,inu)
410!      enddo
411!    enddo
412!  endif
413!! End debugging
414
415  do nband = 1, nbandsocc_loc
416    do mband = 1, nbandsocc
417      do inu = 1, nuotot
418#ifdef MPI
419!       Identify the global index of the occupied band
420        noccband_global     = which_band_in_node(Node,nband)
421        noccband_sequential = sequential_index_included_bands(noccband_global)
422        Mkb(noccband_sequential,mband) = Mkb(noccband_sequential,mband) +    &
423          conjg(psiatk(inu,nband)) * aux2(mband,inu)
424#else
425        Mkb(nband,mband) = Mkb(nband,mband) +    &
426          conjg(psiatk(inu,nband)) * aux2(mband,inu)
427#endif
428      enddo
429    enddo
430  enddo
431
432#ifdef MPI
433! Allocate workspace array for global reduction
434  call re_alloc( aux2loc, 1, nbandsocc, 1, nbandsocc,      &
435 &               name='aux2loc', routine='overkkneig' )
436! Global reduction of aux2loc matrix
437  aux2loc(:,:) = cmplx(0.0_dp,0.0_dp,kind=dp)
438  call MPI_AllReduce( Mkb(1,1), aux2loc(1,1), nbandsocc*nbandsocc, &
439 &                    MPI_double_complex,MPI_sum,MPI_Comm_World,MPIerror )
440  Mkb(:,:) = aux2loc(:,:)
441#endif
442
443!! For debugging
444!  if( IOnode ) then
445!!  if( Node .eq. 1 ) then
446!    do inu = 1, nuotot
447!      do nband = 1, nuo
448!        write(6,'(a,3i5,2f12.5)')                   &
449! &        'overkkneig: Node, inu, nband, psiatk = ',     &
450! &                     Node, inu, nband, psiatk(inu,nband)
451!      enddo
452!    enddo
453!
454!    do nband = 1, nbandsocc
455!      do mband = 1, nbandsocc
456!        write(6,'(a,3i5,2f12.5)')                       &
457! &        'overkkneig: Node, nband, mband, Mkb = ',     &
458! &                     Node, nband, mband, Mkb(nband,mband)
459!      enddo
460!    enddo
461!  endif
462!! End debugging
463
464  call de_alloc( aux,    name='aux', routine="overkkneig"    )
465  call de_alloc( aux2,   name='aux2', routine="overkkneig"   )
466#ifdef MPI
467  call de_alloc( auxtmp, name='auxtmp', routine="overkkneig"   )
468  call de_alloc( aux2loc, name='aux2loc', routine="overkkneig"   )
469#endif
470
471! End time counter
472  call timer( 'overkkneig', 2 )
473
474  end subroutine overkkneig
475
476