1!
2! Copyright (C) 1996-2016	The SIESTA group
3!  This file is distributed under the terms of the
4!  GNU General Public License: see COPYING in the top directory
5!  or http://www.gnu.org/copyleft/gpl.txt.
6! See Docs/Contributors.txt for a list of contributors.
7!
8module m_vmat
9
10  implicit none
11
12  private
13  public :: vmat
14
15contains
16
17  subroutine vmat( no, np, dvol, spin, V, nvmax, &
18       numVs, listVsptr, listVs, Vs, &
19       nuo, nuotot, iaorb, iphorb, isa )
20
21! ********************************************************************
22! Finds the matrix elements of the potential.
23! First version written by P.Ordejon.
24! Name and interface modified by J.M.Soler. May'95.
25! Re-ordered so that mesh is the outer loop and the orbitals are
26! handled as lower-half triangular. J.D.Gale and J.M.Soler, Feb'99
27! Version of vmat that use a direct algorithm to save memory.
28! Modified by J.D.Gale, November'99
29! *********************** INPUT **************************************
30! integer no              : Number of basis orbitals
31! integer np              : Number of columns in C (local)
32! real*8  dvol            : Volume per mesh point
33! type(tSpin) spin        : Spin configuration
34! real*4  V(np,spin%Grid) : Value of the potential at the mesh points
35! integer nvmax           : First dimension of listV and Vs, and maxim.
36!                           number of nonzero elements in any row of Vs
37! integer numVs(nuo)      : Number of non-zero elements in a row of Vs
38! integer listVsptr(nuo)  : Pointer to the start of rows in listVs
39! integer listVs(nvmax)   : List of non-zero elements of Vs
40! integer iaorb(*)        : Pointer to atom to which orbital belongs
41! integer iphorb(*)       : Orbital index within each atom
42! integer isa(*)          : Species index of all atoms
43! ******************** INPUT AND OUTPUT *******************************
44! real*8  Vs(nvmax,spin%H): Value of nonzero elements in each row
45!                           of Vs to which the potential matrix
46!                           elements are summed up
47! *********************************************************************
48
49!  Modules
50    use precision,     only: dp, grid_p
51    use atmfuncs,      only: rcut, all_phi
52    use atm_types,     only: nsmax=>nspecies
53    use atomlist,      only: indxuo
54    use t_spin,        only: tSpin
55    use listsc_module, only: LISTSC
56    use mesh,          only: dxa, nsp, xdop, xdsp, meshLim
57    use meshdscf,      only: matrixMtoO
58    use meshdscf,      only: needdscfl, listdl, numdl, nrowsdscfl, listdlptr
59    use meshphi,       only: directphi, endpht, lstpht, listp2, phi
60    use parallel,      only: Nodes, Node
61    use alloc,         only: re_alloc, de_alloc
62    use parallelsubs,  only: GlobalToLocalOrb
63#ifdef MPI
64    use mpi_siesta
65#endif
66#ifdef _OPENMP
67    use omp_lib
68#endif
69
70! Argument types and dimensions
71    integer                  :: no, np, nvmax, nuo, nuotot, iaorb(*)
72    type(tSpin), intent(in)  :: spin
73    integer                  :: iphorb(*), isa(*), numVs(nuo)
74    integer                  :: listVsptr(nuo), listVs(nvmax)
75    real(grid_p), intent(in) :: V(nsp,np,spin%Grid)
76    real(dp)                 :: dvol
77    real(dp),         target :: Vs(nvmax,spin%H)
78! Internal variables and arrays
79    integer, parameter :: minloc = 1000 ! Min buffer size
80    integer, parameter :: maxoa  = 100  ! Max # of orb/atom
81    integer :: i, ia, ic, ii, ijl, il, imp, ind, iop
82    integer :: ip, iphi, io, is, isp, ispin, iu, iul
83    integer :: j, jc, jl, last, lasta, lastop
84    integer :: maxloc, maxloc2, nc, nlocal, nphiloc
85    integer :: nvmaxl, triang, lenx, leny, lenz,lenxy
86
87    ! Size of Hamiltonian
88    logical :: ParallelLocal
89    real(dp) :: Vij, r2sp, dxsp(3), VClocal(nsp)
90
91    integer, pointer :: ilc(:), ilocal(:), iorb(:)
92    real(dp), pointer :: DscfL(:,:), t_DscfL(:,:,:)
93    real(dp), pointer :: Vss(:,:), t_Vss(:,:,:), Clocal(:,:)
94    real(dp), pointer :: Vlocal(:,:), phia(:,:), r2cut(:)
95    integer :: NTH, TID
96#ifdef _TRACE_
97    integer :: MPIerror
98#endif
99
100#ifdef DEBUG
101    call write_debug( '    PRE vmat' )
102#endif
103
104#ifdef _TRACE_
105    call MPI_Barrier( MPI_Comm_World, MPIerror )
106    call MPItrace_event( 1000, 4 )
107#endif
108
109!   Start time counter
110    call timer('vmat',1)
111
112!   Find atomic cutoff radii
113    nullify(r2cut)
114    call re_alloc( r2cut, 1, nsmax, 'r2cut', 'vmat' )
115    r2cut = 0.0_dp
116    do i = 1,nuotot
117       ia = iaorb(i)
118       is = isa(ia)
119       io = iphorb(i)
120       r2cut(is) = max( r2cut(is), rcut(is,io)**2 )
121    end do
122
123!   Set algorithm logical
124    ParallelLocal = (Nodes > 1)
125    lenx  = meshLim(2,1) - meshLim(1,1) + 1
126    leny  = meshLim(2,2) - meshLim(1,2) + 1
127    lenz  = meshLim(2,3) - meshLim(1,3) + 1
128    lenxy = lenx*leny
129
130!   Find value of maxloc
131    maxloc2 = maxval(endpht(1:np)-endpht(0:np-1))
132    maxloc  = maxloc2 + minloc
133    maxloc  = min( maxloc, no )
134    triang  = (maxloc+1)*(maxloc+2)/2
135    if ( ParallelLocal ) then
136       if ( nrowsDscfL > 0 ) then
137          nvmaxl = listdlptr(nrowsDscfL) + numdl(nrowsDscfL)
138       else
139          nvmaxl = 1
140       end if
141    end if
142
143!   Allocate local memory
144!$OMP parallel default(shared), &
145!$OMP&shared(NTH,t_DscfL,t_Vss,spin), &
146!$OMP&private(TID,last), &
147!$OMP&private(ip,nc,nlocal,ic,imp,i,il,iu,iul,ii,ind,j,ijl,ispin), &
148!$OMP&private(lasta,lastop,ia,is,iop,isp,dxsp,r2sp,nphiloc,iphi,jc,jl), &
149!$OMP&private(Vij,VClocal,DscfL,Vss,ilocal,ilc,iorb,Vlocal,Clocal,phia)
150
151!$OMP single
152#ifdef _OPENMP
153    NTH = omp_get_num_threads( )
154#else
155    NTH = 1
156#endif
157!$OMP end single ! implicit barrier, IMPORTANT
158
159#ifdef _OPENMP
160    TID = omp_get_thread_num( ) + 1
161#else
162    TID = 1
163#endif
164
165    nullify(Clocal,phia,ilocal,ilc,iorb,Vlocal)
166!$OMP critical
167    ! Perhaps the critical section is not needed,
168    ! however it "tells" the OS to allocate per
169    ! thread, possibly waiting for each thread to
170    ! place the memory in the best position.
171    allocate( Clocal(nsp,maxloc2) )
172    allocate( ilocal(no) , ilc(maxloc2) , iorb(maxloc) )
173    allocate( Vlocal(triang,spin%Grid) )
174    if ( DirectPhi ) allocate( phia(maxoa,nsp) )
175!$OMP end critical
176
177!$OMP single
178    if ( ParallelLocal ) then
179       nullify( t_DscfL )
180       call re_alloc( t_DscfL, 1, nvmaxl, 1, spin%H, 1, NTH, &
181            'DscfL',  'vmat' )
182    else
183       if ( NTH > 1 ) then
184          nullify( t_Vss )
185          call re_alloc( t_Vss, 1, nvmax, 1, spin%H, 2, NTH, &
186               'Vss',  'vmat' )
187       end if
188    end if
189!$OMP end single ! implicit barrier
190
191    if ( ParallelLocal ) then
192       DscfL => t_DscfL(1:nvmaxl,:,TID)
193       DscfL(1:nvmaxl,:) = 0._dp
194    else
195       if ( NTH > 1 ) then
196          if ( TID == 1 ) then
197             Vss => Vs
198          else
199             Vss => t_Vss(1:nvmax,:,TID)
200             Vss(1:nvmax,:) = 0._dp
201          end if
202       else
203          Vss => Vs
204       end if
205    end if
206
207!   Full initializations done only once
208    ilocal(1:no)             = 0
209    iorb(1:maxloc)           = 0
210    Vlocal(1:triang,:) = 0._dp
211    last = 0
212
213!   Loop over grid points
214!$OMP do
215    do ip = 1,np
216!      Find number of nonzero orbitals at this point
217       nc = endpht(ip) - endpht(ip-1)
218!      Find new required size of Vlocal
219       nlocal = last
220       do ic = 1,nc
221          imp = endpht(ip-1) + ic
222          i = lstpht(imp)
223          if (ilocal(i) == 0) nlocal = nlocal + 1
224       end do
225
226!      If overflooded, add Vlocal to Vs and reinitialize it
227       if (nlocal > maxloc .and. last > 0 ) then
228          if ( ParallelLocal ) then
229             do il = 1,last
230                i   = iorb(il)
231                iu  = indxuo(i)
232                iul = NeedDscfL(iu)
233                if ( i == iu ) then
234                   do ii = 1, numdl(iul)
235                      ind = listdlptr(iul) + ii
236                      j   = listdl(ind)
237                      ijl = idx_ijl(il,ilocal(j))
238                      do ispin = 1, spin%Grid
239                         DscfL(ind,ispin) = DscfL(ind,ispin) + Vlocal(ijl,ispin) * dVol
240                      end do
241                      if ( spin%SO ) then
242                         DscfL(ind,7:8) = DscfL(ind,7:8) + &
243                              Vlocal(ijl,3:4) * dVol
244                      end if
245                   end do
246                else
247                   do ii = 1, numdl(iul)
248                      ind = listdlptr(iul) + ii
249                      j   = LISTSC( i, iu, listdl(ind) )
250                      ijl = idx_ijl(il,ilocal(j))
251                      do ispin = 1, spin%Grid
252                         DscfL(ind,ispin) = DscfL(ind,ispin) + Vlocal(ijl,ispin) * dVol
253                      end do
254                      if ( spin%SO ) then
255                         DscfL(ind,7:8) = DscfL(ind,7:8) + &
256                              Vlocal(ijl,3:4) * dVol
257                      end if
258                   end do
259                end if
260             end do
261          else
262             do il = 1,last
263                i  = iorb(il)
264                iu = indxuo(i)
265                call GlobalToLocalOrb( iu, Node, Nodes, iul )
266                if ( i == iu ) then
267                   do ii = 1, numVs(iul)
268                      ind = listVsptr(iul) + ii
269                      j   = listVs(ind)
270                      ijl = idx_ijl(il,ilocal(j))
271                      do ispin = 1, spin%Grid
272                         Vss(ind,ispin) = Vss(ind,ispin) + Vlocal(ijl,ispin) * dVol
273                      end do
274                      if ( spin%SO ) then
275                         Vss(ind,7:8) = Vss(ind,7:8) + Vlocal(ijl,3:4) * dVol
276                      end if
277                   end do
278                else
279                   do ii = 1, numVs(iul)
280                      ind = listVsptr(iul) + ii
281                      j   = LISTSC( i, iu, listVs(ind) )
282                      ijl = idx_ijl(il,ilocal(j))
283                      do ispin = 1, spin%Grid
284                         Vss(ind,ispin) = Vss(ind,ispin) + Vlocal(ijl,ispin) * dVol
285                      end do
286                      if ( spin%SO ) then
287                         Vss(ind,7:8) = Vss(ind,7:8) + Vlocal(ijl,3:4) * dVol
288                      end if
289                   end do
290                end if
291             end do
292          end if
293!         Reset local arrays
294          do i = 1, last
295             ilocal(iorb(i)) = 0
296          end do
297          iorb(1:last) = 0
298          ijl = (last+1)*(last+2)/2
299          do ispin = 1 , spin%Grid
300             do i = 1 , ijl
301                Vlocal(i,ispin) = 0._dp
302             end do
303          end do
304          last = 0
305       end if
306
307!      Look for required orbitals not yet in Vlocal
308       if ( nlocal > last ) then
309          do ic = 1,nc
310             imp = endpht(ip-1) + ic
311             i   = lstpht(imp)
312             if ( ilocal(i) == 0 ) then
313                last = last + 1
314                ilocal(i) = last
315                iorb(last) = i
316             end if
317          end do
318       end if
319
320!      Check algorithm
321       if ( DirectPhi ) then
322          lasta = 0
323          lastop = 0
324          do ic = 1 , nc
325             imp = endpht(ip-1) + ic
326             i   = lstpht(imp)
327             il  = ilocal(i)
328             ia  = iaorb(i)
329             iop = listp2(imp)
330             ilc(ic) = il
331
332!            Generate or retrieve phi values
333             if ( ia /= lasta .or. iop /= lastop ) then
334                lasta  = ia
335                lastop = iop
336                is = isa(ia)
337                do isp = 1,nsp
338                   dxsp(:) = xdsp(:,isp) + xdop(:,iop) - dxa(:,ia)
339                   r2sp = sum(dxsp**2)
340                   if ( r2sp < r2cut(is) ) then
341!$OMP critical
342                      call all_phi( is, +1, dxsp, maxoa, nphiloc, phia(:,isp) )
343!$OMP end critical
344                   else
345                      phia(:,isp) = 0.0_dp
346                   end if
347                end do
348             end if
349             iphi = iphorb(i)
350
351             Clocal(:,ic) = phia(iphi,:)
352
353             do ispin = 1 , spin%Grid
354
355!               Create VClocal for the first orbital of mesh point
356                Vij = 0._dp
357                do isp = 1,nsp
358                   VClocal(isp) = V(isp,ip,ispin) * Clocal(isp,ic)
359!                  This is the jc == ic value
360                   Vij = Vij + VClocal(isp) * Clocal(isp,ic)
361                end do
362
363!               ic == jc, hence we simply do
364                ijl = idx_ijl(il,ilc(ic))
365                Vlocal(ijl,ispin) = Vlocal(ijl,ispin) + Vij
366
367!               Loop on second orbital of mesh point
368                do jc = 1 , ic - 1
369                   jl = ilc(jc)
370
371!                  Calculate ic * jc
372                   Vij = 0._dp
373                   do isp = 1,nsp
374                      Vij = Vij + VClocal(isp) * Clocal(isp,jc)
375                   end do
376
377                   ijl = idx_ijl(il,jl)
378                   if ( il == jl ) then
379                      Vlocal(ijl,ispin) = Vlocal(ijl,ispin) + Vij * 2._dp
380                   else
381                      Vlocal(ijl,ispin) = Vlocal(ijl,ispin) + Vij
382                   end if
383
384                end do
385             end do
386
387          end do
388
389       else
390
391          do ic = 1 , nc
392             imp = endpht(ip-1) + ic
393             il  = ilocal(lstpht(imp))
394             ilc(ic) = il
395
396             Clocal(:,ic) = phi(:,imp)
397
398             do ispin = 1 , spin%Grid
399
400!               Create VClocal for the first orbital of mesh point
401                Vij = 0._dp
402                do isp = 1 , nsp
403                   VClocal(isp) = V(isp,ip,ispin) * Clocal(isp,ic)
404!                  This is the jc == ic value
405                   Vij = Vij + VClocal(isp) * Clocal(isp,ic)
406                end do
407
408!               ic == jc, hence we simply do
409                ijl = idx_ijl(il,ilc(ic))
410                Vlocal(ijl,ispin) = Vlocal(ijl,ispin) + Vij
411
412!               Loop on second orbital of mesh point
413                do jc = 1 , ic - 1
414                   jl = ilc(jc)
415
416!                  Calculate ic * jc
417                   Vij = 0._dp
418                   do isp = 1 , nsp
419                      Vij = Vij + VClocal(isp) * Clocal(isp,jc)
420                   end do
421
422                   ijl = idx_ijl(il,jl)
423                   if ( il == jl ) then
424                      Vlocal(ijl,ispin) = Vlocal(ijl,ispin) + Vij * 2._dp
425                   else
426                      Vlocal(ijl,ispin) = Vlocal(ijl,ispin) + Vij
427                   end if
428
429                end do
430             end do
431
432          end do
433
434       end if
435
436    end do
437!$OMP end do nowait
438
439!$OMP barrier
440
441! Note that this is already performed in parallel!
442!   Add final Vlocal to Vs
443    if ( ParallelLocal .and. last > 0 ) then
444       do il = 1 , last
445          i   = iorb(il)
446          iu  = indxuo(i)
447          iul = NeedDscfL(iu)
448          if ( i == iu ) then
449             do ii = 1, numdl(iul)
450                ind = listdlptr(iul) + ii
451                j   = listdl(ind)
452                ijl = idx_ijl(il,ilocal(j))
453                do ispin = 1, spin%Grid
454                   DscfL(ind,ispin) = DscfL(ind,ispin) + Vlocal(ijl,ispin) * dVol
455                end do
456                if ( spin%SO ) then
457                   DscfL(ind,7:8) = DscfL(ind,7:8) + Vlocal(ijl,3:4) * dVol
458                end if
459             end do
460          else
461             do ii = 1, numdl(iul)
462                ind = listdlptr(iul) + ii
463                j   = LISTSC( i, iu, listdl(ind) )
464                ijl = idx_ijl(il,ilocal(j))
465                do ispin = 1, spin%Grid
466                   DscfL(ind,ispin) = DscfL(ind,ispin) + Vlocal(ijl,ispin) * dVol
467                end do
468                if ( spin%SO ) then
469                   DscfL(ind,7:8) = DscfL(ind,7:8) + Vlocal(ijl,3:4) * dVol
470                end if
471             end do
472          end if
473       end do
474    else if ( last > 0 ) then
475       do il = 1 , last
476          i  = iorb(il)
477          iu = indxuo(i)
478          if ( i == iu ) then
479             do ii = 1, numVs(iu)
480                ind = listVsptr(iu)+ii
481                j   = listVs(ind)
482                ijl = idx_ijl(il,ilocal(j))
483                do ispin = 1, spin%Grid
484                   Vss(ind,ispin) = Vss(ind,ispin) + Vlocal(ijl,ispin) * dVol
485                end do
486                if ( spin%SO ) then
487                   Vss(ind,7:8) = Vss(ind,7:8) + Vlocal(ijl,3:4) * dVol
488                end if
489             end do
490          else
491             do ii = 1, numVs(iu)
492                ind = listVsptr(iu)+ii
493                j   = LISTSC( i, iu, listVs(ind) )
494                ijl = idx_ijl(il,ilocal(j))
495                do ispin = 1, spin%Grid
496                   Vss(ind,ispin) = Vss(ind,ispin) + Vlocal(ijl,ispin) * dVol
497                end do
498                if ( spin%SO ) then
499                   Vss(ind,7:8) = Vss(ind,7:8) + Vlocal(ijl,3:4) * dVol
500                end if
501             end do
502          end if
503       end do
504    end if
505
506!$OMP barrier
507
508    if ( ParallelLocal .and. NTH > 1 ) then
509       do ispin = 1 , spin%H
510!$OMP do
511          do ind = 1, nvmaxl
512             do ii = 2, NTH
513                t_DscfL(ind,ispin,1) = t_DscfL(ind,ispin,1) + &
514                     t_DscfL(ind,ispin,ii)
515             end do
516          end do
517!$OMP end do nowait
518       end do
519!$OMP barrier
520    else if ( NTH > 1 ) then
521       do ispin = 1 , spin%H
522!$OMP do
523          do ind = 1, nvmax
524             do ii = 2, NTH
525                Vs(ind,ispin) = Vs(ind,ispin) + t_Vss(ind,ispin,ii)
526             end do
527          end do
528!$OMP end do nowait
529       end do
530!$OMP barrier
531    end if
532
533!   Free memory
534    deallocate(Clocal,ilocal,ilc,iorb,Vlocal)
535    if ( DirectPhi ) deallocate( phia )
536
537!$OMP master
538    if ( ParallelLocal ) then
539!      Redistribute Hamiltonian from mesh to orbital based distribution
540       DscfL => t_DscfL(1:nvmaxl,1:spin%H,1)
541       call matrixMtoO( nvmaxl, nvmax, numVs, listVsptr, nuo, &
542            spin%H, DscfL, Vs )
543       call de_alloc( t_DscfL, 'DscfL', 'vmat' )
544    else if ( NTH > 1 ) then
545       call de_alloc( t_Vss, 'Vss', 'vmat' )
546    end if
547!$OMP end master
548
549!$OMP end parallel
550
551    call de_alloc( r2cut, 'r2cut', 'vmat' )
552
553#ifdef _TRACE_
554    call MPI_Barrier( MPI_Comm_World, MPIerror )
555    call MPItrace_event( 1000, 0 )
556#endif
557
558    call timer('vmat',2)
559
560#ifdef DEBUG
561    call write_debug( '    POS vmat' )
562#endif
563
564  contains
565
566    ! In any case will the compiler most likely inline this
567    ! small routine. So it should not pose any problem.
568    pure function idx_ijl(i,j) result(ij)
569      integer, intent(in) :: i,j
570      integer :: ij
571      if ( i > j ) then
572         ij = i * (i + 1)/2 + j + 1
573      else
574         ij = j * (j + 1)/2 + i + 1
575      end if
576    end function idx_ijl
577
578  end subroutine vmat
579
580end module m_vmat
581