1!
2! Copyright (C) 1996-2016	The SIESTA group
3!  This file is distributed under the terms of the
4!  GNU General Public License: see COPYING in the top directory
5!  or http://www.gnu.org/copyleft/gpl.txt.
6! See Docs/Contributors.txt for a list of contributors.
7!
8      subroutine poison( CELL, N1, N2, N3, Mesh, RHO, U, V, STRESS,
9     &                   NSM )
10
11C *********************************************************************
12C SOLVES POISSON'S EQUATION.
13C ENERGY AND POTENTIAL RETURNED IN RYDBERG UNITS.
14C WRITTEN BY J.M.SOLER, JUNE 1995.
15C **************** INPUT **********************************************
16C REAL*8  CELL(3,3)     : UNIT CELL VECTORS
17C INTEGER N1,N2,N3      : NUMBER OF MESH DIVISIONS IN EACH CELL VECTOR
18C INTEGER Mesh(3)       : Number of global mesh divisions
19C REAL*4  RHO(N1,N2,N3) : DENSITIY AT MESH POINTS
20C **************** OUTPUT *********************************************
21C REAL*8  U             : ELECTROSTATIC ENERGY (IN RY)
22C REAL*4  V(N1,N2,N3)   : ELECTROSTATIC POTENTIAL (IN RY)
23C                         V AND RHO MAY BE THE SAME PHYSICAL ARRAY
24C REAL*8  STRESS(3,3) : Electrostatic-energy contribution to stress
25C                       tensor (in Ry/Bohr**3) assuming constant density
26C                       (not charge), i.e. r->r' => rho'(r') = rho(r)
27C                       For plane-wave and grid (finite difference)
28C                       basis sets, density rescaling gives an extra
29C                       term (not included) equal to -2*U/cell_volume
30C                       for the diagonal elements of stress. For other
31C                       basis sets, the extra term is, in general:
32C                       IntegralOf( V * d_rho/d_strain ) / cell_volume
33C INTEGER NSM           : Number of sub-mesh points per mesh point
34C                       : along each axis
35C *********************************************************************
36
37C     Modules
38      use precision,   only : dp, grid_p
39      use parallel,    only : Node, Nodes, ProcessorY
40      use sys,         only : die
41      use alloc,       only : re_alloc, de_alloc
42      use m_fft,       only : fft     ! 3-D fast Fourier transform
43      use cellsubs,    only : reclat  ! Finds reciprocal lattice vectors
44      use cellsubs,    only : volcel  ! Finds unit cell volume
45      use m_chkgmx,    only : chkgmx  ! Checks planewave cutoff of a mesh
46#ifdef MPI
47      use mpi_siesta
48#endif
49      implicit          none
50C     Input/output variables
51      integer               :: N1, N2, N3, Mesh(3), NSM
52      real(grid_p)          :: RHO(N1*N2*N3), V(N1*N2*N3)
53      real(dp)              :: CELL(3,3), STRESS(3,3), U
54C     Local variables
55      integer               :: I, I1, I2, I3, IX, J, J1, J2, J3, JX,
56     &                         NP, NG, NG2, NG3,
57     &                         ProcessorZ, Py, Pz, J2min, J2max,
58     &                         J3min, J3max, J2L, J3L, NRemY, NRemZ,
59     &                         BlockSizeY, BlockSizeZ
60      real(dp)              :: C, B(3,3), DU, G(3), G2, G2MAX,
61     &                         PI, VG, VOLUME, PI8
62      real(grid_p), pointer :: CG(:,:)
63      real(dp),   parameter :: K0(3)= (/ 0.0, 0.0, 0.0 /), TINY= 1.0e-15
64#ifdef MPI
65      integer               :: MPIerror
66#endif
67
68#ifdef DEBUG
69      call write_debug( '    PRE POISON' )
70#endif
71#ifdef _TRACE_
72      call MPI_Barrier( MPI_Comm_World, MPIerror )
73      call MPItrace_event( 1000, 2 )
74#endif
75
76C     Start time counter
77      call timer( 'POISON', 1 )
78
79C     Allocate local memory
80      nullify( CG )
81      call re_alloc( CG, 1, 2, 1, n1*n2*n3, 'CG', 'poison' )
82
83C     Find unit cell volume
84      VOLUME = VOLCEL( CELL )
85
86C     Find reciprocal lattice vectors
87      call reclat(CELL, B, 1 )
88
89C     Find maximun planewave cutoff
90      NP = N1 * N2 * N3
91      G2MAX = 1.0e30_dp
92      call CHKGMX( K0, B, Mesh, G2MAX )
93
94C     Copy density to complex array
95!$OMP parallel do default(shared), private(I)
96      do I = 1, NP
97        CG(1,I) = RHO(I)
98        CG(2,I) = 0.0_grid_p
99      enddo
100!$OMP end parallel do
101
102C     Find fourier transform of density
103      call fft( CG, Mesh, -1 )
104
105C     Initialize stress contribution
106      do IX = 1,3
107        do JX = 1,3
108          STRESS(JX,IX) = 0.0_dp
109        enddo
110      enddo
111
112C     Work out processor grid dimensions
113      ProcessorZ = Nodes/ProcessorY
114      if (ProcessorY*ProcessorZ.ne.Nodes)
115     &     call die('ERROR: ProcessorY must be a factor of the' //
116     &     ' number of processors!')
117      Py = (Node/ProcessorZ) + 1
118      Pz = Node - (Py - 1)*ProcessorZ + 1
119
120C     Multiply by 8*PI/G2 to get the potential
121      PI  = 4.0_dp * atan(1.0_dp)
122      PI8 = PI * 8._dp
123      U = 0.0_dp
124      NG2 = Mesh(2)
125      NG3 = Mesh(3)
126      BlockSizeY = ((NG2/NSM)/ProcessorY)*NSM
127      BlockSizeZ = ((NG3/NSM)/ProcessorZ)*NSM
128      NRemY = (NG2 - BlockSizeY*ProcessorY)/NSM
129      NRemZ = (NG3 - BlockSizeZ*ProcessorZ)/NSM
130      J2min = (Py-1)*BlockSizeY + NSM*min(Py-1,NRemY)
131      J2max = J2min + BlockSizeY - 1
132      if (Py-1.lt.NRemY) J2max = J2max + NSM
133      J2max = min(J2max,NG2-1)
134      J3min = (Pz-1)*BlockSizeZ + NSM*min(Pz-1,NRemZ)
135      J3max = J3min + BlockSizeZ - 1
136      if (Pz-1.lt.NRemZ) J3max = J3max + NSM
137      J3max = min(J3max,NG3-1)
138
139!$OMP parallel do default(shared),
140!$OMP&private(J3,J3L,I3,J2,J2L,I2,J1,I1,G,G2),
141!$OMP&private(J,VG,DU,C), reduction(+:U,STRESS)
142      do J3 = J3min,J3max
143        J3L = J3 - J3min
144        if (J3.gt.NG3/2) then
145          I3 = J3 - NG3
146        else
147          I3 = J3
148        endif
149        do J2 = J2min,J2max
150          J2L = J2 - J2min
151          if (J2.gt.NG2/2) then
152            I2 = J2 - NG2
153          else
154            I2 = J2
155          endif
156          do J1 = 0,N1-1
157            if (J1.gt.N1/2) then
158              I1 = J1 - N1
159            else
160              I1 = J1
161            endif
162            G(1)= B(1,1) * I1 + B(1,2) * I2 + B(1,3) * I3
163            G(2)= B(2,1) * I1 + B(2,2) * I2 + B(2,3) * I3
164            G(3)= B(3,1) * I1 + B(3,2) * I2 + B(3,3) * I3
165            G2 = G(1)**2 + G(2)**2 + G(3)**2
166            J = 1 + J1 + N1 * J2L + N1 * N2 * J3L
167            if (G2.LT.G2MAX .AND. G2.GT.TINY) then
168              VG = PI8 / G2
169              DU = VG * ( CG(1,J)**2 + CG(2,J)**2 )
170              U = U + DU
171              C = 2.0_dp * DU / G2
172              DO IX = 1,3
173                DO JX = 1,3
174                  STRESS(JX,IX) = STRESS(JX,IX) + C * G(IX) * G(JX)
175                ENDDO
176              ENDDO
177              CG(1,J) = VG * CG(1,J)
178              CG(2,J) = VG * CG(2,J)
179            else
180              CG(1,J) = 0.0_dp
181              CG(2,J) = 0.0_dp
182            endif
183          enddo
184        enddo
185      enddo
186!$OMP end parallel do
187
188      NG = Mesh(1)*Mesh(2)*Mesh(3)
189      U = 0.5_dp * U * VOLUME / DBLE(NG)**2
190      C = 0.5_dp / DBLE(NG)**2
191      do IX = 1,3
192        do JX = 1,3
193          STRESS(JX,IX) = C * STRESS(JX,IX)
194        enddo
195        STRESS(IX,IX) = STRESS(IX,IX) + U / VOLUME
196      enddo
197
198C     Go back to real space
199      call fft( CG, Mesh, +1 )
200
201C     Copy potential to array V
202!$OMP parallel do default(shared), private(I)
203      do I = 1, NP
204        V(I) = CG(1,I)
205      enddo
206!$OMP end parallel do
207
208C     Free local memory
209      call de_alloc( CG, 'CG', 'poison' )
210
211#ifdef _TRACE_
212      call MPI_Barrier( MPI_Comm_World, MPIerror )
213      call MPItrace_event( 1000, 0 )
214#endif
215
216C     Stop time counter
217      call timer( 'POISON', 2 )
218
219#ifdef DEBUG
220      call write_debug( '    POS POISON' )
221#endif
222      end subroutine poison
223