!
! Copyright (C) 1996-2016 The SIESTA group
! This file is distributed under the terms of the
! GNU General Public License: see COPYING in the top directory
! or http://www.gnu.org/copyleft/gpl.txt.
! See Docs/Contributors.txt for a list of contributors.
!
      subroutine poison( CELL, N1, N2, N3, Mesh, RHO, U, V, STRESS,
     &                   NSM )

C *********************************************************************
C SOLVES POISSON'S EQUATION.
C ENERGY AND POTENTIAL RETURNED IN RYDBERG UNITS.
C WRITTEN BY J.M.SOLER, JUNE 1995.
C **************** INPUT **********************************************
C REAL*8  CELL(3,3)     : UNIT CELL VECTORS
C INTEGER N1,N2,N3      : NUMBER OF MESH DIVISIONS IN EACH CELL VECTOR
C INTEGER Mesh(3)       : Number of global mesh divisions
C REAL*4  RHO(N1,N2,N3) : DENSITY AT MESH POINTS
C **************** OUTPUT *********************************************
C REAL*8  U             : ELECTROSTATIC ENERGY (IN RY)
C REAL*4  V(N1,N2,N3)   : ELECTROSTATIC POTENTIAL (IN RY)
C                         V AND RHO MAY BE THE SAME PHYSICAL ARRAY
C REAL*8  STRESS(3,3)   : Electrostatic-energy contribution to the
C                         stress tensor (in Ry/Bohr**3), assuming
C                         constant density (not charge), i.e.
C                         r->r' => rho'(r') = rho(r)
C                         For plane-wave and grid (finite-difference)
C                         basis sets, density rescaling gives an extra
C                         term (not included) equal to -2*U/cell_volume
C                         for the diagonal elements of stress. For
C                         other basis sets the extra term is, in
C                         general:
C                         IntegralOf( V * d_rho/d_strain ) / cell_volume
C INTEGER NSM           : Number of sub-mesh points per mesh point
C                         along each axis
C *********************************************************************

C Modules
      use precision, only : dp, grid_p
      use parallel,  only : Node, Nodes, ProcessorY
      use sys,       only : die
      use alloc,     only : re_alloc, de_alloc
      use m_fft,     only : fft      ! 3-D fast Fourier transform
      use cellsubs,  only : reclat   ! Finds reciprocal lattice vectors
      use cellsubs,  only : volcel   ! Finds unit cell volume
      use m_chkgmx,  only : chkgmx   ! Checks planewave cutoff of a mesh
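C Method: the density is Fourier transformed, each component with
C 0 < G2 < G2MAX is multiplied by 8*PI/G2 (Poisson's equation in
C Rydberg units), the electrostatic energy and its stress
C contribution are accumulated from |rho(G)|**2, and the potential
C is transformed back to the real-space mesh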
#ifdef MPI
      use mpi_siesta
#endif
      implicit none
C Input/output variables
      integer       :: N1, N2, N3, Mesh(3), NSM
      real(grid_p)  :: RHO(N1*N2*N3), V(N1*N2*N3)
      real(dp)      :: CELL(3,3), STRESS(3,3), U
C Local variables
      integer       :: I, I1, I2, I3, IX, J, J1, J2, J3, JX,
     &                 NP, NG, NG2, NG3,
     &                 ProcessorZ, Py, Pz, J2min, J2max,
     &                 J3min, J3max, J2L, J3L, NRemY, NRemZ,
     &                 BlockSizeY, BlockSizeZ
      real(dp)      :: C, B(3,3), DU, G(3), G2, G2MAX,
     &                 PI, VG, VOLUME, PI8
      real(grid_p), pointer :: CG(:,:)
      real(dp), parameter :: K0(3)= (/ 0.0, 0.0, 0.0 /), TINY= 1.0e-15
#ifdef MPI
      integer :: MPIerror
#endif

#ifdef DEBUG
      call write_debug( ' PRE POISON' )
#endif
#ifdef _TRACE_
      call MPI_Barrier( MPI_Comm_World, MPIerror )
      call MPItrace_event( 1000, 2 )
#endif

C Start time counter
      call timer( 'POISON', 1 )

C Allocate local memory
      nullify( CG )
      call re_alloc( CG, 1, 2, 1, n1*n2*n3, 'CG', 'poison' )

C Find unit cell volume
      VOLUME = VOLCEL( CELL )

C Find reciprocal lattice vectors
      call reclat( CELL, B, 1 )

C Find maximum planewave cutoff
      NP = N1 * N2 * N3
      G2MAX = 1.0e30_dp
      call CHKGMX( K0, B, Mesh, G2MAX )

C Copy density to complex array
!$OMP parallel do default(shared), private(I)
      do I = 1, NP
        CG(1,I) = RHO(I)
        CG(2,I) = 0.0_grid_p
      enddo
!$OMP end parallel do

C Find Fourier transform of density
      call fft( CG, Mesh, -1 )

C Initialize stress contribution
      do IX = 1,3
        do JX = 1,3
          STRESS(JX,IX) = 0.0_dp
        enddo
      enddo

C Work out processor grid dimensions
      ProcessorZ = Nodes/ProcessorY
      if (ProcessorY*ProcessorZ.ne.Nodes)
     &  call die('ERROR: ProcessorY must be a factor of the' //
     &           ' number of processors!')
      Py = (Node/ProcessorZ) + 1
      Pz = Node - (Py - 1)*ProcessorZ + 1

C Multiply by 8*PI/G2 to get the potential
      PI  = 4.0_dp * atan(1.0_dp)
      PI8 = PI * 8._dp
      U   = 0.0_dp
      NG2 = Mesh(2)
      NG3 = Mesh(3)
C Find the range of J2 (y) and J3 (z) mesh planes handled by this node
      BlockSizeY = ((NG2/NSM)/ProcessorY)*NSM
      BlockSizeZ = ((NG3/NSM)/ProcessorZ)*NSM
      NRemY = (NG2 - BlockSizeY*ProcessorY)/NSM
      NRemZ = (NG3 - BlockSizeZ*ProcessorZ)/NSM
      J2min = (Py-1)*BlockSizeY + NSM*min(Py-1,NRemY)
      J2max = J2min + BlockSizeY - 1
      if (Py-1.lt.NRemY) J2max = J2max + NSM
      J2max = min(J2max,NG2-1)
      J3min = (Pz-1)*BlockSizeZ + NSM*min(Pz-1,NRemZ)
      J3max = J3min + BlockSizeZ - 1
      if (Pz-1.lt.NRemZ) J3max = J3max + NSM
      J3max = min(J3max,NG3-1)

!$OMP parallel do default(shared),
!$OMP&private(J3,J3L,I3,J2,J2L,I2,J1,I1,G,G2),
!$OMP&private(J,VG,DU,C), reduction(+:U,STRESS)
      do J3 = J3min,J3max
        J3L = J3 - J3min
        if (J3.gt.NG3/2) then
          I3 = J3 - NG3
        else
          I3 = J3
        endif
        do J2 = J2min,J2max
          J2L = J2 - J2min
          if (J2.gt.NG2/2) then
            I2 = J2 - NG2
          else
            I2 = J2
          endif
          do J1 = 0,N1-1
            if (J1.gt.N1/2) then
              I1 = J1 - N1
            else
              I1 = J1
            endif
            G(1)= B(1,1) * I1 + B(1,2) * I2 + B(1,3) * I3
            G(2)= B(2,1) * I1 + B(2,2) * I2 + B(2,3) * I3
            G(3)= B(3,1) * I1 + B(3,2) * I2 + B(3,3) * I3
            G2 = G(1)**2 + G(2)**2 + G(3)**2
            J = 1 + J1 + N1 * J2L + N1 * N2 * J3L
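C           Scale each allowed Fourier component of the density by
C           8*pi/G**2 to get the potential, and accumulate the energy
C           and stress contributions from this G vector; components
C           with G=0 or G2 > G2MAX are zeroed out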
            if (G2.LT.G2MAX .AND. G2.GT.TINY) then
              VG = PI8 / G2
              DU = VG * ( CG(1,J)**2 + CG(2,J)**2 )
              U = U + DU
              C = 2.0_dp * DU / G2
              do IX = 1,3
                do JX = 1,3
                  STRESS(JX,IX) = STRESS(JX,IX) + C * G(IX) * G(JX)
                enddo
              enddo
              CG(1,J) = VG * CG(1,J)
              CG(2,J) = VG * CG(2,J)
            else
              CG(1,J) = 0.0_grid_p
              CG(2,J) = 0.0_grid_p
            endif
          enddo
        enddo
      enddo
!$OMP end parallel do

C Convert the sums over G into the energy and stress; the factor
C 1/NG**2 compensates for the unnormalized forward FFT
      NG = Mesh(1)*Mesh(2)*Mesh(3)
      U = 0.5_dp * U * VOLUME / DBLE(NG)**2
      C = 0.5_dp / DBLE(NG)**2
      do IX = 1,3
        do JX = 1,3
          STRESS(JX,IX) = C * STRESS(JX,IX)
        enddo
        STRESS(IX,IX) = STRESS(IX,IX) + U / VOLUME
      enddo

C Go back to real space
      call fft( CG, Mesh, +1 )

C Copy potential to array V
!$OMP parallel do default(shared), private(I)
      do I = 1, NP
        V(I) = CG(1,I)
      enddo
!$OMP end parallel do

C Free local memory
      call de_alloc( CG, 'CG', 'poison' )

#ifdef _TRACE_
      call MPI_Barrier( MPI_Comm_World, MPIerror )
      call MPItrace_event( 1000, 0 )
#endif

C Stop time counter
      call timer( 'POISON', 2 )

#ifdef DEBUG
      call write_debug( ' POS POISON' )
#endif
      end subroutine poison
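
C *********************************************************************
C The routine below is NOT part of SIESTA: it is a minimal,
C hypothetical sketch added for illustration only.  It shows the
C central step used above, namely scaling each Fourier component of
C the density by 8*pi/G**2 (Poisson's equation in Rydberg units),
C for the simple case of an orthorhombic cell given by three edge
C lengths A(3) in Bohr.  The name poisson_scale_demo and its whole
C interface are assumptions, not SIESTA routines, and no FFT,
C parallel distribution or stress accumulation is included.
C *********************************************************************
      subroutine poisson_scale_demo( A, N1, N2, N3, RHOG, VG )
C     Scale rho(G) by 8*pi/G**2 to obtain V(G) (Rydberg units)
C     A(3)             : edge lengths of an orthorhombic cell (Bohr)
C     N1,N2,N3         : number of mesh divisions along each edge
C     RHOG(2,N1,N2,N3) : real and imaginary parts of rho(G) (input)
C     VG(2,N1,N2,N3)   : real and imaginary parts of V(G)  (output)
      implicit none
      integer,          intent(in)  :: N1, N2, N3
      double precision, intent(in)  :: A(3), RHOG(2,N1,N2,N3)
      double precision, intent(out) :: VG(2,N1,N2,N3)
      integer          :: J1, J2, J3, I1, I2, I3
      double precision :: PI, G(3), G2, FACTOR

      PI = 4.0d0 * atan(1.0d0)
      do J3 = 1, N3
        I3 = J3 - 1
        if (I3 .gt. N3/2) I3 = I3 - N3
        do J2 = 1, N2
          I2 = J2 - 1
          if (I2 .gt. N2/2) I2 = I2 - N2
          do J1 = 1, N1
            I1 = J1 - 1
            if (I1 .gt. N1/2) I1 = I1 - N1
C           Reciprocal-lattice vector of an orthorhombic cell: 2*pi*n/a
            G(1) = 2.0d0 * PI * I1 / A(1)
            G(2) = 2.0d0 * PI * I2 / A(2)
            G(3) = 2.0d0 * PI * I3 / A(3)
            G2   = G(1)**2 + G(2)**2 + G(3)**2
            if (G2 .gt. 1.0d-15) then
C             V(G) = 8*pi*rho(G)/G**2
              FACTOR = 8.0d0 * PI / G2
              VG(1,J1,J2,J3) = FACTOR * RHOG(1,J1,J2,J3)
              VG(2,J1,J2,J3) = FACTOR * RHOG(2,J1,J2,J3)
            else
C             Drop the G=0 component (compensating background assumed)
              VG(1,J1,J2,J3) = 0.0d0
              VG(2,J1,J2,J3) = 0.0d0
            endif
          enddo
        enddo
      enddo
      end subroutine poisson_scale_demo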