1!! Copyright (C) 2002-2016 M. Marques, A. Castro, A. Rubio, G. Bertsch
2!!
3!! This program is free software; you can redistribute it and/or modify
4!! it under the terms of the GNU General Public License as published by
5!! the Free Software Foundation; either version 2, or (at your option)
6!! any later version.
7!!
8!! This program is distributed in the hope that it will be useful,
9!! but WITHOUT ANY WARRANTY; without even the implied warranty of
10!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11!! GNU General Public License for more details.
12!!
13!! You should have received a copy of the GNU General Public License
14!! along with this program; if not, write to the Free Software
15!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16!! 02110-1301, USA.
17!!
18
19#include "global.h"
20
21module stress_oct_m
22  use boundaries_oct_m
23  use comm_oct_m
24  use cube_oct_m
25  use cube_function_oct_m
26  use density_oct_m
27  use derivatives_oct_m
28  use double_grid_oct_m
29  use epot_oct_m
30  use fft_oct_m
31  use fourier_space_oct_m
32  use geometry_oct_m
33  use global_oct_m
34  use grid_oct_m
35  use hamiltonian_elec_oct_m
36  use kpoints_oct_m
37  use loct_math_oct_m
38  use mesh_oct_m
39  use mesh_function_oct_m
40  use messages_oct_m
41  use mpi_oct_m
42  use namespace_oct_m
43  use periodic_copy_oct_m
44  use poisson_fft_oct_m
45  use poisson_oct_m
46  use profiling_oct_m
47  use projector_oct_m
48  use simul_box_oct_m
49  use species_oct_m
50  use states_elec_oct_m
51  use states_elec_dim_oct_m
52  use submesh_oct_m
53  use v_ks_oct_m
54  use XC_F90(lib_m)
55
56  implicit none
57
58  private
59  public ::                    &
60    stress_calculate
61
62  integer, parameter ::             &
63    CMD_FINISH = 1,                 &
64    CMD_POISSON_SOLVE = 2
65
66
67  FLOAT,                pointer :: rho(:, :)
68  logical                       :: total_density_alloc
69  FLOAT,                pointer :: rho_total(:)
70  CMPLX, allocatable :: rho_total_fs(:,:,:)
71  FLOAT, allocatable :: FourPi_G2(:,:,:), Gvec(:,:,:,:), Gvec_G(:,:,:,:)
72
73contains
74
75  ! ---------------------------------------------------------
76  !> This computes the total stress on the lattice
77  subroutine stress_calculate(namespace, gr, hm, st, geo, ks)
78    type(namespace_t),        intent(in)    :: namespace
79    type(grid_t),             intent(inout) :: gr !< grid
80    type(hamiltonian_elec_t), intent(inout) :: hm
81    type(states_elec_t),      intent(inout) :: st
82    type(geometry_t),         intent(inout) :: geo !< geometry
83    type(v_ks_t),             intent(in)    :: ks
84
85    type(profile_t), save :: stress_prof
86    FLOAT :: stress(3,3) ! stress tensor in Cartecian coordinate
87    FLOAT :: stress_KE(3,3), stress_Hartree(3,3), stress_xc(3,3) ! temporal
88    FLOAT :: stress_ps(3,3), stress_Ewald(3,3)
89
90    call profiling_in(stress_prof, "STRESS")
91    PUSH_SUB(stress_calculate)
92
93    SAFE_ALLOCATE(rho(1:gr%fine%mesh%np, 1:st%d%nspin))
94
95    if(gr%der%mesh%sb%kpoints%use_symmetries) then
96      call messages_not_implemented("Stress tensors with k-point symmetries", namespace=namespace)
97    end if
98
99    if(ks%theory_level == HARTREE .or. ks%theory_level == HARTREE_FOCK .or. &
100      (ks%theory_level == KOHN_SHAM_DFT .and. bitand(hm%xc%family, XC_FAMILY_LDA) == 0)) then
101      write(message(1),'(a)') 'The stress tensor is currently only properly computed at the DFT-LDA level'
102      call messages_fatal(1, namespace=namespace)
103    end if
104    if(ks%vdw_correction /= OPTION__VDWCORRECTION__NONE) then
105      write(message(1),'(a)') 'The stress tensor is currently not properly computed with vdW corrections'
106      call messages_fatal(1, namespace=namespace)
107    end if
108
109    stress(:,:) = M_ZERO
110
111    call calculate_density()
112    call fourier_space_init(hm%psolver_fine)
113    call density_rs2fs(hm%psolver_fine)
114
115    ! Stress from kinetic energy of electrons
116    call stress_from_kinetic_energy_electron(gr%der, hm, st, stress, stress_KE)
117
118    ! Stress from Hartree energy
119    call stress_from_Hartree(hm, stress, stress_Hartree)
120
121    ! Stress from exchange-correlation energy
122    call stress_from_xc(gr%der, hm, stress, stress_xc)
123
124    ! Stress from pseudopotentials
125    call stress_from_pseudo(gr, hm, st, geo, stress, stress_ps)
126
127    ! Stress from Ewald summation
128    call stress_from_Ewald_sum(gr, geo, hm, stress, stress_Ewald)
129
130
131    ! Stress from kinetic energy of ion
132    ! Stress from ion-field interaction
133
134    ! Sign changed to fit conventional definition
135    stress = - stress
136
137    gr%sb%stress_tensor(1:3,1:3) = stress(1:3,1:3)
138
139    SAFE_DEALLOCATE_A(FourPi_G2)
140    SAFE_DEALLOCATE_A(Gvec)
141    SAFE_DEALLOCATE_A(Gvec_G)
142    SAFE_DEALLOCATE_P(rho)
143    if (total_density_alloc) then
144      SAFE_DEALLOCATE_P(rho_total)
145    end if
146    SAFE_DEALLOCATE_A(rho_total_fs)
147
148    POP_SUB(stress_calculate)
149    call profiling_out(stress_prof)
150
151  contains
152    subroutine calculate_density()
153      integer :: ip
154!      FLOAT                         :: amaldi_factor
155
156      PUSH_SUB(stress.calculate_density)
157
158      ! get density taking into account non-linear core corrections
159      call states_elec_total_density(st, gr%fine%mesh, rho)
160
161      nullify(rho_total)
162
163      if(associated(st%rho_core) .or. hm%d%spin_channels > 1) then
164         total_density_alloc = .true.
165
166         SAFE_ALLOCATE(rho_total(1:gr%fine%mesh%np))
167
168         do ip = 1, gr%fine%mesh%np
169            rho_total(ip) = sum(rho(ip, 1:hm%d%spin_channels))
170         end do
171
172         ! remove non-local core corrections
173         if(associated(st%rho_core)) then
174            do ip = 1, gr%fine%mesh%np
175               rho_total(ip) = rho_total(ip) - st%rho_core(ip)
176            end do
177         end if
178      else
179         total_density_alloc = .false.
180         rho_total => rho(:, 1)
181      end if
182
183      POP_SUB(stress.calculate_density)
184    end subroutine calculate_density
185    ! -------------------------------------------------------
186
187    ! ---------------------------------------------------------
188    subroutine density_rs2fs(this)
189      type(poisson_t), target,   intent(in)    :: this
190      type(cube_t),    pointer             :: cube
191      type(fourier_space_op_t), pointer    :: coulb
192      type(cube_function_t) :: cf
193      integer :: ii, jj, kk, iit, jjt, kkt
194      FLOAT :: gx, xx(3)
195      CMPLX :: zphase
196
197      cube => this%cube
198      coulb => this%fft_solver%coulb
199
200      call cube_function_null(cf)
201      call dcube_function_alloc_RS(cube, cf, in_device = (this%fft_solver%kernel /= POISSON_FFT_KERNEL_CORRECTED))
202
203
204      ! put the density in the cube
205      if (cube%parallel_in_domains) then
206         call dmesh_to_cube_parallel(this%der%mesh, rho_total, cube, cf, this%mesh_cube_map)
207      else
208         if(this%der%mesh%parallel_in_domains) then
209            call dmesh_to_cube(this%der%mesh, rho_total, cube, cf, local = .true.)
210         else
211            call dmesh_to_cube(this%der%mesh, rho_total, cube, cf)
212         end if
213      end if
214
215      ASSERT(associated(cube%fft))
216      ASSERT(cube%fft%library /= FFTLIB_NONE)
217
218      SAFE_ALLOCATE(rho_total_fs(1:cube%rs_n_global(1),1:cube%rs_n_global(2),1:cube%rs_n_global(3)))
219
220      call cube_function_alloc_fs(cube, cf)
221
222      call dcube_function_rs2fs(cube, cf)
223      cf%fs = cf%fs/TOFLOAT(cube%rs_n(1)*cube%rs_n(2)*cube%rs_n(3)) !Normalize
224
225      select case(cube%fft%library)
226      case(FFTLIB_PFFT)
227! Not implemented yet
228         write(message(1),'(a)') 'Internal error: PFFT library is not applicable for stress calculation.'
229         call messages_fatal(1, namespace=namespace)
230      case(FFTLIB_FFTW)
231         if(associated(cube%Lrs))then
232            xx(1:3) = cube%Lrs(1,1:3)
233            xx(1:3) = matmul(gr%sb%rlattice(1:3,1:3),xx)
234         else
235            xx(1:3) = -TOFLOAT(cube%rs_n_global(1:3)/2 )/TOFLOAT(cube%rs_n_global(1:3))
236            xx(1:3) = matmul(gr%sb%rlattice(1:3,1:3),xx)
237         end if
238         do kk = 1, cube%fs_n(3)
239            kkt = - pad_feq(kk, cube%rs_n_global(3), .true.)
240            kkt = mod(kkt+cube%rs_n_global(3),cube%rs_n_global(3)) +1
241            do jj = 1, cube%fs_n(2)
242               jjt = - pad_feq(jj , cube%rs_n_global(2), .true.)
243               jjt = mod(jjt+cube%rs_n_global(2),cube%rs_n_global(2)) + 1
244               do ii = 1, cube%fs_n(1)
245                  iit = - pad_feq(ii , cube%rs_n_global(1), .true.)
246                  iit = mod(iit+cube%rs_n_global(1),cube%rs_n_global(1)) + 1
247
248                  gx =  sum(xx(1:3)*Gvec(ii, jj, kk, 1:3) )
249                  zphase = TOCMPLX(cos(gx), sin(gx))
250                  rho_total_fs(ii,jj,kk) = conjg(cf%fs(ii, jj, kk))*zphase
251                  rho_total_fs(iit,jjt,kkt) = cf%fs(ii, jj, kk)*conjg(zphase)
252               end do
253            end do
254         end do
255      case(FFTLIB_ACCEL)
256! Not implemented yet
257         write(message(1),'(a)') 'Internal error: ACCEL library is not applicable for stress calculation.'
258         call messages_fatal(1, namespace=namespace)
259      case default
260         ASSERT(.false.)
261       end select
262
263       call cube_function_free_fs(cube, cf)
264       call dcube_function_free_rs(cube, cf)
265
266    end subroutine density_rs2fs
267
268    ! -------------------------------------------------------
269    subroutine fourier_space_init(this)
270      type(poisson_t), target,   intent(in)    :: this
271      type(cube_t),    pointer             :: cube
272      type(fourier_space_op_t), pointer    :: coulb
273      integer :: db(3)
274      integer :: ix,iy,iz, ixx(3)
275      FLOAT :: gg(3), modg2, temp(3), qq(1:MAX_DIM)
276
277      qq(1:MAX_DIM) = M_ZERO
278
279      cube => this%cube
280      coulb => this%fft_solver%coulb
281
282      db(1:3) = cube%rs_n_global(1:3)
283
284      SAFE_ALLOCATE(FourPi_G2(1:db(1),1:db(2),1:db(3)))
285      SAFE_ALLOCATE(Gvec(1:db(1),1:db(2),1:db(3),3))
286      SAFE_ALLOCATE(Gvec_G(1:db(1),1:db(2),1:db(3),3))
287
288
289
290      if(cube%fft%library == FFTLIB_PFFT) then
291!Not implemented yet
292
293      else if(cube%fft%library == FFTLIB_FFTW) then
294
295         temp(1:3) = M_TWO*M_PI/(db(1:3)*gr%fine%der%mesh%spacing(1:3))
296
297         do ix = 1, cube%rs_n_global(1)
298            ixx(1) = pad_feq(ix, db(1), .true.)
299            do iy = 1, cube%rs_n_global(2)
300               ixx(2) = pad_feq(iy, db(2), .true.)
301               do iz = 1, cube%rs_n_global(3)
302                  ixx(3) = pad_feq(iz, db(3), .true.)
303
304                  call poisson_fft_gg_transform_l(ixx, temp, gr%fine%der%mesh%sb, qq, gg, modg2)
305
306                  !HH not very elegant
307                  if(cube%fft%library.eq.FFTLIB_NFFT) modg2=cube%Lfs(ix,1)**2+cube%Lfs(iy,2)**2+cube%Lfs(iz,3)**2
308
309                  if(abs(modg2) > M_EPSILON) then
310                     FourPi_G2(ix,iy,iz) = 4d0*M_PI/modg2
311                     Gvec_G(ix, iy, iz, 1) = gg(1)/sqrt(modg2)
312                     Gvec_G(ix, iy, iz, 2) = gg(2)/sqrt(modg2)
313                     Gvec_G(ix, iy, iz, 3) = gg(3)/sqrt(modg2)
314                  else
315                     FourPi_G2(ix,iy,iz) = M_ZERO
316                     Gvec_G(ix, iy, iz, 1) = M_ZERO
317                     Gvec_G(ix, iy, iz, 2) = M_ZERO
318                     Gvec_G(ix, iy, iz, 3) = M_ZERO
319                  end if
320
321                  Gvec(ix, iy, iz, 1) = gg(1)
322                  Gvec(ix, iy, iz, 2) = gg(2)
323                  Gvec(ix, iy, iz, 3) = gg(3)
324
325               end do
326            end do
327         end do
328
329      else if(cube%fft%library == FFTLIB_ACCEL) then
330!Not implemented yet
331      end if
332
333    end subroutine fourier_space_init
334  end subroutine stress_calculate
335
336  ! -------------------------------------------------------
337  subroutine stress_from_kinetic_energy_electron(der, hm, st, stress, stress_KE)
338    type(derivatives_t),  intent(in)    :: der
339    type(hamiltonian_elec_t),  intent(in)    :: hm
340    type(states_elec_t),  intent(inout) :: st
341    FLOAT,                intent(inout) :: stress(:, :)
342    FLOAT,                intent(out)   :: stress_KE(3, 3) ! temporal
343    FLOAT                               :: stress_l(3, 3)
344    integer :: ik, ist, idir, jdir, idim, ispin
345    CMPLX, allocatable :: gpsi(:, :, :), psi(:, :)
346    type(profile_t), save :: prof
347
348    call profiling_in(prof, "STRESS_FROM_KEE")
349    PUSH_SUB(stress_from_kinetic_energy_electron)
350
351    stress_l(:,:) = M_ZERO
352
353    SAFE_ALLOCATE(psi(1:der%mesh%np_part, 1:st%d%dim))
354    SAFE_ALLOCATE(gpsi(1:der%mesh%np, 1:der%mesh%sb%dim, 1:st%d%dim))
355
356
357    do ik = st%d%kpt%start, st%d%kpt%end
358       ispin = states_elec_dim_get_spin_index(st%d, ik)
359       do ist = st%st_start, st%st_end
360
361          call states_elec_get_state(st, der%mesh, ist, ik, psi)
362
363          do idim = 1, st%d%dim
364             call boundaries_set(der%boundaries, psi(:, idim))
365          end do
366
367          if(associated(hm%hm_base%phase)) then
368            call states_elec_set_phase(st%d, psi, hm%hm_base%phase(1:der%mesh%np_part, ik), der%mesh%np_part,.false.)
369          end if
370
371          do idim = 1, st%d%dim
372             call zderivatives_grad(der, psi(:, idim), gpsi(:, :, idim), set_bc = .false.)
373          end do
374
375
376          do idir = 1, der%mesh%sb%dim
377             do jdir = 1, der%mesh%sb%dim
378
379                do idim = 1, st%d%dim
380                   stress_l(idir,jdir) = stress_l(idir,jdir) + &
381                        st%d%kweights(ik)*st%occ(ist, ik) &
382                        *dmf_integrate(der%mesh, TOFLOAT(conjg(gpsi(:, idir, idim))*gpsi(:, jdir, idim)))
383                end do
384             end do
385          end do
386       end do
387
388    end do
389
390
391    if(st%parallel_in_states .or. st%d%kpt%parallel) then
392       ! TODO: this could take dim = (/der%mesh%np, der%mesh%sb%dim, st%d%nspin/)) to reduce the amount of data copied
393       call comm_allreduce(st%st_kpt_mpi_grp%comm, stress_l)
394    end if
395
396    stress_l = stress_l/der%mesh%sb%rcell_volume
397    stress_KE = stress_l
398    stress = stress + stress_l
399
400
401    SAFE_DEALLOCATE_A(psi)
402    SAFE_DEALLOCATE_A(gpsi)
403
404    call profiling_out(prof)
405    POP_SUB(stress_from_kinetic_energy_electron)
406  end subroutine stress_from_kinetic_energy_electron
407
408! -------------------------------------------------------
409  subroutine stress_from_Hartree(hm, stress, stress_Hartree)
410    type(hamiltonian_elec_t), intent(in)    :: hm
411    FLOAT,                    intent(inout) :: stress(:, :)
412    FLOAT,                    intent(out)   :: stress_Hartree(3, 3) ! temporal
413
414    FLOAT :: stress_l(3, 3)
415    type(cube_t), pointer :: cube
416    integer :: idir, jdir, ii, jj, kk
417    FLOAT :: ss
418    type(profile_t), save :: prof
419
420    cube => hm%psolver_fine%cube
421
422    call profiling_in(prof, "STRESS_FROM_HARTREE")
423    PUSH_SUB(stress_from_Hartree)
424
425    stress_l(:,:) = M_ZERO
426
427    do idir = 1,3
428       do jdir = 1,3
429          ss=M_ZERO
430          !$omp parallel do private(ii, jj, kk) reduction(+:ss)
431          do kk = 1, cube%rs_n_global(3)
432             do jj = 1, cube%rs_n_global(2)
433                do ii = 1, cube%rs_n_global(1)
434                   ss = ss + abs(rho_total_fs(ii,jj,kk))**2 &
435                        *M_TWO*Gvec_G(ii, jj, kk,idir)*Gvec_G(ii, jj, kk,jdir) &
436                        *FourPi_G2(ii, jj, kk)
437                end do
438             end do
439          end do
440          !$omp end parallel do
441          stress_l(idir,jdir) = - ss
442       end do
443    end do
444
445    ss=M_ZERO
446    !$omp parallel do private(ii, jj, kk) reduction(+:ss)
447    do kk = 1, cube%rs_n_global(3)
448       do jj = 1, cube%rs_n_global(2)
449          do ii = 1, cube%rs_n_global(1)
450             ss = ss + abs(rho_total_fs(ii, jj, kk))**2*FourPi_G2 (ii, jj, kk)
451          end do
452       end do
453    end do
454    !$omp end parallel do
455
456    do idir = 1,3
457       stress_l(idir,idir) = stress_l(idir,idir) + ss
458    end do
459    stress_l = CNST(0.5) * stress_l
460
461
462    stress_Hartree =  stress_l
463    stress = stress + stress_l
464
465    call profiling_out(prof)
466    POP_SUB(stress_from_Hartree)
467
468  end subroutine stress_from_Hartree
469
470  ! -------------------------------------------------------
471  ! We assume hm%energy%echange, correlation, and intnvxc
472  ! have already been calculated somewhere else.
473  subroutine stress_from_xc(der, hm, stress, stress_xc)
474    type(derivatives_t),  intent(in) :: der
475    type(hamiltonian_elec_t),  intent(in)    :: hm
476    FLOAT,                         intent(inout) :: stress(:, :)
477    FLOAT,                         intent(out) :: stress_xc(3, 3) ! temporal
478    FLOAT :: stress_l(3, 3)
479    integer :: ii
480    type(profile_t), save :: prof
481
482    call profiling_in(prof, "STRESS_FROM_XC")
483    PUSH_SUB(stress_from_xc)
484
485    stress_l = M_ZERO
486    do ii = 1,3
487       stress_l(ii,ii) = - hm%energy%exchange - hm%energy%correlation &
488            + hm%energy%intnvxc
489    end do
490    stress_l = stress_l/der%mesh%sb%rcell_volume
491
492    stress_xc(:,:) = stress_l(:,:)
493    stress(:,:) = stress(:,:) + stress_l(:,:)
494
495    call profiling_out(prof)
496    POP_SUB(stress_from_xc)
497  end subroutine stress_from_xc
498  ! -------------------------------------------------------
499  subroutine stress_from_pseudo(gr, hm, st, geo, stress, stress_ps)
500    type(grid_t),      target,        intent(in) :: gr !< grid
501    type(hamiltonian_elec_t),  intent(inout)    :: hm
502    type(states_elec_t),    intent(inout) :: st
503    type(geometry_t),          intent(in) :: geo !< geometry
504    type(cube_t),    pointer             :: cube
505    type(derivatives_t),  pointer :: der
506    FLOAT,                         intent(inout) :: stress(:, :)
507    FLOAT,                         intent(out) :: stress_ps(3, 3) ! temporal
508    FLOAT :: stress_l(3, 3)
509    FLOAT :: stress_t_SR(3, 3), stress_t_LR(3, 3), stress_t_NL(3, 3)
510    CMPLX, allocatable :: gpsi(:, :, :), psi(:, :), rppsi(:, :, :)
511    FLOAT :: energy_ps_SR
512    FLOAT,  allocatable :: vloc(:),rvloc(:,:)
513    FLOAT,  allocatable :: rho_t(:),grho_t(:,:)
514    FLOAT :: sigma_erf, alpha, gx, g2
515    CMPLX :: zphase, zfact
516    integer :: ik, ispin, ist, idim, idir, jdir, iatom
517    integer :: ii,jj,kk
518    type(profile_t), save :: prof
519
520    call profiling_in(prof, "STRESS_FROM_PSEUDO")
521    PUSH_SUB(stress_from_pseudo)
522
523    stress_l = M_ZERO
524
525    cube => hm%psolver_fine%cube
526    der => gr%der
527
528
529    SAFE_ALLOCATE(psi(1:der%mesh%np_part, 1:st%d%dim))
530    SAFE_ALLOCATE(gpsi(1:der%mesh%np, 1:der%mesh%sb%dim, 1:st%d%dim))
531    SAFE_ALLOCATE(rppsi(1:der%mesh%np, 1:der%mesh%sb%dim+1, 1:st%d%dim))
532    SAFE_ALLOCATE(vloc(1:gr%mesh%np))
533    SAFE_ALLOCATE(rvloc(1:gr%mesh%np, 1:der%mesh%sb%dim))
534    SAFE_ALLOCATE(rho_t(1:der%mesh%np_part))
535    SAFE_ALLOCATE(grho_t(1:der%mesh%np,1:der%mesh%sb%dim))
536
537
538! calculate stress from non-local pseudopotentials
539    stress_t_NL = M_ZERO
540    do ik = st%d%kpt%start, st%d%kpt%end
541       ispin = states_elec_dim_get_spin_index(st%d, ik)
542       do ist = st%st_start, st%st_end
543
544          call states_elec_get_state(st, der%mesh, ist, ik, psi)
545
546          do idim = 1, st%d%dim
547             call boundaries_set(der%boundaries, psi(:, idim))
548          end do
549
550          if(associated(hm%hm_base%phase)) then
551            call states_elec_set_phase(st%d, psi, hm%hm_base%phase(1:der%mesh%np_part, ik), der%mesh%np_part, .false.)
552          end if
553
554          do idim = 1, st%d%dim
555             call zderivatives_grad(der, psi(:, idim), gpsi(:, :, idim), set_bc = .false.)
556          end do
557
558
559
560          rppsi = M_ZERO
561          do iatom = 1, geo%natoms
562             if(species_is_ps(geo%atom(iatom)%species)) then
563                call zr_project_psi(hm%ep%proj(iatom), der%mesh, st%d%dim, ik, psi, rppsi)
564             end if
565          end do
566
567          do idir = 1,3
568             do jdir = 1,3
569                do idim = 1, st%d%dim
570
571                   stress_t_NL(idir, jdir) = stress_t_NL(idir, jdir) &
572                        +2d0*st%d%kweights(ik)*st%occ(ist, ik) &
573                        *dmf_integrate(der%mesh, TOFLOAT(conjg(gpsi(1:der%mesh%np,idir,idim))*rppsi(1:der%mesh%np,jdir,idim)))
574
575                   if(idir /= jdir)cycle
576
577                   stress_t_NL(idir, jdir) = stress_t_NL(idir, jdir) &
578                        +st%d%kweights(ik)*st%occ(ist, ik) &
579                        *dmf_integrate(der%mesh, TOFLOAT(conjg(psi(1:der%mesh%np,idim))*rppsi(1:der%mesh%np,4,idim)))
580
581                end do
582             end do
583          end do
584       end do
585    end do
586
587
588    if(st%parallel_in_states .or. st%d%kpt%parallel) then
589      ! TODO: this could take dim = (/der%mesh%np, der%mesh%sb%dim, st%d%nspin/)) to reduce the amount of data copied
590       call comm_allreduce(st%st_kpt_mpi_grp%comm, stress_t_NL)
591    end if
592
593
594    stress_t_NL = stress_t_NL/der%mesh%sb%rcell_volume
595
596! calculate stress from short-range local pseudopotentials
597    stress_t_SR = M_ZERO
598
599    rho_t(1:gr%mesh%np) = rho_total(1:gr%mesh%np)
600    call boundaries_set(der%boundaries, rho_t(:))
601    call dderivatives_grad(der, rho_t, grho_t, set_bc = .false.)
602
603    vloc = M_ZERO
604    rvloc = M_ZERO
605    do iatom = 1, geo%natoms
606       call epot_local_pseudopotential_SR(gr%der, gr%dgrid, hm%geo, iatom, vloc, rvloc)
607    end do
608
609    energy_ps_SR = dmf_dotp(gr%mesh, vloc, rho_total(:))
610    do idir = 1,3
611       stress_t_SR(idir,idir) = energy_ps_SR
612    end do
613
614    do idir = 1,3
615       do jdir =1,3
616
617          stress_t_SR(idir, jdir) = stress_t_SR(idir, jdir) &
618               +dmf_dotp(gr%mesh, rvloc(:, jdir), grho_t(:, idir))
619       end do
620    end do
621
622    stress_t_SR = stress_t_SR/der%mesh%sb%rcell_volume
623
624! calculate stress from long-range local pseudopotentials
625    stress_t_LR = M_ZERO
626
627
628 ! We assume this value is applied for range-separation for all the species
629    sigma_erf = CNST(0.625)
630    alpha = M_ONE/(sqrt(M_TWO)*sigma_erf)
631
632    do kk = 1, cube%rs_n_global(3)
633       do jj = 1, cube%rs_n_global(2)
634          do ii = 1, cube%rs_n_global(1)
635
636             zphase = M_Z0
637             do iatom = 1, geo%natoms
638                gx = sum(Gvec(ii, jj, kk, 1:3)*geo%atom(iatom)%x(1:3))
639                zphase = zphase + species_zval(geo%atom(iatom)%species) &
640                     *TOCMPLX(cos(gx), sin(gx))
641             end do
642             g2 = sum(Gvec(ii, jj, kk, 1:3)**2)
643             zfact =  sigma_erf**2*FourPi_G2(ii,jj,kk) &
644                  *exp(-M_HALF*sigma_erf**2*g2)/(M_HALF*sigma_erf**2) &
645                  *(M_HALF*sigma_erf**2*g2 + M_ONE) &
646                  *rho_total_fs(ii,jj,kk)*conjg(zphase)
647
648             do idir =1,3
649                do jdir =1,3
650                   stress_t_LR(idir, jdir) = stress_t_LR(idir, jdir) &
651                        + TOFLOAT(zfact)*Gvec_G(ii, jj, kk,idir)*Gvec_G(ii, jj, kk,jdir)
652                end do
653             end do
654
655             do idir =1,3
656                stress_t_LR(idir, idir) = stress_t_LR(idir, idir) &
657                     - FourPi_G2(ii,jj,kk)*exp(-M_HALF*sigma_erf**2*g2) &
658                     * TOFLOAT(rho_total_fs(ii,jj,kk)*conjg(zphase))
659             end do
660
661          end do
662       end do
663    end do
664
665    stress_t_LR = stress_t_LR/der%mesh%sb%rcell_volume
666
667
668    stress_ps = stress_t_SR + stress_t_LR + stress_t_NL
669
670!!! NOTE!! This part is moved to Ewald contoributoin
671!! Contribition from G=0 component of the long-range part
672!    charge = M_ZERO
673!    do iatom = 1, geo%natoms
674!       zi = species_zval(geo%atom(iatom)%species)
675!       charge = charge + zi
676!    end do
677!
678!    do idir = 1,3
679!       stress_ps(idir,idir) = stress_ps(idir,idir) &
680!            + 2d0*M_PI*sigma_erf**2*charge**2 /der%mesh%sb%rcell_volume**2
681!    end do
682
683    stress = stress + stress_ps
684
685
686    call profiling_out(prof)
687    POP_SUB(stress_from_pseudo)
688
689  end subroutine stress_from_pseudo
690
691  ! -------------------------------------------------------
692  subroutine epot_local_pseudopotential_SR(der, dgrid, geo, iatom, vpsl, rvpsl)
693    type(derivatives_t),      intent(in)    :: der
694    type(double_grid_t),      intent(in)    :: dgrid
695    type(geometry_t),         intent(in)    :: geo
696    integer,                  intent(in)    :: iatom
697    FLOAT,                    intent(inout) :: vpsl(:)
698    FLOAT,                    intent(inout) :: rvpsl(:,:)
699
700    integer :: ip
701    FLOAT :: radius
702    FLOAT, allocatable :: vl(:)
703    type(submesh_t)  :: sphere
704    type(profile_t), save :: prof
705
706    PUSH_SUB(epot_local_pseudopotential_sr)
707    call profiling_in(prof, "EPOT_LOCAL_PSEUDOPOTENTIAL_SR")
708
709    !the localized part
710
711    if(species_is_ps(geo%atom(iatom)%species)) then
712
713       radius = double_grid_get_rmax(dgrid, geo%atom(iatom)%species, der%mesh) + der%mesh%spacing(1)
714
715       call submesh_init(sphere, der%mesh%sb, der%mesh, geo%atom(iatom)%x, radius)
716       SAFE_ALLOCATE(vl(1:sphere%np))
717
718       call double_grid_apply_local(dgrid, geo%atom(iatom)%species, der%mesh, sphere, geo%atom(iatom)%x, vl)
719
720       ! Cannot be written (correctly) as a vector expression since for periodic systems,
721       ! there can be values ip, jp such that sphere%map(ip) == sphere%map(jp).
722       do ip = 1, sphere%np
723          vpsl(sphere%map(ip)) = vpsl(sphere%map(ip)) + vl(ip)
724          rvpsl(sphere%map(ip) ,1) = rvpsl(sphere%map(ip), 1) + sphere%x(ip, 1) * vl(ip)
725          rvpsl(sphere%map(ip) ,2) = rvpsl(sphere%map(ip), 2) + sphere%x(ip, 2) * vl(ip)
726          rvpsl(sphere%map(ip) ,3) = rvpsl(sphere%map(ip), 3) + sphere%x(ip, 3) * vl(ip)
727       end do
728
729       SAFE_DEALLOCATE_A(vl)
730       call submesh_end(sphere)
731
732    end if
733
734    call profiling_out(prof)
735    POP_SUB(epot_local_pseudopotential_sr)
736  end subroutine epot_local_pseudopotential_SR
737! -------------------------------------------------------
738  subroutine poisson_fft_gg_transform_l(gg_in, temp, sb, qq, gg, modg2)
739    integer,           intent(in)    :: gg_in(:)
740    FLOAT,             intent(in)    :: temp(:)
741    type(simul_box_t), intent(in)    :: sb
742    FLOAT,             intent(in)    :: qq(:)
743    FLOAT,             intent(inout) :: gg(:)
744    FLOAT,             intent(out)   :: modg2
745
746!    integer :: idir
747
748    ! no PUSH_SUB, called too frequently
749
750    gg(1:3) = gg_in(1:3)
751    gg(1:sb%periodic_dim) = gg(1:sb%periodic_dim) + qq(1:sb%periodic_dim)
752    gg(1:3) = gg(1:3) * temp(1:3)
753    gg(1:3) = matmul(sb%klattice_primitive(1:3,1:3),gg(1:3))
754! MJV 27 jan 2015 this should not be necessary
755!    do idir = 1, 3
756!      gg(idir) = gg(idir) / lalg_nrm2(3, sb%klattice_primitive(1:3, idir))
757!    end do
758
759    modg2 = sum(gg(1:3)**2)
760
761  end subroutine poisson_fft_gg_transform_l
762! ---------------------------------------------------------
763  subroutine stress_from_Ewald_sum(gr, geo, hm, stress, stress_Ewald)
764    type(grid_t),      target,        intent(in) :: gr !< grid
765    type(geometry_t), target, intent(in)    :: geo
766    type(hamiltonian_elec_t),  intent(inout)    :: hm
767    FLOAT,                         intent(inout) :: stress(:, :)
768    FLOAT,                         intent(out) :: stress_Ewald(3, 3) ! temporal
769
770    FLOAT :: stress_l(3, 3)
771
772    type(simul_box_t), pointer :: sb
773    FLOAT :: rr, xi(1:MAX_DIM), zi, zj, erfc, rcut
774    integer :: iatom, jatom, icopy
775    type(periodic_copy_t) :: pc
776    integer :: ix, iy, iz, isph, ss, idim, idir, jdir
777    FLOAT   :: gg(1:MAX_DIM), gg2, gx
778    FLOAT   :: factor, charge, Hp, charge_sq
779    FLOAT   :: alpha
780    CMPLX   :: sumatoms, aa
781    FLOAT :: sigma_erf
782    type(profile_t), save :: prof
783
784    call profiling_in(prof, "STRESS_FROM_EWALD")
785    PUSH_SUB(stress_from_Ewald_sum)
786
787
788    alpha = hm%ep%ion_interaction%alpha
789    sb   => gr%sb
790
791    rcut = CNST(6.0)/alpha
792    stress_l = M_ZERO
793! the short-range part is calculated directly
794    do iatom = geo%atoms_dist%start, geo%atoms_dist%end
795       if (.not. species_represents_real_atom(geo%atom(iatom)%species)) cycle
796       zi = species_zval(geo%atom(iatom)%species)
797
798       call periodic_copy_init(pc, sb, geo%atom(iatom)%x, rcut)
799
800       do icopy = 1, periodic_copy_num(pc)
801          xi(1:sb%dim) = periodic_copy_position(pc, sb, icopy)
802
803          do jatom = 1,  geo%natoms
804             zj = species_zval(geo%atom(jatom)%species)
805             rr = sqrt( sum( (xi(1:sb%dim) - geo%atom(jatom)%x(1:sb%dim))**2 ) )
806
807          if(rr < CNST(1e-5)) cycle
808
809          erfc = M_ONE - loct_erf(alpha*rr)
810          Hp = -M_TWO/sqrt(M_PI)*exp(-(alpha*rr)**2) - erfc/(alpha*rr)
811          factor = M_HALF*zj*zi*alpha*Hp
812          do idir = 1,3
813             do jdir =1,3
814                stress_l(idir, jdir) = stress_l(idir, jdir) &
815                     -factor&
816                     *(xi(idir) - geo%atom(jatom)%x(idir)) &
817                     *(xi(jdir) - geo%atom(jatom)%x(jdir))/(rr**2)
818
819             end do
820          end do
821
822        end do
823
824      end do
825
826      call periodic_copy_end(pc)
827    end do
828
829    if(geo%atoms_dist%parallel) then
830       call comm_allreduce(geo%atoms_dist%mpi_grp%comm, stress_l)
831    end if
832
833! And the long-range part, using an Ewald sum
834    charge = M_ZERO
835    charge_sq = M_ZERO
836    do iatom = 1, geo%natoms
837       zi = species_zval(geo%atom(iatom)%species)
838       charge = charge + zi
839       charge_sq = charge_sq + zi**2
840    end do
841
842! get a converged value for the cutoff in g
843    rcut = huge(rcut)
844    do idim = 1, sb%dim
845      rcut = min(rcut, sum(sb%klattice(1:sb%dim, idim)**2))
846    end do
847
848    rcut = sqrt(rcut)
849
850    isph = ceiling(CNST(9.5)*alpha/rcut)
851
852    do ix = -isph, isph
853       do iy = -isph, isph
854          do iz = -isph, isph
855
856             ss = ix**2 + iy**2 + iz**2
857
858             if(ss == 0 .or. ss > isph**2) cycle
859
860             gg(1:sb%dim) = ix*sb%klattice(1:sb%dim, 1) + iy*sb%klattice(1:sb%dim, 2) + iz*sb%klattice(1:sb%dim, 3)
861             gg2 = sum(gg(1:sb%dim)**2)
862
863          ! g=0 must be removed from the sum
864             if(gg2 < M_EPSILON) cycle
865
866             gx = -CNST(0.25)*gg2/alpha**2
867
868             if(gx < CNST(-36.0)) cycle
869
870             factor = M_TWO*M_PI*exp(gx)/(sb%rcell_volume*gg2)
871
872             if(factor < epsilon(factor)) cycle
873
874             sumatoms = M_Z0
875
876             do iatom = 1, geo%natoms
877                gx = sum(gg(1:sb%dim)*geo%atom(iatom)%x(1:sb%dim))
878                aa = species_zval(geo%atom(iatom)%species)*TOCMPLX(cos(gx), sin(gx))
879                sumatoms = sumatoms + aa
880             end do
881
882             factor = factor*abs(sumatoms)**2
883
884             do idir = 1,3
885                do jdir =1,3
886
887                   stress_l(idir, jdir) = stress_l(idir, jdir) &
888                        - M_TWO*factor*gg(idir)*gg(jdir)/gg2*(CNST(0.25)*gg2/alpha**2+M_ONE)
889
890                end do
891                stress_l(idir, idir) = stress_l(idir, idir) + factor
892             end do
893
894          end do
895       end do
896    end do
897
898
899    factor = M_HALF*M_PI*charge**2/(sb%rcell_volume*alpha**2)
900    stress_l(1, 1) = stress_l(1, 1) - factor
901    stress_l(2, 2) = stress_l(2, 2) - factor
902    stress_l(3, 3) = stress_l(3, 3) - factor
903
904
905! Contribition from G=0 component of the long-range part
906    sigma_erf = CNST(0.625)
907    do idir = 1,3
908       stress_l(idir,idir) = stress_l(idir,idir) &
909            + M_TWO*M_PI*sigma_erf**2*charge**2 /sb%rcell_volume
910    end do
911
912    stress_l = stress_l/sb%rcell_volume
913
914    stress_Ewald = stress_l
915    stress = stress + stress_l
916
917
918    call profiling_out(prof)
919    POP_SUB(stress_from_Ewald_sum)
920
921  end subroutine stress_from_Ewald_sum
922  ! -------------------------------------------------------
923  ! -------------------------------------------------------
924  ! -------------------------------------------------------
925
926
927
928end module stress_oct_m
929
930!! Local Variables:
931!! mode: f90
932!! coding: utf-8
933!! End:
934