1!! Copyright (C) 2015 X. Andrade
2!!
3!! This program is free software; you can redistribute it and/or modify
4!! it under the terms of the GNU General Public License as published by
5!! the Free Software Foundation; either version 2, or (at your option)
6!! any later version.
7!!
8!! This program is distributed in the hope that it will be useful,
9!! but WITHOUT ANY WARRANTY; without even the implied warranty of
10!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11!! GNU General Public License for more details.
12!!
13!! You should have received a copy of the GNU General Public License
14!! along with this program; if not, write to the Free Software
15!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16!! 02110-1301, USA.
17!!
18
19!---------------------------------------------------------------------------
20subroutine X(density_accumulate_grad)(gr, st, psib, grad_psib, grad_rho)
21  type(grid_t),        intent(in)    :: gr
22  type(states_elec_t), intent(in)    :: st
23  type(wfs_elec_t),    intent(in)    :: psib
24  type(wfs_elec_t),    intent(in)    :: grad_psib(:)
25  FLOAT,               intent(inout) :: grad_rho(:, :)
26
27  integer :: ii, ist, idir, ip
28  FLOAT :: ff
29  R_TYPE :: psi, gpsi
30  integer :: wgsize
31  FLOAT, allocatable :: grad_rho_tmp(:, :), weights(:)
32  type(accel_mem_t) :: grad_rho_buff, weights_buff
33  type(accel_kernel_t), save :: ker_calc_grad_dens
34
35  PUSH_SUB(X(density_accumulate_grad))
36
37  call psib%check_compatibility_with(grad_psib(1))
38
39  select case(psib%status())
40  case(BATCH_NOT_PACKED)
41    do idir = 1, gr%mesh%sb%dim
42      do ii = 1, psib%nst_linear
43        ist = psib%linear_to_ist(ii)
44
45        ff = st%d%kweights(psib%ik)*st%occ(ist, psib%ik)*M_TWO
46        if(abs(ff) <= M_EPSILON) cycle
47
48        do ip = 1, gr%mesh%np
49
50          psi = psib%X(ff_linear)(ip, ii)
51          gpsi = grad_psib(idir)%X(ff_linear)(ip, ii)
52          grad_rho(ip, idir) = grad_rho(ip, idir) + ff*R_REAL(R_CONJ(psi)*gpsi)
53
54        end do
55      end do
56
57    end do
58
59  case(BATCH_PACKED)
60    do ii = 1, psib%nst_linear
61      ist = psib%linear_to_ist(ii)
62
63      ff = st%d%kweights(psib%ik)*st%occ(ist, psib%ik)*M_TWO
64      if(abs(ff) <= M_EPSILON) cycle
65
66      do idir = 1, gr%mesh%sb%dim
67        do ip = 1, gr%mesh%np
68
69          psi = psib%X(ff_pack)(ii, ip)
70          gpsi = grad_psib(idir)%X(ff_pack)(ii, ip)
71          grad_rho(ip, idir) = grad_rho(ip, idir) + ff*R_REAL(R_CONJ(psi)*gpsi)
72
73        end do
74      end do
75
76    end do
77
78  case(BATCH_DEVICE_PACKED)
79    call accel_create_buffer(grad_rho_buff, ACCEL_MEM_WRITE_ONLY, TYPE_FLOAT, gr%mesh%np*gr%sb%dim)
80
81    SAFE_ALLOCATE(weights(1:psib%pack_size(1)))
82
83    weights = CNST(0.0)
84    do ii = 1, psib%nst_linear
85      ist = psib%linear_to_ist(ii)
86      weights(ii) = st%d%kweights(psib%ik)*st%occ(ist, psib%ik)*M_TWO
87    end do
88
89    call accel_create_buffer(weights_buff, ACCEL_MEM_READ_ONLY, TYPE_FLOAT, psib%pack_size(1))
90    call accel_write_buffer(weights_buff, psib%pack_size(1), weights)
91
92    SAFE_DEALLOCATE_A(weights)
93
94    call accel_kernel_start_call(ker_calc_grad_dens, 'forces.cl', TOSTRING(X(density_gradient)), &
95      flags = '-D' + R_TYPE_CL)
96
97    do idir = 1, gr%mesh%sb%dim
98      call accel_set_kernel_arg(ker_calc_grad_dens, 0, idir - 1)
99      call accel_set_kernel_arg(ker_calc_grad_dens, 1, psib%pack_size(1))
100      call accel_set_kernel_arg(ker_calc_grad_dens, 2, gr%mesh%np)
101      call accel_set_kernel_arg(ker_calc_grad_dens, 3, weights_buff)
102      call accel_set_kernel_arg(ker_calc_grad_dens, 4, grad_psib(idir)%ff_device)
103      call accel_set_kernel_arg(ker_calc_grad_dens, 5, psib%ff_device)
104      call accel_set_kernel_arg(ker_calc_grad_dens, 6, log2(psib%pack_size(1)))
105      call accel_set_kernel_arg(ker_calc_grad_dens, 7, grad_rho_buff)
106
107      wgsize = accel_kernel_workgroup_size(ker_calc_grad_dens)
108      call accel_kernel_run(ker_calc_grad_dens, (/pad(gr%mesh%np, wgsize)/), (/wgsize/))
109
110    end do
111
112    call accel_release_buffer(weights_buff)
113
114    SAFE_ALLOCATE(grad_rho_tmp(1:gr%mesh%np, 1:gr%sb%dim))
115
116    call accel_read_buffer(grad_rho_buff, gr%mesh%np*gr%sb%dim, grad_rho_tmp)
117
118    call accel_release_buffer(grad_rho_buff)
119
120    do idir = 1, gr%mesh%sb%dim
121      do ip = 1, gr%mesh%np
122        grad_rho(ip, idir) = grad_rho(ip, idir) + grad_rho_tmp(ip, idir)
123      end do
124    end do
125
126    SAFE_DEALLOCATE_A(grad_rho_tmp)
127  end select
128
129  POP_SUB(X(density_accumulate_grad))
130
131end subroutine X(density_accumulate_grad)
132
133!! Local Variables:
134!! mode: f90
135!! coding: utf-8
136!! End:
137