1!! Copyright (C) 2015 X. Andrade 2!! 3!! This program is free software; you can redistribute it and/or modify 4!! it under the terms of the GNU General Public License as published by 5!! the Free Software Foundation; either version 2, or (at your option) 6!! any later version. 7!! 8!! This program is distributed in the hope that it will be useful, 9!! but WITHOUT ANY WARRANTY; without even the implied warranty of 10!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11!! GNU General Public License for more details. 12!! 13!! You should have received a copy of the GNU General Public License 14!! along with this program; if not, write to the Free Software 15!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 16!! 02110-1301, USA. 17!! 18 19!--------------------------------------------------------------------------- 20subroutine X(density_accumulate_grad)(gr, st, psib, grad_psib, grad_rho) 21 type(grid_t), intent(in) :: gr 22 type(states_elec_t), intent(in) :: st 23 type(wfs_elec_t), intent(in) :: psib 24 type(wfs_elec_t), intent(in) :: grad_psib(:) 25 FLOAT, intent(inout) :: grad_rho(:, :) 26 27 integer :: ii, ist, idir, ip 28 FLOAT :: ff 29 R_TYPE :: psi, gpsi 30 integer :: wgsize 31 FLOAT, allocatable :: grad_rho_tmp(:, :), weights(:) 32 type(accel_mem_t) :: grad_rho_buff, weights_buff 33 type(accel_kernel_t), save :: ker_calc_grad_dens 34 35 PUSH_SUB(X(density_accumulate_grad)) 36 37 call psib%check_compatibility_with(grad_psib(1)) 38 39 select case(psib%status()) 40 case(BATCH_NOT_PACKED) 41 do idir = 1, gr%mesh%sb%dim 42 do ii = 1, psib%nst_linear 43 ist = psib%linear_to_ist(ii) 44 45 ff = st%d%kweights(psib%ik)*st%occ(ist, psib%ik)*M_TWO 46 if(abs(ff) <= M_EPSILON) cycle 47 48 do ip = 1, gr%mesh%np 49 50 psi = psib%X(ff_linear)(ip, ii) 51 gpsi = grad_psib(idir)%X(ff_linear)(ip, ii) 52 grad_rho(ip, idir) = grad_rho(ip, idir) + ff*R_REAL(R_CONJ(psi)*gpsi) 53 54 end do 55 end do 56 57 end do 58 59 case(BATCH_PACKED) 60 do ii = 1, psib%nst_linear 61 ist = psib%linear_to_ist(ii) 62 63 ff = st%d%kweights(psib%ik)*st%occ(ist, psib%ik)*M_TWO 64 if(abs(ff) <= M_EPSILON) cycle 65 66 do idir = 1, gr%mesh%sb%dim 67 do ip = 1, gr%mesh%np 68 69 psi = psib%X(ff_pack)(ii, ip) 70 gpsi = grad_psib(idir)%X(ff_pack)(ii, ip) 71 grad_rho(ip, idir) = grad_rho(ip, idir) + ff*R_REAL(R_CONJ(psi)*gpsi) 72 73 end do 74 end do 75 76 end do 77 78 case(BATCH_DEVICE_PACKED) 79 call accel_create_buffer(grad_rho_buff, ACCEL_MEM_WRITE_ONLY, TYPE_FLOAT, gr%mesh%np*gr%sb%dim) 80 81 SAFE_ALLOCATE(weights(1:psib%pack_size(1))) 82 83 weights = CNST(0.0) 84 do ii = 1, psib%nst_linear 85 ist = psib%linear_to_ist(ii) 86 weights(ii) = st%d%kweights(psib%ik)*st%occ(ist, psib%ik)*M_TWO 87 end do 88 89 call accel_create_buffer(weights_buff, ACCEL_MEM_READ_ONLY, TYPE_FLOAT, psib%pack_size(1)) 90 call accel_write_buffer(weights_buff, psib%pack_size(1), weights) 91 92 SAFE_DEALLOCATE_A(weights) 93 94 call accel_kernel_start_call(ker_calc_grad_dens, 'forces.cl', TOSTRING(X(density_gradient)), & 95 flags = '-D' + R_TYPE_CL) 96 97 do idir = 1, gr%mesh%sb%dim 98 call accel_set_kernel_arg(ker_calc_grad_dens, 0, idir - 1) 99 call accel_set_kernel_arg(ker_calc_grad_dens, 1, psib%pack_size(1)) 100 call accel_set_kernel_arg(ker_calc_grad_dens, 2, gr%mesh%np) 101 call accel_set_kernel_arg(ker_calc_grad_dens, 3, weights_buff) 102 call accel_set_kernel_arg(ker_calc_grad_dens, 4, grad_psib(idir)%ff_device) 103 call accel_set_kernel_arg(ker_calc_grad_dens, 5, psib%ff_device) 104 call accel_set_kernel_arg(ker_calc_grad_dens, 6, log2(psib%pack_size(1))) 105 call accel_set_kernel_arg(ker_calc_grad_dens, 7, grad_rho_buff) 106 107 wgsize = accel_kernel_workgroup_size(ker_calc_grad_dens) 108 call accel_kernel_run(ker_calc_grad_dens, (/pad(gr%mesh%np, wgsize)/), (/wgsize/)) 109 110 end do 111 112 call accel_release_buffer(weights_buff) 113 114 SAFE_ALLOCATE(grad_rho_tmp(1:gr%mesh%np, 1:gr%sb%dim)) 115 116 call accel_read_buffer(grad_rho_buff, gr%mesh%np*gr%sb%dim, grad_rho_tmp) 117 118 call accel_release_buffer(grad_rho_buff) 119 120 do idir = 1, gr%mesh%sb%dim 121 do ip = 1, gr%mesh%np 122 grad_rho(ip, idir) = grad_rho(ip, idir) + grad_rho_tmp(ip, idir) 123 end do 124 end do 125 126 SAFE_DEALLOCATE_A(grad_rho_tmp) 127 end select 128 129 POP_SUB(X(density_accumulate_grad)) 130 131end subroutine X(density_accumulate_grad) 132 133!! Local Variables: 134!! mode: f90 135!! coding: utf-8 136!! End: 137