1!=========================================================================== 2! 3! Module mtxelmultiply_m 4! 5! (1) mtxelMultiply() Refactored from main.f90 By (PWD) 6! Last Modified 10/21/2010 (PWD) 7! 8! Combine the <c,k|e^(-i(q+G).r)|v,k+q><v,k+q|e^(i(q+G`).r)|c,k> 9! ------------------------------------------------- 10! energy denominator 11! Input: 12! pol%eden(band,spin) = 1/(e_val-e_cond) = energy denominators 13! pol%gme(band,g-vector,spin) = plane wave matrix elements 14! pol%isrtx orders the |G(i)|^2 i=1,pol%nmtx 15! vwfn%isort orders |qk+g|^2 (in vwfn type) 16! 17! energies are apparently assumed in Rydbergs. 18! 19!=========================================================================== 20 21#include "f_defs.h" 22 23module mtxelmultiply_m 24 25 use global_m 26 use blas_m 27 use scalapack_m 28 use timing_m, only: timing => epsilon_timing 29 30 implicit none 31 32 private 33 34 public :: & 35 mtxelMultiply 36 37contains 38 39 subroutine mtxelmultiply(scal,ntot,nrk,nst,fact,vwfn,gmetempr,gmetempc,chilocal, & 40 polgme,pol,indt,pht,ipe,ispin) 41 type (scalapack), intent(in) :: scal 42 integer, intent(in) :: ntot 43 integer, intent(in) :: nrk 44 integer, intent(in) :: nst(:) !< (nrk) 45 real(DP), intent(in) :: fact 46 type (valence_wfns), intent(in) :: vwfn 47 SCALAR, dimension(:,:), intent(inout) :: gmetempr(:,:),gmetempc(:,:) 48 SCALAR, dimension(:,:), intent(inout) :: chilocal(:,:) 49 SCALAR, intent(in) :: polgme(:,:,:,:,:,:) 50 type (polarizability), intent(inout) :: pol 51 integer, dimension(:,:,:), intent(in) :: indt 52 SCALAR, dimension(:,:,:), intent(in) :: pht 53 integer, intent(in) :: ipe 54 integer, intent(in) :: ispin 55 56 SCALAR :: negfact 57 integer, allocatable :: tmprowindex(:),tmpcolindex(:) 58 SCALAR, allocatable :: tmprowph(:),tmpcolph(:) 59 integer :: irk, iv, ilimit, j, it, icurr, itot, mytot 60 61 PUSH_SUB(mtxelMultiply) 62 63 itot=0 64 negfact = -1D0*fact 65 66 if ( peinf%inode == 0 ) call timing%start(timing%chi_sum_prep) 67 68 SAFE_ALLOCATE(tmprowindex,(scal%nprd(ipe))) 69 SAFE_ALLOCATE(tmpcolindex,(scal%npcd(ipe))) 70 SAFE_ALLOCATE(tmprowph,(scal%nprd(ipe))) 71 SAFE_ALLOCATE(tmpcolph,(scal%npcd(ipe))) 72 73 do irk = 1, nrk 74 do it = 1, nst(irk) 75 76 do icurr=1,scal%nprd(ipe) 77 tmprowindex(icurr) = indt(scal%imyrowd(icurr,ipe),it,irk) 78 tmprowph(icurr) = pht(scal%imyrowd(icurr,ipe),it,irk) 79 enddo 80 do icurr=1,scal%npcd(ipe) 81 tmpcolindex(icurr) = indt(scal%imycold(icurr,ipe),it,irk) 82 tmpcolph(icurr) = pht(scal%imycold(icurr,ipe),it,irk) 83 enddo 84 85!$OMP PARALLEL DO collapse(2) private (mytot,iv,j,icurr) 86 do iv = 1,peinf%nvownactual 87 do j = 1, peinf%ncownactual 88 89 mytot = itot + (iv-1)*peinf%ncownactual + j 90 91! JRD XXX the index here probably generates gather instructions 92! May want to also consider streaming stores 93 94 do icurr=1,scal%nprd(ipe) 95 gmetempr(icurr,mytot)=polgme( & 96 tmprowindex(icurr),j,iv, & 97 ispin,irk,pol%nfreq_group)* & 98 tmprowph(icurr) 99 enddo 100 101 do icurr=1,scal%npcd(ipe) 102 gmetempc(icurr,mytot)= & 103 MYCONJG(polgme(tmpcolindex(icurr),j,iv,ispin,irk,pol%nfreq_group)*tmpcolph(icurr)) 104 enddo 105 106 enddo ! j 107 enddo ! iv 108!$OMP END PARALLEL DO 109 110 itot = itot + peinf%nvownactual*peinf%ncownactual 111 112 enddo ! it 113 enddo ! irk 114 115 if ( peinf%inode == 0 ) call timing%stop(timing%chi_sum_prep) 116 117 if ( peinf%inode == 0 ) call timing%start(timing%chi_sum_gemm) 118 119 if (ntot.ne.0) then 120 !call X(gemm)('n','n',scal%nprd(ipe),scal%npcd(ipe),ntot, & 121 ! negfact,gmetempr,scal%nprd(ipe),gmetempc,ntot,ZERO,chilocal,scal%nprd(ipe)) 122 call X(gemm)('n','t',scal%nprd(ipe),scal%npcd(ipe),ntot, & 123 negfact,gmetempr,scal%nprd(ipe),gmetempc,scal%npcd(ipe),ZERO,chilocal,scal%nprd(ipe)) 124 end if 125 126 if ( peinf%inode == 0 ) call timing%stop(timing%chi_sum_gemm) 127 128 SAFE_DEALLOCATE(tmprowindex) 129 SAFE_DEALLOCATE(tmpcolindex) 130 SAFE_DEALLOCATE(tmprowph) 131 SAFE_DEALLOCATE(tmpcolph) 132 133 POP_SUB(mtxelMultiply) 134 135 return 136 137 end subroutine mtxelMultiply 138 139end module mtxelmultiply_m 140