1!================================================================================= 2! 3! Routines: 4! 5! (1) w_sum() Originally By JRD Last Modified 4/1/2012 (JRD) 6! 7! Multiply Valence-Valence matrix elements by W to create temporary arrays 8! for the head, wing and body. 9! 10! This routine scales as N^3, but is nested within the the igp loop 11! in mtxel_kernel. Thus, it represents an N^4 step. Doing the multiplication 12! here is cheaper than doing it in the N^5 g_sum subroutine. 13! 14!================================================================================= 15 16#include "f_defs.h" 17 18module w_sum_m 19 20 use global_m 21 implicit none 22 23 public :: w_sum 24 25 private 26 27contains 28 29 subroutine w_sum(xct,wptcol,ofs1,ofs1p,n1,n1p,temph,tempw,tempb,m11p,indinvigp,ng_eps) 30 type (xctinfo), intent(in) :: xct 31 SCALAR, intent(in) :: wptcol(:) 32 !> offset (i.e., add this number to map a local index to the global band index) 33 integer, intent(in) :: ofs1, ofs1p 34 !> number of bands for each wfn 35 integer, intent(in) :: n1, n1p 36 SCALAR, intent(inout) :: tempw(:,:,:,:), tempb(:,:,:,:), temph(:,:,:), m11p(:,:,:,:) 37 integer, intent(in) :: indinvigp 38 integer, intent(in) :: ng_eps 39 40 SCALAR, allocatable :: m11p_conj(:,:,:) 41 integer :: isv, ig, i1, i1p, gi1, gi1p 42 43 PUSH_SUB(w_sum) 44 45 ! JRD: We allocate a new temporary array in order to get better cache performance 46 SAFE_ALLOCATE(m11p_conj,(n1,n1p,xct%nspin)) 47 m11p_conj(:,:,:) = MYCONJG(m11p(indinvigp,:,:,:)) 48 49 do isv=1,xct%nspin 50 if (indinvigp .eq. 1) then 51 temph(ofs1+1:ofs1+n1, ofs1p+1:ofs1p+n1p, isv) = wptcol(1)*m11p_conj(1:n1, 1:n1p, isv) 52 53 !$OMP PARALLEL PRIVATE(i1p, gi1p, i1, gi1, ig) SHARED(wptcol, tempb, m11p_conj) 54 do i1p = 1, n1p 55 gi1p = ofs1p+i1p 56 do i1 = 1, n1 57 gi1 = ofs1+i1 58 !$OMP DO 59 do ig=2,ng_eps 60 tempw(ig, gi1, gi1p, isv) = wptcol(ig) * m11p_conj(i1, i1p, isv) 61 enddo 62 !$OMP END DO 63 enddo 64 enddo 65 !$OMP END PARALLEL 66 67 else 68 69 tempw(1, ofs1+1:ofs1+n1, ofs1p+1:ofs1p+n1p, isv) = tempw(1, ofs1+1:ofs1+n1, ofs1p+1:ofs1p+n1p, isv) + & 70 wptcol(1)*m11p_conj(1:n1, 1:n1p, isv) 71 72 !$OMP PARALLEL PRIVATE(i1p, gi1p, i1, gi1, ig) SHARED(wptcol, tempb, m11p_conj) 73 do i1p = 1, n1p 74 gi1p = ofs1p+i1p 75 do i1 = 1, n1 76 gi1 = ofs1+i1 77 !$OMP DO 78 do ig=2,ng_eps 79 tempb(ig, gi1, gi1p, isv) = tempb(ig, gi1, gi1p, isv) + & 80 wptcol(ig) * m11p_conj(i1, i1p, isv) 81 enddo 82 !$OMP END DO NOWAIT 83 enddo 84 enddo 85 !$OMP END PARALLEL 86 87 endif 88 89 enddo 90 91 SAFE_DEALLOCATE(m11p_conj) 92 93 POP_SUB(w_sum) 94 95 end subroutine w_sum 96 97end module w_sum_m 98