! w_sum.f90 - OpenGrok cross reference for /dports/science/berkeleygw/BGW-2.0.0/BSE/w_sum.f90

!=================================================================================
!
! Routines:
!
! (1) w_sum()         Originally By JRD       Last Modified 4/1/2012 (JRD)
!
!  Multiply Valence-Valence matrix elements by W to create temporary arrays
!  for the head, wing and body.
!
!  This routine scales as N^3, but is nested within the igp loop
!  in mtxel_kernel. Thus, it represents an N^4 step. Doing the multiplication
!  here is cheaper than doing it in the N^5 g_sum subroutine.
!
!=================================================================================

#include "f_defs.h"

module w_sum_m

  use global_m
  implicit none

  public :: w_sum

  private

contains

  !> Multiply the conjugated matrix elements m11p(indinvigp,:,:,:) by the
  !> entries of wptcol and store the products in the temporary arrays
  !> temph, tempw and tempb (head, wing and body according to the file header).
  !>
  !> For every spin index isv and every local band pair (i1, i1p), mapped to
  !> the global pair (gi1, gi1p) = (ofs1+i1, ofs1p+i1p):
  !>
  !>  * indinvigp == 1:
  !>      temph(gi1, gi1p, isv)        = wptcol(1)  * conj(m11p)   (overwritten)
  !>      tempw(ig, gi1, gi1p, isv)    = wptcol(ig) * conj(m11p)   (overwritten, ig = 2..ng_eps)
  !>  * indinvigp /= 1:
  !>      tempw(1, gi1, gi1p, isv)    += wptcol(1)  * conj(m11p)   (accumulated)
  !>      tempb(ig, gi1, gi1p, isv)   += wptcol(ig) * conj(m11p)   (accumulated, ig = 2..ng_eps)
  !>
  !> Here conj(m11p) stands for the conjugation macro from f_defs.h applied to
  !> m11p(indinvigp, i1, i1p, isv).
  !>
  !> The accumulating branch never initializes tempw or tempb, so the caller is
  !> responsible for their initial contents.
  !>
  !> \param xct        only xct%nspin (number of spin components) is read
  !> \param wptcol     factors multiplied onto the matrix elements; indices
  !>                   1..ng_eps are read (presumably one column of W, see the
  !>                   file header - confirm against the caller)
  !> \param temph      written only when indinvigp == 1
  !> \param tempw      rows 2..ng_eps written when indinvigp == 1, row 1
  !>                   accumulated otherwise
  !> \param tempb      rows 2..ng_eps accumulated when indinvigp /= 1
  !> \param m11p       matrix elements; only the slice with first index
  !>                   indinvigp is read and m11p itself is not modified. The
  !>                   whole-array copy below requires dimensions 2..4 of m11p
  !>                   to have extents (n1, n1p, xct%nspin).
  !> \param indinvigp  first index into m11p; the value 1 selects the
  !>                   overwrite branch
  !> \param ng_eps     upper bound of the ig loops
  subroutine w_sum(xct,wptcol,ofs1,ofs1p,n1,n1p,temph,tempw,tempb,m11p,indinvigp,ng_eps)
    type (xctinfo), intent(in) :: xct
    SCALAR, intent(in) :: wptcol(:)
    !> offset (i.e., add this number to map a local index to the global band index)
    integer, intent(in) :: ofs1, ofs1p
    !> number of bands for each wfn
    integer, intent(in) :: n1, n1p
    SCALAR, intent(inout) :: tempw(:,:,:,:), tempb(:,:,:,:), temph(:,:,:), m11p(:,:,:,:)
    integer, intent(in) :: indinvigp
    integer, intent(in) :: ng_eps

    SCALAR, allocatable :: m11p_conj(:,:,:)
    integer :: isv, ig, i1, i1p, gi1, gi1p

    PUSH_SUB(w_sum)

    ! JRD: We allocate a new temporary array in order to get better cache performance
    ! The slice m11p(indinvigp,:,:,:) is strided in memory; the conjugated copy
    ! is contiguous and is reused for every ig below.
    SAFE_ALLOCATE(m11p_conj,(n1,n1p,xct%nspin))
    m11p_conj(:,:,:) = MYCONJG(m11p(indinvigp,:,:,:))

    do isv=1,xct%nspin
      if (indinvigp .eq. 1) then
        ! Overwrite branch: first element of wptcol goes to temph ...
        temph(ofs1+1:ofs1+n1, ofs1p+1:ofs1p+n1p, isv) = wptcol(1)*m11p_conj(1:n1, 1:n1p, isv)

        ! ... and the remaining elements go to tempw.
        ! Every thread runs the two outer loops redundantly (their indices are
        ! private); only the ig loop is shared out among the threads.
        ! The SHARED list names tempw, which is the array written here.
        !$OMP PARALLEL PRIVATE(i1p, gi1p, i1, gi1, ig) SHARED(wptcol, tempw, m11p_conj)
        do i1p = 1, n1p
          gi1p = ofs1p+i1p
          do i1 = 1, n1
            gi1 = ofs1+i1
            !$OMP DO
            do ig=2,ng_eps
              tempw(ig, gi1, gi1p, isv) = wptcol(ig) * m11p_conj(i1, i1p, isv)
            enddo
            ! Different (i1, i1p) pairs write disjoint slices of tempw, so no
            ! barrier is needed between consecutive worksharing loops.
            !$OMP END DO NOWAIT
          enddo
        enddo
        !$OMP END PARALLEL

      else

        ! Accumulate branch: first element of wptcol is added into row 1 of tempw ...
        tempw(1, ofs1+1:ofs1+n1, ofs1p+1:ofs1p+n1p, isv) = tempw(1, ofs1+1:ofs1+n1, ofs1p+1:ofs1p+n1p, isv) + &
          wptcol(1)*m11p_conj(1:n1, 1:n1p, isv)

        ! ... and the remaining elements are added into tempb.
        !$OMP PARALLEL PRIVATE(i1p, gi1p, i1, gi1, ig) SHARED(wptcol, tempb, m11p_conj)
        do i1p = 1, n1p
          gi1p = ofs1p+i1p
          do i1 = 1, n1
            gi1 = ofs1+i1
            !$OMP DO
            do ig=2,ng_eps
              tempb(ig, gi1, gi1p, isv) = tempb(ig, gi1, gi1p, isv) + &
                wptcol(ig) * m11p_conj(i1, i1p, isv)
            enddo
            ! Disjoint slices of tempb per (i1, i1p) pair: no barrier required.
            !$OMP END DO NOWAIT
          enddo
        enddo
        !$OMP END PARALLEL

      endif

    enddo

    SAFE_DEALLOCATE(m11p_conj)

    POP_SUB(w_sum)

  end subroutine w_sum

end module w_sum_m