!--------------------------------------------------------------------------------------------------!
!   CP2K: A general program to perform molecular dynamics simulations                              !
!   Copyright (C) 2000 - 2019  CP2K developers group                                               !
!--------------------------------------------------------------------------------------------------!

! **************************************************************************************************
!> \brief Build up the plane wave density by collocating the primitive Gaussian
!>        functions (pgf).
!> \par History
!>      Joost VandeVondele (02.2002)
!>           1) rewrote collocate_pgf for increased accuracy and speed
!>           2) collocate_core hack for PGI compiler
!>           3) added multiple grid feature
!>           4) new way to go over the grid
!>      Joost VandeVondele (05.2002)
!>           1) prelim. introduction of the real space grid type
!>      JGH [30.08.02] multigrid arrays independent from potential
!>      JGH [17.07.03] distributed real space code
!>      JGH [23.11.03] refactoring and new loop ordering
!>      JGH [04.12.03] OpenMP parallelization of main loops
!>      Joost VandeVondele (12.2003)
!>           1) modified to compute tau
!>      Joost removed incremental build feature
!>      Joost introduced map consistent
!>      Rewrote grid integration/collocation routines, [Joost VandeVondele, 03.2007]
!>      JGH [26.06.15] modification to allow for k-points
!> \author Matthias Krack (03.04.2001)
! **************************************************************************************************
MODULE qs_integrate_potential_product
   USE admm_types,                      ONLY: admm_type
   USE atomic_kind_types,               ONLY: atomic_kind_type,&
                                              get_atomic_kind_set
   USE basis_set_types,                 ONLY: get_gto_basis_set,&
                                              gto_basis_set_type
   USE cell_types,                      ONLY: cell_type,&
                                              pbc
   USE cp_control_types,                ONLY: dft_control_type
   USE cp_dbcsr_operations,             ONLY: dbcsr_deallocate_matrix_set
   USE cube_utils,                      ONLY: cube_info_type
   USE dbcsr_api,                       ONLY: &
        dbcsr_add_block_node, dbcsr_copy, dbcsr_create, dbcsr_distribution_get, &
        dbcsr_distribution_type, dbcsr_finalize, dbcsr_get_block_p, dbcsr_get_info, dbcsr_p_type, &
        dbcsr_type, dbcsr_work_create
   USE gaussian_gridlevels,             ONLY: gridlevel_info_type
   USE input_constants,                 ONLY: do_admm_exch_scaling_merlot
   USE kinds,                           ONLY: default_string_length,&
                                              dp,&
                                              int_8
   USE memory_utilities,                ONLY: reallocate
   USE orbital_pointers,                ONLY: ncoset
   USE particle_types,                  ONLY: particle_type
   USE pw_env_types,                    ONLY: pw_env_get,&
                                              pw_env_type
   USE pw_types,                        ONLY: pw_p_type
   USE qs_environment_types,            ONLY: get_qs_env,&
                                              qs_environment_type
   USE qs_force_types,                  ONLY: qs_force_type
   USE qs_integrate_potential_low,      ONLY: integrate_pgf_product_rspace
   USE qs_kind_types,                   ONLY: get_qs_kind,&
                                              get_qs_kind_set,&
                                              qs_kind_type
   USE realspace_grid_types,            ONLY: realspace_grid_desc_p_type,&
                                              realspace_grid_p_type,&
                                              rs_grid_release,&
                                              rs_grid_retain
   USE rs_pw_interface,                 ONLY: potential_pw2rs
   USE task_list_methods,               ONLY: int2pair,&
                                              rs_distribute_matrix
   USE task_list_types,                 ONLY: task_list_type
   USE virial_types,                    ONLY: virial_type

!$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads

#include "./base/base_uses.f90"

   IMPLICIT NONE

   PRIVATE

   INTEGER :: debug_count = 0

   LOGICAL, PRIVATE, PARAMETER :: debug_this_module = .FALSE.

   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_integrate_potential_product'

! *** Public subroutines ***
! *** Don't call these routines directly, use the interface to
! *** qs_integrate_potential

   PUBLIC :: integrate_v_rspace

CONTAINS
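! Added orientation note: a minimal, schematic sketch of how this routine is typically
! reached (normally via the qs_integrate_potential interface). The caller-side names
! v_hartree_rspace, ks_matrix and the spin index ispin are placeholders, not part of
! this module; the keyword arguments correspond to the dummy arguments declared below.
!
!    CALL integrate_v_rspace(v_rspace=v_hartree_rspace, hmat=ks_matrix(ispin), &
!                            qs_env=qs_env, calculate_forces=.TRUE.)
!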
! **************************************************************************************************
!> \brief computes matrix elements corresponding to a given potential
!> \param v_rspace ...
!> \param hmat ...
!> \param hmat_kp ...
!> \param pmat ...
!> \param pmat_kp ...
!> \param qs_env ...
!> \param calculate_forces ...
!> \param force_adm whether the force contribution of the auxiliary density matrix is calculated
!> \param ispin ...
!> \param compute_tau ...
!> \param gapw ...
!> \param basis_type ...
!> \param pw_env_external ...
!> \param task_list_external ...
!> \par History
!>      IAB (29-Apr-2010): Added OpenMP parallelisation to task loop
!>                         (c) The Numerical Algorithms Group (NAG) Ltd, 2010 on behalf of the HECToR project
!>      Some refactoring, get priorities for options correct (JGH, 04.2014)
!>      Added options to allow for k-points
!>      For a smooth transition we allow for old and new (vector) matrices (hmat, pmat) (JGH, 06.2015)
!> \note
!>      integrates a given potential (or any other object on a real
!>      space grid) = v_rspace over the basis set using a multigrid technique (mgrid_*),
!>      producing a number for every element of h
!>      (which should have the same sparsity structure as S)
!>      additional screening is available using the magnitude of the
!>      elements in p (though it is not clear this is a very good idea);
!>      this argument is optional
!>      derivatives of these matrix elements with respect to the ionic
!>      coordinates can be computed as well
! **************************************************************************************************
   SUBROUTINE integrate_v_rspace(v_rspace, hmat, hmat_kp, pmat, pmat_kp, &
                                 qs_env, calculate_forces, force_adm, ispin, &
                                 compute_tau, gapw, basis_type, pw_env_external, task_list_external)

      TYPE(pw_p_type)                                    :: v_rspace
      TYPE(dbcsr_p_type), INTENT(INOUT), OPTIONAL        :: hmat
      TYPE(dbcsr_p_type), DIMENSION(:), OPTIONAL, &
         POINTER                                         :: hmat_kp
      TYPE(dbcsr_p_type), INTENT(IN), OPTIONAL           :: pmat
      TYPE(dbcsr_p_type), DIMENSION(:), OPTIONAL, &
         POINTER                                         :: pmat_kp
      TYPE(qs_environment_type), POINTER                 :: qs_env
      LOGICAL, INTENT(IN)                                :: calculate_forces
      LOGICAL, INTENT(IN), OPTIONAL                      :: force_adm
      INTEGER, INTENT(IN), OPTIONAL                      :: ispin
      LOGICAL, INTENT(IN), OPTIONAL                      :: compute_tau, gapw
      CHARACTER(len=*), INTENT(IN), OPTIONAL             :: basis_type
      TYPE(pw_env_type), OPTIONAL, POINTER               :: pw_env_external
      TYPE(task_list_type), OPTIONAL, POINTER            :: task_list_external
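      ! Added note: schematically, the quantity assembled below (normal mode) is
      !    h(mu,nu) <- h(mu,nu) + integral over r of  phi_mu(r) * v_rspace(r) * phi_nu(r),
      ! evaluated numerically on the realspace multigrids; in tau mode the gradients
      ! nabla phi_mu and nabla phi_nu replace phi_mu and phi_nu.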
      CHARACTER(len=*), PARAMETER :: routineN = 'integrate_v_rspace', &
                                     routineP = moduleN//':'//routineN

      CHARACTER(len=default_string_length)               :: my_basis_type
      INTEGER :: atom_a, atom_b, bcol, brow, handle, i, iatom, igrid_level, ikind, ikind_old, &
         ilevel, img, ipair, ipgf, ipgf_new, iset, iset_new, iset_old, itask, ithread, jatom, &
         jkind, jkind_old, jpgf, jpgf_new, jset, jset_new, jset_old, maxco, maxpgf, maxset, &
         maxsgf_set, na1, na2, natom, nb1, nb2, ncoa, ncob, nimages, nkind, nseta, nsetb, nthread, &
         offs_dv, sgfa, sgfb
      INTEGER(KIND=int_8), DIMENSION(:), POINTER         :: atom_pair_recv, atom_pair_send
      INTEGER(kind=int_8), DIMENSION(:, :), POINTER      :: tasks
      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: atom_of_kind
      INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: block_touched
      INTEGER, DIMENSION(:), POINTER                     :: la_max, la_min, lb_max, lb_min, npgfa, &
                                                            npgfb, nsgfa, nsgfb
      INTEGER, DIMENSION(:, :), POINTER                  :: first_sgfa, first_sgfb
      LOGICAL :: atom_pair_changed, atom_pair_done, distributed_grids, do_kp, found, h_duplicated, &
         has_threads, map_consistent, my_compute_tau, my_force_adm, my_gapw, new_set_pair_coming, &
         p_duplicated, pab_required, scatter, use_subpatch, use_virial
      REAL(KIND=dp)                                      :: admm_scal_fac, dab, eps_gvg_rspace, &
                                                            rab2, scalef, zetp
      REAL(KIND=dp), DIMENSION(3)                        :: force_a, force_b, ra, rab, rab_inv, rb
      REAL(KIND=dp), DIMENSION(3, 3)                     :: my_virial_a, my_virial_b
      REAL(KIND=dp), DIMENSION(:), POINTER               :: set_radius_a, set_radius_b
      REAL(KIND=dp), DIMENSION(:, :), POINTER            :: dist_ab, h_block, hab, p_block, pab, &
                                                            rpgfa, rpgfb, sphi_a, sphi_b, work, &
                                                            zeta, zetb
      REAL(KIND=dp), DIMENSION(:, :, :), POINTER         :: habt, hadb, hdab, pabt, workt
      REAL(kind=dp), DIMENSION(:, :, :, :), POINTER      :: hadbt, hdabt
      TYPE(admm_type), POINTER                           :: admm_env
      TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
      TYPE(cell_type), POINTER                           :: cell
      TYPE(cube_info_type), DIMENSION(:), POINTER        :: cube_info
      TYPE(dbcsr_distribution_type)                      :: dist
      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: deltap, dhmat, htemp
      TYPE(dbcsr_type), POINTER                          :: href
      TYPE(dft_control_type), POINTER                    :: dft_control
      TYPE(gridlevel_info_type), POINTER                 :: gridlevel_info
      TYPE(gto_basis_set_type), POINTER                  :: orb_basis_set
      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
      TYPE(pw_env_type), POINTER                         :: pw_env
      TYPE(qs_force_type), DIMENSION(:), POINTER         :: force
      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
      TYPE(realspace_grid_desc_p_type), DIMENSION(:), &
         POINTER                                         :: rs_descs
      TYPE(realspace_grid_p_type), DIMENSION(:), POINTER :: rs_v
      TYPE(task_list_type), POINTER                      :: task_list, task_list_soft
      TYPE(virial_type), POINTER                         :: virial

      CALL timeset(routineN, handle)

      ! we test here if the provided operator matrices are consistent
      CPASSERT(PRESENT(hmat) .OR. PRESENT(hmat_kp))
      do_kp = .FALSE.
      IF (PRESENT(hmat_kp)) do_kp = .TRUE.
      IF (PRESENT(pmat)) THEN
         CPASSERT(PRESENT(hmat))
      ELSE IF (PRESENT(pmat_kp)) THEN
         CPASSERT(PRESENT(hmat_kp))
      END IF

      NULLIFY (pw_env, rs_descs, tasks, dist_ab, admm_env)

      debug_count = debug_count + 1

      offs_dv = 0

      ! this routine works in two modes:
      ! normal mode : <a| V | b>
      ! tau mode    : < nabla a| V | nabla b>
      my_compute_tau = .FALSE.
      IF (PRESENT(compute_tau)) my_compute_tau = compute_tau

      my_force_adm = .FALSE.
      IF (PRESENT(force_adm)) my_force_adm = force_adm

      ! this sets the basis set to be used. GAPW (== soft basis) overwrites basis_type
      ! default is "ORB"
      my_gapw = .FALSE.
      IF (PRESENT(gapw)) my_gapw = gapw
      IF (PRESENT(basis_type)) THEN
         my_basis_type = basis_type
      ELSE
         my_basis_type = "ORB"
      END IF

      ! get the task lists
      ! task lists have to be in sync with basis sets
      ! there is an option to provide the task list from outside (not through qs_env)
      ! the outside option has highest priority
      SELECT CASE (my_basis_type)
      CASE ("ORB")
         CALL get_qs_env(qs_env=qs_env, &
                         task_list=task_list, &
                         task_list_soft=task_list_soft)
      CASE ("AUX_FIT")
         CALL get_qs_env(qs_env=qs_env, &
                         task_list_aux_fit=task_list, &
                         task_list_soft=task_list_soft)
      END SELECT
      IF (my_gapw) task_list => task_list_soft
      IF (PRESENT(task_list_external)) task_list => task_list_external
      CPASSERT(ASSOCIATED(task_list))

      ! the information on the grids is provided through pw_env
      ! pw_env has to be the parent env for the potential grid (input)
      ! there is an option to provide an external grid
      CALL get_qs_env(qs_env=qs_env, pw_env=pw_env)
      IF (PRESENT(pw_env_external)) pw_env => pw_env_external

      ! get all the general information on the system we are working on
      CALL get_qs_env(qs_env=qs_env, &
                      atomic_kind_set=atomic_kind_set, &
                      qs_kind_set=qs_kind_set, &
                      cell=cell, &
                      dft_control=dft_control, &
                      particle_set=particle_set, &
                      force=force, &
                      virial=virial)

      admm_scal_fac = 1.0_dp
      IF (my_force_adm) THEN
         CALL get_qs_env(qs_env=qs_env, admm_env=admm_env)
         ! Calculate the bare scaling of the force according to Merlot (1st IF: ADMMP, 2nd IF: ADMMS)
         IF ((.NOT. admm_env%charge_constrain) .AND. &
             (admm_env%scaling_model == do_admm_exch_scaling_merlot)) THEN
            admm_scal_fac = admm_env%gsi(ispin)**2
         ELSE IF (admm_env%charge_constrain .AND. &
                  (admm_env%scaling_model == do_admm_exch_scaling_merlot)) THEN
            admm_scal_fac = (admm_env%gsi(ispin))**(2.0_dp/3.0_dp)
         END IF
      END IF
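      ! Summary of the exchange-force scaling set above (Merlot-type ADMM variants):
      !    ADMMP (no charge constraint):  admm_scal_fac = gsi(ispin)**2
      !    ADMMS (charge constrained)  :  admm_scal_fac = gsi(ispin)**(2/3)
      ! otherwise the auxiliary-density force contribution is left unscaled (factor 1).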
      ! short cuts to task list variables
      tasks => task_list%tasks
      dist_ab => task_list%dist_ab
      atom_pair_send => task_list%atom_pair_send
      atom_pair_recv => task_list%atom_pair_recv

      CPASSERT(ASSOCIATED(pw_env))
      CALL pw_env_get(pw_env, rs_descs=rs_descs, rs_grids=rs_v)
      DO i = 1, SIZE(rs_v)
         CALL rs_grid_retain(rs_v(i)%rs_grid)
      END DO

      ! assign from pw_env
      gridlevel_info => pw_env%gridlevel_info
      cube_info => pw_env%cube_info

      ! transform the potential on the rs_multigrids
      CALL potential_pw2rs(rs_v, v_rspace, pw_env)

      nimages = dft_control%nimages
      IF (nimages > 1) THEN
         CPASSERT(do_kp)
      END IF
      nkind = SIZE(qs_kind_set)
      natom = SIZE(particle_set)
      use_virial = virial%pv_availability .AND. (.NOT. virial%pv_numer)

      IF (calculate_forces) THEN
         ALLOCATE (atom_of_kind(natom))
         CALL get_atomic_kind_set(atomic_kind_set, atom_of_kind=atom_of_kind)
      END IF

      map_consistent = dft_control%qs_control%map_consistent
      IF (map_consistent) THEN
         ! needs to be consistent with rho_rspace
         eps_gvg_rspace = dft_control%qs_control%eps_rho_rspace
      ELSE
         eps_gvg_rspace = dft_control%qs_control%eps_gvg_rspace
      ENDIF

      pab_required = (PRESENT(pmat) .OR. PRESENT(pmat_kp)) &
                     .AND. (calculate_forces .OR. .NOT. map_consistent)

      CALL get_qs_kind_set(qs_kind_set=qs_kind_set, &
                           maxco=maxco, &
                           maxsgf_set=maxsgf_set, &
                           basis_type=my_basis_type)

      distributed_grids = .FALSE.
      DO igrid_level = 1, gridlevel_info%ngrid_levels
         IF (rs_v(igrid_level)%rs_grid%desc%distributed) THEN
            distributed_grids = .TRUE.
         ENDIF
      ENDDO

      ! initialize the working hmat structures
      h_duplicated = .FALSE.
      ALLOCATE (dhmat(nimages))
      IF (do_kp) THEN
         DO img = 1, nimages
            dhmat(img)%matrix => hmat_kp(img)%matrix
         END DO
      ELSE
         dhmat(1)%matrix => hmat%matrix
      END IF
      IF (distributed_grids) THEN
         h_duplicated = .TRUE.
         href => dhmat(1)%matrix
         DO img = 1, nimages
            NULLIFY (dhmat(img)%matrix)
            ALLOCATE (dhmat(img)%matrix)
            CALL dbcsr_create(dhmat(img)%matrix, template=href, name='LocalH')
         END DO
      END IF

      p_duplicated = .FALSE.
      IF (pab_required) THEN
         ! initialize the working pmat structures
         ALLOCATE (deltap(nimages))
         IF (do_kp) THEN
            DO img = 1, nimages
               deltap(img)%matrix => pmat_kp(img)%matrix
            END DO
         ELSE
            deltap(1)%matrix => pmat%matrix
         END IF
         IF (distributed_grids) THEN
            p_duplicated = .TRUE.
            DO img = 1, nimages
               NULLIFY (deltap(img)%matrix)
               ALLOCATE (deltap(img)%matrix)
            END DO
            IF (do_kp) THEN
               DO img = 1, nimages
                  CALL dbcsr_copy(deltap(img)%matrix, pmat_kp(img)%matrix, name="LocalP")
               END DO
            ELSE
               CALL dbcsr_copy(deltap(1)%matrix, pmat%matrix, name="LocalP")
            END IF
         END IF
      END IF

      nthread = 1
!$    nthread = omp_get_max_threads()

      ! *** Allocate work storage ***
      NULLIFY (pabt, habt, workt)
      CALL reallocate(habt, 1, maxco, 1, maxco, 0, nthread)
      CALL reallocate(workt, 1, maxco, 1, maxsgf_set, 0, nthread)
      IF (pab_required) THEN
         CALL reallocate(pabt, 1, maxco, 1, maxco, 0, nthread)
      ELSE
         CPASSERT(.NOT. calculate_forces)
      END IF

      NULLIFY (hdabt, hadbt, hdab, hadb)
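      ! The maximum set and primitive counts gathered below are later used to decode the
      ! packed task descriptors (see the int2pair calls inside the task loop).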
      ! get maximum numbers
      natom = SIZE(particle_set)
      maxset = 0
      maxpgf = 0
      DO ikind = 1, nkind
         CALL get_qs_kind(qs_kind_set(ikind), &
                          softb=my_gapw, &
                          basis_set=orb_basis_set, basis_type=my_basis_type)

         IF (.NOT. ASSOCIATED(orb_basis_set)) CYCLE

         CALL get_gto_basis_set(gto_basis_set=orb_basis_set, &
                                npgf=npgfa, nset=nseta)

         maxset = MAX(nseta, maxset)
         maxpgf = MAX(MAXVAL(npgfa), maxpgf)
      END DO

      IF (distributed_grids .AND. pab_required) THEN
         CALL rs_distribute_matrix(rs_descs, deltap, atom_pair_send, atom_pair_recv, &
                                   natom, nimages, scatter=.TRUE.)
      ENDIF

      IF (debug_this_module) THEN
         ALLOCATE (block_touched(natom, natom))
      END IF

!$OMP PARALLEL DEFAULT(NONE), &
!$OMP SHARED(workt,habt,hdabt,hadbt,pabt,tasks,particle_set,natom,maxset), &
!$OMP SHARED(maxpgf,my_basis_type,my_gapw,dhmat,deltap,use_virial,admm_scal_fac), &
!$OMP SHARED(pab_required,calculate_forces,ncoset,rs_v,cube_info,my_compute_tau), &
!$OMP SHARED(map_consistent,eps_gvg_rspace,force,virial,cell,atom_of_kind,dist_ab), &
!$OMP SHARED(gridlevel_info,task_list,block_touched,nthread,qs_kind_set), &
!$OMP SHARED(nimages,do_kp), &
!$OMP PRIVATE(ithread,work,hab,hdab,hadb,pab,iset_old,jset_old), &
!$OMP PRIVATE(ikind_old,jkind_old,iatom,jatom,iset,jset,ikind,jkind,ilevel,ipgf,jpgf), &
!$OMP PRIVATE(img,brow,bcol,orb_basis_set,first_sgfa,la_max,la_min,npgfa,nseta,nsgfa), &
!$OMP PRIVATE(rpgfa,set_radius_a,sphi_a,zeta,first_sgfb,lb_max,lb_min,npgfb), &
!$OMP PRIVATE(nsetb,nsgfb,rpgfb,set_radius_b,sphi_b,zetb,found,atom_a,atom_b), &
!$OMP PRIVATE(force_a,force_b,my_virial_a,my_virial_b,atom_pair_changed,h_block), &
!$OMP PRIVATE(p_block,ncoa,sgfa,ncob,sgfb,rab,rab2,ra,rb,zetp,dab,igrid_level), &
!$OMP PRIVATE(na1,na2,nb1,nb2,use_subpatch,rab_inv,new_set_pair_coming,atom_pair_done), &
!$OMP PRIVATE(iset_new,jset_new,ipgf_new,jpgf_new,scalef), &
!$OMP PRIVATE(itask,dist,has_threads)

      ithread = 0
!$    ithread = omp_get_thread_num()
      work => workt(:, :, ithread)
      hab => habt(:, :, ithread)
      IF (pab_required) THEN
         pab => pabt(:, :, ithread)
      END IF

      iset_old = -1; jset_old = -1
      ikind_old = -1; jkind_old = -1

      ! Here we loop over gridlevels first, finalising the matrix after each grid level is
      ! completed.  On each grid level, we loop over atom pairs, which will only access
      ! a single block of each matrix, so with OpenMP, each matrix block is only touched
      ! by a single thread for each grid level
      loop_gridlevels: DO igrid_level = 1, gridlevel_info%ngrid_levels

         DO img = 1, nimages
            CALL dbcsr_work_create(dhmat(img)%matrix, work_mutable=.TRUE., n=nthread)
            CALL dbcsr_get_info(dhmat(img)%matrix, distribution=dist)
            CALL dbcsr_distribution_get(dist, has_threads=has_threads)
!$          IF (.NOT. has_threads) &
!$             CPABORT("No thread distribution defined.")
         END DO
!$OMP BARRIER

         IF (debug_this_module) THEN
!$OMP SINGLE
            block_touched = -1
!$OMP END SINGLE
!$OMP FLUSH
         END IF

!$OMP DO schedule (dynamic, MAX(1,task_list%npairs(igrid_level)/(nthread*50)))
         loop_pairs: DO ipair = 1, task_list%npairs(igrid_level)
         loop_tasks: DO itask = task_list%taskstart(ipair, igrid_level), task_list%taskstop(ipair, igrid_level)

            CALL int2pair(tasks(3, itask), ilevel, img, iatom, jatom, iset, jset, ipgf, jpgf, &
                          nimages, natom, maxset, maxpgf)
            CPASSERT(img == 1 .OR. do_kp)

            ! At the start of a block of tasks, get atom data (and kind data, if needed)
            IF (itask .EQ. task_list%taskstart(ipair, igrid_level)) THEN

               ikind = particle_set(iatom)%atomic_kind%kind_number
               jkind = particle_set(jatom)%atomic_kind%kind_number

               ra(:) = pbc(particle_set(iatom)%r, cell)

               IF (iatom <= jatom) THEN
                  brow = iatom
                  bcol = jatom
               ELSE
                  brow = jatom
                  bcol = iatom
               END IF
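               ! Kind-dependent basis data are fetched only when the atomic kind actually
               ! changes between consecutive atom pairs (ikind_old/jkind_old caching below).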
               IF (ikind .NE. ikind_old) THEN
                  CALL get_qs_kind(qs_kind_set(ikind), &
                                   softb=my_gapw, &
                                   basis_set=orb_basis_set, basis_type=my_basis_type)

                  CALL get_gto_basis_set(gto_basis_set=orb_basis_set, &
                                         first_sgf=first_sgfa, &
                                         lmax=la_max, &
                                         lmin=la_min, &
                                         npgf=npgfa, &
                                         nset=nseta, &
                                         nsgf_set=nsgfa, &
                                         pgf_radius=rpgfa, &
                                         set_radius=set_radius_a, &
                                         sphi=sphi_a, &
                                         zet=zeta)
               ENDIF

               IF (jkind .NE. jkind_old) THEN
                  CALL get_qs_kind(qs_kind_set(jkind), &
                                   softb=my_gapw, &
                                   basis_set=orb_basis_set, basis_type=my_basis_type)
                  CALL get_gto_basis_set(gto_basis_set=orb_basis_set, &
                                         first_sgf=first_sgfb, &
                                         lmax=lb_max, &
                                         lmin=lb_min, &
                                         npgf=npgfb, &
                                         nset=nsetb, &
                                         nsgf_set=nsgfb, &
                                         pgf_radius=rpgfb, &
                                         set_radius=set_radius_b, &
                                         sphi=sphi_b, &
                                         zet=zetb)
               ENDIF

               IF (debug_this_module) THEN
!$OMP CRITICAL (block_touched_critical)
                  IF ((block_touched(brow, bcol) .NE. ithread) .AND. (block_touched(brow, bcol) .NE. -1)) THEN
                     CPABORT("Block has been modified by another thread")
                  END IF
                  block_touched(brow, bcol) = ithread
!$OMP END CRITICAL (block_touched_critical)
               END IF

               NULLIFY (h_block)
               CALL dbcsr_get_block_p(dhmat(img)%matrix, brow, bcol, h_block, found)
               IF (.NOT. ASSOCIATED(h_block)) THEN
                  CALL dbcsr_add_block_node(dhmat(img)%matrix, brow, bcol, h_block)
               END IF

               IF (pab_required) THEN
                  CALL dbcsr_get_block_p(matrix=deltap(img)%matrix, &
                                         row=brow, col=bcol, BLOCK=p_block, found=found)
                  CPASSERT(found)
               END IF

               IF (calculate_forces) THEN
                  atom_a = atom_of_kind(iatom)
                  atom_b = atom_of_kind(jatom)
                  force_a(:) = 0.0_dp
                  force_b(:) = 0.0_dp
               ENDIF
               IF (use_virial) THEN
                  my_virial_a = 0.0_dp
                  my_virial_b = 0.0_dp
               ENDIF

               ikind_old = ikind
               jkind_old = jkind

               atom_pair_changed = .TRUE.

            ELSE

               atom_pair_changed = .FALSE.

            ENDIF

            IF (atom_pair_changed .OR. iset_old .NE. iset .OR. jset_old .NE. jset) THEN

               ncoa = npgfa(iset)*ncoset(la_max(iset))
               sgfa = first_sgfa(1, iset)
               ncob = npgfb(jset)*ncoset(lb_max(jset))
               sgfb = first_sgfb(1, jset)

               IF (pab_required) THEN
                  IF (iatom <= jatom) THEN
                     CALL dgemm("N", "N", ncoa, nsgfb(jset), nsgfa(iset), &
                                1.0_dp, sphi_a(1, sgfa), SIZE(sphi_a, 1), &
                                p_block(sgfa, sgfb), SIZE(p_block, 1), &
                                0.0_dp, work(1, 1), SIZE(work, 1))
                     CALL dgemm("N", "T", ncoa, ncob, nsgfb(jset), &
                                1.0_dp, work(1, 1), SIZE(work, 1), &
                                sphi_b(1, sgfb), SIZE(sphi_b, 1), &
                                0.0_dp, pab(1, 1), SIZE(pab, 1))
                  ELSE
                     CALL dgemm("N", "N", ncob, nsgfa(iset), nsgfb(jset), &
                                1.0_dp, sphi_b(1, sgfb), SIZE(sphi_b, 1), &
                                p_block(sgfb, sgfa), SIZE(p_block, 1), &
                                0.0_dp, work(1, 1), SIZE(work, 1))
                     CALL dgemm("N", "T", ncob, ncoa, nsgfa(iset), &
                                1.0_dp, work(1, 1), SIZE(work, 1), &
                                sphi_a(1, sgfa), SIZE(sphi_a, 1), &
                                0.0_dp, pab(1, 1), SIZE(pab, 1))
                  END IF
               END IF

               IF (iatom <= jatom) THEN
                  hab(1:ncoa, 1:ncob) = 0._dp
               ELSE
                  hab(1:ncob, 1:ncoa) = 0._dp
               ENDIF

               iset_old = iset
               jset_old = jset

            ENDIF

            rab(1) = dist_ab(1, itask)
            rab(2) = dist_ab(2, itask)
            rab(3) = dist_ab(3, itask)
            rab2 = DOT_PRODUCT(rab, rab)
            rb(1) = ra(1) + rab(1)
            rb(2) = ra(2) + rab(2)
            rb(3) = ra(3) + rab(3)
            zetp = zeta(ipgf, iset) + zetb(jpgf, jset)
            dab = SQRT(rab2)

            na1 = (ipgf - 1)*ncoset(la_max(iset)) + 1
            na2 = ipgf*ncoset(la_max(iset))
            nb1 = (jpgf - 1)*ncoset(lb_max(jset)) + 1
            nb2 = jpgf*ncoset(lb_max(jset))

            ! check whether we need to use Fawzi's generalised collocation scheme
            IF (rs_v(igrid_level)%rs_grid%desc%distributed) THEN
               ! tasks(4,:) is 0 for replicated, 1 for distributed, 2 for exceptional distributed tasks
               IF (tasks(4, itask) .EQ. 2) THEN
                  use_subpatch = .TRUE.
               ELSE
                  use_subpatch = .FALSE.
               ENDIF
            ELSE
               use_subpatch = .FALSE.
            ENDIF
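            ! The primitive integration below comes in four variants: with or without a
            ! density block (pab), and with the roles of a and b exchanged when
            ! iatom > jatom (using rab_inv = -rab), so that only the stored triangle of
            ! each matrix block is referenced.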
            IF (pab_required) THEN
               IF (iatom <= jatom) THEN
                  CALL integrate_pgf_product_rspace( &
                     la_max(iset), zeta(ipgf, iset), la_min(iset), &
                     lb_max(jset), zetb(jpgf, jset), lb_min(jset), &
                     ra, rab, rab2, rs_v(igrid_level)%rs_grid, cell, &
                     cube_info(igrid_level), &
                     hab, pab=pab, o1=na1 - 1, o2=nb1 - 1, &
                     eps_gvg_rspace=eps_gvg_rspace, &
                     calculate_forces=calculate_forces, &
                     force_a=force_a, force_b=force_b, &
                     compute_tau=my_compute_tau, map_consistent=map_consistent, &
                     use_virial=use_virial, my_virial_a=my_virial_a, &
                     my_virial_b=my_virial_b, use_subpatch=use_subpatch, subpatch_pattern=tasks(6, itask))
               ELSE
                  rab_inv = -rab
                  CALL integrate_pgf_product_rspace( &
                     lb_max(jset), zetb(jpgf, jset), lb_min(jset), &
                     la_max(iset), zeta(ipgf, iset), la_min(iset), &
                     rb, rab_inv, rab2, rs_v(igrid_level)%rs_grid, cell, &
                     cube_info(igrid_level), &
                     hab, pab=pab, o1=nb1 - 1, o2=na1 - 1, &
                     eps_gvg_rspace=eps_gvg_rspace, &
                     calculate_forces=calculate_forces, &
                     force_a=force_b, force_b=force_a, &
                     compute_tau=my_compute_tau, map_consistent=map_consistent, &
                     use_virial=use_virial, my_virial_a=my_virial_b, &
                     my_virial_b=my_virial_a, use_subpatch=use_subpatch, subpatch_pattern=tasks(6, itask))
               END IF
            ELSE
               IF (iatom <= jatom) THEN
                  CALL integrate_pgf_product_rspace( &
                     la_max(iset), zeta(ipgf, iset), la_min(iset), &
                     lb_max(jset), zetb(jpgf, jset), lb_min(jset), &
                     ra, rab, rab2, rs_v(igrid_level)%rs_grid, cell, &
                     cube_info(igrid_level), &
                     hab, o1=na1 - 1, o2=nb1 - 1, &
                     eps_gvg_rspace=eps_gvg_rspace, &
                     calculate_forces=calculate_forces, &
                     force_a=force_a, force_b=force_b, &
                     compute_tau=my_compute_tau, &
                     map_consistent=map_consistent, use_subpatch=use_subpatch, subpatch_pattern=tasks(6, itask))
               ELSE
                  rab_inv = -rab
                  CALL integrate_pgf_product_rspace( &
                     lb_max(jset), zetb(jpgf, jset), lb_min(jset), &
                     la_max(iset), zeta(ipgf, iset), la_min(iset), &
                     rb, rab_inv, rab2, rs_v(igrid_level)%rs_grid, cell, &
                     cube_info(igrid_level), &
                     hab, o1=nb1 - 1, o2=na1 - 1, &
                     eps_gvg_rspace=eps_gvg_rspace, &
                     calculate_forces=calculate_forces, &
                     force_a=force_b, force_b=force_a, &
                     compute_tau=my_compute_tau, &
                     map_consistent=map_consistent, use_subpatch=use_subpatch, subpatch_pattern=tasks(6, itask))
               END IF
            END IF

            new_set_pair_coming = .FALSE.
            atom_pair_done = .FALSE.
            IF (itask < task_list%taskstop(ipair, igrid_level)) THEN
               CALL int2pair(tasks(3, itask + 1), ilevel, img, iatom, jatom, iset_new, jset_new, ipgf_new, jpgf_new, &
                             nimages, natom, maxset, maxpgf)
               IF (iset_new .NE. iset .OR. jset_new .NE. jset) THEN
                  new_set_pair_coming = .TRUE.
               ENDIF
            ELSE
               ! do not forget the last block
               new_set_pair_coming = .TRUE.
               atom_pair_done = .TRUE.
            ENDIF
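            ! Back-transformation of the completed primitive Cartesian block:
            !    h_block <- h_block + TRANSPOSE(sphi_a) * hab * sphi_b
            ! (with a and b exchanged for iatom > jatom), as implemented by the two
            ! dgemm calls below.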
            ! contract the block into h if we're done with the current set pair
            IF (new_set_pair_coming) THEN
               IF (iatom <= jatom) THEN
                  CALL dgemm("N", "N", ncoa, nsgfb(jset), ncob, &
                             1.0_dp, hab(1, 1), SIZE(hab, 1), &
                             sphi_b(1, sgfb), SIZE(sphi_b, 1), &
                             0.0_dp, work(1, 1), SIZE(work, 1))
                  CALL dgemm("T", "N", nsgfa(iset), nsgfb(jset), ncoa, &
                             1.0_dp, sphi_a(1, sgfa), SIZE(sphi_a, 1), &
                             work(1, 1), SIZE(work, 1), &
                             1.0_dp, h_block(sgfa, sgfb), SIZE(h_block, 1))
               ELSE
                  CALL dgemm("N", "N", ncob, nsgfa(iset), ncoa, &
                             1.0_dp, hab(1, 1), SIZE(hab, 1), &
                             sphi_a(1, sgfa), SIZE(sphi_a, 1), &
                             0.0_dp, work(1, 1), SIZE(work, 1))
                  CALL dgemm("T", "N", nsgfb(jset), nsgfa(iset), ncob, &
                             1.0_dp, sphi_b(1, sgfb), SIZE(sphi_b, 1), &
                             work(1, 1), SIZE(work, 1), &
                             1.0_dp, h_block(sgfb, sgfa), SIZE(h_block, 1))
               END IF
            END IF

            IF (atom_pair_done) THEN
!$OMP CRITICAL(force_critical)
               IF (iatom == jatom) THEN
                  scalef = 1.0_dp
               ELSE
                  scalef = 2.0_dp
               END IF
               IF (calculate_forces) THEN
                  force(ikind)%rho_elec(:, atom_a) = &
                     force(ikind)%rho_elec(:, atom_a) + scalef*admm_scal_fac*force_a(:)
                  force(jkind)%rho_elec(:, atom_b) = &
                     force(jkind)%rho_elec(:, atom_b) + scalef*admm_scal_fac*force_b(:)
               ENDIF
               IF (use_virial) THEN
                  IF (use_virial .AND. calculate_forces) THEN
                     virial%pv_virial = virial%pv_virial + scalef*admm_scal_fac*my_virial_a
                     virial%pv_virial = virial%pv_virial + scalef*admm_scal_fac*my_virial_b
                  END IF
               END IF
!$OMP END CRITICAL (force_critical)
            ENDIF
         END DO loop_tasks
         END DO loop_pairs
!$OMP END DO

         DO img = 1, nimages
            CALL dbcsr_finalize(dhmat(img)%matrix)
         END DO

      END DO loop_gridlevels

!$OMP END PARALLEL

      IF (debug_this_module) THEN
         DEALLOCATE (block_touched)
      END IF

      IF (h_duplicated) THEN
         ! Reconstruct H matrix if using distributed RS grids
         ! note send and recv direction reversed WRT collocate
         scatter = .FALSE.
         IF (do_kp) THEN
            CALL rs_distribute_matrix(rs_descs, dhmat, atom_pair_recv, atom_pair_send, &
                                      natom, nimages, scatter, hmats=hmat_kp)
         ELSE
            ALLOCATE (htemp(1))
            htemp(1)%matrix => hmat%matrix

            CALL rs_distribute_matrix(rs_descs, dhmat, atom_pair_recv, atom_pair_send, &
                                      natom, nimages, scatter, hmats=htemp)

            NULLIFY (htemp(1)%matrix)
            DEALLOCATE (htemp)
         END IF
         CALL dbcsr_deallocate_matrix_set(dhmat)
      ELSE
         DO img = 1, nimages
            NULLIFY (dhmat(img)%matrix)
         END DO
         DEALLOCATE (dhmat)
      END IF

      IF (pab_required) THEN
         IF (p_duplicated) THEN
            CALL dbcsr_deallocate_matrix_set(deltap)
         ELSE
            DO img = 1, nimages
               NULLIFY (deltap(img)%matrix)
            END DO
            DEALLOCATE (deltap)
         END IF
      END IF

      ! *** Release work storage ***
      DEALLOCATE (habt, workt)
      IF (pab_required) THEN
         DEALLOCATE (pabt)
      END IF

      IF (ASSOCIATED(rs_v)) THEN
         DO i = 1, SIZE(rs_v)
            CALL rs_grid_release(rs_v(i)%rs_grid)
         END DO
      END IF

      IF (calculate_forces) THEN
         DEALLOCATE (atom_of_kind)
      END IF

      CALL timestop(handle)

   END SUBROUTINE integrate_v_rspace

END MODULE qs_integrate_potential_product