1!--------------------------------------------------------------------------------------------------! 2! CP2K: A general program to perform molecular dynamics simulations ! 3! Copyright (C) 2000 - 2019 CP2K developers group ! 4!--------------------------------------------------------------------------------------------------! 5 6! ************************************************************************************************** 7!> \brief Rountines to calculate gradients of RI-GPW-MP2 energy using pw 8!> \par History 9!> 10.2013 created [Mauro Del Ben] 10! ************************************************************************************************** 11MODULE mp2_ri_grad 12 USE atomic_kind_types, ONLY: atomic_kind_type,& 13 get_atomic_kind_set 14 USE basis_set_types, ONLY: gto_basis_set_type 15 USE cell_types, ONLY: cell_type,& 16 pbc 17 USE cp_blacs_env, ONLY: cp_blacs_env_type 18 USE cp_control_types, ONLY: dft_control_type 19 USE cp_dbcsr_operations, ONLY: copy_dbcsr_to_fm,& 20 dbcsr_deallocate_matrix_set 21 USE cp_eri_mme_interface, ONLY: cp_eri_mme_param 22 USE cp_fm_struct, ONLY: cp_fm_struct_create,& 23 cp_fm_struct_release,& 24 cp_fm_struct_type 25 USE cp_fm_types, ONLY: cp_fm_create,& 26 cp_fm_get_info,& 27 cp_fm_indxg2l,& 28 cp_fm_indxg2p,& 29 cp_fm_indxl2g,& 30 cp_fm_release,& 31 cp_fm_set_all,& 32 cp_fm_type 33 USE cp_gemm_interface, ONLY: cp_gemm 34 USE cp_para_env, ONLY: cp_para_env_create,& 35 cp_para_env_release 36 USE cp_para_types, ONLY: cp_para_env_type 37 USE dbcsr_api, ONLY: & 38 dbcsr_add, dbcsr_copy, dbcsr_copy_into_existing, dbcsr_create, dbcsr_multiply, & 39 dbcsr_p_type, dbcsr_release, dbcsr_set, dbcsr_transposed, dbcsr_type, & 40 dbcsr_type_no_symmetry, dbcsr_type_symmetric 41 USE gaussian_gridlevels, ONLY: gaussian_gridlevel 42 USE input_constants, ONLY: do_eri_gpw,& 43 do_eri_mme 44 USE kinds, ONLY: dp 45 USE mathconstants, ONLY: fourpi 46 USE message_passing, ONLY: & 47 mp_alltoall, mp_comm_split_direct, mp_irecv, mp_isend, 
mp_request_null, mp_sendrecv, & 48 mp_sum, mp_wait, mp_waitall 49 USE mp2_eri, ONLY: mp2_eri_2c_integrate,& 50 mp2_eri_3c_integrate,& 51 mp2_eri_deallocate_forces,& 52 mp2_eri_force 53 USE mp2_eri_gpw, ONLY: calc_potential_gpw,& 54 cleanup_gpw,& 55 prepare_gpw 56 USE mp2_types, ONLY: integ_mat_buffer_type,& 57 integ_mat_buffer_type_2D,& 58 mp2_type 59 USE orbital_pointers, ONLY: ncoset 60 USE particle_types, ONLY: particle_type 61 USE pw_env_types, ONLY: pw_env_get,& 62 pw_env_type 63 USE pw_methods, ONLY: pw_copy,& 64 pw_derive,& 65 pw_integral_ab,& 66 pw_scale,& 67 pw_transfer 68 USE pw_poisson_methods, ONLY: pw_poisson_solve 69 USE pw_poisson_types, ONLY: pw_poisson_type 70 USE pw_pool_types, ONLY: pw_pool_create_pw,& 71 pw_pool_give_back_pw,& 72 pw_pool_type 73 USE pw_types, ONLY: COMPLEXDATA1D,& 74 REALDATA3D,& 75 REALSPACE,& 76 RECIPROCALSPACE,& 77 pw_p_type 78 USE qs_collocate_density, ONLY: calculate_rho_elec,& 79 calculate_wavefunction 80 USE qs_environment_types, ONLY: get_qs_env,& 81 qs_environment_type 82 USE qs_force_types, ONLY: qs_force_type 83 USE qs_integrate_potential, ONLY: integrate_pgf_product_rspace,& 84 integrate_v_rspace 85 USE qs_kind_types, ONLY: get_qs_kind,& 86 qs_kind_type 87 USE qs_ks_types, ONLY: qs_ks_env_type 88 USE qs_neighbor_list_types, ONLY: neighbor_list_set_p_type 89 USE realspace_grid_types, ONLY: realspace_grid_desc_p_type,& 90 realspace_grid_p_type,& 91 rs_grid_release,& 92 rs_grid_retain 93 USE rs_pw_interface, ONLY: potential_pw2rs 94 USE task_list_types, ONLY: task_list_type 95 USE util, ONLY: get_limit 96 USE virial_types, ONLY: virial_type 97#include "./base/base_uses.f90" 98 99 IMPLICIT NONE 100 101 PRIVATE 102 103 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'mp2_ri_grad' 104 105 PUBLIC :: calc_ri_mp2_nonsep 106 107CONTAINS 108 109! 
! **************************************************************************************************
!> \brief Calculate the non-separable part of the RI-MP2 gradients and update
!>        the Lagrangian (mixed AO/MO matrices Lag_mu_i_1 and Lag_nu_a_2).
!>        Works for both closed-shell and open-shell (alpha/beta) cases and
!>        supports two integral backends (GPW and MME).
!> \param qs_env           Quickstep environment
!> \param mp2_env          MP2 environment holding the RI-gradient intermediates
!>                         (Gamma_PQ, G_P_ia, mo_coeff_o/v, PQ_half, ...)
!> \param para_env         global parallel environment
!> \param para_env_sub     parallel environment of the subgroup
!> \param cell             simulation cell
!> \param particle_set     atoms/particles
!> \param atomic_kind_set  atomic kinds
!> \param qs_kind_set      QS kinds (provide the "RI_AUX" basis sets)
!> \param mo_coeff         (alpha) MO coefficient matrix
!> \param nmo              total number of MOs
!> \param homo             number of occupied (alpha) orbitals
!> \param dimen_RI         size of the RI auxiliary basis
!> \param Eigenval         (alpha) orbital eigenvalues
!> \param my_group_L_start first auxiliary basis function handled by this group
!> \param my_group_L_end   last auxiliary basis function handled by this group
!> \param my_group_L_size  number of auxiliary basis functions of this group
!> \param sab_orb_sub      orbital neighbor lists of the subgroup
!> \param mat_munu         work matrix in the AO/AO (mu,nu) basis
!> \param blacs_env_sub    BLACS environment of the subgroup
!> \param Eigenval_beta    beta orbital eigenvalues (open shell only)
!> \param homo_beta        number of occupied beta orbitals (open shell only)
!> \param mo_coeff_beta    beta MO coefficient matrix (open shell only)
!> \author Mauro Del Ben
! **************************************************************************************************
   SUBROUTINE calc_ri_mp2_nonsep(qs_env, mp2_env, para_env, para_env_sub, cell, particle_set, &
                                 atomic_kind_set, qs_kind_set, mo_coeff, nmo, homo, dimen_RI, Eigenval, &
                                 my_group_L_start, my_group_L_end, my_group_L_size, sab_orb_sub, mat_munu, &
                                 blacs_env_sub, Eigenval_beta, homo_beta, mo_coeff_beta)
      TYPE(qs_environment_type), POINTER                 :: qs_env
      TYPE(mp2_type), POINTER                            :: mp2_env
      TYPE(cp_para_env_type), POINTER                    :: para_env, para_env_sub
      TYPE(cell_type), POINTER                           :: cell
      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
      TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
      TYPE(cp_fm_type), POINTER                          :: mo_coeff
      INTEGER, INTENT(IN)                                :: nmo, homo, dimen_RI
      REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: Eigenval
      INTEGER, INTENT(IN)                                :: my_group_L_start, my_group_L_end, &
                                                            my_group_L_size
      TYPE(neighbor_list_set_p_type), DIMENSION(:), &
         POINTER                                         :: sab_orb_sub
      TYPE(dbcsr_p_type), INTENT(INOUT)                  :: mat_munu
      TYPE(cp_blacs_env_type), POINTER                   :: blacs_env_sub
      REAL(KIND=dp), DIMENSION(:), INTENT(IN), OPTIONAL  :: Eigenval_beta
      INTEGER, INTENT(IN), OPTIONAL                      :: homo_beta
      TYPE(cp_fm_type), OPTIONAL, POINTER                :: mo_coeff_beta

      CHARACTER(LEN=*), PARAMETER :: routineN = 'calc_ri_mp2_nonsep', &
         routineP = moduleN//':'//routineN

      INTEGER :: alpha, atom_a, beta, dimen, dir, eri_method, handle, handle2, handle3, i, iatom, &
         igrid_level, ikind, iorb, ipgf, iset, itmp(2), L_counter, lb(3), LLL, location(3), &
         my_P_end, my_P_size, my_P_start, na1, na2, natom, ncoa, nseta, offset, potential_type, &
         sgfa, tp(3), ub(3), virtual, virtual_beta
      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: atom_of_kind, kind_of
      INTEGER, DIMENSION(3)                              :: comp
      INTEGER, DIMENSION(:), POINTER                     :: la_max, la_min, npgfa, nsgfa
      INTEGER, DIMENSION(:, :), POINTER                  :: first_sgfa
      LOGICAL                                            :: alpha_beta, map_it_here, skip_shell, &
                                                            use_virial
      REAL(KIND=dp)                                      :: cutoff_old, e_hartree, eps_filter, &
                                                            factor, omega, pair_energy, rab2, &
                                                            relative_cutoff_old, total_rho
      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: e_cutoff_old, wf_vector
      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: G_PQ_local, I_ab
      REAL(KIND=dp), DIMENSION(3)                        :: force_a, force_b, ra, rab
      REAL(KIND=dp), DIMENSION(3, 3)                     :: h_stress, my_virial_a, my_virial_b
      REAL(KIND=dp), DIMENSION(:), POINTER               :: set_radius_a
      REAL(KIND=dp), DIMENSION(:, :), POINTER            :: I_tmp2, pab, rpgfa, sphi_a, zeta
      TYPE(cp_eri_mme_param), POINTER                    :: eri_param
      TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_tmp
      TYPE(cp_fm_type), POINTER                          :: L1_mu_i, L1_mu_i_beta, L2_nu_a, &
                                                            L2_nu_a_beta
      TYPE(dbcsr_p_type) :: Lag_mu_i_1, Lag_mu_i_1_beta, Lag_nu_a_2, Lag_nu_a_2_beta, &
         matrix_P_inu, matrix_P_inu_beta, matrix_P_munu, matrix_P_munu_nosym
      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: G_P_ia, G_P_ia_beta, mat_munu_local, &
                                                            matrix_P_munu_local
      TYPE(dbcsr_type), POINTER                          :: mo_coeff_o, mo_coeff_o_beta, mo_coeff_v, &
                                                            mo_coeff_v_beta
      TYPE(dft_control_type), POINTER                    :: dft_control
      TYPE(gto_basis_set_type), POINTER                  :: basis_set_a
      TYPE(mp2_eri_force), ALLOCATABLE, DIMENSION(:)     :: force_2c, force_3c_aux, force_3c_orb_mu, &
                                                            force_3c_orb_nu
      TYPE(pw_env_type), POINTER                         :: pw_env_sub
      TYPE(pw_p_type)                                    :: dvg(3), pot_g, psi_L, psi_L_beta, rho_g, &
                                                            rho_r, temp_pw_g
      TYPE(pw_poisson_type), POINTER                     :: poisson_env
      TYPE(pw_pool_type), POINTER                        :: auxbas_pw_pool
      TYPE(qs_force_type), DIMENSION(:), POINTER         :: force
      TYPE(qs_ks_env_type), POINTER                      :: ks_env
      TYPE(realspace_grid_desc_p_type), DIMENSION(:), &
         POINTER                                         :: rs_descs
      TYPE(realspace_grid_p_type), DIMENSION(:), POINTER :: rs_v
      TYPE(task_list_type), POINTER                      :: task_list_sub
      TYPE(virial_type), POINTER                         :: virial

      CALL timeset(routineN, handle)

      eri_method = mp2_env%eri_method
      eri_param => mp2_env%eri_mme_param

      ! Find out whether we have a closed or open shell
      alpha_beta = .FALSE.
      IF (PRESENT(homo_beta) .AND. PRESENT(Eigenval_beta)) alpha_beta = .TRUE.

      dimen = nmo
      virtual = dimen - homo
      potential_type = mp2_env%potential_parameter%potential_type
      omega = mp2_env%potential_parameter%omega
      IF (alpha_beta) virtual_beta = dimen - homo_beta
      eps_filter = mp2_env%mp2_gpw%eps_filter
      NULLIFY (mo_coeff_o, mo_coeff_v, G_P_ia, ks_env)
      mo_coeff_o => mp2_env%ri_grad%mo_coeff_o
      mo_coeff_v => mp2_env%ri_grad%mo_coeff_v
      G_P_ia => mp2_env%ri_grad%G_P_ia

      IF (alpha_beta) THEN
         NULLIFY (mo_coeff_o_beta, mo_coeff_v_beta, G_P_ia_beta)
         mo_coeff_o_beta => mp2_env%ri_grad%mo_coeff_o_beta
         mo_coeff_v_beta => mp2_env%ri_grad%mo_coeff_v_beta
         G_P_ia_beta => mp2_env%ri_grad%G_P_ia_beta
      ENDIF

      ! Replicate Gamma_PQ over the subgroup: each rank holds a slice
      ! [my_P_start:my_P_end], the mp_sum below makes G_PQ_local complete.
      itmp = get_limit(dimen_RI, para_env_sub%num_pe, para_env_sub%mepos)
      my_P_start = itmp(1)
      my_P_end = itmp(2)
      my_P_size = itmp(2) - itmp(1) + 1
      ALLOCATE (G_PQ_local(dimen_RI, my_group_L_size))

      G_PQ_local = 0.0_dp
      IF (.NOT. alpha_beta) THEN
         G_PQ_local(my_P_start:my_P_end, 1:my_group_L_size) = mp2_env%ri_grad%Gamma_PQ
      ELSE
         ! open shell: spin-average the two-center density
         G_PQ_local(my_P_start:my_P_end, 1:my_group_L_size) = &
            0.50_dp*(mp2_env%ri_grad%Gamma_PQ + mp2_env%ri_grad%Gamma_PQ_beta)
      ENDIF
      DEALLOCATE (mp2_env%ri_grad%Gamma_PQ)
      IF (alpha_beta) THEN
         DEALLOCATE (mp2_env%ri_grad%Gamma_PQ_beta)
      ENDIF
      CALL mp_sum(G_PQ_local, para_env_sub%group)

      ! deallocate here PQ_half, maybe useful in the future
      ! This is really bad style
      DEALLOCATE (mp2_env%ri_grad%PQ_half)

      ! create matrix holding the back transformation (G_P_inu)
      ALLOCATE (matrix_P_inu%matrix)
      CALL dbcsr_create(matrix_P_inu%matrix, template=mo_coeff_o)
      IF (alpha_beta) THEN
         ALLOCATE (matrix_P_inu_beta%matrix)
         CALL dbcsr_create(matrix_P_inu_beta%matrix, template=mo_coeff_o_beta)
      ENDIF

      ! non symmetric matrix
      ALLOCATE (matrix_P_munu_nosym%matrix)
      CALL dbcsr_create(matrix_P_munu_nosym%matrix, template=mat_munu%matrix, &
                        matrix_type=dbcsr_type_no_symmetry)

      ! create Lagrangian matrices in mixed AO/MO formalism
      ALLOCATE (Lag_mu_i_1%matrix)
      CALL dbcsr_create(Lag_mu_i_1%matrix, template=mo_coeff_o)
      CALL dbcsr_set(Lag_mu_i_1%matrix, 0.0_dp)
      IF (alpha_beta) THEN
         ALLOCATE (Lag_mu_i_1_beta%matrix)
         CALL dbcsr_create(Lag_mu_i_1_beta%matrix, template=mo_coeff_o_beta)
         CALL dbcsr_set(Lag_mu_i_1_beta%matrix, 0.0_dp)
      ENDIF

      ALLOCATE (Lag_nu_a_2%matrix)
      CALL dbcsr_create(Lag_nu_a_2%matrix, template=mo_coeff_v)
      CALL dbcsr_set(Lag_nu_a_2%matrix, 0.0_dp)
      IF (alpha_beta) THEN
         ALLOCATE (Lag_nu_a_2_beta%matrix)
         CALL dbcsr_create(Lag_nu_a_2_beta%matrix, template=mo_coeff_v_beta)
         CALL dbcsr_set(Lag_nu_a_2_beta%matrix, 0.0_dp)
      ENDIF

      ! get forces
      NULLIFY (force, virial)
      CALL get_qs_env(qs_env=qs_env, force=force, virial=virial)

      ! prepare integral derivatives with mme method
      IF (eri_method .EQ. do_eri_mme) THEN
         ALLOCATE (matrix_P_munu_local(my_group_L_size))
         ALLOCATE (mat_munu_local(my_group_L_size))
         L_counter = 0
         DO LLL = my_group_L_start, my_group_L_end
            L_counter = L_counter + 1
            ALLOCATE (matrix_P_munu_local(L_counter)%matrix)
            ALLOCATE (mat_munu_local(L_counter)%matrix)
            CALL dbcsr_create(matrix_P_munu_local(L_counter)%matrix, template=mat_munu%matrix, &
                              matrix_type=dbcsr_type_symmetric)
            CALL dbcsr_create(mat_munu_local(L_counter)%matrix, template=mat_munu%matrix, &
                              matrix_type=dbcsr_type_symmetric)
            CALL dbcsr_copy(mat_munu_local(L_counter)%matrix, mat_munu%matrix)
            CALL dbcsr_set(mat_munu_local(L_counter)%matrix, 0.0_dp)

            ! back-transform G_P_ia (MO) to the AO basis for this aux function
            IF (alpha_beta) THEN
               CALL G_P_transform_MO_to_AO(matrix_P_munu_local(L_counter), matrix_P_munu_nosym, mat_munu, &
                                           G_P_ia(L_counter), matrix_P_inu, &
                                           mo_coeff_v, mo_coeff_o, eps_filter, &
                                           G_P_ia_beta(L_counter), matrix_P_inu_beta, mo_coeff_v_beta, mo_coeff_o_beta)
            ELSE
               CALL G_P_transform_MO_to_AO(matrix_P_munu_local(L_counter), matrix_P_munu_nosym, mat_munu, &
                                           G_P_ia(L_counter), matrix_P_inu, &
                                           mo_coeff_v, mo_coeff_o, eps_filter)
            ENDIF
         ENDDO

         ! 2-center integral derivatives contracted with Gamma_PQ
         ALLOCATE (I_tmp2(dimen_RI, my_group_L_size))
         I_tmp2(:, :) = 0.0_dp
         CALL mp2_eri_2c_integrate(eri_param, potential_type, omega, para_env_sub, qs_env, &
                                   basis_type_a="RI_AUX", basis_type_b="RI_AUX", &
                                   hab=I_tmp2, first_b=my_group_L_start, last_b=my_group_L_end, &
                                   eri_method=eri_method, pab=G_PQ_local, force_a=force_2c)

         ! 3-center integral derivatives contracted with the AO density
         DEALLOCATE (I_tmp2)
         CALL mp2_eri_3c_integrate(eri_param, potential_type, omega, para_env_sub, qs_env, &
                                   first_c=my_group_L_start, last_c=my_group_L_end, mat_ab=mat_munu_local, &
                                   basis_type_a="ORB", basis_type_b="ORB", basis_type_c="RI_AUX", &
                                   sab_nl=sab_orb_sub, eri_method=eri_method, &
                                   pabc=matrix_P_munu_local, &
                                   force_a=force_3c_orb_mu, force_b=force_3c_orb_nu, force_c=force_3c_aux)

         L_counter = 0
         DO LLL = my_group_L_start, my_group_L_end
            L_counter = L_counter + 1
            ! we recompute matrix_P_inu
            CALL dbcsr_multiply("N", "T", 1.0_dp, mo_coeff_v, G_P_ia(L_counter)%matrix, &
                                0.0_dp, matrix_P_inu%matrix, filter_eps=eps_filter)
            IF (alpha_beta) THEN
               CALL dbcsr_multiply("N", "T", 1.0_dp, mo_coeff_v_beta, G_P_ia_beta(L_counter)%matrix, &
                                   0.0_dp, matrix_P_inu_beta%matrix, filter_eps=eps_filter)
            ENDIF

            IF (alpha_beta) THEN
               CALL update_lagrangian(mat_munu_local(L_counter), matrix_P_inu, Lag_mu_i_1, &
                                      G_P_ia(L_counter), mo_coeff_o, Lag_nu_a_2, &
                                      eps_filter, &
                                      matrix_P_inu_beta, Lag_mu_i_1_beta, G_P_ia_beta(L_counter), mo_coeff_o_beta, Lag_nu_a_2_beta)
            ELSE
               CALL update_lagrangian(mat_munu_local(L_counter), matrix_P_inu, Lag_mu_i_1, &
                                      G_P_ia(L_counter), mo_coeff_o, Lag_nu_a_2, &
                                      eps_filter)

            ENDIF
         ENDDO

         ! collect all force contributions of the MME backend
         DO ikind = 1, SIZE(force)
            force(ikind)%mp2_non_sep(:, :) = -4.0_dp*force_2c(ikind)%forces(:, :) + &
                                             force_3c_orb_mu(ikind)%forces(:, :) + &
                                             force_3c_orb_nu(ikind)%forces(:, :) + &
                                             force_3c_aux(ikind)%forces(:, :)
         END DO

         CALL mp2_eri_deallocate_forces(force_2c)
         CALL mp2_eri_deallocate_forces(force_3c_aux)
         CALL mp2_eri_deallocate_forces(force_3c_orb_mu)
         CALL mp2_eri_deallocate_forces(force_3c_orb_nu)
         CALL dbcsr_deallocate_matrix_set(matrix_P_munu_local)
         CALL dbcsr_deallocate_matrix_set(mat_munu_local)

      ELSEIF (eri_method == do_eri_gpw) THEN
         ALLOCATE (matrix_P_munu%matrix)
         CALL dbcsr_create(matrix_P_munu%matrix, template=mat_munu%matrix, &
                           matrix_type=dbcsr_type_symmetric)

         CALL get_qs_env(qs_env, ks_env=ks_env)

         natom = SIZE(particle_set)

         ALLOCATE (kind_of(natom))
         ALLOCATE (atom_of_kind(natom))
         CALL get_atomic_kind_set(atomic_kind_set, kind_of=kind_of, atom_of_kind=atom_of_kind)

         ! Supporting stuff for GPW
         CALL prepare_gpw(qs_env, dft_control, e_cutoff_old, cutoff_old, relative_cutoff_old, para_env_sub, pw_env_sub, &
                          auxbas_pw_pool, poisson_env, task_list_sub, rho_r, rho_g, pot_g, psi_L, sab_orb_sub)

         ! wave function vector and supporting stuff
         ALLOCATE (wf_vector(dimen_RI))
         IF (alpha_beta) THEN
            NULLIFY (psi_L_beta%pw)
            CALL pw_pool_create_pw(auxbas_pw_pool, psi_L_beta%pw, &
                                   use_data=REALDATA3D, &
                                   in_space=REALSPACE)
         ENDIF

         ! check if we want to calculate the virial
         use_virial = virial%pv_availability .AND. (.NOT. virial%pv_numer)

         ! in case virial is required we need auxiliary pw
         ! to calculate the MP2-volume contribution to the virial
         ! (hartree potential derivatives)
         IF (use_virial) THEN
            NULLIFY (temp_pw_g%pw)
            CALL pw_pool_create_pw(auxbas_pw_pool, temp_pw_g%pw, &
                                   use_data=COMPLEXDATA1D, &
                                   in_space=RECIPROCALSPACE)
            DO i = 1, 3
               NULLIFY (dvg(i)%pw)
               CALL pw_pool_create_pw(auxbas_pw_pool, dvg(i)%pw, &
                                      use_data=COMPLEXDATA1D, &
                                      in_space=RECIPROCALSPACE)
            END DO
         END IF

         ! start main loop over auxiliary basis functions
         CALL timeset(routineN//"_loop", handle2)

         L_counter = 0
         DO LLL = my_group_L_start, my_group_L_end
            L_counter = L_counter + 1

            IF (alpha_beta) THEN
               CALL G_P_transform_MO_to_AO(matrix_P_munu, matrix_P_munu_nosym, mat_munu, &
                                           G_P_ia(L_counter), &
                                           matrix_P_inu, &
                                           mo_coeff_v, mo_coeff_o, &
                                           eps_filter, &
                                           G_P_ia_beta(L_counter), matrix_P_inu_beta, &
                                           mo_coeff_v_beta, mo_coeff_o_beta)
            ELSE
               CALL G_P_transform_MO_to_AO(matrix_P_munu, matrix_P_munu_nosym, mat_munu, &
                                           G_P_ia(L_counter), &
                                           matrix_P_inu, &
                                           mo_coeff_v, mo_coeff_o, &
                                           eps_filter)
            ENDIF

            ! calculate potential associated to the single aux function
            CALL timeset(routineN//"_wf_pot", handle3)
            wf_vector = 0.0_dp
            wf_vector(LLL) = 1.0_dp
            ! pseudo psi_L
            CALL calculate_wavefunction(mo_coeff, 1, psi_L, rho_g, atomic_kind_set, &
                                        qs_kind_set, cell, dft_control, particle_set, pw_env_sub, &
                                        basis_type="RI_AUX", &
                                        external_vector=wf_vector)

            rho_r%pw%cr3d = psi_L%pw%cr3d
            IF (alpha_beta) THEN
               CALL calculate_wavefunction(mo_coeff_beta, 1, psi_L_beta, rho_g, atomic_kind_set, &
                                           qs_kind_set, cell, dft_control, particle_set, &
                                           pw_env_sub, basis_type='RI_AUX', &
                                           external_vector=wf_vector)
               rho_r%pw%cr3d = 0.50_dp*(rho_r%pw%cr3d+psi_L_beta%pw%cr3d)
            ENDIF

            CALL calc_potential_gpw(rho_r, rho_g, poisson_env, pot_g, potential_type, omega)
            CALL timestop(handle3)

            IF (use_virial) THEN
               ! make a copy of the density in G space
               ! calculate the potential derivatives in G space
               CALL timeset(routineN//"_Virial", handle3)
               CALL pw_copy(rho_g%pw, temp_pw_g%pw)
               DO i = 1, 3
                  comp = 0
                  comp(i) = 1
                  CALL pw_copy(pot_g%pw, dvg(i)%pw)
                  CALL pw_derive(dvg(i)%pw, comp)
               END DO
               CALL timestop(handle3)
            END IF

            ! integrate the potential of the single gaussian and update
            ! 2-center forces with Gamma_PQ
            CALL timeset(routineN//"_int_PQ", handle3)
            NULLIFY (rs_v)
            NULLIFY (rs_descs)
            CALL pw_env_get(pw_env_sub, rs_descs=rs_descs, rs_grids=rs_v)
            DO i = 1, SIZE(rs_v)
               CALL rs_grid_retain(rs_v(i)%rs_grid)
            END DO
            CALL potential_pw2rs(rs_v, rho_r, pw_env_sub)

            offset = 0
            DO iatom = 1, natom
               ikind = kind_of(iatom)
               atom_a = atom_of_kind(iatom)
               CALL get_qs_kind(qs_kind=qs_kind_set(ikind), basis_set=basis_set_a, &
                                basis_type="RI_AUX")

               first_sgfa => basis_set_a%first_sgf
               la_max => basis_set_a%lmax
               la_min => basis_set_a%lmin
               npgfa => basis_set_a%npgf
               nseta = basis_set_a%nset
               nsgfa => basis_set_a%nsgf_set
               rpgfa => basis_set_a%pgf_radius
               set_radius_a => basis_set_a%set_radius
               sphi_a => basis_set_a%sphi
               zeta => basis_set_a%zet

               ra(:) = pbc(particle_set(iatom)%r, cell)
               rab = 0.0_dp
               rab2 = 0.0_dp

               force_a(:) = 0.0_dp
               force_b(:) = 0.0_dp
               IF (use_virial) THEN
                  my_virial_a = 0.0_dp
                  my_virial_b = 0.0_dp
               END IF

               DO iset = 1, nseta
                  ncoa = npgfa(iset)*ncoset(la_max(iset))
                  sgfa = first_sgfa(1, iset)

                  ALLOCATE (I_tmp2(ncoa, 1))
                  I_tmp2 = 0.0_dp
                  ALLOCATE (I_ab(nsgfa(iset), 1))
                  I_ab = 0.0_dp
                  ALLOCATE (pab(ncoa, 1))
                  pab = 0.0_dp

                  I_ab(1:nsgfa(iset), 1) = -4.0_dp*G_PQ_local(offset + 1:offset + nsgfa(iset), L_counter)

                  ! transform from spherical to cartesian Gaussian coefficients
                  CALL dgemm("N", "N", ncoa, 1, nsgfa(iset), &
                             1.0_dp, sphi_a(1, sgfa), SIZE(sphi_a, 1), &
                             I_ab(1, 1), SIZE(I_ab, 1), &
                             0.0_dp, pab(1, 1), SIZE(pab, 1))

                  I_ab = 0.0_dp

                  igrid_level = gaussian_gridlevel(pw_env_sub%gridlevel_info, MINVAL(zeta(:, iset)))
                  map_it_here = .FALSE.
                  IF (.NOT. ALL(rs_v(igrid_level)%rs_grid%desc%perd == 1)) THEN
                     DO dir = 1, 3
                        ! bounds of local grid (i.e. removing the 'wings'), if periodic
                        tp(dir) = FLOOR(DOT_PRODUCT(cell%h_inv(dir, :), ra)*rs_v(igrid_level)%rs_grid%desc%npts(dir))
                        tp(dir) = MODULO(tp(dir), rs_v(igrid_level)%rs_grid%desc%npts(dir))
                        IF (rs_v(igrid_level)%rs_grid%desc%perd(dir) .NE. 1) THEN
                           lb(dir) = rs_v(igrid_level)%rs_grid%lb_local(dir) + rs_v(igrid_level)%rs_grid%desc%border
                           ub(dir) = rs_v(igrid_level)%rs_grid%ub_local(dir) - rs_v(igrid_level)%rs_grid%desc%border
                        ELSE
                           lb(dir) = rs_v(igrid_level)%rs_grid%lb_local(dir)
                           ub(dir) = rs_v(igrid_level)%rs_grid%ub_local(dir)
                        ENDIF
                        ! distributed grid, only map if it is local to the grid
                        location(dir) = tp(dir) + rs_v(igrid_level)%rs_grid%desc%lb(dir)
                     ENDDO
                     IF (lb(1) <= location(1) .AND. location(1) <= ub(1) .AND. &
                         lb(2) <= location(2) .AND. location(2) <= ub(2) .AND. &
                         lb(3) <= location(3) .AND. location(3) <= ub(3)) THEN
                        map_it_here = .TRUE.
                     ENDIF
                  ELSE
                     ! not distributed, just a round-robin distribution over the full set of CPUs
                     IF (MODULO(offset, para_env_sub%num_pe) == para_env_sub%mepos) map_it_here = .TRUE.
                  ENDIF

                  offset = offset + nsgfa(iset)

                  IF (map_it_here) THEN
                     DO ipgf = 1, npgfa(iset)
                        na1 = (ipgf - 1)*ncoset(la_max(iset)) + 1
                        na2 = ipgf*ncoset(la_max(iset))
                        igrid_level = gaussian_gridlevel(pw_env_sub%gridlevel_info, zeta(ipgf, iset))

                        CALL integrate_pgf_product_rspace(la_max=la_max(iset), zeta=zeta(ipgf, iset)/2.0_dp, la_min=la_min(iset), &
                                                          lb_max=0, zetb=zeta(ipgf, iset)/2.0_dp, lb_min=0, &
                                                          ra=ra, rab=rab, rab2=rab2, &
                                                          rsgrid=rs_v(igrid_level)%rs_grid, &
                                                          cell=cell, &
                                                          cube_info=pw_env_sub%cube_info(igrid_level), &
                                                          hab=I_tmp2, &
                                                          pab=pab, &
                                                          o1=na1 - 1, &
                                                          o2=0, &
                                                          map_consistent=.TRUE., &
                                                          eps_gvg_rspace=dft_control%qs_control%eps_gvg_rspace, &
                                                          calculate_forces=.TRUE., &
                                                          force_a=force_a, force_b=force_b, &
                                                          use_virial=use_virial, my_virial_a=my_virial_a, my_virial_b=my_virial_b)

                     END DO

                     ! CALL dgemm("T","N",nsgfa(iset),1,ncoa,&
                     !            1.0_dp,sphi_a(1,sgfa),SIZE(sphi_a,1),&
                     !            I_tmp2(1,1),SIZE(I_tmp2,1),&
                     !            1.0_dp,I_ab(1,1),SIZE(I_ab,1))
                     ! L_local_col(offset-nsgfa(iset)+1:offset,i_counter)=I_ab(1:nsgfa(iset),1)
                  END IF

                  DEALLOCATE (I_tmp2)
                  DEALLOCATE (I_ab)
                  DEALLOCATE (pab)

               END DO

               force(ikind)%rho_elec(:, atom_a) = &
                  force(ikind)%rho_elec(:, atom_a) + force_a(:) + force_b
               IF (use_virial) THEN
                  virial%pv_virial = virial%pv_virial + my_virial_a + my_virial_b
               END IF
            END DO

            DO i = 1, SIZE(rs_v)
               CALL rs_grid_release(rs_v(i)%rs_grid)
            END DO
            CALL timestop(handle3)
            ! here we are done with the 2 centers

            ! CALL cp_dbcsr_write_sparse_matrix(matrix_P_munu%matrix,4,12,qs_env,para_env_sub,&
            !                                   output_unit=unit_nr)

            ! integrate the potential of the single gaussian and update
            ! 3-center forces
            CALL timeset(routineN//"_int", handle3)
            CALL dbcsr_set(mat_munu%matrix, 0.0_dp)
            CALL integrate_v_rspace(rho_r, hmat=mat_munu, pmat=matrix_P_munu, &
                                    qs_env=qs_env, calculate_forces=.TRUE., compute_tau=.FALSE., gapw=.FALSE., &
                                    pw_env_external=pw_env_sub, &
                                    task_list_external=task_list_sub)
            CALL timestop(handle3)

            IF (alpha_beta) THEN
               CALL update_lagrangian(mat_munu, matrix_P_inu, Lag_mu_i_1, &
                                      G_P_ia(L_counter), mo_coeff_o, Lag_nu_a_2, &
                                      eps_filter, &
                                      matrix_P_inu_beta, Lag_mu_i_1_beta, G_P_ia_beta(L_counter), mo_coeff_o_beta, Lag_nu_a_2_beta)
            ELSE
               CALL update_lagrangian(mat_munu, matrix_P_inu, Lag_mu_i_1, &
                                      G_P_ia(L_counter), mo_coeff_o, Lag_nu_a_2, &
                                      eps_filter)

            ENDIF

            IF (use_virial) THEN
               ! add the volume contribution to the virial due to
               ! the (P|Q) integrals, first we put the full gamma_PQ
               ! pseudo wave-function into grid in order to calculate the
               ! hartree potential derivatives
               CALL timeset(routineN//"_Virial", handle3)
               wf_vector = 0.0_dp
               wf_vector(:) = -2.0_dp*G_PQ_local(:, L_counter)
               CALL calculate_wavefunction(mo_coeff, 1, psi_L, rho_g, atomic_kind_set, &
                                           qs_kind_set, cell, dft_control, particle_set, pw_env_sub, &
                                           basis_type="RI_AUX", &
                                           external_vector=wf_vector)
               ! transfer to reciprocal space and calculate potential
               rho_r%pw%cr3d = psi_L%pw%cr3d
               CALL pw_transfer(rho_r%pw, rho_g%pw)
               CALL pw_poisson_solve(poisson_env, rho_g%pw, pair_energy, pot_g%pw)
               ! update virial with volume term (first calculate hartree like energy (diagonal part of the virial))
               e_hartree = 0.0_dp
               h_stress = 0.0_dp
               e_hartree = pw_integral_ab(temp_pw_g%pw, pot_g%pw)
               DO alpha = 1, 3
                  comp = 0
                  comp(alpha) = 1
                  CALL pw_copy(pot_g%pw, rho_g%pw)
                  CALL pw_derive(rho_g%pw, comp)
                  h_stress(alpha, alpha) = -e_hartree
                  DO beta = alpha, 3
                     h_stress(alpha, beta) = h_stress(alpha, beta) &
                                             - 2.0_dp*pw_integral_ab(rho_g%pw, dvg(beta)%pw)/fourpi
                     h_stress(beta, alpha) = h_stress(alpha, beta)
                  END DO
               END DO
               ! divide by num_pe: every rank of the subgroup adds the same term
               virial%pv_virial = virial%pv_virial + h_stress/REAL(para_env_sub%num_pe, dp)
               CALL timestop(handle3)
            END IF

            ! put the gamma density on grid
            CALL timeset(routineN//"_Gpot", handle3)
            CALL calculate_rho_elec(matrix_p=matrix_P_munu%matrix, &
                                    rho=rho_r, &
                                    rho_gspace=rho_g, &
                                    total_rho=total_rho, &
                                    task_list_external=task_list_sub, &
                                    pw_env_external=pw_env_sub, &
                                    ks_env=ks_env)
            ! calculate associated hartree potential
            ! CALL pw_transfer(rho_r%pw, rho_g%pw)
            CALL pw_poisson_solve(poisson_env, rho_g%pw, pair_energy, pot_g%pw)
            CALL pw_transfer(pot_g%pw, rho_r%pw)
            CALL pw_scale(rho_r%pw, rho_r%pw%pw_grid%dvol)
            CALL timestop(handle3)

            IF (use_virial) THEN
               ! add the volume contribution to the virial coming from
               ! the 3-center integrals (mu nu|P)
               CALL timeset(routineN//"_Virial", handle3)
               e_hartree = 0.0_dp
               h_stress = 0.0_dp
               e_hartree = pw_integral_ab(temp_pw_g%pw, pot_g%pw)
               DO alpha = 1, 3
                  comp = 0
                  comp(alpha) = 1
                  CALL pw_copy(pot_g%pw, rho_g%pw)
                  CALL pw_derive(rho_g%pw, comp)
                  h_stress(alpha, alpha) = -e_hartree
                  DO beta = alpha, 3
                     h_stress(alpha, beta) = h_stress(alpha, beta) &
                                             - 2.0_dp*pw_integral_ab(rho_g%pw, dvg(beta)%pw)/fourpi
                     h_stress(beta, alpha) = h_stress(alpha, beta)
                  END DO
               END DO
               virial%pv_virial = virial%pv_virial + h_stress/REAL(para_env_sub%num_pe, dp)
               CALL timestop(handle3)
            END IF

            ! integrate potential with auxiliary basis function derivatives
            NULLIFY (rs_v)
            NULLIFY (rs_descs)
            CALL pw_env_get(pw_env_sub, rs_descs=rs_descs, rs_grids=rs_v)
            DO i = 1, SIZE(rs_v)
               CALL rs_grid_retain(rs_v(i)%rs_grid)
            END DO
            CALL potential_pw2rs(rs_v, rho_r, pw_env_sub)

            offset = 0
            DO iatom = 1, natom
               ikind = kind_of(iatom)
               atom_a = atom_of_kind(iatom)
               CALL get_qs_kind(qs_kind=qs_kind_set(ikind), basis_set=basis_set_a, &
                                basis_type="RI_AUX")

               first_sgfa => basis_set_a%first_sgf
               la_max => basis_set_a%lmax
               la_min => basis_set_a%lmin
               npgfa => basis_set_a%npgf
               nseta = basis_set_a%nset
               nsgfa => basis_set_a%nsgf_set
               rpgfa => basis_set_a%pgf_radius
               set_radius_a => basis_set_a%set_radius
               sphi_a => basis_set_a%sphi
               zeta => basis_set_a%zet

               ra(:) = pbc(particle_set(iatom)%r, cell)
               rab = 0.0_dp
               rab2 = 0.0_dp

               force_a(:) = 0.0_dp
               force_b(:) = 0.0_dp
               IF (use_virial) THEN
                  my_virial_a = 0.0_dp
                  my_virial_b = 0.0_dp
               END IF

               DO iset = 1, nseta
                  ncoa = npgfa(iset)*ncoset(la_max(iset))
                  sgfa = first_sgfa(1, iset)

                  ALLOCATE (I_tmp2(ncoa, 1))
                  I_tmp2 = 0.0_dp
                  ALLOCATE (I_ab(nsgfa(iset), 1))
                  I_ab = 0.0_dp
                  ALLOCATE (pab(ncoa, 1))
                  pab = 0.0_dp

                  ! only the shell containing aux function LLL contributes here
                  skip_shell = .TRUE.
                  DO iorb = 1, nsgfa(iset)
                     IF (iorb + offset == LLL) THEN
                        I_ab(iorb, 1) = 1.0_dp
                        skip_shell = .FALSE.
                     END IF
                  END DO

                  IF (skip_shell) THEN
                     offset = offset + nsgfa(iset)
                     DEALLOCATE (I_tmp2)
                     DEALLOCATE (I_ab)
                     DEALLOCATE (pab)
                     CYCLE
                  END IF

                  CALL dgemm("N", "N", ncoa, 1, nsgfa(iset), &
                             1.0_dp, sphi_a(1, sgfa), SIZE(sphi_a, 1), &
                             I_ab(1, 1), SIZE(I_ab, 1), &
                             0.0_dp, pab(1, 1), SIZE(pab, 1))
                  I_ab = 0.0_dp

                  igrid_level = gaussian_gridlevel(pw_env_sub%gridlevel_info, MINVAL(zeta(:, iset)))
                  map_it_here = .FALSE.
                  IF (.NOT. ALL(rs_v(igrid_level)%rs_grid%desc%perd == 1)) THEN
                     DO dir = 1, 3
                        ! bounds of local grid (i.e. removing the 'wings'), if periodic
                        tp(dir) = FLOOR(DOT_PRODUCT(cell%h_inv(dir, :), ra)*rs_v(igrid_level)%rs_grid%desc%npts(dir))
                        tp(dir) = MODULO(tp(dir), rs_v(igrid_level)%rs_grid%desc%npts(dir))
                        IF (rs_v(igrid_level)%rs_grid%desc%perd(dir) .NE. 1) THEN
                           lb(dir) = rs_v(igrid_level)%rs_grid%lb_local(dir) + rs_v(igrid_level)%rs_grid%desc%border
                           ub(dir) = rs_v(igrid_level)%rs_grid%ub_local(dir) - rs_v(igrid_level)%rs_grid%desc%border
                        ELSE
                           lb(dir) = rs_v(igrid_level)%rs_grid%lb_local(dir)
                           ub(dir) = rs_v(igrid_level)%rs_grid%ub_local(dir)
                        ENDIF
                        ! distributed grid, only map if it is local to the grid
                        location(dir) = tp(dir) + rs_v(igrid_level)%rs_grid%desc%lb(dir)
                     ENDDO
                     IF (lb(1) <= location(1) .AND. location(1) <= ub(1) .AND. &
                         lb(2) <= location(2) .AND. location(2) <= ub(2) .AND. &
                         lb(3) <= location(3) .AND. location(3) <= ub(3)) THEN
                        map_it_here = .TRUE.
                     ENDIF
                  ELSE
                     ! not distributed, just a round-robin distribution over the full set of CPUs
                     IF (MODULO(offset, para_env_sub%num_pe) == para_env_sub%mepos) map_it_here = .TRUE.
                  ENDIF

                  offset = offset + nsgfa(iset)

                  IF (map_it_here) THEN
                     DO ipgf = 1, npgfa(iset)
                        na1 = (ipgf - 1)*ncoset(la_max(iset)) + 1
                        na2 = ipgf*ncoset(la_max(iset))
                        igrid_level = gaussian_gridlevel(pw_env_sub%gridlevel_info, zeta(ipgf, iset))

                        CALL integrate_pgf_product_rspace(la_max=la_max(iset), zeta=zeta(ipgf, iset)/2.0_dp, la_min=la_min(iset), &
                                                          lb_max=0, zetb=zeta(ipgf, iset)/2.0_dp, lb_min=0, &
                                                          ra=ra, rab=rab, rab2=rab2, &
                                                          rsgrid=rs_v(igrid_level)%rs_grid, &
                                                          cell=cell, &
                                                          cube_info=pw_env_sub%cube_info(igrid_level), &
                                                          hab=I_tmp2, &
                                                          pab=pab, &
                                                          o1=na1 - 1, &
                                                          o2=0, &
                                                          map_consistent=.TRUE., &
                                                          eps_gvg_rspace=dft_control%qs_control%eps_gvg_rspace, &
                                                          calculate_forces=.TRUE., &
                                                          force_a=force_a, force_b=force_b, &
                                                          use_virial=use_virial, my_virial_a=my_virial_a, my_virial_b=my_virial_b)
                     END DO
                     ! CALL dgemm("T","N",nsgfa(iset),1,ncoa,&
                     !            1.0_dp,sphi_a(1,sgfa),SIZE(sphi_a,1),&
                     !            I_tmp2(1,1),SIZE(I_tmp2,1),&
                     !            1.0_dp,I_ab(1,1),SIZE(I_ab,1))
                     ! L_local_col(offset-nsgfa(iset)+1:offset,i_counter)=I_ab(1:nsgfa(iset),1)
                  END IF

                  DEALLOCATE (I_tmp2)
                  DEALLOCATE (I_ab)
                  DEALLOCATE (pab)

               END DO

               force(ikind)%rho_elec(:, atom_a) = &
                  force(ikind)%rho_elec(:, atom_a) + force_a(:) + force_b(:)
               IF (use_virial) THEN
                  virial%pv_virial = virial%pv_virial + my_virial_a + my_virial_b
               END IF
            END DO

            DO i = 1, SIZE(rs_v)
               CALL rs_grid_release(rs_v(i)%rs_grid)
            END DO

         END DO

         CALL timestop(handle2)

         DEALLOCATE (kind_of)
         DEALLOCATE (atom_of_kind)
         DEALLOCATE (wf_vector)

         IF (alpha_beta) THEN
            CALL pw_pool_give_back_pw(auxbas_pw_pool, psi_L_beta%pw)
         ENDIF

         IF (use_virial) THEN
            CALL pw_pool_give_back_pw(auxbas_pw_pool, temp_pw_g%pw)
            DO i = 1, 3
               CALL pw_pool_give_back_pw(auxbas_pw_pool, dvg(i)%pw)
            END DO
         END IF

         CALL cleanup_gpw(qs_env, e_cutoff_old, cutoff_old, relative_cutoff_old, pw_env_sub, &
                          task_list_sub, auxbas_pw_pool, rho_r, rho_g, pot_g, psi_L)

         CALL dbcsr_release(matrix_P_munu%matrix)
         DEALLOCATE (matrix_P_munu%matrix)

      ENDIF

      DEALLOCATE (G_PQ_local)

      CALL dbcsr_release(matrix_P_inu%matrix)
      DEALLOCATE (matrix_P_inu%matrix)

      CALL dbcsr_release(matrix_P_munu_nosym%matrix)
      DEALLOCATE (matrix_P_munu_nosym%matrix)

      ! release the full gamma_P_ia structure
      DEALLOCATE (G_P_ia)

      ! Release G_P_ia_beta and matrix_P_inu_beta
      IF (alpha_beta) THEN
         CALL dbcsr_release(matrix_P_inu_beta%matrix)
         DEALLOCATE (matrix_P_inu_beta%matrix)
         DEALLOCATE (G_P_ia_beta)
      ENDIF

      ! move the forces in the correct place
      IF (eri_method .EQ. do_eri_gpw) THEN
         DO ikind = 1, SIZE(force)
            force(ikind)%mp2_non_sep(:, :) = force(ikind)%rho_elec(:, :)
            force(ikind)%rho_elec(:, :) = 0.0_dp
         END DO
      ENDIF

      ! Now we move from the local matrices to the global ones
      ! defined over all MPI tasks
      ! Start with moving from the DBCSR to FM for the lagrangians
      NULLIFY (L1_mu_i, fm_struct_tmp)
      CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env_sub, context=blacs_env_sub, &
                               nrow_global=dimen, ncol_global=homo)
      CALL cp_fm_create(L1_mu_i, fm_struct_tmp, name="Lag_mu_i")
      CALL cp_fm_struct_release(fm_struct_tmp)
      CALL cp_fm_set_all(L1_mu_i, 0.0_dp)
      CALL copy_dbcsr_to_fm(matrix=Lag_mu_i_1%matrix, fm=L1_mu_i)

      ! release Lag_mu_i_1
      CALL dbcsr_release(Lag_mu_i_1%matrix)
      DEALLOCATE (Lag_mu_i_1%matrix)

      NULLIFY (L2_nu_a, fm_struct_tmp)
      CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env_sub, context=blacs_env_sub, &
                               nrow_global=dimen, ncol_global=virtual)
      CALL cp_fm_create(L2_nu_a, fm_struct_tmp, name="Lag_nu_a")
      CALL cp_fm_struct_release(fm_struct_tmp)
      CALL cp_fm_set_all(L2_nu_a, 0.0_dp)
      CALL copy_dbcsr_to_fm(matrix=Lag_nu_a_2%matrix, fm=L2_nu_a)

      ! release Lag_nu_a_2
      CALL dbcsr_release(Lag_nu_a_2%matrix)
      DEALLOCATE (Lag_nu_a_2%matrix)

      ! The same for the beta Lagrangians
      IF (alpha_beta) THEN
         ! Now we move from the local matrices to the global ones
         ! defined over all MPI tasks
         ! Start with moving from the DBCSR to FM for the lagrangians
         NULLIFY (L1_mu_i_beta, fm_struct_tmp)
         CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env_sub, context=blacs_env_sub, &
                                  nrow_global=dimen, ncol_global=homo_beta)
         CALL cp_fm_create(L1_mu_i_beta, fm_struct_tmp, name="Lag_mu_i_beta")
         CALL cp_fm_struct_release(fm_struct_tmp)
         CALL cp_fm_set_all(L1_mu_i_beta, 0.0_dp)
         CALL copy_dbcsr_to_fm(matrix=Lag_mu_i_1_beta%matrix, fm=L1_mu_i_beta)

         ! release Lag_mu_i_1
         CALL dbcsr_release(Lag_mu_i_1_beta%matrix)
         DEALLOCATE (Lag_mu_i_1_beta%matrix)

         NULLIFY (L2_nu_a_beta, fm_struct_tmp)
         CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env_sub, context=blacs_env_sub, &
                                  nrow_global=dimen, ncol_global=virtual_beta)
         CALL cp_fm_create(L2_nu_a_beta, fm_struct_tmp, name="Lag_nu_a_beta")
         CALL cp_fm_struct_release(fm_struct_tmp)
         CALL cp_fm_set_all(L2_nu_a_beta, 0.0_dp)
         CALL copy_dbcsr_to_fm(matrix=Lag_nu_a_2_beta%matrix, fm=L2_nu_a_beta)

         ! release Lag_nu_a_2
         CALL dbcsr_release(Lag_nu_a_2_beta%matrix)
         DEALLOCATE (Lag_nu_a_2_beta%matrix)
      ENDIF

      ! Set the factor to multiply P_ij (depends on the open or closed shell)
      factor = 1.0_dp
      IF (alpha_beta) factor = 0.50_dp

      CALL create_W_P(qs_env, mp2_env, mo_coeff, homo, virtual, dimen, para_env, para_env_sub, &
                      Eigenval, L1_mu_i, L2_nu_a, factor, .FALSE.)
      ! Alpha_beta_case
      IF (alpha_beta) THEN
         CALL create_W_P(qs_env, mp2_env, mo_coeff_beta, homo_beta, virtual_beta, dimen, para_env, &
                         para_env_sub, Eigenval_beta, L1_mu_i_beta, L2_nu_a_beta, &
                         factor, .TRUE.)
      ENDIF

      CALL timestop(handle)

   END SUBROUTINE calc_ri_mp2_nonsep

! **************************************************************************************************
!> \brief ...
!> \param G_P_munu ...
!> \param G_P_munu_nosym ...
!> \param mat_munu ...
!> \param G_P_ia ...
!> \param G_P_inu ...
!> \param mo_coeff_v ...
!> \param mo_coeff_o ...
!> \param eps_filter ...
!> \param G_P_ia_beta ...
!> \param G_P_inu_beta ...
!> \param mo_coeff_v_beta ...
!> \param mo_coeff_o_beta ...
! 
! **************************************************************************************************
!> \brief Back-transforms the Gamma intermediate from the MO basis to the AO basis
!>        and symmetrizes the result onto the sparsity pattern of mat_munu.
!> \param G_P_munu symmetrized AO-basis Gamma (output)
!> \param G_P_munu_nosym work matrix holding the unsymmetrized AO-basis Gamma
!> \param mat_munu matrix with the sparsity pattern required by integrate_v_rspace
!> \param G_P_ia Gamma in the MO basis (occupied x virtual), alpha/closed shell
!> \param G_P_inu work matrix for the half back-transformed Gamma, alpha/closed shell
!> \param mo_coeff_v virtual MO coefficients, alpha/closed shell
!> \param mo_coeff_o occupied MO coefficients, alpha/closed shell
!> \param eps_filter DBCSR filtering threshold for the multiplications
!> \param G_P_ia_beta Gamma in the MO basis, beta spin (open shell only)
!> \param G_P_inu_beta work matrix for the half back-transformed Gamma, beta spin (open shell only)
!> \param mo_coeff_v_beta virtual MO coefficients, beta spin (open shell only)
!> \param mo_coeff_o_beta occupied MO coefficients, beta spin (open shell only)
! **************************************************************************************************
   SUBROUTINE G_P_transform_MO_to_AO(G_P_munu, G_P_munu_nosym, mat_munu, G_P_ia, G_P_inu, &
                                     mo_coeff_v, mo_coeff_o, eps_filter, &
                                     G_P_ia_beta, G_P_inu_beta, mo_coeff_v_beta, mo_coeff_o_beta)
      TYPE(dbcsr_p_type), INTENT(INOUT)                  :: G_P_munu, G_P_munu_nosym, mat_munu
      TYPE(dbcsr_p_type), INTENT(IN)                     :: G_P_ia
      TYPE(dbcsr_p_type), INTENT(INOUT)                  :: G_P_inu
      TYPE(dbcsr_type), INTENT(IN)                       :: mo_coeff_v, mo_coeff_o
      REAL(KIND=dp), INTENT(IN)                          :: eps_filter
      TYPE(dbcsr_p_type), INTENT(IN), OPTIONAL           :: G_P_ia_beta
      TYPE(dbcsr_p_type), INTENT(INOUT), OPTIONAL        :: G_P_inu_beta
      TYPE(dbcsr_type), INTENT(IN), OPTIONAL             :: mo_coeff_v_beta, mo_coeff_o_beta

      CHARACTER(LEN=*), PARAMETER :: routineN = 'G_P_transform_MO_to_AO', &
         routineP = moduleN//':'//routineN

      INTEGER                                            :: handle
      LOGICAL                                            :: open_shell

      ! the beta channel is processed only when the complete set of beta arguments is supplied
      open_shell = PRESENT(G_P_ia_beta) .AND. PRESENT(G_P_inu_beta) .AND. &
                   PRESENT(mo_coeff_v_beta) .AND. PRESENT(mo_coeff_o_beta)

      ! alpha (or closed-shell) contribution, overwriting the work matrix
      CALL dbcsr_set(G_P_munu_nosym%matrix, 0.0_dp)
      CALL G_P_transform_alpha_beta(G_P_ia, G_P_inu, G_P_munu_nosym, mo_coeff_v, mo_coeff_o, &
                                    [1.0_dp, 0.0_dp], eps_filter)

      ! beta contribution: average the two spin channels (0.5*alpha + 0.5*beta)
      IF (open_shell) THEN
         CALL G_P_transform_alpha_beta(G_P_ia_beta, G_P_inu_beta, G_P_munu_nosym, &
                                       mo_coeff_v_beta, mo_coeff_o_beta, &
                                       [0.5_dp, 0.5_dp], eps_filter)
      END IF

      ! symmetrize: G_munu <- 2*(G_nosym + G_nosym^T)
      CALL timeset(routineN//"_symmetrize", handle)
      CALL dbcsr_set(G_P_munu%matrix, 0.0_dp)
      CALL dbcsr_transposed(G_P_munu%matrix, G_P_munu_nosym%matrix)
      CALL dbcsr_add(G_P_munu%matrix, G_P_munu_nosym%matrix, &
                     alpha_scalar=2.0_dp, beta_scalar=2.0_dp)
      ! this is a trick to avoid that integrate_v_rspace starts to cry:
      ! force the result onto the sparsity pattern of mat_munu
      CALL dbcsr_copy_into_existing(mat_munu%matrix, G_P_munu%matrix)
      CALL dbcsr_copy(G_P_munu%matrix, mat_munu%matrix)

      CALL timestop(handle)

   END SUBROUTINE G_P_transform_MO_to_AO

! **************************************************************************************************
!> \brief Accumulates the AO-basis contribution of one spin channel into G_P_munu.
!> \param G_P_ia Gamma in the MO basis for this spin channel
!> \param G_P_inu work matrix receiving the half back-transformed Gamma
!> \param G_P_munu accumulator for the AO-basis Gamma
!> \param mo_coeff_v virtual MO coefficients of this spin channel
!> \param mo_coeff_o occupied MO coefficients of this spin channel
!> \param fraction_add scaling factors: (1) scales the new contribution, (2) scales the accumulator
!> \param eps_filter DBCSR filtering threshold for the multiplications
! **************************************************************************************************
   SUBROUTINE G_P_transform_alpha_beta(G_P_ia, G_P_inu, G_P_munu, mo_coeff_v, mo_coeff_o, fraction_add, eps_filter)
      TYPE(dbcsr_p_type), INTENT(IN)                     :: G_P_ia
      TYPE(dbcsr_p_type), INTENT(INOUT)                  :: G_P_inu, G_P_munu
      TYPE(dbcsr_type), INTENT(IN)                       :: mo_coeff_v, mo_coeff_o
      REAL(KIND=dp), DIMENSION(2), INTENT(IN)            :: fraction_add
      REAL(KIND=dp), INTENT(IN)                          :: eps_filter

      CHARACTER(LEN=*), PARAMETER :: routineN = 'G_P_transform_alpha_beta', &
         routineP = moduleN//':'//routineN

      INTEGER                                            :: handle

      ! first back-transformation: virtual index a -> AO index nu
      CALL timeset(routineN//"_back_v", handle)
      CALL dbcsr_set(G_P_inu%matrix, 0.0_dp)
      CALL dbcsr_multiply("N", "T", 1.0_dp, mo_coeff_v, G_P_ia%matrix, &
                          0.0_dp, G_P_inu%matrix, filter_eps=eps_filter)
      CALL timestop(handle)

      ! second back-transformation: occupied index i -> AO index mu
      CALL timeset(routineN//"_back_o", handle)
      CALL dbcsr_multiply("N", "T", fraction_add(1), G_P_inu%matrix, mo_coeff_o, &
                          fraction_add(2), G_P_munu%matrix, filter_eps=eps_filter)

      CALL timestop(handle)

   END SUBROUTINE G_P_transform_alpha_beta

! **************************************************************************************************
!> \brief ...
!> \param mat_munu ...
!> \param matrix_P_inu half back-transformed Gamma, alpha/closed shell (work matrix, overwritten)
!> \param Lag_mu_i_1 first Lagrangian, incremented in place
!> \param G_P_ia Gamma in the MO basis, alpha/closed shell (released inside)
!> \param mo_coeff_o occupied MO coefficients, alpha/closed shell
!> \param Lag_nu_a_2 second Lagrangian, incremented in place
!> \param eps_filter DBCSR filtering threshold for the multiplications
!> \param matrix_P_inu_beta half back-transformed Gamma, beta spin (open shell only)
!> \param Lag_mu_i_1_beta first Lagrangian, beta spin (open shell only)
!> \param G_P_ia_beta Gamma in the MO basis, beta spin (open shell only)
!> \param mo_coeff_o_beta occupied MO coefficients, beta spin (open shell only)
!> \param Lag_nu_a_2_beta second Lagrangian, beta spin (open shell only)
! **************************************************************************************************
   SUBROUTINE update_lagrangian(mat_munu, matrix_P_inu, Lag_mu_i_1, &
                                G_P_ia, mo_coeff_o, Lag_nu_a_2, &
                                eps_filter, &
                                matrix_P_inu_beta, Lag_mu_i_1_beta, G_P_ia_beta, mo_coeff_o_beta, Lag_nu_a_2_beta)
      TYPE(dbcsr_p_type), INTENT(IN)                     :: mat_munu
      TYPE(dbcsr_p_type), INTENT(INOUT)                  :: matrix_P_inu, Lag_mu_i_1, G_P_ia
      TYPE(dbcsr_type), POINTER                          :: mo_coeff_o
      TYPE(dbcsr_p_type), INTENT(INOUT)                  :: Lag_nu_a_2
      REAL(KIND=dp), INTENT(IN)                          :: eps_filter
      TYPE(dbcsr_p_type), INTENT(INOUT), OPTIONAL        :: matrix_P_inu_beta, Lag_mu_i_1_beta, &
                                                            G_P_ia_beta
      TYPE(dbcsr_type), OPTIONAL, POINTER                :: mo_coeff_o_beta
      TYPE(dbcsr_p_type), INTENT(INOUT), OPTIONAL        :: Lag_nu_a_2_beta

      CHARACTER(LEN=*), PARAMETER :: routineN = 'update_lagrangian', &
         routineP = moduleN//':'//routineN

      INTEGER                                            :: handle
      LOGICAL                                            :: open_shell

      CALL timeset(routineN//"_Lag", handle)

      ! the beta channel is processed only when the complete set of beta arguments is supplied
      open_shell = PRESENT(matrix_P_inu_beta) .AND. PRESENT(Lag_mu_i_1_beta) .AND. &
                   PRESENT(G_P_ia_beta) .AND. PRESENT(mo_coeff_o_beta) .AND. &
                   PRESENT(Lag_nu_a_2_beta)

      ! alpha (or closed-shell) channel
      CALL update_lagrangian_alpha_beta(mat_munu, matrix_P_inu, Lag_mu_i_1, &
                                        G_P_ia, mo_coeff_o, Lag_nu_a_2, eps_filter)

      ! beta channel
      IF (open_shell) THEN
         CALL update_lagrangian_alpha_beta(mat_munu, matrix_P_inu_beta, Lag_mu_i_1_beta, &
                                           G_P_ia_beta, mo_coeff_o_beta, Lag_nu_a_2_beta, eps_filter)
      END IF

      CALL timestop(handle)

   END SUBROUTINE update_lagrangian

!
! **************************************************************************************************
!> \brief Updates the two Lagrangians with the contribution of one spin channel.
!> \param mat_munu AO-basis matrix contracted against the Gamma intermediates
!> \param matrix_P_inu on entry the half back-transformed Gamma; overwritten with mat_munu*mo_coeff_o
!> \param Lag_mu_i_1 first Lagrangian, incremented in place
!> \param G_P_ia Gamma in the MO basis for this spin channel, released on exit
!> \param mo_coeff_o occupied MO coefficients of this spin channel
!> \param Lag_nu_a_2 second Lagrangian, incremented in place
!> \param eps_filter DBCSR filtering threshold for the multiplications
! **************************************************************************************************
   SUBROUTINE update_lagrangian_alpha_beta(mat_munu, matrix_P_inu, Lag_mu_i_1, &
                                           G_P_ia, mo_coeff_o, Lag_nu_a_2, &
                                           eps_filter)
      TYPE(dbcsr_p_type), INTENT(IN)                     :: mat_munu
      TYPE(dbcsr_p_type), INTENT(INOUT)                  :: matrix_P_inu, Lag_mu_i_1, G_P_ia
      TYPE(dbcsr_type), POINTER                          :: mo_coeff_o
      TYPE(dbcsr_p_type), INTENT(INOUT)                  :: Lag_nu_a_2
      REAL(KIND=dp), INTENT(IN)                          :: eps_filter

      CHARACTER(LEN=*), PARAMETER :: routineN = 'update_lagrangian_alpha_beta', &
         routineP = moduleN//':'//routineN

      ! NOTE: the statement order below is load-bearing; matrix_P_inu is consumed
      ! by the first contraction before it is recycled as a work matrix.

      ! contract mat_munu with the half back-transformed Gamma_i_nu
      ! in order to update Lag_mu_i_1
      CALL dbcsr_multiply("N", "N", 1.0_dp, mat_munu%matrix, matrix_P_inu%matrix, &
                          1.0_dp, Lag_mu_i_1%matrix, filter_eps=eps_filter)

      ! transform the first index of mat_munu; the result overwrites matrix_P_inu
      CALL dbcsr_set(matrix_P_inu%matrix, 0.0_dp)
      CALL dbcsr_multiply("N", "N", 1.0_dp, mat_munu%matrix, mo_coeff_o, &
                          0.0_dp, matrix_P_inu%matrix, filter_eps=eps_filter)

      ! contract the transformed matrix_P_inu with the untransformed Gamma_i_a
      ! in order to update Lag_nu_a_2
      CALL dbcsr_multiply("N", "N", -1.0_dp, matrix_P_inu%matrix, G_P_ia%matrix, &
                          1.0_dp, Lag_nu_a_2%matrix, filter_eps=eps_filter)

      ! this spin channel's Gamma_P_ia is no longer needed
      CALL dbcsr_release(G_P_ia%matrix)
      DEALLOCATE (G_P_ia%matrix)

   END SUBROUTINE update_lagrangian_alpha_beta

!
************************************************************************************************** 1183!> \brief ... 1184!> \param qs_env ... 1185!> \param mp2_env ... 1186!> \param mo_coeff ... 1187!> \param homo ... 1188!> \param virtual ... 1189!> \param dimen ... 1190!> \param para_env ... 1191!> \param para_env_sub ... 1192!> \param Eigenval ... 1193!> \param L1_mu_i ... 1194!> \param L2_nu_a ... 1195!> \param factor ... 1196!> \param alpha_beta ... 1197! ************************************************************************************************** 1198 SUBROUTINE create_W_P(qs_env, mp2_env, mo_coeff, homo, virtual, dimen, para_env, para_env_sub, & 1199 Eigenval, L1_mu_i, L2_nu_a, factor, alpha_beta) 1200 TYPE(qs_environment_type), POINTER :: qs_env 1201 TYPE(mp2_type), POINTER :: mp2_env 1202 TYPE(cp_fm_type), POINTER :: mo_coeff 1203 INTEGER, INTENT(IN) :: homo, virtual, dimen 1204 TYPE(cp_para_env_type), POINTER :: para_env, para_env_sub 1205 REAL(KIND=dp), DIMENSION(:), INTENT(IN) :: Eigenval 1206 TYPE(cp_fm_type), POINTER :: L1_mu_i, L2_nu_a 1207 REAL(KIND=dp), INTENT(IN) :: factor 1208 LOGICAL, INTENT(IN) :: alpha_beta 1209 1210 CHARACTER(LEN=*), PARAMETER :: routineN = 'create_W_P', routineP = moduleN//':'//routineN 1211 1212 INTEGER :: color_exchange, comm_exchange, dummy_proc, handle, handle2, handle3, i, i_global, & 1213 i_local, iiB, iii, iproc, itmp(2), j_global, j_local, jjB, max_col_size, max_row_size, & 1214 my_B_virtual_end, my_B_virtual_start, mypcol, mypcol_1i, mypcol_2a, myprow, myprow_1i, & 1215 myprow_2a, ncol_block, ncol_block_1i, ncol_block_2a, ncol_local, ncol_local_1i, & 1216 ncol_local_2a, npcol, npcol_1i, npcol_2a, nprow, nprow_1i, nprow_2a, nrow_block, & 1217 nrow_block_1i, nrow_block_2a, nrow_local, nrow_local_1i, nrow_local_2a, number_of_rec, & 1218 number_of_send, proc_receive, proc_receive_static, proc_send, proc_send_ex 1219 INTEGER :: proc_send_static, proc_send_sub, proc_shift, rec_col_size, rec_counter, & 1220 
rec_row_size, send_col_size, send_counter, send_pcol, send_prow, send_row_size, & 1221 size_rec_buffer, size_send_buffer 1222 INTEGER, ALLOCATABLE, DIMENSION(:) :: iii_vet, map_rec_size, map_send_size, pos_info, & 1223 pos_info_ex, proc_2_send_pos, proc_map_ex, req_send, sub_proc_map 1224 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: grid_2_mepos, mepos_2_grid, & 1225 mepos_2_grid_1i, mepos_2_grid_2a, & 1226 sizes, sizes_1i, sizes_2a 1227 INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: col_indeces_info_1i, & 1228 col_indeces_info_2a, & 1229 row_indeces_info_1i, & 1230 row_indeces_info_2a 1231 INTEGER, DIMENSION(:), POINTER :: col_indices, col_indices_1i, & 1232 col_indices_2a, row_indices, & 1233 row_indices_1i, row_indices_2a 1234 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: ab_rec, ab_send, mat_rec, mat_send 1235 TYPE(cp_blacs_env_type), POINTER :: blacs_env 1236 TYPE(cp_fm_struct_type), POINTER :: fm_struct_tmp 1237 TYPE(cp_fm_type), POINTER :: fm_P_ij, L_mu_q 1238 TYPE(cp_para_env_type), POINTER :: para_env_exchange 1239 TYPE(integ_mat_buffer_type), ALLOCATABLE, & 1240 DIMENSION(:) :: buffer_rec, buffer_send 1241 TYPE(integ_mat_buffer_type_2D), ALLOCATABLE, & 1242 DIMENSION(:) :: buffer_cyclic 1243 1244 CALL timeset(routineN, handle) 1245 1246 ! create the globally distributed mixed lagrangian 1247 NULLIFY (blacs_env) 1248 CALL get_qs_env(qs_env, blacs_env=blacs_env) 1249 1250 NULLIFY (L_mu_q, fm_struct_tmp) 1251 CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env, context=blacs_env, & 1252 nrow_global=dimen, ncol_global=dimen) 1253 CALL cp_fm_create(L_mu_q, fm_struct_tmp, name="Lag_mu_q") 1254 CALL cp_fm_struct_release(fm_struct_tmp) 1255 CALL cp_fm_set_all(L_mu_q, 0.0_dp) 1256 1257 ! 
create all information array 1258 ALLOCATE (pos_info(0:para_env%num_pe - 1)) 1259 pos_info = 0 1260 pos_info(para_env%mepos) = para_env_sub%mepos 1261 CALL mp_sum(pos_info, para_env%group) 1262 1263 ALLOCATE (sub_proc_map(-para_env_sub%num_pe:2*para_env_sub%num_pe - 1)) 1264 sub_proc_map = 0 1265 DO i = 0, para_env_sub%num_pe - 1 1266 sub_proc_map(i) = i 1267 sub_proc_map(-i - 1) = para_env_sub%num_pe - i - 1 1268 sub_proc_map(para_env_sub%num_pe + i) = i 1269 END DO 1270 1271 ! get matrix information for the global 1272 CALL cp_fm_get_info(matrix=L_mu_q, & 1273 nrow_local=nrow_local, & 1274 ncol_local=ncol_local, & 1275 row_indices=row_indices, & 1276 col_indices=col_indices, & 1277 nrow_block=nrow_block, & 1278 ncol_block=ncol_block) 1279 myprow = L_mu_q%matrix_struct%context%mepos(1) 1280 mypcol = L_mu_q%matrix_struct%context%mepos(2) 1281 nprow = L_mu_q%matrix_struct%context%num_pe(1) 1282 npcol = L_mu_q%matrix_struct%context%num_pe(2) 1283 1284 ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1)) 1285 grid_2_mepos = 0 1286 grid_2_mepos(myprow, mypcol) = para_env%mepos 1287 CALL mp_sum(grid_2_mepos, para_env%group) 1288 1289 ! 
get matrix information for L1_mu_i 1290 CALL cp_fm_get_info(matrix=L1_mu_i, & 1291 nrow_local=nrow_local_1i, & 1292 ncol_local=ncol_local_1i, & 1293 row_indices=row_indices_1i, & 1294 col_indices=col_indices_1i, & 1295 nrow_block=nrow_block_1i, & 1296 ncol_block=ncol_block_1i) 1297 myprow_1i = L1_mu_i%matrix_struct%context%mepos(1) 1298 mypcol_1i = L1_mu_i%matrix_struct%context%mepos(2) 1299 nprow_1i = L1_mu_i%matrix_struct%context%num_pe(1) 1300 npcol_1i = L1_mu_i%matrix_struct%context%num_pe(2) 1301 1302 ALLOCATE (mepos_2_grid_1i(0:para_env_sub%num_pe - 1, 2)) 1303 mepos_2_grid_1i = 0 1304 mepos_2_grid_1i(para_env_sub%mepos, 1) = myprow_1i 1305 mepos_2_grid_1i(para_env_sub%mepos, 2) = mypcol_1i 1306 CALL mp_sum(mepos_2_grid_1i, para_env_sub%group) 1307 1308 ALLOCATE (sizes_1i(2, 0:para_env_sub%num_pe - 1)) 1309 sizes_1i = 0 1310 sizes_1i(1, para_env_sub%mepos) = nrow_local_1i 1311 sizes_1i(2, para_env_sub%mepos) = ncol_local_1i 1312 CALL mp_sum(sizes_1i, para_env_sub%group) 1313 1314 ! 
get matrix information for L2_nu_a 1315 CALL cp_fm_get_info(matrix=L2_nu_a, & 1316 nrow_local=nrow_local_2a, & 1317 ncol_local=ncol_local_2a, & 1318 row_indices=row_indices_2a, & 1319 col_indices=col_indices_2a, & 1320 nrow_block=nrow_block_2a, & 1321 ncol_block=ncol_block_2a) 1322 myprow_2a = L2_nu_a%matrix_struct%context%mepos(1) 1323 mypcol_2a = L2_nu_a%matrix_struct%context%mepos(2) 1324 nprow_2a = L2_nu_a%matrix_struct%context%num_pe(1) 1325 npcol_2a = L2_nu_a%matrix_struct%context%num_pe(2) 1326 1327 ALLOCATE (mepos_2_grid_2a(0:para_env_sub%num_pe - 1, 2)) 1328 mepos_2_grid_2a = 0 1329 mepos_2_grid_2a(para_env_sub%mepos, 1) = myprow_2a 1330 mepos_2_grid_2a(para_env_sub%mepos, 2) = mypcol_2a 1331 CALL mp_sum(mepos_2_grid_2a, para_env_sub%group) 1332 1333 ALLOCATE (sizes_2a(2, 0:para_env_sub%num_pe - 1)) 1334 sizes_2a = 0 1335 sizes_2a(1, para_env_sub%mepos) = nrow_local_2a 1336 sizes_2a(2, para_env_sub%mepos) = ncol_local_2a 1337 CALL mp_sum(sizes_2a, para_env_sub%group) 1338 1339 ! Here we perform a ring communication scheme taking into account 1340 ! for the sub-group distribution of the source matrices. 1341 ! as a first step we need to redistribute the data within 1342 ! the subgroup. 1343 ! In order to do so we have to allocate the structure 1344 ! that will hold the local data involved in communication, this 1345 ! structure will be the same for processes in different subgroups 1346 ! sharing the same position in the subgroup. 1347 ! -1) create the exchange para_env 1348 color_exchange = para_env_sub%mepos 1349 CALL mp_comm_split_direct(para_env%group, comm_exchange, color_exchange) 1350 NULLIFY (para_env_exchange) 1351 CALL cp_para_env_create(para_env_exchange, comm_exchange) 1352 ! 
crate the proc maps exchange and info 1353 ALLOCATE (proc_map_ex(-para_env_exchange%num_pe:2*para_env_exchange%num_pe - 1)) 1354 DO i = 0, para_env_exchange%num_pe - 1 1355 proc_map_ex(i) = i 1356 proc_map_ex(-i - 1) = para_env_exchange%num_pe - i - 1 1357 proc_map_ex(para_env_exchange%num_pe + i) = i 1358 END DO 1359 ALLOCATE (pos_info_ex(0:para_env%num_pe - 1)) 1360 pos_info_ex = 0 1361 pos_info_ex(para_env%mepos) = para_env_exchange%mepos 1362 CALL mp_sum(pos_info_ex, para_env%group) 1363 ALLOCATE (sizes(2, 0:para_env_exchange%num_pe - 1)) 1364 sizes = 0 1365 sizes(1, para_env_exchange%mepos) = nrow_local 1366 sizes(2, para_env_exchange%mepos) = ncol_local 1367 CALL mp_sum(sizes, para_env_exchange%group) 1368 1369 ! 0) store some info about indeces of the fm matrices (subgroup) 1370 CALL timeset(routineN//"_inx", handle2) 1371 ! matrix L1_mu_i 1372 max_row_size = MAXVAL(sizes_1i(1, :)) 1373 max_col_size = MAXVAL(sizes_1i(2, :)) 1374 ALLOCATE (row_indeces_info_1i(2, max_row_size, 0:para_env_sub%num_pe - 1)) 1375 ALLOCATE (col_indeces_info_1i(2, max_col_size, 0:para_env_sub%num_pe - 1)) 1376 row_indeces_info_1i = 0 1377 col_indeces_info_1i = 0 1378 dummy_proc = 0 1379 ! row 1380 DO iiB = 1, nrow_local_1i 1381 i_global = row_indices_1i(iiB) 1382 send_prow = cp_fm_indxg2p(i_global, nrow_block, dummy_proc, & 1383 L_mu_q%matrix_struct%first_p_pos(1), nprow) 1384 i_local = cp_fm_indxg2l(i_global, nrow_block, dummy_proc, & 1385 L_mu_q%matrix_struct%first_p_pos(1), nprow) 1386 row_indeces_info_1i(1, iiB, para_env_sub%mepos) = send_prow 1387 row_indeces_info_1i(2, iiB, para_env_sub%mepos) = i_local 1388 END DO 1389 ! 
col 1390 DO jjB = 1, ncol_local_1i 1391 j_global = col_indices_1i(jjB) 1392 send_pcol = cp_fm_indxg2p(j_global, ncol_block, dummy_proc, & 1393 L_mu_q%matrix_struct%first_p_pos(2), npcol) 1394 j_local = cp_fm_indxg2l(j_global, ncol_block, dummy_proc, & 1395 L_mu_q%matrix_struct%first_p_pos(2), npcol) 1396 col_indeces_info_1i(1, jjB, para_env_sub%mepos) = send_pcol 1397 col_indeces_info_1i(2, jjB, para_env_sub%mepos) = j_local 1398 END DO 1399 CALL mp_sum(row_indeces_info_1i, para_env_sub%group) 1400 CALL mp_sum(col_indeces_info_1i, para_env_sub%group) 1401 1402 ! matrix L2_nu_a 1403 max_row_size = MAXVAL(sizes_2a(1, :)) 1404 max_col_size = MAXVAL(sizes_2a(2, :)) 1405 ALLOCATE (row_indeces_info_2a(2, max_row_size, 0:para_env_sub%num_pe - 1)) 1406 ALLOCATE (col_indeces_info_2a(2, max_col_size, 0:para_env_sub%num_pe - 1)) 1407 row_indeces_info_2a = 0 1408 col_indeces_info_2a = 0 1409 ! row 1410 DO iiB = 1, nrow_local_2a 1411 i_global = row_indices_2a(iiB) 1412 send_prow = cp_fm_indxg2p(i_global, nrow_block, dummy_proc, & 1413 L_mu_q%matrix_struct%first_p_pos(1), nprow) 1414 i_local = cp_fm_indxg2l(i_global, nrow_block, dummy_proc, & 1415 L_mu_q%matrix_struct%first_p_pos(1), nprow) 1416 row_indeces_info_2a(1, iiB, para_env_sub%mepos) = send_prow 1417 row_indeces_info_2a(2, iiB, para_env_sub%mepos) = i_local 1418 END DO 1419 ! col 1420 DO jjB = 1, ncol_local_2a 1421 j_global = col_indices_2a(jjB) + homo 1422 send_pcol = cp_fm_indxg2p(j_global, ncol_block, dummy_proc, & 1423 L_mu_q%matrix_struct%first_p_pos(2), npcol) 1424 j_local = cp_fm_indxg2l(j_global, ncol_block, dummy_proc, & 1425 L_mu_q%matrix_struct%first_p_pos(2), npcol) 1426 col_indeces_info_2a(1, jjB, para_env_sub%mepos) = send_pcol 1427 col_indeces_info_2a(2, jjB, para_env_sub%mepos) = j_local 1428 END DO 1429 CALL mp_sum(row_indeces_info_2a, para_env_sub%group) 1430 CALL mp_sum(col_indeces_info_2a, para_env_sub%group) 1431 CALL timestop(handle2) 1432 1433 ! 
1) define the map for sending data in the subgroup starting with L1_mu_i 1434 CALL timeset(routineN//"_subinfo", handle2) 1435 ALLOCATE (map_send_size(0:para_env_sub%num_pe - 1)) 1436 map_send_size = 0 1437 DO jjB = 1, ncol_local_1i 1438 ! j_global=col_indices_1i(jjB) 1439 ! send_pcol=cp_fm_indxg2p(j_global,ncol_block,dummy_proc,& 1440 ! L_mu_q%matrix_struct%first_p_pos(2),npcol) 1441 send_pcol = col_indeces_info_1i(1, jjB, para_env_sub%mepos) 1442 DO iiB = 1, nrow_local_1i 1443 ! i_global=row_indices_1i(iiB) 1444 ! send_prow=cp_fm_indxg2p(i_global,nrow_block,dummy_proc,& 1445 ! L_mu_q%matrix_struct%first_p_pos(1),nprow) 1446 send_prow = row_indeces_info_1i(1, iiB, para_env_sub%mepos) 1447 proc_send = grid_2_mepos(send_prow, send_pcol) 1448 proc_send_sub = pos_info(proc_send) 1449 map_send_size(proc_send_sub) = map_send_size(proc_send_sub) + 1 1450 END DO 1451 END DO 1452 ! and the same for L2_nu_a 1453 DO jjB = 1, ncol_local_2a 1454 ! j_global=col_indices_2a(jjB)+homo 1455 ! send_pcol=cp_fm_indxg2p(j_global,ncol_block,dummy_proc,& 1456 ! L_mu_q%matrix_struct%first_p_pos(2),npcol) 1457 send_pcol = col_indeces_info_2a(1, jjB, para_env_sub%mepos) 1458 DO iiB = 1, nrow_local_2a 1459 ! i_global=row_indices_2a(iiB) 1460 ! send_prow=cp_fm_indxg2p(i_global,nrow_block,dummy_proc,& 1461 ! L_mu_q%matrix_struct%first_p_pos(1),nprow) 1462 send_prow = row_indeces_info_2a(1, iiB, para_env_sub%mepos) 1463 proc_send = grid_2_mepos(send_prow, send_pcol) 1464 proc_send_sub = pos_info(proc_send) 1465 map_send_size(proc_send_sub) = map_send_size(proc_send_sub) + 1 1466 END DO 1467 END DO 1468 ! and exchange data in order to create map_rec_size 1469 ALLOCATE (map_rec_size(0:para_env_sub%num_pe - 1)) 1470 map_rec_size = 0 1471 CALL mp_alltoall(map_send_size, map_rec_size, 1, para_env_sub%group) 1472 CALL timestop(handle2) 1473 1474 ! 2) reorder data in sending buffer 1475 CALL timeset(routineN//"_sub_Bsend", handle2) 1476 ! 
count the number of messages (include myself) 1477 number_of_send = 0 1478 DO proc_shift = 0, para_env_sub%num_pe - 1 1479 proc_send = sub_proc_map(para_env_sub%mepos + proc_shift) 1480 IF (map_send_size(proc_send) > 0) THEN 1481 number_of_send = number_of_send + 1 1482 END IF 1483 END DO 1484 ! allocate the structure that will hold the messages to be sent 1485 ALLOCATE (buffer_send(number_of_send)) 1486 send_counter = 0 1487 ALLOCATE (proc_2_send_pos(0:para_env_sub%num_pe - 1)) 1488 proc_2_send_pos = 0 1489 DO proc_shift = 0, para_env_sub%num_pe - 1 1490 proc_send = sub_proc_map(para_env_sub%mepos + proc_shift) 1491 size_send_buffer = map_send_size(proc_send) 1492 IF (map_send_size(proc_send) > 0) THEN 1493 send_counter = send_counter + 1 1494 ! allocate the sending buffer (msg) 1495 ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer)) 1496 buffer_send(send_counter)%msg = 0.0_dp 1497 buffer_send(send_counter)%proc = proc_send 1498 proc_2_send_pos(proc_send) = send_counter 1499 END IF 1500 END DO 1501 ! loop over the locally held data and fill the buffer_send 1502 ! for doing that we need an array that keep track if the 1503 ! sequential increase of the index for each message 1504 ALLOCATE (iii_vet(number_of_send)) 1505 iii_vet = 0 1506 DO jjB = 1, ncol_local_1i 1507 ! j_global=col_indices_1i(jjB) 1508 ! send_pcol=cp_fm_indxg2p(j_global,ncol_block,dummy_proc,& 1509 ! L_mu_q%matrix_struct%first_p_pos(2),npcol) 1510 send_pcol = col_indeces_info_1i(1, jjB, para_env_sub%mepos) 1511 DO iiB = 1, nrow_local_1i 1512 ! i_global=row_indices_1i(iiB) 1513 ! send_prow=cp_fm_indxg2p(i_global,nrow_block,dummy_proc,& 1514 ! 
L_mu_q%matrix_struct%first_p_pos(1),nprow) 1515 send_prow = row_indeces_info_1i(1, iiB, para_env_sub%mepos) 1516 proc_send = grid_2_mepos(send_prow, send_pcol) 1517 proc_send_sub = pos_info(proc_send) 1518 send_counter = proc_2_send_pos(proc_send_sub) 1519 iii_vet(send_counter) = iii_vet(send_counter) + 1 1520 iii = iii_vet(send_counter) 1521 buffer_send(send_counter)%msg(iii) = L1_mu_i%local_data(iiB, jjB) 1522 END DO 1523 END DO 1524 ! release the local data of L1_mu_i 1525 DEALLOCATE (L1_mu_i%local_data) 1526 ! and the same for L2_nu_a 1527 DO jjB = 1, ncol_local_2a 1528 ! j_global=col_indices_2a(jjB)+homo 1529 ! send_pcol=cp_fm_indxg2p(j_global,ncol_block,dummy_proc,& 1530 ! L_mu_q%matrix_struct%first_p_pos(2),npcol) 1531 send_pcol = col_indeces_info_2a(1, jjB, para_env_sub%mepos) 1532 DO iiB = 1, nrow_local_2a 1533 ! i_global=row_indices_2a(iiB) 1534 ! send_prow=cp_fm_indxg2p(i_global,nrow_block,dummy_proc,& 1535 ! L_mu_q%matrix_struct%first_p_pos(1),nprow) 1536 send_prow = row_indeces_info_2a(1, iiB, para_env_sub%mepos) 1537 proc_send = grid_2_mepos(send_prow, send_pcol) 1538 proc_send_sub = pos_info(proc_send) 1539 send_counter = proc_2_send_pos(proc_send_sub) 1540 iii_vet(send_counter) = iii_vet(send_counter) + 1 1541 iii = iii_vet(send_counter) 1542 buffer_send(send_counter)%msg(iii) = L2_nu_a%local_data(iiB, jjB) 1543 END DO 1544 END DO 1545 DEALLOCATE (L2_nu_a%local_data) 1546 DEALLOCATE (proc_2_send_pos) 1547 DEALLOCATE (iii_vet) 1548 CALL timestop(handle2) 1549 1550 ! 3) create the buffer for receive, post the message with irecv 1551 ! and send the messages with mp_isend 1552 CALL timeset(routineN//"_sub_isendrecv", handle2) 1553 ! 
count the number of messages to be received 1554 number_of_rec = 0 1555 DO proc_shift = 0, para_env_sub%num_pe - 1 1556 proc_receive = sub_proc_map(para_env_sub%mepos - proc_shift) 1557 IF (map_rec_size(proc_receive) > 0) THEN 1558 number_of_rec = number_of_rec + 1 1559 END IF 1560 END DO 1561 ALLOCATE (buffer_rec(number_of_rec)) 1562 rec_counter = 0 1563 DO proc_shift = 0, para_env_sub%num_pe - 1 1564 proc_receive = sub_proc_map(para_env_sub%mepos - proc_shift) 1565 size_rec_buffer = map_rec_size(proc_receive) 1566 IF (map_rec_size(proc_receive) > 0) THEN 1567 rec_counter = rec_counter + 1 1568 ! prepare the buffer for receive 1569 ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer)) 1570 buffer_rec(rec_counter)%msg = 0.0_dp 1571 buffer_rec(rec_counter)%proc = proc_receive 1572 ! post the message to be received (not need to send to myself) 1573 IF (proc_receive /= para_env_sub%mepos) THEN 1574 CALL mp_irecv(buffer_rec(rec_counter)%msg, proc_receive, para_env_sub%group, & 1575 buffer_rec(rec_counter)%msg_req) 1576 END IF 1577 END IF 1578 END DO 1579 ! send messages 1580 ALLOCATE (req_send(number_of_send)) 1581 req_send = mp_request_null 1582 send_counter = 0 1583 DO proc_shift = 0, para_env_sub%num_pe - 1 1584 proc_send = sub_proc_map(para_env_sub%mepos + proc_shift) 1585 IF (map_send_size(proc_send) > 0) THEN 1586 send_counter = send_counter + 1 1587 IF (proc_send == para_env_sub%mepos) THEN 1588 buffer_rec(send_counter)%msg = buffer_send(send_counter)%msg 1589 ELSE 1590 CALL mp_isend(buffer_send(send_counter)%msg, proc_send, para_env_sub%group, & 1591 buffer_send(send_counter)%msg_req) 1592 req_send(send_counter) = buffer_send(send_counter)%msg_req 1593 END IF 1594 END IF 1595 END DO 1596 DEALLOCATE (map_send_size) 1597 CALL timestop(handle2) 1598 1599 ! 4) (if memory is a problem we should move this part after point 5) 1600 ! Here we create the new buffer for cyclic(ring) communication and 1601 ! 
we fill it with the data received from the other member of the 1602 ! subgroup 1603 CALL timeset(routineN//"_Bcyclic", handle2) 1604 ! first allocata new structure 1605 ALLOCATE (buffer_cyclic(0:para_env_exchange%num_pe - 1)) 1606 DO iproc = 0, para_env_exchange%num_pe - 1 1607 rec_row_size = sizes(1, iproc) 1608 rec_col_size = sizes(2, iproc) 1609 ALLOCATE (buffer_cyclic(iproc)%msg(rec_row_size, rec_col_size)) 1610 buffer_cyclic(iproc)%msg = 0.0_dp 1611 END DO 1612 ! now collect data from other member of the subgroup and fill 1613 ! buffer_cyclic 1614 rec_counter = 0 1615 DO proc_shift = 0, para_env_sub%num_pe - 1 1616 proc_receive = sub_proc_map(para_env_sub%mepos - proc_shift) 1617 size_rec_buffer = map_rec_size(proc_receive) 1618 IF (map_rec_size(proc_receive) > 0) THEN 1619 rec_counter = rec_counter + 1 1620 1621 ! wait for the message 1622 IF (proc_receive /= para_env_sub%mepos) CALL mp_wait(buffer_rec(rec_counter)%msg_req) 1623 1624 CALL timeset(routineN//"_fill", handle3) 1625 iii = 0 1626 DO jjB = 1, sizes_1i(2, proc_receive) 1627 ! j_global=cp_fm_indxl2g(jjB,ncol_block_1i,mepos_2_grid_1i(proc_receive,2),& 1628 ! L1_mu_i%matrix_struct%first_p_pos(2),npcol_1i) 1629 ! send_pcol=cp_fm_indxg2p(j_global,ncol_block,dummy_proc,& 1630 ! L_mu_q%matrix_struct%first_p_pos(2),npcol) 1631 ! j_local=cp_fm_indxg2l(j_global,ncol_block,dummy_proc,& 1632 ! L_mu_q%matrix_struct%first_p_pos(2),npcol) 1633 send_pcol = col_indeces_info_1i(1, jjB, proc_receive) 1634 j_local = col_indeces_info_1i(2, jjB, proc_receive) 1635 DO iiB = 1, sizes_1i(1, proc_receive) 1636 ! i_global=cp_fm_indxl2g(iiB,nrow_block_1i,mepos_2_grid_1i(proc_receive,1),& 1637 ! L1_mu_i%matrix_struct%first_p_pos(1),nprow_1i) 1638 ! send_prow=cp_fm_indxg2p(i_global,nrow_block,dummy_proc,& 1639 ! 
L_mu_q%matrix_struct%first_p_pos(1),nprow) 1640 send_prow = row_indeces_info_1i(1, iiB, proc_receive) 1641 proc_send = grid_2_mepos(send_prow, send_pcol) 1642 proc_send_sub = pos_info(proc_send) 1643 IF (proc_send_sub /= para_env_sub%mepos) CYCLE 1644 iii = iii + 1 1645 ! i_local=cp_fm_indxg2l(i_global,nrow_block,dummy_proc,& 1646 ! L_mu_q%matrix_struct%first_p_pos(1),nprow) 1647 i_local = row_indeces_info_1i(2, iiB, proc_receive) 1648 proc_send_ex = pos_info_ex(proc_send) 1649 buffer_cyclic(proc_send_ex)%msg(i_local, j_local) = buffer_rec(rec_counter)%msg(iii) 1650 END DO 1651 END DO 1652 ! and the same for L2_nu_a 1653 DO jjB = 1, sizes_2a(2, proc_receive) 1654 ! j_global=cp_fm_indxl2g(jjB,ncol_block_2a,mepos_2_grid_2a(proc_receive,2),& 1655 ! L2_nu_a%matrix_struct%first_p_pos(2),npcol_2a)+homo 1656 ! send_pcol=cp_fm_indxg2p(j_global,ncol_block,dummy_proc,& 1657 ! L_mu_q%matrix_struct%first_p_pos(2),npcol) 1658 ! j_local=cp_fm_indxg2l(j_global,ncol_block,dummy_proc,& 1659 ! L_mu_q%matrix_struct%first_p_pos(2),npcol) 1660 send_pcol = col_indeces_info_2a(1, jjB, proc_receive) 1661 j_local = col_indeces_info_2a(2, jjB, proc_receive) 1662 DO iiB = 1, sizes_2a(1, proc_receive) 1663 ! i_global=cp_fm_indxl2g(iiB,nrow_block_2a,mepos_2_grid_2a(proc_receive,1),& 1664 ! L2_nu_a%matrix_struct%first_p_pos(1),nprow_2a) 1665 ! send_prow=cp_fm_indxg2p(i_global,nrow_block,dummy_proc,& 1666 ! L_mu_q%matrix_struct%first_p_pos(1),nprow) 1667 send_prow = row_indeces_info_2a(1, iiB, proc_receive) 1668 proc_send = grid_2_mepos(send_prow, send_pcol) 1669 proc_send_sub = pos_info(proc_send) 1670 IF (proc_send_sub /= para_env_sub%mepos) CYCLE 1671 iii = iii + 1 1672 ! i_local=cp_fm_indxg2l(i_global,nrow_block,dummy_proc,& 1673 ! 
L_mu_q%matrix_struct%first_p_pos(1),nprow) 1674 i_local = row_indeces_info_2a(2, iiB, proc_receive) 1675 proc_send_ex = pos_info_ex(proc_send) 1676 buffer_cyclic(proc_send_ex)%msg(i_local, j_local) = buffer_rec(rec_counter)%msg(iii) 1677 END DO 1678 END DO 1679 CALL timestop(handle3) 1680 1681 ! deallocate the received message 1682 DEALLOCATE (buffer_rec(rec_counter)%msg) 1683 END IF 1684 END DO 1685 DEALLOCATE (row_indeces_info_1i) 1686 DEALLOCATE (col_indeces_info_1i) 1687 DEALLOCATE (row_indeces_info_2a) 1688 DEALLOCATE (col_indeces_info_2a) 1689 DEALLOCATE (buffer_rec) 1690 DEALLOCATE (map_rec_size) 1691 CALL timestop(handle2) 1692 1693 ! 5) Wait for all messeges to be sent in the subgroup 1694 CALL timeset(routineN//"_sub_waitall", handle2) 1695 CALL mp_waitall(req_send(:)) 1696 DO send_counter = 1, number_of_send 1697 DEALLOCATE (buffer_send(send_counter)%msg) 1698 END DO 1699 DEALLOCATE (buffer_send) 1700 DEALLOCATE (req_send) 1701 CALL timestop(handle2) 1702 1703 ! 6) Start with ring communication 1704 CALL timeset(routineN//"_ring", handle2) 1705 proc_send_static = proc_map_ex(para_env_exchange%mepos + 1) 1706 proc_receive_static = proc_map_ex(para_env_exchange%mepos - 1) 1707 max_row_size = MAXVAL(sizes(1, :)) 1708 max_col_size = MAXVAL(sizes(2, :)) 1709 ALLOCATE (mat_send(max_row_size, max_col_size)) 1710 ALLOCATE (mat_rec(max_row_size, max_col_size)) 1711 mat_send = 0.0_dp 1712 mat_send(1:nrow_local, 1:ncol_local) = buffer_cyclic(para_env_exchange%mepos)%msg(:, :) 1713 DEALLOCATE (buffer_cyclic(para_env_exchange%mepos)%msg) 1714 DO proc_shift = 1, para_env_exchange%num_pe - 1 1715 proc_send = proc_map_ex(para_env_exchange%mepos + proc_shift) 1716 proc_receive = proc_map_ex(para_env_exchange%mepos - proc_shift) 1717 1718 rec_row_size = sizes(1, proc_receive) 1719 rec_col_size = sizes(2, proc_receive) 1720 1721 mat_rec = 0.0_dp 1722 CALL mp_sendrecv(mat_send, proc_send_static, & 1723 mat_rec, proc_receive_static, & 1724 para_env_exchange%group) 1725 1726 
mat_send = 0.0_dp 1727 mat_send(1:rec_row_size, 1:rec_col_size) = mat_rec(1:rec_row_size, 1:rec_col_size) + & 1728 buffer_cyclic(proc_receive)%msg(:, :) 1729 1730 DEALLOCATE (buffer_cyclic(proc_receive)%msg) 1731 END DO 1732 ! and finaly 1733 CALL mp_sendrecv(mat_send, proc_send_static, & 1734 mat_rec, proc_receive_static, & 1735 para_env_exchange%group) 1736 L_mu_q%local_data(1:nrow_local, 1:ncol_local) = mat_rec(1:nrow_local, 1:ncol_local) 1737 DEALLOCATE (buffer_cyclic) 1738 DEALLOCATE (mat_send) 1739 DEALLOCATE (mat_rec) 1740 CALL timestop(handle2) 1741 1742 DEALLOCATE (proc_map_ex) 1743 ! release para_env_exchange 1744 CALL cp_para_env_release(para_env_exchange) 1745 1746 CALL cp_fm_release(L1_mu_i) 1747 CALL cp_fm_release(L2_nu_a) 1748 DEALLOCATE (pos_info_ex) 1749 DEALLOCATE (grid_2_mepos) 1750 DEALLOCATE (mepos_2_grid_1i) 1751 DEALLOCATE (mepos_2_grid_2a) 1752 DEALLOCATE (sizes) 1753 DEALLOCATE (sizes_1i) 1754 DEALLOCATE (sizes_2a) 1755 1756 ! update the P_ij block of P_MP2 with the 1757 ! non-singular ij pairs 1758 CALL timeset(routineN//"_Pij", handle2) 1759 NULLIFY (fm_P_ij, fm_struct_tmp) 1760 CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env, context=blacs_env, & 1761 nrow_global=homo, ncol_global=homo) 1762 CALL cp_fm_create(fm_P_ij, fm_struct_tmp, name="fm_P_ij") 1763 CALL cp_fm_struct_release(fm_struct_tmp) 1764 CALL cp_fm_set_all(fm_P_ij, 0.0_dp) 1765 1766 CALL cp_gemm('T', 'N', homo, homo, dimen, 1.0_dp, & 1767 mo_coeff, L_mu_q, 0.0_dp, fm_P_ij, & 1768 a_first_col=1, & 1769 a_first_row=1, & 1770 b_first_col=1, & 1771 b_first_row=1, & 1772 c_first_col=1, & 1773 c_first_row=1) 1774 ! don't know if it is better to transpose (for communication reasons) 1775 ! or just recompute the transposed matrix 1776 CALL cp_gemm('T', 'N', homo, homo, dimen, -2.0_dp, & 1777 L_mu_q, mo_coeff, 2.0_dp, fm_P_ij, & 1778 a_first_col=1, & 1779 a_first_row=1, & 1780 b_first_col=1, & 1781 b_first_row=1, & 1782 c_first_col=1, & 1783 c_first_row=1) 1784 ! 
we have it, update P_ij local 1785 CALL cp_fm_get_info(matrix=fm_P_ij, & 1786 nrow_local=nrow_local, & 1787 ncol_local=ncol_local, & 1788 row_indices=row_indices, & 1789 col_indices=col_indices) 1790 DO jjB = 1, ncol_local 1791 j_global = col_indices(jjB) 1792 DO iiB = 1, nrow_local 1793 i_global = row_indices(iiB) 1794 ! diagonal elements alreasy updated 1795 IF (j_global == i_global) CYCLE 1796 ! check if the given element is above the threshold 1797 IF (ABS(Eigenval(j_global) - Eigenval(i_global)) < mp2_env%ri_mp2%eps_canonical) CYCLE 1798 IF (.NOT. alpha_beta) THEN 1799 mp2_env%ri_grad%P_ij(i_global, j_global) = & 1800 factor*fm_P_ij%local_data(iiB, jjB)/(Eigenval(j_global) - Eigenval(i_global)) 1801 ELSE 1802 mp2_env%ri_grad%P_ij_beta(i_global, j_global) = & 1803 factor*fm_P_ij%local_data(iiB, jjB)/(Eigenval(j_global) - Eigenval(i_global)) 1804 ENDIF 1805 END DO 1806 END DO 1807 ! release fm_P_ij 1808 CALL cp_fm_release(fm_P_ij) 1809 ! mp_sum it (we can avoid mp_sum, but for now let's keep it easy) 1810 IF (.NOT. alpha_beta) THEN 1811 CALL mp_sum(mp2_env%ri_grad%P_ij, para_env%group) 1812 ELSE 1813 CALL mp_sum(mp2_env%ri_grad%P_ij_beta, para_env%group) 1814 ENDIF 1815 CALL timestop(handle2) 1816 1817 ! Now create and fill the P matrix (MO) 1818 ! FOR NOW WE ASSUME P_ab AND P_ij ARE REPLICATED OVER EACH MPI RANK 1819 IF (.NOT. 
alpha_beta) THEN 1820 CALL timeset(routineN//"_PMO", handle2) 1821 NULLIFY (mp2_env%ri_grad%P_mo, fm_struct_tmp) 1822 CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env, context=blacs_env, & 1823 nrow_global=dimen, ncol_global=dimen) 1824 CALL cp_fm_create(mp2_env%ri_grad%P_mo, fm_struct_tmp, name="P_MP2_MO") 1825 CALL cp_fm_set_all(mp2_env%ri_grad%P_mo, 0.0_dp) 1826 ELSE 1827 CALL timeset(routineN//"_PMO", handle2) 1828 NULLIFY (mp2_env%ri_grad%P_mo_beta, fm_struct_tmp) 1829 CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env, context=blacs_env, & 1830 nrow_global=dimen, ncol_global=dimen) 1831 CALL cp_fm_create(mp2_env%ri_grad%P_mo_beta, fm_struct_tmp, name="P_MP2_MO") 1832 CALL cp_fm_set_all(mp2_env%ri_grad%P_mo_beta, 0.0_dp) 1833 ENDIF 1834 1835 ! start with the (easy) occ-occ block and locally held P_ab elements 1836 itmp = get_limit(virtual, para_env_sub%num_pe, para_env_sub%mepos) 1837 my_B_virtual_start = itmp(1) 1838 my_B_virtual_end = itmp(2) 1839 1840 IF (.NOT. alpha_beta) THEN 1841 CALL cp_fm_get_info(matrix=mp2_env%ri_grad%P_mo, & 1842 nrow_local=nrow_local, & 1843 ncol_local=ncol_local, & 1844 row_indices=row_indices, & 1845 col_indices=col_indices, & 1846 nrow_block=nrow_block, & 1847 ncol_block=ncol_block) 1848 myprow = mp2_env%ri_grad%P_mo%matrix_struct%context%mepos(1) 1849 mypcol = mp2_env%ri_grad%P_mo%matrix_struct%context%mepos(2) 1850 nprow = mp2_env%ri_grad%P_mo%matrix_struct%context%num_pe(1) 1851 npcol = mp2_env%ri_grad%P_mo%matrix_struct%context%num_pe(2) 1852 ELSE 1853 CALL cp_fm_get_info(matrix=mp2_env%ri_grad%P_mo_beta, & 1854 nrow_local=nrow_local, & 1855 ncol_local=ncol_local, & 1856 row_indices=row_indices, & 1857 col_indices=col_indices, & 1858 nrow_block=nrow_block, & 1859 ncol_block=ncol_block) 1860 myprow = mp2_env%ri_grad%P_mo_beta%matrix_struct%context%mepos(1) 1861 mypcol = mp2_env%ri_grad%P_mo_beta%matrix_struct%context%mepos(2) 1862 nprow = mp2_env%ri_grad%P_mo_beta%matrix_struct%context%num_pe(1) 1863 npcol = 
mp2_env%ri_grad%P_mo_beta%matrix_struct%context%num_pe(2) 1864 ENDIF 1865 1866 DO jjB = 1, ncol_local 1867 j_global = col_indices(jjB) 1868 DO iiB = 1, nrow_local 1869 i_global = row_indices(iiB) 1870 IF (.NOT. alpha_beta) THEN 1871 IF (i_global <= homo .AND. j_global <= homo) THEN 1872 mp2_env%ri_grad%P_mo%local_data(iiB, jjB) = mp2_env%ri_grad%P_ij(i_global, j_global) 1873 END IF 1874 IF ((my_B_virtual_start <= i_global - homo .AND. i_global - homo <= my_B_virtual_end) .AND. (j_global > homo)) THEN 1875 mp2_env%ri_grad%P_mo%local_data(iiB, jjB) = & 1876 mp2_env%ri_grad%P_ab(i_global - homo - my_B_virtual_start + 1, j_global - homo) 1877 END IF 1878 ELSE 1879 IF (i_global <= homo .AND. j_global <= homo) THEN 1880 mp2_env%ri_grad%P_mo_beta%local_data(iiB, jjB) = mp2_env%ri_grad%P_ij_beta(i_global, j_global) 1881 END IF 1882 IF ((my_B_virtual_start <= i_global - homo .AND. i_global - homo <= my_B_virtual_end) .AND. (j_global > homo)) THEN 1883 mp2_env%ri_grad%P_mo_beta%local_data(iiB, jjB) = & 1884 mp2_env%ri_grad%P_ab_beta(i_global - homo - my_B_virtual_start + 1, j_global - homo) 1885 END IF 1886 ENDIF 1887 END DO 1888 END DO 1889 ! deallocate the local P_ij 1890 IF (.NOT. alpha_beta) THEN 1891 DEALLOCATE (mp2_env%ri_grad%P_ij) 1892 ELSE 1893 DEALLOCATE (mp2_env%ri_grad%P_ij_beta) 1894 ENDIF 1895 1896 ! send around the sub_group the local data and check if we 1897 ! 
have to update our block with external elements 1898 ALLOCATE (mepos_2_grid(0:para_env_sub%num_pe - 1, 2)) 1899 mepos_2_grid = 0 1900 mepos_2_grid(para_env_sub%mepos, 1) = myprow 1901 mepos_2_grid(para_env_sub%mepos, 2) = mypcol 1902 CALL mp_sum(mepos_2_grid, para_env_sub%group) 1903 1904 ALLOCATE (sizes(2, 0:para_env_sub%num_pe - 1)) 1905 sizes = 0 1906 sizes(1, para_env_sub%mepos) = nrow_local 1907 sizes(2, para_env_sub%mepos) = ncol_local 1908 CALL mp_sum(sizes, para_env_sub%group) 1909 1910 ALLOCATE (ab_rec(nrow_local, ncol_local)) 1911 DO proc_shift = 1, para_env_sub%num_pe - 1 1912 proc_send = sub_proc_map(para_env_sub%mepos + proc_shift) 1913 proc_receive = sub_proc_map(para_env_sub%mepos - proc_shift) 1914 1915 send_prow = mepos_2_grid(proc_send, 1) 1916 send_pcol = mepos_2_grid(proc_send, 2) 1917 1918 send_row_size = sizes(1, proc_send) 1919 send_col_size = sizes(2, proc_send) 1920 1921 ALLOCATE (ab_send(send_row_size, send_col_size)) 1922 ab_send = 0.0_dp 1923 1924 ! first loop over row since in this way we can cycle 1925 DO iiB = 1, send_row_size 1926 i_global = cp_fm_indxl2g(iiB, nrow_block, send_prow, & 1927 mp2_env%ri_grad%P_mo%matrix_struct%first_p_pos(1), nprow) 1928 IF (i_global <= homo) CYCLE 1929 i_global = i_global - homo 1930 IF (.NOT. (my_B_virtual_start <= i_global .AND. i_global <= my_B_virtual_end)) CYCLE 1931 DO jjB = 1, send_col_size 1932 IF (.NOT. alpha_beta) THEN 1933 j_global = cp_fm_indxl2g(jjB, ncol_block, send_pcol, & 1934 mp2_env%ri_grad%P_mo%matrix_struct%first_p_pos(2), npcol) 1935 ELSE 1936 j_global = cp_fm_indxl2g(jjB, ncol_block, send_pcol, & 1937 mp2_env%ri_grad%P_mo_beta%matrix_struct%first_p_pos(2), npcol) 1938 ENDIF 1939 IF (j_global <= homo) CYCLE 1940 j_global = j_global - homo 1941 IF (.NOT. 
alpha_beta) THEN 1942 ab_send(iiB, jjB) = mp2_env%ri_grad%P_ab(i_global - my_B_virtual_start + 1, j_global) 1943 ELSE 1944 ab_send(iiB, jjB) = mp2_env%ri_grad%P_ab_beta(i_global - my_B_virtual_start + 1, j_global) 1945 ENDIF 1946 END DO 1947 END DO 1948 1949 ab_rec = 0.0_dp 1950 CALL mp_sendrecv(ab_send, proc_send, & 1951 ab_rec, proc_receive, & 1952 para_env_sub%group) 1953 IF (.NOT. alpha_beta) THEN 1954 mp2_env%ri_grad%P_mo%local_data(1:nrow_local, 1:ncol_local) = & 1955 mp2_env%ri_grad%P_mo%local_data(1:nrow_local, 1:ncol_local) + & 1956 ab_rec(1:nrow_local, 1:ncol_local) 1957 ELSE 1958 mp2_env%ri_grad%P_mo_beta%local_data(1:nrow_local, 1:ncol_local) = & 1959 mp2_env%ri_grad%P_mo_beta%local_data(1:nrow_local, 1:ncol_local) + & 1960 ab_rec(1:nrow_local, 1:ncol_local) 1961 ENDIF 1962 1963 DEALLOCATE (ab_send) 1964 END DO 1965 DEALLOCATE (ab_rec) 1966 DEALLOCATE (mepos_2_grid) 1967 DEALLOCATE (sizes) 1968 1969 ! deallocate the local P_ab' 1970 IF (.NOT. alpha_beta) THEN 1971 DEALLOCATE (mp2_env%ri_grad%P_ab) 1972 ELSE 1973 DEALLOCATE (mp2_env%ri_grad%P_ab_beta) 1974 ENDIF 1975 CALL timestop(handle2) 1976 1977 ! create also W_MP2_MO 1978 CALL timeset(routineN//"_WMO", handle2) 1979 IF (.NOT. alpha_beta) THEN 1980 NULLIFY (mp2_env%ri_grad%W_mo) 1981 CALL cp_fm_create(mp2_env%ri_grad%W_mo, fm_struct_tmp, name="W_MP2_MO") 1982 CALL cp_fm_set_all(mp2_env%ri_grad%W_mo, 0.0_dp) 1983 CALL cp_fm_struct_release(fm_struct_tmp) 1984 1985 ! all block 1986 CALL cp_gemm('T', 'N', dimen, dimen, dimen, 2.0_dp*factor, & 1987 L_mu_q, mo_coeff, 0.0_dp, mp2_env%ri_grad%W_mo, & 1988 a_first_col=1, & 1989 a_first_row=1, & 1990 b_first_col=1, & 1991 b_first_row=1, & 1992 c_first_col=1, & 1993 c_first_row=1) 1994 1995 ! 
occ-occ block 1996 CALL cp_gemm('T', 'N', homo, homo, dimen, -2.0_dp*factor, & 1997 L_mu_q, mo_coeff, 0.0_dp, mp2_env%ri_grad%W_mo, & 1998 a_first_col=1, & 1999 a_first_row=1, & 2000 b_first_col=1, & 2001 b_first_row=1, & 2002 c_first_col=1, & 2003 c_first_row=1) 2004 2005 ! occ-virt block 2006 CALL cp_gemm('T', 'N', homo, virtual, dimen, 2.0_dp*factor, & 2007 mo_coeff, L_mu_q, 0.0_dp, mp2_env%ri_grad%W_mo, & 2008 a_first_col=1, & 2009 a_first_row=1, & 2010 b_first_col=homo + 1, & 2011 b_first_row=1, & 2012 c_first_col=homo + 1, & 2013 c_first_row=1) 2014 ELSE 2015 NULLIFY (mp2_env%ri_grad%W_mo_beta) 2016 CALL cp_fm_create(mp2_env%ri_grad%W_mo_beta, fm_struct_tmp, name="W_MP2_MO") 2017 CALL cp_fm_set_all(mp2_env%ri_grad%W_mo_beta, 0.0_dp) 2018 CALL cp_fm_struct_release(fm_struct_tmp) 2019 2020 ! all block 2021 CALL cp_gemm('T', 'N', dimen, dimen, dimen, 2.0_dp*factor, & 2022 L_mu_q, mo_coeff, 0.0_dp, mp2_env%ri_grad%W_mo_beta, & 2023 a_first_col=1, & 2024 a_first_row=1, & 2025 b_first_col=1, & 2026 b_first_row=1, & 2027 c_first_col=1, & 2028 c_first_row=1) 2029 2030 ! occ-occ block 2031 CALL cp_gemm('T', 'N', homo, homo, dimen, -2.0_dp*factor, & 2032 L_mu_q, mo_coeff, 0.0_dp, mp2_env%ri_grad%W_mo_beta, & 2033 a_first_col=1, & 2034 a_first_row=1, & 2035 b_first_col=1, & 2036 b_first_row=1, & 2037 c_first_col=1, & 2038 c_first_row=1) 2039 2040 ! occ-virt block 2041 CALL cp_gemm('T', 'N', homo, virtual, dimen, 2.0_dp*factor, & 2042 mo_coeff, L_mu_q, 0.0_dp, mp2_env%ri_grad%W_mo_beta, & 2043 a_first_col=1, & 2044 a_first_row=1, & 2045 b_first_col=homo + 1, & 2046 b_first_row=1, & 2047 c_first_col=homo + 1, & 2048 c_first_row=1) 2049 ENDIF 2050 CALL timestop(handle2) 2051 2052 ! Calculate occ-virt block of the lagrangian in MO 2053 CALL timeset(routineN//"_Ljb", handle2) 2054 IF (.NOT. 
alpha_beta) THEN 2055 NULLIFY (mp2_env%ri_grad%L_jb, fm_struct_tmp) 2056 CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env, context=blacs_env, & 2057 nrow_global=homo, ncol_global=virtual) 2058 CALL cp_fm_create(mp2_env%ri_grad%L_jb, fm_struct_tmp, name="fm_L_jb") 2059 CALL cp_fm_struct_release(fm_struct_tmp) 2060 CALL cp_fm_set_all(mp2_env%ri_grad%L_jb, 0.0_dp) 2061 2062 ! first Virtual 2063 CALL cp_gemm('T', 'N', homo, virtual, dimen, 2.0_dp*factor, & 2064 L_mu_q, mo_coeff, 0.0_dp, mp2_env%ri_grad%L_jb, & 2065 a_first_col=1, & 2066 a_first_row=1, & 2067 b_first_col=homo + 1, & 2068 b_first_row=1, & 2069 c_first_col=1, & 2070 c_first_row=1) 2071 ! then occupied 2072 CALL cp_gemm('T', 'N', homo, virtual, dimen, 2.0_dp*factor, & 2073 mo_coeff, L_mu_q, 1.0_dp, mp2_env%ri_grad%L_jb, & 2074 a_first_col=1, & 2075 a_first_row=1, & 2076 b_first_col=homo + 1, & 2077 b_first_row=1, & 2078 c_first_col=1, & 2079 c_first_row=1) 2080 ELSE 2081 NULLIFY (mp2_env%ri_grad%L_jb_beta, fm_struct_tmp) 2082 CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env, context=blacs_env, & 2083 nrow_global=homo, ncol_global=virtual) 2084 CALL cp_fm_create(mp2_env%ri_grad%L_jb_beta, fm_struct_tmp, name="fm_L_jb") 2085 CALL cp_fm_struct_release(fm_struct_tmp) 2086 CALL cp_fm_set_all(mp2_env%ri_grad%L_jb_beta, 0.0_dp) 2087 2088 ! first Virtual 2089 CALL cp_gemm('T', 'N', homo, virtual, dimen, 2.0_dp*factor, & 2090 L_mu_q, mo_coeff, 0.0_dp, mp2_env%ri_grad%L_jb_beta, & 2091 a_first_col=1, & 2092 a_first_row=1, & 2093 b_first_col=homo + 1, & 2094 b_first_row=1, & 2095 c_first_col=1, & 2096 c_first_row=1) 2097 ! then occupied 2098 CALL cp_gemm('T', 'N', homo, virtual, dimen, 2.0_dp*factor, & 2099 mo_coeff, L_mu_q, 1.0_dp, mp2_env%ri_grad%L_jb_beta, & 2100 a_first_col=1, & 2101 a_first_row=1, & 2102 b_first_col=homo + 1, & 2103 b_first_row=1, & 2104 c_first_col=1, & 2105 c_first_row=1) 2106 ENDIF 2107 ! 
finally release L_mu_q 2108 CALL cp_fm_release(L_mu_q) 2109 CALL timestop(handle2) 2110 2111 ! here we should be done next CPHF 2112 2113 DEALLOCATE (sub_proc_map) 2114 DEALLOCATE (pos_info) 2115 2116 CALL timestop(handle) 2117 2118 END SUBROUTINE create_W_P 2119 2120END MODULE mp2_ri_grad 2121