1!--------------------------------------------------------------------------------------------------! 2! CP2K: A general program to perform molecular dynamics simulations ! 3! Copyright (C) 2000 - 2019 CP2K developers group ! 4!--------------------------------------------------------------------------------------------------! 5 6! ************************************************************************************************** 7!> \brief Rountines for GW + Bethe-Salpeter for computing electronic excitations 8!> \par History 9!> 04.2017 created [Jan Wilhelm] 10! ************************************************************************************************** 11MODULE bse 12 USE cp_fm_basic_linalg, ONLY: cp_fm_upper_to_full 13 USE cp_fm_cholesky, ONLY: cp_fm_cholesky_decompose,& 14 cp_fm_cholesky_invert 15 USE cp_fm_types, ONLY: cp_fm_create,& 16 cp_fm_get_info,& 17 cp_fm_release,& 18 cp_fm_set_all,& 19 cp_fm_to_fm,& 20 cp_fm_type 21 USE cp_gemm_interface, ONLY: cp_gemm 22 USE cp_para_types, ONLY: cp_para_env_type 23 USE group_dist_types, ONLY: get_group_dist,& 24 group_dist_d1_type 25 USE kinds, ONLY: dp 26 USE message_passing, ONLY: mp_alltoall,& 27 mp_sum 28 USE mp2_types, ONLY: integ_mat_buffer_type 29 USE rpa_communication, ONLY: communicate_buffer 30#include "./base/base_uses.f90" 31 32 IMPLICIT NONE 33 34 PRIVATE 35 36 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'bse' 37 38 PUBLIC :: mult_B_with_W_and_fill_local_3c_arrays, do_subspace_iterations 39 40CONTAINS 41 42! ************************************************************************************************** 43!> \brief ... 44!> \param B_bar_ijQ_bse_local ... 45!> \param B_abQ_bse_local ... 46!> \param B_bar_iaQ_bse_local ... 47!> \param B_iaQ_bse_local ... 48!> \param homo ... 49!> \param virtual ... 50!> \param num_Z_vectors ... 51!> \param max_iter ... 52!> \param threshold_min_trans ... 53!> \param Eigenval ... 54!> \param para_env ... 55! ************************************************************************************************** 56 SUBROUTINE do_subspace_iterations(B_bar_ijQ_bse_local, B_abQ_bse_local, B_bar_iaQ_bse_local, & 57 B_iaQ_bse_local, homo, virtual, num_Z_vectors, & 58 max_iter, threshold_min_trans, Eigenval, para_env) 59 60 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: B_bar_ijQ_bse_local, B_abQ_bse_local, & 61 B_bar_iaQ_bse_local, B_iaQ_bse_local 62 INTEGER :: homo, virtual, num_Z_vectors, max_iter 63 REAL(KIND=dp) :: threshold_min_trans 64 REAL(KIND=dp), DIMENSION(:) :: Eigenval 65 TYPE(cp_para_env_type), POINTER :: para_env 66 67 CHARACTER(LEN=*), PARAMETER :: routineN = 'do_subspace_iterations', & 68 routineP = moduleN//':'//routineN 69 70 INTEGER :: handle, i_iter, local_RI_size 71 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: M_ia_tmp, M_ji_tmp, RI_vector 72 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: AZ, BZ, Z_vectors 73 74 CALL timeset(routineN, handle) 75 76 ! JW hack 2del 77 threshold_min_trans = 0.01_dp 78 79 ALLOCATE (Z_vectors(homo, virtual, num_Z_vectors)) 80 Z_vectors = 0.0_dp 81 82 ALLOCATE (AZ(homo, virtual, num_Z_vectors)) 83 AZ = 0.0_dp 84 85 ALLOCATE (BZ(homo, virtual, num_Z_vectors)) 86 BZ = 0.0_dp 87 88 local_RI_size = SIZE(B_iaQ_bse_local, 3) 89 90 ALLOCATE (M_ia_tmp(homo, virtual)) 91 M_ia_tmp = 0.0_dp 92 93 ALLOCATE (M_ji_tmp(homo, homo)) 94 M_ji_tmp = 0.0_dp 95 96 ALLOCATE (RI_vector(local_RI_size, num_Z_vectors)) 97 RI_vector = 0.0_dp 98 99 CALL initial_guess_Z_vectors(Z_vectors, Eigenval, num_Z_vectors, homo, virtual) 100 101 DO i_iter = 1, max_iter 102 103 CALL compute_AZ(AZ, Z_vectors, B_iaQ_bse_local, B_bar_ijQ_bse_local, B_abQ_bse_local, & 104 M_ia_tmp, RI_vector, Eigenval, homo, virtual, num_Z_vectors, local_RI_size, & 105 para_env) 106 107 CALL compute_BZ(BZ, Z_vectors, B_iaQ_bse_local, B_bar_iaQ_bse_local, & 108 M_ji_tmp, RI_vector, homo, virtual, num_Z_vectors, local_RI_size, & 109 para_env) 110 111 END DO 112 113 DEALLOCATE (AZ, BZ, Z_vectors, M_ia_tmp, M_ji_tmp, RI_vector) 114 115 CALL timestop(handle) 116 117 END SUBROUTINE 118 119! ************************************************************************************************** 120!> \brief ... 121!> \param BZ ... 122!> \param Z_vectors ... 123!> \param B_iaQ_bse_local ... 124!> \param B_bar_iaQ_bse_local ... 125!> \param M_ji_tmp ... 126!> \param RI_vector ... 127!> \param homo ... 128!> \param virtual ... 129!> \param num_Z_vectors ... 130!> \param local_RI_size ... 131!> \param para_env ... 132! ************************************************************************************************** 133 SUBROUTINE compute_BZ(BZ, Z_vectors, B_iaQ_bse_local, B_bar_iaQ_bse_local, & 134 M_ji_tmp, RI_vector, homo, virtual, num_Z_vectors, local_RI_size, para_env) 135 136 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: BZ, Z_vectors, B_iaQ_bse_local, & 137 B_bar_iaQ_bse_local 138 REAL(KIND=dp), DIMENSION(:, :) :: M_ji_tmp, RI_vector 139 INTEGER :: homo, virtual, num_Z_vectors, & 140 local_RI_size 141 TYPE(cp_para_env_type), POINTER :: para_env 142 143 CHARACTER(LEN=*), PARAMETER :: routineN = 'compute_BZ', routineP = moduleN//':'//routineN 144 145 INTEGER :: i_Z_vector, LLL 146 147 BZ(:, :, :) = 0.0_dp 148 149 CALL compute_v_ia_jb_part(BZ, Z_vectors, B_iaQ_bse_local, RI_vector, local_RI_size, & 150 num_Z_vectors, homo, virtual) 151 152 DO i_Z_vector = 1, num_Z_vectors 153 154 DO LLL = 1, local_RI_size 155 156 ! M_ji^P = sum_b Z_jb*B_bi^P 157 CALL DGEMM("N", "T", homo, homo, virtual, 1.0_dp, Z_vectors(:, :, i_Z_vector), homo, & 158 B_iaQ_bse_local(:, :, LLL), homo, 0.0_dp, M_ji_tmp, homo) 159 ! (BZ)_ia = sum_jP M_ij^P*B^bar_ja^P 160 CALL DGEMM("T", "N", homo, virtual, homo, 1.0_dp, M_ji_tmp, homo, & 161 B_bar_iaQ_bse_local, homo, 1.0_dp, BZ(:, :, i_Z_vector), homo) 162 163 END DO 164 165 END DO 166 167 ! we make the mp_sum to sum over all RI basis functions 168 CALL mp_sum(BZ, para_env%group) 169 170 END SUBROUTINE 171 172! ************************************************************************************************** 173!> \brief ... 174!> \param AZ ... 175!> \param Z_vectors ... 176!> \param B_iaQ_bse_local ... 177!> \param B_bar_ijQ_bse_local ... 178!> \param B_abQ_bse_local ... 179!> \param M_ia_tmp ... 180!> \param RI_vector ... 181!> \param Eigenval ... 182!> \param homo ... 183!> \param virtual ... 184!> \param num_Z_vectors ... 185!> \param local_RI_size ... 186!> \param para_env ... 187! ************************************************************************************************** 188 SUBROUTINE compute_AZ(AZ, Z_vectors, B_iaQ_bse_local, B_bar_ijQ_bse_local, B_abQ_bse_local, M_ia_tmp, & 189 RI_vector, Eigenval, homo, virtual, num_Z_vectors, local_RI_size, para_env) 190 191 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: AZ, Z_vectors, B_iaQ_bse_local, & 192 B_bar_ijQ_bse_local, B_abQ_bse_local 193 REAL(KIND=dp), DIMENSION(:, :) :: M_ia_tmp, RI_vector 194 REAL(KIND=dp), DIMENSION(:) :: Eigenval 195 INTEGER :: homo, virtual, num_Z_vectors, & 196 local_RI_size 197 TYPE(cp_para_env_type), POINTER :: para_env 198 199 CHARACTER(LEN=*), PARAMETER :: routineN = 'compute_AZ', routineP = moduleN//':'//routineN 200 201 INTEGER :: a_virt, handle, i_occ, i_Z_vector, LLL 202 REAL(KIND=dp) :: eigen_diff 203 204 CALL timeset(routineN, handle) 205 206 AZ(:, :, :) = 0.0_dp 207 208 CALL compute_v_ia_jb_part(AZ, Z_vectors, B_iaQ_bse_local, RI_vector, local_RI_size, & 209 num_Z_vectors, homo, virtual) 210 211 DO i_Z_vector = 1, num_Z_vectors 212 213 ! JW TO DO: OMP PARALLELIZATION 214 DO LLL = 1, local_RI_size 215 216 ! M_ja^P = sum_j Z_jb*B_ba^P 217 CALL DGEMM("N", "N", homo, virtual, virtual, 1.0_dp, Z_vectors(:, :, i_Z_vector), homo, & 218 B_abQ_bse_local(:, :, LLL), virtual, 0.0_dp, M_ia_tmp, homo) 219 220 ! (AZ)_ia = sum_jP B_bar_ij^P*M_ja^P 221 CALL DGEMM("N", "N", homo, virtual, homo, 1.0_dp, B_bar_ijQ_bse_local(:, :, LLL), homo, & 222 M_ia_tmp, homo, 1.0_dp, AZ(:, :, i_Z_vector), homo) 223 224 END DO 225 226 END DO 227 228 ! we make the mp_sum to sum over all RI basis functions 229 CALL mp_sum(AZ, para_env%group) 230 231 ! add (e_a-e_i)*Z_ia 232 DO i_occ = 1, homo 233 DO a_virt = 1, virtual 234 235 eigen_diff = Eigenval(a_virt + homo) - Eigenval(i_occ) 236 237 AZ(i_occ, a_virt, :) = AZ(i_occ, a_virt, :) + Z_vectors(i_occ, a_virt, :)*eigen_diff 238 239 END DO 240 END DO 241 242 CALL timestop(handle) 243 244 END SUBROUTINE 245 246! ************************************************************************************************** 247!> \brief ... 248!> \param AZ ... 249!> \param Z_vectors ... 250!> \param B_iaQ_bse_local ... 251!> \param RI_vector ... 252!> \param local_RI_size ... 253!> \param num_Z_vectors ... 254!> \param homo ... 255!> \param virtual ... 256! ************************************************************************************************** 257 SUBROUTINE compute_v_ia_jb_part(AZ, Z_vectors, B_iaQ_bse_local, RI_vector, local_RI_size, & 258 num_Z_vectors, homo, virtual) 259 260 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: AZ, Z_vectors, B_iaQ_bse_local 261 REAL(KIND=dp), DIMENSION(:, :) :: RI_vector 262 INTEGER :: local_RI_size, num_Z_vectors, homo, & 263 virtual 264 265 CHARACTER(LEN=*), PARAMETER :: routineN = 'compute_v_ia_jb_part', & 266 routineP = moduleN//':'//routineN 267 268 INTEGER :: a_virt, handle, i_occ, i_Z_vector, LLL 269 270 CALL timeset(routineN, handle) 271 272 RI_vector = 0.0_dp 273 274 ! v_P = sum_jb B_jb^P Z_jb 275 DO LLL = 1, local_RI_size 276 DO i_Z_vector = 1, num_Z_vectors 277 DO i_occ = 1, homo 278 DO a_virt = 1, virtual 279 280 RI_vector(LLL, i_Z_vector) = RI_vector(LLL, i_Z_vector) + & 281 Z_vectors(i_occ, a_virt, i_Z_vector)* & 282 B_iaQ_bse_local(i_occ, a_virt, LLL) 283 284 END DO 285 END DO 286 END DO 287 END DO 288 289 ! AZ = sum_P B_ia^P*v_P + ... 290 DO LLL = 1, local_RI_size 291 DO i_Z_vector = 1, num_Z_vectors 292 DO i_occ = 1, homo 293 DO a_virt = 1, virtual 294 295 AZ(i_occ, a_virt, i_Z_vector) = AZ(i_occ, a_virt, i_Z_vector) + & 296 RI_vector(LLL, i_Z_vector)* & 297 B_iaQ_bse_local(i_occ, a_virt, LLL) 298 299 END DO 300 END DO 301 END DO 302 END DO 303 304 CALL timestop(handle) 305 306 END SUBROUTINE 307 308! ************************************************************************************************** 309!> \brief ... 310!> \param Z_vectors ... 311!> \param Eigenval ... 312!> \param num_Z_vectors ... 313!> \param homo ... 314!> \param virtual ... 315! ************************************************************************************************** 316 SUBROUTINE initial_guess_Z_vectors(Z_vectors, Eigenval, num_Z_vectors, homo, virtual) 317 318 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: Z_vectors 319 REAL(KIND=dp), DIMENSION(:) :: Eigenval 320 INTEGER :: num_Z_vectors, homo, virtual 321 322 CHARACTER(LEN=*), PARAMETER :: routineN = 'initial_guess_Z_vectors', & 323 routineP = moduleN//':'//routineN 324 325 INTEGER :: a_virt, handle, i_occ, i_Z_vector, & 326 min_loc(2) 327 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: eigen_diff_ia 328 329 CALL timeset(routineN, handle) 330 331 ALLOCATE (eigen_diff_ia(homo, virtual)) 332 333 DO i_occ = 1, homo 334 DO a_virt = 1, virtual 335 eigen_diff_ia(i_occ, a_virt) = Eigenval(a_virt + homo) - Eigenval(i_occ) 336 END DO 337 END DO 338 339 DO i_Z_vector = 1, num_Z_vectors 340 341 min_loc = MINLOC(eigen_diff_ia) 342 343 Z_vectors(min_loc(1), min_loc(2), i_Z_vector) = 1.0_dp 344 345 eigen_diff_ia(min_loc(1), min_loc(2)) = 1.0E20_dp 346 347 END DO 348 349 DEALLOCATE (eigen_diff_ia) 350 351 CALL timestop(handle) 352 353 END SUBROUTINE 354 355! ************************************************************************************************** 356!> \brief ... 357!> \param fm_mat_S_ij_bse ... 358!> \param fm_mat_S_ab_bse ... 359!> \param fm_mat_S ... 360!> \param fm_mat_Q_static_bse ... 361!> \param fm_mat_Q_static_bse_gemm ... 362!> \param B_bar_ijQ_bse_local ... 363!> \param B_abQ_bse_local ... 364!> \param B_bar_iaQ_bse_local ... 365!> \param B_iaQ_bse_local ... 366!> \param dimen_RI ... 367!> \param homo ... 368!> \param virtual ... 369!> \param dimen_ia ... 370!> \param gd_array ... 371!> \param color_sub ... 372!> \param para_env ... 373! ************************************************************************************************** 374 SUBROUTINE mult_B_with_W_and_fill_local_3c_arrays(fm_mat_S_ij_bse, fm_mat_S_ab_bse, fm_mat_S, fm_mat_Q_static_bse, & 375 fm_mat_Q_static_bse_gemm, & 376 B_bar_ijQ_bse_local, B_abQ_bse_local, B_bar_iaQ_bse_local, & 377 B_iaQ_bse_local, dimen_RI, homo, virtual, dimen_ia, & 378 gd_array, color_sub, para_env) 379 380 TYPE(cp_fm_type), POINTER :: fm_mat_S_ij_bse, fm_mat_S_ab_bse, & 381 fm_mat_S, fm_mat_Q_static_bse, & 382 fm_mat_Q_static_bse_gemm 383 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: B_bar_ijQ_bse_local, B_abQ_bse_local, & 384 B_bar_iaQ_bse_local, B_iaQ_bse_local 385 INTEGER :: dimen_RI, homo, virtual, dimen_ia 386 TYPE(group_dist_d1_type) :: gd_array 387 INTEGER :: color_sub 388 TYPE(cp_para_env_type), POINTER :: para_env 389 390 CHARACTER(LEN=*), PARAMETER :: routineN = 'mult_B_with_W_and_fill_local_3c_arrays', & 391 routineP = moduleN//':'//routineN 392 393 INTEGER :: handle, i_global, iiB, info_chol, & 394 j_global, jjB, ncol_local, nrow_local 395 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices 396 TYPE(cp_fm_type), POINTER :: fm_mat_S_bar_ia_bse, & 397 fm_mat_S_bar_ij_bse, fm_mat_work 398 399 CALL timeset(routineN, handle) 400 401 CALL cp_fm_create(fm_mat_S_bar_ia_bse, fm_mat_S%matrix_struct) 402 CALL cp_fm_to_fm(fm_mat_S, fm_mat_S_bar_ia_bse) 403 CALL cp_fm_set_all(fm_mat_S_bar_ia_bse, 0.0_dp) 404 405 CALL cp_fm_create(fm_mat_S_bar_ij_bse, fm_mat_S_ij_bse%matrix_struct) 406 CALL cp_fm_to_fm(fm_mat_S_ij_bse, fm_mat_S_bar_ij_bse) 407 CALL cp_fm_set_all(fm_mat_S_bar_ij_bse, 0.0_dp) 408 409 CALL cp_fm_create(fm_mat_work, fm_mat_Q_static_bse_gemm%matrix_struct) 410 CALL cp_fm_to_fm(fm_mat_Q_static_bse_gemm, fm_mat_work) 411 CALL cp_fm_set_all(fm_mat_work, 0.0_dp) 412 413 ! get info of fm_mat_Q_static_bse and compute ((1+Q(0))^-1-1) 414 CALL cp_fm_get_info(matrix=fm_mat_Q_static_bse_gemm, & 415 nrow_local=nrow_local, & 416 ncol_local=ncol_local, & 417 row_indices=row_indices, & 418 col_indices=col_indices) 419 420 DO jjB = 1, ncol_local 421 j_global = col_indices(jjB) 422 DO iiB = 1, nrow_local 423 i_global = row_indices(iiB) 424 IF (j_global == i_global .AND. i_global <= dimen_RI) THEN 425 fm_mat_Q_static_bse_gemm%local_data(iiB, jjB) = fm_mat_Q_static_bse_gemm%local_data(iiB, jjB) + 1.0_dp 426 END IF 427 END DO 428 END DO 429 430 ! calculate Trace(Log(Matrix)) as Log(DET(Matrix)) via cholesky decomposition 431 CALL cp_fm_cholesky_decompose(matrix=fm_mat_Q_static_bse_gemm, n=dimen_RI, info_out=info_chol) 432 CPASSERT(info_chol == 0) 433 434 ! calculate [1+Q(i0)]^-1 435 CALL cp_fm_cholesky_invert(fm_mat_Q_static_bse_gemm) 436 ! symmetrize the result 437 CALL cp_fm_upper_to_full(fm_mat_Q_static_bse_gemm, fm_mat_work) 438 439 CALL cp_gemm(transa="N", transb="N", m=homo**2, n=dimen_RI, k=dimen_RI, alpha=1.0_dp, & 440 matrix_a=fm_mat_S_ij_bse, matrix_b=fm_mat_Q_static_bse, beta=0.0_dp, & 441 matrix_c=fm_mat_S_bar_ij_bse) 442 443 ! fm_mat_S_bar_ia_bse has a different blacs_env as fm_mat_S_ij_bse since we take 444 ! fm_mat_S from RPA. Therefore, we also need a different fm_mat_Q_static_bse_gemm 445 CALL cp_gemm(transa="N", transb="N", m=dimen_ia, n=dimen_RI, k=dimen_RI, alpha=1.0_dp, & 446 matrix_a=fm_mat_S, matrix_b=fm_mat_Q_static_bse_gemm, beta=0.0_dp, & 447 matrix_c=fm_mat_S_bar_ia_bse) 448 449 CALL allocate_and_fill_local_array(B_iaQ_bse_local, fm_mat_S, gd_array, color_sub, homo, virtual, dimen_RI, para_env) 450 451 CALL allocate_and_fill_local_array(B_bar_iaQ_bse_local, fm_mat_S_bar_ia_bse, gd_array, color_sub, homo, virtual, & 452 dimen_RI, para_env) 453 454 CALL allocate_and_fill_local_array(B_bar_ijQ_bse_local, fm_mat_S_bar_ij_bse, gd_array, color_sub, homo, homo, & 455 dimen_RI, para_env) 456 457 CALL allocate_and_fill_local_array(B_abQ_bse_local, fm_mat_S_ab_bse, gd_array, color_sub, virtual, virtual, & 458 dimen_RI, para_env) 459 460 CALL cp_fm_release(fm_mat_S_bar_ia_bse) 461 CALL cp_fm_release(fm_mat_S_bar_ij_bse) 462 CALL cp_fm_release(fm_mat_work) 463 464 CALL timestop(handle) 465 466 END SUBROUTINE 467 468! ************************************************************************************************** 469!> \brief ... 470!> \param B_local ... 471!> \param fm_mat_S ... 472!> \param gd_array ... 473!> \param color_sub ... 474!> \param small_size ... 475!> \param big_size ... 476!> \param dimen_RI ... 477!> \param para_env ... 478! ************************************************************************************************** 479 SUBROUTINE allocate_and_fill_local_array(B_local, fm_mat_S, gd_array, & 480 color_sub, small_size, big_size, dimen_RI, para_env) 481 482 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: B_local 483 TYPE(cp_fm_type), POINTER :: fm_mat_S 484 TYPE(group_dist_d1_type) :: gd_array 485 INTEGER :: color_sub, small_size, big_size, dimen_RI 486 TYPE(cp_para_env_type), POINTER :: para_env 487 488 CHARACTER(LEN=*), PARAMETER :: routineN = 'allocate_and_fill_local_array', & 489 routineP = moduleN//':'//routineN 490 491 INTEGER :: combi_index, end_RI, handle, handle1, i_comm, i_entry, iiB, imepos, jjB, & 492 level_big_size, level_small_size, ncol_local, nrow_local, num_comm_cycles, RI_index, & 493 size_RI, start_RI 494 INTEGER, ALLOCATABLE, DIMENSION(:) :: entry_counter, mepos_from_RI_index, & 495 num_entries_rec, num_entries_send 496 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices 497 INTEGER, DIMENSION(:, :), POINTER :: req_array 498 REAL(KIND=dp) :: matrix_el 499 TYPE(integ_mat_buffer_type), ALLOCATABLE, & 500 DIMENSION(:) :: buffer_rec, buffer_send 501 502 CALL timeset(routineN, handle) 503 504 ALLOCATE (mepos_from_RI_index(dimen_RI)) 505 mepos_from_RI_index = 0 506 507 DO imepos = 0, para_env%num_pe - 1 508 509 CALL get_group_dist(gd_array, pos=imepos, starts=start_RI, ends=end_RI) 510 511 mepos_from_RI_index(start_RI:end_RI) = imepos 512 513 END DO 514 515 ! color_sub is automatically the number of the process since every subgroup has only one MPI rank 516 CALL get_group_dist(gd_array, color_sub, start_RI, end_RI, size_RI) 517 518 ALLOCATE (B_local(small_size, big_size, 1:size_RI)) 519 520 ALLOCATE (num_entries_send(0:para_env%num_pe - 1)) 521 ALLOCATE (num_entries_rec(0:para_env%num_pe - 1)) 522 523 ALLOCATE (req_array(1:para_env%num_pe, 4)) 524 525 ALLOCATE (entry_counter(0:para_env%num_pe - 1)) 526 527 CALL cp_fm_get_info(matrix=fm_mat_S, & 528 nrow_local=nrow_local, & 529 ncol_local=ncol_local, & 530 row_indices=row_indices, & 531 col_indices=col_indices) 532 533 num_comm_cycles = 10 534 535 ! communicate not all due to huge memory overhead, since for every number in fm_mat_S, we store 536 ! three additional ones (RI index, first MO index, second MO index!!) 537 DO i_comm = 0, num_comm_cycles - 1 538 539 num_entries_send = 0 540 num_entries_rec = 0 541 542 ! loop over RI index to get the number of sent entries 543 DO jjB = 1, ncol_local 544 545 RI_index = col_indices(jjB) 546 547 IF (MODULO(RI_index, num_comm_cycles) /= i_comm) CYCLE 548 549 imepos = mepos_from_RI_index(RI_index) 550 551 num_entries_send(imepos) = num_entries_send(imepos) + nrow_local 552 553 END DO 554 555 CALL mp_alltoall(num_entries_send, num_entries_rec, 1, para_env%group) 556 557 ALLOCATE (buffer_rec(0:para_env%num_pe - 1)) 558 ALLOCATE (buffer_send(0:para_env%num_pe - 1)) 559 560 ! allocate data message and corresponding indices 561 DO imepos = 0, para_env%num_pe - 1 562 563 ALLOCATE (buffer_rec(imepos)%msg(num_entries_rec(imepos))) 564 buffer_rec(imepos)%msg = 0.0_dp 565 566 ALLOCATE (buffer_send(imepos)%msg(num_entries_send(imepos))) 567 buffer_send(imepos)%msg = 0.0_dp 568 569 ALLOCATE (buffer_rec(imepos)%indx(num_entries_rec(imepos), 3)) 570 buffer_rec(imepos)%indx = 0 571 572 ALLOCATE (buffer_send(imepos)%indx(num_entries_send(imepos), 3)) 573 buffer_send(imepos)%indx = 0 574 575 END DO 576 577 entry_counter(:) = 0 578 579 ! loop over RI index for filling the send-buffer 580 DO jjB = 1, ncol_local 581 582 RI_index = col_indices(jjB) 583 584 IF (MODULO(RI_index, num_comm_cycles) /= i_comm) CYCLE 585 586 imepos = mepos_from_RI_index(RI_index) 587 588 DO iiB = 1, nrow_local 589 590 combi_index = row_indices(iiB) 591 level_small_size = MAX(1, combi_index - 1)/big_size + 1 592 level_big_size = combi_index - (level_small_size - 1)*big_size 593 594 entry_counter(imepos) = entry_counter(imepos) + 1 595 596 buffer_send(imepos)%msg(entry_counter(imepos)) = fm_mat_S%local_data(iiB, jjB) 597 598 buffer_send(imepos)%indx(entry_counter(imepos), 1) = RI_index 599 buffer_send(imepos)%indx(entry_counter(imepos), 2) = level_small_size 600 buffer_send(imepos)%indx(entry_counter(imepos), 3) = level_big_size 601 602 END DO 603 604 END DO 605 606 CALL timeset("BSE_comm_data", handle1) 607 608 CALL communicate_buffer(para_env, num_entries_rec, num_entries_send, buffer_rec, buffer_send, req_array) 609 610 CALL timestop(handle1) 611 612 ! fill B_local 613 DO imepos = 0, para_env%num_pe - 1 614 615 DO i_entry = 1, num_entries_rec(imepos) 616 617 RI_index = buffer_rec(imepos)%indx(i_entry, 1) - start_RI + 1 618 level_small_size = buffer_rec(imepos)%indx(i_entry, 2) 619 level_big_size = buffer_rec(imepos)%indx(i_entry, 3) 620 621 matrix_el = buffer_rec(imepos)%msg(i_entry) 622 623 B_local(level_small_size, level_big_size, RI_index) = matrix_el 624 625 END DO 626 627 END DO 628 629 DO imepos = 0, para_env%num_pe - 1 630 DEALLOCATE (buffer_send(imepos)%msg) 631 DEALLOCATE (buffer_send(imepos)%indx) 632 DEALLOCATE (buffer_rec(imepos)%msg) 633 DEALLOCATE (buffer_rec(imepos)%indx) 634 END DO 635 636 DEALLOCATE (buffer_rec, buffer_send) 637 638 END DO 639 640 DEALLOCATE (num_entries_send, num_entries_rec) 641 642 DEALLOCATE (mepos_from_RI_index) 643 644 DEALLOCATE (entry_counter, req_array) 645 646 CALL timestop(handle) 647 648 END SUBROUTINE 649 650END MODULE bse 651