1!--------------------------------------------------------------------------------------------------! 2! CP2K: A general program to perform molecular dynamics simulations ! 3! Copyright (C) 2000 - 2020 CP2K developers group ! 4!--------------------------------------------------------------------------------------------------! 5 6! ************************************************************************************************** 7!> \brief Routines to calculate RI-GPW-MP2 energy using pw 8!> \par History 9!> 06.2012 created [Mauro Del Ben] 10!> 03.2019 Refactored from mp2_ri_gpw [Frederick Stein] 11! ************************************************************************************************** 12MODULE mp2_ri_gpw 13 USE cp_para_env, ONLY: cp_para_env_create,& 14 cp_para_env_release 15 USE cp_para_types, ONLY: cp_para_env_type 16 USE group_dist_types, ONLY: get_group_dist,& 17 group_dist_d1_type,& 18 maxsize,& 19 release_group_dist 20 USE kinds, ONLY: dp,& 21 int_8 22 USE machine, ONLY: m_flush,& 23 m_memory,& 24 m_walltime 25 USE message_passing, ONLY: mp_allgather,& 26 mp_comm_split_direct,& 27 mp_min,& 28 mp_sendrecv,& 29 mp_sum 30 USE mp2_ri_grad_util, ONLY: complete_gamma 31 USE mp2_types, ONLY: mp2_type 32 33!$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num 34#include "./base/base_uses.f90" 35 36 IMPLICIT NONE 37 38 PRIVATE 39 40 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'mp2_ri_gpw' 41 42 PUBLIC :: mp2_ri_gpw_compute_en 43 44CONTAINS 45 46! ************************************************************************************************** 47!> \brief ... 48!> \param Emp2 ... 49!> \param Emp2_Cou ... 50!> \param Emp2_EX ... 51!> \param BIb_C ... 52!> \param mp2_env ... 53!> \param para_env ... 54!> \param para_env_sub ... 55!> \param color_sub ... 56!> \param gd_array ... 57!> \param gd_B_virtual ... 58!> \param Eigenval ... 59!> \param nmo ... 60!> \param homo ... 61!> \param dimen_RI ... 62!> \param unit_nr ... 63!> \param calc_forces ... 64!> \param calc_ex ... 65!> \param open_shell_SS ... 66!> \param BIb_C_beta ... 67!> \param homo_beta ... 68!> \param Eigenval_beta ... 69!> \param gd_B_virtual_beta ... 70! ************************************************************************************************** 71 SUBROUTINE mp2_ri_gpw_compute_en(Emp2, Emp2_Cou, Emp2_EX, BIb_C, mp2_env, para_env, para_env_sub, color_sub, & 72 gd_array, gd_B_virtual, & 73 Eigenval, nmo, homo, dimen_RI, unit_nr, calc_forces, calc_ex, & 74 open_shell_SS, BIb_C_beta, homo_beta, Eigenval_beta, & 75 gd_B_virtual_beta) 76 REAL(KIND=dp), INTENT(OUT) :: Emp2, Emp2_Cou, Emp2_EX 77 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), & 78 INTENT(INOUT) :: BIb_C 79 TYPE(mp2_type), POINTER :: mp2_env 80 TYPE(cp_para_env_type), POINTER :: para_env, para_env_sub 81 INTEGER, INTENT(IN) :: color_sub 82 TYPE(group_dist_d1_type), INTENT(INOUT) :: gd_array, gd_B_virtual 83 REAL(KIND=dp), DIMENSION(:), INTENT(IN) :: Eigenval 84 INTEGER, INTENT(IN) :: nmo, homo, dimen_RI, unit_nr 85 LOGICAL, INTENT(IN) :: calc_forces, calc_ex 86 LOGICAL, INTENT(IN), OPTIONAL :: open_shell_SS 87 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), & 88 INTENT(INOUT), OPTIONAL :: BIb_C_beta 89 INTEGER, INTENT(IN), OPTIONAL :: homo_beta 90 REAL(KIND=dp), DIMENSION(:), INTENT(IN), OPTIONAL :: Eigenval_beta 91 TYPE(group_dist_d1_type), INTENT(INOUT), OPTIONAL :: gd_B_virtual_beta 92 93 CHARACTER(LEN=*), PARAMETER :: routineN = 'mp2_ri_gpw_compute_en', & 94 routineP = moduleN//':'//routineN 95 96 INTEGER :: a, a_global, b, b_global, best_block_size, best_integ_group_size, block_size, & 97 comm_exchange, comm_P, end_point, handle, handle2, handle3, iiB, ij_counter, & 98 ij_counter_send, ij_index, integ_group_size, irep, jjB, Lend_pos, Lstart_pos, & 99 max_ij_pairs, min_integ_group_size, my_B_size, my_B_size_beta, my_B_virtual_end, & 100 my_B_virtual_end_beta, my_B_virtual_start, my_B_virtual_start_beta, my_block_size, & 101 my_group_L_end, my_group_L_size, my_group_L_size_orig, my_group_L_start, my_homo_beta, & 102 my_i, my_ij_pairs, my_j, my_new_group_L_size, my_num_dgemm_call, ngroup, num_IJ_blocks 103 INTEGER :: num_integ_group, pos_integ_group, proc_receive, proc_send, proc_shift, & 104 rec_B_size, rec_B_virtual_end, rec_B_virtual_start, rec_L_size, send_B_size, & 105 send_B_virtual_end, send_B_virtual_start, send_block_size, send_i, send_ij_index, send_j, & 106 start_point, sub_P_color, sub_sub_color, total_ij_pairs, virtual, virtual_beta 107 INTEGER, ALLOCATABLE, DIMENSION(:) :: integ_group_pos2color_sub, num_ij_pairs, & 108 proc_map, proc_map_rep, & 109 sizes_array_orig, sub_proc_map 110 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: ij_map 111 INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: ranges_info_array 112 LOGICAL :: my_alpha_alpha_case, my_alpha_beta_case, & 113 my_beta_beta_case, my_open_shell_SS 114 REAL(KIND=dp) :: actual_flop_rate, amp_fac, mem_for_aK, mem_for_comm, mem_for_iaK, & 115 mem_for_rep, mem_min, mem_per_group, mem_real, my_flop_rate, null_mat_rec(2, 2, 2), & 116 null_mat_send(2, 2, 2), sym_fac, t_end, t_start 117 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: external_ab, external_i_aL, local_ab, & 118 local_ba, t_ab 119 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: B_ia_Q, B_ia_Q_beta, BI_C_rec, & 120 local_i_aL, local_j_aL, Y_i_aP, & 121 Y_j_aP, Y_j_aP_beta 122 TYPE(cp_para_env_type), POINTER :: para_env_exchange, para_env_P, & 123 para_env_rep 124 125 CALL timeset(routineN, handle) 126 127 my_open_shell_SS = .FALSE. 128 IF (PRESENT(open_shell_SS)) my_open_shell_SS = open_shell_SS 129 130 ! t_ab = amp_fac*(:,a|:,b)-(:,b|:,a) 131 IF (calc_forces) amp_fac = 2.0_dp 132 ! If we calculate the gradient we need to distinguish 133 ! between alpha-alpha and beta-beta cases for UMP2 134 135 my_alpha_alpha_case = .FALSE. 136 my_beta_beta_case = .FALSE. 137 my_alpha_beta_case = .FALSE. 138 IF (calc_forces) THEN 139 IF (my_open_shell_SS) THEN 140 amp_fac = 1.0_dp 141 IF ((.NOT. ALLOCATED(mp2_env%ri_grad%P_ij)) & 142 .AND. (.NOT. ALLOCATED(mp2_env%ri_grad%P_ab))) THEN 143 my_alpha_alpha_case = .TRUE. 144 amp_fac = 1.0_dp 145 ELSE 146 IF ((.NOT. ALLOCATED(mp2_env%ri_grad%P_ij_beta)) & 147 .AND. (.NOT. ALLOCATED(mp2_env%ri_grad%P_ab_beta))) THEN 148 my_beta_beta_case = .TRUE. 149 ENDIF 150 ENDIF 151 ENDIF 152 ENDIF 153 154 my_alpha_beta_case = .FALSE. 155 IF (PRESENT(BIb_C_beta) .AND. & 156 PRESENT(gd_B_virtual_beta) .AND. & 157 PRESENT(homo_beta) .AND. & 158 PRESENT(Eigenval_beta)) THEN 159 my_alpha_beta_case = .TRUE. 160 my_alpha_alpha_case = .FALSE. 161 ENDIF 162 163 IF (my_alpha_beta_case) amp_fac = 1.0_dp 164 165 virtual = nmo - homo 166 IF (my_alpha_beta_case) virtual_beta = nmo - homo_beta 167 168 CALL mp2_ri_get_sizes( & 169 mp2_env, para_env, para_env_sub, gd_array, gd_B_virtual, & 170 homo, dimen_RI, unit_nr, color_sub, best_block_size, best_integ_group_size, block_size, & 171 integ_group_size, min_integ_group_size, my_B_size, my_B_virtual_end, my_B_virtual_start, my_group_L_size, & 172 my_group_L_start, my_group_L_end, ngroup, num_IJ_blocks, num_integ_group, pos_integ_group, virtual, my_alpha_beta_case, & 173 my_open_shell_SS, mem_for_aK, mem_for_comm, mem_for_iaK, mem_for_rep, mem_min, mem_per_group, mem_real) 174 175 IF (my_alpha_beta_case) THEN 176 CALL get_group_dist(gd_B_virtual_beta, para_env_sub%mepos, my_B_virtual_start_beta, my_B_virtual_end_beta, my_B_size_beta) 177 my_homo_beta = homo_beta 178 ELSE 179 my_B_virtual_start_beta = my_B_virtual_start 180 my_B_virtual_end_beta = my_B_virtual_end 181 my_B_size_beta = my_B_size 182 my_homo_beta = homo 183 END IF 184 185 ! now create a group that contains all the proc that have the same virtual starting point 186 ! in the integ group 187 ! sub_sub_color=para_env_sub%mepos 188 CALL mp2_ri_create_group( & 189 BIb_C, para_env, para_env_sub, homo, color_sub, & 190 gd_array%sizes, calc_forces, & 191 comm_exchange, integ_group_size, my_B_size, iiB, my_group_L_end, & 192 my_group_L_size, my_group_L_size_orig, my_group_L_start, my_new_group_L_size, & 193 sub_sub_color, integ_group_pos2color_sub, proc_map, proc_map_rep, sizes_array_orig, & 194 sub_proc_map, ranges_info_array, para_env_exchange, para_env_rep, num_integ_group) 195 196 ! ***************************************************************** 197 ! ********** REPLICATION-BLOCKED COMMUNICATION SCHEME *********** 198 ! ***************************************************************** 199 ! introduce block size, the number of occupied orbitals has to be a 200 ! multiple of the block size 201 202 ! Calculate the maximum number of ij pairs that have to be computed 203 ! among groups 204 CALL mp2_ri_communication(my_alpha_beta_case, total_ij_pairs, homo, my_homo_beta, num_IJ_blocks, & 205 block_size, ngroup, ij_map, color_sub, my_ij_pairs, my_open_shell_SS, unit_nr) 206 207 ALLOCATE (num_ij_pairs(0:para_env_exchange%num_pe - 1)) 208 num_ij_pairs = 0 209 num_ij_pairs(para_env_exchange%mepos) = my_ij_pairs 210 CALL mp_sum(num_ij_pairs, para_env_exchange%group) 211 212 max_ij_pairs = MAXVAL(num_ij_pairs) 213 214 ! start real stuff 215 IF (.NOT. my_alpha_beta_case) THEN 216 CALL mp2_ri_allocate(local_ab, t_ab, mp2_env, homo, virtual, dimen_RI, my_B_size, & 217 block_size, my_B_size_beta, my_group_L_size, local_i_aL, & 218 local_j_aL, calc_forces, Y_i_aP, Y_j_aP, & 219 my_alpha_beta_case, & 220 my_beta_beta_case) 221 ELSE 222 CALL mp2_ri_allocate(local_ab, t_ab, mp2_env, homo, virtual, dimen_RI, my_B_size, & 223 block_size, my_B_size_beta, my_group_L_size, local_i_aL, & 224 local_j_aL, calc_forces, Y_i_aP, Y_j_aP_beta, & 225 my_alpha_beta_case, & 226 my_beta_beta_case, local_ba, virtual_beta) 227 ENDIF 228 229 CALL timeset(routineN//"_RI_loop", handle2) 230 null_mat_rec = 0.0_dp 231 null_mat_send = 0.0_dp 232 Emp2 = 0.0_dp 233 Emp2_Cou = 0.0_dp 234 Emp2_EX = 0.0_dp 235 my_num_dgemm_call = 0 236 my_flop_rate = 0.0_dp 237 DO ij_index = 1, max_ij_pairs 238 239 IF (ij_index <= my_ij_pairs) THEN 240 ! We have work to do 241 ij_counter = (ij_index - MIN(1, color_sub))*ngroup + color_sub 242 my_i = ij_map(ij_counter, 1) 243 my_j = ij_map(ij_counter, 2) 244 my_block_size = ij_map(ij_counter, 3) 245 246 local_i_aL = 0.0_dp 247 DO irep = 0, num_integ_group - 1 248 Lstart_pos = ranges_info_array(1, irep, para_env_exchange%mepos) 249 Lend_pos = ranges_info_array(2, irep, para_env_exchange%mepos) 250 start_point = ranges_info_array(3, irep, para_env_exchange%mepos) 251 end_point = ranges_info_array(4, irep, para_env_exchange%mepos) 252 253 local_i_aL(Lstart_pos:Lend_pos, 1:my_B_size, 1:my_block_size) = & 254 BIb_C(start_point:end_point, 1:my_B_size, my_i:my_i + my_block_size - 1) 255 END DO 256 257 local_j_aL = 0.0_dp 258 DO irep = 0, num_integ_group - 1 259 Lstart_pos = ranges_info_array(1, irep, para_env_exchange%mepos) 260 Lend_pos = ranges_info_array(2, irep, para_env_exchange%mepos) 261 start_point = ranges_info_array(3, irep, para_env_exchange%mepos) 262 end_point = ranges_info_array(4, irep, para_env_exchange%mepos) 263 264 IF (.NOT. my_alpha_beta_case) THEN 265 local_j_aL(Lstart_pos:Lend_pos, 1:my_B_size, 1:my_block_size) = & 266 BIb_C(start_point:end_point, 1:my_B_size, my_j:my_j + my_block_size - 1) 267 ELSE 268 local_j_aL(Lstart_pos:Lend_pos, 1:my_B_size_beta, 1:my_block_size) = & 269 BIb_C_beta(start_point:end_point, 1:my_B_size_beta, my_j:my_j + my_block_size - 1) 270 END IF 271 END DO 272 273 ! collect data from other proc 274 CALL timeset(routineN//"_comm", handle3) 275 DO proc_shift = 1, para_env_exchange%num_pe - 1 276 proc_send = proc_map(para_env_exchange%mepos + proc_shift) 277 proc_receive = proc_map(para_env_exchange%mepos - proc_shift) 278 279 send_ij_index = num_ij_pairs(proc_send) 280 281 CALL get_group_dist(gd_array, proc_receive, sizes=rec_L_size) 282 ALLOCATE (BI_C_rec(rec_L_size, MAX(my_B_size, my_B_size_beta), my_block_size)) 283 284 IF (ij_index <= send_ij_index) THEN 285 ij_counter_send = (ij_index - MIN(1, integ_group_pos2color_sub(proc_send)))*ngroup + & 286 integ_group_pos2color_sub(proc_send) 287 send_i = ij_map(ij_counter_send, 1) 288 send_j = ij_map(ij_counter_send, 2) 289 send_block_size = ij_map(ij_counter_send, 3) 290 291 ! occupied i 292 BI_C_rec = 0.0_dp 293 CALL mp_sendrecv(BIb_C(1:my_group_L_size, 1:my_B_size, send_i:send_i + send_block_size - 1), proc_send, & 294 BI_C_rec(1:rec_L_size, 1:my_B_size, 1:my_block_size), proc_receive, & 295 para_env_exchange%group) 296 DO irep = 0, num_integ_group - 1 297 Lstart_pos = ranges_info_array(1, irep, proc_receive) 298 Lend_pos = ranges_info_array(2, irep, proc_receive) 299 start_point = ranges_info_array(3, irep, proc_receive) 300 end_point = ranges_info_array(4, irep, proc_receive) 301 302 local_i_aL(Lstart_pos:Lend_pos, 1:my_B_size, 1:my_block_size) = & 303 BI_C_rec(start_point:end_point, 1:my_B_size, 1:my_block_size) 304 305 END DO 306 307 ! occupied j 308 BI_C_rec = 0.0_dp 309 IF (.NOT. my_alpha_beta_case) THEN 310 CALL mp_sendrecv(BIb_C(1:my_group_L_size, 1:my_B_size, send_j:send_j + send_block_size - 1), proc_send, & 311 BI_C_rec(1:rec_L_size, 1:my_B_size, 1:my_block_size), proc_receive, & 312 para_env_exchange%group) 313 ELSE 314 CALL mp_sendrecv(BIb_C_beta(1:my_group_L_size, 1:my_B_size_beta, send_j:send_j + send_block_size - 1), proc_send, & 315 BI_C_rec(1:rec_L_size, 1:my_B_size_beta, 1:my_block_size), proc_receive, & 316 para_env_exchange%group) 317 END IF 318 319 DO irep = 0, num_integ_group - 1 320 Lstart_pos = ranges_info_array(1, irep, proc_receive) 321 Lend_pos = ranges_info_array(2, irep, proc_receive) 322 start_point = ranges_info_array(3, irep, proc_receive) 323 end_point = ranges_info_array(4, irep, proc_receive) 324 325 IF (.NOT. my_alpha_beta_case) THEN 326 local_j_aL(Lstart_pos:Lend_pos, 1:my_B_size, 1:my_block_size) = & 327 BI_C_rec(start_point:end_point, 1:my_B_size, 1:my_block_size) 328 ELSE 329 local_j_aL(Lstart_pos:Lend_pos, 1:my_B_size_beta, 1:my_block_size) = & 330 BI_C_rec(start_point:end_point, 1:my_B_size_beta, 1:my_block_size) 331 END IF 332 333 END DO 334 335 ELSE 336 ! we send the null matrix while we know that we have to receive something 337 338 ! occupied i 339 BI_C_rec = 0.0_dp 340 CALL mp_sendrecv(null_mat_send, proc_send, & 341 BI_C_rec(1:rec_L_size, 1:my_B_size, 1:my_block_size), proc_receive, & 342 para_env_exchange%group) 343 344 DO irep = 0, num_integ_group - 1 345 Lstart_pos = ranges_info_array(1, irep, proc_receive) 346 Lend_pos = ranges_info_array(2, irep, proc_receive) 347 start_point = ranges_info_array(3, irep, proc_receive) 348 end_point = ranges_info_array(4, irep, proc_receive) 349 350 local_i_aL(Lstart_pos:Lend_pos, 1:my_B_size, 1:my_block_size) = & 351 BI_C_rec(start_point:end_point, 1:my_B_size, 1:my_block_size) 352 353 END DO 354 355 ! occupied j 356 BI_C_rec = 0.0_dp 357 IF (.NOT. my_alpha_beta_case) THEN 358 CALL mp_sendrecv(null_mat_send, proc_send, & 359 BI_C_rec(1:rec_L_size, 1:my_B_size, 1:my_block_size), proc_receive, & 360 para_env_exchange%group) 361 ELSE 362 CALL mp_sendrecv(null_mat_send, proc_send, & 363 BI_C_rec(1:rec_L_size, 1:my_B_size_beta, 1:my_block_size), proc_receive, & 364 para_env_exchange%group) 365 END IF 366 DO irep = 0, num_integ_group - 1 367 Lstart_pos = ranges_info_array(1, irep, proc_receive) 368 Lend_pos = ranges_info_array(2, irep, proc_receive) 369 start_point = ranges_info_array(3, irep, proc_receive) 370 end_point = ranges_info_array(4, irep, proc_receive) 371 372 IF (.NOT. my_alpha_beta_case) THEN 373 local_j_aL(Lstart_pos:Lend_pos, 1:my_B_size, 1:my_block_size) = & 374 BI_C_rec(start_point:end_point, 1:my_B_size, 1:my_block_size) 375 ELSE 376 local_j_aL(Lstart_pos:Lend_pos, 1:my_B_size_beta, 1:my_block_size) = & 377 BI_C_rec(start_point:end_point, 1:my_B_size_beta, 1:my_block_size) 378 END IF 379 380 END DO 381 382 END IF 383 384 DEALLOCATE (BI_C_rec) 385 386 END DO 387 CALL timestop(handle3) 388 389 ! loop over the block elements 390 DO iiB = 1, my_block_size 391 DO jjB = 1, my_block_size 392 CALL timeset(routineN//"_expansion", handle3) 393 ! calculate the integrals (ia|jb) strating from my local data ... 394 local_ab = 0.0_dp 395 IF ((my_alpha_beta_case) .AND. (calc_forces)) THEN 396 local_ba = 0.0_dp 397 ENDIF 398 t_start = m_walltime() 399 CALL dgemm('T', 'N', my_B_size, my_B_size_beta, dimen_RI, 1.0_dp, & 400 local_i_aL(:, :, iiB), dimen_RI, local_j_aL(:, :, jjB), dimen_RI, & 401 0.0_dp, local_ab(my_B_virtual_start:my_B_virtual_end, 1:my_B_size_beta), my_B_size) 402 t_end = m_walltime() 403 actual_flop_rate = 2.0_dp*my_B_size*my_B_size_beta*REAL(dimen_RI, KIND=dp)/(MAX(0.01_dp, t_end - t_start)) 404 my_flop_rate = my_flop_rate + actual_flop_rate 405 my_num_dgemm_call = my_num_dgemm_call + 1 406 ! Additional integrals only for alpha_beta case and forces 407 IF ((my_alpha_beta_case) .AND. (calc_forces)) THEN 408 t_start = m_walltime() 409 CALL dgemm('T', 'N', my_B_size_beta, my_B_size, dimen_RI, 1.0_dp, & 410 local_j_aL(:, :, iiB), dimen_RI, local_i_aL(:, :, jjB), dimen_RI, & 411 0.0_dp, local_ba(my_B_virtual_start_beta:my_B_virtual_end_beta, 1:my_B_size), my_B_size_beta) 412 t_end = m_walltime() 413 actual_flop_rate = 2.0_dp*my_B_size*my_B_size_beta*REAL(dimen_RI, KIND=dp)/(MAX(0.01_dp, t_end - t_start)) 414 my_flop_rate = my_flop_rate + actual_flop_rate 415 my_num_dgemm_call = my_num_dgemm_call + 1 416 ENDIF 417 ! ... and from the other of my subgroup 418 DO proc_shift = 1, para_env_sub%num_pe - 1 419 proc_send = sub_proc_map(para_env_sub%mepos + proc_shift) 420 proc_receive = sub_proc_map(para_env_sub%mepos - proc_shift) 421 422 CALL get_group_dist(gd_B_virtual, proc_receive, rec_B_virtual_start, rec_B_virtual_end, rec_B_size) 423 424 ALLOCATE (external_i_aL(dimen_RI, rec_B_size)) 425 external_i_aL = 0.0_dp 426 427 CALL mp_sendrecv(local_i_aL(:, :, iiB), proc_send, & 428 external_i_aL, proc_receive, & 429 para_env_sub%group) 430 431 t_start = m_walltime() 432 CALL dgemm('T', 'N', rec_B_size, my_B_size_beta, dimen_RI, 1.0_dp, & 433 external_i_aL, dimen_RI, local_j_aL(:, :, jjB), dimen_RI, & 434 0.0_dp, local_ab(rec_B_virtual_start:rec_B_virtual_end, 1:my_B_size_beta), rec_B_size) 435 436 t_end = m_walltime() 437 actual_flop_rate = 2.0_dp*rec_B_size*my_B_size_beta*REAL(dimen_RI, KIND=dp)/(MAX(0.01_dp, t_end - t_start)) 438 my_flop_rate = my_flop_rate + actual_flop_rate 439 my_num_dgemm_call = my_num_dgemm_call + 1 440 441 DEALLOCATE (external_i_aL) 442 ! Additional integrals only for alpha_beta case and forces 443 IF ((my_alpha_beta_case) .AND. (calc_forces)) THEN 444 445 CALL get_group_dist(gd_B_virtual_beta, proc_receive, rec_B_virtual_start, rec_B_virtual_end, rec_B_size) 446 447 ALLOCATE (external_i_aL(dimen_RI, rec_B_size)) 448 external_i_aL = 0.0_dp 449 450 CALL mp_sendrecv(local_j_aL(:, :, jjB), proc_send, & 451 external_i_aL, proc_receive, & 452 para_env_sub%group) 453 454 t_start = m_walltime() 455 CALL dgemm('T', 'N', rec_B_size, my_B_size, dimen_RI, 1.0_dp, & 456 external_i_aL, dimen_RI, local_i_aL(:, :, iiB), dimen_RI, & 457 0.0_dp, local_ba(rec_B_virtual_start:rec_B_virtual_end, 1:my_B_size), rec_B_size) 458 t_end = m_walltime() 459 actual_flop_rate = 2.0_dp*rec_B_size*my_B_size*REAL(dimen_RI, KIND=dp)/(MAX(0.01_dp, t_end - t_start)) 460 my_flop_rate = my_flop_rate + actual_flop_rate 461 my_num_dgemm_call = my_num_dgemm_call + 1 462 463 DEALLOCATE (external_i_aL) 464 ENDIF 465 466 END DO 467 CALL timestop(handle3) 468 469 !sample peak memory 470 CALL m_memory() 471 472 CALL timeset(routineN//"_ener", handle3) 473 ! calculate coulomb only MP2 474 sym_fac = 2.0_dp 475 IF (my_i == my_j) sym_fac = 1.0_dp 476 IF (.NOT. my_alpha_beta_case) THEN 477 DO b = 1, my_B_size 478 b_global = b + my_B_virtual_start - 1 479 DO a = 1, virtual 480 Emp2_Cou = Emp2_Cou - sym_fac*2.0_dp*local_ab(a, b)**2/ & 481 (Eigenval(homo + a) + Eigenval(homo + b_global) - Eigenval(my_i + iiB - 1) - Eigenval(my_j + jjB - 1)) 482 END DO 483 END DO 484 ELSE 485 DO b = 1, my_B_size_beta 486 b_global = b + my_B_virtual_start_beta - 1 487 DO a = 1, virtual 488 Emp2_Cou = Emp2_Cou - local_ab(a, b)**2/ & 489 (Eigenval(homo + a) + Eigenval_beta(homo_beta + b_global) - & 490 Eigenval(my_i + iiB - 1) - Eigenval_beta(my_j + jjB - 1)) 491 END DO 492 END DO 493 END IF 494 495 IF (calc_ex) THEN 496 ! contract integrals with orbital energies for exchange MP2 energy 497 ! starting with local ... 498 ! IF(my_open_shell_SS) sym_fac=sym_fac*2.0_dp 499 IF (calc_forces .AND. (.NOT. my_alpha_beta_case)) t_ab = 0.0_dp 500 DO b = 1, my_B_size 501 b_global = b + my_B_virtual_start - 1 502 DO a = 1, my_B_size 503 a_global = a + my_B_virtual_start - 1 504 Emp2_Ex = Emp2_Ex + sym_fac*local_ab(a_global, b)*local_ab(b_global, a)/ & 505 (Eigenval(homo + a_global) + Eigenval(homo + b_global) - Eigenval(my_i + iiB - 1) - Eigenval(my_j + jjB - 1)) 506 IF (calc_forces .AND. (.NOT. my_alpha_beta_case)) & 507 t_ab(a_global, b) = -(amp_fac*local_ab(a_global, b) - local_ab(b_global, a))/ & 508 (Eigenval(homo + a_global) + Eigenval(homo + b_global) - & 509 Eigenval(my_i + iiB - 1) - Eigenval(my_j + jjB - 1)) 510 END DO 511 END DO 512 ! ... and then with external data 513 DO proc_shift = 1, para_env_sub%num_pe - 1 514 proc_send = sub_proc_map(para_env_sub%mepos + proc_shift) 515 proc_receive = sub_proc_map(para_env_sub%mepos - proc_shift) 516 517 CALL get_group_dist(gd_B_virtual, proc_receive, rec_B_virtual_start, rec_B_virtual_end, rec_B_size) 518 519 CALL get_group_dist(gd_B_virtual, proc_send, send_B_virtual_start, send_B_virtual_end, send_B_size) 520 521 ALLOCATE (external_ab(my_B_size, rec_B_size)) 522 external_ab = 0.0_dp 523 524 CALL mp_sendrecv(local_ab(send_B_virtual_start:send_B_virtual_end, 1:my_B_size), proc_send, & 525 external_ab(1:my_B_size, 1:rec_B_size), proc_receive, & 526 para_env_sub%group) 527 528 DO b = 1, my_B_size 529 b_global = b + my_B_virtual_start - 1 530 DO a = 1, rec_B_size 531 a_global = a + rec_B_virtual_start - 1 532 Emp2_Ex = Emp2_Ex + sym_fac*local_ab(a_global, b)*external_ab(b, a)/ & 533 (Eigenval(homo + a_global) + Eigenval(homo + b_global) - Eigenval(my_i + iiB - 1) - Eigenval(my_j + jjB - 1)) 534 IF (calc_forces .AND. (.NOT. my_alpha_beta_case)) & 535 t_ab(a_global, b) = -(amp_fac*local_ab(a_global, b) - external_ab(b, a))/ & 536 (Eigenval(homo + a_global) + Eigenval(homo + b_global) - & 537 Eigenval(my_i + iiB - 1) - Eigenval(my_j + jjB - 1)) 538 END DO 539 END DO 540 541 DEALLOCATE (external_ab) 542 END DO 543 END IF 544 CALL timestop(handle3) 545 546 IF (calc_forces) THEN 547 ! update P_ab, Gamma_P_ia 548 IF (.NOT. my_alpha_beta_case) THEN 549 Y_i_aP = 0.0_dp 550 Y_j_aP = 0.0_dp 551 CALL mp2_update_P_gamma(mp2_env, para_env_sub, gd_B_virtual, & 552 Eigenval, homo, dimen_RI, iiB, jjB, my_B_size, & 553 my_B_virtual_end, my_B_virtual_start, my_i, my_j, virtual, & 554 sub_proc_map, local_ab, t_ab, local_i_aL, local_j_aL, & 555 my_open_shell_ss, my_alpha_alpha_case, my_beta_beta_case, Y_i_aP, Y_j_aP) 556 ELSE 557 Y_i_aP = 0.0_dp 558 Y_j_aP_beta = 0.0_dp 559 CALL mp2_update_P_gamma(mp2_env, para_env_sub, gd_B_virtual, & 560 Eigenval, homo, dimen_RI, iiB, jjB, my_B_size, & 561 my_B_virtual_end, my_B_virtual_start, my_i, my_j, virtual, sub_proc_map, & 562 local_ab, t_ab, local_i_aL, local_j_aL, my_open_shell_ss, my_alpha_alpha_case, & 563 my_beta_beta_case, Y_i_aP, Y_j_aP_beta, Eigenval_beta, homo_beta, my_B_size_beta, & 564 gd_B_virtual_beta, & 565 my_B_virtual_start_beta, my_B_virtual_end_beta, virtual_beta, local_ba) 566 ENDIF 567 568 END IF 569 570 END DO ! jjB 571 END DO ! iiB 572 573 ELSE 574 ! No work to do and we know that we have to receive nothing, but send something 575 ! send data to other proc 576 DO proc_shift = 1, para_env_exchange%num_pe - 1 577 proc_send = proc_map(para_env_exchange%mepos + proc_shift) 578 proc_receive = proc_map(para_env_exchange%mepos - proc_shift) 579 580 send_ij_index = num_ij_pairs(proc_send) 581 582 IF (ij_index <= send_ij_index) THEN 583 ! something to send 584 ij_counter_send = (ij_index - MIN(1, integ_group_pos2color_sub(proc_send)))*ngroup + & 585 integ_group_pos2color_sub(proc_send) 586 send_i = ij_map(ij_counter_send, 1) 587 send_j = ij_map(ij_counter_send, 2) 588 send_block_size = ij_map(ij_counter_send, 3) 589 590 ! occupied i 591 CALL mp_sendrecv(BIb_C(1:my_group_L_size, 1:my_B_size, send_i:send_i + send_block_size - 1), proc_send, & 592 null_mat_rec, proc_receive, & 593 para_env_exchange%group) 594 ! occupied j 595 IF (.NOT. my_alpha_beta_case) THEN 596 CALL mp_sendrecv(BIb_C(1:my_group_L_size, 1:my_B_size, send_j:send_j + send_block_size - 1), proc_send, & 597 null_mat_rec, proc_receive, & 598 para_env_exchange%group) 599 ELSE 600 CALL mp_sendrecv(BIb_C_beta(1:my_group_L_size, 1:my_B_size_beta, send_j:send_j + send_block_size - 1), proc_send, & 601 null_mat_rec, proc_receive, & 602 para_env_exchange%group) 603 END IF 604 605 ELSE 606 ! nothing to send 607 ! occupied i 608 CALL mp_sendrecv(null_mat_send, proc_send, & 609 null_mat_rec, proc_receive, & 610 para_env_exchange%group) 611 ! occupied j 612 CALL mp_sendrecv(null_mat_send, proc_send, & 613 null_mat_rec, proc_receive, & 614 para_env_exchange%group) 615 616 END IF 617 END DO 618 END IF 619 620 ! redistribute gamma 621 IF (calc_forces) THEN 622 ! Closed shell, alpha-alpha or beta-beta case 623 IF ((.NOT. my_alpha_beta_case)) THEN 624 CALL mp2_redistribute_gamma(mp2_env, ij_index, my_B_size, & 625 my_block_size, my_group_L_size, my_i, my_ij_pairs, my_j, ngroup, & 626 num_integ_group, integ_group_pos2color_sub, num_ij_pairs, proc_map, & 627 ij_map, ranges_info_array, Y_i_aP, Y_j_aP, para_env_exchange, & 628 null_mat_rec, null_mat_send, gd_array%sizes, my_alpha_alpha_case, & 629 my_beta_beta_case, my_alpha_beta_case, my_open_shell_ss) 630 ELSE 631 ! Alpha-beta case 632 CALL mp2_redistribute_gamma(mp2_env, ij_index, my_B_size, & 633 my_block_size, my_group_L_size, my_i, my_ij_pairs, my_j, ngroup, & 634 num_integ_group, integ_group_pos2color_sub, num_ij_pairs, proc_map, & 635 ij_map, ranges_info_array, Y_i_aP, Y_j_aP_beta, para_env_exchange, & 636 null_mat_rec, null_mat_send, gd_array%sizes, my_alpha_alpha_case, my_beta_beta_case, & 637 my_alpha_beta_case, my_open_shell_ss, my_B_size_beta) 638 ENDIF 639 END IF 640 641 END DO 642 CALL timestop(handle2) 643 644 DEALLOCATE (local_i_aL) 645 DEALLOCATE (local_j_aL) 646 DEALLOCATE (ij_map) 647 DEALLOCATE (num_ij_pairs) 648 649 IF (calc_forces) THEN 650 DEALLOCATE (Y_i_aP) 651 IF (.NOT. my_alpha_beta_case) THEN 652 DEALLOCATE (Y_j_aP) 653 ELSE 654 DEALLOCATE (Y_j_aP_beta) 655 ENDIF 656 IF (ALLOCATED(t_ab)) THEN 657 DEALLOCATE (t_ab) 658 ENDIF 659 ! Deallocate additional integrals: alpha_beta case with forces 660 IF (ALLOCATED(local_ba)) THEN 661 DEALLOCATE (local_ba) 662 ENDIF 663 664 ! here we check if there are almost degenerate ij 665 ! pairs and we update P_ij with these contribution. 666 ! If all pairs are degenerate with each other this step will scale O(N^6), 667 ! if the number of degenerate pairs scales linearly with the system size 668 ! this step will scale O(N^5). 669 ! Start counting the number of almost degenerate ij pairs according 670 ! to eps_canonical 671 IF (.NOT. my_alpha_beta_case) THEN 672 CALL quasi_degenerate_P_ij( & 673 mp2_env, Eigenval, homo, virtual, my_open_shell_ss, & 674 my_beta_beta_case, my_alpha_beta_case, Bib_C, unit_nr, dimen_RI, & 675 my_B_size, ngroup, num_integ_group, my_group_L_size, & 676 color_sub, ranges_info_array, para_env_exchange, para_env_sub, proc_map, & 677 my_B_virtual_start, my_B_virtual_end, gd_array%sizes, gd_B_virtual, & 678 sub_proc_map, integ_group_pos2color_sub, local_ab) 679 ELSE 680 CALL quasi_degenerate_P_ij( & 681 mp2_env, Eigenval, homo, virtual, my_open_shell_ss, & 682 my_beta_beta_case, my_alpha_beta_case, Bib_C, unit_nr, dimen_RI, & 683 my_B_size, ngroup, num_integ_group, my_group_L_size, & 684 color_sub, ranges_info_array, para_env_exchange, para_env_sub, proc_map, & 685 my_B_virtual_start, my_B_virtual_end, gd_array%sizes, gd_B_virtual, & 686 sub_proc_map, integ_group_pos2color_sub, local_ab, BIb_C_beta, my_B_size_beta, & 687 gd_B_virtual_beta, my_B_virtual_start_beta, & 688 virtual_beta, homo_beta, Eigenval_beta, my_B_virtual_end_beta) 689 ENDIF 690 691 END IF 692 693 DEALLOCATE (integ_group_pos2color_sub) 694 DEALLOCATE (local_ab) 695 696 CALL mp_sum(Emp2_Cou, para_env%group) 697 CALL mp_sum(Emp2_Ex, para_env%group) 698 699 IF (calc_forces) THEN 700 ! sum P_ab 701 IF (.NOT. my_open_shell_ss) THEN 702 mp2_env%ri_grad%P_ab(:, :) = mp2_env%ri_grad%P_ab(:, :)*amp_fac 703 IF (my_alpha_beta_case) mp2_env%ri_grad%P_ab_beta(:, :) = & 704 mp2_env%ri_grad%P_ab_beta(:, :)*amp_fac 705 sub_P_color = para_env_sub%mepos 706 CALL mp_comm_split_direct(para_env%group, comm_P, sub_P_color) 707 NULLIFY (para_env_P) 708 CALL cp_para_env_create(para_env_P, comm_P) 709 CALL mp_sum(mp2_env%ri_grad%P_ab, para_env_P%group) 710 IF (my_alpha_beta_case) CALL mp_sum(mp2_env%ri_grad%P_ab_beta, para_env_P%group) 711 ! release para_env_P 712 CALL cp_para_env_release(para_env_P) 713 ENDIF 714 715 ! recover original information (before replication) 716 DEALLOCATE (gd_array%sizes) 717 iiB = SIZE(sizes_array_orig) 718 ALLOCATE (gd_array%sizes(0:iiB - 1)) 719 gd_array%sizes(:) = sizes_array_orig 720 DEALLOCATE (sizes_array_orig) 721 722 ! make a copy of the original integrals (ia|Q) 723 my_group_L_size = my_group_L_size_orig 724 ALLOCATE (B_ia_Q(homo, my_B_size, my_group_L_size)) 725 B_ia_Q = 0.0_dp 726 DO jjB = 1, homo 727 DO iiB = 1, my_B_size 728 B_ia_Q(jjB, iiB, 1:my_group_L_size) = BIb_C(1:my_group_L_size, iiB, jjB) 729 END DO 730 END DO 731 DEALLOCATE (BIb_C) 732 IF (my_alpha_beta_case) THEN 733 ALLOCATE (B_ia_Q_beta(homo_beta, my_B_size_beta, my_group_L_size)) 734 B_ia_Q_beta = 0.0_dp 735 DO jjB = 1, homo_beta 736 DO iiB = 1, my_B_size_beta 737 B_ia_Q_beta(jjB, iiB, 1:my_group_L_size) = & 738 BIb_C_beta(1:my_group_L_size, iiB, jjB) 739 END DO 740 END DO 741 DEALLOCATE (BIb_C_beta) 742 ENDIF 743 744 ! sum Gamma and dereplicate 745 ALLOCATE (BIb_C(homo, my_B_size, my_group_L_size)) 746 IF (my_alpha_beta_case) ALLOCATE (BIb_C_beta(homo_beta, my_B_size_beta, my_group_L_size)) 747 DO proc_shift = 1, para_env_rep%num_pe - 1 748 ! invert order 749 proc_send = proc_map_rep(para_env_rep%mepos - proc_shift) 750 proc_receive = proc_map_rep(para_env_rep%mepos + proc_shift) 751 752 start_point = ranges_info_array(3, proc_shift, para_env_exchange%mepos) 753 end_point = ranges_info_array(4, proc_shift, para_env_exchange%mepos) 754 755 BIb_C = 0.0_dp 756 ! Closed shell, alpha-alpha, and alpha-alpha part in alpha-beta case 757 IF (my_alpha_alpha_case .OR. (.NOT. my_open_shell_ss)) THEN 758 CALL mp_sendrecv(mp2_env%ri_grad%Gamma_P_ia(1:homo, 1:my_B_size, start_point:end_point), & 759 proc_send, BIb_C, proc_receive, para_env_rep%group) 760 mp2_env%ri_grad%Gamma_P_ia(1:homo, 1:my_B_size, 1:my_group_L_size) = & 761 mp2_env%ri_grad%Gamma_P_ia(1:homo, 1:my_B_size, 1:my_group_L_size) + BIb_C 762 ENDIF 763 ! Beta-beta 764 IF (my_beta_beta_case) THEN 765 CALL mp_sendrecv(mp2_env%ri_grad%Gamma_P_ia_beta(1:homo, 1:my_B_size, start_point:end_point), & 766 proc_send, BIb_C, proc_receive, para_env_rep%group) 767 mp2_env%ri_grad%Gamma_P_ia_beta(1:homo, 1:my_B_size, 1:my_group_L_size) = & 768 mp2_env%ri_grad%Gamma_P_ia_beta(1:homo, 1:my_B_size, 1:my_group_L_size) + BIb_C 769 ENDIF 770 IF (my_alpha_beta_case) THEN ! Beta-beta part of alpha-beta case 771 CALL mp_sendrecv(mp2_env%ri_grad%Gamma_P_ia_beta(1:homo_beta, 1:my_B_size_beta, start_point:end_point), & 772 proc_send, BIb_C_beta, proc_receive, para_env_rep%group) 773 mp2_env%ri_grad%Gamma_P_ia_beta(1:homo_beta, 1:my_B_size_beta, 1:my_group_L_size) = & 774 mp2_env%ri_grad%Gamma_P_ia_beta(1:homo_beta, 1:my_B_size_beta, 1:my_group_L_size) + BIb_C_beta 775 ENDIF 776 777 END DO 778 IF (.NOT. my_open_shell_ss) THEN 779 BIb_C(:, :, :) = mp2_env%ri_grad%Gamma_P_ia(1:homo, 1:my_B_size, 1:my_group_L_size) 780 DEALLOCATE (mp2_env%ri_grad%Gamma_P_ia) 781 ALLOCATE (mp2_env%ri_grad%Gamma_P_ia(homo, my_B_size, my_group_L_size)) 782 mp2_env%ri_grad%Gamma_P_ia(:, :, :) = BIb_C 783 DEALLOCATE (BIb_C) 784 IF (my_alpha_beta_case) THEN 785 BIb_C_beta(:, :, :) = mp2_env%ri_grad%Gamma_P_ia_beta(1:homo_beta, 1:my_B_size_beta, 1:my_group_L_size) 786 DEALLOCATE (mp2_env%ri_grad%Gamma_P_ia_beta) 787 ALLOCATE (mp2_env%ri_grad%Gamma_P_ia_beta(homo_beta, my_B_size_beta, my_group_L_size)) 788 mp2_env%ri_grad%Gamma_P_ia_beta(:, :, :) = BIb_C_beta 789 DEALLOCATE (BIb_C_beta) 790 ENDIF 791 ENDIF 792 ! For open shell systems, we need to pass Bib_C through the subroutine in alpha-alpha and beta-beta case. 793 ! Only for forces, as IF suggests! Here we deallocate it, but restore after complete_gamma 794 IF (my_open_shell_ss) THEN 795 DEALLOCATE (BIb_C) 796 ENDIF 797 798 IF (.NOT. my_open_shell_ss) THEN 799 CALL complete_gamma(mp2_env, B_ia_Q, dimen_RI, homo, virtual, para_env, para_env_sub, ngroup, & 800 my_group_L_size, my_group_L_start, my_group_L_end, & 801 my_B_size, my_B_virtual_start, & 802 gd_array, gd_B_virtual, & 803 sub_proc_map, .TRUE.) 804 IF (my_alpha_beta_case) THEN 805 CALL complete_gamma(mp2_env, B_ia_Q_beta, dimen_RI, homo_beta, virtual_beta, para_env, para_env_sub, & 806 ngroup, my_group_L_size, my_group_L_start, my_group_L_end, & 807 my_B_size_beta, my_B_virtual_start_beta, & 808 gd_array, gd_B_virtual_beta, sub_proc_map, .FALSE.) 809 ENDIF 810 ENDIF 811 ! Here we restore BIb_C 812 IF (my_open_shell_ss) THEN 813 ALLOCATE (BIb_C(my_group_L_size, my_B_size, homo)) 814 BIb_C = 0.0_dp 815 ! copy the integrals (ia|Q) back 816 DO jjB = 1, homo 817 DO iiB = 1, my_B_size 818 BIb_C(1:my_group_L_size, iiB, jjB) = & 819 B_ia_Q(jjB, iiB, 1:my_group_L_size) 820 END DO 821 END DO 822 ENDIF 823 824 END IF 825 826 Emp2 = Emp2_Cou + Emp2_EX 827 828 DEALLOCATE (proc_map) 829 DEALLOCATE (sub_proc_map) 830 DEALLOCATE (proc_map_rep) 831 DEALLOCATE (ranges_info_array) 832 833 IF (.NOT. my_open_shell_SS) THEN 834 ! keep the array for the next calculations 835 IF (ALLOCATED(BIb_C)) DEALLOCATE (BIb_C) 836 CALL release_group_dist(gd_array) 837 CALL release_group_dist(gd_B_virtual) 838 IF (my_alpha_beta_case) THEN 839 CALL release_group_dist(gd_B_virtual_beta) 840 END IF 841 END IF 842 843 CALL cp_para_env_release(para_env_exchange) 844 CALL cp_para_env_release(para_env_rep) 845 846 my_flop_rate = my_flop_rate/REAL(MAX(my_num_dgemm_call, 1), KIND=dp)/1.0E9_dp 847 CALL mp_sum(my_flop_rate, para_env%group) 848 my_flop_rate = my_flop_rate/para_env%num_pe 849 IF (unit_nr > 0) WRITE (UNIT=unit_nr, FMT="(T3,A,T66,F15.2)") & 850 "PERFORMANCE| DGEMM flop rate (Gflops / MPI rank):", my_flop_rate 851 852 CALL timestop(handle) 853 854 END SUBROUTINE mp2_ri_gpw_compute_en 855 856! ************************************************************************************************** 857!> \brief ... 858!> \param BIb_C ... 859!> \param para_env ... 860!> \param para_env_sub ... 861!> \param para_env_exchange ... 862!> \param para_env_rep ... 863!> \param homo ... 864!> \param proc_map_rep ... 865!> \param sizes_array ... 866!> \param my_B_size ... 867!> \param my_group_L_size ... 868!> \param my_group_L_start ... 869!> \param my_group_L_end ... 870!> \param my_new_group_L_size ... 871!> \param new_sizes_array ... 872!> \param ranges_info_array ... 873! ************************************************************************************************** 874 SUBROUTINE replicate_iaK_2intgroup(BIb_C, para_env, para_env_sub, para_env_exchange, para_env_rep, & 875 homo, proc_map_rep, & 876 sizes_array, & 877 my_B_size, & 878 my_group_L_size, my_group_L_start, my_group_L_end, & 879 my_new_group_L_size, new_sizes_array, ranges_info_array) 880 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), & 881 INTENT(INOUT) :: BIb_C 882 TYPE(cp_para_env_type), POINTER :: para_env, para_env_sub, & 883 para_env_exchange, para_env_rep 884 INTEGER, INTENT(IN) :: homo 885 INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(OUT) :: proc_map_rep 886 INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(IN) :: sizes_array 887 INTEGER, INTENT(IN) :: my_B_size, my_group_L_size, & 888 my_group_L_start, my_group_L_end 889 INTEGER, INTENT(INOUT) :: my_new_group_L_size 890 INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(OUT) :: new_sizes_array 891 INTEGER, ALLOCATABLE, DIMENSION(:, :, :), & 892 INTENT(OUT) :: ranges_info_array 893 894 CHARACTER(LEN=*), PARAMETER :: routineN = 'replicate_iaK_2intgroup', & 895 routineP = moduleN//':'//routineN 896 897 INTEGER :: comm_rep, end_point, handle, i, & 898 max_L_size, proc_receive, proc_send, & 899 proc_shift, start_point, sub_sub_color 900 INTEGER, ALLOCATABLE, DIMENSION(:) :: rep_ends_array, rep_sizes_array, & 901 rep_starts_array 902 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: BIb_C_copy 903 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :, :) :: BIb_C_gather 904 905 CALL timeset(routineN, handle) 906 907 ! create the replication group 908 sub_sub_color = para_env_sub%mepos*para_env_exchange%num_pe + para_env_exchange%mepos 909 CALL mp_comm_split_direct(para_env%group, comm_rep, sub_sub_color) 910 NULLIFY (para_env_rep) 911 CALL cp_para_env_create(para_env_rep, comm_rep) 912 913 ! crate the proc maps 914 ALLOCATE (proc_map_rep(-para_env_rep%num_pe:2*para_env_rep%num_pe - 1)) 915 DO i = 0, para_env_rep%num_pe - 1 916 proc_map_rep(i) = i 917 proc_map_rep(-i - 1) = para_env_rep%num_pe - i - 1 918 proc_map_rep(para_env_rep%num_pe + i) = i 919 END DO 920 921 ! create the new limits for K according to the size 922 ! of the integral group 923 ALLOCATE (new_sizes_array(0:para_env_exchange%num_pe - 1)) 924 new_sizes_array = 0 925 ALLOCATE (ranges_info_array(4, 0:para_env_rep%num_pe - 1, 0:para_env_exchange%num_pe - 1)) 926 ranges_info_array = 0 927 928 ! info array for replication 929 ALLOCATE (rep_ends_array(0:para_env_rep%num_pe - 1)) 930 rep_ends_array = 0 931 ALLOCATE (rep_starts_array(0:para_env_rep%num_pe - 1)) 932 rep_starts_array = 0 933 ALLOCATE (rep_sizes_array(0:para_env_rep%num_pe - 1)) 934 rep_sizes_array = 0 935 936 rep_sizes_array(para_env_rep%mepos) = my_group_L_size 937 rep_starts_array(para_env_rep%mepos) = my_group_L_start 938 rep_ends_array(para_env_rep%mepos) = my_group_L_end 939 940 CALL mp_sum(rep_sizes_array, para_env_rep%group) 941 CALL mp_sum(rep_starts_array, para_env_rep%group) 942 CALL mp_sum(rep_ends_array, para_env_rep%group) 943 944 ! calculate my_new_group_L_size according to sizes_array 945 my_new_group_L_size = my_group_L_size 946 ranges_info_array(1, 0, para_env_exchange%mepos) = my_group_L_start 947 ranges_info_array(2, 0, para_env_exchange%mepos) = my_group_L_end 948 ranges_info_array(3, 0, para_env_exchange%mepos) = 1 949 ranges_info_array(4, 0, para_env_exchange%mepos) = my_group_L_size 950 951 DO proc_shift = 1, para_env_rep%num_pe - 1 952 proc_send = proc_map_rep(para_env_rep%mepos + proc_shift) 953 proc_receive = proc_map_rep(para_env_rep%mepos - proc_shift) 954 955 my_new_group_L_size = my_new_group_L_size + rep_sizes_array(proc_receive) 956 957 ranges_info_array(1, proc_shift, para_env_exchange%mepos) = rep_starts_array(proc_receive) 958 ranges_info_array(2, proc_shift, para_env_exchange%mepos) = rep_ends_array(proc_receive) 959 ranges_info_array(3, proc_shift, para_env_exchange%mepos) = ranges_info_array(4, proc_shift - 1, para_env_exchange%mepos) + 1 960 ranges_info_array(4, proc_shift, para_env_exchange%mepos) = my_new_group_L_size 961 962 END DO 963 new_sizes_array(para_env_exchange%mepos) = my_new_group_L_size 964 965 CALL mp_sum(new_sizes_array, para_env_exchange%group) 966 CALL mp_sum(ranges_info_array, para_env_exchange%group) 967 968 ! replication scheme using mp_allgather 969 ! get the max L size of the 970 max_L_size = MAXVAL(sizes_array) 971 972 ALLOCATE (BIb_C_copy(max_L_size, my_B_size, homo)) 973 BIb_C_copy = 0.0_dp 974 BIb_C_copy(1:my_group_L_size, 1:my_B_size, 1:homo) = BIb_C 975 976 DEALLOCATE (BIb_C) 977 978 ALLOCATE (BIb_C_gather(max_L_size, my_B_size, homo, 0:para_env_rep%num_pe - 1)) 979 BIb_C_gather = 0.0_dp 980 981 CALL mp_allgather(BIb_C_copy, BIb_C_gather, para_env_rep%group) 982 983 DEALLOCATE (BIb_C_copy) 984 985 ALLOCATE (BIb_C(my_new_group_L_size, my_B_size, homo)) 986 BIb_C = 0.0_dp 987 988 ! reorder data 989 DO proc_shift = 0, para_env_rep%num_pe - 1 990 proc_send = proc_map_rep(para_env_rep%mepos + proc_shift) 991 proc_receive = proc_map_rep(para_env_rep%mepos - proc_shift) 992 993 start_point = ranges_info_array(3, proc_shift, para_env_exchange%mepos) 994 end_point = ranges_info_array(4, proc_shift, para_env_exchange%mepos) 995 996 BIb_C(start_point:end_point, 1:my_B_size, 1:homo) = & 997 BIb_C_gather(1:end_point - start_point + 1, 1:my_B_size, 1:homo, proc_receive) 998 999 END DO 1000 1001 DEALLOCATE (BIb_C_gather) 1002 DEALLOCATE (rep_sizes_array) 1003 DEALLOCATE (rep_starts_array) 1004 DEALLOCATE (rep_ends_array) 1005 1006 CALL timestop(handle) 1007 1008 END SUBROUTINE replicate_iaK_2intgroup 1009 1010! ************************************************************************************************** 1011!> \brief ... 1012!> \param local_ab ... 1013!> \param t_ab ... 1014!> \param mp2_env ... 1015!> \param homo ... 1016!> \param virtual ... 1017!> \param dimen_RI ... 1018!> \param my_B_size ... 1019!> \param block_size ... 1020!> \param my_B_size_beta ... 1021!> \param my_group_L_size ... 1022!> \param local_i_aL ... 1023!> \param local_j_aL ... 1024!> \param calc_forces ... 1025!> \param Y_i_aP ... 1026!> \param Y_j_aP ... 1027!> \param alpha_beta ... 1028!> \param beta_beta ... 1029!> \param local_ba ... 1030!> \param virtual_beta ... 1031! ************************************************************************************************** 1032 SUBROUTINE mp2_ri_allocate(local_ab, t_ab, mp2_env, homo, virtual, dimen_RI, my_B_size, & 1033 block_size, my_B_size_beta, my_group_L_size, & 1034 local_i_aL, local_j_aL, calc_forces, & 1035 Y_i_aP, Y_j_aP, alpha_beta, & 1036 beta_beta, local_ba, virtual_beta) 1037 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :), & 1038 INTENT(OUT) :: local_ab, t_ab 1039 TYPE(mp2_type), POINTER :: mp2_env 1040 INTEGER, INTENT(IN) :: homo, virtual, dimen_RI, my_B_size, & 1041 block_size, my_B_size_beta, & 1042 my_group_L_size 1043 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), & 1044 INTENT(OUT) :: local_i_aL, local_j_aL 1045 LOGICAL, INTENT(IN) :: calc_forces 1046 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), & 1047 INTENT(OUT) :: Y_i_aP, Y_j_aP 1048 LOGICAL, INTENT(IN) :: alpha_beta, beta_beta 1049 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :), & 1050 INTENT(OUT), OPTIONAL :: local_ba 1051 INTEGER, INTENT(IN), OPTIONAL :: virtual_beta 1052 1053 CHARACTER(LEN=*), PARAMETER :: routineN = 'mp2_ri_allocate', & 1054 routineP = moduleN//':'//routineN 1055 1056 INTEGER :: handle 1057 1058 CALL timeset(routineN, handle) 1059 1060 ALLOCATE (local_i_aL(dimen_RI, my_B_size, block_size)) 1061 ALLOCATE (local_j_aL(dimen_RI, my_B_size_beta, block_size)) 1062 ALLOCATE (local_ab(virtual, my_B_size_beta)) 1063 1064 IF (calc_forces) THEN 1065 ALLOCATE (Y_i_aP(my_B_size, dimen_RI, block_size)) 1066 Y_i_aP = 0.0_dp 1067 ! For closed-shell, alpha-alpha and beta-beta my_B_size_beta=my_b_size 1068 ! Not for alpha-beta case: Y_j_aP_beta is sent and received as Y_j_aP 1069 ALLOCATE (Y_j_aP(my_B_size_beta, dimen_RI, block_size)) 1070 Y_j_aP = 0.0_dp 1071 ! Closed shell or alpha-alpha case 1072 IF (.NOT. (beta_beta .OR. alpha_beta)) THEN 1073 ALLOCATE (mp2_env%ri_grad%P_ij(homo, homo)) 1074 ALLOCATE (mp2_env%ri_grad%P_ab(my_B_size, virtual)) 1075 mp2_env%ri_grad%P_ij = 0.0_dp 1076 mp2_env%ri_grad%P_ab = 0.0_dp 1077 ALLOCATE (mp2_env%ri_grad%Gamma_P_ia(homo, my_B_size, my_group_L_size)) 1078 mp2_env%ri_grad%Gamma_P_ia = 0.0_dp 1079 ELSE 1080 IF (beta_beta) THEN 1081 ALLOCATE (mp2_env%ri_grad%P_ij_beta(homo, homo)) 1082 ALLOCATE (mp2_env%ri_grad%P_ab_beta(my_B_size, virtual)) 1083 mp2_env%ri_grad%P_ij_beta = 0.0_dp 1084 mp2_env%ri_grad%P_ab_beta = 0.0_dp 1085 ALLOCATE (mp2_env%ri_grad%Gamma_P_ia_beta(homo, my_B_size_beta, my_group_L_size)) 1086 mp2_env%ri_grad%Gamma_P_ia_beta = 0.0_dp 1087 ENDIF 1088 ENDIF 1089 IF (.NOT. alpha_beta) THEN 1090 ! For non-alpha-beta case we need amplitudes 1091 ALLOCATE (t_ab(virtual, my_B_size_beta)) 1092 ELSE 1093 ! We need more integrals 1094 ALLOCATE (local_ba(virtual_beta, my_B_size)) 1095 ENDIF 1096 END IF 1097 ! 1098 1099 CALL timestop(handle) 1100 1101 END SUBROUTINE mp2_ri_allocate 1102 1103! ************************************************************************************************** 1104!> \brief ... 1105!> \param my_alpha_beta_case ... 1106!> \param total_ij_pairs ... 1107!> \param homo ... 1108!> \param homo_beta ... 1109!> \param num_IJ_blocks ... 1110!> \param block_size ... 1111!> \param ngroup ... 1112!> \param ij_map ... 1113!> \param color_sub ... 1114!> \param my_ij_pairs ... 1115!> \param my_open_shell_SS ... 1116!> \param unit_nr ... 1117! ************************************************************************************************** 1118 SUBROUTINE mp2_ri_communication(my_alpha_beta_case, total_ij_pairs, homo, homo_beta, num_IJ_blocks, & 1119 block_size, ngroup, ij_map, color_sub, my_ij_pairs, my_open_shell_SS, unit_nr) 1120 LOGICAL, INTENT(IN) :: my_alpha_beta_case 1121 INTEGER, INTENT(OUT) :: total_ij_pairs 1122 INTEGER, INTENT(IN) :: homo, homo_beta 1123 INTEGER, INTENT(OUT) :: num_IJ_blocks 1124 INTEGER, INTENT(IN) :: block_size, ngroup 1125 INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(OUT) :: ij_map 1126 INTEGER, INTENT(IN) :: color_sub 1127 INTEGER, INTENT(OUT) :: my_ij_pairs 1128 LOGICAL, INTENT(IN) :: my_open_shell_SS 1129 INTEGER, INTENT(IN) :: unit_nr 1130 1131 CHARACTER(LEN=*), PARAMETER :: routineN = 'mp2_ri_communication', & 1132 routineP = moduleN//':'//routineN 1133 1134 INTEGER :: assigned_blocks, first_I_block, first_J_block, handle, iiB, ij_block_counter, & 1135 ij_counter, jjB, last_i_block, last_J_block, num_block_per_group, total_ij_block, & 1136 total_ij_pairs_blocks 1137 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: ij_marker 1138 1139! Calculate the maximum number of ij pairs that have to be computed 1140! among groups 1141 1142 CALL timeset(routineN, handle) 1143 1144 IF (.NOT. my_alpha_beta_case) THEN 1145 total_ij_pairs = homo*(1 + homo)/2 1146 num_IJ_blocks = homo/block_size - 1 1147 1148 first_I_block = 1 1149 last_i_block = block_size*(num_IJ_blocks - 1) 1150 1151 first_J_block = block_size + 1 1152 last_J_block = block_size*(num_IJ_blocks + 1) 1153 1154 ij_block_counter = 0 1155 DO iiB = first_I_block, last_i_block, block_size 1156 DO jjB = iiB + block_size, last_J_block, block_size 1157 ij_block_counter = ij_block_counter + 1 1158 END DO 1159 END DO 1160 1161 total_ij_block = ij_block_counter 1162 num_block_per_group = total_ij_block/ngroup 1163 assigned_blocks = num_block_per_group*ngroup 1164 1165 total_ij_pairs_blocks = assigned_blocks + (total_ij_pairs - assigned_blocks*(block_size**2)) 1166 1167 ALLOCATE (ij_marker(homo, homo)) 1168 ij_marker = 0 1169 ALLOCATE (ij_map(total_ij_pairs_blocks, 3)) 1170 ij_map = 0 1171 ij_counter = 0 1172 my_ij_pairs = 0 1173 DO iiB = first_I_block, last_i_block, block_size 1174 DO jjB = iiB + block_size, last_J_block, block_size 1175 IF (ij_counter + 1 > assigned_blocks) EXIT 1176 ij_counter = ij_counter + 1 1177 ij_marker(iiB:iiB + block_size - 1, jjB:jjB + block_size - 1) = 1 1178 ij_map(ij_counter, 1) = iiB 1179 ij_map(ij_counter, 2) = jjB 1180 ij_map(ij_counter, 3) = block_size 1181 IF (MOD(ij_counter, ngroup) == color_sub) my_ij_pairs = my_ij_pairs + 1 1182 END DO 1183 END DO 1184 DO iiB = 1, homo 1185 DO jjB = iiB, homo 1186 IF (ij_marker(iiB, jjB) == 0) THEN 1187 ij_counter = ij_counter + 1 1188 ij_map(ij_counter, 1) = iiB 1189 ij_map(ij_counter, 2) = jjB 1190 ij_map(ij_counter, 3) = 1 1191 IF (MOD(ij_counter, ngroup) == color_sub) my_ij_pairs = my_ij_pairs + 1 1192 END IF 1193 END DO 1194 END DO 1195 DEALLOCATE (ij_marker) 1196 1197 IF ((.NOT. my_open_shell_SS)) THEN 1198 IF (unit_nr > 0) THEN 1199 IF (block_size == 1) THEN 1200 WRITE (UNIT=unit_nr, FMT="(T3,A,T66,F15.1)") & 1201 "RI_INFO| Percentage of ij pairs communicated with block size 1:", 100.0_dp 1202 ELSE 1203 WRITE (UNIT=unit_nr, FMT="(T3,A,T66,F15.1)") & 1204 "RI_INFO| Percentage of ij pairs communicated with block size 1:", & 1205 100.0_dp*REAL((total_ij_pairs - assigned_blocks*(block_size**2)), KIND=dp)/REAL(total_ij_pairs, KIND=dp) 1206 END IF 1207 CALL m_flush(unit_nr) 1208 END IF 1209 END IF 1210 1211 ELSE 1212 ! alpha-beta case no index symmetry 1213 total_ij_pairs = homo*homo_beta 1214 ALLOCATE (ij_map(total_ij_pairs, 3)) 1215 ij_map = 0 1216 ij_counter = 0 1217 my_ij_pairs = 0 1218 DO iiB = 1, homo 1219 DO jjB = 1, homo_beta 1220 ij_counter = ij_counter + 1 1221 ij_map(ij_counter, 1) = iiB 1222 ij_map(ij_counter, 2) = jjB 1223 ij_map(ij_counter, 3) = 1 1224 IF (MOD(ij_counter, ngroup) == color_sub) my_ij_pairs = my_ij_pairs + 1 1225 END DO 1226 END DO 1227 END IF 1228 1229 CALL timestop(handle) 1230 1231 END SUBROUTINE mp2_ri_communication 1232 1233! ************************************************************************************************** 1234!> \brief ... 1235!> \param BIb_C ... 1236!> \param para_env ... 1237!> \param para_env_sub ... 1238!> \param homo ... 1239!> \param color_sub ... 1240!> \param sizes_array ... 1241!> \param calc_forces ... 1242!> \param comm_exchange ... 1243!> \param integ_group_size ... 1244!> \param my_B_size ... 1245!> \param iiB ... 1246!> \param my_group_L_end ... 1247!> \param my_group_L_size ... 1248!> \param my_group_L_size_orig ... 1249!> \param my_group_L_start ... 1250!> \param my_new_group_L_size ... 1251!> \param sub_sub_color ... 1252!> \param integ_group_pos2color_sub ... 1253!> \param proc_map ... 1254!> \param proc_map_rep ... 1255!> \param sizes_array_orig ... 1256!> \param sub_proc_map ... 1257!> \param ranges_info_array ... 1258!> \param para_env_exchange ... 1259!> \param para_env_rep ... 1260!> \param num_integ_group ... 1261! ************************************************************************************************** 1262 SUBROUTINE mp2_ri_create_group(BIb_C, para_env, para_env_sub, homo, color_sub, & 1263 sizes_array, calc_forces, & 1264 comm_exchange, integ_group_size, my_B_size, iiB, my_group_L_end, & 1265 my_group_L_size, my_group_L_size_orig, my_group_L_start, my_new_group_L_size, & 1266 sub_sub_color, integ_group_pos2color_sub, & 1267 proc_map, proc_map_rep, sizes_array_orig, & 1268 sub_proc_map, ranges_info_array, para_env_exchange, para_env_rep, num_integ_group) 1269 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), & 1270 INTENT(INOUT) :: BIb_C 1271 TYPE(cp_para_env_type), POINTER :: para_env, para_env_sub 1272 INTEGER, INTENT(IN) :: homo, color_sub 1273 INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(INOUT) :: sizes_array 1274 LOGICAL, INTENT(IN) :: calc_forces 1275 INTEGER, INTENT(OUT) :: comm_exchange 1276 INTEGER, INTENT(IN) :: integ_group_size, my_B_size 1277 INTEGER, INTENT(OUT) :: iiB 1278 INTEGER, INTENT(IN) :: my_group_L_end 1279 INTEGER, INTENT(INOUT) :: my_group_L_size 1280 INTEGER, INTENT(OUT) :: my_group_L_size_orig 1281 INTEGER, INTENT(IN) :: my_group_L_start 1282 INTEGER, INTENT(INOUT) :: my_new_group_L_size 1283 INTEGER, INTENT(OUT) :: sub_sub_color 1284 INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(OUT) :: integ_group_pos2color_sub, proc_map, & 1285 proc_map_rep, sizes_array_orig, & 1286 sub_proc_map 1287 INTEGER, ALLOCATABLE, DIMENSION(:, :, :), & 1288 INTENT(OUT) :: ranges_info_array 1289 TYPE(cp_para_env_type), POINTER :: para_env_exchange, para_env_rep 1290 INTEGER, INTENT(IN) :: num_integ_group 1291 1292 CHARACTER(LEN=*), PARAMETER :: routineN = 'mp2_ri_create_group', & 1293 routineP = moduleN//':'//routineN 1294 1295 INTEGER :: handle, i 1296 INTEGER, ALLOCATABLE, DIMENSION(:) :: new_sizes_array 1297 1298 CALL timeset(routineN, handle) 1299 ! 1300 sub_sub_color = para_env_sub%mepos*num_integ_group + color_sub/integ_group_size 1301 CALL mp_comm_split_direct(para_env%group, comm_exchange, sub_sub_color) 1302 NULLIFY (para_env_exchange) 1303 CALL cp_para_env_create(para_env_exchange, comm_exchange) 1304 1305 ! create the proc maps 1306 ALLOCATE (proc_map(-para_env_exchange%num_pe:2*para_env_exchange%num_pe - 1)) 1307 DO i = 0, para_env_exchange%num_pe - 1 1308 proc_map(i) = i 1309 proc_map(-i - 1) = para_env_exchange%num_pe - i - 1 1310 proc_map(para_env_exchange%num_pe + i) = i 1311 END DO 1312 1313 ALLOCATE (sub_proc_map(-para_env_sub%num_pe:2*para_env_sub%num_pe - 1)) 1314 DO i = 0, para_env_sub%num_pe - 1 1315 sub_proc_map(i) = i 1316 sub_proc_map(-i - 1) = para_env_sub%num_pe - i - 1 1317 sub_proc_map(para_env_sub%num_pe + i) = i 1318 END DO 1319 1320 CALL replicate_iaK_2intgroup(BIb_C, para_env, para_env_sub, para_env_exchange, para_env_rep, & 1321 homo, proc_map_rep, & 1322 sizes_array, & 1323 my_B_size, & 1324 my_group_L_size, my_group_L_start, my_group_L_end, & 1325 my_new_group_L_size, new_sizes_array, ranges_info_array) 1326 1327 ALLOCATE (integ_group_pos2color_sub(0:para_env_exchange%num_pe - 1)) 1328 integ_group_pos2color_sub = 0 1329 integ_group_pos2color_sub(para_env_exchange%mepos) = color_sub 1330 CALL mp_sum(integ_group_pos2color_sub, para_env_exchange%group) 1331 1332 IF (calc_forces) THEN 1333 iiB = SIZE(sizes_array) 1334 ALLOCATE (sizes_array_orig(0:iiB - 1)) 1335 sizes_array_orig(:) = sizes_array 1336 END IF 1337 1338 my_group_L_size_orig = my_group_L_size 1339 my_group_L_size = my_new_group_L_size 1340 DEALLOCATE (sizes_array) 1341 1342 ALLOCATE (sizes_array(0:integ_group_size - 1)) 1343 sizes_array(:) = new_sizes_array 1344 1345 DEALLOCATE (new_sizes_array) 1346 ! 1347 CALL timestop(handle) 1348 1349 END SUBROUTINE mp2_ri_create_group 1350 1351! ************************************************************************************************** 1352!> \brief ... 1353!> \param mp2_env ... 1354!> \param para_env ... 1355!> \param para_env_sub ... 1356!> \param gd_array ... 1357!> \param gd_B_virtual ... 1358!> \param homo ... 1359!> \param dimen_RI ... 1360!> \param unit_nr ... 1361!> \param color_sub ... 1362!> \param best_block_size ... 1363!> \param best_integ_group_size ... 1364!> \param block_size ... 1365!> \param integ_group_size ... 1366!> \param min_integ_group_size ... 1367!> \param my_B_size ... 1368!> \param my_B_virtual_end ... 1369!> \param my_B_virtual_start ... 1370!> \param my_group_L_size ... 1371!> \param my_group_L_start ... 1372!> \param my_group_L_end ... 1373!> \param ngroup ... 1374!> \param num_IJ_blocks ... 1375!> \param num_integ_group ... 1376!> \param pos_integ_group ... 1377!> \param virtual ... 1378!> \param my_alpha_beta_case ... 1379!> \param my_open_shell_SS ... 1380!> \param mem_for_aK ... 1381!> \param mem_for_comm ... 1382!> \param mem_for_iaK ... 1383!> \param mem_for_rep ... 1384!> \param mem_min ... 1385!> \param mem_per_group ... 1386!> \param mem_real ... 1387! ************************************************************************************************** 1388 SUBROUTINE mp2_ri_get_sizes(mp2_env, para_env, para_env_sub, gd_array, gd_B_virtual, & 1389 homo, dimen_RI, unit_nr, color_sub, & 1390 best_block_size, best_integ_group_size, block_size, & 1391 integ_group_size, min_integ_group_size, my_B_size, & 1392 my_B_virtual_end, my_B_virtual_start, my_group_L_size, & 1393 my_group_L_start, my_group_L_end, ngroup, num_IJ_blocks, num_integ_group, & 1394 pos_integ_group, virtual, my_alpha_beta_case, & 1395 my_open_shell_SS, mem_for_aK, mem_for_comm, & 1396 mem_for_iaK, mem_for_rep, mem_min, mem_per_group, mem_real) 1397 TYPE(mp2_type), POINTER :: mp2_env 1398 TYPE(cp_para_env_type), POINTER :: para_env, para_env_sub 1399 TYPE(group_dist_d1_type), INTENT(IN) :: gd_array, gd_B_virtual 1400 INTEGER, INTENT(IN) :: homo, dimen_RI, unit_nr, color_sub 1401 INTEGER, INTENT(OUT) :: best_block_size, best_integ_group_size, block_size, & 1402 integ_group_size, min_integ_group_size, my_B_size, my_B_virtual_end, my_B_virtual_start, & 1403 my_group_L_size, my_group_L_start, my_group_L_end, ngroup, num_IJ_blocks, & 1404 num_integ_group, pos_integ_group 1405 INTEGER, INTENT(IN) :: virtual 1406 LOGICAL, INTENT(IN) :: my_alpha_beta_case, my_open_shell_SS 1407 REAL(KIND=dp), INTENT(OUT) :: mem_for_aK, mem_for_comm, mem_for_iaK, & 1408 mem_for_rep, mem_min, mem_per_group, & 1409 mem_real 1410 1411 CHARACTER(LEN=*), PARAMETER :: routineN = 'mp2_ri_get_sizes', & 1412 routineP = moduleN//':'//routineN 1413 1414 INTEGER :: handle, iiB 1415 INTEGER(KIND=int_8) :: mem 1416 1417 CALL timeset(routineN, handle) 1418 1419 ngroup = para_env%num_pe/para_env_sub%num_pe 1420 1421 ! Calculate available memory and create integral group according to that 1422 ! mem_for_iaK is the memory needed for storing the 3 centre integrals 1423 mem_for_iaK = REAL(homo, KIND=dp)*virtual*dimen_RI*8.0_dp/(1024_dp**2) 1424 mem_for_aK = REAL(virtual, KIND=dp)*dimen_RI*8.0_dp/(1024_dp**2) 1425 1426 CALL m_memory(mem) 1427 mem_real = (mem + 1024*1024 - 1)/(1024*1024) 1428 ! mp_min .... a hack.. it should be mp_max, but as it turns out, on some processes the previously freed memory (hfx) 1429 ! has not been given back to the OS yet. 1430 CALL mp_min(mem_real, para_env%group) 1431 1432 mem_min = 2.0_dp*REAL(homo, KIND=dp)*maxsize(gd_B_virtual)*maxsize(gd_array)*8.0_dp/(1024**2) 1433 mem_min = mem_min + 3.0_dp*maxsize(gd_B_virtual)*REAL(dimen_RI, KIND=dp)*8.0_dp/(1024**2) 1434 1435 IF ((.NOT. my_open_shell_SS) .AND. (.NOT. my_alpha_beta_case)) THEN 1436 IF (unit_nr > 0) WRITE (unit_nr, '(T3,A,T68,F9.2,A4)') 'RI_INFO| Minimum required memory per MPI process:', & 1437 mem_min, ' MiB' 1438 END IF 1439 1440 mem_real = mp2_env%mp2_memory 1441 1442 mem_per_group = mem_real*para_env_sub%num_pe 1443 1444 ! here we try to find the best block_size and integ_group_size 1445 best_integ_group_size = ngroup 1446 best_block_size = 1 1447 1448 ! in the open shell case no replication and no block communication is done 1449 IF ((.NOT. my_open_shell_SS) .AND. (.NOT. my_alpha_beta_case)) THEN 1450 ! Here we split the memory half for the communication, half for replication 1451 IF (mp2_env%ri_mp2%block_size > 0) THEN 1452 best_block_size = mp2_env%ri_mp2%block_size 1453 mem_for_rep = MAX(mem_min, mem_per_group - 2.0_dp*mem_for_aK*best_block_size) 1454 ELSE 1455 mem_for_rep = mem_per_group/2.0_dp 1456 END IF 1457 ! calculate the minimum replication group size according to the available memory 1458 min_integ_group_size = CEILING(2.0_dp*mem_for_iaK/mem_for_rep) 1459 1460 integ_group_size = MIN(min_integ_group_size, ngroup) - 1 1461 DO iiB = min_integ_group_size + 1, ngroup 1462 integ_group_size = integ_group_size + 1 1463 ! check that the ngroup is a multiple of integ_group_size 1464 IF (MOD(ngroup, integ_group_size) /= 0) CYCLE 1465 ! check that the integ group size is not too small (10% is empirical for now) 1466 IF (REAL(integ_group_size, KIND=dp)/REAL(ngroup, KIND=dp) < 0.1_dp) CYCLE 1467 1468 best_integ_group_size = integ_group_size 1469 EXIT 1470 END DO 1471 1472 IF (.NOT. (mp2_env%ri_mp2%block_size > 0)) THEN 1473 mem_for_comm = mem_per_group - 2.0_dp*mem_for_iaK/best_integ_group_size 1474 DO 1475 num_IJ_blocks = (homo/best_block_size) 1476 num_IJ_blocks = (num_IJ_blocks*num_IJ_blocks - num_IJ_blocks)/2 1477 IF (num_IJ_blocks > ngroup .OR. best_block_size == 1) THEN 1478 EXIT 1479 ELSE 1480 best_block_size = best_block_size - 1 1481 END IF 1482 END DO 1483 END IF 1484 1485 ! check that best_block_size is not bigger than homo/2-1 1486 best_block_size = MIN(MAX(homo/2 - 1 + MOD(homo, 2), 1), best_block_size) 1487 END IF 1488 1489 integ_group_size = best_integ_group_size 1490 block_size = best_block_size 1491 1492 IF ((.NOT. my_open_shell_SS) .AND. (.NOT. my_alpha_beta_case)) THEN 1493 IF (unit_nr > 0) THEN 1494 WRITE (UNIT=unit_nr, FMT="(T3,A,T75,i6)") & 1495 "RI_INFO| Group size for integral replication:", integ_group_size*para_env_sub%num_pe 1496 WRITE (UNIT=unit_nr, FMT="(T3,A,T75,i6)") & 1497 "RI_INFO| Block size:", block_size 1498 CALL m_flush(unit_nr) 1499 END IF 1500 END IF 1501 1502 num_integ_group = ngroup/integ_group_size 1503 1504 pos_integ_group = MOD(color_sub, integ_group_size) 1505 1506 CALL get_group_dist(gd_array, color_sub, my_group_L_start, my_group_L_end, my_group_L_size) 1507 1508 CALL get_group_dist(gd_B_virtual, para_env_sub%mepos, my_B_virtual_start, my_B_virtual_end, my_B_size) 1509 1510 CALL timestop(handle) 1511 1512 END SUBROUTINE mp2_ri_get_sizes 1513 1514! ************************************************************************************************** 1515!> \brief ... 1516!> \param mp2_env ... 1517!> \param para_env_sub ... 1518!> \param gd_B_virtual ... 1519!> \param Eigenval ... 1520!> \param homo ... 1521!> \param dimen_RI ... 1522!> \param iiB ... 1523!> \param jjB ... 1524!> \param my_B_size ... 1525!> \param my_B_virtual_end ... 1526!> \param my_B_virtual_start ... 1527!> \param my_i ... 1528!> \param my_j ... 1529!> \param virtual ... 1530!> \param sub_proc_map ... 1531!> \param local_ab ... 1532!> \param t_ab ... 1533!> \param local_i_aL ... 1534!> \param local_j_aL ... 1535!> \param open_ss ... 1536!> \param alpha_alpha ... 1537!> \param beta_beta ... 1538!> \param Y_i_aP ... 1539!> \param Y_j_aP ... 1540!> \param eigenval_beta ... 1541!> \param homo_beta ... 1542!> \param my_B_size_beta ... 1543!> \param gd_B_virtual_beta ... 1544!> \param my_B_virtual_start_beta ... 1545!> \param my_B_virtual_end_beta ... 1546!> \param virtual_beta ... 1547!> \param local_ba ... 1548! ************************************************************************************************** 1549 SUBROUTINE mp2_update_P_gamma(mp2_env, para_env_sub, gd_B_virtual, & 1550 Eigenval, homo, dimen_RI, iiB, jjB, my_B_size, & 1551 my_B_virtual_end, my_B_virtual_start, my_i, my_j, virtual, sub_proc_map, local_ab, & 1552 t_ab, local_i_aL, local_j_aL, open_ss, alpha_alpha, beta_beta, Y_i_aP, Y_j_aP, & 1553 eigenval_beta, homo_beta, my_B_size_beta, gd_B_virtual_beta, & 1554 my_B_virtual_start_beta, my_B_virtual_end_beta, virtual_beta, local_ba) 1555 TYPE(mp2_type), POINTER :: mp2_env 1556 TYPE(cp_para_env_type), POINTER :: para_env_sub 1557 TYPE(group_dist_d1_type), INTENT(IN) :: gd_B_virtual 1558 REAL(KIND=dp), DIMENSION(:), INTENT(IN) :: Eigenval 1559 INTEGER, INTENT(IN) :: homo, dimen_RI, iiB, jjB, my_B_size, & 1560 my_B_virtual_end, my_B_virtual_start, & 1561 my_i, my_j, virtual 1562 INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(IN) :: sub_proc_map 1563 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :), & 1564 INTENT(INOUT) :: local_ab 1565 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :), & 1566 INTENT(IN) :: t_ab 1567 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), & 1568 INTENT(IN) :: local_i_aL, local_j_aL 1569 LOGICAL, INTENT(IN) :: open_ss, alpha_alpha, beta_beta 1570 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), & 1571 INTENT(INOUT) :: Y_i_aP, Y_j_aP 1572 REAL(KIND=dp), DIMENSION(:), INTENT(IN), OPTIONAL :: Eigenval_beta 1573 INTEGER, INTENT(IN), OPTIONAL :: homo_beta, my_B_size_beta 1574 TYPE(group_dist_d1_type), INTENT(IN), OPTIONAL :: gd_B_virtual_beta 1575 INTEGER, INTENT(IN), OPTIONAL :: my_B_virtual_start_beta, & 1576 my_B_virtual_end_beta, virtual_beta 1577 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :), & 1578 INTENT(INOUT), OPTIONAL :: local_ba 1579 1580 CHARACTER(LEN=*), PARAMETER :: routineN = 'mp2_update_P_gamma', & 1581 routineP = moduleN//':'//routineN 1582 1583 INTEGER :: a, b, b_global, handle, proc_receive, proc_send, proc_shift, rec_B_size, & 1584 rec_B_virtual_end, rec_B_virtual_start, send_B_size, send_B_virtual_end, & 1585 send_B_virtual_start 1586 LOGICAL :: alpha_beta 1587 REAL(KIND=dp) :: factor, P_ij_diag 1588 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: external_ab, send_ab 1589 1590! 1591! In alpha-beta case Y_j_aP_beta is sent and is received as Y_j_aP 1592! 1593 1594 CALL timeset(routineN//"_Pia", handle) 1595! Find out whether we have an alpha-beta case 1596 alpha_beta = .FALSE. 1597 IF (PRESENT(Eigenval_beta) .AND. PRESENT(homo_beta) .AND. PRESENT(my_B_size_beta)) & 1598 alpha_beta = .TRUE. 1599! update P_ab, Gamma_P_ia 1600! First, P_ab 1601 IF (open_ss) THEN 1602 factor = 1.0_dp 1603 ELSE 1604 factor = 2.0_dp 1605 ENDIF 1606 ! divide the (ia|jb) integrals by Delta_ij^ab 1607 IF (.NOT. alpha_beta) THEN 1608 DO b = 1, my_B_size 1609 b_global = b + my_B_virtual_start - 1 1610 DO a = 1, virtual 1611 local_ab(a, b) = -local_ab(a, b)/ & 1612 (Eigenval(homo + a) + Eigenval(homo + b_global) - Eigenval(my_i + iiB - 1) - Eigenval(my_j + jjB - 1)) 1613 END DO 1614 END DO 1615 ! update diagonal part of P_ij 1616 P_ij_diag = -SUM(local_ab*t_ab)*factor 1617 ELSE 1618 DO b = 1, my_B_size_beta 1619 b_global = b + my_B_virtual_start_beta - 1 1620 DO a = 1, virtual 1621 local_ab(a, b) = -local_ab(a, b)/ & 1622 (Eigenval(homo + a) + Eigenval_beta(homo_beta + b_global) - Eigenval(my_i + iiB - 1) - Eigenval_beta(my_j + jjB - 1)) 1623 END DO 1624 END DO 1625 ! update diagonal part of P_ij 1626 P_ij_diag = -SUM(local_ab*local_ab) 1627 ! More integrals needed only for alpha-beta case: local_ba 1628 DO b = 1, my_B_size 1629 b_global = b + my_B_virtual_start - 1 1630 DO a = 1, virtual_beta 1631 local_ba(a, b) = -local_ba(a, b)/ & 1632 (Eigenval_beta(homo_beta + a) + Eigenval(homo + b_global) - Eigenval(my_i + iiB - 1) - Eigenval_beta(my_j + jjB - 1)) 1633 END DO 1634 END DO 1635 ENDIF 1636 1637 ! P_ab and add diagonal part of P_ij 1638 1639 ! Alpha_alpha or closed-shell case 1640 IF (((.NOT. open_ss) .AND. (.NOT. alpha_beta)) .OR. alpha_alpha) THEN 1641 CALL dgemm('T', 'N', my_B_size, my_B_size, virtual, 1.0_dp, & 1642 t_ab(:, :), virtual, local_ab(:, :), virtual, & 1643 1.0_dp, mp2_env%ri_grad%P_ab(1:my_B_size, my_B_virtual_start:my_B_virtual_end), my_B_size) 1644 mp2_env%ri_grad%P_ij(my_i + iiB - 1, my_i + iiB - 1) = & 1645 mp2_env%ri_grad%P_ij(my_i + iiB - 1, my_i + iiB - 1) + P_ij_diag 1646 ENDIF 1647 ! Beta_beta case 1648 IF (beta_beta) THEN 1649 CALL dgemm('T', 'N', my_B_size, my_B_size, virtual, 1.0_dp, & 1650 t_ab(:, :), virtual, local_ab(:, :), virtual, & 1651 1.0_dp, mp2_env%ri_grad%P_ab_beta(1:my_B_size, my_B_virtual_start:my_B_virtual_end), my_B_size) 1652 mp2_env%ri_grad%P_ij_beta(my_i + iiB - 1, my_i + iiB - 1) = & 1653 mp2_env%ri_grad%P_ij_beta(my_i + iiB - 1, my_i + iiB - 1) + P_ij_diag 1654 ENDIF 1655 ! Alpha_beta case 1656 IF (alpha_beta) THEN 1657 CALL dgemm('T', 'N', my_B_size, my_B_size, virtual_beta, 1.0_dp, & 1658 local_ba(:, :), virtual_beta, local_ba(:, :), virtual_beta, 1.0_dp, & 1659 mp2_env%ri_grad%P_ab(1:my_B_size, my_B_virtual_start: & 1660 my_B_virtual_end), my_B_size) 1661 mp2_env%ri_grad%P_ij(my_i + iiB - 1, my_i + iiB - 1) = & 1662 mp2_env%ri_grad%P_ij(my_i + iiB - 1, my_i + iiB - 1) + P_ij_diag 1663 CALL dgemm('T', 'N', my_B_size_beta, my_B_size_beta, virtual, 1.0_dp, & 1664 local_ab(:, :), virtual, local_ab(:, :), virtual, 1.0_dp, & 1665 mp2_env%ri_grad%P_ab_beta(1:my_B_size_beta, my_B_virtual_start_beta: & 1666 my_B_virtual_end_beta), my_B_size_beta) 1667 mp2_env%ri_grad%P_ij_beta(my_j + jjB - 1, my_j + jjB - 1) = & 1668 mp2_env%ri_grad%P_ij_beta(my_j + jjB - 1, my_j + jjB - 1) + P_ij_diag 1669 ENDIF 1670 ! The summation is over unique pairs. In alpha-beta case, all pairs are unique: subroutine is called for 1671 ! both i^alpha,j^beta and i^beta,j^alpha. Formally, my_i can be equal to my_j, but they are different 1672 ! due to spin in alpha-beta case. 1673 IF ((my_i /= my_j) .AND. (.NOT. alpha_beta)) THEN 1674 ! Alpha_alpha or closed-shell case 1675 IF (((.NOT. open_ss) .AND. (.NOT. alpha_beta)) .OR. alpha_alpha) THEN 1676 CALL dgemm('N', 'T', my_B_size, virtual, my_B_size, 1.0_dp, & 1677 t_ab(my_B_virtual_start:my_B_virtual_end, 1:my_B_size), my_B_size, & 1678 local_ab(:, :), virtual, & 1679 1.0_dp, mp2_env%ri_grad%P_ab(1:my_B_size, 1:virtual), my_B_size) 1680 mp2_env%ri_grad%P_ij(my_j + jjB - 1, my_j + jjB - 1) = & 1681 mp2_env%ri_grad%P_ij(my_j + jjB - 1, my_j + jjB - 1) + P_ij_diag 1682 ENDIF 1683 ! Beta_beta_case 1684 IF (beta_beta) THEN 1685 CALL dgemm('N', 'T', my_B_size, virtual, my_B_size, 1.0_dp, & 1686 t_ab(my_B_virtual_start:my_B_virtual_end, 1:my_B_size), my_B_size, & 1687 local_ab(:, :), virtual, & 1688 1.0_dp, mp2_env%ri_grad%P_ab_beta(1:my_B_size, 1:virtual), my_B_size) 1689 mp2_env%ri_grad%P_ij_beta(my_j + jjB - 1, my_j + jjB - 1) = & 1690 mp2_env%ri_grad%P_ij_beta(my_j + jjB - 1, my_j + jjB - 1) + P_ij_diag 1691 ENDIF 1692 END IF 1693 DO proc_shift = 1, para_env_sub%num_pe - 1 1694 proc_send = sub_proc_map(para_env_sub%mepos + proc_shift) 1695 proc_receive = sub_proc_map(para_env_sub%mepos - proc_shift) 1696 ! Alpha-alpha, beta-beta, closed shell 1697 IF (.NOT. alpha_beta) THEN 1698 CALL get_group_dist(gd_B_virtual, proc_receive, rec_B_virtual_start, rec_B_virtual_end, rec_B_size) 1699 CALL get_group_dist(gd_B_virtual, proc_send, send_B_virtual_start, send_B_virtual_end, send_B_size) 1700 ELSE ! Alpha-beta case 1701 CALL get_group_dist(gd_B_virtual_beta, proc_receive, rec_B_virtual_start, rec_B_virtual_end, rec_B_size) 1702 CALL get_group_dist(gd_B_virtual_beta, proc_send, send_B_virtual_start, send_B_virtual_end, send_B_size) 1703 ENDIF 1704 1705 ALLOCATE (external_ab(virtual, rec_B_size)) 1706 external_ab = 0.0_dp 1707 1708 IF (.NOT. alpha_beta) THEN 1709 CALL mp_sendrecv(local_ab(1:virtual, 1:my_B_size), proc_send, & 1710 external_ab(1:virtual, 1:rec_B_size), proc_receive, & 1711 para_env_sub%group) 1712 ELSE 1713 CALL mp_sendrecv(local_ab(1:virtual, 1:my_B_size_beta), proc_send, & 1714 external_ab(1:virtual, 1:rec_B_size), proc_receive, & 1715 para_env_sub%group) 1716 ENDIF 1717 1718 ! Alpha-alpha or closed-shell case 1719 IF (((.NOT. open_ss) .AND. (.NOT. alpha_beta)) .OR. alpha_alpha) & 1720 CALL dgemm('T', 'N', my_B_size, rec_B_size, virtual, 1.0_dp, & 1721 t_ab(:, :), virtual, external_ab(:, :), virtual, & 1722 1.0_dp, mp2_env%ri_grad%P_ab(1:my_B_size, rec_B_virtual_start:rec_B_virtual_end), my_B_size) 1723 ! Beta-beta case 1724 IF (beta_beta) & 1725 CALL dgemm('T', 'N', my_B_size, rec_B_size, virtual, 1.0_dp, & 1726 t_ab(:, :), virtual, external_ab(:, :), virtual, & 1727 1.0_dp, mp2_env%ri_grad%P_ab_beta(1:my_B_size, rec_B_virtual_start:rec_B_virtual_end), my_B_size) 1728 ! Alpha-beta case 1729 IF (alpha_beta) THEN 1730 ! CALL dgemm('N','T',my_B_size,virtual,rec_B_size,1.0_dp,& 1731 ! local_ab(1:my_B_size,rec_B_virtual_start:rec_B_virtual_end),my_B_size,external_ab(:,:),rec_B_size,& 1732 ! 1.0_dp,mp2_env%ri_grad%P_ab(1:my_B_size,1:virtual),my_B_size) 1733 1734 ! Alpha-beta part of beta-beta density 1735 CALL dgemm('T', 'N', my_B_size_beta, rec_B_size, virtual, 1.0_dp, & 1736 local_ab(:, :), virtual, external_ab(:, :), virtual, & 1737 1.0_dp, mp2_env%ri_grad%P_ab_beta(1:my_B_size_beta, rec_B_virtual_start:rec_B_virtual_end), & 1738 my_B_size_beta) 1739 1740 ! For alpha-beta part of alpha-density we need a new parallel code 1741 DEALLOCATE (external_ab) 1742 ! And new external_ab (of a different size) 1743 CALL get_group_dist(gd_B_virtual, proc_receive, rec_B_virtual_start, rec_B_virtual_end, rec_B_size) 1744 CALL get_group_dist(gd_B_virtual, proc_send, send_B_virtual_start, send_B_virtual_end, send_B_size) 1745 ALLOCATE (external_ab(virtual_beta, rec_B_size)) 1746 external_ab = 0.0_dp 1747 CALL mp_sendrecv(local_ba(1:virtual_beta, 1:my_B_size), proc_send, & 1748 external_ab(1:virtual_beta, 1:rec_B_size), proc_receive, & 1749 para_env_sub%group) 1750 CALL dgemm('T', 'N', my_B_size, rec_B_size, virtual_beta, 1.0_dp, & 1751 local_ba(:, :), virtual_beta, external_ab(:, :), virtual_beta, & 1752 1.0_dp, mp2_env%ri_grad%P_ab(1:my_B_size, rec_B_virtual_start:rec_B_virtual_end), my_B_size) 1753 ENDIF 1754 1755 DEALLOCATE (external_ab) 1756 1757 IF ((my_i /= my_j) .AND. (.NOT. alpha_beta)) THEN 1758 ALLOCATE (external_ab(my_B_size, virtual)) 1759 external_ab = 0.0_dp 1760 1761 ALLOCATE (send_ab(send_B_size, virtual)) 1762 send_ab = 0.0_dp 1763 1764 CALL dgemm('N', 'T', send_B_size, virtual, my_B_size, 1.0_dp, & 1765 t_ab(send_B_virtual_start:send_B_virtual_end, 1:my_B_size), send_B_size, & 1766 local_ab(:, :), virtual, & 1767 0.0_dp, send_ab(1:send_B_size, 1:virtual), send_B_size) 1768 1769 CALL mp_sendrecv(send_ab, proc_send, & 1770 external_ab, proc_receive, & 1771 para_env_sub%group) 1772 1773 ! Alpha_alpha or closed-shell case 1774 IF (((.NOT. open_ss) .AND. (.NOT. alpha_beta)) .OR. alpha_alpha) & 1775 mp2_env%ri_grad%P_ab(:, :) = mp2_env%ri_grad%P_ab + external_ab 1776 ! Beta_beta case 1777 IF (beta_beta) & 1778 mp2_env%ri_grad%P_ab_beta(:, :) = mp2_env%ri_grad%P_ab_beta + external_ab 1779 1780 DEALLOCATE (external_ab) 1781 DEALLOCATE (send_ab) 1782 END IF 1783 1784 END DO 1785 CALL timestop(handle) 1786 1787 ! Now, Gamma_P_ia (made of Y_ia_P) 1788 1789 CALL timeset(routineN//"_Gamma", handle) 1790 IF (.NOT. alpha_beta) THEN 1791 ! Alpha-alpha, beta-beta and closed shell 1792 CALL dgemm('N', 'T', my_B_size, dimen_RI, my_B_size, 1.0_dp, & 1793 t_ab(my_B_virtual_start:my_B_virtual_end, 1:my_B_size), my_B_size, & 1794 local_j_aL(1:dimen_RI, 1:my_B_size, jjB), dimen_RI, & 1795 1.0_dp, Y_i_aP(1:my_B_size, 1:dimen_RI, iiB), my_B_size) 1796 ELSE ! Alpha-beta 1797 CALL dgemm('N', 'T', my_B_size, dimen_RI, my_B_size_beta, 1.0_dp, & 1798 local_ab(my_B_virtual_start:my_B_virtual_end, 1:my_B_size_beta), my_B_size, & 1799 local_j_aL(1:dimen_RI, 1:my_B_size_beta, jjB), dimen_RI, & 1800 1.0_dp, Y_i_aP(1:my_B_size, 1:dimen_RI, iiB), my_B_size) 1801 CALL dgemm('T', 'T', my_B_size_beta, dimen_RI, my_B_size, 1.0_dp, & 1802 local_ab(my_B_virtual_start:my_B_virtual_end, 1:my_B_size_beta), my_B_size, & 1803 local_i_aL(1:dimen_RI, 1:my_B_size, jjB), dimen_RI, & 1804 1.0_dp, Y_j_aP(1:my_B_size_beta, 1:dimen_RI, jjB), my_B_size_beta) 1805 1806 ENDIF 1807 1808 ALLOCATE (external_ab(my_B_size, dimen_RI)) 1809 external_ab = 0.0_dp 1810 ! 1811 DO proc_shift = 1, para_env_sub%num_pe - 1 1812 proc_send = sub_proc_map(para_env_sub%mepos + proc_shift) 1813 proc_receive = sub_proc_map(para_env_sub%mepos - proc_shift) 1814 1815 CALL get_group_dist(gd_B_virtual, proc_receive, rec_B_virtual_start, rec_B_virtual_end, rec_B_size) 1816 1817 CALL get_group_dist(gd_B_virtual, proc_send, send_B_virtual_start, send_B_virtual_end, send_B_size) 1818 1819 IF (.NOT. alpha_beta) THEN 1820 ALLOCATE (send_ab(send_B_size, dimen_RI)) 1821 send_ab = 0.0_dp 1822 CALL dgemm('N', 'T', send_B_size, dimen_RI, my_B_size, 1.0_dp, & 1823 t_ab(send_B_virtual_start:send_B_virtual_end, 1:my_B_size), send_B_size, & 1824 local_j_aL(1:dimen_RI, 1:my_B_size, jjB), dimen_RI, & 1825 0.0_dp, send_ab(1:send_B_size, 1:dimen_RI), send_B_size) 1826 CALL mp_sendrecv(send_ab, proc_send, external_ab, proc_receive, para_env_sub%group) 1827 ! Alpha-alpha, beta-beta and closed shell 1828 1829 Y_i_aP(1:my_B_size, 1:dimen_RI, iiB) = & 1830 Y_i_aP(1:my_B_size, 1:dimen_RI, iiB) + external_ab 1831 1832 DEALLOCATE (send_ab) 1833 ELSE ! Alpha-beta case 1834 ! Alpha-alpha part 1835 ALLOCATE (send_ab(send_B_size, dimen_RI)) 1836 send_ab = 0.0_dp 1837 CALL dgemm('N', 'T', send_B_size, dimen_RI, my_B_size_beta, 1.0_dp, & 1838 local_ab(send_B_virtual_start:send_B_virtual_end, 1:my_B_size_beta), send_B_size, & 1839 local_j_aL(1:dimen_RI, 1:my_B_size_beta, jjB), dimen_RI, & 1840 0.0_dp, send_ab(1:send_B_size, 1:dimen_RI), send_B_size) 1841 CALL mp_sendrecv(send_ab, proc_send, external_ab, proc_receive, para_env_sub%group) 1842 Y_i_aP(1:my_B_size, 1:dimen_RI, iiB) = & 1843 Y_i_aP(1:my_B_size, 1:dimen_RI, iiB) + external_ab 1844 DEALLOCATE (send_ab) 1845 ENDIF 1846 END DO 1847 DEALLOCATE (external_ab) 1848 1849 IF (alpha_beta) THEN 1850 ! For beta-beta part (in alpha-beta case) we need a new parallel code 1851 ALLOCATE (external_ab(my_B_size_beta, dimen_RI)) 1852 external_ab = 0.0_dp 1853 DO proc_shift = 1, para_env_sub%num_pe - 1 1854 proc_send = sub_proc_map(para_env_sub%mepos + proc_shift) 1855 proc_receive = sub_proc_map(para_env_sub%mepos - proc_shift) 1856 1857 CALL get_group_dist(gd_B_virtual_beta, proc_send, send_B_virtual_start, send_B_virtual_end, send_B_size) 1858 ALLOCATE (send_ab(send_B_size, dimen_RI)) 1859 send_ab = 0.0_dp 1860 1861 CALL dgemm('N', 'T', send_B_size, dimen_RI, my_B_size, 1.0_dp, & 1862 local_ba(send_B_virtual_start:send_B_virtual_end, 1:my_B_size), send_B_size, & 1863 local_i_aL(1:dimen_RI, 1:my_B_size, jjB), dimen_RI, & 1864 0.0_dp, send_ab(1:send_B_size, 1:dimen_RI), send_B_size) 1865 CALL mp_sendrecv(send_ab, proc_send, external_ab, proc_receive, para_env_sub%group) 1866 Y_j_aP(1:my_B_size_beta, 1:dimen_RI, jjB) = & 1867 Y_j_aP(1:my_B_size_beta, 1:dimen_RI, jjB) + external_ab 1868 DEALLOCATE (send_ab) 1869 1870 END DO 1871 DEALLOCATE (external_ab) 1872 ENDIF 1873 1874 IF ((my_i /= my_j) .AND. (.NOT. alpha_beta)) THEN 1875 ! Alpha-alpha, beta-beta and closed shell 1876 CALL dgemm('T', 'T', my_B_size, dimen_RI, my_B_size, 1.0_dp, & 1877 t_ab(my_B_virtual_start:my_B_virtual_end, 1:my_B_size), my_B_size, & 1878 local_i_aL(1:dimen_RI, 1:my_B_size, iiB), dimen_RI, & 1879 1.0_dp, Y_j_aP(1:my_B_size, 1:dimen_RI, jjB), my_B_size) 1880 1881 DO proc_shift = 1, para_env_sub%num_pe - 1 1882 proc_send = sub_proc_map(para_env_sub%mepos + proc_shift) 1883 proc_receive = sub_proc_map(para_env_sub%mepos - proc_shift) 1884 1885 CALL get_group_dist(gd_B_virtual, proc_receive, rec_B_virtual_start, rec_B_virtual_end, rec_B_size) 1886 1887 ALLOCATE (external_ab(dimen_RI, rec_B_size)) 1888 external_ab = 0.0_dp 1889 1890 CALL mp_sendrecv(local_i_aL(1:dimen_RI, 1:my_B_size, iiB), proc_send, & 1891 external_ab, proc_receive, para_env_sub%group) 1892 1893 ! Alpha-alpha, beta-beta and closed shell 1894 CALL dgemm('T', 'T', my_B_size, dimen_RI, rec_B_size, 1.0_dp, & 1895 t_ab(rec_B_virtual_start:rec_B_virtual_end, 1:my_B_size), rec_B_size, & 1896 external_ab(1:dimen_RI, 1:rec_B_size), dimen_RI, & 1897 1.0_dp, Y_j_aP(1:my_B_size, 1:dimen_RI, jjB), my_B_size) 1898 1899 DEALLOCATE (external_ab) 1900 END DO 1901 END IF 1902 1903 CALL timestop(handle) 1904 1905 END SUBROUTINE mp2_update_P_gamma 1906 1907! ************************************************************************************************** 1908!> \brief ... 1909!> \param mp2_env ... 1910!> \param ij_index ... 1911!> \param my_B_size ... 1912!> \param my_block_size ... 1913!> \param my_group_L_size ... 1914!> \param my_i ... 1915!> \param my_ij_pairs ... 1916!> \param my_j ... 1917!> \param ngroup ... 1918!> \param num_integ_group ... 1919!> \param integ_group_pos2color_sub ... 1920!> \param num_ij_pairs ... 1921!> \param proc_map ... 1922!> \param ij_map ... 1923!> \param ranges_info_array ... 1924!> \param Y_i_aP ... 1925!> \param Y_j_aP ... 1926!> \param para_env_exchange ... 1927!> \param null_mat_rec ... 1928!> \param null_mat_send ... 1929!> \param sizes_array ... 1930!> \param alpha_alpha ... 1931!> \param beta_beta ... 1932!> \param alpha_beta ... 1933!> \param open_shell ... 1934!> \param my_b_size_beta ... 1935! ************************************************************************************************** 1936 SUBROUTINE mp2_redistribute_gamma(mp2_env, ij_index, my_B_size, & 1937 my_block_size, my_group_L_size, my_i, my_ij_pairs, my_j, ngroup, & 1938 num_integ_group, integ_group_pos2color_sub, num_ij_pairs, proc_map, & 1939 ij_map, ranges_info_array, Y_i_aP, Y_j_aP, para_env_exchange, & 1940 null_mat_rec, null_mat_send, sizes_array, alpha_alpha, beta_beta, & 1941 alpha_beta, open_shell, my_b_size_beta) 1942 1943 TYPE(mp2_type), POINTER :: mp2_env 1944 INTEGER, INTENT(IN) :: ij_index, my_B_size, my_block_size, & 1945 my_group_L_size, my_i, my_ij_pairs, & 1946 my_j, ngroup, num_integ_group 1947 INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(IN) :: integ_group_pos2color_sub, num_ij_pairs, & 1948 proc_map 1949 INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(IN) :: ij_map 1950 INTEGER, ALLOCATABLE, DIMENSION(:, :, :), & 1951 INTENT(IN) :: ranges_info_array 1952 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), & 1953 INTENT(IN) :: Y_i_aP, Y_j_aP 1954 TYPE(cp_para_env_type), POINTER :: para_env_exchange 1955 REAL(KIND=dp), INTENT(OUT) :: null_mat_rec(:, :, :), & 1956 null_mat_send(:, :, :) 1957 INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(IN) :: sizes_array 1958 LOGICAL, INTENT(IN) :: alpha_alpha, beta_beta, alpha_beta, & 1959 open_shell 1960 INTEGER, INTENT(IN), OPTIONAL :: my_B_size_beta 1961 1962 CHARACTER(LEN=*), PARAMETER :: routineN = 'mp2_redistribute_gamma', & 1963 routineP = moduleN//':'//routineN 1964 1965 INTEGER :: end_point, handle, handle2, iiB, ij_counter_rec, irep, jjb, kkk, Lend_pos, lll, & 1966 Lstart_pos, proc_receive, proc_send, proc_shift, rec_block_size, rec_i, rec_ij_index, & 1967 rec_j, send_L_size, start_point 1968 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: BI_C_rec, BI_C_rec_beta, BI_C_send, & 1969 BI_C_send_beta 1970 1971! In alpha-beta case Y_i_aP_beta is sent as Y_j_aP 1972 1973 CALL timeset(routineN//"_comm2", handle) 1974 IF (ij_index <= my_ij_pairs) THEN 1975 ! somethig to send 1976 ! start with myself 1977 CALL timeset(routineN//"_comm2_w", handle2) 1978 DO irep = 0, num_integ_group - 1 1979 Lstart_pos = ranges_info_array(1, irep, para_env_exchange%mepos) 1980 Lend_pos = ranges_info_array(2, irep, para_env_exchange%mepos) 1981 start_point = ranges_info_array(3, irep, para_env_exchange%mepos) 1982 end_point = ranges_info_array(4, irep, para_env_exchange%mepos) 1983 DO iiB = 1, my_block_size 1984!$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(kkk,lll) & 1985!$OMP SHARED(start_point,end_point,Lstart_pos,Lend_pos,& 1986!$OMP mp2_env,my_i,iiB,my_B_size,Y_i_aP,& 1987!$OMP alpha_alpha,beta_beta,open_shell) 1988 DO kkk = start_point, end_point 1989 lll = kkk - start_point + Lstart_pos 1990 IF (alpha_alpha .OR. (.NOT. open_shell)) THEN 1991 mp2_env%ri_grad%Gamma_P_ia(my_i + iiB - 1, 1:my_B_size, kkk) = & 1992 mp2_env%ri_grad%Gamma_P_ia(my_i + iiB - 1, 1:my_B_size, kkk) + & 1993 Y_i_aP(1:my_B_size, lll, iiB) 1994 ENDIF 1995 IF (beta_beta) THEN 1996 mp2_env%ri_grad%Gamma_P_ia_beta(my_i + iiB - 1, 1:my_B_size, kkk) = & 1997 mp2_env%ri_grad%Gamma_P_ia_beta(my_i + iiB - 1, 1:my_B_size, kkk) + & 1998 Y_i_aP(1:my_B_size, lll, iiB) 1999 ENDIF 2000 END DO 2001!$OMP END PARALLEL DO 2002 END DO 2003 DO jjB = 1, my_block_size 2004 IF (.NOT. alpha_beta) THEN 2005!$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(kkk,lll) & 2006!$OMP SHARED(start_point,end_point,Lstart_pos,Lend_pos,& 2007!$OMP mp2_env,my_j,jjB,my_B_size,Y_j_aP,& 2008!$OMP alpha_alpha,beta_beta,open_shell) 2009 DO kkk = start_point, end_point 2010 lll = kkk - start_point + Lstart_pos 2011 IF (alpha_alpha .OR. (.NOT. open_shell)) THEN 2012 mp2_env%ri_grad%Gamma_P_ia(my_j + jjB - 1, 1:my_B_size, kkk) = & 2013 mp2_env%ri_grad%Gamma_P_ia(my_j + jjB - 1, 1:my_B_size, kkk) + & 2014 Y_j_aP(1:my_B_size, lll, jjB) 2015 ENDIF 2016 IF (beta_beta) THEN 2017 mp2_env%ri_grad%Gamma_P_ia_beta(my_j + jjB - 1, 1:my_B_size, kkk) = & 2018 mp2_env%ri_grad%Gamma_P_ia_beta(my_j + jjB - 1, 1:my_B_size, kkk) + & 2019 Y_j_aP(1:my_B_size, lll, jjB) 2020 ENDIF 2021 END DO 2022!$OMP END PARALLEL DO 2023 ELSE 2024!$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(kkk,lll) & 2025!$OMP SHARED(start_point,end_point,Lstart_pos,Lend_pos,& 2026!$OMP mp2_env,my_j,jjB,my_B_size_beta,Y_j_aP) 2027 DO kkk = start_point, end_point 2028 lll = kkk - start_point + Lstart_pos 2029 mp2_env%ri_grad%Gamma_P_ia_beta(my_j + jjB - 1, 1:my_B_size_beta, kkk) = & 2030 mp2_env%ri_grad%Gamma_P_ia_beta(my_j + jjB - 1, 1:my_B_size_beta, kkk) + & 2031 Y_j_aP(1:my_B_size_beta, lll, jjB) 2032 ENDDO 2033!$OMP END PARALLEL DO 2034 ENDIF 2035 END DO 2036 END DO 2037 CALL timestop(handle2) 2038 2039 ! Y_i_aP(my_B_size,dimen_RI,block_size) 2040 2041 DO proc_shift = 1, para_env_exchange%num_pe - 1 2042 proc_send = proc_map(para_env_exchange%mepos + proc_shift) 2043 proc_receive = proc_map(para_env_exchange%mepos - proc_shift) 2044 2045 send_L_size = sizes_array(proc_send) 2046 IF (.NOT. alpha_beta) THEN 2047 ALLOCATE (BI_C_send(2*my_block_size, my_B_size, send_L_size)) 2048 ELSE 2049 ALLOCATE (BI_C_send(my_block_size, my_B_size, send_L_size)) 2050 ALLOCATE (BI_C_send_beta(my_block_size, my_B_size_beta, send_L_size)) 2051 ENDIF 2052 CALL timeset(routineN//"_comm2_w", handle2) 2053 BI_C_send = 0.0_dp 2054 IF (alpha_beta) BI_C_send_beta = 0.0_dp 2055 DO irep = 0, num_integ_group - 1 2056 Lstart_pos = ranges_info_array(1, irep, proc_send) 2057 Lend_pos = ranges_info_array(2, irep, proc_send) 2058 start_point = ranges_info_array(3, irep, proc_send) 2059 end_point = ranges_info_array(4, irep, proc_send) 2060 DO iiB = 1, my_block_size 2061!$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(kkk,lll) & 2062!$OMP SHARED(start_point,end_point,Lstart_pos,Lend_pos,& 2063!$OMP BI_C_send,iiB,my_B_size,Y_i_aP) 2064 DO kkk = start_point, end_point 2065 lll = kkk - start_point + Lstart_pos 2066 BI_C_send(iiB, 1:my_B_size, kkk) = Y_i_aP(1:my_B_size, lll, iiB) 2067 END DO 2068!$OMP END PARALLEL DO 2069 END DO 2070 DO jjB = 1, my_block_size 2071 IF (.NOT. alpha_beta) THEN 2072!$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(kkk,lll) & 2073!$OMP SHARED(start_point,end_point,Lstart_pos,Lend_pos,my_block_size,& 2074!$OMP BI_C_send,jjB,my_B_size,Y_j_aP) 2075 DO kkk = start_point, end_point 2076 lll = kkk - start_point + Lstart_pos 2077 BI_C_send(jjB + my_block_size, 1:my_B_size, kkk) = Y_j_aP(1:my_B_size, lll, jjB) 2078 END DO 2079!$OMP END PARALLEL DO 2080 ELSE 2081!$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(kkk,lll) & 2082!$OMP SHARED(start_point,end_point,Lstart_pos,Lend_pos,& 2083!$OMP BI_C_send_beta,jjB,my_B_size_beta,Y_j_aP) 2084 DO kkk = start_point, end_point 2085 lll = kkk - start_point + Lstart_pos 2086 BI_C_send_beta(jjB, 1:my_B_size_beta, kkk) = Y_j_aP(1:my_B_size_beta, lll, jjB) 2087 END DO 2088!$OMP END PARALLEL DO 2089 ENDIF 2090 END DO 2091 END DO 2092 CALL timestop(handle2) 2093 2094 rec_ij_index = num_ij_pairs(proc_receive) 2095 2096 IF (ij_index <= rec_ij_index) THEN 2097 ! we know that proc_receive has something to send for us, let's see what 2098 ij_counter_rec = & 2099 (ij_index - MIN(1, integ_group_pos2color_sub(proc_receive)))*ngroup + integ_group_pos2color_sub(proc_receive) 2100 2101 rec_i = ij_map(ij_counter_rec, 1) 2102 rec_j = ij_map(ij_counter_rec, 2) 2103 rec_block_size = ij_map(ij_counter_rec, 3) 2104 2105 IF (.NOT. alpha_beta) THEN 2106 ALLOCATE (BI_C_rec(2*rec_block_size, my_B_size, my_group_L_size)) 2107 ELSE 2108 ALLOCATE (BI_C_rec(rec_block_size, my_B_size, my_group_L_size)) 2109 ALLOCATE (BI_C_rec_beta(rec_block_size, my_B_size_beta, my_group_L_size)) 2110 ENDIF 2111 2112 BI_C_rec = 0.0_dp 2113 IF (alpha_beta) BI_C_rec_beta = 0.0_dp 2114 2115 CALL mp_sendrecv(BI_C_send, proc_send, & 2116 BI_C_rec, proc_receive, & 2117 para_env_exchange%group) 2118 IF (alpha_beta) THEN 2119 CALL mp_sendrecv(BI_C_send_beta, proc_send, & 2120 BI_C_rec_beta, proc_receive, & 2121 para_env_exchange%group) 2122 ENDIF 2123 2124 CALL timeset(routineN//"_comm2_w", handle2) 2125 DO irep = 0, num_integ_group - 1 2126 Lstart_pos = ranges_info_array(1, irep, para_env_exchange%mepos) 2127 Lend_pos = ranges_info_array(2, irep, para_env_exchange%mepos) 2128 start_point = ranges_info_array(3, irep, para_env_exchange%mepos) 2129 end_point = ranges_info_array(4, irep, para_env_exchange%mepos) 2130 DO iiB = 1, rec_block_size 2131!$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(kkk) & 2132!$OMP SHARED(start_point,end_point,& 2133!$OMP mp2_env,rec_i,iiB,my_B_size,BI_C_rec,& 2134!$OMP alpha_alpha,beta_beta,open_shell) 2135 DO kkk = start_point, end_point 2136 IF (alpha_alpha .OR. (.NOT. open_shell)) THEN 2137 mp2_env%ri_grad%Gamma_P_ia(rec_i + iiB - 1, 1:my_B_size, kkk) = & 2138 mp2_env%ri_grad%Gamma_P_ia(rec_i + iiB - 1, 1:my_B_size, kkk) + & 2139 BI_C_rec(iiB, 1:my_B_size, kkk) 2140 ENDIF 2141 IF (beta_beta) THEN 2142 mp2_env%ri_grad%Gamma_P_ia_beta(rec_i + iiB - 1, 1:my_B_size, kkk) = & 2143 mp2_env%ri_grad%Gamma_P_ia_beta(rec_i + iiB - 1, 1:my_B_size, kkk) + & 2144 BI_C_rec(iiB, 1:my_B_size, kkk) 2145 ENDIF 2146 END DO 2147!$OMP END PARALLEL DO 2148 END DO 2149 DO jjB = 1, rec_block_size 2150 IF (.NOT. alpha_beta) THEN 2151!$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(kkk) & 2152!$OMP SHARED(start_point,end_point,rec_block_size,& 2153!$OMP mp2_env,rec_j,jjB,my_B_size,BI_C_rec,& 2154!$OMP alpha_alpha,beta_beta,open_shell) 2155 DO kkk = start_point, end_point 2156 IF (alpha_alpha .OR. (.NOT. open_shell)) THEN 2157 mp2_env%ri_grad%Gamma_P_ia(rec_j + jjB - 1, 1:my_B_size, kkk) = & 2158 mp2_env%ri_grad%Gamma_P_ia(rec_j + jjB - 1, 1:my_B_size, kkk) + & 2159 BI_C_rec(jjB + rec_block_size, 1:my_B_size, kkk) 2160 ENDIF 2161 IF (beta_beta) THEN 2162 mp2_env%ri_grad%Gamma_P_ia_beta(rec_j + jjB - 1, 1:my_B_size, kkk) = & 2163 mp2_env%ri_grad%Gamma_P_ia_beta(rec_j + jjB - 1, 1:my_B_size, kkk) + & 2164 BI_C_rec(jjB + rec_block_size, 1:my_B_size, kkk) 2165 ENDIF 2166 END DO 2167!$OMP END PARALLEL DO 2168 ELSE 2169!$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(kkk) & 2170!$OMP SHARED(start_point,end_point,& 2171!$OMP mp2_env,rec_j,jjB,my_B_size_beta,BI_C_rec_beta) 2172 DO kkk = start_point, end_point 2173 mp2_env%ri_grad%Gamma_P_ia_beta(rec_j + jjB - 1, 1:my_B_size_beta, kkk) = & 2174 mp2_env%ri_grad%Gamma_P_ia_beta(rec_j + jjB - 1, 1:my_B_size_beta, kkk) + & 2175 BI_C_rec_beta(jjB, 1:my_B_size_beta, kkk) 2176 END DO 2177!$OMP END PARALLEL DO 2178 ENDIF 2179 END DO 2180 END DO 2181 CALL timestop(handle2) 2182 2183 DEALLOCATE (BI_C_rec) 2184 IF (alpha_beta) DEALLOCATE (BI_C_rec_beta) 2185 2186 ELSE 2187 ! we have something to send but nothing to receive 2188 2189 CALL mp_sendrecv(BI_C_send, proc_send, & 2190 null_mat_rec, proc_receive, & 2191 para_env_exchange%group) 2192 IF (alpha_beta) THEN 2193 CALL mp_sendrecv(BI_C_send_beta, proc_send, & 2194 null_mat_rec, proc_receive, & 2195 para_env_exchange%group) 2196 ENDIF 2197 2198 END IF 2199 2200 DEALLOCATE (BI_C_send) 2201 IF (alpha_beta) DEALLOCATE (BI_C_send_beta) 2202 END DO 2203 2204 ELSE 2205 ! noting to send check if we have to receive 2206 DO proc_shift = 1, para_env_exchange%num_pe - 1 2207 proc_send = proc_map(para_env_exchange%mepos + proc_shift) 2208 proc_receive = proc_map(para_env_exchange%mepos - proc_shift) 2209 rec_ij_index = num_ij_pairs(proc_receive) 2210 2211 IF (ij_index <= rec_ij_index) THEN 2212 ! we know that proc_receive has something to send for us, let's see what 2213 ij_counter_rec = & 2214 (ij_index - MIN(1, integ_group_pos2color_sub(proc_receive)))*ngroup + integ_group_pos2color_sub(proc_receive) 2215 2216 rec_i = ij_map(ij_counter_rec, 1) 2217 rec_j = ij_map(ij_counter_rec, 2) 2218 rec_block_size = ij_map(ij_counter_rec, 3) 2219 2220 IF (.NOT. alpha_beta) THEN 2221 ALLOCATE (BI_C_rec(2*rec_block_size, my_B_size, my_group_L_size)) 2222 ELSE 2223 ALLOCATE (BI_C_rec(rec_block_size, my_B_size, my_group_L_size)) 2224 ALLOCATE (BI_C_rec_beta(rec_block_size, my_B_size_beta, my_group_L_size)) 2225 ENDIF 2226 2227 BI_C_rec = 0.0_dp 2228 IF (alpha_beta) BI_C_rec_beta = 0.0_dp 2229 2230 CALL mp_sendrecv(null_mat_send, proc_send, & 2231 BI_C_rec, proc_receive, & 2232 para_env_exchange%group) 2233 IF (alpha_beta) THEN 2234 CALL mp_sendrecv(null_mat_send, proc_send, & 2235 BI_C_rec_beta, proc_receive, & 2236 para_env_exchange%group) 2237 ENDIF 2238 2239 CALL timeset(routineN//"_comm2_w", handle2) 2240 DO irep = 0, num_integ_group - 1 2241 Lstart_pos = ranges_info_array(1, irep, para_env_exchange%mepos) 2242 Lend_pos = ranges_info_array(2, irep, para_env_exchange%mepos) 2243 start_point = ranges_info_array(3, irep, para_env_exchange%mepos) 2244 end_point = ranges_info_array(4, irep, para_env_exchange%mepos) 2245 DO iiB = 1, rec_block_size 2246!$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(kkk) & 2247!$OMP SHARED(start_point,end_point,& 2248!$OMP mp2_env,rec_i,iiB,my_B_size,BI_C_rec,& 2249!$OMP alpha_alpha,beta_beta,open_shell) 2250 DO kkk = start_point, end_point 2251 IF (alpha_alpha .OR. (.NOT. open_shell)) THEN 2252 mp2_env%ri_grad%Gamma_P_ia(rec_i + iiB - 1, 1:my_B_size, kkk) = & 2253 mp2_env%ri_grad%Gamma_P_ia(rec_i + iiB - 1, 1:my_B_size, kkk) + & 2254 BI_C_rec(iiB, 1:my_B_size, kkk) 2255 ENDIF 2256 IF (beta_beta) THEN 2257 mp2_env%ri_grad%Gamma_P_ia_beta(rec_i + iiB - 1, 1:my_B_size, kkk) = & 2258 mp2_env%ri_grad%Gamma_P_ia_beta(rec_i + iiB - 1, 1:my_B_size, kkk) + & 2259 BI_C_rec(iiB, 1:my_B_size, kkk) 2260 ENDIF 2261 END DO 2262!$OMP END PARALLEL DO 2263 END DO 2264 DO jjB = 1, rec_block_size 2265 IF (.NOT. alpha_beta) THEN 2266!$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(kkk) & 2267!$OMP SHARED(start_point,end_point,rec_block_size,& 2268!$OMP mp2_env,rec_j,jjB,my_B_size,BI_C_rec,& 2269!$OMP alpha_alpha,beta_beta,open_shell) 2270 DO kkk = start_point, end_point 2271 IF (alpha_alpha .OR. (.NOT. open_shell)) THEN 2272 mp2_env%ri_grad%Gamma_P_ia(rec_j + jjB - 1, 1:my_B_size, kkk) = & 2273 mp2_env%ri_grad%Gamma_P_ia(rec_j + jjB - 1, 1:my_B_size, kkk) + & 2274 BI_C_rec(jjB + rec_block_size, 1:my_B_size, kkk) 2275 ENDIF 2276 IF (beta_beta) THEN 2277 mp2_env%ri_grad%Gamma_P_ia_beta(rec_j + jjB - 1, 1:my_B_size, kkk) = & 2278 mp2_env%ri_grad%Gamma_P_ia_beta(rec_j + jjB - 1, 1:my_B_size, kkk) + & 2279 BI_C_rec(jjB + rec_block_size, 1:my_B_size, kkk) 2280 ENDIF 2281 END DO 2282!$OMP END PARALLEL DO 2283 ELSE 2284!$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(kkk) & 2285!$OMP SHARED(start_point,end_point,& 2286!$OMP mp2_env,rec_j,jjB,my_B_size_beta,BI_C_rec_beta) 2287 DO kkk = start_point, end_point 2288 mp2_env%ri_grad%Gamma_P_ia_beta(rec_j + jjB - 1, 1:my_B_size_beta, kkk) = & 2289 mp2_env%ri_grad%Gamma_P_ia_beta(rec_j + jjB - 1, 1:my_B_size_beta, kkk) + & 2290 BI_C_rec_beta(jjB, 1:my_B_size_beta, kkk) 2291 END DO 2292!$OMP END PARALLEL DO 2293 ENDIF 2294 END DO 2295 END DO 2296 CALL timestop(handle2) 2297 2298 DEALLOCATE (BI_C_rec) 2299 IF (alpha_beta) DEALLOCATE (BI_C_rec_beta) 2300 2301 ELSE 2302 ! nothing to send nothing to receive 2303 CALL mp_sendrecv(null_mat_send, proc_send, & 2304 null_mat_rec, proc_receive, & 2305 para_env_exchange%group) 2306 IF (alpha_beta) THEN 2307 CALL mp_sendrecv(null_mat_send, proc_send, & 2308 null_mat_rec, proc_receive, & 2309 para_env_exchange%group) 2310 ENDIF 2311 2312 END IF 2313 END DO 2314 2315 END IF 2316 CALL timestop(handle) 2317 2318 END SUBROUTINE mp2_redistribute_gamma 2319 2320! ************************************************************************************************** 2321!> \brief ... 2322!> \param mp2_env ... 2323!> \param Eigenval ... 2324!> \param homo ... 2325!> \param virtual ... 2326!> \param open_shell ... 2327!> \param beta_beta ... 2328!> \param alpha_beta ... 2329!> \param Bib_C ... 2330!> \param unit_nr ... 2331!> \param dimen_RI ... 2332!> \param my_B_size ... 2333!> \param ngroup ... 2334!> \param num_integ_group ... 2335!> \param my_group_L_size ... 2336!> \param color_sub ... 2337!> \param ranges_info_array ... 2338!> \param para_env_exchange ... 2339!> \param para_env_sub ... 2340!> \param proc_map ... 2341!> \param my_B_virtual_start ... 2342!> \param my_B_virtual_end ... 2343!> \param sizes_array ... 2344!> \param gd_B_virtual ... 2345!> \param sub_proc_map ... 2346!> \param integ_group_pos2color_sub ... 2347!> \param local_ab ... 2348!> \param BIb_C_beta ... 2349!> \param my_B_size_beta ... 2350!> \param gd_B_virtual_beta ... 2351!> \param my_B_virtual_start_beta ... 2352!> \param virtual_beta ... 2353!> \param homo_beta ... 2354!> \param Eigenval_beta ... 2355!> \param my_B_virtual_end_beta ... 2356! ************************************************************************************************** 2357 SUBROUTINE quasi_degenerate_P_ij(mp2_env, Eigenval, homo, virtual, open_shell, & 2358 beta_beta, alpha_beta, Bib_C, unit_nr, dimen_RI, & 2359 my_B_size, ngroup, num_integ_group, my_group_L_size, & 2360 color_sub, ranges_info_array, para_env_exchange, para_env_sub, proc_map, & 2361 my_B_virtual_start, my_B_virtual_end, sizes_array, gd_B_virtual, & 2362 sub_proc_map, integ_group_pos2color_sub, & 2363 local_ab, BIb_C_beta, my_B_size_beta, & 2364 gd_B_virtual_beta, my_B_virtual_start_beta, & 2365 virtual_beta, homo_beta, Eigenval_beta, my_B_virtual_end_beta) 2366 TYPE(mp2_type), POINTER :: mp2_env 2367 REAL(KIND=dp), DIMENSION(:), INTENT(IN) :: Eigenval 2368 INTEGER, INTENT(IN) :: homo, virtual 2369 LOGICAL, INTENT(IN) :: open_shell, beta_beta, alpha_beta 2370 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), & 2371 INTENT(IN) :: BIb_C 2372 INTEGER, INTENT(IN) :: unit_nr, dimen_RI, my_B_size, ngroup, & 2373 num_integ_group, my_group_L_size, & 2374 color_sub 2375 INTEGER, ALLOCATABLE, DIMENSION(:, :, :), & 2376 INTENT(IN) :: ranges_info_array 2377 TYPE(cp_para_env_type), POINTER :: para_env_exchange, para_env_sub 2378 INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(IN) :: proc_map 2379 INTEGER, INTENT(IN) :: my_B_virtual_start, my_B_virtual_end 2380 INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(IN) :: sizes_array 2381 TYPE(group_dist_d1_type), INTENT(IN) :: gd_B_virtual 2382 INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(IN) :: sub_proc_map, integ_group_pos2color_sub 2383 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :), & 2384 INTENT(INOUT) :: local_ab 2385 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), & 2386 INTENT(IN), OPTIONAL :: BIb_C_beta 2387 INTEGER, INTENT(IN), OPTIONAL :: my_B_size_beta 2388 TYPE(group_dist_d1_type), INTENT(IN), OPTIONAL :: gd_B_virtual_beta 2389 INTEGER, INTENT(IN), OPTIONAL :: my_B_virtual_start_beta, virtual_beta, & 2390 homo_beta 2391 REAL(KIND=dp), DIMENSION(:), INTENT(IN), OPTIONAL :: Eigenval_beta 2392 INTEGER, INTENT(IN), OPTIONAL :: my_B_virtual_end_beta 2393 2394 CHARACTER(LEN=*), PARAMETER :: routineN = 'quasi_degenerate_P_ij', & 2395 routineP = moduleN//':'//routineN 2396 2397 INTEGER :: a, a_global, b, b_global, end_point, handle, handle2, ijk_counter, & 2398 ijk_counter_send, ijk_index, iloops, irep, Lend_pos, Lstart_pos, max_ijk, max_ijk_beta, & 2399 max_ijk_loop, my_i, my_ijk, my_ijk_beta, my_j, my_k, my_virtual, nloops, proc_receive, & 2400 proc_send, proc_shift, rec_B_size, rec_B_virtual_end, rec_B_virtual_start, rec_L_size, & 2401 send_B_size, send_B_virtual_end, send_B_virtual_start, send_i, send_ijk_index, send_j, & 2402 send_k, size_B_i, size_B_j, size_B_k, start_point 2403 INTEGER, ALLOCATABLE, DIMENSION(:) :: num_ijk, num_ijk_beta 2404 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: ijk_map, ijk_map_beta 2405 REAL(KIND=dp) :: amp_fac, null_mat_rec(2, 2, 2), & 2406 null_mat_send(2, 2, 2), P_ij_elem 2407 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: external_ab, external_i_aL, t_ab 2408 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: BI_C_rec, local_i_aL, local_j_aL, & 2409 local_k_aL 2410 2411! 2412 2413 CALL timeset(routineN//"_ij_sing", handle) 2414! Define the number of loops over orbital triplets 2415 2416 nloops = 1 2417 IF (alpha_beta) nloops = 2 2418 2419 ! Find the number of quasi-degenerate orbitals and orbital triplets 2420 2421 IF (.NOT. alpha_beta) THEN 2422 CALL Find_quasi_degenerate_ij(my_ijk, homo, Eigenval, mp2_env, ijk_map, unit_nr, ngroup, & 2423 beta_beta, alpha_beta, para_env_exchange, num_ijk, max_ijk, color_sub) 2424 ELSE 2425 CALL Find_quasi_degenerate_ij(my_ijk, homo, Eigenval, mp2_env, ijk_map, unit_nr, ngroup, & 2426 beta_beta, alpha_beta, para_env_exchange, num_ijk, max_ijk, color_sub, & 2427 Eigenval_beta, homo_beta, ijk_map_beta, num_ijk_beta, max_ijk_beta, my_ijk_beta) 2428 ENDIF 2429 2430 ! Set amplitude factor 2431 amp_fac = 2.0_dp 2432 IF (open_shell) amp_fac = 1.0_dp 2433 2434 ! Loop(s) over orbital triplets 2435 DO iloops = 1, nloops 2436 IF (iloops .EQ. 1) THEN 2437 size_B_i = my_B_size 2438 size_B_j = my_B_size 2439 max_ijk_loop = max_ijk 2440 my_virtual = virtual 2441 IF (alpha_beta) THEN 2442 size_B_k = my_B_size_beta 2443 ELSE 2444 size_B_k = my_B_size 2445 ENDIF 2446 ELSE 2447 size_B_i = my_B_size_beta 2448 size_B_j = my_B_size_beta 2449 size_B_k = my_B_size 2450 my_virtual = virtual_beta 2451 max_ijk_loop = max_ijk_beta 2452 ENDIF 2453 2454 ALLOCATE (local_i_aL(dimen_RI, size_B_i, 1)) 2455 ALLOCATE (local_j_aL(dimen_RI, size_B_j, 1)) 2456 ALLOCATE (local_k_aL(dimen_RI, size_B_k, 1)) 2457 ALLOCATE (t_ab(my_virtual, size_B_k)) 2458 2459 DO ijk_index = 1, max_ijk_loop 2460 IF (iloops .EQ. 2) my_ijk = my_ijk_beta 2461 IF (ijk_index <= my_ijk) THEN 2462 ! work to be done 2463 ijk_counter = (ijk_index - MIN(1, color_sub))*ngroup + color_sub 2464 IF (iloops .EQ. 1) THEN 2465 my_i = ijk_map(ijk_counter, 1) 2466 my_j = ijk_map(ijk_counter, 2) 2467 my_k = ijk_map(ijk_counter, 3) 2468 ELSE 2469 my_i = ijk_map_beta(ijk_counter, 1) 2470 my_j = ijk_map_beta(ijk_counter, 2) 2471 my_k = ijk_map_beta(ijk_counter, 3) 2472 ENDIF 2473 2474 local_i_aL = 0.0_dp 2475 local_j_al = 0.0_dp 2476 local_k_al = 0.0_dp 2477 DO irep = 0, num_integ_group - 1 2478 Lstart_pos = ranges_info_array(1, irep, para_env_exchange%mepos) 2479 Lend_pos = ranges_info_array(2, irep, para_env_exchange%mepos) 2480 start_point = ranges_info_array(3, irep, para_env_exchange%mepos) 2481 end_point = ranges_info_array(4, irep, para_env_exchange%mepos) 2482 2483 IF (.NOT. alpha_beta) THEN 2484 local_i_aL(Lstart_pos:Lend_pos, 1:size_B_i, 1) = BIb_C(start_point:end_point, 1:size_B_i, my_i) 2485 local_j_aL(Lstart_pos:Lend_pos, 1:size_B_j, 1) = BIb_C(start_point:end_point, 1:size_B_j, my_j) 2486 local_k_aL(Lstart_pos:Lend_pos, 1:size_B_k, 1) = BIb_C(start_point:end_point, 1:size_B_k, my_k) 2487 ELSE 2488 IF (iloops .EQ. 1) THEN ! For alpha-alpha density 2489 local_i_aL(Lstart_pos:Lend_pos, 1:size_B_i, 1) = BIb_C(start_point:end_point, 1:size_B_i, my_i) 2490 local_j_aL(Lstart_pos:Lend_pos, 1:size_B_j, 1) = BIb_C(start_point:end_point, 1:size_B_j, my_j) 2491 local_k_aL(Lstart_pos:Lend_pos, 1:size_B_k, 1) = BIb_C_beta(start_point:end_point, 1:size_B_k, my_k) 2492 ELSE ! For beta-beta density 2493 local_i_aL(Lstart_pos:Lend_pos, 1:size_B_i, 1) = BIb_C_beta(start_point:end_point, 1:size_B_i, my_i) 2494 local_j_aL(Lstart_pos:Lend_pos, 1:size_B_j, 1) = BIb_C_beta(start_point:end_point, 1:size_B_j, my_j) 2495 local_k_aL(Lstart_pos:Lend_pos, 1:size_B_k, 1) = BIb_C(start_point:end_point, 1:size_B_k, my_k) 2496 ENDIF 2497 ENDIF 2498 END DO 2499 2500 DO proc_shift = 1, para_env_exchange%num_pe - 1 2501 proc_send = proc_map(para_env_exchange%mepos + proc_shift) 2502 proc_receive = proc_map(para_env_exchange%mepos - proc_shift) 2503 2504 send_ijk_index = num_ijk(proc_send) 2505 IF (iloops .EQ. 2) send_ijk_index = num_ijk_beta(proc_send) 2506 2507 rec_L_size = sizes_array(proc_receive) 2508 ALLOCATE (BI_C_rec(rec_L_size, size_B_i, 1)) 2509 2510 IF (ijk_index <= send_ijk_index) THEN 2511 ! something to send 2512 ijk_counter_send = (ijk_index - MIN(1, integ_group_pos2color_sub(proc_send)))* & 2513 ngroup + integ_group_pos2color_sub(proc_send) 2514 IF (iloops .EQ. 1) THEN 2515 send_i = ijk_map(ijk_counter_send, 1) 2516 send_j = ijk_map(ijk_counter_send, 2) 2517 send_k = ijk_map(ijk_counter_send, 3) 2518 ELSE 2519 send_i = ijk_map_beta(ijk_counter_send, 1) 2520 send_j = ijk_map_beta(ijk_counter_send, 2) 2521 send_k = ijk_map_beta(ijk_counter_send, 3) 2522 ENDIF 2523 END IF 2524 2525 ! occupied i 2526 BI_C_rec = 0.0_dp 2527 IF (ijk_index <= send_ijk_index) THEN 2528 IF (iloops .EQ. 1) THEN 2529 CALL mp_sendrecv(BIb_C(1:my_group_L_size, 1:size_B_i, send_i), proc_send, & 2530 BI_C_rec(1:rec_L_size, 1:size_B_i, 1), proc_receive, & 2531 para_env_exchange%group) 2532 ELSE 2533 CALL mp_sendrecv(BIb_C_beta(1:my_group_L_size, 1:size_B_i, send_i), proc_send, & 2534 BI_C_rec(1:rec_L_size, 1:size_B_i, 1), proc_receive, & 2535 para_env_exchange%group) 2536 ENDIF 2537 ELSE 2538 ! nothing to send 2539 CALL mp_sendrecv(null_mat_send, proc_send, & 2540 BI_C_rec(1:rec_L_size, 1:size_B_i, 1:1), proc_receive, & 2541 para_env_exchange%group) 2542 END IF 2543 DO irep = 0, num_integ_group - 1 2544 Lstart_pos = ranges_info_array(1, irep, proc_receive) 2545 Lend_pos = ranges_info_array(2, irep, proc_receive) 2546 start_point = ranges_info_array(3, irep, proc_receive) 2547 end_point = ranges_info_array(4, irep, proc_receive) 2548 2549 local_i_aL(Lstart_pos:Lend_pos, 1:size_B_i, 1) = BI_C_rec(start_point:end_point, 1:size_B_i, 1) 2550 END DO 2551 2552 ! occupied j 2553 BI_C_rec = 0.0_dp 2554 IF (ijk_index <= send_ijk_index) THEN 2555 IF (iloops .EQ. 1) THEN 2556 CALL mp_sendrecv(BIb_C(1:my_group_L_size, 1:size_B_j, send_j), proc_send, & 2557 BI_C_rec(1:rec_L_size, 1:size_B_j, 1), proc_receive, & 2558 para_env_exchange%group) 2559 ELSE ! For beta_beta density, the size is different now 2560 CALL mp_sendrecv(BIb_C_beta(1:my_group_L_size, 1:size_B_j, send_j), proc_send, & 2561 BI_C_rec(1:rec_L_size, 1:size_B_j, 1), proc_receive, & 2562 para_env_exchange%group) 2563 ENDIF 2564 ELSE 2565 ! nothing to send 2566 CALL mp_sendrecv(null_mat_send, proc_send, & 2567 BI_C_rec(1:rec_L_size, 1:size_B_j, 1:1), proc_receive, & 2568 para_env_exchange%group) 2569 END IF 2570 DO irep = 0, num_integ_group - 1 2571 Lstart_pos = ranges_info_array(1, irep, proc_receive) 2572 Lend_pos = ranges_info_array(2, irep, proc_receive) 2573 start_point = ranges_info_array(3, irep, proc_receive) 2574 end_point = ranges_info_array(4, irep, proc_receive) 2575 2576 local_j_aL(Lstart_pos:Lend_pos, 1:size_B_j, 1) = BI_C_rec(start_point:end_point, 1:size_B_j, 1) 2577 END DO 2578 2579 ! occupied k 2580 BI_C_rec = 0.0_dp 2581 DEALLOCATE (BI_C_rec) 2582 ALLOCATE (BI_C_rec(rec_L_size, size_B_k, 1)) 2583 BI_C_rec = 0.0_dp 2584 IF (ijk_index <= send_ijk_index) THEN 2585 IF (iloops .EQ. 1) THEN 2586 IF (.NOT. alpha_beta) THEN 2587 CALL mp_sendrecv(BIb_C(1:my_group_L_size, 1:size_B_k, send_k), proc_send, & 2588 BI_C_rec(1:rec_L_size, 1:size_B_k, 1), proc_receive, & 2589 para_env_exchange%group) 2590 ELSE 2591 CALL mp_sendrecv(BIb_C_beta(1:my_group_L_size, 1:size_B_k, send_k), proc_send, & 2592 BI_C_rec(1:rec_L_size, 1:size_B_k, 1), proc_receive, & 2593 para_env_exchange%group) 2594 ENDIF 2595 ELSE ! For beta_beta density, the size is different now 2596 BI_C_rec = 0.0_dp 2597 CALL mp_sendrecv(BIb_C(1:my_group_L_size, 1:size_B_k, send_k), proc_send, & 2598 BI_C_rec(1:rec_L_size, 1:size_B_k, 1), proc_receive, & 2599 para_env_exchange%group) 2600 ENDIF 2601 ELSE 2602 ! nothing to send 2603 BI_C_rec = 0.0_dp 2604 CALL mp_sendrecv(null_mat_send, proc_send, & 2605 BI_C_rec(1:rec_L_size, 1:size_B_k, 1:1), proc_receive, & 2606 para_env_exchange%group) 2607 END IF 2608 DO irep = 0, num_integ_group - 1 2609 Lstart_pos = ranges_info_array(1, irep, proc_receive) 2610 Lend_pos = ranges_info_array(2, irep, proc_receive) 2611 start_point = ranges_info_array(3, irep, proc_receive) 2612 end_point = ranges_info_array(4, irep, proc_receive) 2613 2614 local_k_aL(Lstart_pos:Lend_pos, 1:size_B_k, 1) = BI_C_rec(start_point:end_point, 1:size_B_k, 1) 2615 END DO 2616 2617 DEALLOCATE (BI_C_rec) 2618 END DO 2619 2620 ! expand integrals 2621 CALL timeset(routineN//"_exp_ik", handle2) 2622 local_ab = 0.0_dp 2623 IF (iloops .EQ. 2) THEN ! For alpha-beta case for beta-beta density the dimensions are different 2624 DEALLOCATE (local_ab) 2625 ALLOCATE (local_ab(virtual_beta, size_B_k)) 2626 local_ab = 0.0_dp 2627 CALL dgemm('T', 'N', size_B_i, size_B_k, dimen_RI, 1.0_dp, & 2628 local_i_aL(:, :, 1), dimen_RI, local_k_aL(:, :, 1), dimen_RI, & 2629 0.0_dp, local_ab(my_B_virtual_start_beta:my_B_virtual_end_beta, 1:size_B_k), size_B_i) 2630 ELSE 2631 CALL dgemm('T', 'N', size_B_i, size_B_k, dimen_RI, 1.0_dp, & 2632 local_i_aL(:, :, 1), dimen_RI, local_k_aL(:, :, 1), dimen_RI, & 2633 0.0_dp, local_ab(my_B_virtual_start:my_B_virtual_end, 1:size_B_k), size_B_i) 2634 ENDIF 2635 DO proc_shift = 1, para_env_sub%num_pe - 1 2636 proc_send = sub_proc_map(para_env_sub%mepos + proc_shift) 2637 proc_receive = sub_proc_map(para_env_sub%mepos - proc_shift) 2638 2639 IF (iloops .EQ. 1) THEN 2640 CALL get_group_dist(gd_B_virtual, proc_receive, rec_B_virtual_start, rec_B_virtual_end, rec_B_size) 2641 ELSE 2642 CALL get_group_dist(gd_B_virtual_beta, proc_receive, rec_B_virtual_start, rec_B_virtual_end, rec_B_size) 2643 ENDIF 2644 2645 ALLOCATE (external_i_aL(dimen_RI, rec_B_size)) 2646 external_i_aL = 0.0_dp 2647 2648 CALL mp_sendrecv(local_i_aL(:, :, 1), proc_send, & 2649 external_i_aL, proc_receive, & 2650 para_env_sub%group) 2651 2652 CALL dgemm('T', 'N', rec_B_size, size_B_k, dimen_RI, 1.0_dp, & 2653 external_i_aL, dimen_RI, local_k_aL(:, :, 1), dimen_RI, & 2654 0.0_dp, local_ab(rec_B_virtual_start:rec_B_virtual_end, 1:size_B_k), rec_B_size) 2655 2656 DEALLOCATE (external_i_aL) 2657 END DO 2658 CALL timestop(handle2) 2659 2660 ! Amplitudes 2661 CALL timeset(routineN//"_tab", handle2) 2662 t_ab = 0.0_dp 2663 ! Alpha-alpha, beta-beta and closed shell 2664 IF (.NOT. alpha_beta) THEN 2665 DO b = 1, size_B_k 2666 b_global = b + my_B_virtual_start - 1 2667 DO a = 1, my_B_size 2668 a_global = a + my_B_virtual_start - 1 2669 t_ab(a_global, b) = (amp_fac*local_ab(a_global, b) - local_ab(b_global, a))/ & 2670 (Eigenval(my_i) + Eigenval(my_k) - Eigenval(homo + a_global) - Eigenval(homo + b_global)) 2671 END DO 2672 END DO 2673 ELSE 2674 IF (iloops .EQ. 1) THEN ! Alpha-beta for alpha-alpha density 2675 DO b = 1, size_B_k 2676 b_global = b + my_B_virtual_start_beta - 1 2677 DO a = 1, my_B_size 2678 a_global = a + my_B_virtual_start - 1 2679 t_ab(a_global, b) = local_ab(a_global, b)/ & 2680 (Eigenval(my_i) + Eigenval_beta(my_k) - Eigenval(homo + a_global) - & 2681 Eigenval_beta(homo_beta + b_global)) 2682 END DO 2683 END DO 2684 ELSE ! Alpha-beta for beta-beta density 2685 DO b = 1, size_B_k 2686 b_global = b + my_B_virtual_start - 1 2687 DO a = 1, my_B_size_beta 2688 a_global = a + my_B_virtual_start_beta - 1 2689 t_ab(a_global, b) = local_ab(a_global, b)/ & 2690 (Eigenval_beta(my_i) + Eigenval(my_k) - Eigenval_beta(homo_beta + a_global) - & 2691 Eigenval(homo + b_global)) 2692 END DO 2693 END DO 2694 ENDIF 2695 ENDIF 2696 2697 IF (.NOT. alpha_beta) THEN 2698 DO proc_shift = 1, para_env_sub%num_pe - 1 2699 proc_send = sub_proc_map(para_env_sub%mepos + proc_shift) 2700 proc_receive = sub_proc_map(para_env_sub%mepos - proc_shift) 2701 CALL get_group_dist(gd_B_virtual, proc_receive, rec_B_virtual_start, rec_B_virtual_end, rec_B_size) 2702 CALL get_group_dist(gd_B_virtual, proc_send, send_B_virtual_start, send_B_virtual_end, send_B_size) 2703 2704 ALLOCATE (external_ab(size_B_i, rec_B_size)) 2705 external_ab = 0.0_dp 2706 CALL mp_sendrecv(local_ab(send_B_virtual_start:send_B_virtual_end, 1:size_B_k), proc_send, & 2707 external_ab(1:size_B_i, 1:rec_B_size), proc_receive, para_env_sub%group) 2708 2709 DO b = 1, my_B_size 2710 b_global = b + my_B_virtual_start - 1 2711 DO a = 1, rec_B_size 2712 a_global = a + rec_B_virtual_start - 1 2713 t_ab(a_global, b) = (amp_fac*local_ab(a_global, b) - external_ab(b, a))/ & 2714 (Eigenval(my_i) + Eigenval(my_k) - Eigenval(homo + a_global) - Eigenval(homo + b_global)) 2715 END DO 2716 END DO 2717 2718 DEALLOCATE (external_ab) 2719 END DO 2720 ENDIF 2721 CALL timestop(handle2) 2722 2723 ! Expand the second set of integrals 2724 CALL timeset(routineN//"_exp_jk", handle2) 2725 local_ab = 0.0_dp 2726 2727 IF (iloops .EQ. 2) THEN ! In alpha-beta case for beta-beta density the dimensions are different 2728 CALL dgemm('T', 'N', size_B_j, size_B_k, dimen_RI, 1.0_dp, & 2729 local_j_aL(:, :, 1), dimen_RI, local_k_aL(:, :, 1), dimen_RI, & 2730 0.0_dp, local_ab(my_B_virtual_start_beta:my_B_virtual_end_beta, 1:size_B_k), size_B_j) 2731 ELSE 2732 CALL dgemm('T', 'N', size_B_j, size_B_k, dimen_RI, 1.0_dp, & 2733 local_j_aL(:, :, 1), dimen_RI, local_k_aL(:, :, 1), dimen_RI, & 2734 0.0_dp, local_ab(my_B_virtual_start:my_B_virtual_end, 1:size_B_k), size_B_j) 2735 ENDIF 2736 2737 DO proc_shift = 1, para_env_sub%num_pe - 1 2738 proc_send = sub_proc_map(para_env_sub%mepos + proc_shift) 2739 proc_receive = sub_proc_map(para_env_sub%mepos - proc_shift) 2740 2741 IF (iloops .EQ. 1) THEN 2742 CALL get_group_dist(gd_B_virtual, proc_receive, rec_B_virtual_start, rec_B_virtual_end, rec_B_size) 2743 ELSE 2744 CALL get_group_dist(gd_B_virtual_beta, proc_receive, rec_B_virtual_start, rec_B_virtual_end, rec_B_size) 2745 ENDIF 2746 2747 ALLOCATE (external_i_aL(dimen_RI, rec_B_size)) 2748 external_i_aL = 0.0_dp 2749 2750 CALL mp_sendrecv(local_j_aL(:, :, 1), proc_send, & 2751 external_i_aL, proc_receive, & 2752 para_env_sub%group) 2753 2754 CALL dgemm('T', 'N', rec_B_size, size_B_k, dimen_RI, 1.0_dp, & 2755 external_i_aL, dimen_RI, local_k_aL(:, :, 1), dimen_RI, & 2756 0.0_dp, local_ab(rec_B_virtual_start:rec_B_virtual_end, 1:size_B_k), rec_B_size) 2757 2758 DEALLOCATE (external_i_aL) 2759 END DO 2760 CALL timestop(handle2) 2761 2762 CALL timeset(routineN//"_Pij", handle2) 2763 ! Alpha-alpha, beta-beta and closed shell 2764 IF (.NOT. alpha_beta) THEN 2765 DO b = 1, size_B_k 2766 b_global = b + my_B_virtual_start - 1 2767 DO a = 1, my_B_size 2768 a_global = a + my_B_virtual_start - 1 2769 local_ab(a_global, b) = local_ab(a_global, b)/ & 2770 (Eigenval(my_j) + Eigenval(my_k) - Eigenval(homo + a_global) - Eigenval(homo + b_global)) 2771 END DO 2772 END DO 2773 ELSE 2774 IF (iloops .EQ. 1) THEN ! Alpha-beta for alpha-alpha density 2775 DO b = 1, size_B_k 2776 b_global = b + my_B_virtual_start_beta - 1 2777 DO a = 1, my_B_size 2778 a_global = a + my_B_virtual_start - 1 2779 local_ab(a_global, b) = local_ab(a_global, b)/ & 2780 (Eigenval(my_j) + Eigenval_beta(my_k) - Eigenval(homo + a_global) - & 2781 Eigenval_beta(homo_beta + b_global)) 2782 END DO 2783 END DO 2784 ELSE ! Alpha-beta for beta-beta density 2785 DO b = 1, size_B_k 2786 b_global = b + my_B_virtual_start - 1 2787 DO a = 1, my_B_size_beta 2788 a_global = a + my_B_virtual_start_beta - 1 2789 local_ab(a_global, b) = local_ab(a_global, b)/ & 2790 (Eigenval_beta(my_j) + Eigenval(my_k) - Eigenval_beta(homo_beta + a_global) - & 2791 Eigenval(homo + b_global)) 2792 END DO 2793 END DO 2794 ENDIF 2795 ENDIF 2796 ! 2797 P_ij_elem = SUM(local_ab*t_ab) 2798 IF ((.NOT. open_shell) .AND. (.NOT. alpha_beta)) THEN 2799 P_ij_elem = P_ij_elem*2.0_dp 2800 ENDIF 2801 IF ((beta_beta) .OR. (iloops .EQ. 2)) THEN 2802 mp2_env%ri_grad%P_ij_beta(my_i, my_j) = mp2_env%ri_grad%P_ij_beta(my_i, my_j) - P_ij_elem 2803 mp2_env%ri_grad%P_ij_beta(my_j, my_i) = mp2_env%ri_grad%P_ij_beta(my_j, my_i) - P_ij_elem 2804 ELSE 2805 mp2_env%ri_grad%P_ij(my_i, my_j) = mp2_env%ri_grad%P_ij(my_i, my_j) - P_ij_elem 2806 mp2_env%ri_grad%P_ij(my_j, my_i) = mp2_env%ri_grad%P_ij(my_j, my_i) - P_ij_elem 2807 ENDIF 2808 CALL timestop(handle2) 2809 ELSE 2810 ! no work to be done, possible messeges to be exchanged 2811 DO proc_shift = 1, para_env_exchange%num_pe - 1 2812 proc_send = proc_map(para_env_exchange%mepos + proc_shift) 2813 proc_receive = proc_map(para_env_exchange%mepos - proc_shift) 2814 2815 send_ijk_index = num_ijk(proc_send) 2816 IF (iloops .EQ. 2) send_ijk_index = num_ijk_beta(proc_send) 2817 2818 IF (ijk_index <= send_ijk_index) THEN 2819 ! somethig to send 2820 ijk_counter_send = (ijk_index - MIN(1, integ_group_pos2color_sub(proc_send)))*ngroup + & 2821 integ_group_pos2color_sub(proc_send) 2822 IF (iloops .EQ. 1) THEN 2823 send_i = ijk_map(ijk_counter_send, 1) 2824 send_j = ijk_map(ijk_counter_send, 2) 2825 send_k = ijk_map(ijk_counter_send, 3) 2826 ELSE 2827 send_i = ijk_map_beta(ijk_counter_send, 1) 2828 send_j = ijk_map_beta(ijk_counter_send, 2) 2829 send_k = ijk_map_beta(ijk_counter_send, 3) 2830 ENDIF 2831 ! occupied i 2832 IF (iloops .EQ. 1) THEN 2833 CALL mp_sendrecv(BIb_C(1:my_group_L_size, 1:size_B_i, send_i:send_i), proc_send, & 2834 null_mat_rec, proc_receive, para_env_exchange%group) 2835 ELSE 2836 CALL mp_sendrecv(BIb_C_beta(1:my_group_L_size, 1:size_B_i, send_i:send_i), proc_send, & 2837 null_mat_rec, proc_receive, para_env_exchange%group) 2838 ENDIF 2839 ! occupied j 2840 IF (iloops .EQ. 1) THEN 2841 CALL mp_sendrecv(BIb_C(1:my_group_L_size, 1:size_B_j, send_j:send_j), proc_send, & 2842 null_mat_rec, proc_receive, para_env_exchange%group) 2843 ELSE ! For beta_beta density, the size is different now 2844 CALL mp_sendrecv(BIb_C_beta(1:my_group_L_size, 1:size_B_j, send_j:send_j), proc_send, & 2845 null_mat_rec, proc_receive, para_env_exchange%group) 2846 ENDIF 2847 ! occupied k 2848 IF (iloops .EQ. 1) THEN 2849 IF (.NOT. alpha_beta) THEN 2850 CALL mp_sendrecv(BIb_C(1:my_group_L_size, 1:size_B_k, send_k:send_k), proc_send, & 2851 null_mat_rec, proc_receive, para_env_exchange%group) 2852 ELSE 2853 CALL mp_sendrecv(BIb_C_beta(1:my_group_L_size, 1:size_B_k, send_k:send_k), proc_send, & 2854 null_mat_rec, proc_receive, para_env_exchange%group) 2855 ENDIF 2856 ELSE ! For beta_beta density, the size is different now 2857 CALL mp_sendrecv(BIb_C(1:my_group_L_size, 1:size_B_k, send_k:send_k), proc_send, & 2858 null_mat_rec, proc_receive, para_env_exchange%group) 2859 ENDIF 2860 2861 ELSE 2862 ! nothing to send 2863 ! occupied i 2864 CALL mp_sendrecv(null_mat_send, proc_send, & 2865 null_mat_rec, proc_receive, & 2866 para_env_exchange%group) 2867 ! occupied j 2868 CALL mp_sendrecv(null_mat_send, proc_send, & 2869 null_mat_rec, proc_receive, & 2870 para_env_exchange%group) 2871 ! occupied k 2872 CALL mp_sendrecv(null_mat_send, proc_send, & 2873 null_mat_rec, proc_receive, & 2874 para_env_exchange%group) 2875 END IF 2876 2877 END DO ! proc loop 2878 END IF 2879 END DO ! ijk_index loop 2880 DEALLOCATE (local_i_aL) 2881 DEALLOCATE (local_j_aL) 2882 DEALLOCATE (local_k_aL) 2883 DEALLOCATE (t_ab) 2884 ENDDO ! over number of loops (iloop) 2885 ! 2886 DEALLOCATE (ijk_map) 2887 DEALLOCATE (num_ijk) 2888 IF (alpha_beta) THEN 2889 DEALLOCATE (ijk_map_beta, num_ijk_beta) 2890 ENDIF 2891 CALL timestop(handle) 2892 2893 END SUBROUTINE Quasi_degenerate_P_ij 2894 2895! ************************************************************************************************** 2896!> \brief ... 2897!> \param my_ijk ... 2898!> \param homo ... 2899!> \param Eigenval ... 2900!> \param mp2_env ... 2901!> \param ijk_map ... 2902!> \param unit_nr ... 2903!> \param ngroup ... 2904!> \param beta_beta ... 2905!> \param alpha_beta ... 2906!> \param para_env_exchange ... 2907!> \param num_ijk ... 2908!> \param max_ijk ... 2909!> \param color_sub ... 2910!> \param Eigenval_beta ... 2911!> \param homo_beta ... 2912!> \param ijk_map_beta ... 2913!> \param num_ijk_beta ... 2914!> \param max_ijk_beta ... 2915!> \param my_ijk_beta ... 2916! ************************************************************************************************** 2917 SUBROUTINE Find_quasi_degenerate_ij(my_ijk, homo, Eigenval, mp2_env, ijk_map, unit_nr, ngroup, & 2918 beta_beta, alpha_beta, para_env_exchange, num_ijk, max_ijk, color_sub, Eigenval_beta, & 2919 homo_beta, ijk_map_beta, num_ijk_beta, max_ijk_beta, my_ijk_beta) 2920 2921 INTEGER, INTENT(OUT) :: my_ijk 2922 INTEGER, INTENT(IN) :: homo 2923 REAL(KIND=dp), DIMENSION(:), INTENT(IN) :: Eigenval 2924 TYPE(mp2_type), POINTER :: mp2_env 2925 INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(OUT) :: ijk_map 2926 INTEGER, INTENT(IN) :: unit_nr, ngroup 2927 LOGICAL, INTENT(IN) :: beta_beta, alpha_beta 2928 TYPE(cp_para_env_type), POINTER :: para_env_exchange 2929 INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(OUT) :: num_ijk 2930 INTEGER, INTENT(OUT) :: max_ijk 2931 INTEGER, INTENT(IN) :: color_sub 2932 REAL(KIND=dp), DIMENSION(:), INTENT(IN), OPTIONAL :: Eigenval_beta 2933 INTEGER, INTENT(IN), OPTIONAL :: homo_beta 2934 INTEGER, ALLOCATABLE, DIMENSION(:, :), & 2935 INTENT(OUT), OPTIONAL :: ijk_map_beta 2936 INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(OUT), & 2937 OPTIONAL :: num_ijk_beta 2938 INTEGER, INTENT(OUT), OPTIONAL :: max_ijk_beta, my_ijk_beta 2939 2940 CHARACTER(LEN=*), PARAMETER :: routineN = 'Find_quasi_degenerate_ij', & 2941 routineP = moduleN//':'//routineN 2942 2943 INTEGER :: iib, ijk_counter, jjb, kkb, my_homo, & 2944 num_sing_ij, total_ijk 2945 2946 IF (alpha_beta) THEN 2947 my_homo = homo_beta 2948 ELSE 2949 my_homo = homo 2950 ENDIF 2951 2952 ! General case 2953 num_sing_ij = 0 2954 DO iiB = 1, homo 2955 ! diagonal elements already updated 2956 DO jjB = iiB + 1, homo 2957 IF (ABS(Eigenval(jjB) - Eigenval(iiB)) < mp2_env%ri_mp2%eps_canonical) & 2958 num_sing_ij = num_sing_ij + 1 2959 END DO 2960 END DO 2961 IF (.NOT. beta_beta) THEN 2962 IF (unit_nr > 0) THEN 2963 WRITE (UNIT=unit_nr, FMT="(T3,A,T75,i6)") & 2964 "MO_INFO| Number of ij pairs below EPS_CANONICAL:", num_sing_ij 2965 END IF 2966 ELSE 2967 IF (unit_nr > 0) THEN 2968 WRITE (UNIT=unit_nr, FMT="(T3,A,T75,i6)") & 2969 "MO_INFO| Number of ij pairs (spin beta) below EPS_CANONICAL:", num_sing_ij 2970 END IF 2971 ENDIF 2972 total_ijk = my_homo*num_sing_ij 2973 ALLOCATE (ijk_map(total_ijk, 3)) 2974 ijk_map = 0 2975 2976 my_ijk = 0 2977 ijk_counter = 0 2978 DO iiB = 1, homo 2979 ! diagonal elements already updated 2980 DO jjB = iiB + 1, homo 2981 IF (ABS(Eigenval(jjB) - Eigenval(iiB)) >= mp2_env%ri_mp2%eps_canonical) CYCLE 2982 DO kkB = 1, my_homo 2983 ijk_counter = ijk_counter + 1 2984 ijk_map(ijk_counter, 1) = iiB 2985 ijk_map(ijk_counter, 2) = jjB 2986 ijk_map(ijk_counter, 3) = kkB 2987 IF (MOD(ijk_counter, ngroup) == color_sub) my_ijk = my_ijk + 1 2988 END DO 2989 END DO 2990 END DO 2991 2992 ALLOCATE (num_ijk(0:para_env_exchange%num_pe - 1)) 2993 num_ijk = 0 2994 num_ijk(para_env_exchange%mepos) = my_ijk 2995 CALL mp_sum(num_ijk, para_env_exchange%group) 2996 max_ijk = MAXVAL(num_ijk) 2997 2998 ! Alpha-beta case: we need a second map 2999 IF (alpha_beta) THEN 3000 num_sing_ij = 0 3001 DO iiB = 1, homo_beta 3002 ! diagonal elements already updated 3003 DO jjB = iiB + 1, homo_beta 3004 IF (ABS(Eigenval_beta(jjB) - Eigenval_beta(iiB)) < mp2_env%ri_mp2%eps_canonical) & 3005 num_sing_ij = num_sing_ij + 1 3006 END DO 3007 END DO 3008 ! total number of elemets that have to be computed 3009 total_ijk = homo*num_sing_ij 3010 ALLOCATE (ijk_map_beta(total_ijk, 3)) 3011 ijk_map_beta = 0 3012 my_ijk_beta = 0 3013 ijk_counter = 0 3014 DO iiB = 1, homo_beta 3015 ! diagonal elements already updated 3016 DO jjB = iiB + 1, homo_beta 3017 IF (ABS(Eigenval_beta(jjB) - Eigenval_beta(iiB)) >= mp2_env%ri_mp2%eps_canonical) CYCLE 3018 DO kkB = 1, homo 3019 ijk_counter = ijk_counter + 1 3020 ijk_map_beta(ijk_counter, 1) = iiB 3021 ijk_map_beta(ijk_counter, 2) = jjB 3022 ijk_map_beta(ijk_counter, 3) = kkB 3023 IF (MOD(ijk_counter, ngroup) == color_sub) my_ijk_beta = my_ijk_beta + 1 3024 END DO 3025 END DO 3026 END DO 3027 ALLOCATE (num_ijk_beta(0:para_env_exchange%num_pe - 1)) 3028 num_ijk_beta = 0 3029 num_ijk_beta(para_env_exchange%mepos) = my_ijk_beta 3030 CALL mp_sum(num_ijk_beta, para_env_exchange%group) 3031 max_ijk_beta = MAXVAL(num_ijk_beta) 3032 ENDIF 3033 3034 END SUBROUTINE Find_quasi_degenerate_ij 3035 3036END MODULE mp2_ri_gpw 3037