1!--------------------------------------------------------------------------------------------------!
2!   CP2K: A general program to perform molecular dynamics simulations                              !
3!   Copyright (C) 2000 - 2020  CP2K developers group                                               !
4!--------------------------------------------------------------------------------------------------!
5
6! **************************************************************************************************
7!> \brief Routines to calculate RI-GPW-MP2 energy using pw
8!> \par History
9!>      06.2012 created [Mauro Del Ben]
10!>      03.2019 Refactored from mp2_ri_gpw [Frederick Stein]
11! **************************************************************************************************
12MODULE mp2_ri_gpw
13   USE cp_para_env,                     ONLY: cp_para_env_create,&
14                                              cp_para_env_release
15   USE cp_para_types,                   ONLY: cp_para_env_type
16   USE group_dist_types,                ONLY: get_group_dist,&
17                                              group_dist_d1_type,&
18                                              maxsize,&
19                                              release_group_dist
20   USE kinds,                           ONLY: dp,&
21                                              int_8
22   USE machine,                         ONLY: m_flush,&
23                                              m_memory,&
24                                              m_walltime
25   USE message_passing,                 ONLY: mp_allgather,&
26                                              mp_comm_split_direct,&
27                                              mp_min,&
28                                              mp_sendrecv,&
29                                              mp_sum
30   USE mp2_ri_grad_util,                ONLY: complete_gamma
31   USE mp2_types,                       ONLY: mp2_type
32
33!$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num
34#include "./base/base_uses.f90"
35
36   IMPLICIT NONE
37
38   PRIVATE
39
40   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'mp2_ri_gpw'
41
42   PUBLIC :: mp2_ri_gpw_compute_en
43
44CONTAINS
45
46! **************************************************************************************************
47!> \brief ...
48!> \param Emp2 ...
49!> \param Emp2_Cou ...
50!> \param Emp2_EX ...
51!> \param BIb_C ...
52!> \param mp2_env ...
53!> \param para_env ...
54!> \param para_env_sub ...
55!> \param color_sub ...
56!> \param gd_array ...
57!> \param gd_B_virtual ...
58!> \param Eigenval ...
59!> \param nmo ...
60!> \param homo ...
61!> \param dimen_RI ...
62!> \param unit_nr ...
63!> \param calc_forces ...
64!> \param calc_ex ...
65!> \param open_shell_SS ...
66!> \param BIb_C_beta ...
67!> \param homo_beta ...
68!> \param Eigenval_beta ...
69!> \param gd_B_virtual_beta ...
70! **************************************************************************************************
71   SUBROUTINE mp2_ri_gpw_compute_en(Emp2, Emp2_Cou, Emp2_EX, BIb_C, mp2_env, para_env, para_env_sub, color_sub, &
72                                    gd_array, gd_B_virtual, &
73                                    Eigenval, nmo, homo, dimen_RI, unit_nr, calc_forces, calc_ex, &
74                                    open_shell_SS, BIb_C_beta, homo_beta, Eigenval_beta, &
75                                    gd_B_virtual_beta)
76      REAL(KIND=dp), INTENT(OUT)                         :: Emp2, Emp2_Cou, Emp2_EX
77      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), &
78         INTENT(INOUT)                                   :: BIb_C
79      TYPE(mp2_type), POINTER                            :: mp2_env
80      TYPE(cp_para_env_type), POINTER                    :: para_env, para_env_sub
81      INTEGER, INTENT(IN)                                :: color_sub
82      TYPE(group_dist_d1_type), INTENT(INOUT)            :: gd_array, gd_B_virtual
83      REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: Eigenval
84      INTEGER, INTENT(IN)                                :: nmo, homo, dimen_RI, unit_nr
85      LOGICAL, INTENT(IN)                                :: calc_forces, calc_ex
86      LOGICAL, INTENT(IN), OPTIONAL                      :: open_shell_SS
87      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), &
88         INTENT(INOUT), OPTIONAL                         :: BIb_C_beta
89      INTEGER, INTENT(IN), OPTIONAL                      :: homo_beta
90      REAL(KIND=dp), DIMENSION(:), INTENT(IN), OPTIONAL  :: Eigenval_beta
91      TYPE(group_dist_d1_type), INTENT(INOUT), OPTIONAL  :: gd_B_virtual_beta
92
93      CHARACTER(LEN=*), PARAMETER :: routineN = 'mp2_ri_gpw_compute_en', &
94         routineP = moduleN//':'//routineN
95
96      INTEGER :: a, a_global, b, b_global, best_block_size, best_integ_group_size, block_size, &
97         comm_exchange, comm_P, end_point, handle, handle2, handle3, iiB, ij_counter, &
98         ij_counter_send, ij_index, integ_group_size, irep, jjB, Lend_pos, Lstart_pos, &
99         max_ij_pairs, min_integ_group_size, my_B_size, my_B_size_beta, my_B_virtual_end, &
100         my_B_virtual_end_beta, my_B_virtual_start, my_B_virtual_start_beta, my_block_size, &
101         my_group_L_end, my_group_L_size, my_group_L_size_orig, my_group_L_start, my_homo_beta, &
102         my_i, my_ij_pairs, my_j, my_new_group_L_size, my_num_dgemm_call, ngroup, num_IJ_blocks
103      INTEGER :: num_integ_group, pos_integ_group, proc_receive, proc_send, proc_shift, &
104         rec_B_size, rec_B_virtual_end, rec_B_virtual_start, rec_L_size, send_B_size, &
105         send_B_virtual_end, send_B_virtual_start, send_block_size, send_i, send_ij_index, send_j, &
106         start_point, sub_P_color, sub_sub_color, total_ij_pairs, virtual, virtual_beta
107      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: integ_group_pos2color_sub, num_ij_pairs, &
108                                                            proc_map, proc_map_rep, &
109                                                            sizes_array_orig, sub_proc_map
110      INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: ij_map
111      INTEGER, ALLOCATABLE, DIMENSION(:, :, :)           :: ranges_info_array
112      LOGICAL                                            :: my_alpha_alpha_case, my_alpha_beta_case, &
113                                                            my_beta_beta_case, my_open_shell_SS
114      REAL(KIND=dp) :: actual_flop_rate, amp_fac, mem_for_aK, mem_for_comm, mem_for_iaK, &
115         mem_for_rep, mem_min, mem_per_group, mem_real, my_flop_rate, null_mat_rec(2, 2, 2), &
116         null_mat_send(2, 2, 2), sym_fac, t_end, t_start
117      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: external_ab, external_i_aL, local_ab, &
118                                                            local_ba, t_ab
119      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: B_ia_Q, B_ia_Q_beta, BI_C_rec, &
120                                                            local_i_aL, local_j_aL, Y_i_aP, &
121                                                            Y_j_aP, Y_j_aP_beta
122      TYPE(cp_para_env_type), POINTER                    :: para_env_exchange, para_env_P, &
123                                                            para_env_rep
124
125      CALL timeset(routineN, handle)
126
127      my_open_shell_SS = .FALSE.
128      IF (PRESENT(open_shell_SS)) my_open_shell_SS = open_shell_SS
129
130      ! t_ab = amp_fac*(:,a|:,b)-(:,b|:,a)
131      IF (calc_forces) amp_fac = 2.0_dp
132      ! If we calculate the gradient we need to distinguish
133      ! between alpha-alpha and beta-beta cases for UMP2
134
135      my_alpha_alpha_case = .FALSE.
136      my_beta_beta_case = .FALSE.
137      my_alpha_beta_case = .FALSE.
138      IF (calc_forces) THEN
139         IF (my_open_shell_SS) THEN
140            amp_fac = 1.0_dp
141            IF ((.NOT. ALLOCATED(mp2_env%ri_grad%P_ij)) &
142                .AND. (.NOT. ALLOCATED(mp2_env%ri_grad%P_ab))) THEN
143               my_alpha_alpha_case = .TRUE.
144               amp_fac = 1.0_dp
145            ELSE
146               IF ((.NOT. ALLOCATED(mp2_env%ri_grad%P_ij_beta)) &
147                   .AND. (.NOT. ALLOCATED(mp2_env%ri_grad%P_ab_beta))) THEN
148                  my_beta_beta_case = .TRUE.
149               ENDIF
150            ENDIF
151         ENDIF
152      ENDIF
153
154      my_alpha_beta_case = .FALSE.
155      IF (PRESENT(BIb_C_beta) .AND. &
156          PRESENT(gd_B_virtual_beta) .AND. &
157          PRESENT(homo_beta) .AND. &
158          PRESENT(Eigenval_beta)) THEN
159         my_alpha_beta_case = .TRUE.
160         my_alpha_alpha_case = .FALSE.
161      ENDIF
162
163      IF (my_alpha_beta_case) amp_fac = 1.0_dp
164
165      virtual = nmo - homo
166      IF (my_alpha_beta_case) virtual_beta = nmo - homo_beta
167
168      CALL mp2_ri_get_sizes( &
169         mp2_env, para_env, para_env_sub, gd_array, gd_B_virtual, &
170         homo, dimen_RI, unit_nr, color_sub, best_block_size, best_integ_group_size, block_size, &
171         integ_group_size, min_integ_group_size, my_B_size, my_B_virtual_end, my_B_virtual_start, my_group_L_size, &
172         my_group_L_start, my_group_L_end, ngroup, num_IJ_blocks, num_integ_group, pos_integ_group, virtual, my_alpha_beta_case, &
173         my_open_shell_SS, mem_for_aK, mem_for_comm, mem_for_iaK, mem_for_rep, mem_min, mem_per_group, mem_real)
174
175      IF (my_alpha_beta_case) THEN
176         CALL get_group_dist(gd_B_virtual_beta, para_env_sub%mepos, my_B_virtual_start_beta, my_B_virtual_end_beta, my_B_size_beta)
177         my_homo_beta = homo_beta
178      ELSE
179         my_B_virtual_start_beta = my_B_virtual_start
180         my_B_virtual_end_beta = my_B_virtual_end
181         my_B_size_beta = my_B_size
182         my_homo_beta = homo
183      END IF
184
185      ! now create a group that contains all the proc that have the same virtual starting point
186      ! in the integ group
187      ! sub_sub_color=para_env_sub%mepos
188      CALL mp2_ri_create_group( &
189         BIb_C, para_env, para_env_sub, homo, color_sub, &
190         gd_array%sizes, calc_forces, &
191         comm_exchange, integ_group_size, my_B_size, iiB, my_group_L_end, &
192         my_group_L_size, my_group_L_size_orig, my_group_L_start, my_new_group_L_size, &
193         sub_sub_color, integ_group_pos2color_sub, proc_map, proc_map_rep, sizes_array_orig, &
194         sub_proc_map, ranges_info_array, para_env_exchange, para_env_rep, num_integ_group)
195
196      ! *****************************************************************
197      ! **********  REPLICATION-BLOCKED COMMUNICATION SCHEME  ***********
198      ! *****************************************************************
199      ! introduce block size, the number of occupied orbitals has to be a
200      ! multiple of the block size
201
202      ! Calculate the maximum number of ij pairs that have to be computed
203      ! among groups
204      CALL mp2_ri_communication(my_alpha_beta_case, total_ij_pairs, homo, my_homo_beta, num_IJ_blocks, &
205                                block_size, ngroup, ij_map, color_sub, my_ij_pairs, my_open_shell_SS, unit_nr)
206
207      ALLOCATE (num_ij_pairs(0:para_env_exchange%num_pe - 1))
208      num_ij_pairs = 0
209      num_ij_pairs(para_env_exchange%mepos) = my_ij_pairs
210      CALL mp_sum(num_ij_pairs, para_env_exchange%group)
211
212      max_ij_pairs = MAXVAL(num_ij_pairs)
213
214      ! start real stuff
215      IF (.NOT. my_alpha_beta_case) THEN
216         CALL mp2_ri_allocate(local_ab, t_ab, mp2_env, homo, virtual, dimen_RI, my_B_size, &
217                              block_size, my_B_size_beta, my_group_L_size, local_i_aL, &
218                              local_j_aL, calc_forces, Y_i_aP, Y_j_aP, &
219                              my_alpha_beta_case, &
220                              my_beta_beta_case)
221      ELSE
222         CALL mp2_ri_allocate(local_ab, t_ab, mp2_env, homo, virtual, dimen_RI, my_B_size, &
223                              block_size, my_B_size_beta, my_group_L_size, local_i_aL, &
224                              local_j_aL, calc_forces, Y_i_aP, Y_j_aP_beta, &
225                              my_alpha_beta_case, &
226                              my_beta_beta_case, local_ba, virtual_beta)
227      ENDIF
228
229      CALL timeset(routineN//"_RI_loop", handle2)
230      null_mat_rec = 0.0_dp
231      null_mat_send = 0.0_dp
232      Emp2 = 0.0_dp
233      Emp2_Cou = 0.0_dp
234      Emp2_EX = 0.0_dp
235      my_num_dgemm_call = 0
236      my_flop_rate = 0.0_dp
237      DO ij_index = 1, max_ij_pairs
238
239         IF (ij_index <= my_ij_pairs) THEN
240            ! We have work to do
241            ij_counter = (ij_index - MIN(1, color_sub))*ngroup + color_sub
242            my_i = ij_map(ij_counter, 1)
243            my_j = ij_map(ij_counter, 2)
244            my_block_size = ij_map(ij_counter, 3)
245
246            local_i_aL = 0.0_dp
247            DO irep = 0, num_integ_group - 1
248               Lstart_pos = ranges_info_array(1, irep, para_env_exchange%mepos)
249               Lend_pos = ranges_info_array(2, irep, para_env_exchange%mepos)
250               start_point = ranges_info_array(3, irep, para_env_exchange%mepos)
251               end_point = ranges_info_array(4, irep, para_env_exchange%mepos)
252
253               local_i_aL(Lstart_pos:Lend_pos, 1:my_B_size, 1:my_block_size) = &
254                  BIb_C(start_point:end_point, 1:my_B_size, my_i:my_i + my_block_size - 1)
255            END DO
256
257            local_j_aL = 0.0_dp
258            DO irep = 0, num_integ_group - 1
259               Lstart_pos = ranges_info_array(1, irep, para_env_exchange%mepos)
260               Lend_pos = ranges_info_array(2, irep, para_env_exchange%mepos)
261               start_point = ranges_info_array(3, irep, para_env_exchange%mepos)
262               end_point = ranges_info_array(4, irep, para_env_exchange%mepos)
263
264               IF (.NOT. my_alpha_beta_case) THEN
265                  local_j_aL(Lstart_pos:Lend_pos, 1:my_B_size, 1:my_block_size) = &
266                     BIb_C(start_point:end_point, 1:my_B_size, my_j:my_j + my_block_size - 1)
267               ELSE
268                  local_j_aL(Lstart_pos:Lend_pos, 1:my_B_size_beta, 1:my_block_size) = &
269                     BIb_C_beta(start_point:end_point, 1:my_B_size_beta, my_j:my_j + my_block_size - 1)
270               END IF
271            END DO
272
273            ! collect data from other proc
274            CALL timeset(routineN//"_comm", handle3)
275            DO proc_shift = 1, para_env_exchange%num_pe - 1
276               proc_send = proc_map(para_env_exchange%mepos + proc_shift)
277               proc_receive = proc_map(para_env_exchange%mepos - proc_shift)
278
279               send_ij_index = num_ij_pairs(proc_send)
280
281               CALL get_group_dist(gd_array, proc_receive, sizes=rec_L_size)
282               ALLOCATE (BI_C_rec(rec_L_size, MAX(my_B_size, my_B_size_beta), my_block_size))
283
284               IF (ij_index <= send_ij_index) THEN
285                  ij_counter_send = (ij_index - MIN(1, integ_group_pos2color_sub(proc_send)))*ngroup + &
286                                    integ_group_pos2color_sub(proc_send)
287                  send_i = ij_map(ij_counter_send, 1)
288                  send_j = ij_map(ij_counter_send, 2)
289                  send_block_size = ij_map(ij_counter_send, 3)
290
291                  ! occupied i
292                  BI_C_rec = 0.0_dp
293                  CALL mp_sendrecv(BIb_C(1:my_group_L_size, 1:my_B_size, send_i:send_i + send_block_size - 1), proc_send, &
294                                   BI_C_rec(1:rec_L_size, 1:my_B_size, 1:my_block_size), proc_receive, &
295                                   para_env_exchange%group)
296                  DO irep = 0, num_integ_group - 1
297                     Lstart_pos = ranges_info_array(1, irep, proc_receive)
298                     Lend_pos = ranges_info_array(2, irep, proc_receive)
299                     start_point = ranges_info_array(3, irep, proc_receive)
300                     end_point = ranges_info_array(4, irep, proc_receive)
301
302                     local_i_aL(Lstart_pos:Lend_pos, 1:my_B_size, 1:my_block_size) = &
303                        BI_C_rec(start_point:end_point, 1:my_B_size, 1:my_block_size)
304
305                  END DO
306
307                  ! occupied j
308                  BI_C_rec = 0.0_dp
309                  IF (.NOT. my_alpha_beta_case) THEN
310                     CALL mp_sendrecv(BIb_C(1:my_group_L_size, 1:my_B_size, send_j:send_j + send_block_size - 1), proc_send, &
311                                      BI_C_rec(1:rec_L_size, 1:my_B_size, 1:my_block_size), proc_receive, &
312                                      para_env_exchange%group)
313                  ELSE
314                 CALL mp_sendrecv(BIb_C_beta(1:my_group_L_size, 1:my_B_size_beta, send_j:send_j + send_block_size - 1), proc_send, &
315                                      BI_C_rec(1:rec_L_size, 1:my_B_size_beta, 1:my_block_size), proc_receive, &
316                                      para_env_exchange%group)
317                  END IF
318
319                  DO irep = 0, num_integ_group - 1
320                     Lstart_pos = ranges_info_array(1, irep, proc_receive)
321                     Lend_pos = ranges_info_array(2, irep, proc_receive)
322                     start_point = ranges_info_array(3, irep, proc_receive)
323                     end_point = ranges_info_array(4, irep, proc_receive)
324
325                     IF (.NOT. my_alpha_beta_case) THEN
326                        local_j_aL(Lstart_pos:Lend_pos, 1:my_B_size, 1:my_block_size) = &
327                           BI_C_rec(start_point:end_point, 1:my_B_size, 1:my_block_size)
328                     ELSE
329                        local_j_aL(Lstart_pos:Lend_pos, 1:my_B_size_beta, 1:my_block_size) = &
330                           BI_C_rec(start_point:end_point, 1:my_B_size_beta, 1:my_block_size)
331                     END IF
332
333                  END DO
334
335               ELSE
336                  ! we send the null matrix while we know that we have to receive something
337
338                  ! occupied i
339                  BI_C_rec = 0.0_dp
340                  CALL mp_sendrecv(null_mat_send, proc_send, &
341                                   BI_C_rec(1:rec_L_size, 1:my_B_size, 1:my_block_size), proc_receive, &
342                                   para_env_exchange%group)
343
344                  DO irep = 0, num_integ_group - 1
345                     Lstart_pos = ranges_info_array(1, irep, proc_receive)
346                     Lend_pos = ranges_info_array(2, irep, proc_receive)
347                     start_point = ranges_info_array(3, irep, proc_receive)
348                     end_point = ranges_info_array(4, irep, proc_receive)
349
350                     local_i_aL(Lstart_pos:Lend_pos, 1:my_B_size, 1:my_block_size) = &
351                        BI_C_rec(start_point:end_point, 1:my_B_size, 1:my_block_size)
352
353                  END DO
354
355                  ! occupied j
356                  BI_C_rec = 0.0_dp
357                  IF (.NOT. my_alpha_beta_case) THEN
358                     CALL mp_sendrecv(null_mat_send, proc_send, &
359                                      BI_C_rec(1:rec_L_size, 1:my_B_size, 1:my_block_size), proc_receive, &
360                                      para_env_exchange%group)
361                  ELSE
362                     CALL mp_sendrecv(null_mat_send, proc_send, &
363                                      BI_C_rec(1:rec_L_size, 1:my_B_size_beta, 1:my_block_size), proc_receive, &
364                                      para_env_exchange%group)
365                  END IF
366                  DO irep = 0, num_integ_group - 1
367                     Lstart_pos = ranges_info_array(1, irep, proc_receive)
368                     Lend_pos = ranges_info_array(2, irep, proc_receive)
369                     start_point = ranges_info_array(3, irep, proc_receive)
370                     end_point = ranges_info_array(4, irep, proc_receive)
371
372                     IF (.NOT. my_alpha_beta_case) THEN
373                        local_j_aL(Lstart_pos:Lend_pos, 1:my_B_size, 1:my_block_size) = &
374                           BI_C_rec(start_point:end_point, 1:my_B_size, 1:my_block_size)
375                     ELSE
376                        local_j_aL(Lstart_pos:Lend_pos, 1:my_B_size_beta, 1:my_block_size) = &
377                           BI_C_rec(start_point:end_point, 1:my_B_size_beta, 1:my_block_size)
378                     END IF
379
380                  END DO
381
382               END IF
383
384               DEALLOCATE (BI_C_rec)
385
386            END DO
387            CALL timestop(handle3)
388
389            ! loop over the block elements
390            DO iiB = 1, my_block_size
391               DO jjB = 1, my_block_size
392                  CALL timeset(routineN//"_expansion", handle3)
393                  ! calculate the integrals (ia|jb) strating from my local data ...
394                  local_ab = 0.0_dp
395                  IF ((my_alpha_beta_case) .AND. (calc_forces)) THEN
396                     local_ba = 0.0_dp
397                  ENDIF
398                  t_start = m_walltime()
399                  CALL dgemm('T', 'N', my_B_size, my_B_size_beta, dimen_RI, 1.0_dp, &
400                             local_i_aL(:, :, iiB), dimen_RI, local_j_aL(:, :, jjB), dimen_RI, &
401                             0.0_dp, local_ab(my_B_virtual_start:my_B_virtual_end, 1:my_B_size_beta), my_B_size)
402                  t_end = m_walltime()
403                  actual_flop_rate = 2.0_dp*my_B_size*my_B_size_beta*REAL(dimen_RI, KIND=dp)/(MAX(0.01_dp, t_end - t_start))
404                  my_flop_rate = my_flop_rate + actual_flop_rate
405                  my_num_dgemm_call = my_num_dgemm_call + 1
406                  ! Additional integrals only for alpha_beta case and forces
407                  IF ((my_alpha_beta_case) .AND. (calc_forces)) THEN
408                     t_start = m_walltime()
409                     CALL dgemm('T', 'N', my_B_size_beta, my_B_size, dimen_RI, 1.0_dp, &
410                                local_j_aL(:, :, iiB), dimen_RI, local_i_aL(:, :, jjB), dimen_RI, &
411                                0.0_dp, local_ba(my_B_virtual_start_beta:my_B_virtual_end_beta, 1:my_B_size), my_B_size_beta)
412                     t_end = m_walltime()
413                     actual_flop_rate = 2.0_dp*my_B_size*my_B_size_beta*REAL(dimen_RI, KIND=dp)/(MAX(0.01_dp, t_end - t_start))
414                     my_flop_rate = my_flop_rate + actual_flop_rate
415                     my_num_dgemm_call = my_num_dgemm_call + 1
416                  ENDIF
417                  ! ... and from the other of my subgroup
418                  DO proc_shift = 1, para_env_sub%num_pe - 1
419                     proc_send = sub_proc_map(para_env_sub%mepos + proc_shift)
420                     proc_receive = sub_proc_map(para_env_sub%mepos - proc_shift)
421
422                     CALL get_group_dist(gd_B_virtual, proc_receive, rec_B_virtual_start, rec_B_virtual_end, rec_B_size)
423
424                     ALLOCATE (external_i_aL(dimen_RI, rec_B_size))
425                     external_i_aL = 0.0_dp
426
427                     CALL mp_sendrecv(local_i_aL(:, :, iiB), proc_send, &
428                                      external_i_aL, proc_receive, &
429                                      para_env_sub%group)
430
431                     t_start = m_walltime()
432                     CALL dgemm('T', 'N', rec_B_size, my_B_size_beta, dimen_RI, 1.0_dp, &
433                                external_i_aL, dimen_RI, local_j_aL(:, :, jjB), dimen_RI, &
434                                0.0_dp, local_ab(rec_B_virtual_start:rec_B_virtual_end, 1:my_B_size_beta), rec_B_size)
435
436                     t_end = m_walltime()
437                     actual_flop_rate = 2.0_dp*rec_B_size*my_B_size_beta*REAL(dimen_RI, KIND=dp)/(MAX(0.01_dp, t_end - t_start))
438                     my_flop_rate = my_flop_rate + actual_flop_rate
439                     my_num_dgemm_call = my_num_dgemm_call + 1
440
441                     DEALLOCATE (external_i_aL)
442                     ! Additional integrals only for alpha_beta case and forces
443                     IF ((my_alpha_beta_case) .AND. (calc_forces)) THEN
444
445                        CALL get_group_dist(gd_B_virtual_beta, proc_receive, rec_B_virtual_start, rec_B_virtual_end, rec_B_size)
446
447                        ALLOCATE (external_i_aL(dimen_RI, rec_B_size))
448                        external_i_aL = 0.0_dp
449
450                        CALL mp_sendrecv(local_j_aL(:, :, jjB), proc_send, &
451                                         external_i_aL, proc_receive, &
452                                         para_env_sub%group)
453
454                        t_start = m_walltime()
455                        CALL dgemm('T', 'N', rec_B_size, my_B_size, dimen_RI, 1.0_dp, &
456                                   external_i_aL, dimen_RI, local_i_aL(:, :, iiB), dimen_RI, &
457                                   0.0_dp, local_ba(rec_B_virtual_start:rec_B_virtual_end, 1:my_B_size), rec_B_size)
458                        t_end = m_walltime()
459                        actual_flop_rate = 2.0_dp*rec_B_size*my_B_size*REAL(dimen_RI, KIND=dp)/(MAX(0.01_dp, t_end - t_start))
460                        my_flop_rate = my_flop_rate + actual_flop_rate
461                        my_num_dgemm_call = my_num_dgemm_call + 1
462
463                        DEALLOCATE (external_i_aL)
464                     ENDIF
465
466                  END DO
467                  CALL timestop(handle3)
468
469                  !sample peak memory
470                  CALL m_memory()
471
472                  CALL timeset(routineN//"_ener", handle3)
473                  ! calculate coulomb only MP2
474                  sym_fac = 2.0_dp
475                  IF (my_i == my_j) sym_fac = 1.0_dp
476                  IF (.NOT. my_alpha_beta_case) THEN
477                     DO b = 1, my_B_size
478                        b_global = b + my_B_virtual_start - 1
479                        DO a = 1, virtual
480                           Emp2_Cou = Emp2_Cou - sym_fac*2.0_dp*local_ab(a, b)**2/ &
481                              (Eigenval(homo + a) + Eigenval(homo + b_global) - Eigenval(my_i + iiB - 1) - Eigenval(my_j + jjB - 1))
482                        END DO
483                     END DO
484                  ELSE
485                     DO b = 1, my_B_size_beta
486                        b_global = b + my_B_virtual_start_beta - 1
487                        DO a = 1, virtual
488                           Emp2_Cou = Emp2_Cou - local_ab(a, b)**2/ &
489                                      (Eigenval(homo + a) + Eigenval_beta(homo_beta + b_global) - &
490                                       Eigenval(my_i + iiB - 1) - Eigenval_beta(my_j + jjB - 1))
491                        END DO
492                     END DO
493                  END IF
494
495                  IF (calc_ex) THEN
496                     ! contract integrals with orbital energies for exchange MP2 energy
497                     ! starting with local ...
498                     ! IF(my_open_shell_SS) sym_fac=sym_fac*2.0_dp
499                     IF (calc_forces .AND. (.NOT. my_alpha_beta_case)) t_ab = 0.0_dp
500                     DO b = 1, my_B_size
501                        b_global = b + my_B_virtual_start - 1
502                        DO a = 1, my_B_size
503                           a_global = a + my_B_virtual_start - 1
504                           Emp2_Ex = Emp2_Ex + sym_fac*local_ab(a_global, b)*local_ab(b_global, a)/ &
505                       (Eigenval(homo + a_global) + Eigenval(homo + b_global) - Eigenval(my_i + iiB - 1) - Eigenval(my_j + jjB - 1))
506                           IF (calc_forces .AND. (.NOT. my_alpha_beta_case)) &
507                              t_ab(a_global, b) = -(amp_fac*local_ab(a_global, b) - local_ab(b_global, a))/ &
508                                                  (Eigenval(homo + a_global) + Eigenval(homo + b_global) - &
509                                                   Eigenval(my_i + iiB - 1) - Eigenval(my_j + jjB - 1))
510                        END DO
511                     END DO
512                     ! ... and then with external data
513                     DO proc_shift = 1, para_env_sub%num_pe - 1
514                        proc_send = sub_proc_map(para_env_sub%mepos + proc_shift)
515                        proc_receive = sub_proc_map(para_env_sub%mepos - proc_shift)
516
517                        CALL get_group_dist(gd_B_virtual, proc_receive, rec_B_virtual_start, rec_B_virtual_end, rec_B_size)
518
519                        CALL get_group_dist(gd_B_virtual, proc_send, send_B_virtual_start, send_B_virtual_end, send_B_size)
520
521                        ALLOCATE (external_ab(my_B_size, rec_B_size))
522                        external_ab = 0.0_dp
523
524                        CALL mp_sendrecv(local_ab(send_B_virtual_start:send_B_virtual_end, 1:my_B_size), proc_send, &
525                                         external_ab(1:my_B_size, 1:rec_B_size), proc_receive, &
526                                         para_env_sub%group)
527
528                        DO b = 1, my_B_size
529                           b_global = b + my_B_virtual_start - 1
530                           DO a = 1, rec_B_size
531                              a_global = a + rec_B_virtual_start - 1
532                              Emp2_Ex = Emp2_Ex + sym_fac*local_ab(a_global, b)*external_ab(b, a)/ &
533                       (Eigenval(homo + a_global) + Eigenval(homo + b_global) - Eigenval(my_i + iiB - 1) - Eigenval(my_j + jjB - 1))
534                              IF (calc_forces .AND. (.NOT. my_alpha_beta_case)) &
535                                 t_ab(a_global, b) = -(amp_fac*local_ab(a_global, b) - external_ab(b, a))/ &
536                                                     (Eigenval(homo + a_global) + Eigenval(homo + b_global) - &
537                                                      Eigenval(my_i + iiB - 1) - Eigenval(my_j + jjB - 1))
538                           END DO
539                        END DO
540
541                        DEALLOCATE (external_ab)
542                     END DO
543                  END IF
544                  CALL timestop(handle3)
545
546                  IF (calc_forces) THEN
547                     ! update P_ab, Gamma_P_ia
548                     IF (.NOT. my_alpha_beta_case) THEN
549                        Y_i_aP = 0.0_dp
550                        Y_j_aP = 0.0_dp
551                        CALL mp2_update_P_gamma(mp2_env, para_env_sub, gd_B_virtual, &
552                                                Eigenval, homo, dimen_RI, iiB, jjB, my_B_size, &
553                                                my_B_virtual_end, my_B_virtual_start, my_i, my_j, virtual, &
554                                                sub_proc_map, local_ab, t_ab, local_i_aL, local_j_aL, &
555                                                my_open_shell_ss, my_alpha_alpha_case, my_beta_beta_case, Y_i_aP, Y_j_aP)
556                     ELSE
557                        Y_i_aP = 0.0_dp
558                        Y_j_aP_beta = 0.0_dp
559                        CALL mp2_update_P_gamma(mp2_env, para_env_sub, gd_B_virtual, &
560                                                Eigenval, homo, dimen_RI, iiB, jjB, my_B_size, &
561                                                my_B_virtual_end, my_B_virtual_start, my_i, my_j, virtual, sub_proc_map, &
562                                                local_ab, t_ab, local_i_aL, local_j_aL, my_open_shell_ss, my_alpha_alpha_case, &
563                                                my_beta_beta_case, Y_i_aP, Y_j_aP_beta, Eigenval_beta, homo_beta, my_B_size_beta, &
564                                                gd_B_virtual_beta, &
565                                                my_B_virtual_start_beta, my_B_virtual_end_beta, virtual_beta, local_ba)
566                     ENDIF
567
568                  END IF
569
570               END DO ! jjB
571            END DO ! iiB
572
573         ELSE
574            ! No work to do and we know that we have to receive nothing, but send something
575            ! send data to other proc
576            DO proc_shift = 1, para_env_exchange%num_pe - 1
577               proc_send = proc_map(para_env_exchange%mepos + proc_shift)
578               proc_receive = proc_map(para_env_exchange%mepos - proc_shift)
579
580               send_ij_index = num_ij_pairs(proc_send)
581
582               IF (ij_index <= send_ij_index) THEN
583                  ! something to send
584                  ij_counter_send = (ij_index - MIN(1, integ_group_pos2color_sub(proc_send)))*ngroup + &
585                                    integ_group_pos2color_sub(proc_send)
586                  send_i = ij_map(ij_counter_send, 1)
587                  send_j = ij_map(ij_counter_send, 2)
588                  send_block_size = ij_map(ij_counter_send, 3)
589
590                  ! occupied i
591                  CALL mp_sendrecv(BIb_C(1:my_group_L_size, 1:my_B_size, send_i:send_i + send_block_size - 1), proc_send, &
592                                   null_mat_rec, proc_receive, &
593                                   para_env_exchange%group)
594                  ! occupied j
595                  IF (.NOT. my_alpha_beta_case) THEN
596                     CALL mp_sendrecv(BIb_C(1:my_group_L_size, 1:my_B_size, send_j:send_j + send_block_size - 1), proc_send, &
597                                      null_mat_rec, proc_receive, &
598                                      para_env_exchange%group)
599                  ELSE
600                 CALL mp_sendrecv(BIb_C_beta(1:my_group_L_size, 1:my_B_size_beta, send_j:send_j + send_block_size - 1), proc_send, &
601                                      null_mat_rec, proc_receive, &
602                                      para_env_exchange%group)
603                  END IF
604
605               ELSE
606                  ! nothing to send
607                  ! occupied i
608                  CALL mp_sendrecv(null_mat_send, proc_send, &
609                                   null_mat_rec, proc_receive, &
610                                   para_env_exchange%group)
611                  ! occupied j
612                  CALL mp_sendrecv(null_mat_send, proc_send, &
613                                   null_mat_rec, proc_receive, &
614                                   para_env_exchange%group)
615
616               END IF
617            END DO
618         END IF
619
620         ! redistribute gamma
621         IF (calc_forces) THEN
622            ! Closed shell, alpha-alpha or beta-beta case
623            IF ((.NOT. my_alpha_beta_case)) THEN
624               CALL mp2_redistribute_gamma(mp2_env, ij_index, my_B_size, &
625                                           my_block_size, my_group_L_size, my_i, my_ij_pairs, my_j, ngroup, &
626                                           num_integ_group, integ_group_pos2color_sub, num_ij_pairs, proc_map, &
627                                           ij_map, ranges_info_array, Y_i_aP, Y_j_aP, para_env_exchange, &
628                                           null_mat_rec, null_mat_send, gd_array%sizes, my_alpha_alpha_case, &
629                                           my_beta_beta_case, my_alpha_beta_case, my_open_shell_ss)
630            ELSE
631               ! Alpha-beta case
632               CALL mp2_redistribute_gamma(mp2_env, ij_index, my_B_size, &
633                                           my_block_size, my_group_L_size, my_i, my_ij_pairs, my_j, ngroup, &
634                                           num_integ_group, integ_group_pos2color_sub, num_ij_pairs, proc_map, &
635                                           ij_map, ranges_info_array, Y_i_aP, Y_j_aP_beta, para_env_exchange, &
636                                           null_mat_rec, null_mat_send, gd_array%sizes, my_alpha_alpha_case, my_beta_beta_case, &
637                                           my_alpha_beta_case, my_open_shell_ss, my_B_size_beta)
638            ENDIF
639         END IF
640
641      END DO
642      CALL timestop(handle2)
643
644      DEALLOCATE (local_i_aL)
645      DEALLOCATE (local_j_aL)
646      DEALLOCATE (ij_map)
647      DEALLOCATE (num_ij_pairs)
648
649      IF (calc_forces) THEN
650         DEALLOCATE (Y_i_aP)
651         IF (.NOT. my_alpha_beta_case) THEN
652            DEALLOCATE (Y_j_aP)
653         ELSE
654            DEALLOCATE (Y_j_aP_beta)
655         ENDIF
656         IF (ALLOCATED(t_ab)) THEN
657            DEALLOCATE (t_ab)
658         ENDIF
659         ! Deallocate additional integrals: alpha_beta case with forces
660         IF (ALLOCATED(local_ba)) THEN
661            DEALLOCATE (local_ba)
662         ENDIF
663
664         ! here we check if there are almost degenerate ij
665         ! pairs and we update P_ij with these contribution.
666         ! If all pairs are degenerate with each other this step will scale O(N^6),
667         ! if the number of degenerate pairs scales linearly with the system size
668         ! this step will scale O(N^5).
669         ! Start counting the number of almost degenerate ij pairs according
670         ! to eps_canonical
671         IF (.NOT. my_alpha_beta_case) THEN
672            CALL quasi_degenerate_P_ij( &
673               mp2_env, Eigenval, homo, virtual, my_open_shell_ss, &
674               my_beta_beta_case, my_alpha_beta_case, Bib_C, unit_nr, dimen_RI, &
675               my_B_size, ngroup, num_integ_group, my_group_L_size, &
676               color_sub, ranges_info_array, para_env_exchange, para_env_sub, proc_map, &
677               my_B_virtual_start, my_B_virtual_end, gd_array%sizes, gd_B_virtual, &
678               sub_proc_map, integ_group_pos2color_sub, local_ab)
679         ELSE
680            CALL quasi_degenerate_P_ij( &
681               mp2_env, Eigenval, homo, virtual, my_open_shell_ss, &
682               my_beta_beta_case, my_alpha_beta_case, Bib_C, unit_nr, dimen_RI, &
683               my_B_size, ngroup, num_integ_group, my_group_L_size, &
684               color_sub, ranges_info_array, para_env_exchange, para_env_sub, proc_map, &
685               my_B_virtual_start, my_B_virtual_end, gd_array%sizes, gd_B_virtual, &
686               sub_proc_map, integ_group_pos2color_sub, local_ab, BIb_C_beta, my_B_size_beta, &
687               gd_B_virtual_beta, my_B_virtual_start_beta, &
688               virtual_beta, homo_beta, Eigenval_beta, my_B_virtual_end_beta)
689         ENDIF
690
691      END IF
692
693      DEALLOCATE (integ_group_pos2color_sub)
694      DEALLOCATE (local_ab)
695
696      CALL mp_sum(Emp2_Cou, para_env%group)
697      CALL mp_sum(Emp2_Ex, para_env%group)
698
699      IF (calc_forces) THEN
700         ! sum P_ab
701         IF (.NOT. my_open_shell_ss) THEN
702            mp2_env%ri_grad%P_ab(:, :) = mp2_env%ri_grad%P_ab(:, :)*amp_fac
703            IF (my_alpha_beta_case) mp2_env%ri_grad%P_ab_beta(:, :) = &
704               mp2_env%ri_grad%P_ab_beta(:, :)*amp_fac
705            sub_P_color = para_env_sub%mepos
706            CALL mp_comm_split_direct(para_env%group, comm_P, sub_P_color)
707            NULLIFY (para_env_P)
708            CALL cp_para_env_create(para_env_P, comm_P)
709            CALL mp_sum(mp2_env%ri_grad%P_ab, para_env_P%group)
710            IF (my_alpha_beta_case) CALL mp_sum(mp2_env%ri_grad%P_ab_beta, para_env_P%group)
711            ! release para_env_P
712            CALL cp_para_env_release(para_env_P)
713         ENDIF
714
715         ! recover original information (before replication)
716         DEALLOCATE (gd_array%sizes)
717         iiB = SIZE(sizes_array_orig)
718         ALLOCATE (gd_array%sizes(0:iiB - 1))
719         gd_array%sizes(:) = sizes_array_orig
720         DEALLOCATE (sizes_array_orig)
721
722         ! make a copy of the original integrals (ia|Q)
723         my_group_L_size = my_group_L_size_orig
724         ALLOCATE (B_ia_Q(homo, my_B_size, my_group_L_size))
725         B_ia_Q = 0.0_dp
726         DO jjB = 1, homo
727            DO iiB = 1, my_B_size
728               B_ia_Q(jjB, iiB, 1:my_group_L_size) = BIb_C(1:my_group_L_size, iiB, jjB)
729            END DO
730         END DO
731         DEALLOCATE (BIb_C)
732         IF (my_alpha_beta_case) THEN
733            ALLOCATE (B_ia_Q_beta(homo_beta, my_B_size_beta, my_group_L_size))
734            B_ia_Q_beta = 0.0_dp
735            DO jjB = 1, homo_beta
736               DO iiB = 1, my_B_size_beta
737                  B_ia_Q_beta(jjB, iiB, 1:my_group_L_size) = &
738                     BIb_C_beta(1:my_group_L_size, iiB, jjB)
739               END DO
740            END DO
741            DEALLOCATE (BIb_C_beta)
742         ENDIF
743
744         ! sum Gamma and dereplicate
745         ALLOCATE (BIb_C(homo, my_B_size, my_group_L_size))
746         IF (my_alpha_beta_case) ALLOCATE (BIb_C_beta(homo_beta, my_B_size_beta, my_group_L_size))
747         DO proc_shift = 1, para_env_rep%num_pe - 1
748            ! invert order
749            proc_send = proc_map_rep(para_env_rep%mepos - proc_shift)
750            proc_receive = proc_map_rep(para_env_rep%mepos + proc_shift)
751
752            start_point = ranges_info_array(3, proc_shift, para_env_exchange%mepos)
753            end_point = ranges_info_array(4, proc_shift, para_env_exchange%mepos)
754
755            BIb_C = 0.0_dp
756            ! Closed shell, alpha-alpha, and alpha-alpha part in alpha-beta case
757            IF (my_alpha_alpha_case .OR. (.NOT. my_open_shell_ss)) THEN
758               CALL mp_sendrecv(mp2_env%ri_grad%Gamma_P_ia(1:homo, 1:my_B_size, start_point:end_point), &
759                                proc_send, BIb_C, proc_receive, para_env_rep%group)
760               mp2_env%ri_grad%Gamma_P_ia(1:homo, 1:my_B_size, 1:my_group_L_size) = &
761                  mp2_env%ri_grad%Gamma_P_ia(1:homo, 1:my_B_size, 1:my_group_L_size) + BIb_C
762            ENDIF
763            ! Beta-beta
764            IF (my_beta_beta_case) THEN
765               CALL mp_sendrecv(mp2_env%ri_grad%Gamma_P_ia_beta(1:homo, 1:my_B_size, start_point:end_point), &
766                                proc_send, BIb_C, proc_receive, para_env_rep%group)
767               mp2_env%ri_grad%Gamma_P_ia_beta(1:homo, 1:my_B_size, 1:my_group_L_size) = &
768                  mp2_env%ri_grad%Gamma_P_ia_beta(1:homo, 1:my_B_size, 1:my_group_L_size) + BIb_C
769            ENDIF
770            IF (my_alpha_beta_case) THEN ! Beta-beta part of alpha-beta case
771               CALL mp_sendrecv(mp2_env%ri_grad%Gamma_P_ia_beta(1:homo_beta, 1:my_B_size_beta, start_point:end_point), &
772                                proc_send, BIb_C_beta, proc_receive, para_env_rep%group)
773               mp2_env%ri_grad%Gamma_P_ia_beta(1:homo_beta, 1:my_B_size_beta, 1:my_group_L_size) = &
774                  mp2_env%ri_grad%Gamma_P_ia_beta(1:homo_beta, 1:my_B_size_beta, 1:my_group_L_size) + BIb_C_beta
775            ENDIF
776
777         END DO
778         IF (.NOT. my_open_shell_ss) THEN
779            BIb_C(:, :, :) = mp2_env%ri_grad%Gamma_P_ia(1:homo, 1:my_B_size, 1:my_group_L_size)
780            DEALLOCATE (mp2_env%ri_grad%Gamma_P_ia)
781            ALLOCATE (mp2_env%ri_grad%Gamma_P_ia(homo, my_B_size, my_group_L_size))
782            mp2_env%ri_grad%Gamma_P_ia(:, :, :) = BIb_C
783            DEALLOCATE (BIb_C)
784            IF (my_alpha_beta_case) THEN
785               BIb_C_beta(:, :, :) = mp2_env%ri_grad%Gamma_P_ia_beta(1:homo_beta, 1:my_B_size_beta, 1:my_group_L_size)
786               DEALLOCATE (mp2_env%ri_grad%Gamma_P_ia_beta)
787               ALLOCATE (mp2_env%ri_grad%Gamma_P_ia_beta(homo_beta, my_B_size_beta, my_group_L_size))
788               mp2_env%ri_grad%Gamma_P_ia_beta(:, :, :) = BIb_C_beta
789               DEALLOCATE (BIb_C_beta)
790            ENDIF
791         ENDIF
792         ! For open shell systems, we need to pass Bib_C through the subroutine in alpha-alpha and beta-beta case.
793         ! Only for forces, as IF suggests! Here we deallocate it, but restore after complete_gamma
794         IF (my_open_shell_ss) THEN
795            DEALLOCATE (BIb_C)
796         ENDIF
797
798         IF (.NOT. my_open_shell_ss) THEN
799            CALL complete_gamma(mp2_env, B_ia_Q, dimen_RI, homo, virtual, para_env, para_env_sub, ngroup, &
800                                my_group_L_size, my_group_L_start, my_group_L_end, &
801                                my_B_size, my_B_virtual_start, &
802                                gd_array, gd_B_virtual, &
803                                sub_proc_map, .TRUE.)
804            IF (my_alpha_beta_case) THEN
805               CALL complete_gamma(mp2_env, B_ia_Q_beta, dimen_RI, homo_beta, virtual_beta, para_env, para_env_sub, &
806                                   ngroup, my_group_L_size, my_group_L_start, my_group_L_end, &
807                                   my_B_size_beta, my_B_virtual_start_beta, &
808                                   gd_array, gd_B_virtual_beta, sub_proc_map, .FALSE.)
809            ENDIF
810         ENDIF
811         ! Here we restore BIb_C
812         IF (my_open_shell_ss) THEN
813            ALLOCATE (BIb_C(my_group_L_size, my_B_size, homo))
814            BIb_C = 0.0_dp
815            ! copy the integrals (ia|Q) back
816            DO jjB = 1, homo
817               DO iiB = 1, my_B_size
818                  BIb_C(1:my_group_L_size, iiB, jjB) = &
819                     B_ia_Q(jjB, iiB, 1:my_group_L_size)
820               END DO
821            END DO
822         ENDIF
823
824      END IF
825
826      Emp2 = Emp2_Cou + Emp2_EX
827
828      DEALLOCATE (proc_map)
829      DEALLOCATE (sub_proc_map)
830      DEALLOCATE (proc_map_rep)
831      DEALLOCATE (ranges_info_array)
832
833      IF (.NOT. my_open_shell_SS) THEN
834         ! keep the array for the next calculations
835         IF (ALLOCATED(BIb_C)) DEALLOCATE (BIb_C)
836         CALL release_group_dist(gd_array)
837         CALL release_group_dist(gd_B_virtual)
838         IF (my_alpha_beta_case) THEN
839            CALL release_group_dist(gd_B_virtual_beta)
840         END IF
841      END IF
842
843      CALL cp_para_env_release(para_env_exchange)
844      CALL cp_para_env_release(para_env_rep)
845
846      my_flop_rate = my_flop_rate/REAL(MAX(my_num_dgemm_call, 1), KIND=dp)/1.0E9_dp
847      CALL mp_sum(my_flop_rate, para_env%group)
848      my_flop_rate = my_flop_rate/para_env%num_pe
849      IF (unit_nr > 0) WRITE (UNIT=unit_nr, FMT="(T3,A,T66,F15.2)") &
850         "PERFORMANCE| DGEMM flop rate (Gflops / MPI rank):", my_flop_rate
851
852      CALL timestop(handle)
853
854   END SUBROUTINE mp2_ri_gpw_compute_en
855
856! **************************************************************************************************
857!> \brief ...
858!> \param BIb_C ...
859!> \param para_env ...
860!> \param para_env_sub ...
861!> \param para_env_exchange ...
862!> \param para_env_rep ...
863!> \param homo ...
864!> \param proc_map_rep ...
865!> \param sizes_array ...
866!> \param my_B_size ...
867!> \param my_group_L_size ...
868!> \param my_group_L_start ...
869!> \param my_group_L_end ...
870!> \param my_new_group_L_size ...
871!> \param new_sizes_array ...
872!> \param ranges_info_array ...
873! **************************************************************************************************
874   SUBROUTINE replicate_iaK_2intgroup(BIb_C, para_env, para_env_sub, para_env_exchange, para_env_rep, &
875                                      homo, proc_map_rep, &
876                                      sizes_array, &
877                                      my_B_size, &
878                                      my_group_L_size, my_group_L_start, my_group_L_end, &
879                                      my_new_group_L_size, new_sizes_array, ranges_info_array)
880      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), &
881         INTENT(INOUT)                                   :: BIb_C
882      TYPE(cp_para_env_type), POINTER                    :: para_env, para_env_sub, &
883                                                            para_env_exchange, para_env_rep
884      INTEGER, INTENT(IN)                                :: homo
885      INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(OUT)    :: proc_map_rep
886      INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(IN)     :: sizes_array
887      INTEGER, INTENT(IN)                                :: my_B_size, my_group_L_size, &
888                                                            my_group_L_start, my_group_L_end
889      INTEGER, INTENT(INOUT)                             :: my_new_group_L_size
890      INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(OUT)    :: new_sizes_array
891      INTEGER, ALLOCATABLE, DIMENSION(:, :, :), &
892         INTENT(OUT)                                     :: ranges_info_array
893
894      CHARACTER(LEN=*), PARAMETER :: routineN = 'replicate_iaK_2intgroup', &
895         routineP = moduleN//':'//routineN
896
897      INTEGER                                            :: comm_rep, end_point, handle, i, &
898                                                            max_L_size, proc_receive, proc_send, &
899                                                            proc_shift, start_point, sub_sub_color
900      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: rep_ends_array, rep_sizes_array, &
901                                                            rep_starts_array
902      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: BIb_C_copy
903      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :, :)  :: BIb_C_gather
904
905      CALL timeset(routineN, handle)
906
907      ! create the replication group
908      sub_sub_color = para_env_sub%mepos*para_env_exchange%num_pe + para_env_exchange%mepos
909      CALL mp_comm_split_direct(para_env%group, comm_rep, sub_sub_color)
910      NULLIFY (para_env_rep)
911      CALL cp_para_env_create(para_env_rep, comm_rep)
912
913      ! crate the proc maps
914      ALLOCATE (proc_map_rep(-para_env_rep%num_pe:2*para_env_rep%num_pe - 1))
915      DO i = 0, para_env_rep%num_pe - 1
916         proc_map_rep(i) = i
917         proc_map_rep(-i - 1) = para_env_rep%num_pe - i - 1
918         proc_map_rep(para_env_rep%num_pe + i) = i
919      END DO
920
921      ! create the new limits for K according to the size
922      ! of the integral group
923      ALLOCATE (new_sizes_array(0:para_env_exchange%num_pe - 1))
924      new_sizes_array = 0
925      ALLOCATE (ranges_info_array(4, 0:para_env_rep%num_pe - 1, 0:para_env_exchange%num_pe - 1))
926      ranges_info_array = 0
927
928      ! info array for replication
929      ALLOCATE (rep_ends_array(0:para_env_rep%num_pe - 1))
930      rep_ends_array = 0
931      ALLOCATE (rep_starts_array(0:para_env_rep%num_pe - 1))
932      rep_starts_array = 0
933      ALLOCATE (rep_sizes_array(0:para_env_rep%num_pe - 1))
934      rep_sizes_array = 0
935
936      rep_sizes_array(para_env_rep%mepos) = my_group_L_size
937      rep_starts_array(para_env_rep%mepos) = my_group_L_start
938      rep_ends_array(para_env_rep%mepos) = my_group_L_end
939
940      CALL mp_sum(rep_sizes_array, para_env_rep%group)
941      CALL mp_sum(rep_starts_array, para_env_rep%group)
942      CALL mp_sum(rep_ends_array, para_env_rep%group)
943
944      ! calculate my_new_group_L_size according to sizes_array
945      my_new_group_L_size = my_group_L_size
946      ranges_info_array(1, 0, para_env_exchange%mepos) = my_group_L_start
947      ranges_info_array(2, 0, para_env_exchange%mepos) = my_group_L_end
948      ranges_info_array(3, 0, para_env_exchange%mepos) = 1
949      ranges_info_array(4, 0, para_env_exchange%mepos) = my_group_L_size
950
951      DO proc_shift = 1, para_env_rep%num_pe - 1
952         proc_send = proc_map_rep(para_env_rep%mepos + proc_shift)
953         proc_receive = proc_map_rep(para_env_rep%mepos - proc_shift)
954
955         my_new_group_L_size = my_new_group_L_size + rep_sizes_array(proc_receive)
956
957         ranges_info_array(1, proc_shift, para_env_exchange%mepos) = rep_starts_array(proc_receive)
958         ranges_info_array(2, proc_shift, para_env_exchange%mepos) = rep_ends_array(proc_receive)
959       ranges_info_array(3, proc_shift, para_env_exchange%mepos) = ranges_info_array(4, proc_shift - 1, para_env_exchange%mepos) + 1
960         ranges_info_array(4, proc_shift, para_env_exchange%mepos) = my_new_group_L_size
961
962      END DO
963      new_sizes_array(para_env_exchange%mepos) = my_new_group_L_size
964
965      CALL mp_sum(new_sizes_array, para_env_exchange%group)
966      CALL mp_sum(ranges_info_array, para_env_exchange%group)
967
968      ! replication scheme using mp_allgather
969      ! get the max L size of the
970      max_L_size = MAXVAL(sizes_array)
971
972      ALLOCATE (BIb_C_copy(max_L_size, my_B_size, homo))
973      BIb_C_copy = 0.0_dp
974      BIb_C_copy(1:my_group_L_size, 1:my_B_size, 1:homo) = BIb_C
975
976      DEALLOCATE (BIb_C)
977
978      ALLOCATE (BIb_C_gather(max_L_size, my_B_size, homo, 0:para_env_rep%num_pe - 1))
979      BIb_C_gather = 0.0_dp
980
981      CALL mp_allgather(BIb_C_copy, BIb_C_gather, para_env_rep%group)
982
983      DEALLOCATE (BIb_C_copy)
984
985      ALLOCATE (BIb_C(my_new_group_L_size, my_B_size, homo))
986      BIb_C = 0.0_dp
987
988      ! reorder data
989      DO proc_shift = 0, para_env_rep%num_pe - 1
990         proc_send = proc_map_rep(para_env_rep%mepos + proc_shift)
991         proc_receive = proc_map_rep(para_env_rep%mepos - proc_shift)
992
993         start_point = ranges_info_array(3, proc_shift, para_env_exchange%mepos)
994         end_point = ranges_info_array(4, proc_shift, para_env_exchange%mepos)
995
996         BIb_C(start_point:end_point, 1:my_B_size, 1:homo) = &
997            BIb_C_gather(1:end_point - start_point + 1, 1:my_B_size, 1:homo, proc_receive)
998
999      END DO
1000
1001      DEALLOCATE (BIb_C_gather)
1002      DEALLOCATE (rep_sizes_array)
1003      DEALLOCATE (rep_starts_array)
1004      DEALLOCATE (rep_ends_array)
1005
1006      CALL timestop(handle)
1007
1008   END SUBROUTINE replicate_iaK_2intgroup
1009
1010! **************************************************************************************************
1011!> \brief ...
1012!> \param local_ab ...
1013!> \param t_ab ...
1014!> \param mp2_env ...
1015!> \param homo ...
1016!> \param virtual ...
1017!> \param dimen_RI ...
1018!> \param my_B_size ...
1019!> \param block_size ...
1020!> \param my_B_size_beta ...
1021!> \param my_group_L_size ...
1022!> \param local_i_aL ...
1023!> \param local_j_aL ...
1024!> \param calc_forces ...
1025!> \param Y_i_aP ...
1026!> \param Y_j_aP ...
1027!> \param alpha_beta ...
1028!> \param beta_beta ...
1029!> \param local_ba ...
1030!> \param virtual_beta ...
1031! **************************************************************************************************
1032   SUBROUTINE mp2_ri_allocate(local_ab, t_ab, mp2_env, homo, virtual, dimen_RI, my_B_size, &
1033                              block_size, my_B_size_beta, my_group_L_size, &
1034                              local_i_aL, local_j_aL, calc_forces, &
1035                              Y_i_aP, Y_j_aP, alpha_beta, &
1036                              beta_beta, local_ba, virtual_beta)
1037      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :), &
1038         INTENT(OUT)                                     :: local_ab, t_ab
1039      TYPE(mp2_type), POINTER                            :: mp2_env
1040      INTEGER, INTENT(IN)                                :: homo, virtual, dimen_RI, my_B_size, &
1041                                                            block_size, my_B_size_beta, &
1042                                                            my_group_L_size
1043      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), &
1044         INTENT(OUT)                                     :: local_i_aL, local_j_aL
1045      LOGICAL, INTENT(IN)                                :: calc_forces
1046      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), &
1047         INTENT(OUT)                                     :: Y_i_aP, Y_j_aP
1048      LOGICAL, INTENT(IN)                                :: alpha_beta, beta_beta
1049      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :), &
1050         INTENT(OUT), OPTIONAL                           :: local_ba
1051      INTEGER, INTENT(IN), OPTIONAL                      :: virtual_beta
1052
1053      CHARACTER(LEN=*), PARAMETER :: routineN = 'mp2_ri_allocate', &
1054         routineP = moduleN//':'//routineN
1055
1056      INTEGER                                            :: handle
1057
1058      CALL timeset(routineN, handle)
1059
1060      ALLOCATE (local_i_aL(dimen_RI, my_B_size, block_size))
1061      ALLOCATE (local_j_aL(dimen_RI, my_B_size_beta, block_size))
1062      ALLOCATE (local_ab(virtual, my_B_size_beta))
1063
1064      IF (calc_forces) THEN
1065         ALLOCATE (Y_i_aP(my_B_size, dimen_RI, block_size))
1066         Y_i_aP = 0.0_dp
1067         ! For  closed-shell, alpha-alpha and beta-beta my_B_size_beta=my_b_size
1068         ! Not for alpha-beta case: Y_j_aP_beta is sent and received as Y_j_aP
1069         ALLOCATE (Y_j_aP(my_B_size_beta, dimen_RI, block_size))
1070         Y_j_aP = 0.0_dp
1071         ! Closed shell or alpha-alpha case
1072         IF (.NOT. (beta_beta .OR. alpha_beta)) THEN
1073            ALLOCATE (mp2_env%ri_grad%P_ij(homo, homo))
1074            ALLOCATE (mp2_env%ri_grad%P_ab(my_B_size, virtual))
1075            mp2_env%ri_grad%P_ij = 0.0_dp
1076            mp2_env%ri_grad%P_ab = 0.0_dp
1077            ALLOCATE (mp2_env%ri_grad%Gamma_P_ia(homo, my_B_size, my_group_L_size))
1078            mp2_env%ri_grad%Gamma_P_ia = 0.0_dp
1079         ELSE
1080            IF (beta_beta) THEN
1081               ALLOCATE (mp2_env%ri_grad%P_ij_beta(homo, homo))
1082               ALLOCATE (mp2_env%ri_grad%P_ab_beta(my_B_size, virtual))
1083               mp2_env%ri_grad%P_ij_beta = 0.0_dp
1084               mp2_env%ri_grad%P_ab_beta = 0.0_dp
1085               ALLOCATE (mp2_env%ri_grad%Gamma_P_ia_beta(homo, my_B_size_beta, my_group_L_size))
1086               mp2_env%ri_grad%Gamma_P_ia_beta = 0.0_dp
1087            ENDIF
1088         ENDIF
1089         IF (.NOT. alpha_beta) THEN
1090            ! For non-alpha-beta case we need amplitudes
1091            ALLOCATE (t_ab(virtual, my_B_size_beta))
1092         ELSE
1093            ! We need more integrals
1094            ALLOCATE (local_ba(virtual_beta, my_B_size))
1095         ENDIF
1096      END IF
1097      !
1098
1099      CALL timestop(handle)
1100
1101   END SUBROUTINE mp2_ri_allocate
1102
1103! **************************************************************************************************
1104!> \brief ...
1105!> \param my_alpha_beta_case ...
1106!> \param total_ij_pairs ...
1107!> \param homo ...
1108!> \param homo_beta ...
1109!> \param num_IJ_blocks ...
1110!> \param block_size ...
1111!> \param ngroup ...
1112!> \param ij_map ...
1113!> \param color_sub ...
1114!> \param my_ij_pairs ...
1115!> \param my_open_shell_SS ...
1116!> \param unit_nr ...
1117! **************************************************************************************************
1118   SUBROUTINE mp2_ri_communication(my_alpha_beta_case, total_ij_pairs, homo, homo_beta, num_IJ_blocks, &
1119                                   block_size, ngroup, ij_map, color_sub, my_ij_pairs, my_open_shell_SS, unit_nr)
1120      LOGICAL, INTENT(IN)                                :: my_alpha_beta_case
1121      INTEGER, INTENT(OUT)                               :: total_ij_pairs
1122      INTEGER, INTENT(IN)                                :: homo, homo_beta
1123      INTEGER, INTENT(OUT)                               :: num_IJ_blocks
1124      INTEGER, INTENT(IN)                                :: block_size, ngroup
1125      INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(OUT) :: ij_map
1126      INTEGER, INTENT(IN)                                :: color_sub
1127      INTEGER, INTENT(OUT)                               :: my_ij_pairs
1128      LOGICAL, INTENT(IN)                                :: my_open_shell_SS
1129      INTEGER, INTENT(IN)                                :: unit_nr
1130
1131      CHARACTER(LEN=*), PARAMETER :: routineN = 'mp2_ri_communication', &
1132         routineP = moduleN//':'//routineN
1133
1134      INTEGER :: assigned_blocks, first_I_block, first_J_block, handle, iiB, ij_block_counter, &
1135         ij_counter, jjB, last_i_block, last_J_block, num_block_per_group, total_ij_block, &
1136         total_ij_pairs_blocks
1137      INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: ij_marker
1138
1139! Calculate the maximum number of ij pairs that have to be computed
1140! among groups
1141
1142      CALL timeset(routineN, handle)
1143
1144      IF (.NOT. my_alpha_beta_case) THEN
1145         total_ij_pairs = homo*(1 + homo)/2
1146         num_IJ_blocks = homo/block_size - 1
1147
1148         first_I_block = 1
1149         last_i_block = block_size*(num_IJ_blocks - 1)
1150
1151         first_J_block = block_size + 1
1152         last_J_block = block_size*(num_IJ_blocks + 1)
1153
1154         ij_block_counter = 0
1155         DO iiB = first_I_block, last_i_block, block_size
1156            DO jjB = iiB + block_size, last_J_block, block_size
1157               ij_block_counter = ij_block_counter + 1
1158            END DO
1159         END DO
1160
1161         total_ij_block = ij_block_counter
1162         num_block_per_group = total_ij_block/ngroup
1163         assigned_blocks = num_block_per_group*ngroup
1164
1165         total_ij_pairs_blocks = assigned_blocks + (total_ij_pairs - assigned_blocks*(block_size**2))
1166
1167         ALLOCATE (ij_marker(homo, homo))
1168         ij_marker = 0
1169         ALLOCATE (ij_map(total_ij_pairs_blocks, 3))
1170         ij_map = 0
1171         ij_counter = 0
1172         my_ij_pairs = 0
1173         DO iiB = first_I_block, last_i_block, block_size
1174            DO jjB = iiB + block_size, last_J_block, block_size
1175               IF (ij_counter + 1 > assigned_blocks) EXIT
1176               ij_counter = ij_counter + 1
1177               ij_marker(iiB:iiB + block_size - 1, jjB:jjB + block_size - 1) = 1
1178               ij_map(ij_counter, 1) = iiB
1179               ij_map(ij_counter, 2) = jjB
1180               ij_map(ij_counter, 3) = block_size
1181               IF (MOD(ij_counter, ngroup) == color_sub) my_ij_pairs = my_ij_pairs + 1
1182            END DO
1183         END DO
1184         DO iiB = 1, homo
1185            DO jjB = iiB, homo
1186               IF (ij_marker(iiB, jjB) == 0) THEN
1187                  ij_counter = ij_counter + 1
1188                  ij_map(ij_counter, 1) = iiB
1189                  ij_map(ij_counter, 2) = jjB
1190                  ij_map(ij_counter, 3) = 1
1191                  IF (MOD(ij_counter, ngroup) == color_sub) my_ij_pairs = my_ij_pairs + 1
1192               END IF
1193            END DO
1194         END DO
1195         DEALLOCATE (ij_marker)
1196
1197         IF ((.NOT. my_open_shell_SS)) THEN
1198            IF (unit_nr > 0) THEN
1199               IF (block_size == 1) THEN
1200                  WRITE (UNIT=unit_nr, FMT="(T3,A,T66,F15.1)") &
1201                     "RI_INFO| Percentage of ij pairs communicated with block size 1:", 100.0_dp
1202               ELSE
1203                  WRITE (UNIT=unit_nr, FMT="(T3,A,T66,F15.1)") &
1204                     "RI_INFO| Percentage of ij pairs communicated with block size 1:", &
1205                     100.0_dp*REAL((total_ij_pairs - assigned_blocks*(block_size**2)), KIND=dp)/REAL(total_ij_pairs, KIND=dp)
1206               END IF
1207               CALL m_flush(unit_nr)
1208            END IF
1209         END IF
1210
1211      ELSE
1212         ! alpha-beta case no index symmetry
1213         total_ij_pairs = homo*homo_beta
1214         ALLOCATE (ij_map(total_ij_pairs, 3))
1215         ij_map = 0
1216         ij_counter = 0
1217         my_ij_pairs = 0
1218         DO iiB = 1, homo
1219            DO jjB = 1, homo_beta
1220               ij_counter = ij_counter + 1
1221               ij_map(ij_counter, 1) = iiB
1222               ij_map(ij_counter, 2) = jjB
1223               ij_map(ij_counter, 3) = 1
1224               IF (MOD(ij_counter, ngroup) == color_sub) my_ij_pairs = my_ij_pairs + 1
1225            END DO
1226         END DO
1227      END IF
1228
1229      CALL timestop(handle)
1230
1231   END SUBROUTINE mp2_ri_communication
1232
1233! **************************************************************************************************
1234!> \brief ...
1235!> \param BIb_C ...
1236!> \param para_env ...
1237!> \param para_env_sub ...
1238!> \param homo ...
1239!> \param color_sub ...
1240!> \param sizes_array ...
1241!> \param calc_forces ...
1242!> \param comm_exchange ...
1243!> \param integ_group_size ...
1244!> \param my_B_size ...
1245!> \param iiB ...
1246!> \param my_group_L_end ...
1247!> \param my_group_L_size ...
1248!> \param my_group_L_size_orig ...
1249!> \param my_group_L_start ...
1250!> \param my_new_group_L_size ...
1251!> \param sub_sub_color ...
1252!> \param integ_group_pos2color_sub ...
1253!> \param proc_map ...
1254!> \param proc_map_rep ...
1255!> \param sizes_array_orig ...
1256!> \param sub_proc_map ...
1257!> \param ranges_info_array ...
1258!> \param para_env_exchange ...
1259!> \param para_env_rep ...
1260!> \param num_integ_group ...
1261! **************************************************************************************************
1262   SUBROUTINE mp2_ri_create_group(BIb_C, para_env, para_env_sub, homo, color_sub, &
1263                                  sizes_array, calc_forces, &
1264                                  comm_exchange, integ_group_size, my_B_size, iiB, my_group_L_end, &
1265                                  my_group_L_size, my_group_L_size_orig, my_group_L_start, my_new_group_L_size, &
1266                                  sub_sub_color, integ_group_pos2color_sub, &
1267                                  proc_map, proc_map_rep, sizes_array_orig, &
1268                                  sub_proc_map, ranges_info_array, para_env_exchange, para_env_rep, num_integ_group)
1269      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), &
1270         INTENT(INOUT)                                   :: BIb_C
1271      TYPE(cp_para_env_type), POINTER                    :: para_env, para_env_sub
1272      INTEGER, INTENT(IN)                                :: homo, color_sub
1273      INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(INOUT)  :: sizes_array
1274      LOGICAL, INTENT(IN)                                :: calc_forces
1275      INTEGER, INTENT(OUT)                               :: comm_exchange
1276      INTEGER, INTENT(IN)                                :: integ_group_size, my_B_size
1277      INTEGER, INTENT(OUT)                               :: iiB
1278      INTEGER, INTENT(IN)                                :: my_group_L_end
1279      INTEGER, INTENT(INOUT)                             :: my_group_L_size
1280      INTEGER, INTENT(OUT)                               :: my_group_L_size_orig
1281      INTEGER, INTENT(IN)                                :: my_group_L_start
1282      INTEGER, INTENT(INOUT)                             :: my_new_group_L_size
1283      INTEGER, INTENT(OUT)                               :: sub_sub_color
1284      INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(OUT)    :: integ_group_pos2color_sub, proc_map, &
1285                                                            proc_map_rep, sizes_array_orig, &
1286                                                            sub_proc_map
1287      INTEGER, ALLOCATABLE, DIMENSION(:, :, :), &
1288         INTENT(OUT)                                     :: ranges_info_array
1289      TYPE(cp_para_env_type), POINTER                    :: para_env_exchange, para_env_rep
1290      INTEGER, INTENT(IN)                                :: num_integ_group
1291
1292      CHARACTER(LEN=*), PARAMETER :: routineN = 'mp2_ri_create_group', &
1293         routineP = moduleN//':'//routineN
1294
1295      INTEGER                                            :: handle, i
1296      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: new_sizes_array
1297
1298      CALL timeset(routineN, handle)
1299      !
1300      sub_sub_color = para_env_sub%mepos*num_integ_group + color_sub/integ_group_size
1301      CALL mp_comm_split_direct(para_env%group, comm_exchange, sub_sub_color)
1302      NULLIFY (para_env_exchange)
1303      CALL cp_para_env_create(para_env_exchange, comm_exchange)
1304
1305      ! create the proc maps
1306      ALLOCATE (proc_map(-para_env_exchange%num_pe:2*para_env_exchange%num_pe - 1))
1307      DO i = 0, para_env_exchange%num_pe - 1
1308         proc_map(i) = i
1309         proc_map(-i - 1) = para_env_exchange%num_pe - i - 1
1310         proc_map(para_env_exchange%num_pe + i) = i
1311      END DO
1312
1313      ALLOCATE (sub_proc_map(-para_env_sub%num_pe:2*para_env_sub%num_pe - 1))
1314      DO i = 0, para_env_sub%num_pe - 1
1315         sub_proc_map(i) = i
1316         sub_proc_map(-i - 1) = para_env_sub%num_pe - i - 1
1317         sub_proc_map(para_env_sub%num_pe + i) = i
1318      END DO
1319
1320      CALL replicate_iaK_2intgroup(BIb_C, para_env, para_env_sub, para_env_exchange, para_env_rep, &
1321                                   homo, proc_map_rep, &
1322                                   sizes_array, &
1323                                   my_B_size, &
1324                                   my_group_L_size, my_group_L_start, my_group_L_end, &
1325                                   my_new_group_L_size, new_sizes_array, ranges_info_array)
1326
1327      ALLOCATE (integ_group_pos2color_sub(0:para_env_exchange%num_pe - 1))
1328      integ_group_pos2color_sub = 0
1329      integ_group_pos2color_sub(para_env_exchange%mepos) = color_sub
1330      CALL mp_sum(integ_group_pos2color_sub, para_env_exchange%group)
1331
1332      IF (calc_forces) THEN
1333         iiB = SIZE(sizes_array)
1334         ALLOCATE (sizes_array_orig(0:iiB - 1))
1335         sizes_array_orig(:) = sizes_array
1336      END IF
1337
1338      my_group_L_size_orig = my_group_L_size
1339      my_group_L_size = my_new_group_L_size
1340      DEALLOCATE (sizes_array)
1341
1342      ALLOCATE (sizes_array(0:integ_group_size - 1))
1343      sizes_array(:) = new_sizes_array
1344
1345      DEALLOCATE (new_sizes_array)
1346      !
1347      CALL timestop(handle)
1348
1349   END SUBROUTINE mp2_ri_create_group
1350
1351! **************************************************************************************************
1352!> \brief ...
1353!> \param mp2_env ...
1354!> \param para_env ...
1355!> \param para_env_sub ...
1356!> \param gd_array ...
1357!> \param gd_B_virtual ...
1358!> \param homo ...
1359!> \param dimen_RI ...
1360!> \param unit_nr ...
1361!> \param color_sub ...
1362!> \param best_block_size ...
1363!> \param best_integ_group_size ...
1364!> \param block_size ...
1365!> \param integ_group_size ...
1366!> \param min_integ_group_size ...
1367!> \param my_B_size ...
1368!> \param my_B_virtual_end ...
1369!> \param my_B_virtual_start ...
1370!> \param my_group_L_size ...
1371!> \param my_group_L_start ...
1372!> \param my_group_L_end ...
1373!> \param ngroup ...
1374!> \param num_IJ_blocks ...
1375!> \param num_integ_group ...
1376!> \param pos_integ_group ...
1377!> \param virtual ...
1378!> \param my_alpha_beta_case ...
1379!> \param my_open_shell_SS ...
1380!> \param mem_for_aK ...
1381!> \param mem_for_comm ...
1382!> \param mem_for_iaK ...
1383!> \param mem_for_rep ...
1384!> \param mem_min ...
1385!> \param mem_per_group ...
1386!> \param mem_real ...
1387! **************************************************************************************************
1388   SUBROUTINE mp2_ri_get_sizes(mp2_env, para_env, para_env_sub, gd_array, gd_B_virtual, &
1389                               homo, dimen_RI, unit_nr, color_sub, &
1390                               best_block_size, best_integ_group_size, block_size, &
1391                               integ_group_size, min_integ_group_size, my_B_size, &
1392                               my_B_virtual_end, my_B_virtual_start, my_group_L_size, &
1393                               my_group_L_start, my_group_L_end, ngroup, num_IJ_blocks, num_integ_group, &
1394                               pos_integ_group, virtual, my_alpha_beta_case, &
1395                               my_open_shell_SS, mem_for_aK, mem_for_comm, &
1396                               mem_for_iaK, mem_for_rep, mem_min, mem_per_group, mem_real)
1397      TYPE(mp2_type), POINTER                            :: mp2_env
1398      TYPE(cp_para_env_type), POINTER                    :: para_env, para_env_sub
1399      TYPE(group_dist_d1_type), INTENT(IN)               :: gd_array, gd_B_virtual
1400      INTEGER, INTENT(IN)                                :: homo, dimen_RI, unit_nr, color_sub
1401      INTEGER, INTENT(OUT) :: best_block_size, best_integ_group_size, block_size, &
1402         integ_group_size, min_integ_group_size, my_B_size, my_B_virtual_end, my_B_virtual_start, &
1403         my_group_L_size, my_group_L_start, my_group_L_end, ngroup, num_IJ_blocks, &
1404         num_integ_group, pos_integ_group
1405      INTEGER, INTENT(IN)                                :: virtual
1406      LOGICAL, INTENT(IN)                                :: my_alpha_beta_case, my_open_shell_SS
1407      REAL(KIND=dp), INTENT(OUT)                         :: mem_for_aK, mem_for_comm, mem_for_iaK, &
1408                                                            mem_for_rep, mem_min, mem_per_group, &
1409                                                            mem_real
1410
1411      CHARACTER(LEN=*), PARAMETER :: routineN = 'mp2_ri_get_sizes', &
1412         routineP = moduleN//':'//routineN
1413
1414      INTEGER                                            :: handle, iiB
1415      INTEGER(KIND=int_8)                                :: mem
1416
1417      CALL timeset(routineN, handle)
1418
1419      ngroup = para_env%num_pe/para_env_sub%num_pe
1420
1421      ! Calculate available memory and create integral group according to that
1422      ! mem_for_iaK is the memory needed for storing the 3 centre integrals
1423      mem_for_iaK = REAL(homo, KIND=dp)*virtual*dimen_RI*8.0_dp/(1024_dp**2)
1424      mem_for_aK = REAL(virtual, KIND=dp)*dimen_RI*8.0_dp/(1024_dp**2)
1425
1426      CALL m_memory(mem)
1427      mem_real = (mem + 1024*1024 - 1)/(1024*1024)
1428      ! mp_min .... a hack.. it should be mp_max, but as it turns out, on some processes the previously freed memory (hfx)
1429      ! has not been given back to the OS yet.
1430      CALL mp_min(mem_real, para_env%group)
1431
1432      mem_min = 2.0_dp*REAL(homo, KIND=dp)*maxsize(gd_B_virtual)*maxsize(gd_array)*8.0_dp/(1024**2)
1433      mem_min = mem_min + 3.0_dp*maxsize(gd_B_virtual)*REAL(dimen_RI, KIND=dp)*8.0_dp/(1024**2)
1434
1435      IF ((.NOT. my_open_shell_SS) .AND. (.NOT. my_alpha_beta_case)) THEN
1436         IF (unit_nr > 0) WRITE (unit_nr, '(T3,A,T68,F9.2,A4)') 'RI_INFO| Minimum required memory per MPI process:', &
1437            mem_min, ' MiB'
1438      END IF
1439
1440      mem_real = mp2_env%mp2_memory
1441
1442      mem_per_group = mem_real*para_env_sub%num_pe
1443
1444      ! here we try to find the best block_size and integ_group_size
1445      best_integ_group_size = ngroup
1446      best_block_size = 1
1447
1448      ! in the open shell case no replication and no block communication is done
1449      IF ((.NOT. my_open_shell_SS) .AND. (.NOT. my_alpha_beta_case)) THEN
1450         ! Here we split the memory half for the communication, half for replication
1451         IF (mp2_env%ri_mp2%block_size > 0) THEN
1452            best_block_size = mp2_env%ri_mp2%block_size
1453            mem_for_rep = MAX(mem_min, mem_per_group - 2.0_dp*mem_for_aK*best_block_size)
1454         ELSE
1455            mem_for_rep = mem_per_group/2.0_dp
1456         END IF
1457         ! calculate the minimum replication group size according to the available memory
1458         min_integ_group_size = CEILING(2.0_dp*mem_for_iaK/mem_for_rep)
1459
1460         integ_group_size = MIN(min_integ_group_size, ngroup) - 1
1461         DO iiB = min_integ_group_size + 1, ngroup
1462            integ_group_size = integ_group_size + 1
1463            ! check that the ngroup is a multiple of  integ_group_size
1464            IF (MOD(ngroup, integ_group_size) /= 0) CYCLE
1465            ! check that the integ group size is not too small (10% is empirical for now)
1466            IF (REAL(integ_group_size, KIND=dp)/REAL(ngroup, KIND=dp) < 0.1_dp) CYCLE
1467
1468            best_integ_group_size = integ_group_size
1469            EXIT
1470         END DO
1471
1472         IF (.NOT. (mp2_env%ri_mp2%block_size > 0)) THEN
1473            mem_for_comm = mem_per_group - 2.0_dp*mem_for_iaK/best_integ_group_size
1474            DO
1475               num_IJ_blocks = (homo/best_block_size)
1476               num_IJ_blocks = (num_IJ_blocks*num_IJ_blocks - num_IJ_blocks)/2
1477               IF (num_IJ_blocks > ngroup .OR. best_block_size == 1) THEN
1478                  EXIT
1479               ELSE
1480                  best_block_size = best_block_size - 1
1481               END IF
1482            END DO
1483         END IF
1484
1485         ! check that best_block_size is not bigger than homo/2-1
1486         best_block_size = MIN(MAX(homo/2 - 1 + MOD(homo, 2), 1), best_block_size)
1487      END IF
1488
1489      integ_group_size = best_integ_group_size
1490      block_size = best_block_size
1491
1492      IF ((.NOT. my_open_shell_SS) .AND. (.NOT. my_alpha_beta_case)) THEN
1493         IF (unit_nr > 0) THEN
1494            WRITE (UNIT=unit_nr, FMT="(T3,A,T75,i6)") &
1495               "RI_INFO| Group size for integral replication:", integ_group_size*para_env_sub%num_pe
1496            WRITE (UNIT=unit_nr, FMT="(T3,A,T75,i6)") &
1497               "RI_INFO| Block size:", block_size
1498            CALL m_flush(unit_nr)
1499         END IF
1500      END IF
1501
1502      num_integ_group = ngroup/integ_group_size
1503
1504      pos_integ_group = MOD(color_sub, integ_group_size)
1505
1506      CALL get_group_dist(gd_array, color_sub, my_group_L_start, my_group_L_end, my_group_L_size)
1507
1508      CALL get_group_dist(gd_B_virtual, para_env_sub%mepos, my_B_virtual_start, my_B_virtual_end, my_B_size)
1509
1510      CALL timestop(handle)
1511
1512   END SUBROUTINE mp2_ri_get_sizes
1513
1514! **************************************************************************************************
1515!> \brief ...
1516!> \param mp2_env ...
1517!> \param para_env_sub ...
1518!> \param gd_B_virtual ...
1519!> \param Eigenval ...
1520!> \param homo ...
1521!> \param dimen_RI ...
1522!> \param iiB ...
1523!> \param jjB ...
1524!> \param my_B_size ...
1525!> \param my_B_virtual_end ...
1526!> \param my_B_virtual_start ...
1527!> \param my_i ...
1528!> \param my_j ...
1529!> \param virtual ...
1530!> \param sub_proc_map ...
1531!> \param local_ab ...
1532!> \param t_ab ...
1533!> \param local_i_aL ...
1534!> \param local_j_aL ...
1535!> \param open_ss ...
1536!> \param alpha_alpha ...
1537!> \param beta_beta ...
1538!> \param Y_i_aP ...
1539!> \param Y_j_aP ...
1540!> \param eigenval_beta ...
1541!> \param homo_beta ...
1542!> \param my_B_size_beta ...
1543!> \param gd_B_virtual_beta ...
1544!> \param my_B_virtual_start_beta ...
1545!> \param my_B_virtual_end_beta ...
1546!> \param virtual_beta ...
1547!> \param local_ba ...
1548! **************************************************************************************************
1549   SUBROUTINE mp2_update_P_gamma(mp2_env, para_env_sub, gd_B_virtual, &
1550                                 Eigenval, homo, dimen_RI, iiB, jjB, my_B_size, &
1551                                 my_B_virtual_end, my_B_virtual_start, my_i, my_j, virtual, sub_proc_map, local_ab, &
1552                                 t_ab, local_i_aL, local_j_aL, open_ss, alpha_alpha, beta_beta, Y_i_aP, Y_j_aP, &
1553                                 eigenval_beta, homo_beta, my_B_size_beta, gd_B_virtual_beta, &
1554                                 my_B_virtual_start_beta, my_B_virtual_end_beta, virtual_beta, local_ba)
1555      TYPE(mp2_type), POINTER                            :: mp2_env
1556      TYPE(cp_para_env_type), POINTER                    :: para_env_sub
1557      TYPE(group_dist_d1_type), INTENT(IN)               :: gd_B_virtual
1558      REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: Eigenval
1559      INTEGER, INTENT(IN)                                :: homo, dimen_RI, iiB, jjB, my_B_size, &
1560                                                            my_B_virtual_end, my_B_virtual_start, &
1561                                                            my_i, my_j, virtual
1562      INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(IN)     :: sub_proc_map
1563      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :), &
1564         INTENT(INOUT)                                   :: local_ab
1565      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :), &
1566         INTENT(IN)                                      :: t_ab
1567      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), &
1568         INTENT(IN)                                      :: local_i_aL, local_j_aL
1569      LOGICAL, INTENT(IN)                                :: open_ss, alpha_alpha, beta_beta
1570      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), &
1571         INTENT(INOUT)                                   :: Y_i_aP, Y_j_aP
1572      REAL(KIND=dp), DIMENSION(:), INTENT(IN), OPTIONAL  :: Eigenval_beta
1573      INTEGER, INTENT(IN), OPTIONAL                      :: homo_beta, my_B_size_beta
1574      TYPE(group_dist_d1_type), INTENT(IN), OPTIONAL     :: gd_B_virtual_beta
1575      INTEGER, INTENT(IN), OPTIONAL                      :: my_B_virtual_start_beta, &
1576                                                            my_B_virtual_end_beta, virtual_beta
1577      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :), &
1578         INTENT(INOUT), OPTIONAL                         :: local_ba
1579
1580      CHARACTER(LEN=*), PARAMETER :: routineN = 'mp2_update_P_gamma', &
1581         routineP = moduleN//':'//routineN
1582
1583      INTEGER :: a, b, b_global, handle, proc_receive, proc_send, proc_shift, rec_B_size, &
1584         rec_B_virtual_end, rec_B_virtual_start, send_B_size, send_B_virtual_end, &
1585         send_B_virtual_start
1586      LOGICAL                                            :: alpha_beta
1587      REAL(KIND=dp)                                      :: factor, P_ij_diag
1588      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: external_ab, send_ab
1589
1590!
1591!  In alpha-beta case Y_j_aP_beta is sent and is received as Y_j_aP
1592!
1593
1594      CALL timeset(routineN//"_Pia", handle)
1595! Find out whether we have an alpha-beta case
1596      alpha_beta = .FALSE.
1597      IF (PRESENT(Eigenval_beta) .AND. PRESENT(homo_beta) .AND. PRESENT(my_B_size_beta)) &
1598         alpha_beta = .TRUE.
1599! update P_ab, Gamma_P_ia
1600! First, P_ab
1601      IF (open_ss) THEN
1602         factor = 1.0_dp
1603      ELSE
1604         factor = 2.0_dp
1605      ENDIF
1606      ! divide the (ia|jb) integrals by Delta_ij^ab
1607      IF (.NOT. alpha_beta) THEN
1608         DO b = 1, my_B_size
1609            b_global = b + my_B_virtual_start - 1
1610            DO a = 1, virtual
1611               local_ab(a, b) = -local_ab(a, b)/ &
1612                              (Eigenval(homo + a) + Eigenval(homo + b_global) - Eigenval(my_i + iiB - 1) - Eigenval(my_j + jjB - 1))
1613            END DO
1614         END DO
1615         ! update diagonal part of P_ij
1616         P_ij_diag = -SUM(local_ab*t_ab)*factor
1617      ELSE
1618         DO b = 1, my_B_size_beta
1619            b_global = b + my_B_virtual_start_beta - 1
1620            DO a = 1, virtual
1621               local_ab(a, b) = -local_ab(a, b)/ &
1622               (Eigenval(homo + a) + Eigenval_beta(homo_beta + b_global) - Eigenval(my_i + iiB - 1) - Eigenval_beta(my_j + jjB - 1))
1623            END DO
1624         END DO
1625         ! update diagonal part of P_ij
1626         P_ij_diag = -SUM(local_ab*local_ab)
1627         ! More integrals needed only for alpha-beta case: local_ba
1628         DO b = 1, my_B_size
1629            b_global = b + my_B_virtual_start - 1
1630            DO a = 1, virtual_beta
1631               local_ba(a, b) = -local_ba(a, b)/ &
1632               (Eigenval_beta(homo_beta + a) + Eigenval(homo + b_global) - Eigenval(my_i + iiB - 1) - Eigenval_beta(my_j + jjB - 1))
1633            END DO
1634         END DO
1635      ENDIF
1636
1637      ! P_ab and add diagonal part of P_ij
1638
1639      ! Alpha_alpha or closed-shell case
1640      IF (((.NOT. open_ss) .AND. (.NOT. alpha_beta)) .OR. alpha_alpha) THEN
1641         CALL dgemm('T', 'N', my_B_size, my_B_size, virtual, 1.0_dp, &
1642                    t_ab(:, :), virtual, local_ab(:, :), virtual, &
1643                    1.0_dp, mp2_env%ri_grad%P_ab(1:my_B_size, my_B_virtual_start:my_B_virtual_end), my_B_size)
1644         mp2_env%ri_grad%P_ij(my_i + iiB - 1, my_i + iiB - 1) = &
1645            mp2_env%ri_grad%P_ij(my_i + iiB - 1, my_i + iiB - 1) + P_ij_diag
1646      ENDIF
1647      ! Beta_beta case
1648      IF (beta_beta) THEN
1649         CALL dgemm('T', 'N', my_B_size, my_B_size, virtual, 1.0_dp, &
1650                    t_ab(:, :), virtual, local_ab(:, :), virtual, &
1651                    1.0_dp, mp2_env%ri_grad%P_ab_beta(1:my_B_size, my_B_virtual_start:my_B_virtual_end), my_B_size)
1652         mp2_env%ri_grad%P_ij_beta(my_i + iiB - 1, my_i + iiB - 1) = &
1653            mp2_env%ri_grad%P_ij_beta(my_i + iiB - 1, my_i + iiB - 1) + P_ij_diag
1654      ENDIF
1655      ! Alpha_beta case
1656      IF (alpha_beta) THEN
1657         CALL dgemm('T', 'N', my_B_size, my_B_size, virtual_beta, 1.0_dp, &
1658                    local_ba(:, :), virtual_beta, local_ba(:, :), virtual_beta, 1.0_dp, &
1659                    mp2_env%ri_grad%P_ab(1:my_B_size, my_B_virtual_start: &
1660                                         my_B_virtual_end), my_B_size)
1661         mp2_env%ri_grad%P_ij(my_i + iiB - 1, my_i + iiB - 1) = &
1662            mp2_env%ri_grad%P_ij(my_i + iiB - 1, my_i + iiB - 1) + P_ij_diag
1663         CALL dgemm('T', 'N', my_B_size_beta, my_B_size_beta, virtual, 1.0_dp, &
1664                    local_ab(:, :), virtual, local_ab(:, :), virtual, 1.0_dp, &
1665                    mp2_env%ri_grad%P_ab_beta(1:my_B_size_beta, my_B_virtual_start_beta: &
1666                                              my_B_virtual_end_beta), my_B_size_beta)
1667         mp2_env%ri_grad%P_ij_beta(my_j + jjB - 1, my_j + jjB - 1) = &
1668            mp2_env%ri_grad%P_ij_beta(my_j + jjB - 1, my_j + jjB - 1) + P_ij_diag
1669      ENDIF
1670      ! The summation is over unique pairs. In alpha-beta case, all pairs are unique: subroutine is called for
1671      ! both i^alpha,j^beta and i^beta,j^alpha. Formally, my_i can be equal to my_j, but they are different
1672      ! due to spin in alpha-beta case.
1673      IF ((my_i /= my_j) .AND. (.NOT. alpha_beta)) THEN
1674         ! Alpha_alpha or closed-shell case
1675         IF (((.NOT. open_ss) .AND. (.NOT. alpha_beta)) .OR. alpha_alpha) THEN
1676            CALL dgemm('N', 'T', my_B_size, virtual, my_B_size, 1.0_dp, &
1677                       t_ab(my_B_virtual_start:my_B_virtual_end, 1:my_B_size), my_B_size, &
1678                       local_ab(:, :), virtual, &
1679                       1.0_dp, mp2_env%ri_grad%P_ab(1:my_B_size, 1:virtual), my_B_size)
1680            mp2_env%ri_grad%P_ij(my_j + jjB - 1, my_j + jjB - 1) = &
1681               mp2_env%ri_grad%P_ij(my_j + jjB - 1, my_j + jjB - 1) + P_ij_diag
1682         ENDIF
1683         ! Beta_beta_case
1684         IF (beta_beta) THEN
1685            CALL dgemm('N', 'T', my_B_size, virtual, my_B_size, 1.0_dp, &
1686                       t_ab(my_B_virtual_start:my_B_virtual_end, 1:my_B_size), my_B_size, &
1687                       local_ab(:, :), virtual, &
1688                       1.0_dp, mp2_env%ri_grad%P_ab_beta(1:my_B_size, 1:virtual), my_B_size)
1689            mp2_env%ri_grad%P_ij_beta(my_j + jjB - 1, my_j + jjB - 1) = &
1690               mp2_env%ri_grad%P_ij_beta(my_j + jjB - 1, my_j + jjB - 1) + P_ij_diag
1691         ENDIF
1692      END IF
1693      DO proc_shift = 1, para_env_sub%num_pe - 1
1694         proc_send = sub_proc_map(para_env_sub%mepos + proc_shift)
1695         proc_receive = sub_proc_map(para_env_sub%mepos - proc_shift)
1696         ! Alpha-alpha, beta-beta, closed shell
1697         IF (.NOT. alpha_beta) THEN
1698            CALL get_group_dist(gd_B_virtual, proc_receive, rec_B_virtual_start, rec_B_virtual_end, rec_B_size)
1699            CALL get_group_dist(gd_B_virtual, proc_send, send_B_virtual_start, send_B_virtual_end, send_B_size)
1700         ELSE ! Alpha-beta case
1701            CALL get_group_dist(gd_B_virtual_beta, proc_receive, rec_B_virtual_start, rec_B_virtual_end, rec_B_size)
1702            CALL get_group_dist(gd_B_virtual_beta, proc_send, send_B_virtual_start, send_B_virtual_end, send_B_size)
1703         ENDIF
1704
1705         ALLOCATE (external_ab(virtual, rec_B_size))
1706         external_ab = 0.0_dp
1707
1708         IF (.NOT. alpha_beta) THEN
1709            CALL mp_sendrecv(local_ab(1:virtual, 1:my_B_size), proc_send, &
1710                             external_ab(1:virtual, 1:rec_B_size), proc_receive, &
1711                             para_env_sub%group)
1712         ELSE
1713            CALL mp_sendrecv(local_ab(1:virtual, 1:my_B_size_beta), proc_send, &
1714                             external_ab(1:virtual, 1:rec_B_size), proc_receive, &
1715                             para_env_sub%group)
1716         ENDIF
1717
1718         ! Alpha-alpha or closed-shell case
1719         IF (((.NOT. open_ss) .AND. (.NOT. alpha_beta)) .OR. alpha_alpha) &
1720            CALL dgemm('T', 'N', my_B_size, rec_B_size, virtual, 1.0_dp, &
1721                       t_ab(:, :), virtual, external_ab(:, :), virtual, &
1722                       1.0_dp, mp2_env%ri_grad%P_ab(1:my_B_size, rec_B_virtual_start:rec_B_virtual_end), my_B_size)
1723         ! Beta-beta case
1724         IF (beta_beta) &
1725            CALL dgemm('T', 'N', my_B_size, rec_B_size, virtual, 1.0_dp, &
1726                       t_ab(:, :), virtual, external_ab(:, :), virtual, &
1727                       1.0_dp, mp2_env%ri_grad%P_ab_beta(1:my_B_size, rec_B_virtual_start:rec_B_virtual_end), my_B_size)
1728         ! Alpha-beta case
1729         IF (alpha_beta) THEN
1730            !   CALL dgemm('N','T',my_B_size,virtual,rec_B_size,1.0_dp,&
1731            !             local_ab(1:my_B_size,rec_B_virtual_start:rec_B_virtual_end),my_B_size,external_ab(:,:),rec_B_size,&
1732            !             1.0_dp,mp2_env%ri_grad%P_ab(1:my_B_size,1:virtual),my_B_size)
1733
1734            ! Alpha-beta part of beta-beta density
1735            CALL dgemm('T', 'N', my_B_size_beta, rec_B_size, virtual, 1.0_dp, &
1736                       local_ab(:, :), virtual, external_ab(:, :), virtual, &
1737                       1.0_dp, mp2_env%ri_grad%P_ab_beta(1:my_B_size_beta, rec_B_virtual_start:rec_B_virtual_end), &
1738                       my_B_size_beta)
1739
1740            ! For alpha-beta part of alpha-density we need a new parallel code
1741            DEALLOCATE (external_ab)
1742            ! And new external_ab (of a different size)
1743            CALL get_group_dist(gd_B_virtual, proc_receive, rec_B_virtual_start, rec_B_virtual_end, rec_B_size)
1744            CALL get_group_dist(gd_B_virtual, proc_send, send_B_virtual_start, send_B_virtual_end, send_B_size)
1745            ALLOCATE (external_ab(virtual_beta, rec_B_size))
1746            external_ab = 0.0_dp
1747            CALL mp_sendrecv(local_ba(1:virtual_beta, 1:my_B_size), proc_send, &
1748                             external_ab(1:virtual_beta, 1:rec_B_size), proc_receive, &
1749                             para_env_sub%group)
1750            CALL dgemm('T', 'N', my_B_size, rec_B_size, virtual_beta, 1.0_dp, &
1751                       local_ba(:, :), virtual_beta, external_ab(:, :), virtual_beta, &
1752                       1.0_dp, mp2_env%ri_grad%P_ab(1:my_B_size, rec_B_virtual_start:rec_B_virtual_end), my_B_size)
1753         ENDIF
1754
1755         DEALLOCATE (external_ab)
1756
1757         IF ((my_i /= my_j) .AND. (.NOT. alpha_beta)) THEN
1758            ALLOCATE (external_ab(my_B_size, virtual))
1759            external_ab = 0.0_dp
1760
1761            ALLOCATE (send_ab(send_B_size, virtual))
1762            send_ab = 0.0_dp
1763
1764            CALL dgemm('N', 'T', send_B_size, virtual, my_B_size, 1.0_dp, &
1765                       t_ab(send_B_virtual_start:send_B_virtual_end, 1:my_B_size), send_B_size, &
1766                       local_ab(:, :), virtual, &
1767                       0.0_dp, send_ab(1:send_B_size, 1:virtual), send_B_size)
1768
1769            CALL mp_sendrecv(send_ab, proc_send, &
1770                             external_ab, proc_receive, &
1771                             para_env_sub%group)
1772
1773            ! Alpha_alpha or closed-shell case
1774            IF (((.NOT. open_ss) .AND. (.NOT. alpha_beta)) .OR. alpha_alpha) &
1775               mp2_env%ri_grad%P_ab(:, :) = mp2_env%ri_grad%P_ab + external_ab
1776            ! Beta_beta case
1777            IF (beta_beta) &
1778               mp2_env%ri_grad%P_ab_beta(:, :) = mp2_env%ri_grad%P_ab_beta + external_ab
1779
1780            DEALLOCATE (external_ab)
1781            DEALLOCATE (send_ab)
1782         END IF
1783
1784      END DO
1785      CALL timestop(handle)
1786
1787      ! Now, Gamma_P_ia (made of Y_ia_P)
1788
1789      CALL timeset(routineN//"_Gamma", handle)
1790      IF (.NOT. alpha_beta) THEN
1791         ! Alpha-alpha, beta-beta and closed shell
1792         CALL dgemm('N', 'T', my_B_size, dimen_RI, my_B_size, 1.0_dp, &
1793                    t_ab(my_B_virtual_start:my_B_virtual_end, 1:my_B_size), my_B_size, &
1794                    local_j_aL(1:dimen_RI, 1:my_B_size, jjB), dimen_RI, &
1795                    1.0_dp, Y_i_aP(1:my_B_size, 1:dimen_RI, iiB), my_B_size)
1796      ELSE ! Alpha-beta
1797         CALL dgemm('N', 'T', my_B_size, dimen_RI, my_B_size_beta, 1.0_dp, &
1798                    local_ab(my_B_virtual_start:my_B_virtual_end, 1:my_B_size_beta), my_B_size, &
1799                    local_j_aL(1:dimen_RI, 1:my_B_size_beta, jjB), dimen_RI, &
1800                    1.0_dp, Y_i_aP(1:my_B_size, 1:dimen_RI, iiB), my_B_size)
1801         CALL dgemm('T', 'T', my_B_size_beta, dimen_RI, my_B_size, 1.0_dp, &
1802                    local_ab(my_B_virtual_start:my_B_virtual_end, 1:my_B_size_beta), my_B_size, &
1803                    local_i_aL(1:dimen_RI, 1:my_B_size, jjB), dimen_RI, &
1804                    1.0_dp, Y_j_aP(1:my_B_size_beta, 1:dimen_RI, jjB), my_B_size_beta)
1805
1806      ENDIF
1807
1808      ALLOCATE (external_ab(my_B_size, dimen_RI))
1809      external_ab = 0.0_dp
1810      !
1811      DO proc_shift = 1, para_env_sub%num_pe - 1
1812         proc_send = sub_proc_map(para_env_sub%mepos + proc_shift)
1813         proc_receive = sub_proc_map(para_env_sub%mepos - proc_shift)
1814
1815         CALL get_group_dist(gd_B_virtual, proc_receive, rec_B_virtual_start, rec_B_virtual_end, rec_B_size)
1816
1817         CALL get_group_dist(gd_B_virtual, proc_send, send_B_virtual_start, send_B_virtual_end, send_B_size)
1818
1819         IF (.NOT. alpha_beta) THEN
1820            ALLOCATE (send_ab(send_B_size, dimen_RI))
1821            send_ab = 0.0_dp
1822            CALL dgemm('N', 'T', send_B_size, dimen_RI, my_B_size, 1.0_dp, &
1823                       t_ab(send_B_virtual_start:send_B_virtual_end, 1:my_B_size), send_B_size, &
1824                       local_j_aL(1:dimen_RI, 1:my_B_size, jjB), dimen_RI, &
1825                       0.0_dp, send_ab(1:send_B_size, 1:dimen_RI), send_B_size)
1826            CALL mp_sendrecv(send_ab, proc_send, external_ab, proc_receive, para_env_sub%group)
1827            ! Alpha-alpha, beta-beta and closed shell
1828
1829            Y_i_aP(1:my_B_size, 1:dimen_RI, iiB) = &
1830               Y_i_aP(1:my_B_size, 1:dimen_RI, iiB) + external_ab
1831
1832            DEALLOCATE (send_ab)
1833         ELSE ! Alpha-beta case
1834            ! Alpha-alpha part
1835            ALLOCATE (send_ab(send_B_size, dimen_RI))
1836            send_ab = 0.0_dp
1837            CALL dgemm('N', 'T', send_B_size, dimen_RI, my_B_size_beta, 1.0_dp, &
1838                       local_ab(send_B_virtual_start:send_B_virtual_end, 1:my_B_size_beta), send_B_size, &
1839                       local_j_aL(1:dimen_RI, 1:my_B_size_beta, jjB), dimen_RI, &
1840                       0.0_dp, send_ab(1:send_B_size, 1:dimen_RI), send_B_size)
1841            CALL mp_sendrecv(send_ab, proc_send, external_ab, proc_receive, para_env_sub%group)
1842            Y_i_aP(1:my_B_size, 1:dimen_RI, iiB) = &
1843               Y_i_aP(1:my_B_size, 1:dimen_RI, iiB) + external_ab
1844            DEALLOCATE (send_ab)
1845         ENDIF
1846      END DO
1847      DEALLOCATE (external_ab)
1848
1849      IF (alpha_beta) THEN
1850         ! For beta-beta part (in alpha-beta case) we need a new parallel code
1851         ALLOCATE (external_ab(my_B_size_beta, dimen_RI))
1852         external_ab = 0.0_dp
1853         DO proc_shift = 1, para_env_sub%num_pe - 1
1854            proc_send = sub_proc_map(para_env_sub%mepos + proc_shift)
1855            proc_receive = sub_proc_map(para_env_sub%mepos - proc_shift)
1856
1857            CALL get_group_dist(gd_B_virtual_beta, proc_send, send_B_virtual_start, send_B_virtual_end, send_B_size)
1858            ALLOCATE (send_ab(send_B_size, dimen_RI))
1859            send_ab = 0.0_dp
1860
1861            CALL dgemm('N', 'T', send_B_size, dimen_RI, my_B_size, 1.0_dp, &
1862                       local_ba(send_B_virtual_start:send_B_virtual_end, 1:my_B_size), send_B_size, &
1863                       local_i_aL(1:dimen_RI, 1:my_B_size, jjB), dimen_RI, &
1864                       0.0_dp, send_ab(1:send_B_size, 1:dimen_RI), send_B_size)
1865            CALL mp_sendrecv(send_ab, proc_send, external_ab, proc_receive, para_env_sub%group)
1866            Y_j_aP(1:my_B_size_beta, 1:dimen_RI, jjB) = &
1867               Y_j_aP(1:my_B_size_beta, 1:dimen_RI, jjB) + external_ab
1868            DEALLOCATE (send_ab)
1869
1870         END DO
1871         DEALLOCATE (external_ab)
1872      ENDIF
1873
1874      IF ((my_i /= my_j) .AND. (.NOT. alpha_beta)) THEN
1875         ! Alpha-alpha, beta-beta and closed shell
1876         CALL dgemm('T', 'T', my_B_size, dimen_RI, my_B_size, 1.0_dp, &
1877                    t_ab(my_B_virtual_start:my_B_virtual_end, 1:my_B_size), my_B_size, &
1878                    local_i_aL(1:dimen_RI, 1:my_B_size, iiB), dimen_RI, &
1879                    1.0_dp, Y_j_aP(1:my_B_size, 1:dimen_RI, jjB), my_B_size)
1880
1881         DO proc_shift = 1, para_env_sub%num_pe - 1
1882            proc_send = sub_proc_map(para_env_sub%mepos + proc_shift)
1883            proc_receive = sub_proc_map(para_env_sub%mepos - proc_shift)
1884
1885            CALL get_group_dist(gd_B_virtual, proc_receive, rec_B_virtual_start, rec_B_virtual_end, rec_B_size)
1886
1887            ALLOCATE (external_ab(dimen_RI, rec_B_size))
1888            external_ab = 0.0_dp
1889
1890            CALL mp_sendrecv(local_i_aL(1:dimen_RI, 1:my_B_size, iiB), proc_send, &
1891                             external_ab, proc_receive, para_env_sub%group)
1892
1893            ! Alpha-alpha, beta-beta and closed shell
1894            CALL dgemm('T', 'T', my_B_size, dimen_RI, rec_B_size, 1.0_dp, &
1895                       t_ab(rec_B_virtual_start:rec_B_virtual_end, 1:my_B_size), rec_B_size, &
1896                       external_ab(1:dimen_RI, 1:rec_B_size), dimen_RI, &
1897                       1.0_dp, Y_j_aP(1:my_B_size, 1:dimen_RI, jjB), my_B_size)
1898
1899            DEALLOCATE (external_ab)
1900         END DO
1901      END IF
1902
1903      CALL timestop(handle)
1904
1905   END SUBROUTINE mp2_update_P_gamma
1906
1907! **************************************************************************************************
1908!> \brief ...
1909!> \param mp2_env ...
1910!> \param ij_index ...
1911!> \param my_B_size ...
1912!> \param my_block_size ...
1913!> \param my_group_L_size ...
1914!> \param my_i ...
1915!> \param my_ij_pairs ...
1916!> \param my_j ...
1917!> \param ngroup ...
1918!> \param num_integ_group ...
1919!> \param integ_group_pos2color_sub ...
1920!> \param num_ij_pairs ...
1921!> \param proc_map ...
1922!> \param ij_map ...
1923!> \param ranges_info_array ...
1924!> \param Y_i_aP ...
1925!> \param Y_j_aP ...
1926!> \param para_env_exchange ...
1927!> \param null_mat_rec ...
1928!> \param null_mat_send ...
1929!> \param sizes_array ...
1930!> \param alpha_alpha ...
1931!> \param beta_beta ...
1932!> \param alpha_beta ...
1933!> \param open_shell ...
1934!> \param my_b_size_beta ...
1935! **************************************************************************************************
1936   SUBROUTINE mp2_redistribute_gamma(mp2_env, ij_index, my_B_size, &
1937                                     my_block_size, my_group_L_size, my_i, my_ij_pairs, my_j, ngroup, &
1938                                     num_integ_group, integ_group_pos2color_sub, num_ij_pairs, proc_map, &
1939                                     ij_map, ranges_info_array, Y_i_aP, Y_j_aP, para_env_exchange, &
1940                                     null_mat_rec, null_mat_send, sizes_array, alpha_alpha, beta_beta, &
1941                                     alpha_beta, open_shell, my_b_size_beta)
1942
1943      TYPE(mp2_type), POINTER                            :: mp2_env
1944      INTEGER, INTENT(IN)                                :: ij_index, my_B_size, my_block_size, &
1945                                                            my_group_L_size, my_i, my_ij_pairs, &
1946                                                            my_j, ngroup, num_integ_group
1947      INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(IN)     :: integ_group_pos2color_sub, num_ij_pairs, &
1948                                                            proc_map
1949      INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(IN)  :: ij_map
1950      INTEGER, ALLOCATABLE, DIMENSION(:, :, :), &
1951         INTENT(IN)                                      :: ranges_info_array
1952      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), &
1953         INTENT(IN)                                      :: Y_i_aP, Y_j_aP
1954      TYPE(cp_para_env_type), POINTER                    :: para_env_exchange
1955      REAL(KIND=dp), INTENT(OUT)                         :: null_mat_rec(:, :, :), &
1956                                                            null_mat_send(:, :, :)
1957      INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(IN)     :: sizes_array
1958      LOGICAL, INTENT(IN)                                :: alpha_alpha, beta_beta, alpha_beta, &
1959                                                            open_shell
1960      INTEGER, INTENT(IN), OPTIONAL                      :: my_B_size_beta
1961
1962      CHARACTER(LEN=*), PARAMETER :: routineN = 'mp2_redistribute_gamma', &
1963         routineP = moduleN//':'//routineN
1964
1965      INTEGER :: end_point, handle, handle2, iiB, ij_counter_rec, irep, jjb, kkk, Lend_pos, lll, &
1966         Lstart_pos, proc_receive, proc_send, proc_shift, rec_block_size, rec_i, rec_ij_index, &
1967         rec_j, send_L_size, start_point
1968      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: BI_C_rec, BI_C_rec_beta, BI_C_send, &
1969                                                            BI_C_send_beta
1970
1971! In alpha-beta case Y_i_aP_beta is sent as Y_j_aP
1972
1973      CALL timeset(routineN//"_comm2", handle)
1974      IF (ij_index <= my_ij_pairs) THEN
1975         ! somethig to send
1976         ! start with myself
1977         CALL timeset(routineN//"_comm2_w", handle2)
1978         DO irep = 0, num_integ_group - 1
1979            Lstart_pos = ranges_info_array(1, irep, para_env_exchange%mepos)
1980            Lend_pos = ranges_info_array(2, irep, para_env_exchange%mepos)
1981            start_point = ranges_info_array(3, irep, para_env_exchange%mepos)
1982            end_point = ranges_info_array(4, irep, para_env_exchange%mepos)
1983            DO iiB = 1, my_block_size
1984!$OMP           PARALLEL DO DEFAULT(NONE) PRIVATE(kkk,lll) &
1985!$OMP                                     SHARED(start_point,end_point,Lstart_pos,Lend_pos,&
1986!$OMP                                            mp2_env,my_i,iiB,my_B_size,Y_i_aP,&
1987!$OMP                                     alpha_alpha,beta_beta,open_shell)
1988               DO kkk = start_point, end_point
1989                  lll = kkk - start_point + Lstart_pos
1990                  IF (alpha_alpha .OR. (.NOT. open_shell)) THEN
1991                     mp2_env%ri_grad%Gamma_P_ia(my_i + iiB - 1, 1:my_B_size, kkk) = &
1992                        mp2_env%ri_grad%Gamma_P_ia(my_i + iiB - 1, 1:my_B_size, kkk) + &
1993                        Y_i_aP(1:my_B_size, lll, iiB)
1994                  ENDIF
1995                  IF (beta_beta) THEN
1996                     mp2_env%ri_grad%Gamma_P_ia_beta(my_i + iiB - 1, 1:my_B_size, kkk) = &
1997                        mp2_env%ri_grad%Gamma_P_ia_beta(my_i + iiB - 1, 1:my_B_size, kkk) + &
1998                        Y_i_aP(1:my_B_size, lll, iiB)
1999                  ENDIF
2000               END DO
2001!$OMP           END PARALLEL DO
2002            END DO
2003            DO jjB = 1, my_block_size
2004               IF (.NOT. alpha_beta) THEN
2005!$OMP              PARALLEL DO DEFAULT(NONE) PRIVATE(kkk,lll) &
2006!$OMP                                        SHARED(start_point,end_point,Lstart_pos,Lend_pos,&
2007!$OMP                                               mp2_env,my_j,jjB,my_B_size,Y_j_aP,&
2008!$OMP                                        alpha_alpha,beta_beta,open_shell)
2009                  DO kkk = start_point, end_point
2010                     lll = kkk - start_point + Lstart_pos
2011                     IF (alpha_alpha .OR. (.NOT. open_shell)) THEN
2012                        mp2_env%ri_grad%Gamma_P_ia(my_j + jjB - 1, 1:my_B_size, kkk) = &
2013                           mp2_env%ri_grad%Gamma_P_ia(my_j + jjB - 1, 1:my_B_size, kkk) + &
2014                           Y_j_aP(1:my_B_size, lll, jjB)
2015                     ENDIF
2016                     IF (beta_beta) THEN
2017                        mp2_env%ri_grad%Gamma_P_ia_beta(my_j + jjB - 1, 1:my_B_size, kkk) = &
2018                           mp2_env%ri_grad%Gamma_P_ia_beta(my_j + jjB - 1, 1:my_B_size, kkk) + &
2019                           Y_j_aP(1:my_B_size, lll, jjB)
2020                     ENDIF
2021                  END DO
2022!$OMP              END PARALLEL DO
2023               ELSE
2024!$OMP              PARALLEL DO DEFAULT(NONE) PRIVATE(kkk,lll) &
2025!$OMP                                        SHARED(start_point,end_point,Lstart_pos,Lend_pos,&
2026!$OMP                                               mp2_env,my_j,jjB,my_B_size_beta,Y_j_aP)
2027                  DO kkk = start_point, end_point
2028                     lll = kkk - start_point + Lstart_pos
2029                     mp2_env%ri_grad%Gamma_P_ia_beta(my_j + jjB - 1, 1:my_B_size_beta, kkk) = &
2030                        mp2_env%ri_grad%Gamma_P_ia_beta(my_j + jjB - 1, 1:my_B_size_beta, kkk) + &
2031                        Y_j_aP(1:my_B_size_beta, lll, jjB)
2032                  ENDDO
2033!$OMP              END PARALLEL DO
2034               ENDIF
2035            END DO
2036         END DO
2037         CALL timestop(handle2)
2038
2039         ! Y_i_aP(my_B_size,dimen_RI,block_size)
2040
2041         DO proc_shift = 1, para_env_exchange%num_pe - 1
2042            proc_send = proc_map(para_env_exchange%mepos + proc_shift)
2043            proc_receive = proc_map(para_env_exchange%mepos - proc_shift)
2044
2045            send_L_size = sizes_array(proc_send)
2046            IF (.NOT. alpha_beta) THEN
2047               ALLOCATE (BI_C_send(2*my_block_size, my_B_size, send_L_size))
2048            ELSE
2049               ALLOCATE (BI_C_send(my_block_size, my_B_size, send_L_size))
2050               ALLOCATE (BI_C_send_beta(my_block_size, my_B_size_beta, send_L_size))
2051            ENDIF
2052            CALL timeset(routineN//"_comm2_w", handle2)
2053            BI_C_send = 0.0_dp
2054            IF (alpha_beta) BI_C_send_beta = 0.0_dp
2055            DO irep = 0, num_integ_group - 1
2056               Lstart_pos = ranges_info_array(1, irep, proc_send)
2057               Lend_pos = ranges_info_array(2, irep, proc_send)
2058               start_point = ranges_info_array(3, irep, proc_send)
2059               end_point = ranges_info_array(4, irep, proc_send)
2060               DO iiB = 1, my_block_size
2061!$OMP             PARALLEL DO DEFAULT(NONE) PRIVATE(kkk,lll) &
2062!$OMP                                       SHARED(start_point,end_point,Lstart_pos,Lend_pos,&
2063!$OMP                                              BI_C_send,iiB,my_B_size,Y_i_aP)
2064                  DO kkk = start_point, end_point
2065                     lll = kkk - start_point + Lstart_pos
2066                     BI_C_send(iiB, 1:my_B_size, kkk) = Y_i_aP(1:my_B_size, lll, iiB)
2067                  END DO
2068!$OMP             END PARALLEL DO
2069               END DO
2070               DO jjB = 1, my_block_size
2071                  IF (.NOT. alpha_beta) THEN
2072!$OMP                PARALLEL DO DEFAULT(NONE) PRIVATE(kkk,lll) &
2073!$OMP                                          SHARED(start_point,end_point,Lstart_pos,Lend_pos,my_block_size,&
2074!$OMP                                                 BI_C_send,jjB,my_B_size,Y_j_aP)
2075                     DO kkk = start_point, end_point
2076                        lll = kkk - start_point + Lstart_pos
2077                        BI_C_send(jjB + my_block_size, 1:my_B_size, kkk) = Y_j_aP(1:my_B_size, lll, jjB)
2078                     END DO
2079!$OMP                END PARALLEL DO
2080                  ELSE
2081!$OMP                PARALLEL DO DEFAULT(NONE) PRIVATE(kkk,lll) &
2082!$OMP                                          SHARED(start_point,end_point,Lstart_pos,Lend_pos,&
2083!$OMP                                                 BI_C_send_beta,jjB,my_B_size_beta,Y_j_aP)
2084                     DO kkk = start_point, end_point
2085                        lll = kkk - start_point + Lstart_pos
2086                        BI_C_send_beta(jjB, 1:my_B_size_beta, kkk) = Y_j_aP(1:my_B_size_beta, lll, jjB)
2087                     END DO
2088!$OMP                END PARALLEL DO
2089                  ENDIF
2090               END DO
2091            END DO
2092            CALL timestop(handle2)
2093
2094            rec_ij_index = num_ij_pairs(proc_receive)
2095
2096            IF (ij_index <= rec_ij_index) THEN
2097               ! we know that proc_receive has something to send for us, let's see what
2098               ij_counter_rec = &
2099                  (ij_index - MIN(1, integ_group_pos2color_sub(proc_receive)))*ngroup + integ_group_pos2color_sub(proc_receive)
2100
2101               rec_i = ij_map(ij_counter_rec, 1)
2102               rec_j = ij_map(ij_counter_rec, 2)
2103               rec_block_size = ij_map(ij_counter_rec, 3)
2104
2105               IF (.NOT. alpha_beta) THEN
2106                  ALLOCATE (BI_C_rec(2*rec_block_size, my_B_size, my_group_L_size))
2107               ELSE
2108                  ALLOCATE (BI_C_rec(rec_block_size, my_B_size, my_group_L_size))
2109                  ALLOCATE (BI_C_rec_beta(rec_block_size, my_B_size_beta, my_group_L_size))
2110               ENDIF
2111
2112               BI_C_rec = 0.0_dp
2113               IF (alpha_beta) BI_C_rec_beta = 0.0_dp
2114
2115               CALL mp_sendrecv(BI_C_send, proc_send, &
2116                                BI_C_rec, proc_receive, &
2117                                para_env_exchange%group)
2118               IF (alpha_beta) THEN
2119                  CALL mp_sendrecv(BI_C_send_beta, proc_send, &
2120                                   BI_C_rec_beta, proc_receive, &
2121                                   para_env_exchange%group)
2122               ENDIF
2123
2124               CALL timeset(routineN//"_comm2_w", handle2)
2125               DO irep = 0, num_integ_group - 1
2126                  Lstart_pos = ranges_info_array(1, irep, para_env_exchange%mepos)
2127                  Lend_pos = ranges_info_array(2, irep, para_env_exchange%mepos)
2128                  start_point = ranges_info_array(3, irep, para_env_exchange%mepos)
2129                  end_point = ranges_info_array(4, irep, para_env_exchange%mepos)
2130                  DO iiB = 1, rec_block_size
2131!$OMP               PARALLEL DO DEFAULT(NONE) PRIVATE(kkk) &
2132!$OMP                                         SHARED(start_point,end_point,&
2133!$OMP                                                mp2_env,rec_i,iiB,my_B_size,BI_C_rec,&
2134!$OMP                                         alpha_alpha,beta_beta,open_shell)
2135                     DO kkk = start_point, end_point
2136                        IF (alpha_alpha .OR. (.NOT. open_shell)) THEN
2137                           mp2_env%ri_grad%Gamma_P_ia(rec_i + iiB - 1, 1:my_B_size, kkk) = &
2138                              mp2_env%ri_grad%Gamma_P_ia(rec_i + iiB - 1, 1:my_B_size, kkk) + &
2139                              BI_C_rec(iiB, 1:my_B_size, kkk)
2140                        ENDIF
2141                        IF (beta_beta) THEN
2142                           mp2_env%ri_grad%Gamma_P_ia_beta(rec_i + iiB - 1, 1:my_B_size, kkk) = &
2143                              mp2_env%ri_grad%Gamma_P_ia_beta(rec_i + iiB - 1, 1:my_B_size, kkk) + &
2144                              BI_C_rec(iiB, 1:my_B_size, kkk)
2145                        ENDIF
2146                     END DO
2147!$OMP               END PARALLEL DO
2148                  END DO
2149                  DO jjB = 1, rec_block_size
2150                     IF (.NOT. alpha_beta) THEN
2151!$OMP                   PARALLEL DO DEFAULT(NONE) PRIVATE(kkk) &
2152!$OMP                                             SHARED(start_point,end_point,rec_block_size,&
2153!$OMP                                                    mp2_env,rec_j,jjB,my_B_size,BI_C_rec,&
2154!$OMP                                             alpha_alpha,beta_beta,open_shell)
2155                        DO kkk = start_point, end_point
2156                           IF (alpha_alpha .OR. (.NOT. open_shell)) THEN
2157                              mp2_env%ri_grad%Gamma_P_ia(rec_j + jjB - 1, 1:my_B_size, kkk) = &
2158                                 mp2_env%ri_grad%Gamma_P_ia(rec_j + jjB - 1, 1:my_B_size, kkk) + &
2159                                 BI_C_rec(jjB + rec_block_size, 1:my_B_size, kkk)
2160                           ENDIF
2161                           IF (beta_beta) THEN
2162                              mp2_env%ri_grad%Gamma_P_ia_beta(rec_j + jjB - 1, 1:my_B_size, kkk) = &
2163                                 mp2_env%ri_grad%Gamma_P_ia_beta(rec_j + jjB - 1, 1:my_B_size, kkk) + &
2164                                 BI_C_rec(jjB + rec_block_size, 1:my_B_size, kkk)
2165                           ENDIF
2166                        END DO
2167!$OMP                   END PARALLEL DO
2168                     ELSE
2169!$OMP                   PARALLEL DO DEFAULT(NONE) PRIVATE(kkk) &
2170!$OMP                                             SHARED(start_point,end_point,&
2171!$OMP                                           mp2_env,rec_j,jjB,my_B_size_beta,BI_C_rec_beta)
2172                        DO kkk = start_point, end_point
2173                           mp2_env%ri_grad%Gamma_P_ia_beta(rec_j + jjB - 1, 1:my_B_size_beta, kkk) = &
2174                              mp2_env%ri_grad%Gamma_P_ia_beta(rec_j + jjB - 1, 1:my_B_size_beta, kkk) + &
2175                              BI_C_rec_beta(jjB, 1:my_B_size_beta, kkk)
2176                        END DO
2177!$OMP                   END PARALLEL DO
2178                     ENDIF
2179                  END DO
2180               END DO
2181               CALL timestop(handle2)
2182
2183               DEALLOCATE (BI_C_rec)
2184               IF (alpha_beta) DEALLOCATE (BI_C_rec_beta)
2185
2186            ELSE
2187               ! we have something to send but nothing to receive
2188
2189               CALL mp_sendrecv(BI_C_send, proc_send, &
2190                                null_mat_rec, proc_receive, &
2191                                para_env_exchange%group)
2192               IF (alpha_beta) THEN
2193                  CALL mp_sendrecv(BI_C_send_beta, proc_send, &
2194                                   null_mat_rec, proc_receive, &
2195                                   para_env_exchange%group)
2196               ENDIF
2197
2198            END IF
2199
2200            DEALLOCATE (BI_C_send)
2201            IF (alpha_beta) DEALLOCATE (BI_C_send_beta)
2202         END DO
2203
2204      ELSE
2205         ! noting to send check if we have to receive
2206         DO proc_shift = 1, para_env_exchange%num_pe - 1
2207            proc_send = proc_map(para_env_exchange%mepos + proc_shift)
2208            proc_receive = proc_map(para_env_exchange%mepos - proc_shift)
2209            rec_ij_index = num_ij_pairs(proc_receive)
2210
2211            IF (ij_index <= rec_ij_index) THEN
2212               ! we know that proc_receive has something to send for us, let's see what
2213               ij_counter_rec = &
2214                  (ij_index - MIN(1, integ_group_pos2color_sub(proc_receive)))*ngroup + integ_group_pos2color_sub(proc_receive)
2215
2216               rec_i = ij_map(ij_counter_rec, 1)
2217               rec_j = ij_map(ij_counter_rec, 2)
2218               rec_block_size = ij_map(ij_counter_rec, 3)
2219
2220               IF (.NOT. alpha_beta) THEN
2221                  ALLOCATE (BI_C_rec(2*rec_block_size, my_B_size, my_group_L_size))
2222               ELSE
2223                  ALLOCATE (BI_C_rec(rec_block_size, my_B_size, my_group_L_size))
2224                  ALLOCATE (BI_C_rec_beta(rec_block_size, my_B_size_beta, my_group_L_size))
2225               ENDIF
2226
2227               BI_C_rec = 0.0_dp
2228               IF (alpha_beta) BI_C_rec_beta = 0.0_dp
2229
2230               CALL mp_sendrecv(null_mat_send, proc_send, &
2231                                BI_C_rec, proc_receive, &
2232                                para_env_exchange%group)
2233               IF (alpha_beta) THEN
2234                  CALL mp_sendrecv(null_mat_send, proc_send, &
2235                                   BI_C_rec_beta, proc_receive, &
2236                                   para_env_exchange%group)
2237               ENDIF
2238
2239               CALL timeset(routineN//"_comm2_w", handle2)
2240               DO irep = 0, num_integ_group - 1
2241                  Lstart_pos = ranges_info_array(1, irep, para_env_exchange%mepos)
2242                  Lend_pos = ranges_info_array(2, irep, para_env_exchange%mepos)
2243                  start_point = ranges_info_array(3, irep, para_env_exchange%mepos)
2244                  end_point = ranges_info_array(4, irep, para_env_exchange%mepos)
2245                  DO iiB = 1, rec_block_size
2246!$OMP               PARALLEL DO DEFAULT(NONE) PRIVATE(kkk) &
2247!$OMP                                         SHARED(start_point,end_point,&
2248!$OMP                                                mp2_env,rec_i,iiB,my_B_size,BI_C_rec,&
2249!$OMP                                         alpha_alpha,beta_beta,open_shell)
2250                     DO kkk = start_point, end_point
2251                        IF (alpha_alpha .OR. (.NOT. open_shell)) THEN
2252                           mp2_env%ri_grad%Gamma_P_ia(rec_i + iiB - 1, 1:my_B_size, kkk) = &
2253                              mp2_env%ri_grad%Gamma_P_ia(rec_i + iiB - 1, 1:my_B_size, kkk) + &
2254                              BI_C_rec(iiB, 1:my_B_size, kkk)
2255                        ENDIF
2256                        IF (beta_beta) THEN
2257                           mp2_env%ri_grad%Gamma_P_ia_beta(rec_i + iiB - 1, 1:my_B_size, kkk) = &
2258                              mp2_env%ri_grad%Gamma_P_ia_beta(rec_i + iiB - 1, 1:my_B_size, kkk) + &
2259                              BI_C_rec(iiB, 1:my_B_size, kkk)
2260                        ENDIF
2261                     END DO
2262!$OMP               END PARALLEL DO
2263                  END DO
2264                  DO jjB = 1, rec_block_size
2265                     IF (.NOT. alpha_beta) THEN
2266!$OMP                  PARALLEL DO DEFAULT(NONE) PRIVATE(kkk) &
2267!$OMP                                         SHARED(start_point,end_point,rec_block_size,&
2268!$OMP                                                mp2_env,rec_j,jjB,my_B_size,BI_C_rec,&
2269!$OMP                                            alpha_alpha,beta_beta,open_shell)
2270                        DO kkk = start_point, end_point
2271                           IF (alpha_alpha .OR. (.NOT. open_shell)) THEN
2272                              mp2_env%ri_grad%Gamma_P_ia(rec_j + jjB - 1, 1:my_B_size, kkk) = &
2273                                 mp2_env%ri_grad%Gamma_P_ia(rec_j + jjB - 1, 1:my_B_size, kkk) + &
2274                                 BI_C_rec(jjB + rec_block_size, 1:my_B_size, kkk)
2275                           ENDIF
2276                           IF (beta_beta) THEN
2277                              mp2_env%ri_grad%Gamma_P_ia_beta(rec_j + jjB - 1, 1:my_B_size, kkk) = &
2278                                 mp2_env%ri_grad%Gamma_P_ia_beta(rec_j + jjB - 1, 1:my_B_size, kkk) + &
2279                                 BI_C_rec(jjB + rec_block_size, 1:my_B_size, kkk)
2280                           ENDIF
2281                        END DO
2282!$OMP                  END PARALLEL DO
2283                     ELSE
2284!$OMP                  PARALLEL DO DEFAULT(NONE) PRIVATE(kkk) &
2285!$OMP                                         SHARED(start_point,end_point,&
2286!$OMP                                       mp2_env,rec_j,jjB,my_B_size_beta,BI_C_rec_beta)
2287                        DO kkk = start_point, end_point
2288                           mp2_env%ri_grad%Gamma_P_ia_beta(rec_j + jjB - 1, 1:my_B_size_beta, kkk) = &
2289                              mp2_env%ri_grad%Gamma_P_ia_beta(rec_j + jjB - 1, 1:my_B_size_beta, kkk) + &
2290                              BI_C_rec_beta(jjB, 1:my_B_size_beta, kkk)
2291                        END DO
2292!$OMP                  END PARALLEL DO
2293                     ENDIF
2294                  END DO
2295               END DO
2296               CALL timestop(handle2)
2297
2298               DEALLOCATE (BI_C_rec)
2299               IF (alpha_beta) DEALLOCATE (BI_C_rec_beta)
2300
2301            ELSE
2302               ! nothing to send nothing to receive
2303               CALL mp_sendrecv(null_mat_send, proc_send, &
2304                                null_mat_rec, proc_receive, &
2305                                para_env_exchange%group)
2306               IF (alpha_beta) THEN
2307                  CALL mp_sendrecv(null_mat_send, proc_send, &
2308                                   null_mat_rec, proc_receive, &
2309                                   para_env_exchange%group)
2310               ENDIF
2311
2312            END IF
2313         END DO
2314
2315      END IF
2316      CALL timestop(handle)
2317
2318   END SUBROUTINE mp2_redistribute_gamma
2319
2320! **************************************************************************************************
2321!> \brief ...
2322!> \param mp2_env ...
2323!> \param Eigenval ...
2324!> \param homo ...
2325!> \param virtual ...
2326!> \param open_shell ...
2327!> \param beta_beta ...
2328!> \param alpha_beta ...
2329!> \param Bib_C ...
2330!> \param unit_nr ...
2331!> \param dimen_RI ...
2332!> \param my_B_size ...
2333!> \param ngroup ...
2334!> \param num_integ_group ...
2335!> \param my_group_L_size ...
2336!> \param color_sub ...
2337!> \param ranges_info_array ...
2338!> \param para_env_exchange ...
2339!> \param para_env_sub ...
2340!> \param proc_map ...
2341!> \param my_B_virtual_start ...
2342!> \param my_B_virtual_end ...
2343!> \param sizes_array ...
2344!> \param gd_B_virtual ...
2345!> \param sub_proc_map ...
2346!> \param integ_group_pos2color_sub ...
2347!> \param local_ab ...
2348!> \param BIb_C_beta ...
2349!> \param my_B_size_beta ...
2350!> \param gd_B_virtual_beta ...
2351!> \param my_B_virtual_start_beta ...
2352!> \param virtual_beta ...
2353!> \param homo_beta ...
2354!> \param Eigenval_beta ...
2355!> \param my_B_virtual_end_beta ...
2356! **************************************************************************************************
2357   SUBROUTINE quasi_degenerate_P_ij(mp2_env, Eigenval, homo, virtual, open_shell, &
2358                                    beta_beta, alpha_beta, Bib_C, unit_nr, dimen_RI, &
2359                                    my_B_size, ngroup, num_integ_group, my_group_L_size, &
2360                                    color_sub, ranges_info_array, para_env_exchange, para_env_sub, proc_map, &
2361                                    my_B_virtual_start, my_B_virtual_end, sizes_array, gd_B_virtual, &
2362                                    sub_proc_map, integ_group_pos2color_sub, &
2363                                    local_ab, BIb_C_beta, my_B_size_beta, &
2364                                    gd_B_virtual_beta, my_B_virtual_start_beta, &
2365                                    virtual_beta, homo_beta, Eigenval_beta, my_B_virtual_end_beta)
2366      TYPE(mp2_type), POINTER                            :: mp2_env
2367      REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: Eigenval
2368      INTEGER, INTENT(IN)                                :: homo, virtual
2369      LOGICAL, INTENT(IN)                                :: open_shell, beta_beta, alpha_beta
2370      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), &
2371         INTENT(IN)                                      :: BIb_C
2372      INTEGER, INTENT(IN)                                :: unit_nr, dimen_RI, my_B_size, ngroup, &
2373                                                            num_integ_group, my_group_L_size, &
2374                                                            color_sub
2375      INTEGER, ALLOCATABLE, DIMENSION(:, :, :), &
2376         INTENT(IN)                                      :: ranges_info_array
2377      TYPE(cp_para_env_type), POINTER                    :: para_env_exchange, para_env_sub
2378      INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(IN)     :: proc_map
2379      INTEGER, INTENT(IN)                                :: my_B_virtual_start, my_B_virtual_end
2380      INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(IN)     :: sizes_array
2381      TYPE(group_dist_d1_type), INTENT(IN)               :: gd_B_virtual
2382      INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(IN)     :: sub_proc_map, integ_group_pos2color_sub
2383      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :), &
2384         INTENT(INOUT)                                   :: local_ab
2385      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), &
2386         INTENT(IN), OPTIONAL                            :: BIb_C_beta
2387      INTEGER, INTENT(IN), OPTIONAL                      :: my_B_size_beta
2388      TYPE(group_dist_d1_type), INTENT(IN), OPTIONAL     :: gd_B_virtual_beta
2389      INTEGER, INTENT(IN), OPTIONAL                      :: my_B_virtual_start_beta, virtual_beta, &
2390                                                            homo_beta
2391      REAL(KIND=dp), DIMENSION(:), INTENT(IN), OPTIONAL  :: Eigenval_beta
2392      INTEGER, INTENT(IN), OPTIONAL                      :: my_B_virtual_end_beta
2393
2394      CHARACTER(LEN=*), PARAMETER :: routineN = 'quasi_degenerate_P_ij', &
2395         routineP = moduleN//':'//routineN
2396
2397      INTEGER :: a, a_global, b, b_global, end_point, handle, handle2, ijk_counter, &
2398         ijk_counter_send, ijk_index, iloops, irep, Lend_pos, Lstart_pos, max_ijk, max_ijk_beta, &
2399         max_ijk_loop, my_i, my_ijk, my_ijk_beta, my_j, my_k, my_virtual, nloops, proc_receive, &
2400         proc_send, proc_shift, rec_B_size, rec_B_virtual_end, rec_B_virtual_start, rec_L_size, &
2401         send_B_size, send_B_virtual_end, send_B_virtual_start, send_i, send_ijk_index, send_j, &
2402         send_k, size_B_i, size_B_j, size_B_k, start_point
2403      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: num_ijk, num_ijk_beta
2404      INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: ijk_map, ijk_map_beta
2405      REAL(KIND=dp)                                      :: amp_fac, null_mat_rec(2, 2, 2), &
2406                                                            null_mat_send(2, 2, 2), P_ij_elem
2407      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: external_ab, external_i_aL, t_ab
2408      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: BI_C_rec, local_i_aL, local_j_aL, &
2409                                                            local_k_aL
2410
2411!
2412
2413      CALL timeset(routineN//"_ij_sing", handle)
2414! Define the number of loops over orbital triplets
2415
2416      nloops = 1
2417      IF (alpha_beta) nloops = 2
2418
2419      ! Find the number of quasi-degenerate orbitals and orbital triplets
2420
2421      IF (.NOT. alpha_beta) THEN
2422         CALL Find_quasi_degenerate_ij(my_ijk, homo, Eigenval, mp2_env, ijk_map, unit_nr, ngroup, &
2423                                       beta_beta, alpha_beta, para_env_exchange, num_ijk, max_ijk, color_sub)
2424      ELSE
2425         CALL Find_quasi_degenerate_ij(my_ijk, homo, Eigenval, mp2_env, ijk_map, unit_nr, ngroup, &
2426                                       beta_beta, alpha_beta, para_env_exchange, num_ijk, max_ijk, color_sub, &
2427                                       Eigenval_beta, homo_beta, ijk_map_beta, num_ijk_beta, max_ijk_beta, my_ijk_beta)
2428      ENDIF
2429
2430      ! Set amplitude factor
2431      amp_fac = 2.0_dp
2432      IF (open_shell) amp_fac = 1.0_dp
2433
2434      ! Loop(s) over orbital triplets
2435      DO iloops = 1, nloops
2436         IF (iloops .EQ. 1) THEN
2437            size_B_i = my_B_size
2438            size_B_j = my_B_size
2439            max_ijk_loop = max_ijk
2440            my_virtual = virtual
2441            IF (alpha_beta) THEN
2442               size_B_k = my_B_size_beta
2443            ELSE
2444               size_B_k = my_B_size
2445            ENDIF
2446         ELSE
2447            size_B_i = my_B_size_beta
2448            size_B_j = my_B_size_beta
2449            size_B_k = my_B_size
2450            my_virtual = virtual_beta
2451            max_ijk_loop = max_ijk_beta
2452         ENDIF
2453
2454         ALLOCATE (local_i_aL(dimen_RI, size_B_i, 1))
2455         ALLOCATE (local_j_aL(dimen_RI, size_B_j, 1))
2456         ALLOCATE (local_k_aL(dimen_RI, size_B_k, 1))
2457         ALLOCATE (t_ab(my_virtual, size_B_k))
2458
2459         DO ijk_index = 1, max_ijk_loop
2460            IF (iloops .EQ. 2) my_ijk = my_ijk_beta
2461            IF (ijk_index <= my_ijk) THEN
2462               ! work to be done
2463               ijk_counter = (ijk_index - MIN(1, color_sub))*ngroup + color_sub
2464               IF (iloops .EQ. 1) THEN
2465                  my_i = ijk_map(ijk_counter, 1)
2466                  my_j = ijk_map(ijk_counter, 2)
2467                  my_k = ijk_map(ijk_counter, 3)
2468               ELSE
2469                  my_i = ijk_map_beta(ijk_counter, 1)
2470                  my_j = ijk_map_beta(ijk_counter, 2)
2471                  my_k = ijk_map_beta(ijk_counter, 3)
2472               ENDIF
2473
2474               local_i_aL = 0.0_dp
2475               local_j_al = 0.0_dp
2476               local_k_al = 0.0_dp
2477               DO irep = 0, num_integ_group - 1
2478                  Lstart_pos = ranges_info_array(1, irep, para_env_exchange%mepos)
2479                  Lend_pos = ranges_info_array(2, irep, para_env_exchange%mepos)
2480                  start_point = ranges_info_array(3, irep, para_env_exchange%mepos)
2481                  end_point = ranges_info_array(4, irep, para_env_exchange%mepos)
2482
2483                  IF (.NOT. alpha_beta) THEN
2484                     local_i_aL(Lstart_pos:Lend_pos, 1:size_B_i, 1) = BIb_C(start_point:end_point, 1:size_B_i, my_i)
2485                     local_j_aL(Lstart_pos:Lend_pos, 1:size_B_j, 1) = BIb_C(start_point:end_point, 1:size_B_j, my_j)
2486                     local_k_aL(Lstart_pos:Lend_pos, 1:size_B_k, 1) = BIb_C(start_point:end_point, 1:size_B_k, my_k)
2487                  ELSE
2488                     IF (iloops .EQ. 1) THEN ! For alpha-alpha density
2489                        local_i_aL(Lstart_pos:Lend_pos, 1:size_B_i, 1) = BIb_C(start_point:end_point, 1:size_B_i, my_i)
2490                        local_j_aL(Lstart_pos:Lend_pos, 1:size_B_j, 1) = BIb_C(start_point:end_point, 1:size_B_j, my_j)
2491                        local_k_aL(Lstart_pos:Lend_pos, 1:size_B_k, 1) = BIb_C_beta(start_point:end_point, 1:size_B_k, my_k)
2492                     ELSE ! For beta-beta density
2493                        local_i_aL(Lstart_pos:Lend_pos, 1:size_B_i, 1) = BIb_C_beta(start_point:end_point, 1:size_B_i, my_i)
2494                        local_j_aL(Lstart_pos:Lend_pos, 1:size_B_j, 1) = BIb_C_beta(start_point:end_point, 1:size_B_j, my_j)
2495                        local_k_aL(Lstart_pos:Lend_pos, 1:size_B_k, 1) = BIb_C(start_point:end_point, 1:size_B_k, my_k)
2496                     ENDIF
2497                  ENDIF
2498               END DO
2499
2500               DO proc_shift = 1, para_env_exchange%num_pe - 1
2501                  proc_send = proc_map(para_env_exchange%mepos + proc_shift)
2502                  proc_receive = proc_map(para_env_exchange%mepos - proc_shift)
2503
2504                  send_ijk_index = num_ijk(proc_send)
2505                  IF (iloops .EQ. 2) send_ijk_index = num_ijk_beta(proc_send)
2506
2507                  rec_L_size = sizes_array(proc_receive)
2508                  ALLOCATE (BI_C_rec(rec_L_size, size_B_i, 1))
2509
2510                  IF (ijk_index <= send_ijk_index) THEN
2511                     ! something to send
2512                     ijk_counter_send = (ijk_index - MIN(1, integ_group_pos2color_sub(proc_send)))* &
2513                                        ngroup + integ_group_pos2color_sub(proc_send)
2514                     IF (iloops .EQ. 1) THEN
2515                        send_i = ijk_map(ijk_counter_send, 1)
2516                        send_j = ijk_map(ijk_counter_send, 2)
2517                        send_k = ijk_map(ijk_counter_send, 3)
2518                     ELSE
2519                        send_i = ijk_map_beta(ijk_counter_send, 1)
2520                        send_j = ijk_map_beta(ijk_counter_send, 2)
2521                        send_k = ijk_map_beta(ijk_counter_send, 3)
2522                     ENDIF
2523                  END IF
2524
2525                  ! occupied i
2526                  BI_C_rec = 0.0_dp
2527                  IF (ijk_index <= send_ijk_index) THEN
2528                     IF (iloops .EQ. 1) THEN
2529                        CALL mp_sendrecv(BIb_C(1:my_group_L_size, 1:size_B_i, send_i), proc_send, &
2530                                         BI_C_rec(1:rec_L_size, 1:size_B_i, 1), proc_receive, &
2531                                         para_env_exchange%group)
2532                     ELSE
2533                        CALL mp_sendrecv(BIb_C_beta(1:my_group_L_size, 1:size_B_i, send_i), proc_send, &
2534                                         BI_C_rec(1:rec_L_size, 1:size_B_i, 1), proc_receive, &
2535                                         para_env_exchange%group)
2536                     ENDIF
2537                  ELSE
2538                     ! nothing to send
2539                     CALL mp_sendrecv(null_mat_send, proc_send, &
2540                                      BI_C_rec(1:rec_L_size, 1:size_B_i, 1:1), proc_receive, &
2541                                      para_env_exchange%group)
2542                  END IF
2543                  DO irep = 0, num_integ_group - 1
2544                     Lstart_pos = ranges_info_array(1, irep, proc_receive)
2545                     Lend_pos = ranges_info_array(2, irep, proc_receive)
2546                     start_point = ranges_info_array(3, irep, proc_receive)
2547                     end_point = ranges_info_array(4, irep, proc_receive)
2548
2549                     local_i_aL(Lstart_pos:Lend_pos, 1:size_B_i, 1) = BI_C_rec(start_point:end_point, 1:size_B_i, 1)
2550                  END DO
2551
2552                  ! occupied j
2553                  BI_C_rec = 0.0_dp
2554                  IF (ijk_index <= send_ijk_index) THEN
2555                     IF (iloops .EQ. 1) THEN
2556                        CALL mp_sendrecv(BIb_C(1:my_group_L_size, 1:size_B_j, send_j), proc_send, &
2557                                         BI_C_rec(1:rec_L_size, 1:size_B_j, 1), proc_receive, &
2558                                         para_env_exchange%group)
2559                     ELSE ! For beta_beta density, the size is different now
2560                        CALL mp_sendrecv(BIb_C_beta(1:my_group_L_size, 1:size_B_j, send_j), proc_send, &
2561                                         BI_C_rec(1:rec_L_size, 1:size_B_j, 1), proc_receive, &
2562                                         para_env_exchange%group)
2563                     ENDIF
2564                  ELSE
2565                     ! nothing to send
2566                     CALL mp_sendrecv(null_mat_send, proc_send, &
2567                                      BI_C_rec(1:rec_L_size, 1:size_B_j, 1:1), proc_receive, &
2568                                      para_env_exchange%group)
2569                  END IF
2570                  DO irep = 0, num_integ_group - 1
2571                     Lstart_pos = ranges_info_array(1, irep, proc_receive)
2572                     Lend_pos = ranges_info_array(2, irep, proc_receive)
2573                     start_point = ranges_info_array(3, irep, proc_receive)
2574                     end_point = ranges_info_array(4, irep, proc_receive)
2575
2576                     local_j_aL(Lstart_pos:Lend_pos, 1:size_B_j, 1) = BI_C_rec(start_point:end_point, 1:size_B_j, 1)
2577                  END DO
2578
2579                  ! occupied k
2580                  BI_C_rec = 0.0_dp
2581                  DEALLOCATE (BI_C_rec)
2582                  ALLOCATE (BI_C_rec(rec_L_size, size_B_k, 1))
2583                  BI_C_rec = 0.0_dp
2584                  IF (ijk_index <= send_ijk_index) THEN
2585                     IF (iloops .EQ. 1) THEN
2586                        IF (.NOT. alpha_beta) THEN
2587                           CALL mp_sendrecv(BIb_C(1:my_group_L_size, 1:size_B_k, send_k), proc_send, &
2588                                            BI_C_rec(1:rec_L_size, 1:size_B_k, 1), proc_receive, &
2589                                            para_env_exchange%group)
2590                        ELSE
2591                           CALL mp_sendrecv(BIb_C_beta(1:my_group_L_size, 1:size_B_k, send_k), proc_send, &
2592                                            BI_C_rec(1:rec_L_size, 1:size_B_k, 1), proc_receive, &
2593                                            para_env_exchange%group)
2594                        ENDIF
2595                     ELSE ! For beta_beta density, the size is different now
2596                        BI_C_rec = 0.0_dp
2597                        CALL mp_sendrecv(BIb_C(1:my_group_L_size, 1:size_B_k, send_k), proc_send, &
2598                                         BI_C_rec(1:rec_L_size, 1:size_B_k, 1), proc_receive, &
2599                                         para_env_exchange%group)
2600                     ENDIF
2601                  ELSE
2602                     ! nothing to send
2603                     BI_C_rec = 0.0_dp
2604                     CALL mp_sendrecv(null_mat_send, proc_send, &
2605                                      BI_C_rec(1:rec_L_size, 1:size_B_k, 1:1), proc_receive, &
2606                                      para_env_exchange%group)
2607                  END IF
2608                  DO irep = 0, num_integ_group - 1
2609                     Lstart_pos = ranges_info_array(1, irep, proc_receive)
2610                     Lend_pos = ranges_info_array(2, irep, proc_receive)
2611                     start_point = ranges_info_array(3, irep, proc_receive)
2612                     end_point = ranges_info_array(4, irep, proc_receive)
2613
2614                     local_k_aL(Lstart_pos:Lend_pos, 1:size_B_k, 1) = BI_C_rec(start_point:end_point, 1:size_B_k, 1)
2615                  END DO
2616
2617                  DEALLOCATE (BI_C_rec)
2618               END DO
2619
2620               ! expand integrals
2621               CALL timeset(routineN//"_exp_ik", handle2)
2622               local_ab = 0.0_dp
2623               IF (iloops .EQ. 2) THEN ! For alpha-beta case for beta-beta density the dimensions are different
2624                  DEALLOCATE (local_ab)
2625                  ALLOCATE (local_ab(virtual_beta, size_B_k))
2626                  local_ab = 0.0_dp
2627                  CALL dgemm('T', 'N', size_B_i, size_B_k, dimen_RI, 1.0_dp, &
2628                             local_i_aL(:, :, 1), dimen_RI, local_k_aL(:, :, 1), dimen_RI, &
2629                             0.0_dp, local_ab(my_B_virtual_start_beta:my_B_virtual_end_beta, 1:size_B_k), size_B_i)
2630               ELSE
2631                  CALL dgemm('T', 'N', size_B_i, size_B_k, dimen_RI, 1.0_dp, &
2632                             local_i_aL(:, :, 1), dimen_RI, local_k_aL(:, :, 1), dimen_RI, &
2633                             0.0_dp, local_ab(my_B_virtual_start:my_B_virtual_end, 1:size_B_k), size_B_i)
2634               ENDIF
2635               DO proc_shift = 1, para_env_sub%num_pe - 1
2636                  proc_send = sub_proc_map(para_env_sub%mepos + proc_shift)
2637                  proc_receive = sub_proc_map(para_env_sub%mepos - proc_shift)
2638
2639                  IF (iloops .EQ. 1) THEN
2640                     CALL get_group_dist(gd_B_virtual, proc_receive, rec_B_virtual_start, rec_B_virtual_end, rec_B_size)
2641                  ELSE
2642                     CALL get_group_dist(gd_B_virtual_beta, proc_receive, rec_B_virtual_start, rec_B_virtual_end, rec_B_size)
2643                  ENDIF
2644
2645                  ALLOCATE (external_i_aL(dimen_RI, rec_B_size))
2646                  external_i_aL = 0.0_dp
2647
2648                  CALL mp_sendrecv(local_i_aL(:, :, 1), proc_send, &
2649                                   external_i_aL, proc_receive, &
2650                                   para_env_sub%group)
2651
2652                  CALL dgemm('T', 'N', rec_B_size, size_B_k, dimen_RI, 1.0_dp, &
2653                             external_i_aL, dimen_RI, local_k_aL(:, :, 1), dimen_RI, &
2654                             0.0_dp, local_ab(rec_B_virtual_start:rec_B_virtual_end, 1:size_B_k), rec_B_size)
2655
2656                  DEALLOCATE (external_i_aL)
2657               END DO
2658               CALL timestop(handle2)
2659
2660               ! Amplitudes
2661               CALL timeset(routineN//"_tab", handle2)
2662               t_ab = 0.0_dp
2663               ! Alpha-alpha, beta-beta and closed shell
2664               IF (.NOT. alpha_beta) THEN
2665                  DO b = 1, size_B_k
2666                     b_global = b + my_B_virtual_start - 1
2667                     DO a = 1, my_B_size
2668                        a_global = a + my_B_virtual_start - 1
2669                        t_ab(a_global, b) = (amp_fac*local_ab(a_global, b) - local_ab(b_global, a))/ &
2670                                           (Eigenval(my_i) + Eigenval(my_k) - Eigenval(homo + a_global) - Eigenval(homo + b_global))
2671                     END DO
2672                  END DO
2673               ELSE
2674                  IF (iloops .EQ. 1) THEN ! Alpha-beta for alpha-alpha density
2675                     DO b = 1, size_B_k
2676                        b_global = b + my_B_virtual_start_beta - 1
2677                        DO a = 1, my_B_size
2678                           a_global = a + my_B_virtual_start - 1
2679                           t_ab(a_global, b) = local_ab(a_global, b)/ &
2680                                               (Eigenval(my_i) + Eigenval_beta(my_k) - Eigenval(homo + a_global) - &
2681                                                Eigenval_beta(homo_beta + b_global))
2682                        END DO
2683                     END DO
2684                  ELSE ! Alpha-beta for beta-beta density
2685                     DO b = 1, size_B_k
2686                        b_global = b + my_B_virtual_start - 1
2687                        DO a = 1, my_B_size_beta
2688                           a_global = a + my_B_virtual_start_beta - 1
2689                           t_ab(a_global, b) = local_ab(a_global, b)/ &
2690                                               (Eigenval_beta(my_i) + Eigenval(my_k) - Eigenval_beta(homo_beta + a_global) - &
2691                                                Eigenval(homo + b_global))
2692                        END DO
2693                     END DO
2694                  ENDIF
2695               ENDIF
2696
2697               IF (.NOT. alpha_beta) THEN
2698                  DO proc_shift = 1, para_env_sub%num_pe - 1
2699                     proc_send = sub_proc_map(para_env_sub%mepos + proc_shift)
2700                     proc_receive = sub_proc_map(para_env_sub%mepos - proc_shift)
2701                     CALL get_group_dist(gd_B_virtual, proc_receive, rec_B_virtual_start, rec_B_virtual_end, rec_B_size)
2702                     CALL get_group_dist(gd_B_virtual, proc_send, send_B_virtual_start, send_B_virtual_end, send_B_size)
2703
2704                     ALLOCATE (external_ab(size_B_i, rec_B_size))
2705                     external_ab = 0.0_dp
2706                     CALL mp_sendrecv(local_ab(send_B_virtual_start:send_B_virtual_end, 1:size_B_k), proc_send, &
2707                                      external_ab(1:size_B_i, 1:rec_B_size), proc_receive, para_env_sub%group)
2708
2709                     DO b = 1, my_B_size
2710                        b_global = b + my_B_virtual_start - 1
2711                        DO a = 1, rec_B_size
2712                           a_global = a + rec_B_virtual_start - 1
2713                           t_ab(a_global, b) = (amp_fac*local_ab(a_global, b) - external_ab(b, a))/ &
2714                                           (Eigenval(my_i) + Eigenval(my_k) - Eigenval(homo + a_global) - Eigenval(homo + b_global))
2715                        END DO
2716                     END DO
2717
2718                     DEALLOCATE (external_ab)
2719                  END DO
2720               ENDIF
2721               CALL timestop(handle2)
2722
2723               ! Expand the second set of integrals
2724               CALL timeset(routineN//"_exp_jk", handle2)
2725               local_ab = 0.0_dp
2726
2727               IF (iloops .EQ. 2) THEN ! In alpha-beta case for beta-beta density the dimensions are different
2728                  CALL dgemm('T', 'N', size_B_j, size_B_k, dimen_RI, 1.0_dp, &
2729                             local_j_aL(:, :, 1), dimen_RI, local_k_aL(:, :, 1), dimen_RI, &
2730                             0.0_dp, local_ab(my_B_virtual_start_beta:my_B_virtual_end_beta, 1:size_B_k), size_B_j)
2731               ELSE
2732                  CALL dgemm('T', 'N', size_B_j, size_B_k, dimen_RI, 1.0_dp, &
2733                             local_j_aL(:, :, 1), dimen_RI, local_k_aL(:, :, 1), dimen_RI, &
2734                             0.0_dp, local_ab(my_B_virtual_start:my_B_virtual_end, 1:size_B_k), size_B_j)
2735               ENDIF
2736
2737               DO proc_shift = 1, para_env_sub%num_pe - 1
2738                  proc_send = sub_proc_map(para_env_sub%mepos + proc_shift)
2739                  proc_receive = sub_proc_map(para_env_sub%mepos - proc_shift)
2740
2741                  IF (iloops .EQ. 1) THEN
2742                     CALL get_group_dist(gd_B_virtual, proc_receive, rec_B_virtual_start, rec_B_virtual_end, rec_B_size)
2743                  ELSE
2744                     CALL get_group_dist(gd_B_virtual_beta, proc_receive, rec_B_virtual_start, rec_B_virtual_end, rec_B_size)
2745                  ENDIF
2746
2747                  ALLOCATE (external_i_aL(dimen_RI, rec_B_size))
2748                  external_i_aL = 0.0_dp
2749
2750                  CALL mp_sendrecv(local_j_aL(:, :, 1), proc_send, &
2751                                   external_i_aL, proc_receive, &
2752                                   para_env_sub%group)
2753
2754                  CALL dgemm('T', 'N', rec_B_size, size_B_k, dimen_RI, 1.0_dp, &
2755                             external_i_aL, dimen_RI, local_k_aL(:, :, 1), dimen_RI, &
2756                             0.0_dp, local_ab(rec_B_virtual_start:rec_B_virtual_end, 1:size_B_k), rec_B_size)
2757
2758                  DEALLOCATE (external_i_aL)
2759               END DO
2760               CALL timestop(handle2)
2761
2762               CALL timeset(routineN//"_Pij", handle2)
2763               ! Alpha-alpha, beta-beta and closed shell
2764               IF (.NOT. alpha_beta) THEN
2765                  DO b = 1, size_B_k
2766                     b_global = b + my_B_virtual_start - 1
2767                     DO a = 1, my_B_size
2768                        a_global = a + my_B_virtual_start - 1
2769                        local_ab(a_global, b) = local_ab(a_global, b)/ &
2770                                           (Eigenval(my_j) + Eigenval(my_k) - Eigenval(homo + a_global) - Eigenval(homo + b_global))
2771                     END DO
2772                  END DO
2773               ELSE
2774                  IF (iloops .EQ. 1) THEN ! Alpha-beta for alpha-alpha density
2775                     DO b = 1, size_B_k
2776                        b_global = b + my_B_virtual_start_beta - 1
2777                        DO a = 1, my_B_size
2778                           a_global = a + my_B_virtual_start - 1
2779                           local_ab(a_global, b) = local_ab(a_global, b)/ &
2780                                                   (Eigenval(my_j) + Eigenval_beta(my_k) - Eigenval(homo + a_global) - &
2781                                                    Eigenval_beta(homo_beta + b_global))
2782                        END DO
2783                     END DO
2784                  ELSE ! Alpha-beta for beta-beta density
2785                     DO b = 1, size_B_k
2786                        b_global = b + my_B_virtual_start - 1
2787                        DO a = 1, my_B_size_beta
2788                           a_global = a + my_B_virtual_start_beta - 1
2789                           local_ab(a_global, b) = local_ab(a_global, b)/ &
2790                                                   (Eigenval_beta(my_j) + Eigenval(my_k) - Eigenval_beta(homo_beta + a_global) - &
2791                                                    Eigenval(homo + b_global))
2792                        END DO
2793                     END DO
2794                  ENDIF
2795               ENDIF
2796               !
2797               P_ij_elem = SUM(local_ab*t_ab)
2798               IF ((.NOT. open_shell) .AND. (.NOT. alpha_beta)) THEN
2799                  P_ij_elem = P_ij_elem*2.0_dp
2800               ENDIF
2801               IF ((beta_beta) .OR. (iloops .EQ. 2)) THEN
2802                  mp2_env%ri_grad%P_ij_beta(my_i, my_j) = mp2_env%ri_grad%P_ij_beta(my_i, my_j) - P_ij_elem
2803                  mp2_env%ri_grad%P_ij_beta(my_j, my_i) = mp2_env%ri_grad%P_ij_beta(my_j, my_i) - P_ij_elem
2804               ELSE
2805                  mp2_env%ri_grad%P_ij(my_i, my_j) = mp2_env%ri_grad%P_ij(my_i, my_j) - P_ij_elem
2806                  mp2_env%ri_grad%P_ij(my_j, my_i) = mp2_env%ri_grad%P_ij(my_j, my_i) - P_ij_elem
2807               ENDIF
2808               CALL timestop(handle2)
2809            ELSE
2810               ! no work to be done, possible messeges to be exchanged
2811               DO proc_shift = 1, para_env_exchange%num_pe - 1
2812                  proc_send = proc_map(para_env_exchange%mepos + proc_shift)
2813                  proc_receive = proc_map(para_env_exchange%mepos - proc_shift)
2814
2815                  send_ijk_index = num_ijk(proc_send)
2816                  IF (iloops .EQ. 2) send_ijk_index = num_ijk_beta(proc_send)
2817
2818                  IF (ijk_index <= send_ijk_index) THEN
2819                     ! somethig to send
2820                     ijk_counter_send = (ijk_index - MIN(1, integ_group_pos2color_sub(proc_send)))*ngroup + &
2821                                        integ_group_pos2color_sub(proc_send)
2822                     IF (iloops .EQ. 1) THEN
2823                        send_i = ijk_map(ijk_counter_send, 1)
2824                        send_j = ijk_map(ijk_counter_send, 2)
2825                        send_k = ijk_map(ijk_counter_send, 3)
2826                     ELSE
2827                        send_i = ijk_map_beta(ijk_counter_send, 1)
2828                        send_j = ijk_map_beta(ijk_counter_send, 2)
2829                        send_k = ijk_map_beta(ijk_counter_send, 3)
2830                     ENDIF
2831                     ! occupied i
2832                     IF (iloops .EQ. 1) THEN
2833                        CALL mp_sendrecv(BIb_C(1:my_group_L_size, 1:size_B_i, send_i:send_i), proc_send, &
2834                                         null_mat_rec, proc_receive, para_env_exchange%group)
2835                     ELSE
2836                        CALL mp_sendrecv(BIb_C_beta(1:my_group_L_size, 1:size_B_i, send_i:send_i), proc_send, &
2837                                         null_mat_rec, proc_receive, para_env_exchange%group)
2838                     ENDIF
2839                     ! occupied j
2840                     IF (iloops .EQ. 1) THEN
2841                        CALL mp_sendrecv(BIb_C(1:my_group_L_size, 1:size_B_j, send_j:send_j), proc_send, &
2842                                         null_mat_rec, proc_receive, para_env_exchange%group)
2843                     ELSE ! For beta_beta density, the size is different now
2844                        CALL mp_sendrecv(BIb_C_beta(1:my_group_L_size, 1:size_B_j, send_j:send_j), proc_send, &
2845                                         null_mat_rec, proc_receive, para_env_exchange%group)
2846                     ENDIF
2847                     ! occupied k
2848                     IF (iloops .EQ. 1) THEN
2849                        IF (.NOT. alpha_beta) THEN
2850                           CALL mp_sendrecv(BIb_C(1:my_group_L_size, 1:size_B_k, send_k:send_k), proc_send, &
2851                                            null_mat_rec, proc_receive, para_env_exchange%group)
2852                        ELSE
2853                           CALL mp_sendrecv(BIb_C_beta(1:my_group_L_size, 1:size_B_k, send_k:send_k), proc_send, &
2854                                            null_mat_rec, proc_receive, para_env_exchange%group)
2855                        ENDIF
2856                     ELSE ! For beta_beta density, the size is different now
2857                        CALL mp_sendrecv(BIb_C(1:my_group_L_size, 1:size_B_k, send_k:send_k), proc_send, &
2858                                         null_mat_rec, proc_receive, para_env_exchange%group)
2859                     ENDIF
2860
2861                  ELSE
2862                     ! nothing to send
2863                     ! occupied i
2864                     CALL mp_sendrecv(null_mat_send, proc_send, &
2865                                      null_mat_rec, proc_receive, &
2866                                      para_env_exchange%group)
2867                     ! occupied j
2868                     CALL mp_sendrecv(null_mat_send, proc_send, &
2869                                      null_mat_rec, proc_receive, &
2870                                      para_env_exchange%group)
2871                     ! occupied k
2872                     CALL mp_sendrecv(null_mat_send, proc_send, &
2873                                      null_mat_rec, proc_receive, &
2874                                      para_env_exchange%group)
2875                  END IF
2876
2877               END DO ! proc loop
2878            END IF
2879         END DO ! ijk_index loop
2880         DEALLOCATE (local_i_aL)
2881         DEALLOCATE (local_j_aL)
2882         DEALLOCATE (local_k_aL)
2883         DEALLOCATE (t_ab)
2884      ENDDO ! over number of loops (iloop)
2885      !
2886      DEALLOCATE (ijk_map)
2887      DEALLOCATE (num_ijk)
2888      IF (alpha_beta) THEN
2889         DEALLOCATE (ijk_map_beta, num_ijk_beta)
2890      ENDIF
2891      CALL timestop(handle)
2892
2893   END SUBROUTINE Quasi_degenerate_P_ij
2894
2895! **************************************************************************************************
2896!> \brief ...
2897!> \param my_ijk ...
2898!> \param homo ...
2899!> \param Eigenval ...
2900!> \param mp2_env ...
2901!> \param ijk_map ...
2902!> \param unit_nr ...
2903!> \param ngroup ...
2904!> \param beta_beta ...
2905!> \param alpha_beta ...
2906!> \param para_env_exchange ...
2907!> \param num_ijk ...
2908!> \param max_ijk ...
2909!> \param color_sub ...
2910!> \param Eigenval_beta ...
2911!> \param homo_beta ...
2912!> \param ijk_map_beta ...
2913!> \param num_ijk_beta ...
2914!> \param max_ijk_beta ...
2915!> \param my_ijk_beta ...
2916! **************************************************************************************************
2917   SUBROUTINE Find_quasi_degenerate_ij(my_ijk, homo, Eigenval, mp2_env, ijk_map, unit_nr, ngroup, &
2918                                       beta_beta, alpha_beta, para_env_exchange, num_ijk, max_ijk, color_sub, Eigenval_beta, &
2919                                       homo_beta, ijk_map_beta, num_ijk_beta, max_ijk_beta, my_ijk_beta)
2920
2921      INTEGER, INTENT(OUT)                               :: my_ijk
2922      INTEGER, INTENT(IN)                                :: homo
2923      REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: Eigenval
2924      TYPE(mp2_type), POINTER                            :: mp2_env
2925      INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(OUT) :: ijk_map
2926      INTEGER, INTENT(IN)                                :: unit_nr, ngroup
2927      LOGICAL, INTENT(IN)                                :: beta_beta, alpha_beta
2928      TYPE(cp_para_env_type), POINTER                    :: para_env_exchange
2929      INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(OUT)    :: num_ijk
2930      INTEGER, INTENT(OUT)                               :: max_ijk
2931      INTEGER, INTENT(IN)                                :: color_sub
2932      REAL(KIND=dp), DIMENSION(:), INTENT(IN), OPTIONAL  :: Eigenval_beta
2933      INTEGER, INTENT(IN), OPTIONAL                      :: homo_beta
2934      INTEGER, ALLOCATABLE, DIMENSION(:, :), &
2935         INTENT(OUT), OPTIONAL                           :: ijk_map_beta
2936      INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(OUT), &
2937         OPTIONAL                                        :: num_ijk_beta
2938      INTEGER, INTENT(OUT), OPTIONAL                     :: max_ijk_beta, my_ijk_beta
2939
2940      CHARACTER(LEN=*), PARAMETER :: routineN = 'Find_quasi_degenerate_ij', &
2941         routineP = moduleN//':'//routineN
2942
2943      INTEGER                                            :: iib, ijk_counter, jjb, kkb, my_homo, &
2944                                                            num_sing_ij, total_ijk
2945
2946      IF (alpha_beta) THEN
2947         my_homo = homo_beta
2948      ELSE
2949         my_homo = homo
2950      ENDIF
2951
2952      ! General case
2953      num_sing_ij = 0
2954      DO iiB = 1, homo
2955         ! diagonal elements already updated
2956         DO jjB = iiB + 1, homo
2957            IF (ABS(Eigenval(jjB) - Eigenval(iiB)) < mp2_env%ri_mp2%eps_canonical) &
2958               num_sing_ij = num_sing_ij + 1
2959         END DO
2960      END DO
2961      IF (.NOT. beta_beta) THEN
2962         IF (unit_nr > 0) THEN
2963            WRITE (UNIT=unit_nr, FMT="(T3,A,T75,i6)") &
2964               "MO_INFO| Number of ij pairs below EPS_CANONICAL:", num_sing_ij
2965         END IF
2966      ELSE
2967         IF (unit_nr > 0) THEN
2968            WRITE (UNIT=unit_nr, FMT="(T3,A,T75,i6)") &
2969               "MO_INFO| Number of ij pairs (spin beta) below EPS_CANONICAL:", num_sing_ij
2970         END IF
2971      ENDIF
2972      total_ijk = my_homo*num_sing_ij
2973      ALLOCATE (ijk_map(total_ijk, 3))
2974      ijk_map = 0
2975
2976      my_ijk = 0
2977      ijk_counter = 0
2978      DO iiB = 1, homo
2979         ! diagonal elements already updated
2980         DO jjB = iiB + 1, homo
2981            IF (ABS(Eigenval(jjB) - Eigenval(iiB)) >= mp2_env%ri_mp2%eps_canonical) CYCLE
2982            DO kkB = 1, my_homo
2983               ijk_counter = ijk_counter + 1
2984               ijk_map(ijk_counter, 1) = iiB
2985               ijk_map(ijk_counter, 2) = jjB
2986               ijk_map(ijk_counter, 3) = kkB
2987               IF (MOD(ijk_counter, ngroup) == color_sub) my_ijk = my_ijk + 1
2988            END DO
2989         END DO
2990      END DO
2991
2992      ALLOCATE (num_ijk(0:para_env_exchange%num_pe - 1))
2993      num_ijk = 0
2994      num_ijk(para_env_exchange%mepos) = my_ijk
2995      CALL mp_sum(num_ijk, para_env_exchange%group)
2996      max_ijk = MAXVAL(num_ijk)
2997
2998      ! Alpha-beta case: we need a second map
2999      IF (alpha_beta) THEN
3000         num_sing_ij = 0
3001         DO iiB = 1, homo_beta
3002            ! diagonal elements already updated
3003            DO jjB = iiB + 1, homo_beta
3004               IF (ABS(Eigenval_beta(jjB) - Eigenval_beta(iiB)) < mp2_env%ri_mp2%eps_canonical) &
3005                  num_sing_ij = num_sing_ij + 1
3006            END DO
3007         END DO
3008         ! total number of elemets that have to be computed
3009         total_ijk = homo*num_sing_ij
3010         ALLOCATE (ijk_map_beta(total_ijk, 3))
3011         ijk_map_beta = 0
3012         my_ijk_beta = 0
3013         ijk_counter = 0
3014         DO iiB = 1, homo_beta
3015            ! diagonal elements already updated
3016            DO jjB = iiB + 1, homo_beta
3017               IF (ABS(Eigenval_beta(jjB) - Eigenval_beta(iiB)) >= mp2_env%ri_mp2%eps_canonical) CYCLE
3018               DO kkB = 1, homo
3019                  ijk_counter = ijk_counter + 1
3020                  ijk_map_beta(ijk_counter, 1) = iiB
3021                  ijk_map_beta(ijk_counter, 2) = jjB
3022                  ijk_map_beta(ijk_counter, 3) = kkB
3023                  IF (MOD(ijk_counter, ngroup) == color_sub) my_ijk_beta = my_ijk_beta + 1
3024               END DO
3025            END DO
3026         END DO
3027         ALLOCATE (num_ijk_beta(0:para_env_exchange%num_pe - 1))
3028         num_ijk_beta = 0
3029         num_ijk_beta(para_env_exchange%mepos) = my_ijk_beta
3030         CALL mp_sum(num_ijk_beta, para_env_exchange%group)
3031         max_ijk_beta = MAXVAL(num_ijk_beta)
3032      ENDIF
3033
3034   END SUBROUTINE Find_quasi_degenerate_ij
3035
3036END MODULE mp2_ri_gpw
3037