1!--------------------------------------------------------------------------------------------------!
2!   CP2K: A general program to perform molecular dynamics simulations                              !
3!   Copyright (C) 2000 - 2019  CP2K developers group                                               !
4!--------------------------------------------------------------------------------------------------!
5
6! **************************************************************************************************
7!> \brief Rountines for GW + Bethe-Salpeter for computing electronic excitations
8!> \par History
9!>      04.2017 created [Jan Wilhelm]
10! **************************************************************************************************
11MODULE bse
12   USE cp_fm_basic_linalg,              ONLY: cp_fm_upper_to_full
13   USE cp_fm_cholesky,                  ONLY: cp_fm_cholesky_decompose,&
14                                              cp_fm_cholesky_invert
15   USE cp_fm_types,                     ONLY: cp_fm_create,&
16                                              cp_fm_get_info,&
17                                              cp_fm_release,&
18                                              cp_fm_set_all,&
19                                              cp_fm_to_fm,&
20                                              cp_fm_type
21   USE cp_gemm_interface,               ONLY: cp_gemm
22   USE cp_para_types,                   ONLY: cp_para_env_type
23   USE group_dist_types,                ONLY: get_group_dist,&
24                                              group_dist_d1_type
25   USE kinds,                           ONLY: dp
26   USE message_passing,                 ONLY: mp_alltoall,&
27                                              mp_sum
28   USE mp2_types,                       ONLY: integ_mat_buffer_type
29   USE rpa_communication,               ONLY: communicate_buffer
30#include "./base/base_uses.f90"
31
32   IMPLICIT NONE
33
34   PRIVATE
35
36   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'bse'
37
38   PUBLIC :: mult_B_with_W_and_fill_local_3c_arrays, do_subspace_iterations
39
40CONTAINS
41
42! **************************************************************************************************
43!> \brief ...
44!> \param B_bar_ijQ_bse_local ...
45!> \param B_abQ_bse_local ...
46!> \param B_bar_iaQ_bse_local ...
47!> \param B_iaQ_bse_local ...
48!> \param homo ...
49!> \param virtual ...
50!> \param num_Z_vectors ...
51!> \param max_iter ...
52!> \param threshold_min_trans ...
53!> \param Eigenval ...
54!> \param para_env ...
55! **************************************************************************************************
56   SUBROUTINE do_subspace_iterations(B_bar_ijQ_bse_local, B_abQ_bse_local, B_bar_iaQ_bse_local, &
57                                     B_iaQ_bse_local, homo, virtual, num_Z_vectors, &
58                                     max_iter, threshold_min_trans, Eigenval, para_env)
59
60      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: B_bar_ijQ_bse_local, B_abQ_bse_local, &
61                                                            B_bar_iaQ_bse_local, B_iaQ_bse_local
62      INTEGER                                            :: homo, virtual, num_Z_vectors, max_iter
63      REAL(KIND=dp)                                      :: threshold_min_trans
64      REAL(KIND=dp), DIMENSION(:)                        :: Eigenval
65      TYPE(cp_para_env_type), POINTER                    :: para_env
66
67      CHARACTER(LEN=*), PARAMETER :: routineN = 'do_subspace_iterations', &
68         routineP = moduleN//':'//routineN
69
70      INTEGER                                            :: handle, i_iter, local_RI_size
71      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: M_ia_tmp, M_ji_tmp, RI_vector
72      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: AZ, BZ, Z_vectors
73
74      CALL timeset(routineN, handle)
75
76      ! JW hack 2del
77      threshold_min_trans = 0.01_dp
78
79      ALLOCATE (Z_vectors(homo, virtual, num_Z_vectors))
80      Z_vectors = 0.0_dp
81
82      ALLOCATE (AZ(homo, virtual, num_Z_vectors))
83      AZ = 0.0_dp
84
85      ALLOCATE (BZ(homo, virtual, num_Z_vectors))
86      BZ = 0.0_dp
87
88      local_RI_size = SIZE(B_iaQ_bse_local, 3)
89
90      ALLOCATE (M_ia_tmp(homo, virtual))
91      M_ia_tmp = 0.0_dp
92
93      ALLOCATE (M_ji_tmp(homo, homo))
94      M_ji_tmp = 0.0_dp
95
96      ALLOCATE (RI_vector(local_RI_size, num_Z_vectors))
97      RI_vector = 0.0_dp
98
99      CALL initial_guess_Z_vectors(Z_vectors, Eigenval, num_Z_vectors, homo, virtual)
100
101      DO i_iter = 1, max_iter
102
103         CALL compute_AZ(AZ, Z_vectors, B_iaQ_bse_local, B_bar_ijQ_bse_local, B_abQ_bse_local, &
104                         M_ia_tmp, RI_vector, Eigenval, homo, virtual, num_Z_vectors, local_RI_size, &
105                         para_env)
106
107         CALL compute_BZ(BZ, Z_vectors, B_iaQ_bse_local, B_bar_iaQ_bse_local, &
108                         M_ji_tmp, RI_vector, homo, virtual, num_Z_vectors, local_RI_size, &
109                         para_env)
110
111      END DO
112
113      DEALLOCATE (AZ, BZ, Z_vectors, M_ia_tmp, M_ji_tmp, RI_vector)
114
115      CALL timestop(handle)
116
117   END SUBROUTINE
118
119! **************************************************************************************************
120!> \brief ...
121!> \param BZ ...
122!> \param Z_vectors ...
123!> \param B_iaQ_bse_local ...
124!> \param B_bar_iaQ_bse_local ...
125!> \param M_ji_tmp ...
126!> \param RI_vector ...
127!> \param homo ...
128!> \param virtual ...
129!> \param num_Z_vectors ...
130!> \param local_RI_size ...
131!> \param para_env ...
132! **************************************************************************************************
133   SUBROUTINE compute_BZ(BZ, Z_vectors, B_iaQ_bse_local, B_bar_iaQ_bse_local, &
134                         M_ji_tmp, RI_vector, homo, virtual, num_Z_vectors, local_RI_size, para_env)
135
136      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: BZ, Z_vectors, B_iaQ_bse_local, &
137                                                            B_bar_iaQ_bse_local
138      REAL(KIND=dp), DIMENSION(:, :)                     :: M_ji_tmp, RI_vector
139      INTEGER                                            :: homo, virtual, num_Z_vectors, &
140                                                            local_RI_size
141      TYPE(cp_para_env_type), POINTER                    :: para_env
142
143      CHARACTER(LEN=*), PARAMETER :: routineN = 'compute_BZ', routineP = moduleN//':'//routineN
144
145      INTEGER                                            :: i_Z_vector, LLL
146
147      BZ(:, :, :) = 0.0_dp
148
149      CALL compute_v_ia_jb_part(BZ, Z_vectors, B_iaQ_bse_local, RI_vector, local_RI_size, &
150                                num_Z_vectors, homo, virtual)
151
152      DO i_Z_vector = 1, num_Z_vectors
153
154         DO LLL = 1, local_RI_size
155
156            ! M_ji^P = sum_b Z_jb*B_bi^P
157            CALL DGEMM("N", "T", homo, homo, virtual, 1.0_dp, Z_vectors(:, :, i_Z_vector), homo, &
158                       B_iaQ_bse_local(:, :, LLL), homo, 0.0_dp, M_ji_tmp, homo)
159            ! (BZ)_ia = sum_jP M_ij^P*B^bar_ja^P
160            CALL DGEMM("T", "N", homo, virtual, homo, 1.0_dp, M_ji_tmp, homo, &
161                       B_bar_iaQ_bse_local, homo, 1.0_dp, BZ(:, :, i_Z_vector), homo)
162
163         END DO
164
165      END DO
166
167      ! we make the mp_sum to sum over all RI basis functions
168      CALL mp_sum(BZ, para_env%group)
169
170   END SUBROUTINE
171
172! **************************************************************************************************
173!> \brief ...
174!> \param AZ ...
175!> \param Z_vectors ...
176!> \param B_iaQ_bse_local ...
177!> \param B_bar_ijQ_bse_local ...
178!> \param B_abQ_bse_local ...
179!> \param M_ia_tmp ...
180!> \param RI_vector ...
181!> \param Eigenval ...
182!> \param homo ...
183!> \param virtual ...
184!> \param num_Z_vectors ...
185!> \param local_RI_size ...
186!> \param para_env ...
187! **************************************************************************************************
188   SUBROUTINE compute_AZ(AZ, Z_vectors, B_iaQ_bse_local, B_bar_ijQ_bse_local, B_abQ_bse_local, M_ia_tmp, &
189                         RI_vector, Eigenval, homo, virtual, num_Z_vectors, local_RI_size, para_env)
190
191      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: AZ, Z_vectors, B_iaQ_bse_local, &
192                                                            B_bar_ijQ_bse_local, B_abQ_bse_local
193      REAL(KIND=dp), DIMENSION(:, :)                     :: M_ia_tmp, RI_vector
194      REAL(KIND=dp), DIMENSION(:)                        :: Eigenval
195      INTEGER                                            :: homo, virtual, num_Z_vectors, &
196                                                            local_RI_size
197      TYPE(cp_para_env_type), POINTER                    :: para_env
198
199      CHARACTER(LEN=*), PARAMETER :: routineN = 'compute_AZ', routineP = moduleN//':'//routineN
200
201      INTEGER                                            :: a_virt, handle, i_occ, i_Z_vector, LLL
202      REAL(KIND=dp)                                      :: eigen_diff
203
204      CALL timeset(routineN, handle)
205
206      AZ(:, :, :) = 0.0_dp
207
208      CALL compute_v_ia_jb_part(AZ, Z_vectors, B_iaQ_bse_local, RI_vector, local_RI_size, &
209                                num_Z_vectors, homo, virtual)
210
211      DO i_Z_vector = 1, num_Z_vectors
212
213         ! JW TO DO: OMP PARALLELIZATION
214         DO LLL = 1, local_RI_size
215
216            ! M_ja^P = sum_j Z_jb*B_ba^P
217            CALL DGEMM("N", "N", homo, virtual, virtual, 1.0_dp, Z_vectors(:, :, i_Z_vector), homo, &
218                       B_abQ_bse_local(:, :, LLL), virtual, 0.0_dp, M_ia_tmp, homo)
219
220            ! (AZ)_ia = sum_jP B_bar_ij^P*M_ja^P
221            CALL DGEMM("N", "N", homo, virtual, homo, 1.0_dp, B_bar_ijQ_bse_local(:, :, LLL), homo, &
222                       M_ia_tmp, homo, 1.0_dp, AZ(:, :, i_Z_vector), homo)
223
224         END DO
225
226      END DO
227
228      ! we make the mp_sum to sum over all RI basis functions
229      CALL mp_sum(AZ, para_env%group)
230
231      ! add (e_a-e_i)*Z_ia
232      DO i_occ = 1, homo
233         DO a_virt = 1, virtual
234
235            eigen_diff = Eigenval(a_virt + homo) - Eigenval(i_occ)
236
237            AZ(i_occ, a_virt, :) = AZ(i_occ, a_virt, :) + Z_vectors(i_occ, a_virt, :)*eigen_diff
238
239         END DO
240      END DO
241
242      CALL timestop(handle)
243
244   END SUBROUTINE
245
246! **************************************************************************************************
247!> \brief ...
248!> \param AZ ...
249!> \param Z_vectors ...
250!> \param B_iaQ_bse_local ...
251!> \param RI_vector ...
252!> \param local_RI_size ...
253!> \param num_Z_vectors ...
254!> \param homo ...
255!> \param virtual ...
256! **************************************************************************************************
257   SUBROUTINE compute_v_ia_jb_part(AZ, Z_vectors, B_iaQ_bse_local, RI_vector, local_RI_size, &
258                                   num_Z_vectors, homo, virtual)
259
260      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: AZ, Z_vectors, B_iaQ_bse_local
261      REAL(KIND=dp), DIMENSION(:, :)                     :: RI_vector
262      INTEGER                                            :: local_RI_size, num_Z_vectors, homo, &
263                                                            virtual
264
265      CHARACTER(LEN=*), PARAMETER :: routineN = 'compute_v_ia_jb_part', &
266         routineP = moduleN//':'//routineN
267
268      INTEGER                                            :: a_virt, handle, i_occ, i_Z_vector, LLL
269
270      CALL timeset(routineN, handle)
271
272      RI_vector = 0.0_dp
273
274      ! v_P = sum_jb B_jb^P Z_jb
275      DO LLL = 1, local_RI_size
276         DO i_Z_vector = 1, num_Z_vectors
277            DO i_occ = 1, homo
278               DO a_virt = 1, virtual
279
280                  RI_vector(LLL, i_Z_vector) = RI_vector(LLL, i_Z_vector) + &
281                                               Z_vectors(i_occ, a_virt, i_Z_vector)* &
282                                               B_iaQ_bse_local(i_occ, a_virt, LLL)
283
284               END DO
285            END DO
286         END DO
287      END DO
288
289      ! AZ = sum_P B_ia^P*v_P + ...
290      DO LLL = 1, local_RI_size
291         DO i_Z_vector = 1, num_Z_vectors
292            DO i_occ = 1, homo
293               DO a_virt = 1, virtual
294
295                  AZ(i_occ, a_virt, i_Z_vector) = AZ(i_occ, a_virt, i_Z_vector) + &
296                                                  RI_vector(LLL, i_Z_vector)* &
297                                                  B_iaQ_bse_local(i_occ, a_virt, LLL)
298
299               END DO
300            END DO
301         END DO
302      END DO
303
304      CALL timestop(handle)
305
306   END SUBROUTINE
307
308! **************************************************************************************************
309!> \brief ...
310!> \param Z_vectors ...
311!> \param Eigenval ...
312!> \param num_Z_vectors ...
313!> \param homo ...
314!> \param virtual ...
315! **************************************************************************************************
316   SUBROUTINE initial_guess_Z_vectors(Z_vectors, Eigenval, num_Z_vectors, homo, virtual)
317
318      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: Z_vectors
319      REAL(KIND=dp), DIMENSION(:)                        :: Eigenval
320      INTEGER                                            :: num_Z_vectors, homo, virtual
321
322      CHARACTER(LEN=*), PARAMETER :: routineN = 'initial_guess_Z_vectors', &
323         routineP = moduleN//':'//routineN
324
325      INTEGER                                            :: a_virt, handle, i_occ, i_Z_vector, &
326                                                            min_loc(2)
327      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: eigen_diff_ia
328
329      CALL timeset(routineN, handle)
330
331      ALLOCATE (eigen_diff_ia(homo, virtual))
332
333      DO i_occ = 1, homo
334         DO a_virt = 1, virtual
335            eigen_diff_ia(i_occ, a_virt) = Eigenval(a_virt + homo) - Eigenval(i_occ)
336         END DO
337      END DO
338
339      DO i_Z_vector = 1, num_Z_vectors
340
341         min_loc = MINLOC(eigen_diff_ia)
342
343         Z_vectors(min_loc(1), min_loc(2), i_Z_vector) = 1.0_dp
344
345         eigen_diff_ia(min_loc(1), min_loc(2)) = 1.0E20_dp
346
347      END DO
348
349      DEALLOCATE (eigen_diff_ia)
350
351      CALL timestop(handle)
352
353   END SUBROUTINE
354
355! **************************************************************************************************
356!> \brief ...
357!> \param fm_mat_S_ij_bse ...
358!> \param fm_mat_S_ab_bse ...
359!> \param fm_mat_S ...
360!> \param fm_mat_Q_static_bse ...
361!> \param fm_mat_Q_static_bse_gemm ...
362!> \param B_bar_ijQ_bse_local ...
363!> \param B_abQ_bse_local ...
364!> \param B_bar_iaQ_bse_local ...
365!> \param B_iaQ_bse_local ...
366!> \param dimen_RI ...
367!> \param homo ...
368!> \param virtual ...
369!> \param dimen_ia ...
370!> \param gd_array ...
371!> \param color_sub ...
372!> \param para_env ...
373! **************************************************************************************************
374   SUBROUTINE mult_B_with_W_and_fill_local_3c_arrays(fm_mat_S_ij_bse, fm_mat_S_ab_bse, fm_mat_S, fm_mat_Q_static_bse, &
375                                                     fm_mat_Q_static_bse_gemm, &
376                                                     B_bar_ijQ_bse_local, B_abQ_bse_local, B_bar_iaQ_bse_local, &
377                                                     B_iaQ_bse_local, dimen_RI, homo, virtual, dimen_ia, &
378                                                     gd_array, color_sub, para_env)
379
380      TYPE(cp_fm_type), POINTER                          :: fm_mat_S_ij_bse, fm_mat_S_ab_bse, &
381                                                            fm_mat_S, fm_mat_Q_static_bse, &
382                                                            fm_mat_Q_static_bse_gemm
383      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: B_bar_ijQ_bse_local, B_abQ_bse_local, &
384                                                            B_bar_iaQ_bse_local, B_iaQ_bse_local
385      INTEGER                                            :: dimen_RI, homo, virtual, dimen_ia
386      TYPE(group_dist_d1_type)                           :: gd_array
387      INTEGER                                            :: color_sub
388      TYPE(cp_para_env_type), POINTER                    :: para_env
389
390      CHARACTER(LEN=*), PARAMETER :: routineN = 'mult_B_with_W_and_fill_local_3c_arrays', &
391         routineP = moduleN//':'//routineN
392
393      INTEGER                                            :: handle, i_global, iiB, info_chol, &
394                                                            j_global, jjB, ncol_local, nrow_local
395      INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
396      TYPE(cp_fm_type), POINTER                          :: fm_mat_S_bar_ia_bse, &
397                                                            fm_mat_S_bar_ij_bse, fm_mat_work
398
399      CALL timeset(routineN, handle)
400
401      CALL cp_fm_create(fm_mat_S_bar_ia_bse, fm_mat_S%matrix_struct)
402      CALL cp_fm_to_fm(fm_mat_S, fm_mat_S_bar_ia_bse)
403      CALL cp_fm_set_all(fm_mat_S_bar_ia_bse, 0.0_dp)
404
405      CALL cp_fm_create(fm_mat_S_bar_ij_bse, fm_mat_S_ij_bse%matrix_struct)
406      CALL cp_fm_to_fm(fm_mat_S_ij_bse, fm_mat_S_bar_ij_bse)
407      CALL cp_fm_set_all(fm_mat_S_bar_ij_bse, 0.0_dp)
408
409      CALL cp_fm_create(fm_mat_work, fm_mat_Q_static_bse_gemm%matrix_struct)
410      CALL cp_fm_to_fm(fm_mat_Q_static_bse_gemm, fm_mat_work)
411      CALL cp_fm_set_all(fm_mat_work, 0.0_dp)
412
413      ! get info of fm_mat_Q_static_bse and compute ((1+Q(0))^-1-1)
414      CALL cp_fm_get_info(matrix=fm_mat_Q_static_bse_gemm, &
415                          nrow_local=nrow_local, &
416                          ncol_local=ncol_local, &
417                          row_indices=row_indices, &
418                          col_indices=col_indices)
419
420      DO jjB = 1, ncol_local
421         j_global = col_indices(jjB)
422         DO iiB = 1, nrow_local
423            i_global = row_indices(iiB)
424            IF (j_global == i_global .AND. i_global <= dimen_RI) THEN
425               fm_mat_Q_static_bse_gemm%local_data(iiB, jjB) = fm_mat_Q_static_bse_gemm%local_data(iiB, jjB) + 1.0_dp
426            END IF
427         END DO
428      END DO
429
430      ! calculate Trace(Log(Matrix)) as Log(DET(Matrix)) via cholesky decomposition
431      CALL cp_fm_cholesky_decompose(matrix=fm_mat_Q_static_bse_gemm, n=dimen_RI, info_out=info_chol)
432      CPASSERT(info_chol == 0)
433
434      ! calculate [1+Q(i0)]^-1
435      CALL cp_fm_cholesky_invert(fm_mat_Q_static_bse_gemm)
436      ! symmetrize the result
437      CALL cp_fm_upper_to_full(fm_mat_Q_static_bse_gemm, fm_mat_work)
438
439      CALL cp_gemm(transa="N", transb="N", m=homo**2, n=dimen_RI, k=dimen_RI, alpha=1.0_dp, &
440                   matrix_a=fm_mat_S_ij_bse, matrix_b=fm_mat_Q_static_bse, beta=0.0_dp, &
441                   matrix_c=fm_mat_S_bar_ij_bse)
442
443      ! fm_mat_S_bar_ia_bse has a different blacs_env as fm_mat_S_ij_bse since we take
444      ! fm_mat_S from RPA. Therefore, we also need a different fm_mat_Q_static_bse_gemm
445      CALL cp_gemm(transa="N", transb="N", m=dimen_ia, n=dimen_RI, k=dimen_RI, alpha=1.0_dp, &
446                   matrix_a=fm_mat_S, matrix_b=fm_mat_Q_static_bse_gemm, beta=0.0_dp, &
447                   matrix_c=fm_mat_S_bar_ia_bse)
448
449      CALL allocate_and_fill_local_array(B_iaQ_bse_local, fm_mat_S, gd_array, color_sub, homo, virtual, dimen_RI, para_env)
450
451      CALL allocate_and_fill_local_array(B_bar_iaQ_bse_local, fm_mat_S_bar_ia_bse, gd_array, color_sub, homo, virtual, &
452                                         dimen_RI, para_env)
453
454      CALL allocate_and_fill_local_array(B_bar_ijQ_bse_local, fm_mat_S_bar_ij_bse, gd_array, color_sub, homo, homo, &
455                                         dimen_RI, para_env)
456
457      CALL allocate_and_fill_local_array(B_abQ_bse_local, fm_mat_S_ab_bse, gd_array, color_sub, virtual, virtual, &
458                                         dimen_RI, para_env)
459
460      CALL cp_fm_release(fm_mat_S_bar_ia_bse)
461      CALL cp_fm_release(fm_mat_S_bar_ij_bse)
462      CALL cp_fm_release(fm_mat_work)
463
464      CALL timestop(handle)
465
466   END SUBROUTINE
467
468! **************************************************************************************************
469!> \brief ...
470!> \param B_local ...
471!> \param fm_mat_S ...
472!> \param gd_array ...
473!> \param color_sub ...
474!> \param small_size ...
475!> \param big_size ...
476!> \param dimen_RI ...
477!> \param para_env ...
478! **************************************************************************************************
479   SUBROUTINE allocate_and_fill_local_array(B_local, fm_mat_S, gd_array, &
480                                            color_sub, small_size, big_size, dimen_RI, para_env)
481
482      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: B_local
483      TYPE(cp_fm_type), POINTER                          :: fm_mat_S
484      TYPE(group_dist_d1_type)                           :: gd_array
485      INTEGER                                            :: color_sub, small_size, big_size, dimen_RI
486      TYPE(cp_para_env_type), POINTER                    :: para_env
487
488      CHARACTER(LEN=*), PARAMETER :: routineN = 'allocate_and_fill_local_array', &
489         routineP = moduleN//':'//routineN
490
491      INTEGER :: combi_index, end_RI, handle, handle1, i_comm, i_entry, iiB, imepos, jjB, &
492         level_big_size, level_small_size, ncol_local, nrow_local, num_comm_cycles, RI_index, &
493         size_RI, start_RI
494      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: entry_counter, mepos_from_RI_index, &
495                                                            num_entries_rec, num_entries_send
496      INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
497      INTEGER, DIMENSION(:, :), POINTER                  :: req_array
498      REAL(KIND=dp)                                      :: matrix_el
499      TYPE(integ_mat_buffer_type), ALLOCATABLE, &
500         DIMENSION(:)                                    :: buffer_rec, buffer_send
501
502      CALL timeset(routineN, handle)
503
504      ALLOCATE (mepos_from_RI_index(dimen_RI))
505      mepos_from_RI_index = 0
506
507      DO imepos = 0, para_env%num_pe - 1
508
509         CALL get_group_dist(gd_array, pos=imepos, starts=start_RI, ends=end_RI)
510
511         mepos_from_RI_index(start_RI:end_RI) = imepos
512
513      END DO
514
515      ! color_sub is automatically the number of the process since every subgroup has only one MPI rank
516      CALL get_group_dist(gd_array, color_sub, start_RI, end_RI, size_RI)
517
518      ALLOCATE (B_local(small_size, big_size, 1:size_RI))
519
520      ALLOCATE (num_entries_send(0:para_env%num_pe - 1))
521      ALLOCATE (num_entries_rec(0:para_env%num_pe - 1))
522
523      ALLOCATE (req_array(1:para_env%num_pe, 4))
524
525      ALLOCATE (entry_counter(0:para_env%num_pe - 1))
526
527      CALL cp_fm_get_info(matrix=fm_mat_S, &
528                          nrow_local=nrow_local, &
529                          ncol_local=ncol_local, &
530                          row_indices=row_indices, &
531                          col_indices=col_indices)
532
533      num_comm_cycles = 10
534
535      ! communicate not all due to huge memory overhead, since for every number in fm_mat_S, we store
536      ! three additional ones (RI index, first MO index, second MO index!!)
537      DO i_comm = 0, num_comm_cycles - 1
538
539         num_entries_send = 0
540         num_entries_rec = 0
541
542         ! loop over RI index to get the number of sent entries
543         DO jjB = 1, ncol_local
544
545            RI_index = col_indices(jjB)
546
547            IF (MODULO(RI_index, num_comm_cycles) /= i_comm) CYCLE
548
549            imepos = mepos_from_RI_index(RI_index)
550
551            num_entries_send(imepos) = num_entries_send(imepos) + nrow_local
552
553         END DO
554
555         CALL mp_alltoall(num_entries_send, num_entries_rec, 1, para_env%group)
556
557         ALLOCATE (buffer_rec(0:para_env%num_pe - 1))
558         ALLOCATE (buffer_send(0:para_env%num_pe - 1))
559
560         ! allocate data message and corresponding indices
561         DO imepos = 0, para_env%num_pe - 1
562
563            ALLOCATE (buffer_rec(imepos)%msg(num_entries_rec(imepos)))
564            buffer_rec(imepos)%msg = 0.0_dp
565
566            ALLOCATE (buffer_send(imepos)%msg(num_entries_send(imepos)))
567            buffer_send(imepos)%msg = 0.0_dp
568
569            ALLOCATE (buffer_rec(imepos)%indx(num_entries_rec(imepos), 3))
570            buffer_rec(imepos)%indx = 0
571
572            ALLOCATE (buffer_send(imepos)%indx(num_entries_send(imepos), 3))
573            buffer_send(imepos)%indx = 0
574
575         END DO
576
577         entry_counter(:) = 0
578
579         ! loop over RI index for filling the send-buffer
580         DO jjB = 1, ncol_local
581
582            RI_index = col_indices(jjB)
583
584            IF (MODULO(RI_index, num_comm_cycles) /= i_comm) CYCLE
585
586            imepos = mepos_from_RI_index(RI_index)
587
588            DO iiB = 1, nrow_local
589
590               combi_index = row_indices(iiB)
591               level_small_size = MAX(1, combi_index - 1)/big_size + 1
592               level_big_size = combi_index - (level_small_size - 1)*big_size
593
594               entry_counter(imepos) = entry_counter(imepos) + 1
595
596               buffer_send(imepos)%msg(entry_counter(imepos)) = fm_mat_S%local_data(iiB, jjB)
597
598               buffer_send(imepos)%indx(entry_counter(imepos), 1) = RI_index
599               buffer_send(imepos)%indx(entry_counter(imepos), 2) = level_small_size
600               buffer_send(imepos)%indx(entry_counter(imepos), 3) = level_big_size
601
602            END DO
603
604         END DO
605
606         CALL timeset("BSE_comm_data", handle1)
607
608         CALL communicate_buffer(para_env, num_entries_rec, num_entries_send, buffer_rec, buffer_send, req_array)
609
610         CALL timestop(handle1)
611
612         ! fill B_local
613         DO imepos = 0, para_env%num_pe - 1
614
615            DO i_entry = 1, num_entries_rec(imepos)
616
617               RI_index = buffer_rec(imepos)%indx(i_entry, 1) - start_RI + 1
618               level_small_size = buffer_rec(imepos)%indx(i_entry, 2)
619               level_big_size = buffer_rec(imepos)%indx(i_entry, 3)
620
621               matrix_el = buffer_rec(imepos)%msg(i_entry)
622
623               B_local(level_small_size, level_big_size, RI_index) = matrix_el
624
625            END DO
626
627         END DO
628
629         DO imepos = 0, para_env%num_pe - 1
630            DEALLOCATE (buffer_send(imepos)%msg)
631            DEALLOCATE (buffer_send(imepos)%indx)
632            DEALLOCATE (buffer_rec(imepos)%msg)
633            DEALLOCATE (buffer_rec(imepos)%indx)
634         END DO
635
636         DEALLOCATE (buffer_rec, buffer_send)
637
638      END DO
639
640      DEALLOCATE (num_entries_send, num_entries_rec)
641
642      DEALLOCATE (mepos_from_RI_index)
643
644      DEALLOCATE (entry_counter, req_array)
645
646      CALL timestop(handle)
647
648   END SUBROUTINE
649
650END MODULE bse
651