1!--------------------------------------------------------------------------------------------------!
2!   CP2K: A general program to perform molecular dynamics simulations                              !
3!   Copyright (C) 2000 - 2019  CP2K developers group                                               !
4!--------------------------------------------------------------------------------------------------!
5
6! **************************************************************************************************
7!> \brief All the kernel specific subroutines for XAS TDP calculations
8!> \author A. Bussy (03.2019)
9! **************************************************************************************************
10
11MODULE xas_tdp_kernel
12   USE basis_set_types,                 ONLY: get_gto_basis_set,&
13                                              gto_basis_set_type
14   USE cp_array_utils,                  ONLY: cp_2d_r_p_type
15   USE cp_dbcsr_operations,             ONLY: cp_dbcsr_dist2d_to_dist,&
16                                              dbcsr_deallocate_matrix_set
17   USE cp_para_types,                   ONLY: cp_para_env_type
18   USE dbcsr_api,                       ONLY: &
19        dbcsr_add, dbcsr_complete_redistribute, dbcsr_copy, dbcsr_create, dbcsr_desymmetrize, &
20        dbcsr_distribution_release, dbcsr_distribution_type, dbcsr_filter, dbcsr_finalize, &
21        dbcsr_get_block_p, dbcsr_get_info, dbcsr_get_stored_coordinates, &
22        dbcsr_iterator_blocks_left, dbcsr_iterator_next_block, dbcsr_iterator_start, &
23        dbcsr_iterator_stop, dbcsr_iterator_type, dbcsr_multiply, dbcsr_p_type, dbcsr_put_block, &
24        dbcsr_release, dbcsr_reserve_block2d, dbcsr_set, dbcsr_transposed, dbcsr_type, &
25        dbcsr_type_symmetric
26   USE distribution_2d_types,           ONLY: distribution_2d_type
27   USE kinds,                           ONLY: dp
28   USE message_passing,                 ONLY: mp_bcast,&
29                                              mp_irecv,&
30                                              mp_isend,&
31                                              mp_waitall
32   USE qs_environment_types,            ONLY: get_qs_env,&
33                                              qs_environment_type
34   USE qs_kind_types,                   ONLY: get_qs_kind,&
35                                              qs_kind_type
36   USE qs_o3c_methods,                  ONLY: contract3_o3c
37   USE qs_o3c_types,                    ONLY: &
38        get_o3c_container, get_o3c_iterator_info, get_o3c_vec, o3c_container_type, o3c_iterate, &
39        o3c_iterator_create, o3c_iterator_release, o3c_iterator_type, o3c_vec_create, &
40        o3c_vec_release, o3c_vec_type
41   USE util,                            ONLY: get_limit
42   USE xas_tdp_types,                   ONLY: donor_state_type,&
43                                              xas_tdp_control_type,&
44                                              xas_tdp_env_type
45
46!$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num
47#include "./base/base_uses.f90"
48
49   IMPLICIT NONE
50   PRIVATE
51
52   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'xas_tdp_kernel'
53
54   PUBLIC :: kernel_coulomb_xc, kernel_exchange
55
56CONTAINS
57
58! **************************************************************************************************
59!> \brief Computes, if asked for it, the Coulomb and XC kernel matrices, in the usuall matrix format
60!> \param coul_ker pointer the the Coulomb kernel matrix (can be void pointer)
61!> \param xc_ker array of pointer to the different xc kernels (5 of them):
62!>               1) the restricted closed-shell singlet kernel
63!>               2) the restricted closed-shell triplet kernel
64!>               3) the spin-conserving open-shell xc kernel
65!>               4) the on-diagonal spin-flip open-shell xc kernel
66!> \param donor_state ...
67!> \param xas_tdp_env ...
68!> \param xas_tdp_control ...
69!> \param qs_env ...
70!> \note Coulomb and xc kernel are put together in the same routine because they use the same RI
71!>       Coulomb: (aI|Jb) = (aI|P) (P|Q)^-1 (Q|Jb)
72!>       XC : (aI|fxc|Jb) = (aI|P) (P|Q)^-1 (Q|fxc|R) (R|S)^-1 (S|Jb)
73!>       In the above formula, a,b label the sgfs
74!>       The routine analyses the xas_tdp_control to know which kernel must be computed and how
75!>       (open-shell, singlet, triplet, ROKS, LSD, etc...)
76!>       On entry, the pointers should be allocated
77! **************************************************************************************************
78   SUBROUTINE kernel_coulomb_xc(coul_ker, xc_ker, donor_state, xas_tdp_env, xas_tdp_control, qs_env)
79
80      TYPE(dbcsr_type), INTENT(INOUT)                    :: coul_ker
81      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: xc_ker
82      TYPE(donor_state_type), POINTER                    :: donor_state
83      TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
84      TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
85      TYPE(qs_environment_type), POINTER                 :: qs_env
86
87      CHARACTER(len=*), PARAMETER :: routineN = 'kernel_coulomb_xc', &
88         routineP = moduleN//':'//routineN
89
90      INTEGER                                            :: bo(2), handle, i, ibatch, iex, lb, &
91                                                            natom, nbatch, ndo_mo, ndo_so, &
92                                                            nex_atom, nsgfp, ri_atom, source, ub
93      INTEGER, DIMENSION(:), POINTER                     :: blk_size
94      LOGICAL                                            :: do_coulomb, do_sc, do_sf, do_sg, do_tp, &
95                                                            do_xc, found
96      REAL(dp), DIMENSION(:, :), POINTER                 :: PQ
97      TYPE(cp_para_env_type), POINTER                    :: para_env
98      TYPE(dbcsr_distribution_type), POINTER             :: dist
99      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: contr1_int
100
101      NULLIFY (contr1_int, PQ, para_env, dist, blk_size)
102
103!  Initialization
104      ndo_mo = donor_state%ndo_mo
105      do_xc = xas_tdp_control%do_xc
106      do_sg = xas_tdp_control%do_singlet
107      do_tp = xas_tdp_control%do_triplet
108      do_sc = xas_tdp_control%do_spin_cons
109      do_sf = xas_tdp_control%do_spin_flip
110      ndo_so = ndo_mo; IF (xas_tdp_control%do_uks) ndo_so = 2*ndo_mo
111      ri_atom = donor_state%at_index
112      CALL get_qs_env(qs_env, natom=natom, para_env=para_env)
113      do_coulomb = xas_tdp_control%do_coulomb
114      dist => donor_state%dbcsr_dist
115      blk_size => donor_state%blk_size
116
117!  If no Coulomb nor xc, simply exit
118      IF ((.NOT. do_coulomb) .AND. (.NOT. do_xc)) RETURN
119
120      CALL timeset(routineN, handle)
121
122!  Contract the RI 3-center integrals once to get (aI|P)
123      CALL contract_o3c_int(contr1_int, "COULOMB", donor_state, xas_tdp_env, xas_tdp_control, qs_env)
124
125!  Deal with the Coulomb case
126      IF (do_coulomb) CALL coulomb(coul_ker, contr1_int, dist, blk_size, xas_tdp_env, &
127                                   xas_tdp_control, qs_env)
128
129!  Deal with the XC case
130      IF (do_xc) THEN
131
132         ! In the end, we compute: (aI|fxc|Jb) = (aI|P) (P|Q)^-1 (Q|fxc|R) (R|S)^-1 (S|Jb)
133         ! where fxc can take different spin contributions.
134
135         ! Precompute the product (aI|P) * (P|Q)^-1 and store it in contr1_int
136         PQ => xas_tdp_env%ri_inv_coul
137         CALL ri_all_blocks_mm(contr1_int, PQ)
138
139         ! If not already done (e.g. when multpile donor states for a given excited atom), broadcast
140         ! the RI matrix (Q|fxc|R) on all procs
141         IF (.NOT. xas_tdp_env%fxc_avail) THEN
142            ! Find on which processor the integrals (Q|fxc|R) for this atom are stored
143            nsgfp = SIZE(PQ, 1)
144            CALL get_qs_env(qs_env, para_env=para_env)
145            found = .FALSE.
146            nbatch = para_env%num_pe/xas_tdp_control%batch_size
147            IF (nbatch*xas_tdp_control%batch_size .NE. para_env%num_pe) nbatch = nbatch + 1
148            nex_atom = SIZE(xas_tdp_env%ex_atom_indices)
149
150            DO ibatch = 0, nbatch - 1
151
152               bo = get_limit(nex_atom, nbatch, ibatch)
153               DO iex = bo(1), bo(2)
154
155                  IF (xas_tdp_env%ex_atom_indices(iex) == ri_atom) THEN
156                     source = ibatch*xas_tdp_control%batch_size !fxc is on all procs of the batch,
157                     found = .TRUE. !but simply take the first
158                     EXIT
159                  END IF
160               END DO !iex
161               IF (found) EXIT
162            END DO !ip
163
164            ! Broadcast the integrals to all procs (deleted after all donor states for this atoms are treated)
165            lb = 1; IF (do_sf .AND. .NOT. do_sc) lb = 4
166            ub = 2; IF (do_sc) ub = 3; IF (do_sf) ub = 4
167            DO i = lb, ub
168               IF (.NOT. ASSOCIATED(xas_tdp_env%ri_fxc(ri_atom, i)%array)) THEN
169                  ALLOCATE (xas_tdp_env%ri_fxc(ri_atom, i)%array(nsgfp, nsgfp))
170               END IF
171               CALL mp_bcast(xas_tdp_env%ri_fxc(ri_atom, i)%array, source, para_env%group)
172            END DO
173
174            xas_tdp_env%fxc_avail = .TRUE.
175         END IF
176
177         ! Case study on the calculation type
178         IF (do_sg .OR. do_tp) THEN
179            CALL rcs_xc(xc_ker(1)%matrix, xc_ker(2)%matrix, contr1_int, dist, blk_size, &
180                        donor_state, xas_tdp_env, xas_tdp_control, qs_env)
181         END IF
182
183         IF (do_sc) THEN
184            CALL sc_os_xc(xc_ker(3)%matrix, contr1_int, dist, blk_size, donor_state, &
185                          xas_tdp_env, xas_tdp_control, qs_env)
186         END IF
187
188         IF (do_sf) THEN
189            CALL ondiag_sf_os_xc(xc_ker(4)%matrix, contr1_int, dist, blk_size, donor_state, &
190                                 xas_tdp_env, xas_tdp_control, qs_env)
191         END IF
192
193      END IF ! do_xc
194
195!  Clean-up
196      CALL dbcsr_deallocate_matrix_set(contr1_int)
197
198      CALL timestop(handle)
199
200   END SUBROUTINE kernel_coulomb_xc
201
202! **************************************************************************************************
203!> \brief Create the matrix containing the Coulomb kernel, which is:
204!>        (aI_sigma|J_tau b) ~= (aI_sigma|P) * (P|Q) * (Q|J_tau b)
205!> \param coul_ker the Coulomb kernel
206!> \param contr1_int the once contracted RI integrals (aI|P)
207!> \param dist the inherited dbcsr ditribution
208!> \param blk_size the inherited block sizes
209!> \param xas_tdp_env ...
210!> \param xas_tdp_control ...
211!> \param qs_env ...
212! **************************************************************************************************
213   SUBROUTINE coulomb(coul_ker, contr1_int, dist, blk_size, xas_tdp_env, xas_tdp_control, qs_env)
214
215      TYPE(dbcsr_type), INTENT(INOUT)                    :: coul_ker
216      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: contr1_int
217      TYPE(dbcsr_distribution_type), POINTER             :: dist
218      INTEGER, DIMENSION(:), POINTER                     :: blk_size
219      TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
220      TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
221      TYPE(qs_environment_type), POINTER                 :: qs_env
222
223      CHARACTER(len=*), PARAMETER :: routineN = 'coulomb', routineP = moduleN//':'//routineN
224
225      LOGICAL                                            :: quadrants(3)
226      REAL(dp), DIMENSION(:, :), POINTER                 :: PQ
227      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: lhs_int, rhs_int
228      TYPE(dbcsr_type)                                   :: work_mat
229
230      NULLIFY (PQ, rhs_int, lhs_int)
231
232      ! Get the inver RI coulomb
233      PQ => xas_tdp_env%ri_inv_coul
234
235      ! Create a normal type work matrix
236      CALL dbcsr_create(work_mat, name="WORK", matrix_type="N", dist=dist, &
237                        row_blk_size=blk_size, col_blk_size=blk_size)
238
239      ! Compute the product (aI|P) * (P|Q)^-1 * (Q|Jb) = (aI|Jb)
240      rhs_int => contr1_int ! the incoming contr1_int is not modified
241      ALLOCATE (lhs_int(SIZE(contr1_int)))
242      CALL copy_ri_contr_int(lhs_int, rhs_int) ! RHS containts (Q|JB)^T
243      CALL ri_all_blocks_mm(lhs_int, PQ) ! LHS contatins (aI|P)*(P|Q)^-1
244
245      !In the special case of ROKS, same MOs for each spin => put same (aI|Jb) product on the
246      !alpha-alpha, alpha-beta and beta-beta quadrants of the kernel matrix.
247      IF (xas_tdp_control%do_roks) THEN
248         quadrants = [.TRUE., .TRUE., .TRUE.]
249      ELSE
250         quadrants = [.TRUE., .FALSE., .FALSE.]
251      END IF
252      CALL ri_int_product(work_mat, lhs_int, rhs_int, quadrants, qs_env, &
253                          eps_filter=xas_tdp_control%eps_filter)
254      CALL dbcsr_finalize(work_mat)
255
256      !Create the symmetric kernel matrix and redistribute work_mat into it
257      CALL dbcsr_create(coul_ker, name="COULOMB KERNEL", matrix_type="S", dist=dist, &
258                        row_blk_size=blk_size, col_blk_size=blk_size)
259      CALL dbcsr_complete_redistribute(work_mat, coul_ker)
260
261      !clean-up
262      CALL dbcsr_release(work_mat)
263      CALL dbcsr_deallocate_matrix_set(lhs_int)
264
265   END SUBROUTINE coulomb
266
267! **************************************************************************************************
268!> \brief Create the matrix containing the XC kenrel in the spin-conserving open-shell case:
269!>        (aI_sigma|fxc|J_tau b) ~= (aI_sigma|P) (P|Q)^-1 (Q|fxc|R) (R|S)^-1 (S|J_tau b)
270!> \param xc_ker the kernel matrix
271!> \param contr1_int_PQ the once contracted RI integrals, with inverse coulomb: (aI_sigma|P) (P|Q)^-1
272!> \param dist inherited dbcsr dist
273!> \param blk_size inherited block sizes
274!> \param donor_state ...
275!> \param xas_tdp_env ...
276!> \param xas_tdp_control ...
277!> \param qs_env ...
278!> note Prior to calling this function, the (Q|fxc|R) integral must be brodcasted to all procs
279! **************************************************************************************************
280   SUBROUTINE sc_os_xc(xc_ker, contr1_int_PQ, dist, blk_size, donor_state, xas_tdp_env, &
281                       xas_tdp_control, qs_env)
282
283      TYPE(dbcsr_type), INTENT(INOUT)                    :: xc_ker
284      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: contr1_int_PQ
285      TYPE(dbcsr_distribution_type), POINTER             :: dist
286      INTEGER, DIMENSION(:), POINTER                     :: blk_size
287      TYPE(donor_state_type), POINTER                    :: donor_state
288      TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
289      TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
290      TYPE(qs_environment_type), POINTER                 :: qs_env
291
292      CHARACTER(len=*), PARAMETER :: routineN = 'sc_os_xc', routineP = moduleN//':'//routineN
293
294      INTEGER                                            :: ndo_mo, ri_atom
295      LOGICAL                                            :: quadrants(3)
296      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: lhs_int, rhs_int
297      TYPE(dbcsr_type)                                   :: work_mat
298
299      NULLIFY (lhs_int, rhs_int)
300
301      ! Initialization
302      ndo_mo = donor_state%ndo_mo
303      ri_atom = donor_state%at_index
304      !normal type work matrix such that distribution of all spin quadrants match
305      CALL dbcsr_create(work_mat, name="WORK", matrix_type="N", dist=dist, &
306                        row_blk_size=blk_size, col_blk_size=blk_size)
307
308      rhs_int => contr1_int_PQ ! contains [ (aI|P)*(P|Q)^-1 ]^T
309      ALLOCATE (lhs_int(SIZE(contr1_int_PQ))) ! will contain (aI|P)*(P|Q)^-1 * (Q|fxc|R)
310
311      ! Case study: UKS or ROKS ?
312      IF (xas_tdp_control%do_uks) THEN
313
314         ! In the case of UKS, donor MOs might be different for different spins. Moreover, the
315         ! fxc itself might change since fxc = fxc_sigma,tau
316         ! => Carfully treat each spin-quadrant separately
317
318         ! alpha-alpha spin quadrant (upper-lefet)
319         quadrants = [.TRUE., .FALSE., .FALSE.]
320
321         ! Copy the alpha part into lhs_int, multiply by the alpha-alpha (Q|fxc|R) and then
322         ! by the alpha part of rhs_int
323         CALL copy_ri_contr_int(lhs_int(1:ndo_mo), rhs_int(1:ndo_mo))
324         CALL ri_all_blocks_mm(lhs_int(1:ndo_mo), xas_tdp_env%ri_fxc(ri_atom, 1)%array)
325         CALL ri_int_product(work_mat, lhs_int(1:ndo_mo), rhs_int(1:ndo_mo), quadrants, qs_env, &
326                             eps_filter=xas_tdp_control%eps_filter)
327
328         ! alpha-beta spin quadrant (upper-right)
329         quadrants = [.FALSE., .TRUE., .FALSE.]
330
331         !Copy the alpha part into LHS, multiply by the alpha-beta kernel and the beta part of RHS
332         CALL copy_ri_contr_int(lhs_int(1:ndo_mo), rhs_int(1:ndo_mo))
333         CALL ri_all_blocks_mm(lhs_int(1:ndo_mo), xas_tdp_env%ri_fxc(ri_atom, 2)%array)
334         CALL ri_int_product(work_mat, lhs_int(1:ndo_mo), rhs_int(ndo_mo + 1:2*ndo_mo), &
335                             quadrants, qs_env, eps_filter=xas_tdp_control%eps_filter)
336
337         ! beta-beta spin quadrant (lower-right)
338         quadrants = [.FALSE., .FALSE., .TRUE.]
339
340         !Copy the beta part into LHS, multiply by the beta-beta kernel and the beta part of RHS
341         CALL copy_ri_contr_int(lhs_int(ndo_mo + 1:2*ndo_mo), rhs_int(ndo_mo + 1:2*ndo_mo))
342         CALL ri_all_blocks_mm(lhs_int(ndo_mo + 1:2*ndo_mo), xas_tdp_env%ri_fxc(ri_atom, 3)%array)
343         CALL ri_int_product(work_mat, lhs_int(ndo_mo + 1:2*ndo_mo), rhs_int(ndo_mo + 1:2*ndo_mo), &
344                             quadrants, qs_env, eps_filter=xas_tdp_control%eps_filter)
345
346      ELSE IF (xas_tdp_control%do_roks) THEN
347
348         ! In the case of ROKS, fxc = fxc_sigma,tau is different for each spin quadrant, but the
349         ! donor MOs remain the same
350
351         ! alpha-alpha kernel in the upper left quadrant
352         quadrants = [.TRUE., .FALSE., .FALSE.]
353
354         !Copy the LHS and multiply by alpha-alpha kernel
355         CALL copy_ri_contr_int(lhs_int, rhs_int)
356         CALL ri_all_blocks_mm(lhs_int, xas_tdp_env%ri_fxc(ri_atom, 1)%array)
357         CALL ri_int_product(work_mat, lhs_int, rhs_int, quadrants, qs_env, &
358                             eps_filter=xas_tdp_control%eps_filter)
359
360         ! alpha-beta kernel in the upper-right quadrant
361         quadrants = [.FALSE., .TRUE., .FALSE.]
362
363         !Copy LHS and multiply by the alpha-beta kernel
364         CALL copy_ri_contr_int(lhs_int, rhs_int)
365         CALL ri_all_blocks_mm(lhs_int, xas_tdp_env%ri_fxc(ri_atom, 2)%array)
366         CALL ri_int_product(work_mat, lhs_int, rhs_int, quadrants, qs_env, &
367                             eps_filter=xas_tdp_control%eps_filter)
368
369         ! beta-beta kernel in the lower-right quadrant
370         quadrants = [.FALSE., .FALSE., .TRUE.]
371
372         !Copy the LHS and multiply by the beta-beta kernel
373         CALL copy_ri_contr_int(lhs_int, rhs_int)
374         CALL ri_all_blocks_mm(lhs_int, xas_tdp_env%ri_fxc(ri_atom, 3)%array)
375         CALL ri_int_product(work_mat, lhs_int, rhs_int, quadrants, qs_env, &
376                             eps_filter=xas_tdp_control%eps_filter)
377
378      END IF
379      CALL dbcsr_finalize(work_mat)
380
381      ! Create a symmetric kernel matrix and redistribute the normal work matrix into it
382      CALL dbcsr_create(xc_ker, name="SC OS XC KERNEL", matrix_type="S", dist=dist, &
383                        row_blk_size=blk_size, col_blk_size=blk_size)
384      CALL dbcsr_complete_redistribute(work_mat, xc_ker)
385
386      !clean-up
387      CALL dbcsr_deallocate_matrix_set(lhs_int)
388      CALL dbcsr_release(work_mat)
389
390   END SUBROUTINE sc_os_xc
391
392! **************************************************************************************************
393!> \brief Create the matrix containing the on-diagonal spin-flip XC kernel (open-shell), which is:
394!>        (a I_sigma|fxc|J_tau b) * delta_sigma,tau, fxc = 1/(rhoa-rhob) * (dE/drhoa - dE/drhob)
395!>        with RI: (a I_sigma|fxc|J_tau b) ~= (aI_sigma|P) (P|Q)^-1 (Q|fxc|R) (R|S)^-1 (S|J_tau b)
396!> \param xc_ker the kernel matrix
397!> \param contr1_int_PQ the once contracted RI integrals with coulomb product: (aI_sigma|P) (P|Q)^-1
398!> \param dist inherited dbcsr dist
399!> \param blk_size inherited block sizes
400!> \param donor_state ...
401!> \param xas_tdp_env ...
402!> \param xas_tdp_control ...
403!> \param qs_env ...
404!> \note  It must be later on multiplied by the spin-swapped Q projector
405!>        Prior to calling this function, the (Q|fxc|R) integral must be brodcasted to all procs
406! **************************************************************************************************
407   SUBROUTINE ondiag_sf_os_xc(xc_ker, contr1_int_PQ, dist, blk_size, donor_state, xas_tdp_env, &
408                              xas_tdp_control, qs_env)
409
410      TYPE(dbcsr_type), INTENT(INOUT)                    :: xc_ker
411      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: contr1_int_PQ
412      TYPE(dbcsr_distribution_type), POINTER             :: dist
413      INTEGER, DIMENSION(:), POINTER                     :: blk_size
414      TYPE(donor_state_type), POINTER                    :: donor_state
415      TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
416      TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
417      TYPE(qs_environment_type), POINTER                 :: qs_env
418
419      CHARACTER(len=*), PARAMETER :: routineN = 'ondiag_sf_os_xc', &
420         routineP = moduleN//':'//routineN
421
422      INTEGER                                            :: ndo_mo, ri_atom
423      LOGICAL                                            :: quadrants(3)
424      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: lhs_int, rhs_int
425      TYPE(dbcsr_type)                                   :: work_mat
426
427      NULLIFY (lhs_int, rhs_int)
428
429      ! Initialization
430      ndo_mo = donor_state%ndo_mo
431      ri_atom = donor_state%at_index
432      !normal type work matrix such that distribution of all spin quadrants match
433      CALL dbcsr_create(work_mat, name="WORK", matrix_type="N", dist=dist, &
434                        row_blk_size=blk_size, col_blk_size=blk_size)
435
436      !Create a lhs_int, in which the whole (aI_sigma|P) (P|Q)^-1 (Q|fxc|R) will be put
437      !because in spin-flip fxc is spin-independant, can take the product once and for all
438      rhs_int => contr1_int_PQ
439      ALLOCATE (lhs_int(SIZE(contr1_int_PQ)))
440      CALL copy_ri_contr_int(lhs_int, rhs_int)
441      CALL ri_all_blocks_mm(lhs_int, xas_tdp_env%ri_fxc(ri_atom, 4)%array)
442
443      ! Case study: UKS or ROKS ?
444      IF (xas_tdp_control%do_uks) THEN
445
446         ! In the case of UKS, donor MOs might be different for different spins
447         ! => Carfully treat each spin-quadrant separately
448         ! NO alpha-beta because of the delta_sigma,tau
449
450         ! alpha-alpha spin quadrant (upper-lefet)
451         quadrants = [.TRUE., .FALSE., .FALSE.]
452         CALL ri_int_product(work_mat, lhs_int(1:ndo_mo), rhs_int(1:ndo_mo), quadrants, qs_env, &
453                             eps_filter=xas_tdp_control%eps_filter)
454
455         ! beta-beta spin quadrant (lower-right)
456         quadrants = [.FALSE., .FALSE., .TRUE.]
457         CALL ri_int_product(work_mat, lhs_int(ndo_mo + 1:2*ndo_mo), rhs_int(ndo_mo + 1:2*ndo_mo), &
458                             quadrants, qs_env, eps_filter=xas_tdp_control%eps_filter)
459
460      ELSE IF (xas_tdp_control%do_roks) THEN
461
462         ! In the case of ROKS, same donor MOs for both spins => can do it all at once
463         ! But NOT the alpha-beta quadrant because of delta_sigma,tau
464
465         quadrants = [.TRUE., .FALSE., .TRUE.]
466         CALL ri_int_product(work_mat, lhs_int, rhs_int, quadrants, qs_env, &
467                             eps_filter=xas_tdp_control%eps_filter)
468
469      END IF
470      CALL dbcsr_finalize(work_mat)
471
472      ! Create a symmetric kernel matrix and redistribute the normal work matrix into it
473      CALL dbcsr_create(xc_ker, name="ON-DIAG SF OS XC KERNEL", matrix_type="S", dist=dist, &
474                        row_blk_size=blk_size, col_blk_size=blk_size)
475      CALL dbcsr_complete_redistribute(work_mat, xc_ker)
476
477      !clean-up
478      CALL dbcsr_deallocate_matrix_set(lhs_int)
479      CALL dbcsr_release(work_mat)
480
481   END SUBROUTINE ondiag_sf_os_xc
482
483! **************************************************************************************************
484!> \brief Create the matrix containing the XC kernel in the restricted closed-shell case, for
485!>        singlets: (aI|fxc|Jb) = (aI|P) (P|Q)^-1 (Q|fxc_alp,alp+fxc_alp,bet|R) (R|S)^-1 (S|Jb)
486!>        triplets: (aI|fxc|Jb) = (aI|P) (P|Q)^-1 (Q|fxc_alp,alp-fxc_alp,bet|R) (R|S)^-1 (S|Jb)
487!> \param sg_xc_ker the singlet kernel matrix
488!> \param tp_xc_ker the triplet kernel matrix
489!> \param contr1_int_PQ the once contracted RI integrals including inverse 2-center Coulomb prodcut:
490!>                      (aI|P)*(P|Q)^-1
491!> \param dist inherited dbcsr dist
492!> \param blk_size inherited block sizes
493!> \param donor_state ...
494!> \param xas_tdp_env ...
495!> \param xas_tdp_control ...
496!> \param qs_env ...
497! **************************************************************************************************
498   SUBROUTINE rcs_xc(sg_xc_ker, tp_xc_ker, contr1_int_PQ, dist, blk_size, donor_state, &
499                     xas_tdp_env, xas_tdp_control, qs_env)
500
501      TYPE(dbcsr_type), INTENT(INOUT)                    :: sg_xc_ker, tp_xc_ker
502      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: contr1_int_PQ
503      TYPE(dbcsr_distribution_type), POINTER             :: dist
504      INTEGER, DIMENSION(:), POINTER                     :: blk_size
505      TYPE(donor_state_type), POINTER                    :: donor_state
506      TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
507      TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
508      TYPE(qs_environment_type), POINTER                 :: qs_env
509
510      CHARACTER(len=*), PARAMETER :: routineN = 'rcs_xc', routineP = moduleN//':'//routineN
511
512      INTEGER                                            :: nsgfp, ri_atom
513      LOGICAL                                            :: quadrants(3)
514      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: fxc
515      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: lhs_int, rhs_int
516      TYPE(dbcsr_type)                                   :: work_mat
517
518      NULLIFY (lhs_int, rhs_int)
519
520      ! Initialization
521      ri_atom = donor_state%at_index
522      nsgfp = SIZE(xas_tdp_env%ri_fxc(ri_atom, 1)%array, 1)
523      rhs_int => contr1_int_PQ ! RHS contains [ (aI|P)*(P|Q)^-1 ]^T
524      ALLOCATE (lhs_int(SIZE(contr1_int_PQ))) ! LHS will contatin (aI|P)*(P|Q)^-1 * (Q|fxc|R)
525
526      ! Work structures
527      ALLOCATE (fxc(nsgfp, nsgfp))
528      CALL dbcsr_create(work_mat, name="WORK", matrix_type="N", dist=dist, &
529                        row_blk_size=blk_size, col_blk_size=blk_size)
530
531      ! Case study: singlet and/or triplet ?
532      IF (xas_tdp_control%do_singlet) THEN
533
534         ! Take the sum of fxc for alpha-alpha and alpha-beta
535         CALL dcopy(nsgfp*nsgfp, xas_tdp_env%ri_fxc(ri_atom, 1)%array, 1, fxc, 1)
536         CALL daxpy(nsgfp*nsgfp, 1.0_dp, xas_tdp_env%ri_fxc(ri_atom, 2)%array, 1, fxc, 1)
537
538         ! Copy the fresh lhs_int = (aI|P) (P|Q)^-1 and multiply by (Q|fxc|R)
539         CALL copy_ri_contr_int(lhs_int, rhs_int)
540         CALL ri_all_blocks_mm(lhs_int, fxc)
541
542         ! Compute the final LHS RHS product => spin-restricted, only upper-left quadrant
543         quadrants = [.TRUE., .FALSE., .FALSE.]
544         CALL ri_int_product(work_mat, lhs_int, rhs_int, quadrants, qs_env, &
545                             eps_filter=xas_tdp_control%eps_filter)
546         CALL dbcsr_finalize(work_mat)
547
548         !Create the symmetric kernel matrix and redistribute work_mat into it
549         CALL dbcsr_create(sg_xc_ker, name="XC SINGLET KERNEL", matrix_type="S", dist=dist, &
550                           row_blk_size=blk_size, col_blk_size=blk_size)
551         CALL dbcsr_complete_redistribute(work_mat, sg_xc_ker)
552
553      END IF
554
555      IF (xas_tdp_control%do_triplet) THEN
556
557         ! Take the difference of fxc for alpha-alpha and alpha-beta
558         CALL dcopy(nsgfp*nsgfp, xas_tdp_env%ri_fxc(ri_atom, 1)%array, 1, fxc, 1)
559         CALL daxpy(nsgfp*nsgfp, -1.0_dp, xas_tdp_env%ri_fxc(ri_atom, 2)%array, 1, fxc, 1)
560
561         ! Copy the fresh lhs_int = (aI|P) (P|Q)^-1 and multiply by (Q|fxc|R)
562         CALL copy_ri_contr_int(lhs_int, rhs_int)
563         CALL ri_all_blocks_mm(lhs_int, fxc)
564
565         ! Compute the final LHS RHS product => spin-restricted, only upper-left quadrant
566         quadrants = [.TRUE., .FALSE., .FALSE.]
567         CALL ri_int_product(work_mat, lhs_int, rhs_int, quadrants, qs_env, &
568                             eps_filter=xas_tdp_control%eps_filter)
569         CALL dbcsr_finalize(work_mat)
570
571         !Create the symmetric kernel matrix and redistribute work_mat into it
572         CALL dbcsr_create(tp_xc_ker, name="XC TRIPLET KERNEL", matrix_type="S", dist=dist, &
573                           row_blk_size=blk_size, col_blk_size=blk_size)
574         CALL dbcsr_complete_redistribute(work_mat, tp_xc_ker)
575
576      END IF
577
578      ! clean-up
579      CALL dbcsr_deallocate_matrix_set(lhs_int)
580      CALL dbcsr_release(work_mat)
581      DEALLOCATE (fxc)
582
583   END SUBROUTINE rcs_xc
584
585! **************************************************************************************************
586!> \brief Computes the exact exchange kernel matrix using RI. Returns an array of 2 matrices,
587!>        which are:
588!>        1) the on-diagonal kernel: (ab|I_sigma J_tau) * delta_sigma,tau
589!>        2) the off-diagonal spin-conserving kernel: (aJ_sigma|I_tau b) * delta_sigma,tau
590!>        An internal analysis determines which of the above are computed (can range from 0 to 2),
591!> \param ex_ker ...
592!> \param donor_state ...
593!> \param xas_tdp_env ...
594!> \param xas_tdp_control ...
595!> \param qs_env ...
596!> \note In the case of spin-conserving excitation, the kernel must later be multiplied by the
597!>       usual Q projector. In the case of spin-flip, one needs to project the excitations coming
598!>       from alpha donor MOs on the unoccupied beta MOs. This is done by multiplying by a Q
599!>       projector where the alpha-alpha and beta-beta quadrants are swapped
600!>       The ex_ker array should be allocated on entry (not the internals)
601! **************************************************************************************************
602   SUBROUTINE kernel_exchange(ex_ker, donor_state, xas_tdp_env, xas_tdp_control, qs_env)
603
604      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: ex_ker
605      TYPE(donor_state_type), POINTER                    :: donor_state
606      TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
607      TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
608      TYPE(qs_environment_type), POINTER                 :: qs_env
609
610      CHARACTER(len=*), PARAMETER :: routineN = 'kernel_exchange', &
611         routineP = moduleN//':'//routineN
612
613      INTEGER                                            :: handle
614      INTEGER, DIMENSION(:), POINTER                     :: blk_size
615      LOGICAL                                            :: do_off_sc
616      TYPE(dbcsr_distribution_type), POINTER             :: dist
617      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: contr1_int
618
619      NULLIFY (contr1_int, dist, blk_size)
620
621      !Don't do anything if no hfx
622      IF (.NOT. xas_tdp_control%do_hfx) RETURN
623
624      CALL timeset(routineN, handle)
625
626      dist => donor_state%dbcsr_dist
627      blk_size => donor_state%blk_size
628
629      !compute the off-diag spin-conserving only if not TDA and anything that is spin-conserving
630      do_off_sc = (.NOT. xas_tdp_control%tamm_dancoff) .AND. &
631                  (xas_tdp_control%do_spin_cons .OR. xas_tdp_control%do_singlet .OR. xas_tdp_control%do_triplet)
632
633      ! Need the once contracted integrals (aI|P)
634      CALL contract_o3c_int(contr1_int, "EXCHANGE", donor_state, xas_tdp_env, xas_tdp_control, qs_env)
635
636!  The on-diagonal exchange : (ab|P) * (P|Q)^-1 * (Q|I_sigma J_tau) * delta_sigma,tau
637      CALL ondiag_ex(ex_ker(1)%matrix, contr1_int, dist, blk_size, donor_state, xas_tdp_env, &
638                     xas_tdp_control, qs_env)
639
640!  The off-diag spin-conserving case: (aJ_sigma|P) * (P|Q)^-1 * (Q|I_tau b) * delta_sigma,tau
641      IF (do_off_sc) THEN
642         CALL offdiag_ex_sc(ex_ker(2)%matrix, contr1_int, dist, blk_size, donor_state, &
643                            xas_tdp_env, xas_tdp_control, qs_env)
644      END IF
645
646      !clean-up
647      CALL dbcsr_deallocate_matrix_set(contr1_int)
648
649      CALL timestop(handle)
650
651   END SUBROUTINE kernel_exchange
652
653! **************************************************************************************************
654!> \brief Create the matrix containing the on-diagonal exact exchange kernel, which is:
655!>        (ab|I_sigma J_tau) * delta_sigma,tau, where a,b are AOs, I_sigma and J_tau are the donor
656!>        spin-orbitals. A RI is done: (ab|I_sigma J_tau) = (ab|P) * (P|Q)^-1 * (Q|I_sigma J_tau)
657!> \param ondiag_ex_ker the on-diagonal exchange kernel in dbcsr format
658!> \param contr1_int the already once-contracted RI 3-center integrals (aI_sigma|P)
659!>        where each matrix of the array contains the contraction for the donor spin-orbital I_sigma
660!> \param dist the inherited dbcsr distribution
661!> \param blk_size the inherited dbcsr block sizes
662!> \param donor_state ...
663!> \param xas_tdp_env ...
664!> \param xas_tdp_control ...
665!> \param qs_env ...
666!> \note In the presence of a RI metric, we have instead M^-1 * (P|Q) * M^-1
667! **************************************************************************************************
668   SUBROUTINE ondiag_ex(ondiag_ex_ker, contr1_int, dist, blk_size, donor_state, xas_tdp_env, &
669                        xas_tdp_control, qs_env)
670
671      TYPE(dbcsr_type), INTENT(INOUT)                    :: ondiag_ex_ker
672      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: contr1_int
673      TYPE(dbcsr_distribution_type), POINTER             :: dist
674      INTEGER, DIMENSION(:), POINTER                     :: blk_size
675      TYPE(donor_state_type), POINTER                    :: donor_state
676      TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
677      TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
678      TYPE(qs_environment_type), POINTER                 :: qs_env
679
680      CHARACTER(len=*), PARAMETER :: routineN = 'ondiag_ex', routineP = moduleN//':'//routineN
681
682      INTEGER                                            :: blk, iblk, iso, jblk, jso, nblk, ndo_mo, &
683                                                            ndo_so, nsgfa, nsgfp, ri_atom, source
684      INTEGER, DIMENSION(:), POINTER                     :: ri_blk_size, vec_blk_size
685      LOGICAL                                            :: do_roks, do_uks, found
686      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: coeffs, ri_coeffs
687      REAL(dp), DIMENSION(:), POINTER                    :: v
688      REAL(dp), DIMENSION(:, :), POINTER                 :: aIQ, pblock, PQ
689      TYPE(cp_para_env_type), POINTER                    :: para_env
690      TYPE(dbcsr_distribution_type)                      :: opt_dbcsr_dist
691      TYPE(dbcsr_iterator_type)                          :: iter
692      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_s
693      TYPE(dbcsr_type)                                   :: abIJ_desymm, abIJ_desymm_std_dist, &
694                                                            contr1_int_t, work_mat
695      TYPE(dbcsr_type), POINTER                          :: abIJ
696      TYPE(o3c_vec_type), DIMENSION(:), POINTER          :: o3c_vec
697
698      NULLIFY (para_env, matrix_s, ri_blk_size, o3c_vec, v)
699      NULLIFY (vec_blk_size, pblock, abIJ, PQ, aIQ)
700
701      ! We want to compute (ab|I_sigma J_tau) = (ab|P) * (P|Q)^-1 * (Q|I_sigma J_tau)
702      ! Already have (cJ_tau|P)  stored in contr1_int. Need to further contract the
703      ! AOs c with the coeff of the I_alpha spin-orbital.
704
705      ! Initialization
706      ndo_mo = donor_state%ndo_mo
707      ri_atom = donor_state%at_index
708      do_roks = xas_tdp_control%do_roks
709      do_uks = xas_tdp_control%do_uks
710      ndo_so = ndo_mo; IF (do_uks) ndo_so = 2*ndo_mo !if not UKS, same donor MOs for both spins
711      PQ => xas_tdp_env%ri_inv_ex
712
713      CALL get_qs_env(qs_env, para_env=para_env, matrix_s=matrix_s)
714      CALL dbcsr_get_info(contr1_int(1)%matrix, col_blk_size=ri_blk_size)
715      nsgfp = ri_blk_size(ri_atom)
716      nsgfa = SIZE(donor_state%contract_coeffs, 1)
717      ALLOCATE (coeffs(nsgfp, ndo_so), ri_coeffs(nsgfp, ndo_so))
718
719      nblk = SIZE(ri_blk_size)
720      ALLOCATE (o3c_vec(nblk), vec_blk_size(nblk))
721      vec_blk_size = 1; vec_blk_size(ri_atom) = nsgfp
722      CALL o3c_vec_create(o3c_vec, vec_blk_size)
723
724      ! a and b need to overlap for non-zero (ab|IJ) => same block structure as overlap S
725      ! goes into an o3c routine => need compatible distribution_2d
726      CALL cp_dbcsr_dist2d_to_dist(xas_tdp_env%opt_dist2d_ex, opt_dbcsr_dist)
727
728      ALLOCATE (abIJ)
729      CALL dbcsr_create(abIJ, template=matrix_s(1)%matrix, name="(ab|IJ)", dist=opt_dbcsr_dist)
730      CALL dbcsr_complete_redistribute(matrix_s(1)%matrix, abIJ)
731
732      CALL dbcsr_create(abIJ_desymm_std_dist, template=matrix_s(1)%matrix, matrix_type="N")
733
734      CALL dbcsr_create(work_mat, name="WORK", matrix_type="N", dist=dist, &
735                        row_blk_size=blk_size, col_blk_size=blk_size)
736
737      ! Loop over donor spin-orbitals. End matrix is symmetric => span only upper half
738      DO iso = 1, ndo_so
739
740         ! take the (aI|Q) block in contr1_int that has a centered on the excited atom
741         CALL dbcsr_get_stored_coordinates(contr1_int(iso)%matrix, ri_atom, ri_atom, source)
742         IF (para_env%mepos == source) THEN
743            CALL dbcsr_get_block_p(contr1_int(iso)%matrix, ri_atom, ri_atom, aIQ, found)
744         ELSE
745            ALLOCATE (aIQ(nsgfa, nsgfp))
746         END IF
747         CALL mp_bcast(aIQ, source, para_env%group)
748
749         ! get the contraction (Q|IJ) by taking (Q|Ia)*contract_coeffs and put it in coeffs
750         CALL dgemm('T', 'N', nsgfp, ndo_so, nsgfa, 1.0_dp, aIQ, nsgfa, donor_state%contract_coeffs, &
751                    nsgfa, 0.0_dp, coeffs, nsgfp)
752
753         ! take (P|Q)^-1 * (Q|IJ) and put that in ri_coeffs
754         CALL dgemm('N', 'N', nsgfp, ndo_so, nsgfp, 1.0_dp, PQ, nsgfp, coeffs, nsgfp, 0.0_dp, &
755                    ri_coeffs, nsgfp)
756
757         IF (.NOT. para_env%mepos == source) DEALLOCATE (aIQ)
758
759         DO jso = iso, ndo_so
760
761            ! There is no alpha-beta exchange. In case of UKS, iso,jso span all spin-orbitals
762            ! => CYCLE if iso and jso are indexing MOs with different spin (and we have UKS)
763            IF (do_uks .AND. (iso <= ndo_mo .AND. jso > ndo_mo)) CYCLE
764
765            ! compute (ab|IJ) = sum_P (ab|P) * (P|Q)^-1 * (Q|IJ)
766            CALL get_o3c_vec(o3c_vec, ri_atom, v)
767            v(:) = ri_coeffs(:, jso)
768            CALL dbcsr_set(abIJ, 0.0_dp)
769            CALL contract3_o3c(xas_tdp_env%ri_o3c_ex, o3c_vec, abIJ)
770
771            ! (ab|IJ) is symmetric, but need it as normal in the standard dbcsr distribution
772            ! for the block by block copying process
773            CALL dbcsr_desymmetrize(abIJ, abIJ_desymm)
774            CALL dbcsr_filter(abIJ_desymm, 1.0E-12_dp) !if short range exchange, might have 0 blocks
775            CALL dbcsr_complete_redistribute(abIJ_desymm, abIJ_desymm_std_dist)
776
777            CALL dbcsr_iterator_start(iter, abIJ_desymm_std_dist)
778            DO WHILE (dbcsr_iterator_blocks_left(iter))
779
780               CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk, blk=blk)
781               IF (iso == jso .AND. jblk < iblk) CYCLE
782
783               CALL dbcsr_get_block_p(abIJ_desymm_std_dist, iblk, jblk, pblock, found)
784
785               IF (found) THEN
786                  CALL dbcsr_put_block(work_mat, (iso - 1)*nblk + iblk, (jso - 1)*nblk + jblk, pblock)
787
788                  !In case of ROKS, we have (ab|IJ) for alpha-alpha spin, but it is the same for
789                  !beta-beta => replicate the blocks (alpha-beta is zero)
790                  IF (do_roks) THEN
791                     !the beta-beta block
792                     CALL dbcsr_put_block(work_mat, (ndo_so + iso - 1)*nblk + iblk, (ndo_so + jso - 1)*nblk + jblk, pblock)
793                  END IF
794               END IF
795
796            END DO !iterator
797            CALL dbcsr_iterator_stop(iter)
798            CALL dbcsr_release(abIJ_desymm)
799
800         END DO !jso
801         CALL dbcsr_release(contr1_int_t)
802      END DO !iso
803
804      CALL dbcsr_finalize(work_mat)
805      CALL dbcsr_create(ondiag_ex_ker, name="ONDIAG EX KERNEL", matrix_type="S", dist=dist, &
806                        row_blk_size=blk_size, col_blk_size=blk_size)
807      CALL dbcsr_complete_redistribute(work_mat, ondiag_ex_ker)
808
809      !Clean-up
810      CALL dbcsr_release(work_mat)
811      CALL dbcsr_release(abIJ)
812      CALL o3c_vec_release(o3c_vec)
813      CALL dbcsr_distribution_release(opt_dbcsr_dist)
814      CALL dbcsr_release(abIJ_desymm_std_dist)
815      DEALLOCATE (o3c_vec, vec_blk_size, abIJ)
816
817   END SUBROUTINE ondiag_ex
818
819! **************************************************************************************************
820!> \brief Create the matrix containing the off-diagonal exact exchange kernel in the spin-conserving
821!>        case (which also includes excitations from the closed=shell ref state ) This matrix reads:
822!>        (aJ_sigma|I_tau b) * delta_sigma,tau , where a, b are AOs and J_sigma, I_tau are the donor
823!>        spin-orbital. A RI is done: (aJ_sigma|I_tau b) = (aJ_sigma|P) * (P|Q)^-1 * (Q|I_tau b)
824!> \param offdiag_ex_ker the off-diagonal, spin-conserving exchange kernel in dbcsr format
825!> \param contr1_int the once-contracted RI integrals: (aJ_sigma|P)
826!> \param dist the inherited dbcsr ditribution
827!> \param blk_size the inherited block sizes
828!> \param donor_state ...
829!> \param xas_tdp_env ...
830!> \param xas_tdp_control ...
831!> \param qs_env ...
832! **************************************************************************************************
833   SUBROUTINE offdiag_ex_sc(offdiag_ex_ker, contr1_int, dist, blk_size, donor_state, xas_tdp_env, &
834                            xas_tdp_control, qs_env)
835
836      TYPE(dbcsr_type), INTENT(INOUT)                    :: offdiag_ex_ker
837      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: contr1_int
838      TYPE(dbcsr_distribution_type), POINTER             :: dist
839      INTEGER, DIMENSION(:), POINTER                     :: blk_size
840      TYPE(donor_state_type), POINTER                    :: donor_state
841      TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
842      TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
843      TYPE(qs_environment_type), POINTER                 :: qs_env
844
845      CHARACTER(len=*), PARAMETER :: routineN = 'offdiag_ex_sc', routineP = moduleN//':'//routineN
846
847      INTEGER                                            :: ndo_mo
848      LOGICAL                                            :: do_roks, do_uks, quadrants(3)
849      REAL(dp), DIMENSION(:, :), POINTER                 :: PQ
850      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: lhs_int, rhs_int
851      TYPE(dbcsr_type)                                   :: work_mat
852
853      NULLIFY (PQ, lhs_int, rhs_int)
854
855      !Initialization
856      ndo_mo = donor_state%ndo_mo
857      do_roks = xas_tdp_control%do_roks
858      do_uks = xas_tdp_control%do_uks
859      PQ => xas_tdp_env%ri_inv_ex
860
861      rhs_int => contr1_int
862      ALLOCATE (lhs_int(SIZE(contr1_int)))
863      CALL copy_ri_contr_int(lhs_int, rhs_int)
864      CALL ri_all_blocks_mm(lhs_int, PQ)
865
866      !Given the lhs_int and rhs_int, all we need to do is multiply elements from the former by
867      !the transpose of the later, and put the result in the correct spin quadrants
868
869      !Create a normal type work matrix
870      CALL dbcsr_create(work_mat, name="WORK", matrix_type="N", dist=dist, &
871                        row_blk_size=blk_size, col_blk_size=blk_size)
872
873      !Case study on closed-shell, ROKS or UKS
874      IF (do_roks) THEN
875         !In ROKS, the donor MOs for each spin are the same => copy the product in both the
876         !alpha-alpha and the beta-beta quadrants
877         quadrants = [.TRUE., .FALSE., .TRUE.]
878         CALL ri_int_product(work_mat, lhs_int, rhs_int, quadrants, qs_env, &
879                             eps_filter=xas_tdp_control%eps_filter, mo_transpose=.TRUE.)
880
881      ELSE IF (do_uks) THEN
882         !In UKS, the donor MOs are possibly different for each spin => start with the
883         !alpha-alpha product and the perform the beta-beta product separately
884         quadrants = [.TRUE., .FALSE., .FALSE.]
885         CALL ri_int_product(work_mat, lhs_int(1:ndo_mo), rhs_int(1:ndo_mo), quadrants, &
886                             qs_env, eps_filter=xas_tdp_control%eps_filter, mo_transpose=.TRUE.)
887
888         quadrants = [.FALSE., .FALSE., .TRUE.]
889         CALL ri_int_product(work_mat, lhs_int(ndo_mo + 1:2*ndo_mo), rhs_int(ndo_mo + 1:2*ndo_mo), &
890                             quadrants, qs_env, eps_filter=xas_tdp_control%eps_filter, mo_transpose=.TRUE.)
891      ELSE
892         !In the restricted closed-shell case, only have one spin and a single qudarant
893         quadrants = [.TRUE., .FALSE., .FALSE.]
894         CALL ri_int_product(work_mat, lhs_int, rhs_int, quadrants, qs_env, &
895                             eps_filter=xas_tdp_control%eps_filter, mo_transpose=.TRUE.)
896      END IF
897      CALL dbcsr_finalize(work_mat)
898
899      !Create the symmetric kernel matrix and redistribute work_mat into it
900      CALL dbcsr_create(offdiag_ex_ker, name="OFFDIAG EX KERNEL", matrix_type="S", dist=dist, &
901                        row_blk_size=blk_size, col_blk_size=blk_size)
902      CALL dbcsr_complete_redistribute(work_mat, offdiag_ex_ker)
903
904      !clean-up
905      CALL dbcsr_release(work_mat)
906      CALL dbcsr_deallocate_matrix_set(lhs_int)
907
908   END SUBROUTINE offdiag_ex_sc
909
910! **************************************************************************************************
911!> \brief Contract the ri 3-center integrals contained in o3c with repect to the donor MOs coeffs,
912!>        for a given excited atom k => (aI|k) = sum_b c_Ib (ab|k)
913!> \param contr_int the contracted integrals as array of dbcsr matrices
914!> \param op_type for which operator type we contract (COULOMB or EXCHANGE)
915!> \param donor_state ...
916!> \param xas_tdp_env ...
917!> \param xas_tdp_control ...
918!> \param qs_env ...
919!> \note  In the output matrices, (aI_b|k) is stored at block a,b where I_b is the partial
920!>        contraction that only includes coeffs from atom b. Note that the contracted matrix is
921!>        not symmetric, but the blocks (aI_b|P) and (bI_a|P) are strictly identical
922! **************************************************************************************************
923   SUBROUTINE contract_o3c_int(contr_int, op_type, donor_state, xas_tdp_env, xas_tdp_control, qs_env)
924
925      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: contr_int
926      CHARACTER(len=*), INTENT(IN)                       :: op_type
927      TYPE(donor_state_type), POINTER                    :: donor_state
928      TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
929      TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
930      TYPE(qs_environment_type), POINTER                 :: qs_env
931
932      CHARACTER(len=*), PARAMETER :: routineN = 'contract_o3c_int', &
933         routineP = moduleN//':'//routineN
934
935      CHARACTER                                          :: my_op
936      INTEGER                                            :: handle, i, imo, ispin, katom, kkind, &
937                                                            natom, ndo_mo, ndo_so, nsgfk, nspins
938      INTEGER, DIMENSION(:), POINTER                     :: ri_blk_size, std_blk_size
939      LOGICAL                                            :: do_uks
940      REAL(dp), DIMENSION(:, :), POINTER                 :: coeffs
941      TYPE(cp_para_env_type), POINTER                    :: para_env
942      TYPE(dbcsr_distribution_type)                      :: opt_dbcsr_dist
943      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrices, matrix_s
944      TYPE(dbcsr_type), POINTER                          :: aI_P, P_Ib, work1, work2, work3
945      TYPE(distribution_2d_type), POINTER                :: opt_dist2d
946      TYPE(gto_basis_set_type), POINTER                  :: ri_basis
947      TYPE(o3c_container_type), POINTER                  :: o3c_coul, o3c_ex
948      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
949
950      NULLIFY (matrix_s, std_blk_size, ri_blk_size, qs_kind_set, ri_basis)
951      NULLIFY (aI_P, P_Ib, work1, work2, matrices, coeffs, opt_dist2d)
952
953      CALL timeset(routineN, handle)
954
955!  Initialization
956      CALL get_qs_env(qs_env, natom=natom, matrix_s=matrix_s, qs_kind_set=qs_kind_set, para_env=para_env)
957      ndo_mo = donor_state%ndo_mo
958      kkind = donor_state%kind_index
959      katom = donor_state%at_index
960      !by default contract for Coulomb
961      my_op = "C"
962      o3c_coul => xas_tdp_env%ri_o3c_coul
963      opt_dist2d => xas_tdp_env%opt_dist2d_coul
964      IF (op_type == "EXCHANGE") THEN
965         my_op = "E"
966         CPASSERT(ASSOCIATED(xas_tdp_env%ri_o3c_ex))
967         o3c_ex => xas_tdp_env%ri_o3c_ex
968         opt_dist2d => xas_tdp_env%opt_dist2d_ex
969      END IF
970      do_uks = xas_tdp_control%do_uks
971      nspins = 1; IF (do_uks) nspins = 2
972      ndo_so = nspins*ndo_mo
973
974!  contracted integrals block sizes
975      CALL dbcsr_get_info(matrix_s(1)%matrix, col_blk_size=std_blk_size)
976      ! getting the block dimensions for the RI basis
977      CALL get_qs_kind(qs_kind_set(kkind), basis_set=ri_basis, basis_type="RI_XAS")
978      CALL get_gto_basis_set(ri_basis, nsgf=nsgfk)
979      ALLOCATE (ri_blk_size(natom))
980      ri_blk_size = nsgfk
981
982!  Create  work matrices. They must have the block distribution of a symmetric matric to enter the
983!  o3c routines. HACK => create a symmetric matrix with non-symmetric block size (aI_P, P_Ib)
984!  Also create their normal matrix conterparts (work1, work2)
985!  Everything that goes into a o3c routine must be compatible with the optimal dist_2d
986      CALL cp_dbcsr_dist2d_to_dist(opt_dist2d, opt_dbcsr_dist)
987
988      ALLOCATE (aI_P, P_Ib, work1, work2, work3, matrices(2))
989      CALL dbcsr_create(aI_P, dist=opt_dbcsr_dist, matrix_type="S", name="(aI|P)", &
990                        row_blk_size=std_blk_size, col_blk_size=ri_blk_size)
991      CALL dbcsr_create(work1, dist=opt_dbcsr_dist, matrix_type="N", name="WORK 1", &
992                        row_blk_size=std_blk_size, col_blk_size=ri_blk_size)
993
994      CALL dbcsr_create(P_Ib, dist=opt_dbcsr_dist, matrix_type="S", name="(P|Ib)", &
995                        row_blk_size=ri_blk_size, col_blk_size=std_blk_size)
996      CALL dbcsr_create(work2, dist=opt_dbcsr_dist, matrix_type="N", name="WORK 2", &
997                        row_blk_size=ri_blk_size, col_blk_size=std_blk_size)
998
999      !reserve the blocks (needed for o3c routines)
1000      matrices(1)%matrix => aI_P; matrices(2)%matrix => P_Ib
1001      CALL reserve_contraction_blocks(matrices, katom, qs_env)
1002
1003      matrices(1)%matrix => work1; matrices(2)%matrix => work2
1004      CALL reserve_contraction_blocks(matrices, katom, qs_env, matrix_type="N")
1005      DEALLOCATE (matrices)
1006
1007      !  Create the contracted integral matrices
1008      ALLOCATE (contr_int(ndo_so))
1009      DO i = 1, ndo_so
1010         ALLOCATE (contr_int(i)%matrix)
1011         CALL dbcsr_create(matrix=contr_int(i)%matrix, template=matrix_s(1)%matrix, matrix_type="N", &
1012                           row_blk_size=std_blk_size, col_blk_size=ri_blk_size)
1013      END DO
1014
1015      ! Only take the coeffs for atom on which MOs I,J are localized
1016      coeffs => donor_state%contract_coeffs
1017
1018      DO ispin = 1, nspins
1019
1020!     Loop over the donor MOs, fill the o3c_vec and contract
1021         DO imo = 1, ndo_mo
1022
1023            ! do the contraction
1024            CALL dbcsr_set(aI_P, 0.0_dp); CALL dbcsr_set(P_Ib, 0.0_dp)
1025            IF (my_op == "C") THEN
1026               CALL contract_o3c_once(o3c_coul, coeffs(:, (ispin - 1)*ndo_mo + imo), aI_P, P_Ib, katom)
1027            ELSE
1028               CALL contract_o3c_once(o3c_ex, coeffs(:, (ispin - 1)*ndo_mo + imo), aI_P, P_Ib, katom)
1029            END IF
1030
1031            ! Get the full "normal" (aI|P) contracted integrals
1032            CALL change_dist(P_Ib, work2, para_env)
1033            CALL dbcsr_transposed(work3, work2)
1034            CALL change_dist(aI_P, work1, para_env)
1035            CALL dbcsr_add(work3, work1, 1.0_dp, 1.0_dp)
1036            CALL dbcsr_complete_redistribute(work3, contr_int((ispin - 1)*ndo_mo + imo)%matrix)
1037
1038            CALL dbcsr_release(work3)
1039
1040         END DO !imo
1041      END DO !ispin
1042
1043!  Clean-up
1044      CALL dbcsr_release(aI_P)
1045      CALL dbcsr_release(P_Ib)
1046      CALL dbcsr_release(work1)
1047      CALL dbcsr_release(work2)
1048      CALL dbcsr_distribution_release(opt_dbcsr_dist)
1049      DEALLOCATE (ri_blk_size, aI_P, P_Ib, work1, work2, work3)
1050
1051      CALL timestop(handle)
1052
1053   END SUBROUTINE contract_o3c_int
1054
1055! **************************************************************************************************
1056!> \brief Home made utility to transfer a matrix from a given proc distribution to another
1057!> \param mat_in ...
1058!> \param mat_out ...
1059!> \param para_env ...
1060!> \note It is assumed both matrices have the same block sizes and structure, and that all blocks
1061!>       are at least reserved
1062! **************************************************************************************************
1063   SUBROUTINE change_dist(mat_in, mat_out, para_env)
1064
1065      TYPE(dbcsr_type), INTENT(INOUT)                    :: mat_in, mat_out
1066      TYPE(cp_para_env_type), POINTER                    :: para_env
1067
1068      CHARACTER(len=*), PARAMETER :: routineN = 'change_dist', routineP = moduleN//':'//routineN
1069
1070      INTEGER                                            :: blk, dest, group, handle, iblk, ir, is, &
1071                                                            jblk, nblk, num_pe, s1, s2, source, tag
1072      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: recv_req, send_req
1073      LOGICAL                                            :: found_in, found_out
1074      REAL(dp), DIMENSION(:, :), POINTER                 :: pbin, pbout
1075      TYPE(cp_2d_r_p_type), ALLOCATABLE, DIMENSION(:)    :: recv_buff, send_buff
1076      TYPE(dbcsr_iterator_type)                          :: iter
1077
1078      NULLIFY (pbin, pbout)
1079
1080      CALL timeset(routineN, handle)
1081
1082      CALL dbcsr_get_info(mat_in, nblkrows_total=nblk)
1083      group = para_env%group
1084      num_pe = para_env%num_pe
1085
1086      !Allocate a pointer array which will point on the block => we won't need to send more than
1087      !nblk**2/num_pe message per processor (assumes blocks well distributed)
1088      ALLOCATE (send_buff(nblk**2/num_pe + 1), recv_buff(nblk**2/num_pe + 1))
1089      ALLOCATE (send_req(nblk**2/num_pe + 1), recv_req(nblk**2/num_pe + 1))
1090      is = 0; ir = 0
1091
1092      !Iterate over input matrix
1093      CALL dbcsr_iterator_start(iter, mat_in)
1094      DO WHILE (dbcsr_iterator_blocks_left(iter))
1095
1096         CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk, blk=blk)
1097         CALL dbcsr_get_block_p(mat_in, iblk, jblk, pbin, found_in)
1098
1099         IF (found_in) THEN
1100
1101            !Check if the block is on the same proc on mat_out
1102            CALL dbcsr_get_block_p(mat_out, iblk, jblk, pbout, found_out)
1103            IF (found_out) THEN
1104               !Simply copy the block from mat_in to mat_out
1105               s1 = SIZE(pbout, 1); s2 = SIZE(pbout, 2)
1106               CALL dcopy(s1*s2, pbin, 1, pbout, 1)
1107            ELSE
1108               !If block not on the same processor, need to send it
1109               CALL dbcsr_get_stored_coordinates(mat_out, iblk, jblk, dest)
1110               !unique tag
1111               tag = nblk*iblk + jblk
1112               !point on the block such that the buffer is not changed after isend
1113               is = is + 1
1114               send_buff(is)%array => pbin
1115               CALL mp_isend(msgin=send_buff(is)%array, dest=dest, comm=group, request=send_req(is), &
1116                             tag=tag)
1117            END IF
1118
1119         END IF !found_in
1120
1121      END DO !iterator
1122      CALL dbcsr_iterator_stop(iter)
1123
1124      !Iterate over the output matrix
1125      CALL dbcsr_iterator_start(iter, mat_out)
1126      DO WHILE (dbcsr_iterator_blocks_left(iter))
1127
1128         CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk, blk=blk)
1129         CALL dbcsr_get_block_p(mat_out, iblk, jblk, pbout, found_out)
1130
1131         IF (found_out) THEN
1132            !Check if the block is on the same proc on mat_in
1133            CALL dbcsr_get_block_p(mat_in, iblk, jblk, pbin, found_in)
1134            !If not, need to receiv it
1135            IF (.NOT. found_in) THEN
1136               CALL dbcsr_get_stored_coordinates(mat_in, iblk, jblk, source)
1137               tag = nblk*iblk + jblk
1138               ir = ir + 1
1139               recv_buff(ir)%array => pbout
1140               CALL mp_irecv(msgout=recv_buff(ir)%array, source=source, request=recv_req(ir), &
1141                             tag=tag, comm=group)
1142            END IF
1143         END IF
1144
1145      END DO !iterator
1146      CALL dbcsr_iterator_stop(iter)
1147
1148      !Make sure all communication is over. is, ir are 0 is no communication
1149      CALL mp_waitall(send_req(1:is))
1150      CALL mp_waitall(recv_req(1:ir))
1151
1152      CALL timestop(handle)
1153
1154   END SUBROUTINE change_dist
1155
1156! **************************************************************************************************
1157!> \brief Reserves the blocks in of a dbcsr matrix as needed for RI 3-center contraction (aI|P)
1158!> \param matrices the matrices for which blocks are reserved
1159!> \param ri_atom the index of the atom on which RI is done (= all coeffs of I are there, and P too)
1160!> \param qs_env ...
1161!> \param matrix_type whether the matrices are symmetric or not
1162!> \note Even if the matrix_type is set to "normal", only reserve blocks on the upper-diagonal
1163! **************************************************************************************************
1164   SUBROUTINE reserve_contraction_blocks(matrices, ri_atom, qs_env, matrix_type)
1165
1166      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrices
1167      INTEGER, INTENT(IN)                                :: ri_atom
1168      TYPE(qs_environment_type), POINTER                 :: qs_env
1169      CHARACTER, INTENT(IN), OPTIONAL                    :: matrix_type
1170
1171      CHARACTER(len=*), PARAMETER :: routineN = 'reserve_contraction_blocks', &
1172         routineP = moduleN//':'//routineN
1173
1174      CHARACTER                                          :: my_type
1175      INTEGER                                            :: blk, i, iblk, jblk, n
1176      LOGICAL                                            :: found
1177      REAL(dp), DIMENSION(:, :), POINTER                 :: pblock_m, pblock_s
1178      TYPE(dbcsr_distribution_type)                      :: dist
1179      TYPE(dbcsr_iterator_type)                          :: iter
1180      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_s
1181      TYPE(dbcsr_type), POINTER                          :: template, work
1182
1183      NULLIFY (matrix_s, pblock_s, pblock_m, template)
1184
1185!  Initialization
1186      CALL get_qs_env(qs_env, matrix_s=matrix_s)
1187      n = SIZE(matrices)
1188      !assume symmetric type
1189      my_type = dbcsr_type_symmetric
1190      IF (PRESENT(matrix_type)) my_type = matrix_type
1191      CALL dbcsr_get_info(matrices(1)%matrix, distribution=dist)
1192
1193!  Need to redistribute matrix_s in the distribution of matrices
1194      ALLOCATE (work)
1195      CALL dbcsr_create(work, template=matrix_s(1)%matrix, dist=dist)
1196      CALL dbcsr_complete_redistribute(matrix_s(1)%matrix, work)
1197
1198!  If non-symmetric, need to desymmetrize matrix as as a template
1199      IF (my_type .NE. dbcsr_type_symmetric) THEN
1200         ALLOCATE (template)
1201         CALL dbcsr_desymmetrize(work, template)
1202      ELSE
1203         template => work
1204      END IF
1205
1206!  Loop over matrix_s as need a,b to overlap
1207      CALL dbcsr_iterator_start(iter, template)
1208      DO WHILE (dbcsr_iterator_blocks_left(iter))
1209
1210         CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk, blk=blk)
1211         !only have a,b pair if one of them is the ri_atom
1212         IF (iblk .NE. ri_atom .AND. jblk .NE. ri_atom) CYCLE
1213         IF (jblk < iblk) CYCLE
1214
1215         CALL dbcsr_get_block_p(template, iblk, jblk, pblock_s, found)
1216
1217         IF (found) THEN
1218            DO i = 1, n
1219               NULLIFY (pblock_m)
1220               CALL dbcsr_reserve_block2d(matrices(i)%matrix, iblk, jblk, pblock_m)
1221               pblock_m = 0.0_dp
1222            END DO
1223         END IF
1224
1225      END DO !dbcsr iter
1226      CALL dbcsr_iterator_stop(iter)
1227      DO i = 1, n
1228         CALL dbcsr_finalize(matrices(i)%matrix)
1229      END DO
1230
1231!  Clean-up
1232      IF (my_type .NE. dbcsr_type_symmetric) THEN
1233         CALL dbcsr_release(template)
1234         DEALLOCATE (template)
1235      END IF
1236      CALL dbcsr_release(work)
1237      DEALLOCATE (work)
1238
1239   END SUBROUTINE reserve_contraction_blocks
1240
1241! **************************************************************************************************
1242!> \brief Contraction of the 3-center integrals over index 1 and 2, for a given atom_k. The results
1243!>        are stored in two matrices, such that (a,b are block indices):
1244!>        mat_aIb(ab) = mat_aIb(ab) + sum j_b (i_aj_b|k)*v(j_b) and
1245!>        mat_bIa(ba) = mat_bIa(ba) + sum i_a (i_aj_b|k)*v(i_a)
1246!>        The block size of the columns of mat_aIb and the rows of mat_bIa are the size of k
1247!> \param o3c the container for the integrals
1248!> \param vec the contraction coefficients
1249!> \param mat_aIb ...
1250!> \param mat_bIa ...
1251!> \param atom_k the atom for which we contract
1252!> \note To get the full (aI|P) matrix, one has to sum mat_aIb + mat_bIa^T (the latter has empty diag)
1253!>       It is assumed that the contraction coefficients for MO I are all on atom_k
1254! **************************************************************************************************
1255   SUBROUTINE contract_o3c_once(o3c, vec, mat_aIb, mat_bIa, atom_k)
1256      TYPE(o3c_container_type), POINTER                  :: o3c
1257      REAL(dp), DIMENSION(:), INTENT(IN)                 :: vec
1258      TYPE(dbcsr_type), INTENT(INOUT)                    :: mat_aIb, mat_bIa
1259      INTEGER, INTENT(IN)                                :: atom_k
1260
1261      CHARACTER(LEN=*), PARAMETER :: routineN = 'contract_o3c_once', &
1262         routineP = moduleN//':'//routineN
1263
1264      INTEGER                                            :: handle, i, iatom, icol, irow, jatom, &
1265                                                            katom, mepos, n, nthread, s1, s2
1266      INTEGER, DIMENSION(:), POINTER                     :: atom_blk_size
1267      LOGICAL                                            :: found, ijsymmetric, trans
1268      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: work
1269      REAL(dp), DIMENSION(:, :), POINTER                 :: pblock
1270      REAL(dp), DIMENSION(:, :, :), POINTER              :: iabc
1271      TYPE(o3c_iterator_type)                            :: o3c_iterator
1272
1273      CALL timeset(routineN, handle)
1274
1275      CALL get_o3c_container(o3c, ijsymmetric=ijsymmetric)
1276      CPASSERT(ijsymmetric)
1277
1278      CALL dbcsr_get_info(mat_aIb, row_blk_size=atom_blk_size)
1279
1280      nthread = 1
1281!$    nthread = omp_get_max_threads()
1282      CALL o3c_iterator_create(o3c, o3c_iterator, nthread=nthread)
1283
1284!$OMP PARALLEL DEFAULT(NONE) &
1285!$OMP SHARED (nthread,o3c_iterator,vec,mat_aIb,mat_bIa,atom_k,atom_blk_size)&
1286!$OMP PRIVATE (mepos,iabc,iatom,jatom,katom,irow,icol,trans,pblock,found,i,n,s1,s2,work)
1287
1288      mepos = 0
1289!$    mepos = omp_get_thread_num()
1290
1291      DO WHILE (o3c_iterate(o3c_iterator, mepos=mepos) == 0)
1292         CALL get_o3c_iterator_info(o3c_iterator, mepos=mepos, iatom=iatom, jatom=jatom, katom=katom, &
1293                                    integral=iabc)
1294
1295         IF (katom .NE. atom_k) CYCLE
1296
1297         IF (iatom <= jatom) THEN
1298            irow = iatom
1299            icol = jatom
1300            trans = .FALSE.
1301         ELSE
1302            irow = jatom
1303            icol = iatom
1304            trans = .TRUE.
1305         END IF
1306
1307         ! Deal with mat_aIb
1308         IF (icol == atom_k) THEN
1309            n = SIZE(vec)
1310            s1 = atom_blk_size(irow)
1311            s2 = SIZE(iabc, 3)
1312            ALLOCATE (work(s1, s2))
1313            work = 0.0_dp
1314
1315            IF (trans) THEN
1316               DO i = 1, n
1317                  CALL daxpy(s1*s2, vec(i), iabc(i, :, :), 1, work(:, :), 1)
1318               END DO
1319            ELSE
1320               DO i = 1, n
1321                  CALL daxpy(s1*s2, vec(i), iabc(:, i, :), 1, work(:, :), 1)
1322               END DO
1323            END IF
1324
1325!$OMP CRITICAL(mat_aIb_critical)
1326            CALL dbcsr_get_block_p(matrix=mat_aIb, row=irow, col=icol, BLOCK=pblock, found=found)
1327            IF (found) THEN
1328               CALL daxpy(s1*s2, 1.0_dp, work(:, :), 1, pblock(:, :), 1)
1329            END IF
1330!$OMP END CRITICAL(mat_aIb_critical)
1331            DEALLOCATE (work)
1332         END IF ! icol == atom_k
1333
1334         ! Deal with mat_bIa, keep block diagonal empty
1335         IF (irow == icol) CYCLE
1336         IF (irow == atom_k) THEN
1337
1338            n = SIZE(vec)
1339            s1 = SIZE(iabc, 3)
1340            s2 = atom_blk_size(icol)
1341            ALLOCATE (work(s1, s2))
1342            work = 0.0_dp
1343
1344            IF (trans) THEN
1345               DO i = 1, n
1346                  CALL daxpy(s1*s2, vec(i), TRANSPOSE(iabc(:, i, :)), 1, work(:, :), 1)
1347               END DO
1348            ELSE
1349               DO i = 1, n
1350                  CALL daxpy(s1*s2, vec(i), TRANSPOSE(iabc(i, :, :)), 1, work(:, :), 1)
1351               END DO
1352            END IF
1353
1354!$OMP CRITICAL(mat_bIa_critical)
1355            CALL dbcsr_get_block_p(matrix=mat_bIa, row=irow, col=icol, BLOCK=pblock, found=found)
1356            IF (found) THEN
1357               CALL daxpy(s1*s2, 1.0_dp, work(:, :), 1, pblock(:, :), 1)
1358            END IF
1359!$OMP END CRITICAL(mat_bIa_critical)
1360            DEALLOCATE (work)
1361         END IF !irow == atom_k
1362
1363      END DO !o3c iterator
1364!$OMP END PARALLEL
1365      CALL o3c_iterator_release(o3c_iterator)
1366
1367      CALL timestop(handle)
1368
1369   END SUBROUTINE contract_o3c_once
1370
1371! **************************************************************************************************
1372!> \brief Multiply all the blocks of a contracted RI integral (aI|P) by a matrix of type (P|...|Q)
1373!> \param contr_int the integral array
1374!> \param PQ the smaller matrix to multiply all blocks
1375!> \note  It is assumed that all non-zero blocks have the same number of columns. Can pass partial
1376!>        arrays, e.g. contr_int(1:3)
1377! **************************************************************************************************
1378   SUBROUTINE ri_all_blocks_mm(contr_int, PQ)
1379
1380      TYPE(dbcsr_p_type), DIMENSION(:)                   :: contr_int
1381      REAL(dp), DIMENSION(:, :), INTENT(IN)              :: PQ
1382
1383      CHARACTER(len=*), PARAMETER :: routineN = 'ri_all_blocks_mm', &
1384         routineP = moduleN//':'//routineN
1385
1386      INTEGER                                            :: blk, iblk, imo, jblk, ndo_mo, s1, s2
1387      LOGICAL                                            :: found
1388      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: work
1389      REAL(dp), DIMENSION(:, :), POINTER                 :: pblock
1390      TYPE(dbcsr_iterator_type)                          :: iter
1391
1392      NULLIFY (pblock)
1393
1394      ndo_mo = SIZE(contr_int)
1395
1396      DO imo = 1, ndo_mo
1397         CALL dbcsr_iterator_start(iter, contr_int(imo)%matrix)
1398         DO WHILE (dbcsr_iterator_blocks_left(iter))
1399
1400            CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk, blk=blk)
1401            CALL dbcsr_get_block_p(contr_int(imo)%matrix, iblk, jblk, pblock, found)
1402
1403            IF (found) THEN
1404               s1 = SIZE(pblock, 1)
1405               s2 = SIZE(pblock, 2)
1406               ALLOCATE (work(s1, s2))
1407               CALL dgemm('N', 'N', s1, s2, s2, 1.0_dp, pblock, s1, PQ, s2, 0.0_dp, work, s1)
1408               CALL dcopy(s1*s2, work, 1, pblock, 1)
1409               DEALLOCATE (work)
1410            END IF
1411
1412         END DO ! dbcsr iterator
1413         CALL dbcsr_iterator_stop(iter)
1414      END DO !imo
1415
1416   END SUBROUTINE ri_all_blocks_mm
1417
1418! **************************************************************************************************
1419!> \brief Copies an (partial) array of contracted RI integrals into anoter one
1420!> \param new_int where the copy is stored
1421!> \param ref_int what is copied
1422!> \note Allocate the matrices of new_int if not done already
1423! **************************************************************************************************
1424   SUBROUTINE copy_ri_contr_int(new_int, ref_int)
1425
1426      TYPE(dbcsr_p_type), DIMENSION(:), INTENT(INOUT)    :: new_int
1427      TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN)       :: ref_int
1428
1429      CHARACTER(len=*), PARAMETER :: routineN = 'copy_ri_contr_int', &
1430         routineP = moduleN//':'//routineN
1431
1432      INTEGER                                            :: iso, ndo_so
1433
1434      CPASSERT(SIZE(new_int) == SIZE(ref_int))
1435      ndo_so = SIZE(ref_int)
1436
1437      DO iso = 1, ndo_so
1438         IF (.NOT. ASSOCIATED(new_int(iso)%matrix)) ALLOCATE (new_int(iso)%matrix)
1439         CALL dbcsr_copy(new_int(iso)%matrix, ref_int(iso)%matrix)
1440      END DO
1441
1442   END SUBROUTINE copy_ri_contr_int
1443
1444! **************************************************************************************************
1445!> \brief Takes the product of contracted integrals and put them in a kernel matrix
1446!> \param kernel the matrix where the products are stored
1447!> \param lhs_int the left-hand side contracted integrals
1448!> \param rhs_int the right-hand side contracted integrals
1449!> \param quadrants on which quadrant(s) on the kernel matrix the product is stored
1450!> \param qs_env ...
1451!> \param eps_filter filter for dbcsr matrix multiplication
1452!> \param mo_transpose whether the MO blocks should be transpose, i.e. (aI|Jb) => (aJ|Ib)
1453!> \note It is assumed that the kerenl matrix is NOT symmetric
1454!>       There are three quadrants, corresponding to 1: the upper-left (diagonal), 2: the
1455!>       upper-right (off-diagonal) and 3: the lower-right (diagonal).
1456!>       Need to finalize the kernel matrix after calling this routine (possibly multiple times)
1457! **************************************************************************************************
1458   SUBROUTINE ri_int_product(kernel, lhs_int, rhs_int, quadrants, qs_env, eps_filter, mo_transpose)
1459
1460      TYPE(dbcsr_type), INTENT(INOUT)                    :: kernel
1461      TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN)       :: lhs_int, rhs_int
1462      LOGICAL, DIMENSION(3), INTENT(IN)                  :: quadrants
1463      TYPE(qs_environment_type), POINTER                 :: qs_env
1464      REAL(dp), INTENT(IN), OPTIONAL                     :: eps_filter
1465      LOGICAL, INTENT(IN), OPTIONAL                      :: mo_transpose
1466
1467      CHARACTER(len=*), PARAMETER :: routineN = 'ri_int_product', routineP = moduleN//':'//routineN
1468
1469      INTEGER                                            :: blk, i, iblk, iso, j, jblk, jso, nblk, &
1470                                                            ndo_so
1471      LOGICAL                                            :: found, my_mt
1472      REAL(dp), DIMENSION(:, :), POINTER                 :: pblock
1473      TYPE(dbcsr_iterator_type)                          :: iter
1474      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_s
1475      TYPE(dbcsr_type)                                   :: prod
1476
1477      NULLIFY (matrix_s, pblock)
1478
1479!  Initialization
1480      CPASSERT(SIZE(lhs_int) == SIZE(rhs_int))
1481      CPASSERT(ANY(quadrants))
1482      ndo_so = SIZE(lhs_int)
1483      CALL get_qs_env(qs_env, matrix_s=matrix_s, natom=nblk)
1484      CALL dbcsr_create(prod, template=matrix_s(1)%matrix, matrix_type="N")
1485      my_mt = .FALSE.
1486      IF (PRESENT(mo_transpose)) my_mt = mo_transpose
1487
1488      ! The kernel matrix is symmetric (even if normal type) => only fill upper half on diagonal
1489      ! quadrants, but the whole thing on upper-right quadrant
1490      DO iso = 1, ndo_so
1491         DO jso = 1, ndo_so
1492
1493            ! If on-diagonal quadrants only, can skip jso < iso
1494            IF (.NOT. quadrants(2) .AND. jso < iso) CYCLE
1495
1496            i = iso; j = jso;
1497            IF (my_mt) THEN
1498               i = jso; j = iso;
1499            END IF
1500
1501            ! Take the product lhs*rhs^T
1502            CALL dbcsr_multiply('N', 'T', 1.0_dp, lhs_int(i)%matrix, rhs_int(j)%matrix, &
1503                                0.0_dp, prod, filter_eps=eps_filter)
1504
1505            ! Loop over blocks of prod and fill kernel matrix => ok cuz same (but replicated) dist
1506            CALL dbcsr_iterator_start(iter, prod)
1507            DO WHILE (dbcsr_iterator_blocks_left(iter))
1508
1509               CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk, blk=blk)
1510               IF ((iso == jso .AND. jblk < iblk) .AND. .NOT. quadrants(2)) CYCLE
1511
1512               CALL dbcsr_get_block_p(prod, iblk, jblk, pblock, found)
1513
1514               IF (found) THEN
1515
1516                  ! Case study on quadrant
1517                  !upper-left
1518                  IF (quadrants(1)) THEN
1519                     CALL dbcsr_put_block(kernel, (iso - 1)*nblk + iblk, (jso - 1)*nblk + jblk, pblock)
1520                  END IF
1521
1522                  !upper-right
1523                  IF (quadrants(2)) THEN
1524                     CALL dbcsr_put_block(kernel, (iso - 1)*nblk + iblk, (ndo_so + jso - 1)*nblk + jblk, pblock)
1525                  END IF
1526
1527                  !lower-right
1528                  IF (quadrants(3)) THEN
1529                     CALL dbcsr_put_block(kernel, (ndo_so + iso - 1)*nblk + iblk, (ndo_so + jso - 1)*nblk + jblk, pblock)
1530                  END IF
1531
1532               END IF
1533
1534            END DO ! dbcsr iterator
1535            CALL dbcsr_iterator_stop(iter)
1536
1537         END DO !jso
1538      END DO !iso
1539
1540!  Clean-up
1541      CALL dbcsr_release(prod)
1542
1543   END SUBROUTINE ri_int_product
1544
1545END MODULE xas_tdp_kernel
1546