1!--------------------------------------------------------------------------------------------------!
2!   CP2K: A general program to perform molecular dynamics simulations                              !
3!   Copyright (C) 2000 - 2020  CP2K developers group                                               !
4!--------------------------------------------------------------------------------------------------!
5
6! **************************************************************************************************
7!> \brief Utilities for X-ray absorption spectroscopy using TDDFPT
8!> \author AB (01.2018)
9! **************************************************************************************************
10
11MODULE xas_tdp_utils
12   USE cp_blacs_env,                    ONLY: cp_blacs_env_type
13   USE cp_cfm_basic_linalg,             ONLY: cp_cfm_gemm
14   USE cp_cfm_diag,                     ONLY: cp_cfm_heevd
15   USE cp_cfm_types,                    ONLY: cp_cfm_create,&
16                                              cp_cfm_get_info,&
17                                              cp_cfm_get_submatrix,&
18                                              cp_cfm_release,&
19                                              cp_cfm_type,&
20                                              cp_fm_to_cfm
21   USE cp_dbcsr_cholesky,               ONLY: cp_dbcsr_cholesky_decompose,&
22                                              cp_dbcsr_cholesky_invert
23   USE cp_dbcsr_diag,                   ONLY: cp_dbcsr_power
24   USE cp_dbcsr_operations,             ONLY: copy_dbcsr_to_fm,&
25                                              copy_fm_to_dbcsr,&
26                                              cp_dbcsr_sm_fm_multiply,&
27                                              dbcsr_allocate_matrix_set,&
28                                              dbcsr_deallocate_matrix_set
29   USE cp_fm_basic_linalg,              ONLY: cp_fm_column_scale,&
30                                              cp_fm_scale,&
31                                              cp_fm_transpose,&
32                                              cp_fm_upper_to_full
33   USE cp_fm_diag,                      ONLY: choose_eigv_solver,&
34                                              cp_fm_geeig
35   USE cp_fm_struct,                    ONLY: cp_fm_struct_create,&
36                                              cp_fm_struct_release,&
37                                              cp_fm_struct_type
38   USE cp_fm_types,                     ONLY: &
39        cp_fm_create, cp_fm_get_diag, cp_fm_get_info, cp_fm_get_submatrix, cp_fm_p_type, &
40        cp_fm_release, cp_fm_set_element, cp_fm_to_fm_submat, cp_fm_type
41   USE cp_gemm_interface,               ONLY: cp_gemm
42   USE cp_para_types,                   ONLY: cp_para_env_type
43   USE dbcsr_api,                       ONLY: &
44        dbcsr_add, dbcsr_copy, dbcsr_create, dbcsr_distribution_get, dbcsr_distribution_new, &
45        dbcsr_distribution_release, dbcsr_distribution_type, dbcsr_finalize, dbcsr_get_block_p, &
46        dbcsr_get_info, dbcsr_iterator_blocks_left, dbcsr_iterator_next_block, &
47        dbcsr_iterator_start, dbcsr_iterator_stop, dbcsr_iterator_type, dbcsr_multiply, &
48        dbcsr_p_type, dbcsr_put_block, dbcsr_release, dbcsr_reserve_all_blocks, dbcsr_set, &
49        dbcsr_type
50   USE input_constants,                 ONLY: tddfpt_singlet,&
51                                              tddfpt_spin_cons,&
52                                              tddfpt_spin_flip,&
53                                              tddfpt_triplet,&
54                                              xas_dip_len
55   USE kinds,                           ONLY: dp
56   USE mathlib,                         ONLY: get_diag
57   USE physcon,                         ONLY: a_fine
58   USE qs_environment_types,            ONLY: get_qs_env,&
59                                              qs_environment_type
60   USE qs_mo_types,                     ONLY: get_mo_set,&
61                                              mo_set_p_type
62   USE xas_tdp_kernel,                  ONLY: kernel_coulomb_xc,&
63                                              kernel_exchange
64   USE xas_tdp_types,                   ONLY: donor_state_type,&
65                                              xas_tdp_control_type,&
66                                              xas_tdp_env_type
67
68!$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num
69#include "./base/base_uses.f90"
70
71   IMPLICIT NONE
72   PRIVATE
73
74   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'xas_tdp_utils'
75
76   PUBLIC :: setup_xas_tdp_prob, solve_xas_tdp_prob, include_rcs_soc, include_os_soc
77
78   !A helper type for SOC
79   TYPE dbcsr_soc_package_type
80      TYPE(dbcsr_type), POINTER     :: dbcsr_sg
81      TYPE(dbcsr_type), POINTER     :: dbcsr_tp
82      TYPE(dbcsr_type), POINTER     :: dbcsr_sc
83      TYPE(dbcsr_type), POINTER     :: dbcsr_sf
84      TYPE(dbcsr_type), POINTER     :: dbcsr_prod
85      TYPE(dbcsr_type), POINTER     :: dbcsr_ovlp
86      TYPE(dbcsr_type), POINTER     :: dbcsr_tmp
87      TYPE(dbcsr_type), POINTER     :: dbcsr_work
88   END TYPE dbcsr_soc_package_type
89
90CONTAINS
91
92! **************************************************************************************************
93!> \brief Builds the matrix that defines the XAS TDDFPT generalized eigenvalue problem to be solved
94!>        for excitation energies omega. The problem has the form omega*G*C = M*C, where C contains
95!>        the response orbitals coefficients. The matrix M and the metric G are stored in the given
96!>        donor_state
97!> \param donor_state the donor_state for which the problem is restricted
98!> \param qs_env ...
99!> \param xas_tdp_env ...
100!> \param xas_tdp_control ...
101!> \note  the matrix M is symmetric and has the form | M_d   M_o |
102!>                                                   | M_o   M_d |,
103!>       -In the SPIN-RESTRICTED case:
104!>        depending on whether we consider singlet or triplet excitation, the diagonal (M_d) and
105!>        off-diagonal (M_o) parts of M differ:
106!>        - For singlet: M_d = A + 2B + C_aa + C_ab - D
107!>                       M_o = 2B + C_aa + C_ab - E
108!>        - For triplet: M_d = A + C_aa - C_ab - D
109!>                       M_o = C_aa - C_ab - E
110!>        where other subroutines computes the matrices A, B, E, D and G, which are:
111!>        - A: the ground-state contribution: F_pq*delta_IJ - epsilon_IJ*S_pq
112!>        - B: the Coulomb kernel ~(pI|Jq)
113!>        - C: the xc kernel c_aa (double derivatibe wrt to n_alpha) and C_ab (wrt n_alpha and n_beta)
114!>        - D: the on-digonal exact exchange kernel ~(pq|IJ)
115!>        - E: the off-diagonal exact exchange kernel ~(pJ|Iq)
116!>        - G: the metric  S_pq*delta_IJ
117!>        For the xc functionals, C_aa + C_ab or C_aa - C_ab are stored in the same matrix
118!>        In the above definitions, I,J label the donnor MOs and p,q the sgfs of the basis
119!>
120!>       -In the SPIN-UNRESTRICTED, spin-conserving case:
121!>        the on- and off-diagonal elements of M are:
122!>                     M_d = A + B + C -D
123!>                     M_o = B + C - E
124!>        where the submatrices A, B, C, D and E are:
125!>        - A: the groun-state contribution: (F_pq*delta_IJ - epsilon_IJ*S_pq) * delta_ab
126!>        - B: the Coulomb kernel: (pI_a|J_b q)
127!>        - C: the xc kernel: (pI_a|fxc_ab|J_b q)
128!>        - D: the on-diagonal exact-exchange kernel: (pq|I_a J_b) delta_ab
129!>        - E: the off-diagonal exact-exchange kernel: (pJ_b|I_a q) delta_ab
130!>        - G: the metric S_pq*delta_IJ*delta_ab
131!>        p,q label the sgfs, I,J the donro MOs and a,b the spins
132!>
133!>       -In both above cases, the matrix M is always  projected onto the unperturbed unoccupied
134!>        ground state: M <= Q * M * Q^T = (1 - SP) * M * (1 - PS)
135!>
136!>       -In the SPIN-FLIP case:
137!>        Only the TDA is implemented, that is, there are only on-diagonal elements:
138!>                    M_d = A + C - D
139!>        where the submatrices A, C and D are:
140!>        - A: the ground state-contribution: (F_pq*delta_IJ - epsilon_IJ*S_pq) * delta_ab, but here,
141!>                                            the alph-alpha quadrant has the beta Fock matrix and
142!>                                            the beta-beta quadrant has the alpha Fock matrix
143!>        - C: the SF xc kernel: (pI_a|fxc|J_bq), fxc = 1/m * (vxc_a -vxc_b)
144!>        - D: the on-diagonal exact-exchange kernel: (pq|I_a J_b) delta_ab
145!>        To ensure that all excitation start from a given spin to the opposite, we then multiply
146!>        by a Q projector where we swap the alpha-alpha and beta-beta spin-quadrants
147!>
148!>        All possibilities: TDA or full-TDDFT, singlet or triplet, xc or hybrid, etc are treated
149!>        in the same routine to avoid recomputing stuff
150!>        Under TDA, only the on-diagonal elements of M are computed
151!>        In the case of non-TDA, one turns the problem Hermitian
152! **************************************************************************************************
153   SUBROUTINE setup_xas_tdp_prob(donor_state, qs_env, xas_tdp_env, xas_tdp_control)
154
155      TYPE(donor_state_type), POINTER                    :: donor_state
156      TYPE(qs_environment_type), POINTER                 :: qs_env
157      TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
158      TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
159
160      CHARACTER(len=*), PARAMETER :: routineN = 'setup_xas_tdp_prob', &
161         routineP = moduleN//':'//routineN
162
163      INTEGER                                            :: handle
164      INTEGER, DIMENSION(:), POINTER                     :: submat_blk_size
165      LOGICAL                                            :: do_coul, do_hfx, do_os, do_sc, do_sf, &
166                                                            do_sg, do_tda, do_tp, do_xc
167      REAL(dp)                                           :: eps_filter, sx
168      TYPE(dbcsr_distribution_type), POINTER             :: submat_dist
169      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: ex_ker, xc_ker
170      TYPE(dbcsr_type)                                   :: matrix_a, matrix_a_sf, matrix_b, proj_Q, &
171                                                            proj_Q_sf, work
172      TYPE(dbcsr_type), POINTER :: matrix_c_sc, matrix_c_sf, matrix_c_sg, matrix_c_tp, matrix_d, &
173         matrix_e_sc, sc_matrix_tdp, sf_matrix_tdp, sg_matrix_tdp, tp_matrix_tdp
174
175      NULLIFY (sg_matrix_tdp, tp_matrix_tdp, submat_dist, submat_blk_size, matrix_c_sf)
176      NULLIFY (matrix_c_sg, matrix_c_tp, matrix_c_sc, matrix_d, matrix_e_sc)
177      NULLIFY (sc_matrix_tdp, sf_matrix_tdp, ex_ker, xc_ker)
178
179      CALL timeset(routineN, handle)
180
181!  Initialization
182      do_os = xas_tdp_control%do_uks .OR. xas_tdp_control%do_roks
183      do_sc = xas_tdp_control%do_spin_cons
184      do_sf = xas_tdp_control%do_spin_flip
185      do_sg = xas_tdp_control%do_singlet
186      do_tp = xas_tdp_control%do_triplet
187      do_xc = xas_tdp_control%do_xc
188      do_hfx = xas_tdp_control%do_hfx
189      do_coul = xas_tdp_control%do_coulomb
190      do_tda = xas_tdp_control%tamm_dancoff
191      sx = xas_tdp_control%sx
192      eps_filter = xas_tdp_control%eps_filter
193      IF (do_sc) THEN
194         ALLOCATE (donor_state%sc_matrix_tdp)
195         sc_matrix_tdp => donor_state%sc_matrix_tdp
196      END IF
197      IF (do_sf) THEN
198         ALLOCATE (donor_state%sf_matrix_tdp)
199         sf_matrix_tdp => donor_state%sf_matrix_tdp
200      END IF
201      IF (do_sg) THEN
202         ALLOCATE (donor_state%sg_matrix_tdp)
203         sg_matrix_tdp => donor_state%sg_matrix_tdp
204      END IF
205      IF (do_tp) THEN
206         ALLOCATE (donor_state%tp_matrix_tdp)
207         tp_matrix_tdp => donor_state%tp_matrix_tdp
208      END IF
209
210!  Get the dist and block size of all matrices A, B, C, etc
211      CALL compute_submat_dist_and_blk_size(donor_state, do_os, qs_env)
212      submat_dist => donor_state%dbcsr_dist
213      submat_blk_size => donor_state%blk_size
214
215!  Allocate and compute all the matrices A, B, C, etc we will need
216
217      ! The projector(s) on the unoccupied unperturbed ground state 1-SP and associated work matrix
218      IF (do_sg .OR. do_tp .OR. do_sc) THEN !spin-conserving
219         CALL get_q_projector(proj_Q, donor_state, do_os, xas_tdp_env)
220      END IF
221      IF (do_sf) THEN !spin-flip
222         CALL get_q_projector(proj_Q_sf, donor_state, do_os, xas_tdp_env, do_sf=.TRUE.)
223      END IF
224      CALL dbcsr_create(matrix=work, matrix_type="N", dist=submat_dist, name="WORK", &
225                        row_blk_size=submat_blk_size, col_blk_size=submat_blk_size)
226
227      ! The ground state contribution(s)
228      IF (do_sg .OR. do_tp .OR. do_sc) THEN !spin-conserving
229         CALL build_gs_contribution(matrix_a, donor_state, do_os, qs_env)
230      END IF
231      IF (do_sf) THEN !spin-flip
232         CALL build_gs_contribution(matrix_a_sf, donor_state, do_os, qs_env, do_sf=.TRUE.)
233      END IF
234
235      ! The Coulomb and XC kernels. Internal analysis to know which matrix to compute
236      CALL dbcsr_allocate_matrix_set(xc_ker, 4)
237      ALLOCATE (xc_ker(1)%matrix, xc_ker(2)%matrix, xc_ker(3)%matrix, xc_ker(4)%matrix)
238      CALL kernel_coulomb_xc(matrix_b, xc_ker, donor_state, xas_tdp_env, xas_tdp_control, qs_env)
239      matrix_c_sg => xc_ker(1)%matrix; matrix_c_tp => xc_ker(2)%matrix
240      matrix_c_sc => xc_ker(3)%matrix; matrix_c_sf => xc_ker(4)%matrix
241
242      ! The exact exchange. Internal analysis to know which matrices to compute
243      CALL dbcsr_allocate_matrix_set(ex_ker, 2)
244      ALLOCATE (ex_ker(1)%matrix, ex_ker(2)%matrix)
245      CALL kernel_exchange(ex_ker, donor_state, xas_tdp_env, xas_tdp_control, qs_env)
246      matrix_d => ex_ker(1)%matrix; matrix_e_sc => ex_ker(2)%matrix
247
248      ! Build the metric G, also need its inverse in case of full-TDDFT
249      IF (do_tda) THEN
250         ALLOCATE (donor_state%metric(1))
251         CALL build_metric(donor_state%metric, donor_state, qs_env, do_os)
252      ELSE
253         ALLOCATE (donor_state%metric(2))
254         CALL build_metric(donor_state%metric, donor_state, qs_env, do_os, do_inv=.TRUE.)
255      END IF
256
257!  Build the eigenvalue problem, depending on the case (TDA, singlet, triplet, hfx, etc ...)
258      IF (do_tda) THEN
259
260         IF (do_sc) THEN ! open-shell spin-conserving under TDA
261
262            ! The final matrix is M = A + B + C - D
263            CALL dbcsr_copy(sc_matrix_tdp, matrix_a, name="OS MATRIX TDP")
264            IF (do_coul) CALL dbcsr_add(sc_matrix_tdp, matrix_b, 1.0_dp, 1.0_dp)
265
266            IF (do_xc) CALL dbcsr_add(sc_matrix_tdp, matrix_c_sc, 1.0_dp, 1.0_dp) !xc kernel
267            IF (do_hfx) CALL dbcsr_add(sc_matrix_tdp, matrix_d, 1.0_dp, -1.0_dp*sx) !scaled hfx
268
269            ! The product with the Q projector
270            CALL dbcsr_multiply('N', 'N', 1.0_dp, proj_Q, sc_matrix_tdp, 0.0_dp, work, filter_eps=eps_filter)
271            CALL dbcsr_multiply('N', 'T', 1.0_dp, work, proj_Q, 0.0_dp, sc_matrix_tdp, filter_eps=eps_filter)
272
273         END IF !do_sc
274
275         IF (do_sf) THEN ! open-shell spin-flip under TDA
276
277            ! The final matrix is M = A + C - D
278            CALL dbcsr_copy(sf_matrix_tdp, matrix_a_sf, name="OS MATRIX TDP")
279
280            IF (do_xc) CALL dbcsr_add(sf_matrix_tdp, matrix_c_sf, 1.0_dp, 1.0_dp) !xc kernel
281            IF (do_hfx) CALL dbcsr_add(sf_matrix_tdp, matrix_d, 1.0_dp, -1.0_dp*sx) !scaled hfx
282
283            ! Take the product with the (spin-flip) Q projector
284            CALL dbcsr_multiply('N', 'N', 1.0_dp, proj_Q_sf, sf_matrix_tdp, 0.0_dp, work, filter_eps=eps_filter)
285            CALL dbcsr_multiply('N', 'T', 1.0_dp, work, proj_Q_sf, 0.0_dp, sf_matrix_tdp, filter_eps=eps_filter)
286
287         END IF !do_sf
288
289         IF (do_sg) THEN ! singlets under TDA
290
291            ! The final matrix is M = A + 2B + (C_aa + C_ab) - D
292            CALL dbcsr_copy(sg_matrix_tdp, matrix_a, name="SINGLET MATRIX TDP")
293            IF (do_coul) CALL dbcsr_add(sg_matrix_tdp, matrix_b, 1.0_dp, 2.0_dp)
294
295            IF (do_xc) CALL dbcsr_add(sg_matrix_tdp, matrix_c_sg, 1.0_dp, 1.0_dp) ! xc kernel
296            IF (do_hfx) CALL dbcsr_add(sg_matrix_tdp, matrix_d, 1.0_dp, -1.0_dp*sx) ! scaled hfx
297
298            ! Take the product with the Q projector:
299            CALL dbcsr_multiply('N', 'N', 1.0_dp, proj_Q, sg_matrix_tdp, 0.0_dp, work, filter_eps=eps_filter)
300            CALL dbcsr_multiply('N', 'T', 1.0_dp, work, proj_Q, 0.0_dp, sg_matrix_tdp, filter_eps=eps_filter)
301
302         END IF !do_sg (TDA)
303
304         IF (do_tp) THEN ! triplets under TDA
305
306            ! The final matrix is M =  A + (C_aa - C_ab) - D
307            CALL dbcsr_copy(tp_matrix_tdp, matrix_a, name="TRIPLET MATRIX TDP")
308
309            IF (do_xc) CALL dbcsr_add(tp_matrix_tdp, matrix_c_tp, 1.0_dp, 1.0_dp) ! xc_kernel
310            IF (do_hfx) CALL dbcsr_add(tp_matrix_tdp, matrix_d, 1.0_dp, -1.0_dp*sx) ! scaled hfx
311
312            ! Take the product with the Q projector:
313            CALL dbcsr_multiply('N', 'N', 1.0_dp, proj_Q, tp_matrix_tdp, 0.0_dp, work, filter_eps=eps_filter)
314            CALL dbcsr_multiply('N', 'T', 1.0_dp, work, proj_Q, 0.0_dp, tp_matrix_tdp, filter_eps=eps_filter)
315
316         END IF !do_tp (TDA)
317
318      ELSE ! not TDA
319
320         ! In the case of full-TDDFT, the problem is turned Hermitian with the help of auxiliary
321         ! matrices AUX = (A-D+E)^(+-0.5) that are stored in donor_state
322         CALL build_aux_matrix(1.0E-8_dp, sx, matrix_a, matrix_d, matrix_e_sc, do_hfx, proj_Q, &
323                               work, donor_state, eps_filter, qs_env)
324
325         IF (do_sc) THEN !full-TDDFT open-shell spin-conserving
326
327            ! The final matrix is the sum of the on- and off-diagonal elements as in the description
328            ! M = A + 2B + 2C - D - E
329            CALL dbcsr_copy(sc_matrix_tdp, matrix_a, name="OS MATRIX TDP")
330            IF (do_coul) CALL dbcsr_add(sc_matrix_tdp, matrix_b, 1.0_dp, 2.0_dp)
331
332            IF (do_hfx) THEN !scaled hfx
333               CALL dbcsr_add(sc_matrix_tdp, matrix_d, 1.0_dp, -1.0_dp*sx)
334               CALL dbcsr_add(sc_matrix_tdp, matrix_e_sc, 1.0_dp, -1.0_dp*sx)
335            END IF
336            IF (do_xc) THEN
337               CALL dbcsr_add(sc_matrix_tdp, matrix_c_sc, 1.0_dp, 2.0_dp)
338            END IF
339
340            ! Take the product with the Q projector
341            CALL dbcsr_multiply('N', 'N', 1.0_dp, proj_Q, sc_matrix_tdp, 0.0_dp, work, filter_eps=eps_filter)
342            CALL dbcsr_multiply('N', 'T', 1.0_dp, work, proj_Q, 0.0_dp, sc_matrix_tdp, filter_eps=eps_filter)
343
344            ! Take the product with the inverse metric
345            ! M <= G^-1 * M * G^-1
346            CALL dbcsr_multiply('N', 'N', 1.0_dp, donor_state%metric(2)%matrix, sc_matrix_tdp, &
347                                0.0_dp, work, filter_eps=eps_filter)
348            CALL dbcsr_multiply('N', 'N', 1.0_dp, work, donor_state%metric(2)%matrix, 0.0_dp, &
349                                sc_matrix_tdp, filter_eps=eps_filter)
350
351         END IF
352
353         IF (do_sg) THEN ! full-TDDFT singlets
354
355            ! The final matrix is the sum of the on- and off-diagonal elements as in the description
356            ! M = A + 4B + 2(C_aa + C_ab) - D - E
357            CALL dbcsr_copy(sg_matrix_tdp, matrix_a, name="SINGLET MATRIX TDP")
358            IF (do_coul) CALL dbcsr_add(sg_matrix_tdp, matrix_b, 1.0_dp, 4.0_dp)
359
360            IF (do_hfx) THEN !scaled hfx
361               CALL dbcsr_add(sg_matrix_tdp, matrix_d, 1.0_dp, -1.0_dp*sx)
362               CALL dbcsr_add(sg_matrix_tdp, matrix_e_sc, 1.0_dp, -1.0_dp*sx)
363            END IF
364            IF (do_xc) THEN !xc kernel
365               CALL dbcsr_add(sg_matrix_tdp, matrix_c_sg, 1.0_dp, 2.0_dp)
366            END IF
367
368            ! Take the product with the Q projector
369            CALL dbcsr_multiply('N', 'N', 1.0_dp, proj_Q, sg_matrix_tdp, 0.0_dp, work, filter_eps=eps_filter)
370            CALL dbcsr_multiply('N', 'T', 1.0_dp, work, proj_Q, 0.0_dp, sg_matrix_tdp, filter_eps=eps_filter)
371
372            ! Take the product with the inverse metric
373            ! M <= G^-1 * M * G^-1
374            CALL dbcsr_multiply('N', 'N', 1.0_dp, donor_state%metric(2)%matrix, sg_matrix_tdp, &
375                                0.0_dp, work, filter_eps=eps_filter)
376            CALL dbcsr_multiply('N', 'N', 1.0_dp, work, donor_state%metric(2)%matrix, 0.0_dp, &
377                                sg_matrix_tdp, filter_eps=eps_filter)
378
379         END IF ! singlets
380
381         IF (do_tp) THEN ! full-TDDFT triplets
382
383            ! The final matrix is the sum of the on- and off-diagonal elements as in the description
384            ! M = A + 2(C_aa - C_ab) - D - E
385            CALL dbcsr_copy(tp_matrix_tdp, matrix_a, name="TRIPLET MATRIX TDP")
386
387            IF (do_hfx) THEN !scaled hfx
388               CALL dbcsr_add(tp_matrix_tdp, matrix_d, 1.0_dp, -1.0_dp*sx)
389               CALL dbcsr_add(tp_matrix_tdp, matrix_e_sc, 1.0_dp, -1.0_dp*sx)
390            END IF
391            IF (do_xc) THEN
392               CALL dbcsr_add(tp_matrix_tdp, matrix_c_tp, 1.0_dp, 2.0_dp)
393            END IF
394
395            ! Take the product with the Q projector
396            CALL dbcsr_multiply('N', 'N', 1.0_dp, proj_Q, tp_matrix_tdp, 0.0_dp, work, filter_eps=eps_filter)
397            CALL dbcsr_multiply('N', 'T', 1.0_dp, work, proj_Q, 0.0_dp, tp_matrix_tdp, filter_eps=eps_filter)
398
399            ! Take the product with the inverse metric
400            ! M <= G^-1 * M * G^-1
401            CALL dbcsr_multiply('N', 'N', 1.0_dp, donor_state%metric(2)%matrix, tp_matrix_tdp, &
402                                0.0_dp, work, filter_eps=eps_filter)
403            CALL dbcsr_multiply('N', 'N', 1.0_dp, work, donor_state%metric(2)%matrix, 0.0_dp, &
404                                tp_matrix_tdp, filter_eps=eps_filter)
405
406         END IF ! triplets
407
408      END IF ! test on TDA
409
410!  Clean-up
411      CALL dbcsr_release(matrix_a)
412      CALL dbcsr_release(matrix_a_sf)
413      CALL dbcsr_release(matrix_b)
414      CALL dbcsr_release(proj_Q)
415      CALL dbcsr_release(proj_Q_sf)
416      CALL dbcsr_release(work)
417      CALL dbcsr_deallocate_matrix_set(ex_ker)
418      CALL dbcsr_deallocate_matrix_set(xc_ker)
419
420      CALL timestop(handle)
421
422   END SUBROUTINE setup_xas_tdp_prob
423
424! **************************************************************************************************
425!> \brief Solves the XAS TDP generalized eigenvalue problem omega*C = matrix_tdp*C using standard
426!>        full diagonalization methods. The problem is Hermitian (made that way even if not TDA)
427!> \param donor_state ...
428!> \param xas_tdp_control ...
429!> \param xas_tdp_env ...
430!> \param qs_env ...
431!> \param ex_type whether we deal with singlets, triplets, spin-conserving open-shell or spin-flip
432!> \note The computed eigenvalues and eigenvectors are stored in the donor_state
433!>       The eigenvectors are the LR-coefficients. In case of TDA, c^- is stored. In the general
434!>       case, the sum c^+ + c^- is stored.
435!>      - Spin-restricted:
436!>       In case both singlets and triplets are considered, this routine must be called twice. This
437!>       is the choice that was made because the body of the routine is exactly the same in both cases
438!>       Note that for singlet we solve for u = 1/sqrt(2)*(c_alpha + c_beta) = sqrt(2)*c
439!>       and that for triplets we solve for v = 1/sqrt(2)*(c_alpha - c_beta) = sqrt(2)*c
440!>      - Spin-unrestricted:
441!>       The problem is solved for the LR coefficients c_pIa as they are (not linear combination)
442!>       The routine might be called twice (once for spin-conservign, one for spin-flip)
443! **************************************************************************************************
444   SUBROUTINE solve_xas_tdp_prob(donor_state, xas_tdp_control, xas_tdp_env, qs_env, ex_type)
445
446      TYPE(donor_state_type), POINTER                    :: donor_state
447      TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
448      TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
449      TYPE(qs_environment_type), POINTER                 :: qs_env
450      INTEGER, INTENT(IN)                                :: ex_type
451
452      CHARACTER(len=*), PARAMETER :: routineN = 'solve_xas_tdp_prob', &
453         routineP = moduleN//':'//routineN
454
455      INTEGER                                            :: first_ex, handle, i, imo, ispin, nao, &
456                                                            ndo_mo, nelectron, nevals, nocc, nrow, &
457                                                            nspins
458      LOGICAL                                            :: do_os, do_range, do_sf
459      REAL(dp), ALLOCATABLE, DIMENSION(:)                :: scaling, tmp_evals
460      REAL(dp), DIMENSION(:), POINTER                    :: lr_evals
461      TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
462      TYPE(cp_fm_struct_type), POINTER                   :: ex_struct, fm_struct
463      TYPE(cp_fm_type), POINTER                          :: c_diff, c_sum, lhs_matrix, lr_coeffs, &
464                                                            rhs_matrix, work
465      TYPE(cp_para_env_type), POINTER                    :: para_env
466      TYPE(dbcsr_type)                                   :: tmp_mat
467      TYPE(dbcsr_type), POINTER                          :: matrix_tdp
468
469      CALL timeset(routineN, handle)
470
471      NULLIFY (para_env, blacs_env, fm_struct, rhs_matrix, matrix_tdp, lhs_matrix, work)
472      NULLIFY (c_diff, c_sum, ex_struct, lr_evals, lr_coeffs)
473      CPASSERT(ASSOCIATED(xas_tdp_env))
474
475      do_os = .FALSE.
476      do_sf = .FALSE.
477      IF (ex_type == tddfpt_spin_cons) THEN
478         matrix_tdp => donor_state%sc_matrix_tdp
479         do_os = .TRUE.
480      ELSE IF (ex_type == tddfpt_spin_flip) THEN
481         matrix_tdp => donor_state%sf_matrix_tdp
482         do_os = .TRUE.
483         do_sf = .TRUE.
484      ELSE IF (ex_type == tddfpt_singlet) THEN
485         matrix_tdp => donor_state%sg_matrix_tdp
486      ELSE IF (ex_type == tddfpt_triplet) THEN
487         matrix_tdp => donor_state%tp_matrix_tdp
488      END IF
489      CALL get_qs_env(qs_env=qs_env, para_env=para_env, blacs_env=blacs_env, nelectron_total=nelectron)
490
491!     Initialization
492      nspins = 1; IF (do_os) nspins = 2
493      CALL cp_fm_get_info(donor_state%gs_coeffs, nrow_global=nao)
494      CALL dbcsr_get_info(matrix_tdp, nfullrows_total=nrow)
495      ndo_mo = donor_state%ndo_mo
496      nocc = nelectron/2; IF (do_os) nocc = nelectron
497      nocc = ndo_mo*nocc
498      first_ex = nocc + 1 !where to find the first proper eigenvalue
499
500      !solve by energy_range or number of states ?
501      do_range = .FALSE.
502      IF (xas_tdp_control%e_range > 0.0_dp) do_range = .TRUE.
503
504      ! create the fm infrastructure
505      ALLOCATE (tmp_evals(nrow))
506      CALL cp_fm_struct_create(fm_struct, context=blacs_env, nrow_global=nrow, &
507                               para_env=para_env, ncol_global=nrow)
508      CALL cp_fm_create(c_sum, fm_struct)
509      CALL cp_fm_create(rhs_matrix, fm_struct)
510      CALL cp_fm_create(work, fm_struct)
511
512!     Test on TDA
513      IF (xas_tdp_control%tamm_dancoff) THEN
514
515!        Get the main matrix_tdp as an fm
516         CALL copy_dbcsr_to_fm(matrix_tdp, rhs_matrix)
517
518!        Get the metric as a fm
519         CALL cp_fm_create(lhs_matrix, fm_struct)
520         CALL copy_dbcsr_to_fm(donor_state%metric(1)%matrix, lhs_matrix)
521
522         !Diagonalisation (Cholesky decomposition). In TDA, c_sum = c^-
523         CALL cp_fm_geeig(rhs_matrix, lhs_matrix, c_sum, tmp_evals, work)
524
525!        TDA specific clean-up
526         CALL cp_fm_release(lhs_matrix)
527
528      ELSE ! not TDA
529
530!        Need to multiply the current matrix_tdp with the auxiliary matrix
531!        tmp_mat =  (A-D+E)^0.5 * M * (A-D+E)^0.5
532         CALL dbcsr_create(matrix=tmp_mat, template=matrix_tdp, matrix_type="N")
533         CALL dbcsr_multiply('N', 'N', 1.0_dp, donor_state%matrix_aux, matrix_tdp, &
534                             0.0_dp, tmp_mat, filter_eps=xas_tdp_control%eps_filter)
535         CALL dbcsr_multiply('N', 'N', 1.0_dp, tmp_mat, donor_state%matrix_aux, &
536                             0.0_dp, tmp_mat, filter_eps=xas_tdp_control%eps_filter)
537
538!        Get the matrix as a fm
539         CALL copy_dbcsr_to_fm(tmp_mat, rhs_matrix)
540
541!        Solve the "turned-Hermitian" eigenvalue problem
542         CALL choose_eigv_solver(rhs_matrix, work, tmp_evals)
543
544!        Currently, work = (A-D+E)^0.5 (c^+ - c^-) and tmp_evals = omega^2
545!        Put tiny almost zero eigenvalues to zero (corresponding to occupied MOs)
546         WHERE (tmp_evals < 1.0E-4_dp) tmp_evals = 0.0_dp
547
548!        Retrieve c_diff = (c^+ - c^-) for normalization
549!        (c^+ - c^-) = 1/omega^2 * M * (A-D+E)^0.5 * work
550         CALL cp_fm_create(c_diff, fm_struct)
551         CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_tdp, donor_state%matrix_aux, &
552                             0.0_dp, tmp_mat, filter_eps=xas_tdp_control%eps_filter)
553         CALL cp_dbcsr_sm_fm_multiply(tmp_mat, work, c_diff, ncol=nrow)
554
555         ALLOCATE (scaling(nrow))
556         scaling = 0.0_dp
557         WHERE (ABS(tmp_evals) > 1.0E-8_dp) scaling = 1.0_dp/tmp_evals
558         CALL cp_fm_column_scale(c_diff, scaling)
559
560!        Normalize with the metric: c_diff * G * c_diff = +- 1
561         scaling = 0.0_dp
562         CALL get_normal_scaling(scaling, c_diff, donor_state)
563         CALL cp_fm_column_scale(c_diff, scaling)
564
565!        Get the actual eigenvalues
566         tmp_evals = SQRT(tmp_evals)
567
568!        Get c_sum = (c^+ + c^-), which appears in all transition density related expressions
569!        c_sum = -1/omega G^-1 * (A-D+E) * (c^+ - c^-)
570         CALL dbcsr_multiply('N', 'N', 1.0_dp, donor_state%matrix_aux, donor_state%matrix_aux, &
571                             0.0_dp, tmp_mat, filter_eps=xas_tdp_control%eps_filter)
572         CALL dbcsr_multiply('N', 'N', 1.0_dp, donor_state%metric(2)%matrix, tmp_mat, &
573                             0.0_dp, tmp_mat, filter_eps=xas_tdp_control%eps_filter)
574         CALL cp_dbcsr_sm_fm_multiply(tmp_mat, c_diff, c_sum, ncol=nrow)
575         WHERE (tmp_evals .NE. 0) scaling = -1.0_dp/tmp_evals
576         CALL cp_fm_column_scale(c_sum, scaling)
577
578!        Full TDDFT specific clean-up
579         CALL cp_fm_release(c_diff)
580         CALL dbcsr_release(tmp_mat)
581         DEALLOCATE (scaling)
582
583      END IF ! TDA
584
585!     Full matrix clean-up
586      CALL cp_fm_release(rhs_matrix)
587      CALL cp_fm_release(work)
588
589!  Reorganize the eigenvalues, we want a lr_evals array with the proper dimension and where the
590!  first element is the first eval. Need a case study on do_range
591      IF (do_range) THEN
592
593         WHERE (tmp_evals > tmp_evals(first_ex) + xas_tdp_control%e_range) tmp_evals = 0.0_dp
594         nevals = MAXLOC(tmp_evals, 1) - nocc
595
596         ALLOCATE (lr_evals(nevals))
597         lr_evals(:) = tmp_evals(first_ex:nocc + nevals)
598
599      ELSE
600
601         !Determine the number of evals to keep base on N_EXCITED
602         nevals = nspins*nao - nocc/ndo_mo
603         IF (xas_tdp_control%n_excited > 0 .AND. xas_tdp_control%n_excited < nevals) THEN
604            nevals = xas_tdp_control%n_excited
605         END IF
606         nevals = ndo_mo*nevals !as in input description, multiply by # of donor MOs
607
608         ALLOCATE (lr_evals(nevals))
609         lr_evals(:) = tmp_evals(first_ex:nocc + nevals)
610      END IF
611
612!  Reorganize the eigenvectors in array of cp_fm so that each ndo_mo columns corresponds to an
613!  excited state. Makes later calls to those easier and more efficient
614!  In case of open-shell, we store the coeffs in the same logic as the matrix => first block where
615!  the columns are the c_Ialpha and second block with columns as c_Ibeta
616      CALL cp_fm_struct_create(ex_struct, nrow_global=nao, ncol_global=ndo_mo*nspins*nevals, &
617                               para_env=para_env, context=blacs_env)
618      CALL cp_fm_create(lr_coeffs, ex_struct)
619
620      DO i = 1, nevals
621         DO ispin = 1, nspins
622            DO imo = 1, ndo_mo
623
624               CALL cp_fm_to_fm_submat(msource=c_sum, mtarget=lr_coeffs, &
625                                       nrow=nao, ncol=1, s_firstrow=((ispin - 1)*ndo_mo + imo - 1)*nao + 1, &
626                                       s_firstcol=first_ex + i - 1, t_firstrow=1, &
627                                       t_firstcol=(i - 1)*ndo_mo*nspins + (ispin - 1)*ndo_mo + imo)
628            END DO !imo
629         END DO !ispin
630      END DO !istate
631
632      IF (ex_type == tddfpt_spin_cons) THEN
633         donor_state%sc_coeffs => lr_coeffs
634         donor_state%sc_evals => lr_evals
635      ELSE IF (ex_type == tddfpt_spin_flip) THEN
636         donor_state%sf_coeffs => lr_coeffs
637         donor_state%sf_evals => lr_evals
638      ELSE IF (ex_type == tddfpt_singlet) THEN
639         donor_state%sg_coeffs => lr_coeffs
640         donor_State%sg_evals => lr_evals
641      ELSE IF (ex_type == tddfpt_triplet) THEN
642         donor_state%tp_coeffs => lr_coeffs
643         donor_state%tp_evals => lr_evals
644      END IF
645
646!  Clean-up
647      CALL cp_fm_release(c_sum)
648      CALL cp_fm_struct_release(fm_struct)
649      CALL cp_fm_struct_release(ex_struct)
650
651!  Perform a partial clean-up of the donor_state
652      CALL dbcsr_release(matrix_tdp)
653
654      CALL timestop(handle)
655
656   END SUBROUTINE solve_xas_tdp_prob
657
658! **************************************************************************************************
659!> \brief Returns the scaling to apply to normalize the LR eigenvectors.
660!> \param scaling the scaling array to apply
661!> \param lr_coeffs the linear response coefficients as a fm
662!> \param donor_state ...
663!> \note The LR coeffs are normalized when c^T G c = +- 1, G is the metric, c = c^- for TDA and
664!>       c = c^+ - c^- for the full problem
665! **************************************************************************************************
666   SUBROUTINE get_normal_scaling(scaling, lr_coeffs, donor_state)
667
668      REAL(dp), ALLOCATABLE, DIMENSION(:)                :: scaling
669      TYPE(cp_fm_type), POINTER                          :: lr_coeffs
670      TYPE(donor_state_type), POINTER                    :: donor_state
671
672      CHARACTER(len=*), PARAMETER :: routineN = 'get_normal_scaling', &
673         routineP = moduleN//':'//routineN
674
675      INTEGER                                            :: nrow, nscal, nvals
676      REAL(dp), ALLOCATABLE, DIMENSION(:)                :: diag
677      TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
678      TYPE(cp_fm_struct_type), POINTER                   :: norm_struct, work_struct
679      TYPE(cp_fm_type), POINTER                          :: fm_norm, work
680      TYPE(cp_para_env_type), POINTER                    :: para_env
681
682      NULLIFY (para_env, blacs_env, norm_struct, work, fm_norm, work_struct)
683
684!  Creating the matrix structures and initializing the work matrices
685      CALL cp_fm_get_info(lr_coeffs, context=blacs_env, para_env=para_env, &
686                          matrix_struct=work_struct, ncol_global=nvals, nrow_global=nrow)
687      CALL cp_fm_struct_create(norm_struct, para_env=para_env, context=blacs_env, &
688                               nrow_global=nvals, ncol_global=nvals)
689
690      CALL cp_fm_create(work, work_struct)
691      CALL cp_fm_create(fm_norm, norm_struct)
692
693!  Taking c^T * G * C
694      CALL cp_dbcsr_sm_fm_multiply(donor_state%metric(1)%matrix, lr_coeffs, work, ncol=nvals)
695      CALL cp_gemm('T', 'N', nvals, nvals, nrow, 1.0_dp, lr_coeffs, work, 0.0_dp, fm_norm)
696
697!  Computing the needed scaling
698      ALLOCATE (diag(nvals))
699      CALL cp_fm_get_diag(fm_norm, diag)
700      WHERE (ABS(diag) > 1.0E-8_dp) diag = 1.0_dp/SQRT(ABS(diag))
701
702      nscal = SIZE(scaling)
703      scaling(1:nscal) = diag(1:nscal)
704
705!  Clean-up
706      CALL cp_fm_release(work)
707      CALL cp_fm_release(fm_norm)
708      CALL cp_fm_struct_release(norm_struct)
709
710   END SUBROUTINE get_normal_scaling
711
712! **************************************************************************************************
713!> \brief This subroutine computes the row/column block structure as well as the dbcsr ditrinution
714!>        for the submatrices making up the generalized XAS TDP eigenvalue problem. They all share
715!>        the same properties, which are based on the replication of the KS matrix. Stored in the
716!>        donor_state_type
717!> \param donor_state ...
718!> \param do_os whether this is a open-shell calculation
719!> \param qs_env ...
720! **************************************************************************************************
721   SUBROUTINE compute_submat_dist_and_blk_size(donor_state, do_os, qs_env)
722
723      TYPE(donor_state_type), POINTER                    :: donor_state
724      LOGICAL, INTENT(IN)                                :: do_os
725      TYPE(qs_environment_type), POINTER                 :: qs_env
726
727      CHARACTER(len=*), PARAMETER :: routineN = 'compute_submat_dist_and_blk_size', &
728         routineP = moduleN//':'//routineN
729
730      INTEGER                                            :: group, i, nao, nblk_row, ndo_mo, nspins, &
731                                                            scol_dist, srow_dist
732      INTEGER, DIMENSION(:), POINTER                     :: col_dist, col_dist_sub, row_blk_size, &
733                                                            row_dist, row_dist_sub, submat_blk_size
734      INTEGER, DIMENSION(:, :), POINTER                  :: pgrid
735      TYPE(dbcsr_distribution_type), POINTER             :: dbcsr_dist, submat_dist
736      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_ks
737
738      NULLIFY (matrix_ks, dbcsr_dist, row_blk_size, row_dist, col_dist, pgrid, col_dist_sub)
739      NULLIFY (row_dist_sub, submat_dist, submat_blk_size)
740
741!  The submatrices are indexed by M_{pi sig,qj tau}, where p,q label basis functions and i,j donor
742!  MOs and sig,tau the spins. For each spin and donor MOs combination, one has a submatrix of the
743!  size of the KS matrix (nao x nao) with the same dbcsr block structure
744
745!  Initialization
746      ndo_mo = donor_state%ndo_mo
747      CALL get_qs_env(qs_env=qs_env, matrix_ks=matrix_ks, dbcsr_dist=dbcsr_dist)
748      CALL dbcsr_get_info(matrix_ks(1)%matrix, row_blk_size=row_blk_size)
749      CALL dbcsr_distribution_get(dbcsr_dist, row_dist=row_dist, col_dist=col_dist, group=group, &
750                                  pgrid=pgrid)
751      nao = SUM(row_blk_size)
752      nblk_row = SIZE(row_blk_size)
753      srow_dist = SIZE(row_dist)
754      scol_dist = SIZE(col_dist)
755      nspins = 1; IF (do_os) nspins = 2
756
757!  Creation if submatrix block size and col/row distribution
758      ALLOCATE (submat_blk_size(ndo_mo*nspins*nblk_row))
759      ALLOCATE (row_dist_sub(ndo_mo*nspins*srow_dist))
760      ALLOCATE (col_dist_sub(ndo_mo*nspins*scol_dist))
761
762      DO i = 1, ndo_mo*nspins
763         submat_blk_size((i - 1)*nblk_row + 1:i*nblk_row) = row_blk_size
764         row_dist_sub((i - 1)*srow_dist + 1:i*srow_dist) = row_dist
765         col_dist_sub((i - 1)*scol_dist + 1:i*scol_dist) = col_dist
766      END DO
767
768!  Create the submatrix dbcsr distribution
769      ALLOCATE (submat_dist)
770      CALL dbcsr_distribution_new(submat_dist, group=group, pgrid=pgrid, row_dist=row_dist_sub, &
771                                  col_dist=col_dist_sub)
772
773      donor_state%dbcsr_dist => submat_dist
774      donor_state%blk_size => submat_blk_size
775
776!  Clean-up
777      DEALLOCATE (col_dist_sub, row_dist_sub)
778
779   END SUBROUTINE compute_submat_dist_and_blk_size
780
781! **************************************************************************************************
782!> \brief Returns the projector on the unperturbed unoccupied ground state Q = 1 - SP on the block
783!>        diagonal of a matrix with the standard size and distribution.
784!> \param proj_Q the matrix with the projector
785!> \param donor_state ...
786!> \param do_os whether it is open-shell calculation
787!> \param xas_tdp_env ...
788!> \param do_sf whether the projector should be prepared for spin-flip excitations
789!> \note In the spin-flip case, the alpha spins are sent to beta and vice-versa. The structure of
790!>       the projector is changed by swapping the alpha-alpha with the beta-beta block, which
791!>       naturally take the spin change into account. Only for open-shell.
792! **************************************************************************************************
793   SUBROUTINE get_q_projector(proj_Q, donor_state, do_os, xas_tdp_env, do_sf)
794
795      TYPE(dbcsr_type), INTENT(INOUT)                    :: proj_Q
796      TYPE(donor_state_type), POINTER                    :: donor_state
797      LOGICAL, INTENT(IN)                                :: do_os
798      TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
799      LOGICAL, INTENT(IN), OPTIONAL                      :: do_sf
800
801      CHARACTER(len=*), PARAMETER :: routineN = 'get_q_projector', &
802         routineP = moduleN//':'//routineN
803
804      INTEGER                                            :: blk, handle, iblk, imo, ispin, jblk, &
805                                                            nblk_row, ndo_mo, nspins
806      INTEGER, DIMENSION(:), POINTER                     :: blk_size_q, row_blk_size
807      LOGICAL                                            :: found_block, my_dosf
808      REAL(dp), DIMENSION(:), POINTER                    :: work_block
809      TYPE(dbcsr_distribution_type), POINTER             :: dist_q
810      TYPE(dbcsr_iterator_type)                          :: iter
811      TYPE(dbcsr_type), POINTER                          :: one_sp
812
813      NULLIFY (work_block, one_sp, row_blk_size, dist_q, blk_size_q)
814
815      CALL timeset(routineN, handle)
816
817!  Initialization
818      nspins = 1; IF (do_os) nspins = 2
819      ndo_mo = donor_state%ndo_mo
820      one_sp => xas_tdp_env%q_projector(1)%matrix
821      CALL dbcsr_get_info(one_sp, row_blk_size=row_blk_size)
822      nblk_row = SIZE(row_blk_size)
823      my_dosf = .FALSE.
824      IF (PRESENT(do_sf)) my_dosf = do_sf
825      dist_q => donor_state%dbcsr_dist
826      blk_size_q => donor_state%blk_size
827
828      ! the projector is not symmetric
829      CALL dbcsr_create(matrix=proj_Q, name="PROJ Q", matrix_type="N", dist=dist_q, &
830                        row_blk_size=blk_size_q, col_blk_size=blk_size_q)
831
832!  Fill the projector by looping over 1-SP and duplicating blocks. (all on the spin-MO block diagonal)
833      DO ispin = 1, nspins
834         one_sp => xas_tdp_env%q_projector(ispin)%matrix
835
836         !if spin-flip, swap the alpha-alpha and beta-beta blocks
837         IF (my_dosf) one_sp => xas_tdp_env%q_projector(3 - ispin)%matrix
838
839         CALL dbcsr_iterator_start(iter, one_sp)
840         DO WHILE (dbcsr_iterator_blocks_left(iter))
841
842            CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk, blk=blk)
843
844            ! get the block
845            CALL dbcsr_get_block_p(one_sp, iblk, jblk, work_block, found_block)
846
847            IF (found_block) THEN
848
849               DO imo = 1, ndo_mo
850                  CALL dbcsr_put_block(proj_Q, ((ispin - 1)*ndo_mo + imo - 1)*nblk_row + iblk, &
851                                       ((ispin - 1)*ndo_mo + imo - 1)*nblk_row + jblk, work_block)
852               END DO
853
854            END IF
855            NULLIFY (work_block)
856
857         END DO !iterator
858         CALL dbcsr_iterator_stop(iter)
859      END DO !ispin
860
861      CALL dbcsr_finalize(proj_Q)
862
863      CALL timestop(handle)
864
865   END SUBROUTINE get_q_projector
866
867! **************************************************************************************************
868!> \brief Builds the matrix containing the ground state contribution to the matrix_tdp (aka matrix A)
869!>         => A_{pis,qjt} = (F_pq*delta_ij - epsilon_ij*S_pq) delta_st, where:
870!>         F is the KS matrix
871!>         S is the overlap matrix
872!>         epsilon_ij is the donor MO eigenvalue
873!>         i,j labels the MOs, p,q the AOs and s,t the spins
874!> \param matrix_a  pointer to a DBCSR matrix containing A
875!> \param donor_state ...
876!> \param do_os ...
877!> \param qs_env ...
878!> \param do_sf whether the ground state contribution should accommodate spin-flip
879!> \note Even localized non-canonical MOs are diagonalized in their subsapce => eps_ij = eps_ii*delta_ij
880! **************************************************************************************************
881   SUBROUTINE build_gs_contribution(matrix_a, donor_state, do_os, qs_env, do_sf)
882
883      TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix_a
884      TYPE(donor_state_type), POINTER                    :: donor_state
885      LOGICAL, INTENT(IN)                                :: do_os
886      TYPE(qs_environment_type), POINTER                 :: qs_env
887      LOGICAL, INTENT(IN), OPTIONAL                      :: do_sf
888
889      CHARACTER(len=*), PARAMETER :: routineN = 'build_gs_contribution', &
890         routineP = moduleN//':'//routineN
891
892      INTEGER                                            :: blk, handle, iblk, imo, ispin, jblk, &
893                                                            nblk_row, ndo_mo, nspins
894      INTEGER, DIMENSION(:), POINTER                     :: blk_size_a, row_blk_size
895      LOGICAL                                            :: found_block, my_dosf
896      REAL(dp), DIMENSION(:), POINTER                    :: energy_evals, work_block
897      TYPE(dbcsr_distribution_type), POINTER             :: dbcsr_dist, dist_a
898      TYPE(dbcsr_iterator_type)                          :: iter
899      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: m_ks, matrix_ks, matrix_s
900      TYPE(dbcsr_type)                                   :: work_matrix
901
902      NULLIFY (matrix_ks, dbcsr_dist, row_blk_size, work_block, energy_evals, matrix_s, m_ks)
903      NULLIFY (dist_a, blk_size_a)
904
905      !  Note: matrix A is symmetric and block diagonal. If ADMM, the ks matrix is the total one,
906      !        and it is corrected for eigenvalues (done at xas_tdp_init)
907
908      CALL timeset(routineN, handle)
909
910!  Initialization
911      nspins = 1; IF (do_os) nspins = 2
912      ndo_mo = donor_state%ndo_mo
913      CALL get_qs_env(qs_env=qs_env, matrix_ks=matrix_ks, matrix_s=matrix_s, dbcsr_dist=dbcsr_dist)
914      CALL dbcsr_get_info(matrix_s(1)%matrix, row_blk_size=row_blk_size)
915      nblk_row = SIZE(row_blk_size)
916      dist_a => donor_state%dbcsr_dist
917      blk_size_a => donor_state%blk_size
918
919!  Prepare the KS matrix pointer
920      ALLOCATE (m_ks(nspins))
921      m_ks(1)%matrix => matrix_ks(1)%matrix
922      IF (do_os) m_ks(2)%matrix => matrix_ks(2)%matrix
923
924! If spin-flip, swap the KS alpha-alpha and beta-beta quadrants.
925      my_dosf = .FALSE.
926      IF (PRESENT(do_sf)) my_dosf = do_sf
927      IF (my_dosf .AND. do_os) THEN
928         m_ks(1)%matrix => matrix_ks(2)%matrix
929         m_ks(2)%matrix => matrix_ks(1)%matrix
930      END IF
931
932!  Creating the symmetric matrix A (and work)
933      CALL dbcsr_create(matrix=matrix_a, name="MATRIX A", matrix_type="S", dist=dist_a, &
934                        row_blk_size=blk_size_a, col_blk_size=blk_size_a)
935      CALL dbcsr_create(matrix=work_matrix, name="WORK MAT", matrix_type="S", dist=dist_a, &
936                        row_blk_size=blk_size_a, col_blk_size=blk_size_a)
937
938      DO ispin = 1, nspins
939
940!     Loop over the blocks of KS and put them on the spin-MO block diagonal of matrix A
941         CALL dbcsr_iterator_start(iter, m_ks(ispin)%matrix)
942         DO WHILE (dbcsr_iterator_blocks_left(iter))
943
944            CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk, blk=blk)
945
946!           Get the block
947            CALL dbcsr_get_block_p(m_ks(ispin)%matrix, iblk, jblk, work_block, found_block)
948
949            IF (found_block) THEN
950
951!              The KS matrix only appears on diagonal of matrix A => loop over II donor MOs
952               DO imo = 1, ndo_mo
953
954!                 Put the block as it is
955                  CALL dbcsr_put_block(matrix_a, ((ispin - 1)*ndo_mo + imo - 1)*nblk_row + iblk, &
956                                       ((ispin - 1)*ndo_mo + imo - 1)*nblk_row + jblk, work_block)
957
958               END DO !imo
959            END IF !found_block
960            NULLIFY (work_block)
961         END DO ! iteration on KS blocks
962         CALL dbcsr_iterator_stop(iter)
963
964!     Loop over the blocks of S and put them on the block diagonal of work
965         energy_evals => donor_state%energy_evals(:, ispin)
966
967         CALL dbcsr_iterator_start(iter, matrix_s(1)%matrix)
968         DO WHILE (dbcsr_iterator_blocks_left(iter))
969
970            CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk, blk=blk)
971
972!           Get the block
973            CALL dbcsr_get_block_p(matrix_s(1)%matrix, iblk, jblk, work_block, found_block)
974
975            IF (found_block) THEN
976
977!              Add S matrix on block diagonal as epsilon_ii*S_pq
978               DO imo = 1, ndo_mo
979
980                  CALL dbcsr_put_block(work_matrix, ((ispin - 1)*ndo_mo + imo - 1)*nblk_row + iblk, &
981                                       ((ispin - 1)*ndo_mo + imo - 1)*nblk_row + jblk, &
982                                       energy_evals(imo)*work_block)
983               END DO !imo
984            END IF !found block
985            NULLIFY (work_block)
986         END DO ! iteration on S blocks
987         CALL dbcsr_iterator_stop(iter)
988
989      END DO !ispin
990      CALL dbcsr_finalize(matrix_a)
991      CALL dbcsr_finalize(work_matrix)
992
993!  Take matrix_a = matrix_a - work
994      CALL dbcsr_add(matrix_a, work_matrix, 1.0_dp, -1.0_dp)
995      CALL dbcsr_finalize(matrix_a)
996
997!  Clean-up
998      CALL dbcsr_release(work_matrix)
999      DEALLOCATE (m_ks)
1000
1001      CALL timestop(handle)
1002
1003   END SUBROUTINE build_gs_contribution
1004
1005! **************************************************************************************************
1006!> \brief Creates the metric (aka  matrix G) needed for the generalized eigenvalue problem and inverse
1007!>         => G_{pis,qjt} = S_pq*delta_ij*delta_st
1008!> \param matrix_g dbcsr matrix containing G
1009!> \param donor_state ...
1010!> \param qs_env ...
1011!> \param do_os if open-shell calculation
1012!> \param do_inv if the inverse of G should be computed
1013! **************************************************************************************************
1014   SUBROUTINE build_metric(matrix_g, donor_state, qs_env, do_os, do_inv)
1015
1016      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_g
1017      TYPE(donor_state_type), POINTER                    :: donor_state
1018      TYPE(qs_environment_type), POINTER                 :: qs_env
1019      LOGICAL, INTENT(IN)                                :: do_os
1020      LOGICAL, INTENT(IN), OPTIONAL                      :: do_inv
1021
1022      CHARACTER(len=*), PARAMETER :: routineN = 'build_metric', routineP = moduleN//':'//routineN
1023
1024      INTEGER                                            :: blk, handle, i, iblk, jblk, nao, &
1025                                                            nblk_row, ndo_mo, nspins
1026      INTEGER, DIMENSION(:), POINTER                     :: blk_size_g, row_blk_size
1027      LOGICAL                                            :: found_block, my_do_inv
1028      REAL(dp), DIMENSION(:), POINTER                    :: work_block
1029      TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
1030      TYPE(cp_para_env_type), POINTER                    :: para_env
1031      TYPE(dbcsr_distribution_type), POINTER             :: dist_g
1032      TYPE(dbcsr_iterator_type)                          :: iter
1033      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_s
1034      TYPE(dbcsr_type)                                   :: matrix_sinv
1035
1036      NULLIFY (matrix_s, row_blk_size, work_block, para_env, blacs_env, dist_g, blk_size_g)
1037
1038      CALL timeset(routineN, handle)
1039
1040!  Initialization
1041      nspins = 1; IF (do_os) nspins = 2
1042      ndo_mo = donor_state%ndo_mo
1043      CALL get_qs_env(qs_env=qs_env, matrix_s=matrix_s)
1044      CALL dbcsr_get_info(matrix_s(1)%matrix, row_blk_size=row_blk_size, nfullrows_total=nao)
1045      nblk_row = SIZE(row_blk_size)
1046      my_do_inv = .FALSE.
1047      IF (PRESENT(do_inv)) my_do_inv = do_inv
1048      dist_g => donor_state%dbcsr_dist
1049      blk_size_g => donor_state%blk_size
1050
1051!  Creating the symmetric  matrices G and G^-1 with the right size and distribution
1052      ALLOCATE (matrix_g(1)%matrix)
1053      CALL dbcsr_create(matrix=matrix_g(1)%matrix, name="MATRIX G", matrix_type="S", dist=dist_g, &
1054                        row_blk_size=blk_size_g, col_blk_size=blk_size_g)
1055
1056!  Fill the matrices G by looping over the block of S and putting them on the diagonal
1057      CALL dbcsr_iterator_start(iter, matrix_s(1)%matrix)
1058      DO WHILE (dbcsr_iterator_blocks_left(iter))
1059
1060         CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk, blk=blk)
1061
1062!        Get the block
1063         CALL dbcsr_get_block_p(matrix_s(1)%matrix, iblk, jblk, work_block, found_block)
1064
1065         IF (found_block) THEN
1066
1067!           Go over the diagonal of G => donor MOs ii, spin ss
1068            DO i = 1, ndo_mo*nspins
1069               CALL dbcsr_put_block(matrix_g(1)%matrix, (i - 1)*nblk_row + iblk, (i - 1)*nblk_row + jblk, work_block)
1070            END DO
1071
1072         END IF
1073         NULLIFY (work_block)
1074
1075      END DO ! dbcsr_iterator
1076      CALL dbcsr_iterator_stop(iter)
1077
1078!  Finalize
1079      CALL dbcsr_finalize(matrix_g(1)%matrix)
1080
1081!  If the inverse of G is required, do the same as above with the inverse
1082      IF (my_do_inv) THEN
1083
1084         CPASSERT(SIZE(matrix_g) == 2)
1085
1086         ! Create the matrix
1087         ALLOCATE (matrix_g(2)%matrix)
1088         CALL dbcsr_create(matrix=matrix_g(2)%matrix, name="MATRIX GINV", matrix_type="S", &
1089                           dist=dist_g, row_blk_size=blk_size_g, col_blk_size=blk_size_g)
1090
1091         ! Invert the overlap matrix
1092         CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env)
1093         CALL dbcsr_copy(matrix_sinv, matrix_s(1)%matrix)
1094         CALL cp_dbcsr_cholesky_decompose(matrix_sinv, para_env=para_env, blacs_env=blacs_env)
1095         CALL cp_dbcsr_cholesky_invert(matrix_sinv, para_env=para_env, blacs_env=blacs_env, upper_to_full=.TRUE.)
1096
1097!     Fill the matrices G^-1 by looping over the block of S^-1 and putting them on the diagonal
1098         CALL dbcsr_iterator_start(iter, matrix_sinv)
1099         DO WHILE (dbcsr_iterator_blocks_left(iter))
1100
1101            CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk, blk=blk)
1102
1103!           Get the block
1104            CALL dbcsr_get_block_p(matrix_sinv, iblk, jblk, work_block, found_block)
1105
1106            IF (found_block) THEN
1107
1108!              Go over the diagonal of G => donor MOs ii spin ss
1109               DO i = 1, ndo_mo*nspins
1110                  CALL dbcsr_put_block(matrix_g(2)%matrix, (i - 1)*nblk_row + iblk, (i - 1)*nblk_row + jblk, work_block)
1111               END DO
1112
1113            END IF
1114            NULLIFY (work_block)
1115
1116         END DO ! dbcsr_iterator
1117         CALL dbcsr_iterator_stop(iter)
1118
1119         !  Finalize
1120         CALL dbcsr_finalize(matrix_g(2)%matrix)
1121
1122         !  Clean-up
1123         CALL dbcsr_release(matrix_sinv)
1124      END IF !do_inv
1125
1126      CALL timestop(handle)
1127
1128   END SUBROUTINE build_metric
1129
1130! **************************************************************************************************
1131!> \brief Builds the auxiliary matrix (A-D+E)^+0.5 needed for the transofrmation of the
1132!>        full-TDDFT problem into an Hermitian one
1133!> \param threshold a threshold for allowed negative eigenvalues
1134!> \param sx the amount of exact exchange
1135!> \param matrix_a the ground state contribution matrix A
1136!> \param matrix_d the on-diagonal exchange kernel matrix (ab|IJ)
1137!> \param matrix_e the off-diagonal exchange kernel matrix (aJ|Ib)
1138!> \param do_hfx if exact exchange is included
1139!> \param proj_Q ...
1140!> \param work ...
1141!> \param donor_state ...
1142!> \param eps_filter for the dbcsr multiplication
1143!> \param qs_env ...
1144! **************************************************************************************************
1145   SUBROUTINE build_aux_matrix(threshold, sx, matrix_a, matrix_d, matrix_e, do_hfx, proj_Q, &
1146                               work, donor_state, eps_filter, qs_env)
1147
1148      REAL(dp), INTENT(IN)                               :: threshold, sx
1149      TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix_a, matrix_d, matrix_e
1150      LOGICAL, INTENT(IN)                                :: do_hfx
1151      TYPE(dbcsr_type), INTENT(INOUT)                    :: proj_Q, work
1152      TYPE(donor_state_type), POINTER                    :: donor_state
1153      REAL(dp), INTENT(IN)                               :: eps_filter
1154      TYPE(qs_environment_type), POINTER                 :: qs_env
1155
1156      CHARACTER(len=*), PARAMETER :: routineN = 'build_aux_matrix', &
1157         routineP = moduleN//':'//routineN
1158
1159      INTEGER                                            :: handle, ndep
1160      REAL(dp)                                           :: evals(2)
1161      TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
1162      TYPE(cp_para_env_type), POINTER                    :: para_env
1163      TYPE(dbcsr_type)                                   :: tmp_mat
1164
1165      NULLIFY (blacs_env, para_env)
1166
1167      CALL timeset(routineN, handle)
1168
1169      CALL dbcsr_copy(tmp_mat, matrix_a)
1170      IF (do_hfx) THEN
1171         CALL dbcsr_add(tmp_mat, matrix_d, 1.0_dp, -1.0_dp*sx) !scaled hfx
1172         CALL dbcsr_add(tmp_mat, matrix_e, 1.0_dp, 1.0_dp*sx)
1173      END IF
1174
1175      ! Take the product with the Q projector:
1176      CALL dbcsr_multiply('N', 'N', 1.0_dp, proj_Q, tmp_mat, 0.0_dp, work, filter_eps=eps_filter)
1177      CALL dbcsr_multiply('N', 'T', 1.0_dp, work, proj_Q, 0.0_dp, tmp_mat, filter_eps=eps_filter)
1178
1179      ! Actually computing and storing the auxiliary matrix
1180      ALLOCATE (donor_state%matrix_aux)
1181      CALL dbcsr_create(matrix=donor_state%matrix_aux, template=matrix_a, name="MAT AUX")
1182
1183      CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env)
1184
1185      ! good quality sqrt
1186      CALL cp_dbcsr_power(tmp_mat, 0.5_dp, threshold, ndep, para_env, blacs_env, eigenvalues=evals)
1187
1188      CALL dbcsr_copy(donor_state%matrix_aux, tmp_mat)
1189
1190      ! Warn the user if matrix not positive semi-definite
1191      IF (evals(1) < 0.0_dp .AND. ABS(evals(1)) > threshold) THEN
1192         CPWARN("The full TDDFT problem might not have been soundly turned Hermitian. Try TDA.")
1193      END IF
1194
1195      ! clean-up
1196      CALL dbcsr_release(tmp_mat)
1197
1198      CALL timestop(handle)
1199
1200   END SUBROUTINE build_aux_matrix
1201
1202! **************************************************************************************************
1203!> \brief Includes the SOC effects on the precomputed spin-conserving and spin-flip excitations
1204!>        from an open-shell calculation (UKS or ROKS). This is a perturbative treatment
1205!> \param donor_state ...
1206!> \param xas_tdp_env ...
1207!> \param xas_tdp_control ...
1208!> \param qs_env ...
1209!> \note Using AMEWs, build an hermitian matrix with all excited states SOC coupling + the
1210!>       excitation energies on the diagonal. Then diagonalize it to get the new excitation
1211!>       energies and corresponding linear combinations of lr_coeffs.
1212!>       The AMEWs are normalized
1213!>       Only for open-shell calculations
1214! **************************************************************************************************
1215   SUBROUTINE include_os_soc(donor_state, xas_tdp_env, xas_tdp_control, qs_env)
1216
1217      TYPE(donor_state_type), POINTER                    :: donor_state
1218      TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
1219      TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
1220      TYPE(qs_environment_type), POINTER                 :: qs_env
1221
1222      CHARACTER(len=*), PARAMETER :: routineN = 'include_os_soc', routineP = moduleN//':'//routineN
1223
1224      INTEGER                                            :: group, handle, homo, iex, isc, isf, nao, &
1225                                                            ndo_mo, ndo_so, nex, npcols, nprows, &
1226                                                            nsc, nsf, ntot, tas(2), tbs(2)
1227      INTEGER, DIMENSION(:), POINTER                     :: col_blk_size, col_dist, row_blk_size, &
1228                                                            row_dist, row_dist_new
1229      INTEGER, DIMENSION(:, :), POINTER                  :: pgrid
1230      LOGICAL                                            :: do_roks, do_uks
1231      REAL(dp)                                           :: eps_filter, gs_sum, soc
1232      REAL(dp), ALLOCATABLE, DIMENSION(:)                :: diag, tmp_evals
1233      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: domo_soc_x, domo_soc_y, domo_soc_z, &
1234                                                            gsex_block
1235      REAL(dp), DIMENSION(:), POINTER                    :: sc_evals, sf_evals
1236      TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
1237      TYPE(cp_cfm_type), POINTER                         :: evecs_cfm, pert_cfm
1238      TYPE(cp_fm_struct_type), POINTER                   :: full_struct, gsex_struct, prod_struct, &
1239                                                            vec_struct, work_struct
1240      TYPE(cp_fm_type), POINTER :: gs_coeffs, gsex_fm, img_fm, mo_coeff, prod_work, real_fm, &
1241         sc_coeffs, sf_coeffs, vec_soc_x, vec_soc_y, vec_soc_z, vec_work, work_fm
1242      TYPE(cp_para_env_type), POINTER                    :: para_env
1243      TYPE(dbcsr_distribution_type), POINTER             :: coeffs_dist, dbcsr_dist, prod_dist
1244      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_s
1245      TYPE(dbcsr_soc_package_type)                       :: dbcsr_soc_package
1246      TYPE(dbcsr_type), POINTER                          :: dbcsr_ovlp, dbcsr_prod, dbcsr_sc, &
1247                                                            dbcsr_sf, dbcsr_tmp, dbcsr_work, &
1248                                                            orb_soc_x, orb_soc_y, orb_soc_z
1249      TYPE(mo_set_p_type), DIMENSION(:), POINTER         :: mos
1250
1251      NULLIFY (gs_coeffs, sc_coeffs, sf_coeffs, matrix_s, orb_soc_x, orb_soc_y, orb_soc_z, mos)
1252      NULLIFY (real_fm, img_fm, full_struct, para_env, blacs_env, mo_coeff, vec_soc_x, work_fm)
1253      NULLIFY (sc_evals, sf_evals, vec_work, prod_work, vec_struct, prod_struct, vec_soc_y)
1254      NULLIFY (vec_soc_z, pert_cfm, evecs_cfm, work_struct, gsex_struct, col_dist, row_dist)
1255      NULLIFY (col_blk_size, row_blk_size, row_dist_new, pgrid, dbcsr_sc, dbcsr_sf, dbcsr_work)
1256      NULLIFY (dbcsr_tmp, dbcsr_ovlp, dbcsr_prod)
1257
1258      CALL timeset(routineN, handle)
1259
1260! Initialization
1261      sc_coeffs => donor_state%sc_coeffs
1262      sf_coeffs => donor_state%sf_coeffs
1263      sc_evals => donor_state%sc_evals
1264      sf_evals => donor_state%sf_evals
1265      nsc = SIZE(sc_evals)
1266      nsf = SIZE(sf_evals)
1267      ntot = 1 + nsc + nsf
1268      nex = nsc !by contrutciotn nsc == nsf, but keep 2 counts for clarity
1269      ndo_mo = donor_state%ndo_mo
1270      ndo_so = 2*ndo_mo
1271      CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env, mos=mos, matrix_s=matrix_s)
1272      CALL dbcsr_get_info(matrix_s(1)%matrix, nfullrows_total=nao)
1273      orb_soc_x => xas_tdp_env%orb_soc(1)%matrix
1274      orb_soc_y => xas_tdp_env%orb_soc(2)%matrix
1275      orb_soc_z => xas_tdp_env%orb_soc(3)%matrix
1276      do_roks = xas_tdp_control%do_roks
1277      do_uks = xas_tdp_control%do_uks
1278      eps_filter = xas_tdp_control%eps_filter
1279
1280      ! For the GS coeffs, we use the same structure both for ROKS and UKS here => allows us to write
1281      ! general code later on, and not use IF (do_roks) statements every second line
1282      IF (do_uks) gs_coeffs => donor_state%gs_coeffs
1283      IF (do_roks) THEN
1284         CALL cp_fm_struct_create(vec_struct, context=blacs_env, para_env=para_env, &
1285                                  nrow_global=nao, ncol_global=ndo_so)
1286         CALL cp_fm_create(gs_coeffs, vec_struct)
1287
1288         ! only alpha donor MOs are stored, need to copy them intoboth the alpha and the beta slot
1289         CALL cp_fm_to_fm_submat(msource=donor_state%gs_coeffs, mtarget=gs_coeffs, nrow=nao, &
1290                                 ncol=ndo_mo, s_firstrow=1, s_firstcol=1, t_firstrow=1, &
1291                                 t_firstcol=1)
1292         CALL cp_fm_to_fm_submat(msource=donor_state%gs_coeffs, mtarget=gs_coeffs, nrow=nao, &
1293                                 ncol=ndo_mo, s_firstrow=1, s_firstcol=1, t_firstrow=1, &
1294                                 t_firstcol=ndo_mo + 1)
1295
1296         CALL cp_fm_struct_release(vec_struct)
1297      END IF
1298
1299! Creating the real and the imaginary part of the SOC perturbation matrix
1300      CALL cp_fm_struct_create(full_struct, context=blacs_env, para_env=para_env, &
1301                               nrow_global=ntot, ncol_global=ntot)
1302      CALL cp_fm_create(real_fm, full_struct)
1303      CALL cp_fm_create(img_fm, full_struct)
1304
1305! Put the excitation energies on the diagonal of the real  matrix. Element 1,1 is the ground state
1306      DO isc = 1, nsc
1307         CALL cp_fm_set_element(real_fm, 1 + isc, 1 + isc, sc_evals(isc))
1308      END DO
1309      DO isf = 1, nsf
1310         CALL cp_fm_set_element(real_fm, 1 + nsc + isf, 1 + nsc + isf, sf_evals(isf))
1311      END DO
1312
1313! Create the bdcsr machinery
1314      CALL get_qs_env(qs_env, dbcsr_dist=dbcsr_dist)
1315      CALL dbcsr_distribution_get(dbcsr_dist, group=group, row_dist=row_dist, pgrid=pgrid, &
1316                                  npcols=npcols, nprows=nprows)
1317      ALLOCATE (col_dist(nex), row_dist_new(nex))
1318      DO iex = 1, nex
1319         col_dist(iex) = MODULO(npcols - iex, npcols)
1320         row_dist_new(iex) = MODULO(nprows - iex, nprows)
1321      END DO
1322      ALLOCATE (coeffs_dist, prod_dist)
1323      CALL dbcsr_distribution_new(coeffs_dist, group=group, pgrid=pgrid, row_dist=row_dist, &
1324                                  col_dist=col_dist)
1325      CALL dbcsr_distribution_new(prod_dist, group=group, pgrid=pgrid, row_dist=row_dist_new, &
1326                                  col_dist=col_dist)
1327
1328      !Create the matrices
1329      ALLOCATE (col_blk_size(nex))
1330      col_blk_size = ndo_so
1331      CALL dbcsr_get_info(matrix_s(1)%matrix, row_blk_size=row_blk_size)
1332
1333      ALLOCATE (dbcsr_sc, dbcsr_sf, dbcsr_work, dbcsr_ovlp, dbcsr_tmp, dbcsr_prod)
1334      CALL dbcsr_create(matrix=dbcsr_sc, name="SPIN CONS", matrix_type="N", dist=coeffs_dist, &
1335                        row_blk_size=row_blk_size, col_blk_size=col_blk_size)
1336      CALL dbcsr_create(matrix=dbcsr_sf, name="SPIN FLIP", matrix_type="N", dist=coeffs_dist, &
1337                        row_blk_size=row_blk_size, col_blk_size=col_blk_size)
1338      CALL dbcsr_create(matrix=dbcsr_work, name="WORK", matrix_type="N", dist=coeffs_dist, &
1339                        row_blk_size=row_blk_size, col_blk_size=col_blk_size)
1340      CALL dbcsr_create(matrix=dbcsr_prod, name="PROD", matrix_type="N", dist=prod_dist, &
1341                        row_blk_size=col_blk_size, col_blk_size=col_blk_size)
1342      CALL dbcsr_create(matrix=dbcsr_ovlp, name="OVLP", matrix_type="N", dist=prod_dist, &
1343                        row_blk_size=col_blk_size, col_blk_size=col_blk_size)
1344
1345      col_blk_size = 1
1346      CALL dbcsr_create(matrix=dbcsr_tmp, name="TMP", matrix_type="N", dist=prod_dist, &
1347                        row_blk_size=col_blk_size, col_blk_size=col_blk_size)
1348      CALL dbcsr_reserve_all_blocks(dbcsr_tmp)
1349
1350      dbcsr_soc_package%dbcsr_sc => dbcsr_sc
1351      dbcsr_soc_package%dbcsr_sf => dbcsr_sf
1352      dbcsr_soc_package%dbcsr_work => dbcsr_work
1353      dbcsr_soc_package%dbcsr_ovlp => dbcsr_ovlp
1354      dbcsr_soc_package%dbcsr_prod => dbcsr_prod
1355      dbcsr_soc_package%dbcsr_tmp => dbcsr_tmp
1356
1357      !Filling the coeffs matrices by copying from the stored fms
1358      CALL copy_fm_to_dbcsr(sc_coeffs, dbcsr_sc)
1359      CALL copy_fm_to_dbcsr(sf_coeffs, dbcsr_sf)
1360
1361! Precompute what we can before looping over excited states.
1362      ! Need to compute the scalar: sum_i sum_sigma <phi^0_i,sigma|SOC|phi^0_i,sigma>, where all
1363      ! occupied MOs are taken into account
1364
1365      !start with the alpha MOs
1366      CALL get_mo_set(mos(1)%mo_set, mo_coeff=mo_coeff, homo=homo)
1367      ALLOCATE (diag(homo))
1368      CALL cp_fm_get_info(mo_coeff, matrix_struct=vec_struct)
1369      CALL cp_fm_struct_create(prod_struct, context=blacs_env, para_env=para_env, &
1370                               nrow_global=homo, ncol_global=homo)
1371      CALL cp_fm_create(vec_work, vec_struct)
1372      CALL cp_fm_create(prod_work, prod_struct)
1373
1374      ! <alpha|SOC_z|alpha> => spin integration yields +1
1375      CALL cp_dbcsr_sm_fm_multiply(orb_soc_z, mo_coeff, vec_work, ncol=homo)
1376      CALL cp_gemm('T', 'N', homo, homo, nao, 1.0_dp, mo_coeff, vec_work, 0.0_dp, prod_work)
1377      CALL cp_fm_get_diag(prod_work, diag)
1378      gs_sum = SUM(diag)
1379
1380      CALL cp_fm_release(vec_work)
1381      CALL cp_fm_release(prod_work)
1382      CALL cp_fm_struct_release(prod_struct)
1383      DEALLOCATE (diag)
1384      NULLIFY (vec_struct)
1385
1386      ! Now do the same with the beta gs coeffs
1387      CALL get_mo_set(mos(2)%mo_set, mo_coeff=mo_coeff, homo=homo)
1388      ALLOCATE (diag(homo))
1389      CALL cp_fm_get_info(mo_coeff, matrix_struct=vec_struct)
1390      CALL cp_fm_struct_create(prod_struct, context=blacs_env, para_env=para_env, &
1391                               nrow_global=homo, ncol_global=homo)
1392      CALL cp_fm_create(vec_work, vec_struct)
1393      CALL cp_fm_create(prod_work, prod_struct)
1394
1395      ! <beta|SOC_z|beta> => spin integration yields -1
1396      CALL cp_dbcsr_sm_fm_multiply(orb_soc_z, mo_coeff, vec_work, ncol=homo)
1397      CALL cp_gemm('T', 'N', homo, homo, nao, 1.0_dp, mo_coeff, vec_work, 0.0_dp, prod_work)
1398      CALL cp_fm_get_diag(prod_work, diag)
1399      gs_sum = gs_sum - SUM(diag) ! -1 because of spin integration
1400
1401      CALL cp_fm_release(vec_work)
1402      CALL cp_fm_release(prod_work)
1403      CALL cp_fm_struct_release(prod_struct)
1404      DEALLOCATE (diag)
1405
1406      ! Need to compute: <phi^0_Isigma|SOC|phi^0_Jtau> for the donor MOs
1407
1408      CALL cp_fm_struct_create(vec_struct, context=blacs_env, para_env=para_env, &
1409                               nrow_global=nao, ncol_global=ndo_so)
1410      CALL cp_fm_struct_create(prod_struct, context=blacs_env, para_env=para_env, &
1411                               nrow_global=ndo_so, ncol_global=ndo_so)
1412      CALL cp_fm_create(vec_soc_x, vec_struct) ! for SOC_x*gs_coeffs
1413      CALL cp_fm_create(vec_soc_y, vec_struct) ! for SOC_y*gs_coeffs
1414      CALL cp_fm_create(vec_soc_z, vec_struct) ! for SOC_z*gs_coeffs
1415      CALL cp_fm_create(prod_work, prod_struct)
1416      ALLOCATE (diag(ndo_so))
1417
1418      ALLOCATE (domo_soc_x(ndo_so, ndo_so), domo_soc_y(ndo_so, ndo_so), domo_soc_z(ndo_so, ndo_so))
1419
1420      CALL cp_dbcsr_sm_fm_multiply(orb_soc_x, gs_coeffs, vec_soc_x, ncol=ndo_so)
1421      CALL cp_gemm('T', 'N', ndo_so, ndo_so, nao, 1.0_dp, gs_coeffs, vec_soc_x, 0.0_dp, prod_work)
1422      CALL cp_fm_get_submatrix(prod_work, domo_soc_x)
1423
1424      CALL cp_dbcsr_sm_fm_multiply(orb_soc_y, gs_coeffs, vec_soc_y, ncol=ndo_so)
1425      CALL cp_gemm('T', 'N', ndo_so, ndo_so, nao, 1.0_dp, gs_coeffs, vec_soc_y, 0.0_dp, prod_work)
1426      CALL cp_fm_get_submatrix(prod_work, domo_soc_y)
1427
1428      CALL cp_dbcsr_sm_fm_multiply(orb_soc_z, gs_coeffs, vec_soc_z, ncol=ndo_so)
1429      CALL cp_gemm('T', 'N', ndo_so, ndo_so, nao, 1.0_dp, gs_coeffs, vec_soc_z, 0.0_dp, prod_work)
1430      CALL cp_fm_get_submatrix(prod_work, domo_soc_z)
1431
1432      ! some more useful work matrices
1433      CALL cp_fm_struct_create(work_struct, context=blacs_env, para_env=para_env, &
1434                               nrow_global=nex, ncol_global=nex)
1435      CALL cp_fm_create(work_fm, work_struct)
1436
1437!  Looping over the excited states, computing the SOC and filling the perturbation matrix
1438!  There are 3 loops to do: sc-sc, sc-sf and sf-sf
1439!  The final perturbation matrix is Hermitian, only the upper diagonal is needed
1440
1441      !need some work matrices for the GS stuff
1442      CALL cp_fm_struct_create(gsex_struct, context=blacs_env, para_env=para_env, &
1443                               nrow_global=nex*ndo_so, ncol_global=ndo_so)
1444      CALL cp_fm_create(gsex_fm, gsex_struct)
1445      ALLOCATE (gsex_block(ndo_so, ndo_so))
1446
1447!  Start with ground-state/spin-conserving SOC:
1448      !  <Psi_0|SOC|Psi_Jsc> = sum_k,sigma <phi^0_k,sigma|SOC|phi^Jsc_k,sigma>
1449
1450      !compute -sc_coeffs*SOC_Z*gs_coeffs, minus sign because SOC_z antisymmetric
1451      CALL cp_gemm('T', 'N', nex*ndo_so, ndo_so, nao, -1.0_dp, sc_coeffs, vec_soc_z, 0.0_dp, gsex_fm)
1452
1453      DO isc = 1, nsc
1454         CALL cp_fm_get_submatrix(fm=gsex_fm, target_m=gsex_block, start_row=(isc - 1)*ndo_so + 1, &
1455                                  start_col=1, n_rows=ndo_so, n_cols=ndo_so)
1456         diag(:) = get_diag(gsex_block)
1457         soc = SUM(diag(1:ndo_mo)) - SUM(diag(ndo_mo + 1:ndo_so)) !minus sign because of spin integration
1458
1459         !purely imaginary contribution
1460         CALL cp_fm_set_element(img_fm, 1, 1 + isc, soc)
1461      END DO !isc
1462
1463!  Then ground-state/spin-flip SOC:
1464      !<Psi_0|SOC|Psi_Jsf> = sum_k,sigma <phi^0_k,sigma|SOC|phi^Jsc_k,tau>   sigma != tau
1465
1466      !compute  -sc_coeffs*SOC_x*gs_coeffs, imaginary contribution
1467      CALL cp_gemm('T', 'N', nex*ndo_so, ndo_so, nao, -1.0_dp, sc_coeffs, vec_soc_x, 0.0_dp, gsex_fm)
1468
1469      DO isf = 1, nsf
1470         CALL cp_fm_get_submatrix(fm=gsex_fm, target_m=gsex_block, start_row=(isf - 1)*ndo_so + 1, &
1471                                  start_col=1, n_rows=ndo_so, n_cols=ndo_so)
1472         diag(:) = get_diag(gsex_block)
1473         soc = SUM(diag) !alpha and beta parts are simply added due to spin integration
1474         CALL cp_fm_set_element(img_fm, 1, 1 + nsc + isf, soc)
1475      END DO !isf
1476
1477      !compute -sc_coeffs*SOC_y*gs_coeffs, real contribution
1478      CALL cp_gemm('T', 'N', nex*ndo_so, ndo_so, nao, -1.0_dp, sc_coeffs, vec_soc_y, 0.0_dp, gsex_fm)
1479
1480      DO isf = 1, nsf
1481         CALL cp_fm_get_submatrix(fm=gsex_fm, target_m=gsex_block, start_row=(isf - 1)*ndo_so + 1, &
1482                                  start_col=1, n_rows=ndo_so, n_cols=ndo_so)
1483         diag(:) = get_diag(gsex_block)
1484         soc = SUM(diag(1:ndo_mo)) ! alpha-beta
1485         soc = soc - SUM(diag(ndo_mo + 1:ndo_so)) !beta-alpha
1486         CALL cp_fm_set_element(real_fm, 1, 1 + nsc + isf, soc)
1487      END DO !isf
1488
1489      !ground-state cleanup
1490      CALL cp_fm_release(gsex_fm)
1491      CALL cp_fm_release(vec_soc_x)
1492      CALL cp_fm_release(vec_soc_y)
1493      CALL cp_fm_release(vec_soc_z)
1494      CALL cp_fm_release(prod_work)
1495      CALL cp_fm_struct_release(gsex_struct)
1496      CALL cp_fm_struct_release(prod_struct)
1497      CALL cp_fm_struct_release(vec_struct)
1498      DEALLOCATE (gsex_block)
1499
1500!  Then spin-conserving/spin-conserving SOC
1501!  <Psi_Isc|SOC|Psi_Jsc> =
1502!  sum_k,sigma [<psi^Isc_k,sigma|SOC|psi^Jsc_k,sigma> + <psi^Isc_k,sigma|psi^Jsc_k,sigma> * gs_sum]
1503!  - sum_k,l,sigma <psi^0_k,sigma|SOC|psi^0_l,sigma> * <psi^Isc_l,sigma|psi^Jsc_k,sigma>
1504
1505      !Same spin integration => only SOC_z matters, and the contribution is purely imaginary
1506      CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_z, dbcsr_sc, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
1507      CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sc, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
1508
1509      !the overlap as well
1510      CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_s(1)%matrix, dbcsr_sc, 0.0_dp, dbcsr_work, &
1511                          filter_eps=eps_filter)
1512      CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sc, dbcsr_work, 0.0_dp, dbcsr_ovlp, filter_eps=eps_filter)
1513
1514      CALL os_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_z, pref_diaga=1.0_dp, &
1515                                pref_diagb=-1.0_dp, pref_tracea=-1.0_dp, pref_traceb=1.0_dp, &
1516                                pref_diags=gs_sum, symmetric=.TRUE.)
1517
1518      CALL copy_dbcsr_to_fm(dbcsr_tmp, work_fm)
1519      CALL cp_fm_to_fm_submat(msource=work_fm, mtarget=img_fm, nrow=nex, ncol=nex, s_firstrow=1, &
1520                              s_firstcol=1, t_firstrow=2, t_firstcol=2)
1521
1522!  Then spin-flip/spin-flip SOC
1523!  <Psi_Isf|SOC|Psi_Jsf> =
1524!  sum_k,sigma [<psi^Isf_k,tau|SOC|psi^Jsf_k,tau> + <psi^Isf_k,tau|psi^Jsf_k,tau> * gs_sum]
1525!  - sum_k,l,sigma <psi^0_k,sigma|SOC|psi^0_l,sigma> * <psi^Isf_l,tau|psi^Jsf_k,tau> , tau != sigma
1526
1527      !Same spin integration => only SOC_z matters, and the contribution is purely imaginary
1528      CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_z, dbcsr_sf, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
1529      CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sf, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
1530
1531      !the overlap as well
1532      CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_s(1)%matrix, dbcsr_sf, 0.0_dp, &
1533                          dbcsr_work, filter_eps=eps_filter)
1534      CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sf, dbcsr_work, 0.0_dp, dbcsr_ovlp, filter_eps=eps_filter)
1535
1536      !note: the different prefactors are derived from the fact that because of spin-flip, we have
1537      !alpha-gs and beta-lr interaction
1538      CALL os_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_z, pref_diaga=-1.0_dp, &
1539                                pref_diagb=1.0_dp, pref_tracea=-1.0_dp, pref_traceb=1.0_dp, &
1540                                pref_diags=gs_sum, symmetric=.TRUE.)
1541
1542      CALL copy_dbcsr_to_fm(dbcsr_tmp, work_fm)
1543      CALL cp_fm_to_fm_submat(msource=work_fm, mtarget=img_fm, nrow=nex, ncol=nex, s_firstrow=1, &
1544                              s_firstcol=1, t_firstrow=1 + nsc + 1, t_firstcol=1 + nsc + 1)
1545
1546!  Finally the spin-conserving/spin-flip interaction
1547! <Psi_Isc|SOC|Psi_Jsf> =   sum_k,sigma <psi^Isc_k,sigma|SOC|psi^Isf_k,tau>
1548!                           - sum_k,l,sigma <psi^0_k,tau|SOC|psi^0_l,sigma
1549
1550      tas(1) = 1; tbs(1) = ndo_mo + 1
1551      tas(2) = ndo_mo + 1; tbs(2) = 1
1552
1553      !the overlap
1554      CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_s(1)%matrix, dbcsr_sf, 0.0_dp, &
1555                          dbcsr_work, filter_eps=eps_filter)
1556      CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sc, dbcsr_work, 0.0_dp, dbcsr_ovlp, filter_eps=eps_filter)
1557
1558      !start with the imaginary contribution
1559      CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_x, dbcsr_sf, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
1560      CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sc, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
1561
1562      CALL os_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_x, pref_diaga=1.0_dp, &
1563                                pref_diagb=1.0_dp, pref_tracea=-1.0_dp, pref_traceb=-1.0_dp, &
1564                                tracea_start=tas, traceb_start=tbs)
1565
1566      CALL copy_dbcsr_to_fm(dbcsr_tmp, work_fm)
1567      CALL cp_fm_to_fm_submat(msource=work_fm, mtarget=img_fm, nrow=nex, ncol=nex, s_firstrow=1, &
1568                              s_firstcol=1, t_firstrow=2, t_firstcol=1 + nsc + 1)
1569
1570      !follow up with the real contribution
1571      CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_y, dbcsr_sf, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
1572      CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sc, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
1573
1574      CALL os_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_y, pref_diaga=1.0_dp, &
1575                                pref_diagb=-1.0_dp, pref_tracea=1.0_dp, pref_traceb=-1.0_dp, &
1576                                tracea_start=tas, traceb_start=tbs)
1577
1578      CALL copy_dbcsr_to_fm(dbcsr_tmp, work_fm)
1579      CALL cp_fm_to_fm_submat(msource=work_fm, mtarget=real_fm, nrow=nex, ncol=nex, s_firstrow=1, &
1580                              s_firstcol=1, t_firstrow=2, t_firstcol=1 + nsc + 1)
1581
1582!  Setting up the complex Hermitian perturbed matrix
1583      CALL cp_cfm_create(pert_cfm, full_struct)
1584      CALL cp_fm_to_cfm(real_fm, img_fm, pert_cfm)
1585
1586      CALL cp_fm_release(real_fm)
1587      CALL cp_fm_release(img_fm)
1588
1589!  Diagonalize the perturbed matrix
1590      ALLOCATE (tmp_evals(ntot))
1591      CALL cp_cfm_create(evecs_cfm, full_struct)
1592      CALL cp_cfm_heevd(pert_cfm, evecs_cfm, tmp_evals)
1593
1594      !shift the energies such that the GS has zero and store all that in soc_evals (\wo the GS)
1595      ALLOCATE (donor_state%soc_evals(ntot - 1))
1596      donor_state%soc_evals(:) = tmp_evals(2:ntot) - tmp_evals(1)
1597
1598!  The SOC dipole oscillator strengths
1599      CALL compute_soc_dipole_fosc(evecs_cfm, dbcsr_soc_package, donor_state, xas_tdp_env, &
1600                                   xas_tdp_control, qs_env, gs_coeffs=gs_coeffs)
1601
1602!  And quadrupole
1603      IF (xas_tdp_control%do_quad) THEN
1604         CALL compute_soc_quadrupole_fosc(evecs_cfm, dbcsr_soc_package, donor_state, xas_tdp_env, &
1605                                          xas_tdp_control, qs_env, gs_coeffs=gs_coeffs)
1606      END IF
1607
1608! Clean-up
1609      CALL cp_cfm_release(pert_cfm)
1610      CALL cp_cfm_release(evecs_cfm)
1611      CALL cp_fm_struct_release(full_struct)
1612      IF (do_roks) CALL cp_fm_release(gs_coeffs)
1613      CALL dbcsr_distribution_release(coeffs_dist)
1614      CALL dbcsr_distribution_release(prod_dist)
1615      CALL dbcsr_release(dbcsr_sc)
1616      CALL dbcsr_release(dbcsr_sf)
1617      CALL dbcsr_release(dbcsr_prod)
1618      CALL dbcsr_release(dbcsr_ovlp)
1619      CALL dbcsr_release(dbcsr_tmp)
1620      CALL dbcsr_release(dbcsr_work)
1621      CALL cp_fm_release(work_fm)
1622      CALL cp_fm_struct_release(work_struct)
1623      DEALLOCATE (coeffs_dist, prod_dist, col_dist, col_blk_size, row_dist_new)
1624      DEALLOCATE (dbcsr_sc, dbcsr_sf, dbcsr_work, dbcsr_prod, dbcsr_ovlp, dbcsr_tmp)
1625
1626      CALL timestop(handle)
1627
1628   END SUBROUTINE include_os_soc
1629
1630! **************************************************************************************************
1631!> \brief Includes the SOC effects on the precomputed restricted closed-shell singlet and triplet
1632!>        excitations. This is a perturbative treatmnent
1633!> \param donor_state ...
1634!> \param xas_tdp_env ...
1635!> \param xas_tdp_control ...
1636!> \param qs_env ...
1637!> \note Using AMEWs, build an hermitian matrix with all excited states SOC coupling + the
1638!>       excitation energies on the diagonal. Then diagonalize it to get the new excitation
1639!>       energies and corresponding linear combinations of lr_coeffs.
1640!>       The AMEWs are normalized
1641!>       Only for spin-restricted calculations
1642!>       The ms=-1,+1 triplets are not explicitely computed in the first place. Assume they have
1643!>       the same energy as the ms=0 triplets and apply the spin raising and lowering operators
1644!>       on the latter to get their AMEWs => this is the qusi-degenerate perturbation theory
1645!>       approach by Neese (QDPT)
1646! **************************************************************************************************
1647   SUBROUTINE include_rcs_soc(donor_state, xas_tdp_env, xas_tdp_control, qs_env)
1648
1649      TYPE(donor_state_type), POINTER                    :: donor_state
1650      TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
1651      TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
1652      TYPE(qs_environment_type), POINTER                 :: qs_env
1653
1654      CHARACTER(len=*), PARAMETER :: routineN = 'include_rcs_soc', &
1655         routineP = moduleN//':'//routineN
1656
1657      INTEGER                                            :: group, handle, iex, isg, itp, nao, &
1658                                                            ndo_mo, nex, npcols, nprows, nsg, &
1659                                                            ntot, ntp
1660      INTEGER, DIMENSION(:), POINTER                     :: col_blk_size, col_dist, row_blk_size, &
1661                                                            row_dist, row_dist_new
1662      INTEGER, DIMENSION(:, :), POINTER                  :: pgrid
1663      REAL(dp)                                           :: eps_filter, soc_gst, sqrt2
1664      REAL(dp), ALLOCATABLE, DIMENSION(:)                :: diag, tmp_evals
1665      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: domo_soc_x, domo_soc_y, domo_soc_z, &
1666                                                            gstp_block
1667      REAL(dp), DIMENSION(:), POINTER                    :: sg_evals, tp_evals
1668      TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
1669      TYPE(cp_cfm_type), POINTER                         :: evecs_cfm, hami_cfm
1670      TYPE(cp_fm_struct_type), POINTER                   :: full_struct, gstp_struct, prod_struct, &
1671                                                            vec_struct, work_struct
1672      TYPE(cp_fm_type), POINTER                          :: gs_coeffs, gstp_fm, img_fm, prod_fm, &
1673                                                            real_fm, sg_coeffs, tmp_fm, tp_coeffs, &
1674                                                            vec_soc_x, vec_soc_y, vec_soc_z, &
1675                                                            work_fm
1676      TYPE(cp_para_env_type), POINTER                    :: para_env
1677      TYPE(dbcsr_distribution_type), POINTER             :: coeffs_dist, dbcsr_dist, prod_dist
1678      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_s
1679      TYPE(dbcsr_soc_package_type)                       :: dbcsr_soc_package
1680      TYPE(dbcsr_type), POINTER                          :: dbcsr_ovlp, dbcsr_prod, dbcsr_sg, &
1681                                                            dbcsr_tmp, dbcsr_tp, dbcsr_work, &
1682                                                            orb_soc_x, orb_soc_y, orb_soc_z
1683
1684      NULLIFY (sg_coeffs, tp_coeffs, gs_coeffs, sg_evals, tp_evals, real_fm, img_fm, full_struct)
1685      NULLIFY (para_env, blacs_env, prod_fm, prod_struct, vec_struct, orb_soc_y, orb_soc_z)
1686      NULLIFY (matrix_s, orb_soc_x, hami_cfm, evecs_cfm, vec_soc_x, vec_soc_y, vec_soc_z)
1687      NULLIFY (work_struct, work_fm, tmp_fm, dbcsr_dist, coeffs_dist, prod_dist, pgrid)
1688      NULLIFY (col_dist, row_dist, col_blk_size, row_blk_size, row_dist_new, gstp_struct)
1689      NULLIFY (dbcsr_tp, dbcsr_sg, dbcsr_prod, dbcsr_work, dbcsr_ovlp, dbcsr_tmp)
1690
1691      CALL timeset(routineN, handle)
1692
1693!  Initialization
1694      CPASSERT(ASSOCIATED(xas_tdp_control))
1695      gs_coeffs => donor_state%gs_coeffs
1696      sg_coeffs => donor_state%sg_coeffs
1697      tp_coeffs => donor_state%tp_coeffs
1698      sg_evals => donor_state%sg_evals
1699      tp_evals => donor_state%tp_evals
1700      nsg = SIZE(sg_evals)
1701      ntp = SIZE(tp_evals)
1702      ntot = 1 + nsg + 3*ntp
1703      ndo_mo = donor_state%ndo_mo
1704      CALL get_qs_env(qs_env, matrix_s=matrix_s)
1705      CALL dbcsr_get_info(matrix_s(1)%matrix, nfullrows_total=nao)
1706      orb_soc_x => xas_tdp_env%orb_soc(1)%matrix
1707      orb_soc_y => xas_tdp_env%orb_soc(2)%matrix
1708      orb_soc_z => xas_tdp_env%orb_soc(3)%matrix
1709      !by construction nsg == ntp, keep those separate for more code clarity though
1710      CPASSERT(nsg == ntp)
1711      nex = nsg
1712      eps_filter = xas_tdp_control%eps_filter
1713
1714!  Creating the real part and imaginary part of the final SOC fm
1715      CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env)
1716      CALL cp_fm_struct_create(full_struct, context=blacs_env, para_env=para_env, &
1717                               nrow_global=ntot, ncol_global=ntot)
1718      CALL cp_fm_create(real_fm, full_struct)
1719      CALL cp_fm_create(img_fm, full_struct)
1720
1721!  Put the excitation energies on the diagonal of the real matrix
1722      DO isg = 1, nsg
1723         CALL cp_fm_set_element(real_fm, 1 + isg, 1 + isg, sg_evals(isg))
1724      END DO
1725      DO itp = 1, ntp
1726         ! first T^-1, then T^0, then T^+1
1727         CALL cp_fm_set_element(real_fm, 1 + itp + nsg, 1 + itp + nsg, tp_evals(itp))
1728         CALL cp_fm_set_element(real_fm, 1 + itp + ntp + nsg, 1 + itp + ntp + nsg, tp_evals(itp))
1729         CALL cp_fm_set_element(real_fm, 1 + itp + 2*ntp + nsg, 1 + itp + 2*ntp + nsg, tp_evals(itp))
1730      END DO
1731
1732!  Create the dbcsr machinery (for fast MM, the core of this routine)
1733      CALL get_qs_env(qs_env, dbcsr_dist=dbcsr_dist)
1734      CALL dbcsr_distribution_get(dbcsr_dist, group=group, row_dist=row_dist, pgrid=pgrid, &
1735                                  npcols=npcols, nprows=nprows)
1736      ALLOCATE (col_dist(nex), row_dist_new(nex))
1737      DO iex = 1, nex
1738         col_dist(iex) = MODULO(npcols - iex, npcols)
1739         row_dist_new(iex) = MODULO(nprows - iex, nprows)
1740      END DO
1741      ALLOCATE (coeffs_dist, prod_dist)
1742      CALL dbcsr_distribution_new(coeffs_dist, group=group, pgrid=pgrid, row_dist=row_dist, &
1743                                  col_dist=col_dist)
1744      CALL dbcsr_distribution_new(prod_dist, group=group, pgrid=pgrid, row_dist=row_dist_new, &
1745                                  col_dist=col_dist)
1746
1747      !Create the matrices
1748      ALLOCATE (col_blk_size(nex))
1749      col_blk_size = ndo_mo
1750      CALL dbcsr_get_info(matrix_s(1)%matrix, row_blk_size=row_blk_size)
1751
1752      ALLOCATE (dbcsr_sg, dbcsr_tp, dbcsr_work, dbcsr_ovlp, dbcsr_tmp, dbcsr_prod)
1753      CALL dbcsr_create(matrix=dbcsr_sg, name="SINGLETS", matrix_type="N", dist=coeffs_dist, &
1754                        row_blk_size=row_blk_size, col_blk_size=col_blk_size)
1755      CALL dbcsr_create(matrix=dbcsr_tp, name="TRIPLETS", matrix_type="N", dist=coeffs_dist, &
1756                        row_blk_size=row_blk_size, col_blk_size=col_blk_size)
1757      CALL dbcsr_create(matrix=dbcsr_work, name="WORK", matrix_type="N", dist=coeffs_dist, &
1758                        row_blk_size=row_blk_size, col_blk_size=col_blk_size)
1759      CALL dbcsr_create(matrix=dbcsr_prod, name="PROD", matrix_type="N", dist=prod_dist, &
1760                        row_blk_size=col_blk_size, col_blk_size=col_blk_size)
1761      CALL dbcsr_create(matrix=dbcsr_ovlp, name="OVLP", matrix_type="N", dist=prod_dist, &
1762                        row_blk_size=col_blk_size, col_blk_size=col_blk_size)
1763
1764      col_blk_size = 1
1765      CALL dbcsr_create(matrix=dbcsr_tmp, name="TMP", matrix_type="N", dist=prod_dist, &
1766                        row_blk_size=col_blk_size, col_blk_size=col_blk_size)
1767      CALL dbcsr_reserve_all_blocks(dbcsr_tmp)
1768
1769      dbcsr_soc_package%dbcsr_sg => dbcsr_sg
1770      dbcsr_soc_package%dbcsr_tp => dbcsr_tp
1771      dbcsr_soc_package%dbcsr_work => dbcsr_work
1772      dbcsr_soc_package%dbcsr_ovlp => dbcsr_ovlp
1773      dbcsr_soc_package%dbcsr_prod => dbcsr_prod
1774      dbcsr_soc_package%dbcsr_tmp => dbcsr_tmp
1775
1776      !Filling the coeffs matrices by copying from the stored fms
1777      CALL copy_fm_to_dbcsr(sg_coeffs, dbcsr_sg)
1778      CALL copy_fm_to_dbcsr(tp_coeffs, dbcsr_tp)
1779
1780!  Create the work and helper fms
1781      CALL cp_fm_get_info(gs_coeffs, matrix_struct=vec_struct)
1782      CALL cp_fm_struct_create(prod_struct, context=blacs_env, para_env=para_env, &
1783                               nrow_global=ndo_mo, ncol_global=ndo_mo)
1784      CALL cp_fm_create(prod_fm, prod_struct)
1785      CALL cp_fm_create(vec_soc_x, vec_struct)
1786      CALL cp_fm_create(vec_soc_y, vec_struct)
1787      CALL cp_fm_create(vec_soc_z, vec_struct)
1788      CALL cp_fm_struct_create(work_struct, context=blacs_env, para_env=para_env, &
1789                               nrow_global=nex, ncol_global=nex)
1790      CALL cp_fm_create(work_fm, work_struct)
1791      CALL cp_fm_create(tmp_fm, work_struct)
1792      ALLOCATE (diag(ndo_mo))
1793
1794!  Precompute everything we can before looping over excited states
1795      sqrt2 = SQRT(2.0_dp)
1796
1797      ! The subset of the donor MOs matrix elements: <phi_I^0|Hsoc|phi_J^0> (kept as global array, small)
1798      ALLOCATE (domo_soc_x(ndo_mo, ndo_mo), domo_soc_y(ndo_mo, ndo_mo), domo_soc_z(ndo_mo, ndo_mo))
1799
1800      CALL cp_dbcsr_sm_fm_multiply(orb_soc_x, gs_coeffs, vec_soc_x, ncol=ndo_mo)
1801      CALL cp_gemm('T', 'N', ndo_mo, ndo_mo, nao, 1.0_dp, gs_coeffs, vec_soc_x, 0.0_dp, prod_fm)
1802      CALL cp_fm_get_submatrix(prod_fm, domo_soc_x)
1803
1804      CALL cp_dbcsr_sm_fm_multiply(orb_soc_y, gs_coeffs, vec_soc_y, ncol=ndo_mo)
1805      CALL cp_gemm('T', 'N', ndo_mo, ndo_mo, nao, 1.0_dp, gs_coeffs, vec_soc_y, 0.0_dp, prod_fm)
1806      CALL cp_fm_get_submatrix(prod_fm, domo_soc_y)
1807
1808      CALL cp_dbcsr_sm_fm_multiply(orb_soc_z, gs_coeffs, vec_soc_z, ncol=ndo_mo)
1809      CALL cp_gemm('T', 'N', ndo_mo, ndo_mo, nao, 1.0_dp, gs_coeffs, vec_soc_z, 0.0_dp, prod_fm)
1810      CALL cp_fm_get_submatrix(prod_fm, domo_soc_z)
1811
1812!  Only have SOC between singlet-triplet triplet-triplet and ground_state-triplet, the resulting
1813!  matrix is Hermitian i.e. the real part is symmetric and the imaginary part is anti-symmetric.
1814!  Can only fill upper half
1815
1816      !Start with the ground state/triplet SOC, SOC*gs_coeffs already computed above
1817      !note: we are computing <0|H|T>, but have SOC*gs_coeffs instead of gs_coeffs*SOC in store. Since
1818      !      the SOC Hamiltonian is anti-symmetric, a - signs pops up in the gemms below
1819
1820      CALL cp_fm_struct_create(gstp_struct, context=blacs_env, para_env=para_env, &
1821                               nrow_global=ntp*ndo_mo, ncol_global=ndo_mo)
1822      CALL cp_fm_create(gstp_fm, gstp_struct)
1823      ALLOCATE (gstp_block(ndo_mo, ndo_mo))
1824
1825      !gs-triplet with Ms=+-1, imaginary part
1826      CALL cp_gemm('T', 'N', ndo_mo*ntp, ndo_mo, nao, -1.0_dp, tp_coeffs, vec_soc_x, 0.0_dp, gstp_fm)
1827
1828      DO itp = 1, ntp
1829         CALL cp_fm_get_submatrix(fm=gstp_fm, target_m=gstp_block, start_row=(itp - 1)*ndo_mo + 1, &
1830                                  start_col=1, n_rows=ndo_mo, n_cols=ndo_mo)
1831         diag(:) = get_diag(gstp_block)
1832         soc_gst = SUM(diag)
1833         CALL cp_fm_set_element(img_fm, 1, 1 + nsg + itp, -1.0_dp*soc_gst) ! <0|H_x|T^-1>
1834         CALL cp_fm_set_element(img_fm, 1, 1 + nsg + 2*ntp + itp, soc_gst) ! <0|H_x|T^+1>
1835      END DO
1836
1837      !gs-triplet with Ms=+-1, real part
1838      CALL cp_gemm('T', 'N', ndo_mo*ntp, ndo_mo, nao, -1.0_dp, tp_coeffs, vec_soc_y, 0.0_dp, gstp_fm)
1839
1840      DO itp = 1, ntp
1841         CALL cp_fm_get_submatrix(fm=gstp_fm, target_m=gstp_block, start_row=(itp - 1)*ndo_mo + 1, &
1842                                  start_col=1, n_rows=ndo_mo, n_cols=ndo_mo)
1843         diag(:) = get_diag(gstp_block)
1844         soc_gst = SUM(diag)
1845         CALL cp_fm_set_element(real_fm, 1, 1 + nsg + itp, -1.0_dp*soc_gst) ! <0|H_y|T^-1>
1846         CALL cp_fm_set_element(real_fm, 1, 1 + nsg + 2*ntp + itp, -1.0_dp*soc_gst) ! <0|H_y|T^+1>
1847      END DO
1848
1849      !gs-triplet with Ms=0, purely imaginary
1850      CALL cp_gemm('T', 'N', ndo_mo*ntp, ndo_mo, nao, -1.0_dp, tp_coeffs, vec_soc_z, 0.0_dp, gstp_fm)
1851
1852      DO itp = 1, ntp
1853         CALL cp_fm_get_submatrix(fm=gstp_fm, target_m=gstp_block, start_row=(itp - 1)*ndo_mo + 1, &
1854                                  start_col=1, n_rows=ndo_mo, n_cols=ndo_mo)
1855         diag(:) = get_diag(gstp_block)
1856         soc_gst = sqrt2*SUM(diag)
1857         CALL cp_fm_set_element(img_fm, 1, 1 + nsg + ntp + itp, soc_gst)
1858      END DO
1859
1860      !gs clean-up
1861      CALL cp_fm_release(prod_fm)
1862      CALL cp_fm_release(vec_soc_x)
1863      CALL cp_fm_release(vec_soc_y)
1864      CALL cp_fm_release(vec_soc_z)
1865      CALL cp_fm_release(gstp_fm)
1866      CALL cp_fm_struct_release(gstp_struct)
1867      CALL cp_fm_struct_release(prod_struct)
1868      DEALLOCATE (gstp_block)
1869
1870      !Now do the singlet-triplet SOC
1871      !start by computing the singlet-triplet overlap
1872      CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_s(1)%matrix, dbcsr_tp, 0.0_dp, &
1873                          dbcsr_work, filter_eps=eps_filter)
1874      CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sg, dbcsr_work, 0.0_dp, dbcsr_ovlp, filter_eps=eps_filter)
1875
1876      !singlet-triplet with Ms=+-1, imaginary part
1877      CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_x, dbcsr_tp, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
1878      CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sg, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
1879
1880      CALL rcs_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_x, &
1881                                 pref_trace=-1.0_dp, pref_overall=-0.5_dp*sqrt2)
1882
1883      !<S|H_x|T^-1>
1884      CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
1885      CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=img_fm, nrow=nex, ncol=nex, &
1886                              s_firstrow=1, s_firstcol=1, t_firstrow=2, &
1887                              t_firstcol=1 + nsg + 1)
1888
1889      !<S|H_x|T^+1> takes a minus sign
1890      CALL cp_fm_scale(-1.0_dp, tmp_fm)
1891      CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=img_fm, nrow=nex, ncol=nex, &
1892                              s_firstrow=1, s_firstcol=1, t_firstrow=2, &
1893                              t_firstcol=1 + nsg + 2*ntp + 1)
1894
1895      !singlet-triplet with Ms=+-1, real part
1896      CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_y, dbcsr_tp, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
1897      CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sg, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
1898
1899      CALL rcs_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_y, &
1900                                 pref_trace=-1.0_dp, pref_overall=-0.5_dp*sqrt2)
1901
1902      !<S|H_y|T^-1>
1903      CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
1904      CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=real_fm, nrow=nex, ncol=nex, &
1905                              s_firstrow=1, s_firstcol=1, t_firstrow=2, &
1906                              t_firstcol=1 + nsg + 1)
1907
1908      !<S|H_y|T^+1>
1909      CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=real_fm, nrow=nex, ncol=nex, &
1910                              s_firstrow=1, s_firstcol=1, t_firstrow=2, &
1911                              t_firstcol=1 + nsg + 2*ntp + 1)
1912
1913      !singlet-triplet with Ms=0, purely imaginary
1914      CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_z, dbcsr_tp, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
1915      CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sg, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
1916
1917      CALL rcs_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_z, &
1918                                 pref_trace=-1.0_dp, pref_overall=1.0_dp)
1919
1920      !<S|H_z|T^0>
1921      CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
1922      CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=img_fm, nrow=nex, ncol=nex, &
1923                              s_firstrow=1, s_firstcol=1, t_firstrow=2, &
1924                              t_firstcol=1 + nsg + ntp + 1)
1925
1926      !Now the triplet-triplet SOC
1927      !start by computing the overlap
1928      CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_s(1)%matrix, dbcsr_tp, 0.0_dp, &
1929                          dbcsr_work, filter_eps=eps_filter)
1930      CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_tp, dbcsr_work, 0.0_dp, dbcsr_ovlp, filter_eps=eps_filter)
1931
1932      !Ms=0 to Ms=+-1 SOC, imaginary part
1933      CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_x, dbcsr_tp, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
1934      CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_tp, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
1935
1936      CALL rcs_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_x, &
1937                                 pref_trace=1.0_dp, pref_overall=-0.5_dp*sqrt2)
1938
1939      !<T^0|H_x|T^+1>
1940      CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
1941      CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=img_fm, nrow=nex, ncol=nex, &
1942                              s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsg + ntp + 1, &
1943                              t_firstcol=1 + nsg + 2*ntp + 1)
1944
1945      !<T^-1|H_x|T^0>, takes a minus sign and a transpose (because computed <T^0|H_x|T^-1>)
1946      CALL cp_fm_transpose(tmp_fm, work_fm)
1947      CALL cp_fm_scale(-1.0_dp, work_fm)
1948      CALL cp_fm_to_fm_submat(msource=work_fm, mtarget=img_fm, nrow=nex, ncol=nex, &
1949                              s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsg + 1, &
1950                              t_firstcol=1 + nsg + ntp + 1)
1951
1952      !Ms=0 to Ms=+-1 SOC, real part
1953      CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_y, dbcsr_tp, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
1954      CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_tp, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
1955
1956      CALL rcs_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_y, &
1957                                 pref_trace=1.0_dp, pref_overall=0.5_dp*sqrt2)
1958
1959      !<T^0|H_y|T^+1>
1960      CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
1961      CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=real_fm, nrow=nex, ncol=nex, &
1962                              s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsg + ntp + 1, &
1963                              t_firstcol=1 + nsg + 2*ntp + 1)
1964
1965      !<T^-1|H_y|T^0>, takes a minus sign and a transpose
1966      CALL cp_fm_transpose(tmp_fm, work_fm)
1967      CALL cp_fm_scale(-1.0_dp, work_fm)
1968      CALL cp_fm_to_fm_submat(msource=work_fm, mtarget=real_fm, nrow=nex, ncol=nex, &
1969                              s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsg + 1, &
1970                              t_firstcol=1 + nsg + ntp + 1)
1971
1972      !Ms=1 to Ms=1 and Ms=-1 to Ms=-1 SOC, purely imaginary
1973      CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_z, dbcsr_tp, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
1974      CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_tp, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
1975
1976      CALL rcs_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_z, &
1977                                 pref_trace=1.0_dp, pref_overall=1.0_dp)
1978
1979      !<T^+1|H_z|T^+1>
1980      CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
1981      CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=img_fm, nrow=nex, ncol=nex, &
1982                              s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsg + 2*ntp + 1, &
1983                              t_firstcol=1 + nsg + 2*ntp + 1)
1984
1985      !<T^-1|H_z|T^-1>, takes a minus sign
1986      CALL cp_fm_scale(-1.0_dp, tmp_fm)
1987      CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=img_fm, nrow=nex, ncol=nex, &
1988                              s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsg + 1, &
1989                              t_firstcol=1 + nsg + 1)
1990
1991!  Intermediate clean-up
1992      CALL cp_fm_struct_release(work_struct)
1993      CALL cp_fm_release(work_fm)
1994      CALL cp_fm_release(tmp_fm)
1995      DEALLOCATE (diag, domo_soc_x, domo_soc_y, domo_soc_z)
1996
1997!  Set-up the complex hermitian perturbation matrix
1998      CALL cp_cfm_create(hami_cfm, full_struct)
1999      CALL cp_fm_to_cfm(real_fm, img_fm, hami_cfm)
2000
2001      CALL cp_fm_release(real_fm)
2002      CALL cp_fm_release(img_fm)
2003
2004!  Diagonalize the Hamiltonian
2005      ALLOCATE (tmp_evals(ntot))
2006      CALL cp_cfm_create(evecs_cfm, full_struct)
2007      CALL cp_cfm_heevd(hami_cfm, evecs_cfm, tmp_evals)
2008
2009      !  Adjust the energies so the GS has zero, and store in the donor_state (without the GS)
2010      ALLOCATE (donor_state%soc_evals(ntot - 1))
2011      donor_state%soc_evals(:) = tmp_evals(2:ntot) - tmp_evals(1)
2012
2013!  Compute the dipole oscillator strengths
2014      CALL compute_soc_dipole_fosc(evecs_cfm, dbcsr_soc_package, donor_state, xas_tdp_env, &
2015                                   xas_tdp_control, qs_env)
2016
2017!  And the quadrupole (if needed)
2018      IF (xas_tdp_control%do_quad) THEN
2019         CALL compute_soc_quadrupole_fosc(evecs_cfm, dbcsr_soc_package, donor_state, xas_tdp_env, &
2020                                          xas_tdp_control, qs_env)
2021      END IF
2022
2023!  Clean-up
2024      CALL cp_fm_struct_release(full_struct)
2025      CALL cp_cfm_release(hami_cfm)
2026      CALL cp_cfm_release(evecs_cfm)
2027      CALL dbcsr_distribution_release(coeffs_dist)
2028      CALL dbcsr_distribution_release(prod_dist)
2029      CALL dbcsr_release(dbcsr_sg)
2030      CALL dbcsr_release(dbcsr_tp)
2031      CALL dbcsr_release(dbcsr_prod)
2032      CALL dbcsr_release(dbcsr_ovlp)
2033      CALL dbcsr_release(dbcsr_tmp)
2034      CALL dbcsr_release(dbcsr_work)
2035      DEALLOCATE (coeffs_dist, prod_dist, col_dist, col_blk_size, row_dist_new)
2036      DEALLOCATE (dbcsr_sg, dbcsr_tp, dbcsr_work, dbcsr_prod, dbcsr_ovlp, dbcsr_tmp)
2037
2038      CALL timestop(handle)
2039
2040   END SUBROUTINE include_rcs_soc
2041
2042! **************************************************************************************************
2043!> \brief Computes the matrix elements of a one-body operator (given wrt AOs) in the basis of the
2044!>        excited state AMEWs with ground state, for the open-shell case
2045!> \param amew_op the operator in the basis of the AMEWs (array because could have x,y,z components)
2046!> \param ao_op the operator in the basis of the atomic orbitals
2047!> \param gs_coeffs the coefficient of the GS donor MOs. Ecplicitely passed because of special
2048!>                  format in the ROKS case (see include_os_soc routine)
2049!> \param dbcsr_soc_package inhertited from the main SOC routine
2050!> \param donor_state ...
2051!> \param eps_filter ...
2052!> \param qs_env ...
2053!> \note The ordering of the AMEWs is consistent with SOC and is gs, sc, sf
2054!>       We assume that the operator is spin-independent => only <0|0>, <0|sc>, <sc|sc> and <sf|sf>
2055!>       yield non-zero matrix elements
2056!>       Only for open-shell calculations
2057! **************************************************************************************************
2058   SUBROUTINE get_os_amew_op(amew_op, ao_op, gs_coeffs, dbcsr_soc_package, donor_state, &
2059                             eps_filter, qs_env)
2060
2061      TYPE(cp_fm_p_type), DIMENSION(:), POINTER          :: amew_op
2062      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: ao_op
2063      TYPE(cp_fm_type), POINTER                          :: gs_coeffs
2064      TYPE(dbcsr_soc_package_type)                       :: dbcsr_soc_package
2065      TYPE(donor_state_type), POINTER                    :: donor_state
2066      REAL(dp), INTENT(IN)                               :: eps_filter
2067      TYPE(qs_environment_type), POINTER                 :: qs_env
2068
2069      CHARACTER(len=*), PARAMETER :: routineN = 'get_os_amew_op', routineP = moduleN//':'//routineN
2070
2071      INTEGER                                            :: dim_op, homo, i, isc, nao, ndo_mo, &
2072                                                            ndo_so, nex, nsc, nsf, ntot
2073      REAL(dp)                                           :: op
2074      REAL(dp), ALLOCATABLE, DIMENSION(:)                :: diag, gsgs_op
2075      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: domo_op, gsex_block, tmp
2076      TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
2077      TYPE(cp_fm_struct_type), POINTER                   :: full_struct, gsex_struct, prod_struct, &
2078                                                            tmp_struct, vec_struct
2079      TYPE(cp_fm_type), POINTER                          :: amew_op_i, gsex_fm, mo_coeff, prod_work, &
2080                                                            sc_coeffs, sf_coeffs, tmp_fm, &
2081                                                            vec_work, work_fm
2082      TYPE(cp_para_env_type), POINTER                    :: para_env
2083      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_s
2084      TYPE(dbcsr_type), POINTER                          :: ao_op_i, dbcsr_ovlp, dbcsr_prod, &
2085                                                            dbcsr_sc, dbcsr_sf, dbcsr_tmp, &
2086                                                            dbcsr_work
2087      TYPE(mo_set_p_type), DIMENSION(:), POINTER         :: mos
2088
2089      NULLIFY (matrix_s, para_env, blacs_env, full_struct, vec_struct, prod_struct, mos, vec_work)
2090      NULLIFY (prod_work, mo_coeff, ao_op_i, amew_op_i, work_fm, tmp_fm, tmp_struct)
2091      NULLIFY (dbcsr_sc, dbcsr_sf, dbcsr_ovlp, dbcsr_work, dbcsr_tmp, dbcsr_prod)
2092
2093!  Iinitialization
2094      dim_op = SIZE(ao_op)
2095      sc_coeffs => donor_state%sc_coeffs
2096      sf_coeffs => donor_state%sf_coeffs
2097      nsc = SIZE(donor_state%sc_evals)
2098      nsf = SIZE(donor_state%sf_evals)
2099      nex = nsc
2100      ntot = 1 + nsc + nsf
2101      ndo_mo = donor_state%ndo_mo
2102      ndo_so = 2*donor_state%ndo_mo !open-shell => nspins = 2
2103      CALL get_qs_env(qs_env, matrix_s=matrix_s, para_env=para_env, blacs_env=blacs_env, mos=mos)
2104      CALL dbcsr_get_info(matrix_s(1)%matrix, nfullrows_total=nao)
2105
2106      dbcsr_sc => dbcsr_soc_package%dbcsr_sc
2107      dbcsr_sf => dbcsr_soc_package%dbcsr_sf
2108      dbcsr_work => dbcsr_soc_package%dbcsr_work
2109      dbcsr_tmp => dbcsr_soc_package%dbcsr_tmp
2110      dbcsr_prod => dbcsr_soc_package%dbcsr_prod
2111      dbcsr_ovlp => dbcsr_soc_package%dbcsr_ovlp
2112
2113!  Create the amew_op matrix set
2114      CALL cp_fm_struct_create(full_struct, context=blacs_env, para_env=para_env, &
2115                               nrow_global=ntot, ncol_global=ntot)
2116      ALLOCATE (amew_op(dim_op))
2117      DO i = 1, dim_op
2118         CALL cp_fm_create(amew_op(i)%matrix, full_struct)
2119      END DO
2120
2121!  Before looping, need to evaluate sum_j,sigma <phi^0_j,sgima|op|phi^0_j,sigma>, for each dimension
2122!  of the operator
2123      ALLOCATE (gsgs_op(dim_op))
2124
2125      !start with the alpha MOs
2126      CALL get_mo_set(mos(1)%mo_set, mo_coeff=mo_coeff, homo=homo)
2127      ALLOCATE (diag(homo))
2128      CALL cp_fm_get_info(mo_coeff, matrix_struct=vec_struct)
2129      CALL cp_fm_struct_create(prod_struct, context=blacs_env, para_env=para_env, &
2130                               nrow_global=homo, ncol_global=homo)
2131      CALL cp_fm_create(vec_work, vec_struct)
2132      CALL cp_fm_create(prod_work, prod_struct)
2133
2134      DO i = 1, dim_op
2135
2136         ao_op_i => ao_op(i)%matrix
2137
2138         CALL cp_dbcsr_sm_fm_multiply(ao_op_i, mo_coeff, vec_work, ncol=homo)
2139         CALL cp_gemm('T', 'N', homo, homo, nao, 1.0_dp, mo_coeff, vec_work, 0.0_dp, prod_work)
2140         CALL cp_fm_get_diag(prod_work, diag)
2141         gsgs_op(i) = SUM(diag)
2142
2143      END DO !i
2144
2145      CALL cp_fm_release(vec_work)
2146      CALL cp_fm_release(prod_work)
2147      CALL cp_fm_struct_release(prod_struct)
2148      DEALLOCATE (diag)
2149      NULLIFY (vec_struct)
2150
2151      !then beta orbitals
2152      CALL get_mo_set(mos(2)%mo_set, mo_coeff=mo_coeff, homo=homo)
2153      ALLOCATE (diag(homo))
2154      CALL cp_fm_get_info(mo_coeff, matrix_struct=vec_struct)
2155      CALL cp_fm_struct_create(prod_struct, context=blacs_env, para_env=para_env, &
2156                               nrow_global=homo, ncol_global=homo)
2157      CALL cp_fm_create(vec_work, vec_struct)
2158      CALL cp_fm_create(prod_work, prod_struct)
2159
2160      DO i = 1, dim_op
2161
2162         ao_op_i => ao_op(i)%matrix
2163
2164         CALL cp_dbcsr_sm_fm_multiply(ao_op_i, mo_coeff, vec_work, ncol=homo)
2165         CALL cp_gemm('T', 'N', homo, homo, nao, 1.0_dp, mo_coeff, vec_work, 0.0_dp, prod_work)
2166         CALL cp_fm_get_diag(prod_work, diag)
2167         gsgs_op(i) = gsgs_op(i) + SUM(diag)
2168
2169      END DO !i
2170
2171      CALL cp_fm_release(vec_work)
2172      CALL cp_fm_release(prod_work)
2173      CALL cp_fm_struct_release(prod_struct)
2174      DEALLOCATE (diag)
2175      NULLIFY (vec_struct)
2176
2177!  Before looping over excited AMEWs, define some work matrices and structures
2178      CALL cp_fm_struct_create(vec_struct, context=blacs_env, para_env=para_env, &
2179                               nrow_global=nao, ncol_global=ndo_so)
2180      CALL cp_fm_struct_create(prod_struct, context=blacs_env, para_env=para_env, &
2181                               nrow_global=ndo_so, ncol_global=ndo_so)
2182      CALL cp_fm_struct_create(gsex_struct, context=blacs_env, para_env=para_env, &
2183                               nrow_global=ndo_so*nex, ncol_global=ndo_so)
2184      CALL cp_fm_struct_create(tmp_struct, context=blacs_env, para_env=para_env, &
2185                               nrow_global=nex, ncol_global=nex)
2186      CALL cp_fm_create(vec_work, vec_struct) !for op*|phi>
2187      CALL cp_fm_create(prod_work, prod_struct) !for any <phi|op|phi>
2188      CALL cp_fm_create(work_fm, full_struct)
2189      CALL cp_fm_create(gsex_fm, gsex_struct)
2190      CALL cp_fm_create(tmp_fm, tmp_struct)
2191      ALLOCATE (diag(ndo_so))
2192      ALLOCATE (domo_op(ndo_so, ndo_so))
2193      ALLOCATE (tmp(ndo_so, ndo_so))
2194      ALLOCATE (gsex_block(ndo_so, ndo_so))
2195
2196!  Loop over the dimensions of the operator
2197      DO i = 1, dim_op
2198
2199         ao_op_i => ao_op(i)%matrix
2200         amew_op_i => amew_op(i)%matrix
2201
2202         !put the gs-gs contribution
2203         CALL cp_fm_set_element(amew_op_i, 1, 1, gsgs_op(i))
2204
2205         !  Precompute what we can before looping over excited states
2206         ! Need the operator in the donor MOs basis <phi^0_I,sigma|op_i|phi^0_J,tau>
2207         CALL cp_dbcsr_sm_fm_multiply(ao_op_i, gs_coeffs, vec_work, ncol=ndo_so)
2208         CALL cp_gemm('T', 'N', ndo_so, ndo_so, nao, 1.0_dp, gs_coeffs, vec_work, 0.0_dp, prod_work)
2209         CALL cp_fm_get_submatrix(prod_work, domo_op)
2210
2211         !  Do the ground-state/spin-conserving operator
2212         CALL cp_gemm('T', 'N', ndo_so*nsc, ndo_so, nao, 1.0_dp, sc_coeffs, vec_work, 0.0_dp, gsex_fm)
2213         DO isc = 1, nsc
2214            CALL cp_fm_get_submatrix(fm=gsex_fm, target_m=gsex_block, start_row=(isc - 1)*ndo_so + 1, &
2215                                     start_col=1, n_rows=ndo_so, n_cols=ndo_so)
2216            diag(:) = get_diag(gsex_block)
2217            op = SUM(diag)
2218            CALL cp_fm_set_element(amew_op_i, 1, 1 + isc, op)
2219         END DO !isc
2220
2221         !  The spin-conserving/spin-conserving operator
2222         !overlap
2223         CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_s(1)%matrix, dbcsr_sc, 0.0_dp, &
2224                             dbcsr_work, filter_eps=eps_filter)
2225         CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sc, dbcsr_work, 0.0_dp, dbcsr_ovlp, filter_eps=eps_filter)
2226
2227         !operator in SC LR-orbital basis
2228         CALL dbcsr_multiply('N', 'N', 1.0_dp, ao_op_i, dbcsr_sc, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
2229         CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sc, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
2230
2231         CALL os_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_op, pref_diaga=1.0_dp, &
2232                                   pref_diagb=1.0_dp, pref_tracea=-1.0_dp, pref_traceb=-1.0_dp, &
2233                                   pref_diags=gsgs_op(i), symmetric=.TRUE.)
2234
2235         CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
2236         CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=amew_op_i, nrow=nex, ncol=nex, &
2237                                 s_firstrow=1, s_firstcol=1, t_firstrow=2, t_firstcol=2)
2238
2239         !  The spin-flip/spin-flip operator
2240         !overlap
2241         CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_s(1)%matrix, dbcsr_sf, 0.0_dp, &
2242                             dbcsr_work, filter_eps=eps_filter)
2243         CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sf, dbcsr_work, 0.0_dp, dbcsr_ovlp, filter_eps=eps_filter)
2244
2245         !operator in SF LR-orbital basis
2246         CALL dbcsr_multiply('N', 'N', 1.0_dp, ao_op_i, dbcsr_sf, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
2247         CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sf, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
2248
2249         !need to reorganize the domo_op array by swapping the alpha-alpha and the beta-beta quarter
2250         tmp(1:ndo_mo, 1:ndo_mo) = domo_op(ndo_mo + 1:ndo_so, ndo_mo + 1:ndo_so)
2251         tmp(ndo_mo + 1:ndo_so, ndo_mo + 1:ndo_so) = domo_op(1:ndo_mo, 1:ndo_mo)
2252
2253         CALL os_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, tmp, pref_diaga=1.0_dp, &
2254                                   pref_diagb=1.0_dp, pref_tracea=-1.0_dp, pref_traceb=-1.0_dp, &
2255                                   pref_diags=gsgs_op(i), symmetric=.TRUE.)
2256
2257         CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
2258         CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=amew_op_i, nrow=nex, ncol=nex, &
2259                                 s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsc + 1, t_firstcol=1 + nsc + 1)
2260
2261         !Symmetry => only upper diag explicitly built
2262         CALL cp_fm_upper_to_full(amew_op_i, work_fm)
2263
2264      END DO !i
2265
2266!  Clean-up
2267      CALL cp_fm_struct_release(full_struct)
2268      CALL cp_fm_struct_release(prod_struct)
2269      CALL cp_fm_struct_release(vec_struct)
2270      CALL cp_fm_struct_release(tmp_struct)
2271      CALL cp_fm_struct_release(gsex_struct)
2272      CALL cp_fm_release(work_fm)
2273      CALL cp_fm_release(tmp_fm)
2274      CALL cp_fm_release(vec_work)
2275      CALL cp_fm_release(prod_work)
2276      CALL cp_fm_release(gsex_fm)
2277
2278   END SUBROUTINE get_os_amew_op
2279
2280! **************************************************************************************************
2281!> \brief Computes the matrix elements of a one-body operator (given wrt AOs) in the basis of the
2282!>        excited state AMEWs with ground state, singlet and triplet with Ms = -1,0,+1
2283!> \param amew_op the operator in the basis of the AMEWs (array because could have x,y,z components)
2284!> \param ao_op the operator in the basis of the atomic orbitals
2285!> \param dbcsr_soc_package inherited from the main SOC routine
2286!> \param donor_state ...
2287!> \param eps_filter for dbcsr multiplication
2288!> \param qs_env ...
2289!> \note The ordering of the AMEWs is consistent with SOC and is gs, sg, tp(-1), tp(0). tp(+1)
2290!>       We assume that the operator is spin-independent => only <0|0>, <0|S>, <S|S> and <T|T>
2291!>       yield non-zero matrix elements
2292!>       Only for spin-restricted calculations
2293! **************************************************************************************************
2294   SUBROUTINE get_rcs_amew_op(amew_op, ao_op, dbcsr_soc_package, donor_state, eps_filter, qs_env)
2295
2296      TYPE(cp_fm_p_type), DIMENSION(:), POINTER          :: amew_op
2297      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: ao_op
2298      TYPE(dbcsr_soc_package_type)                       :: dbcsr_soc_package
2299      TYPE(donor_state_type), POINTER                    :: donor_state
2300      REAL(dp), INTENT(IN)                               :: eps_filter
2301      TYPE(qs_environment_type), POINTER                 :: qs_env
2302
2303      CHARACTER(len=*), PARAMETER :: routineN = 'get_rcs_amew_op', &
2304         routineP = moduleN//':'//routineN
2305
2306      INTEGER                                            :: dim_op, homo, i, isg, nao, ndo_mo, nex, &
2307                                                            nsg, ntot, ntp
2308      REAL(dp)                                           :: op, sqrt2
2309      REAL(dp), ALLOCATABLE, DIMENSION(:)                :: diag, gs_diag, gsgs_op
2310      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: domo_op, sggs_block
2311      TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
2312      TYPE(cp_fm_struct_type), POINTER                   :: full_struct, gsgs_struct, prod_struct, &
2313                                                            sggs_struct, std_struct, tmp_struct, &
2314                                                            vec_struct
2315      TYPE(cp_fm_type), POINTER                          :: amew_op_i, gs_coeffs, gs_fm, mo_coeff, &
2316                                                            prod_fm, sg_coeffs, sggs_fm, tmp_fm, &
2317                                                            vec_op, work_fm
2318      TYPE(cp_para_env_type), POINTER                    :: para_env
2319      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_s
2320      TYPE(dbcsr_type), POINTER                          :: ao_op_i, dbcsr_ovlp, dbcsr_prod, &
2321                                                            dbcsr_sg, dbcsr_tmp, dbcsr_tp, &
2322                                                            dbcsr_work
2323      TYPE(mo_set_p_type), DIMENSION(:), POINTER         :: mos
2324
2325      NULLIFY (gs_coeffs, sg_coeffs, tmp_fm, matrix_s, full_struct, prod_fm, prod_struct, work_fm)
2326      NULLIFY (vec_struct, blacs_env, para_env, mo_coeff, mos, gsgs_struct, std_struct)
2327      NULLIFY (vec_op, gs_fm, tmp_struct, sggs_struct, sggs_fm)
2328      NULLIFY (ao_op_i, dbcsr_tp, dbcsr_sg, dbcsr_ovlp, dbcsr_work, dbcsr_tmp, dbcsr_prod)
2329
2330!  Initialization
2331      gs_coeffs => donor_state%gs_coeffs
2332      sg_coeffs => donor_state%sg_coeffs
2333      nsg = SIZE(donor_state%sg_evals)
2334      ntp = nsg; nex = nsg !all the same by construction, keep them separate for clarity
2335      ntot = 1 + nsg + 3*ntp
2336      ndo_mo = donor_state%ndo_mo
2337      CALL get_qs_env(qs_env, matrix_s=matrix_s, para_env=para_env, blacs_env=blacs_env, mos=mos)
2338      sqrt2 = SQRT(2.0_dp)
2339      dim_op = SIZE(ao_op)
2340
2341      dbcsr_sg => dbcsr_soc_package%dbcsr_sg
2342      dbcsr_tp => dbcsr_soc_package%dbcsr_tp
2343      dbcsr_work => dbcsr_soc_package%dbcsr_work
2344      dbcsr_prod => dbcsr_soc_package%dbcsr_prod
2345      dbcsr_ovlp => dbcsr_soc_package%dbcsr_ovlp
2346      dbcsr_tmp => dbcsr_soc_package%dbcsr_tmp
2347
2348!  Create the amew_op matrix
2349      CALL cp_fm_struct_create(full_struct, context=blacs_env, para_env=para_env, &
2350                               nrow_global=ntot, ncol_global=ntot)
2351      ALLOCATE (amew_op(dim_op))
2352      DO i = 1, dim_op
2353         CALL cp_fm_create(amew_op(i)%matrix, full_struct)
2354      END DO !i
2355
2356!  Deal with the GS-GS contribution <0|0> = 2*sum_j <phi_j|op|phi_j>
2357      CALL get_mo_set(mos(1)%mo_set, mo_coeff=mo_coeff, nao=nao, homo=homo)
2358      CALL cp_fm_struct_create(gsgs_struct, context=blacs_env, para_env=para_env, &
2359                               nrow_global=homo, ncol_global=homo)
2360      CALL cp_fm_get_info(mo_coeff, matrix_struct=std_struct)
2361      CALL cp_fm_create(gs_fm, gsgs_struct)
2362      CALL cp_fm_create(work_fm, std_struct)
2363      ALLOCATE (gsgs_op(dim_op))
2364      ALLOCATE (gs_diag(homo))
2365
2366      DO i = 1, dim_op
2367
2368         ao_op_i => ao_op(i)%matrix
2369
2370         CALL cp_dbcsr_sm_fm_multiply(ao_op_i, mo_coeff, work_fm, ncol=homo)
2371         CALL cp_gemm('T', 'N', homo, homo, nao, 1.0_dp, mo_coeff, work_fm, 0.0_dp, gs_fm)
2372         CALL cp_fm_get_diag(gs_fm, gs_diag)
2373         gsgs_op(i) = 2.0_dp*SUM(gs_diag)
2374
2375      END DO !i
2376
2377      CALL cp_fm_release(gs_fm)
2378      CALL cp_fm_release(work_fm)
2379      CALL cp_fm_struct_release(gsgs_struct)
2380      DEALLOCATE (gs_diag)
2381
2382!  Create the work and helper fms
2383      CALL cp_fm_get_info(gs_coeffs, matrix_struct=vec_struct)
2384      CALL cp_fm_struct_create(prod_struct, context=blacs_env, para_env=para_env, &
2385                               nrow_global=ndo_mo, ncol_global=ndo_mo)
2386      CALL cp_fm_create(prod_fm, prod_struct)
2387      CALL cp_fm_create(vec_op, vec_struct)
2388      CALL cp_fm_struct_create(tmp_struct, context=blacs_env, para_env=para_env, &
2389                               nrow_global=nex, ncol_global=nex)
2390      CALL cp_fm_struct_create(sggs_struct, context=blacs_env, para_env=para_env, &
2391                               nrow_global=ndo_mo*nsg, ncol_global=ndo_mo)
2392      CALL cp_fm_create(tmp_fm, tmp_struct)
2393      CALL cp_fm_create(work_fm, full_struct)
2394      CALL cp_fm_create(sggs_fm, sggs_struct)
2395      ALLOCATE (diag(ndo_mo))
2396      ALLOCATE (domo_op(ndo_mo, ndo_mo))
2397      ALLOCATE (sggs_block(ndo_mo, ndo_mo))
2398
2399! Iterate over the dimensions of the operator
2400! Note: operator matrices are asusmed symmetric, can only do upper half
2401      DO i = 1, dim_op
2402
2403         ao_op_i => ao_op(i)%matrix
2404         amew_op_i => amew_op(i)%matrix
2405
2406         ! The GS-GS contribution
2407         CALL cp_fm_set_element(amew_op_i, 1, 1, gsgs_op(i))
2408
2409         ! Compute the operator in the donor MOs basis
2410         CALL cp_dbcsr_sm_fm_multiply(ao_op_i, gs_coeffs, vec_op, ncol=ndo_mo)
2411         CALL cp_gemm('T', 'N', ndo_mo, ndo_mo, nao, 1.0_dp, gs_coeffs, vec_op, 0.0_dp, prod_fm)
2412         CALL cp_fm_get_submatrix(prod_fm, domo_op)
2413
2414         ! Compute the ground-state/singlet components. ao_op*gs_coeffs already stored in vec_op
2415         CALL cp_gemm('T', 'N', ndo_mo*nsg, ndo_mo, nao, 1.0_dp, sg_coeffs, vec_op, 0.0_dp, sggs_fm)
2416         DO isg = 1, nsg
2417            CALL cp_fm_get_submatrix(fm=sggs_fm, target_m=sggs_block, start_row=(isg - 1)*ndo_mo + 1, &
2418                                     start_col=1, n_rows=ndo_mo, n_cols=ndo_mo)
2419            diag(:) = get_diag(sggs_block)
2420            op = sqrt2*SUM(diag)
2421            CALL cp_fm_set_element(amew_op_i, 1, 1 + isg, op)
2422         END DO
2423
2424         ! do the singlet-singlet components
2425         !start with the overlap
2426         CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_s(1)%matrix, dbcsr_sg, 0.0_dp, &
2427                             dbcsr_work, filter_eps=eps_filter)
2428         CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sg, dbcsr_work, 0.0_dp, dbcsr_ovlp, filter_eps=eps_filter)
2429
2430         !then the operator in the LR orbital basis
2431         CALL dbcsr_multiply('N', 'N', 1.0_dp, ao_op_i, dbcsr_sg, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
2432         CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sg, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
2433
2434         !use the soc routine, it is compatible
2435         CALL rcs_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_op, pref_trace=-1.0_dp, &
2436                                    pref_overall=1.0_dp, pref_diags=gsgs_op(i), symmetric=.TRUE.)
2437
2438         CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
2439         CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=amew_op_i, nrow=nex, ncol=nex, &
2440                                 s_firstrow=1, s_firstcol=1, t_firstrow=2, t_firstcol=2)
2441
2442         ! compute the triplet-triplet components
2443         !the overlap
2444         CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_s(1)%matrix, dbcsr_tp, 0.0_dp, &
2445                             dbcsr_work, filter_eps=eps_filter)
2446         CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_tp, dbcsr_work, 0.0_dp, dbcsr_ovlp, filter_eps=eps_filter)
2447
2448         !the operator in the LR orbital basis
2449         CALL dbcsr_multiply('N', 'N', 1.0_dp, ao_op_i, dbcsr_sg, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
2450         CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sg, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
2451
2452         CALL rcs_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_op, pref_trace=-1.0_dp, &
2453                                    pref_overall=1.0_dp, pref_diags=gsgs_op(i), symmetric=.TRUE.)
2454
2455         CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
2456         !<T^-1|op|T^-1>
2457         CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=amew_op_i, nrow=nex, ncol=nex, &
2458                                 s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsg + 1, t_firstcol=1 + nsg + 1)
2459         !<T^0|op|T^0>
2460         CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=amew_op_i, nrow=nex, ncol=nex, &
2461                                 s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsg + ntp + 1, &
2462                                 t_firstcol=1 + nsg + ntp + 1)
2463         !<T^-1|op|T^-1>
2464         CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=amew_op_i, nrow=nex, ncol=nex, &
2465                                 s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsg + 2*ntp + 1, &
2466                                 t_firstcol=1 + nsg + 2*ntp + 1)
2467
2468         ! Symmetrize the matrix (only upper triangle built)
2469         CALL cp_fm_upper_to_full(amew_op_i, work_fm)
2470
2471      END DO !i
2472
2473!  Clean-up
2474      CALL cp_fm_release(prod_fm)
2475      CALL cp_fm_release(work_fm)
2476      CALL cp_fm_release(tmp_fm)
2477      CALL cp_fm_release(vec_op)
2478      CALL cp_fm_release(sggs_fm)
2479      CALL cp_fm_struct_release(prod_struct)
2480      CALL cp_fm_struct_release(full_struct)
2481      CALL cp_fm_struct_release(tmp_struct)
2482      CALL cp_fm_struct_release(sggs_struct)
2483
2484   END SUBROUTINE get_rcs_amew_op
2485
2486! **************************************************************************************************
2487!> \brief Computes the os SOC matrix elements between excited states AMEWs based on the LR orbitals
2488!> \param amew_soc output dbcsr matrix with the SOC in the AMEW basis (needs to be fully resereved)
2489!> \param lr_soc dbcsr matrix with the SOC wrt the LR orbitals
2490!> \param lr_overlap dbcsr matrix with the excited states LR orbital overlap
2491!> \param domo_soc the SOC in the basis of the donor MOs
2492!> \param pref_diaga ...
2493!> \param pref_diagb ...
2494!> \param pref_tracea ...
2495!> \param pref_traceb ...
2496!> \param pref_diags see notes
2497!> \param symmetric if the outcome is known to be symmetric, only elements with iex <= jex are done
2498!> \param tracea_start the indices where to start in the trace part for alpha
2499!> \param traceb_start the indices where to start in the trace part for beta
2500!> \note For an excited states pair i,j, the AMEW SOC matrix element is:
2501!>       soc_ij =   pref_diaga*SUM(alpha part of diag of lr_soc_ij)
2502!>                + pref_diagb*SUM(beta part of diag of lr_soc_ij)
2503!>                + pref_tracea*SUM(alpha part of lr_ovlp_ij*TRANSPOSE(domo_soc))
2504!>                + pref_traceb*SUM(beta part of lr_ovlp_ij*TRANSPOSE(domo_soc))
2505!>       optinally, one can add pref_diags*SUM(diag lr_ovlp_ij)
2506! **************************************************************************************************
2507   SUBROUTINE os_amew_soc_elements(amew_soc, lr_soc, lr_overlap, domo_soc, pref_diaga, &
2508                                   pref_diagb, pref_tracea, pref_traceb, pref_diags, &
2509                                   symmetric, tracea_start, traceb_start)
2510
2511      TYPE(dbcsr_type)                                   :: amew_soc, lr_soc, lr_overlap
2512      REAL(dp), DIMENSION(:, :)                          :: domo_soc
2513      REAL(dp)                                           :: pref_diaga, pref_diagb, pref_tracea, &
2514                                                            pref_traceb
2515      REAL(dp), OPTIONAL                                 :: pref_diags
2516      LOGICAL, OPTIONAL                                  :: symmetric
2517      INTEGER, DIMENSION(2), OPTIONAL                    :: tracea_start, traceb_start
2518
2519      CHARACTER(len=*), PARAMETER :: routineN = 'os_amew_soc_elements', &
2520         routineP = moduleN//':'//routineN
2521
2522      INTEGER                                            :: blk, iex, jex, ndo_mo, ndo_so
2523      INTEGER, DIMENSION(2)                              :: tas, tbs
2524      LOGICAL                                            :: do_diags, found, my_symm
2525      REAL(dp)                                           :: soc_elem
2526      REAL(dp), ALLOCATABLE, DIMENSION(:)                :: diag
2527      REAL(dp), DIMENSION(:, :), POINTER                 :: pblock
2528      TYPE(dbcsr_iterator_type)                          :: iter
2529
2530      ndo_so = SIZE(domo_soc, 1)
2531      ndo_mo = ndo_so/2
2532      ALLOCATE (diag(ndo_so))
2533      my_symm = .FALSE.
2534      IF (PRESENT(symmetric)) my_symm = symmetric
2535      do_diags = .FALSE.
2536      IF (PRESENT(pref_diags)) do_diags = .TRUE.
2537
2538      !by default, alpha part is (1:ndo_mo,1:ndo_mo) and beta is (ndo_mo+1:ndo_so,ndo_mo+1:ndo_so)
2539      !note: in some SF cases, that might change, mainly because the spin-flip LR-coeffs have
2540      !inverse order, that is: the beta-coeffs in the alpha spot and the alpha coeffs in the
2541      !beta spot
2542      tas = 1
2543      tbs = ndo_mo + 1
2544      IF (PRESENT(tracea_start)) tas = tracea_start
2545      IF (PRESENT(traceb_start)) tbs = traceb_start
2546
2547      CALL dbcsr_set(amew_soc, 0.0_dp)
2548      !loop over the excited states pairs as the block of amew_soc (which are all reserved)
2549      CALL dbcsr_iterator_start(iter, amew_soc)
2550      DO WHILE (dbcsr_iterator_blocks_left(iter))
2551
2552         CALL dbcsr_iterator_next_block(iter, row=iex, column=jex, blk=blk)
2553
2554         IF (my_symm .AND. iex > jex) CYCLE
2555
2556         !compute the soc matrix element
2557         soc_elem = 0.0_dp
2558         CALL dbcsr_get_block_p(lr_soc, iex, jex, pblock, found)
2559         IF (found) THEN
2560            diag(:) = get_diag(pblock)
2561            soc_elem = soc_elem + pref_diaga*SUM(diag(1:ndo_mo)) + pref_diagb*(SUM(diag(ndo_mo + 1:ndo_so)))
2562         END IF
2563
2564         CALL dbcsr_get_block_p(lr_overlap, iex, jex, pblock, found)
2565         IF (found) THEN
2566            soc_elem = soc_elem &
2567                       + pref_tracea*SUM(pblock(tas(1):tas(1) + ndo_mo - 1, tas(2):tas(2) + ndo_mo - 1)* &
2568                                         TRANSPOSE(domo_soc(tas(1):tas(1) + ndo_mo - 1, tas(2):tas(2) + ndo_mo - 1))) &
2569                       + pref_traceb*SUM(pblock(tbs(1):tbs(1) + ndo_mo - 1, tbs(2):tbs(2) + ndo_mo - 1)* &
2570                                         TRANSPOSE(domo_soc(tbs(1):tbs(1) + ndo_mo - 1, tbs(2):tbs(2) + ndo_mo - 1)))
2571
2572            IF (do_diags) THEN
2573               diag(:) = get_diag(pblock)
2574               soc_elem = soc_elem + pref_diags*SUM(diag)
2575            END IF
2576         END IF
2577
2578         CALL dbcsr_get_block_p(amew_soc, iex, jex, pblock, found)
2579         pblock = soc_elem
2580
2581      END DO
2582      CALL dbcsr_iterator_stop(iter)
2583
2584   END SUBROUTINE os_amew_soc_elements
2585
2586! **************************************************************************************************
2587!> \brief Computes the rcs SOC matrix elements between excited states AMEWs based on the LR orbitals
2588!> \param amew_soc output dbcsr matrix with the SOC in the AMEW basis (needs to be fully resereved)
2589!> \param lr_soc dbcsr matrix with the SOC wrt the LR orbitals
2590!> \param lr_overlap dbcsr matrix with the excited states LR orbital overlap
2591!> \param domo_soc the SOC in the basis of the donor MOs
2592!> \param pref_trace see notes
2593!> \param pref_overall see notes
2594!> \param pref_diags see notes
2595!> \param symmetric if the outcome is known to be symmetric, only elements with iex <= jex are done
2596!> \note For an excited states pair i,j, the AMEW SOC matrix element is:
2597!>       soc_ij = pref_overall*(SUM(diag(lr_soc_ij)) + pref_trace*SUM(lr_overlap_ij*TRANSPOSE(domo_soc)))
2598!>       optionally, the value pref_diags*SUM(diag(lr_overlap_ij)) can be added (before pref_overall)
2599! **************************************************************************************************
2600   SUBROUTINE rcs_amew_soc_elements(amew_soc, lr_soc, lr_overlap, domo_soc, pref_trace, &
2601                                    pref_overall, pref_diags, symmetric)
2602
2603      TYPE(dbcsr_type)                                   :: amew_soc, lr_soc, lr_overlap
2604      REAL(dp), DIMENSION(:, :)                          :: domo_soc
2605      REAL(dp)                                           :: pref_trace, pref_overall
2606      REAL(dp), OPTIONAL                                 :: pref_diags
2607      LOGICAL, OPTIONAL                                  :: symmetric
2608
2609      CHARACTER(len=*), PARAMETER :: routineN = 'rcs_amew_soc_elements', &
2610         routineP = moduleN//':'//routineN
2611
2612      INTEGER                                            :: blk, iex, jex
2613      LOGICAL                                            :: do_diags, found, my_symm
2614      REAL(dp)                                           :: soc_elem
2615      REAL(dp), ALLOCATABLE, DIMENSION(:)                :: diag
2616      REAL(dp), DIMENSION(:, :), POINTER                 :: pblock
2617      TYPE(dbcsr_iterator_type)                          :: iter
2618
2619      ALLOCATE (diag(SIZE(domo_soc, 1)))
2620      my_symm = .FALSE.
2621      IF (PRESENT(symmetric)) my_symm = symmetric
2622      do_diags = .FALSE.
2623      IF (PRESENT(pref_diags)) do_diags = .TRUE.
2624
2625      CALL dbcsr_set(amew_soc, 0.0_dp)
2626      !loop over the excited states pairs as the block of amew_soc (which are all reserved)
2627      CALL dbcsr_iterator_start(iter, amew_soc)
2628      DO WHILE (dbcsr_iterator_blocks_left(iter))
2629
2630         CALL dbcsr_iterator_next_block(iter, row=iex, column=jex, blk=blk)
2631
2632         IF (my_symm .AND. iex > jex) CYCLE
2633
2634         !compute the soc matrix element
2635         soc_elem = 0.0_dp
2636         CALL dbcsr_get_block_p(lr_soc, iex, jex, pblock, found)
2637         IF (found) THEN
2638            diag(:) = get_diag(pblock)
2639            soc_elem = soc_elem + SUM(diag)
2640         END IF
2641
2642         CALL dbcsr_get_block_p(lr_overlap, iex, jex, pblock, found)
2643         IF (found) THEN
2644            soc_elem = soc_elem + pref_trace*SUM(pblock*TRANSPOSE(domo_soc))
2645
2646            IF (do_diags) THEN
2647               diag(:) = get_diag(pblock)
2648               soc_elem = soc_elem + pref_diags*SUM(diag)
2649            END IF
2650         END IF
2651
2652         CALL dbcsr_get_block_p(amew_soc, iex, jex, pblock, found)
2653         pblock = pref_overall*soc_elem
2654
2655      END DO
2656      CALL dbcsr_iterator_stop(iter)
2657
2658   END SUBROUTINE rcs_amew_soc_elements
2659
2660! **************************************************************************************************
2661!> \brief Computes the dipole oscillator strengths in the AMEWs basis for SOC
2662!> \param soc_evecs_cfm the complex AMEWs coefficients
2663!> \param dbcsr_soc_package ...
2664!> \param donor_state ...
2665!> \param xas_tdp_env ...
2666!> \param xas_tdp_control ...
2667!> \param qs_env ...
2668!> \param gs_coeffs the ground state coefficients, given for open-shell because in ROKS, the gs_coeffs
2669!>                  are stored slightly differently within SOC for efficiency and code uniquness
2670! **************************************************************************************************
2671   SUBROUTINE compute_soc_dipole_fosc(soc_evecs_cfm, dbcsr_soc_package, donor_state, xas_tdp_env, &
2672                                      xas_tdp_control, qs_env, gs_coeffs)
2673
2674      TYPE(cp_cfm_type), POINTER                         :: soc_evecs_cfm
2675      TYPE(dbcsr_soc_package_type)                       :: dbcsr_soc_package
2676      TYPE(donor_state_type), POINTER                    :: donor_state
2677      TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
2678      TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
2679      TYPE(qs_environment_type), POINTER                 :: qs_env
2680      TYPE(cp_fm_type), OPTIONAL, POINTER                :: gs_coeffs
2681
2682      CHARACTER(len=*), PARAMETER :: routineN = 'compute_soc_dipole_fosc', &
2683         routineP = moduleN//':'//routineN
2684
2685      COMPLEX(dp), ALLOCATABLE, DIMENSION(:, :)          :: transdip
2686      INTEGER                                            :: handle, i, nosc, ntot
2687      LOGICAL                                            :: do_os, do_rcs
2688      REAL(dp), DIMENSION(:), POINTER                    :: osc_str, soc_evals
2689      TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
2690      TYPE(cp_cfm_type), POINTER                         :: dip_cfm, work1_cfm, work2_cfm
2691      TYPE(cp_fm_p_type), DIMENSION(:), POINTER          :: amew_dip
2692      TYPE(cp_fm_struct_type), POINTER                   :: dip_struct, full_struct
2693      TYPE(cp_para_env_type), POINTER                    :: para_env
2694
2695      NULLIFY (para_env, blacs_env, dip_struct, full_struct, work1_cfm, work2_cfm, osc_str)
2696      NULLIFY (soc_evals, amew_dip, dip_cfm)
2697
2698      CALL timeset(routineN, handle)
2699
2700      !init
2701      CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env)
2702      do_os = xas_tdp_control%do_spin_cons
2703      do_rcs = xas_tdp_control%do_singlet
2704      soc_evals => donor_state%soc_evals
2705      nosc = SIZE(soc_evals)
2706      ntot = nosc + 1 !because GS AMEW is in there
2707      ALLOCATE (donor_state%soc_osc_str(nosc))
2708      osc_str => donor_state%soc_osc_str
2709      osc_str(:) = 0.0_dp
2710      IF (do_os .AND. .NOT. PRESENT(gs_coeffs)) CPABORT("Need to pass gs_coeffs for open-shell")
2711
2712      !get some work arrays/matrix
2713      CALL cp_fm_struct_create(dip_struct, context=blacs_env, para_env=para_env, &
2714                               nrow_global=ntot, ncol_global=1)
2715      CALL cp_cfm_get_info(soc_evecs_cfm, matrix_struct=full_struct)
2716      CALL cp_cfm_create(dip_cfm, dip_struct)
2717      CALL cp_cfm_create(work1_cfm, full_struct)
2718      CALL cp_cfm_create(work2_cfm, full_struct)
2719      ALLOCATE (transdip(ntot, 1))
2720
2721      !get the dipole in the AMEW basis
2722      IF (do_os) THEN
2723         CALL get_os_amew_op(amew_dip, xas_tdp_env%dipmat, gs_coeffs, dbcsr_soc_package, &
2724                             donor_state, xas_tdp_control%eps_filter, qs_env)
2725      ELSE
2726         CALL get_rcs_amew_op(amew_dip, xas_tdp_env%dipmat, dbcsr_soc_package, donor_state, &
2727                              xas_tdp_control%eps_filter, qs_env)
2728      END IF
2729
2730      DO i = 1, 3 !cartesian coord x, y, z
2731
2732         !Convert the real dipole into the cfm format for calculations
2733         CALL cp_fm_to_cfm(msourcer=amew_dip(i)%matrix, mtarget=work1_cfm)
2734
2735         !compute amew_coeffs^dagger * amew_dip * amew_gs to get the transition moments
2736         CALL cp_cfm_gemm('C', 'N', ntot, ntot, ntot, (1.0_dp, 0.0_dp), soc_evecs_cfm, work1_cfm, &
2737                          (0.0_dp, 0.0_dp), work2_cfm)
2738         CALL cp_cfm_gemm('N', 'N', ntot, 1, ntot, (1.0_dp, 0.0_dp), work2_cfm, soc_evecs_cfm, &
2739                          (0.0_dp, 0.0_dp), dip_cfm)
2740
2741         CALL cp_cfm_get_submatrix(dip_cfm, transdip)
2742
2743         !transition dipoles are real numbers
2744         osc_str(:) = osc_str(:) + REAL(transdip(2:ntot, 1))**2 + AIMAG(transdip(2:ntot, 1))**2
2745
2746      END DO !i
2747
2748      !multiply with appropriate prefac depending in the rep
2749      IF (xas_tdp_control%dipole_form == xas_dip_len) THEN
2750         osc_str(:) = 2.0_dp/3.0_dp*soc_evals(:)*osc_str(:)
2751      ELSE
2752         osc_str(:) = 2.0_dp/3.0_dp/soc_evals(:)*osc_str(:)
2753      END IF
2754
2755      !clean-up
2756      CALL cp_fm_struct_release(dip_struct)
2757      CALL cp_cfm_release(work1_cfm)
2758      CALL cp_cfm_release(work2_cfm)
2759      CALL cp_cfm_release(dip_cfm)
2760      DO i = 1, 3
2761         CALL cp_fm_release(amew_dip(i)%matrix)
2762      END DO
2763      DEALLOCATE (amew_dip, transdip)
2764
2765      CALL timestop(handle)
2766
2767   END SUBROUTINE compute_soc_dipole_fosc
2768
2769! **************************************************************************************************
2770!> \brief Computes the quadrupole oscillator strengths in the AMEWs basis for SOC
2771!> \param soc_evecs_cfm the complex AMEWs coefficients
2772!> \param dbcsr_soc_package inherited from the main SOC routine
2773!> \param donor_state ...
2774!> \param xas_tdp_env ...
2775!> \param xas_tdp_control ...
2776!> \param qs_env ...
2777!> \param gs_coeffs the ground state coefficients, given for open-shell because in ROKS, the gs_coeffs
2778!>                  are stored slightly differently within SOC for efficiency and code uniquness
2779! **************************************************************************************************
2780   SUBROUTINE compute_soc_quadrupole_fosc(soc_evecs_cfm, dbcsr_soc_package, donor_state, &
2781                                          xas_tdp_env, xas_tdp_control, qs_env, gs_coeffs)
2782
2783      TYPE(cp_cfm_type), POINTER                         :: soc_evecs_cfm
2784      TYPE(dbcsr_soc_package_type)                       :: dbcsr_soc_package
2785      TYPE(donor_state_type), POINTER                    :: donor_state
2786      TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
2787      TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
2788      TYPE(qs_environment_type), POINTER                 :: qs_env
2789      TYPE(cp_fm_type), OPTIONAL, POINTER                :: gs_coeffs
2790
2791      CHARACTER(len=*), PARAMETER :: routineN = 'compute_soc_quadrupole_fosc', &
2792         routineP = moduleN//':'//routineN
2793
2794      COMPLEX(dp), ALLOCATABLE, DIMENSION(:)             :: trace
2795      COMPLEX(dp), ALLOCATABLE, DIMENSION(:, :)          :: transquad
2796      INTEGER                                            :: handle, i, nosc, ntot
2797      LOGICAL                                            :: do_os, do_rcs
2798      REAL(dp), DIMENSION(:), POINTER                    :: osc_str, soc_evals
2799      TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
2800      TYPE(cp_cfm_type), POINTER                         :: quad_cfm, work1_cfm, work2_cfm
2801      TYPE(cp_fm_p_type), DIMENSION(:), POINTER          :: amew_quad
2802      TYPE(cp_fm_struct_type), POINTER                   :: full_struct, quad_struct
2803      TYPE(cp_para_env_type), POINTER                    :: para_env
2804
2805      NULLIFY (para_env, blacs_env, quad_struct, full_struct, work1_cfm, work2_cfm, osc_str)
2806      NULLIFY (soc_evals, amew_quad, quad_cfm)
2807
2808      CALL timeset(routineN, handle)
2809
2810      !init
2811      CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env)
2812      do_os = xas_tdp_control%do_spin_cons
2813      do_rcs = xas_tdp_control%do_singlet
2814      soc_evals => donor_state%soc_evals
2815      nosc = SIZE(soc_evals)
2816      ntot = nosc + 1 !because GS AMEW is in there
2817      ALLOCATE (donor_state%soc_quad_osc_str(nosc))
2818      osc_str => donor_state%soc_quad_osc_str
2819      osc_str(:) = 0.0_dp
2820      IF (do_os .AND. .NOT. PRESENT(gs_coeffs)) CPABORT("Need to pass gs_coeffs for open-shell")
2821
2822      !get some work arrays/matrix
2823      CALL cp_fm_struct_create(quad_struct, context=blacs_env, para_env=para_env, &
2824                               nrow_global=ntot, ncol_global=1)
2825      CALL cp_cfm_get_info(soc_evecs_cfm, matrix_struct=full_struct)
2826      CALL cp_cfm_create(quad_cfm, quad_struct)
2827      CALL cp_cfm_create(work1_cfm, full_struct)
2828      CALL cp_cfm_create(work2_cfm, full_struct)
2829      ALLOCATE (transquad(ntot, 1))
2830      ALLOCATE (trace(nosc))
2831      trace = (0.0_dp, 0.0_dp)
2832
2833      !get the quadrupole in the AMEWs basis
2834      IF (do_os) THEN
2835         CALL get_os_amew_op(amew_quad, xas_tdp_env%quadmat, gs_coeffs, dbcsr_soc_package, &
2836                             donor_state, xas_tdp_control%eps_filter, qs_env)
2837      ELSE
2838         CALL get_rcs_amew_op(amew_quad, xas_tdp_env%quadmat, dbcsr_soc_package, donor_state, &
2839                              xas_tdp_control%eps_filter, qs_env)
2840      END IF
2841
2842      DO i = 1, 6 ! x2, xy, xz, y2, yz, z2
2843
2844         !Convert the real quadrupole into a cfm for further calculation
2845         CALL cp_fm_to_cfm(msourcer=amew_quad(i)%matrix, mtarget=work1_cfm)
2846
2847         !compute amew_coeffs^dagger * amew_quad * amew_gs to get the transition moments
2848         CALL cp_cfm_gemm('C', 'N', ntot, ntot, ntot, (1.0_dp, 0.0_dp), soc_evecs_cfm, work1_cfm, &
2849                          (0.0_dp, 0.0_dp), work2_cfm)
2850         CALL cp_cfm_gemm('N', 'N', ntot, 1, ntot, (1.0_dp, 0.0_dp), work2_cfm, soc_evecs_cfm, &
2851                          (0.0_dp, 0.0_dp), quad_cfm)
2852
2853         CALL cp_cfm_get_submatrix(quad_cfm, transquad)
2854
2855         !if x2, y2 or z2, need to keep track of trace
2856         IF (i == 1 .OR. i == 4 .OR. i == 6) THEN
2857            osc_str(:) = osc_str(:) + REAL(transquad(2:ntot, 1))**2 + AIMAG(transquad(2:ntot, 1))**2
2858            trace(:) = trace(:) + transquad(2:ntot, 1)
2859
2860            !if xy, xz, or yz, need to count twice (for yx, zx and zy)
2861         ELSE
2862            osc_str(:) = osc_str(:) + 2.0_dp*(REAL(transquad(2:ntot, 1))**2 + AIMAG(transquad(2:ntot, 1))**2)
2863         END IF
2864
2865      END DO !i
2866
2867      !remove a third of the trace
2868      osc_str(:) = osc_str(:) - 1._dp/3._dp*(REAL(trace(:))**2 + AIMAG(trace(:))**2)
2869
2870      !multiply by the prefactor
2871      osc_str(:) = osc_str(:)*1._dp/20._dp*a_fine**2*soc_evals(:)**3
2872
2873      !clean-up
2874      CALL cp_fm_struct_release(quad_struct)
2875      CALL cp_cfm_release(work1_cfm)
2876      CALL cp_cfm_release(work2_cfm)
2877      CALL cp_cfm_release(quad_cfm)
2878      DO i = 1, 6
2879         CALL cp_fm_release(amew_quad(i)%matrix)
2880      END DO
2881      DEALLOCATE (amew_quad, transquad, trace)
2882
2883      CALL timestop(handle)
2884
2885   END SUBROUTINE compute_soc_quadrupole_fosc
2886
2887END MODULE xas_tdp_utils
2888
2889