1!--------------------------------------------------------------------------------------------------!
2!   CP2K: A general program to perform molecular dynamics simulations                              !
3!   Copyright (C) 2000 - 2019  CP2K developers group                                               !
4!--------------------------------------------------------------------------------------------------!
5
6! **************************************************************************************************
7!> \brief This module deals with all the integrals done on local atomic grids in xas_tdp. This is
8!>        mostly used to compute the xc kernel matrix elements wrt two RI basis elements (centered
9!>        on the same excited atom) <P|fxc(r)|Q>, where the kernel fxc is purely a function of the
10!>        ground state density and r. This is also used to compute the SOC matrix elements in the
11!>        orbital basis
12! **************************************************************************************************
13MODULE xas_tdp_atom
14   USE ai_contraction_sphi,             ONLY: ab_contract
15   USE atom_operators,                  ONLY: calculate_model_potential
16   USE basis_set_types,                 ONLY: get_gto_basis_set,&
17                                              gto_basis_set_p_type,&
18                                              gto_basis_set_type
19   USE cell_types,                      ONLY: cell_type,&
20                                              pbc
21   USE cp_array_utils,                  ONLY: cp_1d_i_p_type,&
22                                              cp_1d_r_p_type,&
23                                              cp_2d_r_p_type,&
24                                              cp_3d_r_p_type
25   USE cp_blacs_env,                    ONLY: cp_blacs_env_type
26   USE cp_control_types,                ONLY: dft_control_type,&
27                                              qs_control_type
28   USE cp_dbcsr_cholesky,               ONLY: cp_dbcsr_cholesky_decompose,&
29                                              cp_dbcsr_cholesky_invert
30   USE cp_dbcsr_operations,             ONLY: cp_dbcsr_dist2d_to_dist,&
31                                              dbcsr_deallocate_matrix_set
32   USE cp_log_handling,                 ONLY: cp_logger_get_default_io_unit
33   USE cp_para_types,                   ONLY: cp_para_env_type
34   USE dbcsr_api,                       ONLY: &
35        dbcsr_complete_redistribute, dbcsr_create, dbcsr_distribution_get, dbcsr_distribution_new, &
36        dbcsr_distribution_release, dbcsr_distribution_type, dbcsr_filter, dbcsr_finalize, &
37        dbcsr_get_block_p, dbcsr_get_stored_coordinates, dbcsr_iterator_blocks_left, &
38        dbcsr_iterator_next_block, dbcsr_iterator_start, dbcsr_iterator_stop, dbcsr_iterator_type, &
39        dbcsr_p_type, dbcsr_put_block, dbcsr_release, dbcsr_replicate_all, dbcsr_type
40   USE distribution_2d_types,           ONLY: distribution_2d_release,&
41                                              distribution_2d_type
42   USE input_constants,                 ONLY: do_potential_id
43   USE input_section_types,             ONLY: section_vals_get_subs_vals,&
44                                              section_vals_type
45   USE kinds,                           ONLY: default_string_length,&
46                                              dp,&
47                                              int_8
48   USE lebedev,                         ONLY: deallocate_lebedev_grids,&
49                                              get_number_of_lebedev_grid,&
50                                              init_lebedev_grids,&
51                                              lebedev_grid
52   USE mathconstants,                   ONLY: dfac,&
53                                              pi
54   USE mathlib,                         ONLY: get_diag
55   USE memory_utilities,                ONLY: reallocate
56   USE message_passing,                 ONLY: mp_comm_free,&
57                                              mp_comm_split_direct,&
58                                              mp_irecv,&
59                                              mp_isend,&
60                                              mp_sum,&
61                                              mp_sync,&
62                                              mp_waitall
63   USE orbital_pointers,                ONLY: indco,&
64                                              indso,&
65                                              nco,&
66                                              ncoset,&
67                                              nso,&
68                                              nsoset
69   USE orbital_transformation_matrices, ONLY: orbtramat
70   USE particle_methods,                ONLY: get_particle_set
71   USE particle_types,                  ONLY: particle_type
72   USE physcon,                         ONLY: c_light_au
73   USE qs_environment_types,            ONLY: get_qs_env,&
74                                              qs_environment_type
75   USE qs_grid_atom,                    ONLY: allocate_grid_atom,&
76                                              create_grid_atom,&
77                                              grid_atom_type
78   USE qs_harmonics_atom,               ONLY: allocate_harmonics_atom,&
79                                              create_harmonics_atom,&
80                                              get_maxl_CG,&
81                                              get_none0_cg_list,&
82                                              harmonics_atom_type
83   USE qs_integral_utils,               ONLY: basis_set_list_setup
84   USE qs_kind_types,                   ONLY: get_qs_kind,&
85                                              get_qs_kind_set,&
86                                              qs_kind_type
87   USE qs_ks_types,                     ONLY: release_sab
88   USE qs_neighbor_list_types,          ONLY: neighbor_list_set_p_type
89   USE qs_o3c_methods,                  ONLY: calculate_o3c_integrals,&
90                                              contract12_o3c
91   USE qs_o3c_types,                    ONLY: get_o3c_iterator_info,&
92                                              init_o3c_container,&
93                                              o3c_container_type,&
94                                              o3c_iterate,&
95                                              o3c_iterator_create,&
96                                              o3c_iterator_release,&
97                                              o3c_iterator_type,&
98                                              release_o3c_container
99   USE qs_overlap,                      ONLY: build_overlap_matrix_simple
100   USE qs_rho_types,                    ONLY: qs_rho_get,&
101                                              qs_rho_type
102   USE spherical_harmonics,             ONLY: clebsch_gordon,&
103                                              clebsch_gordon_deallocate,&
104                                              clebsch_gordon_init
105   USE util,                            ONLY: get_limit,&
106                                              sort_unique
107   USE xas_tdp_types,                   ONLY: xas_atom_env_type,&
108                                              xas_tdp_control_type,&
109                                              xas_tdp_env_type
110   USE xas_tdp_utils,                   ONLY: build_xas_tdp_3c_nl,&
111                                              build_xas_tdp_ovlp_nl,&
112                                              get_o3c_memory,&
113                                              get_opt_3c_dist2d
114   USE xc,                              ONLY: divide_by_norm_drho
115   USE xc_atom,                         ONLY: xc_rho_set_atom_update
116   USE xc_derivative_set_types,         ONLY: xc_derivative_set_type,&
117                                              xc_dset_create,&
118                                              xc_dset_get_derivative,&
119                                              xc_dset_release
120   USE xc_derivative_types,             ONLY: xc_derivative_get,&
121                                              xc_derivative_type
122   USE xc_derivatives,                  ONLY: xc_functionals_eval,&
123                                              xc_functionals_get_needs
124   USE xc_rho_cflags_types,             ONLY: xc_rho_cflags_type
125   USE xc_rho_set_types,                ONLY: xc_rho_set_create,&
126                                              xc_rho_set_release,&
127                                              xc_rho_set_type
128
129!$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num
130#include "./base/base_uses.f90"
131
132   IMPLICIT NONE
133
134   PRIVATE
135
136   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'xas_tdp_atom'
137
138   PUBLIC :: init_xas_atom_env, integrate_fxc_atoms, integrate_soc_atoms
139
140CONTAINS
141
142! **************************************************************************************************
143!> \brief Initializes a xas_atom_env type given the qs_enxas_atom_env, qs_envv
144!> \param xas_atom_env the xas_atom_env to initialize
145!> \param xas_tdp_env ...
146!> \param xas_tdp_control ...
147!> \param qs_env ...
148! **************************************************************************************************
149   SUBROUTINE init_xas_atom_env(xas_atom_env, xas_tdp_env, xas_tdp_control, qs_env)
150
151      TYPE(xas_atom_env_type), POINTER                   :: xas_atom_env
152      TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
153      TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
154      TYPE(qs_environment_type), POINTER                 :: qs_env
155
156      CHARACTER(len=*), PARAMETER :: routineN = 'init_xas_atom_env', &
157         routineP = moduleN//':'//routineN
158
159      INTEGER                                            :: handle, ikind, natom, nex_atoms, &
160                                                            nex_kinds, nkind, nspins
161      LOGICAL                                            :: do_soc, do_xc
162      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
163      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
164
165      NULLIFY (qs_kind_set, particle_set)
166
167      CALL timeset(routineN, handle)
168
169!  Initializing the type
170      CALL get_qs_env(qs_env, qs_kind_set=qs_kind_set, natom=natom, particle_set=particle_set)
171
172      nkind = SIZE(qs_kind_set)
173      nex_kinds = xas_tdp_env%nex_kinds
174      nex_atoms = xas_tdp_env%nex_atoms
175      do_xc = xas_tdp_control%do_xc
176      do_soc = xas_tdp_control%do_soc
177      nspins = 1; IF (xas_tdp_control%do_uks .OR. xas_tdp_control%do_roks) nspins = 2
178
179      xas_atom_env%nspins = nspins
180      xas_atom_env%ri_radius = xas_tdp_control%ri_radius
181      ALLOCATE (xas_atom_env%grid_atom_set(nkind))
182      ALLOCATE (xas_atom_env%harmonics_atom_set(nkind))
183      ALLOCATE (xas_atom_env%ri_sphi_so(nkind))
184      ALLOCATE (xas_atom_env%orb_sphi_so(nkind))
185      IF (do_xc) THEN
186         ALLOCATE (xas_atom_env%gr(nkind))
187         ALLOCATE (xas_atom_env%ga(nkind))
188         ALLOCATE (xas_atom_env%dgr1(nkind))
189         ALLOCATE (xas_atom_env%dgr2(nkind))
190         ALLOCATE (xas_atom_env%dga1(nkind))
191         ALLOCATE (xas_atom_env%dga2(nkind))
192      END IF
193
194      xas_atom_env%excited_atoms => xas_tdp_env%ex_atom_indices
195      xas_atom_env%excited_kinds => xas_tdp_env%ex_kind_indices
196
197!  Allocate and initialize the atomic grids and harmonics
198      CALL init_xas_atom_grid_harmo(xas_atom_env, xas_tdp_control%grid_info, do_xc, qs_env)
199
200!  Compute the contraction coefficients for spherical orbitals
201      DO ikind = 1, nkind
202         NULLIFY (xas_atom_env%orb_sphi_so(ikind)%array, xas_atom_env%ri_sphi_so(ikind)%array)
203         IF (do_soc) THEN
204            CALL compute_sphi_so(ikind, "ORB", xas_atom_env%orb_sphi_so(ikind)%array, qs_env)
205         END IF
206         IF (ANY(xas_atom_env%excited_kinds == ikind) .AND. do_xc) THEN
207            CALL compute_sphi_so(ikind, "RI_XAS", xas_atom_env%ri_sphi_so(ikind)%array, qs_env)
208         END IF
209      END DO !ikind
210
211!  Compute the coefficients to expand the density in the RI_XAS basis, if requested
212      IF (do_xc) CALL calculate_density_coeffs(xas_atom_env, qs_env)
213
214      CALL timestop(handle)
215
216   END SUBROUTINE init_xas_atom_env
217
218! **************************************************************************************************
219!> \brief Initializes the atomic grids and harmonics for the RI atomic calculations
220!> \param xas_atom_env ...
221!> \param grid_info ...
222!> \param do_xc Whether the xc kernel will ne computed on the atomic grids. If not, the harmonics
223!>        are built for the orbital basis for all kinds.
224!> \param qs_env ...
225!> \note Largely inspired by init_rho_atom subroutine
226! **************************************************************************************************
227   SUBROUTINE init_xas_atom_grid_harmo(xas_atom_env, grid_info, do_xc, qs_env)
228
229      TYPE(xas_atom_env_type), POINTER                   :: xas_atom_env
230      CHARACTER(len=default_string_length), &
231         DIMENSION(:, :), POINTER                        :: grid_info
232      LOGICAL, INTENT(IN)                                :: do_xc
233      TYPE(qs_environment_type), POINTER                 :: qs_env
234
235      CHARACTER(len=*), PARAMETER :: routineN = 'init_xas_atom_grid_harmo', &
236         routineP = moduleN//':'//routineN
237
238      CHARACTER(LEN=default_string_length)               :: kind_name
239      INTEGER :: igrid, ikind, il, iso, iso1, iso2, l1, l1l2, l2, la, lc1, lc2, lcleb, ll, llmax, &
240         lp, m1, m2, max_s_harm, max_s_set, maxl, maxlgto, maxs, mm, mp, na, nr, quadrature, stat
241      REAL(dp)                                           :: kind_radius
242      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: rga
243      REAL(dp), DIMENSION(:, :, :), POINTER              :: my_CG
244      TYPE(dft_control_type), POINTER                    :: dft_control
245      TYPE(grid_atom_type), POINTER                      :: grid_atom
246      TYPE(gto_basis_set_type), POINTER                  :: tmp_basis
247      TYPE(harmonics_atom_type), POINTER                 :: harmonics
248      TYPE(qs_control_type), POINTER                     :: qs_control
249      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
250
251      NULLIFY (my_CG, qs_kind_set, tmp_basis, grid_atom, harmonics, qs_control, dft_control)
252
253!  Initialization of some integer for the CG coeff generation
254      CALL get_qs_env(qs_env, qs_kind_set=qs_kind_set, dft_control=dft_control)
255      IF (do_xc) THEN
256         CALL get_qs_kind_set(qs_kind_set, maxlgto=maxlgto, basis_type="RI_XAS")
257      ELSE
258         CALL get_qs_kind_set(qs_kind_set, maxlgto=maxlgto, basis_type="ORB")
259      END IF
260      qs_control => dft_control%qs_control
261
262      !maximum expansion
263      llmax = 2*maxlgto
264      max_s_harm = nsoset(llmax)
265      max_s_set = nsoset(maxlgto)
266      lcleb = llmax
267
268!  Allocate and compute the CG coeffs (copied from init_rho_atom)
269      CALL clebsch_gordon_init(lcleb)
270      CALL reallocate(my_CG, 1, max_s_set, 1, max_s_set, 1, max_s_harm)
271
272      ALLOCATE (rga(lcleb, 2))
273      DO lc1 = 0, maxlgto
274         DO iso1 = nsoset(lc1 - 1) + 1, nsoset(lc1)
275            l1 = indso(1, iso1)
276            m1 = indso(2, iso1)
277            DO lc2 = 0, maxlgto
278               DO iso2 = nsoset(lc2 - 1) + 1, nsoset(lc2)
279                  l2 = indso(1, iso2)
280                  m2 = indso(2, iso2)
281                  CALL clebsch_gordon(l1, m1, l2, m2, rga)
282                  IF (l1 + l2 > llmax) THEN
283                     l1l2 = llmax
284                  ELSE
285                     l1l2 = l1 + l2
286                  END IF
287                  mp = m1 + m2
288                  mm = m1 - m2
289                  IF (m1*m2 < 0 .OR. (m1*m2 == 0 .AND. (m1 < 0 .OR. m2 < 0))) THEN
290                     mp = -ABS(mp)
291                     mm = -ABS(mm)
292                  ELSE
293                     mp = ABS(mp)
294                     mm = ABS(mm)
295                  END IF
296                  DO lp = MOD(l1 + l2, 2), l1l2, 2
297                     il = lp/2 + 1
298                     IF (ABS(mp) <= lp) THEN
299                     IF (mp >= 0) THEN
300                        iso = nsoset(lp - 1) + lp + 1 + mp
301                     ELSE
302                        iso = nsoset(lp - 1) + lp + 1 - ABS(mp)
303                     END IF
304                     my_CG(iso1, iso2, iso) = rga(il, 1)
305                     ENDIF
306                     IF (mp /= mm .AND. ABS(mm) <= lp) THEN
307                     IF (mm >= 0) THEN
308                        iso = nsoset(lp - 1) + lp + 1 + mm
309                     ELSE
310                        iso = nsoset(lp - 1) + lp + 1 - ABS(mm)
311                     END IF
312                     my_CG(iso1, iso2, iso) = rga(il, 2)
313                     ENDIF
314                  END DO
315               ENDDO ! iso2
316            ENDDO ! lc2
317         ENDDO ! iso1
318      ENDDO ! lc1
319      DEALLOCATE (rga)
320      CALL clebsch_gordon_deallocate()
321
322!  Create the Lebedev grids and compute the spherical harmonics
323      CALL init_lebedev_grids()
324      quadrature = qs_control%gapw_control%quadrature
325
326      DO ikind = 1, SIZE(xas_atom_env%grid_atom_set)
327
328!        Allocate the grid and the harmonics for this kind
329         NULLIFY (xas_atom_env%grid_atom_set(ikind)%grid_atom)
330         NULLIFY (xas_atom_env%harmonics_atom_set(ikind)%harmonics_atom)
331         CALL allocate_grid_atom(xas_atom_env%grid_atom_set(ikind)%grid_atom)
332         CALL allocate_harmonics_atom(xas_atom_env%harmonics_atom_set(ikind)%harmonics_atom)
333
334         NULLIFY (grid_atom, harmonics)
335         grid_atom => xas_atom_env%grid_atom_set(ikind)%grid_atom
336         harmonics => xas_atom_env%harmonics_atom_set(ikind)%harmonics_atom
337
338!        Initialize some integers
339         CALL get_qs_kind(qs_kind_set(ikind), ngrid_rad=nr, ngrid_ang=na, name=kind_name)
340
341         !take the grid dimension given as input, if none, take the GAPW ones above
342         IF (ANY(grid_info == kind_name)) THEN
343            DO igrid = 1, SIZE(grid_info, 1)
344               IF (grid_info(igrid, 1) == kind_name) THEN
345
346                  !hack to convert string into integer
347                  READ (grid_info(igrid, 2), *, iostat=stat) na
348                  IF (stat .NE. 0) CPABORT("The 'na' value for the GRID keyword must be an integer")
349                  READ (grid_info(igrid, 3), *, iostat=stat) nr
350                  IF (stat .NE. 0) CPABORT("The 'nr' value for the GRID keyword must be an integer")
351                  EXIT
352               END IF
353            END DO
354         ELSE IF (do_xc .AND. ANY(xas_atom_env%excited_kinds == ikind)) THEN
355            !need good integration grids for the xc kernel, but taking the default GAPW grid
356            CPWARN("The default (and possibly small) GAPW grid is beeing used for one excited KIND")
357         END IF
358
359         ll = get_number_of_lebedev_grid(n=na)
360         na = lebedev_grid(ll)%n
361         la = lebedev_grid(ll)%l
362         grid_atom%ng_sphere = na
363         grid_atom%nr = nr
364
365!        If this is an excited kind, create the harmonics with the RI_XAS basis, otherwise the ORB
366         IF (ANY(xas_atom_env%excited_kinds == ikind) .AND. do_xc) THEN
367            CALL get_qs_kind(qs_kind_set(ikind), basis_set=tmp_basis, basis_type="RI_XAS")
368         ELSE
369            CALL get_qs_kind(qs_kind_set(ikind), basis_set=tmp_basis, basis_type="ORB")
370         END IF
371         CALL get_gto_basis_set(gto_basis_set=tmp_basis, maxl=maxl, kind_radius=kind_radius)
372
373         CALL create_grid_atom(grid_atom, nr, na, llmax, ll, quadrature)
374         CALL truncate_radial_grid(grid_atom, kind_radius)
375
376         maxs = nsoset(maxl)
377         CALL create_harmonics_atom(harmonics, &
378                                    my_CG, na, llmax, maxs, max_s_harm, ll, grid_atom%wa, &
379                                    grid_atom%azi, grid_atom%pol)
380         CALL get_maxl_CG(harmonics, tmp_basis, llmax, max_s_harm)
381      END DO
382
383      CALL deallocate_lebedev_grids()
384      DEALLOCATE (my_CG)
385
386   END SUBROUTINE init_xas_atom_grid_harmo
387
388! **************************************************************************************************
389!> \brief Reduces the radial extension of an atomic grid such that it only covers a given radius
390!> \param grid_atom the atomic grid from which we truncate the radial part
391!> \param max_radius the maximal radial extension of the resulting grid
392!> \note Since the RI density used for <P|fxc|Q> is only of quality close to the atom, and the
393!>       integrand only non-zero within the radius of the gaussian P,Q. One can reduce the grid
394!>       extansion to the largest radius of the RI basis set
395! **************************************************************************************************
396   SUBROUTINE truncate_radial_grid(grid_atom, max_radius)
397
398      TYPE(grid_atom_type), POINTER                      :: grid_atom
399      REAL(dp), INTENT(IN)                               :: max_radius
400
401      CHARACTER(len=*), PARAMETER :: routineN = 'truncate_radial_grid', &
402         routineP = moduleN//':'//routineN
403
404      INTEGER                                            :: first_ir, ir, llmax_p1, na, new_nr, nr
405
406      nr = grid_atom%nr
407      na = grid_atom%ng_sphere
408      llmax_p1 = SIZE(grid_atom%rad2l, 2) - 1
409
410!     Find the index corresponding to the limiting radius (small ir => large radius)
411      DO ir = 1, nr
412         IF (grid_atom%rad(ir) < max_radius) THEN
413            first_ir = ir
414            EXIT
415         END IF
416      END DO
417      new_nr = nr - first_ir + 1
418
419!     Reallcoate everything that depends on r
420      grid_atom%nr = new_nr
421
422      grid_atom%rad(1:new_nr) = grid_atom%rad(first_ir:nr)
423      grid_atom%rad2(1:new_nr) = grid_atom%rad2(first_ir:nr)
424      grid_atom%wr(1:new_nr) = grid_atom%wr(first_ir:nr)
425      grid_atom%weight(:, 1:new_nr) = grid_atom%weight(:, first_ir:nr)
426      grid_atom%rad2l(1:new_nr, :) = grid_atom%rad2l(first_ir:nr, :)
427      grid_atom%oorad2l(1:new_nr, :) = grid_atom%oorad2l(first_ir:nr, :)
428
429      CALL reallocate(grid_atom%rad, 1, new_nr)
430      CALL reallocate(grid_atom%rad2, 1, new_nr)
431      CALL reallocate(grid_atom%wr, 1, new_nr)
432      CALL reallocate(grid_atom%weight, 1, na, 1, new_nr)
433      CALL reallocate(grid_atom%rad2l, 1, new_nr, 0, llmax_p1)
434      CALL reallocate(grid_atom%oorad2l, 1, new_nr, 0, llmax_p1)
435
436   END SUBROUTINE truncate_radial_grid
437
438! **************************************************************************************************
439!> \brief Computes the contraction coefficients to go from spherical orbitals to sgf for a given
440!>        atomic kind and a given basis type.
441!> \param ikind the kind for which we compute the coefficients
442!> \param basis_type the type of basis for which we compute
443!> \param sphi_so where the new contraction coefficients are stored (not yet allocated)
444!> \param qs_env ...
445! **************************************************************************************************
446   SUBROUTINE compute_sphi_so(ikind, basis_type, sphi_so, qs_env)
447
448      INTEGER, INTENT(IN)                                :: ikind
449      CHARACTER(len=*), INTENT(IN)                       :: basis_type
450      REAL(dp), DIMENSION(:, :), POINTER                 :: sphi_so
451      TYPE(qs_environment_type), POINTER                 :: qs_env
452
453      CHARACTER(len=*), PARAMETER :: routineN = 'compute_sphi_so', &
454         routineP = moduleN//':'//routineN
455
456      INTEGER                                            :: ico, ipgf, iset, iso, l, lx, ly, lz, &
457                                                            maxso, nset, sgfi, start_c, start_s
458      INTEGER, DIMENSION(:), POINTER                     :: lmax, lmin, npgf, nsgf_set
459      INTEGER, DIMENSION(:, :), POINTER                  :: first_sgf
460      REAL(dp)                                           :: factor
461      REAL(dp), DIMENSION(:, :), POINTER                 :: sphi
462      TYPE(gto_basis_set_type), POINTER                  :: basis
463      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
464
465      NULLIFY (basis, lmax, lmin, npgf, nsgf_set, qs_kind_set, first_sgf, sphi)
466
467      CALL get_qs_env(qs_env, qs_kind_set=qs_kind_set)
468      CALL get_qs_kind(qs_kind_set(ikind), basis_set=basis, basis_type=basis_type)
469      CALL get_gto_basis_set(basis, lmax=lmax, nset=nset, npgf=npgf, maxso=maxso, lmin=lmin, &
470                             nsgf_set=nsgf_set, sphi=sphi, first_sgf=first_sgf)
471
472      ALLOCATE (sphi_so(maxso, SUM(nsgf_set)))
473      sphi_so = 0.0_dp
474
475      DO iset = 1, nset
476         sgfi = first_sgf(1, iset)
477         DO ipgf = 1, npgf(iset)
478            start_s = (ipgf - 1)*nsoset(lmax(iset))
479            start_c = (ipgf - 1)*ncoset(lmax(iset))
480
481            DO l = lmin(iset), lmax(iset)
482               DO iso = 1, nso(l)
483                  DO ico = 1, nco(l)
484
485                     lx = indco(1, ico + ncoset(l - 1))
486                     ly = indco(2, ico + ncoset(l - 1))
487                     lz = indco(3, ico + ncoset(l - 1))
488
489                     factor = orbtramat(l)%s2c(iso, ico) &
490                              *SQRT(4.0_dp*pi/dfac(2*l + 1)*dfac(2*lx - 1)*dfac(2*ly - 1)*dfac(2*lz - 1))
491
492                     CALL daxpy(nsgf_set(iset), factor, &
493                                sphi(start_c + ncoset(l - 1) + ico, sgfi:sgfi + nsgf_set(iset) - 1), 1, &
494                                sphi_so(start_s + nsoset(l - 1) + iso, sgfi:sgfi + nsgf_set(iset) - 1), 1)
495
496                  END DO !ico
497               END DO !iso
498            END DO !l
499         END DO !ipgf
500      END DO !iset
501
502   END SUBROUTINE compute_sphi_so
503
504! **************************************************************************************************
505!> \brief Find the neighbors of a given set of atoms based on the non-zero blocks of a provided
506!>        overlap matrix. Optionally returns an array containing the indices of all involved atoms
507!>        (the given subset plus all their neighbors, without repetition) AND/OR an array of arrays
508!>        providing the indices of the neighbors of each input atom.
509!> \param base_atoms the set of atoms for which we search neighbors
510!> \param mat_s the overlap matrix used to find neighbors
511!> \param radius the cutoff radius after which atoms are not considered neighbors
512!> \param qs_env ...
513!> \param all_neighbors the array uniquely contatining all indices of all atoms involved
514!> \param neighbor_set array of arrays contining the neighbors of all given atoms
515! **************************************************************************************************
516   SUBROUTINE find_neighbors(base_atoms, mat_s, radius, qs_env, all_neighbors, neighbor_set)
517
518      INTEGER, DIMENSION(:), INTENT(INOUT)               :: base_atoms
519      TYPE(dbcsr_type), INTENT(IN)                       :: mat_s
520      REAL(dp)                                           :: radius
521      TYPE(qs_environment_type), POINTER                 :: qs_env
522      INTEGER, DIMENSION(:), OPTIONAL, POINTER           :: all_neighbors
523      TYPE(cp_1d_i_p_type), DIMENSION(:), OPTIONAL, &
524         POINTER                                         :: neighbor_set
525
526      CHARACTER(len=*), PARAMETER :: routineN = 'find_neighbors', routineP = moduleN//':'//routineN
527
528      INTEGER                                            :: blk, i, iat, ibase, iblk, jblk, mepos, &
529                                                            natom, nb, nbase
530      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: blk_to_base, inb, who_is_there
531      INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: n_neighbors
532      LOGICAL, ALLOCATABLE, DIMENSION(:)                 :: is_base_atom
533      REAL(dp)                                           :: dist2, rad2, ri(3), rij(3), rj(3)
534      TYPE(cell_type), POINTER                           :: cell
535      TYPE(cp_1d_i_p_type), DIMENSION(:), POINTER        :: my_neighbor_set
536      TYPE(cp_para_env_type), POINTER                    :: para_env
537      TYPE(dbcsr_iterator_type)                          :: iter
538      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
539
540      NULLIFY (particle_set, para_env, my_neighbor_set, cell)
541
542      ! Initialization
543      CALL get_qs_env(qs_env, para_env=para_env, natom=natom, particle_set=particle_set, cell=cell)
544      mepos = para_env%mepos
545      nbase = SIZE(base_atoms)
546      !work with the neighbor_set structure, see at the end if we keep it
547      ALLOCATE (my_neighbor_set(nbase))
548      rad2 = radius**2
549
550      ALLOCATE (blk_to_base(natom), is_base_atom(natom))
551      blk_to_base = 0; is_base_atom = .FALSE.
552      DO ibase = 1, nbase
553         blk_to_base(base_atoms(ibase)) = ibase
554         is_base_atom(base_atoms(ibase)) = .TRUE.
555      END DO
556
557      ! First loop over S => count the number of neighbors
558      ALLOCATE (n_neighbors(nbase, 0:para_env%num_pe - 1))
559      n_neighbors = 0
560
561      CALL dbcsr_iterator_start(iter, mat_s)
562      DO WHILE (dbcsr_iterator_blocks_left(iter))
563
564         CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk, blk=blk)
565
566         !avoid self-neighbors
567         IF (iblk == jblk) CYCLE
568
569         !test distance
570         ri = pbc(particle_set(iblk)%r, cell)
571         rj = pbc(particle_set(jblk)%r, cell)
572         rij = pbc(ri, rj, cell)
573         dist2 = SUM(rij**2)
574         IF (dist2 > rad2) CYCLE
575
576         IF (is_base_atom(iblk)) THEN
577            ibase = blk_to_base(iblk)
578            n_neighbors(ibase, mepos) = n_neighbors(ibase, mepos) + 1
579         END IF
580         IF (is_base_atom(jblk)) THEN
581            ibase = blk_to_base(jblk)
582            n_neighbors(ibase, mepos) = n_neighbors(ibase, mepos) + 1
583         END IF
584
585      END DO !iter
586      CALL dbcsr_iterator_stop(iter)
587      CALL mp_sum(n_neighbors, para_env%group)
588
589      ! Allocate the neighbor_set arrays at the correct length
590      DO ibase = 1, nbase
591         ALLOCATE (my_neighbor_set(ibase)%array(SUM(n_neighbors(ibase, :))))
592         my_neighbor_set(ibase)%array = 0
593      END DO
594
595      ! Loop a second time over S, this time fill the neighbors details
596      CALL dbcsr_iterator_start(iter, mat_s)
597      ALLOCATE (inb(nbase))
598      inb = 1
599      DO WHILE (dbcsr_iterator_blocks_left(iter))
600
601         CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk, blk=blk)
602         IF (iblk == jblk) CYCLE
603
604         !test distance
605         ri = pbc(particle_set(iblk)%r, cell)
606         rj = pbc(particle_set(jblk)%r, cell)
607         rij = pbc(ri, rj, cell)
608         dist2 = SUM(rij**2)
609         IF (dist2 > rad2) CYCLE
610
611         IF (is_base_atom(iblk)) THEN
612            ibase = blk_to_base(iblk)
613            my_neighbor_set(ibase)%array(SUM(n_neighbors(ibase, 0:mepos - 1)) + inb(ibase)) = jblk
614            inb(ibase) = inb(ibase) + 1
615         END IF
616         IF (is_base_atom(jblk)) THEN
617            ibase = blk_to_base(jblk)
618            my_neighbor_set(ibase)%array(SUM(n_neighbors(ibase, 0:mepos - 1)) + inb(ibase)) = iblk
619            inb(ibase) = inb(ibase) + 1
620         END IF
621
622      END DO !iter
623      CALL dbcsr_iterator_stop(iter)
624
625      ! Make sure the info is shared among the procs
626      DO ibase = 1, nbase
627         CALL mp_sum(my_neighbor_set(ibase)%array, para_env%group)
628      END DO
629
630      ! Gather all indices if asked for it
631      IF (PRESENT(all_neighbors)) THEN
632         ALLOCATE (who_is_there(natom))
633         who_is_there = 0
634
635         DO ibase = 1, nbase
636            who_is_there(base_atoms(ibase)) = 1
637            DO nb = 1, SIZE(my_neighbor_set(ibase)%array)
638               who_is_there(my_neighbor_set(ibase)%array(nb)) = 1
639            END DO
640         END DO
641
642         ALLOCATE (all_neighbors(natom))
643         i = 0
644         DO iat = 1, natom
645            IF (who_is_there(iat) == 1) THEN
646               i = i + 1
647               all_neighbors(i) = iat
648            END IF
649         END DO
650         CALL reallocate(all_neighbors, 1, i)
651      END IF
652
653      ! If not asked for the neighbor set, deallocate it
654      IF (PRESENT(neighbor_set)) THEN
655         neighbor_set => my_neighbor_set
656      ELSE
657         DO ibase = 1, nbase
658            DEALLOCATE (my_neighbor_set(ibase)%array)
659         END DO
660         DEALLOCATE (my_neighbor_set)
661      END IF
662
663   END SUBROUTINE find_neighbors
664
665! **************************************************************************************************
666!> \brief Returns the RI inverse overlap for a subset of the RI_XAS matrix spaning a given
667!>        excited atom and its neighbors.
668!> \param ri_sinv the inverse overlap as a dbcsr matrix
669!> \param whole_s the whole RI overlap matrix
670!> \param neighbors the indeces of the excited atom and their neighbors
671!> \param idx_to_nb array telling where any atom can be found in neighbors (if there at all)
672!> \param basis_set_ri the RI basis set list for all kinds
673!> \param qs_env ...
674!> \note It is assumed that the neighbors are sorted, the output matrix is assumed to be small and
675!>       is replicated on all processors
676! **************************************************************************************************
677   SUBROUTINE get_exat_ri_sinv(ri_sinv, whole_s, neighbors, idx_to_nb, basis_set_ri, qs_env)
678
679      TYPE(dbcsr_type)                                   :: ri_sinv, whole_s
680      INTEGER, DIMENSION(:), INTENT(IN)                  :: neighbors, idx_to_nb
681      TYPE(gto_basis_set_p_type), DIMENSION(:), POINTER  :: basis_set_ri
682      TYPE(qs_environment_type), POINTER                 :: qs_env
683
684      CHARACTER(len=*), PARAMETER :: routineN = 'get_exat_ri_sinv', &
685         routineP = moduleN//':'//routineN
686
687      INTEGER                                            :: blk, dest, group, handle, iat, ikind, &
688                                                            inb, ir, is, jat, jnb, natom, nnb, &
689                                                            npcols, nprows, source, tag
690      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: recv_req, send_req
691      INTEGER, DIMENSION(:), POINTER                     :: col_dist, nsgf, row_dist
692      INTEGER, DIMENSION(:, :), POINTER                  :: pgrid
693      LOGICAL                                            :: found_risinv, found_whole
694      LOGICAL, ALLOCATABLE, DIMENSION(:)                 :: is_neighbor
695      REAL(dp)                                           :: ri(3), rij(3), rj(3)
696      REAL(dp), ALLOCATABLE, DIMENSION(:)                :: radius
697      REAL(dp), DIMENSION(:, :), POINTER                 :: block_risinv, block_whole
698      TYPE(cell_type), POINTER                           :: cell
699      TYPE(cp_2d_r_p_type), ALLOCATABLE, DIMENSION(:)    :: recv_buff, send_buff
700      TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
701      TYPE(cp_para_env_type), POINTER                    :: para_env
702      TYPE(dbcsr_distribution_type)                      :: sinv_dist
703      TYPE(dbcsr_distribution_type), POINTER             :: dbcsr_dist
704      TYPE(dbcsr_iterator_type)                          :: iter
705      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
706
707      NULLIFY (pgrid, dbcsr_dist, row_dist, col_dist, nsgf, particle_set, block_whole, block_risinv)
708      NULLIFY (cell, para_env, blacs_env)
709
710      CALL timeset(routineN, handle)
711
712      CALL get_qs_env(qs_env, dbcsr_dist=dbcsr_dist, particle_set=particle_set, natom=natom, &
713                      para_env=para_env, blacs_env=blacs_env, cell=cell)
714      nnb = SIZE(neighbors)
715      ALLOCATE (nsgf(nnb), is_neighbor(natom), radius(nnb))
716      is_neighbor = .FALSE.
717      DO inb = 1, nnb
718         iat = neighbors(inb)
719         ikind = particle_set(iat)%atomic_kind%kind_number
720         CALL get_gto_basis_set(basis_set_ri(ikind)%gto_basis_set, nsgf=nsgf(inb), kind_radius=radius(inb))
721         is_neighbor(iat) = .TRUE.
722      END DO
723
724      !Create the ri_sinv matrix based on some arbitrary dbcsr_dist
725      CALL dbcsr_distribution_get(dbcsr_dist, group=group, pgrid=pgrid, nprows=nprows, npcols=npcols)
726
727      ALLOCATE (row_dist(nnb), col_dist(nnb))
728      DO inb = 1, nnb
729         row_dist(inb) = MODULO(nprows - inb, nprows)
730         col_dist(inb) = MODULO(npcols - inb, npcols)
731      END DO
732
733      CALL dbcsr_distribution_new(sinv_dist, group=group, pgrid=pgrid, row_dist=row_dist, &
734                                  col_dist=col_dist)
735
736      CALL dbcsr_create(matrix=ri_sinv, name="RI_SINV", matrix_type="S", dist=sinv_dist, &
737                        row_blk_size=nsgf, col_blk_size=nsgf)
738      !reserving the blocks in the correct pattern
739      DO inb = 1, nnb
740         ri = pbc(particle_set(neighbors(inb))%r, cell)
741         DO jnb = inb, nnb
742
743            !do the atom overlap ?
744            rj = pbc(particle_set(neighbors(jnb))%r, cell)
745            rij = pbc(ri, rj, cell)
746            IF (SUM(rij**2) > (radius(inb) + radius(jnb))**2) CYCLE
747
748            CALL dbcsr_get_stored_coordinates(ri_sinv, inb, jnb, blk)
749            IF (para_env%mepos == blk) THEN
750               ALLOCATE (block_risinv(nsgf(inb), nsgf(jnb)))
751               block_risinv = 0.0_dp
752               CALL dbcsr_put_block(ri_sinv, inb, jnb, block_risinv)
753               DEALLOCATE (block_risinv)
754            END IF
755         END DO
756      END DO
757      CALL dbcsr_finalize(ri_sinv)
758
759      CALL dbcsr_distribution_release(sinv_dist)
760      DEALLOCATE (row_dist, col_dist)
761
762      !prepare the send and recv buffers we will need for change of dist between the two matrices
763      !worst case scenario: all neighbors are on same procs => need to send nnb**2 messages
764      ALLOCATE (send_buff(nnb**2), recv_buff(nnb**2))
765      ALLOCATE (send_req(nnb**2), recv_req(nnb**2))
766      is = 0; ir = 0
767
768      !Loop over the whole RI overlap matrix and pick the blocks we need
769      CALL dbcsr_iterator_start(iter, whole_s)
770      DO WHILE (dbcsr_iterator_blocks_left(iter))
771
772         CALL dbcsr_iterator_next_block(iter, row=iat, column=jat, blk=blk)
773         CALL dbcsr_get_block_p(whole_s, iat, jat, block_whole, found_whole)
774
775         !only interested in neighbors
776         IF (.NOT. found_whole) CYCLE
777         IF (.NOT. is_neighbor(iat)) CYCLE
778         IF (.NOT. is_neighbor(jat)) CYCLE
779
780         inb = idx_to_nb(iat)
781         jnb = idx_to_nb(jat)
782
783         !If blocks are on the same proc for both matrices, simply copy
784         CALL dbcsr_get_block_p(ri_sinv, inb, jnb, block_risinv, found_risinv)
785         IF (found_risinv) THEN
786            CALL dcopy(nsgf(inb)*nsgf(jnb), block_whole, 1, block_risinv, 1)
787         ELSE
788
789            !send the block with unique tag to the proc where inb,jnb is in ri_sinv
790            CALL dbcsr_get_stored_coordinates(ri_sinv, inb, jnb, dest)
791            is = is + 1
792            send_buff(is)%array => block_whole
793            tag = natom*iat + jat
794            CALL mp_isend(msgin=send_buff(is)%array, dest=dest, comm=group, request=send_req(is), tag=tag)
795
796         END IF
797
798      END DO !dbcsr iter
799      CALL dbcsr_iterator_stop(iter)
800
801      !Loop over ri_sinv and receive all those blocks
802      CALL dbcsr_iterator_start(iter, ri_sinv)
803      DO WHILE (dbcsr_iterator_blocks_left(iter))
804
805         CALL dbcsr_iterator_next_block(iter, row=inb, column=jnb, blk=blk)
806         CALL dbcsr_get_block_p(ri_sinv, inb, jnb, block_risinv, found_risinv)
807
808         IF (.NOT. found_risinv) CYCLE
809
810         iat = neighbors(inb)
811         jat = neighbors(jnb)
812
813         !If blocks are on the same proc on both matrices do nothing
814         CALL dbcsr_get_stored_coordinates(whole_s, iat, jat, source)
815         IF (para_env%mepos == source) CYCLE
816
817         tag = natom*iat + jat
818         ir = ir + 1
819         recv_buff(ir)%array => block_risinv
820         CALL mp_irecv(msgout=recv_buff(ir)%array, source=source, request=recv_req(ir), &
821                       tag=tag, comm=group)
822
823      END DO
824      CALL dbcsr_iterator_stop(iter)
825
826      !make sure that all comm is over before proceeding
827      CALL mp_waitall(send_req(1:is))
828      CALL mp_waitall(recv_req(1:ir))
829
830      !Invert
831      CALL cp_dbcsr_cholesky_decompose(ri_sinv, para_env=para_env, blacs_env=blacs_env)
832      CALL cp_dbcsr_cholesky_invert(ri_sinv, para_env=para_env, blacs_env=blacs_env, upper_to_full=.TRUE.)
833      CALL dbcsr_filter(ri_sinv, 1.E-10_dp) !make sure ri_sinv is sparse coming out of fm routines
834      CALL dbcsr_replicate_all(ri_sinv)
835
836      !clean-up
837      DEALLOCATE (nsgf)
838
839      CALL timestop(handle)
840
841   END SUBROUTINE get_exat_ri_sinv
842
843! **************************************************************************************************
844!> \brief Compute the coefficients to project the density on a partial RI_XAS basis
845!> \param xas_atom_env ...
846!> \param qs_env ...
847!> \note The density is n = sum_ab P_ab*phi_a*phi_b, the RI basis covers the products of orbital sgfs
848!>       => n = sum_ab sum_cd P_ab (phi_a phi_b xi_c) S_cd^-1 xi_d
849!>            = sum_d coeff_d xi_d , where xi are the RI basis func.
850!>       In this case, with the partial RI projection, the RI basis is restricted to an excited atom
851!>       and its neighbors at a time. Leads to smaller overlap matrix to invert and less 3-center
852!>       overlap to compute. The procedure is repeated for each excited atom
853! **************************************************************************************************
854   SUBROUTINE calculate_density_coeffs(xas_atom_env, qs_env)
855
856      TYPE(xas_atom_env_type), POINTER                   :: xas_atom_env
857      TYPE(qs_environment_type), POINTER                 :: qs_env
858
859      CHARACTER(len=*), PARAMETER :: routineN = 'calculate_density_coeffs', &
860         routineP = moduleN//':'//routineN
861
862      INTEGER :: bo(2), exat, handle, iat, ibatch, iex, inb, ispin, jnb, katom, mepos, nat_batch, &
863         natom, nbatch, nex, nkind, nnb, nspins, nthread, output_unit, ri_at
864      INTEGER(int_8)                                     :: free_mem, mem_per_exat, o3c_mem
865      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: idx_to_nb, neighbors, nsgf
866      INTEGER, DIMENSION(:), POINTER                     :: all_ri_atoms, batch_atoms
867      LOGICAL                                            :: found, foundt, unique
868      REAL(dp)                                           :: factor
869      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: work
870      REAL(dp), DIMENSION(:, :), POINTER                 :: pblock, pblockt, tvec
871      TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
872      TYPE(cp_para_env_type), POINTER                    :: para_env
873      TYPE(dbcsr_distribution_type)                      :: opt_dbcsr_dist
874      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: overlap, rho_ao, rho_redist
875      TYPE(dbcsr_type)                                   :: ri_sinv
876      TYPE(dbcsr_type), POINTER                          :: ri_mats
877      TYPE(distribution_2d_type), POINTER                :: opt_dist2d
878      TYPE(gto_basis_set_p_type), DIMENSION(:), POINTER  :: basis_set_orb, basis_set_ri
879      TYPE(neighbor_list_set_p_type), DIMENSION(:), &
880         POINTER                                         :: ab_list, ac_list, sab_ri
881      TYPE(o3c_container_type), POINTER                  :: o3c
882      TYPE(o3c_iterator_type)                            :: o3c_iterator
883      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
884      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
885      TYPE(qs_rho_type), POINTER                         :: rho
886
887      NULLIFY (qs_kind_set, basis_set_ri, basis_set_orb, ac_list, rho, rho_ao, sab_ri, ri_mats)
888      NULLIFY (o3c, tvec, particle_set, para_env, all_ri_atoms, overlap, batch_atoms)
889      NULLIFY (blacs_env, pblock, pblockt, ab_list, opt_dist2d, rho_redist)
890
891      !Idea: We don't do a full RI here as it would be too expensive in many ways (inversion of a
892      !      large matrix, many 3-center overlaps, density coefficients for all atoms, etc...)
893      !      Instead, we go excited atom by excited atom and only do a RI expansion on its basis
894      !      and that of its closest neighbors (defined through RI_RADIUS), such that we only have
895      !      very small matrices to invert and only a few (abc) overlp integrals with c on the
896      !      excited atom its neighbors. This is physically sound since we only need the density
897      !      well defined on the excited atom grid as we do (aI|P)*(P|Q)^-1*(Q|fxc|R)*(R|S)^-1*(S|Jb)
898      !      The o3c integrals are computed before the excited atom loop though for optimal load
899      !      balance
900
901      CALL timeset(routineN, handle)
902
903      CALL get_qs_env(qs_env, nkind=nkind, qs_kind_set=qs_kind_set, rho=rho, &
904                      natom=natom, particle_set=particle_set, &
905                      para_env=para_env, blacs_env=blacs_env)
906      nspins = xas_atom_env%nspins
907      nex = SIZE(xas_atom_env%excited_atoms)
908      CALL qs_rho_get(rho, rho_ao=rho_ao)
909
910!  Create the needed neighbor list and basis set lists.
911      ALLOCATE (basis_set_ri(nkind))
912      ALLOCATE (basis_set_orb(nkind))
913      CALL basis_set_list_setup(basis_set_ri, "RI_XAS", qs_kind_set)
914      CALL basis_set_list_setup(basis_set_orb, "ORB", qs_kind_set)
915
916!  Compute the RI overlap matrix on the whole system
917      CALL build_xas_tdp_ovlp_nl(sab_ri, basis_set_ri, basis_set_ri, qs_env)
918      CALL build_overlap_matrix_simple(qs_env%ks_env, overlap, basis_set_ri, basis_set_ri, sab_ri)
919      ri_mats => overlap(1)%matrix
920      CALL release_sab(sab_ri)
921
922!  Get the neighbors of the excited atoms (= all the atoms where density coeffs are needed)
923      CALL find_neighbors(xas_atom_env%excited_atoms, ri_mats, xas_atom_env%ri_radius, &
924                          qs_env, all_neighbors=all_ri_atoms, neighbor_set=xas_atom_env%exat_neighbors)
925
926      !keep in mind that double occupation is included in rho_ao in case of closed-shell
927      factor = 0.5_dp; IF (nspins == 2) factor = 1.0_dp
928
929!  Allocate space for the projected density coefficients. On all ri_atoms.
930!  Note: the sub-region where we project the density changes from excited atom to excited atom
931!        => need different sets of RI coeffs
932      ALLOCATE (nsgf(natom))
933      CALL get_particle_set(particle_set, qs_kind_set, nsgf=nsgf, basis=basis_set_ri)
934      ALLOCATE (xas_atom_env%ri_dcoeff(natom, nspins, nex))
935      DO iex = 1, nex
936         DO ispin = 1, nspins
937            DO iat = 1, natom
938               NULLIFY (xas_atom_env%ri_dcoeff(iat, ispin, iex)%array)
939               IF ((.NOT. ANY(xas_atom_env%exat_neighbors(iex)%array == iat)) &
940                   .AND. (.NOT. xas_atom_env%excited_atoms(iex) == iat)) CYCLE
941               ALLOCATE (xas_atom_env%ri_dcoeff(iat, ispin, iex)%array(nsgf(iat)))
942               xas_atom_env%ri_dcoeff(iat, ispin, iex)%array = 0.0_dp
943            END DO
944         END DO
945      END DO
946
947      output_unit = cp_logger_get_default_io_unit()
948      IF (output_unit > 0) THEN
949         WRITE (output_unit, FMT="(/,T7,A,/,T7,A)") &
950            "Excited atom, natoms in RI_REGION:", &
951            "---------------------------------"
952      END IF
953
954      !Idea: we want to split the density projection process in batches such that it never exceeds
955      !      available memory. We use max 65% of available memory (arbitrary choice)
956      !      This is a buffer that should catch any load imbalance between the batches
957
958      !Build the naive overlap neighbor list in order to compute the total memory cost of o3c
959      CALL build_xas_tdp_ovlp_nl(ab_list, basis_set_orb, basis_set_orb, qs_env)
960      CALL build_xas_tdp_3c_nl(ac_list, basis_set_orb, basis_set_ri, do_potential_id, &
961                               qs_env, excited_atoms=all_ri_atoms)
962      CALL get_o3c_memory(o3c_mem, free_mem, ab_list, ac_list, basis_set_orb, basis_set_orb, &
963                          basis_set_ri, qs_env)
964      CALL release_sab(ab_list)
965      CALL release_sab(ac_list)
966
967      mem_per_exat = o3c_mem/nex
968      IF (mem_per_exat > FLOOR(0.65_dp*REAL(free_mem, dp), KIND=int_8)) &
969         CPABORT("XAS_TDP| Not enough memory for density projection")
970
971      !divide into batches that at most use 65% of free memory
972      nbatch = CEILING(REAL(o3c_mem, dp)/(0.65_dp*REAL(free_mem, dp)))
973      DO ibatch = 1, nbatch
974
975         !excited atoms in that batch
976         bo = get_limit(nex, nbatch, ibatch - 1)
977
978         !Get all ri atoms belonging to that batch in an array (ex atoms + neighbors in RI_REGION)
979         nat_batch = bo(2) - bo(1) + 1
980         ALLOCATE (batch_atoms(nat_batch))
981         batch_atoms(:) = xas_atom_env%excited_atoms(bo(1):bo(2))
982         DO iex = bo(1), bo(2)
983            DO inb = 1, SIZE(xas_atom_env%exat_neighbors(iex)%array)
984               IF (.NOT. ANY(batch_atoms == xas_atom_env%exat_neighbors(iex)%array(inb))) THEN
985                  CALL reallocate(batch_atoms, 1, nat_batch + 1)
986                  batch_atoms(nat_batch + 1) = xas_atom_env%exat_neighbors(iex)%array(inb)
987                  nat_batch = nat_batch + 1
988               END IF
989            END DO !inb
990         END DO !iex
991
992         !Get the optimal distribution_2d for the overlap integrals (abc), centers c on excited atoms
993         !and their neighbors defined by RI_RADIUS, for the current batch
994         CALL build_xas_tdp_ovlp_nl(ab_list, basis_set_orb, basis_set_orb, qs_env)
995         CALL build_xas_tdp_3c_nl(ac_list, basis_set_orb, basis_set_ri, do_potential_id, &
996                                  qs_env, excited_atoms=batch_atoms)
997         CALL get_opt_3c_dist2d(opt_dist2d, ab_list, ac_list, basis_set_orb, basis_set_orb, &
998                                basis_set_ri, qs_env)
999         CALL release_sab(ab_list)
1000         CALL release_sab(ac_list)
1001
1002         !get the ab and ac neighbor lists based on the optimized distribution
1003         CALL build_xas_tdp_ovlp_nl(ab_list, basis_set_orb, basis_set_orb, qs_env, ext_dist2d=opt_dist2d)
1004         CALL build_xas_tdp_3c_nl(ac_list, basis_set_orb, basis_set_ri, do_potential_id, &
1005                                  qs_env, excited_atoms=batch_atoms, ext_dist2d=opt_dist2d)
1006         !get the AO density matrix in the optimized distribution for compatibility
1007
1008         CALL cp_dbcsr_dist2d_to_dist(opt_dist2d, opt_dbcsr_dist)
1009         ALLOCATE (rho_redist(nspins))
1010         DO ispin = 1, nspins
1011            ALLOCATE (rho_redist(ispin)%matrix)
1012            CALL dbcsr_create(rho_redist(ispin)%matrix, template=rho_ao(ispin)%matrix, dist=opt_dbcsr_dist)
1013            CALL dbcsr_complete_redistribute(rho_ao(ispin)%matrix, rho_redist(ispin)%matrix)
1014         END DO
1015
1016         !Compute the integrals and cantract with the density matrix
1017         ALLOCATE (o3c)
1018         CALL init_o3c_container(o3c, nspins, basis_set_orb, basis_set_orb, basis_set_ri, ab_list, ac_list)
1019         CALL calculate_o3c_integrals(o3c)
1020         CALL contract12_o3c(o3c, rho_redist)
1021
1022         !  Loop over the excited atoms
1023         DO iex = bo(1), bo(2)
1024
1025            !get the neighbors of current excited atom
1026            exat = xas_atom_env%excited_atoms(iex)
1027            nnb = 1 + SIZE(xas_atom_env%exat_neighbors(iex)%array)
1028            ALLOCATE (neighbors(nnb))
1029            neighbors(1) = exat
1030            neighbors(2:nnb) = xas_atom_env%exat_neighbors(iex)%array(:)
1031            CALL sort_unique(neighbors, unique)
1032
1033            IF (output_unit > 0) THEN
1034               WRITE (output_unit, FMT="(T7,I12,I21)") &
1035                  exat, nnb
1036            END IF
1037
1038            !link the atoms to their position in neighbors
1039            ALLOCATE (idx_to_nb(natom))
1040            idx_to_nb = 0
1041            DO inb = 1, nnb
1042               idx_to_nb(neighbors(inb)) = inb
1043            END DO
1044
1045            !Compute the RI inverse overlap matrix on the reduced RI basis that spans the excited
1046            !atom and its neighbors, ri_sinv is replicated over all procs
1047            CALL get_exat_ri_sinv(ri_sinv, ri_mats, neighbors, idx_to_nb, basis_set_ri, qs_env)
1048
1049            !Loop over the integral to get the density coefficients
1050            nthread = 1
1051!$          nthread = omp_get_max_threads()
1052            CALL o3c_iterator_create(o3c, o3c_iterator, nthread=nthread)
1053
1054!$OMP PARALLEL DEFAULT(NONE)&
1055!$OMP SHARED (o3c_iterator,xas_atom_env,nsgf,nspins,factor,nnb,neighbors,iex,idx_to_nb,ri_sinv)&
1056!$OMP PRIVATE (mepos,katom,tvec,work,inb,jnb,ri_at,found,foundt,pblock,pblockt)
1057
1058            mepos = 0
1059!$          mepos = omp_get_thread_num()
1060
1061            DO WHILE (o3c_iterate(o3c_iterator, mepos=mepos) == 0)
1062               CALL get_o3c_iterator_info(o3c_iterator, mepos=mepos, katom=katom, tvec=tvec)
1063
1064               !we only treat katoms that are the excited atom or one of its neighbor
1065               IF (.NOT. ANY(neighbors == katom)) CYCLE
1066               inb = idx_to_nb(katom)
1067
1068               !Loop over the negihbors (incl. excited atom)
1069               DO jnb = 1, nnb
1070
1071                  ri_at = neighbors(jnb)
1072
1073                  !get the corresponding block in ri_sinv
1074                  CALL dbcsr_get_block_p(ri_sinv, inb, jnb, pblock, found)
1075                  CALL dbcsr_get_block_p(ri_sinv, jnb, inb, pblockt, foundt)
1076                  IF ((.NOT. found) .AND. (.NOT. foundt)) CYCLE
1077
1078                  !katom and ri_at interact => compute the ri_dcoeff for ri_at, coming from katom
1079                  ! The coefficients for katom/ri_at are tvec*S^-1
1080                  ALLOCATE (work(nsgf(ri_at), nspins))
1081                  IF (found) THEN
1082                     CALL dgemm('T', 'N', nsgf(ri_at), nspins, nsgf(katom), 1.0_dp, pblock, &
1083                                nsgf(katom), tvec, nsgf(katom), 0.0_dp, work, nsgf(ri_at))
1084                  ELSE
1085                     CALL dgemm('N', 'N', nsgf(ri_at), nspins, nsgf(katom), 1.0_dp, pblockt, &
1086                                nsgf(ri_at), tvec, nsgf(katom), 0.0_dp, work, nsgf(ri_at))
1087                  END IF
1088
1089!$OMP    CRITICAL(spin1)
1090                  CALL daxpy(nsgf(ri_at), factor, work(:, 1), 1, xas_atom_env%ri_dcoeff(ri_at, 1, iex)%array(:), 1)
1091!$OMP    END CRITICAL(spin1)
1092
1093                  IF (nspins == 2) THEN
1094!$OMP    CRITICAL(spin2)
1095                     CALL daxpy(nsgf(ri_at), 1._dp, work(:, 2), 1, xas_atom_env%ri_dcoeff(ri_at, 2, iex)%array(:), 1)
1096!$OMP    END CRITICAL(spin2)
1097                  END IF
1098
1099                  DEALLOCATE (work)
1100               END DO !jnb
1101            END DO !o3c_iterator
1102!$OMP    END PARALLEL
1103            CALL o3c_iterator_release(o3c_iterator)
1104
1105            !clean-up for excited atom
1106            CALL dbcsr_release(ri_sinv)
1107            DEALLOCATE (neighbors, idx_to_nb)
1108
1109         END DO !iex
1110
1111         !batch clean-up
1112         CALL distribution_2d_release(opt_dist2d)
1113         CALL dbcsr_deallocate_matrix_set(rho_redist)
1114         CALL dbcsr_distribution_release(opt_dbcsr_dist)
1115         CALL release_o3c_container(o3c)
1116         CALL release_sab(ab_list)
1117         CALL release_sab(ac_list)
1118         DEALLOCATE (batch_atoms, o3c)
1119      END DO !ibatch
1120
1121      !making sure all procs have the same info
1122      DO iex = 1, nex
1123         DO ispin = 1, nspins
1124            DO iat = 1, natom
1125               IF ((.NOT. ANY(xas_atom_env%exat_neighbors(iex)%array == iat)) &
1126                   .AND. (.NOT. xas_atom_env%excited_atoms(iex) == iat)) CYCLE
1127               CALL mp_sum(xas_atom_env%ri_dcoeff(iat, ispin, iex)%array, para_env%group)
1128            END DO !iat
1129         END DO !ispin
1130      END DO !iex
1131
1132!  clean-up
1133
1134      CALL dbcsr_deallocate_matrix_set(overlap)
1135      DEALLOCATE (basis_set_ri, basis_set_orb, all_ri_atoms)
1136
1137      CALL timestop(handle)
1138
1139   END SUBROUTINE calculate_density_coeffs
1140
1141! **************************************************************************************************
1142!> \brief Evaluates the density on a given atomic grid
1143!> \param rho_set where the densities are stored
1144!> \param ri_dcoeff the arrays containing the RI density coefficients of this atom, for each spin
1145!> \param atom_kind the kind of the atom in question
1146!> \param do_gga whether the gradient of the density should also be put on the grid
1147!> \param bounds the local bounds of the grid
1148!> \param xas_atom_env ...
1149!> \param qs_env ...
1150!> \note The density is expressed as n = sum_d coeff_d*xi_d. Knowing the coordinate of each grid
1151!>       grid point, one can simply evaluate xi_d(r)
1152! **************************************************************************************************
1153   SUBROUTINE put_density_on_atomic_grid(rho_set, ri_dcoeff, atom_kind, do_gga, bounds, &
1154                                         xas_atom_env, qs_env)
1155
1156      TYPE(xc_rho_set_type), POINTER                     :: rho_set
1157      TYPE(cp_1d_r_p_type), DIMENSION(:)                 :: ri_dcoeff
1158      INTEGER, INTENT(IN)                                :: atom_kind
1159      LOGICAL, INTENT(IN)                                :: do_gga
1160      INTEGER, DIMENSION(2, 3), INTENT(IN)               :: bounds
1161      TYPE(xas_atom_env_type), POINTER                   :: xas_atom_env
1162      TYPE(qs_environment_type), POINTER                 :: qs_env
1163
1164      CHARACTER(len=*), PARAMETER :: routineN = 'put_density_on_atomic_grid', &
1165         routineP = moduleN//':'//routineN
1166
1167      INTEGER                                            :: dir, handle, ipgf, iset, isgf, iso, &
1168                                                            maxso, n, na, nr, nset, nspins, sgfi, &
1169                                                            start, start_pgf
1170      INTEGER, DIMENSION(:), POINTER                     :: lmax, lmin, npgf, nsgf_set
1171      INTEGER, DIMENSION(:, :), POINTER                  :: first_sgf
1172      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: sgf
1173      REAL(dp), ALLOCATABLE, DIMENSION(:, :, :)          :: dsgf, so
1174      REAL(dp), ALLOCATABLE, DIMENSION(:, :, :, :)       :: dso
1175      REAL(dp), DIMENSION(:, :), POINTER                 :: dgr1, dgr2, ga, gr, ri_sphi_so, zet
1176      REAL(dp), DIMENSION(:, :, :), POINTER              :: dga1, dga2, rhoa, rhob
1177      TYPE(grid_atom_type), POINTER                      :: grid_atom
1178      TYPE(gto_basis_set_type), POINTER                  :: ri_basis
1179      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
1180
1181      NULLIFY (grid_atom, ri_basis, qs_kind_set, lmax, npgf, zet, nsgf_set, ri_sphi_so)
1182      NULLIFY (lmin, first_sgf, rhoa, rhob, ga, gr, dgr1, dgr2, dga1, dga2)
1183
1184      CALL timeset(routineN, handle)
1185
1186!  Strategy: it makes sense to evaluate the spherical orbital on the grid (because of symmetry)
1187!            From there, one can directly contract into sgf using ri_sphi_so and then take the weight
1188!            The spherical orbital were precomputed and split in a purely radial and a purely
1189!            angular part. The full values on each grid point are obtain through gemm
1190
1191!  Generalities
1192      CALL get_qs_env(qs_env, qs_kind_set=qs_kind_set)
1193      CALL get_qs_kind(qs_kind_set(atom_kind), basis_set=ri_basis, basis_type="RI_XAS")
1194      CALL get_gto_basis_set(ri_basis, lmax=lmax, npgf=npgf, zet=zet, nset=nset, nsgf_set=nsgf_set, &
1195                             first_sgf=first_sgf, lmin=lmin, maxso=maxso)
1196
1197!  Get the grid and the info we need from it
1198      grid_atom => xas_atom_env%grid_atom_set(atom_kind)%grid_atom
1199      na = grid_atom%ng_sphere
1200      nr = bounds(2, 2) - bounds(1, 2) + 1
1201      n = na*nr
1202      nspins = xas_atom_env%nspins
1203      ri_sphi_so => xas_atom_env%ri_sphi_so(atom_kind)%array
1204
1205!  Point to the rho_set densities
1206      rhoa => rho_set%rhoa
1207      rhob => rho_set%rhob
1208      rhoa = 0.0_dp; rhob = 0.0_dp;
1209      IF (do_gga) THEN
1210         DO dir = 1, 3
1211            rho_set%drhoa(dir)%array = 0.0_dp
1212            rho_set%drhob(dir)%array = 0.0_dp
1213         END DO
1214      END IF
1215
1216!  Point to the precomputed SO
1217      ga => xas_atom_env%ga(atom_kind)%array
1218      gr => xas_atom_env%gr(atom_kind)%array
1219      IF (do_gga) THEN
1220         dga1 => xas_atom_env%dga1(atom_kind)%array
1221         dga2 => xas_atom_env%dga2(atom_kind)%array
1222         dgr1 => xas_atom_env%dgr1(atom_kind)%array
1223         dgr2 => xas_atom_env%dgr2(atom_kind)%array
1224      END IF
1225
1226!  Loop over the sets and pgfs of the basis set
1227      DO iset = 1, nset
1228
1229         !allocate space to store the spherical orbitals on the grid
1230         ALLOCATE (so(na, nr, npgf(iset)*nsoset(lmax(iset))))
1231         IF (do_gga) ALLOCATE (dso(na, nr, 3, npgf(iset)*nsoset(lmax(iset))))
1232
1233         DO ipgf = 1, npgf(iset)
1234            start_pgf = (ipgf - 1)*nsoset(lmax(iset))
1235            start = (iset - 1)*maxso + start_pgf
1236
1237            !loop over the spherical gaussian orbitals
1238            DO iso = nsoset(lmin(iset) - 1) + 1, nsoset(lmax(iset))
1239
1240               !the spherical orbital on the grid
1241               CALL dgemm('N', 'T', na, nr, 1, 1.0_dp, ga(:, start + iso:start + iso), na, &
1242                          gr(:, start + iso:start + iso), nr, 0.0_dp, so(:, :, start_pgf + iso), na)
1243
1244               !the gradient on the grid
1245               IF (do_gga) THEN
1246
1247                  DO dir = 1, 3
1248                     CALL dgemm('N', 'T', na, nr, 1, 1.0_dp, dga1(:, start + iso:start + iso, dir), na, &
1249                                dgr1(:, start + iso:start + iso), nr, 0.0_dp, dso(:, :, dir, start_pgf + iso), na)
1250                     CALL dgemm('N', 'T', na, nr, 1, 1.0_dp, dga2(:, start + iso:start + iso, dir), na, &
1251                                dgr2(:, start + iso:start + iso), nr, 1.0_dp, dso(:, :, dir, start_pgf + iso), na)
1252                  END DO
1253               END IF
1254
1255            END DO !iso
1256
1257         END DO !ipgf
1258
1259         !contract the so to get the sgf
1260         ALLOCATE (sgf(na, nr))
1261         IF (do_gga) THEN
1262            ALLOCATE (dsgf(na, nr, 3))
1263         END IF
1264         sgfi = first_sgf(1, iset) - 1
1265
1266         DO isgf = 1, nsgf_set(iset)
1267
1268            sgf = 0.0_dp
1269            IF (do_gga) dsgf = 0.0_dp
1270
1271            !so -> sgf
1272            DO ipgf = 1, npgf(iset)
1273               start_pgf = (ipgf - 1)*nsoset(lmax(iset))
1274               DO iso = nsoset(lmin(iset) - 1) + 1, nsoset(lmax(iset))
1275                  CALL daxpy(n, ri_sphi_so(start_pgf + iso, sgfi + isgf), so(:, :, start_pgf + iso), 1, &
1276                             sgf(:, :), 1)
1277               END DO !iso
1278            END DO !ipgf
1279
1280            !put the sgf on the grid with the approriate coefficients and sum
1281            CALL daxpy(n, ri_dcoeff(1)%array(sgfi + isgf), sgf(:, :), 1, rhoa(:, :, 1), 1)
1282
1283            IF (nspins == 2) THEN
1284               CALL daxpy(n, ri_dcoeff(2)%array(sgfi + isgf), sgf(:, :), 1, rhob(:, :, 1), 1)
1285            END IF
1286
1287            IF (do_gga) THEN
1288               DO ipgf = 1, npgf(iset)
1289                  start_pgf = (ipgf - 1)*nsoset(lmax(iset))
1290                  DO iso = nsoset(lmin(iset) - 1) + 1, nsoset(lmax(iset))
1291                     DO dir = 1, 3
1292                        CALL daxpy(n, ri_sphi_so(start_pgf + iso, sgfi + isgf), dso(:, :, dir, start_pgf + iso), &
1293                                   1, dsgf(:, :, dir), 1)
1294                     END DO !dir
1295                  END DO !iso
1296               END DO !ipgf
1297
1298               !put the gradient of the sgf on the grid with correspond RI coeff
1299               DO dir = 1, 3
1300                  CALL daxpy(n, ri_dcoeff(1)%array(sgfi + isgf), dsgf(:, :, dir), &
1301                             1, rho_set%drhoa(dir)%array(:, :, 1), 1)
1302
1303                  IF (nspins == 2) THEN
1304                     CALL daxpy(n, ri_dcoeff(2)%array(sgfi + isgf), dsgf(:, :, dir), &
1305                                1, rho_set%drhob(dir)%array(:, :, 1), 1)
1306                  END IF
1307               END DO !dir
1308            END IF !do_gga
1309
1310         END DO !isgf
1311
1312         DEALLOCATE (so, sgf)
1313         IF (do_gga) DEALLOCATE (dso, dsgf)
1314      END DO !iset
1315
1316      ! Treat spin restricted case (=> copy alpha into beta)
1317      IF (nspins == 1) THEN
1318         CALL dcopy(n, rhoa(:, :, 1), 1, rhob(:, :, 1), 1)
1319
1320         IF (do_gga) THEN
1321            DO dir = 1, 3
1322               CALL dcopy(n, rho_set%drhoa(dir)%array(:, :, 1), 1, rho_set%drhob(dir)%array(:, :, 1), 1)
1323            END DO
1324         END IF
1325      END IF
1326
1327      CALL timestop(handle)
1328
1329   END SUBROUTINE put_density_on_atomic_grid
1330
1331! **************************************************************************************************
1332!> \brief Adds the density of a given source atom with source kind (with ri_dcoeff) on the atomic
1333!>        grid belonging to another target atom of target kind. The evaluations of the basis
1334!>        function first requires the evaluation of the x,y,z coordinates on each grid point of
1335!>        target atom wrt to the position of source atom
1336!> \param rho_set where the densities are stored
1337!> \param ri_dcoeff the arrays containing the RI density coefficient of source_iat, for each spin
1338!> \param source_iat the index of the source atom
1339!> \param source_ikind the kind of the source atom
1340!> \param target_iat the index of the target atom
1341!> \param target_ikind the kind of the target atom
1342!> \param do_gga whether the gradient of the density is needed
1343!> \param bounds the local bounds of the atomic grid
1344!> \param xas_atom_env ...
1345!> \param qs_env ...
1346! **************************************************************************************************
1347   SUBROUTINE put_density_on_other_grid(rho_set, ri_dcoeff, source_iat, source_ikind, target_iat, &
1348                                        target_ikind, do_gga, bounds, xas_atom_env, qs_env)
1349
1350      TYPE(xc_rho_set_type), POINTER                     :: rho_set
1351      TYPE(cp_1d_r_p_type), DIMENSION(:)                 :: ri_dcoeff
1352      INTEGER, INTENT(IN)                                :: source_iat, source_ikind, target_iat, &
1353                                                            target_ikind
1354      LOGICAL, INTENT(IN)                                :: do_gga
1355      INTEGER, DIMENSION(2, 3), INTENT(IN)               :: bounds
1356      TYPE(xas_atom_env_type), POINTER                   :: xas_atom_env
1357      TYPE(qs_environment_type), POINTER                 :: qs_env
1358
1359      CHARACTER(len=*), PARAMETER :: routineN = 'put_density_on_other_grid', &
1360         routineP = moduleN//':'//routineN
1361
1362      INTEGER                                            :: dir, er, handle, ia, ico, ipgf, ir, &
1363                                                            iset, isgf, lx, ly, lz, n, na, nr, &
1364                                                            nset, nspins, sgfi, sr, start
1365      INTEGER, DIMENSION(:), POINTER                     :: lmax, lmin, npgf, nsgf_set
1366      INTEGER, DIMENSION(:, :), POINTER                  :: first_sgf
1367      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: sgf
1368      REAL(dp), ALLOCATABLE, DIMENSION(:, :, :)          :: co, dsgf, pos
1369      REAL(dp), ALLOCATABLE, DIMENSION(:, :, :, :)       :: dco
1370      REAL(dp), DIMENSION(3)                             :: rs, rst, rt
1371      REAL(dp), DIMENSION(:, :), POINTER                 :: ri_sphi, zet
1372      REAL(dp), DIMENSION(:, :, :), POINTER              :: rhoa, rhob
1373      TYPE(cell_type), POINTER                           :: cell
1374      TYPE(grid_atom_type), POINTER                      :: grid_atom
1375      TYPE(gto_basis_set_type), POINTER                  :: ri_basis
1376      TYPE(harmonics_atom_type), POINTER                 :: harmonics
1377      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
1378      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
1379
1380      NULLIFY (qs_kind_set, ri_basis, lmax, npgf, nsgf_set, lmin, zet, first_sgf, grid_atom)
1381      NULLIFY (harmonics, rhoa, rhob, particle_set, cell, ri_sphi)
1382
1383      !Same logic as the  put_density_on_own_grid routine. Loop over orbitals, put them on the grid,
1384      !contract into sgf and daxpy with coeff. Notable difference: use cartesian orbitals instead of
1385      !spherical, since the center of the grid is not the origin and thus, spherical symmetry can't
1386      !be exploited so well
1387
1388      CALL timeset(routineN, handle)
1389
1390      !Generalities
1391      CALL get_qs_env(qs_env, qs_kind_set=qs_kind_set, particle_set=particle_set, cell=cell)
1392      !want basis of the source atom
1393      CALL get_qs_kind(qs_kind_set(source_ikind), basis_set=ri_basis, basis_type="RI_XAS")
1394      CALL get_gto_basis_set(ri_basis, lmax=lmax, npgf=npgf, zet=zet, nset=nset, nsgf_set=nsgf_set, &
1395                             first_sgf=first_sgf, lmin=lmin, sphi=ri_sphi)
1396
1397      ! Want the grid and harmonics of the target atom
1398      grid_atom => xas_atom_env%grid_atom_set(target_ikind)%grid_atom
1399      harmonics => xas_atom_env%harmonics_atom_set(target_ikind)%harmonics_atom
1400      na = grid_atom%ng_sphere
1401      sr = bounds(1, 2); er = bounds(2, 2)
1402      nr = er - sr + 1
1403      n = na*nr
1404      nspins = xas_atom_env%nspins
1405
1406      !  Point to the rho_set densities
1407      rhoa => rho_set%rhoa
1408      rhob => rho_set%rhob
1409
1410      !  Need the source-target position vector
1411      rs = pbc(particle_set(source_iat)%r, cell)
1412      rt = pbc(particle_set(target_iat)%r, cell)
1413      rst = pbc(rs, rt, cell)
1414
1415      ! Precompute the positions on the target grid
1416      ALLOCATE (pos(na, sr:er, 4))
1417!$OMP PARALLEL DO COLLAPSE(2) SCHEDULE(STATIC) DEFAULT(NONE), &
1418!$OMP SHARED(na,nr,pos,harmonics,grid_atom,rst,sr,er), &
1419!$OMP PRIVATE(ia,ir)
1420      DO ir = sr, er
1421         DO ia = 1, na
1422            pos(ia, ir, 1:3) = harmonics%a(:, ia)*grid_atom%rad(ir) + rst
1423            pos(ia, ir, 4) = pos(ia, ir, 1)**2 + pos(ia, ir, 2)**2 + pos(ia, ir, 3)**2
1424         END DO
1425      END DO
1426!$OMP END PARALLEL DO
1427
1428      ! Loop over the cartesian gaussian functions and evaluate them
1429      DO iset = 1, nset
1430
1431         !allocate space to store the cartesian orbtial on the grid
1432         ALLOCATE (co(na, sr:er, npgf(iset)*ncoset(lmax(iset))))
1433         IF (do_gga) ALLOCATE (dco(na, sr:er, 3, npgf(iset)*ncoset(lmax(iset))))
1434
1435!$OMP PARALLEL DEFAULT(NONE), &
1436!$OMP SHARED(co,npgf,ncoset,lmax,lmin,indco,pos,zet,iset,na,nr,do_gga,dco,sr,er), &
1437!$OMP PRIVATE(ipgf,start,ico,lx,ly,lz,ia,ir)
1438
1439!$OMP DO COLLAPSE(2) SCHEDULE(STATIC)
1440         DO ir = sr, er
1441            DO ia = 1, na
1442               co(ia, ir, :) = 0.0_dp
1443               IF (do_gga) THEN
1444                  dco(ia, ir, :, :) = 0.0_dp
1445               END IF
1446            END DO
1447         END DO
1448!$OMP END DO NOWAIT
1449
1450         DO ipgf = 1, npgf(iset)
1451            start = (ipgf - 1)*ncoset(lmax(iset))
1452
1453            !loop over the cartesian orbitals
1454            DO ico = ncoset(lmin(iset) - 1) + 1, ncoset(lmax(iset))
1455               lx = indco(1, ico)
1456               ly = indco(2, ico)
1457               lz = indco(3, ico)
1458
1459               ! compute g = x**lx * y**ly * z**lz * exp(-zet * r**2)
1460!$OMP DO COLLAPSE(2) SCHEDULE(STATIC)
1461               DO ir = sr, er
1462                  DO ia = 1, na
1463                     co(ia, ir, start + ico) = pos(ia, ir, 1)**lx*pos(ia, ir, 2)**ly*pos(ia, ir, 3)**lz &
1464                                               *EXP(-zet(ipgf, iset)*pos(ia, ir, 4))
1465                  END DO
1466               END DO
1467!$OMP END DO NOWAIT
1468
1469               IF (do_gga) THEN
1470                  !the gradient: dg_x = lx*x**(lx-1) * y**ly * z**lz * exp(-zet * r**2)
1471                  !                     -2*zet* x**(lx+1) * y**ly * z**lz * exp(-zet * r**2)
1472                  !                   = (lx*x**(lx-1) - 2*zet*x**(lx+1)) * y**ly * z**lz * exp(-zet * r**2)
1473
1474                  !x direction, special case if lx == 0
1475                  IF (lx == 0) THEN
1476!$OMP DO COLLAPSE(2) SCHEDULE(STATIC)
1477                     DO ir = sr, er
1478                        DO ia = 1, na
1479                           dco(ia, ir, 1, start + ico) = -2.0_dp*pos(ia, ir, 1)*zet(ipgf, iset) &
1480                                                         *pos(ia, ir, 2)**ly*pos(ia, ir, 3)**lz &
1481                                                         *EXP(-zet(ipgf, iset)*pos(ia, ir, 4))
1482                        END DO
1483                     END DO
1484!$OMP END DO NOWAIT
1485                  ELSE
1486!$OMP DO COLLAPSE(2) SCHEDULE(STATIC)
1487                     DO ir = sr, er
1488                        DO ia = 1, na
1489                           dco(ia, ir, 1, start + ico) = (lx*pos(ia, ir, 1)**(lx - 1) &
1490                                                          - 2.0_dp*pos(ia, ir, 1)**(lx + 1)*zet(ipgf, iset)) &
1491                                                         *pos(ia, ir, 2)**ly*pos(ia, ir, 3)**lz &
1492                                                         *EXP(-zet(ipgf, iset)*pos(ia, ir, 4))
1493                        END DO
1494                     END DO
1495!$OMP END DO NOWAIT
1496                  END IF !lx == 0
1497
1498                  !y direction, special case if ly == 0
1499                  IF (ly == 0) THEN
1500!$OMP DO COLLAPSE(2) SCHEDULE(STATIC)
1501                     DO ir = sr, er
1502                        DO ia = 1, na
1503                           dco(ia, ir, 2, start + ico) = -2.0_dp*pos(ia, ir, 2)*zet(ipgf, iset) &
1504                                                         *pos(ia, ir, 1)**lx*pos(ia, ir, 3)**lz &
1505                                                         *EXP(-zet(ipgf, iset)*pos(ia, ir, 4))
1506                        END DO
1507                     END DO
1508!$OMP END DO NOWAIT
1509                  ELSE
1510!$OMP DO COLLAPSE(2) SCHEDULE(STATIC)
1511                     DO ir = sr, er
1512                        DO ia = 1, na
1513                           dco(ia, ir, 2, start + ico) = (ly*pos(ia, ir, 2)**(ly - 1) &
1514                                                          - 2.0_dp*pos(ia, ir, 2)**(ly + 1)*zet(ipgf, iset)) &
1515                                                         *pos(ia, ir, 1)**lx*pos(ia, ir, 3)**lz &
1516                                                         *EXP(-zet(ipgf, iset)*pos(ia, ir, 4))
1517                        END DO
1518                     END DO
1519!$OMP END DO NOWAIT
1520                  END IF !ly == 0
1521
1522                  !z direction, special case if lz == 0
1523                  IF (lz == 0) THEN
1524!$OMP DO COLLAPSE(2) SCHEDULE(STATIC)
1525                     DO ir = sr, er
1526                        DO ia = 1, na
1527                           dco(ia, ir, 3, start + ico) = -2.0_dp*pos(ia, ir, 3)*zet(ipgf, iset) &
1528                                                         *pos(ia, ir, 1)**lx*pos(ia, ir, 2)**ly &
1529                                                         *EXP(-zet(ipgf, iset)*pos(ia, ir, 4))
1530                        END DO
1531                     END DO
1532!$OMP END DO NOWAIT
1533                  ELSE
1534!$OMP DO COLLAPSE(2) SCHEDULE(STATIC)
1535                     DO ir = sr, er
1536                        DO ia = 1, na
1537                           dco(ia, ir, 3, start + ico) = (lz*pos(ia, ir, 3)**(lz - 1) &
1538                                                          - 2.0_dp*pos(ia, ir, 3)**(lz + 1)*zet(ipgf, iset)) &
1539                                                         *pos(ia, ir, 1)**lx*pos(ia, ir, 2)**ly &
1540                                                         *EXP(-zet(ipgf, iset)*pos(ia, ir, 4))
1541                        END DO
1542                     END DO
1543!$OMP END DO NOWAIT
1544                  END IF !lz == 0
1545
1546               END IF !gga
1547
1548            END DO !ico
1549         END DO !ipgf
1550
1551!$OMP END PARALLEL
1552
1553         !contract the co into sgf
1554         ALLOCATE (sgf(na, nr))
1555         IF (do_gga) ALLOCATE (dsgf(na, nr, 3))
1556         sgfi = first_sgf(1, iset) - 1
1557
1558         DO isgf = 1, nsgf_set(iset)
1559            sgf = 0.0_dp
1560            IF (do_gga) dsgf = 0.0_dp
1561
1562            DO ipgf = 1, npgf(iset)
1563               start = (ipgf - 1)*ncoset(lmax(iset))
1564               DO ico = ncoset(lmin(iset) - 1) + 1, ncoset(lmax(iset))
1565                  CALL daxpy(n, ri_sphi(start + ico, sgfi + isgf), co(:, :, start + ico), 1, sgf(:, :), 1)
1566               END DO !ico
1567            END DO !ipgf
1568
1569            !add the density to the grid
1570            CALL daxpy(n, ri_dcoeff(1)%array(sgfi + isgf), sgf(:, :), 1, rhoa(:, :, 1), 1)
1571
1572            IF (nspins == 2) THEN
1573               CALL daxpy(n, ri_dcoeff(2)%array(sgfi + isgf), sgf(:, :), 1, rhob(:, :, 1), 1)
1574            END IF
1575
1576            !deal with the gradient
1577            IF (do_gga) THEN
1578
1579               DO ipgf = 1, npgf(iset)
1580                  start = (ipgf - 1)*ncoset(lmax(iset))
1581                  DO ico = ncoset(lmin(iset) - 1) + 1, ncoset(lmax(iset))
1582                     DO dir = 1, 3
1583                        CALL daxpy(n, ri_sphi(start + ico, sgfi + isgf), dco(:, :, dir, start + ico), 1, dsgf(:, :, dir), 1)
1584                     END DO
1585                  END DO !ico
1586               END DO !ipgf
1587
1588               DO dir = 1, 3
1589                  CALL daxpy(n, ri_dcoeff(1)%array(sgfi + isgf), dsgf(:, :, dir), 1, &
1590                             rho_set%drhoa(dir)%array(:, :, 1), 1)
1591
1592                  IF (nspins == 2) THEN
1593                     CALL daxpy(n, ri_dcoeff(2)%array(sgfi + isgf), dsgf(:, :, dir), 1, &
1594                                rho_set%drhob(dir)%array(:, :, 1), 1)
1595                  END IF
1596               END DO
1597            END IF !do_gga
1598
1599         END DO !isgf
1600
1601         DEALLOCATE (co, sgf)
1602         IF (do_gga) DEALLOCATE (dco, dsgf)
1603      END DO !iset
1604
1605      !Treat spin-restricted case (copy alpha into beta)
1606      IF (nspins == 1) THEN
1607         CALL dcopy(n, rhoa(:, :, 1), 1, rhob(:, :, 1), 1)
1608
1609         IF (do_gga) THEN
1610            DO dir = 1, 3
1611               CALL dcopy(n, rho_set%drhoa(dir)%array(:, :, 1), 1, rho_set%drhob(dir)%array(:, :, 1), 1)
1612            END DO
1613         END IF
1614      END IF
1615
1616      CALL timestop(handle)
1617
1618   END SUBROUTINE put_density_on_other_grid
1619
1620! **************************************************************************************************
1621!> \brief Computes the norm of the density gradient on the atomic grid
1622!> \param rho_set ...
1623!> \param atom_kind ...
1624!> \param bounds ...
1625!> \param xas_atom_env ...
1626!> \note GGA is assumed
1627! **************************************************************************************************
1628   SUBROUTINE compute_norm_drho(rho_set, atom_kind, bounds, xas_atom_env)
1629
1630      TYPE(xc_rho_set_type), POINTER                     :: rho_set
1631      INTEGER, INTENT(IN)                                :: atom_kind
1632      INTEGER, DIMENSION(2, 3), INTENT(IN)               :: bounds
1633      TYPE(xas_atom_env_type), POINTER                   :: xas_atom_env
1634
1635      CHARACTER(len=*), PARAMETER :: routineN = 'compute_norm_drho', &
1636         routineP = moduleN//':'//routineN
1637
1638      INTEGER                                            :: dir, er, ia, ir, n, na, nr, nspins, sr
1639
1640      na = xas_atom_env%grid_atom_set(atom_kind)%grid_atom%ng_sphere
1641      sr = bounds(1, 2); er = bounds(2, 2)
1642      nr = er - sr + 1
1643      n = na*nr
1644      nspins = xas_atom_env%nspins
1645
1646      rho_set%norm_drhoa = 0.0_dp
1647      rho_set%norm_drhob = 0.0_dp
1648      rho_set%norm_drho = 0.0_dp
1649
1650      DO dir = 1, 3
1651         DO ir = sr, er
1652            DO ia = 1, na
1653               rho_set%norm_drhoa(ia, ir, 1) = rho_set%norm_drhoa(ia, ir, 1) &
1654                                               + rho_set%drhoa(dir)%array(ia, ir, 1)**2
1655            END DO !ia
1656         END DO !ir
1657      END DO !dir
1658      rho_set%norm_drhoa = SQRT(rho_set%norm_drhoa)
1659
1660      IF (nspins == 1) THEN
1661         !spin-restricted
1662         CALL dcopy(n, rho_set%norm_drhoa(:, :, 1), 1, rho_set%norm_drhob(:, :, 1), 1)
1663      ELSE
1664         DO dir = 1, 3
1665            DO ir = sr, er
1666               DO ia = 1, na
1667                  rho_set%norm_drhob(ia, ir, 1) = rho_set%norm_drhob(ia, ir, 1) &
1668                                                  + rho_set%drhob(dir)%array(ia, ir, 1)**2
1669               END DO
1670            END DO
1671         END DO
1672         rho_set%norm_drhob = SQRT(rho_set%norm_drhob)
1673      END IF
1674
1675      DO dir = 1, 3
1676         DO ir = sr, er
1677            DO ia = 1, na
1678               rho_set%norm_drho(ia, ir, 1) = rho_set%norm_drho(ia, ir, 1) + &
1679                                              (rho_set%drhoa(dir)%array(ia, ir, 1) + rho_set%drhob(dir)%array(ia, ir, 1))**2
1680            END DO
1681         END DO
1682      END DO
1683      rho_set%norm_drho = SQRT(rho_set%norm_drho)
1684
1685   END SUBROUTINE compute_norm_drho
1686
1687! **************************************************************************************************
1688!> \brief Precomputes the spherical orbitals of the RI basis on the excited atom grids
1689!> \param do_gga whether the gradient needs to be computed for GGA or not
1690!> \param batch_size among how many procs the grid is divided
1691!> \param ipe the current proc index within the batch
1692!> \param xas_atom_env ...
1693!> \param qs_env ...
1694!> \note the functions are split in a purely angular part of size na and a purely radial part of
1695!>       size nr. The full function on the grid can simply be obtained with dgemm and we save space
1696! **************************************************************************************************
1697   SUBROUTINE precompute_so_dso(do_gga, batch_size, ipe, xas_atom_env, qs_env)
1698
1699      LOGICAL, INTENT(IN)                                :: do_gga
1700      INTEGER, INTENT(IN)                                :: batch_size, ipe
1701      TYPE(xas_atom_env_type), POINTER                   :: xas_atom_env
1702      TYPE(qs_environment_type), POINTER                 :: qs_env
1703
1704      CHARACTER(len=*), PARAMETER :: routineN = 'precompute_so_dso', &
1705         routineP = moduleN//':'//routineN
1706
1707      INTEGER                                            :: bo(2), dir, er, handle, ikind, ipgf, &
1708                                                            iset, iso, l, maxso, n, na, nkind, nr, &
1709                                                            nset, nsotot, sr, starti
1710      INTEGER, DIMENSION(:), POINTER                     :: lmax, lmin, npgf, nsgf_set
1711      REAL(dp), DIMENSION(:, :), POINTER                 :: dgr1, dgr2, ga, gr, slm, zet
1712      REAL(dp), DIMENSION(:, :, :), POINTER              :: dga1, dga2, dslm_dxyz
1713      TYPE(cp_para_env_type), POINTER                    :: para_env
1714      TYPE(grid_atom_type), POINTER                      :: grid_atom
1715      TYPE(gto_basis_set_type), POINTER                  :: ri_basis
1716      TYPE(harmonics_atom_type), POINTER                 :: harmonics
1717      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
1718
1719      NULLIFY (qs_kind_set, harmonics, grid_atom, slm, dslm_dxyz, ri_basis, lmax, lmin, npgf)
1720      NULLIFY (nsgf_set, zet, para_env)
1721
1722      CALL timeset(routineN, handle)
1723
1724      CALL get_qs_env(qs_env, qs_kind_set=qs_kind_set, para_env=para_env)
1725      nkind = SIZE(qs_kind_set)
1726
1727      DO ikind = 1, nkind
1728
1729         NULLIFY (xas_atom_env%ga(ikind)%array)
1730         NULLIFY (xas_atom_env%gr(ikind)%array)
1731         NULLIFY (xas_atom_env%dga1(ikind)%array)
1732         NULLIFY (xas_atom_env%dga2(ikind)%array)
1733         NULLIFY (xas_atom_env%dgr1(ikind)%array)
1734         NULLIFY (xas_atom_env%dgr2(ikind)%array)
1735
1736         IF (.NOT. ANY(xas_atom_env%excited_kinds == ikind)) CYCLE
1737
1738         !grid info
1739         harmonics => xas_atom_env%harmonics_atom_set(ikind)%harmonics_atom
1740         grid_atom => xas_atom_env%grid_atom_set(ikind)%grid_atom
1741
1742         na = grid_atom%ng_sphere
1743         nr = grid_atom%nr
1744         sr = 1; er = nr
1745         n = na*nr
1746
1747         !Split the grid
1748         bo = get_limit(nr, batch_size, ipe)
1749         sr = bo(1); er = bo(2)
1750         nr = er - sr + 1
1751
1752         slm => harmonics%slm
1753         dslm_dxyz => harmonics%dslm_dxyz
1754
1755         !basis info
1756         CALL get_qs_kind(qs_kind_set(ikind), basis_set=ri_basis, basis_type="RI_XAS")
1757         CALL get_gto_basis_set(ri_basis, lmax=lmax, npgf=npgf, zet=zet, nset=nset, &
1758                                nsgf_set=nsgf_set, lmin=lmin, maxso=maxso)
1759         nsotot = maxso*nset
1760
1761         !Put the gaussians and their gradient as purely angular or radial arrays
1762         ALLOCATE (xas_atom_env%ga(ikind)%array(na, nsotot))
1763         ALLOCATE (xas_atom_env%gr(ikind)%array(nr, nsotot))
1764         xas_atom_env%ga(ikind)%array = 0.0_dp; xas_atom_env%gr(ikind)%array = 0.0_dp
1765         IF (do_gga) THEN
1766            ALLOCATE (xas_atom_env%dga1(ikind)%array(na, nsotot, 3))
1767            ALLOCATE (xas_atom_env%dgr1(ikind)%array(nr, nsotot))
1768            ALLOCATE (xas_atom_env%dga2(ikind)%array(na, nsotot, 3))
1769            ALLOCATE (xas_atom_env%dgr2(ikind)%array(nr, nsotot))
1770            xas_atom_env%dga1(ikind)%array = 0.0_dp; xas_atom_env%dgr1(ikind)%array = 0.0_dp
1771            xas_atom_env%dga2(ikind)%array = 0.0_dp; xas_atom_env%dgr2(ikind)%array = 0.0_dp
1772         END IF
1773
1774         ga => xas_atom_env%ga(ikind)%array
1775         gr => xas_atom_env%gr(ikind)%array
1776         dga1 => xas_atom_env%dga1(ikind)%array
1777         dga2 => xas_atom_env%dga2(ikind)%array
1778         dgr1 => xas_atom_env%dgr1(ikind)%array
1779         dgr2 => xas_atom_env%dgr2(ikind)%array
1780
1781         DO iset = 1, nset
1782            DO ipgf = 1, npgf(iset)
1783               starti = (iset - 1)*maxso + (ipgf - 1)*nsoset(lmax(iset))
1784               DO iso = nsoset(lmin(iset) - 1) + 1, nsoset(lmax(iset))
1785                  l = indso(1, iso)
1786
1787                  !radial part of the gaussian
1788                  gr(1:nr, starti + iso) = grid_atom%rad(sr:er)**l*EXP(-zet(ipgf, iset)*grid_atom%rad2(sr:er))
1789
1790                  !angular part of the gaussian
1791                  ga(1:na, starti + iso) = slm(1:na, iso)
1792
1793                  IF (do_gga) THEN
1794                     !radial part of the gradient => same in all three direction
1795                     dgr1(1:nr, starti + iso) = grid_atom%rad(sr:er)**(l - 1) &
1796                                                *EXP(-zet(ipgf, iset)*grid_atom%rad2(sr:er))
1797                     dgr2(1:nr, starti + iso) = -2.0_dp*zet(ipgf, iset)*grid_atom%rad(sr:er)**(l + 1) &
1798                                                *EXP(-zet(ipgf, iset)*grid_atom%rad2(sr:er))
1799
1800                     !angular part of the gradient
1801                     DO dir = 1, 3
1802                        dga1(1:na, starti + iso, dir) = dslm_dxyz(dir, 1:na, iso)
1803                        dga2(1:na, starti + iso, dir) = harmonics%a(dir, 1:na)*slm(1:na, iso)
1804                     END DO
1805                  END IF
1806
1807               END DO !iso
1808            END DO !ipgf
1809         END DO !iset
1810
1811      END DO !ikind
1812
1813      CALL timestop(handle)
1814
1815   END SUBROUTINE precompute_so_dso
1816
1817! **************************************************************************************************
1818!> \brief Integrate the xc kernel as a function of r on the atomic grids for the RI_XAS basis
1819!> \param int_fxc the global array containing the (P|fxc|Q) integrals, for all spin configurations
1820!> \param xas_atom_env ...
1821!> \param xas_tdp_control ...
1822!> \param qs_env ...
1823!> \note Note that if closed-shell, alpha-alpha term and beta-beta terms are the same
1824!>       Store the (P|fxc|Q) integrals on the processor they were computed on
1825!>       int_fxc(1)%matrix is alpha-alpha, 2: alpha-beta, 3: beta-beta
1826! **************************************************************************************************
1827   SUBROUTINE integrate_fxc_atoms(int_fxc, xas_atom_env, xas_tdp_control, qs_env)
1828
1829      TYPE(cp_2d_r_p_type), DIMENSION(:, :), POINTER     :: int_fxc
1830      TYPE(xas_atom_env_type), POINTER                   :: xas_atom_env
1831      TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
1832      TYPE(qs_environment_type), POINTER                 :: qs_env
1833
1834      CHARACTER(len=*), PARAMETER :: routineN = 'integrate_fxc_atoms', &
1835         routineP = moduleN//':'//routineN
1836
1837      INTEGER                                            :: batch_size, bo(2), handle, i, iatom, &
1838                                                            ibatch, iex, ikind, inb, ipe, mepos, &
1839                                                            na, natom, nb, nbatch, nbk, nex_atom, &
1840                                                            nr, num_pe, subcomm
1841      INTEGER, DIMENSION(2, 3)                           :: bounds
1842      INTEGER, DIMENSION(:), POINTER                     :: exat_neighbors
1843      LOGICAL                                            :: do_gga, do_sc, do_sf
1844      TYPE(cp_1d_r_p_type), DIMENSION(:, :), POINTER     :: ri_dcoeff
1845      TYPE(cp_para_env_type), POINTER                    :: para_env
1846      TYPE(dft_control_type), POINTER                    :: dft_control
1847      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
1848      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
1849      TYPE(section_vals_type), POINTER                   :: input, xc_functionals
1850      TYPE(xc_derivative_set_type), POINTER              :: deriv_set
1851      TYPE(xc_rho_cflags_type)                           :: needs
1852      TYPE(xc_rho_set_type), POINTER                     :: rho_set
1853
1854      NULLIFY (particle_set, qs_kind_set, dft_control, deriv_set, para_env, exat_neighbors)
1855      NULLIFY (rho_set, input, xc_functionals)
1856
1857      CALL timeset(routineN, handle)
1858
1859!  Initialize
1860      CALL get_qs_env(qs_env, particle_set=particle_set, qs_kind_set=qs_kind_set, natom=natom, &
1861                      dft_control=dft_control, input=input, para_env=para_env)
1862      ALLOCATE (int_fxc(natom, 4))
1863      DO iatom = 1, natom
1864         DO i = 1, 4
1865            NULLIFY (int_fxc(iatom, i)%array)
1866         END DO
1867      END DO
1868      nex_atom = SIZE(xas_atom_env%excited_atoms)
1869      !spin conserving in the general sense here
1870      do_sc = xas_tdp_control%do_spin_cons .OR. xas_tdp_control%do_singlet .OR. xas_tdp_control%do_triplet
1871      do_sf = xas_tdp_control%do_spin_flip
1872
1873!  Get some info on the functionals
1874      xc_functionals => section_vals_get_subs_vals(input, "DFT%XAS_TDP%KERNEL%XC_FUNCTIONAL")
1875      ! ask for lsd in any case
1876      needs = xc_functionals_get_needs(xc_functionals, lsd=.TRUE., add_basic_components=.TRUE.)
1877      do_gga = needs%drho_spin !because either LDA or GGA, and the former does not need gradient
1878
1879!  Distribute the excited atoms over the processors, which are split into batches
1880      num_pe = para_env%num_pe
1881      mepos = para_env%mepos
1882      batch_size = xas_tdp_control%batch_size
1883      nbatch = num_pe/batch_size
1884      ibatch = mepos/batch_size
1885      !the proc index within the batch
1886      ipe = MODULO(mepos, batch_size)
1887      !make sure we include last procs if batch_size is not divisor of total num_procs
1888      IF (nbatch*batch_size .NE. num_pe) nbatch = nbatch + 1
1889      !the size of the last batch might be less than nominal one
1890      IF ((ibatch + 1)*batch_size > num_pe) batch_size = num_pe - ibatch*batch_size
1891      !distribute the excted atoms over the batches
1892      bo = get_limit(nex_atom, nbatch, ibatch)
1893      !create a subcommunicator for this batch
1894      CALL mp_comm_split_direct(para_env%group, subcomm, ibatch)
1895
1896!  Precompute the spherical orbital of the RI basis (and maybe their gradient) on the grids of the
1897!  excited atoms. Needed for the GGA integration and to actually put the density on the grid
1898      CALL precompute_so_dso(do_gga, batch_size, ipe, xas_atom_env, qs_env)
1899
1900!  Looping over the excited atoms
1901      DO iex = bo(1), bo(2)
1902
1903         iatom = xas_atom_env%excited_atoms(iex)
1904         ikind = particle_set(iatom)%atomic_kind%kind_number
1905         exat_neighbors => xas_atom_env%exat_neighbors(iex)%array
1906         ri_dcoeff => xas_atom_env%ri_dcoeff(:, :, iex)
1907
1908!     General grid/basis info
1909         na = xas_atom_env%grid_atom_set(ikind)%grid_atom%ng_sphere
1910         nr = xas_atom_env%grid_atom_set(ikind)%grid_atom%nr
1911
1912!     Creating a xc_rho_set to store the density and dset for the kernel
1913         bounds(1:2, 1:3) = 1
1914         bounds(2, 1) = na
1915         bounds(2, 2) = nr
1916         !split the grid by columns for best efficiency
1917         bounds(:, 2) = get_limit(nr, batch_size, ipe)
1918
1919         CALL xc_rho_set_create(rho_set=rho_set, local_bounds=bounds, &
1920                                rho_cutoff=dft_control%qs_control%eps_rho_rspace, &
1921                                drho_cutoff=dft_control%qs_control%eps_rho_rspace)
1922         CALL xc_dset_create(deriv_set, local_bounds=bounds)
1923
1924         ! allocate internals of the rho_set
1925         CALL xc_rho_set_atom_update(rho_set, needs, nspins=2, bo=bounds)
1926
1927!     Put the density, and possibly its gradient,  on the grid (for this atom)
1928         CALL put_density_on_atomic_grid(rho_set, ri_dcoeff(iatom, :), ikind, &
1929                                         do_gga, bounds, xas_atom_env, qs_env)
1930
1931!     Take the neighboring atom contributions to the density (and gradient)
1932         DO inb = 1, SIZE(exat_neighbors)
1933
1934            nb = exat_neighbors(inb)
1935            nbk = particle_set(nb)%atomic_kind%kind_number
1936            CALL put_density_on_other_grid(rho_set, ri_dcoeff(nb, :), nb, nbk, iatom, &
1937                                           ikind, do_gga, bounds, xas_atom_env, qs_env)
1938
1939         END DO
1940
1941!     In case of GGA, also need the norm of the density gradient
1942         IF (do_gga) CALL compute_norm_drho(rho_set, ikind, bounds, xas_atom_env)
1943
1944!     Compute the required derivatives
1945         CALL xc_functionals_eval(xc_functionals, lsd=.TRUE., rho_set=rho_set, deriv_set=deriv_set, &
1946                                  deriv_order=2)
1947
1948         !spin-conserving (LDA part)
1949         IF (do_sc) THEN
1950            CALL integrate_sc_fxc(int_fxc, iatom, ikind, deriv_set, bounds, xas_atom_env, qs_env)
1951         END IF
1952
1953         !spin-flip (LDA part)
1954         IF (do_sf) THEN
1955            CALL integrate_sf_fxc(int_fxc, iatom, ikind, rho_set, deriv_set, bounds, xas_atom_env, qs_env)
1956         END IF
1957
1958         !Gradient correction (note: spin-flip only keeps the lda part, aka ALDA0)
1959         IF (do_gga .AND. do_sc) THEN
1960            CALL integrate_gga_fxc(int_fxc, iatom, ikind, rho_set, deriv_set, bounds, xas_atom_env, qs_env)
1961         END IF
1962
1963!     Sum the contribution of the different procs of the batch
1964         DO i = 1, 4
1965            IF (ASSOCIATED(int_fxc(iatom, i)%array)) THEN
1966               CALL mp_sum(int_fxc(iatom, i)%array, subcomm)
1967            END IF
1968         END DO
1969
1970!     Clean-up
1971         CALL xc_dset_release(deriv_set)
1972         CALL xc_rho_set_release(rho_set)
1973
1974      END DO !iex
1975
1976      !Not necessary to sync, but makes sure that any load inbalance (which can be significant if
1977      !nbatch is not a divisor of n_excited) is reported here
1978      CALL mp_sync(para_env%group)
1979
1980      !clean-up
1981      CALL mp_comm_free(subcomm)
1982
1983      CALL timestop(handle)
1984
1985   END SUBROUTINE integrate_fxc_atoms
1986
1987! **************************************************************************************************
1988!> \brief Integrate the gradient correction part of the xc kernel on the atomic grid
1989!> \param int_fxc the array containing the (P|fxc|Q) integrals
1990!> \param iatom the index of the current excited atom
1991!> \param ikind the index of the current excited kind
1992!> \param rho_set the variable contatinind the density and its gradient
1993!> \param deriv_set the functional derivatives
1994!> \param bounds the bounds of the local grid
1995!> \param xas_atom_env ...
1996!> \param qs_env ...
1997!> \note Ignored in case of pure LDA, added on top of the LDA kernel in case of GGA
1998! **************************************************************************************************
1999   SUBROUTINE integrate_gga_fxc(int_fxc, iatom, ikind, rho_set, deriv_set, bounds, xas_atom_env, qs_env)
2000
2001      TYPE(cp_2d_r_p_type), DIMENSION(:, :), POINTER     :: int_fxc
2002      INTEGER, INTENT(IN)                                :: iatom, ikind
2003      TYPE(xc_rho_set_type), POINTER                     :: rho_set
2004      TYPE(xc_derivative_set_type), POINTER              :: deriv_set
2005      INTEGER, DIMENSION(2, 3), INTENT(IN)               :: bounds
2006      TYPE(xas_atom_env_type), POINTER                   :: xas_atom_env
2007      TYPE(qs_environment_type), POINTER                 :: qs_env
2008
2009      CHARACTER(len=*), PARAMETER :: routineN = 'integrate_gga_fxc', &
2010         routineP = moduleN//':'//routineN
2011
2012      INTEGER                                            :: dir, er, handle, i, jpgf, jset, jso, &
2013                                                            maxso, na, nr, nset, nsgf, nsoi, &
2014                                                            nsotot, sr, startj, ub
2015      INTEGER, DIMENSION(:), POINTER                     :: lmax, lmin, npgf
2016      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: int_sgf, res, so, work
2017      REAL(dp), ALLOCATABLE, DIMENSION(:, :, :)          :: dso
2018      REAL(dp), DIMENSION(:, :), POINTER                 :: dgr1, dgr2, ga, gr, ri_sphi_so, weight
2019      REAL(dp), DIMENSION(:, :, :), POINTER              :: dga1, dga2
2020      TYPE(cp_2d_r_p_type), ALLOCATABLE, DIMENSION(:)    :: int_so, vxc
2021      TYPE(cp_3d_r_p_type), ALLOCATABLE, DIMENSION(:)    :: vxg
2022      TYPE(grid_atom_type), POINTER                      :: grid_atom
2023      TYPE(gto_basis_set_type), POINTER                  :: ri_basis
2024      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
2025
2026      NULLIFY (grid_atom, ri_basis, qs_kind_set, ga, gr, dgr1, dgr2, lmax, lmin, npgf)
2027      NULLIFY (weight, ri_sphi_so, dga1, dga2)
2028
2029      !Startegy: we need to compute <phi_i|fxc|phij>, most of existing application of the 2nd
2030      !          functional derivative involve the response density, and the expression of the
2031      !          integral int (fxc*n^1) is well known. We substitute the spherical orbital phi_j
2032      !          in place of n^1 in the formula and thus perform the first integration. Then
2033      !          we obtain something in the form int (fxc*phi_j) = Vxc - div (Vxg) that we can
2034      !          put on the grid and treat like a potential. The second integration is done by
2035      !          using the divergence theorem and numerical integration:
2036      !          <phi_i|fxc|phi_j> = int phi_i*(Vxc - div(Vxg)) = int phi_i*Vxc + grad(phi_i).Vxg
2037      !          Note the sign change and the dot product.
2038
2039      CALL timeset(routineN, handle)
2040
2041      !If closed shell, only compute f_aa and f_ab (ub = 2)
2042      ub = 2
2043      IF (xas_atom_env%nspins == 2) ub = 3
2044
2045      !Get the necessary grid info
2046      grid_atom => xas_atom_env%grid_atom_set(ikind)%grid_atom
2047      na = grid_atom%ng_sphere
2048      sr = bounds(1, 2); er = bounds(2, 2)
2049      nr = er - sr + 1
2050      weight => grid_atom%weight
2051
2052      !get the ri_basis indo
2053      CALL get_qs_env(qs_env, qs_kind_set=qs_kind_set)
2054      CALL get_qs_kind(qs_kind_set(ikind), basis_set=ri_basis, basis_type="RI_XAS")
2055
2056      CALL get_gto_basis_set(gto_basis_set=ri_basis, lmax=lmax, lmin=lmin, nsgf=nsgf, &
2057                             maxso=maxso, npgf=npgf, nset=nset)
2058      nsotot = nset*maxso
2059
2060      !Point to the precomputed so
2061      ga => xas_atom_env%ga(ikind)%array
2062      gr => xas_atom_env%gr(ikind)%array
2063      dgr1 => xas_atom_env%dgr1(ikind)%array
2064      dgr2 => xas_atom_env%dgr2(ikind)%array
2065      dga1 => xas_atom_env%dga1(ikind)%array
2066      dga2 => xas_atom_env%dga2(ikind)%array
2067
2068      !Before integration, wanna pre-divide all relevant derivastives by the nrom of the gradient
2069      CALL divide_by_norm_drho(deriv_set, rho_set, lsd=.TRUE.)
2070
2071      !Wanna integrate <phi_i|fxc|phi_j>, start looping over phi_j and do the first integration, then
2072      !collect vxc and vxg and loop over phi_i for the second integration
2073      !Note: we do not use the CG coefficients because they are only useful when there is a product
2074      !      of Gaussians, which is not really the case here
2075
2076      ALLOCATE (so(na, nr))
2077      ALLOCATE (dso(na, nr, 3))
2078
2079      ALLOCATE (vxc(ub))
2080      ALLOCATE (vxg(ub))
2081      ALLOCATE (int_so(ub))
2082      DO i = 1, ub
2083         ALLOCATE (vxc(i)%array(na, nr))
2084         ALLOCATE (vxg(i)%array(na, nr, 3))
2085         ALLOCATE (int_so(i)%array(nsotot, nsotot))
2086         vxc(i)%array = 0.0_dp; vxg(i)%array = 0.0_dp; int_so(i)%array = 0.0_dp
2087      END DO
2088
2089      DO jset = 1, nset
2090         DO jpgf = 1, npgf(jset)
2091            startj = (jset - 1)*maxso + (jpgf - 1)*nsoset(lmax(jset))
2092            DO jso = nsoset(lmin(jset) - 1) + 1, nsoset(lmax(jset))
2093
2094               !put the so phi_j and its gradient on the grid
2095
2096               !so
2097               CALL dgemm('N', 'T', na, nr, 1, 1.0_dp, ga(:, startj + jso:startj + jso), na, &
2098                          gr(:, startj + jso:startj + jso), nr, 0.0_dp, so(:, :), na)
2099
2100               !dso
2101               DO dir = 1, 3
2102                  CALL dgemm('N', 'T', na, nr, 1, 1.0_dp, dga1(:, startj + jso:startj + jso, dir), &
2103                             na, dgr1(:, startj + jso:startj + jso), nr, 0.0_dp, dso(:, :, dir), na)
2104                  CALL dgemm('N', 'T', na, nr, 1, 1.0_dp, dga2(:, startj + jso:startj + jso, dir), &
2105                             na, dgr2(:, startj + jso:startj + jso), nr, 1.0_dp, dso(:, :, dir), na)
2106               END DO !dir
2107
2108               !Perform the first integration (analytically)
2109               CALL get_vxc_vxg(vxc, vxg, so, dso, na, sr, er, rho_set, deriv_set, weight)
2110
2111               !For a given phi_j, compute the second integration with all phi_i at once
2112               !=> allows for efficient gemm to take place
2113               !kernl is symmetric => avoid double computing
2114               nsoi = startj + jso
2115               ALLOCATE (res(nsoi, nsoi), work(na, nsoi))
2116               res = 0.0_dp; work = 0.0_dp
2117
2118               DO i = 1, ub
2119
2120                  !integrate so*Vxc and store in the int_so
2121                  CALL dgemm('N', 'N', na, nsoi, nr, 1.0_dp, vxc(i)%array(:, :), na, &
2122                             gr(:, 1:nsoi), nr, 0.0_dp, work, na)
2123                  CALL dgemm('T', 'N', nsoi, nsoi, na, 1.0_dp, work, na, &
2124                             ga(:, 1:nsoi), na, 0.0_dp, res, nsoi)
2125                  int_so(i)%array(1:nsoi, startj + jso) = get_diag(res)
2126
2127                  DO dir = 1, 3
2128
2129                     ! integrate and sum up Vxg*dso
2130                     CALL dgemm('N', 'N', na, nsoi, nr, 1.0_dp, vxg(i)%array(:, :, dir), na, &
2131                                dgr1(:, 1:nsoi), nr, 0.0_dp, work, na)
2132                     CALL dgemm('T', 'N', nsoi, nsoi, na, 1.0_dp, work, na, &
2133                                dga1(:, 1:nsoi, dir), na, 0.0_dp, res, nsoi)
2134                     CALL daxpy(nsoi, 1.0_dp, get_diag(res), 1, int_so(i)%array(1:nsoi, startj + jso), 1)
2135
2136                     CALL dgemm('N', 'N', na, nsoi, nr, 1.0_dp, vxg(i)%array(:, :, dir), na, &
2137                                dgr2(:, 1:nsoi), nr, 0.0_dp, work, na)
2138                     CALL dgemm('T', 'N', nsoi, nsoi, na, 1.0_dp, work, na, &
2139                                dga2(:, 1:nsoi, dir), na, 0.0_dp, res, nsoi)
2140                     CALL daxpy(nsoi, 1.0_dp, get_diag(res), 1, int_so(i)%array(1:nsoi, startj + jso), 1)
2141
2142                  END DO
2143
2144                  !symmetry
2145                  int_so(i)%array(startj + jso, 1:nsoi) = int_so(i)%array(1:nsoi, startj + jso)
2146
2147               END DO !i
2148               DEALLOCATE (res, work)
2149
2150            END DO !jso
2151         END DO !jpgf
2152      END DO !jset
2153
2154      !Contract into sgf and add to already computed LDA part of int_fxc
2155      ri_sphi_so => xas_atom_env%ri_sphi_so(ikind)%array
2156      ALLOCATE (int_sgf(nsgf, nsgf))
2157      DO i = 1, ub
2158         CALL contract_so2sgf(int_sgf, int_so(i)%array, ri_basis, ri_sphi_so)
2159         CALL daxpy(nsgf*nsgf, 1.0_dp, int_sgf, 1, int_fxc(iatom, i)%array, 1)
2160      END DO
2161
2162      !Clean-up
2163      DO i = 1, ub
2164         DEALLOCATE (vxc(i)%array)
2165         DEALLOCATE (vxg(i)%array)
2166         DEALLOCATE (int_so(i)%array)
2167      END DO
2168      DEALLOCATE (vxc, vxg, int_so)
2169
2170      CALL timestop(handle)
2171
2172   END SUBROUTINE integrate_gga_fxc
2173
2174! **************************************************************************************************
2175!> \brief Computes the first integration of the GGA part of <phi_i|fxc|phi_j>, i.e. int fxc*phi_j.
2176!>        The result is of the form Vxc - div(Vxg). Up to 3 results are returned, correspoinding to
2177!>        f_aa, f_ab and (if open-shell) f_bb
2178!> \param vxc ...
2179!> \param vxg ...
2180!> \param so the spherical orbital on the grid
2181!> \param dso the derivative of the spherical orbital on the grid
2182!> \param na ...
2183!> \param sr the starting index for ir
2184!> \param er the ending index for ir
2185!> \param rho_set ...
2186!> \param deriv_set ...
2187!> \param weight the grid weight
2188!> \note This method is extremly similar to xc_calc_2nd_deriv of xc.F, but because it is a special
2189!>       case that can be further optimized and because the interface of the original routine does
2190!>       not fit this code, it has been re-written (no pw, no rho1_set but just the so, etc...)
2191! **************************************************************************************************
2192   SUBROUTINE get_vxc_vxg(vxc, vxg, so, dso, na, sr, er, rho_set, deriv_set, weight)
2193
2194      TYPE(cp_2d_r_p_type), DIMENSION(:)                 :: vxc
2195      TYPE(cp_3d_r_p_type), DIMENSION(:)                 :: vxg
2196      REAL(dp), DIMENSION(:, :)                          :: so
2197      REAL(dp), DIMENSION(:, :, :)                       :: dso
2198      INTEGER, INTENT(IN)                                :: na, sr, er
2199      TYPE(xc_rho_set_type), POINTER                     :: rho_set
2200      TYPE(xc_derivative_set_type), POINTER              :: deriv_set
2201      REAL(dp), DIMENSION(:, :), POINTER                 :: weight
2202
2203      CHARACTER(len=*), PARAMETER :: routineN = 'get_vxc_vxg', routineP = moduleN//':'//routineN
2204
2205      INTEGER                                            :: dir, handle, i, ia, ir, jr, ub
2206      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: dot_proda, dot_prodb, tmp
2207      REAL(dp), DIMENSION(:, :, :), POINTER              :: d1e, d2e, norm_drhoa, norm_drhob
2208      TYPE(xc_derivative_type), POINTER                  :: deriv
2209
2210      NULLIFY (norm_drhoa, norm_drhob, d2e, d1e, deriv)
2211
2212      CALL timeset(routineN, handle)
2213
2214      !Note: this routines follows the order of the terms in equaiton (A.7) of Thomas Chassaing
2215      !      thesis, except for the pure LDA terms that are droped. The n^(1)_a and n^(1)_b
2216      !      response densities are replaced by the spherical orbital.
2217      !      The usual spin ordering is used: aa => 1, ab => 2 , bb => 3
2218
2219      !TODO: test giving explicit sr:er bounds in definition
2220
2221      !point to the relevant components of rho_set
2222      ub = SIZE(vxc)
2223      norm_drhoa => rho_set%norm_drhoa
2224      norm_drhob => rho_set%norm_drhob
2225
2226      !Some init
2227      DO i = 1, ub
2228         vxc(i)%array = 0.0_dp
2229         vxg(i)%array = 0.0_dp
2230      END DO
2231
2232      ALLOCATE (tmp(na, sr:er), dot_proda(na, sr:er), dot_prodb(na, sr:er))
2233      dot_proda = 0.0_dp; dot_prodb = 0.0_dp
2234
2235      !Strategy: most terms are either multiplied by drhoa or drhob => group those first and then
2236      !          multiply. Also most terms are multiplied by the dot product grad_n . grad_so, so
2237      !          precompute it as well
2238
2239!$OMP PARALLEL DEFAULT(NONE), &
2240!$OMP SHARED(dot_proda,dot_prodb,tmp,vxc,vxg,deriv_set,rho_set,na,sr,er,norm_drhoa,norm_drhob,dso, &
2241!$OMP        so,weight,ub), &
2242!$OMP PRIVATE(ia,ir,dir,deriv,d1e,d2e,jr)
2243
2244      !Precompute the very common dot products grad_na . grad_so and grad_nb . grad_so
2245      DO dir = 1, 3
2246!$OMP DO SCHEDULE(STATIC)
2247         DO ir = sr, er
2248            jr = ir - sr + 1
2249            DO ia = 1, na
2250               dot_proda(ia, ir) = dot_proda(ia, ir) + rho_set%drhoa(dir)%array(ia, ir, 1)*dso(ia, jr, dir)
2251               dot_prodb(ia, ir) = dot_prodb(ia, ir) + rho_set%drhob(dir)%array(ia, ir, 1)*dso(ia, jr, dir)
2252            END DO !ia
2253         END DO !ir
2254!$OMP END DO NOWAIT
2255      END DO !dir
2256
2257      !Deal with f_aa
2258
2259      !Vxc, first term
2260      deriv => xc_dset_get_derivative(deriv_set, "(rhoa)(norm_drhoa)")
2261      CALL xc_derivative_get(deriv, deriv_data=d2e)
2262!$OMP DO SCHEDULE(STATIC)
2263      DO ir = sr, er
2264         jr = ir - sr + 1
2265         DO ia = 1, na
2266            vxc(1)%array(ia, jr) = d2e(ia, ir, 1)*dot_proda(ia, ir)
2267         END DO !ia
2268      END DO !ir
2269!$OMP END DO NOWAIT
2270
2271      !Vxc, second term
2272      deriv => xc_dset_get_derivative(deriv_set, "(rhoa)(norm_drho)")
2273      CALL xc_derivative_get(deriv, deriv_data=d2e)
2274!$OMP DO SCHEDULE(STATIC)
2275      DO ir = sr, er
2276         jr = ir - sr + 1
2277         DO ia = 1, na
2278            vxc(1)%array(ia, jr) = vxc(1)%array(ia, jr) + d2e(ia, ir, 1)*dot_prodb(ia, ir)
2279         END DO !ia
2280      END DO !ir
2281!$OMP END DO NOWAIT
2282
2283      !Vxc, take the grid weight into acocunt
2284!$OMP DO SCHEDULE(STATIC)
2285      DO ir = sr, er
2286         jr = ir - sr + 1
2287         DO ia = 1, na
2288            vxc(1)%array(ia, jr) = vxc(1)%array(ia, jr)*weight(ia, ir)
2289         END DO !ia
2290      END DO !ir
2291!$OMP END DO NOWAIT
2292
2293      !Vxg, first term (to be multiplied by drhoa)
2294      deriv => xc_dset_get_derivative(deriv_set, "(rhoa)(norm_drhoa)")
2295      CALL xc_derivative_get(deriv, deriv_data=d2e)
2296!$OMP DO SCHEDULE(STATIC)
2297      DO ir = sr, er
2298         jr = ir - sr + 1
2299         DO ia = 1, na
2300            tmp(ia, ir) = d2e(ia, ir, 1)*so(ia, jr)
2301         END DO !ia
2302      END DO !ir
2303!$OMP END DO NOWAIT
2304
2305      !Vxg, second term (to be multiplied by drhoa)
2306      deriv => xc_dset_get_derivative(deriv_set, "(norm_drhoa)(norm_drho)")
2307      CALL xc_derivative_get(deriv, deriv_data=d2e)
2308!$OMP DO SCHEDULE(STATIC)
2309      DO ir = sr, er
2310         DO ia = 1, na
2311            tmp(ia, ir) = tmp(ia, ir) + d2e(ia, ir, 1)*dot_prodb(ia, ir)
2312         END DO !ia
2313      END DO !ir
2314!$OMP END DO NOWAIT
2315
2316      !Vxg, third term (to be multiplied by drhoa)
2317      deriv => xc_dset_get_derivative(deriv_set, "(norm_drhoa)(norm_drhoa)")
2318      CALL xc_derivative_get(deriv, deriv_data=d2e)
2319!$OMP DO SCHEDULE(STATIC)
2320      DO ir = sr, er
2321         DO ia = 1, na
2322            tmp(ia, ir) = tmp(ia, ir) + d2e(ia, ir, 1)*dot_proda(ia, ir)
2323         END DO !ia
2324      END DO !ir
2325!$OMP END DO NOWAIT
2326
2327      !Vxg, fourth term (to be multiplied by drhoa)
2328      deriv => xc_dset_get_derivative(deriv_set, "(norm_drhoa)")
2329      CALL xc_derivative_get(deriv, deriv_data=d1e)
2330!$OMP DO SCHEDULE(STATIC)
2331      DO ir = sr, er
2332         DO ia = 1, na
2333            tmp(ia, ir) = tmp(ia, ir) - d1e(ia, ir, 1)*dot_proda(ia, ir) &
2334                          /MAX(norm_drhoa(ia, ir, 1), rho_set%drho_cutoff)**2
2335         END DO !ia
2336      END DO !ir
2337!$OMP END DO NOWAIT
2338
2339      !put tmp*drhoa in Vxg (so that we can reuse it for drhob terms)
2340      DO dir = 1, 3
2341!$OMP DO SCHEDULE(STATIC)
2342         DO ir = sr, er
2343            jr = ir - sr + 1
2344            DO ia = 1, na
2345               vxg(1)%array(ia, jr, dir) = tmp(ia, ir)*rho_set%drhoa(dir)%array(ia, ir, 1)
2346            END DO !ia
2347         END DO !ir
2348!$OMP END DO NOWAIT
2349      END DO !dir
2350
2351      !Vxg, fifth term (to be multiplied by drhob)
2352      deriv => xc_dset_get_derivative(deriv_set, "(rhoa)(norm_drho)")
2353      CALL xc_derivative_get(deriv, deriv_data=d2e)
2354!$OMP DO SCHEDULE(STATIC)
2355      DO ir = sr, er
2356         jr = ir - sr + 1
2357         DO ia = 1, na
2358            tmp(ia, ir) = d2e(ia, ir, 1)*so(ia, jr)
2359         END DO !ia
2360      END DO !ir
2361!$OMP END DO NOWAIT
2362
2363      !Vxg, sixth term (to be multiplied by drhob)
2364      deriv => xc_dset_get_derivative(deriv_set, "(norm_drhoa)(norm_drho)")
2365      CALL xc_derivative_get(deriv, deriv_data=d2e)
2366!$OMP DO SCHEDULE(STATIC)
2367      DO ir = sr, er
2368         DO ia = 1, na
2369            tmp(ia, ir) = tmp(ia, ir) + d2e(ia, ir, 1)*dot_proda(ia, ir)
2370         END DO !ia
2371      END DO !ir
2372!$OMP END DO NOWAIT
2373
2374      !Vxg, seventh term (to be multiplied by drhob)
2375      deriv => xc_dset_get_derivative(deriv_set, "(norm_drho)(norm_drho)")
2376      CALL xc_derivative_get(deriv, deriv_data=d2e)
2377!$OMP DO SCHEDULE(STATIC)
2378      DO ir = sr, er
2379         DO ia = 1, na
2380            tmp(ia, ir) = tmp(ia, ir) + d2e(ia, ir, 1)*dot_prodb(ia, ir)
2381         END DO !ia
2382      END DO !ir
2383!$OMP END DO NOWAIT
2384
2385      !put tmp*drhob in Vxg
2386      DO dir = 1, 3
2387!$OMP DO SCHEDULE(STATIC)
2388         DO ir = sr, er
2389            jr = ir - sr + 1
2390            DO ia = 1, na
2391               vxg(1)%array(ia, jr, dir) = vxg(1)%array(ia, jr, dir) + tmp(ia, ir)*rho_set%drhob(dir)%array(ia, ir, 1)
2392            END DO !ia
2393         END DO !ir
2394!$OMP END DO NOWAIT
2395      END DO !dir
2396
2397      !Vxg, last term
2398      deriv => xc_dset_get_derivative(deriv_set, "(norm_drhoa)")
2399      CALL xc_derivative_get(deriv, deriv_data=d1e)
2400      DO dir = 1, 3
2401!$OMP DO SCHEDULE(STATIC)
2402         DO ir = sr, er
2403            jr = ir - sr + 1
2404            DO ia = 1, na
2405               vxg(1)%array(ia, jr, dir) = vxg(1)%array(ia, jr, dir) + d1e(ia, ir, 1)*dso(ia, jr, dir)
2406            END DO !ia
2407         END DO !ir
2408!$OMP END DO NOWAIT
2409      END DO !dir
2410
2411      !Vxg, take the grid weight into account
2412      DO dir = 1, 3
2413!$OMP DO SCHEDULE(STATIC)
2414         DO ir = sr, er
2415            jr = ir - sr + 1
2416            DO ia = 1, na
2417               vxg(1)%array(ia, jr, dir) = vxg(1)%array(ia, jr, dir)*weight(ia, ir)
2418            END DO !ia
2419         END DO !ir
2420!$OMP END DO NOWAIT
2421      END DO !dir
2422
2423      !Deal with fab
2424
2425      !Vxc, first term
2426      deriv => xc_dset_get_derivative(deriv_set, "(rhoa)(norm_drhob)")
2427      CALL xc_derivative_get(deriv, deriv_data=d2e)
2428!$OMP DO SCHEDULE(STATIC)
2429      DO ir = sr, er
2430         jr = ir - sr + 1
2431         DO ia = 1, na
2432            vxc(2)%array(ia, jr) = d2e(ia, ir, 1)*dot_prodb(ia, ir)
2433         END DO !ia
2434      END DO !ir
2435!$OMP END DO NOWAIT
2436
2437      !Vxc, second term
2438      deriv => xc_dset_get_derivative(deriv_set, "(rhoa)(norm_drho)")
2439      CALL xc_derivative_get(deriv, deriv_data=d2e)
2440!$OMP DO SCHEDULE(STATIC)
2441      DO ir = sr, er
2442         jr = ir - sr + 1
2443         DO ia = 1, na
2444            vxc(2)%array(ia, jr) = vxc(2)%array(ia, jr) + d2e(ia, ir, 1)*dot_proda(ia, ir)
2445         END DO !ia
2446      END DO !ir
2447!$OMP END DO NOWAIT
2448
2449      !Vxc, take the grid weight into acocunt
2450!$OMP DO SCHEDULE(STATIC)
2451      DO ir = sr, er
2452         jr = ir - sr + 1
2453         DO ia = 1, na
2454            vxc(2)%array(ia, jr) = vxc(2)%array(ia, jr)*weight(ia, ir)
2455         END DO !ia
2456      END DO !ir
2457!$OMP END DO NOWAIT
2458
2459      !Vxg, first term (to be multiplied by drhoa)
2460      deriv => xc_dset_get_derivative(deriv_set, "(rhob)(norm_drhoa)")
2461      CALL xc_derivative_get(deriv, deriv_data=d2e)
2462!$OMP DO SCHEDULE(STATIC)
2463      DO ir = sr, er
2464         jr = ir - sr + 1
2465         DO ia = 1, na
2466            tmp(ia, ir) = d2e(ia, ir, 1)*so(ia, jr)
2467         END DO !ia
2468      END DO !ir
2469!$OMP END DO NOWAIT
2470
2471      !Vxg, second term (to be multiplied by drhoa)
2472      deriv => xc_dset_get_derivative(deriv_set, "(norm_drhoa)(norm_drho)")
2473      CALL xc_derivative_get(deriv, deriv_data=d2e)
2474!$OMP DO SCHEDULE(STATIC)
2475      DO ir = sr, er
2476         DO ia = 1, na
2477            tmp(ia, ir) = tmp(ia, ir) + d2e(ia, ir, 1)*dot_proda(ia, ir)
2478         END DO !ia
2479      END DO !ir
2480!$OMP END DO NOWAIT
2481
2482      !Vxg, third term (to be multiplied by drhoa)
2483      deriv => xc_dset_get_derivative(deriv_set, "(norm_drhoa)(norm_drhob)")
2484      CALL xc_derivative_get(deriv, deriv_data=d2e)
2485!$OMP DO SCHEDULE(STATIC)
2486      DO ir = sr, er
2487         DO ia = 1, na
2488            tmp(ia, ir) = tmp(ia, ir) + d2e(ia, ir, 1)*dot_prodb(ia, ir)
2489         END DO !ia
2490      END DO !ir
2491!$OMP END DO NOWAIT
2492
2493      !put tmp*drhoa in Vxg
2494      DO dir = 1, 3
2495!$OMP DO SCHEDULE(STATIC)
2496         DO ir = sr, er
2497            jr = ir - sr + 1
2498            DO ia = 1, na
2499               vxg(2)%array(ia, jr, dir) = tmp(ia, ir)*rho_set%drhoa(dir)%array(ia, ir, 1)
2500            END DO !ia
2501         END DO !ir
2502!$OMP END DO NOWAIT
2503      END DO !dir
2504
2505      !Vxg, fourth term (to be multiplied by drhob)
2506      deriv => xc_dset_get_derivative(deriv_set, "(rhob)(norm_drho)")
2507      CALL xc_derivative_get(deriv, deriv_data=d2e)
2508!$OMP DO SCHEDULE(STATIC)
2509      DO ir = sr, er
2510         jr = ir - sr + 1
2511         DO ia = 1, na
2512            tmp(ia, ir) = d2e(ia, ir, 1)*so(ia, jr)
2513         END DO !ia
2514      END DO !ir
2515!$OMP END DO NOWAIT
2516
2517      !Vxg, fifth term (to be multiplied by drhob)
2518      deriv => xc_dset_get_derivative(deriv_set, "(norm_drho)(norm_drhob)")
2519      CALL xc_derivative_get(deriv, deriv_data=d2e)
2520!$OMP DO SCHEDULE(STATIC)
2521      DO ir = sr, er
2522         DO ia = 1, na
2523            tmp(ia, ir) = tmp(ia, ir) + d2e(ia, ir, 1)*dot_prodb(ia, ir)
2524         END DO !ia
2525      END DO !ir
2526!$OMP END DO NOWAIT
2527
2528      !Vxg, sixth term (to be multiplied by drhob)
2529      deriv => xc_dset_get_derivative(deriv_set, "(norm_drho)(norm_drho)")
2530      CALL xc_derivative_get(deriv, deriv_data=d2e)
2531!$OMP DO SCHEDULE(STATIC)
2532      DO ir = sr, er
2533         DO ia = 1, na
2534            tmp(ia, ir) = tmp(ia, ir) + d2e(ia, ir, 1)*dot_proda(ia, ir)
2535         END DO !ia
2536      END DO !ir
2537!$OMP END DO NOWAIT
2538
2539      !put tmp*drhob in Vxg
2540      DO dir = 1, 3
2541!$OMP DO SCHEDULE(STATIC)
2542         DO ir = sr, er
2543            jr = ir - sr + 1
2544            DO ia = 1, na
2545               vxg(2)%array(ia, jr, dir) = vxg(2)%array(ia, jr, dir) + tmp(ia, ir)*rho_set%drhob(dir)%array(ia, ir, 1)
2546            END DO
2547         END DO
2548!$OMP END DO NOWAIT
2549      END DO
2550
2551      !Vxg, last term
2552      deriv => xc_dset_get_derivative(deriv_set, "(norm_drho)")
2553      CALL xc_derivative_get(deriv, deriv_data=d1e)
2554      DO dir = 1, 3
2555!$OMP DO SCHEDULE(STATIC)
2556         DO ir = sr, er
2557            jr = ir - sr + 1
2558            DO ia = 1, na
2559               vxg(2)%array(ia, jr, dir) = vxg(2)%array(ia, jr, dir) + d1e(ia, ir, 1)*dso(ia, jr, dir)
2560            END DO !ia
2561         END DO !ir
2562!$OMP END DO NOWAIT
2563      END DO !dir
2564
2565      !Vxg, take the grid weight into account
2566      DO dir = 1, 3
2567!$OMP DO SCHEDULE(STATIC)
2568         DO ir = sr, er
2569            jr = ir - sr + 1
2570            DO ia = 1, na
2571               vxg(2)%array(ia, jr, dir) = vxg(2)%array(ia, jr, dir)*weight(ia, ir)
2572            END DO !ia
2573         END DO !ir
2574!$OMP END DO NOWAIT
2575      END DO !dir
2576
2577      !Deal with f_bb, if so required
2578      IF (ub == 3) THEN
2579
2580         !Vxc, first term
2581         deriv => xc_dset_get_derivative(deriv_set, "(rhob)(norm_drhob)")
2582         CALL xc_derivative_get(deriv, deriv_data=d2e)
2583!$OMP DO SCHEDULE(STATIC)
2584         DO ir = sr, er
2585            jr = ir - sr + 1
2586            DO ia = 1, na
2587               vxc(3)%array(ia, jr) = d2e(ia, ir, 1)*dot_prodb(ia, ir)
2588            END DO !ia
2589         END DO !ir
2590!$OMP END DO NOWAIT
2591
2592         !Vxc, second term
2593         deriv => xc_dset_get_derivative(deriv_set, "(rhob)(norm_drho)")
2594         CALL xc_derivative_get(deriv, deriv_data=d2e)
2595!$OMP DO SCHEDULE(STATIC)
2596         DO ir = sr, er
2597            jr = ir - sr + 1
2598            DO ia = 1, na
2599               vxc(3)%array(ia, jr) = vxc(3)%array(ia, jr) + d2e(ia, ir, 1)*dot_proda(ia, ir)
2600            END DO !i
2601         END DO !ir
2602!$OMP END DO NOWAIT
2603
2604         !Vxc, take the grid weight into acocunt
2605!$OMP DO SCHEDULE(STATIC)
2606         DO ir = sr, er
2607            jr = ir - sr + 1
2608            DO ia = 1, na
2609               vxc(3)%array(ia, jr) = vxc(3)%array(ia, jr)*weight(ia, ir)
2610            END DO !ia
2611         END DO !ir
2612!$OMP END DO NOWAIT
2613
2614         !Vxg, first term (to be multiplied by drhob)
2615         deriv => xc_dset_get_derivative(deriv_set, "(rhob)(norm_drhob)")
2616         CALL xc_derivative_get(deriv, deriv_data=d2e)
2617!$OMP DO SCHEDULE(STATIC)
2618         DO ir = sr, er
2619            jr = ir - sr + 1
2620            DO ia = 1, na
2621               tmp(ia, ir) = d2e(ia, ir, 1)*so(ia, jr)
2622            END DO !ia
2623         END DO !ir
2624!$OMP END DO NOWAIT
2625
2626         !Vxg, second term (to be multiplied by drhob)
2627         deriv => xc_dset_get_derivative(deriv_set, "(norm_drhob)(norm_drho)")
2628         CALL xc_derivative_get(deriv, deriv_data=d2e)
2629!$OMP DO SCHEDULE(STATIC)
2630         DO ir = sr, er
2631            DO ia = 1, na
2632               tmp(ia, ir) = tmp(ia, ir) + d2e(ia, ir, 1)*dot_proda(ia, ir)
2633            END DO !ia
2634         END DO !ir
2635!$OMP END DO NOWAIT
2636
2637         !Vxg, third term (to be multiplied by drhob)
2638         deriv => xc_dset_get_derivative(deriv_set, "(norm_drhob)(norm_drhob)")
2639         CALL xc_derivative_get(deriv, deriv_data=d2e)
2640!$OMP DO SCHEDULE(STATIC)
2641         DO ir = sr, er
2642            DO ia = 1, na
2643               tmp(ia, ir) = tmp(ia, ir) + d2e(ia, ir, 1)*dot_prodb(ia, ir)
2644            END DO !ia
2645         END DO !ir
2646!$OMP END DO NOWAIT
2647
2648         !Vxg, fourth term (to be multiplied by drhob)
2649         deriv => xc_dset_get_derivative(deriv_set, "(norm_drhob)")
2650         CALL xc_derivative_get(deriv, deriv_data=d1e)
2651!$OMP DO SCHEDULE(STATIC)
2652         DO ir = sr, er
2653            DO ia = 1, na
2654               tmp(ia, ir) = tmp(ia, ir) - d1e(ia, ir, 1)*dot_prodb(ia, ir) &
2655                             /MAX(norm_drhob(ia, ir, 1), rho_set%drho_cutoff)**2
2656            END DO !ia
2657         END DO !ir
2658!$OMP END DO NOWAIT
2659
2660         !put tmp*drhob in Vxg (so that we can reuse it for drhoa terms)
2661         DO dir = 1, 3
2662!$OMP DO SCHEDULE(STATIC)
2663            DO ir = sr, er
2664               jr = ir - sr + 1
2665               DO ia = 1, na
2666                  vxg(3)%array(ia, jr, dir) = tmp(ia, ir)*rho_set%drhob(dir)%array(ia, ir, 1)
2667               END DO !ia
2668            END DO !ir
2669!$OMP END DO NOWAIT
2670         END DO !dir
2671
2672         !Vxg, fifth term (to be multiplied by drhoa)
2673         deriv => xc_dset_get_derivative(deriv_set, "(rhob)(norm_drho)")
2674         CALL xc_derivative_get(deriv, deriv_data=d2e)
2675!$OMP DO SCHEDULE(STATIC)
2676         DO ir = sr, er
2677            jr = ir - sr + 1
2678            DO ia = 1, na
2679               tmp(ia, ir) = d2e(ia, ir, 1)*so(ia, jr)
2680            END DO !ia
2681         END DO !ir
2682!$OMP END DO NOWAIT
2683
2684         !Vxg, sixth term (to be multiplied by drhoa)
2685         deriv => xc_dset_get_derivative(deriv_set, "(norm_drhob)(norm_drho)")
2686         CALL xc_derivative_get(deriv, deriv_data=d2e)
2687!$OMP DO SCHEDULE(STATIC)
2688         DO ir = sr, er
2689            DO ia = 1, na
2690               tmp(ia, ir) = tmp(ia, ir) + d2e(ia, ir, 1)*dot_prodb(ia, ir)
2691            END DO !ia
2692         END DO !ir
2693!$OMP END DO NOWAIT
2694
2695         !Vxg, seventh term (to be multiplied by drhoa)
2696         deriv => xc_dset_get_derivative(deriv_set, "(norm_drho)(norm_drho)")
2697         CALL xc_derivative_get(deriv, deriv_data=d2e)
2698!$OMP DO SCHEDULE(STATIC)
2699         DO ir = sr, er
2700            DO ia = 1, na
2701               tmp(ia, ir) = tmp(ia, ir) + d2e(ia, ir, 1)*dot_proda(ia, ir)
2702            END DO !ia
2703         END DO !ir
2704!$OMP END DO NOWAIT
2705
2706         !put tmp*drhoa in Vxg
2707         DO dir = 1, 3
2708!$OMP DO SCHEDULE(STATIC)
2709            DO ir = sr, er
2710               jr = ir - sr + 1
2711               DO ia = 1, na
2712                  vxg(3)%array(ia, jr, dir) = vxg(3)%array(ia, jr, dir) + tmp(ia, ir)*rho_set%drhoa(dir)%array(ia, ir, 1)
2713               END DO !ia
2714            END DO !ir
2715!$OMP END DO NOWAIT
2716         END DO !dir
2717
2718         !Vxg, last term
2719         deriv => xc_dset_get_derivative(deriv_set, "(norm_drhob)")
2720         CALL xc_derivative_get(deriv, deriv_data=d1e)
2721         DO dir = 1, 3
2722!$OMP DO SCHEDULE(STATIC)
2723            DO ir = sr, er
2724               jr = ir - sr + 1
2725               DO ia = 1, na
2726                  vxg(3)%array(ia, jr, dir) = vxg(3)%array(ia, jr, dir) + d1e(ia, ir, 1)*dso(ia, jr, dir)
2727               END DO !ia
2728            END DO !ir
2729!$OMP END DO NOWAIT
2730         END DO !dir
2731
2732         !Vxg, take the grid weight into account
2733         DO dir = 1, 3
2734!$OMP DO SCHEDULE(STATIC)
2735            DO ir = sr, er
2736               jr = ir - sr + 1
2737               DO ia = 1, na
2738                  vxg(3)%array(ia, jr, dir) = vxg(3)%array(ia, jr, dir)*weight(ia, ir)
2739               END DO !ia
2740            END DO !ir
2741!$OMP END DO NOWAIT
2742         END DO !dir
2743
2744      END IF !f_bb
2745
2746!$OMP END PARALLEL
2747
2748      CALL timestop(handle)
2749
2750   END SUBROUTINE get_vxc_vxg
2751
2752! **************************************************************************************************
2753!> \brief Integrate the fxc kernel in the spin-conserving case, be it closed- or open-shell
2754!> \param int_fxc the array containing the (P|fxc|Q) integrals
2755!> \param iatom the index of the current excited atom
2756!> \param ikind the index of the current excited kind
2757!> \param deriv_set the set of functional derivatives
2758!> \param bounds local bounds of the atomic grid
2759!> \param xas_atom_env ...
2760!> \param qs_env ...
2761! **************************************************************************************************
2762   SUBROUTINE integrate_sc_fxc(int_fxc, iatom, ikind, deriv_set, bounds, xas_atom_env, qs_env)
2763
2764      TYPE(cp_2d_r_p_type), DIMENSION(:, :), POINTER     :: int_fxc
2765      INTEGER, INTENT(IN)                                :: iatom, ikind
2766      TYPE(xc_derivative_set_type), POINTER              :: deriv_set
2767      INTEGER, DIMENSION(2, 3), INTENT(IN)               :: bounds
2768      TYPE(xas_atom_env_type), POINTER                   :: xas_atom_env
2769      TYPE(qs_environment_type), POINTER                 :: qs_env
2770
2771      CHARACTER(len=*), PARAMETER :: routineN = 'integrate_sc_fxc', &
2772         routineP = moduleN//':'//routineN
2773
2774      INTEGER                                            :: er, i, maxso, na, nr, nset, nsotot, &
2775                                                            nspins, ri_nsgf, sr, ub
2776      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: fxc, int_so
2777      REAL(dp), DIMENSION(:, :), POINTER                 :: ri_sphi_so
2778      TYPE(cp_3d_r_p_type), ALLOCATABLE, DIMENSION(:)    :: d2e
2779      TYPE(dft_control_type), POINTER                    :: dft_control
2780      TYPE(grid_atom_type), POINTER                      :: grid_atom
2781      TYPE(gto_basis_set_type), POINTER                  :: ri_basis
2782      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
2783      TYPE(xc_derivative_type), POINTER                  :: deriv
2784
2785      NULLIFY (grid_atom, deriv, ri_basis, ri_sphi_so, qs_kind_set, dft_control)
2786
2787      ! Initialization
2788      CALL get_qs_env(qs_env, qs_kind_set=qs_kind_set, dft_control=dft_control)
2789      grid_atom => xas_atom_env%grid_atom_set(ikind)%grid_atom
2790      na = grid_atom%ng_sphere
2791      sr = bounds(1, 2); er = bounds(2, 2)
2792      nr = er - er + 1
2793      CALL get_qs_kind(qs_kind_set(ikind), basis_set=ri_basis, basis_type="RI_XAS")
2794      CALL get_gto_basis_set(ri_basis, nset=nset, maxso=maxso, nsgf=ri_nsgf)
2795      nsotot = nset*maxso
2796      ri_sphi_so => xas_atom_env%ri_sphi_so(ikind)%array
2797      nspins = dft_control%nspins
2798
2799      ! Get the second derivatives
2800      ALLOCATE (d2e(3))
2801      deriv => xc_dset_get_derivative(deriv_set, "(rhoa)(rhoa)")
2802      CALL xc_derivative_get(deriv, deriv_data=d2e(1)%array)
2803      deriv => xc_dset_get_derivative(deriv_set, "(rhoa)(rhob)")
2804      CALL xc_derivative_get(deriv, deriv_data=d2e(2)%array)
2805      deriv => xc_dset_get_derivative(deriv_set, "(rhob)(rhob)")
2806      CALL xc_derivative_get(deriv, deriv_data=d2e(3)%array)
2807
2808      ! Allocate some work arrays
2809      ALLOCATE (fxc(na, sr:er))
2810      ALLOCATE (int_so(nsotot, nsotot))
2811
2812      ! Integrate for all three derivatives, taking the grid weight into account
2813      ! If closed shell, do not need to integrate beta-beta as it is the same as alpha-alpha
2814      ub = 2; IF (nspins == 2) ub = 3
2815      DO i = 1, ub
2816
2817         int_so = 0.0_dp
2818         fxc(:, sr:er) = d2e(i)%array(:, :, 1)*grid_atom%weight(:, sr:er)
2819         CALL integrate_so_prod(int_so, fxc, ikind, bounds, xas_atom_env, qs_env)
2820
2821         !contract into sgf. Array allocated on current processor only
2822         ALLOCATE (int_fxc(iatom, i)%array(ri_nsgf, ri_nsgf))
2823         CALL contract_so2sgf(int_fxc(iatom, i)%array, int_so, ri_basis, ri_sphi_so)
2824
2825      END DO
2826
2827   END SUBROUTINE integrate_sc_fxc
2828
2829! **************************************************************************************************
2830!> \brief Integrate the fxc kernel in the spin-flip case (open-shell assumed). The spin-flip LDA
2831!>        kernel reads: fxc = 1/(rhoa - rhob) * (dE/drhoa - dE/drhob)
2832!> \param int_fxc the array containing the (P|fxc|Q) integrals
2833!> \param iatom the index of the current excited atom
2834!> \param ikind the index of the current excited kind
2835!> \param rho_set the density in the atomic grid
2836!> \param deriv_set the set of functional derivatives
2837!> \param bounds the bounds of the local frid
2838!> \param xas_atom_env ...
2839!> \param qs_env ...
2840! **************************************************************************************************
2841   SUBROUTINE integrate_sf_fxc(int_fxc, iatom, ikind, rho_set, deriv_set, bounds, xas_atom_env, qs_env)
2842
2843      TYPE(cp_2d_r_p_type), DIMENSION(:, :), POINTER     :: int_fxc
2844      INTEGER, INTENT(IN)                                :: iatom, ikind
2845      TYPE(xc_rho_set_type), POINTER                     :: rho_set
2846      TYPE(xc_derivative_set_type), POINTER              :: deriv_set
2847      INTEGER, DIMENSION(2, 3), INTENT(IN)               :: bounds
2848      TYPE(xas_atom_env_type), POINTER                   :: xas_atom_env
2849      TYPE(qs_environment_type), POINTER                 :: qs_env
2850
2851      CHARACTER(len=*), PARAMETER :: routineN = 'integrate_sf_fxc', &
2852         routineP = moduleN//':'//routineN
2853
2854      INTEGER                                            :: er, ia, ir, maxso, na, nr, nset, nsotot, &
2855                                                            ri_nsgf, sr
2856      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: fxc, int_so
2857      REAL(dp), DIMENSION(:, :), POINTER                 :: ri_sphi_so
2858      REAL(dp), DIMENSION(:, :, :), POINTER              :: rhoa, rhob
2859      TYPE(cp_3d_r_p_type), ALLOCATABLE, DIMENSION(:)    :: d1e, d2e
2860      TYPE(dft_control_type), POINTER                    :: dft_control
2861      TYPE(grid_atom_type), POINTER                      :: grid_atom
2862      TYPE(gto_basis_set_type), POINTER                  :: ri_basis
2863      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
2864      TYPE(xc_derivative_type), POINTER                  :: deriv
2865
2866      NULLIFY (grid_atom, deriv, ri_basis, ri_sphi_so, qs_kind_set, rhoa, rhob, dft_control)
2867
2868      ! Initialization
2869      CALL get_qs_env(qs_env, qs_kind_set=qs_kind_set, dft_control=dft_control)
2870      grid_atom => xas_atom_env%grid_atom_set(ikind)%grid_atom
2871      na = grid_atom%ng_sphere
2872      sr = bounds(1, 2); er = bounds(2, 2)
2873      nr = er - sr + 1
2874      CALL get_qs_kind(qs_kind_set(ikind), basis_set=ri_basis, basis_type="RI_XAS")
2875      CALL get_gto_basis_set(ri_basis, nset=nset, maxso=maxso, nsgf=ri_nsgf)
2876      nsotot = nset*maxso
2877      ri_sphi_so => xas_atom_env%ri_sphi_so(ikind)%array
2878      rhoa => rho_set%rhoa
2879      rhob => rho_set%rhob
2880
2881      ALLOCATE (d1e(2))
2882      deriv => xc_dset_get_derivative(deriv_set, "(rhoa)")
2883      CALL xc_derivative_get(deriv, deriv_data=d1e(1)%array)
2884      deriv => xc_dset_get_derivative(deriv_set, "(rhob)")
2885      CALL xc_derivative_get(deriv, deriv_data=d1e(2)%array)
2886
2887      ! In case rhoa -> rhob, take the limit, which involves the (already computed) second derivatives
2888      ALLOCATE (d2e(3))
2889      deriv => xc_dset_get_derivative(deriv_set, "(rhoa)(rhoa)")
2890      CALL xc_derivative_get(deriv, deriv_data=d2e(1)%array)
2891      deriv => xc_dset_get_derivative(deriv_set, "(rhoa)(rhob)")
2892      CALL xc_derivative_get(deriv, deriv_data=d2e(2)%array)
2893      deriv => xc_dset_get_derivative(deriv_set, "(rhob)(rhob)")
2894      CALL xc_derivative_get(deriv, deriv_data=d2e(3)%array)
2895
2896      !Compute the kernel on the grid. Already take weight into acocunt there
2897      ALLOCATE (fxc(na, sr:er))
2898!$OMP PARALLEL DO COLLAPSE(2) SCHEDULE(STATIC) DEFAULT(NONE), &
2899!$OMP SHARED(grid_atom,fxc,d1e,d2e,dft_control,na,sr,er,rhoa,rhob), &
2900!$OMP PRIVATE(ia,ir)
2901      DO ir = sr, er
2902         DO ia = 1, na
2903
2904            !Need to be careful not to devide by zero. Assume that if rhoa == rhob, then
2905            !take the limit fxc = 0.5* (f_aa + f_bb - 2f_ab)
2906            IF (ABS(rhoa(ia, ir, 1) - rhob(ia, ir, 1)) > dft_control%qs_control%eps_rho_rspace) THEN
2907               fxc(ia, ir) = grid_atom%weight(ia, ir)/(rhoa(ia, ir, 1) - rhob(ia, ir, 1)) &
2908                             *(d1e(1)%array(ia, ir, 1) - d1e(2)%array(ia, ir, 1))
2909            ELSE
2910               fxc(ia, ir) = 0.5_dp*grid_atom%weight(ia, ir)* &
2911                             (d2e(1)%array(ia, ir, 1) + d2e(3)%array(ia, ir, 1) - 2._dp*d2e(2)%array(ia, ir, 1))
2912            END IF
2913
2914         END DO
2915      END DO
2916!$OMP END PARALLEL DO
2917
2918      ! Integrate wrt to so
2919      ALLOCATE (int_so(nsotot, nsotot))
2920      int_so = 0.0_dp
2921      CALL integrate_so_prod(int_so, fxc, ikind, bounds, xas_atom_env, qs_env)
2922
2923      ! Contract into sgf. Array located on current processor only
2924      ALLOCATE (int_fxc(iatom, 4)%array(ri_nsgf, ri_nsgf))
2925      CALL contract_so2sgf(int_fxc(iatom, 4)%array, int_so, ri_basis, ri_sphi_so)
2926
2927   END SUBROUTINE integrate_sf_fxc
2928
2929! **************************************************************************************************
2930!> \brief Contract spherical orbitals to spherical Gaussians (so to sgf)
2931!> \param int_sgf the arary with the sgf integrals
2932!> \param int_so the array with the so integrals (to contract)
2933!> \param basis the corresponding gto basis set
2934!> \param sphi_so the contraction coefficients for the s:
2935! **************************************************************************************************
2936   SUBROUTINE contract_so2sgf(int_sgf, int_so, basis, sphi_so)
2937
2938      REAL(dp), DIMENSION(:, :)                          :: int_sgf, int_so
2939      TYPE(gto_basis_set_type), POINTER                  :: basis
2940      REAL(dp), DIMENSION(:, :)                          :: sphi_so
2941
2942      CHARACTER(len=*), PARAMETER :: routineN = 'contract_so2sgf', &
2943         routineP = moduleN//':'//routineN
2944
2945      INTEGER                                            :: iset, jset, maxso, nset, nsoi, nsoj, &
2946                                                            sgfi, sgfj, starti, startj
2947      INTEGER, DIMENSION(:), POINTER                     :: lmax, npgf, nsgf_set
2948      INTEGER, DIMENSION(:, :), POINTER                  :: first_sgf
2949
2950      NULLIFY (nsgf_set, npgf, lmax, first_sgf)
2951
2952      CALL get_gto_basis_set(basis, nset=nset, maxso=maxso, nsgf_set=nsgf_set, first_sgf=first_sgf, &
2953                             npgf=npgf, lmax=lmax)
2954
2955      DO iset = 1, nset
2956         starti = (iset - 1)*maxso + 1
2957         nsoi = npgf(iset)*nsoset(lmax(iset))
2958         sgfi = first_sgf(1, iset)
2959
2960         DO jset = 1, nset
2961            startj = (jset - 1)*maxso + 1
2962            nsoj = npgf(jset)*nsoset(lmax(jset))
2963            sgfj = first_sgf(1, jset)
2964
2965            CALL ab_contract(int_sgf(sgfi:sgfi + nsgf_set(iset) - 1, sgfj:sgfj + nsgf_set(jset) - 1), &
2966                             int_so(starti:starti + nsoi - 1, startj:startj + nsoj - 1), &
2967                             sphi_so(:, sgfi:), sphi_so(:, sgfj:), nsoi, nsoj, &
2968                             nsgf_set(iset), nsgf_set(jset))
2969         END DO !jset
2970      END DO !iset
2971
2972   END SUBROUTINE contract_so2sgf
2973
2974! **************************************************************************************************
2975!> \brief Integrate the product of spherical gaussian orbitals with the xc kernel on the atomic grid
2976!> \param intso the integral in terms of spherical orbitals
2977!> \param fxc the xc kernel at each grid point
2978!> \param ikind the kind of the atom we integrate for
2979!> \param bounds the local bounds of the atomic grid
2980!> \param xas_atom_env ...
2981!> \param qs_env ...
2982!> \note Largely copied from gaVxcgb_noGC. Rewritten here because we need our own atomic grid,
2983!>       harmonics, basis set and we do not need the soft vxc. Could have tweaked the original, but
2984!>       it would have been messy. Also we do not need rho_atom (too big and fancy for us)
2985!>       We also go over the whole range of angular momentum l
2986! **************************************************************************************************
2987   SUBROUTINE integrate_so_prod(intso, fxc, ikind, bounds, xas_atom_env, qs_env)
2988
2989      REAL(dp), DIMENSION(:, :), INTENT(INOUT)           :: intso
2990      REAL(dp), DIMENSION(:, :), INTENT(IN)              :: fxc
2991      INTEGER, INTENT(IN)                                :: ikind
2992      INTEGER, DIMENSION(2, 3), INTENT(IN)               :: bounds
2993      TYPE(xas_atom_env_type), POINTER                   :: xas_atom_env
2994      TYPE(qs_environment_type), POINTER                 :: qs_env
2995
2996      CHARACTER(len=*), PARAMETER :: routineN = 'integrate_so_prod', &
2997         routineP = moduleN//':'//routineN
2998
2999      INTEGER :: er, handle, ia, ic, icg, ipgf1, ipgf2, iset1, iset2, iso, iso1, iso2, l, ld, &
3000         lmax12, lmin12, m1, m2, max_iso_not0, max_iso_not0_local, max_s_harm, maxl, maxso, n1, &
3001         n2, na, ngau1, ngau2, nngau1, nr, nset, size1, sr
3002      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: cg_n_list
3003      INTEGER, ALLOCATABLE, DIMENSION(:, :, :)           :: cg_list
3004      INTEGER, DIMENSION(:), POINTER                     :: lmax, lmin, npgf
3005      REAL(dp), ALLOCATABLE, DIMENSION(:)                :: g1, g2
3006      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: gfxcg, gg, matso
3007      REAL(dp), DIMENSION(:, :), POINTER                 :: zet
3008      REAL(dp), DIMENSION(:, :, :), POINTER              :: my_CG
3009      TYPE(grid_atom_type), POINTER                      :: grid_atom
3010      TYPE(gto_basis_set_type), POINTER                  :: ri_basis
3011      TYPE(harmonics_atom_type), POINTER                 :: harmonics
3012      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
3013
3014      CALL timeset(routineN, handle)
3015
3016      NULLIFY (grid_atom, harmonics, ri_basis, qs_kind_set, lmax, lmin, npgf, zet, my_CG)
3017
3018!  Initialization
3019      CALL get_qs_env(qs_env, qs_kind_set=qs_kind_set)
3020      CALL get_qs_kind(qs_kind_set(ikind), basis_set=ri_basis, basis_type="RI_XAS")
3021      grid_atom => xas_atom_env%grid_atom_set(ikind)%grid_atom
3022      harmonics => xas_atom_env%harmonics_atom_set(ikind)%harmonics_atom
3023
3024      CALL get_gto_basis_set(ri_basis, lmax=lmax, lmin=lmin, maxso=maxso, maxl=maxl, npgf=npgf, &
3025                             nset=nset, zet=zet)
3026
3027      na = grid_atom%ng_sphere
3028      sr = bounds(1, 2); er = bounds(2, 2)
3029      nr = er - sr + 1
3030      my_CG => harmonics%my_CG
3031      max_iso_not0 = harmonics%max_iso_not0
3032      max_s_harm = harmonics%max_s_harm
3033      CPASSERT(2*maxl .LE. indso(1, max_iso_not0))
3034
3035      ALLOCATE (g1(nr), g2(nr), gg(nr, 0:2*maxl))
3036      ALLOCATE (gfxcg(na, 0:2*maxl))
3037      ALLOCATE (matso(nsoset(maxl), nsoset(maxl)))
3038      ALLOCATE (cg_list(2, nsoset(maxl)**2, max_s_harm), cg_n_list(max_s_harm))
3039
3040      g1 = 0.0_dp
3041      g2 = 0.0_dp
3042      m1 = 0
3043!  Loop over the product of so
3044      DO iset1 = 1, nset
3045         n1 = nsoset(lmax(iset1))
3046         m2 = 0
3047         DO iset2 = 1, nset
3048            CALL get_none0_cg_list(my_CG, lmin(iset1), lmax(iset1), lmin(iset2), lmax(iset2), &
3049                                   max_s_harm, lmax(iset1) + lmax(iset2), cg_list, cg_n_list, &
3050                                   max_iso_not0_local)
3051            CPASSERT(max_iso_not0_local .LE. max_iso_not0)
3052
3053            n2 = nsoset(lmax(iset2))
3054            DO ipgf1 = 1, npgf(iset1)
3055               ngau1 = n1*(ipgf1 - 1) + m1
3056               size1 = nsoset(lmax(iset1)) - nsoset(lmin(iset1) - 1)
3057               nngau1 = nsoset(lmin(iset1) - 1) + ngau1
3058
3059               g1(:) = EXP(-zet(ipgf1, iset1)*grid_atom%rad2(sr:er))
3060               DO ipgf2 = 1, npgf(iset2)
3061                  ngau2 = n2*(ipgf2 - 1) + m2
3062
3063                  g2(:) = EXP(-zet(ipgf2, iset2)*grid_atom%rad2(sr:er))
3064                  lmin12 = lmin(iset1) + lmin(iset2)
3065                  lmax12 = lmax(iset1) + lmax(iset2)
3066
3067                  !get the gaussian product
3068                  gg = 0.0_dp
3069                  IF (lmin12 == 0) THEN
3070                     gg(:, lmin12) = g1(:)*g2(:)
3071                  ELSE
3072                     gg(:, lmin12) = grid_atom%rad2l(sr:er, lmin12)*g1(:)*g2(:)
3073                  END IF
3074
3075                  DO l = lmin12 + 1, lmax12
3076                     gg(:, l) = grid_atom%rad(sr:er)*gg(:, l - 1)
3077                  END DO
3078
3079                  ld = lmax12 + 1
3080                  CALL dgemm('N', 'N', na, ld, nr, 1.0_dp, fxc(1:na, 1:nr), na, gg(:, 0:lmax12), &
3081                             nr, 0.0_dp, gfxcg(1:na, 0:lmax12), na)
3082
3083                  !integrate
3084                  matso = 0.0_dp
3085                  DO iso = 1, max_iso_not0_local
3086                     DO icg = 1, cg_n_list(iso)
3087                        iso1 = cg_list(1, icg, iso)
3088                        iso2 = cg_list(2, icg, iso)
3089                        l = indso(1, iso1) + indso(1, iso2)
3090
3091                        DO ia = 1, na
3092                           matso(iso1, iso2) = matso(iso1, iso2) + gfxcg(ia, l)* &
3093                                               my_CG(iso1, iso2, iso)*harmonics%slm(ia, iso)
3094                        END DO !ia
3095                     END DO !icg
3096                  END DO !iso
3097
3098                  !write in integral matrix
3099                  DO ic = nsoset(lmin(iset2) - 1) + 1, nsoset(lmax(iset2))
3100                     iso1 = nsoset(lmin(iset1) - 1) + 1
3101                     iso2 = ngau2 + ic
3102                     CALL daxpy(size1, 1.0_dp, matso(iso1, ic), 1, intso(nngau1 + 1, iso2), 1)
3103                  END DO !ic
3104
3105               END DO !ipgf2
3106            END DO ! ipgf1
3107            m2 = m2 + maxso
3108         END DO !iset2
3109         m1 = m1 + maxso
3110      END DO !iset1
3111
3112      CALL timestop(handle)
3113
3114   END SUBROUTINE integrate_so_prod
3115
3116! **************************************************************************************************
3117!> \brief This routine computes the integral of a potential V wrt the derivitive of the spherical
3118!>        orbitals, that is <df/dx|V|dg/dy> on the atomic grid.
3119!> \param intso the integral in terms of the spherical orbitals (well, their derivitive)
3120!> \param V the potential (put on the grid and weighted) to integrate
3121!> \param ikind the atomic kind for which we integrate
3122!> \param xas_atom_env ...
3123!> \param qs_env ...
3124!> \note The atomic grids are taken fron xas_atom_env and the orbitals are the normal ones. Ok since
3125!>       the grid and spherical harmonics for those grids are at least as good as the GAPW ones
3126! **************************************************************************************************
3127   SUBROUTINE integrate_so_dxdy_prod(intso, V, ikind, xas_atom_env, qs_env)
3128
3129      REAL(dp), DIMENSION(:, :, :), INTENT(INOUT)        :: intso
3130      REAL(dp), DIMENSION(:, :), INTENT(IN)              :: V
3131      INTEGER, INTENT(IN)                                :: ikind
3132      TYPE(xas_atom_env_type), POINTER                   :: xas_atom_env
3133      TYPE(qs_environment_type), POINTER                 :: qs_env
3134
3135      CHARACTER(len=*), PARAMETER :: routineN = 'integrate_so_dxdy_prod', &
3136         routineP = moduleN//':'//routineN
3137
3138      INTEGER                                            :: i, ipgf, iset, iso, j, jpgf, jset, jso, &
3139                                                            k, l, maxso, na, nr, nset, starti, &
3140                                                            startj
3141      INTEGER, DIMENSION(:), POINTER                     :: lmax, lmin, npgf
3142      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: fga, fgr, r1, r2, work
3143      REAL(dp), ALLOCATABLE, DIMENSION(:, :, :)          :: a1, a2
3144      REAL(dp), DIMENSION(:, :), POINTER                 :: slm, zet
3145      REAL(dp), DIMENSION(:, :, :), POINTER              :: dslm_dxyz
3146      TYPE(grid_atom_type), POINTER                      :: grid_atom
3147      TYPE(gto_basis_set_type), POINTER                  :: basis
3148      TYPE(harmonics_atom_type), POINTER                 :: harmonics
3149      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
3150
3151      NULLIFY (grid_atom, harmonics, basis, qs_kind_set, dslm_dxyz, slm, lmin, lmax, npgf, zet)
3152
3153!  Getting what we need from the atom_env
3154      harmonics => xas_atom_env%harmonics_atom_set(ikind)%harmonics_atom
3155      grid_atom => xas_atom_env%grid_atom_set(ikind)%grid_atom
3156
3157      CALL get_qs_env(qs_env, qs_kind_set=qs_kind_set)
3158      CALL get_qs_kind(qs_kind_set(ikind), basis_set=basis, basis_type="ORB")
3159
3160      na = grid_atom%ng_sphere
3161      nr = grid_atom%nr
3162
3163      slm => harmonics%slm
3164      dslm_dxyz => harmonics%dslm_dxyz
3165
3166!  Getting what we need from the orbital basis
3167      CALL get_gto_basis_set(gto_basis_set=basis, lmax=lmax, lmin=lmin, &
3168                             maxso=maxso, npgf=npgf, nset=nset, zet=zet)
3169
3170!  Separate the functions into purely r and purely angular parts, compute them all
3171!  and use matrix mutliplication for the integral. We use f for x derivative ang g for y
3172
3173      ! Separating the functions. Note that the radial part is the same for x and y derivatives
3174      ALLOCATE (a1(na, nset*maxso, 3), a2(na, nset*maxso, 3))
3175      ALLOCATE (r1(nr, nset*maxso), r2(nr, nset*maxso))
3176      a1 = 0.0_dp; a2 = 0.0_dp
3177      r1 = 0.0_dp; r2 = 0.0_dp
3178
3179      DO iset = 1, nset
3180         DO ipgf = 1, npgf(iset)
3181            starti = (iset - 1)*maxso + (ipgf - 1)*nsoset(lmax(iset))
3182            DO iso = nsoset(lmin(iset) - 1) + 1, nsoset(lmax(iset))
3183               l = indso(1, iso)
3184
3185               ! The x derivitive of the spherical orbital, divided in angular and radial parts
3186               ! Two of each are needed because d/dx(r^l Y_lm) * exp(-al*r^2) + r^l Y_lm * ! d/dx(exp-al*r^2)
3187
3188               ! the purely radial part of d/dx(r^l Y_lm) * exp(-al*r^2) (same for y)
3189               r1(1:nr, starti + iso) = grid_atom%rad(1:nr)**(l - 1)*EXP(-zet(ipgf, iset)*grid_atom%rad2(1:nr))
3190
3191               ! the purely radial part of r^l Y_lm * d/dx(exp-al*r^2) (same for y)
3192               r2(1:nr, starti + iso) = -2.0_dp*zet(ipgf, iset)*grid_atom%rad(1:nr)**(l + 1) &
3193                                        *EXP(-zet(ipgf, iset)*grid_atom%rad2(1:nr))
3194
3195               DO i = 1, 3
3196                  ! the purely angular part of d/dx(r^l Y_lm) * exp(-al*r^2)
3197                  a1(1:na, starti + iso, i) = dslm_dxyz(i, 1:na, iso)
3198
3199                  ! the purely angular part of r^l Y_lm * d/dx(exp-al*r^2)
3200                  a2(1:na, starti + iso, i) = harmonics%a(i, 1:na)*slm(1:na, iso)
3201               END DO
3202
3203            END DO !iso
3204         END DO !ipgf
3205      END DO !iset
3206
3207      ! Do the integration in terms of so using matrix products
3208      intso = 0.0_dp
3209      ALLOCATE (fga(na, 1))
3210      ALLOCATE (fgr(nr, 1))
3211      ALLOCATE (work(na, 1))
3212      fga = 0.0_dp; fgr = 0.0_dp; work = 0.0_dp
3213
3214      DO iset = 1, nset
3215         DO jset = 1, nset
3216            DO ipgf = 1, npgf(iset)
3217               starti = (iset - 1)*maxso + (ipgf - 1)*nsoset(lmax(iset))
3218               DO jpgf = 1, npgf(jset)
3219                  startj = (jset - 1)*maxso + (jpgf - 1)*nsoset(lmax(jset))
3220
3221                  DO i = 1, 3
3222                     j = MOD(i, 3) + 1
3223                     k = MOD(i + 1, 3) + 1
3224
3225                     DO iso = nsoset(lmin(iset) - 1) + 1, nsoset(lmax(iset))
3226                        DO jso = nsoset(lmin(jset) - 1) + 1, nsoset(lmax(jset))
3227
3228                           !Two component per function => 4 terms in total
3229
3230                           ! take r1*a1(j) * V * r1*a1(k)
3231                           fgr(1:nr, 1) = r1(1:nr, starti + iso)*r1(1:nr, startj + jso)
3232                           fga(1:na, 1) = a1(1:na, starti + iso, j)*a1(1:na, startj + jso, k)
3233
3234                           CALL dgemm('N', 'N', na, 1, nr, 1.0_dp, V, na, fgr, nr, 0.0_dp, work, na)
3235                           CALL dgemm('T', 'N', 1, 1, na, 1.0_dp, work, na, fga, na, 0.0_dp, &
3236                                      intso(starti + iso, startj + jso, i), 1)
3237
3238                           ! add r1*a1(j) * V * r2*a2(k)
3239                           fgr(1:nr, 1) = r1(1:nr, starti + iso)*r2(1:nr, startj + jso)
3240                           fga(1:na, 1) = a1(1:na, starti + iso, j)*a2(1:na, startj + jso, k)
3241
3242                           CALL dgemm('N', 'N', na, 1, nr, 1.0_dp, V, na, fgr, nr, 0.0_dp, work, na)
3243                           CALL dgemm('T', 'N', 1, 1, na, 1.0_dp, work, na, fga, na, 1.0_dp, &
3244                                      intso(starti + iso, startj + jso, i), 1)
3245
3246                           ! add r2*a2(j) * V * r1*a1(k)
3247                           fgr(1:nr, 1) = r2(1:nr, starti + iso)*r1(1:nr, startj + jso)
3248                           fga(1:na, 1) = a2(1:na, starti + iso, j)*a1(1:na, startj + jso, k)
3249
3250                           CALL dgemm('N', 'N', na, 1, nr, 1.0_dp, V, na, fgr, nr, 0.0_dp, work, na)
3251                           CALL dgemm('T', 'N', 1, 1, na, 1.0_dp, work, na, fga, na, 1.0_dp, &
3252                                      intso(starti + iso, startj + jso, i), 1)
3253
3254                           ! add the last term: r2*a2(j) * V * r2*a2(k)
3255                           fgr(1:nr, 1) = r2(1:nr, starti + iso)*r2(1:nr, startj + jso)
3256                           fga(1:na, 1) = a2(1:na, starti + iso, j)*a2(1:na, startj + jso, k)
3257
3258                           CALL dgemm('N', 'N', na, 1, nr, 1.0_dp, V, na, fgr, nr, 0.0_dp, work, na)
3259                           CALL dgemm('T', 'N', 1, 1, na, 1.0_dp, work, na, fga, na, 1.0_dp, &
3260                                      intso(starti + iso, startj + jso, i), 1)
3261
3262                        END DO !jso
3263                     END DO !iso
3264
3265                  END DO !i
3266               END DO !jpgf
3267            END DO !ipgf
3268         END DO !jset
3269      END DO !iset
3270
3271      DO i = 1, 3
3272         intso(:, :, i) = intso(:, :, i) - TRANSPOSE(intso(:, :, i))
3273      END DO
3274
3275   END SUBROUTINE integrate_so_dxdy_prod
3276
3277! **************************************************************************************************
3278!> \brief Computes the SOC matrix elements with respect to the ORB basis set for each atomic kind
3279!>        and put them as the block diagonal of dbcsr_matrix
3280!> \param matrix_soc the matrix where the SOC is stored
3281!> \param xas_atom_env ...
3282!> \param qs_env ...
3283!> \note We compute: <da_dx|V\(4c^2-2V)|db_dy> - <da_dy|V\(4c^2-2V)|db_dx>, where V is a model
3284!>       potential on the atomic grid. What we get is purely imaginary
3285! **************************************************************************************************
3286   SUBROUTINE integrate_soc_atoms(matrix_soc, xas_atom_env, qs_env)
3287
3288      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_soc
3289      TYPE(xas_atom_env_type), POINTER                   :: xas_atom_env
3290      TYPE(qs_environment_type), POINTER                 :: qs_env
3291
3292      CHARACTER(len=*), PARAMETER :: routineN = 'integrate_soc_atoms', &
3293         routineP = moduleN//':'//routineN
3294
3295      INTEGER                                            :: blk, handle, i, iat, ikind, ir, jat, &
3296                                                            maxso, na, nkind, nr, nset, nsgf
3297      REAL(dp)                                           :: zeff
3298      REAL(dp), ALLOCATABLE, DIMENSION(:)                :: Vr
3299      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: V
3300      REAL(dp), ALLOCATABLE, DIMENSION(:, :, :)          :: intso
3301      REAL(dp), DIMENSION(:, :), POINTER                 :: sphi_so
3302      REAL(dp), DIMENSION(:, :, :), POINTER              :: intsgf
3303      TYPE(cp_3d_r_p_type), DIMENSION(:), POINTER        :: int_soc
3304      TYPE(dbcsr_iterator_type)                          :: iter
3305      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_s
3306      TYPE(grid_atom_type), POINTER                      :: grid
3307      TYPE(gto_basis_set_type), POINTER                  :: basis
3308      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
3309      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
3310
3311      NULLIFY (int_soc, basis, qs_kind_set, sphi_so, matrix_s)
3312      NULLIFY (particle_set)
3313
3314      CALL timeset(routineN, handle)
3315
3316!  Initialization
3317      CALL get_qs_env(qs_env, nkind=nkind, qs_kind_set=qs_kind_set, matrix_s=matrix_s, &
3318                      particle_set=particle_set)
3319      ALLOCATE (int_soc(nkind))
3320
3321!  Loop over the kinds to compute the integrals
3322      DO ikind = 1, nkind
3323
3324         CALL get_qs_kind(qs_kind_set(ikind), basis_set=basis, basis_type="ORB", zeff=zeff)
3325         CALL get_gto_basis_set(basis, nset=nset, maxso=maxso)
3326         ALLOCATE (intso(nset*maxso, nset*maxso, 3))
3327
3328         ! compute the model potential on the grid
3329         grid => xas_atom_env%grid_atom_set(ikind)%grid_atom
3330         nr = grid%nr
3331         na = grid%ng_sphere
3332         ALLOCATE (Vr(nr))
3333         !TODO: do we need potential contribution from neighboring atoms ?!?
3334         CALL calculate_model_potential(Vr, grid, zeff)
3335
3336         !Compute V/(4c^2-2V) and weight it
3337         ALLOCATE (V(na, nr))
3338         V = 0.0_dp
3339         DO ir = 1, nr
3340            CALL daxpy(na, Vr(ir)/(4.0_dp*c_light_au**2 - 2.0_dp*Vr(ir)), grid%weight(1:na, ir), 1, &
3341                       V(1:na, ir), 1)
3342         END DO
3343         DEALLOCATE (Vr)
3344
3345         ! compute the integral <da_dx|...|db_dy> in terms of so
3346         CALL integrate_so_dxdy_prod(intso, V, ikind, xas_atom_env, qs_env)
3347         DEALLOCATE (V)
3348
3349         ! contract in terms of sgf
3350         CALL get_gto_basis_set(basis, nsgf=nsgf)
3351         ALLOCATE (int_soc(ikind)%array(nsgf, nsgf, 3))
3352         intsgf => int_soc(ikind)%array
3353         sphi_so => xas_atom_env%orb_sphi_so(ikind)%array
3354         intsgf = 0.0_dp
3355
3356         DO i = 1, 3
3357            CALL contract_so2sgf(intsgf(:, :, i), intso(:, :, i), basis, sphi_so)
3358         END DO
3359
3360         DEALLOCATE (intso)
3361      END DO !ikind
3362
3363!  Build the matrix_soc based on the matrix_s (but anti-symmetric)
3364      DO i = 1, 3
3365         CALL dbcsr_create(matrix_soc(i)%matrix, name="SOC MATRIX", template=matrix_s(1)%matrix, &
3366                           matrix_type="A")
3367      END DO
3368
3369!  Iterate over its diagonal blocks and fill=it
3370      CALL dbcsr_iterator_start(iter, matrix_s(1)%matrix)
3371      DO WHILE (dbcsr_iterator_blocks_left(iter))
3372
3373         CALL dbcsr_iterator_next_block(iter, row=iat, column=jat, blk=blk)
3374         IF (.NOT. iat == jat) CYCLE
3375         ikind = particle_set(iat)%atomic_kind%kind_number
3376
3377         DO i = 1, 3
3378            CALL dbcsr_put_block(matrix_soc(i)%matrix, iat, iat, int_soc(ikind)%array(:, :, i))
3379         END DO
3380
3381      END DO !iat
3382      CALL dbcsr_iterator_stop(iter)
3383      DO i = 1, 3
3384         CALL dbcsr_finalize(matrix_soc(i)%matrix)
3385      END DO
3386
3387! Clean-up
3388      DO ikind = 1, nkind
3389         DEALLOCATE (int_soc(ikind)%array)
3390      END DO
3391      DEALLOCATE (int_soc)
3392
3393      CALL timestop(handle)
3394
3395   END SUBROUTINE integrate_soc_atoms
3396
3397END MODULE xas_tdp_atom
3398