1!--------------------------------------------------------------------------------------------------!
2!   CP2K: A general program to perform molecular dynamics simulations                              !
3!   Copyright (C) 2000 - 2020  CP2K developers group                                               !
4!--------------------------------------------------------------------------------------------------!
5
6! **************************************************************************************************
7!> \brief This module deals with all the integrals done on local atomic grids in xas_tdp. This is
8!>        mostly used to compute the xc kernel matrix elements wrt two RI basis elements (centered
9!>        on the same excited atom) <P|fxc(r)|Q>, where the kernel fxc is purely a function of the
10!>        ground state density and r. This is also used to compute the SOC matrix elements in the
11!>        orbital basis
12! **************************************************************************************************
13MODULE xas_tdp_atom
14   USE ai_contraction_sphi,             ONLY: ab_contract
15   USE atom_operators,                  ONLY: calculate_model_potential
16   USE basis_set_types,                 ONLY: get_gto_basis_set,&
17                                              gto_basis_set_p_type,&
18                                              gto_basis_set_type
19   USE cell_types,                      ONLY: cell_type,&
20                                              pbc
21   USE cp_array_utils,                  ONLY: cp_1d_i_p_type,&
22                                              cp_1d_r_p_type,&
23                                              cp_2d_r_p_type,&
24                                              cp_3d_r_p_type
25   USE cp_blacs_env,                    ONLY: cp_blacs_env_type
26   USE cp_control_types,                ONLY: dft_control_type,&
27                                              qs_control_type
28   USE cp_dbcsr_cholesky,               ONLY: cp_dbcsr_cholesky_decompose,&
29                                              cp_dbcsr_cholesky_invert
30   USE cp_dbcsr_operations,             ONLY: cp_dbcsr_dist2d_to_dist,&
31                                              dbcsr_deallocate_matrix_set
32   USE cp_log_handling,                 ONLY: cp_logger_get_default_io_unit
33   USE cp_para_env,                     ONLY: cp_para_env_create
34   USE cp_para_types,                   ONLY: cp_para_env_type
35   USE dbcsr_api,                       ONLY: &
36        dbcsr_complete_redistribute, dbcsr_create, dbcsr_distribution_get, dbcsr_distribution_new, &
37        dbcsr_distribution_release, dbcsr_distribution_type, dbcsr_filter, dbcsr_finalize, &
38        dbcsr_get_block_p, dbcsr_get_stored_coordinates, dbcsr_iterator_blocks_left, &
39        dbcsr_iterator_next_block, dbcsr_iterator_start, dbcsr_iterator_stop, dbcsr_iterator_type, &
40        dbcsr_p_type, dbcsr_put_block, dbcsr_release, dbcsr_replicate_all, dbcsr_type
41   USE dbcsr_tensor_api,                ONLY: dbcsr_t_destroy,&
42                                              dbcsr_t_get_block,&
43                                              dbcsr_t_iterator_blocks_left,&
44                                              dbcsr_t_iterator_next_block,&
45                                              dbcsr_t_iterator_start,&
46                                              dbcsr_t_iterator_stop,&
47                                              dbcsr_t_iterator_type,&
48                                              dbcsr_t_type
49   USE distribution_2d_types,           ONLY: distribution_2d_release,&
50                                              distribution_2d_type
51   USE input_constants,                 ONLY: do_potential_id
52   USE input_section_types,             ONLY: section_vals_get_subs_vals,&
53                                              section_vals_type
54   USE kinds,                           ONLY: default_string_length,&
55                                              dp
56   USE lebedev,                         ONLY: deallocate_lebedev_grids,&
57                                              get_number_of_lebedev_grid,&
58                                              init_lebedev_grids,&
59                                              lebedev_grid
60   USE libint_2c_3c,                    ONLY: libint_potential_type
61   USE mathconstants,                   ONLY: dfac,&
62                                              pi
63   USE mathlib,                         ONLY: get_diag
64   USE memory_utilities,                ONLY: reallocate
65   USE message_passing,                 ONLY: mp_comm_split_direct,&
66                                              mp_irecv,&
67                                              mp_isend,&
68                                              mp_sum,&
69                                              mp_sync,&
70                                              mp_waitall
71   USE orbital_pointers,                ONLY: indco,&
72                                              indso,&
73                                              nco,&
74                                              ncoset,&
75                                              nso,&
76                                              nsoset
77   USE orbital_transformation_matrices, ONLY: orbtramat
78   USE particle_methods,                ONLY: get_particle_set
79   USE particle_types,                  ONLY: particle_type
80   USE physcon,                         ONLY: c_light_au
81   USE qs_environment_types,            ONLY: get_qs_env,&
82                                              qs_environment_type
83   USE qs_grid_atom,                    ONLY: allocate_grid_atom,&
84                                              create_grid_atom,&
85                                              grid_atom_type
86   USE qs_harmonics_atom,               ONLY: allocate_harmonics_atom,&
87                                              create_harmonics_atom,&
88                                              get_maxl_CG,&
89                                              get_none0_cg_list,&
90                                              harmonics_atom_type
91   USE qs_integral_utils,               ONLY: basis_set_list_setup
92   USE qs_kind_types,                   ONLY: get_qs_kind,&
93                                              get_qs_kind_set,&
94                                              qs_kind_type
95   USE qs_neighbor_list_types,          ONLY: neighbor_list_set_p_type,&
96                                              release_neighbor_list_sets
97   USE qs_overlap,                      ONLY: build_overlap_matrix_simple
98   USE qs_rho_types,                    ONLY: qs_rho_get,&
99                                              qs_rho_type
100   USE spherical_harmonics,             ONLY: clebsch_gordon,&
101                                              clebsch_gordon_deallocate,&
102                                              clebsch_gordon_init
103   USE util,                            ONLY: get_limit,&
104                                              sort_unique
105   USE xas_tdp_integrals,               ONLY: build_xas_tdp_3c_nl,&
106                                              build_xas_tdp_ovlp_nl,&
107                                              create_pqX_tensor,&
108                                              fill_pqx_tensor,&
109                                              get_opt_3c_dist2d
110   USE xas_tdp_types,                   ONLY: batch_info_type,&
111                                              get_proc_batch_sizes,&
112                                              release_batch_info,&
113                                              xas_atom_env_type,&
114                                              xas_tdp_control_type,&
115                                              xas_tdp_env_type
116   USE xc,                              ONLY: divide_by_norm_drho
117   USE xc_atom,                         ONLY: xc_rho_set_atom_update
118   USE xc_derivative_set_types,         ONLY: xc_derivative_set_type,&
119                                              xc_dset_create,&
120                                              xc_dset_get_derivative,&
121                                              xc_dset_release
122   USE xc_derivative_types,             ONLY: xc_derivative_get,&
123                                              xc_derivative_type
124   USE xc_derivatives,                  ONLY: xc_functionals_eval,&
125                                              xc_functionals_get_needs
126   USE xc_rho_cflags_types,             ONLY: xc_rho_cflags_type
127   USE xc_rho_set_types,                ONLY: xc_rho_set_create,&
128                                              xc_rho_set_release,&
129                                              xc_rho_set_type
130
131!$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num
132#include "./base/base_uses.f90"
133
134   IMPLICIT NONE
135
136   PRIVATE
137
138   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'xas_tdp_atom'
139
140   PUBLIC :: init_xas_atom_env, integrate_fxc_atoms, integrate_soc_atoms
141
142CONTAINS
143
144! **************************************************************************************************
145!> \brief Initializes a xas_atom_env type given the qs_enxas_atom_env, qs_envv
146!> \param xas_atom_env the xas_atom_env to initialize
147!> \param xas_tdp_env ...
148!> \param xas_tdp_control ...
149!> \param qs_env ...
150! **************************************************************************************************
151   SUBROUTINE init_xas_atom_env(xas_atom_env, xas_tdp_env, xas_tdp_control, qs_env)
152
153      TYPE(xas_atom_env_type), POINTER                   :: xas_atom_env
154      TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
155      TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
156      TYPE(qs_environment_type), POINTER                 :: qs_env
157
158      CHARACTER(len=*), PARAMETER :: routineN = 'init_xas_atom_env', &
159         routineP = moduleN//':'//routineN
160
161      INTEGER                                            :: handle, ikind, natom, nex_atoms, &
162                                                            nex_kinds, nkind, nspins
163      LOGICAL                                            :: do_soc, do_xc
164      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
165      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
166
167      NULLIFY (qs_kind_set, particle_set)
168
169      CALL timeset(routineN, handle)
170
171!  Initializing the type
172      CALL get_qs_env(qs_env, qs_kind_set=qs_kind_set, natom=natom, particle_set=particle_set)
173
174      nkind = SIZE(qs_kind_set)
175      nex_kinds = xas_tdp_env%nex_kinds
176      nex_atoms = xas_tdp_env%nex_atoms
177      do_xc = xas_tdp_control%do_xc
178      do_soc = xas_tdp_control%do_soc
179      nspins = 1; IF (xas_tdp_control%do_uks .OR. xas_tdp_control%do_roks) nspins = 2
180
181      xas_atom_env%nspins = nspins
182      xas_atom_env%ri_radius = xas_tdp_control%ri_radius
183      ALLOCATE (xas_atom_env%grid_atom_set(nkind))
184      ALLOCATE (xas_atom_env%harmonics_atom_set(nkind))
185      ALLOCATE (xas_atom_env%ri_sphi_so(nkind))
186      ALLOCATE (xas_atom_env%orb_sphi_so(nkind))
187      IF (do_xc) THEN
188         ALLOCATE (xas_atom_env%gr(nkind))
189         ALLOCATE (xas_atom_env%ga(nkind))
190         ALLOCATE (xas_atom_env%dgr1(nkind))
191         ALLOCATE (xas_atom_env%dgr2(nkind))
192         ALLOCATE (xas_atom_env%dga1(nkind))
193         ALLOCATE (xas_atom_env%dga2(nkind))
194      END IF
195
196      xas_atom_env%excited_atoms => xas_tdp_env%ex_atom_indices
197      xas_atom_env%excited_kinds => xas_tdp_env%ex_kind_indices
198
199!  Allocate and initialize the atomic grids and harmonics
200      CALL init_xas_atom_grid_harmo(xas_atom_env, xas_tdp_control%grid_info, do_xc, qs_env)
201
202!  Compute the contraction coefficients for spherical orbitals
203      DO ikind = 1, nkind
204         NULLIFY (xas_atom_env%orb_sphi_so(ikind)%array, xas_atom_env%ri_sphi_so(ikind)%array)
205         IF (do_soc) THEN
206            CALL compute_sphi_so(ikind, "ORB", xas_atom_env%orb_sphi_so(ikind)%array, qs_env)
207         END IF
208         IF (ANY(xas_atom_env%excited_kinds == ikind) .AND. do_xc) THEN
209            CALL compute_sphi_so(ikind, "RI_XAS", xas_atom_env%ri_sphi_so(ikind)%array, qs_env)
210         END IF
211      END DO !ikind
212
213!  Compute the coefficients to expand the density in the RI_XAS basis, if requested
214      IF (do_xc) CALL calculate_density_coeffs(xas_atom_env, qs_env)
215
216      CALL timestop(handle)
217
218   END SUBROUTINE init_xas_atom_env
219
220! **************************************************************************************************
221!> \brief Initializes the atomic grids and harmonics for the RI atomic calculations
222!> \param xas_atom_env ...
223!> \param grid_info ...
224!> \param do_xc Whether the xc kernel will ne computed on the atomic grids. If not, the harmonics
225!>        are built for the orbital basis for all kinds.
226!> \param qs_env ...
227!> \note Largely inspired by init_rho_atom subroutine
228! **************************************************************************************************
229   SUBROUTINE init_xas_atom_grid_harmo(xas_atom_env, grid_info, do_xc, qs_env)
230
231      TYPE(xas_atom_env_type), POINTER                   :: xas_atom_env
232      CHARACTER(len=default_string_length), &
233         DIMENSION(:, :), POINTER                        :: grid_info
234      LOGICAL, INTENT(IN)                                :: do_xc
235      TYPE(qs_environment_type), POINTER                 :: qs_env
236
237      CHARACTER(len=*), PARAMETER :: routineN = 'init_xas_atom_grid_harmo', &
238         routineP = moduleN//':'//routineN
239
240      CHARACTER(LEN=default_string_length)               :: kind_name
241      INTEGER :: igrid, ikind, il, iso, iso1, iso2, l1, l1l2, l2, la, lc1, lc2, lcleb, ll, llmax, &
242         lp, m1, m2, max_s_harm, max_s_set, maxl, maxlgto, maxs, mm, mp, na, nr, quadrature, stat
243      REAL(dp)                                           :: kind_radius
244      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: rga
245      REAL(dp), DIMENSION(:, :, :), POINTER              :: my_CG
246      TYPE(dft_control_type), POINTER                    :: dft_control
247      TYPE(grid_atom_type), POINTER                      :: grid_atom
248      TYPE(gto_basis_set_type), POINTER                  :: tmp_basis
249      TYPE(harmonics_atom_type), POINTER                 :: harmonics
250      TYPE(qs_control_type), POINTER                     :: qs_control
251      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
252
253      NULLIFY (my_CG, qs_kind_set, tmp_basis, grid_atom, harmonics, qs_control, dft_control)
254
255!  Initialization of some integer for the CG coeff generation
256      CALL get_qs_env(qs_env, qs_kind_set=qs_kind_set, dft_control=dft_control)
257      IF (do_xc) THEN
258         CALL get_qs_kind_set(qs_kind_set, maxlgto=maxlgto, basis_type="RI_XAS")
259      ELSE
260         CALL get_qs_kind_set(qs_kind_set, maxlgto=maxlgto, basis_type="ORB")
261      END IF
262      qs_control => dft_control%qs_control
263
264      !maximum expansion
265      llmax = 2*maxlgto
266      max_s_harm = nsoset(llmax)
267      max_s_set = nsoset(maxlgto)
268      lcleb = llmax
269
270!  Allocate and compute the CG coeffs (copied from init_rho_atom)
271      CALL clebsch_gordon_init(lcleb)
272      CALL reallocate(my_CG, 1, max_s_set, 1, max_s_set, 1, max_s_harm)
273
274      ALLOCATE (rga(lcleb, 2))
275      DO lc1 = 0, maxlgto
276         DO iso1 = nsoset(lc1 - 1) + 1, nsoset(lc1)
277            l1 = indso(1, iso1)
278            m1 = indso(2, iso1)
279            DO lc2 = 0, maxlgto
280               DO iso2 = nsoset(lc2 - 1) + 1, nsoset(lc2)
281                  l2 = indso(1, iso2)
282                  m2 = indso(2, iso2)
283                  CALL clebsch_gordon(l1, m1, l2, m2, rga)
284                  IF (l1 + l2 > llmax) THEN
285                     l1l2 = llmax
286                  ELSE
287                     l1l2 = l1 + l2
288                  END IF
289                  mp = m1 + m2
290                  mm = m1 - m2
291                  IF (m1*m2 < 0 .OR. (m1*m2 == 0 .AND. (m1 < 0 .OR. m2 < 0))) THEN
292                     mp = -ABS(mp)
293                     mm = -ABS(mm)
294                  ELSE
295                     mp = ABS(mp)
296                     mm = ABS(mm)
297                  END IF
298                  DO lp = MOD(l1 + l2, 2), l1l2, 2
299                     il = lp/2 + 1
300                     IF (ABS(mp) <= lp) THEN
301                     IF (mp >= 0) THEN
302                        iso = nsoset(lp - 1) + lp + 1 + mp
303                     ELSE
304                        iso = nsoset(lp - 1) + lp + 1 - ABS(mp)
305                     END IF
306                     my_CG(iso1, iso2, iso) = rga(il, 1)
307                     ENDIF
308                     IF (mp /= mm .AND. ABS(mm) <= lp) THEN
309                     IF (mm >= 0) THEN
310                        iso = nsoset(lp - 1) + lp + 1 + mm
311                     ELSE
312                        iso = nsoset(lp - 1) + lp + 1 - ABS(mm)
313                     END IF
314                     my_CG(iso1, iso2, iso) = rga(il, 2)
315                     ENDIF
316                  END DO
317               ENDDO ! iso2
318            ENDDO ! lc2
319         ENDDO ! iso1
320      ENDDO ! lc1
321      DEALLOCATE (rga)
322      CALL clebsch_gordon_deallocate()
323
324!  Create the Lebedev grids and compute the spherical harmonics
325      CALL init_lebedev_grids()
326      quadrature = qs_control%gapw_control%quadrature
327
328      DO ikind = 1, SIZE(xas_atom_env%grid_atom_set)
329
330!        Allocate the grid and the harmonics for this kind
331         NULLIFY (xas_atom_env%grid_atom_set(ikind)%grid_atom)
332         NULLIFY (xas_atom_env%harmonics_atom_set(ikind)%harmonics_atom)
333         CALL allocate_grid_atom(xas_atom_env%grid_atom_set(ikind)%grid_atom)
334         CALL allocate_harmonics_atom(xas_atom_env%harmonics_atom_set(ikind)%harmonics_atom)
335
336         NULLIFY (grid_atom, harmonics)
337         grid_atom => xas_atom_env%grid_atom_set(ikind)%grid_atom
338         harmonics => xas_atom_env%harmonics_atom_set(ikind)%harmonics_atom
339
340!        Initialize some integers
341         CALL get_qs_kind(qs_kind_set(ikind), ngrid_rad=nr, ngrid_ang=na, name=kind_name)
342
343         !take the grid dimension given as input, if none, take the GAPW ones above
344         IF (ANY(grid_info == kind_name)) THEN
345            DO igrid = 1, SIZE(grid_info, 1)
346               IF (grid_info(igrid, 1) == kind_name) THEN
347
348                  !hack to convert string into integer
349                  READ (grid_info(igrid, 2), *, iostat=stat) na
350                  IF (stat .NE. 0) CPABORT("The 'na' value for the GRID keyword must be an integer")
351                  READ (grid_info(igrid, 3), *, iostat=stat) nr
352                  IF (stat .NE. 0) CPABORT("The 'nr' value for the GRID keyword must be an integer")
353                  EXIT
354               END IF
355            END DO
356         ELSE IF (do_xc .AND. ANY(xas_atom_env%excited_kinds == ikind)) THEN
357            !need good integration grids for the xc kernel, but taking the default GAPW grid
358            CPWARN("The default (and possibly small) GAPW grid is being used for one excited KIND")
359         END IF
360
361         ll = get_number_of_lebedev_grid(n=na)
362         na = lebedev_grid(ll)%n
363         la = lebedev_grid(ll)%l
364         grid_atom%ng_sphere = na
365         grid_atom%nr = nr
366
367!        If this is an excited kind, create the harmonics with the RI_XAS basis, otherwise the ORB
368         IF (ANY(xas_atom_env%excited_kinds == ikind) .AND. do_xc) THEN
369            CALL get_qs_kind(qs_kind_set(ikind), basis_set=tmp_basis, basis_type="RI_XAS")
370         ELSE
371            CALL get_qs_kind(qs_kind_set(ikind), basis_set=tmp_basis, basis_type="ORB")
372         END IF
373         CALL get_gto_basis_set(gto_basis_set=tmp_basis, maxl=maxl, kind_radius=kind_radius)
374
375         CALL create_grid_atom(grid_atom, nr, na, llmax, ll, quadrature)
376         CALL truncate_radial_grid(grid_atom, kind_radius)
377
378         maxs = nsoset(maxl)
379         CALL create_harmonics_atom(harmonics, &
380                                    my_CG, na, llmax, maxs, max_s_harm, ll, grid_atom%wa, &
381                                    grid_atom%azi, grid_atom%pol)
382         CALL get_maxl_CG(harmonics, tmp_basis, llmax, max_s_harm)
383      END DO
384
385      CALL deallocate_lebedev_grids()
386      DEALLOCATE (my_CG)
387
388   END SUBROUTINE init_xas_atom_grid_harmo
389
390! **************************************************************************************************
391!> \brief Reduces the radial extension of an atomic grid such that it only covers a given radius
392!> \param grid_atom the atomic grid from which we truncate the radial part
393!> \param max_radius the maximal radial extension of the resulting grid
394!> \note Since the RI density used for <P|fxc|Q> is only of quality close to the atom, and the
395!>       integrand only non-zero within the radius of the gaussian P,Q. One can reduce the grid
396!>       extansion to the largest radius of the RI basis set
397! **************************************************************************************************
398   SUBROUTINE truncate_radial_grid(grid_atom, max_radius)
399
400      TYPE(grid_atom_type), POINTER                      :: grid_atom
401      REAL(dp), INTENT(IN)                               :: max_radius
402
403      CHARACTER(len=*), PARAMETER :: routineN = 'truncate_radial_grid', &
404         routineP = moduleN//':'//routineN
405
406      INTEGER                                            :: first_ir, ir, llmax_p1, na, new_nr, nr
407
408      nr = grid_atom%nr
409      na = grid_atom%ng_sphere
410      llmax_p1 = SIZE(grid_atom%rad2l, 2) - 1
411
412!     Find the index corresponding to the limiting radius (small ir => large radius)
413      DO ir = 1, nr
414         IF (grid_atom%rad(ir) < max_radius) THEN
415            first_ir = ir
416            EXIT
417         END IF
418      END DO
419      new_nr = nr - first_ir + 1
420
421!     Reallcoate everything that depends on r
422      grid_atom%nr = new_nr
423
424      grid_atom%rad(1:new_nr) = grid_atom%rad(first_ir:nr)
425      grid_atom%rad2(1:new_nr) = grid_atom%rad2(first_ir:nr)
426      grid_atom%wr(1:new_nr) = grid_atom%wr(first_ir:nr)
427      grid_atom%weight(:, 1:new_nr) = grid_atom%weight(:, first_ir:nr)
428      grid_atom%rad2l(1:new_nr, :) = grid_atom%rad2l(first_ir:nr, :)
429      grid_atom%oorad2l(1:new_nr, :) = grid_atom%oorad2l(first_ir:nr, :)
430
431      CALL reallocate(grid_atom%rad, 1, new_nr)
432      CALL reallocate(grid_atom%rad2, 1, new_nr)
433      CALL reallocate(grid_atom%wr, 1, new_nr)
434      CALL reallocate(grid_atom%weight, 1, na, 1, new_nr)
435      CALL reallocate(grid_atom%rad2l, 1, new_nr, 0, llmax_p1)
436      CALL reallocate(grid_atom%oorad2l, 1, new_nr, 0, llmax_p1)
437
438   END SUBROUTINE truncate_radial_grid
439
440! **************************************************************************************************
441!> \brief Computes the contraction coefficients to go from spherical orbitals to sgf for a given
442!>        atomic kind and a given basis type.
443!> \param ikind the kind for which we compute the coefficients
444!> \param basis_type the type of basis for which we compute
445!> \param sphi_so where the new contraction coefficients are stored (not yet allocated)
446!> \param qs_env ...
447! **************************************************************************************************
448   SUBROUTINE compute_sphi_so(ikind, basis_type, sphi_so, qs_env)
449
450      INTEGER, INTENT(IN)                                :: ikind
451      CHARACTER(len=*), INTENT(IN)                       :: basis_type
452      REAL(dp), DIMENSION(:, :), POINTER                 :: sphi_so
453      TYPE(qs_environment_type), POINTER                 :: qs_env
454
455      CHARACTER(len=*), PARAMETER :: routineN = 'compute_sphi_so', &
456         routineP = moduleN//':'//routineN
457
458      INTEGER                                            :: ico, ipgf, iset, iso, l, lx, ly, lz, &
459                                                            maxso, nset, sgfi, start_c, start_s
460      INTEGER, DIMENSION(:), POINTER                     :: lmax, lmin, npgf, nsgf_set
461      INTEGER, DIMENSION(:, :), POINTER                  :: first_sgf
462      REAL(dp)                                           :: factor
463      REAL(dp), DIMENSION(:, :), POINTER                 :: sphi
464      TYPE(gto_basis_set_type), POINTER                  :: basis
465      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
466
467      NULLIFY (basis, lmax, lmin, npgf, nsgf_set, qs_kind_set, first_sgf, sphi)
468
469      CALL get_qs_env(qs_env, qs_kind_set=qs_kind_set)
470      CALL get_qs_kind(qs_kind_set(ikind), basis_set=basis, basis_type=basis_type)
471      CALL get_gto_basis_set(basis, lmax=lmax, nset=nset, npgf=npgf, maxso=maxso, lmin=lmin, &
472                             nsgf_set=nsgf_set, sphi=sphi, first_sgf=first_sgf)
473
474      ALLOCATE (sphi_so(maxso, SUM(nsgf_set)))
475      sphi_so = 0.0_dp
476
477      DO iset = 1, nset
478         sgfi = first_sgf(1, iset)
479         DO ipgf = 1, npgf(iset)
480            start_s = (ipgf - 1)*nsoset(lmax(iset))
481            start_c = (ipgf - 1)*ncoset(lmax(iset))
482
483            DO l = lmin(iset), lmax(iset)
484               DO iso = 1, nso(l)
485                  DO ico = 1, nco(l)
486
487                     lx = indco(1, ico + ncoset(l - 1))
488                     ly = indco(2, ico + ncoset(l - 1))
489                     lz = indco(3, ico + ncoset(l - 1))
490
491                     factor = orbtramat(l)%s2c(iso, ico) &
492                              *SQRT(4.0_dp*pi/dfac(2*l + 1)*dfac(2*lx - 1)*dfac(2*ly - 1)*dfac(2*lz - 1))
493
494                     CALL daxpy(nsgf_set(iset), factor, &
495                                sphi(start_c + ncoset(l - 1) + ico, sgfi:sgfi + nsgf_set(iset) - 1), 1, &
496                                sphi_so(start_s + nsoset(l - 1) + iso, sgfi:sgfi + nsgf_set(iset) - 1), 1)
497
498                  END DO !ico
499               END DO !iso
500            END DO !l
501         END DO !ipgf
502      END DO !iset
503
504   END SUBROUTINE compute_sphi_so
505
506! **************************************************************************************************
507!> \brief Find the neighbors of a given set of atoms based on the non-zero blocks of a provided
508!>        overlap matrix. Optionally returns an array containing the indices of all involved atoms
509!>        (the given subset plus all their neighbors, without repetition) AND/OR an array of arrays
510!>        providing the indices of the neighbors of each input atom.
511!> \param base_atoms the set of atoms for which we search neighbors
512!> \param mat_s the overlap matrix used to find neighbors
513!> \param radius the cutoff radius after which atoms are not considered neighbors
514!> \param qs_env ...
515!> \param all_neighbors the array uniquely contatining all indices of all atoms involved
516!> \param neighbor_set array of arrays containing the neighbors of all given atoms
517! **************************************************************************************************
518   SUBROUTINE find_neighbors(base_atoms, mat_s, radius, qs_env, all_neighbors, neighbor_set)
519
520      INTEGER, DIMENSION(:), INTENT(INOUT)               :: base_atoms
521      TYPE(dbcsr_type), INTENT(IN)                       :: mat_s
522      REAL(dp)                                           :: radius
523      TYPE(qs_environment_type), POINTER                 :: qs_env
524      INTEGER, DIMENSION(:), OPTIONAL, POINTER           :: all_neighbors
525      TYPE(cp_1d_i_p_type), DIMENSION(:), OPTIONAL, &
526         POINTER                                         :: neighbor_set
527
528      CHARACTER(len=*), PARAMETER :: routineN = 'find_neighbors', routineP = moduleN//':'//routineN
529
530      INTEGER                                            :: blk, i, iat, ibase, iblk, jblk, mepos, &
531                                                            natom, nb, nbase
532      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: blk_to_base, inb, who_is_there
533      INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: n_neighbors
534      LOGICAL, ALLOCATABLE, DIMENSION(:)                 :: is_base_atom
535      REAL(dp)                                           :: dist2, rad2, ri(3), rij(3), rj(3)
536      TYPE(cell_type), POINTER                           :: cell
537      TYPE(cp_1d_i_p_type), DIMENSION(:), POINTER        :: my_neighbor_set
538      TYPE(cp_para_env_type), POINTER                    :: para_env
539      TYPE(dbcsr_iterator_type)                          :: iter
540      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
541
542      NULLIFY (particle_set, para_env, my_neighbor_set, cell)
543
544      ! Initialization
545      CALL get_qs_env(qs_env, para_env=para_env, natom=natom, particle_set=particle_set, cell=cell)
546      mepos = para_env%mepos
547      nbase = SIZE(base_atoms)
548      !work with the neighbor_set structure, see at the end if we keep it
549      ALLOCATE (my_neighbor_set(nbase))
550      rad2 = radius**2
551
552      ALLOCATE (blk_to_base(natom), is_base_atom(natom))
553      blk_to_base = 0; is_base_atom = .FALSE.
554      DO ibase = 1, nbase
555         blk_to_base(base_atoms(ibase)) = ibase
556         is_base_atom(base_atoms(ibase)) = .TRUE.
557      END DO
558
559      ! First loop over S => count the number of neighbors
560      ALLOCATE (n_neighbors(nbase, 0:para_env%num_pe - 1))
561      n_neighbors = 0
562
563      CALL dbcsr_iterator_start(iter, mat_s)
564      DO WHILE (dbcsr_iterator_blocks_left(iter))
565
566         CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk, blk=blk)
567
568         !avoid self-neighbors
569         IF (iblk == jblk) CYCLE
570
571         !test distance
572         ri = pbc(particle_set(iblk)%r, cell)
573         rj = pbc(particle_set(jblk)%r, cell)
574         rij = pbc(ri, rj, cell)
575         dist2 = SUM(rij**2)
576         IF (dist2 > rad2) CYCLE
577
578         IF (is_base_atom(iblk)) THEN
579            ibase = blk_to_base(iblk)
580            n_neighbors(ibase, mepos) = n_neighbors(ibase, mepos) + 1
581         END IF
582         IF (is_base_atom(jblk)) THEN
583            ibase = blk_to_base(jblk)
584            n_neighbors(ibase, mepos) = n_neighbors(ibase, mepos) + 1
585         END IF
586
587      END DO !iter
588      CALL dbcsr_iterator_stop(iter)
589      CALL mp_sum(n_neighbors, para_env%group)
590
591      ! Allocate the neighbor_set arrays at the correct length
592      DO ibase = 1, nbase
593         ALLOCATE (my_neighbor_set(ibase)%array(SUM(n_neighbors(ibase, :))))
594         my_neighbor_set(ibase)%array = 0
595      END DO
596
597      ! Loop a second time over S, this time fill the neighbors details
598      CALL dbcsr_iterator_start(iter, mat_s)
599      ALLOCATE (inb(nbase))
600      inb = 1
601      DO WHILE (dbcsr_iterator_blocks_left(iter))
602
603         CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk, blk=blk)
604         IF (iblk == jblk) CYCLE
605
606         !test distance
607         ri = pbc(particle_set(iblk)%r, cell)
608         rj = pbc(particle_set(jblk)%r, cell)
609         rij = pbc(ri, rj, cell)
610         dist2 = SUM(rij**2)
611         IF (dist2 > rad2) CYCLE
612
613         IF (is_base_atom(iblk)) THEN
614            ibase = blk_to_base(iblk)
615            my_neighbor_set(ibase)%array(SUM(n_neighbors(ibase, 0:mepos - 1)) + inb(ibase)) = jblk
616            inb(ibase) = inb(ibase) + 1
617         END IF
618         IF (is_base_atom(jblk)) THEN
619            ibase = blk_to_base(jblk)
620            my_neighbor_set(ibase)%array(SUM(n_neighbors(ibase, 0:mepos - 1)) + inb(ibase)) = iblk
621            inb(ibase) = inb(ibase) + 1
622         END IF
623
624      END DO !iter
625      CALL dbcsr_iterator_stop(iter)
626
627      ! Make sure the info is shared among the procs
628      DO ibase = 1, nbase
629         CALL mp_sum(my_neighbor_set(ibase)%array, para_env%group)
630      END DO
631
632      ! Gather all indices if asked for it
633      IF (PRESENT(all_neighbors)) THEN
634         ALLOCATE (who_is_there(natom))
635         who_is_there = 0
636
637         DO ibase = 1, nbase
638            who_is_there(base_atoms(ibase)) = 1
639            DO nb = 1, SIZE(my_neighbor_set(ibase)%array)
640               who_is_there(my_neighbor_set(ibase)%array(nb)) = 1
641            END DO
642         END DO
643
644         ALLOCATE (all_neighbors(natom))
645         i = 0
646         DO iat = 1, natom
647            IF (who_is_there(iat) == 1) THEN
648               i = i + 1
649               all_neighbors(i) = iat
650            END IF
651         END DO
652         CALL reallocate(all_neighbors, 1, i)
653      END IF
654
655      ! If not asked for the neighbor set, deallocate it
656      IF (PRESENT(neighbor_set)) THEN
657         neighbor_set => my_neighbor_set
658      ELSE
659         DO ibase = 1, nbase
660            DEALLOCATE (my_neighbor_set(ibase)%array)
661         END DO
662         DEALLOCATE (my_neighbor_set)
663      END IF
664
665   END SUBROUTINE find_neighbors
666
667! **************************************************************************************************
668!> \brief Returns the RI inverse overlap for a subset of the RI_XAS matrix spaning a given
669!>        excited atom and its neighbors.
670!> \param ri_sinv the inverse overlap as a dbcsr matrix
671!> \param whole_s the whole RI overlap matrix
672!> \param neighbors the indeces of the excited atom and their neighbors
673!> \param idx_to_nb array telling where any atom can be found in neighbors (if there at all)
674!> \param basis_set_ri the RI basis set list for all kinds
675!> \param qs_env ...
676!> \note It is assumed that the neighbors are sorted, the output matrix is assumed to be small and
677!>       is replicated on all processors
678! **************************************************************************************************
679   SUBROUTINE get_exat_ri_sinv(ri_sinv, whole_s, neighbors, idx_to_nb, basis_set_ri, qs_env)
680
681      TYPE(dbcsr_type)                                   :: ri_sinv, whole_s
682      INTEGER, DIMENSION(:), INTENT(IN)                  :: neighbors, idx_to_nb
683      TYPE(gto_basis_set_p_type), DIMENSION(:), POINTER  :: basis_set_ri
684      TYPE(qs_environment_type), POINTER                 :: qs_env
685
686      CHARACTER(len=*), PARAMETER :: routineN = 'get_exat_ri_sinv', &
687         routineP = moduleN//':'//routineN
688
689      INTEGER                                            :: blk, dest, group, handle, iat, ikind, &
690                                                            inb, ir, is, jat, jnb, natom, nnb, &
691                                                            npcols, nprows, source, tag
692      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: recv_req, send_req
693      INTEGER, DIMENSION(:), POINTER                     :: col_dist, nsgf, row_dist
694      INTEGER, DIMENSION(:, :), POINTER                  :: pgrid
695      LOGICAL                                            :: found_risinv, found_whole
696      LOGICAL, ALLOCATABLE, DIMENSION(:)                 :: is_neighbor
697      REAL(dp)                                           :: ri(3), rij(3), rj(3)
698      REAL(dp), ALLOCATABLE, DIMENSION(:)                :: radius
699      REAL(dp), DIMENSION(:, :), POINTER                 :: block_risinv, block_whole
700      TYPE(cell_type), POINTER                           :: cell
701      TYPE(cp_2d_r_p_type), ALLOCATABLE, DIMENSION(:)    :: recv_buff, send_buff
702      TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
703      TYPE(cp_para_env_type), POINTER                    :: para_env
704      TYPE(dbcsr_distribution_type)                      :: sinv_dist
705      TYPE(dbcsr_distribution_type), POINTER             :: dbcsr_dist
706      TYPE(dbcsr_iterator_type)                          :: iter
707      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
708
709      NULLIFY (pgrid, dbcsr_dist, row_dist, col_dist, nsgf, particle_set, block_whole, block_risinv)
710      NULLIFY (cell, para_env, blacs_env)
711
712      CALL timeset(routineN, handle)
713
714      CALL get_qs_env(qs_env, dbcsr_dist=dbcsr_dist, particle_set=particle_set, natom=natom, &
715                      para_env=para_env, blacs_env=blacs_env, cell=cell)
716      nnb = SIZE(neighbors)
717      ALLOCATE (nsgf(nnb), is_neighbor(natom), radius(nnb))
718      is_neighbor = .FALSE.
719      DO inb = 1, nnb
720         iat = neighbors(inb)
721         ikind = particle_set(iat)%atomic_kind%kind_number
722         CALL get_gto_basis_set(basis_set_ri(ikind)%gto_basis_set, nsgf=nsgf(inb), kind_radius=radius(inb))
723         is_neighbor(iat) = .TRUE.
724      END DO
725
726      !Create the ri_sinv matrix based on some arbitrary dbcsr_dist
727      CALL dbcsr_distribution_get(dbcsr_dist, group=group, pgrid=pgrid, nprows=nprows, npcols=npcols)
728
729      ALLOCATE (row_dist(nnb), col_dist(nnb))
730      DO inb = 1, nnb
731         row_dist(inb) = MODULO(nprows - inb, nprows)
732         col_dist(inb) = MODULO(npcols - inb, npcols)
733      END DO
734
735      CALL dbcsr_distribution_new(sinv_dist, group=group, pgrid=pgrid, row_dist=row_dist, &
736                                  col_dist=col_dist)
737
738      CALL dbcsr_create(matrix=ri_sinv, name="RI_SINV", matrix_type="S", dist=sinv_dist, &
739                        row_blk_size=nsgf, col_blk_size=nsgf)
740      !reserving the blocks in the correct pattern
741      DO inb = 1, nnb
742         ri = pbc(particle_set(neighbors(inb))%r, cell)
743         DO jnb = inb, nnb
744
745            !do the atom overlap ?
746            rj = pbc(particle_set(neighbors(jnb))%r, cell)
747            rij = pbc(ri, rj, cell)
748            IF (SUM(rij**2) > (radius(inb) + radius(jnb))**2) CYCLE
749
750            CALL dbcsr_get_stored_coordinates(ri_sinv, inb, jnb, blk)
751            IF (para_env%mepos == blk) THEN
752               ALLOCATE (block_risinv(nsgf(inb), nsgf(jnb)))
753               block_risinv = 0.0_dp
754               CALL dbcsr_put_block(ri_sinv, inb, jnb, block_risinv)
755               DEALLOCATE (block_risinv)
756            END IF
757         END DO
758      END DO
759      CALL dbcsr_finalize(ri_sinv)
760
761      CALL dbcsr_distribution_release(sinv_dist)
762      DEALLOCATE (row_dist, col_dist)
763
764      !prepare the send and recv buffers we will need for change of dist between the two matrices
765      !worst case scenario: all neighbors are on same procs => need to send nnb**2 messages
766      ALLOCATE (send_buff(nnb**2), recv_buff(nnb**2))
767      ALLOCATE (send_req(nnb**2), recv_req(nnb**2))
768      is = 0; ir = 0
769
770      !Loop over the whole RI overlap matrix and pick the blocks we need
771      CALL dbcsr_iterator_start(iter, whole_s)
772      DO WHILE (dbcsr_iterator_blocks_left(iter))
773
774         CALL dbcsr_iterator_next_block(iter, row=iat, column=jat, blk=blk)
775         CALL dbcsr_get_block_p(whole_s, iat, jat, block_whole, found_whole)
776
777         !only interested in neighbors
778         IF (.NOT. found_whole) CYCLE
779         IF (.NOT. is_neighbor(iat)) CYCLE
780         IF (.NOT. is_neighbor(jat)) CYCLE
781
782         inb = idx_to_nb(iat)
783         jnb = idx_to_nb(jat)
784
785         !If blocks are on the same proc for both matrices, simply copy
786         CALL dbcsr_get_block_p(ri_sinv, inb, jnb, block_risinv, found_risinv)
787         IF (found_risinv) THEN
788            CALL dcopy(nsgf(inb)*nsgf(jnb), block_whole, 1, block_risinv, 1)
789         ELSE
790
791            !send the block with unique tag to the proc where inb,jnb is in ri_sinv
792            CALL dbcsr_get_stored_coordinates(ri_sinv, inb, jnb, dest)
793            is = is + 1
794            send_buff(is)%array => block_whole
795            tag = natom*iat + jat
796            CALL mp_isend(msgin=send_buff(is)%array, dest=dest, comm=group, request=send_req(is), tag=tag)
797
798         END IF
799
800      END DO !dbcsr iter
801      CALL dbcsr_iterator_stop(iter)
802
803      !Loop over ri_sinv and receive all those blocks
804      CALL dbcsr_iterator_start(iter, ri_sinv)
805      DO WHILE (dbcsr_iterator_blocks_left(iter))
806
807         CALL dbcsr_iterator_next_block(iter, row=inb, column=jnb, blk=blk)
808         CALL dbcsr_get_block_p(ri_sinv, inb, jnb, block_risinv, found_risinv)
809
810         IF (.NOT. found_risinv) CYCLE
811
812         iat = neighbors(inb)
813         jat = neighbors(jnb)
814
815         !If blocks are on the same proc on both matrices do nothing
816         CALL dbcsr_get_stored_coordinates(whole_s, iat, jat, source)
817         IF (para_env%mepos == source) CYCLE
818
819         tag = natom*iat + jat
820         ir = ir + 1
821         recv_buff(ir)%array => block_risinv
822         CALL mp_irecv(msgout=recv_buff(ir)%array, source=source, request=recv_req(ir), &
823                       tag=tag, comm=group)
824
825      END DO
826      CALL dbcsr_iterator_stop(iter)
827
828      !make sure that all comm is over before proceeding
829      CALL mp_waitall(send_req(1:is))
830      CALL mp_waitall(recv_req(1:ir))
831
832      !Invert
833      CALL cp_dbcsr_cholesky_decompose(ri_sinv, para_env=para_env, blacs_env=blacs_env)
834      CALL cp_dbcsr_cholesky_invert(ri_sinv, para_env=para_env, blacs_env=blacs_env, upper_to_full=.TRUE.)
835      CALL dbcsr_filter(ri_sinv, 1.E-10_dp) !make sure ri_sinv is sparse coming out of fm routines
836      CALL dbcsr_replicate_all(ri_sinv)
837
838      !clean-up
839      DEALLOCATE (nsgf)
840
841      CALL timestop(handle)
842
843   END SUBROUTINE get_exat_ri_sinv
844
845! **************************************************************************************************
846!> \brief Compute the coefficients to project the density on a partial RI_XAS basis
847!> \param xas_atom_env ...
848!> \param qs_env ...
849!> \note The density is n = sum_ab P_ab*phi_a*phi_b, the RI basis covers the products of orbital sgfs
850!>       => n = sum_ab sum_cd P_ab (phi_a phi_b xi_c) S_cd^-1 xi_d
851!>            = sum_d coeff_d xi_d , where xi are the RI basis func.
852!>       In this case, with the partial RI projection, the RI basis is restricted to an excited atom
853!>       and its neighbors at a time. Leads to smaller overlap matrix to invert and less 3-center
854!>       overlap to compute. The procedure is repeated for each excited atom
855!>       This is a test implementation for tensors
856! **************************************************************************************************
857   SUBROUTINE calculate_density_coeffs(xas_atom_env, qs_env)
858
859      TYPE(xas_atom_env_type), POINTER                   :: xas_atom_env
860      TYPE(qs_environment_type), POINTER                 :: qs_env
861
862      CHARACTER(len=*), PARAMETER :: routineN = 'calculate_density_coeffs', &
863         routineP = moduleN//':'//routineN
864
865      INTEGER                                            :: blk, exat, handle, i, iat, iatom, iex, &
866                                                            inb, ind(3), ispin, jatom, jnb, katom, &
867                                                            natom, nex, nkind, nnb, nspins, &
868                                                            output_unit, ri_at
869      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: blk_size_orb, blk_size_ri, idx_to_nb, &
870                                                            neighbors
871      INTEGER, DIMENSION(:), POINTER                     :: all_ri_atoms
872      LOGICAL                                            :: pmat_found, pmat_foundt, sinv_found, &
873                                                            sinv_foundt, tensor_found, unique
874      REAL(dp)                                           :: factor, prefac
875      REAL(dp), ALLOCATABLE, DIMENSION(:)                :: work2
876      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: work1
877      REAL(dp), ALLOCATABLE, DIMENSION(:, :, :)          :: t_block
878      REAL(dp), DIMENSION(:, :), POINTER                 :: pmat_block, pmat_blockt, sinv_block, &
879                                                            sinv_blockt
880      TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
881      TYPE(cp_para_env_type), POINTER                    :: para_env
882      TYPE(dbcsr_distribution_type)                      :: opt_dbcsr_dist
883      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: overlap, rho_ao, rho_redist
884      TYPE(dbcsr_t_iterator_type)                        :: iter
885      TYPE(dbcsr_t_type)                                 :: pqX
886      TYPE(dbcsr_type)                                   :: ri_sinv
887      TYPE(dbcsr_type), POINTER                          :: ri_mats
888      TYPE(distribution_2d_type), POINTER                :: opt_dist2d
889      TYPE(gto_basis_set_p_type), DIMENSION(:), POINTER  :: basis_set_orb, basis_set_ri
890      TYPE(libint_potential_type)                        :: pot
891      TYPE(neighbor_list_set_p_type), DIMENSION(:), &
892         POINTER                                         :: ab_list, ac_list, sab_ri
893      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
894      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
895      TYPE(qs_rho_type), POINTER                         :: rho
896
897      NULLIFY (qs_kind_set, basis_set_ri, basis_set_orb, ac_list, rho, rho_ao, sab_ri, ri_mats)
898      NULLIFY (particle_set, para_env, all_ri_atoms, overlap, pmat_blockt)
899      NULLIFY (blacs_env, pmat_block, ab_list, opt_dist2d, rho_redist, sinv_block, sinv_blockt)
900
901      !Idea: We don't do a full RI here as it would be too expensive in many ways (inversion of a
902      !      large matrix, many 3-center overlaps, density coefficients for all atoms, etc...)
903      !      Instead, we go excited atom by excited atom and only do a RI expansion on its basis
904      !      and that of its closest neighbors (defined through RI_RADIUS), such that we only have
905      !      very small matrices to invert and only a few (abc) overlp integrals with c on the
906      !      excited atom its neighbors. This is physically sound since we only need the density
907      !      well defined on the excited atom grid as we do (aI|P)*(P|Q)^-1*(Q|fxc|R)*(R|S)^-1*(S|Jb)
908
909      CALL timeset(routineN, handle)
910
911      CALL get_qs_env(qs_env, nkind=nkind, qs_kind_set=qs_kind_set, rho=rho, &
912                      natom=natom, particle_set=particle_set, &
913                      para_env=para_env, blacs_env=blacs_env)
914      nspins = xas_atom_env%nspins
915      nex = SIZE(xas_atom_env%excited_atoms)
916      CALL qs_rho_get(rho, rho_ao=rho_ao)
917
918!  Create the needed neighbor list and basis set lists.
919      ALLOCATE (basis_set_ri(nkind))
920      ALLOCATE (basis_set_orb(nkind))
921      CALL basis_set_list_setup(basis_set_ri, "RI_XAS", qs_kind_set)
922      CALL basis_set_list_setup(basis_set_orb, "ORB", qs_kind_set)
923
924!  Compute the RI overlap matrix on the whole system
925      CALL build_xas_tdp_ovlp_nl(sab_ri, basis_set_ri, basis_set_ri, qs_env)
926      CALL build_overlap_matrix_simple(qs_env%ks_env, overlap, basis_set_ri, basis_set_ri, sab_ri)
927      ri_mats => overlap(1)%matrix
928      CALL release_neighbor_list_sets(sab_ri)
929
930!  Get the neighbors of the excited atoms (= all the atoms where density coeffs are needed)
931      CALL find_neighbors(xas_atom_env%excited_atoms, ri_mats, xas_atom_env%ri_radius, &
932                          qs_env, all_neighbors=all_ri_atoms, neighbor_set=xas_atom_env%exat_neighbors)
933
934      !keep in mind that double occupation is included in rho_ao in case of closed-shell
935      factor = 0.5_dp; IF (nspins == 2) factor = 1.0_dp
936
937!  Allocate space for the projected density coefficients. On all ri_atoms.
938!  Note: the sub-region where we project the density changes from excited atom to excited atom
939!        => need different sets of RI coeffs
940      ALLOCATE (blk_size_ri(natom))
941      CALL get_particle_set(particle_set, qs_kind_set, nsgf=blk_size_ri, basis=basis_set_ri)
942      ALLOCATE (xas_atom_env%ri_dcoeff(natom, nspins, nex))
943      DO iex = 1, nex
944         DO ispin = 1, nspins
945            DO iat = 1, natom
946               NULLIFY (xas_atom_env%ri_dcoeff(iat, ispin, iex)%array)
947               IF ((.NOT. ANY(xas_atom_env%exat_neighbors(iex)%array == iat)) &
948                   .AND. (.NOT. xas_atom_env%excited_atoms(iex) == iat)) CYCLE
949               ALLOCATE (xas_atom_env%ri_dcoeff(iat, ispin, iex)%array(blk_size_ri(iat)))
950               xas_atom_env%ri_dcoeff(iat, ispin, iex)%array = 0.0_dp
951            END DO
952         END DO
953      END DO
954
955      output_unit = cp_logger_get_default_io_unit()
956      IF (output_unit > 0) THEN
957         WRITE (output_unit, FMT="(/,T7,A,/,T7,A)") &
958            "Excited atom, natoms in RI_REGION:", &
959            "---------------------------------"
960      END IF
961
962      !We go atom by atom, first compute the optimal distribution for the integrals, then
963      !the integrals themselves that we put into a tensor, then the contraction with the
964      !density
965
966      ALLOCATE (blk_size_orb(natom))
967      CALL get_particle_set(particle_set, qs_kind_set, nsgf=blk_size_orb, basis=basis_set_orb)
968
969      DO iex = 1, nex
970
971         !get neighbors of current atom
972         exat = xas_atom_env%excited_atoms(iex)
973         nnb = 1 + SIZE(xas_atom_env%exat_neighbors(iex)%array)
974         ALLOCATE (neighbors(nnb))
975         neighbors(1) = exat
976         neighbors(2:nnb) = xas_atom_env%exat_neighbors(iex)%array(:)
977         CALL sort_unique(neighbors, unique)
978
979         !link the atoms to their position in neighbors
980         ALLOCATE (idx_to_nb(natom))
981         idx_to_nb = 0
982         DO inb = 1, nnb
983            idx_to_nb(neighbors(inb)) = inb
984         END DO
985
986         IF (output_unit > 0) THEN
987            WRITE (output_unit, FMT="(T7,I12,I21)") &
988               exat, nnb
989         END IF
990
991         !Get the optimal distribution_2d for the overlap integrals (abc), centers c on the current
992         !excited atom and its neighbors defined by RI_RADIUS
993         CALL build_xas_tdp_ovlp_nl(ab_list, basis_set_orb, basis_set_orb, qs_env)
994         CALL build_xas_tdp_3c_nl(ac_list, basis_set_orb, basis_set_ri, do_potential_id, &
995                                  qs_env, excited_atoms=neighbors)
996         CALL get_opt_3c_dist2d(opt_dist2d, ab_list, ac_list, basis_set_orb, basis_set_orb, &
997                                basis_set_ri, qs_env)
998         CALL release_neighbor_list_sets(ab_list)
999         CALL release_neighbor_list_sets(ac_list)
1000
1001         !get the ab and ac neighbor lists based on the optimized distribution
1002         CALL build_xas_tdp_ovlp_nl(ab_list, basis_set_orb, basis_set_orb, qs_env, ext_dist2d=opt_dist2d)
1003         CALL build_xas_tdp_3c_nl(ac_list, basis_set_orb, basis_set_ri, do_potential_id, &
1004                                  qs_env, excited_atoms=neighbors, ext_dist2d=opt_dist2d)
1005
1006         !get the AO density matrix in the optimized distribution
1007         CALL cp_dbcsr_dist2d_to_dist(opt_dist2d, opt_dbcsr_dist)
1008         ALLOCATE (rho_redist(nspins))
1009         DO ispin = 1, nspins
1010            ALLOCATE (rho_redist(ispin)%matrix)
1011            CALL dbcsr_create(rho_redist(ispin)%matrix, template=rho_ao(ispin)%matrix, dist=opt_dbcsr_dist)
1012            CALL dbcsr_complete_redistribute(rho_ao(ispin)%matrix, rho_redist(ispin)%matrix)
1013         END DO
1014
1015         !Compute the 3-center overlap integrals
1016         pot%potential_type = do_potential_id
1017
1018         CALL create_pqX_tensor(pqX, ab_list, ac_list, opt_dbcsr_dist, blk_size_orb, blk_size_orb, &
1019                                blk_size_ri)
1020         CALL fill_pqX_tensor(pqX, ab_list, ac_list, basis_set_orb, basis_set_orb, basis_set_ri, &
1021                              pot, qs_env)
1022
1023         !Compute the RI inverse overlap matrix on the reduced RI basis that spans the excited
1024         !atom and its neighbors, ri_sinv is replicated over all procs
1025         CALL get_exat_ri_sinv(ri_sinv, ri_mats, neighbors, idx_to_nb, basis_set_ri, qs_env)
1026
1027         !Do the actual contraction: coeff_y = sum_pq sum_x P_pq (phi_p phi_q xi_x) S_xy^-1
1028         !TODO: re-enable OMP parallelization once/if the tensor iterator becomes thread safe
1029
1030!!$OMP PARALLEL DEFAULT(NONE) &
1031!!$OMP SHARED (pqX,nspins,blk_size_ri,nnb,neighbors,ri_sinv,factor,xas_atom_env,iex,rho_redist,idx_to_nb)&
1032!!$OMP PRIVATE(iter,ind,blk,t_block,tensor_found,iatom,jatom,inb,prefac,ispin,work1,pmat_block, &
1033!!$OMP         pmat_blockt,pmat_found,pmat_foundt,i,jnb,ri_at,sinv_block,sinv_blockt,sinv_found,&
1034!!$OMP         sinv_foundt,work2,katom)
1035
1036         CALL dbcsr_t_iterator_start(iter, pqX)
1037         DO WHILE (dbcsr_t_iterator_blocks_left(iter))
1038            CALL dbcsr_t_iterator_next_block(iter, ind, blk)
1039!!$OMP CRITICAL (get_t_block)
1040            CALL dbcsr_t_get_block(pqX, ind, t_block, tensor_found)
1041!!$OMP END CRITICAL (get_t_block)
1042
1043            iatom = ind(1)
1044            jatom = ind(2)
1045            katom = ind(3)
1046            inb = idx_to_nb(katom)
1047
1048            !non-diagonal elements need to be counted twice
1049            prefac = 2.0_dp
1050            IF (iatom == jatom) prefac = 1.0_dp
1051
1052            DO ispin = 1, nspins
1053
1054               !rho_redist is symmetric, block can be in either location
1055               CALL dbcsr_get_block_p(rho_redist(ispin)%matrix, iatom, jatom, pmat_block, pmat_found)
1056               CALL dbcsr_get_block_p(rho_redist(ispin)%matrix, jatom, iatom, pmat_blockt, pmat_foundt)
1057               IF ((.NOT. pmat_found) .AND. (.NOT. pmat_foundt)) CYCLE
1058
1059               ALLOCATE (work1(blk_size_ri(katom), 1))
1060               work1 = 0.0_dp
1061
1062               !first contraction with the density matrix
1063               IF (pmat_found) THEN
1064                  DO i = 1, blk_size_ri(katom)
1065                     work1(i, 1) = prefac*SUM(pmat_block(:, :)*t_block(:, :, i))
1066                  END DO
1067               ELSE
1068                  DO i = 1, blk_size_ri(katom)
1069                     work1(i, 1) = prefac*SUM(TRANSPOSE(pmat_blockt(:, :))*t_block(:, :, i))
1070                  END DO
1071               END IF
1072
1073               !loop over neighbors
1074               DO jnb = 1, nnb
1075
1076                  ri_at = neighbors(jnb)
1077
1078                  !ri_sinv is a symmetric matrix => actual block is one of the two
1079                  CALL dbcsr_get_block_p(ri_sinv, inb, jnb, sinv_block, sinv_found)
1080                  CALL dbcsr_get_block_p(ri_sinv, jnb, inb, sinv_blockt, sinv_foundt)
1081                  IF ((.NOT. sinv_found) .AND. (.NOT. sinv_foundt)) CYCLE
1082
1083                  !second contraction with the inverse RI overlap
1084                  ALLOCATE (work2(SIZE(xas_atom_env%ri_dcoeff(ri_at, ispin, iex)%array)))
1085                  work2 = 0.0_dp
1086
1087                  IF (sinv_found) THEN
1088                     DO i = 1, blk_size_ri(katom)
1089                        CALL daxpy(SIZE(work2), factor*work1(i, 1), sinv_block(i, :), 1, work2(:), 1)
1090                     END DO
1091                  ELSE
1092                     DO i = 1, blk_size_ri(katom)
1093                        CALL daxpy(SIZE(work2), factor*work1(i, 1), sinv_blockt(:, i), 1, work2(:), 1)
1094                     END DO
1095                  END IF
1096
1097!!$OMP CRITICAL (ri_dcoeff_update)
1098                  xas_atom_env%ri_dcoeff(ri_at, ispin, iex)%array(:) = &
1099                     xas_atom_env%ri_dcoeff(ri_at, ispin, iex)%array(:) + work2(:)
1100!!$OMP END CRITICAL (ri_dcoeff_update)
1101
1102                  DEALLOCATE (work2)
1103               END DO !jnb
1104
1105               DEALLOCATE (work1)
1106            END DO
1107
1108            DEALLOCATE (t_block)
1109         END DO !iter
1110         CALL dbcsr_t_iterator_stop(iter)
1111!!$OMP END PARALLEL
1112
1113         !clean-up
1114         CALL dbcsr_release(ri_sinv)
1115         CALL dbcsr_t_destroy(pqX)
1116         CALL distribution_2d_release(opt_dist2d)
1117         CALL dbcsr_deallocate_matrix_set(rho_redist)
1118         CALL dbcsr_distribution_release(opt_dbcsr_dist)
1119         CALL release_neighbor_list_sets(ab_list)
1120         CALL release_neighbor_list_sets(ac_list)
1121         DEALLOCATE (neighbors, idx_to_nb)
1122
1123      END DO !iex
1124
1125      !making sure all procs have the same info
1126      DO iex = 1, nex
1127         DO ispin = 1, nspins
1128            DO iat = 1, natom
1129               IF ((.NOT. ANY(xas_atom_env%exat_neighbors(iex)%array == iat)) &
1130                   .AND. (.NOT. xas_atom_env%excited_atoms(iex) == iat)) CYCLE
1131               CALL mp_sum(xas_atom_env%ri_dcoeff(iat, ispin, iex)%array, para_env%group)
1132            END DO !iat
1133         END DO !ispin
1134      END DO !iex
1135
1136!  clean-up
1137      CALL dbcsr_deallocate_matrix_set(overlap)
1138      DEALLOCATE (basis_set_ri, basis_set_orb, all_ri_atoms)
1139
1140      CALL timestop(handle)
1141
1142   END SUBROUTINE calculate_density_coeffs
1143
1144! **************************************************************************************************
1145!> \brief Evaluates the density on a given atomic grid
1146!> \param rho_set where the densities are stored
1147!> \param ri_dcoeff the arrays containing the RI density coefficients of this atom, for each spin
1148!> \param atom_kind the kind of the atom in question
1149!> \param do_gga whether the gradient of the density should also be put on the grid
1150!> \param batch_info how the so are distributed
1151!> \param xas_atom_env ...
1152!> \param qs_env ...
1153!> \note The density is expressed as n = sum_d coeff_d*xi_d. Knowing the coordinate of each grid
1154!>       grid point, one can simply evaluate xi_d(r)
1155! **************************************************************************************************
1156   SUBROUTINE put_density_on_atomic_grid(rho_set, ri_dcoeff, atom_kind, do_gga, batch_info, &
1157                                         xas_atom_env, qs_env)
1158
1159      TYPE(xc_rho_set_type), POINTER                     :: rho_set
1160      TYPE(cp_1d_r_p_type), DIMENSION(:)                 :: ri_dcoeff
1161      INTEGER, INTENT(IN)                                :: atom_kind
1162      LOGICAL, INTENT(IN)                                :: do_gga
1163      TYPE(batch_info_type)                              :: batch_info
1164      TYPE(xas_atom_env_type), POINTER                   :: xas_atom_env
1165      TYPE(qs_environment_type), POINTER                 :: qs_env
1166
1167      CHARACTER(len=*), PARAMETER :: routineN = 'put_density_on_atomic_grid', &
1168         routineP = moduleN//':'//routineN
1169
1170      INTEGER                                            :: dir, handle, ipgf, iset, iso, iso_proc, &
1171                                                            ispin, maxso, n, na, nr, nset, nsgfi, &
1172                                                            nsoi, nspins, sgfi, starti
1173      INTEGER, DIMENSION(:), POINTER                     :: lmax, lmin, npgf, nsgf_set
1174      INTEGER, DIMENSION(:, :), POINTER                  :: first_sgf
1175      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: so, work1, work2
1176      REAL(dp), ALLOCATABLE, DIMENSION(:, :, :)          :: dso
1177      REAL(dp), DIMENSION(:, :), POINTER                 :: dgr1, dgr2, ga, gr, ri_sphi_so, zet
1178      REAL(dp), DIMENSION(:, :, :), POINTER              :: dga1, dga2, rhoa, rhob
1179      TYPE(cp_1d_r_p_type), DIMENSION(:), POINTER        :: ri_dcoeff_so
1180      TYPE(grid_atom_type), POINTER                      :: grid_atom
1181      TYPE(gto_basis_set_type), POINTER                  :: ri_basis
1182      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
1183
1184      NULLIFY (grid_atom, ri_basis, qs_kind_set, lmax, npgf, zet, nsgf_set, ri_sphi_so)
1185      NULLIFY (lmin, first_sgf, rhoa, rhob, ga, gr, dgr1, dgr2, dga1, dga2, ri_dcoeff_so)
1186
1187      CALL timeset(routineN, handle)
1188
1189!  Strategy: it makes sense to evaluate the spherical orbital on the grid (because of symmetry)
1190!            From there, one can directly contract into sgf using ri_sphi_so and then take the weight
1191!            The spherical orbital were precomputed and split in a purely radial and a purely
1192!            angular part. The full values on each grid point are obtain through gemm
1193
1194!  Generalities
1195      CALL get_qs_env(qs_env, qs_kind_set=qs_kind_set)
1196      CALL get_qs_kind(qs_kind_set(atom_kind), basis_set=ri_basis, basis_type="RI_XAS")
1197      CALL get_gto_basis_set(ri_basis, lmax=lmax, npgf=npgf, zet=zet, nset=nset, nsgf_set=nsgf_set, &
1198                             first_sgf=first_sgf, lmin=lmin, maxso=maxso)
1199
1200!  Get the grid and the info we need from it
1201      grid_atom => xas_atom_env%grid_atom_set(atom_kind)%grid_atom
1202      na = grid_atom%ng_sphere
1203      nr = grid_atom%nr
1204      n = na*nr
1205      nspins = xas_atom_env%nspins
1206      ri_sphi_so => xas_atom_env%ri_sphi_so(atom_kind)%array
1207
1208!  Point to the rho_set densities
1209      rhoa => rho_set%rhoa
1210      rhob => rho_set%rhob
1211      rhoa = 0.0_dp; rhob = 0.0_dp;
1212      IF (do_gga) THEN
1213         DO dir = 1, 3
1214            rho_set%drhoa(dir)%array = 0.0_dp
1215            rho_set%drhob(dir)%array = 0.0_dp
1216         END DO
1217      END IF
1218
1219!  Point to the precomputed SO
1220      ga => xas_atom_env%ga(atom_kind)%array
1221      gr => xas_atom_env%gr(atom_kind)%array
1222      IF (do_gga) THEN
1223         dga1 => xas_atom_env%dga1(atom_kind)%array
1224         dga2 => xas_atom_env%dga2(atom_kind)%array
1225         dgr1 => xas_atom_env%dgr1(atom_kind)%array
1226         dgr2 => xas_atom_env%dgr2(atom_kind)%array
1227      END IF
1228
1229!  Need to express the ri_dcoeffs in terms of so (and not sgf)
1230      ALLOCATE (ri_dcoeff_so(nspins))
1231      DO ispin = 1, nspins
1232         ALLOCATE (ri_dcoeff_so(ispin)%array(nset*maxso))
1233         ri_dcoeff_so(ispin)%array = 0.0_dp
1234
1235         !for a given so, loop over sgf and sum
1236         DO iset = 1, nset
1237            sgfi = first_sgf(1, iset)
1238            nsoi = npgf(iset)*nsoset(lmax(iset))
1239            nsgfi = nsgf_set(iset)
1240            ALLOCATE (work1(nsoi, 1), work2(nsgfi, 1))
1241
1242            work2(:, 1) = ri_dcoeff(ispin)%array(sgfi:sgfi + nsgfi - 1)
1243            CALL dgemm('N', 'N', nsoi, 1, nsgfi, 1.0_dp, ri_sphi_so(1:nsoi, sgfi:sgfi + nsgfi - 1), &
1244                       nsoi, work2, nsgfi, 0.0_dp, work1, nsoi)
1245            ri_dcoeff_so(ispin)%array((iset - 1)*maxso + 1:(iset - 1)*maxso + nsoi) = work1(:, 1)
1246
1247            DEALLOCATE (work1, work2)
1248         END DO
1249
1250      END DO
1251
1252      !allocate space to store the spherical orbitals on the grid
1253      ALLOCATE (so(na, nr))
1254      IF (do_gga) ALLOCATE (dso(na, nr, 3))
1255
1256!  Loop over the spherical orbitals on this proc
1257      DO iso_proc = 1, batch_info%nso_proc(atom_kind)
1258         iset = batch_info%so_proc_info(atom_kind)%array(1, iso_proc)
1259         ipgf = batch_info%so_proc_info(atom_kind)%array(2, iso_proc)
1260         iso = batch_info%so_proc_info(atom_kind)%array(3, iso_proc)
1261         IF (iso < 0) CYCLE
1262
1263         starti = (iset - 1)*maxso + (ipgf - 1)*nsoset(lmax(iset))
1264
1265         !the spherical orbital on the grid
1266         CALL dgemm('N', 'T', na, nr, 1, 1.0_dp, ga(:, iso_proc:iso_proc), na, &
1267                    gr(:, iso_proc:iso_proc), nr, 0.0_dp, so(:, :), na)
1268
1269         !the gradient on the grid
1270         IF (do_gga) THEN
1271
1272            DO dir = 1, 3
1273               CALL dgemm('N', 'T', na, nr, 1, 1.0_dp, dga1(:, iso_proc:iso_proc, dir), na, &
1274                          dgr1(:, iso_proc:iso_proc), nr, 0.0_dp, dso(:, :, dir), na)
1275               CALL dgemm('N', 'T', na, nr, 1, 1.0_dp, dga2(:, iso_proc:iso_proc, dir), na, &
1276                          dgr2(:, iso_proc:iso_proc), nr, 1.0_dp, dso(:, :, dir), na)
1277            END DO
1278         END IF
1279
1280         !put the so on the grid with the approriate coefficients and sum
1281         CALL daxpy(n, ri_dcoeff_so(1)%array(starti + iso), so(:, :), 1, rhoa(:, :, 1), 1)
1282
1283         IF (nspins == 2) THEN
1284            CALL daxpy(n, ri_dcoeff_so(2)%array(starti + iso), so(:, :), 1, rhob(:, :, 1), 1)
1285         END IF
1286
1287         IF (do_gga) THEN
1288
1289            !put the gradient of the so on the grid with correspond RI coeff
1290            DO dir = 1, 3
1291               CALL daxpy(n, ri_dcoeff_so(1)%array(starti + iso), dso(:, :, dir), &
1292                          1, rho_set%drhoa(dir)%array(:, :, 1), 1)
1293
1294               IF (nspins == 2) THEN
1295                  CALL daxpy(n, ri_dcoeff_so(2)%array(starti + iso), dso(:, :, dir), &
1296                             1, rho_set%drhob(dir)%array(:, :, 1), 1)
1297               END IF
1298            END DO !dir
1299         END IF !do_gga
1300
1301      END DO
1302
1303! Treat spin restricted case (=> copy alpha into beta)
1304      IF (nspins == 1) THEN
1305         CALL dcopy(n, rhoa(:, :, 1), 1, rhob(:, :, 1), 1)
1306
1307         IF (do_gga) THEN
1308            DO dir = 1, 3
1309               CALL dcopy(n, rho_set%drhoa(dir)%array(:, :, 1), 1, rho_set%drhob(dir)%array(:, :, 1), 1)
1310            END DO
1311         END IF
1312      END IF
1313
1314! Note: sum over procs is done outside
1315
1316!  clean-up
1317      DO ispin = 1, nspins
1318         DEALLOCATE (ri_dcoeff_so(ispin)%array)
1319      END DO
1320      DEALLOCATE (ri_dcoeff_so)
1321
1322      CALL timestop(handle)
1323
1324   END SUBROUTINE put_density_on_atomic_grid
1325
1326! **************************************************************************************************
1327!> \brief Adds the density of a given source atom with source kind (with ri_dcoeff) on the atomic
1328!>        grid belonging to another target atom of target kind. The evaluations of the basis
1329!>        function first requires the evaluation of the x,y,z coordinates on each grid point of
1330!>        target atom wrt to the position of source atom
1331!> \param rho_set where the densities are stored
1332!> \param ri_dcoeff the arrays containing the RI density coefficient of source_iat, for each spin
1333!> \param source_iat the index of the source atom
1334!> \param source_ikind the kind of the source atom
1335!> \param target_iat the index of the target atom
1336!> \param target_ikind the kind of the target atom
1337!> \param sr starting r index for the local grid
1338!> \param er ending r index for the local grid
1339!> \param do_gga whether the gradient of the density is needed
1340!> \param xas_atom_env ...
1341!> \param qs_env ...
1342! **************************************************************************************************
1343   SUBROUTINE put_density_on_other_grid(rho_set, ri_dcoeff, source_iat, source_ikind, target_iat, &
1344                                        target_ikind, sr, er, do_gga, xas_atom_env, qs_env)
1345
1346      TYPE(xc_rho_set_type), POINTER                     :: rho_set
1347      TYPE(cp_1d_r_p_type), DIMENSION(:)                 :: ri_dcoeff
1348      INTEGER, INTENT(IN)                                :: source_iat, source_ikind, target_iat, &
1349                                                            target_ikind, sr, er
1350      LOGICAL, INTENT(IN)                                :: do_gga
1351      TYPE(xas_atom_env_type), POINTER                   :: xas_atom_env
1352      TYPE(qs_environment_type), POINTER                 :: qs_env
1353
1354      CHARACTER(len=*), PARAMETER :: routineN = 'put_density_on_other_grid', &
1355         routineP = moduleN//':'//routineN
1356
1357      INTEGER                                            :: dir, handle, ia, ico, ipgf, ir, iset, &
1358                                                            isgf, lx, ly, lz, n, na, nr, nset, &
1359                                                            nspins, sgfi, start
1360      INTEGER, DIMENSION(:), POINTER                     :: lmax, lmin, npgf, nsgf_set
1361      INTEGER, DIMENSION(:, :), POINTER                  :: first_sgf
1362      REAL(dp)                                           :: rmom
1363      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: sgf
1364      REAL(dp), ALLOCATABLE, DIMENSION(:, :, :)          :: co, dsgf, pos
1365      REAL(dp), ALLOCATABLE, DIMENSION(:, :, :, :)       :: dco
1366      REAL(dp), DIMENSION(3)                             :: rs, rst, rt
1367      REAL(dp), DIMENSION(:, :), POINTER                 :: ri_sphi, zet
1368      REAL(dp), DIMENSION(:, :, :), POINTER              :: rhoa, rhob
1369      TYPE(cell_type), POINTER                           :: cell
1370      TYPE(grid_atom_type), POINTER                      :: grid_atom
1371      TYPE(gto_basis_set_type), POINTER                  :: ri_basis
1372      TYPE(harmonics_atom_type), POINTER                 :: harmonics
1373      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
1374      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
1375
1376      NULLIFY (qs_kind_set, ri_basis, lmax, npgf, nsgf_set, lmin, zet, first_sgf, grid_atom)
1377      NULLIFY (harmonics, rhoa, rhob, particle_set, cell, ri_sphi)
1378
1379      !Same logic as the  put_density_on_own_grid routine. Loop over orbitals, put them on the grid,
1380      !contract into sgf and daxpy with coeff. Notable difference: use cartesian orbitals instead of
1381      !spherical, since the center of the grid is not the origin and thus, spherical symmetry can't
1382      !be exploited so well
1383
1384      CALL timeset(routineN, handle)
1385
1386      !Generalities
1387      CALL get_qs_env(qs_env, qs_kind_set=qs_kind_set, particle_set=particle_set, cell=cell)
1388      !want basis of the source atom
1389      CALL get_qs_kind(qs_kind_set(source_ikind), basis_set=ri_basis, basis_type="RI_XAS")
1390      CALL get_gto_basis_set(ri_basis, lmax=lmax, npgf=npgf, zet=zet, nset=nset, nsgf_set=nsgf_set, &
1391                             first_sgf=first_sgf, lmin=lmin, sphi=ri_sphi)
1392
1393      ! Want the grid and harmonics of the target atom
1394      grid_atom => xas_atom_env%grid_atom_set(target_ikind)%grid_atom
1395      harmonics => xas_atom_env%harmonics_atom_set(target_ikind)%harmonics_atom
1396      na = grid_atom%ng_sphere
1397      nr = er - sr + 1
1398      n = na*nr
1399      nspins = xas_atom_env%nspins
1400
1401      !  Point to the rho_set densities
1402      rhoa => rho_set%rhoa
1403      rhob => rho_set%rhob
1404
1405      !  Need the source-target position vector
1406      rs = pbc(particle_set(source_iat)%r, cell)
1407      rt = pbc(particle_set(target_iat)%r, cell)
1408      rst = pbc(rs, rt, cell)
1409
1410      ! Precompute the positions on the target grid
1411      ALLOCATE (pos(na, sr:er, 4))
1412!$OMP PARALLEL DO COLLAPSE(2) SCHEDULE(STATIC) DEFAULT(NONE), &
1413!$OMP SHARED(na,sr,er,pos,harmonics,grid_atom,rst), &
1414!$OMP PRIVATE(ia,ir)
1415      DO ir = sr, er
1416         DO ia = 1, na
1417            pos(ia, ir, 1:3) = harmonics%a(:, ia)*grid_atom%rad(ir) + rst
1418            pos(ia, ir, 4) = pos(ia, ir, 1)**2 + pos(ia, ir, 2)**2 + pos(ia, ir, 3)**2
1419         END DO
1420      END DO
1421!$OMP END PARALLEL DO
1422
1423      ! Loop over the cartesian gaussian functions and evaluate them
1424      DO iset = 1, nset
1425
1426         !allocate space to store the cartesian orbtial on the grid
1427         ALLOCATE (co(na, sr:er, npgf(iset)*ncoset(lmax(iset))))
1428         IF (do_gga) ALLOCATE (dco(na, sr:er, 3, npgf(iset)*ncoset(lmax(iset))))
1429
1430!$OMP PARALLEL DEFAULT(NONE), &
1431!$OMP SHARED(co,npgf,ncoset,lmax,lmin,indco,pos,zet,iset,na,sr,er,do_gga,dco), &
1432!$OMP PRIVATE(ipgf,start,ico,lx,ly,lz,ia,ir,rmom)
1433
1434!$OMP DO COLLAPSE(2) SCHEDULE(STATIC)
1435         DO ir = sr, er
1436            DO ia = 1, na
1437               co(ia, ir, :) = 0.0_dp
1438               IF (do_gga) THEN
1439                  dco(ia, ir, :, :) = 0.0_dp
1440               END IF
1441            END DO
1442         END DO
1443!$OMP END DO NOWAIT
1444
1445         DO ipgf = 1, npgf(iset)
1446            start = (ipgf - 1)*ncoset(lmax(iset))
1447
1448            !loop over the cartesian orbitals
1449            DO ico = ncoset(lmin(iset) - 1) + 1, ncoset(lmax(iset))
1450               lx = indco(1, ico)
1451               ly = indco(2, ico)
1452               lz = indco(3, ico)
1453
1454               ! compute g = x**lx * y**ly * z**lz * exp(-zet * r**2)
1455!$OMP DO COLLAPSE(2) SCHEDULE(STATIC)
1456               DO ir = sr, er
1457                  DO ia = 1, na
1458                     rmom = EXP(-zet(ipgf, iset)*pos(ia, ir, 4))
1459                     IF (lx /= 0) rmom = rmom*pos(ia, ir, 1)**lx
1460                     IF (ly /= 0) rmom = rmom*pos(ia, ir, 2)**ly
1461                     IF (lz /= 0) rmom = rmom*pos(ia, ir, 3)**lz
1462                     co(ia, ir, start + ico) = rmom
1463                     !co(ia, ir, start + ico) = pos(ia, ir, 1)**lx*pos(ia, ir, 2)**ly*pos(ia, ir, 3)**lz &
1464                     !                          *EXP(-zet(ipgf, iset)*pos(ia, ir, 4))
1465                  END DO
1466               END DO
1467!$OMP END DO NOWAIT
1468
1469               IF (do_gga) THEN
1470                  !the gradient: dg_x = lx*x**(lx-1) * y**ly * z**lz * exp(-zet * r**2)
1471                  !                     -2*zet* x**(lx+1) * y**ly * z**lz * exp(-zet * r**2)
1472                  !                   = (lx*x**(lx-1) - 2*zet*x**(lx+1)) * y**ly * z**lz * exp(-zet * r**2)
1473
1474                  !x direction, special case if lx == 0
1475                  IF (lx == 0) THEN
1476!$OMP DO COLLAPSE(2) SCHEDULE(STATIC)
1477                     DO ir = sr, er
1478                        DO ia = 1, na
1479                           rmom = -2.0_dp*pos(ia, ir, 1)*zet(ipgf, iset)*EXP(-zet(ipgf, iset)*pos(ia, ir, 4))
1480                           IF (ly /= 0) rmom = rmom*pos(ia, ir, 2)**ly
1481                           IF (lz /= 0) rmom = rmom*pos(ia, ir, 3)**lz
1482                           dco(ia, ir, 1, start + ico) = rmom
1483!                          dco(ia, ir, 1, start + ico) = -2.0_dp*pos(ia, ir, 1)*zet(ipgf, iset) &
1484!                                                        *pos(ia, ir, 2)**ly*pos(ia, ir, 3)**lz &
1485!                                                        *EXP(-zet(ipgf, iset)*pos(ia, ir, 4))
1486                        END DO
1487                     END DO
1488!$OMP END DO NOWAIT
1489                  ELSE
1490!$OMP DO COLLAPSE(2) SCHEDULE(STATIC)
1491                     DO ir = sr, er
1492                        DO ia = 1, na
1493                           IF (lx /= 1) THEN
1494                              rmom = (lx*pos(ia, ir, 1)**(lx - 1) - 2.0_dp*pos(ia, ir, 1)**(lx + 1)* &
1495                                      zet(ipgf, iset))*EXP(-zet(ipgf, iset)*pos(ia, ir, 4))
1496                           ELSE
1497                              rmom = (1.0_dp - 2.0_dp*pos(ia, ir, 1)**2*zet(ipgf, iset))* &
1498                                     EXP(-zet(ipgf, iset)*pos(ia, ir, 4))
1499                           END IF
1500                           IF (ly /= 0) rmom = rmom*pos(ia, ir, 2)**ly
1501                           IF (lz /= 0) rmom = rmom*pos(ia, ir, 3)**lz
1502                           dco(ia, ir, 1, start + ico) = rmom
1503!                          dco(ia, ir, 1, start + ico) = (lx*pos(ia, ir, 1)**(lx - 1) &
1504!                                                         - 2.0_dp*pos(ia, ir, 1)**(lx + 1)*zet(ipgf, iset)) &
1505!                                                        *pos(ia, ir, 2)**ly*pos(ia, ir, 3)**lz &
1506!                                                        *EXP(-zet(ipgf, iset)*pos(ia, ir, 4))
1507                        END DO
1508                     END DO
1509!$OMP END DO NOWAIT
1510                  END IF !lx == 0
1511
1512                  !y direction, special case if ly == 0
1513                  IF (ly == 0) THEN
1514!$OMP DO COLLAPSE(2) SCHEDULE(STATIC)
1515                     DO ir = sr, er
1516                        DO ia = 1, na
1517                           rmom = -2.0_dp*pos(ia, ir, 2)*zet(ipgf, iset)*EXP(-zet(ipgf, iset)*pos(ia, ir, 4))
1518                           IF (lx /= 0) rmom = rmom*pos(ia, ir, 1)**lx
1519                           IF (lz /= 0) rmom = rmom*pos(ia, ir, 3)**lz
1520                           dco(ia, ir, 2, start + ico) = rmom
1521!                          dco(ia, ir, 2, start + ico) = -2.0_dp*pos(ia, ir, 2)*zet(ipgf, iset) &
1522!                                                        *pos(ia, ir, 1)**lx*pos(ia, ir, 3)**lz &
1523!                                                        *EXP(-zet(ipgf, iset)*pos(ia, ir, 4))
1524                        END DO
1525                     END DO
1526!$OMP END DO NOWAIT
1527                  ELSE
1528!$OMP DO COLLAPSE(2) SCHEDULE(STATIC)
1529                     DO ir = sr, er
1530                        DO ia = 1, na
1531                           IF (ly /= 1) THEN
1532                              rmom = (ly*pos(ia, ir, 2)**(ly - 1) - 2.0_dp*pos(ia, ir, 2)**(ly + 1)*zet(ipgf, iset)) &
1533                                     *EXP(-zet(ipgf, iset)*pos(ia, ir, 4))
1534                           ELSE
1535                              rmom = (1.0_dp - 2.0_dp*pos(ia, ir, 2)**2*zet(ipgf, iset)) &
1536                                     *EXP(-zet(ipgf, iset)*pos(ia, ir, 4))
1537                           END IF
1538                           IF (lx /= 0) rmom = rmom*pos(ia, ir, 1)**lx
1539                           IF (lz /= 0) rmom = rmom*pos(ia, ir, 3)**lz
1540                           dco(ia, ir, 2, start + ico) = rmom
1541!                          dco(ia, ir, 2, start + ico) = (ly*pos(ia, ir, 2)**(ly - 1) &
1542!                                                         - 2.0_dp*pos(ia, ir, 2)**(ly + 1)*zet(ipgf, iset)) &
1543!                                                        *pos(ia, ir, 1)**lx*pos(ia, ir, 3)**lz &
1544!                                                        *EXP(-zet(ipgf, iset)*pos(ia, ir, 4))
1545                        END DO
1546                     END DO
1547!$OMP END DO NOWAIT
1548                  END IF !ly == 0
1549
1550                  !z direction, special case if lz == 0
1551                  IF (lz == 0) THEN
1552!$OMP DO COLLAPSE(2) SCHEDULE(STATIC)
1553                     DO ir = sr, er
1554                        DO ia = 1, na
1555                           rmom = -2.0_dp*pos(ia, ir, 3)*zet(ipgf, iset)*EXP(-zet(ipgf, iset)*pos(ia, ir, 4))
1556                           IF (lx /= 0) rmom = rmom*pos(ia, ir, 1)**lx
1557                           IF (ly /= 0) rmom = rmom*pos(ia, ir, 2)**ly
1558                           dco(ia, ir, 3, start + ico) = rmom
1559!                          dco(ia, ir, 3, start + ico) = -2.0_dp*pos(ia, ir, 3)*zet(ipgf, iset) &
1560!                                                        *pos(ia, ir, 1)**lx*pos(ia, ir, 2)**ly &
1561!                                                        *EXP(-zet(ipgf, iset)*pos(ia, ir, 4))
1562                        END DO
1563                     END DO
1564!$OMP END DO NOWAIT
1565                  ELSE
1566!$OMP DO COLLAPSE(2) SCHEDULE(STATIC)
1567                     DO ir = sr, er
1568                        DO ia = 1, na
1569                           IF (lz /= 1) THEN
1570                              rmom = (lz*pos(ia, ir, 3)**(lz - 1) - 2.0_dp*pos(ia, ir, 3)**(lz + 1)* &
1571                                      zet(ipgf, iset))*EXP(-zet(ipgf, iset)*pos(ia, ir, 4))
1572                           ELSE
1573                              rmom = (1.0_dp - 2.0_dp*pos(ia, ir, 3)**2*zet(ipgf, iset))* &
1574                                     EXP(-zet(ipgf, iset)*pos(ia, ir, 4))
1575                           END IF
1576                           IF (lx /= 0) rmom = rmom*pos(ia, ir, 1)**lx
1577                           IF (ly /= 0) rmom = rmom*pos(ia, ir, 2)**ly
1578                           dco(ia, ir, 3, start + ico) = rmom
1579!                          dco(ia, ir, 3, start + ico) = (lz*pos(ia, ir, 3)**(lz - 1) &
1580!                                                         - 2.0_dp*pos(ia, ir, 3)**(lz + 1)*zet(ipgf, iset)) &
1581!                                                        *pos(ia, ir, 1)**lx*pos(ia, ir, 2)**ly &
1582!                                                        *EXP(-zet(ipgf, iset)*pos(ia, ir, 4))
1583                        END DO
1584                     END DO
1585!$OMP END DO NOWAIT
1586                  END IF !lz == 0
1587
1588               END IF !gga
1589
1590            END DO !ico
1591         END DO !ipgf
1592
1593!$OMP END PARALLEL
1594
1595         !contract the co into sgf
1596         ALLOCATE (sgf(na, sr:er))
1597         IF (do_gga) ALLOCATE (dsgf(na, sr:er, 3))
1598         sgfi = first_sgf(1, iset) - 1
1599
1600         DO isgf = 1, nsgf_set(iset)
1601            sgf = 0.0_dp
1602            IF (do_gga) dsgf = 0.0_dp
1603
1604            DO ipgf = 1, npgf(iset)
1605               start = (ipgf - 1)*ncoset(lmax(iset))
1606               DO ico = ncoset(lmin(iset) - 1) + 1, ncoset(lmax(iset))
1607                  CALL daxpy(n, ri_sphi(start + ico, sgfi + isgf), co(:, sr:er, start + ico), 1, sgf(:, sr:er), 1)
1608               END DO !ico
1609            END DO !ipgf
1610
1611            !add the density to the grid
1612            CALL daxpy(n, ri_dcoeff(1)%array(sgfi + isgf), sgf(:, sr:er), 1, rhoa(:, sr:er, 1), 1)
1613
1614            IF (nspins == 2) THEN
1615               CALL daxpy(n, ri_dcoeff(2)%array(sgfi + isgf), sgf(:, sr:er), 1, rhob(:, sr:er, 1), 1)
1616            END IF
1617
1618            !deal with the gradient
1619            IF (do_gga) THEN
1620
1621               DO ipgf = 1, npgf(iset)
1622                  start = (ipgf - 1)*ncoset(lmax(iset))
1623                  DO ico = ncoset(lmin(iset) - 1) + 1, ncoset(lmax(iset))
1624                     DO dir = 1, 3
1625                        CALL daxpy(n, ri_sphi(start + ico, sgfi + isgf), dco(:, sr:er, dir, start + ico), &
1626                                   1, dsgf(:, sr:er, dir), 1)
1627                     END DO
1628                  END DO !ico
1629               END DO !ipgf
1630
1631               DO dir = 1, 3
1632                  CALL daxpy(n, ri_dcoeff(1)%array(sgfi + isgf), dsgf(:, sr:er, dir), 1, &
1633                             rho_set%drhoa(dir)%array(:, sr:er, 1), 1)
1634
1635                  IF (nspins == 2) THEN
1636                     CALL daxpy(n, ri_dcoeff(2)%array(sgfi + isgf), dsgf(:, sr:er, dir), 1, &
1637                                rho_set%drhob(dir)%array(:, sr:er, 1), 1)
1638                  END IF
1639               END DO
1640            END IF !do_gga
1641
1642         END DO !isgf
1643
1644         DEALLOCATE (co, sgf)
1645         IF (do_gga) DEALLOCATE (dco, dsgf)
1646      END DO !iset
1647
1648      !Treat spin-restricted case (copy alpha into beta)
1649      IF (nspins == 1) THEN
1650         CALL dcopy(n, rhoa(:, sr:er, 1), 1, rhob(:, sr:er, 1), 1)
1651
1652         IF (do_gga) THEN
1653            DO dir = 1, 3
1654               CALL dcopy(n, rho_set%drhoa(dir)%array(:, sr:er, 1), 1, rho_set%drhob(dir)%array(:, sr:er, 1), 1)
1655            END DO
1656         END IF
1657      END IF
1658
1659      CALL timestop(handle)
1660
1661   END SUBROUTINE put_density_on_other_grid
1662
1663! **************************************************************************************************
1664!> \brief Computes the norm of the density gradient on the atomic grid
1665!> \param rho_set ...
1666!> \param atom_kind ...
1667!> \param xas_atom_env ...
1668!> \note GGA is assumed
1669! **************************************************************************************************
1670   SUBROUTINE compute_norm_drho(rho_set, atom_kind, xas_atom_env)
1671
1672      TYPE(xc_rho_set_type), POINTER                     :: rho_set
1673      INTEGER, INTENT(IN)                                :: atom_kind
1674      TYPE(xas_atom_env_type), POINTER                   :: xas_atom_env
1675
1676      CHARACTER(len=*), PARAMETER :: routineN = 'compute_norm_drho', &
1677         routineP = moduleN//':'//routineN
1678
1679      INTEGER                                            :: dir, ia, ir, n, na, nr, nspins
1680
1681      na = xas_atom_env%grid_atom_set(atom_kind)%grid_atom%ng_sphere
1682      nr = xas_atom_env%grid_atom_set(atom_kind)%grid_atom%nr
1683      n = na*nr
1684      nspins = xas_atom_env%nspins
1685
1686      rho_set%norm_drhoa = 0.0_dp
1687      rho_set%norm_drhob = 0.0_dp
1688      rho_set%norm_drho = 0.0_dp
1689
1690      DO dir = 1, 3
1691         DO ir = 1, nr
1692            DO ia = 1, na
1693               rho_set%norm_drhoa(ia, ir, 1) = rho_set%norm_drhoa(ia, ir, 1) &
1694                                               + rho_set%drhoa(dir)%array(ia, ir, 1)**2
1695            END DO !ia
1696         END DO !ir
1697      END DO !dir
1698      rho_set%norm_drhoa = SQRT(rho_set%norm_drhoa)
1699
1700      IF (nspins == 1) THEN
1701         !spin-restricted
1702         CALL dcopy(n, rho_set%norm_drhoa(:, :, 1), 1, rho_set%norm_drhob(:, :, 1), 1)
1703      ELSE
1704         DO dir = 1, 3
1705            DO ir = 1, nr
1706               DO ia = 1, na
1707                  rho_set%norm_drhob(ia, ir, 1) = rho_set%norm_drhob(ia, ir, 1) &
1708                                                  + rho_set%drhob(dir)%array(ia, ir, 1)**2
1709               END DO
1710            END DO
1711         END DO
1712         rho_set%norm_drhob = SQRT(rho_set%norm_drhob)
1713      END IF
1714
1715      DO dir = 1, 3
1716         DO ir = 1, nr
1717            DO ia = 1, na
1718               rho_set%norm_drho(ia, ir, 1) = rho_set%norm_drho(ia, ir, 1) + &
1719                                              (rho_set%drhoa(dir)%array(ia, ir, 1) + &
1720                                               rho_set%drhob(dir)%array(ia, ir, 1))**2
1721            END DO
1722         END DO
1723      END DO
1724      rho_set%norm_drho = SQRT(rho_set%norm_drho)
1725
1726   END SUBROUTINE compute_norm_drho
1727
1728! **************************************************************************************************
1729!> \brief Precomputes the spherical orbitals of the RI basis on the excited atom grids
1730!> \param do_gga whether the gradient needs to be computed for GGA or not
1731!> \param batch_info the parallelization info to complete with so distribution info
1732!> \param xas_atom_env ...
1733!> \param qs_env ...
1734!> \note the functions are split in a purely angular part of size na and a purely radial part of
1735!>       size nr. The full function on the grid can simply be obtained with dgemm and we save space
1736! **************************************************************************************************
1737   SUBROUTINE precompute_so_dso(do_gga, batch_info, xas_atom_env, qs_env)
1738
1739      LOGICAL, INTENT(IN)                                :: do_gga
1740      TYPE(batch_info_type)                              :: batch_info
1741      TYPE(xas_atom_env_type), POINTER                   :: xas_atom_env
1742      TYPE(qs_environment_type), POINTER                 :: qs_env
1743
1744      CHARACTER(len=*), PARAMETER :: routineN = 'precompute_so_dso', &
1745         routineP = moduleN//':'//routineN
1746
1747      INTEGER                                            :: bo(2), dir, handle, ikind, ipgf, iset, &
1748                                                            iso, iso_proc, l, maxso, n, na, nkind, &
1749                                                            nr, nset, nso_proc, nsotot, starti
1750      INTEGER, DIMENSION(:), POINTER                     :: lmax, lmin, npgf, nsgf_set
1751      INTEGER, DIMENSION(:, :), POINTER                  :: so_proc_info
1752      REAL(dp), ALLOCATABLE, DIMENSION(:)                :: rexp
1753      REAL(dp), DIMENSION(:, :), POINTER                 :: dgr1, dgr2, ga, gr, slm, zet
1754      REAL(dp), DIMENSION(:, :, :), POINTER              :: dga1, dga2, dslm_dxyz
1755      TYPE(cp_para_env_type), POINTER                    :: para_env
1756      TYPE(grid_atom_type), POINTER                      :: grid_atom
1757      TYPE(gto_basis_set_type), POINTER                  :: ri_basis
1758      TYPE(harmonics_atom_type), POINTER                 :: harmonics
1759      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
1760
1761      NULLIFY (qs_kind_set, harmonics, grid_atom, slm, dslm_dxyz, ri_basis, lmax, lmin, npgf)
1762      NULLIFY (nsgf_set, zet, para_env, so_proc_info)
1763
1764      CALL timeset(routineN, handle)
1765
1766      CALL get_qs_env(qs_env, qs_kind_set=qs_kind_set, para_env=para_env)
1767      nkind = SIZE(qs_kind_set)
1768
1769      ALLOCATE (batch_info%so_proc_info(nkind))
1770      ALLOCATE (batch_info%nso_proc(nkind))
1771      ALLOCATE (batch_info%so_bo(2, nkind))
1772
1773      DO ikind = 1, nkind
1774
1775         NULLIFY (xas_atom_env%ga(ikind)%array)
1776         NULLIFY (xas_atom_env%gr(ikind)%array)
1777         NULLIFY (xas_atom_env%dga1(ikind)%array)
1778         NULLIFY (xas_atom_env%dga2(ikind)%array)
1779         NULLIFY (xas_atom_env%dgr1(ikind)%array)
1780         NULLIFY (xas_atom_env%dgr2(ikind)%array)
1781
1782         NULLIFY (batch_info%so_proc_info(ikind)%array)
1783
1784         IF (.NOT. ANY(xas_atom_env%excited_kinds == ikind)) CYCLE
1785
1786         !grid info
1787         harmonics => xas_atom_env%harmonics_atom_set(ikind)%harmonics_atom
1788         grid_atom => xas_atom_env%grid_atom_set(ikind)%grid_atom
1789
1790         na = grid_atom%ng_sphere
1791         nr = grid_atom%nr
1792         n = na*nr
1793
1794         slm => harmonics%slm
1795         dslm_dxyz => harmonics%dslm_dxyz
1796
1797         !basis info
1798         CALL get_qs_kind(qs_kind_set(ikind), basis_set=ri_basis, basis_type="RI_XAS")
1799         CALL get_gto_basis_set(ri_basis, lmax=lmax, npgf=npgf, zet=zet, nset=nset, &
1800                                nsgf_set=nsgf_set, lmin=lmin, maxso=maxso)
1801         nsotot = maxso*nset
1802
1803         !we split all so among the processors of the batch
1804         bo = get_limit(nsotot, batch_info%batch_size, batch_info%ipe)
1805         nso_proc = bo(2) - bo(1) + 1
1806         batch_info%so_bo(:, ikind) = bo
1807         batch_info%nso_proc(ikind) = nso_proc
1808
1809         !store info about the so's set, pgf and index
1810         ALLOCATE (batch_info%so_proc_info(ikind)%array(3, nso_proc))
1811         so_proc_info => batch_info%so_proc_info(ikind)%array
1812         so_proc_info = -1 !default is -1 => set so value to zero
1813         DO iset = 1, nset
1814            DO ipgf = 1, npgf(iset)
1815               starti = (iset - 1)*maxso + (ipgf - 1)*nsoset(lmax(iset))
1816               DO iso = nsoset(lmin(iset) - 1) + 1, nsoset(lmax(iset))
1817
1818                  !only consider so that are on this proc
1819                  IF (starti + iso < bo(1) .OR. starti + iso > bo(2)) CYCLE
1820                  iso_proc = starti + iso - bo(1) + 1
1821                  so_proc_info(1, iso_proc) = iset
1822                  so_proc_info(2, iso_proc) = ipgf
1823                  so_proc_info(3, iso_proc) = iso
1824
1825               END DO
1826            END DO
1827         END DO
1828
1829         !Put the gaussians and their gradient as purely angular or radial arrays
1830         ALLOCATE (xas_atom_env%ga(ikind)%array(na, nso_proc))
1831         ALLOCATE (xas_atom_env%gr(ikind)%array(nr, nso_proc))
1832         xas_atom_env%ga(ikind)%array = 0.0_dp; xas_atom_env%gr(ikind)%array = 0.0_dp
1833         IF (do_gga) THEN
1834            ALLOCATE (xas_atom_env%dga1(ikind)%array(na, nso_proc, 3))
1835            ALLOCATE (xas_atom_env%dgr1(ikind)%array(nr, nso_proc))
1836            ALLOCATE (xas_atom_env%dga2(ikind)%array(na, nso_proc, 3))
1837            ALLOCATE (xas_atom_env%dgr2(ikind)%array(nr, nso_proc))
1838            xas_atom_env%dga1(ikind)%array = 0.0_dp; xas_atom_env%dgr1(ikind)%array = 0.0_dp
1839            xas_atom_env%dga2(ikind)%array = 0.0_dp; xas_atom_env%dgr2(ikind)%array = 0.0_dp
1840         END IF
1841
1842         ga => xas_atom_env%ga(ikind)%array
1843         gr => xas_atom_env%gr(ikind)%array
1844         dga1 => xas_atom_env%dga1(ikind)%array
1845         dga2 => xas_atom_env%dga2(ikind)%array
1846         dgr1 => xas_atom_env%dgr1(ikind)%array
1847         dgr2 => xas_atom_env%dgr2(ikind)%array
1848
1849         ALLOCATE (rexp(nr))
1850
1851         DO iso_proc = 1, nso_proc
1852            iset = so_proc_info(1, iso_proc)
1853            ipgf = so_proc_info(2, iso_proc)
1854            iso = so_proc_info(3, iso_proc)
1855            IF (iso < 0) CYCLE
1856
1857            l = indso(1, iso)
1858
1859            !radial part of the gaussian
1860            rexp(1:nr) = EXP(-zet(ipgf, iset)*grid_atom%rad2(1:nr))
1861            gr(1:nr, iso_proc) = grid_atom%rad(1:nr)**l*rexp(1:nr)
1862
1863            !angular part of the gaussian
1864            ga(1:na, iso_proc) = slm(1:na, iso)
1865
1866            IF (do_gga) THEN
1867               !radial part of the gradient => same in all three direction
1868               dgr1(1:nr, iso_proc) = grid_atom%rad(1:nr)**(l - 1)*rexp(1:nr)
1869               dgr2(1:nr, iso_proc) = -2.0_dp*zet(ipgf, iset)*grid_atom%rad(1:nr)**(l + 1)*rexp(1:nr)
1870
1871               !angular part of the gradient
1872               DO dir = 1, 3
1873                  dga1(1:na, iso_proc, dir) = dslm_dxyz(dir, 1:na, iso)
1874                  dga2(1:na, iso_proc, dir) = harmonics%a(dir, 1:na)*slm(1:na, iso)
1875               END DO
1876            END IF
1877
1878         END DO !iso_proc
1879
1880      END DO !ikind
1881
1882      CALL timestop(handle)
1883
1884   END SUBROUTINE precompute_so_dso
1885
1886! **************************************************************************************************
1887!> \brief Integrate the xc kernel as a function of r on the atomic grids for the RI_XAS basis
1888!> \param int_fxc the global array containing the (P|fxc|Q) integrals, for all spin configurations
1889!> \param xas_atom_env ...
1890!> \param xas_tdp_control ...
1891!> \param qs_env ...
1892!> \note Note that if closed-shell, alpha-alpha term and beta-beta terms are the same
1893!>       Store the (P|fxc|Q) integrals on the processor they were computed on
1894!>       int_fxc(1)%matrix is alpha-alpha, 2: alpha-beta, 3: beta-beta
1895! **************************************************************************************************
1896   SUBROUTINE integrate_fxc_atoms(int_fxc, xas_atom_env, xas_tdp_control, qs_env)
1897
1898      TYPE(cp_2d_r_p_type), DIMENSION(:, :), POINTER     :: int_fxc
1899      TYPE(xas_atom_env_type), POINTER                   :: xas_atom_env
1900      TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
1901      TYPE(qs_environment_type), POINTER                 :: qs_env
1902
1903      CHARACTER(len=*), PARAMETER :: routineN = 'integrate_fxc_atoms', &
1904         routineP = moduleN//':'//routineN
1905
1906      INTEGER :: batch_size, dir, er, ex_bo(2), handle, i, iatom, ibatch, iex, ikind, inb, ipe, &
1907         mepos, na, natom, nb, nb_bo(2), nbatch, nbk, nex_atom, nr, num_pe, sr, subcomm
1908      INTEGER, DIMENSION(2, 3)                           :: bounds
1909      INTEGER, DIMENSION(:), POINTER                     :: exat_neighbors
1910      LOGICAL                                            :: do_gga, do_sc, do_sf
1911      TYPE(batch_info_type)                              :: batch_info
1912      TYPE(cp_1d_r_p_type), DIMENSION(:, :), POINTER     :: ri_dcoeff
1913      TYPE(cp_para_env_type), POINTER                    :: para_env
1914      TYPE(dft_control_type), POINTER                    :: dft_control
1915      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
1916      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
1917      TYPE(section_vals_type), POINTER                   :: input, xc_functionals
1918      TYPE(xc_derivative_set_type), POINTER              :: deriv_set
1919      TYPE(xc_rho_cflags_type)                           :: needs
1920      TYPE(xc_rho_set_type), POINTER                     :: rho_set
1921
1922      NULLIFY (particle_set, qs_kind_set, dft_control, deriv_set, para_env, exat_neighbors)
1923      NULLIFY (rho_set, input, xc_functionals)
1924
1925      CALL timeset(routineN, handle)
1926
1927!  Initialize
1928      CALL get_qs_env(qs_env, particle_set=particle_set, qs_kind_set=qs_kind_set, natom=natom, &
1929                      dft_control=dft_control, input=input, para_env=para_env)
1930      ALLOCATE (int_fxc(natom, 4))
1931      DO iatom = 1, natom
1932         DO i = 1, 4
1933            NULLIFY (int_fxc(iatom, i)%array)
1934         END DO
1935      END DO
1936      nex_atom = SIZE(xas_atom_env%excited_atoms)
1937      !spin conserving in the general sense here
1938      do_sc = xas_tdp_control%do_spin_cons .OR. xas_tdp_control%do_singlet .OR. xas_tdp_control%do_triplet
1939      do_sf = xas_tdp_control%do_spin_flip
1940
1941!  Get some info on the functionals
1942      xc_functionals => section_vals_get_subs_vals(input, "DFT%XAS_TDP%KERNEL%XC_FUNCTIONAL")
1943      ! ask for lsd in any case
1944      needs = xc_functionals_get_needs(xc_functionals, lsd=.TRUE., add_basic_components=.TRUE.)
1945      do_gga = needs%drho_spin !because either LDA or GGA, and the former does not need gradient
1946
1947!  Distribute the excited atoms over batches of processors
1948!  Then, the spherical orbital of the RI basis are distributed among the procs of the batch, making
1949!  the GGA integration very efficient
1950      num_pe = para_env%num_pe
1951      mepos = para_env%mepos
1952
1953      !create a batch_info_type
1954      CALL get_proc_batch_sizes(batch_size, nbatch, nex_atom, num_pe)
1955
1956      !the batch index
1957      ibatch = mepos/batch_size
1958      !the proc index within the batch
1959      ipe = MODULO(mepos, batch_size)
1960
1961      batch_info%batch_size = batch_size
1962      batch_info%nbatch = nbatch
1963      batch_info%ibatch = ibatch
1964      batch_info%ipe = ipe
1965
1966      !create a subcommunicator for this batch
1967      CALL mp_comm_split_direct(para_env%group, subcomm, ibatch)
1968      NULLIFY (batch_info%para_env)
1969      CALL cp_para_env_create(batch_info%para_env, subcomm)
1970
1971!  Precompute the spherical orbital of the RI basis (and maybe their gradient) on the grids of the
1972!  excited atoms. Needed for the GGA integration and to actually put the density on the grid
1973      CALL precompute_so_dso(do_gga, batch_info, xas_atom_env, qs_env)
1974
1975      !distribute the excted atoms over the batches
1976      ex_bo = get_limit(nex_atom, nbatch, ibatch)
1977
1978!  Looping over the excited atoms
1979      DO iex = ex_bo(1), ex_bo(2)
1980
1981         iatom = xas_atom_env%excited_atoms(iex)
1982         ikind = particle_set(iatom)%atomic_kind%kind_number
1983         exat_neighbors => xas_atom_env%exat_neighbors(iex)%array
1984         ri_dcoeff => xas_atom_env%ri_dcoeff(:, :, iex)
1985
1986!     General grid/basis info
1987         na = xas_atom_env%grid_atom_set(ikind)%grid_atom%ng_sphere
1988         nr = xas_atom_env%grid_atom_set(ikind)%grid_atom%nr
1989
1990!     Creating a xc_rho_set to store the density and dset for the kernel
1991         bounds(1:2, 1:3) = 1
1992         bounds(2, 1) = na
1993         bounds(2, 2) = nr
1994
1995         CALL xc_rho_set_create(rho_set=rho_set, local_bounds=bounds, &
1996                                rho_cutoff=dft_control%qs_control%eps_rho_rspace, &
1997                                drho_cutoff=dft_control%qs_control%eps_rho_rspace)
1998         CALL xc_dset_create(deriv_set, local_bounds=bounds)
1999
2000         ! allocate internals of the rho_set
2001         CALL xc_rho_set_atom_update(rho_set, needs, nspins=2, bo=bounds)
2002
2003!     Put the density, and possibly its gradient,  on the grid (for this atom)
2004         CALL put_density_on_atomic_grid(rho_set, ri_dcoeff(iatom, :), ikind, &
2005                                         do_gga, batch_info, xas_atom_env, qs_env)
2006
2007!     Take the neighboring atom contributions to the density (and gradient)
2008!     distribute the grid among the procs (for best load balance)
2009         nb_bo = get_limit(nr, batch_size, ipe)
2010         sr = nb_bo(1); er = nb_bo(2)
2011         DO inb = 1, SIZE(exat_neighbors)
2012
2013            nb = exat_neighbors(inb)
2014            nbk = particle_set(nb)%atomic_kind%kind_number
2015            CALL put_density_on_other_grid(rho_set, ri_dcoeff(nb, :), nb, nbk, iatom, &
2016                                           ikind, sr, er, do_gga, xas_atom_env, qs_env)
2017
2018         END DO
2019
2020         ! make sure contributions from different procs are summed up
2021         CALL mp_sum(rho_set%rhoa, batch_info%para_env%group)
2022         CALL mp_sum(rho_set%rhob, batch_info%para_env%group)
2023         IF (do_gga) THEN
2024            DO dir = 1, 3
2025               CALL mp_sum(rho_set%drhoa(dir)%array, batch_info%para_env%group)
2026               CALL mp_sum(rho_set%drhob(dir)%array, batch_info%para_env%group)
2027            END DO
2028         END IF
2029
2030!     In case of GGA, also need the norm of the density gradient
2031         IF (do_gga) CALL compute_norm_drho(rho_set, ikind, xas_atom_env)
2032
2033!     Compute the required derivatives
2034         CALL xc_functionals_eval(xc_functionals, lsd=.TRUE., rho_set=rho_set, deriv_set=deriv_set, &
2035                                  deriv_order=2)
2036
2037         !spin-conserving (LDA part)
2038         IF (do_sc) THEN
2039            CALL integrate_sc_fxc(int_fxc, iatom, ikind, deriv_set, xas_atom_env, qs_env)
2040         END IF
2041
2042         !spin-flip (LDA part)
2043         IF (do_sf) THEN
2044            CALL integrate_sf_fxc(int_fxc, iatom, ikind, rho_set, deriv_set, xas_atom_env, qs_env)
2045         END IF
2046
2047         !Gradient correction (note: spin-flip only keeps the lda part, aka ALDA0)
2048         IF (do_gga .AND. do_sc) THEN
2049            CALL integrate_gga_fxc(int_fxc, iatom, ikind, batch_info, rho_set, deriv_set, &
2050                                   xas_atom_env, qs_env)
2051         END IF
2052
2053!     Clean-up
2054         CALL xc_dset_release(deriv_set)
2055         CALL xc_rho_set_release(rho_set)
2056      END DO !iex
2057
2058      CALL release_batch_info(batch_info)
2059
2060      !Not necessary to sync, but makes sure that any load inbalance is reported here
2061      CALL mp_sync(para_env%group)
2062
2063      CALL timestop(handle)
2064
2065   END SUBROUTINE integrate_fxc_atoms
2066
2067! **************************************************************************************************
2068!> \brief Integrate the gradient correction part of the xc kernel on the atomic grid
2069!> \param int_fxc the array containing the (P|fxc|Q) integrals
2070!> \param iatom the index of the current excited atom
2071!> \param ikind the index of the current excited kind
2072!> \param batch_info how the so are distributed over the processor batch
2073!> \param rho_set the variable contatinind the density and its gradient
2074!> \param deriv_set the functional derivatives
2075!> \param xas_atom_env ...
2076!> \param qs_env ...
2077!> \note Ignored in case of pure LDA, added on top of the LDA kernel in case of GGA
2078! **************************************************************************************************
2079   SUBROUTINE integrate_gga_fxc(int_fxc, iatom, ikind, batch_info, rho_set, deriv_set, &
2080                                xas_atom_env, qs_env)
2081
2082      TYPE(cp_2d_r_p_type), DIMENSION(:, :), POINTER     :: int_fxc
2083      INTEGER, INTENT(IN)                                :: iatom, ikind
2084      TYPE(batch_info_type)                              :: batch_info
2085      TYPE(xc_rho_set_type), POINTER                     :: rho_set
2086      TYPE(xc_derivative_set_type), POINTER              :: deriv_set
2087      TYPE(xas_atom_env_type), POINTER                   :: xas_atom_env
2088      TYPE(qs_environment_type), POINTER                 :: qs_env
2089
2090      CHARACTER(len=*), PARAMETER :: routineN = 'integrate_gga_fxc', &
2091         routineP = moduleN//':'//routineN
2092
2093      INTEGER                                            :: bo(2), dir, handle, i, ia, ir, jpgf, &
2094                                                            jset, jso, l, maxso, na, nr, nset, &
2095                                                            nsgf, nsoi, nsotot, startj, ub
2096      INTEGER, DIMENSION(:), POINTER                     :: lmax, lmin, npgf
2097      REAL(dp), ALLOCATABLE, DIMENSION(:)                :: rexp
2098      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: int_sgf, res, so, work
2099      REAL(dp), ALLOCATABLE, DIMENSION(:, :, :)          :: dso
2100      REAL(dp), DIMENSION(:, :), POINTER                 :: dgr1, dgr2, ga, gr, ri_sphi_so, weight, &
2101                                                            zet
2102      REAL(dp), DIMENSION(:, :, :), POINTER              :: dga1, dga2
2103      TYPE(cp_2d_r_p_type), ALLOCATABLE, DIMENSION(:)    :: int_so, vxc
2104      TYPE(cp_3d_r_p_type), ALLOCATABLE, DIMENSION(:)    :: vxg
2105      TYPE(cp_para_env_type), POINTER                    :: para_env
2106      TYPE(grid_atom_type), POINTER                      :: grid_atom
2107      TYPE(gto_basis_set_type), POINTER                  :: ri_basis
2108      TYPE(harmonics_atom_type), POINTER                 :: harmonics
2109      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
2110
2111      NULLIFY (grid_atom, ri_basis, qs_kind_set, ga, gr, dgr1, dgr2, lmax, lmin, npgf)
2112      NULLIFY (weight, ri_sphi_so, dga1, dga2, para_env, harmonics, zet)
2113
2114      !Strategy: we need to compute <phi_i|fxc|phij>, most of existing application of the 2nd
2115      !          functional derivative involve the response density, and the expression of the
2116      !          integral int (fxc*n^1) is well known. We substitute the spherical orbital phi_j
2117      !          in place of n^1 in the formula and thus perform the first integration. Then
2118      !          we obtain something in the form int (fxc*phi_j) = Vxc - div (Vxg) that we can
2119      !          put on the grid and treat like a potential. The second integration is done by
2120      !          using the divergence theorem and numerical integration:
2121      !          <phi_i|fxc|phi_j> = int phi_i*(Vxc - div(Vxg)) = int phi_i*Vxc + grad(phi_i).Vxg
2122      !          Note the sign change and the dot product.
2123
2124      CALL timeset(routineN, handle)
2125
2126      !If closed shell, only compute f_aa and f_ab (ub = 2)
2127      ub = 2
2128      IF (xas_atom_env%nspins == 2) ub = 3
2129
2130      !Get the necessary grid info
2131      harmonics => xas_atom_env%harmonics_atom_set(ikind)%harmonics_atom
2132      grid_atom => xas_atom_env%grid_atom_set(ikind)%grid_atom
2133      na = grid_atom%ng_sphere
2134      nr = grid_atom%nr
2135      weight => grid_atom%weight
2136
2137      !get the ri_basis info
2138      CALL get_qs_env(qs_env, qs_kind_set=qs_kind_set, para_env=para_env)
2139      CALL get_qs_kind(qs_kind_set(ikind), basis_set=ri_basis, basis_type="RI_XAS")
2140
2141      CALL get_gto_basis_set(gto_basis_set=ri_basis, lmax=lmax, lmin=lmin, nsgf=nsgf, &
2142                             maxso=maxso, npgf=npgf, nset=nset, zet=zet)
2143      nsotot = nset*maxso
2144
2145      !Point to the precomputed so
2146      ga => xas_atom_env%ga(ikind)%array
2147      gr => xas_atom_env%gr(ikind)%array
2148      dgr1 => xas_atom_env%dgr1(ikind)%array
2149      dgr2 => xas_atom_env%dgr2(ikind)%array
2150      dga1 => xas_atom_env%dga1(ikind)%array
2151      dga2 => xas_atom_env%dga2(ikind)%array
2152
2153      !Before integration, wanna pre-divide all relevant derivastives by the nrom of the gradient
2154      CALL divide_by_norm_drho(deriv_set, rho_set, lsd=.TRUE.)
2155
2156      !Wanna integrate <phi_i|fxc|phi_j>, start looping over phi_j and do the first integration, then
2157      !collect vxc and vxg and loop over phi_i for the second integration
2158      !Note: we do not use the CG coefficients because they are only useful when there is a product
2159      !      of Gaussians, which is not really the case here
2160      !Note: the spherical orbitals for phi_i are distributed among the prcos of the current batch
2161
2162      ALLOCATE (so(na, nr))
2163      ALLOCATE (dso(na, nr, 3))
2164      ALLOCATE (rexp(nr))
2165
2166      ALLOCATE (vxc(ub))
2167      ALLOCATE (vxg(ub))
2168      ALLOCATE (int_so(ub))
2169      DO i = 1, ub
2170         ALLOCATE (vxc(i)%array(na, nr))
2171         ALLOCATE (vxg(i)%array(na, nr, 3))
2172         ALLOCATE (int_so(i)%array(nsotot, nsotot))
2173         vxc(i)%array = 0.0_dp; vxg(i)%array = 0.0_dp; int_so(i)%array = 0.0_dp
2174      END DO
2175
2176      DO jset = 1, nset
2177         DO jpgf = 1, npgf(jset)
2178            startj = (jset - 1)*maxso + (jpgf - 1)*nsoset(lmax(jset))
2179            DO jso = nsoset(lmin(jset) - 1) + 1, nsoset(lmax(jset))
2180               l = indso(1, jso)
2181
2182               !put the so phi_j and its gradient on the grid
2183               !more efficient to recompute it rather than mp_bcast each chunk
2184
2185               rexp(1:nr) = EXP(-zet(jpgf, jset)*grid_atom%rad2(1:nr))
2186!$OMP PARALLEL DO COLLAPSE(2) DEFAULT(NONE), &
2187!$OMP SHARED(nr,na,so,dso,grid_atom,l,rexp,harmonics,jso,zet,jset,jpgf), &
2188!$OMP PRIVATE(ir,ia,dir)
2189               DO ir = 1, nr
2190                  DO ia = 1, na
2191
2192                     !so
2193                     so(ia, ir) = grid_atom%rad(ir)**l*rexp(ir)*harmonics%slm(ia, jso)
2194
2195                     !dso
2196                     dso(ia, ir, :) = 0.0_dp
2197                     DO dir = 1, 3
2198                        dso(ia, ir, dir) = dso(ia, ir, dir) &
2199                                           + grid_atom%rad(ir)**(l - 1)*rexp(ir)*harmonics%dslm_dxyz(dir, ia, jso) &
2200                                           - 2.0_dp*zet(jpgf, jset)*grid_atom%rad(ir)**(l + 1)*rexp(ir) &
2201                                           *harmonics%a(dir, ia)*harmonics%slm(ia, jso)
2202                     END DO
2203                  END DO
2204               END DO
2205!$OMP END PARALLEL DO
2206
2207               !Perform the first integration (analytically)
2208               CALL get_vxc_vxg(vxc, vxg, so, dso, na, nr, rho_set, deriv_set, weight)
2209
2210               !For a given phi_j, compute the second integration with all phi_i at once
2211               !=> allows for efficient gemm to take place, especially since so are distributed
2212               nsoi = batch_info%nso_proc(ikind)
2213               bo = batch_info%so_bo(:, ikind)
2214               ALLOCATE (res(nsoi, nsoi), work(na, nsoi))
2215               res = 0.0_dp; work = 0.0_dp
2216
2217               DO i = 1, ub
2218
2219                  !integrate so*Vxc and store in the int_so
2220                  CALL dgemm('N', 'N', na, nsoi, nr, 1.0_dp, vxc(i)%array(:, :), na, &
2221                             gr(:, 1:nsoi), nr, 0.0_dp, work, na)
2222                  CALL dgemm('T', 'N', nsoi, nsoi, na, 1.0_dp, work, na, &
2223                             ga(:, 1:nsoi), na, 0.0_dp, res, nsoi)
2224                  int_so(i)%array(bo(1):bo(2), startj + jso) = get_diag(res)
2225
2226                  DO dir = 1, 3
2227
2228                     ! integrate and sum up Vxg*dso
2229                     CALL dgemm('N', 'N', na, nsoi, nr, 1.0_dp, vxg(i)%array(:, :, dir), na, &
2230                                dgr1(:, 1:nsoi), nr, 0.0_dp, work, na)
2231                     CALL dgemm('T', 'N', nsoi, nsoi, na, 1.0_dp, work, na, &
2232                                dga1(:, 1:nsoi, dir), na, 0.0_dp, res, nsoi)
2233                     CALL daxpy(nsoi, 1.0_dp, get_diag(res), 1, int_so(i)%array(bo(1):bo(2), startj + jso), 1)
2234
2235                     CALL dgemm('N', 'N', na, nsoi, nr, 1.0_dp, vxg(i)%array(:, :, dir), na, &
2236                                dgr2(:, 1:nsoi), nr, 0.0_dp, work, na)
2237                     CALL dgemm('T', 'N', nsoi, nsoi, na, 1.0_dp, work, na, &
2238                                dga2(:, 1:nsoi, dir), na, 0.0_dp, res, nsoi)
2239                     CALL daxpy(nsoi, 1.0_dp, get_diag(res), 1, int_so(i)%array(bo(1):bo(2), startj + jso), 1)
2240
2241                  END DO
2242
2243               END DO !i
2244               DEALLOCATE (res, work)
2245
2246            END DO !jso
2247         END DO !jpgf
2248      END DO !jset
2249
2250      !Contract into sgf and add to already computed LDA part of int_fxc
2251      ri_sphi_so => xas_atom_env%ri_sphi_so(ikind)%array
2252      ALLOCATE (int_sgf(nsgf, nsgf))
2253      DO i = 1, ub
2254         CALL mp_sum(int_so(i)%array, batch_info%para_env%group)
2255         CALL contract_so2sgf(int_sgf, int_so(i)%array, ri_basis, ri_sphi_so)
2256         CALL daxpy(nsgf*nsgf, 1.0_dp, int_sgf, 1, int_fxc(iatom, i)%array, 1)
2257      END DO
2258
2259      !Clean-up
2260      DO i = 1, ub
2261         DEALLOCATE (vxc(i)%array)
2262         DEALLOCATE (vxg(i)%array)
2263         DEALLOCATE (int_so(i)%array)
2264      END DO
2265      DEALLOCATE (vxc, vxg, int_so)
2266
2267      CALL timestop(handle)
2268
2269   END SUBROUTINE integrate_gga_fxc
2270
2271! **************************************************************************************************
2272!> \brief Computes the first integration of the GGA part of <phi_i|fxc|phi_j>, i.e. int fxc*phi_j.
2273!>        The result is of the form Vxc - div(Vxg). Up to 3 results are returned, correspoinding to
2274!>        f_aa, f_ab and (if open-shell) f_bb
2275!> \param vxc ...
2276!> \param vxg ...
2277!> \param so the spherical orbital on the grid
2278!> \param dso the derivative of the spherical orbital on the grid
2279!> \param na ...
2280!> \param nr ...
2281!> \param rho_set ...
2282!> \param deriv_set ...
2283!> \param weight the grid weight
2284!> \note This method is extremely similar to xc_calc_2nd_deriv of xc.F, but because it is a special
2285!>       case that can be further optimized and because the interface of the original routine does
2286!>       not fit this code, it has been re-written (no pw, no rho1_set but just the so, etc...)
2287! **************************************************************************************************
2288   SUBROUTINE get_vxc_vxg(vxc, vxg, so, dso, na, nr, rho_set, deriv_set, weight)
2289
2290      TYPE(cp_2d_r_p_type), DIMENSION(:)                 :: vxc
2291      TYPE(cp_3d_r_p_type), DIMENSION(:)                 :: vxg
2292      REAL(dp), DIMENSION(:, :)                          :: so
2293      REAL(dp), DIMENSION(:, :, :)                       :: dso
2294      INTEGER, INTENT(IN)                                :: na, nr
2295      TYPE(xc_rho_set_type), POINTER                     :: rho_set
2296      TYPE(xc_derivative_set_type), POINTER              :: deriv_set
2297      REAL(dp), DIMENSION(:, :), POINTER                 :: weight
2298
2299      CHARACTER(len=*), PARAMETER :: routineN = 'get_vxc_vxg', routineP = moduleN//':'//routineN
2300
2301      INTEGER                                            :: dir, handle, i, ia, ir, ub
2302      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: dot_proda, dot_prodb, tmp
2303      REAL(dp), DIMENSION(:, :, :), POINTER              :: d1e, d2e, norm_drhoa, norm_drhob
2304      TYPE(xc_derivative_type), POINTER                  :: deriv
2305
2306      NULLIFY (norm_drhoa, norm_drhob, d2e, d1e, deriv)
2307
2308      CALL timeset(routineN, handle)
2309
2310      !Note: this routines follows the order of the terms in equaiton (A.7) of Thomas Chassaing
2311      !      thesis, except for the pure LDA terms that are dropped. The n^(1)_a and n^(1)_b
2312      !      response densities are replaced by the spherical orbital.
2313      !      The usual spin ordering is used: aa => 1, ab => 2 , bb => 3
2314
2315      !point to the relevant components of rho_set
2316      ub = SIZE(vxc)
2317      norm_drhoa => rho_set%norm_drhoa
2318      norm_drhob => rho_set%norm_drhob
2319
2320      !Some init
2321      DO i = 1, ub
2322         vxc(i)%array = 0.0_dp
2323         vxg(i)%array = 0.0_dp
2324      END DO
2325
2326      ALLOCATE (tmp(na, nr), dot_proda(na, nr), dot_prodb(na, nr))
2327      dot_proda = 0.0_dp; dot_prodb = 0.0_dp
2328
2329      !Strategy: most terms are either multiplied by drhoa or drhob => group those first and then
2330      !          multiply. Also most terms are multiplied by the dot product grad_n . grad_so, so
2331      !          precompute it as well
2332
2333!$OMP PARALLEL DEFAULT(NONE), &
2334!$OMP SHARED(dot_proda,dot_prodb,tmp,vxc,vxg,deriv_set,rho_set,na,nr,norm_drhoa,norm_drhob,dso, &
2335!$OMP        so,weight,ub), &
2336!$OMP PRIVATE(ia,ir,dir,deriv,d1e,d2e)
2337
2338      !Precompute the very common dot products grad_na . grad_so and grad_nb . grad_so
2339      DO dir = 1, 3
2340!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2341         DO ir = 1, nr
2342            DO ia = 1, na
2343               dot_proda(ia, ir) = dot_proda(ia, ir) + rho_set%drhoa(dir)%array(ia, ir, 1)*dso(ia, ir, dir)
2344               dot_prodb(ia, ir) = dot_prodb(ia, ir) + rho_set%drhob(dir)%array(ia, ir, 1)*dso(ia, ir, dir)
2345            END DO !ia
2346         END DO !ir
2347!$OMP END DO NOWAIT
2348      END DO !dir
2349
2350      !Deal with f_aa
2351
2352      !Vxc, first term
2353      deriv => xc_dset_get_derivative(deriv_set, "(rhoa)(norm_drhoa)")
2354      CALL xc_derivative_get(deriv, deriv_data=d2e)
2355!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2356      DO ir = 1, nr
2357         DO ia = 1, na
2358            vxc(1)%array(ia, ir) = d2e(ia, ir, 1)*dot_proda(ia, ir)
2359         END DO !ia
2360      END DO !ir
2361!$OMP END DO NOWAIT
2362
2363      !Vxc, second term
2364      deriv => xc_dset_get_derivative(deriv_set, "(rhoa)(norm_drho)")
2365      CALL xc_derivative_get(deriv, deriv_data=d2e)
2366!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2367      DO ir = 1, nr
2368         DO ia = 1, na
2369            vxc(1)%array(ia, ir) = vxc(1)%array(ia, ir) + d2e(ia, ir, 1)*dot_prodb(ia, ir)
2370         END DO !ia
2371      END DO !ir
2372!$OMP END DO NOWAIT
2373
2374      !Vxc, take the grid weight into acocunt
2375!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2376      DO ir = 1, nr
2377         DO ia = 1, na
2378            vxc(1)%array(ia, ir) = vxc(1)%array(ia, ir)*weight(ia, ir)
2379         END DO !ia
2380      END DO !ir
2381!$OMP END DO NOWAIT
2382
2383      !Vxg, first term (to be multiplied by drhoa)
2384      deriv => xc_dset_get_derivative(deriv_set, "(rhoa)(norm_drhoa)")
2385      CALL xc_derivative_get(deriv, deriv_data=d2e)
2386!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2387      DO ir = 1, nr
2388         DO ia = 1, na
2389            tmp(ia, ir) = d2e(ia, ir, 1)*so(ia, ir)
2390         END DO !ia
2391      END DO !ir
2392!$OMP END DO NOWAIT
2393
2394      !Vxg, second term (to be multiplied by drhoa)
2395      deriv => xc_dset_get_derivative(deriv_set, "(norm_drhoa)(norm_drho)")
2396      CALL xc_derivative_get(deriv, deriv_data=d2e)
2397!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2398      DO ir = 1, nr
2399         DO ia = 1, na
2400            tmp(ia, ir) = tmp(ia, ir) + d2e(ia, ir, 1)*dot_prodb(ia, ir)
2401         END DO !ia
2402      END DO !ir
2403!$OMP END DO NOWAIT
2404
2405      !Vxg, third term (to be multiplied by drhoa)
2406      deriv => xc_dset_get_derivative(deriv_set, "(norm_drhoa)(norm_drhoa)")
2407      CALL xc_derivative_get(deriv, deriv_data=d2e)
2408!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2409      DO ir = 1, nr
2410         DO ia = 1, na
2411            tmp(ia, ir) = tmp(ia, ir) + d2e(ia, ir, 1)*dot_proda(ia, ir)
2412         END DO !ia
2413      END DO !ir
2414!$OMP END DO NOWAIT
2415
2416      !Vxg, fourth term (to be multiplied by drhoa)
2417      deriv => xc_dset_get_derivative(deriv_set, "(norm_drhoa)")
2418      CALL xc_derivative_get(deriv, deriv_data=d1e)
2419!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2420      DO ir = 1, nr
2421         DO ia = 1, na
2422            tmp(ia, ir) = tmp(ia, ir) - d1e(ia, ir, 1)*dot_proda(ia, ir) &
2423                          /MAX(norm_drhoa(ia, ir, 1), rho_set%drho_cutoff)**2
2424         END DO !ia
2425      END DO !ir
2426!$OMP END DO NOWAIT
2427
2428      !put tmp*drhoa in Vxg (so that we can reuse it for drhob terms)
2429      DO dir = 1, 3
2430!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2431         DO ir = 1, nr
2432            DO ia = 1, na
2433               vxg(1)%array(ia, ir, dir) = tmp(ia, ir)*rho_set%drhoa(dir)%array(ia, ir, 1)
2434            END DO !ia
2435         END DO !ir
2436!$OMP END DO NOWAIT
2437      END DO !dir
2438
2439      !Vxg, fifth term (to be multiplied by drhob)
2440      deriv => xc_dset_get_derivative(deriv_set, "(rhoa)(norm_drho)")
2441      CALL xc_derivative_get(deriv, deriv_data=d2e)
2442!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2443      DO ir = 1, nr
2444         DO ia = 1, na
2445            tmp(ia, ir) = d2e(ia, ir, 1)*so(ia, ir)
2446         END DO !ia
2447      END DO !ir
2448!$OMP END DO NOWAIT
2449
2450      !Vxg, sixth term (to be multiplied by drhob)
2451      deriv => xc_dset_get_derivative(deriv_set, "(norm_drhoa)(norm_drho)")
2452      CALL xc_derivative_get(deriv, deriv_data=d2e)
2453!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2454      DO ir = 1, nr
2455         DO ia = 1, na
2456            tmp(ia, ir) = tmp(ia, ir) + d2e(ia, ir, 1)*dot_proda(ia, ir)
2457         END DO !ia
2458      END DO !ir
2459!$OMP END DO NOWAIT
2460
2461      !Vxg, seventh term (to be multiplied by drhob)
2462      deriv => xc_dset_get_derivative(deriv_set, "(norm_drho)(norm_drho)")
2463      CALL xc_derivative_get(deriv, deriv_data=d2e)
2464!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2465      DO ir = 1, nr
2466         DO ia = 1, na
2467            tmp(ia, ir) = tmp(ia, ir) + d2e(ia, ir, 1)*dot_prodb(ia, ir)
2468         END DO !ia
2469      END DO !ir
2470!$OMP END DO NOWAIT
2471
2472      !put tmp*drhob in Vxg
2473      DO dir = 1, 3
2474!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2475         DO ir = 1, nr
2476            DO ia = 1, na
2477               vxg(1)%array(ia, ir, dir) = vxg(1)%array(ia, ir, dir) + tmp(ia, ir)*rho_set%drhob(dir)%array(ia, ir, 1)
2478            END DO !ia
2479         END DO !ir
2480!$OMP END DO NOWAIT
2481      END DO !dir
2482
2483      !Vxg, last term
2484      deriv => xc_dset_get_derivative(deriv_set, "(norm_drhoa)")
2485      CALL xc_derivative_get(deriv, deriv_data=d1e)
2486      DO dir = 1, 3
2487!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2488         DO ir = 1, nr
2489            DO ia = 1, na
2490               vxg(1)%array(ia, ir, dir) = vxg(1)%array(ia, ir, dir) + d1e(ia, ir, 1)*dso(ia, ir, dir)
2491            END DO !ia
2492         END DO !ir
2493!$OMP END DO NOWAIT
2494      END DO !dir
2495
2496      !Vxg, take the grid weight into account
2497      DO dir = 1, 3
2498!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2499         DO ir = 1, nr
2500            DO ia = 1, na
2501               vxg(1)%array(ia, ir, dir) = vxg(1)%array(ia, ir, dir)*weight(ia, ir)
2502            END DO !ia
2503         END DO !ir
2504!$OMP END DO NOWAIT
2505      END DO !dir
2506
2507      !Deal with fab
2508
2509      !Vxc, first term
2510      deriv => xc_dset_get_derivative(deriv_set, "(rhoa)(norm_drhob)")
2511      CALL xc_derivative_get(deriv, deriv_data=d2e)
2512!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2513      DO ir = 1, nr
2514         DO ia = 1, na
2515            vxc(2)%array(ia, ir) = d2e(ia, ir, 1)*dot_prodb(ia, ir)
2516         END DO !ia
2517      END DO !ir
2518!$OMP END DO NOWAIT
2519
2520      !Vxc, second term
2521      deriv => xc_dset_get_derivative(deriv_set, "(rhoa)(norm_drho)")
2522      CALL xc_derivative_get(deriv, deriv_data=d2e)
2523!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2524      DO ir = 1, nr
2525         DO ia = 1, na
2526            vxc(2)%array(ia, ir) = vxc(2)%array(ia, ir) + d2e(ia, ir, 1)*dot_proda(ia, ir)
2527         END DO !ia
2528      END DO !ir
2529!$OMP END DO NOWAIT
2530
2531      !Vxc, take the grid weight into acocunt
2532!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2533      DO ir = 1, nr
2534         DO ia = 1, na
2535            vxc(2)%array(ia, ir) = vxc(2)%array(ia, ir)*weight(ia, ir)
2536         END DO !ia
2537      END DO !ir
2538!$OMP END DO NOWAIT
2539
2540      !Vxg, first term (to be multiplied by drhoa)
2541      deriv => xc_dset_get_derivative(deriv_set, "(rhob)(norm_drhoa)")
2542      CALL xc_derivative_get(deriv, deriv_data=d2e)
2543!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2544      DO ir = 1, nr
2545         DO ia = 1, na
2546            tmp(ia, ir) = d2e(ia, ir, 1)*so(ia, ir)
2547         END DO !ia
2548      END DO !ir
2549!$OMP END DO NOWAIT
2550
2551      !Vxg, second term (to be multiplied by drhoa)
2552      deriv => xc_dset_get_derivative(deriv_set, "(norm_drhoa)(norm_drho)")
2553      CALL xc_derivative_get(deriv, deriv_data=d2e)
2554!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2555      DO ir = 1, nr
2556         DO ia = 1, na
2557            tmp(ia, ir) = tmp(ia, ir) + d2e(ia, ir, 1)*dot_proda(ia, ir)
2558         END DO !ia
2559      END DO !ir
2560!$OMP END DO NOWAIT
2561
2562      !Vxg, third term (to be multiplied by drhoa)
2563      deriv => xc_dset_get_derivative(deriv_set, "(norm_drhoa)(norm_drhob)")
2564      CALL xc_derivative_get(deriv, deriv_data=d2e)
2565!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2566      DO ir = 1, nr
2567         DO ia = 1, na
2568            tmp(ia, ir) = tmp(ia, ir) + d2e(ia, ir, 1)*dot_prodb(ia, ir)
2569         END DO !ia
2570      END DO !ir
2571!$OMP END DO NOWAIT
2572
2573      !put tmp*drhoa in Vxg
2574      DO dir = 1, 3
2575!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2576         DO ir = 1, nr
2577            DO ia = 1, na
2578               vxg(2)%array(ia, ir, dir) = tmp(ia, ir)*rho_set%drhoa(dir)%array(ia, ir, 1)
2579            END DO !ia
2580         END DO !ir
2581!$OMP END DO NOWAIT
2582      END DO !dir
2583
2584      !Vxg, fourth term (to be multiplied by drhob)
2585      deriv => xc_dset_get_derivative(deriv_set, "(rhob)(norm_drho)")
2586      CALL xc_derivative_get(deriv, deriv_data=d2e)
2587!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2588      DO ir = 1, nr
2589         DO ia = 1, na
2590            tmp(ia, ir) = d2e(ia, ir, 1)*so(ia, ir)
2591         END DO !ia
2592      END DO !ir
2593!$OMP END DO NOWAIT
2594
2595      !Vxg, fifth term (to be multiplied by drhob)
2596      deriv => xc_dset_get_derivative(deriv_set, "(norm_drho)(norm_drhob)")
2597      CALL xc_derivative_get(deriv, deriv_data=d2e)
2598!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2599      DO ir = 1, nr
2600         DO ia = 1, na
2601            tmp(ia, ir) = tmp(ia, ir) + d2e(ia, ir, 1)*dot_prodb(ia, ir)
2602         END DO !ia
2603      END DO !ir
2604!$OMP END DO NOWAIT
2605
2606      !Vxg, sixth term (to be multiplied by drhob)
2607      deriv => xc_dset_get_derivative(deriv_set, "(norm_drho)(norm_drho)")
2608      CALL xc_derivative_get(deriv, deriv_data=d2e)
2609!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2610      DO ir = 1, nr
2611         DO ia = 1, na
2612            tmp(ia, ir) = tmp(ia, ir) + d2e(ia, ir, 1)*dot_proda(ia, ir)
2613         END DO !ia
2614      END DO !ir
2615!$OMP END DO NOWAIT
2616
2617      !put tmp*drhob in Vxg
2618      DO dir = 1, 3
2619!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2620         DO ir = 1, nr
2621            DO ia = 1, na
2622               vxg(2)%array(ia, ir, dir) = vxg(2)%array(ia, ir, dir) + tmp(ia, ir)*rho_set%drhob(dir)%array(ia, ir, 1)
2623            END DO
2624         END DO
2625!$OMP END DO NOWAIT
2626      END DO
2627
2628      !Vxg, last term
2629      deriv => xc_dset_get_derivative(deriv_set, "(norm_drho)")
2630      CALL xc_derivative_get(deriv, deriv_data=d1e)
2631      DO dir = 1, 3
2632!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2633         DO ir = 1, nr
2634            DO ia = 1, na
2635               vxg(2)%array(ia, ir, dir) = vxg(2)%array(ia, ir, dir) + d1e(ia, ir, 1)*dso(ia, ir, dir)
2636            END DO !ia
2637         END DO !ir
2638!$OMP END DO NOWAIT
2639      END DO !dir
2640
2641      !Vxg, take the grid weight into account
2642      DO dir = 1, 3
2643!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2644         DO ir = 1, nr
2645            DO ia = 1, na
2646               vxg(2)%array(ia, ir, dir) = vxg(2)%array(ia, ir, dir)*weight(ia, ir)
2647            END DO !ia
2648         END DO !ir
2649!$OMP END DO NOWAIT
2650      END DO !dir
2651
2652      !Deal with f_bb, if so required
2653      IF (ub == 3) THEN
2654
2655         !Vxc, first term
2656         deriv => xc_dset_get_derivative(deriv_set, "(rhob)(norm_drhob)")
2657         CALL xc_derivative_get(deriv, deriv_data=d2e)
2658!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2659         DO ir = 1, nr
2660            DO ia = 1, na
2661               vxc(3)%array(ia, ir) = d2e(ia, ir, 1)*dot_prodb(ia, ir)
2662            END DO !ia
2663         END DO !ir
2664!$OMP END DO NOWAIT
2665
2666         !Vxc, second term
2667         deriv => xc_dset_get_derivative(deriv_set, "(rhob)(norm_drho)")
2668         CALL xc_derivative_get(deriv, deriv_data=d2e)
2669!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2670         DO ir = 1, nr
2671            DO ia = 1, na
2672               vxc(3)%array(ia, ir) = vxc(3)%array(ia, ir) + d2e(ia, ir, 1)*dot_proda(ia, ir)
2673            END DO !i
2674         END DO !ir
2675!$OMP END DO NOWAIT
2676
2677         !Vxc, take the grid weight into acocunt
2678!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2679         DO ir = 1, nr
2680            DO ia = 1, na
2681               vxc(3)%array(ia, ir) = vxc(3)%array(ia, ir)*weight(ia, ir)
2682            END DO !ia
2683         END DO !ir
2684!$OMP END DO NOWAIT
2685
2686         !Vxg, first term (to be multiplied by drhob)
2687         deriv => xc_dset_get_derivative(deriv_set, "(rhob)(norm_drhob)")
2688         CALL xc_derivative_get(deriv, deriv_data=d2e)
2689!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2690         DO ir = 1, nr
2691            DO ia = 1, na
2692               tmp(ia, ir) = d2e(ia, ir, 1)*so(ia, ir)
2693            END DO !ia
2694         END DO !ir
2695!$OMP END DO NOWAIT
2696
2697         !Vxg, second term (to be multiplied by drhob)
2698         deriv => xc_dset_get_derivative(deriv_set, "(norm_drhob)(norm_drho)")
2699         CALL xc_derivative_get(deriv, deriv_data=d2e)
2700!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2701         DO ir = 1, nr
2702            DO ia = 1, na
2703               tmp(ia, ir) = tmp(ia, ir) + d2e(ia, ir, 1)*dot_proda(ia, ir)
2704            END DO !ia
2705         END DO !ir
2706!$OMP END DO NOWAIT
2707
2708         !Vxg, third term (to be multiplied by drhob)
2709         deriv => xc_dset_get_derivative(deriv_set, "(norm_drhob)(norm_drhob)")
2710         CALL xc_derivative_get(deriv, deriv_data=d2e)
2711!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2712         DO ir = 1, nr
2713            DO ia = 1, na
2714               tmp(ia, ir) = tmp(ia, ir) + d2e(ia, ir, 1)*dot_prodb(ia, ir)
2715            END DO !ia
2716         END DO !ir
2717!$OMP END DO NOWAIT
2718
2719         !Vxg, fourth term (to be multiplied by drhob)
2720         deriv => xc_dset_get_derivative(deriv_set, "(norm_drhob)")
2721         CALL xc_derivative_get(deriv, deriv_data=d1e)
2722!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2723         DO ir = 1, nr
2724            DO ia = 1, na
2725               tmp(ia, ir) = tmp(ia, ir) - d1e(ia, ir, 1)*dot_prodb(ia, ir) &
2726                             /MAX(norm_drhob(ia, ir, 1), rho_set%drho_cutoff)**2
2727            END DO !ia
2728         END DO !ir
2729!$OMP END DO NOWAIT
2730
2731         !put tmp*drhob in Vxg (so that we can reuse it for drhoa terms)
2732         DO dir = 1, 3
2733!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2734            DO ir = 1, nr
2735               DO ia = 1, na
2736                  vxg(3)%array(ia, ir, dir) = tmp(ia, ir)*rho_set%drhob(dir)%array(ia, ir, 1)
2737               END DO !ia
2738            END DO !ir
2739!$OMP END DO NOWAIT
2740         END DO !dir
2741
2742         !Vxg, fifth term (to be multiplied by drhoa)
2743         deriv => xc_dset_get_derivative(deriv_set, "(rhob)(norm_drho)")
2744         CALL xc_derivative_get(deriv, deriv_data=d2e)
2745!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2746         DO ir = 1, nr
2747            DO ia = 1, na
2748               tmp(ia, ir) = d2e(ia, ir, 1)*so(ia, ir)
2749            END DO !ia
2750         END DO !ir
2751!$OMP END DO NOWAIT
2752
2753         !Vxg, sixth term (to be multiplied by drhoa)
2754         deriv => xc_dset_get_derivative(deriv_set, "(norm_drhob)(norm_drho)")
2755         CALL xc_derivative_get(deriv, deriv_data=d2e)
2756!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2757         DO ir = 1, nr
2758            DO ia = 1, na
2759               tmp(ia, ir) = tmp(ia, ir) + d2e(ia, ir, 1)*dot_prodb(ia, ir)
2760            END DO !ia
2761         END DO !ir
2762!$OMP END DO NOWAIT
2763
2764         !Vxg, seventh term (to be multiplied by drhoa)
2765         deriv => xc_dset_get_derivative(deriv_set, "(norm_drho)(norm_drho)")
2766         CALL xc_derivative_get(deriv, deriv_data=d2e)
2767!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2768         DO ir = 1, nr
2769            DO ia = 1, na
2770               tmp(ia, ir) = tmp(ia, ir) + d2e(ia, ir, 1)*dot_proda(ia, ir)
2771            END DO !ia
2772         END DO !ir
2773!$OMP END DO NOWAIT
2774
2775         !put tmp*drhoa in Vxg
2776         DO dir = 1, 3
2777!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2778            DO ir = 1, nr
2779               DO ia = 1, na
2780                  vxg(3)%array(ia, ir, dir) = vxg(3)%array(ia, ir, dir) + &
2781                                              tmp(ia, ir)*rho_set%drhoa(dir)%array(ia, ir, 1)
2782               END DO !ia
2783            END DO !ir
2784!$OMP END DO NOWAIT
2785         END DO !dir
2786
2787         !Vxg, last term
2788         deriv => xc_dset_get_derivative(deriv_set, "(norm_drhob)")
2789         CALL xc_derivative_get(deriv, deriv_data=d1e)
2790         DO dir = 1, 3
2791!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2792            DO ir = 1, nr
2793               DO ia = 1, na
2794                  vxg(3)%array(ia, ir, dir) = vxg(3)%array(ia, ir, dir) + d1e(ia, ir, 1)*dso(ia, ir, dir)
2795               END DO !ia
2796            END DO !ir
2797!$OMP END DO NOWAIT
2798         END DO !dir
2799
2800         !Vxg, take the grid weight into account
2801         DO dir = 1, 3
2802!$OMP DO SCHEDULE(STATIC) COLLAPSE(2)
2803            DO ir = 1, nr
2804               DO ia = 1, na
2805                  vxg(3)%array(ia, ir, dir) = vxg(3)%array(ia, ir, dir)*weight(ia, ir)
2806               END DO !ia
2807            END DO !ir
2808!$OMP END DO NOWAIT
2809         END DO !dir
2810
2811      END IF !f_bb
2812
2813!$OMP END PARALLEL
2814
2815      CALL timestop(handle)
2816
2817   END SUBROUTINE get_vxc_vxg
2818
2819! **************************************************************************************************
2820!> \brief Integrate the fxc kernel in the spin-conserving case, be it closed- or open-shell
2821!> \param int_fxc the array containing the (P|fxc|Q) integrals
2822!> \param iatom the index of the current excited atom
2823!> \param ikind the index of the current excited kind
2824!> \param deriv_set the set of functional derivatives
2825!> \param xas_atom_env ...
2826!> \param qs_env ...
2827! **************************************************************************************************
2828   SUBROUTINE integrate_sc_fxc(int_fxc, iatom, ikind, deriv_set, xas_atom_env, qs_env)
2829
2830      TYPE(cp_2d_r_p_type), DIMENSION(:, :), POINTER     :: int_fxc
2831      INTEGER, INTENT(IN)                                :: iatom, ikind
2832      TYPE(xc_derivative_set_type), POINTER              :: deriv_set
2833      TYPE(xas_atom_env_type), POINTER                   :: xas_atom_env
2834      TYPE(qs_environment_type), POINTER                 :: qs_env
2835
2836      CHARACTER(len=*), PARAMETER :: routineN = 'integrate_sc_fxc', &
2837         routineP = moduleN//':'//routineN
2838
2839      INTEGER                                            :: i, maxso, na, nr, nset, nsotot, nspins, &
2840                                                            ri_nsgf, ub
2841      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: fxc, int_so
2842      REAL(dp), DIMENSION(:, :), POINTER                 :: ri_sphi_so
2843      TYPE(cp_3d_r_p_type), ALLOCATABLE, DIMENSION(:)    :: d2e
2844      TYPE(dft_control_type), POINTER                    :: dft_control
2845      TYPE(grid_atom_type), POINTER                      :: grid_atom
2846      TYPE(gto_basis_set_type), POINTER                  :: ri_basis
2847      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
2848      TYPE(xc_derivative_type), POINTER                  :: deriv
2849
2850      NULLIFY (grid_atom, deriv, ri_basis, ri_sphi_so, qs_kind_set, dft_control)
2851
2852      ! Initialization
2853      CALL get_qs_env(qs_env, qs_kind_set=qs_kind_set, dft_control=dft_control)
2854      grid_atom => xas_atom_env%grid_atom_set(ikind)%grid_atom
2855      na = grid_atom%ng_sphere
2856      nr = grid_atom%nr
2857      CALL get_qs_kind(qs_kind_set(ikind), basis_set=ri_basis, basis_type="RI_XAS")
2858      CALL get_gto_basis_set(ri_basis, nset=nset, maxso=maxso, nsgf=ri_nsgf)
2859      nsotot = nset*maxso
2860      ri_sphi_so => xas_atom_env%ri_sphi_so(ikind)%array
2861      nspins = dft_control%nspins
2862
2863      ! Get the second derivatives
2864      ALLOCATE (d2e(3))
2865      deriv => xc_dset_get_derivative(deriv_set, "(rhoa)(rhoa)")
2866      CALL xc_derivative_get(deriv, deriv_data=d2e(1)%array)
2867      deriv => xc_dset_get_derivative(deriv_set, "(rhoa)(rhob)")
2868      CALL xc_derivative_get(deriv, deriv_data=d2e(2)%array)
2869      deriv => xc_dset_get_derivative(deriv_set, "(rhob)(rhob)")
2870      CALL xc_derivative_get(deriv, deriv_data=d2e(3)%array)
2871
2872      ! Allocate some work arrays
2873      ALLOCATE (fxc(na, nr))
2874      ALLOCATE (int_so(nsotot, nsotot))
2875
2876      ! Integrate for all three derivatives, taking the grid weight into account
2877      ! If closed shell, do not need to integrate beta-beta as it is the same as alpha-alpha
2878      ub = 2; IF (nspins == 2) ub = 3
2879      DO i = 1, ub
2880
2881         int_so = 0.0_dp
2882         fxc(:, :) = d2e(i)%array(:, :, 1)*grid_atom%weight(:, :)
2883         CALL integrate_so_prod(int_so, fxc, ikind, xas_atom_env, qs_env)
2884
2885         !contract into sgf. Array allocated on current processor only
2886         ALLOCATE (int_fxc(iatom, i)%array(ri_nsgf, ri_nsgf))
2887         int_fxc(iatom, i)%array = 0.0_dp
2888         CALL contract_so2sgf(int_fxc(iatom, i)%array, int_so, ri_basis, ri_sphi_so)
2889
2890      END DO
2891
2892   END SUBROUTINE integrate_sc_fxc
2893
2894! **************************************************************************************************
2895!> \brief Integrate the fxc kernel in the spin-flip case (open-shell assumed). The spin-flip LDA
2896!>        kernel reads: fxc = 1/(rhoa - rhob) * (dE/drhoa - dE/drhob)
2897!> \param int_fxc the array containing the (P|fxc|Q) integrals
2898!> \param iatom the index of the current excited atom
2899!> \param ikind the index of the current excited kind
2900!> \param rho_set the density in the atomic grid
2901!> \param deriv_set the set of functional derivatives
2902!> \param xas_atom_env ...
2903!> \param qs_env ...
2904! **************************************************************************************************
2905   SUBROUTINE integrate_sf_fxc(int_fxc, iatom, ikind, rho_set, deriv_set, xas_atom_env, qs_env)
2906
2907      TYPE(cp_2d_r_p_type), DIMENSION(:, :), POINTER     :: int_fxc
2908      INTEGER, INTENT(IN)                                :: iatom, ikind
2909      TYPE(xc_rho_set_type), POINTER                     :: rho_set
2910      TYPE(xc_derivative_set_type), POINTER              :: deriv_set
2911      TYPE(xas_atom_env_type), POINTER                   :: xas_atom_env
2912      TYPE(qs_environment_type), POINTER                 :: qs_env
2913
2914      CHARACTER(len=*), PARAMETER :: routineN = 'integrate_sf_fxc', &
2915         routineP = moduleN//':'//routineN
2916
2917      INTEGER                                            :: ia, ir, maxso, na, nr, nset, nsotot, &
2918                                                            ri_nsgf
2919      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: fxc, int_so
2920      REAL(dp), DIMENSION(:, :), POINTER                 :: ri_sphi_so
2921      REAL(dp), DIMENSION(:, :, :), POINTER              :: rhoa, rhob
2922      TYPE(cp_3d_r_p_type), ALLOCATABLE, DIMENSION(:)    :: d1e, d2e
2923      TYPE(dft_control_type), POINTER                    :: dft_control
2924      TYPE(grid_atom_type), POINTER                      :: grid_atom
2925      TYPE(gto_basis_set_type), POINTER                  :: ri_basis
2926      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
2927      TYPE(xc_derivative_type), POINTER                  :: deriv
2928
2929      NULLIFY (grid_atom, deriv, ri_basis, ri_sphi_so, qs_kind_set, rhoa, rhob, dft_control)
2930
2931      ! Initialization
2932      CALL get_qs_env(qs_env, qs_kind_set=qs_kind_set, dft_control=dft_control)
2933      grid_atom => xas_atom_env%grid_atom_set(ikind)%grid_atom
2934      na = grid_atom%ng_sphere
2935      nr = grid_atom%nr
2936      CALL get_qs_kind(qs_kind_set(ikind), basis_set=ri_basis, basis_type="RI_XAS")
2937      CALL get_gto_basis_set(ri_basis, nset=nset, maxso=maxso, nsgf=ri_nsgf)
2938      nsotot = nset*maxso
2939      ri_sphi_so => xas_atom_env%ri_sphi_so(ikind)%array
2940      rhoa => rho_set%rhoa
2941      rhob => rho_set%rhob
2942
2943      ALLOCATE (d1e(2))
2944      deriv => xc_dset_get_derivative(deriv_set, "(rhoa)")
2945      CALL xc_derivative_get(deriv, deriv_data=d1e(1)%array)
2946      deriv => xc_dset_get_derivative(deriv_set, "(rhob)")
2947      CALL xc_derivative_get(deriv, deriv_data=d1e(2)%array)
2948
2949      ! In case rhoa -> rhob, take the limit, which involves the (already computed) second derivatives
2950      ALLOCATE (d2e(3))
2951      deriv => xc_dset_get_derivative(deriv_set, "(rhoa)(rhoa)")
2952      CALL xc_derivative_get(deriv, deriv_data=d2e(1)%array)
2953      deriv => xc_dset_get_derivative(deriv_set, "(rhoa)(rhob)")
2954      CALL xc_derivative_get(deriv, deriv_data=d2e(2)%array)
2955      deriv => xc_dset_get_derivative(deriv_set, "(rhob)(rhob)")
2956      CALL xc_derivative_get(deriv, deriv_data=d2e(3)%array)
2957
2958      !Compute the kernel on the grid. Already take weight into acocunt there
2959      ALLOCATE (fxc(na, nr))
2960!$OMP PARALLEL DO COLLAPSE(2) SCHEDULE(STATIC) DEFAULT(NONE), &
2961!$OMP SHARED(grid_atom,fxc,d1e,d2e,dft_control,na,nr,rhoa,rhob), &
2962!$OMP PRIVATE(ia,ir)
2963      DO ir = 1, nr
2964         DO ia = 1, na
2965
2966            !Need to be careful not to divide by zero. Assume that if rhoa == rhob, then
2967            !take the limit fxc = 0.5* (f_aa + f_bb - 2f_ab)
2968            IF (ABS(rhoa(ia, ir, 1) - rhob(ia, ir, 1)) > dft_control%qs_control%eps_rho_rspace) THEN
2969               fxc(ia, ir) = grid_atom%weight(ia, ir)/(rhoa(ia, ir, 1) - rhob(ia, ir, 1)) &
2970                             *(d1e(1)%array(ia, ir, 1) - d1e(2)%array(ia, ir, 1))
2971            ELSE
2972               fxc(ia, ir) = 0.5_dp*grid_atom%weight(ia, ir)* &
2973                             (d2e(1)%array(ia, ir, 1) + d2e(3)%array(ia, ir, 1) - 2._dp*d2e(2)%array(ia, ir, 1))
2974            END IF
2975
2976         END DO
2977      END DO
2978!$OMP END PARALLEL DO
2979
2980      ! Integrate wrt to so
2981      ALLOCATE (int_so(nsotot, nsotot))
2982      int_so = 0.0_dp
2983      CALL integrate_so_prod(int_so, fxc, ikind, xas_atom_env, qs_env)
2984
2985      ! Contract into sgf. Array located on current processor only
2986      ALLOCATE (int_fxc(iatom, 4)%array(ri_nsgf, ri_nsgf))
2987      int_fxc(iatom, 4)%array = 0.0_dp
2988      CALL contract_so2sgf(int_fxc(iatom, 4)%array, int_so, ri_basis, ri_sphi_so)
2989
2990   END SUBROUTINE integrate_sf_fxc
2991
2992! **************************************************************************************************
2993!> \brief Contract spherical orbitals to spherical Gaussians (so to sgf)
2994!> \param int_sgf the array with the sgf integrals
2995!> \param int_so the array with the so integrals (to contract)
2996!> \param basis the corresponding gto basis set
2997!> \param sphi_so the contraction coefficients for the s:
2998! **************************************************************************************************
2999   SUBROUTINE contract_so2sgf(int_sgf, int_so, basis, sphi_so)
3000
3001      REAL(dp), DIMENSION(:, :)                          :: int_sgf, int_so
3002      TYPE(gto_basis_set_type), POINTER                  :: basis
3003      REAL(dp), DIMENSION(:, :)                          :: sphi_so
3004
3005      CHARACTER(len=*), PARAMETER :: routineN = 'contract_so2sgf', &
3006         routineP = moduleN//':'//routineN
3007
3008      INTEGER                                            :: iset, jset, maxso, nset, nsoi, nsoj, &
3009                                                            sgfi, sgfj, starti, startj
3010      INTEGER, DIMENSION(:), POINTER                     :: lmax, npgf, nsgf_set
3011      INTEGER, DIMENSION(:, :), POINTER                  :: first_sgf
3012
3013      NULLIFY (nsgf_set, npgf, lmax, first_sgf)
3014
3015      CALL get_gto_basis_set(basis, nset=nset, maxso=maxso, nsgf_set=nsgf_set, first_sgf=first_sgf, &
3016                             npgf=npgf, lmax=lmax)
3017
3018      DO iset = 1, nset
3019         starti = (iset - 1)*maxso + 1
3020         nsoi = npgf(iset)*nsoset(lmax(iset))
3021         sgfi = first_sgf(1, iset)
3022
3023         DO jset = 1, nset
3024            startj = (jset - 1)*maxso + 1
3025            nsoj = npgf(jset)*nsoset(lmax(jset))
3026            sgfj = first_sgf(1, jset)
3027
3028            CALL ab_contract(int_sgf(sgfi:sgfi + nsgf_set(iset) - 1, sgfj:sgfj + nsgf_set(jset) - 1), &
3029                             int_so(starti:starti + nsoi - 1, startj:startj + nsoj - 1), &
3030                             sphi_so(:, sgfi:), sphi_so(:, sgfj:), nsoi, nsoj, &
3031                             nsgf_set(iset), nsgf_set(jset))
3032         END DO !jset
3033      END DO !iset
3034
3035   END SUBROUTINE contract_so2sgf
3036
3037! **************************************************************************************************
3038!> \brief Integrate the product of spherical gaussian orbitals with the xc kernel on the atomic grid
3039!> \param intso the integral in terms of spherical orbitals
3040!> \param fxc the xc kernel at each grid point
3041!> \param ikind the kind of the atom we integrate for
3042!> \param xas_atom_env ...
3043!> \param qs_env ...
3044!> \note Largely copied from gaVxcgb_noGC. Rewritten here because we need our own atomic grid,
3045!>       harmonics, basis set and we do not need the soft vxc. Could have tweaked the original, but
3046!>       it would have been messy. Also we do not need rho_atom (too big and fancy for us)
3047!>       We also go over the whole range of angular momentum l
3048! **************************************************************************************************
3049   SUBROUTINE integrate_so_prod(intso, fxc, ikind, xas_atom_env, qs_env)
3050
3051      REAL(dp), DIMENSION(:, :), INTENT(INOUT)           :: intso
3052      REAL(dp), DIMENSION(:, :), INTENT(IN)              :: fxc
3053      INTEGER, INTENT(IN)                                :: ikind
3054      TYPE(xas_atom_env_type), POINTER                   :: xas_atom_env
3055      TYPE(qs_environment_type), POINTER                 :: qs_env
3056
3057      CHARACTER(len=*), PARAMETER :: routineN = 'integrate_so_prod', &
3058         routineP = moduleN//':'//routineN
3059
3060      INTEGER :: handle, ia, ic, icg, ipgf1, ipgf2, iset1, iset2, iso, iso1, iso2, l, ld, lmax12, &
3061         lmin12, m1, m2, max_iso_not0, max_iso_not0_local, max_s_harm, maxl, maxso, n1, n2, na, &
3062         ngau1, ngau2, nngau1, nr, nset, size1
3063      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: cg_n_list
3064      INTEGER, ALLOCATABLE, DIMENSION(:, :, :)           :: cg_list
3065      INTEGER, DIMENSION(:), POINTER                     :: lmax, lmin, npgf
3066      REAL(dp), ALLOCATABLE, DIMENSION(:)                :: g1, g2
3067      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: gfxcg, gg, matso
3068      REAL(dp), DIMENSION(:, :), POINTER                 :: zet
3069      REAL(dp), DIMENSION(:, :, :), POINTER              :: my_CG
3070      TYPE(grid_atom_type), POINTER                      :: grid_atom
3071      TYPE(gto_basis_set_type), POINTER                  :: ri_basis
3072      TYPE(harmonics_atom_type), POINTER                 :: harmonics
3073      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
3074
3075      CALL timeset(routineN, handle)
3076
3077      NULLIFY (grid_atom, harmonics, ri_basis, qs_kind_set, lmax, lmin, npgf, zet, my_CG)
3078
3079!  Initialization
3080      CALL get_qs_env(qs_env, qs_kind_set=qs_kind_set)
3081      CALL get_qs_kind(qs_kind_set(ikind), basis_set=ri_basis, basis_type="RI_XAS")
3082      grid_atom => xas_atom_env%grid_atom_set(ikind)%grid_atom
3083      harmonics => xas_atom_env%harmonics_atom_set(ikind)%harmonics_atom
3084
3085      CALL get_gto_basis_set(ri_basis, lmax=lmax, lmin=lmin, maxso=maxso, maxl=maxl, npgf=npgf, &
3086                             nset=nset, zet=zet)
3087
3088      na = grid_atom%ng_sphere
3089      nr = grid_atom%nr
3090      my_CG => harmonics%my_CG
3091      max_iso_not0 = harmonics%max_iso_not0
3092      max_s_harm = harmonics%max_s_harm
3093      CPASSERT(2*maxl .LE. indso(1, max_iso_not0))
3094
3095      ALLOCATE (g1(nr), g2(nr), gg(nr, 0:2*maxl))
3096      ALLOCATE (gfxcg(na, 0:2*maxl))
3097      ALLOCATE (matso(nsoset(maxl), nsoset(maxl)))
3098      ALLOCATE (cg_list(2, nsoset(maxl)**2, max_s_harm), cg_n_list(max_s_harm))
3099
3100      g1 = 0.0_dp
3101      g2 = 0.0_dp
3102      m1 = 0
3103!  Loop over the product of so
3104      DO iset1 = 1, nset
3105         n1 = nsoset(lmax(iset1))
3106         m2 = 0
3107         DO iset2 = 1, nset
3108            CALL get_none0_cg_list(my_CG, lmin(iset1), lmax(iset1), lmin(iset2), lmax(iset2), &
3109                                   max_s_harm, lmax(iset1) + lmax(iset2), cg_list, cg_n_list, &
3110                                   max_iso_not0_local)
3111            CPASSERT(max_iso_not0_local .LE. max_iso_not0)
3112
3113            n2 = nsoset(lmax(iset2))
3114            DO ipgf1 = 1, npgf(iset1)
3115               ngau1 = n1*(ipgf1 - 1) + m1
3116               size1 = nsoset(lmax(iset1)) - nsoset(lmin(iset1) - 1)
3117               nngau1 = nsoset(lmin(iset1) - 1) + ngau1
3118
3119               g1(:) = EXP(-zet(ipgf1, iset1)*grid_atom%rad2(1:nr))
3120               DO ipgf2 = 1, npgf(iset2)
3121                  ngau2 = n2*(ipgf2 - 1) + m2
3122
3123                  g2(:) = EXP(-zet(ipgf2, iset2)*grid_atom%rad2(1:nr))
3124                  lmin12 = lmin(iset1) + lmin(iset2)
3125                  lmax12 = lmax(iset1) + lmax(iset2)
3126
3127                  !get the gaussian product
3128                  gg = 0.0_dp
3129                  IF (lmin12 == 0) THEN
3130                     gg(:, lmin12) = g1(:)*g2(:)
3131                  ELSE
3132                     gg(:, lmin12) = grid_atom%rad2l(1:nr, lmin12)*g1(:)*g2(:)
3133                  END IF
3134
3135                  DO l = lmin12 + 1, lmax12
3136                     gg(:, l) = grid_atom%rad(1:nr)*gg(:, l - 1)
3137                  END DO
3138
3139                  ld = lmax12 + 1
3140                  CALL dgemm('N', 'N', na, ld, nr, 1.0_dp, fxc(1:na, 1:nr), na, gg(:, 0:lmax12), &
3141                             nr, 0.0_dp, gfxcg(1:na, 0:lmax12), na)
3142
3143                  !integrate
3144                  matso = 0.0_dp
3145                  DO iso = 1, max_iso_not0_local
3146                     DO icg = 1, cg_n_list(iso)
3147                        iso1 = cg_list(1, icg, iso)
3148                        iso2 = cg_list(2, icg, iso)
3149                        l = indso(1, iso1) + indso(1, iso2)
3150
3151                        DO ia = 1, na
3152                           matso(iso1, iso2) = matso(iso1, iso2) + gfxcg(ia, l)* &
3153                                               my_CG(iso1, iso2, iso)*harmonics%slm(ia, iso)
3154                        END DO !ia
3155                     END DO !icg
3156                  END DO !iso
3157
3158                  !write in integral matrix
3159                  DO ic = nsoset(lmin(iset2) - 1) + 1, nsoset(lmax(iset2))
3160                     iso1 = nsoset(lmin(iset1) - 1) + 1
3161                     iso2 = ngau2 + ic
3162                     CALL daxpy(size1, 1.0_dp, matso(iso1, ic), 1, intso(nngau1 + 1, iso2), 1)
3163                  END DO !ic
3164
3165               END DO !ipgf2
3166            END DO ! ipgf1
3167            m2 = m2 + maxso
3168         END DO !iset2
3169         m1 = m1 + maxso
3170      END DO !iset1
3171
3172      CALL timestop(handle)
3173
3174   END SUBROUTINE integrate_so_prod
3175
3176! **************************************************************************************************
3177!> \brief This routine computes the integral of a potential V wrt the derivative of the spherical
3178!>        orbitals, that is <df/dx|V|dg/dy> on the atomic grid.
3179!> \param intso the integral in terms of the spherical orbitals (well, their derivative)
3180!> \param V the potential (put on the grid and weighted) to integrate
3181!> \param ikind the atomic kind for which we integrate
3182!> \param xas_atom_env ...
3183!> \param qs_env ...
3184!> \note The atomic grids are taken fron xas_atom_env and the orbitals are the normal ones. Ok since
3185!>       the grid and spherical harmonics for those grids are at least as good as the GAPW ones
3186! **************************************************************************************************
3187   SUBROUTINE integrate_so_dxdy_prod(intso, V, ikind, xas_atom_env, qs_env)
3188
3189      REAL(dp), DIMENSION(:, :, :), INTENT(INOUT)        :: intso
3190      REAL(dp), DIMENSION(:, :), INTENT(IN)              :: V
3191      INTEGER, INTENT(IN)                                :: ikind
3192      TYPE(xas_atom_env_type), POINTER                   :: xas_atom_env
3193      TYPE(qs_environment_type), POINTER                 :: qs_env
3194
3195      CHARACTER(len=*), PARAMETER :: routineN = 'integrate_so_dxdy_prod', &
3196         routineP = moduleN//':'//routineN
3197
3198      INTEGER                                            :: i, ipgf, iset, iso, j, jpgf, jset, jso, &
3199                                                            k, l, maxso, na, nr, nset, starti, &
3200                                                            startj
3201      INTEGER, DIMENSION(:), POINTER                     :: lmax, lmin, npgf
3202      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: fga, fgr, r1, r2, work
3203      REAL(dp), ALLOCATABLE, DIMENSION(:, :, :)          :: a1, a2
3204      REAL(dp), DIMENSION(:, :), POINTER                 :: slm, zet
3205      REAL(dp), DIMENSION(:, :, :), POINTER              :: dslm_dxyz
3206      TYPE(grid_atom_type), POINTER                      :: grid_atom
3207      TYPE(gto_basis_set_type), POINTER                  :: basis
3208      TYPE(harmonics_atom_type), POINTER                 :: harmonics
3209      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
3210
3211      NULLIFY (grid_atom, harmonics, basis, qs_kind_set, dslm_dxyz, slm, lmin, lmax, npgf, zet)
3212
3213!  Getting what we need from the atom_env
3214      harmonics => xas_atom_env%harmonics_atom_set(ikind)%harmonics_atom
3215      grid_atom => xas_atom_env%grid_atom_set(ikind)%grid_atom
3216
3217      CALL get_qs_env(qs_env, qs_kind_set=qs_kind_set)
3218      CALL get_qs_kind(qs_kind_set(ikind), basis_set=basis, basis_type="ORB")
3219
3220      na = grid_atom%ng_sphere
3221      nr = grid_atom%nr
3222
3223      slm => harmonics%slm
3224      dslm_dxyz => harmonics%dslm_dxyz
3225
3226!  Getting what we need from the orbital basis
3227      CALL get_gto_basis_set(gto_basis_set=basis, lmax=lmax, lmin=lmin, &
3228                             maxso=maxso, npgf=npgf, nset=nset, zet=zet)
3229
3230!  Separate the functions into purely r and purely angular parts, compute them all
3231!  and use matrix mutliplication for the integral. We use f for x derivative ang g for y
3232
3233      ! Separating the functions. Note that the radial part is the same for x and y derivatives
3234      ALLOCATE (a1(na, nset*maxso, 3), a2(na, nset*maxso, 3))
3235      ALLOCATE (r1(nr, nset*maxso), r2(nr, nset*maxso))
3236      a1 = 0.0_dp; a2 = 0.0_dp
3237      r1 = 0.0_dp; r2 = 0.0_dp
3238
3239      DO iset = 1, nset
3240         DO ipgf = 1, npgf(iset)
3241            starti = (iset - 1)*maxso + (ipgf - 1)*nsoset(lmax(iset))
3242            DO iso = nsoset(lmin(iset) - 1) + 1, nsoset(lmax(iset))
3243               l = indso(1, iso)
3244
3245               ! The x derivative of the spherical orbital, divided in angular and radial parts
3246               ! Two of each are needed because d/dx(r^l Y_lm) * exp(-al*r^2) + r^l Y_lm * ! d/dx(exp-al*r^2)
3247
3248               ! the purely radial part of d/dx(r^l Y_lm) * exp(-al*r^2) (same for y)
3249               r1(1:nr, starti + iso) = grid_atom%rad(1:nr)**(l - 1)*EXP(-zet(ipgf, iset)*grid_atom%rad2(1:nr))
3250
3251               ! the purely radial part of r^l Y_lm * d/dx(exp-al*r^2) (same for y)
3252               r2(1:nr, starti + iso) = -2.0_dp*zet(ipgf, iset)*grid_atom%rad(1:nr)**(l + 1) &
3253                                        *EXP(-zet(ipgf, iset)*grid_atom%rad2(1:nr))
3254
3255               DO i = 1, 3
3256                  ! the purely angular part of d/dx(r^l Y_lm) * exp(-al*r^2)
3257                  a1(1:na, starti + iso, i) = dslm_dxyz(i, 1:na, iso)
3258
3259                  ! the purely angular part of r^l Y_lm * d/dx(exp-al*r^2)
3260                  a2(1:na, starti + iso, i) = harmonics%a(i, 1:na)*slm(1:na, iso)
3261               END DO
3262
3263            END DO !iso
3264         END DO !ipgf
3265      END DO !iset
3266
3267      ! Do the integration in terms of so using matrix products
3268      intso = 0.0_dp
3269      ALLOCATE (fga(na, 1))
3270      ALLOCATE (fgr(nr, 1))
3271      ALLOCATE (work(na, 1))
3272      fga = 0.0_dp; fgr = 0.0_dp; work = 0.0_dp
3273
3274      DO iset = 1, nset
3275         DO jset = 1, nset
3276            DO ipgf = 1, npgf(iset)
3277               starti = (iset - 1)*maxso + (ipgf - 1)*nsoset(lmax(iset))
3278               DO jpgf = 1, npgf(jset)
3279                  startj = (jset - 1)*maxso + (jpgf - 1)*nsoset(lmax(jset))
3280
3281                  DO i = 1, 3
3282                     j = MOD(i, 3) + 1
3283                     k = MOD(i + 1, 3) + 1
3284
3285                     DO iso = nsoset(lmin(iset) - 1) + 1, nsoset(lmax(iset))
3286                        DO jso = nsoset(lmin(jset) - 1) + 1, nsoset(lmax(jset))
3287
3288                           !Two component per function => 4 terms in total
3289
3290                           ! take r1*a1(j) * V * r1*a1(k)
3291                           fgr(1:nr, 1) = r1(1:nr, starti + iso)*r1(1:nr, startj + jso)
3292                           fga(1:na, 1) = a1(1:na, starti + iso, j)*a1(1:na, startj + jso, k)
3293
3294                           CALL dgemm('N', 'N', na, 1, nr, 1.0_dp, V, na, fgr, nr, 0.0_dp, work, na)
3295                           CALL dgemm('T', 'N', 1, 1, na, 1.0_dp, work, na, fga, na, 0.0_dp, &
3296                                      intso(starti + iso, startj + jso, i), 1)
3297
3298                           ! add r1*a1(j) * V * r2*a2(k)
3299                           fgr(1:nr, 1) = r1(1:nr, starti + iso)*r2(1:nr, startj + jso)
3300                           fga(1:na, 1) = a1(1:na, starti + iso, j)*a2(1:na, startj + jso, k)
3301
3302                           CALL dgemm('N', 'N', na, 1, nr, 1.0_dp, V, na, fgr, nr, 0.0_dp, work, na)
3303                           CALL dgemm('T', 'N', 1, 1, na, 1.0_dp, work, na, fga, na, 1.0_dp, &
3304                                      intso(starti + iso, startj + jso, i), 1)
3305
3306                           ! add r2*a2(j) * V * r1*a1(k)
3307                           fgr(1:nr, 1) = r2(1:nr, starti + iso)*r1(1:nr, startj + jso)
3308                           fga(1:na, 1) = a2(1:na, starti + iso, j)*a1(1:na, startj + jso, k)
3309
3310                           CALL dgemm('N', 'N', na, 1, nr, 1.0_dp, V, na, fgr, nr, 0.0_dp, work, na)
3311                           CALL dgemm('T', 'N', 1, 1, na, 1.0_dp, work, na, fga, na, 1.0_dp, &
3312                                      intso(starti + iso, startj + jso, i), 1)
3313
3314                           ! add the last term: r2*a2(j) * V * r2*a2(k)
3315                           fgr(1:nr, 1) = r2(1:nr, starti + iso)*r2(1:nr, startj + jso)
3316                           fga(1:na, 1) = a2(1:na, starti + iso, j)*a2(1:na, startj + jso, k)
3317
3318                           CALL dgemm('N', 'N', na, 1, nr, 1.0_dp, V, na, fgr, nr, 0.0_dp, work, na)
3319                           CALL dgemm('T', 'N', 1, 1, na, 1.0_dp, work, na, fga, na, 1.0_dp, &
3320                                      intso(starti + iso, startj + jso, i), 1)
3321
3322                        END DO !jso
3323                     END DO !iso
3324
3325                  END DO !i
3326               END DO !jpgf
3327            END DO !ipgf
3328         END DO !jset
3329      END DO !iset
3330
3331      DO i = 1, 3
3332         intso(:, :, i) = intso(:, :, i) - TRANSPOSE(intso(:, :, i))
3333      END DO
3334
3335   END SUBROUTINE integrate_so_dxdy_prod
3336
3337! **************************************************************************************************
3338!> \brief Computes the SOC matrix elements with respect to the ORB basis set for each atomic kind
3339!>        and put them as the block diagonal of dbcsr_matrix
3340!> \param matrix_soc the matrix where the SOC is stored
3341!> \param xas_atom_env ...
3342!> \param qs_env ...
3343!> \note We compute: <da_dx|V\(4c^2-2V)|db_dy> - <da_dy|V\(4c^2-2V)|db_dx>, where V is a model
3344!>       potential on the atomic grid. What we get is purely imaginary
3345! **************************************************************************************************
3346   SUBROUTINE integrate_soc_atoms(matrix_soc, xas_atom_env, qs_env)
3347
3348      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_soc
3349      TYPE(xas_atom_env_type), POINTER                   :: xas_atom_env
3350      TYPE(qs_environment_type), POINTER                 :: qs_env
3351
3352      CHARACTER(len=*), PARAMETER :: routineN = 'integrate_soc_atoms', &
3353         routineP = moduleN//':'//routineN
3354
3355      INTEGER                                            :: blk, handle, i, iat, ikind, ir, jat, &
3356                                                            maxso, na, nkind, nr, nset, nsgf
3357      REAL(dp)                                           :: zeff
3358      REAL(dp), ALLOCATABLE, DIMENSION(:)                :: Vr
3359      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: V
3360      REAL(dp), ALLOCATABLE, DIMENSION(:, :, :)          :: intso
3361      REAL(dp), DIMENSION(:, :), POINTER                 :: sphi_so
3362      REAL(dp), DIMENSION(:, :, :), POINTER              :: intsgf
3363      TYPE(cp_3d_r_p_type), DIMENSION(:), POINTER        :: int_soc
3364      TYPE(dbcsr_iterator_type)                          :: iter
3365      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_s
3366      TYPE(grid_atom_type), POINTER                      :: grid
3367      TYPE(gto_basis_set_type), POINTER                  :: basis
3368      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
3369      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
3370
3371      NULLIFY (int_soc, basis, qs_kind_set, sphi_so, matrix_s)
3372      NULLIFY (particle_set)
3373
3374      CALL timeset(routineN, handle)
3375
3376!  Initialization
3377      CALL get_qs_env(qs_env, nkind=nkind, qs_kind_set=qs_kind_set, matrix_s=matrix_s, &
3378                      particle_set=particle_set)
3379      ALLOCATE (int_soc(nkind))
3380
3381!  Loop over the kinds to compute the integrals
3382      DO ikind = 1, nkind
3383
3384         CALL get_qs_kind(qs_kind_set(ikind), basis_set=basis, basis_type="ORB", zeff=zeff)
3385         CALL get_gto_basis_set(basis, nset=nset, maxso=maxso)
3386         ALLOCATE (intso(nset*maxso, nset*maxso, 3))
3387
3388         ! compute the model potential on the grid
3389         grid => xas_atom_env%grid_atom_set(ikind)%grid_atom
3390         nr = grid%nr
3391         na = grid%ng_sphere
3392         ALLOCATE (Vr(nr))
3393         !TODO: do we need potential contribution from neighboring atoms ?!?
3394         CALL calculate_model_potential(Vr, grid, zeff)
3395
3396         !Compute V/(4c^2-2V) and weight it
3397         ALLOCATE (V(na, nr))
3398         V = 0.0_dp
3399         DO ir = 1, nr
3400            CALL daxpy(na, Vr(ir)/(4.0_dp*c_light_au**2 - 2.0_dp*Vr(ir)), grid%weight(1:na, ir), 1, &
3401                       V(1:na, ir), 1)
3402         END DO
3403         DEALLOCATE (Vr)
3404
3405         ! compute the integral <da_dx|...|db_dy> in terms of so
3406         CALL integrate_so_dxdy_prod(intso, V, ikind, xas_atom_env, qs_env)
3407         DEALLOCATE (V)
3408
3409         ! contract in terms of sgf
3410         CALL get_gto_basis_set(basis, nsgf=nsgf)
3411         ALLOCATE (int_soc(ikind)%array(nsgf, nsgf, 3))
3412         intsgf => int_soc(ikind)%array
3413         sphi_so => xas_atom_env%orb_sphi_so(ikind)%array
3414         intsgf = 0.0_dp
3415
3416         DO i = 1, 3
3417            CALL contract_so2sgf(intsgf(:, :, i), intso(:, :, i), basis, sphi_so)
3418         END DO
3419
3420         DEALLOCATE (intso)
3421      END DO !ikind
3422
3423!  Build the matrix_soc based on the matrix_s (but anti-symmetric)
3424      DO i = 1, 3
3425         CALL dbcsr_create(matrix_soc(i)%matrix, name="SOC MATRIX", template=matrix_s(1)%matrix, &
3426                           matrix_type="A")
3427      END DO
3428
3429!  Iterate over its diagonal blocks and fill=it
3430      CALL dbcsr_iterator_start(iter, matrix_s(1)%matrix)
3431      DO WHILE (dbcsr_iterator_blocks_left(iter))
3432
3433         CALL dbcsr_iterator_next_block(iter, row=iat, column=jat, blk=blk)
3434         IF (.NOT. iat == jat) CYCLE
3435         ikind = particle_set(iat)%atomic_kind%kind_number
3436
3437         DO i = 1, 3
3438            CALL dbcsr_put_block(matrix_soc(i)%matrix, iat, iat, int_soc(ikind)%array(:, :, i))
3439         END DO
3440
3441      END DO !iat
3442      CALL dbcsr_iterator_stop(iter)
3443      DO i = 1, 3
3444         CALL dbcsr_finalize(matrix_soc(i)%matrix)
3445      END DO
3446
3447! Clean-up
3448      DO ikind = 1, nkind
3449         DEALLOCATE (int_soc(ikind)%array)
3450      END DO
3451      DEALLOCATE (int_soc)
3452
3453      CALL timestop(handle)
3454
3455   END SUBROUTINE integrate_soc_atoms
3456
3457END MODULE xas_tdp_atom
3458