1!--------------------------------------------------------------------------------------------------!
2!   CP2K: A general program to perform molecular dynamics simulations                              !
3!   Copyright (C) 2000 - 2019  CP2K developers group                                               !
4!--------------------------------------------------------------------------------------------------!
5
6! **************************************************************************************************
7!> \brief Calculate the plane wave density by collocating the primitive Gaussian
8!>      functions (pgf).
9!> \par History
10!>      - rewrote collocate for increased accuracy and speed
11!>      - introduced the PGI hack for increased speed with that compiler
12!>        (22.02.02)
13!>      - Added Multiple Grid feature
14!>      - new way to get over the grid (01.03.02)
15!>      - removed timing calls since they were getting expensive
16!>      - Updated with the new QS data structures (09.04.02,MK)
17!>      - introduction of the real space grid type ( prelim. version JVdV 05.02)
18!>      - parallel FFT (JGH 22.05.02)
19!>      - multigrid arrays independent from density (JGH 30.08.02)
20!>      - old density stored in g space (JGH 30.08.02)
21!>      - distributed real space code (JGH 17.07.03)
22!>      - refactoring and new loop ordering (JGH 23.11.03)
23!>      - OpenMP parallelization (JGH 03.12.03)
24!>      - Modified to compute tau (Joost 12.03)
25!>      - removed the incremental density rebuild (Joost 01.04)
26!>      - introduced realspace multigridding (Joost 02.04)
27!>      - introduced map_consistent (Joost 02.04)
28!>      - Addition of the subroutine calculate_atomic_charge_density (TdK, 08.05)
29!>      - rewrite of the collocate/integrate kernels (Joost VandeVondele, 03.07)
30!> \author Matthias Krack (03.04.2001)
31!>      1) Joost VandeVondele (01.2002)
32!>      Thomas D. Kuehne (04.08.2005)
33! **************************************************************************************************
34MODULE qs_collocate_density
35   USE ao_util,                         ONLY: exp_radius_very_extended
36   USE atomic_kind_types,               ONLY: atomic_kind_type,&
37                                              get_atomic_kind,&
38                                              get_atomic_kind_set
39   USE basis_set_types,                 ONLY: get_gto_basis_set,&
40                                              gto_basis_set_type
41   USE cell_types,                      ONLY: cell_type,&
42                                              pbc
43   USE cp_control_types,                ONLY: dft_control_type
44   USE cp_dbcsr_operations,             ONLY: dbcsr_allocate_matrix_set,&
45                                              dbcsr_deallocate_matrix_set
46   USE cp_fm_types,                     ONLY: cp_fm_get_element,&
47                                              cp_fm_get_info,&
48                                              cp_fm_type
49   USE cube_utils,                      ONLY: compute_cube_center,&
50                                              cube_info_type,&
51                                              return_cube,&
52                                              return_cube_nonortho
53   USE d3_poly,                         ONLY: poly_cp2k2d3
54   USE dbcsr_api,                       ONLY: dbcsr_copy,&
55                                              dbcsr_get_block_p,&
56                                              dbcsr_p_type,&
57                                              dbcsr_type
58   USE external_potential_types,        ONLY: get_potential,&
59                                              gth_potential_type
60   USE gauss_colloc,                    ONLY: collocGauss
61   USE gaussian_gridlevels,             ONLY: gaussian_gridlevel,&
62                                              gridlevel_info_type
63   USE kinds,                           ONLY: default_string_length,&
64                                              dp,&
65                                              int_8
66   USE lgrid_types,                     ONLY: lgrid_allocate_grid,&
67                                              lgrid_type
68   USE lri_environment_types,           ONLY: lri_kind_type
69   USE mathconstants,                   ONLY: fac,&
70                                              pi,&
71                                              twopi
72   USE memory_utilities,                ONLY: reallocate
73   USE orbital_pointers,                ONLY: coset,&
74                                              indco,&
75                                              ncoset
76   USE particle_types,                  ONLY: particle_type
77   USE pw_env_types,                    ONLY: pw_env_get,&
78                                              pw_env_type
79   USE pw_methods,                      ONLY: pw_axpy,&
80                                              pw_integrate_function,&
81                                              pw_transfer,&
82                                              pw_zero
83   USE pw_pool_types,                   ONLY: pw_pool_create_pw,&
84                                              pw_pool_give_back_pw,&
85                                              pw_pool_p_type,&
86                                              pw_pool_type,&
87                                              pw_pools_create_pws,&
88                                              pw_pools_give_back_pws
89   USE pw_types,                        ONLY: COMPLEXDATA1D,&
90                                              REALDATA3D,&
91                                              REALSPACE,&
92                                              RECIPROCALSPACE,&
93                                              pw_p_type,&
94                                              pw_type
95   USE qs_environment_types,            ONLY: get_qs_env,&
96                                              qs_environment_type
97   USE qs_kind_types,                   ONLY: get_qs_kind,&
98                                              get_qs_kind_set,&
99                                              qs_kind_type
100   USE qs_ks_types,                     ONLY: get_ks_env,&
101                                              qs_ks_env_type
102   USE qs_modify_pab_block,             ONLY: &
103        FUNC_AB, FUNC_ADBmDAB, FUNC_ARB, FUNC_ARDBmDARB, FUNC_DABpADB, FUNC_DADB, FUNC_DER, &
104        FUNC_DX, FUNC_DXDX, FUNC_DXDY, FUNC_DY, FUNC_DYDY, FUNC_DYDZ, FUNC_DZ, FUNC_DZDX, &
105        FUNC_DZDZ, prepare_adb_m_dab, prepare_arb, prepare_ardb_m_darb, prepare_dab_p_adb, &
106        prepare_dadb, prepare_diadib, prepare_diiadiib, prepare_dijadijb
107   USE qs_neighbor_list_types,          ONLY: neighbor_list_set_p_type
108   USE realspace_grid_types,            ONLY: realspace_grid_desc_p_type,&
109                                              realspace_grid_p_type,&
110                                              realspace_grid_type,&
111                                              rs2pw,&
112                                              rs_grid_release,&
113                                              rs_grid_retain,&
114                                              rs_grid_zero,&
115                                              rs_pw_transfer
116   USE rs_pw_interface,                 ONLY: density_rs2pw,&
117                                              density_rs2pw_basic
118   USE task_list_methods,               ONLY: int2pair,&
119                                              rs_distribute_matrix
120   USE task_list_types,                 ONLY: task_list_type
121
122!$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads
123
124#include "./base/base_uses.f90"
125
126   IMPLICIT NONE
127
128   PRIVATE
129
130   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_collocate_density'
131! *** Public subroutines ***
132
133   PUBLIC :: calculate_ppl_grid, &
134             calculate_rho_core, &
135             calculate_lri_rho_elec, &
136             calculate_rho_single_gaussian, &
137             calculate_rho_metal, &
138             calculate_rho_resp_single, &
139             calculate_rho_resp_all, &
140             calculate_rho_elec, &
141             calculate_drho_elec, &
142             calculate_wavefunction, &
143             collocate_pgf_product_gspace, &
144             collocate_pgf_product_rspace, &
145             calculate_rho_nlcc
146
147   INTEGER :: debug_count = 0
148
149CONTAINS
150
151! **************************************************************************************************
152!> \brief computes the density of the non-linear core correction on the grid
153!> \param rho_nlcc ...
154!> \param qs_env ...
155! **************************************************************************************************
156   SUBROUTINE calculate_rho_nlcc(rho_nlcc, qs_env)
157
158      TYPE(pw_p_type), INTENT(INOUT)                     :: rho_nlcc
159      TYPE(qs_environment_type), POINTER                 :: qs_env
160
161      CHARACTER(len=*), PARAMETER :: routineN = 'calculate_rho_nlcc', &
162         routineP = moduleN//':'//routineN
163
164      INTEGER                                            :: atom_a, handle, iatom, iexp_nlcc, ikind, &
165                                                            ithread, j, lmax_global, n, natom, nc, &
166                                                            nexp_nlcc, ni, npme, nthread
167      INTEGER(KIND=int_8)                                :: subpatch_pattern
168      INTEGER, DIMENSION(:), POINTER                     :: atom_list, cores, mylmax, nct_nlcc
169      LOGICAL                                            :: nlcc
170      REAL(KIND=dp)                                      :: alpha, eps_rho_rspace
171      REAL(KIND=dp), DIMENSION(3)                        :: ra
172      REAL(KIND=dp), DIMENSION(:), POINTER               :: alpha_nlcc
173      REAL(KIND=dp), DIMENSION(:, :), POINTER            :: cval_nlcc, pab
174      TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
175      TYPE(cell_type), POINTER                           :: cell
176      TYPE(cube_info_type)                               :: cube_info
177      TYPE(dft_control_type), POINTER                    :: dft_control
178      TYPE(gth_potential_type), POINTER                  :: gth_potential
179      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
180      TYPE(pw_env_type), POINTER                         :: pw_env
181      TYPE(pw_pool_type), POINTER                        :: auxbas_pw_pool
182      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
183      TYPE(realspace_grid_type), POINTER                 :: rs_rho
184
185      CALL timeset(routineN, handle)
186
187      NULLIFY (cell, dft_control, pab, particle_set, atomic_kind_set, qs_kind_set, &
188               atom_list, pw_env, rs_rho, auxbas_pw_pool, cores, mylmax)
189
190      CALL get_qs_env(qs_env=qs_env, &
191                      atomic_kind_set=atomic_kind_set, &
192                      qs_kind_set=qs_kind_set, &
193                      cell=cell, &
194                      dft_control=dft_control, &
195                      particle_set=particle_set, &
196                      pw_env=pw_env)
197      CALL pw_env_get(pw_env, auxbas_rs_grid=rs_rho, &
198                      auxbas_pw_pool=auxbas_pw_pool)
199      cube_info = pw_env%cube_info(1)
200      ! be careful in parallel nsmax is choosen with multigrid in mind!
201      CALL rs_grid_retain(rs_rho)
202      CALL rs_grid_zero(rs_rho)
203
204      eps_rho_rspace = dft_control%qs_control%eps_rho_rspace
205
206      !Find the maximum la_max over all of the atomic_kind_set
207      !We are using FUNC_AB so no need to cater for different ga_gb_functions
208      lmax_global = 0
209      DO ikind = 1, SIZE(atomic_kind_set)
210         CALL get_qs_kind(qs_kind_set(ikind), gth_potential=gth_potential)
211         IF (.NOT. ASSOCIATED(gth_potential)) CYCLE
212         CALL get_potential(potential=gth_potential, nct_nlcc=mylmax, nlcc_present=nlcc)
213         IF (.NOT. nlcc) CYCLE
214         lmax_global = MAX(MAXVAL(mylmax), lmax_global)
215      END DO
216      !lmax_global set according to ni = 2*nc-2 below
217      lmax_global = 2*lmax_global - 2
218
219      DO ikind = 1, SIZE(atomic_kind_set)
220         CALL get_atomic_kind(atomic_kind_set(ikind), natom=natom, atom_list=atom_list)
221         CALL get_qs_kind(qs_kind_set(ikind), gth_potential=gth_potential)
222
223         IF (.NOT. ASSOCIATED(gth_potential)) CYCLE
224         CALL get_potential(potential=gth_potential, nlcc_present=nlcc, nexp_nlcc=nexp_nlcc, &
225                            alpha_nlcc=alpha_nlcc, nct_nlcc=nct_nlcc, cval_nlcc=cval_nlcc)
226
227         IF (.NOT. nlcc) CYCLE
228
229         DO iexp_nlcc = 1, nexp_nlcc
230
231            alpha = alpha_nlcc(iexp_nlcc)
232            nc = nct_nlcc(iexp_nlcc)
233
234            ni = ncoset(2*nc - 2)
235            ALLOCATE (pab(ni, 1))
236            pab = 0._dp
237
238            nthread = 1
239            ithread = 0
240
241            CALL reallocate(cores, 1, natom)
242            npme = 0
243            cores = 0
244
245            ! prepare core function
246            DO j = 1, nc
247               SELECT CASE (j)
248               CASE (1)
249                  pab(1, 1) = cval_nlcc(1, iexp_nlcc)
250               CASE (2)
251                  n = coset(2, 0, 0)
252                  pab(n, 1) = cval_nlcc(2, iexp_nlcc)/alpha**2
253                  n = coset(0, 2, 0)
254                  pab(n, 1) = cval_nlcc(2, iexp_nlcc)/alpha**2
255                  n = coset(0, 0, 2)
256                  pab(n, 1) = cval_nlcc(2, iexp_nlcc)/alpha**2
257               CASE (3)
258                  n = coset(4, 0, 0)
259                  pab(n, 1) = cval_nlcc(3, iexp_nlcc)/alpha**4
260                  n = coset(0, 4, 0)
261                  pab(n, 1) = cval_nlcc(3, iexp_nlcc)/alpha**4
262                  n = coset(0, 0, 4)
263                  pab(n, 1) = cval_nlcc(3, iexp_nlcc)/alpha**4
264                  n = coset(2, 2, 0)
265                  pab(n, 1) = 2._dp*cval_nlcc(3, iexp_nlcc)/alpha**4
266                  n = coset(2, 0, 2)
267                  pab(n, 1) = 2._dp*cval_nlcc(3, iexp_nlcc)/alpha**4
268                  n = coset(0, 2, 2)
269                  pab(n, 1) = 2._dp*cval_nlcc(3, iexp_nlcc)/alpha**4
270               CASE (4)
271                  n = coset(6, 0, 0)
272                  pab(n, 1) = cval_nlcc(4, iexp_nlcc)/alpha**6
273                  n = coset(0, 6, 0)
274                  pab(n, 1) = cval_nlcc(4, iexp_nlcc)/alpha**6
275                  n = coset(0, 0, 6)
276                  pab(n, 1) = cval_nlcc(4, iexp_nlcc)/alpha**6
277                  n = coset(4, 2, 0)
278                  pab(n, 1) = 3._dp*cval_nlcc(4, iexp_nlcc)/alpha**6
279                  n = coset(4, 0, 2)
280                  pab(n, 1) = 3._dp*cval_nlcc(4, iexp_nlcc)/alpha**6
281                  n = coset(2, 4, 0)
282                  pab(n, 1) = 3._dp*cval_nlcc(4, iexp_nlcc)/alpha**6
283                  n = coset(2, 0, 4)
284                  pab(n, 1) = 3._dp*cval_nlcc(4, iexp_nlcc)/alpha**6
285                  n = coset(0, 4, 2)
286                  pab(n, 1) = 3._dp*cval_nlcc(4, iexp_nlcc)/alpha**6
287                  n = coset(0, 2, 4)
288                  pab(n, 1) = 3._dp*cval_nlcc(4, iexp_nlcc)/alpha**6
289                  n = coset(2, 2, 2)
290                  pab(n, 1) = 6._dp*cval_nlcc(4, iexp_nlcc)/alpha**6
291               CASE DEFAULT
292                  CPABORT("")
293               END SELECT
294            END DO
295            IF (dft_control%nspins == 2) pab = pab*0.5_dp
296
297            DO iatom = 1, natom
298               atom_a = atom_list(iatom)
299               ra(:) = pbc(particle_set(atom_a)%r, cell)
300               IF (rs_rho%desc%parallel .AND. .NOT. rs_rho%desc%distributed) THEN
301                  ! replicated realspace grid, split the atoms up between procs
302                  IF (MODULO(iatom, rs_rho%desc%group_size) == rs_rho%desc%my_pos) THEN
303                     npme = npme + 1
304                     cores(npme) = iatom
305                  ENDIF
306               ELSE
307                  npme = npme + 1
308                  cores(npme) = iatom
309               ENDIF
310            END DO
311
312            DO j = 1, npme
313
314               iatom = cores(j)
315               atom_a = atom_list(iatom)
316               ra(:) = pbc(particle_set(atom_a)%r, cell)
317               subpatch_pattern = 0
318               ni = 2*nc - 2
319               CALL collocate_pgf_product_rspace(ni, 1/(2*alpha**2), 0, 0, 0.0_dp, 0, ra, &
320                                                 (/0.0_dp, 0.0_dp, 0.0_dp/), 0.0_dp, 1.0_dp, pab, 0, 0, rs_rho, &
321                                                 cell, cube_info, eps_rho_rspace, ga_gb_function=FUNC_AB, &
322                                                 ithread=ithread, use_subpatch=.TRUE., subpatch_pattern=subpatch_pattern, &
323                                                 lmax_global=lmax_global)
324
325            END DO
326
327            DEALLOCATE (pab)
328
329         END DO
330
331      END DO
332
333      IF (ASSOCIATED(cores)) THEN
334         DEALLOCATE (cores)
335      END IF
336
337      CALL rs_pw_transfer(rs_rho, rho_nlcc%pw, rs2pw)
338      CALL rs_grid_release(rs_rho)
339
340      CALL timestop(handle)
341
342   END SUBROUTINE calculate_rho_nlcc
343
344! **************************************************************************************************
345!> \brief computes the local pseudopotential (without erf term) on the grid
346!> \param vppl ...
347!> \param qs_env ...
348! **************************************************************************************************
349   SUBROUTINE calculate_ppl_grid(vppl, qs_env)
350
351      TYPE(pw_p_type), INTENT(INOUT)                     :: vppl
352      TYPE(qs_environment_type), POINTER                 :: qs_env
353
354      CHARACTER(len=*), PARAMETER :: routineN = 'calculate_ppl_grid', &
355         routineP = moduleN//':'//routineN
356
357      INTEGER                                            :: atom_a, handle, iatom, ikind, ithread, &
358                                                            j, lmax_global, lppl, n, natom, ni, &
359                                                            npme, nthread
360      INTEGER(KIND=int_8)                                :: subpatch_pattern
361      INTEGER, DIMENSION(:), POINTER                     :: atom_list, cores
362      REAL(KIND=dp)                                      :: alpha, eps_rho_rspace
363      REAL(KIND=dp), DIMENSION(3)                        :: ra
364      REAL(KIND=dp), DIMENSION(:), POINTER               :: cexp_ppl
365      REAL(KIND=dp), DIMENSION(:, :), POINTER            :: pab
366      TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
367      TYPE(cell_type), POINTER                           :: cell
368      TYPE(cube_info_type)                               :: cube_info
369      TYPE(dft_control_type), POINTER                    :: dft_control
370      TYPE(gth_potential_type), POINTER                  :: gth_potential
371      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
372      TYPE(pw_env_type), POINTER                         :: pw_env
373      TYPE(pw_pool_type), POINTER                        :: auxbas_pw_pool
374      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
375      TYPE(realspace_grid_type), POINTER                 :: rs_rho
376
377      CALL timeset(routineN, handle)
378
379      NULLIFY (cell, dft_control, pab, atomic_kind_set, qs_kind_set, particle_set, &
380               atom_list, pw_env, rs_rho, auxbas_pw_pool, cores)
381
382      CALL get_qs_env(qs_env=qs_env, &
383                      atomic_kind_set=atomic_kind_set, &
384                      qs_kind_set=qs_kind_set, &
385                      cell=cell, &
386                      dft_control=dft_control, &
387                      particle_set=particle_set, &
388                      pw_env=pw_env)
389      CALL pw_env_get(pw_env, auxbas_rs_grid=rs_rho, &
390                      auxbas_pw_pool=auxbas_pw_pool)
391      cube_info = pw_env%cube_info(1)
392      ! be careful in parallel nsmax is choosen with multigrid in mind!
393      CALL rs_grid_retain(rs_rho)
394      CALL rs_grid_zero(rs_rho)
395
396      eps_rho_rspace = dft_control%qs_control%eps_rho_rspace
397
398      !Find maximum la_max over all of the atomic kind set where la_max=ni & ni=2*lppl-2
399      !Thus need to find the max of lppl over atomic_kind_set and use to define lmax_global
400      !We are using FUNC_AB so no need to cater for different ga_gb_functions
401      lmax_global = 0
402      DO ikind = 1, SIZE(atomic_kind_set)
403         CALL get_qs_kind(qs_kind_set(ikind), gth_potential=gth_potential)
404         IF (.NOT. ASSOCIATED(gth_potential)) CYCLE
405         CALL get_potential(potential=gth_potential, nexp_ppl=lppl)
406         lmax_global = MAX(lppl, lmax_global)
407      END DO
408      lmax_global = 2*lmax_global - 2
409
410      DO ikind = 1, SIZE(atomic_kind_set)
411         CALL get_atomic_kind(atomic_kind_set(ikind), natom=natom, atom_list=atom_list)
412         CALL get_qs_kind(qs_kind_set(ikind), gth_potential=gth_potential)
413
414         IF (.NOT. ASSOCIATED(gth_potential)) CYCLE
415         CALL get_potential(potential=gth_potential, alpha_ppl=alpha, nexp_ppl=lppl, cexp_ppl=cexp_ppl)
416
417         IF (lppl <= 0) CYCLE
418
419         ni = ncoset(2*lppl - 2)
420         ALLOCATE (pab(ni, 1))
421         pab = 0._dp
422
423         nthread = 1
424         ithread = 0
425
426         CALL reallocate(cores, 1, natom)
427         npme = 0
428         cores = 0
429
430         ! prepare core function
431         DO j = 1, lppl
432            SELECT CASE (j)
433            CASE (1)
434               pab(1, 1) = cexp_ppl(1)
435            CASE (2)
436               n = coset(2, 0, 0)
437               pab(n, 1) = cexp_ppl(2)
438               n = coset(0, 2, 0)
439               pab(n, 1) = cexp_ppl(2)
440               n = coset(0, 0, 2)
441               pab(n, 1) = cexp_ppl(2)
442            CASE (3)
443               n = coset(4, 0, 0)
444               pab(n, 1) = cexp_ppl(3)
445               n = coset(0, 4, 0)
446               pab(n, 1) = cexp_ppl(3)
447               n = coset(0, 0, 4)
448               pab(n, 1) = cexp_ppl(3)
449               n = coset(2, 2, 0)
450               pab(n, 1) = 2._dp*cexp_ppl(3)
451               n = coset(2, 0, 2)
452               pab(n, 1) = 2._dp*cexp_ppl(3)
453               n = coset(0, 2, 2)
454               pab(n, 1) = 2._dp*cexp_ppl(3)
455            CASE (4)
456               n = coset(6, 0, 0)
457               pab(n, 1) = cexp_ppl(4)
458               n = coset(0, 6, 0)
459               pab(n, 1) = cexp_ppl(4)
460               n = coset(0, 0, 6)
461               pab(n, 1) = cexp_ppl(4)
462               n = coset(4, 2, 0)
463               pab(n, 1) = 3._dp*cexp_ppl(4)
464               n = coset(4, 0, 2)
465               pab(n, 1) = 3._dp*cexp_ppl(4)
466               n = coset(2, 4, 0)
467               pab(n, 1) = 3._dp*cexp_ppl(4)
468               n = coset(2, 0, 4)
469               pab(n, 1) = 3._dp*cexp_ppl(4)
470               n = coset(0, 4, 2)
471               pab(n, 1) = 3._dp*cexp_ppl(4)
472               n = coset(0, 2, 4)
473               pab(n, 1) = 3._dp*cexp_ppl(4)
474               n = coset(2, 2, 2)
475               pab(n, 1) = 6._dp*cexp_ppl(4)
476            CASE DEFAULT
477               CPABORT("")
478            END SELECT
479         END DO
480
481         DO iatom = 1, natom
482            atom_a = atom_list(iatom)
483            ra(:) = pbc(particle_set(atom_a)%r, cell)
484            IF (rs_rho%desc%parallel .AND. .NOT. rs_rho%desc%distributed) THEN
485               ! replicated realspace grid, split the atoms up between procs
486               IF (MODULO(iatom, rs_rho%desc%group_size) == rs_rho%desc%my_pos) THEN
487                  npme = npme + 1
488                  cores(npme) = iatom
489               ENDIF
490            ELSE
491               npme = npme + 1
492               cores(npme) = iatom
493            ENDIF
494         END DO
495
496         IF (npme .GT. 0) THEN
497            DO j = 1, npme
498
499               iatom = cores(j)
500               atom_a = atom_list(iatom)
501               ra(:) = pbc(particle_set(atom_a)%r, cell)
502               subpatch_pattern = 0
503               ni = 2*lppl - 2
504               CALL collocate_pgf_product_rspace(ni, alpha, 0, 0, 0.0_dp, 0, ra, &
505                                                 (/0.0_dp, 0.0_dp, 0.0_dp/), 0.0_dp, 1.0_dp, pab, 0, 0, rs_rho, &
506                                                 cell, cube_info, eps_rho_rspace, ga_gb_function=FUNC_AB, &
507                                                 ithread=ithread, use_subpatch=.TRUE., subpatch_pattern=subpatch_pattern, &
508                                                 lmax_global=lmax_global)
509
510            END DO
511         ENDIF
512
513         DEALLOCATE (pab)
514
515      END DO
516
517      IF (ASSOCIATED(cores)) THEN
518         DEALLOCATE (cores)
519      END IF
520
521      CALL rs_pw_transfer(rs_rho, vppl%pw, rs2pw)
522      CALL rs_grid_release(rs_rho)
523
524      CALL timestop(handle)
525
526   END SUBROUTINE calculate_ppl_grid
527
528! **************************************************************************************************
529!> \brief Collocates the fitted lri density on a grid.
530!> \param lri_rho_g ...
531!> \param lri_rho_r ...
532!> \param qs_env ...
533!> \param lri_coef ...
534!> \param total_rho ...
535!> \param basis_type ...
536!> \param exact_1c_terms ...
537!> \param pmat replicated block diagonal density matrix (optional)
538!> \param atomlist list of atoms to be included (optional)
539!> \par History
540!>      04.2013
541!> \author Dorothea Golze
542! **************************************************************************************************
543   SUBROUTINE calculate_lri_rho_elec(lri_rho_g, lri_rho_r, qs_env, &
544                                     lri_coef, total_rho, basis_type, exact_1c_terms, pmat, atomlist)
545
546      TYPE(pw_p_type), INTENT(INOUT)                     :: lri_rho_g, lri_rho_r
547      TYPE(qs_environment_type), POINTER                 :: qs_env
548      TYPE(lri_kind_type), DIMENSION(:), POINTER         :: lri_coef
549      REAL(KIND=dp), INTENT(OUT)                         :: total_rho
550      CHARACTER(len=*), INTENT(IN)                       :: basis_type
551      LOGICAL, INTENT(IN)                                :: exact_1c_terms
552      TYPE(dbcsr_type), OPTIONAL                         :: pmat
553      INTEGER, DIMENSION(:), OPTIONAL                    :: atomlist
554
555      CHARACTER(len=*), PARAMETER :: routineN = 'calculate_lri_rho_elec', &
556         routineP = moduleN//':'//routineN
557
558      INTEGER :: atom_a, dir, group_size, handle, iatom, igrid_level, ikind, ipgf, iset, jpgf, &
559         jset, lmax_global, m1, maxco, maxpgf, maxset, maxsgf, maxsgf_set, my_pos, na1, natom, &
560         nb1, ncoa, ncob, nseta, offset, sgfa, sgfb
561      INTEGER, DIMENSION(3)                              :: lb, location, tp, ub
562      INTEGER, DIMENSION(:), POINTER                     :: atom_list, la_max, la_min, mylmax, &
563                                                            npgfa, nsgfa
564      INTEGER, DIMENSION(:, :), POINTER                  :: first_sgfa
565      LOGICAL                                            :: found, map_consistent
566      LOGICAL, ALLOCATABLE, DIMENSION(:)                 :: map_it
567      LOGICAL, ALLOCATABLE, DIMENSION(:, :)              :: map_it2
568      REAL(KIND=dp)                                      :: eps_rho_rspace, rab2, zetp
569      REAL(KIND=dp), DIMENSION(3)                        :: ra, rab
570      REAL(KIND=dp), DIMENSION(:), POINTER               :: aci
571      REAL(KIND=dp), DIMENSION(:, :), POINTER            :: p_block, pab, sphi_a, work, zeta
572      TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
573      TYPE(cell_type), POINTER                           :: cell
574      TYPE(cube_info_type), DIMENSION(:), POINTER        :: cube_info
575      TYPE(dft_control_type), POINTER                    :: dft_control
576      TYPE(gridlevel_info_type), POINTER                 :: gridlevel_info
577      TYPE(gto_basis_set_type), POINTER                  :: lri_basis_set, orb_basis_set
578      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
579      TYPE(pw_env_type), POINTER                         :: pw_env
580      TYPE(pw_p_type), DIMENSION(:), POINTER             :: mgrid_gspace, mgrid_rspace
581      TYPE(pw_pool_p_type), DIMENSION(:), POINTER        :: pw_pools
582      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
583      TYPE(qs_kind_type), POINTER                        :: qs_kind
584      TYPE(realspace_grid_p_type), DIMENSION(:), POINTER :: rs_rho
585      TYPE(realspace_grid_type), POINTER                 :: rs_grid
586
587      NULLIFY (aci, atomic_kind_set, qs_kind_set, atom_list, cell, cube_info, &
588               dft_control, first_sgfa, gridlevel_info, la_max, la_min, lri_basis_set, &
589               mgrid_gspace, mgrid_rspace, npgfa, nsgfa, pab, &
590               particle_set, pw_env, pw_pools, rs_grid, rs_rho, sphi_a, &
591               work, zeta, mylmax)
592
593      CALL timeset(routineN, handle)
594
595      IF (exact_1c_terms) THEN
596         CPASSERT(PRESENT(pmat))
597      END IF
598
599      CALL get_qs_env(qs_env=qs_env, qs_kind_set=qs_kind_set, &
600                      atomic_kind_set=atomic_kind_set, &
601                      cell=cell, particle_set=particle_set, &
602                      pw_env=pw_env, &
603                      dft_control=dft_control)
604
605      cube_info => pw_env%cube_info
606      eps_rho_rspace = dft_control%qs_control%eps_rho_rspace
607      map_consistent = dft_control%qs_control%map_consistent
608      gridlevel_info => pw_env%gridlevel_info
609
610      ! *** set up the pw multi-grids *** !
611      CPASSERT(ASSOCIATED(pw_env))
612      CALL pw_env_get(pw_env=pw_env, rs_grids=rs_rho, pw_pools=pw_pools)
613
614      CALL pw_pools_create_pws(pw_pools, mgrid_rspace, &
615                               use_data=REALDATA3D, &
616                               in_space=REALSPACE)
617
618      CALL pw_pools_create_pws(pw_pools, mgrid_gspace, &
619                               use_data=COMPLEXDATA1D, &
620                               in_space=RECIPROCALSPACE)
621
622      ! *** set up the rs multi-grids *** !
623      DO igrid_level = 1, gridlevel_info%ngrid_levels
624         CALL rs_grid_retain(rs_rho(igrid_level)%rs_grid)
625         CALL rs_grid_zero(rs_rho(igrid_level)%rs_grid)
626      END DO
627
628      !take maxco from the LRI basis set!
629      CALL get_qs_kind_set(qs_kind_set=qs_kind_set, &
630                           maxco=maxco, basis_type=basis_type)
631
632      ALLOCATE (pab(maxco, 1))
633      offset = 0
634      my_pos = mgrid_rspace(1)%pw%pw_grid%para%my_pos
635      group_size = mgrid_rspace(1)%pw%pw_grid%para%group_size
636
637      !Find lmax_global across atomic_kind_set
638      lmax_global = 0
639      DO ikind = 1, SIZE(atomic_kind_set)
640         CALL get_qs_kind(qs_kind_set(ikind), basis_set=lri_basis_set, basis_type=basis_type)
641         CALL get_gto_basis_set(gto_basis_set=lri_basis_set, lmax=mylmax)
642         lmax_global = MAX(MAXVAL(mylmax), lmax_global)
643      END DO
644
645      DO ikind = 1, SIZE(atomic_kind_set)
646
647         CALL get_atomic_kind(atomic_kind_set(ikind), natom=natom, atom_list=atom_list)
648         CALL get_qs_kind(qs_kind_set(ikind), basis_set=lri_basis_set, basis_type=basis_type)
649
650         !Take the lri basis set here!
651         CALL get_gto_basis_set(gto_basis_set=lri_basis_set, lmax=la_max, &
652                                lmin=la_min, zet=zeta, nset=nseta, npgf=npgfa, &
653                                sphi=sphi_a, first_sgf=first_sgfa, nsgf_set=nsgfa)
654
655         DO iatom = 1, natom
656            atom_a = atom_list(iatom)
657            IF (PRESENT(ATOMLIST)) THEN
658               IF (atomlist(atom_a) == 0) CYCLE
659            END IF
660            ra(:) = pbc(particle_set(atom_a)%r, cell)
661            aci => lri_coef(ikind)%acoef(iatom, :)
662
663            m1 = MAXVAL(npgfa(1:nseta))
664            ALLOCATE (map_it(m1))
665            DO iset = 1, nseta
666               ! collocate this set locally?
667               map_it = .FALSE.
668               DO ipgf = 1, npgfa(iset)
669                  igrid_level = gaussian_gridlevel(gridlevel_info, zeta(ipgf, iset))
670                  rs_grid => rs_rho(igrid_level)%rs_grid
671                  IF (.NOT. ALL(rs_grid%desc%perd == 1)) THEN
672                     DO dir = 1, 3
673                        ! bounds of local grid (i.e. removing the 'wings'), if periodic
674                        tp(dir) = FLOOR(DOT_PRODUCT(cell%h_inv(dir, :), ra)*rs_grid%desc%npts(dir))
675                        tp(dir) = MODULO(tp(dir), rs_grid%desc%npts(dir))
676                        IF (rs_grid%desc%perd(dir) .NE. 1) THEN
677                           lb(dir) = rs_grid%lb_local(dir) + rs_grid%desc%border
678                           ub(dir) = rs_grid%ub_local(dir) - rs_grid%desc%border
679                        ELSE
680                           lb(dir) = rs_grid%lb_local(dir)
681                           ub(dir) = rs_grid%ub_local(dir)
682                        ENDIF
683                        ! distributed grid, only map if it is local to the grid
684                        location(dir) = tp(dir) + rs_grid%desc%lb(dir)
685                     ENDDO
686                     IF (lb(1) <= location(1) .AND. location(1) <= ub(1) .AND. &
687                         lb(2) <= location(2) .AND. location(2) <= ub(2) .AND. &
688                         lb(3) <= location(3) .AND. location(3) <= ub(3)) THEN
689                        map_it(ipgf) = .TRUE.
690                     ENDIF
691                  ELSE
692                     ! not distributed, just a round-robin distribution over the full set of CPUs
693                     IF (MODULO(offset, group_size) == my_pos) map_it(ipgf) = .TRUE.
694                  ENDIF
695               END DO
696               offset = offset + 1
697
698               IF (ANY(map_it(1:npgfa(iset)))) THEN
699                  sgfa = first_sgfa(1, iset)
700                  ncoa = npgfa(iset)*ncoset(la_max(iset))
701                  m1 = sgfa + nsgfa(iset) - 1
702                  ALLOCATE (work(nsgfa(iset), 1))
703                  work(1:nsgfa(iset), 1) = aci(sgfa:m1)
704                  pab = 0._dp
705
706                  CALL dgemm("N", "N", ncoa, 1, nsgfa(iset), 1.0_dp, lri_basis_set%sphi(1, sgfa), &
707                             SIZE(lri_basis_set%sphi, 1), work(1, 1), SIZE(work, 1), 0.0_dp, pab(1, 1), &
708                             SIZE(pab, 1))
709
710                  DO ipgf = 1, npgfa(iset)
711                     na1 = (ipgf - 1)*ncoset(la_max(iset))
712                     igrid_level = gaussian_gridlevel(gridlevel_info, zeta(ipgf, iset))
713                     rs_grid => rs_rho(igrid_level)%rs_grid
714                     IF (map_it(ipgf)) THEN
715                        CALL collocate_pgf_product_rspace(la_max=la_max(iset), &
716                                                          zeta=zeta(ipgf, iset), &
717                                                          la_min=la_min(iset), &
718                                                          lb_max=0, zetb=0.0_dp, lb_min=0, &
719                                                          ra=ra, rab=(/0.0_dp, 0.0_dp, 0.0_dp/), &
720                                                          rab2=0.0_dp, scale=1._dp, &
721                                                          pab=pab, o1=na1, o2=0, &
722                                                          rsgrid=rs_grid, cell=cell, &
723                                                          cube_info=cube_info(igrid_level), &
724                                                          eps_rho_rspace=eps_rho_rspace, &
725                                                          ga_gb_function=FUNC_AB, &
726                                                          map_consistent=map_consistent, &
727                                                          lmax_global=lmax_global)
728                     ENDIF
729                  END DO
730                  DEALLOCATE (work)
731               END IF
732            END DO
733            DEALLOCATE (map_it)
734         END DO
735      END DO
736
737      DEALLOCATE (pab)
738
739      ! process the one-center terms
740      IF (exact_1c_terms) THEN
741         ! find maximum numbers
742         maxset = 0
743         maxpgf = 0
744         lmax_global = 0
745         offset = 0
746         DO ikind = 1, SIZE(qs_kind_set)
747            qs_kind => qs_kind_set(ikind)
748            CALL get_qs_kind(qs_kind=qs_kind, &
749                             basis_set=orb_basis_set, basis_type="ORB")
750            IF (.NOT. ASSOCIATED(orb_basis_set)) CYCLE
751            CALL get_gto_basis_set(gto_basis_set=orb_basis_set, &
752                                   npgf=npgfa, nset=nseta, lmax=mylmax)
753            maxset = MAX(nseta, maxset)
754            maxpgf = MAX(MAXVAL(npgfa), maxpgf)
755            lmax_global = MAX(MAXVAL(mylmax), lmax_global)
756         END DO
757         CALL get_qs_kind_set(qs_kind_set=qs_kind_set, &
758                              maxco=maxco, &
759                              maxsgf=maxsgf, &
760                              maxsgf_set=maxsgf_set, &
761                              basis_type="ORB")
762         ALLOCATE (pab(maxco, maxco), work(maxco, maxsgf_set))
763
764         DO ikind = 1, SIZE(atomic_kind_set)
765            CALL get_atomic_kind(atomic_kind_set(ikind), natom=natom, atom_list=atom_list)
766            CALL get_qs_kind(qs_kind_set(ikind), basis_set=orb_basis_set, basis_type="ORB")
767            CALL get_gto_basis_set(gto_basis_set=orb_basis_set, lmax=la_max, &
768                                   lmin=la_min, zet=zeta, nset=nseta, npgf=npgfa, &
769                                   sphi=sphi_a, first_sgf=first_sgfa, nsgf_set=nsgfa)
770            DO iatom = 1, natom
771               atom_a = atom_list(iatom)
772               ra(:) = pbc(particle_set(atom_a)%r, cell)
773               rab(:) = 0.0_dp
774               rab2 = 0.0_dp
775               CALL dbcsr_get_block_p(matrix=pmat, row=atom_a, col=atom_a, BLOCK=p_block, found=found)
776               m1 = MAXVAL(npgfa(1:nseta))
777               ALLOCATE (map_it2(m1, m1))
778               DO iset = 1, nseta
779                  DO jset = 1, nseta
780                     ! processor mappint
781                     map_it2 = .FALSE.
782                     DO ipgf = 1, npgfa(iset)
783                        DO jpgf = 1, npgfa(jset)
784                           zetp = zeta(ipgf, iset) + zeta(jpgf, jset)
785                           igrid_level = gaussian_gridlevel(gridlevel_info, zetp)
786                           rs_grid => rs_rho(igrid_level)%rs_grid
787                           IF (.NOT. ALL(rs_grid%desc%perd == 1)) THEN
788                              DO dir = 1, 3
789                                 ! bounds of local grid (i.e. removing the 'wings'), if periodic
790                                 tp(dir) = FLOOR(DOT_PRODUCT(cell%h_inv(dir, :), ra)*rs_grid%desc%npts(dir))
791                                 tp(dir) = MODULO(tp(dir), rs_grid%desc%npts(dir))
792                                 IF (rs_grid%desc%perd(dir) .NE. 1) THEN
793                                    lb(dir) = rs_grid%lb_local(dir) + rs_grid%desc%border
794                                    ub(dir) = rs_grid%ub_local(dir) - rs_grid%desc%border
795                                 ELSE
796                                    lb(dir) = rs_grid%lb_local(dir)
797                                    ub(dir) = rs_grid%ub_local(dir)
798                                 ENDIF
799                                 ! distributed grid, only map if it is local to the grid
800                                 location(dir) = tp(dir) + rs_grid%desc%lb(dir)
801                              ENDDO
802                              IF (lb(1) <= location(1) .AND. location(1) <= ub(1) .AND. &
803                                  lb(2) <= location(2) .AND. location(2) <= ub(2) .AND. &
804                                  lb(3) <= location(3) .AND. location(3) <= ub(3)) THEN
805                                 map_it2(ipgf, jpgf) = .TRUE.
806                              ENDIF
807                           ELSE
808                              ! not distributed, just a round-robin distribution over the full set of CPUs
809                              IF (MODULO(offset, group_size) == my_pos) map_it2(ipgf, jpgf) = .TRUE.
810                           ENDIF
811                        END DO
812                     END DO
813                     offset = offset + 1
814                     !
815                     IF (ANY(map_it2(1:npgfa(iset), 1:npgfa(jset)))) THEN
816                        ncoa = npgfa(iset)*ncoset(la_max(iset))
817                        sgfa = first_sgfa(1, iset)
818                        ncob = npgfa(jset)*ncoset(la_max(jset))
819                        sgfb = first_sgfa(1, jset)
820                        ! decontract density block
821                        CALL dgemm("N", "N", ncoa, nsgfa(jset), nsgfa(iset), &
822                                   1.0_dp, sphi_a(1, sgfa), SIZE(sphi_a, 1), &
823                                   p_block(sgfa, sgfb), SIZE(p_block, 1), &
824                                   0.0_dp, work(1, 1), maxco)
825                        CALL dgemm("N", "T", ncoa, ncob, nsgfa(jset), &
826                                   1.0_dp, work(1, 1), maxco, &
827                                   sphi_a(1, sgfb), SIZE(sphi_a, 1), &
828                                   0.0_dp, pab(1, 1), maxco)
829                        DO ipgf = 1, npgfa(iset)
830                           DO jpgf = 1, npgfa(jset)
831                              zetp = zeta(ipgf, iset) + zeta(jpgf, jset)
832                              igrid_level = gaussian_gridlevel(gridlevel_info, zetp)
833                              rs_grid => rs_rho(igrid_level)%rs_grid
834
835                              na1 = (ipgf - 1)*ncoset(la_max(iset))
836                              nb1 = (jpgf - 1)*ncoset(la_max(jset))
837
838                              IF (map_it2(ipgf, jpgf)) THEN
839                                 CALL collocate_pgf_product_rspace( &
840                                    la_max(iset), zeta(ipgf, iset), la_min(iset), &
841                                    la_max(jset), zeta(jpgf, jset), la_min(jset), &
842                                    ra, rab, rab2, 1.0_dp, pab, na1, nb1, &
843                                    rs_grid, cell, cube_info(igrid_level), &
844                                    eps_rho_rspace, ga_gb_function=FUNC_AB, &
845                                    map_consistent=map_consistent, lmax_global=lmax_global)
846                              END IF
847                           END DO
848                        END DO
849                     END IF
850                  END DO
851               END DO
852               DEALLOCATE (map_it2)
853               !
854            END DO
855         END DO
856         DEALLOCATE (pab, work)
857      END IF
858
859      CALL pw_zero(lri_rho_g%pw)
860      CALL pw_zero(lri_rho_r%pw)
861
862      DO igrid_level = 1, gridlevel_info%ngrid_levels
863         CALL pw_zero(mgrid_rspace(igrid_level)%pw)
864         CALL rs_pw_transfer(rs=rs_rho(igrid_level)%rs_grid, &
865                             pw=mgrid_rspace(igrid_level)%pw, dir=rs2pw)
866         CALL rs_grid_release(rs_rho(igrid_level)%rs_grid)
867      END DO
868
869      DO igrid_level = 1, gridlevel_info%ngrid_levels
870         CALL pw_zero(mgrid_gspace(igrid_level)%pw)
871         CALL pw_transfer(mgrid_rspace(igrid_level)%pw, &
872                          mgrid_gspace(igrid_level)%pw)
873         CALL pw_axpy(mgrid_gspace(igrid_level)%pw, lri_rho_g%pw)
874      END DO
875      CALL pw_transfer(lri_rho_g%pw, lri_rho_r%pw)
876      total_rho = pw_integrate_function(lri_rho_r%pw, isign=-1)
877
878      ! *** give back the multi-grids *** !
879      CALL pw_pools_give_back_pws(pw_pools, mgrid_gspace)
880      CALL pw_pools_give_back_pws(pw_pools, mgrid_rspace)
881
882      CALL timestop(handle)
883
884   END SUBROUTINE calculate_lri_rho_elec
885
886! **************************************************************************************************
887!> \brief computes the density of the core charges on the grid
888!> \param rho_core ...
889!> \param total_rho ...
890!> \param qs_env ...
891!> \param only_nopaw ...
892! **************************************************************************************************
893   SUBROUTINE calculate_rho_core(rho_core, total_rho, qs_env, only_nopaw)
894
895      TYPE(pw_p_type), INTENT(INOUT)                     :: rho_core
896      REAL(KIND=dp), INTENT(OUT)                         :: total_rho
897      TYPE(qs_environment_type), POINTER                 :: qs_env
898      LOGICAL, INTENT(IN), OPTIONAL                      :: only_nopaw
899
900      CHARACTER(len=*), PARAMETER :: routineN = 'calculate_rho_core', &
901         routineP = moduleN//':'//routineN
902
903      INTEGER                                            :: atom_a, handle, iatom, ikind, ithread, &
904                                                            j, natom, npme, nthread
905      INTEGER(KIND=int_8)                                :: subpatch_pattern
906      INTEGER, DIMENSION(:), POINTER                     :: atom_list, cores
907      LOGICAL                                            :: my_only_nopaw, paw_atom
908      REAL(KIND=dp)                                      :: alpha, eps_rho_rspace
909      REAL(KIND=dp), DIMENSION(3)                        :: ra
910      REAL(KIND=dp), DIMENSION(:, :), POINTER            :: pab
911      TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
912      TYPE(cell_type), POINTER                           :: cell
913      TYPE(cube_info_type)                               :: cube_info
914      TYPE(dft_control_type), POINTER                    :: dft_control
915      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
916      TYPE(pw_env_type), POINTER                         :: pw_env
917      TYPE(pw_p_type)                                    :: rhoc_r
918      TYPE(pw_pool_type), POINTER                        :: auxbas_pw_pool
919      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
920      TYPE(realspace_grid_type), POINTER                 :: rs_rho
921
922      CALL timeset(routineN, handle)
923      NULLIFY (cell, dft_control, pab, atomic_kind_set, qs_kind_set, particle_set, &
924               atom_list, pw_env, rs_rho, auxbas_pw_pool, cores)
925      ALLOCATE (pab(1, 1))
926
927      my_only_nopaw = .FALSE.
928      IF (PRESENT(only_nopaw)) my_only_nopaw = only_nopaw
929
930      CALL get_qs_env(qs_env=qs_env, &
931                      atomic_kind_set=atomic_kind_set, &
932                      qs_kind_set=qs_kind_set, &
933                      cell=cell, &
934                      dft_control=dft_control, &
935                      particle_set=particle_set, &
936                      pw_env=pw_env)
937      CALL pw_env_get(pw_env, auxbas_rs_grid=rs_rho, &
938                      auxbas_pw_pool=auxbas_pw_pool)
939      cube_info = pw_env%cube_info(1)
940      ! be careful in parallel nsmax is choosen with multigrid in mind!
941      CALL rs_grid_retain(rs_rho)
942      CALL rs_grid_zero(rs_rho)
943
944      eps_rho_rspace = dft_control%qs_control%eps_rho_rspace
945
946      DO ikind = 1, SIZE(atomic_kind_set)
947         CALL get_atomic_kind(atomic_kind_set(ikind), natom=natom, atom_list=atom_list)
948         CALL get_qs_kind(qs_kind_set(ikind), paw_atom=paw_atom, &
949                          alpha_core_charge=alpha, ccore_charge=pab(1, 1))
950
951         IF (my_only_nopaw .AND. paw_atom) CYCLE
952         IF (alpha == 0.0_dp .OR. pab(1, 1) == 0.0_dp) CYCLE
953
954         nthread = 1
955         ithread = 0
956
957         CALL reallocate(cores, 1, natom)
958         npme = 0
959         cores = 0
960
961         DO iatom = 1, natom
962            IF (rs_rho%desc%parallel .AND. .NOT. rs_rho%desc%distributed) THEN
963               ! replicated realspace grid, split the atoms up between procs
964               IF (MODULO(iatom, rs_rho%desc%group_size) == rs_rho%desc%my_pos) THEN
965                  npme = npme + 1
966                  cores(npme) = iatom
967               ENDIF
968            ELSE
969               npme = npme + 1
970               cores(npme) = iatom
971            ENDIF
972         END DO
973
974         IF (npme .GT. 0) THEN
975            DO j = 1, npme
976
977               iatom = cores(j)
978               atom_a = atom_list(iatom)
979               ra(:) = pbc(particle_set(atom_a)%r, cell)
980               subpatch_pattern = 0
981               !lmax == 0 so set lmax_global to 0
982               CALL collocate_pgf_product_rspace(0, alpha, 0, 0, 0.0_dp, 0, ra, &
983                                                 (/0.0_dp, 0.0_dp, 0.0_dp/), 0.0_dp, -1.0_dp, pab, 0, 0, rs_rho, &
984                                                 cell, cube_info, eps_rho_rspace, ga_gb_function=FUNC_AB, &
985                                                 ithread=ithread, use_subpatch=.TRUE., subpatch_pattern=subpatch_pattern, &
986                                                 lmax_global=0)
987
988            END DO
989         ENDIF
990
991      END DO
992
993      IF (ASSOCIATED(cores)) THEN
994         DEALLOCATE (cores)
995      END IF
996      DEALLOCATE (pab)
997
998      CALL pw_pool_create_pw(auxbas_pw_pool, rhoc_r%pw, &
999                             use_data=REALDATA3D, in_space=REALSPACE)
1000
1001      CALL rs_pw_transfer(rs_rho, rhoc_r%pw, rs2pw)
1002      CALL rs_grid_release(rs_rho)
1003
1004      total_rho = pw_integrate_function(rhoc_r%pw, isign=-1)
1005
1006      CALL pw_transfer(rhoc_r%pw, rho_core%pw)
1007
1008      CALL pw_pool_give_back_pw(auxbas_pw_pool, rhoc_r%pw)
1009
1010      CALL timestop(handle)
1011
1012   END SUBROUTINE calculate_rho_core
1013
1014! **************************************************************************************************
1015!> \brief collocate a single Gaussian on the grid
1016!> \param rho_gb charge density generated by a single gaussian
1017!> \param qs_env qs environment
1018!> \param iatom_in atom index
1019!> \par History
1020!>        12.2011 created
1021!> \author Dorothea Golze
1022! **************************************************************************************************
1023   SUBROUTINE calculate_rho_single_gaussian(rho_gb, qs_env, iatom_in)
1024
1025      TYPE(pw_p_type), INTENT(INOUT)                     :: rho_gb
1026      TYPE(qs_environment_type), POINTER                 :: qs_env
1027      INTEGER, INTENT(IN)                                :: iatom_in
1028
1029      CHARACTER(len=*), PARAMETER :: routineN = 'calculate_rho_single_gaussian', &
1030         routineP = moduleN//':'//routineN
1031
1032      INTEGER                                            :: atom_a, handle, iatom, npme
1033      INTEGER(KIND=int_8)                                :: subpatch_pattern
1034      REAL(KIND=dp)                                      :: eps_rho_rspace
1035      REAL(KIND=dp), DIMENSION(3)                        :: ra
1036      REAL(KIND=dp), DIMENSION(:, :), POINTER            :: pab
1037      TYPE(cell_type), POINTER                           :: cell
1038      TYPE(dft_control_type), POINTER                    :: dft_control
1039      TYPE(pw_env_type), POINTER                         :: pw_env
1040      TYPE(pw_p_type)                                    :: rhoc_r
1041      TYPE(pw_pool_type), POINTER                        :: auxbas_pw_pool
1042      TYPE(realspace_grid_type), POINTER                 :: rs_rho
1043
1044      CALL timeset(routineN, handle)
1045      NULLIFY (cell, dft_control, pab, pw_env, rs_rho, auxbas_pw_pool)
1046
1047      ALLOCATE (pab(1, 1))
1048
1049      CALL get_qs_env(qs_env=qs_env, &
1050                      cell=cell, &
1051                      dft_control=dft_control, &
1052                      pw_env=pw_env)
1053      CALL pw_env_get(pw_env, auxbas_rs_grid=rs_rho, &
1054                      auxbas_pw_pool=auxbas_pw_pool)
1055      CALL rs_grid_retain(rs_rho)
1056      CALL rs_grid_zero(rs_rho)
1057
1058      eps_rho_rspace = dft_control%qs_control%eps_rho_rspace
1059      pab(1, 1) = 1.0_dp
1060      iatom = iatom_in
1061
1062      npme = 0
1063
1064      IF (rs_rho%desc%parallel .AND. .NOT. rs_rho%desc%distributed) THEN
1065         IF (MODULO(iatom, rs_rho%desc%group_size) == rs_rho%desc%my_pos) THEN
1066            npme = npme + 1
1067         ENDIF
1068      ELSE
1069         npme = npme + 1
1070      ENDIF
1071
1072      IF (npme .GT. 0) THEN
1073         atom_a = qs_env%qmmm_env_qm%image_charge_pot%image_mm_list(iatom)
1074         ra(:) = pbc(qs_env%qmmm_env_qm%image_charge_pot%particles_all(atom_a)%r, cell)
1075         subpatch_pattern = 0
1076         !lmax == 0 so set lmax_global to 0
1077         CALL collocate_pgf_product_rspace(0, qs_env%qmmm_env_qm%image_charge_pot%eta, &
1078                                           0, 0, 0.0_dp, 0, ra, (/0.0_dp, 0.0_dp, 0.0_dp/), 0.0_dp, 1.0_dp, pab, 0, 0, rs_rho, &
1079                                           cell, pw_env%cube_info(1), eps_rho_rspace, ga_gb_function=FUNC_AB, &
1080                                           use_subpatch=.TRUE., subpatch_pattern=subpatch_pattern, &
1081                                           lmax_global=0)
1082      ENDIF
1083
1084      DEALLOCATE (pab)
1085
1086      CALL pw_pool_create_pw(auxbas_pw_pool, rhoc_r%pw, &
1087                             use_data=REALDATA3D, in_space=REALSPACE)
1088
1089      CALL rs_pw_transfer(rs_rho, rhoc_r%pw, rs2pw)
1090      CALL rs_grid_release(rs_rho)
1091
1092      CALL pw_transfer(rhoc_r%pw, rho_gb%pw)
1093
1094      CALL pw_pool_give_back_pw(auxbas_pw_pool, rhoc_r%pw)
1095
1096      CALL timestop(handle)
1097
1098   END SUBROUTINE calculate_rho_single_gaussian
1099
1100! **************************************************************************************************
1101!> \brief computes the image charge density on the grid (including coeffcients)
1102!> \param rho_metal image charge density
1103!> \param coeff expansion coefficients of the image charge density, i.e.
1104!>        rho_metal=sum_a c_a*g_a
1105!> \param total_rho_metal total induced image charge density
1106!> \param qs_env qs environment
1107!> \par History
1108!>        01.2012 created
1109!> \author Dorothea Golze
1110! **************************************************************************************************
1111   SUBROUTINE calculate_rho_metal(rho_metal, coeff, total_rho_metal, qs_env)
1112
1113      TYPE(pw_p_type), INTENT(INOUT)                     :: rho_metal
1114      REAL(KIND=dp), DIMENSION(:), POINTER               :: coeff
1115      REAL(KIND=dp), INTENT(OUT), OPTIONAL               :: total_rho_metal
1116      TYPE(qs_environment_type), POINTER                 :: qs_env
1117
1118      CHARACTER(len=*), PARAMETER :: routineN = 'calculate_rho_metal', &
1119         routineP = moduleN//':'//routineN
1120
1121      INTEGER                                            :: atom_a, handle, iatom, j, natom, npme
1122      INTEGER(KIND=int_8)                                :: subpatch_pattern
1123      INTEGER, DIMENSION(:), POINTER                     :: cores
1124      REAL(KIND=dp)                                      :: eps_rho_rspace
1125      REAL(KIND=dp), DIMENSION(3)                        :: ra
1126      REAL(KIND=dp), DIMENSION(:, :), POINTER            :: pab
1127      TYPE(cell_type), POINTER                           :: cell
1128      TYPE(dft_control_type), POINTER                    :: dft_control
1129      TYPE(pw_env_type), POINTER                         :: pw_env
1130      TYPE(pw_p_type)                                    :: rhoc_r
1131      TYPE(pw_pool_type), POINTER                        :: auxbas_pw_pool
1132      TYPE(realspace_grid_type), POINTER                 :: rs_rho
1133
1134      CALL timeset(routineN, handle)
1135
1136      NULLIFY (cell, dft_control, pab, pw_env, rs_rho, auxbas_pw_pool, cores)
1137
1138      ALLOCATE (pab(1, 1))
1139
1140      CALL get_qs_env(qs_env=qs_env, &
1141                      cell=cell, &
1142                      dft_control=dft_control, &
1143                      pw_env=pw_env)
1144      CALL pw_env_get(pw_env, auxbas_rs_grid=rs_rho, &
1145                      auxbas_pw_pool=auxbas_pw_pool)
1146      CALL rs_grid_retain(rs_rho)
1147      CALL rs_grid_zero(rs_rho)
1148
1149      eps_rho_rspace = dft_control%qs_control%eps_rho_rspace
1150      pab(1, 1) = 1.0_dp
1151
1152      natom = SIZE(qs_env%qmmm_env_qm%image_charge_pot%image_mm_list)
1153
1154      CALL reallocate(cores, 1, natom)
1155      npme = 0
1156      cores = 0
1157
1158      DO iatom = 1, natom
1159         IF (rs_rho%desc%parallel .AND. .NOT. rs_rho%desc%distributed) THEN
1160            IF (MODULO(iatom, rs_rho%desc%group_size) == rs_rho%desc%my_pos) THEN
1161               npme = npme + 1
1162               cores(npme) = iatom
1163            ENDIF
1164         ELSE
1165            npme = npme + 1
1166            cores(npme) = iatom
1167         ENDIF
1168      ENDDO
1169
1170      IF (npme .GT. 0) THEN
1171         DO j = 1, npme
1172            iatom = cores(j)
1173            atom_a = qs_env%qmmm_env_qm%image_charge_pot%image_mm_list(iatom)
1174            ra(:) = pbc(qs_env%qmmm_env_qm%image_charge_pot%particles_all(atom_a)%r, cell)
1175            subpatch_pattern = 0
1176            !lmax == 0 so set lmax_global to 0
1177            CALL collocate_pgf_product_rspace( &
1178               0, qs_env%qmmm_env_qm%image_charge_pot%eta, &
1179               0, 0, 0.0_dp, 0, ra, (/0.0_dp, 0.0_dp, 0.0_dp/), 0.0_dp, coeff(iatom), pab, 0, 0, rs_rho, &
1180               cell, pw_env%cube_info(1), eps_rho_rspace, ga_gb_function=FUNC_AB, &
1181               use_subpatch=.TRUE., subpatch_pattern=subpatch_pattern, lmax_global=0)
1182         ENDDO
1183      ENDIF
1184
1185      DEALLOCATE (pab, cores)
1186
1187      CALL pw_pool_create_pw(auxbas_pw_pool, rhoc_r%pw, &
1188                             use_data=REALDATA3D, in_space=REALSPACE)
1189
1190      CALL rs_pw_transfer(rs_rho, rhoc_r%pw, rs2pw)
1191      CALL rs_grid_release(rs_rho)
1192
1193      IF (PRESENT(total_rho_metal)) &
1194         !minus sign: account for the fact that rho_metal has opposite sign
1195         total_rho_metal = pw_integrate_function(rhoc_r%pw, isign=-1)
1196
1197      CALL pw_transfer(rhoc_r%pw, rho_metal%pw)
1198      CALL pw_pool_give_back_pw(auxbas_pw_pool, rhoc_r%pw)
1199
1200      CALL timestop(handle)
1201
1202   END SUBROUTINE calculate_rho_metal
1203
1204! **************************************************************************************************
1205!> \brief collocate a single Gaussian on the grid for periodic RESP fitting
1206!> \param rho_gb charge density generated by a single gaussian
1207!> \param qs_env qs environment
1208!> \param eta width of single Gaussian
1209!> \param iatom_in atom index
1210!> \par History
1211!>        06.2012 created
1212!> \author Dorothea Golze
1213! **************************************************************************************************
1214   SUBROUTINE calculate_rho_resp_single(rho_gb, qs_env, eta, iatom_in)
1215
1216      TYPE(pw_p_type), INTENT(INOUT)                     :: rho_gb
1217      TYPE(qs_environment_type), POINTER                 :: qs_env
1218      REAL(KIND=dp), INTENT(IN)                          :: eta
1219      INTEGER, INTENT(IN)                                :: iatom_in
1220
1221      CHARACTER(len=*), PARAMETER :: routineN = 'calculate_rho_resp_single', &
1222         routineP = moduleN//':'//routineN
1223
1224      INTEGER                                            :: handle, iatom, npme
1225      INTEGER(KIND=int_8)                                :: subpatch_pattern
1226      REAL(KIND=dp)                                      :: eps_rho_rspace
1227      REAL(KIND=dp), DIMENSION(3)                        :: ra
1228      REAL(KIND=dp), DIMENSION(:, :), POINTER            :: pab
1229      TYPE(cell_type), POINTER                           :: cell
1230      TYPE(dft_control_type), POINTER                    :: dft_control
1231      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
1232      TYPE(pw_env_type), POINTER                         :: pw_env
1233      TYPE(pw_p_type)                                    :: rhoc_r
1234      TYPE(pw_pool_type), POINTER                        :: auxbas_pw_pool
1235      TYPE(realspace_grid_type), POINTER                 :: rs_rho
1236
1237      CALL timeset(routineN, handle)
1238      NULLIFY (cell, dft_control, pab, pw_env, rs_rho, auxbas_pw_pool, &
1239               particle_set)
1240
1241      ALLOCATE (pab(1, 1))
1242
1243      CALL get_qs_env(qs_env=qs_env, &
1244                      cell=cell, &
1245                      dft_control=dft_control, &
1246                      particle_set=particle_set, &
1247                      pw_env=pw_env)
1248      CALL pw_env_get(pw_env, auxbas_rs_grid=rs_rho, &
1249                      auxbas_pw_pool=auxbas_pw_pool)
1250      CALL rs_grid_retain(rs_rho)
1251      CALL rs_grid_zero(rs_rho)
1252
1253      eps_rho_rspace = dft_control%qs_control%eps_rho_rspace
1254      pab(1, 1) = 1.0_dp
1255      iatom = iatom_in
1256
1257      npme = 0
1258
1259      IF (rs_rho%desc%parallel .AND. .NOT. rs_rho%desc%distributed) THEN
1260         IF (MODULO(iatom, rs_rho%desc%group_size) == rs_rho%desc%my_pos) THEN
1261            npme = npme + 1
1262         ENDIF
1263      ELSE
1264         npme = npme + 1
1265      ENDIF
1266
1267      IF (npme .GT. 0) THEN
1268         ra(:) = pbc(particle_set(iatom)%r, cell)
1269         subpatch_pattern = 0
1270         ! lmax == 0 so set lmax_global to 0
1271         CALL collocate_pgf_product_rspace(0, eta, 0, 0, 0.0_dp, 0, ra, &
1272                                           (/0.0_dp, 0.0_dp, 0.0_dp/), 0.0_dp, 1.0_dp, pab, 0, 0, rs_rho, &
1273                                           cell, pw_env%cube_info(1), eps_rho_rspace, ga_gb_function=FUNC_AB, &
1274                                           use_subpatch=.TRUE., subpatch_pattern=subpatch_pattern, &
1275                                           lmax_global=0)
1276      ENDIF
1277
1278      DEALLOCATE (pab)
1279
1280      CALL pw_pool_create_pw(auxbas_pw_pool, rhoc_r%pw, &
1281                             use_data=REALDATA3D, in_space=REALSPACE)
1282
1283      CALL rs_pw_transfer(rs_rho, rhoc_r%pw, rs2pw)
1284      CALL rs_grid_release(rs_rho)
1285
1286      CALL pw_transfer(rhoc_r%pw, rho_gb%pw)
1287
1288      CALL pw_pool_give_back_pw(auxbas_pw_pool, rhoc_r%pw)
1289
1290      CALL timestop(handle)
1291
1292   END SUBROUTINE calculate_rho_resp_single
1293
1294! **************************************************************************************************
1295!> \brief computes the RESP charge density on a grid based on the RESP charges
1296!> \param rho_resp RESP charge density
1297!> \param coeff RESP charges, take care of normalization factor
1298!>        (eta/pi)**1.5 later
1299!> \param natom number of atoms
1300!> \param eta width of single Gaussian
1301!> \param qs_env qs environment
1302!> \par History
1303!>        01.2012 created
1304!> \author Dorothea Golze
1305! **************************************************************************************************
1306   SUBROUTINE calculate_rho_resp_all(rho_resp, coeff, natom, eta, qs_env)
1307
1308      TYPE(pw_p_type), INTENT(INOUT)                     :: rho_resp
1309      REAL(KIND=dp), DIMENSION(:), POINTER               :: coeff
1310      INTEGER, INTENT(IN)                                :: natom
1311      REAL(KIND=dp), INTENT(IN)                          :: eta
1312      TYPE(qs_environment_type), POINTER                 :: qs_env
1313
1314      CHARACTER(len=*), PARAMETER :: routineN = 'calculate_rho_resp_all', &
1315         routineP = moduleN//':'//routineN
1316
1317      INTEGER                                            :: handle, iatom, j, npme
1318      INTEGER(KIND=int_8)                                :: subpatch_pattern
1319      INTEGER, DIMENSION(:), POINTER                     :: cores
1320      REAL(KIND=dp)                                      :: eps_rho_rspace
1321      REAL(KIND=dp), DIMENSION(3)                        :: ra
1322      REAL(KIND=dp), DIMENSION(:, :), POINTER            :: pab
1323      TYPE(cell_type), POINTER                           :: cell
1324      TYPE(dft_control_type), POINTER                    :: dft_control
1325      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
1326      TYPE(pw_env_type), POINTER                         :: pw_env
1327      TYPE(pw_p_type)                                    :: rhoc_r
1328      TYPE(pw_pool_type), POINTER                        :: auxbas_pw_pool
1329      TYPE(realspace_grid_type), POINTER                 :: rs_rho
1330
1331      CALL timeset(routineN, handle)
1332
1333      NULLIFY (cell, cores, dft_control, pab, pw_env, rs_rho, auxbas_pw_pool, &
1334               particle_set)
1335
1336      ALLOCATE (pab(1, 1))
1337
1338      CALL get_qs_env(qs_env=qs_env, &
1339                      cell=cell, &
1340                      dft_control=dft_control, &
1341                      particle_set=particle_set, &
1342                      pw_env=pw_env)
1343      CALL pw_env_get(pw_env, auxbas_rs_grid=rs_rho, &
1344                      auxbas_pw_pool=auxbas_pw_pool)
1345      CALL rs_grid_retain(rs_rho)
1346      CALL rs_grid_zero(rs_rho)
1347
1348      eps_rho_rspace = dft_control%qs_control%eps_rho_rspace
1349      pab(1, 1) = 1.0_dp
1350
1351      CALL reallocate(cores, 1, natom)
1352      npme = 0
1353      cores = 0
1354
1355      DO iatom = 1, natom
1356         IF (rs_rho%desc%parallel .AND. .NOT. rs_rho%desc%distributed) THEN
1357            IF (MODULO(iatom, rs_rho%desc%group_size) == rs_rho%desc%my_pos) THEN
1358               npme = npme + 1
1359               cores(npme) = iatom
1360            ENDIF
1361         ELSE
1362            npme = npme + 1
1363            cores(npme) = iatom
1364         ENDIF
1365      ENDDO
1366
1367      IF (npme .GT. 0) THEN
1368         DO j = 1, npme
1369            iatom = cores(j)
1370            ra(:) = pbc(particle_set(iatom)%r, cell)
1371            subpatch_pattern = 0
1372            ! la_max==0 so set lmax_global to 0
1373            CALL collocate_pgf_product_rspace( &
1374               0, eta, &
1375               0, 0, 0.0_dp, 0, ra, (/0.0_dp, 0.0_dp, 0.0_dp/), 0.0_dp, coeff(iatom), pab, 0, 0, rs_rho, &
1376               cell, pw_env%cube_info(1), eps_rho_rspace, ga_gb_function=FUNC_AB, &
1377               use_subpatch=.TRUE., subpatch_pattern=subpatch_pattern, lmax_global=0)
1378         ENDDO
1379      ENDIF
1380
1381      DEALLOCATE (pab, cores)
1382
1383      CALL pw_pool_create_pw(auxbas_pw_pool, rhoc_r%pw, &
1384                             use_data=REALDATA3D, in_space=REALSPACE)
1385
1386      CALL rs_pw_transfer(rs_rho, rhoc_r%pw, rs2pw)
1387      CALL rs_grid_release(rs_rho)
1388
1389      CALL pw_transfer(rhoc_r%pw, rho_resp%pw)
1390      CALL pw_pool_give_back_pw(auxbas_pw_pool, rhoc_r%pw)
1391
1392      CALL timestop(handle)
1393
1394   END SUBROUTINE calculate_rho_resp_all
1395
1396! **************************************************************************************************
1397!> \brief computes the density corresponding to a given density matrix on the grid
1398!> \param matrix_p ...
1399!> \param matrix_p_kp ...
1400!> \param rho ...
1401!> \param rho_gspace ...
1402!> \param total_rho ...
1403!> \param ks_env ...
1404!> \param soft_valid ...
1405!> \param compute_tau ...
1406!> \param compute_grad ...
1407!> \param basis_type ...
1408!> \param der_type ...
1409!> \param idir ...
1410!> \param task_list_external ...
1411!> \param pw_env_external ...
1412!> \par History
1413!>      IAB (15-Feb-2010): Added OpenMP parallelisation to task loop
1414!>                         (c) The Numerical Algorithms Group (NAG) Ltd, 2010 on behalf of the HECToR project
1415!> \note
1416!>      both rho and rho_gspace contain the new rho
1417!>      (in real and g-space respectively)
1418! **************************************************************************************************
1419   SUBROUTINE calculate_rho_elec(matrix_p, matrix_p_kp, rho, rho_gspace, total_rho, &
1420                                 ks_env, soft_valid, compute_tau, compute_grad, &
1421                                 basis_type, der_type, idir, task_list_external, pw_env_external)
1422
1423      TYPE(dbcsr_type), OPTIONAL, POINTER                :: matrix_p
1424      TYPE(dbcsr_p_type), DIMENSION(:), OPTIONAL, &
1425         POINTER                                         :: matrix_p_kp
1426      TYPE(pw_p_type), INTENT(INOUT)                     :: rho, rho_gspace
1427      REAL(KIND=dp), INTENT(OUT)                         :: total_rho
1428      TYPE(qs_ks_env_type), POINTER                      :: ks_env
1429      LOGICAL, INTENT(IN), OPTIONAL                      :: soft_valid, compute_tau, compute_grad
1430      CHARACTER(LEN=*), INTENT(IN), OPTIONAL             :: basis_type
1431      INTEGER, INTENT(IN), OPTIONAL                      :: der_type, idir
1432      TYPE(task_list_type), OPTIONAL, POINTER            :: task_list_external
1433      TYPE(pw_env_type), OPTIONAL, POINTER               :: pw_env_external
1434
1435      CHARACTER(len=*), PARAMETER :: routineN = 'calculate_rho_elec', &
1436         routineP = moduleN//':'//routineN
1437
1438      CHARACTER(LEN=default_string_length)               :: my_basis_type
1439      INTEGER :: bcol, brow, ga_gb_function, handle, iatom, iatom_old, igrid_level, &
1440         igrid_level_dummy, ikind, ikind_old, img, img_old, ipair, ipgf, iset, iset_old, itask, &
1441         ithread, jatom, jatom_old, jkind, jkind_old, jpgf, jset, jset_old, lb, lbr, lbw, &
1442         lmax_global, maxco, maxpgf, maxset, maxsgf, maxsgf_set, my_der_type, my_idir, n, na1, &
1443         na2, natoms, nb1, nb2, nblock, ncoa, ncob, nimages, nr, nrlevel, nseta, nsetb, ntasks, &
1444         nthread, nw, nxy, nz, nzsize, sgfa, sgfb, ub
1445      INTEGER(kind=int_8), DIMENSION(:), POINTER         :: atom_pair_recv, atom_pair_send
1446      INTEGER(kind=int_8), DIMENSION(:, :), POINTER      :: tasks
1447      INTEGER, DIMENSION(:), POINTER                     :: la_max, la_min, lb_max, lb_min, mylmax, &
1448                                                            npgfa, npgfb, nsgfa, nsgfb
1449      INTEGER, DIMENSION(:, :), POINTER                  :: first_sgfa, first_sgfb
1450      LOGICAL :: atom_pair_changed, distributed_rs_grids, do_kp, found, map_consistent, &
1451         my_compute_grad, my_compute_tau, my_soft, use_subpatch
1452      REAL(KIND=dp)                                      :: eps_rho_rspace, rab2, scale, zetp
1453      REAL(KIND=dp), DIMENSION(3)                        :: ra, rab, rab_inv, rb
1454      REAL(KIND=dp), DIMENSION(:, :), POINTER            :: dist_ab, p_block, pab, sphi_a, sphi_b, &
1455                                                            work, zeta, zetb
1456      REAL(KIND=dp), DIMENSION(:, :, :), POINTER         :: pabt, workt
1457      TYPE(cell_type), POINTER                           :: cell
1458      TYPE(cube_info_type), DIMENSION(:), POINTER        :: cube_info
1459      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: deltap
1460      TYPE(dft_control_type), POINTER                    :: dft_control
1461      TYPE(gridlevel_info_type), POINTER                 :: gridlevel_info
1462      TYPE(gto_basis_set_type), POINTER                  :: orb_basis_set
1463      TYPE(lgrid_type), POINTER                          :: lgrid
1464      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
1465      TYPE(pw_env_type), POINTER                         :: pw_env
1466      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
1467      TYPE(qs_kind_type), POINTER                        :: qs_kind
1468      TYPE(realspace_grid_desc_p_type), DIMENSION(:), &
1469         POINTER                                         :: rs_descs
1470      TYPE(realspace_grid_p_type), DIMENSION(:), POINTER :: rs_rho
1471      TYPE(task_list_type), POINTER                      :: task_list, task_list_soft
1472
1473      CALL timeset(routineN, handle)
1474
1475      CPASSERT(PRESENT(matrix_p) .OR. PRESENT(matrix_p_kp))
1476      do_kp = PRESENT(matrix_p_kp)
1477
1478      NULLIFY (qs_kind, cell, dft_control, orb_basis_set, deltap, &
1479               qs_kind_set, particle_set, rs_rho, pw_env, rs_descs, &
1480               dist_ab, la_max, la_min, &
1481               lb_max, lb_min, npgfa, npgfb, nsgfa, nsgfb, p_block, &
1482               sphi_a, sphi_b, zeta, zetb, first_sgfa, first_sgfb, tasks, pabt, &
1483               workt, lgrid, mylmax)
1484
1485      debug_count = debug_count + 1
1486
1487      ! by default, the full density is calculated
1488      my_soft = .FALSE.
1489      IF (PRESENT(soft_valid)) my_soft = soft_valid
1490
1491      ! by default, do not compute the kinetic energy density (tau)
1492      ! if compute_tau, all grids referencing to rho are actually tau
1493      IF (PRESENT(compute_tau)) THEN
1494         my_compute_tau = compute_tau
1495      ELSE
1496         my_compute_tau = .FALSE.
1497      ENDIF
1498
1499      IF (PRESENT(compute_grad)) THEN
1500         my_compute_grad = compute_grad
1501      ELSE
1502         my_compute_grad = .FALSE.
1503      ENDIF
1504
1505      my_der_type = 0
1506      my_idir = 0
1507      IF (PRESENT(der_type)) THEN
1508         my_der_type = der_type
1509         ga_gb_function = FUNC_DER(my_der_type)
1510      ELSE IF (my_compute_tau) THEN
1511         ga_gb_function = FUNC_DADB
1512      ELSE IF (my_compute_grad) THEN
1513         ga_gb_function = FUNC_DABpADB
1514      ELSE
1515         ga_gb_function = FUNC_AB
1516      ENDIF
1517      IF (PRESENT(idir)) my_idir = idir
1518
1519      IF (PRESENT(basis_type)) THEN
1520         my_basis_type = basis_type
1521      ELSE
1522         my_basis_type = "ORB"
1523      END IF
1524
1525      CALL get_ks_env(ks_env, &
1526                      qs_kind_set=qs_kind_set, &
1527                      cell=cell, &
1528                      dft_control=dft_control, &
1529                      particle_set=particle_set, &
1530                      pw_env=pw_env)
1531
1532      SELECT CASE (my_basis_type)
1533      CASE ("ORB")
1534         CALL get_ks_env(ks_env, &
1535                         task_list=task_list, &
1536                         task_list_soft=task_list_soft)
1537      CASE ("AUX_FIT")
1538         CALL get_ks_env(ks_env, &
1539                         task_list_aux_fit=task_list, &
1540                         task_list_soft=task_list_soft)
1541      END SELECT
1542
1543      nimages = dft_control%nimages
1544      CPASSERT(nimages == 1 .OR. do_kp)
1545
1546      IF (PRESENT(pw_env_external)) pw_env => pw_env_external
1547      IF (PRESENT(task_list_external)) task_list => task_list_external
1548
1549      ! *** assign from pw_env
1550      gridlevel_info => pw_env%gridlevel_info
1551      cube_info => pw_env%cube_info
1552
1553      !   *** Allocate work storage ***
1554      nthread = 1
1555!$    nthread = omp_get_max_threads()
1556      CALL get_qs_kind_set(qs_kind_set=qs_kind_set, &
1557                           maxco=maxco, &
1558                           maxsgf=maxsgf, &
1559                           maxsgf_set=maxsgf_set, &
1560                           basis_type=my_basis_type)
1561      CALL reallocate(pabt, 1, maxco, 1, maxco, 0, nthread - 1)
1562      CALL reallocate(workt, 1, maxco, 1, maxsgf_set, 0, nthread - 1)
1563
1564      ! find maximum numbers
1565      natoms = SIZE(particle_set)
1566      maxset = 0
1567      maxpgf = 0
1568      lmax_global = 0
1569
1570      DO ikind = 1, SIZE(qs_kind_set)
1571         qs_kind => qs_kind_set(ikind)
1572         CALL get_qs_kind(qs_kind=qs_kind, &
1573                          softb=my_soft, &
1574                          basis_set=orb_basis_set, basis_type=my_basis_type)
1575
1576         IF (.NOT. ASSOCIATED(orb_basis_set)) CYCLE
1577
1578         CALL get_gto_basis_set(gto_basis_set=orb_basis_set, &
1579                                npgf=npgfa, nset=nseta, lmax=mylmax)
1580
1581         maxset = MAX(nseta, maxset)
1582         maxpgf = MAX(MAXVAL(npgfa), maxpgf)
1583         lmax_global = MAX(MAXVAL(mylmax), lmax_global)
1584      END DO
1585
1586      ! Optionally increment lmax_global depending on which ga_gb_function
1587      ! will be used for collocation
1588      SELECT CASE (ga_gb_function)
1589      CASE (FUNC_DADB)
1590         lmax_global = lmax_global + 1
1591      CASE (FUNC_ADBmDAB)
1592         lmax_global = lmax_global + 1
1593      CASE (FUNC_DABpADB)
1594         lmax_global = lmax_global + 1
1595      CASE (FUNC_DX, FUNC_DY, FUNC_DZ)
1596         lmax_global = lmax_global + 1
1597      CASE (FUNC_DXDY, FUNC_DYDZ, FUNC_DZDX)
1598         lmax_global = lmax_global + 2
1599      CASE (FUNC_DXDX, FUNC_DYDY, FUNC_DZDZ)
1600         lmax_global = lmax_global + 2
1601      CASE (FUNC_ARDBmDARB)
1602         lmax_global = lmax_global + 2
1603      CASE (FUNC_ARB)
1604         lmax_global = lmax_global + 1
1605      CASE (FUNC_AB)
1606         lmax_global = lmax_global
1607      CASE DEFAULT
1608         CPABORT("")
1609      END SELECT
1610
1611      ! get the task lists
1612      IF (my_soft) task_list => task_list_soft
1613      CPASSERT(ASSOCIATED(task_list))
1614      tasks => task_list%tasks
1615      dist_ab => task_list%dist_ab
1616      atom_pair_send => task_list%atom_pair_send
1617      atom_pair_recv => task_list%atom_pair_recv
1618      ntasks = task_list%ntasks
1619
1620      ! *** set up the pw multi-grids
1621      CPASSERT(ASSOCIATED(pw_env))
1622      CALL pw_env_get(pw_env, rs_descs=rs_descs, rs_grids=rs_rho, lgrid=lgrid)
1623      ! *** set up the rs multi-grids
1624      CPASSERT(ASSOCIATED(rs_rho))
1625      CPASSERT(ASSOCIATED(rs_descs))
1626      CPASSERT(ASSOCIATED(lgrid))
1627      distributed_rs_grids = .FALSE.
1628
1629      DO igrid_level = 1, gridlevel_info%ngrid_levels
1630         CALL rs_grid_retain(rs_rho(igrid_level)%rs_grid)
1631         CALL rs_grid_zero(rs_rho(igrid_level)%rs_grid)
1632         IF (rs_rho(igrid_level)%rs_grid%desc%distributed) THEN
1633            distributed_rs_grids = .TRUE.
1634         ENDIF
1635      END DO
1636
1637      IF (nthread > 1) THEN
1638         IF (.NOT. ASSOCIATED(lgrid%r)) THEN
1639            CALL lgrid_allocate_grid(lgrid, nthread)
1640         ENDIF
1641      END IF
1642
1643      eps_rho_rspace = dft_control%qs_control%eps_rho_rspace
1644      map_consistent = dft_control%qs_control%map_consistent
1645
1646      !   *** Initialize working density matrix ***
1647      ! distributed rs grids require a matrix that will be changed
1648      ! whereas this is not the case for replicated grids
1649      NULLIFY (deltap)
1650      CALL dbcsr_allocate_matrix_set(deltap, nimages)
1651      IF (distributed_rs_grids) THEN
1652         DO img = 1, nimages
1653            ALLOCATE (deltap(img)%matrix)
1654         END DO
1655         ! this matrix has no strict sparsity pattern in parallel
1656         ! deltap%sparsity_id=-1
1657         IF (do_kp) THEN
1658            DO img = 1, nimages
1659               CALL dbcsr_copy(deltap(img)%matrix, matrix_p_kp(img)%matrix, &
1660                               name="DeltaP")
1661            END DO
1662         ELSE
1663            CALL dbcsr_copy(deltap(1)%matrix, matrix_p, name="DeltaP")
1664         END IF
1665      ELSE
1666         IF (do_kp) THEN
1667            DO img = 1, nimages
1668               deltap(img)%matrix => matrix_p_kp(img)%matrix
1669            END DO
1670         ELSE
1671            deltap(1)%matrix => matrix_p
1672         END IF
1673      ENDIF
1674
1675      ! distribute the matrix
1676      IF (distributed_rs_grids) THEN
1677         CALL rs_distribute_matrix(rs_descs, deltap, atom_pair_send, atom_pair_recv, &
1678                                   natoms, nimages, scatter=.TRUE.)
1679      ENDIF
1680
1681      ! map all tasks on the grids
1682
1683!$OMP PARALLEL DEFAULT(NONE), &
1684!$OMP          SHARED(ntasks,tasks,nimages,natoms,maxset,maxpgf,particle_set,pabt,workt), &
1685!$OMP          SHARED(my_basis_type,my_soft,deltap,maxco,dist_ab,ncoset,nthread), &
1686!$OMP          SHARED(cell,cube_info,eps_rho_rspace,ga_gb_function, my_idir,map_consistent), &
1687!$OMP          SHARED(rs_rho,lgrid,gridlevel_info,task_list,qs_kind_set,lmax_global), &
1688!$OMP          PRIVATE(igrid_level,iatom,jatom,iset,jset,ipgf,jpgf,ikind,jkind,pab,work), &
1689!$OMP          PRIVATE(img,img_old,iatom_old,jatom_old,iset_old,jset_old,ikind_old,jkind_old), &
1690!$OMP          PRIVATE(brow,bcol,qs_kind,orb_basis_set,first_sgfa,la_max,la_min), &
1691!$OMP          PRIVATE(npgfa,nseta,nsgfa,sphi_a,zeta,first_sgfb,lb_max,lb_min,npgfb), &
1692!$OMP          PRIVATE(nsetb,nsgfb,sphi_b,zetb,p_block,found), &
1693!$OMP          PRIVATE(atom_pair_changed,ncoa,sgfa,ncob,sgfb,rab,rab2,ra,rb,zetp), &
1694!$OMP          PRIVATE(na1,na2,nb1,nb2,scale,use_subpatch,rab_inv,ithread,lb,ub,n,nw), &
1695!$OMP          PRIVATE(itask,nz,nxy,nzsize,nrlevel,nblock,lbw,lbr,nr,igrid_level_dummy)
1696
1697      ithread = 0
1698!$    ithread = omp_get_thread_num()
1699      pab => pabt(:, :, ithread)
1700      work => workt(:, :, ithread)
1701
1702      iatom_old = -1; jatom_old = -1; iset_old = -1; jset_old = -1
1703      ikind_old = -1; jkind_old = -1; img_old = -1
1704
1705      ! Loop over each gridlevel first, then loop and load balance over atom pairs
1706      ! We only need a single lgrid, which we sum back onto the rsgrid after each
1707      ! grid level is completed
1708      loop_gridlevels: DO igrid_level = 1, gridlevel_info%ngrid_levels
1709
1710         ! Only zero the region of the lgrid required for this grid level
1711         IF (nthread > 1) THEN
1712            lgrid%r(1:rs_rho(igrid_level)%rs_grid%ngpts_local, ithread) = 0._dp
1713         END IF
1714!$OMP DO schedule(dynamic, MAX(1,task_list%npairs(igrid_level)/(nthread*50)))
1715         loop_pairs: DO ipair = 1, task_list%npairs(igrid_level)
1716         loop_tasks: DO itask = task_list%taskstart(ipair, igrid_level), task_list%taskstop(ipair, igrid_level)
1717            !decode the atom pair and basis info (igrid_level_dummy equals do loop variable by construction).
1718            CALL int2pair(tasks(3, itask), igrid_level_dummy, img, iatom, jatom, iset, jset, ipgf, jpgf, &
1719                          nimages, natoms, maxset, maxpgf)
1720            ikind = particle_set(iatom)%atomic_kind%kind_number
1721            jkind = particle_set(jatom)%atomic_kind%kind_number
1722
1723            IF (iatom .NE. iatom_old .OR. jatom .NE. jatom_old .OR. img .NE. img_old) THEN
1724
1725               IF (iatom .NE. iatom_old) ra(:) = pbc(particle_set(iatom)%r, cell)
1726
1727               IF (iatom <= jatom) THEN
1728                  brow = iatom
1729                  bcol = jatom
1730               ELSE
1731                  brow = jatom
1732                  bcol = iatom
1733               END IF
1734
1735               IF (ikind .NE. ikind_old) THEN
1736                  CALL get_qs_kind(qs_kind_set(ikind), softb=my_soft, &
1737                                   basis_set=orb_basis_set, basis_type=my_basis_type)
1738                  CALL get_gto_basis_set(gto_basis_set=orb_basis_set, &
1739                                         first_sgf=first_sgfa, &
1740                                         lmax=la_max, &
1741                                         lmin=la_min, &
1742                                         npgf=npgfa, &
1743                                         nset=nseta, &
1744                                         nsgf_set=nsgfa, &
1745                                         sphi=sphi_a, &
1746                                         zet=zeta)
1747               ENDIF
1748
1749               IF (jkind .NE. jkind_old) THEN
1750                  CALL get_qs_kind(qs_kind_set(jkind), softb=my_soft, &
1751                                   basis_set=orb_basis_set, basis_type=my_basis_type)
1752                  CALL get_gto_basis_set(gto_basis_set=orb_basis_set, &
1753                                         first_sgf=first_sgfb, &
1754                                         lmax=lb_max, &
1755                                         lmin=lb_min, &
1756                                         npgf=npgfb, &
1757                                         nset=nsetb, &
1758                                         nsgf_set=nsgfb, &
1759                                         sphi=sphi_b, &
1760                                         zet=zetb)
1761               ENDIF
1762
1763               CALL dbcsr_get_block_p(matrix=deltap(img)%matrix, &
1764                                      row=brow, col=bcol, BLOCK=p_block, found=found)
1765               CPASSERT(found)
1766
1767               iatom_old = iatom
1768               jatom_old = jatom
1769               ikind_old = ikind
1770               jkind_old = jkind
1771               img_old = img
1772               atom_pair_changed = .TRUE.
1773
1774            ELSE
1775
1776               atom_pair_changed = .FALSE.
1777
1778            ENDIF
1779
1780            IF (atom_pair_changed .OR. iset_old .NE. iset .OR. jset_old .NE. jset) THEN
1781               ncoa = npgfa(iset)*ncoset(la_max(iset))
1782               sgfa = first_sgfa(1, iset)
1783               ncob = npgfb(jset)*ncoset(lb_max(jset))
1784               sgfb = first_sgfb(1, jset)
1785
1786               IF (iatom <= jatom) THEN
1787                  CALL dgemm("N", "N", ncoa, nsgfb(jset), nsgfa(iset), &
1788                             1.0_dp, sphi_a(1, sgfa), SIZE(sphi_a, 1), &
1789                             p_block(sgfa, sgfb), SIZE(p_block, 1), &
1790                             0.0_dp, work(1, 1), maxco)
1791                  CALL dgemm("N", "T", ncoa, ncob, nsgfb(jset), &
1792                             1.0_dp, work(1, 1), maxco, &
1793                             sphi_b(1, sgfb), SIZE(sphi_b, 1), &
1794                             0.0_dp, pab(1, 1), maxco)
1795               ELSE
1796                  CALL dgemm("N", "N", ncob, nsgfa(iset), nsgfb(jset), &
1797                             1.0_dp, sphi_b(1, sgfb), SIZE(sphi_b, 1), &
1798                             p_block(sgfb, sgfa), SIZE(p_block, 1), &
1799                             0.0_dp, work(1, 1), maxco)
1800                  CALL dgemm("N", "T", ncob, ncoa, nsgfa(iset), &
1801                             1.0_dp, work(1, 1), maxco, &
1802                             sphi_a(1, sgfa), SIZE(sphi_a, 1), &
1803                             0.0_dp, pab(1, 1), maxco)
1804               END IF
1805
1806               iset_old = iset
1807               jset_old = jset
1808            ENDIF
1809
1810            rab(:) = dist_ab(:, itask)
1811            rab2 = rab(1)*rab(1) + rab(2)*rab(2) + rab(3)*rab(3)
1812            rb(:) = ra(:) + rab(:)
1813            zetp = zeta(ipgf, iset) + zetb(jpgf, jset)
1814
1815            na1 = (ipgf - 1)*ncoset(la_max(iset)) + 1
1816            na2 = ipgf*ncoset(la_max(iset))
1817            nb1 = (jpgf - 1)*ncoset(lb_max(jset)) + 1
1818            nb2 = jpgf*ncoset(lb_max(jset))
1819
1820            ! takes the density matrix symmetry in account, i.e. off-diagonal blocks need to be mapped 'twice'
1821            IF (iatom == jatom) THEN
1822               scale = 1.0_dp
1823            ELSE
1824               scale = 2.0_dp
1825            END IF
1826
1827            ! check whether we need to use fawzi's generalised collocation scheme
1828            IF (rs_rho(igrid_level)%rs_grid%desc%distributed) THEN
1829               !tasks(4,:) is 0 for replicated, 1 for distributed 2 for exceptional distributed tasks
1830               IF (tasks(4, itask) .EQ. 2) THEN
1831                  use_subpatch = .TRUE.
1832               ELSE
1833                  use_subpatch = .FALSE.
1834               ENDIF
1835            ELSE
1836               use_subpatch = .FALSE.
1837            ENDIF
1838
1839            IF (nthread > 1) THEN
1840               IF (iatom <= jatom) THEN
1841                  CALL collocate_pgf_product_rspace( &
1842                     la_max(iset), zeta(ipgf, iset), la_min(iset), &
1843                     lb_max(jset), zetb(jpgf, jset), lb_min(jset), &
1844                     ra, rab, rab2, scale, pab, na1 - 1, nb1 - 1, &
1845                     rs_rho(igrid_level)%rs_grid, cell, cube_info(igrid_level), &
1846                     eps_rho_rspace, &
1847                     ga_gb_function=ga_gb_function, &
1848                     idir=my_idir, &
1849                     lgrid=lgrid, ithread=ithread, &
1850                     map_consistent=map_consistent, use_subpatch=use_subpatch, &
1851                     subpatch_pattern=tasks(6, itask), lmax_global=lmax_global)
1852               ELSE
1853                  rab_inv = -rab
1854                  CALL collocate_pgf_product_rspace( &
1855                     lb_max(jset), zetb(jpgf, jset), lb_min(jset), &
1856                     la_max(iset), zeta(ipgf, iset), la_min(iset), &
1857                     rb, rab_inv, rab2, scale, pab, nb1 - 1, na1 - 1, &
1858                     rs_rho(igrid_level)%rs_grid, cell, cube_info(igrid_level), &
1859                     eps_rho_rspace, &
1860                     ga_gb_function=ga_gb_function, &
1861                     idir=my_idir, &
1862                     lgrid=lgrid, ithread=ithread, &
1863                     map_consistent=map_consistent, use_subpatch=use_subpatch, &
1864                     subpatch_pattern=tasks(6, itask), lmax_global=lmax_global)
1865               END IF
1866            ELSE
1867               IF (iatom <= jatom) THEN
1868                  CALL collocate_pgf_product_rspace( &
1869                     la_max(iset), zeta(ipgf, iset), la_min(iset), &
1870                     lb_max(jset), zetb(jpgf, jset), lb_min(jset), &
1871                     ra, rab, rab2, scale, pab, na1 - 1, nb1 - 1, &
1872                     rs_rho(igrid_level)%rs_grid, cell, cube_info(igrid_level), &
1873                     eps_rho_rspace, &
1874                     ga_gb_function=ga_gb_function, &
1875                     idir=my_idir, &
1876                     map_consistent=map_consistent, use_subpatch=use_subpatch, &
1877                     subpatch_pattern=tasks(6, itask), lmax_global=lmax_global)
1878               ELSE
1879                  rab_inv = -rab
1880                  CALL collocate_pgf_product_rspace( &
1881                     lb_max(jset), zetb(jpgf, jset), lb_min(jset), &
1882                     la_max(iset), zeta(ipgf, iset), la_min(iset), &
1883                     rb, rab_inv, rab2, scale, pab, nb1 - 1, na1 - 1, &
1884                     rs_rho(igrid_level)%rs_grid, cell, cube_info(igrid_level), &
1885                     eps_rho_rspace, &
1886                     ga_gb_function=ga_gb_function, &
1887                     idir=my_idir, &
1888                     map_consistent=map_consistent, use_subpatch=use_subpatch, &
1889                     subpatch_pattern=tasks(6, itask), lmax_global=lmax_global)
1890               END IF
1891            END IF
1892         END DO loop_tasks
1893         END DO loop_pairs
1894!$OMP END DO
1895
1896         ! Now sum the thread-local grids back into the rs_grid
1897         ! (in parallel, each thread writes to a section of the rs_grid at a time)
1898         IF (nthread > 1) THEN
1899            nz = (1 + rs_rho(igrid_level)%rs_grid%ub_local(3) &
1900                  - rs_rho(igrid_level)%rs_grid%lb_local(3))
1901            nxy = (1 + rs_rho(igrid_level)%rs_grid%ub_local(1) &
1902                   - rs_rho(igrid_level)%rs_grid%lb_local(1)) &
1903                  *(1 + rs_rho(igrid_level)%rs_grid%ub_local(2) &
1904                    - rs_rho(igrid_level)%rs_grid%lb_local(2))
1905
1906            ! Work out the number of tree levels, and start the reduction
1907
1908            nrlevel = CEILING(LOG10(1.0_dp*nthread)/LOG10(2.0_dp))
1909
1910            DO nr = 1, nrlevel
1911               nblock = 2**nr
1912               nzsize = MIN(nblock, nthread - (ithread/nblock)*nblock)
1913               itask = MODULO(ithread, nblock)
1914
1915               lb = (nz*itask)/nzsize
1916               ub = (nz*(itask + 1))/nzsize - 1
1917               nw = (ithread/nblock)*nblock
1918               lbw = 1 + nxy*lb
1919
1920               n = nw + nblock/2
1921               IF (n < nthread) THEN
1922                  lbr = 1 + nxy*lb
1923
1924                  CALL daxpy(nxy*(1 + ub - lb), 1.0_dp, lgrid%r(lbr, n), 1, lgrid%r(lbw, nw), 1)
1925               END IF
1926!$OMP BARRIER
1927            END DO
1928
1929            ! Copy final result from first local grid to rs_grid
1930            lb = (nz*ithread)/nthread
1931            ub = (nz*(ithread + 1))/nthread - 1
1932            nzsize = 1 + (ub - lb)
1933
1934            lb = lb + rs_rho(igrid_level)%rs_grid%lb_local(3)
1935            ub = ub + rs_rho(igrid_level)%rs_grid%lb_local(3)
1936            lbr = 1 + nxy*(nz*ithread/nthread)
1937
1938            CALL daxpy(nxy*nzsize, 1.0_dp, lgrid%r(lbr, 0), 1, &
1939                       rs_rho(igrid_level)%rs_grid%r(:, :, lb:ub), 1)
1940!$OMP BARRIER
1941         END IF
1942      END DO loop_gridlevels
1943!$OMP END PARALLEL
1944
1945      !   *** Release work storage ***
1946
1947      IF (distributed_rs_grids) THEN
1948         CALL dbcsr_deallocate_matrix_set(deltap)
1949      ELSE
1950         DO img = 1, nimages
1951            NULLIFY (deltap(img)%matrix)
1952         END DO
1953         DEALLOCATE (deltap)
1954      ENDIF
1955
1956      DEALLOCATE (pabt, workt)
1957
1958      CALL density_rs2pw(pw_env, rs_rho, rho, rho_gspace)
1959
1960      total_rho = pw_integrate_function(rho%pw, isign=-1)
1961
1962      CALL timestop(handle)
1963
1964   END SUBROUTINE calculate_rho_elec
1965
1966! **************************************************************************************************
1967!> \brief computes the gradient of the density corresponding to a given
1968!>        density matrix on the grid
1969!> \param matrix_p ...
1970!> \param matrix_p_kp ...
1971!> \param drho ...
1972!> \param drho_gspace ...
1973!> \param qs_env ...
1974!> \param soft_valid ...
1975!> \param basis_type ...
1976!> \note  this is an alternative to calculate the gradient through FFTs
1977! **************************************************************************************************
1978   SUBROUTINE calculate_drho_elec(matrix_p, matrix_p_kp, drho, drho_gspace, qs_env, &
1979                                  soft_valid, basis_type)
1980
1981      TYPE(dbcsr_type), OPTIONAL, POINTER                :: matrix_p
1982      TYPE(dbcsr_p_type), DIMENSION(:), OPTIONAL, &
1983         POINTER                                         :: matrix_p_kp
1984      TYPE(pw_p_type), DIMENSION(:), INTENT(INOUT)       :: drho, drho_gspace
1985      TYPE(qs_environment_type), POINTER                 :: qs_env
1986      LOGICAL, INTENT(IN), OPTIONAL                      :: soft_valid
1987      CHARACTER(LEN=*), INTENT(IN), OPTIONAL             :: basis_type
1988
1989      CHARACTER(len=*), PARAMETER :: routineN = 'calculate_drho_elec', &
1990         routineP = moduleN//':'//routineN
1991
1992      CHARACTER(LEN=default_string_length)               :: my_basis_type
1993      INTEGER :: bcol, brow, handle, i, iatom, iatom_old, idir, igrid_level, ikind, ikind_old, &
1994         img, img_old, ipgf, iset, iset_old, itask, ithread, jatom, jatom_old, jkind, jkind_old, &
1995         jpgf, jset, jset_old, lmax_global, maxco, maxpgf, maxset, maxsgf, maxsgf_set, na1, na2, &
1996         natoms, nb1, nb2, ncoa, ncob, nimages, nseta, nsetb, ntasks, nthread, sgfa, sgfb
1997      INTEGER(kind=int_8), DIMENSION(:), POINTER         :: atom_pair_recv, atom_pair_send
1998      INTEGER(kind=int_8), DIMENSION(:, :), POINTER      :: tasks
1999      INTEGER, DIMENSION(:), POINTER                     :: la_max, la_min, lb_max, lb_min, mylmax, &
2000                                                            npgfa, npgfb, nsgfa, nsgfb
2001      INTEGER, DIMENSION(:, :), POINTER                  :: first_sgfa, first_sgfb
2002      LOGICAL                                            :: atom_pair_changed, distributed_rs_grids, &
2003                                                            do_kp, found, map_consistent, my_soft, &
2004                                                            use_subpatch
2005      REAL(KIND=dp)                                      :: eps_rho_rspace, rab2, scale, zetp
2006      REAL(KIND=dp), DIMENSION(3)                        :: ra, rab, rab_inv, rb
2007      REAL(KIND=dp), DIMENSION(:, :), POINTER            :: dist_ab, p_block, pab, sphi_a, sphi_b, &
2008                                                            work, zeta, zetb
2009      REAL(KIND=dp), DIMENSION(:, :, :), POINTER         :: pabt, workt
2010      TYPE(cell_type), POINTER                           :: cell
2011      TYPE(cube_info_type), DIMENSION(:), POINTER        :: cube_info
2012      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: deltap
2013      TYPE(dft_control_type), POINTER                    :: dft_control
2014      TYPE(gridlevel_info_type), POINTER                 :: gridlevel_info
2015      TYPE(gto_basis_set_type), POINTER                  :: orb_basis_set
2016      TYPE(neighbor_list_set_p_type), DIMENSION(:), &
2017         POINTER                                         :: sab_orb
2018      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
2019      TYPE(pw_env_type), POINTER                         :: pw_env
2020      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
2021      TYPE(qs_kind_type), POINTER                        :: qs_kind
2022      TYPE(realspace_grid_desc_p_type), DIMENSION(:), &
2023         POINTER                                         :: rs_descs
2024      TYPE(realspace_grid_p_type), DIMENSION(:), POINTER :: rs_rho
2025      TYPE(task_list_type), POINTER                      :: task_list, task_list_soft
2026
2027      CALL timeset(routineN, handle)
2028
2029      CPASSERT(PRESENT(matrix_p) .OR. PRESENT(matrix_p_kp))
2030      do_kp = PRESENT(matrix_p_kp)
2031
2032      NULLIFY (qs_kind, cell, dft_control, orb_basis_set, deltap, &
2033               qs_kind_set, sab_orb, particle_set, rs_rho, pw_env, rs_descs, &
2034               dist_ab, la_max, la_min, &
2035               lb_max, lb_min, npgfa, npgfb, nsgfa, nsgfb, p_block, &
2036               sphi_a, sphi_b, zeta, zetb, first_sgfa, first_sgfb, tasks, pabt, &
2037               workt, mylmax)
2038
2039      debug_count = debug_count + 1
2040
2041      ! by default, the full density is calculated
2042      my_soft = .FALSE.
2043      IF (PRESENT(soft_valid)) my_soft = soft_valid
2044
2045      IF (PRESENT(basis_type)) THEN
2046         my_basis_type = basis_type
2047      ELSE
2048         my_basis_type = "ORB"
2049      END IF
2050
2051      CALL get_qs_env(qs_env=qs_env, &
2052                      qs_kind_set=qs_kind_set, &
2053                      cell=cell, &
2054                      dft_control=dft_control, &
2055                      particle_set=particle_set, &
2056                      sab_orb=sab_orb, &
2057                      pw_env=pw_env)
2058
2059      SELECT CASE (my_basis_type)
2060      CASE ("ORB")
2061         CALL get_qs_env(qs_env=qs_env, &
2062                         task_list=task_list, &
2063                         task_list_soft=task_list_soft)
2064      CASE ("AUX_FIT")
2065         CALL get_qs_env(qs_env=qs_env, &
2066                         task_list_aux_fit=task_list, &
2067                         task_list_soft=task_list_soft)
2068      END SELECT
2069
2070      ! *** assign from pw_env
2071      gridlevel_info => pw_env%gridlevel_info
2072      cube_info => pw_env%cube_info
2073
2074      !   *** Allocate work storage ***
2075      nthread = 1
2076      CALL get_qs_kind_set(qs_kind_set=qs_kind_set, &
2077                           maxco=maxco, &
2078                           maxsgf=maxsgf, &
2079                           maxsgf_set=maxsgf_set, &
2080                           basis_type=my_basis_type)
2081      CALL reallocate(pabt, 1, maxco, 1, maxco, 0, nthread - 1)
2082      CALL reallocate(workt, 1, maxco, 1, maxsgf_set, 0, nthread - 1)
2083
2084      ! find maximum numbers
2085      nimages = dft_control%nimages
2086      CPASSERT(nimages == 1 .OR. do_kp)
2087
2088      natoms = SIZE(particle_set)
2089      maxset = 0
2090      maxpgf = 0
2091      lmax_global = 0
2092
2093      DO ikind = 1, SIZE(qs_kind_set)
2094         qs_kind => qs_kind_set(ikind)
2095         CALL get_qs_kind(qs_kind=qs_kind, &
2096                          softb=my_soft, &
2097                          basis_set=orb_basis_set, basis_type=my_basis_type)
2098
2099         IF (.NOT. ASSOCIATED(orb_basis_set)) CYCLE
2100
2101         CALL get_gto_basis_set(gto_basis_set=orb_basis_set, &
2102                                npgf=npgfa, nset=nseta, lmax=mylmax)
2103
2104         maxset = MAX(nseta, maxset)
2105         maxpgf = MAX(MAXVAL(npgfa), maxpgf)
2106         lmax_global = MAX(MAXVAL(mylmax), lmax_global)
2107      END DO
2108
2109      !update lmax_global since ga_gb_function=FUNC_DABpADB
2110      lmax_global = lmax_global + 1
2111
2112      ! get the task lists
2113      IF (my_soft) task_list => task_list_soft
2114      CPASSERT(ASSOCIATED(task_list))
2115      tasks => task_list%tasks
2116      dist_ab => task_list%dist_ab
2117      atom_pair_send => task_list%atom_pair_send
2118      atom_pair_recv => task_list%atom_pair_recv
2119      ntasks = task_list%ntasks
2120
2121      ! *** set up the rs multi-grids
2122      CPASSERT(ASSOCIATED(pw_env))
2123      CALL pw_env_get(pw_env, rs_descs=rs_descs, rs_grids=rs_rho)
2124      DO igrid_level = 1, gridlevel_info%ngrid_levels
2125         CALL rs_grid_retain(rs_rho(igrid_level)%rs_grid)
2126         distributed_rs_grids = rs_rho(igrid_level)%rs_grid%desc%distributed
2127      END DO
2128
2129      eps_rho_rspace = dft_control%qs_control%eps_rho_rspace
2130      map_consistent = dft_control%qs_control%map_consistent
2131
2132      !   *** Initialize working density matrix ***
2133      ! distributed rs grids require a matrix that will be changed
2134      ! whereas this is not the case for replicated grids
2135      ALLOCATE (deltap(nimages))
2136      IF (distributed_rs_grids) THEN
2137         DO img = 1, nimages
2138         END DO
2139         ! this matrix has no strict sparsity pattern in parallel
2140         ! deltap%sparsity_id=-1
2141         IF (do_kp) THEN
2142            DO img = 1, nimages
2143               CALL dbcsr_copy(deltap(img)%matrix, matrix_p_kp(img)%matrix, &
2144                               name="DeltaP")
2145            END DO
2146         ELSE
2147            CALL dbcsr_copy(deltap(1)%matrix, matrix_p, name="DeltaP")
2148         END IF
2149      ELSE
2150         IF (do_kp) THEN
2151            DO img = 1, nimages
2152               deltap(img)%matrix => matrix_p_kp(img)%matrix
2153            END DO
2154         ELSE
2155            deltap(1)%matrix => matrix_p
2156         END IF
2157      ENDIF
2158
2159      ! distribute the matrix
2160      IF (distributed_rs_grids) THEN
2161         CALL rs_distribute_matrix(rs_descs, deltap, atom_pair_send, atom_pair_recv, &
2162                                   natoms, nimages, scatter=.TRUE.)
2163      ENDIF
2164
2165      ! map all tasks on the grids
2166
2167      ithread = 0
2168      pab => pabt(:, :, ithread)
2169      work => workt(:, :, ithread)
2170
2171      loop_xyz: DO idir = 1, 3
2172
2173         DO igrid_level = 1, gridlevel_info%ngrid_levels
2174            CALL rs_grid_zero(rs_rho(igrid_level)%rs_grid)
2175         END DO
2176
2177         iatom_old = -1; jatom_old = -1; iset_old = -1; jset_old = -1
2178         ikind_old = -1; jkind_old = -1; img_old = -1
2179         loop_tasks: DO itask = 1, ntasks
2180
2181            !decode the atom pair and basis info
2182            CALL int2pair(tasks(3, itask), igrid_level, img, iatom, jatom, iset, jset, ipgf, jpgf, &
2183                          nimages, natoms, maxset, maxpgf)
2184
2185            ikind = particle_set(iatom)%atomic_kind%kind_number
2186            jkind = particle_set(jatom)%atomic_kind%kind_number
2187
2188            IF (iatom .NE. iatom_old .OR. jatom .NE. jatom_old .OR. img .NE. img_old) THEN
2189
2190               IF (iatom .NE. iatom_old) ra(:) = pbc(particle_set(iatom)%r, cell)
2191
2192               IF (iatom <= jatom) THEN
2193                  brow = iatom
2194                  bcol = jatom
2195               ELSE
2196                  brow = jatom
2197                  bcol = iatom
2198               END IF
2199
2200               IF (ikind .NE. ikind_old) THEN
2201                  CALL get_qs_kind(qs_kind_set(ikind), softb=my_soft, &
2202                                   basis_set=orb_basis_set, basis_type=my_basis_type)
2203                  CALL get_gto_basis_set(gto_basis_set=orb_basis_set, &
2204                                         first_sgf=first_sgfa, &
2205                                         lmax=la_max, &
2206                                         lmin=la_min, &
2207                                         npgf=npgfa, &
2208                                         nset=nseta, &
2209                                         nsgf_set=nsgfa, &
2210                                         sphi=sphi_a, &
2211                                         zet=zeta)
2212               ENDIF
2213
2214               IF (jkind .NE. jkind_old) THEN
2215                  CALL get_qs_kind(qs_kind_set(jkind), softb=my_soft, &
2216                                   basis_set=orb_basis_set, basis_type=my_basis_type)
2217                  CALL get_gto_basis_set(gto_basis_set=orb_basis_set, &
2218                                         first_sgf=first_sgfb, &
2219                                         lmax=lb_max, &
2220                                         lmin=lb_min, &
2221                                         npgf=npgfb, &
2222                                         nset=nsetb, &
2223                                         nsgf_set=nsgfb, &
2224                                         sphi=sphi_b, &
2225                                         zet=zetb)
2226               ENDIF
2227
2228               CALL dbcsr_get_block_p(matrix=deltap(img)%matrix, &
2229                                      row=brow, col=bcol, BLOCK=p_block, found=found)
2230               CPASSERT(found)
2231
2232               iatom_old = iatom
2233               jatom_old = jatom
2234               ikind_old = ikind
2235               jkind_old = jkind
2236               img_old = img
2237               atom_pair_changed = .TRUE.
2238
2239            ELSE
2240
2241               atom_pair_changed = .FALSE.
2242
2243            ENDIF
2244
2245            IF (atom_pair_changed .OR. iset_old .NE. iset .OR. jset_old .NE. jset) THEN
2246
2247               ncoa = npgfa(iset)*ncoset(la_max(iset))
2248               sgfa = first_sgfa(1, iset)
2249               ncob = npgfb(jset)*ncoset(lb_max(jset))
2250               sgfb = first_sgfb(1, jset)
2251
2252               IF (iatom <= jatom) THEN
2253                  CALL dgemm("N", "N", ncoa, nsgfb(jset), nsgfa(iset), &
2254                             1.0_dp, sphi_a(1, sgfa), SIZE(sphi_a, 1), &
2255                             p_block(sgfa, sgfb), SIZE(p_block, 1), &
2256                             0.0_dp, work(1, 1), maxco)
2257                  CALL dgemm("N", "T", ncoa, ncob, nsgfb(jset), &
2258                             1.0_dp, work(1, 1), maxco, &
2259                             sphi_b(1, sgfb), SIZE(sphi_b, 1), &
2260                             0.0_dp, pab(1, 1), maxco)
2261               ELSE
2262                  CALL dgemm("N", "N", ncob, nsgfa(iset), nsgfb(jset), &
2263                             1.0_dp, sphi_b(1, sgfb), SIZE(sphi_b, 1), &
2264                             p_block(sgfb, sgfa), SIZE(p_block, 1), &
2265                             0.0_dp, work(1, 1), maxco)
2266                  CALL dgemm("N", "T", ncob, ncoa, nsgfa(iset), &
2267                             1.0_dp, work(1, 1), maxco, &
2268                             sphi_a(1, sgfa), SIZE(sphi_a, 1), &
2269                             0.0_dp, pab(1, 1), maxco)
2270               END IF
2271
2272               iset_old = iset
2273               jset_old = jset
2274
2275            ENDIF
2276
2277            rab(:) = dist_ab(:, itask)
2278            rab2 = rab(1)*rab(1) + rab(2)*rab(2) + rab(3)*rab(3)
2279            rb(:) = ra(:) + rab(:)
2280            zetp = zeta(ipgf, iset) + zetb(jpgf, jset)
2281
2282            na1 = (ipgf - 1)*ncoset(la_max(iset)) + 1
2283            na2 = ipgf*ncoset(la_max(iset))
2284            nb1 = (jpgf - 1)*ncoset(lb_max(jset)) + 1
2285            nb2 = jpgf*ncoset(lb_max(jset))
2286
2287            ! takes the density matrix symmetry in account, i.e. off-diagonal blocks need to be mapped 'twice'
2288            IF (iatom == jatom .AND. img == 1) THEN
2289               scale = 1.0_dp
2290            ELSE
2291               scale = 2.0_dp
2292            END IF
2293
2294            ! check whether we need to use fawzi's generalised collocation scheme
2295            IF (rs_rho(igrid_level)%rs_grid%desc%distributed) THEN
2296               !tasks(4,:) is 0 for replicated, 1 for distributed 2 for exceptional distributed tasks
2297               IF (tasks(4, itask) .EQ. 2) THEN
2298                  use_subpatch = .TRUE.
2299               ELSE
2300                  use_subpatch = .FALSE.
2301               ENDIF
2302            ELSE
2303               use_subpatch = .FALSE.
2304            ENDIF
2305            IF (iatom <= jatom) THEN
2306               CALL collocate_pgf_product_rspace( &
2307                  la_max(iset), zeta(ipgf, iset), la_min(iset), &
2308                  lb_max(jset), zetb(jpgf, jset), lb_min(jset), &
2309                  ra, rab, rab2, scale, pab, na1 - 1, nb1 - 1, &
2310                  rs_rho(igrid_level)%rs_grid, cell, cube_info(igrid_level), &
2311                  eps_rho_rspace, &
2312                  ga_gb_function=FUNC_DABpADB, &
2313                  idir=idir, &
2314                  map_consistent=map_consistent, use_subpatch=use_subpatch, subpatch_pattern=tasks(6, itask), &
2315                  lmax_global=lmax_global)
2316            ELSE
2317               rab_inv = -rab
2318               CALL collocate_pgf_product_rspace( &
2319                  lb_max(jset), zetb(jpgf, jset), lb_min(jset), &
2320                  la_max(iset), zeta(ipgf, iset), la_min(iset), &
2321                  rb, rab_inv, rab2, scale, pab, nb1 - 1, na1 - 1, &
2322                  rs_rho(igrid_level)%rs_grid, cell, cube_info(igrid_level), &
2323                  eps_rho_rspace, &
2324                  ga_gb_function=FUNC_DABpADB, &
2325                  idir=idir, &
2326                  map_consistent=map_consistent, use_subpatch=use_subpatch, subpatch_pattern=tasks(6, itask), &
2327                  lmax_global=lmax_global)
2328            END IF
2329
2330         END DO loop_tasks
2331
2332         CALL density_rs2pw_basic(pw_env, rs_rho, drho(idir), drho_gspace(idir))
2333
2334      END DO loop_xyz
2335
2336      !   *** Release work storage ***
2337      IF (ASSOCIATED(rs_rho)) THEN
2338         DO i = 1, SIZE(rs_rho)
2339            CALL rs_grid_release(rs_rho(i)%rs_grid)
2340         END DO
2341      END IF
2342
2343      IF (distributed_rs_grids) THEN
2344         CALL dbcsr_deallocate_matrix_set(deltap)
2345      ELSE
2346         DO img = 1, nimages
2347            NULLIFY (deltap(img)%matrix)
2348         END DO
2349         DEALLOCATE (deltap)
2350      ENDIF
2351
2352      DEALLOCATE (pabt, workt)
2353
2354      CALL timestop(handle)
2355
2356   END SUBROUTINE calculate_drho_elec
2357
2358! **************************************************************************************************
2359!> \brief maps a given wavefunction on the grid
2360!> \param mo_vectors ...
2361!> \param ivector ...
2362!> \param rho ...
2363!> \param rho_gspace ...
2364!> \param atomic_kind_set ...
2365!> \param qs_kind_set ...
2366!> \param cell ...
2367!> \param dft_control ...
2368!> \param particle_set ...
2369!> \param pw_env ...
2370!> \param basis_type ...
2371!> \param external_vector ...
2372!> \par History
2373!>      08.2002 created [Joost VandeVondele]
2374!>      03.2006 made independent of qs_env [Joost VandeVondele]
2375!> \note
2376!>      modified calculate_rho_elec, should write the wavefunction represented by
2377!>      it's presumably dominated by the FFT and the rs->pw and back routines
2378!>
2379!>    BUGS ???
2380!>      does it take correctly the periodic images of the basis functions into account
2381!>      i.e. is only correct if the basis functions are localised enough to be just in 1 cell ?
2382! **************************************************************************************************
2383   SUBROUTINE calculate_wavefunction(mo_vectors, ivector, rho, rho_gspace, &
2384                                     atomic_kind_set, qs_kind_set, cell, dft_control, particle_set, &
2385                                     pw_env, basis_type, external_vector)
2386
2387      TYPE(cp_fm_type), POINTER                          :: mo_vectors
2388      INTEGER                                            :: ivector
2389      TYPE(pw_p_type), INTENT(INOUT)                     :: rho, rho_gspace
2390      TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
2391      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
2392      TYPE(cell_type), POINTER                           :: cell
2393      TYPE(dft_control_type), POINTER                    :: dft_control
2394      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
2395      TYPE(pw_env_type), POINTER                         :: pw_env
2396      CHARACTER(LEN=*), INTENT(IN), OPTIONAL             :: basis_type
2397      REAL(KIND=dp), DIMENSION(:), OPTIONAL              :: external_vector
2398
2399      CHARACTER(LEN=*), PARAMETER :: routineN = 'calculate_wavefunction', &
2400         routineP = moduleN//':'//routineN
2401
2402      CHARACTER(LEN=default_string_length)               :: my_basis_type
2403      INTEGER :: dir, group, group_size, handle, i, iatom, igrid_level, ikind, ipgf, iset, &
2404         lmax_global, maxco, maxsgf_set, my_pos, na1, na2, nao, natom, ncoa, nseta, offset, sgfa
2405      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: where_is_the_point
2406      INTEGER, DIMENSION(3)                              :: lb, location, tp, ub
2407      INTEGER, DIMENSION(:), POINTER                     :: la_max, la_min, mylmax, npgfa, nsgfa
2408      INTEGER, DIMENSION(:, :), POINTER                  :: first_sgfa
2409      LOGICAL                                            :: local, map_it_here
2410      REAL(KIND=dp)                                      :: dab, eps_rho_rspace, rab2, scale, zetp
2411      REAL(KIND=dp), DIMENSION(3)                        :: ra, rab
2412      REAL(KIND=dp), DIMENSION(:), POINTER               :: eigenvector
2413      REAL(KIND=dp), DIMENSION(:, :), POINTER            :: pab, sphi_a, work, zeta
2414      TYPE(cube_info_type), DIMENSION(:), POINTER        :: cube_info
2415      TYPE(gridlevel_info_type), POINTER                 :: gridlevel_info
2416      TYPE(gto_basis_set_type), POINTER                  :: orb_basis_set
2417      TYPE(pw_p_type), DIMENSION(:), POINTER             :: mgrid_gspace, mgrid_rspace
2418      TYPE(pw_pool_p_type), DIMENSION(:), POINTER        :: pw_pools
2419      TYPE(REALSPACE_GRID_P_TYPE), DIMENSION(:), POINTER :: rs_rho
2420
2421      IF (PRESENT(basis_type)) THEN
2422         my_basis_type = basis_type
2423      ELSE
2424         my_basis_type = "ORB"
2425      END IF
2426
2427      CALL timeset(routineN, handle)
2428
2429      NULLIFY (eigenvector, orb_basis_set, &
2430               pab, work, la_max, la_min, &
2431               npgfa, nsgfa, &
2432               sphi_a, zeta, first_sgfa, &
2433               rs_rho, pw_pools, mgrid_rspace, mgrid_gspace, mylmax)
2434
2435      IF (PRESENT(external_vector)) THEN
2436         nao = SIZE(external_vector)
2437         ALLOCATE (eigenvector(nao))
2438         eigenvector = external_vector
2439      ELSE
2440         CALL cp_fm_get_info(matrix=mo_vectors, nrow_global=nao)
2441         ALLOCATE (eigenvector(nao))
2442         DO i = 1, nao
2443            CALL cp_fm_get_element(mo_vectors, i, ivector, eigenvector(i), local)
2444         ENDDO
2445      ENDIF
2446
2447      ! *** set up the pw multi-grids
2448      CPASSERT(ASSOCIATED(pw_env))
2449      CALL pw_env_get(pw_env, rs_grids=rs_rho, pw_pools=pw_pools, &
2450                      cube_info=cube_info, gridlevel_info=gridlevel_info)
2451
2452      CALL pw_pools_create_pws(pw_pools, mgrid_gspace, &
2453                               use_data=COMPLEXDATA1D, &
2454                               in_space=RECIPROCALSPACE)
2455      CALL pw_pools_create_pws(pw_pools, mgrid_rspace, &
2456                               use_data=REALDATA3D, &
2457                               in_space=REALSPACE)
2458
2459      ! *** set up rs multi-grids
2460      DO igrid_level = 1, gridlevel_info%ngrid_levels
2461         CALL rs_grid_retain(rs_rho(igrid_level)%rs_grid)
2462         CALL rs_grid_zero(rs_rho(igrid_level)%rs_grid)
2463      END DO
2464
2465      eps_rho_rspace = dft_control%qs_control%eps_rho_rspace
2466!   *** Allocate work storage ***
2467      CALL get_atomic_kind_set(atomic_kind_set, natom=natom)
2468      CALL get_qs_kind_set(qs_kind_set, &
2469                           maxco=maxco, &
2470                           maxsgf_set=maxsgf_set, &
2471                           basis_type=my_basis_type)
2472
2473      ALLOCATE (pab(maxco, 1))
2474      ALLOCATE (work(maxco, 1))
2475
2476      offset = 0
2477      group = mgrid_rspace(1)%pw%pw_grid%para%group
2478      my_pos = mgrid_rspace(1)%pw%pw_grid%para%my_pos
2479      group_size = mgrid_rspace(1)%pw%pw_grid%para%group_size
2480      ALLOCATE (where_is_the_point(0:group_size - 1))
2481
2482      !loop over natom to find maximum value of la_max
2483      lmax_global = 0
2484      DO iatom = 1, natom
2485         ikind = particle_set(iatom)%atomic_kind%kind_number
2486         CALL get_qs_kind(qs_kind_set(ikind), basis_set=orb_basis_set, basis_type=my_basis_type)
2487         CALL get_gto_basis_set(gto_basis_set=orb_basis_set, lmax=mylmax)
2488         lmax_global = MAX(MAXVAL(mylmax), lmax_global)
2489      END DO
2490
2491      DO iatom = 1, natom
2492         ikind = particle_set(iatom)%atomic_kind%kind_number
2493         CALL get_qs_kind(qs_kind_set(ikind), basis_set=orb_basis_set, basis_type=my_basis_type)
2494         CALL get_gto_basis_set(gto_basis_set=orb_basis_set, &
2495                                first_sgf=first_sgfa, &
2496                                lmax=la_max, &
2497                                lmin=la_min, &
2498                                npgf=npgfa, &
2499                                nset=nseta, &
2500                                nsgf_set=nsgfa, &
2501                                sphi=sphi_a, &
2502                                zet=zeta)
2503         ra(:) = pbc(particle_set(iatom)%r, cell)
2504         rab(:) = 0.0_dp
2505         rab2 = 0.0_dp
2506         dab = 0.0_dp
2507
2508         DO iset = 1, nseta
2509
2510            ncoa = npgfa(iset)*ncoset(la_max(iset))
2511            sgfa = first_sgfa(1, iset)
2512
2513            DO i = 1, nsgfa(iset)
2514               work(i, 1) = eigenvector(offset + i)
2515            ENDDO
2516
2517            CALL dgemm("N", "N", ncoa, 1, nsgfa(iset), &
2518                       1.0_dp, sphi_a(1, sgfa), SIZE(sphi_a, 1), &
2519                       work(1, 1), SIZE(work, 1), &
2520                       0.0_dp, pab(1, 1), SIZE(pab, 1))
2521
2522            DO ipgf = 1, npgfa(iset)
2523
2524               na1 = (ipgf - 1)*ncoset(la_max(iset)) + 1
2525               na2 = ipgf*ncoset(la_max(iset))
2526
2527               scale = 1.0_dp
2528               zetp = zeta(ipgf, iset)
2529               igrid_level = gaussian_gridlevel(gridlevel_info, zetp)
2530
2531               map_it_here = .FALSE.
2532
2533               IF (.NOT. ALL(rs_rho(igrid_level)%rs_grid%desc%perd == 1)) THEN
2534                  DO dir = 1, 3
2535                     ! bounds of local grid (i.e. removing the 'wings'), if periodic
2536                     tp(dir) = FLOOR(DOT_PRODUCT(cell%h_inv(dir, :), ra)*rs_rho(igrid_level)%rs_grid%desc%npts(dir))
2537                     tp(dir) = MODULO(tp(dir), rs_rho(igrid_level)%rs_grid%desc%npts(dir))
2538                     IF (rs_rho(igrid_level)%rs_grid%desc%perd(dir) .NE. 1) THEN
2539                        lb(dir) = rs_rho(igrid_level)%rs_grid%lb_local(dir) + rs_rho(igrid_level)%rs_grid%desc%border
2540                        ub(dir) = rs_rho(igrid_level)%rs_grid%ub_local(dir) - rs_rho(igrid_level)%rs_grid%desc%border
2541                     ELSE
2542                        lb(dir) = rs_rho(igrid_level)%rs_grid%lb_local(dir)
2543                        ub(dir) = rs_rho(igrid_level)%rs_grid%ub_local(dir)
2544                     ENDIF
2545                     ! distributed grid, only map if it is local to the grid
2546                     location(dir) = tp(dir) + rs_rho(igrid_level)%rs_grid%desc%lb(dir)
2547                  ENDDO
2548                  IF (lb(1) <= location(1) .AND. location(1) <= ub(1) .AND. &
2549                      lb(2) <= location(2) .AND. location(2) <= ub(2) .AND. &
2550                      lb(3) <= location(3) .AND. location(3) <= ub(3)) THEN
2551                     map_it_here = .TRUE.
2552                  ENDIF
2553               ELSE
2554                  ! not distributed, just a round-robin distribution over the full set of CPUs
2555                  IF (MODULO(offset, group_size) == my_pos) map_it_here = .TRUE.
2556               ENDIF
2557
2558               IF (map_it_here) CALL collocate_pgf_product_rspace( &
2559                  la_max(iset), zeta(ipgf, iset), la_min(iset), &
2560                  0, 0.0_dp, 0, &
2561                  ra, rab, rab2, scale, pab, na1 - 1, 0, &
2562                  rs_rho(igrid_level)%rs_grid, cell, cube_info(igrid_level), &
2563                  eps_rho_rspace, map_consistent=.TRUE., ga_gb_function=FUNC_AB, &
2564                  lmax_global=lmax_global)
2565
2566            END DO
2567
2568            offset = offset + nsgfa(iset)
2569
2570         END DO
2571
2572      END DO
2573
2574      DO igrid_level = 1, gridlevel_info%ngrid_levels
2575         CALL rs_pw_transfer(rs_rho(igrid_level)%rs_grid, &
2576                             mgrid_rspace(igrid_level)%pw, rs2pw)
2577         CALL rs_grid_release(rs_rho(igrid_level)%rs_grid)
2578      ENDDO
2579
2580      CALL pw_zero(rho_gspace%pw)
2581      DO igrid_level = 1, gridlevel_info%ngrid_levels
2582         CALL pw_transfer(mgrid_rspace(igrid_level)%pw, &
2583                          mgrid_gspace(igrid_level)%pw)
2584         CALL pw_axpy(mgrid_gspace(igrid_level)%pw, rho_gspace%pw)
2585      END DO
2586
2587      CALL pw_transfer(rho_gspace%pw, rho%pw)
2588
2589      ! Release work storage
2590      DEALLOCATE (eigenvector)
2591
2592      DEALLOCATE (pab)
2593
2594      DEALLOCATE (work)
2595
2596      ! give back the pw multi-grids
2597      CALL pw_pools_give_back_pws(pw_pools, mgrid_gspace)
2598      CALL pw_pools_give_back_pws(pw_pools, mgrid_rspace)
2599
2600      CALL timestop(handle)
2601
2602   END SUBROUTINE calculate_wavefunction
2603
2604! **************************************************************************************************
2605!> \brief low level collocation of primitive gaussian functions
2606!> \param la_max ...
2607!> \param zeta ...
2608!> \param la_min ...
2609!> \param lb_max ...
2610!> \param zetb ...
2611!> \param lb_min ...
2612!> \param ra ...
2613!> \param rab ...
2614!> \param rab2 ...
2615!> \param scale ...
2616!> \param pab ...
2617!> \param o1 ...
2618!> \param o2 ...
2619!> \param rsgrid ...
2620!> \param cell ...
2621!> \param cube_info ...
2622!> \param eps_rho_rspace ...
2623!> \param ga_gb_function ...
2624!> \param lgrid ...
2625!> \param ithread ...
2626!> \param map_consistent ...
2627!> \param collocate_rho0 ...
2628!> \param rpgf0_s ...
2629!> \param idir ...
2630!> \param ir ...
2631!> \param rsgauge ...
2632!> \param rsbuf ...
2633!> \param use_subpatch ...
2634!> \param subpatch_pattern ...
2635!> \param lmax_global Maximum possible value of lmax used to dimension arrays
2636! **************************************************************************************************
2637   SUBROUTINE collocate_pgf_product_rspace(la_max, zeta, la_min, &
2638                                           lb_max, zetb, lb_min, &
2639                                           ra, rab, rab2, scale, pab, o1, o2, &
2640                                           rsgrid, cell, cube_info, &
2641                                           eps_rho_rspace, ga_gb_function, &
2642                                           lgrid, ithread, &
2643                                           map_consistent, &
2644                                           collocate_rho0, &
2645                                           rpgf0_s, idir, ir, rsgauge, rsbuf, &
2646                                           use_subpatch, subpatch_pattern, &
2647                                           lmax_global)
2648
2649      INTEGER, INTENT(IN)                      :: la_max
2650      REAL(KIND=dp), INTENT(IN)                :: zeta
2651      INTEGER, INTENT(IN)                      :: la_min, lb_max
2652      REAL(KIND=dp), INTENT(IN)                :: zetb
2653      INTEGER, INTENT(IN)                      :: lb_min
2654      REAL(KIND=dp), DIMENSION(3), INTENT(IN)  :: ra, rab
2655      REAL(KIND=dp), INTENT(IN)                :: rab2, scale
2656      REAL(KIND=dp), DIMENSION(:, :), POINTER  :: pab
2657      INTEGER, INTENT(IN)                      :: o1, o2
2658      TYPE(realspace_grid_type), POINTER       :: rsgrid
2659      TYPE(cell_type), POINTER                 :: cell
2660      TYPE(cube_info_type), INTENT(IN)         :: cube_info
2661      REAL(KIND=dp), INTENT(IN)                :: eps_rho_rspace
2662      INTEGER, INTENT(IN)                      :: ga_gb_function
2663      TYPE(lgrid_type), OPTIONAL               :: lgrid
2664      INTEGER, INTENT(IN), OPTIONAL            :: ithread
2665      LOGICAL, INTENT(IN), OPTIONAL            :: map_consistent, collocate_rho0
2666      REAL(dp), INTENT(IN), OPTIONAL           :: rpgf0_s
2667      INTEGER, INTENT(IN), OPTIONAL            :: idir, ir
2668      TYPE(realspace_grid_type), POINTER, OPTIONAL :: rsgauge, rsbuf
2669      LOGICAL, OPTIONAL                        :: use_subpatch
2670      INTEGER(KIND=int_8), OPTIONAL, INTENT(IN):: subpatch_pattern
2671      INTEGER, INTENT(IN)                      :: lmax_global
2672
2673      CHARACTER(len=*), PARAMETER :: routineN = 'collocate_pgf_product_rspace', &
2674                                     routineP = moduleN//':'//routineN
2675
2676      INTEGER :: cmax, gridbounds(2, 3), i, ico, icoef, ider1, ider2, ig, ithread_l, &
2677                 jco, k, l, la_max_local, la_min_local, lb_max_local, lb_min_local, &
2678                 length, lxa, lxb, lxy, lxyz, lya, lyb, &
2679                 lza, lzb, o1_local, o2_local, offset, start
2680      INTEGER, DIMENSION(3)                    :: cubecenter, lb_cube, ng, &
2681                                                  ub_cube
2682      INTEGER, DIMENSION(:), POINTER           :: sphere_bounds
2683      LOGICAL                                  :: my_collocate_rho0, &
2684                                                  my_map_consistent, subpatch_collocate
2685      REAL(KIND=dp) :: a, b, binomial_k_lxa, binomial_l_lxb, cutoff, f, pg, &
2686                       prefactor, radius, rpg, zetp
2687      REAL(KIND=dp), DIMENSION(3)              :: dr, rap, rb, rbp, roffset, rp
2688      REAL(KIND=dp), DIMENSION(:, :), POINTER  :: pab_local
2689      REAL(KIND=dp), DIMENSION(:, :, :), &
2690         POINTER                                :: grid
2691
2692      INTEGER :: lxp, lyp, lzp, lp, lxpm, iaxis
2693      INTEGER, ALLOCATABLE, DIMENSION(:, :) :: map
2694      REAL(kind=dp) :: p_ele
2695      REAL(kind=dp), DIMENSION(0:lmax_global*2, 0:lmax_global, 0:lmax_global, 3) :: alpha
2696      REAL(kind=dp), DIMENSION(((lmax_global*2 + 1)*(lmax_global*2 + 2)*(lmax_global*2 + 3))/6) :: coef_xyz
2697      REAL(kind=dp), DIMENSION(((lmax_global*2 + 1)*(lmax_global*2 + 2))/2) :: coef_xyt
2698      REAL(kind=dp), DIMENSION(0:lmax_global*2) :: coef_xtt
2699
2700      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :) :: pol_z
2701      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :) :: pol_y
2702      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: pol_x
2703      REAL(KIND=dp) :: t_exp_1, t_exp_2, t_exp_min_1, t_exp_min_2, t_exp_plus_1, t_exp_plus_2
2704
2705      REAL(kind=dp), POINTER, DIMENSION(:, :, :) :: grid_tmp, gauge
2706
2707      subpatch_collocate = .FALSE.
2708
2709      IF (PRESENT(use_subpatch)) THEN
2710         IF (use_subpatch) THEN
2711            subpatch_collocate = .TRUE.
2712            CPASSERT(PRESENT(subpatch_pattern))
2713         ENDIF
2714      ENDIF
2715
2716      IF (PRESENT(ithread)) THEN
2717         ithread_l = ithread
2718      ELSE
2719         ithread_l = 0
2720      ENDIF
2721
2722      ! use identical radii for integrate and collocate ?
2723      IF (PRESENT(map_consistent)) THEN
2724         my_map_consistent = map_consistent
2725      ELSE
2726         my_map_consistent = .FALSE.
2727      ENDIF
2728
2729      IF (PRESENT(collocate_rho0) .AND. PRESENT(rpgf0_s)) THEN
2730         my_collocate_rho0 = collocate_rho0
2731      ELSE
2732         my_collocate_rho0 = .FALSE.
2733      END IF
2734
2735      zetp = zeta + zetb
2736      f = zetb/zetp
2737      rap(:) = f*rab(:)
2738      rbp(:) = rap(:) - rab(:)
2739      rp(:) = ra(:) + rap(:)
2740      rb(:) = ra(:) + rab(:)
2741
2742      ! check to avoid overflows
2743      a = MAXVAL(ABS(rsgrid%desc%dh))
2744      a = 300._dp/(a*a)
2745      !   CPASSERT(zetp < a)
2746
2747      IF (my_map_consistent) THEN
2748         cutoff = 1.0_dp
2749         prefactor = EXP(-zeta*f*rab2)
2750         radius = exp_radius_very_extended(la_min, la_max, lb_min, lb_max, ra=ra, rb=rb, rp=rp, &
2751                                           zetp=zetp, eps=eps_rho_rspace, &
2752                                           prefactor=prefactor, cutoff=cutoff)
2753         prefactor = scale*EXP(-zeta*f*rab2)
2754      ELSE IF (my_collocate_rho0) THEN
2755         cutoff = 0.0_dp
2756         prefactor = 1.0_dp
2757         radius = rpgf0_s
2758      ELSE
2759         cutoff = 0.0_dp
2760         prefactor = scale*EXP(-zeta*f*rab2)
2761         radius = exp_radius_very_extended(la_min, la_max, lb_min, lb_max, pab, o1, o2, ra, rb, rp, &
2762                                           zetp, eps_rho_rspace, prefactor, cutoff)
2763      ENDIF
2764
2765      a = MAXVAL(ABS(rsgrid%desc%dh))
2766      IF (radius .LT. a/2.0_dp) THEN
2767         RETURN
2768      END IF
2769
2770      ! it's a choice to compute lX_min/max, pab here,
2771      ! this way we get the same radius as we use for the corresponding density
2772      SELECT CASE (ga_gb_function)
2773      CASE (FUNC_DADB)
2774         la_max_local = la_max + 1
2775         la_min_local = MAX(la_min - 1, 0)
2776         lb_max_local = lb_max + 1
2777         lb_min_local = MAX(lb_min - 1, 0)
2778         ! create a new pab_local so that mapping pab_local with pgf_a pgf_b
2779         ! is equivalent to mapping pab with 0.5 * (nabla pgf_a) . (nabla pgf_b)
2780         ! (ddx pgf_a ) (ddx pgf_b) = (lax pgf_{a-1x} - 2*zeta*pgf_{a+1x})*(lbx pgf_{b-1x} - 2*zetb*pgf_{b+1x})
2781         ! cleaner would possibly be to touch pzyx directly (avoiding the following allocate)
2782         ALLOCATE (pab_local(ncoset(la_max_local), ncoset(lb_max_local)))
2783         pab_local = 0.0_dp
2784         DO lxa = 0, la_max
2785         DO lxb = 0, lb_max
2786            DO lya = 0, la_max - lxa
2787            DO lyb = 0, lb_max - lxb
2788               DO lza = MAX(la_min - lxa - lya, 0), la_max - lxa - lya
2789               DO lzb = MAX(lb_min - lxb - lyb, 0), lb_max - lxb - lyb
2790
2791                  ! this element of pab results in 12 elements of pab_local
2792                  CALL prepare_dadb(pab_local, pab, lxa, lya, lza, lxb, lyb, lzb, o1, o2, zeta, zetb)
2793
2794               ENDDO
2795               ENDDO
2796            ENDDO
2797            ENDDO
2798         ENDDO
2799         ENDDO
2800         o1_local = 0
2801         o2_local = 0
2802         pab_local = pab_local*0.5_dp
2803      CASE (FUNC_ADBmDAB)
2804         CPASSERT(PRESENT(idir))
2805         la_max_local = la_max + 1
2806         la_min_local = MAX(la_min - 1, 0)
2807         lb_max_local = lb_max + 1
2808         lb_min_local = MAX(lb_min - 1, 0)
2809         ! create a new pab_local so that mapping pab_local with pgf_a pgf_b
2810         ! is equivalent to mapping pab with
2811         !    pgf_a (nabla_{idir} pgf_b) - (nabla_{idir} pgf_a) pgf_b
2812         ! ( pgf_a ) (ddx pgf_b) - (ddx pgf_a)( pgf_b ) =
2813         !          pgf_a *(lbx pgf_{b-1x} - 2*zetb*pgf_{b+1x}) -
2814         !                   (lax pgf_{a-1x} - 2*zeta*pgf_{a+1x}) pgf_b
2815
2816         ALLOCATE (pab_local(ncoset(la_max_local), ncoset(lb_max_local)))
2817         pab_local = 0.0_dp
2818         DO lxa = 0, la_max
2819         DO lxb = 0, lb_max
2820            DO lya = 0, la_max - lxa
2821            DO lyb = 0, lb_max - lxb
2822               DO lza = MAX(la_min - lxa - lya, 0), la_max - lxa - lya
2823               DO lzb = MAX(lb_min - lxb - lyb, 0), lb_max - lxb - lyb
2824                  ! this element of pab results in 4 elements of pab_local
2825                  CALL prepare_adb_m_dab(pab_local, pab, idir, &
2826                                         lxa, lya, lza, lxb, lyb, lzb, o1, o2, zeta, zetb)
2827               END DO
2828               END DO
2829            END DO
2830            END DO
2831         END DO
2832         END DO
2833         o1_local = 0
2834         o2_local = 0
2835      CASE (FUNC_DABpADB)
2836         CPASSERT(PRESENT(idir))
2837         la_max_local = la_max + 1
2838         la_min_local = MAX(la_min - 1, 0)
2839         lb_max_local = lb_max + 1
2840         lb_min_local = MAX(lb_min - 1, 0)
2841         ! create a new pab_local so that mapping pab_local with pgf_a pgf_b
2842         ! is equivalent to mapping pab with
2843         !    pgf_a (nabla_{idir} pgf_b) + (nabla_{idir} pgf_a) pgf_b
2844         ! ( pgf_a ) (ddx pgf_b) + (ddx pgf_a)( pgf_b ) =
2845         !          pgf_a *(lbx pgf_{b-1x} + 2*zetb*pgf_{b+1x}) +
2846         !                   (lax pgf_{a-1x} + 2*zeta*pgf_{a+1x}) pgf_b
2847
2848         ALLOCATE (pab_local(ncoset(la_max_local), ncoset(lb_max_local)))
2849         pab_local = 0.0_dp
2850         DO lxa = 0, la_max
2851         DO lxb = 0, lb_max
2852            DO lya = 0, la_max - lxa
2853            DO lyb = 0, lb_max - lxb
2854               DO lza = MAX(la_min - lxa - lya, 0), la_max - lxa - lya
2855               DO lzb = MAX(lb_min - lxb - lyb, 0), lb_max - lxb - lyb
2856                  ! this element of pab results in 4 elements of pab_local
2857                  CALL prepare_dab_p_adb(pab_local, pab, idir, &
2858                                         lxa, lya, lza, lxb, lyb, lzb, o1, o2, zeta, zetb)
2859               END DO
2860               END DO
2861            END DO
2862            END DO
2863         END DO
2864         END DO
2865         o1_local = 0
2866         o2_local = 0
2867      CASE (FUNC_DX, FUNC_DY, FUNC_DZ)
2868         ider1 = ga_gb_function - 500
2869         la_max_local = la_max + 1
2870         la_min_local = MAX(la_min - 1, 0)
2871         lb_max_local = lb_max + 1
2872         lb_min_local = MAX(lb_min - 1, 0)
2873         ! create a new pab_local so that mapping pab_local with pgf_a pgf_b
2874         ! is equivalent to mapping pab with
2875         !   d_{ider1} pgf_a d_{ider1} pgf_b
2876         ! dx pgf_a dx pgf_b =
2877         !        (lax pgf_{a-1x})*(lbx pgf_{b-1x}) - 2*zetb*lax*pgf_{a-1x}*pgf_{b+1x} -
2878         !         lbx pgf_{b-1x}*2*zeta*pgf_{a+1x}+ 4*zeta*zetab*pgf_{a+1x}pgf_{b+1x}
2879
2880         ALLOCATE (pab_local(ncoset(la_max_local), ncoset(lb_max_local)))
2881         pab_local = 0.0_dp
2882         DO lxa = 0, la_max
2883         DO lxb = 0, lb_max
2884            DO lya = 0, la_max - lxa
2885            DO lyb = 0, lb_max - lxb
2886               DO lza = MAX(la_min - lxa - lya, 0), la_max - lxa - lya
2887               DO lzb = MAX(lb_min - lxb - lyb, 0), lb_max - lxb - lyb
2888                  ! this element of pab results in 4 elements of pab_local
2889                  CALL prepare_dIadIb(pab_local, pab, ider1, &
2890                                      lxa, lya, lza, lxb, lyb, lzb, o1, o2, zeta, zetb)
2891               END DO
2892               END DO
2893            END DO
2894            END DO
2895         END DO
2896         END DO
2897         o1_local = 0
2898         o2_local = 0
2899      CASE (FUNC_DXDY, FUNC_DYDZ, FUNC_DZDX)
2900         ider1 = ga_gb_function - 600
2901         ider2 = ga_gb_function - 600 + 1
2902         IF (ider2 > 3) ider2 = ider1 - 2
2903         la_max_local = la_max + 2
2904         la_min_local = MAX(la_min - 2, 0)
2905         lb_max_local = lb_max + 2
2906         lb_min_local = MAX(lb_min - 2, 0)
2907         ! create a new pab_local so that mapping pab_local with pgf_a pgf_b
2908         ! is equivalent to mapping pab with
2909         !   d_{ider1} pgf_a d_{ider1} pgf_b
2910         ALLOCATE (pab_local(ncoset(la_max_local), ncoset(lb_max_local)))
2911         pab_local = 0.0_dp
2912         DO lxa = 0, la_max
2913         DO lxb = 0, lb_max
2914            DO lya = 0, la_max - lxa
2915            DO lyb = 0, lb_max - lxb
2916               DO lza = MAX(la_min - lxa - lya, 0), la_max - lxa - lya
2917               DO lzb = MAX(lb_min - lxb - lyb, 0), lb_max - lxb - lyb
2918                  ! this element of pab results in 16 elements of pab_local
2919                  CALL prepare_dijadijb(pab_local, pab, ider1, ider2, &
2920                                        lxa, lya, lza, lxb, lyb, lzb, o1, o2, zeta, zetb)
2921               END DO
2922               END DO
2923            END DO
2924            END DO
2925         END DO
2926         END DO
2927         o1_local = 0
2928         o2_local = 0
2929      CASE (FUNC_DXDX, FUNC_DYDY, FUNC_DZDZ)
2930         ider1 = ga_gb_function - 603
2931         la_max_local = la_max + 2
2932         la_min_local = MAX(la_min - 2, 0)
2933         lb_max_local = lb_max + 2
2934         lb_min_local = MAX(lb_min - 2, 0)
2935         ! create a new pab_local so that mapping pab_local with pgf_a pgf_b
2936         ! is equivalent to mapping pab with
2937         !   dd_{ider1} pgf_a dd_{ider1} pgf_b
2938
2939         ALLOCATE (pab_local(ncoset(la_max_local), ncoset(lb_max_local)))
2940         pab_local = 0.0_dp
2941         DO lxa = 0, la_max
2942         DO lxb = 0, lb_max
2943            DO lya = 0, la_max - lxa
2944            DO lyb = 0, lb_max - lxb
2945               DO lza = MAX(la_min - lxa - lya, 0), la_max - lxa - lya
2946               DO lzb = MAX(lb_min - lxb - lyb, 0), lb_max - lxb - lyb
2947                  ! this element of pab results in 9 elements of pab_local
2948                  CALL prepare_diiadiib(pab_local, pab, ider1, &
2949                                        lxa, lya, lza, lxb, lyb, lzb, o1, o2, zeta, zetb)
2950               END DO
2951               END DO
2952            END DO
2953            END DO
2954         END DO
2955         END DO
2956         o1_local = 0
2957         o2_local = 0
2958      CASE (FUNC_ARDBmDARB)
2959         CPASSERT(PRESENT(idir))
2960         CPASSERT(PRESENT(ir))
2961         la_max_local = la_max + 1
2962         la_min_local = MAX(la_min - 1, 0)
2963         lb_max_local = lb_max + 2
2964         lb_min_local = MAX(lb_min - 1, 0)
2965         ! create a new pab_local so that mapping pab_local with pgf_a pgf_b
2966         ! is equivalent to mapping pab with
2967         ! pgf_a (r-Rb)_{ir} (nabla_{idir} pgf_b) - (nabla_{idir} pgf_a) (r-Rb)_{ir}  pgf_b
2968         ! ( pgf_a )(r-Rb)_{ir} (ddx pgf_b) - (ddx pgf_a) (r-Rb)_{ir} ( pgf_b ) =
2969         !                        pgf_a *(lbx pgf_{b-1x+1ir} - 2*zetb*pgf_{b+1x+1ir}) -
2970         !                       (lax pgf_{a-1x} - 2*zeta*pgf_{a+1x}) pgf_{b+1ir}
2971
2972         ALLOCATE (pab_local(ncoset(la_max_local), ncoset(lb_max_local)))
2973         pab_local = 0.0_dp
2974         DO lxa = 0, la_max
2975         DO lxb = 0, lb_max
2976            DO lya = 0, la_max - lxa
2977            DO lyb = 0, lb_max - lxb
2978               DO lza = MAX(la_min - lxa - lya, 0), la_max - lxa - lya
2979               DO lzb = MAX(lb_min - lxb - lyb, 0), lb_max - lxb - lyb
2980
2981                  ! this element of pab results in 4 elements of pab_local
2982                  CALL prepare_ardb_m_darb(pab_local, pab, idir, ir, &
2983                                           lxa, lya, lza, lxb, lyb, lzb, o1, o2, zeta, zetb)
2984               END DO
2985               END DO
2986            END DO
2987            END DO
2988         END DO
2989         END DO
2990         o1_local = 0
2991         o2_local = 0
2992      CASE (FUNC_ARB)
2993         CPASSERT(PRESENT(ir))
2994         la_max_local = la_max
2995         la_min_local = la_min
2996         lb_max_local = lb_max + 1
2997         lb_min_local = lb_min
2998         ! create a new pab_local so that mapping pab_local with pgf_a pgf_b
2999         ! is equivalent to mapping pab with
3000         ! pgf_a (r-Rb)_{ir} pgf_b = pgf_a * pgf_{b+1ir}
3001         ALLOCATE (pab_local(ncoset(la_max_local), ncoset(lb_max_local)))
3002         pab_local = 0.0_dp
3003         DO lxa = 0, la_max
3004         DO lxb = 0, lb_max
3005            DO lya = 0, la_max - lxa
3006            DO lyb = 0, lb_max - lxb
3007               DO lza = MAX(la_min - lxa - lya, 0), la_max - lxa - lya
3008               DO lzb = MAX(lb_min - lxb - lyb, 0), lb_max - lxb - lyb
3009                  ! this element of pab results in 4 elements of pab_local
3010                  CALL prepare_arb(pab_local, pab, ir, lxa, lya, lza, lxb, lyb, lzb, o1, o2)
3011               END DO
3012               END DO
3013            END DO
3014            END DO
3015         END DO
3016         END DO
3017         o1_local = 0
3018         o2_local = 0
3019      CASE (FUNC_AB)
3020         la_max_local = la_max
3021         la_min_local = la_min
3022         lb_max_local = lb_max
3023         lb_min_local = lb_min
3024         pab_local => pab
3025         o1_local = o1
3026         o2_local = o2
3027      CASE DEFAULT
3028         CPASSERT(.FALSE.)
3029      END SELECT
3030
3031      ng(:) = rsgrid%desc%npts(:)
3032      grid => rsgrid%r(:, :, :)
3033      gridbounds(1, 1) = LBOUND(GRID, 1)
3034      gridbounds(2, 1) = UBOUND(GRID, 1)
3035      gridbounds(1, 2) = LBOUND(GRID, 2)
3036      gridbounds(2, 2) = UBOUND(GRID, 2)
3037      gridbounds(1, 3) = LBOUND(GRID, 3)
3038      gridbounds(2, 3) = UBOUND(GRID, 3)
3039
3040      IF (PRESENT(ir) .AND. PRESENT(rsgauge) .AND. PRESENT(rsbuf)) THEN
3041         grid => rsbuf%r(:, :, :)
3042         grid_tmp => rsgrid%r(:, :, :)
3043         gauge => rsgauge%r(:, :, :)
3044      ENDIF
3045
3046!   *** initialise the coefficient matrix, we transform the sum
3047!
3048!   sum_{lxa,lya,lza,lxb,lyb,lzb} P_{lxa,lya,lza,lxb,lyb,lzb} *
3049!           (x-a_x)**lxa (y-a_y)**lya (z-a_z)**lza (x-b_x)**lxb (y-a_y)**lya (z-a_z)**lza
3050!
3051!   into
3052!
3053!   sum_{lxp,lyp,lzp} P_{lxp,lyp,lzp} (x-p_x)**lxp (y-p_y)**lyp (z-p_z)**lzp
3054!
3055!   where p is center of the product gaussian, and lp = la_max + lb_max
3056!   (current implementation is l**7)
3057!
3058      lp = la_max_local + lb_max_local
3059!
3060!   compute polynomial expansion coefs -> (x-a)**lxa (x-b)**lxb -> sum_{ls} alpha(ls,lxa,lxb,1)*(x-p)**ls
3061!
3062!   *** make the alpha matrix ***
3063      alpha(:, :, :, :) = 0.0_dp
3064      DO iaxis = 1, 3
3065      DO lxa = 0, la_max_local
3066      DO lxb = 0, lb_max_local
3067         binomial_k_lxa = 1.0_dp
3068         a = 1.0_dp
3069         DO k = 0, lxa
3070            binomial_l_lxb = 1.0_dp
3071            b = 1.0_dp
3072            DO l = 0, lxb
3073               alpha(lxa - l + lxb - k, lxa, lxb, iaxis) = alpha(lxa - l + lxb - k, lxa, lxb, iaxis) + &
3074                                                           binomial_k_lxa*binomial_l_lxb*a*b
3075               binomial_l_lxb = binomial_l_lxb*REAL(lxb - l, dp)/REAL(l + 1, dp)
3076               b = b*(rp(iaxis) - (ra(iaxis) + rab(iaxis)))
3077            ENDDO
3078            binomial_k_lxa = binomial_k_lxa*REAL(lxa - k, dp)/REAL(k + 1, dp)
3079            a = a*(-ra(iaxis) + rp(iaxis))
3080         ENDDO
3081      ENDDO
3082      ENDDO
3083      ENDDO
3084
3085!
3086!   compute P_{lxp,lyp,lzp} given P_{lxa,lya,lza,lxb,lyb,lzb} and alpha(ls,lxa,lxb,1)
3087!   use a three step procedure
3088!   we don't store zeros, so counting is done using lxyz,lxy in order to have contiguous memory access in collocate_fast.F
3089!
3090      lxyz = 0
3091      DO lzp = 0, lp
3092      DO lyp = 0, lp - lzp
3093      DO lxp = 0, lp - lzp - lyp
3094         lxyz = lxyz + 1
3095         coef_xyz(lxyz) = 0.0_dp
3096      ENDDO
3097      ENDDO
3098      ENDDO
3099      DO lzb = 0, lb_max_local
3100      DO lza = 0, la_max_local
3101         lxy = 0
3102         DO lyp = 0, lp - lza - lzb
3103            DO lxp = 0, lp - lza - lzb - lyp
3104               lxy = lxy + 1
3105               coef_xyt(lxy) = 0.0_dp
3106            ENDDO
3107            lxy = lxy + lza + lzb
3108         ENDDO
3109         DO lyb = 0, lb_max_local - lzb
3110         DO lya = 0, la_max_local - lza
3111            lxpm = (lb_max_local - lzb - lyb) + (la_max_local - lza - lya)
3112            coef_xtt(0:lxpm) = 0.0_dp
3113            DO lxb = MAX(lb_min_local - lzb - lyb, 0), lb_max_local - lzb - lyb
3114            DO lxa = MAX(la_min_local - lza - lya, 0), la_max_local - lza - lya
3115               ico = coset(lxa, lya, lza)
3116               jco = coset(lxb, lyb, lzb)
3117               p_ele = prefactor*pab_local(o1_local + ico, o2_local + jco)
3118               DO lxp = 0, lxa + lxb
3119                  coef_xtt(lxp) = coef_xtt(lxp) + p_ele*alpha(lxp, lxa, lxb, 1)
3120               ENDDO
3121            ENDDO
3122            ENDDO
3123            lxy = 0
3124            DO lyp = 0, lya + lyb
3125               DO lxp = 0, lp - lza - lzb - lya - lyb
3126                  lxy = lxy + 1
3127                  coef_xyt(lxy) = coef_xyt(lxy) + alpha(lyp, lya, lyb, 2)*coef_xtt(lxp)
3128               ENDDO
3129               lxy = lxy + lza + lzb + lya + lyb - lyp
3130            ENDDO
3131         ENDDO
3132         ENDDO
3133         lxyz = 0
3134         DO lzp = 0, lza + lzb
3135            lxy = 0
3136            DO lyp = 0, lp - lza - lzb
3137               DO lxp = 0, lp - lza - lzb - lyp
3138                  lxy = lxy + 1; lxyz = lxyz + 1
3139                  coef_xyz(lxyz) = coef_xyz(lxyz) + alpha(lzp, lza, lzb, 3)*coef_xyt(lxy)
3140               ENDDO
3141               lxy = lxy + lza + lzb; lxyz = lxyz + lza + lzb - lzp
3142            ENDDO
3143            DO lyp = lp - lza - lzb + 1, lp - lzp
3144               DO lxp = 0, lp - lyp - lzp
3145                  lxyz = lxyz + 1
3146               ENDDO
3147            ENDDO
3148         ENDDO
3149      ENDDO
3150      ENDDO
3151
3152      IF (subpatch_collocate) THEN
3153         CALL collocate_general_subpatch()
3154      ELSE
3155         IF (rsgrid%desc%orthorhombic) THEN
3156            CALL collocate_ortho()
3157            ! CALL collocate_general()
3158         ELSE
3159            CALL collocate_general_wings()
3160            !CALL collocate_general_opt()
3161         END IF
3162      END IF
3163
3164      IF (ga_gb_function /= FUNC_AB) THEN
3165         DEALLOCATE (pab_local)
3166      ENDIF
3167
3168   CONTAINS
3169
3170      !
3171      ! this treats efficiently the orthogonal case
3172      !
3173! **************************************************************************************************
3174!> \brief ...
3175! **************************************************************************************************
3176      SUBROUTINE collocate_ortho()
3177
3178!   *** properties of the grid ***
3179
3180         ! notice we're in the ortho case
3181         dr(1) = rsgrid%desc%dh(1, 1)
3182         dr(2) = rsgrid%desc%dh(2, 2)
3183         dr(3) = rsgrid%desc%dh(3, 3)
3184
3185!   *** get the sub grid properties for the given radius ***
3186         CALL return_cube(cube_info, radius, lb_cube, ub_cube, sphere_bounds)
3187         cmax = MAXVAL(ub_cube)
3188
3189!   *** position of the gaussian product
3190!
3191!   this is the actual definition of the position on the grid
3192!   i.e. a point rp(:) gets here grid coordinates
3193!   MODULO(rp(:)/dr(:),ng(:))+1
3194!   hence (0.0,0.0,0.0) in real space is rsgrid%lb on the rsgrid ((1,1,1) on grid)
3195!
3196
3197         ALLOCATE (map(-cmax:cmax, 3))
3198         CALL compute_cube_center(cubecenter, rsgrid%desc, zeta, zetb, ra, rab)
3199         roffset(:) = rp(:) - REAL(cubecenter(:), dp)*dr(:)
3200!   *** a mapping so that the ig corresponds to the right grid point
3201         DO i = 1, 3
3202            IF (rsgrid%desc%perd(i) == 1) THEN
3203               start = lb_cube(i)
3204               DO
3205                  offset = MODULO(cubecenter(i) + start, ng(i)) + 1 - start
3206                  length = MIN(ub_cube(i), ng(i) - offset) - start
3207                  DO ig = start, start + length
3208                     map(ig, i) = ig + offset
3209                  END DO
3210                  IF (start + length .GE. ub_cube(i)) EXIT
3211                  start = start + length + 1
3212               END DO
3213            ELSE
3214               ! this takes partial grid + border regions into account
3215               offset = MODULO(cubecenter(i) + lb_cube(i) + rsgrid%desc%lb(i) - rsgrid%lb_local(i), ng(i)) + 1 - lb_cube(i)
3216               ! check for out of bounds
3217               IF (ub_cube(i) + offset > UBOUND(grid, i) .OR. lb_cube(i) + offset < LBOUND(grid, i)) THEN
3218                  CPASSERT(.FALSE.)
3219               ENDIF
3220               DO ig = lb_cube(i), ub_cube(i)
3221                  map(ig, i) = ig + offset
3222               END DO
3223            END IF
3224         ENDDO
3225         ALLOCATE (pol_z(1:2, 0:lp, -cmax:0))
3226         ALLOCATE (pol_y(1:2, 0:lp, -cmax:0))
3227         ALLOCATE (pol_x(0:lp, -cmax:cmax))
3228
3229         IF (PRESENT(ir) .AND. PRESENT(rsgauge)) CALL collocate_ortho_set_to_0()
3230
3231#include "prep.f90"
3232
3233         IF (PRESENT(lgrid)) THEN
3234#include "call_collocate_omp.f90"
3235         ELSE
3236#include "call_collocate.f90"
3237         END IF
3238
3239         IF (PRESENT(ir) .AND. PRESENT(rsgauge)) CALL collocate_gauge_ortho()
3240
3241         ! deallocation needed to pass around a pgi bug..
3242         DEALLOCATE (pol_z)
3243         DEALLOCATE (pol_y)
3244         DEALLOCATE (pol_x)
3245         DEALLOCATE (map)
3246
3247      END SUBROUTINE collocate_ortho
3248
3249! **************************************************************************************************
3250!> \brief ...
3251! **************************************************************************************************
3252      SUBROUTINE collocate_gauge_ortho()
3253      INTEGER                                            :: i, igmax, igmin, j, j2, jg, jg2, jgmin, &
3254                                                            k, k2, kg, kg2, kgmin, sci
3255      REAL(KIND=dp)                                      :: point(3, 4), res(4), x, y, y2, z, z2
3256
3257! notice we're in the ortho case
3258
3259         dr(1) = rsgrid%desc%dh(1, 1)
3260         dr(2) = rsgrid%desc%dh(2, 2)
3261         dr(3) = rsgrid%desc%dh(3, 3)
3262         !
3263         sci = 1
3264         kgmin = sphere_bounds(sci)
3265         sci = sci + 1
3266         DO kg = kgmin, 0
3267            kg2 = 1 - kg
3268            k = map(kg, 3)
3269            k2 = map(kg2, 3)
3270            jgmin = sphere_bounds(sci)
3271            sci = sci + 1
3272            z = (REAL(kg, dp) + REAL(cubecenter(3), dp))*dr(3)
3273            z2 = (REAL(kg2, dp) + REAL(cubecenter(3), dp))*dr(3)
3274            DO jg = jgmin, 0
3275               jg2 = 1 - jg
3276               j = map(jg, 2)
3277               j2 = map(jg2, 2)
3278               igmin = sphere_bounds(sci)
3279               sci = sci + 1
3280               igmax = 1 - igmin
3281               y = (REAL(jg, dp) + REAL(cubecenter(2), dp))*dr(2)
3282               y2 = (REAL(jg2, dp) + REAL(cubecenter(2), dp))*dr(2)
3283               DO ig = igmin, igmax
3284                  i = map(ig, 1)
3285                  x = (REAL(ig, dp) + REAL(cubecenter(1), dp))*dr(1)
3286                  point(1, 1) = x; point(2, 1) = y; point(3, 1) = z
3287                  point(1, 2) = x; point(2, 2) = y2; point(3, 2) = z
3288                  point(1, 3) = x; point(2, 3) = y; point(3, 3) = z2
3289                  point(1, 4) = x; point(2, 4) = y2; point(3, 4) = z2
3290                  !
3291                  res(1) = (point(ir, 1) - rb(ir)) - gauge(i, j, k)
3292                  res(2) = (point(ir, 2) - rb(ir)) - gauge(i, j2, k)
3293                  res(3) = (point(ir, 3) - rb(ir)) - gauge(i, j, k2)
3294                  res(4) = (point(ir, 4) - rb(ir)) - gauge(i, j2, k2)
3295                  !
3296                  grid_tmp(i, j, k) = grid_tmp(i, j, k) + grid(i, j, k)*res(1)
3297                  grid_tmp(i, j2, k) = grid_tmp(i, j2, k) + grid(i, j2, k)*res(2)
3298                  grid_tmp(i, j, k2) = grid_tmp(i, j, k2) + grid(i, j, k2)*res(3)
3299                  grid_tmp(i, j2, k2) = grid_tmp(i, j2, k2) + grid(i, j2, k2)*res(4)
3300               ENDDO
3301            ENDDO
3302         ENDDO
3303      END SUBROUTINE collocate_gauge_ortho
3304
3305! **************************************************************************************************
3306!> \brief ...
3307! **************************************************************************************************
3308      SUBROUTINE collocate_ortho_set_to_0()
3309      INTEGER                                            :: i, igmax, igmin, j, j2, jg, jg2, jgmin, &
3310                                                            k, k2, kg, kg2, kgmin, sci
3311
3312!
3313
3314         dr(1) = rsgrid%desc%dh(1, 1)
3315         dr(2) = rsgrid%desc%dh(2, 2)
3316         dr(3) = rsgrid%desc%dh(3, 3)
3317         !
3318         sci = 1
3319         kgmin = sphere_bounds(sci)
3320         sci = sci + 1
3321         DO kg = kgmin, 0
3322            kg2 = 1 - kg
3323            k = map(kg, 3)
3324            k2 = map(kg2, 3)
3325            jgmin = sphere_bounds(sci)
3326            sci = sci + 1
3327            DO jg = jgmin, 0
3328               jg2 = 1 - jg
3329               j = map(jg, 2)
3330               j2 = map(jg2, 2)
3331               igmin = sphere_bounds(sci)
3332               sci = sci + 1
3333               igmax = 1 - igmin
3334               DO ig = igmin, igmax
3335                  i = map(ig, 1)
3336                  grid(i, j, k) = 0.0_dp
3337                  grid(i, j2, k) = 0.0_dp
3338                  grid(i, j, k2) = 0.0_dp
3339                  grid(i, j2, k2) = 0.0_dp
3340               ENDDO
3341            ENDDO
3342         ENDDO
3343      END SUBROUTINE collocate_ortho_set_to_0
3344
3345!
3346!   this is a general 'optimized' routine to do the collocation
3347!
3348! **************************************************************************************************
3349!> \brief ...
3350! **************************************************************************************************
3351      SUBROUTINE collocate_general_opt()
3352
3353      INTEGER :: i, i_index, il, ilx, ily, ilz, index_max(3), index_min(3), ismax, ismin, j, &
3354         j_index, jl, jlx, jly, jlz, k, k_index, kl, klx, kly, klz, lpx, lpy, lpz, lx, ly, lz, &
3355         offset(3)
3356      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: grid_map
3357      INTEGER, ALLOCATABLE, DIMENSION(:, :, :)           :: coef_map
3358      REAL(KIND=dp)                                      :: a, b, c, d, di, dip, dj, djp, dk, dkp, &
3359                                                            exp0i, exp1i, exp2i, gp(3), &
3360                                                            hmatgrid(3, 3), pointj(3), pointk(3), &
3361                                                            res, v(3)
3362      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: coef_ijk
3363      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: hmatgridp
3364
3365!
3366! transform P_{lxp,lyp,lzp} into a P_{lip,ljp,lkp} such that
3367! sum_{lxp,lyp,lzp} P_{lxp,lyp,lzp} (x-x_p)**lxp (y-y_p)**lyp (z-z_p)**lzp =
3368! sum_{lip,ljp,lkp} P_{lip,ljp,lkp} (i-i_p)**lip (j-j_p)**ljp (k-k_p)**lkp
3369!
3370
3371         ALLOCATE (coef_ijk(((lp + 1)*(lp + 2)*(lp + 3))/6))
3372
3373         ! aux mapping array to simplify life
3374         ALLOCATE (coef_map(0:lp, 0:lp, 0:lp))
3375         coef_map = HUGE(coef_map)
3376         lxyz = 0
3377         DO lzp = 0, lp
3378         DO lyp = 0, lp - lzp
3379         DO lxp = 0, lp - lzp - lyp
3380            lxyz = lxyz + 1
3381            coef_ijk(lxyz) = 0.0_dp
3382            coef_map(lxp, lyp, lzp) = lxyz
3383         ENDDO
3384         ENDDO
3385         ENDDO
3386
3387         ! cell hmat in grid points
3388         hmatgrid = rsgrid%desc%dh
3389
3390         ! center in grid coords
3391         gp = MATMUL(rsgrid%desc%dh_inv, rp)
3392         cubecenter(:) = FLOOR(gp)
3393
3394         ! transform using multinomials
3395         ALLOCATE (hmatgridp(3, 3, 0:lp))
3396         hmatgridp(:, :, 0) = 1.0_dp
3397         DO k = 1, lp
3398            hmatgridp(:, :, k) = hmatgridp(:, :, k - 1)*hmatgrid(:, :)
3399         ENDDO
3400
3401         lpx = lp
3402         DO klx = 0, lpx
3403         DO jlx = 0, lpx - klx
3404         DO ilx = 0, lpx - klx - jlx
3405            lx = ilx + jlx + klx
3406            lpy = lp - lx
3407            DO kly = 0, lpy
3408            DO jly = 0, lpy - kly
3409            DO ily = 0, lpy - kly - jly
3410               ly = ily + jly + kly
3411               lpz = lp - lx - ly
3412               DO klz = 0, lpz
3413               DO jlz = 0, lpz - klz
3414               DO ilz = 0, lpz - klz - jlz
3415                  lz = ilz + jlz + klz
3416
3417                  il = ilx + ily + ilz
3418                  jl = jlx + jly + jlz
3419                  kl = klx + kly + klz
3420                  coef_ijk(coef_map(il, jl, kl)) = &
3421                     coef_ijk(coef_map(il, jl, kl)) + coef_xyz(coef_map(lx, ly, lz))* &
3422                     hmatgridp(1, 1, ilx)*hmatgridp(1, 2, jlx)*hmatgridp(1, 3, klx)* &
3423                     hmatgridp(2, 1, ily)*hmatgridp(2, 2, jly)*hmatgridp(2, 3, kly)* &
3424                     hmatgridp(3, 1, ilz)*hmatgridp(3, 2, jlz)*hmatgridp(3, 3, klz)* &
3425                     fac(lx)*fac(ly)*fac(lz)/ &
3426                     (fac(ilx)*fac(ily)*fac(ilz)*fac(jlx)*fac(jly)*fac(jlz)*fac(klx)*fac(kly)*fac(klz))
3427               ENDDO
3428               ENDDO
3429               ENDDO
3430            ENDDO
3431            ENDDO
3432            ENDDO
3433         ENDDO
3434         ENDDO
3435         ENDDO
3436
3437         CALL return_cube_nonortho(cube_info, radius, index_min, index_max, rp)
3438
3439         offset(:) = MODULO(index_min(:) + rsgrid%desc%lb(:) - rsgrid%lb_local(:), ng(:)) + 1
3440
3441         ALLOCATE (grid_map(index_min(1):index_max(1)))
3442         DO i = index_min(1), index_max(1)
3443            grid_map(i) = MODULO(i, ng(1)) + 1
3444            IF (rsgrid%desc%perd(1) == 1) THEN
3445               grid_map(i) = MODULO(i, ng(1)) + 1
3446            ELSE
3447               grid_map(i) = i - index_min(1) + offset(1)
3448            ENDIF
3449         ENDDO
3450
3451         ! go over the grid, but cycle if the point is not within the radius
3452         DO k = index_min(3), index_max(3)
3453            dk = k - gp(3)
3454            pointk = hmatgrid(:, 3)*dk
3455
3456            IF (rsgrid%desc%perd(3) == 1) THEN
3457               k_index = MODULO(k, ng(3)) + 1
3458            ELSE
3459               k_index = k - index_min(3) + offset(3)
3460            ENDIF
3461
3462            coef_xyt = 0.0_dp
3463            lxyz = 0
3464            dkp = 1.0_dp
3465            DO kl = 0, lp
3466               lxy = 0
3467               DO jl = 0, lp - kl
3468                  DO il = 0, lp - kl - jl
3469                     lxyz = lxyz + 1; lxy = lxy + 1
3470                     coef_xyt(lxy) = coef_xyt(lxy) + coef_ijk(lxyz)*dkp
3471                  ENDDO
3472                  lxy = lxy + kl
3473               ENDDO
3474               dkp = dkp*dk
3475            ENDDO
3476
3477            DO j = index_min(2), index_max(2)
3478               dj = j - gp(2)
3479               pointj = pointk + hmatgrid(:, 2)*dj
3480               IF (rsgrid%desc%perd(2) == 1) THEN
3481                  j_index = MODULO(j, ng(2)) + 1
3482               ELSE
3483                  j_index = j - index_min(2) + offset(2)
3484               ENDIF
3485
3486               coef_xtt = 0.0_dp
3487               lxy = 0
3488               djp = 1.0_dp
3489               DO jl = 0, lp
3490                  DO il = 0, lp - jl
3491                     lxy = lxy + 1
3492                     coef_xtt(il) = coef_xtt(il) + coef_xyt(lxy)*djp
3493                  ENDDO
3494                  djp = djp*dj
3495               ENDDO
3496
3497               ! find bounds for the inner loop
3498               ! based on a quadratic equation in i
3499               ! a*i**2+b*i+c=radius**2
3500               v = pointj - gp(1)*hmatgrid(:, 1)
3501               a = DOT_PRODUCT(hmatgrid(:, 1), hmatgrid(:, 1))
3502               b = 2*DOT_PRODUCT(v, hmatgrid(:, 1))
3503               c = DOT_PRODUCT(v, v)
3504               d = b*b - 4*a*(c - radius**2)
3505
3506               IF (d < 0) THEN
3507                  CYCLE
3508               ELSE
3509                  d = SQRT(d)
3510                  ismin = CEILING((-b - d)/(2*a))
3511                  ismax = FLOOR((-b + d)/(2*a))
3512               ENDIF
3513               ! prepare for computing -zetp*rsq
3514               a = -zetp*a
3515               b = -zetp*b
3516               c = -zetp*c
3517               i = ismin - 1
3518
3519               ! the recursion relation might have to be done
3520               ! from the center of the gaussian (in both directions)
3521               ! instead as the current implementation from an edge
3522               exp2i = EXP((a*i + b)*i + c)
3523               exp1i = EXP(2*a*i + a + b)
3524               exp0i = EXP(2*a)
3525
3526               DO i = ismin, ismax
3527                  di = i - gp(1)
3528
3529                  ! polynomial terms
3530                  res = 0.0_dp
3531                  dip = 1.0_dp
3532                  DO il = 0, lp
3533                     res = res + coef_xtt(il)*dip
3534                     dip = dip*di
3535                  ENDDO
3536
3537                  ! the exponential recursion
3538                  exp2i = exp2i*exp1i
3539                  exp1i = exp1i*exp0i
3540                  res = res*exp2i
3541
3542                  i_index = grid_map(i)
3543                  IF (PRESENT(lgrid)) THEN
3544                     ig = (k_index - 1)*ng(2)*ng(1) + (j_index - 1)*ng(1) + (i_index - 1) + 1
3545                     lgrid%r(ig, ithread_l) = lgrid%r(ig, ithread_l) + res
3546                  ELSE
3547                     grid(i_index, j_index, k_index) = grid(i_index, j_index, k_index) + res
3548                  ENDIF
3549               ENDDO
3550            ENDDO
3551         ENDDO
3552         !t2=nanotime_ia32()
3553         !write(*,*) t2-t1
3554         ! deallocation needed to pass around a pgi bug..
3555         DEALLOCATE (coef_ijk)
3556         DEALLOCATE (coef_map)
3557         DEALLOCATE (hmatgridp)
3558         DEALLOCATE (grid_map)
3559
3560      END SUBROUTINE collocate_general_opt
3561
3562! **************************************************************************************************
3563!> \brief ...
3564! **************************************************************************************************
3565      SUBROUTINE collocate_general_subpatch()
3566      INTEGER, DIMENSION(2, 3)                           :: local_b
3567      INTEGER, DIMENSION(3)                              :: local_s, periodic
3568      REAL(dp), DIMENSION((lp+1)*(lp+2)*(lp+3)/6)        :: poly_d3
3569
3570         periodic = 1 ! cell%perd
3571         CALL poly_cp2k2d3(coef_xyz, lp, poly_d3)
3572         local_b(1, :) = rsgrid%lb_real - rsgrid%desc%lb
3573         local_b(2, :) = rsgrid%ub_real - rsgrid%desc%lb
3574         local_s = rsgrid%lb_real - rsgrid%lb_local
3575         IF (BTEST(subpatch_pattern, 0)) local_b(1, 1) = local_b(1, 1) - rsgrid%desc%border
3576         IF (BTEST(subpatch_pattern, 1)) local_b(2, 1) = local_b(2, 1) + rsgrid%desc%border
3577         IF (BTEST(subpatch_pattern, 2)) local_b(1, 2) = local_b(1, 2) - rsgrid%desc%border
3578         IF (BTEST(subpatch_pattern, 3)) local_b(2, 2) = local_b(2, 2) + rsgrid%desc%border
3579         IF (BTEST(subpatch_pattern, 4)) local_b(1, 3) = local_b(1, 3) - rsgrid%desc%border
3580         IF (BTEST(subpatch_pattern, 5)) local_b(2, 3) = local_b(2, 3) + rsgrid%desc%border
3581         IF (BTEST(subpatch_pattern, 0)) local_s(1) = local_s(1) - rsgrid%desc%border
3582         IF (BTEST(subpatch_pattern, 2)) local_s(2) = local_s(2) - rsgrid%desc%border
3583         IF (BTEST(subpatch_pattern, 4)) local_s(3) = local_s(3) - rsgrid%desc%border
3584         IF (PRESENT(lgrid)) THEN
3585            CALL collocGauss(h=cell%hmat, h_inv=cell%h_inv, &
3586                             grid=grid, poly=poly_d3, alphai=zetp, posi=rp, max_r2=radius*radius, &
3587                             periodic=periodic, gdim=ng, local_bounds=local_b, local_shift=local_s, &
3588                             lgrid=lgrid)
3589         ELSE
3590            CALL collocGauss(h=cell%hmat, h_inv=cell%h_inv, &
3591                             grid=grid, poly=poly_d3, alphai=zetp, posi=rp, max_r2=radius*radius, &
3592                             periodic=periodic, gdim=ng, local_bounds=local_b, local_shift=local_s)
3593         END IF
3594         ! defaults: local_shift=(/0,0,0/),poly_shift=(/0.0_dp,0.0_dp,0.0_dp/),scale=1.0_dp,
3595
3596      END SUBROUTINE
3597
3598! **************************************************************************************************
3599!> \brief ...
3600! **************************************************************************************************
3601      SUBROUTINE collocate_general_wings()
3602      INTEGER, DIMENSION(2, 3)                           :: local_b
3603      INTEGER, DIMENSION(3)                              :: periodic
3604      REAL(dp), DIMENSION((lp+1)*(lp+2)*(lp+3)/6)        :: poly_d3
3605      REAL(dp), DIMENSION(3)                             :: local_shift, rShifted
3606
3607         periodic = 1 ! cell%perd
3608         CALL poly_cp2k2d3(coef_xyz, lp, poly_d3)
3609         local_b(1, :) = 0
3610         local_b(2, :) = MIN(rsgrid%desc%npts - 1, rsgrid%ub_local - rsgrid%lb_local)
3611         local_shift = REAL(rsgrid%desc%lb - rsgrid%lb_local, dp)/REAL(rsgrid%desc%npts, dp)
3612         rShifted(1) = rp(1) + cell%hmat(1, 1)*local_shift(1) &
3613                       + cell%hmat(1, 2)*local_shift(2) &
3614                       + cell%hmat(1, 3)*local_shift(3)
3615         rShifted(2) = rp(2) + cell%hmat(2, 1)*local_shift(1) &
3616                       + cell%hmat(2, 2)*local_shift(2) &
3617                       + cell%hmat(2, 3)*local_shift(3)
3618         rShifted(3) = rp(3) + cell%hmat(3, 1)*local_shift(1) &
3619                       + cell%hmat(3, 2)*local_shift(2) &
3620                       + cell%hmat(3, 3)*local_shift(3)
3621         IF (PRESENT(lgrid)) THEN
3622            CALL collocGauss(h=cell%hmat, h_inv=cell%h_inv, &
3623                             grid=grid, poly=poly_d3, alphai=zetp, posi=rShifted, max_r2=radius*radius, &
3624                             periodic=periodic, gdim=ng, local_bounds=local_b, &
3625                             lgrid=lgrid)
3626         ELSE
3627            CALL collocGauss(h=cell%hmat, h_inv=cell%h_inv, &
3628                             grid=grid, poly=poly_d3, alphai=zetp, posi=rShifted, max_r2=radius*radius, &
3629                             periodic=periodic, gdim=ng, local_bounds=local_b)
3630         END IF
3631         ! defaults: local_shift=(/0,0,0/),poly_shift=(/0.0_dp,0.0_dp,0.0_dp/),scale=1.0_dp,
3632
3633      END SUBROUTINE
3634
3635!
3636!   this is a general 'reference' routine to do the collocation
3637!
3638! **************************************************************************************************
3639!> \brief ...
3640! **************************************************************************************************
3641      SUBROUTINE collocate_general()
3642
3643      INTEGER                                            :: i, index_max(3), index_min(3), &
3644                                                            ipoint(3), j, k
3645      REAL(KIND=dp)                                      :: inv_ng(3), point(3), primpt
3646
3647! still hard-wired (see MODULO)
3648
3649         CPASSERT(ALL(rsgrid%desc%perd == 1))
3650
3651         CALL return_cube_nonortho(cube_info, radius, index_min, index_max, rp)
3652
3653         inv_ng = 1.0_dp/ng
3654
3655         ! go over the grid, but cycle if the point is not within the radius
3656         DO k = index_min(3), index_max(3)
3657         DO j = index_min(2), index_max(2)
3658         DO i = index_min(1), index_max(1)
3659            ! point in real space
3660            point = MATMUL(cell%hmat, REAL((/i, j, k/), KIND=dp)*inv_ng)
3661            ! primitive_value of point
3662            primpt = primitive_value(point)
3663            ! skip if outside of the sphere
3664            IF (SUM((point - rp)**2) > radius**2) CYCLE
3665            ! point on the grid (including pbc)
3666            ipoint = MODULO((/i, j, k/), ng) + 1
3667            ! add to grid
3668            IF (PRESENT(lgrid)) THEN
3669               ig = ipoint(3)*ng(2)*ng(1) + ipoint(2)*ng(1) + ipoint(1) + 1
3670               lgrid%r(ig, ithread_l) = lgrid%r(ig, ithread_l) + primpt
3671            ELSE
3672               grid(ipoint(1), ipoint(2), ipoint(3)) = grid(ipoint(1), ipoint(2), ipoint(3)) + primpt
3673            ENDIF
3674         ENDDO
3675         ENDDO
3676         ENDDO
3677
3678      END SUBROUTINE collocate_general
3679
3680! **************************************************************************************************
3681!> \brief ...
3682!> \param point ...
3683!> \return ...
3684! **************************************************************************************************
3685      FUNCTION primitive_value(point) RESULT(res)
3686      REAL(KIND=dp)                                      :: point(3), res
3687
3688      REAL(KIND=dp)                                      :: dra(3), drap(3), drb(3), drbp(3), myexp, &
3689                                                            pdrap
3690
3691         res = 0.0_dp
3692         myexp = EXP(-zetp*SUM((point - rp)**2))*prefactor
3693         dra = point - ra
3694         drb = point - rb
3695         drap(1) = 1.0_dp
3696         DO lxa = 0, la_max_local
3697            drbp(1) = 1.0_dp
3698            DO lxb = 0, lb_max_local
3699               drap(2) = 1.0_dp
3700               DO lya = 0, la_max_local - lxa
3701                  drbp(2) = 1.0_dp
3702                  DO lyb = 0, lb_max_local - lxb
3703                     drap(3) = 1.0_dp
3704                     DO lza = 1, MAX(la_min_local - lxa - lya, 0)
3705                        drap(3) = drap(3)*dra(3)
3706                     ENDDO
3707                     DO lza = MAX(la_min_local - lxa - lya, 0), la_max_local - lxa - lya
3708                        drbp(3) = 1.0_dp
3709                        DO lzb = 1, MAX(lb_min_local - lxb - lyb, 0)
3710                           drbp(3) = drbp(3)*drb(3)
3711                        ENDDO
3712                        ico = coset(lxa, lya, lza)
3713                        pdrap = PRODUCT(drap)
3714                        DO lzb = MAX(lb_min_local - lxb - lyb, 0), lb_max_local - lxb - lyb
3715                           jco = coset(lxb, lyb, lzb)
3716                           res = res + pab_local(ico + o1_local, jco + o2_local)*myexp*pdrap*PRODUCT(drbp)
3717                           drbp(3) = drbp(3)*drb(3)
3718                        ENDDO
3719                        drap(3) = drap(3)*dra(3)
3720                     ENDDO
3721                     drbp(2) = drbp(2)*drb(2)
3722                  ENDDO
3723                  drap(2) = drap(2)*dra(2)
3724               ENDDO
3725               drbp(1) = drbp(1)*drb(1)
3726            ENDDO
3727            drap(1) = drap(1)*dra(1)
3728         ENDDO
3729
3730      END FUNCTION primitive_value
3731
3732   END SUBROUTINE collocate_pgf_product_rspace
3733
3734! **************************************************************************************************
3735!> \brief low level collocation of primitive gaussian functions in g-space
3736!> \param la_max ...
3737!> \param zeta ...
3738!> \param la_min ...
3739!> \param lb_max ...
3740!> \param zetb ...
3741!> \param lb_min ...
3742!> \param ra ...
3743!> \param rab ...
3744!> \param rab2 ...
3745!> \param scale ...
3746!> \param pab ...
3747!> \param na ...
3748!> \param nb ...
3749!> \param eps_rho_gspace ...
3750!> \param gsq_max ...
3751!> \param pw ...
3752! **************************************************************************************************
3753   SUBROUTINE collocate_pgf_product_gspace(la_max, zeta, la_min, &
3754                                           lb_max, zetb, lb_min, &
3755                                           ra, rab, rab2, scale, pab, na, nb, &
3756                                           eps_rho_gspace, gsq_max, pw)
3757
3758      ! NOTE: this routine is much slower than collocate_pgf_product_rspace
3759
3760      INTEGER, INTENT(IN)                                :: la_max
3761      REAL(dp), INTENT(IN)                               :: zeta
3762      INTEGER, INTENT(IN)                                :: la_min, lb_max
3763      REAL(dp), INTENT(IN)                               :: zetb
3764      INTEGER, INTENT(IN)                                :: lb_min
3765      REAL(dp), DIMENSION(3), INTENT(IN)                 :: ra, rab
3766      REAL(dp), INTENT(IN)                               :: rab2, scale
3767      REAL(dp), DIMENSION(:, :), POINTER                 :: pab
3768      INTEGER, INTENT(IN)                                :: na, nb
3769      REAL(dp), INTENT(IN)                               :: eps_rho_gspace, gsq_max
3770      TYPE(pw_type), POINTER                             :: pw
3771
3772      CHARACTER(LEN=*), PARAMETER :: routineN = 'collocate_pgf_product_gspace', &
3773                                     routineP = moduleN//':'//routineN
3774
3775      COMPLEX(dp), DIMENSION(3)                          :: phasefactor
3776      COMPLEX(dp), DIMENSION(:), POINTER                 :: rag, rbg
3777      COMPLEX(dp), DIMENSION(:, :, :, :), POINTER        :: cubeaxis
3778      INTEGER                                            :: ax, ay, az, bx, by, bz, handle, i, ico, &
3779                                                            ig, ig2, jco, jg, kg, la, lb, &
3780                                                            lb_cube_min, lb_grid, ub_cube_max, &
3781                                                            ub_grid
3782      INTEGER, DIMENSION(3)                              :: lb_cube, ub_cube
3783      REAL(dp)                                           :: f, fa, fb, pij, prefactor, rzetp, &
3784                                                            twozetp, zetp
3785      REAL(dp), DIMENSION(3)                             :: dg, expfactor, fap, fbp, rap, rbp, rp
3786      REAL(dp), DIMENSION(:), POINTER                    :: g
3787
3788      CALL timeset(routineN, handle)
3789
3790      dg(:) = twopi/(pw%pw_grid%npts(:)*pw%pw_grid%dr(:))
3791
3792      zetp = zeta + zetb
3793      rzetp = 1.0_dp/zetp
3794      f = zetb*rzetp
3795      rap(:) = f*rab(:)
3796      rbp(:) = rap(:) - rab(:)
3797      rp(:) = ra(:) + rap(:)
3798      twozetp = 2.0_dp*zetp
3799      fap(:) = twozetp*rap(:)
3800      fbp(:) = twozetp*rbp(:)
3801
3802      prefactor = scale*SQRT((pi*rzetp)**3)*EXP(-zeta*f*rab2)
3803      phasefactor(:) = EXP(CMPLX(0.0_dp, -rp(:)*dg(:), KIND=dp))
3804      expfactor(:) = EXP(-0.25*rzetp*dg(:)*dg(:))
3805
3806      lb_cube(:) = pw%pw_grid%bounds(1, :)
3807      ub_cube(:) = pw%pw_grid%bounds(2, :)
3808
3809      lb_cube_min = MINVAL(lb_cube(:))
3810      ub_cube_max = MAXVAL(ub_cube(:))
3811
3812      NULLIFY (cubeaxis, g, rag, rbg)
3813
3814      CALL reallocate(cubeaxis, lb_cube_min, ub_cube_max, 1, 3, 0, la_max, 0, lb_max)
3815      CALL reallocate(g, lb_cube_min, ub_cube_max)
3816      CALL reallocate(rag, lb_cube_min, ub_cube_max)
3817      CALL reallocate(rbg, lb_cube_min, ub_cube_max)
3818
3819      lb_grid = LBOUND(pw%cc, 1)
3820      ub_grid = UBOUND(pw%cc, 1)
3821
3822      DO i = 1, 3
3823
3824         DO ig = lb_cube(i), ub_cube(i)
3825            ig2 = ig*ig
3826            cubeaxis(ig, i, 0, 0) = expfactor(i)**ig2*phasefactor(i)**ig
3827         END DO
3828
3829         IF (la_max > 0) THEN
3830            DO ig = lb_cube(i), ub_cube(i)
3831               g(ig) = REAL(ig, dp)*dg(i)
3832               rag(ig) = CMPLX(fap(i), -g(ig), KIND=dp)
3833               cubeaxis(ig, i, 1, 0) = rag(ig)*cubeaxis(ig, i, 0, 0)
3834            END DO
3835            DO la = 2, la_max
3836               fa = REAL(la - 1, dp)*twozetp
3837               DO ig = lb_cube(i), ub_cube(i)
3838                  cubeaxis(ig, i, la, 0) = rag(ig)*cubeaxis(ig, i, la - 1, 0) + &
3839                                           fa*cubeaxis(ig, i, la - 2, 0)
3840               END DO
3841            END DO
3842            IF (lb_max > 0) THEN
3843               fa = twozetp
3844               DO ig = lb_cube(i), ub_cube(i)
3845                  rbg(ig) = CMPLX(fbp(i), -g(ig), KIND=dp)
3846                  cubeaxis(ig, i, 0, 1) = rbg(ig)*cubeaxis(ig, i, 0, 0)
3847                  cubeaxis(ig, i, 1, 1) = rbg(ig)*cubeaxis(ig, i, 1, 0) + &
3848                                          fa*cubeaxis(ig, i, 0, 0)
3849               END DO
3850               DO lb = 2, lb_max
3851                  fb = REAL(lb - 1, dp)*twozetp
3852                  DO ig = lb_cube(i), ub_cube(i)
3853                     cubeaxis(ig, i, 0, lb) = rbg(ig)*cubeaxis(ig, i, 0, lb - 1) + &
3854                                              fb*cubeaxis(ig, i, 0, lb - 2)
3855                     cubeaxis(ig, i, 1, lb) = rbg(ig)*cubeaxis(ig, i, 1, lb - 1) + &
3856                                              fb*cubeaxis(ig, i, 1, lb - 2) + &
3857                                              fa*cubeaxis(ig, i, 0, lb - 1)
3858                  END DO
3859               END DO
3860               DO la = 2, la_max
3861                  fa = REAL(la, dp)*twozetp
3862                  DO ig = lb_cube(i), ub_cube(i)
3863                     cubeaxis(ig, i, la, 1) = rbg(ig)*cubeaxis(ig, i, la, 0) + &
3864                                              fa*cubeaxis(ig, i, la - 1, 0)
3865                  END DO
3866                  DO lb = 2, lb_max
3867                     fb = REAL(lb - 1, dp)*twozetp
3868                     DO ig = lb_cube(i), ub_cube(i)
3869                        cubeaxis(ig, i, la, lb) = rbg(ig)*cubeaxis(ig, i, la, lb - 1) + &
3870                                                  fb*cubeaxis(ig, i, la, lb - 2) + &
3871                                                  fa*cubeaxis(ig, i, la - 1, lb - 1)
3872                     END DO
3873                  END DO
3874               END DO
3875            END IF
3876         ELSE
3877            IF (lb_max > 0) THEN
3878               DO ig = lb_cube(i), ub_cube(i)
3879                  g(ig) = REAL(ig, dp)*dg(i)
3880                  rbg(ig) = CMPLX(fbp(i), -g(ig), KIND=dp)
3881                  cubeaxis(ig, i, 0, 1) = rbg(ig)*cubeaxis(ig, i, 0, 0)
3882               END DO
3883               DO lb = 2, lb_max
3884                  fb = REAL(lb - 1, dp)*twozetp
3885                  DO ig = lb_cube(i), ub_cube(i)
3886                     cubeaxis(ig, i, 0, lb) = rbg(ig)*cubeaxis(ig, i, 0, lb - 1) + &
3887                                              fb*cubeaxis(ig, i, 0, lb - 2)
3888                  END DO
3889               END DO
3890            END IF
3891         END IF
3892
3893      END DO
3894
3895      DO la = 0, la_max
3896         DO lb = 0, lb_max
3897            IF (la + lb == 0) CYCLE
3898            fa = (1.0_dp/twozetp)**(la + lb)
3899            DO i = 1, 3
3900               DO ig = lb_cube(i), ub_cube(i)
3901                  cubeaxis(ig, i, la, lb) = fa*cubeaxis(ig, i, la, lb)
3902               END DO
3903            END DO
3904         END DO
3905      END DO
3906
3907      ! Add the current primitive Gaussian function product to grid
3908
3909      DO ico = ncoset(la_min - 1) + 1, ncoset(la_max)
3910
3911         ax = indco(1, ico)
3912         ay = indco(2, ico)
3913         az = indco(3, ico)
3914
3915         DO jco = ncoset(lb_min - 1) + 1, ncoset(lb_max)
3916
3917            pij = prefactor*pab(na + ico, nb + jco)
3918
3919            IF (ABS(pij) < eps_rho_gspace) CYCLE
3920
3921            bx = indco(1, jco)
3922            by = indco(2, jco)
3923            bz = indco(3, jco)
3924
3925            DO i = lb_grid, ub_grid
3926               IF (pw%pw_grid%gsq(i) > gsq_max) CYCLE
3927               ig = pw%pw_grid%g_hat(1, i)
3928               jg = pw%pw_grid%g_hat(2, i)
3929               kg = pw%pw_grid%g_hat(3, i)
3930               pw%cc(i) = pw%cc(i) + pij*cubeaxis(ig, 1, ax, bx)* &
3931                          cubeaxis(jg, 2, ay, by)* &
3932                          cubeaxis(kg, 3, az, bz)
3933            END DO
3934
3935         END DO
3936
3937      END DO
3938
3939      DEALLOCATE (cubeaxis)
3940      DEALLOCATE (g)
3941      DEALLOCATE (rag)
3942      DEALLOCATE (rbg)
3943
3944      CALL timestop(handle)
3945
3946   END SUBROUTINE collocate_pgf_product_gspace
3947
3948END MODULE qs_collocate_density
3949