1!--------------------------------------------------------------------------------------------------!
2!   CP2K: A general program to perform molecular dynamics simulations                              !
3!   Copyright (C) 2000 - 2019  CP2K developers group                                               !
4!--------------------------------------------------------------------------------------------------!
5! **************************************************************************************************
6!> \brief Methods used with 3-center overlap type integrals containers
7!> \par History
8!>      - none
9!>      - 11.2018 fixed OMP race condition in contract3_o3c routine (A.Bussy)
10!>      - 05.2019 Added a routine to compute 3-center integrals with libint (A.Bussy)
11! **************************************************************************************************
12MODULE qs_o3c_methods
13   USE ai_contraction_sphi,             ONLY: abc_contract
14   USE ai_overlap3,                     ONLY: overlap3
15   USE basis_set_types,                 ONLY: get_gto_basis_set,&
16                                              gto_basis_set_p_type,&
17                                              gto_basis_set_type
18   USE cp_files,                        ONLY: close_file,&
19                                              open_file
20   USE cp_para_types,                   ONLY: cp_para_env_type
21   USE dbcsr_api,                       ONLY: dbcsr_get_block_p,&
22                                              dbcsr_p_type,&
23                                              dbcsr_type
24   USE gamma,                           ONLY: init_md_ftable
25   USE input_constants,                 ONLY: do_potential_coulomb,&
26                                              do_potential_short,&
27                                              do_potential_truncated
28   USE kinds,                           ONLY: dp
29   USE libint_2c_3c,                    ONLY: cutoff_screen_factor,&
30                                              eri_3center,&
31                                              libint_potential_type
32   USE libint_wrapper,                  ONLY: cp_libint_cleanup_3eri,&
33                                              cp_libint_init_3eri,&
34                                              cp_libint_set_contrdepth,&
35                                              cp_libint_t
36   USE orbital_pointers,                ONLY: ncoset
37   USE qs_o3c_types,                    ONLY: &
38        get_o3c_container, get_o3c_iterator_info, get_o3c_vec, o3c_container_type, o3c_iterate, &
39        o3c_iterator_create, o3c_iterator_release, o3c_iterator_type, o3c_vec_type, &
40        set_o3c_container
41   USE t_c_g0,                          ONLY: get_lmax_init,&
42                                              init
43
44!$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num
45#include "./base/base_uses.f90"
46
47   IMPLICIT NONE
48
49   PRIVATE
50
51   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_o3c_methods'
52
53   PUBLIC :: calculate_o3c_integrals, contract12_o3c, contract3_o3c, &
54             calculate_o3c_libint_integrals
55
56CONTAINS
57
58! **************************************************************************************************
59!> \brief ...
60!> \param o3c ...
61!> \param calculate_forces ...
62!> \param matrix_p ...
63! **************************************************************************************************
64   SUBROUTINE calculate_o3c_integrals(o3c, calculate_forces, matrix_p)
65      TYPE(o3c_container_type), POINTER                  :: o3c
66      LOGICAL, INTENT(IN), OPTIONAL                      :: calculate_forces
67      TYPE(dbcsr_p_type), DIMENSION(:), OPTIONAL, &
68         POINTER                                         :: matrix_p
69
70      CHARACTER(LEN=*), PARAMETER :: routineN = 'calculate_o3c_integrals', &
71         routineP = moduleN//':'//routineN
72
73      INTEGER :: egfa, egfb, egfc, handle, i, iatom, icol, ikind, irow, iset, ispin, j, jatom, &
74         jkind, jset, katom, kkind, kset, mepos, ncoa, ncob, ncoc, ni, nj, nk, nseta, nsetb, &
75         nsetc, nspin, nthread, sgfa, sgfb, sgfc
76      INTEGER, DIMENSION(:), POINTER                     :: la_max, la_min, lb_max, lb_min, lc_max, &
77                                                            lc_min, npgfa, npgfb, npgfc, nsgfa, &
78                                                            nsgfb, nsgfc
79      INTEGER, DIMENSION(:, :), POINTER                  :: first_sgfa, first_sgfb, first_sgfc
80      LOGICAL                                            :: do_force, found, trans
81      REAL(KIND=dp)                                      :: dij, dik, djk, fpre
82      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: pmat
83      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: sabc, sabc_contr
84      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :, :)  :: iabdc, iadbc, idabc, sabdc, sdabc
85      REAL(KIND=dp), DIMENSION(3)                        :: rij, rik, rjk
86      REAL(KIND=dp), DIMENSION(:), POINTER               :: set_radius_a, set_radius_b, set_radius_c
87      REAL(KIND=dp), DIMENSION(:, :), POINTER            :: fi, fj, fk, pblock, rpgfa, rpgfb, rpgfc, &
88                                                            sphi_a, sphi_b, sphi_c, tvec, zeta, &
89                                                            zetb, zetc
90      REAL(KIND=dp), DIMENSION(:, :, :), POINTER         :: iabc
91      TYPE(gto_basis_set_p_type), DIMENSION(:), POINTER  :: basis_set_list_a, basis_set_list_b, &
92                                                            basis_set_list_c
93      TYPE(gto_basis_set_type), POINTER                  :: basis_set_a, basis_set_b, basis_set_c
94      TYPE(o3c_iterator_type)                            :: o3c_iterator
95
96      CALL timeset(routineN, handle)
97
98      do_force = .FALSE.
99      IF (PRESENT(calculate_forces)) do_force = calculate_forces
100      CALL get_o3c_container(o3c, nspin=nspin)
101
102      ! basis sets
103      CALL get_o3c_container(o3c, basis_set_list_a=basis_set_list_a, &
104                             basis_set_list_b=basis_set_list_b, basis_set_list_c=basis_set_list_c)
105
106      nthread = 1
107!$    nthread = omp_get_max_threads()
108      CALL o3c_iterator_create(o3c, o3c_iterator, nthread=nthread)
109
110!$OMP PARALLEL DEFAULT(NONE) &
111!$OMP SHARED (nthread,o3c_iterator,ncoset,nspin,basis_set_list_a,basis_set_list_b,&
112!$OMP         basis_set_list_c,do_force,matrix_p)&
113!$OMP PRIVATE (mepos,ikind,jkind,kkind,basis_set_a,basis_set_b,basis_set_c,rij,rik,rjk,&
114!$OMP          first_sgfa,la_max,la_min,npgfa,nseta,nsgfa,rpgfa,set_radius_a,sphi_a,zeta,&
115!$OMP          first_sgfb,lb_max,lb_min,npgfb,nsetb,nsgfb,rpgfb,set_radius_b,sphi_b,zetb,&
116!$OMP          first_sgfc,lc_max,lc_min,npgfc,nsetc,nsgfc,rpgfc,set_radius_c,sphi_c,zetc,&
117!$OMP          iset,jset,kset,dij,dik,djk,ni,nj,nk,iabc,idabc,iadbc,iabdc,tvec,fi,fj,fk,ncoa,&
118!$OMP          ncob,ncoc,sabc,sabc_contr,sdabc,sabdc,sgfa,sgfb,sgfc,egfa,egfb,egfc,i,j,&
119!$OMP          pblock,pmat,ispin,iatom,jatom,katom,irow,icol,found,trans,fpre)
120
121      mepos = 0
122!$    mepos = omp_get_thread_num()
123
124      DO WHILE (o3c_iterate(o3c_iterator, mepos=mepos) == 0)
125         CALL get_o3c_iterator_info(o3c_iterator, mepos=mepos, &
126                                    ikind=ikind, jkind=jkind, kkind=kkind, rij=rij, rik=rik, &
127                                    integral=iabc, tvec=tvec, force_i=fi, force_j=fj, force_k=fk)
128         CPASSERT(.NOT. ASSOCIATED(iabc))
129         CPASSERT(.NOT. ASSOCIATED(tvec))
130         CPASSERT(.NOT. ASSOCIATED(fi))
131         CPASSERT(.NOT. ASSOCIATED(fj))
132         CPASSERT(.NOT. ASSOCIATED(fk))
133         ! basis
134         basis_set_a => basis_set_list_a(ikind)%gto_basis_set
135         basis_set_b => basis_set_list_b(jkind)%gto_basis_set
136         basis_set_c => basis_set_list_c(kkind)%gto_basis_set
137         ! center A
138         first_sgfa => basis_set_a%first_sgf
139         la_max => basis_set_a%lmax
140         la_min => basis_set_a%lmin
141         npgfa => basis_set_a%npgf
142         nseta = basis_set_a%nset
143         nsgfa => basis_set_a%nsgf_set
144         rpgfa => basis_set_a%pgf_radius
145         set_radius_a => basis_set_a%set_radius
146         sphi_a => basis_set_a%sphi
147         zeta => basis_set_a%zet
148         ! center B
149         first_sgfb => basis_set_b%first_sgf
150         lb_max => basis_set_b%lmax
151         lb_min => basis_set_b%lmin
152         npgfb => basis_set_b%npgf
153         nsetb = basis_set_b%nset
154         nsgfb => basis_set_b%nsgf_set
155         rpgfb => basis_set_b%pgf_radius
156         set_radius_b => basis_set_b%set_radius
157         sphi_b => basis_set_b%sphi
158         zetb => basis_set_b%zet
159         ! center C (RI)
160         first_sgfc => basis_set_c%first_sgf
161         lc_max => basis_set_c%lmax
162         lc_min => basis_set_c%lmin
163         npgfc => basis_set_c%npgf
164         nsetc = basis_set_c%nset
165         nsgfc => basis_set_c%nsgf_set
166         rpgfc => basis_set_c%pgf_radius
167         set_radius_c => basis_set_c%set_radius
168         sphi_c => basis_set_c%sphi
169         zetc => basis_set_c%zet
170
171         ni = SUM(nsgfa)
172         nj = SUM(nsgfb)
173         nk = SUM(nsgfc)
174
175         ALLOCATE (iabc(ni, nj, nk))
176         iabc(:, :, :) = 0.0_dp
177         IF (do_force) THEN
178            ALLOCATE (fi(nk, 3), fj(nk, 3), fk(nk, 3))
179            fi(:, :) = 0.0_dp
180            fj(:, :) = 0.0_dp
181            fk(:, :) = 0.0_dp
182            ALLOCATE (idabc(ni, nj, nk, 3))
183            idabc(:, :, :, :) = 0.0_dp
184            ALLOCATE (iadbc(ni, nj, nk, 3))
185            iadbc(:, :, :, :) = 0.0_dp
186            ALLOCATE (iabdc(ni, nj, nk, 3))
187            iabdc(:, :, :, :) = 0.0_dp
188         ELSE
189            NULLIFY (fi, fj, fk)
190         END IF
191         ALLOCATE (tvec(nk, nspin))
192         tvec(:, :) = 0.0_dp
193
194         rjk(1:3) = rik(1:3) - rij(1:3)
195         dij = NORM2(rij)
196         dik = NORM2(rik)
197         djk = NORM2(rjk)
198
199         DO iset = 1, nseta
200            DO jset = 1, nsetb
201               IF (set_radius_a(iset) + set_radius_b(jset) < dij) CYCLE
202               DO kset = 1, nsetc
203                  IF (set_radius_a(iset) + set_radius_c(kset) < dik) CYCLE
204                  IF (set_radius_b(jset) + set_radius_c(kset) < djk) CYCLE
205
206                  ncoa = npgfa(iset)*ncoset(la_max(iset))
207                  ncob = npgfb(jset)*ncoset(lb_max(jset))
208                  ncoc = npgfc(kset)*ncoset(lc_max(kset))
209
210                  sgfa = first_sgfa(1, iset)
211                  sgfb = first_sgfb(1, jset)
212                  sgfc = first_sgfc(1, kset)
213
214                  egfa = sgfa + nsgfa(iset) - 1
215                  egfb = sgfb + nsgfb(jset) - 1
216                  egfc = sgfc + nsgfc(kset) - 1
217
218                  IF (ncoa*ncob*ncoc > 0) THEN
219                     ALLOCATE (sabc(ncoa, ncob, ncoc))
220                     sabc(:, :, :) = 0.0_dp
221                     IF (do_force) THEN
222                        ALLOCATE (sdabc(ncoa, ncob, ncoc, 3))
223                        sdabc(:, :, :, :) = 0.0_dp
224                        ALLOCATE (sabdc(ncoa, ncob, ncoc, 3))
225                        sabdc(:, :, :, :) = 0.0_dp
226                        CALL overlap3(la_max(iset), npgfa(iset), zeta(:, iset), rpgfa(:, iset), la_min(iset), &
227                                      lb_max(jset), npgfb(jset), zetb(:, jset), rpgfb(:, jset), lb_min(jset), &
228                                      lc_max(kset), npgfc(kset), zetc(:, kset), rpgfc(:, kset), lc_min(kset), &
229                                      rij, dij, rik, dik, rjk, djk, sabc, sdabc, sabdc)
230                     ELSE
231                        CALL overlap3(la_max(iset), npgfa(iset), zeta(:, iset), rpgfa(:, iset), la_min(iset), &
232                                      lb_max(jset), npgfb(jset), zetb(:, jset), rpgfb(:, jset), lb_min(jset), &
233                                      lc_max(kset), npgfc(kset), zetc(:, kset), rpgfc(:, kset), lc_min(kset), &
234                                      rij, dij, rik, dik, rjk, djk, sabc)
235                     END IF
236                     ALLOCATE (sabc_contr(nsgfa(iset), nsgfb(jset), nsgfc(kset)))
237
238                     CALL abc_contract(sabc_contr, sabc, &
239                                       sphi_a(:, sgfa:), sphi_b(:, sgfb:), sphi_c(:, sgfc:), &
240                                       ncoa, ncob, ncoc, nsgfa(iset), nsgfb(jset), nsgfc(kset))
241                     iabc(sgfa:egfa, sgfb:egfb, sgfc:egfc) = &
242                        sabc_contr(1:nsgfa(iset), 1:nsgfb(jset), 1:nsgfc(kset))
243                     IF (do_force) THEN
244                        DO i = 1, 3
245                           CALL abc_contract(sabc_contr, sdabc(:, :, :, i), &
246                                             sphi_a(:, sgfa:), sphi_b(:, sgfb:), sphi_c(:, sgfc:), &
247                                             ncoa, ncob, ncoc, nsgfa(iset), nsgfb(jset), nsgfc(kset))
248                           idabc(sgfa:egfa, sgfb:egfb, sgfc:egfc, i) = &
249                              sabc_contr(1:nsgfa(iset), 1:nsgfb(jset), 1:nsgfc(kset))
250                           CALL abc_contract(sabc_contr, sabdc(:, :, :, i), &
251                                             sphi_a(:, sgfa:), sphi_b(:, sgfb:), sphi_c(:, sgfc:), &
252                                             ncoa, ncob, ncoc, nsgfa(iset), nsgfb(jset), nsgfc(kset))
253                           iabdc(sgfa:egfa, sgfb:egfb, sgfc:egfc, i) = &
254                              sabc_contr(1:nsgfa(iset), 1:nsgfb(jset), 1:nsgfc(kset))
255                        END DO
256                     END IF
257
258                     DEALLOCATE (sabc_contr)
259                     DEALLOCATE (sabc)
260                  END IF
261                  IF (do_force) THEN
262                     DEALLOCATE (sdabc, sabdc)
263                  END IF
264               END DO
265            END DO
266         END DO
267         IF (do_force) THEN
268            ! translational invariance
269            iadbc(:, :, :, :) = -idabc(:, :, :, :) - iabdc(:, :, :, :)
270            !
271            ! get the atom indices
272            CALL get_o3c_iterator_info(o3c_iterator, mepos=mepos, &
273                                       iatom=iatom, jatom=jatom, katom=katom)
274            !
275            ! contract over i and j to get forces
276            IF (iatom <= jatom) THEN
277               irow = iatom
278               icol = jatom
279               trans = .FALSE.
280            ELSE
281               irow = jatom
282               icol = iatom
283               trans = .TRUE.
284            END IF
285            IF (iatom == jatom) THEN
286               fpre = 1.0_dp
287            ELSE
288               fpre = 2.0_dp
289            END IF
290            ALLOCATE (pmat(ni, nj))
291            pmat(:, :) = 0.0_dp
292            DO ispin = 1, nspin
293               CALL dbcsr_get_block_p(matrix=matrix_p(ispin)%matrix, &
294                                      row=irow, col=icol, BLOCK=pblock, found=found)
295               IF (found) THEN
296                  IF (trans) THEN
297                     pmat(:, :) = pmat(:, :) + TRANSPOSE(pblock(:, :))
298                  ELSE
299                     pmat(:, :) = pmat(:, :) + pblock(:, :)
300                  END IF
301               END IF
302            END DO
303            DO i = 1, 3
304               DO j = 1, nk
305                  fi(j, i) = fpre*SUM(pmat(:, :)*idabc(:, :, j, i))
306                  fj(j, i) = fpre*SUM(pmat(:, :)*iadbc(:, :, j, i))
307                  fk(j, i) = fpre*SUM(pmat(:, :)*iabdc(:, :, j, i))
308               END DO
309            END DO
310            DEALLOCATE (pmat)
311            !
312            DEALLOCATE (idabc, iadbc, iabdc)
313         END IF
314         !
315         CALL set_o3c_container(o3c_iterator, mepos=mepos, &
316                                integral=iabc, tvec=tvec, force_i=fi, force_j=fj, force_k=fk)
317
318      END DO
319!$OMP END PARALLEL
320      CALL o3c_iterator_release(o3c_iterator)
321
322      CALL timestop(handle)
323
324   END SUBROUTINE calculate_o3c_integrals
325
326! **************************************************************************************************
327!> \brief Computes the 3-center integrals of the o3c container based on libint for the given operator
328!> \param o3c the 3-center integrals container
329!> \param potential_parameter info about the operator as a libint_potential_type
330!> \param para_env ...
331!> \param eps_screen the screening threshold for sicarding integrals before contraction
332!> \note The static initialization of the libint library needs to be done before hand
333!>       In case the truncated coulomb operator is used, the potential parameter file must be read
334!>       in advance too
335! **************************************************************************************************
336   SUBROUTINE calculate_o3c_libint_integrals(o3c, potential_parameter, para_env, eps_screen)
337
338      TYPE(o3c_container_type), POINTER                  :: o3c
339      TYPE(libint_potential_type)                        :: potential_parameter
340      TYPE(cp_para_env_type), OPTIONAL, POINTER          :: para_env
341      REAL(dp), INTENT(IN), OPTIONAL                     :: eps_screen
342
343      CHARACTER(len=*), PARAMETER :: routineN = 'calculate_o3c_libint_integrals', &
344         routineP = moduleN//':'//routineN
345
346      INTEGER :: egfa, egfb, egfc, handle, i, ibasis, ikind, ilist, imax, iset, jkind, jset, &
347         kkind, kset, m_max, max_nset, maxli, maxlj, maxlk, mepos, nbasis, ncoa, ncob, ncoc, ni, &
348         nj, nk, nseta, nsetb, nsetc, nspin, nthread, sgfa, sgfb, sgfc, unit_id
349      INTEGER, DIMENSION(:), POINTER                     :: la_max, la_min, lb_max, lb_min, lc_max, &
350                                                            lc_min, npgfa, npgfb, npgfc, nsgfa, &
351                                                            nsgfb, nsgfc
352      INTEGER, DIMENSION(:, :), POINTER                  :: first_sgfa, first_sgfb, first_sgfc
353      LOGICAL                                            :: do_screen
354      REAL(dp)                                           :: dij, dik, djk, max_val, my_eps_screen, &
355                                                            screen_radius
356      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: max_contr, max_contra, max_contrb, &
357                                                            max_contrc
358      REAL(dp), ALLOCATABLE, DIMENSION(:, :, :)          :: sabc
359      REAL(dp), DIMENSION(3)                             :: ri, rij, rik, rj, rjk, rk
360      REAL(dp), DIMENSION(:), POINTER                    :: set_radius_a, set_radius_b, set_radius_c
361      REAL(dp), DIMENSION(:, :), POINTER                 :: rpgf_a, rpgf_b, rpgf_c, sphi_a, sphi_b, &
362                                                            sphi_c, tvec, zeta, zetb, zetc
363      REAL(dp), DIMENSION(:, :, :), POINTER              :: iabc
364      TYPE(cp_libint_t)                                  :: lib
365      TYPE(gto_basis_set_p_type), DIMENSION(:), POINTER  :: basis_set_list, basis_set_list_a, &
366                                                            basis_set_list_b, basis_set_list_c
367      TYPE(gto_basis_set_type), POINTER                  :: basis_set, basis_set_a, basis_set_b, &
368                                                            basis_set_c
369      TYPE(o3c_iterator_type)                            :: o3c_iterator
370
371      NULLIFY (basis_set_list_a, basis_set_list_b, basis_set_list_c, basis_set_a, basis_set_b)
372      NULLIFY (basis_set_c, iabc, tvec, first_sgfa, first_sgfb, first_sgfc, la_max, la_min, lb_max)
373      NULLIFY (lb_min, lc_max, lc_min, npgfa, npgfb, npgfc, nsgfa, nsgfb, nsgfc)
374      NULLIFY (basis_set, basis_set_list, set_radius_a, set_radius_b, &
375               set_radius_c)
376
377      CALL timeset(routineN, handle)
378
379      CALL get_o3c_container(o3c, nspin=nspin, basis_set_list_a=basis_set_list_a, &
380                             basis_set_list_b=basis_set_list_b, basis_set_list_c=basis_set_list_c)
381
382      !Need the max l for each basis for libint (and overall max #of sets for screening)
383      nbasis = SIZE(basis_set_list_a)
384      max_nset = 0
385      maxli = 0
386      DO ibasis = 1, nbasis
387         CALL get_gto_basis_set(gto_basis_set=basis_set_list_a(ibasis)%gto_basis_set, &
388                                maxl=imax, nset=iset)
389         maxli = MAX(maxli, imax)
390         max_nset = MAX(max_nset, iset)
391      END DO
392      maxlj = 0
393      DO ibasis = 1, nbasis
394         CALL get_gto_basis_set(gto_basis_set=basis_set_list_b(ibasis)%gto_basis_set, &
395                                maxl=imax, nset=iset)
396         maxlj = MAX(maxlj, imax)
397         max_nset = MAX(max_nset, iset)
398      END DO
399      maxlk = 0
400      DO ibasis = 1, nbasis
401         CALL get_gto_basis_set(gto_basis_set=basis_set_list_c(ibasis)%gto_basis_set, &
402                                maxl=imax, nset=iset)
403         maxlk = MAX(maxlk, imax)
404         max_nset = MAX(max_nset, iset)
405      END DO
406      m_max = maxli + maxlj + maxlk
407
408      !Screening
409      do_screen = .FALSE.
410      IF (PRESENT(eps_screen)) THEN
411         do_screen = .TRUE.
412         my_eps_screen = eps_screen
413      END IF
414      screen_radius = 0.0_dp
415      IF (potential_parameter%potential_type == do_potential_truncated .OR. &
416          potential_parameter%potential_type == do_potential_short) THEN
417
418         screen_radius = potential_parameter%cutoff_radius*cutoff_screen_factor
419      ELSE IF (potential_parameter%potential_type == do_potential_coulomb) THEN
420
421         screen_radius = 1000000.0_dp
422      END IF
423
424      !Init the truncated Coulomb operator
425      IF (potential_parameter%potential_type == do_potential_truncated) THEN
426         CPASSERT(PRESENT(para_env))
427
428         !open the file only if necessary
429         IF (m_max > get_lmax_init()) THEN
430            IF (para_env%mepos == 0) THEN
431               CALL open_file(unit_number=unit_id, file_name=potential_parameter%filename)
432            END IF
433            CALL init(m_max, unit_id, para_env%mepos, para_env%group)
434            IF (para_env%mepos == 0) THEN
435               CALL close_file(unit_id)
436            END IF
437         END IF
438      END IF
439
440      !Inint the initial gamma function before the OMP region as it is not thread safe
441      CALL init_md_ftable(nmax=m_max)
442
443      nthread = 1
444!$    nthread = omp_get_max_threads()
445      CALL o3c_iterator_create(o3c, o3c_iterator, nthread=nthread)
446
447!$OMP PARALLEL DEFAULT(NONE) &
448!$OMP SHARED (nthread,o3c_iterator,nspin,basis_set_list_a, basis_set_list_b,basis_set_list_c,nbasis,&
449!$OMP         maxli,maxlj,maxlk,potential_parameter,ncoset,do_screen,my_eps_screen,max_nset,screen_radius) &
450!$OMP PRIVATE (basis_set_a,basis_set_b,basis_set_c,mepos,ikind,jkind,kkind,iabc,tvec,rij,rik,rjk,dij,dik,djk,&
451!$OMP          first_sgfa,la_max,la_min,npgfa,nseta,nsgfa,zeta,iset,ni,ri,ncoa,sgfa,egfa,sphi_a,rpgf_a,&
452!$OMP          first_sgfb,lb_max,lb_min,npgfb,nsetb,nsgfb,zetb,jset,nj,rj,ncob,sgfb,egfb,sphi_b,rpgf_b,&
453!$OMP          first_sgfc,lc_max,lc_min,npgfc,nsetc,nsgfc,zetc,kset,nk,rk,ncoc,sgfc,egfc,sphi_c,rpgf_c,&
454!$OMP          sabc,lib,max_contra,max_contrb,max_contrc,ibasis,i,basis_set,basis_set_list,ilist,&
455!$OMP          max_contr,max_val,set_radius_a,set_radius_b,set_radius_c)
456
457      mepos = 0
458!$    mepos = omp_get_thread_num()
459
460      !each thread needs its own lib because internal parameters could change at different rates
461      CALL cp_libint_init_3eri(lib, MAX(maxli, maxlj, maxlk))
462      CALL cp_libint_set_contrdepth(lib, 1)
463
464      !get the max_contraction values before we loop, on each thread => least amount of computation
465      !and false sharing
466      IF (do_screen) THEN
467
468         !Allocate max_contraction arrays such that we have a specific value for each set/kind
469         ALLOCATE (max_contr(max_nset, nbasis), max_contra(max_nset, nbasis), &
470                   max_contrb(max_nset, nbasis), max_contrc(max_nset, nbasis))
471
472         !Not the most elegent, but better than copying 3 times the same
473         DO ilist = 1, 3
474
475            IF (ilist == 1) basis_set_list => basis_set_list_a
476            IF (ilist == 2) basis_set_list => basis_set_list_b
477            IF (ilist == 3) basis_set_list => basis_set_list_c
478
479            max_contr = 0.0_dp
480
481            DO ibasis = 1, nbasis
482               basis_set => basis_set_list(ibasis)%gto_basis_set
483
484               DO iset = 1, basis_set%nset
485
486                  ncoa = basis_set%npgf(iset)*ncoset(basis_set%lmax(iset))
487                  sgfa = basis_set%first_sgf(1, iset)
488                  egfa = sgfa + basis_set%nsgf_set(iset) - 1
489
490                  max_contr(iset, ibasis) = &
491                     MAXVAL((/(SUM(ABS(basis_set%sphi(1:ncoa, i))), i=sgfa, egfa)/))
492
493               END DO !iset
494            END DO !ibasis
495
496            IF (ilist == 1) max_contra(:, :) = max_contr(:, :)
497            IF (ilist == 2) max_contrb(:, :) = max_contr(:, :)
498            IF (ilist == 3) max_contrc(:, :) = max_contr(:, :)
499         END DO !ilist
500         DEALLOCATE (max_contr)
501      END IF !do_screen
502
503      DO WHILE (o3c_iterate(o3c_iterator, mepos=mepos) == 0)
504
505         CALL get_o3c_iterator_info(o3c_iterator, mepos=mepos, ikind=ikind, jkind=jkind, &
506                                    kkind=kkind, rij=rij, rik=rik, integral=iabc, tvec=tvec)
507
508         rjk = rik - rij
509
510         !basis
511         basis_set_a => basis_set_list_a(ikind)%gto_basis_set
512         basis_set_b => basis_set_list_b(jkind)%gto_basis_set
513         basis_set_c => basis_set_list_c(kkind)%gto_basis_set
514         ! center A
515         first_sgfa => basis_set_a%first_sgf
516         la_max => basis_set_a%lmax
517         la_min => basis_set_a%lmin
518         npgfa => basis_set_a%npgf
519         nseta = basis_set_a%nset
520         nsgfa => basis_set_a%nsgf_set
521         sphi_a => basis_set_a%sphi
522         zeta => basis_set_a%zet
523         rpgf_a => basis_set_a%pgf_radius
524         set_radius_a => basis_set_a%set_radius
525         ! center B
526         first_sgfb => basis_set_b%first_sgf
527         lb_max => basis_set_b%lmax
528         lb_min => basis_set_b%lmin
529         npgfb => basis_set_b%npgf
530         nsetb = basis_set_b%nset
531         nsgfb => basis_set_b%nsgf_set
532         sphi_b => basis_set_b%sphi
533         zetb => basis_set_b%zet
534         rpgf_b => basis_set_b%pgf_radius
535         set_radius_b => basis_set_b%set_radius
536         ! center C
537         first_sgfc => basis_set_c%first_sgf
538         lc_max => basis_set_c%lmax
539         lc_min => basis_set_c%lmin
540         npgfc => basis_set_c%npgf
541         nsetc = basis_set_c%nset
542         nsgfc => basis_set_c%nsgf_set
543         sphi_c => basis_set_c%sphi
544         zetc => basis_set_c%zet
545         rpgf_c => basis_set_c%pgf_radius
546         set_radius_c => basis_set_c%set_radius
547
548         djk = NORM2(rjk)
549         dij = NORM2(rij)
550         dik = NORM2(rik)
551
552         ni = SUM(nsgfa)
553         nj = SUM(nsgfb)
554         nk = SUM(nsgfc)
555
556         ALLOCATE (iabc(ni, nj, nk))
557         iabc(:, :, :) = 0.0_dp
558
559         ALLOCATE (tvec(nk, nspin))
560         tvec(:, :) = 0.0_dp
561
562         !need positions for libint. Only relative positions are needed => set ri to 0.0
563         ri = 0.0_dp
564         rj = rij ! ri + rij
565         rk = rik ! ri + rik
566
567         DO iset = 1, nseta
568            ncoa = npgfa(iset)*ncoset(la_max(iset))
569            sgfa = first_sgfa(1, iset)
570            egfa = sgfa + nsgfa(iset) - 1
571
572            DO jset = 1, nsetb
573               ncob = npgfb(jset)*ncoset(lb_max(jset))
574               sgfb = first_sgfb(1, jset)
575               egfb = sgfb + nsgfb(jset) - 1
576
577               !screening (overlap)
578               IF (set_radius_a(iset) + set_radius_b(jset) < dij) CYCLE
579
580               DO kset = 1, nsetc
581                  ncoc = npgfc(kset)*ncoset(lc_max(kset))
582                  sgfc = first_sgfc(1, kset)
583                  egfc = sgfc + nsgfc(kset) - 1
584
585                  !screening (potential)
586                  IF (set_radius_a(iset) + set_radius_c(kset) + screen_radius < dik) CYCLE
587                  IF (set_radius_b(jset) + set_radius_c(kset) + screen_radius < djk) CYCLE
588
589                  ALLOCATE (sabc(ncoa, ncob, ncoc))
590                  sabc = 0.0_dp
591
592                  CALL eri_3center(sabc, la_min(iset), la_max(iset), npgfa(iset), zeta(:, iset), rpgf_a(:, iset), &
593                                   ri, lb_min(jset), lb_max(jset), npgfb(jset), zetb(:, jset), rpgf_b(:, jset), &
594                                   rj, lc_min(kset), lc_max(kset), npgfc(kset), zetc(:, kset), rpgf_c(:, kset), &
595                                   rk, dij, dik, djk, lib, potential_parameter)
596
597                  IF (do_screen) THEN
598                     max_val = MAXVAL(ABS(sabc))*max_contra(iset, ikind)*max_contrb(jset, jkind) &
599                               *max_contrc(kset, kkind)
600                     IF (max_val < my_eps_screen) THEN
601                        DEALLOCATE (sabc)
602                        CYCLE
603                     END IF
604                  END IF
605
606                  CALL abc_contract(iabc(sgfa:egfa, sgfb:egfb, sgfc:egfc), sabc, &
607                                    sphi_a(:, sgfa:), sphi_b(:, sgfb:), sphi_c(:, sgfc:), &
608                                    ncoa, ncob, ncoc, nsgfa(iset), nsgfb(jset), nsgfc(kset))
609
610                  DEALLOCATE (sabc)
611
612               END DO !kset
613            END DO !jset
614         END DO !iset
615
616         CALL set_o3c_container(o3c_iterator, mepos=mepos, integral=iabc, tvec=tvec)
617
618      END DO !o3c_iterator
619      CALL cp_libint_cleanup_3eri(lib)
620
621!$OMP END PARALLEL
622      CALL o3c_iterator_release(o3c_iterator)
623
624      CALL timestop(handle)
625
626   END SUBROUTINE calculate_o3c_libint_integrals
627
628! **************************************************************************************************
629!> \brief Contraction of 3-tensor over indices 1 and 2 (assuming symmetry)
630!>        t(k) = sum_ij (ijk)*p(ij)
631!> \param o3c ...
632!> \param matrix_p ...
633! **************************************************************************************************
634   SUBROUTINE contract12_o3c(o3c, matrix_p)
635      TYPE(o3c_container_type), POINTER                  :: o3c
636      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_p
637
638      CHARACTER(LEN=*), PARAMETER :: routineN = 'contract12_o3c', routineP = moduleN//':'//routineN
639
640      INTEGER                                            :: handle, iatom, icol, ik, irow, ispin, &
641                                                            jatom, mepos, nk, nspin, nthread
642      LOGICAL                                            :: found, ijsymmetric, trans
643      REAL(KIND=dp)                                      :: fpre
644      REAL(KIND=dp), DIMENSION(:, :), POINTER            :: pblock, tvec
645      REAL(KIND=dp), DIMENSION(:, :, :), POINTER         :: iabc
646      TYPE(o3c_iterator_type)                            :: o3c_iterator
647
648      CALL timeset(routineN, handle)
649
650      nspin = SIZE(matrix_p, 1)
651      CALL get_o3c_container(o3c, ijsymmetric=ijsymmetric)
652      CPASSERT(ijsymmetric)
653
654      nthread = 1
655!$    nthread = omp_get_max_threads()
656      CALL o3c_iterator_create(o3c, o3c_iterator, nthread=nthread)
657
658!$OMP PARALLEL DEFAULT(NONE) &
659!$OMP SHARED (nthread,o3c_iterator,matrix_p,nspin)&
660!$OMP PRIVATE (mepos,ispin,iatom,jatom,ik,nk,irow,icol,iabc,tvec,found,pblock,trans,fpre)
661
662      mepos = 0
663!$    mepos = omp_get_thread_num()
664
665      DO WHILE (o3c_iterate(o3c_iterator, mepos=mepos) == 0)
666         CALL get_o3c_iterator_info(o3c_iterator, mepos=mepos, iatom=iatom, jatom=jatom, &
667                                    integral=iabc, tvec=tvec)
668         nk = SIZE(tvec, 1)
669
670         IF (iatom <= jatom) THEN
671            irow = iatom
672            icol = jatom
673            trans = .FALSE.
674         ELSE
675            irow = jatom
676            icol = iatom
677            trans = .TRUE.
678         END IF
679         IF (iatom == jatom) THEN
680            fpre = 1.0_dp
681         ELSE
682            fpre = 2.0_dp
683         END IF
684
685         DO ispin = 1, nspin
686            CALL dbcsr_get_block_p(matrix=matrix_p(ispin)%matrix, &
687                                   row=irow, col=icol, BLOCK=pblock, found=found)
688            IF (found) THEN
689               IF (trans) THEN
690                  DO ik = 1, nk
691                     tvec(ik, ispin) = fpre*SUM(TRANSPOSE(pblock(:, :))*iabc(:, :, ik))
692                  END DO
693               ELSE
694                  DO ik = 1, nk
695                     tvec(ik, ispin) = fpre*SUM(pblock(:, :)*iabc(:, :, ik))
696                  END DO
697               END IF
698            END IF
699         END DO
700
701      END DO
702!$OMP END PARALLEL
703      CALL o3c_iterator_release(o3c_iterator)
704
705      CALL timestop(handle)
706
707   END SUBROUTINE contract12_o3c
708
709! **************************************************************************************************
710!> \brief Contraction of 3-tensor over index 3
711!>        h(ij) = h(ij) + sum_k (ijk)*v(k)
712!> \param o3c ...
713!> \param vec ...
714!> \param matrix ...
715! **************************************************************************************************
716   SUBROUTINE contract3_o3c(o3c, vec, matrix)
717      TYPE(o3c_container_type), POINTER                  :: o3c
718      TYPE(o3c_vec_type), DIMENSION(:), POINTER          :: vec
719      TYPE(dbcsr_type), POINTER                          :: matrix
720
721      CHARACTER(LEN=*), PARAMETER :: routineN = 'contract3_o3c', routineP = moduleN//':'//routineN
722
723      INTEGER                                            :: handle, iatom, icol, ik, irow, jatom, &
724                                                            katom, mepos, nk, nthread, s1, s2
725      LOGICAL                                            :: found, ijsymmetric, trans
726      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: work
727      REAL(KIND=dp), DIMENSION(:), POINTER               :: v
728      REAL(KIND=dp), DIMENSION(:, :), POINTER            :: pblock
729      REAL(KIND=dp), DIMENSION(:, :, :), POINTER         :: iabc
730      TYPE(o3c_iterator_type)                            :: o3c_iterator
731
732      CALL timeset(routineN, handle)
733
734      CALL get_o3c_container(o3c, ijsymmetric=ijsymmetric)
735      CPASSERT(ijsymmetric)
736
737      nthread = 1
738!$    nthread = omp_get_max_threads()
739      CALL o3c_iterator_create(o3c, o3c_iterator, nthread=nthread)
740
741!$OMP PARALLEL DEFAULT(NONE) &
742!$OMP SHARED (nthread,o3c_iterator,vec,matrix)&
743!$OMP PRIVATE (mepos,iabc,iatom,jatom,katom,irow,icol,trans,pblock,v,found,ik,nk,work,s1,s2)
744
745      mepos = 0
746!$    mepos = omp_get_thread_num()
747
748      DO WHILE (o3c_iterate(o3c_iterator, mepos=mepos) == 0)
749         CALL get_o3c_iterator_info(o3c_iterator, mepos=mepos, iatom=iatom, jatom=jatom, katom=katom, &
750                                    integral=iabc)
751
752         CALL get_o3c_vec(vec, katom, v)
753         nk = SIZE(v)
754
755         IF (iatom <= jatom) THEN
756            irow = iatom
757            icol = jatom
758            trans = .FALSE.
759         ELSE
760            irow = jatom
761            icol = iatom
762            trans = .TRUE.
763         END IF
764
765         CALL dbcsr_get_block_p(matrix=matrix, row=irow, col=icol, BLOCK=pblock, found=found)
766
767         IF (found) THEN
768            s1 = SIZE(pblock, 1); s2 = SIZE(pblock, 2)
769            ALLOCATE (work(s1, s2))
770            work(:, :) = 0.0_dp
771
772            IF (trans) THEN
773               DO ik = 1, nk
774                  CALL daxpy(s1*s2, v(ik), TRANSPOSE(iabc(:, :, ik)), 1, work(:, :), 1)
775               END DO
776            ELSE
777               DO ik = 1, nk
778                  CALL daxpy(s1*s2, v(ik), iabc(:, :, ik), 1, work(:, :), 1)
779               END DO
780            END IF
781
782            ! Mulitple threads with same irow, icol but different katom (same even in PBCs) can try
783            ! to access the dbcsr block at the same time. Prevent that by CRITICAL section but keep
784            ! computations before hand in order to retain speed
785
786!$OMP CRITICAL
787            CALL dbcsr_get_block_p(matrix=matrix, row=irow, col=icol, BLOCK=pblock, found=found)
788            CALL daxpy(s1*s2, 1.0_dp, work(:, :), 1, pblock(:, :), 1)
789!$OMP END CRITICAL
790
791            DEALLOCATE (work)
792         END IF
793
794      END DO
795!$OMP END PARALLEL
796      CALL o3c_iterator_release(o3c_iterator)
797
798      CALL timestop(handle)
799
800   END SUBROUTINE contract3_o3c
801
802END MODULE qs_o3c_methods
803