1!--------------------------------------------------------------------------------------------------!
2!   CP2K: A general program to perform molecular dynamics simulations                              !
3!   Copyright (C) 2000 - 2020  CP2K developers group                                               !
4!--------------------------------------------------------------------------------------------------!
5
6! **************************************************************************************************
7!> \brief Routines to calculate derivatives with respect to basis function origin
8!> \par History
9!>      04.2008 created [Manuel Guidon]
10!> \author Manuel Guidon
11! **************************************************************************************************
12MODULE hfx_derivatives
13   USE admm_types,                      ONLY: admm_type
14   USE atomic_kind_types,               ONLY: atomic_kind_type,&
15                                              get_atomic_kind_set
16   USE cell_types,                      ONLY: cell_type,&
17                                              pbc
18   USE cp_control_types,                ONLY: dft_control_type
19   USE cp_files,                        ONLY: close_file,&
20                                              open_file
21   USE cp_log_handling,                 ONLY: cp_get_default_logger,&
22                                              cp_logger_type
23   USE cp_output_handling,              ONLY: cp_p_file,&
24                                              cp_print_key_finished_output,&
25                                              cp_print_key_should_output,&
26                                              cp_print_key_unit_nr
27   USE cp_para_types,                   ONLY: cp_para_env_type
28   USE dbcsr_api,                       ONLY: dbcsr_get_matrix_type,&
29                                              dbcsr_p_type,&
30                                              dbcsr_type_antisymmetric
31   USE gamma,                           ONLY: init_md_ftable
32   USE hfx_communication,               ONLY: get_full_density
33   USE hfx_compression_methods,         ONLY: hfx_add_mult_cache_elements,&
34                                              hfx_add_single_cache_element,&
35                                              hfx_decompress_first_cache,&
36                                              hfx_flush_last_cache,&
37                                              hfx_get_mult_cache_elements,&
38                                              hfx_get_single_cache_element,&
39                                              hfx_reset_cache_and_container
40   USE hfx_libint_interface,            ONLY: evaluate_deriv_eri
41   USE hfx_load_balance_methods,        ONLY: collect_load_balance_info,&
42                                              hfx_load_balance,&
43                                              hfx_update_load_balance
44   USE hfx_pair_list_methods,           ONLY: build_atomic_pair_list,&
45                                              build_pair_list,&
46                                              build_pair_list_pgf,&
47                                              build_pgf_product_list,&
48                                              pgf_product_list_size
49   USE hfx_screening_methods,           ONLY: update_pmax_mat
50   USE hfx_types,                       ONLY: &
51        alloc_containers, dealloc_containers, hfx_basis_info_type, hfx_basis_type, hfx_cache_type, &
52        hfx_cell_type, hfx_container_type, hfx_distribution, hfx_general_type, hfx_init_container, &
53        hfx_load_balance_type, hfx_memory_type, hfx_p_kind, hfx_pgf_list, hfx_pgf_product_list, &
54        hfx_potential_type, hfx_screen_coeff_type, hfx_screening_type, hfx_task_list_type, &
55        hfx_type, init_t_c_g0_lmax, log_zero, pair_list_type, pair_set_list_type
56   USE input_constants,                 ONLY: do_potential_mix_cl_trunc,&
57                                              do_potential_truncated,&
58                                              hfx_do_eval_forces
59   USE input_section_types,             ONLY: section_vals_type
60   USE kinds,                           ONLY: dp,&
61                                              int_8
62   USE libint_wrapper,                  ONLY: cp_libint_t
63   USE machine,                         ONLY: m_walltime
64   USE mathconstants,                   ONLY: fac
65   USE message_passing,                 ONLY: mp_sum
66   USE orbital_pointers,                ONLY: nco,&
67                                              ncoset,&
68                                              nso
69   USE particle_types,                  ONLY: particle_type
70   USE qs_environment_types,            ONLY: get_qs_env,&
71                                              qs_environment_type
72   USE qs_force_types,                  ONLY: qs_force_type
73   USE t_c_g0,                          ONLY: init
74   USE util,                            ONLY: sort
75   USE virial_types,                    ONLY: virial_type
76
77!$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads
78
79#include "./base/base_uses.f90"
80
81   IMPLICIT NONE
82   PRIVATE
83
84   PUBLIC :: derivatives_four_center
85
86   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'hfx_derivatives'
87
88!***
89
90CONTAINS
91
92! **************************************************************************************************
93!> \brief computes four center derivatives for a full basis set and updates the
94!>      forces%fock_4c arrays. Uses all 8 eri symmetries
95!> \param qs_env ...
96!> \param rho_ao density matrix
97!> \param rho_ao_resp relaxed density matrix from response
98!> \param hfx_section HFX input section
99!> \param para_env para_env
100!> \param irep ID of HFX replica
101!> \param use_virial ...
102!> \param adiabatic_rescale_factor parameter used for MCY3 hybrid
103!> \param resp_only Calculate only forces from response density
104!> \par History
105!>      06.2007 created [Manuel Guidon]
106!>      08.2007 optimized load balance [Manuel Guidon]
107!>      02.2009 completely rewritten screening part [Manuel Guidon]
108!>      12.2017 major bug fix. removed wrong cycle that was causing segfault.
109!>              see https://groups.google.com/forum/#!topic/cp2k/pc6B14XOALY
110!>              [Tobias Binninger + Valery Weber]
111!> \author Manuel Guidon
112! **************************************************************************************************
113   SUBROUTINE derivatives_four_center(qs_env, rho_ao, rho_ao_resp, hfx_section, para_env, &
114                                      irep, use_virial, adiabatic_rescale_factor, resp_only)
115
116      TYPE(qs_environment_type), POINTER                 :: qs_env
117      TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: rho_ao
118      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: rho_ao_resp
119      TYPE(section_vals_type), POINTER                   :: hfx_section
120      TYPE(cp_para_env_type), POINTER                    :: para_env
121      INTEGER, INTENT(IN)                                :: irep
122      LOGICAL, INTENT(IN)                                :: use_virial
123      REAL(dp), INTENT(IN), OPTIONAL                     :: adiabatic_rescale_factor
124      LOGICAL, INTENT(IN), OPTIONAL                      :: resp_only
125
126      CHARACTER(LEN=*), PARAMETER :: routineN = 'derivatives_four_center', &
127         routineP = moduleN//':'//routineN
128
129      INTEGER :: atomic_offset_ac, atomic_offset_ad, atomic_offset_bc, atomic_offset_bd, bin, &
130         bits_max_val, buffer_left, buffer_size, buffer_start, cache_size, coord, current_counter, &
131         forces_map(4, 2), handle, handle_bin, handle_getP, handle_load, handle_main, i, i_atom, &
132         i_list_ij, i_list_kl, i_set_list_ij, i_set_list_ij_start, i_set_list_ij_stop, &
133         i_set_list_kl, i_set_list_kl_start, i_set_list_kl_stop, i_thread, iatom, iatom_block, &
134         iatom_end, iatom_start, ikind, iset, iw, j, j_atom, jatom, jatom_block, jatom_end, &
135         jatom_start, jkind, jset, k, k_atom, katom, katom_block, katom_end, katom_start
136      INTEGER :: kind_kind_idx, kkind, kset, l_atom, l_max, latom, latom_block, latom_end, &
137         latom_start, lkind, lset, max_am, max_pgf, max_set, my_bin_id, my_bin_size, my_thread_id, &
138         n_threads, natom, nbits, ncob, ncos_max, nints, nkimages, nkind, nneighbors, nseta, &
139         nsetb, nsgf_max, nspins, sgfb, shm_task_counter, shm_total_bins, sphi_a_u1, sphi_a_u2, &
140         sphi_a_u3, sphi_b_u1, sphi_b_u2, sphi_b_u3, sphi_c_u1, sphi_c_u2, sphi_c_u3, sphi_d_u1, &
141         sphi_d_u2, sphi_d_u3, swap_id, tmp_i4, unit_id
142      INTEGER(int_8) :: atom_block, counter, estimate_to_store_int, max_val_memory, &
143         mem_compression_counter, mem_eris, mem_max_val, my_current_counter, my_istart, &
144         n_processes, nblocks, ncpu, neris_incore, neris_onthefly, neris_tmp, neris_total, &
145         nprim_ints, shm_neris_incore, shm_neris_onthefly, shm_neris_total, shm_nprim_ints, &
146         shm_stor_count_max_val, shm_storage_counter_integrals, stor_count_max_val, &
147         storage_counter_integrals, tmp_block, tmp_i8(6)
148      INTEGER(int_8), ALLOCATABLE, DIMENSION(:)          :: tmp_task_list_cost
149      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: atom_of_kind, kind_of, last_sgf_global, &
150                                                            nimages, tmp_index
151      INTEGER, DIMENSION(:), POINTER :: la_max, la_min, lb_max, lb_min, lc_max, lc_min, ld_max, &
152         ld_min, npgfa, npgfb, npgfc, npgfd, nsgfa, nsgfb, nsgfc, nsgfd, shm_block_offset
153      INTEGER, DIMENSION(:, :), POINTER :: first_sgfb, nsgfl_a, nsgfl_b, nsgfl_c, nsgfl_d, &
154         offset_ac_set, offset_ad_set, offset_bc_set, offset_bd_set, shm_atomic_block_offset
155      INTEGER, DIMENSION(:, :), POINTER, SAVE            :: shm_is_assoc_atomic_block
156      INTEGER, DIMENSION(:, :, :, :), POINTER            :: shm_set_offset
157      INTEGER, SAVE                                      :: shm_number_of_p_entries
158      LOGICAL :: bins_left, buffer_overflow, do_dynamic_load_balancing, do_it, do_periodic, &
159         do_print_load_balance_info, is_anti_symmetric, my_resp_only, screen_pmat_forces, &
160         treat_forces_in_core, use_disk_storage, with_resp_density
161      LOGICAL, DIMENSION(:, :), POINTER                  :: shm_atomic_pair_list
162      REAL(dp) :: bintime_start, bintime_stop, cartesian_estimate, cartesian_estimate_virial, &
163         compression_factor, eps_schwarz, eps_storage, fac, hf_fraction, ln_10, log10_eps_schwarz, &
164         log10_pmax, log_2, max_contraction_val, max_val1, max_val2, max_val2_set, &
165         my_adiabatic_rescale_factor, pmax_atom, pmax_blocks, pmax_entry, pmax_tmp, ra(3), rab2, &
166         rb(3), rc(3), rcd2, rd(3), spherical_estimate, spherical_estimate_virial, symm_fac, &
167         tmp_virial(3, 3)
168      REAL(dp), ALLOCATABLE, DIMENSION(:) :: ede_buffer1, ede_buffer2, ede_primitives_tmp, &
169         ede_primitives_tmp_virial, ede_work, ede_work2, ede_work2_virial, ede_work_forces, &
170         ede_work_virial, pac_buf, pac_buf_beta, pac_buf_resp, pac_buf_resp_beta, pad_buf, &
171         pad_buf_beta, pad_buf_resp, pad_buf_resp_beta, pbc_buf, pbc_buf_beta, pbc_buf_resp, &
172         pbc_buf_resp_beta, pbd_buf, pbd_buf_beta, pbd_buf_resp, pbd_buf_resp_beta
173      REAL(dp), ALLOCATABLE, DIMENSION(:), TARGET        :: primitive_forces, primitive_forces_virial
174      REAL(dp), DIMENSION(:), POINTER                    :: full_density_resp, &
175                                                            full_density_resp_beta, T2
176      REAL(dp), DIMENSION(:, :), POINTER :: full_density_alpha, full_density_beta, &
177         max_contraction, ptr_p_1, ptr_p_2, ptr_p_3, ptr_p_4, shm_pmax_atom, shm_pmax_block, &
178         sphi_b, zeta, zetb, zetc, zetd
179      REAL(dp), DIMENSION(:, :, :), POINTER              :: sphi_a_ext_set, sphi_b_ext_set, &
180                                                            sphi_c_ext_set, sphi_d_ext_set
181      REAL(dp), DIMENSION(:, :, :, :), POINTER           :: sphi_a_ext, sphi_b_ext, sphi_c_ext, &
182                                                            sphi_d_ext
183      REAL(KIND=dp)                                      :: coeffs_kind_max0
184      TYPE(admm_type), POINTER                           :: admm_env
185      TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
186      TYPE(cell_type), POINTER                           :: cell
187      TYPE(cp_libint_t)                                  :: private_deriv
188      TYPE(cp_logger_type), POINTER                      :: logger
189      TYPE(dft_control_type), POINTER                    :: dft_control
190      TYPE(hfx_basis_info_type), POINTER                 :: basis_info
191      TYPE(hfx_basis_type), DIMENSION(:), POINTER        :: basis_parameter
192      TYPE(hfx_cache_type), DIMENSION(:), POINTER        :: integral_caches
193      TYPE(hfx_cache_type), POINTER                      :: maxval_cache
194      TYPE(hfx_container_type), DIMENSION(:), POINTER    :: integral_containers
195      TYPE(hfx_container_type), POINTER                  :: maxval_container
196      TYPE(hfx_distribution), POINTER                    :: distribution_forces
197      TYPE(hfx_general_type)                             :: general_parameter
198      TYPE(hfx_load_balance_type), POINTER               :: load_balance_parameter
199      TYPE(hfx_memory_type), POINTER                     :: memory_parameter
200      TYPE(hfx_p_kind), DIMENSION(:), POINTER            :: shm_initial_p
201      TYPE(hfx_pgf_list), ALLOCATABLE, DIMENSION(:)      :: pgf_list_ij, pgf_list_kl
202      TYPE(hfx_pgf_product_list), ALLOCATABLE, &
203         DIMENSION(:)                                    :: pgf_product_list
204      TYPE(hfx_potential_type)                           :: potential_parameter
205      TYPE(hfx_screen_coeff_type), DIMENSION(:, :), &
206         POINTER                                         :: screen_coeffs_kind, tmp_R_1, tmp_R_2, &
207                                                            tmp_screen_pgf1, tmp_screen_pgf2
208      TYPE(hfx_screen_coeff_type), &
209         DIMENSION(:, :, :, :), POINTER                  :: screen_coeffs_set
210      TYPE(hfx_screen_coeff_type), &
211         DIMENSION(:, :, :, :, :, :), POINTER            :: radii_pgf, screen_coeffs_pgf
212      TYPE(hfx_screening_type)                           :: screening_parameter
213      TYPE(hfx_task_list_type), DIMENSION(:), POINTER    :: shm_task_list, tmp_task_list
214      TYPE(hfx_type), POINTER                            :: actual_x_data
215      TYPE(pair_list_type)                               :: list_ij, list_kl
216      TYPE(pair_set_list_type), ALLOCATABLE, &
217         DIMENSION(:)                                    :: set_list_ij, set_list_kl
218      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
219      TYPE(qs_force_type), DIMENSION(:), POINTER         :: force
220      TYPE(virial_type), POINTER                         :: virial
221
222      NULLIFY (dft_control, admm_env)
223
224      with_resp_density = .FALSE.
225      IF (ASSOCIATED(rho_ao_resp)) with_resp_density = .TRUE.
226
227      my_resp_only = .FALSE.
228      IF (PRESENT(resp_only)) my_resp_only = resp_only
229
230      is_anti_symmetric = dbcsr_get_matrix_type(rho_ao(1, 1)%matrix) .EQ. dbcsr_type_antisymmetric
231
232      CALL get_qs_env(qs_env, &
233                      admm_env=admm_env, &
234                      particle_set=particle_set, &
235                      atomic_kind_set=atomic_kind_set, &
236                      cell=cell, &
237                      dft_control=dft_control)
238
239      nspins = dft_control%nspins
240      nkimages = dft_control%nimages
241      CPASSERT(nkimages == 1)
242
243      !! One atom systems have no contribution to forces
244      IF (SIZE(particle_set, 1) == 1) THEN
245         IF (.NOT. use_virial) RETURN
246      END IF
247
248      CALL timeset(routineN, handle)
249      !! Calculate l_max used in fgamma , because init_md_ftable is definitely not thread safe
250      nkind = SIZE(atomic_kind_set, 1)
251      l_max = 0
252      DO ikind = 1, nkind
253         l_max = MAX(l_max, MAXVAL(qs_env%x_data(1, 1)%basis_parameter(ikind)%lmax))
254      ENDDO
255      l_max = 4*l_max + 1
256      CALL init_md_ftable(l_max)
257
258      IF (qs_env%x_data(1, 1)%potential_parameter%potential_type == do_potential_truncated .OR. &
259          qs_env%x_data(1, 1)%potential_parameter%potential_type == do_potential_mix_cl_trunc) THEN
260         IF (l_max > init_t_c_g0_lmax) THEN
261            CALL open_file(unit_number=unit_id, file_name=qs_env%x_data(1, 1)%potential_parameter%filename)
262            CALL init(l_max, unit_id, para_env%mepos, para_env%group)
263            CALL close_file(unit_id)
264            init_t_c_g0_lmax = l_max
265         END IF
266      END IF
267
268      n_threads = 1
269!$    n_threads = omp_get_max_threads()
270
271      shm_neris_total = 0
272      shm_nprim_ints = 0
273      shm_neris_onthefly = 0
274      shm_storage_counter_integrals = 0
275      shm_neris_incore = 0
276      shm_stor_count_max_val = 0
277
278      !! get force array
279      CALL get_qs_env(qs_env=qs_env, force=force, virial=virial)
280
281      my_adiabatic_rescale_factor = 1.0_dp
282      IF (PRESENT(adiabatic_rescale_factor)) THEN
283         my_adiabatic_rescale_factor = adiabatic_rescale_factor
284      END IF
285
286!$OMP PARALLEL DEFAULT(NONE) SHARED(qs_env,&
287!$OMP                                  rho_ao,&
288!$OMP                                  rho_ao_resp,&
289!$OMP                                  with_resp_density,&
290!$OMP                                  hfx_section,&
291!$OMP                                  para_env,&
292!$OMP                                  irep,&
293!$OMP                                  my_resp_only,&
294!$OMP                                  ncoset,&
295!$OMP                                  nco,&
296!$OMP                                  nso,&
297!$OMP                                  n_threads,&
298!$OMP                                  full_density_alpha,&
299!$OMP                                  full_density_resp,&
300!$OMP                                  full_density_resp_beta,&
301!$OMP                                  full_density_beta,&
302!$OMP                                  shm_initial_p,&
303!$OMP                                  shm_is_assoc_atomic_block,&
304!$OMP                                  shm_number_of_p_entries,&
305!$OMP                                  force,&
306!$OMP                                  virial,&
307!$OMP                                  my_adiabatic_rescale_factor,&
308!$OMP                                  shm_neris_total,&
309!$OMP                                  shm_neris_onthefly,&
310!$OMP                                  shm_storage_counter_integrals,&
311!$OMP                                  shm_neris_incore,&
312!$OMP                                  shm_nprim_ints,&
313!$OMP                                  shm_stor_count_max_val,&
314!$OMP                                  cell,&
315!$OMP                                  screen_coeffs_set,&
316!$OMP                                  screen_coeffs_kind,&
317!$OMP                                  screen_coeffs_pgf,&
318!$OMP                                  pgf_product_list_size,&
319!$OMP                                  nkind,&
320!$OMP                                  is_anti_symmetric,&
321!$OMP                                  radii_pgf,&
322!$OMP                                  shm_atomic_block_offset,&
323!$OMP                                  shm_set_offset,&
324!$OMP                                  shm_block_offset,&
325!$OMP                                  shm_task_counter,&
326!$OMP                                  shm_task_list,&
327!$OMP                                  shm_total_bins,&
328!$OMP                                  shm_pmax_block,&
329!$OMP                                  shm_atomic_pair_list,&
330!$OMP                                  shm_pmax_atom,&
331!$OMP                                  use_virial,&
332!$OMP                                  do_print_load_balance_info,&
333!$OMP                                  nkimages,nspins)&
334!$OMP  PRIVATE(actual_x_data,atomic_kind_set,atomic_offset_ac,atomic_offset_ad,atomic_offset_bc,&
335!$OMP  atomic_offset_bd,atom_of_kind,basis_info,&
336!$OMP  basis_parameter,bin,bins_left,bintime_start,bintime_stop,bits_max_val,buffer_left,buffer_overflow,buffer_size,&
337!$OMP  buffer_start,cache_size,cartesian_estimate,cartesian_estimate_virial,&
338!$OMP  coeffs_kind_max0,compression_factor,counter,current_counter,&
339!$OMP  distribution_forces,do_dynamic_load_balancing,do_it,do_periodic,ede_buffer1,ede_buffer2,&
340!$OMP  ede_primitives_tmp,ede_primitives_tmp_virial,&
341!$OMP  ede_work,ede_work2,ede_work2_virial,ede_work_forces,ede_work_virial,eps_schwarz,eps_storage,estimate_to_store_int,&
342!$OMP  fac,first_sgfb,forces_map,general_parameter,handle_bin,handle_getp,handle_load,&
343!$OMP  handle_main,hf_fraction,i_atom,iatom_end,iatom_start,ikind,integral_caches,integral_containers,&
344!$OMP  i_set_list_ij_start,i_set_list_ij_stop,i_set_list_kl_start,i_set_list_kl_stop,i_thread,iw,j_atom,jatom_end,&
345!$OMP  jatom_start,jkind,katom,k_atom,katom_block,katom_end,katom_start,kind_kind_idx,&
346!$OMP  kind_of,kkind,kset,la_max,la_min,last_sgf_global,latom,l_atom,&
347!$OMP  latom_block,latom_end,latom_start,lb_max,lb_min,lc_max,lc_min,ld_max,&
348!$OMP  ld_min,list_ij,list_kl,lkind,ln_10,load_balance_parameter,log10_eps_schwarz,log10_pmax,&
349!$OMP  log_2,logger,lset,max_am,max_contraction,max_contraction_val,max_pgf,max_set,&
350!$OMP  max_val1,max_val2,max_val2_set,maxval_cache,maxval_container,max_val_memory,mem_compression_counter,mem_eris,&
351!$OMP  mem_max_val,memory_parameter,my_bin_id,my_bin_size,my_current_counter,my_istart,my_thread_id,natom,&
352!$OMP  nbits,nblocks,ncob,ncos_max,ncpu,neris_incore,neris_onthefly,neris_tmp,&
353!$OMP  neris_total,nimages,nints,nneighbors,npgfa,npgfb,npgfc,npgfd,&
354!$OMP  nprim_ints,n_processes,nseta,nsetb,nsgfa,nsgfb,nsgfc,nsgfd,&
355!$OMP  nsgfl_a,nsgfl_b,nsgfl_c,nsgfl_d,nsgf_max,offset_ac_set,offset_ad_set,&
356!$OMP  offset_bc_set,offset_bd_set,pac_buf,pac_buf_beta,pac_buf_resp,pac_buf_resp_beta,pad_buf,pad_buf_beta,pad_buf_resp,&
357!$OMP  pad_buf_resp_beta,particle_set,pbc_buf,pbc_buf_beta,pbc_buf_resp,pbc_buf_resp_beta,pbd_buf,pbd_buf_beta, &
358!$OMP  pbd_buf_resp, pbd_buf_resp_beta, pgf_list_ij,&
359!$OMP  pgf_list_kl,pgf_product_list,pmax_atom,pmax_blocks,pmax_entry,pmax_tmp,potential_parameter,primitive_forces,&
360!$OMP  primitive_forces_virial,private_deriv,ptr_p_1,ptr_p_2,ptr_p_3,ptr_p_4,ra,rab2,&
361!$OMP  rb,rc,rcd2,rd,screening_parameter,screen_pmat_forces,set_list_ij,set_list_kl,&
362!$OMP  sgfb,spherical_estimate,spherical_estimate_virial,sphi_a_ext,sphi_a_ext_set,sphi_a_u1,sphi_a_u2,sphi_a_u3,&
363!$OMP  sphi_b,sphi_b_ext,sphi_b_ext_set,sphi_b_u1,sphi_b_u2,sphi_b_u3,sphi_c_ext,sphi_c_ext_set,&
364!$OMP  sphi_c_u1,sphi_c_u2,sphi_c_u3,sphi_d_ext,sphi_d_ext_set,sphi_d_u1,sphi_d_u2,sphi_d_u3,&
365!$OMP  storage_counter_integrals,stor_count_max_val,swap_id,symm_fac,t2,tmp_block,tmp_i8, tmp_i4,&
366!$OMP  tmp_index,tmp_r_1,tmp_r_2,tmp_screen_pgf1,tmp_screen_pgf2,tmp_task_list,tmp_task_list_cost,tmp_virial,&
367!$OMP  treat_forces_in_core,use_disk_storage,zeta,zetb,zetc,zetd)
368
369      i_thread = 0
370!$    i_thread = omp_get_thread_num()
371
372      ln_10 = LOG(10.0_dp)
373      log_2 = LOG10(2.0_dp)
374      actual_x_data => qs_env%x_data(irep, i_thread + 1)
375      do_periodic = actual_x_data%periodic_parameter%do_periodic
376
377      screening_parameter = actual_x_data%screening_parameter
378      general_parameter = actual_x_data%general_parameter
379      potential_parameter = actual_x_data%potential_parameter
380      basis_info => actual_x_data%basis_info
381
382      load_balance_parameter => actual_x_data%load_balance_parameter
383      basis_parameter => actual_x_data%basis_parameter
384
385      ! IF(with_resp_density) THEN
386      !   ! here we also do a copy of the original load_balance_parameter
387      !   ! since if MP2 forces are required after the calculation of the HFX
388      !   ! derivatives we need to calculate again the SCF energy.
389      !   ALLOCATE(load_balance_parameter_energy)
390      !   load_balance_parameter_energy = actual_x_data%load_balance_parameter
391      ! END IF
392
393      memory_parameter => actual_x_data%memory_parameter
394
395      cache_size = memory_parameter%cache_size
396      bits_max_val = memory_parameter%bits_max_val
397
398      use_disk_storage = .FALSE.
399
400      CALL get_qs_env(qs_env=qs_env, &
401                      atomic_kind_set=atomic_kind_set, &
402                      particle_set=particle_set)
403
404      max_set = basis_info%max_set
405      max_am = basis_info%max_am
406      natom = SIZE(particle_set, 1)
407
408      treat_forces_in_core = memory_parameter%treat_forces_in_core
409
410      hf_fraction = general_parameter%fraction
411      hf_fraction = hf_fraction*my_adiabatic_rescale_factor
412      eps_schwarz = screening_parameter%eps_schwarz_forces
413      IF (eps_schwarz < 0.0_dp) THEN
414         log10_eps_schwarz = log_zero
415      ELSE
416         !! ** make eps_schwarz tighter in case of a stress calculation
417         IF (use_virial) eps_schwarz = eps_schwarz/actual_x_data%periodic_parameter%R_max_stress
418         log10_eps_schwarz = LOG10(eps_schwarz)
419      END IF
420      screen_pmat_forces = screening_parameter%do_p_screening_forces
421      eps_storage = eps_schwarz*memory_parameter%eps_storage_scaling
422
423      counter = 0_int_8
424
425      !! Copy the libint_guy
426      private_deriv = actual_x_data%lib_deriv
427
428      !! Get screening parameter
429
430      ALLOCATE (atom_of_kind(natom))
431      ALLOCATE (kind_of(natom))
432      CALL get_atomic_kind_set(atomic_kind_set=atomic_kind_set, &
433                               atom_of_kind=atom_of_kind, &
434                               kind_of=kind_of)
435
436      !! Create helper arrray for mapping local basis functions to global ones
437      ALLOCATE (last_sgf_global(0:natom))
438      last_sgf_global(0) = 0
439      DO iatom = 1, natom
440         ikind = kind_of(iatom)
441         last_sgf_global(iatom) = last_sgf_global(iatom - 1) + basis_parameter(ikind)%nsgf_total
442      END DO
443
444      ALLOCATE (max_contraction(max_set, natom))
445
446      max_contraction = 0.0_dp
447      max_pgf = 0
448      DO jatom = 1, natom
449         jkind = kind_of(jatom)
450         lb_max => basis_parameter(jkind)%lmax
451         npgfb => basis_parameter(jkind)%npgf
452         nsetb = basis_parameter(jkind)%nset
453         first_sgfb => basis_parameter(jkind)%first_sgf
454         sphi_b => basis_parameter(jkind)%sphi
455         nsgfb => basis_parameter(jkind)%nsgf
456         DO jset = 1, nsetb
457            ! takes the primitive to contracted transformation into account
458            ncob = npgfb(jset)*ncoset(lb_max(jset))
459            sgfb = first_sgfb(1, jset)
460            ! if the primitives are assumed to be all of max_val2, max_val2*p2s_b becomes
461            ! the maximum value after multiplication with sphi_b
462            max_contraction(jset, jatom) = MAXVAL((/(SUM(ABS(sphi_b(1:ncob, i))), i=sgfb, sgfb + nsgfb(jset) - 1)/))
463            max_pgf = MAX(max_pgf, npgfb(jset))
464         ENDDO
465      ENDDO
466
467      ncpu = para_env%num_pe
468      n_processes = ncpu*n_threads
469
470      !! initialize some counters
471      neris_total = 0_int_8
472      neris_incore = 0_int_8
473      neris_onthefly = 0_int_8
474      mem_eris = 0_int_8
475      mem_max_val = 0_int_8
476      compression_factor = 0.0_dp
477      nprim_ints = 0_int_8
478      neris_tmp = 0_int_8
479      max_val_memory = 1_int_8
480
481!$OMP MASTER
482      !! Set pointer for is_assoc helper
483      shm_is_assoc_atomic_block => actual_x_data%is_assoc_atomic_block
484      shm_number_of_p_entries = actual_x_data%number_of_p_entries
485      shm_atomic_block_offset => actual_x_data%atomic_block_offset
486      shm_set_offset => actual_x_data%set_offset
487      shm_block_offset => actual_x_data%block_offset
488
489      NULLIFY (full_density_alpha)
490      NULLIFY (full_density_beta)
491      ALLOCATE (full_density_alpha(shm_block_offset(ncpu + 1), nkimages))
492
493      !! Get the full density from all the processors
494      CALL timeset(routineN//"_getP", handle_getP)
495      CALL get_full_density(para_env, full_density_alpha(:, 1), rho_ao(1, 1)%matrix, shm_number_of_p_entries, &
496                            shm_block_offset, &
497                            kind_of, basis_parameter, get_max_vals_spin=.FALSE., &
498                            antisymmetric=is_anti_symmetric)
499      ! for now only closed shell case
500      IF (with_resp_density) THEN
501         NULLIFY (full_density_resp)
502         ALLOCATE (full_density_resp(shm_block_offset(ncpu + 1)))
503         CALL get_full_density(para_env, full_density_resp, rho_ao_resp(1)%matrix, shm_number_of_p_entries, &
504                               shm_block_offset, &
505                               kind_of, basis_parameter, get_max_vals_spin=.FALSE., &
506                               antisymmetric=is_anti_symmetric)
507      END IF
508
509      IF (nspins == 2) THEN
510         ALLOCATE (full_density_beta(shm_block_offset(ncpu + 1), 1))
511         CALL get_full_density(para_env, full_density_beta(:, 1), rho_ao(2, 1)%matrix, shm_number_of_p_entries, &
512                               shm_block_offset, &
513                               kind_of, basis_parameter, get_max_vals_spin=.FALSE., &
514                               antisymmetric=is_anti_symmetric)
515         ! With resp density
516         IF (with_resp_density) THEN
517            NULLIFY (full_density_resp_beta)
518            ALLOCATE (full_density_resp_beta(shm_block_offset(ncpu + 1)))
519            CALL get_full_density(para_env, full_density_resp_beta, rho_ao_resp(2)%matrix, shm_number_of_p_entries, &
520                                  shm_block_offset, &
521                                  kind_of, basis_parameter, get_max_vals_spin=.FALSE., &
522                                  antisymmetric=is_anti_symmetric)
523         END IF
524
525      END IF
526      CALL timestop(handle_getP)
527
528      !! Calculate max entries for screening on actual density. If screen_p_mat_forces = FALSE, the
529      !! matrix is initialized to 1.0
530      IF (screen_pmat_forces) THEN
531         NULLIFY (shm_initial_p)
532         shm_initial_p => actual_x_data%initial_p
533         shm_pmax_atom => actual_x_data%pmax_atom
534
535         IF (memory_parameter%treat_forces_in_core) THEN
536            shm_initial_p => actual_x_data%initial_p_forces
537            shm_pmax_atom => actual_x_data%pmax_atom_forces
538         END IF
539         IF (memory_parameter%recalc_forces) THEN
540            CALL update_pmax_mat(shm_initial_p, &
541                                 actual_x_data%map_atom_to_kind_atom, &
542                                 actual_x_data%set_offset, &
543                                 actual_x_data%atomic_block_offset, &
544                                 shm_pmax_atom, &
545                                 full_density_alpha, full_density_beta, &
546                                 natom, kind_of, basis_parameter, &
547                                 nkind, actual_x_data%is_assoc_atomic_block)
548         END IF
549      END IF
550
551      ! restore as full density the HF density
552      ! maybe in the future
553      IF (with_resp_density) THEN
554         full_density_alpha(:, 1) = full_density_alpha(:, 1) - full_density_resp
555         IF (nspins == 2) THEN
556            full_density_beta(:, 1) = &
557               full_density_beta(:, 1) - full_density_resp_beta
558         ENDIF
559         ! full_density_resp=full_density+full_density_resp
560      END IF
561
562      screen_coeffs_set => actual_x_data%screen_funct_coeffs_set
563      screen_coeffs_kind => actual_x_data%screen_funct_coeffs_kind
564      screen_coeffs_pgf => actual_x_data%screen_funct_coeffs_pgf
565      radii_pgf => actual_x_data%pair_dist_radii_pgf
566      shm_pmax_block => actual_x_data%pmax_block
567
568!$OMP END MASTER
569!$OMP BARRIER
570
571!$OMP MASTER
572      CALL timeset(routineN//"_load", handle_load)
573!$OMP END MASTER
574      !! Load balance the work
575      IF (actual_x_data%memory_parameter%recalc_forces) THEN
576         IF (actual_x_data%b_first_load_balance_forces) THEN
577            CALL hfx_load_balance(actual_x_data, eps_schwarz, particle_set, max_set, para_env, &
578                                  screen_coeffs_set, screen_coeffs_kind, &
579                                  shm_is_assoc_atomic_block, do_periodic, &
580                                  load_balance_parameter, kind_of, basis_parameter, shm_initial_p, &
581                                  shm_pmax_atom, i_thread, n_threads, &
582                                  cell, screen_pmat_forces, actual_x_data%map_atom_to_kind_atom, &
583                                  nkind, hfx_do_eval_forces, shm_pmax_block, use_virial)
584            actual_x_data%b_first_load_balance_forces = .FALSE.
585         ELSE
586            CALL hfx_update_load_balance(actual_x_data, para_env, &
587                                         load_balance_parameter, &
588                                         i_thread, n_threads, hfx_do_eval_forces)
589         END IF
590      END IF
591!$OMP MASTER
592      CALL timestop(handle_load)
593!$OMP END MASTER
594
595      !! precompute maximum nco and allocate scratch
596      ncos_max = 0
597      nsgf_max = 0
598      DO iatom = 1, natom
599         ikind = kind_of(iatom)
600         nseta = basis_parameter(ikind)%nset
601         npgfa => basis_parameter(ikind)%npgf
602         la_max => basis_parameter(ikind)%lmax
603         nsgfa => basis_parameter(ikind)%nsgf
604         DO iset = 1, nseta
605            ncos_max = MAX(ncos_max, ncoset(la_max(iset)))
606            nsgf_max = MAX(nsgf_max, nsgfa(iset))
607         ENDDO
608      ENDDO
609
610      !! Allocate work arrays
611      ALLOCATE (primitive_forces(12*nsgf_max**4))
612      primitive_forces = 0.0_dp
613
614      ! ** Allocate buffers for pgf_lists
615      nneighbors = SIZE(actual_x_data%neighbor_cells)
616      ALLOCATE (pgf_list_ij(max_pgf**2))
617      ALLOCATE (pgf_list_kl(max_pgf**2))
618!$OMP     ATOMIC READ
619      tmp_i4 = pgf_product_list_size
620      ALLOCATE (pgf_product_list(tmp_i4))
621      ALLOCATE (nimages(max_pgf**2))
622
623      DO i = 1, max_pgf**2
624         ALLOCATE (pgf_list_ij(i)%image_list(nneighbors))
625         ALLOCATE (pgf_list_kl(i)%image_list(nneighbors))
626      END DO
627
628      ALLOCATE (pbd_buf(nsgf_max**2))
629      ALLOCATE (pbc_buf(nsgf_max**2))
630      ALLOCATE (pad_buf(nsgf_max**2))
631      ALLOCATE (pac_buf(nsgf_max**2))
632      IF (with_resp_density) THEN
633         ALLOCATE (pbd_buf_resp(nsgf_max**2))
634         ALLOCATE (pbc_buf_resp(nsgf_max**2))
635         ALLOCATE (pad_buf_resp(nsgf_max**2))
636         ALLOCATE (pac_buf_resp(nsgf_max**2))
637      END IF
638      IF (nspins == 2) THEN
639         ALLOCATE (pbd_buf_beta(nsgf_max**2))
640         ALLOCATE (pbc_buf_beta(nsgf_max**2))
641         ALLOCATE (pad_buf_beta(nsgf_max**2))
642         ALLOCATE (pac_buf_beta(nsgf_max**2))
643         IF (with_resp_density) THEN
644            ALLOCATE (pbd_buf_resp_beta(nsgf_max**2))
645            ALLOCATE (pbc_buf_resp_beta(nsgf_max**2))
646            ALLOCATE (pad_buf_resp_beta(nsgf_max**2))
647            ALLOCATE (pac_buf_resp_beta(nsgf_max**2))
648         END IF
649      END IF
650
651      ALLOCATE (ede_work(ncos_max**4*12))
652      ALLOCATE (ede_work2(ncos_max**4*12))
653      ALLOCATE (ede_work_forces(ncos_max**4*12))
654      ALLOCATE (ede_buffer1(ncos_max**4))
655      ALLOCATE (ede_buffer2(ncos_max**4))
656      ALLOCATE (ede_primitives_tmp(nsgf_max**4))
657
658      IF (use_virial) THEN
659         ALLOCATE (primitive_forces_virial(12*nsgf_max**4*3))
660         primitive_forces_virial = 0.0_dp
661         ALLOCATE (ede_work_virial(ncos_max**4*12*3))
662         ALLOCATE (ede_work2_virial(ncos_max**4*12*3))
663         ALLOCATE (ede_primitives_tmp_virial(nsgf_max**4))
664         tmp_virial = 0.0_dp
665      ELSE
666         ! dummy allocation
667         ALLOCATE (primitive_forces_virial(1))
668         primitive_forces_virial = 0.0_dp
669         ALLOCATE (ede_work_virial(1))
670         ALLOCATE (ede_work2_virial(1))
671         ALLOCATE (ede_primitives_tmp_virial(1))
672      END IF
673
674      !! Start caluclating integrals of the form (ab|cd) or (ij|kl)
675      !! In order to do so, there is a main four-loop structure that takes into account the two symmetries
676      !!
677      !!   (ab|cd) = (ba|cd) = (ab|dc) = (ba|dc)
678      !!
679      !! by iterating in the following way
680      !!
681      !! DO iatom=1,natom               and       DO katom=1,natom
682      !!   DO jatom=iatom,natom                     DO latom=katom,natom
683      !!
684      !! The third symmetry
685      !!
686      !!  (ab|cd) = (cd|ab)
687      !!
688      !! is taken into account by the following criterion:
689      !!
690      !! IF(katom+latom<=iatom+jatom)  THEN
691      !!   IF( ((iatom+jatom).EQ.(katom+latom) ) .AND.(katom<iatom)) CYCLE
692      !!
693      !! Furthermore, if iatom==jatom==katom==latom we cycle, because the derivatives are zero anyway.
694      !!
695      !! Depending on the degeneracy of an integral the exchange contribution is multiplied by a corresponding
696      !! factor ( symm_fac ).
697      !!
698      !! If one quartet does not pass the screening we CYCLE on the outer most possible loop. Thats why we use
699      !! different hierarchies of short range screening matrices.
700      !!
701      !! If we do a parallel run, each process owns a unique array of workloads. Here, a workload is
702      !! defined as :
703      !!
704      !! istart, jstart, kstart, lstart, number_of_atom_quartets, initial_cpu_id
705      !!
706      !! This tells the process where to start the main loops and how many bunches of integrals it has to
707      !! calculate. The original parallelization is a simple modulo distribution that is binned and
708      !! optimized in the load_balance routines. Since the Monte Carlo routines can swap processors,
709      !! we need to know which was the initial cpu_id.
710      !! Furthermore, the indices iatom, jatom, katom, latom have to be set to istart, jstart, kstart and
711      !! lstart only the first time the loop is executed. All subsequent loops have to start with one or
712      !! iatom and katom respectively. Therefore, we use flags like first_j_loop etc.
713
714!$OMP BARRIER
715!$OMP MASTER
716      CALL timeset(routineN//"_main", handle_main)
717!$OMP END MASTER
718!$OMP BARRIER
719
720      coeffs_kind_max0 = MAXVAL(screen_coeffs_kind(:, :)%x(2))
721      ALLOCATE (set_list_ij((max_set*load_balance_parameter%block_size)**2))
722      ALLOCATE (set_list_kl((max_set*load_balance_parameter%block_size)**2))
723
724!$OMP BARRIER
725
726      IF (actual_x_data%memory_parameter%recalc_forces) THEN
727         actual_x_data%distribution_forces%ram_counter = HUGE(distribution_forces%ram_counter)
728         memory_parameter%ram_counter_forces = HUGE(memory_parameter%ram_counter_forces)
729      END IF
730
731      IF (treat_forces_in_core) THEN
732         buffer_overflow = .FALSE.
733      ELSE
734         buffer_overflow = .TRUE.
735      END IF
736
737      do_dynamic_load_balancing = .TRUE.
738      IF (n_threads == 1) do_dynamic_load_balancing = .FALSE.
739
740      IF (do_dynamic_load_balancing) THEN
741         my_bin_size = SIZE(actual_x_data%distribution_forces)
742      ELSE
743         my_bin_size = 1
744      END IF
745      !! We do not need the containers if MAX_MEM = 0
746      IF (treat_forces_in_core) THEN
747         !! IF new md step -> reinitialize containers
748         IF (actual_x_data%memory_parameter%recalc_forces) THEN
749            CALL dealloc_containers(actual_x_data, hfx_do_eval_forces)
750            CALL alloc_containers(actual_x_data, my_bin_size, hfx_do_eval_forces)
751
752            DO bin = 1, my_bin_size
753               maxval_container => actual_x_data%maxval_container_forces(bin)
754               integral_containers => actual_x_data%integral_containers_forces(:, bin)
755               CALL hfx_init_container(maxval_container, memory_parameter%actual_memory_usage, .FALSE.)
756               DO i = 1, 64
757                  CALL hfx_init_container(integral_containers(i), memory_parameter%actual_memory_usage, .FALSE.)
758               END DO
759            END DO
760         END IF
761
762         !! Decompress the first cache for maxvals and integrals
763         IF (.NOT. actual_x_data%memory_parameter%recalc_forces) THEN
764            DO bin = 1, my_bin_size
765               maxval_cache => actual_x_data%maxval_cache_forces(bin)
766               maxval_container => actual_x_data%maxval_container_forces(bin)
767               integral_caches => actual_x_data%integral_caches_forces(:, bin)
768               integral_containers => actual_x_data%integral_containers_forces(:, bin)
769               CALL hfx_decompress_first_cache(bits_max_val, maxval_cache, maxval_container, &
770                                               memory_parameter%actual_memory_usage, .FALSE.)
771               DO i = 1, 64
772                  CALL hfx_decompress_first_cache(i, integral_caches(i), integral_containers(i), &
773                                                  memory_parameter%actual_memory_usage, .FALSE.)
774               END DO
775            END DO
776         END IF
777      END IF
778
779!$OMP BARRIER
780!$OMP MASTER
781      IF (do_dynamic_load_balancing) THEN
782         ! ** Lets construct the task list
783         shm_total_bins = 0
784         DO i = 1, n_threads
785            shm_total_bins = shm_total_bins + SIZE(qs_env%x_data(irep, i)%distribution_forces)
786         END DO
787         ALLOCATE (tmp_task_list(shm_total_bins))
788         shm_task_counter = 0
789         DO i = 1, n_threads
790            DO bin = 1, SIZE(qs_env%x_data(irep, i)%distribution_forces)
791               shm_task_counter = shm_task_counter + 1
792               tmp_task_list(shm_task_counter)%thread_id = i
793               tmp_task_list(shm_task_counter)%bin_id = bin
794               tmp_task_list(shm_task_counter)%cost = qs_env%x_data(irep, i)%distribution_forces(bin)%cost
795            END DO
796         END DO
797
798         ! ** Now sort the task list
799         ALLOCATE (tmp_task_list_cost(shm_total_bins))
800         ALLOCATE (tmp_index(shm_total_bins))
801
802         DO i = 1, shm_total_bins
803            tmp_task_list_cost(i) = tmp_task_list(i)%cost
804         END DO
805
806         CALL sort(tmp_task_list_cost, shm_total_bins, tmp_index)
807
808         ALLOCATE (actual_x_data%task_list(shm_total_bins))
809
810         DO i = 1, shm_total_bins
811            actual_x_data%task_list(i) = tmp_task_list(tmp_index(shm_total_bins - i + 1))
812         END DO
813
814         shm_task_list => actual_x_data%task_list
815         shm_task_counter = 0
816
817         DEALLOCATE (tmp_task_list_cost, tmp_index, tmp_task_list)
818      END IF
819
820      !! precalculate maximum density matrix elements in blocks
821      shm_pmax_block => actual_x_data%pmax_block
822      actual_x_data%pmax_block = 0.0_dp
823      IF (screen_pmat_forces) THEN
824         DO iatom_block = 1, SIZE(actual_x_data%blocks)
825            iatom_start = actual_x_data%blocks(iatom_block)%istart
826            iatom_end = actual_x_data%blocks(iatom_block)%iend
827            DO jatom_block = 1, SIZE(actual_x_data%blocks)
828               jatom_start = actual_x_data%blocks(jatom_block)%istart
829               jatom_end = actual_x_data%blocks(jatom_block)%iend
830               shm_pmax_block(iatom_block, jatom_block) = MAXVAL(shm_pmax_atom(iatom_start:iatom_end, jatom_start:jatom_end))
831            END DO
832         END DO
833      END IF
834      shm_atomic_pair_list => actual_x_data%atomic_pair_list_forces
835      IF (actual_x_data%memory_parameter%recalc_forces) THEN
836         CALL build_atomic_pair_list(natom, shm_atomic_pair_list, kind_of, basis_parameter, particle_set, &
837                                     do_periodic, screen_coeffs_kind, coeffs_kind_max0, log10_eps_schwarz, cell, &
838                                     actual_x_data%blocks)
839      END IF
840
841      my_bin_size = SIZE(actual_x_data%distribution_forces)
842      DO bin = 1, my_bin_size
843         actual_x_data%distribution_forces(bin)%time_forces = 0.0_dp
844      ENDDO
845!$OMP END MASTER
846!$OMP BARRIER
847
848      IF (treat_forces_in_core) THEN
849         IF (my_bin_size > 0) THEN
850            maxval_container => actual_x_data%maxval_container_forces(1)
851            maxval_cache => actual_x_data%maxval_cache_forces(1)
852
853            integral_containers => actual_x_data%integral_containers_forces(:, 1)
854            integral_caches => actual_x_data%integral_caches_forces(:, 1)
855         END IF
856      END IF
857
858      nblocks = load_balance_parameter%nblocks
859
860      bins_left = .TRUE.
861      do_it = .TRUE.
862      bin = 0
863      DO WHILE (bins_left)
864         IF (.NOT. do_dynamic_load_balancing) THEN
865            bin = bin + 1
866            IF (bin > my_bin_size) THEN
867               do_it = .FALSE.
868               bins_left = .FALSE.
869            ELSE
870               do_it = .TRUE.
871               bins_left = .TRUE.
872               distribution_forces => actual_x_data%distribution_forces(bin)
873            END IF
874         ELSE
875!$OMP CRITICAL(hfxderivatives_critical)
876            shm_task_counter = shm_task_counter + 1
877            IF (shm_task_counter <= shm_total_bins) THEN
878               my_thread_id = shm_task_list(shm_task_counter)%thread_id
879               my_bin_id = shm_task_list(shm_task_counter)%bin_id
880               IF (treat_forces_in_core) THEN
881                  maxval_cache => qs_env%x_data(irep, my_thread_id)%maxval_cache_forces(my_bin_id)
882                  maxval_container => qs_env%x_data(irep, my_thread_id)%maxval_container_forces(my_bin_id)
883                  integral_caches => qs_env%x_data(irep, my_thread_id)%integral_caches_forces(:, my_bin_id)
884                  integral_containers => qs_env%x_data(irep, my_thread_id)%integral_containers_forces(:, my_bin_id)
885               END IF
886               distribution_forces => qs_env%x_data(irep, my_thread_id)%distribution_forces(my_bin_id)
887               do_it = .TRUE.
888               bins_left = .TRUE.
889               IF (actual_x_data%memory_parameter%recalc_forces) THEN
890                  distribution_forces%ram_counter = HUGE(distribution_forces%ram_counter)
891               END IF
892               counter = 0_Int_8
893            ELSE
894               do_it = .FALSE.
895               bins_left = .FALSE.
896            END IF
897!$OMP END CRITICAL(hfxderivatives_critical)
898         END IF
899
900         IF (.NOT. do_it) CYCLE
901!$OMP MASTER
902         CALL timeset(routineN//"_bin", handle_bin)
903!$OMP END MASTER
904         bintime_start = m_walltime()
905         my_istart = distribution_forces%istart
906         my_current_counter = 0
907         IF (distribution_forces%number_of_atom_quartets == 0 .OR. &
908             my_istart == -1_int_8) my_istart = nblocks**4
909         atomic_blocks: DO atom_block = my_istart, nblocks**4 - 1, n_processes
910            latom_block = INT(MODULO(atom_block, nblocks)) + 1
911            tmp_block = atom_block/nblocks
912            katom_block = INT(MODULO(tmp_block, nblocks)) + 1
913            IF (latom_block < katom_block) CYCLE
914            tmp_block = tmp_block/nblocks
915            jatom_block = INT(MODULO(tmp_block, nblocks)) + 1
916            tmp_block = tmp_block/nblocks
917            iatom_block = INT(MODULO(tmp_block, nblocks)) + 1
918            IF (jatom_block < iatom_block) CYCLE
919            my_current_counter = my_current_counter + 1
920            IF (my_current_counter > distribution_forces%number_of_atom_quartets) EXIT atomic_blocks
921
922            iatom_start = actual_x_data%blocks(iatom_block)%istart
923            iatom_end = actual_x_data%blocks(iatom_block)%iend
924            jatom_start = actual_x_data%blocks(jatom_block)%istart
925            jatom_end = actual_x_data%blocks(jatom_block)%iend
926            katom_start = actual_x_data%blocks(katom_block)%istart
927            katom_end = actual_x_data%blocks(katom_block)%iend
928            latom_start = actual_x_data%blocks(latom_block)%istart
929            latom_end = actual_x_data%blocks(latom_block)%iend
930
931            pmax_blocks = MAX(shm_pmax_block(katom_block, iatom_block) + &
932                              shm_pmax_block(latom_block, jatom_block), &
933                              shm_pmax_block(latom_block, iatom_block) + &
934                              shm_pmax_block(katom_block, jatom_block))
935
936            IF (2.0_dp*coeffs_kind_max0 + pmax_blocks < log10_eps_schwarz) CYCLE
937
938            CALL build_pair_list(natom, list_ij, set_list_ij, iatom_start, iatom_end, &
939                                 jatom_start, jatom_end, &
940                                 kind_of, basis_parameter, particle_set, &
941                                 do_periodic, screen_coeffs_set, screen_coeffs_kind, coeffs_kind_max0, &
942                                 log10_eps_schwarz, cell, pmax_blocks, shm_atomic_pair_list)
943
944            CALL build_pair_list(natom, list_kl, set_list_kl, katom_start, katom_end, &
945                                 latom_start, latom_end, &
946                                 kind_of, basis_parameter, particle_set, &
947                                 do_periodic, screen_coeffs_set, screen_coeffs_kind, coeffs_kind_max0, &
948                                 log10_eps_schwarz, cell, pmax_blocks, shm_atomic_pair_list)
949
950            DO i_list_ij = 1, list_ij%n_element
951               iatom = list_ij%elements(i_list_ij)%pair(1)
952               jatom = list_ij%elements(i_list_ij)%pair(2)
953               i_set_list_ij_start = list_ij%elements(i_list_ij)%set_bounds(1)
954               i_set_list_ij_stop = list_ij%elements(i_list_ij)%set_bounds(2)
955               ikind = list_ij%elements(i_list_ij)%kind_pair(1)
956               jkind = list_ij%elements(i_list_ij)%kind_pair(2)
957               ra = list_ij%elements(i_list_ij)%r1
958               rb = list_ij%elements(i_list_ij)%r2
959               rab2 = list_ij%elements(i_list_ij)%dist2
960
961               la_max => basis_parameter(ikind)%lmax
962               la_min => basis_parameter(ikind)%lmin
963               npgfa => basis_parameter(ikind)%npgf
964               nseta = basis_parameter(ikind)%nset
965               zeta => basis_parameter(ikind)%zet
966               nsgfa => basis_parameter(ikind)%nsgf
967               sphi_a_ext => basis_parameter(ikind)%sphi_ext(:, :, :, :)
968               nsgfl_a => basis_parameter(ikind)%nsgfl
969               sphi_a_u1 = UBOUND(sphi_a_ext, 1)
970               sphi_a_u2 = UBOUND(sphi_a_ext, 2)
971               sphi_a_u3 = UBOUND(sphi_a_ext, 3)
972
973               lb_max => basis_parameter(jkind)%lmax
974               lb_min => basis_parameter(jkind)%lmin
975               npgfb => basis_parameter(jkind)%npgf
976               nsetb = basis_parameter(jkind)%nset
977               zetb => basis_parameter(jkind)%zet
978               nsgfb => basis_parameter(jkind)%nsgf
979               sphi_b_ext => basis_parameter(jkind)%sphi_ext(:, :, :, :)
980               nsgfl_b => basis_parameter(jkind)%nsgfl
981               sphi_b_u1 = UBOUND(sphi_b_ext, 1)
982               sphi_b_u2 = UBOUND(sphi_b_ext, 2)
983               sphi_b_u3 = UBOUND(sphi_b_ext, 3)
984
985               i_atom = atom_of_kind(iatom)
986               forces_map(1, 1) = ikind
987               forces_map(1, 2) = i_atom
988               j_atom = atom_of_kind(jatom)
989               forces_map(2, 1) = jkind
990               forces_map(2, 2) = j_atom
991
992               DO i_list_kl = 1, list_kl%n_element
993                  katom = list_kl%elements(i_list_kl)%pair(1)
994                  latom = list_kl%elements(i_list_kl)%pair(2)
995
996                  !All four centers equivalent => zero-contribution
997!VIIRAL            IF((iatom==jatom .AND. iatom==katom .AND. iatom==latom)) CYCLE
998                  IF (.NOT. (katom + latom <= iatom + jatom)) CYCLE
999                  IF (((iatom + jatom) .EQ. (katom + latom)) .AND. (katom < iatom)) CYCLE
1000
1001                  i_set_list_kl_start = list_kl%elements(i_list_kl)%set_bounds(1)
1002                  i_set_list_kl_stop = list_kl%elements(i_list_kl)%set_bounds(2)
1003                  kkind = list_kl%elements(i_list_kl)%kind_pair(1)
1004                  lkind = list_kl%elements(i_list_kl)%kind_pair(2)
1005                  rc = list_kl%elements(i_list_kl)%r1
1006                  rd = list_kl%elements(i_list_kl)%r2
1007                  rcd2 = list_kl%elements(i_list_kl)%dist2
1008
1009                  IF (screen_pmat_forces) THEN
1010                     pmax_atom = MAX(shm_pmax_atom(katom, iatom) + &
1011                                     shm_pmax_atom(latom, jatom), &
1012                                     shm_pmax_atom(latom, iatom) + &
1013                                     shm_pmax_atom(katom, jatom))
1014                  ELSE
1015                     pmax_atom = 0.0_dp
1016                  END IF
1017
1018                  IF ((screen_coeffs_kind(jkind, ikind)%x(1)*rab2 + &
1019                       screen_coeffs_kind(jkind, ikind)%x(2)) + &
1020                      (screen_coeffs_kind(lkind, kkind)%x(1)*rcd2 + &
1021                       screen_coeffs_kind(lkind, kkind)%x(2)) + pmax_atom < log10_eps_schwarz) CYCLE
1022
1023                  IF (.NOT. (shm_is_assoc_atomic_block(latom, iatom) >= 1 .AND. &
1024                             shm_is_assoc_atomic_block(katom, iatom) >= 1 .AND. &
1025                             shm_is_assoc_atomic_block(katom, jatom) >= 1 .AND. &
1026                             shm_is_assoc_atomic_block(latom, jatom) >= 1)) CYCLE
1027                  k_atom = atom_of_kind(katom)
1028                  forces_map(3, 1) = kkind
1029                  forces_map(3, 2) = k_atom
1030
1031                  l_atom = atom_of_kind(latom)
1032                  forces_map(4, 1) = lkind
1033                  forces_map(4, 2) = l_atom
1034
1035                  IF (nspins == 1) THEN
1036                     fac = 0.25_dp*hf_fraction
1037                  ELSE
1038                     fac = 0.5_dp*hf_fraction
1039                  END IF
1040                  !calculate symmetry_factor
1041                  symm_fac = 0.25_dp
1042                  IF (iatom == jatom) symm_fac = symm_fac*2.0_dp
1043                  IF (katom == latom) symm_fac = symm_fac*2.0_dp
1044                  IF (iatom == katom .AND. jatom == latom .AND. &
1045                      iatom /= jatom .AND. katom /= latom) symm_fac = symm_fac*2.0_dp
1046                  IF (iatom == katom .AND. iatom == jatom .AND. &
1047                      katom == latom) symm_fac = symm_fac*2.0_dp
1048
1049                  symm_fac = 1.0_dp/symm_fac
1050                  fac = fac*symm_fac
1051
1052                  lc_max => basis_parameter(kkind)%lmax
1053                  lc_min => basis_parameter(kkind)%lmin
1054                  npgfc => basis_parameter(kkind)%npgf
1055                  zetc => basis_parameter(kkind)%zet
1056                  nsgfc => basis_parameter(kkind)%nsgf
1057                  sphi_c_ext => basis_parameter(kkind)%sphi_ext(:, :, :, :)
1058                  nsgfl_c => basis_parameter(kkind)%nsgfl
1059                  sphi_c_u1 = UBOUND(sphi_c_ext, 1)
1060                  sphi_c_u2 = UBOUND(sphi_c_ext, 2)
1061                  sphi_c_u3 = UBOUND(sphi_c_ext, 3)
1062
1063                  ld_max => basis_parameter(lkind)%lmax
1064                  ld_min => basis_parameter(lkind)%lmin
1065                  npgfd => basis_parameter(lkind)%npgf
1066                  zetd => basis_parameter(lkind)%zet
1067                  nsgfd => basis_parameter(lkind)%nsgf
1068                  sphi_d_ext => basis_parameter(lkind)%sphi_ext(:, :, :, :)
1069                  nsgfl_d => basis_parameter(lkind)%nsgfl
1070                  sphi_d_u1 = UBOUND(sphi_d_ext, 1)
1071                  sphi_d_u2 = UBOUND(sphi_d_ext, 2)
1072                  sphi_d_u3 = UBOUND(sphi_d_ext, 3)
1073
1074                  atomic_offset_bd = shm_atomic_block_offset(jatom, latom)
1075                  atomic_offset_bc = shm_atomic_block_offset(jatom, katom)
1076                  atomic_offset_ad = shm_atomic_block_offset(iatom, latom)
1077                  atomic_offset_ac = shm_atomic_block_offset(iatom, katom)
1078
1079                  IF (jatom < latom) THEN
1080                     offset_bd_set => shm_set_offset(:, :, lkind, jkind)
1081                  ELSE
1082                     offset_bd_set => shm_set_offset(:, :, jkind, lkind)
1083                  END IF
1084                  IF (jatom < katom) THEN
1085                     offset_bc_set => shm_set_offset(:, :, kkind, jkind)
1086                  ELSE
1087                     offset_bc_set => shm_set_offset(:, :, jkind, kkind)
1088                  END IF
1089                  IF (iatom < latom) THEN
1090                     offset_ad_set => shm_set_offset(:, :, lkind, ikind)
1091                  ELSE
1092                     offset_ad_set => shm_set_offset(:, :, ikind, lkind)
1093                  END IF
1094                  IF (iatom < katom) THEN
1095                     offset_ac_set => shm_set_offset(:, :, kkind, ikind)
1096                  ELSE
1097                     offset_ac_set => shm_set_offset(:, :, ikind, kkind)
1098                  END IF
1099
1100                  IF (screen_pmat_forces) THEN
1101                     swap_id = 16
1102                     kind_kind_idx = INT(get_1D_idx(kkind, ikind, INT(nkind, int_8)))
1103                     IF (ikind >= kkind) THEN
1104                        ptr_p_1 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1105                                                                       actual_x_data%map_atom_to_kind_atom(katom), &
1106                                                                       actual_x_data%map_atom_to_kind_atom(iatom))
1107                     ELSE
1108                        ptr_p_1 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1109                                                                       actual_x_data%map_atom_to_kind_atom(iatom), &
1110                                                                       actual_x_data%map_atom_to_kind_atom(katom))
1111                        swap_id = swap_id + 1
1112                     END IF
1113                     kind_kind_idx = INT(get_1D_idx(lkind, jkind, INT(nkind, int_8)))
1114                     IF (jkind >= lkind) THEN
1115                        ptr_p_2 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1116                                                                       actual_x_data%map_atom_to_kind_atom(latom), &
1117                                                                       actual_x_data%map_atom_to_kind_atom(jatom))
1118                     ELSE
1119                        ptr_p_2 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1120                                                                       actual_x_data%map_atom_to_kind_atom(jatom), &
1121                                                                       actual_x_data%map_atom_to_kind_atom(latom))
1122                        swap_id = swap_id + 2
1123                     END IF
1124                     kind_kind_idx = INT(get_1D_idx(lkind, ikind, INT(nkind, int_8)))
1125                     IF (ikind >= lkind) THEN
1126                        ptr_p_3 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1127                                                                       actual_x_data%map_atom_to_kind_atom(latom), &
1128                                                                       actual_x_data%map_atom_to_kind_atom(iatom))
1129                     ELSE
1130                        ptr_p_3 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1131                                                                       actual_x_data%map_atom_to_kind_atom(iatom), &
1132                                                                       actual_x_data%map_atom_to_kind_atom(latom))
1133                        swap_id = swap_id + 4
1134                     END IF
1135                     kind_kind_idx = INT(get_1D_idx(kkind, jkind, INT(nkind, int_8)))
1136                     IF (jkind >= kkind) THEN
1137                        ptr_p_4 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1138                                                                       actual_x_data%map_atom_to_kind_atom(katom), &
1139                                                                       actual_x_data%map_atom_to_kind_atom(jatom))
1140                     ELSE
1141                        ptr_p_4 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1142                                                                       actual_x_data%map_atom_to_kind_atom(jatom), &
1143                                                                       actual_x_data%map_atom_to_kind_atom(katom))
1144                        swap_id = swap_id + 8
1145                     END IF
1146                  END IF
1147
1148                  !! At this stage, check for memory used in compression
1149
1150                  IF (actual_x_data%memory_parameter%recalc_forces) THEN
1151                     IF (treat_forces_in_core) THEN
1152                        ! ** We know the maximum amount of integrals that we can store per MPI-process
1153                        ! ** Now we need to sum the current memory usage among all openMP threads
1154                        ! ** We can just read what is currently stored on the corresponding x_data type
1155                        ! ** If this is thread i and it tries to read the data from thread j, that is
1156                        ! ** currently writing that data, we just dont care, because the possible error
1157                        ! ** is of the order of CACHE_SIZE
1158                        mem_compression_counter = 0
1159                        DO i = 1, n_threads
1160!$OMP                   ATOMIC READ
1161                           tmp_i4 = qs_env%x_data(irep, i)%memory_parameter%actual_memory_usage
1162                           mem_compression_counter = mem_compression_counter + &
1163                                                     tmp_i4*memory_parameter%cache_size
1164                        END DO
1165                        IF (mem_compression_counter + memory_parameter%final_comp_counter_energy &
1166                            > memory_parameter%max_compression_counter) THEN
1167                           buffer_overflow = .TRUE.
1168                           IF (do_dynamic_load_balancing) THEN
1169                              distribution_forces%ram_counter = counter
1170                           ELSE
1171                              memory_parameter%ram_counter_forces = counter
1172                           END IF
1173                        ELSE
1174                           counter = counter + 1
1175                           buffer_overflow = .FALSE.
1176                        END IF
1177                     END IF
1178                  ELSE
1179                     IF (treat_forces_in_core) THEN
1180                        IF (do_dynamic_load_balancing) THEN
1181                           IF (distribution_forces%ram_counter == counter) THEN
1182                              buffer_overflow = .TRUE.
1183                           ELSE
1184                              counter = counter + 1
1185                              buffer_overflow = .FALSE.
1186                           END IF
1187                        ELSE
1188                           IF (memory_parameter%ram_counter_forces == counter) THEN
1189                              buffer_overflow = .TRUE.
1190                           ELSE
1191                              counter = counter + 1
1192                              buffer_overflow = .FALSE.
1193                           END IF
1194                        END IF
1195                     END IF
1196                  END IF
1197
1198                  DO i_set_list_ij = i_set_list_ij_start, i_set_list_ij_stop
1199                     iset = set_list_ij(i_set_list_ij)%pair(1)
1200                     jset = set_list_ij(i_set_list_ij)%pair(2)
1201
1202                     ncob = npgfb(jset)*ncoset(lb_max(jset))
1203                     max_val1 = screen_coeffs_set(jset, iset, jkind, ikind)%x(1)*rab2 + &
1204                                screen_coeffs_set(jset, iset, jkind, ikind)%x(2)
1205                     !! Near field screening
1206                     IF (max_val1 + (screen_coeffs_kind(lkind, kkind)%x(1)*rcd2 + &
1207                                     screen_coeffs_kind(lkind, kkind)%x(2)) + pmax_atom < log10_eps_schwarz) CYCLE
1208                     sphi_a_ext_set => sphi_a_ext(:, :, :, iset)
1209                     sphi_b_ext_set => sphi_b_ext(:, :, :, jset)
1210
1211                     DO i_set_list_kl = i_set_list_kl_start, i_set_list_kl_stop
1212                        kset = set_list_kl(i_set_list_kl)%pair(1)
1213                        lset = set_list_kl(i_set_list_kl)%pair(2)
1214
1215                        max_val2_set = (screen_coeffs_set(lset, kset, lkind, kkind)%x(1)*rcd2 + &
1216                                        screen_coeffs_set(lset, kset, lkind, kkind)%x(2))
1217                        max_val2 = max_val1 + max_val2_set
1218
1219                        !! Near field screening
1220                        IF (max_val2 + pmax_atom < log10_eps_schwarz) CYCLE
1221                        sphi_c_ext_set => sphi_c_ext(:, :, :, kset)
1222                        sphi_d_ext_set => sphi_d_ext(:, :, :, lset)
1223
1224                        IF (screen_pmat_forces) THEN
1225                           CALL get_pmax_val(ptr_p_1, ptr_p_2, ptr_p_3, ptr_p_4, &
1226                                             iset, jset, kset, lset, &
1227                                             pmax_tmp, swap_id)
1228
1229                           log10_pmax = log_2 + pmax_tmp
1230                        ELSE
1231                           log10_pmax = 0.0_dp
1232                        END IF
1233
1234                        max_val2 = max_val2 + log10_pmax
1235                        IF (max_val2 < log10_eps_schwarz) CYCLE
1236
1237                        pmax_entry = EXP(log10_pmax*ln_10)
1238                        !! store current number of integrals, update total number and number of integrals in buffer
1239                        current_counter = nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset)*12
1240                        IF (buffer_overflow) THEN
1241                           neris_onthefly = neris_onthefly + current_counter
1242                        END IF
1243
1244                        !! Get integrals from buffer and update Kohn-Sham matrix
1245                        IF (.NOT. buffer_overflow .AND. .NOT. actual_x_data%memory_parameter%recalc_forces) THEN
1246                           nints = current_counter
1247                           CALL hfx_get_single_cache_element(estimate_to_store_int, 6, &
1248                                                             maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
1249                                                             use_disk_storage)
1250                           spherical_estimate = SET_EXPONENT(1.0_dp, estimate_to_store_int + 1)
1251!                           IF (spherical_estimate*pmax_entry < eps_schwarz) CYCLE
1252                           IF (.NOT. use_virial) THEN
1253                              IF (spherical_estimate*pmax_entry < eps_schwarz) CYCLE
1254                           END IF
1255                           nbits = EXPONENT(ANINT(spherical_estimate*pmax_entry/eps_storage)) + 1
1256                           buffer_left = nints
1257                           buffer_start = 1
1258                           neris_incore = neris_incore + INT(nints, int_8)
1259                           DO WHILE (buffer_left > 0)
1260                              buffer_size = MIN(buffer_left, cache_size)
1261                              CALL hfx_get_mult_cache_elements(primitive_forces(buffer_start), &
1262                                                               buffer_size, nbits, &
1263                                                               integral_caches(nbits), &
1264                                                               integral_containers(nbits), &
1265                                                               eps_storage, pmax_entry, &
1266                                                               memory_parameter%actual_memory_usage, &
1267                                                               use_disk_storage)
1268                              buffer_left = buffer_left - buffer_size
1269                              buffer_start = buffer_start + buffer_size
1270                           END DO
1271                        END IF
1272                        !! Calculate integrals if we run out of buffer or the geometry did change
1273                        IF (actual_x_data%memory_parameter%recalc_forces .OR. buffer_overflow) THEN
1274                           max_contraction_val = max_contraction(iset, iatom)* &
1275                                                 max_contraction(jset, jatom)* &
1276                                                 max_contraction(kset, katom)* &
1277                                                 max_contraction(lset, latom)* &
1278                                                 pmax_entry
1279                           tmp_R_1 => radii_pgf(:, :, jset, iset, jkind, ikind)
1280                           tmp_R_2 => radii_pgf(:, :, lset, kset, lkind, kkind)
1281                           tmp_screen_pgf1 => screen_coeffs_pgf(:, :, jset, iset, jkind, ikind)
1282                           tmp_screen_pgf2 => screen_coeffs_pgf(:, :, lset, kset, lkind, kkind)
1283
1284                           CALL forces4(private_deriv, ra, rb, rc, rd, npgfa(iset), npgfb(jset), &
1285                                        npgfc(kset), npgfd(lset), &
1286                                        la_min(iset), la_max(iset), lb_min(jset), lb_max(jset), &
1287                                        lc_min(kset), lc_max(kset), ld_min(lset), ld_max(lset), &
1288                                        nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1289                                        sphi_a_u1, sphi_a_u2, sphi_a_u3, &
1290                                        sphi_b_u1, sphi_b_u2, sphi_b_u3, &
1291                                        sphi_c_u1, sphi_c_u2, sphi_c_u3, &
1292                                        sphi_d_u1, sphi_d_u2, sphi_d_u3, &
1293                                        zeta(1:npgfa(iset), iset), zetb(1:npgfb(jset), jset), &
1294                                        zetc(1:npgfc(kset), kset), zetd(1:npgfd(lset), lset), &
1295                                        primitive_forces, &
1296                                        potential_parameter, &
1297                                        actual_x_data%neighbor_cells, &
1298                                        screen_coeffs_set(jset, iset, jkind, ikind)%x, &
1299                                        screen_coeffs_set(lset, kset, lkind, kkind)%x, eps_schwarz, &
1300                                        max_contraction_val, cartesian_estimate, cell, neris_tmp, &
1301                                        log10_pmax, log10_eps_schwarz, &
1302                                        tmp_R_1, tmp_R_2, tmp_screen_pgf1, tmp_screen_pgf2, &
1303                                        pgf_list_ij, pgf_list_kl, pgf_product_list, &
1304                                        nsgfl_a(:, iset), nsgfl_b(:, jset), &
1305                                        nsgfl_c(:, kset), nsgfl_d(:, lset), &
1306                                        sphi_a_ext_set, sphi_b_ext_set, sphi_c_ext_set, sphi_d_ext_set, &
1307                                        ede_work, ede_work2, ede_work_forces, &
1308                                        ede_buffer1, ede_buffer2, ede_primitives_tmp, &
1309                                        nimages, do_periodic, use_virial, ede_work_virial, ede_work2_virial, &
1310                                        ede_primitives_tmp_virial, primitive_forces_virial, &
1311                                        cartesian_estimate_virial)
1312
1313                           nints = nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset)*12
1314                           neris_total = neris_total + nints
1315                           nprim_ints = nprim_ints + neris_tmp
1316!                           IF (cartesian_estimate == 0.0_dp) cartesian_estimate = TINY(cartesian_estimate)
1317!                           estimate_to_store_int = EXPONENT(cartesian_estimate)
1318!                           estimate_to_store_int = MAX(estimate_to_store_int, -15_int_8)
1319!                           cartesian_estimate = SET_EXPONENT(1.0_dp, estimate_to_store_int+1)
1320!                           IF (.NOT. buffer_overflow .AND. actual_x_data%memory_parameter%recalc_forces) THEN
1321!                              IF (cartesian_estimate < eps_schwarz) THEN
1322!                                 CALL hfx_add_single_cache_element( &
1323!                                    estimate_to_store_int, 6, &
1324!                                    maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
1325!                                    use_disk_storage, max_val_memory)
1326!                              END IF
1327!                           END IF
1328!
1329!                           IF (.NOT. use_virial) THEN
1330!                              IF (cartesian_estimate < eps_schwarz) CYCLE
1331!                           END IF
1332
1333                           !! Compress the array for storage
1334                           spherical_estimate = 0.0_dp
1335                           DO i = 1, nints
1336                              spherical_estimate = MAX(spherical_estimate, ABS(primitive_forces(i)))
1337                           END DO
1338
1339                           IF (use_virial) THEN
1340                              spherical_estimate_virial = 0.0_dp
1341                              DO i = 1, 3*nints
1342                                 spherical_estimate_virial = MAX(spherical_estimate_virial, ABS(primitive_forces_virial(i)))
1343                              END DO
1344                           END IF
1345
1346                           IF (spherical_estimate == 0.0_dp) spherical_estimate = TINY(spherical_estimate)
1347                           estimate_to_store_int = EXPONENT(spherical_estimate)
1348                           estimate_to_store_int = MAX(estimate_to_store_int, -15_int_8)
1349                           IF (.NOT. buffer_overflow .AND. actual_x_data%memory_parameter%recalc_forces) THEN
1350                              CALL hfx_add_single_cache_element(estimate_to_store_int, 6, &
1351                                                                maxval_cache, maxval_container, &
1352                                                                memory_parameter%actual_memory_usage, &
1353                                                                use_disk_storage, max_val_memory)
1354                           END IF
1355                           spherical_estimate = SET_EXPONENT(1.0_dp, estimate_to_store_int + 1)
1356                           IF (.NOT. use_virial) THEN
1357                              IF (spherical_estimate*pmax_entry < eps_schwarz) CYCLE
1358                           END IF
1359                           IF (.NOT. buffer_overflow) THEN
1360                              nbits = EXPONENT(ANINT(spherical_estimate*pmax_entry/eps_storage)) + 1
1361                              buffer_left = nints
1362                              buffer_start = 1
1363!                              neris_incore = neris_incore+nints
1364                              neris_incore = neris_incore + INT(nints, int_8)
1365
1366                              DO WHILE (buffer_left > 0)
1367                                 buffer_size = MIN(buffer_left, CACHE_SIZE)
1368                                 CALL hfx_add_mult_cache_elements(primitive_forces(buffer_start), &
1369                                                                  buffer_size, nbits, &
1370                                                                  integral_caches(nbits), &
1371                                                                  integral_containers(nbits), &
1372                                                                  eps_storage, pmax_entry, &
1373                                                                  memory_parameter%actual_memory_usage, &
1374                                                                  use_disk_storage)
1375                                 buffer_left = buffer_left - buffer_size
1376                                 buffer_start = buffer_start + buffer_size
1377                              END DO
1378                           ELSE
1379                              !! In order to be consistent with in-core part, round all the eris wrt. eps_schwarz
1380                              !! but only if we treat forces in-core
1381                              IF (treat_forces_in_core) THEN
1382                                 DO i = 1, nints
1383                                    primitive_forces(i) = primitive_forces(i)*pmax_entry
1384                                    IF (ABS(primitive_forces(i)) > eps_storage) THEN
1385                                       primitive_forces(i) = ANINT(primitive_forces(i)/eps_storage, dp)*eps_storage/pmax_entry
1386                                    ELSE
1387                                       primitive_forces(i) = 0.0_dp
1388                                    END IF
1389                                 END DO
1390                              END IF
1391                           END IF
1392                        END IF
1393                        IF (with_resp_density) THEN
1394                           CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1395                                                        full_density_alpha(:, 1), pbd_buf, pbc_buf, pad_buf, pac_buf, &
1396                                                        iatom, jatom, katom, latom, &
1397                                                        iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
1398                                                        offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
1399                                                        atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
1400                           CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1401                                                        full_density_resp, pbd_buf_resp, pbc_buf_resp, pad_buf_resp, pac_buf_resp, &
1402                                                        iatom, jatom, katom, latom, &
1403                                                        iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
1404                                                        offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
1405                                                        atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
1406                        ELSE
1407                           CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1408                                                        full_density_alpha(:, 1), pbd_buf, pbc_buf, pad_buf, pac_buf, &
1409                                                        iatom, jatom, katom, latom, &
1410                                                        iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
1411                                                        offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
1412                                                        atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
1413                        END IF
1414                        IF (nspins == 2) THEN
1415                           IF (with_resp_density) THEN
1416                              CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1417                                                           full_density_beta(:, 1), pbd_buf_beta, pbc_buf_beta, &
1418                                                           pad_buf_beta, pac_buf_beta, iatom, jatom, katom, latom, &
1419                                                           iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
1420                                                           offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
1421                                                           atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
1422                              CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1423                                                           full_density_resp_beta, pbd_buf_resp_beta, pbc_buf_resp_beta, &
1424                                                           pad_buf_resp_beta, pac_buf_resp_beta, iatom, jatom, katom, latom, &
1425                                                           iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
1426                                                           offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
1427                                                           atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
1428                           ELSE
1429                              CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1430                                                           full_density_beta(:, 1), pbd_buf_beta, pbc_buf_beta, pad_buf_beta, &
1431                                                           pac_buf_beta, iatom, jatom, katom, latom, &
1432                                                           iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
1433                                                           offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
1434                                                           atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
1435                           ENDIF
1436                        END IF
1437                        IF (spherical_estimate*pmax_entry >= eps_schwarz) THEN
1438                           DO coord = 1, 12
1439                              T2 => primitive_forces((coord - 1)*nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset) + 1: &
1440                                                     coord*nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset))
1441
1442                              IF (with_resp_density) THEN
1443                                 CALL update_forces(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1444                                                    pbd_buf, pbc_buf, pad_buf, pac_buf, fac, &
1445                                                    T2, force, forces_map, coord, &
1446                                                    pbd_buf_resp, pbc_buf_resp, pad_buf_resp, pac_buf_resp, &
1447                                                    my_resp_only)
1448                              ELSE
1449                                 CALL update_forces(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1450                                                    pbd_buf, pbc_buf, pad_buf, pac_buf, fac, &
1451                                                    T2, force, forces_map, coord)
1452                              END IF
1453                              IF (nspins == 2) THEN
1454                                 IF (with_resp_density) THEN
1455                                    CALL update_forces(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1456                                                       pbd_buf_beta, pbc_buf_beta, pad_buf_beta, pac_buf_beta, &
1457                                                       fac, T2, force, forces_map, coord, &
1458                                                       pbd_buf_resp_beta, pbc_buf_resp_beta, &
1459                                                       pad_buf_resp_beta, pac_buf_resp_beta, &
1460                                                       my_resp_only)
1461                                 ELSE
1462                                    CALL update_forces(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1463                                                       pbd_buf_beta, pbc_buf_beta, pad_buf_beta, pac_buf_beta, fac, &
1464                                                       T2, force, forces_map, coord)
1465                                 ENDIF
1466                              END IF
1467                           END DO !coord
1468                        END IF
1469                        IF (use_virial .AND. spherical_estimate_virial*pmax_entry >= eps_schwarz) THEN
1470                           DO coord = 1, 12
1471                              DO i = 1, 3
1472                                 T2 => primitive_forces_virial( &
1473                                       ((i - 1)*12 + coord - 1)*nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset) + 1: &
1474                                       ((i - 1)*12 + coord)*nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset))
1475                                 IF (with_resp_density) THEN
1476                                    CALL update_virial(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1477                                                       pbd_buf, pbc_buf, pad_buf, pac_buf, fac, &
1478                                                       T2, tmp_virial, coord, i, &
1479                                                       pbd_buf_resp, pbc_buf_resp, pad_buf_resp, pac_buf_resp)
1480                                 ELSE
1481                                    CALL update_virial(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1482                                                       pbd_buf, pbc_buf, pad_buf, pac_buf, fac, &
1483                                                       T2, tmp_virial, coord, i)
1484                                 END IF
1485                                 IF (nspins == 2) THEN
1486                                    IF (with_resp_density) THEN
1487                                       CALL update_virial(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1488                                                          pbd_buf_beta, pbc_buf_beta, pad_buf_beta, pac_buf_beta, &
1489                                                          fac, T2, tmp_virial, coord, i, &
1490                                                          pbd_buf_resp_beta, pbc_buf_resp_beta, &
1491                                                          pad_buf_resp_beta, pac_buf_resp_beta)
1492                                    ELSE
1493                                       CALL update_virial(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1494                                                          pbd_buf_beta, pbc_buf_beta, pad_buf_beta, pac_buf_beta, &
1495                                                          fac, T2, tmp_virial, coord, i)
1496                                    ENDIF
1497                                 END IF
1498                              END DO
1499                           END DO !coord
1500                        END IF
1501
1502                     END DO !i_set_list_kl
1503                  END DO !i_set_list_ij
1504               END DO !i_list_kl
1505            END DO !i_list_ij
1506         END DO atomic_blocks
1507         bintime_stop = m_walltime()
1508         distribution_forces%time_forces = bintime_stop - bintime_start
1509!$OMP MASTER
1510         CALL timestop(handle_bin)
1511!$OMP END MASTER
1512      END DO !bin
1513
1514!$OMP MASTER
1515      logger => cp_get_default_logger()
1516      do_print_load_balance_info = .FALSE.
1517      do_print_load_balance_info = BTEST(cp_print_key_should_output(logger%iter_info, hfx_section, &
1518                                                                    "LOAD_BALANCE%PRINT/LOAD_BALANCE_INFO"), cp_p_file)
1519!$OMP END MASTER
1520!$OMP BARRIER
1521      IF (do_print_load_balance_info) THEN
1522         iw = -1
1523!$OMP MASTER
1524         iw = cp_print_key_unit_nr(logger, hfx_section, "LOAD_BALANCE%PRINT/LOAD_BALANCE_INFO", &
1525                                   extension=".scfLog")
1526!$OMP END MASTER
1527
1528         CALL collect_load_balance_info(para_env, actual_x_data, iw, n_threads, i_thread, &
1529                                        hfx_do_eval_forces)
1530
1531!$OMP MASTER
1532         CALL cp_print_key_finished_output(iw, logger, hfx_section, &
1533                                           "LOAD_BALANCE%PRINT/LOAD_BALANCE_INFO")
1534!$OMP END MASTER
1535      END IF
1536
1537      IF (use_virial) THEN
1538         DO i = 1, 3
1539            DO j = 1, 3
1540               DO k = 1, 3
1541!$OMP                 ATOMIC
1542                  virial%pv_fock_4c(i, j) = virial%pv_fock_4c(i, j) + tmp_virial(i, k)*cell%hmat(j, k)
1543               END DO
1544            END DO
1545         END DO
1546      END IF
1547
1548!$OMP BARRIER
1549      !! Get some number about ERIS
1550!$OMP ATOMIC
1551      shm_neris_total = shm_neris_total + neris_total
1552!$OMP ATOMIC
1553      shm_neris_onthefly = shm_neris_onthefly + neris_onthefly
1554!$OMP ATOMIC
1555      shm_nprim_ints = shm_nprim_ints + nprim_ints
1556
1557      storage_counter_integrals = memory_parameter%actual_memory_usage* &
1558                                  memory_parameter%cache_size
1559      stor_count_max_val = max_val_memory*memory_parameter%cache_size
1560!$OMP ATOMIC
1561      shm_storage_counter_integrals = shm_storage_counter_integrals + storage_counter_integrals
1562!$OMP ATOMIC
1563      shm_neris_incore = shm_neris_incore + neris_incore
1564!$OMP ATOMIC
1565      shm_stor_count_max_val = shm_stor_count_max_val + stor_count_max_val
1566!$OMP BARRIER
1567
1568      ! IF(with_resp_density) THEN
1569      !   ! restore original load_balance_parameter
1570      !   actual_x_data%load_balance_parameter = load_balance_parameter_energy
1571      !   DEALLOCATE(load_balance_parameter_energy)
1572      ! END IF
1573
1574!$OMP MASTER
1575      !! Print some memeory information if this is the first step
1576      IF (actual_x_data%memory_parameter%recalc_forces) THEN
1577         tmp_i8(1:6) = (/shm_storage_counter_integrals, shm_neris_onthefly, shm_neris_incore, &
1578                         shm_neris_total, shm_nprim_ints, shm_stor_count_max_val/)
1579         CALL mp_sum(tmp_i8, para_env%group)
1580         shm_storage_counter_integrals = tmp_i8(1)
1581         shm_neris_onthefly = tmp_i8(2)
1582         shm_neris_incore = tmp_i8(3)
1583         shm_neris_total = tmp_i8(4)
1584         shm_nprim_ints = tmp_i8(5)
1585         shm_stor_count_max_val = tmp_i8(6)
1586         mem_eris = (shm_storage_counter_integrals + 128*1024 - 1)/1024/128
1587         compression_factor = REAL(shm_neris_incore, dp)/REAL(shm_storage_counter_integrals, dp)
1588         mem_max_val = (shm_stor_count_max_val + 128*1024 - 1)/1024/128
1589
1590         IF (shm_neris_incore == 0) THEN
1591            mem_eris = 0
1592            compression_factor = 0.0_dp
1593         END IF
1594
1595         logger => cp_get_default_logger()
1596
1597         iw = cp_print_key_unit_nr(logger, hfx_section, "HF_INFO", &
1598                                   extension=".scfLog")
1599         IF (iw > 0) THEN
1600            WRITE (UNIT=iw, FMT="(/,(T3,A,T65,I16))") &
1601               "HFX_MEM_INFO| Number of cart. primitive DERIV's calculated:      ", shm_nprim_ints
1602
1603            WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
1604               "HFX_MEM_INFO| Number of sph. DERIV's calculated:           ", shm_neris_total
1605
1606            WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
1607               "HFX_MEM_INFO| Number of sph. DERIV's stored in-core:        ", shm_neris_incore
1608
1609            WRITE (UNIT=iw, FMT="((T3,A,T62,I19))") &
1610               "HFX_MEM_INFO| Number of sph. DERIV's calculated on the fly: ", shm_neris_onthefly
1611
1612            WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
1613               "HFX_MEM_INFO| Total memory consumption DERIV's RAM [MiB]:            ", mem_eris
1614
1615            WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
1616               "HFX_MEM_INFO| Whereof max-vals [MiB]:            ", mem_max_val
1617
1618            WRITE (UNIT=iw, FMT="((T3,A,T60,F21.2),/)") &
1619               "HFX_MEM_INFO| Total compression factor DERIV's RAM:                  ", compression_factor
1620         END IF
1621
1622         CALL cp_print_key_finished_output(iw, logger, hfx_section, &
1623                                           "HF_INFO")
1624      END IF
1625!$OMP END MASTER
1626
1627      !! flush caches if the geometry changed
1628      IF (do_dynamic_load_balancing) THEN
1629         my_bin_size = SIZE(actual_x_data%distribution_forces)
1630      ELSE
1631         my_bin_size = 1
1632      END IF
1633
1634      IF (actual_x_data%memory_parameter%recalc_forces) THEN
1635         IF (treat_forces_in_core) THEN
1636            DO bin = 1, my_bin_size
1637               maxval_cache => actual_x_data%maxval_cache_forces(bin)
1638               maxval_container => actual_x_data%maxval_container_forces(bin)
1639               integral_caches => actual_x_data%integral_caches_forces(:, bin)
1640               integral_containers => actual_x_data%integral_containers_forces(:, bin)
1641               CALL hfx_flush_last_cache(bits_max_val, maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
1642                                         .FALSE.)
1643               DO i = 1, 64
1644                  CALL hfx_flush_last_cache(i, integral_caches(i), integral_containers(i), &
1645                                            memory_parameter%actual_memory_usage, .FALSE.)
1646               END DO
1647            END DO
1648         END IF
1649      END IF
1650      !! reset all caches except we calculate all on the fly
1651      IF (treat_forces_in_core) THEN
1652         DO bin = 1, my_bin_size
1653            maxval_cache => actual_x_data%maxval_cache_forces(bin)
1654            maxval_container => actual_x_data%maxval_container_forces(bin)
1655            integral_caches => actual_x_data%integral_caches_forces(:, bin)
1656            integral_containers => actual_x_data%integral_containers_forces(:, bin)
1657
1658            CALL hfx_reset_cache_and_container(maxval_cache, maxval_container, memory_parameter%actual_memory_usage, .FALSE.)
1659            DO i = 1, 64
1660               CALL hfx_reset_cache_and_container(integral_caches(i), integral_containers(i), &
1661                                                  memory_parameter%actual_memory_usage, &
1662                                                  .FALSE.)
1663            END DO
1664         END DO
1665      END IF
1666
1667      IF (actual_x_data%memory_parameter%recalc_forces) THEN
1668         actual_x_data%memory_parameter%recalc_forces = .FALSE.
1669      END IF
1670
1671!$OMP BARRIER
1672!$OMP MASTER
1673      CALL timestop(handle_main)
1674!$OMP END MASTER
1675!$OMP BARRIER
1676      DEALLOCATE (last_sgf_global)
1677!$OMP MASTER
1678      DEALLOCATE (full_density_alpha)
1679      IF (with_resp_density) THEN
1680         DEALLOCATE (full_density_resp)
1681      END IF
1682      IF (nspins == 2) THEN
1683         DEALLOCATE (full_density_beta)
1684         IF (with_resp_density) THEN
1685            DEALLOCATE (full_density_resp_beta)
1686         END IF
1687      END IF
1688      IF (do_dynamic_load_balancing) THEN
1689         DEALLOCATE (actual_x_data%task_list)
1690      END IF
1691!$OMP END MASTER
1692      DEALLOCATE (primitive_forces)
1693      DEALLOCATE (atom_of_kind, kind_of)
1694      DEALLOCATE (max_contraction)
1695      DEALLOCATE (pbd_buf)
1696      DEALLOCATE (pbc_buf)
1697      DEALLOCATE (pad_buf)
1698      DEALLOCATE (pac_buf)
1699
1700      IF (with_resp_density) THEN
1701         DEALLOCATE (pbd_buf_resp)
1702         DEALLOCATE (pbc_buf_resp)
1703         DEALLOCATE (pad_buf_resp)
1704         DEALLOCATE (pac_buf_resp)
1705      END IF
1706
1707      DO i = 1, max_pgf**2
1708         DEALLOCATE (pgf_list_ij(i)%image_list)
1709         DEALLOCATE (pgf_list_kl(i)%image_list)
1710      END DO
1711
1712      DEALLOCATE (pgf_list_ij)
1713      DEALLOCATE (pgf_list_kl)
1714      DEALLOCATE (pgf_product_list)
1715
1716      DEALLOCATE (set_list_ij, set_list_kl)
1717
1718      DEALLOCATE (primitive_forces_virial)
1719
1720      DEALLOCATE (ede_work, ede_work2, ede_work_forces, ede_buffer1, ede_buffer2, ede_primitives_tmp)
1721
1722      DEALLOCATE (ede_work_virial, ede_work2_virial, ede_primitives_tmp_virial)
1723
1724      DEALLOCATE (nimages)
1725
1726      IF (nspins == 2) THEN
1727         DEALLOCATE (pbd_buf_beta, pbc_buf_beta, pad_buf_beta, pac_buf_beta)
1728         IF (with_resp_density) THEN
1729            DEALLOCATE (pbd_buf_resp_beta)
1730            DEALLOCATE (pbc_buf_resp_beta)
1731            DEALLOCATE (pad_buf_resp_beta)
1732            DEALLOCATE (pac_buf_resp_beta)
1733         END IF
1734      END IF
1735
1736      !
1737      ! this wraps to free_libint, but currently causes a segfault
1738      ! as a result, we don't call it, but some memory remains allocated
1739      !
1740      ! CALL terminate_libderiv(private_deriv)
1741
1742!$OMP END PARALLEL
1743
1744      CALL timestop(handle)
1745
1746   END SUBROUTINE derivatives_four_center
1747
1748! **************************************************************************************************
1749!> \brief ...
1750!> \param deriv ...
1751!> \param ra ...
1752!> \param rb ...
1753!> \param rc ...
1754!> \param rd ...
1755!> \param npgfa ...
1756!> \param npgfb ...
1757!> \param npgfc ...
1758!> \param npgfd ...
1759!> \param la_min ...
1760!> \param la_max ...
1761!> \param lb_min ...
1762!> \param lb_max ...
1763!> \param lc_min ...
1764!> \param lc_max ...
1765!> \param ld_min ...
1766!> \param ld_max ...
1767!> \param nsgfa ...
1768!> \param nsgfb ...
1769!> \param nsgfc ...
1770!> \param nsgfd ...
1771!> \param sphi_a_u1 ...
1772!> \param sphi_a_u2 ...
1773!> \param sphi_a_u3 ...
1774!> \param sphi_b_u1 ...
1775!> \param sphi_b_u2 ...
1776!> \param sphi_b_u3 ...
1777!> \param sphi_c_u1 ...
1778!> \param sphi_c_u2 ...
1779!> \param sphi_c_u3 ...
1780!> \param sphi_d_u1 ...
1781!> \param sphi_d_u2 ...
1782!> \param sphi_d_u3 ...
1783!> \param zeta ...
1784!> \param zetb ...
1785!> \param zetc ...
1786!> \param zetd ...
1787!> \param primitive_integrals ...
1788!> \param potential_parameter ...
1789!> \param neighbor_cells ...
1790!> \param screen1 ...
1791!> \param screen2 ...
1792!> \param eps_schwarz ...
1793!> \param max_contraction_val ...
1794!> \param cart_estimate ...
1795!> \param cell ...
1796!> \param neris_tmp ...
1797!> \param log10_pmax ...
1798!> \param log10_eps_schwarz ...
1799!> \param R1_pgf ...
1800!> \param R2_pgf ...
1801!> \param pgf1 ...
1802!> \param pgf2 ...
1803!> \param pgf_list_ij ...
1804!> \param pgf_list_kl ...
1805!> \param pgf_product_list ...
1806!> \param nsgfl_a ...
1807!> \param nsgfl_b ...
1808!> \param nsgfl_c ...
1809!> \param nsgfl_d ...
1810!> \param sphi_a_ext ...
1811!> \param sphi_b_ext ...
1812!> \param sphi_c_ext ...
1813!> \param sphi_d_ext ...
1814!> \param ede_work ...
1815!> \param ede_work2 ...
1816!> \param ede_work_forces ...
1817!> \param ede_buffer1 ...
1818!> \param ede_buffer2 ...
1819!> \param ede_primitives_tmp ...
1820!> \param nimages ...
1821!> \param do_periodic ...
1822!> \param use_virial ...
1823!> \param ede_work_virial ...
1824!> \param ede_work2_virial ...
1825!> \param ede_primitives_tmp_virial ...
1826!> \param primitive_integrals_virial ...
1827!> \param cart_estimate_virial ...
1828! **************************************************************************************************
1829   SUBROUTINE forces4(deriv, ra, rb, rc, rd, npgfa, npgfb, npgfc, npgfd, &
1830                      la_min, la_max, lb_min, lb_max, &
1831                      lc_min, lc_max, ld_min, ld_max, nsgfa, nsgfb, nsgfc, nsgfd, &
1832                      sphi_a_u1, sphi_a_u2, sphi_a_u3, &
1833                      sphi_b_u1, sphi_b_u2, sphi_b_u3, &
1834                      sphi_c_u1, sphi_c_u2, sphi_c_u3, &
1835                      sphi_d_u1, sphi_d_u2, sphi_d_u3, &
1836                      zeta, zetb, zetc, zetd, &
1837                      primitive_integrals, &
1838                      potential_parameter, neighbor_cells, &
1839                      screen1, screen2, eps_schwarz, max_contraction_val, &
1840                      cart_estimate, cell, neris_tmp, log10_pmax, &
1841                      log10_eps_schwarz, R1_pgf, R2_pgf, pgf1, pgf2, &
1842                      pgf_list_ij, pgf_list_kl, &
1843                      pgf_product_list, &
1844                      nsgfl_a, nsgfl_b, nsgfl_c, &
1845                      nsgfl_d, &
1846                      sphi_a_ext, sphi_b_ext, sphi_c_ext, sphi_d_ext, &
1847                      ede_work, ede_work2, ede_work_forces, &
1848                      ede_buffer1, ede_buffer2, ede_primitives_tmp, &
1849                      nimages, do_periodic, use_virial, ede_work_virial, &
1850                      ede_work2_virial, ede_primitives_tmp_virial, primitive_integrals_virial, &
1851                      cart_estimate_virial)
1852
1853      TYPE(cp_libint_t)                                  :: deriv
1854      REAL(dp), INTENT(IN)                               :: ra(3), rb(3), rc(3), rd(3)
1855      INTEGER, INTENT(IN) :: npgfa, npgfb, npgfc, npgfd, la_min, la_max, lb_min, lb_max, lc_min, &
1856         lc_max, ld_min, ld_max, nsgfa, nsgfb, nsgfc, nsgfd, sphi_a_u1, sphi_a_u2, sphi_a_u3, &
1857         sphi_b_u1, sphi_b_u2, sphi_b_u3, sphi_c_u1, sphi_c_u2, sphi_c_u3, sphi_d_u1, sphi_d_u2, &
1858         sphi_d_u3
1859      REAL(dp), DIMENSION(1:npgfa), INTENT(IN)           :: zeta
1860      REAL(dp), DIMENSION(1:npgfb), INTENT(IN)           :: zetb
1861      REAL(dp), DIMENSION(1:npgfc), INTENT(IN)           :: zetc
1862      REAL(dp), DIMENSION(1:npgfd), INTENT(IN)           :: zetd
1863      REAL(dp), &
1864         DIMENSION(nsgfa, nsgfb, nsgfc, nsgfd, 12)       :: primitive_integrals
1865      TYPE(hfx_potential_type)                           :: potential_parameter
1866      TYPE(hfx_cell_type), DIMENSION(:), POINTER         :: neighbor_cells
1867      REAL(dp), INTENT(IN)                               :: screen1(2), screen2(2), eps_schwarz, &
1868                                                            max_contraction_val
1869      REAL(dp)                                           :: cart_estimate
1870      TYPE(cell_type), POINTER                           :: cell
1871      INTEGER(int_8)                                     :: neris_tmp
1872      REAL(dp), INTENT(IN)                               :: log10_pmax, log10_eps_schwarz
1873      TYPE(hfx_screen_coeff_type), DIMENSION(:, :), &
1874         POINTER                                         :: R1_pgf, R2_pgf, pgf1, pgf2
1875      TYPE(hfx_pgf_list), DIMENSION(*)                   :: pgf_list_ij, pgf_list_kl
1876      TYPE(hfx_pgf_product_list), ALLOCATABLE, &
1877         DIMENSION(:), INTENT(INOUT)                     :: pgf_product_list
1878      INTEGER, DIMENSION(0:), INTENT(IN)                 :: nsgfl_a, nsgfl_b, nsgfl_c, nsgfl_d
1879      REAL(dp), INTENT(IN) :: sphi_a_ext(sphi_a_u1, sphi_a_u2, sphi_a_u3), &
1880         sphi_b_ext(sphi_b_u1, sphi_b_u2, sphi_b_u3), sphi_c_ext(sphi_c_u1, sphi_c_u2, sphi_c_u3), &
1881         sphi_d_ext(sphi_d_u1, sphi_d_u2, sphi_d_u3)
1882      REAL(dp), DIMENSION(*)                             :: ede_work, ede_work2, ede_work_forces, &
1883                                                            ede_buffer1, ede_buffer2, &
1884                                                            ede_primitives_tmp
1885      INTEGER, DIMENSION(*)                              :: nimages
1886      LOGICAL, INTENT(IN)                                :: do_periodic, use_virial
1887      REAL(dp), DIMENSION(*)                             :: ede_work_virial, ede_work2_virial, &
1888                                                            ede_primitives_tmp_virial
1889      REAL(dp), &
1890         DIMENSION(nsgfa, nsgfb, nsgfc, nsgfd, 12, 3)    :: primitive_integrals_virial
1891      REAL(dp)                                           :: cart_estimate_virial
1892
1893      INTEGER :: ipgf, jpgf, kpgf, la, lb, lc, ld, list_ij, list_kl, lpgf, max_l, ncoa, ncob, &
1894         ncoc, ncod, nelements_ij, nelements_kl, nproducts, nsgfla, nsgflb, nsgflc, nsgfld, nsoa, &
1895         nsob, nsoc, nsod, s_offset_a, s_offset_b, s_offset_c, s_offset_d
1896      REAL(dp)                                           :: EtaInv, tmp_max, tmp_max_virial, Zeta_A, &
1897                                                            Zeta_B, Zeta_C, Zeta_D, ZetaInv
1898
1899      CALL build_pair_list_pgf(npgfa, npgfb, pgf_list_ij, zeta, zetb, screen1, screen2, &
1900                               pgf1, R1_pgf, log10_pmax, log10_eps_schwarz, ra, rb, &
1901                               nelements_ij, &
1902                               neighbor_cells, nimages, do_periodic)
1903      CALL build_pair_list_pgf(npgfc, npgfd, pgf_list_kl, zetc, zetd, screen2, screen1, &
1904                               pgf2, R2_pgf, log10_pmax, log10_eps_schwarz, rc, rd, &
1905                               nelements_kl, &
1906                               neighbor_cells, nimages, do_periodic)
1907
1908      cart_estimate = 0.0_dp
1909      primitive_integrals = 0.0_dp
1910      IF (use_virial) THEN
1911         primitive_integrals_virial = 0.0_dp
1912         cart_estimate_virial = 0.0_dp
1913      END IF
1914      neris_tmp = 0_int_8
1915      max_l = la_max + lb_max + lc_max + ld_max + 1
1916      DO list_ij = 1, nelements_ij
1917         Zeta_A = pgf_list_ij(list_ij)%zeta
1918         Zeta_B = pgf_list_ij(list_ij)%zetb
1919         ZetaInv = pgf_list_ij(list_ij)%ZetaInv
1920         ipgf = pgf_list_ij(list_ij)%ipgf
1921         jpgf = pgf_list_ij(list_ij)%jpgf
1922
1923         DO list_kl = 1, nelements_kl
1924            Zeta_C = pgf_list_kl(list_kl)%zeta
1925            Zeta_D = pgf_list_kl(list_kl)%zetb
1926            EtaInv = pgf_list_kl(list_kl)%ZetaInv
1927            kpgf = pgf_list_kl(list_kl)%ipgf
1928            lpgf = pgf_list_kl(list_kl)%jpgf
1929
1930            CALL build_pgf_product_list(pgf_list_ij(list_ij), pgf_list_kl(list_kl), pgf_product_list, &
1931                                        nproducts, log10_pmax, log10_eps_schwarz, neighbor_cells, cell, &
1932                                        potential_parameter, max_l, do_periodic)
1933            s_offset_a = 0
1934            DO la = la_min, la_max
1935               s_offset_b = 0
1936               ncoa = nco(la)
1937               nsgfla = nsgfl_a(la)
1938               nsoa = nso(la)
1939
1940               DO lb = lb_min, lb_max
1941                  s_offset_c = 0
1942                  ncob = nco(lb)
1943                  nsgflb = nsgfl_b(lb)
1944                  nsob = nso(lb)
1945
1946                  DO lc = lc_min, lc_max
1947                     s_offset_d = 0
1948                     ncoc = nco(lc)
1949                     nsgflc = nsgfl_c(lc)
1950                     nsoc = nso(lc)
1951
1952                     DO ld = ld_min, ld_max
1953                        ncod = nco(ld)
1954                        nsgfld = nsgfl_d(ld)
1955                        nsod = nso(ld)
1956                        tmp_max = 0.0_dp
1957                        tmp_max_virial = 0.0_dp
1958                        CALL evaluate_deriv_eri(deriv, nproducts, pgf_product_list, &
1959                                                la, lb, lc, ld, &
1960                                                ncoa, ncob, ncoc, ncod, &
1961                                                nsgfa, nsgfb, nsgfc, nsgfd, &
1962                                                primitive_integrals, &
1963                                                max_contraction_val, tmp_max, eps_schwarz, neris_tmp, &
1964                                                Zeta_A, Zeta_B, Zeta_C, Zeta_D, ZetaInv, EtaInv, &
1965                                                s_offset_a, s_offset_b, s_offset_c, s_offset_d, &
1966                                                nsgfla, nsgflb, nsgflc, nsgfld, nsoa, nsob, nsoc, nsod, &
1967                                                sphi_a_ext(1, la + 1, ipgf), &
1968                                                sphi_b_ext(1, lb + 1, jpgf), &
1969                                                sphi_c_ext(1, lc + 1, kpgf), &
1970                                                sphi_d_ext(1, ld + 1, lpgf), &
1971                                                ede_work, ede_work2, ede_work_forces, &
1972                                                ede_buffer1, ede_buffer2, ede_primitives_tmp, use_virial, &
1973                                                ede_work_virial, ede_work2_virial, ede_primitives_tmp_virial, &
1974                                                primitive_integrals_virial, cell, tmp_max_virial)
1975                        cart_estimate = MAX(tmp_max, cart_estimate)
1976                        IF (use_virial) cart_estimate_virial = MAX(tmp_max_virial, cart_estimate_virial)
1977                        s_offset_d = s_offset_d + nsod*nsgfld
1978                     END DO !ld
1979                     s_offset_c = s_offset_c + nsoc*nsgflc
1980                  END DO !lc
1981                  s_offset_b = s_offset_b + nsob*nsgflb
1982               END DO !lb
1983               s_offset_a = s_offset_a + nsoa*nsgfla
1984            END DO !la
1985         END DO
1986      END DO
1987
1988   END SUBROUTINE forces4
1989
1990! **************************************************************************************************
1991!> \brief Given a 2d index pair, this function returns a 1d index pair for
1992!>        a symmetric upper triangle NxN matrix
1993!>        The compiler should inline this function, therefore it appears in
1994!>        several modules
1995!> \param i 2d index
1996!> \param j 2d index
1997!> \param N matrix size
1998!> \return ...
1999!> \par History
2000!>      03.2009 created [Manuel Guidon]
2001!> \author Manuel Guidon
2002! **************************************************************************************************
2003   PURE FUNCTION get_1D_idx(i, j, N)
2004      INTEGER, INTENT(IN)                                :: i, j
2005      INTEGER(int_8), INTENT(IN)                         :: N
2006      INTEGER(int_8)                                     :: get_1D_idx
2007
2008      INTEGER(int_8)                                     :: min_ij
2009
2010      min_ij = MIN(i, j)
2011      get_1D_idx = min_ij*N + MAX(i, j) - (min_ij - 1)*min_ij/2 - N
2012
2013   END FUNCTION get_1D_idx
2014
2015! **************************************************************************************************
2016!> \brief This routine prefetches density matrix elements, i.e. reshuffles the
2017!>        data in a way that they can be accessed later on in a cache friendly
2018!>        way
2019!> \param ma_max Size of matrix blocks
2020!> \param mb_max Size of matrix blocks
2021!> \param mc_max Size of matrix blocks
2022!> \param md_max Size of matrix blocks
2023!> \param density ...
2024!> \param pbd buffer that will contain P(b,d)
2025!> \param pbc buffer that will contain P(b,c)
2026!> \param pad buffer that will contain P(a,d)
2027!> \param pac buffer that will contain P(a,c)
2028!> \param iatom ...
2029!> \param jatom ...
2030!> \param katom ...
2031!> \param latom ...
2032!> \param iset ...
2033!> \param jset ...
2034!> \param kset ...
2035!> \param lset ...
2036!> \param offset_bd_set ...
2037!> \param offset_bc_set ...
2038!> \param offset_ad_set ...
2039!> \param offset_ac_set ...
2040!> \param atomic_offset_bd ...
2041!> \param atomic_offset_bc ...
2042!> \param atomic_offset_ad ...
2043!> \param atomic_offset_ac ...
2044!> \param anti_symmetric ...
2045!> \par History
2046!>      03.2009 created [Manuel Guidon]
2047!> \author Manuel Guidon
2048! **************************************************************************************************
2049   SUBROUTINE prefetch_density_matrix(ma_max, mb_max, mc_max, md_max, &
2050                                      density, pbd, pbc, pad, pac, &
2051                                      iatom, jatom, katom, latom, &
2052                                      iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
2053                                      offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
2054                                      atomic_offset_ad, atomic_offset_ac, anti_symmetric)
2055
2056      INTEGER, INTENT(IN)                                :: ma_max, mb_max, mc_max, md_max
2057      REAL(dp), DIMENSION(:), INTENT(IN)                 :: density
2058      REAL(dp), DIMENSION(*), INTENT(INOUT)              :: pbd, pbc, pad, pac
2059      INTEGER, INTENT(IN)                                :: iatom, jatom, katom, latom, iset, jset, &
2060                                                            kset, lset
2061      INTEGER, DIMENSION(:, :), POINTER                  :: offset_bd_set, offset_bc_set, &
2062                                                            offset_ad_set, offset_ac_set
2063      INTEGER, INTENT(IN)                                :: atomic_offset_bd, atomic_offset_bc, &
2064                                                            atomic_offset_ad, atomic_offset_ac
2065      LOGICAL                                            :: anti_symmetric
2066
2067      INTEGER                                            :: i, j, ma, mb, mc, md, offset_ac, &
2068                                                            offset_ad, offset_bc, offset_bd
2069      REAL(dp)                                           :: fac
2070
2071      fac = 1.0_dp
2072      IF (anti_symmetric) fac = -1.0_dp
2073      IF (jatom >= latom) THEN
2074         i = 1
2075         offset_bd = offset_bd_set(jset, lset) + atomic_offset_bd - 1
2076         j = offset_bd
2077         DO md = 1, md_max
2078            DO mb = 1, mb_max
2079               pbd(i) = density(j)*fac
2080               i = i + 1
2081               j = j + 1
2082            END DO
2083         END DO
2084      ELSE
2085         i = 1
2086         offset_bd = offset_bd_set(lset, jset) + atomic_offset_bd - 1
2087         DO md = 1, md_max
2088            j = offset_bd + md - 1
2089            DO mb = 1, mb_max
2090               pbd(i) = density(j)
2091               i = i + 1
2092               j = j + md_max
2093            END DO
2094         END DO
2095      END IF
2096      IF (jatom >= katom) THEN
2097         i = 1
2098         offset_bc = offset_bc_set(jset, kset) + atomic_offset_bc - 1
2099         j = offset_bc
2100         DO mc = 1, mc_max
2101            DO mb = 1, mb_max
2102               pbc(i) = density(j)*fac
2103               i = i + 1
2104               j = j + 1
2105            END DO
2106         END DO
2107      ELSE
2108         i = 1
2109         offset_bc = offset_bc_set(kset, jset) + atomic_offset_bc - 1
2110         DO mc = 1, mc_max
2111            j = offset_bc + mc - 1
2112            DO mb = 1, mb_max
2113               pbc(i) = density(j)
2114               i = i + 1
2115               j = j + mc_max
2116            END DO
2117         END DO
2118      END IF
2119      IF (iatom >= latom) THEN
2120         i = 1
2121         offset_ad = offset_ad_set(iset, lset) + atomic_offset_ad - 1
2122         j = offset_ad
2123         DO md = 1, md_max
2124            DO ma = 1, ma_max
2125               pad(i) = density(j)*fac
2126               i = i + 1
2127               j = j + 1
2128            END DO
2129         END DO
2130      ELSE
2131         i = 1
2132         offset_ad = offset_ad_set(lset, iset) + atomic_offset_ad - 1
2133         DO md = 1, md_max
2134            j = offset_ad + md - 1
2135            DO ma = 1, ma_max
2136               pad(i) = density(j)
2137               i = i + 1
2138               j = j + md_max
2139            END DO
2140         END DO
2141      END IF
2142      IF (iatom >= katom) THEN
2143         i = 1
2144         offset_ac = offset_ac_set(iset, kset) + atomic_offset_ac - 1
2145         j = offset_ac
2146         DO mc = 1, mc_max
2147            DO ma = 1, ma_max
2148               pac(i) = density(j)*fac
2149               i = i + 1
2150               j = j + 1
2151            END DO
2152         END DO
2153      ELSE
2154         i = 1
2155         offset_ac = offset_ac_set(kset, iset) + atomic_offset_ac - 1
2156         DO mc = 1, mc_max
2157            j = offset_ac + mc - 1
2158            DO ma = 1, ma_max
2159               pac(i) = density(j)
2160               i = i + 1
2161               j = j + mc_max
2162            END DO
2163         END DO
2164      END IF
2165   END SUBROUTINE prefetch_density_matrix
2166
2167! **************************************************************************************************
2168!> \brief This routine updates the forces using buffered density matrices
2169!> \param ma_max Size of matrix blocks
2170!> \param mb_max Size of matrix blocks
2171!> \param mc_max Size of matrix blocks
2172!> \param md_max Size of matrix blocks
2173!> \param pbd buffer that will contain P(b,d)
2174!> \param pbc buffer that will contain P(b,c)
2175!> \param pad buffer that will contain P(a,d)
2176!> \param pac buffer that will contain P(a,c)
2177!> \param fac mulitplication factor (spin, symmetry)
2178!> \param prim primitive forces
2179!> \param force storage loacation for forces
2180!> \param forces_map index table
2181!> \param coord which of the 12 coords to be updated
2182!> \param pbd_resp ...
2183!> \param pbc_resp ...
2184!> \param pad_resp ...
2185!> \param pac_resp ...
2186!> \param resp_only ...
2187!> \par History
2188!>      03.2009 created [Manuel Guidon]
2189!> \author Manuel Guidon
2190! **************************************************************************************************
2191   SUBROUTINE update_forces(ma_max, mb_max, mc_max, md_max, &
2192                            pbd, pbc, pad, pac, fac, &
2193                            prim, force, forces_map, coord, &
2194                            pbd_resp, pbc_resp, pad_resp, pac_resp, resp_only)
2195
2196      INTEGER, INTENT(IN)                                :: ma_max, mb_max, mc_max, md_max
2197      REAL(dp), DIMENSION(*), INTENT(IN)                 :: pbd, pbc, pad, pac
2198      REAL(dp), INTENT(IN)                               :: fac
2199      REAL(dp), DIMENSION(ma_max*mb_max*mc_max*md_max), &
2200         INTENT(IN)                                      :: prim
2201      TYPE(qs_force_type), DIMENSION(:), POINTER         :: force
2202      INTEGER, INTENT(IN)                                :: forces_map(4, 2), coord
2203      REAL(dp), DIMENSION(*), INTENT(IN), OPTIONAL       :: pbd_resp, pbc_resp, pad_resp, pac_resp
2204      LOGICAL, INTENT(IN), OPTIONAL                      :: resp_only
2205
2206      INTEGER                                            :: ma, mb, mc, md, p_index
2207      LOGICAL                                            :: full_force, with_resp_density
2208      REAL(dp)                                           :: temp1, temp1_resp, temp3, temp3_resp, &
2209                                                            temp4, teresp
2210
2211      with_resp_density = .FALSE.
2212      IF (PRESENT(pbd_resp) .AND. &
2213          PRESENT(pbc_resp) .AND. &
2214          PRESENT(pad_resp) .AND. &
2215          PRESENT(pac_resp)) with_resp_density = .TRUE.
2216
2217      IF (with_resp_density) THEN
2218         full_force = .TRUE.
2219         IF (PRESENT(resp_only)) full_force = .NOT. resp_only
2220         p_index = 0
2221         temp4 = 0.0_dp
2222         DO md = 1, md_max
2223            DO mc = 1, mc_max
2224               DO mb = 1, mb_max
2225                  temp1 = pbc((mc - 1)*mb_max + mb)*fac
2226                  temp3 = pbd((md - 1)*mb_max + mb)*fac
2227                  temp1_resp = pbc_resp((mc - 1)*mb_max + mb)*fac
2228                  temp3_resp = pbd_resp((md - 1)*mb_max + mb)*fac
2229                  DO ma = 1, ma_max
2230                     p_index = p_index + 1
2231                     ! HF-SCF
2232                     IF (full_force) THEN
2233                        teresp = temp1*pad((md - 1)*ma_max + ma) + &
2234                                 temp3*pac((mc - 1)*ma_max + ma)
2235                     ELSE
2236                        teresp = 0.0_dp
2237                     END IF
2238                     ! RESP+HF
2239                     teresp = teresp + &
2240                              pac((mc - 1)*ma_max + ma)*temp3_resp + &
2241                              pac_resp((mc - 1)*ma_max + ma)*temp3 + &
2242                              pad((md - 1)*ma_max + ma)*temp1_resp + &
2243                              pad_resp((md - 1)*ma_max + ma)*temp1
2244                     temp4 = temp4 + teresp*prim(p_index)
2245                  END DO !ma
2246               END DO !mb
2247            END DO !mc
2248         END DO !md
2249      ELSE
2250         p_index = 0
2251         temp4 = 0.0_dp
2252         DO md = 1, md_max
2253            DO mc = 1, mc_max
2254               DO mb = 1, mb_max
2255                  temp1 = pbc((mc - 1)*mb_max + mb)*fac
2256                  temp3 = pbd((md - 1)*mb_max + mb)*fac
2257                  DO ma = 1, ma_max
2258                     p_index = p_index + 1
2259                     teresp = temp1*pad((md - 1)*ma_max + ma) + &
2260                              temp3*pac((mc - 1)*ma_max + ma)
2261                     temp4 = temp4 + teresp*prim(p_index)
2262                  END DO !ma
2263               END DO !mb
2264            END DO !mc
2265         END DO !md
2266      END IF
2267
2268!$OMP ATOMIC
2269      force(forces_map((coord - 1)/3 + 1, 1))%fock_4c(MOD(coord - 1, 3) + 1, &
2270                                                      forces_map((coord - 1)/3 + 1, 2)) = &
2271         force(forces_map((coord - 1)/3 + 1, 1))%fock_4c(MOD(coord - 1, 3) + 1, &
2272                                                         forces_map((coord - 1)/3 + 1, 2)) - &
2273         temp4
2274
2275   END SUBROUTINE update_forces
2276
2277! **************************************************************************************************
2278!> \brief This routine updates the virial using buffered density matrices
2279!> \param ma_max Size of matrix blocks
2280!> \param mb_max Size of matrix blocks
2281!> \param mc_max Size of matrix blocks
2282!> \param md_max Size of matrix blocks
2283!> \param pbd buffer that will contain P(b,d)
2284!> \param pbc buffer that will contain P(b,c)
2285!> \param pad buffer that will contain P(a,d)
2286!> \param pac buffer that will contain P(a,c)
2287!> \param fac mulitplication factor (spin, symmetry)
2288!> \param prim primitive forces
2289!> \param tmp_virial ...
2290!> \param coord which of the 12 coords to be updated
2291!> \param l ...
2292!> \param pbd_resp ...
2293!> \param pbc_resp ...
2294!> \param pad_resp ...
2295!> \param pac_resp ...
2296!> \par History
2297!>      03.2009 created [Manuel Guidon]
2298!> \author Manuel Guidon
2299! **************************************************************************************************
2300   SUBROUTINE update_virial(ma_max, mb_max, mc_max, md_max, &
2301                            pbd, pbc, pad, pac, fac, &
2302                            prim, tmp_virial, coord, l, &
2303                            pbd_resp, pbc_resp, pad_resp, pac_resp)
2304
2305      INTEGER, INTENT(IN)                                :: ma_max, mb_max, mc_max, md_max
2306      REAL(dp), DIMENSION(*), INTENT(IN)                 :: pbd, pbc, pad, pac
2307      REAL(dp), INTENT(IN)                               :: fac
2308      REAL(dp), DIMENSION(ma_max*mb_max*mc_max*md_max), &
2309         INTENT(IN)                                      :: prim
2310      REAL(dp)                                           :: tmp_virial(3, 3)
2311      INTEGER, INTENT(IN)                                :: coord, l
2312      REAL(dp), DIMENSION(*), INTENT(IN), OPTIONAL       :: pbd_resp, pbc_resp, pad_resp, pac_resp
2313
2314      INTEGER                                            :: i, j, ma, mb, mc, md, p_index
2315      LOGICAL                                            :: with_resp_density
2316      REAL(dp)                                           :: temp1, temp1_resp, temp3, temp3_resp, &
2317                                                            temp4, teresp
2318
2319      with_resp_density = .FALSE.
2320      IF (PRESENT(pbd_resp) .AND. &
2321          PRESENT(pbc_resp) .AND. &
2322          PRESENT(pad_resp) .AND. &
2323          PRESENT(pac_resp)) with_resp_density = .TRUE.
2324
2325      IF (with_resp_density) THEN
2326         p_index = 0
2327         temp4 = 0.0_dp
2328         DO md = 1, md_max
2329            DO mc = 1, mc_max
2330               DO mb = 1, mb_max
2331                  temp1 = pbc((mc - 1)*mb_max + mb)*fac
2332                  temp3 = pbd((md - 1)*mb_max + mb)*fac
2333                  temp1_resp = pbc_resp((mc - 1)*mb_max + mb)*fac
2334                  temp3_resp = pbd_resp((md - 1)*mb_max + mb)*fac
2335                  DO ma = 1, ma_max
2336                     p_index = p_index + 1
2337                     ! HF-SCF
2338                     teresp = temp1*pad((md - 1)*ma_max + ma) + &
2339                              temp3*pac((mc - 1)*ma_max + ma)
2340                     ! RESP+HF
2341                     teresp = teresp + &
2342                              pac((mc - 1)*ma_max + ma)*temp3_resp + &
2343                              pac_resp((mc - 1)*ma_max + ma)*temp3 + &
2344                              pad((md - 1)*ma_max + ma)*temp1_resp + &
2345                              pad_resp((md - 1)*ma_max + ma)*temp1
2346                     temp4 = temp4 + teresp*prim(p_index)
2347                  END DO !ma
2348               END DO !mb
2349            END DO !mc
2350         END DO !md
2351      ELSE
2352         p_index = 0
2353         temp4 = 0.0_dp
2354         DO md = 1, md_max
2355            DO mc = 1, mc_max
2356               DO mb = 1, mb_max
2357                  temp1 = pbc((mc - 1)*mb_max + mb)*fac
2358                  temp3 = pbd((md - 1)*mb_max + mb)*fac
2359                  DO ma = 1, ma_max
2360                     p_index = p_index + 1
2361                     teresp = temp1*pad((md - 1)*ma_max + ma) + &
2362                              temp3*pac((mc - 1)*ma_max + ma)
2363                     temp4 = temp4 + teresp*prim(p_index)
2364                  END DO !ma
2365               END DO !mb
2366            END DO !mc
2367         END DO !md
2368      END IF
2369
2370      j = l
2371      i = MOD(coord - 1, 3) + 1
2372      tmp_virial(i, j) = tmp_virial(i, j) - temp4
2373   END SUBROUTINE update_virial
2374
2375#include "hfx_get_pmax_val.f90"
2376
2377END MODULE hfx_derivatives
2378