1!--------------------------------------------------------------------------------------------------!
2!   CP2K: A general program to perform molecular dynamics simulations                              !
3!   Copyright (C) 2000 - 2020  CP2K developers group                                               !
4!--------------------------------------------------------------------------------------------------!
5
6! **************************************************************************************************
7!> \brief Routines to calculate HFX energy and potential
8!> \par History
9!>      11.2006 created [Manuel Guidon]
10!> \author Manuel Guidon
11! **************************************************************************************************
12MODULE hfx_energy_potential
13   USE atomic_kind_types,               ONLY: atomic_kind_type,&
14                                              get_atomic_kind_set
15   USE bibliography,                    ONLY: cite_reference,&
16                                              guidon2008,&
17                                              guidon2009
18   USE cell_types,                      ONLY: cell_type,&
19                                              pbc
20   USE cp_control_types,                ONLY: dft_control_type
21   USE cp_files,                        ONLY: close_file,&
22                                              open_file
23   USE cp_log_handling,                 ONLY: cp_get_default_logger,&
24                                              cp_logger_type
25   USE cp_output_handling,              ONLY: cp_p_file,&
26                                              cp_print_key_finished_output,&
27                                              cp_print_key_should_output,&
28                                              cp_print_key_unit_nr
29   USE cp_para_types,                   ONLY: cp_para_env_type
30   USE dbcsr_api,                       ONLY: dbcsr_copy,&
31                                              dbcsr_dot,&
32                                              dbcsr_get_matrix_type,&
33                                              dbcsr_p_type,&
34                                              dbcsr_type_antisymmetric
35   USE gamma,                           ONLY: init_md_ftable
36   USE hfx_communication,               ONLY: distribute_ks_matrix,&
37                                              get_atomic_block_maps,&
38                                              get_full_density
39   USE hfx_compression_methods,         ONLY: hfx_add_mult_cache_elements,&
40                                              hfx_add_single_cache_element,&
41                                              hfx_decompress_first_cache,&
42                                              hfx_flush_last_cache,&
43                                              hfx_get_mult_cache_elements,&
44                                              hfx_get_single_cache_element,&
45                                              hfx_reset_cache_and_container
46   USE hfx_contract_block,              ONLY: contract_block
47   USE hfx_libint_interface,            ONLY: evaluate_eri
48   USE hfx_load_balance_methods,        ONLY: collect_load_balance_info,&
49                                              hfx_load_balance,&
50                                              hfx_update_load_balance
51   USE hfx_pair_list_methods,           ONLY: build_atomic_pair_list,&
52                                              build_pair_list,&
53                                              build_pair_list_pgf,&
54                                              build_pgf_product_list,&
55                                              pgf_product_list_size
56   USE hfx_screening_methods,           ONLY: calc_pair_dist_radii,&
57                                              calc_screening_functions,&
58                                              update_pmax_mat
59   USE hfx_types,                       ONLY: &
60        alloc_containers, dealloc_containers, hfx_basis_info_type, hfx_basis_type, hfx_cache_type, &
61        hfx_cell_type, hfx_container_type, hfx_create_neighbor_cells, hfx_distribution, &
62        hfx_general_type, hfx_init_container, hfx_load_balance_type, hfx_memory_type, hfx_p_kind, &
63        hfx_pgf_list, hfx_pgf_product_list, hfx_potential_type, hfx_reset_memory_usage_counter, &
64        hfx_screen_coeff_type, hfx_screening_type, hfx_task_list_type, hfx_type, init_t_c_g0_lmax, &
65        log_zero, pair_list_type, pair_set_list_type
66   USE input_constants,                 ONLY: do_potential_mix_cl_trunc,&
67                                              do_potential_truncated,&
68                                              hfx_do_eval_energy
69   USE input_section_types,             ONLY: section_vals_type
70   USE kinds,                           ONLY: default_string_length,&
71                                              dp,&
72                                              int_8
73   USE kpoint_types,                    ONLY: get_kpoint_info,&
74                                              kpoint_type
75   USE libint_wrapper,                  ONLY: cp_libint_t
76   USE machine,                         ONLY: m_flush,&
77                                              m_memory,&
78                                              m_walltime
79   USE mathconstants,                   ONLY: fac
80   USE message_passing,                 ONLY: mp_max,&
81                                              mp_sum,&
82                                              mp_sync
83   USE orbital_pointers,                ONLY: nco,&
84                                              ncoset,&
85                                              nso
86   USE particle_types,                  ONLY: particle_type
87   USE qs_environment_types,            ONLY: get_qs_env,&
88                                              qs_environment_type
89   USE qs_ks_types,                     ONLY: get_ks_env,&
90                                              qs_ks_env_type
91   USE t_c_g0,                          ONLY: init
92   USE util,                            ONLY: sort
93
94!$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads
95
96#include "./base/base_uses.f90"
97
98   IMPLICIT NONE
99   PRIVATE
100
101   PUBLIC ::  integrate_four_center, coulomb4
102
103   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'hfx_energy_potential'
104
105!***
106
107CONTAINS
108
109! **************************************************************************************************
110!> \brief computes four center integrals for a full basis set and updates the
111!>      Kohn-Sham-Matrix and energy. Uses all 8 eri symmetries
112!> \param qs_env ...
113!> \param x_data ...
114!> \param ks_matrix ...
115!> \param ehfx energy calculated with the updated HFX matrix
116!> \param rho_ao density matrix in ao basis
117!> \param hfx_section input_section HFX
118!> \param para_env ...
119!> \param geometry_did_change flag that indicates we have to recalc integrals
120!> \param irep Index for the HFX replica
121!> \param distribute_fock_matrix Flag that indicates whether to communicate the
122!>        new fock matrix or not
123!> \param ispin ...
124!> \par History
125!>      06.2007 created [Manuel Guidon]
126!>      08.2007 optimized load balance [Manuel Guidon]
127!>      09.2007 new parallelization [Manuel Guidon]
128!>      02.2009 completely rewritten screening part [Manuel Guidon]
129!>      12.2017 major bug fix. removed wrong cycle that was caussing segfault.
130!>              see https://groups.google.com/forum/#!topic/cp2k/pc6B14XOALY
131!>              [Tobias Binninger + Valery Weber]
132!> \author Manuel Guidon
133! **************************************************************************************************
134   SUBROUTINE integrate_four_center(qs_env, x_data, ks_matrix, ehfx, rho_ao, hfx_section, para_env, &
135                                    geometry_did_change, irep, distribute_fock_matrix, &
136                                    ispin)
137
138      TYPE(qs_environment_type), POINTER                 :: qs_env
139      TYPE(hfx_type), DIMENSION(:, :), POINTER           :: x_data
140      TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: ks_matrix
141      REAL(KIND=dp), INTENT(OUT)                         :: ehfx
142      TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: rho_ao
143      TYPE(section_vals_type), POINTER                   :: hfx_section
144      TYPE(cp_para_env_type), POINTER                    :: para_env
145      LOGICAL                                            :: geometry_did_change
146      INTEGER                                            :: irep
147      LOGICAL, INTENT(IN)                                :: distribute_fock_matrix
148      INTEGER, INTENT(IN)                                :: ispin
149
150      CHARACTER(LEN=*), PARAMETER :: routineN = 'integrate_four_center', &
151         routineP = moduleN//':'//routineN
152
153      CHARACTER(len=default_string_length)               :: eps_scaling_str, eps_schwarz_min_str
154      INTEGER :: act_atomic_block_offset, act_set_offset, atomic_offset_ac, atomic_offset_ad, &
155         atomic_offset_bc, atomic_offset_bd, bin, bits_max_val, buffer_left, buffer_size, &
156         buffer_start, cache_size, current_counter, handle, handle_bin, handle_dist_ks, &
157         handle_getP, handle_load, handle_main, i, i_list_ij, i_list_kl, i_set_list_ij, &
158         i_set_list_ij_start, i_set_list_ij_stop, i_set_list_kl, i_set_list_kl_start, &
159         i_set_list_kl_stop, i_thread, iatom, iatom_block, iatom_end, iatom_start, ikind, img, &
160         iset, iw, j, jatom, jatom_block, jatom_end, jatom_start, jkind, jset, katom, katom_block, &
161         katom_end
162      INTEGER :: katom_start, kind_kind_idx, kkind, kset, l_max, latom, latom_block, latom_end, &
163         latom_start, lkind, lset, ma, max_am, max_pgf, max_set, mb, my_bin_id, my_bin_size, &
164         my_thread_id, n_threads, natom, nbits, ncob, ncos_max, nints, nkimages, nkind, &
165         nneighbors, nseta, nsetb, nsgf_max, nspins, pa, sgfb, shm_task_counter, shm_total_bins, &
166         sphi_a_u1, sphi_a_u2, sphi_a_u3, sphi_b_u1, sphi_b_u2, sphi_b_u3, sphi_c_u1, sphi_c_u2, &
167         sphi_c_u3, sphi_d_u1, sphi_d_u2, sphi_d_u3, swap_id, tmp_i4, unit_id
168      INTEGER(int_8) :: atom_block, counter, estimate_to_store_int, max_val_memory, &
169         mb_size_buffers, mb_size_f, mb_size_p, mem_compression_counter, &
170         mem_compression_counter_disk, mem_eris, mem_eris_disk, mem_max_val, memsize_after, &
171         memsize_before, my_current_counter, my_istart, n_processes, nblocks, ncpu, neris_disk, &
172         neris_incore, neris_onthefly, neris_tmp, neris_total, nprim_ints, &
173         shm_mem_compression_counter, shm_neris_disk, shm_neris_incore, shm_neris_onthefly, &
174         shm_neris_total, shm_nprim_ints, shm_stor_count_int_disk, shm_stor_count_max_val, &
175         shm_storage_counter_integrals, stor_count_int_disk
176      INTEGER(int_8) :: stor_count_max_val, storage_counter_integrals, subtr_size_mb, tmp_block, &
177         tmp_i8(8)
178      INTEGER(int_8), ALLOCATABLE, DIMENSION(:)          :: tmp_task_list_cost
179      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: kind_of, last_sgf_global, nimages, &
180                                                            tmp_index
181      INTEGER, DIMENSION(:), POINTER :: la_max, la_min, lb_max, lb_min, lc_max, lc_min, ld_max, &
182         ld_min, npgfa, npgfb, npgfc, npgfd, nsgfa, nsgfb, nsgfc, nsgfd, shm_block_offset
183      INTEGER, DIMENSION(:, :), POINTER :: first_sgfb, nsgfl_a, nsgfl_b, nsgfl_c, nsgfl_d, &
184         offset_ac_set, offset_ad_set, offset_bc_set, offset_bd_set, shm_atomic_block_offset
185      INTEGER, DIMENSION(:, :), POINTER, SAVE            :: shm_is_assoc_atomic_block
186      INTEGER, DIMENSION(:, :, :), POINTER               :: cell_to_index
187      INTEGER, DIMENSION(:, :, :, :), POINTER            :: shm_set_offset
188      INTEGER, SAVE                                      :: shm_number_of_p_entries
189      LOGICAL :: bins_left, buffer_overflow, do_disk_storage, do_dynamic_load_balancing, do_it, &
190         do_kpoints, do_p_screening, do_periodic, do_print_load_balance_info, is_anti_symmetric, &
191         ks_fully_occ, my_geo_change, treat_lsd_in_core, use_disk_storage
192      LOGICAL, DIMENSION(:, :), POINTER                  :: shm_atomic_pair_list
193      REAL(dp) :: afac, bintime_start, bintime_stop, cartesian_estimate, compression_factor, &
194         compression_factor_disk, ene_x_aa, ene_x_aa_diag, ene_x_bb, ene_x_bb_diag, eps_schwarz, &
195         eps_storage, etmp, fac, hf_fraction, ln_10, log10_eps_schwarz, log10_pmax, &
196         max_contraction_val, max_val1, max_val2, max_val2_set, pmax_atom, pmax_blocks, &
197         pmax_entry, ra(3), rab2, rb(3), rc(3), rcd2, rd(3), screen_kind_ij, screen_kind_kl, &
198         spherical_estimate, symm_fac
199      REAL(dp), ALLOCATABLE, DIMENSION(:) :: ee_buffer1, ee_buffer2, ee_primitives_tmp, ee_work, &
200         ee_work2, kac_buf, kad_buf, kbc_buf, kbd_buf, pac_buf, pad_buf, pbc_buf, pbd_buf, &
201         primitive_integrals
202      REAL(dp), DIMENSION(:), POINTER                    :: p_work
203      REAL(dp), DIMENSION(:, :), POINTER :: full_density_alpha, full_density_beta, full_ks_alpha, &
204         full_ks_beta, max_contraction, ptr_p_1, ptr_p_2, ptr_p_3, ptr_p_4, shm_pmax_atom, &
205         shm_pmax_block, sphi_b, zeta, zetb, zetc, zetd
206      REAL(dp), DIMENSION(:, :, :), POINTER              :: sphi_a_ext_set, sphi_b_ext_set, &
207                                                            sphi_c_ext_set, sphi_d_ext_set
208      REAL(dp), DIMENSION(:, :, :, :), POINTER           :: sphi_a_ext, sphi_b_ext, sphi_c_ext, &
209                                                            sphi_d_ext
210      REAL(KIND=dp)                                      :: coeffs_kind_max0
211      TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
212      TYPE(cell_type), POINTER                           :: cell
213      TYPE(cp_libint_t)                                  :: private_lib
214      TYPE(cp_logger_type), POINTER                      :: logger
215      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_ks_aux_fit_hfx
216      TYPE(dft_control_type), POINTER                    :: dft_control
217      TYPE(hfx_basis_info_type), POINTER                 :: basis_info
218      TYPE(hfx_basis_type), DIMENSION(:), POINTER        :: basis_parameter
219      TYPE(hfx_cache_type), DIMENSION(:), POINTER        :: integral_caches, integral_caches_disk
220      TYPE(hfx_cache_type), POINTER                      :: maxval_cache, maxval_cache_disk
221      TYPE(hfx_container_type), DIMENSION(:), POINTER    :: integral_containers, &
222                                                            integral_containers_disk
223      TYPE(hfx_container_type), POINTER                  :: maxval_container, maxval_container_disk
224      TYPE(hfx_distribution), POINTER                    :: distribution_energy
225      TYPE(hfx_general_type)                             :: general_parameter
226      TYPE(hfx_load_balance_type), POINTER               :: load_balance_parameter
227      TYPE(hfx_memory_type), POINTER                     :: memory_parameter
228      TYPE(hfx_p_kind), DIMENSION(:), POINTER            :: shm_initial_p
229      TYPE(hfx_pgf_list), ALLOCATABLE, DIMENSION(:)      :: pgf_list_ij, pgf_list_kl
230      TYPE(hfx_pgf_product_list), ALLOCATABLE, &
231         DIMENSION(:)                                    :: pgf_product_list
232      TYPE(hfx_potential_type)                           :: potential_parameter
233      TYPE(hfx_screen_coeff_type), DIMENSION(:, :), &
234         POINTER                                         :: screen_coeffs_kind, tmp_R_1, tmp_R_2, &
235                                                            tmp_screen_pgf1, tmp_screen_pgf2
236      TYPE(hfx_screen_coeff_type), &
237         DIMENSION(:, :, :, :), POINTER                  :: screen_coeffs_set
238      TYPE(hfx_screen_coeff_type), &
239         DIMENSION(:, :, :, :, :, :), POINTER            :: radii_pgf, screen_coeffs_pgf
240      TYPE(hfx_screening_type)                           :: screening_parameter
241      TYPE(hfx_task_list_type), DIMENSION(:), POINTER    :: shm_task_list, tmp_task_list
242      TYPE(hfx_type), POINTER                            :: actual_x_data, shm_master_x_data
243      TYPE(kpoint_type), POINTER                         :: kpoints
244      TYPE(pair_list_type)                               :: list_ij, list_kl
245      TYPE(pair_set_list_type), ALLOCATABLE, &
246         DIMENSION(:)                                    :: set_list_ij, set_list_kl
247      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
248      TYPE(qs_ks_env_type), POINTER                      :: ks_env
249
250      NULLIFY (dft_control)
251
252      CALL timeset(routineN, handle)
253
254      CALL cite_reference(Guidon2008)
255      CALL cite_reference(Guidon2009)
256
257      ehfx = 0.0_dp
258
259      ! ** This is not very clean, but effective. ispin can only be 2 if we do the beta spin part in core
260      my_geo_change = geometry_did_change
261      IF (ispin == 2) my_geo_change = .FALSE.
262
263      logger => cp_get_default_logger()
264
265      is_anti_symmetric = dbcsr_get_matrix_type(rho_ao(1, 1)%matrix) .EQ. dbcsr_type_antisymmetric
266
267      IF (my_geo_change) THEN
268         CALL m_memory(memsize_before)
269         CALL mp_max(memsize_before, para_env%group)
270         iw = cp_print_key_unit_nr(logger, hfx_section, "HF_INFO", &
271                                   extension=".scfLog")
272         IF (iw > 0) THEN
273            WRITE (UNIT=iw, FMT="(/,(T3,A,T60,I21))") &
274               "HFX_MEM_INFO| Est. max. program size before HFX [MiB]:", memsize_before/(1024*1024)
275            CALL m_flush(iw)
276         END IF
277         CALL cp_print_key_finished_output(iw, logger, hfx_section, &
278                                           "HF_INFO")
279      END IF
280
281      CALL get_qs_env(qs_env=qs_env, atomic_kind_set=atomic_kind_set, cell=cell, &
282                      matrix_ks_aux_fit_hfx=matrix_ks_aux_fit_hfx)
283
284      NULLIFY (cell_to_index)
285      CALL get_qs_env(qs_env=qs_env, do_kpoints=do_kpoints)
286      IF (do_kpoints) THEN
287         CALL get_qs_env(qs_env=qs_env, ks_env=ks_env)
288         CALL get_ks_env(ks_env=ks_env, kpoints=kpoints)
289         CALL get_kpoint_info(kpoint=kpoints, cell_to_index=cell_to_index)
290      END IF
291
292      !! Calculate l_max used in fgamma , because init_md_ftable is definitely not thread safe
293      nkind = SIZE(atomic_kind_set, 1)
294      l_max = 0
295      DO ikind = 1, nkind
296         l_max = MAX(l_max, MAXVAL(x_data(1, 1)%basis_parameter(ikind)%lmax))
297      ENDDO
298      l_max = 4*l_max
299      CALL init_md_ftable(l_max)
300
301      IF (x_data(1, 1)%potential_parameter%potential_type == do_potential_truncated .OR. &
302          x_data(1, 1)%potential_parameter%potential_type == do_potential_mix_cl_trunc) THEN
303         IF (l_max > init_t_c_g0_lmax) THEN
304            IF (para_env%ionode) THEN
305               CALL open_file(unit_number=unit_id, file_name=x_data(1, 1)%potential_parameter%filename)
306            END IF
307            CALL init(l_max, unit_id, para_env%mepos, para_env%group)
308            IF (para_env%ionode) THEN
309               CALL close_file(unit_id)
310            END IF
311            init_t_c_g0_lmax = l_max
312         END IF
313      END IF
314
315      n_threads = 1
316!$    n_threads = omp_get_max_threads()
317
318      shm_neris_total = 0
319      shm_nprim_ints = 0
320      shm_neris_onthefly = 0
321      shm_storage_counter_integrals = 0
322      shm_stor_count_int_disk = 0
323      shm_neris_incore = 0
324      shm_neris_disk = 0
325      shm_stor_count_max_val = 0
326
327!$OMP PARALLEL DEFAULT(NONE) SHARED(qs_env,&
328!$OMP                                  x_data,&
329!$OMP                                  ks_matrix,&
330!$OMP                                  ehfx,&
331!$OMP                                  rho_ao,&
332!$OMP                                  matrix_ks_aux_fit_hfx,&
333!$OMP                                  hfx_section,&
334!$OMP                                  para_env,&
335!$OMP                                  my_geo_change,&
336!$OMP                                  irep,&
337!$OMP                                  distribute_fock_matrix,&
338!$OMP                                  cell_to_index,&
339!$OMP                                  ncoset,&
340!$OMP                                  nso,&
341!$OMP                                  nco,&
342!$OMP                                  full_ks_alpha,&
343!$OMP                                  full_ks_beta,&
344!$OMP                                  n_threads,&
345!$OMP                                  full_density_alpha,&
346!$OMP                                  full_density_beta,&
347!$OMP                                  shm_initial_p,&
348!$OMP                                  shm_is_assoc_atomic_block,&
349!$OMP                                  shm_number_of_p_entries,&
350!$OMP                                  shm_neris_total,&
351!$OMP                                  shm_neris_onthefly,&
352!$OMP                                  shm_storage_counter_integrals,&
353!$OMP                                  shm_stor_count_int_disk,&
354!$OMP                                  shm_neris_incore,&
355!$OMP                                  shm_neris_disk,&
356!$OMP                                  shm_nprim_ints,&
357!$OMP                                  shm_stor_count_max_val,&
358!$OMP                                  cell,&
359!$OMP                                  screen_coeffs_set,&
360!$OMP                                  screen_coeffs_kind,&
361!$OMP                                  screen_coeffs_pgf,&
362!$OMP                                  pgf_product_list_size,&
363!$OMP                                  radii_pgf,&
364!$OMP                                  nkind,&
365!$OMP                                  ispin,&
366!$OMP                                  is_anti_symmetric,&
367!$OMP                                  shm_atomic_block_offset,&
368!$OMP                                  shm_set_offset,&
369!$OMP                                  shm_block_offset,&
370!$OMP                                  shm_task_counter,&
371!$OMP                                  shm_task_list,&
372!$OMP                                  shm_total_bins,&
373!$OMP                                  shm_master_x_data,&
374!$OMP                                  shm_pmax_atom,&
375!$OMP                                  shm_pmax_block,&
376!$OMP                                  shm_atomic_pair_list,&
377!$OMP                                  shm_mem_compression_counter,&
378!$OMP                                  do_print_load_balance_info) &
379!$OMP PRIVATE(ln_10,i_thread,actual_x_data,do_periodic,screening_parameter,potential_parameter,&
380!$OMP         general_parameter,load_balance_parameter,memory_parameter,cache_size,bits_max_val,&
381!$OMP         basis_parameter,basis_info,treat_lsd_in_core,ncpu,n_processes,neris_total,neris_incore,&
382!$OMP         neris_disk,neris_onthefly,mem_eris,mem_eris_disk,mem_max_val,compression_factor,&
383!$OMP         compression_factor_disk,nprim_ints,neris_tmp,max_val_memory,max_am,do_p_screening,&
384!$OMP         max_set,particle_set,atomic_kind_set,natom,kind_of,ncos_max,nsgf_max,ikind,&
385!$OMP         nseta,npgfa,la_max,nsgfa,primitive_integrals,pbd_buf,pbc_buf,pad_buf,pac_buf,kbd_buf,kbc_buf,&
386!$OMP         kad_buf,kac_buf,ee_work,ee_work2,ee_buffer1,ee_buffer2,ee_primitives_tmp,nspins,max_contraction,&
387!$OMP         max_pgf,jkind,lb_max,nsetb,npgfb,first_sgfb,sphi_b,nsgfb,ncob,sgfb,nneighbors,pgf_list_ij,pgf_list_kl,&
388!$OMP         pgf_product_list,nimages,ks_fully_occ,subtr_size_mb,use_disk_storage,counter,do_disk_storage,&
389!$OMP         maxval_container_disk,maxval_cache_disk,integral_containers_disk,integral_caches_disk,eps_schwarz,&
390!$OMP         log10_eps_schwarz,eps_storage,hf_fraction,buffer_overflow,logger,private_lib,last_sgf_global,handle_getp,&
391!$OMP         p_work,fac,handle_load,do_dynamic_load_balancing,my_bin_size,maxval_container,integral_containers,maxval_cache,&
392!$OMP         integral_caches,tmp_task_list,tmp_task_list_cost,tmp_index,handle_main,coeffs_kind_max0,set_list_ij,&
393!$OMP         set_list_kl,iatom_start,iatom_end,jatom_start,jatom_end,nblocks,bins_left,do_it,distribution_energy,&
394!$OMP         my_thread_id,my_bin_id,handle_bin,bintime_start,my_istart,my_current_counter,latom_block,tmp_block,&
395!$OMP         katom_block,katom_start,katom_end,latom_start,latom_end,pmax_blocks,list_ij,list_kl,i_set_list_ij_start,&
396!$OMP         i_set_list_ij_stop,ra,rb,rab2,la_min,zeta,sphi_a_ext,nsgfl_a,sphi_a_u1,sphi_a_u2,sphi_a_u3,&
397!$OMP         lb_min,zetb,sphi_b_ext,nsgfl_b,sphi_b_u1,sphi_b_u2,sphi_b_u3,katom,latom,i_set_list_kl_start,i_set_list_kl_stop,&
398!$OMP         kkind,lkind,rc,rd,rcd2,pmax_atom,screen_kind_ij,screen_kind_kl,symm_fac,lc_max,lc_min,npgfc,zetc,nsgfc,sphi_c_ext,&
399!$OMP         nsgfl_c,sphi_c_u1,sphi_c_u2,sphi_c_u3,ld_max,ld_min,npgfd,zetd,nsgfd,sphi_d_ext,nsgfl_d,sphi_d_u1,sphi_d_u2,&
400!$OMP         sphi_d_u3,atomic_offset_bd,atomic_offset_bc,atomic_offset_ad,atomic_offset_ac,offset_bd_set,offset_bc_set,&
401!$OMP         offset_ad_set,offset_ac_set,swap_id,kind_kind_idx,ptr_p_1,ptr_p_2,ptr_p_3,ptr_p_4,mem_compression_counter,&
402!$OMP         mem_compression_counter_disk,max_val1,sphi_a_ext_set,sphi_b_ext_set,kset,lset,max_val2_set,max_val2,&
403!$OMP         sphi_c_ext_set,sphi_d_ext_set,pmax_entry,log10_pmax,current_counter,nints,estimate_to_store_int,&
404!$OMP         spherical_estimate,nbits,buffer_left,buffer_start,buffer_size,max_contraction_val,tmp_r_1,tmp_r_2,&
405!$OMP         tmp_screen_pgf1,tmp_screen_pgf2,cartesian_estimate,bintime_stop,iw,memsize_after,storage_counter_integrals,&
406!$OMP         stor_count_int_disk,stor_count_max_val,ene_x_aa,ene_x_bb,mb_size_p,mb_size_f,mb_size_buffers,afac,ene_x_aa_diag,&
407!$OMP         ene_x_bb_diag,act_atomic_block_offset,act_set_offset,j,handle_dist_ks,tmp_i8,tmp_i4,dft_control,&
408!$OMP         etmp,nkimages,img,bin,eps_scaling_str,eps_schwarz_min_str)
409
410      ln_10 = LOG(10.0_dp)
411      i_thread = 0
412!$    i_thread = omp_get_thread_num()
413
414      actual_x_data => x_data(irep, i_thread + 1)
415!$OMP MASTER
416      shm_master_x_data => x_data(irep, 1)
417!$OMP END MASTER
418!$OMP BARRIER
419
420      do_periodic = actual_x_data%periodic_parameter%do_periodic
421
422      IF (do_periodic) THEN
423         ! ** Rebuild neighbor lists in case the cell has changed (i.e. NPT MD)
424         actual_x_data%periodic_parameter%number_of_shells = actual_x_data%periodic_parameter%mode
425         CALL hfx_create_neighbor_cells(actual_x_data, actual_x_data%periodic_parameter%number_of_shells_from_input, &
426                                        cell, i_thread)
427      END IF
428
429      screening_parameter = actual_x_data%screening_parameter
430      potential_parameter = actual_x_data%potential_parameter
431
432      general_parameter = actual_x_data%general_parameter
433      load_balance_parameter => actual_x_data%load_balance_parameter
434      memory_parameter => actual_x_data%memory_parameter
435
436      cache_size = memory_parameter%cache_size
437      bits_max_val = memory_parameter%bits_max_val
438
439      basis_parameter => actual_x_data%basis_parameter
440      basis_info => actual_x_data%basis_info
441
442      treat_lsd_in_core = general_parameter%treat_lsd_in_core
443
444      ncpu = para_env%num_pe
445      n_processes = ncpu*n_threads
446
447      !! initialize some counters
448      neris_total = 0_int_8
449      neris_incore = 0_int_8
450      neris_disk = 0_int_8
451      neris_onthefly = 0_int_8
452      mem_eris = 0_int_8
453      mem_eris_disk = 0_int_8
454      mem_max_val = 0_int_8
455      compression_factor = 0.0_dp
456      compression_factor_disk = 0.0_dp
457      nprim_ints = 0_int_8
458      neris_tmp = 0_int_8
459      max_val_memory = 1_int_8
460
461      max_am = basis_info%max_am
462
463      CALL get_qs_env(qs_env=qs_env, &
464                      atomic_kind_set=atomic_kind_set, &
465                      particle_set=particle_set, &
466                      dft_control=dft_control)
467
468      do_p_screening = screening_parameter%do_initial_p_screening
469      ! Special treatment for MP2 with initial density screening
470      IF (do_p_screening) THEN
471         nspins = dft_control%nspins
472         IF (ASSOCIATED(qs_env%mp2_env)) THEN
473            IF ((qs_env%mp2_env%ri_mp2%free_hfx_buffer)) THEN
474               do_p_screening = ((qs_env%mp2_env%p_screen) .AND. (qs_env%mp2_env%not_last_hfx))
475            ELSE
476               do_p_screening = .FALSE.
477            ENDIF
478         ENDIF
479      ENDIF
480      max_set = basis_info%max_set
481      natom = SIZE(particle_set, 1)
482
483      ! Number of image matrices in k-point calculations (nkimages==1 -> no kpoints)
484      nkimages = dft_control%nimages
485      CPASSERT(nkimages == 1)
486
487      ALLOCATE (kind_of(natom))
488
489      CALL get_atomic_kind_set(atomic_kind_set=atomic_kind_set, &
490                               kind_of=kind_of)
491
492      !! precompute maximum nco and allocate scratch
493      ncos_max = 0
494      nsgf_max = 0
495      DO iatom = 1, natom
496         ikind = kind_of(iatom)
497         nseta = basis_parameter(ikind)%nset
498         npgfa => basis_parameter(ikind)%npgf
499         la_max => basis_parameter(ikind)%lmax
500         nsgfa => basis_parameter(ikind)%nsgf
501         DO iset = 1, nseta
502            ncos_max = MAX(ncos_max, ncoset(la_max(iset)))
503            nsgf_max = MAX(nsgf_max, nsgfa(iset))
504         ENDDO
505      ENDDO
506      !! Allocate the arrays for the integrals.
507      ALLOCATE (primitive_integrals(nsgf_max**4))
508      primitive_integrals = 0.0_dp
509
510      ALLOCATE (pbd_buf(nsgf_max**2))
511      ALLOCATE (pbc_buf(nsgf_max**2))
512      ALLOCATE (pad_buf(nsgf_max**2))
513      ALLOCATE (pac_buf(nsgf_max**2))
514      ALLOCATE (kbd_buf(nsgf_max**2))
515      ALLOCATE (kbc_buf(nsgf_max**2))
516      ALLOCATE (kad_buf(nsgf_max**2))
517      ALLOCATE (kac_buf(nsgf_max**2))
518      ALLOCATE (ee_work(ncos_max**4))
519      ALLOCATE (ee_work2(ncos_max**4))
520      ALLOCATE (ee_buffer1(ncos_max**4))
521      ALLOCATE (ee_buffer2(ncos_max**4))
522      ALLOCATE (ee_primitives_tmp(nsgf_max**4))
523
524      nspins = dft_control%nspins
525
526      ALLOCATE (max_contraction(max_set, natom))
527
528      max_contraction = 0.0_dp
529      max_pgf = 0
530      DO jatom = 1, natom
531         jkind = kind_of(jatom)
532         lb_max => basis_parameter(jkind)%lmax
533         nsetb = basis_parameter(jkind)%nset
534         npgfb => basis_parameter(jkind)%npgf
535         first_sgfb => basis_parameter(jkind)%first_sgf
536         sphi_b => basis_parameter(jkind)%sphi
537         nsgfb => basis_parameter(jkind)%nsgf
538         DO jset = 1, nsetb
539            ! takes the primitive to contracted transformation into account
540            ncob = npgfb(jset)*ncoset(lb_max(jset))
541            sgfb = first_sgfb(1, jset)
542            ! if the primitives are assumed to be all of max_val2, max_val2*p2s_b becomes
543            ! the maximum value after multiplication with sphi_b
544            max_contraction(jset, jatom) = MAXVAL((/(SUM(ABS(sphi_b(1:ncob, i))), i=sgfb, sgfb + nsgfb(jset) - 1)/))
545            max_pgf = MAX(max_pgf, npgfb(jset))
546         ENDDO
547      ENDDO
548
549      ! ** Allocate buffers for pgf_lists
550      nneighbors = SIZE(actual_x_data%neighbor_cells)
551      ALLOCATE (pgf_list_ij(max_pgf**2))
552      ALLOCATE (pgf_list_kl(max_pgf**2))
553      ! the size of pgf_product_list is allocated and resized as needed. The initial guess grows as needed
554!$OMP     ATOMIC READ
555      tmp_i4 = pgf_product_list_size
556      ALLOCATE (pgf_product_list(tmp_i4))
557      ALLOCATE (nimages(max_pgf**2))
558
559      DO i = 1, max_pgf**2
560         ALLOCATE (pgf_list_ij(i)%image_list(nneighbors))
561         ALLOCATE (pgf_list_kl(i)%image_list(nneighbors))
562      END DO
563!$OMP BARRIER
564!$OMP MASTER
565      !! Calculate helper array that stores if a certain atomic pair is associated in the KS matrix
566      IF (my_geo_change) THEN
567         CALL get_atomic_block_maps(ks_matrix(1, 1)%matrix, basis_parameter, kind_of, &
568                                    shm_master_x_data%is_assoc_atomic_block, &
569                                    shm_master_x_data%number_of_p_entries, &
570                                    para_env, &
571                                    shm_master_x_data%atomic_block_offset, &
572                                    shm_master_x_data%set_offset, &
573                                    shm_master_x_data%block_offset, &
574                                    shm_master_x_data%map_atoms_to_cpus, &
575                                    nkind)
576
577         shm_is_assoc_atomic_block => shm_master_x_data%is_assoc_atomic_block
578
579         !! Get occupation of KS-matrix
580         ks_fully_occ = .TRUE.
581         outer: DO iatom = 1, natom
582            DO jatom = iatom, natom
583               IF (shm_is_assoc_atomic_block(jatom, iatom) == 0) THEN
584                  ks_fully_occ = .FALSE.
585                  EXIT outer
586               END IF
587            END DO
588         END DO outer
589
590         IF (.NOT. ks_fully_occ) THEN
591            CALL cp_warn(__LOCATION__, &
592                         "The Kohn Sham matrix is not 100% occupied. This "// &
593                         "may result in incorrect Hartree-Fock results. Try to decrease EPS_PGF_ORB "// &
594                         "and EPS_FILTER_MATRIX in the QS section. For more information "// &
595                         "see FAQ: https://www.cp2k.org/faq:hfx_eps_warning")
596         END IF
597      END IF
598
599      ! ** Set pointers
600      shm_number_of_p_entries = shm_master_x_data%number_of_p_entries
601      shm_is_assoc_atomic_block => shm_master_x_data%is_assoc_atomic_block
602      shm_atomic_block_offset => shm_master_x_data%atomic_block_offset
603      shm_set_offset => shm_master_x_data%set_offset
604      shm_block_offset => shm_master_x_data%block_offset
605!$OMP END MASTER
606!$OMP BARRIER
607
608      ! ** Reset storage counter given by MAX_MEMORY by subtracting all buffers
609      ! ** Fock and density Matrices (shared)
610      subtr_size_mb = 2_int_8*shm_block_offset(ncpu + 1)
611      ! ** if non restricted ==> alpha/beta spin
612      IF (.NOT. treat_lsd_in_core) THEN
613         IF (nspins == 2) subtr_size_mb = subtr_size_mb*2_int_8
614      END IF
615      ! ** Initial P only MAX(alpha,beta) (shared)
616      IF (do_p_screening .OR. screening_parameter%do_p_screening_forces) THEN
617         subtr_size_mb = subtr_size_mb + memory_parameter%size_p_screen
618      END IF
619      ! ** In core forces require their own initial P
620      IF (screening_parameter%do_p_screening_forces) THEN
621         IF (memory_parameter%treat_forces_in_core) THEN
622            subtr_size_mb = subtr_size_mb + memory_parameter%size_p_screen
623         END IF
624      END IF
625      ! ** primitive buffer (not shared by the threads)
626      subtr_size_mb = subtr_size_mb + nsgf_max**4*n_threads
627      ! ** density + fock buffers
628      subtr_size_mb = subtr_size_mb + 8_int_8*nsgf_max**2*n_threads
629      ! ** screening functions (shared)
630      ! ** coeffs_pgf
631      subtr_size_mb = subtr_size_mb + max_pgf**2*max_set**2*nkind**2
632      ! ** coeffs_set
633      subtr_size_mb = subtr_size_mb + max_set**2*nkind**2
634      ! ** coeffs_kind
635      subtr_size_mb = subtr_size_mb + nkind**2
636      ! ** radii_pgf
637      subtr_size_mb = subtr_size_mb + max_pgf**2*max_set**2*nkind**2
638
639      ! ** is_assoc (shared)
640      subtr_size_mb = subtr_size_mb + natom**2
641
642      ! ** pmax_atom (shared)
643      IF (do_p_screening) THEN
644         subtr_size_mb = subtr_size_mb + natom**2
645      END IF
646      IF (screening_parameter%do_p_screening_forces) THEN
647         IF (memory_parameter%treat_forces_in_core) THEN
648            subtr_size_mb = subtr_size_mb + natom**2
649         END IF
650      END IF
651
652      ! ** Convert into MiB's
653      subtr_size_mb = subtr_size_mb*8_int_8/1024_int_8/1024_int_8
654
655      ! ** Subtracting all these buffers from MAX_MEMORY yields the amount
656      ! ** of RAM that is left for the compressed integrals. When using threads
657      ! ** all the available memory is shared among all n_threads. i.e. the faster
658      ! ** ones can steal from the slower ones
659
660      CALL hfx_reset_memory_usage_counter(memory_parameter, subtr_size_mb)
661
662      use_disk_storage = .FALSE.
663      counter = 0_int_8
664      do_disk_storage = memory_parameter%do_disk_storage
665      IF (do_disk_storage) THEN
666         maxval_container_disk => actual_x_data%maxval_container_disk
667         maxval_cache_disk => actual_x_data%maxval_cache_disk
668
669         integral_containers_disk => actual_x_data%integral_containers_disk
670         integral_caches_disk => actual_x_data%integral_caches_disk
671      END IF
672
673      IF (my_geo_change) THEN
674         memory_parameter%ram_counter = HUGE(memory_parameter%ram_counter)
675      END IF
676
677      IF (my_geo_change) THEN
678         memory_parameter%recalc_forces = .TRUE.
679      ELSE
680         IF (.NOT. memory_parameter%treat_forces_in_core) memory_parameter%recalc_forces = .TRUE.
681      END IF
682
683      !! Get screening parameter
684      eps_schwarz = screening_parameter%eps_schwarz
685      IF (eps_schwarz <= 0.0_dp) THEN
686         log10_eps_schwarz = log_zero
687      ELSE
688         log10_eps_schwarz = LOG10(eps_schwarz)
689      END IF
690      !! get storage epsilon
691      eps_storage = eps_schwarz*memory_parameter%eps_storage_scaling
692
693      !! If we have a hybrid functional, we may need only a fraction of exact exchange
694      hf_fraction = general_parameter%fraction
695
696      !! The number of integrals that fit into the given MAX_MEMORY
697
698      !! Parameters related to the potential 1/r, erf(wr)/r, erfc(wr/r)
699      potential_parameter = actual_x_data%potential_parameter
700
701      !! Variable to check if we calculate the integrals in-core or on the fly
702      !! If TRUE -> on the fly
703      IF (.NOT. memory_parameter%do_all_on_the_fly) THEN
704         buffer_overflow = .FALSE.
705      ELSE
706         buffer_overflow = .TRUE.
707      END IF
708      logger => cp_get_default_logger()
709
710      private_lib = actual_x_data%lib
711
712      !! Helper array to map local basis function indices to global ones
713      ALLOCATE (last_sgf_global(0:natom))
714      last_sgf_global(0) = 0
715      DO iatom = 1, natom
716         ikind = kind_of(iatom)
717         last_sgf_global(iatom) = last_sgf_global(iatom - 1) + basis_parameter(ikind)%nsgf_total
718      END DO
719!$OMP BARRIER
720!$OMP MASTER
721      !! Let master thread get the density (avoid problems with MPI)
722      !! Get the full density from all the processors
723      NULLIFY (full_density_alpha, full_density_beta)
724      ALLOCATE (full_density_alpha(shm_block_offset(ncpu + 1), nkimages))
725      IF (.NOT. treat_lsd_in_core .OR. nspins == 1) THEN
726         CALL timeset(routineN//"_getP", handle_getP)
727         DO img = 1, nkimages
728            CALL get_full_density(para_env, full_density_alpha(:, img), rho_ao(ispin, img)%matrix, shm_number_of_p_entries, &
729                                  shm_master_x_data%block_offset, &
730                                  kind_of, basis_parameter, get_max_vals_spin=.FALSE., antisymmetric=is_anti_symmetric)
731         END DO
732
733         IF (nspins == 2) THEN
734            ALLOCATE (full_density_beta(shm_block_offset(ncpu + 1), nkimages))
735            DO img = 1, nkimages
736               CALL get_full_density(para_env, full_density_beta(:, img), rho_ao(2, img)%matrix, shm_number_of_p_entries, &
737                                     shm_master_x_data%block_offset, &
738                                     kind_of, basis_parameter, get_max_vals_spin=.FALSE., antisymmetric=is_anti_symmetric)
739            END DO
740         END IF
741         CALL timestop(handle_getP)
742
743         !! Calculate the max values of the density matrix actual_pmax stores the data from the actual density matrix
744         !! and x_data%initial_p stores the same for the initial guess. The initial guess is updated only in the case of
745         !! a changed geometry
746         NULLIFY (shm_initial_p)
747         IF (do_p_screening) THEN
748            shm_initial_p => shm_master_x_data%initial_p
749            shm_pmax_atom => shm_master_x_data%pmax_atom
750            IF (my_geo_change) THEN
751               CALL update_pmax_mat(shm_master_x_data%initial_p, &
752                                    shm_master_x_data%map_atom_to_kind_atom, &
753                                    shm_master_x_data%set_offset, &
754                                    shm_master_x_data%atomic_block_offset, &
755                                    shm_pmax_atom, &
756                                    full_density_alpha, full_density_beta, &
757                                    natom, kind_of, basis_parameter, &
758                                    nkind, shm_master_x_data%is_assoc_atomic_block)
759            END IF
760         END IF
761      ELSE
762         IF (do_p_screening) THEN
763            CALL timeset(routineN//"_getP", handle_getP)
764            DO img = 1, nkimages
765               CALL get_full_density(para_env, full_density_alpha(:, img), rho_ao(1, img)%matrix, shm_number_of_p_entries, &
766                                     shm_master_x_data%block_offset, &
767                                     kind_of, basis_parameter, get_max_vals_spin=.TRUE., &
768                                     rho_beta=rho_ao(2, img)%matrix, antisymmetric=is_anti_symmetric)
769            END DO
770            CALL timestop(handle_getP)
771
772            !! Calculate the max values of the density matrix actual_pmax stores the data from the actual density matrix
773            !! and x_data%initial_p stores the same for the initial guess. The initial guess is updated only in the case of
774            !! a changed geometry
775            NULLIFY (shm_initial_p)
776            shm_initial_p => actual_x_data%initial_p
777            shm_pmax_atom => shm_master_x_data%pmax_atom
778            IF (my_geo_change) THEN
779               CALL update_pmax_mat(shm_master_x_data%initial_p, &
780                                    shm_master_x_data%map_atom_to_kind_atom, &
781                                    shm_master_x_data%set_offset, &
782                                    shm_master_x_data%atomic_block_offset, &
783                                    shm_pmax_atom, &
784                                    full_density_alpha, full_density_beta, &
785                                    natom, kind_of, basis_parameter, &
786                                    nkind, shm_master_x_data%is_assoc_atomic_block)
787            END IF
788         END IF
789         ! ** Now get the density(ispin)
790         DO img = 1, nkimages
791            CALL get_full_density(para_env, full_density_alpha(:, img), rho_ao(ispin, img)%matrix, shm_number_of_p_entries, &
792                                  shm_master_x_data%block_offset, &
793                                  kind_of, basis_parameter, get_max_vals_spin=.FALSE., &
794                                  antisymmetric=is_anti_symmetric)
795         END DO
796      END IF
797
798      NULLIFY (full_ks_alpha, full_ks_beta)
799      ALLOCATE (shm_master_x_data%full_ks_alpha(shm_block_offset(ncpu + 1), nkimages))
800      full_ks_alpha => shm_master_x_data%full_ks_alpha
801      full_ks_alpha = 0.0_dp
802
803      IF (.NOT. treat_lsd_in_core) THEN
804         IF (nspins == 2) THEN
805            ALLOCATE (shm_master_x_data%full_ks_beta(shm_block_offset(ncpu + 1), nkimages))
806            full_ks_beta => shm_master_x_data%full_ks_beta
807            full_ks_beta = 0.0_dp
808         END IF
809      END IF
810
811      !! Initialize schwarz screening matrices for near field estimates and boxing screening matrices
812      !! for far field estimates. The update is only performed if the geomtry of the system changed.
813      !! If the system is periodic, then the corresponding routines are called and some variables
814      !! are initialized
815
816!$OMP END MASTER
817!$OMP BARRIER
818
819      IF (.NOT. shm_master_x_data%screen_funct_is_initialized) THEN
820         CALL calc_pair_dist_radii(qs_env, basis_parameter, &
821                                   shm_master_x_data%pair_dist_radii_pgf, max_set, max_pgf, eps_schwarz, &
822                                   n_threads, i_thread)
823!$OMP BARRIER
824         CALL calc_screening_functions(qs_env, basis_parameter, private_lib, shm_master_x_data%potential_parameter, &
825                                       shm_master_x_data%screen_funct_coeffs_set, &
826                                       shm_master_x_data%screen_funct_coeffs_kind, &
827                                       shm_master_x_data%screen_funct_coeffs_pgf, &
828                                       shm_master_x_data%pair_dist_radii_pgf, &
829                                       max_set, max_pgf, n_threads, i_thread, p_work)
830
831!$OMP MASTER
832         shm_master_x_data%screen_funct_is_initialized = .TRUE.
833!$OMP END MASTER
834      END IF
835!$OMP BARRIER
836
837!$OMP MASTER
838      screen_coeffs_set => shm_master_x_data%screen_funct_coeffs_set
839      screen_coeffs_kind => shm_master_x_data%screen_funct_coeffs_kind
840      screen_coeffs_pgf => shm_master_x_data%screen_funct_coeffs_pgf
841      radii_pgf => shm_master_x_data%pair_dist_radii_pgf
842!$OMP END MASTER
843!$OMP BARRIER
844
845      !! Initialize a prefactor depending on the fraction and the number of spins
846      IF (nspins == 1) THEN
847         fac = 0.5_dp*hf_fraction
848      ELSE
849         fac = 1.0_dp*hf_fraction
850      END IF
851
852      !! Call routines that distribute the load on all processes. If we want to screen on a initial density matrix, there is
853      !! an optional parameter. Of course, this is only done if the geometry did change
854!$OMP BARRIER
855!$OMP MASTER
856      CALL timeset(routineN//"_load", handle_load)
857!$OMP END MASTER
858!$OMP BARRIER
859      IF (my_geo_change) THEN
860         IF (actual_x_data%b_first_load_balance_energy) THEN
861            CALL hfx_load_balance(actual_x_data, eps_schwarz, particle_set, max_set, para_env, &
862                                  screen_coeffs_set, screen_coeffs_kind, &
863                                  shm_is_assoc_atomic_block, do_periodic, load_balance_parameter, &
864                                  kind_of, basis_parameter, shm_initial_p, shm_pmax_atom, i_thread, n_threads, &
865                                  cell, do_p_screening, actual_x_data%map_atom_to_kind_atom, &
866                                  nkind, hfx_do_eval_energy, shm_pmax_block, use_virial=.FALSE.)
867            actual_x_data%b_first_load_balance_energy = .FALSE.
868         ELSE
869            CALL hfx_update_load_balance(actual_x_data, para_env, &
870                                         load_balance_parameter, &
871                                         i_thread, n_threads, hfx_do_eval_energy)
872         END IF
873      END IF
874!$OMP BARRIER
875!$OMP MASTER
876      CALL timestop(handle_load)
877!$OMP END MASTER
878!$OMP BARRIER
879
880      !! Start caluclating integrals of the form (ab|cd) or (ij|kl)
881      !! In order to do so, there is a main four-loop structure that takes into account the two symmetries
882      !!
883      !!   (ab|cd) = (ba|cd) = (ab|dc) = (ba|dc)
884      !!
885      !! by iterating in the following way
886      !!
887      !! DO iatom=1,natom               and       DO katom=1,natom
888      !!   DO jatom=iatom,natom                     DO latom=katom,natom
889      !!
890      !! The third symmetry
891      !!
892      !!  (ab|cd) = (cd|ab)
893      !!
894      !! is taken into account by the following criterion:
895      !!
896      !! IF(katom+latom<=iatom+jatom)  THEN
897      !!   IF( ((iatom+jatom).EQ.(katom+latom) ) .AND.(katom<iatom)) CYCLE
898      !!
899      !! Depending on the degeneracy of an integral the exchange contribution is multiplied by a corresponding
900      !! factor ( symm_fac ).
901      !!
902      !! If one quartet does not pass the screening we CYCLE on the outer most possible loop. Thats why we use
903      !! different hierarchies of short range screening matrices.
904      !!
905      !! If we do a parallel run, each process owns a unique array of workloads. Here, a workload is
906      !! defined as :
907      !!
908      !! istart, jstart, kstart, lstart, number_of_atom_quartets, initial_cpu_id
909      !!
910      !! This tells the process where to start the main loops and how many bunches of integrals it has to
911      !! calculate. The original parallelization is a simple modulo distribution that is binned and
912      !! optimized in the load_balance routines. Since the Monte Carlo routines can swap processors,
913      !! we need to know which was the initial cpu_id.
914      !! Furthermore, the indices iatom, jatom, katom, latom have to be set to istart, jstart, kstart and
915      !! lstart only the first time the loop is executed. All subsequent loops have to start with one or
916      !! iatom and katom respectively. Therefore, we use flags like first_j_loop etc.
917
918      do_dynamic_load_balancing = .TRUE.
919
920      IF (n_threads == 1 .OR. do_disk_storage) do_dynamic_load_balancing = .FALSE.
921
922      IF (do_dynamic_load_balancing) THEN
923         my_bin_size = SIZE(actual_x_data%distribution_energy)
924      ELSE
925         my_bin_size = 1
926      END IF
927      !! We do not need the containers if MAX_MEM = 0
928      IF (.NOT. memory_parameter%do_all_on_the_fly) THEN
929         !! IF new md step -> reinitialize containers
930         IF (my_geo_change) THEN
931            CALL dealloc_containers(actual_x_data, hfx_do_eval_energy)
932            CALL alloc_containers(actual_x_data, my_bin_size, hfx_do_eval_energy)
933
934            DO bin = 1, my_bin_size
935               maxval_container => actual_x_data%maxval_container(bin)
936               integral_containers => actual_x_data%integral_containers(:, bin)
937               CALL hfx_init_container(maxval_container, memory_parameter%actual_memory_usage, .FALSE.)
938               DO i = 1, 64
939                  CALL hfx_init_container(integral_containers(i), memory_parameter%actual_memory_usage, .FALSE.)
940               END DO
941            END DO
942         END IF
943
944         !! Decompress the first cache for maxvals and integrals
945         IF (.NOT. my_geo_change) THEN
946            DO bin = 1, my_bin_size
947               maxval_cache => actual_x_data%maxval_cache(bin)
948               maxval_container => actual_x_data%maxval_container(bin)
949               integral_caches => actual_x_data%integral_caches(:, bin)
950               integral_containers => actual_x_data%integral_containers(:, bin)
951               CALL hfx_decompress_first_cache(bits_max_val, maxval_cache, maxval_container, &
952                                               memory_parameter%actual_memory_usage, .FALSE.)
953               DO i = 1, 64
954                  CALL hfx_decompress_first_cache(i, integral_caches(i), integral_containers(i), &
955                                                  memory_parameter%actual_memory_usage, .FALSE.)
956               END DO
957            END DO
958         END IF
959      END IF
960
961      !! Since the I/O routines are no thread-safe, i.e. the procedure to get the unit number, put a lock here
962!$OMP CRITICAL(hfxenergy_io_critical)
963      !! If we do disk storage, we have to initialize the containers/caches
964      IF (do_disk_storage) THEN
965         !! IF new md step -> reinitialize containers
966         IF (my_geo_change) THEN
967            CALL hfx_init_container(maxval_container_disk, memory_parameter%actual_memory_usage_disk, do_disk_storage)
968            DO i = 1, 64
969               CALL hfx_init_container(integral_containers_disk(i), memory_parameter%actual_memory_usage_disk, do_disk_storage)
970            END DO
971         END IF
972         !! Decompress the first cache for maxvals and integrals
973         IF (.NOT. my_geo_change) THEN
974            CALL hfx_decompress_first_cache(bits_max_val, maxval_cache_disk, maxval_container_disk, &
975                                            memory_parameter%actual_memory_usage_disk, .TRUE.)
976            DO i = 1, 64
977               CALL hfx_decompress_first_cache(i, integral_caches_disk(i), integral_containers_disk(i), &
978                                               memory_parameter%actual_memory_usage_disk, .TRUE.)
979            END DO
980         END IF
981      END IF
982!$OMP END CRITICAL(hfxenergy_io_critical)
983
984!$OMP BARRIER
985!$OMP MASTER
986
987      IF (do_dynamic_load_balancing) THEN
988         ! ** Lets construct the task list
989         shm_total_bins = 0
990         DO i = 1, n_threads
991            shm_total_bins = shm_total_bins + SIZE(x_data(irep, i)%distribution_energy)
992         END DO
993         ALLOCATE (tmp_task_list(shm_total_bins))
994         shm_task_counter = 0
995         DO i = 1, n_threads
996            DO bin = 1, SIZE(x_data(irep, i)%distribution_energy)
997               shm_task_counter = shm_task_counter + 1
998               tmp_task_list(shm_task_counter)%thread_id = i
999               tmp_task_list(shm_task_counter)%bin_id = bin
1000               tmp_task_list(shm_task_counter)%cost = x_data(irep, i)%distribution_energy(bin)%cost
1001            END DO
1002         END DO
1003
1004         ! ** Now sort the task list
1005         ALLOCATE (tmp_task_list_cost(shm_total_bins))
1006         ALLOCATE (tmp_index(shm_total_bins))
1007
1008         DO i = 1, shm_total_bins
1009            tmp_task_list_cost(i) = tmp_task_list(i)%cost
1010         END DO
1011
1012         CALL sort(tmp_task_list_cost, shm_total_bins, tmp_index)
1013
1014         ALLOCATE (shm_master_x_data%task_list(shm_total_bins))
1015
1016         DO i = 1, shm_total_bins
1017            shm_master_x_data%task_list(i) = tmp_task_list(tmp_index(shm_total_bins - i + 1))
1018         END DO
1019
1020         shm_task_list => shm_master_x_data%task_list
1021         shm_task_counter = 0
1022
1023         DEALLOCATE (tmp_task_list_cost, tmp_index, tmp_task_list)
1024      END IF
1025!$OMP END MASTER
1026!$OMP BARRIER
1027
1028      IF (my_bin_size > 0) THEN
1029         maxval_container => actual_x_data%maxval_container(1)
1030         maxval_cache => actual_x_data%maxval_cache(1)
1031
1032         integral_containers => actual_x_data%integral_containers(:, 1)
1033         integral_caches => actual_x_data%integral_caches(:, 1)
1034      END IF
1035
1036!$OMP BARRIER
1037!$OMP MASTER
1038      CALL timeset(routineN//"_main", handle_main)
1039!$OMP END MASTER
1040!$OMP BARRIER
1041
1042      coeffs_kind_max0 = MAXVAL(screen_coeffs_kind(:, :)%x(2))
1043      ALLOCATE (set_list_ij((max_set*load_balance_parameter%block_size)**2))
1044      ALLOCATE (set_list_kl((max_set*load_balance_parameter%block_size)**2))
1045
1046!$OMP BARRIER
1047!$OMP MASTER
1048
1049      !! precalculate maximum density matrix elements in blocks
1050      actual_x_data%pmax_block = 0.0_dp
1051      shm_pmax_block => actual_x_data%pmax_block
1052      IF (do_p_screening) THEN
1053         DO iatom_block = 1, SIZE(actual_x_data%blocks)
1054            iatom_start = actual_x_data%blocks(iatom_block)%istart
1055            iatom_end = actual_x_data%blocks(iatom_block)%iend
1056            DO jatom_block = 1, SIZE(actual_x_data%blocks)
1057               jatom_start = actual_x_data%blocks(jatom_block)%istart
1058               jatom_end = actual_x_data%blocks(jatom_block)%iend
1059               shm_pmax_block(iatom_block, jatom_block) = MAXVAL(shm_pmax_atom(iatom_start:iatom_end, jatom_start:jatom_end))
1060            END DO
1061         END DO
1062      END IF
1063      shm_atomic_pair_list => actual_x_data%atomic_pair_list
1064      IF (my_geo_change) THEN
1065         CALL build_atomic_pair_list(natom, shm_atomic_pair_list, kind_of, basis_parameter, particle_set, &
1066                                     do_periodic, screen_coeffs_kind, coeffs_kind_max0, log10_eps_schwarz, cell, &
1067                                     actual_x_data%blocks)
1068      END IF
1069
1070      my_bin_size = SIZE(actual_x_data%distribution_energy)
1071      ! reset timings for the new SCF round
1072      IF (my_geo_change) THEN
1073         DO bin = 1, my_bin_size
1074            actual_x_data%distribution_energy(bin)%time_first_scf = 0.0_dp
1075            actual_x_data%distribution_energy(bin)%time_other_scf = 0.0_dp
1076            actual_x_data%distribution_energy(bin)%time_forces = 0.0_dp
1077         END DO
1078      ENDIF
1079!$OMP END MASTER
1080!$OMP BARRIER
1081
1082      my_bin_size = SIZE(actual_x_data%distribution_energy)
1083      nblocks = load_balance_parameter%nblocks
1084
1085      bins_left = .TRUE.
1086      do_it = .TRUE.
1087      bin = 0
1088      DO WHILE (bins_left)
1089         IF (.NOT. do_dynamic_load_balancing) THEN
1090            bin = bin + 1
1091            IF (bin > my_bin_size) THEN
1092               do_it = .FALSE.
1093               bins_left = .FALSE.
1094            ELSE
1095               do_it = .TRUE.
1096               bins_left = .TRUE.
1097               distribution_energy => actual_x_data%distribution_energy(bin)
1098            END IF
1099         ELSE
1100!$OMP CRITICAL(hfxenergy_critical)
1101            shm_task_counter = shm_task_counter + 1
1102            IF (shm_task_counter <= shm_total_bins) THEN
1103               my_thread_id = shm_task_list(shm_task_counter)%thread_id
1104               my_bin_id = shm_task_list(shm_task_counter)%bin_id
1105               IF (.NOT. memory_parameter%do_all_on_the_fly) THEN
1106                  maxval_cache => x_data(irep, my_thread_id)%maxval_cache(my_bin_id)
1107                  maxval_container => x_data(irep, my_thread_id)%maxval_container(my_bin_id)
1108                  integral_caches => x_data(irep, my_thread_id)%integral_caches(:, my_bin_id)
1109                  integral_containers => x_data(irep, my_thread_id)%integral_containers(:, my_bin_id)
1110               ENDIF
1111               distribution_energy => x_data(irep, my_thread_id)%distribution_energy(my_bin_id)
1112               do_it = .TRUE.
1113               bins_left = .TRUE.
1114               IF (my_geo_change) THEN
1115                  distribution_energy%ram_counter = HUGE(distribution_energy%ram_counter)
1116               END IF
1117               counter = 0_Int_8
1118            ELSE
1119               do_it = .FALSE.
1120               bins_left = .FALSE.
1121            END IF
1122!$OMP END CRITICAL(hfxenergy_critical)
1123         END IF
1124
1125         IF (.NOT. do_it) CYCLE
1126!$OMP MASTER
1127         CALL timeset(routineN//"_bin", handle_bin)
1128!$OMP END MASTER
1129         bintime_start = m_walltime()
1130         my_istart = distribution_energy%istart
1131         my_current_counter = 0
1132         IF (distribution_energy%number_of_atom_quartets == 0 .OR. &
1133             my_istart == -1_int_8) my_istart = nblocks**4
1134         atomic_blocks: DO atom_block = my_istart, nblocks**4 - 1, n_processes
1135            latom_block = INT(MODULO(atom_block, nblocks)) + 1
1136            tmp_block = atom_block/nblocks
1137            katom_block = INT(MODULO(tmp_block, nblocks)) + 1
1138            IF (latom_block < katom_block) CYCLE
1139            tmp_block = tmp_block/nblocks
1140            jatom_block = INT(MODULO(tmp_block, nblocks)) + 1
1141            tmp_block = tmp_block/nblocks
1142            iatom_block = INT(MODULO(tmp_block, nblocks)) + 1
1143            IF (jatom_block < iatom_block) CYCLE
1144            my_current_counter = my_current_counter + 1
1145            IF (my_current_counter > distribution_energy%number_of_atom_quartets) EXIT atomic_blocks
1146
1147            iatom_start = actual_x_data%blocks(iatom_block)%istart
1148            iatom_end = actual_x_data%blocks(iatom_block)%iend
1149            jatom_start = actual_x_data%blocks(jatom_block)%istart
1150            jatom_end = actual_x_data%blocks(jatom_block)%iend
1151            katom_start = actual_x_data%blocks(katom_block)%istart
1152            katom_end = actual_x_data%blocks(katom_block)%iend
1153            latom_start = actual_x_data%blocks(latom_block)%istart
1154            latom_end = actual_x_data%blocks(latom_block)%iend
1155
1156            pmax_blocks = MAX(shm_pmax_block(katom_block, iatom_block), &
1157                              shm_pmax_block(latom_block, jatom_block), &
1158                              shm_pmax_block(latom_block, iatom_block), &
1159                              shm_pmax_block(katom_block, jatom_block))
1160
1161            IF (2.0_dp*coeffs_kind_max0 + pmax_blocks < log10_eps_schwarz) CYCLE
1162
1163            CALL build_pair_list(natom, list_ij, set_list_ij, iatom_start, iatom_end, &
1164                                 jatom_start, jatom_end, &
1165                                 kind_of, basis_parameter, particle_set, &
1166                                 do_periodic, screen_coeffs_set, screen_coeffs_kind, &
1167                                 coeffs_kind_max0, log10_eps_schwarz, cell, pmax_blocks, &
1168                                 shm_atomic_pair_list)
1169
1170            CALL build_pair_list(natom, list_kl, set_list_kl, katom_start, katom_end, &
1171                                 latom_start, latom_end, &
1172                                 kind_of, basis_parameter, particle_set, &
1173                                 do_periodic, screen_coeffs_set, screen_coeffs_kind, &
1174                                 coeffs_kind_max0, log10_eps_schwarz, cell, pmax_blocks, &
1175                                 shm_atomic_pair_list)
1176
1177            DO i_list_ij = 1, list_ij%n_element
1178
1179               iatom = list_ij%elements(i_list_ij)%pair(1)
1180               jatom = list_ij%elements(i_list_ij)%pair(2)
1181               i_set_list_ij_start = list_ij%elements(i_list_ij)%set_bounds(1)
1182               i_set_list_ij_stop = list_ij%elements(i_list_ij)%set_bounds(2)
1183               ikind = list_ij%elements(i_list_ij)%kind_pair(1)
1184               jkind = list_ij%elements(i_list_ij)%kind_pair(2)
1185               ra = list_ij%elements(i_list_ij)%r1
1186               rb = list_ij%elements(i_list_ij)%r2
1187               rab2 = list_ij%elements(i_list_ij)%dist2
1188
1189               la_max => basis_parameter(ikind)%lmax
1190               la_min => basis_parameter(ikind)%lmin
1191               npgfa => basis_parameter(ikind)%npgf
1192               nseta = basis_parameter(ikind)%nset
1193               zeta => basis_parameter(ikind)%zet
1194               nsgfa => basis_parameter(ikind)%nsgf
1195               sphi_a_ext => basis_parameter(ikind)%sphi_ext(:, :, :, :)
1196               nsgfl_a => basis_parameter(ikind)%nsgfl
1197               sphi_a_u1 = UBOUND(sphi_a_ext, 1)
1198               sphi_a_u2 = UBOUND(sphi_a_ext, 2)
1199               sphi_a_u3 = UBOUND(sphi_a_ext, 3)
1200
1201               lb_max => basis_parameter(jkind)%lmax
1202               lb_min => basis_parameter(jkind)%lmin
1203               npgfb => basis_parameter(jkind)%npgf
1204               nsetb = basis_parameter(jkind)%nset
1205               zetb => basis_parameter(jkind)%zet
1206               nsgfb => basis_parameter(jkind)%nsgf
1207               sphi_b_ext => basis_parameter(jkind)%sphi_ext(:, :, :, :)
1208               nsgfl_b => basis_parameter(jkind)%nsgfl
1209               sphi_b_u1 = UBOUND(sphi_b_ext, 1)
1210               sphi_b_u2 = UBOUND(sphi_b_ext, 2)
1211               sphi_b_u3 = UBOUND(sphi_b_ext, 3)
1212
1213               DO i_list_kl = 1, list_kl%n_element
1214                  katom = list_kl%elements(i_list_kl)%pair(1)
1215                  latom = list_kl%elements(i_list_kl)%pair(2)
1216
1217                  IF (.NOT. (katom + latom <= iatom + jatom)) CYCLE
1218                  IF (((iatom + jatom) .EQ. (katom + latom)) .AND. (katom < iatom)) CYCLE
1219                  i_set_list_kl_start = list_kl%elements(i_list_kl)%set_bounds(1)
1220                  i_set_list_kl_stop = list_kl%elements(i_list_kl)%set_bounds(2)
1221                  kkind = list_kl%elements(i_list_kl)%kind_pair(1)
1222                  lkind = list_kl%elements(i_list_kl)%kind_pair(2)
1223                  rc = list_kl%elements(i_list_kl)%r1
1224                  rd = list_kl%elements(i_list_kl)%r2
1225                  rcd2 = list_kl%elements(i_list_kl)%dist2
1226
1227                  IF (do_p_screening) THEN
1228                     pmax_atom = MAX(shm_pmax_atom(katom, iatom), &
1229                                     shm_pmax_atom(latom, jatom), &
1230                                     shm_pmax_atom(latom, iatom), &
1231                                     shm_pmax_atom(katom, jatom))
1232                  ELSE
1233                     pmax_atom = 0.0_dp
1234                  END IF
1235
1236                  screen_kind_ij = screen_coeffs_kind(jkind, ikind)%x(1)*rab2 + &
1237                                   screen_coeffs_kind(jkind, ikind)%x(2)
1238                  screen_kind_kl = screen_coeffs_kind(lkind, kkind)%x(1)*rcd2 + &
1239                                   screen_coeffs_kind(lkind, kkind)%x(2)
1240
1241                  IF (screen_kind_ij + screen_kind_kl + pmax_atom < log10_eps_schwarz) CYCLE
1242
1243                  !! we want to be consistent with the KS matrix. If none of the atomic indices
1244                  !! is associated cycle
1245                  IF (.NOT. (shm_is_assoc_atomic_block(latom, iatom) >= 1 .AND. &
1246                             shm_is_assoc_atomic_block(katom, iatom) >= 1 .AND. &
1247                             shm_is_assoc_atomic_block(katom, jatom) >= 1 .AND. &
1248                             shm_is_assoc_atomic_block(latom, jatom) >= 1)) CYCLE
1249
1250                  !! calculate symmetry_factor according to degeneracy of atomic quartet
1251                  symm_fac = 0.5_dp
1252                  IF (iatom == jatom) symm_fac = symm_fac*2.0_dp
1253                  IF (katom == latom) symm_fac = symm_fac*2.0_dp
1254                  IF (iatom == katom .AND. jatom == latom .AND. iatom /= jatom .AND. katom /= latom) symm_fac = symm_fac*2.0_dp
1255                  IF (iatom == katom .AND. iatom == jatom .AND. katom == latom) symm_fac = symm_fac*2.0_dp
1256                  symm_fac = 1.0_dp/symm_fac
1257
1258                  lc_max => basis_parameter(kkind)%lmax
1259                  lc_min => basis_parameter(kkind)%lmin
1260                  npgfc => basis_parameter(kkind)%npgf
1261                  zetc => basis_parameter(kkind)%zet
1262                  nsgfc => basis_parameter(kkind)%nsgf
1263                  sphi_c_ext => basis_parameter(kkind)%sphi_ext(:, :, :, :)
1264                  nsgfl_c => basis_parameter(kkind)%nsgfl
1265                  sphi_c_u1 = UBOUND(sphi_c_ext, 1)
1266                  sphi_c_u2 = UBOUND(sphi_c_ext, 2)
1267                  sphi_c_u3 = UBOUND(sphi_c_ext, 3)
1268
1269                  ld_max => basis_parameter(lkind)%lmax
1270                  ld_min => basis_parameter(lkind)%lmin
1271                  npgfd => basis_parameter(lkind)%npgf
1272                  zetd => basis_parameter(lkind)%zet
1273                  nsgfd => basis_parameter(lkind)%nsgf
1274                  sphi_d_ext => basis_parameter(lkind)%sphi_ext(:, :, :, :)
1275                  nsgfl_d => basis_parameter(lkind)%nsgfl
1276                  sphi_d_u1 = UBOUND(sphi_d_ext, 1)
1277                  sphi_d_u2 = UBOUND(sphi_d_ext, 2)
1278                  sphi_d_u3 = UBOUND(sphi_d_ext, 3)
1279
1280                  atomic_offset_bd = shm_atomic_block_offset(jatom, latom)
1281                  atomic_offset_bc = shm_atomic_block_offset(jatom, katom)
1282                  atomic_offset_ad = shm_atomic_block_offset(iatom, latom)
1283                  atomic_offset_ac = shm_atomic_block_offset(iatom, katom)
1284
1285                  IF (jatom < latom) THEN
1286                     offset_bd_set => shm_set_offset(:, :, lkind, jkind)
1287                  ELSE
1288                     offset_bd_set => shm_set_offset(:, :, jkind, lkind)
1289                  END IF
1290                  IF (jatom < katom) THEN
1291                     offset_bc_set => shm_set_offset(:, :, kkind, jkind)
1292                  ELSE
1293                     offset_bc_set => shm_set_offset(:, :, jkind, kkind)
1294                  END IF
1295                  IF (iatom < latom) THEN
1296                     offset_ad_set => shm_set_offset(:, :, lkind, ikind)
1297                  ELSE
1298                     offset_ad_set => shm_set_offset(:, :, ikind, lkind)
1299                  END IF
1300                  IF (iatom < katom) THEN
1301                     offset_ac_set => shm_set_offset(:, :, kkind, ikind)
1302                  ELSE
1303                     offset_ac_set => shm_set_offset(:, :, ikind, kkind)
1304                  END IF
1305
1306                  IF (do_p_screening) THEN
1307                     swap_id = 0
1308                     kind_kind_idx = INT(get_1D_idx(kkind, ikind, INT(nkind, int_8)))
1309                     IF (ikind >= kkind) THEN
1310                        ptr_p_1 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1311                                                                       actual_x_data%map_atom_to_kind_atom(katom), &
1312                                                                       actual_x_data%map_atom_to_kind_atom(iatom))
1313                     ELSE
1314                        ptr_p_1 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1315                                                                       actual_x_data%map_atom_to_kind_atom(iatom), &
1316                                                                       actual_x_data%map_atom_to_kind_atom(katom))
1317                        swap_id = swap_id + 1
1318                     END IF
1319                     kind_kind_idx = INT(get_1D_idx(lkind, jkind, INT(nkind, int_8)))
1320                     IF (jkind >= lkind) THEN
1321                        ptr_p_2 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1322                                                                       actual_x_data%map_atom_to_kind_atom(latom), &
1323                                                                       actual_x_data%map_atom_to_kind_atom(jatom))
1324                     ELSE
1325                        ptr_p_2 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1326                                                                       actual_x_data%map_atom_to_kind_atom(jatom), &
1327                                                                       actual_x_data%map_atom_to_kind_atom(latom))
1328                        swap_id = swap_id + 2
1329                     END IF
1330                     kind_kind_idx = INT(get_1D_idx(lkind, ikind, INT(nkind, int_8)))
1331                     IF (ikind >= lkind) THEN
1332                        ptr_p_3 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1333                                                                       actual_x_data%map_atom_to_kind_atom(latom), &
1334                                                                       actual_x_data%map_atom_to_kind_atom(iatom))
1335                     ELSE
1336                        ptr_p_3 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1337                                                                       actual_x_data%map_atom_to_kind_atom(iatom), &
1338                                                                       actual_x_data%map_atom_to_kind_atom(latom))
1339                        swap_id = swap_id + 4
1340                     END IF
1341                     kind_kind_idx = INT(get_1D_idx(kkind, jkind, INT(nkind, int_8)))
1342                     IF (jkind >= kkind) THEN
1343                        ptr_p_4 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1344                                                                       actual_x_data%map_atom_to_kind_atom(katom), &
1345                                                                       actual_x_data%map_atom_to_kind_atom(jatom))
1346                     ELSE
1347                        ptr_p_4 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1348                                                                       actual_x_data%map_atom_to_kind_atom(jatom), &
1349                                                                       actual_x_data%map_atom_to_kind_atom(katom))
1350                        swap_id = swap_id + 8
1351                     END IF
1352                  END IF
1353
1354                  !! At this stage, check for memory used in compression
1355
1356                  IF (my_geo_change) THEN
1357                     IF (.NOT. memory_parameter%do_all_on_the_fly) THEN
1358                        ! ** We know the maximum amount of integrals that we can store per MPI-process
1359                        ! ** Now we need to sum the current memory usage among all openMP threads
1360                        ! ** We can just read what is currently stored on the corresponding x_data type
1361                        ! ** If this is thread i and it tries to read the data from thread j, that is
1362                        ! ** currently writing that data, we just dont care, because the possible error
1363                        ! ** is of the order of CACHE_SIZE
1364                        mem_compression_counter = 0
1365                        DO i = 1, n_threads
1366!$OMP                   ATOMIC READ
1367                           tmp_i4 = x_data(irep, i)%memory_parameter%actual_memory_usage
1368                           mem_compression_counter = mem_compression_counter + &
1369                                                     tmp_i4*memory_parameter%cache_size
1370                        END DO
1371                        IF (mem_compression_counter > memory_parameter%max_compression_counter) THEN
1372                           buffer_overflow = .TRUE.
1373                           IF (do_dynamic_load_balancing) THEN
1374                              distribution_energy%ram_counter = counter
1375                           ELSE
1376                              memory_parameter%ram_counter = counter
1377                           END IF
1378                        ELSE
1379                           counter = counter + 1
1380                           buffer_overflow = .FALSE.
1381                        END IF
1382                     END IF
1383                  ELSE
1384                     IF (.NOT. memory_parameter%do_all_on_the_fly) THEN
1385                        IF (do_dynamic_load_balancing) THEN
1386                           IF (distribution_energy%ram_counter == counter) THEN
1387                              buffer_overflow = .TRUE.
1388                           ELSE
1389                              counter = counter + 1
1390                              buffer_overflow = .FALSE.
1391                           END IF
1392
1393                        ELSE
1394                           IF (memory_parameter%ram_counter == counter) THEN
1395                              buffer_overflow = .TRUE.
1396                           ELSE
1397                              counter = counter + 1
1398                              buffer_overflow = .FALSE.
1399                           END IF
1400                        END IF
1401                     END IF
1402                  END IF
1403
1404                  IF (buffer_overflow .AND. do_disk_storage) THEN
1405                     use_disk_storage = .TRUE.
1406                     buffer_overflow = .FALSE.
1407                  END IF
1408
1409                  IF (use_disk_storage) THEN
1410!$OMP               ATOMIC READ
1411                     tmp_i4 = memory_parameter%actual_memory_usage_disk
1412                     mem_compression_counter_disk = tmp_i4*memory_parameter%cache_size
1413                     IF (mem_compression_counter_disk > memory_parameter%max_compression_counter_disk) THEN
1414                        buffer_overflow = .TRUE.
1415                        use_disk_storage = .FALSE.
1416                     END IF
1417                  END IF
1418
1419                  DO i_set_list_ij = i_set_list_ij_start, i_set_list_ij_stop
1420                     iset = set_list_ij(i_set_list_ij)%pair(1)
1421                     jset = set_list_ij(i_set_list_ij)%pair(2)
1422
1423                     ncob = npgfb(jset)*ncoset(lb_max(jset))
1424                     max_val1 = screen_coeffs_set(jset, iset, jkind, ikind)%x(1)*rab2 + &
1425                                screen_coeffs_set(jset, iset, jkind, ikind)%x(2)
1426
1427                     IF (max_val1 + screen_kind_kl + pmax_atom < log10_eps_schwarz) CYCLE
1428
1429                     sphi_a_ext_set => sphi_a_ext(:, :, :, iset)
1430                     sphi_b_ext_set => sphi_b_ext(:, :, :, jset)
1431                     DO i_set_list_kl = i_set_list_kl_start, i_set_list_kl_stop
1432                        kset = set_list_kl(i_set_list_kl)%pair(1)
1433                        lset = set_list_kl(i_set_list_kl)%pair(2)
1434
1435                        max_val2_set = (screen_coeffs_set(lset, kset, lkind, kkind)%x(1)*rcd2 + &
1436                                        screen_coeffs_set(lset, kset, lkind, kkind)%x(2))
1437                        max_val2 = max_val1 + max_val2_set
1438
1439                        !! Near field screening
1440                        IF (max_val2 + pmax_atom < log10_eps_schwarz) CYCLE
1441                        sphi_c_ext_set => sphi_c_ext(:, :, :, kset)
1442                        sphi_d_ext_set => sphi_d_ext(:, :, :, lset)
1443                        !! get max_vals if we screen on initial density
1444                        IF (do_p_screening) THEN
1445                           CALL get_pmax_val(ptr_p_1, ptr_p_2, ptr_p_3, ptr_p_4, &
1446                                             iset, jset, kset, lset, &
1447                                             pmax_entry, swap_id)
1448                        ELSE
1449                           pmax_entry = 0.0_dp
1450                        END IF
1451                        log10_pmax = pmax_entry
1452                        max_val2 = max_val2 + log10_pmax
1453                        IF (max_val2 < log10_eps_schwarz) CYCLE
1454                        pmax_entry = EXP(log10_pmax*ln_10)
1455
1456                        !! store current number of integrals, update total number and number of integrals in buffer
1457                        current_counter = nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset)
1458                        IF (buffer_overflow) THEN
1459                           neris_onthefly = neris_onthefly + current_counter
1460                        END IF
1461
1462                        !! Get integrals from buffer and update Kohn-Sham matrix
1463                        IF (.NOT. buffer_overflow .AND. .NOT. my_geo_change) THEN
1464                           nints = current_counter
1465                           IF (.NOT. use_disk_storage) THEN
1466                              CALL hfx_get_single_cache_element( &
1467                                 estimate_to_store_int, 6, &
1468                                 maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
1469                                 use_disk_storage)
1470                           ELSE
1471                              CALL hfx_get_single_cache_element( &
1472                                 estimate_to_store_int, 6, &
1473                                 maxval_cache_disk, maxval_container_disk, memory_parameter%actual_memory_usage_disk, &
1474                                 use_disk_storage)
1475                           END IF
1476                           spherical_estimate = SET_EXPONENT(1.0_dp, estimate_to_store_int + 1)
1477                           IF (spherical_estimate*pmax_entry < eps_schwarz) CYCLE
1478                           nbits = EXPONENT(ANINT(spherical_estimate*pmax_entry/eps_storage)) + 1
1479                           buffer_left = nints
1480                           buffer_start = 1
1481                           IF (.NOT. use_disk_storage) THEN
1482                              neris_incore = neris_incore + INT(nints, int_8)
1483                           ELSE
1484                              neris_disk = neris_disk + INT(nints, int_8)
1485                           END IF
1486                           DO WHILE (buffer_left > 0)
1487                              buffer_size = MIN(buffer_left, cache_size)
1488                              IF (.NOT. use_disk_storage) THEN
1489                                 CALL hfx_get_mult_cache_elements(primitive_integrals(buffer_start), &
1490                                                                  buffer_size, nbits, &
1491                                                                  integral_caches(nbits), &
1492                                                                  integral_containers(nbits), &
1493                                                                  eps_storage, pmax_entry, &
1494                                                                  memory_parameter%actual_memory_usage, &
1495                                                                  use_disk_storage)
1496                              ELSE
1497                                 CALL hfx_get_mult_cache_elements(primitive_integrals(buffer_start), &
1498                                                                  buffer_size, nbits, &
1499                                                                  integral_caches_disk(nbits), &
1500                                                                  integral_containers_disk(nbits), &
1501                                                                  eps_storage, pmax_entry, &
1502                                                                  memory_parameter%actual_memory_usage_disk, &
1503                                                                  use_disk_storage)
1504                              END IF
1505                              buffer_left = buffer_left - buffer_size
1506                              buffer_start = buffer_start + buffer_size
1507                           END DO
1508                        END IF
1509                        !! Calculate integrals if we run out of buffer or the geometry did change
1510                        IF (my_geo_change .OR. buffer_overflow) THEN
1511                           max_contraction_val = max_contraction(iset, iatom)* &
1512                                                 max_contraction(jset, jatom)* &
1513                                                 max_contraction(kset, katom)* &
1514                                                 max_contraction(lset, latom)*pmax_entry
1515                           tmp_R_1 => radii_pgf(:, :, jset, iset, jkind, ikind)
1516                           tmp_R_2 => radii_pgf(:, :, lset, kset, lkind, kkind)
1517                           tmp_screen_pgf1 => screen_coeffs_pgf(:, :, jset, iset, jkind, ikind)
1518                           tmp_screen_pgf2 => screen_coeffs_pgf(:, :, lset, kset, lkind, kkind)
1519
1520                           CALL coulomb4(private_lib, ra, rb, rc, rd, npgfa(iset), npgfb(jset), npgfc(kset), npgfd(lset), &
1521                                         la_min(iset), la_max(iset), lb_min(jset), lb_max(jset), &
1522                                         lc_min(kset), lc_max(kset), ld_min(lset), ld_max(lset), &
1523                                         nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1524                                         sphi_a_u1, sphi_a_u2, sphi_a_u3, &
1525                                         sphi_b_u1, sphi_b_u2, sphi_b_u3, &
1526                                         sphi_c_u1, sphi_c_u2, sphi_c_u3, &
1527                                         sphi_d_u1, sphi_d_u2, sphi_d_u3, &
1528                                         zeta(1:npgfa(iset), iset), zetb(1:npgfb(jset), jset), &
1529                                         zetc(1:npgfc(kset), kset), zetd(1:npgfd(lset), lset), &
1530                                         primitive_integrals, &
1531                                         potential_parameter, &
1532                                         actual_x_data%neighbor_cells, screen_coeffs_set(jset, iset, jkind, ikind)%x, &
1533                                         screen_coeffs_set(lset, kset, lkind, kkind)%x, eps_schwarz, &
1534                                         max_contraction_val, cartesian_estimate, cell, neris_tmp, &
1535                                         log10_pmax, log10_eps_schwarz, &
1536                                         tmp_R_1, tmp_R_2, tmp_screen_pgf1, tmp_screen_pgf2, &
1537                                         pgf_list_ij, pgf_list_kl, pgf_product_list, &
1538                                         nsgfl_a(:, iset), nsgfl_b(:, jset), &
1539                                         nsgfl_c(:, kset), nsgfl_d(:, lset), &
1540                                         sphi_a_ext_set, &
1541                                         sphi_b_ext_set, &
1542                                         sphi_c_ext_set, &
1543                                         sphi_d_ext_set, &
1544                                         ee_work, ee_work2, ee_buffer1, ee_buffer2, ee_primitives_tmp, &
1545                                         nimages, do_periodic, p_work)
1546
1547                           nints = nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset)
1548                           neris_total = neris_total + nints
1549                           nprim_ints = nprim_ints + neris_tmp
1550!                           IF (cartesian_estimate == 0.0_dp) cartesian_estimate = TINY(cartesian_estimate)
1551!                           estimate_to_store_int = EXPONENT(cartesian_estimate)
1552!                           estimate_to_store_int = MAX(estimate_to_store_int, -15_int_8)
1553!                           cartesian_estimate = SET_EXPONENT(1.0_dp, estimate_to_store_int+1)
1554!                           IF (.NOT. buffer_overflow .AND. my_geo_change) THEN
1555!                              IF (cartesian_estimate < eps_schwarz) THEN
1556!                                 IF (.NOT. use_disk_storage) THEN
1557!                                    CALL hfx_add_single_cache_element( &
1558!                                       estimate_to_store_int, 6, &
1559!                                       maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
1560!                                       use_disk_storage, max_val_memory)
1561!                                 ELSE
1562!                                    CALL hfx_add_single_cache_element( &
1563!                                       estimate_to_store_int, 6, &
1564!                                       maxval_cache_disk, maxval_container_disk, memory_parameter%actual_memory_usage_disk, &
1565!                                       use_disk_storage)
1566!                                 END IF
1567!                              END IF
1568!                           END IF
1569!
1570!                           IF (cartesian_estimate < eps_schwarz) CYCLE
1571
1572                           !! Compress the array for storage
1573                           spherical_estimate = 0.0_dp
1574                           DO i = 1, nints
1575                              spherical_estimate = MAX(spherical_estimate, ABS(primitive_integrals(i)))
1576                           END DO
1577
1578                           IF (spherical_estimate == 0.0_dp) spherical_estimate = TINY(spherical_estimate)
1579                           estimate_to_store_int = EXPONENT(spherical_estimate)
1580                           estimate_to_store_int = MAX(estimate_to_store_int, -15_int_8)
1581
1582                           IF (.NOT. buffer_overflow .AND. my_geo_change) THEN
1583                              IF (.NOT. use_disk_storage) THEN
1584                                 CALL hfx_add_single_cache_element( &
1585                                    estimate_to_store_int, 6, &
1586                                    maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
1587                                    use_disk_storage, max_val_memory)
1588                              ELSE
1589                                 CALL hfx_add_single_cache_element( &
1590                                    estimate_to_store_int, 6, &
1591                                    maxval_cache_disk, maxval_container_disk, memory_parameter%actual_memory_usage_disk, &
1592                                    use_disk_storage)
1593                              END IF
1594                           END IF
1595                           spherical_estimate = SET_EXPONENT(1.0_dp, estimate_to_store_int + 1)
1596                           IF (spherical_estimate*pmax_entry < eps_schwarz) CYCLE
1597                           IF (.NOT. buffer_overflow) THEN
1598                              nbits = EXPONENT(ANINT(spherical_estimate*pmax_entry/eps_storage)) + 1
1599
1600                              ! In case of a tight eps_storage threshold the number of significant
1601                              ! bits in the integer number NINT(value*pmax_entry/eps_storage) may
1602                              ! exceed the width of the storage element. As the compression algorithm
1603                              ! is designed for IEEE 754 double precision numbers, a 64-bit signed
1604                              ! integer variable which is used to store the result of this float-to-
1605                              ! integer conversion (we have no wish to use more memory for storing
1606                              ! compressed ERIs than it is needed for uncompressed ERIs) may overflow.
1607                              ! Abort with a meaningful message when it happens.
1608                              !
1609                              ! The magic number 63 stands for the number of magnitude bits
1610                              ! (64 bits minus one sign bit).
1611                              IF (nbits > 63) THEN
1612                                 WRITE (eps_schwarz_min_str, '(ES10.3E2)') &
1613                                    spherical_estimate*pmax_entry/ &
1614                                    (SET_EXPONENT(1.0_dp, 63)*memory_parameter%eps_storage_scaling)
1615
1616                                 WRITE (eps_scaling_str, '(ES10.3E2)') &
1617                                    spherical_estimate*pmax_entry/(SET_EXPONENT(1.0_dp, 63)*eps_schwarz)
1618
1619                                 CALL cp_abort(__LOCATION__, &
1620                                               "Overflow during ERI's compression. Please use a larger "// &
1621                                               "EPS_SCHWARZ threshold (above "//TRIM(ADJUSTL(eps_schwarz_min_str))// &
1622                                               ") or increase the EPS_STORAGE_SCALING factor above "// &
1623                                               TRIM(ADJUSTL(eps_scaling_str))//".")
1624                              END IF
1625
1626                              buffer_left = nints
1627                              buffer_start = 1
1628                              IF (.NOT. use_disk_storage) THEN
1629                                 neris_incore = neris_incore + INT(nints, int_8)
1630!                                 neris_incore = neris_incore+nints
1631                              ELSE
1632                                 neris_disk = neris_disk + INT(nints, int_8)
1633!                                 neris_disk = neris_disk+nints
1634                              END IF
1635                              DO WHILE (buffer_left > 0)
1636                                 buffer_size = MIN(buffer_left, CACHE_SIZE)
1637                                 IF (.NOT. use_disk_storage) THEN
1638                                    CALL hfx_add_mult_cache_elements(primitive_integrals(buffer_start), &
1639                                                                     buffer_size, nbits, &
1640                                                                     integral_caches(nbits), &
1641                                                                     integral_containers(nbits), &
1642                                                                     eps_storage, pmax_entry, &
1643                                                                     memory_parameter%actual_memory_usage, &
1644                                                                     use_disk_storage)
1645                                 ELSE
1646                                    CALL hfx_add_mult_cache_elements(primitive_integrals(buffer_start), &
1647                                                                     buffer_size, nbits, &
1648                                                                     integral_caches_disk(nbits), &
1649                                                                     integral_containers_disk(nbits), &
1650                                                                     eps_storage, pmax_entry, &
1651                                                                     memory_parameter%actual_memory_usage_disk, &
1652                                                                     use_disk_storage)
1653                                 END IF
1654                                 buffer_left = buffer_left - buffer_size
1655                                 buffer_start = buffer_start + buffer_size
1656                              END DO
1657                           ELSE
1658                              !! In order to be consistent with in-core part, round all the eris wrt. eps_schwarz
1659                              DO i = 1, nints
1660                                 primitive_integrals(i) = primitive_integrals(i)*pmax_entry
1661                                 IF (ABS(primitive_integrals(i)) > eps_storage) THEN
1662                                    primitive_integrals(i) = ANINT(primitive_integrals(i)/eps_storage, dp)*eps_storage/pmax_entry
1663                                 ELSE
1664                                    primitive_integrals(i) = 0.0_dp
1665                                 END IF
1666                              END DO
1667                           END IF
1668                        END IF
1669                        !!!  DEBUG, print out primitive integrals and indices. Only works serial no OMP  !!!
1670                        IF (.FALSE.) THEN
1671                           CALL print_integrals( &
1672                              iatom, jatom, katom, latom, shm_set_offset, shm_atomic_block_offset, &
1673                              iset, jset, kset, lset, nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), primitive_integrals)
1674                        ENDIF
1675                        IF (.NOT. is_anti_symmetric) THEN
1676                           !! Update Kohn-Sham matrix
1677                           CALL update_fock_matrix( &
1678                              nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1679                              fac, symm_fac, full_density_alpha(:, 1), full_ks_alpha(:, 1), &
1680                              primitive_integrals, pbd_buf, pbc_buf, pad_buf, pac_buf, kbd_buf, &
1681                              kbc_buf, kad_buf, kac_buf, iatom, jatom, katom, latom, &
1682                              iset, jset, kset, lset, offset_bd_set, offset_bc_set, offset_ad_set, offset_ac_set, &
1683                              atomic_offset_bd, atomic_offset_bc, atomic_offset_ad, atomic_offset_ac)
1684                           IF (.NOT. treat_lsd_in_core) THEN
1685                              IF (nspins == 2) THEN
1686                                 CALL update_fock_matrix( &
1687                                    nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1688                                    fac, symm_fac, full_density_beta(:, 1), full_ks_beta(:, 1), &
1689                                    primitive_integrals, pbd_buf, pbc_buf, pad_buf, pac_buf, kbd_buf, &
1690                                    kbc_buf, kad_buf, kac_buf, iatom, jatom, katom, latom, &
1691                                    iset, jset, kset, lset, offset_bd_set, offset_bc_set, offset_ad_set, offset_ac_set, &
1692                                    atomic_offset_bd, atomic_offset_bc, atomic_offset_ad, atomic_offset_ac)
1693                              END IF
1694                           END IF
1695                        ELSE
1696                           !! Update Kohn-Sham matrix
1697                           CALL update_fock_matrix_as( &
1698                              nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1699                              fac, symm_fac, full_density_alpha(:, 1), full_ks_alpha(:, 1), &
1700                              primitive_integrals, pbd_buf, pbc_buf, pad_buf, pac_buf, kbd_buf, &
1701                              kbc_buf, kad_buf, kac_buf, iatom, jatom, katom, latom, &
1702                              iset, jset, kset, lset, offset_bd_set, offset_bc_set, offset_ad_set, offset_ac_set, &
1703                              atomic_offset_bd, atomic_offset_bc, atomic_offset_ad, atomic_offset_ac)
1704                           IF (.NOT. treat_lsd_in_core) THEN
1705                              IF (nspins == 2) THEN
1706                                 CALL update_fock_matrix_as( &
1707                                    nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1708                                    fac, symm_fac, full_density_beta(:, 1), full_ks_beta(:, 1), &
1709                                    primitive_integrals, pbd_buf, pbc_buf, pad_buf, pac_buf, kbd_buf, &
1710                                    kbc_buf, kad_buf, kac_buf, iatom, jatom, katom, latom, &
1711                                    iset, jset, kset, lset, offset_bd_set, offset_bc_set, offset_ad_set, offset_ac_set, &
1712                                    atomic_offset_bd, atomic_offset_bc, atomic_offset_ad, atomic_offset_ac)
1713                              END IF
1714                           END IF
1715                        END IF
1716                     END DO ! i_set_list_kl
1717                  END DO ! i_set_list_ij
1718                  IF (do_disk_storage) THEN
1719                     buffer_overflow = .TRUE.
1720                  END IF
1721               END DO !i_list_ij
1722            END DO !i_list_kl
1723         END DO atomic_blocks
1724         bintime_stop = m_walltime()
1725         IF (my_geo_change) THEN
1726            distribution_energy%time_first_scf = bintime_stop - bintime_start
1727         ELSE
1728            distribution_energy%time_other_scf = &
1729               distribution_energy%time_other_scf + bintime_stop - bintime_start
1730         ENDIF
1731!$OMP MASTER
1732         CALL timestop(handle_bin)
1733!$OMP END MASTER
1734      END DO !bin
1735
1736!$OMP MASTER
1737      logger => cp_get_default_logger()
1738      do_print_load_balance_info = .FALSE.
1739      do_print_load_balance_info = BTEST(cp_print_key_should_output(logger%iter_info, hfx_section, &
1740                                                                    "LOAD_BALANCE%PRINT/LOAD_BALANCE_INFO"), cp_p_file)
1741!$OMP END MASTER
1742!$OMP BARRIER
1743      IF (do_print_load_balance_info) THEN
1744         iw = -1
1745!$OMP MASTER
1746         iw = cp_print_key_unit_nr(logger, hfx_section, "LOAD_BALANCE%PRINT/LOAD_BALANCE_INFO", &
1747                                   extension=".scfLog")
1748!$OMP END MASTER
1749
1750         CALL collect_load_balance_info(para_env, actual_x_data, iw, n_threads, i_thread, &
1751                                        hfx_do_eval_energy)
1752
1753!$OMP MASTER
1754         CALL cp_print_key_finished_output(iw, logger, hfx_section, &
1755                                           "LOAD_BALANCE%PRINT/LOAD_BALANCE_INFO")
1756!$OMP END MASTER
1757      END IF
1758
1759!$OMP BARRIER
1760!$OMP MASTER
1761      CALL m_memory(memsize_after)
1762!$OMP END MASTER
1763!$OMP BARRIER
1764
1765      DEALLOCATE (primitive_integrals)
1766!$OMP BARRIER
1767      !! Get some number about ERIS
1768!$OMP ATOMIC
1769      shm_neris_total = shm_neris_total + neris_total
1770!$OMP ATOMIC
1771      shm_neris_onthefly = shm_neris_onthefly + neris_onthefly
1772!$OMP ATOMIC
1773      shm_nprim_ints = shm_nprim_ints + nprim_ints
1774
1775      storage_counter_integrals = memory_parameter%actual_memory_usage* &
1776                                  memory_parameter%cache_size
1777      stor_count_int_disk = memory_parameter%actual_memory_usage_disk* &
1778                            memory_parameter%cache_size
1779      stor_count_max_val = max_val_memory*memory_parameter%cache_size
1780!$OMP ATOMIC
1781      shm_storage_counter_integrals = shm_storage_counter_integrals + storage_counter_integrals
1782!$OMP ATOMIC
1783      shm_stor_count_int_disk = shm_stor_count_int_disk + stor_count_int_disk
1784!$OMP ATOMIC
1785      shm_neris_incore = shm_neris_incore + neris_incore
1786!$OMP ATOMIC
1787      shm_neris_disk = shm_neris_disk + neris_disk
1788!$OMP ATOMIC
1789      shm_stor_count_max_val = shm_stor_count_max_val + stor_count_max_val
1790!$OMP BARRIER
1791
1792      ! ** Calculate how much memory has already been used (might be needed for in-core forces
1793!$OMP MASTER
1794      shm_mem_compression_counter = 0
1795      DO i = 1, n_threads
1796!$OMP       ATOMIC READ
1797         tmp_i4 = x_data(irep, i)%memory_parameter%actual_memory_usage
1798         shm_mem_compression_counter = shm_mem_compression_counter + &
1799                                       tmp_i4*memory_parameter%cache_size
1800      END DO
1801!$OMP END MASTER
1802!$OMP BARRIER
1803      actual_x_data%memory_parameter%final_comp_counter_energy = shm_mem_compression_counter
1804
1805!$OMP MASTER
1806      !! Calculate the exchange energies from the Kohn-Sham matrix. Before we can go on, we have to symmetrize.
1807      ene_x_aa = 0.0_dp
1808      ene_x_bb = 0.0_dp
1809
1810      mb_size_p = shm_block_offset(ncpu + 1)/1024/128
1811      mb_size_f = shm_block_offset(ncpu + 1)/1024/128
1812      IF (.NOT. treat_lsd_in_core) THEN
1813         IF (nspins == 2) THEN
1814            mb_size_f = mb_size_f*2
1815            mb_size_p = mb_size_p*2
1816         END IF
1817      END IF
1818      !! size of primitive_integrals(not shared)
1819      mb_size_buffers = INT(nsgf_max, int_8)**4*n_threads
1820      !! fock density buffers (not shared)
1821      mb_size_buffers = mb_size_buffers + INT(nsgf_max, int_8)**2*n_threads
1822      subtr_size_mb = subtr_size_mb + 8_int_8*nsgf_max**2*n_threads
1823      !! size of screening functions (shared)
1824      mb_size_buffers = mb_size_buffers + max_pgf**2*max_set**2*nkind**2 &
1825                        + max_set**2*nkind**2 &
1826                        + nkind**2 &
1827                        + max_pgf**2*max_set**2*nkind**2
1828      !! is_assoc (shared)
1829      mb_size_buffers = mb_size_buffers + natom**2
1830      ! ** pmax_atom (shared)
1831      IF (do_p_screening) THEN
1832         mb_size_buffers = mb_size_buffers + natom**2
1833      END IF
1834      IF (screening_parameter%do_p_screening_forces) THEN
1835         IF (memory_parameter%treat_forces_in_core) THEN
1836            mb_size_buffers = mb_size_buffers + natom**2
1837         END IF
1838      END IF
1839      ! ** Initial P only MAX(alpha,beta) (shared)
1840      IF (do_p_screening .OR. screening_parameter%do_p_screening_forces) THEN
1841         mb_size_buffers = mb_size_buffers + memory_parameter%size_p_screen
1842      END IF
1843      ! ** In core forces require their own initial P
1844      IF (screening_parameter%do_p_screening_forces) THEN
1845         IF (memory_parameter%treat_forces_in_core) THEN
1846            mb_size_buffers = mb_size_buffers + memory_parameter%size_p_screen
1847         END IF
1848      END IF
1849
1850      !! mb
1851      mb_size_buffers = mb_size_buffers/1024/128
1852
1853      afac = 1.0_dp
1854      IF (is_anti_symmetric) afac = -1.0_dp
1855      CALL timestop(handle_main)
1856      ene_x_aa_diag = 0.0_dp
1857      ene_x_bb_diag = 0.0_dp
1858      DO iatom = 1, natom
1859         ikind = kind_of(iatom)
1860         nseta = basis_parameter(ikind)%nset
1861         nsgfa => basis_parameter(ikind)%nsgf
1862         jatom = iatom
1863         jkind = kind_of(jatom)
1864         nsetb = basis_parameter(jkind)%nset
1865         nsgfb => basis_parameter(jkind)%nsgf
1866         act_atomic_block_offset = shm_atomic_block_offset(jatom, iatom)
1867         DO img = 1, nkimages
1868            DO iset = 1, nseta
1869               DO jset = 1, nsetb
1870                  act_set_offset = shm_set_offset(jset, iset, jkind, ikind)
1871                  i = act_set_offset + act_atomic_block_offset - 1
1872                  DO ma = 1, nsgfa(iset)
1873                     j = shm_set_offset(iset, jset, jkind, ikind) + act_atomic_block_offset - 1 + ma - 1
1874                     DO mb = 1, nsgfb(jset)
1875                        IF (i > j) THEN
1876                           full_ks_alpha(i, img) = (full_ks_alpha(i, img) + full_ks_alpha(j, img)*afac)
1877                           full_ks_alpha(j, img) = full_ks_alpha(i, img)*afac
1878                           IF (.NOT. treat_lsd_in_core .AND. nspins == 2) THEN
1879                              full_ks_beta(i, img) = (full_ks_beta(i, img) + full_ks_beta(j, img)*afac)
1880                              full_ks_beta(j, img) = full_ks_beta(i, img)*afac
1881                           END IF
1882                        END IF
1883                        ! ** For adiabatically rescaled functionals we need the energy coming from the diagonal elements
1884                        IF (i == j) THEN
1885                           ene_x_aa_diag = ene_x_aa_diag + full_ks_alpha(i, img)*full_density_alpha(i, img)
1886                           IF (.NOT. treat_lsd_in_core .AND. nspins == 2) THEN
1887                              ene_x_bb_diag = ene_x_bb_diag + full_ks_beta(i, img)*full_density_beta(i, img)
1888                           END IF
1889                        END IF
1890                        i = i + 1
1891                        j = j + nsgfa(iset)
1892                     END DO
1893                  END DO
1894               END DO
1895            END DO
1896         END DO
1897      END DO
1898
1899      CALL mp_sync(para_env%group)
1900      afac = 1.0_dp
1901      IF (is_anti_symmetric) afac = 0._dp
1902      IF (distribute_fock_matrix) THEN
1903         !! Distribute the current KS-matrix to all the processes
1904         CALL timeset(routineN//"_dist_KS", handle_dist_ks)
1905         DO img = 1, nkimages
1906            CALL distribute_ks_matrix(para_env, full_ks_alpha(:, img), ks_matrix(ispin, img)%matrix, shm_number_of_p_entries, &
1907                                      shm_block_offset, kind_of, basis_parameter, &
1908                                      off_diag_fac=0.5_dp, diag_fac=afac)
1909         END DO
1910
1911         NULLIFY (full_ks_alpha)
1912         DEALLOCATE (shm_master_x_data%full_ks_alpha)
1913         IF (.NOT. treat_lsd_in_core) THEN
1914            IF (nspins == 2) THEN
1915               DO img = 1, nkimages
1916                  CALL distribute_ks_matrix(para_env, full_ks_beta(:, img), ks_matrix(2, img)%matrix, shm_number_of_p_entries, &
1917                                            shm_block_offset, kind_of, basis_parameter, &
1918                                            off_diag_fac=0.5_dp, diag_fac=afac)
1919               END DO
1920               NULLIFY (full_ks_beta)
1921               DEALLOCATE (shm_master_x_data%full_ks_beta)
1922            END IF
1923         END IF
1924         CALL timestop(handle_dist_ks)
1925      END IF
1926
1927      IF (distribute_fock_matrix) THEN
1928         !! ** Calculate the exchange energy
1929         ene_x_aa = 0.0_dp
1930         DO img = 1, nkimages
1931            CALL dbcsr_dot(ks_matrix(ispin, img)%matrix, rho_ao(ispin, img)%matrix, &
1932                           etmp)
1933            ene_x_aa = ene_x_aa + etmp
1934         END DO
1935         !for ADMMS, we need the exchange matrix k(d) for both spins
1936         IF (dft_control%do_admm) THEN
1937            CPASSERT(nkimages == 1)
1938            CALL dbcsr_copy(matrix_ks_aux_fit_hfx(ispin)%matrix, ks_matrix(ispin, 1)%matrix, &
1939                            name="HF exch. part of matrix_ks_aux_fit for ADMMS")
1940         END IF
1941
1942         ene_x_bb = 0.0_dp
1943         IF (nspins == 2 .AND. .NOT. treat_lsd_in_core) THEN
1944            DO img = 1, nkimages
1945               CALL dbcsr_dot(ks_matrix(2, img)%matrix, rho_ao(2, img)%matrix, &
1946                              etmp)
1947               ene_x_bb = ene_x_bb + etmp
1948            END DO
1949            !for ADMMS, we need the exchange matrix k(d) for both spins
1950            IF (dft_control%do_admm) THEN
1951               CPASSERT(nkimages == 1)
1952               CALL dbcsr_copy(matrix_ks_aux_fit_hfx(2)%matrix, ks_matrix(2, 1)%matrix, &
1953                               name="HF exch. part of matrix_ks_aux_fit for ADMMS")
1954            END IF
1955         END IF
1956
1957         !! Update energy type
1958         ehfx = 0.5_dp*(ene_x_aa + ene_x_bb)
1959      ELSE
1960         ! ** It is easier to correct the following expression by the diagonal energy contribution,
1961         ! ** than explicitly going throuhg the diagonal elements
1962         DO img = 1, nkimages
1963            DO pa = 1, SIZE(full_ks_alpha, 1)
1964               ene_x_aa = ene_x_aa + full_ks_alpha(pa, img)*full_density_alpha(pa, img)
1965            END DO
1966         END DO
1967         ! ** Now correct
1968         ene_x_aa = (ene_x_aa + ene_x_aa_diag)*0.5_dp
1969         IF (nspins == 2) THEN
1970            DO img = 1, nkimages
1971               DO pa = 1, SIZE(full_ks_beta, 1)
1972                  ene_x_bb = ene_x_bb + full_ks_beta(pa, img)*full_density_beta(pa, img)
1973               END DO
1974            END DO
1975            ! ** Now correct
1976            ene_x_bb = (ene_x_bb + ene_x_bb_diag)*0.5_dp
1977         END IF
1978         CALL mp_sum(ene_x_aa, para_env%group)
1979         IF (nspins == 2) CALL mp_sum(ene_x_bb, para_env%group)
1980         ehfx = 0.5_dp*(ene_x_aa + ene_x_bb)
1981      END IF
1982
1983      !! Print some memeory information if this is the first step
1984      IF (my_geo_change) THEN
1985         tmp_i8(1:8) = (/shm_storage_counter_integrals, shm_neris_onthefly, shm_neris_incore, shm_neris_disk, &
1986                         shm_neris_total, shm_stor_count_int_disk, shm_nprim_ints, shm_stor_count_max_val/)
1987         CALL mp_sum(tmp_i8, para_env%group)
1988         shm_storage_counter_integrals = tmp_i8(1)
1989         shm_neris_onthefly = tmp_i8(2)
1990         shm_neris_incore = tmp_i8(3)
1991         shm_neris_disk = tmp_i8(4)
1992         shm_neris_total = tmp_i8(5)
1993         shm_stor_count_int_disk = tmp_i8(6)
1994         shm_nprim_ints = tmp_i8(7)
1995         shm_stor_count_max_val = tmp_i8(8)
1996         CALL mp_max(memsize_after, para_env%group)
1997         mem_eris = (shm_storage_counter_integrals + 128*1024 - 1)/1024/128
1998         compression_factor = REAL(shm_neris_incore, dp)/REAL(shm_storage_counter_integrals, dp)
1999         mem_eris_disk = (shm_stor_count_int_disk + 128*1024 - 1)/1024/128
2000         compression_factor_disk = REAL(shm_neris_disk, dp)/REAL(shm_stor_count_int_disk, dp)
2001         mem_max_val = (shm_stor_count_max_val + 128*1024 - 1)/1024/128
2002
2003         IF (shm_neris_incore == 0) THEN
2004            mem_eris = 0
2005            compression_factor = 0.0_dp
2006         END IF
2007         IF (shm_neris_disk == 0) THEN
2008            mem_eris_disk = 0
2009            compression_factor_disk = 0.0_dp
2010         END IF
2011
2012         iw = cp_print_key_unit_nr(logger, hfx_section, "HF_INFO", &
2013                                   extension=".scfLog")
2014         IF (iw > 0) THEN
2015            WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2016               "HFX_MEM_INFO| Number of cart. primitive ERI's calculated:      ", shm_nprim_ints
2017
2018            WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2019               "HFX_MEM_INFO| Number of sph. ERI's calculated:           ", shm_neris_total
2020
2021            WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2022               "HFX_MEM_INFO| Number of sph. ERI's stored in-core:        ", shm_neris_incore
2023
2024            WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2025               "HFX_MEM_INFO| Number of sph. ERI's stored on disk:        ", shm_neris_disk
2026
2027            WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2028               "HFX_MEM_INFO| Number of sph. ERI's calculated on the fly: ", shm_neris_onthefly
2029
2030            WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2031               "HFX_MEM_INFO| Total memory consumption ERI's RAM [MiB]:            ", mem_eris
2032
2033            WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2034               "HFX_MEM_INFO| Whereof max-vals [MiB]:            ", mem_max_val
2035
2036            WRITE (UNIT=iw, FMT="((T3,A,T60,F21.2))") &
2037               "HFX_MEM_INFO| Total compression factor ERI's RAM:                  ", compression_factor
2038
2039            WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2040               "HFX_MEM_INFO| Total memory consumption ERI's disk [MiB]:       ", mem_eris_disk
2041
2042            WRITE (UNIT=iw, FMT="((T3,A,T60,F21.2))") &
2043               "HFX_MEM_INFO| Total compression factor ERI's disk:             ", compression_factor_disk
2044
2045            WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2046               "HFX_MEM_INFO| Size of density/Fock matrix [MiB]:             ", 2_int_8*mb_size_p
2047
2048            IF (do_periodic) THEN
2049               WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2050                  "HFX_MEM_INFO| Size of buffers [MiB]:             ", mb_size_buffers
2051               WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2052                  "HFX_MEM_INFO| Number of periodic image cells considered: ", SIZE(shm_master_x_data%neighbor_cells)
2053            ELSE
2054               WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2055                  "HFX_MEM_INFO| Size of buffers [MiB]:             ", mb_size_buffers
2056            END IF
2057            WRITE (UNIT=iw, FMT="((T3,A,T60,I21),/)") &
2058               "HFX_MEM_INFO| Est. max. program size after HFX  [MiB]:", memsize_after/(1024*1024)
2059            CALL m_flush(iw)
2060         END IF
2061
2062         CALL cp_print_key_finished_output(iw, logger, hfx_section, &
2063                                           "HF_INFO")
2064      END IF
2065!$OMP END MASTER
2066
2067      !! flush caches if the geometry changed
2068      IF (do_dynamic_load_balancing) THEN
2069         my_bin_size = SIZE(actual_x_data%distribution_energy)
2070      ELSE
2071         my_bin_size = 1
2072      END IF
2073
2074      IF (my_geo_change) THEN
2075         IF (.NOT. memory_parameter%do_all_on_the_fly) THEN
2076            DO bin = 1, my_bin_size
2077               maxval_cache => actual_x_data%maxval_cache(bin)
2078               maxval_container => actual_x_data%maxval_container(bin)
2079               integral_caches => actual_x_data%integral_caches(:, bin)
2080               integral_containers => actual_x_data%integral_containers(:, bin)
2081               CALL hfx_flush_last_cache(bits_max_val, maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
2082                                         .FALSE.)
2083               DO i = 1, 64
2084                  CALL hfx_flush_last_cache(i, integral_caches(i), integral_containers(i), &
2085                                            memory_parameter%actual_memory_usage, .FALSE.)
2086               END DO
2087            END DO
2088         END IF
2089      END IF
2090      !! reset all caches except we calculate all on the fly
2091      IF (.NOT. memory_parameter%do_all_on_the_fly) THEN
2092         DO bin = 1, my_bin_size
2093            maxval_cache => actual_x_data%maxval_cache(bin)
2094            maxval_container => actual_x_data%maxval_container(bin)
2095            integral_caches => actual_x_data%integral_caches(:, bin)
2096            integral_containers => actual_x_data%integral_containers(:, bin)
2097
2098            CALL hfx_reset_cache_and_container(maxval_cache, maxval_container, memory_parameter%actual_memory_usage, .FALSE.)
2099            DO i = 1, 64
2100               CALL hfx_reset_cache_and_container(integral_caches(i), integral_containers(i), &
2101                                                  memory_parameter%actual_memory_usage, &
2102                                                  .FALSE.)
2103            END DO
2104         END DO
2105      END IF
2106
2107      !! Since the I/O routines are no thread-safe, i.e. the procedure to get the unit number, put a lock here
2108!$OMP CRITICAL(hfxenergy_out_critical)
2109      IF (do_disk_storage) THEN
2110         !! flush caches if the geometry changed
2111         IF (my_geo_change) THEN
2112            CALL hfx_flush_last_cache(bits_max_val, maxval_cache_disk, maxval_container_disk, &
2113                                      memory_parameter%actual_memory_usage_disk, .TRUE.)
2114            DO i = 1, 64
2115               CALL hfx_flush_last_cache(i, integral_caches_disk(i), integral_containers_disk(i), &
2116                                         memory_parameter%actual_memory_usage_disk, .TRUE.)
2117            END DO
2118         END IF
2119         !! reset all caches except we calculate all on the fly
2120         CALL hfx_reset_cache_and_container(maxval_cache_disk, maxval_container_disk, memory_parameter%actual_memory_usage_disk, &
2121                                            do_disk_storage)
2122         DO i = 1, 64
2123            CALL hfx_reset_cache_and_container(integral_caches_disk(i), integral_containers_disk(i), &
2124                                               memory_parameter%actual_memory_usage_disk, do_disk_storage)
2125         END DO
2126      END IF
2127!$OMP END CRITICAL(hfxenergy_out_critical)
2128!$OMP BARRIER
2129      !! Clean up
2130      DEALLOCATE (last_sgf_global)
2131!$OMP MASTER
2132      DEALLOCATE (full_density_alpha)
2133      IF (.NOT. treat_lsd_in_core) THEN
2134         IF (nspins == 2) THEN
2135            DEALLOCATE (full_density_beta)
2136         END IF
2137      END IF
2138      IF (do_dynamic_load_balancing) THEN
2139         DEALLOCATE (shm_master_x_data%task_list)
2140      END IF
2141!$OMP END MASTER
2142      DEALLOCATE (pbd_buf, pbc_buf, pad_buf, pac_buf)
2143      DEALLOCATE (kbd_buf, kbc_buf, kad_buf, kac_buf)
2144      DEALLOCATE (set_list_ij, set_list_kl)
2145
2146      DO i = 1, max_pgf**2
2147         DEALLOCATE (pgf_list_ij(i)%image_list)
2148         DEALLOCATE (pgf_list_kl(i)%image_list)
2149      END DO
2150
2151      DEALLOCATE (pgf_list_ij)
2152      DEALLOCATE (pgf_list_kl)
2153      DEALLOCATE (pgf_product_list)
2154
2155      DEALLOCATE (max_contraction, kind_of)
2156
2157      DEALLOCATE (ee_work, ee_work2, ee_buffer1, ee_buffer2, ee_primitives_tmp)
2158
2159      DEALLOCATE (nimages)
2160
2161!$OMP BARRIER
2162!$OMP END PARALLEL
2163
2164      CALL timestop(handle)
2165   END SUBROUTINE integrate_four_center
2166
2167! **************************************************************************************************
2168!> \brief calculates two-electron integrals of a quartet/shell using the library
2169!>      lib_int in the periodic case
2170!> \param lib ...
2171!> \param ra ...
2172!> \param rb ...
2173!> \param rc ...
2174!> \param rd ...
2175!> \param npgfa ...
2176!> \param npgfb ...
2177!> \param npgfc ...
2178!> \param npgfd ...
2179!> \param la_min ...
2180!> \param la_max ...
2181!> \param lb_min ...
2182!> \param lb_max ...
2183!> \param lc_min ...
2184!> \param lc_max ...
2185!> \param ld_min ...
2186!> \param ld_max ...
2187!> \param nsgfa ...
2188!> \param nsgfb ...
2189!> \param nsgfc ...
2190!> \param nsgfd ...
2191!> \param sphi_a_u1 ...
2192!> \param sphi_a_u2 ...
2193!> \param sphi_a_u3 ...
2194!> \param sphi_b_u1 ...
2195!> \param sphi_b_u2 ...
2196!> \param sphi_b_u3 ...
2197!> \param sphi_c_u1 ...
2198!> \param sphi_c_u2 ...
2199!> \param sphi_c_u3 ...
2200!> \param sphi_d_u1 ...
2201!> \param sphi_d_u2 ...
2202!> \param sphi_d_u3 ...
2203!> \param zeta ...
2204!> \param zetb ...
2205!> \param zetc ...
2206!> \param zetd ...
2207!> \param primitive_integrals array of primitive_integrals
2208!> \param potential_parameter contains info for libint
2209!> \param neighbor_cells Periodic images
2210!> \param screen1 set based coefficients for near field screening
2211!> \param screen2 set based coefficients for near field screening
2212!> \param eps_schwarz threshold
2213!> \param max_contraction_val maximum multiplication factor for cart -> sph
2214!> \param cart_estimate maximum calculated integral value
2215!> \param cell cell
2216!> \param neris_tmp counter for calculated cart integrals
2217!> \param log10_pmax logarithm of initial p matrix max element
2218!> \param log10_eps_schwarz log of threshold
2219!> \param R1_pgf coefficients for radii of product distribution function
2220!> \param R2_pgf coefficients for radii of product distribution function
2221!> \param pgf1 schwarz coefficients pgf basid
2222!> \param pgf2 schwarz coefficients pgf basid
2223!> \param pgf_list_ij ...
2224!> \param pgf_list_kl ...
2225!> \param pgf_product_list ...
2226!> \param nsgfl_a ...
2227!> \param nsgfl_b ...
2228!> \param nsgfl_c ...
2229!> \param nsgfl_d ...
2230!> \param sphi_a_ext ...
2231!> \param sphi_b_ext ...
2232!> \param sphi_c_ext ...
2233!> \param sphi_d_ext ...
2234!> \param ee_work ...
2235!> \param ee_work2 ...
2236!> \param ee_buffer1 ...
2237!> \param ee_buffer2 ...
2238!> \param ee_primitives_tmp ...
2239!> \param nimages ...
2240!> \param do_periodic ...
2241!> \param p_work ...
2242!> \par History
2243!>      11.2006 created [Manuel Guidon]
2244!>      02.2009 completely rewritten screening part [Manuel Guidon]
2245!> \author Manuel Guidon
2246! **************************************************************************************************
2247   SUBROUTINE coulomb4(lib, ra, rb, rc, rd, npgfa, npgfb, npgfc, npgfd, &
2248                       la_min, la_max, lb_min, lb_max, &
2249                       lc_min, lc_max, ld_min, ld_max, nsgfa, nsgfb, nsgfc, nsgfd, &
2250                       sphi_a_u1, sphi_a_u2, sphi_a_u3, &
2251                       sphi_b_u1, sphi_b_u2, sphi_b_u3, &
2252                       sphi_c_u1, sphi_c_u2, sphi_c_u3, &
2253                       sphi_d_u1, sphi_d_u2, sphi_d_u3, &
2254                       zeta, zetb, zetc, zetd, &
2255                       primitive_integrals, &
2256                       potential_parameter, neighbor_cells, &
2257                       screen1, screen2, eps_schwarz, max_contraction_val, &
2258                       cart_estimate, cell, neris_tmp, log10_pmax, &
2259                       log10_eps_schwarz, R1_pgf, R2_pgf, pgf1, pgf2, &
2260                       pgf_list_ij, pgf_list_kl, &
2261                       pgf_product_list, &
2262                       nsgfl_a, nsgfl_b, nsgfl_c, &
2263                       nsgfl_d, &
2264                       sphi_a_ext, sphi_b_ext, sphi_c_ext, sphi_d_ext, &
2265                       ee_work, ee_work2, ee_buffer1, ee_buffer2, ee_primitives_tmp, &
2266                       nimages, do_periodic, p_work)
2267
2268      TYPE(cp_libint_t)                                  :: lib
2269      REAL(dp), INTENT(IN)                               :: ra(3), rb(3), rc(3), rd(3)
2270      INTEGER, INTENT(IN) :: npgfa, npgfb, npgfc, npgfd, la_min, la_max, lb_min, lb_max, lc_min, &
2271         lc_max, ld_min, ld_max, nsgfa, nsgfb, nsgfc, nsgfd, sphi_a_u1, sphi_a_u2, sphi_a_u3, &
2272         sphi_b_u1, sphi_b_u2, sphi_b_u3, sphi_c_u1, sphi_c_u2, sphi_c_u3, sphi_d_u1, sphi_d_u2, &
2273         sphi_d_u3
2274      REAL(dp), DIMENSION(1:npgfa), INTENT(IN)           :: zeta
2275      REAL(dp), DIMENSION(1:npgfb), INTENT(IN)           :: zetb
2276      REAL(dp), DIMENSION(1:npgfc), INTENT(IN)           :: zetc
2277      REAL(dp), DIMENSION(1:npgfd), INTENT(IN)           :: zetd
2278      REAL(dp), DIMENSION(nsgfa, nsgfb, nsgfc, nsgfd)    :: primitive_integrals
2279      TYPE(hfx_potential_type)                           :: potential_parameter
2280      TYPE(hfx_cell_type), DIMENSION(:), POINTER         :: neighbor_cells
2281      REAL(dp), INTENT(IN)                               :: screen1(2), screen2(2), eps_schwarz, &
2282                                                            max_contraction_val
2283      REAL(dp)                                           :: cart_estimate
2284      TYPE(cell_type), POINTER                           :: cell
2285      INTEGER(int_8)                                     :: neris_tmp
2286      REAL(dp), INTENT(IN)                               :: log10_pmax, log10_eps_schwarz
2287      TYPE(hfx_screen_coeff_type), DIMENSION(:, :), &
2288         POINTER                                         :: R1_pgf, R2_pgf, pgf1, pgf2
2289      TYPE(hfx_pgf_list), DIMENSION(*)                   :: pgf_list_ij, pgf_list_kl
2290      TYPE(hfx_pgf_product_list), ALLOCATABLE, &
2291         DIMENSION(:), INTENT(INOUT)                     :: pgf_product_list
2292      INTEGER, DIMENSION(0:), INTENT(IN)                 :: nsgfl_a, nsgfl_b, nsgfl_c, nsgfl_d
2293      REAL(dp), INTENT(IN) :: sphi_a_ext(sphi_a_u1, sphi_a_u2, sphi_a_u3), &
2294         sphi_b_ext(sphi_b_u1, sphi_b_u2, sphi_b_u3), sphi_c_ext(sphi_c_u1, sphi_c_u2, sphi_c_u3), &
2295         sphi_d_ext(sphi_d_u1, sphi_d_u2, sphi_d_u3)
2296      REAL(dp), DIMENSION(*)                             :: ee_work, ee_work2, ee_buffer1, &
2297                                                            ee_buffer2, ee_primitives_tmp
2298      INTEGER, DIMENSION(*)                              :: nimages
2299      LOGICAL, INTENT(IN)                                :: do_periodic
2300      REAL(dp), DIMENSION(:), POINTER                    :: p_work
2301
2302      INTEGER :: ipgf, jpgf, kpgf, la, lb, lc, ld, list_ij, list_kl, lpgf, max_l, ncoa, ncob, &
2303         ncoc, ncod, nelements_ij, nelements_kl, nproducts, nsgfla, nsgflb, nsgflc, nsgfld, nsoa, &
2304         nsob, nsoc, nsod, s_offset_a, s_offset_b, s_offset_c, s_offset_d
2305      REAL(dp)                                           :: EtaInv, tmp_max, ZetaInv
2306
2307      CALL build_pair_list_pgf(npgfa, npgfb, pgf_list_ij, zeta, zetb, screen1, screen2, &
2308                               pgf1, R1_pgf, log10_pmax, log10_eps_schwarz, ra, rb, &
2309                               nelements_ij, &
2310                               neighbor_cells, nimages, do_periodic)
2311      CALL build_pair_list_pgf(npgfc, npgfd, pgf_list_kl, zetc, zetd, screen2, screen1, &
2312                               pgf2, R2_pgf, log10_pmax, log10_eps_schwarz, rc, rd, &
2313                               nelements_kl, &
2314                               neighbor_cells, nimages, do_periodic)
2315
2316      cart_estimate = 0.0_dp
2317      neris_tmp = 0
2318      primitive_integrals = 0.0_dp
2319      max_l = la_max + lb_max + lc_max + ld_max
2320      DO list_ij = 1, nelements_ij
2321         ZetaInv = pgf_list_ij(list_ij)%ZetaInv
2322         ipgf = pgf_list_ij(list_ij)%ipgf
2323         jpgf = pgf_list_ij(list_ij)%jpgf
2324
2325         DO list_kl = 1, nelements_kl
2326            EtaInv = pgf_list_kl(list_kl)%ZetaInv
2327            kpgf = pgf_list_kl(list_kl)%ipgf
2328            lpgf = pgf_list_kl(list_kl)%jpgf
2329
2330            CALL build_pgf_product_list(pgf_list_ij(list_ij), pgf_list_kl(list_kl), pgf_product_list, &
2331                                        nproducts, log10_pmax, log10_eps_schwarz, neighbor_cells, cell, &
2332                                        potential_parameter, max_l, do_periodic)
2333
2334            s_offset_a = 0
2335            DO la = la_min, la_max
2336               s_offset_b = 0
2337               ncoa = nco(la)
2338               nsgfla = nsgfl_a(la)
2339               nsoa = nso(la)
2340
2341               DO lb = lb_min, lb_max
2342                  s_offset_c = 0
2343                  ncob = nco(lb)
2344                  nsgflb = nsgfl_b(lb)
2345                  nsob = nso(lb)
2346
2347                  DO lc = lc_min, lc_max
2348                     s_offset_d = 0
2349                     ncoc = nco(lc)
2350                     nsgflc = nsgfl_c(lc)
2351                     nsoc = nso(lc)
2352
2353                     DO ld = ld_min, ld_max
2354                        ncod = nco(ld)
2355                        nsgfld = nsgfl_d(ld)
2356                        nsod = nso(ld)
2357
2358                        tmp_max = 0.0_dp
2359                        CALL evaluate_eri(lib, nproducts, pgf_product_list, &
2360                                          la, lb, lc, ld, &
2361                                          ncoa, ncob, ncoc, ncod, &
2362                                          nsgfa, nsgfb, nsgfc, nsgfd, &
2363                                          primitive_integrals, &
2364                                          max_contraction_val, tmp_max, eps_schwarz, &
2365                                          neris_tmp, ZetaInv, EtaInv, &
2366                                          s_offset_a, s_offset_b, s_offset_c, s_offset_d, &
2367                                          nsgfla, nsgflb, nsgflc, nsgfld, nsoa, nsob, nsoc, nsod, &
2368                                          sphi_a_ext(1, la + 1, ipgf), &
2369                                          sphi_b_ext(1, lb + 1, jpgf), &
2370                                          sphi_c_ext(1, lc + 1, kpgf), &
2371                                          sphi_d_ext(1, ld + 1, lpgf), &
2372                                          ee_work, ee_work2, ee_buffer1, ee_buffer2, ee_primitives_tmp, &
2373                                          p_work)
2374                        cart_estimate = MAX(tmp_max, cart_estimate)
2375                        s_offset_d = s_offset_d + nsod*nsgfld
2376                     END DO !ld
2377                     s_offset_c = s_offset_c + nsoc*nsgflc
2378                  END DO !lc
2379                  s_offset_b = s_offset_b + nsob*nsgflb
2380               END DO !lb
2381               s_offset_a = s_offset_a + nsoa*nsgfla
2382            END DO !la
2383         END DO
2384      END DO
2385
2386   END SUBROUTINE coulomb4
2387
2388! **************************************************************************************************
2389!> \brief Given a 2d index pair, this function returns a 1d index pair for
2390!>        a symmetric upper triangle NxN matrix
2391!>        The compiler should inline this function, therefore it appears in
2392!>        several modules
2393!> \param i 2d index
2394!> \param j 2d index
2395!> \param N matrix size
2396!> \return ...
2397!> \par History
2398!>      03.2009 created [Manuel Guidon]
2399!> \author Manuel Guidon
2400! **************************************************************************************************
2401   PURE FUNCTION get_1D_idx(i, j, N)
2402      INTEGER, INTENT(IN)                                :: i, j
2403      INTEGER(int_8), INTENT(IN)                         :: N
2404      INTEGER(int_8)                                     :: get_1D_idx
2405
2406      INTEGER(int_8)                                     :: min_ij
2407
2408      min_ij = MIN(i, j)
2409      get_1D_idx = min_ij*N + MAX(i, j) - (min_ij - 1)*min_ij/2 - N
2410
2411   END FUNCTION get_1D_idx
2412
2413! **************************************************************************************************
2414!> \brief This routine prefetches density/fock matrix elements and stores them
2415!>        in cache friendly arrays. These buffers are then used to update the
2416!>        fock matrix
2417!> \param ma_max Size of matrix blocks
2418!> \param mb_max Size of matrix blocks
2419!> \param mc_max Size of matrix blocks
2420!> \param md_max Size of matrix blocks
2421!> \param fac multiplication factor (spin)
2422!> \param symm_fac multiplication factor (symmetry)
2423!> \param density upper triangular density matrix
2424!> \param ks upper triangular fock matrix
2425!> \param prim primitive integrals
2426!> \param pbd buffer that will contain P(b,d)
2427!> \param pbc buffer that will contain P(b,c)
2428!> \param pad buffer that will contain P(a,d)
2429!> \param pac buffer that will contain P(a,c)
2430!> \param kbd buffer for KS(b,d)
2431!> \param kbc buffer for KS(b,c)
2432!> \param kad buffer for KS(a,d)
2433!> \param kac buffer for KS(a,c)
2434!> \param iatom ...
2435!> \param jatom ...
2436!> \param katom ...
2437!> \param latom ...
2438!> \param iset ...
2439!> \param jset ...
2440!> \param kset ...
2441!> \param lset ...
2442!> \param offset_bd_set ...
2443!> \param offset_bc_set ...
2444!> \param offset_ad_set ...
2445!> \param offset_ac_set ...
2446!> \param atomic_offset_bd ...
2447!> \param atomic_offset_bc ...
2448!> \param atomic_offset_ad ...
2449!> \param atomic_offset_ac ...
2450!> \par History
2451!>      03.2009 created [Manuel Guidon]
2452!> \author Manuel Guidon
2453! **************************************************************************************************
2454
2455   SUBROUTINE update_fock_matrix(ma_max, mb_max, mc_max, md_max, &
2456                                 fac, symm_fac, density, ks, prim, &
2457                                 pbd, pbc, pad, pac, kbd, kbc, kad, kac, &
2458                                 iatom, jatom, katom, latom, &
2459                                 iset, jset, kset, lset, offset_bd_set, offset_bc_set, offset_ad_set, &
2460                                 offset_ac_set, atomic_offset_bd, atomic_offset_bc, atomic_offset_ad, &
2461                                 atomic_offset_ac)
2462
2463      INTEGER, INTENT(IN)                                :: ma_max, mb_max, mc_max, md_max
2464      REAL(dp), INTENT(IN)                               :: fac, symm_fac
2465      REAL(dp), DIMENSION(:), INTENT(IN)                 :: density
2466      REAL(dp), DIMENSION(:), INTENT(INOUT)              :: ks
2467      REAL(dp), DIMENSION(ma_max*mb_max*mc_max*md_max), &
2468         INTENT(IN)                                      :: prim
2469      REAL(dp), DIMENSION(*), INTENT(INOUT)              :: pbd, pbc, pad, pac, kbd, kbc, kad, kac
2470      INTEGER, INTENT(IN)                                :: iatom, jatom, katom, latom, iset, jset, &
2471                                                            kset, lset
2472      INTEGER, DIMENSION(:, :), POINTER                  :: offset_bd_set, offset_bc_set, &
2473                                                            offset_ad_set, offset_ac_set
2474      INTEGER, INTENT(IN)                                :: atomic_offset_bd, atomic_offset_bc, &
2475                                                            atomic_offset_ad, atomic_offset_ac
2476
2477      INTEGER                                            :: i, j, ma, mb, mc, md, offset_ac, &
2478                                                            offset_ad, offset_bc, offset_bd
2479
2480      IF (jatom >= latom) THEN
2481         i = 1
2482         offset_bd = offset_bd_set(jset, lset) + atomic_offset_bd - 1
2483         j = offset_bd
2484         DO md = 1, md_max
2485            DO mb = 1, mb_max
2486               pbd(i) = density(j)
2487               i = i + 1
2488               j = j + 1
2489            END DO
2490         END DO
2491      ELSE
2492         i = 1
2493         offset_bd = offset_bd_set(lset, jset) + atomic_offset_bd - 1
2494         DO md = 1, md_max
2495            j = offset_bd + md - 1
2496            DO mb = 1, mb_max
2497               pbd(i) = density(j)
2498               i = i + 1
2499               j = j + md_max
2500            END DO
2501         END DO
2502      END IF
2503      IF (jatom >= katom) THEN
2504         i = 1
2505         offset_bc = offset_bc_set(jset, kset) + atomic_offset_bc - 1
2506         j = offset_bc
2507         DO mc = 1, mc_max
2508            DO mb = 1, mb_max
2509               pbc(i) = density(j)
2510               i = i + 1
2511               j = j + 1
2512            END DO
2513         END DO
2514      ELSE
2515         i = 1
2516         offset_bc = offset_bc_set(kset, jset) + atomic_offset_bc - 1
2517         DO mc = 1, mc_max
2518            j = offset_bc + mc - 1
2519            DO mb = 1, mb_max
2520               pbc(i) = density(j)
2521               i = i + 1
2522               j = j + mc_max
2523            END DO
2524         END DO
2525      END IF
2526      IF (iatom >= latom) THEN
2527         i = 1
2528         offset_ad = offset_ad_set(iset, lset) + atomic_offset_ad - 1
2529         j = offset_ad
2530         DO md = 1, md_max
2531            DO ma = 1, ma_max
2532               pad(i) = density(j)
2533               i = i + 1
2534               j = j + 1
2535            END DO
2536         END DO
2537      ELSE
2538         i = 1
2539         offset_ad = offset_ad_set(lset, iset) + atomic_offset_ad - 1
2540         DO md = 1, md_max
2541            j = offset_ad + md - 1
2542            DO ma = 1, ma_max
2543               pad(i) = density(j)
2544               i = i + 1
2545               j = j + md_max
2546            END DO
2547         END DO
2548      END IF
2549      IF (iatom >= katom) THEN
2550         i = 1
2551         offset_ac = offset_ac_set(iset, kset) + atomic_offset_ac - 1
2552         j = offset_ac
2553         DO mc = 1, mc_max
2554            DO ma = 1, ma_max
2555               pac(i) = density(j)
2556               i = i + 1
2557               j = j + 1
2558            END DO
2559         END DO
2560      ELSE
2561         i = 1
2562         offset_ac = offset_ac_set(kset, iset) + atomic_offset_ac - 1
2563         DO mc = 1, mc_max
2564            j = offset_ac + mc - 1
2565            DO ma = 1, ma_max
2566               pac(i) = density(j)
2567               i = i + 1
2568               j = j + mc_max
2569            END DO
2570         END DO
2571      END IF
2572
2573      CALL contract_block(ma_max, mb_max, mc_max, md_max, kbd, kbc, kad, kac, pbd, pbc, pad, pac, prim, fac*symm_fac)
2574      IF (jatom >= latom) THEN
2575         i = 1
2576         j = offset_bd
2577         DO md = 1, md_max
2578            DO mb = 1, mb_max
2579!$OMP           ATOMIC
2580               ks(j) = ks(j) + kbd(i)
2581               i = i + 1
2582               j = j + 1
2583            END DO
2584         END DO
2585      ELSE
2586         i = 1
2587         DO md = 1, md_max
2588            j = offset_bd + md - 1
2589            DO mb = 1, mb_max
2590!$OMP           ATOMIC
2591               ks(j) = ks(j) + kbd(i)
2592               i = i + 1
2593               j = j + md_max
2594            END DO
2595         END DO
2596      END IF
2597      IF (jatom >= katom) THEN
2598         i = 1
2599         j = offset_bc
2600         DO mc = 1, mc_max
2601            DO mb = 1, mb_max
2602!$OMP           ATOMIC
2603               ks(j) = ks(j) + kbc(i)
2604               i = i + 1
2605               j = j + 1
2606            END DO
2607         END DO
2608      ELSE
2609         i = 1
2610         DO mc = 1, mc_max
2611            j = offset_bc + mc - 1
2612            DO mb = 1, mb_max
2613!$OMP           ATOMIC
2614               ks(j) = ks(j) + kbc(i)
2615               i = i + 1
2616               j = j + mc_max
2617            END DO
2618         END DO
2619      END IF
2620      IF (iatom >= latom) THEN
2621         i = 1
2622         j = offset_ad
2623         DO md = 1, md_max
2624            DO ma = 1, ma_max
2625!$OMP           ATOMIC
2626               ks(j) = ks(j) + kad(i)
2627               i = i + 1
2628               j = j + 1
2629            END DO
2630         END DO
2631      ELSE
2632         i = 1
2633         DO md = 1, md_max
2634            j = offset_ad + md - 1
2635            DO ma = 1, ma_max
2636!$OMP           ATOMIC
2637               ks(j) = ks(j) + kad(i)
2638               i = i + 1
2639               j = j + md_max
2640            END DO
2641         END DO
2642      END IF
2643      IF (iatom >= katom) THEN
2644         i = 1
2645         j = offset_ac
2646         DO mc = 1, mc_max
2647            DO ma = 1, ma_max
2648!$OMP           ATOMIC
2649               ks(j) = ks(j) + kac(i)
2650               i = i + 1
2651               j = j + 1
2652            END DO
2653         END DO
2654      ELSE
2655         i = 1
2656         DO mc = 1, mc_max
2657            j = offset_ac + mc - 1
2658            DO ma = 1, ma_max
2659!$OMP           ATOMIC
2660               ks(j) = ks(j) + kac(i)
2661               i = i + 1
2662               j = j + mc_max
2663            END DO
2664         END DO
2665      END IF
2666   END SUBROUTINE update_fock_matrix
2667
2668! **************************************************************************************************
2669!> \brief ...
2670!> \param ma_max ...
2671!> \param mb_max ...
2672!> \param mc_max ...
2673!> \param md_max ...
2674!> \param fac ...
2675!> \param symm_fac ...
2676!> \param density ...
2677!> \param ks ...
2678!> \param prim ...
2679!> \param pbd ...
2680!> \param pbc ...
2681!> \param pad ...
2682!> \param pac ...
2683!> \param kbd ...
2684!> \param kbc ...
2685!> \param kad ...
2686!> \param kac ...
2687!> \param iatom ...
2688!> \param jatom ...
2689!> \param katom ...
2690!> \param latom ...
2691!> \param iset ...
2692!> \param jset ...
2693!> \param kset ...
2694!> \param lset ...
2695!> \param offset_bd_set ...
2696!> \param offset_bc_set ...
2697!> \param offset_ad_set ...
2698!> \param offset_ac_set ...
2699!> \param atomic_offset_bd ...
2700!> \param atomic_offset_bc ...
2701!> \param atomic_offset_ad ...
2702!> \param atomic_offset_ac ...
2703! **************************************************************************************************
2704   SUBROUTINE update_fock_matrix_as(ma_max, mb_max, mc_max, md_max, &
2705                                    fac, symm_fac, density, ks, prim, &
2706                                    pbd, pbc, pad, pac, kbd, kbc, kad, kac, &
2707                                    iatom, jatom, katom, latom, &
2708                                    iset, jset, kset, lset, offset_bd_set, offset_bc_set, offset_ad_set, &
2709                                    offset_ac_set, atomic_offset_bd, atomic_offset_bc, atomic_offset_ad, &
2710                                    atomic_offset_ac)
2711
2712      INTEGER, INTENT(IN)                                :: ma_max, mb_max, mc_max, md_max
2713      REAL(dp), INTENT(IN)                               :: fac, symm_fac
2714      REAL(dp), DIMENSION(:), INTENT(IN)                 :: density
2715      REAL(dp), DIMENSION(:), INTENT(INOUT)              :: ks
2716      REAL(dp), DIMENSION(ma_max*mb_max*mc_max*md_max), &
2717         INTENT(IN)                                      :: prim
2718      REAL(dp), DIMENSION(*), INTENT(INOUT)              :: pbd, pbc, pad, pac, kbd, kbc, kad, kac
2719      INTEGER, INTENT(IN)                                :: iatom, jatom, katom, latom, iset, jset, &
2720                                                            kset, lset
2721      INTEGER, DIMENSION(:, :), POINTER                  :: offset_bd_set, offset_bc_set, &
2722                                                            offset_ad_set, offset_ac_set
2723      INTEGER, INTENT(IN)                                :: atomic_offset_bd, atomic_offset_bc, &
2724                                                            atomic_offset_ad, atomic_offset_ac
2725
2726      INTEGER                                            :: i, j, ma, mb, mc, md, offset_ac, &
2727                                                            offset_ad, offset_bc, offset_bd
2728
2729      IF (jatom >= latom) THEN
2730         i = 1
2731         offset_bd = offset_bd_set(jset, lset) + atomic_offset_bd - 1
2732         j = offset_bd
2733         DO md = 1, md_max
2734            DO mb = 1, mb_max
2735               pbd(i) = +density(j)
2736               i = i + 1
2737               j = j + 1
2738            END DO
2739         END DO
2740      ELSE
2741         i = 1
2742         offset_bd = offset_bd_set(lset, jset) + atomic_offset_bd - 1
2743         DO md = 1, md_max
2744            j = offset_bd + md - 1
2745            DO mb = 1, mb_max
2746               pbd(i) = -density(j)
2747               i = i + 1
2748               j = j + md_max
2749            END DO
2750         END DO
2751      END IF
2752      IF (jatom >= katom) THEN
2753         i = 1
2754         offset_bc = offset_bc_set(jset, kset) + atomic_offset_bc - 1
2755         j = offset_bc
2756         DO mc = 1, mc_max
2757            DO mb = 1, mb_max
2758               pbc(i) = -density(j)
2759               i = i + 1
2760               j = j + 1
2761            END DO
2762         END DO
2763      ELSE
2764         i = 1
2765         offset_bc = offset_bc_set(kset, jset) + atomic_offset_bc - 1
2766         DO mc = 1, mc_max
2767            j = offset_bc + mc - 1
2768            DO mb = 1, mb_max
2769               pbc(i) = density(j)
2770               i = i + 1
2771               j = j + mc_max
2772            END DO
2773         END DO
2774      END IF
2775      IF (iatom >= latom) THEN
2776         i = 1
2777         offset_ad = offset_ad_set(iset, lset) + atomic_offset_ad - 1
2778         j = offset_ad
2779         DO md = 1, md_max
2780            DO ma = 1, ma_max
2781               pad(i) = -density(j)
2782               i = i + 1
2783               j = j + 1
2784            END DO
2785         END DO
2786      ELSE
2787         i = 1
2788         offset_ad = offset_ad_set(lset, iset) + atomic_offset_ad - 1
2789         DO md = 1, md_max
2790            j = offset_ad + md - 1
2791            DO ma = 1, ma_max
2792               pad(i) = density(j)
2793               i = i + 1
2794               j = j + md_max
2795            END DO
2796         END DO
2797      END IF
2798      IF (iatom >= katom) THEN
2799         i = 1
2800         offset_ac = offset_ac_set(iset, kset) + atomic_offset_ac - 1
2801         j = offset_ac
2802         DO mc = 1, mc_max
2803            DO ma = 1, ma_max
2804               pac(i) = +density(j)
2805               i = i + 1
2806               j = j + 1
2807            END DO
2808         END DO
2809      ELSE
2810         i = 1
2811         offset_ac = offset_ac_set(kset, iset) + atomic_offset_ac - 1
2812         DO mc = 1, mc_max
2813            j = offset_ac + mc - 1
2814            DO ma = 1, ma_max
2815               pac(i) = -density(j)
2816               i = i + 1
2817               j = j + mc_max
2818            END DO
2819         END DO
2820      END IF
2821
2822      CALL contract_block(ma_max, mb_max, mc_max, md_max, kbd, kbc, kad, kac, pbd, pbc, pad, pac, prim, fac*symm_fac)
2823
2824      IF (jatom >= latom) THEN
2825         i = 1
2826         j = offset_bd
2827         DO md = 1, md_max
2828            DO mb = 1, mb_max
2829!$OMP           ATOMIC
2830               ks(j) = ks(j) + kbd(i)
2831               i = i + 1
2832               j = j + 1
2833            END DO
2834         END DO
2835      ELSE
2836         i = 1
2837         DO md = 1, md_max
2838            j = offset_bd + md - 1
2839            DO mb = 1, mb_max
2840!$OMP           ATOMIC
2841               ks(j) = ks(j) - kbd(i)
2842               i = i + 1
2843               j = j + md_max
2844            END DO
2845         END DO
2846      END IF
2847      IF (jatom >= katom) THEN
2848         i = 1
2849         j = offset_bc
2850         DO mc = 1, mc_max
2851            DO mb = 1, mb_max
2852!$OMP           ATOMIC
2853               ks(j) = ks(j) - kbc(i)
2854               i = i + 1
2855               j = j + 1
2856            END DO
2857         END DO
2858      ELSE
2859         i = 1
2860         DO mc = 1, mc_max
2861            j = offset_bc + mc - 1
2862            DO mb = 1, mb_max
2863!$OMP           ATOMIC
2864               ks(j) = ks(j) + kbc(i)
2865               i = i + 1
2866               j = j + mc_max
2867            END DO
2868         END DO
2869      END IF
2870      IF (iatom >= latom) THEN
2871         i = 1
2872         j = offset_ad
2873         DO md = 1, md_max
2874            DO ma = 1, ma_max
2875!$OMP           ATOMIC
2876               ks(j) = ks(j) - kad(i)
2877               i = i + 1
2878               j = j + 1
2879            END DO
2880         END DO
2881      ELSE
2882         i = 1
2883         DO md = 1, md_max
2884            j = offset_ad + md - 1
2885            DO ma = 1, ma_max
2886!$OMP           ATOMIC
2887               ks(j) = ks(j) + kad(i)
2888               i = i + 1
2889               j = j + md_max
2890            END DO
2891         END DO
2892      END IF
2893! XXXXXXXXXXXXXXXXXXXXXXXX
2894      IF (iatom >= katom) THEN
2895         i = 1
2896         j = offset_ac
2897         DO mc = 1, mc_max
2898            DO ma = 1, ma_max
2899!$OMP           ATOMIC
2900               ks(j) = ks(j) + kac(i)
2901               i = i + 1
2902               j = j + 1
2903            END DO
2904         END DO
2905      ELSE
2906         i = 1
2907         DO mc = 1, mc_max
2908            j = offset_ac + mc - 1
2909            DO ma = 1, ma_max
2910!$OMP           ATOMIC
2911               ks(j) = ks(j) - kac(i)
2912               i = i + 1
2913               j = j + mc_max
2914            END DO
2915         END DO
2916      END IF
2917
2918   END SUBROUTINE update_fock_matrix_as
2919
2920! **************************************************************************************************
2921!> \brief ...
2922!> \param i ...
2923!> \param j ...
2924!> \param k ...
2925!> \param l ...
2926!> \param set_offsets ...
2927!> \param atom_offsets ...
2928!> \param iset ...
2929!> \param jset ...
2930!> \param kset ...
2931!> \param lset ...
2932!> \param ma_max ...
2933!> \param mb_max ...
2934!> \param mc_max ...
2935!> \param md_max ...
2936!> \param prim ...
2937! **************************************************************************************************
2938   SUBROUTINE print_integrals(i, j, k, l, set_offsets, atom_offsets, iset, jset, kset, lset, ma_max, mb_max, mc_max, md_max, prim)
2939      INTEGER                                            :: i, j, k, l
2940      INTEGER, DIMENSION(:, :, :, :), POINTER            :: set_offsets
2941      INTEGER, DIMENSION(:, :), POINTER                  :: atom_offsets
2942      INTEGER                                            :: iset, jset, kset, lset, ma_max, mb_max, &
2943                                                            mc_max, md_max
2944      REAL(dp), DIMENSION(ma_max*mb_max*mc_max*md_max), &
2945         INTENT(IN)                                      :: prim
2946
2947      INTEGER                                            :: iint, ma, mb, mc, md
2948
2949      iint = 0
2950      DO md = 1, md_max
2951         DO mc = 1, mc_max
2952            DO mb = 1, mb_max
2953               DO ma = 1, ma_max
2954                  iint = iint + 1
2955                  IF (ABS(prim(iint)) .GT. 0.0000000000001) &
2956                     WRITE (99, *) atom_offsets(i, 1) + ma + set_offsets(iset, 1, i, 1) - 1, &
2957                     atom_offsets(j, 1) + ma + set_offsets(jset, 1, j, 1) - 1, &
2958                     atom_offsets(k, 1) + ma + set_offsets(kset, 1, k, 1) - 1, &
2959                     atom_offsets(l, 1) + ma + set_offsets(lset, 1, l, 1) - 1, &
2960                     prim(iint)
2961               END DO
2962            END DO
2963         END DO
2964      END DO
2965
2966   END SUBROUTINE print_integrals
2967
2968#include "hfx_get_pmax_val.f90"
2969END MODULE hfx_energy_potential
2970