1!--------------------------------------------------------------------------------------------------!
2!   CP2K: A general program to perform molecular dynamics simulations                              !
3!   Copyright (C) 2000 - 2019  CP2K developers group                                               !
4!--------------------------------------------------------------------------------------------------!
5
6! **************************************************************************************************
7!> \brief Routines for data exchange between MPI processes
8!> \par History
9!>      04.2008 created [Manuel Guidon]
10!> \author Manuel Guidon
11! **************************************************************************************************
12MODULE hfx_communication
13   USE atomic_kind_types,               ONLY: atomic_kind_type,&
14                                              get_atomic_kind_set
15   USE cp_control_types,                ONLY: dft_control_type
16   USE cp_para_types,                   ONLY: cp_para_env_type
17   USE dbcsr_api,                       ONLY: dbcsr_get_block_p,&
18                                              dbcsr_iterator_blocks_left,&
19                                              dbcsr_iterator_next_block,&
20                                              dbcsr_iterator_start,&
21                                              dbcsr_iterator_stop,&
22                                              dbcsr_iterator_type,&
23                                              dbcsr_p_type,&
24                                              dbcsr_type
25   USE hfx_types,                       ONLY: hfx_2D_map,&
26                                              hfx_basis_type,&
27                                              hfx_type
28   USE kinds,                           ONLY: dp,&
29                                              int_8
30   USE message_passing,                 ONLY: mp_allgather,&
31                                              mp_isendrecv,&
32                                              mp_max,&
33                                              mp_sync,&
34                                              mp_waitall
35   USE particle_types,                  ONLY: particle_type
36   USE qs_environment_types,            ONLY: get_qs_env,&
37                                              qs_environment_type
38#include "./base/base_uses.f90"
39
40   IMPLICIT NONE
41   PRIVATE
42
43   PUBLIC :: get_full_density, &
44             distribute_ks_matrix, &
45             scale_and_add_fock_to_ks_matrix, &
46             get_atomic_block_maps
47   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'hfx_communication'
48
49!***
50
51CONTAINS
52
53! **************************************************************************************************
54!> \brief - Collects full density matrix from all CPUs
55!> \param para_env ...
56!> \param full_density The full Density matrix
57!> \param rho Distributed density
58!> \param number_of_p_entries Maximal buffer size
59!> \param block_offset ...
60!> \param kind_of ...
61!> \param basis_parameter ...
62!> \param get_max_vals_spin ...
63!> \param rho_beta ...
64!> \param antisymmetric ...
65!> \par History
66!>      11.2007 created [Manuel Guidon]
67!> \author Manuel Guidon
68!> \note
69!>      - Communication with left/right node only
70!>        added a mp_sync before and after the ring of isendrecv. This *speed up* the
71!>        communication, and might protect against idle neighbors flooding a busy node
72!>        with messages [Joost]
73! **************************************************************************************************
74   SUBROUTINE get_full_density(para_env, full_density, rho, number_of_p_entries, &
75                               block_offset, kind_of, basis_parameter, &
76                               get_max_vals_spin, rho_beta, antisymmetric)
77
78      TYPE(cp_para_env_type), POINTER                    :: para_env
79      REAL(dp), DIMENSION(:)                             :: full_density
80      TYPE(dbcsr_type), POINTER                          :: rho
81      INTEGER, INTENT(IN)                                :: number_of_p_entries
82      INTEGER, DIMENSION(:), POINTER                     :: block_offset
83      INTEGER                                            :: kind_of(*)
84      TYPE(hfx_basis_type), DIMENSION(:), POINTER        :: basis_parameter
85      LOGICAL, INTENT(IN)                                :: get_max_vals_spin
86      TYPE(dbcsr_type), OPTIONAL, POINTER                :: rho_beta
87      LOGICAL, INTENT(IN)                                :: antisymmetric
88
89      CHARACTER(LEN=*), PARAMETER :: routineN = 'get_full_density', &
90         routineP = moduleN//':'//routineN
91
92      INTEGER :: blk, block_size, data_from, dest, i, iatom, icpu, ikind, iset, jatom, jkind, &
93         jset, mepos, ncpu, nseta, nsetb, pa, pa1, pb, pb1, req(2), source, source_cpu
94      INTEGER, DIMENSION(:), POINTER                     :: nsgfa, nsgfb
95      LOGICAL                                            :: found
96      REAL(dp)                                           :: symmfac
97      REAL(dp), DIMENSION(:), POINTER                    :: recbuffer, sendbuffer, swapbuffer
98      REAL(dp), DIMENSION(:, :), POINTER                 :: sparse_block, sparse_block_beta
99      TYPE(dbcsr_iterator_type)                          :: iter
100
101!debREAL(dp), DIMENSION(:), POINTER          :: full_density
102
103      full_density = 0.0_dp
104      ALLOCATE (sendbuffer(number_of_p_entries))
105      ALLOCATE (recbuffer(number_of_p_entries))
106
107      i = 1
108      CALL dbcsr_iterator_start(iter, rho, shared=.FALSE.)
109      DO WHILE (dbcsr_iterator_blocks_left(iter))
110         CALL dbcsr_iterator_next_block(iter, iatom, jatom, sparse_block, blk)
111         ! the resulting vector will eb only the upper triangle.
112         ! in case of antisymmetry take care to change signs if a lower block gets copied
113         symmfac = 1.0_dp
114         IF (antisymmetric .AND. iatom > jatom) symmfac = -1.0_dp
115         ikind = kind_of(iatom)
116         nseta = basis_parameter(ikind)%nset
117         nsgfa => basis_parameter(ikind)%nsgf
118         jkind = kind_of(jatom)
119         nsetb = basis_parameter(jkind)%nset
120         nsgfb => basis_parameter(jkind)%nsgf
121         IF (get_max_vals_spin) THEN
122            CALL dbcsr_get_block_p(rho_beta, &
123                                   row=iatom, col=jatom, BLOCK=sparse_block_beta, found=found)
124            pa = 0
125            DO iset = 1, nseta
126               pb = 0
127               DO jset = 1, nsetb
128                  DO pa1 = pa + 1, pa + nsgfa(iset)
129                     DO pb1 = pb + 1, pb + nsgfb(jset)
130                        sendbuffer(i) = MAX(ABS(sparse_block(pa1, pb1)), ABS(sparse_block_beta(pa1, pb1)))
131                        i = i + 1
132                     END DO
133                  END DO
134                  pb = pb + nsgfb(jset)
135               END DO
136               pa = pa + nsgfa(iset)
137            END DO
138         ELSE
139            pa = 0
140            DO iset = 1, nseta
141               pb = 0
142               DO jset = 1, nsetb
143                  DO pa1 = pa + 1, pa + nsgfa(iset)
144                     DO pb1 = pb + 1, pb + nsgfb(jset)
145                        sendbuffer(i) = sparse_block(pa1, pb1)*symmfac
146                        i = i + 1
147                     END DO
148                  END DO
149                  pb = pb + nsgfb(jset)
150               END DO
151               pa = pa + nsgfa(iset)
152            END DO
153         END IF
154      END DO
155      CALL dbcsr_iterator_stop(iter)
156
157      ! sync before/after a ring of isendrecv
158      CALL mp_sync(para_env%group)
159      ncpu = para_env%num_pe
160      mepos = para_env%mepos
161      dest = MODULO(mepos + 1, ncpu)
162      source = MODULO(mepos - 1, ncpu)
163      DO icpu = 0, ncpu - 1
164         IF (icpu .NE. ncpu - 1) THEN
165            CALL mp_isendrecv(sendbuffer, dest, recbuffer, source, &
166                              para_env%group, req(1), req(2), 13)
167         ENDIF
168         data_from = MODULO(mepos - icpu, ncpu)
169         source_cpu = MODULO(data_from, ncpu) + 1
170         block_size = block_offset(source_cpu + 1) - block_offset(source_cpu)
171         full_density(block_offset(source_cpu):block_offset(source_cpu) + block_size - 1) = sendbuffer(1:block_size)
172
173         IF (icpu .NE. ncpu - 1) THEN
174            CALL mp_waitall(req)
175         ENDIF
176         swapbuffer => sendbuffer
177         sendbuffer => recbuffer
178         recbuffer => swapbuffer
179      ENDDO
180      DEALLOCATE (sendbuffer, recbuffer)
181      ! sync before/after a ring of isendrecv
182      CALL mp_sync(para_env%group)
183
184   END SUBROUTINE get_full_density
185
186! **************************************************************************************************
187!> \brief - Distributes the local full Kohn-Sham matrix to all CPUS
188!> \param para_env ...
189!> \param full_ks The full Kohn-Sham matrix
190!> \param ks_matrix Distributed Kohn-Sham matrix
191!> \param number_of_p_entries Maximal buffer size
192!> \param block_offset ...
193!> \param kind_of ...
194!> \param basis_parameter ...
195!> \param off_diag_fac ...
196!> \param diag_fac ...
197!> \par History
198!>      11.2007 created [Manuel Guidon]
199!> \author Manuel Guidon
200!> \note
201!>      - Communication with left/right node only
202! **************************************************************************************************
203   SUBROUTINE distribute_ks_matrix(para_env, full_ks, ks_matrix, number_of_p_entries, &
204                                   block_offset, kind_of, basis_parameter, &
205                                   off_diag_fac, diag_fac)
206
207      TYPE(cp_para_env_type), POINTER                    :: para_env
208      REAL(dp), DIMENSION(:)                             :: full_ks
209      TYPE(dbcsr_type), POINTER                          :: ks_matrix
210      INTEGER, INTENT(IN)                                :: number_of_p_entries
211      INTEGER, DIMENSION(:), POINTER                     :: block_offset
212      INTEGER                                            :: kind_of(*)
213      TYPE(hfx_basis_type), DIMENSION(:), POINTER        :: basis_parameter
214      REAL(dp), INTENT(IN), OPTIONAL                     :: off_diag_fac, diag_fac
215
216      CHARACTER(LEN=*), PARAMETER :: routineN = 'distribute_ks_matrix', &
217         routineP = moduleN//':'//routineN
218
219      INTEGER :: blk, block_size, data_to, dest, dest_cpu, i, iatom, icpu, ikind, iset, jatom, &
220         jkind, jset, mepos, ncpu, nseta, nsetb, pa, pa1, pb, pb1, req(2), source
221      INTEGER, DIMENSION(:), POINTER                     :: nsgfa, nsgfb
222      REAL(dp)                                           :: my_fac, myd_fac
223      REAL(dp), DIMENSION(:), POINTER                    :: recbuffer, sendbuffer, swapbuffer
224      REAL(dp), DIMENSION(:, :), POINTER                 :: sparse_block
225      TYPE(dbcsr_iterator_type)                          :: iter
226
227      my_fac = 1.0_dp; myd_fac = 1.0_dp
228      IF (PRESENT(off_diag_fac)) my_fac = off_diag_fac
229      IF (PRESENT(diag_fac)) myd_fac = diag_fac
230
231      ALLOCATE (sendbuffer(number_of_p_entries))
232      sendbuffer = 0.0_dp
233      ALLOCATE (recbuffer(number_of_p_entries))
234      recbuffer = 0.0_dp
235
236      ncpu = para_env%num_pe
237      mepos = para_env%mepos
238      dest = MODULO(mepos + 1, ncpu)
239      source = MODULO(mepos - 1, ncpu)
240
241      ! sync before/after a ring of isendrecv
242      CALL mp_sync(para_env%group)
243      DO icpu = 1, ncpu
244         i = 1
245         data_to = mepos - icpu
246         dest_cpu = MODULO(data_to, ncpu) + 1
247         block_size = block_offset(dest_cpu + 1) - block_offset(dest_cpu)
248       sendbuffer(1:block_size) = sendbuffer(1:block_size) + full_ks(block_offset(dest_cpu):block_offset(dest_cpu) + block_size - 1)
249         IF (icpu .EQ. ncpu) EXIT
250         CALL mp_isendrecv(sendbuffer, dest, recbuffer, source, &
251                           para_env%group, req(1), req(2), 13)
252
253         CALL mp_waitall(req)
254         swapbuffer => sendbuffer
255         sendbuffer => recbuffer
256         recbuffer => swapbuffer
257      ENDDO
258      ! sync before/after a ring of isendrecv
259      CALL mp_sync(para_env%group)
260
261      i = 1
262      CALL dbcsr_iterator_start(iter, ks_matrix, shared=.FALSE.)
263      DO WHILE (dbcsr_iterator_blocks_left(iter))
264         CALL dbcsr_iterator_next_block(iter, iatom, jatom, sparse_block, blk)
265
266         ikind = kind_of(iatom)
267         nseta = basis_parameter(ikind)%nset
268         nsgfa => basis_parameter(ikind)%nsgf
269         jkind = kind_of(jatom)
270         nsetb = basis_parameter(jkind)%nset
271         nsgfb => basis_parameter(jkind)%nsgf
272         pa = 0
273         DO iset = 1, nseta
274            pb = 0
275            DO jset = 1, nsetb
276               DO pa1 = pa + 1, pa + nsgfa(iset)
277                  DO pb1 = pb + 1, pb + nsgfb(jset)
278                     IF (iatom == jatom .AND. pa1 == pb1) THEN
279                        sparse_block(pa1, pb1) = sendbuffer(i)*myd_fac + sparse_block(pa1, pb1)
280                     ELSE
281                        sparse_block(pa1, pb1) = sendbuffer(i)*my_fac + sparse_block(pa1, pb1)
282                     END IF
283                     i = i + 1
284                  END DO
285               END DO
286               pb = pb + nsgfb(jset)
287            END DO
288            pa = pa + nsgfa(iset)
289         END DO
290      END DO
291      CALL dbcsr_iterator_stop(iter)
292
293      DEALLOCATE (sendbuffer, recbuffer)
294
295   END SUBROUTINE distribute_ks_matrix
296
297! **************************************************************************************************
298!> \brief - Distributes the local full Kohn-Sham matrix to all CPUS. Is called in
299!>        case of adiabatic rescaling. This is just a refactored version of
300!>        distribute_ks_matrix
301!> \param para_env ...
302!> \param qs_env ...
303!> \param ks_matrix Distributed Kohn-Sham matrix
304!> \param irep ...
305!> \param scaling_factor ...
306!> \par History
307!>      11.2007 created [Manuel Guidon]
308!> \author Manuel Guidon
309!> \note
310!>      - Communication with left/right node only
311! **************************************************************************************************
312   SUBROUTINE scale_and_add_fock_to_ks_matrix(para_env, qs_env, ks_matrix, irep, &
313                                              scaling_factor)
314
315      TYPE(cp_para_env_type), POINTER                    :: para_env
316      TYPE(qs_environment_type), POINTER                 :: qs_env
317      TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: ks_matrix
318      INTEGER, INTENT(IN)                                :: irep
319      REAL(dp), INTENT(IN)                               :: scaling_factor
320
321      CHARACTER(LEN=*), PARAMETER :: routineN = 'scale_and_add_fock_to_ks_matrix', &
322         routineP = moduleN//':'//routineN
323
324      INTEGER                                            :: iatom, ikind, img, natom, nimages, nspins
325      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: kind_of, last_sgf_global
326      REAL(dp), DIMENSION(:, :), POINTER                 :: full_ks
327      TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
328      TYPE(dft_control_type), POINTER                    :: dft_control
329      TYPE(hfx_basis_type), DIMENSION(:), POINTER        :: basis_parameter
330      TYPE(hfx_type), POINTER                            :: actual_x_data
331      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
332
333!! All shared data is saved in i_thread = 1!
334
335      NULLIFY (dft_control)
336      actual_x_data => qs_env%x_data(irep, 1)
337      basis_parameter => actual_x_data%basis_parameter
338
339      CALL get_qs_env(qs_env=qs_env, &
340                      atomic_kind_set=atomic_kind_set, &
341                      particle_set=particle_set, &
342                      dft_control=dft_control)
343
344      nspins = dft_control%nspins
345      nimages = dft_control%nimages
346      CPASSERT(nimages == 1)
347
348      natom = SIZE(particle_set, 1)
349      ALLOCATE (kind_of(natom))
350
351      CALL get_atomic_kind_set(atomic_kind_set=atomic_kind_set, &
352                               kind_of=kind_of)
353
354      ALLOCATE (last_sgf_global(0:natom))
355      last_sgf_global(0) = 0
356      DO iatom = 1, natom
357         ikind = kind_of(iatom)
358         last_sgf_global(iatom) = last_sgf_global(iatom - 1) + basis_parameter(ikind)%nsgf_total
359      END DO
360      full_ks => actual_x_data%full_ks_alpha
361      IF (scaling_factor /= 1.0_dp) THEN
362         full_ks = full_ks*scaling_factor
363      END IF
364      DO img = 1, nimages
365         CALL distribute_ks_matrix(para_env, full_ks(:, img), ks_matrix(1, img)%matrix, actual_x_data%number_of_p_entries, &
366                                   actual_x_data%block_offset, kind_of, basis_parameter, &
367                                   off_diag_fac=0.5_dp)
368      END DO
369      DEALLOCATE (actual_x_data%full_ks_alpha)
370
371      IF (nspins == 2) THEN
372         full_ks => actual_x_data%full_ks_beta
373         IF (scaling_factor /= 1.0_dp) THEN
374            full_ks = full_ks*scaling_factor
375         END IF
376         DO img = 1, nimages
377            CALL distribute_ks_matrix(para_env, full_ks(:, img), ks_matrix(2, img)%matrix, actual_x_data%number_of_p_entries, &
378                                      actual_x_data%block_offset, kind_of, basis_parameter, &
379                                      off_diag_fac=0.5_dp)
380         END DO
381         DEALLOCATE (actual_x_data%full_ks_beta)
382      END IF
383
384      DEALLOCATE (kind_of, last_sgf_global)
385
386   END SUBROUTINE scale_and_add_fock_to_ks_matrix
387
388! **************************************************************************************************
389!> \brief Given a 2d index pair, this function returns a 1d index pair for
390!>        a symmetric upper triangle NxN matrix
391!>        The compiler should inline this function, therefore it appears in
392!>        several modules
393!> \param i 2d index
394!> \param j 2d index
395!> \param N matrix size
396!> \return ...
397!> \par History
398!>      03.2009 created [Manuel Guidon]
399!> \author Manuel Guidon
400! **************************************************************************************************
401   PURE FUNCTION get_1D_idx(i, j, N)
402      INTEGER, INTENT(IN)                                :: i, j
403      INTEGER(int_8), INTENT(IN)                         :: N
404      INTEGER(int_8)                                     :: get_1D_idx
405
406      INTEGER(int_8)                                     :: min_ij
407
408      min_ij = MIN(i, j)
409      get_1D_idx = min_ij*N + MAX(i, j) - (min_ij - 1)*min_ij/2 - N
410
411   END FUNCTION get_1D_idx
412
413! **************************************************************************************************
414!> \brief create a several maps array that reflects the ks matrix sparsity
415!> \param matrix ...
416!> \param basis_parameter ...
417!> \param kind_of ...
418!> \param is_assoc_atomic_block ...
419!> \param number_of_p_entries ...
420!> \param para_env ...
421!> \param atomic_block_offset ...
422!> \param set_offset ...
423!> \param block_offset ...
424!> \param map_atoms_to_cpus ...
425!> \param nkind ...
426!> \par History
427!>      11.2007 refactored [Joost VandeVondele]
428!>      07.2009 add new maps
429!> \author Manuel Guidon
430!> \notes
431!>      is_assoc_atomic_block returns the mpi rank + 1 for associated blocks,
432!>      zero for unassiated blocks
433! **************************************************************************************************
434   SUBROUTINE get_atomic_block_maps(matrix, basis_parameter, kind_of, &
435                                    is_assoc_atomic_block, number_of_p_entries, &
436                                    para_env, atomic_block_offset, set_offset, &
437                                    block_offset, map_atoms_to_cpus, nkind)
438
439      TYPE(dbcsr_type), POINTER                          :: matrix
440      TYPE(hfx_basis_type), DIMENSION(:)                 :: basis_parameter
441      INTEGER, DIMENSION(:)                              :: kind_of
442      INTEGER, DIMENSION(:, :), INTENT(OUT)              :: is_assoc_atomic_block
443      INTEGER, INTENT(OUT)                               :: number_of_p_entries
444      TYPE(cp_para_env_type), POINTER                    :: para_env
445      INTEGER, DIMENSION(:, :), POINTER                  :: atomic_block_offset
446      INTEGER, DIMENSION(:, :, :, :), POINTER            :: set_offset
447      INTEGER, DIMENSION(:), POINTER                     :: block_offset
448      TYPE(hfx_2D_map), DIMENSION(:), POINTER            :: map_atoms_to_cpus
449      INTEGER                                            :: nkind
450
451      CHARACTER(LEN=*), PARAMETER :: routineN = 'get_atomic_block_maps', &
452         routineP = moduleN//':'//routineN
453
454      INTEGER :: blk, handle, iatom, ibuf, icpu, ikind, ilist, iset, itask, jatom, jkind, jset, &
455         natom, ncpu, nseta, nsetb, number_of_p_blocks, offset, tmp(2)
456      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: buffer_in, buffer_out, counter, rcount, &
457                                                            rdispl
458      INTEGER, DIMENSION(:), POINTER                     :: iatom_list, jatom_list, nsgfa, nsgfb
459      REAL(KIND=dp), DIMENSION(:, :), POINTER            :: sparse_block
460      TYPE(dbcsr_iterator_type)                          :: iter
461
462      CALL timeset(routineN, handle)
463
464      is_assoc_atomic_block = 0
465      number_of_p_entries = 0
466      number_of_p_blocks = 0
467
468      !
469      ! count number_of_p_blocks and number_of_p_entries
470      !
471      CALL dbcsr_iterator_start(iter, matrix, shared=.FALSE.)
472      DO WHILE (dbcsr_iterator_blocks_left(iter))
473         CALL dbcsr_iterator_next_block(iter, iatom, jatom, sparse_block, blk)
474         ikind = kind_of(iatom)
475         jkind = kind_of(jatom)
476         number_of_p_blocks = number_of_p_blocks + 1
477         number_of_p_entries = number_of_p_entries + &
478                               basis_parameter(ikind)%nsgf_total*basis_parameter(jkind)%nsgf_total
479      END DO
480      CALL dbcsr_iterator_stop(iter)
481
482      tmp = (/number_of_p_entries, number_of_p_blocks/)
483      CALL mp_max(tmp, para_env%group)
484      number_of_p_entries = tmp(1)
485      number_of_p_blocks = tmp(2)
486      !
487      ! send this info around, so we can construct is_assoc_atomic_block
488      ! pack all data in buffers and use allgatherv
489      !
490      ALLOCATE (buffer_in(3*number_of_p_blocks))
491      ALLOCATE (buffer_out(3*number_of_p_blocks*para_env%num_pe))
492      ALLOCATE (rcount(para_env%num_pe), rdispl(para_env%num_pe))
493
494      buffer_in = 0
495      ibuf = 0
496
497      CALL dbcsr_iterator_start(iter, matrix, shared=.FALSE.)
498      DO WHILE (dbcsr_iterator_blocks_left(iter))
499         CALL dbcsr_iterator_next_block(iter, iatom, jatom, sparse_block, blk)
500
501         buffer_in(ibuf + 1) = iatom
502         buffer_in(ibuf + 2) = jatom
503         buffer_in(ibuf + 3) = para_env%mepos + 1
504         ibuf = ibuf + 3
505      END DO
506      CALL dbcsr_iterator_stop(iter)
507
508      rcount = SIZE(buffer_in)
509      rdispl(1) = 0
510      DO icpu = 2, para_env%num_pe
511         rdispl(icpu) = rdispl(icpu - 1) + rcount(icpu - 1)
512      ENDDO
513      CALL mp_allgather(buffer_in, buffer_out, rcount, rdispl, para_env%group)
514
515      DO ibuf = 0, number_of_p_blocks*para_env%num_pe*3 - 3, 3
516         itask = buffer_out(ibuf + 3)
517         ! buffer_out can be 0 if buffer_in contained less elements than the max number of atom pairs
518         ! is_assoc_atomic_block is a map for atom pairs to a processor (assumes symmetry, i,j on the ame as j,i)
519         IF (itask .NE. 0) THEN
520            iatom = buffer_out(ibuf + 1)
521            jatom = buffer_out(ibuf + 2)
522            is_assoc_atomic_block(iatom, jatom) = itask
523            is_assoc_atomic_block(jatom, iatom) = itask
524         ENDIF
525      ENDDO
526
527      IF (ASSOCIATED(map_atoms_to_cpus)) THEN
528         DO icpu = 1, para_env%num_pe
529            DEALLOCATE (map_atoms_to_cpus(icpu)%iatom_list)
530            DEALLOCATE (map_atoms_to_cpus(icpu)%jatom_list)
531         END DO
532         DEALLOCATE (map_atoms_to_cpus)
533      END IF
534
535      natom = SIZE(is_assoc_atomic_block, 1)
536      ALLOCATE (map_atoms_to_cpus(para_env%num_pe))
537      ALLOCATE (counter(para_env%num_pe))
538      counter = 0
539
540      DO iatom = 1, natom
541         DO jatom = iatom, natom
542            icpu = is_assoc_atomic_block(jatom, iatom)
543            IF (icpu > 0) counter(icpu) = counter(icpu) + 1
544         END DO
545      END DO
546      DO icpu = 1, para_env%num_pe
547         ALLOCATE (map_atoms_to_cpus(icpu)%iatom_list(counter(icpu)))
548         ALLOCATE (map_atoms_to_cpus(icpu)%jatom_list(counter(icpu)))
549      END DO
550      counter = 0
551      DO iatom = 1, natom
552         DO jatom = iatom, natom
553            icpu = is_assoc_atomic_block(jatom, iatom)
554            IF (icpu > 0) THEN
555               counter(icpu) = counter(icpu) + 1
556               map_atoms_to_cpus(icpu)%jatom_list(counter(icpu)) = jatom
557               map_atoms_to_cpus(icpu)%iatom_list(counter(icpu)) = iatom
558            END IF
559         END DO
560      END DO
561
562      DEALLOCATE (counter)
563
564      ncpu = para_env%num_pe
565      offset = 1
566      atomic_block_offset = 0
567      block_offset = 0
568      DO icpu = 1, ncpu
569         iatom_list => map_atoms_to_cpus(icpu)%iatom_list
570         jatom_list => map_atoms_to_cpus(icpu)%jatom_list
571         block_offset(icpu) = offset
572         DO ilist = 1, SIZE(iatom_list)
573            iatom = iatom_list(ilist)
574            ikind = kind_of(iatom)
575            jatom = jatom_list(ilist)
576            jkind = kind_of(jatom)
577            atomic_block_offset(iatom, jatom) = offset
578            atomic_block_offset(jatom, iatom) = offset
579            offset = offset + basis_parameter(ikind)%nsgf_total*basis_parameter(jkind)%nsgf_total
580         END DO
581      END DO
582      block_offset(ncpu + 1) = offset
583      set_offset = 0
584      DO ikind = 1, nkind
585         nseta = basis_parameter(ikind)%nset
586         nsgfa => basis_parameter(ikind)%nsgf
587         DO jkind = 1, nkind
588            nsetb = basis_parameter(jkind)%nset
589            nsgfb => basis_parameter(jkind)%nsgf
590            offset = 1
591            DO iset = 1, nseta
592               DO jset = 1, nsetb
593                  set_offset(jset, iset, jkind, ikind) = offset
594                  offset = offset + nsgfa(iset)*nsgfb(jset)
595               END DO
596            END DO
597         END DO
598      END DO
599
600      CALL timestop(handle)
601
602   END SUBROUTINE get_atomic_block_maps
603
604END MODULE hfx_communication
605