1!--------------------------------------------------------------------------------------------------!
2!   CP2K: A general program to perform molecular dynamics simulations                              !
3!   Copyright (C) 2000 - 2019  CP2K developers group                                               !
4!--------------------------------------------------------------------------------------------------!
5
6MODULE qs_fb_atomic_matrix_methods
7
8   USE cp_para_types,                   ONLY: cp_para_env_type
9   USE dbcsr_api,                       ONLY: dbcsr_get_block_p,&
10                                              dbcsr_get_info,&
11                                              dbcsr_get_stored_coordinates,&
12                                              dbcsr_type
13   USE kinds,                           ONLY: dp,&
14                                              int_8
15   USE message_passing,                 ONLY: mp_alltoall
16   USE qs_fb_atomic_halo_types,         ONLY: fb_atomic_halo_atom_global2halo,&
17                                              fb_atomic_halo_get,&
18                                              fb_atomic_halo_has_data,&
19                                              fb_atomic_halo_list_get,&
20                                              fb_atomic_halo_list_obj,&
21                                              fb_atomic_halo_obj
22   USE qs_fb_com_tasks_types,           ONLY: &
23        TASK_COST, TASK_DEST, TASK_N_RECORDS, TASK_PAIR, TASK_SRC, &
24        fb_com_atom_pairs_calc_buffer_sizes, fb_com_atom_pairs_create, fb_com_atom_pairs_decode, &
25        fb_com_atom_pairs_get, fb_com_atom_pairs_has_data, fb_com_atom_pairs_init, &
26        fb_com_atom_pairs_nullify, fb_com_atom_pairs_obj, fb_com_atom_pairs_release, &
27        fb_com_tasks_build_atom_pairs, fb_com_tasks_create, fb_com_tasks_decode_pair, &
28        fb_com_tasks_encode_pair, fb_com_tasks_get, fb_com_tasks_nullify, fb_com_tasks_obj, &
29        fb_com_tasks_release, fb_com_tasks_set, fb_com_tasks_transpose_dest_src
30   USE qs_fb_matrix_data_types,         ONLY: fb_matrix_data_get,&
31                                              fb_matrix_data_has_data,&
32                                              fb_matrix_data_obj
33#include "./base/base_uses.f90"
34
35   IMPLICIT NONE
36
37   PRIVATE
38
39   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_fb_atomic_matrix_methods'
40
41   PUBLIC :: fb_atmatrix_calc_size, &
42             fb_atmatrix_construct, &
43             fb_atmatrix_construct_2, &
44             fb_atmatrix_generate_com_pairs_2
45
46CONTAINS
47
48! **********************************************************************
49!> \brief Calculates the atomic matrix size from a given DBCSR matrix
50!>        and atomic halo. It also calculates the first row (col) or the
51!>        row (col) atomic blocks in the atomic matrix
52!> \param dbcsr_mat : pointer to the DBCSR matrix the atomic matrix is
53!>                    to be constructed from
54!> \param atomic_halo : the atomic halo used for defining the atomic
55!>                      matrix from the DBCSR matrix
56!> \param nrows : outputs total number of rows in the atomic matrix
57!> \param ncols : outputs total number of cols in the atomic matrix
58!> \param blk_row_start : first row in each atomic blk row in the
59!>                        atomic matrix
60!> \param blk_col_start : first col in each atomic blk col in the
61!>                        atomic matrix
62!> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
63! **************************************************************************************************
64   SUBROUTINE fb_atmatrix_calc_size(dbcsr_mat, &
65                                    atomic_halo, &
66                                    nrows, &
67                                    ncols, &
68                                    blk_row_start, &
69                                    blk_col_start)
70      TYPE(dbcsr_type), POINTER                          :: dbcsr_mat
71      TYPE(fb_atomic_halo_obj), INTENT(IN)               :: atomic_halo
72      INTEGER, INTENT(OUT)                               :: nrows, ncols
73      INTEGER, DIMENSION(:), INTENT(OUT)                 :: blk_row_start, blk_col_start
74
75      CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_atmatrix_calc_size', &
76         routineP = moduleN//':'//routineN
77
78      INTEGER                                            :: ii, natoms_in_halo
79      INTEGER, DIMENSION(:), POINTER                     :: col_block_size_data, halo_atoms, &
80                                                            row_block_size_data
81      LOGICAL                                            :: check_ok
82
83      NULLIFY (halo_atoms, row_block_size_data, col_block_size_data)
84
85      CALL dbcsr_get_info(dbcsr_mat, row_blk_size=row_block_size_data, col_blk_size=col_block_size_data)
86      CALL fb_atomic_halo_get(atomic_halo=atomic_halo, &
87                              natoms=natoms_in_halo, &
88                              halo_atoms=halo_atoms)
89      check_ok = SIZE(blk_row_start) .GE. (natoms_in_halo + 1)
90      CPASSERT(check_ok)
91      check_ok = SIZE(blk_col_start) .GE. (natoms_in_halo + 1)
92      CPASSERT(check_ok)
93      blk_row_start = 0
94      blk_col_start = 0
95      nrows = 0
96      ncols = 0
97      DO ii = 1, natoms_in_halo
98         blk_row_start(ii) = nrows + 1
99         blk_col_start(ii) = ncols + 1
100         nrows = nrows + row_block_size_data(halo_atoms(ii))
101         ncols = ncols + col_block_size_data(halo_atoms(ii))
102      END DO
103      blk_row_start(natoms_in_halo + 1) = nrows + 1
104      blk_col_start(natoms_in_halo + 1) = ncols + 1
105   END SUBROUTINE fb_atmatrix_calc_size
106
107! ****************************************************************************
108!> \brief Constructs atomic matrix for filter basis method from a given
109!>        DBCSR matrix and a set of atomic send and recv pairs
110!>        corresponding to the matrix blocks that needs to be included
111!>        in the atomic matrix. This version is for when we do MPI
112!>        communications at every step, for each atomic matrix.
113!> \param dbcsr_mat : the DBCSR matrix the atomic matrix is to be
114!>                    constructed from
115!> \param atomic_halo : the atomic halo conrresponding to this atomic
116!>                      matrix
117!> \param para_env : cp2k parallel environment
118!> \param atomic_matrix : the atomic matrix to be constructed, it should
119!>                        have already been allocated prior entering
120!>                        this subroutine
121!> \param blk_row_start : first row in each atomic blk row in the
122!>                        atomic matrix
123!> \param blk_col_start : first col in each atomic blk col in the
124!>                        atomic matrix
125!> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
126! **************************************************************************************************
127   SUBROUTINE fb_atmatrix_construct(dbcsr_mat, &
128                                    atomic_halo, &
129                                    para_env, &
130                                    atomic_matrix, &
131                                    blk_row_start, &
132                                    blk_col_start)
133      TYPE(dbcsr_type), POINTER                          :: dbcsr_mat
134      TYPE(fb_atomic_halo_obj), INTENT(IN)               :: atomic_halo
135      TYPE(cp_para_env_type), POINTER                    :: para_env
136      REAL(KIND=dp), DIMENSION(:, :), INTENT(OUT)        :: atomic_matrix
137      INTEGER, DIMENSION(:), INTENT(IN)                  :: blk_row_start, blk_col_start
138
139      CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_atmatrix_construct', &
140         routineP = moduleN//':'//routineN
141
142      INTEGER :: handle, iatom, iatom_in_halo, ii, ind, ipair, ipe, jatom, jatom_in_halo, jj, &
143         ncols_blk, npairs_recv, npairs_send, nrows_blk, numprocs, pe, recv_encode, send_encode
144      INTEGER(KIND=int_8), DIMENSION(:), POINTER         :: pairs_recv, pairs_send
145      INTEGER, ALLOCATABLE, DIMENSION(:) :: recv_disps, recv_pair_count, recv_pair_disps, &
146         recv_sizes, send_disps, send_pair_count, send_pair_disps, send_sizes
147      INTEGER, DIMENSION(:), POINTER                     :: col_block_size_data, row_block_size_data
148      LOGICAL                                            :: found
149      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: recv_buf, send_buf
150      REAL(KIND=dp), DIMENSION(:, :), POINTER            :: mat_block
151      TYPE(fb_com_atom_pairs_obj)                        :: atom_pairs_recv, atom_pairs_send
152
153      CALL timeset(routineN, handle)
154
155      NULLIFY (pairs_send, pairs_recv, mat_block, &
156               row_block_size_data, col_block_size_data)
157      CALL fb_com_atom_pairs_nullify(atom_pairs_send)
158      CALL fb_com_atom_pairs_nullify(atom_pairs_recv)
159
160      ! initialise atomic matrix
161      IF (SIZE(atomic_matrix, 1) > 0 .AND. SIZE(atomic_matrix, 2) > 0) THEN
162         atomic_matrix = 0.0_dp
163      END IF
164
165      ! generate send and receiv atomic pairs
166      CALL fb_com_atom_pairs_create(atom_pairs_send)
167      CALL fb_com_atom_pairs_create(atom_pairs_recv)
168      CALL fb_atmatrix_generate_com_pairs(dbcsr_mat, &
169                                          atomic_halo, &
170                                          para_env, &
171                                          atom_pairs_send, &
172                                          atom_pairs_recv)
173
174      ! get com pair informations
175      CALL fb_com_atom_pairs_get(atom_pairs=atom_pairs_send, &
176                                 pairs=pairs_send, &
177                                 npairs=npairs_send, &
178                                 natoms_encode=send_encode)
179      CALL fb_com_atom_pairs_get(atom_pairs=atom_pairs_recv, &
180                                 pairs=pairs_recv, &
181                                 npairs=npairs_recv, &
182                                 natoms_encode=recv_encode)
183
184      ! get para_env info
185      numprocs = para_env%num_pe
186
187      ! get dbcsr row and col block sizes
188      CALL dbcsr_get_info(dbcsr_mat, row_blk_size=row_block_size_data, col_blk_size=col_block_size_data)
189
190      ! allocate temporary arrays for send
191      ALLOCATE (send_sizes(numprocs))
192      ALLOCATE (send_disps(numprocs))
193      ALLOCATE (send_pair_count(numprocs))
194      ALLOCATE (send_pair_disps(numprocs))
195
196      ! setup send buffer sizes
197      CALL fb_com_atom_pairs_calc_buffer_sizes(atom_pairs_send, &
198                                               numprocs, &
199                                               row_block_size_data, &
200                                               col_block_size_data, &
201                                               send_sizes, &
202                                               send_disps, &
203                                               send_pair_count, &
204                                               send_pair_disps)
205      ! allocate send buffer
206      ALLOCATE (send_buf(SUM(send_sizes)))
207
208      ! allocate temporary arrays for recv
209      ALLOCATE (recv_sizes(numprocs))
210      ALLOCATE (recv_disps(numprocs))
211      ALLOCATE (recv_pair_count(numprocs))
212      ALLOCATE (recv_pair_disps(numprocs))
213
214      ! setup recv buffer sizes
215      CALL fb_com_atom_pairs_calc_buffer_sizes(atom_pairs_recv, &
216                                               numprocs, &
217                                               row_block_size_data, &
218                                               col_block_size_data, &
219                                               recv_sizes, &
220                                               recv_disps, &
221                                               recv_pair_count, &
222                                               recv_pair_disps)
223      ! allocate recv buffer
224      ALLOCATE (recv_buf(SUM(recv_sizes)))
225      ! do packing
226      DO ipe = 1, numprocs
227         ! need to reuse send_sizes as an accumulative displacement, so recalculate
228         send_sizes(ipe) = 0
229         DO ipair = 1, send_pair_count(ipe)
230            CALL fb_com_atom_pairs_decode(pairs_send(send_pair_disps(ipe) + ipair), &
231                                          pe, iatom, jatom, send_encode)
232            nrows_blk = row_block_size_data(iatom)
233            ncols_blk = col_block_size_data(jatom)
234            CALL dbcsr_get_block_p(matrix=dbcsr_mat, &
235                                   row=iatom, col=jatom, block=mat_block, &
236                                   found=found)
237            IF (.NOT. found) THEN
238               CPABORT("Matrix block not found")
239            ELSE
240               ! we have found the matrix block
241               DO jj = 1, ncols_blk
242                  DO ii = 1, nrows_blk
243                     ! column major format in blocks
244                     ind = send_disps(ipe) + send_sizes(ipe) + ii + (jj - 1)*nrows_blk
245                     send_buf(ind) = mat_block(ii, jj)
246                  END DO ! ii
247               END DO ! jj
248               send_sizes(ipe) = send_sizes(ipe) + nrows_blk*ncols_blk
249            END IF
250         END DO ! ipair
251      END DO ! ipe
252
253      ! do communication
254      CALL mp_alltoall(send_buf, send_sizes, send_disps, &
255                       recv_buf, recv_sizes, recv_disps, &
256                       para_env%group)
257
258      ! cleanup temporary arrays no longer needed
259      DEALLOCATE (send_buf)
260      DEALLOCATE (send_sizes)
261      DEALLOCATE (send_disps)
262      DEALLOCATE (send_pair_count)
263      DEALLOCATE (send_pair_disps)
264
265      ! do unpacking
266      DO ipe = 1, numprocs
267         recv_sizes(ipe) = 0
268         DO ipair = 1, recv_pair_count(ipe)
269            CALL fb_com_atom_pairs_decode(pairs_recv(recv_pair_disps(ipe) + ipair), &
270                                          pe, iatom, jatom, recv_encode)
271            ! nrows_blk = last_row(iatom) - first_row(iatom) + 1
272            ! ncols_blk = last_col(jatom) - first_col(jatom) + 1
273            nrows_blk = row_block_size_data(iatom)
274            ncols_blk = col_block_size_data(jatom)
275            ! get the corresponding atom indices in halo
276            ! the atoms from the recv_pairs should be in the atomic_halo, because
277            ! the recv_pairs are the matrix blocks requested by the local proc for
278            ! this particular atomic_halo
279            CALL fb_atomic_halo_atom_global2halo(atomic_halo, &
280                                                 iatom, iatom_in_halo, &
281                                                 found)
282            CPASSERT(found)
283            CALL fb_atomic_halo_atom_global2halo(atomic_halo, &
284                                                 jatom, jatom_in_halo, &
285                                                 found)
286            CPASSERT(found)
287            ! put block into the full conventional matrix
288            DO jj = 1, ncols_blk
289               DO ii = 1, nrows_blk
290                  ! column major format in blocks
291                  ind = recv_disps(ipe) + recv_sizes(ipe) + ii + (jj - 1)*nrows_blk
292                  atomic_matrix(blk_row_start(iatom_in_halo) + ii - 1, &
293                                blk_col_start(jatom_in_halo) + jj - 1) = recv_buf(ind)
294
295               END DO ! ii
296            END DO ! jj
297            recv_sizes(ipe) = recv_sizes(ipe) + nrows_blk*ncols_blk
298         END DO ! ipair
299      END DO ! ipe
300
301      ! the constructed matrix is upper triangular, fill it up to full
302      DO ii = 2, SIZE(atomic_matrix, 1)
303         DO jj = 1, ii - 1
304            atomic_matrix(ii, jj) = atomic_matrix(jj, ii)
305         END DO
306      END DO
307
308      ! cleanup rest of the temporary arrays
309      DEALLOCATE (recv_buf)
310      DEALLOCATE (recv_sizes)
311      DEALLOCATE (recv_disps)
312      DEALLOCATE (recv_pair_count)
313      DEALLOCATE (recv_pair_disps)
314      CALL fb_com_atom_pairs_release(atom_pairs_send)
315      CALL fb_com_atom_pairs_release(atom_pairs_recv)
316
317      CALL timestop(handle)
318
319   END SUBROUTINE fb_atmatrix_construct
320
321! ****************************************************************************
322!> \brief Constructs atomic matrix for filter basis method from a given
323!>        DBCSR matrix and a set of atomic send and recv pairs
324!>        corresponding to the matrix blocks that needs to be included
325!>        in the atomic matrix. This version is for when we do MPI
326!>        communications collectively in one go at the beginning.
327!> \param matrix_storage : data storing the relevant DBCSR matrix blocks
328!>                         needed for constructing the atomic matrix
329!> \param atomic_halo : the atomic halo conrresponding to this atomic
330!>                      matrix
331!> \param atomic_matrix : the atomic matrix to be constructed, it should
332!>                        have already been allocated prior entering
333!>                        this subroutine
334!> \param blk_row_start : first row in each atomic blk row in the
335!>                        atomic matrix
336!> \param blk_col_start : first col in each atomic blk col in the
337!>                        atomic matrix
338!> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
339! **************************************************************************************************
340   SUBROUTINE fb_atmatrix_construct_2(matrix_storage, &
341                                      atomic_halo, &
342                                      atomic_matrix, &
343                                      blk_row_start, &
344                                      blk_col_start)
345      TYPE(fb_matrix_data_obj), INTENT(IN)               :: matrix_storage
346      TYPE(fb_atomic_halo_obj), INTENT(IN)               :: atomic_halo
347      REAL(KIND=dp), DIMENSION(:, :), INTENT(OUT)        :: atomic_matrix
348      INTEGER, DIMENSION(:), INTENT(IN)                  :: blk_row_start, blk_col_start
349
350      CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_atmatrix_construct_2', &
351         routineP = moduleN//':'//routineN
352
353      INTEGER                                            :: handle, iatom, iatom_global, icol, ii, &
354                                                            irow, jatom, jatom_global, jj, &
355                                                            natoms_in_halo
356      INTEGER, DIMENSION(:), POINTER                     :: halo_atoms
357      LOGICAL                                            :: check_ok, found
358      REAL(KIND=dp), DIMENSION(:, :), POINTER            :: blk_p
359
360      CALL timeset(routineN, handle)
361
362      check_ok = fb_matrix_data_has_data(matrix_storage)
363      CPASSERT(check_ok)
364      check_ok = fb_atomic_halo_has_data(atomic_halo)
365      CPASSERT(check_ok)
366
367      NULLIFY (halo_atoms, blk_p)
368
369      ! initialise atomic matrix
370      IF (SIZE(atomic_matrix, 1) > 0 .AND. SIZE(atomic_matrix, 2) > 0) THEN
371         atomic_matrix = 0.0_dp
372      END IF
373
374      ! get atomic halo information
375      CALL fb_atomic_halo_get(atomic_halo=atomic_halo, &
376                              natoms=natoms_in_halo, &
377                              halo_atoms=halo_atoms)
378
379      ! construct atomic matrix using data from matrix_storage
380      DO iatom = 1, natoms_in_halo
381         iatom_global = halo_atoms(iatom)
382         DO jatom = 1, natoms_in_halo
383            jatom_global = halo_atoms(jatom)
384            ! atomic matrices are symmetric, fill only the top
385            ! triangular part
386            IF (jatom_global .GE. iatom_global) THEN
387               CALL fb_matrix_data_get(matrix_storage, &
388                                       iatom_global, &
389                                       jatom_global, &
390                                       blk_p, &
391                                       found)
392               ! copy data to atomic_matrix if found
393               IF (found) THEN
394                  DO jj = 1, SIZE(blk_p, 2)
395                     icol = blk_col_start(jatom) + jj - 1
396                     DO ii = 1, SIZE(blk_p, 1)
397                        irow = blk_row_start(iatom) + ii - 1
398                        atomic_matrix(irow, icol) = blk_p(ii, jj)
399                     END DO ! ii
400                  END DO ! jj
401               END IF
402            END IF
403         END DO ! jatom
404      END DO ! iatom
405
406      ! the constructed matrix is upper triangular, fill it up to full
407      DO ii = 2, SIZE(atomic_matrix, 1)
408         DO jj = 1, ii - 1
409            atomic_matrix(ii, jj) = atomic_matrix(jj, ii)
410         END DO
411      END DO
412
413      CALL timestop(handle)
414
415   END SUBROUTINE fb_atmatrix_construct_2
416
417! ****************************************************************************
418!> \brief generate list of blocks (atom pairs) of a DBCSR matrix to be
419!>        sent and recived in order to construct an atomic matrix
420!>        corresponding to a given atomic halo. This version is for the case
421!>        when we do MPI communications at each step, for each atomic matrix.
422!> \param dbcsr_mat : The DBCSR matrix the atom blocks come from
423!> \param atomic_halo : the atomic halo used to construct the atomic
424!>                      matrix
425!> \param para_env : cp2k parallel environment
426!> \param atom_pairs_send : list of atom blocks from local DBCSR matrix
427!>                          data to be sent
428!> \param atom_pairs_recv : list of atom blocks from remote DBCSR matrix
429!>                          data to be recveived
430!> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
431! **************************************************************************************************
432   SUBROUTINE fb_atmatrix_generate_com_pairs(dbcsr_mat, &
433                                             atomic_halo, &
434                                             para_env, &
435                                             atom_pairs_send, &
436                                             atom_pairs_recv)
437      TYPE(dbcsr_type), POINTER                          :: dbcsr_mat
438      TYPE(fb_atomic_halo_obj), INTENT(IN)               :: atomic_halo
439      TYPE(cp_para_env_type), POINTER                    :: para_env
440      TYPE(fb_com_atom_pairs_obj), INTENT(INOUT)         :: atom_pairs_send, atom_pairs_recv
441
442      CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_atmatrix_generate_com_pairs', &
443         routineP = moduleN//':'//routineN
444
445      INTEGER :: counter, dest, handle, iatom, iatom_global, itask, jatom, jatom_global, &
446         natoms_in_halo, nblkrows_total, nencode, ntasks_recv, ntasks_send, src
447      INTEGER(KIND=int_8)                                :: pair
448      INTEGER(KIND=int_8), DIMENSION(:, :), POINTER      :: tasks_recv, tasks_send
449      INTEGER, DIMENSION(:), POINTER                     :: halo_atoms
450      LOGICAL                                            :: found
451      REAL(KIND=dp), DIMENSION(:, :), POINTER            :: mat_block
452      TYPE(fb_com_tasks_obj)                             :: com_tasks_recv, com_tasks_send
453
454      CALL timeset(routineN, handle)
455
456      NULLIFY (halo_atoms, tasks_send, tasks_recv)
457      CALL fb_com_tasks_nullify(com_tasks_send)
458      CALL fb_com_tasks_nullify(com_tasks_recv)
459
460      ! initialise atom_pairs_send and atom_pairs_receive
461      IF (fb_com_atom_pairs_has_data(atom_pairs_send)) THEN
462         CALL fb_com_atom_pairs_init(atom_pairs_send)
463      ELSE
464         CALL fb_com_atom_pairs_create(atom_pairs_send)
465      END IF
466      IF (fb_com_atom_pairs_has_data(atom_pairs_recv)) THEN
467         CALL fb_com_atom_pairs_init(atom_pairs_recv)
468      ELSE
469         CALL fb_com_atom_pairs_create(atom_pairs_recv)
470      END IF
471
472      ! get atomic halo information
473      CALL fb_atomic_halo_get(atomic_halo=atomic_halo, &
474                              natoms=natoms_in_halo, &
475                              halo_atoms=halo_atoms)
476
477      ! get the total number of atoms, we can obtain this directly
478      ! from the global block row dimension of the dbcsr matrix
479      CALL dbcsr_get_info(matrix=dbcsr_mat, &
480                          nblkrows_total=nblkrows_total)
481
482      ! destination proc is always the local processor
483      dest = para_env%mepos ! my MPI rank
484
485      ! generate recv task list (tasks_recv)
486
487      ! a recv task corresponds to the copying or transfering of a
488      ! matrix block in the part of the DBCSR matrix owned by the src
489      ! proc to this proc in order to construct the atomic matrix
490      ! corresponding to the given atomic halo. As an upper-bound, the
491      ! number of matrix blocks requred do not exceed natoms_in_halo**2
492      ntasks_recv = natoms_in_halo*natoms_in_halo
493
494      ALLOCATE (tasks_recv(TASK_N_RECORDS, ntasks_recv))
495
496      ! now that tasks_recv has been allocated, generate the tasks
497      itask = 1
498      DO iatom = 1, natoms_in_halo
499         iatom_global = halo_atoms(iatom)
500         DO jatom = 1, natoms_in_halo
501            jatom_global = halo_atoms(jatom)
502            ! atomic matrix is symmetric, and only upper triangular part
503            ! is stored in DBCSR matrix
504            IF (jatom_global .GE. iatom_global) THEN
505               ! find the source proc that supposed to own the block
506               ! (iatom_global, jatom_global)
507               CALL dbcsr_get_stored_coordinates(dbcsr_mat, &
508                                                 iatom_global, &
509                                                 jatom_global, &
510                                                 processor=src)
511               ! we must encode the global atom indices rather the halo
512               ! atomic indices in each task, because halo atomic
513               ! indices are local to each halo, and each processor is
514               ! working on a different halo local to them. So one
515               ! processor would not have the information about the halo
516               ! on another processor, rendering the halo atomic indices
517               ! rather useless outside the local processor.
518               tasks_recv(TASK_DEST, itask) = dest
519               tasks_recv(TASK_SRC, itask) = src
520
521               CALL fb_com_tasks_encode_pair(tasks_recv(TASK_PAIR, itask), &
522                                             iatom_global, jatom_global, &
523                                             nblkrows_total)
524               ! calculation of cost not implemented at the moment
525               tasks_recv(TASK_COST, itask) = 0
526               itask = itask + 1
527            END IF
528         END DO ! jatom
529      END DO ! iatom
530
531      ! get the actual number of tasks
532      ntasks_recv = itask - 1
533
534      ! create tasks
535      CALL fb_com_tasks_create(com_tasks_recv)
536      CALL fb_com_tasks_create(com_tasks_send)
537
538      CALL fb_com_tasks_set(com_tasks=com_tasks_recv, &
539                            task_dim=TASK_N_RECORDS, &
540                            ntasks=ntasks_recv, &
541                            nencode=nblkrows_total, &
542                            tasks=tasks_recv)
543
544      ! genearte the send task list (tasks_send) from the recv task list
545      CALL fb_com_tasks_transpose_dest_src(com_tasks_recv, ">", com_tasks_send, &
546                                           para_env)
547
548      CALL fb_com_tasks_get(com_tasks=com_tasks_send, &
549                            ntasks=ntasks_send, &
550                            tasks=tasks_send, &
551                            nencode=nencode)
552
553      ! because the atomic_halos and the neighbor_list_set used to
554      ! generate the sparse structure of the DBCSR matrix do not
555      ! necessarily have to coincide, we must check of the blocks in
556      ! tasks_send (these should be local to the processor) do indeed
557      ! exist in the DBCSR matrix, if not, then we need to prune these
558      ! out of the task list
559
560      counter = 0
561      DO itask = 1, ntasks_send
562         pair = tasks_send(TASK_PAIR, itask)
563         CALL fb_com_tasks_decode_pair(pair, iatom_global, jatom_global, nencode)
564         ! check if block exists in DBCSR matrix
565         CALL dbcsr_get_block_p(matrix=dbcsr_mat, &
566                                row=iatom_global, col=jatom_global, block=mat_block, &
567                                found=found)
568         IF (found) THEN
569            counter = counter + 1
570            ! we can do this here, because essencially we are inspecting
571            ! the send tasks one by one, and then omit ones which the
572            ! block is not found in the DBCSR matrix. itask is always
573            ! .GE. counter
574            tasks_send(1:TASK_N_RECORDS, counter) = tasks_send(1:TASK_N_RECORDS, itask)
575         END IF
576      END DO
577      ! the new send task list should have size counter. counter
578      ! .LE. the old ntasks_send, thus the task list does not really
579      ! need to be reallocated (as it is just a temporary array), and
580      ! the useful data will cutoff at counter, and the rest of the
581      ! array will just be garbage
582      ntasks_send = counter
583
584      ! tasks_send is set through the pointer already
585      CALL fb_com_tasks_set(com_tasks=com_tasks_send, &
586                            ntasks=ntasks_send)
587
588      ! now, re-distribute the new send tasks list to other processors
589      ! to build the updated recv tasks list
590      CALL fb_com_tasks_transpose_dest_src(com_tasks_recv, "<", com_tasks_send, &
591                                           para_env)
592
593      ! task lists are now complete, now construct the atom_pairs_send
594      ! and atom_pairs_recv from the tasks lists
595      CALL fb_com_tasks_build_atom_pairs(com_tasks=com_tasks_send, &
596                                         atom_pairs=atom_pairs_send, &
597                                         natoms_encode=nencode, &
598                                         send_or_recv="send")
599      CALL fb_com_tasks_build_atom_pairs(com_tasks=com_tasks_recv, &
600                                         atom_pairs=atom_pairs_recv, &
601                                         natoms_encode=nencode, &
602                                         send_or_recv="recv")
603
604      ! cleanup
605      CALL fb_com_tasks_release(com_tasks_recv)
606      CALL fb_com_tasks_release(com_tasks_send)
607
608      CALL timestop(handle)
609
610   END SUBROUTINE fb_atmatrix_generate_com_pairs
611
612! ****************************************************************************
613!> \brief generate list of blocks (atom pairs) of a DBCSR matrix to be
614!>        sent and recived in order to construct all local atomic matrices
615!>        corresponding to the atomic halos. This version is for the case
616!>        when we do MPI communications collectively in one go at the
617!>        beginning.
618!> \param dbcsr_mat : The DBCSR matrix the atom blocks come from
619!> \param atomic_halos : the list of all atomic halos local to the process
620!> \param para_env : cp2k parallel environment
621!> \param atom_pairs_send : list of atom blocks from local DBCSR matrix
622!>                          data to be sent
623!> \param atom_pairs_recv : list of atom blocks from remote DBCSR matrix
624!>                          data to be recveived
625!> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
626! **************************************************************************************************
627   SUBROUTINE fb_atmatrix_generate_com_pairs_2(dbcsr_mat, &
628                                               atomic_halos, &
629                                               para_env, &
630                                               atom_pairs_send, &
631                                               atom_pairs_recv)
632      TYPE(dbcsr_type), POINTER                          :: dbcsr_mat
633      TYPE(fb_atomic_halo_list_obj), INTENT(IN)          :: atomic_halos
634      TYPE(cp_para_env_type), POINTER                    :: para_env
635      TYPE(fb_com_atom_pairs_obj), INTENT(INOUT)         :: atom_pairs_send, atom_pairs_recv
636
637      CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_atmatrix_generate_com_pairs_2', &
638         routineP = moduleN//':'//routineN
639
640      INTEGER :: counter, dest, handle, iatom, iatom_global, ihalo, itask, jatom, jatom_global, &
641         natoms_in_halo, nblkrows_total, nencode, nhalos, ntasks_recv, ntasks_send, src
642      INTEGER(KIND=int_8)                                :: pair
643      INTEGER(KIND=int_8), DIMENSION(:, :), POINTER      :: tasks_recv, tasks_send
644      INTEGER, DIMENSION(:), POINTER                     :: halo_atoms
645      LOGICAL                                            :: found
646      REAL(KIND=dp), DIMENSION(:, :), POINTER            :: mat_block
647      TYPE(fb_atomic_halo_obj), DIMENSION(:), POINTER    :: halos
648      TYPE(fb_com_tasks_obj)                             :: com_tasks_recv, com_tasks_send
649
650      CALL timeset(routineN, handle)
651
652      NULLIFY (halo_atoms, tasks_send, tasks_recv)
653      CALL fb_com_tasks_nullify(com_tasks_send)
654      CALL fb_com_tasks_nullify(com_tasks_recv)
655
656      ! initialise atom_pairs_send and atom_pairs_receive
657      IF (fb_com_atom_pairs_has_data(atom_pairs_send)) THEN
658         CALL fb_com_atom_pairs_init(atom_pairs_send)
659      ELSE
660         CALL fb_com_atom_pairs_create(atom_pairs_send)
661      END IF
662      IF (fb_com_atom_pairs_has_data(atom_pairs_recv)) THEN
663         CALL fb_com_atom_pairs_init(atom_pairs_recv)
664      ELSE
665         CALL fb_com_atom_pairs_create(atom_pairs_recv)
666      END IF
667
668      ! get atomic halo list information
669      CALL fb_atomic_halo_list_get(atomic_halos=atomic_halos, &
670                                   nhalos=nhalos, &
671                                   halos=halos)
672      ! get the total number of atoms, we can obtain this directly
673      ! from the global block row dimension of the dbcsr matrix
674      CALL dbcsr_get_info(matrix=dbcsr_mat, &
675                          nblkrows_total=nblkrows_total)
676
677      ! estimate the maximum number of blocks to be received
678      ntasks_recv = 0
679      DO ihalo = 1, nhalos
680         CALL fb_atomic_halo_get(atomic_halo=halos(ihalo), &
681                                 natoms=natoms_in_halo)
682         ntasks_recv = ntasks_recv + natoms_in_halo*natoms_in_halo
683      END DO
684      ALLOCATE (tasks_recv(TASK_N_RECORDS, ntasks_recv))
685
686      ! now that tasks_recv has been allocated, generate the tasks
687
688      ! destination proc is always the local process
689      dest = para_env%mepos
690      itask = 1
691      DO ihalo = 1, nhalos
692         CALL fb_atomic_halo_get(atomic_halo=halos(ihalo), &
693                                 natoms=natoms_in_halo, &
694                                 halo_atoms=halo_atoms)
695         DO iatom = 1, natoms_in_halo
696            iatom_global = halo_atoms(iatom)
697            DO jatom = 1, natoms_in_halo
698               jatom_global = halo_atoms(jatom)
699               ! atomic matrices are always symmetric, treat it as such.
700               ! so only deal with upper triangular parts
701               IF (jatom_global .GE. iatom_global) THEN
702                  ! find the source proc that supposed to own the block
703                  ! (iatom_global, jatom_global)
704                  CALL dbcsr_get_stored_coordinates(dbcsr_mat, &
705                                                    iatom_global, &
706                                                    jatom_global, &
707                                                    processor=src)
708                  ! we must encode the global atom indices rather the halo
709                  ! atomic indices in each task, because halo atomic indices
710                  ! are local to each halo, and each processor is working on a
711                  ! different halo local to them. So one processor would not
712                  ! have the information about the halo on another processor,
713                  ! rendering the halo atomic indices rather useless outside
714                  ! the local processor.
715                  tasks_recv(TASK_DEST, itask) = dest
716                  tasks_recv(TASK_SRC, itask) = src
717                  CALL fb_com_tasks_encode_pair(tasks_recv(TASK_PAIR, itask), &
718                                                iatom_global, jatom_global, &
719                                                nblkrows_total)
720                  ! calculation of cost not implemented at the moment
721                  tasks_recv(TASK_COST, itask) = 0
722                  itask = itask + 1
723               END IF
724            END DO ! jatom
725         END DO ! iatom
726      END DO ! ihalo
727
728      ! set the actual number of tasks obtained
729      ntasks_recv = itask - 1
730
731      ! create tasks
732      CALL fb_com_tasks_create(com_tasks_recv)
733      CALL fb_com_tasks_create(com_tasks_send)
734
735      CALL fb_com_tasks_set(com_tasks=com_tasks_recv, &
736                            task_dim=TASK_N_RECORDS, &
737                            ntasks=ntasks_recv, &
738                            nencode=nblkrows_total, &
739                            tasks=tasks_recv)
740
741      ! genearte the send task list (tasks_send) from the recv task list
742      CALL fb_com_tasks_transpose_dest_src(com_tasks_recv, ">", com_tasks_send, &
743                                           para_env)
744
745      CALL fb_com_tasks_get(com_tasks=com_tasks_send, &
746                            ntasks=ntasks_send, &
747                            tasks=tasks_send, &
748                            nencode=nencode)
749
750      ! because the atomic_halos and the neighbor_list_set used to
751      ! generate the sparse structure of the DBCSR matrix do not
752      ! necessarily have to coincide, we must check of the blocks in
753      ! tasks_send (these should be local to the processor) do indeed
754      ! exist in the DBCSR matrix, if not, then we need to prune these
755      ! out of the task list
756
757      counter = 0
758      DO itask = 1, ntasks_send
759         pair = tasks_send(TASK_PAIR, itask)
760         CALL fb_com_tasks_decode_pair(pair, iatom_global, jatom_global, nencode)
761         ! check if block exists in DBCSR matrix
762         CALL dbcsr_get_block_p(matrix=dbcsr_mat, row=iatom_global, &
763                                col=jatom_global, block=mat_block, &
764                                found=found)
765         IF (found) THEN
766            counter = counter + 1
767            ! we can do this here, because essencially we are inspecting
768            ! the send tasks one by one, and then omit ones which the
769            ! block is not found in the DBCSR matrix. itask is always
770            ! .GE. counter
771            tasks_send(1:TASK_N_RECORDS, counter) = tasks_send(1:TASK_N_RECORDS, itask)
772         END IF
773      END DO
774      ! the new send task list should have size counter. counter
775      ! .LE. the old ntasks_send, thus the task list does not really
776      ! need to be reallocated (as it is just a temporary array), and
777      ! the useful data will cutoff at counter, and the rest of the
778      ! array will just be garbage
779      ntasks_send = counter
780
781      ! tasks_send is set through the pointer already
782      CALL fb_com_tasks_set(com_tasks=com_tasks_send, &
783                            ntasks=ntasks_send)
784
785      ! now, re-distribute the new send tasks list to other processors
786      ! to build the updated recv tasks list
787      CALL fb_com_tasks_transpose_dest_src(com_tasks_recv, "<", com_tasks_send, &
788                                           para_env)
789
790      ! task lists are now complete, now construct the atom_pairs_send
791      ! and atom_pairs_recv from the tasks lists
792      CALL fb_com_tasks_build_atom_pairs(com_tasks=com_tasks_send, &
793                                         atom_pairs=atom_pairs_send, &
794                                         natoms_encode=nencode, &
795                                         send_or_recv="send")
796      CALL fb_com_tasks_build_atom_pairs(com_tasks=com_tasks_recv, &
797                                         atom_pairs=atom_pairs_recv, &
798                                         natoms_encode=nencode, &
799                                         send_or_recv="recv")
800
801      ! cleanup
802      CALL fb_com_tasks_release(com_tasks_recv)
803      CALL fb_com_tasks_release(com_tasks_send)
804
805      CALL timestop(handle)
806
807   END SUBROUTINE fb_atmatrix_generate_com_pairs_2
808
809END MODULE qs_fb_atomic_matrix_methods
810