1!--------------------------------------------------------------------------------------------------!
2!   CP2K: A general program to perform molecular dynamics simulations                              !
3!   Copyright (C) 2000 - 2019  CP2K developers group                                               !
4!--------------------------------------------------------------------------------------------------!
5
6MODULE qs_fb_distribution_methods
7
8   USE cell_types,                      ONLY: cell_type
9   USE cp_log_handling,                 ONLY: cp_get_default_logger,&
10                                              cp_logger_type
11   USE cp_output_handling,              ONLY: cp_print_key_finished_output,&
12                                              cp_print_key_unit_nr
13   USE cp_para_types,                   ONLY: cp_para_env_type
14   USE dbcsr_api,                       ONLY: dbcsr_distribution_get,&
15                                              dbcsr_distribution_type,&
16                                              dbcsr_get_info,&
17                                              dbcsr_nblkcols_total,&
18                                              dbcsr_p_type,&
19                                              dbcsr_type
20   USE input_section_types,             ONLY: section_vals_type
21   USE kinds,                           ONLY: dp
22   USE particle_types,                  ONLY: particle_type
23   USE qs_environment_types,            ONLY: get_qs_env,&
24                                              qs_environment_type
25   USE qs_fb_atomic_halo_types,         ONLY: &
26        fb_atomic_halo_build_halo_atoms, fb_atomic_halo_cost, fb_atomic_halo_create, &
27        fb_atomic_halo_init, fb_atomic_halo_nullify, fb_atomic_halo_obj, fb_atomic_halo_release, &
28        fb_atomic_halo_set, fb_build_pair_radii
29   USE qs_fb_env_types,                 ONLY: fb_env_get,&
30                                              fb_env_obj,&
31                                              fb_env_set
32   USE qs_kind_types,                   ONLY: qs_kind_type
33   USE util,                            ONLY: sort
34#include "./base/base_uses.f90"
35
36   IMPLICIT NONE
37
38   PRIVATE
39
40   PUBLIC :: fb_distribution_build
41
42   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_fb_distribution_methods'
43
44! **************************************************************************************************
45!> \brief derived type containing cost data used for process distribution
46!> \param id               : global atomic index
47!> \param cost             : computational cost for the atomic matrix associated
48!>                           to this atom
49!> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
50! **************************************************************************************************
51   TYPE fb_distribution_element
52      INTEGER :: id
53      REAL(KIND=dp) :: cost
54   END TYPE fb_distribution_element
55
56! **************************************************************************************************
57!> \brief derived type containing the list of atoms currently allocated to a
58!>        processor
59!> \param list             : list of atoms and their associated costs
60!> \param cost             : total cost of the list
61!> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
62! **************************************************************************************************
63   TYPE fb_distribution_list
64      TYPE(fb_distribution_element), DIMENSION(:), POINTER :: list => NULL()
65      INTEGER :: nelements
66      REAL(KIND=dp) :: cost
67   END TYPE fb_distribution_list
68
69! **************************************************************************************************
70!> \brief In filter matrix algorithm, each atomic matrix contributes to a
71!>        column in the filter matrix, which is stored in DBCSR format.
72!>        When distributing the atoms (and hence the atomic matrics) to the
73!>        processors, we want the processors to have atoms that would be
74!>        correspond to the block columns in the DBCSR format local to them.
75!>        This derived type stores this information. For each atom, it
76!>        corresponds to a DBCSR block column, and the list of processors
77!>        in the 2D processor grid responsible for this column will be the
78!>        preferred processors for this atom.
79!> \param list             : list of preferred processors for an atom
80!>                           note that here the processors are indexed from
81!>                           1, i.e. = MPI_RANK+1
82!> \param nprocs           : number of processors in the list
83!> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
84! **************************************************************************************************
85   TYPE fb_preferred_procs_list
86      INTEGER, DIMENSION(:), POINTER :: list => NULL()
87      INTEGER :: nprocs
88   END TYPE fb_preferred_procs_list
89
90! Parameters related to automatic resizing of the hash_table:
91! Resize by EXPAND_FACTOR if total no. slots / no. of filled slots < ENLARGE_RATIO
92   INTEGER, PARAMETER, PRIVATE :: ENLARGE_RATIO = 1
93   INTEGER, PARAMETER, PRIVATE :: REDUCE_RATIO = 3
94   INTEGER, PARAMETER, PRIVATE :: EXPAND_FACTOR = 2
95   INTEGER, PARAMETER, PRIVATE :: SHRINK_FACTOR = 2
96
97   INTERFACE fb_distribution_remove
98      MODULE PROCEDURE fb_distribution_remove_ind, &
99         fb_distribution_remove_el
100   END INTERFACE fb_distribution_remove
101
102   INTERFACE fb_distribution_move
103      MODULE PROCEDURE fb_distribution_move_ind, &
104         fb_distribution_move_el
105   END INTERFACE fb_distribution_move
106
107CONTAINS
108
109! **************************************************************************************************
110!> \brief Build local atoms associated to filter matrix algorithm for each
111!>        MPI process, trying to balance the load for calculating the
112!>        filter matrix
113!> \param fb_env : the filter matrix environment
114!> \param qs_env : quickstep environment
115!> \param scf_section : SCF input section
116!> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
117! **************************************************************************************************
118   SUBROUTINE fb_distribution_build(fb_env, qs_env, scf_section)
119      TYPE(fb_env_obj), INTENT(INOUT)                    :: fb_env
120      TYPE(qs_environment_type), POINTER                 :: qs_env
121      TYPE(section_vals_type), POINTER                   :: scf_section
122
123      CHARACTER(len=*), PARAMETER :: routineN = 'fb_distribution_build', &
124         routineP = moduleN//':'//routineN
125
126      INTEGER :: handle, i_common_set, iatom, ii, ipe, lb, lowest_cost_ind, my_pe, n_common_sets, &
127         natoms, nhalo_atoms, nkinds, nprocs, owner_id_in_halo, pref_pe, ub
128      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: common_set_ids, local_atoms_all, &
129                                                            local_atoms_sizes, local_atoms_starts, &
130                                                            pe, pos_in_preferred_list
131      INTEGER, DIMENSION(:), POINTER                     :: halo_atoms, local_atoms
132      LOGICAL                                            :: acceptable_move, move_happened
133      REAL(KIND=dp)                                      :: average_cost
134      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: cost_per_atom, cost_per_proc
135      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: pair_radii
136      REAL(KIND=dp), DIMENSION(:), POINTER               :: rcut
137      TYPE(cell_type), POINTER                           :: cell
138      TYPE(cp_para_env_type), POINTER                    :: para_env
139      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: mat_ks
140      TYPE(fb_atomic_halo_obj)                           :: atomic_halo
141      TYPE(fb_distribution_element)                      :: element
142      TYPE(fb_distribution_list), ALLOCATABLE, &
143         DIMENSION(:)                                    :: dist
144      TYPE(fb_preferred_procs_list), ALLOCATABLE, &
145         DIMENSION(:)                                    :: preferred_procs_set
146      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
147      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
148
149      CALL timeset(routineN, handle)
150
151      NULLIFY (mat_ks, rcut, cell, para_env, particle_set, qs_kind_set, &
152               halo_atoms, local_atoms)
153      CALL fb_atomic_halo_nullify(atomic_halo)
154
155      ! obtain relevant data from fb_env, qs_env
156      CALL fb_env_get(fb_env=fb_env, &
157                      rcut=rcut)
158      CALL get_qs_env(qs_env=qs_env, &
159                      natom=natoms, &
160                      particle_set=particle_set, &
161                      qs_kind_set=qs_kind_set, &
162                      nkind=nkinds, &
163                      cell=cell, &
164                      para_env=para_env, &
165                      matrix_ks=mat_ks)
166      nprocs = para_env%num_pe
167      my_pe = para_env%mepos + 1 ! counting from 1
168
169      ! for each global atom, build atomic halo and get the associated cost
170      ALLOCATE (pair_radii(nkinds, nkinds))
171      CALL fb_build_pair_radii(rcut, nkinds, pair_radii)
172      CALL fb_atomic_halo_create(atomic_halo)
173      ALLOCATE (cost_per_atom(natoms))
174      DO iatom = 1, natoms
175         CALL fb_atomic_halo_init(atomic_halo)
176         CALL fb_atomic_halo_build_halo_atoms(iatom, &
177                                              particle_set, &
178                                              cell, &
179                                              pair_radii, &
180                                              halo_atoms, &
181                                              nhalo_atoms, &
182                                              owner_id_in_halo)
183         CALL fb_atomic_halo_set(atomic_halo=atomic_halo, &
184                                 owner_atom=iatom, &
185                                 natoms=nhalo_atoms, &
186                                 halo_atoms=halo_atoms)
187         NULLIFY (halo_atoms)
188         cost_per_atom(iatom) = fb_atomic_halo_cost(atomic_halo, particle_set, qs_kind_set)
189      END DO
190      DEALLOCATE (pair_radii)
191      CALL fb_atomic_halo_release(atomic_halo)
192
193      ! build the preferred_procs_set according to DBCSR mat H
194      ALLOCATE (preferred_procs_set(natoms))
195      ALLOCATE (common_set_ids(natoms))
196      CALL fb_build_preferred_procs(mat_ks(1)%matrix, &
197                                    natoms, &
198                                    preferred_procs_set, &
199                                    common_set_ids, &
200                                    n_common_sets)
201
202      ! for each atomic halo, construct distribution_element, and assign
203      ! the element to a processors using preferred_procs_set in a
204      ! round-robin manner
205      ALLOCATE (dist(nprocs))
206      DO ipe = 1, nprocs
207         CALL fb_distribution_init(dist=dist(ipe))
208      END DO
209      ALLOCATE (pos_in_preferred_list(n_common_sets))
210      pos_in_preferred_list(:) = 0
211      DO iatom = 1, natoms
212         element%id = iatom
213         element%cost = cost_per_atom(iatom)
214         i_common_set = common_set_ids(iatom)
215         pos_in_preferred_list(i_common_set) = &
216            MOD(pos_in_preferred_list(i_common_set), &
217                preferred_procs_set(iatom)%nprocs) + 1
218         ipe = preferred_procs_set(iatom)%list(pos_in_preferred_list(i_common_set))
219         CALL fb_distribution_add(dist(ipe), element)
220      END DO
221
222      DEALLOCATE (pos_in_preferred_list)
223      DEALLOCATE (common_set_ids)
224      DEALLOCATE (cost_per_atom)
225
226      ! sort processors according to the overall cost of their assigned
227      ! corresponding distribution
228      ALLOCATE (cost_per_proc(nprocs))
229      DO ipe = 1, nprocs
230         cost_per_proc(ipe) = dist(ipe)%cost
231      END DO
232      ALLOCATE (pe(nprocs))
233      CALL sort(cost_per_proc, nprocs, pe)
234      ! now that cost_per_proc is sorted, ipe's no longer give mpi
235      ! ranks, the correct one to use should be pe(ipe)
236
237      ! work out the ideal average cost per proc if work load is evenly
238      ! distributed
239      average_cost = SUM(cost_per_proc)/REAL(nprocs, dp)
240
241      DEALLOCATE (cost_per_proc)
242
243      ! loop over the processors, starting with the highest cost, move
244      ! atoms one by one:
245      !   1. FIRST to the next processor in the preferred list that has
246      !      cost below average. IF no such proc is found, THEN
247      !   2. to the next procesor in the overall list that has cost
248      !      below average.
249      ! repeat until the cost on this processor is less than or equal
250      ! to the average cost
251      lowest_cost_ind = 1
252      DO ipe = nprocs, 1, -1
253         redistribute: DO WHILE (dist(pe(ipe))%cost .GT. average_cost)
254            iatom = dist(pe(ipe))%list(lowest_cost_ind)%id
255            move_happened = .FALSE.
256            ! first try to move to a preferred process
257            preferred: DO ii = 1, preferred_procs_set(iatom)%nprocs
258               pref_pe = preferred_procs_set(iatom)%list(ii)
259               acceptable_move = &
260                  fb_distribution_acceptable_move(dist(pe(ipe)), &
261                                                  dist(pe(ipe))%list(lowest_cost_ind), &
262                                                  dist(pref_pe), &
263                                                  average_cost)
264               IF ((pref_pe .NE. pe(ipe)) .AND. acceptable_move) THEN
265                  CALL fb_distribution_move(dist(pe(ipe)), &
266                                            lowest_cost_ind, &
267                                            dist(pref_pe))
268                  move_happened = .TRUE.
269                  EXIT preferred
270               END IF
271            END DO preferred
272            ! if no preferred process is avaliable, move to a proc in
273            ! the sorted list that has cost less than average.  remember
274            ! that some of the proc may have already taken redistributed
275            ! atoms, and thus may beome unavaliable (full)
276            IF (.NOT. move_happened) THEN
277               ! searching from the proc with the least initial cost
278               next_in_line: DO ii = 1, nprocs
279                  acceptable_move = &
280                     fb_distribution_acceptable_move(dist(pe(ipe)), &
281                                                     dist(pe(ipe))%list(lowest_cost_ind), &
282                                                     dist(pe(ii)), &
283                                                     average_cost)
284                  IF ((pe(ii) .NE. pe(ipe)) .AND. acceptable_move) THEN
285                     CALL fb_distribution_move(dist(pe(ipe)), &
286                                               lowest_cost_ind, &
287                                               dist(pe(ii)))
288                     move_happened = .TRUE.
289                     EXIT next_in_line
290                  END IF
291               END DO next_in_line
292            END IF
293            ! if the atom cannot be moved, then this means it is too
294            ! costly for all other processes to accept. When this
295            ! happens we must stop the redistribution process for this
296            ! processor---as all other of its atoms will be even more
297            ! costly
298            IF (.NOT. move_happened) THEN
299               EXIT redistribute
300            END IF
301         END DO redistribute ! while
302      END DO ! ipe
303
304      DEALLOCATE (pe)
305      DO ii = 1, SIZE(preferred_procs_set)
306         CALL fb_preferred_procs_list_release(preferred_procs_set(ii))
307      END DO
308      DEALLOCATE (preferred_procs_set)
309
310      ! generate local atoms from dist
311      ALLOCATE (local_atoms_all(natoms))
312      ALLOCATE (local_atoms_starts(nprocs))
313      ALLOCATE (local_atoms_sizes(nprocs))
314      CALL fb_distribution_to_local_atoms(dist, &
315                                          local_atoms_all, &
316                                          local_atoms_starts, &
317                                          local_atoms_sizes)
318      ALLOCATE (local_atoms(local_atoms_sizes(my_pe)))
319      lb = local_atoms_starts(my_pe)
320      ub = local_atoms_starts(my_pe) + local_atoms_sizes(my_pe) - 1
321      local_atoms(1:local_atoms_sizes(my_pe)) = local_atoms_all(lb:ub)
322      CALL fb_env_set(fb_env=fb_env, &
323                      local_atoms=local_atoms, &
324                      nlocal_atoms=local_atoms_sizes(my_pe))
325
326      ! write out info
327      CALL fb_distribution_write_info(dist, scf_section)
328
329      DEALLOCATE (local_atoms_all)
330      DEALLOCATE (local_atoms_starts)
331      DEALLOCATE (local_atoms_sizes)
332      DO ipe = 1, SIZE(dist)
333         CALL fb_distribution_release(dist(ipe))
334      END DO
335      DEALLOCATE (dist)
336
337      CALL timestop(handle)
338
339   END SUBROUTINE fb_distribution_build
340
341! **************************************************************************************************
342!> \brief Checks if moving an element from one distribution to another is
343!>        allowed in mind of load balancing.
344!> \param dist_from : the source distribution
345!> \param element   : the element in source distribution considered for the
346!>                    move
347!> \param dist_to   : the destination distribution
348!> \param threshold ...
349!> \return : TRUE or FALSE
350!> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
351! **************************************************************************************************
352   PURE FUNCTION fb_distribution_acceptable_move(dist_from, &
353                                                 element, &
354                                                 dist_to, &
355                                                 threshold) &
356      RESULT(acceptable)
357      TYPE(fb_distribution_list), INTENT(IN)             :: dist_from
358      TYPE(fb_distribution_element), INTENT(IN)          :: element
359      TYPE(fb_distribution_list), INTENT(IN)             :: dist_to
360      REAL(KIND=dp), INTENT(IN)                          :: threshold
361      LOGICAL                                            :: acceptable
362
363      acceptable = (dist_to%cost + element%cost .LT. dist_from%cost) .AND. &
364                   (dist_to%cost .LT. threshold)
365   END FUNCTION fb_distribution_acceptable_move
366
367! **************************************************************************************************
368!> \brief Write out information on the load distribution on processors
369!> \param dist_set    : set of distributions for the processors
370!> \param scf_section : SCF input section
371!> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
372! **************************************************************************************************
373   SUBROUTINE fb_distribution_write_info(dist_set, scf_section)
374      TYPE(fb_distribution_list), DIMENSION(:), &
375         INTENT(IN)                                      :: dist_set
376      TYPE(section_vals_type), POINTER                   :: scf_section
377
378      CHARACTER(len=*), PARAMETER :: routineN = 'fb_distribution_write_info', &
379         routineP = moduleN//':'//routineN
380
381      INTEGER                                            :: ii, max_natoms, min_natoms, natoms, &
382                                                            nprocs, unit_nr
383      REAL(KIND=dp)                                      :: ave_cost, ave_natoms, max_cost, &
384                                                            min_cost, total_cost
385      TYPE(cp_logger_type), POINTER                      :: logger
386
387      nprocs = SIZE(dist_set)
388      natoms = 0
389      total_cost = 0.0_dp
390      DO ii = 1, nprocs
391         natoms = natoms + dist_set(ii)%nelements
392         total_cost = total_cost + dist_set(ii)%cost
393      END DO
394      ave_natoms = REAL(natoms, dp)/REAL(nprocs, dp)
395      ave_cost = total_cost/REAL(nprocs, dp)
396      max_natoms = 0
397      max_cost = 0._dp
398      DO ii = 1, nprocs
399         max_natoms = MAX(max_natoms, dist_set(ii)%nelements)
400         max_cost = MAX(max_cost, dist_set(ii)%cost)
401      END DO
402      min_natoms = natoms
403      min_cost = total_cost
404      DO ii = 1, nprocs
405         min_natoms = MIN(min_natoms, dist_set(ii)%nelements)
406         min_cost = MIN(min_cost, dist_set(ii)%cost)
407      END DO
408
409      logger => cp_get_default_logger()
410      unit_nr = cp_print_key_unit_nr(logger, scf_section, &
411                                     "PRINT%FILTER_MATRIX", &
412                                     extension="")
413
414      IF (unit_nr > 0) THEN
415         WRITE (UNIT=unit_nr, FMT="(/,A,I6,A)") &
416            " FILTER_MAT_DIAG| Load distribution across ", nprocs, " processors:"
417         WRITE (UNIT=unit_nr, &
418                FMT="(A,T40,A,T55,A,T70,A,T85,A)") &
419            " FILTER_MAT_DIAG| ", "Total", "Average", "Max", "Min"
420         WRITE (UNIT=unit_nr, &
421                FMT="(A,T40,I12,T55,F12.1,T70,I12,T85,I10)") &
422            " FILTER_MAT_DIAG|   Atomic Matrices", &
423            natoms, ave_natoms, max_natoms, min_natoms
424         WRITE (UNIT=unit_nr, &
425                FMT="(A,T40,D12.7,T55,D12.7,T70,D12.7,T85,D12.7)") &
426            " FILTER_MAT_DIAG|   Cost*", &
427            total_cost, ave_cost, max_cost, min_cost
428         WRITE (UNIT=unit_nr, FMT="(A)") &
429            " FILTER_MAT_DIAG| (* cost is calculated as sum of cube of atomic matrix sizes)"
430      END IF
431      CALL cp_print_key_finished_output(unit_nr, logger, scf_section, &
432                                        "PRINT%FILTER_MATRIX")
433   END SUBROUTINE fb_distribution_write_info
434
435! **************************************************************************************************
436!> \brief Build the preferred list of processors for atoms
437!> \param dbcsr_mat   : the reference DBCSR matrix, from which the local block
438!>                      cols and the processor maps are obtained
439!> \param natoms      : total number of atoms globally
440!> \param preferred_procs_set : set of preferred procs list for each atom
441!> \param common_set_ids : atoms (block cols) local to the same processor grid
442!>                         col will have the same preferred list. This list
443!>                         maps each atom to their corresponding group
444!> \param n_common_sets  : number of unique preferred lists (groups)
445!> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
446! **************************************************************************************************
447   SUBROUTINE fb_build_preferred_procs(dbcsr_mat, &
448                                       natoms, &
449                                       preferred_procs_set, &
450                                       common_set_ids, &
451                                       n_common_sets)
452      TYPE(dbcsr_type), POINTER                          :: dbcsr_mat
453      INTEGER, INTENT(IN)                                :: natoms
454      TYPE(fb_preferred_procs_list), DIMENSION(:), &
455         INTENT(INOUT)                                   :: preferred_procs_set
456      INTEGER, DIMENSION(:), INTENT(OUT)                 :: common_set_ids
457      INTEGER, INTENT(OUT)                               :: n_common_sets
458
459      CHARACTER(len=*), PARAMETER :: routineN = 'fb_build_preferred_procs', &
460         routineP = moduleN//':'//routineN
461
462      INTEGER                                            :: icol, nprows, pcol, prow
463      INTEGER, DIMENSION(:), POINTER                     :: col_dist
464      INTEGER, DIMENSION(:, :), POINTER                  :: pgrid
465      LOGICAL                                            :: check_ok
466      TYPE(dbcsr_distribution_type)                      :: dbcsr_dist
467
468      check_ok = natoms .LE. dbcsr_nblkcols_total(dbcsr_mat)
469      CPASSERT(check_ok)
470      check_ok = SIZE(preferred_procs_set) .GE. natoms
471      CPASSERT(check_ok)
472      check_ok = SIZE(common_set_ids) .GE. natoms
473      CPASSERT(check_ok)
474
475      CALL dbcsr_get_info(dbcsr_mat, distribution=dbcsr_dist, proc_col_dist=col_dist)
476      CALL dbcsr_distribution_get(dbcsr_dist, pgrid=pgrid, nprows=nprows, npcols=n_common_sets)
477
478      DO icol = 1, natoms
479         IF (ASSOCIATED(preferred_procs_set(icol)%list)) THEN
480            DEALLOCATE (preferred_procs_set(icol)%list)
481         END IF
482         ALLOCATE (preferred_procs_set(icol)%list(nprows))
483         pcol = col_dist(icol)
484         ! dbcsr prow and pcol counts from 0
485         DO prow = 0, nprows - 1
486            ! here, we count processes from 1, so +1 from mpirank
487            preferred_procs_set(icol)%list(prow + 1) = pgrid(prow, pcol) + 1
488         END DO
489         preferred_procs_set(icol)%nprocs = nprows
490      END DO
491
492      common_set_ids(:) = 0
493      common_set_ids(1:natoms) = col_dist(1:natoms) + 1
494
495   END SUBROUTINE fb_build_preferred_procs
496
497! **************************************************************************************************
498!> \brief Release a preferred_procs_list
499!> \param preferred_procs_list  : the preferred procs list in question
500!> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
501! **************************************************************************************************
502   SUBROUTINE fb_preferred_procs_list_release(preferred_procs_list)
503      TYPE(fb_preferred_procs_list), INTENT(INOUT)       :: preferred_procs_list
504
505      CHARACTER(len=*), PARAMETER :: routineN = 'fb_preferred_procs_list_release', &
506         routineP = moduleN//':'//routineN
507
508      IF (ASSOCIATED(preferred_procs_list%list)) THEN
509         DEALLOCATE (preferred_procs_list%list)
510      END IF
511   END SUBROUTINE fb_preferred_procs_list_release
512
513! **************************************************************************************************
514!> \brief Convert distribution data to 1D array containing information of
515!>        which atoms are distributed to which processor
516!> \param dist_set    : set of distributions for the processors
517!> \param local_atoms : continuous array of atoms arranged in order
518!>                      corresponding their allocated processors
519!> \param local_atoms_starts : starting position in local_atoms array for
520!>                             each processor
521!> \param local_atoms_sizes  : number of atoms local to each processor
522!> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
523! **************************************************************************************************
524   SUBROUTINE fb_distribution_to_local_atoms(dist_set, &
525                                             local_atoms, &
526                                             local_atoms_starts, &
527                                             local_atoms_sizes)
528      TYPE(fb_distribution_list), DIMENSION(:), &
529         INTENT(IN)                                      :: dist_set
530      INTEGER, DIMENSION(:), INTENT(OUT)                 :: local_atoms, local_atoms_starts, &
531                                                            local_atoms_sizes
532
533      CHARACTER(len=*), PARAMETER :: routineN = 'fb_distribution_to_local_atoms', &
534         routineP = moduleN//':'//routineN
535
536      INTEGER                                            :: iatom, ipe, n_procs, pos
537      LOGICAL                                            :: check_ok
538
539      n_procs = SIZE(dist_set)
540
541      check_ok = SIZE(local_atoms_starts) .GE. n_procs
542      CPASSERT(check_ok)
543      check_ok = SIZE(local_atoms_sizes) .GE. n_procs
544      CPASSERT(check_ok)
545
546      local_atoms(:) = 0
547      local_atoms_starts(:) = 0
548      local_atoms_sizes(:) = 0
549
550      pos = 1
551      DO ipe = 1, n_procs
552         local_atoms_starts(ipe) = pos
553         DO iatom = 1, dist_set(ipe)%nelements
554            local_atoms(pos) = dist_set(ipe)%list(iatom)%id
555            pos = pos + 1
556            local_atoms_sizes(ipe) = local_atoms_sizes(ipe) + 1
557         END DO
558      END DO
559   END SUBROUTINE fb_distribution_to_local_atoms
560
561! **************************************************************************************************
562!> \brief Initialise a distribution
563!> \param dist        : the distribution in question
564!> \param nmax        : [OPTIONAL] size of the list array to be allocated
565!> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
566! **************************************************************************************************
567   SUBROUTINE fb_distribution_init(dist, nmax)
568      TYPE(fb_distribution_list), INTENT(INOUT)          :: dist
569      INTEGER, INTENT(IN), OPTIONAL                      :: nmax
570
571      CHARACTER(len=*), PARAMETER :: routineN = 'fb_distribution_init', &
572         routineP = moduleN//':'//routineN
573
574      INTEGER                                            :: ii, my_nmax
575
576      my_nmax = 0
577      IF (PRESENT(nmax)) my_nmax = nmax
578      IF (ASSOCIATED(dist%list)) THEN
579         DEALLOCATE (dist%list)
580      END IF
581      NULLIFY (dist%list)
582      IF (my_nmax .GT. 0) THEN
583         ALLOCATE (dist%list(my_nmax))
584         DO ii = 1, SIZE(dist%list)
585            dist%list(ii)%id = 0
586            dist%list(ii)%cost = 0.0_dp
587         END DO
588      END IF
589      dist%nelements = 0
590      dist%cost = 0.0_dp
591   END SUBROUTINE fb_distribution_init
592
593! **************************************************************************************************
594!> \brief Resize the list array in a distribution
595!> \param dist        : The distribution in question
596!> \param nmax        : new size of the list array
597!> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
598! **************************************************************************************************
599   SUBROUTINE fb_distribution_resize(dist, nmax)
600      TYPE(fb_distribution_list), INTENT(INOUT)          :: dist
601      INTEGER, INTENT(IN)                                :: nmax
602
603      CHARACTER(len=*), PARAMETER :: routineN = 'fb_distribution_resize', &
604         routineP = moduleN//':'//routineN
605
606      INTEGER                                            :: ii, my_nmax
607      TYPE(fb_distribution_element), DIMENSION(:), &
608         POINTER                                         :: new_list
609
610      IF (.NOT. ASSOCIATED(dist%list)) THEN
611         my_nmax = MAX(nmax, 1)
612         ALLOCATE (dist%list(my_nmax))
613      ELSE
614         my_nmax = MAX(nmax, dist%nelements)
615         ALLOCATE (new_list(my_nmax))
616         DO ii = 1, SIZE(new_list)
617            new_list(ii)%id = 0
618            new_list(ii)%cost = 0.0_dp
619         END DO
620         DO ii = 1, dist%nelements
621            new_list(ii) = dist%list(ii)
622         END DO
623         DEALLOCATE (dist%list)
624         dist%list => new_list
625      END IF
626   END SUBROUTINE fb_distribution_resize
627
628! **************************************************************************************************
629!> \brief Add an atom (element) to a distribution
630!> \param dist        : the distribution in question
631!> \param element     : the element to be added
632!> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
633! **************************************************************************************************
634   SUBROUTINE fb_distribution_add(dist, element)
635      TYPE(fb_distribution_list), INTENT(INOUT)          :: dist
636      TYPE(fb_distribution_element), INTENT(IN)          :: element
637
638      CHARACTER(len=*), PARAMETER :: routineN = 'fb_distribution_add', &
639         routineP = moduleN//':'//routineN
640
641      INTEGER                                            :: ii, new_nelements, pos
642
643      new_nelements = dist%nelements + 1
644
645      ! resize list if necessary
646      IF (.NOT. ASSOCIATED(dist%list)) THEN
647         CALL fb_distribution_resize(dist, new_nelements)
648      ELSE IF (new_nelements*ENLARGE_RATIO .GT. SIZE(dist%list)) THEN
649         CALL fb_distribution_resize(dist, SIZE(dist%list)*EXPAND_FACTOR)
650      END IF
651      ! assuming the list of elements is always sorted with respect to cost
652      ! slot the new element into the appropriate spot
653      IF (new_nelements == 1) THEN
654         dist%list(1) = element
655      ELSE
656         pos = fb_distribution_find_slot(dist, element)
657         DO ii = dist%nelements, pos, -1
658            dist%list(ii + 1) = dist%list(ii)
659         END DO
660         dist%list(pos) = element
661      END IF
662      dist%nelements = new_nelements
663      dist%cost = dist%cost + element%cost
664   END SUBROUTINE fb_distribution_add
665
666! **************************************************************************************************
667!> \brief Find the correct slot in the list array to add a new element, so that
668!>        the list will always be ordered with respect to cost
669!> \param dist        : the distribution in question
670!> \param element     : element to be added
671!> \return : the correct position to add the new element
672!> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
673! **************************************************************************************************
674   PURE FUNCTION fb_distribution_find_slot(dist, element) RESULT(pos)
675      TYPE(fb_distribution_list), INTENT(IN)             :: dist
676      TYPE(fb_distribution_element), INTENT(IN)          :: element
677      INTEGER                                            :: pos
678
679      INTEGER                                            :: lower, middle, N, upper
680
681      N = dist%nelements
682      IF (element%cost .LT. dist%list(1)%cost) THEN
683         pos = 1
684         RETURN
685      END IF
686      IF (element%cost .GE. dist%list(N)%cost) THEN
687         pos = N + 1
688         RETURN
689      END IF
690      lower = 1
691      upper = N
692      DO WHILE ((upper - lower) .GT. 1)
693         middle = (lower + upper)/2
694         IF (element%cost .LT. dist%list(middle)%cost) THEN
695            upper = middle
696         ELSE
697            lower = middle
698         END IF
699      END DO
700      pos = upper
701   END FUNCTION fb_distribution_find_slot
702
703! **************************************************************************************************
704!> \brief Remove the pos-th element from a distribution
705!> \param dist        : the distribution in question
706!> \param pos         : index of the element in the list array
707!> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
708! **************************************************************************************************
709   SUBROUTINE fb_distribution_remove_ind(dist, pos)
710      TYPE(fb_distribution_list), INTENT(INOUT)          :: dist
711      INTEGER, INTENT(IN)                                :: pos
712
713      CHARACTER(len=*), PARAMETER :: routineN = 'fb_distribution_remove_ind', &
714         routineP = moduleN//':'//routineN
715
716      INTEGER                                            :: ii
717      LOGICAL                                            :: check_ok
718
719      check_ok = pos .GT. 0
720      CPASSERT(check_ok)
721      IF (pos .LE. dist%nelements) THEN
722         dist%cost = dist%cost - dist%list(pos)%cost
723         DO ii = pos, dist%nelements - 1
724            dist%list(ii) = dist%list(ii + 1)
725         END DO
726         dist%list(dist%nelements)%id = 0
727         dist%list(dist%nelements)%cost = 0.0_dp
728         dist%nelements = dist%nelements - 1
729         ! auto resize if required
730         IF (dist%nelements*REDUCE_RATIO .LT. SIZE(dist%list)) THEN
731            CALL fb_distribution_resize(dist, dist%nelements/SHRINK_FACTOR)
732         END IF
733      END IF
734   END SUBROUTINE fb_distribution_remove_ind
735
736! **************************************************************************************************
737!> \brief Remove a given element from a distribution
738!> \param dist        : the distribution in question
739!> \param element     : the element in question
740!> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
741! **************************************************************************************************
742   SUBROUTINE fb_distribution_remove_el(dist, element)
743      TYPE(fb_distribution_list), INTENT(INOUT)          :: dist
744      TYPE(fb_distribution_element), INTENT(IN)          :: element
745
746      CHARACTER(len=*), PARAMETER :: routineN = 'fb_distribution_remove_el', &
747         routineP = moduleN//':'//routineN
748
749      INTEGER                                            :: ii, pos
750
751      pos = dist%nelements + 1
752      DO ii = 1, dist%nelements
753         IF (element%id == dist%list(ii)%id) THEN
754            pos = ii
755            EXIT
756         END IF
757      END DO
758      CALL fb_distribution_remove_ind(dist, pos)
759   END SUBROUTINE fb_distribution_remove_el
760
761! **************************************************************************************************
762!> \brief Move the pos-th element from a distribution to another
763!> \param dist_from   : the source distribution
764!> \param pos         : index of the element in the source distribution
765!> \param dist_to     : the destination distribution
766!> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
767! **************************************************************************************************
768   SUBROUTINE fb_distribution_move_ind(dist_from, pos, dist_to)
769      TYPE(fb_distribution_list), INTENT(INOUT)          :: dist_from
770      INTEGER, INTENT(IN)                                :: pos
771      TYPE(fb_distribution_list), INTENT(INOUT)          :: dist_to
772
773      CHARACTER(len=*), PARAMETER :: routineN = 'fb_distribution_move_ind', &
774         routineP = moduleN//':'//routineN
775
776      LOGICAL                                            :: check_ok
777      TYPE(fb_distribution_element)                      :: element
778
779      check_ok = ASSOCIATED(dist_from%list)
780      CPASSERT(check_ok)
781      check_ok = pos .LE. dist_from%nelements
782      CPASSERT(check_ok)
783      element = dist_from%list(pos)
784      CALL fb_distribution_add(dist_to, element)
785      CALL fb_distribution_remove(dist_from, pos)
786   END SUBROUTINE fb_distribution_move_ind
787
788! **************************************************************************************************
789!> \brief Move a given element from a distribution to another
790!> \param dist_from   : the source distribution
791!> \param element     : the element in question
792!> \param dist_to     : the destination distribution
793!> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
794! **************************************************************************************************
795   SUBROUTINE fb_distribution_move_el(dist_from, element, dist_to)
796      TYPE(fb_distribution_list), INTENT(INOUT)          :: dist_from
797      TYPE(fb_distribution_element), INTENT(IN)          :: element
798      TYPE(fb_distribution_list), INTENT(INOUT)          :: dist_to
799
800      CHARACTER(len=*), PARAMETER :: routineN = 'fb_distribution_move_el', &
801         routineP = moduleN//':'//routineN
802
803      LOGICAL                                            :: check_ok
804
805      check_ok = ASSOCIATED(dist_from%list)
806      CPASSERT(check_ok)
807      CALL fb_distribution_add(dist_to, element)
808      CALL fb_distribution_remove(dist_from, element)
809   END SUBROUTINE fb_distribution_move_el
810
811! **************************************************************************************************
812!> \brief Release a distribution
813!> \param dist  : the distribution in question
814!> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
815! **************************************************************************************************
816   SUBROUTINE fb_distribution_release(dist)
817      TYPE(fb_distribution_list), INTENT(INOUT)          :: dist
818
819      CHARACTER(len=*), PARAMETER :: routineN = 'fb_distribution_release', &
820         routineP = moduleN//':'//routineN
821
822      IF (ASSOCIATED(dist%list)) THEN
823         DEALLOCATE (dist%list)
824      END IF
825   END SUBROUTINE fb_distribution_release
826
827END MODULE qs_fb_distribution_methods
828