!================================================================================
!
! Modules:
!
! (1) peinfo_m      Originally by DAS 8/20/2010
!
!     Defines the type and a global instance of the object for "processor
!     equivalent" info. Uses the mpi module to define interfaces for MPI calls.
!     [For F77, the MPI header 'mpif.h' was included instead.]
!
!================================================================================

#include "f_defs.h"

module peinfo_m

#ifdef MPI
  use mpi
! include "mpif.h" -- the old way, which will not provide interfaces
#endif
  use nrtype_m
  use intrinsics_m
  implicit none

  ! The default must not be private, or else the types defined in the mpi module will not be available.

  public ::      &
    peinfo,      &
    peinfo_init, &
    create_mpi_group, &
    get_thread_id

!-------------------------------

  type peinfo
    !> default values for serial
    integer :: npes = 1
    integer :: npes_freqgrp = 1
    integer :: nthreads = 1
    integer :: nthreads_sort = 1 !< Set with `export BGW_NUM_THREADS_SORT=<num>`
    integer :: inode = 0
    !> Verbosity level, not to be used directly. Use the verbosity flags instead.
    integer :: verbosity=1
    logical :: verb_medium=.false.
    logical :: verb_high=.false.
    logical :: verb_log=.false.
    logical :: verb_debug=.false.
    logical :: verb_max=.false.
    !> initialize to zero, then keep track of memory
    real(DP) :: mymem = 0d0
    real(DP) :: mymaxmem = 0d0
    integer :: nckmem
    integer :: nkpe  !< number of k-points per processor, used in absorption only
    !> kernel: total number of block-transitions (nk^2, (nk*nc)^2 or (nk*nc*nv)^2).
    !! Each block-transition has iholdperown
    integer :: nck
    !> kernel: number of block-transitions that I own
    integer :: nckpe
    integer :: myown !< Kernel: number of unique (k,kp) pairs I own; BSE: number of blocks I own
    integer :: mypown !< in BSE, number of unprimed indices I own for all my blocks
    integer :: npown !< in BSE, max number of unprimed indices owned by any proc in my pool
    integer :: jobtypeeval  !< Coulomb potential generator
                            !! 0 = "dumb generator" ; 1 = "smart consumer"
                            !! See vcoul_generator_m for more details
    !> BSE: number of blocks I own in the one-dimensional block-cyclic distributed
    !! matrices hmtx_a/evecs_r.
    integer :: nblocks
    !> BSE: size of each block in the one-dimensional block-cyclic distributed
    !! matrices hmtx_a/evecs_r = ns*nc_block*nv_block, which varies according to ipar.
    integer :: block_sz
    !> kernel: (nv,nc,nk,nv,nc,nk) offset in the bse_matrix for the
    !! block-transition identified by (ivp,icp,ikp,iv,ic,ik)
    integer, pointer :: wown(:,:,:,:,:,:)
    integer, pointer :: ciown(:)
    integer, pointer :: ik(:,:) !< (inode,j) index of jth k owned by inode
    integer, pointer :: ic(:,:) !< (inode,j) index of jth cband owned by inode
    integer, pointer :: iv(:,:) !< (inode,j) index of jth vband owned by inode
    integer, pointer :: ikp(:,:) !< (inode,j) index of jth kp owned by inode
    integer, pointer :: icp(:,:) !< (inode,j) index of jth cpband owned by inode
    integer, pointer :: ivp(:,:) !< (inode,j) index of jth vpband owned by inode
    integer, pointer :: ib(:,:)
    integer, pointer :: ick(:,:)
    integer, pointer :: ipe(:)
    !> (inode,iv,ik) Maps the global index for valence band (iv) at kpt (ik) to
    !! the local list of valence bands the proc owns. (ik) is defined in the
    !! reducible wedge. ipec is 0 if the proc doesn't own that band/kpt.
    integer, pointer :: ipec(:,:,:)
    integer, pointer :: ipev(:,:,:) !< See ipec
    integer, pointer :: ipek(:,:)   !< Same as ipec, but w/o band index
    integer, pointer :: ipekq(:,:)  !< Local index of valence band k-points, only used
                                    !! for finite-momentum calculations
    integer, pointer :: ipecb(:,:)
    integer, pointer :: ivckpe(:)
    !> (npes) Number of k-points in the full fine grid that each processor owns.
    !! This parallelization is only used for the WFN interpolation in BSE, and
    !! it has nothing to do with the ikt array used in the hbse_a matrix.
    integer, pointer :: ikt(:)
    !> (npes) Number of block-columns of the hbse_a matrix that each processor owns.
    !! Used in BSE only. The size of each block is block_sz.
    integer, pointer :: ibt(:)
    !> (nblocks) ikb(ib) is the k-point associated with the ib-th block of the
    !! distributed BSE Hamiltonian that I own.
    integer, pointer :: ikb(:)
    !> (nblocks) icb(ib) is the cond band associated with the ib-th block of the
    !! distributed BSE Hamiltonian that I own. Used only if ipar==2 or ipar==3.
    integer, pointer :: icb(:)
    !> (nblocks) ivb(ib) is the val band associated with the ib-th block of the
    !! distributed BSE Hamiltonian that I own. Used only if ipar==3.
    integer, pointer :: ivb(:)
    !> Number of cond bands in each block of the distributed BSE Hamiltonian.
    !! This is xct%ncb_fi for ipar<2, and 1 for ipar>=2
    integer :: nc_block
    !> Number of val bands in each block of the distributed BSE Hamiltonian.
    !! This is xct%nvb_fi for ipar<3, and 1 for ipar>=3
    integer :: nv_block
    integer, pointer :: neig(:)
    integer, pointer :: peig(:,:)
    integer :: npools !< number of pools for the valence bands in Epsilon or the outer bands in Sigma
    integer :: npes_pool !< number of processors per pool
    integer :: pool_group !< MPI group for pools
    integer :: pool_comm !< MPI communicator for pools
    integer :: pool_rank !< rank within pool
    integer :: my_pool !< what pool this processor is in
    integer :: nvownmax  !< max. number of valence bands that I can own
    integer :: ncownmax  !< max. number of conduction bands that I can own
    integer :: nvownactual !< (total) number of valence bands that I *really* own
    integer :: ncownactual !< (total) number of conduction bands that I *really* own
    !> Who owns a particular pair of bands (v,c)?
    integer, pointer :: global_pairowner(:,:)
    !> (total) number of valence bands that a particular MPI process owns
    integer, pointer :: global_nvown(:)
    !> (total) number of conduction bands that a particular MPI process owns
    integer, pointer :: global_ncown(:)
    !> indexv(i) is the local index (in terms of bands that I own) of the ith
    !! (global) valence band. It is zero if I don't own valence band #i.
    integer, pointer :: indexv(:)
    integer, pointer :: global_indexv(:,:) !< local indices for all processes
    integer, pointer :: indexc(:) !< see indexv
    !> Given a local band #i that I own, invindexv(i) is the global index of
    !! that band. If i>nvownactual, the result is zero.
    integer, pointer :: invindexv(:)
    integer, pointer :: invindexc(:) !< see invindexv
    logical, pointer :: doiownv(:) !< do I own a particular valence band?
    logical, pointer :: doiownc(:) !< do I own a particular conduction band?
    logical, pointer :: does_it_ownc(:,:) !< (band,node) does a particular node own a cond. band?
    logical, pointer :: does_it_ownv(:,:) !< (band,node) does a particular node own a val. band?
    integer, pointer :: iownwfv(:) !< number of val. WFNs each proc. owns
    integer, pointer :: iownwfc(:) !< number of cond WFNs each proc. owns
    integer, pointer :: iownwfk(:) !< number of distinct k-points each proc. (partially) owns
    integer, pointer :: iownwfkq(:) !< Same as iownwfk, but refers to k+Q point when using finite momentum Q
    integer, pointer :: nxqown(:)
    integer, pointer :: nxqi(:)
    integer :: ndiag_max
    integer :: noffdiag_max
    integer :: ntband_max
    integer :: ntband_node
    integer :: nvband_node
    integer, pointer :: indext(:)
    integer, pointer :: ntband_dist(:)
    integer, pointer :: indext_dist(:,:)
    integer, pointer :: index_diag(:)
    logical, pointer :: flag_diag(:)
    integer, pointer :: index_offdiag(:)
    logical, pointer :: flag_offdiag(:)
    !> MPI group variables for parallel frequencies:
    !! igroup = your group number
    !! rank = your processor number in your group
    !! _f = frequency evaluation group
    !! _mtxel = matrix element communication group
    integer :: igroup_f
    integer :: rank_f
    integer :: igroup_mtxel
    integer :: rank_mtxel
    integer :: mtxel_comm         !< mtxel group communicator
    integer :: freq_comm          !< frequency group communicator
    integer :: npes_orig          !< original number of processors,
                                  !! for when nfreq_group does not
                                  !! divide the total number of procs
    integer :: mtxel_group        !< mtxel group handle
    integer :: freq_group         !< frequency group handle
    integer, pointer :: ranks(:)  !< ranks of processors to include in MPI group
    logical :: check_norms=.true. !< Whether to check norms; .true. unless doing pseudobands
  end type peinfo
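
  ! Illustrative usage sketch: most client code reads fields of the global
  ! instance peinf declared below, typically guarding I/O so that only the
  ! root rank writes. For example:
  !
  !   if (peinf%inode==0) then
  !     write(6,'(a,i0,a,i0,a)') 'Running on ', peinf%npes, ' MPI task(s), ', &
  !       peinf%nthreads, ' thread(s) each.'
  !   endif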

  type(peinfo), save, public :: peinf
#ifdef MPI
  integer, public :: mpistatus(MPI_STATUS_SIZE)
  integer, public :: mpierr
#endif
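
  ! Illustrative sketch: mpistatus and mpierr above serve as shared status and
  ! error arguments for MPI calls throughout the code. A hypothetical receive,
  ! with buf, n, src and tag supplied by the caller:
  !
  !   call MPI_Recv(buf, n, MPI_DOUBLE_PRECISION, src, tag, MPI_COMM_WORLD, mpistatus, mpierr)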

contains


  !> FHJ: Set verbosity flags, such as peinf%verb_medium, based on peinf%verbosity.
  !! Note that verbosity flags are cumulative.
  subroutine peinfo_set_verbosity()
    character(len=8) :: verb_str(6)
    ! cannot use push_pop because that module uses this one

    if (peinf%verbosity<1) peinf%verbosity = 1
    if (peinf%verbosity>6) peinf%verbosity = 6
    if (peinf%verbosity>=2) peinf%verb_medium = .true.
    if (peinf%verbosity>=3) peinf%verb_high = .true.
    if (peinf%verbosity>=4) peinf%verb_log = .true.
    if (peinf%verbosity>=5) peinf%verb_debug = .true.
    if (peinf%verbosity>=6) peinf%verb_max = .true.
#ifdef VERBOSE
    ! FHJ: the -DVERBOSE flag overrides everything. This is useful for buildbots.
    peinf%verb_medium = .true.
    peinf%verb_high = .true.
    peinf%verb_log = .true.
    peinf%verb_debug = .true.
    peinf%verb_max = .true.
#endif
    if (peinf%inode==0) then
      verb_str(1) = "default"
      verb_str(2) = "medium"
      verb_str(3) = "high"
      verb_str(4) = "log"
      verb_str(5) = "debug"
      verb_str(6) = "max"
      write(6,'(1x,a,i0,3a/)') 'Running with verbosity level ', &
        peinf%verbosity,' (', trim(verb_str(peinf%verbosity)), ').'
      if (peinf%verbosity>3) then
        write(0,'(/a)') 'WARNING: you are running the calculation with a high level of verbosity.'
        write(0,'(a/)') 'This will impact the performance of the code.'
      endif
    endif

  end subroutine peinfo_set_verbosity
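
  ! Illustrative sketch: since the verbosity flags are cumulative, client code
  ! only needs to test the flag for the lowest level at which a message becomes
  ! relevant (some_routine is a hypothetical name):
  !
  !   if (peinf%verb_debug .and. peinf%inode==0) then
  !     write(6,'(a)') 'DEBUG: entering some_routine'
  !   endif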


  subroutine peinfo_init()
    ! cannot use push_pop because that module uses this one

#ifdef MPI
    call MPI_Init(mpierr)
    if(mpierr .ne. MPI_SUCCESS) then
      write(0,'(a)') 'ERROR: MPI initialization failed!'
      stop 999
    endif
    call MPI_Comm_rank(MPI_COMM_WORLD, peinf%inode, mpierr)
    call MPI_Comm_size(MPI_COMM_WORLD, peinf%npes, mpierr)
#endif

#ifdef OMP
!$OMP PARALLEL
    peinf%nthreads = OMP_GET_NUM_THREADS()
!$OMP END PARALLEL

! Why put OMP pragmas here?
! JRD: I want to make sure our code creates a parallel region before any library
! does. This matters for performance when a library uses a different threading
! implementation or OpenMP build.
#endif

! If running in serial, the default values set in the peinfo type above are left unchanged.

    return
  end subroutine peinfo_init
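
  ! Illustrative sketch: a driver calls peinfo_init() once, before any other
  ! MPI traffic, and (in MPI builds) calls MPI_Finalize at the end. The program
  ! name my_driver is hypothetical:
  !
  !   program my_driver
  !     use peinfo_m
  !     call peinfo_init()
  !     if (peinf%inode==0) write(6,'(a,i0,a)') 'Running on ', peinf%npes, ' PE(s).'
  !     ! ... actual work ...
  !     call MPI_Finalize(mpierr)  ! MPI builds only
  !   end program my_driver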


  subroutine create_mpi_group(orig_group, group_size, ranks, group_handle, group_comm)
    integer, intent(in)  :: orig_group    !< Handle for original MPI group, which you are breaking into smaller groups
    integer, intent(in)  :: group_size    !< number of processors in new MPI group
    integer, intent(in)  :: ranks(:)      !< (group_size) array specifying ranks of processors to include in MPI group
    integer, intent(out) :: group_handle  !< handle for new MPI group
    integer, intent(out) :: group_comm    !< communicator for new MPI group

#ifdef MPI
! DVF: create new group from original group, using ranks specified in `ranks` array
    call MPI_Group_incl(orig_group, group_size, ranks(1:group_size), group_handle, mpierr)
    if(mpierr .ne. MPI_SUCCESS) write(0,'(a)') "ERROR: mpi_group_incl failed!"
! DVF: create communicator for new group
    call MPI_Comm_create(MPI_COMM_WORLD, group_handle, group_comm, mpierr)
    if(mpierr .ne. MPI_SUCCESS) write(0,'(a)') "ERROR: mpi_comm_create failed!"
#else
    group_handle = -1
    group_comm = -1
#endif

    return
  end subroutine create_mpi_group
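
  ! Illustrative sketch: splitting MPI_COMM_WORLD into consecutive groups of
  ! grp_sz ranks each. All local names (world_grp, grp_sz, first, myranks,
  ! grp, comm) are hypothetical:
  !
  !   integer :: world_grp, grp, comm, grp_sz, first, i
  !   integer, allocatable :: myranks(:)
  !   grp_sz = 2                            ! e.g., pair up consecutive ranks
  !   first = (peinf%inode/grp_sz)*grp_sz   ! first rank in my group
  !   allocate(myranks(grp_sz))
  !   myranks = (/ (first+i, i=0,grp_sz-1) /)
  !   call MPI_Comm_group(MPI_COMM_WORLD, world_grp, mpierr)
  !   call create_mpi_group(world_grp, grp_sz, myranks, grp, comm)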


  !> FHJ: Gets the thread number of the caller. Should be called inside an OMP
  !! construct. Returns 0 if the code was compiled without OMP support.
  integer function get_thread_id()
#ifdef OMP
    integer, external :: omp_get_thread_num
#endif

    ! cannot use push_pop because that module uses this one

#ifdef OMP
    get_thread_id = omp_get_thread_num()
#else
    get_thread_id = 0
#endif

  end function get_thread_id
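
  ! Illustrative sketch: get_thread_id is meant to be called from inside a
  ! parallel region, e.g. to index a per-thread workspace (tid and work are
  ! hypothetical):
  !
  !   integer :: tid
  !   !$OMP PARALLEL PRIVATE(tid)
  !   tid = get_thread_id()
  !   ! ... address a thread-private slot, e.g. work(:, tid+1) ...
  !   !$OMP END PARALLEL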

end module peinfo_m
