!================================================================================
!
! Modules:
!
! (1) peinfo_m Originally by DAS 8/20/2010
!
! Defines type and global instance of object for "processor equivalent" info.
! Use mpi module to define interfaces for MPI calls.
! [For F77, MPI header 'mpif.h' was included.]
!
!================================================================================

#include "f_defs.h"

module peinfo_m

#ifdef MPI
  use mpi
! include "mpif.h" -- the old way, which will not provide interfaces
#endif
  use nrtype_m
  use intrinsics_m
  implicit none

  ! default must not be private, or else the types defined in mpi module will not be available.

  public :: &
    peinfo, &
    peinfo_init, &
    create_mpi_group, &
    get_thread_id

!-------------------------------

  type peinfo
    !> default values for serial
    integer :: npes = 1
    integer :: npes_freqgrp = 1
    integer :: nthreads = 1
    integer :: nthreads_sort = 1 !< Set with `export BGW_NUM_THREADS_SORT=<num>`
    integer :: inode = 0
    !> Verbosity level, not to be used directly. Use the verbosity flags instead.
    integer :: verbosity=1
    logical :: verb_medium=.false.
    logical :: verb_high=.false.
    logical :: verb_log=.false.
    logical :: verb_debug=.false.
    logical :: verb_max=.false.
    !> initialize to zero, then keep track of memory
    real(DP) :: mymem = 0d0
    real(DP) :: mymaxmem = 0d0
    integer :: nckmem
    integer :: nkpe !< number of k-points per processor, used in absorption only
    !> kernel: total number of block-transitions ( nk^2, (nk*nc)^2 or (nk*nc*nv)^2)
    !! Each block-transition has iholdperown
    integer :: nck
    !> kernel: number of block-transitions that I own
    integer :: nckpe
    integer :: myown !< Kernel: number of unique (k,kp) pairs I own; BSE: number of blocks I own
    integer :: mypown !< in BSE, number of unprimed indices I own for all my blocks
    integer :: npown !< in BSE, max number of unprimed indices owned by any proc in my pool
    integer :: jobtypeeval !< Coulomb potential generator
                          !! 0 = "dumb generator" ; 1 = "smart consumer"
                          !! See vcoul_generator_m for more details
    !> BSE: number of blocks I own in the one-dimensional block-cyclic distributed
    !! matrices hmtx_a/evecs_r.
    integer :: nblocks
    !> BSE: size of each block in the one-dimensional block-cyclic distributed
    !! matrices hmtx_a/evecs_r = ns*nc_block*nv_block, which varies according to ipar.
    integer :: block_sz
    !> kernel: (nv,nc,nk,nv,nc,nk) offset in the bse_matrix for the
    !! block-transition identified by (ivp,icp,ikp,iv,ic,ik)
    integer, pointer :: wown(:,:,:,:,:,:)
    integer, pointer :: ciown(:)
    integer, pointer :: ik(:,:) !< (inode,j) index of jth k owned by inode
    integer, pointer :: ic(:,:) !< (inode,j) index of jth cband owned by inode
    integer, pointer :: iv(:,:) !< (inode,j) index of jth vband owned by inode
    integer, pointer :: ikp(:,:) !< (inode,j) index of jth kp owned by inode
    integer, pointer :: icp(:,:) !< (inode,j) index of jth cpband owned by inode
    integer, pointer :: ivp(:,:) !< (inode,j) index of jth vpband owned by inode
    integer, pointer :: ib(:,:)
    integer, pointer :: ick(:,:)
    integer, pointer :: ipe(:)
    !> (inode,iv,ik) Maps the global index for valence band (iv) at kpt (ik) to
    !! the local list of valence band the proc owns. (ik) is defined in the
    !! reducible wedge. ipec is 0 if the proc doesn't own that band/kpt
    integer, pointer :: ipec(:,:,:)
    integer, pointer :: ipev(:,:,:) !< See ipec
    integer, pointer :: ipek(:,:) !< Same as ipec, but w/o band index
    integer, pointer :: ipekq(:,:) !< Local index of valence band k-points only used
                                   !< for finite momentum calculations
    integer, pointer :: ipecb(:,:)
    integer, pointer :: ivckpe(:)
    !> (npes) Number of k-points in the full fine grid that each processor owns.
    !! This parallelization is only used for the WFN interpolation in BSE, and
    !! it has nothing to do with the ikt array used in the hbse_a matrix.
    integer, pointer :: ikt(:)
    !> (npes) Number of block-columns of the hbse_a matrix each processor owns.
    !! Used in BSE only. The size of each block is block_sz.
    integer, pointer :: ibt(:)
    !> (nblocks) ikb(ib) is the k-point associated to the ib-th block of the
    !! distributed BSE Hamiltonian that I own.
    integer, pointer :: ikb(:)
    !> (nblocks) icb(ib) is the cond band associated to the ib-th block of the
    !! distributed BSE Hamiltonian that I own. Used only if ipar==2 or ipar==3.
    integer, pointer :: icb(:)
    !> (nblocks) ivb(ib) is the val band associated to the ib-th block of the
    !! distributed BSE Hamiltonian that I own. Used only if ipar==3.
    integer, pointer :: ivb(:)
    !> Number of cond bands in each block of the distributed BSE Hamiltonian.
    !! This is xct%ncb_fi for ipar<2, and 1 for ipar>=2
    integer :: nc_block
    !> Number of val bands in each block of the distributed BSE Hamiltonian.
    !! This is xct%nvb_fi for ipar<3, and 1 for ipar>=3
    integer :: nv_block
    integer, pointer :: neig(:)
    integer, pointer :: peig(:,:)
    integer :: npools !< number of pools for the valence bands in Epsilon or outer bands in sigma
    integer :: npes_pool !< number of processors per pool
    integer :: pool_group !< mpi_group for pools
    integer :: pool_comm !< mpi_comm for pools
    integer :: pool_rank !< rank within pool
    integer :: my_pool !< what pool this processor is in
    integer :: nvownmax !< max. number of valence bands that I can own
    integer :: ncownmax !< max. number of conduction bands that I can own
    integer :: nvownactual !< (total) number of valence bands that I *really* own
    integer :: ncownactual !< (total) number of conduction bands that I *really* own
    !> Who owns a particular pair of bands (v,c)?
    integer, pointer :: global_pairowner(:,:)
    !> (total) number of valence bands that a particular MPI process owns
    integer, pointer :: global_nvown(:)
    !> (total) number of conduction bands that a particular MPI process owns
    integer, pointer :: global_ncown(:)
    !> indexv(i) is the local index (in terms of bands that I own) of the ith
    !! (global) valence band. It is zero if I don't own valence band #i.
    integer, pointer :: indexv(:)
    integer, pointer :: global_indexv(:,:) !< local indices for all processes
    integer, pointer :: indexc(:) !< see indexv
    !> Given a local band #i that I own, invindexv(i) is the global index of
    !! that band. If i>nvownt, the result is zero.
    integer, pointer :: invindexv(:)
    integer, pointer :: invindexc(:) !< see invindexv
    logical, pointer :: doiownv(:) !< do I own a particular valence band?
    logical, pointer :: doiownc(:) !< do I own a particular conduction band?
    logical, pointer :: does_it_ownc(:,:) !< (band,node) does a particular node own a cond. band?
    logical, pointer :: does_it_ownv(:,:) !< (band,node) does a particular node own a val. band?
    integer, pointer :: iownwfv(:) !< number of val. WFNs each proc. owns
    integer, pointer :: iownwfc(:) !< number of cond WFNs each proc. owns
    integer, pointer :: iownwfk(:) !< number of distinct k-points each proc. (partially) owns
    integer, pointer :: iownwfkq(:) !< Same as iownwfk, but refers to k+Q point when using finite momentum Q
    integer, pointer :: nxqown(:)
    integer, pointer :: nxqi(:)
    integer :: ndiag_max
    integer :: noffdiag_max
    integer :: ntband_max
    integer :: ntband_node
    integer :: nvband_node
    integer, pointer :: indext(:)
    integer, pointer :: ntband_dist(:)
    integer, pointer :: indext_dist(:,:)
    integer, pointer :: index_diag(:)
    logical, pointer :: flag_diag(:)
    integer, pointer :: index_offdiag(:)
    logical, pointer :: flag_offdiag(:)
    !> Parallel frequencies mpi group variables
    !! igroup = your group number
    !! rank = your processor number in your group
    !! _f = frequency evaluation group
    !! _mtxel = matrix element communication group
    integer :: igroup_f
    integer :: rank_f
    integer :: igroup_mtxel
    integer :: rank_mtxel
    integer :: mtxel_comm !< mtxel group communicator
    integer :: freq_comm !< frequency group communicator
    integer :: npes_orig !< original number of processors
                         !! for when nfreq_group does not
                         !! divide total number of procs
    integer :: mtxel_group !< mtxel group handle
    integer :: freq_group !< frequency group handle
    integer, pointer :: ranks(:) !< ranks of processors to include in mpi group
    logical :: check_norms=.true. !< Whether to check norms, .true. unless doing pseudobands
  end type peinfo

  type(peinfo), save, public :: peinf
#ifdef MPI
  integer, public :: mpistatus(MPI_STATUS_SIZE)
  integer, public :: mpierr
#endif

contains


  !> FHJ: Set verbosity flags, such as peinf%verb_medium, based on peinf%verbosity.
  !! Note that verbosity flags are cumulative.
  subroutine peinfo_set_verbosity()
    character(len=8) :: verb_str(6)
    ! cannot use push_pop because that module uses this one

    ! Clamp the verbosity level to the supported range [1,6].
    if (peinf%verbosity<1) peinf%verbosity = 1
    if (peinf%verbosity>6) peinf%verbosity = 6
    ! Flags are cumulative: level N turns on every flag up to N.
    if (peinf%verbosity>=2) peinf%verb_medium = .true.
    if (peinf%verbosity>=3) peinf%verb_high = .true.
    if (peinf%verbosity>=4) peinf%verb_log = .true.
    if (peinf%verbosity>=5) peinf%verb_debug = .true.
    if (peinf%verbosity>=6) peinf%verb_max = .true.
#ifdef VERBOSE
    ! FHJ: -DVERBOSE flag overwrites everything. This is useful for buildbots.
    peinf%verb_medium = .true.
    peinf%verb_high = .true.
    peinf%verb_log = .true.
    peinf%verb_debug = .true.
    peinf%verb_max = .true.
#endif
    ! Only the root rank reports the verbosity level.
    if (peinf%inode==0) then
      verb_str(1) = "default"
      verb_str(2) = "medium"
      verb_str(3) = "high"
      verb_str(4) = "log"
      verb_str(5) = "debug"
      verb_str(6) = "max"
      write(6,'(1x,a,i0,3a/)') 'Running with verbosity level ', &
        peinf%verbosity,' (', trim(verb_str(peinf%verbosity)), ').'
      if (peinf%verbosity>3) then
        write(0,'(/a)') 'WARNING: you are running the calculation with a high level of verbosity.'
        write(0,'(a/)') 'This will impact the performance of the code.'
      endif
    endif

  end subroutine peinfo_set_verbosity


  !> Initialize MPI (when compiled with -DMPI) and record rank/size in peinf;
  !! also record the OpenMP thread count (when compiled with -DOMP).
  !! In serial builds the defaults set in type peinfo are left alone.
  subroutine peinfo_init()
    ! cannot use push_pop because that module uses this one

#ifdef MPI
    call MPI_Init(mpierr)
    if(mpierr .ne. MPI_SUCCESS) then
      write(0,'(a)') 'ERROR: MPI initialization failed!'
      stop 999
    endif
    call MPI_Comm_rank(MPI_COMM_WORLD, peinf%inode, mpierr)
    call MPI_Comm_size(MPI_COMM_WORLD, peinf%npes, mpierr)
#endif

#ifdef OMP
!$OMP PARALLEL
    peinf%nthreads = OMP_GET_NUM_THREADS()
!$OMP END PARALLEL

! Why put OMP pragmas here?
! JRD: I want to make sure our code has a parallel region before that of any library. This affects
! performance when the libraries are using a different implementation of threads or OpenMP build.
#endif

! if serial, default values set in type peinfo above are left alone

    return
  end subroutine peinfo_init


  !> Create a new MPI group (and its communicator) from a subset of an
  !! existing group. In non-MPI builds both handles are set to -1.
  subroutine create_mpi_group(orig_group,group_size,ranks,group_handle,group_comm)
    integer, intent(in) :: orig_group !< Handle for original MPI group, which you are breaking into smaller groups
    integer,intent(in) :: group_size !< number of processors in new mpi group
    integer,intent(in) :: ranks(:) !< (group_size) array specifying ranks of processors to include in MPI group
    integer,intent(out) :: group_handle !< handle for new MPI group
    integer,intent(out) :: group_comm !< communicator for new MPI group

#ifdef MPI
! DVF : create new group from original group, using ranks specified in `ranks` array
    call MPI_Group_incl(orig_group, group_size,ranks(1:group_size), group_handle, mpierr)
    if(mpierr .ne. MPI_SUCCESS) write(0,'(a)') "ERROR: mpi_group_incl failed!"
! DVF : create communicator for new group
    call MPI_Comm_create(MPI_COMM_WORLD,group_handle,group_comm,mpierr)
    if(mpierr .ne. MPI_SUCCESS) write(0,'(a)') "ERROR: mpi_comm_create failed!"
#else
    group_handle = -1
    group_comm = -1
#endif

    return
  end subroutine create_mpi_group


  !> FHJ: Gets the thread number of the caller. Should be called inside an OMP
  !! construct. Returns 0 if code was compiled without OMP support.
  integer function get_thread_id()
#ifdef OMP
    integer, external :: omp_get_thread_num
#endif

    ! cannot use push_pop because that module uses this one

#ifdef OMP
    get_thread_id = omp_get_thread_num()
#else
    get_thread_id = 0
#endif

    ! cannot use push_pop because that module uses this one

  end function get_thread_id

end module peinfo_m