1!=========================================================================== 2! 3! Modules: 4! 5! scalapack_m Originally By DAS 6! 7! Functions, types, and interfaces for ScaLAPACK/BLACS. 8! Interfaces are from http://www.netlib.org/scalapack/tools, double, complex16 9! and from http://www.netlib.org/blacs/BLACS/QRef.html (entered manually...) 10! Every ScaLAPACK/BLACS function used in the code should be listed here, and this 11! module should be used in every routine containing ScaLAPACK/BLACS calls to ensure 12! the argument types are correct. 13! 14!============================================================================ 15 16#include "f_defs.h" 17 18module scalapack_m 19 20 use global_m 21 implicit none 22 23 private 24 25 public :: & 26 scalapack, & 27 blacs_setup, & 28 layout_scalapack, & 29 iceil, & 30 descinit, & 31 descset, & 32 pdgesv, & 33 pzgesv, & 34 pdsyevx, & 35 pdgeqrf, & 36 pzgeqrf, & 37 pzheevx, & 38 pdgemr2d, & 39 pzgemr2d, & 40 blacs_get, & 41 blacs_gridinit, & 42 blacs_gridmap, & 43 blacs_gridexit, & 44 blacs_exit 45 46!----------------------------- 47 48 type scalapack 49 integer :: nprow !< the number of processors in a row of your processor grid 50 integer :: npcol !< the number of processors in a column of your processor grid 51 integer :: nbl !< the linear dimension of a block of a distributed matrix 52 integer :: myprow !< the processor`s row coordinate in your processor grid 53 integer :: mypcol !< the processor`s column coordinate in your processor grid 54 integer :: npr !< the number of rows of the matrix the processor owns 55 integer :: npc !< the number of columns of the matrix the processor owns 56 integer :: icntxt !< BLACS context; see BLACS documentation 57 integer, pointer :: npcd(:) !< global list of the number of cols of the matrix owned by all processors 58 integer, pointer :: nprd(:) !< global list of the number of rows of the matrix owned by all processors 59 integer, pointer :: isrtxrow(:) !< isrtxrow/isrtxcol give the sorted index of the gvector in a given block 60 integer, pointer :: isrtxcol(:) !! owned by a processor in terms of the whole list of gvectors 61 integer, pointer :: imycol(:) !< imyrow/imycol give the row/column index of a g-vector owned by a given 62 integer, pointer :: imyrow(:) !! processor in the whole matrix 63 integer, pointer :: imycolinv(:) !< inverse of imycol 64 integer, pointer :: imyrowinv(:) !! inverse of imyrow 65 integer, pointer :: imycold(:,:) !< imycold/imyrowd are global lists of the row/column index of g-vectors 66 integer, pointer :: imyrowd(:,:) !! owned by all the processors in the whole matrix 67 end type scalapack 68 69!> SCALAPACK 70 interface 71 INTEGER FUNCTION ICEIL( INUM, IDENOM ) 72 implicit none 73 INTEGER IDENOM, INUM 74 end FUNCTION ICEIL 75 end interface 76 77 interface 78 SUBROUTINE DESCINIT( DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD, INFO ) 79 implicit none 80 INTEGER ICSRC, ICTXT, INFO, IRSRC, LLD, M, MB, N, NB 81 INTEGER DESC( * ) 82 end SUBROUTINE DESCINIT 83 end interface 84 85 interface 86 SUBROUTINE DESCSET( DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD ) 87 implicit none 88 INTEGER ICSRC, ICTXT, IRSRC, LLD, M, MB, N, NB 89 INTEGER DESC( * ) 90 end SUBROUTINE DESCSET 91 end interface 92 93 interface 94 SUBROUTINE PDGESV( N, NRHS, A, IA, JA, DESCA, IPIV, B, IB, JB, DESCB, INFO ) 95 implicit none 96 INTEGER IA, IB, INFO, JA, JB, N, NRHS 97 INTEGER DESCA( * ), DESCB( * ), IPIV( * ) 98 DOUBLE PRECISION A( * ), B( * ) 99 end SUBROUTINE PDGESV 100 end interface 101 102 interface 103 SUBROUTINE PZGESV( N, NRHS, A, IA, JA, DESCA, IPIV, B, IB, JB, DESCB, INFO ) 104 implicit none 105 INTEGER IA, IB, INFO, JA, JB, N, NRHS 106 INTEGER DESCA( * ), DESCB( * ), IPIV( * ) 107 COMPLEX*16 A( * ), B( * ) 108 end SUBROUTINE PZGESV 109 end interface 110 111 interface 112 SUBROUTINE PDSYEVX( JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL, & 113 VU, IL, IU, ABSTOL, M, NZ, W, ORFAC, Z, IZ, JZ, DESCZ, WORK, LWORK, IWORK, LIWORK, IFAIL, & 114 ICLUSTR, GAP, INFO ) 115 implicit none 116 CHARACTER JOBZ, RANGE, UPLO 117 INTEGER IA, IL, INFO, IU, IZ, JA, JZ, LIWORK, LWORK, M, N, NZ 118 DOUBLE PRECISION ABSTOL, ORFAC, VL, VU 119 INTEGER DESCA( * ), DESCZ( * ), ICLUSTR( * ), IFAIL( * ), IWORK( * ) 120 DOUBLE PRECISION A( * ), GAP( * ), W( * ), WORK( * ), Z( * ) 121 end SUBROUTINE PDSYEVX 122 end interface 123 124 interface 125 SUBROUTINE PDGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, INFO ) 126 implicit none 127 INTEGER IA, INFO, JA, LWORK, M, N 128 INTEGER DESCA( * ) 129 DOUBLE PRECISION A( * ), TAU( * ), WORK( * ) 130 end SUBROUTINE PDGEQRF 131 end interface 132 133 interface 134 SUBROUTINE PZGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, INFO ) 135 implicit none 136 INTEGER IA, INFO, JA, LWORK, M, N 137 INTEGER DESCA( * ) 138 COMPLEX*16 A( * ), TAU( * ), WORK( * ) 139 end SUBROUTINE PZGEQRF 140 end interface 141 142 interface 143 subroutine PZHEEVX( JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL, & 144 VU, IL, IU, ABSTOL, M, NZ, W, ORFAC, Z, IZ, & 145 JZ, DESCZ, WORK, LWORK, RWORK, LRWORK, IWORK, & 146 LIWORK, IFAIL, ICLUSTR, GAP, INFO ) 147 implicit none 148 character JOBZ, RANGE, UPLO 149 integer IA, IL, INFO, IU, IZ, JA, JZ, LIWORK, LRWORK, LWORK, M, N, NZ 150 double precision ABSTOL, ORFAC, VL, VU 151 integer DESCA( * ), DESCZ( * ), ICLUSTR( * ), IFAIL( * ), IWORK( * ) 152 double precision GAP( * ), RWORK( * ), W( * ) 153 complex*16 A( * ), WORK( * ), Z( * ) 154 end subroutine PZHEEVX 155 end interface 156 157 interface 158 subroutine PDGEMR2D( M, N, A, IA, JA, DESCA, B, IB, JB, DESCB, ICNTXT) 159 implicit none 160 integer M, N, IA, JA, IB, JB, DESCA( * ), DESCB( * ), ICNTXT 161 double precision A( * ), B( * ) 162 end subroutine PDGEMR2D 163 end interface 164 165 interface 166 subroutine PZGEMR2D( M, N, A, IA, JA, DESCA, B, IB, JB, DESCB, ICNTXT) 167 implicit none 168 integer M, N, IA, JA, IB, JB, DESCA( * ), DESCB( * ), ICNTXT 169 double complex A( * ), B( * ) 170 end subroutine PZGEMR2D 171 end interface 172 173!> BLACS 174 interface 175 subroutine blacs_get(icontxt, what, val) 176 implicit none 177 integer, intent(in) :: icontxt 178 integer, intent(in) :: what 179 integer, intent(out) :: val 180 end subroutine blacs_get 181 end interface 182 183 interface 184 subroutine blacs_gridinit(icontxt, order, nprow, npcol) 185 implicit none 186 integer, intent(inout) :: icontxt 187 character, intent(in) :: order 188 integer, intent(in) :: nprow 189 integer, intent(in) :: npcol 190 end subroutine blacs_gridinit 191 end interface 192 193 !> note: args are out of order so that ldumap,npcol are declared 194 !! prior to their usage as dimensions of usermap. 195 interface 196 subroutine blacs_gridmap(icontxt, usermap, ldumap, nprow, npcol) 197 implicit none 198 integer, intent(inout) :: icontxt 199 integer, intent(in) :: ldumap 200 integer, intent(in) :: nprow 201 integer, intent(in) :: npcol 202 integer, intent(in) :: usermap(ldumap,npcol) 203 end subroutine blacs_gridmap 204 end interface 205 206 interface 207 subroutine blacs_gridexit(icontxt) 208 implicit none 209 integer, intent(in) :: icontxt 210 end subroutine blacs_gridexit 211 end interface 212 213 interface 214 subroutine blacs_exit(icontxt) 215 implicit none 216 integer, intent(in) :: icontxt 217 end subroutine blacs_exit 218 end interface 219 220 interface 221 subroutine blacs_gridinfo(icontxt, nprow, npcol, myprow, mypcol) 222 implicit none 223 integer, intent(in) :: icontxt 224 integer, intent(out) :: nprow 225 integer, intent(out) :: npcol 226 integer, intent(out) :: myprow 227 integer, intent(out) :: mypcol 228 end subroutine blacs_gridinfo 229 end interface 230 231contains 232 233!>-------------------------------------------------------------------------- 234!! Originally by AC, last modified 6/12/2008 (JRD) 235!! Figures out a p by q processor grid layout for the scalapack library. 236!! This p by q grid is used to partition the matrix with a block size b. 237!! The goal is to get a processor grid which is as close to "square" as 238!! possible. For more details, see scalapack documentation. 239!! 240!! Input nproc number of processors 241!! matsize size of matrix 242!! 243!! Output nbl block size 244!! nprow processor grid row 245!! npcol processor grid column 246 subroutine layout_scalapack(matsize, nbl, nproc, nprow, npcol) 247 integer, intent(in) :: matsize 248 integer, intent(out) :: nbl 249 integer, intent(in) :: nproc 250 integer, intent(out) :: nprow, npcol 251 252 integer :: i 253 254 PUSH_SUB(layout_scalapack) 255 256!------------------ 257! Find processor grid 258 259 nprow = int(sqrt(dble(nproc) + 1.0d-6)) 260 261 do i = nprow, 1, -1 262 if(mod(nproc, i) .eq. 0) exit 263 enddo 264 265 nprow = i 266 npcol = nproc/nprow 267 268!------------------- 269! Now for the block size 270 271 nbl = min(32, matsize/(max(nprow, npcol))) 272 273!------------------- 274! Ensure nonzero 275 276 nbl = max(nbl, 1) 277 278 POP_SUB(layout_scalapack) 279 280 return 281 end subroutine layout_scalapack 282 283!-------------------------------------------------------------------------- 284 subroutine blacs_setup(scal, matsize, is_row_order,nppgroup_f,nfreq_group,np_left) 285 type(scalapack), intent(inout) :: scal !< other elements might have been set earlier 286 integer, intent(in) :: matsize 287 logical, intent(in) :: is_row_order 288 integer, intent(in),optional :: nppgroup_f !< # of proc. per freq. group 289 integer, intent(in),optional :: nfreq_group !< number of parallel frequencies 290 integer, intent(in),optional :: np_left !< # of proc. leftover for parallel frequencies 291 292 character :: order 293 integer :: iw,ir,ic,npe 294 logical :: custom_grid 295 integer,allocatable :: usermap(:,:) 296 297 PUSH_SUB(blacs_setup) 298 299#ifdef USESCALAPACK 300 301 npe = peinf%npes 302 303 if (present(nppgroup_f)) then 304 npe = nppgroup_f 305 endif 306 307! DVF : For the group of leftover processors for parallel frequencies 308! We need this value for npe in order to get layout_scalapack to run 309! right, while we still use npp_group_f when defining usermap to get 310! the right processors id`s. Complicated. 311 if (present(np_left)) then 312 npe = np_left 313 endif 314 315 call layout_scalapack(matsize, scal%nbl, npe, & 316 scal%nprow, scal%npcol) 317 318 custom_grid=.false. 319 if (present(nfreq_group)) then 320 custom_grid = nfreq_group > 1 321 endif 322 323 ! FHJ: only bother to create a custom grid if we are doing parallel freqs. 324 if (custom_grid) then 325 326 if(is_row_order) then 327 call die("grid map requires a column-major grid defined in usermap") 328 endif 329 330 SAFE_ALLOCATE(usermap,(scal%nprow,scal%npcol)) 331 332 usermap=0 333 if (.not. present(np_left)) then 334 iw = 1 + peinf%inode/nppgroup_f 335 else 336 iw = 1 + nfreq_group ! DVF: All processors leftover are in same group, so 337 endif ! so there`s no dependence on peinf%inode needed. 338 339 call blacs_get(-1, 0, scal%icntxt) 340 341 do ir=1,scal%nprow 342 do ic=1,scal%npcol 343 usermap(ir,ic)=(iw-1)*nppgroup_f+(ic-1)*scal%nprow+ir-1 344 enddo 345 enddo 346 347 call blacs_gridmap(scal%icntxt,usermap,scal%nprow,scal%nprow, scal%npcol) 348 SAFE_DEALLOCATE(usermap) 349 call blacs_gridinfo(scal%icntxt, scal%nprow, scal%npcol, scal%myprow, scal%mypcol) 350 if (peinf%verb_debug) then 351 ! FHJ: FIXME: barriers are ugly, and this is only a temporary hack to 352 ! make the output more readable. This block should be removed for 353 ! good as soon as we are confident that BLACS is no longer playing 354 ! tricks on us. 355 call MPI_Barrier(MPI_COMM_WORLD, mpierr) 356 write(*,'(4(a,i5,1x))') 'rank=', peinf%inode, 'freq. group=', iw, & 357 'row=', scal%myprow, 'col=', scal%mypcol 358 call MPI_Barrier(MPI_COMM_WORLD, mpierr) 359 endif 360 else 361 362 if(is_row_order) then 363 order = 'r' 364 else 365 order = 'c' 366 endif 367 call blacs_get(-1, 0, scal%icntxt) 368 call blacs_gridinit(scal%icntxt, order, scal%nprow, scal%npcol) 369 call blacs_gridinfo(scal%icntxt, scal%nprow, scal%npcol, scal%myprow, scal%mypcol) 370 371 endif 372 scal%npr = numroc(matsize, scal%nbl, scal%myprow, 0, scal%nprow) 373 scal%npc = numroc(matsize, scal%nbl, scal%mypcol, 0, scal%npcol) 374 375 if (peinf%inode.eq.0) then 376 write(6,*) 377 write(6,'(1x,a,i3,a,i3,a,i4)') 'BLACS processor grid: ', scal%nprow, & 378 ' x ', scal%npcol, '; BLOCKSIZE = ', scal%nbl 379 write(6,*) 380 endif 381 382 if(scal%myprow.eq.-1) then 383 call die('BLACS initialization returned myprow = -1') 384 endif 385 386#else 387 scal%npr=matsize 388 scal%npc=matsize 389 scal%nbl=matsize 390 scal%nprow=1 391 scal%npcol=1 392 scal%myprow=0 393 scal%mypcol=0 394#endif 395 396 POP_SUB(blacs_setup) 397 return 398 end subroutine blacs_setup 399 400 401end module scalapack_m 402