1# Copyright (C) 2010 CAMd 2# Copyright (C) 2010 Argonne National Laboratory 3# Please see the accompanying LICENSE file for further information. 4 5"""Module for high-level BLACS interface. 6 7Usage 8===== 9 10A BLACS grid is a logical grid of processors. To use BLACS, first 11create a BLACS grid. If comm contains 8 or more ranks, this example 12will work:: 13 14 from gpaw.mpi import world 15 from gpaw.blacs import BlacsGrid 16 grid = BlacsGrid(world, 4, 2) 17 18Use the processor grid to create various descriptors for distributed 19arrays:: 20 21 block_desc = grid.new_descriptor(500, 500, 64, 64) 22 local_desc = grid.new_descriptor(500, 500, 500, 500) 23 24The first descriptor describes 500 by 500 arrays distributed amongst 25the 8 CPUs of the BLACS grid in blocks of 64 by 64 elements (which is 26a sensible block size). That means each CPU has many blocks located 27all over the array:: 28 29 print(world.rank, block_desc.shape, block_desc.gshape) 30 31Here block_desc.shape is the local array shape while gshape is the 32global shape. The local array shape varies a bit on each CPU as the 33block distribution may be slightly uneven. 34 35The second descriptor, local_desc, has a block size equal to the 36global size of the array, and will therefore only have one block. 37This block will then reside on the first CPU -- local_desc therefore 38represents non-distributed arrays. Let us instantiate some arrays:: 39 40 H_MM = local_desc.empty() 41 42 if world.rank == 0: 43 assert H_MM.shape == (500, 500) 44 H_MM[:, :] = calculate_hamiltonian_or_something() 45 else: 46 assert H_MM.shape[0] == 0 or H_MM.shape[1] == 0 47 48 H_mm = block_desc.empty() 49 print(H_mm.shape) # many elements on all CPUs 50 51We can then redistribute the local H_MM into H_mm:: 52 53 from gpaw.blacs import Redistributor 54 redistributor = Redistributor(world, local_desc, block_desc) 55 redistributor.redistribute(H_MM, H_mm) 56 57Now we can run parallel linear algebra on H_mm. This will diagonalize 58H_mm, place the eigenvectors in C_mm and the eigenvalues globally in 59eps_M:: 60 61 eps_M = np.empty(500) 62 C_mm = block_desc.empty() 63 block_desc.diagonalize_ex(H_mm, C_mm, eps_M) 64 65We can redistribute C_mm back to the master process if we want:: 66 67 C_MM = local_desc.empty() 68 redistributor2 = Redistributor(world, block_desc, local_desc) 69 redistributor2.redistribute(C_mm, C_MM) 70 71If somebody wants to do all this more easily, they will probably write 72a function for that. 73 74List of interesting classes 75=========================== 76 77 * BlacsGrid 78 * BlacsDescriptor 79 * Redistributor 80 81The other classes in this module are coded specifically for GPAW and 82are inconvenient to use otherwise. 83 84The module gpaw.utilities.blacs contains several functions like gemm, 85gemv and r2k. These functions may or may not have appropriate 86docstings, and may use Fortran-like variable naming. Also, either 87this module or gpaw.utilities.blacs will be renamed at some point. 88 89""" 90 91import numpy as np 92 93import gpaw 94from gpaw.mpi import SerialCommunicator 95from gpaw.matrix_descriptor import MatrixDescriptor 96from gpaw.utilities.scalapack import scalapack_inverse_cholesky, \ 97 scalapack_diagonalize_ex, scalapack_general_diagonalize_ex, \ 98 scalapack_diagonalize_dc, scalapack_general_diagonalize_dc, \ 99 scalapack_diagonalize_mr3, scalapack_general_diagonalize_mr3 100import _gpaw 101 102 103INACTIVE = -1 104BLOCK_CYCLIC_2D = 1 105 106 107class BlacsGrid: 108 """Class representing a 2D grid of processors sharing a Blacs context. 109 110 A BLACS grid defines a logical M by N ordering of a collection of 111 CPUs. A BLACS grid can be used to create BLACS descriptors. On 112 an npcol by nprow BLACS grid, a matrix is distributed amongst M by 113 N CPUs along columns and rows, respectively, while the matrix 114 shape and blocking properties are determined by the descriptors. 115 116 Use the method new_descriptor() to create any number of BLACS 117 descriptors sharing the same CPU layout. 118 119 Most matrix operations require the involved matrices to all be on 120 the same BlacsGrid. Use a Redistributor to redistribute matrices 121 from one BLACS grid to another if necessary. 122 123 Parameters:: 124 125 * comm: MPI communicator for CPUs of the BLACS grid or None. A BLACS 126 grid may use all or some of the CPUs of the communicator. 127 * nprow: Number of CPU rows. 128 * npcol: Number of CPU columns. 129 * order: 'R' or 'C', meaning rows or columns. I'm not sure what this 130 does, it probably interchanges the meaning of rows and columns. XXX 131 132 Complicated stuff 133 ----------------- 134 135 It may be useful to know that a BLACS grid is said to be active 136 and will evaluate to True on any process where comm is not None 137 *and* comm.rank < nprow * npcol. Otherwise it is considered 138 inactive and evaluates to False. Ranks where a grid is inactive 139 never do anything at all. 140 141 BLACS identifies each grid by a unique ID number called the 142 context (frequently abbreviated ConTxt). Grids on inactive ranks 143 have context -1.""" 144 def __init__(self, comm, nprow, npcol, order='R'): 145 assert nprow > 0 146 assert npcol > 0 147 assert len(order) == 1 148 assert order in 'CcRr' 149 # set a default value for the context leads to fewer 150 # if statements below 151 self.context = INACTIVE 152 153 self.comm = comm 154 self.nprow = nprow 155 self.npcol = npcol 156 self.ncpus = nprow * npcol 157 self.order = order 158 159 # There are three cases to handle: 160 # 1. Comm is None is inactive (default). 161 # 2. Comm is a legitimate communicator 162 # 3. DryRun 163 if gpaw.dry_run: 164 self.mycol = INACTIVE 165 self.myrow = INACTIVE 166 return 167 168 if comm is not None: # MPI task is part of the communicator 169 if nprow * npcol > comm.size: 170 raise ValueError( 171 'Impossible: %dx%d Blacs grid with %d CPUs' 172 % (nprow, npcol, comm.size)) 173 174 try: 175 new = _gpaw.new_blacs_context 176 except AttributeError as e: 177 raise AttributeError( 178 'BLACS is unavailable. ' 179 'GPAW must be compiled with BLACS/ScaLAPACK, ' 180 'and must run in MPI-enabled interpreter ' 181 '(gpaw-python). Original error: %s' % e) 182 183 self.context = new(comm.get_c_object(), npcol, nprow, order) 184 assert (self.context != INACTIVE) == (comm.rank < nprow * npcol) 185 186 self.mycol, self.myrow = _gpaw.get_blacs_gridinfo(self.context, 187 nprow, 188 npcol) 189 190 @property 191 def coords(self): 192 return self.myrow, self.mycol 193 194 @property 195 def shape(self): 196 return self.nprow, self.npcol 197 198 def coords2rank(self, row, col): 199 return self.nprow * col + row 200 201 def rank2coords(self, rank): 202 col, row = divmod(rank, self.nprow) 203 return row, col 204 205 def new_descriptor(self, M, N, mb, nb, rsrc=0, csrc=0): 206 """Create a new descriptor from this BLACS grid. 207 208 See documentation for BlacsDescriptor.__init__.""" 209 return BlacsDescriptor(self, M, N, mb, nb, rsrc, csrc) 210 211 def is_active(self): 212 """Whether context is active on this rank.""" 213 return self.context != INACTIVE 214 215 def __bool__(self): 216 2 / 0 217 218 def __str__(self): 219 classname = self.__class__.__name__ 220 template = '%s[comm:size=%d,rank=%d; context=%d; %dx%d]' 221 string = template % (classname, self.comm.size, self.comm.rank, 222 self.context, self.nprow, self.npcol) 223 return string 224 225 def __del__(self): 226 if self.is_active(): 227 _gpaw.blacs_destroy(self.context) 228 229 230class DryRunBlacsGrid00000000(BlacsGrid): 231 def __init__(self, comm, nprow, npcol, order='R'): 232 assert (isinstance(comm, SerialCommunicator) or 233 isinstance(comm.comm, SerialCommunicator)) 234 # DryRunCommunicator is subclass 235 236 if nprow * npcol > comm.size: 237 raise ValueError('Impossible: %dx%d Blacs grid with %d CPUs' 238 % (nprow, npcol, comm.size)) 239 self.context = INACTIVE 240 self.comm = comm 241 self.nprow = nprow 242 self.npcol = npcol 243 self.ncpus = nprow * npcol 244 self.mycol, self.myrow = INACTIVE, INACTIVE 245 self.order = order 246 247 248class BlacsDescriptor(MatrixDescriptor): 249 """Class representing a 2D matrix distribution on a blacs grid. 250 251 A BlacsDescriptor represents a particular shape and distribution 252 of matrices. A BlacsDescriptor has a global matrix shape and a 253 rank-dependent local matrix shape. The local shape is not 254 necessarily equal on all ranks. 255 256 A numpy array is said to be compatible with a BlacsDescriptor if, 257 on all ranks, the shape of the numpy array is equal to the local 258 shape of the BlacsDescriptor. Compatible arrays can be created 259 conveniently with the zeros() and empty() methods. 260 261 An array with a global shape of M by N is distributed such that 262 each process gets a number of distinct blocks of size mb by nb. 263 The blocks on one process generally reside in very different areas 264 of the matrix to improve load balance. 265 266 The following chart describes how different ranks (there are 4 267 ranks in this example, 0 through 3) divide the matrix into blocks. 268 This is called 2D block cyclic distribution:: 269 270 +--+--+--+--+..+--+ 271 | 0| 1| 0| 1|..| 1| 272 +--+--+--+--+..+--+ 273 | 2| 3| 2| 3|..| 3| 274 +--+--+--+--+..+--+ 275 | 0| 1| 0| 1|..| 1| 276 +--+--+--+--+..+--+ 277 | 2| 3| 2| 3|..| 3| 278 +--+--+--+--+..+--+ 279 ................... 280 ................... 281 +--+--+--+--+..+--+ 282 | 2| 3| 2| 3|..| 3| 283 +--+--+--+--+..+--+ 284 285 Also refer to: 286 http://acts.nersc.gov/scalapack/hands-on/datadist.html 287 288 Parameters: 289 * blacsgrid: the BLACS grid of processors to distribute matrices. 290 * M: global row count 291 * N: global column count 292 * mb: number of rows per block 293 * nb: number of columns per block 294 * rsrc: rank on which the first row is stored 295 * csrc: rank on which the first column is stored 296 297 Complicated stuff 298 ----------------- 299 300 If there is trouble with matrix shapes, the below caveats are 301 probably the reason. 302 303 Depending on layout, a descriptor may have a local shape of zero 304 by N or something similar. If the row blocksize is 7, the global 305 row count is 10, and the blacs grid contains 3 row processes: The 306 first process will have 7 rows, the next will have 3, and the last 307 will have 0. The shapes in this case must still be correctly 308 given to BLACS functions, which can be confusing. 309 310 A blacs descriptor must also give the correct local leading 311 dimension (lld), which is the local array size along the 312 memory-contiguous direction in the matrix, and thus equal to the 313 local column number, *except* when local shape is zero, but the 314 implementation probably works. 315 316 """ 317 def __init__(self, blacsgrid, M, N, mb, nb, rsrc=0, csrc=0): 318 assert M > 0 319 assert N > 0 320 assert 1 <= mb 321 assert 1 <= nb 322 if mb > M: 323 mb = M 324 if nb > N: 325 nb = N 326 assert 0 <= rsrc < blacsgrid.nprow 327 assert 0 <= csrc < blacsgrid.npcol 328 329 self.blacsgrid = blacsgrid 330 self.M = M # global size 1 331 self.N = N # global size 2 332 self.mb = mb # block cyclic distr dim 1 333 self.nb = nb # and 2. How many rows or columns are on this processor 334 # more info: 335 # http://www.netlib.org/scalapack/slug/node75.html 336 self.rsrc = rsrc 337 self.csrc = csrc 338 339 if blacsgrid.is_active(): 340 locN, locM = _gpaw.get_blacs_local_shape(self.blacsgrid.context, 341 self.N, self.M, 342 self.nb, self.mb, 343 self.csrc, self.rsrc) 344 # max 1 is nonsensical, but appears 345 # to be required by PBLAS 346 self.lld = max(1, locN) 347 else: 348 # ScaLAPACK has no requirements as to what these values on an 349 # inactive blacsgrid should be. This seemed reasonable to me 350 # at the time. 351 locN, locM = 0, 0 352 # This applies only to empty matrices. Some functions will 353 # complain about lld<1, so lld=1 will make those happy. 354 self.lld = 1 355 356 # locM, locN is not allowed to be negative. This will cause the 357 # redistributor to fail. This could happen on active blacsgrid 358 # which does not contain any piece of the distribute matrix. 359 # This is why there is a final check on the value of locM, locN. 360 MatrixDescriptor.__init__(self, max(0, locM), max(0, locN)) 361 362 # This is the definition of inactive descriptor; can occur 363 # on an active or inactive blacs grid. 364 self.active = locM > 0 and locN > 0 365 366 self.bshape = (self.mb, self.nb) # Shape of one block 367 self.gshape = (M, N) # Global shape of array 368 369 def asarray(self): 370 """Return a nine-element array representing this descriptor. 371 372 In the C/Fortran code, a BLACS descriptor is represented by a 373 special array of arcane nature. The value of asarray() must 374 generally be passed to BLACS functions in the C code.""" 375 arr = np.array([BLOCK_CYCLIC_2D, self.blacsgrid.context, 376 self.N, self.M, self.nb, self.mb, self.csrc, self.rsrc, 377 self.lld], np.intc) 378 return arr 379 380 def __repr__(self): 381 classname = self.__class__.__name__ 382 template = '%s[context=%d, glob %s, block %s, lld %d, loc %s]' 383 string = template % (classname, self.blacsgrid.context, 384 self.gshape, 385 self.bshape, self.lld, self.shape) 386 return string 387 388 def index2grid(self, row, col): 389 """Get the BLACS grid coordinates storing global index (row, col).""" 390 assert row < self.gshape[0], (row, col, self.gshape) 391 assert col < self.gshape[1], (row, col, self.gshape) 392 gridx = (row // self.bshape[0]) % self.blacsgrid.nprow 393 gridy = (col // self.bshape[1]) % self.blacsgrid.npcol 394 return gridx, gridy 395 396 def index2rank(self, row, col): 397 """Get the rank where global index (row, col) is stored.""" 398 return self.blacsgrid.coords2rank(*self.index2grid(row, col)) 399 400 def diagonalize_dc(self, H_nn, C_nn, eps_N, UL='L'): 401 """See documentation in gpaw/utilities/blacs.py.""" 402 scalapack_diagonalize_dc(self, H_nn, C_nn, eps_N, UL) 403 404 def diagonalize_ex(self, H_nn, C_nn, eps_N, UL='L', iu=None): 405 """See documentation in gpaw/utilities/blacs.py.""" 406 scalapack_diagonalize_ex(self, H_nn, C_nn, eps_N, UL, iu=iu) 407 408 def diagonalize_mr3(self, H_nn, C_nn, eps_N, UL='L', iu=None): 409 """See documentation in gpaw/utilities/blacs.py.""" 410 scalapack_diagonalize_mr3(self, H_nn, C_nn, eps_N, UL, iu=iu) 411 412 def general_diagonalize_dc(self, H_mm, S_mm, C_mm, eps_M, 413 UL='L'): 414 """See documentation in gpaw/utilities/blacs.py.""" 415 scalapack_general_diagonalize_dc(self, H_mm, S_mm, C_mm, eps_M, 416 UL) 417 418 def general_diagonalize_ex(self, H_mm, S_mm, C_mm, eps_M, 419 UL='L', iu=None): 420 """See documentation in gpaw/utilities/blacs.py.""" 421 scalapack_general_diagonalize_ex(self, H_mm, S_mm, C_mm, eps_M, 422 UL, iu=iu) 423 424 def general_diagonalize_mr3(self, H_mm, S_mm, C_mm, eps_M, 425 UL='L', iu=None): 426 """See documentation in gpaw/utilities/blacs.py.""" 427 scalapack_general_diagonalize_mr3(self, H_mm, S_mm, C_mm, eps_M, 428 UL, iu=iu) 429 430 def inverse_cholesky(self, S_nn, UL='L'): 431 """See documentation in gpaw/utilities/blacs.py.""" 432 scalapack_inverse_cholesky(self, S_nn, UL) 433 434 def my_blocks(self, array_mn): 435 """Yield the local blocks and their global index limits. 436 437 Yields tuples of the form (Mstart, Mstop, Nstart, Nstop, block), 438 for each locally stored block of the array. 439 """ 440 if not self.check(array_mn): 441 raise ValueError('Bad array shape (%s vs %s)' % (self, 442 array_mn.shape)) 443 444 grid = self.blacsgrid 445 mb = self.mb 446 nb = self.nb 447 myrow = grid.myrow 448 mycol = grid.mycol 449 nprow = grid.nprow 450 npcol = grid.npcol 451 M, N = self.gshape 452 453 Mmyblocks = -(-self.shape[0] // mb) 454 Nmyblocks = -(-self.shape[1] // nb) 455 for Mblock in range(Mmyblocks): 456 for Nblock in range(Nmyblocks): 457 myMstart = Mblock * mb 458 myNstart = Nblock * nb 459 Mstart = myrow * mb + Mblock * mb * nprow 460 Nstart = mycol * mb + Nblock * nb * npcol 461 Mstop = min(Mstart + mb, M) 462 Nstop = min(Nstart + nb, N) 463 block = array_mn[myMstart:myMstart + mb, 464 myNstart:myNstart + nb] 465 466 yield Mstart, Mstop, Nstart, Nstop, block 467 468 def as_serial(self): 469 return self.blacsgrid.new_descriptor(self.M, self.N, self.M, self.N) 470 471 def redistribute(self, otherdesc, src_mn, dst_mn=None, 472 subM=None, subN=None, ia=0, ja=0, ib=0, jb=0, uplo='G'): 473 if self.blacsgrid != otherdesc.blacsgrid: 474 raise ValueError('Cannot redistribute to other BLACS grid. ' 475 'Requires using Redistributor class explicitly') 476 if dst_mn is None: 477 dst_mn = otherdesc.empty(dtype=src_mn.dtype) 478 r = Redistributor(self.blacsgrid.comm, self, otherdesc) 479 r.redistribute(src_mn, dst_mn, subM, subN, ia, ja, ib, jb, uplo) 480 return dst_mn 481 482 def collect_on_master(self, src_mn, dst_mn=None, uplo='G'): 483 desc = self.as_serial() 484 return self.redistribute(desc, src_mn, dst_mn, uplo=uplo) 485 486 def distribute_from_master(self, src_mn, dst_mn=None, uplo='G'): 487 desc = self.as_serial() 488 return desc.redistribute(self, src_mn, dst_mn, uplo=uplo) 489 490 491class Redistributor: 492 """Class for redistributing BLACS matrices on different contexts.""" 493 def __init__(self, supercomm, srcdescriptor, dstdescriptor): 494 """Create redistributor. 495 496 Source and destination descriptors may reside on different 497 BLACS grids, but the descriptors should describe arrays with 498 the same number of elements. 499 500 The communicators of the BLACS grid of srcdescriptor as well 501 as that of dstdescriptor *must* both be subcommunicators of 502 supercomm. 503 504 Allowed values of UPLO are: G for general matrix, U for upper 505 triangular and L for lower triangular. The latter two are useful 506 for symmetric matrices.""" 507 self.supercomm = supercomm 508 self.supercomm_bg = BlacsGrid(self.supercomm, self.supercomm.size, 1) 509 self.srcdescriptor = srcdescriptor 510 self.dstdescriptor = dstdescriptor 511 512 def redistribute(self, src_mn, dst_mn=None, 513 subM=None, subN=None, 514 ia=0, ja=0, ib=0, jb=0, uplo='G'): 515 """Redistribute src_mn into dst_mn. 516 517 src_mn and dst_mn must be compatible with source and 518 destination descriptors of this redistributor. 519 520 If subM and subN are given, distribute only a subM by subN 521 submatrix. 522 523 If any ia, ja, ib and jb are given, they denote the global 524 index (i, j) of the origin of the submatrix inside the source 525 and destination (a, b) matrices.""" 526 527 srcdescriptor = self.srcdescriptor 528 dstdescriptor = self.dstdescriptor 529 dtype = src_mn.dtype 530 531 if dst_mn is None: 532 dst_mn = dstdescriptor.empty(dtype=dtype) 533 534 # self.supercomm must be a supercommunicator of the communicators 535 # corresponding to the context of srcmatrix as well as dstmatrix. 536 # We should verify this somehow. 537 srcdescriptor = self.srcdescriptor 538 dstdescriptor = self.dstdescriptor 539 540 dtype = src_mn.dtype 541 if dst_mn is None: 542 dst_mn = dstdescriptor.zeros(dtype=dtype) 543 544 assert dtype == dst_mn.dtype 545 assert dtype == float or dtype == complex 546 547 # Check to make sure the submatrix of the source 548 # matrix will fit into the destination matrix 549 # plus standard BLACS matrix checks. 550 srcdescriptor.checkassert(src_mn) 551 dstdescriptor.checkassert(dst_mn) 552 553 if subM is None: 554 subM = srcdescriptor.gshape[0] 555 if subN is None: 556 subN = srcdescriptor.gshape[1] 557 558 assert srcdescriptor.gshape[0] >= subM 559 assert srcdescriptor.gshape[1] >= subN 560 assert dstdescriptor.gshape[0] >= subM 561 assert dstdescriptor.gshape[1] >= subN 562 563 # Switch to Fortran conventions 564 uplo = {'U': 'L', 'L': 'U', 'G': 'G'}[uplo] 565 _gpaw.scalapack_redist(srcdescriptor.asarray(), 566 dstdescriptor.asarray(), 567 src_mn, dst_mn, 568 subN, subM, 569 ja + 1, ia + 1, jb + 1, ib + 1, # 1-indexing 570 self.supercomm_bg.context, uplo) 571 return dst_mn 572 573 574def parallelprint(comm, obj): 575 import sys 576 for a in range(comm.size): 577 if a == comm.rank: 578 print('rank=%d' % a) 579 print(obj) 580 print() 581 sys.stdout.flush() 582 comm.barrier() 583