1# Copyright (C) 2010  CAMd
2# Copyright (C) 2010  Argonne National Laboratory
3# Please see the accompanying LICENSE file for further information.
4
5"""Module for high-level BLACS interface.
6
7Usage
8=====
9
10A BLACS grid is a logical grid of processors.  To use BLACS, first
11create a BLACS grid.  If comm contains 8 or more ranks, this example
12will work::
13
14  from gpaw.mpi import world
15  from gpaw.blacs import BlacsGrid
16  grid = BlacsGrid(world, 4, 2)
17
18Use the processor grid to create various descriptors for distributed
19arrays::
20
21  block_desc = grid.new_descriptor(500, 500, 64, 64)
22  local_desc = grid.new_descriptor(500, 500, 500, 500)
23
24The first descriptor describes 500 by 500 arrays distributed amongst
25the 8 CPUs of the BLACS grid in blocks of 64 by 64 elements (which is
26a sensible block size).  That means each CPU has many blocks located
27all over the array::
28
29  print(world.rank, block_desc.shape, block_desc.gshape)
30
31Here block_desc.shape is the local array shape while gshape is the
32global shape.  The local array shape varies a bit on each CPU as the
33block distribution may be slightly uneven.
34
35The second descriptor, local_desc, has a block size equal to the
36global size of the array, and will therefore only have one block.
37This block will then reside on the first CPU -- local_desc therefore
38represents non-distributed arrays.  Let us instantiate some arrays::
39
40  H_MM = local_desc.empty()
41
42  if world.rank == 0:
43      assert H_MM.shape == (500, 500)
44      H_MM[:, :] = calculate_hamiltonian_or_something()
45  else:
46      assert H_MM.shape[0] == 0 or H_MM.shape[1] == 0
47
48  H_mm = block_desc.empty()
49  print(H_mm.shape)  # many elements on all CPUs
50
51We can then redistribute the local H_MM into H_mm::
52
53  from gpaw.blacs import Redistributor
54  redistributor = Redistributor(world, local_desc, block_desc)
55  redistributor.redistribute(H_MM, H_mm)
56
57Now we can run parallel linear algebra on H_mm.  This will diagonalize
58H_mm, place the eigenvectors in C_mm and the eigenvalues globally in
59eps_M::
60
61  eps_M = np.empty(500)
62  C_mm = block_desc.empty()
63  block_desc.diagonalize_ex(H_mm, C_mm, eps_M)
64
65We can redistribute C_mm back to the master process if we want::
66
67  C_MM = local_desc.empty()
68  redistributor2 = Redistributor(world, block_desc, local_desc)
69  redistributor2.redistribute(C_mm, C_MM)
70
71If somebody wants to do all this more easily, they will probably write
72a function for that.
73
74List of interesting classes
75===========================
76
77 * BlacsGrid
78 * BlacsDescriptor
79 * Redistributor
80
81The other classes in this module are coded specifically for GPAW and
82are inconvenient to use otherwise.
83
84The module gpaw.utilities.blacs contains several functions like gemm,
85gemv and r2k.  These functions may or may not have appropriate
86docstings, and may use Fortran-like variable naming.  Also, either
87this module or gpaw.utilities.blacs will be renamed at some point.
88
89"""
90
91import numpy as np
92
93import gpaw
94from gpaw.mpi import SerialCommunicator
95from gpaw.matrix_descriptor import MatrixDescriptor
96from gpaw.utilities.scalapack import scalapack_inverse_cholesky, \
97    scalapack_diagonalize_ex, scalapack_general_diagonalize_ex, \
98    scalapack_diagonalize_dc, scalapack_general_diagonalize_dc, \
99    scalapack_diagonalize_mr3, scalapack_general_diagonalize_mr3
100import _gpaw
101
102
103INACTIVE = -1
104BLOCK_CYCLIC_2D = 1
105
106
107class BlacsGrid:
108    """Class representing a 2D grid of processors sharing a Blacs context.
109
110    A BLACS grid defines a logical M by N ordering of a collection of
111    CPUs.  A BLACS grid can be used to create BLACS descriptors.  On
112    an npcol by nprow BLACS grid, a matrix is distributed amongst M by
113    N CPUs along columns and rows, respectively, while the matrix
114    shape and blocking properties are determined by the descriptors.
115
116    Use the method new_descriptor() to create any number of BLACS
117    descriptors sharing the same CPU layout.
118
119    Most matrix operations require the involved matrices to all be on
120    the same BlacsGrid.  Use a Redistributor to redistribute matrices
121    from one BLACS grid to another if necessary.
122
123    Parameters::
124
125      * comm:  MPI communicator for CPUs of the BLACS grid or None.  A BLACS
126        grid may use all or some of the CPUs of the communicator.
127      * nprow:  Number of CPU rows.
128      * npcol: Number of CPU columns.
129      * order: 'R' or 'C', meaning rows or columns.  I'm not sure what this
130        does, it probably interchanges the meaning of rows and columns. XXX
131
132    Complicated stuff
133    -----------------
134
135    It may be useful to know that a BLACS grid is said to be active
136    and will evaluate to True on any process where comm is not None
137    *and* comm.rank < nprow * npcol.  Otherwise it is considered
138    inactive and evaluates to False.  Ranks where a grid is inactive
139    never do anything at all.
140
141    BLACS identifies each grid by a unique ID number called the
142    context (frequently abbreviated ConTxt).  Grids on inactive ranks
143    have context -1."""
144    def __init__(self, comm, nprow, npcol, order='R'):
145        assert nprow > 0
146        assert npcol > 0
147        assert len(order) == 1
148        assert order in 'CcRr'
149        # set a default value for the context leads to fewer
150        # if statements below
151        self.context = INACTIVE
152
153        self.comm = comm
154        self.nprow = nprow
155        self.npcol = npcol
156        self.ncpus = nprow * npcol
157        self.order = order
158
159        # There are three cases to handle:
160        # 1. Comm is None is inactive (default).
161        # 2. Comm is a legitimate communicator
162        # 3. DryRun
163        if gpaw.dry_run:
164            self.mycol = INACTIVE
165            self.myrow = INACTIVE
166            return
167
168        if comm is not None:  # MPI task is part of the communicator
169            if nprow * npcol > comm.size:
170                raise ValueError(
171                    'Impossible: %dx%d Blacs grid with %d CPUs'
172                    % (nprow, npcol, comm.size))
173
174            try:
175                new = _gpaw.new_blacs_context
176            except AttributeError as e:
177                raise AttributeError(
178                    'BLACS is unavailable.  '
179                    'GPAW must be compiled with BLACS/ScaLAPACK, '
180                    'and must run in MPI-enabled interpreter '
181                    '(gpaw-python).  Original error: %s' % e)
182
183            self.context = new(comm.get_c_object(), npcol, nprow, order)
184            assert (self.context != INACTIVE) == (comm.rank < nprow * npcol)
185
186        self.mycol, self.myrow = _gpaw.get_blacs_gridinfo(self.context,
187                                                          nprow,
188                                                          npcol)
189
190    @property
191    def coords(self):
192        return self.myrow, self.mycol
193
194    @property
195    def shape(self):
196        return self.nprow, self.npcol
197
198    def coords2rank(self, row, col):
199        return self.nprow * col + row
200
201    def rank2coords(self, rank):
202        col, row = divmod(rank, self.nprow)
203        return row, col
204
205    def new_descriptor(self, M, N, mb, nb, rsrc=0, csrc=0):
206        """Create a new descriptor from this BLACS grid.
207
208        See documentation for BlacsDescriptor.__init__."""
209        return BlacsDescriptor(self, M, N, mb, nb, rsrc, csrc)
210
211    def is_active(self):
212        """Whether context is active on this rank."""
213        return self.context != INACTIVE
214
215    def __bool__(self):
216        2 / 0
217
218    def __str__(self):
219        classname = self.__class__.__name__
220        template = '%s[comm:size=%d,rank=%d; context=%d; %dx%d]'
221        string = template % (classname, self.comm.size, self.comm.rank,
222                             self.context, self.nprow, self.npcol)
223        return string
224
225    def __del__(self):
226        if self.is_active():
227            _gpaw.blacs_destroy(self.context)
228
229
230class DryRunBlacsGrid00000000(BlacsGrid):
231    def __init__(self, comm, nprow, npcol, order='R'):
232        assert (isinstance(comm, SerialCommunicator) or
233                isinstance(comm.comm, SerialCommunicator))
234        # DryRunCommunicator is subclass
235
236        if nprow * npcol > comm.size:
237            raise ValueError('Impossible: %dx%d Blacs grid with %d CPUs'
238                             % (nprow, npcol, comm.size))
239        self.context = INACTIVE
240        self.comm = comm
241        self.nprow = nprow
242        self.npcol = npcol
243        self.ncpus = nprow * npcol
244        self.mycol, self.myrow = INACTIVE, INACTIVE
245        self.order = order
246
247
248class BlacsDescriptor(MatrixDescriptor):
249    """Class representing a 2D matrix distribution on a blacs grid.
250
251    A BlacsDescriptor represents a particular shape and distribution
252    of matrices.  A BlacsDescriptor has a global matrix shape and a
253    rank-dependent local matrix shape.  The local shape is not
254    necessarily equal on all ranks.
255
256    A numpy array is said to be compatible with a BlacsDescriptor if,
257    on all ranks, the shape of the numpy array is equal to the local
258    shape of the BlacsDescriptor.  Compatible arrays can be created
259    conveniently with the zeros() and empty() methods.
260
261    An array with a global shape of M by N is distributed such that
262    each process gets a number of distinct blocks of size mb by nb.
263    The blocks on one process generally reside in very different areas
264    of the matrix to improve load balance.
265
266    The following chart describes how different ranks (there are 4
267    ranks in this example, 0 through 3) divide the matrix into blocks.
268    This is called 2D block cyclic distribution::
269
270        +--+--+--+--+..+--+
271        | 0| 1| 0| 1|..| 1|
272        +--+--+--+--+..+--+
273        | 2| 3| 2| 3|..| 3|
274        +--+--+--+--+..+--+
275        | 0| 1| 0| 1|..| 1|
276        +--+--+--+--+..+--+
277        | 2| 3| 2| 3|..| 3|
278        +--+--+--+--+..+--+
279        ...................
280        ...................
281        +--+--+--+--+..+--+
282        | 2| 3| 2| 3|..| 3|
283        +--+--+--+--+..+--+
284
285    Also refer to:
286    http://acts.nersc.gov/scalapack/hands-on/datadist.html
287
288    Parameters:
289     * blacsgrid: the BLACS grid of processors to distribute matrices.
290     * M: global row count
291     * N: global column count
292     * mb: number of rows per block
293     * nb: number of columns per block
294     * rsrc: rank on which the first row is stored
295     * csrc: rank on which the first column is stored
296
297    Complicated stuff
298    -----------------
299
300    If there is trouble with matrix shapes, the below caveats are
301    probably the reason.
302
303    Depending on layout, a descriptor may have a local shape of zero
304    by N or something similar.  If the row blocksize is 7, the global
305    row count is 10, and the blacs grid contains 3 row processes: The
306    first process will have 7 rows, the next will have 3, and the last
307    will have 0.  The shapes in this case must still be correctly
308    given to BLACS functions, which can be confusing.
309
310    A blacs descriptor must also give the correct local leading
311    dimension (lld), which is the local array size along the
312    memory-contiguous direction in the matrix, and thus equal to the
313    local column number, *except* when local shape is zero, but the
314    implementation probably works.
315
316    """
317    def __init__(self, blacsgrid, M, N, mb, nb, rsrc=0, csrc=0):
318        assert M > 0
319        assert N > 0
320        assert 1 <= mb
321        assert 1 <= nb
322        if mb > M:
323            mb = M
324        if nb > N:
325            nb = N
326        assert 0 <= rsrc < blacsgrid.nprow
327        assert 0 <= csrc < blacsgrid.npcol
328
329        self.blacsgrid = blacsgrid
330        self.M = M  # global size 1
331        self.N = N  # global size 2
332        self.mb = mb  # block cyclic distr dim 1
333        self.nb = nb  # and 2.  How many rows or columns are on this processor
334        # more info:
335        # http://www.netlib.org/scalapack/slug/node75.html
336        self.rsrc = rsrc
337        self.csrc = csrc
338
339        if blacsgrid.is_active():
340            locN, locM = _gpaw.get_blacs_local_shape(self.blacsgrid.context,
341                                                     self.N, self.M,
342                                                     self.nb, self.mb,
343                                                     self.csrc, self.rsrc)
344            # max 1 is nonsensical, but appears
345            # to be required by PBLAS
346            self.lld = max(1, locN)
347        else:
348            # ScaLAPACK has no requirements as to what these values on an
349            # inactive blacsgrid should be. This seemed reasonable to me
350            # at the time.
351            locN, locM = 0, 0
352            # This applies only to empty matrices.  Some functions will
353            # complain about lld<1, so lld=1 will make those happy.
354            self.lld = 1
355
356        # locM, locN is not allowed to be negative. This will cause the
357        # redistributor to fail. This could happen on active blacsgrid
358        # which does not contain any piece of the distribute matrix.
359        # This is why there is a final check on the value of locM, locN.
360        MatrixDescriptor.__init__(self, max(0, locM), max(0, locN))
361
362        # This is the definition of inactive descriptor; can occur
363        # on an active or inactive blacs grid.
364        self.active = locM > 0 and locN > 0
365
366        self.bshape = (self.mb, self.nb)  # Shape of one block
367        self.gshape = (M, N)  # Global shape of array
368
369    def asarray(self):
370        """Return a nine-element array representing this descriptor.
371
372        In the C/Fortran code, a BLACS descriptor is represented by a
373        special array of arcane nature.  The value of asarray() must
374        generally be passed to BLACS functions in the C code."""
375        arr = np.array([BLOCK_CYCLIC_2D, self.blacsgrid.context,
376                        self.N, self.M, self.nb, self.mb, self.csrc, self.rsrc,
377                        self.lld], np.intc)
378        return arr
379
380    def __repr__(self):
381        classname = self.__class__.__name__
382        template = '%s[context=%d, glob %s, block %s, lld %d, loc %s]'
383        string = template % (classname, self.blacsgrid.context,
384                             self.gshape,
385                             self.bshape, self.lld, self.shape)
386        return string
387
388    def index2grid(self, row, col):
389        """Get the BLACS grid coordinates storing global index (row, col)."""
390        assert row < self.gshape[0], (row, col, self.gshape)
391        assert col < self.gshape[1], (row, col, self.gshape)
392        gridx = (row // self.bshape[0]) % self.blacsgrid.nprow
393        gridy = (col // self.bshape[1]) % self.blacsgrid.npcol
394        return gridx, gridy
395
396    def index2rank(self, row, col):
397        """Get the rank where global index (row, col) is stored."""
398        return self.blacsgrid.coords2rank(*self.index2grid(row, col))
399
400    def diagonalize_dc(self, H_nn, C_nn, eps_N, UL='L'):
401        """See documentation in gpaw/utilities/blacs.py."""
402        scalapack_diagonalize_dc(self, H_nn, C_nn, eps_N, UL)
403
404    def diagonalize_ex(self, H_nn, C_nn, eps_N, UL='L', iu=None):
405        """See documentation in gpaw/utilities/blacs.py."""
406        scalapack_diagonalize_ex(self, H_nn, C_nn, eps_N, UL, iu=iu)
407
408    def diagonalize_mr3(self, H_nn, C_nn, eps_N, UL='L', iu=None):
409        """See documentation in gpaw/utilities/blacs.py."""
410        scalapack_diagonalize_mr3(self, H_nn, C_nn, eps_N, UL, iu=iu)
411
412    def general_diagonalize_dc(self, H_mm, S_mm, C_mm, eps_M,
413                               UL='L'):
414        """See documentation in gpaw/utilities/blacs.py."""
415        scalapack_general_diagonalize_dc(self, H_mm, S_mm, C_mm, eps_M,
416                                         UL)
417
418    def general_diagonalize_ex(self, H_mm, S_mm, C_mm, eps_M,
419                               UL='L', iu=None):
420        """See documentation in gpaw/utilities/blacs.py."""
421        scalapack_general_diagonalize_ex(self, H_mm, S_mm, C_mm, eps_M,
422                                         UL, iu=iu)
423
424    def general_diagonalize_mr3(self, H_mm, S_mm, C_mm, eps_M,
425                                UL='L', iu=None):
426        """See documentation in gpaw/utilities/blacs.py."""
427        scalapack_general_diagonalize_mr3(self, H_mm, S_mm, C_mm, eps_M,
428                                          UL, iu=iu)
429
430    def inverse_cholesky(self, S_nn, UL='L'):
431        """See documentation in gpaw/utilities/blacs.py."""
432        scalapack_inverse_cholesky(self, S_nn, UL)
433
434    def my_blocks(self, array_mn):
435        """Yield the local blocks and their global index limits.
436
437        Yields tuples of the form (Mstart, Mstop, Nstart, Nstop, block),
438        for each locally stored block of the array.
439        """
440        if not self.check(array_mn):
441            raise ValueError('Bad array shape (%s vs %s)' % (self,
442                                                             array_mn.shape))
443
444        grid = self.blacsgrid
445        mb = self.mb
446        nb = self.nb
447        myrow = grid.myrow
448        mycol = grid.mycol
449        nprow = grid.nprow
450        npcol = grid.npcol
451        M, N = self.gshape
452
453        Mmyblocks = -(-self.shape[0] // mb)
454        Nmyblocks = -(-self.shape[1] // nb)
455        for Mblock in range(Mmyblocks):
456            for Nblock in range(Nmyblocks):
457                myMstart = Mblock * mb
458                myNstart = Nblock * nb
459                Mstart = myrow * mb + Mblock * mb * nprow
460                Nstart = mycol * mb + Nblock * nb * npcol
461                Mstop = min(Mstart + mb, M)
462                Nstop = min(Nstart + nb, N)
463                block = array_mn[myMstart:myMstart + mb,
464                                 myNstart:myNstart + nb]
465
466                yield Mstart, Mstop, Nstart, Nstop, block
467
468    def as_serial(self):
469        return self.blacsgrid.new_descriptor(self.M, self.N, self.M, self.N)
470
471    def redistribute(self, otherdesc, src_mn, dst_mn=None,
472                     subM=None, subN=None, ia=0, ja=0, ib=0, jb=0, uplo='G'):
473        if self.blacsgrid != otherdesc.blacsgrid:
474            raise ValueError('Cannot redistribute to other BLACS grid.  '
475                             'Requires using Redistributor class explicitly')
476        if dst_mn is None:
477            dst_mn = otherdesc.empty(dtype=src_mn.dtype)
478        r = Redistributor(self.blacsgrid.comm, self, otherdesc)
479        r.redistribute(src_mn, dst_mn, subM, subN, ia, ja, ib, jb, uplo)
480        return dst_mn
481
482    def collect_on_master(self, src_mn, dst_mn=None, uplo='G'):
483        desc = self.as_serial()
484        return self.redistribute(desc, src_mn, dst_mn, uplo=uplo)
485
486    def distribute_from_master(self, src_mn, dst_mn=None, uplo='G'):
487        desc = self.as_serial()
488        return desc.redistribute(self, src_mn, dst_mn, uplo=uplo)
489
490
491class Redistributor:
492    """Class for redistributing BLACS matrices on different contexts."""
493    def __init__(self, supercomm, srcdescriptor, dstdescriptor):
494        """Create redistributor.
495
496        Source and destination descriptors may reside on different
497        BLACS grids, but the descriptors should describe arrays with
498        the same number of elements.
499
500        The communicators of the BLACS grid of srcdescriptor as well
501        as that of dstdescriptor *must* both be subcommunicators of
502        supercomm.
503
504        Allowed values of UPLO are: G for general matrix, U for upper
505        triangular and L for lower triangular. The latter two are useful
506        for symmetric matrices."""
507        self.supercomm = supercomm
508        self.supercomm_bg = BlacsGrid(self.supercomm, self.supercomm.size, 1)
509        self.srcdescriptor = srcdescriptor
510        self.dstdescriptor = dstdescriptor
511
512    def redistribute(self, src_mn, dst_mn=None,
513                     subM=None, subN=None,
514                     ia=0, ja=0, ib=0, jb=0, uplo='G'):
515        """Redistribute src_mn into dst_mn.
516
517        src_mn and dst_mn must be compatible with source and
518        destination descriptors of this redistributor.
519
520        If subM and subN are given, distribute only a subM by subN
521        submatrix.
522
523        If any ia, ja, ib and jb are given, they denote the global
524        index (i, j) of the origin of the submatrix inside the source
525        and destination (a, b) matrices."""
526
527        srcdescriptor = self.srcdescriptor
528        dstdescriptor = self.dstdescriptor
529        dtype = src_mn.dtype
530
531        if dst_mn is None:
532            dst_mn = dstdescriptor.empty(dtype=dtype)
533
534        # self.supercomm must be a supercommunicator of the communicators
535        # corresponding to the context of srcmatrix as well as dstmatrix.
536        # We should verify this somehow.
537        srcdescriptor = self.srcdescriptor
538        dstdescriptor = self.dstdescriptor
539
540        dtype = src_mn.dtype
541        if dst_mn is None:
542            dst_mn = dstdescriptor.zeros(dtype=dtype)
543
544        assert dtype == dst_mn.dtype
545        assert dtype == float or dtype == complex
546
547        # Check to make sure the submatrix of the source
548        # matrix will fit into the destination matrix
549        # plus standard BLACS matrix checks.
550        srcdescriptor.checkassert(src_mn)
551        dstdescriptor.checkassert(dst_mn)
552
553        if subM is None:
554            subM = srcdescriptor.gshape[0]
555        if subN is None:
556            subN = srcdescriptor.gshape[1]
557
558        assert srcdescriptor.gshape[0] >= subM
559        assert srcdescriptor.gshape[1] >= subN
560        assert dstdescriptor.gshape[0] >= subM
561        assert dstdescriptor.gshape[1] >= subN
562
563        # Switch to Fortran conventions
564        uplo = {'U': 'L', 'L': 'U', 'G': 'G'}[uplo]
565        _gpaw.scalapack_redist(srcdescriptor.asarray(),
566                               dstdescriptor.asarray(),
567                               src_mn, dst_mn,
568                               subN, subM,
569                               ja + 1, ia + 1, jb + 1, ib + 1,  # 1-indexing
570                               self.supercomm_bg.context, uplo)
571        return dst_mn
572
573
574def parallelprint(comm, obj):
575    import sys
576    for a in range(comm.size):
577        if a == comm.rank:
578            print('rank=%d' % a)
579            print(obj)
580            print()
581            sys.stdout.flush()
582        comm.barrier()
583