1!===========================================================================
2!
3! Modules:
4!
5! scalapack_m    Originally By DAS
6!
7!   Functions, types, and interfaces for ScaLAPACK/BLACS.
8!   Interfaces are from http://www.netlib.org/scalapack/tools, double, complex16
9!   and from http://www.netlib.org/blacs/BLACS/QRef.html (entered manually...)
10!   Every ScaLAPACK/BLACS function used in the code should be listed here, and this
11!   module should be used in every routine containing ScaLAPACK/BLACS calls to ensure
12!   the argument types are correct.
13!
14!============================================================================
15
16#include "f_defs.h"
17
18module scalapack_m
19
20  use global_m
21  implicit none
22
23  private
24
25  public ::           &
26    scalapack,        &
27    blacs_setup,      &
28    layout_scalapack, &
29    iceil,            &
30    descinit,         &
31    descset,          &
32    pdgesv,           &
33    pzgesv,           &
34    pdsyevx,          &
35    pdgeqrf,          &
36    pzgeqrf,          &
37    pzheevx,          &
38    pdgemr2d,         &
39    pzgemr2d,         &
40    blacs_get,        &
41    blacs_gridinit,   &
42    blacs_gridmap,    &
43    blacs_gridexit,   &
44    blacs_exit
45
46!-----------------------------
47
48  type scalapack
49    integer :: nprow  !< the number of processors in a row of your processor grid
50    integer :: npcol  !< the number of processors in a column of your processor grid
51    integer :: nbl    !< the linear dimension of a block of a distributed matrix
52    integer :: myprow !< the processor`s row coordinate in your processor grid
53    integer :: mypcol !< the processor`s column coordinate in your processor grid
54    integer :: npr    !< the number of rows of the matrix the processor owns
55    integer :: npc    !< the number of columns of the matrix the processor owns
56    integer :: icntxt !< BLACS context; see BLACS documentation
57    integer, pointer :: npcd(:)       !< global list of the number of cols of the matrix owned by all processors
58    integer, pointer :: nprd(:)       !< global list of the number of rows of the matrix owned by all processors
59    integer, pointer :: isrtxrow(:)   !< isrtxrow/isrtxcol give the sorted index of the gvector in a given block
60    integer, pointer :: isrtxcol(:)   !! owned by a processor in terms of the whole list of gvectors
61    integer, pointer :: imycol(:)     !< imyrow/imycol give the row/column index of a g-vector owned by a given
62    integer, pointer :: imyrow(:)     !! processor in the whole matrix
63    integer, pointer :: imycolinv(:)  !< inverse of imycol
64    integer, pointer :: imyrowinv(:)  !! inverse of imyrow
65    integer, pointer :: imycold(:,:)  !< imycold/imyrowd are global lists of the row/column index of g-vectors
66    integer, pointer :: imyrowd(:,:)  !! owned by all the processors in the whole matrix
67  end type scalapack
68
69!> SCALAPACK
70  interface
71    INTEGER FUNCTION ICEIL( INUM, IDENOM )
72      implicit none
73      INTEGER            IDENOM, INUM
74    end FUNCTION ICEIL
75  end interface
76
77  interface
78    SUBROUTINE DESCINIT( DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD, INFO )
79      implicit none
80      INTEGER            ICSRC, ICTXT, INFO, IRSRC, LLD, M, MB, N, NB
81      INTEGER            DESC( * )
82    end SUBROUTINE DESCINIT
83  end interface
84
85  interface
86    SUBROUTINE DESCSET( DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD )
87      implicit none
88      INTEGER            ICSRC, ICTXT, IRSRC, LLD, M, MB, N, NB
89      INTEGER            DESC( * )
90    end SUBROUTINE DESCSET
91  end interface
92
93  interface
94    SUBROUTINE PDGESV( N, NRHS, A, IA, JA, DESCA, IPIV, B, IB, JB, DESCB, INFO )
95      implicit none
96      INTEGER            IA, IB, INFO, JA, JB, N, NRHS
97      INTEGER            DESCA( * ), DESCB( * ), IPIV( * )
98      DOUBLE PRECISION   A( * ), B( * )
99    end SUBROUTINE PDGESV
100  end interface
101
102  interface
103    SUBROUTINE PZGESV( N, NRHS, A, IA, JA, DESCA, IPIV, B, IB, JB, DESCB, INFO )
104      implicit none
105      INTEGER            IA, IB, INFO, JA, JB, N, NRHS
106      INTEGER            DESCA( * ), DESCB( * ), IPIV( * )
107      COMPLEX*16         A( * ), B( * )
108    end SUBROUTINE PZGESV
109  end interface
110
111  interface
112    SUBROUTINE PDSYEVX( JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL, &
113      VU, IL, IU, ABSTOL, M, NZ, W, ORFAC, Z, IZ, JZ, DESCZ, WORK, LWORK, IWORK, LIWORK, IFAIL, &
114      ICLUSTR, GAP, INFO )
115      implicit none
116      CHARACTER          JOBZ, RANGE, UPLO
117      INTEGER            IA, IL, INFO, IU, IZ, JA, JZ, LIWORK, LWORK, M, N, NZ
118      DOUBLE PRECISION   ABSTOL, ORFAC, VL, VU
119      INTEGER            DESCA( * ), DESCZ( * ), ICLUSTR( * ), IFAIL( * ), IWORK( * )
120      DOUBLE PRECISION   A( * ), GAP( * ), W( * ), WORK( * ), Z( * )
121    end SUBROUTINE PDSYEVX
122  end interface
123
124  interface
125    SUBROUTINE PDGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, INFO )
126      implicit none
127      INTEGER            IA, INFO, JA, LWORK, M, N
128      INTEGER            DESCA( * )
129      DOUBLE PRECISION   A( * ), TAU( * ), WORK( * )
130    end SUBROUTINE PDGEQRF
131  end interface
132
133  interface
134    SUBROUTINE PZGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, INFO )
135      implicit none
136      INTEGER            IA, INFO, JA, LWORK, M, N
137      INTEGER            DESCA( * )
138      COMPLEX*16         A( * ), TAU( * ), WORK( * )
139    end SUBROUTINE PZGEQRF
140  end interface
141
142  interface
143    subroutine PZHEEVX( JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL, &
144      VU, IL, IU, ABSTOL, M, NZ, W, ORFAC, Z, IZ,                   &
145      JZ, DESCZ, WORK, LWORK, RWORK, LRWORK, IWORK,                 &
146      LIWORK, IFAIL, ICLUSTR, GAP, INFO )
147      implicit none
148      character          JOBZ, RANGE, UPLO
149      integer            IA, IL, INFO, IU, IZ, JA, JZ, LIWORK, LRWORK, LWORK, M, N, NZ
150      double precision   ABSTOL, ORFAC, VL, VU
151      integer            DESCA( * ), DESCZ( * ), ICLUSTR( * ), IFAIL( * ), IWORK( * )
152      double precision   GAP( * ), RWORK( * ), W( * )
153      complex*16         A( * ), WORK( * ), Z( * )
154    end subroutine PZHEEVX
155  end interface
156
157  interface
158    subroutine PDGEMR2D( M, N, A, IA, JA, DESCA, B, IB, JB, DESCB, ICNTXT)
159      implicit none
160      integer            M, N, IA, JA, IB, JB, DESCA( * ), DESCB( * ), ICNTXT
161      double precision   A( * ), B( * )
162    end subroutine PDGEMR2D
163  end interface
164
165  interface
166    subroutine PZGEMR2D( M, N, A, IA, JA, DESCA, B, IB, JB, DESCB, ICNTXT)
167      implicit none
168      integer            M, N, IA, JA, IB, JB, DESCA( * ), DESCB( * ), ICNTXT
169      double complex     A( * ), B( * )
170    end subroutine PZGEMR2D
171  end interface
172
173!> BLACS
174  interface
175    subroutine blacs_get(icontxt, what, val)
176      implicit none
177      integer, intent(in)  :: icontxt
178      integer, intent(in)  :: what
179      integer, intent(out) :: val
180    end subroutine blacs_get
181  end interface
182
183  interface
184    subroutine blacs_gridinit(icontxt, order, nprow, npcol)
185      implicit none
186      integer,   intent(inout) :: icontxt
187      character, intent(in)    :: order
188      integer,   intent(in)    :: nprow
189      integer,   intent(in)    :: npcol
190    end subroutine blacs_gridinit
191  end interface
192
193  !> note: args are out of order so that ldumap,npcol are declared
194  !! prior to their usage as dimensions of usermap.
195  interface
196    subroutine blacs_gridmap(icontxt, usermap, ldumap, nprow, npcol)
197      implicit none
198      integer,   intent(inout) :: icontxt
199      integer,   intent(in)    :: ldumap
200      integer,   intent(in)    :: nprow
201      integer,   intent(in)    :: npcol
202      integer,   intent(in)    :: usermap(ldumap,npcol)
203    end subroutine blacs_gridmap
204  end interface
205
206  interface
207    subroutine blacs_gridexit(icontxt)
208      implicit none
209      integer, intent(in)  :: icontxt
210    end subroutine blacs_gridexit
211  end interface
212
213  interface
214    subroutine blacs_exit(icontxt)
215      implicit none
216      integer, intent(in)  :: icontxt
217    end subroutine blacs_exit
218  end interface
219
220  interface
221    subroutine blacs_gridinfo(icontxt, nprow, npcol, myprow, mypcol)
222      implicit none
223      integer, intent(in)  :: icontxt
224      integer, intent(out) :: nprow
225      integer, intent(out) :: npcol
226      integer, intent(out) :: myprow
227      integer, intent(out) :: mypcol
228    end subroutine blacs_gridinfo
229  end interface
230
231contains
232
233!>--------------------------------------------------------------------------
234!! Originally by AC, last modified 6/12/2008 (JRD)
235!!    Figures out a p by q processor grid layout for the scalapack library.
236!!    This p by q grid is used to partition the matrix with a block size b.
237!!    The goal is to get a processor grid which is as close to "square" as
238!!    possible. For more details, see scalapack documentation.
239!!
240!!    Input         nproc          number of processors
241!!                  matsize        size of matrix
242!!
243!!    Output        nbl              block size
244!!                  nprow            processor grid row
245!!                  npcol            processor grid column
246  subroutine layout_scalapack(matsize, nbl, nproc, nprow, npcol)
247    integer, intent(in) :: matsize
248    integer, intent(out) :: nbl
249    integer, intent(in) :: nproc
250    integer, intent(out) :: nprow, npcol
251
252    integer :: i
253
254    PUSH_SUB(layout_scalapack)
255
256!------------------
257! Find processor grid
258
259    nprow = int(sqrt(dble(nproc) + 1.0d-6))
260
261    do i = nprow, 1, -1
262      if(mod(nproc, i) .eq. 0) exit
263    enddo
264
265    nprow = i
266    npcol = nproc/nprow
267
268!-------------------
269! Now for the block size
270
271    nbl = min(32, matsize/(max(nprow, npcol)))
272
273!-------------------
274! Ensure nonzero
275
276    nbl = max(nbl, 1)
277
278    POP_SUB(layout_scalapack)
279
280    return
281  end subroutine layout_scalapack
282
283!--------------------------------------------------------------------------
284  subroutine blacs_setup(scal, matsize, is_row_order,nppgroup_f,nfreq_group,np_left)
285    type(scalapack), intent(inout) :: scal !< other elements might have been set earlier
286    integer, intent(in) :: matsize
287    logical, intent(in) :: is_row_order
288    integer, intent(in),optional :: nppgroup_f !< # of proc. per freq. group
289    integer, intent(in),optional :: nfreq_group !< number of parallel frequencies
290    integer, intent(in),optional :: np_left !< # of proc. leftover for parallel frequencies
291
292    character :: order
293    integer :: iw,ir,ic,npe
294    logical :: custom_grid
295    integer,allocatable :: usermap(:,:)
296
297    PUSH_SUB(blacs_setup)
298
299#ifdef USESCALAPACK
300
301    npe = peinf%npes
302
303    if (present(nppgroup_f)) then
304      npe = nppgroup_f
305    endif
306
307! DVF : For the group of leftover processors for parallel frequencies
308! We need this value for npe in order to get layout_scalapack to run
309! right, while we still use npp_group_f when defining usermap to get
310! the right processors id`s. Complicated.
311    if (present(np_left)) then
312      npe = np_left
313    endif
314
315    call layout_scalapack(matsize, scal%nbl, npe, &
316      scal%nprow, scal%npcol)
317
318    custom_grid=.false.
319    if (present(nfreq_group)) then
320      custom_grid = nfreq_group > 1
321    endif
322
323    ! FHJ: only bother to create a custom grid if we are doing parallel freqs.
324    if (custom_grid) then
325
326      if(is_row_order) then
327        call die("grid map requires a column-major grid defined in usermap")
328      endif
329
330      SAFE_ALLOCATE(usermap,(scal%nprow,scal%npcol))
331
332      usermap=0
333      if (.not. present(np_left)) then
334        iw = 1 + peinf%inode/nppgroup_f
335      else
336        iw = 1 + nfreq_group   ! DVF: All processors leftover are in same group, so
337      endif                    ! so there`s no dependence on peinf%inode needed.
338
339      call blacs_get(-1, 0, scal%icntxt)
340
341      do ir=1,scal%nprow
342        do ic=1,scal%npcol
343          usermap(ir,ic)=(iw-1)*nppgroup_f+(ic-1)*scal%nprow+ir-1
344        enddo
345      enddo
346
347      call blacs_gridmap(scal%icntxt,usermap,scal%nprow,scal%nprow, scal%npcol)
348      SAFE_DEALLOCATE(usermap)
349      call blacs_gridinfo(scal%icntxt, scal%nprow, scal%npcol, scal%myprow, scal%mypcol)
350      if (peinf%verb_debug) then
351        ! FHJ: FIXME: barriers are ugly, and this is only a temporary hack to
352        !      make the output more readable. This block should be removed for
353        !      good as soon as we are confident that BLACS is no longer playing
354        !      tricks on us.
355        call MPI_Barrier(MPI_COMM_WORLD, mpierr)
356        write(*,'(4(a,i5,1x))') 'rank=', peinf%inode, 'freq. group=', iw, &
357          'row=', scal%myprow, 'col=', scal%mypcol
358        call MPI_Barrier(MPI_COMM_WORLD, mpierr)
359      endif
360    else
361
362      if(is_row_order) then
363        order = 'r'
364      else
365        order = 'c'
366      endif
367      call blacs_get(-1, 0, scal%icntxt)
368      call blacs_gridinit(scal%icntxt, order, scal%nprow, scal%npcol)
369      call blacs_gridinfo(scal%icntxt, scal%nprow, scal%npcol, scal%myprow, scal%mypcol)
370
371    endif
372    scal%npr = numroc(matsize, scal%nbl, scal%myprow, 0, scal%nprow)
373    scal%npc = numroc(matsize, scal%nbl, scal%mypcol, 0, scal%npcol)
374
375    if (peinf%inode.eq.0) then
376      write(6,*)
377      write(6,'(1x,a,i3,a,i3,a,i4)') 'BLACS processor grid: ', scal%nprow, &
378        ' x ', scal%npcol, '; BLOCKSIZE = ', scal%nbl
379      write(6,*)
380    endif
381
382    if(scal%myprow.eq.-1) then
383      call die('BLACS initialization returned myprow = -1')
384    endif
385
386#else
387    scal%npr=matsize
388    scal%npc=matsize
389    scal%nbl=matsize
390    scal%nprow=1
391    scal%npcol=1
392    scal%myprow=0
393    scal%mypcol=0
394#endif
395
396    POP_SUB(blacs_setup)
397    return
398  end subroutine blacs_setup
399
400
401end module scalapack_m
402