1      SUBROUTINE PDLAQSY( UPLO, N, A, IA, JA, DESCA, SR, SC, SCOND,
2     $                    AMAX, EQUED )
3*
4*  -- ScaLAPACK auxiliary routine (version 1.7) --
5*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6*     and University of California, Berkeley.
7*     May 1, 1997
8*
9*     .. Scalar Arguments ..
10      CHARACTER          EQUED, UPLO
11      INTEGER            IA, JA, N
12      DOUBLE PRECISION   AMAX, SCOND
13*     ..
14*     .. Array Arguments ..
15      INTEGER            DESCA( * )
16      DOUBLE PRECISION   A( * ), SC( * ), SR( * )
17*     ..
18*
19*  Purpose
20*  =======
21*
22*  PDLAQSY equilibrates a symmetric distributed matrix
23*  sub( A ) = A(IA:IA+N-1,JA:JA+N-1) using the scaling factors in the
24*  vectors SR and SC.
25*
26*  Notes
27*  =====
28*
29*  Each global data object is described by an associated description
30*  vector.  This vector stores the information required to establish
31*  the mapping between an object element and its corresponding process
32*  and memory location.
33*
34*  Let A be a generic term for any 2D block cyclicly distributed array.
35*  Such a global array has an associated description vector DESCA.
36*  In the following comments, the character _ should be read as
37*  "of the global array".
38*
39*  NOTATION        STORED IN      EXPLANATION
40*  --------------- -------------- --------------------------------------
41*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
42*                                 DTYPE_A = 1.
43*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
44*                                 the BLACS process grid A is distribu-
45*                                 ted over. The context itself is glo-
46*                                 bal, but the handle (the integer
47*                                 value) may vary.
48*  M_A    (global) DESCA( M_ )    The number of rows in the global
49*                                 array A.
50*  N_A    (global) DESCA( N_ )    The number of columns in the global
51*                                 array A.
52*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
53*                                 the rows of the array.
54*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
55*                                 the columns of the array.
56*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
57*                                 row of the array A is distributed.
58*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
59*                                 first column of the array A is
60*                                 distributed.
61*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
62*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
63*
64*  Let K be the number of rows or columns of a distributed matrix,
65*  and assume that its process grid has dimension p x q.
66*  LOCr( K ) denotes the number of elements of K that a process
67*  would receive if K were distributed over the p processes of its
68*  process column.
69*  Similarly, LOCc( K ) denotes the number of elements of K that a
70*  process would receive if K were distributed over the q processes of
71*  its process row.
72*  The values of LOCr() and LOCc() may be determined via a call to the
73*  ScaLAPACK tool function, NUMROC:
74*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
75*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
76*  An upper bound for these quantities may be computed by:
77*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
78*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
79*
80*  Arguments
81*  =========
82*
83*  UPLO    (global input) CHARACTER
84*          Specifies whether the upper or lower triangular part of the
85*          symmetric distributed matrix sub( A ) is to be referenced:
86*             = 'U':  Upper triangular
87*             = 'L':  Lower triangular
88*
89*  N       (global input) INTEGER
90*          The number of rows and columns to be operated on, i.e. the
91*          order of the distributed submatrix sub( A ). N >= 0.
92*
93*  A       (input/output) DOUBLE PRECISION pointer into the local
94*          memory to an array of local dimension (LLD_A,LOCc(JA+N-1)).
95*          On entry, the local pieces of the distributed symmetric
96*          matrix sub( A ). If UPLO = 'U', the leading N-by-N upper
97*          triangular part of sub( A ) contains the upper triangular
98*          part of the matrix, and the strictly lower triangular part
99*          of sub( A ) is not referenced.  If UPLO = 'L', the leading
100*          N-by-N lower triangular part of sub( A ) contains the lower
101*          triangular part of the matrix, and the strictly upper trian-
102*          gular part of sub( A ) is not referenced.
103*          On exit, if EQUED = 'Y', the equilibrated matrix:
104*              diag(SR(IA:IA+N-1)) * sub( A ) * diag(SC(JA:JA+N-1)).
105*
106*  IA      (global input) INTEGER
107*          The row index in the global array A indicating the first
108*          row of sub( A ).
109*
110*  JA      (global input) INTEGER
111*          The column index in the global array A indicating the
112*          first column of sub( A ).
113*
114*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
115*          The array descriptor for the distributed matrix A.
116*
117*  SR      (local input) DOUBLE PRECISION array, dimension LOCr(M_A)
118*          The scale factors for A(IA:IA+M-1,JA:JA+N-1). SR is aligned
119*          with the distributed matrix A, and replicated across every
120*          process column. SR is tied to the distributed matrix A.
121*
122*  SC      (local input) DOUBLE PRECISION array, dimension LOCc(N_A)
123*          The scale factors for sub( A ). SC is aligned with the dis-
124*          tributed matrix A, and replicated down every process row.
125*          SC is tied to the distributed matrix A.
126*
127*  SCOND   (global input) DOUBLE PRECISION
128*          Ratio of the smallest SR(i) (respectively SC(j)) to the
129*          largest SR(i) (respectively SC(j)), with IA <= i <= IA+N-1
130*          and JA <= j <= JA+N-1.
131*
132*  AMAX    (global input) DOUBLE PRECISION
133*          Absolute value of the largest distributed submatrix entry.
134*
135*  EQUED   (output) CHARACTER*1
136*          Specifies whether or not equilibration was done.
137*          = 'N':  No equilibration.
138*          = 'Y':  Equilibration was done, i.e., sub( A ) has been re-
139*                  placed by:
140*                  diag(SR(IA:IA+N-1)) * sub( A ) * diag(SC(JA:JA+N-1)).
141*
142*  Internal Parameters
143*  ===================
144*
145*  THRESH is a threshold value used to decide if scaling should be done
146*  based on the ratio of the scaling factors.  If SCOND < THRESH,
147*  scaling is done.
148*
149*  LARGE and SMALL are threshold values used to decide if scaling should
150*  be done based on the absolute size of the largest matrix element.
151*  If AMAX > LARGE or AMAX < SMALL, scaling is done.
152*
153*  =====================================================================
154*
155*     .. Parameters ..
156      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
157     $                   LLD_, MB_, M_, NB_, N_, RSRC_
158      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
159     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
160     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
161      DOUBLE PRECISION   ONE, THRESH
162      PARAMETER          ( ONE = 1.0D+0, THRESH = 0.1D+0 )
163*     ..
164*     .. Local Scalars ..
165      INTEGER            IACOL, IAROW, ICTXT, II, IIA, IOFFA, IROFF, J,
166     $                   JB, JJ, JJA, JN, KK, LDA, LL, MYCOL, MYROW, NP,
167     $                   NPCOL, NPROW
168      DOUBLE PRECISION   CJ, LARGE, SMALL
169*     ..
170*     .. External Subroutines ..
171      EXTERNAL           BLACS_GRIDINFO, INFOG2L
172*     ..
173*     .. External Functions ..
174      LOGICAL            LSAME
175      INTEGER            ICEIL, NUMROC
176      DOUBLE PRECISION   PDLAMCH
177      EXTERNAL           ICEIL, LSAME, NUMROC, PDLAMCH
178*     ..
179*     .. Intrinsic Functions ..
180      INTRINSIC          MIN, MOD
181*     ..
182*     .. Executable Statements ..
183*
184*     Quick return if possible
185*
186      IF( N.LE.0 ) THEN
187         EQUED = 'N'
188         RETURN
189      END IF
190*
191*     Get grid parameters and compute local indexes
192*
193      ICTXT = DESCA( CTXT_ )
194      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
195      CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA,
196     $              IAROW, IACOL )
197      LDA = DESCA( LLD_ )
198*
199*     Initialize LARGE and SMALL.
200*
201      SMALL = PDLAMCH( ICTXT, 'Safe minimum' ) /
202     $        PDLAMCH( ICTXT, 'Precision' )
203      LARGE = ONE / SMALL
204*
205      IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN
206*
207*        No equilibration
208*
209         EQUED = 'N'
210*
211      ELSE
212*
213         II = IIA
214         JJ = JJA
215         JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 )
216         JB = JN-JA+1
217*
218*        Replace A by diag(S) * A * diag(S).
219*
220         IF( LSAME( UPLO, 'U' ) ) THEN
221*
222*           Upper triangle of A(IA:IA+N-1,JA:JA+N-1) is stored.
223*           Handle first block separately
224*
225            IOFFA = (JJ-1)*LDA
226            IF( MYCOL.EQ.IACOL ) THEN
227               IF( MYROW.EQ.IAROW ) THEN
228                  DO 20 LL = JJ, JJ + JB -1
229                     CJ = SC( LL )
230                     DO 10 KK = IIA, II+LL-JJ+1
231                        A( IOFFA + KK ) = CJ*SR( KK )*A( IOFFA + KK )
232   10                CONTINUE
233                     IOFFA = IOFFA + LDA
234   20             CONTINUE
235               ELSE
236                  IOFFA = IOFFA + JB*LDA
237               END IF
238               JJ = JJ + JB
239            END IF
240*
241            IF( MYROW.EQ.IAROW )
242     $         II = II + JB
243            IAROW = MOD( IAROW+1, NPROW )
244            IACOL = MOD( IACOL+1, NPCOL )
245*
246*           Loop over remaining block of columns
247*
248            DO 70 J = JN+1, JA+N-1, DESCA( NB_ )
249               JB = MIN( JA+N-J, DESCA( NB_ ) )
250*
251               IF( MYCOL.EQ.IACOL ) THEN
252                  IF( MYROW.EQ.IAROW ) THEN
253                     DO 40 LL = JJ, JJ + JB -1
254                        CJ = SC( LL )
255                        DO 30 KK = IIA, II+LL-JJ+1
256                           A( IOFFA + KK ) = CJ*SR( KK )*A( IOFFA + KK )
257   30                   CONTINUE
258                        IOFFA = IOFFA + LDA
259   40                CONTINUE
260                  ELSE
261                     DO 60 LL = JJ, JJ + JB -1
262                        CJ = SC( LL )
263                        DO 50 KK = IIA, II-1
264                           A( IOFFA + KK ) = CJ*SR( KK )*A( IOFFA + KK )
265   50                   CONTINUE
266                        IOFFA = IOFFA + LDA
267   60                CONTINUE
268                  END IF
269                  JJ = JJ + JB
270               END IF
271*
272               IF( MYROW.EQ.IAROW )
273     $            II = II + JB
274               IAROW = MOD( IAROW+1, NPROW )
275               IACOL = MOD( IACOL+1, NPCOL )
276*
277   70       CONTINUE
278*
279         ELSE
280*
281*           Lower triangle of A(IA:IA+N-1,JA:JA+N-1) is stored.
282*           Handle first block separately
283*
284            IROFF = MOD( IA-1, DESCA( MB_ ) )
285            NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW )
286            IF( MYROW.EQ.IAROW )
287     $         NP = NP-IROFF
288*
289            IOFFA = (JJ-1)*LDA
290            IF( MYCOL.EQ.IACOL ) THEN
291               IF( MYROW.EQ.IAROW ) THEN
292                  DO 90 LL = JJ, JJ + JB -1
293                     CJ = SC( LL )
294                     DO 80 KK = II+LL-JJ, IIA+NP-1
295                        A( IOFFA + KK ) = CJ*SR( KK )*A( IOFFA + KK )
296   80                CONTINUE
297                     IOFFA = IOFFA + LDA
298   90             CONTINUE
299               ELSE
300                  DO 110 LL = JJ, JJ + JB -1
301                     CJ = SC( LL )
302                     DO 100 KK = II, IIA+NP-1
303                        A( IOFFA + KK ) = CJ*SR( KK )*A( IOFFA + KK )
304  100                CONTINUE
305                     IOFFA = IOFFA + LDA
306  110             CONTINUE
307               END IF
308               JJ = JJ + JB
309            END IF
310*
311            IF( MYROW.EQ.IAROW )
312     $         II = II + JB
313            IAROW = MOD( IAROW+1, NPROW )
314            IACOL = MOD( IACOL+1, NPCOL )
315*
316*           Loop over remaining block of columns
317*
318            DO 160 J = JN+1, JA+N-1, DESCA( NB_ )
319               JB = MIN( JA+N-J, DESCA( NB_ ) )
320*
321               IF( MYCOL.EQ.IACOL ) THEN
322                  IF( MYROW.EQ.IAROW ) THEN
323                     DO 130 LL = JJ, JJ + JB -1
324                        CJ = SC( LL )
325                        DO 120 KK = II+LL-JJ, IIA+NP-1
326                           A( IOFFA + KK ) = CJ*SR( KK )*A( IOFFA + KK )
327  120                   CONTINUE
328                        IOFFA = IOFFA + LDA
329  130                CONTINUE
330                  ELSE
331                     DO 150 LL = JJ, JJ + JB -1
332                        CJ = SC( LL )
333                        DO 140 KK = II, IIA+NP-1
334                           A( IOFFA + KK ) = CJ*SR( KK )*A( IOFFA + KK )
335  140                   CONTINUE
336                        IOFFA = IOFFA + LDA
337  150                CONTINUE
338                  END IF
339                  JJ = JJ + JB
340               END IF
341*
342               IF( MYROW.EQ.IAROW )
343     $            II = II + JB
344               IAROW = MOD( IAROW+1, NPROW )
345               IACOL = MOD( IACOL+1, NPCOL )
346*
347  160       CONTINUE
348*
349         END IF
350*
351         EQUED = 'Y'
352*
353      END IF
354*
355      RETURN
356*
357*     End of PDLAQSY
358*
359      END
360