1      SUBROUTINE PSPOTF2( UPLO, N, A, IA, JA, DESCA, INFO )
2*
3*  -- ScaLAPACK routine (version 1.7) --
4*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5*     and University of California, Berkeley.
6*     May 1, 1997
7*
8*     .. Scalar Arguments ..
9      CHARACTER          UPLO
10      INTEGER            IA, INFO, JA, N
11*     ..
12*     .. Array Arguments ..
13      INTEGER            DESCA( * )
14      REAL               A( * )
15*     ..
16*
17*  Purpose
18*  =======
19*
20*  PSPOTF2 computes the Cholesky factorization of a real symmetric
21*  positive definite distributed matrix sub( A )=A(IA:IA+N-1,JA:JA+N-1).
22*
23*  The factorization has the form
24*
25*            sub( A ) = U' * U ,  if UPLO = 'U', or
26*
27*            sub( A ) = L  * L',  if UPLO = 'L',
28*
29*  where U is an upper triangular matrix and L is lower triangular.
30*
31*  Notes
32*  =====
33*
34*  Each global data object is described by an associated description
35*  vector.  This vector stores the information required to establish
36*  the mapping between an object element and its corresponding process
37*  and memory location.
38*
39*  Let A be a generic term for any 2D block cyclicly distributed array.
40*  Such a global array has an associated description vector DESCA.
41*  In the following comments, the character _ should be read as
42*  "of the global array".
43*
44*  NOTATION        STORED IN      EXPLANATION
45*  --------------- -------------- --------------------------------------
46*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
47*                                 DTYPE_A = 1.
48*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
49*                                 the BLACS process grid A is distribu-
50*                                 ted over. The context itself is glo-
51*                                 bal, but the handle (the integer
52*                                 value) may vary.
53*  M_A    (global) DESCA( M_ )    The number of rows in the global
54*                                 array A.
55*  N_A    (global) DESCA( N_ )    The number of columns in the global
56*                                 array A.
57*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
58*                                 the rows of the array.
59*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
60*                                 the columns of the array.
61*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
62*                                 row of the array A is distributed.
63*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
64*                                 first column of the array A is
65*                                 distributed.
66*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
67*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
68*
69*  Let K be the number of rows or columns of a distributed matrix,
70*  and assume that its process grid has dimension p x q.
71*  LOCr( K ) denotes the number of elements of K that a process
72*  would receive if K were distributed over the p processes of its
73*  process column.
74*  Similarly, LOCc( K ) denotes the number of elements of K that a
75*  process would receive if K were distributed over the q processes of
76*  its process row.
77*  The values of LOCr() and LOCc() may be determined via a call to the
78*  ScaLAPACK tool function, NUMROC:
79*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
80*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
81*  An upper bound for these quantities may be computed by:
82*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
83*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
84*
85*  This routine requires N <= NB_A-MOD(JA-1, NB_A) and square block
86*  decomposition ( MB_A = NB_A ).
87*
88*  Arguments
89*  =========
90*
91*  UPLO    (global input) CHARACTER
92*          = 'U':  Upper triangle of sub( A ) is stored;
93*          = 'L':  Lower triangle of sub( A ) is stored.
94*
95*  N       (global input) INTEGER
96*          The number of rows and columns to be operated on, i.e. the
97*          order of the distributed submatrix sub( A ). N >= 0.
98*
99*  A       (local input/local output) REAL pointer into the
100*          local memory to an array of dimension (LLD_A, LOCc(JA+N-1)).
101*          On entry, this array contains the local pieces of the
102*          N-by-N symmetric distributed matrix sub( A ) to be factored.
103*          If UPLO = 'U', the leading N-by-N upper triangular part of
104*          sub( A ) contains the upper triangular part of the matrix,
105*          and its strictly lower triangular part is not referenced.
106*          If UPLO = 'L', the leading N-by-N lower triangular part of
107*          sub( A ) contains the lower triangular part of the distribu-
108*          ted matrix, and its strictly upper triangular part is not
109*          referenced.  On exit, if UPLO = 'U', the upper triangular
110*          part of the distributed matrix contains the Cholesky factor
111*          U, if UPLO = 'L', the lower triangular part of the distribu-
112*          ted matrix contains the Cholesky factor L.
113*
114*  IA      (global input) INTEGER
115*          The row index in the global array A indicating the first
116*          row of sub( A ).
117*
118*  JA      (global input) INTEGER
119*          The column index in the global array A indicating the
120*          first column of sub( A ).
121*
122*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
123*          The array descriptor for the distributed matrix A.
124*
125*  INFO    (local output) INTEGER
126*          = 0:  successful exit
127*          < 0:  If the i-th argument is an array and the j-entry had
128*                an illegal value, then INFO = -(i*100+j), if the i-th
129*                argument is a scalar and had an illegal value, then
130*                INFO = -i.
131*          > 0:  If INFO = K, the leading minor of order K,
132*                A(IA:IA+K-1,JA:JA+K-1) is not positive definite, and
133*                the factorization could not be completed.
134*
135*  =====================================================================
136*
137*     .. Parameters ..
138      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
139     $                   LLD_, MB_, M_, NB_, N_, RSRC_
140      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
141     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
142     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
143      REAL               ONE, ZERO
144      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
145*     ..
146*     .. Local Scalars ..
147      LOGICAL            UPPER
148      CHARACTER          COLBTOP, ROWBTOP
149      INTEGER            IACOL, IAROW, ICOFF, ICTXT, ICURR, IDIAG, IIA,
150     $                   IOFFA, IROFF, J, JJA, LDA, MYCOL, MYROW,
151     $                   NPCOL, NPROW
152      REAL               AJJ
153*     ..
154*     .. External Subroutines ..
155      EXTERNAL           BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, IGEBR2D,
156     $                   IGEBS2D, INFOG2L, PB_TOPGET, PXERBLA, SGEMV,
157     $                   SSCAL
158*     ..
159*     .. Intrinsic Functions ..
160      INTRINSIC          MOD, SQRT
161*     ..
162*     .. External Functions ..
163      LOGICAL            LSAME
164      REAL               SDOT
165      EXTERNAL           LSAME, SDOT
166*     ..
167*     .. Executable Statements ..
168*
169*     Get grid parameters
170*
171      ICTXT = DESCA( CTXT_ )
172      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
173*
174*     Test the input parameters.
175*
176      INFO = 0
177      IF( NPROW.EQ.-1 ) THEN
178         INFO = -(600+CTXT_)
179      ELSE
180         CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO )
181         IF( INFO.EQ.0 ) THEN
182            UPPER = LSAME( UPLO, 'U' )
183            IROFF = MOD( IA-1, DESCA( MB_ ) )
184            ICOFF = MOD( JA-1, DESCA( NB_ ) )
185            IF ( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
186               INFO = -1
187            ELSE IF( N+ICOFF.GT.DESCA( NB_ ) ) THEN
188               INFO = -2
189            ELSE IF( IROFF.NE.0 ) THEN
190               INFO = -4
191            ELSE IF( ICOFF.NE.0 ) THEN
192               INFO = -5
193            ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN
194               INFO = -(600+NB_)
195            END IF
196         END IF
197      END IF
198*
199      IF( INFO.NE.0 ) THEN
200         CALL PXERBLA( ICTXT, 'PSPOTF2', -INFO )
201         CALL BLACS_ABORT( ICTXT, 1 )
202         RETURN
203      END IF
204*
205*     Quick return if possible
206*
207      IF( N.EQ.0 )
208     $   RETURN
209*
210*     Compute local information
211*
212      CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA,
213     $              IAROW, IACOL )
214      CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP )
215      CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP )
216*
217      IF ( UPPER ) THEN
218*
219*        Process (IAROW, IACOL) owns block to be factorized
220*
221         IF( MYROW.EQ.IAROW ) THEN
222            IF( MYCOL.EQ.IACOL ) THEN
223*
224*              Compute the Cholesky factorization A = U'*U.
225*
226               LDA = DESCA( LLD_ )
227               IDIAG = IIA + ( JJA - 1 ) * LDA
228               IOFFA = IDIAG
229*
230               DO 10 J = JA, JA+N-1
231*
232*                 Compute U(J,J) and test for non-positive-definiteness.
233*
234                  AJJ = A( IDIAG ) -
235     $                  SDOT( J-JA, A( IOFFA ), 1, A( IOFFA ), 1 )
236                  IF( AJJ.LE.ZERO ) THEN
237                     A( IDIAG ) = AJJ
238                     INFO = J - JA + 1
239                     GO TO 20
240                  END IF
241                  AJJ = SQRT( AJJ )
242                  A( IDIAG ) = AJJ
243*
244*                 Compute elements J+1:JA+N-1 of row J.
245*
246                  IF( J.LT.JA+N-1 ) THEN
247                     ICURR = IDIAG + LDA
248                     CALL SGEMV( 'Transpose', J-JA, JA+N-J-1, -ONE,
249     $                           A( IOFFA+LDA ), LDA, A( IOFFA ), 1,
250     $                           ONE, A( ICURR ), LDA )
251                     CALL SSCAL( N-J+JA-1, ONE / AJJ, A( ICURR ), LDA )
252                  END IF
253                  IDIAG = IDIAG + LDA + 1
254                  IOFFA = IOFFA + LDA
255   10          CONTINUE
256*
257   20          CONTINUE
258*
259*              Broadcast INFO to all processes in my IAROW.
260*
261               CALL IGEBS2D( ICTXT, 'Rowwise', ROWBTOP, 1, 1, INFO, 1 )
262*
263            ELSE
264*
265               CALL IGEBR2D( ICTXT, 'Rowwise', ROWBTOP, 1, 1, INFO, 1,
266     $                       MYROW, IACOL )
267            END IF
268*
269*           IAROW bcasts along columns so that everyone has INFO
270*
271            CALL IGEBS2D( ICTXT, 'Columnwise', COLBTOP, 1, 1, INFO, 1 )
272*
273         ELSE
274*
275            CALL IGEBR2D( ICTXT, 'Columnwise', COLBTOP, 1, 1, INFO, 1,
276     $                    IAROW, MYCOL )
277*
278         END IF
279*
280      ELSE
281*
282*        Process (IAROW, IACOL) owns block to be factorized
283*
284         IF( MYCOL.EQ.IACOL ) THEN
285            IF( MYROW.EQ.IAROW ) THEN
286*
287*              Compute the Cholesky factorization A = L*L'.
288*
289               LDA = DESCA( LLD_ )
290               IDIAG = IIA + ( JJA - 1 ) * LDA
291               IOFFA = IDIAG
292*
293               DO 30 J = JA, JA+N-1
294*
295*                 Compute L(J,J) and test for non-positive-definiteness.
296*
297                  AJJ = A( IDIAG ) -
298     $                  SDOT( J-JA, A( IOFFA ), LDA, A( IOFFA ), LDA )
299                  IF ( AJJ.LE.ZERO ) THEN
300                     A( IDIAG ) = AJJ
301                     INFO = J - JA + 1
302                     GO TO 40
303                  END IF
304                  AJJ = SQRT( AJJ )
305                  A( IDIAG ) = AJJ
306*
307*                 Compute elements J+1:JA+N-1 of column J.
308*
309                  IF( J.LT.JA+N-1 ) THEN
310                     ICURR = IDIAG + 1
311                     CALL SGEMV( 'No transpose', JA+N-J-1, J-JA, -ONE,
312     $                           A( IOFFA+1 ), LDA, A( IOFFA ), LDA,
313     $                           ONE, A( ICURR ), 1 )
314                     CALL SSCAL( JA+N-J-1, ONE / AJJ, A( ICURR ), 1 )
315                  END IF
316                  IDIAG = IDIAG + LDA + 1
317                  IOFFA = IOFFA + 1
318   30          CONTINUE
319*
320   40          CONTINUE
321*
322*              Broadcast INFO to everyone in IACOL
323*
324               CALL IGEBS2D( ICTXT, 'Columnwise', COLBTOP, 1, 1, INFO,
325     $                       1 )
326*
327            ELSE
328*
329               CALL IGEBR2D( ICTXT, 'Columnwise', COLBTOP, 1, 1, INFO,
330     $                       1, IAROW, MYCOL )
331*
332            END IF
333*
334*           IACOL bcasts INFO along rows so that everyone has it
335*
336            CALL IGEBS2D( ICTXT, 'Rowwise', ROWBTOP, 1, 1, INFO, 1 )
337*
338         ELSE
339*
340            CALL IGEBR2D( ICTXT, 'Rowwise', ROWBTOP, 1, 1, INFO, 1,
341     $                    MYROW, IACOL )
342*
343         END IF
344*
345      END IF
346*
347      RETURN
348*
349*     End of PSPOTF2
350*
351      END
352