1*
2*
3      SUBROUTINE PDSYGS2( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB,
4     $                    DESCB, INFO )
5*
6*  -- ScaLAPACK routine (version 1.7) --
7*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8*     and University of California, Berkeley.
9*     May 1, 1997
10*
11*     .. Scalar Arguments ..
12      CHARACTER          UPLO
13      INTEGER            IA, IB, IBTYPE, INFO, JA, JB, N
14*     ..
15*     .. Array Arguments ..
16      INTEGER            DESCA( * ), DESCB( * )
17      DOUBLE PRECISION   A( * ), B( * )
18*     ..
19*
20*  Purpose
21*  =======
22*
23*  PDSYGS2 reduces a real symmetric-definite generalized eigenproblem
24*  to standard form.
25*
26*  In the following sub( A ) denotes A( IA:IA+N-1, JA:JA+N-1 ) and
27*  sub( B ) denotes B( IB:IB+N-1, JB:JB+N-1 ).
28*
29*  If IBTYPE = 1, the problem is sub( A )*x = lambda*sub( B )*x,
30*  and sub( A ) is overwritten by inv(U**T)*sub( A )*inv(U) or
31*  inv(L)*sub( A )*inv(L**T)
32*
33*  If IBTYPE = 2 or 3, the problem is sub( A )*sub( B )*x = lambda*x or
34*  sub( B )*sub( A )*x = lambda*x, and sub( A ) is overwritten by
35*  U*sub( A )*U**T or L**T*sub( A )*L.
36*
37*  sub( B ) must have been previously factorized as U**T*U or L*L**T by
38*  PDPOTRF.
39*
40*  Notes
41*  =====
42*
43*  Each global data object is described by an associated description
44*  vector.  This vector stores the information required to establish
45*  the mapping between an object element and its corresponding process
46*  and memory location.
47*
48*  Let A be a generic term for any 2D block cyclicly distributed array.
49*  Such a global array has an associated description vector DESCA.
50*  In the following comments, the character _ should be read as
51*  "of the global array".
52*
53*  NOTATION        STORED IN      EXPLANATION
54*  --------------- -------------- --------------------------------------
55*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
56*                                 DTYPE_A = 1.
57*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
58*                                 the BLACS process grid A is distribu-
59*                                 ted over. The context itself is glo-
60*                                 bal, but the handle (the integer
61*                                 value) may vary.
62*  M_A    (global) DESCA( M_ )    The number of rows in the global
63*                                 array A.
64*  N_A    (global) DESCA( N_ )    The number of columns in the global
65*                                 array A.
66*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
67*                                 the rows of the array.
68*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
69*                                 the columns of the array.
70*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
71*                                 row of the array A is distributed.
72*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
73*                                 first column of the array A is
74*                                 distributed.
75*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
76*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
77*
78*  Let K be the number of rows or columns of a distributed matrix,
79*  and assume that its process grid has dimension p x q.
80*  LOCr( K ) denotes the number of elements of K that a process
81*  would receive if K were distributed over the p processes of its
82*  process column.
83*  Similarly, LOCc( K ) denotes the number of elements of K that a
84*  process would receive if K were distributed over the q processes of
85*  its process row.
86*  The values of LOCr() and LOCc() may be determined via a call to the
87*  ScaLAPACK tool function, NUMROC:
88*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
89*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
90*  An upper bound for these quantities may be computed by:
91*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
92*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
93*
94*  Arguments
95*  =========
96*
97*  IBTYPE   (global input) INTEGER
98*          = 1: compute inv(U**T)*sub( A )*inv(U) or
99*               inv(L)*sub( A )*inv(L**T);
100*          = 2 or 3: compute U*sub( A )*U**T or L**T*sub( A )*L.
101*
102*  UPLO    (global input) CHARACTER
103*          = 'U':  Upper triangle of sub( A ) is stored and sub( B ) is
104*                  factored as U**T*U;
105*          = 'L':  Lower triangle of sub( A ) is stored and sub( B ) is
106*                  factored as L*L**T.
107*
108*  N       (global input) INTEGER
109*          The order of the matrices sub( A ) and sub( B ).  N >= 0.
110*
111*  A       (local input/local output) DOUBLE PRECISION pointer into the
112*          local memory to an array of dimension (LLD_A, LOCc(JA+N-1)).
113*          On entry, this array contains the local pieces of the
114*          N-by-N symmetric distributed matrix sub( A ). If UPLO = 'U',
115*          the leading N-by-N upper triangular part of sub( A ) contains
116*          the upper triangular part of the matrix, and its strictly
117*          lower triangular part is not referenced.  If UPLO = 'L', the
118*          leading N-by-N lower triangular part of sub( A ) contains
119*          the lower triangular part of the matrix, and its strictly
120*          upper triangular part is not referenced.
121*
122*          On exit, if INFO = 0, the transformed matrix, stored in the
123*          same format as sub( A ).
124*
125*  IA      (global input) INTEGER
126*          A's global row index, which points to the beginning of the
127*          submatrix which is to be operated on.
128*
129*  JA      (global input) INTEGER
130*          A's global column index, which points to the beginning of
131*          the submatrix which is to be operated on.
132*
133*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
134*          The array descriptor for the distributed matrix A.
135*
136*  B       (local input) DOUBLE PRECISION pointer into the local memory
137*          to an array of dimension (LLD_B, LOCc(JB+N-1)). On entry,
138*          this array contains the local pieces of the triangular factor
139*          from the Cholesky factorization of sub( B ), as returned by
140*          PDPOTRF.
141*
142*  IB      (global input) INTEGER
143*          B's global row index, which points to the beginning of the
144*          submatrix which is to be operated on.
145*
146*  JB      (global input) INTEGER
147*          B's global column index, which points to the beginning of
148*          the submatrix which is to be operated on.
149*
150*  DESCB   (global and local input) INTEGER array of dimension DLEN_.
151*          The array descriptor for the distributed matrix B.
152*
153*  INFO    (global output) INTEGER
154*          = 0:  successful exit
155*          < 0:  If the i-th argument is an array and the j-entry had
156*                an illegal value, then INFO = -(i*100+j), if the i-th
157*                argument is a scalar and had an illegal value, then
158*                INFO = -i.
159*
160*  =====================================================================
161*
162*     .. Parameters ..
163      INTEGER            BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
164     $                   MB_, NB_, RSRC_, CSRC_, LLD_
165      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
166     $                   CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
167     $                   RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
168      DOUBLE PRECISION   ONE, HALF
169      PARAMETER          ( ONE = 1.0D+0, HALF = 0.5D+0 )
170*     ..
171*     .. Local Scalars ..
172      LOGICAL            UPPER
173      INTEGER            IACOL, IAROW, IBCOL, IBROW, ICOFFA, ICOFFB,
174     $                   ICTXT, IIA, IIB, IOFFA, IOFFB, IROFFA, IROFFB,
175     $                   JJA, JJB, K, LDA, LDB, MYCOL, MYROW, NPCOL,
176     $                   NPROW
177      DOUBLE PRECISION   AKK, BKK, CT
178*     ..
179*     .. External Subroutines ..
180      EXTERNAL           BLACS_EXIT, BLACS_GRIDINFO, CHK1MAT, DAXPY,
181     $                   DSCAL, DSYR2, DTRMV, DTRSV, INFOG2L, PXERBLA
182*     ..
183*     .. Intrinsic Functions ..
184      INTRINSIC          MOD
185*     ..
186*     .. External Functions ..
187      LOGICAL            LSAME
188      INTEGER            INDXG2P
189      EXTERNAL           LSAME, INDXG2P
190*     ..
191*     .. Executable Statements ..
192*       This is just to keep ftnchek happy
193      IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_*
194     $    RSRC_.LT.0 )RETURN
195*
196*     Get grid parameters
197*
198      ICTXT = DESCA( CTXT_ )
199      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
200*
201*     Test the input parameters.
202*
203      INFO = 0
204      IF( NPROW.EQ.-1 ) THEN
205         INFO = -( 700+CTXT_ )
206      ELSE
207         UPPER = LSAME( UPLO, 'U' )
208         CALL CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, INFO )
209         CALL CHK1MAT( N, 3, N, 3, IB, JB, DESCB, 11, INFO )
210         IF( INFO.EQ.0 ) THEN
211            IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ),
212     $              NPROW )
213            IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ),
214     $              NPROW )
215            IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ),
216     $              NPCOL )
217            IBCOL = INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ),
218     $              NPCOL )
219            IROFFA = MOD( IA-1, DESCA( MB_ ) )
220            ICOFFA = MOD( JA-1, DESCA( NB_ ) )
221            IROFFB = MOD( IB-1, DESCB( MB_ ) )
222            ICOFFB = MOD( JB-1, DESCB( NB_ ) )
223            IF( IBTYPE.LT.1 .OR. IBTYPE.GT.3 ) THEN
224               INFO = -1
225            ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
226               INFO = -2
227            ELSE IF( N.LT.0 ) THEN
228               INFO = -3
229            ELSE IF( N+ICOFFA.GT.DESCA( NB_ ) ) THEN
230               INFO = -3
231            ELSE IF( IROFFA.NE.0 ) THEN
232               INFO = -5
233            ELSE IF( ICOFFA.NE.0 ) THEN
234               INFO = -6
235            ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN
236               INFO = -( 700+NB_ )
237            ELSE IF( IROFFB.NE.0 .OR. IBROW.NE.IAROW ) THEN
238               INFO = -9
239            ELSE IF( ICOFFB.NE.0 .OR. IBCOL.NE.IACOL ) THEN
240               INFO = -10
241            ELSE IF( DESCB( MB_ ).NE.DESCA( MB_ ) ) THEN
242               INFO = -( 1100+MB_ )
243            ELSE IF( DESCB( NB_ ).NE.DESCA( NB_ ) ) THEN
244               INFO = -( 1100+NB_ )
245            ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN
246               INFO = -( 1100+CTXT_ )
247            END IF
248         END IF
249      END IF
250*
251      IF( INFO.NE.0 ) THEN
252         CALL PXERBLA( ICTXT, 'PDSYGS2', -INFO )
253         CALL BLACS_EXIT( ICTXT )
254         RETURN
255      END IF
256*
257*     Quick return if possible
258*
259      IF( N.EQ.0 .OR. ( MYROW.NE.IAROW .OR. MYCOL.NE.IACOL ) )
260     $   RETURN
261*
262*     Compute local information
263*
264      LDA = DESCA( LLD_ )
265      LDB = DESCB( LLD_ )
266      CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA,
267     $              IAROW, IACOL )
268      CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIB, JJB,
269     $              IBROW, IBCOL )
270*
271      IF( IBTYPE.EQ.1 ) THEN
272*
273         IF( UPPER ) THEN
274*
275            IOFFA = IIA + JJA*LDA
276            IOFFB = IIB + JJB*LDB
277*
278*           Compute inv(U')*sub( A )*inv(U)
279*
280            DO 10 K = 1, N
281*
282*              Update the upper triangle of
283*              A(ia+k-1:ia+n-a,ia+k-1:ia+n-1)
284*
285               AKK = A( IOFFA-LDA )
286               BKK = B( IOFFB-LDB )
287               AKK = AKK / BKK**2
288               A( IOFFA-LDA ) = AKK
289               IF( K.LT.N ) THEN
290                  CALL DSCAL( N-K, ONE / BKK, A( IOFFA ), LDA )
291                  CT = -HALF*AKK
292                  CALL DAXPY( N-K, CT, B( IOFFB ), LDB, A( IOFFA ),
293     $                        LDA )
294                  CALL DSYR2( UPLO, N-K, -ONE, A( IOFFA ), LDA,
295     $                        B( IOFFB ), LDB, A( IOFFA+1 ), LDA )
296                  CALL DAXPY( N-K, CT, B( IOFFB ), LDB, A( IOFFA ),
297     $                        LDA )
298                  CALL DTRSV( UPLO, 'Transpose', 'Non-unit', N-K,
299     $                        B( IOFFB+1 ), LDB, A( IOFFA ), LDA )
300               END IF
301*
302*              A( IOFFA ) -> A( K, K+1 )
303*              B( IOFFB ) -> B( K, K+1 )
304*
305               IOFFA = IOFFA + LDA + 1
306               IOFFB = IOFFB + LDB + 1
307*
308   10       CONTINUE
309*
310         ELSE
311*
312            IOFFA = IIA + 1 + ( JJA-1 )*LDA
313            IOFFB = IIB + 1 + ( JJB-1 )*LDB
314*
315*           Compute inv(L)*sub( A )*inv(L')
316*
317            DO 20 K = 1, N
318*
319*              Update the lower triangle of
320*              A(ia+k-1:ia+n-a,ia+k-1:ia+n-1)
321*
322               AKK = A( IOFFA-1 )
323               BKK = B( IOFFB-1 )
324               AKK = AKK / BKK**2
325               A( IOFFA-1 ) = AKK
326*
327               IF( K.LT.N ) THEN
328                  CALL DSCAL( N-K, ONE / BKK, A( IOFFA ), 1 )
329                  CT = -HALF*AKK
330                  CALL DAXPY( N-K, CT, B( IOFFB ), 1, A( IOFFA ), 1 )
331                  CALL DSYR2( UPLO, N-K, -ONE, A( IOFFA ), 1,
332     $                        B( IOFFB ), 1, A( IOFFA+LDA ), LDA )
333                  CALL DAXPY( N-K, CT, B( IOFFB ), 1, A( IOFFA ), 1 )
334                  CALL DTRSV( UPLO, 'No transpose', 'Non-unit', N-K,
335     $                        B( IOFFB+LDB ), LDB, A( IOFFA ), 1 )
336               END IF
337*
338*              A( IOFFA ) -> A( K+1, K )
339*              B( IOFFB ) -> B( K+1, K )
340*
341               IOFFA = IOFFA + LDA + 1
342               IOFFB = IOFFB + LDB + 1
343*
344   20       CONTINUE
345*
346         END IF
347*
348      ELSE
349*
350         IF( UPPER ) THEN
351*
352            IOFFA = IIA + ( JJA-1 )*LDA
353            IOFFB = IIB + ( JJB-1 )*LDB
354*
355*           Compute U*sub( A )*U'
356*
357            DO 30 K = 1, N
358*
359*              Update the upper triangle of A(ia:ia+k-1,ja:ja+k-1)
360*
361               AKK = A( IOFFA+K-1 )
362               BKK = B( IOFFB+K-1 )
363               CALL DTRMV( UPLO, 'No transpose', 'Non-unit', K-1,
364     $                     B( IIB+( JJB-1 )*LDB ), LDB, A( IOFFA ), 1 )
365               CT = HALF*AKK
366               CALL DAXPY( K-1, CT, B( IOFFB ), 1, A( IOFFA ), 1 )
367               CALL DSYR2( UPLO, K-1, ONE, A( IOFFA ), 1, B( IOFFB ), 1,
368     $                     A( IIA+( JJA-1 )*LDA ), LDA )
369               CALL DAXPY( K-1, CT, B( IOFFB ), 1, A( IOFFA ), 1 )
370               CALL DSCAL( K-1, BKK, A( IOFFA ), 1 )
371               A( IOFFA+K-1 ) = AKK*BKK**2
372*
373*              A( IOFFA ) -> A( 1, K )
374*              B( IOFFB ) -> B( 1, K )
375*
376               IOFFA = IOFFA + LDA
377               IOFFB = IOFFB + LDB
378*
379   30       CONTINUE
380*
381         ELSE
382*
383            IOFFA = IIA + ( JJA-1 )*LDA
384            IOFFB = IIB + ( JJB-1 )*LDB
385*
386*           Compute L'*sub( A )*L
387*
388            DO 40 K = 1, N
389*
390*              Update the lower triangle of A(ia:ia+k-1,ja:ja+k-1)
391*
392               AKK = A( IOFFA+( K-1 )*LDA )
393               BKK = B( IOFFB+( K-1 )*LDB )
394               CALL DTRMV( UPLO, 'Transpose', 'Non-unit', K-1,
395     $                     B( IIB+( JJB-1 )*LDB ), LDB, A( IOFFA ),
396     $                     LDA )
397               CT = HALF*AKK
398               CALL DAXPY( K-1, CT, B( IOFFB ), LDB, A( IOFFA ), LDA )
399               CALL DSYR2( UPLO, K-1, ONE, A( IOFFA ), LDA, B( IOFFB ),
400     $                     LDB, A( IIA+( JJA-1 )*LDA ), LDA )
401               CALL DAXPY( K-1, CT, B( IOFFB ), LDB, A( IOFFA ), LDA )
402               CALL DSCAL( K-1, BKK, A( IOFFA ), LDA )
403               A( IOFFA+( K-1 )*LDA ) = AKK*BKK**2
404*
405*              A( IOFFA ) -> A( K, 1 )
406*              B( IOFFB ) -> B( K, 1 )
407*
408               IOFFA = IOFFA + 1
409               IOFFB = IOFFB + 1
410*
411   40       CONTINUE
412*
413         END IF
414*
415      END IF
416*
417      RETURN
418*
419*     End of PDSYGS2
420*
421      END
422