1*> \brief \b CSYTRF_AA
2*
3*  =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6*            http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download CSYTRF_AA + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/csytrf_aa.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/csytrf_aa.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/csytrf_aa.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18*  Definition:
19*  ===========
20*
21*       SUBROUTINE CSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO )
22*
23*       .. Scalar Arguments ..
24*       CHARACTER          UPLO
25*       INTEGER            N, LDA, LWORK, INFO
26*       ..
27*       .. Array Arguments ..
28*       INTEGER            IPIV( * )
29*       COMPLEX            A( LDA, * ), WORK( * )
30*       ..
31*
32*> \par Purpose:
33*  =============
34*>
35*> \verbatim
36*>
37*> CSYTRF_AA computes the factorization of a complex symmetric matrix A
38*> using the Aasen's algorithm.  The form of the factorization is
39*>
40*>    A = U**T*T*U  or  A = L*T*L**T
41*>
42*> where U (or L) is a product of permutation and unit upper (lower)
43*> triangular matrices, and T is a complex symmetric tridiagonal matrix.
44*>
45*> This is the blocked version of the algorithm, calling Level 3 BLAS.
46*> \endverbatim
47*
48*  Arguments:
49*  ==========
50*
51*> \param[in] UPLO
52*> \verbatim
53*>          UPLO is CHARACTER*1
54*>          = 'U':  Upper triangle of A is stored;
55*>          = 'L':  Lower triangle of A is stored.
56*> \endverbatim
57*>
58*> \param[in] N
59*> \verbatim
60*>          N is INTEGER
61*>          The order of the matrix A.  N >= 0.
62*> \endverbatim
63*>
64*> \param[in,out] A
65*> \verbatim
66*>          A is COMPLEX array, dimension (LDA,N)
67*>          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
68*>          N-by-N upper triangular part of A contains the upper
69*>          triangular part of the matrix A, and the strictly lower
70*>          triangular part of A is not referenced.  If UPLO = 'L', the
71*>          leading N-by-N lower triangular part of A contains the lower
72*>          triangular part of the matrix A, and the strictly upper
73*>          triangular part of A is not referenced.
74*>
75*>          On exit, the tridiagonal matrix is stored in the diagonals
76*>          and the subdiagonals of A just below (or above) the diagonals,
77*>          and L is stored below (or above) the subdiaonals, when UPLO
78*>          is 'L' (or 'U').
79*> \endverbatim
80*>
81*> \param[in] LDA
82*> \verbatim
83*>          LDA is INTEGER
84*>          The leading dimension of the array A.  LDA >= max(1,N).
85*> \endverbatim
86*>
87*> \param[out] IPIV
88*> \verbatim
89*>          IPIV is INTEGER array, dimension (N)
90*>          On exit, it contains the details of the interchanges, i.e.,
91*>          the row and column k of A were interchanged with the
92*>          row and column IPIV(k).
93*> \endverbatim
94*>
95*> \param[out] WORK
96*> \verbatim
97*>          WORK is COMPLEX array, dimension (MAX(1,LWORK))
98*>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
99*> \endverbatim
100*>
101*> \param[in] LWORK
102*> \verbatim
103*>          LWORK is INTEGER
104*>          The length of WORK. LWORK >= MAX(1,2*N). For optimum performance
105*>          LWORK >= N*(1+NB), where NB is the optimal blocksize.
106*>
107*>          If LWORK = -1, then a workspace query is assumed; the routine
108*>          only calculates the optimal size of the WORK array, returns
109*>          this value as the first entry of the WORK array, and no error
110*>          message related to LWORK is issued by XERBLA.
111*> \endverbatim
112*>
113*> \param[out] INFO
114*> \verbatim
115*>          INFO is INTEGER
116*>          = 0:  successful exit
117*>          < 0:  if INFO = -i, the i-th argument had an illegal value.
118*> \endverbatim
119*
120*  Authors:
121*  ========
122*
123*> \author Univ. of Tennessee
124*> \author Univ. of California Berkeley
125*> \author Univ. of Colorado Denver
126*> \author NAG Ltd.
127*
128*> \ingroup complexSYcomputational
129*
130*  =====================================================================
131      SUBROUTINE CSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO)
132*
133*  -- LAPACK computational routine --
134*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
135*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
136*
137      IMPLICIT NONE
138*
139*     .. Scalar Arguments ..
140      CHARACTER          UPLO
141      INTEGER            N, LDA, LWORK, INFO
142*     ..
143*     .. Array Arguments ..
144      INTEGER            IPIV( * )
145      COMPLEX            A( LDA, * ), WORK( * )
146*     ..
147*
148*  =====================================================================
149*     .. Parameters ..
150      COMPLEX            ZERO, ONE
151      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
152*
153*     .. Local Scalars ..
154      LOGICAL            LQUERY, UPPER
155      INTEGER            J, LWKOPT
156      INTEGER            NB, MJ, NJ, K1, K2, J1, J2, J3, JB
157      COMPLEX            ALPHA
158*     ..
159*     .. External Functions ..
160      LOGICAL            LSAME
161      INTEGER            ILAENV
162      EXTERNAL           LSAME, ILAENV
163*     ..
164*     .. External Subroutines ..
165      EXTERNAL           CLASYF_AA, CGEMM, CGEMV, CSCAL, CSWAP, CCOPY,
166     $                   XERBLA
167*     ..
168*     .. Intrinsic Functions ..
169      INTRINSIC          MAX
170*     ..
171*     .. Executable Statements ..
172*
173*     Determine the block size
174*
175      NB = ILAENV( 1, 'CSYTRF_AA', UPLO, N, -1, -1, -1 )
176*
177*     Test the input parameters.
178*
179      INFO = 0
180      UPPER = LSAME( UPLO, 'U' )
181      LQUERY = ( LWORK.EQ.-1 )
182      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
183         INFO = -1
184      ELSE IF( N.LT.0 ) THEN
185         INFO = -2
186      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
187         INFO = -4
188      ELSE IF( LWORK.LT.MAX( 1, 2*N ) .AND. .NOT.LQUERY ) THEN
189         INFO = -7
190      END IF
191*
192      IF( INFO.EQ.0 ) THEN
193         LWKOPT = (NB+1)*N
194         WORK( 1 ) = LWKOPT
195      END IF
196*
197      IF( INFO.NE.0 ) THEN
198         CALL XERBLA( 'CSYTRF_AA', -INFO )
199         RETURN
200      ELSE IF( LQUERY ) THEN
201         RETURN
202      END IF
203*
204*     Quick return
205*
206      IF ( N.EQ.0 ) THEN
207          RETURN
208      ENDIF
209      IPIV( 1 ) = 1
210      IF ( N.EQ.1 ) THEN
211         RETURN
212      END IF
213*
214*     Adjust block size based on the workspace size
215*
216      IF( LWORK.LT.((1+NB)*N) ) THEN
217         NB = ( LWORK-N ) / N
218      END IF
219*
220      IF( UPPER ) THEN
221*
222*        .....................................................
223*        Factorize A as U**T*D*U using the upper triangle of A
224*        .....................................................
225*
226*        Copy first row A(1, 1:N) into H(1:n) (stored in WORK(1:N))
227*
228         CALL CCOPY( N, A( 1, 1 ), LDA, WORK( 1 ), 1 )
229*
230*        J is the main loop index, increasing from 1 to N in steps of
231*        JB, where JB is the number of columns factorized by CLASYF;
232*        JB is either NB, or N-J+1 for the last block
233*
234         J = 0
235 10      CONTINUE
236         IF( J.GE.N )
237     $      GO TO 20
238*
239*        each step of the main loop
240*         J is the last column of the previous panel
241*         J1 is the first column of the current panel
242*         K1 identifies if the previous column of the panel has been
243*          explicitly stored, e.g., K1=1 for the first panel, and
244*          K1=0 for the rest
245*
246         J1 = J + 1
247         JB = MIN( N-J1+1, NB )
248         K1 = MAX(1, J)-J
249*
250*        Panel factorization
251*
252         CALL CLASYF_AA( UPLO, 2-K1, N-J, JB,
253     $                   A( MAX(1, J), J+1 ), LDA,
254     $                   IPIV( J+1 ), WORK, N, WORK( N*NB+1 ) )
255*
256*        Adjust IPIV and apply it back (J-th step picks (J+1)-th pivot)
257*
258         DO J2 = J+2, MIN(N, J+JB+1)
259            IPIV( J2 ) = IPIV( J2 ) + J
260            IF( (J2.NE.IPIV(J2)) .AND. ((J1-K1).GT.2) ) THEN
261               CALL CSWAP( J1-K1-2, A( 1, J2 ), 1,
262     $                              A( 1, IPIV(J2) ), 1 )
263            END IF
264         END DO
265         J = J + JB
266*
267*        Trailing submatrix update, where
268*         the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and
269*         WORK stores the current block of the auxiriarly matrix H
270*
271         IF( J.LT.N ) THEN
272*
273*           If first panel and JB=1 (NB=1), then nothing to do
274*
275            IF( J1.GT.1 .OR. JB.GT.1 ) THEN
276*
277*              Merge rank-1 update with BLAS-3 update
278*
279               ALPHA = A( J, J+1 )
280               A( J, J+1 ) = ONE
281               CALL CCOPY( N-J, A( J-1, J+1 ), LDA,
282     $                          WORK( (J+1-J1+1)+JB*N ), 1 )
283               CALL CSCAL( N-J, ALPHA, WORK( (J+1-J1+1)+JB*N ), 1 )
284*
285*              K1 identifies if the previous column of the panel has been
286*               explicitly stored, e.g., K1=1 and K2= 0 for the first panel,
287*               while K1=0 and K2=1 for the rest
288*
289               IF( J1.GT.1 ) THEN
290*
291*                 Not first panel
292*
293                  K2 = 1
294               ELSE
295*
296*                 First panel
297*
298                  K2 = 0
299*
300*                 First update skips the first column
301*
302                  JB = JB - 1
303               END IF
304*
305               DO J2 = J+1, N, NB
306                  NJ = MIN( NB, N-J2+1 )
307*
308*                 Update (J2, J2) diagonal block with CGEMV
309*
310                  J3 = J2
311                  DO MJ = NJ-1, 1, -1
312                     CALL CGEMV( 'No transpose', MJ, JB+1,
313     $                          -ONE, WORK( J3-J1+1+K1*N ), N,
314     $                                A( J1-K2, J3 ), 1,
315     $                           ONE, A( J3, J3 ), LDA )
316                     J3 = J3 + 1
317                  END DO
318*
319*                 Update off-diagonal block of J2-th block row with CGEMM
320*
321                  CALL CGEMM( 'Transpose', 'Transpose',
322     $                        NJ, N-J3+1, JB+1,
323     $                       -ONE, A( J1-K2, J2 ), LDA,
324     $                             WORK( J3-J1+1+K1*N ), N,
325     $                        ONE, A( J2, J3 ), LDA )
326               END DO
327*
328*              Recover T( J, J+1 )
329*
330               A( J, J+1 ) = ALPHA
331            END IF
332*
333*           WORK(J+1, 1) stores H(J+1, 1)
334*
335            CALL CCOPY( N-J, A( J+1, J+1 ), LDA, WORK( 1 ), 1 )
336         END IF
337         GO TO 10
338      ELSE
339*
340*        .....................................................
341*        Factorize A as L*D*L**T using the lower triangle of A
342*        .....................................................
343*
344*        copy first column A(1:N, 1) into H(1:N, 1)
345*         (stored in WORK(1:N))
346*
347         CALL CCOPY( N, A( 1, 1 ), 1, WORK( 1 ), 1 )
348*
349*        J is the main loop index, increasing from 1 to N in steps of
350*        JB, where JB is the number of columns factorized by CLASYF;
351*        JB is either NB, or N-J+1 for the last block
352*
353         J = 0
354 11      CONTINUE
355         IF( J.GE.N )
356     $      GO TO 20
357*
358*        each step of the main loop
359*         J is the last column of the previous panel
360*         J1 is the first column of the current panel
361*         K1 identifies if the previous column of the panel has been
362*          explicitly stored, e.g., K1=1 for the first panel, and
363*          K1=0 for the rest
364*
365         J1 = J+1
366         JB = MIN( N-J1+1, NB )
367         K1 = MAX(1, J)-J
368*
369*        Panel factorization
370*
371         CALL CLASYF_AA( UPLO, 2-K1, N-J, JB,
372     $                   A( J+1, MAX(1, J) ), LDA,
373     $                   IPIV( J+1 ), WORK, N, WORK( N*NB+1 ) )
374*
375*        Adjust IPIV and apply it back (J-th step picks (J+1)-th pivot)
376*
377         DO J2 = J+2, MIN(N, J+JB+1)
378            IPIV( J2 ) = IPIV( J2 ) + J
379            IF( (J2.NE.IPIV(J2)) .AND. ((J1-K1).GT.2) ) THEN
380               CALL CSWAP( J1-K1-2, A( J2, 1 ), LDA,
381     $                              A( IPIV(J2), 1 ), LDA )
382            END IF
383         END DO
384         J = J + JB
385*
386*        Trailing submatrix update, where
387*          A(J2+1, J1-1) stores L(J2+1, J1) and
388*          WORK(J2+1, 1) stores H(J2+1, 1)
389*
390         IF( J.LT.N ) THEN
391*
392*           if first panel and JB=1 (NB=1), then nothing to do
393*
394            IF( J1.GT.1 .OR. JB.GT.1 ) THEN
395*
396*              Merge rank-1 update with BLAS-3 update
397*
398               ALPHA = A( J+1, J )
399               A( J+1, J ) = ONE
400               CALL CCOPY( N-J, A( J+1, J-1 ), 1,
401     $                          WORK( (J+1-J1+1)+JB*N ), 1 )
402               CALL CSCAL( N-J, ALPHA, WORK( (J+1-J1+1)+JB*N ), 1 )
403*
404*              K1 identifies if the previous column of the panel has been
405*               explicitly stored, e.g., K1=1 and K2= 0 for the first panel,
406*               while K1=0 and K2=1 for the rest
407*
408               IF( J1.GT.1 ) THEN
409*
410*                 Not first panel
411*
412                  K2 = 1
413               ELSE
414*
415*                 First panel
416*
417                  K2 = 0
418*
419*                 First update skips the first column
420*
421                  JB = JB - 1
422               END IF
423*
424               DO J2 = J+1, N, NB
425                  NJ = MIN( NB, N-J2+1 )
426*
427*                 Update (J2, J2) diagonal block with CGEMV
428*
429                  J3 = J2
430                  DO MJ = NJ-1, 1, -1
431                     CALL CGEMV( 'No transpose', MJ, JB+1,
432     $                          -ONE, WORK( J3-J1+1+K1*N ), N,
433     $                                A( J3, J1-K2 ), LDA,
434     $                           ONE, A( J3, J3 ), 1 )
435                     J3 = J3 + 1
436                  END DO
437*
438*                 Update off-diagonal block in J2-th block column with CGEMM
439*
440                  CALL CGEMM( 'No transpose', 'Transpose',
441     $                        N-J3+1, NJ, JB+1,
442     $                       -ONE, WORK( J3-J1+1+K1*N ), N,
443     $                             A( J2, J1-K2 ), LDA,
444     $                        ONE, A( J3, J2 ), LDA )
445               END DO
446*
447*              Recover T( J+1, J )
448*
449               A( J+1, J ) = ALPHA
450            END IF
451*
452*           WORK(J+1, 1) stores H(J+1, 1)
453*
454            CALL CCOPY( N-J, A( J+1, J+1 ), 1, WORK( 1 ), 1 )
455         END IF
456         GO TO 11
457      END IF
458*
459   20 CONTINUE
460      WORK( 1 ) = LWKOPT
461      RETURN
462*
463*     End of CSYTRF_AA
464*
465      END
466