1*> \brief \b SSYTRI_3X
2*
3*  =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6*            http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download SSYTRI_3X + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ssytri_3x.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ssytri_3x.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ssytri_3x.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18*  Definition:
19*  ===========
20*
21*       SUBROUTINE SSYTRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO )
22*
23*       .. Scalar Arguments ..
24*       CHARACTER          UPLO
25*       INTEGER            INFO, LDA, N, NB
26*       ..
27*       .. Array Arguments ..
28*       INTEGER            IPIV( * )
29*       REAL               A( LDA, * ),  E( * ), WORK( N+NB+1, * )
30*       ..
31*
32*
33*> \par Purpose:
34*  =============
35*>
36*> \verbatim
37*> SSYTRI_3X computes the inverse of a real symmetric indefinite
38*> matrix A using the factorization computed by SSYTRF_RK or SSYTRF_BK:
39*>
40*>     A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T),
41*>
42*> where U (or L) is unit upper (or lower) triangular matrix,
43*> U**T (or L**T) is the transpose of U (or L), P is a permutation
44*> matrix, P**T is the transpose of P, and D is symmetric and block
45*> diagonal with 1-by-1 and 2-by-2 diagonal blocks.
46*>
47*> This is the blocked version of the algorithm, calling Level 3 BLAS.
48*> \endverbatim
49*
50*  Arguments:
51*  ==========
52*
53*> \param[in] UPLO
54*> \verbatim
55*>          UPLO is CHARACTER*1
56*>          Specifies whether the details of the factorization are
57*>          stored as an upper or lower triangular matrix.
58*>          = 'U':  Upper triangle of A is stored;
59*>          = 'L':  Lower triangle of A is stored.
60*> \endverbatim
61*>
62*> \param[in] N
63*> \verbatim
64*>          N is INTEGER
65*>          The order of the matrix A.  N >= 0.
66*> \endverbatim
67*>
68*> \param[in,out] A
69*> \verbatim
70*>          A is REAL array, dimension (LDA,N)
71*>          On entry, diagonal of the block diagonal matrix D and
72*>          factors U or L as computed by SYTRF_RK and SSYTRF_BK:
73*>            a) ONLY diagonal elements of the symmetric block diagonal
74*>               matrix D on the diagonal of A, i.e. D(k,k) = A(k,k);
75*>               (superdiagonal (or subdiagonal) elements of D
76*>                should be provided on entry in array E), and
77*>            b) If UPLO = 'U': factor U in the superdiagonal part of A.
78*>               If UPLO = 'L': factor L in the subdiagonal part of A.
79*>
80*>          On exit, if INFO = 0, the symmetric inverse of the original
81*>          matrix.
82*>             If UPLO = 'U': the upper triangular part of the inverse
83*>             is formed and the part of A below the diagonal is not
84*>             referenced;
85*>             If UPLO = 'L': the lower triangular part of the inverse
86*>             is formed and the part of A above the diagonal is not
87*>             referenced.
88*> \endverbatim
89*>
90*> \param[in] LDA
91*> \verbatim
92*>          LDA is INTEGER
93*>          The leading dimension of the array A.  LDA >= max(1,N).
94*> \endverbatim
95*>
96*> \param[in] E
97*> \verbatim
98*>          E is REAL array, dimension (N)
99*>          On entry, contains the superdiagonal (or subdiagonal)
100*>          elements of the symmetric block diagonal matrix D
101*>          with 1-by-1 or 2-by-2 diagonal blocks, where
102*>          If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) not referenced;
103*>          If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) not referenced.
104*>
105*>          NOTE: For 1-by-1 diagonal block D(k), where
106*>          1 <= k <= N, the element E(k) is not referenced in both
107*>          UPLO = 'U' or UPLO = 'L' cases.
108*> \endverbatim
109*>
110*> \param[in] IPIV
111*> \verbatim
112*>          IPIV is INTEGER array, dimension (N)
113*>          Details of the interchanges and the block structure of D
114*>          as determined by SSYTRF_RK or SSYTRF_BK.
115*> \endverbatim
116*>
117*> \param[out] WORK
118*> \verbatim
119*>          WORK is REAL array, dimension (N+NB+1,NB+3).
120*> \endverbatim
121*>
122*> \param[in] NB
123*> \verbatim
124*>          NB is INTEGER
125*>          Block size.
126*> \endverbatim
127*>
128*> \param[out] INFO
129*> \verbatim
130*>          INFO is INTEGER
131*>          = 0: successful exit
132*>          < 0: if INFO = -i, the i-th argument had an illegal value
133*>          > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its
134*>               inverse could not be computed.
135*> \endverbatim
136*
137*  Authors:
138*  ========
139*
140*> \author Univ. of Tennessee
141*> \author Univ. of California Berkeley
142*> \author Univ. of Colorado Denver
143*> \author NAG Ltd.
144*
145*> \ingroup singleSYcomputational
146*
147*> \par Contributors:
148*  ==================
149*> \verbatim
150*>
151*>  June 2017,  Igor Kozachenko,
152*>                  Computer Science Division,
153*>                  University of California, Berkeley
154*>
155*> \endverbatim
156*
157*  =====================================================================
158      SUBROUTINE SSYTRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO )
159*
160*  -- LAPACK computational routine --
161*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
162*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
163*
164*     .. Scalar Arguments ..
165      CHARACTER          UPLO
166      INTEGER            INFO, LDA, N, NB
167*     ..
168*     .. Array Arguments ..
169      INTEGER            IPIV( * )
170      REAL              A( LDA, * ), E( * ), WORK( N+NB+1, * )
171*     ..
172*
173*  =====================================================================
174*
175*     .. Parameters ..
176      REAL               ONE, ZERO
177      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
178*     ..
179*     .. Local Scalars ..
180      LOGICAL            UPPER
181      INTEGER            CUT, I, ICOUNT, INVD, IP, K, NNB, J, U11
182      REAL               AK, AKKP1, AKP1, D, T, U01_I_J, U01_IP1_J,
183     $                   U11_I_J, U11_IP1_J
184*     ..
185*     .. External Functions ..
186      LOGICAL            LSAME
187      EXTERNAL           LSAME
188*     ..
189*     .. External Subroutines ..
190      EXTERNAL           SGEMM, SSYSWAPR, STRTRI, STRMM, XERBLA
191*     ..
192*     .. Intrinsic Functions ..
193      INTRINSIC          ABS, MAX, MOD
194*     ..
195*     .. Executable Statements ..
196*
197*     Test the input parameters.
198*
199      INFO = 0
200      UPPER = LSAME( UPLO, 'U' )
201      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
202         INFO = -1
203      ELSE IF( N.LT.0 ) THEN
204         INFO = -2
205      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
206         INFO = -4
207      END IF
208*
209*     Quick return if possible
210*
211      IF( INFO.NE.0 ) THEN
212         CALL XERBLA( 'SSYTRI_3X', -INFO )
213         RETURN
214      END IF
215      IF( N.EQ.0 )
216     $   RETURN
217*
218*     Workspace got Non-diag elements of D
219*
220      DO K = 1, N
221         WORK( K, 1 ) = E( K )
222      END DO
223*
224*     Check that the diagonal matrix D is nonsingular.
225*
226      IF( UPPER ) THEN
227*
228*        Upper triangular storage: examine D from bottom to top
229*
230         DO INFO = N, 1, -1
231            IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO )
232     $         RETURN
233         END DO
234      ELSE
235*
236*        Lower triangular storage: examine D from top to bottom.
237*
238         DO INFO = 1, N
239            IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO )
240     $         RETURN
241         END DO
242      END IF
243*
244      INFO = 0
245*
246*     Splitting Workspace
247*     U01 is a block ( N, NB+1 )
248*     The first element of U01 is in WORK( 1, 1 )
249*     U11 is a block ( NB+1, NB+1 )
250*     The first element of U11 is in WORK( N+1, 1 )
251*
252      U11 = N
253*
254*     INVD is a block ( N, 2 )
255*     The first element of INVD is in WORK( 1, INVD )
256*
257      INVD = NB + 2
258
259      IF( UPPER ) THEN
260*
261*        Begin Upper
262*
263*        invA = P * inv(U**T) * inv(D) * inv(U) * P**T.
264*
265         CALL STRTRI( UPLO, 'U', N, A, LDA, INFO )
266*
267*        inv(D) and inv(D) * inv(U)
268*
269         K = 1
270         DO WHILE( K.LE.N )
271            IF( IPIV( K ).GT.0 ) THEN
272*              1 x 1 diagonal NNB
273               WORK( K, INVD ) = ONE /  A( K, K )
274               WORK( K, INVD+1 ) = ZERO
275            ELSE
276*              2 x 2 diagonal NNB
277               T = WORK( K+1, 1 )
278               AK = A( K, K ) / T
279               AKP1 = A( K+1, K+1 ) / T
280               AKKP1 = WORK( K+1, 1 )  / T
281               D = T*( AK*AKP1-ONE )
282               WORK( K, INVD ) = AKP1 / D
283               WORK( K+1, INVD+1 ) = AK / D
284               WORK( K, INVD+1 ) = -AKKP1 / D
285               WORK( K+1, INVD ) = WORK( K, INVD+1 )
286               K = K + 1
287            END IF
288            K = K + 1
289         END DO
290*
291*        inv(U**T) = (inv(U))**T
292*
293*        inv(U**T) * inv(D) * inv(U)
294*
295         CUT = N
296         DO WHILE( CUT.GT.0 )
297            NNB = NB
298            IF( CUT.LE.NNB ) THEN
299               NNB = CUT
300            ELSE
301               ICOUNT = 0
302*              count negative elements,
303               DO I = CUT+1-NNB, CUT
304                  IF( IPIV( I ).LT.0 ) ICOUNT = ICOUNT + 1
305               END DO
306*              need a even number for a clear cut
307               IF( MOD( ICOUNT, 2 ).EQ.1 ) NNB = NNB + 1
308            END IF
309
310            CUT = CUT - NNB
311*
312*           U01 Block
313*
314            DO I = 1, CUT
315               DO J = 1, NNB
316                  WORK( I, J ) = A( I, CUT+J )
317               END DO
318            END DO
319*
320*           U11 Block
321*
322            DO I = 1, NNB
323               WORK( U11+I, I ) = ONE
324               DO J = 1, I-1
325                  WORK( U11+I, J ) = ZERO
326                END DO
327                DO J = I+1, NNB
328                   WORK( U11+I, J ) = A( CUT+I, CUT+J )
329                END DO
330            END DO
331*
332*           invD * U01
333*
334            I = 1
335            DO WHILE( I.LE.CUT )
336               IF( IPIV( I ).GT.0 ) THEN
337                  DO J = 1, NNB
338                     WORK( I, J ) = WORK( I, INVD ) * WORK( I, J )
339                  END DO
340               ELSE
341                  DO J = 1, NNB
342                     U01_I_J = WORK( I, J )
343                     U01_IP1_J = WORK( I+1, J )
344                     WORK( I, J ) = WORK( I, INVD ) * U01_I_J
345     $                            + WORK( I, INVD+1 ) * U01_IP1_J
346                     WORK( I+1, J ) = WORK( I+1, INVD ) * U01_I_J
347     $                              + WORK( I+1, INVD+1 ) * U01_IP1_J
348                  END DO
349                  I = I + 1
350               END IF
351               I = I + 1
352            END DO
353*
354*           invD1 * U11
355*
356            I = 1
357            DO WHILE ( I.LE.NNB )
358               IF( IPIV( CUT+I ).GT.0 ) THEN
359                  DO J = I, NNB
360                     WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J)
361                  END DO
362               ELSE
363                  DO J = I, NNB
364                     U11_I_J = WORK(U11+I,J)
365                     U11_IP1_J = WORK(U11+I+1,J)
366                     WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J)
367     $                            + WORK(CUT+I,INVD+1) * WORK(U11+I+1,J)
368                     WORK( U11+I+1, J ) = WORK(CUT+I+1,INVD) * U11_I_J
369     $                               + WORK(CUT+I+1,INVD+1) * U11_IP1_J
370                  END DO
371                  I = I + 1
372               END IF
373               I = I + 1
374            END DO
375*
376*           U11**T * invD1 * U11 -> U11
377*
378            CALL STRMM( 'L', 'U', 'T', 'U', NNB, NNB,
379     $                 ONE, A( CUT+1, CUT+1 ), LDA, WORK( U11+1, 1 ),
380     $                 N+NB+1 )
381*
382            DO I = 1, NNB
383               DO J = I, NNB
384                  A( CUT+I, CUT+J ) = WORK( U11+I, J )
385               END DO
386            END DO
387*
388*           U01**T * invD * U01 -> A( CUT+I, CUT+J )
389*
390            CALL SGEMM( 'T', 'N', NNB, NNB, CUT, ONE, A( 1, CUT+1 ),
391     $                  LDA, WORK, N+NB+1, ZERO, WORK(U11+1,1), N+NB+1 )
392
393*
394*           U11 =  U11**T * invD1 * U11 + U01**T * invD * U01
395*
396            DO I = 1, NNB
397               DO J = I, NNB
398                  A( CUT+I, CUT+J ) = A( CUT+I, CUT+J ) + WORK(U11+I,J)
399               END DO
400            END DO
401*
402*           U01 =  U00**T * invD0 * U01
403*
404            CALL STRMM( 'L', UPLO, 'T', 'U', CUT, NNB,
405     $                  ONE, A, LDA, WORK, N+NB+1 )
406
407*
408*           Update U01
409*
410            DO I = 1, CUT
411               DO J = 1, NNB
412                  A( I, CUT+J ) = WORK( I, J )
413               END DO
414            END DO
415*
416*           Next Block
417*
418         END DO
419*
420*        Apply PERMUTATIONS P and P**T:
421*        P * inv(U**T) * inv(D) * inv(U) * P**T.
422*        Interchange rows and columns I and IPIV(I) in reverse order
423*        from the formation order of IPIV vector for Upper case.
424*
425*        ( We can use a loop over IPIV with increment 1,
426*        since the ABS value of IPIV(I) represents the row (column)
427*        index of the interchange with row (column) i in both 1x1
428*        and 2x2 pivot cases, i.e. we don't need separate code branches
429*        for 1x1 and 2x2 pivot cases )
430*
431         DO I = 1, N
432             IP = ABS( IPIV( I ) )
433             IF( IP.NE.I ) THEN
434                IF (I .LT. IP) CALL SSYSWAPR( UPLO, N, A, LDA, I ,IP )
435                IF (I .GT. IP) CALL SSYSWAPR( UPLO, N, A, LDA, IP ,I )
436             END IF
437         END DO
438*
439      ELSE
440*
441*        Begin Lower
442*
443*        inv A = P * inv(L**T) * inv(D) * inv(L) * P**T.
444*
445         CALL STRTRI( UPLO, 'U', N, A, LDA, INFO )
446*
447*        inv(D) and inv(D) * inv(L)
448*
449         K = N
450         DO WHILE ( K .GE. 1 )
451            IF( IPIV( K ).GT.0 ) THEN
452*              1 x 1 diagonal NNB
453               WORK( K, INVD ) = ONE /  A( K, K )
454               WORK( K, INVD+1 ) = ZERO
455            ELSE
456*              2 x 2 diagonal NNB
457               T = WORK( K-1, 1 )
458               AK = A( K-1, K-1 ) / T
459               AKP1 = A( K, K ) / T
460               AKKP1 = WORK( K-1, 1 ) / T
461               D = T*( AK*AKP1-ONE )
462               WORK( K-1, INVD ) = AKP1 / D
463               WORK( K, INVD ) = AK / D
464               WORK( K, INVD+1 ) = -AKKP1 / D
465               WORK( K-1, INVD+1 ) = WORK( K, INVD+1 )
466               K = K - 1
467            END IF
468            K = K - 1
469         END DO
470*
471*        inv(L**T) = (inv(L))**T
472*
473*        inv(L**T) * inv(D) * inv(L)
474*
475         CUT = 0
476         DO WHILE( CUT.LT.N )
477            NNB = NB
478            IF( (CUT + NNB).GT.N ) THEN
479               NNB = N - CUT
480            ELSE
481               ICOUNT = 0
482*              count negative elements,
483               DO I = CUT + 1, CUT+NNB
484                  IF ( IPIV( I ).LT.0 ) ICOUNT = ICOUNT + 1
485               END DO
486*              need a even number for a clear cut
487               IF( MOD( ICOUNT, 2 ).EQ.1 ) NNB = NNB + 1
488            END IF
489*
490*           L21 Block
491*
492            DO I = 1, N-CUT-NNB
493               DO J = 1, NNB
494                 WORK( I, J ) = A( CUT+NNB+I, CUT+J )
495               END DO
496            END DO
497*
498*           L11 Block
499*
500            DO I = 1, NNB
501               WORK( U11+I, I) = ONE
502               DO J = I+1, NNB
503                  WORK( U11+I, J ) = ZERO
504               END DO
505               DO J = 1, I-1
506                  WORK( U11+I, J ) = A( CUT+I, CUT+J )
507               END DO
508            END DO
509*
510*           invD*L21
511*
512            I = N-CUT-NNB
513            DO WHILE( I.GE.1 )
514               IF( IPIV( CUT+NNB+I ).GT.0 ) THEN
515                  DO J = 1, NNB
516                     WORK( I, J ) = WORK( CUT+NNB+I, INVD) * WORK( I, J)
517                  END DO
518               ELSE
519                  DO J = 1, NNB
520                     U01_I_J = WORK(I,J)
521                     U01_IP1_J = WORK(I-1,J)
522                     WORK(I,J)=WORK(CUT+NNB+I,INVD)*U01_I_J+
523     $                        WORK(CUT+NNB+I,INVD+1)*U01_IP1_J
524                     WORK(I-1,J)=WORK(CUT+NNB+I-1,INVD+1)*U01_I_J+
525     $                        WORK(CUT+NNB+I-1,INVD)*U01_IP1_J
526                  END DO
527                  I = I - 1
528               END IF
529               I = I - 1
530            END DO
531*
532*           invD1*L11
533*
534            I = NNB
535            DO WHILE( I.GE.1 )
536               IF( IPIV( CUT+I ).GT.0 ) THEN
537                  DO J = 1, NNB
538                     WORK( U11+I, J ) = WORK( CUT+I, INVD)*WORK(U11+I,J)
539                  END DO
540
541               ELSE
542                  DO J = 1, NNB
543                     U11_I_J = WORK( U11+I, J )
544                     U11_IP1_J = WORK( U11+I-1, J )
545                     WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J)
546     $                                + WORK(CUT+I,INVD+1) * U11_IP1_J
547                     WORK( U11+I-1, J ) = WORK(CUT+I-1,INVD+1) * U11_I_J
548     $                                  + WORK(CUT+I-1,INVD) * U11_IP1_J
549                  END DO
550                  I = I - 1
551               END IF
552               I = I - 1
553            END DO
554*
555*           L11**T * invD1 * L11 -> L11
556*
557            CALL STRMM( 'L', UPLO, 'T', 'U', NNB, NNB, ONE,
558     $                   A( CUT+1, CUT+1 ), LDA, WORK( U11+1, 1 ),
559     $                   N+NB+1 )
560
561*
562            DO I = 1, NNB
563               DO J = 1, I
564                  A( CUT+I, CUT+J ) = WORK( U11+I, J )
565               END DO
566            END DO
567*
568            IF( (CUT+NNB).LT.N ) THEN
569*
570*              L21**T * invD2*L21 -> A( CUT+I, CUT+J )
571*
572               CALL SGEMM( 'T', 'N', NNB, NNB, N-NNB-CUT, ONE,
573     $                     A( CUT+NNB+1, CUT+1 ), LDA, WORK, N+NB+1,
574     $                     ZERO, WORK( U11+1, 1 ), N+NB+1 )
575
576*
577*              L11 =  L11**T * invD1 * L11 + U01**T * invD * U01
578*
579               DO I = 1, NNB
580                  DO J = 1, I
581                     A( CUT+I, CUT+J ) = A( CUT+I, CUT+J )+WORK(U11+I,J)
582                  END DO
583               END DO
584*
585*              L01 =  L22**T * invD2 * L21
586*
587               CALL STRMM( 'L', UPLO, 'T', 'U', N-NNB-CUT, NNB, ONE,
588     $                     A( CUT+NNB+1, CUT+NNB+1 ), LDA, WORK,
589     $                     N+NB+1 )
590*
591*              Update L21
592*
593               DO I = 1, N-CUT-NNB
594                  DO J = 1, NNB
595                     A( CUT+NNB+I, CUT+J ) = WORK( I, J )
596                  END DO
597               END DO
598*
599            ELSE
600*
601*              L11 =  L11**T * invD1 * L11
602*
603               DO I = 1, NNB
604                  DO J = 1, I
605                     A( CUT+I, CUT+J ) = WORK( U11+I, J )
606                  END DO
607               END DO
608            END IF
609*
610*           Next Block
611*
612            CUT = CUT + NNB
613*
614         END DO
615*
616*        Apply PERMUTATIONS P and P**T:
617*        P * inv(L**T) * inv(D) * inv(L) * P**T.
618*        Interchange rows and columns I and IPIV(I) in reverse order
619*        from the formation order of IPIV vector for Lower case.
620*
621*        ( We can use a loop over IPIV with increment -1,
622*        since the ABS value of IPIV(I) represents the row (column)
623*        index of the interchange with row (column) i in both 1x1
624*        and 2x2 pivot cases, i.e. we don't need separate code branches
625*        for 1x1 and 2x2 pivot cases )
626*
627         DO I = N, 1, -1
628             IP = ABS( IPIV( I ) )
629             IF( IP.NE.I ) THEN
630                IF (I .LT. IP) CALL SSYSWAPR( UPLO, N, A, LDA, I ,IP )
631                IF (I .GT. IP) CALL SSYSWAPR( UPLO, N, A, LDA, IP ,I )
632             END IF
633         END DO
634*
635      END IF
636*
637      RETURN
638*
639*     End of SSYTRI_3X
640*
641      END
642
643