1*> \brief \b DSYTRI_3X
2*
3*  =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6*            http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download DSYTRI_3X + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsytri_3x.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsytri_3x.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsytri_3x.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18*  Definition:
19*  ===========
20*
21*       SUBROUTINE DSYTRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO )
22*
23*       .. Scalar Arguments ..
24*       CHARACTER          UPLO
25*       INTEGER            INFO, LDA, N, NB
26*       ..
27*       .. Array Arguments ..
28*       INTEGER            IPIV( * )
29*       DOUBLE PRECISION   A( LDA, * ),  E( * ), WORK( N+NB+1, * )
30*       ..
31*
32*
33*> \par Purpose:
34*  =============
35*>
36*> \verbatim
37*> DSYTRI_3X computes the inverse of a real symmetric indefinite
38*> matrix A using the factorization computed by DSYTRF_RK or DSYTRF_BK:
39*>
40*>     A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T),
41*>
42*> where U (or L) is unit upper (or lower) triangular matrix,
43*> U**T (or L**T) is the transpose of U (or L), P is a permutation
44*> matrix, P**T is the transpose of P, and D is symmetric and block
45*> diagonal with 1-by-1 and 2-by-2 diagonal blocks.
46*>
47*> This is the blocked version of the algorithm, calling Level 3 BLAS.
48*> \endverbatim
49*
50*  Arguments:
51*  ==========
52*
53*> \param[in] UPLO
54*> \verbatim
55*>          UPLO is CHARACTER*1
56*>          Specifies whether the details of the factorization are
57*>          stored as an upper or lower triangular matrix.
58*>          = 'U':  Upper triangle of A is stored;
59*>          = 'L':  Lower triangle of A is stored.
60*> \endverbatim
61*>
62*> \param[in] N
63*> \verbatim
64*>          N is INTEGER
65*>          The order of the matrix A.  N >= 0.
66*> \endverbatim
67*>
68*> \param[in,out] A
69*> \verbatim
70*>          A is DOUBLE PRECISION array, dimension (LDA,N)
71*>          On entry, diagonal of the block diagonal matrix D and
72*>          factors U or L as computed by DSYTRF_RK and DSYTRF_BK:
73*>            a) ONLY diagonal elements of the symmetric block diagonal
74*>               matrix D on the diagonal of A, i.e. D(k,k) = A(k,k);
75*>               (superdiagonal (or subdiagonal) elements of D
76*>                should be provided on entry in array E), and
77*>            b) If UPLO = 'U': factor U in the superdiagonal part of A.
78*>               If UPLO = 'L': factor L in the subdiagonal part of A.
79*>
80*>          On exit, if INFO = 0, the symmetric inverse of the original
81*>          matrix.
82*>             If UPLO = 'U': the upper triangular part of the inverse
83*>             is formed and the part of A below the diagonal is not
84*>             referenced;
85*>             If UPLO = 'L': the lower triangular part of the inverse
86*>             is formed and the part of A above the diagonal is not
87*>             referenced.
88*> \endverbatim
89*>
90*> \param[in] LDA
91*> \verbatim
92*>          LDA is INTEGER
93*>          The leading dimension of the array A.  LDA >= max(1,N).
94*> \endverbatim
95*>
96*> \param[in] E
97*> \verbatim
98*>          E is DOUBLE PRECISION array, dimension (N)
99*>          On entry, contains the superdiagonal (or subdiagonal)
100*>          elements of the symmetric block diagonal matrix D
101*>          with 1-by-1 or 2-by-2 diagonal blocks, where
102*>          If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) not referenced;
103*>          If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) not referenced.
104*>
105*>          NOTE: For 1-by-1 diagonal block D(k), where
106*>          1 <= k <= N, the element E(k) is not referenced in both
107*>          UPLO = 'U' or UPLO = 'L' cases.
108*> \endverbatim
109*>
110*> \param[in] IPIV
111*> \verbatim
112*>          IPIV is INTEGER array, dimension (N)
113*>          Details of the interchanges and the block structure of D
114*>          as determined by DSYTRF_RK or DSYTRF_BK.
115*> \endverbatim
116*>
117*> \param[out] WORK
118*> \verbatim
119*>          WORK is DOUBLE PRECISION array, dimension (N+NB+1,NB+3).
120*> \endverbatim
121*>
122*> \param[in] NB
123*> \verbatim
124*>          NB is INTEGER
125*>          Block size.
126*> \endverbatim
127*>
128*> \param[out] INFO
129*> \verbatim
130*>          INFO is INTEGER
131*>          = 0: successful exit
132*>          < 0: if INFO = -i, the i-th argument had an illegal value
133*>          > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its
134*>               inverse could not be computed.
135*> \endverbatim
136*
137*  Authors:
138*  ========
139*
140*> \author Univ. of Tennessee
141*> \author Univ. of California Berkeley
142*> \author Univ. of Colorado Denver
143*> \author NAG Ltd.
144*
145*> \date June 2017
146*
147*> \ingroup doubleSYcomputational
148*
149*> \par Contributors:
150*  ==================
151*> \verbatim
152*>
153*>  June 2017,  Igor Kozachenko,
154*>                  Computer Science Division,
155*>                  University of California, Berkeley
156*>
157*> \endverbatim
158*
159*  =====================================================================
160      SUBROUTINE DSYTRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO )
161*
162*  -- LAPACK computational routine (version 3.7.1) --
163*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
164*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
165*     June 2017
166*
167*     .. Scalar Arguments ..
168      CHARACTER          UPLO
169      INTEGER            INFO, LDA, N, NB
170*     ..
171*     .. Array Arguments ..
172      INTEGER            IPIV( * )
173      DOUBLE PRECISION   A( LDA, * ), E( * ), WORK( N+NB+1, * )
174*     ..
175*
176*  =====================================================================
177*
178*     .. Parameters ..
179      DOUBLE PRECISION   ONE, ZERO
180      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
181*     ..
182*     .. Local Scalars ..
183      LOGICAL            UPPER
184      INTEGER            CUT, I, ICOUNT, INVD, IP, K, NNB, J, U11
185      DOUBLE PRECISION   AK, AKKP1, AKP1, D, T, U01_I_J, U01_IP1_J,
186     $                   U11_I_J, U11_IP1_J
187*     ..
188*     .. External Functions ..
189      LOGICAL            LSAME
190      EXTERNAL           LSAME
191*     ..
192*     .. External Subroutines ..
193      EXTERNAL           DGEMM, DSYSWAPR, DTRTRI, DTRMM, XERBLA
194*     ..
195*     .. Intrinsic Functions ..
196      INTRINSIC          ABS, MAX, MOD
197*     ..
198*     .. Executable Statements ..
199*
200*     Test the input parameters.
201*
202      INFO = 0
203      UPPER = LSAME( UPLO, 'U' )
204      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
205         INFO = -1
206      ELSE IF( N.LT.0 ) THEN
207         INFO = -2
208      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
209         INFO = -4
210      END IF
211*
212*     Quick return if possible
213*
214      IF( INFO.NE.0 ) THEN
215         CALL XERBLA( 'DSYTRI_3X', -INFO )
216         RETURN
217      END IF
218      IF( N.EQ.0 )
219     $   RETURN
220*
221*     Workspace got Non-diag elements of D
222*
223      DO K = 1, N
224         WORK( K, 1 ) = E( K )
225      END DO
226*
227*     Check that the diagonal matrix D is nonsingular.
228*
229      IF( UPPER ) THEN
230*
231*        Upper triangular storage: examine D from bottom to top
232*
233         DO INFO = N, 1, -1
234            IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO )
235     $         RETURN
236         END DO
237      ELSE
238*
239*        Lower triangular storage: examine D from top to bottom.
240*
241         DO INFO = 1, N
242            IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO )
243     $         RETURN
244         END DO
245      END IF
246*
247      INFO = 0
248*
249*     Splitting Workspace
250*     U01 is a block ( N, NB+1 )
251*     The first element of U01 is in WORK( 1, 1 )
252*     U11 is a block ( NB+1, NB+1 )
253*     The first element of U11 is in WORK( N+1, 1 )
254*
255      U11 = N
256*
257*     INVD is a block ( N, 2 )
258*     The first element of INVD is in WORK( 1, INVD )
259*
260      INVD = NB + 2
261
262      IF( UPPER ) THEN
263*
264*        Begin Upper
265*
266*        invA = P * inv(U**T) * inv(D) * inv(U) * P**T.
267*
268         CALL DTRTRI( UPLO, 'U', N, A, LDA, INFO )
269*
270*        inv(D) and inv(D) * inv(U)
271*
272         K = 1
273         DO WHILE( K.LE.N )
274            IF( IPIV( K ).GT.0 ) THEN
275*              1 x 1 diagonal NNB
276               WORK( K, INVD ) = ONE /  A( K, K )
277               WORK( K, INVD+1 ) = ZERO
278            ELSE
279*              2 x 2 diagonal NNB
280               T = WORK( K+1, 1 )
281               AK = A( K, K ) / T
282               AKP1 = A( K+1, K+1 ) / T
283               AKKP1 = WORK( K+1, 1 )  / T
284               D = T*( AK*AKP1-ONE )
285               WORK( K, INVD ) = AKP1 / D
286               WORK( K+1, INVD+1 ) = AK / D
287               WORK( K, INVD+1 ) = -AKKP1 / D
288               WORK( K+1, INVD ) = WORK( K, INVD+1 )
289               K = K + 1
290            END IF
291            K = K + 1
292         END DO
293*
294*        inv(U**T) = (inv(U))**T
295*
296*        inv(U**T) * inv(D) * inv(U)
297*
298         CUT = N
299         DO WHILE( CUT.GT.0 )
300            NNB = NB
301            IF( CUT.LE.NNB ) THEN
302               NNB = CUT
303            ELSE
304               ICOUNT = 0
305*              count negative elements,
306               DO I = CUT+1-NNB, CUT
307                  IF( IPIV( I ).LT.0 ) ICOUNT = ICOUNT + 1
308               END DO
309*              need a even number for a clear cut
310               IF( MOD( ICOUNT, 2 ).EQ.1 ) NNB = NNB + 1
311            END IF
312
313            CUT = CUT - NNB
314*
315*           U01 Block
316*
317            DO I = 1, CUT
318               DO J = 1, NNB
319                  WORK( I, J ) = A( I, CUT+J )
320               END DO
321            END DO
322*
323*           U11 Block
324*
325            DO I = 1, NNB
326               WORK( U11+I, I ) = ONE
327               DO J = 1, I-1
328                  WORK( U11+I, J ) = ZERO
329                END DO
330                DO J = I+1, NNB
331                   WORK( U11+I, J ) = A( CUT+I, CUT+J )
332                END DO
333            END DO
334*
335*           invD * U01
336*
337            I = 1
338            DO WHILE( I.LE.CUT )
339               IF( IPIV( I ).GT.0 ) THEN
340                  DO J = 1, NNB
341                     WORK( I, J ) = WORK( I, INVD ) * WORK( I, J )
342                  END DO
343               ELSE
344                  DO J = 1, NNB
345                     U01_I_J = WORK( I, J )
346                     U01_IP1_J = WORK( I+1, J )
347                     WORK( I, J ) = WORK( I, INVD ) * U01_I_J
348     $                            + WORK( I, INVD+1 ) * U01_IP1_J
349                     WORK( I+1, J ) = WORK( I+1, INVD ) * U01_I_J
350     $                              + WORK( I+1, INVD+1 ) * U01_IP1_J
351                  END DO
352                  I = I + 1
353               END IF
354               I = I + 1
355            END DO
356*
357*           invD1 * U11
358*
359            I = 1
360            DO WHILE ( I.LE.NNB )
361               IF( IPIV( CUT+I ).GT.0 ) THEN
362                  DO J = I, NNB
363                     WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J)
364                  END DO
365               ELSE
366                  DO J = I, NNB
367                     U11_I_J = WORK(U11+I,J)
368                     U11_IP1_J = WORK(U11+I+1,J)
369                     WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J)
370     $                            + WORK(CUT+I,INVD+1) * WORK(U11+I+1,J)
371                     WORK( U11+I+1, J ) = WORK(CUT+I+1,INVD) * U11_I_J
372     $                               + WORK(CUT+I+1,INVD+1) * U11_IP1_J
373                  END DO
374                  I = I + 1
375               END IF
376               I = I + 1
377            END DO
378*
379*           U11**T * invD1 * U11 -> U11
380*
381            CALL DTRMM( 'L', 'U', 'T', 'U', NNB, NNB,
382     $                 ONE, A( CUT+1, CUT+1 ), LDA, WORK( U11+1, 1 ),
383     $                 N+NB+1 )
384*
385            DO I = 1, NNB
386               DO J = I, NNB
387                  A( CUT+I, CUT+J ) = WORK( U11+I, J )
388               END DO
389            END DO
390*
391*           U01**T * invD * U01 -> A( CUT+I, CUT+J )
392*
393            CALL DGEMM( 'T', 'N', NNB, NNB, CUT, ONE, A( 1, CUT+1 ),
394     $                  LDA, WORK, N+NB+1, ZERO, WORK(U11+1,1), N+NB+1 )
395
396*
397*           U11 =  U11**T * invD1 * U11 + U01**T * invD * U01
398*
399            DO I = 1, NNB
400               DO J = I, NNB
401                  A( CUT+I, CUT+J ) = A( CUT+I, CUT+J ) + WORK(U11+I,J)
402               END DO
403            END DO
404*
405*           U01 =  U00**T * invD0 * U01
406*
407            CALL DTRMM( 'L', UPLO, 'T', 'U', CUT, NNB,
408     $                  ONE, A, LDA, WORK, N+NB+1 )
409
410*
411*           Update U01
412*
413            DO I = 1, CUT
414               DO J = 1, NNB
415                  A( I, CUT+J ) = WORK( I, J )
416               END DO
417            END DO
418*
419*           Next Block
420*
421         END DO
422*
423*        Apply PERMUTATIONS P and P**T:
424*        P * inv(U**T) * inv(D) * inv(U) * P**T.
425*        Interchange rows and columns I and IPIV(I) in reverse order
426*        from the formation order of IPIV vector for Upper case.
427*
428*        ( We can use a loop over IPIV with increment 1,
429*        since the ABS value of IPIV(I) represents the row (column)
430*        index of the interchange with row (column) i in both 1x1
431*        and 2x2 pivot cases, i.e. we don't need separate code branches
432*        for 1x1 and 2x2 pivot cases )
433*
434         DO I = 1, N
435             IP = ABS( IPIV( I ) )
436             IF( IP.NE.I ) THEN
437                IF (I .LT. IP) CALL DSYSWAPR( UPLO, N, A, LDA, I ,IP )
438                IF (I .GT. IP) CALL DSYSWAPR( UPLO, N, A, LDA, IP ,I )
439             END IF
440         END DO
441*
442      ELSE
443*
444*        Begin Lower
445*
446*        inv A = P * inv(L**T) * inv(D) * inv(L) * P**T.
447*
448         CALL DTRTRI( UPLO, 'U', N, A, LDA, INFO )
449*
450*        inv(D) and inv(D) * inv(L)
451*
452         K = N
453         DO WHILE ( K .GE. 1 )
454            IF( IPIV( K ).GT.0 ) THEN
455*              1 x 1 diagonal NNB
456               WORK( K, INVD ) = ONE /  A( K, K )
457               WORK( K, INVD+1 ) = ZERO
458            ELSE
459*              2 x 2 diagonal NNB
460               T = WORK( K-1, 1 )
461               AK = A( K-1, K-1 ) / T
462               AKP1 = A( K, K ) / T
463               AKKP1 = WORK( K-1, 1 ) / T
464               D = T*( AK*AKP1-ONE )
465               WORK( K-1, INVD ) = AKP1 / D
466               WORK( K, INVD ) = AK / D
467               WORK( K, INVD+1 ) = -AKKP1 / D
468               WORK( K-1, INVD+1 ) = WORK( K, INVD+1 )
469               K = K - 1
470            END IF
471            K = K - 1
472         END DO
473*
474*        inv(L**T) = (inv(L))**T
475*
476*        inv(L**T) * inv(D) * inv(L)
477*
478         CUT = 0
479         DO WHILE( CUT.LT.N )
480            NNB = NB
481            IF( (CUT + NNB).GT.N ) THEN
482               NNB = N - CUT
483            ELSE
484               ICOUNT = 0
485*              count negative elements,
486               DO I = CUT + 1, CUT+NNB
487                  IF ( IPIV( I ).LT.0 ) ICOUNT = ICOUNT + 1
488               END DO
489*              need a even number for a clear cut
490               IF( MOD( ICOUNT, 2 ).EQ.1 ) NNB = NNB + 1
491            END IF
492*
493*           L21 Block
494*
495            DO I = 1, N-CUT-NNB
496               DO J = 1, NNB
497                 WORK( I, J ) = A( CUT+NNB+I, CUT+J )
498               END DO
499            END DO
500*
501*           L11 Block
502*
503            DO I = 1, NNB
504               WORK( U11+I, I) = ONE
505               DO J = I+1, NNB
506                  WORK( U11+I, J ) = ZERO
507               END DO
508               DO J = 1, I-1
509                  WORK( U11+I, J ) = A( CUT+I, CUT+J )
510               END DO
511            END DO
512*
513*           invD*L21
514*
515            I = N-CUT-NNB
516            DO WHILE( I.GE.1 )
517               IF( IPIV( CUT+NNB+I ).GT.0 ) THEN
518                  DO J = 1, NNB
519                     WORK( I, J ) = WORK( CUT+NNB+I, INVD) * WORK( I, J)
520                  END DO
521               ELSE
522                  DO J = 1, NNB
523                     U01_I_J = WORK(I,J)
524                     U01_IP1_J = WORK(I-1,J)
525                     WORK(I,J)=WORK(CUT+NNB+I,INVD)*U01_I_J+
526     $                        WORK(CUT+NNB+I,INVD+1)*U01_IP1_J
527                     WORK(I-1,J)=WORK(CUT+NNB+I-1,INVD+1)*U01_I_J+
528     $                        WORK(CUT+NNB+I-1,INVD)*U01_IP1_J
529                  END DO
530                  I = I - 1
531               END IF
532               I = I - 1
533            END DO
534*
535*           invD1*L11
536*
537            I = NNB
538            DO WHILE( I.GE.1 )
539               IF( IPIV( CUT+I ).GT.0 ) THEN
540                  DO J = 1, NNB
541                     WORK( U11+I, J ) = WORK( CUT+I, INVD)*WORK(U11+I,J)
542                  END DO
543
544               ELSE
545                  DO J = 1, NNB
546                     U11_I_J = WORK( U11+I, J )
547                     U11_IP1_J = WORK( U11+I-1, J )
548                     WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J)
549     $                                + WORK(CUT+I,INVD+1) * U11_IP1_J
550                     WORK( U11+I-1, J ) = WORK(CUT+I-1,INVD+1) * U11_I_J
551     $                                  + WORK(CUT+I-1,INVD) * U11_IP1_J
552                  END DO
553                  I = I - 1
554               END IF
555               I = I - 1
556            END DO
557*
558*           L11**T * invD1 * L11 -> L11
559*
560            CALL DTRMM( 'L', UPLO, 'T', 'U', NNB, NNB, ONE,
561     $                   A( CUT+1, CUT+1 ), LDA, WORK( U11+1, 1 ),
562     $                   N+NB+1 )
563
564*
565            DO I = 1, NNB
566               DO J = 1, I
567                  A( CUT+I, CUT+J ) = WORK( U11+I, J )
568               END DO
569            END DO
570*
571            IF( (CUT+NNB).LT.N ) THEN
572*
573*              L21**T * invD2*L21 -> A( CUT+I, CUT+J )
574*
575               CALL DGEMM( 'T', 'N', NNB, NNB, N-NNB-CUT, ONE,
576     $                     A( CUT+NNB+1, CUT+1 ), LDA, WORK, N+NB+1,
577     $                     ZERO, WORK( U11+1, 1 ), N+NB+1 )
578
579*
580*              L11 =  L11**T * invD1 * L11 + U01**T * invD * U01
581*
582               DO I = 1, NNB
583                  DO J = 1, I
584                     A( CUT+I, CUT+J ) = A( CUT+I, CUT+J )+WORK(U11+I,J)
585                  END DO
586               END DO
587*
588*              L01 =  L22**T * invD2 * L21
589*
590               CALL DTRMM( 'L', UPLO, 'T', 'U', N-NNB-CUT, NNB, ONE,
591     $                     A( CUT+NNB+1, CUT+NNB+1 ), LDA, WORK,
592     $                     N+NB+1 )
593*
594*              Update L21
595*
596               DO I = 1, N-CUT-NNB
597                  DO J = 1, NNB
598                     A( CUT+NNB+I, CUT+J ) = WORK( I, J )
599                  END DO
600               END DO
601*
602            ELSE
603*
604*              L11 =  L11**T * invD1 * L11
605*
606               DO I = 1, NNB
607                  DO J = 1, I
608                     A( CUT+I, CUT+J ) = WORK( U11+I, J )
609                  END DO
610               END DO
611            END IF
612*
613*           Next Block
614*
615            CUT = CUT + NNB
616*
617         END DO
618*
619*        Apply PERMUTATIONS P and P**T:
620*        P * inv(L**T) * inv(D) * inv(L) * P**T.
621*        Interchange rows and columns I and IPIV(I) in reverse order
622*        from the formation order of IPIV vector for Lower case.
623*
624*        ( We can use a loop over IPIV with increment -1,
625*        since the ABS value of IPIV(I) represents the row (column)
626*        index of the interchange with row (column) i in both 1x1
627*        and 2x2 pivot cases, i.e. we don't need separate code branches
628*        for 1x1 and 2x2 pivot cases )
629*
630         DO I = N, 1, -1
631             IP = ABS( IPIV( I ) )
632             IF( IP.NE.I ) THEN
633                IF (I .LT. IP) CALL DSYSWAPR( UPLO, N, A, LDA, I ,IP )
634                IF (I .GT. IP) CALL DSYSWAPR( UPLO, N, A, LDA, IP ,I )
635             END IF
636         END DO
637*
638      END IF
639*
640      RETURN
641*
642*     End of DSYTRI_3X
643*
644      END
645
646