1 /*
2 
3     Copyright (C) 2014, The University of Texas at Austin
4 
5     This file is part of libflame and is available under the 3-Clause
6     BSD license, which can be found in the LICENSE file at the top-level
7     directory, or at http://opensource.org/licenses/BSD-3-Clause
8 
9 */
10 
11 #include "FLAME.h"
12 
FLA_CAQR2_UT_unb_var1(FLA_Obj U,FLA_Obj D,FLA_Obj T)13 FLA_Error FLA_CAQR2_UT_unb_var1( FLA_Obj U,
14                                  FLA_Obj D, FLA_Obj T )
15 {
16   FLA_Obj UTL,   UTR,      U00,  u01,       U02,
17           UBL,   UBR,      u10t, upsilon11, u12t,
18                            U20,  u21,       U22;
19 
20   FLA_Obj DTL,   DTR,      D00,  d01,     D02,
21           DBL,   DBR,      d10t, delta11, d12t,
22                            D20,  d21,     D22;
23 
24   FLA_Obj TTL,   TTR,      T00,  t01,   T02,
25           TBL,   TBR,      t10t, tau11, t12t,
26                            T20,  t21,   T22;
27 
28   FLA_Obj d1, D2;
29 
30   FLA_Obj d01T,
31           d01B;
32 
33   FLA_Obj D00T,
34           D00B;
35 
36   dim_t   m_DT;
37 
38   // Begin partitioning diagonally through D with m - n rows above
39   // the diagonal.
40   m_DT = FLA_Obj_length( D ) - FLA_Obj_width( D );
41 
42   FLA_Part_2x2( U,    &UTL, &UTR,
43                       &UBL, &UBR,     0, 0, FLA_TL );
44 
45   FLA_Part_2x2( D,    &DTL, &DTR,
46                       &DBL, &DBR,     m_DT, 0, FLA_TL );
47 
48   FLA_Part_2x2( T,    &TTL, &TTR,
49                       &TBL, &TBR,     0, 0, FLA_TL );
50 
51   while ( FLA_Obj_min_dim( UBR ) > 0 ){
52 
53     FLA_Repart_2x2_to_3x3( UTL, /**/ UTR,       &U00,  /**/ &u01,       &U02,
54                         /* ************* */   /* ************************** */
55                                                 &u10t, /**/ &upsilon11, &u12t,
56                            UBL, /**/ UBR,       &U20,  /**/ &u21,       &U22,
57                            1, 1, FLA_BR );
58 
59     FLA_Repart_2x2_to_3x3( DTL, /**/ DTR,       &D00,  /**/ &d01,     &D02,
60                         /* ************* */   /* ************************** */
61                                                 &d10t, /**/ &delta11, &d12t,
62                            DBL, /**/ DBR,       &D20,  /**/ &d21,     &D22,
63                            1, 1, FLA_BR );
64 
65     FLA_Repart_2x2_to_3x3( TTL, /**/ TTR,       &T00,  /**/ &t01,   &T02,
66                         /* ************* */   /* ************************ */
67                                                 &t10t, /**/ &tau11, &t12t,
68                            TBL, /**/ TBR,       &T20,  /**/ &t21,   &T22,
69                            1, 1, FLA_BR );
70 
71     /*------------------------------------------------------------*/
72 
73     FLA_Merge_2x1( d01,
74                    delta11,  &d1 );
75 
76     FLA_Merge_2x1( D02,
77                    d12t,     &D2 );
78 
79     // Compute tau11 and u2 from upsilon11 and d1 such that tau11 and u2
80     // determine a Householder transform H such that applying H from the
81     // left to the column vector consisting of upsilon11 and d1 annihilates
82     // the entries in d1 (and updates upsilon11).
83     FLA_Househ2_UT( FLA_LEFT,
84                     upsilon11,
85                     d1, tau11 );
86 
87     // / u12t \ =  H / u12t \
88     // \  D2  /      \  D2  /
89     //
90     // where H is formed from tau11 and d1.
91     FLA_Apply_H2_UT( FLA_LEFT, tau11, d1, u12t,
92                                           D2 );
93 
94     FLA_Part_2x1( d01,   &d01T,
95                          &d01B,    m_DT, FLA_TOP );
96 
97     FLA_Part_2x1( D00,   &D00T,
98                          &D00B,    m_DT, FLA_TOP );
99 
100     // t01 = D00' * d01;
101     //     = D00T' * d01T + triu( D00B )' * d01B;
102     FLA_Copy_external( d01B, t01 );
103 	FLA_Trmv_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
104                        D00B, t01 );
105     FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, D00T, d01T, FLA_ONE, t01 );
106 
107     /*------------------------------------------------------------*/
108 
109     FLA_Cont_with_3x3_to_2x2( &UTL, /**/ &UTR,       U00,  u01,       /**/ U02,
110                                                      u10t, upsilon11, /**/ u12t,
111                             /* ************** */  /* ************************ */
112                               &UBL, /**/ &UBR,       U20,  u21,       /**/ U22,
113                               FLA_TL );
114 
115     FLA_Cont_with_3x3_to_2x2( &DTL, /**/ &DTR,       D00,  d01,     /**/ D02,
116                                                      d10t, delta11, /**/ d12t,
117                             /* ************** */  /* ************************ */
118                               &DBL, /**/ &DBR,       D20,  d21,     /**/ D22,
119                               FLA_TL );
120 
121     FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR,       T00,  t01,   /**/ T02,
122                                                      t10t, tau11, /**/ t12t,
123                             /* ************** */  /* ********************** */
124                               &TBL, /**/ &TBR,       T20,  t21,   /**/ T22,
125                               FLA_TL );
126   }
127 
128   return FLA_SUCCESS;
129 }
130 
131