1 /*
2 
3     Copyright (C) 2014, The University of Texas at Austin
4 
5     This file is part of libflame and is available under the 3-Clause
6     BSD license, which can be found in the LICENSE file at the top-level
7     directory, or at http://opensource.org/licenses/BSD-3-Clause
8 
9 */
10 
11 #include "FLAME.h"
12 
FLA_Hess_UT_unb_var5(FLA_Obj A,FLA_Obj T)13 FLA_Error FLA_Hess_UT_unb_var5( FLA_Obj A, FLA_Obj T )
14 {
15   FLA_Error r_val;
16   FLA_Obj   U, Z;
17 
18   FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &U );
19   FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &Z );
20 
21   r_val = FLA_Hess_UT_step_unb_var5( A, U, Z, T );
22 
23   FLA_Obj_free( &U );
24   FLA_Obj_free( &Z );
25 
26   return r_val;
27 }
28 
FLA_Hess_UT_step_unb_var5(FLA_Obj A,FLA_Obj U,FLA_Obj Z,FLA_Obj T)29 FLA_Error FLA_Hess_UT_step_unb_var5( FLA_Obj A, FLA_Obj U, FLA_Obj Z, FLA_Obj T )
30 {
31   FLA_Obj  ATL,   ATR,      A00,  a01,     A02,
32            ABL,   ABR,      a10t, alpha11, a12t,
33                             A20,  a21,     A22;
34   FLA_Obj  UTL,   UTR,      U00,  u01,       U02,
35            UBL,   UBR,      u10t, upsilon11, u12t,
36                             U20,  u21,       U22;
37   FLA_Obj  ZTL,   ZTR,      Z00,  z01,    Z02,
38            ZBL,   ZBR,      z10t, zeta11, z12t,
39                             Z20,  z21,    Z22;
40   FLA_Obj  TTL,   TTR,      T00,  t01,   T02,
41            TBL,   TBR,      t10t, tau11, t12t,
42                             T20,  t21,   T22;
43   FLA_Obj  wT,              w0,
44            wB,              omega1,
45                             w2;
46   FLA_Obj  w;
47 
48   FLA_Obj  a21_t,
49            a21_b;
50   FLA_Obj  u21_t,
51            u21_b;
52 
53   FLA_Datatype datatype_A;
54   dim_t        m_A;
55   dim_t        b_alg;
56 
57 
58   b_alg      = FLA_Obj_length( T );
59 
60   datatype_A = FLA_Obj_datatype( A );
61   m_A        = FLA_Obj_length( A );
62 
63   FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
64 
65   FLA_Set( FLA_ZERO, U );
66   FLA_Set( FLA_ZERO, Z );
67 
68   FLA_Part_2x2( A,    &ATL, &ATR,
69                       &ABL, &ABR,     0, 0, FLA_TL );
70   FLA_Part_2x2( U,    &UTL, &UTR,
71                       &UBL, &UBR,     0, 0, FLA_TL );
72   FLA_Part_2x2( Z,    &ZTL, &ZTR,
73                       &ZBL, &ZBR,     0, 0, FLA_TL );
74   FLA_Part_2x2( T,    &TTL, &TTR,
75                       &TBL, &TBR,     0, 0, FLA_TL );
76   FLA_Part_2x1( w,    &wT,
77                       &wB,            0, FLA_TOP );
78 
79   while ( FLA_Obj_length( ATL ) < b_alg )
80   {
81     FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00,  /**/ &a01,     &A02,
82                         /* ************* */   /* ************************** */
83                                                 &a10t, /**/ &alpha11, &a12t,
84                            ABL, /**/ ABR,       &A20,  /**/ &a21,     &A22,
85                            1, 1, FLA_BR );
86     FLA_Repart_2x2_to_3x3( UTL, /**/ UTR,       &U00,  /**/ &u01,       &U02,
87                         /* ************* */   /* **************************** */
88                                                 &u10t, /**/ &upsilon11, &u12t,
89                            UBL, /**/ UBR,       &U20,  /**/ &u21,       &U22,
90                            1, 1, FLA_BR );
91     FLA_Repart_2x2_to_3x3( ZTL, /**/ ZTR,       &Z00,  /**/ &z01,    &Z02,
92                         /* ************* */   /* ************************* */
93                                                 &z10t, /**/ &zeta11, &z12t,
94                            ZBL, /**/ ZBR,       &Z20,  /**/ &z21,    &Z22,
95                            1, 1, FLA_BR );
96     FLA_Repart_2x2_to_3x3( TTL, /**/ TTR,       &T00,  /**/ &t01,   &T02,
97                         /* ************* */   /* ************************** */
98                                                 &t10t, /**/ &tau11, &t12t,
99                            TBL, /**/ TBR,       &T20,  /**/ &t21,   &T22,
100                            1, 1, FLA_BR );
101     FLA_Repart_2x1_to_3x1( wT,                &w0,
102                         /* ** */            /* ****** */
103                                               &omega1,
104                            wB,                &w2,        1, FLA_BOTTOM );
105 
106     /*------------------------------------------------------------*/
107 
108     if ( FLA_Obj_length( ATL ) > 0 )
109     {
110       // w0 = inv( triu( T00 ) ) * u10t';
111       FLA_Copyt( FLA_CONJ_TRANSPOSE, u10t, w0 );
112       FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
113                 T00, w0 );
114 
115       // a01     = a01     - Z00  * w0;
116       // alpha11 = alpha11 - z10t * w0;
117       // a21     = a21     - Z20  * w0;
118       FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z00, w0, FLA_ONE, a01 );
119       FLA_Dots( FLA_MINUS_ONE, z10t, w0, FLA_ONE, alpha11 );
120       FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, w0, FLA_ONE, a21 );
121 
122       // w0 = inv( triu( T00 ) )' * ( U00' * a01 + u10t' * alpha11 + U20' * a21 );
123       FLA_Trmvsx( FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
124                   FLA_ONE, U00, a01, FLA_ZERO, w0 );
125       FLA_Axpyt( FLA_CONJ_TRANSPOSE, alpha11, u10t, w0 );
126       FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, U20, a21, FLA_ONE, w0 );
127 
128       FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
129                 T00, w0 );
130 
131       // a01     = a01     - U00  * w0;
132       // alpha11 = alpha11 - u10t * w0;
133       // a21     = a21     - U20  * w0;
134       FLA_Trmvsx( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
135                   FLA_MINUS_ONE, U00, w0, FLA_ONE, a01 );
136       FLA_Dots( FLA_MINUS_ONE, u10t, w0, FLA_ONE, alpha11 );
137       FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, U20, w0, FLA_ONE, a21 );
138     }
139 
140     if ( FLA_Obj_length( a21 ) > 0 )
141     {
142       FLA_Part_2x1( a21,    &a21_t,
143                             &a21_b,            1, FLA_TOP );
144 
145       // [ u21, tau11, a21 ] = House( a21 );
146       FLA_Househ2_UT( FLA_LEFT,
147                       a21_t,
148                       a21_b, tau11 );
149 
150       // u21 := a21;
151       FLA_Copy( a21, u21 );
152 
153       // Explicitly set the first element of the Householder vector so we
154       // can use it in regular computations.
155       FLA_Part_2x1( u21,    &u21_t,
156                             &u21_b,            1, FLA_TOP );
157       FLA_Set( FLA_ONE, u21_t );
158 
159       // z01    = A02  * u21;
160       // zeta11 = a12t * u21;
161       // z21    = A22  * u21;
162       FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, u21, FLA_ZERO, z01 );
163       FLA_Dot( a12t, u21, zeta11 );
164       FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, u21, FLA_ZERO, z21 );
165 
166       // t01 = U20' * u21;
167       FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, U20, u21, FLA_ZERO, t01 );
168     }
169 
170     /*------------------------------------------------------------*/
171 
172     FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00,  a01,     /**/ A02,
173                                                      a10t, alpha11, /**/ a12t,
174                             /* ************** */  /* ************************ */
175                               &ABL, /**/ &ABR,       A20,  a21,     /**/ A22,
176                               FLA_TL );
177     FLA_Cont_with_3x3_to_2x2( &UTL, /**/ &UTR,       U00,  u01,       /**/ U02,
178                                                      u10t, upsilon11, /**/ u12t,
179                             /* ************** */  /* ************************** */
180                               &UBL, /**/ &UBR,       U20,  u21,       /**/ U22,
181                               FLA_TL );
182     FLA_Cont_with_3x3_to_2x2( &ZTL, /**/ &ZTR,       Z00,  z01,    /**/ Z02,
183                                                      z10t, zeta11, /**/ z12t,
184                             /* ************** */  /* *********************** */
185                               &ZBL, /**/ &ZBR,       Z20,  z21,    /**/ Z22,
186                               FLA_TL );
187     FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR,       T00,  t01,   /**/ T02,
188                                                      t10t, tau11, /**/ t12t,
189                             /* ************** */  /* ************************ */
190                               &TBL, /**/ &TBR,       T20,  t21,   /**/ T22,
191                               FLA_TL );
192     FLA_Cont_with_3x1_to_2x1( &wT,                w0,
193                                                   omega1,
194                             /* ** */           /* ****** */
195                               &wB,                w2,     FLA_TOP );
196   }
197 
198   FLA_Obj_free( &w );
199 
200   return FLA_SUCCESS;
201 }
202 
203