1 /*
2
3 Copyright (C) 2014, The University of Texas at Austin
4
5 This file is part of libflame and is available under the 3-Clause
6 BSD license, which can be found in the LICENSE file at the top-level
7 directory, or at http://opensource.org/licenses/BSD-3-Clause
8
9 */
10
11 #include "FLAME.h"
12
FLA_Hess_UT_unb_var5(FLA_Obj A,FLA_Obj T)13 FLA_Error FLA_Hess_UT_unb_var5( FLA_Obj A, FLA_Obj T )
14 {
15 FLA_Error r_val;
16 FLA_Obj U, Z;
17
18 FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &U );
19 FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &Z );
20
21 r_val = FLA_Hess_UT_step_unb_var5( A, U, Z, T );
22
23 FLA_Obj_free( &U );
24 FLA_Obj_free( &Z );
25
26 return r_val;
27 }
28
FLA_Hess_UT_step_unb_var5(FLA_Obj A,FLA_Obj U,FLA_Obj Z,FLA_Obj T)29 FLA_Error FLA_Hess_UT_step_unb_var5( FLA_Obj A, FLA_Obj U, FLA_Obj Z, FLA_Obj T )
30 {
31 FLA_Obj ATL, ATR, A00, a01, A02,
32 ABL, ABR, a10t, alpha11, a12t,
33 A20, a21, A22;
34 FLA_Obj UTL, UTR, U00, u01, U02,
35 UBL, UBR, u10t, upsilon11, u12t,
36 U20, u21, U22;
37 FLA_Obj ZTL, ZTR, Z00, z01, Z02,
38 ZBL, ZBR, z10t, zeta11, z12t,
39 Z20, z21, Z22;
40 FLA_Obj TTL, TTR, T00, t01, T02,
41 TBL, TBR, t10t, tau11, t12t,
42 T20, t21, T22;
43 FLA_Obj wT, w0,
44 wB, omega1,
45 w2;
46 FLA_Obj w;
47
48 FLA_Obj a21_t,
49 a21_b;
50 FLA_Obj u21_t,
51 u21_b;
52
53 FLA_Datatype datatype_A;
54 dim_t m_A;
55 dim_t b_alg;
56
57
58 b_alg = FLA_Obj_length( T );
59
60 datatype_A = FLA_Obj_datatype( A );
61 m_A = FLA_Obj_length( A );
62
63 FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
64
65 FLA_Set( FLA_ZERO, U );
66 FLA_Set( FLA_ZERO, Z );
67
68 FLA_Part_2x2( A, &ATL, &ATR,
69 &ABL, &ABR, 0, 0, FLA_TL );
70 FLA_Part_2x2( U, &UTL, &UTR,
71 &UBL, &UBR, 0, 0, FLA_TL );
72 FLA_Part_2x2( Z, &ZTL, &ZTR,
73 &ZBL, &ZBR, 0, 0, FLA_TL );
74 FLA_Part_2x2( T, &TTL, &TTR,
75 &TBL, &TBR, 0, 0, FLA_TL );
76 FLA_Part_2x1( w, &wT,
77 &wB, 0, FLA_TOP );
78
79 while ( FLA_Obj_length( ATL ) < b_alg )
80 {
81 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
82 /* ************* */ /* ************************** */
83 &a10t, /**/ &alpha11, &a12t,
84 ABL, /**/ ABR, &A20, /**/ &a21, &A22,
85 1, 1, FLA_BR );
86 FLA_Repart_2x2_to_3x3( UTL, /**/ UTR, &U00, /**/ &u01, &U02,
87 /* ************* */ /* **************************** */
88 &u10t, /**/ &upsilon11, &u12t,
89 UBL, /**/ UBR, &U20, /**/ &u21, &U22,
90 1, 1, FLA_BR );
91 FLA_Repart_2x2_to_3x3( ZTL, /**/ ZTR, &Z00, /**/ &z01, &Z02,
92 /* ************* */ /* ************************* */
93 &z10t, /**/ &zeta11, &z12t,
94 ZBL, /**/ ZBR, &Z20, /**/ &z21, &Z22,
95 1, 1, FLA_BR );
96 FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
97 /* ************* */ /* ************************** */
98 &t10t, /**/ &tau11, &t12t,
99 TBL, /**/ TBR, &T20, /**/ &t21, &T22,
100 1, 1, FLA_BR );
101 FLA_Repart_2x1_to_3x1( wT, &w0,
102 /* ** */ /* ****** */
103 &omega1,
104 wB, &w2, 1, FLA_BOTTOM );
105
106 /*------------------------------------------------------------*/
107
108 if ( FLA_Obj_length( ATL ) > 0 )
109 {
110 // w0 = inv( triu( T00 ) ) * u10t';
111 FLA_Copyt( FLA_CONJ_TRANSPOSE, u10t, w0 );
112 FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
113 T00, w0 );
114
115 // a01 = a01 - Z00 * w0;
116 // alpha11 = alpha11 - z10t * w0;
117 // a21 = a21 - Z20 * w0;
118 FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z00, w0, FLA_ONE, a01 );
119 FLA_Dots( FLA_MINUS_ONE, z10t, w0, FLA_ONE, alpha11 );
120 FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, w0, FLA_ONE, a21 );
121
122 // w0 = inv( triu( T00 ) )' * ( U00' * a01 + u10t' * alpha11 + U20' * a21 );
123 FLA_Trmvsx( FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
124 FLA_ONE, U00, a01, FLA_ZERO, w0 );
125 FLA_Axpyt( FLA_CONJ_TRANSPOSE, alpha11, u10t, w0 );
126 FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, U20, a21, FLA_ONE, w0 );
127
128 FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
129 T00, w0 );
130
131 // a01 = a01 - U00 * w0;
132 // alpha11 = alpha11 - u10t * w0;
133 // a21 = a21 - U20 * w0;
134 FLA_Trmvsx( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
135 FLA_MINUS_ONE, U00, w0, FLA_ONE, a01 );
136 FLA_Dots( FLA_MINUS_ONE, u10t, w0, FLA_ONE, alpha11 );
137 FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, U20, w0, FLA_ONE, a21 );
138 }
139
140 if ( FLA_Obj_length( a21 ) > 0 )
141 {
142 FLA_Part_2x1( a21, &a21_t,
143 &a21_b, 1, FLA_TOP );
144
145 // [ u21, tau11, a21 ] = House( a21 );
146 FLA_Househ2_UT( FLA_LEFT,
147 a21_t,
148 a21_b, tau11 );
149
150 // u21 := a21;
151 FLA_Copy( a21, u21 );
152
153 // Explicitly set the first element of the Householder vector so we
154 // can use it in regular computations.
155 FLA_Part_2x1( u21, &u21_t,
156 &u21_b, 1, FLA_TOP );
157 FLA_Set( FLA_ONE, u21_t );
158
159 // z01 = A02 * u21;
160 // zeta11 = a12t * u21;
161 // z21 = A22 * u21;
162 FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, u21, FLA_ZERO, z01 );
163 FLA_Dot( a12t, u21, zeta11 );
164 FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, u21, FLA_ZERO, z21 );
165
166 // t01 = U20' * u21;
167 FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, U20, u21, FLA_ZERO, t01 );
168 }
169
170 /*------------------------------------------------------------*/
171
172 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
173 a10t, alpha11, /**/ a12t,
174 /* ************** */ /* ************************ */
175 &ABL, /**/ &ABR, A20, a21, /**/ A22,
176 FLA_TL );
177 FLA_Cont_with_3x3_to_2x2( &UTL, /**/ &UTR, U00, u01, /**/ U02,
178 u10t, upsilon11, /**/ u12t,
179 /* ************** */ /* ************************** */
180 &UBL, /**/ &UBR, U20, u21, /**/ U22,
181 FLA_TL );
182 FLA_Cont_with_3x3_to_2x2( &ZTL, /**/ &ZTR, Z00, z01, /**/ Z02,
183 z10t, zeta11, /**/ z12t,
184 /* ************** */ /* *********************** */
185 &ZBL, /**/ &ZBR, Z20, z21, /**/ Z22,
186 FLA_TL );
187 FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
188 t10t, tau11, /**/ t12t,
189 /* ************** */ /* ************************ */
190 &TBL, /**/ &TBR, T20, t21, /**/ T22,
191 FLA_TL );
192 FLA_Cont_with_3x1_to_2x1( &wT, w0,
193 omega1,
194 /* ** */ /* ****** */
195 &wB, w2, FLA_TOP );
196 }
197
198 FLA_Obj_free( &w );
199
200 return FLA_SUCCESS;
201 }
202
203