1 /*
2
3 Copyright (C) 2014, The University of Texas at Austin
4
5 This file is part of libflame and is available under the 3-Clause
6 BSD license, which can be found in the LICENSE file at the top-level
7 directory, or at http://opensource.org/licenses/BSD-3-Clause
8
9 */
10
11 #include "FLAME.h"
12
FLA_CAQR2_UT_unb_var1(FLA_Obj U,FLA_Obj D,FLA_Obj T)13 FLA_Error FLA_CAQR2_UT_unb_var1( FLA_Obj U,
14 FLA_Obj D, FLA_Obj T )
15 {
16 FLA_Obj UTL, UTR, U00, u01, U02,
17 UBL, UBR, u10t, upsilon11, u12t,
18 U20, u21, U22;
19
20 FLA_Obj DTL, DTR, D00, d01, D02,
21 DBL, DBR, d10t, delta11, d12t,
22 D20, d21, D22;
23
24 FLA_Obj TTL, TTR, T00, t01, T02,
25 TBL, TBR, t10t, tau11, t12t,
26 T20, t21, T22;
27
28 FLA_Obj d1, D2;
29
30 FLA_Obj d01T,
31 d01B;
32
33 FLA_Obj D00T,
34 D00B;
35
36 dim_t m_DT;
37
38 // Begin partitioning diagonally through D with m - n rows above
39 // the diagonal.
40 m_DT = FLA_Obj_length( D ) - FLA_Obj_width( D );
41
42 FLA_Part_2x2( U, &UTL, &UTR,
43 &UBL, &UBR, 0, 0, FLA_TL );
44
45 FLA_Part_2x2( D, &DTL, &DTR,
46 &DBL, &DBR, m_DT, 0, FLA_TL );
47
48 FLA_Part_2x2( T, &TTL, &TTR,
49 &TBL, &TBR, 0, 0, FLA_TL );
50
51 while ( FLA_Obj_min_dim( UBR ) > 0 ){
52
53 FLA_Repart_2x2_to_3x3( UTL, /**/ UTR, &U00, /**/ &u01, &U02,
54 /* ************* */ /* ************************** */
55 &u10t, /**/ &upsilon11, &u12t,
56 UBL, /**/ UBR, &U20, /**/ &u21, &U22,
57 1, 1, FLA_BR );
58
59 FLA_Repart_2x2_to_3x3( DTL, /**/ DTR, &D00, /**/ &d01, &D02,
60 /* ************* */ /* ************************** */
61 &d10t, /**/ &delta11, &d12t,
62 DBL, /**/ DBR, &D20, /**/ &d21, &D22,
63 1, 1, FLA_BR );
64
65 FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
66 /* ************* */ /* ************************ */
67 &t10t, /**/ &tau11, &t12t,
68 TBL, /**/ TBR, &T20, /**/ &t21, &T22,
69 1, 1, FLA_BR );
70
71 /*------------------------------------------------------------*/
72
73 FLA_Merge_2x1( d01,
74 delta11, &d1 );
75
76 FLA_Merge_2x1( D02,
77 d12t, &D2 );
78
79 // Compute tau11 and u2 from upsilon11 and d1 such that tau11 and u2
80 // determine a Householder transform H such that applying H from the
81 // left to the column vector consisting of upsilon11 and d1 annihilates
82 // the entries in d1 (and updates upsilon11).
83 FLA_Househ2_UT( FLA_LEFT,
84 upsilon11,
85 d1, tau11 );
86
87 // / u12t \ = H / u12t \
88 // \ D2 / \ D2 /
89 //
90 // where H is formed from tau11 and d1.
91 FLA_Apply_H2_UT( FLA_LEFT, tau11, d1, u12t,
92 D2 );
93
94 FLA_Part_2x1( d01, &d01T,
95 &d01B, m_DT, FLA_TOP );
96
97 FLA_Part_2x1( D00, &D00T,
98 &D00B, m_DT, FLA_TOP );
99
100 // t01 = D00' * d01;
101 // = D00T' * d01T + triu( D00B )' * d01B;
102 FLA_Copy_external( d01B, t01 );
103 FLA_Trmv_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
104 D00B, t01 );
105 FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, D00T, d01T, FLA_ONE, t01 );
106
107 /*------------------------------------------------------------*/
108
109 FLA_Cont_with_3x3_to_2x2( &UTL, /**/ &UTR, U00, u01, /**/ U02,
110 u10t, upsilon11, /**/ u12t,
111 /* ************** */ /* ************************ */
112 &UBL, /**/ &UBR, U20, u21, /**/ U22,
113 FLA_TL );
114
115 FLA_Cont_with_3x3_to_2x2( &DTL, /**/ &DTR, D00, d01, /**/ D02,
116 d10t, delta11, /**/ d12t,
117 /* ************** */ /* ************************ */
118 &DBL, /**/ &DBR, D20, d21, /**/ D22,
119 FLA_TL );
120
121 FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
122 t10t, tau11, /**/ t12t,
123 /* ************** */ /* ********************** */
124 &TBL, /**/ &TBR, T20, t21, /**/ T22,
125 FLA_TL );
126 }
127
128 return FLA_SUCCESS;
129 }
130
131