1 /*
2
3 Copyright (C) 2014, The University of Texas at Austin
4
5 This file is part of libflame and is available under the 3-Clause
6 BSD license, which can be found in the LICENSE file at the top-level
7 directory, or at http://opensource.org/licenses/BSD-3-Clause
8
9 */
10
11 #include "FLAME.h"
12
FLA_QR2_UT_blk_var1(FLA_Obj U,FLA_Obj D,FLA_Obj T,fla_qr2ut_t * cntl)13 FLA_Error FLA_QR2_UT_blk_var1( FLA_Obj U,
14 FLA_Obj D, FLA_Obj T, fla_qr2ut_t* cntl )
15 {
16 FLA_Obj UTL, UTR, U00, U01, U02,
17 UBL, UBR, U10, U11, U12,
18 U20, U21, U22;
19
20 FLA_Obj DL, DR, D0, D1, D2;
21
22 FLA_Obj TL, TR, T0, T1, W12;
23
24 FLA_Obj W12T, W12B;
25
26 FLA_Obj T1T, T2B;
27
28 dim_t b_alg, b;
29
30 // Query the algorithmic blocksize by inspecting the length of T.
31 b_alg = FLA_Obj_length( T );
32
33 FLA_Part_2x2( U, &UTL, &UTR,
34 &UBL, &UBR, 0, 0, FLA_TL );
35
36 FLA_Part_1x2( D, &DL, &DR, 0, FLA_LEFT );
37
38 FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
39
40 while ( FLA_Obj_min_dim( UBR ) > 0 ){
41
42 b = min( b_alg, FLA_Obj_min_dim( UBR ) );
43
44 FLA_Repart_2x2_to_3x3( UTL, /**/ UTR, &U00, /**/ &U01, &U02,
45 /* ************* */ /* ******************** */
46 &U10, /**/ &U11, &U12,
47 UBL, /**/ UBR, &U20, /**/ &U21, &U22,
48 b, b, FLA_BR );
49
50 FLA_Repart_1x2_to_1x3( DL, /**/ DR, &D0, /**/ &D1, &D2,
51 b, FLA_RIGHT );
52
53 FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &W12,
54 b, FLA_RIGHT );
55
56 /*------------------------------------------------------------*/
57
58 // T1T = FLA_Top_part( T1, b );
59
60 FLA_Part_2x1( T1, &T1T,
61 &T2B, b, FLA_TOP );
62
63 // [ U11, ...
64 // D1, T1 ] = FLA_QR2_UT( U11
65 // D1, T1T );
66
67 FLA_QR2_UT_internal( U11,
68 D1, T1T,
69 FLA_Cntl_sub_qr2ut( cntl ) );
70
71
72 if ( FLA_Obj_width( U12 ) > 0 )
73 {
74 // W12T = FLA_Top_part( W12, b );
75
76 FLA_Part_2x1( W12, &W12T,
77 &W12B, b, FLA_TOP );
78
79 // W12T = inv( triu( T1T ) )' * ( U12 + D1' * D2 );
80
81 FLA_Copy_internal( U12, W12T,
82 FLA_Cntl_sub_copy( cntl ) );
83
84 FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
85 FLA_ONE, D1, D2, FLA_ONE, W12T,
86 FLA_Cntl_sub_gemm1( cntl ) );
87
88 FLA_Trsm_internal( FLA_LEFT, FLA_UPPER_TRIANGULAR,
89 FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
90 FLA_ONE, T1T, W12T,
91 FLA_Cntl_sub_trsm( cntl ) );
92
93 // U12 = U12 - W12T;
94 // D2 = D2 - D1 * W12T;
95
96 FLA_Axpy_internal( FLA_MINUS_ONE, W12T, U12,
97 FLA_Cntl_sub_axpy( cntl ) );
98
99 FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
100 FLA_MINUS_ONE, D1, W12T, FLA_ONE, D2,
101 FLA_Cntl_sub_gemm2( cntl ) );
102 }
103
104 /*------------------------------------------------------------*/
105
106 FLA_Cont_with_3x3_to_2x2( &UTL, /**/ &UTR, U00, U01, /**/ U02,
107 U10, U11, /**/ U12,
108 /* ************** */ /* ****************** */
109 &UBL, /**/ &UBR, U20, U21, /**/ U22,
110 FLA_TL );
111
112 FLA_Cont_with_1x3_to_1x2( &DL, /**/ &DR, D0, D1, /**/ D2,
113 FLA_LEFT );
114
115 FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ W12,
116 FLA_LEFT );
117
118 }
119
120 return FLA_SUCCESS;
121 }
122
123