1 /*
2 
3     Copyright (C) 2014, The University of Texas at Austin
4 
5     This file is part of libflame and is available under the 3-Clause
6     BSD license, which can be found in the LICENSE file at the top-level
7     directory, or at http://opensource.org/licenses/BSD-3-Clause
8 
9 */
10 
11 #include "FLAME.h"
12 
FLA_QR2_UT_blk_var1(FLA_Obj U,FLA_Obj D,FLA_Obj T,fla_qr2ut_t * cntl)13 FLA_Error FLA_QR2_UT_blk_var1( FLA_Obj U,
14                                FLA_Obj D, FLA_Obj T, fla_qr2ut_t* cntl )
15 {
16   FLA_Obj UTL,   UTR,      U00, U01, U02,
17           UBL,   UBR,      U10, U11, U12,
18                            U20, U21, U22;
19 
20   FLA_Obj DL,    DR,       D0,  D1,  D2;
21 
22   FLA_Obj TL,    TR,       T0,  T1,  W12;
23 
24   FLA_Obj W12T, W12B;
25 
26   FLA_Obj T1T, T2B;
27 
28   dim_t   b_alg, b;
29 
30   // Query the algorithmic blocksize by inspecting the length of T.
31   b_alg = FLA_Obj_length( T );
32 
33   FLA_Part_2x2( U,    &UTL, &UTR,
34                       &UBL, &UBR,     0, 0, FLA_TL );
35 
36   FLA_Part_1x2( D,    &DL,  &DR,      0, FLA_LEFT );
37 
38   FLA_Part_1x2( T,    &TL,  &TR,      0, FLA_LEFT );
39 
40   while ( FLA_Obj_min_dim( UBR ) > 0 ){
41 
42     b = min( b_alg, FLA_Obj_min_dim( UBR ) );
43 
44     FLA_Repart_2x2_to_3x3( UTL, /**/ UTR,       &U00, /**/ &U01, &U02,
45                         /* ************* */   /* ******************** */
46                                                 &U10, /**/ &U11, &U12,
47                            UBL, /**/ UBR,       &U20, /**/ &U21, &U22,
48                            b, b, FLA_BR );
49 
50     FLA_Repart_1x2_to_1x3( DL,  /**/ DR,        &D0, /**/ &D1, &D2,
51                            b, FLA_RIGHT );
52 
53     FLA_Repart_1x2_to_1x3( TL,  /**/ TR,        &T0, /**/ &T1, &W12,
54                            b, FLA_RIGHT );
55 
56     /*------------------------------------------------------------*/
57 
58     // T1T = FLA_Top_part( T1, b );
59 
60     FLA_Part_2x1( T1,   &T1T,
61                         &T2B, b, FLA_TOP );
62 
63     // [ U11, ...
64     //   D1, T1 ] = FLA_QR2_UT( U11
65     //                          D1, T1T );
66 
67     FLA_QR2_UT_internal( U11,
68                          D1, T1T,
69                          FLA_Cntl_sub_qr2ut( cntl ) );
70 
71 
72     if ( FLA_Obj_width( U12 ) > 0 )
73     {
74       // W12T = FLA_Top_part( W12, b );
75 
76       FLA_Part_2x1( W12,  &W12T,
77                           &W12B, b, FLA_TOP );
78 
79       // W12T = inv( triu( T1T ) )' * ( U12 + D1' * D2 );
80 
81       FLA_Copy_internal( U12, W12T,
82                          FLA_Cntl_sub_copy( cntl ) );
83 
84       FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
85                          FLA_ONE, D1, D2, FLA_ONE, W12T,
86                          FLA_Cntl_sub_gemm1( cntl ) );
87 
88       FLA_Trsm_internal( FLA_LEFT, FLA_UPPER_TRIANGULAR,
89                          FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
90                          FLA_ONE, T1T, W12T,
91                          FLA_Cntl_sub_trsm( cntl ) );
92 
93       // U12 = U12 - W12T;
94       // D2  = D2  - D1 * W12T;
95 
96       FLA_Axpy_internal( FLA_MINUS_ONE, W12T, U12,
97                          FLA_Cntl_sub_axpy( cntl ) );
98 
99       FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
100                          FLA_MINUS_ONE, D1, W12T, FLA_ONE, D2,
101                          FLA_Cntl_sub_gemm2( cntl ) );
102     }
103 
104     /*------------------------------------------------------------*/
105 
106     FLA_Cont_with_3x3_to_2x2( &UTL, /**/ &UTR,       U00, U01, /**/ U02,
107                                                      U10, U11, /**/ U12,
108                             /* ************** */  /* ****************** */
109                               &UBL, /**/ &UBR,       U20, U21, /**/ U22,
110                               FLA_TL );
111 
112     FLA_Cont_with_1x3_to_1x2( &DL,  /**/ &DR,        D0, D1, /**/ D2,
113                               FLA_LEFT );
114 
115     FLA_Cont_with_1x3_to_1x2( &TL,  /**/ &TR,        T0, T1, /**/ W12,
116                               FLA_LEFT );
117 
118   }
119 
120   return FLA_SUCCESS;
121 }
122 
123