1 /*
2
3 Copyright (C) 2014, The University of Texas at Austin
4
5 This file is part of libflame and is available under the 3-Clause
6 BSD license, which can be found in the LICENSE file at the top-level
7 directory, or at http://opensource.org/licenses/BSD-3-Clause
8
9 */
10
11 #include "FLAME.h"
12
FLA_Tridiag_UT_shift_U(FLA_Uplo uplo,FLA_Obj A)13 FLA_Error FLA_Tridiag_UT_shift_U( FLA_Uplo uplo, FLA_Obj A )
14 {
15 FLA_Datatype datatype;
16 int m_A;
17 int rs_A, cs_A;
18
19 if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
20 FLA_Tridiag_UT_shift_U_check( uplo, A );
21
22 datatype = FLA_Obj_datatype( A );
23
24 // Play with swapping of cs rs; we do not need "u" version.
25 if ( uplo == FLA_LOWER_TRIANGULAR )
26 {
27 m_A = FLA_Obj_length( A );
28 rs_A = FLA_Obj_row_stride( A );
29 cs_A = FLA_Obj_col_stride( A );
30 }
31 else
32 {
33 m_A = FLA_Obj_width( A );
34 cs_A = FLA_Obj_row_stride( A );
35 rs_A = FLA_Obj_col_stride( A );
36 }
37
38 switch ( datatype )
39 {
40 case FLA_FLOAT:
41 {
42 float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
43 FLA_Tridiag_UT_shift_U_l_ops( m_A,
44 buff_A, rs_A, cs_A );
45 break;
46 }
47
48 case FLA_DOUBLE:
49 {
50 double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
51 FLA_Tridiag_UT_shift_U_l_opd( m_A,
52 buff_A, rs_A, cs_A );
53 break;
54 }
55
56 case FLA_COMPLEX:
57 {
58 scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );
59 FLA_Tridiag_UT_shift_U_l_opc( m_A,
60 buff_A, rs_A, cs_A );
61 break;
62 }
63
64 case FLA_DOUBLE_COMPLEX:
65 {
66 dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );
67 FLA_Tridiag_UT_shift_U_l_opz( m_A,
68 buff_A, rs_A, cs_A );
69 break;
70 }
71 }
72
73 return FLA_SUCCESS;
74 }
75
76
77
FLA_Tridiag_UT_shift_U_l_ops(int m_A,float * buff_A,int rs_A,int cs_A)78 FLA_Error FLA_Tridiag_UT_shift_U_l_ops( int m_A,
79 float* buff_A, int rs_A, int cs_A )
80 {
81 float* a00 = buff_A;
82 float* a10 = buff_A + rs_A;
83 float zero = bl1_s0();
84 float one = bl1_s1();
85 int j;
86
87 for ( j = m_A - 1; j > 0; --j )
88 {
89 float* alpha01 = buff_A + (j )*cs_A + (0 )*rs_A;
90 float* alpha11 = buff_A + (j )*cs_A + (j )*rs_A;
91 float* a20 = buff_A + (j-1)*cs_A + (j+1)*rs_A;
92 float* a21 = buff_A + (j )*cs_A + (j+1)*rs_A;
93
94 int m_ahead = m_A - j - 1;
95
96 *alpha01 = zero;
97 *alpha11 = one;
98 bl1_scopyv( BLIS1_NO_CONJUGATE,
99 m_ahead,
100 a20, rs_A,
101 a21, rs_A );
102 }
103
104 *a00 = one;
105 bl1_ssetv( m_A - 1,
106 &zero,
107 a10, rs_A );
108
109 return FLA_SUCCESS;
110 }
111
FLA_Tridiag_UT_shift_U_l_opd(int m_A,double * buff_A,int rs_A,int cs_A)112 FLA_Error FLA_Tridiag_UT_shift_U_l_opd( int m_A,
113 double* buff_A, int rs_A, int cs_A )
114 {
115 double* a00 = buff_A;
116 double* a10 = buff_A + rs_A;
117 double zero = bl1_d0();
118 double one = bl1_d1();
119 int j;
120
121 for ( j = m_A - 1; j > 0; --j )
122 {
123 double* alpha01 = buff_A + (j )*cs_A + (0 )*rs_A;
124 double* alpha11 = buff_A + (j )*cs_A + (j )*rs_A;
125 double* a20 = buff_A + (j-1)*cs_A + (j+1)*rs_A;
126 double* a21 = buff_A + (j )*cs_A + (j+1)*rs_A;
127
128 int m_ahead = m_A - j - 1;
129
130 *alpha01 = zero;
131 *alpha11 = one;
132 bl1_dcopyv( BLIS1_NO_CONJUGATE,
133 m_ahead,
134 a20, rs_A,
135 a21, rs_A );
136 }
137
138 *a00 = one;
139 bl1_dsetv( m_A - 1,
140 &zero,
141 a10, rs_A );
142
143 return FLA_SUCCESS;
144 }
145
FLA_Tridiag_UT_shift_U_l_opc(int m_A,scomplex * buff_A,int rs_A,int cs_A)146 FLA_Error FLA_Tridiag_UT_shift_U_l_opc( int m_A,
147 scomplex* buff_A, int rs_A, int cs_A )
148 {
149 scomplex* a00 = buff_A;
150 scomplex* a10 = buff_A + rs_A;
151 scomplex zero = bl1_c0();
152 scomplex one = bl1_c1();
153 int j;
154
155 for ( j = m_A - 1; j > 0; --j )
156 {
157 scomplex* alpha01 = buff_A + (j )*cs_A + (0 )*rs_A;
158 scomplex* alpha11 = buff_A + (j )*cs_A + (j )*rs_A;
159 scomplex* a20 = buff_A + (j-1)*cs_A + (j+1)*rs_A;
160 scomplex* a21 = buff_A + (j )*cs_A + (j+1)*rs_A;
161
162 int m_ahead = m_A - j - 1;
163
164 *alpha01 = zero;
165 *alpha11 = one;
166 bl1_ccopyv( BLIS1_NO_CONJUGATE,
167 m_ahead,
168 a20, rs_A,
169 a21, rs_A );
170 }
171
172 *a00 = one;
173 bl1_csetv( m_A - 1,
174 &zero,
175 a10, rs_A );
176
177 return FLA_SUCCESS;
178 }
179
FLA_Tridiag_UT_shift_U_l_opz(int m_A,dcomplex * buff_A,int rs_A,int cs_A)180 FLA_Error FLA_Tridiag_UT_shift_U_l_opz( int m_A,
181 dcomplex* buff_A, int rs_A, int cs_A )
182 {
183 dcomplex* a00 = buff_A;
184 dcomplex* a10 = buff_A + rs_A;
185 dcomplex zero = bl1_z0();
186 dcomplex one = bl1_z1();
187 int j;
188
189 for ( j = m_A - 1; j > 0; --j )
190 {
191 dcomplex* alpha01 = buff_A + (j )*cs_A + (0 )*rs_A;
192 dcomplex* alpha11 = buff_A + (j )*cs_A + (j )*rs_A;
193 dcomplex* a20 = buff_A + (j-1)*cs_A + (j+1)*rs_A;
194 dcomplex* a21 = buff_A + (j )*cs_A + (j+1)*rs_A;
195
196 int m_ahead = m_A - j - 1;
197
198 *alpha01 = zero;
199 *alpha11 = one;
200 bl1_zcopyv( BLIS1_NO_CONJUGATE,
201 m_ahead,
202 a20, rs_A,
203 a21, rs_A );
204 }
205
206 *a00 = one;
207 bl1_zsetv( m_A - 1,
208 &zero,
209 a10, rs_A );
210
211 return FLA_SUCCESS;
212 }
213
214