1 /*
2 
3     Copyright (C) 2014, The University of Texas at Austin
4 
5     This file is part of libflame and is available under the 3-Clause
6     BSD license, which can be found in the LICENSE file at the top-level
7     directory, or at http://opensource.org/licenses/BSD-3-Clause
8 
9 */
10 
11 #include "FLAME.h"
12 
FLA_Tridiag_UT_shift_U(FLA_Uplo uplo,FLA_Obj A)13 FLA_Error FLA_Tridiag_UT_shift_U( FLA_Uplo uplo, FLA_Obj A )
14 {
15 	FLA_Datatype datatype;
16 	int          m_A;
17 	int          rs_A, cs_A;
18 
19 	if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
20 		FLA_Tridiag_UT_shift_U_check( uplo, A );
21 
22 	datatype = FLA_Obj_datatype( A );
23 
24         // Play with swapping of cs rs; we do not need "u" version.
25         if ( uplo == FLA_LOWER_TRIANGULAR )
26           {
27             m_A      = FLA_Obj_length( A );
28             rs_A     = FLA_Obj_row_stride( A );
29             cs_A     = FLA_Obj_col_stride( A );
30           }
31         else
32           {
33             m_A      = FLA_Obj_width( A );
34             cs_A     = FLA_Obj_row_stride( A );
35             rs_A     = FLA_Obj_col_stride( A );
36           }
37 
38 	switch ( datatype )
39 	{
40 		case FLA_FLOAT:
41 		{
42 			float*    buff_A = ( float* ) FLA_FLOAT_PTR( A );
43                         FLA_Tridiag_UT_shift_U_l_ops( m_A,
44                                                       buff_A, rs_A, cs_A );
45 			break;
46 		}
47 
48 		case FLA_DOUBLE:
49 		{
50 			double*   buff_A = ( double* ) FLA_DOUBLE_PTR( A );
51                         FLA_Tridiag_UT_shift_U_l_opd( m_A,
52                                                       buff_A, rs_A, cs_A );
53 			break;
54 		}
55 
56 		case FLA_COMPLEX:
57 		{
58 			scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );
59                         FLA_Tridiag_UT_shift_U_l_opc( m_A,
60                                                       buff_A, rs_A, cs_A );
61 			break;
62 		}
63 
64 		case FLA_DOUBLE_COMPLEX:
65 		{
66 			dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );
67                         FLA_Tridiag_UT_shift_U_l_opz( m_A,
68                                                       buff_A, rs_A, cs_A );
69 			break;
70 		}
71 	}
72 
73 	return FLA_SUCCESS;
74 }
75 
76 
77 
FLA_Tridiag_UT_shift_U_l_ops(int m_A,float * buff_A,int rs_A,int cs_A)78 FLA_Error FLA_Tridiag_UT_shift_U_l_ops( int       m_A,
79                                         float*    buff_A, int rs_A, int cs_A )
80 {
81 	float*  a00  = buff_A;
82 	float*  a10  = buff_A + rs_A;
83 	float   zero = bl1_s0();
84 	float   one  = bl1_s1();
85 	int     j;
86 
87 	for ( j = m_A - 1; j > 0; --j )
88 	{
89 		float*    alpha01  = buff_A + (j  )*cs_A + (0  )*rs_A;
90 		float*    alpha11  = buff_A + (j  )*cs_A + (j  )*rs_A;
91 		float*    a20      = buff_A + (j-1)*cs_A + (j+1)*rs_A;
92 		float*    a21      = buff_A + (j  )*cs_A + (j+1)*rs_A;
93 
94 		int       m_ahead  = m_A - j - 1;
95 
96 		*alpha01 = zero;
97 		*alpha11 = one;
98 		bl1_scopyv( BLIS1_NO_CONJUGATE,
99 		            m_ahead,
100 		            a20, rs_A,
101 		            a21, rs_A );
102 	}
103 
104 	*a00 = one;
105 	bl1_ssetv( m_A - 1,
106 	           &zero,
107 	           a10, rs_A );
108 
109 	return FLA_SUCCESS;
110 }
111 
FLA_Tridiag_UT_shift_U_l_opd(int m_A,double * buff_A,int rs_A,int cs_A)112 FLA_Error FLA_Tridiag_UT_shift_U_l_opd( int       m_A,
113                                         double*   buff_A, int rs_A, int cs_A )
114 {
115 	double* a00  = buff_A;
116 	double* a10  = buff_A + rs_A;
117 	double  zero = bl1_d0();
118 	double  one  = bl1_d1();
119 	int     j;
120 
121 	for ( j = m_A - 1; j > 0; --j )
122 	{
123 		double*   alpha01  = buff_A + (j  )*cs_A + (0  )*rs_A;
124 		double*   alpha11  = buff_A + (j  )*cs_A + (j  )*rs_A;
125 		double*   a20      = buff_A + (j-1)*cs_A + (j+1)*rs_A;
126 		double*   a21      = buff_A + (j  )*cs_A + (j+1)*rs_A;
127 
128 		int       m_ahead  = m_A - j - 1;
129 
130 		*alpha01 = zero;
131 		*alpha11 = one;
132 		bl1_dcopyv( BLIS1_NO_CONJUGATE,
133 		            m_ahead,
134 		            a20, rs_A,
135 		            a21, rs_A );
136 	}
137 
138 	*a00 = one;
139 	bl1_dsetv( m_A - 1,
140 	           &zero,
141 	           a10, rs_A );
142 
143 	return FLA_SUCCESS;
144 }
145 
FLA_Tridiag_UT_shift_U_l_opc(int m_A,scomplex * buff_A,int rs_A,int cs_A)146 FLA_Error FLA_Tridiag_UT_shift_U_l_opc( int       m_A,
147                                         scomplex* buff_A, int rs_A, int cs_A )
148 {
149 	scomplex* a00  = buff_A;
150 	scomplex* a10  = buff_A + rs_A;
151 	scomplex  zero = bl1_c0();
152 	scomplex  one  = bl1_c1();
153 	int       j;
154 
155 	for ( j = m_A - 1; j > 0; --j )
156 	{
157 		scomplex* alpha01  = buff_A + (j  )*cs_A + (0  )*rs_A;
158 		scomplex* alpha11  = buff_A + (j  )*cs_A + (j  )*rs_A;
159 		scomplex* a20      = buff_A + (j-1)*cs_A + (j+1)*rs_A;
160 		scomplex* a21      = buff_A + (j  )*cs_A + (j+1)*rs_A;
161 
162 		int       m_ahead  = m_A - j - 1;
163 
164 		*alpha01 = zero;
165 		*alpha11 = one;
166 		bl1_ccopyv( BLIS1_NO_CONJUGATE,
167 		            m_ahead,
168 		            a20, rs_A,
169 		            a21, rs_A );
170 	}
171 
172 	*a00 = one;
173 	bl1_csetv( m_A - 1,
174 	           &zero,
175 	           a10, rs_A );
176 
177 	return FLA_SUCCESS;
178 }
179 
FLA_Tridiag_UT_shift_U_l_opz(int m_A,dcomplex * buff_A,int rs_A,int cs_A)180 FLA_Error FLA_Tridiag_UT_shift_U_l_opz( int       m_A,
181                                         dcomplex* buff_A, int rs_A, int cs_A )
182 {
183 	dcomplex* a00  = buff_A;
184 	dcomplex* a10  = buff_A + rs_A;
185 	dcomplex  zero = bl1_z0();
186 	dcomplex  one  = bl1_z1();
187 	int       j;
188 
189 	for ( j = m_A - 1; j > 0; --j )
190 	{
191 		dcomplex* alpha01  = buff_A + (j  )*cs_A + (0  )*rs_A;
192 		dcomplex* alpha11  = buff_A + (j  )*cs_A + (j  )*rs_A;
193 		dcomplex* a20      = buff_A + (j-1)*cs_A + (j+1)*rs_A;
194 		dcomplex* a21      = buff_A + (j  )*cs_A + (j+1)*rs_A;
195 
196 		int       m_ahead  = m_A - j - 1;
197 
198 		*alpha01 = zero;
199 		*alpha11 = one;
200 		bl1_zcopyv( BLIS1_NO_CONJUGATE,
201 		            m_ahead,
202 		            a20, rs_A,
203 		            a21, rs_A );
204 	}
205 
206 	*a00 = one;
207 	bl1_zsetv( m_A - 1,
208 	           &zero,
209 	           a10, rs_A );
210 
211 	return FLA_SUCCESS;
212 }
213 
214