1 /*
2
3 Copyright (C) 2014, The University of Texas at Austin
4
5 This file is part of libflame and is available under the 3-Clause
6 BSD license, which can be found in the LICENSE file at the top-level
7 directory, or at http://opensource.org/licenses/BSD-3-Clause
8
9 */
10
11 #include "FLAME.h"
12
13 #ifdef FLA_ENABLE_NON_CRITICAL_CODE
14
FLA_Ttmm_u_opt_var3(FLA_Obj A)15 FLA_Error FLA_Ttmm_u_opt_var3( FLA_Obj A )
16 {
17 FLA_Datatype datatype;
18 int mn_A;
19 int rs_A, cs_A;
20
21 datatype = FLA_Obj_datatype( A );
22
23 mn_A = FLA_Obj_length( A );
24 rs_A = FLA_Obj_row_stride( A );
25 cs_A = FLA_Obj_col_stride( A );
26
27
28 switch ( datatype )
29 {
30 case FLA_FLOAT:
31 {
32 float* buff_A = FLA_FLOAT_PTR( A );
33
34 FLA_Ttmm_u_ops_var3( mn_A,
35 buff_A, rs_A, cs_A );
36
37 break;
38 }
39
40 case FLA_DOUBLE:
41 {
42 double* buff_A = FLA_DOUBLE_PTR( A );
43
44 FLA_Ttmm_u_opd_var3( mn_A,
45 buff_A, rs_A, cs_A );
46
47 break;
48 }
49
50 case FLA_COMPLEX:
51 {
52 scomplex* buff_A = FLA_COMPLEX_PTR( A );
53
54 FLA_Ttmm_u_opc_var3( mn_A,
55 buff_A, rs_A, cs_A );
56
57 break;
58 }
59
60 case FLA_DOUBLE_COMPLEX:
61 {
62 dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
63
64 FLA_Ttmm_u_opz_var3( mn_A,
65 buff_A, rs_A, cs_A );
66
67 break;
68 }
69 }
70
71 return FLA_SUCCESS;
72 }
73
74
75
FLA_Ttmm_u_ops_var3(int mn_A,float * buff_A,int rs_A,int cs_A)76 FLA_Error FLA_Ttmm_u_ops_var3( int mn_A,
77 float* buff_A, int rs_A, int cs_A )
78 {
79 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
80 int i;
81
82 for ( i = 0; i < mn_A; ++i )
83 {
84 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
85 float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
86 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
87
88 int mn_ahead = mn_A - i - 1;
89
90 /*------------------------------------------------------------*/
91
92 // FLA_Absolute_square( alpha11 );
93 bl1_sabsqr( alpha11 );
94
95 // FLA_Dotcs_external( FLA_CONJUGATE, FLA_ONE, a12t, a12t, FLA_ONE, alpha11 );
96 bl1_sdots( BLIS1_CONJUGATE,
97 mn_ahead,
98 buff_1,
99 a12t, cs_A,
100 a12t, cs_A,
101 buff_1,
102 alpha11 );
103
104 // FLA_Trmv_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_NO_TRANSPOSE, FLA_NONUNIT_DIAG, A22, a12t );
105 bl1_strmv( BLIS1_UPPER_TRIANGULAR,
106 BLIS1_CONJ_NO_TRANSPOSE,
107 BLIS1_NONUNIT_DIAG,
108 mn_ahead,
109 A22, rs_A, cs_A,
110 a12t, cs_A );
111
112 /*------------------------------------------------------------*/
113
114 }
115
116 return FLA_SUCCESS;
117 }
118
119
120
FLA_Ttmm_u_opd_var3(int mn_A,double * buff_A,int rs_A,int cs_A)121 FLA_Error FLA_Ttmm_u_opd_var3( int mn_A,
122 double* buff_A, int rs_A, int cs_A )
123 {
124 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
125 int i;
126
127 for ( i = 0; i < mn_A; ++i )
128 {
129 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
130 double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
131 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
132
133 int mn_ahead = mn_A - i - 1;
134
135 /*------------------------------------------------------------*/
136
137 // FLA_Absolute_square( alpha11 );
138 bl1_dabsqr( alpha11 );
139
140 // FLA_Dotcs_external( FLA_CONJUGATE, FLA_ONE, a12t, a12t, FLA_ONE, alpha11 );
141 bl1_ddots( BLIS1_CONJUGATE,
142 mn_ahead,
143 buff_1,
144 a12t, cs_A,
145 a12t, cs_A,
146 buff_1,
147 alpha11 );
148
149 // FLA_Trmv_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_NO_TRANSPOSE, FLA_NONUNIT_DIAG, A22, a12t );
150 bl1_dtrmv( BLIS1_UPPER_TRIANGULAR,
151 BLIS1_CONJ_NO_TRANSPOSE,
152 BLIS1_NONUNIT_DIAG,
153 mn_ahead,
154 A22, rs_A, cs_A,
155 a12t, cs_A );
156
157 /*------------------------------------------------------------*/
158
159 }
160
161 return FLA_SUCCESS;
162 }
163
164
165
FLA_Ttmm_u_opc_var3(int mn_A,scomplex * buff_A,int rs_A,int cs_A)166 FLA_Error FLA_Ttmm_u_opc_var3( int mn_A,
167 scomplex* buff_A, int rs_A, int cs_A )
168 {
169 scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
170 int i;
171
172 for ( i = 0; i < mn_A; ++i )
173 {
174 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
175 scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
176 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
177
178 int mn_ahead = mn_A - i - 1;
179
180 /*------------------------------------------------------------*/
181
182 // FLA_Absolute_square( alpha11 );
183 bl1_cabsqr( alpha11 );
184
185 // FLA_Dotcs_external( FLA_CONJUGATE, FLA_ONE, a12t, a12t, FLA_ONE, alpha11 );
186 bl1_cdots( BLIS1_CONJUGATE,
187 mn_ahead,
188 buff_1,
189 a12t, cs_A,
190 a12t, cs_A,
191 buff_1,
192 alpha11 );
193
194 // FLA_Trmv_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_NO_TRANSPOSE, FLA_NONUNIT_DIAG, A22, a12t );
195 bl1_ctrmv( BLIS1_UPPER_TRIANGULAR,
196 BLIS1_CONJ_NO_TRANSPOSE,
197 BLIS1_NONUNIT_DIAG,
198 mn_ahead,
199 A22, rs_A, cs_A,
200 a12t, cs_A );
201
202 /*------------------------------------------------------------*/
203
204 }
205
206 return FLA_SUCCESS;
207 }
208
209
210
FLA_Ttmm_u_opz_var3(int mn_A,dcomplex * buff_A,int rs_A,int cs_A)211 FLA_Error FLA_Ttmm_u_opz_var3( int mn_A,
212 dcomplex* buff_A, int rs_A, int cs_A )
213 {
214 dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
215 int i;
216
217 for ( i = 0; i < mn_A; ++i )
218 {
219 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
220 dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
221 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
222
223 int mn_ahead = mn_A - i - 1;
224
225 /*------------------------------------------------------------*/
226
227 // FLA_Absolute_square( alpha11 );
228 bl1_zabsqr( alpha11 );
229
230 // FLA_Dotcs_external( FLA_CONJUGATE, FLA_ONE, a12t, a12t, FLA_ONE, alpha11 );
231 bl1_zdots( BLIS1_CONJUGATE,
232 mn_ahead,
233 buff_1,
234 a12t, cs_A,
235 a12t, cs_A,
236 buff_1,
237 alpha11 );
238
239 // FLA_Trmv_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_NO_TRANSPOSE, FLA_NONUNIT_DIAG, A22, a12t );
240 bl1_ztrmv( BLIS1_UPPER_TRIANGULAR,
241 BLIS1_CONJ_NO_TRANSPOSE,
242 BLIS1_NONUNIT_DIAG,
243 mn_ahead,
244 A22, rs_A, cs_A,
245 a12t, cs_A );
246
247 /*------------------------------------------------------------*/
248
249 }
250
251 return FLA_SUCCESS;
252 }
253
254 #endif
255