1 /*
2
3 Copyright (C) 2014, The University of Texas at Austin
4
5 This file is part of libflame and is available under the 3-Clause
6 BSD license, which can be found in the LICENSE file at the top-level
7 directory, or at http://opensource.org/licenses/BSD-3-Clause
8
9 */
10
11 #include "FLAME.h"
12
13 #ifdef FLA_ENABLE_NON_CRITICAL_CODE
14
FLA_Trinv_ln_opt_var4(FLA_Obj A)15 FLA_Error FLA_Trinv_ln_opt_var4( FLA_Obj A )
16 {
17 FLA_Datatype datatype;
18 int mn_A;
19 int rs_A, cs_A;
20
21 datatype = FLA_Obj_datatype( A );
22
23 mn_A = FLA_Obj_length( A );
24 rs_A = FLA_Obj_row_stride( A );
25 cs_A = FLA_Obj_col_stride( A );
26
27
28 switch ( datatype )
29 {
30 case FLA_FLOAT:
31 {
32 float* buff_A = FLA_FLOAT_PTR( A );
33
34 FLA_Trinv_ln_ops_var4( mn_A,
35 buff_A, rs_A, cs_A );
36
37 break;
38 }
39
40 case FLA_DOUBLE:
41 {
42 double* buff_A = FLA_DOUBLE_PTR( A );
43
44 FLA_Trinv_ln_opd_var4( mn_A,
45 buff_A, rs_A, cs_A );
46
47 break;
48 }
49
50 case FLA_COMPLEX:
51 {
52 scomplex* buff_A = FLA_COMPLEX_PTR( A );
53
54 FLA_Trinv_ln_opc_var4( mn_A,
55 buff_A, rs_A, cs_A );
56
57 break;
58 }
59
60 case FLA_DOUBLE_COMPLEX:
61 {
62 dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
63
64 FLA_Trinv_ln_opz_var4( mn_A,
65 buff_A, rs_A, cs_A );
66
67 break;
68 }
69 }
70
71 return FLA_SUCCESS;
72 }
73
74
75
FLA_Trinv_ln_ops_var4(int mn_A,float * buff_A,int rs_A,int cs_A)76 FLA_Error FLA_Trinv_ln_ops_var4( int mn_A,
77 float* buff_A, int rs_A, int cs_A )
78 {
79 float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
80 int i;
81
82 for ( i = 0; i < mn_A; ++i )
83 {
84 float* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
85 float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
86 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
87 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
88 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
89 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
90
91 int mn_ahead = mn_A - i - 1;
92 int mn_behind = i;
93
94 /*------------------------------------------------------------*/
95
96 // FLA_Scal_external( FLA_MINUS_ONE, a21 );
97 // FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG, A22, a21 );
98 bl1_sscalv( BLIS1_NO_CONJUGATE,
99 mn_ahead,
100 buff_m1,
101 a21, rs_A );
102 bl1_strsv( BLIS1_LOWER_TRIANGULAR,
103 BLIS1_NO_TRANSPOSE,
104 BLIS1_NONUNIT_DIAG,
105 mn_ahead,
106 A22, rs_A, cs_A,
107 a21, rs_A );
108
109 // FLA_Ger_external( FLA_MINUS_ONE, a21, a10t, A20 );
110 bl1_sger( BLIS1_NO_CONJUGATE,
111 BLIS1_NO_CONJUGATE,
112 mn_ahead,
113 mn_behind,
114 buff_m1,
115 a21, rs_A,
116 a10t, cs_A,
117 A20, rs_A, cs_A );
118
119 // FLA_Trmv_external( FLA_LOWER_TRIANGULAR, FLA_TRANSPOSE, FLA_NONUNIT_DIAG, A00, a10t );
120 bl1_strmv( BLIS1_LOWER_TRIANGULAR,
121 BLIS1_TRANSPOSE,
122 BLIS1_NONUNIT_DIAG,
123 mn_behind,
124 A00, rs_A, cs_A,
125 a10t, cs_A );
126
127 // FLA_Invert( FLA_NO_CONJUGATE, alpha11 );
128 bl1_sinverts( BLIS1_NO_CONJUGATE,
129 alpha11 );
130
131 /*------------------------------------------------------------*/
132
133 }
134
135 return FLA_SUCCESS;
136 }
137
138
139
FLA_Trinv_ln_opd_var4(int mn_A,double * buff_A,int rs_A,int cs_A)140 FLA_Error FLA_Trinv_ln_opd_var4( int mn_A,
141 double* buff_A, int rs_A, int cs_A )
142 {
143 double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
144 int i;
145
146 for ( i = 0; i < mn_A; ++i )
147 {
148 double* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
149 double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
150 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
151 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
152 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
153 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
154
155 int mn_ahead = mn_A - i - 1;
156 int mn_behind = i;
157
158 /*------------------------------------------------------------*/
159
160 // FLA_Scal_external( FLA_MINUS_ONE, a21 );
161 // FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG, A22, a21 );
162 bl1_dscalv( BLIS1_NO_CONJUGATE,
163 mn_ahead,
164 buff_m1,
165 a21, rs_A );
166 bl1_dtrsv( BLIS1_LOWER_TRIANGULAR,
167 BLIS1_NO_TRANSPOSE,
168 BLIS1_NONUNIT_DIAG,
169 mn_ahead,
170 A22, rs_A, cs_A,
171 a21, rs_A );
172
173 // FLA_Ger_external( FLA_MINUS_ONE, a21, a10t, A20 );
174 bl1_dger( BLIS1_NO_CONJUGATE,
175 BLIS1_NO_CONJUGATE,
176 mn_ahead,
177 mn_behind,
178 buff_m1,
179 a21, rs_A,
180 a10t, cs_A,
181 A20, rs_A, cs_A );
182
183 // FLA_Trmv_external( FLA_LOWER_TRIANGULAR, FLA_TRANSPOSE, FLA_NONUNIT_DIAG, A00, a10t );
184 bl1_dtrmv( BLIS1_LOWER_TRIANGULAR,
185 BLIS1_TRANSPOSE,
186 BLIS1_NONUNIT_DIAG,
187 mn_behind,
188 A00, rs_A, cs_A,
189 a10t, cs_A );
190
191 // FLA_Invert( FLA_NO_CONJUGATE, alpha11 );
192 bl1_dinverts( BLIS1_NO_CONJUGATE,
193 alpha11 );
194
195 /*------------------------------------------------------------*/
196
197 }
198
199 return FLA_SUCCESS;
200 }
201
202
203
FLA_Trinv_ln_opc_var4(int mn_A,scomplex * buff_A,int rs_A,int cs_A)204 FLA_Error FLA_Trinv_ln_opc_var4( int mn_A,
205 scomplex* buff_A, int rs_A, int cs_A )
206 {
207 scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
208 int i;
209
210 for ( i = 0; i < mn_A; ++i )
211 {
212 scomplex* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
213 scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
214 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
215 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
216 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
217 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
218
219 int mn_ahead = mn_A - i - 1;
220 int mn_behind = i;
221
222 /*------------------------------------------------------------*/
223
224 // FLA_Scal_external( FLA_MINUS_ONE, a21 );
225 // FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG, A22, a21 );
226 bl1_cscalv( BLIS1_NO_CONJUGATE,
227 mn_ahead,
228 buff_m1,
229 a21, rs_A );
230 bl1_ctrsv( BLIS1_LOWER_TRIANGULAR,
231 BLIS1_NO_TRANSPOSE,
232 BLIS1_NONUNIT_DIAG,
233 mn_ahead,
234 A22, rs_A, cs_A,
235 a21, rs_A );
236
237 // FLA_Ger_external( FLA_MINUS_ONE, a21, a10t, A20 );
238 bl1_cger( BLIS1_NO_CONJUGATE,
239 BLIS1_NO_CONJUGATE,
240 mn_ahead,
241 mn_behind,
242 buff_m1,
243 a21, rs_A,
244 a10t, cs_A,
245 A20, rs_A, cs_A );
246
247 // FLA_Trmv_external( FLA_LOWER_TRIANGULAR, FLA_TRANSPOSE, FLA_NONUNIT_DIAG, A00, a10t );
248 bl1_ctrmv( BLIS1_LOWER_TRIANGULAR,
249 BLIS1_TRANSPOSE,
250 BLIS1_NONUNIT_DIAG,
251 mn_behind,
252 A00, rs_A, cs_A,
253 a10t, cs_A );
254
255 // FLA_Invert( FLA_NO_CONJUGATE, alpha11 );
256 bl1_cinverts( BLIS1_NO_CONJUGATE,
257 alpha11 );
258
259 /*------------------------------------------------------------*/
260
261 }
262
263 return FLA_SUCCESS;
264 }
265
266
267
FLA_Trinv_ln_opz_var4(int mn_A,dcomplex * buff_A,int rs_A,int cs_A)268 FLA_Error FLA_Trinv_ln_opz_var4( int mn_A,
269 dcomplex* buff_A, int rs_A, int cs_A )
270 {
271 dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
272 int i;
273
274 for ( i = 0; i < mn_A; ++i )
275 {
276 dcomplex* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
277 dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
278 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
279 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
280 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
281 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
282
283 int mn_ahead = mn_A - i - 1;
284 int mn_behind = i;
285
286 /*------------------------------------------------------------*/
287
288 // FLA_Scal_external( FLA_MINUS_ONE, a21 );
289 // FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG, A22, a21 );
290 bl1_zscalv( BLIS1_NO_CONJUGATE,
291 mn_ahead,
292 buff_m1,
293 a21, rs_A );
294 bl1_ztrsv( BLIS1_LOWER_TRIANGULAR,
295 BLIS1_NO_TRANSPOSE,
296 BLIS1_NONUNIT_DIAG,
297 mn_ahead,
298 A22, rs_A, cs_A,
299 a21, rs_A );
300
301 // FLA_Ger_external( FLA_MINUS_ONE, a21, a10t, A20 );
302 bl1_zger( BLIS1_NO_CONJUGATE,
303 BLIS1_NO_CONJUGATE,
304 mn_ahead,
305 mn_behind,
306 buff_m1,
307 a21, rs_A,
308 a10t, cs_A,
309 A20, rs_A, cs_A );
310
311 // FLA_Trmv_external( FLA_LOWER_TRIANGULAR, FLA_TRANSPOSE, FLA_NONUNIT_DIAG, A00, a10t );
312 bl1_ztrmv( BLIS1_LOWER_TRIANGULAR,
313 BLIS1_TRANSPOSE,
314 BLIS1_NONUNIT_DIAG,
315 mn_behind,
316 A00, rs_A, cs_A,
317 a10t, cs_A );
318
319 // FLA_Invert( FLA_NO_CONJUGATE, alpha11 );
320 bl1_zinverts( BLIS1_NO_CONJUGATE,
321 alpha11 );
322
323 /*------------------------------------------------------------*/
324
325 }
326
327 return FLA_SUCCESS;
328 }
329
330 #endif
331