1 /*
2
3 Copyright (C) 2014, The University of Texas at Austin
4
5 This file is part of libflame and is available under the 3-Clause
6 BSD license, which can be found in the LICENSE file at the top-level
7 directory, or at http://opensource.org/licenses/BSD-3-Clause
8
9 */
10
11 #include "FLAME.h"
12
13 #ifdef FLA_ENABLE_NON_CRITICAL_CODE
14
FLA_Trinv_ln_opt_var2(FLA_Obj A)15 FLA_Error FLA_Trinv_ln_opt_var2( FLA_Obj A )
16 {
17 FLA_Datatype datatype;
18 int mn_A;
19 int rs_A, cs_A;
20
21 datatype = FLA_Obj_datatype( A );
22
23 mn_A = FLA_Obj_length( A );
24 rs_A = FLA_Obj_row_stride( A );
25 cs_A = FLA_Obj_col_stride( A );
26
27
28 switch ( datatype )
29 {
30 case FLA_FLOAT:
31 {
32 float* buff_A = FLA_FLOAT_PTR( A );
33
34 FLA_Trinv_ln_ops_var2( mn_A,
35 buff_A, rs_A, cs_A );
36
37 break;
38 }
39
40 case FLA_DOUBLE:
41 {
42 double* buff_A = FLA_DOUBLE_PTR( A );
43
44 FLA_Trinv_ln_opd_var2( mn_A,
45 buff_A, rs_A, cs_A );
46
47 break;
48 }
49
50 case FLA_COMPLEX:
51 {
52 scomplex* buff_A = FLA_COMPLEX_PTR( A );
53
54 FLA_Trinv_ln_opc_var2( mn_A,
55 buff_A, rs_A, cs_A );
56
57 break;
58 }
59
60 case FLA_DOUBLE_COMPLEX:
61 {
62 dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
63
64 FLA_Trinv_ln_opz_var2( mn_A,
65 buff_A, rs_A, cs_A );
66
67 break;
68 }
69 }
70
71 return FLA_SUCCESS;
72 }
73
74
75
FLA_Trinv_ln_ops_var2(int mn_A,float * buff_A,int rs_A,int cs_A)76 FLA_Error FLA_Trinv_ln_ops_var2( int mn_A,
77 float* buff_A, int rs_A, int cs_A )
78 {
79 float alpha11_m1;
80 int i;
81
82 for ( i = 0; i < mn_A; ++i )
83 {
84 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
85 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
86 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
87
88 int mn_ahead = mn_A - i - 1;
89
90 /*------------------------------------------------------------*/
91
92 // FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG, A22, a21 );
93 bl1_strsv( BLIS1_LOWER_TRIANGULAR,
94 BLIS1_NO_TRANSPOSE,
95 BLIS1_NONUNIT_DIAG,
96 mn_ahead,
97 A22, rs_A, cs_A,
98 a21, rs_A );
99
100 // FLA_Scal_external( FLA_MINUS_ONE, a21 );
101 // FLA_Inv_scal_external( alpha11, a21 );
102 bl1_sneg2( alpha11, &alpha11_m1 );
103 bl1_sinvscalv( BLIS1_NO_CONJUGATE,
104 mn_ahead,
105 &alpha11_m1,
106 a21, rs_A );
107
108 // FLA_Invert( FLA_NO_CONJUGATE, alpha11 );
109 bl1_sinverts( BLIS1_NO_CONJUGATE,
110 alpha11 );
111
112 /*------------------------------------------------------------*/
113
114 }
115
116 return FLA_SUCCESS;
117 }
118
119
120
FLA_Trinv_ln_opd_var2(int mn_A,double * buff_A,int rs_A,int cs_A)121 FLA_Error FLA_Trinv_ln_opd_var2( int mn_A,
122 double* buff_A, int rs_A, int cs_A )
123 {
124 double alpha11_m1;
125 int i;
126
127 for ( i = 0; i < mn_A; ++i )
128 {
129 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
130 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
131 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
132
133 int mn_ahead = mn_A - i - 1;
134
135 /*------------------------------------------------------------*/
136
137 // FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG, A22, a21 );
138 bl1_dtrsv( BLIS1_LOWER_TRIANGULAR,
139 BLIS1_NO_TRANSPOSE,
140 BLIS1_NONUNIT_DIAG,
141 mn_ahead,
142 A22, rs_A, cs_A,
143 a21, rs_A );
144
145 // FLA_Scal_external( FLA_MINUS_ONE, a21 );
146 // FLA_Inv_scal_external( alpha11, a21 );
147 bl1_dneg2( alpha11, &alpha11_m1 );
148 bl1_dinvscalv( BLIS1_NO_CONJUGATE,
149 mn_ahead,
150 &alpha11_m1,
151 a21, rs_A );
152
153 // FLA_Invert( FLA_NO_CONJUGATE, alpha11 );
154 bl1_dinverts( BLIS1_NO_CONJUGATE,
155 alpha11 );
156
157 /*------------------------------------------------------------*/
158
159 }
160
161 return FLA_SUCCESS;
162 }
163
164
165
FLA_Trinv_ln_opc_var2(int mn_A,scomplex * buff_A,int rs_A,int cs_A)166 FLA_Error FLA_Trinv_ln_opc_var2( int mn_A,
167 scomplex* buff_A, int rs_A, int cs_A )
168 {
169 scomplex alpha11_m1;
170 int i;
171
172 for ( i = 0; i < mn_A; ++i )
173 {
174 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
175 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
176 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
177
178 int mn_ahead = mn_A - i - 1;
179
180 /*------------------------------------------------------------*/
181
182 // FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG, A22, a21 );
183 bl1_ctrsv( BLIS1_LOWER_TRIANGULAR,
184 BLIS1_NO_TRANSPOSE,
185 BLIS1_NONUNIT_DIAG,
186 mn_ahead,
187 A22, rs_A, cs_A,
188 a21, rs_A );
189
190 // FLA_Scal_external( FLA_MINUS_ONE, a21 );
191 // FLA_Inv_scal_external( alpha11, a21 );
192 bl1_cneg2( alpha11, &alpha11_m1 );
193 bl1_cinvscalv( BLIS1_NO_CONJUGATE,
194 mn_ahead,
195 &alpha11_m1,
196 a21, rs_A );
197
198 // FLA_Invert( FLA_NO_CONJUGATE, alpha11 );
199 bl1_cinverts( BLIS1_NO_CONJUGATE,
200 alpha11 );
201
202 /*------------------------------------------------------------*/
203
204 }
205
206 return FLA_SUCCESS;
207 }
208
209
210
FLA_Trinv_ln_opz_var2(int mn_A,dcomplex * buff_A,int rs_A,int cs_A)211 FLA_Error FLA_Trinv_ln_opz_var2( int mn_A,
212 dcomplex* buff_A, int rs_A, int cs_A )
213 {
214 dcomplex alpha11_m1;
215 int i;
216
217 for ( i = 0; i < mn_A; ++i )
218 {
219 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
220 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
221 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
222
223 int mn_ahead = mn_A - i - 1;
224
225 /*------------------------------------------------------------*/
226
227 // FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG, A22, a21 );
228 bl1_ztrsv( BLIS1_LOWER_TRIANGULAR,
229 BLIS1_NO_TRANSPOSE,
230 BLIS1_NONUNIT_DIAG,
231 mn_ahead,
232 A22, rs_A, cs_A,
233 a21, rs_A );
234
235 // FLA_Scal_external( FLA_MINUS_ONE, a21 );
236 // FLA_Inv_scal_external( alpha11, a21 );
237 bl1_zneg2( alpha11, &alpha11_m1 );
238 bl1_zinvscalv( BLIS1_NO_CONJUGATE,
239 mn_ahead,
240 &alpha11_m1,
241 a21, rs_A );
242
243 // FLA_Invert( FLA_NO_CONJUGATE, alpha11 );
244 bl1_zinverts( BLIS1_NO_CONJUGATE,
245 alpha11 );
246
247 /*------------------------------------------------------------*/
248
249 }
250
251 return FLA_SUCCESS;
252 }
253
254 #endif
255