1 /*
2
3 Copyright (C) 2014, The University of Texas at Austin
4
5 This file is part of libflame and is available under the 3-Clause
6 BSD license, which can be found in the LICENSE file at the top-level
7 directory, or at http://opensource.org/licenses/BSD-3-Clause
8
9 */
10
11 #include "FLAME.h"
12
13 #ifdef FLA_ENABLE_NON_CRITICAL_CODE
14
FLA_Ttmm_l_opt_var1(FLA_Obj A)15 FLA_Error FLA_Ttmm_l_opt_var1( FLA_Obj A )
16 {
17 FLA_Datatype datatype;
18 int mn_A;
19 int rs_A, cs_A;
20
21 datatype = FLA_Obj_datatype( A );
22
23 mn_A = FLA_Obj_length( A );
24 rs_A = FLA_Obj_row_stride( A );
25 cs_A = FLA_Obj_col_stride( A );
26
27
28 switch ( datatype )
29 {
30 case FLA_FLOAT:
31 {
32 float* buff_A = FLA_FLOAT_PTR( A );
33
34 FLA_Ttmm_l_ops_var1( mn_A,
35 buff_A, rs_A, cs_A );
36
37 break;
38 }
39
40 case FLA_DOUBLE:
41 {
42 double* buff_A = FLA_DOUBLE_PTR( A );
43
44 FLA_Ttmm_l_opd_var1( mn_A,
45 buff_A, rs_A, cs_A );
46
47 break;
48 }
49
50 case FLA_COMPLEX:
51 {
52 scomplex* buff_A = FLA_COMPLEX_PTR( A );
53
54 FLA_Ttmm_l_opc_var1( mn_A,
55 buff_A, rs_A, cs_A );
56
57 break;
58 }
59
60 case FLA_DOUBLE_COMPLEX:
61 {
62 dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
63
64 FLA_Ttmm_l_opz_var1( mn_A,
65 buff_A, rs_A, cs_A );
66
67 break;
68 }
69 }
70
71 return FLA_SUCCESS;
72 }
73
74
75
FLA_Ttmm_l_ops_var1(int mn_A,float * buff_A,int rs_A,int cs_A)76 FLA_Error FLA_Ttmm_l_ops_var1( int mn_A,
77 float* buff_A, int rs_A, int cs_A )
78 {
79 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
80 int i;
81
82 for ( i = 0; i < mn_A; ++i )
83 {
84 float* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
85 float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
86 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
87
88 int mn_behind = i;
89
90 /*------------------------------------------------------------*/
91
92 // FLA_Herc_external( FLA_LOWER_TRIANGULAR, FLA_ONE, a10t, A00 );
93 bl1_ssyr( BLIS1_LOWER_TRIANGULAR,
94 mn_behind,
95 buff_1,
96 a10t, cs_A,
97 A00, rs_A, cs_A );
98
99 // FLA_Scal_external( alpha11, a10t );
100 bl1_sscalv( BLIS1_NO_CONJUGATE,
101 mn_behind,
102 alpha11,
103 a10t, cs_A );
104
105 // FLA_Absolute_square( alpha11 );
106 bl1_sabsqr( alpha11 );
107
108 /*------------------------------------------------------------*/
109
110 }
111
112 return FLA_SUCCESS;
113 }
114
115
116
FLA_Ttmm_l_opd_var1(int mn_A,double * buff_A,int rs_A,int cs_A)117 FLA_Error FLA_Ttmm_l_opd_var1( int mn_A,
118 double* buff_A, int rs_A, int cs_A )
119 {
120 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
121 int i;
122
123 for ( i = 0; i < mn_A; ++i )
124 {
125 double* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
126 double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
127 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
128
129 int mn_behind = i;
130
131 /*------------------------------------------------------------*/
132
133 // FLA_Herc_external( FLA_LOWER_TRIANGULAR, FLA_ONE, a10t, A00 );
134 bl1_dsyr( BLIS1_LOWER_TRIANGULAR,
135 mn_behind,
136 buff_1,
137 a10t, cs_A,
138 A00, rs_A, cs_A );
139
140 // FLA_Scal_external( alpha11, a10t );
141 bl1_dscalv( BLIS1_NO_CONJUGATE,
142 mn_behind,
143 alpha11,
144 a10t, cs_A );
145
146 // FLA_Absolute_square( alpha11 );
147 bl1_dabsqr( alpha11 );
148
149 /*------------------------------------------------------------*/
150
151 }
152
153 return FLA_SUCCESS;
154 }
155
156
157
FLA_Ttmm_l_opc_var1(int mn_A,scomplex * buff_A,int rs_A,int cs_A)158 FLA_Error FLA_Ttmm_l_opc_var1( int mn_A,
159 scomplex* buff_A, int rs_A, int cs_A )
160 {
161 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
162 int i;
163
164 for ( i = 0; i < mn_A; ++i )
165 {
166 scomplex* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
167 scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
168 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
169
170 int mn_behind = i;
171
172 /*------------------------------------------------------------*/
173
174 // FLA_Herc_external( FLA_LOWER_TRIANGULAR, FLA_ONE, a10t, A00 );
175 bl1_cher( BLIS1_LOWER_TRIANGULAR,
176 BLIS1_CONJUGATE,
177 mn_behind,
178 buff_1,
179 a10t, cs_A,
180 A00, rs_A, cs_A );
181
182 // FLA_Scal_external( alpha11, a10t );
183 bl1_cscalv( BLIS1_NO_CONJUGATE,
184 mn_behind,
185 alpha11,
186 a10t, cs_A );
187
188 // FLA_Absolute_square( alpha11 );
189 bl1_cabsqr( alpha11 );
190
191 /*------------------------------------------------------------*/
192
193 }
194
195 return FLA_SUCCESS;
196 }
197
198
199
FLA_Ttmm_l_opz_var1(int mn_A,dcomplex * buff_A,int rs_A,int cs_A)200 FLA_Error FLA_Ttmm_l_opz_var1( int mn_A,
201 dcomplex* buff_A, int rs_A, int cs_A )
202 {
203 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
204 int i;
205
206 for ( i = 0; i < mn_A; ++i )
207 {
208 dcomplex* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
209 dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
210 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
211
212 int mn_behind = i;
213
214 /*------------------------------------------------------------*/
215
216 // FLA_Herc_external( FLA_LOWER_TRIANGULAR, FLA_ONE, a10t, A00 );
217 bl1_zher( BLIS1_LOWER_TRIANGULAR,
218 BLIS1_CONJUGATE,
219 mn_behind,
220 buff_1,
221 a10t, cs_A,
222 A00, rs_A, cs_A );
223
224 // FLA_Scal_external( alpha11, a10t );
225 bl1_zscalv( BLIS1_NO_CONJUGATE,
226 mn_behind,
227 alpha11,
228 a10t, cs_A );
229
230 // FLA_Absolute_square( alpha11 );
231 bl1_zabsqr( alpha11 );
232
233 /*------------------------------------------------------------*/
234
235 }
236
237 return FLA_SUCCESS;
238 }
239
240 #endif
241