1 /*
2
3 Copyright (C) 2014, The University of Texas at Austin
4
5 This file is part of libflame and is available under the 3-Clause
6 BSD license, which can be found in the LICENSE file at the top-level
7 directory, or at http://opensource.org/licenses/BSD-3-Clause
8
9 */
10
11 #include "FLAME.h"
12
13 #ifdef FLA_ENABLE_NON_CRITICAL_CODE
14
FLA_Eig_gest_nu_opt_var4(FLA_Obj A,FLA_Obj Y,FLA_Obj B)15 FLA_Error FLA_Eig_gest_nu_opt_var4( FLA_Obj A, FLA_Obj Y, FLA_Obj B )
16 {
17 FLA_Datatype datatype;
18 int m_AB;
19 int rs_A, cs_A;
20 int rs_B, cs_B;
21 int inc_y;
22 FLA_Obj yL, yR;
23
24 datatype = FLA_Obj_datatype( A );
25
26 m_AB = FLA_Obj_length( A );
27
28 rs_A = FLA_Obj_row_stride( A );
29 cs_A = FLA_Obj_col_stride( A );
30
31 rs_B = FLA_Obj_row_stride( B );
32 cs_B = FLA_Obj_col_stride( B );
33
34 FLA_Part_1x2( Y, &yL, &yR, 1, FLA_LEFT );
35
36 inc_y = FLA_Obj_vector_inc( yL );
37
38 switch ( datatype )
39 {
40 case FLA_FLOAT:
41 {
42 float* buff_A = FLA_FLOAT_PTR( A );
43 float* buff_y = FLA_FLOAT_PTR( yL );
44 float* buff_B = FLA_FLOAT_PTR( B );
45
46 FLA_Eig_gest_nu_ops_var4( m_AB,
47 buff_A, rs_A, cs_A,
48 buff_y, inc_y,
49 buff_B, rs_B, cs_B );
50
51 break;
52 }
53
54 case FLA_DOUBLE:
55 {
56 double* buff_A = FLA_DOUBLE_PTR( A );
57 double* buff_y = FLA_DOUBLE_PTR( yL );
58 double* buff_B = FLA_DOUBLE_PTR( B );
59
60 FLA_Eig_gest_nu_opd_var4( m_AB,
61 buff_A, rs_A, cs_A,
62 buff_y, inc_y,
63 buff_B, rs_B, cs_B );
64
65 break;
66 }
67
68 case FLA_COMPLEX:
69 {
70 scomplex* buff_A = FLA_COMPLEX_PTR( A );
71 scomplex* buff_y = FLA_COMPLEX_PTR( yL );
72 scomplex* buff_B = FLA_COMPLEX_PTR( B );
73
74 FLA_Eig_gest_nu_opc_var4( m_AB,
75 buff_A, rs_A, cs_A,
76 buff_y, inc_y,
77 buff_B, rs_B, cs_B );
78
79 break;
80 }
81
82 case FLA_DOUBLE_COMPLEX:
83 {
84 dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
85 dcomplex* buff_y = FLA_DOUBLE_COMPLEX_PTR( yL );
86 dcomplex* buff_B = FLA_DOUBLE_COMPLEX_PTR( B );
87
88 FLA_Eig_gest_nu_opz_var4( m_AB,
89 buff_A, rs_A, cs_A,
90 buff_y, inc_y,
91 buff_B, rs_B, cs_B );
92
93 break;
94 }
95 }
96
97 return FLA_SUCCESS;
98 }
99
100
101
FLA_Eig_gest_nu_ops_var4(int m_AB,float * buff_A,int rs_A,int cs_A,float * buff_y,int inc_y,float * buff_B,int rs_B,int cs_B)102 FLA_Error FLA_Eig_gest_nu_ops_var4( int m_AB,
103 float* buff_A, int rs_A, int cs_A,
104 float* buff_y, int inc_y,
105 float* buff_B, int rs_B, int cs_B )
106 {
107 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
108 float* buff_1h = FLA_FLOAT_PTR( FLA_ONE_HALF );
109 int i;
110
111 for ( i = 0; i < m_AB; ++i )
112 {
113 float* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
114 float* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
115 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
116 float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
117 float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
118
119 float* b01 = buff_B + (i )*cs_B + (0 )*rs_B;
120 float* beta11 = buff_B + (i )*cs_B + (i )*rs_B;
121
122 float psi11;
123
124 int m_ahead = m_AB - i - 1;
125 int m_behind = i;
126
127 /*------------------------------------------------------------*/
128
129 // FLA_Copy_external( alpha11, psi11 );
130 // FLA_Scal_external( FLA_ONE_HALF, psi11 );
131 bl1_smult3( buff_1h, alpha11, &psi11 );
132
133 // FLA_Axpy_external( psi11, b01, a01 );
134 bl1_saxpyv( BLIS1_NO_CONJUGATE,
135 m_behind,
136 &psi11,
137 b01, rs_B,
138 a01, rs_A );
139
140 // FLA_Her2c_external( FLA_UPPER_TRIANGULAR, FLA_NO_CONJUGATE,
141 // FLA_ONE, a01, b01, A00 );
142 bl1_sher2( BLIS1_UPPER_TRIANGULAR,
143 BLIS1_NO_CONJUGATE,
144 m_behind,
145 buff_1,
146 a01, rs_A,
147 b01, rs_B,
148 A00, rs_A, cs_A );
149
150 // FLA_Axpy_external( psi11, b01, a01 );
151 bl1_saxpyv( BLIS1_NO_CONJUGATE,
152 m_behind,
153 &psi11,
154 b01, rs_B,
155 a01, rs_A );
156
157 // FLA_Scal_external( beta11, a01 );
158 bl1_sscalv( BLIS1_NO_CONJUGATE,
159 m_behind,
160 beta11,
161 a01, rs_A );
162
163 // FLA_Scal_external( beta11, alpha11 );
164 bl1_sscals( beta11, alpha11 );
165 bl1_sscals( beta11, alpha11 );
166
167 // FLA_Ger_external( FLA_ONE, b01, a12t, A02 );
168 bl1_sger( BLIS1_NO_CONJUGATE,
169 BLIS1_NO_CONJUGATE,
170 m_behind,
171 m_ahead,
172 buff_1,
173 b01, rs_B,
174 a12t, cs_A,
175 A02, rs_A, cs_A );
176
177 // FLA_Scal_external( beta11, a12t );
178 bl1_sscalv( BLIS1_NO_CONJUGATE,
179 m_ahead,
180 beta11,
181 a12t, cs_A );
182
183 /*------------------------------------------------------------*/
184
185 }
186
187 return FLA_SUCCESS;
188 }
189
190
191
FLA_Eig_gest_nu_opd_var4(int m_AB,double * buff_A,int rs_A,int cs_A,double * buff_y,int inc_y,double * buff_B,int rs_B,int cs_B)192 FLA_Error FLA_Eig_gest_nu_opd_var4( int m_AB,
193 double* buff_A, int rs_A, int cs_A,
194 double* buff_y, int inc_y,
195 double* buff_B, int rs_B, int cs_B )
196 {
197 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
198 double* buff_1h = FLA_DOUBLE_PTR( FLA_ONE_HALF );
199 int i;
200
201 for ( i = 0; i < m_AB; ++i )
202 {
203 double* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
204 double* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
205 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
206 double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
207 double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
208
209 double* b01 = buff_B + (i )*cs_B + (0 )*rs_B;
210 double* beta11 = buff_B + (i )*cs_B + (i )*rs_B;
211
212 double psi11;
213
214 int m_ahead = m_AB - i - 1;
215 int m_behind = i;
216
217 /*------------------------------------------------------------*/
218
219 // FLA_Copy_external( alpha11, psi11 );
220 // FLA_Scal_external( FLA_ONE_HALF, psi11 );
221 bl1_dmult3( buff_1h, alpha11, &psi11 );
222
223 // FLA_Axpy_external( psi11, b01, a01 );
224 bl1_daxpyv( BLIS1_NO_CONJUGATE,
225 m_behind,
226 &psi11,
227 b01, rs_B,
228 a01, rs_A );
229
230 // FLA_Her2c_external( FLA_UPPER_TRIANGULAR, FLA_NO_CONJUGATE,
231 // FLA_ONE, a01, b01, A00 );
232 bl1_dher2( BLIS1_UPPER_TRIANGULAR,
233 BLIS1_NO_CONJUGATE,
234 m_behind,
235 buff_1,
236 a01, rs_A,
237 b01, rs_B,
238 A00, rs_A, cs_A );
239
240 // FLA_Axpy_external( psi11, b01, a01 );
241 bl1_daxpyv( BLIS1_NO_CONJUGATE,
242 m_behind,
243 &psi11,
244 b01, rs_B,
245 a01, rs_A );
246
247 // FLA_Scal_external( beta11, a01 );
248 bl1_dscalv( BLIS1_NO_CONJUGATE,
249 m_behind,
250 beta11,
251 a01, rs_A );
252
253 // FLA_Scal_external( beta11, alpha11 );
254 bl1_dscals( beta11, alpha11 );
255 bl1_dscals( beta11, alpha11 );
256
257 // FLA_Ger_external( FLA_ONE, b01, a12t, A02 );
258 bl1_dger( BLIS1_NO_CONJUGATE,
259 BLIS1_NO_CONJUGATE,
260 m_behind,
261 m_ahead,
262 buff_1,
263 b01, rs_B,
264 a12t, cs_A,
265 A02, rs_A, cs_A );
266
267 // FLA_Scal_external( beta11, a12t );
268 bl1_dscalv( BLIS1_NO_CONJUGATE,
269 m_ahead,
270 beta11,
271 a12t, cs_A );
272
273 /*------------------------------------------------------------*/
274
275 }
276
277 return FLA_SUCCESS;
278 }
279
280
281
FLA_Eig_gest_nu_opc_var4(int m_AB,scomplex * buff_A,int rs_A,int cs_A,scomplex * buff_y,int inc_y,scomplex * buff_B,int rs_B,int cs_B)282 FLA_Error FLA_Eig_gest_nu_opc_var4( int m_AB,
283 scomplex* buff_A, int rs_A, int cs_A,
284 scomplex* buff_y, int inc_y,
285 scomplex* buff_B, int rs_B, int cs_B )
286 {
287 scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
288 scomplex* buff_1h = FLA_COMPLEX_PTR( FLA_ONE_HALF );
289 int i;
290
291 for ( i = 0; i < m_AB; ++i )
292 {
293 scomplex* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
294 scomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
295 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
296 scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
297 scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
298
299 scomplex* b01 = buff_B + (i )*cs_B + (0 )*rs_B;
300 scomplex* beta11 = buff_B + (i )*cs_B + (i )*rs_B;
301
302 scomplex psi11;
303
304 int m_ahead = m_AB - i - 1;
305 int m_behind = i;
306
307 /*------------------------------------------------------------*/
308
309 // FLA_Copy_external( alpha11, psi11 );
310 // FLA_Scal_external( FLA_ONE_HALF, psi11 );
311 bl1_cmult3( buff_1h, alpha11, &psi11 );
312
313 // FLA_Axpy_external( psi11, b01, a01 );
314 bl1_caxpyv( BLIS1_NO_CONJUGATE,
315 m_behind,
316 &psi11,
317 b01, rs_B,
318 a01, rs_A );
319
320 // FLA_Her2c_external( FLA_UPPER_TRIANGULAR, FLA_NO_CONJUGATE,
321 // FLA_ONE, a01, b01, A00 );
322 bl1_cher2( BLIS1_UPPER_TRIANGULAR,
323 BLIS1_NO_CONJUGATE,
324 m_behind,
325 buff_1,
326 a01, rs_A,
327 b01, rs_B,
328 A00, rs_A, cs_A );
329
330 // FLA_Axpy_external( psi11, b01, a01 );
331 bl1_caxpyv( BLIS1_NO_CONJUGATE,
332 m_behind,
333 &psi11,
334 b01, rs_B,
335 a01, rs_A );
336
337 // FLA_Scal_external( beta11, a01 );
338 bl1_cscalv( BLIS1_NO_CONJUGATE,
339 m_behind,
340 beta11,
341 a01, rs_A );
342
343 // FLA_Scal_external( beta11, alpha11 );
344 bl1_cscals( beta11, alpha11 );
345 bl1_cscals( beta11, alpha11 );
346
347 // FLA_Ger_external( FLA_ONE, b01, a12t, A02 );
348 bl1_cger( BLIS1_NO_CONJUGATE,
349 BLIS1_NO_CONJUGATE,
350 m_behind,
351 m_ahead,
352 buff_1,
353 b01, rs_B,
354 a12t, cs_A,
355 A02, rs_A, cs_A );
356
357 // FLA_Scal_external( beta11, a12t );
358 bl1_cscalv( BLIS1_NO_CONJUGATE,
359 m_ahead,
360 beta11,
361 a12t, cs_A );
362
363 /*------------------------------------------------------------*/
364
365 }
366
367 return FLA_SUCCESS;
368 }
369
370
371
FLA_Eig_gest_nu_opz_var4(int m_AB,dcomplex * buff_A,int rs_A,int cs_A,dcomplex * buff_y,int inc_y,dcomplex * buff_B,int rs_B,int cs_B)372 FLA_Error FLA_Eig_gest_nu_opz_var4( int m_AB,
373 dcomplex* buff_A, int rs_A, int cs_A,
374 dcomplex* buff_y, int inc_y,
375 dcomplex* buff_B, int rs_B, int cs_B )
376 {
377 dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
378 dcomplex* buff_1h = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE_HALF );
379 int i;
380
381 for ( i = 0; i < m_AB; ++i )
382 {
383 dcomplex* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
384 dcomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
385 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
386 dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
387 dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
388
389 dcomplex* b01 = buff_B + (i )*cs_B + (0 )*rs_B;
390 dcomplex* beta11 = buff_B + (i )*cs_B + (i )*rs_B;
391
392 dcomplex psi11;
393
394 int m_ahead = m_AB - i - 1;
395 int m_behind = i;
396
397 /*------------------------------------------------------------*/
398
399 // FLA_Copy_external( alpha11, psi11 );
400 // FLA_Scal_external( FLA_ONE_HALF, psi11 );
401 bl1_zmult3( buff_1h, alpha11, &psi11 );
402
403 // FLA_Axpy_external( psi11, b01, a01 );
404 bl1_zaxpyv( BLIS1_NO_CONJUGATE,
405 m_behind,
406 &psi11,
407 b01, rs_B,
408 a01, rs_A );
409
410 // FLA_Her2c_external( FLA_UPPER_TRIANGULAR, FLA_NO_CONJUGATE,
411 // FLA_ONE, a01, b01, A00 );
412 bl1_zher2( BLIS1_UPPER_TRIANGULAR,
413 BLIS1_NO_CONJUGATE,
414 m_behind,
415 buff_1,
416 a01, rs_A,
417 b01, rs_B,
418 A00, rs_A, cs_A );
419
420 // FLA_Axpy_external( psi11, b01, a01 );
421 bl1_zaxpyv( BLIS1_NO_CONJUGATE,
422 m_behind,
423 &psi11,
424 b01, rs_B,
425 a01, rs_A );
426
427 // FLA_Scal_external( beta11, a01 );
428 bl1_zscalv( BLIS1_NO_CONJUGATE,
429 m_behind,
430 beta11,
431 a01, rs_A );
432
433 // FLA_Scal_external( beta11, alpha11 );
434 bl1_zscals( beta11, alpha11 );
435 bl1_zscals( beta11, alpha11 );
436
437 // FLA_Ger_external( FLA_ONE, b01, a12t, A02 );
438 bl1_zger( BLIS1_NO_CONJUGATE,
439 BLIS1_NO_CONJUGATE,
440 m_behind,
441 m_ahead,
442 buff_1,
443 b01, rs_B,
444 a12t, cs_A,
445 A02, rs_A, cs_A );
446
447 // FLA_Scal_external( beta11, a12t );
448 bl1_zscalv( BLIS1_NO_CONJUGATE,
449 m_ahead,
450 beta11,
451 a12t, cs_A );
452
453 /*------------------------------------------------------------*/
454
455 }
456
457 return FLA_SUCCESS;
458 }
459
460 #endif
461