1 /*
2
3 Copyright (C) 2014, The University of Texas at Austin
4
5 This file is part of libflame and is available under the 3-Clause
6 BSD license, which can be found in the LICENSE file at the top-level
7 directory, or at http://opensource.org/licenses/BSD-3-Clause
8
9 */
10
11 #include "FLAME.h"
12
FLA_Eig_gest_il_opt_var4(FLA_Obj A,FLA_Obj Y,FLA_Obj B)13 FLA_Error FLA_Eig_gest_il_opt_var4( FLA_Obj A, FLA_Obj Y, FLA_Obj B )
14 {
15 FLA_Datatype datatype;
16 int m_AB;
17 int rs_A, cs_A;
18 int rs_B, cs_B;
19 int inc_y;
20 FLA_Obj yL, yR;
21
22 datatype = FLA_Obj_datatype( A );
23
24 m_AB = FLA_Obj_length( A );
25
26 rs_A = FLA_Obj_row_stride( A );
27 cs_A = FLA_Obj_col_stride( A );
28
29 rs_B = FLA_Obj_row_stride( B );
30 cs_B = FLA_Obj_col_stride( B );
31
32 FLA_Part_1x2( Y, &yL, &yR, 1, FLA_LEFT );
33
34 inc_y = FLA_Obj_vector_inc( yL );
35
36 switch ( datatype )
37 {
38 case FLA_FLOAT:
39 {
40 float* buff_A = FLA_FLOAT_PTR( A );
41 float* buff_y = FLA_FLOAT_PTR( yL );
42 float* buff_B = FLA_FLOAT_PTR( B );
43
44 FLA_Eig_gest_il_ops_var4( m_AB,
45 buff_A, rs_A, cs_A,
46 buff_y, inc_y,
47 buff_B, rs_B, cs_B );
48
49 break;
50 }
51
52 case FLA_DOUBLE:
53 {
54 double* buff_A = FLA_DOUBLE_PTR( A );
55 double* buff_y = FLA_DOUBLE_PTR( yL );
56 double* buff_B = FLA_DOUBLE_PTR( B );
57
58 FLA_Eig_gest_il_opd_var4( m_AB,
59 buff_A, rs_A, cs_A,
60 buff_y, inc_y,
61 buff_B, rs_B, cs_B );
62
63 break;
64 }
65
66 case FLA_COMPLEX:
67 {
68 scomplex* buff_A = FLA_COMPLEX_PTR( A );
69 scomplex* buff_y = FLA_COMPLEX_PTR( yL );
70 scomplex* buff_B = FLA_COMPLEX_PTR( B );
71
72 FLA_Eig_gest_il_opc_var4( m_AB,
73 buff_A, rs_A, cs_A,
74 buff_y, inc_y,
75 buff_B, rs_B, cs_B );
76
77 break;
78 }
79
80 case FLA_DOUBLE_COMPLEX:
81 {
82 dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
83 dcomplex* buff_y = FLA_DOUBLE_COMPLEX_PTR( yL );
84 dcomplex* buff_B = FLA_DOUBLE_COMPLEX_PTR( B );
85
86 FLA_Eig_gest_il_opz_var4( m_AB,
87 buff_A, rs_A, cs_A,
88 buff_y, inc_y,
89 buff_B, rs_B, cs_B );
90
91 break;
92 }
93 }
94
95 return FLA_SUCCESS;
96 }
97
98
99
FLA_Eig_gest_il_ops_var4(int m_AB,float * buff_A,int rs_A,int cs_A,float * buff_y,int inc_y,float * buff_B,int rs_B,int cs_B)100 FLA_Error FLA_Eig_gest_il_ops_var4( int m_AB,
101 float* buff_A, int rs_A, int cs_A,
102 float* buff_y, int inc_y,
103 float* buff_B, int rs_B, int cs_B )
104 {
105 float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
106 float* buff_m1h = FLA_FLOAT_PTR( FLA_MINUS_ONE_HALF );
107 int i;
108
109 for ( i = 0; i < m_AB; ++i )
110 {
111 float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
112 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
113 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
114 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
115 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
116
117 float* beta11 = buff_B + (i )*cs_B + (i )*rs_B;
118 float* b21 = buff_B + (i )*cs_B + (i+1)*rs_B;
119
120 float psi11;
121
122 int m_ahead = m_AB - i - 1;
123 int m_behind = i;
124
125 /*------------------------------------------------------------*/
126
127 // FLA_Inv_scal_external( beta11, a10t );
128 bl1_sinvscalv( BLIS1_NO_CONJUGATE,
129 m_behind,
130 beta11,
131 a10t, cs_A );
132
133 // FLA_Ger_external( FLA_MINUS_ONE, b21, a10t, A20 );
134 bl1_sger( BLIS1_NO_CONJUGATE,
135 BLIS1_NO_CONJUGATE,
136 m_ahead,
137 m_behind,
138 buff_m1,
139 b21, rs_B,
140 a10t, cs_A,
141 A20, rs_A, cs_A );
142
143 // FLA_Inv_scal_external( beta11, alpha11 );
144 // FLA_Inv_scal_external( beta11, alpha11 );
145 bl1_sinvscals( beta11, alpha11 );
146 bl1_sinvscals( beta11, alpha11 );
147
148 // FLA_Copy_external( alpha11, psi11 );
149 // FLA_Scal_external( FLA_MINUS_ONE_HALF, psi11 );
150 bl1_smult3( buff_m1h, alpha11, &psi11 );
151
152 // FLA_Inv_scal_external( beta11, a21 );
153 bl1_sinvscalv( BLIS1_NO_CONJUGATE,
154 m_ahead,
155 beta11,
156 a21, rs_A );
157
158 // FLA_Axpy_external( psi11, b21, a21 );
159 bl1_saxpyv( BLIS1_NO_CONJUGATE,
160 m_ahead,
161 &psi11,
162 b21, rs_B,
163 a21, rs_A );
164
165 // FLA_Her2c_external( FLA_LOWER_TRIANGULAR, FLA_NO_CONJUGATE,
166 // FLA_MINUS_ONE, a21, b21, A22 );
167 bl1_sher2( BLIS1_LOWER_TRIANGULAR,
168 BLIS1_NO_CONJUGATE,
169 m_ahead,
170 buff_m1,
171 a21, rs_A,
172 b21, rs_B,
173 A22, rs_A, cs_A );
174
175 // FLA_Axpy_external( psi11, b21, a21 );
176 bl1_saxpyv( BLIS1_NO_CONJUGATE,
177 m_ahead,
178 &psi11,
179 b21, rs_B,
180 a21, rs_A );
181
182 /*------------------------------------------------------------*/
183
184 }
185
186 return FLA_SUCCESS;
187 }
188
189
190
FLA_Eig_gest_il_opd_var4(int m_AB,double * buff_A,int rs_A,int cs_A,double * buff_y,int inc_y,double * buff_B,int rs_B,int cs_B)191 FLA_Error FLA_Eig_gest_il_opd_var4( int m_AB,
192 double* buff_A, int rs_A, int cs_A,
193 double* buff_y, int inc_y,
194 double* buff_B, int rs_B, int cs_B )
195 {
196 double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
197 double* buff_m1h = FLA_DOUBLE_PTR( FLA_MINUS_ONE_HALF );
198 int i;
199
200 for ( i = 0; i < m_AB; ++i )
201 {
202 double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
203 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
204 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
205 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
206 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
207
208 double* beta11 = buff_B + (i )*cs_B + (i )*rs_B;
209 double* b21 = buff_B + (i )*cs_B + (i+1)*rs_B;
210
211 double psi11;
212
213 int m_ahead = m_AB - i - 1;
214 int m_behind = i;
215
216 /*------------------------------------------------------------*/
217
218 // FLA_Inv_scal_external( beta11, a10t );
219 bl1_dinvscalv( BLIS1_NO_CONJUGATE,
220 m_behind,
221 beta11,
222 a10t, cs_A );
223
224 // FLA_Ger_external( FLA_MINUS_ONE, b21, a10t, A20 );
225 bl1_dger( BLIS1_NO_CONJUGATE,
226 BLIS1_NO_CONJUGATE,
227 m_ahead,
228 m_behind,
229 buff_m1,
230 b21, rs_B,
231 a10t, cs_A,
232 A20, rs_A, cs_A );
233
234 // FLA_Inv_scal_external( beta11, alpha11 );
235 // FLA_Inv_scal_external( beta11, alpha11 );
236 bl1_dinvscals( beta11, alpha11 );
237 bl1_dinvscals( beta11, alpha11 );
238
239 // FLA_Copy_external( alpha11, psi11 );
240 // FLA_Scal_external( FLA_MINUS_ONE_HALF, psi11 );
241 bl1_dmult3( buff_m1h, alpha11, &psi11 );
242
243 // FLA_Inv_scal_external( beta11, a21 );
244 bl1_dinvscalv( BLIS1_NO_CONJUGATE,
245 m_ahead,
246 beta11,
247 a21, rs_A );
248
249 // FLA_Axpy_external( psi11, b21, a21 );
250 bl1_daxpyv( BLIS1_NO_CONJUGATE,
251 m_ahead,
252 &psi11,
253 b21, rs_B,
254 a21, rs_A );
255
256 // FLA_Her2c_external( FLA_LOWER_TRIANGULAR, FLA_NO_CONJUGATE,
257 // FLA_MINUS_ONE, a21, b21, A22 );
258 bl1_dher2( BLIS1_LOWER_TRIANGULAR,
259 BLIS1_NO_CONJUGATE,
260 m_ahead,
261 buff_m1,
262 a21, rs_A,
263 b21, rs_B,
264 A22, rs_A, cs_A );
265
266 // FLA_Axpy_external( psi11, b21, a21 );
267 bl1_daxpyv( BLIS1_NO_CONJUGATE,
268 m_ahead,
269 &psi11,
270 b21, rs_B,
271 a21, rs_A );
272
273 /*------------------------------------------------------------*/
274
275 }
276
277 return FLA_SUCCESS;
278 }
279
280
281
FLA_Eig_gest_il_opc_var4(int m_AB,scomplex * buff_A,int rs_A,int cs_A,scomplex * buff_y,int inc_y,scomplex * buff_B,int rs_B,int cs_B)282 FLA_Error FLA_Eig_gest_il_opc_var4( int m_AB,
283 scomplex* buff_A, int rs_A, int cs_A,
284 scomplex* buff_y, int inc_y,
285 scomplex* buff_B, int rs_B, int cs_B )
286 {
287 scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
288 scomplex* buff_m1h = FLA_COMPLEX_PTR( FLA_MINUS_ONE_HALF );
289 int i;
290
291 for ( i = 0; i < m_AB; ++i )
292 {
293 scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
294 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
295 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
296 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
297 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
298
299 scomplex* beta11 = buff_B + (i )*cs_B + (i )*rs_B;
300 scomplex* b21 = buff_B + (i )*cs_B + (i+1)*rs_B;
301
302 scomplex psi11;
303
304 int m_ahead = m_AB - i - 1;
305 int m_behind = i;
306
307 /*------------------------------------------------------------*/
308
309 // FLA_Inv_scal_external( beta11, a10t );
310 bl1_cinvscalv( BLIS1_NO_CONJUGATE,
311 m_behind,
312 beta11,
313 a10t, cs_A );
314
315 // FLA_Ger_external( FLA_MINUS_ONE, b21, a10t, A20 );
316 bl1_cger( BLIS1_NO_CONJUGATE,
317 BLIS1_NO_CONJUGATE,
318 m_ahead,
319 m_behind,
320 buff_m1,
321 b21, rs_B,
322 a10t, cs_A,
323 A20, rs_A, cs_A );
324
325 // FLA_Inv_scal_external( beta11, alpha11 );
326 // FLA_Inv_scal_external( beta11, alpha11 );
327 bl1_cinvscals( beta11, alpha11 );
328 bl1_cinvscals( beta11, alpha11 );
329
330 // FLA_Copy_external( alpha11, psi11 );
331 // FLA_Scal_external( FLA_MINUS_ONE_HALF, psi11 );
332 bl1_cmult3( buff_m1h, alpha11, &psi11 );
333
334 // FLA_Inv_scal_external( beta11, a21 );
335 bl1_cinvscalv( BLIS1_NO_CONJUGATE,
336 m_ahead,
337 beta11,
338 a21, rs_A );
339
340 // FLA_Axpy_external( psi11, b21, a21 );
341 bl1_caxpyv( BLIS1_NO_CONJUGATE,
342 m_ahead,
343 &psi11,
344 b21, rs_B,
345 a21, rs_A );
346
347 // FLA_Her2c_external( FLA_LOWER_TRIANGULAR, FLA_NO_CONJUGATE,
348 // FLA_MINUS_ONE, a21, b21, A22 );
349 bl1_cher2( BLIS1_LOWER_TRIANGULAR,
350 BLIS1_NO_CONJUGATE,
351 m_ahead,
352 buff_m1,
353 a21, rs_A,
354 b21, rs_B,
355 A22, rs_A, cs_A );
356
357 // FLA_Axpy_external( psi11, b21, a21 );
358 bl1_caxpyv( BLIS1_NO_CONJUGATE,
359 m_ahead,
360 &psi11,
361 b21, rs_B,
362 a21, rs_A );
363
364 /*------------------------------------------------------------*/
365
366 }
367
368 return FLA_SUCCESS;
369 }
370
371
372
FLA_Eig_gest_il_opz_var4(int m_AB,dcomplex * buff_A,int rs_A,int cs_A,dcomplex * buff_y,int inc_y,dcomplex * buff_B,int rs_B,int cs_B)373 FLA_Error FLA_Eig_gest_il_opz_var4( int m_AB,
374 dcomplex* buff_A, int rs_A, int cs_A,
375 dcomplex* buff_y, int inc_y,
376 dcomplex* buff_B, int rs_B, int cs_B )
377 {
378 dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
379 dcomplex* buff_m1h = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE_HALF );
380 int i;
381
382 for ( i = 0; i < m_AB; ++i )
383 {
384 dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
385 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
386 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
387 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
388 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
389
390 dcomplex* beta11 = buff_B + (i )*cs_B + (i )*rs_B;
391 dcomplex* b21 = buff_B + (i )*cs_B + (i+1)*rs_B;
392
393 dcomplex psi11;
394
395 int m_ahead = m_AB - i - 1;
396 int m_behind = i;
397
398 /*------------------------------------------------------------*/
399
400 // FLA_Inv_scal_external( beta11, a10t );
401 bl1_zinvscalv( BLIS1_NO_CONJUGATE,
402 m_behind,
403 beta11,
404 a10t, cs_A );
405
406 // FLA_Ger_external( FLA_MINUS_ONE, b21, a10t, A20 );
407 bl1_zger( BLIS1_NO_CONJUGATE,
408 BLIS1_NO_CONJUGATE,
409 m_ahead,
410 m_behind,
411 buff_m1,
412 b21, rs_B,
413 a10t, cs_A,
414 A20, rs_A, cs_A );
415
416 // FLA_Inv_scal_external( beta11, alpha11 );
417 // FLA_Inv_scal_external( beta11, alpha11 );
418 bl1_zinvscals( beta11, alpha11 );
419 bl1_zinvscals( beta11, alpha11 );
420
421 // FLA_Copy_external( alpha11, psi11 );
422 // FLA_Scal_external( FLA_MINUS_ONE_HALF, psi11 );
423 bl1_zmult3( buff_m1h, alpha11, &psi11 );
424
425 // FLA_Inv_scal_external( beta11, a21 );
426 bl1_zinvscalv( BLIS1_NO_CONJUGATE,
427 m_ahead,
428 beta11,
429 a21, rs_A );
430
431 // FLA_Axpy_external( psi11, b21, a21 );
432 bl1_zaxpyv( BLIS1_NO_CONJUGATE,
433 m_ahead,
434 &psi11,
435 b21, rs_B,
436 a21, rs_A );
437
438 // FLA_Her2c_external( FLA_LOWER_TRIANGULAR, FLA_NO_CONJUGATE,
439 // FLA_MINUS_ONE, a21, b21, A22 );
440 bl1_zher2( BLIS1_LOWER_TRIANGULAR,
441 BLIS1_NO_CONJUGATE,
442 m_ahead,
443 buff_m1,
444 a21, rs_A,
445 b21, rs_B,
446 A22, rs_A, cs_A );
447
448 // FLA_Axpy_external( psi11, b21, a21 );
449 bl1_zaxpyv( BLIS1_NO_CONJUGATE,
450 m_ahead,
451 &psi11,
452 b21, rs_B,
453 a21, rs_A );
454
455 /*------------------------------------------------------------*/
456
457 }
458
459 return FLA_SUCCESS;
460 }
461
462