1 /*
2
3 Copyright (C) 2014, The University of Texas at Austin
4
5 This file is part of libflame and is available under the 3-Clause
6 BSD license, which can be found in the LICENSE file at the top-level
7 directory, or at http://opensource.org/licenses/BSD-3-Clause
8
9 */
10
11 #include "FLAME.h"
12
FLA_Eig_gest_nl_opt_var2(FLA_Obj A,FLA_Obj Y,FLA_Obj B)13 FLA_Error FLA_Eig_gest_nl_opt_var2( FLA_Obj A, FLA_Obj Y, FLA_Obj B )
14 {
15 FLA_Datatype datatype;
16 int m_AB;
17 int rs_A, cs_A;
18 int rs_B, cs_B;
19 int inc_y;
20 FLA_Obj yL, yR;
21
22 datatype = FLA_Obj_datatype( A );
23
24 m_AB = FLA_Obj_length( A );
25
26 rs_A = FLA_Obj_row_stride( A );
27 cs_A = FLA_Obj_col_stride( A );
28
29 rs_B = FLA_Obj_row_stride( B );
30 cs_B = FLA_Obj_col_stride( B );
31
32 FLA_Part_1x2( Y, &yL, &yR, 1, FLA_LEFT );
33
34 inc_y = FLA_Obj_vector_inc( yL );
35
36 switch ( datatype )
37 {
38 case FLA_FLOAT:
39 {
40 float* buff_A = FLA_FLOAT_PTR( A );
41 float* buff_y = FLA_FLOAT_PTR( yL );
42 float* buff_B = FLA_FLOAT_PTR( B );
43
44 FLA_Eig_gest_nl_ops_var2( m_AB,
45 buff_A, rs_A, cs_A,
46 buff_y, inc_y,
47 buff_B, rs_B, cs_B );
48
49 break;
50 }
51
52 case FLA_DOUBLE:
53 {
54 double* buff_A = FLA_DOUBLE_PTR( A );
55 double* buff_y = FLA_DOUBLE_PTR( yL );
56 double* buff_B = FLA_DOUBLE_PTR( B );
57
58 FLA_Eig_gest_nl_opd_var2( m_AB,
59 buff_A, rs_A, cs_A,
60 buff_y, inc_y,
61 buff_B, rs_B, cs_B );
62
63 break;
64 }
65
66 case FLA_COMPLEX:
67 {
68 scomplex* buff_A = FLA_COMPLEX_PTR( A );
69 scomplex* buff_y = FLA_COMPLEX_PTR( yL );
70 scomplex* buff_B = FLA_COMPLEX_PTR( B );
71
72 FLA_Eig_gest_nl_opc_var2( m_AB,
73 buff_A, rs_A, cs_A,
74 buff_y, inc_y,
75 buff_B, rs_B, cs_B );
76
77 break;
78 }
79
80 case FLA_DOUBLE_COMPLEX:
81 {
82 dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
83 dcomplex* buff_y = FLA_DOUBLE_COMPLEX_PTR( yL );
84 dcomplex* buff_B = FLA_DOUBLE_COMPLEX_PTR( B );
85
86 FLA_Eig_gest_nl_opz_var2( m_AB,
87 buff_A, rs_A, cs_A,
88 buff_y, inc_y,
89 buff_B, rs_B, cs_B );
90
91 break;
92 }
93 }
94
95 return FLA_SUCCESS;
96 }
97
98
99
FLA_Eig_gest_nl_ops_var2(int m_AB,float * buff_A,int rs_A,int cs_A,float * buff_y,int inc_y,float * buff_B,int rs_B,int cs_B)100 FLA_Error FLA_Eig_gest_nl_ops_var2( int m_AB,
101 float* buff_A, int rs_A, int cs_A,
102 float* buff_y, int inc_y,
103 float* buff_B, int rs_B, int cs_B )
104 {
105 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
106 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
107 float* buff_1h = FLA_FLOAT_PTR( FLA_ONE_HALF );
108 int i;
109
110 for ( i = 0; i < m_AB; ++i )
111 {
112 float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
113 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
114 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
115 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
116 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
117
118 float* y21 = buff_y + (i+1)*inc_y;
119
120 float* beta11 = buff_B + (i )*cs_B + (i )*rs_B;
121 float* b21 = buff_B + (i )*cs_B + (i+1)*rs_B;
122
123 int m_ahead = m_AB - i - 1;
124 int m_behind = i;
125
126 /*------------------------------------------------------------*/
127
128 // FLA_Scal_external( beta11, a10t );
129 bl1_sscalv( BLIS1_NO_CONJUGATE,
130 m_behind,
131 beta11,
132 a10t, cs_A );
133
134 // FLA_Gemvc_external( FLA_TRANSPOSE, FLA_CONJUGATE,
135 // FLA_ONE, A20, b21, FLA_ONE, a10t );
136 bl1_sgemv( BLIS1_TRANSPOSE,
137 BLIS1_CONJUGATE,
138 m_ahead,
139 m_behind,
140 buff_1,
141 A20, rs_A, cs_A,
142 b21, rs_B,
143 buff_1,
144 a10t, cs_A );
145
146 // FLA_Hemv_external( FLA_LOWER_TRIANGULAR,
147 // FLA_ONE, A22, b21, FLA_ZERO, y21_l );
148 bl1_shemv( BLIS1_LOWER_TRIANGULAR,
149 BLIS1_NO_CONJUGATE,
150 m_ahead,
151 buff_1,
152 A22, rs_A, cs_A,
153 b21, rs_B,
154 buff_0,
155 y21, inc_y );
156
157 // FLA_Scal_external( beta11, a21 );
158 bl1_sscalv( BLIS1_NO_CONJUGATE,
159 m_ahead,
160 beta11,
161 a21, rs_A );
162
163 // FLA_Axpy_external( FLA_ONE_HALF, y21_l, a21 );
164 bl1_saxpyv( BLIS1_NO_CONJUGATE,
165 m_ahead,
166 buff_1h,
167 y21, inc_y,
168 a21, rs_A );
169
170 // FLA_Scal_external( beta11, alpha11 );
171 // FLA_Scal_external( beta11, alpha11 );
172 bl1_sscals( beta11, alpha11 );
173 bl1_sscals( beta11, alpha11 );
174
175 // FLA_Dot2cs_external( FLA_CONJUGATE, FLA_ONE, a21, b21, FLA_ONE, alpha11 );
176 bl1_sdot2s( BLIS1_CONJUGATE,
177 m_ahead,
178 buff_1,
179 a21, rs_A,
180 b21, rs_B,
181 buff_1,
182 alpha11 );
183
184 // FLA_Axpy_external( FLA_ONE_HALF, y21_l, a21 );
185 bl1_saxpyv( BLIS1_NO_CONJUGATE,
186 m_ahead,
187 buff_1h,
188 y21, inc_y,
189 a21, rs_A );
190
191 /*------------------------------------------------------------*/
192
193 }
194
195 return FLA_SUCCESS;
196 }
197
198
199
FLA_Eig_gest_nl_opd_var2(int m_AB,double * buff_A,int rs_A,int cs_A,double * buff_y,int inc_y,double * buff_B,int rs_B,int cs_B)200 FLA_Error FLA_Eig_gest_nl_opd_var2( int m_AB,
201 double* buff_A, int rs_A, int cs_A,
202 double* buff_y, int inc_y,
203 double* buff_B, int rs_B, int cs_B )
204 {
205 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
206 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
207 double* buff_1h = FLA_DOUBLE_PTR( FLA_ONE_HALF );
208 int i;
209
210 for ( i = 0; i < m_AB; ++i )
211 {
212 double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
213 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
214 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
215 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
216 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
217
218 double* y21 = buff_y + (i+1)*inc_y;
219
220 double* beta11 = buff_B + (i )*cs_B + (i )*rs_B;
221 double* b21 = buff_B + (i )*cs_B + (i+1)*rs_B;
222
223 int m_ahead = m_AB - i - 1;
224 int m_behind = i;
225
226 /*------------------------------------------------------------*/
227
228 // FLA_Scal_external( beta11, a10t );
229 bl1_dscalv( BLIS1_NO_CONJUGATE,
230 m_behind,
231 beta11,
232 a10t, cs_A );
233
234 // FLA_Gemvc_external( FLA_TRANSPOSE, FLA_CONJUGATE,
235 // FLA_ONE, A20, b21, FLA_ONE, a10t );
236 bl1_dgemv( BLIS1_TRANSPOSE,
237 BLIS1_CONJUGATE,
238 m_ahead,
239 m_behind,
240 buff_1,
241 A20, rs_A, cs_A,
242 b21, rs_B,
243 buff_1,
244 a10t, cs_A );
245
246 // FLA_Hemv_external( FLA_LOWER_TRIANGULAR,
247 // FLA_ONE, A22, b21, FLA_ZERO, y21_l );
248 bl1_dhemv( BLIS1_LOWER_TRIANGULAR,
249 BLIS1_NO_CONJUGATE,
250 m_ahead,
251 buff_1,
252 A22, rs_A, cs_A,
253 b21, rs_B,
254 buff_0,
255 y21, inc_y );
256
257 // FLA_Scal_external( beta11, a21 );
258 bl1_dscalv( BLIS1_NO_CONJUGATE,
259 m_ahead,
260 beta11,
261 a21, rs_A );
262
263 // FLA_Axpy_external( FLA_ONE_HALF, y21_l, a21 );
264 bl1_daxpyv( BLIS1_NO_CONJUGATE,
265 m_ahead,
266 buff_1h,
267 y21, inc_y,
268 a21, rs_A );
269
270 // FLA_Scal_external( beta11, alpha11 );
271 // FLA_Scal_external( beta11, alpha11 );
272 bl1_dscals( beta11, alpha11 );
273 bl1_dscals( beta11, alpha11 );
274
275 // FLA_Dot2cs_external( FLA_CONJUGATE, FLA_ONE, a21, b21, FLA_ONE, alpha11 );
276 bl1_ddot2s( BLIS1_CONJUGATE,
277 m_ahead,
278 buff_1,
279 a21, rs_A,
280 b21, rs_B,
281 buff_1,
282 alpha11 );
283
284 // FLA_Axpy_external( FLA_ONE_HALF, y21_l, a21 );
285 bl1_daxpyv( BLIS1_NO_CONJUGATE,
286 m_ahead,
287 buff_1h,
288 y21, inc_y,
289 a21, rs_A );
290
291 /*------------------------------------------------------------*/
292
293 }
294
295 return FLA_SUCCESS;
296 }
297
298
299
FLA_Eig_gest_nl_opc_var2(int m_AB,scomplex * buff_A,int rs_A,int cs_A,scomplex * buff_y,int inc_y,scomplex * buff_B,int rs_B,int cs_B)300 FLA_Error FLA_Eig_gest_nl_opc_var2( int m_AB,
301 scomplex* buff_A, int rs_A, int cs_A,
302 scomplex* buff_y, int inc_y,
303 scomplex* buff_B, int rs_B, int cs_B )
304 {
305 scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
306 scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
307 scomplex* buff_1h = FLA_COMPLEX_PTR( FLA_ONE_HALF );
308 int i;
309
310 for ( i = 0; i < m_AB; ++i )
311 {
312 scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
313 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
314 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
315 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
316 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
317
318 scomplex* y21 = buff_y + (i+1)*inc_y;
319
320 scomplex* beta11 = buff_B + (i )*cs_B + (i )*rs_B;
321 scomplex* b21 = buff_B + (i )*cs_B + (i+1)*rs_B;
322
323 int m_ahead = m_AB - i - 1;
324 int m_behind = i;
325
326 /*------------------------------------------------------------*/
327
328 // FLA_Scal_external( beta11, a10t );
329 bl1_cscalv( BLIS1_NO_CONJUGATE,
330 m_behind,
331 beta11,
332 a10t, cs_A );
333
334 // FLA_Gemvc_external( FLA_TRANSPOSE, FLA_CONJUGATE,
335 // FLA_ONE, A20, b21, FLA_ONE, a10t );
336 bl1_cgemv( BLIS1_TRANSPOSE,
337 BLIS1_CONJUGATE,
338 m_ahead,
339 m_behind,
340 buff_1,
341 A20, rs_A, cs_A,
342 b21, rs_B,
343 buff_1,
344 a10t, cs_A );
345
346 // FLA_Hemv_external( FLA_LOWER_TRIANGULAR,
347 // FLA_ONE, A22, b21, FLA_ZERO, y21_l );
348 bl1_chemv( BLIS1_LOWER_TRIANGULAR,
349 BLIS1_NO_CONJUGATE,
350 m_ahead,
351 buff_1,
352 A22, rs_A, cs_A,
353 b21, rs_B,
354 buff_0,
355 y21, inc_y );
356
357 // FLA_Scal_external( beta11, a21 );
358 bl1_cscalv( BLIS1_NO_CONJUGATE,
359 m_ahead,
360 beta11,
361 a21, rs_A );
362
363 // FLA_Axpy_external( FLA_ONE_HALF, y21_l, a21 );
364 bl1_caxpyv( BLIS1_NO_CONJUGATE,
365 m_ahead,
366 buff_1h,
367 y21, inc_y,
368 a21, rs_A );
369
370 // FLA_Scal_external( beta11, alpha11 );
371 // FLA_Scal_external( beta11, alpha11 );
372 bl1_cscals( beta11, alpha11 );
373 bl1_cscals( beta11, alpha11 );
374
375 // FLA_Dot2cs_external( FLA_CONJUGATE, FLA_ONE, a21, b21, FLA_ONE, alpha11 );
376 bl1_cdot2s( BLIS1_CONJUGATE,
377 m_ahead,
378 buff_1,
379 a21, rs_A,
380 b21, rs_B,
381 buff_1,
382 alpha11 );
383
384 // FLA_Axpy_external( FLA_ONE_HALF, y21_l, a21 );
385 bl1_caxpyv( BLIS1_NO_CONJUGATE,
386 m_ahead,
387 buff_1h,
388 y21, inc_y,
389 a21, rs_A );
390
391 /*------------------------------------------------------------*/
392
393 }
394
395 return FLA_SUCCESS;
396 }
397
398
399
FLA_Eig_gest_nl_opz_var2(int m_AB,dcomplex * buff_A,int rs_A,int cs_A,dcomplex * buff_y,int inc_y,dcomplex * buff_B,int rs_B,int cs_B)400 FLA_Error FLA_Eig_gest_nl_opz_var2( int m_AB,
401 dcomplex* buff_A, int rs_A, int cs_A,
402 dcomplex* buff_y, int inc_y,
403 dcomplex* buff_B, int rs_B, int cs_B )
404 {
405 dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
406 dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
407 dcomplex* buff_1h = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE_HALF );
408 int i;
409
410 for ( i = 0; i < m_AB; ++i )
411 {
412 dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
413 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
414 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
415 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
416 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
417
418 dcomplex* y21 = buff_y + (i+1)*inc_y;
419
420 dcomplex* beta11 = buff_B + (i )*cs_B + (i )*rs_B;
421 dcomplex* b21 = buff_B + (i )*cs_B + (i+1)*rs_B;
422
423 int m_ahead = m_AB - i - 1;
424 int m_behind = i;
425
426 /*------------------------------------------------------------*/
427
428 // FLA_Scal_external( beta11, a10t );
429 bl1_zscalv( BLIS1_NO_CONJUGATE,
430 m_behind,
431 beta11,
432 a10t, cs_A );
433
434 // FLA_Gemvc_external( FLA_TRANSPOSE, FLA_CONJUGATE,
435 // FLA_ONE, A20, b21, FLA_ONE, a10t );
436 bl1_zgemv( BLIS1_TRANSPOSE,
437 BLIS1_CONJUGATE,
438 m_ahead,
439 m_behind,
440 buff_1,
441 A20, rs_A, cs_A,
442 b21, rs_B,
443 buff_1,
444 a10t, cs_A );
445
446 // FLA_Hemv_external( FLA_LOWER_TRIANGULAR,
447 // FLA_ONE, A22, b21, FLA_ZERO, y21_l );
448 bl1_zhemv( BLIS1_LOWER_TRIANGULAR,
449 BLIS1_NO_CONJUGATE,
450 m_ahead,
451 buff_1,
452 A22, rs_A, cs_A,
453 b21, rs_B,
454 buff_0,
455 y21, inc_y );
456
457 // FLA_Scal_external( beta11, a21 );
458 bl1_zscalv( BLIS1_NO_CONJUGATE,
459 m_ahead,
460 beta11,
461 a21, rs_A );
462
463 // FLA_Axpy_external( FLA_ONE_HALF, y21_l, a21 );
464 bl1_zaxpyv( BLIS1_NO_CONJUGATE,
465 m_ahead,
466 buff_1h,
467 y21, inc_y,
468 a21, rs_A );
469
470 // FLA_Scal_external( beta11, alpha11 );
471 // FLA_Scal_external( beta11, alpha11 );
472 bl1_zscals( beta11, alpha11 );
473 bl1_zscals( beta11, alpha11 );
474
475 // FLA_Dot2cs_external( FLA_CONJUGATE, FLA_ONE, a21, b21, FLA_ONE, alpha11 );
476 bl1_zdot2s( BLIS1_CONJUGATE,
477 m_ahead,
478 buff_1,
479 a21, rs_A,
480 b21, rs_B,
481 buff_1,
482 alpha11 );
483
484 // FLA_Axpy_external( FLA_ONE_HALF, y21_l, a21 );
485 bl1_zaxpyv( BLIS1_NO_CONJUGATE,
486 m_ahead,
487 buff_1h,
488 y21, inc_y,
489 a21, rs_A );
490
491 /*------------------------------------------------------------*/
492
493 }
494
495 return FLA_SUCCESS;
496 }
497
498