1 /*
2
3 Copyright (C) 2014, The University of Texas at Austin
4
5 This file is part of libflame and is available under the 3-Clause
6 BSD license, which can be found in the LICENSE file at the top-level
7 directory, or at http://opensource.org/licenses/BSD-3-Clause
8
9 */
10
11 #include "FLAME.h"
12
FLA_Eig_gest_nl_opt_var1(FLA_Obj A,FLA_Obj Y,FLA_Obj B)13 FLA_Error FLA_Eig_gest_nl_opt_var1( FLA_Obj A, FLA_Obj Y, FLA_Obj B )
14 {
15 FLA_Datatype datatype;
16 int m_AB;
17 int rs_A, cs_A;
18 int rs_B, cs_B;
19 int inc_y;
20 FLA_Obj yL, yR;
21
22 datatype = FLA_Obj_datatype( A );
23
24 m_AB = FLA_Obj_length( A );
25
26 rs_A = FLA_Obj_row_stride( A );
27 cs_A = FLA_Obj_col_stride( A );
28
29 rs_B = FLA_Obj_row_stride( B );
30 cs_B = FLA_Obj_col_stride( B );
31
32 FLA_Part_1x2( Y, &yL, &yR, 1, FLA_LEFT );
33
34 inc_y = FLA_Obj_vector_inc( yL );
35
36 switch ( datatype )
37 {
38 case FLA_FLOAT:
39 {
40 float* buff_A = FLA_FLOAT_PTR( A );
41 float* buff_y = FLA_FLOAT_PTR( yL );
42 float* buff_B = FLA_FLOAT_PTR( B );
43
44 FLA_Eig_gest_nl_ops_var1( m_AB,
45 buff_A, rs_A, cs_A,
46 buff_y, inc_y,
47 buff_B, rs_B, cs_B );
48
49 break;
50 }
51
52 case FLA_DOUBLE:
53 {
54 double* buff_A = FLA_DOUBLE_PTR( A );
55 double* buff_y = FLA_DOUBLE_PTR( yL );
56 double* buff_B = FLA_DOUBLE_PTR( B );
57
58 FLA_Eig_gest_nl_opd_var1( m_AB,
59 buff_A, rs_A, cs_A,
60 buff_y, inc_y,
61 buff_B, rs_B, cs_B );
62
63 break;
64 }
65
66 case FLA_COMPLEX:
67 {
68 scomplex* buff_A = FLA_COMPLEX_PTR( A );
69 scomplex* buff_y = FLA_COMPLEX_PTR( yL );
70 scomplex* buff_B = FLA_COMPLEX_PTR( B );
71
72 FLA_Eig_gest_nl_opc_var1( m_AB,
73 buff_A, rs_A, cs_A,
74 buff_y, inc_y,
75 buff_B, rs_B, cs_B );
76
77 break;
78 }
79
80 case FLA_DOUBLE_COMPLEX:
81 {
82 dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
83 dcomplex* buff_y = FLA_DOUBLE_COMPLEX_PTR( yL );
84 dcomplex* buff_B = FLA_DOUBLE_COMPLEX_PTR( B );
85
86 FLA_Eig_gest_nl_opz_var1( m_AB,
87 buff_A, rs_A, cs_A,
88 buff_y, inc_y,
89 buff_B, rs_B, cs_B );
90
91 break;
92 }
93 }
94
95 return FLA_SUCCESS;
96 }
97
98
99
FLA_Eig_gest_nl_ops_var1(int m_AB,float * buff_A,int rs_A,int cs_A,float * buff_y,int inc_y,float * buff_B,int rs_B,int cs_B)100 FLA_Error FLA_Eig_gest_nl_ops_var1( int m_AB,
101 float* buff_A, int rs_A, int cs_A,
102 float* buff_y, int inc_y,
103 float* buff_B, int rs_B, int cs_B )
104 {
105 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
106 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
107 float* buff_1h = FLA_FLOAT_PTR( FLA_ONE_HALF );
108 int i;
109
110 for ( i = 0; i < m_AB; ++i )
111 {
112 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
113 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
114 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
115
116 float* y21 = buff_y + (i+1)*inc_y;
117
118 float* beta11 = buff_B + (i )*cs_B + (i )*rs_B;
119 float* b21 = buff_B + (i )*cs_B + (i+1)*rs_B;
120 float* B22 = buff_B + (i+1)*cs_B + (i+1)*rs_B;
121
122 int m_ahead = m_AB - i - 1;
123
124 /*------------------------------------------------------------*/
125
126 // FLA_Hemv_external( FLA_LOWER_TRIANGULAR,
127 // FLA_ONE, A22, b21, FLA_ZERO, y21_l );
128 bl1_shemv( BLIS1_LOWER_TRIANGULAR,
129 BLIS1_NO_CONJUGATE,
130 m_ahead,
131 buff_1,
132 A22, rs_A, cs_A,
133 b21, rs_B,
134 buff_0,
135 y21, inc_y );
136
137 // FLA_Scal_external( beta11, a21 );
138 bl1_sscalv( BLIS1_NO_CONJUGATE,
139 m_ahead,
140 beta11,
141 a21, rs_A );
142
143 // FLA_Axpy_external( FLA_ONE_HALF, y21_l, a21 );
144 bl1_saxpyv( BLIS1_NO_CONJUGATE,
145 m_ahead,
146 buff_1h,
147 y21, inc_y,
148 a21, rs_A );
149
150 // FLA_Scal_external( beta11, alpha11 );
151 // FLA_Scal_external( beta11, alpha11 );
152 bl1_sscals( beta11, alpha11 );
153 bl1_sscals( beta11, alpha11 );
154
155 // FLA_Dot2cs_external( FLA_CONJUGATE, FLA_ONE, a21, b21, FLA_ONE, alpha11 );
156 bl1_sdot2s( BLIS1_CONJUGATE,
157 m_ahead,
158 buff_1,
159 a21, rs_A,
160 b21, rs_B,
161 buff_1,
162 alpha11 );
163
164 // FLA_Axpy_external( FLA_ONE_HALF, y21_l, a21 );
165 bl1_saxpyv( BLIS1_NO_CONJUGATE,
166 m_ahead,
167 buff_1h,
168 y21, inc_y,
169 a21, rs_A );
170
171 // FLA_Trmv_external( FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
172 // B22, a21 );
173 bl1_strmv( BLIS1_LOWER_TRIANGULAR,
174 BLIS1_CONJ_TRANSPOSE,
175 BLIS1_NONUNIT_DIAG,
176 m_ahead,
177 B22, rs_B, cs_B,
178 a21, rs_A );
179
180 /*------------------------------------------------------------*/
181
182 }
183
184 return FLA_SUCCESS;
185 }
186
187
188
FLA_Eig_gest_nl_opd_var1(int m_AB,double * buff_A,int rs_A,int cs_A,double * buff_y,int inc_y,double * buff_B,int rs_B,int cs_B)189 FLA_Error FLA_Eig_gest_nl_opd_var1( int m_AB,
190 double* buff_A, int rs_A, int cs_A,
191 double* buff_y, int inc_y,
192 double* buff_B, int rs_B, int cs_B )
193 {
194 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
195 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
196 double* buff_1h = FLA_DOUBLE_PTR( FLA_ONE_HALF );
197 int i;
198
199 for ( i = 0; i < m_AB; ++i )
200 {
201 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
202 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
203 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
204
205 double* y21 = buff_y + (i+1)*inc_y;
206
207 double* beta11 = buff_B + (i )*cs_B + (i )*rs_B;
208 double* b21 = buff_B + (i )*cs_B + (i+1)*rs_B;
209 double* B22 = buff_B + (i+1)*cs_B + (i+1)*rs_B;
210
211 int m_ahead = m_AB - i - 1;
212
213 /*------------------------------------------------------------*/
214
215 // FLA_Hemv_external( FLA_LOWER_TRIANGULAR,
216 // FLA_ONE, A22, b21, FLA_ZERO, y21_l );
217 bl1_dhemv( BLIS1_LOWER_TRIANGULAR,
218 BLIS1_NO_CONJUGATE,
219 m_ahead,
220 buff_1,
221 A22, rs_A, cs_A,
222 b21, rs_B,
223 buff_0,
224 y21, inc_y );
225
226 // FLA_Scal_external( beta11, a21 );
227 bl1_dscalv( BLIS1_NO_CONJUGATE,
228 m_ahead,
229 beta11,
230 a21, rs_A );
231
232 // FLA_Axpy_external( FLA_ONE_HALF, y21_l, a21 );
233 bl1_daxpyv( BLIS1_NO_CONJUGATE,
234 m_ahead,
235 buff_1h,
236 y21, inc_y,
237 a21, rs_A );
238
239 // FLA_Scal_external( beta11, alpha11 );
240 // FLA_Scal_external( beta11, alpha11 );
241 bl1_dscals( beta11, alpha11 );
242 bl1_dscals( beta11, alpha11 );
243
244 // FLA_Dot2cs_external( FLA_CONJUGATE, FLA_ONE, a21, b21, FLA_ONE, alpha11 );
245 bl1_ddot2s( BLIS1_CONJUGATE,
246 m_ahead,
247 buff_1,
248 a21, rs_A,
249 b21, rs_B,
250 buff_1,
251 alpha11 );
252
253 // FLA_Axpy_external( FLA_ONE_HALF, y21_l, a21 );
254 bl1_daxpyv( BLIS1_NO_CONJUGATE,
255 m_ahead,
256 buff_1h,
257 y21, inc_y,
258 a21, rs_A );
259
260 // FLA_Trmv_external( FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
261 // B22, a21 );
262 bl1_dtrmv( BLIS1_LOWER_TRIANGULAR,
263 BLIS1_CONJ_TRANSPOSE,
264 BLIS1_NONUNIT_DIAG,
265 m_ahead,
266 B22, rs_B, cs_B,
267 a21, rs_A );
268
269 /*------------------------------------------------------------*/
270
271 }
272
273 return FLA_SUCCESS;
274 }
275
276
277
FLA_Eig_gest_nl_opc_var1(int m_AB,scomplex * buff_A,int rs_A,int cs_A,scomplex * buff_y,int inc_y,scomplex * buff_B,int rs_B,int cs_B)278 FLA_Error FLA_Eig_gest_nl_opc_var1( int m_AB,
279 scomplex* buff_A, int rs_A, int cs_A,
280 scomplex* buff_y, int inc_y,
281 scomplex* buff_B, int rs_B, int cs_B )
282 {
283 scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
284 scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
285 scomplex* buff_1h = FLA_COMPLEX_PTR( FLA_ONE_HALF );
286 int i;
287
288 for ( i = 0; i < m_AB; ++i )
289 {
290 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
291 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
292 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
293
294 scomplex* y21 = buff_y + (i+1)*inc_y;
295
296 scomplex* beta11 = buff_B + (i )*cs_B + (i )*rs_B;
297 scomplex* b21 = buff_B + (i )*cs_B + (i+1)*rs_B;
298 scomplex* B22 = buff_B + (i+1)*cs_B + (i+1)*rs_B;
299
300 int m_ahead = m_AB - i - 1;
301
302 /*------------------------------------------------------------*/
303
304 // FLA_Hemv_external( FLA_LOWER_TRIANGULAR,
305 // FLA_ONE, A22, b21, FLA_ZERO, y21_l );
306 bl1_chemv( BLIS1_LOWER_TRIANGULAR,
307 BLIS1_NO_CONJUGATE,
308 m_ahead,
309 buff_1,
310 A22, rs_A, cs_A,
311 b21, rs_B,
312 buff_0,
313 y21, inc_y );
314
315 // FLA_Scal_external( beta11, a21 );
316 bl1_cscalv( BLIS1_NO_CONJUGATE,
317 m_ahead,
318 beta11,
319 a21, rs_A );
320
321 // FLA_Axpy_external( FLA_ONE_HALF, y21_l, a21 );
322 bl1_caxpyv( BLIS1_NO_CONJUGATE,
323 m_ahead,
324 buff_1h,
325 y21, inc_y,
326 a21, rs_A );
327
328 // FLA_Scal_external( beta11, alpha11 );
329 // FLA_Scal_external( beta11, alpha11 );
330 bl1_cscals( beta11, alpha11 );
331 bl1_cscals( beta11, alpha11 );
332
333 // FLA_Dot2cs_external( FLA_CONJUGATE, FLA_ONE, a21, b21, FLA_ONE, alpha11 );
334 bl1_cdot2s( BLIS1_CONJUGATE,
335 m_ahead,
336 buff_1,
337 a21, rs_A,
338 b21, rs_B,
339 buff_1,
340 alpha11 );
341
342 // FLA_Axpy_external( FLA_ONE_HALF, y21_l, a21 );
343 bl1_caxpyv( BLIS1_NO_CONJUGATE,
344 m_ahead,
345 buff_1h,
346 y21, inc_y,
347 a21, rs_A );
348
349 // FLA_Trmv_external( FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
350 // B22, a21 );
351 bl1_ctrmv( BLIS1_LOWER_TRIANGULAR,
352 BLIS1_CONJ_TRANSPOSE,
353 BLIS1_NONUNIT_DIAG,
354 m_ahead,
355 B22, rs_B, cs_B,
356 a21, rs_A );
357
358 /*------------------------------------------------------------*/
359
360 }
361
362 return FLA_SUCCESS;
363 }
364
365
366
FLA_Eig_gest_nl_opz_var1(int m_AB,dcomplex * buff_A,int rs_A,int cs_A,dcomplex * buff_y,int inc_y,dcomplex * buff_B,int rs_B,int cs_B)367 FLA_Error FLA_Eig_gest_nl_opz_var1( int m_AB,
368 dcomplex* buff_A, int rs_A, int cs_A,
369 dcomplex* buff_y, int inc_y,
370 dcomplex* buff_B, int rs_B, int cs_B )
371 {
372 dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
373 dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
374 dcomplex* buff_1h = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE_HALF );
375 int i;
376
377 for ( i = 0; i < m_AB; ++i )
378 {
379 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
380 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
381 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
382
383 dcomplex* y21 = buff_y + (i+1)*inc_y;
384
385 dcomplex* beta11 = buff_B + (i )*cs_B + (i )*rs_B;
386 dcomplex* b21 = buff_B + (i )*cs_B + (i+1)*rs_B;
387 dcomplex* B22 = buff_B + (i+1)*cs_B + (i+1)*rs_B;
388
389 int m_ahead = m_AB - i - 1;
390
391 /*------------------------------------------------------------*/
392
393 // FLA_Hemv_external( FLA_LOWER_TRIANGULAR,
394 // FLA_ONE, A22, b21, FLA_ZERO, y21_l );
395 bl1_zhemv( BLIS1_LOWER_TRIANGULAR,
396 BLIS1_NO_CONJUGATE,
397 m_ahead,
398 buff_1,
399 A22, rs_A, cs_A,
400 b21, rs_B,
401 buff_0,
402 y21, inc_y );
403
404 // FLA_Scal_external( beta11, a21 );
405 bl1_zscalv( BLIS1_NO_CONJUGATE,
406 m_ahead,
407 beta11,
408 a21, rs_A );
409
410 // FLA_Axpy_external( FLA_ONE_HALF, y21_l, a21 );
411 bl1_zaxpyv( BLIS1_NO_CONJUGATE,
412 m_ahead,
413 buff_1h,
414 y21, inc_y,
415 a21, rs_A );
416
417 // FLA_Scal_external( beta11, alpha11 );
418 // FLA_Scal_external( beta11, alpha11 );
419 bl1_zscals( beta11, alpha11 );
420 bl1_zscals( beta11, alpha11 );
421
422 // FLA_Dot2cs_external( FLA_CONJUGATE, FLA_ONE, a21, b21, FLA_ONE, alpha11 );
423 bl1_zdot2s( BLIS1_CONJUGATE,
424 m_ahead,
425 buff_1,
426 a21, rs_A,
427 b21, rs_B,
428 buff_1,
429 alpha11 );
430
431 // FLA_Axpy_external( FLA_ONE_HALF, y21_l, a21 );
432 bl1_zaxpyv( BLIS1_NO_CONJUGATE,
433 m_ahead,
434 buff_1h,
435 y21, inc_y,
436 a21, rs_A );
437
438 // FLA_Trmv_external( FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
439 // B22, a21 );
440 bl1_ztrmv( BLIS1_LOWER_TRIANGULAR,
441 BLIS1_CONJ_TRANSPOSE,
442 BLIS1_NONUNIT_DIAG,
443 m_ahead,
444 B22, rs_B, cs_B,
445 a21, rs_A );
446
447 /*------------------------------------------------------------*/
448
449 }
450
451 return FLA_SUCCESS;
452 }
453
454