1 /*
2
3 Copyright (C) 2014, The University of Texas at Austin
4
5 This file is part of libflame and is available under the 3-Clause
6 BSD license, which can be found in the LICENSE file at the top-level
7 directory, or at http://opensource.org/licenses/BSD-3-Clause
8
9 */
10
11 #include "FLAME.h"
12
FLA_Eig_gest_nl_opt_var4(FLA_Obj A,FLA_Obj Y,FLA_Obj B)13 FLA_Error FLA_Eig_gest_nl_opt_var4( FLA_Obj A, FLA_Obj Y, FLA_Obj B )
14 {
15 FLA_Datatype datatype;
16 int m_AB;
17 int rs_A, cs_A;
18 int rs_B, cs_B;
19 int inc_y;
20 FLA_Obj yT, yB;
21
22 datatype = FLA_Obj_datatype( A );
23
24 m_AB = FLA_Obj_length( A );
25
26 rs_A = FLA_Obj_row_stride( A );
27 cs_A = FLA_Obj_col_stride( A );
28
29 rs_B = FLA_Obj_row_stride( B );
30 cs_B = FLA_Obj_col_stride( B );
31
32 FLA_Part_2x1( Y, &yT,
33 &yB, 1, FLA_TOP );
34
35 inc_y = FLA_Obj_vector_inc( yT );
36
37 switch ( datatype )
38 {
39 case FLA_FLOAT:
40 {
41 float* buff_A = FLA_FLOAT_PTR( A );
42 float* buff_y = FLA_FLOAT_PTR( yT );
43 float* buff_B = FLA_FLOAT_PTR( B );
44
45 FLA_Eig_gest_nl_ops_var4( m_AB,
46 buff_A, rs_A, cs_A,
47 buff_y, inc_y,
48 buff_B, rs_B, cs_B );
49
50 break;
51 }
52
53 case FLA_DOUBLE:
54 {
55 double* buff_A = FLA_DOUBLE_PTR( A );
56 double* buff_y = FLA_DOUBLE_PTR( yT );
57 double* buff_B = FLA_DOUBLE_PTR( B );
58
59 FLA_Eig_gest_nl_opd_var4( m_AB,
60 buff_A, rs_A, cs_A,
61 buff_y, inc_y,
62 buff_B, rs_B, cs_B );
63
64 break;
65 }
66
67 case FLA_COMPLEX:
68 {
69 scomplex* buff_A = FLA_COMPLEX_PTR( A );
70 scomplex* buff_y = FLA_COMPLEX_PTR( yT );
71 scomplex* buff_B = FLA_COMPLEX_PTR( B );
72
73 FLA_Eig_gest_nl_opc_var4( m_AB,
74 buff_A, rs_A, cs_A,
75 buff_y, inc_y,
76 buff_B, rs_B, cs_B );
77
78 break;
79 }
80
81 case FLA_DOUBLE_COMPLEX:
82 {
83 dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
84 dcomplex* buff_y = FLA_DOUBLE_COMPLEX_PTR( yT );
85 dcomplex* buff_B = FLA_DOUBLE_COMPLEX_PTR( B );
86
87 FLA_Eig_gest_nl_opz_var4( m_AB,
88 buff_A, rs_A, cs_A,
89 buff_y, inc_y,
90 buff_B, rs_B, cs_B );
91
92 break;
93 }
94 }
95
96 return FLA_SUCCESS;
97 }
98
99
100
FLA_Eig_gest_nl_ops_var4(int m_AB,float * buff_A,int rs_A,int cs_A,float * buff_y,int inc_y,float * buff_B,int rs_B,int cs_B)101 FLA_Error FLA_Eig_gest_nl_ops_var4( int m_AB,
102 float* buff_A, int rs_A, int cs_A,
103 float* buff_y, int inc_y,
104 float* buff_B, int rs_B, int cs_B )
105 {
106 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
107 float* buff_1h = FLA_FLOAT_PTR( FLA_ONE_HALF );
108 int i;
109
110 for ( i = 0; i < m_AB; ++i )
111 {
112 float* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
113 float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
114 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
115 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
116 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
117
118 float* b10t = buff_B + (0 )*cs_B + (i )*rs_B;
119 float* beta11 = buff_B + (i )*cs_B + (i )*rs_B;
120
121 float psi11;
122
123 int m_ahead = m_AB - i - 1;
124 int m_behind = i;
125
126 /*------------------------------------------------------------*/
127
128 // FLA_Copy_external( alpha11, psi11 );
129 // FLA_Scal_external( FLA_ONE_HALF, psi11 );
130 bl1_smult3( buff_1h, alpha11, &psi11 );
131
132 // FLA_Axpy_external( psi11, b10t, a10t );
133 bl1_saxpyv( BLIS1_NO_CONJUGATE,
134 m_behind,
135 &psi11,
136 b10t, cs_B,
137 a10t, cs_A );
138
139 // FLA_Her2c_external( FLA_LOWER_TRIANGULAR, FLA_CONJUGATE,
140 // FLA_ONE, a10t, b10t, A00 );
141 bl1_sher2( BLIS1_LOWER_TRIANGULAR,
142 BLIS1_CONJUGATE,
143 m_behind,
144 buff_1,
145 a10t, cs_A,
146 b10t, cs_B,
147 A00, rs_A, cs_A );
148
149 // FLA_Axpy_external( psi11, b10t, a10t );
150 bl1_saxpyv( BLIS1_NO_CONJUGATE,
151 m_behind,
152 &psi11,
153 b10t, cs_B,
154 a10t, cs_A );
155
156 // FLA_Scal_external( beta11, a10t );
157 bl1_sscalv( BLIS1_NO_CONJUGATE,
158 m_behind,
159 beta11,
160 a10t, cs_A );
161
162 // FLA_Scal_external( beta11, alpha11 );
163 // FLA_Scal_external( beta11, alpha11 );
164 bl1_sscals( beta11, alpha11 );
165 bl1_sscals( beta11, alpha11 );
166
167 // FLA_Ger_external( FLA_ONE, a21, b10t, A20 );
168 bl1_sger( BLIS1_NO_CONJUGATE,
169 BLIS1_NO_CONJUGATE,
170 m_ahead,
171 m_behind,
172 buff_1,
173 a21, rs_A,
174 b10t, cs_B,
175 A20, rs_A, cs_A );
176
177 // FLA_Scal_external( beta11, a21 );
178 bl1_sscalv( BLIS1_NO_CONJUGATE,
179 m_ahead,
180 beta11,
181 a21, rs_A );
182
183 /*------------------------------------------------------------*/
184
185 }
186
187 return FLA_SUCCESS;
188 }
189
190
191
FLA_Eig_gest_nl_opd_var4(int m_AB,double * buff_A,int rs_A,int cs_A,double * buff_y,int inc_y,double * buff_B,int rs_B,int cs_B)192 FLA_Error FLA_Eig_gest_nl_opd_var4( int m_AB,
193 double* buff_A, int rs_A, int cs_A,
194 double* buff_y, int inc_y,
195 double* buff_B, int rs_B, int cs_B )
196 {
197 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
198 double* buff_1h = FLA_DOUBLE_PTR( FLA_ONE_HALF );
199 int i;
200
201 for ( i = 0; i < m_AB; ++i )
202 {
203 double* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
204 double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
205 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
206 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
207 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
208
209 double* b10t = buff_B + (0 )*cs_B + (i )*rs_B;
210 double* beta11 = buff_B + (i )*cs_B + (i )*rs_B;
211
212 double psi11;
213
214 int m_ahead = m_AB - i - 1;
215 int m_behind = i;
216
217 /*------------------------------------------------------------*/
218
219 // FLA_Copy_external( alpha11, psi11 );
220 // FLA_Scal_external( FLA_ONE_HALF, psi11 );
221 bl1_dmult3( buff_1h, alpha11, &psi11 );
222
223 // FLA_Axpy_external( psi11, b10t, a10t );
224 bl1_daxpyv( BLIS1_NO_CONJUGATE,
225 m_behind,
226 &psi11,
227 b10t, cs_B,
228 a10t, cs_A );
229
230 // FLA_Her2c_external( FLA_LOWER_TRIANGULAR, FLA_CONJUGATE,
231 // FLA_ONE, a10t, b10t, A00 );
232 bl1_dher2( BLIS1_LOWER_TRIANGULAR,
233 BLIS1_CONJUGATE,
234 m_behind,
235 buff_1,
236 a10t, cs_A,
237 b10t, cs_B,
238 A00, rs_A, cs_A );
239
240 // FLA_Axpy_external( psi11, b10t, a10t );
241 bl1_daxpyv( BLIS1_NO_CONJUGATE,
242 m_behind,
243 &psi11,
244 b10t, cs_B,
245 a10t, cs_A );
246
247 // FLA_Scal_external( beta11, a10t );
248 bl1_dscalv( BLIS1_NO_CONJUGATE,
249 m_behind,
250 beta11,
251 a10t, cs_A );
252
253 // FLA_Scal_external( beta11, alpha11 );
254 // FLA_Scal_external( beta11, alpha11 );
255 bl1_dscals( beta11, alpha11 );
256 bl1_dscals( beta11, alpha11 );
257
258 // FLA_Ger_external( FLA_ONE, a21, b10t, A20 );
259 bl1_dger( BLIS1_NO_CONJUGATE,
260 BLIS1_NO_CONJUGATE,
261 m_ahead,
262 m_behind,
263 buff_1,
264 a21, rs_A,
265 b10t, cs_B,
266 A20, rs_A, cs_A );
267
268 // FLA_Scal_external( beta11, a21 );
269 bl1_dscalv( BLIS1_NO_CONJUGATE,
270 m_ahead,
271 beta11,
272 a21, rs_A );
273
274 /*------------------------------------------------------------*/
275
276 }
277
278 return FLA_SUCCESS;
279 }
280
281
282
FLA_Eig_gest_nl_opc_var4(int m_AB,scomplex * buff_A,int rs_A,int cs_A,scomplex * buff_y,int inc_y,scomplex * buff_B,int rs_B,int cs_B)283 FLA_Error FLA_Eig_gest_nl_opc_var4( int m_AB,
284 scomplex* buff_A, int rs_A, int cs_A,
285 scomplex* buff_y, int inc_y,
286 scomplex* buff_B, int rs_B, int cs_B )
287 {
288 scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
289 scomplex* buff_1h = FLA_COMPLEX_PTR( FLA_ONE_HALF );
290 int i;
291
292 for ( i = 0; i < m_AB; ++i )
293 {
294 scomplex* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
295 scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
296 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
297 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
298 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
299
300 scomplex* b10t = buff_B + (0 )*cs_B + (i )*rs_B;
301 scomplex* beta11 = buff_B + (i )*cs_B + (i )*rs_B;
302
303 scomplex psi11;
304
305 int m_ahead = m_AB - i - 1;
306 int m_behind = i;
307
308 /*------------------------------------------------------------*/
309
310 // FLA_Copy_external( alpha11, psi11 );
311 // FLA_Scal_external( FLA_ONE_HALF, psi11 );
312 bl1_cmult3( buff_1h, alpha11, &psi11 );
313
314 // FLA_Axpy_external( psi11, b10t, a10t );
315 bl1_caxpyv( BLIS1_NO_CONJUGATE,
316 m_behind,
317 &psi11,
318 b10t, cs_B,
319 a10t, cs_A );
320
321 // FLA_Her2c_external( FLA_LOWER_TRIANGULAR, FLA_CONJUGATE,
322 // FLA_ONE, a10t, b10t, A00 );
323 bl1_cher2( BLIS1_LOWER_TRIANGULAR,
324 BLIS1_CONJUGATE,
325 m_behind,
326 buff_1,
327 a10t, cs_A,
328 b10t, cs_B,
329 A00, rs_A, cs_A );
330
331 // FLA_Axpy_external( psi11, b10t, a10t );
332 bl1_caxpyv( BLIS1_NO_CONJUGATE,
333 m_behind,
334 &psi11,
335 b10t, cs_B,
336 a10t, cs_A );
337
338 // FLA_Scal_external( beta11, a10t );
339 bl1_cscalv( BLIS1_NO_CONJUGATE,
340 m_behind,
341 beta11,
342 a10t, cs_A );
343
344 // FLA_Scal_external( beta11, alpha11 );
345 // FLA_Scal_external( beta11, alpha11 );
346 bl1_cscals( beta11, alpha11 );
347 bl1_cscals( beta11, alpha11 );
348
349 // FLA_Ger_external( FLA_ONE, a21, b10t, A20 );
350 bl1_cger( BLIS1_NO_CONJUGATE,
351 BLIS1_NO_CONJUGATE,
352 m_ahead,
353 m_behind,
354 buff_1,
355 a21, rs_A,
356 b10t, cs_B,
357 A20, rs_A, cs_A );
358
359 // FLA_Scal_external( beta11, a21 );
360 bl1_cscalv( BLIS1_NO_CONJUGATE,
361 m_ahead,
362 beta11,
363 a21, rs_A );
364
365 /*------------------------------------------------------------*/
366
367 }
368
369 return FLA_SUCCESS;
370 }
371
372
373
FLA_Eig_gest_nl_opz_var4(int m_AB,dcomplex * buff_A,int rs_A,int cs_A,dcomplex * buff_y,int inc_y,dcomplex * buff_B,int rs_B,int cs_B)374 FLA_Error FLA_Eig_gest_nl_opz_var4( int m_AB,
375 dcomplex* buff_A, int rs_A, int cs_A,
376 dcomplex* buff_y, int inc_y,
377 dcomplex* buff_B, int rs_B, int cs_B )
378 {
379 dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
380 dcomplex* buff_1h = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE_HALF );
381 int i;
382
383 for ( i = 0; i < m_AB; ++i )
384 {
385 dcomplex* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
386 dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
387 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
388 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
389 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
390
391 dcomplex* b10t = buff_B + (0 )*cs_B + (i )*rs_B;
392 dcomplex* beta11 = buff_B + (i )*cs_B + (i )*rs_B;
393
394 dcomplex psi11;
395
396 int m_ahead = m_AB - i - 1;
397 int m_behind = i;
398
399 /*------------------------------------------------------------*/
400
401 // FLA_Copy_external( alpha11, psi11 );
402 // FLA_Scal_external( FLA_ONE_HALF, psi11 );
403 bl1_zmult3( buff_1h, alpha11, &psi11 );
404
405 // FLA_Axpy_external( psi11, b10t, a10t );
406 bl1_zaxpyv( BLIS1_NO_CONJUGATE,
407 m_behind,
408 &psi11,
409 b10t, cs_B,
410 a10t, cs_A );
411
412 // FLA_Her2c_external( FLA_LOWER_TRIANGULAR, FLA_CONJUGATE,
413 // FLA_ONE, a10t, b10t, A00 );
414 bl1_zher2( BLIS1_LOWER_TRIANGULAR,
415 BLIS1_CONJUGATE,
416 m_behind,
417 buff_1,
418 a10t, cs_A,
419 b10t, cs_B,
420 A00, rs_A, cs_A );
421
422 // FLA_Axpy_external( psi11, b10t, a10t );
423 bl1_zaxpyv( BLIS1_NO_CONJUGATE,
424 m_behind,
425 &psi11,
426 b10t, cs_B,
427 a10t, cs_A );
428
429 // FLA_Scal_external( beta11, a10t );
430 bl1_zscalv( BLIS1_NO_CONJUGATE,
431 m_behind,
432 beta11,
433 a10t, cs_A );
434
435 // FLA_Scal_external( beta11, alpha11 );
436 // FLA_Scal_external( beta11, alpha11 );
437 bl1_zscals( beta11, alpha11 );
438 bl1_zscals( beta11, alpha11 );
439
440 // FLA_Ger_external( FLA_ONE, a21, b10t, A20 );
441 bl1_zger( BLIS1_NO_CONJUGATE,
442 BLIS1_NO_CONJUGATE,
443 m_ahead,
444 m_behind,
445 buff_1,
446 a21, rs_A,
447 b10t, cs_B,
448 A20, rs_A, cs_A );
449
450 // FLA_Scal_external( beta11, a21 );
451 bl1_zscalv( BLIS1_NO_CONJUGATE,
452 m_ahead,
453 beta11,
454 a21, rs_A );
455
456 /*------------------------------------------------------------*/
457
458 }
459
460 return FLA_SUCCESS;
461 }
462
463