1 /*
2
3 Copyright (C) 2014, The University of Texas at Austin
4
5 This file is part of libflame and is available under the 3-Clause
6 BSD license, which can be found in the LICENSE file at the top-level
7 directory, or at http://opensource.org/licenses/BSD-3-Clause
8
9 */
10
11 #include "FLAME.h"
12
FLA_Eig_gest_iu_opt_var5(FLA_Obj A,FLA_Obj Y,FLA_Obj B)13 FLA_Error FLA_Eig_gest_iu_opt_var5( FLA_Obj A, FLA_Obj Y, FLA_Obj B )
14 {
15 FLA_Datatype datatype;
16 int m_AB;
17 int rs_A, cs_A;
18 int rs_B, cs_B;
19 int inc_y;
20 FLA_Obj yL, yR;
21
22 datatype = FLA_Obj_datatype( A );
23
24 m_AB = FLA_Obj_length( A );
25
26 rs_A = FLA_Obj_row_stride( A );
27 cs_A = FLA_Obj_col_stride( A );
28
29 rs_B = FLA_Obj_row_stride( B );
30 cs_B = FLA_Obj_col_stride( B );
31
32 FLA_Part_1x2( Y, &yL, &yR, 1, FLA_LEFT );
33
34 inc_y = FLA_Obj_vector_inc( yL );
35
36 switch ( datatype )
37 {
38 case FLA_FLOAT:
39 {
40 float* buff_A = FLA_FLOAT_PTR( A );
41 float* buff_y = FLA_FLOAT_PTR( yL );
42 float* buff_B = FLA_FLOAT_PTR( B );
43
44 FLA_Eig_gest_iu_ops_var5( m_AB,
45 buff_A, rs_A, cs_A,
46 buff_y, inc_y,
47 buff_B, rs_B, cs_B );
48
49 break;
50 }
51
52 case FLA_DOUBLE:
53 {
54 double* buff_A = FLA_DOUBLE_PTR( A );
55 double* buff_y = FLA_DOUBLE_PTR( yL );
56 double* buff_B = FLA_DOUBLE_PTR( B );
57
58 FLA_Eig_gest_iu_opd_var5( m_AB,
59 buff_A, rs_A, cs_A,
60 buff_y, inc_y,
61 buff_B, rs_B, cs_B );
62
63 break;
64 }
65
66 case FLA_COMPLEX:
67 {
68 scomplex* buff_A = FLA_COMPLEX_PTR( A );
69 scomplex* buff_y = FLA_COMPLEX_PTR( yL );
70 scomplex* buff_B = FLA_COMPLEX_PTR( B );
71
72 FLA_Eig_gest_iu_opc_var5( m_AB,
73 buff_A, rs_A, cs_A,
74 buff_y, inc_y,
75 buff_B, rs_B, cs_B );
76
77 break;
78 }
79
80 case FLA_DOUBLE_COMPLEX:
81 {
82 dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
83 dcomplex* buff_y = FLA_DOUBLE_COMPLEX_PTR( yL );
84 dcomplex* buff_B = FLA_DOUBLE_COMPLEX_PTR( B );
85
86 FLA_Eig_gest_iu_opz_var5( m_AB,
87 buff_A, rs_A, cs_A,
88 buff_y, inc_y,
89 buff_B, rs_B, cs_B );
90
91 break;
92 }
93 }
94
95 return FLA_SUCCESS;
96 }
97
98
99
FLA_Eig_gest_iu_ops_var5(int m_AB,float * buff_A,int rs_A,int cs_A,float * buff_y,int inc_y,float * buff_B,int rs_B,int cs_B)100 FLA_Error FLA_Eig_gest_iu_ops_var5( int m_AB,
101 float* buff_A, int rs_A, int cs_A,
102 float* buff_y, int inc_y,
103 float* buff_B, int rs_B, int cs_B )
104 {
105 float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
106 float* buff_m1h = FLA_FLOAT_PTR( FLA_MINUS_ONE_HALF );
107 int i;
108
109 for ( i = 0; i < m_AB; ++i )
110 {
111 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
112 float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
113 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
114
115 float* beta11 = buff_B + (i )*cs_B + (i )*rs_B;
116 float* b12t = buff_B + (i+1)*cs_B + (i )*rs_B;
117 float* B22 = buff_B + (i+1)*cs_B + (i+1)*rs_B;
118
119 float psi11;
120
121 int m_ahead = m_AB - i - 1;
122
123 /*------------------------------------------------------------*/
124
125 // FLA_Inv_scal_external( beta11, alpha11 );
126 // FLA_Inv_scal_external( beta11, alpha11 );
127 bl1_sinvscals( beta11, alpha11 );
128 bl1_sinvscals( beta11, alpha11 );
129
130 // FLA_Copy_external( alpha11, psi11 );
131 // FLA_Scal_external( FLA_MINUS_ONE_HALF, psi11 );
132 bl1_smult3( buff_m1h, alpha11, &psi11 );
133
134 // FLA_Inv_scal_external( beta11, a12t );
135 bl1_sinvscalv( BLIS1_NO_CONJUGATE,
136 m_ahead,
137 beta11,
138 a12t, cs_A );
139
140 // FLA_Axpy_external( psi11, b12t, a12t );
141 bl1_saxpyv( BLIS1_NO_CONJUGATE,
142 m_ahead,
143 &psi11,
144 b12t, cs_B,
145 a12t, cs_A );
146
147 // FLA_Her2c_external( FLA_UPPER_TRIANGULAR, FLA_CONJUGATE,
148 // FLA_MINUS_ONE, a12t, b12t, A22 );
149 bl1_sher2( BLIS1_UPPER_TRIANGULAR,
150 BLIS1_CONJUGATE,
151 m_ahead,
152 buff_m1,
153 a12t, cs_A,
154 b12t, cs_B,
155 A22, rs_A, cs_A );
156
157 // FLA_Axpy_external( psi11, b12t, a12t );
158 bl1_saxpyv( BLIS1_NO_CONJUGATE,
159 m_ahead,
160 &psi11,
161 b12t, cs_B,
162 a12t, cs_A );
163
164 // FLA_Trsv_external( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE, FLA_NONUNIT_DIAG,
165 // B22, a12t );
166 bl1_strsv( BLIS1_UPPER_TRIANGULAR,
167 BLIS1_TRANSPOSE,
168 BLIS1_NONUNIT_DIAG,
169 m_ahead,
170 B22, rs_B, cs_B,
171 a12t, cs_A );
172
173 /*------------------------------------------------------------*/
174
175 }
176
177 return FLA_SUCCESS;
178 }
179
180
181
FLA_Eig_gest_iu_opd_var5(int m_AB,double * buff_A,int rs_A,int cs_A,double * buff_y,int inc_y,double * buff_B,int rs_B,int cs_B)182 FLA_Error FLA_Eig_gest_iu_opd_var5( int m_AB,
183 double* buff_A, int rs_A, int cs_A,
184 double* buff_y, int inc_y,
185 double* buff_B, int rs_B, int cs_B )
186 {
187 double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
188 double* buff_m1h = FLA_DOUBLE_PTR( FLA_MINUS_ONE_HALF );
189 int i;
190
191 for ( i = 0; i < m_AB; ++i )
192 {
193 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
194 double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
195 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
196
197 double* beta11 = buff_B + (i )*cs_B + (i )*rs_B;
198 double* b12t = buff_B + (i+1)*cs_B + (i )*rs_B;
199 double* B22 = buff_B + (i+1)*cs_B + (i+1)*rs_B;
200
201 double psi11;
202
203 int m_ahead = m_AB - i - 1;
204
205 /*------------------------------------------------------------*/
206
207 // FLA_Inv_scal_external( beta11, alpha11 );
208 // FLA_Inv_scal_external( beta11, alpha11 );
209 bl1_dinvscals( beta11, alpha11 );
210 bl1_dinvscals( beta11, alpha11 );
211
212 // FLA_Copy_external( alpha11, psi11 );
213 // FLA_Scal_external( FLA_MINUS_ONE_HALF, psi11 );
214 bl1_dmult3( buff_m1h, alpha11, &psi11 );
215
216 // FLA_Inv_scal_external( beta11, a12t );
217 bl1_dinvscalv( BLIS1_NO_CONJUGATE,
218 m_ahead,
219 beta11,
220 a12t, cs_A );
221
222 // FLA_Axpy_external( psi11, b12t, a12t );
223 bl1_daxpyv( BLIS1_NO_CONJUGATE,
224 m_ahead,
225 &psi11,
226 b12t, cs_B,
227 a12t, cs_A );
228
229 // FLA_Her2c_external( FLA_UPPER_TRIANGULAR, FLA_CONJUGATE,
230 // FLA_MINUS_ONE, a12t, b12t, A22 );
231 bl1_dher2( BLIS1_UPPER_TRIANGULAR,
232 BLIS1_CONJUGATE,
233 m_ahead,
234 buff_m1,
235 a12t, cs_A,
236 b12t, cs_B,
237 A22, rs_A, cs_A );
238
239 // FLA_Axpy_external( psi11, b12t, a12t );
240 bl1_daxpyv( BLIS1_NO_CONJUGATE,
241 m_ahead,
242 &psi11,
243 b12t, cs_B,
244 a12t, cs_A );
245
246 // FLA_Trsv_external( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE, FLA_NONUNIT_DIAG,
247 // B22, a12t );
248 bl1_dtrsv( BLIS1_UPPER_TRIANGULAR,
249 BLIS1_TRANSPOSE,
250 BLIS1_NONUNIT_DIAG,
251 m_ahead,
252 B22, rs_B, cs_B,
253 a12t, cs_A );
254
255 /*------------------------------------------------------------*/
256
257 }
258
259 return FLA_SUCCESS;
260 }
261
262
263
FLA_Eig_gest_iu_opc_var5(int m_AB,scomplex * buff_A,int rs_A,int cs_A,scomplex * buff_y,int inc_y,scomplex * buff_B,int rs_B,int cs_B)264 FLA_Error FLA_Eig_gest_iu_opc_var5( int m_AB,
265 scomplex* buff_A, int rs_A, int cs_A,
266 scomplex* buff_y, int inc_y,
267 scomplex* buff_B, int rs_B, int cs_B )
268 {
269 scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
270 scomplex* buff_m1h = FLA_COMPLEX_PTR( FLA_MINUS_ONE_HALF );
271 int i;
272
273 for ( i = 0; i < m_AB; ++i )
274 {
275 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
276 scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
277 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
278
279 scomplex* beta11 = buff_B + (i )*cs_B + (i )*rs_B;
280 scomplex* b12t = buff_B + (i+1)*cs_B + (i )*rs_B;
281 scomplex* B22 = buff_B + (i+1)*cs_B + (i+1)*rs_B;
282
283 scomplex psi11;
284
285 int m_ahead = m_AB - i - 1;
286
287 /*------------------------------------------------------------*/
288
289 // FLA_Inv_scal_external( beta11, alpha11 );
290 // FLA_Inv_scal_external( beta11, alpha11 );
291 bl1_cinvscals( beta11, alpha11 );
292 bl1_cinvscals( beta11, alpha11 );
293
294 // FLA_Copy_external( alpha11, psi11 );
295 // FLA_Scal_external( FLA_MINUS_ONE_HALF, psi11 );
296 bl1_cmult3( buff_m1h, alpha11, &psi11 );
297
298 // FLA_Inv_scal_external( beta11, a12t );
299 bl1_cinvscalv( BLIS1_NO_CONJUGATE,
300 m_ahead,
301 beta11,
302 a12t, cs_A );
303
304 // FLA_Axpy_external( psi11, b12t, a12t );
305 bl1_caxpyv( BLIS1_NO_CONJUGATE,
306 m_ahead,
307 &psi11,
308 b12t, cs_B,
309 a12t, cs_A );
310
311 // FLA_Her2c_external( FLA_UPPER_TRIANGULAR, FLA_CONJUGATE,
312 // FLA_MINUS_ONE, a12t, b12t, A22 );
313 bl1_cher2( BLIS1_UPPER_TRIANGULAR,
314 BLIS1_CONJUGATE,
315 m_ahead,
316 buff_m1,
317 a12t, cs_A,
318 b12t, cs_B,
319 A22, rs_A, cs_A );
320
321 // FLA_Axpy_external( psi11, b12t, a12t );
322 bl1_caxpyv( BLIS1_NO_CONJUGATE,
323 m_ahead,
324 &psi11,
325 b12t, cs_B,
326 a12t, cs_A );
327
328 // FLA_Trsv_external( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE, FLA_NONUNIT_DIAG,
329 // B22, a12t );
330 bl1_ctrsv( BLIS1_UPPER_TRIANGULAR,
331 BLIS1_TRANSPOSE,
332 BLIS1_NONUNIT_DIAG,
333 m_ahead,
334 B22, rs_B, cs_B,
335 a12t, cs_A );
336
337 /*------------------------------------------------------------*/
338
339 }
340
341 return FLA_SUCCESS;
342 }
343
344
345
FLA_Eig_gest_iu_opz_var5(int m_AB,dcomplex * buff_A,int rs_A,int cs_A,dcomplex * buff_y,int inc_y,dcomplex * buff_B,int rs_B,int cs_B)346 FLA_Error FLA_Eig_gest_iu_opz_var5( int m_AB,
347 dcomplex* buff_A, int rs_A, int cs_A,
348 dcomplex* buff_y, int inc_y,
349 dcomplex* buff_B, int rs_B, int cs_B )
350 {
351 dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
352 dcomplex* buff_m1h = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE_HALF );
353 int i;
354
355 for ( i = 0; i < m_AB; ++i )
356 {
357 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
358 dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
359 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
360
361 dcomplex* beta11 = buff_B + (i )*cs_B + (i )*rs_B;
362 dcomplex* b12t = buff_B + (i+1)*cs_B + (i )*rs_B;
363 dcomplex* B22 = buff_B + (i+1)*cs_B + (i+1)*rs_B;
364
365 dcomplex psi11;
366
367 int m_ahead = m_AB - i - 1;
368
369 /*------------------------------------------------------------*/
370
371 // FLA_Inv_scal_external( beta11, alpha11 );
372 // FLA_Inv_scal_external( beta11, alpha11 );
373 bl1_zinvscals( beta11, alpha11 );
374 bl1_zinvscals( beta11, alpha11 );
375
376 // FLA_Copy_external( alpha11, psi11 );
377 // FLA_Scal_external( FLA_MINUS_ONE_HALF, psi11 );
378 bl1_zmult3( buff_m1h, alpha11, &psi11 );
379
380 // FLA_Inv_scal_external( beta11, a12t );
381 bl1_zinvscalv( BLIS1_NO_CONJUGATE,
382 m_ahead,
383 beta11,
384 a12t, cs_A );
385
386 // FLA_Axpy_external( psi11, b12t, a12t );
387 bl1_zaxpyv( BLIS1_NO_CONJUGATE,
388 m_ahead,
389 &psi11,
390 b12t, cs_B,
391 a12t, cs_A );
392
393 // FLA_Her2c_external( FLA_UPPER_TRIANGULAR, FLA_CONJUGATE,
394 // FLA_MINUS_ONE, a12t, b12t, A22 );
395 bl1_zher2( BLIS1_UPPER_TRIANGULAR,
396 BLIS1_CONJUGATE,
397 m_ahead,
398 buff_m1,
399 a12t, cs_A,
400 b12t, cs_B,
401 A22, rs_A, cs_A );
402
403 // FLA_Axpy_external( psi11, b12t, a12t );
404 bl1_zaxpyv( BLIS1_NO_CONJUGATE,
405 m_ahead,
406 &psi11,
407 b12t, cs_B,
408 a12t, cs_A );
409
410 // FLA_Trsv_external( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE, FLA_NONUNIT_DIAG,
411 // B22, a12t );
412 bl1_ztrsv( BLIS1_UPPER_TRIANGULAR,
413 BLIS1_TRANSPOSE,
414 BLIS1_NONUNIT_DIAG,
415 m_ahead,
416 B22, rs_B, cs_B,
417 a12t, cs_A );
418
419 /*------------------------------------------------------------*/
420
421 }
422
423 return FLA_SUCCESS;
424 }
425
426