1 /*
2 
3     Copyright (C) 2014, The University of Texas at Austin
4 
5     This file is part of libflame and is available under the 3-Clause
6     BSD license, which can be found in the LICENSE file at the top-level
7     directory, or at http://opensource.org/licenses/BSD-3-Clause
8 
9 */
10 
11 #include "FLAME.h"
12 
FLA_Eig_gest_iu_opt_var5(FLA_Obj A,FLA_Obj Y,FLA_Obj B)13 FLA_Error FLA_Eig_gest_iu_opt_var5( FLA_Obj A, FLA_Obj Y, FLA_Obj B )
14 {
15   FLA_Datatype datatype;
16   int          m_AB;
17   int          rs_A, cs_A;
18   int          rs_B, cs_B;
19   int          inc_y;
20   FLA_Obj      yL, yR;
21 
22   datatype = FLA_Obj_datatype( A );
23 
24   m_AB     = FLA_Obj_length( A );
25 
26   rs_A     = FLA_Obj_row_stride( A );
27   cs_A     = FLA_Obj_col_stride( A );
28 
29   rs_B     = FLA_Obj_row_stride( B );
30   cs_B     = FLA_Obj_col_stride( B );
31 
32   FLA_Part_1x2( Y,    &yL, &yR,     1, FLA_LEFT );
33 
34   inc_y    = FLA_Obj_vector_inc( yL );
35 
36   switch ( datatype )
37   {
38     case FLA_FLOAT:
39     {
40       float* buff_A = FLA_FLOAT_PTR( A );
41       float* buff_y = FLA_FLOAT_PTR( yL );
42       float* buff_B = FLA_FLOAT_PTR( B );
43 
44       FLA_Eig_gest_iu_ops_var5( m_AB,
45                                 buff_A, rs_A, cs_A,
46                                 buff_y, inc_y,
47                                 buff_B, rs_B, cs_B );
48 
49       break;
50     }
51 
52     case FLA_DOUBLE:
53     {
54       double* buff_A = FLA_DOUBLE_PTR( A );
55       double* buff_y = FLA_DOUBLE_PTR( yL );
56       double* buff_B = FLA_DOUBLE_PTR( B );
57 
58       FLA_Eig_gest_iu_opd_var5( m_AB,
59                                 buff_A, rs_A, cs_A,
60                                 buff_y, inc_y,
61                                 buff_B, rs_B, cs_B );
62 
63       break;
64     }
65 
66     case FLA_COMPLEX:
67     {
68       scomplex* buff_A = FLA_COMPLEX_PTR( A );
69       scomplex* buff_y = FLA_COMPLEX_PTR( yL );
70       scomplex* buff_B = FLA_COMPLEX_PTR( B );
71 
72       FLA_Eig_gest_iu_opc_var5( m_AB,
73                                 buff_A, rs_A, cs_A,
74                                 buff_y, inc_y,
75                                 buff_B, rs_B, cs_B );
76 
77       break;
78     }
79 
80     case FLA_DOUBLE_COMPLEX:
81     {
82       dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
83       dcomplex* buff_y = FLA_DOUBLE_COMPLEX_PTR( yL );
84       dcomplex* buff_B = FLA_DOUBLE_COMPLEX_PTR( B );
85 
86       FLA_Eig_gest_iu_opz_var5( m_AB,
87                                 buff_A, rs_A, cs_A,
88                                 buff_y, inc_y,
89                                 buff_B, rs_B, cs_B );
90 
91       break;
92     }
93   }
94 
95   return FLA_SUCCESS;
96 }
97 
98 
99 
FLA_Eig_gest_iu_ops_var5(int m_AB,float * buff_A,int rs_A,int cs_A,float * buff_y,int inc_y,float * buff_B,int rs_B,int cs_B)100 FLA_Error FLA_Eig_gest_iu_ops_var5( int m_AB,
101                                     float* buff_A, int rs_A, int cs_A,
102                                     float* buff_y, int inc_y,
103                                     float* buff_B, int rs_B, int cs_B )
104 {
105   float*    buff_m1  = FLA_FLOAT_PTR( FLA_MINUS_ONE );
106   float*    buff_m1h = FLA_FLOAT_PTR( FLA_MINUS_ONE_HALF );
107   int       i;
108 
109   for ( i = 0; i < m_AB; ++i )
110   {
111     float*    alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
112     float*    a12t     = buff_A + (i+1)*cs_A + (i  )*rs_A;
113     float*    A22      = buff_A + (i+1)*cs_A + (i+1)*rs_A;
114 
115     float*    beta11   = buff_B + (i  )*cs_B + (i  )*rs_B;
116     float*    b12t     = buff_B + (i+1)*cs_B + (i  )*rs_B;
117     float*    B22      = buff_B + (i+1)*cs_B + (i+1)*rs_B;
118 
119     float     psi11;
120 
121     int       m_ahead  = m_AB - i - 1;
122 
123     /*------------------------------------------------------------*/
124 
125     // FLA_Inv_scal_external( beta11, alpha11 );
126     // FLA_Inv_scal_external( beta11, alpha11 );
127     bl1_sinvscals( beta11, alpha11 );
128     bl1_sinvscals( beta11, alpha11 );
129 
130     // FLA_Copy_external( alpha11, psi11 );
131     // FLA_Scal_external( FLA_MINUS_ONE_HALF, psi11 );
132     bl1_smult3( buff_m1h, alpha11, &psi11 );
133 
134     // FLA_Inv_scal_external( beta11, a12t );
135     bl1_sinvscalv( BLIS1_NO_CONJUGATE,
136                    m_ahead,
137                    beta11,
138                    a12t, cs_A );
139 
140     // FLA_Axpy_external( psi11, b12t, a12t );
141     bl1_saxpyv( BLIS1_NO_CONJUGATE,
142                 m_ahead,
143                 &psi11,
144                 b12t, cs_B,
145                 a12t, cs_A );
146 
147     // FLA_Her2c_external( FLA_UPPER_TRIANGULAR, FLA_CONJUGATE,
148     //                     FLA_MINUS_ONE, a12t, b12t, A22 );
149     bl1_sher2( BLIS1_UPPER_TRIANGULAR,
150                BLIS1_CONJUGATE,
151                m_ahead,
152                buff_m1,
153                a12t, cs_A,
154                b12t, cs_B,
155                A22,  rs_A, cs_A );
156 
157     // FLA_Axpy_external( psi11, b12t, a12t );
158     bl1_saxpyv( BLIS1_NO_CONJUGATE,
159                 m_ahead,
160                 &psi11,
161                 b12t, cs_B,
162                 a12t, cs_A );
163 
164     // FLA_Trsv_external( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE, FLA_NONUNIT_DIAG,
165     //                    B22, a12t );
166     bl1_strsv( BLIS1_UPPER_TRIANGULAR,
167                BLIS1_TRANSPOSE,
168                BLIS1_NONUNIT_DIAG,
169                m_ahead,
170                B22,  rs_B, cs_B,
171                a12t, cs_A );
172 
173     /*------------------------------------------------------------*/
174 
175   }
176 
177   return FLA_SUCCESS;
178 }
179 
180 
181 
FLA_Eig_gest_iu_opd_var5(int m_AB,double * buff_A,int rs_A,int cs_A,double * buff_y,int inc_y,double * buff_B,int rs_B,int cs_B)182 FLA_Error FLA_Eig_gest_iu_opd_var5( int m_AB,
183                                     double* buff_A, int rs_A, int cs_A,
184                                     double* buff_y, int inc_y,
185                                     double* buff_B, int rs_B, int cs_B )
186 {
187   double*   buff_m1  = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
188   double*   buff_m1h = FLA_DOUBLE_PTR( FLA_MINUS_ONE_HALF );
189   int       i;
190 
191   for ( i = 0; i < m_AB; ++i )
192   {
193     double*   alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
194     double*   a12t     = buff_A + (i+1)*cs_A + (i  )*rs_A;
195     double*   A22      = buff_A + (i+1)*cs_A + (i+1)*rs_A;
196 
197     double*   beta11   = buff_B + (i  )*cs_B + (i  )*rs_B;
198     double*   b12t     = buff_B + (i+1)*cs_B + (i  )*rs_B;
199     double*   B22      = buff_B + (i+1)*cs_B + (i+1)*rs_B;
200 
201     double    psi11;
202 
203     int       m_ahead  = m_AB - i - 1;
204 
205     /*------------------------------------------------------------*/
206 
207     // FLA_Inv_scal_external( beta11, alpha11 );
208     // FLA_Inv_scal_external( beta11, alpha11 );
209     bl1_dinvscals( beta11, alpha11 );
210     bl1_dinvscals( beta11, alpha11 );
211 
212     // FLA_Copy_external( alpha11, psi11 );
213     // FLA_Scal_external( FLA_MINUS_ONE_HALF, psi11 );
214     bl1_dmult3( buff_m1h, alpha11, &psi11 );
215 
216     // FLA_Inv_scal_external( beta11, a12t );
217     bl1_dinvscalv( BLIS1_NO_CONJUGATE,
218                    m_ahead,
219                    beta11,
220                    a12t, cs_A );
221 
222     // FLA_Axpy_external( psi11, b12t, a12t );
223     bl1_daxpyv( BLIS1_NO_CONJUGATE,
224                 m_ahead,
225                 &psi11,
226                 b12t, cs_B,
227                 a12t, cs_A );
228 
229     // FLA_Her2c_external( FLA_UPPER_TRIANGULAR, FLA_CONJUGATE,
230     //                     FLA_MINUS_ONE, a12t, b12t, A22 );
231     bl1_dher2( BLIS1_UPPER_TRIANGULAR,
232                BLIS1_CONJUGATE,
233                m_ahead,
234                buff_m1,
235                a12t, cs_A,
236                b12t, cs_B,
237                A22,  rs_A, cs_A );
238 
239     // FLA_Axpy_external( psi11, b12t, a12t );
240     bl1_daxpyv( BLIS1_NO_CONJUGATE,
241                 m_ahead,
242                 &psi11,
243                 b12t, cs_B,
244                 a12t, cs_A );
245 
246     // FLA_Trsv_external( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE, FLA_NONUNIT_DIAG,
247     //                    B22, a12t );
248     bl1_dtrsv( BLIS1_UPPER_TRIANGULAR,
249                BLIS1_TRANSPOSE,
250                BLIS1_NONUNIT_DIAG,
251                m_ahead,
252                B22,  rs_B, cs_B,
253                a12t, cs_A );
254 
255     /*------------------------------------------------------------*/
256 
257   }
258 
259   return FLA_SUCCESS;
260 }
261 
262 
263 
FLA_Eig_gest_iu_opc_var5(int m_AB,scomplex * buff_A,int rs_A,int cs_A,scomplex * buff_y,int inc_y,scomplex * buff_B,int rs_B,int cs_B)264 FLA_Error FLA_Eig_gest_iu_opc_var5( int m_AB,
265                                     scomplex* buff_A, int rs_A, int cs_A,
266                                     scomplex* buff_y, int inc_y,
267                                     scomplex* buff_B, int rs_B, int cs_B )
268 {
269   scomplex* buff_m1  = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
270   scomplex* buff_m1h = FLA_COMPLEX_PTR( FLA_MINUS_ONE_HALF );
271   int       i;
272 
273   for ( i = 0; i < m_AB; ++i )
274   {
275     scomplex* alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
276     scomplex* a12t     = buff_A + (i+1)*cs_A + (i  )*rs_A;
277     scomplex* A22      = buff_A + (i+1)*cs_A + (i+1)*rs_A;
278 
279     scomplex* beta11   = buff_B + (i  )*cs_B + (i  )*rs_B;
280     scomplex* b12t     = buff_B + (i+1)*cs_B + (i  )*rs_B;
281     scomplex* B22      = buff_B + (i+1)*cs_B + (i+1)*rs_B;
282 
283     scomplex  psi11;
284 
285     int       m_ahead  = m_AB - i - 1;
286 
287     /*------------------------------------------------------------*/
288 
289     // FLA_Inv_scal_external( beta11, alpha11 );
290     // FLA_Inv_scal_external( beta11, alpha11 );
291     bl1_cinvscals( beta11, alpha11 );
292     bl1_cinvscals( beta11, alpha11 );
293 
294     // FLA_Copy_external( alpha11, psi11 );
295     // FLA_Scal_external( FLA_MINUS_ONE_HALF, psi11 );
296     bl1_cmult3( buff_m1h, alpha11, &psi11 );
297 
298     // FLA_Inv_scal_external( beta11, a12t );
299     bl1_cinvscalv( BLIS1_NO_CONJUGATE,
300                    m_ahead,
301                    beta11,
302                    a12t, cs_A );
303 
304     // FLA_Axpy_external( psi11, b12t, a12t );
305     bl1_caxpyv( BLIS1_NO_CONJUGATE,
306                 m_ahead,
307                 &psi11,
308                 b12t, cs_B,
309                 a12t, cs_A );
310 
311     // FLA_Her2c_external( FLA_UPPER_TRIANGULAR, FLA_CONJUGATE,
312     //                     FLA_MINUS_ONE, a12t, b12t, A22 );
313     bl1_cher2( BLIS1_UPPER_TRIANGULAR,
314                BLIS1_CONJUGATE,
315                m_ahead,
316                buff_m1,
317                a12t, cs_A,
318                b12t, cs_B,
319                A22,  rs_A, cs_A );
320 
321     // FLA_Axpy_external( psi11, b12t, a12t );
322     bl1_caxpyv( BLIS1_NO_CONJUGATE,
323                 m_ahead,
324                 &psi11,
325                 b12t, cs_B,
326                 a12t, cs_A );
327 
328     // FLA_Trsv_external( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE, FLA_NONUNIT_DIAG,
329     //                    B22, a12t );
330     bl1_ctrsv( BLIS1_UPPER_TRIANGULAR,
331                BLIS1_TRANSPOSE,
332                BLIS1_NONUNIT_DIAG,
333                m_ahead,
334                B22,  rs_B, cs_B,
335                a12t, cs_A );
336 
337     /*------------------------------------------------------------*/
338 
339   }
340 
341   return FLA_SUCCESS;
342 }
343 
344 
345 
FLA_Eig_gest_iu_opz_var5(int m_AB,dcomplex * buff_A,int rs_A,int cs_A,dcomplex * buff_y,int inc_y,dcomplex * buff_B,int rs_B,int cs_B)346 FLA_Error FLA_Eig_gest_iu_opz_var5( int m_AB,
347                                     dcomplex* buff_A, int rs_A, int cs_A,
348                                     dcomplex* buff_y, int inc_y,
349                                     dcomplex* buff_B, int rs_B, int cs_B )
350 {
351   dcomplex* buff_m1  = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
352   dcomplex* buff_m1h = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE_HALF );
353   int       i;
354 
355   for ( i = 0; i < m_AB; ++i )
356   {
357     dcomplex* alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
358     dcomplex* a12t     = buff_A + (i+1)*cs_A + (i  )*rs_A;
359     dcomplex* A22      = buff_A + (i+1)*cs_A + (i+1)*rs_A;
360 
361     dcomplex* beta11   = buff_B + (i  )*cs_B + (i  )*rs_B;
362     dcomplex* b12t     = buff_B + (i+1)*cs_B + (i  )*rs_B;
363     dcomplex* B22      = buff_B + (i+1)*cs_B + (i+1)*rs_B;
364 
365     dcomplex  psi11;
366 
367     int       m_ahead  = m_AB - i - 1;
368 
369     /*------------------------------------------------------------*/
370 
371     // FLA_Inv_scal_external( beta11, alpha11 );
372     // FLA_Inv_scal_external( beta11, alpha11 );
373     bl1_zinvscals( beta11, alpha11 );
374     bl1_zinvscals( beta11, alpha11 );
375 
376     // FLA_Copy_external( alpha11, psi11 );
377     // FLA_Scal_external( FLA_MINUS_ONE_HALF, psi11 );
378     bl1_zmult3( buff_m1h, alpha11, &psi11 );
379 
380     // FLA_Inv_scal_external( beta11, a12t );
381     bl1_zinvscalv( BLIS1_NO_CONJUGATE,
382                    m_ahead,
383                    beta11,
384                    a12t, cs_A );
385 
386     // FLA_Axpy_external( psi11, b12t, a12t );
387     bl1_zaxpyv( BLIS1_NO_CONJUGATE,
388                 m_ahead,
389                 &psi11,
390                 b12t, cs_B,
391                 a12t, cs_A );
392 
393     // FLA_Her2c_external( FLA_UPPER_TRIANGULAR, FLA_CONJUGATE,
394     //                     FLA_MINUS_ONE, a12t, b12t, A22 );
395     bl1_zher2( BLIS1_UPPER_TRIANGULAR,
396                BLIS1_CONJUGATE,
397                m_ahead,
398                buff_m1,
399                a12t, cs_A,
400                b12t, cs_B,
401                A22,  rs_A, cs_A );
402 
403     // FLA_Axpy_external( psi11, b12t, a12t );
404     bl1_zaxpyv( BLIS1_NO_CONJUGATE,
405                 m_ahead,
406                 &psi11,
407                 b12t, cs_B,
408                 a12t, cs_A );
409 
410     // FLA_Trsv_external( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE, FLA_NONUNIT_DIAG,
411     //                    B22, a12t );
412     bl1_ztrsv( BLIS1_UPPER_TRIANGULAR,
413                BLIS1_TRANSPOSE,
414                BLIS1_NONUNIT_DIAG,
415                m_ahead,
416                B22,  rs_B, cs_B,
417                a12t, cs_A );
418 
419     /*------------------------------------------------------------*/
420 
421   }
422 
423   return FLA_SUCCESS;
424 }
425 
426