1 /*
2 
3     Copyright (C) 2014, The University of Texas at Austin
4 
5     This file is part of libflame and is available under the 3-Clause
6     BSD license, which can be found in the LICENSE file at the top-level
7     directory, or at http://opensource.org/licenses/BSD-3-Clause
8 
9 */
10 
11 #include "FLAME.h"
12 
FLA_Bsvd_francis_v_opt_var1(FLA_Obj shift,FLA_Obj g,FLA_Obj h,FLA_Obj d,FLA_Obj e)13 FLA_Error FLA_Bsvd_francis_v_opt_var1( FLA_Obj shift, FLA_Obj g, FLA_Obj h, FLA_Obj d, FLA_Obj e )
14 {
15     FLA_Datatype datatype;
16     int          m_A;
17     int          inc_g;
18     int          inc_h;
19     int          inc_d;
20     int          inc_e;
21 
22     datatype = FLA_Obj_datatype( d );
23 
24     m_A      = FLA_Obj_vector_dim( d );
25 
26     inc_g    = FLA_Obj_vector_inc( g );
27     inc_h    = FLA_Obj_vector_inc( h );
28     inc_d    = FLA_Obj_vector_inc( d );
29     inc_e    = FLA_Obj_vector_inc( e );
30 
31 
32     switch ( datatype )
33     {
34     case FLA_FLOAT:
35     {
36         float*    buff_shift  = FLA_FLOAT_PTR( shift );
37         scomplex* buff_g      = FLA_COMPLEX_PTR( g );
38         scomplex* buff_h      = FLA_COMPLEX_PTR( h );
39         float*    buff_d      = FLA_FLOAT_PTR( d );
40         float*    buff_e      = FLA_FLOAT_PTR( e );
41 
42         FLA_Bsvd_francis_v_ops_var1( m_A,
43                                      *buff_shift,
44                                      buff_g, inc_g,
45                                      buff_h, inc_h,
46                                      buff_d, inc_d,
47                                      buff_e, inc_e );
48 
49         break;
50     }
51 
52     case FLA_DOUBLE:
53     {
54         double*   buff_shift  = FLA_DOUBLE_PTR( shift );
55         dcomplex* buff_g      = FLA_DOUBLE_COMPLEX_PTR( g );
56         dcomplex* buff_h      = FLA_DOUBLE_COMPLEX_PTR( h );
57         double*   buff_d      = FLA_DOUBLE_PTR( d );
58         double*   buff_e      = FLA_DOUBLE_PTR( e );
59 
60         FLA_Bsvd_francis_v_opd_var1( m_A,
61                                      *buff_shift,
62                                      buff_g, inc_g,
63                                      buff_h, inc_h,
64                                      buff_d, inc_d,
65                                      buff_e, inc_e );
66 
67         break;
68     }
69     }
70 
71     return FLA_SUCCESS;
72 }
73 
74 
75 
FLA_Bsvd_francis_v_ops_var1(int m_A,float shift,scomplex * buff_g,int inc_g,scomplex * buff_h,int inc_h,float * buff_d,int inc_d,float * buff_e,int inc_e)76 FLA_Error FLA_Bsvd_francis_v_ops_var1( int       m_A,
77                                        float     shift,
78                                        scomplex* buff_g, int inc_g,
79                                        scomplex* buff_h, int inc_h,
80                                        float*    buff_d, int inc_d,
81                                        float*    buff_e, int inc_e )
82 {
83     float     one   = bl1_s1();
84     float     bulge = 0.0F;
85     int       i;
86 
87     // If the shift is zero, perform a simplified Francis step.
88     if ( shift == 0.0F )
89     {
90         float cs, oldcs;
91         float sn, oldsn = 0.0F;
92         float r, temp;
93         float a11temp, a22temp;
94 
95         float* d_last = buff_d + (m_A-1)*inc_d;
96         float* e_last = buff_e + (m_A-2)*inc_e;
97 
98         cs    = one;
99         oldcs = one;
100 
101         for ( i = 0; i < m_A - 1; ++i )
102         {
103             float*   alpha01  = buff_e + (i-1)*inc_e;
104             float*   alpha11  = buff_d + (i  )*inc_d;
105             float*   alpha12  = buff_e + (i  )*inc_e;
106             float*   alpha22  = buff_d + (i+1)*inc_d;
107 
108             float*   gammaL   = &(buff_g[(i  )*inc_g].real);
109             float*   sigmaL   = &(buff_g[(i  )*inc_g].imag);
110             float*   gammaR   = &(buff_h[(i  )*inc_h].real);
111             float*   sigmaR   = &(buff_h[(i  )*inc_h].imag);
112 
113             a11temp = *alpha11 * cs;
114             MAC_Givens2_ops( &a11temp,
115                              alpha12,
116                              &cs,
117                              &sn,
118                              &r );
119 
120             if ( i > 0 ) *alpha01 = oldsn * r;
121 
122             a11temp = oldcs * r;
123             a22temp = *alpha22 * sn;
124             MAC_Givens2_ops( &a11temp,
125                              &a22temp,
126                              &oldcs,
127                              &oldsn,
128                              alpha11 );
129 
130             *gammaR = cs;
131             *sigmaR = sn;
132             *gammaL = oldcs;
133             *sigmaL = oldsn;
134         }
135 
136         temp    = *d_last * cs;
137         *d_last = temp * oldcs;
138         *e_last = temp * oldsn;
139 
140         return FLA_SUCCESS;
141     }
142 
143 
144     // Apply Givens rotations in forward order.
145     for ( i = 0; i < m_A - 1; ++i )
146     {
147         float*    alpha01  = buff_e + (i-1)*inc_e;
148         float*    alpha11  = buff_d + (i  )*inc_d;
149         float*    alpha21  = &bulge;
150         float*    alpha02  = &bulge;
151         float*    alpha12  = buff_e + (i  )*inc_e;
152         float*    alpha22  = buff_d + (i+1)*inc_d;
153         float*    alpha13  = &bulge;
154         float*    alpha23  = buff_e + (i+1)*inc_e;
155 
156         float*    gammaL   = &(buff_g[(i  )*inc_g].real);
157         float*    sigmaL   = &(buff_g[(i  )*inc_g].imag);
158         float*    gammaR   = &(buff_h[(i  )*inc_h].real);
159         float*    sigmaR   = &(buff_h[(i  )*inc_h].imag);
160 
161         float     alpha01_new;
162         float     alpha11_new;
163 
164         int       mn_ahead  = m_A - i - 2;
165 
166         /*------------------------------------------------------------*/
167 
168         if ( i == 0 )
169         {
170             float alpha11_temp;
171             float alpha12_temp;
172 
173             // Induce an iteration that introduces the bulge (from the right).
174             //alpha11_temp = *buff_d - shift;
175             alpha11_temp = ( fabsf( *buff_d ) - shift ) *
176                            ( signof( one, *buff_d ) + ( shift / *buff_d ) );
177             alpha12_temp = *buff_e;
178 
179             // Compute a Givens rotation that introduces the bulge.
180             MAC_Givens2_ops( &alpha11_temp,
181                              &alpha12_temp,
182                              gammaR,
183                              sigmaR,
184                              &alpha11_new );
185 
186             // Apply the bulge-introducting Givens rotation (from the right)
187             // to the top-left 2x2 matrix.
188             MAC_Apply_G_2x2_ops( gammaR,
189                                  sigmaR,
190                                  alpha11,
191                                  alpha21,
192                                  alpha12,
193                                  alpha22 );
194         }
195         else
196         {
197             // Compute a new Givens rotation to push the bulge (from the
198             // right).
199             MAC_Givens2_ops( alpha01,
200                              alpha02,
201                              gammaR,
202                              sigmaR,
203                              &alpha01_new );
204 
205             // Apply the Givens rotation (from the right) to the 1x2 vector
206             // from which it was computed, which annihilates alpha02.
207             *alpha01 = alpha01_new;
208             *alpha02 = 0.0F;
209 
210             // Apply the Givens rotation to the 2x2 matrix below the 1x2
211             // vector that was just modified.
212             MAC_Apply_G_2x2_ops( gammaR,
213                                  sigmaR,
214                                  alpha11,
215                                  alpha21,
216                                  alpha12,
217                                  alpha22 );
218         }
219 
220         // Compute a new Givens rotation to push the bulge (from the left).
221         MAC_Givens2_ops( alpha11,
222                          alpha21,
223                          gammaL,
224                          sigmaL,
225                          &alpha11_new );
226 
227         // Apply the Givens rotation (from the left) to the 2x1 vector
228         // from which it was computed, which annihilates alpha11.
229         *alpha11 = alpha11_new;
230         *alpha21 = 0.0F;
231 
232         if ( mn_ahead > 0 )
233         {
234             // Apply the Givens rotation (from the left) to the 2x2 submatrix
235             // at alpha12.
236             MAC_Apply_GT_2x2_ops( gammaL,
237                                   sigmaL,
238                                   alpha12,
239                                   alpha22,
240                                   alpha13,
241                                   alpha23 );
242         }
243         else
244         {
245             // Apply the Givens rotation (from the left) to the last 2x1 vector
246             // at alpha12.
247             MAC_Apply_GT_2x1_ops( gammaL,
248                                   sigmaL,
249                                   alpha12,
250                                   alpha22 );
251         }
252 
253         /*------------------------------------------------------------*/
254     }
255 
256     return FLA_SUCCESS;
257 }
258 
259 
260 
FLA_Bsvd_francis_v_opd_var1(int m_A,double shift,dcomplex * buff_g,int inc_g,dcomplex * buff_h,int inc_h,double * buff_d,int inc_d,double * buff_e,int inc_e)261 FLA_Error FLA_Bsvd_francis_v_opd_var1( int       m_A,
262                                        double    shift,
263                                        dcomplex* buff_g, int inc_g,
264                                        dcomplex* buff_h, int inc_h,
265                                        double*   buff_d, int inc_d,
266                                        double*   buff_e, int inc_e )
267 {
268     double    one   = bl1_d1();
269     double    bulge = 0.0;
270     int       i;
271 
272     // If the shift is zero, perform a simplified Francis step.
273     if ( shift == 0.0 )
274     {
275         double cs, oldcs;
276         double sn, oldsn = 0;
277         double r, temp;
278         double a11temp, a22temp;
279 
280         double* d_last = buff_d + (m_A-1)*inc_d;
281         double* e_last = buff_e + (m_A-2)*inc_e;
282 
283         cs    = one;
284         oldcs = one;
285 
286         for ( i = 0; i < m_A - 1; ++i )
287         {
288             double*   alpha01  = buff_e + (i-1)*inc_e;
289             double*   alpha11  = buff_d + (i  )*inc_d;
290             double*   alpha12  = buff_e + (i  )*inc_e;
291             double*   alpha22  = buff_d + (i+1)*inc_d;
292 
293             double*   gammaL   = &(buff_g[(i  )*inc_g].real);
294             double*   sigmaL   = &(buff_g[(i  )*inc_g].imag);
295             double*   gammaR   = &(buff_h[(i  )*inc_h].real);
296             double*   sigmaR   = &(buff_h[(i  )*inc_h].imag);
297 
298             a11temp = *alpha11 * cs;
299             MAC_Givens2_opd( &a11temp,
300                              alpha12,
301                              &cs,
302                              &sn,
303                              &r );
304 
305             if ( i > 0 ) *alpha01 = oldsn * r;
306 
307             a11temp = oldcs * r;
308             a22temp = *alpha22 * sn;
309             MAC_Givens2_opd( &a11temp,
310                              &a22temp,
311                              &oldcs,
312                              &oldsn,
313                              alpha11 );
314 
315             *gammaR = cs;
316             *sigmaR = sn;
317             *gammaL = oldcs;
318             *sigmaL = oldsn;
319         }
320 
321         temp    = *d_last * cs;
322         *d_last = temp * oldcs;
323         *e_last = temp * oldsn;
324 
325         return FLA_SUCCESS;
326     }
327 
328 
329     // Apply Givens rotations in forward order.
330     for ( i = 0; i < m_A - 1; ++i )
331     {
332         double*   alpha01  = buff_e + (i-1)*inc_e;
333         double*   alpha11  = buff_d + (i  )*inc_d;
334         double*   alpha21  = &bulge;
335         double*   alpha02  = &bulge;
336         double*   alpha12  = buff_e + (i  )*inc_e;
337         double*   alpha22  = buff_d + (i+1)*inc_d;
338         double*   alpha13  = &bulge;
339         double*   alpha23  = buff_e + (i+1)*inc_e;
340 
341         double*   gammaL   = &(buff_g[(i  )*inc_g].real);
342         double*   sigmaL   = &(buff_g[(i  )*inc_g].imag);
343         double*   gammaR   = &(buff_h[(i  )*inc_h].real);
344         double*   sigmaR   = &(buff_h[(i  )*inc_h].imag);
345 
346         double    alpha01_new;
347         double    alpha11_new;
348 
349         int       mn_ahead  = m_A - i - 2;
350 
351         /*------------------------------------------------------------*/
352 
353         if ( i == 0 )
354         {
355             double alpha11_temp;
356             double alpha12_temp;
357 
358             // Induce an iteration that introduces the bulge (from the right).
359             //alpha11_temp = *buff_d - shift;
360             alpha11_temp = ( fabs( *buff_d ) - shift ) *
361                            ( signof( one, *buff_d ) + ( shift / *buff_d ) );
362             alpha12_temp = *buff_e;
363 
364             // Compute a Givens rotation that introduces the bulge.
365             MAC_Givens2_opd( &alpha11_temp,
366                              &alpha12_temp,
367                              gammaR,
368                              sigmaR,
369                              &alpha11_new );
370 
371             // Apply the bulge-introducting Givens rotation (from the right)
372             // to the top-left 2x2 matrix.
373             MAC_Apply_G_2x2_opd( gammaR,
374                                  sigmaR,
375                                  alpha11,
376                                  alpha21,
377                                  alpha12,
378                                  alpha22 );
379         }
380         else
381         {
382             // Compute a new Givens rotation to push the bulge (from the
383             // right).
384             MAC_Givens2_opd( alpha01,
385                              alpha02,
386                              gammaR,
387                              sigmaR,
388                              &alpha01_new );
389 
390             // Apply the Givens rotation (from the right) to the 1x2 vector
391             // from which it was computed, which annihilates alpha02.
392             *alpha01 = alpha01_new;
393             *alpha02 = 0.0;
394 
395             // Apply the Givens rotation to the 2x2 matrix below the 1x2
396             // vector that was just modified.
397             MAC_Apply_G_2x2_opd( gammaR,
398                                  sigmaR,
399                                  alpha11,
400                                  alpha21,
401                                  alpha12,
402                                  alpha22 );
403         }
404 
405         // Compute a new Givens rotation to push the bulge (from the left).
406         MAC_Givens2_opd( alpha11,
407                          alpha21,
408                          gammaL,
409                          sigmaL,
410                          &alpha11_new );
411 
412         // Apply the Givens rotation (from the left) to the 2x1 vector
413         // from which it was computed, which annihilates alpha11.
414         *alpha11 = alpha11_new;
415         *alpha21 = 0.0;
416 
417         if ( mn_ahead > 0 )
418         {
419             // Apply the Givens rotation (from the left) to the 2x2 submatrix
420             // at alpha12.
421             MAC_Apply_GT_2x2_opd( gammaL,
422                                   sigmaL,
423                                   alpha12,
424                                   alpha22,
425                                   alpha13,
426                                   alpha23 );
427         }
428         else
429         {
430             // Apply the Givens rotation (from the left) to the last 2x1 vector
431             // at alpha12.
432             MAC_Apply_GT_2x1_opd( gammaL,
433                                   sigmaL,
434                                   alpha12,
435                                   alpha22 );
436         }
437 
438         /*------------------------------------------------------------*/
439     }
440 
441     return FLA_SUCCESS;
442 }
443 
444