1 /*
2
3 Copyright (C) 2014, The University of Texas at Austin
4
5 This file is part of libflame and is available under the 3-Clause
6 BSD license, which can be found in the LICENSE file at the top-level
7 directory, or at http://opensource.org/licenses/BSD-3-Clause
8
9 */
10
11 #include "FLAME.h"
12
FLA_Bsvd_francis_v_opt_var1(FLA_Obj shift,FLA_Obj g,FLA_Obj h,FLA_Obj d,FLA_Obj e)13 FLA_Error FLA_Bsvd_francis_v_opt_var1( FLA_Obj shift, FLA_Obj g, FLA_Obj h, FLA_Obj d, FLA_Obj e )
14 {
15 FLA_Datatype datatype;
16 int m_A;
17 int inc_g;
18 int inc_h;
19 int inc_d;
20 int inc_e;
21
22 datatype = FLA_Obj_datatype( d );
23
24 m_A = FLA_Obj_vector_dim( d );
25
26 inc_g = FLA_Obj_vector_inc( g );
27 inc_h = FLA_Obj_vector_inc( h );
28 inc_d = FLA_Obj_vector_inc( d );
29 inc_e = FLA_Obj_vector_inc( e );
30
31
32 switch ( datatype )
33 {
34 case FLA_FLOAT:
35 {
36 float* buff_shift = FLA_FLOAT_PTR( shift );
37 scomplex* buff_g = FLA_COMPLEX_PTR( g );
38 scomplex* buff_h = FLA_COMPLEX_PTR( h );
39 float* buff_d = FLA_FLOAT_PTR( d );
40 float* buff_e = FLA_FLOAT_PTR( e );
41
42 FLA_Bsvd_francis_v_ops_var1( m_A,
43 *buff_shift,
44 buff_g, inc_g,
45 buff_h, inc_h,
46 buff_d, inc_d,
47 buff_e, inc_e );
48
49 break;
50 }
51
52 case FLA_DOUBLE:
53 {
54 double* buff_shift = FLA_DOUBLE_PTR( shift );
55 dcomplex* buff_g = FLA_DOUBLE_COMPLEX_PTR( g );
56 dcomplex* buff_h = FLA_DOUBLE_COMPLEX_PTR( h );
57 double* buff_d = FLA_DOUBLE_PTR( d );
58 double* buff_e = FLA_DOUBLE_PTR( e );
59
60 FLA_Bsvd_francis_v_opd_var1( m_A,
61 *buff_shift,
62 buff_g, inc_g,
63 buff_h, inc_h,
64 buff_d, inc_d,
65 buff_e, inc_e );
66
67 break;
68 }
69 }
70
71 return FLA_SUCCESS;
72 }
73
74
75
FLA_Bsvd_francis_v_ops_var1(int m_A,float shift,scomplex * buff_g,int inc_g,scomplex * buff_h,int inc_h,float * buff_d,int inc_d,float * buff_e,int inc_e)76 FLA_Error FLA_Bsvd_francis_v_ops_var1( int m_A,
77 float shift,
78 scomplex* buff_g, int inc_g,
79 scomplex* buff_h, int inc_h,
80 float* buff_d, int inc_d,
81 float* buff_e, int inc_e )
82 {
83 float one = bl1_s1();
84 float bulge = 0.0F;
85 int i;
86
87 // If the shift is zero, perform a simplified Francis step.
88 if ( shift == 0.0F )
89 {
90 float cs, oldcs;
91 float sn, oldsn = 0.0F;
92 float r, temp;
93 float a11temp, a22temp;
94
95 float* d_last = buff_d + (m_A-1)*inc_d;
96 float* e_last = buff_e + (m_A-2)*inc_e;
97
98 cs = one;
99 oldcs = one;
100
101 for ( i = 0; i < m_A - 1; ++i )
102 {
103 float* alpha01 = buff_e + (i-1)*inc_e;
104 float* alpha11 = buff_d + (i )*inc_d;
105 float* alpha12 = buff_e + (i )*inc_e;
106 float* alpha22 = buff_d + (i+1)*inc_d;
107
108 float* gammaL = &(buff_g[(i )*inc_g].real);
109 float* sigmaL = &(buff_g[(i )*inc_g].imag);
110 float* gammaR = &(buff_h[(i )*inc_h].real);
111 float* sigmaR = &(buff_h[(i )*inc_h].imag);
112
113 a11temp = *alpha11 * cs;
114 MAC_Givens2_ops( &a11temp,
115 alpha12,
116 &cs,
117 &sn,
118 &r );
119
120 if ( i > 0 ) *alpha01 = oldsn * r;
121
122 a11temp = oldcs * r;
123 a22temp = *alpha22 * sn;
124 MAC_Givens2_ops( &a11temp,
125 &a22temp,
126 &oldcs,
127 &oldsn,
128 alpha11 );
129
130 *gammaR = cs;
131 *sigmaR = sn;
132 *gammaL = oldcs;
133 *sigmaL = oldsn;
134 }
135
136 temp = *d_last * cs;
137 *d_last = temp * oldcs;
138 *e_last = temp * oldsn;
139
140 return FLA_SUCCESS;
141 }
142
143
144 // Apply Givens rotations in forward order.
145 for ( i = 0; i < m_A - 1; ++i )
146 {
147 float* alpha01 = buff_e + (i-1)*inc_e;
148 float* alpha11 = buff_d + (i )*inc_d;
149 float* alpha21 = &bulge;
150 float* alpha02 = &bulge;
151 float* alpha12 = buff_e + (i )*inc_e;
152 float* alpha22 = buff_d + (i+1)*inc_d;
153 float* alpha13 = &bulge;
154 float* alpha23 = buff_e + (i+1)*inc_e;
155
156 float* gammaL = &(buff_g[(i )*inc_g].real);
157 float* sigmaL = &(buff_g[(i )*inc_g].imag);
158 float* gammaR = &(buff_h[(i )*inc_h].real);
159 float* sigmaR = &(buff_h[(i )*inc_h].imag);
160
161 float alpha01_new;
162 float alpha11_new;
163
164 int mn_ahead = m_A - i - 2;
165
166 /*------------------------------------------------------------*/
167
168 if ( i == 0 )
169 {
170 float alpha11_temp;
171 float alpha12_temp;
172
173 // Induce an iteration that introduces the bulge (from the right).
174 //alpha11_temp = *buff_d - shift;
175 alpha11_temp = ( fabsf( *buff_d ) - shift ) *
176 ( signof( one, *buff_d ) + ( shift / *buff_d ) );
177 alpha12_temp = *buff_e;
178
179 // Compute a Givens rotation that introduces the bulge.
180 MAC_Givens2_ops( &alpha11_temp,
181 &alpha12_temp,
182 gammaR,
183 sigmaR,
184 &alpha11_new );
185
186 // Apply the bulge-introducting Givens rotation (from the right)
187 // to the top-left 2x2 matrix.
188 MAC_Apply_G_2x2_ops( gammaR,
189 sigmaR,
190 alpha11,
191 alpha21,
192 alpha12,
193 alpha22 );
194 }
195 else
196 {
197 // Compute a new Givens rotation to push the bulge (from the
198 // right).
199 MAC_Givens2_ops( alpha01,
200 alpha02,
201 gammaR,
202 sigmaR,
203 &alpha01_new );
204
205 // Apply the Givens rotation (from the right) to the 1x2 vector
206 // from which it was computed, which annihilates alpha02.
207 *alpha01 = alpha01_new;
208 *alpha02 = 0.0F;
209
210 // Apply the Givens rotation to the 2x2 matrix below the 1x2
211 // vector that was just modified.
212 MAC_Apply_G_2x2_ops( gammaR,
213 sigmaR,
214 alpha11,
215 alpha21,
216 alpha12,
217 alpha22 );
218 }
219
220 // Compute a new Givens rotation to push the bulge (from the left).
221 MAC_Givens2_ops( alpha11,
222 alpha21,
223 gammaL,
224 sigmaL,
225 &alpha11_new );
226
227 // Apply the Givens rotation (from the left) to the 2x1 vector
228 // from which it was computed, which annihilates alpha11.
229 *alpha11 = alpha11_new;
230 *alpha21 = 0.0F;
231
232 if ( mn_ahead > 0 )
233 {
234 // Apply the Givens rotation (from the left) to the 2x2 submatrix
235 // at alpha12.
236 MAC_Apply_GT_2x2_ops( gammaL,
237 sigmaL,
238 alpha12,
239 alpha22,
240 alpha13,
241 alpha23 );
242 }
243 else
244 {
245 // Apply the Givens rotation (from the left) to the last 2x1 vector
246 // at alpha12.
247 MAC_Apply_GT_2x1_ops( gammaL,
248 sigmaL,
249 alpha12,
250 alpha22 );
251 }
252
253 /*------------------------------------------------------------*/
254 }
255
256 return FLA_SUCCESS;
257 }
258
259
260
FLA_Bsvd_francis_v_opd_var1(int m_A,double shift,dcomplex * buff_g,int inc_g,dcomplex * buff_h,int inc_h,double * buff_d,int inc_d,double * buff_e,int inc_e)261 FLA_Error FLA_Bsvd_francis_v_opd_var1( int m_A,
262 double shift,
263 dcomplex* buff_g, int inc_g,
264 dcomplex* buff_h, int inc_h,
265 double* buff_d, int inc_d,
266 double* buff_e, int inc_e )
267 {
268 double one = bl1_d1();
269 double bulge = 0.0;
270 int i;
271
272 // If the shift is zero, perform a simplified Francis step.
273 if ( shift == 0.0 )
274 {
275 double cs, oldcs;
276 double sn, oldsn = 0;
277 double r, temp;
278 double a11temp, a22temp;
279
280 double* d_last = buff_d + (m_A-1)*inc_d;
281 double* e_last = buff_e + (m_A-2)*inc_e;
282
283 cs = one;
284 oldcs = one;
285
286 for ( i = 0; i < m_A - 1; ++i )
287 {
288 double* alpha01 = buff_e + (i-1)*inc_e;
289 double* alpha11 = buff_d + (i )*inc_d;
290 double* alpha12 = buff_e + (i )*inc_e;
291 double* alpha22 = buff_d + (i+1)*inc_d;
292
293 double* gammaL = &(buff_g[(i )*inc_g].real);
294 double* sigmaL = &(buff_g[(i )*inc_g].imag);
295 double* gammaR = &(buff_h[(i )*inc_h].real);
296 double* sigmaR = &(buff_h[(i )*inc_h].imag);
297
298 a11temp = *alpha11 * cs;
299 MAC_Givens2_opd( &a11temp,
300 alpha12,
301 &cs,
302 &sn,
303 &r );
304
305 if ( i > 0 ) *alpha01 = oldsn * r;
306
307 a11temp = oldcs * r;
308 a22temp = *alpha22 * sn;
309 MAC_Givens2_opd( &a11temp,
310 &a22temp,
311 &oldcs,
312 &oldsn,
313 alpha11 );
314
315 *gammaR = cs;
316 *sigmaR = sn;
317 *gammaL = oldcs;
318 *sigmaL = oldsn;
319 }
320
321 temp = *d_last * cs;
322 *d_last = temp * oldcs;
323 *e_last = temp * oldsn;
324
325 return FLA_SUCCESS;
326 }
327
328
329 // Apply Givens rotations in forward order.
330 for ( i = 0; i < m_A - 1; ++i )
331 {
332 double* alpha01 = buff_e + (i-1)*inc_e;
333 double* alpha11 = buff_d + (i )*inc_d;
334 double* alpha21 = &bulge;
335 double* alpha02 = &bulge;
336 double* alpha12 = buff_e + (i )*inc_e;
337 double* alpha22 = buff_d + (i+1)*inc_d;
338 double* alpha13 = &bulge;
339 double* alpha23 = buff_e + (i+1)*inc_e;
340
341 double* gammaL = &(buff_g[(i )*inc_g].real);
342 double* sigmaL = &(buff_g[(i )*inc_g].imag);
343 double* gammaR = &(buff_h[(i )*inc_h].real);
344 double* sigmaR = &(buff_h[(i )*inc_h].imag);
345
346 double alpha01_new;
347 double alpha11_new;
348
349 int mn_ahead = m_A - i - 2;
350
351 /*------------------------------------------------------------*/
352
353 if ( i == 0 )
354 {
355 double alpha11_temp;
356 double alpha12_temp;
357
358 // Induce an iteration that introduces the bulge (from the right).
359 //alpha11_temp = *buff_d - shift;
360 alpha11_temp = ( fabs( *buff_d ) - shift ) *
361 ( signof( one, *buff_d ) + ( shift / *buff_d ) );
362 alpha12_temp = *buff_e;
363
364 // Compute a Givens rotation that introduces the bulge.
365 MAC_Givens2_opd( &alpha11_temp,
366 &alpha12_temp,
367 gammaR,
368 sigmaR,
369 &alpha11_new );
370
371 // Apply the bulge-introducting Givens rotation (from the right)
372 // to the top-left 2x2 matrix.
373 MAC_Apply_G_2x2_opd( gammaR,
374 sigmaR,
375 alpha11,
376 alpha21,
377 alpha12,
378 alpha22 );
379 }
380 else
381 {
382 // Compute a new Givens rotation to push the bulge (from the
383 // right).
384 MAC_Givens2_opd( alpha01,
385 alpha02,
386 gammaR,
387 sigmaR,
388 &alpha01_new );
389
390 // Apply the Givens rotation (from the right) to the 1x2 vector
391 // from which it was computed, which annihilates alpha02.
392 *alpha01 = alpha01_new;
393 *alpha02 = 0.0;
394
395 // Apply the Givens rotation to the 2x2 matrix below the 1x2
396 // vector that was just modified.
397 MAC_Apply_G_2x2_opd( gammaR,
398 sigmaR,
399 alpha11,
400 alpha21,
401 alpha12,
402 alpha22 );
403 }
404
405 // Compute a new Givens rotation to push the bulge (from the left).
406 MAC_Givens2_opd( alpha11,
407 alpha21,
408 gammaL,
409 sigmaL,
410 &alpha11_new );
411
412 // Apply the Givens rotation (from the left) to the 2x1 vector
413 // from which it was computed, which annihilates alpha11.
414 *alpha11 = alpha11_new;
415 *alpha21 = 0.0;
416
417 if ( mn_ahead > 0 )
418 {
419 // Apply the Givens rotation (from the left) to the 2x2 submatrix
420 // at alpha12.
421 MAC_Apply_GT_2x2_opd( gammaL,
422 sigmaL,
423 alpha12,
424 alpha22,
425 alpha13,
426 alpha23 );
427 }
428 else
429 {
430 // Apply the Givens rotation (from the left) to the last 2x1 vector
431 // at alpha12.
432 MAC_Apply_GT_2x1_opd( gammaL,
433 sigmaL,
434 alpha12,
435 alpha22 );
436 }
437
438 /*------------------------------------------------------------*/
439 }
440
441 return FLA_SUCCESS;
442 }
443
444