1 /*
2
3 Copyright (C) 2014, The University of Texas at Austin
4
5 This file is part of libflame and is available under the 3-Clause
6 BSD license, which can be found in the LICENSE file at the top-level
7 directory, or at http://opensource.org/licenses/BSD-3-Clause
8
9 */
10
11 #include "FLAME.h"
12
FLA_Tevd_v_opt_var2(dim_t n_iter_max,FLA_Obj d,FLA_Obj e,FLA_Obj G,FLA_Obj R,FLA_Obj W,FLA_Obj U,dim_t b_alg)13 FLA_Error FLA_Tevd_v_opt_var2( dim_t n_iter_max, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj R, FLA_Obj W, FLA_Obj U, dim_t b_alg )
14 {
15 FLA_Error r_val = FLA_SUCCESS;
16 FLA_Datatype datatype;
17 int m_A, m_U, n_G;
18 int inc_d;
19 int inc_e;
20 int rs_G, cs_G;
21 int rs_R, cs_R;
22 int rs_U, cs_U;
23 int rs_W, cs_W;
24
25 datatype = FLA_Obj_datatype( U );
26
27 m_A = FLA_Obj_vector_dim( d );
28 m_U = FLA_Obj_length( U );
29 n_G = FLA_Obj_width( G );
30
31 inc_d = FLA_Obj_vector_inc( d );
32 inc_e = FLA_Obj_vector_inc( e );
33
34 rs_G = FLA_Obj_row_stride( G );
35 cs_G = FLA_Obj_col_stride( G );
36
37 rs_R = FLA_Obj_row_stride( R );
38 cs_R = FLA_Obj_col_stride( R );
39
40 rs_W = FLA_Obj_row_stride( W );
41 cs_W = FLA_Obj_col_stride( W );
42
43 rs_U = FLA_Obj_row_stride( U );
44 cs_U = FLA_Obj_col_stride( U );
45
46
47 switch ( datatype )
48 {
49 case FLA_FLOAT:
50 {
51 float* buff_d = FLA_FLOAT_PTR( d );
52 float* buff_e = FLA_FLOAT_PTR( e );
53 scomplex* buff_G = FLA_COMPLEX_PTR( G );
54 float* buff_R = FLA_FLOAT_PTR( R );
55 float* buff_W = FLA_FLOAT_PTR( W );
56 float* buff_U = FLA_FLOAT_PTR( U );
57
58 r_val = FLA_Tevd_v_ops_var2( m_A,
59 m_U,
60 n_G,
61 n_iter_max,
62 buff_d, inc_d,
63 buff_e, inc_e,
64 buff_G, rs_G, cs_G,
65 buff_R, rs_R, cs_R,
66 buff_W, rs_W, cs_W,
67 buff_U, rs_U, cs_U,
68 b_alg );
69
70 break;
71 }
72
73 case FLA_DOUBLE:
74 {
75 double* buff_d = FLA_DOUBLE_PTR( d );
76 double* buff_e = FLA_DOUBLE_PTR( e );
77 dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
78 double* buff_R = FLA_DOUBLE_PTR( R );
79 double* buff_W = FLA_DOUBLE_PTR( W );
80 double* buff_U = FLA_DOUBLE_PTR( U );
81
82 r_val = FLA_Tevd_v_opd_var2( m_A,
83 m_U,
84 n_G,
85 n_iter_max,
86 buff_d, inc_d,
87 buff_e, inc_e,
88 buff_G, rs_G, cs_G,
89 buff_R, rs_R, cs_R,
90 buff_W, rs_W, cs_W,
91 buff_U, rs_U, cs_U,
92 b_alg );
93
94 break;
95 }
96
97 case FLA_COMPLEX:
98 {
99 float* buff_d = FLA_FLOAT_PTR( d );
100 float* buff_e = FLA_FLOAT_PTR( e );
101 scomplex* buff_G = FLA_COMPLEX_PTR( G );
102 float* buff_R = FLA_FLOAT_PTR( R );
103 scomplex* buff_W = FLA_COMPLEX_PTR( W );
104 scomplex* buff_U = FLA_COMPLEX_PTR( U );
105
106 r_val = FLA_Tevd_v_opc_var2( m_A,
107 m_U,
108 n_G,
109 n_iter_max,
110 buff_d, inc_d,
111 buff_e, inc_e,
112 buff_G, rs_G, cs_G,
113 buff_R, rs_R, cs_R,
114 buff_W, rs_W, cs_W,
115 buff_U, rs_U, cs_U,
116 b_alg );
117
118 break;
119 }
120
121 case FLA_DOUBLE_COMPLEX:
122 {
123 double* buff_d = FLA_DOUBLE_PTR( d );
124 double* buff_e = FLA_DOUBLE_PTR( e );
125 dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
126 double* buff_R = FLA_DOUBLE_PTR( R );
127 dcomplex* buff_W = FLA_DOUBLE_COMPLEX_PTR( W );
128 dcomplex* buff_U = FLA_DOUBLE_COMPLEX_PTR( U );
129
130 r_val = FLA_Tevd_v_opz_var2( m_A,
131 m_U,
132 n_G,
133 n_iter_max,
134 buff_d, inc_d,
135 buff_e, inc_e,
136 buff_G, rs_G, cs_G,
137 buff_R, rs_R, cs_R,
138 buff_W, rs_W, cs_W,
139 buff_U, rs_U, cs_U,
140 b_alg );
141
142 break;
143 }
144 }
145
146 return r_val;
147 }
148
149
150
FLA_Tevd_v_ops_var2(int m_A,int m_U,int n_G,int n_iter_max,float * buff_d,int inc_d,float * buff_e,int inc_e,scomplex * buff_G,int rs_G,int cs_G,float * buff_R,int rs_R,int cs_R,float * buff_W,int rs_W,int cs_W,float * buff_U,int rs_U,int cs_U,int b_alg)151 FLA_Error FLA_Tevd_v_ops_var2( int m_A,
152 int m_U,
153 int n_G,
154 int n_iter_max,
155 float* buff_d, int inc_d,
156 float* buff_e, int inc_e,
157 scomplex* buff_G, int rs_G, int cs_G,
158 float* buff_R, int rs_R, int cs_R,
159 float* buff_W, int rs_W, int cs_W,
160 float* buff_U, int rs_U, int cs_U,
161 int b_alg )
162 {
163 FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
164
165 return FLA_SUCCESS;
166 }
167
168
169
FLA_Tevd_v_opd_var2(int m_A,int m_U,int n_G,int n_iter_max,double * buff_d,int inc_d,double * buff_e,int inc_e,dcomplex * buff_G,int rs_G,int cs_G,double * buff_R,int rs_R,int cs_R,double * buff_W,int rs_W,int cs_W,double * buff_U,int rs_U,int cs_U,int b_alg)170 FLA_Error FLA_Tevd_v_opd_var2( int m_A,
171 int m_U,
172 int n_G,
173 int n_iter_max,
174 double* buff_d, int inc_d,
175 double* buff_e, int inc_e,
176 dcomplex* buff_G, int rs_G, int cs_G,
177 double* buff_R, int rs_R, int cs_R,
178 double* buff_W, int rs_W, int cs_W,
179 double* buff_U, int rs_U, int cs_U,
180 int b_alg )
181 {
182 dcomplex one = bl1_z1();
183 double rone = bl1_d1();
184 double rzero = bl1_d0();
185
186 dcomplex* G;
187 double* d1;
188 double* e1;
189 int r_val;
190 int done;
191 int m_G_sweep_max;
192 int ij_begin;
193 int ijTL, ijBR;
194 int m_A11;
195 int n_iter_perf;
196 int n_U_apply;
197 int total_deflations;
198 int n_deflations;
199 int n_iter_prev;
200 int n_iter_perf_sweep_max;
201
202 // Initialize our completion flag.
203 done = FALSE;
204
205 // Initialize a counter that holds the maximum number of rows of G
206 // that we would need to initialize for the next sweep.
207 m_G_sweep_max = m_A - 1;
208
209 // Initialize a counter for the total number of iterations performed.
210 n_iter_prev = 0;
211
212 // Initialize R to identity.
213 bl1_dident( m_A,
214 buff_R, rs_R, cs_R );
215
216 // Iterate until the matrix has completely deflated.
217 for ( total_deflations = 0; done != TRUE; )
218 {
219
220 // Initialize G to contain only identity rotations.
221 bl1_zsetm( m_G_sweep_max,
222 n_G,
223 &one,
224 buff_G, rs_G, cs_G );
225
226 // Keep track of the maximum number of iterations performed in the
227 // current sweep. This is used when applying the sweep's Givens
228 // rotations.
229 n_iter_perf_sweep_max = 0;
230
231 // Perform a sweep: Move through the matrix and perform a tridiagonal
232 // EVD on each non-zero submatrix that is encountered. During the
233 // first time through, ijTL will be 0 and ijBR will be m_A - 1.
234 for ( ij_begin = 0; ij_begin < m_A; )
235 {
236
237 #ifdef PRINTF
238 if ( ij_begin == 0 )
239 printf( "FLA_Tevd_v_opd_var2: beginning new sweep (ij_begin = %d)\n", ij_begin );
240 #endif
241
242 // Search for the first submatrix along the diagonal that is
243 // bounded by zeroes (or endpoints of the matrix). If no
244 // submatrix is found (ie: if the entire subdiagonal is zero
245 // then FLA_FAILURE is returned. This function also inspects
246 // subdiagonal elements for proximity to zero. If a given
247 // element is close enough to zero, then it is deemed
248 // converged and manually set to zero.
249 r_val = FLA_Tevd_find_submatrix_opd( m_A,
250 ij_begin,
251 buff_d, inc_d,
252 buff_e, inc_e,
253 &ijTL,
254 &ijBR );
255
256 // Verify that a submatrix was found. If one was not found,
257 // then we are done with the current sweep. Furthermore, if
258 // a submatrix was not found AND we began our search at the
259 // beginning of the matrix (ie: ij_begin == 0), then the
260 // matrix has completely deflated and so we are done with
261 // Francis step iteration.
262 if ( r_val == FLA_FAILURE )
263 {
264 if ( ij_begin == 0 )
265 {
266 #ifdef PRINTF
267 printf( "FLA_Tevd_v_opd_var2: subdiagonal is completely zero.\n" );
268 printf( "FLA_Tevd_v_opd_var2: Francis iteration is done!\n" );
269 #endif
270 done = TRUE;
271 }
272
273 // Break out of the current sweep so we can apply the last
274 // remaining Givens rotations.
275 break;
276 }
277
278 // If we got this far, then:
279 // (a) ijTL refers to the index of the first non-zero
280 // subdiagonal along the diagonal, and
281 // (b) ijBR refers to either:
282 // - the first zero element that occurs after ijTL, or
283 // - the the last diagonal element.
284 // Note that ijTL and ijBR also correspond to the first and
285 // last diagonal elements of the submatrix of interest. Thus,
286 // we may compute the dimension of this submatrix as:
287 m_A11 = ijBR - ijTL + 1;
288
289 #ifdef PRINTF
290 printf( "FLA_Tevd_v_opd_var2: ij_begin = %d\n", ij_begin );
291 printf( "FLA_Tevd_v_opd_var2: ijTL = %d\n", ijTL );
292 printf( "FLA_Tevd_v_opd_var2: ijBR = %d\n", ijBR );
293 printf( "FLA_Tevd_v_opd_var2: m_A11 = %d\n", m_A11 );
294 #endif
295
296 // Adjust ij_begin, which gets us ready for the next subproblem, if
297 // there is one.
298 ij_begin = ijBR + 1;
299
300 // Index to the submatrices upon which we will operate.
301 d1 = buff_d + ijTL * inc_d;
302 e1 = buff_e + ijTL * inc_e;
303 G = buff_G + ijTL * rs_G;
304
305 // Search for a batch of eigenvalues, recursing on deflated
306 // subproblems whenever a split occurs. Iteration continues
307 // as long as
308 // (a) there is still matrix left to operate on, and
309 // (b) the number of iterations performed in this batch is
310 // less than n_G.
311 // If/when either of the two above conditions fails to hold,
312 // the function returns.
313 n_deflations = FLA_Tevd_iteracc_v_opd_var1( m_A11,
314 n_G,
315 ijTL,
316 d1, inc_d,
317 e1, inc_e,
318 G, rs_G, cs_G,
319 &n_iter_perf );
320
321 // Record the number of deflations that we observed.
322 total_deflations += n_deflations;
323
324 // Update the maximum number of iterations performed in the
325 // current sweep.
326 n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );
327
328 #ifdef PRINTF
329 printf( "FLA_Tevd_v_opd_var2: deflations observed = %d\n", n_deflations );
330 printf( "FLA_Tevd_v_opd_var2: total deflations observed = %d\n", total_deflations );
331 printf( "FLA_Tevd_v_opd_var2: num iterations = %d\n", n_iter_perf );
332 #endif
333
334 // Store the most recent value of ijBR in m_G_sweep_max.
335 // When the sweep is done, this value will contain the minimum
336 // number of rows of G we can apply and safely include all
337 // non-identity rotations that were computed during the
338 // eigenvalue searches.
339 m_G_sweep_max = ijBR;
340
341 // Make sure we haven't exceeded our maximum iteration count.
342 if ( n_iter_prev >= m_A * n_iter_max )
343 {
344 #ifdef PRINTF
345 printf( "FLA_Tevd_v_opd_var2: reached maximum total number of iterations: %d\n", n_iter_prev );
346 #endif
347 FLA_Abort();
348 //return FLA_FAILURE;
349 }
350 }
351
352 // The sweep is complete. Now we must apply the Givens rotations
353 // that were accumulated during the sweep.
354
355
356 // Recall that the number of columns of U to which we apply
357 // rotations is one more than the number of rotations.
358 n_U_apply = m_G_sweep_max + 1;
359
360 // Apply the Givens rotations that were computed as part of
361 // the previous batch of iterations.
362 //FLA_Apply_G_rf_bld_var8b( n_iter_perf_sweep_max,
363 //FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max,
364 FLA_Apply_G_rf_bld_var3b( n_iter_perf_sweep_max,
365 //FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max,
366 //FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max,
367 m_U,
368 n_U_apply,
369 n_iter_prev,
370 buff_G, rs_G, cs_G,
371 buff_R, rs_R, cs_R,
372 b_alg );
373
374 #ifdef PRINTF
375 printf( "FLA_Tevd_v_opd_var2: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
376 #endif
377
378 // Increment the total number of iterations previously performed.
379 n_iter_prev += n_iter_perf_sweep_max;
380 }
381
382 // Copy the contents of Q to temporary storage.
383 bl1_dcopymt( BLIS1_NO_TRANSPOSE,
384 m_A,
385 m_A,
386 buff_U, rs_U, cs_U,
387 buff_W, rs_W, cs_W );
388
389
390 // Multiply Q by R, overwriting U.
391 bl1_dgemm( BLIS1_NO_TRANSPOSE,
392 BLIS1_NO_TRANSPOSE,
393 m_A,
394 m_A,
395 m_A,
396 &rone,
397 ( double* )buff_W, rs_W, cs_W,
398 buff_R, rs_R, cs_R,
399 &rzero,
400 ( double* )buff_U, rs_U, cs_U );
401
402 return n_iter_prev;
403 }
404
FLA_Tevd_v_opc_var2(int m_A,int m_U,int n_G,int n_iter_max,float * buff_d,int inc_d,float * buff_e,int inc_e,scomplex * buff_G,int rs_G,int cs_G,float * buff_R,int rs_R,int cs_R,scomplex * buff_W,int rs_W,int cs_W,scomplex * buff_U,int rs_U,int cs_U,int b_alg)405 FLA_Error FLA_Tevd_v_opc_var2( int m_A,
406 int m_U,
407 int n_G,
408 int n_iter_max,
409 float* buff_d, int inc_d,
410 float* buff_e, int inc_e,
411 scomplex* buff_G, int rs_G, int cs_G,
412 float* buff_R, int rs_R, int cs_R,
413 scomplex* buff_W, int rs_W, int cs_W,
414 scomplex* buff_U, int rs_U, int cs_U,
415 int b_alg )
416 {
417 FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
418
419 return FLA_SUCCESS;
420 }
421
422 //#define PRINTF
423
FLA_Tevd_v_opz_var2(int m_A,int m_U,int n_G,int n_iter_max,double * buff_d,int inc_d,double * buff_e,int inc_e,dcomplex * buff_G,int rs_G,int cs_G,double * buff_R,int rs_R,int cs_R,dcomplex * buff_W,int rs_W,int cs_W,dcomplex * buff_U,int rs_U,int cs_U,int b_alg)424 FLA_Error FLA_Tevd_v_opz_var2( int m_A,
425 int m_U,
426 int n_G,
427 int n_iter_max,
428 double* buff_d, int inc_d,
429 double* buff_e, int inc_e,
430 dcomplex* buff_G, int rs_G, int cs_G,
431 double* buff_R, int rs_R, int cs_R,
432 dcomplex* buff_W, int rs_W, int cs_W,
433 dcomplex* buff_U, int rs_U, int cs_U,
434 int b_alg )
435 {
436 dcomplex one = bl1_z1();
437 double rone = bl1_d1();
438 double rzero = bl1_d0();
439
440 dcomplex* G;
441 double* d1;
442 double* e1;
443 int r_val;
444 int done;
445 int m_G_sweep_max;
446 int ij_begin;
447 int ijTL, ijBR;
448 int m_A11;
449 int n_iter_perf;
450 int n_U_apply;
451 int total_deflations;
452 int n_deflations;
453 int n_iter_prev;
454 int n_iter_perf_sweep_max;
455
456 // Initialize our completion flag.
457 done = FALSE;
458
459 // Initialize a counter that holds the maximum number of rows of G
460 // that we would need to initialize for the next sweep.
461 m_G_sweep_max = m_A - 1;
462
463 // Initialize a counter for the total number of iterations performed.
464 n_iter_prev = 0;
465
466 // Initialize R to identity.
467 bl1_dident( m_A,
468 buff_R, rs_R, cs_R );
469
470 // Iterate until the matrix has completely deflated.
471 for ( total_deflations = 0; done != TRUE; )
472 {
473
474 // Initialize G to contain only identity rotations.
475 bl1_zsetm( m_G_sweep_max,
476 n_G,
477 &one,
478 buff_G, rs_G, cs_G );
479
480 // Keep track of the maximum number of iterations performed in the
481 // current sweep. This is used when applying the sweep's Givens
482 // rotations.
483 n_iter_perf_sweep_max = 0;
484
485 // Perform a sweep: Move through the matrix and perform a tridiagonal
486 // EVD on each non-zero submatrix that is encountered. During the
487 // first time through, ijTL will be 0 and ijBR will be m_A - 1.
488 for ( ij_begin = 0; ij_begin < m_A; )
489 {
490
491 #ifdef PRINTF
492 if ( ij_begin == 0 )
493 printf( "FLA_Tevd_v_opz_var2: beginning new sweep (ij_begin = %d)\n", ij_begin );
494 #endif
495
496 // Search for the first submatrix along the diagonal that is
497 // bounded by zeroes (or endpoints of the matrix). If no
498 // submatrix is found (ie: if the entire subdiagonal is zero
499 // then FLA_FAILURE is returned. This function also inspects
500 // subdiagonal elements for proximity to zero. If a given
501 // element is close enough to zero, then it is deemed
502 // converged and manually set to zero.
503 r_val = FLA_Tevd_find_submatrix_opd( m_A,
504 ij_begin,
505 buff_d, inc_d,
506 buff_e, inc_e,
507 &ijTL,
508 &ijBR );
509
510 // Verify that a submatrix was found. If one was not found,
511 // then we are done with the current sweep. Furthermore, if
512 // a submatrix was not found AND we began our search at the
513 // beginning of the matrix (ie: ij_begin == 0), then the
514 // matrix has completely deflated and so we are done with
515 // Francis step iteration.
516 if ( r_val == FLA_FAILURE )
517 {
518 if ( ij_begin == 0 )
519 {
520 #ifdef PRINTF
521 printf( "FLA_Tevd_v_opz_var2: subdiagonal is completely zero.\n" );
522 printf( "FLA_Tevd_v_opz_var2: Francis iteration is done!\n" );
523 #endif
524 done = TRUE;
525 }
526
527 // Break out of the current sweep so we can apply the last
528 // remaining Givens rotations.
529 break;
530 }
531
532 // If we got this far, then:
533 // (a) ijTL refers to the index of the first non-zero
534 // subdiagonal along the diagonal, and
535 // (b) ijBR refers to either:
536 // - the first zero element that occurs after ijTL, or
537 // - the the last diagonal element.
538 // Note that ijTL and ijBR also correspond to the first and
539 // last diagonal elements of the submatrix of interest. Thus,
540 // we may compute the dimension of this submatrix as:
541 m_A11 = ijBR - ijTL + 1;
542
543 #ifdef PRINTF
544 printf( "FLA_Tevd_v_opz_var2: ij_begin = %d\n", ij_begin );
545 printf( "FLA_Tevd_v_opz_var2: ijTL = %d\n", ijTL );
546 printf( "FLA_Tevd_v_opz_var2: ijBR = %d\n", ijBR );
547 printf( "FLA_Tevd_v_opz_var2: m_A11 = %d\n", m_A11 );
548 #endif
549
550 // Adjust ij_begin, which gets us ready for the next subproblem, if
551 // there is one.
552 ij_begin = ijBR + 1;
553
554 // Index to the submatrices upon which we will operate.
555 d1 = buff_d + ijTL * inc_d;
556 e1 = buff_e + ijTL * inc_e;
557 G = buff_G + ijTL * rs_G;
558
559 // Search for a batch of eigenvalues, recursing on deflated
560 // subproblems whenever a split occurs. Iteration continues
561 // as long as
562 // (a) there is still matrix left to operate on, and
563 // (b) the number of iterations performed in this batch is
564 // less than n_G.
565 // If/when either of the two above conditions fails to hold,
566 // the function returns.
567 n_deflations = FLA_Tevd_iteracc_v_opd_var1( m_A11,
568 n_G,
569 ijTL,
570 d1, inc_d,
571 e1, inc_e,
572 G, rs_G, cs_G,
573 &n_iter_perf );
574
575 // Record the number of deflations that we observed.
576 total_deflations += n_deflations;
577
578 // Update the maximum number of iterations performed in the
579 // current sweep.
580 n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );
581
582 #ifdef PRINTF
583 printf( "FLA_Tevd_v_opz_var2: deflations observed = %d\n", n_deflations );
584 printf( "FLA_Tevd_v_opz_var2: total deflations observed = %d\n", total_deflations );
585 printf( "FLA_Tevd_v_opz_var2: num iterations = %d\n", n_iter_perf );
586 #endif
587
588 // Store the most recent value of ijBR in m_G_sweep_max.
589 // When the sweep is done, this value will contain the minimum
590 // number of rows of G we can apply and safely include all
591 // non-identity rotations that were computed during the
592 // eigenvalue searches.
593 m_G_sweep_max = ijBR;
594
595 // Make sure we haven't exceeded our maximum iteration count.
596 if ( n_iter_prev >= m_A * n_iter_max )
597 {
598 #ifdef PRINTF
599 printf( "FLA_Tevd_v_opz_var2: reached maximum total number of iterations: %d\n", n_iter_prev );
600 #endif
601 FLA_Abort();
602 //return FLA_FAILURE;
603 }
604 }
605
606 // The sweep is complete. Now we must apply the Givens rotations
607 // that were accumulated during the sweep.
608
609
610 // Recall that the number of columns of U to which we apply
611 // rotations is one more than the number of rotations.
612 n_U_apply = m_G_sweep_max + 1;
613
614 // Apply the Givens rotations that were computed as part of
615 // the previous batch of iterations.
616 //FLA_Apply_G_rf_bld_var8b( n_iter_perf_sweep_max,
617 //FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max,
618 FLA_Apply_G_rf_bld_var3b( n_iter_perf_sweep_max,
619 //FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max,
620 //FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max,
621 m_U,
622 n_U_apply,
623 n_iter_prev,
624 buff_G, rs_G, cs_G,
625 buff_R, rs_R, cs_R,
626 b_alg );
627
628 #ifdef PRINTF
629 printf( "FLA_Tevd_v_opz_var2: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
630 #endif
631
632 // Increment the total number of iterations previously performed.
633 n_iter_prev += n_iter_perf_sweep_max;
634 }
635
636 // Copy the contents of Q to temporary storage.
637 bl1_zcopymt( BLIS1_NO_TRANSPOSE,
638 m_A,
639 m_A,
640 buff_U, rs_U, cs_U,
641 buff_W, rs_W, cs_W );
642
643
644 // Multiply Q by R, overwriting U.
645 bl1_dgemm( BLIS1_NO_TRANSPOSE,
646 BLIS1_NO_TRANSPOSE,
647 2*m_A,
648 m_A,
649 m_A,
650 &rone,
651 ( double* )buff_W, rs_W, 2*cs_W,
652 buff_R, rs_R, cs_R,
653 &rzero,
654 ( double* )buff_U, rs_U, 2*cs_U );
655
656 return n_iter_prev;
657 }
658
659