1 /*
2 
3     Copyright (C) 2014, The University of Texas at Austin
4 
5     This file is part of libflame and is available under the 3-Clause
6     BSD license, which can be found in the LICENSE file at the top-level
7     directory, or at http://opensource.org/licenses/BSD-3-Clause
8 
9 */
10 
11 #include "FLAME.h"
12 
FLA_Tevd_v_opt_var2(dim_t n_iter_max,FLA_Obj d,FLA_Obj e,FLA_Obj G,FLA_Obj R,FLA_Obj W,FLA_Obj U,dim_t b_alg)13 FLA_Error FLA_Tevd_v_opt_var2( dim_t n_iter_max, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj R, FLA_Obj W, FLA_Obj U, dim_t b_alg )
14 {
15 	FLA_Error    r_val = FLA_SUCCESS;
16 	FLA_Datatype datatype;
17 	int          m_A, m_U, n_G;
18 	int          inc_d;
19 	int          inc_e;
20 	int          rs_G, cs_G;
21 	int          rs_R, cs_R;
22 	int          rs_U, cs_U;
23 	int          rs_W, cs_W;
24 
25 	datatype = FLA_Obj_datatype( U );
26 
27 	m_A       = FLA_Obj_vector_dim( d );
28 	m_U       = FLA_Obj_length( U );
29 	n_G       = FLA_Obj_width( G );
30 
31 	inc_d     = FLA_Obj_vector_inc( d );
32 	inc_e     = FLA_Obj_vector_inc( e );
33 
34 	rs_G      = FLA_Obj_row_stride( G );
35 	cs_G      = FLA_Obj_col_stride( G );
36 
37 	rs_R      = FLA_Obj_row_stride( R );
38 	cs_R      = FLA_Obj_col_stride( R );
39 
40 	rs_W      = FLA_Obj_row_stride( W );
41 	cs_W      = FLA_Obj_col_stride( W );
42 
43 	rs_U      = FLA_Obj_row_stride( U );
44 	cs_U      = FLA_Obj_col_stride( U );
45 
46 
47 	switch ( datatype )
48 	{
49 		case FLA_FLOAT:
50 		{
51 			float*    buff_d = FLA_FLOAT_PTR( d );
52 			float*    buff_e = FLA_FLOAT_PTR( e );
53 			scomplex* buff_G = FLA_COMPLEX_PTR( G );
54 			float*    buff_R = FLA_FLOAT_PTR( R );
55 			float*    buff_W = FLA_FLOAT_PTR( W );
56 			float*    buff_U = FLA_FLOAT_PTR( U );
57 
58 			r_val = FLA_Tevd_v_ops_var2( m_A,
59 			                             m_U,
60 			                             n_G,
61 			                             n_iter_max,
62 			                             buff_d, inc_d,
63 			                             buff_e, inc_e,
64 			                             buff_G, rs_G, cs_G,
65 			                             buff_R, rs_R, cs_R,
66 			                             buff_W, rs_W, cs_W,
67 			                             buff_U, rs_U, cs_U,
68 			                             b_alg );
69 
70 			break;
71 		}
72 
73 		case FLA_DOUBLE:
74 		{
75 			double*   buff_d = FLA_DOUBLE_PTR( d );
76 			double*   buff_e = FLA_DOUBLE_PTR( e );
77 			dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
78 			double*   buff_R = FLA_DOUBLE_PTR( R );
79 			double*   buff_W = FLA_DOUBLE_PTR( W );
80 			double*   buff_U = FLA_DOUBLE_PTR( U );
81 
82 			r_val = FLA_Tevd_v_opd_var2( m_A,
83 			                             m_U,
84 			                             n_G,
85 			                             n_iter_max,
86 			                             buff_d, inc_d,
87 			                             buff_e, inc_e,
88 			                             buff_G, rs_G, cs_G,
89 			                             buff_R, rs_R, cs_R,
90 			                             buff_W, rs_W, cs_W,
91 			                             buff_U, rs_U, cs_U,
92 			                             b_alg );
93 
94 			break;
95 		}
96 
97 		case FLA_COMPLEX:
98 		{
99 			float*    buff_d = FLA_FLOAT_PTR( d );
100 			float*    buff_e = FLA_FLOAT_PTR( e );
101 			scomplex* buff_G = FLA_COMPLEX_PTR( G );
102 			float*    buff_R = FLA_FLOAT_PTR( R );
103 			scomplex* buff_W = FLA_COMPLEX_PTR( W );
104 			scomplex* buff_U = FLA_COMPLEX_PTR( U );
105 
106 			r_val = FLA_Tevd_v_opc_var2( m_A,
107 			                             m_U,
108 			                             n_G,
109 			                             n_iter_max,
110 			                             buff_d, inc_d,
111 			                             buff_e, inc_e,
112 			                             buff_G, rs_G, cs_G,
113 			                             buff_R, rs_R, cs_R,
114 			                             buff_W, rs_W, cs_W,
115 			                             buff_U, rs_U, cs_U,
116 			                             b_alg );
117 
118 			break;
119 		}
120 
121 		case FLA_DOUBLE_COMPLEX:
122 		{
123 			double*   buff_d = FLA_DOUBLE_PTR( d );
124 			double*   buff_e = FLA_DOUBLE_PTR( e );
125 			dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
126 			double*   buff_R = FLA_DOUBLE_PTR( R );
127 			dcomplex* buff_W = FLA_DOUBLE_COMPLEX_PTR( W );
128 			dcomplex* buff_U = FLA_DOUBLE_COMPLEX_PTR( U );
129 
130 			r_val = FLA_Tevd_v_opz_var2( m_A,
131 			                             m_U,
132 			                             n_G,
133 			                             n_iter_max,
134 			                             buff_d, inc_d,
135 			                             buff_e, inc_e,
136 			                             buff_G, rs_G, cs_G,
137 			                             buff_R, rs_R, cs_R,
138 			                             buff_W, rs_W, cs_W,
139 			                             buff_U, rs_U, cs_U,
140 			                             b_alg );
141 
142 			break;
143 		}
144 	}
145 
146 	return r_val;
147 }
148 
149 
150 
FLA_Tevd_v_ops_var2(int m_A,int m_U,int n_G,int n_iter_max,float * buff_d,int inc_d,float * buff_e,int inc_e,scomplex * buff_G,int rs_G,int cs_G,float * buff_R,int rs_R,int cs_R,float * buff_W,int rs_W,int cs_W,float * buff_U,int rs_U,int cs_U,int b_alg)151 FLA_Error FLA_Tevd_v_ops_var2( int       m_A,
152                                int       m_U,
153                                int       n_G,
154                                int       n_iter_max,
155                                float*    buff_d, int inc_d,
156                                float*    buff_e, int inc_e,
157                                scomplex* buff_G, int rs_G, int cs_G,
158                                float*    buff_R, int rs_R, int cs_R,
159                                float*    buff_W, int rs_W, int cs_W,
160                                float*    buff_U, int rs_U, int cs_U,
161                                int       b_alg )
162 {
163 	FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
164 
165 	return FLA_SUCCESS;
166 }
167 
168 
169 
FLA_Tevd_v_opd_var2(int m_A,int m_U,int n_G,int n_iter_max,double * buff_d,int inc_d,double * buff_e,int inc_e,dcomplex * buff_G,int rs_G,int cs_G,double * buff_R,int rs_R,int cs_R,double * buff_W,int rs_W,int cs_W,double * buff_U,int rs_U,int cs_U,int b_alg)170 FLA_Error FLA_Tevd_v_opd_var2( int       m_A,
171                                int       m_U,
172                                int       n_G,
173                                int       n_iter_max,
174                                double*   buff_d, int inc_d,
175                                double*   buff_e, int inc_e,
176                                dcomplex* buff_G, int rs_G, int cs_G,
177                                double*   buff_R, int rs_R, int cs_R,
178                                double*   buff_W, int rs_W, int cs_W,
179                                double*   buff_U, int rs_U, int cs_U,
180                                int       b_alg )
181 {
182 	dcomplex  one   = bl1_z1();
183 	double    rone  = bl1_d1();
184 	double    rzero = bl1_d0();
185 
186 	dcomplex* G;
187 	double*   d1;
188 	double*   e1;
189 	int       r_val;
190 	int       done;
191 	int       m_G_sweep_max;
192 	int       ij_begin;
193 	int       ijTL, ijBR;
194 	int       m_A11;
195 	int       n_iter_perf;
196 	int       n_U_apply;
197 	int       total_deflations;
198 	int       n_deflations;
199 	int       n_iter_prev;
200 	int       n_iter_perf_sweep_max;
201 
202 	// Initialize our completion flag.
203 	done = FALSE;
204 
205 	// Initialize a counter that holds the maximum number of rows of G
206 	// that we would need to initialize for the next sweep.
207 	m_G_sweep_max = m_A - 1;
208 
209 	// Initialize a counter for the total number of iterations performed.
210 	n_iter_prev = 0;
211 
212 	// Initialize R to identity.
213 	bl1_dident( m_A,
214 	            buff_R, rs_R, cs_R );
215 
216 	// Iterate until the matrix has completely deflated.
217 	for ( total_deflations = 0; done != TRUE; )
218 	{
219 
220 		// Initialize G to contain only identity rotations.
221 		bl1_zsetm( m_G_sweep_max,
222 		           n_G,
223 		           &one,
224 		           buff_G, rs_G, cs_G );
225 
226 		// Keep track of the maximum number of iterations performed in the
227 		// current sweep. This is used when applying the sweep's Givens
228 		// rotations.
229 		n_iter_perf_sweep_max = 0;
230 
231 		// Perform a sweep: Move through the matrix and perform a tridiagonal
232 		// EVD on each non-zero submatrix that is encountered. During the
233 		// first time through, ijTL will be 0 and ijBR will be m_A - 1.
234 		for ( ij_begin = 0; ij_begin < m_A;  )
235 		{
236 
237 #ifdef PRINTF
238 if ( ij_begin == 0 )
239 printf( "FLA_Tevd_v_opd_var2: beginning new sweep (ij_begin = %d)\n", ij_begin );
240 #endif
241 
242 			// Search for the first submatrix along the diagonal that is
243 			// bounded by zeroes (or endpoints of the matrix). If no
244 			// submatrix is found (ie: if the entire subdiagonal is zero
245 			// then FLA_FAILURE is returned. This function also inspects
246 			// subdiagonal elements for proximity to zero. If a given
247 			// element is close enough to zero, then it is deemed
248 			// converged and manually set to zero.
249 			r_val = FLA_Tevd_find_submatrix_opd( m_A,
250 			                                     ij_begin,
251 			                                     buff_d, inc_d,
252 			                                     buff_e, inc_e,
253 			                                     &ijTL,
254 			                                     &ijBR );
255 
256 			// Verify that a submatrix was found. If one was not found,
257 			// then we are done with the current sweep. Furthermore, if
258 			// a submatrix was not found AND we began our search at the
259 			// beginning of the matrix (ie: ij_begin == 0), then the
260 			// matrix has completely deflated and so we are done with
261 			// Francis step iteration.
262 			if ( r_val == FLA_FAILURE )
263 			{
264 				if ( ij_begin == 0 )
265 				{
266 #ifdef PRINTF
267 printf( "FLA_Tevd_v_opd_var2: subdiagonal is completely zero.\n" );
268 printf( "FLA_Tevd_v_opd_var2: Francis iteration is done!\n" );
269 #endif
270 					done = TRUE;
271 				}
272 
273 				// Break out of the current sweep so we can apply the last
274 				// remaining Givens rotations.
275 				break;
276 			}
277 
278 			// If we got this far, then:
279 			//   (a) ijTL refers to the index of the first non-zero
280 			//       subdiagonal along the diagonal, and
281 			//   (b) ijBR refers to either:
282 			//       - the first zero element that occurs after ijTL, or
283 			//       - the the last diagonal element.
284 			// Note that ijTL and ijBR also correspond to the first and
285 			// last diagonal elements of the submatrix of interest. Thus,
286 			// we may compute the dimension of this submatrix as:
287 			m_A11 = ijBR - ijTL + 1;
288 
289 #ifdef PRINTF
290 printf( "FLA_Tevd_v_opd_var2: ij_begin = %d\n", ij_begin );
291 printf( "FLA_Tevd_v_opd_var2: ijTL     = %d\n", ijTL );
292 printf( "FLA_Tevd_v_opd_var2: ijBR     = %d\n", ijBR );
293 printf( "FLA_Tevd_v_opd_var2: m_A11    = %d\n", m_A11 );
294 #endif
295 
296 			// Adjust ij_begin, which gets us ready for the next subproblem, if
297 			// there is one.
298 			ij_begin = ijBR + 1;
299 
300 			// Index to the submatrices upon which we will operate.
301 			d1 = buff_d + ijTL * inc_d;
302 			e1 = buff_e + ijTL * inc_e;
303 			G  = buff_G + ijTL * rs_G;
304 
305 			// Search for a batch of eigenvalues, recursing on deflated
306 			// subproblems whenever a split occurs. Iteration continues
307 			// as long as
308 			//   (a) there is still matrix left to operate on, and
309 			//   (b) the number of iterations performed in this batch is
310 			//       less than n_G.
311 			// If/when either of the two above conditions fails to hold,
312 			// the function returns.
313 			n_deflations = FLA_Tevd_iteracc_v_opd_var1( m_A11,
314 			                                            n_G,
315 			                                            ijTL,
316 			                                            d1, inc_d,
317 			                                            e1, inc_e,
318 			                                            G,  rs_G, cs_G,
319 			                                            &n_iter_perf );
320 
321 			// Record the number of deflations that we observed.
322 			total_deflations += n_deflations;
323 
324 			// Update the maximum number of iterations performed in the
325 			// current sweep.
326 			n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );
327 
328 #ifdef PRINTF
329 printf( "FLA_Tevd_v_opd_var2: deflations observed       = %d\n", n_deflations );
330 printf( "FLA_Tevd_v_opd_var2: total deflations observed = %d\n", total_deflations );
331 printf( "FLA_Tevd_v_opd_var2: num iterations            = %d\n", n_iter_perf );
332 #endif
333 
334 			// Store the most recent value of ijBR in m_G_sweep_max.
335 			// When the sweep is done, this value will contain the minimum
336 			// number of rows of G we can apply and safely include all
337 			// non-identity rotations that were computed during the
338 			// eigenvalue searches.
339 			m_G_sweep_max = ijBR;
340 
341 			// Make sure we haven't exceeded our maximum iteration count.
342 			if ( n_iter_prev >= m_A * n_iter_max )
343 			{
344 #ifdef PRINTF
345 printf( "FLA_Tevd_v_opd_var2: reached maximum total number of iterations: %d\n", n_iter_prev );
346 #endif
347 				FLA_Abort();
348 				//return FLA_FAILURE;
349 			}
350 		}
351 
352 		// The sweep is complete. Now we must apply the Givens rotations
353 		// that were accumulated during the sweep.
354 
355 
356 		// Recall that the number of columns of U to which we apply
357 		// rotations is one more than the number of rotations.
358 		n_U_apply = m_G_sweep_max + 1;
359 
360 		// Apply the Givens rotations that were computed as part of
361 		// the previous batch of iterations.
362 		//FLA_Apply_G_rf_bld_var8b( n_iter_perf_sweep_max,
363 		//FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max,
364 		FLA_Apply_G_rf_bld_var3b( n_iter_perf_sweep_max,
365 		//FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max,
366 		//FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max,
367 		                          m_U,
368 		                          n_U_apply,
369 		                          n_iter_prev,
370 		                          buff_G, rs_G, cs_G,
371 		                          buff_R, rs_R, cs_R,
372 		                          b_alg );
373 
374 #ifdef PRINTF
375 printf( "FLA_Tevd_v_opd_var2: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
376 #endif
377 
378 		// Increment the total number of iterations previously performed.
379 		n_iter_prev += n_iter_perf_sweep_max;
380 	}
381 
382 	// Copy the contents of Q to temporary storage.
383 	bl1_dcopymt( BLIS1_NO_TRANSPOSE,
384 	             m_A,
385 	             m_A,
386 	             buff_U, rs_U, cs_U,
387 	             buff_W, rs_W, cs_W );
388 
389 
390 	// Multiply Q by R, overwriting U.
391 	bl1_dgemm( BLIS1_NO_TRANSPOSE,
392 	           BLIS1_NO_TRANSPOSE,
393 	           m_A,
394 	           m_A,
395 	           m_A,
396 	           &rone,
397 	           ( double* )buff_W, rs_W, cs_W,
398 	                      buff_R, rs_R, cs_R,
399 	           &rzero,
400 	           ( double* )buff_U, rs_U, cs_U );
401 
402 	return n_iter_prev;
403 }
404 
FLA_Tevd_v_opc_var2(int m_A,int m_U,int n_G,int n_iter_max,float * buff_d,int inc_d,float * buff_e,int inc_e,scomplex * buff_G,int rs_G,int cs_G,float * buff_R,int rs_R,int cs_R,scomplex * buff_W,int rs_W,int cs_W,scomplex * buff_U,int rs_U,int cs_U,int b_alg)405 FLA_Error FLA_Tevd_v_opc_var2( int       m_A,
406                                int       m_U,
407                                int       n_G,
408                                int       n_iter_max,
409                                float*    buff_d, int inc_d,
410                                float*    buff_e, int inc_e,
411                                scomplex* buff_G, int rs_G, int cs_G,
412                                float*    buff_R, int rs_R, int cs_R,
413                                scomplex* buff_W, int rs_W, int cs_W,
414                                scomplex* buff_U, int rs_U, int cs_U,
415                                int       b_alg )
416 {
417 	FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
418 
419 	return FLA_SUCCESS;
420 }
421 
422 //#define PRINTF
423 
FLA_Tevd_v_opz_var2(int m_A,int m_U,int n_G,int n_iter_max,double * buff_d,int inc_d,double * buff_e,int inc_e,dcomplex * buff_G,int rs_G,int cs_G,double * buff_R,int rs_R,int cs_R,dcomplex * buff_W,int rs_W,int cs_W,dcomplex * buff_U,int rs_U,int cs_U,int b_alg)424 FLA_Error FLA_Tevd_v_opz_var2( int       m_A,
425                                int       m_U,
426                                int       n_G,
427                                int       n_iter_max,
428                                double*   buff_d, int inc_d,
429                                double*   buff_e, int inc_e,
430                                dcomplex* buff_G, int rs_G, int cs_G,
431                                double*   buff_R, int rs_R, int cs_R,
432                                dcomplex* buff_W, int rs_W, int cs_W,
433                                dcomplex* buff_U, int rs_U, int cs_U,
434                                int       b_alg )
435 {
436 	dcomplex  one   = bl1_z1();
437 	double    rone  = bl1_d1();
438 	double    rzero = bl1_d0();
439 
440 	dcomplex* G;
441 	double*   d1;
442 	double*   e1;
443 	int       r_val;
444 	int       done;
445 	int       m_G_sweep_max;
446 	int       ij_begin;
447 	int       ijTL, ijBR;
448 	int       m_A11;
449 	int       n_iter_perf;
450 	int       n_U_apply;
451 	int       total_deflations;
452 	int       n_deflations;
453 	int       n_iter_prev;
454 	int       n_iter_perf_sweep_max;
455 
456 	// Initialize our completion flag.
457 	done = FALSE;
458 
459 	// Initialize a counter that holds the maximum number of rows of G
460 	// that we would need to initialize for the next sweep.
461 	m_G_sweep_max = m_A - 1;
462 
463 	// Initialize a counter for the total number of iterations performed.
464 	n_iter_prev = 0;
465 
466 	// Initialize R to identity.
467 	bl1_dident( m_A,
468 	            buff_R, rs_R, cs_R );
469 
470 	// Iterate until the matrix has completely deflated.
471 	for ( total_deflations = 0; done != TRUE; )
472 	{
473 
474 		// Initialize G to contain only identity rotations.
475 		bl1_zsetm( m_G_sweep_max,
476 		           n_G,
477 		           &one,
478 		           buff_G, rs_G, cs_G );
479 
480 		// Keep track of the maximum number of iterations performed in the
481 		// current sweep. This is used when applying the sweep's Givens
482 		// rotations.
483 		n_iter_perf_sweep_max = 0;
484 
485 		// Perform a sweep: Move through the matrix and perform a tridiagonal
486 		// EVD on each non-zero submatrix that is encountered. During the
487 		// first time through, ijTL will be 0 and ijBR will be m_A - 1.
488 		for ( ij_begin = 0; ij_begin < m_A;  )
489 		{
490 
491 #ifdef PRINTF
492 if ( ij_begin == 0 )
493 printf( "FLA_Tevd_v_opz_var2: beginning new sweep (ij_begin = %d)\n", ij_begin );
494 #endif
495 
496 			// Search for the first submatrix along the diagonal that is
497 			// bounded by zeroes (or endpoints of the matrix). If no
498 			// submatrix is found (ie: if the entire subdiagonal is zero
499 			// then FLA_FAILURE is returned. This function also inspects
500 			// subdiagonal elements for proximity to zero. If a given
501 			// element is close enough to zero, then it is deemed
502 			// converged and manually set to zero.
503 			r_val = FLA_Tevd_find_submatrix_opd( m_A,
504 			                                     ij_begin,
505 			                                     buff_d, inc_d,
506 			                                     buff_e, inc_e,
507 			                                     &ijTL,
508 			                                     &ijBR );
509 
510 			// Verify that a submatrix was found. If one was not found,
511 			// then we are done with the current sweep. Furthermore, if
512 			// a submatrix was not found AND we began our search at the
513 			// beginning of the matrix (ie: ij_begin == 0), then the
514 			// matrix has completely deflated and so we are done with
515 			// Francis step iteration.
516 			if ( r_val == FLA_FAILURE )
517 			{
518 				if ( ij_begin == 0 )
519 				{
520 #ifdef PRINTF
521 printf( "FLA_Tevd_v_opz_var2: subdiagonal is completely zero.\n" );
522 printf( "FLA_Tevd_v_opz_var2: Francis iteration is done!\n" );
523 #endif
524 					done = TRUE;
525 				}
526 
527 				// Break out of the current sweep so we can apply the last
528 				// remaining Givens rotations.
529 				break;
530 			}
531 
532 			// If we got this far, then:
533 			//   (a) ijTL refers to the index of the first non-zero
534 			//       subdiagonal along the diagonal, and
535 			//   (b) ijBR refers to either:
536 			//       - the first zero element that occurs after ijTL, or
537 			//       - the the last diagonal element.
538 			// Note that ijTL and ijBR also correspond to the first and
539 			// last diagonal elements of the submatrix of interest. Thus,
540 			// we may compute the dimension of this submatrix as:
541 			m_A11 = ijBR - ijTL + 1;
542 
543 #ifdef PRINTF
544 printf( "FLA_Tevd_v_opz_var2: ij_begin = %d\n", ij_begin );
545 printf( "FLA_Tevd_v_opz_var2: ijTL     = %d\n", ijTL );
546 printf( "FLA_Tevd_v_opz_var2: ijBR     = %d\n", ijBR );
547 printf( "FLA_Tevd_v_opz_var2: m_A11    = %d\n", m_A11 );
548 #endif
549 
550 			// Adjust ij_begin, which gets us ready for the next subproblem, if
551 			// there is one.
552 			ij_begin = ijBR + 1;
553 
554 			// Index to the submatrices upon which we will operate.
555 			d1 = buff_d + ijTL * inc_d;
556 			e1 = buff_e + ijTL * inc_e;
557 			G  = buff_G + ijTL * rs_G;
558 
559 			// Search for a batch of eigenvalues, recursing on deflated
560 			// subproblems whenever a split occurs. Iteration continues
561 			// as long as
562 			//   (a) there is still matrix left to operate on, and
563 			//   (b) the number of iterations performed in this batch is
564 			//       less than n_G.
565 			// If/when either of the two above conditions fails to hold,
566 			// the function returns.
567 			n_deflations = FLA_Tevd_iteracc_v_opd_var1( m_A11,
568 			                                            n_G,
569 			                                            ijTL,
570 			                                            d1, inc_d,
571 			                                            e1, inc_e,
572 			                                            G,  rs_G, cs_G,
573 			                                            &n_iter_perf );
574 
575 			// Record the number of deflations that we observed.
576 			total_deflations += n_deflations;
577 
578 			// Update the maximum number of iterations performed in the
579 			// current sweep.
580 			n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );
581 
582 #ifdef PRINTF
583 printf( "FLA_Tevd_v_opz_var2: deflations observed       = %d\n", n_deflations );
584 printf( "FLA_Tevd_v_opz_var2: total deflations observed = %d\n", total_deflations );
585 printf( "FLA_Tevd_v_opz_var2: num iterations            = %d\n", n_iter_perf );
586 #endif
587 
588 			// Store the most recent value of ijBR in m_G_sweep_max.
589 			// When the sweep is done, this value will contain the minimum
590 			// number of rows of G we can apply and safely include all
591 			// non-identity rotations that were computed during the
592 			// eigenvalue searches.
593 			m_G_sweep_max = ijBR;
594 
595 			// Make sure we haven't exceeded our maximum iteration count.
596 			if ( n_iter_prev >= m_A * n_iter_max )
597 			{
598 #ifdef PRINTF
599 printf( "FLA_Tevd_v_opz_var2: reached maximum total number of iterations: %d\n", n_iter_prev );
600 #endif
601 				FLA_Abort();
602 				//return FLA_FAILURE;
603 			}
604 		}
605 
606 		// The sweep is complete. Now we must apply the Givens rotations
607 		// that were accumulated during the sweep.
608 
609 
610 		// Recall that the number of columns of U to which we apply
611 		// rotations is one more than the number of rotations.
612 		n_U_apply = m_G_sweep_max + 1;
613 
614 		// Apply the Givens rotations that were computed as part of
615 		// the previous batch of iterations.
616 		//FLA_Apply_G_rf_bld_var8b( n_iter_perf_sweep_max,
617 		//FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max,
618 		FLA_Apply_G_rf_bld_var3b( n_iter_perf_sweep_max,
619 		//FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max,
620 		//FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max,
621 		                          m_U,
622 		                          n_U_apply,
623 		                          n_iter_prev,
624 		                          buff_G, rs_G, cs_G,
625 		                          buff_R, rs_R, cs_R,
626 		                          b_alg );
627 
628 #ifdef PRINTF
629 printf( "FLA_Tevd_v_opz_var2: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
630 #endif
631 
632 		// Increment the total number of iterations previously performed.
633 		n_iter_prev += n_iter_perf_sweep_max;
634 	}
635 
636 	// Copy the contents of Q to temporary storage.
637 	bl1_zcopymt( BLIS1_NO_TRANSPOSE,
638 	             m_A,
639 	             m_A,
640 	             buff_U, rs_U, cs_U,
641 	             buff_W, rs_W, cs_W );
642 
643 
644 	// Multiply Q by R, overwriting U.
645 	bl1_dgemm( BLIS1_NO_TRANSPOSE,
646 	           BLIS1_NO_TRANSPOSE,
647 	           2*m_A,
648 	           m_A,
649 	           m_A,
650 	           &rone,
651 	           ( double* )buff_W, rs_W, 2*cs_W,
652 	                      buff_R, rs_R,   cs_R,
653 	           &rzero,
654 	           ( double* )buff_U, rs_U, 2*cs_U );
655 
656 	return n_iter_prev;
657 }
658 
659