1 /*
2
3 Copyright (C) 2014, The University of Texas at Austin
4
5 This file is part of libflame and is available under the 3-Clause
6 BSD license, which can be found in the LICENSE file at the top-level
7 directory, or at http://opensource.org/licenses/BSD-3-Clause
8
9 */
10
11 #include "FLAME.h"
12
FLA_Tevd_n_opt_var1(dim_t n_iter_max,FLA_Obj d,FLA_Obj e,FLA_Obj G,FLA_Obj U)13 FLA_Error FLA_Tevd_n_opt_var1( dim_t n_iter_max, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj U )
14 {
15 FLA_Error r_val = FLA_SUCCESS;
16 FLA_Datatype datatype;
17 int m_A, m_U, n_G;
18 int inc_d;
19 int inc_e;
20 int rs_G, cs_G;
21
22 datatype = FLA_Obj_datatype( U );
23
24 m_A = FLA_Obj_vector_dim( d );
25 m_U = FLA_Obj_vector_dim( d );
26 n_G = FLA_Obj_width( G );
27
28 inc_d = FLA_Obj_vector_inc( d );
29 inc_e = FLA_Obj_vector_inc( e );
30
31 rs_G = FLA_Obj_row_stride( G );
32 cs_G = FLA_Obj_col_stride( G );
33
34 /*
35 FLA_Obj de, deL, deR, deLT, deLB;
36 FLA_Obj_create( FLA_DOUBLE, m_A, 2, 0, 0, &de );
37 FLA_Part_1x2( de, &deL, &deR, 1, FLA_LEFT );
38 FLA_Part_2x1( deL, &deLT,
39 &deLB, 1, FLA_BOTTOM );
40 FLA_Copy( d, deR );
41 FLA_Copy( e, deLT );
42 FLA_Set( FLA_ZERO, deLB );
43 //FLA_Obj_show( "de", de, "%21.17e", "" );
44 */
45
46 switch ( datatype )
47 {
48 case FLA_FLOAT:
49 {
50 float* buff_d = FLA_FLOAT_PTR( d );
51 float* buff_e = FLA_FLOAT_PTR( e );
52 scomplex* buff_G = FLA_COMPLEX_PTR( G );
53
54 r_val = FLA_Tevd_n_ops_var1( m_A,
55 m_U,
56 n_G,
57 n_iter_max,
58 buff_d, inc_d,
59 buff_e, inc_e,
60 buff_G, rs_G, cs_G );
61
62 break;
63 }
64
65 case FLA_DOUBLE:
66 {
67 double* buff_d = FLA_DOUBLE_PTR( d );
68 double* buff_e = FLA_DOUBLE_PTR( e );
69 dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
70
71 r_val = FLA_Tevd_n_opd_var1( m_A,
72 m_U,
73 n_G,
74 n_iter_max,
75 buff_d, inc_d,
76 buff_e, inc_e,
77 buff_G, rs_G, cs_G );
78
79 break;
80 }
81
82 case FLA_COMPLEX:
83 {
84 float* buff_d = FLA_FLOAT_PTR( d );
85 float* buff_e = FLA_FLOAT_PTR( e );
86 scomplex* buff_G = FLA_COMPLEX_PTR( G );
87
88 r_val = FLA_Tevd_n_opc_var1( m_A,
89 m_U,
90 n_G,
91 n_iter_max,
92 buff_d, inc_d,
93 buff_e, inc_e,
94 buff_G, rs_G, cs_G );
95
96 break;
97 }
98
99 case FLA_DOUBLE_COMPLEX:
100 {
101 double* buff_d = FLA_DOUBLE_PTR( d );
102 double* buff_e = FLA_DOUBLE_PTR( e );
103 dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
104
105 r_val = FLA_Tevd_n_opz_var1( m_A,
106 m_U,
107 n_G,
108 n_iter_max,
109 buff_d, inc_d,
110 buff_e, inc_e,
111 buff_G, rs_G, cs_G );
112
113 break;
114 }
115 }
116 /*
117 FLA_Copy( d, deR );
118 FLA_Copy( e, deLT );
119 FLA_Set( FLA_ZERO, deLB );
120 FLA_Sort( FLA_FORWARD, deR );
121 FLA_Obj_show( "de after", de, "%21.17e", "" );
122 double eps = FLA_Mach_params_opd( FLA_MACH_EPS );
123 printf( "epsilon = %21.17e\n", eps );
124 FLA_Obj_free( &de );
125 */
126 return r_val;
127 }
128
129
130
FLA_Tevd_n_ops_var1(int m_A,int m_U,int n_G,int n_iter_max,float * buff_d,int inc_d,float * buff_e,int inc_e,scomplex * buff_G,int rs_G,int cs_G)131 FLA_Error FLA_Tevd_n_ops_var1( int m_A,
132 int m_U,
133 int n_G,
134 int n_iter_max,
135 float* buff_d, int inc_d,
136 float* buff_e, int inc_e,
137 scomplex* buff_G, int rs_G, int cs_G )
138 {
139 return FLA_SUCCESS;
140 }
141
142
143
FLA_Tevd_n_opd_var1(int m_A,int m_U,int n_G,int n_iter_max,double * buff_d,int inc_d,double * buff_e,int inc_e,dcomplex * buff_G,int rs_G,int cs_G)144 FLA_Error FLA_Tevd_n_opd_var1( int m_A,
145 int m_U,
146 int n_G,
147 int n_iter_max,
148 double* buff_d, int inc_d,
149 double* buff_e, int inc_e,
150 dcomplex* buff_G, int rs_G, int cs_G )
151 {
152 return FLA_SUCCESS;
153 }
154
FLA_Tevd_n_opc_var1(int m_A,int m_U,int n_G,int n_iter_max,float * buff_d,int inc_d,float * buff_e,int inc_e,scomplex * buff_G,int rs_G,int cs_G)155 FLA_Error FLA_Tevd_n_opc_var1( int m_A,
156 int m_U,
157 int n_G,
158 int n_iter_max,
159 float* buff_d, int inc_d,
160 float* buff_e, int inc_e,
161 scomplex* buff_G, int rs_G, int cs_G )
162 {
163 return FLA_SUCCESS;
164 }
165
166 //#define PRINTF
167
FLA_Tevd_n_opz_var1(int m_A,int m_U,int n_G,int n_iter_max,double * buff_d,int inc_d,double * buff_e,int inc_e,dcomplex * buff_G,int rs_G,int cs_G)168 FLA_Error FLA_Tevd_n_opz_var1( int m_A,
169 int m_U,
170 int n_G,
171 int n_iter_max,
172 double* buff_d, int inc_d,
173 double* buff_e, int inc_e,
174 dcomplex* buff_G, int rs_G, int cs_G )
175 {
176 dcomplex one = bl1_z1();
177 double rone = bl1_d1();
178
179 double eps;
180 double eps2;
181 double safmin;
182 double ssfmin;
183 double safmax;
184 double ssfmax;
185
186 dcomplex* G;
187 double* d1;
188 double* e1;
189 int r_val;
190 int done;
191 int m_G_sweep_max;
192 int ij_begin;
193 int ijTL, ijBR;
194 int m_A11;
195 int n_iter_perf;
196 int total_deflations;
197 int n_deflations;
198 int n_iter_prev;
199 int n_iter_perf_sweep_max;
200
201 // Initialize some numerical constants.
202 eps = FLA_Mach_params_opd( FLA_MACH_EPS );
203 eps2 = FLA_Mach_params_opd( FLA_MACH_EPS2 );
204 safmin = FLA_Mach_params_opd( FLA_MACH_SFMIN );
205 safmax = rone / safmin;
206 ssfmax = sqrt( safmax ) / 3.0;
207 ssfmin = sqrt( safmin ) / eps2;
208
209 // Initialize our completion flag.
210 done = FALSE;
211
212 // Initialize a counter that holds the maximum number of rows of G
213 // that we would need to initialize for the next sweep.
214 m_G_sweep_max = m_A - 1;
215
216 // Initialize a counter for the total number of iterations performed.
217 n_iter_prev = 0;
218
219 // Iterate until the matrix has completely deflated.
220 for ( total_deflations = 0; done != TRUE; )
221 {
222
223 // Initialize G to contain only identity rotations.
224 bl1_zsetm( m_G_sweep_max,
225 n_G,
226 &one,
227 buff_G, rs_G, cs_G );
228
229 // Keep track of the maximum number of iterations performed in the
230 // current sweep. This is used when applying the sweep's Givens
231 // rotations.
232 n_iter_perf_sweep_max = 0;
233
234 // Perform a sweep: Move through the matrix and perform a tridiagonal
235 // EVD on each non-zero submatrix that is encountered. During the
236 // first time through, ijTL will be 0 and ijBR will be m_A - 1.
237 for ( ij_begin = 0; ij_begin < m_A; )
238 {
239
240 #ifdef PRINTF
241 if ( ij_begin == 0 )
242 printf( "FLA_Tevd_n_opz_var1: beginning new sweep (ij_begin = %d)\n", ij_begin );
243 #endif
244
245 // Search for the first submatrix along the diagonal that is
246 // bounded by zeroes (or endpoints of the matrix). If no
247 // submatrix is found (ie: if the entire subdiagonal is zero
248 // then FLA_FAILURE is returned. This function also inspects
249 // subdiagonal elements for proximity to zero. If a given
250 // element is close enough to zero, then it is deemed
251 // converged and manually set to zero.
252 r_val = FLA_Tevd_find_submatrix_opd( m_A,
253 ij_begin,
254 buff_d, inc_d,
255 buff_e, inc_e,
256 &ijTL,
257 &ijBR );
258
259 // Verify that a submatrix was found. If one was not found,
260 // then we are done with the current sweep. Furthermore, if
261 // a submatrix was not found AND we began our search at the
262 // beginning of the matrix (ie: ij_begin == 0), then the
263 // matrix has completely deflated and so we are done with
264 // Francis step iteration.
265 if ( r_val == FLA_FAILURE )
266 {
267 if ( ij_begin == 0 )
268 {
269 #ifdef PRINTF
270 printf( "FLA_Tevd_n_opz_var1: subdiagonal is completely zero.\n" );
271 printf( "FLA_Tevd_n_opz_var1: Francis iteration is done!\n" );
272 #endif
273 done = TRUE;
274 }
275
276 // Break out of the current sweep so we can apply the last
277 // remaining Givens rotations.
278 break;
279 }
280
281 // If we got this far, then:
282 // (a) ijTL refers to the index of the first non-zero
283 // subdiagonal along the diagonal, and
284 // (b) ijBR refers to either:
285 // - the first zero element that occurs after ijTL, or
286 // - the the last diagonal element.
287 // Note that ijTL and ijBR also correspond to the first and
288 // last diagonal elements of the submatrix of interest. Thus,
289 // we may compute the dimension of this submatrix as:
290 m_A11 = ijBR - ijTL + 1;
291
292 #ifdef PRINTF
293 printf( "FLA_Tevd_n_opz_var1: ij_begin = %d\n", ij_begin );
294 printf( "FLA_Tevd_n_opz_var1: ijTL = %d\n", ijTL );
295 printf( "FLA_Tevd_n_opz_var1: ijBR = %d\n", ijBR );
296 printf( "FLA_Tevd_n_opz_var1: m_A11 = %d\n", m_A11 );
297 #endif
298
299 // Adjust ij_begin, which gets us ready for the next submatrix
300 // search in the current sweep.
301 ij_begin = ijBR + 1;
302
303 // Index to the submatrices upon which we will operate.
304 d1 = buff_d + ijTL * inc_d;
305 e1 = buff_e + ijTL * inc_e;
306 G = buff_G + ijTL * rs_G;
307
308 // Compute the 1-norm (which equals the infinity norm since the
309 // matrix is tridiagonal and symmetric) and perform appropriate
310 // scaling if necessary.
311 /*
312 FLA_Norm1_tridiag( m_A11,
313 d1, inc_d,
314 e1, inc_e,
315 &norm );
316 */
317
318 // Search for a batch of eigenvalues, recursing on deflated
319 // subproblems whenever a split occurs. Iteration continues
320 // as long as:
321 // (a) there is still matrix left to operate on, and
322 // (b) the number of iterations performed in this batch is
323 // less than n_G.
324 // If/when either of the two above conditions fails to hold,
325 // the function returns.
326 n_deflations = FLA_Tevd_iteracc_n_opd_var1( m_A11,
327 n_G,
328 ijTL,
329 d1, inc_d,
330 e1, inc_e,
331 &n_iter_perf );
332
333 // Record the number of deflations that were observed.
334 total_deflations += n_deflations;
335
336 // Update the maximum number of iterations performed in the
337 // current sweep.
338 n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );
339
340 #ifdef PRINTF
341 printf( "FLA_Tevd_n_opz_var1: deflations observed = %d\n", n_deflations );
342 printf( "FLA_Tevd_n_opz_var1: total deflations observed = %d\n", total_deflations );
343 printf( "FLA_Tevd_n_opz_var1: num iterations performed = %d\n", n_iter_perf );
344 #endif
345
346 // Store the most recent value of ijBR in m_G_sweep_max.
347 // When the sweep is done, this value will contain the minimum
348 // number of rows of G we can apply and safely include all
349 // non-identity rotations that were computed during the
350 // eigenvalue searches.
351 m_G_sweep_max = ijBR;
352
353 // Make sure we haven't exceeded our maximum iteration count.
354 if ( n_iter_prev >= m_A * n_iter_max )
355 {
356 #ifdef PRINTF
357 printf( "FLA_Tevd_n_opz_var1: reached maximum total number of iterations: %d\n", n_iter_prev );
358 #endif
359 FLA_Abort();
360 //return FLA_FAILURE;
361 }
362 }
363
364 // The sweep is complete.
365
366 // Increment the total number of iterations previously performed.
367 n_iter_prev += n_iter_perf_sweep_max;
368
369 #ifdef PRINTF
370 printf( "FLA_Tevd_n_opz_var1: total number of iterations performed: %d\n", n_iter_prev );
371 #endif
372 }
373
374 //return FLA_SUCCESS;
375 return n_iter_prev;
376 }
377
378