1 /*
2 
3     Copyright (C) 2014, The University of Texas at Austin
4 
5     This file is part of libflame and is available under the 3-Clause
6     BSD license, which can be found in the LICENSE file at the top-level
7     directory, or at http://opensource.org/licenses/BSD-3-Clause
8 
9 */
10 
11 #include "FLAME.h"
12 #include "test_libflame.h"
13 
14 #define NUM_PARAM_COMBOS 2
15 #define NUM_MATRIX_ARGS  2
16 #define FIRST_VARIANT    1
17 #define LAST_VARIANT     3
18 
19 // Static variables.
20 static char* op_str                   = "Reduction to tridiagonal form via UT transform";
21 static char* fla_front_str            = "FLA_Tridiag_UT";
22 static char* fla_unb_var_str          = "unb_var";
23 static char* fla_opt_var_str          = "opt_var";
24 static char* fla_blk_var_str          = "blk_var";
25 static char* pc_str[NUM_PARAM_COMBOS] = { "l", "u" };
26 static test_thresh_t thresh           = { 1e-03, 1e-04,   // warn, pass for s
27                                           1e-12, 1e-13,   // warn, pass for d
28                                           1e-03, 1e-04,   // warn, pass for c
29                                           1e-12, 1e-13 }; // warn, pass for z
30 
31 static fla_tridiagut_t* tridiagut_cntl_opt;
32 static fla_tridiagut_t* tridiagut_cntl_unb;
33 static fla_tridiagut_t* tridiagut_cntl_blk;
34 static fla_blocksize_t* tridiagut_cntl_bsize;
35 
36 // Local prototypes.
37 void libfla_test_tridiagut_experiment( test_params_t params,
38                                        unsigned int  var,
39                                        char*         sc_str,
40                                        FLA_Datatype  datatype,
41                                        unsigned int  p,
42                                        unsigned int  pci,
43                                        unsigned int  n_repeats,
44                                        signed int    impl,
45                                        double*       perf,
46                                        double*       residual );
47 void libfla_test_tridiagut_impl( int      impl,
48                                  FLA_Uplo uplo,
49                                  FLA_Obj  A,
50                                  FLA_Obj  T );
51 void libfla_test_tridiagut_cntl_create( unsigned int var,
52                                         dim_t        b_alg_flat );
53 void libfla_test_tridiagut_cntl_free( void );
54 
55 
libfla_test_tridiagut(FILE * output_stream,test_params_t params,test_op_t op)56 void libfla_test_tridiagut( FILE* output_stream, test_params_t params, test_op_t op )
57 {
58 	libfla_test_output_info( "--- %s ---\n", op_str );
59 	libfla_test_output_info( "\n" );
60 
61 	if ( op.fla_front == ENABLE )
62 	{
63 		//libfla_test_output_info( "%s() front-end...\n", fla_front_str );
64 		//libfla_test_output_info( "\n" );
65 		libfla_test_op_driver( fla_front_str, NULL,
66 		                       FIRST_VARIANT, LAST_VARIANT,
67 		                       NUM_PARAM_COMBOS, pc_str,
68 		                       NUM_MATRIX_ARGS,
69 		                       FLA_TEST_FLAT_FRONT_END,
70 		                       params, thresh, libfla_test_tridiagut_experiment );
71 	}
72 
73 	if ( op.fla_unb_vars == ENABLE )
74 	{
75 		//libfla_test_output_info( "%s() unblocked variants...\n", fla_front_str );
76 		//libfla_test_output_info( "\n" );
77 		libfla_test_op_driver( fla_front_str, fla_unb_var_str,
78 		                       FIRST_VARIANT, LAST_VARIANT,
79 		                       NUM_PARAM_COMBOS, pc_str,
80 		                       NUM_MATRIX_ARGS,
81 		                       FLA_TEST_FLAT_UNB_VAR,
82 		                       params, thresh, libfla_test_tridiagut_experiment );
83 	}
84 
85 	if ( op.fla_opt_vars == ENABLE )
86 	{
87 		//libfla_test_output_info( "%s() optimized unblocked variants...\n", fla_front_str );
88 		//libfla_test_output_info( "\n" );
89 		libfla_test_op_driver( fla_front_str, fla_opt_var_str,
90 		                       FIRST_VARIANT, LAST_VARIANT,
91 		                       NUM_PARAM_COMBOS, pc_str,
92 		                       NUM_MATRIX_ARGS,
93 		                       FLA_TEST_FLAT_OPT_VAR,
94 		                       params, thresh, libfla_test_tridiagut_experiment );
95 	}
96 
97 	if ( op.fla_blk_vars == ENABLE )
98 	{
99 		//libfla_test_output_info( "%s() blocked variants...\n", fla_front_str );
100 		//libfla_test_output_info( "\n" );
101 		libfla_test_op_driver( fla_front_str, fla_blk_var_str,
102 		                       FIRST_VARIANT, LAST_VARIANT,
103 		                       NUM_PARAM_COMBOS, pc_str,
104 		                       NUM_MATRIX_ARGS,
105 		                       FLA_TEST_FLAT_BLK_VAR,
106 		                       params, thresh, libfla_test_tridiagut_experiment );
107 	}
108 
109 }
110 
111 
112 
libfla_test_tridiagut_experiment(test_params_t params,unsigned int var,char * sc_str,FLA_Datatype datatype,unsigned int p_cur,unsigned int pci,unsigned int n_repeats,signed int impl,double * perf,double * residual)113 void libfla_test_tridiagut_experiment( test_params_t params,
114                                        unsigned int  var,
115                                        char*         sc_str,
116                                        FLA_Datatype  datatype,
117                                        unsigned int  p_cur,
118                                        unsigned int  pci,
119                                        unsigned int  n_repeats,
120                                        signed int    impl,
121                                        double*       perf,
122                                        double*       residual )
123 {
124 	dim_t        b_alg_flat = params.b_alg_flat;
125 	double       time_min   = 1e9;
126 	double       time;
127 	unsigned int i;
128 	unsigned int m;
129 	signed int   m_input    = -1;
130 	FLA_Uplo     uplo;
131 	FLA_Obj      A, T, W, Qh, AQ, QhAQ, norm;
132 	FLA_Obj      AT, AB;
133 	FLA_Obj      QhT, QhB;
134 	FLA_Obj      A_save;
135 
136 	// Return early for 'u' parameter case since it does not yet exist.
137 	if ( pc_str[pci][0] == 'u' )
138 	{
139 		*perf     = 0.0;
140 		*residual = 0.0;
141 		return;
142 	}
143 
144 	// Determine the dimensions.
145 	if ( m_input < 0 ) m = p_cur * abs(m_input);
146 	else               m = p_cur;
147 
148 	// Translate parameter characters to libflame constants.
149 	FLA_Param_map_char_to_flame_uplo( &pc_str[pci][0], &uplo );
150 
151 	// Create the matrices for the current operation.
152 	libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[0], m, m, &A );
153 
154 	if ( impl == FLA_TEST_FLAT_FRONT_END ||
155 	     impl == FLA_TEST_FLAT_BLK_VAR )
156 	{
157 		libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], b_alg_flat, m, &T );
158 		libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], b_alg_flat, m, &W );
159 	}
160 	else
161 	{
162 		libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], m, m, &T );
163 		libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], m, m, &W );
164 	}
165 
166 	// Initialize the test matrices.
167 	FLA_Random_spd_matrix( uplo, A );
168 	FLA_Hermitianize( uplo, A );
169 
170 	// Save the original object contents in a temporary object.
171 	FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, A, &A_save );
172 
173 	// Create auxiliary matrices to be used when checking the result.
174 	FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &Qh );
175 	FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &AQ );
176 	FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &QhAQ );
177 
178 	// Create a real scalar object to hold the norm of A.
179 	FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &norm );
180 
181 	// Create a control tree for the individual variants.
182 	if ( impl == FLA_TEST_FLAT_UNB_VAR ||
183 	     impl == FLA_TEST_FLAT_OPT_VAR ||
184 	     impl == FLA_TEST_FLAT_BLK_VAR )
185 		libfla_test_tridiagut_cntl_create( var, b_alg_flat );
186 
187 	// Repeat the experiment n_repeats times and record results.
188 	for ( i = 0; i < n_repeats; ++i )
189 	{
190 		FLA_Copy_external( A_save, A );
191 
192 		time = FLA_Clock();
193 
194 		libfla_test_tridiagut_impl( impl, uplo, A, T );
195 
196 		time = FLA_Clock() - time;
197 		time_min = min( time_min, time );
198 	}
199 
200 	// Free the control trees if we're testing the variants.
201 	if ( impl == FLA_TEST_FLAT_UNB_VAR ||
202 	     impl == FLA_TEST_FLAT_OPT_VAR ||
203 	     impl == FLA_TEST_FLAT_BLK_VAR )
204 		libfla_test_tridiagut_cntl_free();
205 
206 	// Compute the performance of the best experiment repeat.
207 	*perf = ( 4.0 / 3.0 * m * m * m ) / time_min / FLOPS_PER_UNIT_PERF;
208 	if ( FLA_Obj_is_complex( A ) ) *perf *= 4.0;
209 
210 	// Check the result by computing R - Q' A_orig Q.
211 	if ( uplo == FLA_LOWER_TRIANGULAR )
212 	{
213 		FLA_Set_to_identity( Qh );
214 		FLA_Part_2x1( Qh,   &QhT,
215 		                    &QhB,   1, FLA_TOP );
216 		FLA_Part_2x1( A,    &AT,
217 		                    &AB,    1, FLA_TOP );
218 		FLA_Apply_Q_UT( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE,
219 		                AB, T, W, QhB );
220 		FLA_Gemm( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
221 		          FLA_ONE, A_save, Qh, FLA_ZERO, AQ );
222 		FLA_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
223 		          FLA_ONE, Qh, AQ, FLA_ZERO, QhAQ );
224 		FLA_Triangularize( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, AB );
225 		FLA_Hermitianize( FLA_LOWER_TRIANGULAR, A );
226 		*residual = FLA_Max_elemwise_diff( A, QhAQ );
227 	}
228 	else
229 	{
230 		;
231 	}
232 
233 	// Free the supporting flat objects.
234 	FLA_Obj_free( &W );
235 	FLA_Obj_free( &Qh );
236 	FLA_Obj_free( &AQ );
237 	FLA_Obj_free( &QhAQ );
238 	FLA_Obj_free( &norm );
239 	FLA_Obj_free( &A_save );
240 
241 	// Free the flat test matrices.
242 	FLA_Obj_free( &A );
243 	FLA_Obj_free( &T );
244 }
245 
246 
247 
libfla_test_tridiagut_cntl_create(unsigned int var,dim_t b_alg_flat)248 void libfla_test_tridiagut_cntl_create( unsigned int var,
249                                         dim_t        b_alg_flat )
250 {
251 	int var_unb  = FLA_UNB_VAR_OFFSET + var;
252 	int var_opt  = FLA_OPT_VAR_OFFSET + var;
253 	int var_blk  = FLA_BLK_VAR_OFFSET + var;
254 
255 	tridiagut_cntl_bsize = FLA_Blocksize_create( b_alg_flat,
256 	                                             b_alg_flat,
257 	                                             b_alg_flat,
258 	                                             b_alg_flat );
259 
260 	tridiagut_cntl_unb   = FLA_Cntl_tridiagut_obj_create( FLA_FLAT,
261 	                                                      var_unb,
262 	                                                      NULL );
263 
264 	tridiagut_cntl_opt   = FLA_Cntl_tridiagut_obj_create( FLA_FLAT,
265 	                                                      var_opt,
266 	                                                      NULL );
267 
268 	tridiagut_cntl_blk   = FLA_Cntl_tridiagut_obj_create( FLA_FLAT,
269 	                                                      var_blk,
270 	                                                      NULL );
271 }
272 
273 
274 
libfla_test_tridiagut_cntl_free(void)275 void libfla_test_tridiagut_cntl_free( void )
276 {
277 	FLA_Blocksize_free( tridiagut_cntl_bsize );
278 
279 	FLA_Cntl_obj_free( tridiagut_cntl_unb );
280 	FLA_Cntl_obj_free( tridiagut_cntl_opt );
281 	FLA_Cntl_obj_free( tridiagut_cntl_blk );
282 }
283 
284 
285 
libfla_test_tridiagut_impl(int impl,FLA_Uplo uplo,FLA_Obj A,FLA_Obj T)286 void libfla_test_tridiagut_impl( int      impl,
287                                  FLA_Uplo uplo,
288                                  FLA_Obj  A,
289                                  FLA_Obj  T )
290 {
291 	switch ( impl )
292 	{
293 		case FLA_TEST_FLAT_FRONT_END:
294 		FLA_Tridiag_UT( uplo, A, T );
295 		break;
296 
297 		case FLA_TEST_FLAT_UNB_VAR:
298 		FLA_Tridiag_UT_internal( uplo, A, T, tridiagut_cntl_unb );
299 		break;
300 
301 		case FLA_TEST_FLAT_OPT_VAR:
302 		FLA_Tridiag_UT_internal( uplo, A, T, tridiagut_cntl_opt );
303 		break;
304 
305 		case FLA_TEST_FLAT_BLK_VAR:
306 		FLA_Tridiag_UT_internal( uplo, A, T, tridiagut_cntl_blk );
307 		break;
308 
309 		default:
310 		libfla_test_output_error( "Invalid implementation type.\n" );
311 	}
312 }
313 
314