1 /*
2 
3     Copyright (C) 2014, The University of Texas at Austin
4 
5     This file is part of libflame and is available under the 3-Clause
6     BSD license, which can be found in the LICENSE file at the top-level
7     directory, or at http://opensource.org/licenses/BSD-3-Clause
8 
9 */
10 
11 #include "FLAME.h"
12 #include "test_libflame.h"
13 
14 #define NUM_PARAM_COMBOS 2
15 #define NUM_MATRIX_ARGS  3
16 #define FIRST_VARIANT    1
17 #define LAST_VARIANT     5
18 
19 // Static variables.
20 static char* op_str                   = "Reduction to bidiagonal form via UT transform";
21 static char* fla_front_str            = "FLA_Bidiag_UT";
22 static char* fla_unb_var_str          = "unb_var";
23 static char* fla_opt_var_str          = "opt_var";
24 static char* fla_blk_var_str          = "blk_var";
25 static char* pc_str[NUM_PARAM_COMBOS] = { "l", "u" };
26 static test_thresh_t thresh           = { 1e-03, 1e-04,   // warn, pass for s
27                                           1e-12, 1e-13,   // warn, pass for d
28                                           1e-03, 1e-04,   // warn, pass for c
29                                           1e-12, 1e-13 }; // warn, pass for z
30 
31 static fla_bidiagut_t*  bidiagut_cntl_opt;
32 static fla_bidiagut_t*  bidiagut_cntl_unb;
33 static fla_bidiagut_t*  bidiagut_cntl_blk;
34 static fla_blocksize_t* bidiagut_cntl_bsize;
35 
36 // Local prototypes.
37 void libfla_test_bidiagut_experiment( test_params_t params,
38                                       unsigned int  var,
39                                       char*         sc_str,
40                                       FLA_Datatype  datatype,
41                                       unsigned int  p,
42                                       unsigned int  pci,
43                                       unsigned int  n_repeats,
44                                       signed int    impl,
45                                       double*       perf,
46                                       double*       residual );
47 void libfla_test_bidiagut_impl( int      impl,
48                                 FLA_Obj  A,
49                                 FLA_Obj  TU,
50                                 FLA_Obj  TV );
51 void libfla_test_bidiagut_cntl_create( unsigned int var,
52                                        dim_t        b_alg_flat );
53 void libfla_test_bidiagut_cntl_free( void );
54 
55 
libfla_test_bidiagut(FILE * output_stream,test_params_t params,test_op_t op)56 void libfla_test_bidiagut( FILE* output_stream, test_params_t params, test_op_t op )
57 {
58 	libfla_test_output_info( "--- %s ---\n", op_str );
59 	libfla_test_output_info( "\n" );
60 
61 	if ( op.fla_front == ENABLE )
62 	{
63 		//libfla_test_output_info( "%s() front-end...\n", fla_front_str );
64 		//libfla_test_output_info( "\n" );
65 		libfla_test_op_driver( fla_front_str, NULL,
66 		                       FIRST_VARIANT, LAST_VARIANT,
67 		                       NUM_PARAM_COMBOS, pc_str,
68 		                       NUM_MATRIX_ARGS,
69 		                       FLA_TEST_FLAT_FRONT_END,
70 		                       params, thresh, libfla_test_bidiagut_experiment );
71 	}
72 
73 	if ( op.fla_unb_vars == ENABLE )
74 	{
75 		//libfla_test_output_info( "%s() unblocked variants...\n", fla_front_str );
76 		//libfla_test_output_info( "\n" );
77 		libfla_test_op_driver( fla_front_str, fla_unb_var_str,
78 		                       FIRST_VARIANT, LAST_VARIANT,
79 		                       NUM_PARAM_COMBOS, pc_str,
80 		                       NUM_MATRIX_ARGS,
81 		                       FLA_TEST_FLAT_UNB_VAR,
82 		                       params, thresh, libfla_test_bidiagut_experiment );
83 	}
84 
85 	if ( op.fla_opt_vars == ENABLE )
86 	{
87 		//libfla_test_output_info( "%s() optimized unblocked variants...\n", fla_front_str );
88 		//libfla_test_output_info( "\n" );
89 		libfla_test_op_driver( fla_front_str, fla_opt_var_str,
90 		                       FIRST_VARIANT, LAST_VARIANT,
91 		                       NUM_PARAM_COMBOS, pc_str,
92 		                       NUM_MATRIX_ARGS,
93 		                       FLA_TEST_FLAT_OPT_VAR,
94 		                       params, thresh, libfla_test_bidiagut_experiment );
95 	}
96 
97 	if ( op.fla_blk_vars == ENABLE )
98 	{
99 		//libfla_test_output_info( "%s() blocked variants...\n", fla_front_str );
100 		//libfla_test_output_info( "\n" );
101 		libfla_test_op_driver( fla_front_str, fla_blk_var_str,
102 		                       FIRST_VARIANT, LAST_VARIANT,
103 		                       NUM_PARAM_COMBOS, pc_str,
104 		                       NUM_MATRIX_ARGS,
105 		                       FLA_TEST_FLAT_BLK_VAR,
106 		                       params, thresh, libfla_test_bidiagut_experiment );
107 	}
108 
109 }
110 
111 
112 
libfla_test_bidiagut_experiment(test_params_t params,unsigned int var,char * sc_str,FLA_Datatype datatype,unsigned int p_cur,unsigned int pci,unsigned int n_repeats,signed int impl,double * perf,double * residual)113 void libfla_test_bidiagut_experiment( test_params_t params,
114                                       unsigned int  var,
115                                       char*         sc_str,
116                                       FLA_Datatype  datatype,
117                                       unsigned int  p_cur,
118                                       unsigned int  pci,
119                                       unsigned int  n_repeats,
120                                       signed int    impl,
121                                       double*       perf,
122                                       double*       residual )
123 {
124 	dim_t        b_alg_flat = params.b_alg_flat;
125 	double       time_min   = 1e9;
126 	double       time;
127 	unsigned int i;
128 	unsigned int m, n, min_m_n;
129 	signed int   m_input    = -1;
130 	signed int   n_input    = -2;
131 	FLA_Obj      A, TU, TV, WU, WV, QUh, QV, AQV, QUhAQV, norm;
132 	FLA_Obj      AL, AR;
133 	FLA_Obj      QVL, QVR;
134 	FLA_Obj      A_save;
135 
136 	// Return early for 'l' parameter case since it does not yet exist.
137 	if ( pc_str[pci][0] == 'l' )
138 	{
139 		*perf     = 0.0;
140 		*residual = 0.0;
141 		return;
142 	}
143 
144 	// Determine the dimensions.
145 	if ( m_input < 0 ) m = p_cur / abs(m_input);
146 	else               m = p_cur;
147 	if ( n_input < 0 ) n = p_cur / abs(n_input);
148 	else               n = p_cur;
149 
150 /// Uncomment when lower bidiagonal case is implemented.
151 /// This code swaps the m and n dimension because we want to use "tall"
152 /// rectangles for the upper bidiagonal case and "fat" rectangles for the
153 /// lower bidiagonal case. The ratios are hard-coded as m = -1 and n = -2
154 /// which only works for one or the other, depending on whether we divide
155 /// or multiply when computing m and n above.
156 /*
157 	// Swap the larger and smaller dimensions for the lower bidiagonal case.
158 	if ( pc_str[pci][0] == 'u' )
159 	{
160 		// Do nothing.
161 		;
162 	}
163 	else // if ( pc_str[pci][0] == 'l' )
164 	{
165 		unsigned int m_temp = n;
166 		unsigned int n_temp = m;
167 		m = m_temp;
168 		n = n_temp;
169 	}
170 */
171 
172 	// Compute the minimum dimension of A.
173 	min_m_n = min( m, n );
174 
175 	// Create the matrices for the current operation.
176 	libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[0], m, n, &A );
177 
178 	if ( impl == FLA_TEST_FLAT_FRONT_END ||
179 	     impl == FLA_TEST_FLAT_BLK_VAR )
180 	{
181 		libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], b_alg_flat, min_m_n, &TU );
182 		libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[2], b_alg_flat, min_m_n, &TV );
183 		libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], b_alg_flat,       m, &WU );
184 		libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[2], b_alg_flat,       n, &WV );
185 	}
186 	else
187 	{
188 		libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], min_m_n, min_m_n, &TU );
189 		libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[2], min_m_n, min_m_n, &TV );
190 		libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], min_m_n,       m, &WU );
191 		libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[2], min_m_n,       n, &WV );
192 	}
193 
194 	// Initialize the test matrices.
195 	FLA_Random_matrix( A );
196 
197 	// Save the original object contents in a temporary object.
198 	FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, A, &A_save );
199 
200 	// Create auxiliary matrices to be used when checking the result.
201 	FLA_Obj_create( datatype, m, m, 0, 0, &QUh );
202 	FLA_Obj_create( datatype, n, n, 0, 0, &QV );
203 	FLA_Obj_create( datatype, m, n, 0, 0, &AQV );
204 	FLA_Obj_create( datatype, m, n, 0, 0, &QUhAQV );
205 
206 	// Create a real scalar object to hold the norm of A.
207 	FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &norm );
208 
209 	// Create a control tree for the individual variants.
210 	if ( impl == FLA_TEST_FLAT_UNB_VAR ||
211 	     impl == FLA_TEST_FLAT_OPT_VAR ||
212 	     impl == FLA_TEST_FLAT_BLK_VAR )
213 		libfla_test_bidiagut_cntl_create( var, b_alg_flat );
214 
215 	// Repeat the experiment n_repeats times and record results.
216 	for ( i = 0; i < n_repeats; ++i )
217 	{
218 		FLA_Copy_external( A_save, A );
219 
220 		time = FLA_Clock();
221 
222 		libfla_test_bidiagut_impl( impl, A, TU, TV );
223 
224 		time = FLA_Clock() - time;
225 		time_min = min( time_min, time );
226 	}
227 
228 	// Free the control trees if we're testing the variants.
229 	if ( impl == FLA_TEST_FLAT_UNB_VAR ||
230 	     impl == FLA_TEST_FLAT_OPT_VAR ||
231 	     impl == FLA_TEST_FLAT_BLK_VAR )
232 		libfla_test_bidiagut_cntl_free();
233 
234 	// Compute the performance of the best experiment repeat.
235 	*perf = ( 4.0 * n * n * ( m - n / 3.0 ) ) / time_min / FLOPS_PER_UNIT_PERF;
236 	if ( FLA_Obj_is_complex( A ) ) *perf *= 4.0;
237 
238 	// Check the result by computing R - Q' A_orig Q.
239 	if ( m >= n )
240 	{
241 		FLA_Set_to_identity( QUh );
242 		FLA_Set_to_identity( QV );
243 		FLA_Part_1x2( QV,   &QVL, &QVR,    1, FLA_LEFT );
244 		FLA_Part_1x2( A,    &AL,  &AR,     1, FLA_LEFT );
245 		FLA_Apply_Q_UT( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE,
246 		                A, TU, WU, QUh );
247 		FLA_Apply_Q_UT( FLA_RIGHT, FLA_NO_TRANSPOSE, FLA_FORWARD, FLA_ROWWISE,
248 		                AR, TV, WV, QVR );
249 		FLA_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
250 		          FLA_ONE, A_save, QV, FLA_ZERO, AQV );
251 		FLA_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
252 		          FLA_ONE, QUh, AQV, FLA_ZERO, QUhAQV );
253 		FLA_Triangularize( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, A );
254 		FLA_Triangularize( FLA_LOWER_TRIANGULAR, FLA_NONUNIT_DIAG, AR );
255 		*residual = FLA_Max_elemwise_diff( A, QUhAQV );
256 	}
257 	else // if ( m < n )
258 	{
259 		;
260 	}
261 
262 	// Free the supporting flat objects.
263 	FLA_Obj_free( &WU );
264 	FLA_Obj_free( &WV );
265 	FLA_Obj_free( &QUh );
266 	FLA_Obj_free( &QV );
267 	FLA_Obj_free( &AQV );
268 	FLA_Obj_free( &QUhAQV );
269 	FLA_Obj_free( &norm );
270 	FLA_Obj_free( &A_save );
271 
272 	// Free the flat test matrices.
273 	FLA_Obj_free( &A );
274 	FLA_Obj_free( &TU );
275 	FLA_Obj_free( &TV );
276 }
277 
278 
279 
libfla_test_bidiagut_cntl_create(unsigned int var,dim_t b_alg_flat)280 void libfla_test_bidiagut_cntl_create( unsigned int var,
281                                        dim_t        b_alg_flat )
282 {
283 	int var_unb  = FLA_UNB_VAR_OFFSET + var;
284 	int var_opt  = FLA_OPT_VAR_OFFSET + var;
285 	int var_blk  = FLA_BLK_VAR_OFFSET + var;
286 
287 	bidiagut_cntl_bsize = FLA_Blocksize_create( b_alg_flat,
288 	                                            b_alg_flat,
289 	                                            b_alg_flat,
290 	                                            b_alg_flat );
291 
292 	bidiagut_cntl_unb   = FLA_Cntl_bidiagut_obj_create( FLA_FLAT,
293 	                                                    var_unb,
294 	                                                    NULL );
295 
296 	bidiagut_cntl_opt   = FLA_Cntl_bidiagut_obj_create( FLA_FLAT,
297 	                                                    var_opt,
298 	                                                    NULL );
299 
300 	bidiagut_cntl_blk   = FLA_Cntl_bidiagut_obj_create( FLA_FLAT,
301 	                                                    var_blk,
302 	                                                    NULL );
303 }
304 
305 
306 
libfla_test_bidiagut_cntl_free(void)307 void libfla_test_bidiagut_cntl_free( void )
308 {
309 	FLA_Blocksize_free( bidiagut_cntl_bsize );
310 
311 	FLA_Cntl_obj_free( bidiagut_cntl_unb );
312 	FLA_Cntl_obj_free( bidiagut_cntl_opt );
313 	FLA_Cntl_obj_free( bidiagut_cntl_blk );
314 }
315 
316 
317 
libfla_test_bidiagut_impl(int impl,FLA_Obj A,FLA_Obj TU,FLA_Obj TV)318 void libfla_test_bidiagut_impl( int      impl,
319                                 FLA_Obj  A,
320                                 FLA_Obj  TU,
321                                 FLA_Obj  TV )
322 {
323 	switch ( impl )
324 	{
325 		case FLA_TEST_FLAT_FRONT_END:
326 		FLA_Bidiag_UT( A, TU, TV );
327 		break;
328 
329 		case FLA_TEST_FLAT_UNB_VAR:
330 		FLA_Bidiag_UT_internal( A, TU, TV, bidiagut_cntl_unb );
331 		break;
332 
333 		case FLA_TEST_FLAT_OPT_VAR:
334 		FLA_Bidiag_UT_internal( A, TU, TV, bidiagut_cntl_opt );
335 		break;
336 
337 		case FLA_TEST_FLAT_BLK_VAR:
338 		FLA_Bidiag_UT_internal( A, TU, TV, bidiagut_cntl_blk );
339 		break;
340 
341 		default:
342 		libfla_test_output_error( "Invalid implementation type.\n" );
343 	}
344 }
345 
346