1 /*
2
3 Copyright (C) 2014, The University of Texas at Austin
4
5 This file is part of libflame and is available under the 3-Clause
6 BSD license, which can be found in the LICENSE file at the top-level
7 directory, or at http://opensource.org/licenses/BSD-3-Clause
8
9 */
10
11 #include "FLAME.h"
12 #include "test_libflame.h"
13
14 #define NUM_PARAM_COMBOS 2
15 #define NUM_MATRIX_ARGS 3
16 #define FIRST_VARIANT 1
17 #define LAST_VARIANT 5
18
19 // Static variables.
20 static char* op_str = "Reduction to bidiagonal form via UT transform";
21 static char* fla_front_str = "FLA_Bidiag_UT";
22 static char* fla_unb_var_str = "unb_var";
23 static char* fla_opt_var_str = "opt_var";
24 static char* fla_blk_var_str = "blk_var";
25 static char* pc_str[NUM_PARAM_COMBOS] = { "l", "u" };
26 static test_thresh_t thresh = { 1e-03, 1e-04, // warn, pass for s
27 1e-12, 1e-13, // warn, pass for d
28 1e-03, 1e-04, // warn, pass for c
29 1e-12, 1e-13 }; // warn, pass for z
30
31 static fla_bidiagut_t* bidiagut_cntl_opt;
32 static fla_bidiagut_t* bidiagut_cntl_unb;
33 static fla_bidiagut_t* bidiagut_cntl_blk;
34 static fla_blocksize_t* bidiagut_cntl_bsize;
35
36 // Local prototypes.
37 void libfla_test_bidiagut_experiment( test_params_t params,
38 unsigned int var,
39 char* sc_str,
40 FLA_Datatype datatype,
41 unsigned int p,
42 unsigned int pci,
43 unsigned int n_repeats,
44 signed int impl,
45 double* perf,
46 double* residual );
47 void libfla_test_bidiagut_impl( int impl,
48 FLA_Obj A,
49 FLA_Obj TU,
50 FLA_Obj TV );
51 void libfla_test_bidiagut_cntl_create( unsigned int var,
52 dim_t b_alg_flat );
53 void libfla_test_bidiagut_cntl_free( void );
54
55
libfla_test_bidiagut(FILE * output_stream,test_params_t params,test_op_t op)56 void libfla_test_bidiagut( FILE* output_stream, test_params_t params, test_op_t op )
57 {
58 libfla_test_output_info( "--- %s ---\n", op_str );
59 libfla_test_output_info( "\n" );
60
61 if ( op.fla_front == ENABLE )
62 {
63 //libfla_test_output_info( "%s() front-end...\n", fla_front_str );
64 //libfla_test_output_info( "\n" );
65 libfla_test_op_driver( fla_front_str, NULL,
66 FIRST_VARIANT, LAST_VARIANT,
67 NUM_PARAM_COMBOS, pc_str,
68 NUM_MATRIX_ARGS,
69 FLA_TEST_FLAT_FRONT_END,
70 params, thresh, libfla_test_bidiagut_experiment );
71 }
72
73 if ( op.fla_unb_vars == ENABLE )
74 {
75 //libfla_test_output_info( "%s() unblocked variants...\n", fla_front_str );
76 //libfla_test_output_info( "\n" );
77 libfla_test_op_driver( fla_front_str, fla_unb_var_str,
78 FIRST_VARIANT, LAST_VARIANT,
79 NUM_PARAM_COMBOS, pc_str,
80 NUM_MATRIX_ARGS,
81 FLA_TEST_FLAT_UNB_VAR,
82 params, thresh, libfla_test_bidiagut_experiment );
83 }
84
85 if ( op.fla_opt_vars == ENABLE )
86 {
87 //libfla_test_output_info( "%s() optimized unblocked variants...\n", fla_front_str );
88 //libfla_test_output_info( "\n" );
89 libfla_test_op_driver( fla_front_str, fla_opt_var_str,
90 FIRST_VARIANT, LAST_VARIANT,
91 NUM_PARAM_COMBOS, pc_str,
92 NUM_MATRIX_ARGS,
93 FLA_TEST_FLAT_OPT_VAR,
94 params, thresh, libfla_test_bidiagut_experiment );
95 }
96
97 if ( op.fla_blk_vars == ENABLE )
98 {
99 //libfla_test_output_info( "%s() blocked variants...\n", fla_front_str );
100 //libfla_test_output_info( "\n" );
101 libfla_test_op_driver( fla_front_str, fla_blk_var_str,
102 FIRST_VARIANT, LAST_VARIANT,
103 NUM_PARAM_COMBOS, pc_str,
104 NUM_MATRIX_ARGS,
105 FLA_TEST_FLAT_BLK_VAR,
106 params, thresh, libfla_test_bidiagut_experiment );
107 }
108
109 }
110
111
112
libfla_test_bidiagut_experiment(test_params_t params,unsigned int var,char * sc_str,FLA_Datatype datatype,unsigned int p_cur,unsigned int pci,unsigned int n_repeats,signed int impl,double * perf,double * residual)113 void libfla_test_bidiagut_experiment( test_params_t params,
114 unsigned int var,
115 char* sc_str,
116 FLA_Datatype datatype,
117 unsigned int p_cur,
118 unsigned int pci,
119 unsigned int n_repeats,
120 signed int impl,
121 double* perf,
122 double* residual )
123 {
124 dim_t b_alg_flat = params.b_alg_flat;
125 double time_min = 1e9;
126 double time;
127 unsigned int i;
128 unsigned int m, n, min_m_n;
129 signed int m_input = -1;
130 signed int n_input = -2;
131 FLA_Obj A, TU, TV, WU, WV, QUh, QV, AQV, QUhAQV, norm;
132 FLA_Obj AL, AR;
133 FLA_Obj QVL, QVR;
134 FLA_Obj A_save;
135
136 // Return early for 'l' parameter case since it does not yet exist.
137 if ( pc_str[pci][0] == 'l' )
138 {
139 *perf = 0.0;
140 *residual = 0.0;
141 return;
142 }
143
144 // Determine the dimensions.
145 if ( m_input < 0 ) m = p_cur / abs(m_input);
146 else m = p_cur;
147 if ( n_input < 0 ) n = p_cur / abs(n_input);
148 else n = p_cur;
149
150 /// Uncomment when lower bidiagonal case is implemented.
151 /// This code swaps the m and n dimension because we want to use "tall"
152 /// rectangles for the upper bidiagonal case and "fat" rectangles for the
153 /// lower bidiagonal case. The ratios are hard-coded as m = -1 and n = -2
154 /// which only works for one or the other, depending on whether we divide
155 /// or multiply when computing m and n above.
156 /*
157 // Swap the larger and smaller dimensions for the lower bidiagonal case.
158 if ( pc_str[pci][0] == 'u' )
159 {
160 // Do nothing.
161 ;
162 }
163 else // if ( pc_str[pci][0] == 'l' )
164 {
165 unsigned int m_temp = n;
166 unsigned int n_temp = m;
167 m = m_temp;
168 n = n_temp;
169 }
170 */
171
172 // Compute the minimum dimension of A.
173 min_m_n = min( m, n );
174
175 // Create the matrices for the current operation.
176 libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[0], m, n, &A );
177
178 if ( impl == FLA_TEST_FLAT_FRONT_END ||
179 impl == FLA_TEST_FLAT_BLK_VAR )
180 {
181 libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], b_alg_flat, min_m_n, &TU );
182 libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[2], b_alg_flat, min_m_n, &TV );
183 libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], b_alg_flat, m, &WU );
184 libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[2], b_alg_flat, n, &WV );
185 }
186 else
187 {
188 libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], min_m_n, min_m_n, &TU );
189 libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[2], min_m_n, min_m_n, &TV );
190 libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], min_m_n, m, &WU );
191 libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[2], min_m_n, n, &WV );
192 }
193
194 // Initialize the test matrices.
195 FLA_Random_matrix( A );
196
197 // Save the original object contents in a temporary object.
198 FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, A, &A_save );
199
200 // Create auxiliary matrices to be used when checking the result.
201 FLA_Obj_create( datatype, m, m, 0, 0, &QUh );
202 FLA_Obj_create( datatype, n, n, 0, 0, &QV );
203 FLA_Obj_create( datatype, m, n, 0, 0, &AQV );
204 FLA_Obj_create( datatype, m, n, 0, 0, &QUhAQV );
205
206 // Create a real scalar object to hold the norm of A.
207 FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &norm );
208
209 // Create a control tree for the individual variants.
210 if ( impl == FLA_TEST_FLAT_UNB_VAR ||
211 impl == FLA_TEST_FLAT_OPT_VAR ||
212 impl == FLA_TEST_FLAT_BLK_VAR )
213 libfla_test_bidiagut_cntl_create( var, b_alg_flat );
214
215 // Repeat the experiment n_repeats times and record results.
216 for ( i = 0; i < n_repeats; ++i )
217 {
218 FLA_Copy_external( A_save, A );
219
220 time = FLA_Clock();
221
222 libfla_test_bidiagut_impl( impl, A, TU, TV );
223
224 time = FLA_Clock() - time;
225 time_min = min( time_min, time );
226 }
227
228 // Free the control trees if we're testing the variants.
229 if ( impl == FLA_TEST_FLAT_UNB_VAR ||
230 impl == FLA_TEST_FLAT_OPT_VAR ||
231 impl == FLA_TEST_FLAT_BLK_VAR )
232 libfla_test_bidiagut_cntl_free();
233
234 // Compute the performance of the best experiment repeat.
235 *perf = ( 4.0 * n * n * ( m - n / 3.0 ) ) / time_min / FLOPS_PER_UNIT_PERF;
236 if ( FLA_Obj_is_complex( A ) ) *perf *= 4.0;
237
238 // Check the result by computing R - Q' A_orig Q.
239 if ( m >= n )
240 {
241 FLA_Set_to_identity( QUh );
242 FLA_Set_to_identity( QV );
243 FLA_Part_1x2( QV, &QVL, &QVR, 1, FLA_LEFT );
244 FLA_Part_1x2( A, &AL, &AR, 1, FLA_LEFT );
245 FLA_Apply_Q_UT( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE,
246 A, TU, WU, QUh );
247 FLA_Apply_Q_UT( FLA_RIGHT, FLA_NO_TRANSPOSE, FLA_FORWARD, FLA_ROWWISE,
248 AR, TV, WV, QVR );
249 FLA_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
250 FLA_ONE, A_save, QV, FLA_ZERO, AQV );
251 FLA_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
252 FLA_ONE, QUh, AQV, FLA_ZERO, QUhAQV );
253 FLA_Triangularize( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, A );
254 FLA_Triangularize( FLA_LOWER_TRIANGULAR, FLA_NONUNIT_DIAG, AR );
255 *residual = FLA_Max_elemwise_diff( A, QUhAQV );
256 }
257 else // if ( m < n )
258 {
259 ;
260 }
261
262 // Free the supporting flat objects.
263 FLA_Obj_free( &WU );
264 FLA_Obj_free( &WV );
265 FLA_Obj_free( &QUh );
266 FLA_Obj_free( &QV );
267 FLA_Obj_free( &AQV );
268 FLA_Obj_free( &QUhAQV );
269 FLA_Obj_free( &norm );
270 FLA_Obj_free( &A_save );
271
272 // Free the flat test matrices.
273 FLA_Obj_free( &A );
274 FLA_Obj_free( &TU );
275 FLA_Obj_free( &TV );
276 }
277
278
279
libfla_test_bidiagut_cntl_create(unsigned int var,dim_t b_alg_flat)280 void libfla_test_bidiagut_cntl_create( unsigned int var,
281 dim_t b_alg_flat )
282 {
283 int var_unb = FLA_UNB_VAR_OFFSET + var;
284 int var_opt = FLA_OPT_VAR_OFFSET + var;
285 int var_blk = FLA_BLK_VAR_OFFSET + var;
286
287 bidiagut_cntl_bsize = FLA_Blocksize_create( b_alg_flat,
288 b_alg_flat,
289 b_alg_flat,
290 b_alg_flat );
291
292 bidiagut_cntl_unb = FLA_Cntl_bidiagut_obj_create( FLA_FLAT,
293 var_unb,
294 NULL );
295
296 bidiagut_cntl_opt = FLA_Cntl_bidiagut_obj_create( FLA_FLAT,
297 var_opt,
298 NULL );
299
300 bidiagut_cntl_blk = FLA_Cntl_bidiagut_obj_create( FLA_FLAT,
301 var_blk,
302 NULL );
303 }
304
305
306
libfla_test_bidiagut_cntl_free(void)307 void libfla_test_bidiagut_cntl_free( void )
308 {
309 FLA_Blocksize_free( bidiagut_cntl_bsize );
310
311 FLA_Cntl_obj_free( bidiagut_cntl_unb );
312 FLA_Cntl_obj_free( bidiagut_cntl_opt );
313 FLA_Cntl_obj_free( bidiagut_cntl_blk );
314 }
315
316
317
libfla_test_bidiagut_impl(int impl,FLA_Obj A,FLA_Obj TU,FLA_Obj TV)318 void libfla_test_bidiagut_impl( int impl,
319 FLA_Obj A,
320 FLA_Obj TU,
321 FLA_Obj TV )
322 {
323 switch ( impl )
324 {
325 case FLA_TEST_FLAT_FRONT_END:
326 FLA_Bidiag_UT( A, TU, TV );
327 break;
328
329 case FLA_TEST_FLAT_UNB_VAR:
330 FLA_Bidiag_UT_internal( A, TU, TV, bidiagut_cntl_unb );
331 break;
332
333 case FLA_TEST_FLAT_OPT_VAR:
334 FLA_Bidiag_UT_internal( A, TU, TV, bidiagut_cntl_opt );
335 break;
336
337 case FLA_TEST_FLAT_BLK_VAR:
338 FLA_Bidiag_UT_internal( A, TU, TV, bidiagut_cntl_blk );
339 break;
340
341 default:
342 libfla_test_output_error( "Invalid implementation type.\n" );
343 }
344 }
345
346