1 /*
2
3 Copyright (C) 2014, The University of Texas at Austin
4
5 This file is part of libflame and is available under the 3-Clause
6 BSD license, which can be found in the LICENSE file at the top-level
7 directory, or at http://opensource.org/licenses/BSD-3-Clause
8
9 */
10
11 #include "FLAME.h"
12 #include "test_libflame.h"
13
14 #define NUM_PARAM_COMBOS 2
15 #define NUM_MATRIX_ARGS 2
16 #define FIRST_VARIANT 1
17 #define LAST_VARIANT 3
18
19 // Static variables.
20 static char* op_str = "Reduction to tridiagonal form via UT transform";
21 static char* fla_front_str = "FLA_Tridiag_UT";
22 static char* fla_unb_var_str = "unb_var";
23 static char* fla_opt_var_str = "opt_var";
24 static char* fla_blk_var_str = "blk_var";
25 static char* pc_str[NUM_PARAM_COMBOS] = { "l", "u" };
26 static test_thresh_t thresh = { 1e-03, 1e-04, // warn, pass for s
27 1e-12, 1e-13, // warn, pass for d
28 1e-03, 1e-04, // warn, pass for c
29 1e-12, 1e-13 }; // warn, pass for z
30
31 static fla_tridiagut_t* tridiagut_cntl_opt;
32 static fla_tridiagut_t* tridiagut_cntl_unb;
33 static fla_tridiagut_t* tridiagut_cntl_blk;
34 static fla_blocksize_t* tridiagut_cntl_bsize;
35
36 // Local prototypes.
37 void libfla_test_tridiagut_experiment( test_params_t params,
38 unsigned int var,
39 char* sc_str,
40 FLA_Datatype datatype,
41 unsigned int p,
42 unsigned int pci,
43 unsigned int n_repeats,
44 signed int impl,
45 double* perf,
46 double* residual );
47 void libfla_test_tridiagut_impl( int impl,
48 FLA_Uplo uplo,
49 FLA_Obj A,
50 FLA_Obj T );
51 void libfla_test_tridiagut_cntl_create( unsigned int var,
52 dim_t b_alg_flat );
53 void libfla_test_tridiagut_cntl_free( void );
54
55
libfla_test_tridiagut(FILE * output_stream,test_params_t params,test_op_t op)56 void libfla_test_tridiagut( FILE* output_stream, test_params_t params, test_op_t op )
57 {
58 libfla_test_output_info( "--- %s ---\n", op_str );
59 libfla_test_output_info( "\n" );
60
61 if ( op.fla_front == ENABLE )
62 {
63 //libfla_test_output_info( "%s() front-end...\n", fla_front_str );
64 //libfla_test_output_info( "\n" );
65 libfla_test_op_driver( fla_front_str, NULL,
66 FIRST_VARIANT, LAST_VARIANT,
67 NUM_PARAM_COMBOS, pc_str,
68 NUM_MATRIX_ARGS,
69 FLA_TEST_FLAT_FRONT_END,
70 params, thresh, libfla_test_tridiagut_experiment );
71 }
72
73 if ( op.fla_unb_vars == ENABLE )
74 {
75 //libfla_test_output_info( "%s() unblocked variants...\n", fla_front_str );
76 //libfla_test_output_info( "\n" );
77 libfla_test_op_driver( fla_front_str, fla_unb_var_str,
78 FIRST_VARIANT, LAST_VARIANT,
79 NUM_PARAM_COMBOS, pc_str,
80 NUM_MATRIX_ARGS,
81 FLA_TEST_FLAT_UNB_VAR,
82 params, thresh, libfla_test_tridiagut_experiment );
83 }
84
85 if ( op.fla_opt_vars == ENABLE )
86 {
87 //libfla_test_output_info( "%s() optimized unblocked variants...\n", fla_front_str );
88 //libfla_test_output_info( "\n" );
89 libfla_test_op_driver( fla_front_str, fla_opt_var_str,
90 FIRST_VARIANT, LAST_VARIANT,
91 NUM_PARAM_COMBOS, pc_str,
92 NUM_MATRIX_ARGS,
93 FLA_TEST_FLAT_OPT_VAR,
94 params, thresh, libfla_test_tridiagut_experiment );
95 }
96
97 if ( op.fla_blk_vars == ENABLE )
98 {
99 //libfla_test_output_info( "%s() blocked variants...\n", fla_front_str );
100 //libfla_test_output_info( "\n" );
101 libfla_test_op_driver( fla_front_str, fla_blk_var_str,
102 FIRST_VARIANT, LAST_VARIANT,
103 NUM_PARAM_COMBOS, pc_str,
104 NUM_MATRIX_ARGS,
105 FLA_TEST_FLAT_BLK_VAR,
106 params, thresh, libfla_test_tridiagut_experiment );
107 }
108
109 }
110
111
112
libfla_test_tridiagut_experiment(test_params_t params,unsigned int var,char * sc_str,FLA_Datatype datatype,unsigned int p_cur,unsigned int pci,unsigned int n_repeats,signed int impl,double * perf,double * residual)113 void libfla_test_tridiagut_experiment( test_params_t params,
114 unsigned int var,
115 char* sc_str,
116 FLA_Datatype datatype,
117 unsigned int p_cur,
118 unsigned int pci,
119 unsigned int n_repeats,
120 signed int impl,
121 double* perf,
122 double* residual )
123 {
124 dim_t b_alg_flat = params.b_alg_flat;
125 double time_min = 1e9;
126 double time;
127 unsigned int i;
128 unsigned int m;
129 signed int m_input = -1;
130 FLA_Uplo uplo;
131 FLA_Obj A, T, W, Qh, AQ, QhAQ, norm;
132 FLA_Obj AT, AB;
133 FLA_Obj QhT, QhB;
134 FLA_Obj A_save;
135
136 // Return early for 'u' parameter case since it does not yet exist.
137 if ( pc_str[pci][0] == 'u' )
138 {
139 *perf = 0.0;
140 *residual = 0.0;
141 return;
142 }
143
144 // Determine the dimensions.
145 if ( m_input < 0 ) m = p_cur * abs(m_input);
146 else m = p_cur;
147
148 // Translate parameter characters to libflame constants.
149 FLA_Param_map_char_to_flame_uplo( &pc_str[pci][0], &uplo );
150
151 // Create the matrices for the current operation.
152 libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[0], m, m, &A );
153
154 if ( impl == FLA_TEST_FLAT_FRONT_END ||
155 impl == FLA_TEST_FLAT_BLK_VAR )
156 {
157 libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], b_alg_flat, m, &T );
158 libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], b_alg_flat, m, &W );
159 }
160 else
161 {
162 libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], m, m, &T );
163 libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], m, m, &W );
164 }
165
166 // Initialize the test matrices.
167 FLA_Random_spd_matrix( uplo, A );
168 FLA_Hermitianize( uplo, A );
169
170 // Save the original object contents in a temporary object.
171 FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, A, &A_save );
172
173 // Create auxiliary matrices to be used when checking the result.
174 FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &Qh );
175 FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &AQ );
176 FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &QhAQ );
177
178 // Create a real scalar object to hold the norm of A.
179 FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &norm );
180
181 // Create a control tree for the individual variants.
182 if ( impl == FLA_TEST_FLAT_UNB_VAR ||
183 impl == FLA_TEST_FLAT_OPT_VAR ||
184 impl == FLA_TEST_FLAT_BLK_VAR )
185 libfla_test_tridiagut_cntl_create( var, b_alg_flat );
186
187 // Repeat the experiment n_repeats times and record results.
188 for ( i = 0; i < n_repeats; ++i )
189 {
190 FLA_Copy_external( A_save, A );
191
192 time = FLA_Clock();
193
194 libfla_test_tridiagut_impl( impl, uplo, A, T );
195
196 time = FLA_Clock() - time;
197 time_min = min( time_min, time );
198 }
199
200 // Free the control trees if we're testing the variants.
201 if ( impl == FLA_TEST_FLAT_UNB_VAR ||
202 impl == FLA_TEST_FLAT_OPT_VAR ||
203 impl == FLA_TEST_FLAT_BLK_VAR )
204 libfla_test_tridiagut_cntl_free();
205
206 // Compute the performance of the best experiment repeat.
207 *perf = ( 4.0 / 3.0 * m * m * m ) / time_min / FLOPS_PER_UNIT_PERF;
208 if ( FLA_Obj_is_complex( A ) ) *perf *= 4.0;
209
210 // Check the result by computing R - Q' A_orig Q.
211 if ( uplo == FLA_LOWER_TRIANGULAR )
212 {
213 FLA_Set_to_identity( Qh );
214 FLA_Part_2x1( Qh, &QhT,
215 &QhB, 1, FLA_TOP );
216 FLA_Part_2x1( A, &AT,
217 &AB, 1, FLA_TOP );
218 FLA_Apply_Q_UT( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE,
219 AB, T, W, QhB );
220 FLA_Gemm( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
221 FLA_ONE, A_save, Qh, FLA_ZERO, AQ );
222 FLA_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
223 FLA_ONE, Qh, AQ, FLA_ZERO, QhAQ );
224 FLA_Triangularize( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, AB );
225 FLA_Hermitianize( FLA_LOWER_TRIANGULAR, A );
226 *residual = FLA_Max_elemwise_diff( A, QhAQ );
227 }
228 else
229 {
230 ;
231 }
232
233 // Free the supporting flat objects.
234 FLA_Obj_free( &W );
235 FLA_Obj_free( &Qh );
236 FLA_Obj_free( &AQ );
237 FLA_Obj_free( &QhAQ );
238 FLA_Obj_free( &norm );
239 FLA_Obj_free( &A_save );
240
241 // Free the flat test matrices.
242 FLA_Obj_free( &A );
243 FLA_Obj_free( &T );
244 }
245
246
247
libfla_test_tridiagut_cntl_create(unsigned int var,dim_t b_alg_flat)248 void libfla_test_tridiagut_cntl_create( unsigned int var,
249 dim_t b_alg_flat )
250 {
251 int var_unb = FLA_UNB_VAR_OFFSET + var;
252 int var_opt = FLA_OPT_VAR_OFFSET + var;
253 int var_blk = FLA_BLK_VAR_OFFSET + var;
254
255 tridiagut_cntl_bsize = FLA_Blocksize_create( b_alg_flat,
256 b_alg_flat,
257 b_alg_flat,
258 b_alg_flat );
259
260 tridiagut_cntl_unb = FLA_Cntl_tridiagut_obj_create( FLA_FLAT,
261 var_unb,
262 NULL );
263
264 tridiagut_cntl_opt = FLA_Cntl_tridiagut_obj_create( FLA_FLAT,
265 var_opt,
266 NULL );
267
268 tridiagut_cntl_blk = FLA_Cntl_tridiagut_obj_create( FLA_FLAT,
269 var_blk,
270 NULL );
271 }
272
273
274
libfla_test_tridiagut_cntl_free(void)275 void libfla_test_tridiagut_cntl_free( void )
276 {
277 FLA_Blocksize_free( tridiagut_cntl_bsize );
278
279 FLA_Cntl_obj_free( tridiagut_cntl_unb );
280 FLA_Cntl_obj_free( tridiagut_cntl_opt );
281 FLA_Cntl_obj_free( tridiagut_cntl_blk );
282 }
283
284
285
libfla_test_tridiagut_impl(int impl,FLA_Uplo uplo,FLA_Obj A,FLA_Obj T)286 void libfla_test_tridiagut_impl( int impl,
287 FLA_Uplo uplo,
288 FLA_Obj A,
289 FLA_Obj T )
290 {
291 switch ( impl )
292 {
293 case FLA_TEST_FLAT_FRONT_END:
294 FLA_Tridiag_UT( uplo, A, T );
295 break;
296
297 case FLA_TEST_FLAT_UNB_VAR:
298 FLA_Tridiag_UT_internal( uplo, A, T, tridiagut_cntl_unb );
299 break;
300
301 case FLA_TEST_FLAT_OPT_VAR:
302 FLA_Tridiag_UT_internal( uplo, A, T, tridiagut_cntl_opt );
303 break;
304
305 case FLA_TEST_FLAT_BLK_VAR:
306 FLA_Tridiag_UT_internal( uplo, A, T, tridiagut_cntl_blk );
307 break;
308
309 default:
310 libfla_test_output_error( "Invalid implementation type.\n" );
311 }
312 }
313
314