1 /*
2
3 Copyright (C) 2014, The University of Texas at Austin
4
5 This file is part of libflame and is available under the 3-Clause
6 BSD license, which can be found in the LICENSE file at the top-level
7 directory, or at http://opensource.org/licenses/BSD-3-Clause
8
9 */
10
11 #include "FLAME.h"
12 #include "test_libflame.h"
13
14 #define NUM_PARAM_COMBOS 1
15 #define NUM_MATRIX_ARGS 2
16 #define FIRST_VARIANT 1
17 #define LAST_VARIANT 5
18
19 // Static variables.
20 static char* op_str = "Reduction to upper Hessenberg form via UT transform";
21 static char* fla_front_str = "FLA_Hess_UT";
22 static char* fla_unb_var_str = "unb_var";
23 static char* fla_opt_var_str = "opt_var";
24 static char* fla_blk_var_str = "blk_var";
25 static char* pc_str[NUM_PARAM_COMBOS] = { "" };
26 static test_thresh_t thresh = { 1e-03, 1e-04, // warn, pass for s
27 1e-12, 1e-13, // warn, pass for d
28 1e-03, 1e-04, // warn, pass for c
29 1e-12, 1e-13 }; // warn, pass for z
30
31 static fla_hessut_t* hessut_cntl_opt;
32 static fla_hessut_t* hessut_cntl_unb;
33 static fla_hessut_t* hessut_cntl_blk;
34 static fla_blocksize_t* hessut_cntl_bsize;
35
36 // Local prototypes.
37 void libfla_test_hessut_experiment( test_params_t params,
38 unsigned int var,
39 char* sc_str,
40 FLA_Datatype datatype,
41 unsigned int p,
42 unsigned int pci,
43 unsigned int n_repeats,
44 signed int impl,
45 double* perf,
46 double* residual );
47 void libfla_test_hessut_impl( int impl,
48 FLA_Obj A,
49 FLA_Obj T );
50 void libfla_test_hessut_cntl_create( unsigned int var,
51 dim_t b_alg_flat );
52 void libfla_test_hessut_cntl_free( void );
53
54
libfla_test_hessut(FILE * output_stream,test_params_t params,test_op_t op)55 void libfla_test_hessut( FILE* output_stream, test_params_t params, test_op_t op )
56 {
57 libfla_test_output_info( "--- %s ---\n", op_str );
58 libfla_test_output_info( "\n" );
59
60 if ( op.fla_front == ENABLE )
61 {
62 //libfla_test_output_info( "%s() front-end...\n", fla_front_str );
63 //libfla_test_output_info( "\n" );
64 libfla_test_op_driver( fla_front_str, NULL,
65 FIRST_VARIANT, LAST_VARIANT,
66 NUM_PARAM_COMBOS, pc_str,
67 NUM_MATRIX_ARGS,
68 FLA_TEST_FLAT_FRONT_END,
69 params, thresh, libfla_test_hessut_experiment );
70 }
71
72 if ( op.fla_unb_vars == ENABLE )
73 {
74 //libfla_test_output_info( "%s() unblocked variants...\n", fla_front_str );
75 //libfla_test_output_info( "\n" );
76 libfla_test_op_driver( fla_front_str, fla_unb_var_str,
77 FIRST_VARIANT, LAST_VARIANT,
78 NUM_PARAM_COMBOS, pc_str,
79 NUM_MATRIX_ARGS,
80 FLA_TEST_FLAT_UNB_VAR,
81 params, thresh, libfla_test_hessut_experiment );
82 }
83
84 if ( op.fla_opt_vars == ENABLE )
85 {
86 //libfla_test_output_info( "%s() optimized unblocked variants...\n", fla_front_str );
87 //libfla_test_output_info( "\n" );
88 libfla_test_op_driver( fla_front_str, fla_opt_var_str,
89 FIRST_VARIANT, LAST_VARIANT,
90 NUM_PARAM_COMBOS, pc_str,
91 NUM_MATRIX_ARGS,
92 FLA_TEST_FLAT_OPT_VAR,
93 params, thresh, libfla_test_hessut_experiment );
94 }
95
96 if ( op.fla_blk_vars == ENABLE )
97 {
98 //libfla_test_output_info( "%s() blocked variants...\n", fla_front_str );
99 //libfla_test_output_info( "\n" );
100 libfla_test_op_driver( fla_front_str, fla_blk_var_str,
101 FIRST_VARIANT, LAST_VARIANT,
102 NUM_PARAM_COMBOS, pc_str,
103 NUM_MATRIX_ARGS,
104 FLA_TEST_FLAT_BLK_VAR,
105 params, thresh, libfla_test_hessut_experiment );
106 }
107
108 }
109
110
111
libfla_test_hessut_experiment(test_params_t params,unsigned int var,char * sc_str,FLA_Datatype datatype,unsigned int p_cur,unsigned int pci,unsigned int n_repeats,signed int impl,double * perf,double * residual)112 void libfla_test_hessut_experiment( test_params_t params,
113 unsigned int var,
114 char* sc_str,
115 FLA_Datatype datatype,
116 unsigned int p_cur,
117 unsigned int pci,
118 unsigned int n_repeats,
119 signed int impl,
120 double* perf,
121 double* residual )
122 {
123 dim_t b_alg_flat = params.b_alg_flat;
124 double time_min = 1e9;
125 double time;
126 unsigned int i;
127 unsigned int m;
128 signed int m_input = -1;
129 FLA_Obj A, T, W, Qh, AQ, QhAQ, norm;
130 FLA_Obj AT, AB;
131 FLA_Obj QhT, QhB;
132 FLA_Obj A_save;
133
134 // Determine the dimensions.
135 if ( m_input < 0 ) m = p_cur * abs(m_input);
136 else m = p_cur;
137
138 // Create the matrices for the current operation.
139 libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[0], m, m, &A );
140
141 if ( impl == FLA_TEST_FLAT_FRONT_END ||
142 impl == FLA_TEST_FLAT_BLK_VAR )
143 {
144 libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], b_alg_flat, m, &T );
145 libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], b_alg_flat, m, &W );
146 }
147 else
148 {
149 libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], m, m, &T );
150 libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], m, m, &W );
151 }
152
153 // Initialize the test matrices.
154 FLA_Random_matrix( A );
155
156 // Save the original object contents in a temporary object.
157 FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, A, &A_save );
158
159 // Create auxiliary matrices to be used when checking the result.
160 FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &Qh );
161 FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &AQ );
162 FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &QhAQ );
163
164 // Create a real scalar object to hold the norm of A.
165 FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &norm );
166
167 // Create a control tree for the individual variants.
168 if ( impl == FLA_TEST_FLAT_UNB_VAR ||
169 impl == FLA_TEST_FLAT_OPT_VAR ||
170 impl == FLA_TEST_FLAT_BLK_VAR )
171 libfla_test_hessut_cntl_create( var, b_alg_flat );
172
173 // Repeat the experiment n_repeats times and record results.
174 for ( i = 0; i < n_repeats; ++i )
175 {
176 FLA_Copy_external( A_save, A );
177
178 time = FLA_Clock();
179
180 libfla_test_hessut_impl( impl, A, T );
181
182 time = FLA_Clock() - time;
183 time_min = min( time_min, time );
184 }
185
186 // Free the control trees if we're testing the variants.
187 if ( impl == FLA_TEST_FLAT_UNB_VAR ||
188 impl == FLA_TEST_FLAT_OPT_VAR ||
189 impl == FLA_TEST_FLAT_BLK_VAR )
190 libfla_test_hessut_cntl_free();
191
192 // Compute the performance of the best experiment repeat.
193 *perf = ( 10.0 / 3.0 * m * m * m ) / time_min / FLOPS_PER_UNIT_PERF;
194 if ( FLA_Obj_is_complex( A ) ) *perf *= 4.0;
195
196 // Check the result by computing R - Q' A_orig Q.
197 FLA_Set_to_identity( Qh );
198 FLA_Part_2x1( Qh, &QhT,
199 &QhB, 1, FLA_TOP );
200 FLA_Part_2x1( A, &AT,
201 &AB, 1, FLA_TOP );
202 FLA_Apply_Q_UT( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE,
203 AB, T, W, QhB );
204 FLA_Gemm( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
205 FLA_ONE, A_save, Qh, FLA_ZERO, AQ );
206 FLA_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
207 FLA_ONE, Qh, AQ, FLA_ZERO, QhAQ );
208 FLA_Triangularize( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, AB );
209 *residual = FLA_Max_elemwise_diff( A, QhAQ );
210
211 // Free the supporting flat objects.
212 FLA_Obj_free( &W );
213 FLA_Obj_free( &Qh );
214 FLA_Obj_free( &AQ );
215 FLA_Obj_free( &QhAQ );
216 FLA_Obj_free( &norm );
217 FLA_Obj_free( &A_save );
218
219 // Free the flat test matrices.
220 FLA_Obj_free( &A );
221 FLA_Obj_free( &T );
222 }
223
224
225
libfla_test_hessut_cntl_create(unsigned int var,dim_t b_alg_flat)226 void libfla_test_hessut_cntl_create( unsigned int var,
227 dim_t b_alg_flat )
228 {
229 int var_unb = FLA_UNB_VAR_OFFSET + var;
230 int var_opt = FLA_OPT_VAR_OFFSET + var;
231 int var_blk = FLA_BLK_VAR_OFFSET + var;
232
233 hessut_cntl_bsize = FLA_Blocksize_create( b_alg_flat,
234 b_alg_flat,
235 b_alg_flat,
236 b_alg_flat );
237
238 hessut_cntl_unb = FLA_Cntl_hessut_obj_create( FLA_FLAT,
239 var_unb,
240 NULL );
241
242 hessut_cntl_opt = FLA_Cntl_hessut_obj_create( FLA_FLAT,
243 var_opt,
244 NULL );
245
246 hessut_cntl_blk = FLA_Cntl_hessut_obj_create( FLA_FLAT,
247 var_blk,
248 NULL );
249 }
250
251
252
libfla_test_hessut_cntl_free(void)253 void libfla_test_hessut_cntl_free( void )
254 {
255 FLA_Blocksize_free( hessut_cntl_bsize );
256
257 FLA_Cntl_obj_free( hessut_cntl_unb );
258 FLA_Cntl_obj_free( hessut_cntl_opt );
259 FLA_Cntl_obj_free( hessut_cntl_blk );
260 }
261
262
263
libfla_test_hessut_impl(int impl,FLA_Obj A,FLA_Obj T)264 void libfla_test_hessut_impl( int impl,
265 FLA_Obj A,
266 FLA_Obj T )
267 {
268 switch ( impl )
269 {
270 case FLA_TEST_FLAT_FRONT_END:
271 FLA_Hess_UT( A, T );
272 break;
273
274 case FLA_TEST_FLAT_UNB_VAR:
275 FLA_Hess_UT_internal( A, T, hessut_cntl_unb );
276 break;
277
278 case FLA_TEST_FLAT_OPT_VAR:
279 FLA_Hess_UT_internal( A, T, hessut_cntl_opt );
280 break;
281
282 case FLA_TEST_FLAT_BLK_VAR:
283 FLA_Hess_UT_internal( A, T, hessut_cntl_blk );
284 break;
285
286 default:
287 libfla_test_output_error( "Invalid implementation type.\n" );
288 }
289 }
290
291