1 /*
2 
3     Copyright (C) 2014, The University of Texas at Austin
4 
5     This file is part of libflame and is available under the 3-Clause
6     BSD license, which can be found in the LICENSE file at the top-level
7     directory, or at http://opensource.org/licenses/BSD-3-Clause
8 
9 */
10 
11 #include "FLAME.h"
12 #include "test_libflame.h"
13 
14 #define NUM_PARAM_COMBOS 1
15 #define NUM_MATRIX_ARGS  2
16 #define FIRST_VARIANT    1
17 #define LAST_VARIANT     5
18 
19 // Static variables.
20 static char* op_str                   = "Reduction to upper Hessenberg form via UT transform";
21 static char* fla_front_str            = "FLA_Hess_UT";
22 static char* fla_unb_var_str          = "unb_var";
23 static char* fla_opt_var_str          = "opt_var";
24 static char* fla_blk_var_str          = "blk_var";
25 static char* pc_str[NUM_PARAM_COMBOS] = { "" };
26 static test_thresh_t thresh           = { 1e-03, 1e-04,   // warn, pass for s
27                                           1e-12, 1e-13,   // warn, pass for d
28                                           1e-03, 1e-04,   // warn, pass for c
29                                           1e-12, 1e-13 }; // warn, pass for z
30 
31 static fla_hessut_t*    hessut_cntl_opt;
32 static fla_hessut_t*    hessut_cntl_unb;
33 static fla_hessut_t*    hessut_cntl_blk;
34 static fla_blocksize_t* hessut_cntl_bsize;
35 
36 // Local prototypes.
37 void libfla_test_hessut_experiment( test_params_t params,
38                                     unsigned int  var,
39                                     char*         sc_str,
40                                     FLA_Datatype  datatype,
41                                     unsigned int  p,
42                                     unsigned int  pci,
43                                     unsigned int  n_repeats,
44                                     signed int    impl,
45                                     double*       perf,
46                                     double*       residual );
47 void libfla_test_hessut_impl( int     impl,
48                               FLA_Obj A,
49                               FLA_Obj T );
50 void libfla_test_hessut_cntl_create( unsigned int var,
51                                      dim_t        b_alg_flat );
52 void libfla_test_hessut_cntl_free( void );
53 
54 
libfla_test_hessut(FILE * output_stream,test_params_t params,test_op_t op)55 void libfla_test_hessut( FILE* output_stream, test_params_t params, test_op_t op )
56 {
57 	libfla_test_output_info( "--- %s ---\n", op_str );
58 	libfla_test_output_info( "\n" );
59 
60 	if ( op.fla_front == ENABLE )
61 	{
62 		//libfla_test_output_info( "%s() front-end...\n", fla_front_str );
63 		//libfla_test_output_info( "\n" );
64 		libfla_test_op_driver( fla_front_str, NULL,
65 		                       FIRST_VARIANT, LAST_VARIANT,
66 		                       NUM_PARAM_COMBOS, pc_str,
67 		                       NUM_MATRIX_ARGS,
68 		                       FLA_TEST_FLAT_FRONT_END,
69 		                       params, thresh, libfla_test_hessut_experiment );
70 	}
71 
72 	if ( op.fla_unb_vars == ENABLE )
73 	{
74 		//libfla_test_output_info( "%s() unblocked variants...\n", fla_front_str );
75 		//libfla_test_output_info( "\n" );
76 		libfla_test_op_driver( fla_front_str, fla_unb_var_str,
77 		                       FIRST_VARIANT, LAST_VARIANT,
78 		                       NUM_PARAM_COMBOS, pc_str,
79 		                       NUM_MATRIX_ARGS,
80 		                       FLA_TEST_FLAT_UNB_VAR,
81 		                       params, thresh, libfla_test_hessut_experiment );
82 	}
83 
84 	if ( op.fla_opt_vars == ENABLE )
85 	{
86 		//libfla_test_output_info( "%s() optimized unblocked variants...\n", fla_front_str );
87 		//libfla_test_output_info( "\n" );
88 		libfla_test_op_driver( fla_front_str, fla_opt_var_str,
89 		                       FIRST_VARIANT, LAST_VARIANT,
90 		                       NUM_PARAM_COMBOS, pc_str,
91 		                       NUM_MATRIX_ARGS,
92 		                       FLA_TEST_FLAT_OPT_VAR,
93 		                       params, thresh, libfla_test_hessut_experiment );
94 	}
95 
96 	if ( op.fla_blk_vars == ENABLE )
97 	{
98 		//libfla_test_output_info( "%s() blocked variants...\n", fla_front_str );
99 		//libfla_test_output_info( "\n" );
100 		libfla_test_op_driver( fla_front_str, fla_blk_var_str,
101 		                       FIRST_VARIANT, LAST_VARIANT,
102 		                       NUM_PARAM_COMBOS, pc_str,
103 		                       NUM_MATRIX_ARGS,
104 		                       FLA_TEST_FLAT_BLK_VAR,
105 		                       params, thresh, libfla_test_hessut_experiment );
106 	}
107 
108 }
109 
110 
111 
libfla_test_hessut_experiment(test_params_t params,unsigned int var,char * sc_str,FLA_Datatype datatype,unsigned int p_cur,unsigned int pci,unsigned int n_repeats,signed int impl,double * perf,double * residual)112 void libfla_test_hessut_experiment( test_params_t params,
113                                     unsigned int  var,
114                                     char*         sc_str,
115                                     FLA_Datatype  datatype,
116                                     unsigned int  p_cur,
117                                     unsigned int  pci,
118                                     unsigned int  n_repeats,
119                                     signed int    impl,
120                                     double*       perf,
121                                     double*       residual )
122 {
123 	dim_t        b_alg_flat = params.b_alg_flat;
124 	double       time_min   = 1e9;
125 	double       time;
126 	unsigned int i;
127 	unsigned int m;
128 	signed int   m_input    = -1;
129 	FLA_Obj      A, T, W, Qh, AQ, QhAQ, norm;
130 	FLA_Obj      AT, AB;
131 	FLA_Obj      QhT, QhB;
132 	FLA_Obj      A_save;
133 
134 	// Determine the dimensions.
135 	if ( m_input < 0 ) m = p_cur * abs(m_input);
136 	else               m = p_cur;
137 
138 	// Create the matrices for the current operation.
139 	libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[0], m, m, &A );
140 
141 	if ( impl == FLA_TEST_FLAT_FRONT_END ||
142 	     impl == FLA_TEST_FLAT_BLK_VAR )
143 	{
144 		libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], b_alg_flat, m, &T );
145 		libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], b_alg_flat, m, &W );
146 	}
147 	else
148 	{
149 		libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], m, m, &T );
150 		libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], m, m, &W );
151 	}
152 
153 	// Initialize the test matrices.
154 	FLA_Random_matrix( A );
155 
156 	// Save the original object contents in a temporary object.
157 	FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, A, &A_save );
158 
159 	// Create auxiliary matrices to be used when checking the result.
160 	FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &Qh );
161 	FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &AQ );
162 	FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &QhAQ );
163 
164 	// Create a real scalar object to hold the norm of A.
165 	FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &norm );
166 
167 	// Create a control tree for the individual variants.
168 	if ( impl == FLA_TEST_FLAT_UNB_VAR ||
169 	     impl == FLA_TEST_FLAT_OPT_VAR ||
170 	     impl == FLA_TEST_FLAT_BLK_VAR )
171 		libfla_test_hessut_cntl_create( var, b_alg_flat );
172 
173 	// Repeat the experiment n_repeats times and record results.
174 	for ( i = 0; i < n_repeats; ++i )
175 	{
176 		FLA_Copy_external( A_save, A );
177 
178 		time = FLA_Clock();
179 
180 		libfla_test_hessut_impl( impl, A, T );
181 
182 		time = FLA_Clock() - time;
183 		time_min = min( time_min, time );
184 	}
185 
186 	// Free the control trees if we're testing the variants.
187 	if ( impl == FLA_TEST_FLAT_UNB_VAR ||
188 	     impl == FLA_TEST_FLAT_OPT_VAR ||
189 	     impl == FLA_TEST_FLAT_BLK_VAR )
190 		libfla_test_hessut_cntl_free();
191 
192 	// Compute the performance of the best experiment repeat.
193 	*perf = ( 10.0 / 3.0 * m * m * m ) / time_min / FLOPS_PER_UNIT_PERF;
194 	if ( FLA_Obj_is_complex( A ) ) *perf *= 4.0;
195 
196 	// Check the result by computing R - Q' A_orig Q.
197 	FLA_Set_to_identity( Qh );
198 	FLA_Part_2x1( Qh,   &QhT,
199 	                    &QhB,   1, FLA_TOP );
200 	FLA_Part_2x1( A,    &AT,
201 	                    &AB,    1, FLA_TOP );
202 	FLA_Apply_Q_UT( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE,
203 	                AB, T, W, QhB );
204 	FLA_Gemm( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
205 	          FLA_ONE, A_save, Qh, FLA_ZERO, AQ );
206 	FLA_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
207 	          FLA_ONE, Qh, AQ, FLA_ZERO, QhAQ );
208 	FLA_Triangularize( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, AB );
209 	*residual = FLA_Max_elemwise_diff( A, QhAQ );
210 
211 	// Free the supporting flat objects.
212 	FLA_Obj_free( &W );
213 	FLA_Obj_free( &Qh );
214 	FLA_Obj_free( &AQ );
215 	FLA_Obj_free( &QhAQ );
216 	FLA_Obj_free( &norm );
217 	FLA_Obj_free( &A_save );
218 
219 	// Free the flat test matrices.
220 	FLA_Obj_free( &A );
221 	FLA_Obj_free( &T );
222 }
223 
224 
225 
libfla_test_hessut_cntl_create(unsigned int var,dim_t b_alg_flat)226 void libfla_test_hessut_cntl_create( unsigned int var,
227                                      dim_t        b_alg_flat )
228 {
229 	int var_unb  = FLA_UNB_VAR_OFFSET + var;
230 	int var_opt  = FLA_OPT_VAR_OFFSET + var;
231 	int var_blk  = FLA_BLK_VAR_OFFSET + var;
232 
233 	hessut_cntl_bsize = FLA_Blocksize_create( b_alg_flat,
234                                               b_alg_flat,
235                                               b_alg_flat,
236                                               b_alg_flat );
237 
238 	hessut_cntl_unb   = FLA_Cntl_hessut_obj_create( FLA_FLAT,
239                                                     var_unb,
240                                                     NULL );
241 
242 	hessut_cntl_opt   = FLA_Cntl_hessut_obj_create( FLA_FLAT,
243                                                     var_opt,
244                                                     NULL );
245 
246 	hessut_cntl_blk   = FLA_Cntl_hessut_obj_create( FLA_FLAT,
247                                                     var_blk,
248                                                     NULL );
249 }
250 
251 
252 
libfla_test_hessut_cntl_free(void)253 void libfla_test_hessut_cntl_free( void )
254 {
255 	FLA_Blocksize_free( hessut_cntl_bsize );
256 
257 	FLA_Cntl_obj_free( hessut_cntl_unb );
258 	FLA_Cntl_obj_free( hessut_cntl_opt );
259 	FLA_Cntl_obj_free( hessut_cntl_blk );
260 }
261 
262 
263 
libfla_test_hessut_impl(int impl,FLA_Obj A,FLA_Obj T)264 void libfla_test_hessut_impl( int     impl,
265                               FLA_Obj A,
266                               FLA_Obj T )
267 {
268 	switch ( impl )
269 	{
270 		case FLA_TEST_FLAT_FRONT_END:
271 		FLA_Hess_UT( A, T );
272 		break;
273 
274 		case FLA_TEST_FLAT_UNB_VAR:
275 		FLA_Hess_UT_internal( A, T, hessut_cntl_unb );
276 		break;
277 
278 		case FLA_TEST_FLAT_OPT_VAR:
279 		FLA_Hess_UT_internal( A, T, hessut_cntl_opt );
280 		break;
281 
282 		case FLA_TEST_FLAT_BLK_VAR:
283 		FLA_Hess_UT_internal( A, T, hessut_cntl_blk );
284 		break;
285 
286 		default:
287 		libfla_test_output_error( "Invalid implementation type.\n" );
288 	}
289 }
290 
291