1 /*
2 
3     Copyright (C) 2014, The University of Texas at Austin
4 
5     This file is part of libflame and is available under the 3-Clause
6     BSD license, which can be found in the LICENSE file at the top-level
7     directory, or at http://opensource.org/licenses/BSD-3-Clause
8 
9 */
10 
11 #include "FLAME.h"
12 #include "test_libflame.h"
13 
14 //#define NUM_PARAM_COMBOS 16
15 #define NUM_PARAM_COMBOS 2
16 #define NUM_MATRIX_ARGS  2
17 #define FIRST_VARIANT    1
18 #define LAST_VARIANT     1
19 
20 // Static variables.
21 static char* op_str                   = "Apply Q via UT transform (incremental)";
22 static char* flash_front_str          = "FLASH_Apply_Q_UT_inc";
23 //static char* fla_front_str            = "FLA_Apply_Q_UT_inc";
24 //static char* fla_unb_var_str          = "";
25 //static char* fla_opt_var_str          = "";
26 //static char* fla_blk_var_str          = "";
27 //static char* pc_str[NUM_PARAM_COMBOS] = { "lnfc", "lnfr", "lnbc", "lnbr",
28 //                                          "lhfc", "lhfr", "lhbc", "lhbr",
29 //                                          "rnfc", "rnfr", "rnbc", "rnbr",
30 //                                          "rhfc", "rhfr", "rhbc", "rhbr" };
31 static char* pc_str[NUM_PARAM_COMBOS] = { "lnfc",
32                                           "lhfc" };
33 static test_thresh_t thresh           = { 1e-03, 1e-04,   // warn, pass for s
34                                           1e-12, 1e-13,   // warn, pass for d
35                                           1e-03, 1e-04,   // warn, pass for c
36                                           1e-12, 1e-13 }; // warn, pass for z
37 
38 // Local prototypes.
39 void libfla_test_apqutinc_experiment( test_params_t params,
40                                       unsigned int  var,
41                                       char*         sc_str,
42                                       FLA_Datatype  datatype,
43                                       unsigned int  p,
44                                       unsigned int  pci,
45                                       unsigned int  n_repeats,
46                                       signed int    impl,
47                                       double*       perf,
48                                       double*       residual );
49 void libfla_test_apqutinc_impl( int        impl,
50                                 FLA_Side   side,
51                                 FLA_Trans  trans,
52                                 FLA_Direct direct,
53                                 FLA_Store  storev,
54                                 FLA_Obj    A,
55                                 FLA_Obj    T,
56                                 FLA_Obj    W,
57                                 FLA_Obj    B );
58 
libfla_test_apqutinc(FILE * output_stream,test_params_t params,test_op_t op)59 void libfla_test_apqutinc( FILE* output_stream, test_params_t params, test_op_t op )
60 {
61 	libfla_test_output_info( "--- %s ---\n", op_str );
62 	libfla_test_output_info( "\n" );
63 
64 	if ( op.flash_front == ENABLE )
65 	{
66 		libfla_test_op_driver( flash_front_str, NULL,
67 		                       FIRST_VARIANT, LAST_VARIANT,
68 		                       NUM_PARAM_COMBOS, pc_str,
69 		                       NUM_MATRIX_ARGS,
70 		                       FLA_TEST_HIER_FRONT_END,
71 		                       params, thresh, libfla_test_apqutinc_experiment );
72 	}
73 }
74 
75 
76 
libfla_test_apqutinc_experiment(test_params_t params,unsigned int var,char * sc_str,FLA_Datatype datatype,unsigned int p_cur,unsigned int pci,unsigned int n_repeats,signed int impl,double * perf,double * residual)77 void libfla_test_apqutinc_experiment( test_params_t params,
78                                       unsigned int  var,
79                                       char*         sc_str,
80                                       FLA_Datatype  datatype,
81                                       unsigned int  p_cur,
82                                       unsigned int  pci,
83                                       unsigned int  n_repeats,
84                                       signed int    impl,
85                                       double*       perf,
86                                       double*       residual )
87 {
88 	dim_t        b_flash    = params.b_flash;
89 	dim_t        b_alg_hier = params.b_alg_hier;
90 	double       time_min   = 1e9;
91 	double       time;
92 	unsigned int i;
93 	unsigned int m, n;
94 	unsigned int min_m_n, k;
95 	signed int   m_input;
96 	signed int   n_input;
97 	FLA_Side     side;
98 	FLA_Trans    trans;
99 	FLA_Direct   direct;
100 	FLA_Store    storev;
101 	FLA_Obj      A, B, eye, norm;
102 	FLA_Obj      B_save;
103 	FLA_Obj      A_test, TW_test, W_test, B_test;
104 
105 	// Translate parameter characters to libflame constants.
106 	FLA_Param_map_char_to_flame_side( &pc_str[pci][0], &side );
107 	FLA_Param_map_char_to_flame_trans( &pc_str[pci][1], &trans );
108 	FLA_Param_map_char_to_flame_direct( &pc_str[pci][2], &direct );
109 	FLA_Param_map_char_to_flame_storev( &pc_str[pci][3], &storev );
110 
111 	// We want to make sure the Apply_Q_UT_inc routines work with rectangular
112 	// matrices. So we use m > n when testing with column-wise storage (via
113 	// QR factorization) and m < n when testing with row-wise storage (via
114 	// LQ factorization).
115 	if ( storev == FLA_COLUMNWISE )
116 	{
117 		m_input = -1;
118 		n_input = -1;
119 		//m_input = -1;
120 		//n_input = -1;
121 	}
122 	else // if ( storev == FLA_ROWWISE )
123 	{
124 		m_input = -1;
125 		n_input = -1;
126 		//m_input = -1;
127 		//n_input = -1;
128 	}
129 
130 	// Determine the dimensions.
131 	if ( m_input < 0 ) m = p_cur * abs(m_input);
132 	else               m = p_cur;
133 	if ( n_input < 0 ) n = p_cur * abs(n_input);
134 	else               n = p_cur;
135 
136 	// Compute the minimum dimension.
137 	min_m_n = min( m, n );
138 
139 	// Choose the size of B based on the storev parameter.
140 	if ( storev == FLA_COLUMNWISE ) k = m;
141 	else                            k = n;
142 
143 	// Create the matrices for the current operation.
144 	libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[0], m, n, &A );
145 	libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], k, k, &B );
146 	FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, B, &eye );
147 
148 	// Initialize the test matrices.
149 	FLA_Random_matrix( A );
150 	FLA_Set_to_identity( B );
151 	FLA_Set_to_identity( eye );
152 
153 	// Use hierarchical matrices since we're testing the FLASH front-end.
154 	if ( storev == FLA_COLUMNWISE )
155 		FLASH_QR_UT_inc_create_hier_matrices( A, 1, &b_flash, b_alg_hier, &A_test, &TW_test );
156 	//else // if ( storev == FLA_ROWWISE )
157 		//FLASH_LQ_UT_inc_create_hier_matrices( A, 1, &b_flash, b_alg_hier, &A_test, &TW_test );
158 	FLASH_Obj_create_hier_copy_of_flat( B, 1, &b_flash, &B_test );
159 	FLASH_Apply_Q_UT_inc_create_workspace( TW_test, B_test, &W_test );
160 
161 	// Create a real scalar object to hold the norm of A.
162 	FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &norm );
163 
164 	// Save the original object contents in a temporary object.
165 	FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, B, &B_save );
166 
167 	// Compute a Householder factorization.
168 	if ( storev == FLA_COLUMNWISE ) FLASH_QR_UT_inc( A_test, TW_test );
169 	//else                            FLASH_LQ_UT_inc( A_test, TW_test );
170 
171 	// Repeat the experiment n_repeats times and record results.
172 	for ( i = 0; i < n_repeats; ++i )
173 	{
174 		FLASH_Obj_hierarchify( B_save, B_test );
175 
176 		time = FLA_Clock();
177 
178 		libfla_test_apqutinc_impl( impl, side, trans, direct, storev,
179 		                           A_test, TW_test, W_test, B_test );
180 
181 		time = FLA_Clock() - time;
182 		time_min = min( time_min, time );
183 	}
184 
185 	// Multiply by its conjugate-transpose to get what should be (near) identity
186 	// and then subtract from actual identity to get what should be (near) zero.
187 	if ( impl == FLA_TEST_HIER_FRONT_END )
188 	{
189 		FLASH_Obj_flatten( B_test, B );
190 		FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
191 		                   FLA_ONE, B, B, FLA_MINUS_ONE, eye );
192 	}
193 
194 	// Free the hierarchical matrices if we're testing the FLASH front-end.
195 	if ( impl == FLA_TEST_HIER_FRONT_END )
196 	{
197 		FLASH_Obj_free( &A_test );
198 		FLASH_Obj_free( &TW_test );
199 		FLASH_Obj_free( &W_test );
200 		FLASH_Obj_free( &B_test );
201 	}
202 
203 	// Compute the norm of eye, which contains I - Q * Q'.
204 	FLA_Norm1( eye, norm );
205 	FLA_Obj_extract_real_scalar( norm, residual );
206 
207 	// Compute the performance of the best experiment repeat.
208 	*perf = (  4.0 *       m * min_m_n * n -
209 	           2.0 * min_m_n * min_m_n * n ) / time_min / FLOPS_PER_UNIT_PERF;
210 	if ( FLA_Obj_is_complex( A ) ) *perf *= 4.0;
211 
212 	// Free the supporting flat objects.
213 	FLA_Obj_free( &B_save );
214 
215 	// Free the flat test matrices.
216 	FLA_Obj_free( &A );
217 	FLA_Obj_free( &B );
218 	FLA_Obj_free( &eye );
219 	FLA_Obj_free( &norm );
220 }
221 
222 
223 
libfla_test_apqutinc_impl(int impl,FLA_Side side,FLA_Trans trans,FLA_Direct direct,FLA_Store storev,FLA_Obj A,FLA_Obj TW,FLA_Obj W,FLA_Obj B)224 void libfla_test_apqutinc_impl( int        impl,
225                                 FLA_Side   side,
226                                 FLA_Trans  trans,
227                                 FLA_Direct direct,
228                                 FLA_Store  storev,
229                                 FLA_Obj    A,
230                                 FLA_Obj    TW,
231                                 FLA_Obj    W,
232                                 FLA_Obj    B )
233 {
234 	switch ( impl )
235 	{
236 		case FLA_TEST_HIER_FRONT_END:
237 		FLASH_Apply_Q_UT_inc( side, trans, direct, storev,
238 		                      A, TW, W, B );
239 		break;
240 
241 		default:
242 		libfla_test_output_error( "Invalid implementation type.\n" );
243 	}
244 }
245 
246