1 /*
2
3 Copyright (C) 2014, The University of Texas at Austin
4
5 This file is part of libflame and is available under the 3-Clause
6 BSD license, which can be found in the LICENSE file at the top-level
7 directory, or at http://opensource.org/licenses/BSD-3-Clause
8
9 */
10
11 #include "FLAME.h"
12 #include "test_libflame.h"
13
14 //#define NUM_PARAM_COMBOS 16
15 #define NUM_PARAM_COMBOS 2
16 #define NUM_MATRIX_ARGS 2
17 #define FIRST_VARIANT 1
18 #define LAST_VARIANT 1
19
20 // Static variables.
21 static char* op_str = "Apply Q via UT transform (incremental)";
22 static char* flash_front_str = "FLASH_Apply_Q_UT_inc";
23 //static char* fla_front_str = "FLA_Apply_Q_UT_inc";
24 //static char* fla_unb_var_str = "";
25 //static char* fla_opt_var_str = "";
26 //static char* fla_blk_var_str = "";
27 //static char* pc_str[NUM_PARAM_COMBOS] = { "lnfc", "lnfr", "lnbc", "lnbr",
28 // "lhfc", "lhfr", "lhbc", "lhbr",
29 // "rnfc", "rnfr", "rnbc", "rnbr",
30 // "rhfc", "rhfr", "rhbc", "rhbr" };
31 static char* pc_str[NUM_PARAM_COMBOS] = { "lnfc",
32 "lhfc" };
33 static test_thresh_t thresh = { 1e-03, 1e-04, // warn, pass for s
34 1e-12, 1e-13, // warn, pass for d
35 1e-03, 1e-04, // warn, pass for c
36 1e-12, 1e-13 }; // warn, pass for z
37
38 // Local prototypes.
39 void libfla_test_apqutinc_experiment( test_params_t params,
40 unsigned int var,
41 char* sc_str,
42 FLA_Datatype datatype,
43 unsigned int p,
44 unsigned int pci,
45 unsigned int n_repeats,
46 signed int impl,
47 double* perf,
48 double* residual );
49 void libfla_test_apqutinc_impl( int impl,
50 FLA_Side side,
51 FLA_Trans trans,
52 FLA_Direct direct,
53 FLA_Store storev,
54 FLA_Obj A,
55 FLA_Obj T,
56 FLA_Obj W,
57 FLA_Obj B );
58
libfla_test_apqutinc(FILE * output_stream,test_params_t params,test_op_t op)59 void libfla_test_apqutinc( FILE* output_stream, test_params_t params, test_op_t op )
60 {
61 libfla_test_output_info( "--- %s ---\n", op_str );
62 libfla_test_output_info( "\n" );
63
64 if ( op.flash_front == ENABLE )
65 {
66 libfla_test_op_driver( flash_front_str, NULL,
67 FIRST_VARIANT, LAST_VARIANT,
68 NUM_PARAM_COMBOS, pc_str,
69 NUM_MATRIX_ARGS,
70 FLA_TEST_HIER_FRONT_END,
71 params, thresh, libfla_test_apqutinc_experiment );
72 }
73 }
74
75
76
libfla_test_apqutinc_experiment(test_params_t params,unsigned int var,char * sc_str,FLA_Datatype datatype,unsigned int p_cur,unsigned int pci,unsigned int n_repeats,signed int impl,double * perf,double * residual)77 void libfla_test_apqutinc_experiment( test_params_t params,
78 unsigned int var,
79 char* sc_str,
80 FLA_Datatype datatype,
81 unsigned int p_cur,
82 unsigned int pci,
83 unsigned int n_repeats,
84 signed int impl,
85 double* perf,
86 double* residual )
87 {
88 dim_t b_flash = params.b_flash;
89 dim_t b_alg_hier = params.b_alg_hier;
90 double time_min = 1e9;
91 double time;
92 unsigned int i;
93 unsigned int m, n;
94 unsigned int min_m_n, k;
95 signed int m_input;
96 signed int n_input;
97 FLA_Side side;
98 FLA_Trans trans;
99 FLA_Direct direct;
100 FLA_Store storev;
101 FLA_Obj A, B, eye, norm;
102 FLA_Obj B_save;
103 FLA_Obj A_test, TW_test, W_test, B_test;
104
105 // Translate parameter characters to libflame constants.
106 FLA_Param_map_char_to_flame_side( &pc_str[pci][0], &side );
107 FLA_Param_map_char_to_flame_trans( &pc_str[pci][1], &trans );
108 FLA_Param_map_char_to_flame_direct( &pc_str[pci][2], &direct );
109 FLA_Param_map_char_to_flame_storev( &pc_str[pci][3], &storev );
110
111 // We want to make sure the Apply_Q_UT_inc routines work with rectangular
112 // matrices. So we use m > n when testing with column-wise storage (via
113 // QR factorization) and m < n when testing with row-wise storage (via
114 // LQ factorization).
115 if ( storev == FLA_COLUMNWISE )
116 {
117 m_input = -1;
118 n_input = -1;
119 //m_input = -1;
120 //n_input = -1;
121 }
122 else // if ( storev == FLA_ROWWISE )
123 {
124 m_input = -1;
125 n_input = -1;
126 //m_input = -1;
127 //n_input = -1;
128 }
129
130 // Determine the dimensions.
131 if ( m_input < 0 ) m = p_cur * abs(m_input);
132 else m = p_cur;
133 if ( n_input < 0 ) n = p_cur * abs(n_input);
134 else n = p_cur;
135
136 // Compute the minimum dimension.
137 min_m_n = min( m, n );
138
139 // Choose the size of B based on the storev parameter.
140 if ( storev == FLA_COLUMNWISE ) k = m;
141 else k = n;
142
143 // Create the matrices for the current operation.
144 libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[0], m, n, &A );
145 libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], k, k, &B );
146 FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, B, &eye );
147
148 // Initialize the test matrices.
149 FLA_Random_matrix( A );
150 FLA_Set_to_identity( B );
151 FLA_Set_to_identity( eye );
152
153 // Use hierarchical matrices since we're testing the FLASH front-end.
154 if ( storev == FLA_COLUMNWISE )
155 FLASH_QR_UT_inc_create_hier_matrices( A, 1, &b_flash, b_alg_hier, &A_test, &TW_test );
156 //else // if ( storev == FLA_ROWWISE )
157 //FLASH_LQ_UT_inc_create_hier_matrices( A, 1, &b_flash, b_alg_hier, &A_test, &TW_test );
158 FLASH_Obj_create_hier_copy_of_flat( B, 1, &b_flash, &B_test );
159 FLASH_Apply_Q_UT_inc_create_workspace( TW_test, B_test, &W_test );
160
161 // Create a real scalar object to hold the norm of A.
162 FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &norm );
163
164 // Save the original object contents in a temporary object.
165 FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, B, &B_save );
166
167 // Compute a Householder factorization.
168 if ( storev == FLA_COLUMNWISE ) FLASH_QR_UT_inc( A_test, TW_test );
169 //else FLASH_LQ_UT_inc( A_test, TW_test );
170
171 // Repeat the experiment n_repeats times and record results.
172 for ( i = 0; i < n_repeats; ++i )
173 {
174 FLASH_Obj_hierarchify( B_save, B_test );
175
176 time = FLA_Clock();
177
178 libfla_test_apqutinc_impl( impl, side, trans, direct, storev,
179 A_test, TW_test, W_test, B_test );
180
181 time = FLA_Clock() - time;
182 time_min = min( time_min, time );
183 }
184
185 // Multiply by its conjugate-transpose to get what should be (near) identity
186 // and then subtract from actual identity to get what should be (near) zero.
187 if ( impl == FLA_TEST_HIER_FRONT_END )
188 {
189 FLASH_Obj_flatten( B_test, B );
190 FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
191 FLA_ONE, B, B, FLA_MINUS_ONE, eye );
192 }
193
194 // Free the hierarchical matrices if we're testing the FLASH front-end.
195 if ( impl == FLA_TEST_HIER_FRONT_END )
196 {
197 FLASH_Obj_free( &A_test );
198 FLASH_Obj_free( &TW_test );
199 FLASH_Obj_free( &W_test );
200 FLASH_Obj_free( &B_test );
201 }
202
203 // Compute the norm of eye, which contains I - Q * Q'.
204 FLA_Norm1( eye, norm );
205 FLA_Obj_extract_real_scalar( norm, residual );
206
207 // Compute the performance of the best experiment repeat.
208 *perf = ( 4.0 * m * min_m_n * n -
209 2.0 * min_m_n * min_m_n * n ) / time_min / FLOPS_PER_UNIT_PERF;
210 if ( FLA_Obj_is_complex( A ) ) *perf *= 4.0;
211
212 // Free the supporting flat objects.
213 FLA_Obj_free( &B_save );
214
215 // Free the flat test matrices.
216 FLA_Obj_free( &A );
217 FLA_Obj_free( &B );
218 FLA_Obj_free( &eye );
219 FLA_Obj_free( &norm );
220 }
221
222
223
libfla_test_apqutinc_impl(int impl,FLA_Side side,FLA_Trans trans,FLA_Direct direct,FLA_Store storev,FLA_Obj A,FLA_Obj TW,FLA_Obj W,FLA_Obj B)224 void libfla_test_apqutinc_impl( int impl,
225 FLA_Side side,
226 FLA_Trans trans,
227 FLA_Direct direct,
228 FLA_Store storev,
229 FLA_Obj A,
230 FLA_Obj TW,
231 FLA_Obj W,
232 FLA_Obj B )
233 {
234 switch ( impl )
235 {
236 case FLA_TEST_HIER_FRONT_END:
237 FLASH_Apply_Q_UT_inc( side, trans, direct, storev,
238 A, TW, W, B );
239 break;
240
241 default:
242 libfla_test_output_error( "Invalid implementation type.\n" );
243 }
244 }
245
246