1 /*
2
3 BLIS
4 An object-based framework for developing high-performance BLAS-like
5 libraries.
6
7 Copyright (C) 2014, The University of Texas at Austin
8 Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
9
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are
12 met:
13 - Redistributions of source code must retain the above copyright
14 notice, this list of conditions and the following disclaimer.
15 - Redistributions in binary form must reproduce the above copyright
16 notice, this list of conditions and the following disclaimer in the
17 documentation and/or other materials provided with the distribution.
18 - Neither the name(s) of the copyright holder(s) nor the names of its
19 contributors may be used to endorse or promote products derived
20 from this software without specific prior written permission.
21
22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
34 */
35
36 #include "blis.h"
37 #include "test_libblis.h"
38
39
40 // Static variables.
41 static char* op_str = "syrk";
42 static char* o_types = "mm"; // a c
43 static char* p_types = "uh"; // uploc transa
44 static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s
45 { 1e-04, 1e-05 }, // warn, pass for c
46 { 1e-13, 1e-14 }, // warn, pass for d
47 { 1e-13, 1e-14 } }; // warn, pass for z
48
49 // Local prototypes.
50 void libblis_test_syrk_deps
51 (
52 thread_data_t* tdata,
53 test_params_t* params,
54 test_op_t* op
55 );
56
57 void libblis_test_syrk_experiment
58 (
59 test_params_t* params,
60 test_op_t* op,
61 iface_t iface,
62 char* dc_str,
63 char* pc_str,
64 char* sc_str,
65 unsigned int p_cur,
66 double* perf,
67 double* resid
68 );
69
70 void libblis_test_syrk_impl
71 (
72 iface_t iface,
73 obj_t* alpha,
74 obj_t* a,
75 obj_t* beta,
76 obj_t* c
77 );
78
79 void libblis_test_syrk_check
80 (
81 test_params_t* params,
82 obj_t* alpha,
83 obj_t* a,
84 obj_t* beta,
85 obj_t* c,
86 obj_t* c_orig,
87 double* resid
88 );
89
90
91
//
// Runs the test of every operation that the syrk test treats as a
// dependency. Each call is handed the same thread data and test parameters
// together with the corresponding entry of op->ops.
//
void libblis_test_syrk_deps( thread_data_t* tdata,
                             test_params_t* params,
                             test_op_t*     op )
{
	// Vector/matrix utility and level-1 dependencies.
	libblis_test_randv ( tdata, params, &op->ops->randv  );
	libblis_test_randm ( tdata, params, &op->ops->randm  );
	libblis_test_setv  ( tdata, params, &op->ops->setv   );
	libblis_test_normfv( tdata, params, &op->ops->normfv );
	libblis_test_subv  ( tdata, params, &op->ops->subv   );
	libblis_test_scalv ( tdata, params, &op->ops->scalv  );
	libblis_test_copym ( tdata, params, &op->ops->copym  );
	libblis_test_scalm ( tdata, params, &op->ops->scalm  );

	// Level-2 dependencies.
	libblis_test_gemv  ( tdata, params, &op->ops->gemv   );
	libblis_test_symv  ( tdata, params, &op->ops->symv   );
}
110
111
112
//
// Entry point of the syrk test. Does nothing if the test has already been
// done or is disabled; otherwise runs the dependency tests and then hands
// libblis_test_syrk_experiment() to the generic operation driver.
//
void libblis_test_syrk( thread_data_t* tdata,
                        test_params_t* params,
                        test_op_t*     op )
{
	// Nothing to do if this test has already been done, or if the operation
	// (or level-3 testing as a whole) is disabled. The checks short-circuit
	// in this order.
	if ( libblis_test_op_is_done( op )     ||
	     libblis_test_op_is_disabled( op ) ||
	     libblis_test_l3_is_disabled( op ) )
	{
		return;
	}

	// Dependencies are always tested before the operation itself.
	libblis_test_syrk_deps( tdata, params, op );

	// Execute the test driver for the sequential front-end, the only
	// interface requested here.
	libblis_test_op_driver( tdata, params, op,
	                        BLIS_TEST_SEQ_FRONT_END,
	                        op_str, p_types, o_types, thresh,
	                        libblis_test_syrk_experiment );
}
145
146
147
//
// Runs a single syrk experiment.
//
//   params  test parameters (supplies n_repeats and object-creation settings)
//   op      operation descriptor (supplies the dimension specifiers)
//   iface   interface through which syrk is invoked
//   dc_str  datatype combination string; only the first char is used
//   pc_str  parameter combination string: [0] = uploc, [1] = transa
//   sc_str  storage combination string: [0] = storage of c, [1] = storage of a
//   p_cur   current problem size
//   perf    (output) performance estimate of the fastest repeat
//   resid   (output) residual computed by libblis_test_syrk_check()
//
void libblis_test_syrk_experiment( test_params_t* params,
                                   test_op_t*     op,
                                   iface_t        iface,
                                   char*          dc_str,
                                   char*          pc_str,
                                   char*          sc_str,
                                   unsigned int   p_cur,
                                   double*        perf,
                                   double*        resid )
{
	const unsigned int n_repeats = params->n_repeats;

	num_t   datatype;
	uplo_t  uploc;
	trans_t transa;

	obj_t   alpha, beta;
	obj_t   a, c, c_save;

	// The first char of the datatype combination string selects the datatype.
	bli_param_map_char_to_blis_dt( dc_str[0], &datatype );

	// Translate the dimension specifiers into actual dimensions.
	const dim_t m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
	const dim_t k = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur );

	// Translate the parameter chars into BLIS constants.
	bli_param_map_char_to_blis_uplo( pc_str[0], &uploc );
	bli_param_map_char_to_blis_trans( pc_str[1], &transa );

	// Create the scalars.
	bli_obj_scalar_init_detached( datatype, &alpha );
	bli_obj_scalar_init_detached( datatype, &beta );

	// Create the matrix operands: A is m x k (subject to transa); C and its
	// backup copy are m x m.
	libblis_test_mobj_create( params, datatype, transa,
	                          sc_str[1], m, k, &a );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[0], m, m, &c );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[0], m, m, &c_save );

	// Set alpha and beta. Unlike herk, syrk keeps C symmetric in both the
	// real and complex cases, so both scalars may carry an imaginary part
	// when C is complex; in the real case the imaginary part is zero.
	{
		const double imag = ( bli_obj_is_real( &c ) ? 0.0 : 0.5 );

		bli_setsc(  1.2, imag, &alpha );
		bli_setsc( -1.0, imag, &beta );
	}

	// Randomize A.
	libblis_test_mobj_randomize( params, TRUE, &a );

	// Mark C as symmetric, stored in the requested triangle.
	bli_obj_set_struc( BLIS_SYMMETRIC, &c );
	bli_obj_set_uplo( uploc, &c );

	// Randomize C, make it densely symmetric, and then zero the unstored
	// triangle to ensure the implementation reads only from the stored
	// region.
	libblis_test_mobj_randomize( params, TRUE, &c );
	bli_mksymm( &c );
	bli_mktrim( &c );

	// Give the backup the same structure and uplo properties, then save C.
	bli_obj_set_struc( BLIS_SYMMETRIC, &c_save );
	bli_obj_set_uplo( uploc, &c_save );
	bli_copym( &c, &c_save );

	// Apply the remaining parameters.
	bli_obj_set_conjtrans( transa, &a );

	// Repeat the experiment n_repeats times, restoring C before each run and
	// keeping the smallest elapsed time.
	double time_min = DBL_MAX;

	for ( unsigned int rep = 0; rep < n_repeats; ++rep )
	{
		bli_copym( &c_save, &c );

		const double time_start = bli_clock();

		libblis_test_syrk_impl( iface, &alpha, &a, &beta, &c );

		time_min = bli_clock_min_diff( time_min, time_start );
	}

	// Estimate the performance of the best repeat from an m*m*k operation
	// count; the estimate is multiplied by 4 when C is complex.
	const double op_count = 1.0 * m * m * k;

	*perf = op_count / time_min / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( &c ) )
	{
		*perf *= 4.0;
	}

	// Compute the residual of the result against the saved original C.
	libblis_test_syrk_check( params, &alpha, &a, &beta, &c, &c_save, resid );

	// Zero out performance and residual if the output matrix is empty.
	libblis_test_check_empty_problem( &c, perf, resid );

	// Release the matrix operands.
	bli_obj_free( &a );
	bli_obj_free( &c );
	bli_obj_free( &c_save );
}
263
264
265
//
// Invokes syrk on the given operands through the interface selected by
// iface. Only the sequential front-end is handled; any other value is
// reported through libblis_test_printf_error().
//
void libblis_test_syrk_impl( iface_t iface,
                             obj_t*  alpha,
                             obj_t*  a,
                             obj_t*  beta,
                             obj_t*  c )
{
	if ( iface == BLIS_TEST_SEQ_FRONT_END )
	{
		bli_syrk( alpha, a, beta, c );
	}
	else
	{
		libblis_test_printf_error( "Invalid interface type.\n" );
	}
}
287
288
289
//
// Computes the residual for a syrk result.
//
//   params  test parameters (used when randomizing the probe vector)
//   alpha   scalar applied to the rank-k product
//   a       matrix A, with its transposition already applied as a property
//   beta    scalar applied to the original C
//   c       result matrix produced by the implementation
//   c_orig  contents of C before the operation
//   resid   (output) real part of normfv( v - z ), as described below
//
// Pre-conditions:
// - a is randomized.
// - c_orig is randomized and symmetric.
// Note:
// - alpha and beta should have non-zero imaginary components in the
//   complex cases in order to more fully exercise the implementation.
//
// Under these conditions, we assume that the implementation for
//
//   C := beta * C_orig + alpha * transa(A) * transa(A)^T
//
// is functioning correctly if
//
//   normfv( v - z )
//
// is negligible, where t is a random vector and
//
//   v = C * t
//   w = transa(A)^T * t
//   z = beta * C_orig * t + alpha * transa(A) * w
//     = ( beta * C_orig + alpha * transa(A) * transa(A)^T ) * t
//
void libblis_test_syrk_check( test_params_t* params,
                              obj_t*         alpha,
                              obj_t*         a,
                              obj_t*         beta,
                              obj_t*         c,
                              obj_t*         c_orig,
                              double*        resid )
{
	const num_t dt      = bli_obj_dt( c );
	const num_t dt_real = bli_obj_dt_proj_to_real( c );

	const dim_t m       = bli_obj_length( c );
	const dim_t k       = bli_obj_width_after_trans( a );

	obj_t  at;
	obj_t  norm;
	obj_t  t, v, w, z;

	double resid_imag;   // imaginary part returned by bli_getsc(); unused

	// at is an alias of a with an additional transposition applied.
	bli_obj_alias_with_trans( BLIS_TRANSPOSE, a, &at );

	// The norm is held in a scalar of the real projection of C's datatype.
	bli_obj_scalar_init_detached( dt_real, &norm );

	// Work vectors: t, v and z have length m; w has length k.
	bli_obj_create( dt, m, 1, 0, 0, &t );
	bli_obj_create( dt, m, 1, 0, 0, &v );
	bli_obj_create( dt, k, 1, 0, 0, &w );
	bli_obj_create( dt, m, 1, 0, 0, &z );

	libblis_test_vobj_randomize( params, TRUE, &t );

	// v = C * t
	bli_symv( &BLIS_ONE, c, &t, &BLIS_ZERO, &v );

	// w = transa(A)^T * t
	bli_gemv( &BLIS_ONE, &at, &t, &BLIS_ZERO, &w );

	// z = alpha * transa(A) * w
	bli_gemv( alpha, a, &w, &BLIS_ZERO, &z );

	// z = z + beta * C_orig * t
	bli_symv( beta, c_orig, &t, &BLIS_ONE, &z );

	// resid = normfv( v - z ); the subtraction overwrites v.
	bli_subv( &z, &v );
	bli_normfv( &v, &norm );
	bli_getsc( &norm, resid, &resid_imag );

	// Release the work vectors.
	bli_obj_free( &t );
	bli_obj_free( &v );
	bli_obj_free( &w );
	bli_obj_free( &z );
}
364
365