1 /*
2 
3    BLIS
4    An object-based framework for developing high-performance BLAS-like
5    libraries.
6 
7    Copyright (C) 2014, The University of Texas at Austin
8    Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
9 
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are
12    met:
13     - Redistributions of source code must retain the above copyright
14       notice, this list of conditions and the following disclaimer.
15     - Redistributions in binary form must reproduce the above copyright
16       notice, this list of conditions and the following disclaimer in the
17       documentation and/or other materials provided with the distribution.
18     - Neither the name(s) of the copyright holder(s) nor the names of its
19       contributors may be used to endorse or promote products derived
20       from this software without specific prior written permission.
21 
22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 
34 */
35 
36 #include "blis.h"
37 #include "test_libblis.h"
38 
39 
40 // Static variables.
41 static char*     op_str                    = "hemm";
42 static char*     o_types                   = "mmm";   // a b c
43 static char*     p_types                   = "such";  // side uploa conja transb
44 static thresh_t  thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 },   // warn, pass for s
45                                                { 1e-04, 1e-05 },   // warn, pass for c
46                                                { 1e-13, 1e-14 },   // warn, pass for d
47                                                { 1e-13, 1e-14 } }; // warn, pass for z
48 
49 // Local prototypes.
50 void libblis_test_hemm_deps
51      (
52        thread_data_t* tdata,
53        test_params_t* params,
54        test_op_t*     op
55      );
56 
57 void libblis_test_hemm_experiment
58      (
59        test_params_t* params,
60        test_op_t*     op,
61        iface_t        iface,
62        char*          dc_str,
63        char*          pc_str,
64        char*          sc_str,
65        unsigned int   p_cur,
66        double*        perf,
67        double*        resid
68      );
69 
70 void libblis_test_hemm_impl
71      (
72        iface_t   iface,
73        side_t    side,
74        obj_t*    alpha,
75        obj_t*    a,
76        obj_t*    b,
77        obj_t*    beta,
78        obj_t*    c
79      );
80 
81 void libblis_test_hemm_check
82      (
83        test_params_t* params,
84        side_t         side,
85        obj_t*         alpha,
86        obj_t*         a,
87        obj_t*         b,
88        obj_t*         beta,
89        obj_t*         c,
90        obj_t*         c_orig,
91        double*        resid
92      );
93 
94 
95 
libblis_test_hemm_deps(thread_data_t * tdata,test_params_t * params,test_op_t * op)96 void libblis_test_hemm_deps
97      (
98        thread_data_t* tdata,
99        test_params_t* params,
100        test_op_t*     op
101      )
102 {
103 	libblis_test_randv( tdata, params, &(op->ops->randv) );
104 	libblis_test_randm( tdata, params, &(op->ops->randm) );
105 	libblis_test_setv( tdata, params, &(op->ops->setv) );
106 	libblis_test_normfv( tdata, params, &(op->ops->normfv) );
107 	libblis_test_subv( tdata, params, &(op->ops->subv) );
108 	libblis_test_scalv( tdata, params, &(op->ops->scalv) );
109 	libblis_test_copym( tdata, params, &(op->ops->copym) );
110 	libblis_test_scalm( tdata, params, &(op->ops->scalm) );
111 	libblis_test_gemv( tdata, params, &(op->ops->gemv) );
112 	libblis_test_hemv( tdata, params, &(op->ops->hemv) );
113 }
114 
115 
116 
libblis_test_hemm(thread_data_t * tdata,test_params_t * params,test_op_t * op)117 void libblis_test_hemm
118      (
119        thread_data_t* tdata,
120        test_params_t* params,
121        test_op_t*     op
122      )
123 {
124 
125 	// Return early if this test has already been done.
126 	if ( libblis_test_op_is_done( op ) ) return;
127 
128 	// Return early if operation is disabled.
129 	if ( libblis_test_op_is_disabled( op ) ||
130 	     libblis_test_l3_is_disabled( op ) ) return;
131 
132 	// Call dependencies first.
133 	if ( TRUE ) libblis_test_hemm_deps( tdata, params, op );
134 
135 	// Execute the test driver for each implementation requested.
136 	//if ( op->front_seq == ENABLE )
137 	{
138 		libblis_test_op_driver( tdata,
139 		                        params,
140 		                        op,
141 		                        BLIS_TEST_SEQ_FRONT_END,
142 		                        op_str,
143 		                        p_types,
144 		                        o_types,
145 		                        thresh,
146 		                        libblis_test_hemm_experiment );
147 	}
148 }
149 
150 
151 
libblis_test_hemm_experiment(test_params_t * params,test_op_t * op,iface_t iface,char * dc_str,char * pc_str,char * sc_str,unsigned int p_cur,double * perf,double * resid)152 void libblis_test_hemm_experiment
153      (
154        test_params_t* params,
155        test_op_t*     op,
156        iface_t        iface,
157        char*          dc_str,
158        char*          pc_str,
159        char*          sc_str,
160        unsigned int   p_cur,
161        double*        perf,
162        double*        resid
163      )
164 {
165 	unsigned int n_repeats = params->n_repeats;
166 	unsigned int i;
167 
168 	double       time_min  = DBL_MAX;
169 	double       time;
170 
171 	num_t        datatype;
172 
173 	dim_t        m, n;
174 	dim_t        mn_side;
175 
176 	side_t       side;
177 	uplo_t       uploa;
178 	conj_t       conja;
179 	trans_t      transb;
180 
181 	obj_t        alpha, a, b, beta, c;
182 	obj_t        c_save;
183 
184 
185 	// Use the datatype of the first char in the datatype combination string.
186 	bli_param_map_char_to_blis_dt( dc_str[0], &datatype );
187 
188 	// Map the dimension specifier to actual dimensions.
189 	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
190 	n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur );
191 
192 	// Map parameter characters to BLIS constants.
193 	bli_param_map_char_to_blis_side( pc_str[0], &side );
194 	bli_param_map_char_to_blis_uplo( pc_str[1], &uploa );
195 	bli_param_map_char_to_blis_conj( pc_str[2], &conja );
196 	bli_param_map_char_to_blis_trans( pc_str[3], &transb );
197 
198 	// Create test scalars.
199 	bli_obj_scalar_init_detached( datatype, &alpha );
200 	bli_obj_scalar_init_detached( datatype, &beta );
201 
202 	// Create test operands (vectors and/or matrices).
203 	bli_set_dim_with_side( side, m, n, &mn_side );
204 	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
205 	                          sc_str[1], mn_side, mn_side, &a );
206 	libblis_test_mobj_create( params, datatype, transb,
207 	                          sc_str[2], m,       n,       &b );
208 	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
209 	                          sc_str[0], m,       n,       &c );
210 	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
211 	                          sc_str[0], m,       n,       &c_save );
212 
213 	// Set alpha and beta.
214 	if ( bli_obj_is_real( &c ) )
215 	{
216 		bli_setsc(  1.2,  0.0, &alpha );
217 		bli_setsc( -1.0,  0.0, &beta );
218 	}
219 	else
220 	{
221 		bli_setsc(  1.2,  0.8, &alpha );
222 		bli_setsc( -1.0,  1.0, &beta );
223 	}
224 
225 	// Set the structure and uplo properties of A.
226 	bli_obj_set_struc( BLIS_HERMITIAN, &a );
227 	bli_obj_set_uplo( uploa, &a );
228 
229 	// Randomize A, make it densely Hermitian, and zero the unstored triangle
230 	// to ensure the implementation reads only from the stored region.
231 	libblis_test_mobj_randomize( params, TRUE, &a );
232 	bli_mkherm( &a );
233 	bli_mktrim( &a );
234 
235 	// Randomize B and C, and save C.
236 	libblis_test_mobj_randomize( params, TRUE, &b );
237 	libblis_test_mobj_randomize( params, TRUE, &c );
238 	bli_copym( &c, &c_save );
239 
240 	// Apply the remaining parameters.
241 	bli_obj_set_conj( conja, &a );
242 	bli_obj_set_conjtrans( transb, &b );
243 
244 	// Repeat the experiment n_repeats times and record results.
245 	for ( i = 0; i < n_repeats; ++i )
246 	{
247 		bli_copym( &c_save, &c );
248 
249 		time = bli_clock();
250 
251 		libblis_test_hemm_impl( iface, side, &alpha, &a, &b, &beta, &c );
252 
253 		time_min = bli_clock_min_diff( time_min, time );
254 	}
255 
256 	// Estimate the performance of the best experiment repeat.
257 	*perf = ( 2.0 * mn_side * m * n ) / time_min / FLOPS_PER_UNIT_PERF;
258 	if ( bli_obj_is_complex( &c ) ) *perf *= 4.0;
259 
260 	// Perform checks.
261 	libblis_test_hemm_check( params, side, &alpha, &a, &b, &beta, &c, &c_save, resid );
262 
263 	// Zero out performance and residual if output matrix is empty.
264 	libblis_test_check_empty_problem( &c, perf, resid );
265 
266 	// Free the test objects.
267 	bli_obj_free( &a );
268 	bli_obj_free( &b );
269 	bli_obj_free( &c );
270 	bli_obj_free( &c_save );
271 }
272 
273 
274 
libblis_test_hemm_impl(iface_t iface,side_t side,obj_t * alpha,obj_t * a,obj_t * b,obj_t * beta,obj_t * c)275 void libblis_test_hemm_impl
276      (
277        iface_t   iface,
278        side_t    side,
279        obj_t*    alpha,
280        obj_t*    a,
281        obj_t*    b,
282        obj_t*    beta,
283        obj_t*    c
284      )
285 {
286 	switch ( iface )
287 	{
288 		case BLIS_TEST_SEQ_FRONT_END:
289 		bli_hemm( side, alpha, a, b, beta, c );
290 		//bli_hemm4m( side, alpha, a, b, beta, c );
291 		//bli_hemm3m( side, alpha, a, b, beta, c );
292 		break;
293 
294 		default:
295 		libblis_test_printf_error( "Invalid interface type.\n" );
296 	}
297 }
298 
299 
300 
libblis_test_hemm_check(test_params_t * params,side_t side,obj_t * alpha,obj_t * a,obj_t * b,obj_t * beta,obj_t * c,obj_t * c_orig,double * resid)301 void libblis_test_hemm_check
302      (
303        test_params_t* params,
304        side_t         side,
305        obj_t*         alpha,
306        obj_t*         a,
307        obj_t*         b,
308        obj_t*         beta,
309        obj_t*         c,
310        obj_t*         c_orig,
311        double*        resid
312      )
313 {
314 	num_t  dt      = bli_obj_dt( c );
315 	num_t  dt_real = bli_obj_dt_proj_to_real( c );
316 
317 	dim_t  m       = bli_obj_length( c );
318 	dim_t  n       = bli_obj_width( c );
319 
320 	obj_t  norm;
321 	obj_t  t, v, w, z;
322 
323 	double junk;
324 
325 	//
326 	// Pre-conditions:
327 	// - a is randomized and Hermitian.
328 	// - b is randomized.
329 	// - c_orig is randomized.
330 	// Note:
331 	// - alpha and beta should have non-zero imaginary components in the
332 	//   complex cases in order to more fully exercise the implementation.
333 	//
334 	// Under these conditions, we assume that the implementation for
335 	//
336 	//   C := beta * C_orig + alpha * conja(A) * transb(B)    (side = left)
337 	//   C := beta * C_orig + alpha * transb(B) * conja(A)    (side = right)
338 	//
339 	// is functioning correctly if
340 	//
341 	//   normfv( v - z )
342 	//
343 	// is negligible, where
344 	//
345 	//   v = C * t
346 	//
347 	//   z = ( beta * C_orig + alpha * conja(A) * transb(B) ) * t     (side = left)
348 	//     = beta * C_orig * t + alpha * conja(A) * transb(B) * t
349 	//     = beta * C_orig * t + alpha * conja(A) * w
350 	//     = beta * C_orig * t + z
351 	//
352 	//   z = ( beta * C_orig + alpha * transb(B) * conja(A) ) * t     (side = right)
353 	//     = beta * C_orig * t + alpha * transb(B) * conja(A) * t
354 	//     = beta * C_orig * t + alpha * transb(B) * w
355 	//     = beta * C_orig * t + z
356 
357 	bli_obj_scalar_init_detached( dt_real, &norm );
358 
359 	if ( bli_is_left( side ) )
360 	{
361 		bli_obj_create( dt, n, 1, 0, 0, &t );
362 		bli_obj_create( dt, m, 1, 0, 0, &v );
363 		bli_obj_create( dt, m, 1, 0, 0, &w );
364 		bli_obj_create( dt, m, 1, 0, 0, &z );
365 	}
366 	else // else if ( bli_is_right( side ) )
367 	{
368 		bli_obj_create( dt, n, 1, 0, 0, &t );
369 		bli_obj_create( dt, m, 1, 0, 0, &v );
370 		bli_obj_create( dt, n, 1, 0, 0, &w );
371 		bli_obj_create( dt, m, 1, 0, 0, &z );
372 	}
373 
374 	libblis_test_vobj_randomize( params, TRUE, &t );
375 
376 	bli_gemv( &BLIS_ONE, c, &t, &BLIS_ZERO, &v );
377 
378 	if ( bli_is_left( side ) )
379 	{
380 		bli_gemv( &BLIS_ONE, b, &t, &BLIS_ZERO, &w );
381 		bli_hemv( alpha, a, &w, &BLIS_ZERO, &z );
382 	}
383 	else // else if ( bli_is_right( side ) )
384 	{
385 		bli_hemv( &BLIS_ONE, a, &t, &BLIS_ZERO, &w );
386 		bli_gemv( alpha, b, &w, &BLIS_ZERO, &z );
387 	}
388 
389 	bli_gemv( beta, c_orig, &t, &BLIS_ONE, &z );
390 
391 	bli_subv( &z, &v );
392 	bli_normfv( &v, &norm );
393 	bli_getsc( &norm, resid, &junk );
394 
395 	bli_obj_free( &t );
396 	bli_obj_free( &v );
397 	bli_obj_free( &w );
398 	bli_obj_free( &z );
399 }
400 
401