1 /*
2 
3    BLIS
4    An object-based framework for developing high-performance BLAS-like
5    libraries.
6 
7    Copyright (C) 2014, The University of Texas at Austin
8 
9    Redistribution and use in source and binary forms, with or without
10    modification, are permitted provided that the following conditions are
11    met:
12     - Redistributions of source code must retain the above copyright
13       notice, this list of conditions and the following disclaimer.
14     - Redistributions in binary form must reproduce the above copyright
15       notice, this list of conditions and the following disclaimer in the
16       documentation and/or other materials provided with the distribution.
17     - Neither the name of The University of Texas nor the names of its
18       contributors may be used to endorse or promote products derived
19       from this software without specific prior written permission.
20 
21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 
33 */
34 
35 #include <unistd.h>
36 #include "blis.h"
37 
38 //#define PRINT
39 
main(int argc,char ** argv)40 int main( int argc, char** argv )
41 {
42 	obj_t    a, c;
43 	obj_t    c_save;
44 	obj_t    alpha;
45 	dim_t    m, n;
46 	dim_t    p;
47 	dim_t    p_begin, p_end, p_inc;
48 	int      m_input, n_input;
49 	ind_t    ind;
50 	num_t    dt;
51 	char     dt_ch;
52 	int      r, n_repeats;
53 	side_t   side;
54 	uplo_t   uploa;
55 	trans_t  transa;
56 	diag_t   diaga;
57 	f77_char f77_side;
58 	f77_char f77_uploa;
59 	f77_char f77_transa;
60 	f77_char f77_diaga;
61 
62 	double   dtime;
63 	double   dtime_save;
64 	double   gflops;
65 
66 	//bli_init();
67 
68 	//bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );
69 
70 	n_repeats = 3;
71 
72 	dt      = DT;
73 
74 	ind     = IND;
75 
76 	p_begin = P_BEGIN;
77 	p_end   = P_END;
78 	p_inc   = P_INC;
79 
80 	m_input = -1;
81 	n_input = -1;
82 
83 
84 	// Supress compiler warnings about unused variable 'ind'.
85 	( void )ind;
86 
87 #if 0
88 
89 	cntx_t* cntx;
90 
91 	ind_t ind_mod = ind;
92 
93 	// A hack to use 3m1 as 1mpb (with 1m as 1mbp).
94 	if ( ind == BLIS_3M1 ) ind_mod = BLIS_1M;
95 
96 	// Initialize a context for the current induced method and datatype.
97 	cntx = bli_gks_query_ind_cntx( ind_mod, dt );
98 
99 	// Set k to the kc blocksize for the current datatype.
100 	k_input = bli_cntx_get_blksz_def_dt( dt, BLIS_KC, cntx );
101 
102 #elif 1
103 
104 	//k_input = 256;
105 
106 #endif
107 
108 	// Choose the char corresponding to the requested datatype.
109 	if      ( bli_is_float( dt ) )    dt_ch = 's';
110 	else if ( bli_is_double( dt ) )   dt_ch = 'd';
111 	else if ( bli_is_scomplex( dt ) ) dt_ch = 'c';
112 	else                              dt_ch = 'z';
113 
114 	side   = BLIS_LEFT;
115 	uploa  = BLIS_LOWER;
116 	transa = BLIS_NO_TRANSPOSE;
117 	diaga  = BLIS_NONUNIT_DIAG;
118 
119 	bli_param_map_blis_to_netlib_side( side, &f77_side );
120 	bli_param_map_blis_to_netlib_uplo( uploa, &f77_uploa );
121 	bli_param_map_blis_to_netlib_trans( transa, &f77_transa );
122 	bli_param_map_blis_to_netlib_diag( diaga, &f77_diaga );
123 
124 	// Begin with initializing the last entry to zero so that
125 	// matlab allocates space for the entire array once up-front.
126 	for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ;
127 #ifdef BLIS
128 	printf( "data_%s_%ctrmm_%s_blis", THR_STR, dt_ch, STR );
129 #else
130 	printf( "data_%s_%ctrmm_%s",      THR_STR, dt_ch, STR );
131 #endif
132 	printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n",
133 	        ( unsigned long )(p - p_begin + 1)/p_inc + 1,
134 	        ( unsigned long )0,
135 	        ( unsigned long )0, 0.0 );
136 
137 
138 	for ( p = p_begin; p <= p_end; p += p_inc )
139 	{
140 
141 		if ( m_input < 0 ) m = p / ( dim_t )abs(m_input);
142 		else               m =     ( dim_t )    m_input;
143 		if ( n_input < 0 ) n = p / ( dim_t )abs(n_input);
144 		else               n =     ( dim_t )    n_input;
145 
146 		bli_obj_create( dt, 1, 1, 0, 0, &alpha );
147 
148 		if ( bli_is_left( side ) )
149 			bli_obj_create( dt, m, m, 0, 0, &a );
150 		else
151 			bli_obj_create( dt, n, n, 0, 0, &a );
152 		bli_obj_create( dt, m, n, 0, 0, &c );
153 		bli_obj_create( dt, m, n, 0, 0, &c_save );
154 
155 		bli_randm( &a );
156 		bli_randm( &c );
157 
158 		bli_obj_set_struc( BLIS_TRIANGULAR, &a );
159 		bli_obj_set_uplo( uploa, &a );
160 		bli_obj_set_conjtrans( transa, &a );
161 		bli_obj_set_diag( diaga, &a );
162 
163 		bli_setsc(  (2.0/1.0), 0.0, &alpha );
164 
165 		bli_copym( &c, &c_save );
166 
167 #ifdef BLIS
168 		bli_ind_disable_all_dt( dt );
169 		bli_ind_enable_dt( ind, dt );
170 #endif
171 
172 		dtime_save = DBL_MAX;
173 
174 		for ( r = 0; r < n_repeats; ++r )
175 		{
176 			bli_copym( &c_save, &c );
177 
178 
179 			dtime = bli_clock();
180 
181 
182 #ifdef PRINT
183 			bli_printm( "a", &a, "%4.1f", "" );
184 			bli_printm( "b", &b, "%4.1f", "" );
185 			bli_printm( "c", &c, "%4.1f", "" );
186 #endif
187 
188 #ifdef BLIS
189 
190 			bli_trmm( side,
191 				  &alpha,
192 			          &a,
193 			          &c );
194 
195 #else
196 
197 		if ( bli_is_float( dt ) )
198 		{
199 			f77_int  mm     = bli_obj_length( &c );
200 			f77_int  nn     = bli_obj_width( &c );
201 			f77_int  lda    = bli_obj_col_stride( &a );
202 			f77_int  ldc    = bli_obj_col_stride( &c );
203 			float*   alphap = bli_obj_buffer( &alpha );
204 			float*   ap     = bli_obj_buffer( &a );
205 			float*   cp     = bli_obj_buffer( &c );
206 
207 			strmm_( &f77_side,
208 				&f77_uploa,
209 			        &f77_transa,
210 			        &f77_diaga,
211 			        &mm,
212 			        &nn,
213 			        alphap,
214 			        ap, &lda,
215 			        cp, &ldc );
216 		}
217 		else if ( bli_is_double( dt ) )
218 		{
219 			f77_int  mm     = bli_obj_length( &c );
220 			f77_int  nn     = bli_obj_width( &c );
221 			f77_int  lda    = bli_obj_col_stride( &a );
222 			f77_int  ldc    = bli_obj_col_stride( &c );
223 			double*  alphap = bli_obj_buffer( &alpha );
224 			double*  ap     = bli_obj_buffer( &a );
225 			double*  cp     = bli_obj_buffer( &c );
226 
227 			dtrmm_( &f77_side,
228 				&f77_uploa,
229 				&f77_transa,
230 			        &f77_diaga,
231 			        &mm,
232 			        &nn,
233 			        alphap,
234 			        ap, &lda,
235 			        cp, &ldc );
236 		}
237 		else if ( bli_is_scomplex( dt ) )
238 		{
239 			f77_int  mm     = bli_obj_length( &c );
240 			f77_int  nn     = bli_obj_width( &c );
241 			f77_int  lda    = bli_obj_col_stride( &a );
242 			f77_int  ldc    = bli_obj_col_stride( &c );
243 			scomplex*  alphap = bli_obj_buffer( &alpha );
244 			scomplex*  ap     = bli_obj_buffer( &a );
245 			scomplex*  cp     = bli_obj_buffer( &c );
246 
247 			ctrmm_( &f77_side,
248 				&f77_uploa,
249 				&f77_transa,
250 			        &f77_diaga,
251 			        &mm,
252 			        &nn,
253 			        alphap,
254 			        ap, &lda,
255 			        cp, &ldc );
256 		}
257 		else if ( bli_is_dcomplex( dt ) )
258 		{
259 			f77_int  mm     = bli_obj_length( &c );
260 			f77_int  nn     = bli_obj_width( &c );
261 			f77_int  lda    = bli_obj_col_stride( &a );
262 			f77_int  ldc    = bli_obj_col_stride( &c );
263 			dcomplex*  alphap = bli_obj_buffer( &alpha );
264 			dcomplex*  ap     = bli_obj_buffer( &a );
265 			dcomplex*  cp     = bli_obj_buffer( &c );
266 
267 			ztrmm_( &f77_side,
268 				&f77_uploa,
269 				&f77_transa,
270 			        &f77_diaga,
271 			        &mm,
272 			        &nn,
273 			        alphap,
274 			        ap, &lda,
275 			        cp, &ldc );
276 		}
277 #endif
278 
279 #ifdef PRINT
280 			bli_printm( "c after", &c, "%4.1f", "" );
281 			exit(1);
282 #endif
283 
284 
285 			dtime_save = bli_clock_min_diff( dtime_save, dtime );
286 		}
287 
288 		if ( bli_is_left(side) )
289 			gflops = ( 1.0 * m * m * n ) / ( dtime_save * 1.0e9 );
290 		else
291 			gflops = ( 1.0 * m * n * n ) / ( dtime_save * 1.0e9 );
292 
293 		if ( bli_is_complex( dt ) ) gflops *= 4.0;
294 
295 #ifdef BLIS
296 		printf( "data_%s_%ctrmm_%s_blis", THR_STR, dt_ch, STR );
297 #else
298 		printf( "data_%s_%ctrmm_%s",      THR_STR, dt_ch, STR );
299 #endif
300 		printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n",
301 		        ( unsigned long )(p - p_begin + 1)/p_inc + 1,
302 		        ( unsigned long )m,
303 		        ( unsigned long )n, gflops );
304 
305 		bli_obj_free( &alpha );
306 
307 		bli_obj_free( &a );
308 		bli_obj_free( &c );
309 		bli_obj_free( &c_save );
310 	}
311 
312 	//bli_finalize();
313 
314 	return 0;
315 }
316 
317