1 /*
2 
3    BLIS
4    An object-based framework for developing high-performance BLAS-like
5    libraries.
6 
7    Copyright (C) 2014, The University of Texas at Austin
8 
9    Redistribution and use in source and binary forms, with or without
10    modification, are permitted provided that the following conditions are
11    met:
12     - Redistributions of source code must retain the above copyright
13       notice, this list of conditions and the following disclaimer.
14     - Redistributions in binary form must reproduce the above copyright
15       notice, this list of conditions and the following disclaimer in the
16       documentation and/or other materials provided with the distribution.
17     - Neither the name(s) of the copyright holder(s) nor the names of its
18       contributors may be used to endorse or promote products derived
19       from this software without specific prior written permission.
20 
21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 
33 */
34 
35 #ifdef WIN32
36 #include <io.h>
37 #else
38 #include <unistd.h>
39 #endif
40 #include "blis.h"
41 
42 
43 //#define PRINT
44 
main(int argc,char ** argv)45 int main( int argc, char** argv )
46 {
47 	obj_t a, c;
48 	obj_t c_save;
49 	obj_t alpha;
50 	dim_t m, n;
51 	dim_t p;
52 	dim_t p_begin, p_end, p_inc;
53 	int   m_input, n_input;
54 	num_t dt;
55 	int   r, n_repeats;
56 	side_t side;
57 	uplo_t uploa;
58 	trans_t transa;
59 	diag_t diaga;
60 	f77_char f77_side;
61 	f77_char f77_uploa;
62 	f77_char f77_transa;
63 	f77_char f77_diaga;
64 
65 	double dtime;
66 	double dtime_save;
67 	double gflops;
68 
69 	//bli_init();
70 
71 	//bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );
72 
73 	n_repeats = 3;
74 
75 #ifndef PRINT
76 	p_begin = 200;
77 	p_end   = 2000;
78 	p_inc   = 200;
79 
80 	m_input = -1;
81 	n_input = -1;
82 #else
83 	p_begin = 16;
84 	p_end   = 16;
85 	p_inc   = 1;
86 
87 	m_input = 4;
88 	n_input = 4;
89 #endif
90 
91 #if 1
92 	//dt = BLIS_FLOAT;
93 	dt = BLIS_DOUBLE;
94 #else
95 	//dt = BLIS_SCOMPLEX;
96 	dt = BLIS_DCOMPLEX;
97 #endif
98 
99 	side = BLIS_LEFT;
100 	//side = BLIS_RIGHT;
101 
102 	uploa = BLIS_LOWER;
103 	//uploa = BLIS_UPPER;
104 
105 	transa = BLIS_NO_TRANSPOSE;
106 
107 	diaga = BLIS_NONUNIT_DIAG;
108 
109 	bli_param_map_blis_to_netlib_side( side, &f77_side );
110 	bli_param_map_blis_to_netlib_uplo( uploa, &f77_uploa );
111 	bli_param_map_blis_to_netlib_trans( transa, &f77_transa );
112 	bli_param_map_blis_to_netlib_diag( diaga, &f77_diaga );
113 
114 	// Begin with initializing the last entry to zero so that
115 	// matlab allocates space for the entire array once up-front.
116 	for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ;
117 #ifdef BLIS
118 	printf( "data_trmm_blis" );
119 #else
120 	printf( "data_trmm_%s", BLAS );
121 #endif
122 	printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n",
123 	        ( unsigned long )(p - p_begin)/p_inc + 1,
124 	        ( unsigned long )0,
125 	        ( unsigned long )0, 0.0 );
126 
127 	//for ( p = p_begin; p <= p_end; p += p_inc )
128 	for ( p = p_end; p_begin <= p; p -= p_inc )
129 	{
130 		if ( m_input < 0 ) m = p * ( dim_t )abs(m_input);
131 		else               m =     ( dim_t )    m_input;
132 		if ( n_input < 0 ) n = p * ( dim_t )abs(n_input);
133 		else               n =     ( dim_t )    n_input;
134 
135 		bli_obj_create( dt, 1, 1, 0, 0, &alpha );
136 
137 		if ( bli_is_left( side ) )
138 			bli_obj_create( dt, m, m, 0, 0, &a );
139 		else
140 			bli_obj_create( dt, n, n, 0, 0, &a );
141 		bli_obj_create( dt, m, n, 0, 0, &c );
142 		bli_obj_create( dt, m, n, 0, 0, &c_save );
143 
144 		bli_randm( &a );
145 		bli_randm( &c );
146 
147 		bli_obj_set_struc( BLIS_TRIANGULAR, &a );
148 		bli_obj_set_uplo( uploa, &a );
149 		bli_obj_set_conjtrans( transa, &a );
150 		bli_obj_set_diag( diaga, &a );
151 
152 		// Randomize A, make it densely Hermitian, and zero the unstored
153 		// triangle to ensure the implementation reads only from the stored
154 		// region.
155 		bli_randm( &a );
156 		bli_mkherm( &a );
157 		bli_mktrim( &a );
158 
159 		bli_setsc(  (2.0/1.0), 1.0, &alpha );
160 
161 
162 		bli_copym( &c, &c_save );
163 
164 		dtime_save = DBL_MAX;
165 
166 		for ( r = 0; r < n_repeats; ++r )
167 		{
168 			bli_copym( &c_save, &c );
169 
170 
171 			dtime = bli_clock();
172 
173 
174 #ifdef PRINT
175 			bli_printm( "a", &a, "%4.1f", "" );
176 			bli_printm( "c", &c, "%4.1f", "" );
177 #endif
178 
179 #ifdef BLIS
180 
181 			bli_trmm( side,
182 			          &alpha,
183 			          &a,
184 			          &c );
185 #else
186 
187 		if ( bli_is_float( dt ) )
188 		{
189 			f77_int  mm     = bli_obj_length( &c );
190 			f77_int  nn     = bli_obj_width( &c );
191 			f77_int  lda    = bli_obj_col_stride( &a );
192 			f77_int  ldc    = bli_obj_col_stride( &c );
193 			float*   alphap = bli_obj_buffer( &alpha );
194 			float*   ap     = bli_obj_buffer( &a );
195 			float*   cp     = bli_obj_buffer( &c );
196 
197 			strmm_( &f77_side,
198 			        &f77_uploa,
199 			        &f77_transa,
200 			        &f77_diaga,
201 			        &mm,
202 			        &nn,
203 			        alphap,
204 			        ap, &lda,
205 			        cp, &ldc );
206 		}
207 		else if ( bli_is_double( dt ) )
208 		{
209 			f77_int  mm     = bli_obj_length( &c );
210 			f77_int  nn     = bli_obj_width( &c );
211 			f77_int  lda    = bli_obj_col_stride( &a );
212 			f77_int  ldc    = bli_obj_col_stride( &c );
213 			double*  alphap = bli_obj_buffer( &alpha );
214 			double*  ap     = bli_obj_buffer( &a );
215 			double*  cp     = bli_obj_buffer( &c );
216 
217 			dtrmm_( &f77_side,
218 			        &f77_uploa,
219 			        &f77_transa,
220 			        &f77_diaga,
221 			        &mm,
222 			        &nn,
223 			        alphap,
224 			        ap, &lda,
225 			        cp, &ldc );
226 		}
227 		else if ( bli_is_scomplex( dt ) )
228 		{
229 			f77_int  mm     = bli_obj_length( &c );
230 			f77_int  nn     = bli_obj_width( &c );
231 			f77_int  lda    = bli_obj_col_stride( &a );
232 			f77_int  ldc    = bli_obj_col_stride( &c );
233 			scomplex*  alphap = bli_obj_buffer( &alpha );
234 			scomplex*  ap     = bli_obj_buffer( &a );
235 			scomplex*  cp     = bli_obj_buffer( &c );
236 
237 			ctrmm_( &f77_side,
238 			        &f77_uploa,
239 			        &f77_transa,
240 			        &f77_diaga,
241 			        &mm,
242 			        &nn,
243 			        alphap,
244 			        ap, &lda,
245 			        cp, &ldc );
246 		}
247 		else if ( bli_is_dcomplex( dt ) )
248 		{
249 			f77_int  mm     = bli_obj_length( &c );
250 			f77_int  nn     = bli_obj_width( &c );
251 			f77_int  lda    = bli_obj_col_stride( &a );
252 			f77_int  ldc    = bli_obj_col_stride( &c );
253 			dcomplex*  alphap = bli_obj_buffer( &alpha );
254 			dcomplex*  ap     = bli_obj_buffer( &a );
255 			dcomplex*  cp     = bli_obj_buffer( &c );
256 
257 			ztrmm_( &f77_side,
258 			        &f77_uploa,
259 			        &f77_transa,
260 			        &f77_diaga,
261 			        &mm,
262 			        &nn,
263 			        alphap,
264 			        ap, &lda,
265 			        cp, &ldc );
266 		}
267 #endif
268 
269 #ifdef PRINT
270 			bli_printm( "c after", &c, "%9.5f", "" );
271 			exit(1);
272 #endif
273 
274 			dtime_save = bli_clock_min_diff( dtime_save, dtime );
275 		}
276 
277 		if ( bli_is_left( side ) )
278 			gflops = ( 1.0 * m * m * n ) / ( dtime_save * 1.0e9 );
279 		else
280 			gflops = ( 1.0 * m * n * n ) / ( dtime_save * 1.0e9 );
281 
282 		if ( bli_is_complex( dt ) ) gflops *= 4.0;
283 
284 #ifdef BLIS
285 		printf( "data_trmm_blis" );
286 #else
287 		printf( "data_trmm_%s", BLAS );
288 #endif
289 		printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n",
290 		        ( unsigned long )(p - p_begin)/p_inc + 1,
291 		        ( unsigned long )m,
292 		        ( unsigned long )n, gflops );
293 
294 		bli_obj_free( &alpha );
295 
296 		bli_obj_free( &a );
297 		bli_obj_free( &c );
298 		bli_obj_free( &c_save );
299 	}
300 
301 	//bli_finalize();
302 
303 	return 0;
304 }
305 
306