1 /*
2
3 BLIS
4 An object-based framework for developing high-performance BLAS-like
5 libraries.
6
7 Copyright (C) 2014, The University of Texas at Austin
8
9 Redistribution and use in source and binary forms, with or without
10 modification, are permitted provided that the following conditions are
11 met:
12 - Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 - Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17 - Neither the name of The University of Texas nor the names of its
18 contributors may be used to endorse or promote products derived
19 from this software without specific prior written permission.
20
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
33 */
34
35 #include <unistd.h>
36 #include "blis.h"
37
38 //#define PRINT
39
main(int argc,char ** argv)40 int main( int argc, char** argv )
41 {
42 obj_t a, c;
43 obj_t c_save;
44 obj_t alpha;
45 dim_t m, n;
46 dim_t p;
47 dim_t p_begin, p_end, p_inc;
48 int m_input, n_input;
49 ind_t ind;
50 num_t dt;
51 char dt_ch;
52 int r, n_repeats;
53 side_t side;
54 uplo_t uploa;
55 trans_t transa;
56 diag_t diaga;
57 f77_char f77_side;
58 f77_char f77_uploa;
59 f77_char f77_transa;
60 f77_char f77_diaga;
61
62 double dtime;
63 double dtime_save;
64 double gflops;
65
66 //bli_init();
67
68 //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );
69
70 n_repeats = 3;
71
72 dt = DT;
73
74 ind = IND;
75
76 p_begin = P_BEGIN;
77 p_end = P_END;
78 p_inc = P_INC;
79
80 m_input = -1;
81 n_input = -1;
82
83
84 // Supress compiler warnings about unused variable 'ind'.
85 ( void )ind;
86
87 #if 0
88
89 cntx_t* cntx;
90
91 ind_t ind_mod = ind;
92
93 // A hack to use 3m1 as 1mpb (with 1m as 1mbp).
94 if ( ind == BLIS_3M1 ) ind_mod = BLIS_1M;
95
96 // Initialize a context for the current induced method and datatype.
97 cntx = bli_gks_query_ind_cntx( ind_mod, dt );
98
99 // Set k to the kc blocksize for the current datatype.
100 k_input = bli_cntx_get_blksz_def_dt( dt, BLIS_KC, cntx );
101
102 #elif 1
103
104 //k_input = 256;
105
106 #endif
107
108 // Choose the char corresponding to the requested datatype.
109 if ( bli_is_float( dt ) ) dt_ch = 's';
110 else if ( bli_is_double( dt ) ) dt_ch = 'd';
111 else if ( bli_is_scomplex( dt ) ) dt_ch = 'c';
112 else dt_ch = 'z';
113
114 side = BLIS_LEFT;
115 uploa = BLIS_LOWER;
116 transa = BLIS_NO_TRANSPOSE;
117 diaga = BLIS_NONUNIT_DIAG;
118
119 bli_param_map_blis_to_netlib_side( side, &f77_side );
120 bli_param_map_blis_to_netlib_uplo( uploa, &f77_uploa );
121 bli_param_map_blis_to_netlib_trans( transa, &f77_transa );
122 bli_param_map_blis_to_netlib_diag( diaga, &f77_diaga );
123
124 // Begin with initializing the last entry to zero so that
125 // matlab allocates space for the entire array once up-front.
126 for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ;
127 #ifdef BLIS
128 printf( "data_%s_%ctrmm_%s_blis", THR_STR, dt_ch, STR );
129 #else
130 printf( "data_%s_%ctrmm_%s", THR_STR, dt_ch, STR );
131 #endif
132 printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n",
133 ( unsigned long )(p - p_begin + 1)/p_inc + 1,
134 ( unsigned long )0,
135 ( unsigned long )0, 0.0 );
136
137
138 for ( p = p_begin; p <= p_end; p += p_inc )
139 {
140
141 if ( m_input < 0 ) m = p / ( dim_t )abs(m_input);
142 else m = ( dim_t ) m_input;
143 if ( n_input < 0 ) n = p / ( dim_t )abs(n_input);
144 else n = ( dim_t ) n_input;
145
146 bli_obj_create( dt, 1, 1, 0, 0, &alpha );
147
148 if ( bli_is_left( side ) )
149 bli_obj_create( dt, m, m, 0, 0, &a );
150 else
151 bli_obj_create( dt, n, n, 0, 0, &a );
152 bli_obj_create( dt, m, n, 0, 0, &c );
153 bli_obj_create( dt, m, n, 0, 0, &c_save );
154
155 bli_randm( &a );
156 bli_randm( &c );
157
158 bli_obj_set_struc( BLIS_TRIANGULAR, &a );
159 bli_obj_set_uplo( uploa, &a );
160 bli_obj_set_conjtrans( transa, &a );
161 bli_obj_set_diag( diaga, &a );
162
163 bli_setsc( (2.0/1.0), 0.0, &alpha );
164
165 bli_copym( &c, &c_save );
166
167 #ifdef BLIS
168 bli_ind_disable_all_dt( dt );
169 bli_ind_enable_dt( ind, dt );
170 #endif
171
172 dtime_save = DBL_MAX;
173
174 for ( r = 0; r < n_repeats; ++r )
175 {
176 bli_copym( &c_save, &c );
177
178
179 dtime = bli_clock();
180
181
182 #ifdef PRINT
183 bli_printm( "a", &a, "%4.1f", "" );
184 bli_printm( "b", &b, "%4.1f", "" );
185 bli_printm( "c", &c, "%4.1f", "" );
186 #endif
187
188 #ifdef BLIS
189
190 bli_trmm( side,
191 &alpha,
192 &a,
193 &c );
194
195 #else
196
197 if ( bli_is_float( dt ) )
198 {
199 f77_int mm = bli_obj_length( &c );
200 f77_int nn = bli_obj_width( &c );
201 f77_int lda = bli_obj_col_stride( &a );
202 f77_int ldc = bli_obj_col_stride( &c );
203 float* alphap = bli_obj_buffer( &alpha );
204 float* ap = bli_obj_buffer( &a );
205 float* cp = bli_obj_buffer( &c );
206
207 strmm_( &f77_side,
208 &f77_uploa,
209 &f77_transa,
210 &f77_diaga,
211 &mm,
212 &nn,
213 alphap,
214 ap, &lda,
215 cp, &ldc );
216 }
217 else if ( bli_is_double( dt ) )
218 {
219 f77_int mm = bli_obj_length( &c );
220 f77_int nn = bli_obj_width( &c );
221 f77_int lda = bli_obj_col_stride( &a );
222 f77_int ldc = bli_obj_col_stride( &c );
223 double* alphap = bli_obj_buffer( &alpha );
224 double* ap = bli_obj_buffer( &a );
225 double* cp = bli_obj_buffer( &c );
226
227 dtrmm_( &f77_side,
228 &f77_uploa,
229 &f77_transa,
230 &f77_diaga,
231 &mm,
232 &nn,
233 alphap,
234 ap, &lda,
235 cp, &ldc );
236 }
237 else if ( bli_is_scomplex( dt ) )
238 {
239 f77_int mm = bli_obj_length( &c );
240 f77_int nn = bli_obj_width( &c );
241 f77_int lda = bli_obj_col_stride( &a );
242 f77_int ldc = bli_obj_col_stride( &c );
243 scomplex* alphap = bli_obj_buffer( &alpha );
244 scomplex* ap = bli_obj_buffer( &a );
245 scomplex* cp = bli_obj_buffer( &c );
246
247 ctrmm_( &f77_side,
248 &f77_uploa,
249 &f77_transa,
250 &f77_diaga,
251 &mm,
252 &nn,
253 alphap,
254 ap, &lda,
255 cp, &ldc );
256 }
257 else if ( bli_is_dcomplex( dt ) )
258 {
259 f77_int mm = bli_obj_length( &c );
260 f77_int nn = bli_obj_width( &c );
261 f77_int lda = bli_obj_col_stride( &a );
262 f77_int ldc = bli_obj_col_stride( &c );
263 dcomplex* alphap = bli_obj_buffer( &alpha );
264 dcomplex* ap = bli_obj_buffer( &a );
265 dcomplex* cp = bli_obj_buffer( &c );
266
267 ztrmm_( &f77_side,
268 &f77_uploa,
269 &f77_transa,
270 &f77_diaga,
271 &mm,
272 &nn,
273 alphap,
274 ap, &lda,
275 cp, &ldc );
276 }
277 #endif
278
279 #ifdef PRINT
280 bli_printm( "c after", &c, "%4.1f", "" );
281 exit(1);
282 #endif
283
284
285 dtime_save = bli_clock_min_diff( dtime_save, dtime );
286 }
287
288 if ( bli_is_left(side) )
289 gflops = ( 1.0 * m * m * n ) / ( dtime_save * 1.0e9 );
290 else
291 gflops = ( 1.0 * m * n * n ) / ( dtime_save * 1.0e9 );
292
293 if ( bli_is_complex( dt ) ) gflops *= 4.0;
294
295 #ifdef BLIS
296 printf( "data_%s_%ctrmm_%s_blis", THR_STR, dt_ch, STR );
297 #else
298 printf( "data_%s_%ctrmm_%s", THR_STR, dt_ch, STR );
299 #endif
300 printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n",
301 ( unsigned long )(p - p_begin + 1)/p_inc + 1,
302 ( unsigned long )m,
303 ( unsigned long )n, gflops );
304
305 bli_obj_free( &alpha );
306
307 bli_obj_free( &a );
308 bli_obj_free( &c );
309 bli_obj_free( &c_save );
310 }
311
312 //bli_finalize();
313
314 return 0;
315 }
316
317