1 /*
2
3 BLIS
4 An object-based framework for developing high-performance BLAS-like
5 libraries.
6
7 Copyright (C) 2014, The University of Texas at Austin
8
9 Redistribution and use in source and binary forms, with or without
10 modification, are permitted provided that the following conditions are
11 met:
12 - Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 - Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17 - Neither the name(s) of the copyright holder(s) nor the names of its
18 contributors may be used to endorse or promote products derived
19 from this software without specific prior written permission.
20
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
33 */
34
35 #ifdef WIN32
36 #include <io.h>
37 #else
38 #include <unistd.h>
39 #endif
40 #include "blis.h"
41
42
43 //#define PRINT
44
main(int argc,char ** argv)45 int main( int argc, char** argv )
46 {
47 obj_t a, c;
48 obj_t c_save;
49 obj_t alpha;
50 dim_t m, n;
51 dim_t p;
52 dim_t p_begin, p_end, p_inc;
53 int m_input, n_input;
54 num_t dt;
55 int r, n_repeats;
56 side_t side;
57 uplo_t uploa;
58 trans_t transa;
59 diag_t diaga;
60 f77_char f77_side;
61 f77_char f77_uploa;
62 f77_char f77_transa;
63 f77_char f77_diaga;
64
65 double dtime;
66 double dtime_save;
67 double gflops;
68
69 //bli_init();
70
71 //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );
72
73 n_repeats = 3;
74
75 #ifndef PRINT
76 p_begin = 200;
77 p_end = 2000;
78 p_inc = 200;
79
80 m_input = -1;
81 n_input = -1;
82 #else
83 p_begin = 16;
84 p_end = 16;
85 p_inc = 1;
86
87 m_input = 4;
88 n_input = 4;
89 #endif
90
91 #if 1
92 //dt = BLIS_FLOAT;
93 dt = BLIS_DOUBLE;
94 #else
95 //dt = BLIS_SCOMPLEX;
96 dt = BLIS_DCOMPLEX;
97 #endif
98
99 side = BLIS_LEFT;
100 //side = BLIS_RIGHT;
101
102 uploa = BLIS_LOWER;
103 //uploa = BLIS_UPPER;
104
105 transa = BLIS_NO_TRANSPOSE;
106
107 diaga = BLIS_NONUNIT_DIAG;
108
109 bli_param_map_blis_to_netlib_side( side, &f77_side );
110 bli_param_map_blis_to_netlib_uplo( uploa, &f77_uploa );
111 bli_param_map_blis_to_netlib_trans( transa, &f77_transa );
112 bli_param_map_blis_to_netlib_diag( diaga, &f77_diaga );
113
114 // Begin with initializing the last entry to zero so that
115 // matlab allocates space for the entire array once up-front.
116 for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ;
117 #ifdef BLIS
118 printf( "data_trmm_blis" );
119 #else
120 printf( "data_trmm_%s", BLAS );
121 #endif
122 printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n",
123 ( unsigned long )(p - p_begin)/p_inc + 1,
124 ( unsigned long )0,
125 ( unsigned long )0, 0.0 );
126
127 //for ( p = p_begin; p <= p_end; p += p_inc )
128 for ( p = p_end; p_begin <= p; p -= p_inc )
129 {
130 if ( m_input < 0 ) m = p * ( dim_t )abs(m_input);
131 else m = ( dim_t ) m_input;
132 if ( n_input < 0 ) n = p * ( dim_t )abs(n_input);
133 else n = ( dim_t ) n_input;
134
135 bli_obj_create( dt, 1, 1, 0, 0, &alpha );
136
137 if ( bli_is_left( side ) )
138 bli_obj_create( dt, m, m, 0, 0, &a );
139 else
140 bli_obj_create( dt, n, n, 0, 0, &a );
141 bli_obj_create( dt, m, n, 0, 0, &c );
142 bli_obj_create( dt, m, n, 0, 0, &c_save );
143
144 bli_randm( &a );
145 bli_randm( &c );
146
147 bli_obj_set_struc( BLIS_TRIANGULAR, &a );
148 bli_obj_set_uplo( uploa, &a );
149 bli_obj_set_conjtrans( transa, &a );
150 bli_obj_set_diag( diaga, &a );
151
152 // Randomize A, make it densely Hermitian, and zero the unstored
153 // triangle to ensure the implementation reads only from the stored
154 // region.
155 bli_randm( &a );
156 bli_mkherm( &a );
157 bli_mktrim( &a );
158
159 bli_setsc( (2.0/1.0), 1.0, &alpha );
160
161
162 bli_copym( &c, &c_save );
163
164 dtime_save = DBL_MAX;
165
166 for ( r = 0; r < n_repeats; ++r )
167 {
168 bli_copym( &c_save, &c );
169
170
171 dtime = bli_clock();
172
173
174 #ifdef PRINT
175 bli_printm( "a", &a, "%4.1f", "" );
176 bli_printm( "c", &c, "%4.1f", "" );
177 #endif
178
179 #ifdef BLIS
180
181 bli_trmm( side,
182 &alpha,
183 &a,
184 &c );
185 #else
186
187 if ( bli_is_float( dt ) )
188 {
189 f77_int mm = bli_obj_length( &c );
190 f77_int nn = bli_obj_width( &c );
191 f77_int lda = bli_obj_col_stride( &a );
192 f77_int ldc = bli_obj_col_stride( &c );
193 float* alphap = bli_obj_buffer( &alpha );
194 float* ap = bli_obj_buffer( &a );
195 float* cp = bli_obj_buffer( &c );
196
197 strmm_( &f77_side,
198 &f77_uploa,
199 &f77_transa,
200 &f77_diaga,
201 &mm,
202 &nn,
203 alphap,
204 ap, &lda,
205 cp, &ldc );
206 }
207 else if ( bli_is_double( dt ) )
208 {
209 f77_int mm = bli_obj_length( &c );
210 f77_int nn = bli_obj_width( &c );
211 f77_int lda = bli_obj_col_stride( &a );
212 f77_int ldc = bli_obj_col_stride( &c );
213 double* alphap = bli_obj_buffer( &alpha );
214 double* ap = bli_obj_buffer( &a );
215 double* cp = bli_obj_buffer( &c );
216
217 dtrmm_( &f77_side,
218 &f77_uploa,
219 &f77_transa,
220 &f77_diaga,
221 &mm,
222 &nn,
223 alphap,
224 ap, &lda,
225 cp, &ldc );
226 }
227 else if ( bli_is_scomplex( dt ) )
228 {
229 f77_int mm = bli_obj_length( &c );
230 f77_int nn = bli_obj_width( &c );
231 f77_int lda = bli_obj_col_stride( &a );
232 f77_int ldc = bli_obj_col_stride( &c );
233 scomplex* alphap = bli_obj_buffer( &alpha );
234 scomplex* ap = bli_obj_buffer( &a );
235 scomplex* cp = bli_obj_buffer( &c );
236
237 ctrmm_( &f77_side,
238 &f77_uploa,
239 &f77_transa,
240 &f77_diaga,
241 &mm,
242 &nn,
243 alphap,
244 ap, &lda,
245 cp, &ldc );
246 }
247 else if ( bli_is_dcomplex( dt ) )
248 {
249 f77_int mm = bli_obj_length( &c );
250 f77_int nn = bli_obj_width( &c );
251 f77_int lda = bli_obj_col_stride( &a );
252 f77_int ldc = bli_obj_col_stride( &c );
253 dcomplex* alphap = bli_obj_buffer( &alpha );
254 dcomplex* ap = bli_obj_buffer( &a );
255 dcomplex* cp = bli_obj_buffer( &c );
256
257 ztrmm_( &f77_side,
258 &f77_uploa,
259 &f77_transa,
260 &f77_diaga,
261 &mm,
262 &nn,
263 alphap,
264 ap, &lda,
265 cp, &ldc );
266 }
267 #endif
268
269 #ifdef PRINT
270 bli_printm( "c after", &c, "%9.5f", "" );
271 exit(1);
272 #endif
273
274 dtime_save = bli_clock_min_diff( dtime_save, dtime );
275 }
276
277 if ( bli_is_left( side ) )
278 gflops = ( 1.0 * m * m * n ) / ( dtime_save * 1.0e9 );
279 else
280 gflops = ( 1.0 * m * n * n ) / ( dtime_save * 1.0e9 );
281
282 if ( bli_is_complex( dt ) ) gflops *= 4.0;
283
284 #ifdef BLIS
285 printf( "data_trmm_blis" );
286 #else
287 printf( "data_trmm_%s", BLAS );
288 #endif
289 printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n",
290 ( unsigned long )(p - p_begin)/p_inc + 1,
291 ( unsigned long )m,
292 ( unsigned long )n, gflops );
293
294 bli_obj_free( &alpha );
295
296 bli_obj_free( &a );
297 bli_obj_free( &c );
298 bli_obj_free( &c_save );
299 }
300
301 //bli_finalize();
302
303 return 0;
304 }
305
306