1 /*
2
3 BLIS
4 An object-based framework for developing high-performance BLAS-like
5 libraries.
6
7 Copyright (C) 2014, The University of Texas at Austin
8
9 Redistribution and use in source and binary forms, with or without
10 modification, are permitted provided that the following conditions are
11 met:
12 - Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 - Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17 - Neither the name(s) of the copyright holder(s) nor the names of its
18 contributors may be used to endorse or promote products derived
19 from this software without specific prior written permission.
20
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
33 */
34
35 #include <unistd.h>
36 #include "blis.h"
37 #include <mpi.h>
38
39 // side uplo trans diag m n alpha a lda b ldb
40 //void dtrsm_( char*, char*, char*, char*, int*, int*, double*, double*, int*, double*, int* );
41
42 //#define PRINT
43
main(int argc,char ** argv)44 int main( int argc, char** argv )
45 {
46 obj_t a, b, c;
47 obj_t c_save;
48 obj_t alpha, beta;
49 dim_t m, n;
50 dim_t p;
51 dim_t p_begin, p_end, p_inc;
52 int m_input, n_input;
53 num_t dt_a, dt_b, dt_c;
54 num_t dt_alpha, dt_beta;
55 int r, n_repeats;
56 side_t side;
57 uplo_t uplo;
58
59 double dtime;
60 double dtime_save;
61 double gflops;
62
63 bli_init();
64
65 n_repeats = 3;
66
67 if( argc < 7 )
68 {
69 printf("Usage:\n");
70 printf("test_foo.x m n k p_begin p_inc p_end:\n");
71 exit;
72 }
73
74 int world_size, world_rank, provided;
75 MPI_Init_thread( NULL, NULL, MPI_THREAD_FUNNELED, &provided );
76 MPI_Comm_size( MPI_COMM_WORLD, &world_size );
77 MPI_Comm_rank( MPI_COMM_WORLD, &world_rank );
78
79 m_input = strtol( argv[1], NULL, 10 );
80 n_input = strtol( argv[2], NULL, 10 );
81 p_begin = strtol( argv[4], NULL, 10 );
82 p_inc = strtol( argv[5], NULL, 10 );
83 p_end = strtol( argv[6], NULL, 10 );
84
85 #if 1
86 dt_a = BLIS_DOUBLE;
87 dt_b = BLIS_DOUBLE;
88 dt_c = BLIS_DOUBLE;
89 dt_alpha = BLIS_DOUBLE;
90 dt_beta = BLIS_DOUBLE;
91 #else
92 dt_a = dt_b = dt_c = dt_alpha = dt_beta = BLIS_FLOAT;
93 //dt_a = dt_b = dt_c = dt_alpha = dt_beta = BLIS_SCOMPLEX;
94 #endif
95
96 side = BLIS_LEFT;
97 //side = BLIS_RIGHT;
98
99 uplo = BLIS_LOWER;
100 //uplo = BLIS_UPPER;
101
102 for ( p = p_begin + world_rank * p_inc; p <= p_end; p += p_inc * world_size )
103 {
104
105 if ( m_input < 0 ) m = p * ( dim_t )abs(m_input);
106 else m = ( dim_t ) m_input;
107 if ( n_input < 0 ) n = p * ( dim_t )abs(n_input);
108 else n = ( dim_t ) n_input;
109
110
111 bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
112 bli_obj_create( dt_beta, 1, 1, 0, 0, &beta );
113
114 if ( bli_is_left( side ) )
115 bli_obj_create( dt_a, m, m, 0, 0, &a );
116 else
117 bli_obj_create( dt_a, n, n, 0, 0, &a );
118 bli_obj_create( dt_b, m, n, 0, 0, &b );
119 bli_obj_create( dt_c, m, n, 0, 0, &c );
120 bli_obj_create( dt_c, m, n, 0, 0, &c_save );
121
122 bli_obj_set_struc( BLIS_TRIANGULAR, &a );
123 bli_obj_set_uplo( uplo, &a );
124 //bli_obj_set_diag( BLIS_UNIT_DIAG, &a );
125
126 bli_randm( &a );
127 bli_randm( &c );
128 bli_randm( &b );
129
130 /*
131 {
132 obj_t a2;
133
134 bli_obj_alias_to( &a, &a2 );
135 bli_obj_toggle_uplo( &a2 );
136 bli_obj_inc_diag_offset( 1, &a2 );
137 bli_setm( &BLIS_ZERO, &a2 );
138 bli_obj_inc_diag_offset( -2, &a2 );
139 bli_obj_toggle_uplo( &a2 );
140 bli_obj_set_diag( BLIS_NONUNIT_DIAG, &a2 );
141 bli_scalm( &BLIS_TWO, &a2 );
142 //bli_scalm( &BLIS_TWO, &a );
143 }
144 */
145
146 bli_setsc( (2.0/1.0), 0.0, &alpha );
147 bli_setsc( (1.0/1.0), 0.0, &beta );
148
149
150 bli_copym( &c, &c_save );
151
152 dtime_save = 1.0e9;
153
154 for ( r = 0; r < n_repeats; ++r )
155 {
156 bli_copym( &c_save, &c );
157
158 dtime = bli_clock();
159
160
161 #ifdef PRINT
162 /*
163 obj_t ar, ai;
164 bli_obj_alias_to( &a, &ar );
165 bli_obj_alias_to( &a, &ai );
166 bli_obj_set_dt( BLIS_DOUBLE, &ar ); ar.rs *= 2; ar.cs *= 2;
167 bli_obj_set_dt( BLIS_DOUBLE, &ai ); ai.rs *= 2; ai.cs *= 2; ai.buffer = ( double* )ai.buffer + 1;
168
169 bli_printm( "ar", &ar, "%4.1f", "" );
170 bli_printm( "ai", &ai, "%4.1f", "" );
171 */
172
173 bli_invertd( &a );
174 bli_printm( "a", &a, "%4.1f", "" );
175 bli_invertd( &a );
176 bli_printm( "c", &c, "%4.1f", "" );
177 #endif
178
179 #ifdef BLIS
180 //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );
181
182 bli_trsm( side,
183 //bli_trsm4m( side,
184 //bli_trsm3m( side,
185 &alpha,
186 &a,
187 &c );
188 #else
189
190 if ( bli_is_real( dt_a ) )
191 {
192 f77_char side = 'L';
193 f77_char uplo = 'L';
194 f77_char transa = 'N';
195 f77_char diag = 'N';
196 f77_int mm = bli_obj_length( &c );
197 f77_int nn = bli_obj_width( &c );
198 f77_int lda = bli_obj_col_stride( &a );
199 f77_int ldc = bli_obj_col_stride( &c );
200 float * alphap = bli_obj_buffer( &alpha );
201 float * ap = bli_obj_buffer( &a );
202 float * cp = bli_obj_buffer( &c );
203
204 strsm_( &side,
205 &uplo,
206 &transa,
207 &diag,
208 &mm,
209 &nn,
210 alphap,
211 ap, &lda,
212 cp, &ldc );
213 }
214 else // if ( bli_is_complex( dt_a ) )
215 {
216 f77_char side = 'L';
217 f77_char uplo = 'L';
218 f77_char transa = 'N';
219 f77_char diag = 'N';
220 f77_int mm = bli_obj_length( &c );
221 f77_int nn = bli_obj_width( &c );
222 f77_int lda = bli_obj_col_stride( &a );
223 f77_int ldc = bli_obj_col_stride( &c );
224 scomplex* alphap = bli_obj_buffer( &alpha );
225 scomplex* ap = bli_obj_buffer( &a );
226 scomplex* cp = bli_obj_buffer( &c );
227
228 ctrsm_( &side,
229 //ztrsm_( &side,
230 &uplo,
231 &transa,
232 &diag,
233 &mm,
234 &nn,
235 alphap,
236 ap, &lda,
237 cp, &ldc );
238 }
239
240 #endif
241
242 #ifdef PRINT
243 bli_printm( "c after", &c, "%4.1f", "" );
244 exit(1);
245 #endif
246
247
248 dtime_save = bli_clock_min_diff( dtime_save, dtime );
249 }
250
251 if ( bli_is_left( side ) )
252 gflops = ( 1.0 * m * m * n ) / ( dtime_save * 1.0e9 );
253 else
254 gflops = ( 1.0 * m * n * n ) / ( dtime_save * 1.0e9 );
255
256 if ( bli_is_complex( dt_a ) ) gflops *= 4.0;
257
258 #ifdef BLIS
259 printf( "data_trsm_blis" );
260 #else
261 printf( "data_trsm_%s", BLAS );
262 #endif
263 printf( "( %2lu, 1:4 ) = [ %4lu %4lu %10.3e %6.3f ];\n",
264 ( unsigned long )(p - p_begin + 1)/p_inc + 1,
265 ( unsigned long )m,
266 ( unsigned long )n, dtime_save, gflops );
267
268
269 bli_obj_free( &alpha );
270 bli_obj_free( &beta );
271
272 bli_obj_free( &a );
273 bli_obj_free( &b );
274 bli_obj_free( &c );
275 bli_obj_free( &c_save );
276 }
277
278 bli_finalize();
279
280 return 0;
281 }
282
283