1 /*
2 
3    BLIS
4    An object-based framework for developing high-performance BLAS-like
5    libraries.
6 
7    Copyright (C) 2014, The University of Texas at Austin
8 
9    Redistribution and use in source and binary forms, with or without
10    modification, are permitted provided that the following conditions are
11    met:
12     - Redistributions of source code must retain the above copyright
13       notice, this list of conditions and the following disclaimer.
14     - Redistributions in binary form must reproduce the above copyright
15       notice, this list of conditions and the following disclaimer in the
16       documentation and/or other materials provided with the distribution.
17     - Neither the name(s) of the copyright holder(s) nor the names of its
18       contributors may be used to endorse or promote products derived
19       from this software without specific prior written permission.
20 
21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 
33 */
34 
35 #include <unistd.h>
36 #include "blis.h"
37 #include <mpi.h>
38 
39 //           side   uplo   trans  diag   m     n     alpha    a        lda   b        ldb
40 //void dtrsm_( char*, char*, char*, char*, int*, int*, double*, double*, int*, double*, int* );
41 
42 //#define PRINT
43 
main(int argc,char ** argv)44 int main( int argc, char** argv )
45 {
46 	obj_t a, b, c;
47 	obj_t c_save;
48 	obj_t alpha, beta;
49 	dim_t m, n;
50 	dim_t p;
51 	dim_t p_begin, p_end, p_inc;
52 	int   m_input, n_input;
53 	num_t dt_a, dt_b, dt_c;
54 	num_t dt_alpha, dt_beta;
55 	int   r, n_repeats;
56 	side_t side;
57 	uplo_t uplo;
58 
59 	double dtime;
60 	double dtime_save;
61 	double gflops;
62 
63 	bli_init();
64 
65 	n_repeats = 3;
66 
67     if( argc < 7 )
68     {
69         printf("Usage:\n");
70         printf("test_foo.x m n k p_begin p_inc p_end:\n");
71         exit;
72     }
73 
74     int world_size, world_rank, provided;
75     MPI_Init_thread( NULL, NULL, MPI_THREAD_FUNNELED, &provided );
76     MPI_Comm_size( MPI_COMM_WORLD, &world_size );
77     MPI_Comm_rank( MPI_COMM_WORLD, &world_rank );
78 
79     m_input = strtol( argv[1], NULL, 10 );
80     n_input = strtol( argv[2], NULL, 10 );
81     p_begin = strtol( argv[4], NULL, 10 );
82     p_inc   = strtol( argv[5], NULL, 10 );
83     p_end   = strtol( argv[6], NULL, 10 );
84 
85 #if 1
86 	dt_a = BLIS_DOUBLE;
87 	dt_b = BLIS_DOUBLE;
88 	dt_c = BLIS_DOUBLE;
89 	dt_alpha = BLIS_DOUBLE;
90 	dt_beta = BLIS_DOUBLE;
91 #else
92 	dt_a = dt_b = dt_c = dt_alpha = dt_beta = BLIS_FLOAT;
93 	//dt_a = dt_b = dt_c = dt_alpha = dt_beta = BLIS_SCOMPLEX;
94 #endif
95 
96 	side = BLIS_LEFT;
97 	//side = BLIS_RIGHT;
98 
99 	uplo = BLIS_LOWER;
100 	//uplo = BLIS_UPPER;
101 
102     for ( p = p_begin + world_rank * p_inc; p <= p_end; p += p_inc * world_size )
103 	{
104 
105 		if ( m_input < 0 ) m = p * ( dim_t )abs(m_input);
106 		else               m =     ( dim_t )    m_input;
107 		if ( n_input < 0 ) n = p * ( dim_t )abs(n_input);
108 		else               n =     ( dim_t )    n_input;
109 
110 
111 		bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
112 		bli_obj_create( dt_beta,  1, 1, 0, 0, &beta );
113 
114 		if ( bli_is_left( side ) )
115 			bli_obj_create( dt_a, m, m, 0, 0, &a );
116 		else
117 			bli_obj_create( dt_a, n, n, 0, 0, &a );
118 		bli_obj_create( dt_b, m, n, 0, 0, &b );
119 		bli_obj_create( dt_c, m, n, 0, 0, &c );
120 		bli_obj_create( dt_c, m, n, 0, 0, &c_save );
121 
122 		bli_obj_set_struc( BLIS_TRIANGULAR, &a );
123 		bli_obj_set_uplo( uplo, &a );
124 		//bli_obj_set_diag( BLIS_UNIT_DIAG, &a );
125 
126 		bli_randm( &a );
127 		bli_randm( &c );
128 		bli_randm( &b );
129 
130 /*
131 		{
132 			obj_t a2;
133 
134 			bli_obj_alias_to( &a, &a2 );
135 			bli_obj_toggle_uplo( &a2 );
136 			bli_obj_inc_diag_offset( 1, &a2 );
137 			bli_setm( &BLIS_ZERO, &a2 );
138 			bli_obj_inc_diag_offset( -2, &a2 );
139 			bli_obj_toggle_uplo( &a2 );
140 			bli_obj_set_diag( BLIS_NONUNIT_DIAG, &a2 );
141 			bli_scalm( &BLIS_TWO, &a2 );
142 			//bli_scalm( &BLIS_TWO, &a );
143 		}
144 */
145 
146 		bli_setsc(  (2.0/1.0), 0.0, &alpha );
147 		bli_setsc(  (1.0/1.0), 0.0, &beta );
148 
149 
150 		bli_copym( &c, &c_save );
151 
152 		dtime_save = 1.0e9;
153 
154 		for ( r = 0; r < n_repeats; ++r )
155 		{
156 			bli_copym( &c_save, &c );
157 
158 			dtime = bli_clock();
159 
160 
161 #ifdef PRINT
162 /*
163 			obj_t ar, ai;
164 			bli_obj_alias_to( &a, &ar );
165 			bli_obj_alias_to( &a, &ai );
166 			bli_obj_set_dt( BLIS_DOUBLE, &ar ); ar.rs *= 2; ar.cs *= 2;
167 			bli_obj_set_dt( BLIS_DOUBLE, &ai ); ai.rs *= 2; ai.cs *= 2; ai.buffer = ( double* )ai.buffer + 1;
168 
169 			bli_printm( "ar", &ar, "%4.1f", "" );
170 			bli_printm( "ai", &ai, "%4.1f", "" );
171 */
172 
173 			bli_invertd( &a );
174 			bli_printm( "a", &a, "%4.1f", "" );
175 			bli_invertd( &a );
176 			bli_printm( "c", &c, "%4.1f", "" );
177 #endif
178 
179 #ifdef BLIS
180 			//bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );
181 
182 			bli_trsm( side,
183 			//bli_trsm4m( side,
184 			//bli_trsm3m( side,
185 			          &alpha,
186 			          &a,
187 			          &c );
188 #else
189 
190 		if ( bli_is_real( dt_a ) )
191 		{
192 			f77_char side   = 'L';
193 			f77_char uplo   = 'L';
194 			f77_char transa = 'N';
195 			f77_char diag   = 'N';
196 			f77_int  mm     = bli_obj_length( &c );
197 			f77_int  nn     = bli_obj_width( &c );
198 			f77_int  lda    = bli_obj_col_stride( &a );
199 			f77_int  ldc    = bli_obj_col_stride( &c );
200 			float *  alphap = bli_obj_buffer( &alpha );
201 			float *  ap     = bli_obj_buffer( &a );
202 			float *  cp     = bli_obj_buffer( &c );
203 
204 			strsm_( &side,
205 			        &uplo,
206 			        &transa,
207 			        &diag,
208 			        &mm,
209 			        &nn,
210 			        alphap,
211 			        ap, &lda,
212 			        cp, &ldc );
213 		}
214 		else // if ( bli_is_complex( dt_a ) )
215 		{
216 			f77_char  side   = 'L';
217 			f77_char  uplo   = 'L';
218 			f77_char  transa = 'N';
219 			f77_char  diag   = 'N';
220 			f77_int   mm     = bli_obj_length( &c );
221 			f77_int   nn     = bli_obj_width( &c );
222 			f77_int   lda    = bli_obj_col_stride( &a );
223 			f77_int   ldc    = bli_obj_col_stride( &c );
224 			scomplex* alphap = bli_obj_buffer( &alpha );
225 			scomplex* ap     = bli_obj_buffer( &a );
226 			scomplex* cp     = bli_obj_buffer( &c );
227 
228 			ctrsm_( &side,
229 			//ztrsm_( &side,
230 			        &uplo,
231 			        &transa,
232 			        &diag,
233 			        &mm,
234 			        &nn,
235 			        alphap,
236 			        ap, &lda,
237 			        cp, &ldc );
238 		}
239 
240 #endif
241 
242 #ifdef PRINT
243 			bli_printm( "c after", &c, "%4.1f", "" );
244 			exit(1);
245 #endif
246 
247 
248 			dtime_save = bli_clock_min_diff( dtime_save, dtime );
249 		}
250 
251 		if ( bli_is_left( side ) )
252 			gflops = ( 1.0 * m * m * n ) / ( dtime_save * 1.0e9 );
253 		else
254 			gflops = ( 1.0 * m * n * n ) / ( dtime_save * 1.0e9 );
255 
256 		if ( bli_is_complex( dt_a ) ) gflops *= 4.0;
257 
258 #ifdef BLIS
259 		printf( "data_trsm_blis" );
260 #else
261 		printf( "data_trsm_%s", BLAS );
262 #endif
263 		printf( "( %2lu, 1:4 ) = [ %4lu %4lu  %10.3e  %6.3f ];\n",
264 		        ( unsigned long )(p - p_begin + 1)/p_inc + 1,
265 		        ( unsigned long )m,
266 		        ( unsigned long )n, dtime_save, gflops );
267 
268 
269 		bli_obj_free( &alpha );
270 		bli_obj_free( &beta );
271 
272 		bli_obj_free( &a );
273 		bli_obj_free( &b );
274 		bli_obj_free( &c );
275 		bli_obj_free( &c_save );
276 	}
277 
278 	bli_finalize();
279 
280 	return 0;
281 }
282 
283