1 /*
2 
3    BLIS
4    An object-based framework for developing high-performance BLAS-like
5    libraries.
6 
7    Copyright (C) 2014, The University of Texas at Austin
8 
9    Redistribution and use in source and binary forms, with or without
10    modification, are permitted provided that the following conditions are
11    met:
12     - Redistributions of source code must retain the above copyright
13       notice, this list of conditions and the following disclaimer.
14     - Redistributions in binary form must reproduce the above copyright
15       notice, this list of conditions and the following disclaimer in the
16       documentation and/or other materials provided with the distribution.
17     - Neither the name(s) of the copyright holder(s) nor the names of its
18       contributors may be used to endorse or promote products derived
19       from this software without specific prior written permission.
20 
21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 
33 */
34 
35 #include <unistd.h>
36 #include "blis.h"
37 #include <mpi.h>
38 
39 //           uploa  transa m     k     alpha    a        lda   beta     c        ldc
40 //void dsyrk_( char*, char*, int*, int*, double*, double*, int*, double*, double*, int* );
41 
42 //#define PRINT
43 
main(int argc,char ** argv)44 int main( int argc, char** argv )
45 {
46 	obj_t a, c;
47 	obj_t c_save;
48 	obj_t alpha, beta;
49 	dim_t m, k;
50 	dim_t p;
51 	dim_t p_begin, p_end, p_inc;
52 	int   m_input, k_input;
53 	num_t dt_a, dt_c;
54 	num_t dt_alpha, dt_beta;
55 	int   r, n_repeats;
56 	uplo_t uplo;
57 
58 	double dtime;
59 	double dtime_save;
60 	double gflops;
61 
62 	bli_init();
63 
64 	n_repeats = 3;
65 
66     if( argc < 7 )
67     {
68         printf("Usage:\n");
69         printf("test_foo.x m n k p_begin p_inc p_end:\n");
70         exit;
71     }
72 
73     int world_size, world_rank, provided;
74     MPI_Init_thread( NULL, NULL, MPI_THREAD_FUNNELED, &provided );
75     MPI_Comm_size( MPI_COMM_WORLD, &world_size );
76     MPI_Comm_rank( MPI_COMM_WORLD, &world_rank );
77 
78     m_input = strtol( argv[1], NULL, 10 );
79     k_input = strtol( argv[3], NULL, 10 );
80     p_begin = strtol( argv[4], NULL, 10 );
81     p_inc   = strtol( argv[5], NULL, 10 );
82     p_end   = strtol( argv[6], NULL, 10 );
83 
84 	dt_a = BLIS_DOUBLE;
85 	dt_c = BLIS_DOUBLE;
86 	dt_alpha = BLIS_DOUBLE;
87 	dt_beta = BLIS_DOUBLE;
88 
89 	uplo = BLIS_LOWER;
90 
91     for ( p = p_begin + world_rank * p_inc; p <= p_end; p += p_inc * world_size )
92 	{
93 
94 		if ( m_input < 0 ) m = p * ( dim_t )abs(m_input);
95 		else               m =     ( dim_t )    m_input;
96 		if ( k_input < 0 ) k = p * ( dim_t )abs(k_input);
97 		else               k =     ( dim_t )    k_input;
98 
99 
100 		bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
101 		bli_obj_create( dt_beta,  1, 1, 0, 0, &beta );
102 
103 		bli_obj_create( dt_a, m, k, 0, 0, &a );
104 		bli_obj_create( dt_c, m, m, 0, 0, &c );
105 		bli_obj_create( dt_c, m, m, 0, 0, &c_save );
106 
107 		bli_randm( &a );
108 		bli_randm( &c );
109 
110 		bli_obj_set_struc( BLIS_HERMITIAN, &c );
111 		bli_obj_set_uplo( uplo, &c );
112 
113 
114 		bli_setsc(  (2.0/1.0), 0.0, &alpha );
115 		bli_setsc(  (1.0/1.0), 0.0, &beta );
116 
117 
118 		bli_copym( &c, &c_save );
119 
120 		dtime_save = 1.0e9;
121 
122 		for ( r = 0; r < n_repeats; ++r )
123 		{
124 			bli_copym( &c_save, &c );
125 
126 
127 			dtime = bli_clock();
128 
129 #ifdef PRINT
130 			bli_printm( "a", &a, "%4.1f", "" );
131 			bli_printm( "c", &c, "%4.1f", "" );
132 #endif
133 
134 #ifdef BLIS
135 
136 			//bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );
137 
138 			bli_herk( &alpha,
139 			          &a,
140 			          &beta,
141 			          &c );
142 
143 #else
144 
145 			f77_char uploa  = 'L';
146 			f77_char transa = 'N';
147 			f77_int  mm     = bli_obj_length( &c );
148 			f77_int  kk     = bli_obj_width_after_trans( &a );
149 			f77_int  lda    = bli_obj_col_stride( &a );
150 			f77_int  ldc    = bli_obj_col_stride( &c );
151 			double*  alphap = bli_obj_buffer( &alpha );
152 			double*  ap     = bli_obj_buffer( &a );
153 			double*  betap  = bli_obj_buffer( &beta );
154 			double*  cp     = bli_obj_buffer( &c );
155 
156 			dsyrk_( &uploa,
157 			        &transa,
158 			        &mm,
159 			        &kk,
160 			        alphap,
161 			        ap, &lda,
162 			        betap,
163 			        cp, &ldc );
164 #endif
165 
166 #ifdef PRINT
167 			bli_printm( "c after", &c, "%4.1f", "" );
168 			exit(1);
169 #endif
170 
171 
172 			dtime_save = bli_clock_min_diff( dtime_save, dtime );
173 		}
174 
175 		gflops = ( 1.0 * m * k * m ) / ( dtime_save * 1.0e9 );
176 
177 #ifdef BLIS
178 		printf( "data_herk_blis" );
179 #else
180 		printf( "data_herk_%s", BLAS );
181 #endif
182 		printf( "( %2lu, 1:4 ) = [ %4lu %4lu  %10.3e  %6.3f ];\n",
183 		        ( unsigned long )(p - p_begin + 1)/p_inc + 1,
184 		        ( unsigned long )m,
185 		        ( unsigned long )k, dtime_save, gflops );
186 
187 
188 		bli_obj_free( &alpha );
189 		bli_obj_free( &beta );
190 
191 		bli_obj_free( &a );
192 		bli_obj_free( &c );
193 		bli_obj_free( &c_save );
194 	}
195 
196 	bli_finalize();
197 
198 	return 0;
199 }
200 
201