1 /*
2
3 Copyright (C) 2014, The University of Texas at Austin
4
5 This file is part of libflame and is available under the 3-Clause
6 BSD license, which can be found in the LICENSE file at the top-level
7 directory, or at http://opensource.org/licenses/BSD-3-Clause
8
9 */
10 #include "FLAME.h"
11
12
13
/* Number of algorithmic variants that main() times (variants 1..N_VARIANTS). */
#define N_VARIANTS 4

/*
 * Implementation selectors passed as the `type` argument of time_Trsm_lut().
 * main() uses FLA_ALG_REFERENCE for the baseline run and FLA_ALG_UNBLOCKED /
 * FLA_ALG_BLOCKED for each variant; FLA_ALG_OPTIMIZED is defined but is not
 * currently passed by main().
 */
#define FLA_ALG_REFERENCE 0
#define FLA_ALG_UNBLOCKED 1
#define FLA_ALG_BLOCKED   2
#define FLA_ALG_OPTIMIZED 3
20
21
22 void time_Trsm_lut(
23 int variant, int type, int n_repeats, int n, int nb_alg,
24 FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref,
25 double *dtime, double *diff, double *gflops );
26
27
main(int argc,char * argv[])28 int main(int argc, char *argv[])
29 {
30 int
31 m_input, n_input,
32 m, n,
33 p_first, p_last, p_inc,
34 p,
35 nb_alg,
36 n_repeats,
37 variant,
38 i, j,
39 datatype,
40 n_variants = N_VARIANTS;
41
42 char *colors = "brkgmcbrkg";
43 char *ticks = "o+*xso+*xs";
44 char m_dim_desc[14];
45 char n_dim_desc[14];
46 char m_dim_tag[10];
47 char n_dim_tag[10];
48
49 double max_gflops=6.0;
50
51 double
52 d_n,
53 dtime,
54 gflops,
55 diff;
56
57 FLA_Obj
58 A, B, C, C_ref;
59
60 /* Initialize FLAME */
61 FLA_Init( );
62
63
64 fprintf( stdout, "%c number of repeats: ", '%' );
65 scanf( "%d", &n_repeats );
66 fprintf( stdout, "%c %d\n", '%', n_repeats );
67
68 fprintf( stdout, "%c Enter blocking size: ", '%' );
69 scanf( "%d", &nb_alg );
70 fprintf( stdout, "%c %d\n", '%', nb_alg );
71
72 fprintf( stdout, "%c enter problem size first, last, inc: ", '%' );
73 scanf( "%d%d%d", &p_first, &p_last, &p_inc );
74 fprintf( stdout, "%c %d %d %d\n", '%', p_first, p_last, p_inc );
75
76 fprintf( stdout, "%c enter m n (-1 means bind to problem size): ", '%' );
77 scanf( "%d%d", &m_input, &n_input );
78 fprintf( stdout, "%c %d %d\n", '%', m_input, n_input );
79
80
81 /* Delete all existing data structures */
82 fprintf( stdout, "\nclear all;\n\n" );
83
84
85 if ( m_input > 0 ) {
86 sprintf( m_dim_desc, "m = %d", m_input );
87 sprintf( m_dim_tag, "m%dc", m_input);
88 }
89 else if( m_input < -1 ) {
90 sprintf( m_dim_desc, "m = p/%d", -m_input );
91 sprintf( m_dim_tag, "m%dp", -m_input );
92 }
93 else if( m_input == -1 ) {
94 sprintf( m_dim_desc, "m = p" );
95 sprintf( m_dim_tag, "m%dp", 1 );
96 }
97 if ( n_input > 0 ) {
98 sprintf( n_dim_desc, "n = %d", n_input );
99 sprintf( n_dim_tag, "n%dc", n_input);
100 }
101 else if( n_input < -1 ) {
102 sprintf( n_dim_desc, "n = p/%d", -n_input );
103 sprintf( n_dim_tag, "n%dp", -n_input );
104 }
105 else if( n_input == -1 ) {
106 sprintf( n_dim_desc, "n = p" );
107 sprintf( n_dim_tag, "n%dp", 1 );
108 }
109
110
111
112 for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 )
113 {
114
115 m = m_input;
116 n = n_input;
117
118 if( m < 0 ) m = p / f2c_abs(m_input);
119 if( n < 0 ) n = p / f2c_abs(n_input);
120
121 //datatype = FLA_FLOAT;
122 datatype = FLA_DOUBLE;
123 //datatype = FLA_COMPLEX;
124 //datatype = FLA_DOUBLE_COMPLEX;
125
126 /* Allocate space for the matrices */
127 FLA_Obj_create( datatype, m, m, &A );
128 FLA_Obj_create( datatype, m, n, &C );
129 FLA_Obj_create( datatype, m, n, &C_ref );
130
131 /* Generate random matrices A, C */
132 FLA_Random_tri_matrix( FLA_LOWER_TRIANGULAR, FLA_NONUNIT_DIAG, A );
133 FLA_Random_matrix( C );
134
135
136 /* Time the reference implementation */
137 time_Trsm_lut( 0, FLA_ALG_REFERENCE, n_repeats, p, nb_alg,
138 A, B, C, C_ref, &dtime, &diff, &gflops );
139
140 fprintf( stdout, "data_REF( %d, 1:2 ) = [ %d %6.3lf ]; \n", i, p, gflops );
141 fflush( stdout );
142
143 for ( variant = 1; variant <= n_variants; variant++ ){
144
145 //fprintf( stdout, "data_var%d( %d, 1:7 ) = [ %d ", variant, i, p );
146 fprintf( stdout, "data_var%d( %d, 1:5 ) = [ %d ", variant, i, p );
147 fflush( stdout );
148
149
150 time_Trsm_lut( variant, FLA_ALG_UNBLOCKED, n_repeats, p, nb_alg,
151 A, B, C, C_ref, &dtime, &diff, &gflops );
152
153 fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
154 fflush( stdout );
155
156 time_Trsm_lut( variant, FLA_ALG_BLOCKED, n_repeats, p, nb_alg,
157 A, B, C, C_ref, &dtime, &diff, &gflops );
158
159 fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
160 fflush( stdout );
161
162 //time_Trsm_lut( variant, FLA_ALG_OPTIMIZED, n_repeats, p, nb_alg,
163 // A, B, C, C_ref, &dtime, &diff, &gflops );
164
165 //fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
166 //fflush( stdout );
167
168
169 fprintf( stdout, " ]; \n" );
170 fflush( stdout );
171 }
172 fprintf( stdout, "\n" );
173
174 FLA_Obj_free( &A );
175 FLA_Obj_free( &C );
176 FLA_Obj_free( &C_ref );
177 }
178
179 /* Print the MATLAB commands to plot the data */
180
181 /* Delete all existing figures */
182 fprintf( stdout, "figure;\n" );
183
184 /* Plot the performance of the reference implementation */
185 fprintf( stdout, "plot( data_REF( :,1 ), data_REF( :, 2 ), '-' ); \n" );
186
187 /* Indicate that you want to add to the existing plot */
188 fprintf( stdout, "hold on;\n" );
189
190 /* Plot the data for the other numbers of threads */
191 for ( i = 1; i <= n_variants; i++ ) {
192 fprintf( stdout, "plot( data_var%d( :,1 ), data_var%d( :, 2 ), '%c:%c' ); \n",
193 i, i, colors[ i-1 ], ticks[ i-1 ] );
194 fprintf( stdout, "plot( data_var%d( :,1 ), data_var%d( :, 4 ), '%c-.%c' ); \n",
195 i, i, colors[ i-1 ], ticks[ i-1 ] );
196 //fprintf( stdout, "plot( data_var%d( :,1 ), data_var%d( :, 6 ), '%c--%c' ); \n",
197 // i, i, colors[ i-1 ], ticks[ i-1 ] );
198 }
199
200 fprintf( stdout, "legend( ... \n" );
201 fprintf( stdout, "'Reference', ... \n" );
202
203 for ( i = 1; i < n_variants; i++ )
204 //fprintf( stdout, "'unb\\_var%d', 'blk\\_var%d', 'opt\\_var%d', ... \n", i, i, i );
205 fprintf( stdout, "'unb\\_var%d', 'blk\\_var%d', ... \n", i, i );
206
207 i = n_variants;
208 fprintf( stdout, "'unb\\_var%d', 'blk\\_var%d' ); \n", i, i );
209
210
211 fprintf( stdout, "xlabel( 'problem size p' );\n" );
212 fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" );
213 fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops );
214 fprintf( stdout, "title( 'FLAME trsm\\_lut performance (%s, %s)' );\n",
215 m_dim_desc, n_dim_desc );
216 fprintf( stdout, "print -depsc trsm_lut_%s_%s.eps\n", m_dim_tag, n_dim_tag );
217 fprintf( stdout, "hold off;\n");
218 fflush( stdout );
219
220 FLA_Finalize( );
221 }
222
223