/*

    Copyright (C) 2014, The University of Texas at Austin

    This file is part of libflame and is available under the 3-Clause
    BSD license, which can be found in the LICENSE file at the top-level
    directory, or at http://opensource.org/licenses/BSD-3-Clause

*/
#include <stdio.h>

#include "FLAME.h"
11 
12 
13 
/* Number of algorithmic variants that main() times for each problem size
   (variants are numbered 1..N_VARIANTS). */
#define N_VARIANTS 4

/* Implementation selectors passed as the `type` argument of time_Trsm_lut().
   FLA_ALG_OPTIMIZED is currently referenced only by disabled code in main(). */
#define FLA_ALG_REFERENCE  0
#define FLA_ALG_UNBLOCKED  1
#define FLA_ALG_BLOCKED    2
#define FLA_ALG_OPTIMIZED  3
20 
21 
22 void time_Trsm_lut(
23                int variant, int type, int n_repeats, int n, int nb_alg,
24                FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref,
25                double *dtime, double *diff, double *gflops );
26 
27 
main(int argc,char * argv[])28 int main(int argc, char *argv[])
29 {
30   int
31     m_input, n_input,
32     m, n,
33     p_first, p_last, p_inc,
34     p,
35     nb_alg,
36     n_repeats,
37     variant,
38     i, j,
39     datatype,
40     n_variants = N_VARIANTS;
41 
42   char *colors = "brkgmcbrkg";
43   char *ticks  = "o+*xso+*xs";
44   char m_dim_desc[14];
45   char n_dim_desc[14];
46   char m_dim_tag[10];
47   char n_dim_tag[10];
48 
49   double max_gflops=6.0;
50 
51   double
52     d_n,
53     dtime,
54     gflops,
55     diff;
56 
57   FLA_Obj
58     A, B, C, C_ref;
59 
60   /* Initialize FLAME */
61   FLA_Init( );
62 
63 
64   fprintf( stdout, "%c number of repeats: ", '%' );
65   scanf( "%d", &n_repeats );
66   fprintf( stdout, "%c %d\n", '%', n_repeats );
67 
68   fprintf( stdout, "%c Enter blocking size: ", '%' );
69   scanf( "%d", &nb_alg );
70   fprintf( stdout, "%c %d\n", '%', nb_alg );
71 
72   fprintf( stdout, "%c enter problem size first, last, inc: ", '%' );
73   scanf( "%d%d%d", &p_first, &p_last, &p_inc );
74   fprintf( stdout, "%c %d %d %d\n", '%', p_first, p_last, p_inc );
75 
76   fprintf( stdout, "%c enter m n (-1 means bind to problem size): ", '%' );
77   scanf( "%d%d", &m_input, &n_input );
78   fprintf( stdout, "%c %d %d\n", '%', m_input, n_input );
79 
80 
81   /* Delete all existing data structures */
82   fprintf( stdout, "\nclear all;\n\n" );
83 
84 
85   if     ( m_input >  0 ) {
86     sprintf( m_dim_desc, "m = %d", m_input );
87     sprintf( m_dim_tag,  "m%dc", m_input);
88   }
89   else if( m_input <  -1 ) {
90     sprintf( m_dim_desc, "m = p/%d", -m_input );
91     sprintf( m_dim_tag,  "m%dp", -m_input );
92   }
93   else if( m_input == -1 ) {
94     sprintf( m_dim_desc, "m = p" );
95     sprintf( m_dim_tag,  "m%dp", 1 );
96   }
97   if     ( n_input >  0 ) {
98     sprintf( n_dim_desc, "n = %d", n_input );
99     sprintf( n_dim_tag,  "n%dc", n_input);
100   }
101   else if( n_input <  -1 ) {
102     sprintf( n_dim_desc, "n = p/%d", -n_input );
103     sprintf( n_dim_tag,  "n%dp", -n_input );
104   }
105   else if( n_input == -1 ) {
106     sprintf( n_dim_desc, "n = p" );
107     sprintf( n_dim_tag,  "n%dp", 1 );
108   }
109 
110 
111 
112   for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 )
113   {
114 
115     m = m_input;
116     n = n_input;
117 
118     if( m < 0 ) m = p / f2c_abs(m_input);
119     if( n < 0 ) n = p / f2c_abs(n_input);
120 
121     //datatype = FLA_FLOAT;
122     datatype = FLA_DOUBLE;
123     //datatype = FLA_COMPLEX;
124     //datatype = FLA_DOUBLE_COMPLEX;
125 
126     /* Allocate space for the matrices */
127     FLA_Obj_create( datatype, m, m, &A );
128     FLA_Obj_create( datatype, m, n, &C );
129     FLA_Obj_create( datatype, m, n, &C_ref );
130 
131     /* Generate random matrices A, C */
132     FLA_Random_tri_matrix( FLA_LOWER_TRIANGULAR, FLA_NONUNIT_DIAG, A );
133     FLA_Random_matrix( C );
134 
135 
136     /* Time the reference implementation */
137     time_Trsm_lut( 0, FLA_ALG_REFERENCE, n_repeats, p, nb_alg,
138                   A, B, C, C_ref, &dtime, &diff, &gflops );
139 
140     fprintf( stdout, "data_REF( %d, 1:2 ) = [ %d  %6.3lf ]; \n", i, p, gflops );
141     fflush( stdout );
142 
143     for ( variant = 1; variant <= n_variants; variant++ ){
144 
145       //fprintf( stdout, "data_var%d( %d, 1:7 ) = [ %d  ", variant, i, p );
146       fprintf( stdout, "data_var%d( %d, 1:5 ) = [ %d  ", variant, i, p );
147       fflush( stdout );
148 
149 
150       time_Trsm_lut( variant, FLA_ALG_UNBLOCKED, n_repeats, p, nb_alg,
151                     A, B, C, C_ref, &dtime, &diff, &gflops );
152 
153       fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
154       fflush( stdout );
155 
156       time_Trsm_lut( variant, FLA_ALG_BLOCKED, n_repeats, p, nb_alg,
157                     A, B, C, C_ref, &dtime, &diff, &gflops );
158 
159       fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
160       fflush( stdout );
161 
162       //time_Trsm_lut( variant, FLA_ALG_OPTIMIZED, n_repeats, p, nb_alg,
163       //              A, B, C, C_ref, &dtime, &diff, &gflops );
164 
165       //fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
166       //fflush( stdout );
167 
168 
169       fprintf( stdout, " ]; \n" );
170       fflush( stdout );
171     }
172     fprintf( stdout, "\n" );
173 
174     FLA_Obj_free( &A );
175     FLA_Obj_free( &C );
176     FLA_Obj_free( &C_ref );
177   }
178 
179   /* Print the MATLAB commands to plot the data */
180 
181   /* Delete all existing figures */
182   fprintf( stdout, "figure;\n" );
183 
184   /* Plot the performance of the reference implementation */
185   fprintf( stdout, "plot( data_REF( :,1 ), data_REF( :, 2 ), '-' ); \n" );
186 
187   /* Indicate that you want to add to the existing plot */
188   fprintf( stdout, "hold on;\n" );
189 
190   /* Plot the data for the other numbers of threads */
191   for ( i = 1; i <= n_variants; i++ ) {
192     fprintf( stdout, "plot( data_var%d( :,1 ), data_var%d( :, 2 ), '%c:%c' ); \n",
193             i, i, colors[ i-1 ], ticks[ i-1 ] );
194     fprintf( stdout, "plot( data_var%d( :,1 ), data_var%d( :, 4 ), '%c-.%c' ); \n",
195             i, i, colors[ i-1 ], ticks[ i-1 ] );
196     //fprintf( stdout, "plot( data_var%d( :,1 ), data_var%d( :, 6 ), '%c--%c' ); \n",
197     //        i, i, colors[ i-1 ], ticks[ i-1 ] );
198   }
199 
200   fprintf( stdout, "legend( ... \n" );
201   fprintf( stdout, "'Reference', ... \n" );
202 
203   for ( i = 1; i < n_variants; i++ )
204     //fprintf( stdout, "'unb\\_var%d', 'blk\\_var%d', 'opt\\_var%d', ... \n", i, i, i );
205     fprintf( stdout, "'unb\\_var%d', 'blk\\_var%d', ... \n", i, i );
206 
207   i = n_variants;
208   fprintf( stdout, "'unb\\_var%d', 'blk\\_var%d' ); \n", i, i );
209 
210 
211   fprintf( stdout, "xlabel( 'problem size p' );\n" );
212   fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" );
213   fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops );
214   fprintf( stdout, "title( 'FLAME trsm\\_lut performance (%s, %s)' );\n",
215            m_dim_desc, n_dim_desc );
216   fprintf( stdout, "print -depsc trsm_lut_%s_%s.eps\n", m_dim_tag, n_dim_tag );
217   fprintf( stdout, "hold off;\n");
218   fflush( stdout );
219 
220   FLA_Finalize( );
221 }
222 
223