/*

    Copyright (C) 2014, The University of Texas at Austin

    This file is part of libflame and is available under the 3-Clause
    BSD license, which can be found in the LICENSE file at the top-level
    directory, or at http://opensource.org/licenses/BSD-3-Clause

*/
10 
#include "FLAME.h"

#include <stdio.h>
12 
13 
// Number of parameter combinations timed for every problem size; this is
// also the length of pc_str below.
#define N_PARAM_COMBOS    4

// Algorithm selectors that main() passes as the `type` argument of
// time_Symm().
#define FLA_ALG_REFERENCE 0
#define FLA_ALG_FRONT     1

// Two-letter tag for each parameter combination. The tag is embedded in the
// name of the emitted data array (data_symm_<tag>). main() inspects the first
// letter: 'l' makes A an m x m object, anything else makes it n x n.
// NOTE(review): the second letter presumably selects lower/upper storage of
// A -- confirm against time_Symm().
char* pc_str[N_PARAM_COMBOS] = {
  "ll",
  "lu",
  "rl",
  "ru"
};
21 
22 void time_Symm(
23                int param_combo, int type, int n_repeats, int m, int n,
24                FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref,
25                double *dtime, double *diff, double *gflops );
26 
27 
main(int argc,char * argv[])28 int main(int argc, char *argv[])
29 {
30   int
31     datatype,
32     m_input, n_input,
33     m, n,
34     p_first, p_last, p_inc,
35     p,
36     n_repeats,
37     param_combo,
38     i,
39     n_param_combos = N_PARAM_COMBOS;
40 
41   dim_t nb_alg, n_threads;
42 
43   char *colors = "brkgmcbrkgmcbrkgmc";
44   char *ticks  = "o+*xso+*xso+*xso+*xs";
45   char m_dim_desc[14];
46   char n_dim_desc[14];
47   char m_dim_tag[10];
48   char n_dim_tag[10];
49 
50   double max_gflops=6.0;
51 
52   double
53     dtime,
54     gflops,
55     diff;
56 
57   FLA_Obj
58     A, B, C, C_ref;
59 
60   FLA_Init( );
61 
62 
63   fprintf( stdout, "%c number of repeats: ", '%' );
64   scanf( "%d", &n_repeats );
65   fprintf( stdout, "%c %d\n", '%', n_repeats );
66 
67   fprintf( stdout, "%c enter FLASH blocksize: ", '%' );
68   scanf( "%u", &nb_alg );
69   fprintf( stdout, "%c %u\n", '%', nb_alg );
70 
71   fprintf( stdout, "%c enter problem size first, last, inc: ", '%' );
72   scanf( "%d%d%d", &p_first, &p_last, &p_inc );
73   fprintf( stdout, "%c %d %d %d\n", '%', p_first, p_last, p_inc );
74 
75   fprintf( stdout, "%c enter m n (-1 means bind to problem size): ", '%' );
76   scanf( "%d%d", &m_input, &n_input );
77   fprintf( stdout, "%c %d %d\n", '%', m_input, n_input );
78 
79   fprintf( stdout, "%c enter the number of SuperMatrix threads: ", '%' );
80   scanf( "%u", &n_threads );
81   fprintf( stdout, "%c %u\n", '%', n_threads );
82 
83 
84   fprintf( stdout, "\nclear all;\n\n" );
85 
86 
87   if     ( m_input >  0 ) {
88     sprintf( m_dim_desc, "m = %d", m_input );
89     sprintf( m_dim_tag,  "m%dc", m_input);
90   }
91   else if( m_input <  -1 ) {
92     sprintf( m_dim_desc, "m = p/%d", -m_input );
93     sprintf( m_dim_tag,  "m%dp", -m_input );
94   }
95   else if( m_input == -1 ) {
96     sprintf( m_dim_desc, "m = p" );
97     sprintf( m_dim_tag,  "m%dp", 1 );
98   }
99   if     ( n_input >  0 ) {
100     sprintf( n_dim_desc, "n = %d", n_input );
101     sprintf( n_dim_tag,  "n%dc", n_input);
102   }
103   else if( n_input <  -1 ) {
104     sprintf( n_dim_desc, "n = p/%d", -n_input );
105     sprintf( n_dim_tag,  "n%dp", -n_input );
106   }
107   else if( n_input == -1 ) {
108     sprintf( n_dim_desc, "n = p" );
109     sprintf( n_dim_tag,  "n%dp", 1 );
110   }
111 
112   //datatype = FLA_FLOAT;
113   //datatype = FLA_DOUBLE;
114   //datatype = FLA_COMPLEX;
115   datatype = FLA_DOUBLE_COMPLEX;
116 
117   FLASH_Queue_set_num_threads( n_threads );
118 
119   for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 )
120   {
121 
122     m = m_input;
123     n = n_input;
124 
125     if( m < 0 ) m = p / f2c_abs(m_input);
126     if( n < 0 ) n = p / f2c_abs(n_input);
127 
128     for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ){
129 
130       // If multiplying A on the left, A is m x m; ...on the right, A is n x n.
131       if ( pc_str[param_combo][0] == 'l' )
132         FLASH_Obj_create( datatype, m, m, 1, &nb_alg, &A );
133       else
134         FLASH_Obj_create( datatype, n, n, 1, &nb_alg, &A );
135 
136       FLASH_Obj_create( datatype, m, n, 1, &nb_alg, &B );
137       FLASH_Obj_create( datatype, m, n, 1, &nb_alg, &C );
138       FLASH_Obj_create( datatype, m, n, 1, &nb_alg, &C_ref );
139 
140       FLASH_Random_matrix( A );
141       FLASH_Random_matrix( B );
142       FLASH_Random_matrix( C );
143 
144       FLASH_Copy( C, C_ref );
145 
146       fprintf( stdout, "data_symm_%s( %d, 1:5 ) = [ %d  ", pc_str[param_combo], i, p );
147       fflush( stdout );
148 
149       time_Symm( param_combo, FLA_ALG_REFERENCE, n_repeats, m, n,
150                  A, B, C, C_ref, &dtime, &diff, &gflops );
151 
152       fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
153       fflush( stdout );
154 
155       time_Symm( param_combo, FLA_ALG_FRONT, n_repeats, m, n,
156                  A, B, C, C_ref, &dtime, &diff, &gflops );
157 
158       fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
159       fflush( stdout );
160 
161 
162       fprintf( stdout, " ]; \n" );
163       fflush( stdout );
164 
165       FLASH_Obj_free( &A );
166       FLASH_Obj_free( &B );
167       FLASH_Obj_free( &C );
168       FLASH_Obj_free( &C_ref );
169     }
170 
171     fprintf( stdout, "\n" );
172   }
173 
174 /*
175   fprintf( stdout, "figure;\n" );
176 
177   fprintf( stdout, "hold on;\n" );
178 
179   for ( i = 0; i < n_param_combos; i++ ) {
180     fprintf( stdout, "plot( data_symm_%s( :,1 ), data_symm_%s( :, 2 ), '%c:%c' ); \n",
181             pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );
182     fprintf( stdout, "plot( data_symm_%s( :,1 ), data_symm_%s( :, 4 ), '%c-.%c' ); \n",
183             pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );
184   }
185 
186   fprintf( stdout, "legend( ... \n" );
187 
188   for ( i = 0; i < n_param_combos; i++ )
189     fprintf( stdout, "'ref\\_symm\\_%s', 'fla\\_symm\\_%s', ... \n", pc_str[i], pc_str[i] );
190 
191   fprintf( stdout, "'Location', 'SouthEast' ); \n" );
192 
193 
194   fprintf( stdout, "xlabel( 'problem size p' );\n" );
195   fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" );
196   fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops );
197   fprintf( stdout, "title( 'FLAME symm front-end performance (%s, %s)' );\n",
198            m_dim_desc, n_dim_desc );
199   fprintf( stdout, "print -depsc symm_front_%s_%s.eps\n", m_dim_tag, n_dim_tag );
200   fprintf( stdout, "hold off;\n");
201   fflush( stdout );
202 */
203 
204   FLA_Finalize( );
205 
206   return 0;
207 }
208 
209