1 /*
2 
3     Copyright (C) 2014, The University of Texas at Austin
4 
5     This file is part of libflame and is available under the 3-Clause
6     BSD license, which can be found in the LICENSE file at the top-level
7     directory, or at http://opensource.org/licenses/BSD-3-Clause
8 
9 */
10 
11 #include "FLAME.h"
12 
/* Implementation selectors passed as the `type` argument of
   time_Apply_G_rf(). In this file main() passes FLA_ALG_UNB_OPT,
   FLA_ALG_UNB_ASM and FLA_ALG_BLOCKED; FLA_ALG_REFERENCE appears only in
   disabled code and FLA_ALG_UNBLOCKED is not used here. */
#define FLA_ALG_REFERENCE     0
#define FLA_ALG_UNBLOCKED     1
#define FLA_ALG_UNB_OPT       2
#define FLA_ALG_UNB_ASM       3
#define FLA_ALG_BLOCKED       4

/* Test-data helpers. fill_cs() is defined at the bottom of this file;
   fill_a() is only declared here (no definition is visible in this file). */
void fill_cs( FLA_Obj G );
void fill_a( int ij, FLA_Obj A );

/* Timing routine, defined outside this file. main() calls it once per
   (variant, type) pair and reads the results back through dtime, diff and
   gflops. */
void time_Apply_G_rf(
               int variant, int type, int n_repeats, int m, int k, int n, int b_alg,
               FLA_Obj A, FLA_Obj A_ref, FLA_Obj G, FLA_Obj P,
               double *dtime, double *diff, double *gflops );
26 
27 
main(int argc,char * argv[])28 int main(int argc, char *argv[])
29 {
30   int
31     m_input, k_input, n_input,
32     m, k, n,
33     p_first, p_last, p_inc,
34     p,
35     b_alg,
36     variant,
37     n_repeats,
38     i,
39     datatype, dt_real, dt_comp,
40     n_variants = 9;
41 
42   char *colors = "brkgmcbrkg";
43   char *ticks  = "o+*xso+*xs";
44   char m_dim_desc[14];
45   char m_dim_tag[10];
46   char k_dim_desc[14];
47   char k_dim_tag[10];
48 
49   double max_gflops=6.0;
50 
51   double
52     dtime,
53     gflops,
54     diff;
55 
56   FLA_Obj
57     A, A_ref, G, P;
58 
59 
60   FLA_Init();
61 
62 
63   fprintf( stdout, "%c number of repeats:", '%' );
64   scanf( "%d", &n_repeats );
65   fprintf( stdout, "%c %d\n", '%', n_repeats );
66 
67   fprintf( stdout, "%c Enter blocking size:", '%' );
68   scanf( "%d", &b_alg );
69   fprintf( stdout, "%c %d\n", '%', b_alg );
70 
71   fprintf( stdout, "%c enter problem size first, last, inc:", '%' );
72   scanf( "%d%d%d", &p_first, &p_last, &p_inc );
73   fprintf( stdout, "%c %d %d %d\n", '%', p_first, p_last, p_inc );
74 
75   fprintf( stdout, "%c enter m n k (-1 means bind to problem size): ", '%' );
76   scanf( "%d %d %d", &m_input, &n_input, &k_input );
77   fprintf( stdout, "%c %d %d %d\n", '%', m_input, n_input, k_input );
78 
79 
80   fprintf( stdout, "\n" );
81 
82 
83   if     ( m_input >  0 ) {
84     sprintf( m_dim_desc, "m = %d", m_input );
85     sprintf( m_dim_tag,  "m%dc", m_input);
86   }
87   else if( m_input <  -1 ) {
88     sprintf( m_dim_desc, "m = p/%d", -m_input );
89     sprintf( m_dim_tag,  "m%dp", -m_input );
90   }
91   else if( m_input == -1 ) {
92     sprintf( m_dim_desc, "m = p" );
93     sprintf( m_dim_tag,  "m%dp", 1 );
94   }
95   if     ( k_input >  0 ) {
96     sprintf( k_dim_desc, "k = %d", k_input );
97     sprintf( k_dim_tag,  "k%dc", k_input);
98   }
99   else if( k_input <  -1 ) {
100     sprintf( k_dim_desc, "k = p/%d", -k_input );
101     sprintf( k_dim_tag,  "k%dp", -k_input );
102   }
103   else if( k_input == -1 ) {
104     sprintf( k_dim_desc, "k = p" );
105     sprintf( k_dim_tag,  "k%dp", 1 );
106   }
107 
108 
109   for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 )
110   {
111 
112     m = m_input;
113     k = k_input;
114     n = n_input;
115 
116     if( m < 0 ) m = p / f2c_abs(m_input);
117     if( k < 0 ) k = p / f2c_abs(k_input);
118     if( n < 0 ) n = p / f2c_abs(n_input);
119 
120     //datatype = FLA_FLOAT;
121     //datatype = FLA_DOUBLE;
122     //datatype = FLA_COMPLEX;
123     datatype = FLA_DOUBLE_COMPLEX;
124 
125 
126     FLA_Obj_create( datatype, m,   n, 0, 0, &A );
127     FLA_Obj_create( datatype, m,   n, 0, 0, &A_ref );
128 
129 	if ( FLA_Obj_is_double_precision( A ) ) dt_comp = FLA_DOUBLE_COMPLEX;
130 	else                                    dt_comp = FLA_COMPLEX;
131 
132     FLA_Obj_create( dt_comp, n-1, k, 0, 0, &G );
133     FLA_Obj_create( dt_comp, n-1, k, 0, 0, &P );
134 
135     FLA_Random_matrix( A );
136     //FLA_Set_to_identity( A );
137     //FLA_Set( FLA_ZERO, A );
138     //FLA_Set_diag( FLA_TWO, A );
139     //FLA_Random_tri_matrix( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, A );
140     //FLA_Random_tri_matrix( FLA_LOWER_TRIANGULAR, FLA_NONUNIT_DIAG, A );
141     FLA_Random_matrix( G );
142     //fill_cs( G );
143 
144 /*
145 {
146   FLA_Obj GTL, GTR;
147   FLA_Obj GBL, GBR;
148 
149   FLA_Part_2x2( G,   &GTL, &GTR,
150                      &GBL, &GBR,     6, 1, FLA_TL );
151   FLA_Obj_show( "GTL", GTL, "%9.2e %9.2e ", "" );
152 }
153 */
154 
155 /*
156     time_Apply_G_rf( 0, FLA_ALG_REFERENCE, n_repeats, m, nb_alg,
157                A, t, &dtime, &diff, &gflops );
158 
159     fprintf( stdout, "data_REF( %d, 1:2 ) = [ %d  %6.3lf %6.2le ]; \n", i, p, gflops, diff );
160     fflush( stdout );
161 */
162 
163     for ( variant = 1; variant <= n_variants; variant++ ){
164 
165       fprintf( stdout, "data_var%d( %d, 1:7 ) = [ %d ", variant, i, p );
166       fflush( stdout );
167 
168       time_Apply_G_rf( variant, FLA_ALG_UNB_OPT, n_repeats, m, k, n, b_alg,
169                        A, A_ref, G, P, &dtime, &diff, &gflops );
170 
171       fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
172       fflush( stdout );
173 
174       time_Apply_G_rf( variant, FLA_ALG_UNB_ASM, n_repeats, m, k, n, b_alg,
175                        A, A_ref, G, P, &dtime, &diff, &gflops );
176 
177       fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
178       fflush( stdout );
179 
180       time_Apply_G_rf( variant, FLA_ALG_BLOCKED, n_repeats, m, k, n, b_alg,
181                        A, A_ref, G, P, &dtime, &diff, &gflops );
182 
183       fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
184       fflush( stdout );
185 
186       fprintf( stdout, "];\n" );
187       fflush( stdout );
188     }
189 
190     fprintf( stdout, "\n" );
191 
192     FLA_Obj_free( &A );
193     FLA_Obj_free( &A_ref );
194     FLA_Obj_free( &G );
195     FLA_Obj_free( &P );
196   }
197 
198 /*
199   fprintf( stdout, "figure;\n" );
200 
201   fprintf( stdout, "plot( data_REF( :,1 ), data_REF( :, 2 ), '-' ); \n" );
202 
203   fprintf( stdout, "hold on;\n" );
204 
205   for ( i = 1; i <= n_variants; i++ ) {
206     fprintf( stdout, "plot( data_var%d( :,1 ), data_var%d( :, 2 ), '%c:%c' ); \n",
207             i, i, colors[ i-1 ], ticks[ i-1 ] );
208     fprintf( stdout, "plot( data_var%d( :,1 ), data_var%d( :, 4 ), '%c-.%c' ); \n",
209             i, i, colors[ i-1 ], ticks[ i-1 ] );
210   }
211 
212   fprintf( stdout, "legend( ... \n" );
213   fprintf( stdout, "'Reference', ... \n" );
214 
215   for ( i = 1; i < n_variants; i++ )
216     fprintf( stdout, "'unb\\_var%d', 'blk\\_var%d', ... \n", i, i );
217   fprintf( stdout, "'unb\\_var%d', 'blk\\_var%d' ); \n", i, i );
218 
219   fprintf( stdout, "xlabel( 'problem size p' );\n" );
220   fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" );
221   fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops );
222   fprintf( stdout, "title( 'FLAME Apply_G performance (%s, %s)' );\n",
223            m_dim_desc, n_dim_desc );
224   fprintf( stdout, "print -depsc tridiag_%s_%s.eps\n", m_dim_tag, n_dim_tag );
225   fprintf( stdout, "hold off;\n");
226   fflush( stdout );
227 */
228 
229   FLA_Finalize( );
230 
231   return 0;
232 }
233 
/*
   Walks G one column at a time, left to right. For the column at index j
   (j = number of columns already visited) it sets the sub-vector of j rows
   taken from the top and the sub-vector of j rows taken from the bottom to
   FLA_ONE. All other entries of G are left untouched.
*/
void fill_cs( FLA_Obj G )
{
  FLA_Obj G_done, G_todo;          /* visited columns | remaining columns   */
  FLA_Obj G_prev, g_cur, G_next;   /* 1x3 view exposing the current column  */
  FLA_Obj g_head, g_tail;          /* top / bottom split of the current one */

  FLA_Part_1x2( G,  &G_done, &G_todo,  0, FLA_LEFT );

  while ( FLA_Obj_width( G_done ) < FLA_Obj_width( G ) )
  {
    /* Expose the next unvisited column as g_cur. */
    FLA_Repart_1x2_to_1x3( G_done, G_todo,
                           &G_prev, &g_cur, &G_next,
                           1, FLA_RIGHT );

    /* Leading part: width( G_prev ) rows measured from the top. */
    FLA_Part_2x1( g_cur,  &g_head,
                          &g_tail,  FLA_Obj_width( G_prev ), FLA_TOP );
    FLA_Set( FLA_ONE, g_head );

    /* Trailing part: width( G_prev ) rows measured from the bottom. */
    FLA_Part_2x1( g_cur,  &g_head,
                          &g_tail,  FLA_Obj_width( G_prev ), FLA_BOTTOM );
    FLA_Set( FLA_ONE, g_tail );

    /* Fold g_cur into the visited part and advance. */
    FLA_Cont_with_1x3_to_1x2( &G_done, &G_todo,
                              G_prev, g_cur, G_next,
                              FLA_LEFT );
  }
}
268 
269