1 /*
2 
3     Copyright (C) 2014, The University of Texas at Austin
4 
5     This file is part of libflame and is available under the 3-Clause
6     BSD license, which can be found in the LICENSE file at the top-level
7     directory, or at http://opensource.org/licenses/BSD-3-Clause
8 
9 */
10 
11 #include "FLAME.h"
12 #include "test_libflame.h"
13 
14 #define NUM_PARAM_COMBOS 1
15 #define NUM_MATRIX_ARGS  1
16 #define FIRST_VARIANT    1
17 #define LAST_VARIANT     1
18 
19 // Static variables.
20 static char* op_str                   = "LU factorization with incremental pivoting";
21 static char* flash_front_str          = "FLASH_LU_incpiv";
22 //static char* fla_front_str            = "";
23 //static char* fla_unb_var_str          = "";
24 //static char* fla_opt_var_str          = "";
25 //static char* fla_blk_var_str          = "";
26 static char* pc_str[NUM_PARAM_COMBOS] = { "" };
27 static test_thresh_t thresh           = { 1e-02, 1e-03,   // warn, pass for s
28                                           1e-10, 1e-11,   // warn, pass for d
29                                           1e-02, 1e-03,   // warn, pass for c
30                                           1e-10, 1e-11 }; // warn, pass for z
31 
32 // Local prototypes.
33 void libfla_test_lu_incpiv_experiment( test_params_t params,
34                                        unsigned int  var,
35                                        char*         sc_str,
36                                        FLA_Datatype  datatype,
37                                        unsigned int  p,
38                                        unsigned int  pci,
39                                        unsigned int  n_repeats,
40                                        signed int    impl,
41                                        double*       perf,
42                                        double*       residual );
43 void libfla_test_lu_incpiv_impl( int     impl,
44                                  FLA_Obj A,
45                                  FLA_Obj p,
46                                  FLA_Obj L );
47 
48 
libfla_test_lu_incpiv(FILE * output_stream,test_params_t params,test_op_t op)49 void libfla_test_lu_incpiv( FILE* output_stream, test_params_t params, test_op_t op )
50 {
51 	libfla_test_output_info( "--- %s ---\n", op_str );
52 	libfla_test_output_info( "\n" );
53 
54 	if ( op.flash_front == ENABLE )
55 	{
56 		//libfla_test_output_info( "%s() front-end...\n", flash_front_str );
57 		//libfla_test_output_info( "\n" );
58 		libfla_test_op_driver( flash_front_str, NULL,
59 		                       FIRST_VARIANT, LAST_VARIANT,
60 		                       NUM_PARAM_COMBOS, pc_str,
61 		                       NUM_MATRIX_ARGS,
62 		                       FLA_TEST_HIER_FRONT_END,
63 		                       params, thresh, libfla_test_lu_incpiv_experiment );
64 	}
65 
66 }
67 
68 
69 
libfla_test_lu_incpiv_experiment(test_params_t params,unsigned int var,char * sc_str,FLA_Datatype datatype,unsigned int p_cur,unsigned int pci,unsigned int n_repeats,signed int impl,double * perf,double * residual)70 void libfla_test_lu_incpiv_experiment( test_params_t params,
71                                        unsigned int  var,
72                                        char*         sc_str,
73                                        FLA_Datatype  datatype,
74                                        unsigned int  p_cur,
75                                        unsigned int  pci,
76                                        unsigned int  n_repeats,
77                                        signed int    impl,
78                                        double*       perf,
79                                        double*       residual )
80 {
81 	dim_t        b_flash    = params.b_flash;
82 	dim_t        b_alg_hier = params.b_alg_hier;
83 	double       time_min   = 1e9;
84 	double       time;
85 	unsigned int i;
86 	unsigned int m, n;
87 	signed int   m_input    = -1;
88 	signed int   n_input    = -1;
89 	FLA_Obj      A, x, b, norm;
90 	FLA_Obj      A_save;
91 	FLA_Obj      A_test, p_test, L_test, x_test, b_test;
92 
93 	// Determine the dimensions.
94 	if ( m_input < 0 ) m = p_cur / abs(m_input);
95 	else               m = p_cur;
96 	if ( n_input < 0 ) n = p_cur / abs(n_input);
97 	else               n = p_cur;
98 
99 	// Create the matrices for the current operation.
100 	libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[0], m, n, &A );
101 
102 	// Initialize the test matrices.
103 	FLA_Random_matrix( A );
104 
105 	// Save the original object contents in a temporary object.
106 	FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, A, &A_save );
107 
108 	// Create vectors to form a linear system.
109 	FLA_Obj_create( datatype, n, 1, 0, 0, &x );
110 	FLA_Obj_create( datatype, n, 1, 0, 0, &b );
111 
112 	// Create a real scalar object to hold the norm of A.
113 	FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &norm );
114 
115 	// Create a random right-hand side vector.
116 	FLA_Random_matrix( b );
117 
118 	// Use hierarchical matrices since we're testing the FLASH front-end.
119 	FLASH_LU_incpiv_create_hier_matrices( A, 1, &b_flash, b_alg_hier, &A_test, &p_test, &L_test );
120 	FLASH_Obj_create_hier_copy_of_flat( b, 1, &b_flash, &b_test );
121 	FLASH_Obj_create_hier_copy_of_flat( x, 1, &b_flash, &x_test );
122 
123 	// Repeat the experiment n_repeats times and record results.
124 	for ( i = 0; i < n_repeats; ++i )
125 	{
126 		FLASH_Obj_hierarchify( A_save, A_test );
127 
128 		time = FLA_Clock();
129 
130 		libfla_test_lu_incpiv_impl( impl, A_test, p_test, L_test );
131 
132 		time = FLA_Clock() - time;
133 		time_min = min( time_min, time );
134 	}
135 
136 	// Perform a linear solve with the result.
137    	FLASH_LU_incpiv_solve( A_test, p_test, L_test, b_test, x_test );
138 	FLASH_Obj_flatten( x_test, x );
139 
140 	// Free the FLASH objects.
141 	FLASH_Obj_free( &A_test );
142 	FLASH_Obj_free( &p_test );
143 	FLASH_Obj_free( &L_test );
144 	FLASH_Obj_free( &b_test );
145 	FLASH_Obj_free( &x_test );
146 
147 	// Compute the performance of the best experiment repeat.
148 	*perf = 2.0 / 3.0 * m * m * m / time_min / FLOPS_PER_UNIT_PERF;
149 	if ( FLA_Obj_is_complex( A ) ) *perf *= 4.0;
150 
151 	// Compute the residual.
152 	FLA_Gemv_external( FLA_NO_TRANSPOSE,
153 	                   FLA_ONE, A_save, x, FLA_MINUS_ONE, b );
154 	FLA_Nrm2_external( b, norm );
155 	FLA_Obj_extract_real_scalar( norm, residual );
156 
157 	// Free the supporting flat objects.
158 	FLA_Obj_free( &x );
159 	FLA_Obj_free( &b );
160 	FLA_Obj_free( &norm );
161 	FLA_Obj_free( &A_save );
162 
163 	// Free the flat test matrices.
164 	FLA_Obj_free( &A );
165 }
166 
167 
168 
libfla_test_lu_incpiv_impl(int impl,FLA_Obj A,FLA_Obj p,FLA_Obj L)169 void libfla_test_lu_incpiv_impl( int     impl,
170                                  FLA_Obj A,
171                                  FLA_Obj p,
172                                  FLA_Obj L )
173 {
174 	switch ( impl )
175 	{
176 		case FLA_TEST_HIER_FRONT_END:
177 		FLASH_LU_incpiv( A, p, L );
178 		break;
179 
180 		default:
181 		libfla_test_output_error( "Invalid implementation type.\n" );
182 	}
183 }
184 
185