// Copyright (C) 2010  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.

#include "tester.h"
#include <dlib/svm.h>
#include <dlib/rand.h>
#include <dlib/string.h>
#include <vector>
#include <sstream>
#include <ctime>
#include <dlib/data_io.h>

namespace
{
    using namespace test;
    using namespace dlib;
    using namespace std;
    dlib::logger dlog("test.sldf");


    class sldf_tester : public tester
    {
        /*!
            WHAT THIS OBJECT REPRESENTS
                This object represents a unit test.  When it is constructed
                it adds itself into the testing framework.
        !*/
    public:
        sldf_tester (
        ) :
            tester (
                "test_sldf",       // the command line argument name for this test
                "Run tests on the simplify_linear_decision_function routines.", // the command line argument description
                0                     // the number of command line arguments for this test
            )
        {
        }

        dlib::rand rnd;


        void perform_test (
        )
        {
            print_spinner();
            typedef std::map<unsigned long,double> sample_type;

            typedef matrix<double,0,1> dense_sample_type;

            typedef sparse_linear_kernel<sample_type> kernel_type;
            typedef linear_kernel<dense_sample_type> dense_kernel_type;


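            // Train with nu-SVM trainers over linear kernels.  A nu of 0.2 upper bounds the
            // fraction of margin errors and lower bounds the fraction of support vectors, so each
            // learned decision function should contain several basis vectors for
            // simplify_linear_decision_function() to collapse.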
            svm_nu_trainer<kernel_type> linear_trainer;
            linear_trainer.set_nu(0.2);
            svm_nu_trainer<dense_kernel_type> dense_linear_trainer;
            dense_linear_trainer.set_nu(0.2);

            std::vector<sample_type> samples;
            std::vector<double> labels;

            // make an instance of a sample vector so we can use it below
            sample_type sample;

            // Now let's go into a loop and randomly generate 300 samples.
            double label = +1;
            for (int i = 0; i < 300; ++i)
            {
                // flip this flag
                label *= -1;

                sample.clear();

                // now make a random sparse sample with at most 10 non-zero elements
                for (int j = 0; j < 10; ++j)
                {
                    int idx = rnd.get_random_32bit_number()%100;
                    double value = rnd.get_random_double();

                    sample[idx] = label*value;
                }

                // Also save the samples we are generating so we can let the svm_nu_trainer
                // learn from them below.
                samples.push_back(sample);
                labels.push_back(label);
            }


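            // For a linear kernel, df(x) = sum_i alpha_i*<x_i,x> - b = <w,x> - b with
            // w = sum_i alpha_i*x_i.  simplify_linear_decision_function() computes w explicitly and
            // returns an equivalent decision function containing just that single basis vector, so
            // the simplified df should reproduce the original outputs up to rounding error.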
            {
                print_spinner();
                dlog << LINFO << " test with sparse samples ";
                decision_function<kernel_type> df = linear_trainer.train(samples, labels);

                dlog << LINFO << "df.basis_vectors.size(): "<< df.basis_vectors.size();
                DLIB_TEST(df.basis_vectors.size() > 4);

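                // test_binary_decision_function() reports the fraction of +1 and -1 samples
                // classified correctly, so these log lines just confirm the learned rule fits the data.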
                dlog << LINFO << "test scores: "<< test_binary_decision_function(df, samples, labels);

                // save the outputs of the decision function before we mess with it
                std::vector<double> prev_vals;
                for (unsigned long i = 0; i < samples.size(); ++i)
                    prev_vals.push_back(df(samples[i]));

                df = simplify_linear_decision_function(df);

                dlog << LINFO << "df.basis_vectors.size(): "<< df.basis_vectors.size();
                DLIB_TEST(df.basis_vectors.size() == 1);

                dlog << LINFO << "test scores: "<< test_binary_decision_function(df, samples, labels);

                // now check that the simplified decision function still produces the same results
                std::vector<double> cur_vals;
                for (unsigned long i = 0; i < samples.size(); ++i)
                    cur_vals.push_back(df(samples[i]));

                const double err = max(abs(mat(cur_vals) - mat(prev_vals)));
                dlog << LINFO << "simplify error: "<< err;
                DLIB_TEST(err < 1e-13);

            }


            // same as above but call simplify_linear_decision_function() two times
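            // (simplifying an already simplified decision function should be a harmless no-op that
            // still leaves exactly one basis vector and the same outputs)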
            {
                print_spinner();
                dlog << LINFO << " test with sparse samples ";
                decision_function<kernel_type> df = linear_trainer.train(samples, labels);

                dlog << LINFO << "df.basis_vectors.size(): "<< df.basis_vectors.size();
                DLIB_TEST(df.basis_vectors.size() > 4);

                dlog << LINFO << "test scores: "<< test_binary_decision_function(df, samples, labels);

                // save the outputs of the decision function before we mess with it
                std::vector<double> prev_vals;
                for (unsigned long i = 0; i < samples.size(); ++i)
                    prev_vals.push_back(df(samples[i]));

                df = simplify_linear_decision_function(df);
                df = simplify_linear_decision_function(df);

                dlog << LINFO << "df.basis_vectors.size(): "<< df.basis_vectors.size();
                DLIB_TEST(df.basis_vectors.size() == 1);

                dlog << LINFO << "test scores: "<< test_binary_decision_function(df, samples, labels);

                // now check that the simplified decision function still produces the same results
                std::vector<double> cur_vals;
                for (unsigned long i = 0; i < samples.size(); ++i)
                    cur_vals.push_back(df(samples[i]));

                const double err = max(abs(mat(cur_vals) - mat(prev_vals)));
                dlog << LINFO << "simplify error: "<< err;
                DLIB_TEST(err < 1e-13);

            }


            {
                print_spinner();
                dlog << LINFO << " test with dense samples ";
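                // sparse_to_dense() converts each std::map based sparse sample into a dense column
                // vector long enough to hold every dimension seen in the data set.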
                std::vector<dense_sample_type> dense_samples(sparse_to_dense(samples));

                // Use our dense_linear_trainer to learn a decision rule from the dense samples.
                decision_function<dense_kernel_type> dense_df = dense_linear_trainer.train(dense_samples, labels);

                dlog << LINFO << "dense_df.basis_vectors.size(): "<< dense_df.basis_vectors.size();
                DLIB_TEST(dense_df.basis_vectors.size() > 4);

                dlog << LINFO << "test scores: "<< test_binary_decision_function(dense_df, dense_samples, labels);

                // save the outputs of the decision function before we mess with it
                std::vector<double> prev_vals;
                for (unsigned long i = 0; i < dense_samples.size(); ++i)
                    prev_vals.push_back(dense_df(dense_samples[i]));

                dense_df = simplify_linear_decision_function(dense_df);

                dlog << LINFO << "dense_df.basis_vectors.size(): "<< dense_df.basis_vectors.size();
                DLIB_TEST(dense_df.basis_vectors.size() == 1);

                dlog << LINFO << "test scores: "<< test_binary_decision_function(dense_df, dense_samples, labels);


                // now check that the simplified decision function still produces the same results
                std::vector<double> cur_vals;
                for (unsigned long i = 0; i < dense_samples.size(); ++i)
                    cur_vals.push_back(dense_df(dense_samples[i]));

                const double err = max(abs(mat(cur_vals) - mat(prev_vals)));
                dlog << LINFO << "simplify error: "<< err;
                DLIB_TEST(err < 1e-13);
            }

            // same as above but call simplify_linear_decision_function() two times
            {
                print_spinner();
                dlog << LINFO << " test with dense samples ";
                std::vector<dense_sample_type> dense_samples(sparse_to_dense(samples));

                // Use our dense_linear_trainer to learn a decision rule from the dense samples.
                decision_function<dense_kernel_type> dense_df = dense_linear_trainer.train(dense_samples, labels);

                dlog << LINFO << "dense_df.basis_vectors.size(): "<< dense_df.basis_vectors.size();
                DLIB_TEST(dense_df.basis_vectors.size() > 4);

                dlog << LINFO << "test scores: "<< test_binary_decision_function(dense_df, dense_samples, labels);

                // save the outputs of the decision function before we mess with it
                std::vector<double> prev_vals;
                for (unsigned long i = 0; i < dense_samples.size(); ++i)
                    prev_vals.push_back(dense_df(dense_samples[i]));

                dense_df = simplify_linear_decision_function(dense_df);
                dense_df = simplify_linear_decision_function(dense_df);

                dlog << LINFO << "dense_df.basis_vectors.size(): "<< dense_df.basis_vectors.size();
                DLIB_TEST(dense_df.basis_vectors.size() == 1);

                dlog << LINFO << "test scores: "<< test_binary_decision_function(dense_df, dense_samples, labels);


                // now check that the simplified decision function still produces the same results
                std::vector<double> cur_vals;
                for (unsigned long i = 0; i < dense_samples.size(); ++i)
                    cur_vals.push_back(dense_df(dense_samples[i]));

                const double err = max(abs(mat(cur_vals) - mat(prev_vals)));
                dlog << LINFO << "simplify error: "<< err;
                DLIB_TEST(err < 1e-13);
            }

            {
                print_spinner();

                dlog << LINFO << " test with sparse samples and a vector normalizer";
                std::vector<dense_sample_type> dense_samples(sparse_to_dense(samples));
                std::vector<dense_sample_type> norm_samples;

                // make a normalizer and normalize everything
                vector_normalizer<dense_sample_type> normalizer;
                normalizer.train(dense_samples);
                for (unsigned long i = 0; i < dense_samples.size(); ++i)
                    norm_samples.push_back(normalizer(dense_samples[i]));

                normalized_function<decision_function<dense_kernel_type> > dense_df;

                dense_df.normalizer = normalizer;
                dense_df.function = dense_linear_trainer.train(norm_samples, labels);

                dlog << LINFO << "dense_df.function.basis_vectors.size(): "<< dense_df.function.basis_vectors.size();
                DLIB_TEST(dense_df.function.basis_vectors.size() > 4);

                dlog << LINFO << "test scores: "<< test_binary_decision_function(dense_df, dense_samples, labels);

                // save the outputs of the decision function before we mess with it
                std::vector<double> prev_vals;
                for (unsigned long i = 0; i < dense_samples.size(); ++i)
                    prev_vals.push_back(dense_df(dense_samples[i]));


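                // This overload of simplify_linear_decision_function() folds the normalizer's mean
                // subtraction and scaling into the weight vector and bias, returning a plain
                // decision_function that no longer needs the vector_normalizer.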
                decision_function<dense_kernel_type> simple_df = simplify_linear_decision_function(dense_df);

                dlog << LINFO << "simple_df.basis_vectors.size(): "<< simple_df.basis_vectors.size();
                DLIB_TEST(simple_df.basis_vectors.size() == 1);

                dlog << LINFO << "test scores: "<< test_binary_decision_function(simple_df, dense_samples, labels);


                // now check that the simplified decision function still produces the same results
                std::vector<double> cur_vals;
                for (unsigned long i = 0; i < dense_samples.size(); ++i)
                    cur_vals.push_back(simple_df(dense_samples[i]));

                const double err = max(abs(mat(cur_vals) - mat(prev_vals)));
                dlog << LINFO << "simplify error: "<< err;
                DLIB_TEST(err < 1e-13);

            }

        }
    };

    // Create an instance of this object.  Doing this causes this test
    // to be automatically inserted into the testing framework whenever this cpp file
    // is linked into the project.  Note that since we are inside an unnamed-namespace
    // we won't get any linker errors about the symbol a being defined multiple times.
    sldf_tester a;

}