1 // Copyright (C) 2011  Davis E. King (davis@dlib.net)
2 // License: Boost Software License   See LICENSE.txt for the full license.
3 
4 #include <sstream>
5 #include <string>
6 #include <cstdlib>
7 #include <ctime>
8 #include <dlib/svm_threaded.h>
9 
10 #include "tester.h"
11 
12 namespace
13 {
14     using namespace test;
15     using namespace dlib;
16     using namespace std;
17 
    // File-local logger, constructed with the name "test.svm_struct"; the test
    // driver below writes its LINFO diagnostics through it.
    logger dlog("test.svm_struct");
19 
20 
21     template <
22         typename matrix_type,
23         typename sample_type,
24         typename label_type
25         >
26     class test_multiclass_svm_problem : public structural_svm_problem_threaded<matrix_type,
27                                                                  std::vector<std::pair<unsigned long,typename matrix_type::type> > >
28     {
29 
30     public:
31         typedef typename matrix_type::type scalar_type;
32         typedef std::vector<std::pair<unsigned long,scalar_type> > feature_vector_type;
33 
test_multiclass_svm_problem(const std::vector<sample_type> & samples_,const std::vector<label_type> & labels_)34         test_multiclass_svm_problem (
35             const std::vector<sample_type>& samples_,
36             const std::vector<label_type>& labels_
37         ) :
38             structural_svm_problem_threaded<matrix_type,
39                 std::vector<std::pair<unsigned long,typename matrix_type::type> > >(2),
40             samples(samples_),
41             labels(labels_),
42             dims(10+1) // +1 for the bias
43         {
44             for (int i = 0; i < 10; ++i)
45             {
46                 distinct_labels.push_back(i);
47             }
48         }
49 
get_num_dimensions() const50         virtual long get_num_dimensions (
51         ) const
52         {
53             return dims*10;
54         }
55 
get_num_samples() const56         virtual long get_num_samples (
57         ) const
58         {
59             return static_cast<long>(samples.size());
60         }
61 
get_truth_joint_feature_vector(long idx,feature_vector_type & psi) const62         virtual void get_truth_joint_feature_vector (
63             long idx,
64             feature_vector_type& psi
65         ) const
66         {
67             assign(psi, samples[idx]);
68             // Add a constant -1 to account for the bias term.
69             psi.push_back(std::make_pair(dims-1,static_cast<scalar_type>(-1)));
70 
71             // Find which distinct label goes with this psi.
72             const long label_idx = index_of_max(mat(distinct_labels) == labels[idx]);
73 
74             offset_feature_vector(psi, dims*label_idx);
75         }
76 
separation_oracle(const long idx,const matrix_type & current_solution,scalar_type & loss,feature_vector_type & psi) const77         virtual void separation_oracle (
78             const long idx,
79             const matrix_type& current_solution,
80             scalar_type& loss,
81             feature_vector_type& psi
82         ) const
83         {
84             scalar_type best_val = -std::numeric_limits<scalar_type>::infinity();
85             unsigned long best_idx = 0;
86 
87             // Figure out which label is the best.  That is, what label maximizes
88             // LOSS(idx,y) + F(x,y).  Note that y in this case is given by distinct_labels[i].
89             for (unsigned long i = 0; i < distinct_labels.size(); ++i)
90             {
91                 // Compute the F(x,y) part:
92                 // perform: temp == dot(relevant part of current solution, samples[idx]) - current_bias
93                 scalar_type temp = dot(rowm(current_solution, range(i*dims, (i+1)*dims-2)), samples[idx]) - current_solution((i+1)*dims-1);
94 
95                 // Add the LOSS(idx,y) part:
96                 if (labels[idx] != distinct_labels[i])
97                     temp += 1;
98 
99                 // Now temp == LOSS(idx,y) + F(x,y).  Check if it is the biggest we have seen.
100                 if (temp > best_val)
101                 {
102                     best_val = temp;
103                     best_idx = i;
104                 }
105             }
106 
107             assign(psi, samples[idx]);
108             // add a constant -1 to account for the bias term
109             psi.push_back(std::make_pair(dims-1,static_cast<scalar_type>(-1)));
110 
111             offset_feature_vector(psi, dims*best_idx);
112 
113             if (distinct_labels[best_idx] == labels[idx])
114                 loss = 0;
115             else
116                 loss = 1;
117         }
118 
119     private:
120 
offset_feature_vector(feature_vector_type & sample,const unsigned long val) const121         void offset_feature_vector (
122             feature_vector_type& sample,
123             const unsigned long val
124         ) const
125         {
126             if (val != 0)
127             {
128                 for (typename feature_vector_type::iterator i = sample.begin(); i != sample.end(); ++i)
129                 {
130                     i->first += val;
131                 }
132             }
133         }
134 
135 
136         const std::vector<sample_type>& samples;
137         const std::vector<label_type>& labels;
138         std::vector<label_type> distinct_labels;
139         const long dims;
140     };
141 
142 // ----------------------------------------------------------------------------------------
143 
144     template <
145         typename K,
146         typename label_type_ = typename K::scalar_type
147         >
148     class test_svm_multiclass_linear_trainer2
149     {
150     public:
151         typedef label_type_ label_type;
152         typedef K kernel_type;
153         typedef typename kernel_type::scalar_type scalar_type;
154         typedef typename kernel_type::sample_type sample_type;
155         typedef typename kernel_type::mem_manager_type mem_manager_type;
156 
157         typedef multiclass_linear_decision_function<kernel_type, label_type> trained_function_type;
158 
159 
test_svm_multiclass_linear_trainer2()160         test_svm_multiclass_linear_trainer2 (
161         ) :
162             C(10),
163             eps(1e-4),
164             verbose(false)
165         {
166         }
167 
train(const std::vector<sample_type> & all_samples,const std::vector<label_type> & all_labels) const168         trained_function_type train (
169             const std::vector<sample_type>& all_samples,
170             const std::vector<label_type>& all_labels
171         ) const
172         {
173             scalar_type svm_objective = 0;
174             return train(all_samples, all_labels, svm_objective);
175         }
176 
train(const std::vector<sample_type> & all_samples,const std::vector<label_type> & all_labels,scalar_type & svm_objective) const177         trained_function_type train (
178             const std::vector<sample_type>& all_samples,
179             const std::vector<label_type>& all_labels,
180             scalar_type& svm_objective
181         ) const
182         {
183             // make sure requires clause is not broken
184             DLIB_ASSERT(is_learning_problem(all_samples,all_labels),
185                 "\t trained_function_type test_svm_multiclass_linear_trainer2::train(all_samples,all_labels)"
186                 << "\n\t invalid inputs were given to this function"
187                 << "\n\t all_samples.size():     " << all_samples.size()
188                 << "\n\t all_labels.size():      " << all_labels.size()
189                 );
190 
191             typedef matrix<scalar_type,0,1> w_type;
192             w_type weights;
193             std::vector<sample_type> samples1(all_samples.begin(), all_samples.begin()+all_samples.size()/2);
194             std::vector<sample_type> samples2(all_samples.begin()+all_samples.size()/2, all_samples.end());
195 
196             std::vector<label_type> labels1(all_labels.begin(), all_labels.begin()+all_labels.size()/2);
197             std::vector<label_type> labels2(all_labels.begin()+all_labels.size()/2, all_labels.end());
198             test_multiclass_svm_problem<w_type, sample_type, label_type> problem1(samples1, labels1);
199             test_multiclass_svm_problem<w_type, sample_type, label_type> problem2(samples2, labels2);
200             problem1.set_max_cache_size(3);
201             problem2.set_max_cache_size(0);
202 
203             svm_struct_processing_node node1(problem1, 12345, 3);
204             svm_struct_processing_node node2(problem2, 12346, 0);
205 
206             solver.set_inactive_plane_threshold(50);
207             solver.set_subproblem_epsilon(1e-4);
208 
209             svm_struct_controller_node controller;
210             controller.set_c(C);
211             controller.set_epsilon(eps);
212             if (verbose)
213                 controller.be_verbose();
214             controller.add_processing_node("127.0.0.1", 12345);
215             controller.add_processing_node("localhost:12346");
216             svm_objective = controller(solver, weights);
217 
218 
219 
220             trained_function_type df;
221 
222             const long dims = max_index_plus_one(all_samples);
223             df.labels  = select_all_distinct_labels(all_labels);
224             df.weights = colm(reshape(weights, df.labels.size(), dims+1), range(0,dims-1));
225             df.b       = colm(reshape(weights, df.labels.size(), dims+1), dims);
226             return df;
227         }
228 
229     private:
230         scalar_type C;
231         scalar_type eps;
232         bool verbose;
233         mutable oca solver;
234     };
235 
236 // ----------------------------------------------------------------------------------------
237 
238     template <
239         typename K,
240         typename label_type_ = typename K::scalar_type
241         >
242     class test_svm_multiclass_linear_trainer3
243     {
244     public:
245         typedef label_type_ label_type;
246         typedef K kernel_type;
247         typedef typename kernel_type::scalar_type scalar_type;
248         typedef typename kernel_type::sample_type sample_type;
249         typedef typename kernel_type::mem_manager_type mem_manager_type;
250 
251         typedef multiclass_linear_decision_function<kernel_type, label_type> trained_function_type;
252 
253 
test_svm_multiclass_linear_trainer3()254         test_svm_multiclass_linear_trainer3 (
255         ) :
256             C(10),
257             eps(1e-4),
258             verbose(false)
259         {
260         }
261 
train(const std::vector<sample_type> & all_samples,const std::vector<label_type> & all_labels) const262         trained_function_type train (
263             const std::vector<sample_type>& all_samples,
264             const std::vector<label_type>& all_labels
265         ) const
266         {
267             scalar_type svm_objective = 0;
268             return train(all_samples, all_labels, svm_objective);
269         }
270 
train(const std::vector<sample_type> & all_samples,const std::vector<label_type> & all_labels,scalar_type & svm_objective) const271         trained_function_type train (
272             const std::vector<sample_type>& all_samples,
273             const std::vector<label_type>& all_labels,
274             scalar_type& svm_objective
275         ) const
276         {
277             // make sure requires clause is not broken
278             DLIB_ASSERT(is_learning_problem(all_samples,all_labels),
279                 "\t trained_function_type test_svm_multiclass_linear_trainer3::train(all_samples,all_labels)"
280                 << "\n\t invalid inputs were given to this function"
281                 << "\n\t all_samples.size():     " << all_samples.size()
282                 << "\n\t all_labels.size():      " << all_labels.size()
283                 );
284 
285             typedef matrix<scalar_type,0,1> w_type;
286             w_type weights;
287             test_multiclass_svm_problem<w_type, sample_type, label_type> problem(all_samples, all_labels);
288             problem.set_max_cache_size(0);
289 
290             problem.set_c(C);
291             problem.set_epsilon(eps);
292 
293             if (verbose)
294                 problem.be_verbose();
295 
296             solver.set_inactive_plane_threshold(50);
297             solver.set_subproblem_epsilon(1e-4);
298             svm_objective = solver(problem, weights);
299 
300 
301             trained_function_type df;
302 
303             const long dims = max_index_plus_one(all_samples);
304             df.labels  = select_all_distinct_labels(all_labels);
305             df.weights = colm(reshape(weights, df.labels.size(), dims+1), range(0,dims-1));
306             df.b       = colm(reshape(weights, df.labels.size(), dims+1), dims);
307             return df;
308         }
309 
310     private:
311         scalar_type C;
312         scalar_type eps;
313         bool verbose;
314         mutable oca solver;
315     };
316 
317 // ----------------------------------------------------------------------------------------
318 
319     template <
320         typename K,
321         typename label_type_ = typename K::scalar_type
322         >
323     class test_svm_multiclass_linear_trainer4
324     {
325     public:
326         typedef label_type_ label_type;
327         typedef K kernel_type;
328         typedef typename kernel_type::scalar_type scalar_type;
329         typedef typename kernel_type::sample_type sample_type;
330         typedef typename kernel_type::mem_manager_type mem_manager_type;
331 
332         typedef multiclass_linear_decision_function<kernel_type, label_type> trained_function_type;
333 
334 
test_svm_multiclass_linear_trainer4()335         test_svm_multiclass_linear_trainer4 (
336         ) :
337             C(10),
338             eps(1e-4),
339             verbose(false)
340         {
341         }
342 
train(const std::vector<sample_type> & all_samples,const std::vector<label_type> & all_labels) const343         trained_function_type train (
344             const std::vector<sample_type>& all_samples,
345             const std::vector<label_type>& all_labels
346         ) const
347         {
348             scalar_type svm_objective = 0;
349             return train(all_samples, all_labels, svm_objective);
350         }
351 
train(const std::vector<sample_type> & all_samples,const std::vector<label_type> & all_labels,scalar_type & svm_objective) const352         trained_function_type train (
353             const std::vector<sample_type>& all_samples,
354             const std::vector<label_type>& all_labels,
355             scalar_type& svm_objective
356         ) const
357         {
358             // make sure requires clause is not broken
359             DLIB_ASSERT(is_learning_problem(all_samples,all_labels),
360                 "\t trained_function_type test_svm_multiclass_linear_trainer4::train(all_samples,all_labels)"
361                 << "\n\t invalid inputs were given to this function"
362                 << "\n\t all_samples.size():     " << all_samples.size()
363                 << "\n\t all_labels.size():      " << all_labels.size()
364                 );
365 
366             typedef matrix<scalar_type,0,1> w_type;
367             w_type weights;
368             test_multiclass_svm_problem<w_type, sample_type, label_type> problem(all_samples, all_labels);
369             problem.set_max_cache_size(3);
370 
371             problem.set_c(C);
372             problem.set_epsilon(eps);
373 
374             if (verbose)
375                 problem.be_verbose();
376 
377             solver.set_inactive_plane_threshold(50);
378             solver.set_subproblem_epsilon(1e-4);
379             svm_objective = solver(problem, weights);
380 
381 
382             trained_function_type df;
383 
384             const long dims = max_index_plus_one(all_samples);
385             df.labels  = select_all_distinct_labels(all_labels);
386             df.weights = colm(reshape(weights, df.labels.size(), dims+1), range(0,dims-1));
387             df.b       = colm(reshape(weights, df.labels.size(), dims+1), dims);
388             return df;
389         }
390 
391     private:
392         scalar_type C;
393         scalar_type eps;
394         bool verbose;
395         mutable oca solver;
396     };
397 
398 // ----------------------------------------------------------------------------------------
399 
400     template <
401         typename K,
402         typename label_type_ = typename K::scalar_type
403         >
404     class test_svm_multiclass_linear_trainer5
405     {
406     public:
407         typedef label_type_ label_type;
408         typedef K kernel_type;
409         typedef typename kernel_type::scalar_type scalar_type;
410         typedef typename kernel_type::sample_type sample_type;
411         typedef typename kernel_type::mem_manager_type mem_manager_type;
412 
413         typedef multiclass_linear_decision_function<kernel_type, label_type> trained_function_type;
414 
415 
test_svm_multiclass_linear_trainer5()416         test_svm_multiclass_linear_trainer5 (
417         ) :
418             C(10),
419             eps(1e-4),
420             verbose(false)
421         {
422         }
423 
train(const std::vector<sample_type> & all_samples,const std::vector<label_type> & all_labels) const424         trained_function_type train (
425             const std::vector<sample_type>& all_samples,
426             const std::vector<label_type>& all_labels
427         ) const
428         {
429             scalar_type svm_objective = 0;
430             return train(all_samples, all_labels, svm_objective);
431         }
432 
train(const std::vector<sample_type> & all_samples,const std::vector<label_type> & all_labels,scalar_type & svm_objective) const433         trained_function_type train (
434             const std::vector<sample_type>& all_samples,
435             const std::vector<label_type>& all_labels,
436             scalar_type& svm_objective
437         ) const
438         {
439             // make sure requires clause is not broken
440             DLIB_ASSERT(is_learning_problem(all_samples,all_labels),
441                 "\t trained_function_type test_svm_multiclass_linear_trainer5::train(all_samples,all_labels)"
442                 << "\n\t invalid inputs were given to this function"
443                 << "\n\t all_samples.size():     " << all_samples.size()
444                 << "\n\t all_labels.size():      " << all_labels.size()
445                 );
446 
447             typedef matrix<scalar_type,0,1> w_type;
448             w_type weights;
449             const long dims = max_index_plus_one(all_samples);
450             trained_function_type df;
451             df.labels  = select_all_distinct_labels(all_labels);
452             multiclass_svm_problem<w_type, sample_type, label_type> problem(all_samples, all_labels, df.labels, dims, 4);
453             problem.set_max_cache_size(3);
454 
455             problem.set_c(C);
456             problem.set_epsilon(eps);
457 
458             if (verbose)
459                 problem.be_verbose();
460 
461             solver.set_inactive_plane_threshold(50);
462             solver.set_subproblem_epsilon(1e-4);
463             svm_objective = solver(problem, weights);
464 
465 
466 
467             df.weights = colm(reshape(weights, df.labels.size(), dims+1), range(0,dims-1));
468             df.b       = colm(reshape(weights, df.labels.size(), dims+1), dims);
469             return df;
470         }
471 
472     private:
473         scalar_type C;
474         scalar_type eps;
475         bool verbose;
476         mutable oca solver;
477     };
478 
479 
480 // ----------------------------------------------------------------------------------------
481 
    // Data types used by make_dataset() and the test_svm_struct driver below:
    // a sample is a 10-element column vector of doubles and a label is a double.
    typedef matrix<double,10,1> sample_type;
    typedef double scalar_type;
484 
make_dataset(std::vector<sample_type> & samples,std::vector<scalar_type> & labels,int num,dlib::rand & rnd)485     void make_dataset (
486         std::vector<sample_type>& samples,
487         std::vector<scalar_type>& labels,
488         int num,
489         dlib::rand& rnd
490     )
491     {
492         samples.clear();
493         labels.clear();
494         for (int i = 0; i < 10; ++i)
495         {
496             for (int j = 0; j < num; ++j)
497             {
498                 sample_type samp;
499                 samp = 0;
500                 samp(i) = 10*rnd.get_random_double()+1;
501 
502                 samples.push_back(samp);
503                 labels.push_back(i);
504             }
505         }
506     }
507 
508 // ----------------------------------------------------------------------------------------
509 
510     class test_svm_struct : public tester
511     {
512     public:
test_svm_struct()513         test_svm_struct (
514         ) :
515             tester ("test_svm_struct",
516                     "Runs tests on the structural svm components.")
517         {}
518 
run_test(const std::vector<sample_type> & samples,const std::vector<scalar_type> & labels,const double true_obj)519         void run_test (
520             const std::vector<sample_type>& samples,
521             const std::vector<scalar_type>& labels,
522             const double true_obj
523         )
524         {
525             typedef linear_kernel<sample_type> kernel_type;
526             svm_multiclass_linear_trainer<kernel_type> trainer1;
527             test_svm_multiclass_linear_trainer2<kernel_type> trainer2;
528             test_svm_multiclass_linear_trainer3<kernel_type> trainer3;
529             test_svm_multiclass_linear_trainer4<kernel_type> trainer4;
530             test_svm_multiclass_linear_trainer5<kernel_type> trainer5;
531 
532             trainer1.set_epsilon(1e-4);
533             trainer1.set_c(10);
534 
535 
536             multiclass_linear_decision_function<kernel_type,double> df1, df2, df3, df4, df5;
537             double obj1, obj2, obj3, obj4, obj5;
538 
539             // Solve a multiclass SVM a whole bunch of different ways and make sure
540             // they all give the same answer.
541             print_spinner();
542             df1 = trainer1.train(samples, labels, obj1);
543             print_spinner();
544             df2 = trainer2.train(samples, labels, obj2);
545             print_spinner();
546             df3 = trainer3.train(samples, labels, obj3);
547             print_spinner();
548             df4 = trainer4.train(samples, labels, obj4);
549             print_spinner();
550             df5 = trainer5.train(samples, labels, obj5);
551             print_spinner();
552 
553             dlog << LINFO << "obj1: "<< obj1;
554             dlog << LINFO << "obj2: "<< obj2;
555             dlog << LINFO << "obj3: "<< obj3;
556             dlog << LINFO << "obj4: "<< obj4;
557             dlog << LINFO << "obj5: "<< obj5;
558             DLIB_TEST(std::abs(obj1 - obj2) < 1e-2);
559             DLIB_TEST(std::abs(obj1 - obj3) < 1e-2);
560             DLIB_TEST(std::abs(obj1 - obj4) < 1e-2);
561             DLIB_TEST(std::abs(obj1 - obj5) < 1e-2);
562             DLIB_TEST(std::abs(obj1 - true_obj) < 1e-2);
563             DLIB_TEST(std::abs(obj2 - true_obj) < 1e-2);
564             DLIB_TEST(std::abs(obj3 - true_obj) < 1e-2);
565             DLIB_TEST(std::abs(obj4 - true_obj) < 1e-2);
566             DLIB_TEST(std::abs(obj5 - true_obj) < 1e-2);
567 
568             dlog << LINFO << "weight error: "<< max(abs(df1.weights - df2.weights));
569             dlog << LINFO << "weight error: "<< max(abs(df1.weights - df3.weights));
570             dlog << LINFO << "weight error: "<< max(abs(df1.weights - df4.weights));
571             dlog << LINFO << "weight error: "<< max(abs(df1.weights - df5.weights));
572 
573             DLIB_TEST(max(abs(df1.weights - df2.weights)) < 1e-2);
574             DLIB_TEST(max(abs(df1.weights - df3.weights)) < 1e-2);
575             DLIB_TEST(max(abs(df1.weights - df4.weights)) < 1e-2);
576             DLIB_TEST(max(abs(df1.weights - df5.weights)) < 1e-2);
577 
578             dlog << LINFO << "b error: "<< max(abs(df1.b - df2.b));
579             dlog << LINFO << "b error: "<< max(abs(df1.b - df3.b));
580             dlog << LINFO << "b error: "<< max(abs(df1.b - df4.b));
581             dlog << LINFO << "b error: "<< max(abs(df1.b - df5.b));
582             DLIB_TEST(max(abs(df1.b - df2.b)) < 1e-2);
583             DLIB_TEST(max(abs(df1.b - df3.b)) < 1e-2);
584             DLIB_TEST(max(abs(df1.b - df4.b)) < 1e-2);
585             DLIB_TEST(max(abs(df1.b - df5.b)) < 1e-2);
586 
587             matrix<double> res = test_multiclass_decision_function(df1, samples, labels);
588             dlog << LINFO << res;
589             dlog << LINFO << "accuracy: " << sum(diag(res))/sum(res);
590             DLIB_TEST(sum(diag(res)) == samples.size());
591 
592             res = test_multiclass_decision_function(df2, samples, labels);
593             dlog << LINFO << res;
594             dlog << LINFO << "accuracy: " << sum(diag(res))/sum(res);
595             DLIB_TEST(sum(diag(res)) == samples.size());
596 
597             res = test_multiclass_decision_function(df3, samples, labels);
598             dlog << LINFO << res;
599             dlog << LINFO << "accuracy: " << sum(diag(res))/sum(res);
600             DLIB_TEST(sum(diag(res)) == samples.size());
601 
602             res = test_multiclass_decision_function(df4, samples, labels);
603             dlog << LINFO << res;
604             dlog << LINFO << "accuracy: " << sum(diag(res))/sum(res);
605             DLIB_TEST(sum(diag(res)) == samples.size());
606 
607             res = test_multiclass_decision_function(df5, samples, labels);
608             dlog << LINFO << res;
609             dlog << LINFO << "accuracy: " << sum(diag(res))/sum(res);
610             DLIB_TEST(sum(diag(res)) == samples.size());
611         }
612 
perform_test()613         void perform_test (
614         )
615         {
616             std::vector<sample_type> samples;
617             std::vector<scalar_type> labels;
618 
619             dlib::rand rnd;
620 
621             dlog << LINFO << "test with 100 samples per class";
622             make_dataset(samples, labels, 100, rnd);
623             run_test(samples, labels, 1.155);
624 
625             dlog << LINFO << "test with 1 sample per class";
626             make_dataset(samples, labels, 1, rnd);
627             run_test(samples, labels, 0.251);
628 
629             dlog << LINFO << "test with 2 sample per class";
630             make_dataset(samples, labels, 2, rnd);
631             run_test(samples, labels, 0.444);
632         }
633     } a;
634 
635 
636 
637 }
638 
639 
640 
641 
642