1 // This is mul/clsfy/clsfy_smo_1.cxx
2 #include <iostream>
3 #include <cmath>
4 #include "clsfy_smo_1.h"
//:
// \file
// \author Ian Scott
// \date 14-Nov-2001
// \brief Sequential Minimum Optimisation algorithm
// This code is based on the C++ code of
// Xianping Ge, ( http://www.ics.uci.edu/~xge ) which he kindly
// put in the public domain.
// That code was in turn based on the algorithms of
// John Platt, ( http://research.microsoft.com/~jplatt ) described in
// Platt, J. C. (1998). Fast Training of Support Vector Machines Using Sequential
// Minimal Optimisation. In Advances in Kernel Methods - Support Vector Learning.
// B. Scholkopf, C. Burges and A. Smola, MIT Press: 185-208. and other papers.
18 
19 #ifdef _MSC_VER
20 #  include "vcl_msvc_warnings.h"
21 #endif
22 #include "vnl/vnl_math.h"
23 #include <cassert>
24 
25 // Linear SMO
26 
27 // ----------------------------------------------------------------
28 
kernel(int i1,int i2)29 double clsfy_smo_1_lin::kernel(int i1, int i2)
30 {
31   if (i1==i2)
32     return precomputed_self_dot_product_[i1];
33   else
34     return dot_product(data_point(i1),data_point(i2));
35 }
36 
37 // ----------------------------------------------------------------
38 
kernel(int i1,int i2)39 double clsfy_smo_1_rbf::kernel(int i1, int i2)
40 {
41   if (i1==i2) return 1.0;
42   double s = dot_product(data_point(i1),data_point(i2));
43   s *= -2.0f;
44   s += precomputed_self_dot_product_[i1] + precomputed_self_dot_product_[i2];
45   return std::exp(gamma_ * s);
46 }
47 
48 // ----------------------------------------------------------------
49 
50 //: Takes a copy of the data wrapper, but not the data.
51 // Be careful not to destroy the underlying data while using this object.
set_data(const mbl_data_wrapper<vnl_vector<double>> & data,const std::vector<int> & targets)52 void clsfy_smo_1_lin::set_data(const mbl_data_wrapper<vnl_vector<double> >& data, const std::vector<int>& targets)
53 {
54   const unsigned N = data.size();
55   data_ = data.clone();
56 
57   assert(targets.size() == N);
58   target_ = targets;
59 
60   precomputed_self_dot_product_.resize(N);
61   for (unsigned int i=0; i<N; i++)
62       precomputed_self_dot_product_[i] = dot_product(data_point(i),data_point(i));
63 }
64 
65 // ----------------------------------------------------------------
66 
//: The upper bound on the Lagrange multipliers (the SVM penalty term "C").
// A huge value (the default) effectively means "no bound" - see set_C().
double clsfy_smo_1_lin::C() const
{
  return C_;
}
71 
72 // ----------------------------------------------------------------
73 
set_C(double C)74 void clsfy_smo_1_lin::set_C(double C)
75 {
76   if (C <= 0) C_ = vnl_huge_val(double());
77   else C_ = C;
78 }
79 
80 // ----------------------------------------------------------------
81 
//: 0.5 sigma^-2, where sigma is the width of the Gaussian kernel
// gamma_ is stored negated, ready for use in the kernel exponent,
// so undo the sign before reporting it.
double clsfy_smo_1_rbf::gamma() const
{
  return -gamma_;
}
87 
88 // ----------------------------------------------------------------
89 
//: Control sigma, the width of the Gaussian kernel.
// gamma = 0.5 sigma^-2
// Stored negated so kernel() can use it directly as the exponent coefficient.
void clsfy_smo_1_rbf::set_gamma(double gamma)
{
  gamma_ = -gamma;
}
96 
97 // ----------------------------------------------------------------
98 
clsfy_smo_1_rbf()99 clsfy_smo_1_rbf::clsfy_smo_1_rbf():
100   gamma_((double)-0.5)
101 {
102 }
103 
104 // ----------------------------------------------------------------
105 
//: Default constructor.
// C defaults to a huge value, i.e. no upper bound on the Lagrange multipliers.
clsfy_smo_1_lin::clsfy_smo_1_lin():
  C_(vnl_huge_val(double()))
{
}
110 
111 // ----------------------------------------------------------------
112 
//: Attempt one joint optimisation step over the multiplier pair (i1, i2).
// This is the analytic two-variable sub-problem solver at the heart of
// Platt's SMO algorithm.  E1 is the current prediction error on example i1.
// Returns 1 if the multipliers (and threshold/error cache) were updated,
// 0 if no useful progress was possible for this pair.
int clsfy_smo_1_lin::take_step(int i1, int i2, double E1)
{
  int  s;
  double a1, a2;       // new values of alpha_1, alpha_2
  double E2, L, H, k11, k22, k12, eta, Lobj, Hobj;

  if (i1 == i2) return 0; // can't jointly optimise a multiplier with itself


  const double alph1 = alph_[i1]; // old_values of alpha_1
  const int y1 = target_[i1];

  const double alph2 = alph_[i2]; // old_values of alpha_2
  const int y2 = target_[i2];
  // The error cache is only kept valid for unbound multipliers (0 < alpha < C);
  // otherwise recompute the error from scratch.
  if (alph2 > 0 && alph2 < C_)
    E2 = error_cache_[i2];
  else
    E2 = learned_func(i2) - y2;

  s = y1 * y2; // +1 if the two targets agree, -1 otherwise

  // Compute the ends [L, H] of the segment on which alpha_2 must lie, given
  // the box constraint 0 <= alpha <= C and the linear equality constraint
  // that keeps alpha_1 in its box too.
  if (y1 == y2) {
    const double g = alph1 + alph2;
    if (g > C_) {
        L = g-C_;
        H = C_;
    }
    else {
        L = 0;
        H = g;
    }
  }
  else
  {
    const double g = alph1 - alph2;
    if (g > 0) {
        L = 0;
        H = C_ - g;
    }
    else {
        L = -g;
        H = C_;
    }
  }

  if (L == H)
    return 0; // segment is a single point - nothing to optimise

  k11 = kernel(i1, i1);
  k12 = kernel(i1, i2);
  k22 = kernel(i2, i2);
  // eta is the (negated) second derivative of the objective along the
  // constraint line; eta < 0 is the usual, strictly-curved case.
  eta = 2.0 * k12 - k11 - k22;


  if (eta < 0) {
    // Unconstrained optimum along the line, then clip into [L, H].
    a2 = alph2 + y2 * (E2 - E1) / eta;
    if (a2 < L)
      a2 = L;
    else if (a2 > H)
      a2 = H;
  }
  else {
    // Degenerate curvature: evaluate the objective at both segment ends
    // and move to whichever end is better (with eps_ hysteresis).
    {
      double c1 = eta/2;
      double c2 = y2 * (E1-E2)- eta * alph2;
      Lobj = c1 * L * L + c2 * L;
      Hobj = c1 * H * H + c2 * H;
    }

    if (Lobj > Hobj+eps_)
      a2 = L;
    else if (Lobj < Hobj-eps_)
      a2 = H;
    else
      a2 = alph2; // ends indistinguishable - leave alpha_2 alone
  }

  // Give up if the change in alpha_2 is negligible (relative to its size).
  if (vnl_math::abs(a2-alph2) < eps_*(a2+alph2+eps_) )
    return 0;

  // alpha_1 follows from the equality constraint; if it leaves its box,
  // clip it and push the correction back into alpha_2.
  a1 = alph1 - s * (a2 - alph2);
  if (a1 < 0.0) {
    a2 += s * a1;
    a1 = 0;
  }
  else if (a1 > C_) {
    double t = a1-C_;
    a2 += s * t;
    a1 = C_;
  }


  // Update the threshold b_ so the KKT conditions hold for whichever of the
  // two new multipliers is unbound; average the two estimates if both are
  // at a bound.
  double delta_b;
  {
    double b1, b2, bnew;

    const double eps_2 = eps_*eps_;

    if (a1 > eps_2 && a1 < (C_*(1-eps_2)))
      bnew = b_ + E1 + y1 * (a1 - alph1) * k11 + y2 * (a2 - alph2) * k12;
    else {
      if (a2 > eps_2 && a2 < (C_*(1-eps_2)))
        bnew = b_ + E2 + y1 * (a1 - alph1) * k12 + y2 * (a2 - alph2) * k22;
      else {
        b1 = b_ + E1 + y1 * (a1 - alph1) * k11 + y2 * (a2 - alph2) * k12;
        b2 = b_ + E2 + y1 * (a1 - alph1) * k12 + y2 * (a2 - alph2) * k22;
        bnew = (b1 + b2) / 2;
      }
    }
    delta_b = bnew - b_;
    b_ = bnew;
  }

  // Incrementally update the cached errors of all unbound multipliers to
  // reflect the change in alpha_1, alpha_2 and b_; the freshly optimised
  // pair is assigned zero error.
  {
    const double t1 = y1 * (a1-alph1);
    const double t2 = y2 * (a2-alph2);

    for (unsigned int i=0; i<data_->size(); i++)
      if (0 < alph_[i] && alph_[i] < C_)
        error_cache_[i] +=  t1 * kernel(i1,i) + t2 * kernel(i2,i) - delta_b;
    error_cache_[i1] = 0.0;
    error_cache_[i2] = 0.0;
  }

  alph_[i1] = a1;  // Store a1 in the alpha array.
  alph_[i2] = a2;  // Store a2 in the alpha array.

  return 1;
}
242 
243 // ----------------------------------------------------------------
244 
//: Try to make optimisation progress using example i1 as the first member
// of a multiplier pair.
// If i1 violates its KKT condition, a partner i2 is sought using Platt's
// three second-choice heuristics in turn, and take_step() is attempted.
// Returns 1 on the first successful step, 0 if no progress was made.
int clsfy_smo_1_lin::examine_example(int i1)
{
  double  E1, r1;
  const unsigned long N = data_->size();

  const double y1 = target_[i1];
  const double alph1 = alph_(i1);

  // Cached error is only valid for unbound multipliers (0 < alpha < C).
  if (alph1 > 0 && alph1 < C_)
    E1 = error_cache_[i1];
  else
    E1 = learned_func(i1) - y1;

  r1 = y1 * E1;
  if ((r1 < -tolerance_ && alph1 < C_) ||
      (r1 > tolerance_ && alph1 > 0)) // is the KKT condition for alph1 broken?
  {
    // Try i2 by three ways; if successful, then immediately return 1;
    {
      unsigned int k;
      int i2;
      double tmax;

      // Second choice heuristic A - Find the example i2 which maximises
      // |E1 - E2| where E1
      // (only unbound examples have a valid cached error to compare)

      for (i2 = (-1), tmax = 0, k = 0; k < N; ++k)
        if (alph_(k) > 0 && alph_(k) < C_)
        {
          double E2, temp;

          E2 = error_cache_[k];
          temp = vnl_math::abs(E1 - E2);
          if (temp > tmax)
          {
            tmax = temp;
            i2 = k;
          }
        }

      if (i2 >= 0) {
        if (take_step (i1, i2, E1))
          return 1;
      }
    }

    // second choice Heuristic B - Find any unbound example that give positive progress.
    // start from random location
    for (unsigned long k0 = rng_.lrand32(N-1), k = k0; k < N + k0; ++k)
    {
      unsigned long i2 = k % N; // wrap around so every example is visited once
      if (alph_(i2) > 0 && alph_(i2) < C_)
      {
        if (take_step(i1, i2, E1))
          return 1;
      }
    }

    // second choice Heuristic C - Find any example that give positive progress.
    // start from random location
    for (unsigned long k0 = rng_.lrand32(N-1), k = k0; k < N + k0; ++k)
    {
      unsigned long i2 = k % N;
      if (alph_(i2) == 0 || alph_(i2) == C_) // bound examples only - B covered the rest
      {
        if (take_step(i1, i2, E1))
          return 1;
      }
    }
  }
  return 0; // i1 satisfies KKT, or no partner gave progress
}
317 
318 // ----------------------------------------------------------------
319 
//: Run the SMO optimisation.
// Checks that the kernel width has been set before delegating to the
// base-class solver (which is kernel-agnostic via the virtual kernel()).
int clsfy_smo_1_rbf::calc()
{
  assert(gamma_!=0.0);  // Is gamma set?
  return clsfy_smo_1_lin::calc();
}
325 
326 // ----------------------------------------------------------------
327 
//: Run the SMO main loop until every example satisfies its KKT condition.
// Requires set_data() to have been called.  On exit alph_, b_ and error_
// describe the trained machine.  Returns 0.
int clsfy_smo_1_lin::calc()
{
  //Check a bunch of things

  assert (data_ != nullptr); // Check that the data has been set.

  const unsigned long N = data_->size();
  assert(N != 0);     // Check that there is some data.

  if (alph_.empty()) // only initialise alph if it hasn't been externally set.
  {
    alph_.set_size(N);
    alph_.fill(0.0);
  }

  // E_i = u_i - y_i = 0 - y_i = -y_i
  error_cache_.resize(N);

  // Platt's outer loop: alternate between sweeping all examples and sweeping
  // only the unbound ones, until a full sweep changes no multipliers.
  unsigned long numChanged = 0;
  bool examineAll = true;
  while (numChanged > 0 || examineAll)
  {
    numChanged = 0;
    if (examineAll)
      for (unsigned int k = 0; k < N; k++)
        numChanged += examine_example (k);
    else
      for (unsigned int k = 0; k < N; k++)
        if (alph_[k] != 0 && alph_[k] != C_) // unbound multipliers only
          numChanged += examine_example (k);
    if (examineAll)
      examineAll = false;
    else if (numChanged == 0)
      examineAll = true; // unbound sweep stalled - fall back to a full sweep

    // Optional per-iteration diagnostics: dual objective, negative-alpha
    // sanity check, and sum of alpha_i * y_i (should converge towards 0).
#if !defined NDEBUG &&  CLSFY_SMO_BASE_PRINT_PROGRESS >1
    {
      double s = 0.;
      for (int i=0; i<N; i++)
        s += alph_[i];
      double t = 0.;
      for (int i=0; i<N; i++)
      {
        if (alph_(i) != 0.0)
        {
          for (int j=0; j<N; j++)
            if (alph_[j] != 0.0)
              t += alph_[i]*alph_[j]*target_[i]*target_[j]*kernel(i,j);
        }
      }
      std::cerr << "Objective function=" << (s - t/2.) << '\t';
      for (int i=0; i<N; i++)
        if (alph_[i] < 0)
          std::cerr << "alph_[" << i << "]=" << alph_[i] << " < 0\n";
      s = 0.;
      for (int i=0; i<N; i++)
        s += alph_[i] * target_[i];
      std::cerr << "s=" << s << "\terror_rate=" << error_rate() << '\t';
    }
#endif

    // Optional per-iteration support-vector counts (bound vs unbound).
#if !defined NDEBUG && CLSFY_SMO_BASE_PRINT_PROGRESS
    {
      int non_bound_support =0;
      int bound_support =0;
      for (int i=0; i<N; i++)
        if (alph_[i] > 0)
        {
          if (alph_[i] < C_)
            non_bound_support++;
          else
            bound_support++;
        }
      std::cerr << "non_bound=" << non_bound_support << '\t'
               << "bound_support=" << bound_support << '\n';
    }
#endif
  }

  // Record the final training error rate.
  error_ = error_rate();

#if !defined NDEBUG && CLSFY_SMO_BASE_PRINT_PROGRESS
  std::cerr << "Threshold=" << b_ << '\n';
  std::cout << "Error rate=" << error_ << std::endl;
#endif

  return 0;
}
416