1 #include <math.h>
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <ctype.h>
5 #include <float.h>
6 #include <string.h>
7 #include <stdarg.h>
8 #include <limits.h>
9 #include <locale.h>
10 #include <assert.h>
11 #include "svm.h"
int libsvm_version = LIBSVM_VERSION;
// Single-precision storage for cached kernel values (halves cache memory
// versus double at a small accuracy cost).
typedef float Qfloat;
typedef signed char schar;
#ifndef min
template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
#endif
#ifndef max
template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
#endif
// Two-value swap; defined locally instead of pulling in <algorithm>.
template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
// Allocate dst = new T[n] and bit-copy n elements from src.
// NOTE(review): memcpy assumes sizeof(S)==sizeof(T) and trivially copyable
// elements -- true for the pointer/POD types this file clones.
template <class S, class T> static inline void clone(T*& dst, S* src, int n)
{
	dst = new T[n];
	memcpy((void *)dst,(void *)src,sizeof(T)*n);
}
// Compute base^times by square-and-multiply in O(log |times|) multiplies.
// Generalized to accept negative exponents (returns 1/base^|times|);
// behavior for times >= 0 is unchanged.  Callers in this file only pass
// the validated polynomial degree (>= 0).
static inline double powi(double base, int times)
{
	if(times < 0)
		// Note: for times == INT_MIN the negation overflows; no caller
		// here can produce that value.
		return 1.0/powi(base, -times);

	double tmp = base, ret = 1.0;

	for(int t=times; t>0; t/=2)
	{
		if(t%2==1) ret*=tmp;
		tmp = tmp * tmp;
	}
	return ret;
}
#define INF HUGE_VAL
// Floor substituted for non-positive quadratic coefficients (Fan et al.).
#define TAU 1e-12
// malloc with the element type spelled once.
#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
41
// Default output hook: write the message to stdout and flush immediately
// so progress marks appear as they are produced.
static void print_string_stdout(const char *s)
{
	fprintf(stdout,"%s",s);
	fflush(stdout);
}
// Active output callback; presumably replaceable through the svm.h API
// elsewhere in the library -- TODO confirm against the full file.
static void (*svm_print_string) (const char *) = &print_string_stdout;
#if 0
// Verbose build: format the message and forward it to the callback.
static void info(const char *fmt,...)
{
	char buf[BUFSIZ];
	va_list ap;
	va_start(ap,fmt);
	vsprintf(buf,fmt,ap);
	va_end(ap);
	(*svm_print_string)(buf);
}
#else
// Quiet build (selected here): progress messages are compiled out.
static void info(const char *fmt,...) {}
#endif
61
//
// Kernel Cache
//
// l is the number of total data items
// size is the cache size limit in bytes
//
// LRU cache of kernel-matrix columns: head[i] owns the buffer for column i,
// and lru_head anchors a circular doubly-linked list ordered by recency.
class Cache
{
public:
	Cache(int l,long int size);
	~Cache();

	// request data [0,len)
	// return some position p where [p,len) need to be filled
	// (p >= len if nothing needs to be filled)
	int get_data(const int index, Qfloat **data, int len);
	void swap_index(int i, int j);
private:
	int l;
	long int size;	// remaining capacity, counted in Qfloat elements
	struct head_t
	{
		head_t *prev, *next;	// a circular list
		Qfloat *data;
		int len;	// data[0,len) is cached in this entry
	};

	head_t *head;	// one entry per data item, zero-initialized
	head_t lru_head;	// sentinel; lru_head.next is least recently used
	void lru_delete(head_t *h);
	void lru_insert(head_t *h);
};
94
// Convert the byte budget to a count of Qfloat slots, charge the head[]
// bookkeeping against it, and guarantee room for at least two full columns
// (working-set selection needs columns i and j at the same time).
Cache::Cache(int l_,long int size_):l(l_),size(size_)
{
	head = (head_t *)calloc(l,sizeof(head_t));	// initialized to 0
	size /= sizeof(Qfloat);
	size -= l * sizeof(head_t) / sizeof(Qfloat);
	size = max(size, 2 * (long int) l);	// cache must be large enough for two columns
	lru_head.next = lru_head.prev = &lru_head;
}
103
~Cache()104 Cache::~Cache()
105 {
106 for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
107 free(h->data);
108 free(head);
109 }
110
lru_delete(head_t * h)111 void Cache::lru_delete(head_t *h)
112 {
113 // delete from current location
114 h->prev->next = h->next;
115 h->next->prev = h->prev;
116 }
117
lru_insert(head_t * h)118 void Cache::lru_insert(head_t *h)
119 {
120 // insert to last position
121 h->next = &lru_head;
122 h->prev = lru_head.prev;
123 h->prev->next = h;
124 h->next->prev = h;
125 }
126
// Return cached column `index` in *data; the return value is the count of
// already-valid leading entries, so the caller must fill [ret, len).
// Evicts least-recently-used columns until `len` Qfloats fit.
int Cache::get_data(const int index, Qfloat **data, int len)
{
	head_t *h = &head[index];
	// Temporarily unlink; re-inserted below so this entry becomes MRU.
	if(h->len) lru_delete(h);
	int more = len - h->len;

	if(more > 0)
	{
		// free old space
		while(size < more)
		{
			head_t *old = lru_head.next;
			lru_delete(old);
			free(old->data);
			size += old->len;
			old->data = 0;
			old->len = 0;
		}

		// allocate new space
		// NOTE(review): realloc return value is unchecked; on allocation
		// failure the old buffer leaks and the caller dereferences NULL.
		h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len);
		size -= more;
		// After the swap h->len is the new length and `len` is the old
		// one, which is exactly the first index the caller must fill.
		swap(h->len,len);
	}

	lru_insert(h);
	*data = h->data;
	return len;
}
156
// Swap the cached columns of items i and j, then patch row entries i/j
// inside every other cached column; columns long enough to cover i but
// not j can no longer be kept consistent and are evicted.
void Cache::swap_index(int i, int j)
{
	if(i==j) return;

	if(head[i].len) lru_delete(&head[i]);
	if(head[j].len) lru_delete(&head[j]);
	swap(head[i].data,head[j].data);
	swap(head[i].len,head[j].len);
	if(head[i].len) lru_insert(&head[i]);
	if(head[j].len) lru_insert(&head[j]);

	// Ensure i < j so the length checks below are ordered.
	if(i>j) swap(i,j);
	for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
	{
		if(h->len > i)
		{
			if(h->len > j)
				swap(h->data[i],h->data[j]);
			else
			{
				// give up
				lru_delete(h);
				free(h->data);
				size += h->len;
				h->data = 0;
				h->len = 0;
			}
		}
	}
}
187
//
// Kernel evaluation
//
// the static method k_function is for doing single kernel evaluation
// the constructor of Kernel prepares to calculate the l*l kernel matrix
// the member function get_Q is for getting one column from the Q Matrix
//
// Abstract interface the solvers work against: a provider of Q columns
// and the Q diagonal, with index swapping to support shrinking.
class QMatrix {
public:
	virtual Qfloat *get_Q(int column, int len) const = 0;
	virtual double *get_QD() const = 0;
	virtual void swap_index(int i, int j) const = 0;
	virtual ~QMatrix() {}
};
202
// Shared kernel machinery: stores the training points, dispatches to the
// configured kernel through a member-function pointer, and precomputes
// squared norms for the RBF kernel.
class Kernel: public QMatrix {
public:
	Kernel(int l, svm_node * const * x, const svm_parameter& param);
	virtual ~Kernel();

	// Evaluate the kernel once on two arbitrary sparse vectors.
	static double k_function(const svm_node *x, const svm_node *y,
				 const svm_parameter& param);

	virtual Qfloat *get_Q(int column, int len) const = 0;
	virtual double *get_QD() const = 0;
	virtual void swap_index(int i, int j) const	// not so const...
	{
		swap(x[i],x[j]);
		if(x_square) swap(x_square[i],x_square[j]);
	}
protected:

	// Bound at construction to one of the kernel_* members below.
	double (Kernel::*kernel_function)(int i, int j) const;

private:
	const svm_node **x;
	double *x_square;	// ||x_i||^2, allocated for the RBF kernel only

	// svm_parameter
	const int kernel_type;
	const int degree;
	const double gamma;
	const double coef0;

	// Sparse dot product; both index lists end with index == -1.
	static double dot(const svm_node *px, const svm_node *py);
	double kernel_linear(int i, int j) const
	{
		return dot(x[i],x[j]);
	}
	double kernel_poly(int i, int j) const
	{
		return powi(gamma*dot(x[i],x[j])+coef0,degree);
	}
	double kernel_rbf(int i, int j) const
	{
		return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
	}
	double kernel_sigmoid(int i, int j) const
	{
		return tanh(gamma*dot(x[i],x[j])+coef0);
	}
	double kernel_precomputed(int i, int j) const
	{
		// Entry 0 of row j stores the sample's serial number, used as
		// a column index into row i of the precomputed matrix.
		return x[i][(int)(x[j][0].value)].value;
	}
};
254
255
// Free-function convenience wrapper over the static Kernel::k_function
// (gives external callers a non-member entry point).
double k_function(const svm_node* x, const svm_node* y, const svm_parameter& param)
{
	return Kernel::k_function(x, y, param);
}
260
Kernel(int l,svm_node * const * x_,const svm_parameter & param)261 Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param)
262 :kernel_type(param.kernel_type), degree(param.degree),
263 gamma(param.gamma), coef0(param.coef0)
264 {
265 switch(kernel_type)
266 {
267 case LINEAR:
268 kernel_function = &Kernel::kernel_linear;
269 break;
270 case POLY:
271 kernel_function = &Kernel::kernel_poly;
272 break;
273 case RBF:
274 kernel_function = &Kernel::kernel_rbf;
275 break;
276 case SIGMOID:
277 kernel_function = &Kernel::kernel_sigmoid;
278 break;
279 case PRECOMPUTED:
280 kernel_function = &Kernel::kernel_precomputed;
281 break;
282 }
283
284 clone(x,x_,l);
285
286 if(kernel_type == RBF)
287 {
288 x_square = new double[l];
289 for(int i=0;i<l;i++)
290 x_square[i] = dot(x[i],x[i]);
291 }
292 else
293 x_square = 0;
294 }
295
// x was clone()'d and x_square new[]'d (or left null); release both.
// delete[] on a null x_square is a no-op.
Kernel::~Kernel()
{
	delete[] x;
	delete[] x_square;
}
301
dot(const svm_node * px,const svm_node * py)302 double Kernel::dot(const svm_node *px, const svm_node *py)
303 {
304 double sum = 0;
305 while(px->index != -1 && py->index != -1)
306 {
307 if(px->index == py->index)
308 {
309 sum += px->value * py->value;
310 ++px;
311 ++py;
312 }
313 else
314 {
315 if(px->index > py->index)
316 ++py;
317 else
318 ++px;
319 }
320 }
321 return sum;
322 }
323
// One-off kernel evaluation on two sparse vectors (prediction path);
// mirrors the kernel_* members but walks the -1-terminated arrays
// directly instead of relying on precomputed norms.
double Kernel::k_function(const svm_node *x, const svm_node *y,
			  const svm_parameter& param)
{
	switch(param.kernel_type)
	{
		case LINEAR:
			return dot(x,y);
		case POLY:
			return powi(param.gamma*dot(x,y)+param.coef0,param.degree);
		case RBF:
		{
			// Accumulate ||x - y||^2 by merging the two index lists;
			// unmatched indices contribute their squared value alone.
			double sum = 0;
			while(x->index != -1 && y->index !=-1)
			{
				if(x->index == y->index)
				{
					double d = x->value - y->value;
					sum += d*d;
					++x;
					++y;
				}
				else
				{
					if(x->index > y->index)
					{
						sum += y->value * y->value;
						++y;
					}
					else
					{
						sum += x->value * x->value;
						++x;
					}
				}
			}

			// Tail of whichever vector still has entries.
			while(x->index != -1)
			{
				sum += x->value * x->value;
				++x;
			}

			while(y->index != -1)
			{
				sum += y->value * y->value;
				++y;
			}

			return exp(-param.gamma*sum);
		}
		case SIGMOID:
			return tanh(param.gamma*dot(x,y)+param.coef0);
		case PRECOMPUTED:  //x: test (validation), y: SV
			return x[(int)(y->value)].value;
		default:
			return 0;  // Unreachable
	}
}
382
// An SMO algorithm in Fan et al., JMLR 6(2005), p. 1889--1918
// Solves:
//
//	min 0.5(\alpha^T Q \alpha) + p^T \alpha
//
//		y^T \alpha = \delta
//		y_i = +1 or -1
//		0 <= alpha_i <= Cp for y_i = 1
//		0 <= alpha_i <= Cn for y_i = -1
//
// Given:
//
//	Q, p, y, Cp, Cn, and an initial feasible point \alpha
//	l is the size of vectors and matrices
//	eps is the stopping tolerance
//
// solution will be put in \alpha, objective value will be put in obj
//
// Note: this variant carries a per-example C[] (instance weights) rather
// than only the two scalars Cp/Cn.
class Solver {
public:
	Solver() {};
	virtual ~Solver() {};

	struct SolutionInfo {
		double obj;
		double rho;
		double *upper_bound;	// caller-owned array of length l
		double r;	// for Solver_NU
	};

	void Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
		   double *alpha_, const double* C_, double eps,
		   SolutionInfo* si, int shrinking);
protected:
	int active_size;	// first active_size indices are not shrunk
	schar *y;
	double *G;	// gradient of objective function
	enum { LOWER_BOUND, UPPER_BOUND, FREE };
	char *alpha_status;	// LOWER_BOUND, UPPER_BOUND, FREE
	double *alpha;
	const QMatrix *Q;
	const double *QD;	// diagonal of Q
	double eps;
	double Cp,Cn;
	double *C;	// per-example upper bounds
	double *p;
	int *active_set;	// active index -> original index
	double *G_bar;	// gradient, if we treat free variables as 0
	int l;
	bool unshrink;	// XXX

	double get_C(int i)
	{
		return C[i];
	}
	void update_alpha_status(int i)
	{
		if(alpha[i] >= get_C(i))
			alpha_status[i] = UPPER_BOUND;
		else if(alpha[i] <= 0)
			alpha_status[i] = LOWER_BOUND;
		else alpha_status[i] = FREE;
	}
	bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
	bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
	bool is_free(int i) { return alpha_status[i] == FREE; }
	void swap_index(int i, int j);
	void reconstruct_gradient();
	virtual int select_working_set(int &i, int &j);
	virtual double calculate_rho();
	virtual void do_shrinking();
private:
	bool be_shrunk(int i, double Gmax1, double Gmax2);
};
457
// Swap every per-example array (and the Q matrix's own state) for indices
// i and j; used by shrinking to keep active examples contiguous.
void Solver::swap_index(int i, int j)
{
	Q->swap_index(i,j);
	swap(y[i],y[j]);
	swap(G[i],G[j]);
	swap(alpha_status[i],alpha_status[j]);
	swap(alpha[i],alpha[j]);
	swap(p[i],p[j]);
	swap(active_set[i],active_set[j]);
	swap(G_bar[i],G_bar[j]);
	swap(C[i],C[j]);
}
470
// Rebuild G for the shrunk (inactive) part: start from G_bar (bounded
// alphas' contribution) + p, then add the free variables' contribution,
// choosing whichever of the two equivalent loop orders touches less of Q.
void Solver::reconstruct_gradient()
{
	// reconstruct inactive elements of G from G_bar and free variables

	if(active_size == l) return;

	int i,j;
	int nr_free = 0;

	for(j=active_size;j<l;j++)
		G[j] = G_bar[j] + p[j];

	for(j=0;j<active_size;j++)
		if(is_free(j))
			nr_free++;

	if(2*nr_free < active_size)
		info("\nWARNING: using -h 0 may be faster\n");

	// Cost heuristic: iterate rows of the inactive part vs columns of
	// the free part, whichever needs fewer kernel evaluations.
	if (nr_free*l > 2*active_size*(l-active_size))
	{
		for(i=active_size;i<l;i++)
		{
			const Qfloat *Q_i = Q->get_Q(i,active_size);
			for(j=0;j<active_size;j++)
				if(is_free(j))
					G[i] += alpha[j] * Q_i[j];
		}
	}
	else
	{
		for(i=0;i<active_size;i++)
			if(is_free(i))
			{
				const Qfloat *Q_i = Q->get_Q(i,l);
				double alpha_i = alpha[i];
				for(j=active_size;j<l;j++)
					G[j] += alpha_i * Q_i[j];
			}
	}
}
512
// Run SMO (Fan et al., JMLR 2005) on the subproblem described above the
// class.  All inputs are cloned, so the caller's arrays are untouched
// except alpha_, which receives the solution; si receives rho, the
// objective value, and the per-example upper bounds.
void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
		   double *alpha_, const double* C_, double eps,
		   SolutionInfo* si, int shrinking)
{
	this->l = l;
	this->Q = &Q;
	QD=Q.get_QD();
	clone(p, p_,l);
	clone(y, y_,l);
	clone(alpha,alpha_,l);
	clone(C,C_,l);
	this->eps = eps;
	unshrink = false;

	// initialize alpha_status
	{
		alpha_status = new char[l];
		for(int i=0;i<l;i++)
			update_alpha_status(i);
	}

	// initialize active set (for shrinking)
	{
		active_set = new int[l];
		for(int i=0;i<l;i++)
			active_set[i] = i;
		active_size = l;
	}

	// initialize gradient
	{
		G = new double[l];
		G_bar = new double[l];
		int i;
		for(i=0;i<l;i++)
		{
			G[i] = p[i];
			G_bar[i] = 0;
		}
		for(i=0;i<l;i++)
			if(!is_lower_bound(i))
			{
				const Qfloat *Q_i = Q.get_Q(i,l);
				double alpha_i = alpha[i];
				int j;
				for(j=0;j<l;j++)
					G[j] += alpha_i*Q_i[j];
				if(is_upper_bound(i))
					for(j=0;j<l;j++)
						G_bar[j] += get_C(i) * Q_i[j];
			}
	}

	// optimization step

	int iter = 0;
	// Iteration cap: max(1e7, 100*l), with overflow protection on 100*l.
	int max_iter = max(10000000, l>INT_MAX/100 ? INT_MAX : 100*l);
	int counter = min(l,1000)+1;

	while(iter < max_iter)
	{
		// show progress and do shrinking

		if(--counter == 0)
		{
			counter = min(l,1000);
			if(shrinking) do_shrinking();
			info(".");
		}

		int i,j;
		if(select_working_set(i,j)!=0)
		{
			// reconstruct the whole gradient
			reconstruct_gradient();
			// reset active set size and check
			active_size = l;
			info("*");
			// Optimal on the full problem -> done; otherwise continue
			// and shrink again on the next iteration.
			if(select_working_set(i,j)!=0)
				break;
			else
				counter = 1;	// do shrinking next iteration
		}

		++iter;

		// update alpha[i] and alpha[j], handle bounds carefully

		const Qfloat *Q_i = Q.get_Q(i,active_size);
		const Qfloat *Q_j = Q.get_Q(j,active_size);

		double C_i = get_C(i);
		double C_j = get_C(j);

		double old_alpha_i = alpha[i];
		double old_alpha_j = alpha[j];

		// Closed-form solution of the two-variable subproblem; the two
		// branches follow the sign pattern of y_i, y_j, clipping back
		// into the feasible box afterwards.
		if(y[i]!=y[j])
		{
			double quad_coef = QD[i]+QD[j]+2*Q_i[j];
			if (quad_coef <= 0)
				quad_coef = TAU;
			double delta = (-G[i]-G[j])/quad_coef;
			double diff = alpha[i] - alpha[j];
			alpha[i] += delta;
			alpha[j] += delta;

			if(diff > 0)
			{
				if(alpha[j] < 0)
				{
					alpha[j] = 0;
					alpha[i] = diff;
				}
			}
			else
			{
				if(alpha[i] < 0)
				{
					alpha[i] = 0;
					alpha[j] = -diff;
				}
			}
			if(diff > C_i - C_j)
			{
				if(alpha[i] > C_i)
				{
					alpha[i] = C_i;
					alpha[j] = C_i - diff;
				}
			}
			else
			{
				if(alpha[j] > C_j)
				{
					alpha[j] = C_j;
					alpha[i] = C_j + diff;
				}
			}
		}
		else
		{
			double quad_coef = QD[i]+QD[j]-2*Q_i[j];
			if (quad_coef <= 0)
				quad_coef = TAU;
			double delta = (G[i]-G[j])/quad_coef;
			double sum = alpha[i] + alpha[j];
			alpha[i] -= delta;
			alpha[j] += delta;

			if(sum > C_i)
			{
				if(alpha[i] > C_i)
				{
					alpha[i] = C_i;
					alpha[j] = sum - C_i;
				}
			}
			else
			{
				if(alpha[j] < 0)
				{
					alpha[j] = 0;
					alpha[i] = sum;
				}
			}
			if(sum > C_j)
			{
				if(alpha[j] > C_j)
				{
					alpha[j] = C_j;
					alpha[i] = sum - C_j;
				}
			}
			else
			{
				if(alpha[i] < 0)
				{
					alpha[i] = 0;
					alpha[j] = sum;
				}
			}
		}

		// update G

		double delta_alpha_i = alpha[i] - old_alpha_i;
		double delta_alpha_j = alpha[j] - old_alpha_j;

		for(int k=0;k<active_size;k++)
		{
			G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
		}

		// update alpha_status and G_bar

		{
			bool ui = is_upper_bound(i);
			bool uj = is_upper_bound(j);
			update_alpha_status(i);
			update_alpha_status(j);
			int k;
			// G_bar accumulates the contribution of at-upper-bound
			// alphas; refresh it only when i or j crossed that bound.
			if(ui != is_upper_bound(i))
			{
				Q_i = Q.get_Q(i,l);
				if(ui)
					for(k=0;k<l;k++)
						G_bar[k] -= C_i * Q_i[k];
				else
					for(k=0;k<l;k++)
						G_bar[k] += C_i * Q_i[k];
			}

			if(uj != is_upper_bound(j))
			{
				Q_j = Q.get_Q(j,l);
				if(uj)
					for(k=0;k<l;k++)
						G_bar[k] -= C_j * Q_j[k];
				else
					for(k=0;k<l;k++)
						G_bar[k] += C_j * Q_j[k];
			}
		}
	}

	if(iter >= max_iter)
	{
		if(active_size < l)
		{
			// reconstruct the whole gradient to calculate objective value
			reconstruct_gradient();
			active_size = l;
			info("*");
		}
		fprintf(stderr,"\nWARNING: reaching max number of iterations\n");
	}

	// calculate rho

	si->rho = calculate_rho();

	// calculate objective value
	{
		double v = 0;
		int i;
		for(i=0;i<l;i++)
			v += alpha[i] * (G[i] + p[i]);

		si->obj = v/2;
	}

	// put back the solution
	{
		for(int i=0;i<l;i++)
			alpha_[active_set[i]] = alpha[i];
	}

	// juggle everything back
	/*{
		for(int i=0;i<l;i++)
			while(active_set[i] != i)
				swap_index(i,active_set[i]);
				// or Q.swap_index(i,active_set[i]);
	}*/

	for(int i=0;i<l;i++)
		si->upper_bound[i] = C[i];

	info("\noptimization finished, #iter = %d\n",iter);

	delete[] p;
	delete[] y;
	delete[] C;
	delete[] alpha;
	delete[] alpha_status;
	delete[] active_set;
	delete[] G;
	delete[] G_bar;
}
793
// return 1 if already optimal, return 0 otherwise
// Second-order working-set selection (WSS 2 in Fan et al. 2005).
int Solver::select_working_set(int &out_i, int &out_j)
{
	// return i,j such that
	// i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
	// j: minimizes the decrease of obj value
	//    (if quadratic coefficeint <= 0, replace it with tau)
	//    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)

	double Gmax = -INF;
	double Gmax2 = -INF;
	int Gmax_idx = -1;
	int Gmin_idx = -1;
	double obj_diff_min = INF;

	// First pass: pick i = argmax of the violation over I_up.
	for(int t=0;t<active_size;t++)
		if(y[t]==+1)
		{
			if(!is_upper_bound(t))
				if(-G[t] >= Gmax)
				{
					Gmax = -G[t];
					Gmax_idx = t;
				}
		}
		else
		{
			if(!is_lower_bound(t))
				if(G[t] >= Gmax)
				{
					Gmax = G[t];
					Gmax_idx = t;
				}
		}

	int i = Gmax_idx;
	const Qfloat *Q_i = NULL;
	if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1
		Q_i = Q->get_Q(i,active_size);

	// Second pass: pick j in I_low minimizing the predicted objective
	// decrease; Gmax2 tracks the opposite-side violation for the
	// stopping test.
	for(int j=0;j<active_size;j++)
	{
		if(y[j]==+1)
		{
			if (!is_lower_bound(j))
			{
				double grad_diff=Gmax+G[j];
				if (G[j] >= Gmax2)
					Gmax2 = G[j];
				if (grad_diff > 0)
				{
					double obj_diff;
					double quad_coef = QD[i]+QD[j]-2.0*y[i]*Q_i[j];
					if (quad_coef > 0)
						obj_diff = -(grad_diff*grad_diff)/quad_coef;
					else
						obj_diff = -(grad_diff*grad_diff)/TAU;

					if (obj_diff <= obj_diff_min)
					{
						Gmin_idx=j;
						obj_diff_min = obj_diff;
					}
				}
			}
		}
		else
		{
			if (!is_upper_bound(j))
			{
				double grad_diff= Gmax-G[j];
				if (-G[j] >= Gmax2)
					Gmax2 = -G[j];
				if (grad_diff > 0)
				{
					double obj_diff;
					double quad_coef = QD[i]+QD[j]+2.0*y[i]*Q_i[j];
					if (quad_coef > 0)
						obj_diff = -(grad_diff*grad_diff)/quad_coef;
					else
						obj_diff = -(grad_diff*grad_diff)/TAU;

					if (obj_diff <= obj_diff_min)
					{
						Gmin_idx=j;
						obj_diff_min = obj_diff;
					}
				}
			}
		}
	}

	// KKT violation below tolerance -> optimal.
	if(Gmax+Gmax2 < eps)
		return 1;

	out_i = Gmax_idx;
	out_j = Gmin_idx;
	return 0;
}
893
be_shrunk(int i,double Gmax1,double Gmax2)894 bool Solver::be_shrunk(int i, double Gmax1, double Gmax2)
895 {
896 if(is_upper_bound(i))
897 {
898 if(y[i]==+1)
899 return(-G[i] > Gmax1);
900 else
901 return(-G[i] > Gmax2);
902 }
903 else if(is_lower_bound(i))
904 {
905 if(y[i]==+1)
906 return(G[i] > Gmax2);
907 else
908 return(G[i] > Gmax1);
909 }
910 else
911 return(false);
912 }
913
// Shrink the active set: compute the current maximal violations, undo all
// shrinking once near convergence (unshrink), then move shrinkable
// examples past active_size by swapping them with keepable ones.
void Solver::do_shrinking()
{
	int i;
	double Gmax1 = -INF;	// max { -y_i * grad(f)_i | i in I_up(\alpha) }
	double Gmax2 = -INF;	// max { y_i * grad(f)_i | i in I_low(\alpha) }

	// find maximal violating pair first
	for(i=0;i<active_size;i++)
	{
		if(y[i]==+1)
		{
			if(!is_upper_bound(i))
			{
				if(-G[i] >= Gmax1)
					Gmax1 = -G[i];
			}
			if(!is_lower_bound(i))
			{
				if(G[i] >= Gmax2)
					Gmax2 = G[i];
			}
		}
		else
		{
			if(!is_upper_bound(i))
			{
				if(-G[i] >= Gmax2)
					Gmax2 = -G[i];
			}
			if(!is_lower_bound(i))
			{
				if(G[i] >= Gmax1)
					Gmax1 = G[i];
			}
		}
	}

	// Close to optimal: restore the full problem once so the final
	// iterations run on the exact gradient.
	if(unshrink == false && Gmax1 + Gmax2 <= eps*10)
	{
		unshrink = true;
		reconstruct_gradient();
		active_size = l;
		info("*");
	}

	// Two-pointer compaction: each shrinkable i is swapped with the
	// last non-shrinkable example, then excluded from the active set.
	for(i=0;i<active_size;i++)
		if (be_shrunk(i, Gmax1, Gmax2))
		{
			active_size--;
			while (active_size > i)
			{
				if (!be_shrunk(active_size, Gmax1, Gmax2))
				{
					swap_index(i,active_size);
					break;
				}
				active_size--;
			}
		}
}
974
// Compute the bias term rho: average y_i*G[i] over free variables, or the
// midpoint of the feasible interval when no variable is free.
double Solver::calculate_rho()
{
	double r;
	int nr_free = 0;
	double ub = INF, lb = -INF, sum_free = 0;
	for(int i=0;i<active_size;i++)
	{
		double yG = y[i]*G[i];

		if(is_upper_bound(i))
		{
			if(y[i]==-1)
				ub = min(ub,yG);
			else
				lb = max(lb,yG);
		}
		else if(is_lower_bound(i))
		{
			if(y[i]==+1)
				ub = min(ub,yG);
			else
				lb = max(lb,yG);
		}
		else
		{
			++nr_free;
			sum_free += yG;
		}
	}

	if(nr_free>0)
		r = sum_free/nr_free;
	else
		r = (ub+lb)/2;

	return r;
}
1012
//
// Solver for nu-svm classification and regression
//
// additional constraint: e^T \alpha = constant
//
// Overrides working-set selection, rho and shrinking so the two label
// classes are handled separately (the extra equality constraint forces
// i and j to share the same y).
class Solver_NU : public Solver
{
public:
	Solver_NU() {}
	void Solve(int l, const QMatrix& Q, const double *p, const schar *y,
		   double *alpha, double* C_, double eps,
		   SolutionInfo* si, int shrinking)
	{
		// Keep si so calculate_rho() can also report r.
		this->si = si;
		Solver::Solve(l,Q,p,y,alpha,C_,eps,si,shrinking);
	}
private:
	SolutionInfo *si;
	int select_working_set(int &i, int &j);
	double calculate_rho();
	bool be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4);
	void do_shrinking();
};
1036
// return 1 if already optimal, return 0 otherwise
// Like Solver::select_working_set but the pair must have y_i = y_j, so
// the +1 and -1 classes are scanned independently.
int Solver_NU::select_working_set(int &out_i, int &out_j)
{
	// return i,j such that y_i = y_j and
	// i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
	// j: minimizes the decrease of obj value
	//    (if quadratic coefficeint <= 0, replace it with tau)
	//    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)

	double Gmaxp = -INF;
	double Gmaxp2 = -INF;
	int Gmaxp_idx = -1;

	double Gmaxn = -INF;
	double Gmaxn2 = -INF;
	int Gmaxn_idx = -1;

	int Gmin_idx = -1;
	double obj_diff_min = INF;

	// Per-class argmax of the violation over I_up.
	for(int t=0;t<active_size;t++)
		if(y[t]==+1)
		{
			if(!is_upper_bound(t))
				if(-G[t] >= Gmaxp)
				{
					Gmaxp = -G[t];
					Gmaxp_idx = t;
				}
		}
		else
		{
			if(!is_lower_bound(t))
				if(G[t] >= Gmaxn)
				{
					Gmaxn = G[t];
					Gmaxn_idx = t;
				}
		}

	int ip = Gmaxp_idx;
	int in = Gmaxn_idx;
	const Qfloat *Q_ip = NULL;
	const Qfloat *Q_in = NULL;
	if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
		Q_ip = Q->get_Q(ip,active_size);
	if(in != -1)
		Q_in = Q->get_Q(in,active_size);

	// Pick j minimizing the predicted objective decrease within its
	// own class; Gmaxp2/Gmaxn2 feed the per-class stopping test.
	for(int j=0;j<active_size;j++)
	{
		if(y[j]==+1)
		{
			if (!is_lower_bound(j))
			{
				double grad_diff=Gmaxp+G[j];
				if (G[j] >= Gmaxp2)
					Gmaxp2 = G[j];
				if (grad_diff > 0)
				{
					double obj_diff;
					double quad_coef = QD[ip]+QD[j]-2*Q_ip[j];
					if (quad_coef > 0)
						obj_diff = -(grad_diff*grad_diff)/quad_coef;
					else
						obj_diff = -(grad_diff*grad_diff)/TAU;

					if (obj_diff <= obj_diff_min)
					{
						Gmin_idx=j;
						obj_diff_min = obj_diff;
					}
				}
			}
		}
		else
		{
			if (!is_upper_bound(j))
			{
				double grad_diff=Gmaxn-G[j];
				if (-G[j] >= Gmaxn2)
					Gmaxn2 = -G[j];
				if (grad_diff > 0)
				{
					double obj_diff;
					double quad_coef = QD[in]+QD[j]-2*Q_in[j];
					if (quad_coef > 0)
						obj_diff = -(grad_diff*grad_diff)/quad_coef;
					else
						obj_diff = -(grad_diff*grad_diff)/TAU;

					if (obj_diff <= obj_diff_min)
					{
						Gmin_idx=j;
						obj_diff_min = obj_diff;
					}
				}
			}
		}
	}

	if(max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps)
		return 1;

	// i comes from the same class as the chosen j.
	if (y[Gmin_idx] == +1)
		out_i = Gmaxp_idx;
	else
		out_i = Gmaxn_idx;
	out_j = Gmin_idx;

	return 0;
}
1149
be_shrunk(int i,double Gmax1,double Gmax2,double Gmax3,double Gmax4)1150 bool Solver_NU::be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4)
1151 {
1152 if(is_upper_bound(i))
1153 {
1154 if(y[i]==+1)
1155 return(-G[i] > Gmax1);
1156 else
1157 return(-G[i] > Gmax4);
1158 }
1159 else if(is_lower_bound(i))
1160 {
1161 if(y[i]==+1)
1162 return(G[i] > Gmax2);
1163 else
1164 return(G[i] > Gmax3);
1165 }
1166 else
1167 return(false);
1168 }
1169
// Shrinking for the nu formulations: identical structure to
// Solver::do_shrinking but tracks four violation bounds, one pair per
// label class.
void Solver_NU::do_shrinking()
{
	double Gmax1 = -INF;	// max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) }
	double Gmax2 = -INF;	// max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) }
	double Gmax3 = -INF;	// max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) }
	double Gmax4 = -INF;	// max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) }

	// find maximal violating pair first
	int i;
	for(i=0;i<active_size;i++)
	{
		if(!is_upper_bound(i))
		{
			if(y[i]==+1)
			{
				if(-G[i] > Gmax1) Gmax1 = -G[i];
			}
			else	if(-G[i] > Gmax4) Gmax4 = -G[i];
		}
		if(!is_lower_bound(i))
		{
			if(y[i]==+1)
			{
				if(G[i] > Gmax2) Gmax2 = G[i];
			}
			else	if(G[i] > Gmax3) Gmax3 = G[i];
		}
	}

	// Near convergence: undo all shrinking once and continue on the
	// full, exact gradient.
	if(unshrink == false && max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10)
	{
		unshrink = true;
		reconstruct_gradient();
		active_size = l;
	}

	// Compact the active set by swapping shrinkable examples to the end.
	for(i=0;i<active_size;i++)
		if (be_shrunk(i, Gmax1, Gmax2, Gmax3, Gmax4))
		{
			active_size--;
			while (active_size > i)
			{
				if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
				{
					swap_index(i,active_size);
					break;
				}
				active_size--;
			}
		}
}
1221
// Compute r1 and r2 (one bias estimate per class) from the free variables
// or from the feasible-interval midpoint; reports r = (r1+r2)/2 through
// si and returns rho = (r1-r2)/2.
double Solver_NU::calculate_rho()
{
	int nr_free1 = 0,nr_free2 = 0;
	double ub1 = INF, ub2 = INF;
	double lb1 = -INF, lb2 = -INF;
	double sum_free1 = 0, sum_free2 = 0;

	for(int i=0;i<active_size;i++)
	{
		if(y[i]==+1)
		{
			if(is_upper_bound(i))
				lb1 = max(lb1,G[i]);
			else if(is_lower_bound(i))
				ub1 = min(ub1,G[i]);
			else
			{
				++nr_free1;
				sum_free1 += G[i];
			}
		}
		else
		{
			if(is_upper_bound(i))
				lb2 = max(lb2,G[i]);
			else if(is_lower_bound(i))
				ub2 = min(ub2,G[i]);
			else
			{
				++nr_free2;
				sum_free2 += G[i];
			}
		}
	}

	double r1,r2;
	if(nr_free1 > 0)
		r1 = sum_free1/nr_free1;
	else
		r1 = (ub1+lb1)/2;

	if(nr_free2 > 0)
		r2 = sum_free2/nr_free2;
	else
		r2 = (ub2+lb2)/2;

	si->r = (r1+r2)/2;
	return (r1-r2)/2;
}
1271
//
// Q matrices for various formulations
//
// C-SVC / nu-SVC matrix: Q_ij = y_i * y_j * K(x_i, x_j), served column by
// column through the LRU cache.
class SVC_Q: public Kernel
{
public:
	SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
	:Kernel(prob.l, prob.x, param)
	{
		clone(y,y_,prob.l);
		// cache_size is in MB; convert to bytes.
		cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
		QD = new double[prob.l];
		for(int i=0;i<prob.l;i++)
			QD[i] = (this->*kernel_function)(i,i);
	}

	// Return column i; only entries [start,len) missed the cache and are
	// recomputed.
	Qfloat *get_Q(int i, int len) const
	{
		Qfloat *data;
		int start, j;
		if((start = cache->get_data(i,&data,len)) < len)
		{
			for(j=start;j<len;j++)
				data[j] = (Qfloat)(y[i]*y[j]*(this->*kernel_function)(i,j));
		}
		return data;
	}

	double *get_QD() const
	{
		return QD;
	}

	void swap_index(int i, int j) const
	{
		cache->swap_index(i,j);
		Kernel::swap_index(i,j);
		swap(y[i],y[j]);
		swap(QD[i],QD[j]);
	}

	~SVC_Q()
	{
		delete[] y;
		delete cache;
		delete[] QD;
	}
private:
	schar *y;
	Cache *cache;
	double *QD;
};
1324
// One-class SVM matrix: Q_ij = K(x_i, x_j), no label signs involved.
class ONE_CLASS_Q: public Kernel
{
public:
	ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
	:Kernel(prob.l, prob.x, param)
	{
		// cache_size is in MB; convert to bytes.
		cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
		QD = new double[prob.l];
		for(int i=0;i<prob.l;i++)
			QD[i] = (this->*kernel_function)(i,i);
	}

	// Return column i; only entries [start,len) missed the cache.
	Qfloat *get_Q(int i, int len) const
	{
		Qfloat *data;
		int start, j;
		if((start = cache->get_data(i,&data,len)) < len)
		{
			for(j=start;j<len;j++)
				data[j] = (Qfloat)(this->*kernel_function)(i,j);
		}
		return data;
	}

	double *get_QD() const
	{
		return QD;
	}

	void swap_index(int i, int j) const
	{
		cache->swap_index(i,j);
		Kernel::swap_index(i,j);
		swap(QD[i],QD[j]);
	}

	~ONE_CLASS_Q()
	{
		delete cache;
		delete[] QD;
	}
private:
	Cache *cache;
	double *QD;
};
1370
// Regression matrix: the 2l variables map onto the l training points via
// index[], with sign[] = +1 for the first half and -1 for the second, so
// Q_ij = sign_i * sign_j * K(x_index[i], x_index[j]).
class SVR_Q: public Kernel
{
public:
	SVR_Q(const svm_problem& prob, const svm_parameter& param)
	:Kernel(prob.l, prob.x, param)
	{
		l = prob.l;
		// Cache works on the l underlying columns, not the 2l variables.
		cache = new Cache(l,(long int)(param.cache_size*(1<<20)));
		QD = new double[2*l];
		sign = new schar[2*l];
		index = new int[2*l];
		for(int k=0;k<l;k++)
		{
			sign[k] = 1;
			sign[k+l] = -1;
			index[k] = k;
			index[k+l] = k;
			QD[k] = (this->*kernel_function)(k,k);
			QD[k+l] = QD[k];
		}
		buffer[0] = new Qfloat[2*l];
		buffer[1] = new Qfloat[2*l];
		next_buffer = 0;
	}

	// Only the variable-level bookkeeping moves; the cached kernel
	// columns are addressed through index[] and stay put.
	void swap_index(int i, int j) const
	{
		swap(sign[i],sign[j]);
		swap(index[i],index[j]);
		swap(QD[i],QD[j]);
	}

	Qfloat *get_Q(int i, int len) const
	{
		Qfloat *data;
		int j, real_i = index[i];
		// Always cache the full underlying column of length l.
		if(cache->get_data(real_i,&data,l) < l)
		{
			for(j=0;j<l;j++)
				data[j] = (Qfloat)(this->*kernel_function)(real_i,j);
		}

		// reorder and copy
		// Double buffering: the solver holds columns i and j at once,
		// so alternate between two scratch buffers.
		Qfloat *buf = buffer[next_buffer];
		next_buffer = 1 - next_buffer;
		schar si = sign[i];
		for(j=0;j<len;j++)
			buf[j] = (Qfloat) si * (Qfloat) sign[j] * data[index[j]];
		return buf;
	}

	double *get_QD() const
	{
		return QD;
	}

	~SVR_Q()
	{
		delete cache;
		delete[] sign;
		delete[] index;
		delete[] buffer[0];
		delete[] buffer[1];
		delete[] QD;
	}
private:
	int l;
	Cache *cache;
	schar *sign;
	int *index;
	mutable int next_buffer;
	Qfloat *buffer[2];
	double *QD;
};
1445 #include <iostream>
1446 //
1447 // construct and solve various formulations
1448 //
solve_c_svc(const svm_problem * prob,const svm_parameter * param,double * alpha,Solver::SolutionInfo * si,double Cp,double Cn)1449 static void solve_c_svc(
1450 const svm_problem *prob, const svm_parameter* param,
1451 double *alpha, Solver::SolutionInfo* si, double Cp, double Cn)
1452 {
1453 int l = prob->l;
1454 double *minus_ones = new double[l];
1455 schar *y = new schar[l];
1456 double *C = new double[l];
1457
1458 int i;
1459
1460 for(i=0;i<l;i++)
1461 {
1462 alpha[i] = 0;
1463 minus_ones[i] = -1;
1464 if(prob->y[i] > 0)
1465 {
1466 y[i] = +1;
1467 C[i] = prob->W[i]*Cp;
1468 }
1469 else
1470 {
1471 y[i] = -1;
1472 C[i] = prob->W[i]*Cn;
1473 }
1474 //std::cout << C[i] << " ";
1475 }
1476 //std::cout << std::endl;
1477
1478 Solver s;
1479 s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
1480 alpha, C, param->eps, si, param->shrinking);
1481
1482 /*
1483 double sum_alpha=0;
1484 for(i=0;i<l;i++)
1485 sum_alpha += alpha[i];
1486 if (Cp==Cn)
1487 info("nu = %f\n", sum_alpha/(Cp*prob->l));
1488 */
1489
1490 for(i=0;i<l;i++)
1491 alpha[i] *= y[i];
1492
1493 delete[] C;
1494 delete[] minus_ones;
1495 delete[] y;
1496 }
1497
solve_nu_svc(const svm_problem * prob,const svm_parameter * param,double * alpha,Solver::SolutionInfo * si)1498 static void solve_nu_svc(
1499 const svm_problem *prob, const svm_parameter *param,
1500 double *alpha, Solver::SolutionInfo* si)
1501 {
1502 int i;
1503 int l = prob->l;
1504 double nu = param->nu;
1505
1506 schar *y = new schar[l];
1507 double *C = new double[l];
1508
1509 for(i=0;i<l;i++)
1510 {
1511 if(prob->y[i]>0)
1512 y[i] = +1;
1513 else
1514 y[i] = -1;
1515 C[i] = prob->W[i];
1516 }
1517
1518 double nu_l = 0;
1519 for(i=0;i<l;i++) nu_l += nu*C[i];
1520 double sum_pos = nu_l/2;
1521 double sum_neg = nu_l/2;
1522
1523 for(i=0;i<l;i++)
1524 if(y[i] == +1)
1525 {
1526 alpha[i] = min(C[i],sum_pos);
1527 sum_pos -= alpha[i];
1528 }
1529 else
1530 {
1531 alpha[i] = min(C[i],sum_neg);
1532 sum_neg -= alpha[i];
1533 }
1534
1535 double *zeros = new double[l];
1536
1537 for(i=0;i<l;i++)
1538 zeros[i] = 0;
1539
1540 Solver_NU s;
1541 s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
1542 alpha, C, param->eps, si, param->shrinking);
1543 double r = si->r;
1544
1545 info("C = %f\n",1/r);
1546
1547 for(i=0;i<l;i++)
1548 {
1549 alpha[i] *= y[i]/r;
1550 si->upper_bound[i] /= r;
1551 }
1552
1553 si->rho /= r;
1554 si->obj /= (r*r);
1555
1556 delete[] C;
1557 delete[] y;
1558 delete[] zeros;
1559 }
1560
solve_one_class(const svm_problem * prob,const svm_parameter * param,double * alpha,Solver::SolutionInfo * si)1561 static void solve_one_class(
1562 const svm_problem *prob, const svm_parameter *param,
1563 double *alpha, Solver::SolutionInfo* si)
1564 {
1565 int l = prob->l;
1566 double *zeros = new double[l];
1567 schar *ones = new schar[l];
1568 double *C = new double[l];
1569 int i;
1570
1571 double nu_l = 0;
1572
1573 for(i=0;i<l;i++)
1574 {
1575 C[i] = prob->W[i];
1576 nu_l += C[i] * param->nu;
1577 }
1578
1579 i = 0;
1580 while(nu_l > 0)
1581 {
1582 alpha[i] = min(C[i],nu_l);
1583 nu_l -= alpha[i];
1584 ++i;
1585 }
1586 for(;i<l;i++)
1587 alpha[i] = 0;
1588
1589 for(i=0;i<l;i++)
1590 {
1591 zeros[i] = 0;
1592 ones[i] = 1;
1593 }
1594
1595 Solver s;
1596 s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
1597 alpha, C, param->eps, si, param->shrinking);
1598
1599 delete[] C;
1600 delete[] zeros;
1601 delete[] ones;
1602 }
1603
solve_epsilon_svr(const svm_problem * prob,const svm_parameter * param,double * alpha,Solver::SolutionInfo * si)1604 static void solve_epsilon_svr(
1605 const svm_problem *prob, const svm_parameter *param,
1606 double *alpha, Solver::SolutionInfo* si)
1607 {
1608 int l = prob->l;
1609 double *alpha2 = new double[2*l];
1610 double *linear_term = new double[2*l];
1611 double *C = new double[2*l];
1612 schar *y = new schar[2*l];
1613 int i;
1614
1615 for(i=0;i<l;i++)
1616 {
1617 alpha2[i] = 0;
1618 linear_term[i] = param->p - prob->y[i];
1619 y[i] = 1;
1620 C[i] = prob->W[i]*param->C;
1621
1622 alpha2[i+l] = 0;
1623 linear_term[i+l] = param->p + prob->y[i];
1624 y[i+l] = -1;
1625 C[i+l] = prob->W[i]*param->C;
1626 }
1627
1628 Solver s;
1629 s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
1630 alpha2, C, param->eps, si, param->shrinking);
1631 double sum_alpha = 0;
1632 for(i=0;i<l;i++)
1633 {
1634 alpha[i] = alpha2[i] - alpha2[i+l];
1635 sum_alpha += fabs(alpha[i]);
1636 }
1637 //info("nu = %f\n",sum_alpha/(param->C*l));
1638 delete[] alpha2;
1639 delete[] linear_term;
1640 delete[] C;
1641 delete[] y;
1642 }
1643
solve_nu_svr(const svm_problem * prob,const svm_parameter * param,double * alpha,Solver::SolutionInfo * si)1644 static void solve_nu_svr(
1645 const svm_problem *prob, const svm_parameter *param,
1646 double *alpha, Solver::SolutionInfo* si)
1647 {
1648 int l = prob->l;
1649 double *C = new double[2*l];
1650 double *alpha2 = new double[2*l];
1651 double *linear_term = new double[2*l];
1652 schar *y = new schar[2*l];
1653 int i;
1654
1655 double sum = 0;
1656 for(i=0;i<l;i++)
1657 {
1658 C[i] = C[i+l] = prob->W[i]*param->C;
1659 sum += C[i] * param->nu;
1660 }
1661 sum /= 2;
1662
1663 for(i=0;i<l;i++)
1664 {
1665 alpha2[i] = alpha2[i+l] = min(sum,C[i]);
1666 sum -= alpha2[i];
1667
1668 linear_term[i] = - prob->y[i];
1669 y[i] = 1;
1670
1671 linear_term[i+l] = prob->y[i];
1672 y[i+l] = -1;
1673 }
1674
1675 Solver_NU s;
1676 s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
1677 alpha2, C, param->eps, si, param->shrinking);
1678
1679 info("epsilon = %f\n",-si->r);
1680
1681 for(i=0;i<l;i++)
1682 alpha[i] = alpha2[i] - alpha2[i+l];
1683
1684 delete[] alpha2;
1685 delete[] linear_term;
1686 delete[] C;
1687 delete[] y;
1688 }
1689
1690 //
1691 // decision_function
1692 //
struct decision_function
{
	double *alpha;	// signed dual coefficients, one per training point (malloc'd; caller frees)
	double rho;	// bias term of the decision function
};
1698
svm_train_one(const svm_problem * prob,const svm_parameter * param,double Cp,double Cn)1699 static decision_function svm_train_one(
1700 const svm_problem *prob, const svm_parameter *param,
1701 double Cp, double Cn)
1702 {
1703 double *alpha = Malloc(double,prob->l);
1704 Solver::SolutionInfo si;
1705 switch(param->svm_type)
1706 {
1707 case C_SVC:
1708 si.upper_bound = Malloc(double,prob->l);
1709 solve_c_svc(prob,param,alpha,&si,Cp,Cn);
1710 break;
1711 case NU_SVC:
1712 si.upper_bound = Malloc(double,prob->l);
1713 solve_nu_svc(prob,param,alpha,&si);
1714 break;
1715 case ONE_CLASS:
1716 si.upper_bound = Malloc(double,prob->l);
1717 solve_one_class(prob,param,alpha,&si);
1718 break;
1719 case EPSILON_SVR:
1720 si.upper_bound = Malloc(double,2*prob->l);
1721 solve_epsilon_svr(prob,param,alpha,&si);
1722 break;
1723 case NU_SVR:
1724 si.upper_bound = Malloc(double,2*prob->l);
1725 solve_nu_svr(prob,param,alpha,&si);
1726 break;
1727 }
1728
1729 info("obj = %f, rho = %f\n",si.obj,si.rho);
1730
1731 // output SVs
1732
1733 int nSV = 0;
1734 int nBSV = 0;
1735 for(int i=0;i<prob->l;i++)
1736 {
1737 if(fabs(alpha[i]) > 0)
1738 {
1739 ++nSV;
1740 if(prob->y[i] > 0)
1741 {
1742 if(fabs(alpha[i]) >= si.upper_bound[i])
1743 ++nBSV;
1744 }
1745 else
1746 {
1747 if(fabs(alpha[i]) >= si.upper_bound[i])
1748 ++nBSV;
1749 }
1750 }
1751 }
1752
1753 free(si.upper_bound);
1754
1755 info("nSV = %d, nBSV = %d\n",nSV,nBSV);
1756
1757 decision_function f;
1758 f.alpha = alpha;
1759 f.rho = si.rho;
1760 return f;
1761 }
1762
1763 // Platt's binary SVM Probablistic Output: an improvement from Lin et al.
// Platt's binary SVM Probablistic Output: an improvement from Lin et al.
// Fits A and B of p(y=1|f) = 1/(1+exp(A*f+B)) to (dec_values, labels)
// by Newton's method with backtracking line search.
static void sigmoid_train(
	int l, const double *dec_values, const double *labels,
	double& A, double& B)
{
	double prior1=0, prior0 = 0;
	int i;

	// Class counts: prior1 = #positives, prior0 = #negatives.
	for (i=0;i<l;i++)
		if (labels[i] > 0) prior1+=1;
		else prior0+=1;

	int max_iter=100;	// Maximal number of iterations
	double min_step=1e-10;	// Minimal step taken in line search
	double sigma=1e-12;	// For numerically strict PD of Hessian
	double eps=1e-5;
	// Regularized targets instead of hard 0/1 labels (Platt's trick).
	double hiTarget=(prior1+1.0)/(prior1+2.0);
	double loTarget=1/(prior0+2.0);
	double *t=Malloc(double,l);
	double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;
	double newA,newB,newf,d1,d2;
	int iter;

	// Initial Point and Initial Fun Value
	A=0.0; B=log((prior0+1.0)/(prior1+1.0));
	double fval = 0.0;

	for (i=0;i<l;i++)
	{
		if (labels[i]>0) t[i]=hiTarget;
		else t[i]=loTarget;
		fApB = dec_values[i]*A+B;
		// Branch on the sign of fApB so log(1+exp(.)) stays stable.
		if (fApB>=0)
			fval += t[i]*fApB + log(1+exp(-fApB));
		else
			fval += (t[i] - 1)*fApB +log(1+exp(fApB));
	}
	for (iter=0;iter<max_iter;iter++)
	{
		// Update Gradient and Hessian (use H' = H + sigma I)
		h11=sigma; // numerically ensures strict PD
		h22=sigma;
		h21=0.0;g1=0.0;g2=0.0;
		for (i=0;i<l;i++)
		{
			fApB = dec_values[i]*A+B;
			// p = sigmoid, q = 1-p, computed in the stable branch.
			if (fApB >= 0)
			{
				p=exp(-fApB)/(1.0+exp(-fApB));
				q=1.0/(1.0+exp(-fApB));
			}
			else
			{
				p=1.0/(1.0+exp(fApB));
				q=exp(fApB)/(1.0+exp(fApB));
			}
			d2=p*q;
			h11+=dec_values[i]*dec_values[i]*d2;
			h22+=d2;
			h21+=dec_values[i]*d2;
			d1=t[i]-p;
			g1+=dec_values[i]*d1;
			g2+=d1;
		}

		// Stopping Criteria
		if (fabs(g1)<eps && fabs(g2)<eps)
			break;

		// Finding Newton direction: -inv(H') * g
		det=h11*h22-h21*h21;
		dA=-(h22*g1 - h21 * g2) / det;
		dB=-(-h21*g1+ h11 * g2) / det;
		gd=g1*dA+g2*dB;


		stepsize = 1;		// Line Search
		// Backtracking: halve the step until sufficient decrease holds.
		while (stepsize >= min_step)
		{
			newA = A + stepsize * dA;
			newB = B + stepsize * dB;

			// New function value
			newf = 0.0;
			for (i=0;i<l;i++)
			{
				fApB = dec_values[i]*newA+newB;
				if (fApB >= 0)
					newf += t[i]*fApB + log(1+exp(-fApB));
				else
					newf += (t[i] - 1)*fApB +log(1+exp(fApB));
			}
			// Check sufficient decrease
			if (newf<fval+0.0001*stepsize*gd)
			{
				A=newA;B=newB;fval=newf;
				break;
			}
			else
				stepsize = stepsize / 2.0;
		}

		if (stepsize < min_step)
		{
			info("Line search fails in two-class probability estimates\n");
			break;
		}
	}

	if (iter>=max_iter)
		info("Reaching maximal iterations in two-class probability estimates\n");
	free(t);
}
1876
// Map a decision value to a probability via the fitted sigmoid
// 1 / (1 + exp(A*dec + B)).  Evaluated through exp(-|fApB|) so that both
// p and the later-used 1-p keep full precision (no catastrophic
// cancellation in either tail).
static double sigmoid_predict(double decision_value, double A, double B)
{
	const double fApB = decision_value*A + B;
	const double e = exp(-fabs(fApB));
	return (fApB >= 0) ? e/(1.0+e) : 1.0/(1.0+e);
}
1886
1887 // Method 2 from the multiclass_prob paper by Wu, Lin, and Weng
// Method 2 from the multiclass_prob paper by Wu, Lin, and Weng
// Couples the pairwise probabilities r[i][j] (estimate of P(y=i | y in
// {i,j})) into a single class distribution p[0..k-1] by iteratively
// minimizing p'Qp subject to sum(p)=1.
static void multiclass_probability(int k, double **r, double *p)
{
	int t,j;
	int iter = 0, max_iter=max(100,k);
	double **Q=Malloc(double *,k);
	double *Qp=Malloc(double,k);
	double pQp, eps=0.005/k;	// tolerance shrinks with the number of classes

	// Build Q from the pairwise estimates; p starts uniform.
	for (t=0;t<k;t++)
	{
		p[t]=1.0/k;  // Valid if k = 1
		Q[t]=Malloc(double,k);
		Q[t][t]=0;
		for (j=0;j<t;j++)
		{
			Q[t][t]+=r[j][t]*r[j][t];
			Q[t][j]=Q[j][t];	// Q is symmetric; reuse the computed entry
		}
		for (j=t+1;j<k;j++)
		{
			Q[t][t]+=r[j][t]*r[j][t];
			Q[t][j]=-r[j][t]*r[t][j];
		}
	}
	for (iter=0;iter<max_iter;iter++)
	{
		// stopping condition, recalculate QP,pQP for numerical accuracy
		pQp=0;
		for (t=0;t<k;t++)
		{
			Qp[t]=0;
			for (j=0;j<k;j++)
				Qp[t]+=Q[t][j]*p[j];
			pQp+=p[t]*Qp[t];
		}
		double max_error=0;
		for (t=0;t<k;t++)
		{
			double error=fabs(Qp[t]-pQp);
			if (error>max_error)
				max_error=error;
		}
		if (max_error<eps) break;

		// Coordinate-wise update: adjust p[t] toward the KKT condition
		// Qp[t] == pQp, then renormalize so sum(p) stays 1; Qp and pQp
		// are updated incrementally rather than recomputed.
		for (t=0;t<k;t++)
		{
			double diff=(-Qp[t]+pQp)/Q[t][t];
			p[t]+=diff;
			pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff);
			for (j=0;j<k;j++)
			{
				Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff);
				p[j]/=(1+diff);
			}
		}
	}
	if (iter>=max_iter)
		info("Exceeds max_iter in multiclass_prob\n");
	for(t=0;t<k;t++) free(Q[t]);
	free(Q);
	free(Qp);
}
1950
1951 // Cross-validation decision values for probability estimates
// Cross-validation decision values for probability estimates
// Runs an internal 5-fold CV to collect out-of-fold decision values, then
// fits the sigmoid parameters probA/probB on them via sigmoid_train.
static void svm_binary_svc_probability(
	const svm_problem *prob, const svm_parameter *param,
	double Cp, double Cn, double& probA, double& probB)
{
	int i;
	int nr_fold = 5;
	int *perm = Malloc(int,prob->l);
	double *dec_values = Malloc(double,prob->l);

	// random shuffle
	for(i=0;i<prob->l;i++) perm[i]=i;
	for(i=0;i<prob->l;i++)
	{
		int j = i+rand()%(prob->l-i);
		swap(perm[i],perm[j]);
	}
	for(i=0;i<nr_fold;i++)
	{
		int begin = i*prob->l/nr_fold;
		int end = (i+1)*prob->l/nr_fold;
		int j,k;
		struct svm_problem subprob;

		// Training portion of this fold: everything outside [begin,end).
		subprob.l = prob->l-(end-begin);
		subprob.x = Malloc(struct svm_node*,subprob.l);
		subprob.y = Malloc(double,subprob.l);
		subprob.W = Malloc(double,subprob.l);

		k=0;
		for(j=0;j<begin;j++)
		{
			subprob.x[k] = prob->x[perm[j]];
			subprob.y[k] = prob->y[perm[j]];
			subprob.W[k] = prob->W[perm[j]];
			++k;
		}
		for(j=end;j<prob->l;j++)
		{
			subprob.x[k] = prob->x[perm[j]];
			subprob.y[k] = prob->y[perm[j]];
			subprob.W[k] = prob->W[perm[j]];
			++k;
		}
		int p_count=0,n_count=0;
		for(j=0;j<k;j++)
			if(subprob.y[j]>0)
				p_count++;
			else
				n_count++;

		// Degenerate folds (one or both classes absent from the training
		// portion) get constant decision values instead of a model.
		if(p_count==0 && n_count==0)
			for(j=begin;j<end;j++)
				dec_values[perm[j]] = 0;
		else if(p_count > 0 && n_count == 0)
			for(j=begin;j<end;j++)
				dec_values[perm[j]] = 1;
		else if(p_count == 0 && n_count > 0)
			for(j=begin;j<end;j++)
				dec_values[perm[j]] = -1;
		else
		{
			// Train on the fold with C=1 and class weights Cp/Cn, no
			// nested probability estimation.
			svm_parameter subparam = *param;
			subparam.probability=0;
			subparam.C=1.0;
			subparam.nr_weight=2;
			subparam.weight_label = Malloc(int,2);
			subparam.weight = Malloc(double,2);
			subparam.weight_label[0]=+1;
			subparam.weight_label[1]=-1;
			subparam.weight[0]=Cp;
			subparam.weight[1]=Cn;
			struct svm_model *submodel = svm_train(&subprob,&subparam);
			for(j=begin;j<end;j++)
			{
				svm_predict_values(submodel,prob->x[perm[j]],&(dec_values[perm[j]]));
				// ensure +1 -1 order; reason not using CV subroutine
				dec_values[perm[j]] *= submodel->label[0];
			}
			svm_free_and_destroy_model(&submodel);
			svm_destroy_param(&subparam);
		}
		free(subprob.x);
		free(subprob.y);
		free(subprob.W);
	}
	sigmoid_train(prob->l,dec_values,prob->y,probA,probB);
	free(dec_values);
	free(perm);
}
2041
2042 // Return parameter of a Laplace distribution
svm_svr_probability(const svm_problem * prob,const svm_parameter * param)2043 static double svm_svr_probability(
2044 const svm_problem *prob, const svm_parameter *param)
2045 {
2046 int i;
2047 int nr_fold = 5;
2048 double *ymv = Malloc(double,prob->l);
2049 double mae = 0;
2050
2051 svm_parameter newparam = *param;
2052 newparam.probability = 0;
2053 svm_cross_validation(prob,&newparam,nr_fold,ymv);
2054 for(i=0;i<prob->l;i++)
2055 {
2056 ymv[i]=prob->y[i]-ymv[i];
2057 mae += fabs(ymv[i]);
2058 }
2059 mae /= prob->l;
2060 double std=sqrt(2*mae*mae);
2061 int count=0;
2062 mae=0;
2063 for(i=0;i<prob->l;i++)
2064 if (fabs(ymv[i]) > 5*std)
2065 count=count+1;
2066 else
2067 mae+=fabs(ymv[i]);
2068 mae /= (prob->l-count);
2069 info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae);
2070 free(ymv);
2071 return mae;
2072 }
2073
2074
2075 // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
2076 // perm, length l, must be allocated before calling this subroutine
// label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
// perm, length l, must be allocated before calling this subroutine
static void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
{
	int l = prob->l;
	int max_nr_class = 16;	// initial capacity; label/count grow by doubling
	int nr_class = 0;
	int *label = Malloc(int,max_nr_class);
	int *count = Malloc(int,max_nr_class);
	int *data_label = Malloc(int,l);	// class index (into label[]) of each instance
	int i;

	// First pass: discover the distinct labels (in order of first
	// appearance) and count instances per class.
	for(i=0;i<l;i++)
	{
		int this_label = (int)prob->y[i];
		int j;
		for(j=0;j<nr_class;j++)
		{
			if(this_label == label[j])
			{
				++count[j];
				break;
			}
		}
		data_label[i] = j;
		if(j == nr_class)
		{
			if(nr_class == max_nr_class)
			{
				max_nr_class *= 2;
				label = (int *)realloc(label,max_nr_class*sizeof(int));
				count = (int *)realloc(count,max_nr_class*sizeof(int));
			}
			label[nr_class] = this_label;
			count[nr_class] = 1;
			++nr_class;
		}
	}

	// Second pass: fill perm so instances of the same class are
	// contiguous.  start[] is used as a moving write cursor here and is
	// rebuilt to the true class offsets afterwards.
	int *start = Malloc(int,nr_class);
	start[0] = 0;
	for(i=1;i<nr_class;i++)
		start[i] = start[i-1]+count[i-1];
	for(i=0;i<l;i++)
	{
		perm[start[data_label[i]]] = i;
		++start[data_label[i]];
	}
	start[0] = 0;
	for(i=1;i<nr_class;i++)
		start[i] = start[i-1]+count[i-1];

	*nr_class_ret = nr_class;
	*label_ret = label;
	*start_ret = start;
	*count_ret = count;
	free(data_label);
}
2133
2134 //
2135 // Remove zero weighed data as libsvm and some liblinear solvers require C > 0.
2136 //
remove_zero_weight(svm_problem * newprob,const svm_problem * prob)2137 static void remove_zero_weight(svm_problem *newprob, const svm_problem *prob)
2138 {
2139 int i;
2140 int l = 0;
2141 for(i=0;i<prob->l;i++)
2142 if(prob->W[i] > 0) l++;
2143 *newprob = *prob;
2144 newprob->l = l;
2145 newprob->x = Malloc(svm_node*,l);
2146 newprob->y = Malloc(double,l);
2147 newprob->W = Malloc(double,l);
2148
2149 int j = 0;
2150 for(i=0;i<prob->l;i++)
2151 if(prob->W[i] > 0)
2152 {
2153 newprob->x[j] = prob->x[i];
2154 newprob->y[j] = prob->y[i];
2155 newprob->W[j] = prob->W[i];
2156 j++;
2157 }
2158 }
2159
2160 //
2161 // Interface functions
2162 //
svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
{
	// Drop zero-weight instances first; the solvers require C > 0.
	// prob is redirected to the filtered copy for the rest of the call.
	svm_problem newprob;
	remove_zero_weight(&newprob, prob);
	prob = &newprob;

	svm_model *model = Malloc(svm_model,1);
	model->param = *param;
	model->free_sv = 0;	// XXX

	if(param->svm_type == ONE_CLASS ||
	   param->svm_type == EPSILON_SVR ||
	   param->svm_type == NU_SVR)
	{
		// regression or one-class-svm
		model->nr_class = 2;
		model->label = NULL;
		model->nSV = NULL;
		model->probA = NULL; model->probB = NULL;
		model->sv_coef = Malloc(double *,1);

		if(param->probability &&
		   (param->svm_type == EPSILON_SVR ||
		    param->svm_type == NU_SVR))
		{
			// Laplace parameter for SVR probability intervals.
			model->probA = Malloc(double,1);
			model->probA[0] = svm_svr_probability(prob,param);
		}

		decision_function f = svm_train_one(prob,param,0,0);
		model->rho = Malloc(double,1);
		model->rho[0] = f.rho;

		// Keep only the support vectors (non-zero coefficients).
		int nSV = 0;
		int i;
		for(i=0;i<prob->l;i++)
			if(fabs(f.alpha[i]) > 0) ++nSV;
		model->l = nSV;
		model->SV = Malloc(svm_node *,nSV);
		model->sv_coef[0] = Malloc(double,nSV);
		model->sv_indices = Malloc(int,nSV);
		int j = 0;
		for(i=0;i<prob->l;i++)
			if(fabs(f.alpha[i]) > 0)
			{
				model->SV[j] = prob->x[i];
				model->sv_coef[0][j] = f.alpha[i];
				model->sv_indices[j] = i+1;	// 1-based indices
				++j;
			}

		free(f.alpha);
	}
	else
	{
		// classification
		int l = prob->l;
		int nr_class;
		int *label = NULL;
		int *start = NULL;
		int *count = NULL;
		int *perm = Malloc(int,l);

		// group training data of the same class
		svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
		if(nr_class == 1)
			info("WARNING: training data in only one class. See README for details.\n");

		// x and W reordered so each class is contiguous (via perm).
		svm_node **x = Malloc(svm_node *,l);
		double *W;
		W = Malloc(double,l);

		int i;
		for(i=0;i<l;i++)
		{
			x[i] = prob->x[perm[i]];
			W[i] = prob->W[perm[i]];
		}

		// calculate weighted C

		double *weighted_C = Malloc(double, nr_class);
		for(i=0;i<nr_class;i++)
			weighted_C[i] = param->C;
		for(i=0;i<param->nr_weight;i++)
		{
			int j;
			for(j=0;j<nr_class;j++)
				if(param->weight_label[i] == label[j])
					break;
			if(j == nr_class)
				fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
			else
				weighted_C[j] *= param->weight[i];
		}

		// train k*(k-1)/2 models

		// nonzero[i]: instance i is a support vector in at least one
		// of the pairwise classifiers.
		bool *nonzero = Malloc(bool,l);
		for(i=0;i<l;i++)
			nonzero[i] = false;
		decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2);

		double *probA=NULL,*probB=NULL;
		if (param->probability)
		{
			probA=Malloc(double,nr_class*(nr_class-1)/2);
			probB=Malloc(double,nr_class*(nr_class-1)/2);
		}

		// One-vs-one: p enumerates the class pairs (i,j), i<j, in order.
		int p = 0;
		for(i=0;i<nr_class;i++)
			for(int j=i+1;j<nr_class;j++)
			{
				// Build the two-class subproblem: class i -> +1,
				// class j -> -1.
				svm_problem sub_prob;
				int si = start[i], sj = start[j];
				int ci = count[i], cj = count[j];
				sub_prob.l = ci+cj;
				sub_prob.x = Malloc(svm_node *,sub_prob.l);
				sub_prob.y = Malloc(double,sub_prob.l);
				sub_prob.W = Malloc(double,sub_prob.l);
				int k;
				for(k=0;k<ci;k++)
				{
					sub_prob.x[k] = x[si+k];
					sub_prob.y[k] = +1;
					sub_prob.W[k] = W[si+k];
				}
				for(k=0;k<cj;k++)
				{
					sub_prob.x[ci+k] = x[sj+k];
					sub_prob.y[ci+k] = -1;
					sub_prob.W[ci+k] = W[sj+k];
				}

				if(param->probability)
					svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]);

				f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
				for(k=0;k<ci;k++)
					if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
						nonzero[si+k] = true;
				for(k=0;k<cj;k++)
					if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
						nonzero[sj+k] = true;
				free(sub_prob.x);
				free(sub_prob.y);
				free(sub_prob.W);
				++p;
			}

		// build output

		model->nr_class = nr_class;

		model->label = Malloc(int,nr_class);
		for(i=0;i<nr_class;i++)
			model->label[i] = label[i];

		model->rho = Malloc(double,nr_class*(nr_class-1)/2);
		for(i=0;i<nr_class*(nr_class-1)/2;i++)
			model->rho[i] = f[i].rho;

		if(param->probability)
		{
			model->probA = Malloc(double,nr_class*(nr_class-1)/2);
			model->probB = Malloc(double,nr_class*(nr_class-1)/2);
			for(i=0;i<nr_class*(nr_class-1)/2;i++)
			{
				model->probA[i] = probA[i];
				model->probB[i] = probB[i];
			}
		}
		else
		{
			model->probA=NULL;
			model->probB=NULL;
		}

		// Per-class support vector counts.
		int total_sv = 0;
		int *nz_count = Malloc(int,nr_class);
		model->nSV = Malloc(int,nr_class);
		for(i=0;i<nr_class;i++)
		{
			int nSV = 0;
			for(int j=0;j<count[i];j++)
				if(nonzero[start[i]+j])
				{
					++nSV;
					++total_sv;
				}
			model->nSV[i] = nSV;
			nz_count[i] = nSV;
		}

		info("Total nSV = %d\n",total_sv);

		model->l = total_sv;
		model->SV = Malloc(svm_node *,total_sv);
		model->sv_indices = Malloc(int,total_sv);
		p = 0;
		for(i=0;i<l;i++)
			if(nonzero[i])
			{
				model->SV[p] = x[i];
				model->sv_indices[p++] = perm[i] + 1;	// 1-based, original order
			}

		// nz_start[c]: offset of class c's SVs within the packed SV list.
		int *nz_start = Malloc(int,nr_class);
		nz_start[0] = 0;
		for(i=1;i<nr_class;i++)
			nz_start[i] = nz_start[i-1]+nz_count[i-1];

		model->sv_coef = Malloc(double *,nr_class-1);
		for(i=0;i<nr_class-1;i++)
			model->sv_coef[i] = Malloc(double,total_sv);

		p = 0;
		for(i=0;i<nr_class;i++)
			for(int j=i+1;j<nr_class;j++)
			{
				// classifier (i,j): coefficients with
				// i are in sv_coef[j-1][nz_start[i]...],
				// j are in sv_coef[i][nz_start[j]...]

				int si = start[i];
				int sj = start[j];
				int ci = count[i];
				int cj = count[j];

				int q = nz_start[i];
				int k;
				for(k=0;k<ci;k++)
					if(nonzero[si+k])
						model->sv_coef[j-1][q++] = f[p].alpha[k];
				q = nz_start[j];
				for(k=0;k<cj;k++)
					if(nonzero[sj+k])
						model->sv_coef[i][q++] = f[p].alpha[ci+k];
				++p;
			}

		free(label);
		free(probA);
		free(probB);
		free(count);
		free(perm);
		free(start);
		free(W);
		free(x);
		free(weighted_C);
		free(nonzero);
		for(i=0;i<nr_class*(nr_class-1)/2;i++)
			free(f[i].alpha);
		free(f);
		free(nz_count);
		free(nz_start);
	}
	// Free the filtered copy's arrays (model->SV points into the
	// caller's original x nodes, which are untouched).
	free(newprob.x);
	free(newprob.y);
	free(newprob.W);
	return model;
}
2426
2427 // Stratified cross validation
// Stratified cross validation
// Fills target[i] with the prediction for instance i made by the model
// trained on the folds that exclude i.
void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
{
	int i;
	int *fold_start = Malloc(int,nr_fold+1);
	int l = prob->l;
	int *perm = Malloc(int,l);
	int nr_class;

	// stratified cv may not give leave-one-out rate
	// Each class to l folds -> some folds may have zero elements
	if((param->svm_type == C_SVC ||
	    param->svm_type == NU_SVC) && nr_fold < l)
	{
		// Stratified split: shuffle within each class, then spread each
		// class across the folds proportionally.
		int *start = NULL;
		int *label = NULL;
		int *count = NULL;
		svm_group_classes(prob,&nr_class,&label,&start,&count,perm);

		// random shuffle and then data grouped by fold using the array perm
		int *fold_count = Malloc(int,nr_fold);
		int c;
		int *index = Malloc(int,l);
		for(i=0;i<l;i++)
			index[i]=perm[i];
		for (c=0; c<nr_class; c++)
			for(i=0;i<count[c];i++)
			{
				int j = i+rand()%(count[c]-i);
				swap(index[start[c]+j],index[start[c]+i]);
			}
		for(i=0;i<nr_fold;i++)
		{
			fold_count[i] = 0;
			for (c=0; c<nr_class;c++)
				fold_count[i]+=(i+1)*count[c]/nr_fold-i*count[c]/nr_fold;
		}
		fold_start[0]=0;
		for (i=1;i<=nr_fold;i++)
			fold_start[i] = fold_start[i-1]+fold_count[i-1];
		// fold_start is used as a write cursor below, then rebuilt.
		for (c=0; c<nr_class;c++)
			for(i=0;i<nr_fold;i++)
			{
				int begin = start[c]+i*count[c]/nr_fold;
				int end = start[c]+(i+1)*count[c]/nr_fold;
				for(int j=begin;j<end;j++)
				{
					perm[fold_start[i]] = index[j];
					fold_start[i]++;
				}
			}
		fold_start[0]=0;
		for (i=1;i<=nr_fold;i++)
			fold_start[i] = fold_start[i-1]+fold_count[i-1];
		free(start);
		free(label);
		free(count);
		free(index);
		free(fold_count);
	}
	else
	{
		// Unstratified split (regression/one-class, or leave-one-out).
		for(i=0;i<l;i++) perm[i]=i;
		for(i=0;i<l;i++)
		{
			int j = i+rand()%(l-i);
			swap(perm[i],perm[j]);
		}
		for(i=0;i<=nr_fold;i++)
			fold_start[i]=i*l/nr_fold;
	}

	// Train on the complement of each fold, predict its held-out part.
	for(i=0;i<nr_fold;i++)
	{
		int begin = fold_start[i];
		int end = fold_start[i+1];
		int j,k;
		struct svm_problem subprob;

		subprob.l = l-(end-begin);
		subprob.x = Malloc(struct svm_node*,subprob.l);
		subprob.y = Malloc(double,subprob.l);

		subprob.W = Malloc(double,subprob.l);
		k=0;
		for(j=0;j<begin;j++)
		{
			subprob.x[k] = prob->x[perm[j]];
			subprob.y[k] = prob->y[perm[j]];
			subprob.W[k] = prob->W[perm[j]];
			++k;
		}
		for(j=end;j<l;j++)
		{
			subprob.x[k] = prob->x[perm[j]];
			subprob.y[k] = prob->y[perm[j]];
			subprob.W[k] = prob->W[perm[j]];
			++k;
		}
		struct svm_model *submodel = svm_train(&subprob,param);
		if(param->probability &&
		   (param->svm_type == C_SVC || param->svm_type == NU_SVC))
		{
			double *prob_estimates=Malloc(double,svm_get_nr_class(submodel));
			for(j=begin;j<end;j++)
				target[perm[j]] = svm_predict_probability(submodel,prob->x[perm[j]],prob_estimates);
			free(prob_estimates);
		}
		else
			for(j=begin;j<end;j++)
				target[perm[j]] = svm_predict(submodel,prob->x[perm[j]]);
		svm_free_and_destroy_model(&submodel);
		free(subprob.x);
		free(subprob.y);
		free(subprob.W);
	}
	free(fold_start);
	free(perm);
}
2546
2547
// Return the svm_type the model was trained with
// (C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR or NU_SVR).
int svm_get_svm_type(const svm_model *model)
{
	return model->param.svm_type;
}
2552
// Number of classes in the model (2 for regression/one-class models).
int svm_get_nr_class(const svm_model *model)
{
	return model->nr_class;
}
2557
svm_get_labels(const svm_model * model,int * label)2558 void svm_get_labels(const svm_model *model, int* label)
2559 {
2560 if (model->label != NULL)
2561 for(int i=0;i<model->nr_class;i++)
2562 label[i] = model->label[i];
2563 }
2564
svm_get_sv_indices(const svm_model * model,int * indices)2565 void svm_get_sv_indices(const svm_model *model, int* indices)
2566 {
2567 if (model->sv_indices != NULL)
2568 for(int i=0;i<model->l;i++)
2569 indices[i] = model->sv_indices[i];
2570 }
2571
// Total number of support vectors in the model.
int svm_get_nr_sv(const svm_model *model)
{
	return model->l;
}
2576
svm_get_svr_probability(const svm_model * model)2577 double svm_get_svr_probability(const svm_model *model)
2578 {
2579 if ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
2580 model->probA!=NULL)
2581 return model->probA[0];
2582 else
2583 {
2584 fprintf(stderr,"Model doesn't contain information for SVR probability inference\n");
2585 return 0;
2586 }
2587 }
2588
svm_hyper_w_normsqr_twoclass(const struct svm_model * model)2589 double svm_hyper_w_normsqr_twoclass(const struct svm_model* model)
2590 {
2591 assert(model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC);
2592 int i, j;
2593 // int nr_class = model->nr_class;
2594 // assert(nr_class == 2);
2595 int l = model->l;
2596
2597 double sum = 0;
2598 double *coef = model->sv_coef[0];
2599
2600 for(i=0;i<l;++i)
2601 for(j=0;j<l;++j)
2602 {
2603 double kv = Kernel::k_function(model->SV[i],model->SV[j],model->param);
2604 sum += kv * coef[i] * coef[j];
2605 }
2606
2607 return sum;
2608 }
2609
svm_predict_values_twoclass(const struct svm_model * model,const struct svm_node * x)2610 double svm_predict_values_twoclass(const struct svm_model* model, const struct svm_node* x)
2611 {
2612
2613 assert(model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC);
2614 int i;
2615 // int nr_class = model->nr_class;
2616 // assert(nr_class == 2);
2617 int l = model->l;
2618
2619 double *kvalue = Malloc(double,l);
2620 for(i=0;i<l;i++)
2621 kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);
2622
2623
2624 double sum = 0;
2625 double *coef = model->sv_coef[0];
2626 for(i=0;i<l;++i)
2627 sum += coef[i] * kvalue[i];
2628 sum -= model->rho[0];
2629
2630 free(kvalue);
2631
2632 return sum * model->label[0];
2633 }
2634
// Compute raw decision values for x.  For one-class/regression models a
// single value is written to dec_values and returned (sign-thresholded for
// one-class).  For classification, all k*(k-1)/2 pairwise decision values
// are written and the majority-vote label is returned.
double svm_predict_values(const svm_model *model, const svm_node *x, double* dec_values)
{
	int i;
	if(model->param.svm_type == ONE_CLASS ||
	   model->param.svm_type == EPSILON_SVR ||
	   model->param.svm_type == NU_SVR)
	{
		double *sv_coef = model->sv_coef[0];
		double sum = 0;
		for(i=0;i<model->l;i++)
			sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param);
		sum -= model->rho[0];
		*dec_values = sum;

		if(model->param.svm_type == ONE_CLASS)
			return (sum>0)?1:-1;
		else
			return sum;
	}
	else
	{
		int nr_class = model->nr_class;
		int l = model->l;

		// Kernel values against every SV are shared by all pairwise
		// classifiers, so compute them once up front.
		double *kvalue = Malloc(double,l);
		for(i=0;i<l;i++)
			kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);

		// start[c]: offset of class c's SVs in the packed SV list.
		int *start = Malloc(int,nr_class);
		start[0] = 0;
		for(i=1;i<nr_class;i++)
			start[i] = start[i-1]+model->nSV[i-1];

		int *vote = Malloc(int,nr_class);
		for(i=0;i<nr_class;i++)
			vote[i] = 0;

		// One-vs-one: p enumerates class pairs (i,j), i<j, in the same
		// order used at training time.
		int p=0;
		for(i=0;i<nr_class;i++)
			for(int j=i+1;j<nr_class;j++)
			{
				double sum = 0;
				int si = start[i];
				int sj = start[j];
				int ci = model->nSV[i];
				int cj = model->nSV[j];

				int k;
				// Coefficients of class i for classifier (i,j) live in
				// sv_coef[j-1]; those of class j live in sv_coef[i].
				double *coef1 = model->sv_coef[j-1];
				double *coef2 = model->sv_coef[i];
				for(k=0;k<ci;k++)
					sum += coef1[si+k] * kvalue[si+k];
				for(k=0;k<cj;k++)
					sum += coef2[sj+k] * kvalue[sj+k];
				sum -= model->rho[p];
				dec_values[p] = sum;

				if(dec_values[p] > 0)
					++vote[i];
				else
					++vote[j];
				p++;
			}

		// Majority vote; ties go to the lower class index.
		int vote_max_idx = 0;
		for(i=1;i<nr_class;i++)
			if(vote[i] > vote[vote_max_idx])
				vote_max_idx = i;

		free(kvalue);
		free(start);
		free(vote);
		return model->label[vote_max_idx];
	}
}
2710
svm_predict(const svm_model * model,const svm_node * x)2711 double svm_predict(const svm_model *model, const svm_node *x)
2712 {
2713 int nr_class = model->nr_class;
2714 double *dec_values;
2715 if(model->param.svm_type == ONE_CLASS ||
2716 model->param.svm_type == EPSILON_SVR ||
2717 model->param.svm_type == NU_SVR)
2718 dec_values = Malloc(double, 1);
2719 else
2720 dec_values = Malloc(double, nr_class*(nr_class-1)/2);
2721 double pred_result = svm_predict_values(model, x, dec_values);
2722 free(dec_values);
2723 return pred_result;
2724 }
2725
// Predict with per-class probability estimates written to prob_estimates.
// Only available for classification models trained with probability
// information (probA/probB present); otherwise falls back to svm_predict.
double svm_predict_probability(
	const svm_model *model, const svm_node *x, double *prob_estimates)
{
	if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
	    model->probA!=NULL && model->probB!=NULL)
	{
		int i;
		int nr_class = model->nr_class;
		double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
		svm_predict_values(model, x, dec_values);

		// Clamp pairwise probabilities away from exactly 0/1 for
		// numerical stability in the coupling step below.
		double min_prob=1e-7;
		double **pairwise_prob=Malloc(double *,nr_class);
		for(i=0;i<nr_class;i++)
			pairwise_prob[i]=Malloc(double,nr_class);
		int k=0;
		// Map each pairwise decision value through its fitted sigmoid
		// (parameters probA[k], probB[k]); the matrix is kept consistent
		// via p(j|i,j) = 1 - p(i|i,j).
		for(i=0;i<nr_class;i++)
			for(int j=i+1;j<nr_class;j++)
			{
				pairwise_prob[i][j]=min(max(sigmoid_predict(dec_values[k],model->probA[k],model->probB[k]),min_prob),1-min_prob);
				pairwise_prob[j][i]=1-pairwise_prob[i][j];
				k++;
			}
		// Fill prob_estimates from the pairwise probability matrix.
		multiclass_probability(nr_class,pairwise_prob,prob_estimates);

		// Predicted label = class with the highest coupled probability
		// (ties resolve to the smaller index).
		int prob_max_idx = 0;
		for(i=1;i<nr_class;i++)
			if(prob_estimates[i] > prob_estimates[prob_max_idx])
				prob_max_idx = i;
		for(i=0;i<nr_class;i++)
			free(pairwise_prob[i]);
		free(dec_values);
		free(pairwise_prob);
		return model->label[prob_max_idx];
	}
	else
		return svm_predict(model, x);
}
2764
// Textual names for the svm_type / kernel_type enum values, in enum order;
// used by svm_save_model/svm_load_model for the model-file format.
// Both tables are NULL-terminated.
static const char *svm_type_table[] =
{
	"c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL
};

static const char *kernel_type_table[]=
{
	"linear","polynomial","rbf","sigmoid","precomputed",NULL
};
2774
svm_save_model(const char * model_file_name,const svm_model * model)2775 int svm_save_model(const char *model_file_name, const svm_model *model)
2776 {
2777 FILE *fp = fopen(model_file_name,"w");
2778 if(fp==NULL) return -1;
2779
2780 char *old_locale = strdup(setlocale(LC_ALL, NULL));
2781 setlocale(LC_ALL, "C");
2782
2783 const svm_parameter& param = model->param;
2784
2785 fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]);
2786 fprintf(fp,"kernel_type %s\n", kernel_type_table[param.kernel_type]);
2787
2788 if(param.kernel_type == POLY)
2789 fprintf(fp,"degree %d\n", param.degree);
2790
2791 if(param.kernel_type == POLY || param.kernel_type == RBF || param.kernel_type == SIGMOID)
2792 fprintf(fp,"gamma %g\n", param.gamma);
2793
2794 if(param.kernel_type == POLY || param.kernel_type == SIGMOID)
2795 fprintf(fp,"coef0 %g\n", param.coef0);
2796
2797 int nr_class = model->nr_class;
2798 int l = model->l;
2799 fprintf(fp, "nr_class %d\n", nr_class);
2800 fprintf(fp, "total_sv %d\n",l);
2801
2802 {
2803 fprintf(fp, "rho");
2804 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2805 fprintf(fp," %g",model->rho[i]);
2806 fprintf(fp, "\n");
2807 }
2808
2809 if(model->label)
2810 {
2811 fprintf(fp, "label");
2812 for(int i=0;i<nr_class;i++)
2813 fprintf(fp," %d",model->label[i]);
2814 fprintf(fp, "\n");
2815 }
2816
2817 if(model->probA) // regression has probA only
2818 {
2819 fprintf(fp, "probA");
2820 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2821 fprintf(fp," %g",model->probA[i]);
2822 fprintf(fp, "\n");
2823 }
2824 if(model->probB)
2825 {
2826 fprintf(fp, "probB");
2827 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2828 fprintf(fp," %g",model->probB[i]);
2829 fprintf(fp, "\n");
2830 }
2831
2832 if(model->nSV)
2833 {
2834 fprintf(fp, "nr_sv");
2835 for(int i=0;i<nr_class;i++)
2836 fprintf(fp," %d",model->nSV[i]);
2837 fprintf(fp, "\n");
2838 }
2839
2840 fprintf(fp, "SV\n");
2841 const double * const *sv_coef = model->sv_coef;
2842 const svm_node * const *SV = model->SV;
2843
2844 for(int i=0;i<l;i++)
2845 {
2846 for(int j=0;j<nr_class-1;j++)
2847 fprintf(fp, "%.16g ",sv_coef[j][i]);
2848
2849 const svm_node *p = SV[i];
2850
2851 if(param.kernel_type == PRECOMPUTED)
2852 fprintf(fp,"0:%d ",(int)(p->value));
2853 else
2854 while(p->index != -1)
2855 {
2856 fprintf(fp,"%d:%.8g ",p->index,p->value);
2857 p++;
2858 }
2859 fprintf(fp, "\n");
2860 }
2861
2862 setlocale(LC_ALL, old_locale);
2863 free(old_locale);
2864
2865 if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
2866 else return 0;
2867 }
2868
// Scratch line buffer shared by readline()/svm_load_model();
// allocated by the caller and grown on demand inside readline().
static char *line = NULL;
static int max_line_len;
2871
readline(FILE * input)2872 static char* readline(FILE *input)
2873 {
2874 int len;
2875
2876 if(fgets(line,max_line_len,input) == NULL)
2877 return NULL;
2878
2879 while(strrchr(line,'\n') == NULL)
2880 {
2881 max_line_len *= 2;
2882 line = (char *) realloc(line,max_line_len);
2883 len = (int) strlen(line);
2884 if(fgets(line+len,max_line_len-len,input) == NULL)
2885 break;
2886 }
2887 return line;
2888 }
2889
svm_load_model(const char * model_file_name)2890 svm_model *svm_load_model(const char *model_file_name)
2891 {
2892 FILE *fp = fopen(model_file_name,"rb");
2893 if(fp==NULL) return NULL;
2894
2895 char *old_locale = strdup(setlocale(LC_ALL, NULL));
2896 setlocale(LC_ALL, "C");
2897
2898 // read parameters
2899
2900 svm_model *model = Malloc(svm_model,1);
2901 svm_parameter& param = model->param;
2902 model->rho = NULL;
2903 model->probA = NULL;
2904 model->probB = NULL;
2905 model->label = NULL;
2906 model->nSV = NULL;
2907
2908 char cmd[81];
2909 while(1)
2910 {
2911 fscanf(fp,"%80s",cmd);
2912
2913 if(strcmp(cmd,"svm_type")==0)
2914 {
2915 fscanf(fp,"%80s",cmd);
2916 int i;
2917 for(i=0;svm_type_table[i];i++)
2918 {
2919 if(strcmp(svm_type_table[i],cmd)==0)
2920 {
2921 param.svm_type=i;
2922 break;
2923 }
2924 }
2925 if(svm_type_table[i] == NULL)
2926 {
2927 fprintf(stderr,"unknown svm type.\n");
2928
2929 setlocale(LC_ALL, old_locale);
2930 free(old_locale);
2931 free(model->rho);
2932 free(model->label);
2933 free(model->nSV);
2934 free(model);
2935 return NULL;
2936 }
2937 }
2938 else if(strcmp(cmd,"kernel_type")==0)
2939 {
2940 fscanf(fp,"%80s",cmd);
2941 int i;
2942 for(i=0;kernel_type_table[i];i++)
2943 {
2944 if(strcmp(kernel_type_table[i],cmd)==0)
2945 {
2946 param.kernel_type=i;
2947 break;
2948 }
2949 }
2950 if(kernel_type_table[i] == NULL)
2951 {
2952 fprintf(stderr,"unknown kernel function.\n");
2953
2954 setlocale(LC_ALL, old_locale);
2955 free(old_locale);
2956 free(model->rho);
2957 free(model->label);
2958 free(model->nSV);
2959 free(model);
2960 return NULL;
2961 }
2962 }
2963 else if(strcmp(cmd,"degree")==0)
2964 fscanf(fp,"%d",¶m.degree);
2965 else if(strcmp(cmd,"gamma")==0)
2966 fscanf(fp,"%lf",¶m.gamma);
2967 else if(strcmp(cmd,"coef0")==0)
2968 fscanf(fp,"%lf",¶m.coef0);
2969 else if(strcmp(cmd,"nr_class")==0)
2970 fscanf(fp,"%d",&model->nr_class);
2971 else if(strcmp(cmd,"total_sv")==0)
2972 fscanf(fp,"%d",&model->l);
2973 else if(strcmp(cmd,"rho")==0)
2974 {
2975 int n = model->nr_class * (model->nr_class-1)/2;
2976 model->rho = Malloc(double,n);
2977 for(int i=0;i<n;i++)
2978 fscanf(fp,"%lf",&model->rho[i]);
2979 }
2980 else if(strcmp(cmd,"label")==0)
2981 {
2982 int n = model->nr_class;
2983 model->label = Malloc(int,n);
2984 for(int i=0;i<n;i++)
2985 fscanf(fp,"%d",&model->label[i]);
2986 }
2987 else if(strcmp(cmd,"probA")==0)
2988 {
2989 int n = model->nr_class * (model->nr_class-1)/2;
2990 model->probA = Malloc(double,n);
2991 for(int i=0;i<n;i++)
2992 fscanf(fp,"%lf",&model->probA[i]);
2993 }
2994 else if(strcmp(cmd,"probB")==0)
2995 {
2996 int n = model->nr_class * (model->nr_class-1)/2;
2997 model->probB = Malloc(double,n);
2998 for(int i=0;i<n;i++)
2999 fscanf(fp,"%lf",&model->probB[i]);
3000 }
3001 else if(strcmp(cmd,"nr_sv")==0)
3002 {
3003 int n = model->nr_class;
3004 model->nSV = Malloc(int,n);
3005 for(int i=0;i<n;i++)
3006 fscanf(fp,"%d",&model->nSV[i]);
3007 }
3008 else if(strcmp(cmd,"SV")==0)
3009 {
3010 while(1)
3011 {
3012 int c = getc(fp);
3013 if(c==EOF || c=='\n') break;
3014 }
3015 break;
3016 }
3017 else
3018 {
3019 fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
3020
3021 setlocale(LC_ALL, old_locale);
3022 free(old_locale);
3023 free(model->rho);
3024 free(model->label);
3025 free(model->nSV);
3026 free(model);
3027 return NULL;
3028 }
3029 }
3030
3031 // read sv_coef and SV
3032
3033 int elements = 0;
3034 long pos = ftell(fp);
3035
3036 max_line_len = 1024;
3037 line = Malloc(char,max_line_len);
3038 char *p,*endptr,*idx,*val;
3039
3040 while(readline(fp)!=NULL)
3041 {
3042 p = strtok(line,":");
3043 while(1)
3044 {
3045 p = strtok(NULL,":");
3046 if(p == NULL)
3047 break;
3048 ++elements;
3049 }
3050 }
3051 elements += model->l;
3052
3053 fseek(fp,pos,SEEK_SET);
3054
3055 int m = model->nr_class - 1;
3056 int l = model->l;
3057 model->sv_coef = Malloc(double *,m);
3058 int i;
3059 for(i=0;i<m;i++)
3060 model->sv_coef[i] = Malloc(double,l);
3061 model->SV = Malloc(svm_node*,l);
3062 svm_node *x_space = NULL;
3063 if(l>0) x_space = Malloc(svm_node,elements);
3064
3065 int j=0;
3066 for(i=0;i<l;i++)
3067 {
3068 readline(fp);
3069 model->SV[i] = &x_space[j];
3070
3071 p = strtok(line, " \t");
3072 model->sv_coef[0][i] = strtod(p,&endptr);
3073 for(int k=1;k<m;k++)
3074 {
3075 p = strtok(NULL, " \t");
3076 model->sv_coef[k][i] = strtod(p,&endptr);
3077 }
3078
3079 while(1)
3080 {
3081 idx = strtok(NULL, ":");
3082 val = strtok(NULL, " \t");
3083
3084 if(val == NULL)
3085 break;
3086 x_space[j].index = (int) strtol(idx,&endptr,10);
3087 x_space[j].value = strtod(val,&endptr);
3088
3089 ++j;
3090 }
3091 x_space[j++].index = -1;
3092 }
3093 free(line);
3094
3095 setlocale(LC_ALL, old_locale);
3096 free(old_locale);
3097
3098 if (ferror(fp) != 0 || fclose(fp) != 0)
3099 return NULL;
3100
3101 model->free_sv = 1; // XXX
3102 return model;
3103 }
3104
svm_free_model_content(svm_model * model_ptr)3105 void svm_free_model_content(svm_model* model_ptr)
3106 {
3107 if(model_ptr->free_sv && model_ptr->l > 0 && model_ptr->SV != NULL)
3108 free((void *)(model_ptr->SV[0]));
3109 if(model_ptr->sv_coef)
3110 {
3111 for(int i=0;i<model_ptr->nr_class-1;i++)
3112 free(model_ptr->sv_coef[i]);
3113 }
3114
3115 free(model_ptr->SV);
3116 model_ptr->SV = NULL;
3117
3118 free(model_ptr->sv_coef);
3119 model_ptr->sv_coef = NULL;
3120
3121 free(model_ptr->rho);
3122 model_ptr->rho = NULL;
3123
3124 free(model_ptr->label);
3125 model_ptr->label= NULL;
3126
3127 free(model_ptr->probA);
3128 model_ptr->probA = NULL;
3129
3130 free(model_ptr->probB);
3131 model_ptr->probB= NULL;
3132
3133 free(model_ptr->nSV);
3134 model_ptr->nSV = NULL;
3135 }
3136
svm_free_and_destroy_model(svm_model ** model_ptr_ptr)3137 void svm_free_and_destroy_model(svm_model** model_ptr_ptr)
3138 {
3139 if(model_ptr_ptr != NULL && *model_ptr_ptr != NULL)
3140 {
3141 svm_free_model_content(*model_ptr_ptr);
3142 free(*model_ptr_ptr);
3143 *model_ptr_ptr = NULL;
3144 }
3145 }
3146
svm_destroy_param(svm_parameter * param)3147 void svm_destroy_param(svm_parameter* param)
3148 {
3149 free(param->weight_label);
3150 free(param->weight);
3151 }
3152
// Validate (prob, param) before training. Returns NULL when valid,
// otherwise a static string describing the first problem found.
const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
{
	// svm_type

	int svm_type = param->svm_type;
	if(svm_type != C_SVC &&
	   svm_type != NU_SVC &&
	   svm_type != ONE_CLASS &&
	   svm_type != EPSILON_SVR &&
	   svm_type != NU_SVR)
		return "unknown svm type";

	// kernel_type, degree

	int kernel_type = param->kernel_type;
	if(kernel_type != LINEAR &&
	   kernel_type != POLY &&
	   kernel_type != RBF &&
	   kernel_type != SIGMOID &&
	   kernel_type != PRECOMPUTED)
		return "unknown kernel type";

	if(param->gamma < 0)
		return "gamma < 0";

	if(param->degree < 0)
		return "degree of polynomial kernel < 0";

	// cache_size,eps,C,nu,p,shrinking

	if(param->cache_size <= 0)
		return "cache_size <= 0";

	if(param->eps <= 0)
		return "eps <= 0";

	// C applies only to C-SVC and the two regression formulations.
	if(svm_type == C_SVC ||
	   svm_type == EPSILON_SVR ||
	   svm_type == NU_SVR)
		if(param->C <= 0)
			return "C <= 0";

	// nu applies only to the nu-parameterized formulations; it must be in (0,1].
	if(svm_type == NU_SVC ||
	   svm_type == ONE_CLASS ||
	   svm_type == NU_SVR)
		if(param->nu <= 0 || param->nu > 1)
			return "nu <= 0 or nu > 1";

	// p is the epsilon-tube width, used only by epsilon-SVR.
	if(svm_type == EPSILON_SVR)
		if(param->p < 0)
			return "p < 0";

	if(param->shrinking != 0 &&
	   param->shrinking != 1)
		return "shrinking != 0 and shrinking != 1";

	if(param->probability != 0 &&
	   param->probability != 1)
		return "probability != 0 and probability != 1";

	if(param->probability == 1 &&
	   svm_type == ONE_CLASS)
		return "one-class SVM probability output not supported yet";


	// check whether nu-svc is feasible

	if(svm_type == NU_SVC)
	{
		// nu-SVC is infeasible when nu*(n1+n2)/2 > min(n1,n2) for any pair
		// of classes. Counts are doubles because they are weighted sums:
		// prob->W holds per-instance weights in this build of libsvm.
		int l = prob->l;
		int max_nr_class = 16;
		int nr_class = 0;
		int *label = Malloc(int,max_nr_class);
		double *count = Malloc(double,max_nr_class);

		// Accumulate the (weighted) size of each distinct class label.
		int i;
		for(i=0;i<l;i++)
		{
			int this_label = (int)prob->y[i];
			int j;
			for(j=0;j<nr_class;j++)
				if(this_label == label[j])
				{
					count[j] += prob->W[i];
					break;
				}
			if(j == nr_class)
			{
				// New label: grow the tables geometrically when full.
				if(nr_class == max_nr_class)
				{
					max_nr_class *= 2;
					label = (int *)realloc(label,max_nr_class*sizeof(int));
					count = (double *)realloc(count,max_nr_class*sizeof(double));
				}
				label[nr_class] = this_label;
				count[nr_class] = prob->W[i];
				++nr_class;
			}
		}

		// Check the feasibility condition for every class pair.
		for(i=0;i<nr_class;i++)
		{
			double n1 = count[i];
			for(int j=i+1;j<nr_class;j++)
			{
				double n2 = count[j];
				if(param->nu*(n1+n2)/2 > min(n1,n2))
				{
					free(label);
					free(count);
					return "specified nu is infeasible";
				}
			}
		}
		free(label);
		free(count);
	}

	return NULL;
}
3273
svm_check_probability_model(const svm_model * model)3274 int svm_check_probability_model(const svm_model *model)
3275 {
3276 return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
3277 model->probA!=NULL && model->probB!=NULL) ||
3278 ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
3279 model->probA!=NULL);
3280 }
3281
svm_set_print_string_function(void (* print_func)(const char *))3282 void svm_set_print_string_function(void (*print_func)(const char *))
3283 {
3284 if(print_func == NULL)
3285 svm_print_string = &print_string_stdout;
3286 else
3287 svm_print_string = print_func;
3288 }
3289