1 /*
2
3 This code was extracted from libsvm 3.2.3 in Feb 2019 and
4 modified for the use with Octave and Matlab
5
6
7 Copyright (c) 2000-2019 Chih-Chung Chang and Chih-Jen Lin
8 All rights reserved.
9
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions
12 are met:
13
14 1. Redistributions of source code must retain the above copyright
15 notice, this list of conditions and the following disclaimer.
16
17 2. Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 3. Neither name of copyright holders nor the names of its contributors
22 may be used to endorse or promote products derived from this software
23 without specific prior written permission.
24
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR
30 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 #include <math.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <ctype.h>
43 #include <float.h>
44 #include <string.h>
45 #include <stdarg.h>
46 #include <limits.h>
47 #include <locale.h>
48 #include "svm.h"
// Library version number reported to callers (value defined in svm.h).
int libsvm_version = LIBSVM_VERSION;
// Cached kernel-matrix entries are stored in single precision to halve memory use.
typedef float Qfloat;
// Shorthand for the +1/-1 label type used throughout the solvers.
typedef signed char schar;
#ifndef min
// Smaller of two values (guarded: some platforms define a min macro).
template <class T> static inline T min(T x,T y)
{
	if(x < y)
		return x;
	return y;
}
#endif
#ifndef max
// Larger of two values (guarded: some platforms define a max macro).
template <class T> static inline T max(T x,T y)
{
	if(x > y)
		return x;
	return y;
}
#endif
// Exchange the contents of x and y in place.
template <class T> static inline void swap(T& x, T& y)
{
	T tmp = x;
	x = y;
	y = tmp;
}
// Copy src[0..n) into a freshly allocated array bound to dst.
// The caller owns dst and must delete[] it. The copy is done with memcpy,
// so this is only safe for trivially copyable element types, which matches
// every use in this file (pointers, doubles, schar).
template <class S, class T> static inline void clone(T*& dst, S* src, int n)
{
	dst = new T[n];
	memcpy((void *)dst,(void *)src,sizeof(T)*n);
}
// Compute base raised to an integer exponent by binary (square-and-multiply)
// exponentiation; returns 1.0 whenever times <= 0, matching how the POLY
// kernel uses it.
static inline double powi(double base, int times)
{
	double square = base;
	double result = 1.0;
	int e = times;

	while(e > 0)
	{
		if(e & 1)
			result *= square;
		square *= square;
		e >>= 1;
	}
	return result;
}
// Sentinel larger than any finite gradient/objective value used below.
#define INF HUGE_VAL
// Small positive value substituted for non-positive second derivatives
// (guards the two-variable subproblem against non-PSD kernels).
#define TAU 1e-12
// malloc with the element type and count spelled out once.
#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
78
// Default output sink: write s to stdout and flush immediately so progress
// dots appear while training runs.
static void print_string_stdout(const char *s)
{
	fprintf(stdout, "%s", s);
	fflush(stdout);
}
84 static void (*svm_print_string) (const char *) = &print_string_stdout;
85 #if 1
info(const char * fmt,...)86 static void info(const char *fmt,...)
87 {
88 char buf[BUFSIZ];
89 va_list ap;
90 va_start(ap,fmt);
91 vsprintf(buf,fmt,ap);
92 va_end(ap);
93 (*svm_print_string)(buf);
94 }
95 #else
info(const char * fmt,...)96 static void info(const char *fmt,...) {}
97 #endif
98
99 //
100 // Kernel Cache
101 //
102 // l is the number of total data items
103 // size is the cache size limit in bytes
104 //
// LRU cache of kernel-matrix columns: columns are computed on demand and
// evicted in least-recently-used order once the byte budget is exhausted.
class Cache
{
public:
	Cache(int l,long int size);
	~Cache();

	// request data [0,len)
	// return some position p where [p,len) need to be filled
	// (p >= len if nothing needs to be filled)
	int get_data(const int index, Qfloat **data, int len);
	// Keep cached columns consistent after the solver permutes examples i and j.
	void swap_index(int i, int j);
private:
	int l;		// number of data items (one potential cache entry per item)
	long int size;	// remaining budget, counted in Qfloat elements (not bytes)
	struct head_t
	{
		head_t *prev, *next;	// a circular list
		Qfloat *data;	// cached column values (NULL when not resident)
		int len;	// data[0,len) is cached in this entry
	};

	head_t *head;	// one entry per data item, indexed directly
	head_t lru_head;	// sentinel node of the circular LRU list
	void lru_delete(head_t *h);	// unlink h from the LRU list
	void lru_insert(head_t *h);	// append h at the most-recently-used end
};
131
// l_   : number of data items
// size_: cache budget in bytes. Converted below to a count of Qfloat
//        elements, minus the bookkeeping overhead of the head array,
//        then floored at two full columns so the solver can always hold
//        the pair of columns it works on.
Cache::Cache(int l_,long int size_):l(l_),size(size_)
{
	head = (head_t *)calloc(l,sizeof(head_t));	// initialized to 0
	size /= sizeof(Qfloat);
	size -= l * sizeof(head_t) / sizeof(Qfloat);
	size = max(size, 2 * (long int) l);	// cache must be large enough for two columns
	lru_head.next = lru_head.prev = &lru_head;
}
140
~Cache()141 Cache::~Cache()
142 {
143 for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
144 free(h->data);
145 free(head);
146 }
147
lru_delete(head_t * h)148 void Cache::lru_delete(head_t *h)
149 {
150 // delete from current location
151 h->prev->next = h->next;
152 h->next->prev = h->prev;
153 }
154
lru_insert(head_t * h)155 void Cache::lru_insert(head_t *h)
156 {
157 // insert to last position
158 h->next = &lru_head;
159 h->prev = lru_head.prev;
160 h->prev->next = h;
161 h->next->prev = h;
162 }
163
// Hand out cached column `index` through *data. The return value p means
// entries [p,len) are NOT yet filled and must be computed by the caller
// (p >= len when the whole requested prefix was already cached).
int Cache::get_data(const int index, Qfloat **data, int len)
{
	head_t *h = &head[index];
	// Temporarily unlink; the entry is re-inserted at the MRU end below.
	if(h->len) lru_delete(h);
	int more = len - h->len;	// Qfloats of extra room still needed

	if(more > 0)
	{
		// free old space: evict LRU entries until the budget covers `more`
		while(size < more)
		{
			head_t *old = lru_head.next;
			lru_delete(old);
			free(old->data);
			size += old->len;
			old->data = 0;
			old->len = 0;
		}

		// allocate new space (realloc grows in place, keeping the old prefix)
		h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len);
		size -= more;
		// Record the new length while making `len` the old one, so the
		// return below tells the caller where filling must start.
		swap(h->len,len);
	}

	lru_insert(h);
	*data = h->data;
	return len;
}
193
// Swap the cached state of examples i and j: their own entries are exchanged
// wholesale, and elements i and j are swapped inside every other resident
// column; columns long enough to hold i but not j cannot be patched
// consistently and are evicted instead.
void Cache::swap_index(int i, int j)
{
	if(i==j) return;

	if(head[i].len) lru_delete(&head[i]);
	if(head[j].len) lru_delete(&head[j]);
	swap(head[i].data,head[j].data);
	swap(head[i].len,head[j].len);
	if(head[i].len) lru_insert(&head[i]);
	if(head[j].len) lru_insert(&head[j]);

	if(i>j) swap(i,j);	// ensure i < j for the length tests below
	for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
	{
		if(h->len > i)
		{
			if(h->len > j)
				swap(h->data[i],h->data[j]);
			else
			{
				// give up
				lru_delete(h);
				free(h->data);
				size += h->len;
				h->data = 0;
				h->len = 0;
			}
		}
	}
}
224
225 //
226 // Kernel evaluation
227 //
228 // the static method k_function is for doing single kernel evaluation
229 // the constructor of Kernel prepares to calculate the l*l kernel matrix
230 // the member function get_Q is for getting one column from the Q Matrix
231 //
// Abstract interface through which the solver reads the (possibly cached) Q matrix.
class QMatrix {
public:
	// Return a pointer to the first `len` entries of column `column`.
	virtual Qfloat *get_Q(int column, int len) const = 0;
	// Return the precomputed diagonal Q_ii.
	virtual double *get_QD() const = 0;
	// Permute examples i and j in all internal state (const in name only).
	virtual void swap_index(int i, int j) const = 0;
	virtual ~QMatrix() {}
};
239
// Base class that evaluates kernel values k(x_i, x_j) over the training set;
// the derived *_Q classes add label signs and caching on top of it.
class Kernel: public QMatrix {
public:
	Kernel(int l, svm_node * const * x, const svm_parameter& param);
	virtual ~Kernel();

	// Single kernel evaluation between two arbitrary sparse vectors
	// (used at prediction time, independent of any Kernel instance).
	static double k_function(const svm_node *x, const svm_node *y,
				 const svm_parameter& param);
	virtual Qfloat *get_Q(int column, int len) const = 0;
	virtual double *get_QD() const = 0;
	virtual void swap_index(int i, int j) const	// not really const: mutates x / x_square
	{
		swap(x[i],x[j]);
		if(x_square) swap(x_square[i],x_square[j]);
	}
protected:

	// Member-function pointer bound once in the constructor, so the inner
	// loops avoid a per-call switch on kernel_type.
	double (Kernel::*kernel_function)(int i, int j) const;

private:
	const svm_node **x;	// cloned array of pointers to the training vectors
	double *x_square;	// ||x_i||^2 precomputed for RBF only, else NULL

	// svm_parameter
	const int kernel_type;
	const int degree;
	const double gamma;
	const double coef0;

	// Sparse dot product of two index-sorted vectors.
	static double dot(const svm_node *px, const svm_node *py);
	double kernel_linear(int i, int j) const
	{
		return dot(x[i],x[j]);
	}
	double kernel_poly(int i, int j) const
	{
		return powi(gamma*dot(x[i],x[j])+coef0,degree);
	}
	double kernel_rbf(int i, int j) const
	{
		// exp(-gamma*||x_i - x_j||^2), expanded via the cached squared norms
		return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
	}
	double kernel_sigmoid(int i, int j) const
	{
		return tanh(gamma*dot(x[i],x[j])+coef0);
	}
	double kernel_precomputed(int i, int j) const
	{
		// x[j][0].value holds the column index into the precomputed
		// kernel row stored at x[i].
		return x[i][(int)(x[j][0].value)].value;
	}
};
290
// Bind the kernel function matching param.kernel_type and clone the data
// pointer array; for RBF, precompute ||x_i||^2 for every training vector.
// Assumes param was validated beforehand — an unknown kernel_type would
// leave kernel_function unset.
Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param)
:kernel_type(param.kernel_type), degree(param.degree),
 gamma(param.gamma), coef0(param.coef0)
{
	switch(kernel_type)
	{
		case LINEAR:
			kernel_function = &Kernel::kernel_linear;
			break;
		case POLY:
			kernel_function = &Kernel::kernel_poly;
			break;
		case RBF:
			kernel_function = &Kernel::kernel_rbf;
			break;
		case SIGMOID:
			kernel_function = &Kernel::kernel_sigmoid;
			break;
		case PRECOMPUTED:
			kernel_function = &Kernel::kernel_precomputed;
			break;
	}

	clone(x,x_,l);

	if(kernel_type == RBF)
	{
		x_square = new double[l];
		for(int i=0;i<l;i++)
			x_square[i] = dot(x[i],x[i]);
	}
	else
		x_square = 0;
}
325
~Kernel()326 Kernel::~Kernel()
327 {
328 delete[] x;
329 delete[] x_square;
330 }
331
dot(const svm_node * px,const svm_node * py)332 double Kernel::dot(const svm_node *px, const svm_node *py)
333 {
334 double sum = 0;
335 while(px->index != -1 && py->index != -1)
336 {
337 if(px->index == py->index)
338 {
339 sum += px->value * py->value;
340 ++px;
341 ++py;
342 }
343 else
344 {
345 if(px->index > py->index)
346 ++py;
347 else
348 ++px;
349 }
350 }
351 return sum;
352 }
353
// Evaluate the kernel between two arbitrary sparse vectors (prediction path).
// Both vectors are index-sorted and terminated by index == -1.
double Kernel::k_function(const svm_node *x, const svm_node *y,
			  const svm_parameter& param)
{
	switch(param.kernel_type)
	{
		case LINEAR:
			return dot(x,y);
		case POLY:
			return powi(param.gamma*dot(x,y)+param.coef0,param.degree);
		case RBF:
		{
			// ||x-y||^2 via a merge over the two sparse vectors;
			// an index present in only one vector contributes value^2.
			double sum = 0;
			while(x->index != -1 && y->index !=-1)
			{
				if(x->index == y->index)
				{
					double d = x->value - y->value;
					sum += d*d;
					++x;
					++y;
				}
				else
				{
					if(x->index > y->index)
					{
						sum += y->value * y->value;
						++y;
					}
					else
					{
						sum += x->value * x->value;
						++x;
					}
				}
			}

			// Tail of whichever vector has entries left over.
			while(x->index != -1)
			{
				sum += x->value * x->value;
				++x;
			}

			while(y->index != -1)
			{
				sum += y->value * y->value;
				++y;
			}

			return exp(-param.gamma*sum);
		}
		case SIGMOID:
			return tanh(param.gamma*dot(x,y)+param.coef0);
		case PRECOMPUTED:  //x: test (validation), y: SV
			return x[(int)(y->value)].value;
		default:
			return 0;  // Unreachable
	}
}
412
413 // An SMO algorithm in Fan et al., JMLR 6(2005), p. 1889--1918
414 // Solves:
415 //
416 // min 0.5(\alpha^T Q \alpha) + p^T \alpha
417 //
418 // y^T \alpha = \delta
419 // y_i = +1 or -1
420 // 0 <= alpha_i <= Cp for y_i = 1
421 // 0 <= alpha_i <= Cn for y_i = -1
422 //
423 // Given:
424 //
425 // Q, p, y, Cp, Cn, and an initial feasible point \alpha
426 // l is the size of vectors and matrices
427 // eps is the stopping tolerance
428 //
429 // solution will be put in \alpha, objective value will be put in obj
430 //
// Generic SMO solver (see the header comment above for the problem it solves).
class Solver {
public:
	Solver() {};
	virtual ~Solver() {};

	// Results reported back to the caller of Solve().
	struct SolutionInfo {
		double obj;	// objective value at the solution
		double rho;	// value returned by calculate_rho()
		double upper_bound_p;	// Cp, echoed back to the caller
		double upper_bound_n;	// Cn, echoed back to the caller
		double r;	// for Solver_NU
	};

	void Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
		   double *alpha_, double Cp, double Cn, double eps,
		   SolutionInfo* si, int shrinking);
protected:
	int active_size;	// the first active_size variables are not shrunk
	schar *y;	// labels, +1 or -1
	double *G;	// gradient of objective function
	enum { LOWER_BOUND, UPPER_BOUND, FREE };
	char *alpha_status;	// LOWER_BOUND, UPPER_BOUND, FREE
	double *alpha;	// current (feasible) solution
	const QMatrix *Q;	// kernel/Q-matrix provider
	const double *QD;	// diagonal of Q
	double eps;	// stopping tolerance
	double Cp,Cn;	// per-class box bounds
	double *p;	// linear term of the objective
	int *active_set;	// permutation: active position -> original index
	double *G_bar;	// gradient, if we treat free variables as 0
	int l;	// problem size
	bool unshrink;	// XXX

	// Box bound for variable i, depending on its label.
	double get_C(int i)
	{
		return (y[i] > 0)? Cp : Cn;
	}
	// Re-classify alpha[i] as at-lower-bound, at-upper-bound or free.
	void update_alpha_status(int i)
	{
		if(alpha[i] >= get_C(i))
			alpha_status[i] = UPPER_BOUND;
		else if(alpha[i] <= 0)
			alpha_status[i] = LOWER_BOUND;
		else alpha_status[i] = FREE;
	}
	bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
	bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
	bool is_free(int i) { return alpha_status[i] == FREE; }
	void swap_index(int i, int j);
	void reconstruct_gradient();
	virtual int select_working_set(int &i, int &j);
	virtual double calculate_rho();
	virtual void do_shrinking();
private:
	bool be_shrunk(int i, double Gmax1, double Gmax2);
};
487
swap_index(int i,int j)488 void Solver::swap_index(int i, int j)
489 {
490 Q->swap_index(i,j);
491 swap(y[i],y[j]);
492 swap(G[i],G[j]);
493 swap(alpha_status[i],alpha_status[j]);
494 swap(alpha[i],alpha[j]);
495 swap(p[i],p[j]);
496 swap(active_set[i],active_set[j]);
497 swap(G_bar[i],G_bar[j]);
498 }
499
// Rebuild G for the inactive variables from G_bar (contribution of variables
// stuck at their upper bound) plus the contribution of the free variables.
void Solver::reconstruct_gradient()
{
	// reconstruct inactive elements of G from G_bar and free variables

	if(active_size == l) return;

	int i,j;
	int nr_free = 0;

	for(j=active_size;j<l;j++)
		G[j] = G_bar[j] + p[j];

	for(j=0;j<active_size;j++)
		if(is_free(j))
			nr_free++;

	if(2*nr_free < active_size)
		info("\nWARNING: using -h 0 may be faster\n");

	// Two equivalent accumulation orders; pick the one that reads fewer
	// kernel entries given how many active variables are free.
	if (nr_free*l > 2*active_size*(l-active_size))
	{
		for(i=active_size;i<l;i++)
		{
			const Qfloat *Q_i = Q->get_Q(i,active_size);
			for(j=0;j<active_size;j++)
				if(is_free(j))
					G[i] += alpha[j] * Q_i[j];
		}
	}
	else
	{
		for(i=0;i<active_size;i++)
			if(is_free(i))
			{
				const Qfloat *Q_i = Q->get_Q(i,l);
				double alpha_i = alpha[i];
				for(j=active_size;j<l;j++)
					G[j] += alpha_i * Q_i[j];
			}
	}
}
541
// Run SMO on an l-variable problem (notation as in the header comment above):
// Q and p_ define the objective, y_ the labels, alpha_ a feasible start.
// On return alpha_ holds the solution in the caller's original index order
// and si carries the objective value, rho and the box bounds.
void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
		   double *alpha_, double Cp, double Cn, double eps,
		   SolutionInfo* si, int shrinking)
{
	this->l = l;
	this->Q = &Q;
	QD=Q.get_QD();
	// Private copies: the solver permutes these arrays while shrinking.
	clone(p, p_,l);
	clone(y, y_,l);
	clone(alpha,alpha_,l);
	this->Cp = Cp;
	this->Cn = Cn;
	this->eps = eps;
	unshrink = false;

	// initialize alpha_status
	{
		alpha_status = new char[l];
		for(int i=0;i<l;i++)
			update_alpha_status(i);
	}

	// initialize active set (for shrinking)
	{
		active_set = new int[l];
		for(int i=0;i<l;i++)
			active_set[i] = i;
		active_size = l;
	}

	// initialize gradient: only non-zero alphas contribute; G_bar collects
	// the part contributed by variables at their upper bound.
	{
		G = new double[l];
		G_bar = new double[l];
		int i;
		for(i=0;i<l;i++)
		{
			G[i] = p[i];
			G_bar[i] = 0;
		}
		for(i=0;i<l;i++)
			if(!is_lower_bound(i))
			{
				const Qfloat *Q_i = Q.get_Q(i,l);
				double alpha_i = alpha[i];
				int j;
				for(j=0;j<l;j++)
					G[j] += alpha_i*Q_i[j];
				if(is_upper_bound(i))
					for(j=0;j<l;j++)
						G_bar[j] += get_C(i) * Q_i[j];
			}
	}

	// optimization step

	int iter = 0;
	// Iteration cap scales with l but saturates before 100*l can overflow int.
	int max_iter = max(10000000, l>INT_MAX/100 ? INT_MAX : 100*l);
	int counter = min(l,1000)+1;

	while(iter < max_iter)
	{
		// show progress and do shrinking

		if(--counter == 0)
		{
			counter = min(l,1000);
			if(shrinking) do_shrinking();
			info(".");
		}

		int i,j;
		if(select_working_set(i,j)!=0)
		{
			// Optimal on the active set: reconstruct the whole
			// gradient and re-check optimality over all variables.
			reconstruct_gradient();
			// reset active set size and check
			active_size = l;
			info("*");
			if(select_working_set(i,j)!=0)
				break;
			else
				counter = 1;	// do shrinking next iteration
		}

		++iter;

		// update alpha[i] and alpha[j], handle bounds carefully

		const Qfloat *Q_i = Q.get_Q(i,active_size);
		const Qfloat *Q_j = Q.get_Q(j,active_size);

		double C_i = get_C(i);
		double C_j = get_C(j);

		double old_alpha_i = alpha[i];
		double old_alpha_j = alpha[j];

		// Analytic solution of the two-variable subproblem, clipped back
		// into the feasible box; the label cases differ in which linear
		// combination of alpha_i and alpha_j is conserved.
		if(y[i]!=y[j])
		{
			double quad_coef = QD[i]+QD[j]+2*Q_i[j];
			if (quad_coef <= 0)
				quad_coef = TAU;	// guard against non-PSD kernels
			double delta = (-G[i]-G[j])/quad_coef;
			double diff = alpha[i] - alpha[j];
			alpha[i] += delta;
			alpha[j] += delta;

			if(diff > 0)
			{
				if(alpha[j] < 0)
				{
					alpha[j] = 0;
					alpha[i] = diff;
				}
			}
			else
			{
				if(alpha[i] < 0)
				{
					alpha[i] = 0;
					alpha[j] = -diff;
				}
			}
			if(diff > C_i - C_j)
			{
				if(alpha[i] > C_i)
				{
					alpha[i] = C_i;
					alpha[j] = C_i - diff;
				}
			}
			else
			{
				if(alpha[j] > C_j)
				{
					alpha[j] = C_j;
					alpha[i] = C_j + diff;
				}
			}
		}
		else
		{
			double quad_coef = QD[i]+QD[j]-2*Q_i[j];
			if (quad_coef <= 0)
				quad_coef = TAU;
			double delta = (G[i]-G[j])/quad_coef;
			double sum = alpha[i] + alpha[j];
			alpha[i] -= delta;
			alpha[j] += delta;

			if(sum > C_i)
			{
				if(alpha[i] > C_i)
				{
					alpha[i] = C_i;
					alpha[j] = sum - C_i;
				}
			}
			else
			{
				if(alpha[j] < 0)
				{
					alpha[j] = 0;
					alpha[i] = sum;
				}
			}
			if(sum > C_j)
			{
				if(alpha[j] > C_j)
				{
					alpha[j] = C_j;
					alpha[i] = sum - C_j;
				}
			}
			else
			{
				if(alpha[i] < 0)
				{
					alpha[i] = 0;
					alpha[j] = sum;
				}
			}
		}

		// update G

		double delta_alpha_i = alpha[i] - old_alpha_i;
		double delta_alpha_j = alpha[j] - old_alpha_j;

		for(int k=0;k<active_size;k++)
		{
			G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
		}

		// update alpha_status and G_bar

		{
			bool ui = is_upper_bound(i);
			bool uj = is_upper_bound(j);
			update_alpha_status(i);
			update_alpha_status(j);
			int k;
			// G_bar changes only when a variable crosses its upper
			// bound, which requires the full column of Q.
			if(ui != is_upper_bound(i))
			{
				Q_i = Q.get_Q(i,l);
				if(ui)
					for(k=0;k<l;k++)
						G_bar[k] -= C_i * Q_i[k];
				else
					for(k=0;k<l;k++)
						G_bar[k] += C_i * Q_i[k];
			}

			if(uj != is_upper_bound(j))
			{
				Q_j = Q.get_Q(j,l);
				if(uj)
					for(k=0;k<l;k++)
						G_bar[k] -= C_j * Q_j[k];
				else
					for(k=0;k<l;k++)
						G_bar[k] += C_j * Q_j[k];
			}
		}
	}

	if(iter >= max_iter)
	{
		if(active_size < l)
		{
			// reconstruct the whole gradient to calculate objective value
			reconstruct_gradient();
			active_size = l;
			info("*");
		}
		fprintf(stderr,"\nWARNING: reaching max number of iterations\n");
	}

	// calculate rho

	si->rho = calculate_rho();

	// calculate objective value
	{
		double v = 0;
		int i;
		for(i=0;i<l;i++)
			v += alpha[i] * (G[i] + p[i]);

		si->obj = v/2;
	}

	// put back the solution (undo the shrinking permutation)
	{
		for(int i=0;i<l;i++)
			alpha_[active_set[i]] = alpha[i];
	}

	// juggle everything back
	/*{
		for(int i=0;i<l;i++)
			while(active_set[i] != i)
				swap_index(i,active_set[i]);
				// or Q.swap_index(i,active_set[i]);
	}*/

	si->upper_bound_p = Cp;
	si->upper_bound_n = Cn;

	info("\noptimization finished, #iter = %d\n",iter);

	delete[] p;
	delete[] y;
	delete[] alpha;
	delete[] alpha_status;
	delete[] active_set;
	delete[] G;
	delete[] G_bar;
}
822
// return 1 if already optimal, return 0 otherwise
int Solver::select_working_set(int &out_i, int &out_j)
{
	// return i,j such that
	// i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
	// j: minimizes the decrease of obj value
	//    (if quadratic coefficient <= 0, replace it with tau)
	//    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)

	double Gmax = -INF;
	double Gmax2 = -INF;	// tracked only for the stopping test at the end
	int Gmax_idx = -1;
	int Gmin_idx = -1;
	double obj_diff_min = INF;

	// First pass: maximal violating index i (first-order information).
	for(int t=0;t<active_size;t++)
		if(y[t]==+1)
		{
			if(!is_upper_bound(t))
				if(-G[t] >= Gmax)
				{
					Gmax = -G[t];
					Gmax_idx = t;
				}
		}
		else
		{
			if(!is_lower_bound(t))
				if(G[t] >= Gmax)
				{
					Gmax = G[t];
					Gmax_idx = t;
				}
		}

	int i = Gmax_idx;
	const Qfloat *Q_i = NULL;
	if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1
		Q_i = Q->get_Q(i,active_size);

	// Second pass: pick j by second-order information — the index giving
	// the largest decrease of the objective when paired with i.
	for(int j=0;j<active_size;j++)
	{
		if(y[j]==+1)
		{
			if (!is_lower_bound(j))
			{
				double grad_diff=Gmax+G[j];
				if (G[j] >= Gmax2)
					Gmax2 = G[j];
				if (grad_diff > 0)
				{
					double obj_diff;
					double quad_coef = QD[i]+QD[j]-2.0*y[i]*Q_i[j];
					if (quad_coef > 0)
						obj_diff = -(grad_diff*grad_diff)/quad_coef;
					else
						obj_diff = -(grad_diff*grad_diff)/TAU;

					if (obj_diff <= obj_diff_min)
					{
						Gmin_idx=j;
						obj_diff_min = obj_diff;
					}
				}
			}
		}
		else
		{
			if (!is_upper_bound(j))
			{
				double grad_diff= Gmax-G[j];
				if (-G[j] >= Gmax2)
					Gmax2 = -G[j];
				if (grad_diff > 0)
				{
					double obj_diff;
					double quad_coef = QD[i]+QD[j]+2.0*y[i]*Q_i[j];
					if (quad_coef > 0)
						obj_diff = -(grad_diff*grad_diff)/quad_coef;
					else
						obj_diff = -(grad_diff*grad_diff)/TAU;

					if (obj_diff <= obj_diff_min)
					{
						Gmin_idx=j;
						obj_diff_min = obj_diff;
					}
				}
			}
		}
	}

	// Optimal when the maximal violation falls below eps.
	if(Gmax+Gmax2 < eps || Gmin_idx == -1)
		return 1;

	out_i = Gmax_idx;
	out_j = Gmin_idx;
	return 0;
}
922
be_shrunk(int i,double Gmax1,double Gmax2)923 bool Solver::be_shrunk(int i, double Gmax1, double Gmax2)
924 {
925 if(is_upper_bound(i))
926 {
927 if(y[i]==+1)
928 return(-G[i] > Gmax1);
929 else
930 return(-G[i] > Gmax2);
931 }
932 else if(is_lower_bound(i))
933 {
934 if(y[i]==+1)
935 return(G[i] > Gmax2);
936 else
937 return(G[i] > Gmax1);
938 }
939 else
940 return(false);
941 }
942
// Shrink the active set: move variables that cannot become violating
// (relative to the current maximal violating pair) to the inactive tail.
void Solver::do_shrinking()
{
	int i;
	double Gmax1 = -INF;		// max { -y_i * grad(f)_i | i in I_up(\alpha) }
	double Gmax2 = -INF;		// max { y_i * grad(f)_i | i in I_low(\alpha) }

	// find maximal violating pair first
	for(i=0;i<active_size;i++)
	{
		if(y[i]==+1)
		{
			if(!is_upper_bound(i))
			{
				if(-G[i] >= Gmax1)
					Gmax1 = -G[i];
			}
			if(!is_lower_bound(i))
			{
				if(G[i] >= Gmax2)
					Gmax2 = G[i];
			}
		}
		else
		{
			if(!is_upper_bound(i))
			{
				if(-G[i] >= Gmax2)
					Gmax2 = -G[i];
			}
			if(!is_lower_bound(i))
			{
				if(G[i] >= Gmax1)
					Gmax1 = G[i];
			}
		}
	}

	// Near convergence: undo all shrinking once and re-optimize over the
	// full variable set, so we never stop on a wrongly shrunk problem.
	if(unshrink == false && Gmax1 + Gmax2 <= eps*10)
	{
		unshrink = true;
		reconstruct_gradient();
		active_size = l;
		info("*");
	}

	// Compact: swap each shrinkable variable with a keepable one from the tail.
	for(i=0;i<active_size;i++)
		if (be_shrunk(i, Gmax1, Gmax2))
		{
			active_size--;
			while (active_size > i)
			{
				if (!be_shrunk(active_size, Gmax1, Gmax2))
				{
					swap_index(i,active_size);
					break;
				}
				active_size--;
			}
		}
}
1003
// rho is the average of y_i*G[i] over free variables; when none are free,
// take the midpoint of the interval [lb, ub] allowed by the bounded ones.
double Solver::calculate_rho()
{
	double r;
	int nr_free = 0;
	double ub = INF, lb = -INF, sum_free = 0;
	for(int i=0;i<active_size;i++)
	{
		double yG = y[i]*G[i];

		if(is_upper_bound(i))
		{
			if(y[i]==-1)
				ub = min(ub,yG);
			else
				lb = max(lb,yG);
		}
		else if(is_lower_bound(i))
		{
			if(y[i]==+1)
				ub = min(ub,yG);
			else
				lb = max(lb,yG);
		}
		else
		{
			++nr_free;
			sum_free += yG;
		}
	}

	if(nr_free>0)
		r = sum_free/nr_free;
	else
		r = (ub+lb)/2;

	return r;
}
1041
1042 //
1043 // Solver for nu-svm classification and regression
1044 //
1045 // additional constraint: e^T \alpha = constant
1046 //
class Solver_NU: public Solver
{
public:
	Solver_NU() {}
	// Same interface as Solver::Solve; additionally keeps si so that
	// calculate_rho() can record r as well as the return value.
	void Solve(int l, const QMatrix& Q, const double *p, const schar *y,
		   double *alpha, double Cp, double Cn, double eps,
		   SolutionInfo* si, int shrinking)
	{
		this->si = si;
		Solver::Solve(l,Q,p,y,alpha,Cp,Cn,eps,si,shrinking);
	}
private:
	SolutionInfo *si;	// destination for r computed in calculate_rho()
	// Working-set selection restricted to same-label pairs, preserving
	// both equality constraints of the nu-formulation.
	int select_working_set(int &i, int &j);
	double calculate_rho();
	bool be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4);
	void do_shrinking();
};
1065
// return 1 if already optimal, return 0 otherwise
int Solver_NU::select_working_set(int &out_i, int &out_j)
{
	// return i,j such that y_i = y_j and
	// i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
	// j: minimizes the decrease of obj value
	//    (if quadratic coefficient <= 0, replace it with tau)
	//    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)

	// Violation bounds are tracked per label class, since i and j must
	// share the same label in the nu-formulation.
	double Gmaxp = -INF;
	double Gmaxp2 = -INF;
	int Gmaxp_idx = -1;

	double Gmaxn = -INF;
	double Gmaxn2 = -INF;
	int Gmaxn_idx = -1;

	int Gmin_idx = -1;
	double obj_diff_min = INF;

	// First pass: maximal violating index within each class.
	for(int t=0;t<active_size;t++)
		if(y[t]==+1)
		{
			if(!is_upper_bound(t))
				if(-G[t] >= Gmaxp)
				{
					Gmaxp = -G[t];
					Gmaxp_idx = t;
				}
		}
		else
		{
			if(!is_lower_bound(t))
				if(G[t] >= Gmaxn)
				{
					Gmaxn = G[t];
					Gmaxn_idx = t;
				}
		}

	int ip = Gmaxp_idx;
	int in = Gmaxn_idx;
	const Qfloat *Q_ip = NULL;
	const Qfloat *Q_in = NULL;
	if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
		Q_ip = Q->get_Q(ip,active_size);
	if(in != -1)
		Q_in = Q->get_Q(in,active_size);

	// Second pass: second-order choice of j, paired with the maximal
	// violator of its own class.
	for(int j=0;j<active_size;j++)
	{
		if(y[j]==+1)
		{
			if (!is_lower_bound(j))
			{
				double grad_diff=Gmaxp+G[j];
				if (G[j] >= Gmaxp2)
					Gmaxp2 = G[j];
				if (grad_diff > 0)
				{
					double obj_diff;
					double quad_coef = QD[ip]+QD[j]-2*Q_ip[j];
					if (quad_coef > 0)
						obj_diff = -(grad_diff*grad_diff)/quad_coef;
					else
						obj_diff = -(grad_diff*grad_diff)/TAU;

					if (obj_diff <= obj_diff_min)
					{
						Gmin_idx=j;
						obj_diff_min = obj_diff;
					}
				}
			}
		}
		else
		{
			if (!is_upper_bound(j))
			{
				double grad_diff=Gmaxn-G[j];
				if (-G[j] >= Gmaxn2)
					Gmaxn2 = -G[j];
				if (grad_diff > 0)
				{
					double obj_diff;
					double quad_coef = QD[in]+QD[j]-2*Q_in[j];
					if (quad_coef > 0)
						obj_diff = -(grad_diff*grad_diff)/quad_coef;
					else
						obj_diff = -(grad_diff*grad_diff)/TAU;

					if (obj_diff <= obj_diff_min)
					{
						Gmin_idx=j;
						obj_diff_min = obj_diff;
					}
				}
			}
		}
	}

	// Optimal when both per-class violations fall below eps.
	if(max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps || Gmin_idx == -1)
		return 1;

	// Pair j with the maximal violator of the same label class.
	if (y[Gmin_idx] == +1)
		out_i = Gmaxp_idx;
	else
		out_i = Gmaxn_idx;
	out_j = Gmin_idx;

	return 0;
}
1178
be_shrunk(int i,double Gmax1,double Gmax2,double Gmax3,double Gmax4)1179 bool Solver_NU::be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4)
1180 {
1181 if(is_upper_bound(i))
1182 {
1183 if(y[i]==+1)
1184 return(-G[i] > Gmax1);
1185 else
1186 return(-G[i] > Gmax4);
1187 }
1188 else if(is_lower_bound(i))
1189 {
1190 if(y[i]==+1)
1191 return(G[i] > Gmax2);
1192 else
1193 return(G[i] > Gmax3);
1194 }
1195 else
1196 return(false);
1197 }
1198
// Shrinking for the nu-solver: same structure as Solver::do_shrinking but
// the maximal violating-pair bounds are tracked separately per label class.
void Solver_NU::do_shrinking()
{
	double Gmax1 = -INF;	// max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) }
	double Gmax2 = -INF;	// max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) }
	double Gmax3 = -INF;	// max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) }
	double Gmax4 = -INF;	// max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) }

	// find maximal violating pair first
	int i;
	for(i=0;i<active_size;i++)
	{
		if(!is_upper_bound(i))
		{
			if(y[i]==+1)
			{
				if(-G[i] > Gmax1) Gmax1 = -G[i];
			}
			else	if(-G[i] > Gmax4) Gmax4 = -G[i];
		}
		if(!is_lower_bound(i))
		{
			if(y[i]==+1)
			{
				if(G[i] > Gmax2) Gmax2 = G[i];
			}
			else	if(G[i] > Gmax3) Gmax3 = G[i];
		}
	}

	// Near convergence: undo all shrinking once before final optimization.
	if(unshrink == false && max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10)
	{
		unshrink = true;
		reconstruct_gradient();
		active_size = l;
	}

	// Compact: swap each shrinkable variable with a keepable one from the tail.
	for(i=0;i<active_size;i++)
		if (be_shrunk(i, Gmax1, Gmax2, Gmax3, Gmax4))
		{
			active_size--;
			while (active_size > i)
			{
				if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
				{
					swap_index(i,active_size);
					break;
				}
				active_size--;
			}
		}
}
1250
// For the nu-formulations two averages are needed, one per label class
// (free SVs preferred, else midpoint of the feasible interval):
// r is stored in si and (r1-r2)/2 is returned as rho.
double Solver_NU::calculate_rho()
{
	int nr_free1 = 0,nr_free2 = 0;
	double ub1 = INF, ub2 = INF;
	double lb1 = -INF, lb2 = -INF;
	double sum_free1 = 0, sum_free2 = 0;

	for(int i=0;i<active_size;i++)
	{
		if(y[i]==+1)
		{
			if(is_upper_bound(i))
				lb1 = max(lb1,G[i]);
			else if(is_lower_bound(i))
				ub1 = min(ub1,G[i]);
			else
			{
				++nr_free1;
				sum_free1 += G[i];
			}
		}
		else
		{
			if(is_upper_bound(i))
				lb2 = max(lb2,G[i]);
			else if(is_lower_bound(i))
				ub2 = min(ub2,G[i]);
			else
			{
				++nr_free2;
				sum_free2 += G[i];
			}
		}
	}

	double r1,r2;
	if(nr_free1 > 0)
		r1 = sum_free1/nr_free1;
	else
		r1 = (ub1+lb1)/2;

	if(nr_free2 > 0)
		r2 = sum_free2/nr_free2;
	else
		r2 = (ub2+lb2)/2;

	si->r = (r1+r2)/2;
	return (r1-r2)/2;
}
1300
1301 //
1302 // Q matrices for various formulations
1303 //
// Q matrix for C-SVC / nu-SVC: Q_ij = y_i * y_j * k(x_i, x_j),
// served through the LRU column cache.
class SVC_Q: public Kernel
{
public:
	SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
	:Kernel(prob.l, prob.x, param)
	{
		clone(y,y_,prob.l);
		cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));	// cache_size is in MB
		QD = new double[prob.l];
		for(int i=0;i<prob.l;i++)
			QD[i] = (this->*kernel_function)(i,i);
	}

	// Return column i; only entries [start,len) missed the cache and are
	// recomputed here.
	Qfloat *get_Q(int i, int len) const
	{
		Qfloat *data;
		int start, j;
		if((start = cache->get_data(i,&data,len)) < len)
		{
			for(j=start;j<len;j++)
				data[j] = (Qfloat)(y[i]*y[j]*(this->*kernel_function)(i,j));
		}
		return data;
	}

	double *get_QD() const
	{
		return QD;
	}

	void swap_index(int i, int j) const
	{
		cache->swap_index(i,j);
		Kernel::swap_index(i,j);
		swap(y[i],y[j]);
		swap(QD[i],QD[j]);
	}

	~SVC_Q()
	{
		delete[] y;
		delete cache;
		delete[] QD;
	}
private:
	schar *y;	// private copy of the +1/-1 labels
	Cache *cache;	// LRU column cache
	double *QD;	// precomputed diagonal Q_ii
};
1353
// Q matrix for one-class SVM: Q_ij = k(x_i, x_j), no label signs involved.
class ONE_CLASS_Q: public Kernel
{
public:
	ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
	:Kernel(prob.l, prob.x, param)
	{
		cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));	// cache_size is in MB
		QD = new double[prob.l];
		for(int i=0;i<prob.l;i++)
			QD[i] = (this->*kernel_function)(i,i);
	}

	// Return column i; only entries [start,len) missed the cache and are
	// recomputed here.
	Qfloat *get_Q(int i, int len) const
	{
		Qfloat *data;
		int start, j;
		if((start = cache->get_data(i,&data,len)) < len)
		{
			for(j=start;j<len;j++)
				data[j] = (Qfloat)(this->*kernel_function)(i,j);
		}
		return data;
	}

	double *get_QD() const
	{
		return QD;
	}

	void swap_index(int i, int j) const
	{
		cache->swap_index(i,j);
		Kernel::swap_index(i,j);
		swap(QD[i],QD[j]);
	}

	~ONE_CLASS_Q()
	{
		delete cache;
		delete[] QD;
	}
private:
	Cache *cache;	// LRU column cache
	double *QD;	// precomputed diagonal Q_ii
};
1399
// Q matrix for epsilon-SVR / nu-SVR.  The dual has 2l variables: entries k
// and k+l both refer to training point k but with opposite signs, so
// Q[i][j] = sign[i] * sign[j] * K(index[i], index[j]).
class SVR_Q: public Kernel
{
public:
	SVR_Q(const svm_problem& prob, const svm_parameter& param)
	:Kernel(prob.l, prob.x, param)
	{
		l = prob.l;
		// cache holds rows of the underlying l x l kernel matrix (not 2l x 2l)
		cache = new Cache(l,(long int)(param.cache_size*(1<<20)));
		QD = new double[2*l];
		sign = new schar[2*l];
		index = new int[2*l];
		for(int k=0;k<l;k++)
		{
			sign[k] = 1;
			sign[k+l] = -1;
			index[k] = k;
			index[k+l] = k;
			QD[k] = (this->*kernel_function)(k,k);
			QD[k+l] = QD[k];
		}
		// two reorder buffers: get_Q alternates between them so the two
		// most recently returned columns stay valid simultaneously
		buffer[0] = new Qfloat[2*l];
		buffer[1] = new Qfloat[2*l];
		next_buffer = 0;
	}

	// Only the bookkeeping arrays are swapped; kernel rows are always
	// addressed through index[], so the cache itself stays untouched.
	void swap_index(int i, int j) const
	{
		swap(sign[i],sign[j]);
		swap(index[i],index[j]);
		swap(QD[i],QD[j]);
	}

	Qfloat *get_Q(int i, int len) const
	{
		Qfloat *data;
		int j, real_i = index[i];
		// fetch (or compute) the full kernel row of the underlying point
		if(cache->get_data(real_i,&data,l) < l)
		{
			for(j=0;j<l;j++)
				data[j] = (Qfloat)(this->*kernel_function)(real_i,j);
		}

		// reorder and copy
		Qfloat *buf = buffer[next_buffer];
		next_buffer = 1 - next_buffer;
		schar si = sign[i];
		for(j=0;j<len;j++)
			buf[j] = (Qfloat) si * (Qfloat) sign[j] * data[index[j]];
		return buf;
	}

	double *get_QD() const
	{
		return QD;
	}

	~SVR_Q()
	{
		delete cache;
		delete[] sign;
		delete[] index;
		delete[] buffer[0];
		delete[] buffer[1];
		delete[] QD;
	}
private:
	int l;			// number of training points (dual size is 2l)
	Cache *cache;		// LRU cache of kernel rows
	schar *sign;		// +1 for the first l dual entries, -1 for the rest
	int *index;		// dual-variable index -> training point index
	mutable int next_buffer;	// which of the two reorder buffers to use next
	Qfloat *buffer[2];
	double *QD;		// Q[i][i] diagonal entries (length 2l)
};
1474
1475 //
1476 // construct and solve various formulations
1477 //
// Solve the C-SVC dual:
//   min 1/2 a^T Q a - e^T a,  s.t. 0 <= a_i <= C_{y_i},  y^T a = 0
// Cp/Cn are the upper bounds for positive/negative examples (class weights).
// On return alpha[i] holds the signed coefficient y_i * a_i.
static void solve_c_svc(
	const svm_problem *prob, const svm_parameter* param,
	double *alpha, Solver::SolutionInfo* si, double Cp, double Cn)
{
	int l = prob->l;
	double *minus_ones = new double[l];
	schar *y = new schar[l];

	int i;

	for(i=0;i<l;i++)
	{
		alpha[i] = 0;
		minus_ones[i] = -1;	// linear term of the dual objective
		if(prob->y[i] > 0) y[i] = +1; else y[i] = -1;
	}

	Solver s;
	s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
		alpha, Cp, Cn, param->eps, si, param->shrinking);

	// report the equivalent nu (meaningful only for the unweighted case)
	double sum_alpha=0;
	for(i=0;i<l;i++)
		sum_alpha += alpha[i];

	if (Cp==Cn)
		info("nu = %f\n", sum_alpha/(Cp*prob->l));

	for(i=0;i<l;i++)
		alpha[i] *= y[i];	// fold the label into the coefficient

	delete[] minus_ones;
	delete[] y;
}
1512
// Solve the nu-SVC dual, then rescale the solution so it matches the C-SVC
// parameterization (divide by the solver's r; equivalent C is 1/r).
static void solve_nu_svc(
	const svm_problem *prob, const svm_parameter *param,
	double *alpha, Solver::SolutionInfo* si)
{
	int i;
	int l = prob->l;
	double nu = param->nu;

	schar *y = new schar[l];

	for(i=0;i<l;i++)
		if(prob->y[i]>0)
			y[i] = +1;
		else
			y[i] = -1;

	// initial feasible point: put nu*l/2 total mass on each class,
	// filling each alpha up to its bound of 1
	double sum_pos = nu*l/2;
	double sum_neg = nu*l/2;

	for(i=0;i<l;i++)
		if(y[i] == +1)
		{
			alpha[i] = min(1.0,sum_pos);
			sum_pos -= alpha[i];
		}
		else
		{
			alpha[i] = min(1.0,sum_neg);
			sum_neg -= alpha[i];
		}

	double *zeros = new double[l];	// nu-SVC dual has no linear term

	for(i=0;i<l;i++)
		zeros[i] = 0;

	Solver_NU s;
	s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
		alpha, 1.0, 1.0, param->eps, si, param->shrinking);
	double r = si->r;

	info("C = %f\n",1/r);

	// rescale to the C-SVC form and fold labels into the coefficients
	for(i=0;i<l;i++)
		alpha[i] *= y[i]/r;

	si->rho /= r;
	si->obj /= (r*r);
	si->upper_bound_p = 1/r;
	si->upper_bound_n = 1/r;

	delete[] y;
	delete[] zeros;
}
1567
// Solve the one-class SVM dual:
//   min 1/2 a^T Q a,  s.t. 0 <= a_i <= 1,  e^T a = nu*l
// Initialization puts floor(nu*l) alphas at the upper bound and the
// fractional remainder on the next one, which is already feasible.
static void solve_one_class(
	const svm_problem *prob, const svm_parameter *param,
	double *alpha, Solver::SolutionInfo* si)
{
	int l = prob->l;
	double *zeros = new double[l];
	schar *ones = new schar[l];
	int i;

	int n = (int)(param->nu*prob->l);	// # of alpha's at upper bound

	for(i=0;i<n;i++)
		alpha[i] = 1;
	if(n<prob->l)
		alpha[n] = param->nu * prob->l - n;	// fractional remainder
	for(i=n+1;i<l;i++)
		alpha[i] = 0;

	for(i=0;i<l;i++)
	{
		zeros[i] = 0;	// no linear term
		ones[i] = 1;	// all points share the same "label"
	}

	Solver s;
	s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
		alpha, 1.0, 1.0, param->eps, si, param->shrinking);

	delete[] zeros;
	delete[] ones;
}
1599
// Solve the epsilon-SVR dual over 2l variables: alpha2[0..l) are the
// "alpha" part (sign +1) and alpha2[l..2l) the "alpha*" part (sign -1).
// The final coefficient for point i is alpha[i] = alpha2[i] - alpha2[i+l].
static void solve_epsilon_svr(
	const svm_problem *prob, const svm_parameter *param,
	double *alpha, Solver::SolutionInfo* si)
{
	int l = prob->l;
	double *alpha2 = new double[2*l];
	double *linear_term = new double[2*l];
	schar *y = new schar[2*l];
	int i;

	for(i=0;i<l;i++)
	{
		alpha2[i] = 0;
		linear_term[i] = param->p - prob->y[i];	// epsilon - y_i
		y[i] = 1;

		alpha2[i+l] = 0;
		linear_term[i+l] = param->p + prob->y[i];	// epsilon + y_i
		y[i+l] = -1;
	}

	Solver s;
	s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
		alpha2, param->C, param->C, param->eps, si, param->shrinking);

	// collapse the two halves and report the equivalent nu
	double sum_alpha = 0;
	for(i=0;i<l;i++)
	{
		alpha[i] = alpha2[i] - alpha2[i+l];
		sum_alpha += fabs(alpha[i]);
	}
	info("nu = %f\n",sum_alpha/(param->C*l));

	delete[] alpha2;
	delete[] linear_term;
	delete[] y;
}
1637
// Solve the nu-SVR dual over 2l variables (same layout as epsilon-SVR).
// The feasible start spreads C*nu*l/2 mass over both halves, capped at C
// per variable; the solver's -r is the effective epsilon.
static void solve_nu_svr(
	const svm_problem *prob, const svm_parameter *param,
	double *alpha, Solver::SolutionInfo* si)
{
	int l = prob->l;
	double C = param->C;
	double *alpha2 = new double[2*l];
	double *linear_term = new double[2*l];
	schar *y = new schar[2*l];
	int i;

	double sum = C * param->nu * l / 2;
	for(i=0;i<l;i++)
	{
		alpha2[i] = alpha2[i+l] = min(sum,C);
		sum -= alpha2[i];

		linear_term[i] = - prob->y[i];
		y[i] = 1;

		linear_term[i+l] = prob->y[i];
		y[i+l] = -1;
	}

	Solver_NU s;
	s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
		alpha2, C, C, param->eps, si, param->shrinking);

	info("epsilon = %f\n",-si->r);

	// collapse the two halves into one signed coefficient per point
	for(i=0;i<l;i++)
		alpha[i] = alpha2[i] - alpha2[i+l];

	delete[] alpha2;
	delete[] linear_term;
	delete[] y;
}
1675
1676 //
1677 // decision_function
1678 //
struct decision_function
{
	double *alpha;	// signed SV coefficients, length prob->l (heap-allocated, caller frees)
	double rho;	// bias term of the decision function
};
1684
// Train a single two-class-style machine (dispatching on svm_type), print
// solver statistics and SV counts, and return {alpha, rho}.  Cp/Cn are
// only used by C_SVC; the other formulations ignore them.
static decision_function svm_train_one(
	const svm_problem *prob, const svm_parameter *param,
	double Cp, double Cn)
{
	double *alpha = Malloc(double,prob->l);
	Solver::SolutionInfo si;
	switch(param->svm_type)
	{
		case C_SVC:
			solve_c_svc(prob,param,alpha,&si,Cp,Cn);
			break;
		case NU_SVC:
			solve_nu_svc(prob,param,alpha,&si);
			break;
		case ONE_CLASS:
			solve_one_class(prob,param,alpha,&si);
			break;
		case EPSILON_SVR:
			solve_epsilon_svr(prob,param,alpha,&si);
			break;
		case NU_SVR:
			solve_nu_svr(prob,param,alpha,&si);
			break;
	}

	info("obj = %f, rho = %f\n",si.obj,si.rho);

	// output SVs

	// count support vectors (alpha != 0) and bounded SVs (alpha at its bound)
	int nSV = 0;
	int nBSV = 0;
	for(int i=0;i<prob->l;i++)
	{
		if(fabs(alpha[i]) > 0)
		{
			++nSV;
			if(prob->y[i] > 0)
			{
				if(fabs(alpha[i]) >= si.upper_bound_p)
					++nBSV;
			}
			else
			{
				if(fabs(alpha[i]) >= si.upper_bound_n)
					++nBSV;
			}
		}
	}

	info("nSV = %d, nBSV = %d\n",nSV,nBSV);

	decision_function f;
	f.alpha = alpha;	// ownership passes to the caller
	f.rho = si.rho;
	return f;
}
1741
1742 // Platt's binary SVM Probablistic Output: an improvement from Lin et al.
// Fit the sigmoid P(y=1|f) = 1/(1+exp(A*f+B)) to decision values and labels
// by regularized maximum likelihood, using Newton's method with a
// backtracking line search (Lin, Lin & Weng's improved Platt scaling).
// Outputs the fitted parameters through A and B.
static void sigmoid_train(
	int l, const double *dec_values, const double *labels,
	double& A, double& B)
{
	double prior1=0, prior0 = 0;	// counts of positive / negative examples
	int i;

	for (i=0;i<l;i++)
		if (labels[i] > 0) prior1+=1;
		else prior0+=1;

	int max_iter=100;	// Maximal number of iterations
	double min_step=1e-10;	// Minimal step taken in line search
	double sigma=1e-12;	// For numerically strict PD of Hessian
	double eps=1e-5;
	// smoothed targets instead of hard 0/1 labels (Platt's prior correction)
	double hiTarget=(prior1+1.0)/(prior1+2.0);
	double loTarget=1/(prior0+2.0);
	double *t=Malloc(double,l);
	double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;
	double newA,newB,newf,d1,d2;
	int iter;

	// Initial Point and Initial Fun Value
	A=0.0; B=log((prior0+1.0)/(prior1+1.0));
	double fval = 0.0;

	for (i=0;i<l;i++)
	{
		if (labels[i]>0) t[i]=hiTarget;
		else t[i]=loTarget;
		fApB = dec_values[i]*A+B;
		// two algebraically equal forms, chosen to avoid overflow in exp()
		if (fApB>=0)
			fval += t[i]*fApB + log(1+exp(-fApB));
		else
			fval += (t[i] - 1)*fApB +log(1+exp(fApB));
	}
	for (iter=0;iter<max_iter;iter++)
	{
		// Update Gradient and Hessian (use H' = H + sigma I)
		h11=sigma; // numerically ensures strict PD
		h22=sigma;
		h21=0.0;g1=0.0;g2=0.0;
		for (i=0;i<l;i++)
		{
			fApB = dec_values[i]*A+B;
			if (fApB >= 0)
			{
				p=exp(-fApB)/(1.0+exp(-fApB));
				q=1.0/(1.0+exp(-fApB));
			}
			else
			{
				p=1.0/(1.0+exp(fApB));
				q=exp(fApB)/(1.0+exp(fApB));
			}
			d2=p*q;
			h11+=dec_values[i]*dec_values[i]*d2;
			h22+=d2;
			h21+=dec_values[i]*d2;
			d1=t[i]-p;
			g1+=dec_values[i]*d1;
			g2+=d1;
		}

		// Stopping Criteria
		if (fabs(g1)<eps && fabs(g2)<eps)
			break;

		// Finding Newton direction: -inv(H') * g
		det=h11*h22-h21*h21;
		dA=-(h22*g1 - h21 * g2) / det;
		dB=-(-h21*g1+ h11 * g2) / det;
		gd=g1*dA+g2*dB;	// directional derivative, used in the Armijo test


		stepsize = 1; // Line Search
		while (stepsize >= min_step)
		{
			newA = A + stepsize * dA;
			newB = B + stepsize * dB;

			// New function value
			newf = 0.0;
			for (i=0;i<l;i++)
			{
				fApB = dec_values[i]*newA+newB;
				if (fApB >= 0)
					newf += t[i]*fApB + log(1+exp(-fApB));
				else
					newf += (t[i] - 1)*fApB +log(1+exp(fApB));
			}
			// Check sufficient decrease
			if (newf<fval+0.0001*stepsize*gd)
			{
				A=newA;B=newB;fval=newf;
				break;
			}
			else
				stepsize = stepsize / 2.0;
		}

		if (stepsize < min_step)
		{
			info("Line search fails in two-class probability estimates\n");
			break;
		}
	}

	if (iter>=max_iter)
		info("Reaching maximal iterations in two-class probability estimates\n");
	free(t);
}
1855
// Map one decision value to P(y=+1|x) through the fitted sigmoid
// 1/(1+exp(A*f+B)).  The branch keeps the exp() argument non-positive so
// neither form can overflow; 1-p computed later from the same expression
// avoids catastrophic cancellation.
static double sigmoid_predict(double decision_value, double A, double B)
{
	const double fApB = decision_value*A+B;
	if (fApB < 0)
		return 1.0/(1+exp(fApB));
	const double e = exp(-fApB);
	return e/(1.0+e);
}
1865
1866 // Method 2 from the multiclass_prob paper by Wu, Lin, and Weng
// Couple the pairwise probabilities r[i][j] (estimates of P(y=i | y in
// {i,j}, x)) into k class probabilities p[] by the fixed-point iteration of
// Wu, Lin & Weng's "method 2".  p must have length k; r is k x k.
static void multiclass_probability(int k, double **r, double *p)
{
	int t,j;
	int iter = 0, max_iter=max(100,k);
	double **Q=Malloc(double *,k);
	double *Qp=Malloc(double,k);
	double pQp, eps=0.005/k;	// tolerance shrinks with the number of classes

	// build the quadratic form Q from the pairwise estimates
	for (t=0;t<k;t++)
	{
		p[t]=1.0/k;  // Valid if k = 1
		Q[t]=Malloc(double,k);
		Q[t][t]=0;
		for (j=0;j<t;j++)
		{
			Q[t][t]+=r[j][t]*r[j][t];
			Q[t][j]=Q[j][t];	// Q is symmetric; reuse the already-computed entry
		}
		for (j=t+1;j<k;j++)
		{
			Q[t][t]+=r[j][t]*r[j][t];
			Q[t][j]=-r[j][t]*r[t][j];
		}
	}
	for (iter=0;iter<max_iter;iter++)
	{
		// stopping condition, recalculate QP,pQP for numerical accuracy
		pQp=0;
		for (t=0;t<k;t++)
		{
			Qp[t]=0;
			for (j=0;j<k;j++)
				Qp[t]+=Q[t][j]*p[j];
			pQp+=p[t]*Qp[t];
		}
		double max_error=0;
		for (t=0;t<k;t++)
		{
			double error=fabs(Qp[t]-pQp);
			if (error>max_error)
				max_error=error;
		}
		if (max_error<eps) break;

		// one sweep of coordinate updates; p is renormalized after each step
		for (t=0;t<k;t++)
		{
			double diff=(-Qp[t]+pQp)/Q[t][t];
			p[t]+=diff;
			// incremental update of pQp and Qp instead of full recomputation
			pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff);
			for (j=0;j<k;j++)
			{
				Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff);
				p[j]/=(1+diff);
			}
		}
	}
	if (iter>=max_iter)
		info("Exceeds max_iter in multiclass_prob\n");
	for(t=0;t<k;t++) free(Q[t]);
	free(Q);
	free(Qp);
}
1929
1930 // Cross-validation decision values for probability estimates
// Estimate the sigmoid parameters (probA, probB) for one binary problem by
// 5-fold cross validation: collect out-of-fold decision values for every
// point, then fit the sigmoid on them with sigmoid_train.
static void svm_binary_svc_probability(
	const svm_problem *prob, const svm_parameter *param,
	double Cp, double Cn, double& probA, double& probB)
{
	int i;
	int nr_fold = 5;
	int *perm = Malloc(int,prob->l);
	double *dec_values = Malloc(double,prob->l);

	// random shuffle
	for(i=0;i<prob->l;i++) perm[i]=i;
	for(i=0;i<prob->l;i++)
	{
		int j = i+rand()%(prob->l-i);	// Fisher-Yates shuffle step
		swap(perm[i],perm[j]);
	}
	for(i=0;i<nr_fold;i++)
	{
		int begin = i*prob->l/nr_fold;
		int end = (i+1)*prob->l/nr_fold;
		int j,k;
		struct svm_problem subprob;

		// build the training subproblem from everything outside [begin,end)
		subprob.l = prob->l-(end-begin);
		subprob.x = Malloc(struct svm_node*,subprob.l);
		subprob.y = Malloc(double,subprob.l);

		k=0;
		for(j=0;j<begin;j++)
		{
			subprob.x[k] = prob->x[perm[j]];
			subprob.y[k] = prob->y[perm[j]];
			++k;
		}
		for(j=end;j<prob->l;j++)
		{
			subprob.x[k] = prob->x[perm[j]];
			subprob.y[k] = prob->y[perm[j]];
			++k;
		}
		int p_count=0,n_count=0;
		for(j=0;j<k;j++)
			if(subprob.y[j]>0)
				p_count++;
			else
				n_count++;

		// degenerate folds (single-class training data) get constant values
		if(p_count==0 && n_count==0)
			for(j=begin;j<end;j++)
				dec_values[perm[j]] = 0;
		else if(p_count > 0 && n_count == 0)
			for(j=begin;j<end;j++)
				dec_values[perm[j]] = 1;
		else if(p_count == 0 && n_count > 0)
			for(j=begin;j<end;j++)
				dec_values[perm[j]] = -1;
		else
		{
			// train with C=1 and the class weights folded into per-class weights
			svm_parameter subparam = *param;
			subparam.probability=0;
			subparam.C=1.0;
			subparam.nr_weight=2;
			subparam.weight_label = Malloc(int,2);
			subparam.weight = Malloc(double,2);
			subparam.weight_label[0]=+1;
			subparam.weight_label[1]=-1;
			subparam.weight[0]=Cp;
			subparam.weight[1]=Cn;
			struct svm_model *submodel = svm_train(&subprob,&subparam);
			for(j=begin;j<end;j++)
			{
				svm_predict_values(submodel,prob->x[perm[j]],&(dec_values[perm[j]]));
				// ensure +1 -1 order; reason not using CV subroutine
				dec_values[perm[j]] *= submodel->label[0];
			}
			svm_free_and_destroy_model(&submodel);
			svm_destroy_param(&subparam);
		}
		free(subprob.x);
		free(subprob.y);
	}
	sigmoid_train(prob->l,dec_values,prob->y,probA,probB);
	free(dec_values);
	free(perm);
}
2016
2017 // Return parameter of a Laplace distribution
// Return parameter of a Laplace distribution fitted to the SVR residuals:
// cross-validate, compute |y - yhat|, and take the mean absolute error
// after discarding outliers beyond 5 standard deviations.
static double svm_svr_probability(
	const svm_problem *prob, const svm_parameter *param)
{
	int i;
	int nr_fold = 5;
	double *ymv = Malloc(double,prob->l);
	double mae = 0;

	svm_parameter newparam = *param;
	newparam.probability = 0;	// avoid recursing into probability estimation
	svm_cross_validation(prob,&newparam,nr_fold,ymv);
	for(i=0;i<prob->l;i++)
	{
		ymv[i]=prob->y[i]-ymv[i];	// residual of the CV prediction
		mae += fabs(ymv[i]);
	}
	mae /= prob->l;
	// std of a zero-mean Laplace with scale mae is sqrt(2)*mae
	double std=sqrt(2*mae*mae);
	int count=0;
	mae=0;
	// recompute the MAE ignoring residuals beyond 5 std (outliers)
	for(i=0;i<prob->l;i++)
		if (fabs(ymv[i]) > 5*std)
			count=count+1;
		else
			mae+=fabs(ymv[i]);
	mae /= (prob->l-count);
	info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae);
	free(ymv);
	return mae;
}
2048
2049
2050 // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
2051 // perm, length l, must be allocated before calling this subroutine
// label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
// perm, length l, must be allocated before calling this subroutine
// On return *label_ret/*start_ret/*count_ret are malloc'd arrays of length
// nr_class that the caller must free.
static void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
{
	int l = prob->l;
	int max_nr_class = 16;	// initial capacity; doubled on demand below
	int nr_class = 0;
	int *label = Malloc(int,max_nr_class);
	int *count = Malloc(int,max_nr_class);
	int *data_label = Malloc(int,l);	// class index (0..nr_class-1) of each point
	int i;

	for(i=0;i<l;i++)
	{
		int this_label = (int)prob->y[i];	// labels are compared as ints
		int j;
		for(j=0;j<nr_class;j++)
		{
			if(this_label == label[j])
			{
				++count[j];
				break;
			}
		}
		data_label[i] = j;
		if(j == nr_class)
		{
			// first occurrence of a new label: grow the tables if needed
			if(nr_class == max_nr_class)
			{
				max_nr_class *= 2;
				// NOTE(review): realloc return value is not checked; on
				// allocation failure this would dereference NULL
				label = (int *)realloc(label,max_nr_class*sizeof(int));
				count = (int *)realloc(count,max_nr_class*sizeof(int));
			}
			label[nr_class] = this_label;
			count[nr_class] = 1;
			++nr_class;
		}
	}

	//
	// Labels are ordered by their first occurrence in the training set.
	// However, for two-class sets with -1/+1 labels and -1 appears first,
	// we swap labels to ensure that internally the binary SVM has positive data corresponding to the +1 instances.
	//
	if (nr_class == 2 && label[0] == -1 && label[1] == 1)
	{
		swap(label[0],label[1]);
		swap(count[0],count[1]);
		for(i=0;i<l;i++)
		{
			if(data_label[i] == 0)
				data_label[i] = 1;
			else
				data_label[i] = 0;
		}
	}

	// counting sort: perm groups the original indices by class
	int *start = Malloc(int,nr_class);
	start[0] = 0;
	for(i=1;i<nr_class;i++)
		start[i] = start[i-1]+count[i-1];
	for(i=0;i<l;i++)
	{
		perm[start[data_label[i]]] = i;
		++start[data_label[i]];
	}
	// rebuild start[], which the loop above advanced past each class
	start[0] = 0;
	for(i=1;i<nr_class;i++)
		start[i] = start[i-1]+count[i-1];

	*nr_class_ret = nr_class;
	*label_ret = label;
	*start_ret = start;
	*count_ret = count;
	free(data_label);
}
2126
2127 //
2128 // Interface functions
2129 //
// Train an SVM model on `prob` with parameters `param`.
// Regression / one-class: a single machine is trained.  Classification:
// one-vs-one — k*(k-1)/2 binary machines are trained and their SVs merged.
// The returned model points into prob->x (free_sv == 0), so the caller
// must keep the problem alive while the model is in use.
svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
{
	svm_model *model = Malloc(svm_model,1);
	model->param = *param;
	model->free_sv = 0;	// XXX: SVs borrow prob->x, not owned by the model

	if(param->svm_type == ONE_CLASS ||
	   param->svm_type == EPSILON_SVR ||
	   param->svm_type == NU_SVR)
	{
		// regression or one-class-svm
		model->nr_class = 2;
		model->label = NULL;
		model->nSV = NULL;
		model->probA = NULL; model->probB = NULL;
		model->sv_coef = Malloc(double *,1);

		if(param->probability &&
		   (param->svm_type == EPSILON_SVR ||
		    param->svm_type == NU_SVR))
		{
			model->probA = Malloc(double,1);
			// Laplace scale of the CV residuals, for error-bar estimates
			model->probA[0] = svm_svr_probability(prob,param);
		}

		decision_function f = svm_train_one(prob,param,0,0);
		model->rho = Malloc(double,1);
		model->rho[0] = f.rho;

		// keep only the points with non-zero coefficients as SVs
		int nSV = 0;
		int i;
		for(i=0;i<prob->l;i++)
			if(fabs(f.alpha[i]) > 0) ++nSV;
		model->l = nSV;
		model->SV = Malloc(svm_node *,nSV);
		model->sv_coef[0] = Malloc(double,nSV);
		model->sv_indices = Malloc(int,nSV);
		int j = 0;
		for(i=0;i<prob->l;i++)
			if(fabs(f.alpha[i]) > 0)
			{
				model->SV[j] = prob->x[i];
				model->sv_coef[0][j] = f.alpha[i];
				model->sv_indices[j] = i+1;	// sv_indices are 1-based
				++j;
			}

		free(f.alpha);
	}
	else
	{
		// classification
		int l = prob->l;
		int nr_class;
		int *label = NULL;
		int *start = NULL;
		int *count = NULL;
		int *perm = Malloc(int,l);

		// group training data of the same class
		svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
		if(nr_class == 1)
			info("WARNING: training data in only one class. See README for details.\n");

		// x[] holds the data reordered so that each class is contiguous
		svm_node **x = Malloc(svm_node *,l);
		int i;
		for(i=0;i<l;i++)
			x[i] = prob->x[perm[i]];

		// calculate weighted C

		double *weighted_C = Malloc(double, nr_class);
		for(i=0;i<nr_class;i++)
			weighted_C[i] = param->C;
		for(i=0;i<param->nr_weight;i++)
		{
			int j;
			for(j=0;j<nr_class;j++)
				if(param->weight_label[i] == label[j])
					break;
			if(j == nr_class)
				fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
			else
				weighted_C[j] *= param->weight[i];
		}

		// train k*(k-1)/2 models

		bool *nonzero = Malloc(bool,l);	// true iff point i is an SV in some pair
		for(i=0;i<l;i++)
			nonzero[i] = false;
		decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2);

		double *probA=NULL,*probB=NULL;
		if (param->probability)
		{
			probA=Malloc(double,nr_class*(nr_class-1)/2);
			probB=Malloc(double,nr_class*(nr_class-1)/2);
		}

		int p = 0;
		for(i=0;i<nr_class;i++)
			for(int j=i+1;j<nr_class;j++)
			{
				// build the binary subproblem: class i as +1, class j as -1
				svm_problem sub_prob;
				int si = start[i], sj = start[j];
				int ci = count[i], cj = count[j];
				sub_prob.l = ci+cj;
				sub_prob.x = Malloc(svm_node *,sub_prob.l);
				sub_prob.y = Malloc(double,sub_prob.l);
				int k;
				for(k=0;k<ci;k++)
				{
					sub_prob.x[k] = x[si+k];
					sub_prob.y[k] = +1;
				}
				for(k=0;k<cj;k++)
				{
					sub_prob.x[ci+k] = x[sj+k];
					sub_prob.y[ci+k] = -1;
				}

				if(param->probability)
					svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]);

				f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
				// mark every point with a non-zero coefficient as an SV
				for(k=0;k<ci;k++)
					if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
						nonzero[si+k] = true;
				for(k=0;k<cj;k++)
					if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
						nonzero[sj+k] = true;
				free(sub_prob.x);
				free(sub_prob.y);
				++p;
			}

		// build output

		model->nr_class = nr_class;

		model->label = Malloc(int,nr_class);
		for(i=0;i<nr_class;i++)
			model->label[i] = label[i];

		model->rho = Malloc(double,nr_class*(nr_class-1)/2);
		for(i=0;i<nr_class*(nr_class-1)/2;i++)
			model->rho[i] = f[i].rho;

		if(param->probability)
		{
			model->probA = Malloc(double,nr_class*(nr_class-1)/2);
			model->probB = Malloc(double,nr_class*(nr_class-1)/2);
			for(i=0;i<nr_class*(nr_class-1)/2;i++)
			{
				model->probA[i] = probA[i];
				model->probB[i] = probB[i];
			}
		}
		else
		{
			model->probA=NULL;
			model->probB=NULL;
		}

		// count SVs per class and in total
		int total_sv = 0;
		int *nz_count = Malloc(int,nr_class);
		model->nSV = Malloc(int,nr_class);
		for(i=0;i<nr_class;i++)
		{
			int nSV = 0;
			for(int j=0;j<count[i];j++)
				if(nonzero[start[i]+j])
				{
					++nSV;
					++total_sv;
				}
			model->nSV[i] = nSV;
			nz_count[i] = nSV;
		}

		info("Total nSV = %d\n",total_sv);

		model->l = total_sv;
		model->SV = Malloc(svm_node *,total_sv);
		model->sv_indices = Malloc(int,total_sv);
		p = 0;
		for(i=0;i<l;i++)
			if(nonzero[i])
			{
				model->SV[p] = x[i];
				model->sv_indices[p++] = perm[i] + 1;	// 1-based, original order
			}

		// nz_start[i]: offset of class i's first SV in the merged SV list
		int *nz_start = Malloc(int,nr_class);
		nz_start[0] = 0;
		for(i=1;i<nr_class;i++)
			nz_start[i] = nz_start[i-1]+nz_count[i-1];

		model->sv_coef = Malloc(double *,nr_class-1);
		for(i=0;i<nr_class-1;i++)
			model->sv_coef[i] = Malloc(double,total_sv);

		p = 0;
		for(i=0;i<nr_class;i++)
			for(int j=i+1;j<nr_class;j++)
			{
				// classifier (i,j): coefficients with
				// i are in sv_coef[j-1][nz_start[i]...],
				// j are in sv_coef[i][nz_start[j]...]

				int si = start[i];
				int sj = start[j];
				int ci = count[i];
				int cj = count[j];

				int q = nz_start[i];
				int k;
				for(k=0;k<ci;k++)
					if(nonzero[si+k])
						model->sv_coef[j-1][q++] = f[p].alpha[k];
				q = nz_start[j];
				for(k=0;k<cj;k++)
					if(nonzero[sj+k])
						model->sv_coef[i][q++] = f[p].alpha[ci+k];
				++p;
			}

		free(label);
		free(probA);	// free(NULL) is a no-op when probability == 0
		free(probB);
		free(count);
		free(perm);
		free(start);
		free(x);
		free(weighted_C);
		free(nonzero);
		for(i=0;i<nr_class*(nr_class-1)/2;i++)
			free(f[i].alpha);
		free(f);
		free(nz_count);
		free(nz_start);
	}
	return model;
}
2375
2376 // Stratified cross validation
// Stratified cross validation
// Fill target[i] with the prediction for point i made by a model trained
// without point i's fold.  For classification, folds are stratified so
// each fold keeps roughly the original class proportions.
void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
{
	int i;
	int *fold_start;
	int l = prob->l;
	int *perm = Malloc(int,l);
	int nr_class;
	if (nr_fold > l)
	{
		nr_fold = l;
		fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
	}
	fold_start = Malloc(int,nr_fold+1);
	// stratified cv may not give leave-one-out rate
	// Each class to l folds -> some folds may have zero elements
	if((param->svm_type == C_SVC ||
	    param->svm_type == NU_SVC) && nr_fold < l)
	{
		int *start = NULL;
		int *label = NULL;
		int *count = NULL;
		svm_group_classes(prob,&nr_class,&label,&start,&count,perm);

		// random shuffle and then data grouped by fold using the array perm
		int *fold_count = Malloc(int,nr_fold);
		int c;
		int *index = Malloc(int,l);
		for(i=0;i<l;i++)
			index[i]=perm[i];
		// shuffle within each class independently
		for (c=0; c<nr_class; c++)
			for(i=0;i<count[c];i++)
			{
				int j = i+rand()%(count[c]-i);
				swap(index[start[c]+j],index[start[c]+i]);
			}
		// each fold receives its proportional share of every class
		for(i=0;i<nr_fold;i++)
		{
			fold_count[i] = 0;
			for (c=0; c<nr_class;c++)
				fold_count[i]+=(i+1)*count[c]/nr_fold-i*count[c]/nr_fold;
		}
		fold_start[0]=0;
		for (i=1;i<=nr_fold;i++)
			fold_start[i] = fold_start[i-1]+fold_count[i-1];
		// scatter each class's shuffled indices across the folds
		for (c=0; c<nr_class;c++)
			for(i=0;i<nr_fold;i++)
			{
				int begin = start[c]+i*count[c]/nr_fold;
				int end = start[c]+(i+1)*count[c]/nr_fold;
				for(int j=begin;j<end;j++)
				{
					perm[fold_start[i]] = index[j];
					fold_start[i]++;
				}
			}
		// rebuild fold_start[], which the loop above advanced past each fold
		fold_start[0]=0;
		for (i=1;i<=nr_fold;i++)
			fold_start[i] = fold_start[i-1]+fold_count[i-1];
		free(start);
		free(label);
		free(count);
		free(index);
		free(fold_count);
	}
	else
	{
		// non-stratified case: a plain random permutation, equal-size folds
		for(i=0;i<l;i++) perm[i]=i;
		for(i=0;i<l;i++)
		{
			int j = i+rand()%(l-i);
			swap(perm[i],perm[j]);
		}
		for(i=0;i<=nr_fold;i++)
			fold_start[i]=i*l/nr_fold;
	}

	for(i=0;i<nr_fold;i++)
	{
		int begin = fold_start[i];
		int end = fold_start[i+1];
		int j,k;
		struct svm_problem subprob;

		// train on everything outside fold i, predict the fold itself
		subprob.l = l-(end-begin);
		subprob.x = Malloc(struct svm_node*,subprob.l);
		subprob.y = Malloc(double,subprob.l);

		k=0;
		for(j=0;j<begin;j++)
		{
			subprob.x[k] = prob->x[perm[j]];
			subprob.y[k] = prob->y[perm[j]];
			++k;
		}
		for(j=end;j<l;j++)
		{
			subprob.x[k] = prob->x[perm[j]];
			subprob.y[k] = prob->y[perm[j]];
			++k;
		}
		struct svm_model *submodel = svm_train(&subprob,param);
		if(param->probability &&
		   (param->svm_type == C_SVC || param->svm_type == NU_SVC))
		{
			double *prob_estimates=Malloc(double,svm_get_nr_class(submodel));
			for(j=begin;j<end;j++)
				target[perm[j]] = svm_predict_probability(submodel,prob->x[perm[j]],prob_estimates);
			free(prob_estimates);
		}
		else
			for(j=begin;j<end;j++)
				target[perm[j]] = svm_predict(submodel,prob->x[perm[j]]);
		svm_free_and_destroy_model(&submodel);
		free(subprob.x);
		free(subprob.y);
	}
	free(fold_start);
	free(perm);
}
2496
2497
svm_get_svm_type(const svm_model * model)2498 int svm_get_svm_type(const svm_model *model)
2499 {
2500 return model->param.svm_type;
2501 }
2502
svm_get_nr_class(const svm_model * model)2503 int svm_get_nr_class(const svm_model *model)
2504 {
2505 return model->nr_class;
2506 }
2507
svm_get_labels(const svm_model * model,int * label)2508 void svm_get_labels(const svm_model *model, int* label)
2509 {
2510 if (model->label != NULL)
2511 for(int i=0;i<model->nr_class;i++)
2512 label[i] = model->label[i];
2513 }
2514
svm_get_sv_indices(const svm_model * model,int * indices)2515 void svm_get_sv_indices(const svm_model *model, int* indices)
2516 {
2517 if (model->sv_indices != NULL)
2518 for(int i=0;i<model->l;i++)
2519 indices[i] = model->sv_indices[i];
2520 }
2521
svm_get_nr_sv(const svm_model * model)2522 int svm_get_nr_sv(const svm_model *model)
2523 {
2524 return model->l;
2525 }
2526
svm_get_svr_probability(const svm_model * model)2527 double svm_get_svr_probability(const svm_model *model)
2528 {
2529 if ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
2530 model->probA!=NULL)
2531 return model->probA[0];
2532 else
2533 {
2534 fprintf(stderr,"Model doesn't contain information for SVR probability inference\n");
2535 return 0;
2536 }
2537 }
2538
// Compute the raw decision value(s) for x into dec_values and return the
// prediction.  Regression: one value, returned as-is.  One-class: one
// value, returned as +1/-1.  Classification: k*(k-1)/2 pairwise values
// and the label chosen by one-vs-one majority voting.
double svm_predict_values(const svm_model *model, const svm_node *x, double* dec_values)
{
	int i;
	if(model->param.svm_type == ONE_CLASS ||
	   model->param.svm_type == EPSILON_SVR ||
	   model->param.svm_type == NU_SVR)
	{
		// single decision function: sum_i coef_i * K(x, SV_i) - rho
		double *sv_coef = model->sv_coef[0];
		double sum = 0;
		for(i=0;i<model->l;i++)
			sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param);
		sum -= model->rho[0];
		*dec_values = sum;

		if(model->param.svm_type == ONE_CLASS)
			return (sum>0)?1:-1;
		else
			return sum;
	}
	else
	{
		int nr_class = model->nr_class;
		int l = model->l;

		// evaluate the kernel against every SV once, shared by all pairs
		double *kvalue = Malloc(double,l);
		for(i=0;i<l;i++)
			kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);

		// start[i]: offset of class i's SVs within the merged SV list
		int *start = Malloc(int,nr_class);
		start[0] = 0;
		for(i=1;i<nr_class;i++)
			start[i] = start[i-1]+model->nSV[i-1];

		int *vote = Malloc(int,nr_class);
		for(i=0;i<nr_class;i++)
			vote[i] = 0;

		// one-vs-one: evaluate every (i,j) classifier and tally votes
		int p=0;
		for(i=0;i<nr_class;i++)
			for(int j=i+1;j<nr_class;j++)
			{
				double sum = 0;
				int si = start[i];
				int sj = start[j];
				int ci = model->nSV[i];
				int cj = model->nSV[j];

				int k;
				double *coef1 = model->sv_coef[j-1];
				double *coef2 = model->sv_coef[i];
				for(k=0;k<ci;k++)
					sum += coef1[si+k] * kvalue[si+k];
				for(k=0;k<cj;k++)
					sum += coef2[sj+k] * kvalue[sj+k];
				sum -= model->rho[p];
				dec_values[p] = sum;

				if(dec_values[p] > 0)
					++vote[i];
				else
					++vote[j];
				p++;
			}

		// the first class with the maximal vote count wins ties
		int vote_max_idx = 0;
		for(i=1;i<nr_class;i++)
			if(vote[i] > vote[vote_max_idx])
				vote_max_idx = i;

		free(kvalue);
		free(start);
		free(vote);
		return model->label[vote_max_idx];
	}
}
2614
svm_predict(const svm_model * model,const svm_node * x)2615 double svm_predict(const svm_model *model, const svm_node *x)
2616 {
2617 int nr_class = model->nr_class;
2618 double *dec_values;
2619 if(model->param.svm_type == ONE_CLASS ||
2620 model->param.svm_type == EPSILON_SVR ||
2621 model->param.svm_type == NU_SVR)
2622 dec_values = Malloc(double, 1);
2623 else
2624 dec_values = Malloc(double, nr_class*(nr_class-1)/2);
2625 double pred_result = svm_predict_values(model, x, dec_values);
2626 free(dec_values);
2627 return pred_result;
2628 }
2629
svm_predict_probability(const svm_model * model,const svm_node * x,double * prob_estimates)2630 double svm_predict_probability(
2631 const svm_model *model, const svm_node *x, double *prob_estimates)
2632 {
2633 if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
2634 model->probA!=NULL && model->probB!=NULL)
2635 {
2636 int i;
2637 int nr_class = model->nr_class;
2638 double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
2639 svm_predict_values(model, x, dec_values);
2640
2641 double min_prob=1e-7;
2642 double **pairwise_prob=Malloc(double *,nr_class);
2643 for(i=0;i<nr_class;i++)
2644 pairwise_prob[i]=Malloc(double,nr_class);
2645 int k=0;
2646 for(i=0;i<nr_class;i++)
2647 for(int j=i+1;j<nr_class;j++)
2648 {
2649 pairwise_prob[i][j]=min(max(sigmoid_predict(dec_values[k],model->probA[k],model->probB[k]),min_prob),1-min_prob);
2650 pairwise_prob[j][i]=1-pairwise_prob[i][j];
2651 k++;
2652 }
2653 if (nr_class == 2)
2654 {
2655 prob_estimates[0] = pairwise_prob[0][1];
2656 prob_estimates[1] = pairwise_prob[1][0];
2657 }
2658 else
2659 multiclass_probability(nr_class,pairwise_prob,prob_estimates);
2660
2661 int prob_max_idx = 0;
2662 for(i=1;i<nr_class;i++)
2663 if(prob_estimates[i] > prob_estimates[prob_max_idx])
2664 prob_max_idx = i;
2665 for(i=0;i<nr_class;i++)
2666 free(pairwise_prob[i]);
2667 free(dec_values);
2668 free(pairwise_prob);
2669 return model->label[prob_max_idx];
2670 }
2671 else
2672 return svm_predict(model, x);
2673 }
2674
// Names written to / read from model files; the index of each entry
// matches the corresponding svm_type enum value (C_SVC, NU_SVC, ...).
// NULL-terminated so lookup loops can detect the end.
static const char *svm_type_table[] =
{
	"c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL
};

// Same convention for kernel_type (LINEAR, POLY, RBF, SIGMOID, PRECOMPUTED).
static const char *kernel_type_table[]=
{
	"linear","polynomial","rbf","sigmoid","precomputed",NULL
};
2684
svm_save_model(const char * model_file_name,const svm_model * model)2685 int svm_save_model(const char *model_file_name, const svm_model *model)
2686 {
2687 FILE *fp = fopen(model_file_name,"w");
2688 if(fp==NULL) return -1;
2689
2690 char *old_locale = setlocale(LC_ALL, NULL);
2691 if (old_locale) {
2692 old_locale = strdup(old_locale);
2693 }
2694 setlocale(LC_ALL, "C");
2695
2696 const svm_parameter& param = model->param;
2697
2698 fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]);
2699 fprintf(fp,"kernel_type %s\n", kernel_type_table[param.kernel_type]);
2700
2701 if(param.kernel_type == POLY)
2702 fprintf(fp,"degree %d\n", param.degree);
2703
2704 if(param.kernel_type == POLY || param.kernel_type == RBF || param.kernel_type == SIGMOID)
2705 fprintf(fp,"gamma %.17g\n", param.gamma);
2706
2707 if(param.kernel_type == POLY || param.kernel_type == SIGMOID)
2708 fprintf(fp,"coef0 %.17g\n", param.coef0);
2709
2710 int nr_class = model->nr_class;
2711 int l = model->l;
2712 fprintf(fp, "nr_class %d\n", nr_class);
2713 fprintf(fp, "total_sv %d\n",l);
2714
2715 {
2716 fprintf(fp, "rho");
2717 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2718 fprintf(fp," %.17g",model->rho[i]);
2719 fprintf(fp, "\n");
2720 }
2721
2722 if(model->label)
2723 {
2724 fprintf(fp, "label");
2725 for(int i=0;i<nr_class;i++)
2726 fprintf(fp," %d",model->label[i]);
2727 fprintf(fp, "\n");
2728 }
2729
2730 if(model->probA) // regression has probA only
2731 {
2732 fprintf(fp, "probA");
2733 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2734 fprintf(fp," %.17g",model->probA[i]);
2735 fprintf(fp, "\n");
2736 }
2737 if(model->probB)
2738 {
2739 fprintf(fp, "probB");
2740 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2741 fprintf(fp," %.17g",model->probB[i]);
2742 fprintf(fp, "\n");
2743 }
2744
2745 if(model->nSV)
2746 {
2747 fprintf(fp, "nr_sv");
2748 for(int i=0;i<nr_class;i++)
2749 fprintf(fp," %d",model->nSV[i]);
2750 fprintf(fp, "\n");
2751 }
2752
2753 fprintf(fp, "SV\n");
2754 const double * const *sv_coef = model->sv_coef;
2755 const svm_node * const *SV = model->SV;
2756
2757 for(int i=0;i<l;i++)
2758 {
2759 for(int j=0;j<nr_class-1;j++)
2760 fprintf(fp, "%.17g ",sv_coef[j][i]);
2761
2762 const svm_node *p = SV[i];
2763
2764 if(param.kernel_type == PRECOMPUTED)
2765 fprintf(fp,"0:%d ",(int)(p->value));
2766 else
2767 while(p->index != -1)
2768 {
2769 fprintf(fp,"%d:%.8g ",p->index,p->value);
2770 p++;
2771 }
2772 fprintf(fp, "\n");
2773 }
2774
2775 setlocale(LC_ALL, old_locale);
2776 free(old_locale);
2777
2778 if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
2779 else return 0;
2780 }
2781
// Shared growable line buffer used by readline()/svm_load_model();
// allocated by the caller before the first readline() and grown on
// demand via realloc.
static char *line = NULL;
static int max_line_len;
2784
readline(FILE * input)2785 static char* readline(FILE *input)
2786 {
2787 int len;
2788
2789 if(fgets(line,max_line_len,input) == NULL)
2790 return NULL;
2791
2792 while(strrchr(line,'\n') == NULL)
2793 {
2794 max_line_len *= 2;
2795 line = (char *) realloc(line,max_line_len);
2796 len = (int) strlen(line);
2797 if(fgets(line+len,max_line_len-len,input) == NULL)
2798 break;
2799 }
2800 return line;
2801 }
2802
2803 //
2804 // FSCANF helps to handle fscanf failures.
2805 // Its do-while block avoids the ambiguity when
2806 // if (...)
2807 // FSCANF();
2808 // is used
2809 //
2810 #define FSCANF(_stream, _format, _var) do{ if (fscanf(_stream, _format, _var) != 1) return false; }while(0)
read_model_header(FILE * fp,svm_model * model)2811 bool read_model_header(FILE *fp, svm_model* model)
2812 {
2813 svm_parameter& param = model->param;
2814 // parameters for training only won't be assigned, but arrays are assigned as NULL for safety
2815 param.nr_weight = 0;
2816 param.weight_label = NULL;
2817 param.weight = NULL;
2818
2819 char cmd[81];
2820 while(1)
2821 {
2822 FSCANF(fp,"%80s",cmd);
2823
2824 if(strcmp(cmd,"svm_type")==0)
2825 {
2826 FSCANF(fp,"%80s",cmd);
2827 int i;
2828 for(i=0;svm_type_table[i];i++)
2829 {
2830 if(strcmp(svm_type_table[i],cmd)==0)
2831 {
2832 param.svm_type=i;
2833 break;
2834 }
2835 }
2836 if(svm_type_table[i] == NULL)
2837 {
2838 fprintf(stderr,"unknown svm type.\n");
2839 return false;
2840 }
2841 }
2842 else if(strcmp(cmd,"kernel_type")==0)
2843 {
2844 FSCANF(fp,"%80s",cmd);
2845 int i;
2846 for(i=0;kernel_type_table[i];i++)
2847 {
2848 if(strcmp(kernel_type_table[i],cmd)==0)
2849 {
2850 param.kernel_type=i;
2851 break;
2852 }
2853 }
2854 if(kernel_type_table[i] == NULL)
2855 {
2856 fprintf(stderr,"unknown kernel function.\n");
2857 return false;
2858 }
2859 }
2860 else if(strcmp(cmd,"degree")==0)
2861 FSCANF(fp,"%d",¶m.degree);
2862 else if(strcmp(cmd,"gamma")==0)
2863 FSCANF(fp,"%lf",¶m.gamma);
2864 else if(strcmp(cmd,"coef0")==0)
2865 FSCANF(fp,"%lf",¶m.coef0);
2866 else if(strcmp(cmd,"nr_class")==0)
2867 FSCANF(fp,"%d",&model->nr_class);
2868 else if(strcmp(cmd,"total_sv")==0)
2869 FSCANF(fp,"%d",&model->l);
2870 else if(strcmp(cmd,"rho")==0)
2871 {
2872 int n = model->nr_class * (model->nr_class-1)/2;
2873 model->rho = Malloc(double,n);
2874 for(int i=0;i<n;i++)
2875 FSCANF(fp,"%lf",&model->rho[i]);
2876 }
2877 else if(strcmp(cmd,"label")==0)
2878 {
2879 int n = model->nr_class;
2880 model->label = Malloc(int,n);
2881 for(int i=0;i<n;i++)
2882 FSCANF(fp,"%d",&model->label[i]);
2883 }
2884 else if(strcmp(cmd,"probA")==0)
2885 {
2886 int n = model->nr_class * (model->nr_class-1)/2;
2887 model->probA = Malloc(double,n);
2888 for(int i=0;i<n;i++)
2889 FSCANF(fp,"%lf",&model->probA[i]);
2890 }
2891 else if(strcmp(cmd,"probB")==0)
2892 {
2893 int n = model->nr_class * (model->nr_class-1)/2;
2894 model->probB = Malloc(double,n);
2895 for(int i=0;i<n;i++)
2896 FSCANF(fp,"%lf",&model->probB[i]);
2897 }
2898 else if(strcmp(cmd,"nr_sv")==0)
2899 {
2900 int n = model->nr_class;
2901 model->nSV = Malloc(int,n);
2902 for(int i=0;i<n;i++)
2903 FSCANF(fp,"%d",&model->nSV[i]);
2904 }
2905 else if(strcmp(cmd,"SV")==0)
2906 {
2907 while(1)
2908 {
2909 int c = getc(fp);
2910 if(c==EOF || c=='\n') break;
2911 }
2912 break;
2913 }
2914 else
2915 {
2916 fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
2917 return false;
2918 }
2919 }
2920
2921 return true;
2922
2923 }
2924
svm_load_model(const char * model_file_name)2925 svm_model *svm_load_model(const char *model_file_name)
2926 {
2927 FILE *fp = fopen(model_file_name,"rb");
2928 if(fp==NULL) return NULL;
2929
2930 char *old_locale = setlocale(LC_ALL, NULL);
2931 if (old_locale) {
2932 old_locale = strdup(old_locale);
2933 }
2934 setlocale(LC_ALL, "C");
2935
2936 // read parameters
2937
2938 svm_model *model = Malloc(svm_model,1);
2939 model->rho = NULL;
2940 model->probA = NULL;
2941 model->probB = NULL;
2942 model->sv_indices = NULL;
2943 model->label = NULL;
2944 model->nSV = NULL;
2945
2946 // read header
2947 if (!read_model_header(fp, model))
2948 {
2949 fprintf(stderr, "ERROR: fscanf failed to read model\n");
2950 setlocale(LC_ALL, old_locale);
2951 free(old_locale);
2952 free(model->rho);
2953 free(model->label);
2954 free(model->nSV);
2955 free(model);
2956 return NULL;
2957 }
2958
2959 // read sv_coef and SV
2960
2961 int elements = 0;
2962 long pos = ftell(fp);
2963
2964 max_line_len = 1024;
2965 line = Malloc(char,max_line_len);
2966 char *p,*endptr,*idx,*val;
2967
2968 while(readline(fp)!=NULL)
2969 {
2970 p = strtok(line,":");
2971 while(1)
2972 {
2973 p = strtok(NULL,":");
2974 if(p == NULL)
2975 break;
2976 ++elements;
2977 }
2978 }
2979 elements += model->l;
2980
2981 fseek(fp,pos,SEEK_SET);
2982
2983 int m = model->nr_class - 1;
2984 int l = model->l;
2985 model->sv_coef = Malloc(double *,m);
2986 int i;
2987 for(i=0;i<m;i++)
2988 model->sv_coef[i] = Malloc(double,l);
2989 model->SV = Malloc(svm_node*,l);
2990 svm_node *x_space = NULL;
2991 if(l>0) x_space = Malloc(svm_node,elements);
2992
2993 int j=0;
2994 for(i=0;i<l;i++)
2995 {
2996 readline(fp);
2997 model->SV[i] = &x_space[j];
2998
2999 p = strtok(line, " \t");
3000 model->sv_coef[0][i] = strtod(p,&endptr);
3001 for(int k=1;k<m;k++)
3002 {
3003 p = strtok(NULL, " \t");
3004 model->sv_coef[k][i] = strtod(p,&endptr);
3005 }
3006
3007 while(1)
3008 {
3009 idx = strtok(NULL, ":");
3010 val = strtok(NULL, " \t");
3011
3012 if(val == NULL)
3013 break;
3014 x_space[j].index = (int) strtol(idx,&endptr,10);
3015 x_space[j].value = strtod(val,&endptr);
3016
3017 ++j;
3018 }
3019 x_space[j++].index = -1;
3020 }
3021 free(line);
3022
3023 setlocale(LC_ALL, old_locale);
3024 free(old_locale);
3025
3026 if (ferror(fp) != 0 || fclose(fp) != 0)
3027 return NULL;
3028
3029 model->free_sv = 1; // XXX
3030 return model;
3031 }
3032
svm_free_model_content(svm_model * model_ptr)3033 void svm_free_model_content(svm_model* model_ptr)
3034 {
3035 if(model_ptr->free_sv && model_ptr->l > 0 && model_ptr->SV != NULL)
3036 free((void *)(model_ptr->SV[0]));
3037 if(model_ptr->sv_coef)
3038 {
3039 for(int i=0;i<model_ptr->nr_class-1;i++)
3040 free(model_ptr->sv_coef[i]);
3041 }
3042
3043 free(model_ptr->SV);
3044 model_ptr->SV = NULL;
3045
3046 free(model_ptr->sv_coef);
3047 model_ptr->sv_coef = NULL;
3048
3049 free(model_ptr->rho);
3050 model_ptr->rho = NULL;
3051
3052 free(model_ptr->label);
3053 model_ptr->label= NULL;
3054
3055 free(model_ptr->probA);
3056 model_ptr->probA = NULL;
3057
3058 free(model_ptr->probB);
3059 model_ptr->probB= NULL;
3060
3061 free(model_ptr->sv_indices);
3062 model_ptr->sv_indices = NULL;
3063
3064 free(model_ptr->nSV);
3065 model_ptr->nSV = NULL;
3066 }
3067
svm_free_and_destroy_model(svm_model ** model_ptr_ptr)3068 void svm_free_and_destroy_model(svm_model** model_ptr_ptr)
3069 {
3070 if(model_ptr_ptr != NULL && *model_ptr_ptr != NULL)
3071 {
3072 svm_free_model_content(*model_ptr_ptr);
3073 free(*model_ptr_ptr);
3074 *model_ptr_ptr = NULL;
3075 }
3076 }
3077
svm_destroy_param(svm_parameter * param)3078 void svm_destroy_param(svm_parameter* param)
3079 {
3080 free(param->weight_label);
3081 free(param->weight);
3082 }
3083
// Validate `param` (and, for nu-SVC, its feasibility on `prob`) before
// training.  Returns NULL when the parameters are usable, otherwise a
// pointer to a static English message describing the first problem found.
const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
{
	// svm_type

	int svm_type = param->svm_type;
	if(svm_type != C_SVC &&
	   svm_type != NU_SVC &&
	   svm_type != ONE_CLASS &&
	   svm_type != EPSILON_SVR &&
	   svm_type != NU_SVR)
		return "unknown svm type";

	// kernel_type, degree

	int kernel_type = param->kernel_type;
	if(kernel_type != LINEAR &&
	   kernel_type != POLY &&
	   kernel_type != RBF &&
	   kernel_type != SIGMOID &&
	   kernel_type != PRECOMPUTED)
		return "unknown kernel type";

	if(param->gamma < 0)
		return "gamma < 0";

	if(param->degree < 0)
		return "degree of polynomial kernel < 0";

	// cache_size,eps,C,nu,p,shrinking

	if(param->cache_size <= 0)
		return "cache_size <= 0";

	if(param->eps <= 0)
		return "eps <= 0";

	// C applies only to C-SVC and the two regression formulations.
	if(svm_type == C_SVC ||
	   svm_type == EPSILON_SVR ||
	   svm_type == NU_SVR)
		if(param->C <= 0)
			return "C <= 0";

	// nu applies to the nu-formulations and one-class.
	if(svm_type == NU_SVC ||
	   svm_type == ONE_CLASS ||
	   svm_type == NU_SVR)
		if(param->nu <= 0 || param->nu > 1)
			return "nu <= 0 or nu > 1";

	// epsilon-tube width is only meaningful for epsilon-SVR.
	if(svm_type == EPSILON_SVR)
		if(param->p < 0)
			return "p < 0";

	if(param->shrinking != 0 &&
	   param->shrinking != 1)
		return "shrinking != 0 and shrinking != 1";

	if(param->probability != 0 &&
	   param->probability != 1)
		return "probability != 0 and probability != 1";

	if(param->probability == 1 &&
	   svm_type == ONE_CLASS)
		return "one-class SVM probability output not supported yet";


	// check whether nu-svc is feasible

	if(svm_type == NU_SVC)
	{
		int l = prob->l;
		int max_nr_class = 16;
		int nr_class = 0;
		int *label = Malloc(int,max_nr_class);
		int *count = Malloc(int,max_nr_class);

		// Count how many training points carry each distinct label,
		// growing label/count as new labels appear.
		int i;
		for(i=0;i<l;i++)
		{
			int this_label = (int)prob->y[i];
			int j;
			for(j=0;j<nr_class;j++)
				if(this_label == label[j])
				{
					++count[j];
					break;
				}
			if(j == nr_class)
			{
				if(nr_class == max_nr_class)
				{
					max_nr_class *= 2;
					label = (int *)realloc(label,max_nr_class*sizeof(int));
					count = (int *)realloc(count,max_nr_class*sizeof(int));
				}
				label[nr_class] = this_label;
				count[nr_class] = 1;
				++nr_class;
			}
		}

		// nu-SVC is feasible only when, for every class pair with sizes
		// (n1, n2), nu*(n1+n2)/2 <= min(n1, n2).
		for(i=0;i<nr_class;i++)
		{
			int n1 = count[i];
			for(int j=i+1;j<nr_class;j++)
			{
				int n2 = count[j];
				if(param->nu*(n1+n2)/2 > min(n1,n2))
				{
					free(label);
					free(count);
					return "specified nu is infeasible";
				}
			}
		}
		free(label);
		free(count);
	}

	return NULL;
}
3204
svm_check_probability_model(const svm_model * model)3205 int svm_check_probability_model(const svm_model *model)
3206 {
3207 return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
3208 model->probA!=NULL && model->probB!=NULL) ||
3209 ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
3210 model->probA!=NULL);
3211 }
3212
svm_set_print_string_function(void (* print_func)(const char *))3213 void svm_set_print_string_function(void (*print_func)(const char *))
3214 {
3215 if(print_func == NULL)
3216 svm_print_string = &print_string_stdout;
3217 else
3218 svm_print_string = print_func;
3219 }
3220