1 #include <math.h>
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <ctype.h>
5 #include <float.h>
6 #include <string.h>
7 #include <stdarg.h>
8 #include <limits.h>
9 #include <locale.h>
10 #include "svm.h"
11 int libsvm_version = LIBSVM_VERSION;
12 typedef float Qfloat;
13 typedef signed char schar;
14 #ifndef min
15 template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
16 #endif
17 #ifndef max
18 template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
19 #endif
20 template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
21 template <class S, class T> static inline void clone(T*& dst, S* src, int n)
22 {
23 	dst = new T[n];
24 	memcpy((void *)dst,(void *)src,sizeof(T)*n);
25 }
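// powi(base, times): integer power by repeated squaring
// (e.g. powi(2.0,10) == 1024.0); assumes a non-negative exponent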
26 static inline double powi(double base, int times)
27 {
28 	double tmp = base, ret = 1.0;
29 
30 	for(int t=times; t>0; t/=2)
31 	{
32 		if(t%2==1) ret*=tmp;
33 		tmp = tmp * tmp;
34 	}
35 	return ret;
36 }
37 #define INF HUGE_VAL
38 #define TAU 1e-12
39 #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
40 
41 static void print_string_stdout(const char *s)
42 {
43 	fputs(s,stdout);
44 	fflush(stdout);
45 }
46 static void (*svm_print_string) (const char *) = &print_string_stdout;
47 #if 1
48 static void info(const char *fmt,...)
49 {
50 	char buf[BUFSIZ];
51 	va_list ap;
52 	va_start(ap,fmt);
53 	vsprintf(buf,fmt,ap);
54 	va_end(ap);
55 	(*svm_print_string)(buf);
56 }
57 #else
58 static void info(const char *fmt,...) {}
59 #endif
60 
61 //
62 // Kernel Cache
63 //
64 // l is the total number of data items
65 // size is the cache size limit in bytes
66 //
67 class Cache
68 {
69 public:
70 	Cache(int l,long int size);
71 	~Cache();
72 
73 	// request data [0,len)
74 // return some position p where [p,len) needs to be filled
75 	// (p >= len if nothing needs to be filled)
76 	int get_data(const int index, Qfloat **data, int len);
77 	void swap_index(int i, int j);
78 private:
79 	int l;
80 	long int size;
81 	struct head_t
82 	{
83 		head_t *prev, *next;	// a circular list
84 		Qfloat *data;
85 		int len;		// data[0,len) is cached in this entry
86 	};
87 
88 	head_t *head;
89 	head_t lru_head;
90 	void lru_delete(head_t *h);
91 	void lru_insert(head_t *h);
92 };
93 
94 Cache::Cache(int l_,long int size_):l(l_),size(size_)
95 {
96 	head = (head_t *)calloc(l,sizeof(head_t));	// initialized to 0
97 	size /= sizeof(Qfloat);
98 	size -= l * sizeof(head_t) / sizeof(Qfloat);
99 	size = max(size, 2 * (long int) l);	// cache must be large enough for two columns
100 	lru_head.next = lru_head.prev = &lru_head;
101 }
102 
103 Cache::~Cache()
104 {
105 	for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
106 		free(h->data);
107 	free(head);
108 }
109 
110 void Cache::lru_delete(head_t *h)
111 {
112 	// delete from current location
113 	h->prev->next = h->next;
114 	h->next->prev = h->prev;
115 }
116 
117 void Cache::lru_insert(head_t *h)
118 {
119 	// insert to last position
120 	h->next = &lru_head;
121 	h->prev = lru_head.prev;
122 	h->prev->next = h;
123 	h->next->prev = h;
124 }
125 
126 int Cache::get_data(const int index, Qfloat **data, int len)
127 {
128 	head_t *h = &head[index];
129 	if(h->len) lru_delete(h);
130 	int more = len - h->len;
131 
132 	if(more > 0)
133 	{
134 		// free old space
135 		while(size < more)
136 		{
137 			head_t *old = lru_head.next;
138 			lru_delete(old);
139 			free(old->data);
140 			size += old->len;
141 			old->data = 0;
142 			old->len = 0;
143 		}
144 
145 		// allocate new space
146 		h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len);
147 		size -= more;
148 		swap(h->len,len);
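		// after the swap, h->len records the newly cached length while 'len'
		// now holds the previously cached prefix, which is returned below as
		// the position from which the caller must fill data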
149 	}
150 
151 	lru_insert(h);
152 	*data = h->data;
153 	return len;
154 }
155 
156 void Cache::swap_index(int i, int j)
157 {
158 	if(i==j) return;
159 
160 	if(head[i].len) lru_delete(&head[i]);
161 	if(head[j].len) lru_delete(&head[j]);
162 	swap(head[i].data,head[j].data);
163 	swap(head[i].len,head[j].len);
164 	if(head[i].len) lru_insert(&head[i]);
165 	if(head[j].len) lru_insert(&head[j]);
166 
167 	if(i>j) swap(i,j);
168 	for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
169 	{
170 		if(h->len > i)
171 		{
172 			if(h->len > j)
173 				swap(h->data[i],h->data[j]);
174 			else
175 			{
176 				// give up
177 				lru_delete(h);
178 				free(h->data);
179 				size += h->len;
180 				h->data = 0;
181 				h->len = 0;
182 			}
183 		}
184 	}
185 }
186 
187 //
188 // Kernel evaluation
189 //
190 // the static method k_function is for doing single kernel evaluation
191 // the constructor of Kernel prepares to calculate the l*l kernel matrix
192 // the member function get_Q is for getting one column from the Q Matrix
193 //
194 class QMatrix {
195 public:
196 	virtual Qfloat *get_Q(int column, int len) const = 0;
197 	virtual double *get_QD() const = 0;
198 	virtual void swap_index(int i, int j) const = 0;
199 	virtual ~QMatrix() {}
200 };
201 
202 class Kernel: public QMatrix {
203 public:
204 	Kernel(int l, svm_node * const * x, const svm_parameter& param);
205 	virtual ~Kernel();
206 
207 	static double k_function(const svm_node *x, const svm_node *y,
208 				 const svm_parameter& param);
209 	virtual Qfloat *get_Q(int column, int len) const = 0;
210 	virtual double *get_QD() const = 0;
211 	virtual void swap_index(int i, int j) const	// not so const...
212 	{
213 		swap(x[i],x[j]);
214 		if(x_square) swap(x_square[i],x_square[j]);
215 	}
216 protected:
217 
218 	double (Kernel::*kernel_function)(int i, int j) const;
219 
220 private:
221 	const svm_node **x;
222 	double *x_square;
223 
224 	// svm_parameter
225 	const int kernel_type;
226 	const int degree;
227 	const double gamma;
228 	const double coef0;
229 
230 	static double dot(const svm_node *px, const svm_node *py);
231 	double kernel_linear(int i, int j) const
232 	{
233 		return dot(x[i],x[j]);
234 	}
235 	double kernel_poly(int i, int j) const
236 	{
237 		return powi(gamma*dot(x[i],x[j])+coef0,degree);
238 	}
239 	double kernel_rbf(int i, int j) const
240 	{
241 		return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
242 	}
243 	double kernel_sigmoid(int i, int j) const
244 	{
245 		return tanh(gamma*dot(x[i],x[j])+coef0);
246 	}
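	// for precomputed kernels, x[j][0].value stores the (1-based) column index
	// of sample j, so the value is looked up directly in row i of the
	// user-supplied kernel matrix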
247 	double kernel_precomputed(int i, int j) const
248 	{
249 		return x[i][(int)(x[j][0].value)].value;
250 	}
251 };
252 
253 Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param)
254 :kernel_type(param.kernel_type), degree(param.degree),
255  gamma(param.gamma), coef0(param.coef0)
256 {
257 	switch(kernel_type)
258 	{
259 		case LINEAR:
260 			kernel_function = &Kernel::kernel_linear;
261 			break;
262 		case POLY:
263 			kernel_function = &Kernel::kernel_poly;
264 			break;
265 		case RBF:
266 			kernel_function = &Kernel::kernel_rbf;
267 			break;
268 		case SIGMOID:
269 			kernel_function = &Kernel::kernel_sigmoid;
270 			break;
271 		case PRECOMPUTED:
272 			kernel_function = &Kernel::kernel_precomputed;
273 			break;
274 	}
275 
276 	clone(x,x_,l);
277 
278 	if(kernel_type == RBF)
279 	{
280 		x_square = new double[l];
281 		for(int i=0;i<l;i++)
282 			x_square[i] = dot(x[i],x[i]);
283 	}
284 	else
285 		x_square = 0;
286 }
287 
288 Kernel::~Kernel()
289 {
290 	delete[] x;
291 	delete[] x_square;
292 }
293 
294 double Kernel::dot(const svm_node *px, const svm_node *py)
295 {
296 	double sum = 0;
297 	while(px->index != -1 && py->index != -1)
298 	{
299 		if(px->index == py->index)
300 		{
301 			sum += px->value * py->value;
302 			++px;
303 			++py;
304 		}
305 		else
306 		{
307 			if(px->index > py->index)
308 				++py;
309 			else
310 				++px;
311 		}
312 	}
313 	return sum;
314 }
315 
316 double Kernel::k_function(const svm_node *x, const svm_node *y,
317 			  const svm_parameter& param)
318 {
319 	switch(param.kernel_type)
320 	{
321 		case LINEAR:
322 			return dot(x,y);
323 		case POLY:
324 			return powi(param.gamma*dot(x,y)+param.coef0,param.degree);
325 		case RBF:
326 		{
327 			double sum = 0;
328 			while(x->index != -1 && y->index !=-1)
329 			{
330 				if(x->index == y->index)
331 				{
332 					double d = x->value - y->value;
333 					sum += d*d;
334 					++x;
335 					++y;
336 				}
337 				else
338 				{
339 					if(x->index > y->index)
340 					{
341 						sum += y->value * y->value;
342 						++y;
343 					}
344 					else
345 					{
346 						sum += x->value * x->value;
347 						++x;
348 					}
349 				}
350 			}
351 
352 			while(x->index != -1)
353 			{
354 				sum += x->value * x->value;
355 				++x;
356 			}
357 
358 			while(y->index != -1)
359 			{
360 				sum += y->value * y->value;
361 				++y;
362 			}
363 
364 			return exp(-param.gamma*sum);
365 		}
366 		case SIGMOID:
367 			return tanh(param.gamma*dot(x,y)+param.coef0);
368 		case PRECOMPUTED:  //x: test (validation), y: SV
369 			return x[(int)(y->value)].value;
370 		default:
371 			return 0;  // Unreachable
372 	}
373 }
374 
375 // An SMO algorithm in Fan et al., JMLR 6(2005), p. 1889--1918
376 // Solves:
377 //
378 //	min 0.5(\alpha^T Q \alpha) + p^T \alpha
379 //
380 //		y^T \alpha = \delta
381 //		y_i = +1 or -1
382 //		0 <= alpha_i <= Cp for y_i = 1
383 //		0 <= alpha_i <= Cn for y_i = -1
384 //
385 // Given:
386 //
387 //	Q, p, y, Cp, Cn, and an initial feasible point \alpha
388 //	l is the size of vectors and matrices
389 //	eps is the stopping tolerance
390 //
391 // solution will be put in \alpha, objective value will be put in obj
392 //
393 class Solver {
394 public:
395 	Solver() {};
396 	virtual ~Solver() {};
397 
398 	struct SolutionInfo {
399 		double obj;
400 		double rho;
401 		double upper_bound_p;
402 		double upper_bound_n;
403 		double r;	// for Solver_NU
404 	};
405 
406 	void Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
407 		   double *alpha_, double Cp, double Cn, double eps,
408 		   SolutionInfo* si, int shrinking);
409 protected:
410 	int active_size;
411 	schar *y;
412 	double *G;		// gradient of objective function
413 	enum { LOWER_BOUND, UPPER_BOUND, FREE };
414 	char *alpha_status;	// LOWER_BOUND, UPPER_BOUND, FREE
415 	double *alpha;
416 	const QMatrix *Q;
417 	const double *QD;
418 	double eps;
419 	double Cp,Cn;
420 	double *p;
421 	int *active_set;
422 	double *G_bar;		// gradient, if we treat free variables as 0
423 	int l;
424 	bool unshrink;	// XXX
425 
426 	double get_C(int i)
427 	{
428 		return (y[i] > 0)? Cp : Cn;
429 	}
430 	void update_alpha_status(int i)
431 	{
432 		if(alpha[i] >= get_C(i))
433 			alpha_status[i] = UPPER_BOUND;
434 		else if(alpha[i] <= 0)
435 			alpha_status[i] = LOWER_BOUND;
436 		else alpha_status[i] = FREE;
437 	}
438 	bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
439 	bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
440 	bool is_free(int i) { return alpha_status[i] == FREE; }
441 	void swap_index(int i, int j);
442 	void reconstruct_gradient();
443 	virtual int select_working_set(int &i, int &j);
444 	virtual double calculate_rho();
445 	virtual void do_shrinking();
446 private:
447 	bool be_shrunk(int i, double Gmax1, double Gmax2);
448 };
449 
450 void Solver::swap_index(int i, int j)
451 {
452 	Q->swap_index(i,j);
453 	swap(y[i],y[j]);
454 	swap(G[i],G[j]);
455 	swap(alpha_status[i],alpha_status[j]);
456 	swap(alpha[i],alpha[j]);
457 	swap(p[i],p[j]);
458 	swap(active_set[i],active_set[j]);
459 	swap(G_bar[i],G_bar[j]);
460 }
461 
462 void Solver::reconstruct_gradient()
463 {
464 	// reconstruct inactive elements of G from G_bar and free variables
465 
466 	if(active_size == l) return;
467 
468 	int i,j;
469 	int nr_free = 0;
470 
471 	for(j=active_size;j<l;j++)
472 		G[j] = G_bar[j] + p[j];
473 
474 	for(j=0;j<active_size;j++)
475 		if(is_free(j))
476 			nr_free++;
477 
478 	if(2*nr_free < active_size)
479 		info("\nWARNING: using -h 0 may be faster\n");
480 
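	// pick the cheaper reconstruction order: either scan the inactive rows
	// (columns of length active_size) or the free variables (full-length columns)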
481 	if (nr_free*l > 2*active_size*(l-active_size))
482 	{
483 		for(i=active_size;i<l;i++)
484 		{
485 			const Qfloat *Q_i = Q->get_Q(i,active_size);
486 			for(j=0;j<active_size;j++)
487 				if(is_free(j))
488 					G[i] += alpha[j] * Q_i[j];
489 		}
490 	}
491 	else
492 	{
493 		for(i=0;i<active_size;i++)
494 			if(is_free(i))
495 			{
496 				const Qfloat *Q_i = Q->get_Q(i,l);
497 				double alpha_i = alpha[i];
498 				for(j=active_size;j<l;j++)
499 					G[j] += alpha_i * Q_i[j];
500 			}
501 	}
502 }
503 
504 void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
505 		   double *alpha_, double Cp, double Cn, double eps,
506 		   SolutionInfo* si, int shrinking)
507 {
508 	this->l = l;
509 	this->Q = &Q;
510 	QD=Q.get_QD();
511 	clone(p, p_,l);
512 	clone(y, y_,l);
513 	clone(alpha,alpha_,l);
514 	this->Cp = Cp;
515 	this->Cn = Cn;
516 	this->eps = eps;
517 	unshrink = false;
518 
519 	// initialize alpha_status
520 	{
521 		alpha_status = new char[l];
522 		for(int i=0;i<l;i++)
523 			update_alpha_status(i);
524 	}
525 
526 	// initialize active set (for shrinking)
527 	{
528 		active_set = new int[l];
529 		for(int i=0;i<l;i++)
530 			active_set[i] = i;
531 		active_size = l;
532 	}
533 
534 	// initialize gradient
535 	{
536 		G = new double[l];
537 		G_bar = new double[l];
538 		int i;
539 		for(i=0;i<l;i++)
540 		{
541 			G[i] = p[i];
542 			G_bar[i] = 0;
543 		}
544 		for(i=0;i<l;i++)
545 			if(!is_lower_bound(i))
546 			{
547 				const Qfloat *Q_i = Q.get_Q(i,l);
548 				double alpha_i = alpha[i];
549 				int j;
550 				for(j=0;j<l;j++)
551 					G[j] += alpha_i*Q_i[j];
552 				if(is_upper_bound(i))
553 					for(j=0;j<l;j++)
554 						G_bar[j] += get_C(i) * Q_i[j];
555 			}
556 	}
557 
558 	// optimization step
559 
560 	int iter = 0;
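	// iteration cap: at least 10^7, otherwise about 100 passes over the data;
	// the l > INT_MAX/100 test guards 100*l against integer overflow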
561 	int max_iter = max(10000000, l>INT_MAX/100 ? INT_MAX : 100*l);
562 	int counter = min(l,1000)+1;
563 
564 	while(iter < max_iter)
565 	{
566 		// show progress and do shrinking
567 
568 		if(--counter == 0)
569 		{
570 			counter = min(l,1000);
571 			if(shrinking) do_shrinking();
572 			info(".");
573 		}
574 
575 		int i,j;
576 		if(select_working_set(i,j)!=0)
577 		{
578 			// reconstruct the whole gradient
579 			reconstruct_gradient();
580 			// reset active set size and check
581 			active_size = l;
582 			info("*");
583 			if(select_working_set(i,j)!=0)
584 				break;
585 			else
586 				counter = 1;	// do shrinking next iteration
587 		}
588 
589 		++iter;
590 
591 		// update alpha[i] and alpha[j], handle bounds carefully
592 
593 		const Qfloat *Q_i = Q.get_Q(i,active_size);
594 		const Qfloat *Q_j = Q.get_Q(j,active_size);
595 
596 		double C_i = get_C(i);
597 		double C_j = get_C(j);
598 
599 		double old_alpha_i = alpha[i];
600 		double old_alpha_j = alpha[j];
601 
602 		if(y[i]!=y[j])
603 		{
604 			double quad_coef = QD[i]+QD[j]+2*Q_i[j];
605 			if (quad_coef <= 0)
606 				quad_coef = TAU;
607 			double delta = (-G[i]-G[j])/quad_coef;
608 			double diff = alpha[i] - alpha[j];
609 			alpha[i] += delta;
610 			alpha[j] += delta;
611 
612 			if(diff > 0)
613 			{
614 				if(alpha[j] < 0)
615 				{
616 					alpha[j] = 0;
617 					alpha[i] = diff;
618 				}
619 			}
620 			else
621 			{
622 				if(alpha[i] < 0)
623 				{
624 					alpha[i] = 0;
625 					alpha[j] = -diff;
626 				}
627 			}
628 			if(diff > C_i - C_j)
629 			{
630 				if(alpha[i] > C_i)
631 				{
632 					alpha[i] = C_i;
633 					alpha[j] = C_i - diff;
634 				}
635 			}
636 			else
637 			{
638 				if(alpha[j] > C_j)
639 				{
640 					alpha[j] = C_j;
641 					alpha[i] = C_j + diff;
642 				}
643 			}
644 		}
645 		else
646 		{
647 			double quad_coef = QD[i]+QD[j]-2*Q_i[j];
648 			if (quad_coef <= 0)
649 				quad_coef = TAU;
650 			double delta = (G[i]-G[j])/quad_coef;
651 			double sum = alpha[i] + alpha[j];
652 			alpha[i] -= delta;
653 			alpha[j] += delta;
654 
655 			if(sum > C_i)
656 			{
657 				if(alpha[i] > C_i)
658 				{
659 					alpha[i] = C_i;
660 					alpha[j] = sum - C_i;
661 				}
662 			}
663 			else
664 			{
665 				if(alpha[j] < 0)
666 				{
667 					alpha[j] = 0;
668 					alpha[i] = sum;
669 				}
670 			}
671 			if(sum > C_j)
672 			{
673 				if(alpha[j] > C_j)
674 				{
675 					alpha[j] = C_j;
676 					alpha[i] = sum - C_j;
677 				}
678 			}
679 			else
680 			{
681 				if(alpha[i] < 0)
682 				{
683 					alpha[i] = 0;
684 					alpha[j] = sum;
685 				}
686 			}
687 		}
688 
689 		// update G
690 
691 		double delta_alpha_i = alpha[i] - old_alpha_i;
692 		double delta_alpha_j = alpha[j] - old_alpha_j;
693 
694 		for(int k=0;k<active_size;k++)
695 		{
696 			G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
697 		}
698 
699 		// update alpha_status and G_bar
700 
701 		{
702 			bool ui = is_upper_bound(i);
703 			bool uj = is_upper_bound(j);
704 			update_alpha_status(i);
705 			update_alpha_status(j);
706 			int k;
707 			if(ui != is_upper_bound(i))
708 			{
709 				Q_i = Q.get_Q(i,l);
710 				if(ui)
711 					for(k=0;k<l;k++)
712 						G_bar[k] -= C_i * Q_i[k];
713 				else
714 					for(k=0;k<l;k++)
715 						G_bar[k] += C_i * Q_i[k];
716 			}
717 
718 			if(uj != is_upper_bound(j))
719 			{
720 				Q_j = Q.get_Q(j,l);
721 				if(uj)
722 					for(k=0;k<l;k++)
723 						G_bar[k] -= C_j * Q_j[k];
724 				else
725 					for(k=0;k<l;k++)
726 						G_bar[k] += C_j * Q_j[k];
727 			}
728 		}
729 	}
730 
731 	if(iter >= max_iter)
732 	{
733 		if(active_size < l)
734 		{
735 			// reconstruct the whole gradient to calculate objective value
736 			reconstruct_gradient();
737 			active_size = l;
738 			info("*");
739 		}
740 		fprintf(stderr,"\nWARNING: reaching max number of iterations\n");
741 	}
742 
743 	// calculate rho
744 
745 	si->rho = calculate_rho();
746 
747 	// calculate objective value
748 	{
749 		double v = 0;
750 		int i;
751 		for(i=0;i<l;i++)
752 			v += alpha[i] * (G[i] + p[i]);
753 
754 		si->obj = v/2;
755 	}
756 
757 	// put back the solution
758 	{
759 		for(int i=0;i<l;i++)
760 			alpha_[active_set[i]] = alpha[i];
761 	}
762 
763 	// juggle everything back
764 	/*{
765 		for(int i=0;i<l;i++)
766 			while(active_set[i] != i)
767 				swap_index(i,active_set[i]);
768 				// or Q.swap_index(i,active_set[i]);
769 	}*/
770 
771 	si->upper_bound_p = Cp;
772 	si->upper_bound_n = Cn;
773 
774 	info("\noptimization finished, #iter = %d\n",iter);
775 
776 	delete[] p;
777 	delete[] y;
778 	delete[] alpha;
779 	delete[] alpha_status;
780 	delete[] active_set;
781 	delete[] G;
782 	delete[] G_bar;
783 }
784 
785 // return 1 if already optimal, return 0 otherwise
786 int Solver::select_working_set(int &out_i, int &out_j)
787 {
788 	// return i,j such that
789 	// i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
790 	// j: minimizes the decrease of obj value
791 //    (if quadratic coefficient <= 0, replace it with tau)
792 	//    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
793 
794 	double Gmax = -INF;
795 	double Gmax2 = -INF;
796 	int Gmax_idx = -1;
797 	int Gmin_idx = -1;
798 	double obj_diff_min = INF;
799 
800 	for(int t=0;t<active_size;t++)
801 		if(y[t]==+1)
802 		{
803 			if(!is_upper_bound(t))
804 				if(-G[t] >= Gmax)
805 				{
806 					Gmax = -G[t];
807 					Gmax_idx = t;
808 				}
809 		}
810 		else
811 		{
812 			if(!is_lower_bound(t))
813 				if(G[t] >= Gmax)
814 				{
815 					Gmax = G[t];
816 					Gmax_idx = t;
817 				}
818 		}
819 
820 	int i = Gmax_idx;
821 	const Qfloat *Q_i = NULL;
822 	if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1
823 		Q_i = Q->get_Q(i,active_size);
824 
825 	for(int j=0;j<active_size;j++)
826 	{
827 		if(y[j]==+1)
828 		{
829 			if (!is_lower_bound(j))
830 			{
831 				double grad_diff=Gmax+G[j];
832 				if (G[j] >= Gmax2)
833 					Gmax2 = G[j];
834 				if (grad_diff > 0)
835 				{
836 					double obj_diff;
837 					double quad_coef = QD[i]+QD[j]-2.0*y[i]*Q_i[j];
838 					if (quad_coef > 0)
839 						obj_diff = -(grad_diff*grad_diff)/quad_coef;
840 					else
841 						obj_diff = -(grad_diff*grad_diff)/TAU;
842 
843 					if (obj_diff <= obj_diff_min)
844 					{
845 						Gmin_idx=j;
846 						obj_diff_min = obj_diff;
847 					}
848 				}
849 			}
850 		}
851 		else
852 		{
853 			if (!is_upper_bound(j))
854 			{
855 				double grad_diff= Gmax-G[j];
856 				if (-G[j] >= Gmax2)
857 					Gmax2 = -G[j];
858 				if (grad_diff > 0)
859 				{
860 					double obj_diff;
861 					double quad_coef = QD[i]+QD[j]+2.0*y[i]*Q_i[j];
862 					if (quad_coef > 0)
863 						obj_diff = -(grad_diff*grad_diff)/quad_coef;
864 					else
865 						obj_diff = -(grad_diff*grad_diff)/TAU;
866 
867 					if (obj_diff <= obj_diff_min)
868 					{
869 						Gmin_idx=j;
870 						obj_diff_min = obj_diff;
871 					}
872 				}
873 			}
874 		}
875 	}
876 
877 	if(Gmax+Gmax2 < eps || Gmin_idx == -1)
878 		return 1;
879 
880 	out_i = Gmax_idx;
881 	out_j = Gmin_idx;
882 	return 0;
883 }
884 
885 bool Solver::be_shrunk(int i, double Gmax1, double Gmax2)
886 {
887 	if(is_upper_bound(i))
888 	{
889 		if(y[i]==+1)
890 			return(-G[i] > Gmax1);
891 		else
892 			return(-G[i] > Gmax2);
893 	}
894 	else if(is_lower_bound(i))
895 	{
896 		if(y[i]==+1)
897 			return(G[i] > Gmax2);
898 		else
899 			return(G[i] > Gmax1);
900 	}
901 	else
902 		return(false);
903 }
904 
905 void Solver::do_shrinking()
906 {
907 	int i;
908 	double Gmax1 = -INF;		// max { -y_i * grad(f)_i | i in I_up(\alpha) }
909 	double Gmax2 = -INF;		// max { y_i * grad(f)_i | i in I_low(\alpha) }
910 
911 	// find maximal violating pair first
912 	for(i=0;i<active_size;i++)
913 	{
914 		if(y[i]==+1)
915 		{
916 			if(!is_upper_bound(i))
917 			{
918 				if(-G[i] >= Gmax1)
919 					Gmax1 = -G[i];
920 			}
921 			if(!is_lower_bound(i))
922 			{
923 				if(G[i] >= Gmax2)
924 					Gmax2 = G[i];
925 			}
926 		}
927 		else
928 		{
929 			if(!is_upper_bound(i))
930 			{
931 				if(-G[i] >= Gmax2)
932 					Gmax2 = -G[i];
933 			}
934 			if(!is_lower_bound(i))
935 			{
936 				if(G[i] >= Gmax1)
937 					Gmax1 = G[i];
938 			}
939 		}
940 	}
941 
942 	if(unshrink == false && Gmax1 + Gmax2 <= eps*10)
943 	{
944 		unshrink = true;
945 		reconstruct_gradient();
946 		active_size = l;
947 		info("*");
948 	}
949 
950 	for(i=0;i<active_size;i++)
951 		if (be_shrunk(i, Gmax1, Gmax2))
952 		{
953 			active_size--;
954 			while (active_size > i)
955 			{
956 				if (!be_shrunk(active_size, Gmax1, Gmax2))
957 				{
958 					swap_index(i,active_size);
959 					break;
960 				}
961 				active_size--;
962 			}
963 		}
964 }
965 
966 double Solver::calculate_rho()
967 {
968 	double r;
969 	int nr_free = 0;
970 	double ub = INF, lb = -INF, sum_free = 0;
971 	for(int i=0;i<active_size;i++)
972 	{
973 		double yG = y[i]*G[i];
974 
975 		if(is_upper_bound(i))
976 		{
977 			if(y[i]==-1)
978 				ub = min(ub,yG);
979 			else
980 				lb = max(lb,yG);
981 		}
982 		else if(is_lower_bound(i))
983 		{
984 			if(y[i]==+1)
985 				ub = min(ub,yG);
986 			else
987 				lb = max(lb,yG);
988 		}
989 		else
990 		{
991 			++nr_free;
992 			sum_free += yG;
993 		}
994 	}
995 
996 	if(nr_free>0)
997 		r = sum_free/nr_free;
998 	else
999 		r = (ub+lb)/2;
1000 
1001 	return r;
1002 }
1003 
1004 //
1005 // Solver for nu-svm classification and regression
1006 //
1007 // additional constraint: e^T \alpha = constant
1008 //
1009 class Solver_NU: public Solver
1010 {
1011 public:
1012 	Solver_NU() {}
1013 	void Solve(int l, const QMatrix& Q, const double *p, const schar *y,
1014 		   double *alpha, double Cp, double Cn, double eps,
1015 		   SolutionInfo* si, int shrinking)
1016 	{
1017 		this->si = si;
1018 		Solver::Solve(l,Q,p,y,alpha,Cp,Cn,eps,si,shrinking);
1019 	}
1020 private:
1021 	SolutionInfo *si;
1022 	int select_working_set(int &i, int &j);
1023 	double calculate_rho();
1024 	bool be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4);
1025 	void do_shrinking();
1026 };
1027 
1028 // return 1 if already optimal, return 0 otherwise
1029 int Solver_NU::select_working_set(int &out_i, int &out_j)
1030 {
1031 	// return i,j such that y_i = y_j and
1032 	// i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
1033 	// j: minimizes the decrease of obj value
1034 //    (if quadratic coefficient <= 0, replace it with tau)
1035 	//    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
1036 
1037 	double Gmaxp = -INF;
1038 	double Gmaxp2 = -INF;
1039 	int Gmaxp_idx = -1;
1040 
1041 	double Gmaxn = -INF;
1042 	double Gmaxn2 = -INF;
1043 	int Gmaxn_idx = -1;
1044 
1045 	int Gmin_idx = -1;
1046 	double obj_diff_min = INF;
1047 
1048 	for(int t=0;t<active_size;t++)
1049 		if(y[t]==+1)
1050 		{
1051 			if(!is_upper_bound(t))
1052 				if(-G[t] >= Gmaxp)
1053 				{
1054 					Gmaxp = -G[t];
1055 					Gmaxp_idx = t;
1056 				}
1057 		}
1058 		else
1059 		{
1060 			if(!is_lower_bound(t))
1061 				if(G[t] >= Gmaxn)
1062 				{
1063 					Gmaxn = G[t];
1064 					Gmaxn_idx = t;
1065 				}
1066 		}
1067 
1068 	int ip = Gmaxp_idx;
1069 	int in = Gmaxn_idx;
1070 	const Qfloat *Q_ip = NULL;
1071 	const Qfloat *Q_in = NULL;
1072 	if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
1073 		Q_ip = Q->get_Q(ip,active_size);
1074 	if(in != -1)
1075 		Q_in = Q->get_Q(in,active_size);
1076 
1077 	for(int j=0;j<active_size;j++)
1078 	{
1079 		if(y[j]==+1)
1080 		{
1081 			if (!is_lower_bound(j))
1082 			{
1083 				double grad_diff=Gmaxp+G[j];
1084 				if (G[j] >= Gmaxp2)
1085 					Gmaxp2 = G[j];
1086 				if (grad_diff > 0)
1087 				{
1088 					double obj_diff;
1089 					double quad_coef = QD[ip]+QD[j]-2*Q_ip[j];
1090 					if (quad_coef > 0)
1091 						obj_diff = -(grad_diff*grad_diff)/quad_coef;
1092 					else
1093 						obj_diff = -(grad_diff*grad_diff)/TAU;
1094 
1095 					if (obj_diff <= obj_diff_min)
1096 					{
1097 						Gmin_idx=j;
1098 						obj_diff_min = obj_diff;
1099 					}
1100 				}
1101 			}
1102 		}
1103 		else
1104 		{
1105 			if (!is_upper_bound(j))
1106 			{
1107 				double grad_diff=Gmaxn-G[j];
1108 				if (-G[j] >= Gmaxn2)
1109 					Gmaxn2 = -G[j];
1110 				if (grad_diff > 0)
1111 				{
1112 					double obj_diff;
1113 					double quad_coef = QD[in]+QD[j]-2*Q_in[j];
1114 					if (quad_coef > 0)
1115 						obj_diff = -(grad_diff*grad_diff)/quad_coef;
1116 					else
1117 						obj_diff = -(grad_diff*grad_diff)/TAU;
1118 
1119 					if (obj_diff <= obj_diff_min)
1120 					{
1121 						Gmin_idx=j;
1122 						obj_diff_min = obj_diff;
1123 					}
1124 				}
1125 			}
1126 		}
1127 	}
1128 
1129 	if(max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps || Gmin_idx == -1)
1130 		return 1;
1131 
1132 	if (y[Gmin_idx] == +1)
1133 		out_i = Gmaxp_idx;
1134 	else
1135 		out_i = Gmaxn_idx;
1136 	out_j = Gmin_idx;
1137 
1138 	return 0;
1139 }
1140 
1141 bool Solver_NU::be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4)
1142 {
1143 	if(is_upper_bound(i))
1144 	{
1145 		if(y[i]==+1)
1146 			return(-G[i] > Gmax1);
1147 		else
1148 			return(-G[i] > Gmax4);
1149 	}
1150 	else if(is_lower_bound(i))
1151 	{
1152 		if(y[i]==+1)
1153 			return(G[i] > Gmax2);
1154 		else
1155 			return(G[i] > Gmax3);
1156 	}
1157 	else
1158 		return(false);
1159 }
1160 
1161 void Solver_NU::do_shrinking()
1162 {
1163 	double Gmax1 = -INF;	// max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) }
1164 	double Gmax2 = -INF;	// max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) }
1165 	double Gmax3 = -INF;	// max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) }
1166 	double Gmax4 = -INF;	// max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) }
1167 
1168 	// find maximal violating pair first
1169 	int i;
1170 	for(i=0;i<active_size;i++)
1171 	{
1172 		if(!is_upper_bound(i))
1173 		{
1174 			if(y[i]==+1)
1175 			{
1176 				if(-G[i] > Gmax1) Gmax1 = -G[i];
1177 			}
1178 			else	if(-G[i] > Gmax4) Gmax4 = -G[i];
1179 		}
1180 		if(!is_lower_bound(i))
1181 		{
1182 			if(y[i]==+1)
1183 			{
1184 				if(G[i] > Gmax2) Gmax2 = G[i];
1185 			}
1186 			else	if(G[i] > Gmax3) Gmax3 = G[i];
1187 		}
1188 	}
1189 
1190 	if(unshrink == false && max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10)
1191 	{
1192 		unshrink = true;
1193 		reconstruct_gradient();
1194 		active_size = l;
1195 	}
1196 
1197 	for(i=0;i<active_size;i++)
1198 		if (be_shrunk(i, Gmax1, Gmax2, Gmax3, Gmax4))
1199 		{
1200 			active_size--;
1201 			while (active_size > i)
1202 			{
1203 				if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
1204 				{
1205 					swap_index(i,active_size);
1206 					break;
1207 				}
1208 				active_size--;
1209 			}
1210 		}
1211 }
1212 
1213 double Solver_NU::calculate_rho()
1214 {
1215 	int nr_free1 = 0,nr_free2 = 0;
1216 	double ub1 = INF, ub2 = INF;
1217 	double lb1 = -INF, lb2 = -INF;
1218 	double sum_free1 = 0, sum_free2 = 0;
1219 
1220 	for(int i=0;i<active_size;i++)
1221 	{
1222 		if(y[i]==+1)
1223 		{
1224 			if(is_upper_bound(i))
1225 				lb1 = max(lb1,G[i]);
1226 			else if(is_lower_bound(i))
1227 				ub1 = min(ub1,G[i]);
1228 			else
1229 			{
1230 				++nr_free1;
1231 				sum_free1 += G[i];
1232 			}
1233 		}
1234 		else
1235 		{
1236 			if(is_upper_bound(i))
1237 				lb2 = max(lb2,G[i]);
1238 			else if(is_lower_bound(i))
1239 				ub2 = min(ub2,G[i]);
1240 			else
1241 			{
1242 				++nr_free2;
1243 				sum_free2 += G[i];
1244 			}
1245 		}
1246 	}
1247 
1248 	double r1,r2;
1249 	if(nr_free1 > 0)
1250 		r1 = sum_free1/nr_free1;
1251 	else
1252 		r1 = (ub1+lb1)/2;
1253 
1254 	if(nr_free2 > 0)
1255 		r2 = sum_free2/nr_free2;
1256 	else
1257 		r2 = (ub2+lb2)/2;
1258 
1259 	si->r = (r1+r2)/2;
1260 	return (r1-r2)/2;
1261 }
1262 
1263 //
1264 // Q matrices for various formulations
1265 //
1266 class SVC_Q: public Kernel
1267 {
1268 public:
1269 	SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
1270 	:Kernel(prob.l, prob.x, param)
1271 	{
1272 		clone(y,y_,prob.l);
1273 		cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
1274 		QD = new double[prob.l];
1275 		for(int i=0;i<prob.l;i++)
1276 			QD[i] = (this->*kernel_function)(i,i);
1277 	}
1278 
1279 	Qfloat *get_Q(int i, int len) const
1280 	{
1281 		Qfloat *data;
1282 		int start, j;
1283 		if((start = cache->get_data(i,&data,len)) < len)
1284 		{
1285 			for(j=start;j<len;j++)
1286 				data[j] = (Qfloat)(y[i]*y[j]*(this->*kernel_function)(i,j));
1287 		}
1288 		return data;
1289 	}
1290 
1291 	double *get_QD() const
1292 	{
1293 		return QD;
1294 	}
1295 
1296 	void swap_index(int i, int j) const
1297 	{
1298 		cache->swap_index(i,j);
1299 		Kernel::swap_index(i,j);
1300 		swap(y[i],y[j]);
1301 		swap(QD[i],QD[j]);
1302 	}
1303 
1304 	~SVC_Q()
1305 	{
1306 		delete[] y;
1307 		delete cache;
1308 		delete[] QD;
1309 	}
1310 private:
1311 	schar *y;
1312 	Cache *cache;
1313 	double *QD;
1314 };
1315 
1316 class ONE_CLASS_Q: public Kernel
1317 {
1318 public:
1319 	ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
1320 	:Kernel(prob.l, prob.x, param)
1321 	{
1322 		cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
1323 		QD = new double[prob.l];
1324 		for(int i=0;i<prob.l;i++)
1325 			QD[i] = (this->*kernel_function)(i,i);
1326 	}
1327 
1328 	Qfloat *get_Q(int i, int len) const
1329 	{
1330 		Qfloat *data;
1331 		int start, j;
1332 		if((start = cache->get_data(i,&data,len)) < len)
1333 		{
1334 			for(j=start;j<len;j++)
1335 				data[j] = (Qfloat)(this->*kernel_function)(i,j);
1336 		}
1337 		return data;
1338 	}
1339 
1340 	double *get_QD() const
1341 	{
1342 		return QD;
1343 	}
1344 
1345 	void swap_index(int i, int j) const
1346 	{
1347 		cache->swap_index(i,j);
1348 		Kernel::swap_index(i,j);
1349 		swap(QD[i],QD[j]);
1350 	}
1351 
1352 	~ONE_CLASS_Q()
1353 	{
1354 		delete cache;
1355 		delete[] QD;
1356 	}
1357 private:
1358 	Cache *cache;
1359 	double *QD;
1360 };
1361 
1362 class SVR_Q: public Kernel
1363 {
1364 public:
1365 	SVR_Q(const svm_problem& prob, const svm_parameter& param)
1366 	:Kernel(prob.l, prob.x, param)
1367 	{
1368 		l = prob.l;
1369 		cache = new Cache(l,(long int)(param.cache_size*(1<<20)));
1370 		QD = new double[2*l];
1371 		sign = new schar[2*l];
1372 		index = new int[2*l];
1373 		for(int k=0;k<l;k++)
1374 		{
1375 			sign[k] = 1;
1376 			sign[k+l] = -1;
1377 			index[k] = k;
1378 			index[k+l] = k;
1379 			QD[k] = (this->*kernel_function)(k,k);
1380 			QD[k+l] = QD[k];
1381 		}
1382 		buffer[0] = new Qfloat[2*l];
1383 		buffer[1] = new Qfloat[2*l];
1384 		next_buffer = 0;
1385 	}
1386 
1387 	void swap_index(int i, int j) const
1388 	{
1389 		swap(sign[i],sign[j]);
1390 		swap(index[i],index[j]);
1391 		swap(QD[i],QD[j]);
1392 	}
1393 
1394 	Qfloat *get_Q(int i, int len) const
1395 	{
1396 		Qfloat *data;
1397 		int j, real_i = index[i];
1398 		if(cache->get_data(real_i,&data,l) < l)
1399 		{
1400 			for(j=0;j<l;j++)
1401 				data[j] = (Qfloat)(this->*kernel_function)(real_i,j);
1402 		}
1403 
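		// two rotating buffers, since the solver may hold the two most recently
		// returned columns (Q_i and Q_j) at the same time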
1404 		// reorder and copy
1405 		Qfloat *buf = buffer[next_buffer];
1406 		next_buffer = 1 - next_buffer;
1407 		schar si = sign[i];
1408 		for(j=0;j<len;j++)
1409 			buf[j] = (Qfloat) si * (Qfloat) sign[j] * data[index[j]];
1410 		return buf;
1411 	}
1412 
1413 	double *get_QD() const
1414 	{
1415 		return QD;
1416 	}
1417 
1418 	~SVR_Q()
1419 	{
1420 		delete cache;
1421 		delete[] sign;
1422 		delete[] index;
1423 		delete[] buffer[0];
1424 		delete[] buffer[1];
1425 		delete[] QD;
1426 	}
1427 private:
1428 	int l;
1429 	Cache *cache;
1430 	schar *sign;
1431 	int *index;
1432 	mutable int next_buffer;
1433 	Qfloat *buffer[2];
1434 	double *QD;
1435 };
1436 
1437 //
1438 // construct and solve various formulations
1439 //
1440 static void solve_c_svc(
1441 	const svm_problem *prob, const svm_parameter* param,
1442 	double *alpha, Solver::SolutionInfo* si, double Cp, double Cn)
1443 {
1444 	int l = prob->l;
1445 	double *minus_ones = new double[l];
1446 	schar *y = new schar[l];
1447 
1448 	int i;
1449 
1450 	for(i=0;i<l;i++)
1451 	{
1452 		alpha[i] = 0;
1453 		minus_ones[i] = -1;
1454 		if(prob->y[i] > 0) y[i] = +1; else y[i] = -1;
1455 	}
1456 
1457 	Solver s;
1458 	s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
1459 		alpha, Cp, Cn, param->eps, si, param->shrinking);
1460 
1461 	double sum_alpha=0;
1462 	for(i=0;i<l;i++)
1463 		sum_alpha += alpha[i];
1464 
1465 	if (Cp==Cn)
1466 		info("nu = %f\n", sum_alpha/(Cp*prob->l));
1467 
1468 	for(i=0;i<l;i++)
1469 		alpha[i] *= y[i];
1470 
1471 	delete[] minus_ones;
1472 	delete[] y;
1473 }
1474 
1475 static void solve_nu_svc(
1476 	const svm_problem *prob, const svm_parameter *param,
1477 	double *alpha, Solver::SolutionInfo* si)
1478 {
1479 	int i;
1480 	int l = prob->l;
1481 	double nu = param->nu;
1482 
1483 	schar *y = new schar[l];
1484 
1485 	for(i=0;i<l;i++)
1486 		if(prob->y[i]>0)
1487 			y[i] = +1;
1488 		else
1489 			y[i] = -1;
1490 
1491 	double sum_pos = nu*l/2;
1492 	double sum_neg = nu*l/2;
1493 
1494 	for(i=0;i<l;i++)
1495 		if(y[i] == +1)
1496 		{
1497 			alpha[i] = min(1.0,sum_pos);
1498 			sum_pos -= alpha[i];
1499 		}
1500 		else
1501 		{
1502 			alpha[i] = min(1.0,sum_neg);
1503 			sum_neg -= alpha[i];
1504 		}
1505 
1506 	double *zeros = new double[l];
1507 
1508 	for(i=0;i<l;i++)
1509 		zeros[i] = 0;
1510 
1511 	Solver_NU s;
1512 	s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
1513 		alpha, 1.0, 1.0, param->eps, si,  param->shrinking);
1514 	double r = si->r;
1515 
1516 	info("C = %f\n",1/r);
1517 
1518 	for(i=0;i<l;i++)
1519 		alpha[i] *= y[i]/r;
1520 
1521 	si->rho /= r;
1522 	si->obj /= (r*r);
1523 	si->upper_bound_p = 1/r;
1524 	si->upper_bound_n = 1/r;
1525 
1526 	delete[] y;
1527 	delete[] zeros;
1528 }
1529 
1530 static void solve_one_class(
1531 	const svm_problem *prob, const svm_parameter *param,
1532 	double *alpha, Solver::SolutionInfo* si)
1533 {
1534 	int l = prob->l;
1535 	double *zeros = new double[l];
1536 	schar *ones = new schar[l];
1537 	int i;
1538 
1539 	int n = (int)(param->nu*prob->l);	// # of alpha's at upper bound
1540 
1541 	for(i=0;i<n;i++)
1542 		alpha[i] = 1;
1543 	if(n<prob->l)
1544 		alpha[n] = param->nu * prob->l - n;
1545 	for(i=n+1;i<l;i++)
1546 		alpha[i] = 0;
1547 
1548 	for(i=0;i<l;i++)
1549 	{
1550 		zeros[i] = 0;
1551 		ones[i] = 1;
1552 	}
1553 
1554 	Solver s;
1555 	s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
1556 		alpha, 1.0, 1.0, param->eps, si, param->shrinking);
1557 
1558 	delete[] zeros;
1559 	delete[] ones;
1560 }
1561 
1562 static void solve_epsilon_svr(
1563 	const svm_problem *prob, const svm_parameter *param,
1564 	double *alpha, Solver::SolutionInfo* si)
1565 {
1566 	int l = prob->l;
1567 	double *alpha2 = new double[2*l];
1568 	double *linear_term = new double[2*l];
1569 	schar *y = new schar[2*l];
1570 	int i;
1571 
1572 	for(i=0;i<l;i++)
1573 	{
1574 		alpha2[i] = 0;
1575 		linear_term[i] = param->p - prob->y[i];
1576 		y[i] = 1;
1577 
1578 		alpha2[i+l] = 0;
1579 		linear_term[i+l] = param->p + prob->y[i];
1580 		y[i+l] = -1;
1581 	}
1582 
1583 	Solver s;
1584 	s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
1585 		alpha2, param->C, param->C, param->eps, si, param->shrinking);
1586 
1587 	double sum_alpha = 0;
1588 	for(i=0;i<l;i++)
1589 	{
1590 		alpha[i] = alpha2[i] - alpha2[i+l];
1591 		sum_alpha += fabs(alpha[i]);
1592 	}
1593 	info("nu = %f\n",sum_alpha/(param->C*l));
1594 
1595 	delete[] alpha2;
1596 	delete[] linear_term;
1597 	delete[] y;
1598 }
1599 
1600 static void solve_nu_svr(
1601 	const svm_problem *prob, const svm_parameter *param,
1602 	double *alpha, Solver::SolutionInfo* si)
1603 {
1604 	int l = prob->l;
1605 	double C = param->C;
1606 	double *alpha2 = new double[2*l];
1607 	double *linear_term = new double[2*l];
1608 	schar *y = new schar[2*l];
1609 	int i;
1610 
1611 	double sum = C * param->nu * l / 2;
1612 	for(i=0;i<l;i++)
1613 	{
1614 		alpha2[i] = alpha2[i+l] = min(sum,C);
1615 		sum -= alpha2[i];
1616 
1617 		linear_term[i] = - prob->y[i];
1618 		y[i] = 1;
1619 
1620 		linear_term[i+l] = prob->y[i];
1621 		y[i+l] = -1;
1622 	}
1623 
1624 	Solver_NU s;
1625 	s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
1626 		alpha2, C, C, param->eps, si, param->shrinking);
1627 
1628 	info("epsilon = %f\n",-si->r);
1629 
1630 	for(i=0;i<l;i++)
1631 		alpha[i] = alpha2[i] - alpha2[i+l];
1632 
1633 	delete[] alpha2;
1634 	delete[] linear_term;
1635 	delete[] y;
1636 }
1637 
1638 //
1639 // decision_function
1640 //
1641 struct decision_function
1642 {
1643 	double *alpha;
1644 	double rho;
1645 };
1646 
1647 static decision_function svm_train_one(
1648 	const svm_problem *prob, const svm_parameter *param,
1649 	double Cp, double Cn)
1650 {
1651 	double *alpha = Malloc(double,prob->l);
1652 	Solver::SolutionInfo si;
1653 	switch(param->svm_type)
1654 	{
1655 		case C_SVC:
1656 			solve_c_svc(prob,param,alpha,&si,Cp,Cn);
1657 			break;
1658 		case NU_SVC:
1659 			solve_nu_svc(prob,param,alpha,&si);
1660 			break;
1661 		case ONE_CLASS:
1662 			solve_one_class(prob,param,alpha,&si);
1663 			break;
1664 		case EPSILON_SVR:
1665 			solve_epsilon_svr(prob,param,alpha,&si);
1666 			break;
1667 		case NU_SVR:
1668 			solve_nu_svr(prob,param,alpha,&si);
1669 			break;
1670 	}
1671 
1672 	info("obj = %f, rho = %f\n",si.obj,si.rho);
1673 
1674 	// output SVs
1675 
1676 	int nSV = 0;
1677 	int nBSV = 0;
1678 	for(int i=0;i<prob->l;i++)
1679 	{
1680 		if(fabs(alpha[i]) > 0)
1681 		{
1682 			++nSV;
1683 			if(prob->y[i] > 0)
1684 			{
1685 				if(fabs(alpha[i]) >= si.upper_bound_p)
1686 					++nBSV;
1687 			}
1688 			else
1689 			{
1690 				if(fabs(alpha[i]) >= si.upper_bound_n)
1691 					++nBSV;
1692 			}
1693 		}
1694 	}
1695 
1696 	info("nSV = %d, nBSV = %d\n",nSV,nBSV);
1697 
1698 	decision_function f;
1699 	f.alpha = alpha;
1700 	f.rho = si.rho;
1701 	return f;
1702 }
1703 
1704 // Platt's binary SVM Probabilistic Output: an improvement from Lin et al.
1705 static void sigmoid_train(
1706 	int l, const double *dec_values, const double *labels,
1707 	double& A, double& B)
1708 {
1709 	double prior1=0, prior0 = 0;
1710 	int i;
1711 
1712 	for (i=0;i<l;i++)
1713 		if (labels[i] > 0) prior1+=1;
1714 		else prior0+=1;
1715 
1716 	int max_iter=100;	// Maximal number of iterations
1717 	double min_step=1e-10;	// Minimal step taken in line search
1718 	double sigma=1e-12;	// For numerically strict PD of Hessian
1719 	double eps=1e-5;
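	// Platt's regularized targets: (prior1+1)/(prior1+2) for positive labels,
	// 1/(prior0+2) for negative labels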
1720 	double hiTarget=(prior1+1.0)/(prior1+2.0);
1721 	double loTarget=1/(prior0+2.0);
1722 	double *t=Malloc(double,l);
1723 	double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;
1724 	double newA,newB,newf,d1,d2;
1725 	int iter;
1726 
1727 	// Initial Point and Initial Fun Value
1728 	A=0.0; B=log((prior0+1.0)/(prior1+1.0));
1729 	double fval = 0.0;
1730 
1731 	for (i=0;i<l;i++)
1732 	{
1733 		if (labels[i]>0) t[i]=hiTarget;
1734 		else t[i]=loTarget;
1735 		fApB = dec_values[i]*A+B;
1736 		if (fApB>=0)
1737 			fval += t[i]*fApB + log(1+exp(-fApB));
1738 		else
1739 			fval += (t[i] - 1)*fApB +log(1+exp(fApB));
1740 	}
1741 	for (iter=0;iter<max_iter;iter++)
1742 	{
1743 		// Update Gradient and Hessian (use H' = H + sigma I)
1744 		h11=sigma; // numerically ensures strict PD
1745 		h22=sigma;
1746 		h21=0.0;g1=0.0;g2=0.0;
1747 		for (i=0;i<l;i++)
1748 		{
1749 			fApB = dec_values[i]*A+B;
1750 			if (fApB >= 0)
1751 			{
1752 				p=exp(-fApB)/(1.0+exp(-fApB));
1753 				q=1.0/(1.0+exp(-fApB));
1754 			}
1755 			else
1756 			{
1757 				p=1.0/(1.0+exp(fApB));
1758 				q=exp(fApB)/(1.0+exp(fApB));
1759 			}
1760 			d2=p*q;
1761 			h11+=dec_values[i]*dec_values[i]*d2;
1762 			h22+=d2;
1763 			h21+=dec_values[i]*d2;
1764 			d1=t[i]-p;
1765 			g1+=dec_values[i]*d1;
1766 			g2+=d1;
1767 		}
1768 
1769 		// Stopping Criteria
1770 		if (fabs(g1)<eps && fabs(g2)<eps)
1771 			break;
1772 
1773 		// Finding Newton direction: -inv(H') * g
1774 		det=h11*h22-h21*h21;
1775 		dA=-(h22*g1 - h21 * g2) / det;
1776 		dB=-(-h21*g1+ h11 * g2) / det;
1777 		gd=g1*dA+g2*dB;
1778 
1779 
1780 		stepsize = 1;		// Line Search
1781 		while (stepsize >= min_step)
1782 		{
1783 			newA = A + stepsize * dA;
1784 			newB = B + stepsize * dB;
1785 
1786 			// New function value
1787 			newf = 0.0;
1788 			for (i=0;i<l;i++)
1789 			{
1790 				fApB = dec_values[i]*newA+newB;
1791 				if (fApB >= 0)
1792 					newf += t[i]*fApB + log(1+exp(-fApB));
1793 				else
1794 					newf += (t[i] - 1)*fApB +log(1+exp(fApB));
1795 			}
1796 			// Check sufficient decrease
1797 			if (newf<fval+0.0001*stepsize*gd)
1798 			{
1799 				A=newA;B=newB;fval=newf;
1800 				break;
1801 			}
1802 			else
1803 				stepsize = stepsize / 2.0;
1804 		}
1805 
1806 		if (stepsize < min_step)
1807 		{
1808 			info("Line search fails in two-class probability estimates\n");
1809 			break;
1810 		}
1811 	}
1812 
1813 	if (iter>=max_iter)
1814 		info("Reaching maximal iterations in two-class probability estimates\n");
1815 	free(t);
1816 }
1817 
1818 static double sigmoid_predict(double decision_value, double A, double B)
1819 {
1820 	double fApB = decision_value*A+B;
1821 	// 1-p used later; avoid catastrophic cancellation
1822 	if (fApB >= 0)
1823 		return exp(-fApB)/(1.0+exp(-fApB));
1824 	else
1825 		return 1.0/(1+exp(fApB)) ;
1826 }
1827 
1828 // Method 2 from the multiclass_prob paper by Wu, Lin, and Weng
1829 static void multiclass_probability(int k, double **r, double *p)
1830 {
1831 	int t,j;
1832 	int iter = 0, max_iter=max(100,k);
1833 	double **Q=Malloc(double *,k);
1834 	double *Qp=Malloc(double,k);
1835 	double pQp, eps=0.005/k;
1836 
1837 	for (t=0;t<k;t++)
1838 	{
1839 		p[t]=1.0/k;  // Valid if k = 1
1840 		Q[t]=Malloc(double,k);
1841 		Q[t][t]=0;
1842 		for (j=0;j<t;j++)
1843 		{
1844 			Q[t][t]+=r[j][t]*r[j][t];
1845 			Q[t][j]=Q[j][t];
1846 		}
1847 		for (j=t+1;j<k;j++)
1848 		{
1849 			Q[t][t]+=r[j][t]*r[j][t];
1850 			Q[t][j]=-r[j][t]*r[t][j];
1851 		}
1852 	}
1853 	for (iter=0;iter<max_iter;iter++)
1854 	{
1855 		// stopping condition: recalculate Qp and pQp for numerical accuracy
1856 		pQp=0;
1857 		for (t=0;t<k;t++)
1858 		{
1859 			Qp[t]=0;
1860 			for (j=0;j<k;j++)
1861 				Qp[t]+=Q[t][j]*p[j];
1862 			pQp+=p[t]*Qp[t];
1863 		}
1864 		double max_error=0;
1865 		for (t=0;t<k;t++)
1866 		{
1867 			double error=fabs(Qp[t]-pQp);
1868 			if (error>max_error)
1869 				max_error=error;
1870 		}
1871 		if (max_error<eps) break;
1872 
1873 		for (t=0;t<k;t++)
1874 		{
1875 			double diff=(-Qp[t]+pQp)/Q[t][t];
1876 			p[t]+=diff;
1877 			pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff);
1878 			for (j=0;j<k;j++)
1879 			{
1880 				Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff);
1881 				p[j]/=(1+diff);
1882 			}
1883 		}
1884 	}
1885 	if (iter>=max_iter)
1886 		info("Exceeds max_iter in multiclass_prob\n");
1887 	for(t=0;t<k;t++) free(Q[t]);
1888 	free(Q);
1889 	free(Qp);
1890 }
1891 
1892 // Cross-validation decision values for probability estimates
1893 static void svm_binary_svc_probability(
1894 	const svm_problem *prob, const svm_parameter *param,
1895 	double Cp, double Cn, double& probA, double& probB)
1896 {
1897 	int i;
1898 	int nr_fold = 5;
1899 	int *perm = Malloc(int,prob->l);
1900 	double *dec_values = Malloc(double,prob->l);
1901 
1902 	// random shuffle
1903 	for(i=0;i<prob->l;i++) perm[i]=i;
1904 	for(i=0;i<prob->l;i++)
1905 	{
1906 		int j = i+rand()%(prob->l-i);
1907 		swap(perm[i],perm[j]);
1908 	}
1909 	for(i=0;i<nr_fold;i++)
1910 	{
1911 		int begin = i*prob->l/nr_fold;
1912 		int end = (i+1)*prob->l/nr_fold;
1913 		int j,k;
1914 		struct svm_problem subprob;
1915 
1916 		subprob.l = prob->l-(end-begin);
1917 		subprob.x = Malloc(struct svm_node*,subprob.l);
1918 		subprob.y = Malloc(double,subprob.l);
1919 
1920 		k=0;
1921 		for(j=0;j<begin;j++)
1922 		{
1923 			subprob.x[k] = prob->x[perm[j]];
1924 			subprob.y[k] = prob->y[perm[j]];
1925 			++k;
1926 		}
1927 		for(j=end;j<prob->l;j++)
1928 		{
1929 			subprob.x[k] = prob->x[perm[j]];
1930 			subprob.y[k] = prob->y[perm[j]];
1931 			++k;
1932 		}
1933 		int p_count=0,n_count=0;
1934 		for(j=0;j<k;j++)
1935 			if(subprob.y[j]>0)
1936 				p_count++;
1937 			else
1938 				n_count++;
1939 
1940 		if(p_count==0 && n_count==0)
1941 			for(j=begin;j<end;j++)
1942 				dec_values[perm[j]] = 0;
1943 		else if(p_count > 0 && n_count == 0)
1944 			for(j=begin;j<end;j++)
1945 				dec_values[perm[j]] = 1;
1946 		else if(p_count == 0 && n_count > 0)
1947 			for(j=begin;j<end;j++)
1948 				dec_values[perm[j]] = -1;
1949 		else
1950 		{
1951 			svm_parameter subparam = *param;
1952 			subparam.probability=0;
1953 			subparam.C=1.0;
1954 			subparam.nr_weight=2;
1955 			subparam.weight_label = Malloc(int,2);
1956 			subparam.weight = Malloc(double,2);
1957 			subparam.weight_label[0]=+1;
1958 			subparam.weight_label[1]=-1;
1959 			subparam.weight[0]=Cp;
1960 			subparam.weight[1]=Cn;
1961 			struct svm_model *submodel = svm_train(&subprob,&subparam);
1962 			for(j=begin;j<end;j++)
1963 			{
1964 				svm_predict_values(submodel,prob->x[perm[j]],&(dec_values[perm[j]]));
1965 				// ensure the +1/-1 label order; this is why the CV subroutine is not used here
1966 				dec_values[perm[j]] *= submodel->label[0];
1967 			}
1968 			svm_free_and_destroy_model(&submodel);
1969 			svm_destroy_param(&subparam);
1970 		}
1971 		free(subprob.x);
1972 		free(subprob.y);
1973 	}
1974 	sigmoid_train(prob->l,dec_values,prob->y,probA,probB);
1975 	free(dec_values);
1976 	free(perm);
1977 }
1978 
1979 // Return parameter of a Laplace distribution
1980 static double svm_svr_probability(
1981 	const svm_problem *prob, const svm_parameter *param)
1982 {
1983 	int i;
1984 	int nr_fold = 5;
1985 	double *ymv = Malloc(double,prob->l);
1986 	double mae = 0;
1987 
1988 	svm_parameter newparam = *param;
1989 	newparam.probability = 0;
1990 	svm_cross_validation(prob,&newparam,nr_fold,ymv);
1991 	for(i=0;i<prob->l;i++)
1992 	{
1993 		ymv[i]=prob->y[i]-ymv[i];
1994 		mae += fabs(ymv[i]);
1995 	}
1996 	mae /= prob->l;
1997 	double std=sqrt(2*mae*mae);
1998 	int count=0;
1999 	mae=0;
2000 	for(i=0;i<prob->l;i++)
2001 		if (fabs(ymv[i]) > 5*std)
2002 			count=count+1;
2003 		else
2004 			mae+=fabs(ymv[i]);
2005 	mae /= (prob->l-count);
2006 	info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae);
2007 	free(ymv);
2008 	return mae;
2009 }
2010 
2011 
2012 // label: label name, start: starting index of each class, count: #data of each class, perm: indices into the original data
2013 // perm, length l, must be allocated before calling this subroutine
2014 static void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
2015 {
2016 	int l = prob->l;
2017 	int max_nr_class = 16;
2018 	int nr_class = 0;
2019 	int *label = Malloc(int,max_nr_class);
2020 	int *count = Malloc(int,max_nr_class);
2021 	int *data_label = Malloc(int,l);
2022 	int i;
2023 
2024 	for(i=0;i<l;i++)
2025 	{
2026 		int this_label = (int)prob->y[i];
2027 		int j;
2028 		for(j=0;j<nr_class;j++)
2029 		{
2030 			if(this_label == label[j])
2031 			{
2032 				++count[j];
2033 				break;
2034 			}
2035 		}
2036 		data_label[i] = j;
2037 		if(j == nr_class)
2038 		{
2039 			if(nr_class == max_nr_class)
2040 			{
2041 				max_nr_class *= 2;
2042 				label = (int *)realloc(label,max_nr_class*sizeof(int));
2043 				count = (int *)realloc(count,max_nr_class*sizeof(int));
2044 			}
2045 			label[nr_class] = this_label;
2046 			count[nr_class] = 1;
2047 			++nr_class;
2048 		}
2049 	}
2050 
2051 	//
2052 	// Labels are ordered by their first occurrence in the training set.
2053 	// However, for a two-class set with -1/+1 labels where -1 appears first,
2054 	// we swap the labels so that internally the binary SVM's positive data correspond to the +1 instances.
2055 	//
2056 	if (nr_class == 2 && label[0] == -1 && label[1] == 1)
2057 	{
2058 		swap(label[0],label[1]);
2059 		swap(count[0],count[1]);
2060 		for(i=0;i<l;i++)
2061 		{
2062 			if(data_label[i] == 0)
2063 				data_label[i] = 1;
2064 			else
2065 				data_label[i] = 0;
2066 		}
2067 	}
2068 
2069 	int *start = Malloc(int,nr_class);
2070 	start[0] = 0;
2071 	for(i=1;i<nr_class;i++)
2072 		start[i] = start[i-1]+count[i-1];
2073 	for(i=0;i<l;i++)
2074 	{
2075 		perm[start[data_label[i]]] = i;
2076 		++start[data_label[i]];
2077 	}
2078 	start[0] = 0;
2079 	for(i=1;i<nr_class;i++)
2080 		start[i] = start[i-1]+count[i-1];
2081 
2082 	*nr_class_ret = nr_class;
2083 	*label_ret = label;
2084 	*start_ret = start;
2085 	*count_ret = count;
2086 	free(data_label);
2087 }
2088 
2089 //
2090 // Interface functions
2091 //
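// Minimal usage sketch (illustrative only; assumes the caller has filled in
// an svm_problem 'prob' and an svm_parameter 'param' as declared in svm.h):
//
//	svm_model *model = svm_train(&prob, &param);
//	double prediction = svm_predict(model, some_test_instance);
//	svm_free_and_destroy_model(&model);
//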
2092 svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
2093 {
2094 	svm_model *model = Malloc(svm_model,1);
2095 	model->param = *param;
2096 	model->free_sv = 0;	// XXX
2097 
2098 	if(param->svm_type == ONE_CLASS ||
2099 	   param->svm_type == EPSILON_SVR ||
2100 	   param->svm_type == NU_SVR)
2101 	{
2102 		// regression or one-class-svm
2103 		model->nr_class = 2;
2104 		model->label = NULL;
2105 		model->nSV = NULL;
2106 		model->probA = NULL; model->probB = NULL;
2107 		model->sv_coef = Malloc(double *,1);
2108 
2109 		if(param->probability &&
2110 		   (param->svm_type == EPSILON_SVR ||
2111 		    param->svm_type == NU_SVR))
2112 		{
2113 			model->probA = Malloc(double,1);
2114 			model->probA[0] = svm_svr_probability(prob,param);
2115 		}
2116 
2117 		decision_function f = svm_train_one(prob,param,0,0);
2118 		model->rho = Malloc(double,1);
2119 		model->rho[0] = f.rho;
2120 
2121 		int nSV = 0;
2122 		int i;
2123 		for(i=0;i<prob->l;i++)
2124 			if(fabs(f.alpha[i]) > 0) ++nSV;
2125 		model->l = nSV;
2126 		model->SV = Malloc(svm_node *,nSV);
2127 		model->sv_coef[0] = Malloc(double,nSV);
2128 		model->sv_indices = Malloc(int,nSV);
2129 		int j = 0;
2130 		for(i=0;i<prob->l;i++)
2131 			if(fabs(f.alpha[i]) > 0)
2132 			{
2133 				model->SV[j] = prob->x[i];
2134 				model->sv_coef[0][j] = f.alpha[i];
2135 				model->sv_indices[j] = i+1;
2136 				++j;
2137 			}
2138 
2139 		free(f.alpha);
2140 	}
2141 	else
2142 	{
2143 		// classification
2144 		int l = prob->l;
2145 		int nr_class;
2146 		int *label = NULL;
2147 		int *start = NULL;
2148 		int *count = NULL;
2149 		int *perm = Malloc(int,l);
2150 
2151 		// group training data of the same class
2152 		svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
2153 		if(nr_class == 1)
2154 			info("WARNING: training data in only one class. See README for details.\n");
2155 
2156 		svm_node **x = Malloc(svm_node *,l);
2157 		int i;
2158 		for(i=0;i<l;i++)
2159 			x[i] = prob->x[perm[i]];
2160 
2161 		// calculate weighted C
2162 
2163 		double *weighted_C = Malloc(double, nr_class);
2164 		for(i=0;i<nr_class;i++)
2165 			weighted_C[i] = param->C;
2166 		for(i=0;i<param->nr_weight;i++)
2167 		{
2168 			int j;
2169 			for(j=0;j<nr_class;j++)
2170 				if(param->weight_label[i] == label[j])
2171 					break;
2172 			if(j == nr_class)
2173 				fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
2174 			else
2175 				weighted_C[j] *= param->weight[i];
2176 		}
2177 
2178 		// train k*(k-1)/2 models
2179 
2180 		bool *nonzero = Malloc(bool,l);
2181 		for(i=0;i<l;i++)
2182 			nonzero[i] = false;
2183 		decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2);
2184 
2185 		double *probA=NULL,*probB=NULL;
2186 		if (param->probability)
2187 		{
2188 			probA=Malloc(double,nr_class*(nr_class-1)/2);
2189 			probB=Malloc(double,nr_class*(nr_class-1)/2);
2190 		}
2191 
2192 		int p = 0;
2193 		for(i=0;i<nr_class;i++)
2194 			for(int j=i+1;j<nr_class;j++)
2195 			{
2196 				svm_problem sub_prob;
2197 				int si = start[i], sj = start[j];
2198 				int ci = count[i], cj = count[j];
2199 				sub_prob.l = ci+cj;
2200 				sub_prob.x = Malloc(svm_node *,sub_prob.l);
2201 				sub_prob.y = Malloc(double,sub_prob.l);
2202 				int k;
2203 				for(k=0;k<ci;k++)
2204 				{
2205 					sub_prob.x[k] = x[si+k];
2206 					sub_prob.y[k] = +1;
2207 				}
2208 				for(k=0;k<cj;k++)
2209 				{
2210 					sub_prob.x[ci+k] = x[sj+k];
2211 					sub_prob.y[ci+k] = -1;
2212 				}
2213 
2214 				if(param->probability)
2215 					svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]);
2216 
2217 				f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
2218 				for(k=0;k<ci;k++)
2219 					if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
2220 						nonzero[si+k] = true;
2221 				for(k=0;k<cj;k++)
2222 					if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
2223 						nonzero[sj+k] = true;
2224 				free(sub_prob.x);
2225 				free(sub_prob.y);
2226 				++p;
2227 			}
2228 
2229 		// build output
2230 
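		// Copy the per-pair results into the model: the label order, one rho
		// (and optionally probA/probB) per classifier pair, and the per-class
		// support vector counts.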
2231 		model->nr_class = nr_class;
2232 
2233 		model->label = Malloc(int,nr_class);
2234 		for(i=0;i<nr_class;i++)
2235 			model->label[i] = label[i];
2236 
2237 		model->rho = Malloc(double,nr_class*(nr_class-1)/2);
2238 		for(i=0;i<nr_class*(nr_class-1)/2;i++)
2239 			model->rho[i] = f[i].rho;
2240 
2241 		if(param->probability)
2242 		{
2243 			model->probA = Malloc(double,nr_class*(nr_class-1)/2);
2244 			model->probB = Malloc(double,nr_class*(nr_class-1)/2);
2245 			for(i=0;i<nr_class*(nr_class-1)/2;i++)
2246 			{
2247 				model->probA[i] = probA[i];
2248 				model->probB[i] = probB[i];
2249 			}
2250 		}
2251 		else
2252 		{
2253 			model->probA=NULL;
2254 			model->probB=NULL;
2255 		}
2256 
2257 		int total_sv = 0;
2258 		int *nz_count = Malloc(int,nr_class);
2259 		model->nSV = Malloc(int,nr_class);
2260 		for(i=0;i<nr_class;i++)
2261 		{
2262 			int nSV = 0;
2263 			for(int j=0;j<count[i];j++)
2264 				if(nonzero[start[i]+j])
2265 				{
2266 					++nSV;
2267 					++total_sv;
2268 				}
2269 			model->nSV[i] = nSV;
2270 			nz_count[i] = nSV;
2271 		}
2272 
2273 		info("Total nSV = %d\n",total_sv);
2274 
2275 		model->l = total_sv;
2276 		model->SV = Malloc(svm_node *,total_sv);
2277 		model->sv_indices = Malloc(int,total_sv);
2278 		p = 0;
2279 		for(i=0;i<l;i++)
2280 			if(nonzero[i])
2281 			{
2282 				model->SV[p] = x[i];
2283 				model->sv_indices[p++] = perm[i] + 1;
2284 			}
2285 
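		// nz_start[i] is the offset of class i's first support vector in the
		// class-ordered SV array; it is used below to scatter each pairwise
		// classifier's alphas into the nr_class-1 rows of sv_coef.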
2286 		int *nz_start = Malloc(int,nr_class);
2287 		nz_start[0] = 0;
2288 		for(i=1;i<nr_class;i++)
2289 			nz_start[i] = nz_start[i-1]+nz_count[i-1];
2290 
2291 		model->sv_coef = Malloc(double *,nr_class-1);
2292 		for(i=0;i<nr_class-1;i++)
2293 			model->sv_coef[i] = Malloc(double,total_sv);
2294 
2295 		p = 0;
2296 		for(i=0;i<nr_class;i++)
2297 			for(int j=i+1;j<nr_class;j++)
2298 			{
2299 				// classifier (i,j): coefficients of class i's SVs
2300 				// are in sv_coef[j-1][nz_start[i]...], class j's
2301 				// are in sv_coef[i][nz_start[j]...]
2302 
2303 				int si = start[i];
2304 				int sj = start[j];
2305 				int ci = count[i];
2306 				int cj = count[j];
2307 
2308 				int q = nz_start[i];
2309 				int k;
2310 				for(k=0;k<ci;k++)
2311 					if(nonzero[si+k])
2312 						model->sv_coef[j-1][q++] = f[p].alpha[k];
2313 				q = nz_start[j];
2314 				for(k=0;k<cj;k++)
2315 					if(nonzero[sj+k])
2316 						model->sv_coef[i][q++] = f[p].alpha[ci+k];
2317 				++p;
2318 			}
2319 
2320 		free(label);
2321 		free(probA);
2322 		free(probB);
2323 		free(count);
2324 		free(perm);
2325 		free(start);
2326 		free(x);
2327 		free(weighted_C);
2328 		free(nonzero);
2329 		for(i=0;i<nr_class*(nr_class-1)/2;i++)
2330 			free(f[i].alpha);
2331 		free(f);
2332 		free(nz_count);
2333 		free(nz_start);
2334 	}
2335 	return model;
2336 }
2337 
2338 // Stratified cross validation
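//
// Folds are stratified for C-SVC/nu-SVC: each class is shuffled and split
// across the folds separately, so every fold keeps roughly the original
// class proportions. On return, target[i] holds the prediction for
// prob->x[i] made by the model trained on the other folds.
//
// Caller-side sketch (illustrative only; assumes a filled prob/param and
// integer class labels):
//
//	double *target = (double *) malloc(prob.l * sizeof(double));
//	svm_cross_validation(&prob, &param, 5, target);
//	int correct = 0;
//	for(int i = 0; i < prob.l; i++)
//		if(target[i] == prob.y[i]) ++correct;
//	free(target);
//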
2339 void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
2340 {
2341 	int i;
2342 	int *fold_start;
2343 	int l = prob->l;
2344 	int *perm = Malloc(int,l);
2345 	int nr_class;
2346 	if (nr_fold > l)
2347 	{
2348 		nr_fold = l;
2349 		fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
2350 	}
2351 	fold_start = Malloc(int,nr_fold+1);
2352 	// stratified cv may not give the leave-one-out rate:
2353 	// splitting each class into l folds can leave some folds empty, so plain cv is used when nr_fold == l
2354 	if((param->svm_type == C_SVC ||
2355 	    param->svm_type == NU_SVC) && nr_fold < l)
2356 	{
2357 		int *start = NULL;
2358 		int *label = NULL;
2359 		int *count = NULL;
2360 		svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
2361 
2362 		// randomly shuffle each class, then regroup the data by fold via the array perm
2363 		int *fold_count = Malloc(int,nr_fold);
2364 		int c;
2365 		int *index = Malloc(int,l);
2366 		for(i=0;i<l;i++)
2367 			index[i]=perm[i];
2368 		for (c=0; c<nr_class; c++)
2369 			for(i=0;i<count[c];i++)
2370 			{
2371 				int j = i+rand()%(count[c]-i);
2372 				swap(index[start[c]+j],index[start[c]+i]);
2373 			}
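		// fold_count[i] sums each class's share of fold i: class c contributes
		// (i+1)*count[c]/nr_fold - i*count[c]/nr_fold points (integer
		// division), so every class is spread as evenly as possible.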
2374 		for(i=0;i<nr_fold;i++)
2375 		{
2376 			fold_count[i] = 0;
2377 			for (c=0; c<nr_class;c++)
2378 				fold_count[i]+=(i+1)*count[c]/nr_fold-i*count[c]/nr_fold;
2379 		}
2380 		fold_start[0]=0;
2381 		for (i=1;i<=nr_fold;i++)
2382 			fold_start[i] = fold_start[i-1]+fold_count[i-1];
2383 		for (c=0; c<nr_class;c++)
2384 			for(i=0;i<nr_fold;i++)
2385 			{
2386 				int begin = start[c]+i*count[c]/nr_fold;
2387 				int end = start[c]+(i+1)*count[c]/nr_fold;
2388 				for(int j=begin;j<end;j++)
2389 				{
2390 					perm[fold_start[i]] = index[j];
2391 					fold_start[i]++;
2392 				}
2393 			}
2394 		fold_start[0]=0;
2395 		for (i=1;i<=nr_fold;i++)
2396 			fold_start[i] = fold_start[i-1]+fold_count[i-1];
2397 		free(start);
2398 		free(label);
2399 		free(count);
2400 		free(index);
2401 		free(fold_count);
2402 	}
2403 	else
2404 	{
2405 		for(i=0;i<l;i++) perm[i]=i;
2406 		for(i=0;i<l;i++)
2407 		{
2408 			int j = i+rand()%(l-i);
2409 			swap(perm[i],perm[j]);
2410 		}
2411 		for(i=0;i<=nr_fold;i++)
2412 			fold_start[i]=i*l/nr_fold;
2413 	}
2414 
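	// For each fold, train on everything outside [begin,end) (in perm order)
	// and store predictions for the held-out points in target, indexed by
	// their original position perm[j].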
2415 	for(i=0;i<nr_fold;i++)
2416 	{
2417 		int begin = fold_start[i];
2418 		int end = fold_start[i+1];
2419 		int j,k;
2420 		struct svm_problem subprob;
2421 
2422 		subprob.l = l-(end-begin);
2423 		subprob.x = Malloc(struct svm_node*,subprob.l);
2424 		subprob.y = Malloc(double,subprob.l);
2425 
2426 		k=0;
2427 		for(j=0;j<begin;j++)
2428 		{
2429 			subprob.x[k] = prob->x[perm[j]];
2430 			subprob.y[k] = prob->y[perm[j]];
2431 			++k;
2432 		}
2433 		for(j=end;j<l;j++)
2434 		{
2435 			subprob.x[k] = prob->x[perm[j]];
2436 			subprob.y[k] = prob->y[perm[j]];
2437 			++k;
2438 		}
2439 		struct svm_model *submodel = svm_train(&subprob,param);
2440 		if(param->probability &&
2441 		   (param->svm_type == C_SVC || param->svm_type == NU_SVC))
2442 		{
2443 			double *prob_estimates=Malloc(double,svm_get_nr_class(submodel));
2444 			for(j=begin;j<end;j++)
2445 				target[perm[j]] = svm_predict_probability(submodel,prob->x[perm[j]],prob_estimates);
2446 			free(prob_estimates);
2447 		}
2448 		else
2449 			for(j=begin;j<end;j++)
2450 				target[perm[j]] = svm_predict(submodel,prob->x[perm[j]]);
2451 		svm_free_and_destroy_model(&submodel);
2452 		free(subprob.x);
2453 		free(subprob.y);
2454 	}
2455 	free(fold_start);
2456 	free(perm);
2457 }
2458 
2459 
2460 int svm_get_svm_type(const svm_model *model)
2461 {
2462 	return model->param.svm_type;
2463 }
2464 
2465 int svm_get_nr_class(const svm_model *model)
2466 {
2467 	return model->nr_class;
2468 }
2469 
2470 void svm_get_labels(const svm_model *model, int* label)
2471 {
2472 	if (model->label != NULL)
2473 		for(int i=0;i<model->nr_class;i++)
2474 			label[i] = model->label[i];
2475 }
2476 
2477 void svm_get_sv_indices(const svm_model *model, int* indices)
2478 {
2479 	if (model->sv_indices != NULL)
2480 		for(int i=0;i<model->l;i++)
2481 			indices[i] = model->sv_indices[i];
2482 }
2483 
2484 int svm_get_nr_sv(const svm_model *model)
2485 {
2486 	return model->l;
2487 }
2488 
2489 double svm_get_svr_probability(const svm_model *model)
2490 {
2491 	if ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
2492 	    model->probA!=NULL)
2493 		return model->probA[0];
2494 	else
2495 	{
2496 		fprintf(stderr,"Model doesn't contain information for SVR probability inference\n");
2497 		return 0;
2498 	}
2499 }
2500 
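//
// svm_predict_values:
// - one-class / SVR: writes the single value sum_i sv_coef[i]*K(x,SV[i]) - rho
//   into *dec_values; one-class returns its sign (+1/-1), SVR the value itself.
// - classification: writes one decision value per (i,j) classifier pair into
//   dec_values and returns the label that wins the pairwise vote.
//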
2501 double svm_predict_values(const svm_model *model, const svm_node *x, double* dec_values)
2502 {
2503 	int i;
2504 	if(model->param.svm_type == ONE_CLASS ||
2505 	   model->param.svm_type == EPSILON_SVR ||
2506 	   model->param.svm_type == NU_SVR)
2507 	{
2508 		double *sv_coef = model->sv_coef[0];
2509 		double sum = 0;
2510 		for(i=0;i<model->l;i++)
2511 			sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param);
2512 		sum -= model->rho[0];
2513 		*dec_values = sum;
2514 
2515 		if(model->param.svm_type == ONE_CLASS)
2516 			return (sum>0)?1:-1;
2517 		else
2518 			return sum;
2519 	}
2520 	else
2521 	{
2522 		int nr_class = model->nr_class;
2523 		int l = model->l;
2524 
2525 		double *kvalue = Malloc(double,l);
2526 		for(i=0;i<l;i++)
2527 			kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);
2528 
2529 		int *start = Malloc(int,nr_class);
2530 		start[0] = 0;
2531 		for(i=1;i<nr_class;i++)
2532 			start[i] = start[i-1]+model->nSV[i-1];
2533 
2534 		int *vote = Malloc(int,nr_class);
2535 		for(i=0;i<nr_class;i++)
2536 			vote[i] = 0;
2537 
2538 		int p=0;
2539 		for(i=0;i<nr_class;i++)
2540 			for(int j=i+1;j<nr_class;j++)
2541 			{
2542 				double sum = 0;
2543 				int si = start[i];
2544 				int sj = start[j];
2545 				int ci = model->nSV[i];
2546 				int cj = model->nSV[j];
2547 
2548 				int k;
2549 				double *coef1 = model->sv_coef[j-1];
2550 				double *coef2 = model->sv_coef[i];
2551 				for(k=0;k<ci;k++)
2552 					sum += coef1[si+k] * kvalue[si+k];
2553 				for(k=0;k<cj;k++)
2554 					sum += coef2[sj+k] * kvalue[sj+k];
2555 				sum -= model->rho[p];
2556 				dec_values[p] = sum;
2557 
2558 				if(dec_values[p] > 0)
2559 					++vote[i];
2560 				else
2561 					++vote[j];
2562 				p++;
2563 			}
2564 
2565 		int vote_max_idx = 0;
2566 		for(i=1;i<nr_class;i++)
2567 			if(vote[i] > vote[vote_max_idx])
2568 				vote_max_idx = i;
2569 
2570 		free(kvalue);
2571 		free(start);
2572 		free(vote);
2573 		return model->label[vote_max_idx];
2574 	}
2575 }
2576 
2577 double svm_predict(const svm_model *model, const svm_node *x)
2578 {
2579 	int nr_class = model->nr_class;
2580 	double *dec_values;
2581 	if(model->param.svm_type == ONE_CLASS ||
2582 	   model->param.svm_type == EPSILON_SVR ||
2583 	   model->param.svm_type == NU_SVR)
2584 		dec_values = Malloc(double, 1);
2585 	else
2586 		dec_values = Malloc(double, nr_class*(nr_class-1)/2);
2587 	double pred_result = svm_predict_values(model, x, dec_values);
2588 	free(dec_values);
2589 	return pred_result;
2590 }
2591 
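//
// svm_predict_probability (C-SVC / nu-SVC trained with probability == 1):
// each pairwise decision value is mapped to a pairwise probability by
// sigmoid_predict() with the trained (probA,probB), clipped to
// [1e-7, 1-1e-7], and the pairwise estimates are coupled into per-class
// probabilities (directly for two classes, via multiclass_probability()
// otherwise); the returned label is the most probable one. Models without
// probability information fall back to svm_predict().
//
// Caller-side sketch (illustrative only):
//
//	double *probs = (double *) malloc(svm_get_nr_class(model) * sizeof(double));
//	double predicted = svm_predict_probability(model, x, probs);
//	// probs[i] belongs to the label reported by svm_get_labels()
//	free(probs);
//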
2592 double svm_predict_probability(
2593 	const svm_model *model, const svm_node *x, double *prob_estimates)
2594 {
2595 	if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
2596 	    model->probA!=NULL && model->probB!=NULL)
2597 	{
2598 		int i;
2599 		int nr_class = model->nr_class;
2600 		double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
2601 		svm_predict_values(model, x, dec_values);
2602 
2603 		double min_prob=1e-7;
2604 		double **pairwise_prob=Malloc(double *,nr_class);
2605 		for(i=0;i<nr_class;i++)
2606 			pairwise_prob[i]=Malloc(double,nr_class);
2607 		int k=0;
2608 		for(i=0;i<nr_class;i++)
2609 			for(int j=i+1;j<nr_class;j++)
2610 			{
2611 				pairwise_prob[i][j]=min(max(sigmoid_predict(dec_values[k],model->probA[k],model->probB[k]),min_prob),1-min_prob);
2612 				pairwise_prob[j][i]=1-pairwise_prob[i][j];
2613 				k++;
2614 			}
2615 		if (nr_class == 2)
2616 		{
2617 			prob_estimates[0] = pairwise_prob[0][1];
2618 			prob_estimates[1] = pairwise_prob[1][0];
2619 		}
2620 		else
2621 			multiclass_probability(nr_class,pairwise_prob,prob_estimates);
2622 
2623 		int prob_max_idx = 0;
2624 		for(i=1;i<nr_class;i++)
2625 			if(prob_estimates[i] > prob_estimates[prob_max_idx])
2626 				prob_max_idx = i;
2627 		for(i=0;i<nr_class;i++)
2628 			free(pairwise_prob[i]);
2629 		free(dec_values);
2630 		free(pairwise_prob);
2631 		return model->label[prob_max_idx];
2632 	}
2633 	else
2634 		return svm_predict(model, x);
2635 }
2636 
2637 static const char *svm_type_table[] =
2638 {
2639 	"c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL
2640 };
2641 
2642 static const char *kernel_type_table[]=
2643 {
2644 	"linear","polynomial","rbf","sigmoid","precomputed",NULL
2645 };
2646 
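//
// Model file layout written below: a plain-text header (svm_type,
// kernel_type, optional degree/gamma/coef0, nr_class, total_sv, rho and,
// when present, label/probA/probB/nr_sv), a line containing "SV", and then
// one line per support vector holding its nr_class-1 coefficients followed
// by index:value pairs. The "C" locale is forced so '.' is always the
// decimal separator.
//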
2647 int svm_save_model(const char *model_file_name, const svm_model *model)
2648 {
2649 	FILE *fp = fopen(model_file_name,"w");
2650 	if(fp==NULL) return -1;
2651 
2652 	char *old_locale = setlocale(LC_ALL, NULL);
2653 	if (old_locale) {
2654 		old_locale = strdup(old_locale);
2655 	}
2656 	setlocale(LC_ALL, "C");
2657 
2658 	const svm_parameter& param = model->param;
2659 
2660 	fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]);
2661 	fprintf(fp,"kernel_type %s\n", kernel_type_table[param.kernel_type]);
2662 
2663 	if(param.kernel_type == POLY)
2664 		fprintf(fp,"degree %d\n", param.degree);
2665 
2666 	if(param.kernel_type == POLY || param.kernel_type == RBF || param.kernel_type == SIGMOID)
2667 		fprintf(fp,"gamma %.17g\n", param.gamma);
2668 
2669 	if(param.kernel_type == POLY || param.kernel_type == SIGMOID)
2670 		fprintf(fp,"coef0 %.17g\n", param.coef0);
2671 
2672 	int nr_class = model->nr_class;
2673 	int l = model->l;
2674 	fprintf(fp, "nr_class %d\n", nr_class);
2675 	fprintf(fp, "total_sv %d\n",l);
2676 
2677 	{
2678 		fprintf(fp, "rho");
2679 		for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2680 			fprintf(fp," %.17g",model->rho[i]);
2681 		fprintf(fp, "\n");
2682 	}
2683 
2684 	if(model->label)
2685 	{
2686 		fprintf(fp, "label");
2687 		for(int i=0;i<nr_class;i++)
2688 			fprintf(fp," %d",model->label[i]);
2689 		fprintf(fp, "\n");
2690 	}
2691 
2692 	if(model->probA) // regression has probA only
2693 	{
2694 		fprintf(fp, "probA");
2695 		for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2696 			fprintf(fp," %.17g",model->probA[i]);
2697 		fprintf(fp, "\n");
2698 	}
2699 	if(model->probB)
2700 	{
2701 		fprintf(fp, "probB");
2702 		for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2703 			fprintf(fp," %.17g",model->probB[i]);
2704 		fprintf(fp, "\n");
2705 	}
2706 
2707 	if(model->nSV)
2708 	{
2709 		fprintf(fp, "nr_sv");
2710 		for(int i=0;i<nr_class;i++)
2711 			fprintf(fp," %d",model->nSV[i]);
2712 		fprintf(fp, "\n");
2713 	}
2714 
2715 	fprintf(fp, "SV\n");
2716 	const double * const *sv_coef = model->sv_coef;
2717 	const svm_node * const *SV = model->SV;
2718 
2719 	for(int i=0;i<l;i++)
2720 	{
2721 		for(int j=0;j<nr_class-1;j++)
2722 			fprintf(fp, "%.17g ",sv_coef[j][i]);
2723 
2724 		const svm_node *p = SV[i];
2725 
2726 		if(param.kernel_type == PRECOMPUTED)
2727 			fprintf(fp,"0:%d ",(int)(p->value));
2728 		else
2729 			while(p->index != -1)
2730 			{
2731 				fprintf(fp,"%d:%.8g ",p->index,p->value);
2732 				p++;
2733 			}
2734 		fprintf(fp, "\n");
2735 	}
2736 
2737 	setlocale(LC_ALL, old_locale);
2738 	free(old_locale);
2739 
2740 	if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
2741 	else return 0;
2742 }
2743 
2744 static char *line = NULL;
2745 static int max_line_len;
2746 
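// readline: reads one line into the global buffer `line`, doubling
// max_line_len until the whole line (including '\n') fits; returns NULL
// when fgets finds no more input.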
2747 static char* readline(FILE *input)
2748 {
2749 	int len;
2750 
2751 	if(fgets(line,max_line_len,input) == NULL)
2752 		return NULL;
2753 
2754 	while(strrchr(line,'\n') == NULL)
2755 	{
2756 		max_line_len *= 2;
2757 		line = (char *) realloc(line,max_line_len);
2758 		len = (int) strlen(line);
2759 		if(fgets(line+len,max_line_len-len,input) == NULL)
2760 			break;
2761 	}
2762 	return line;
2763 }
2764 
2765 //
2766 // FSCANF wraps fscanf and makes the caller return false on a read failure.
2767 // The do { ... } while(0) wrapper lets the macro act as a single statement, so
2768 // if (...)
2769 //    FSCANF();
2770 // parses without ambiguity.
2771 //
2772 #define FSCANF(_stream, _format, _var) do{ if (fscanf(_stream, _format, _var) != 1) return false; }while(0)
2773 bool read_model_header(FILE *fp, svm_model* model)
2774 {
2775 	svm_parameter& param = model->param;
2776 	// parameters used only for training are not stored in the model file; set the weight arrays to empty/NULL for safety
2777 	param.nr_weight = 0;
2778 	param.weight_label = NULL;
2779 	param.weight = NULL;
2780 
2781 	char cmd[81];
2782 	while(1)
2783 	{
2784 		FSCANF(fp,"%80s",cmd);
2785 
2786 		if(strcmp(cmd,"svm_type")==0)
2787 		{
2788 			FSCANF(fp,"%80s",cmd);
2789 			int i;
2790 			for(i=0;svm_type_table[i];i++)
2791 			{
2792 				if(strcmp(svm_type_table[i],cmd)==0)
2793 				{
2794 					param.svm_type=i;
2795 					break;
2796 				}
2797 			}
2798 			if(svm_type_table[i] == NULL)
2799 			{
2800 				fprintf(stderr,"unknown svm type.\n");
2801 				return false;
2802 			}
2803 		}
2804 		else if(strcmp(cmd,"kernel_type")==0)
2805 		{
2806 			FSCANF(fp,"%80s",cmd);
2807 			int i;
2808 			for(i=0;kernel_type_table[i];i++)
2809 			{
2810 				if(strcmp(kernel_type_table[i],cmd)==0)
2811 				{
2812 					param.kernel_type=i;
2813 					break;
2814 				}
2815 			}
2816 			if(kernel_type_table[i] == NULL)
2817 			{
2818 				fprintf(stderr,"unknown kernel function.\n");
2819 				return false;
2820 			}
2821 		}
2822 		else if(strcmp(cmd,"degree")==0)
2823 			FSCANF(fp,"%d",&param.degree);
2824 		else if(strcmp(cmd,"gamma")==0)
2825 			FSCANF(fp,"%lf",&param.gamma);
2826 		else if(strcmp(cmd,"coef0")==0)
2827 			FSCANF(fp,"%lf",&param.coef0);
2828 		else if(strcmp(cmd,"nr_class")==0)
2829 			FSCANF(fp,"%d",&model->nr_class);
2830 		else if(strcmp(cmd,"total_sv")==0)
2831 			FSCANF(fp,"%d",&model->l);
2832 		else if(strcmp(cmd,"rho")==0)
2833 		{
2834 			int n = model->nr_class * (model->nr_class-1)/2;
2835 			model->rho = Malloc(double,n);
2836 			for(int i=0;i<n;i++)
2837 				FSCANF(fp,"%lf",&model->rho[i]);
2838 		}
2839 		else if(strcmp(cmd,"label")==0)
2840 		{
2841 			int n = model->nr_class;
2842 			model->label = Malloc(int,n);
2843 			for(int i=0;i<n;i++)
2844 				FSCANF(fp,"%d",&model->label[i]);
2845 		}
2846 		else if(strcmp(cmd,"probA")==0)
2847 		{
2848 			int n = model->nr_class * (model->nr_class-1)/2;
2849 			model->probA = Malloc(double,n);
2850 			for(int i=0;i<n;i++)
2851 				FSCANF(fp,"%lf",&model->probA[i]);
2852 		}
2853 		else if(strcmp(cmd,"probB")==0)
2854 		{
2855 			int n = model->nr_class * (model->nr_class-1)/2;
2856 			model->probB = Malloc(double,n);
2857 			for(int i=0;i<n;i++)
2858 				FSCANF(fp,"%lf",&model->probB[i]);
2859 		}
2860 		else if(strcmp(cmd,"nr_sv")==0)
2861 		{
2862 			int n = model->nr_class;
2863 			model->nSV = Malloc(int,n);
2864 			for(int i=0;i<n;i++)
2865 				FSCANF(fp,"%d",&model->nSV[i]);
2866 		}
2867 		else if(strcmp(cmd,"SV")==0)
2868 		{
2869 			while(1)
2870 			{
2871 				int c = getc(fp);
2872 				if(c==EOF || c=='\n') break;
2873 			}
2874 			break;
2875 		}
2876 		else
2877 		{
2878 			fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
2879 			return false;
2880 		}
2881 	}
2882 
2883 	return true;
2884 
2885 }
2886 
2887 svm_model *svm_load_model(const char *model_file_name)
2888 {
2889 	FILE *fp = fopen(model_file_name,"rb");
2890 	if(fp==NULL) return NULL;
2891 
2892 	char *old_locale = setlocale(LC_ALL, NULL);
2893 	if (old_locale) {
2894 		old_locale = strdup(old_locale);
2895 	}
2896 	setlocale(LC_ALL, "C");
2897 
2898 	// read parameters
2899 
2900 	svm_model *model = Malloc(svm_model,1);
2901 	model->rho = NULL;
2902 	model->probA = NULL;
2903 	model->probB = NULL;
2904 	model->sv_indices = NULL;
2905 	model->label = NULL;
2906 	model->nSV = NULL;
2907 
2908 	// read header
2909 	if (!read_model_header(fp, model))
2910 	{
2911 		fprintf(stderr, "ERROR: fscanf failed to read model\n");
2912 		setlocale(LC_ALL, old_locale);
2913 		free(old_locale);
2914 		free(model->rho);
2915 		free(model->label);
2916 		free(model->nSV);
		free(model->probA);	// may also have been allocated before the failure
		free(model->probB);
		free(model->sv_indices);
2917 		free(model);
2918 		return NULL;
2919 	}
2920 
2921 	// read sv_coef and SV
2922 
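	// Two passes over the SV block: first count the index:value pairs on the
	// remaining lines to size x_space (plus one end marker per support
	// vector), then rewind and parse the coefficients and nodes for real.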
2923 	int elements = 0;
2924 	long pos = ftell(fp);
2925 
2926 	max_line_len = 1024;
2927 	line = Malloc(char,max_line_len);
2928 	char *p,*endptr,*idx,*val;
2929 
2930 	while(readline(fp)!=NULL)
2931 	{
2932 		p = strtok(line,":");
2933 		while(1)
2934 		{
2935 			p = strtok(NULL,":");
2936 			if(p == NULL)
2937 				break;
2938 			++elements;
2939 		}
2940 	}
2941 	elements += model->l;
2942 
2943 	fseek(fp,pos,SEEK_SET);
2944 
2945 	int m = model->nr_class - 1;
2946 	int l = model->l;
2947 	model->sv_coef = Malloc(double *,m);
2948 	int i;
2949 	for(i=0;i<m;i++)
2950 		model->sv_coef[i] = Malloc(double,l);
2951 	model->SV = Malloc(svm_node*,l);
2952 	svm_node *x_space = NULL;
2953 	if(l>0) x_space = Malloc(svm_node,elements);
2954 
2955 	int j=0;
2956 	for(i=0;i<l;i++)
2957 	{
2958 		readline(fp);
2959 		model->SV[i] = &x_space[j];
2960 
2961 		p = strtok(line, " \t");
2962 		model->sv_coef[0][i] = strtod(p,&endptr);
2963 		for(int k=1;k<m;k++)
2964 		{
2965 			p = strtok(NULL, " \t");
2966 			model->sv_coef[k][i] = strtod(p,&endptr);
2967 		}
2968 
2969 		while(1)
2970 		{
2971 			idx = strtok(NULL, ":");
2972 			val = strtok(NULL, " \t");
2973 
2974 			if(val == NULL)
2975 				break;
2976 			x_space[j].index = (int) strtol(idx,&endptr,10);
2977 			x_space[j].value = strtod(val,&endptr);
2978 
2979 			++j;
2980 		}
2981 		x_space[j++].index = -1;
2982 	}
2983 	free(line);
2984 
2985 	setlocale(LC_ALL, old_locale);
2986 	free(old_locale);
2987 
2988 	if (ferror(fp) != 0 || fclose(fp) != 0)
2989 		return NULL;
2990 
2991 	model->free_sv = 1;	// SVs live in x_space, which the model owns and frees
2992 	return model;
2993 }
2994 
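// free_sv == 1 (models from svm_load_model): SV[0] points at the single
// x_space block, so freeing it releases all nodes. free_sv == 0 (models from
// svm_train): the SVs belong to the caller's svm_problem and are left alone.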
2995 void svm_free_model_content(svm_model* model_ptr)
2996 {
2997 	if(model_ptr->free_sv && model_ptr->l > 0 && model_ptr->SV != NULL)
2998 		free((void *)(model_ptr->SV[0]));
2999 	if(model_ptr->sv_coef)
3000 	{
3001 		for(int i=0;i<model_ptr->nr_class-1;i++)
3002 			free(model_ptr->sv_coef[i]);
3003 	}
3004 
3005 	free(model_ptr->SV);
3006 	model_ptr->SV = NULL;
3007 
3008 	free(model_ptr->sv_coef);
3009 	model_ptr->sv_coef = NULL;
3010 
3011 	free(model_ptr->rho);
3012 	model_ptr->rho = NULL;
3013 
3014 	free(model_ptr->label);
3015 	model_ptr->label= NULL;
3016 
3017 	free(model_ptr->probA);
3018 	model_ptr->probA = NULL;
3019 
3020 	free(model_ptr->probB);
3021 	model_ptr->probB= NULL;
3022 
3023 	free(model_ptr->sv_indices);
3024 	model_ptr->sv_indices = NULL;
3025 
3026 	free(model_ptr->nSV);
3027 	model_ptr->nSV = NULL;
3028 }
3029 
3030 void svm_free_and_destroy_model(svm_model** model_ptr_ptr)
3031 {
3032 	if(model_ptr_ptr != NULL && *model_ptr_ptr != NULL)
3033 	{
3034 		svm_free_model_content(*model_ptr_ptr);
3035 		free(*model_ptr_ptr);
3036 		*model_ptr_ptr = NULL;
3037 	}
3038 }
3039 
3040 void svm_destroy_param(svm_parameter* param)
3041 {
3042 	free(param->weight_label);
3043 	free(param->weight);
3044 }
3045 
3046 const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
3047 {
3048 	// svm_type
3049 
3050 	int svm_type = param->svm_type;
3051 	if(svm_type != C_SVC &&
3052 	   svm_type != NU_SVC &&
3053 	   svm_type != ONE_CLASS &&
3054 	   svm_type != EPSILON_SVR &&
3055 	   svm_type != NU_SVR)
3056 		return "unknown svm type";
3057 
3058 	// kernel_type, degree
3059 
3060 	int kernel_type = param->kernel_type;
3061 	if(kernel_type != LINEAR &&
3062 	   kernel_type != POLY &&
3063 	   kernel_type != RBF &&
3064 	   kernel_type != SIGMOID &&
3065 	   kernel_type != PRECOMPUTED)
3066 		return "unknown kernel type";
3067 
3068 	if((kernel_type == POLY || kernel_type == RBF || kernel_type == SIGMOID) &&
3069 	   param->gamma < 0)
3070 		return "gamma < 0";
3071 
3072 	if(kernel_type == POLY && param->degree < 0)
3073 		return "degree of polynomial kernel < 0";
3074 
3075 	// cache_size,eps,C,nu,p,shrinking
3076 
3077 	if(param->cache_size <= 0)
3078 		return "cache_size <= 0";
3079 
3080 	if(param->eps <= 0)
3081 		return "eps <= 0";
3082 
3083 	if(svm_type == C_SVC ||
3084 	   svm_type == EPSILON_SVR ||
3085 	   svm_type == NU_SVR)
3086 		if(param->C <= 0)
3087 			return "C <= 0";
3088 
3089 	if(svm_type == NU_SVC ||
3090 	   svm_type == ONE_CLASS ||
3091 	   svm_type == NU_SVR)
3092 		if(param->nu <= 0 || param->nu > 1)
3093 			return "nu <= 0 or nu > 1";
3094 
3095 	if(svm_type == EPSILON_SVR)
3096 		if(param->p < 0)
3097 			return "p < 0";
3098 
3099 	if(param->shrinking != 0 &&
3100 	   param->shrinking != 1)
3101 		return "shrinking != 0 and shrinking != 1";
3102 
3103 	if(param->probability != 0 &&
3104 	   param->probability != 1)
3105 		return "probability != 0 and probability != 1";
3106 
3107 	if(param->probability == 1 &&
3108 	   svm_type == ONE_CLASS)
3109 		return "one-class SVM probability output not supported yet";
3110 
3111 
3112 	// check whether nu-svc is feasible
3113 
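	// nu is feasible only if, for every pair of classes with n1 and n2
	// examples, nu*(n1+n2)/2 <= min(n1,n2).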
3114 	if(svm_type == NU_SVC)
3115 	{
3116 		int l = prob->l;
3117 		int max_nr_class = 16;
3118 		int nr_class = 0;
3119 		int *label = Malloc(int,max_nr_class);
3120 		int *count = Malloc(int,max_nr_class);
3121 
3122 		int i;
3123 		for(i=0;i<l;i++)
3124 		{
3125 			int this_label = (int)prob->y[i];
3126 			int j;
3127 			for(j=0;j<nr_class;j++)
3128 				if(this_label == label[j])
3129 				{
3130 					++count[j];
3131 					break;
3132 				}
3133 			if(j == nr_class)
3134 			{
3135 				if(nr_class == max_nr_class)
3136 				{
3137 					max_nr_class *= 2;
3138 					label = (int *)realloc(label,max_nr_class*sizeof(int));
3139 					count = (int *)realloc(count,max_nr_class*sizeof(int));
3140 				}
3141 				label[nr_class] = this_label;
3142 				count[nr_class] = 1;
3143 				++nr_class;
3144 			}
3145 		}
3146 
3147 		for(i=0;i<nr_class;i++)
3148 		{
3149 			int n1 = count[i];
3150 			for(int j=i+1;j<nr_class;j++)
3151 			{
3152 				int n2 = count[j];
3153 				if(param->nu*(n1+n2)/2 > min(n1,n2))
3154 				{
3155 					free(label);
3156 					free(count);
3157 					return "specified nu is infeasible";
3158 				}
3159 			}
3160 		}
3161 		free(label);
3162 		free(count);
3163 	}
3164 
3165 	return NULL;
3166 }
3167 
3168 int svm_check_probability_model(const svm_model *model)
3169 {
3170 	return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
3171 		model->probA!=NULL && model->probB!=NULL) ||
3172 		((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
3173 		 model->probA!=NULL);
3174 }
3175 
3176 void svm_set_print_string_function(void (*print_func)(const char *))
3177 {
3178 	if(print_func == NULL)
3179 		svm_print_string = &print_string_stdout;
3180 	else
3181 		svm_print_string = print_func;
3182 }
3183