1 /*
2 
3   This code was extracted from libsvm 3.2.3 in Feb 2019 and
4   modified for use with Octave and Matlab
5 
6 
7 Copyright (c) 2000-2019 Chih-Chung Chang and Chih-Jen Lin
8 All rights reserved.
9 
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions
12 are met:
13 
14 1. Redistributions of source code must retain the above copyright
15 notice, this list of conditions and the following disclaimer.
16 
17 2. Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20 
21 3. Neither name of copyright holders nor the names of its contributors
22 may be used to endorse or promote products derived from this software
23 without specific prior written permission.
24 
25 
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29 A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR
30 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 */
38 
39 #include <math.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <ctype.h>
43 #include <float.h>
44 #include <string.h>
45 #include <stdarg.h>
46 #include <limits.h>
47 #include <locale.h>
48 #include "svm.h"
49 int libsvm_version = LIBSVM_VERSION;
50 typedef float Qfloat;
51 typedef signed char schar;
52 #ifndef min
53 template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
54 #endif
55 #ifndef max
56 template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
57 #endif
58 template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
59 template <class S, class T> static inline void clone(T*& dst, S* src, int n)
60 {
61 	dst = new T[n];
62 	memcpy((void *)dst,(void *)src,sizeof(T)*n);
63 }
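// powi computes base^times by repeated squaring (O(log times) multiplications),
// e.g. powi(2.0, 10) == 1024.0; a non-negative exponent is assumed.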
64 static inline double powi(double base, int times)
65 {
66 	double tmp = base, ret = 1.0;
67 
68 	for(int t=times; t>0; t/=2)
69 	{
70 		if(t%2==1) ret*=tmp;
71 		tmp = tmp * tmp;
72 	}
73 	return ret;
74 }
75 #define INF HUGE_VAL
76 #define TAU 1e-12
77 #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
78 
79 static void print_string_stdout(const char *s)
80 {
81 	fputs(s,stdout);
82 	fflush(stdout);
83 }
84 static void (*svm_print_string) (const char *) = &print_string_stdout;
85 #if 1
86 static void info(const char *fmt,...)
87 {
88 	char buf[BUFSIZ];
89 	va_list ap;
90 	va_start(ap,fmt);
91 	vsprintf(buf,fmt,ap);
92 	va_end(ap);
93 	(*svm_print_string)(buf);
94 }
95 #else
96 static void info(const char *fmt,...) {}
97 #endif
98 
99 //
100 // Kernel Cache
101 //
102 // l is the total number of data items
103 // size is the cache size limit in bytes
104 //
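// Cached kernel columns are kept in a circular doubly-linked LRU list; when the
// byte budget is exhausted, the least recently used column is freed first.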
105 class Cache
106 {
107 public:
108 	Cache(int l,long int size);
109 	~Cache();
110 
111 	// request data [0,len)
112 	// return some position p where [p,len) needs to be filled
113 	// (p >= len if nothing needs to be filled)
114 	int get_data(const int index, Qfloat **data, int len);
115 	void swap_index(int i, int j);
116 private:
117 	int l;
118 	long int size;
119 	struct head_t
120 	{
121 		head_t *prev, *next;	// a circular list
122 		Qfloat *data;
123 		int len;		// data[0,len) is cached in this entry
124 	};
125 
126 	head_t *head;
127 	head_t lru_head;
128 	void lru_delete(head_t *h);
129 	void lru_insert(head_t *h);
130 };
131 
132 Cache::Cache(int l_,long int size_):l(l_),size(size_)
133 {
134 	head = (head_t *)calloc(l,sizeof(head_t));	// initialized to 0
135 	size /= sizeof(Qfloat);
136 	size -= l * sizeof(head_t) / sizeof(Qfloat);
137 	size = max(size, 2 * (long int) l);	// cache must be large enough for two columns
138 	lru_head.next = lru_head.prev = &lru_head;
139 }
140 
141 Cache::~Cache()
142 {
143 	for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
144 		free(h->data);
145 	free(head);
146 }
147 
148 void Cache::lru_delete(head_t *h)
149 {
150 	// delete from current location
151 	h->prev->next = h->next;
152 	h->next->prev = h->prev;
153 }
154 
155 void Cache::lru_insert(head_t *h)
156 {
157 	// insert to last position
158 	h->next = &lru_head;
159 	h->prev = lru_head.prev;
160 	h->prev->next = h;
161 	h->next->prev = h;
162 }
163 
164 int Cache::get_data(const int index, Qfloat **data, int len)
165 {
166 	head_t *h = &head[index];
167 	if(h->len) lru_delete(h);
168 	int more = len - h->len;
169 
170 	if(more > 0)
171 	{
172 		// free old space
173 		while(size < more)
174 		{
175 			head_t *old = lru_head.next;
176 			lru_delete(old);
177 			free(old->data);
178 			size += old->len;
179 			old->data = 0;
180 			old->len = 0;
181 		}
182 
183 		// allocate new space
184 		h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len);
185 		size -= more;
186 		swap(h->len,len);
187 	}
188 
189 	lru_insert(h);
190 	*data = h->data;
191 	return len;
192 }
193 
194 void Cache::swap_index(int i, int j)
195 {
196 	if(i==j) return;
197 
198 	if(head[i].len) lru_delete(&head[i]);
199 	if(head[j].len) lru_delete(&head[j]);
200 	swap(head[i].data,head[j].data);
201 	swap(head[i].len,head[j].len);
202 	if(head[i].len) lru_insert(&head[i]);
203 	if(head[j].len) lru_insert(&head[j]);
204 
205 	if(i>j) swap(i,j);
206 	for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
207 	{
208 		if(h->len > i)
209 		{
210 			if(h->len > j)
211 				swap(h->data[i],h->data[j]);
212 			else
213 			{
214 				// give up
215 				lru_delete(h);
216 				free(h->data);
217 				size += h->len;
218 				h->data = 0;
219 				h->len = 0;
220 			}
221 		}
222 	}
223 }
224 
225 //
226 // Kernel evaluation
227 //
228 // the static method k_function performs a single kernel evaluation
229 // the constructor of Kernel prepares to calculate the l*l kernel matrix
230 // the member function get_Q returns one column of the Q matrix
231 //
232 class QMatrix {
233 public:
234 	virtual Qfloat *get_Q(int column, int len) const = 0;
235 	virtual double *get_QD() const = 0;
236 	virtual void swap_index(int i, int j) const = 0;
237 	virtual ~QMatrix() {}
238 };
239 
240 class Kernel: public QMatrix {
241 public:
242 	Kernel(int l, svm_node * const * x, const svm_parameter& param);
243 	virtual ~Kernel();
244 
245 	static double k_function(const svm_node *x, const svm_node *y,
246 				 const svm_parameter& param);
247 	virtual Qfloat *get_Q(int column, int len) const = 0;
248 	virtual double *get_QD() const = 0;
249 	virtual void swap_index(int i, int j) const	// not so const...
250 	{
251 		swap(x[i],x[j]);
252 		if(x_square) swap(x_square[i],x_square[j]);
253 	}
254 protected:
255 
256 	double (Kernel::*kernel_function)(int i, int j) const;
257 
258 private:
259 	const svm_node **x;
260 	double *x_square;
261 
262 	// svm_parameter
263 	const int kernel_type;
264 	const int degree;
265 	const double gamma;
266 	const double coef0;
267 
268 	static double dot(const svm_node *px, const svm_node *py);
269 	double kernel_linear(int i, int j) const
270 	{
271 		return dot(x[i],x[j]);
272 	}
273 	double kernel_poly(int i, int j) const
274 	{
275 		return powi(gamma*dot(x[i],x[j])+coef0,degree);
276 	}
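	// Gaussian RBF: K(x_i,x_j) = exp(-gamma*||x_i-x_j||^2), expanded using the
	// precomputed squared norms stored in x_square[].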
277 	double kernel_rbf(int i, int j) const
278 	{
279 		return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
280 	}
281 	double kernel_sigmoid(int i, int j) const
282 	{
283 		return tanh(gamma*dot(x[i],x[j])+coef0);
284 	}
285 	double kernel_precomputed(int i, int j) const
286 	{
287 		return x[i][(int)(x[j][0].value)].value;
288 	}
289 };
290 
291 Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param)
292 :kernel_type(param.kernel_type), degree(param.degree),
293  gamma(param.gamma), coef0(param.coef0)
294 {
295 	switch(kernel_type)
296 	{
297 		case LINEAR:
298 			kernel_function = &Kernel::kernel_linear;
299 			break;
300 		case POLY:
301 			kernel_function = &Kernel::kernel_poly;
302 			break;
303 		case RBF:
304 			kernel_function = &Kernel::kernel_rbf;
305 			break;
306 		case SIGMOID:
307 			kernel_function = &Kernel::kernel_sigmoid;
308 			break;
309 		case PRECOMPUTED:
310 			kernel_function = &Kernel::kernel_precomputed;
311 			break;
312 	}
313 
314 	clone(x,x_,l);
315 
316 	if(kernel_type == RBF)
317 	{
318 		x_square = new double[l];
319 		for(int i=0;i<l;i++)
320 			x_square[i] = dot(x[i],x[i]);
321 	}
322 	else
323 		x_square = 0;
324 }
325 
326 Kernel::~Kernel()
327 {
328 	delete[] x;
329 	delete[] x_square;
330 }
331 
332 double Kernel::dot(const svm_node *px, const svm_node *py)
333 {
334 	double sum = 0;
335 	while(px->index != -1 && py->index != -1)
336 	{
337 		if(px->index == py->index)
338 		{
339 			sum += px->value * py->value;
340 			++px;
341 			++py;
342 		}
343 		else
344 		{
345 			if(px->index > py->index)
346 				++py;
347 			else
348 				++px;
349 		}
350 	}
351 	return sum;
352 }
353 
354 double Kernel::k_function(const svm_node *x, const svm_node *y,
355 			  const svm_parameter& param)
356 {
357 	switch(param.kernel_type)
358 	{
359 		case LINEAR:
360 			return dot(x,y);
361 		case POLY:
362 			return powi(param.gamma*dot(x,y)+param.coef0,param.degree);
363 		case RBF:
364 		{
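			// accumulate ||x-y||^2 over the sparse representations; an index present
			// in only one of the two vectors contributes the square of that value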
365 			double sum = 0;
366 			while(x->index != -1 && y->index !=-1)
367 			{
368 				if(x->index == y->index)
369 				{
370 					double d = x->value - y->value;
371 					sum += d*d;
372 					++x;
373 					++y;
374 				}
375 				else
376 				{
377 					if(x->index > y->index)
378 					{
379 						sum += y->value * y->value;
380 						++y;
381 					}
382 					else
383 					{
384 						sum += x->value * x->value;
385 						++x;
386 					}
387 				}
388 			}
389 
390 			while(x->index != -1)
391 			{
392 				sum += x->value * x->value;
393 				++x;
394 			}
395 
396 			while(y->index != -1)
397 			{
398 				sum += y->value * y->value;
399 				++y;
400 			}
401 
402 			return exp(-param.gamma*sum);
403 		}
404 		case SIGMOID:
405 			return tanh(param.gamma*dot(x,y)+param.coef0);
406 		case PRECOMPUTED:  //x: test (validation), y: SV
407 			return x[(int)(y->value)].value;
408 		default:
409 			return 0;  // Unreachable
410 	}
411 }
412 
413 // An SMO algorithm in Fan et al., JMLR 6(2005), p. 1889--1918
414 // Solves:
415 //
416 //	min 0.5(\alpha^T Q \alpha) + p^T \alpha
417 //
418 //		y^T \alpha = \delta
419 //		y_i = +1 or -1
420 //		0 <= alpha_i <= Cp for y_i = 1
421 //		0 <= alpha_i <= Cn for y_i = -1
422 //
423 // Given:
424 //
425 //	Q, p, y, Cp, Cn, and an initial feasible point \alpha
426 //	l is the size of vectors and matrices
427 //	eps is the stopping tolerance
428 //
429 // solution will be put in \alpha, objective value will be put in obj
430 //
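// Working-set selection below uses the second-order criterion (WSS 2) from the
// paper cited above.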
431 class Solver {
432 public:
433 	Solver() {};
434 	virtual ~Solver() {};
435 
436 	struct SolutionInfo {
437 		double obj;
438 		double rho;
439 		double upper_bound_p;
440 		double upper_bound_n;
441 		double r;	// for Solver_NU
442 	};
443 
444 	void Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
445 		   double *alpha_, double Cp, double Cn, double eps,
446 		   SolutionInfo* si, int shrinking);
447 protected:
448 	int active_size;
449 	schar *y;
450 	double *G;		// gradient of objective function
451 	enum { LOWER_BOUND, UPPER_BOUND, FREE };
452 	char *alpha_status;	// LOWER_BOUND, UPPER_BOUND, FREE
453 	double *alpha;
454 	const QMatrix *Q;
455 	const double *QD;
456 	double eps;
457 	double Cp,Cn;
458 	double *p;
459 	int *active_set;
460 	double *G_bar;		// gradient, if we treat free variables as 0
461 	int l;
462 	bool unshrink;	// XXX
463 
464 	double get_C(int i)
465 	{
466 		return (y[i] > 0)? Cp : Cn;
467 	}
468 	void update_alpha_status(int i)
469 	{
470 		if(alpha[i] >= get_C(i))
471 			alpha_status[i] = UPPER_BOUND;
472 		else if(alpha[i] <= 0)
473 			alpha_status[i] = LOWER_BOUND;
474 		else alpha_status[i] = FREE;
475 	}
476 	bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
477 	bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
478 	bool is_free(int i) { return alpha_status[i] == FREE; }
479 	void swap_index(int i, int j);
480 	void reconstruct_gradient();
481 	virtual int select_working_set(int &i, int &j);
482 	virtual double calculate_rho();
483 	virtual void do_shrinking();
484 private:
485 	bool be_shrunk(int i, double Gmax1, double Gmax2);
486 };
487 
488 void Solver::swap_index(int i, int j)
489 {
490 	Q->swap_index(i,j);
491 	swap(y[i],y[j]);
492 	swap(G[i],G[j]);
493 	swap(alpha_status[i],alpha_status[j]);
494 	swap(alpha[i],alpha[j]);
495 	swap(p[i],p[j]);
496 	swap(active_set[i],active_set[j]);
497 	swap(G_bar[i],G_bar[j]);
498 }
499 
500 void Solver::reconstruct_gradient()
501 {
502 	// reconstruct inactive elements of G from G_bar and free variables
503 
504 	if(active_size == l) return;
505 
506 	int i,j;
507 	int nr_free = 0;
508 
509 	for(j=active_size;j<l;j++)
510 		G[j] = G_bar[j] + p[j];
511 
512 	for(j=0;j<active_size;j++)
513 		if(is_free(j))
514 			nr_free++;
515 
516 	if(2*nr_free < active_size)
517 		info("\nWARNING: using -h 0 may be faster\n");
518 
519 	if (nr_free*l > 2*active_size*(l-active_size))
520 	{
521 		for(i=active_size;i<l;i++)
522 		{
523 			const Qfloat *Q_i = Q->get_Q(i,active_size);
524 			for(j=0;j<active_size;j++)
525 				if(is_free(j))
526 					G[i] += alpha[j] * Q_i[j];
527 		}
528 	}
529 	else
530 	{
531 		for(i=0;i<active_size;i++)
532 			if(is_free(i))
533 			{
534 				const Qfloat *Q_i = Q->get_Q(i,l);
535 				double alpha_i = alpha[i];
536 				for(j=active_size;j<l;j++)
537 					G[j] += alpha_i * Q_i[j];
538 			}
539 	}
540 }
541 
542 void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
543 		   double *alpha_, double Cp, double Cn, double eps,
544 		   SolutionInfo* si, int shrinking)
545 {
546 	this->l = l;
547 	this->Q = &Q;
548 	QD=Q.get_QD();
549 	clone(p, p_,l);
550 	clone(y, y_,l);
551 	clone(alpha,alpha_,l);
552 	this->Cp = Cp;
553 	this->Cn = Cn;
554 	this->eps = eps;
555 	unshrink = false;
556 
557 	// initialize alpha_status
558 	{
559 		alpha_status = new char[l];
560 		for(int i=0;i<l;i++)
561 			update_alpha_status(i);
562 	}
563 
564 	// initialize active set (for shrinking)
565 	{
566 		active_set = new int[l];
567 		for(int i=0;i<l;i++)
568 			active_set[i] = i;
569 		active_size = l;
570 	}
571 
572 	// initialize gradient
573 	{
574 		G = new double[l];
575 		G_bar = new double[l];
576 		int i;
577 		for(i=0;i<l;i++)
578 		{
579 			G[i] = p[i];
580 			G_bar[i] = 0;
581 		}
582 		for(i=0;i<l;i++)
583 			if(!is_lower_bound(i))
584 			{
585 				const Qfloat *Q_i = Q.get_Q(i,l);
586 				double alpha_i = alpha[i];
587 				int j;
588 				for(j=0;j<l;j++)
589 					G[j] += alpha_i*Q_i[j];
590 				if(is_upper_bound(i))
591 					for(j=0;j<l;j++)
592 						G_bar[j] += get_C(i) * Q_i[j];
593 			}
594 	}
595 
596 	// optimization step
597 
598 	int iter = 0;
599 	int max_iter = max(10000000, l>INT_MAX/100 ? INT_MAX : 100*l);
600 	int counter = min(l,1000)+1;
601 
602 	while(iter < max_iter)
603 	{
604 		// show progress and do shrinking
605 
606 		if(--counter == 0)
607 		{
608 			counter = min(l,1000);
609 			if(shrinking) do_shrinking();
610 			info(".");
611 		}
612 
613 		int i,j;
614 		if(select_working_set(i,j)!=0)
615 		{
616 			// reconstruct the whole gradient
617 			reconstruct_gradient();
618 			// reset active set size and check
619 			active_size = l;
620 			info("*");
621 			if(select_working_set(i,j)!=0)
622 				break;
623 			else
624 				counter = 1;	// do shrinking next iteration
625 		}
626 
627 		++iter;
628 
629 		// update alpha[i] and alpha[j], handle bounds carefully
630 
631 		const Qfloat *Q_i = Q.get_Q(i,active_size);
632 		const Qfloat *Q_j = Q.get_Q(j,active_size);
633 
634 		double C_i = get_C(i);
635 		double C_j = get_C(j);
636 
637 		double old_alpha_i = alpha[i];
638 		double old_alpha_j = alpha[j];
639 
640 		if(y[i]!=y[j])
641 		{
642 			double quad_coef = QD[i]+QD[j]+2*Q_i[j];
643 			if (quad_coef <= 0)
644 				quad_coef = TAU;
645 			double delta = (-G[i]-G[j])/quad_coef;
646 			double diff = alpha[i] - alpha[j];
647 			alpha[i] += delta;
648 			alpha[j] += delta;
649 
650 			if(diff > 0)
651 			{
652 				if(alpha[j] < 0)
653 				{
654 					alpha[j] = 0;
655 					alpha[i] = diff;
656 				}
657 			}
658 			else
659 			{
660 				if(alpha[i] < 0)
661 				{
662 					alpha[i] = 0;
663 					alpha[j] = -diff;
664 				}
665 			}
666 			if(diff > C_i - C_j)
667 			{
668 				if(alpha[i] > C_i)
669 				{
670 					alpha[i] = C_i;
671 					alpha[j] = C_i - diff;
672 				}
673 			}
674 			else
675 			{
676 				if(alpha[j] > C_j)
677 				{
678 					alpha[j] = C_j;
679 					alpha[i] = C_j + diff;
680 				}
681 			}
682 		}
683 		else
684 		{
685 			double quad_coef = QD[i]+QD[j]-2*Q_i[j];
686 			if (quad_coef <= 0)
687 				quad_coef = TAU;
688 			double delta = (G[i]-G[j])/quad_coef;
689 			double sum = alpha[i] + alpha[j];
690 			alpha[i] -= delta;
691 			alpha[j] += delta;
692 
693 			if(sum > C_i)
694 			{
695 				if(alpha[i] > C_i)
696 				{
697 					alpha[i] = C_i;
698 					alpha[j] = sum - C_i;
699 				}
700 			}
701 			else
702 			{
703 				if(alpha[j] < 0)
704 				{
705 					alpha[j] = 0;
706 					alpha[i] = sum;
707 				}
708 			}
709 			if(sum > C_j)
710 			{
711 				if(alpha[j] > C_j)
712 				{
713 					alpha[j] = C_j;
714 					alpha[i] = sum - C_j;
715 				}
716 			}
717 			else
718 			{
719 				if(alpha[i] < 0)
720 				{
721 					alpha[i] = 0;
722 					alpha[j] = sum;
723 				}
724 			}
725 		}
726 
727 		// update G
728 
729 		double delta_alpha_i = alpha[i] - old_alpha_i;
730 		double delta_alpha_j = alpha[j] - old_alpha_j;
731 
732 		for(int k=0;k<active_size;k++)
733 		{
734 			G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
735 		}
736 
737 		// update alpha_status and G_bar
738 
739 		{
740 			bool ui = is_upper_bound(i);
741 			bool uj = is_upper_bound(j);
742 			update_alpha_status(i);
743 			update_alpha_status(j);
744 			int k;
745 			if(ui != is_upper_bound(i))
746 			{
747 				Q_i = Q.get_Q(i,l);
748 				if(ui)
749 					for(k=0;k<l;k++)
750 						G_bar[k] -= C_i * Q_i[k];
751 				else
752 					for(k=0;k<l;k++)
753 						G_bar[k] += C_i * Q_i[k];
754 			}
755 
756 			if(uj != is_upper_bound(j))
757 			{
758 				Q_j = Q.get_Q(j,l);
759 				if(uj)
760 					for(k=0;k<l;k++)
761 						G_bar[k] -= C_j * Q_j[k];
762 				else
763 					for(k=0;k<l;k++)
764 						G_bar[k] += C_j * Q_j[k];
765 			}
766 		}
767 	}
768 
769 	if(iter >= max_iter)
770 	{
771 		if(active_size < l)
772 		{
773 			// reconstruct the whole gradient to calculate objective value
774 			reconstruct_gradient();
775 			active_size = l;
776 			info("*");
777 		}
778 		fprintf(stderr,"\nWARNING: reaching max number of iterations\n");
779 	}
780 
781 	// calculate rho
782 
783 	si->rho = calculate_rho();
784 
785 	// calculate objective value
786 	{
787 		double v = 0;
788 		int i;
789 		for(i=0;i<l;i++)
790 			v += alpha[i] * (G[i] + p[i]);
791 
792 		si->obj = v/2;
793 	}
794 
795 	// put back the solution
796 	{
797 		for(int i=0;i<l;i++)
798 			alpha_[active_set[i]] = alpha[i];
799 	}
800 
801 	// juggle everything back
802 	/*{
803 		for(int i=0;i<l;i++)
804 			while(active_set[i] != i)
805 				swap_index(i,active_set[i]);
806 				// or Q.swap_index(i,active_set[i]);
807 	}*/
808 
809 	si->upper_bound_p = Cp;
810 	si->upper_bound_n = Cn;
811 
812 	info("\noptimization finished, #iter = %d\n",iter);
813 
814 	delete[] p;
815 	delete[] y;
816 	delete[] alpha;
817 	delete[] alpha_status;
818 	delete[] active_set;
819 	delete[] G;
820 	delete[] G_bar;
821 }
822 
823 // return 1 if already optimal, return 0 otherwise
824 int Solver::select_working_set(int &out_i, int &out_j)
825 {
826 	// return i,j such that
827 	// i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
828 	// j: minimizes the decrease of obj value
829 	//    (if quadratic coefficient <= 0, replace it with tau)
830 	//    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
831 
832 	double Gmax = -INF;
833 	double Gmax2 = -INF;
834 	int Gmax_idx = -1;
835 	int Gmin_idx = -1;
836 	double obj_diff_min = INF;
837 
838 	for(int t=0;t<active_size;t++)
839 		if(y[t]==+1)
840 		{
841 			if(!is_upper_bound(t))
842 				if(-G[t] >= Gmax)
843 				{
844 					Gmax = -G[t];
845 					Gmax_idx = t;
846 				}
847 		}
848 		else
849 		{
850 			if(!is_lower_bound(t))
851 				if(G[t] >= Gmax)
852 				{
853 					Gmax = G[t];
854 					Gmax_idx = t;
855 				}
856 		}
857 
858 	int i = Gmax_idx;
859 	const Qfloat *Q_i = NULL;
860 	if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1
861 		Q_i = Q->get_Q(i,active_size);
862 
863 	for(int j=0;j<active_size;j++)
864 	{
865 		if(y[j]==+1)
866 		{
867 			if (!is_lower_bound(j))
868 			{
869 				double grad_diff=Gmax+G[j];
870 				if (G[j] >= Gmax2)
871 					Gmax2 = G[j];
872 				if (grad_diff > 0)
873 				{
874 					double obj_diff;
875 					double quad_coef = QD[i]+QD[j]-2.0*y[i]*Q_i[j];
876 					if (quad_coef > 0)
877 						obj_diff = -(grad_diff*grad_diff)/quad_coef;
878 					else
879 						obj_diff = -(grad_diff*grad_diff)/TAU;
880 
881 					if (obj_diff <= obj_diff_min)
882 					{
883 						Gmin_idx=j;
884 						obj_diff_min = obj_diff;
885 					}
886 				}
887 			}
888 		}
889 		else
890 		{
891 			if (!is_upper_bound(j))
892 			{
893 				double grad_diff= Gmax-G[j];
894 				if (-G[j] >= Gmax2)
895 					Gmax2 = -G[j];
896 				if (grad_diff > 0)
897 				{
898 					double obj_diff;
899 					double quad_coef = QD[i]+QD[j]+2.0*y[i]*Q_i[j];
900 					if (quad_coef > 0)
901 						obj_diff = -(grad_diff*grad_diff)/quad_coef;
902 					else
903 						obj_diff = -(grad_diff*grad_diff)/TAU;
904 
905 					if (obj_diff <= obj_diff_min)
906 					{
907 						Gmin_idx=j;
908 						obj_diff_min = obj_diff;
909 					}
910 				}
911 			}
912 		}
913 	}
914 
915 	if(Gmax+Gmax2 < eps || Gmin_idx == -1)
916 		return 1;
917 
918 	out_i = Gmax_idx;
919 	out_j = Gmin_idx;
920 	return 0;
921 }
922 
923 bool Solver::be_shrunk(int i, double Gmax1, double Gmax2)
924 {
925 	if(is_upper_bound(i))
926 	{
927 		if(y[i]==+1)
928 			return(-G[i] > Gmax1);
929 		else
930 			return(-G[i] > Gmax2);
931 	}
932 	else if(is_lower_bound(i))
933 	{
934 		if(y[i]==+1)
935 			return(G[i] > Gmax2);
936 		else
937 			return(G[i] > Gmax1);
938 	}
939 	else
940 		return(false);
941 }
942 
943 void Solver::do_shrinking()
944 {
945 	int i;
946 	double Gmax1 = -INF;		// max { -y_i * grad(f)_i | i in I_up(\alpha) }
947 	double Gmax2 = -INF;		// max { y_i * grad(f)_i | i in I_low(\alpha) }
948 
949 	// find maximal violating pair first
950 	for(i=0;i<active_size;i++)
951 	{
952 		if(y[i]==+1)
953 		{
954 			if(!is_upper_bound(i))
955 			{
956 				if(-G[i] >= Gmax1)
957 					Gmax1 = -G[i];
958 			}
959 			if(!is_lower_bound(i))
960 			{
961 				if(G[i] >= Gmax2)
962 					Gmax2 = G[i];
963 			}
964 		}
965 		else
966 		{
967 			if(!is_upper_bound(i))
968 			{
969 				if(-G[i] >= Gmax2)
970 					Gmax2 = -G[i];
971 			}
972 			if(!is_lower_bound(i))
973 			{
974 				if(G[i] >= Gmax1)
975 					Gmax1 = G[i];
976 			}
977 		}
978 	}
979 
980 	if(unshrink == false && Gmax1 + Gmax2 <= eps*10)
981 	{
982 		unshrink = true;
983 		reconstruct_gradient();
984 		active_size = l;
985 		info("*");
986 	}
987 
988 	for(i=0;i<active_size;i++)
989 		if (be_shrunk(i, Gmax1, Gmax2))
990 		{
991 			active_size--;
992 			while (active_size > i)
993 			{
994 				if (!be_shrunk(active_size, Gmax1, Gmax2))
995 				{
996 					swap_index(i,active_size);
997 					break;
998 				}
999 				active_size--;
1000 			}
1001 		}
1002 }
1003 
1004 double Solver::calculate_rho()
1005 {
1006 	double r;
1007 	int nr_free = 0;
1008 	double ub = INF, lb = -INF, sum_free = 0;
1009 	for(int i=0;i<active_size;i++)
1010 	{
1011 		double yG = y[i]*G[i];
1012 
1013 		if(is_upper_bound(i))
1014 		{
1015 			if(y[i]==-1)
1016 				ub = min(ub,yG);
1017 			else
1018 				lb = max(lb,yG);
1019 		}
1020 		else if(is_lower_bound(i))
1021 		{
1022 			if(y[i]==+1)
1023 				ub = min(ub,yG);
1024 			else
1025 				lb = max(lb,yG);
1026 		}
1027 		else
1028 		{
1029 			++nr_free;
1030 			sum_free += yG;
1031 		}
1032 	}
1033 
1034 	if(nr_free>0)
1035 		r = sum_free/nr_free;
1036 	else
1037 		r = (ub+lb)/2;
1038 
1039 	return r;
1040 }
1041 
1042 //
1043 // Solver for nu-svm classification and regression
1044 //
1045 // additional constraint: e^T \alpha = constant
1046 //
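// Because of the extra equality constraint, the working pair is always chosen
// within a single class (y_i == y_j), so both linear constraints stay satisfied.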
1047 class Solver_NU: public Solver
1048 {
1049 public:
1050 	Solver_NU() {}
1051 	void Solve(int l, const QMatrix& Q, const double *p, const schar *y,
1052 		   double *alpha, double Cp, double Cn, double eps,
1053 		   SolutionInfo* si, int shrinking)
1054 	{
1055 		this->si = si;
1056 		Solver::Solve(l,Q,p,y,alpha,Cp,Cn,eps,si,shrinking);
1057 	}
1058 private:
1059 	SolutionInfo *si;
1060 	int select_working_set(int &i, int &j);
1061 	double calculate_rho();
1062 	bool be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4);
1063 	void do_shrinking();
1064 };
1065 
1066 // return 1 if already optimal, return 0 otherwise
1067 int Solver_NU::select_working_set(int &out_i, int &out_j)
1068 {
1069 	// return i,j such that y_i = y_j and
1070 	// i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
1071 	// j: minimizes the decrease of obj value
1072 	//    (if quadratic coefficient <= 0, replace it with tau)
1073 	//    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
1074 
1075 	double Gmaxp = -INF;
1076 	double Gmaxp2 = -INF;
1077 	int Gmaxp_idx = -1;
1078 
1079 	double Gmaxn = -INF;
1080 	double Gmaxn2 = -INF;
1081 	int Gmaxn_idx = -1;
1082 
1083 	int Gmin_idx = -1;
1084 	double obj_diff_min = INF;
1085 
1086 	for(int t=0;t<active_size;t++)
1087 		if(y[t]==+1)
1088 		{
1089 			if(!is_upper_bound(t))
1090 				if(-G[t] >= Gmaxp)
1091 				{
1092 					Gmaxp = -G[t];
1093 					Gmaxp_idx = t;
1094 				}
1095 		}
1096 		else
1097 		{
1098 			if(!is_lower_bound(t))
1099 				if(G[t] >= Gmaxn)
1100 				{
1101 					Gmaxn = G[t];
1102 					Gmaxn_idx = t;
1103 				}
1104 		}
1105 
1106 	int ip = Gmaxp_idx;
1107 	int in = Gmaxn_idx;
1108 	const Qfloat *Q_ip = NULL;
1109 	const Qfloat *Q_in = NULL;
1110 	if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
1111 		Q_ip = Q->get_Q(ip,active_size);
1112 	if(in != -1)
1113 		Q_in = Q->get_Q(in,active_size);
1114 
1115 	for(int j=0;j<active_size;j++)
1116 	{
1117 		if(y[j]==+1)
1118 		{
1119 			if (!is_lower_bound(j))
1120 			{
1121 				double grad_diff=Gmaxp+G[j];
1122 				if (G[j] >= Gmaxp2)
1123 					Gmaxp2 = G[j];
1124 				if (grad_diff > 0)
1125 				{
1126 					double obj_diff;
1127 					double quad_coef = QD[ip]+QD[j]-2*Q_ip[j];
1128 					if (quad_coef > 0)
1129 						obj_diff = -(grad_diff*grad_diff)/quad_coef;
1130 					else
1131 						obj_diff = -(grad_diff*grad_diff)/TAU;
1132 
1133 					if (obj_diff <= obj_diff_min)
1134 					{
1135 						Gmin_idx=j;
1136 						obj_diff_min = obj_diff;
1137 					}
1138 				}
1139 			}
1140 		}
1141 		else
1142 		{
1143 			if (!is_upper_bound(j))
1144 			{
1145 				double grad_diff=Gmaxn-G[j];
1146 				if (-G[j] >= Gmaxn2)
1147 					Gmaxn2 = -G[j];
1148 				if (grad_diff > 0)
1149 				{
1150 					double obj_diff;
1151 					double quad_coef = QD[in]+QD[j]-2*Q_in[j];
1152 					if (quad_coef > 0)
1153 						obj_diff = -(grad_diff*grad_diff)/quad_coef;
1154 					else
1155 						obj_diff = -(grad_diff*grad_diff)/TAU;
1156 
1157 					if (obj_diff <= obj_diff_min)
1158 					{
1159 						Gmin_idx=j;
1160 						obj_diff_min = obj_diff;
1161 					}
1162 				}
1163 			}
1164 		}
1165 	}
1166 
1167 	if(max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps || Gmin_idx == -1)
1168 		return 1;
1169 
1170 	if (y[Gmin_idx] == +1)
1171 		out_i = Gmaxp_idx;
1172 	else
1173 		out_i = Gmaxn_idx;
1174 	out_j = Gmin_idx;
1175 
1176 	return 0;
1177 }
1178 
1179 bool Solver_NU::be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4)
1180 {
1181 	if(is_upper_bound(i))
1182 	{
1183 		if(y[i]==+1)
1184 			return(-G[i] > Gmax1);
1185 		else
1186 			return(-G[i] > Gmax4);
1187 	}
1188 	else if(is_lower_bound(i))
1189 	{
1190 		if(y[i]==+1)
1191 			return(G[i] > Gmax2);
1192 		else
1193 			return(G[i] > Gmax3);
1194 	}
1195 	else
1196 		return(false);
1197 }
1198 
1199 void Solver_NU::do_shrinking()
1200 {
1201 	double Gmax1 = -INF;	// max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) }
1202 	double Gmax2 = -INF;	// max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) }
1203 	double Gmax3 = -INF;	// max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) }
1204 	double Gmax4 = -INF;	// max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) }
1205 
1206 	// find maximal violating pair first
1207 	int i;
1208 	for(i=0;i<active_size;i++)
1209 	{
1210 		if(!is_upper_bound(i))
1211 		{
1212 			if(y[i]==+1)
1213 			{
1214 				if(-G[i] > Gmax1) Gmax1 = -G[i];
1215 			}
1216 			else	if(-G[i] > Gmax4) Gmax4 = -G[i];
1217 		}
1218 		if(!is_lower_bound(i))
1219 		{
1220 			if(y[i]==+1)
1221 			{
1222 				if(G[i] > Gmax2) Gmax2 = G[i];
1223 			}
1224 			else	if(G[i] > Gmax3) Gmax3 = G[i];
1225 		}
1226 	}
1227 
1228 	if(unshrink == false && max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10)
1229 	{
1230 		unshrink = true;
1231 		reconstruct_gradient();
1232 		active_size = l;
1233 	}
1234 
1235 	for(i=0;i<active_size;i++)
1236 		if (be_shrunk(i, Gmax1, Gmax2, Gmax3, Gmax4))
1237 		{
1238 			active_size--;
1239 			while (active_size > i)
1240 			{
1241 				if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
1242 				{
1243 					swap_index(i,active_size);
1244 					break;
1245 				}
1246 				active_size--;
1247 			}
1248 		}
1249 }
1250 
1251 double Solver_NU::calculate_rho()
1252 {
1253 	int nr_free1 = 0,nr_free2 = 0;
1254 	double ub1 = INF, ub2 = INF;
1255 	double lb1 = -INF, lb2 = -INF;
1256 	double sum_free1 = 0, sum_free2 = 0;
1257 
1258 	for(int i=0;i<active_size;i++)
1259 	{
1260 		if(y[i]==+1)
1261 		{
1262 			if(is_upper_bound(i))
1263 				lb1 = max(lb1,G[i]);
1264 			else if(is_lower_bound(i))
1265 				ub1 = min(ub1,G[i]);
1266 			else
1267 			{
1268 				++nr_free1;
1269 				sum_free1 += G[i];
1270 			}
1271 		}
1272 		else
1273 		{
1274 			if(is_upper_bound(i))
1275 				lb2 = max(lb2,G[i]);
1276 			else if(is_lower_bound(i))
1277 				ub2 = min(ub2,G[i]);
1278 			else
1279 			{
1280 				++nr_free2;
1281 				sum_free2 += G[i];
1282 			}
1283 		}
1284 	}
1285 
1286 	double r1,r2;
1287 	if(nr_free1 > 0)
1288 		r1 = sum_free1/nr_free1;
1289 	else
1290 		r1 = (ub1+lb1)/2;
1291 
1292 	if(nr_free2 > 0)
1293 		r2 = sum_free2/nr_free2;
1294 	else
1295 		r2 = (ub2+lb2)/2;
1296 
1297 	si->r = (r1+r2)/2;
1298 	return (r1-r2)/2;
1299 }
1300 
1301 //
1302 // Q matrices for various formulations
1303 //
1304 class SVC_Q: public Kernel
1305 {
1306 public:
1307 	SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
1308 	:Kernel(prob.l, prob.x, param)
1309 	{
1310 		clone(y,y_,prob.l);
1311 		cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
1312 		QD = new double[prob.l];
1313 		for(int i=0;i<prob.l;i++)
1314 			QD[i] = (this->*kernel_function)(i,i);
1315 	}
1316 
1317 	Qfloat *get_Q(int i, int len) const
1318 	{
1319 		Qfloat *data;
1320 		int start, j;
1321 		if((start = cache->get_data(i,&data,len)) < len)
1322 		{
1323 			for(j=start;j<len;j++)
1324 				data[j] = (Qfloat)(y[i]*y[j]*(this->*kernel_function)(i,j));
1325 		}
1326 		return data;
1327 	}
1328 
1329 	double *get_QD() const
1330 	{
1331 		return QD;
1332 	}
1333 
1334 	void swap_index(int i, int j) const
1335 	{
1336 		cache->swap_index(i,j);
1337 		Kernel::swap_index(i,j);
1338 		swap(y[i],y[j]);
1339 		swap(QD[i],QD[j]);
1340 	}
1341 
1342 	~SVC_Q()
1343 	{
1344 		delete[] y;
1345 		delete cache;
1346 		delete[] QD;
1347 	}
1348 private:
1349 	schar *y;
1350 	Cache *cache;
1351 	double *QD;
1352 };
1353 
1354 class ONE_CLASS_Q: public Kernel
1355 {
1356 public:
1357 	ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
1358 	:Kernel(prob.l, prob.x, param)
1359 	{
1360 		cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
1361 		QD = new double[prob.l];
1362 		for(int i=0;i<prob.l;i++)
1363 			QD[i] = (this->*kernel_function)(i,i);
1364 	}
1365 
1366 	Qfloat *get_Q(int i, int len) const
1367 	{
1368 		Qfloat *data;
1369 		int start, j;
1370 		if((start = cache->get_data(i,&data,len)) < len)
1371 		{
1372 			for(j=start;j<len;j++)
1373 				data[j] = (Qfloat)(this->*kernel_function)(i,j);
1374 		}
1375 		return data;
1376 	}
1377 
1378 	double *get_QD() const
1379 	{
1380 		return QD;
1381 	}
1382 
1383 	void swap_index(int i, int j) const
1384 	{
1385 		cache->swap_index(i,j);
1386 		Kernel::swap_index(i,j);
1387 		swap(QD[i],QD[j]);
1388 	}
1389 
1390 	~ONE_CLASS_Q()
1391 	{
1392 		delete cache;
1393 		delete[] QD;
1394 	}
1395 private:
1396 	Cache *cache;
1397 	double *QD;
1398 };
1399 
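// For regression the dual has 2*l variables (alpha and alpha*); sign[] and index[]
// map the doubled index space back onto the l training points so that only an l*l
// kernel cache is needed.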
1400 class SVR_Q: public Kernel
1401 {
1402 public:
1403 	SVR_Q(const svm_problem& prob, const svm_parameter& param)
1404 	:Kernel(prob.l, prob.x, param)
1405 	{
1406 		l = prob.l;
1407 		cache = new Cache(l,(long int)(param.cache_size*(1<<20)));
1408 		QD = new double[2*l];
1409 		sign = new schar[2*l];
1410 		index = new int[2*l];
1411 		for(int k=0;k<l;k++)
1412 		{
1413 			sign[k] = 1;
1414 			sign[k+l] = -1;
1415 			index[k] = k;
1416 			index[k+l] = k;
1417 			QD[k] = (this->*kernel_function)(k,k);
1418 			QD[k+l] = QD[k];
1419 		}
1420 		buffer[0] = new Qfloat[2*l];
1421 		buffer[1] = new Qfloat[2*l];
1422 		next_buffer = 0;
1423 	}
1424 
1425 	void swap_index(int i, int j) const
1426 	{
1427 		swap(sign[i],sign[j]);
1428 		swap(index[i],index[j]);
1429 		swap(QD[i],QD[j]);
1430 	}
1431 
1432 	Qfloat *get_Q(int i, int len) const
1433 	{
1434 		Qfloat *data;
1435 		int j, real_i = index[i];
1436 		if(cache->get_data(real_i,&data,l) < l)
1437 		{
1438 			for(j=0;j<l;j++)
1439 				data[j] = (Qfloat)(this->*kernel_function)(real_i,j);
1440 		}
1441 
1442 		// reorder and copy
1443 		Qfloat *buf = buffer[next_buffer];
1444 		next_buffer = 1 - next_buffer;
1445 		schar si = sign[i];
1446 		for(j=0;j<len;j++)
1447 			buf[j] = (Qfloat) si * (Qfloat) sign[j] * data[index[j]];
1448 		return buf;
1449 	}
1450 
1451 	double *get_QD() const
1452 	{
1453 		return QD;
1454 	}
1455 
1456 	~SVR_Q()
1457 	{
1458 		delete cache;
1459 		delete[] sign;
1460 		delete[] index;
1461 		delete[] buffer[0];
1462 		delete[] buffer[1];
1463 		delete[] QD;
1464 	}
1465 private:
1466 	int l;
1467 	Cache *cache;
1468 	schar *sign;
1469 	int *index;
1470 	mutable int next_buffer;
1471 	Qfloat *buffer[2];
1472 	double *QD;
1473 };
1474 
1475 //
1476 // construct and solve various formulations
1477 //
1478 static void solve_c_svc(
1479 	const svm_problem *prob, const svm_parameter* param,
1480 	double *alpha, Solver::SolutionInfo* si, double Cp, double Cn)
1481 {
1482 	int l = prob->l;
1483 	double *minus_ones = new double[l];
1484 	schar *y = new schar[l];
1485 
1486 	int i;
1487 
1488 	for(i=0;i<l;i++)
1489 	{
1490 		alpha[i] = 0;
1491 		minus_ones[i] = -1;
1492 		if(prob->y[i] > 0) y[i] = +1; else y[i] = -1;
1493 	}
1494 
1495 	Solver s;
1496 	s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
1497 		alpha, Cp, Cn, param->eps, si, param->shrinking);
1498 
1499 	double sum_alpha=0;
1500 	for(i=0;i<l;i++)
1501 		sum_alpha += alpha[i];
1502 
1503 	if (Cp==Cn)
1504 		info("nu = %f\n", sum_alpha/(Cp*prob->l));
1505 
1506 	for(i=0;i<l;i++)
1507 		alpha[i] *= y[i];
1508 
1509 	delete[] minus_ones;
1510 	delete[] y;
1511 }
1512 
1513 static void solve_nu_svc(
1514 	const svm_problem *prob, const svm_parameter *param,
1515 	double *alpha, Solver::SolutionInfo* si)
1516 {
1517 	int i;
1518 	int l = prob->l;
1519 	double nu = param->nu;
1520 
1521 	schar *y = new schar[l];
1522 
1523 	for(i=0;i<l;i++)
1524 		if(prob->y[i]>0)
1525 			y[i] = +1;
1526 		else
1527 			y[i] = -1;
1528 
1529 	double sum_pos = nu*l/2;
1530 	double sum_neg = nu*l/2;
1531 
1532 	for(i=0;i<l;i++)
1533 		if(y[i] == +1)
1534 		{
1535 			alpha[i] = min(1.0,sum_pos);
1536 			sum_pos -= alpha[i];
1537 		}
1538 		else
1539 		{
1540 			alpha[i] = min(1.0,sum_neg);
1541 			sum_neg -= alpha[i];
1542 		}
1543 
1544 	double *zeros = new double[l];
1545 
1546 	for(i=0;i<l;i++)
1547 		zeros[i] = 0;
1548 
1549 	Solver_NU s;
1550 	s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
1551 		alpha, 1.0, 1.0, param->eps, si,  param->shrinking);
1552 	double r = si->r;
1553 
1554 	info("C = %f\n",1/r);
1555 
1556 	for(i=0;i<l;i++)
1557 		alpha[i] *= y[i]/r;
1558 
1559 	si->rho /= r;
1560 	si->obj /= (r*r);
1561 	si->upper_bound_p = 1/r;
1562 	si->upper_bound_n = 1/r;
1563 
1564 	delete[] y;
1565 	delete[] zeros;
1566 }
1567 
1568 static void solve_one_class(
1569 	const svm_problem *prob, const svm_parameter *param,
1570 	double *alpha, Solver::SolutionInfo* si)
1571 {
1572 	int l = prob->l;
1573 	double *zeros = new double[l];
1574 	schar *ones = new schar[l];
1575 	int i;
1576 
1577 	int n = (int)(param->nu*prob->l);	// # of alpha's at upper bound
1578 
1579 	for(i=0;i<n;i++)
1580 		alpha[i] = 1;
1581 	if(n<prob->l)
1582 		alpha[n] = param->nu * prob->l - n;
1583 	for(i=n+1;i<l;i++)
1584 		alpha[i] = 0;
1585 
1586 	for(i=0;i<l;i++)
1587 	{
1588 		zeros[i] = 0;
1589 		ones[i] = 1;
1590 	}
1591 
1592 	Solver s;
1593 	s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
1594 		alpha, 1.0, 1.0, param->eps, si, param->shrinking);
1595 
1596 	delete[] zeros;
1597 	delete[] ones;
1598 }
1599 
1600 static void solve_epsilon_svr(
1601 	const svm_problem *prob, const svm_parameter *param,
1602 	double *alpha, Solver::SolutionInfo* si)
1603 {
1604 	int l = prob->l;
1605 	double *alpha2 = new double[2*l];
1606 	double *linear_term = new double[2*l];
1607 	schar *y = new schar[2*l];
1608 	int i;
1609 
1610 	for(i=0;i<l;i++)
1611 	{
1612 		alpha2[i] = 0;
1613 		linear_term[i] = param->p - prob->y[i];
1614 		y[i] = 1;
1615 
1616 		alpha2[i+l] = 0;
1617 		linear_term[i+l] = param->p + prob->y[i];
1618 		y[i+l] = -1;
1619 	}
1620 
1621 	Solver s;
1622 	s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
1623 		alpha2, param->C, param->C, param->eps, si, param->shrinking);
1624 
1625 	double sum_alpha = 0;
1626 	for(i=0;i<l;i++)
1627 	{
1628 		alpha[i] = alpha2[i] - alpha2[i+l];
1629 		sum_alpha += fabs(alpha[i]);
1630 	}
1631 	info("nu = %f\n",sum_alpha/(param->C*l));
1632 
1633 	delete[] alpha2;
1634 	delete[] linear_term;
1635 	delete[] y;
1636 }
1637 
1638 static void solve_nu_svr(
1639 	const svm_problem *prob, const svm_parameter *param,
1640 	double *alpha, Solver::SolutionInfo* si)
1641 {
1642 	int l = prob->l;
1643 	double C = param->C;
1644 	double *alpha2 = new double[2*l];
1645 	double *linear_term = new double[2*l];
1646 	schar *y = new schar[2*l];
1647 	int i;
1648 
1649 	double sum = C * param->nu * l / 2;
1650 	for(i=0;i<l;i++)
1651 	{
1652 		alpha2[i] = alpha2[i+l] = min(sum,C);
1653 		sum -= alpha2[i];
1654 
1655 		linear_term[i] = - prob->y[i];
1656 		y[i] = 1;
1657 
1658 		linear_term[i+l] = prob->y[i];
1659 		y[i+l] = -1;
1660 	}
1661 
1662 	Solver_NU s;
1663 	s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
1664 		alpha2, C, C, param->eps, si, param->shrinking);
1665 
1666 	info("epsilon = %f\n",-si->r);
1667 
1668 	for(i=0;i<l;i++)
1669 		alpha[i] = alpha2[i] - alpha2[i+l];
1670 
1671 	delete[] alpha2;
1672 	delete[] linear_term;
1673 	delete[] y;
1674 }
1675 
1676 //
1677 // decision_function
1678 //
1679 struct decision_function
1680 {
1681 	double *alpha;
1682 	double rho;
1683 };
1684 
1685 static decision_function svm_train_one(
1686 	const svm_problem *prob, const svm_parameter *param,
1687 	double Cp, double Cn)
1688 {
1689 	double *alpha = Malloc(double,prob->l);
1690 	Solver::SolutionInfo si;
1691 	switch(param->svm_type)
1692 	{
1693 		case C_SVC:
1694 			solve_c_svc(prob,param,alpha,&si,Cp,Cn);
1695 			break;
1696 		case NU_SVC:
1697 			solve_nu_svc(prob,param,alpha,&si);
1698 			break;
1699 		case ONE_CLASS:
1700 			solve_one_class(prob,param,alpha,&si);
1701 			break;
1702 		case EPSILON_SVR:
1703 			solve_epsilon_svr(prob,param,alpha,&si);
1704 			break;
1705 		case NU_SVR:
1706 			solve_nu_svr(prob,param,alpha,&si);
1707 			break;
1708 	}
1709 
1710 	info("obj = %f, rho = %f\n",si.obj,si.rho);
1711 
1712 	// output SVs
1713 
1714 	int nSV = 0;
1715 	int nBSV = 0;
1716 	for(int i=0;i<prob->l;i++)
1717 	{
1718 		if(fabs(alpha[i]) > 0)
1719 		{
1720 			++nSV;
1721 			if(prob->y[i] > 0)
1722 			{
1723 				if(fabs(alpha[i]) >= si.upper_bound_p)
1724 					++nBSV;
1725 			}
1726 			else
1727 			{
1728 				if(fabs(alpha[i]) >= si.upper_bound_n)
1729 					++nBSV;
1730 			}
1731 		}
1732 	}
1733 
1734 	info("nSV = %d, nBSV = %d\n",nSV,nBSV);
1735 
1736 	decision_function f;
1737 	f.alpha = alpha;
1738 	f.rho = si.rho;
1739 	return f;
1740 }
1741 
1742 // Platt's binary SVM Probabilistic Output: an improvement from Lin et al.
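// Fits P(y=1|f) = 1/(1+exp(A*f+B)) to the decision values f by regularized maximum
// likelihood, using Newton's method with a backtracking line search.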
1743 static void sigmoid_train(
1744 	int l, const double *dec_values, const double *labels,
1745 	double& A, double& B)
1746 {
1747 	double prior1=0, prior0 = 0;
1748 	int i;
1749 
1750 	for (i=0;i<l;i++)
1751 		if (labels[i] > 0) prior1+=1;
1752 		else prior0+=1;
1753 
1754 	int max_iter=100;	// Maximal number of iterations
1755 	double min_step=1e-10;	// Minimal step taken in line search
1756 	double sigma=1e-12;	// For numerically strict PD of Hessian
1757 	double eps=1e-5;
1758 	double hiTarget=(prior1+1.0)/(prior1+2.0);
1759 	double loTarget=1/(prior0+2.0);
1760 	double *t=Malloc(double,l);
1761 	double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;
1762 	double newA,newB,newf,d1,d2;
1763 	int iter;
1764 
1765 	// Initial Point and Initial Fun Value
1766 	A=0.0; B=log((prior0+1.0)/(prior1+1.0));
1767 	double fval = 0.0;
1768 
1769 	for (i=0;i<l;i++)
1770 	{
1771 		if (labels[i]>0) t[i]=hiTarget;
1772 		else t[i]=loTarget;
1773 		fApB = dec_values[i]*A+B;
1774 		if (fApB>=0)
1775 			fval += t[i]*fApB + log(1+exp(-fApB));
1776 		else
1777 			fval += (t[i] - 1)*fApB +log(1+exp(fApB));
1778 	}
1779 	for (iter=0;iter<max_iter;iter++)
1780 	{
1781 		// Update Gradient and Hessian (use H' = H + sigma I)
1782 		h11=sigma; // numerically ensures strict PD
1783 		h22=sigma;
1784 		h21=0.0;g1=0.0;g2=0.0;
1785 		for (i=0;i<l;i++)
1786 		{
1787 			fApB = dec_values[i]*A+B;
1788 			if (fApB >= 0)
1789 			{
1790 				p=exp(-fApB)/(1.0+exp(-fApB));
1791 				q=1.0/(1.0+exp(-fApB));
1792 			}
1793 			else
1794 			{
1795 				p=1.0/(1.0+exp(fApB));
1796 				q=exp(fApB)/(1.0+exp(fApB));
1797 			}
1798 			d2=p*q;
1799 			h11+=dec_values[i]*dec_values[i]*d2;
1800 			h22+=d2;
1801 			h21+=dec_values[i]*d2;
1802 			d1=t[i]-p;
1803 			g1+=dec_values[i]*d1;
1804 			g2+=d1;
1805 		}
1806 
1807 		// Stopping Criteria
1808 		if (fabs(g1)<eps && fabs(g2)<eps)
1809 			break;
1810 
1811 		// Finding Newton direction: -inv(H') * g
1812 		det=h11*h22-h21*h21;
1813 		dA=-(h22*g1 - h21 * g2) / det;
1814 		dB=-(-h21*g1+ h11 * g2) / det;
1815 		gd=g1*dA+g2*dB;
1816 
1817 
1818 		stepsize = 1;		// Line Search
1819 		while (stepsize >= min_step)
1820 		{
1821 			newA = A + stepsize * dA;
1822 			newB = B + stepsize * dB;
1823 
1824 			// New function value
1825 			newf = 0.0;
1826 			for (i=0;i<l;i++)
1827 			{
1828 				fApB = dec_values[i]*newA+newB;
1829 				if (fApB >= 0)
1830 					newf += t[i]*fApB + log(1+exp(-fApB));
1831 				else
1832 					newf += (t[i] - 1)*fApB +log(1+exp(fApB));
1833 			}
1834 			// Check sufficient decrease
1835 			if (newf<fval+0.0001*stepsize*gd)
1836 			{
1837 				A=newA;B=newB;fval=newf;
1838 				break;
1839 			}
1840 			else
1841 				stepsize = stepsize / 2.0;
1842 		}
1843 
1844 		if (stepsize < min_step)
1845 		{
1846 			info("Line search fails in two-class probability estimates\n");
1847 			break;
1848 		}
1849 	}
1850 
1851 	if (iter>=max_iter)
1852 		info("Reaching maximal iterations in two-class probability estimates\n");
1853 	free(t);
1854 }
1855 
1856 static double sigmoid_predict(double decision_value, double A, double B)
1857 {
1858 	double fApB = decision_value*A+B;
1859 	// 1-p used later; avoid catastrophic cancellation
1860 	if (fApB >= 0)
1861 		return exp(-fApB)/(1.0+exp(-fApB));
1862 	else
1863 		return 1.0/(1+exp(fApB)) ;
1864 }
1865 
1866 // Method 2 from the multiclass_prob paper by Wu, Lin, and Weng
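// Estimates the class probabilities p from the pairwise probabilities r[i][j] by
// iterative fixed-point updates on p, renormalizing after each update.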
1867 static void multiclass_probability(int k, double **r, double *p)
1868 {
1869 	int t,j;
1870 	int iter = 0, max_iter=max(100,k);
1871 	double **Q=Malloc(double *,k);
1872 	double *Qp=Malloc(double,k);
1873 	double pQp, eps=0.005/k;
1874 
1875 	for (t=0;t<k;t++)
1876 	{
1877 		p[t]=1.0/k;  // Valid if k = 1
1878 		Q[t]=Malloc(double,k);
1879 		Q[t][t]=0;
1880 		for (j=0;j<t;j++)
1881 		{
1882 			Q[t][t]+=r[j][t]*r[j][t];
1883 			Q[t][j]=Q[j][t];
1884 		}
1885 		for (j=t+1;j<k;j++)
1886 		{
1887 			Q[t][t]+=r[j][t]*r[j][t];
1888 			Q[t][j]=-r[j][t]*r[t][j];
1889 		}
1890 	}
1891 	for (iter=0;iter<max_iter;iter++)
1892 	{
1893 		// stopping condition, recalculate Qp and pQp for numerical accuracy
1894 		pQp=0;
1895 		for (t=0;t<k;t++)
1896 		{
1897 			Qp[t]=0;
1898 			for (j=0;j<k;j++)
1899 				Qp[t]+=Q[t][j]*p[j];
1900 			pQp+=p[t]*Qp[t];
1901 		}
1902 		double max_error=0;
1903 		for (t=0;t<k;t++)
1904 		{
1905 			double error=fabs(Qp[t]-pQp);
1906 			if (error>max_error)
1907 				max_error=error;
1908 		}
1909 		if (max_error<eps) break;
1910 
1911 		for (t=0;t<k;t++)
1912 		{
1913 			double diff=(-Qp[t]+pQp)/Q[t][t];
1914 			p[t]+=diff;
1915 			pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff);
1916 			for (j=0;j<k;j++)
1917 			{
1918 				Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff);
1919 				p[j]/=(1+diff);
1920 			}
1921 		}
1922 	}
1923 	if (iter>=max_iter)
1924 		info("Exceeds max_iter in multiclass_prob\n");
1925 	for(t=0;t<k;t++) free(Q[t]);
1926 	free(Q);
1927 	free(Qp);
1928 }
1929 
1930 // Cross-validation decision values for probability estimates
1931 static void svm_binary_svc_probability(
1932 	const svm_problem *prob, const svm_parameter *param,
1933 	double Cp, double Cn, double& probA, double& probB)
1934 {
1935 	int i;
1936 	int nr_fold = 5;
1937 	int *perm = Malloc(int,prob->l);
1938 	double *dec_values = Malloc(double,prob->l);
1939 
1940 	// random shuffle
1941 	for(i=0;i<prob->l;i++) perm[i]=i;
1942 	for(i=0;i<prob->l;i++)
1943 	{
1944 		int j = i+rand()%(prob->l-i);
1945 		swap(perm[i],perm[j]);
1946 	}
1947 	for(i=0;i<nr_fold;i++)
1948 	{
1949 		int begin = i*prob->l/nr_fold;
1950 		int end = (i+1)*prob->l/nr_fold;
1951 		int j,k;
1952 		struct svm_problem subprob;
1953 
1954 		subprob.l = prob->l-(end-begin);
1955 		subprob.x = Malloc(struct svm_node*,subprob.l);
1956 		subprob.y = Malloc(double,subprob.l);
1957 
1958 		k=0;
1959 		for(j=0;j<begin;j++)
1960 		{
1961 			subprob.x[k] = prob->x[perm[j]];
1962 			subprob.y[k] = prob->y[perm[j]];
1963 			++k;
1964 		}
1965 		for(j=end;j<prob->l;j++)
1966 		{
1967 			subprob.x[k] = prob->x[perm[j]];
1968 			subprob.y[k] = prob->y[perm[j]];
1969 			++k;
1970 		}
1971 		int p_count=0,n_count=0;
1972 		for(j=0;j<k;j++)
1973 			if(subprob.y[j]>0)
1974 				p_count++;
1975 			else
1976 				n_count++;
1977 
1978 		if(p_count==0 && n_count==0)
1979 			for(j=begin;j<end;j++)
1980 				dec_values[perm[j]] = 0;
1981 		else if(p_count > 0 && n_count == 0)
1982 			for(j=begin;j<end;j++)
1983 				dec_values[perm[j]] = 1;
1984 		else if(p_count == 0 && n_count > 0)
1985 			for(j=begin;j<end;j++)
1986 				dec_values[perm[j]] = -1;
1987 		else
1988 		{
1989 			svm_parameter subparam = *param;
1990 			subparam.probability=0;
1991 			subparam.C=1.0;
1992 			subparam.nr_weight=2;
1993 			subparam.weight_label = Malloc(int,2);
1994 			subparam.weight = Malloc(double,2);
1995 			subparam.weight_label[0]=+1;
1996 			subparam.weight_label[1]=-1;
1997 			subparam.weight[0]=Cp;
1998 			subparam.weight[1]=Cn;
1999 			struct svm_model *submodel = svm_train(&subprob,&subparam);
2000 			for(j=begin;j<end;j++)
2001 			{
2002 				svm_predict_values(submodel,prob->x[perm[j]],&(dec_values[perm[j]]));
2003 				// ensure +1/-1 label order; this is why the generic CV subroutine is not used here
2004 				dec_values[perm[j]] *= submodel->label[0];
2005 			}
2006 			svm_free_and_destroy_model(&submodel);
2007 			svm_destroy_param(&subparam);
2008 		}
2009 		free(subprob.x);
2010 		free(subprob.y);
2011 	}
2012 	sigmoid_train(prob->l,dec_values,prob->y,probA,probB);
2013 	free(dec_values);
2014 	free(perm);
2015 }
2016 
2017 // Return parameter of a Laplace distribution
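// The Laplace scale parameter sigma is estimated from cross-validation residuals:
// the mean absolute error is recomputed after discarding residuals larger than
// 5 standard deviations (treated as outliers).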
2018 static double svm_svr_probability(
2019 	const svm_problem *prob, const svm_parameter *param)
2020 {
2021 	int i;
2022 	int nr_fold = 5;
2023 	double *ymv = Malloc(double,prob->l);
2024 	double mae = 0;
2025 
2026 	svm_parameter newparam = *param;
2027 	newparam.probability = 0;
2028 	svm_cross_validation(prob,&newparam,nr_fold,ymv);
2029 	for(i=0;i<prob->l;i++)
2030 	{
2031 		ymv[i]=prob->y[i]-ymv[i];
2032 		mae += fabs(ymv[i]);
2033 	}
2034 	mae /= prob->l;
2035 	double std=sqrt(2*mae*mae);
2036 	int count=0;
2037 	mae=0;
2038 	for(i=0;i<prob->l;i++)
2039 		if (fabs(ymv[i]) > 5*std)
2040 			count=count+1;
2041 		else
2042 			mae+=fabs(ymv[i]);
2043 	mae /= (prob->l-count);
2044 	info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae);
2045 	free(ymv);
2046 	return mae;
2047 }
2048 
2049 
2050 // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
2051 // perm, length l, must be allocated before calling this subroutine
2052 static void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
2053 {
2054 	int l = prob->l;
2055 	int max_nr_class = 16;
2056 	int nr_class = 0;
2057 	int *label = Malloc(int,max_nr_class);
2058 	int *count = Malloc(int,max_nr_class);
2059 	int *data_label = Malloc(int,l);
2060 	int i;
2061 
2062 	for(i=0;i<l;i++)
2063 	{
2064 		int this_label = (int)prob->y[i];
2065 		int j;
2066 		for(j=0;j<nr_class;j++)
2067 		{
2068 			if(this_label == label[j])
2069 			{
2070 				++count[j];
2071 				break;
2072 			}
2073 		}
2074 		data_label[i] = j;
2075 		if(j == nr_class)
2076 		{
2077 			if(nr_class == max_nr_class)
2078 			{
2079 				max_nr_class *= 2;
2080 				label = (int *)realloc(label,max_nr_class*sizeof(int));
2081 				count = (int *)realloc(count,max_nr_class*sizeof(int));
2082 			}
2083 			label[nr_class] = this_label;
2084 			count[nr_class] = 1;
2085 			++nr_class;
2086 		}
2087 	}
2088 
2089 	//
2090 	// Labels are ordered by their first occurrence in the training set.
2091 	// However, for two-class sets with -1/+1 labels where -1 appears first,
2092 	// we swap the labels so that, internally, the positive data of the binary SVM correspond to the +1 instances.
2093 	//
2094 	if (nr_class == 2 && label[0] == -1 && label[1] == 1)
2095 	{
2096 		swap(label[0],label[1]);
2097 		swap(count[0],count[1]);
2098 		for(i=0;i<l;i++)
2099 		{
2100 			if(data_label[i] == 0)
2101 				data_label[i] = 1;
2102 			else
2103 				data_label[i] = 0;
2104 		}
2105 	}
2106 
2107 	int *start = Malloc(int,nr_class);
2108 	start[0] = 0;
2109 	for(i=1;i<nr_class;i++)
2110 		start[i] = start[i-1]+count[i-1];
2111 	for(i=0;i<l;i++)
2112 	{
2113 		perm[start[data_label[i]]] = i;
2114 		++start[data_label[i]];
2115 	}
2116 	start[0] = 0;
2117 	for(i=1;i<nr_class;i++)
2118 		start[i] = start[i-1]+count[i-1];
2119 
2120 	*nr_class_ret = nr_class;
2121 	*label_ret = label;
2122 	*start_ret = start;
2123 	*count_ret = count;
2124 	free(data_label);
2125 }
2126 
2127 //
2128 // Interface functions
2129 //
2130 svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
2131 {
2132 	svm_model *model = Malloc(svm_model,1);
2133 	model->param = *param;
2134 	model->free_sv = 0;	// XXX
2135 
2136 	if(param->svm_type == ONE_CLASS ||
2137 	   param->svm_type == EPSILON_SVR ||
2138 	   param->svm_type == NU_SVR)
2139 	{
2140 		// regression or one-class-svm
2141 		model->nr_class = 2;
2142 		model->label = NULL;
2143 		model->nSV = NULL;
2144 		model->probA = NULL; model->probB = NULL;
2145 		model->sv_coef = Malloc(double *,1);
2146 
2147 		if(param->probability &&
2148 		   (param->svm_type == EPSILON_SVR ||
2149 		    param->svm_type == NU_SVR))
2150 		{
2151 			model->probA = Malloc(double,1);
2152 			model->probA[0] = svm_svr_probability(prob,param);
2153 		}
2154 
2155 		decision_function f = svm_train_one(prob,param,0,0);
2156 		model->rho = Malloc(double,1);
2157 		model->rho[0] = f.rho;
2158 
2159 		int nSV = 0;
2160 		int i;
2161 		for(i=0;i<prob->l;i++)
2162 			if(fabs(f.alpha[i]) > 0) ++nSV;
2163 		model->l = nSV;
2164 		model->SV = Malloc(svm_node *,nSV);
2165 		model->sv_coef[0] = Malloc(double,nSV);
2166 		model->sv_indices = Malloc(int,nSV);
2167 		int j = 0;
2168 		for(i=0;i<prob->l;i++)
2169 			if(fabs(f.alpha[i]) > 0)
2170 			{
2171 				model->SV[j] = prob->x[i];
2172 				model->sv_coef[0][j] = f.alpha[i];
2173 				model->sv_indices[j] = i+1;
2174 				++j;
2175 			}
2176 
2177 		free(f.alpha);
2178 	}
2179 	else
2180 	{
2181 		// classification
2182 		int l = prob->l;
2183 		int nr_class;
2184 		int *label = NULL;
2185 		int *start = NULL;
2186 		int *count = NULL;
2187 		int *perm = Malloc(int,l);
2188 
2189 		// group training data of the same class
2190 		svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
2191 		if(nr_class == 1)
2192 			info("WARNING: training data in only one class. See README for details.\n");
2193 
2194 		svm_node **x = Malloc(svm_node *,l);
2195 		int i;
2196 		for(i=0;i<l;i++)
2197 			x[i] = prob->x[perm[i]];
2198 
2199 		// calculate weighted C
2200 
2201 		double *weighted_C = Malloc(double, nr_class);
2202 		for(i=0;i<nr_class;i++)
2203 			weighted_C[i] = param->C;
2204 		for(i=0;i<param->nr_weight;i++)
2205 		{
2206 			int j;
2207 			for(j=0;j<nr_class;j++)
2208 				if(param->weight_label[i] == label[j])
2209 					break;
2210 			if(j == nr_class)
2211 				fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
2212 			else
2213 				weighted_C[j] *= param->weight[i];
2214 		}
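		// weighted_C[j] now holds C * weight_j for every class listed in
		// param->weight_label, and plain C otherwise; it is used per class pair below.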
2215 
2216 		// train k*(k-1)/2 models
2217 
2218 		bool *nonzero = Malloc(bool,l);
2219 		for(i=0;i<l;i++)
2220 			nonzero[i] = false;
2221 		decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2);
2222 
2223 		double *probA=NULL,*probB=NULL;
2224 		if (param->probability)
2225 		{
2226 			probA=Malloc(double,nr_class*(nr_class-1)/2);
2227 			probB=Malloc(double,nr_class*(nr_class-1)/2);
2228 		}
2229 
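		// One-vs-one training: for each class pair (i,j) build a sub-problem with
		// class i relabeled +1 and class j relabeled -1, then train one binary
		// decision_function; p indexes the nr_class*(nr_class-1)/2 pairs.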
2230 		int p = 0;
2231 		for(i=0;i<nr_class;i++)
2232 			for(int j=i+1;j<nr_class;j++)
2233 			{
2234 				svm_problem sub_prob;
2235 				int si = start[i], sj = start[j];
2236 				int ci = count[i], cj = count[j];
2237 				sub_prob.l = ci+cj;
2238 				sub_prob.x = Malloc(svm_node *,sub_prob.l);
2239 				sub_prob.y = Malloc(double,sub_prob.l);
2240 				int k;
2241 				for(k=0;k<ci;k++)
2242 				{
2243 					sub_prob.x[k] = x[si+k];
2244 					sub_prob.y[k] = +1;
2245 				}
2246 				for(k=0;k<cj;k++)
2247 				{
2248 					sub_prob.x[ci+k] = x[sj+k];
2249 					sub_prob.y[ci+k] = -1;
2250 				}
2251 
2252 				if(param->probability)
2253 					svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]);
2254 
2255 				f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
2256 				for(k=0;k<ci;k++)
2257 					if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
2258 						nonzero[si+k] = true;
2259 				for(k=0;k<cj;k++)
2260 					if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
2261 						nonzero[sj+k] = true;
2262 				free(sub_prob.x);
2263 				free(sub_prob.y);
2264 				++p;
2265 			}
2266 
2267 		// build output
2268 
2269 		model->nr_class = nr_class;
2270 
2271 		model->label = Malloc(int,nr_class);
2272 		for(i=0;i<nr_class;i++)
2273 			model->label[i] = label[i];
2274 
2275 		model->rho = Malloc(double,nr_class*(nr_class-1)/2);
2276 		for(i=0;i<nr_class*(nr_class-1)/2;i++)
2277 			model->rho[i] = f[i].rho;
2278 
2279 		if(param->probability)
2280 		{
2281 			model->probA = Malloc(double,nr_class*(nr_class-1)/2);
2282 			model->probB = Malloc(double,nr_class*(nr_class-1)/2);
2283 			for(i=0;i<nr_class*(nr_class-1)/2;i++)
2284 			{
2285 				model->probA[i] = probA[i];
2286 				model->probB[i] = probB[i];
2287 			}
2288 		}
2289 		else
2290 		{
2291 			model->probA=NULL;
2292 			model->probB=NULL;
2293 		}
2294 
2295 		int total_sv = 0;
2296 		int *nz_count = Malloc(int,nr_class);
2297 		model->nSV = Malloc(int,nr_class);
2298 		for(i=0;i<nr_class;i++)
2299 		{
2300 			int nSV = 0;
2301 			for(int j=0;j<count[i];j++)
2302 				if(nonzero[start[i]+j])
2303 				{
2304 					++nSV;
2305 					++total_sv;
2306 				}
2307 			model->nSV[i] = nSV;
2308 			nz_count[i] = nSV;
2309 		}
2310 
2311 		info("Total nSV = %d\n",total_sv);
2312 
2313 		model->l = total_sv;
2314 		model->SV = Malloc(svm_node *,total_sv);
2315 		model->sv_indices = Malloc(int,total_sv);
2316 		p = 0;
2317 		for(i=0;i<l;i++)
2318 			if(nonzero[i])
2319 			{
2320 				model->SV[p] = x[i];
2321 				model->sv_indices[p++] = perm[i] + 1;
2322 			}
2323 
2324 		int *nz_start = Malloc(int,nr_class);
2325 		nz_start[0] = 0;
2326 		for(i=1;i<nr_class;i++)
2327 			nz_start[i] = nz_start[i-1]+nz_count[i-1];
2328 
2329 		model->sv_coef = Malloc(double *,nr_class-1);
2330 		for(i=0;i<nr_class-1;i++)
2331 			model->sv_coef[i] = Malloc(double,total_sv);
2332 
2333 		p = 0;
2334 		for(i=0;i<nr_class;i++)
2335 			for(int j=i+1;j<nr_class;j++)
2336 			{
2337 				// classifier (i,j): coefficients with
2338 				// i are in sv_coef[j-1][nz_start[i]...],
2339 				// j are in sv_coef[i][nz_start[j]...]
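				// e.g. with 3 classes the pairs are (0,1),(0,2),(1,2): the
				// class-0 coefficients go to sv_coef[0][nz_start[0]..] for pair
				// (0,1) and to sv_coef[1][nz_start[0]..] for pair (0,2).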
2340 
2341 				int si = start[i];
2342 				int sj = start[j];
2343 				int ci = count[i];
2344 				int cj = count[j];
2345 
2346 				int q = nz_start[i];
2347 				int k;
2348 				for(k=0;k<ci;k++)
2349 					if(nonzero[si+k])
2350 						model->sv_coef[j-1][q++] = f[p].alpha[k];
2351 				q = nz_start[j];
2352 				for(k=0;k<cj;k++)
2353 					if(nonzero[sj+k])
2354 						model->sv_coef[i][q++] = f[p].alpha[ci+k];
2355 				++p;
2356 			}
2357 
2358 		free(label);
2359 		free(probA);
2360 		free(probB);
2361 		free(count);
2362 		free(perm);
2363 		free(start);
2364 		free(x);
2365 		free(weighted_C);
2366 		free(nonzero);
2367 		for(i=0;i<nr_class*(nr_class-1)/2;i++)
2368 			free(f[i].alpha);
2369 		free(f);
2370 		free(nz_count);
2371 		free(nz_start);
2372 	}
2373 	return model;
2374 }
2375 
2376 // Stratified cross validation
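//
// A minimal usage sketch (illustrative only): target must have room for prob.l
// entries; on return target[i] is the cross-validation prediction for prob.x[i].
//
//   double *target = Malloc(double, prob.l);
//   svm_cross_validation(&prob, &param, 5, target);
//   /* compare target[i] against prob.y[i] to get an accuracy or MSE */
//   free(target);
//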
svm_cross_validation(const svm_problem * prob,const svm_parameter * param,int nr_fold,double * target)2377 void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
2378 {
2379 	int i;
2380 	int *fold_start;
2381 	int l = prob->l;
2382 	int *perm = Malloc(int,l);
2383 	int nr_class;
2384 	if (nr_fold > l)
2385 	{
2386 		nr_fold = l;
2387 		fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
2388 	}
2389 	fold_start = Malloc(int,nr_fold+1);
2390 	// Stratified CV may not give the leave-one-out rate: splitting each class
2391 	// into l folds can leave some folds with zero elements, so it is used only when nr_fold < l.
2392 	if((param->svm_type == C_SVC ||
2393 	    param->svm_type == NU_SVC) && nr_fold < l)
2394 	{
2395 		int *start = NULL;
2396 		int *label = NULL;
2397 		int *count = NULL;
2398 		svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
2399 
2400 		// randomly shuffle within each class, then group the data by fold using the array perm
2401 		int *fold_count = Malloc(int,nr_fold);
2402 		int c;
2403 		int *index = Malloc(int,l);
2404 		for(i=0;i<l;i++)
2405 			index[i]=perm[i];
2406 		for (c=0; c<nr_class; c++)
2407 			for(i=0;i<count[c];i++)
2408 			{
2409 				int j = i+rand()%(count[c]-i);
2410 				swap(index[start[c]+j],index[start[c]+i]);
2411 			}
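		// index[] now holds a random permutation within each class; fold_count[]
		// below splits each class as evenly as possible across the folds.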
2412 		for(i=0;i<nr_fold;i++)
2413 		{
2414 			fold_count[i] = 0;
2415 			for (c=0; c<nr_class;c++)
2416 				fold_count[i]+=(i+1)*count[c]/nr_fold-i*count[c]/nr_fold;
2417 		}
2418 		fold_start[0]=0;
2419 		for (i=1;i<=nr_fold;i++)
2420 			fold_start[i] = fold_start[i-1]+fold_count[i-1];
2421 		for (c=0; c<nr_class;c++)
2422 			for(i=0;i<nr_fold;i++)
2423 			{
2424 				int begin = start[c]+i*count[c]/nr_fold;
2425 				int end = start[c]+(i+1)*count[c]/nr_fold;
2426 				for(int j=begin;j<end;j++)
2427 				{
2428 					perm[fold_start[i]] = index[j];
2429 					fold_start[i]++;
2430 				}
2431 			}
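		// fold_start[] was advanced as a write cursor while filling perm above,
		// so rebuild it from fold_count[] to restore the fold boundaries.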
2432 		fold_start[0]=0;
2433 		for (i=1;i<=nr_fold;i++)
2434 			fold_start[i] = fold_start[i-1]+fold_count[i-1];
2435 		free(start);
2436 		free(label);
2437 		free(count);
2438 		free(index);
2439 		free(fold_count);
2440 	}
2441 	else
2442 	{
2443 		for(i=0;i<l;i++) perm[i]=i;
2444 		for(i=0;i<l;i++)
2445 		{
2446 			int j = i+rand()%(l-i);
2447 			swap(perm[i],perm[j]);
2448 		}
2449 		for(i=0;i<=nr_fold;i++)
2450 			fold_start[i]=i*l/nr_fold;
2451 	}
2452 
2453 	for(i=0;i<nr_fold;i++)
2454 	{
2455 		int begin = fold_start[i];
2456 		int end = fold_start[i+1];
2457 		int j,k;
2458 		struct svm_problem subprob;
2459 
2460 		subprob.l = l-(end-begin);
2461 		subprob.x = Malloc(struct svm_node*,subprob.l);
2462 		subprob.y = Malloc(double,subprob.l);
2463 
2464 		k=0;
2465 		for(j=0;j<begin;j++)
2466 		{
2467 			subprob.x[k] = prob->x[perm[j]];
2468 			subprob.y[k] = prob->y[perm[j]];
2469 			++k;
2470 		}
2471 		for(j=end;j<l;j++)
2472 		{
2473 			subprob.x[k] = prob->x[perm[j]];
2474 			subprob.y[k] = prob->y[perm[j]];
2475 			++k;
2476 		}
2477 		struct svm_model *submodel = svm_train(&subprob,param);
2478 		if(param->probability &&
2479 		   (param->svm_type == C_SVC || param->svm_type == NU_SVC))
2480 		{
2481 			double *prob_estimates=Malloc(double,svm_get_nr_class(submodel));
2482 			for(j=begin;j<end;j++)
2483 				target[perm[j]] = svm_predict_probability(submodel,prob->x[perm[j]],prob_estimates);
2484 			free(prob_estimates);
2485 		}
2486 		else
2487 			for(j=begin;j<end;j++)
2488 				target[perm[j]] = svm_predict(submodel,prob->x[perm[j]]);
2489 		svm_free_and_destroy_model(&submodel);
2490 		free(subprob.x);
2491 		free(subprob.y);
2492 	}
2493 	free(fold_start);
2494 	free(perm);
2495 }
2496 
2497 
svm_get_svm_type(const svm_model * model)2498 int svm_get_svm_type(const svm_model *model)
2499 {
2500 	return model->param.svm_type;
2501 }
2502 
svm_get_nr_class(const svm_model * model)2503 int svm_get_nr_class(const svm_model *model)
2504 {
2505 	return model->nr_class;
2506 }
2507 
svm_get_labels(const svm_model * model,int * label)2508 void svm_get_labels(const svm_model *model, int* label)
2509 {
2510 	if (model->label != NULL)
2511 		for(int i=0;i<model->nr_class;i++)
2512 			label[i] = model->label[i];
2513 }
2514 
svm_get_sv_indices(const svm_model * model,int * indices)2515 void svm_get_sv_indices(const svm_model *model, int* indices)
2516 {
2517 	if (model->sv_indices != NULL)
2518 		for(int i=0;i<model->l;i++)
2519 			indices[i] = model->sv_indices[i];
2520 }
2521 
svm_get_nr_sv(const svm_model * model)2522 int svm_get_nr_sv(const svm_model *model)
2523 {
2524 	return model->l;
2525 }
2526 
svm_get_svr_probability(const svm_model * model)2527 double svm_get_svr_probability(const svm_model *model)
2528 {
2529 	if ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
2530 	    model->probA!=NULL)
2531 		return model->probA[0];
2532 	else
2533 	{
2534 		fprintf(stderr,"Model doesn't contain information for SVR probability inference\n");
2535 		return 0;
2536 	}
2537 }
2538 
svm_predict_values(const svm_model * model,const svm_node * x,double * dec_values)2539 double svm_predict_values(const svm_model *model, const svm_node *x, double* dec_values)
2540 {
2541 	int i;
2542 	if(model->param.svm_type == ONE_CLASS ||
2543 	   model->param.svm_type == EPSILON_SVR ||
2544 	   model->param.svm_type == NU_SVR)
2545 	{
2546 		double *sv_coef = model->sv_coef[0];
2547 		double sum = 0;
2548 		for(i=0;i<model->l;i++)
2549 			sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param);
2550 		sum -= model->rho[0];
2551 		*dec_values = sum;
2552 
2553 		if(model->param.svm_type == ONE_CLASS)
2554 			return (sum>0)?1:-1;
2555 		else
2556 			return sum;
2557 	}
2558 	else
2559 	{
2560 		int nr_class = model->nr_class;
2561 		int l = model->l;
2562 
2563 		double *kvalue = Malloc(double,l);
2564 		for(i=0;i<l;i++)
2565 			kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);
2566 
2567 		int *start = Malloc(int,nr_class);
2568 		start[0] = 0;
2569 		for(i=1;i<nr_class;i++)
2570 			start[i] = start[i-1]+model->nSV[i-1];
2571 
2572 		int *vote = Malloc(int,nr_class);
2573 		for(i=0;i<nr_class;i++)
2574 			vote[i] = 0;
2575 
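		// One-vs-one voting: evaluate each pairwise decision value
		// sum_k coef_k * K(x, SV_k) - rho and credit one vote to the winner;
		// the class with the most votes is the prediction.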
2576 		int p=0;
2577 		for(i=0;i<nr_class;i++)
2578 			for(int j=i+1;j<nr_class;j++)
2579 			{
2580 				double sum = 0;
2581 				int si = start[i];
2582 				int sj = start[j];
2583 				int ci = model->nSV[i];
2584 				int cj = model->nSV[j];
2585 
2586 				int k;
2587 				double *coef1 = model->sv_coef[j-1];
2588 				double *coef2 = model->sv_coef[i];
2589 				for(k=0;k<ci;k++)
2590 					sum += coef1[si+k] * kvalue[si+k];
2591 				for(k=0;k<cj;k++)
2592 					sum += coef2[sj+k] * kvalue[sj+k];
2593 				sum -= model->rho[p];
2594 				dec_values[p] = sum;
2595 
2596 				if(dec_values[p] > 0)
2597 					++vote[i];
2598 				else
2599 					++vote[j];
2600 				p++;
2601 			}
2602 
2603 		int vote_max_idx = 0;
2604 		for(i=1;i<nr_class;i++)
2605 			if(vote[i] > vote[vote_max_idx])
2606 				vote_max_idx = i;
2607 
2608 		free(kvalue);
2609 		free(start);
2610 		free(vote);
2611 		return model->label[vote_max_idx];
2612 	}
2613 }
2614 
svm_predict(const svm_model * model,const svm_node * x)2615 double svm_predict(const svm_model *model, const svm_node *x)
2616 {
2617 	int nr_class = model->nr_class;
2618 	double *dec_values;
2619 	if(model->param.svm_type == ONE_CLASS ||
2620 	   model->param.svm_type == EPSILON_SVR ||
2621 	   model->param.svm_type == NU_SVR)
2622 		dec_values = Malloc(double, 1);
2623 	else
2624 		dec_values = Malloc(double, nr_class*(nr_class-1)/2);
2625 	double pred_result = svm_predict_values(model, x, dec_values);
2626 	free(dec_values);
2627 	return pred_result;
2628 }
2629 
svm_predict_probability(const svm_model * model,const svm_node * x,double * prob_estimates)2630 double svm_predict_probability(
2631 	const svm_model *model, const svm_node *x, double *prob_estimates)
2632 {
2633 	if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
2634 	    model->probA!=NULL && model->probB!=NULL)
2635 	{
2636 		int i;
2637 		int nr_class = model->nr_class;
2638 		double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
2639 		svm_predict_values(model, x, dec_values);
2640 
2641 		double min_prob=1e-7;
2642 		double **pairwise_prob=Malloc(double *,nr_class);
2643 		for(i=0;i<nr_class;i++)
2644 			pairwise_prob[i]=Malloc(double,nr_class);
2645 		int k=0;
2646 		for(i=0;i<nr_class;i++)
2647 			for(int j=i+1;j<nr_class;j++)
2648 			{
2649 				pairwise_prob[i][j]=min(max(sigmoid_predict(dec_values[k],model->probA[k],model->probB[k]),min_prob),1-min_prob);
2650 				pairwise_prob[j][i]=1-pairwise_prob[i][j];
2651 				k++;
2652 			}
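		// Pairwise probabilities are clipped to [min_prob, 1-min_prob]; with more
		// than two classes they are coupled into per-class estimates by
		// multiclass_probability() (pairwise coupling, Wu, Lin and Weng, 2004).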
2653 		if (nr_class == 2)
2654 		{
2655 			prob_estimates[0] = pairwise_prob[0][1];
2656 			prob_estimates[1] = pairwise_prob[1][0];
2657 		}
2658 		else
2659 			multiclass_probability(nr_class,pairwise_prob,prob_estimates);
2660 
2661 		int prob_max_idx = 0;
2662 		for(i=1;i<nr_class;i++)
2663 			if(prob_estimates[i] > prob_estimates[prob_max_idx])
2664 				prob_max_idx = i;
2665 		for(i=0;i<nr_class;i++)
2666 			free(pairwise_prob[i]);
2667 		free(dec_values);
2668 		free(pairwise_prob);
2669 		return model->label[prob_max_idx];
2670 	}
2671 	else
2672 		return svm_predict(model, x);
2673 }
2674 
2675 static const char *svm_type_table[] =
2676 {
2677 	"c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL
2678 };
2679 
2680 static const char *kernel_type_table[]=
2681 {
2682 	"linear","polynomial","rbf","sigmoid","precomputed",NULL
2683 };
2684 
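//
// svm_save_model writes a plain-text model file: "key value" header lines
// (svm_type, kernel_type, optional kernel parameters, nr_class, total_sv, rho,
// label, probA/probB, nr_sv), then a line "SV" followed by one line per support
// vector holding its sv_coef entries and sparse index:value pairs.  Numbers are
// printed in the "C" locale so the file does not depend on the user's locale.
//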
svm_save_model(const char * model_file_name,const svm_model * model)2685 int svm_save_model(const char *model_file_name, const svm_model *model)
2686 {
2687 	FILE *fp = fopen(model_file_name,"w");
2688 	if(fp==NULL) return -1;
2689 
2690 	char *old_locale = setlocale(LC_ALL, NULL);
2691 	if (old_locale) {
2692 		old_locale = strdup(old_locale);
2693 	}
2694 	setlocale(LC_ALL, "C");
2695 
2696 	const svm_parameter& param = model->param;
2697 
2698 	fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]);
2699 	fprintf(fp,"kernel_type %s\n", kernel_type_table[param.kernel_type]);
2700 
2701 	if(param.kernel_type == POLY)
2702 		fprintf(fp,"degree %d\n", param.degree);
2703 
2704 	if(param.kernel_type == POLY || param.kernel_type == RBF || param.kernel_type == SIGMOID)
2705 		fprintf(fp,"gamma %.17g\n", param.gamma);
2706 
2707 	if(param.kernel_type == POLY || param.kernel_type == SIGMOID)
2708 		fprintf(fp,"coef0 %.17g\n", param.coef0);
2709 
2710 	int nr_class = model->nr_class;
2711 	int l = model->l;
2712 	fprintf(fp, "nr_class %d\n", nr_class);
2713 	fprintf(fp, "total_sv %d\n",l);
2714 
2715 	{
2716 		fprintf(fp, "rho");
2717 		for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2718 			fprintf(fp," %.17g",model->rho[i]);
2719 		fprintf(fp, "\n");
2720 	}
2721 
2722 	if(model->label)
2723 	{
2724 		fprintf(fp, "label");
2725 		for(int i=0;i<nr_class;i++)
2726 			fprintf(fp," %d",model->label[i]);
2727 		fprintf(fp, "\n");
2728 	}
2729 
2730 	if(model->probA) // regression models have probA only (no probB)
2731 	{
2732 		fprintf(fp, "probA");
2733 		for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2734 			fprintf(fp," %.17g",model->probA[i]);
2735 		fprintf(fp, "\n");
2736 	}
2737 	if(model->probB)
2738 	{
2739 		fprintf(fp, "probB");
2740 		for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2741 			fprintf(fp," %.17g",model->probB[i]);
2742 		fprintf(fp, "\n");
2743 	}
2744 
2745 	if(model->nSV)
2746 	{
2747 		fprintf(fp, "nr_sv");
2748 		for(int i=0;i<nr_class;i++)
2749 			fprintf(fp," %d",model->nSV[i]);
2750 		fprintf(fp, "\n");
2751 	}
2752 
2753 	fprintf(fp, "SV\n");
2754 	const double * const *sv_coef = model->sv_coef;
2755 	const svm_node * const *SV = model->SV;
2756 
2757 	for(int i=0;i<l;i++)
2758 	{
2759 		for(int j=0;j<nr_class-1;j++)
2760 			fprintf(fp, "%.17g ",sv_coef[j][i]);
2761 
2762 		const svm_node *p = SV[i];
2763 
2764 		if(param.kernel_type == PRECOMPUTED)
2765 			fprintf(fp,"0:%d ",(int)(p->value));
2766 		else
2767 			while(p->index != -1)
2768 			{
2769 				fprintf(fp,"%d:%.8g ",p->index,p->value);
2770 				p++;
2771 			}
2772 		fprintf(fp, "\n");
2773 	}
2774 
2775 	setlocale(LC_ALL, old_locale);
2776 	free(old_locale);
2777 
2778 	if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
2779 	else return 0;
2780 }
2781 
2782 static char *line = NULL;
2783 static int max_line_len;
2784 
readline(FILE * input)2785 static char* readline(FILE *input)
2786 {
2787 	int len;
2788 
2789 	if(fgets(line,max_line_len,input) == NULL)
2790 		return NULL;
2791 
2792 	while(strrchr(line,'\n') == NULL)
2793 	{
2794 		max_line_len *= 2;
2795 		line = (char *) realloc(line,max_line_len);
2796 		len = (int) strlen(line);
2797 		if(fgets(line+len,max_line_len-len,input) == NULL)
2798 			break;
2799 	}
2800 	return line;
2801 }
2802 
2803 //
2804 // FSCANF wraps fscanf and returns false from the calling function on failure.
2805 // Its do-while(0) block makes the macro act as a single statement, so that
2806 // if (...)
2807 //    FSCANF();
2808 // parses unambiguously (no dangling-else problem).
2809 //
2810 #define FSCANF(_stream, _format, _var) do{ if (fscanf(_stream, _format, _var) != 1) return false; }while(0)
read_model_header(FILE * fp,svm_model * model)2811 bool read_model_header(FILE *fp, svm_model* model)
2812 {
2813 	svm_parameter& param = model->param;
2814 	// training-only parameters are not stored in the model file, so they are left unset; the weight arrays are set to NULL for safety
2815 	param.nr_weight = 0;
2816 	param.weight_label = NULL;
2817 	param.weight = NULL;
2818 
2819 	char cmd[81];
2820 	while(1)
2821 	{
2822 		FSCANF(fp,"%80s",cmd);
2823 
2824 		if(strcmp(cmd,"svm_type")==0)
2825 		{
2826 			FSCANF(fp,"%80s",cmd);
2827 			int i;
2828 			for(i=0;svm_type_table[i];i++)
2829 			{
2830 				if(strcmp(svm_type_table[i],cmd)==0)
2831 				{
2832 					param.svm_type=i;
2833 					break;
2834 				}
2835 			}
2836 			if(svm_type_table[i] == NULL)
2837 			{
2838 				fprintf(stderr,"unknown svm type.\n");
2839 				return false;
2840 			}
2841 		}
2842 		else if(strcmp(cmd,"kernel_type")==0)
2843 		{
2844 			FSCANF(fp,"%80s",cmd);
2845 			int i;
2846 			for(i=0;kernel_type_table[i];i++)
2847 			{
2848 				if(strcmp(kernel_type_table[i],cmd)==0)
2849 				{
2850 					param.kernel_type=i;
2851 					break;
2852 				}
2853 			}
2854 			if(kernel_type_table[i] == NULL)
2855 			{
2856 				fprintf(stderr,"unknown kernel function.\n");
2857 				return false;
2858 			}
2859 		}
2860 		else if(strcmp(cmd,"degree")==0)
2861 			FSCANF(fp,"%d",&param.degree);
2862 		else if(strcmp(cmd,"gamma")==0)
2863 			FSCANF(fp,"%lf",&param.gamma);
2864 		else if(strcmp(cmd,"coef0")==0)
2865 			FSCANF(fp,"%lf",&param.coef0);
2866 		else if(strcmp(cmd,"nr_class")==0)
2867 			FSCANF(fp,"%d",&model->nr_class);
2868 		else if(strcmp(cmd,"total_sv")==0)
2869 			FSCANF(fp,"%d",&model->l);
2870 		else if(strcmp(cmd,"rho")==0)
2871 		{
2872 			int n = model->nr_class * (model->nr_class-1)/2;
2873 			model->rho = Malloc(double,n);
2874 			for(int i=0;i<n;i++)
2875 				FSCANF(fp,"%lf",&model->rho[i]);
2876 		}
2877 		else if(strcmp(cmd,"label")==0)
2878 		{
2879 			int n = model->nr_class;
2880 			model->label = Malloc(int,n);
2881 			for(int i=0;i<n;i++)
2882 				FSCANF(fp,"%d",&model->label[i]);
2883 		}
2884 		else if(strcmp(cmd,"probA")==0)
2885 		{
2886 			int n = model->nr_class * (model->nr_class-1)/2;
2887 			model->probA = Malloc(double,n);
2888 			for(int i=0;i<n;i++)
2889 				FSCANF(fp,"%lf",&model->probA[i]);
2890 		}
2891 		else if(strcmp(cmd,"probB")==0)
2892 		{
2893 			int n = model->nr_class * (model->nr_class-1)/2;
2894 			model->probB = Malloc(double,n);
2895 			for(int i=0;i<n;i++)
2896 				FSCANF(fp,"%lf",&model->probB[i]);
2897 		}
2898 		else if(strcmp(cmd,"nr_sv")==0)
2899 		{
2900 			int n = model->nr_class;
2901 			model->nSV = Malloc(int,n);
2902 			for(int i=0;i<n;i++)
2903 				FSCANF(fp,"%d",&model->nSV[i]);
2904 		}
2905 		else if(strcmp(cmd,"SV")==0)
2906 		{
2907 			while(1)
2908 			{
2909 				int c = getc(fp);
2910 				if(c==EOF || c=='\n') break;
2911 			}
2912 			break;
2913 		}
2914 		else
2915 		{
2916 			fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
2917 			return false;
2918 		}
2919 	}
2920 
2921 	return true;
2922 
2923 }
2924 
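//
// svm_load_model mirrors svm_save_model: read_model_header parses the header,
// a first pass over the SV lines counts index:value tokens so that one
// contiguous x_space block can be allocated, then a second pass fills
// sv_coef and SV.
//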
svm_load_model(const char * model_file_name)2925 svm_model *svm_load_model(const char *model_file_name)
2926 {
2927 	FILE *fp = fopen(model_file_name,"rb");
2928 	if(fp==NULL) return NULL;
2929 
2930 	char *old_locale = setlocale(LC_ALL, NULL);
2931 	if (old_locale) {
2932 		old_locale = strdup(old_locale);
2933 	}
2934 	setlocale(LC_ALL, "C");
2935 
2936 	// read parameters
2937 
2938 	svm_model *model = Malloc(svm_model,1);
2939 	model->rho = NULL;
2940 	model->probA = NULL;
2941 	model->probB = NULL;
2942 	model->sv_indices = NULL;
2943 	model->label = NULL;
2944 	model->nSV = NULL;
2945 
2946 	// read header
2947 	if (!read_model_header(fp, model))
2948 	{
2949 		fprintf(stderr, "ERROR: fscanf failed to read model\n");
2950 		setlocale(LC_ALL, old_locale);
2951 		free(old_locale);
2952 		free(model->rho);
2953 		free(model->label);
2954 		free(model->probA); free(model->probB); free(model->nSV); // release anything the partially read header allocated
2955 		free(model);
2956 		return NULL;
2957 	}
2958 
2959 	// read sv_coef and SV
2960 
2961 	int elements = 0;
2962 	long pos = ftell(fp);
2963 
2964 	max_line_len = 1024;
2965 	line = Malloc(char,max_line_len);
2966 	char *p,*endptr,*idx,*val;
2967 
2968 	while(readline(fp)!=NULL)
2969 	{
2970 		p = strtok(line,":");
2971 		while(1)
2972 		{
2973 			p = strtok(NULL,":");
2974 			if(p == NULL)
2975 				break;
2976 			++elements;
2977 		}
2978 	}
2979 	elements += model->l;
2980 
2981 	fseek(fp,pos,SEEK_SET);
2982 
2983 	int m = model->nr_class - 1;
2984 	int l = model->l;
2985 	model->sv_coef = Malloc(double *,m);
2986 	int i;
2987 	for(i=0;i<m;i++)
2988 		model->sv_coef[i] = Malloc(double,l);
2989 	model->SV = Malloc(svm_node*,l);
2990 	svm_node *x_space = NULL;
2991 	if(l>0) x_space = Malloc(svm_node,elements);
2992 
2993 	int j=0;
2994 	for(i=0;i<l;i++)
2995 	{
2996 		readline(fp);
2997 		model->SV[i] = &x_space[j];
2998 
2999 		p = strtok(line, " \t");
3000 		model->sv_coef[0][i] = strtod(p,&endptr);
3001 		for(int k=1;k<m;k++)
3002 		{
3003 			p = strtok(NULL, " \t");
3004 			model->sv_coef[k][i] = strtod(p,&endptr);
3005 		}
3006 
3007 		while(1)
3008 		{
3009 			idx = strtok(NULL, ":");
3010 			val = strtok(NULL, " \t");
3011 
3012 			if(val == NULL)
3013 				break;
3014 			x_space[j].index = (int) strtol(idx,&endptr,10);
3015 			x_space[j].value = strtod(val,&endptr);
3016 
3017 			++j;
3018 		}
3019 		x_space[j++].index = -1;
3020 	}
3021 	free(line);
3022 
3023 	setlocale(LC_ALL, old_locale);
3024 	free(old_locale);
3025 
3026 	if (ferror(fp) != 0 || fclose(fp) != 0)
3027 		return NULL;
3028 
3029 	model->free_sv = 1;	// SVs point into x_space allocated here; the model owns and frees them
3030 	return model;
3031 }
3032 
svm_free_model_content(svm_model * model_ptr)3033 void svm_free_model_content(svm_model* model_ptr)
3034 {
3035 	if(model_ptr->free_sv && model_ptr->l > 0 && model_ptr->SV != NULL)
3036 		free((void *)(model_ptr->SV[0]));
3037 	if(model_ptr->sv_coef)
3038 	{
3039 		for(int i=0;i<model_ptr->nr_class-1;i++)
3040 			free(model_ptr->sv_coef[i]);
3041 	}
3042 
3043 	free(model_ptr->SV);
3044 	model_ptr->SV = NULL;
3045 
3046 	free(model_ptr->sv_coef);
3047 	model_ptr->sv_coef = NULL;
3048 
3049 	free(model_ptr->rho);
3050 	model_ptr->rho = NULL;
3051 
3052 	free(model_ptr->label);
3053 	model_ptr->label= NULL;
3054 
3055 	free(model_ptr->probA);
3056 	model_ptr->probA = NULL;
3057 
3058 	free(model_ptr->probB);
3059 	model_ptr->probB= NULL;
3060 
3061 	free(model_ptr->sv_indices);
3062 	model_ptr->sv_indices = NULL;
3063 
3064 	free(model_ptr->nSV);
3065 	model_ptr->nSV = NULL;
3066 }
3067 
svm_free_and_destroy_model(svm_model ** model_ptr_ptr)3068 void svm_free_and_destroy_model(svm_model** model_ptr_ptr)
3069 {
3070 	if(model_ptr_ptr != NULL && *model_ptr_ptr != NULL)
3071 	{
3072 		svm_free_model_content(*model_ptr_ptr);
3073 		free(*model_ptr_ptr);
3074 		*model_ptr_ptr = NULL;
3075 	}
3076 }
3077 
svm_destroy_param(svm_parameter * param)3078 void svm_destroy_param(svm_parameter* param)
3079 {
3080 	free(param->weight_label);
3081 	free(param->weight);
3082 }
3083 
svm_check_parameter(const svm_problem * prob,const svm_parameter * param)3084 const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
3085 {
3086 	// svm_type
3087 
3088 	int svm_type = param->svm_type;
3089 	if(svm_type != C_SVC &&
3090 	   svm_type != NU_SVC &&
3091 	   svm_type != ONE_CLASS &&
3092 	   svm_type != EPSILON_SVR &&
3093 	   svm_type != NU_SVR)
3094 		return "unknown svm type";
3095 
3096 	// kernel_type, degree
3097 
3098 	int kernel_type = param->kernel_type;
3099 	if(kernel_type != LINEAR &&
3100 	   kernel_type != POLY &&
3101 	   kernel_type != RBF &&
3102 	   kernel_type != SIGMOID &&
3103 	   kernel_type != PRECOMPUTED)
3104 		return "unknown kernel type";
3105 
3106 	if(param->gamma < 0)
3107 		return "gamma < 0";
3108 
3109 	if(param->degree < 0)
3110 		return "degree of polynomial kernel < 0";
3111 
3112 	// cache_size,eps,C,nu,p,shrinking
3113 
3114 	if(param->cache_size <= 0)
3115 		return "cache_size <= 0";
3116 
3117 	if(param->eps <= 0)
3118 		return "eps <= 0";
3119 
3120 	if(svm_type == C_SVC ||
3121 	   svm_type == EPSILON_SVR ||
3122 	   svm_type == NU_SVR)
3123 		if(param->C <= 0)
3124 			return "C <= 0";
3125 
3126 	if(svm_type == NU_SVC ||
3127 	   svm_type == ONE_CLASS ||
3128 	   svm_type == NU_SVR)
3129 		if(param->nu <= 0 || param->nu > 1)
3130 			return "nu <= 0 or nu > 1";
3131 
3132 	if(svm_type == EPSILON_SVR)
3133 		if(param->p < 0)
3134 			return "p < 0";
3135 
3136 	if(param->shrinking != 0 &&
3137 	   param->shrinking != 1)
3138 		return "shrinking != 0 and shrinking != 1";
3139 
3140 	if(param->probability != 0 &&
3141 	   param->probability != 1)
3142 		return "probability != 0 and probability != 1";
3143 
3144 	if(param->probability == 1 &&
3145 	   svm_type == ONE_CLASS)
3146 		return "one-class SVM probability output not supported yet";
3147 
3148 
3149 	// check whether nu-svc is feasible
3150 
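	// A pairwise nu-SVC problem on classes of sizes n1 and n2 is feasible only
	// if nu <= 2*min(n1,n2)/(n1+n2); count the class sizes and reject any nu
	// that violates this for some pair.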
3151 	if(svm_type == NU_SVC)
3152 	{
3153 		int l = prob->l;
3154 		int max_nr_class = 16;
3155 		int nr_class = 0;
3156 		int *label = Malloc(int,max_nr_class);
3157 		int *count = Malloc(int,max_nr_class);
3158 
3159 		int i;
3160 		for(i=0;i<l;i++)
3161 		{
3162 			int this_label = (int)prob->y[i];
3163 			int j;
3164 			for(j=0;j<nr_class;j++)
3165 				if(this_label == label[j])
3166 				{
3167 					++count[j];
3168 					break;
3169 				}
3170 			if(j == nr_class)
3171 			{
3172 				if(nr_class == max_nr_class)
3173 				{
3174 					max_nr_class *= 2;
3175 					label = (int *)realloc(label,max_nr_class*sizeof(int));
3176 					count = (int *)realloc(count,max_nr_class*sizeof(int));
3177 				}
3178 				label[nr_class] = this_label;
3179 				count[nr_class] = 1;
3180 				++nr_class;
3181 			}
3182 		}
3183 
3184 		for(i=0;i<nr_class;i++)
3185 		{
3186 			int n1 = count[i];
3187 			for(int j=i+1;j<nr_class;j++)
3188 			{
3189 				int n2 = count[j];
3190 				if(param->nu*(n1+n2)/2 > min(n1,n2))
3191 				{
3192 					free(label);
3193 					free(count);
3194 					return "specified nu is infeasible";
3195 				}
3196 			}
3197 		}
3198 		free(label);
3199 		free(count);
3200 	}
3201 
3202 	return NULL;
3203 }
3204 
svm_check_probability_model(const svm_model * model)3205 int svm_check_probability_model(const svm_model *model)
3206 {
3207 	return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
3208 		model->probA!=NULL && model->probB!=NULL) ||
3209 		((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
3210 		 model->probA!=NULL);
3211 }
3212 
svm_set_print_string_function(void (* print_func)(const char *))3213 void svm_set_print_string_function(void (*print_func)(const char *))
3214 {
3215 	if(print_func == NULL)
3216 		svm_print_string = &print_string_stdout;
3217 	else
3218 		svm_print_string = print_func;
3219 }
3220