#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include "linear.h"
#include "tron.h"
typedef signed char schar;
template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
#ifndef min
template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
#endif
#ifndef max
template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
#endif
template <class S, class T> static inline void clone(T*& dst, S* src, int n)
{
	dst = new T[n];
	memcpy((void *)dst,(void *)src,sizeof(T)*n);
}
#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
#define INF HUGE_VAL

static void print_string_stdout(const char *s)
{
	fputs(s,stdout);
	fflush(stdout);
}

static void (*liblinear_print_string) (const char *) = &print_string_stdout;

#if 1
static void info(const char *fmt,...)
{
	char buf[BUFSIZ];
	va_list ap;
	va_start(ap,fmt);
	vsnprintf(buf,BUFSIZ,fmt,ap); // bounded write; vsprintf could overflow buf
	va_end(ap);
	(*liblinear_print_string)(buf);
}
#else
static void info(const char *fmt,...) {}
#endif

class l2r_lr_fun : public function
{
public:
	l2r_lr_fun(const problem *prob, double Cp, double Cn);
	~l2r_lr_fun();

	double fun(double *w);
	void grad(double *w, double *g);
	void Hv(double *s, double *Hs);

	int get_nr_variable(void);

private:
	void Xv(double *v, double *Xv);
	void XTv(double *v, double *XTv);

	double *C;
	double *z;
	double *D;
	const problem *prob;
};

l2r_lr_fun::l2r_lr_fun(const problem *prob, double Cp, double Cn)
{
	int i;
	int l=prob->l;
	int *y=prob->y;

	this->prob = prob;

	z = new double[l];
	D = new double[l];
	C = new double[l];

	for (i=0; i<l; i++)
	{
		if (y[i] == 1)
			C[i] = Cp;
		else
			C[i] = Cn;
	}
}

l2r_lr_fun::~l2r_lr_fun()
{
	delete[] z;
	delete[] D;
	delete[] C;
}


double l2r_lr_fun::fun(double *w)
{
	int i;
	double f=0;
	int *y=prob->y;
	int l=prob->l;
	int w_size=get_nr_variable();

	Xv(w, z);
	for(i=0;i<l;i++)
	{
		double yz = y[i]*z[i];
		if (yz >= 0)
			f += C[i]*log(1 + exp(-yz));
		else
			f += C[i]*(-yz+log(1 + exp(yz)));
	}
	f = 2*f;
	for(i=0;i<w_size;i++)
		f += w[i]*w[i];
	f /= 2.0;

	return(f);
}

void l2r_lr_fun::grad(double *w, double *g)
{
	int i;
	int *y=prob->y;
	int l=prob->l;
	int w_size=get_nr_variable();

	for(i=0;i<l;i++)
	{
		z[i] = 1/(1 + exp(-y[i]*z[i]));
		D[i] = z[i]*(1-z[i]);
		z[i] = C[i]*(z[i]-1)*y[i];
	}
	XTv(z, g);

	for(i=0;i<w_size;i++)
		g[i] = w[i] + g[i];
}

int l2r_lr_fun::get_nr_variable(void)
{
	return prob->n;
}

void l2r_lr_fun::Hv(double *s, double *Hs)
{
	int i;
	int l=prob->l;
	int w_size=get_nr_variable();
	double *wa = new double[l];

	Xv(s, wa);
	for(i=0;i<l;i++)
		wa[i] = C[i]*D[i]*wa[i];

	XTv(wa, Hs);
	for(i=0;i<w_size;i++)
		Hs[i] = s[i] + Hs[i];
	delete[] wa;
}

void l2r_lr_fun::Xv(double *v, double *Xv)
{
	int i;
	int l=prob->l;
	feature_node **x=prob->x;

	for(i=0;i<l;i++)
	{
		feature_node *s=x[i];
		Xv[i]=0;
		while(s->index!=-1)
		{
			Xv[i]+=v[s->index-1]*s->value;
			s++;
		}
	}
}

void l2r_lr_fun::XTv(double *v, double *XTv)
{
	int i;
	int l=prob->l;
	int w_size=get_nr_variable();
	feature_node **x=prob->x;

	for(i=0;i<w_size;i++)
		XTv[i]=0;
	for(i=0;i<l;i++)
	{
		feature_node *s=x[i];
		while(s->index!=-1)
		{
			XTv[s->index-1]+=v[i]*s->value;
			s++;
		}
	}
}

class l2r_l2_svc_fun : public function
{
public:
	l2r_l2_svc_fun(const problem *prob, double Cp, double Cn);
	~l2r_l2_svc_fun();

	double fun(double *w);
	void grad(double *w, double *g);
	void Hv(double *s, double *Hs);

	int get_nr_variable(void);

private:
	void Xv(double *v, double *Xv);
	void subXv(double *v, double *Xv);
	void subXTv(double *v, double *XTv);

	double *C;
	double *z;
	double *D;
	int *I;
	int sizeI;
	const problem *prob;
};

l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *prob, double Cp, double Cn)
{
	int i;
	int l=prob->l;
	int *y=prob->y;

	this->prob = prob;

	z = new double[l];
	D = new double[l];
	C = new double[l];
	I = new int[l];

	for (i=0; i<l; i++)
	{
		if (y[i] == 1)
			C[i] = Cp;
		else
			C[i] = Cn;
	}
}

l2r_l2_svc_fun::~l2r_l2_svc_fun()
{
	delete[] z;
	delete[] D;
	delete[] C;
	delete[] I;
}

double l2r_l2_svc_fun::fun(double *w)
{
	int i;
	double f=0;
	int *y=prob->y;
	int l=prob->l;
	int w_size=get_nr_variable();

	Xv(w, z);
	for(i=0;i<l;i++)
	{
		z[i] = y[i]*z[i];
		double d = 1-z[i];
		if (d > 0)
			f += C[i]*d*d;
	}
	f = 2*f;
	for(i=0;i<w_size;i++)
		f += w[i]*w[i];
	f /= 2.0;

	return(f);
}

void l2r_l2_svc_fun::grad(double *w, double *g)
{
	int i;
	int *y=prob->y;
	int l=prob->l;
	int w_size=get_nr_variable();

	sizeI = 0;
	for (i=0;i<l;i++)
		if (z[i] < 1)
		{
			z[sizeI] = C[i]*y[i]*(z[i]-1);
			I[sizeI] = i;
			sizeI++;
		}
	subXTv(z, g);

	for(i=0;i<w_size;i++)
		g[i] = w[i] + 2*g[i];
}

int l2r_l2_svc_fun::get_nr_variable(void)
{
	return prob->n;
}

void l2r_l2_svc_fun::Hv(double *s, double *Hs)
{
	int i;
	int l=prob->l;
	int w_size=get_nr_variable();
	double *wa = new double[l];

	subXv(s, wa);
	for(i=0;i<sizeI;i++)
		wa[i] = C[I[i]]*wa[i];

	subXTv(wa, Hs);
	for(i=0;i<w_size;i++)
		Hs[i] = s[i] + 2*Hs[i];
	delete[] wa;
}

void l2r_l2_svc_fun::Xv(double *v, double *Xv)
{
	int i;
	int l=prob->l;
	feature_node **x=prob->x;

	for(i=0;i<l;i++)
	{
		feature_node *s=x[i];
		Xv[i]=0;
		while(s->index!=-1)
		{
			Xv[i]+=v[s->index-1]*s->value;
			s++;
		}
	}
}

void l2r_l2_svc_fun::subXv(double *v, double *Xv)
{
	int i;
	feature_node **x=prob->x;

	for(i=0;i<sizeI;i++)
	{
		feature_node *s=x[I[i]];
		Xv[i]=0;
		while(s->index!=-1)
		{
			Xv[i]+=v[s->index-1]*s->value;
			s++;
		}
	}
}

void l2r_l2_svc_fun::subXTv(double *v, double *XTv)
{
	int i;
	int w_size=get_nr_variable();
	feature_node **x=prob->x;

	for(i=0;i<w_size;i++)
		XTv[i]=0;
	for(i=0;i<sizeI;i++)
	{
		feature_node *s=x[I[i]];
		while(s->index!=-1)
		{
			XTv[s->index-1]+=v[i]*s->value;
			s++;
		}
	}
}

// A coordinate descent algorithm for
// multi-class support vector machines by Crammer and Singer
//
//  min_{\alpha}  0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i \alpha^m_i
//    s.t.     \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i
//
//  where e^m_i = 0 if y_i  = m,
//        e^m_i = 1 if y_i != m,
//  C^m_i = C if m  = y_i,
//  C^m_i = 0 if m != y_i,
//  and w_m(\alpha) = \sum_i \alpha^m_i x_i
//
// Given:
// x, y, C
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Appendix of LIBLINEAR paper, Fan et al. (2008)

#define GETI(i) (prob->y[i])
// To support weights for instances, use GETI(i) (i)
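
// Illustrative sketch (not part of the library): for one instance i with
// A_i = x_i^T x_i, the per-instance sub-problem reduces to finding a
// threshold beta over the sorted values D_m = B_m (+ A_i C_yi for m = y_i),
// after which each alpha^m_i is a simple clipped affine function of beta.
// A hypothetical standalone version of the threshold search:
//
//	double solve_beta(const double *D_sorted, double A_i, double C_yi, int active_i)
//	{
//		int r;
//		double beta = D_sorted[0] - A_i*C_yi;
//		for(r=1; r<active_i && beta < r*D_sorted[r]; r++)
//			beta += D_sorted[r];
//		return beta/r; // then alpha^m_i = min(0, (beta-B_m)/A_i) for m != y_i
//	}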

class Solver_MCSVM_CS
{
	public:
		Solver_MCSVM_CS(const problem *prob, int nr_class, double *C, double eps=0.1, int max_iter=100000);
		~Solver_MCSVM_CS();
		void Solve(double *w);
	private:
		void solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new);
		bool be_shrunk(int i, int m, int yi, double alpha_i, double minG);
		double *B, *C, *G;
		int w_size, l;
		int nr_class;
		int max_iter;
		double eps;
		const problem *prob;
};

Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *prob, int nr_class, double *weighted_C, double eps, int max_iter)
{
	this->w_size = prob->n;
	this->l = prob->l;
	this->nr_class = nr_class;
	this->eps = eps;
	this->max_iter = max_iter;
	this->prob = prob;
	this->B = new double[nr_class];
	this->G = new double[nr_class];
	this->C = weighted_C;
}

Solver_MCSVM_CS::~Solver_MCSVM_CS()
{
	delete[] B;
	delete[] G;
}

int compare_double(const void *a, const void *b)
{
	if(*(double *)a > *(double *)b)
		return -1;
	if(*(double *)a < *(double *)b)
		return 1;
	return 0;
}

void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new)
{
	int r;
	double *D;

	clone(D, B, active_i);
	if(yi < active_i)
		D[yi] += A_i*C_yi;
	qsort(D, active_i, sizeof(double), compare_double);

	double beta = D[0] - A_i*C_yi;
	for(r=1;r<active_i && beta<r*D[r];r++)
		beta += D[r];

	beta /= r;
	for(r=0;r<active_i;r++)
	{
		if(r == yi)
			alpha_new[r] = min(C_yi, (beta-B[r])/A_i);
		else
			alpha_new[r] = min((double)0, (beta - B[r])/A_i);
	}
	delete[] D;
}

bool Solver_MCSVM_CS::be_shrunk(int i, int m, int yi, double alpha_i, double minG)
{
	double bound = 0;
	if(m == yi)
		bound = C[GETI(i)];
	if(alpha_i == bound && G[m] < minG)
		return true;
	return false;
}

void Solver_MCSVM_CS::Solve(double *w)
{
	int i, m, s;
	int iter = 0;
	double *alpha =  new double[l*nr_class];
	double *alpha_new = new double[nr_class];
	int *index = new int[l];
	double *QD = new double[l];
	int *d_ind = new int[nr_class];
	double *d_val = new double[nr_class];
	int *alpha_index = new int[nr_class*l];
	int *y_index = new int[l];
	int active_size = l;
	int *active_size_i = new int[l];
	double eps_shrink = max(10.0*eps, 1.0); // stopping tolerance for shrinking
	bool start_from_all = true;
	// initial
	for(i=0;i<l*nr_class;i++)
		alpha[i] = 0;
	for(i=0;i<w_size*nr_class;i++)
		w[i] = 0;
	for(i=0;i<l;i++)
	{
		for(m=0;m<nr_class;m++)
			alpha_index[i*nr_class+m] = m;
		feature_node *xi = prob->x[i];
		QD[i] = 0;
		while(xi->index != -1)
		{
			QD[i] += (xi->value)*(xi->value);
			xi++;
		}
		active_size_i[i] = nr_class;
		y_index[i] = prob->y[i];
		index[i] = i;
	}

	while(iter < max_iter)
	{
		double stopping = -INF;
		for(i=0;i<active_size;i++)
		{
			int j = i+rand()%(active_size-i);
			swap(index[i], index[j]);
		}
		for(s=0;s<active_size;s++)
		{
			i = index[s];
			double Ai = QD[i];
			double *alpha_i = &alpha[i*nr_class];
			int *alpha_index_i = &alpha_index[i*nr_class];

			if(Ai > 0)
			{
				for(m=0;m<active_size_i[i];m++)
					G[m] = 1;
				if(y_index[i] < active_size_i[i])
					G[y_index[i]] = 0;

				feature_node *xi = prob->x[i];
				while(xi->index!= -1)
				{
					double *w_i = &w[(xi->index-1)*nr_class];
					for(m=0;m<active_size_i[i];m++)
						G[m] += w_i[alpha_index_i[m]]*(xi->value);
					xi++;
				}

				double minG = INF;
				double maxG = -INF;
				for(m=0;m<active_size_i[i];m++)
				{
					if(alpha_i[alpha_index_i[m]] < 0 && G[m] < minG)
						minG = G[m];
					if(G[m] > maxG)
						maxG = G[m];
				}
				if(y_index[i] < active_size_i[i])
					if(alpha_i[prob->y[i]] < C[GETI(i)] && G[y_index[i]] < minG)
						minG = G[y_index[i]];

				for(m=0;m<active_size_i[i];m++)
				{
					if(be_shrunk(i, m, y_index[i], alpha_i[alpha_index_i[m]], minG))
					{
						active_size_i[i]--;
						while(active_size_i[i]>m)
						{
							if(!be_shrunk(i, active_size_i[i], y_index[i],
											alpha_i[alpha_index_i[active_size_i[i]]], minG))
							{
								swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]);
								swap(G[m], G[active_size_i[i]]);
								if(y_index[i] == active_size_i[i])
									y_index[i] = m;
								else if(y_index[i] == m)
									y_index[i] = active_size_i[i];
								break;
							}
							active_size_i[i]--;
						}
					}
				}

				if(active_size_i[i] <= 1)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}

				if(maxG-minG <= 1e-12)
					continue;
				else
					stopping = max(maxG - minG, stopping);

				for(m=0;m<active_size_i[i];m++)
					B[m] = G[m] - Ai*alpha_i[alpha_index_i[m]];

				solve_sub_problem(Ai, y_index[i], C[GETI(i)], active_size_i[i], alpha_new);
				int nz_d = 0;
				for(m=0;m<active_size_i[i];m++)
				{
					double d = alpha_new[m] - alpha_i[alpha_index_i[m]];
					alpha_i[alpha_index_i[m]] = alpha_new[m];
					if(fabs(d) >= 1e-12)
					{
						d_ind[nz_d] = alpha_index_i[m];
						d_val[nz_d] = d;
						nz_d++;
					}
				}

				xi = prob->x[i];
				while(xi->index != -1)
				{
					double *w_i = &w[(xi->index-1)*nr_class];
					for(m=0;m<nz_d;m++)
						w_i[d_ind[m]] += d_val[m]*xi->value;
					xi++;
				}
			}
		}

		iter++;
		if(iter % 10 == 0)
		{
			info(".");
		}

		if(stopping < eps_shrink)
		{
			if(stopping < eps && start_from_all == true)
				break;
			else
			{
				active_size = l;
				for(i=0;i<l;i++)
					active_size_i[i] = nr_class;
				info("*");
				eps_shrink = max(eps_shrink/2, eps);
				start_from_all = true;
			}
		}
		else
			start_from_all = false;
	}

	info("\noptimization finished, #iter = %d\n",iter);
	if (iter >= max_iter)
		info("\nWARNING: reaching max number of iterations\n");

	// calculate objective value
	double v = 0;
	int nSV = 0;
	for(i=0;i<w_size*nr_class;i++)
		v += w[i]*w[i];
	v = 0.5*v;
	for(i=0;i<l*nr_class;i++)
	{
		v += alpha[i];
		if(fabs(alpha[i]) > 0)
			nSV++;
	}
	for(i=0;i<l;i++)
		v -= alpha[i*nr_class+prob->y[i]];
	info("Objective value = %lf\n",v);
	info("nSV = %d\n",nSV);

	delete [] alpha;
	delete [] alpha_new;
	delete [] index;
	delete [] QD;
	delete [] d_ind;
	delete [] d_val;
	delete [] alpha_index;
	delete [] y_index;
	delete [] active_size_i;
}

// A coordinate descent algorithm for
// L1-loss and L2-loss SVM dual problems
//
//  min_\alpha  0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
//    s.t.      0 <= \alpha_i <= upper_bound_i,
//
//  where Qij = yi yj xi^T xj and
//  D is a diagonal matrix
//
// In L1-SVM case:
// 		upper_bound_i = Cp if y_i = 1
// 		upper_bound_i = Cn if y_i = -1
// 		D_ii = 0
// In L2-SVM case:
// 		upper_bound_i = INF
// 		D_ii = 1/(2*Cp)	if y_i = 1
// 		D_ii = 1/(2*Cn)	if y_i = -1
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Algorithm 3 of Hsieh et al., ICML 2008

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)
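
// Illustrative sketch (not part of the library): GETI maps y_i in {-1,+1}
// to {0,2}, so diag[] and upper_bound[] below are indexed per class; the
// two loss variants differ only in those tables:
//
//	// L2-loss dual:  D_ii = 1/(2C_i),  alpha_i unbounded above
//	// L1-loss dual:  D_ii = 0,         alpha_i <= C_i
//	// each coordinate step is then the projected Newton update
//	//   alpha_i <- min(max(alpha_i - G/QD[i], 0.0), upper_bound_i)
//	// with G = y_i w^T x_i - 1 + D_ii alpha_i and QD[i] = x_i^T x_i + D_ii.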

static void solve_l2r_l1l2_svc(
	const problem *prob, double *w, double eps,
	double Cp, double Cn, int solver_type)
{
	int l = prob->l;
	int w_size = prob->n;
	int i, s, iter = 0;
	double C, d, G;
	double *QD = new double[l];
	int max_iter = 1000;
	int *index = new int[l];
	double *alpha = new double[l];
	schar *y = new schar[l];
	int active_size = l;

	// PG: projected gradient, for shrinking and stopping
	double PG;
	double PGmax_old = INF;
	double PGmin_old = -INF;
	double PGmax_new, PGmin_new;

	// default solver_type: L2R_L2LOSS_SVC_DUAL
	double diag[3] = {0.5/Cn, 0, 0.5/Cp};
	double upper_bound[3] = {INF, 0, INF};
	if(solver_type == L2R_L1LOSS_SVC_DUAL)
	{
		diag[0] = 0;
		diag[2] = 0;
		upper_bound[0] = Cn;
		upper_bound[2] = Cp;
	}

	for(i=0; i<w_size; i++)
		w[i] = 0;
	for(i=0; i<l; i++)
	{
		alpha[i] = 0;
		if(prob->y[i] > 0)
		{
			y[i] = +1;
		}
		else
		{
			y[i] = -1;
		}
		QD[i] = diag[GETI(i)];

		feature_node *xi = prob->x[i];
		while (xi->index != -1)
		{
			QD[i] += (xi->value)*(xi->value);
			xi++;
		}
		index[i] = i;
	}

	while (iter < max_iter)
	{
		PGmax_new = -INF;
		PGmin_new = INF;

		for (i=0; i<active_size; i++)
		{
			int j = i+rand()%(active_size-i);
			swap(index[i], index[j]);
		}

		for (s=0; s<active_size; s++)
		{
			i = index[s];
			G = 0;
			schar yi = y[i];

			feature_node *xi = prob->x[i];
			while(xi->index!= -1)
			{
				G += w[xi->index-1]*(xi->value);
				xi++;
			}
			G = G*yi-1;

			C = upper_bound[GETI(i)];
			G += alpha[i]*diag[GETI(i)];

			PG = 0;
			if (alpha[i] == 0)
			{
				if (G > PGmax_old)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
				else if (G < 0)
					PG = G;
			}
			else if (alpha[i] == C)
			{
				if (G < PGmin_old)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
				else if (G > 0)
					PG = G;
			}
			else
				PG = G;

			PGmax_new = max(PGmax_new, PG);
			PGmin_new = min(PGmin_new, PG);

			if(fabs(PG) > 1.0e-12)
			{
				double alpha_old = alpha[i];
				alpha[i] = min(max(alpha[i] - G/QD[i], 0.0), C);
				d = (alpha[i] - alpha_old)*yi;
				xi = prob->x[i];
				while (xi->index != -1)
				{
					w[xi->index-1] += d*xi->value;
					xi++;
				}
			}
		}

		iter++;
		if(iter % 10 == 0)
			info(".");

		if(PGmax_new - PGmin_new <= eps)
		{
			if(active_size == l)
				break;
			else
			{
				active_size = l;
				info("*");
				PGmax_old = INF;
				PGmin_old = -INF;
				continue;
			}
		}
		PGmax_old = PGmax_new;
		PGmin_old = PGmin_new;
		if (PGmax_old <= 0)
			PGmax_old = INF;
		if (PGmin_old >= 0)
			PGmin_old = -INF;
	}

	info("\noptimization finished, #iter = %d\n",iter);
	if (iter >= max_iter)
		info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n\n");

	// calculate objective value

	double v = 0;
	int nSV = 0;
	for(i=0; i<w_size; i++)
		v += w[i]*w[i];
	for(i=0; i<l; i++)
	{
		v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
		if(alpha[i] > 0)
			++nSV;
	}
	info("Objective value = %lf\n",v/2);
	info("nSV = %d\n",nSV);

	delete [] QD;
	delete [] alpha;
	delete [] y;
	delete [] index;
}

// A coordinate descent algorithm for
// the dual of L2-regularized logistic regression problems
//
//  min_\alpha  0.5(\alpha^T Q \alpha) + \sum \alpha_i log (\alpha_i) + (upper_bound_i - \alpha_i) log (upper_bound_i - \alpha_i),
//    s.t.      0 <= \alpha_i <= upper_bound_i,
//
//  where Qij = yi yj xi^T xj and
//  upper_bound_i = Cp if y_i = 1
//  upper_bound_i = Cn if y_i = -1
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Algorithm 5 of Yu et al., MLJ 2010

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)
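
// Illustrative sketch (not part of the library): the per-coordinate
// sub-problem below minimizes, over z in (0, C),
//   g(z) = z log z + (C-z) log(C-z) + 0.5 a (z-alpha_old)^2 + sign*b (z-alpha_old)
// so the inner Newton iteration uses
//   g'(z)  = a (z-alpha_old) + sign*b + log(z/(C-z))
//   g''(z) = a + C/((C-z) z)
// and falls back to z <- eta*z whenever the full Newton step leaves (0, C).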

void solve_l2r_lr_dual(const problem *prob, double *w, double eps, double Cp, double Cn)
{
	int l = prob->l;
	int w_size = prob->n;
	int i, s, iter = 0;
	double *xTx = new double[l];
	int max_iter = 1000;
	int *index = new int[l];
	double *alpha = new double[2*l]; // store alpha and C - alpha
	schar *y = new schar[l];
	int max_inner_iter = 100; // for inner Newton
	double innereps = 1e-2;
	double innereps_min = min(1e-8, eps);
	double upper_bound[3] = {Cn, 0, Cp};

	for(i=0; i<w_size; i++)
		w[i] = 0;
	for(i=0; i<l; i++)
	{
		if(prob->y[i] > 0)
		{
			y[i] = +1;
		}
		else
		{
			y[i] = -1;
		}
		alpha[2*i] = min(0.001*upper_bound[GETI(i)], 1e-8);
		alpha[2*i+1] = upper_bound[GETI(i)] - alpha[2*i];

		xTx[i] = 0;
		feature_node *xi = prob->x[i];
		while (xi->index != -1)
		{
			xTx[i] += (xi->value)*(xi->value);
			w[xi->index-1] += y[i]*alpha[2*i]*xi->value;
			xi++;
		}
		index[i] = i;
	}

	while (iter < max_iter)
	{
		for (i=0; i<l; i++)
		{
			int j = i+rand()%(l-i);
			swap(index[i], index[j]);
		}
		int newton_iter = 0;
		double Gmax = 0;
		for (s=0; s<l; s++)
		{
			i = index[s];
			schar yi = y[i];
			double C = upper_bound[GETI(i)];
			double ywTx = 0, xisq = xTx[i];
			feature_node *xi = prob->x[i];
			while (xi->index != -1)
			{
				ywTx += w[xi->index-1]*xi->value;
				xi++;
			}
			ywTx *= y[i];
			double a = xisq, b = ywTx;

			// Decide to minimize g_1(z) or g_2(z)
			int ind1 = 2*i, ind2 = 2*i+1, sign = 1;
			if(0.5*a*(alpha[ind2]-alpha[ind1])+b < 0)
			{
				ind1 = 2*i+1;
				ind2 = 2*i;
				sign = -1;
			}

			//  g_t(z) = z*log(z) + (C-z)*log(C-z) + 0.5a(z-alpha_old)^2 + sign*b(z-alpha_old)
			double alpha_old = alpha[ind1];
			double z = alpha_old;
			if(C - z < 0.5 * C)
				z = 0.1*z;
			double gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
			Gmax = max(Gmax, fabs(gp));

			// Newton method on the sub-problem
			const double eta = 0.1; // xi in the paper
			int inner_iter = 0;
			while (inner_iter <= max_inner_iter)
			{
				if(fabs(gp) < innereps)
					break;
				double gpp = a + C/(C-z)/z;
				double tmpz = z - gp/gpp;
				if(tmpz <= 0)
					z *= eta;
				else // tmpz in (0, C)
					z = tmpz;
				gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
				newton_iter++;
				inner_iter++;
			}

			if(inner_iter > 0) // update w
			{
				alpha[ind1] = z;
				alpha[ind2] = C-z;
				xi = prob->x[i];
				while (xi->index != -1)
				{
					w[xi->index-1] += sign*(z-alpha_old)*yi*xi->value;
					xi++;
				}
			}
		}

		iter++;
		if(iter % 10 == 0)
			info(".");

		if(Gmax < eps)
			break;

		if(newton_iter <= l/10)
			innereps = max(innereps_min, 0.1*innereps);

	}

	info("\noptimization finished, #iter = %d\n",iter);
	if (iter >= max_iter)
		info("\nWARNING: reaching max number of iterations\nUsing -s 0 may be faster (also see FAQ)\n\n");

	// calculate objective value

	double v = 0;
	for(i=0; i<w_size; i++)
		v += w[i] * w[i];
	v *= 0.5;
	for(i=0; i<l; i++)
		v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1])
			- upper_bound[GETI(i)] * log(upper_bound[GETI(i)]);
	info("Objective value = %lf\n", v);

	delete [] xTx;
	delete [] alpha;
	delete [] y;
	delete [] index;
}

// A coordinate descent algorithm for
// L1-regularized L2-loss support vector classification
//
//  min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2,
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Yuan et al. (2010) and appendix of LIBLINEAR paper, Fan et al. (2008)

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)
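
// Illustrative sketch (not part of the library): for one feature j the
// solver below minimizes |w_j + d| + G d + 0.5 H d^2, whose closed form is
// the soft-threshold-style Newton direction
//   d = -(G+1)/H   if G+1 <= H w_j
//   d = -(G-1)/H   if G-1 >= H w_j
//   d = -w_j       otherwise (the step would cross zero),
// followed by a backtracking line search on the exact loss.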

static void solve_l1r_l2_svc(
	problem *prob_col, double *w, double eps,
	double Cp, double Cn)
{
	int l = prob_col->l;
	int w_size = prob_col->n;
	int j, s, iter = 0;
	int max_iter = 1000;
	int active_size = w_size;
	int max_num_linesearch = 20;

	double sigma = 0.01;
	double d, G_loss, G, H;
	double Gmax_old = INF;
	double Gmax_new, Gnorm1_new;
	double Gnorm1_init;
	double d_old, d_diff;
	double loss_old, loss_new;
	double appxcond, cond;

	int *index = new int[w_size];
	schar *y = new schar[l];
	double *b = new double[l]; // b = 1-ywTx
	double *xj_sq = new double[w_size];
	feature_node *x;

	double C[3] = {Cn,0,Cp};

	for(j=0; j<l; j++)
	{
		b[j] = 1;
		if(prob_col->y[j] > 0)
			y[j] = 1;
		else
			y[j] = -1;
	}
	for(j=0; j<w_size; j++)
	{
		w[j] = 0;
		index[j] = j;
		xj_sq[j] = 0;
		x = prob_col->x[j];
		while(x->index != -1)
		{
			int ind = x->index-1;
			double val = x->value;
			x->value *= y[ind]; // x->value stores yi*xij
			xj_sq[j] += C[GETI(ind)]*val*val;
			x++;
		}
	}

	while(iter < max_iter)
	{
		Gmax_new = 0;
		Gnorm1_new = 0;

		for(j=0; j<active_size; j++)
		{
			int i = j+rand()%(active_size-j);
			swap(index[i], index[j]);
		}

		for(s=0; s<active_size; s++)
		{
			j = index[s];
			G_loss = 0;
			H = 0;

			x = prob_col->x[j];
			while(x->index != -1)
			{
				int ind = x->index-1;
				if(b[ind] > 0)
				{
					double val = x->value;
					double tmp = C[GETI(ind)]*val;
					G_loss -= tmp*b[ind];
					H += tmp*val;
				}
				x++;
			}
			G_loss *= 2;

			G = G_loss;
			H *= 2;
			H = max(H, 1e-12);

			double Gp = G+1;
			double Gn = G-1;
			double violation = 0;
			if(w[j] == 0)
			{
				if(Gp < 0)
					violation = -Gp;
				else if(Gn > 0)
					violation = Gn;
				else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
			}
			else if(w[j] > 0)
				violation = fabs(Gp);
			else
				violation = fabs(Gn);

			Gmax_new = max(Gmax_new, violation);
			Gnorm1_new += violation;

			// obtain Newton direction d
			if(Gp <= H*w[j])
				d = -Gp/H;
			else if(Gn >= H*w[j])
				d = -Gn/H;
			else
				d = -w[j];

			if(fabs(d) < 1.0e-12)
				continue;

			double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
			d_old = 0;
			int num_linesearch;
			for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
			{
				d_diff = d_old - d;
				cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;

				appxcond = xj_sq[j]*d*d + G_loss*d + cond;
				if(appxcond <= 0)
				{
					x = prob_col->x[j];
					while(x->index != -1)
					{
						b[x->index-1] += d_diff*x->value;
						x++;
					}
					break;
				}

				if(num_linesearch == 0)
				{
					loss_old = 0;
					loss_new = 0;
					x = prob_col->x[j];
					while(x->index != -1)
					{
						int ind = x->index-1;
						if(b[ind] > 0)
							loss_old += C[GETI(ind)]*b[ind]*b[ind];
						double b_new = b[ind] + d_diff*x->value;
						b[ind] = b_new;
						if(b_new > 0)
							loss_new += C[GETI(ind)]*b_new*b_new;
						x++;
					}
				}
				else
				{
					loss_new = 0;
					x = prob_col->x[j];
					while(x->index != -1)
					{
						int ind = x->index-1;
						double b_new = b[ind] + d_diff*x->value;
						b[ind] = b_new;
						if(b_new > 0)
							loss_new += C[GETI(ind)]*b_new*b_new;
						x++;
					}
				}

				cond = cond + loss_new - loss_old;
				if(cond <= 0)
					break;
				else
				{
					d_old = d;
					d *= 0.5;
					delta *= 0.5;
				}
			}

			w[j] += d;

			// recompute b[] if line search takes too many steps
			if(num_linesearch >= max_num_linesearch)
			{
				info("#");
				for(int i=0; i<l; i++)
					b[i] = 1;

				for(int i=0; i<w_size; i++)
				{
					if(w[i]==0) continue;
					x = prob_col->x[i];
					while(x->index != -1)
					{
						b[x->index-1] -= w[i]*x->value;
						x++;
					}
				}
			}
		}

		if(iter == 0)
			Gnorm1_init = Gnorm1_new;
		iter++;
		if(iter % 10 == 0)
			info(".");

		if(Gnorm1_new <= eps*Gnorm1_init)
		{
			if(active_size == w_size)
				break;
			else
			{
				active_size = w_size;
				info("*");
				Gmax_old = INF;
				continue;
			}
		}

		Gmax_old = Gmax_new;
	}

	info("\noptimization finished, #iter = %d\n", iter);
	if(iter >= max_iter)
		info("\nWARNING: reaching max number of iterations\n");

	// calculate objective value

	double v = 0;
	int nnz = 0;
	for(j=0; j<w_size; j++)
	{
		x = prob_col->x[j];
		while(x->index != -1)
		{
			x->value *= prob_col->y[x->index-1]; // restore x->value
			x++;
		}
		if(w[j] != 0)
		{
			v += fabs(w[j]);
			nnz++;
		}
	}
	for(j=0; j<l; j++)
		if(b[j] > 0)
			v += C[GETI(j)]*b[j]*b[j];

	info("Objective value = %lf\n", v);
	info("#nonzeros/#features = %d/%d\n", nnz, w_size);

	delete [] index;
	delete [] y;
	delete [] b;
	delete [] xj_sq;
}

// A coordinate descent algorithm for
// L1-regularized logistic regression problems
//
//  min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)),
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Yuan et al. (2011) and appendix of LIBLINEAR paper, Fan et al. (2008)

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)
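
// Illustrative sketch (not part of the library): with the initial w = 0,
// sigma(-y_i w^T x_i) = 0.5, which explains the initialization below:
//   tau[i] = C_i * sigma(-y_i w^T x_i)             (gradient coefficient)
//   D[i]   = C_i * sigma(.) * (1 - sigma(.))       (Hessian coefficient)
// i.e. tau[i] = 0.5 C_i and D[i] = 0.25 C_i before the first Newton step.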

static void solve_l1r_lr(
	const problem *prob_col, double *w, double eps,
	double Cp, double Cn)
{
	int l = prob_col->l;
	int w_size = prob_col->n;
	int j, s, newton_iter=0, iter=0;
	int max_newton_iter = 100;
	int max_iter = 1000;
	int max_num_linesearch = 20;
	int active_size;
	int QP_active_size;

	double nu = 1e-12;
	double inner_eps = 1;
	double sigma = 0.01;
	double w_norm=0, w_norm_new;
	double z, G, H;
	double Gnorm1_init;
	double Gmax_old = INF;
	double Gmax_new, Gnorm1_new;
	double QP_Gmax_old = INF;
	double QP_Gmax_new, QP_Gnorm1_new;
	double delta, negsum_xTd, cond;

	int *index = new int[w_size];
	schar *y = new schar[l];
	double *Hdiag = new double[w_size];
	double *Grad = new double[w_size];
	double *wpd = new double[w_size];
	double *xjneg_sum = new double[w_size];
	double *xTd = new double[l];
	double *exp_wTx = new double[l];
	double *exp_wTx_new = new double[l];
	double *tau = new double[l];
	double *D = new double[l];
	feature_node *x;

	double C[3] = {Cn,0,Cp};

	for(j=0; j<l; j++)
	{
		if(prob_col->y[j] > 0)
			y[j] = 1;
		else
			y[j] = -1;

		// assume initial w is 0
		exp_wTx[j] = 1;
		tau[j] = C[GETI(j)]*0.5;
		D[j] = C[GETI(j)]*0.25;
	}
	for(j=0; j<w_size; j++)
	{
		w[j] = 0;
		wpd[j] = w[j];
		index[j] = j;
		xjneg_sum[j] = 0;
		x = prob_col->x[j];
		while(x->index != -1)
		{
			int ind = x->index-1;
			if(y[ind] == -1)
				xjneg_sum[j] += C[GETI(ind)]*x->value;
			x++;
		}
	}

	while(newton_iter < max_newton_iter)
	{
		Gmax_new = 0;
		Gnorm1_new = 0;
		active_size = w_size;

		for(s=0; s<active_size; s++)
		{
			j = index[s];
			Hdiag[j] = nu;
			Grad[j] = 0;

			double tmp = 0;
			x = prob_col->x[j];
			while(x->index != -1)
			{
				int ind = x->index-1;
				Hdiag[j] += x->value*x->value*D[ind];
				tmp += x->value*tau[ind];
				x++;
			}
			Grad[j] = -tmp + xjneg_sum[j];

			double Gp = Grad[j]+1;
			double Gn = Grad[j]-1;
			double violation = 0;
			if(w[j] == 0)
			{
				if(Gp < 0)
					violation = -Gp;
				else if(Gn > 0)
					violation = Gn;
				//outer-level shrinking
				else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
			}
			else if(w[j] > 0)
				violation = fabs(Gp);
			else
				violation = fabs(Gn);

			Gmax_new = max(Gmax_new, violation);
			Gnorm1_new += violation;
		}

		if(newton_iter == 0)
			Gnorm1_init = Gnorm1_new;

		if(Gnorm1_new <= eps*Gnorm1_init)
			break;

		iter = 0;
		QP_Gmax_old = INF;
		QP_active_size = active_size;

		for(int i=0; i<l; i++)
			xTd[i] = 0;

		// optimize QP over wpd
		while(iter < max_iter)
		{
			QP_Gmax_new = 0;
			QP_Gnorm1_new = 0;

			for(j=0; j<QP_active_size; j++)
			{
				int i = j+rand()%(QP_active_size-j);
				swap(index[i], index[j]);
			}

			for(s=0; s<QP_active_size; s++)
			{
				j = index[s];
				H = Hdiag[j];

				x = prob_col->x[j];
				G = Grad[j] + (wpd[j]-w[j])*nu;
				while(x->index != -1)
				{
					int ind = x->index-1;
					G += x->value*D[ind]*xTd[ind];
					x++;
				}

				double Gp = G+1;
				double Gn = G-1;
				double violation = 0;
				if(wpd[j] == 0)
				{
					if(Gp < 0)
						violation = -Gp;
					else if(Gn > 0)
						violation = Gn;
					//inner-level shrinking
					else if(Gp>QP_Gmax_old/l && Gn<-QP_Gmax_old/l)
					{
						QP_active_size--;
						swap(index[s], index[QP_active_size]);
						s--;
						continue;
					}
				}
				else if(wpd[j] > 0)
					violation = fabs(Gp);
				else
					violation = fabs(Gn);

				QP_Gmax_new = max(QP_Gmax_new, violation);
				QP_Gnorm1_new += violation;

				// obtain solution of one-variable problem
				if(Gp <= H*wpd[j])
					z = -Gp/H;
				else if(Gn >= H*wpd[j])
					z = -Gn/H;
				else
					z = -wpd[j];

				if(fabs(z) < 1.0e-12)
					continue;
				z = min(max(z,-10.0),10.0);

				wpd[j] += z;

				x = prob_col->x[j];
				while(x->index != -1)
				{
					int ind = x->index-1;
					xTd[ind] += x->value*z;
					x++;
				}
			}

			iter++;

			if(QP_Gnorm1_new <= inner_eps*Gnorm1_init)
			{
				//inner stopping
				if(QP_active_size == active_size)
					break;
				//active set reactivation
				else
				{
					QP_active_size = active_size;
					QP_Gmax_old = INF;
					continue;
				}
			}

			QP_Gmax_old = QP_Gmax_new;
		}

		if(iter >= max_iter)
			info("WARNING: reaching max number of inner iterations\n");

		delta = 0;
		w_norm_new = 0;
		for(j=0; j<w_size; j++)
		{
			delta += Grad[j]*(wpd[j]-w[j]);
			if(wpd[j] != 0)
				w_norm_new += fabs(wpd[j]);
		}
		delta += (w_norm_new-w_norm);

		negsum_xTd = 0;
		for(int i=0; i<l; i++)
			if(y[i] == -1)
				negsum_xTd += C[GETI(i)]*xTd[i];

		int num_linesearch;
		for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
		{
			cond = w_norm_new - w_norm + negsum_xTd - sigma*delta;

			for(int i=0; i<l; i++)
			{
				double exp_xTd = exp(xTd[i]);
				exp_wTx_new[i] = exp_wTx[i]*exp_xTd;
				cond += C[GETI(i)]*log((1+exp_wTx_new[i])/(exp_xTd+exp_wTx_new[i]));
			}

			if(cond <= 0)
			{
				w_norm = w_norm_new;
				for(j=0; j<w_size; j++)
					w[j] = wpd[j];
				for(int i=0; i<l; i++)
				{
					exp_wTx[i] = exp_wTx_new[i];
					double tau_tmp = 1/(1+exp_wTx[i]);
					tau[i] = C[GETI(i)]*tau_tmp;
					D[i] = C[GETI(i)]*exp_wTx[i]*tau_tmp*tau_tmp;
				}
				break;
			}
			else
			{
				w_norm_new = 0;
				for(j=0; j<w_size; j++)
				{
					wpd[j] = (w[j]+wpd[j])*0.5;
					if(wpd[j] != 0)
						w_norm_new += fabs(wpd[j]);
				}
				delta *= 0.5;
				negsum_xTd *= 0.5;
				for(int i=0; i<l; i++)
					xTd[i] *= 0.5;
			}
		}

		// Recompute some info due to too many line search steps
		if(num_linesearch >= max_num_linesearch)
		{
			for(int i=0; i<l; i++)
				exp_wTx[i] = 0;

			for(int i=0; i<w_size; i++)
			{
				if(w[i]==0) continue;
				x = prob_col->x[i];
				while(x->index != -1)
				{
					exp_wTx[x->index-1] += w[i]*x->value;
					x++;
				}
			}

			for(int i=0; i<l; i++)
				exp_wTx[i] = exp(exp_wTx[i]);
		}

		if(iter == 1)
			inner_eps *= 0.25;

		newton_iter++;
		Gmax_old = Gmax_new;

		info("iter %3d  #CD cycles %d\n", newton_iter, iter);
	}

	info("=========================\n");
	info("optimization finished, #iter = %d\n", newton_iter);
	if(newton_iter >= max_newton_iter)
		info("WARNING: reaching max number of iterations\n");

	// calculate objective value

	double v = 0;
	int nnz = 0;
	for(j=0; j<w_size; j++)
		if(w[j] != 0)
		{
			v += fabs(w[j]);
			nnz++;
		}
	for(j=0; j<l; j++)
		if(y[j] == 1)
			v += C[GETI(j)]*log(1+1/exp_wTx[j]);
		else
			v += C[GETI(j)]*log(1+exp_wTx[j]);

	info("Objective value = %lf\n", v);
	info("#nonzeros/#features = %d/%d\n", nnz, w_size);

	delete [] index;
	delete [] y;
	delete [] Hdiag;
	delete [] Grad;
	delete [] wpd;
	delete [] xjneg_sum;
	delete [] xTd;
	delete [] exp_wTx;
	delete [] exp_wTx_new;
	delete [] tau;
	delete [] D;
}

// transpose matrix X from row format to column format
static void transpose(const problem *prob, feature_node **x_space_ret, problem *prob_col)
{
	int i;
	int l = prob->l;
	int n = prob->n;
	int nnz = 0;
	int *col_ptr = new int[n+1];
	feature_node *x_space;
	prob_col->l = l;
	prob_col->n = n;
	prob_col->y = new int[l];
	prob_col->x = new feature_node*[n];

	for(i=0; i<l; i++)
		prob_col->y[i] = prob->y[i];

	for(i=0; i<n+1; i++)
		col_ptr[i] = 0;
	for(i=0; i<l; i++)
	{
		feature_node *x = prob->x[i];
		while(x->index != -1)
		{
			nnz++;
			col_ptr[x->index]++;
			x++;
		}
	}
	for(i=1; i<n+1; i++)
		col_ptr[i] += col_ptr[i-1] + 1;

	x_space = new feature_node[nnz+n];
	for(i=0; i<n; i++)
		prob_col->x[i] = &x_space[col_ptr[i]];

	for(i=0; i<l; i++)
	{
		feature_node *x = prob->x[i];
		while(x->index != -1)
		{
			int ind = x->index-1;
			x_space[col_ptr[ind]].index = i+1; // starts from 1
			x_space[col_ptr[ind]].value = x->value;
			col_ptr[ind]++;
			x++;
		}
	}
	for(i=0; i<n; i++)
		x_space[col_ptr[i]].index = -1;

	*x_space_ret = x_space;

	delete [] col_ptr;
}

// label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
// perm, length l, must be allocated before calling this subroutine
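// Illustrative example (hypothetical data): for y = {2, 1, 2, 3},
// group_classes() produces
//   nr_class = 3, label = {2, 1, 3}, count = {2, 1, 1},
//   start = {0, 2, 3}, perm = {0, 2, 1, 3}
// so instances of class label[k] occupy perm[start[k] .. start[k]+count[k]-1].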
static void group_classes(const problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
{
	int l = prob->l;
	int max_nr_class = 16;
	int nr_class = 0;
	int *label = Malloc(int,max_nr_class);
	int *count = Malloc(int,max_nr_class);
	int *data_label = Malloc(int,l);
	int i;

	for(i=0;i<l;i++)
	{
		int this_label = prob->y[i];
		int j;
		for(j=0;j<nr_class;j++)
		{
			if(this_label == label[j])
			{
				++count[j];
				break;
			}
		}
		data_label[i] = j;
		if(j == nr_class)
		{
			if(nr_class == max_nr_class)
			{
				max_nr_class *= 2;
				label = (int *)realloc(label,max_nr_class*sizeof(int));
				count = (int *)realloc(count,max_nr_class*sizeof(int));
			}
			label[nr_class] = this_label;
			count[nr_class] = 1;
			++nr_class;
		}
	}

	int *start = Malloc(int,nr_class);
	start[0] = 0;
	for(i=1;i<nr_class;i++)
		start[i] = start[i-1]+count[i-1];
	for(i=0;i<l;i++)
	{
		perm[start[data_label[i]]] = i;
		++start[data_label[i]];
	}
	start[0] = 0;
	for(i=1;i<nr_class;i++)
		start[i] = start[i-1]+count[i-1];

	*nr_class_ret = nr_class;
	*label_ret = label;
	*start_ret = start;
	*count_ret = count;
	free(data_label);
}

static void train_one(const problem *prob, const parameter *param, double *w, double Cp, double Cn)
{
	double eps=param->eps;
	int pos = 0;
	int neg = 0;
	for(int i=0;i<prob->l;i++)
		if(prob->y[i]==+1)
			pos++;
	neg = prob->l - pos;

	function *fun_obj=NULL;
	switch(param->solver_type)
	{
		case L2R_LR:
		{
			fun_obj=new l2r_lr_fun(prob, Cp, Cn);
			TRON tron_obj(fun_obj, eps*min(pos,neg)/prob->l);
			tron_obj.set_print_string(liblinear_print_string);
			tron_obj.tron(w);
			delete fun_obj;
			break;
		}
		case L2R_L2LOSS_SVC:
		{
			fun_obj=new l2r_l2_svc_fun(prob, Cp, Cn);
			TRON tron_obj(fun_obj, eps*min(pos,neg)/prob->l);
			tron_obj.set_print_string(liblinear_print_string);
			tron_obj.tron(w);
			delete fun_obj;
			break;
		}
		case L2R_L2LOSS_SVC_DUAL:
			solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
			break;
		case L2R_L1LOSS_SVC_DUAL:
			solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
			break;
		case L1R_L2LOSS_SVC:
		{
			problem prob_col;
			feature_node *x_space = NULL;
			transpose(prob, &x_space ,&prob_col);
			solve_l1r_l2_svc(&prob_col, w, eps*min(pos,neg)/prob->l, Cp, Cn);
			delete [] prob_col.y;
			delete [] prob_col.x;
			delete [] x_space;
			break;
		}
		case L1R_LR:
		{
			problem prob_col;
			feature_node *x_space = NULL;
			transpose(prob, &x_space ,&prob_col);
			solve_l1r_lr(&prob_col, w, eps*min(pos,neg)/prob->l, Cp, Cn);
			delete [] prob_col.y;
			delete [] prob_col.x;
			delete [] x_space;
			break;
		}
		case L2R_LR_DUAL:
			solve_l2r_lr_dual(prob, w, eps, Cp, Cn);
			break;
		default:
			fprintf(stderr, "Error: unknown solver_type\n");
			break;
	}
}

//
// Interface functions
//
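// Minimal usage sketch (not part of the library; hypothetical toy data, and
// assumes the problem/parameter field order declared in linear.h):
//
//	feature_node x_space[] = {{1,1.0},{2,0.5},{-1,0},{1,-1.0},{2,0.2},{-1,0}};
//	feature_node *rows[] = {&x_space[0], &x_space[3]};
//	int labels[] = {+1, -1};
//	problem prob = {2, 2, labels, rows, -1.0};              // l, n, y, x, bias
//	parameter param = {L2R_L2LOSS_SVC_DUAL, 0.1, 1.0, 0, NULL, NULL};
//	if(check_parameter(&prob, &param) == NULL)
//	{
//		model *m = train(&prob, &param);
//		int yhat = predict(m, rows[0]);
//		free_and_destroy_model(&m);
//	}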
model* train(const problem *prob, const parameter *param)
{
	int i,j;
	int l = prob->l;
	int n = prob->n;
	int w_size = prob->n;
	model *model_ = Malloc(model,1);

	if(prob->bias>=0)
		model_->nr_feature=n-1;
	else
		model_->nr_feature=n;
	model_->param = *param;
	model_->bias = prob->bias;

	int nr_class;
	int *label = NULL;
	int *start = NULL;
	int *count = NULL;
	int *perm = Malloc(int,l);

	// group training data of the same class
	group_classes(prob,&nr_class,&label,&start,&count,perm);

	model_->nr_class=nr_class;
	model_->label = Malloc(int,nr_class);
	for(i=0;i<nr_class;i++)
		model_->label[i] = label[i];

	// calculate weighted C
	double *weighted_C = Malloc(double, nr_class);
	for(i=0;i<nr_class;i++)
		weighted_C[i] = param->C;
	for(i=0;i<param->nr_weight;i++)
	{
		for(j=0;j<nr_class;j++)
			if(param->weight_label[i] == label[j])
				break;
		if(j == nr_class)
			fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
		else
			weighted_C[j] *= param->weight[i];
	}

	// constructing the subproblem
	feature_node **x = Malloc(feature_node *,l);
	for(i=0;i<l;i++)
		x[i] = prob->x[perm[i]];

	int k;
	problem sub_prob;
	sub_prob.l = l;
	sub_prob.n = n;
	sub_prob.x = Malloc(feature_node *,sub_prob.l);
	sub_prob.y = Malloc(int,sub_prob.l);

	for(k=0; k<sub_prob.l; k++)
		sub_prob.x[k] = x[k];

	// multi-class svm by Crammer and Singer
	if(param->solver_type == MCSVM_CS)
	{
		model_->w=Malloc(double, n*nr_class);
		for(i=0;i<nr_class;i++)
			for(j=start[i];j<start[i]+count[i];j++)
				sub_prob.y[j] = i;
		Solver_MCSVM_CS Solver(&sub_prob, nr_class, weighted_C, param->eps);
		Solver.Solve(model_->w);
	}
	else
	{
		if(nr_class == 2)
		{
			model_->w=Malloc(double, w_size);

			int e0 = start[0]+count[0];
			k=0;
			for(; k<e0; k++)
				sub_prob.y[k] = +1;
			for(; k<sub_prob.l; k++)
				sub_prob.y[k] = -1;

			train_one(&sub_prob, param, &model_->w[0], weighted_C[0], weighted_C[1]);
		}
		else
		{
			model_->w=Malloc(double, w_size*nr_class);
			double *w=Malloc(double, w_size);
			for(i=0;i<nr_class;i++)
			{
				int si = start[i];
				int ei = si+count[i];

				k=0;
				for(; k<si; k++)
					sub_prob.y[k] = -1;
				for(; k<ei; k++)
					sub_prob.y[k] = +1;
				for(; k<sub_prob.l; k++)
					sub_prob.y[k] = -1;

				train_one(&sub_prob, param, w, weighted_C[i], param->C);

				for(int j=0;j<w_size;j++)
					model_->w[j*nr_class+i] = w[j];
			}
			free(w);
		}

	}

	free(x);
	free(label);
	free(start);
	free(count);
	free(perm);
	free(sub_prob.x);
	free(sub_prob.y);
	free(weighted_C);
	return model_;
}

void cross_validation(const problem *prob, const parameter *param, int nr_fold, int *target)
{
	int i;
	int *fold_start = Malloc(int,nr_fold+1);
	int l = prob->l;
	int *perm = Malloc(int,l);

	for(i=0;i<l;i++) perm[i]=i;
	for(i=0;i<l;i++)
	{
		int j = i+rand()%(l-i);
		swap(perm[i],perm[j]);
	}
	for(i=0;i<=nr_fold;i++)
		fold_start[i]=i*l/nr_fold;

	for(i=0;i<nr_fold;i++)
	{
		int begin = fold_start[i];
		int end = fold_start[i+1];
		int j,k;
		struct problem subprob;

		subprob.bias = prob->bias;
		subprob.n = prob->n;
		subprob.l = l-(end-begin);
		subprob.x = Malloc(struct feature_node*,subprob.l);
		subprob.y = Malloc(int,subprob.l);

		k=0;
		for(j=0;j<begin;j++)
		{
			subprob.x[k] = prob->x[perm[j]];
			subprob.y[k] = prob->y[perm[j]];
			++k;
		}
		for(j=end;j<l;j++)
		{
			subprob.x[k] = prob->x[perm[j]];
			subprob.y[k] = prob->y[perm[j]];
			++k;
		}
		struct model *submodel = train(&subprob,param);
		for(j=begin;j<end;j++)
			target[perm[j]] = predict(submodel,prob->x[perm[j]]);
		free_and_destroy_model(&submodel);
		free(subprob.x);
		free(subprob.y);
	}
	free(fold_start);
	free(perm);
}

int predict_values(const struct model *model_, const struct feature_node *x, double *dec_values)
{
	int idx;
	int n;
	if(model_->bias>=0)
		n=model_->nr_feature+1;
	else
		n=model_->nr_feature;
	double *w=model_->w;
	int nr_class=model_->nr_class;
	int i;
	int nr_w;
	if(nr_class==2 && model_->param.solver_type != MCSVM_CS)
		nr_w = 1;
	else
		nr_w = nr_class;

	const feature_node *lx=x;
	for(i=0;i<nr_w;i++)
		dec_values[i] = 0;
	for(; (idx=lx->index)!=-1; lx++)
	{
		// the dimension of testing data may exceed that of training
		if(idx<=n)
			for(i=0;i<nr_w;i++)
				dec_values[i] += w[(idx-1)*nr_w+i]*lx->value;
	}

	if(nr_class==2)
		return (dec_values[0]>0)?model_->label[0]:model_->label[1];
	else
	{
		int dec_max_idx = 0;
		for(i=1;i<nr_class;i++)
		{
			if(dec_values[i] > dec_values[dec_max_idx])
				dec_max_idx = i;
		}
		return model_->label[dec_max_idx];
	}
}

int predict(const model *model_, const feature_node *x)
{
	double *dec_values = Malloc(double, model_->nr_class);
	int label=predict_values(model_, x, dec_values);
	free(dec_values);
	return label;
}

int predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates)
{
	if(check_probability_model(model_))
	{
		int i;
		int nr_class=model_->nr_class;
		int nr_w;
		if(nr_class==2)
			nr_w = 1;
		else
			nr_w = nr_class;

		int label=predict_values(model_, x, prob_estimates);
		for(i=0;i<nr_w;i++)
			prob_estimates[i]=1/(1+exp(-prob_estimates[i]));

		if(nr_class==2) // for binary classification
			prob_estimates[1]=1.-prob_estimates[0];
		else
		{
			double sum=0;
			for(i=0; i<nr_class; i++)
				sum+=prob_estimates[i];

			for(i=0; i<nr_class; i++)
				prob_estimates[i]=prob_estimates[i]/sum;
		}

		return label;
	}
	else
		return 0;
}

static const char *solver_type_table[]=
{
	"L2R_LR", "L2R_L2LOSS_SVC_DUAL", "L2R_L2LOSS_SVC", "L2R_L1LOSS_SVC_DUAL", "MCSVM_CS",
	"L1R_L2LOSS_SVC", "L1R_LR", "L2R_LR_DUAL", NULL
};

int save_model(const char *model_file_name, const struct model *model_)
{
	int i;
	int nr_feature=model_->nr_feature;
	int n;
	const parameter& param = model_->param;

	if(model_->bias>=0)
		n=nr_feature+1;
	else
		n=nr_feature;
	int w_size = n;
	FILE *fp = fopen(model_file_name,"w");
	if(fp==NULL) return -1;

	int nr_w;
	if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS)
		nr_w=1;
	else
		nr_w=model_->nr_class;

	fprintf(fp, "solver_type %s\n", solver_type_table[param.solver_type]);
	fprintf(fp, "nr_class %d\n", model_->nr_class);
	fprintf(fp, "label");
	for(i=0; i<model_->nr_class; i++)
		fprintf(fp, " %d", model_->label[i]);
	fprintf(fp, "\n");

	fprintf(fp, "nr_feature %d\n", nr_feature);

	fprintf(fp, "bias %.16g\n", model_->bias);

	fprintf(fp, "w\n");
	for(i=0; i<w_size; i++)
	{
		int j;
		for(j=0; j<nr_w; j++)
			fprintf(fp, "%.16g ", model_->w[i*nr_w+j]);
		fprintf(fp, "\n");
	}

	if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
	else return 0;
}

struct model *load_model(const char *model_file_name)
{
	FILE *fp = fopen(model_file_name,"r");
	if(fp==NULL) return NULL;

	int i;
	int nr_feature;
	int n;
	int nr_class;
	double bias;
	model *model_ = Malloc(model,1);
	parameter& param = model_->param;

	model_->label = NULL;

	char cmd[81];
	while(1)
	{
		fscanf(fp,"%80s",cmd);
		if(strcmp(cmd,"solver_type")==0)
		{
			fscanf(fp,"%80s",cmd);
			int i;
			for(i=0;solver_type_table[i];i++)
			{
				if(strcmp(solver_type_table[i],cmd)==0)
				{
					param.solver_type=i;
					break;
				}
			}
			if(solver_type_table[i] == NULL)
			{
				fprintf(stderr,"unknown solver type.\n");
				free(model_->label);
				free(model_);
				return NULL;
			}
		}
		else if(strcmp(cmd,"nr_class")==0)
		{
			fscanf(fp,"%d",&nr_class);
			model_->nr_class=nr_class;
		}
		else if(strcmp(cmd,"nr_feature")==0)
		{
			fscanf(fp,"%d",&nr_feature);
			model_->nr_feature=nr_feature;
		}
		else if(strcmp(cmd,"bias")==0)
		{
			fscanf(fp,"%lf",&bias);
			model_->bias=bias;
		}
		else if(strcmp(cmd,"w")==0)
		{
			break;
		}
		else if(strcmp(cmd,"label")==0)
		{
			int nr_class = model_->nr_class;
			model_->label = Malloc(int,nr_class);
			for(int i=0;i<nr_class;i++)
				fscanf(fp,"%d",&model_->label[i]);
		}
		else
		{
			fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
			free(model_->label); // avoid leaking the label array on this error path
			free(model_);
			return NULL;
		}
	}

	nr_feature=model_->nr_feature;
	if(model_->bias>=0)
		n=nr_feature+1;
	else
		n=nr_feature;
	int w_size = n;
	int nr_w;
	if(nr_class==2 && param.solver_type != MCSVM_CS)
		nr_w = 1;
	else
		nr_w = nr_class;

	model_->w=Malloc(double, w_size*nr_w);
	for(i=0; i<w_size; i++)
	{
		int j;
		for(j=0; j<nr_w; j++)
			fscanf(fp, "%lf ", &model_->w[i*nr_w+j]);
		fscanf(fp, "\n");
	}
	if (ferror(fp) != 0 || fclose(fp) != 0) return NULL;

	return model_;
}

int get_nr_feature(const model *model_)
{
	return model_->nr_feature;
}

int get_nr_class(const model *model_)
{
	return model_->nr_class;
}

void get_labels(const model *model_, int* label)
{
	if (model_->label != NULL)
		for(int i=0;i<model_->nr_class;i++)
			label[i] = model_->label[i];
}

void free_model_content(struct model *model_ptr)
{
	if(model_ptr->w != NULL)
		free(model_ptr->w);
	if(model_ptr->label != NULL)
		free(model_ptr->label);
}

void free_and_destroy_model(struct model **model_ptr_ptr)
{
	struct model *model_ptr = *model_ptr_ptr;
	if(model_ptr != NULL)
	{
		free_model_content(model_ptr);
		free(model_ptr);
	}
}

void destroy_param(parameter* param)
{
	if(param->weight_label != NULL)
		free(param->weight_label);
	if(param->weight != NULL)
		free(param->weight);
}

const char *check_parameter(const problem *prob, const parameter *param)
{
	if(param->eps <= 0)
		return "eps <= 0";

	if(param->C <= 0)
		return "C <= 0";

	if(param->solver_type != L2R_LR
		&& param->solver_type != L2R_L2LOSS_SVC_DUAL
		&& param->solver_type != L2R_L2LOSS_SVC
		&& param->solver_type != L2R_L1LOSS_SVC_DUAL
		&& param->solver_type != MCSVM_CS
		&& param->solver_type != L1R_L2LOSS_SVC
		&& param->solver_type != L1R_LR
		&& param->solver_type != L2R_LR_DUAL)
		return "unknown solver type";

	return NULL;
}

int check_probability_model(const struct model *model_)
{
	return (model_->param.solver_type==L2R_LR ||
			model_->param.solver_type==L2R_LR_DUAL ||
			model_->param.solver_type==L1R_LR);
}

void set_print_string_function(void (*print_func)(const char*))
{
	if (print_func == NULL)
		liblinear_print_string = &print_string_stdout;
	else
		liblinear_print_string = print_func;
}