1 /*
2   This code was extracted from liblinear-2.2.1 in Feb 2019 and
3   modified for use with Octave and Matlab
4 
5 Copyright (c) 2007-2019 The LIBLINEAR Project.
6 All rights reserved.
7 
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions
10 are met:
11 
12 1. Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 
15 2. Redistributions in binary form must reproduce the above copyright
16 notice, this list of conditions and the following disclaimer in the
17 documentation and/or other materials provided with the distribution.
18 
19 3. Neither name of copyright holders nor the names of its contributors
20 may be used to endorse or promote products derived from this software
21 without specific prior written permission.
22 
23 
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27 A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR
28 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
29 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
30 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
31 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
32 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
33 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 
36 */
37 
38 #include <math.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <stdarg.h>
43 #include <locale.h>
44 #include "linear.h"
45 #include "tron.h"
46 int liblinear_version = LIBLINEAR_VERSION;
47 typedef signed char schar;
48 template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
49 #ifndef min
50 template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
51 #endif
52 #ifndef max
53 template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
54 #endif
55 template <class S, class T> static inline void clone(T*& dst, S* src, int n)
56 {
57 	dst = new T[n];
58 	memcpy((void *)dst,(void *)src,sizeof(T)*n);
59 }
60 #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
61 #define INF HUGE_VAL
62 
63 static void print_string_stdout(const char *s)
64 {
65 	fputs(s,stdout);
66 	fflush(stdout);
67 }
68 static void print_null(const char*) {}
69 
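// All solver progress output is routed through this function pointer; it
// defaults to writing to stdout and can be pointed at print_null above to
// silence the library.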
70 static void (*liblinear_print_string) (const char *) = &print_string_stdout;
71 
72 #if 1
73 static void info(const char *fmt,...)
74 {
75 	char buf[BUFSIZ];
76 	va_list ap;
77 	va_start(ap,fmt);
78 	vsprintf(buf,fmt,ap);
79 	va_end(ap);
80 	(*liblinear_print_string)(buf);
81 }
82 #else
83 static void info(const char *fmt,...) {}
84 #endif
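// Small helpers over sparse feature vectors (arrays of feature_node
// terminated by index == -1): squared 2-norm, dot product with a dense
// vector s, and the sparse update y += a*x (axpy).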
85 class sparse_operator
86 {
87 public:
88 	static double nrm2_sq(const feature_node *x)
89 	{
90 		double ret = 0;
91 		while(x->index != -1)
92 		{
93 			ret += x->value*x->value;
94 			x++;
95 		}
96 		return (ret);
97 	}
98 
99 	static double dot(const double *s, const feature_node *x)
100 	{
101 		double ret = 0;
102 		while(x->index != -1)
103 		{
104 			ret += s[x->index-1]*x->value;
105 			x++;
106 		}
107 		return (ret);
108 	}
109 
110 	static void axpy(const double a, const feature_node *x, double *y)
111 	{
112 		while(x->index != -1)
113 		{
114 			y[x->index-1] += a*x->value;
115 			x++;
116 		}
117 	}
118 };
119 
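// Function/gradient/Hessian-vector evaluations for the primal of
// L2-regularized logistic regression,
//   f(w) = 0.5 w^T w + sum_i C_i log(1 + exp(-y_i w^T x_i)),
// as required by the TRON trust-region Newton solver (see tron.h).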
120 class l2r_lr_fun: public function
121 {
122 public:
123 	l2r_lr_fun(const problem *prob, double *C);
124 	~l2r_lr_fun();
125 
126 	double fun(double *w);
127 	void grad(double *w, double *g);
128 	void Hv(double *s, double *Hs);
129 
130 	int get_nr_variable(void);
131 	void get_diag_preconditioner(double *M);
132 
133 private:
134 	void Xv(double *v, double *Xv);
135 	void XTv(double *v, double *XTv);
136 
137 	double *C;
138 	double *z;
139 	double *D;
140 	const problem *prob;
141 };
142 
143 l2r_lr_fun::l2r_lr_fun(const problem *prob, double *C)
144 {
145 	int l=prob->l;
146 
147 	this->prob = prob;
148 
149 	z = new double[l];
150 	D = new double[l];
151 	this->C = C;
152 }
153 
154 l2r_lr_fun::~l2r_lr_fun()
155 {
156 	delete[] z;
157 	delete[] D;
158 }
159 
160 
161 double l2r_lr_fun::fun(double *w)
162 {
163 	int i;
164 	double f=0;
165 	double *y=prob->y;
166 	int l=prob->l;
167 	int w_size=get_nr_variable();
168 
169 	Xv(w, z);
170 
171 	for(i=0;i<w_size;i++)
172 		f += w[i]*w[i];
173 	f /= 2.0;
174 	for(i=0;i<l;i++)
175 	{
176 		double yz = y[i]*z[i];
177 		if (yz >= 0)
178 			f += C[i]*log(1 + exp(-yz));
179 		else
180 			f += C[i]*(-yz+log(1 + exp(yz)));
181 	}
182 
183 	return(f);
184 }
185 
186 void l2r_lr_fun::grad(double *w, double *g)
187 {
188 	int i;
189 	double *y=prob->y;
190 	int l=prob->l;
191 	int w_size=get_nr_variable();
192 
193 	for(i=0;i<l;i++)
194 	{
195 		z[i] = 1/(1 + exp(-y[i]*z[i]));
196 		D[i] = z[i]*(1-z[i]);
197 		z[i] = C[i]*(z[i]-1)*y[i];
198 	}
199 	XTv(z, g);
200 
201 	for(i=0;i<w_size;i++)
202 		g[i] = w[i] + g[i];
203 }
204 
205 int l2r_lr_fun::get_nr_variable(void)
206 {
207 	return prob->n;
208 }
209 
210 void l2r_lr_fun::get_diag_preconditioner(double *M)
211 {
212 	int i;
213 	int l = prob->l;
214 	int w_size=get_nr_variable();
215 	feature_node **x = prob->x;
216 
217 	for (i=0; i<w_size; i++)
218 		M[i] = 1;
219 
220 	for (i=0; i<l; i++)
221 	{
222 		feature_node *s = x[i];
223 		while (s->index!=-1)
224 		{
225 			M[s->index-1] += s->value*s->value*C[i]*D[i];
226 			s++;
227 		}
228 	}
229 }
230 
231 void l2r_lr_fun::Hv(double *s, double *Hs)
232 {
233 	int i;
234 	int l=prob->l;
235 	int w_size=get_nr_variable();
236 	feature_node **x=prob->x;
237 
238 	for(i=0;i<w_size;i++)
239 		Hs[i] = 0;
240 	for(i=0;i<l;i++)
241 	{
242 		feature_node * const xi=x[i];
243 		double xTs = sparse_operator::dot(s, xi);
244 
245 		xTs = C[i]*D[i]*xTs;
246 
247 		sparse_operator::axpy(xTs, xi, Hs);
248 	}
249 	for(i=0;i<w_size;i++)
250 		Hs[i] = s[i] + Hs[i];
251 }
252 
253 void l2r_lr_fun::Xv(double *v, double *Xv)
254 {
255 	int i;
256 	int l=prob->l;
257 	feature_node **x=prob->x;
258 
259 	for(i=0;i<l;i++)
260 		Xv[i]=sparse_operator::dot(v, x[i]);
261 }
262 
263 void l2r_lr_fun::XTv(double *v, double *XTv)
264 {
265 	int i;
266 	int l=prob->l;
267 	int w_size=get_nr_variable();
268 	feature_node **x=prob->x;
269 
270 	for(i=0;i<w_size;i++)
271 		XTv[i]=0;
272 	for(i=0;i<l;i++)
273 		sparse_operator::axpy(v[i], x[i], XTv);
274 }
275 
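// Function/gradient/Hessian-vector evaluations for the primal of
// L2-regularized L2-loss SVC,
//   f(w) = 0.5 w^T w + sum_i C_i max(0, 1 - y_i w^T x_i)^2.
// Only instances with y_i w^T x_i < 1 (collected in the index set I) enter
// the gradient and the generalized Hessian.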
276 class l2r_l2_svc_fun: public function
277 {
278 public:
279 	l2r_l2_svc_fun(const problem *prob, double *C);
280 	~l2r_l2_svc_fun();
281 
282 	double fun(double *w);
283 	void grad(double *w, double *g);
284 	void Hv(double *s, double *Hs);
285 
286 	int get_nr_variable(void);
287 	void get_diag_preconditioner(double *M);
288 
289 protected:
290 	void Xv(double *v, double *Xv);
291 	void subXTv(double *v, double *XTv);
292 
293 	double *C;
294 	double *z;
295 	int *I;
296 	int sizeI;
297 	const problem *prob;
298 };
299 
300 l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *prob, double *C)
301 {
302 	int l=prob->l;
303 
304 	this->prob = prob;
305 
306 	z = new double[l];
307 	I = new int[l];
308 	this->C = C;
309 }
310 
311 l2r_l2_svc_fun::~l2r_l2_svc_fun()
312 {
313 	delete[] z;
314 	delete[] I;
315 }
316 
317 double l2r_l2_svc_fun::fun(double *w)
318 {
319 	int i;
320 	double f=0;
321 	double *y=prob->y;
322 	int l=prob->l;
323 	int w_size=get_nr_variable();
324 
325 	Xv(w, z);
326 
327 	for(i=0;i<w_size;i++)
328 		f += w[i]*w[i];
329 	f /= 2.0;
330 	for(i=0;i<l;i++)
331 	{
332 		z[i] = y[i]*z[i];
333 		double d = 1-z[i];
334 		if (d > 0)
335 			f += C[i]*d*d;
336 	}
337 
338 	return(f);
339 }
340 
341 void l2r_l2_svc_fun::grad(double *w, double *g)
342 {
343 	int i;
344 	double *y=prob->y;
345 	int l=prob->l;
346 	int w_size=get_nr_variable();
347 
348 	sizeI = 0;
349 	for (i=0;i<l;i++)
350 		if (z[i] < 1)
351 		{
352 			z[sizeI] = C[i]*y[i]*(z[i]-1);
353 			I[sizeI] = i;
354 			sizeI++;
355 		}
356 	subXTv(z, g);
357 
358 	for(i=0;i<w_size;i++)
359 		g[i] = w[i] + 2*g[i];
360 }
361 
362 int l2r_l2_svc_fun::get_nr_variable(void)
363 {
364 	return prob->n;
365 }
366 
367 void l2r_l2_svc_fun::get_diag_preconditioner(double *M)
368 {
369 	int i;
370 	int w_size=get_nr_variable();
371 	feature_node **x = prob->x;
372 
373 	for (i=0; i<w_size; i++)
374 		M[i] = 1;
375 
376 	for (i=0; i<sizeI; i++)
377 	{
378 		int idx = I[i];
379 		feature_node *s = x[idx];
380 		while (s->index!=-1)
381 		{
382 			M[s->index-1] += s->value*s->value*C[idx]*2;
383 			s++;
384 		}
385 	}
386 }
387 
388 void l2r_l2_svc_fun::Hv(double *s, double *Hs)
389 {
390 	int i;
391 	int w_size=get_nr_variable();
392 	feature_node **x=prob->x;
393 
394 	for(i=0;i<w_size;i++)
395 		Hs[i]=0;
396 	for(i=0;i<sizeI;i++)
397 	{
398 		feature_node * const xi=x[I[i]];
399 		double xTs = sparse_operator::dot(s, xi);
400 
401 		xTs = C[I[i]]*xTs;
402 
403 		sparse_operator::axpy(xTs, xi, Hs);
404 	}
405 	for(i=0;i<w_size;i++)
406 		Hs[i] = s[i] + 2*Hs[i];
407 }
408 
409 void l2r_l2_svc_fun::Xv(double *v, double *Xv)
410 {
411 	int i;
412 	int l=prob->l;
413 	feature_node **x=prob->x;
414 
415 	for(i=0;i<l;i++)
416 		Xv[i]=sparse_operator::dot(v, x[i]);
417 }
418 
419 void l2r_l2_svc_fun::subXTv(double *v, double *XTv)
420 {
421 	int i;
422 	int w_size=get_nr_variable();
423 	feature_node **x=prob->x;
424 
425 	for(i=0;i<w_size;i++)
426 		XTv[i]=0;
427 	for(i=0;i<sizeI;i++)
428 		sparse_operator::axpy(v[i], x[I[i]], XTv);
429 }
430 
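// Same as l2r_l2_svc_fun but with the epsilon-insensitive squared loss,
//   f(w) = 0.5 w^T w + sum_i C_i max(0, |w^T x_i - y_i| - p)^2.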
431 class l2r_l2_svr_fun: public l2r_l2_svc_fun
432 {
433 public:
434 	l2r_l2_svr_fun(const problem *prob, double *C, double p);
435 
436 	double fun(double *w);
437 	void grad(double *w, double *g);
438 
439 private:
440 	double p;
441 };
442 
443 l2r_l2_svr_fun::l2r_l2_svr_fun(const problem *prob, double *C, double p):
444 	l2r_l2_svc_fun(prob, C)
445 {
446 	this->p = p;
447 }
448 
449 double l2r_l2_svr_fun::fun(double *w)
450 {
451 	int i;
452 	double f=0;
453 	double *y=prob->y;
454 	int l=prob->l;
455 	int w_size=get_nr_variable();
456 	double d;
457 
458 	Xv(w, z);
459 
460 	for(i=0;i<w_size;i++)
461 		f += w[i]*w[i];
462 	f /= 2;
463 	for(i=0;i<l;i++)
464 	{
465 		d = z[i] - y[i];
466 		if(d < -p)
467 			f += C[i]*(d+p)*(d+p);
468 		else if(d > p)
469 			f += C[i]*(d-p)*(d-p);
470 	}
471 
472 	return(f);
473 }
474 
475 void l2r_l2_svr_fun::grad(double *w, double *g)
476 {
477 	int i;
478 	double *y=prob->y;
479 	int l=prob->l;
480 	int w_size=get_nr_variable();
481 	double d;
482 
483 	sizeI = 0;
484 	for(i=0;i<l;i++)
485 	{
486 		d = z[i] - y[i];
487 
488 		// generate index set I
489 		if(d < -p)
490 		{
491 			z[sizeI] = C[i]*(d+p);
492 			I[sizeI] = i;
493 			sizeI++;
494 		}
495 		else if(d > p)
496 		{
497 			z[sizeI] = C[i]*(d-p);
498 			I[sizeI] = i;
499 			sizeI++;
500 		}
501 
502 	}
503 	subXTv(z, g);
504 
505 	for(i=0;i<w_size;i++)
506 		g[i] = w[i] + 2*g[i];
507 }
508 
509 // A coordinate descent algorithm for
510 // multi-class support vector machines by Crammer and Singer
511 //
512 //  min_{\alpha}  0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i alpha^m_i
513 //    s.t.     \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i
514 //
515 //  where e^m_i = 0 if y_i  = m,
516 //        e^m_i = 1 if y_i != m,
517 //  C^m_i = C if m  = y_i,
518 //  C^m_i = 0 if m != y_i,
519 //  and w_m(\alpha) = \sum_i \alpha^m_i x_i
520 //
521 // Given:
522 // x, y, C
523 // eps is the stopping tolerance
524 //
525 // solution will be put in w
526 //
527 // See Appendix of LIBLINEAR paper, Fan et al. (2008)
528 
529 #define GETI(i) ((int) prob->y[i])
530 // To support weights for instances, use GETI(i) (i)
531 
532 class Solver_MCSVM_CS
533 {
534 	public:
535 		Solver_MCSVM_CS(const problem *prob, int nr_class, double *C, double eps=0.1, int max_iter=100000);
536 		~Solver_MCSVM_CS();
537 		void Solve(double *w);
538 	private:
539 		void solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new);
540 		bool be_shrunk(int i, int m, int yi, double alpha_i, double minG);
541 		double *B, *C, *G;
542 		int w_size, l;
543 		int nr_class;
544 		int max_iter;
545 		double eps;
546 		const problem *prob;
547 };
548 
549 Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *prob, int nr_class, double *weighted_C, double eps, int max_iter)
550 {
551 	this->w_size = prob->n;
552 	this->l = prob->l;
553 	this->nr_class = nr_class;
554 	this->eps = eps;
555 	this->max_iter = max_iter;
556 	this->prob = prob;
557 	this->B = new double[nr_class];
558 	this->G = new double[nr_class];
559 	this->C = weighted_C;
560 }
561 
562 Solver_MCSVM_CS::~Solver_MCSVM_CS()
563 {
564 	delete[] B;
565 	delete[] G;
566 }
567 
568 int compare_double(const void *a, const void *b)
569 {
570 	if(*(double *)a > *(double *)b)
571 		return -1;
572 	if(*(double *)a < *(double *)b)
573 		return 1;
574 	return 0;
575 }
576 
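// Solve the per-instance sub-problem over the active classes (see the
// appendix of the LIBLINEAR paper): sort the shifted gradients in
// decreasing order, find the threshold beta from their running average,
// then clip each new alpha at its upper bound (C_yi for the true class,
// 0 otherwise).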
577 void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new)
578 {
579 	int r;
580 	double *D;
581 
582 	clone(D, B, active_i);
583 	if(yi < active_i)
584 		D[yi] += A_i*C_yi;
585 	qsort(D, active_i, sizeof(double), compare_double);
586 
587 	double beta = D[0] - A_i*C_yi;
588 	for(r=1;r<active_i && beta<r*D[r];r++)
589 		beta += D[r];
590 	beta /= r;
591 
592 	for(r=0;r<active_i;r++)
593 	{
594 		if(r == yi)
595 			alpha_new[r] = min(C_yi, (beta-B[r])/A_i);
596 		else
597 			alpha_new[r] = min((double)0, (beta - B[r])/A_i);
598 	}
599 	delete[] D;
600 }
601 
602 bool Solver_MCSVM_CS::be_shrunk(int i, int m, int yi, double alpha_i, double minG)
603 {
604 	double bound = 0;
605 	if(m == yi)
606 		bound = C[GETI(i)];
607 	if(alpha_i == bound && G[m] < minG)
608 		return true;
609 	return false;
610 }
611 
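// Dual coordinate descent over instances: for each i, compute the gradient
// over its active classes, shrink classes whose alpha sits at its bound
// (see be_shrunk), solve the per-instance sub-problem, and update w for the
// classes whose alpha actually changed.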
612 void Solver_MCSVM_CS::Solve(double *w)
613 {
614 	int i, m, s;
615 	int iter = 0;
616 	double *alpha =  new double[l*nr_class];
617 	double *alpha_new = new double[nr_class];
618 	int *index = new int[l];
619 	double *QD = new double[l];
620 	int *d_ind = new int[nr_class];
621 	double *d_val = new double[nr_class];
622 	int *alpha_index = new int[nr_class*l];
623 	int *y_index = new int[l];
624 	int active_size = l;
625 	int *active_size_i = new int[l];
626 	double eps_shrink = max(10.0*eps, 1.0); // stopping tolerance for shrinking
627 	bool start_from_all = true;
628 
629 	// Initial alpha can be set here. Note that
630 	// sum_m alpha[i*nr_class+m] = 0, for all i=1,...,l-1
631 	// alpha[i*nr_class+m] <= C[GETI(i)] if prob->y[i] == m
632 	// alpha[i*nr_class+m] <= 0 if prob->y[i] != m
633 	// If initial alpha isn't zero, uncomment the for loop below to initialize w
634 	for(i=0;i<l*nr_class;i++)
635 		alpha[i] = 0;
636 
637 	for(i=0;i<w_size*nr_class;i++)
638 		w[i] = 0;
639 	for(i=0;i<l;i++)
640 	{
641 		for(m=0;m<nr_class;m++)
642 			alpha_index[i*nr_class+m] = m;
643 		feature_node *xi = prob->x[i];
644 		QD[i] = 0;
645 		while(xi->index != -1)
646 		{
647 			double val = xi->value;
648 			QD[i] += val*val;
649 
650 			// Uncomment the for loop if initial alpha isn't zero
651 			// for(m=0; m<nr_class; m++)
652 			//	w[(xi->index-1)*nr_class+m] += alpha[i*nr_class+m]*val;
653 			xi++;
654 		}
655 		active_size_i[i] = nr_class;
656 		y_index[i] = (int)prob->y[i];
657 		index[i] = i;
658 	}
659 
660 	while(iter < max_iter)
661 	{
662 		double stopping = -INF;
663 		for(i=0;i<active_size;i++)
664 		{
665 			int j = i+rand()%(active_size-i);
666 			swap(index[i], index[j]);
667 		}
668 		for(s=0;s<active_size;s++)
669 		{
670 			i = index[s];
671 			double Ai = QD[i];
672 			double *alpha_i = &alpha[i*nr_class];
673 			int *alpha_index_i = &alpha_index[i*nr_class];
674 
675 			if(Ai > 0)
676 			{
677 				for(m=0;m<active_size_i[i];m++)
678 					G[m] = 1;
679 				if(y_index[i] < active_size_i[i])
680 					G[y_index[i]] = 0;
681 
682 				feature_node *xi = prob->x[i];
683 				while(xi->index!= -1)
684 				{
685 					double *w_i = &w[(xi->index-1)*nr_class];
686 					for(m=0;m<active_size_i[i];m++)
687 						G[m] += w_i[alpha_index_i[m]]*(xi->value);
688 					xi++;
689 				}
690 
691 				double minG = INF;
692 				double maxG = -INF;
693 				for(m=0;m<active_size_i[i];m++)
694 				{
695 					if(alpha_i[alpha_index_i[m]] < 0 && G[m] < minG)
696 						minG = G[m];
697 					if(G[m] > maxG)
698 						maxG = G[m];
699 				}
700 				if(y_index[i] < active_size_i[i])
701 					if(alpha_i[(int) prob->y[i]] < C[GETI(i)] && G[y_index[i]] < minG)
702 						minG = G[y_index[i]];
703 
704 				for(m=0;m<active_size_i[i];m++)
705 				{
706 					if(be_shrunk(i, m, y_index[i], alpha_i[alpha_index_i[m]], minG))
707 					{
708 						active_size_i[i]--;
709 						while(active_size_i[i]>m)
710 						{
711 							if(!be_shrunk(i, active_size_i[i], y_index[i],
712 											alpha_i[alpha_index_i[active_size_i[i]]], minG))
713 							{
714 								swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]);
715 								swap(G[m], G[active_size_i[i]]);
716 								if(y_index[i] == active_size_i[i])
717 									y_index[i] = m;
718 								else if(y_index[i] == m)
719 									y_index[i] = active_size_i[i];
720 								break;
721 							}
722 							active_size_i[i]--;
723 						}
724 					}
725 				}
726 
727 				if(active_size_i[i] <= 1)
728 				{
729 					active_size--;
730 					swap(index[s], index[active_size]);
731 					s--;
732 					continue;
733 				}
734 
735 				if(maxG-minG <= 1e-12)
736 					continue;
737 				else
738 					stopping = max(maxG - minG, stopping);
739 
740 				for(m=0;m<active_size_i[i];m++)
741 					B[m] = G[m] - Ai*alpha_i[alpha_index_i[m]] ;
742 
743 				solve_sub_problem(Ai, y_index[i], C[GETI(i)], active_size_i[i], alpha_new);
744 				int nz_d = 0;
745 				for(m=0;m<active_size_i[i];m++)
746 				{
747 					double d = alpha_new[m] - alpha_i[alpha_index_i[m]];
748 					alpha_i[alpha_index_i[m]] = alpha_new[m];
749 					if(fabs(d) >= 1e-12)
750 					{
751 						d_ind[nz_d] = alpha_index_i[m];
752 						d_val[nz_d] = d;
753 						nz_d++;
754 					}
755 				}
756 
757 				xi = prob->x[i];
758 				while(xi->index != -1)
759 				{
760 					double *w_i = &w[(xi->index-1)*nr_class];
761 					for(m=0;m<nz_d;m++)
762 						w_i[d_ind[m]] += d_val[m]*xi->value;
763 					xi++;
764 				}
765 			}
766 		}
767 
768 		iter++;
769 		if(iter % 10 == 0)
770 		{
771 			info(".");
772 		}
773 
774 		if(stopping < eps_shrink)
775 		{
776 			if(stopping < eps && start_from_all == true)
777 				break;
778 			else
779 			{
780 				active_size = l;
781 				for(i=0;i<l;i++)
782 					active_size_i[i] = nr_class;
783 				info("*");
784 				eps_shrink = max(eps_shrink/2, eps);
785 				start_from_all = true;
786 			}
787 		}
788 		else
789 			start_from_all = false;
790 	}
791 
792 	info("\noptimization finished, #iter = %d\n",iter);
793 	if (iter >= max_iter)
794 		info("\nWARNING: reaching max number of iterations\n");
795 
796 	// calculate objective value
797 	double v = 0;
798 	int nSV = 0;
799 	for(i=0;i<w_size*nr_class;i++)
800 		v += w[i]*w[i];
801 	v = 0.5*v;
802 	for(i=0;i<l*nr_class;i++)
803 	{
804 		v += alpha[i];
805 		if(fabs(alpha[i]) > 0)
806 			nSV++;
807 	}
808 	for(i=0;i<l;i++)
809 		v -= alpha[i*nr_class+(int)prob->y[i]];
810 	info("Objective value = %lf\n",v);
811 	info("nSV = %d\n",nSV);
812 
813 	delete [] alpha;
814 	delete [] alpha_new;
815 	delete [] index;
816 	delete [] QD;
817 	delete [] d_ind;
818 	delete [] d_val;
819 	delete [] alpha_index;
820 	delete [] y_index;
821 	delete [] active_size_i;
822 }
823 
824 // A coordinate descent algorithm for
825 // L1-loss and L2-loss SVM dual problems
826 //
827 //  min_\alpha  0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
828 //    s.t.      0 <= \alpha_i <= upper_bound_i,
829 //
830 //  where Qij = yi yj xi^T xj and
831 //  D is a diagonal matrix
832 //
833 // In L1-SVM case:
834 // 		upper_bound_i = Cp if y_i = 1
835 // 		upper_bound_i = Cn if y_i = -1
836 // 		D_ii = 0
837 // In L2-SVM case:
838 // 		upper_bound_i = INF
839 // 		D_ii = 1/(2*Cp)	if y_i = 1
840 // 		D_ii = 1/(2*Cn)	if y_i = -1
841 //
842 // Given:
843 // x, y, Cp, Cn
844 // eps is the stopping tolerance
845 //
846 // solution will be put in w
847 //
848 // See Algorithm 3 of Hsieh et al., ICML 2008
849 
850 #undef GETI
851 #define GETI(i) (y[i]+1)
852 // To support weights for instances, use GETI(i) (i)
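// With y[i] in {-1,+1}, GETI(i) evaluates to 0 or 2, indexing the 3-element
// diag[] and upper_bound[] arrays below (the middle entry is unused).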
853 
854 static void solve_l2r_l1l2_svc(
855 	const problem *prob, double *w, double eps,
856 	double Cp, double Cn, int solver_type)
857 {
858 	int l = prob->l;
859 	int w_size = prob->n;
860 	int i, s, iter = 0;
861 	double C, d, G;
862 	double *QD = new double[l];
863 	int max_iter = 1000;
864 	int *index = new int[l];
865 	double *alpha = new double[l];
866 	schar *y = new schar[l];
867 	int active_size = l;
868 
869 	// PG: projected gradient, for shrinking and stopping
870 	double PG;
871 	double PGmax_old = INF;
872 	double PGmin_old = -INF;
873 	double PGmax_new, PGmin_new;
874 
875 	// default solver_type: L2R_L2LOSS_SVC_DUAL
876 	double diag[3] = {0.5/Cn, 0, 0.5/Cp};
877 	double upper_bound[3] = {INF, 0, INF};
878 	if(solver_type == L2R_L1LOSS_SVC_DUAL)
879 	{
880 		diag[0] = 0;
881 		diag[2] = 0;
882 		upper_bound[0] = Cn;
883 		upper_bound[2] = Cp;
884 	}
885 
886 	for(i=0; i<l; i++)
887 	{
888 		if(prob->y[i] > 0)
889 		{
890 			y[i] = +1;
891 		}
892 		else
893 		{
894 			y[i] = -1;
895 		}
896 	}
897 
898 	// Initial alpha can be set here. Note that
899 	// 0 <= alpha[i] <= upper_bound[GETI(i)]
900 	for(i=0; i<l; i++)
901 		alpha[i] = 0;
902 
903 	for(i=0; i<w_size; i++)
904 		w[i] = 0;
905 	for(i=0; i<l; i++)
906 	{
907 		QD[i] = diag[GETI(i)];
908 
909 		feature_node * const xi = prob->x[i];
910 		QD[i] += sparse_operator::nrm2_sq(xi);
911 		sparse_operator::axpy(y[i]*alpha[i], xi, w);
912 
913 		index[i] = i;
914 	}
915 
916 	while (iter < max_iter)
917 	{
918 		PGmax_new = -INF;
919 		PGmin_new = INF;
920 
921 		for (i=0; i<active_size; i++)
922 		{
923 			int j = i+rand()%(active_size-i);
924 			swap(index[i], index[j]);
925 		}
926 
927 		for (s=0; s<active_size; s++)
928 		{
929 			i = index[s];
930 			const schar yi = y[i];
931 			feature_node * const xi = prob->x[i];
932 
933 			G = yi*sparse_operator::dot(w, xi)-1;
934 
935 			C = upper_bound[GETI(i)];
936 			G += alpha[i]*diag[GETI(i)];
937 
938 			PG = 0;
939 			if (alpha[i] == 0)
940 			{
941 				if (G > PGmax_old)
942 				{
943 					active_size--;
944 					swap(index[s], index[active_size]);
945 					s--;
946 					continue;
947 				}
948 				else if (G < 0)
949 					PG = G;
950 			}
951 			else if (alpha[i] == C)
952 			{
953 				if (G < PGmin_old)
954 				{
955 					active_size--;
956 					swap(index[s], index[active_size]);
957 					s--;
958 					continue;
959 				}
960 				else if (G > 0)
961 					PG = G;
962 			}
963 			else
964 				PG = G;
965 
966 			PGmax_new = max(PGmax_new, PG);
967 			PGmin_new = min(PGmin_new, PG);
968 
969 			if(fabs(PG) > 1.0e-12)
970 			{
971 				double alpha_old = alpha[i];
972 				alpha[i] = min(max(alpha[i] - G/QD[i], 0.0), C);
973 				d = (alpha[i] - alpha_old)*yi;
974 				sparse_operator::axpy(d, xi, w);
975 			}
976 		}
977 
978 		iter++;
979 		if(iter % 10 == 0)
980 			info(".");
981 
982 		if(PGmax_new - PGmin_new <= eps)
983 		{
984 			if(active_size == l)
985 				break;
986 			else
987 			{
988 				active_size = l;
989 				info("*");
990 				PGmax_old = INF;
991 				PGmin_old = -INF;
992 				continue;
993 			}
994 		}
995 		PGmax_old = PGmax_new;
996 		PGmin_old = PGmin_new;
997 		if (PGmax_old <= 0)
998 			PGmax_old = INF;
999 		if (PGmin_old >= 0)
1000 			PGmin_old = -INF;
1001 	}
1002 
1003 	info("\noptimization finished, #iter = %d\n",iter);
1004 	if (iter >= max_iter)
1005 		info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n\n");
1006 
1007 	// calculate objective value
1008 
1009 	double v = 0;
1010 	int nSV = 0;
1011 	for(i=0; i<w_size; i++)
1012 		v += w[i]*w[i];
1013 	for(i=0; i<l; i++)
1014 	{
1015 		v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
1016 		if(alpha[i] > 0)
1017 			++nSV;
1018 	}
1019 	info("Objective value = %lf\n",v/2);
1020 	info("nSV = %d\n",nSV);
1021 
1022 	delete [] QD;
1023 	delete [] alpha;
1024 	delete [] y;
1025 	delete [] index;
1026 }
1027 
1028 
1029 // A coordinate descent algorithm for
1030 // L1-loss and L2-loss epsilon-SVR dual problem
1031 //
1032 //  min_\beta  0.5\beta^T (Q + diag(lambda)) \beta - p \sum_{i=1}^l|\beta_i| + \sum_{i=1}^l yi\beta_i,
1033 //    s.t.      -upper_bound_i <= \beta_i <= upper_bound_i,
1034 //
1035 //  where Qij = xi^T xj and
1036 //  diag(lambda) is a diagonal matrix with entries lambda_i
1037 //
1038 // In L1-loss SVR case:
1039 // 		upper_bound_i = C
1040 // 		lambda_i = 0
1041 // In L2-loss SVR case:
1042 // 		upper_bound_i = INF
1043 // 		lambda_i = 1/(2*C)
1044 //
1045 // Given:
1046 // x, y, p, C
1047 // eps is the stopping tolerance
1048 //
1049 // solution will be put in w
1050 //
1051 // See Algorithm 4 of Ho and Lin, 2012
1052 
1053 #undef GETI
1054 #define GETI(i) (0)
1055 // To support weights for instances, use GETI(i) (i)
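// GETI(i) is 0 here: a single lambda/upper_bound value applies to every instance.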
1056 
1057 static void solve_l2r_l1l2_svr(
1058 	const problem *prob, double *w, const parameter *param,
1059 	int solver_type)
1060 {
1061 	int l = prob->l;
1062 	double C = param->C;
1063 	double p = param->p;
1064 	int w_size = prob->n;
1065 	double eps = param->eps;
1066 	int i, s, iter = 0;
1067 	int max_iter = 1000;
1068 	int active_size = l;
1069 	int *index = new int[l];
1070 
1071 	double d, G, H;
1072 	double Gmax_old = INF;
1073 	double Gmax_new, Gnorm1_new;
1074 	double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
1075 	double *beta = new double[l];
1076 	double *QD = new double[l];
1077 	double *y = prob->y;
1078 
1079 	// L2R_L2LOSS_SVR_DUAL
1080 	double lambda[1], upper_bound[1];
1081 	lambda[0] = 0.5/C;
1082 	upper_bound[0] = INF;
1083 
1084 	if(solver_type == L2R_L1LOSS_SVR_DUAL)
1085 	{
1086 		lambda[0] = 0;
1087 		upper_bound[0] = C;
1088 	}
1089 
1090 	// Initial beta can be set here. Note that
1091 	// -upper_bound <= beta[i] <= upper_bound
1092 	for(i=0; i<l; i++)
1093 		beta[i] = 0;
1094 
1095 	for(i=0; i<w_size; i++)
1096 		w[i] = 0;
1097 	for(i=0; i<l; i++)
1098 	{
1099 		feature_node * const xi = prob->x[i];
1100 		QD[i] = sparse_operator::nrm2_sq(xi);
1101 		sparse_operator::axpy(beta[i], xi, w);
1102 
1103 		index[i] = i;
1104 	}
1105 
1106 
1107 	while(iter < max_iter)
1108 	{
1109 		Gmax_new = 0;
1110 		Gnorm1_new = 0;
1111 
1112 		for(i=0; i<active_size; i++)
1113 		{
1114 			int j = i+rand()%(active_size-i);
1115 			swap(index[i], index[j]);
1116 		}
1117 
1118 		for(s=0; s<active_size; s++)
1119 		{
1120 			i = index[s];
1121 			G = -y[i] + lambda[GETI(i)]*beta[i];
1122 			H = QD[i] + lambda[GETI(i)];
1123 
1124 			feature_node * const xi = prob->x[i];
1125 			G += sparse_operator::dot(w, xi);
1126 
1127 			double Gp = G+p;
1128 			double Gn = G-p;
1129 			double violation = 0;
1130 			if(beta[i] == 0)
1131 			{
1132 				if(Gp < 0)
1133 					violation = -Gp;
1134 				else if(Gn > 0)
1135 					violation = Gn;
1136 				else if(Gp>Gmax_old && Gn<-Gmax_old)
1137 				{
1138 					active_size--;
1139 					swap(index[s], index[active_size]);
1140 					s--;
1141 					continue;
1142 				}
1143 			}
1144 			else if(beta[i] >= upper_bound[GETI(i)])
1145 			{
1146 				if(Gp > 0)
1147 					violation = Gp;
1148 				else if(Gp < -Gmax_old)
1149 				{
1150 					active_size--;
1151 					swap(index[s], index[active_size]);
1152 					s--;
1153 					continue;
1154 				}
1155 			}
1156 			else if(beta[i] <= -upper_bound[GETI(i)])
1157 			{
1158 				if(Gn < 0)
1159 					violation = -Gn;
1160 				else if(Gn > Gmax_old)
1161 				{
1162 					active_size--;
1163 					swap(index[s], index[active_size]);
1164 					s--;
1165 					continue;
1166 				}
1167 			}
1168 			else if(beta[i] > 0)
1169 				violation = fabs(Gp);
1170 			else
1171 				violation = fabs(Gn);
1172 
1173 			Gmax_new = max(Gmax_new, violation);
1174 			Gnorm1_new += violation;
1175 
1176 			// obtain Newton direction d
1177 			if(Gp < H*beta[i])
1178 				d = -Gp/H;
1179 			else if(Gn > H*beta[i])
1180 				d = -Gn/H;
1181 			else
1182 				d = -beta[i];
1183 
1184 			if(fabs(d) < 1.0e-12)
1185 				continue;
1186 
1187 			double beta_old = beta[i];
1188 			beta[i] = min(max(beta[i]+d, -upper_bound[GETI(i)]), upper_bound[GETI(i)]);
1189 			d = beta[i]-beta_old;
1190 
1191 			if(d != 0)
1192 				sparse_operator::axpy(d, xi, w);
1193 		}
1194 
1195 		if(iter == 0)
1196 			Gnorm1_init = Gnorm1_new;
1197 		iter++;
1198 		if(iter % 10 == 0)
1199 			info(".");
1200 
1201 		if(Gnorm1_new <= eps*Gnorm1_init)
1202 		{
1203 			if(active_size == l)
1204 				break;
1205 			else
1206 			{
1207 				active_size = l;
1208 				info("*");
1209 				Gmax_old = INF;
1210 				continue;
1211 			}
1212 		}
1213 
1214 		Gmax_old = Gmax_new;
1215 	}
1216 
1217 	info("\noptimization finished, #iter = %d\n", iter);
1218 	if(iter >= max_iter)
1219 		info("\nWARNING: reaching max number of iterations\nUsing -s 11 may be faster\n\n");
1220 
1221 	// calculate objective value
1222 	double v = 0;
1223 	int nSV = 0;
1224 	for(i=0; i<w_size; i++)
1225 		v += w[i]*w[i];
1226 	v = 0.5*v;
1227 	for(i=0; i<l; i++)
1228 	{
1229 		v += p*fabs(beta[i]) - y[i]*beta[i] + 0.5*lambda[GETI(i)]*beta[i]*beta[i];
1230 		if(beta[i] != 0)
1231 			nSV++;
1232 	}
1233 
1234 	info("Objective value = %lf\n", v);
1235 	info("nSV = %d\n",nSV);
1236 
1237 	delete [] beta;
1238 	delete [] QD;
1239 	delete [] index;
1240 }
1241 
1242 
1243 // A coordinate descent algorithm for
1244 // the dual of L2-regularized logistic regression problems
1245 //
1246 //  min_\alpha  0.5(\alpha^T Q \alpha) + \sum \alpha_i log (\alpha_i) + (upper_bound_i - \alpha_i) log (upper_bound_i - \alpha_i),
1247 //    s.t.      0 <= \alpha_i <= upper_bound_i,
1248 //
1249 //  where Qij = yi yj xi^T xj and
1250 //  upper_bound_i = Cp if y_i = 1
1251 //  upper_bound_i = Cn if y_i = -1
1252 //
1253 // Given:
1254 // x, y, Cp, Cn
1255 // eps is the stopping tolerance
1256 //
1257 // solution will be put in w
1258 //
1259 // See Algorithm 5 of Yu et al., MLJ 2010
1260 
1261 #undef GETI
1262 #define GETI(i) (y[i]+1)
1263 // To support weights for instances, use GETI(i) (i)
1264 
1265 void solve_l2r_lr_dual(const problem *prob, double *w, double eps, double Cp, double Cn)
1266 {
1267 	int l = prob->l;
1268 	int w_size = prob->n;
1269 	int i, s, iter = 0;
1270 	double *xTx = new double[l];
1271 	int max_iter = 1000;
1272 	int *index = new int[l];
1273 	double *alpha = new double[2*l]; // store alpha and C - alpha
1274 	schar *y = new schar[l];
1275 	int max_inner_iter = 100; // for inner Newton
1276 	double innereps = 1e-2;
1277 	double innereps_min = min(1e-8, eps);
1278 	double upper_bound[3] = {Cn, 0, Cp};
1279 
1280 	for(i=0; i<l; i++)
1281 	{
1282 		if(prob->y[i] > 0)
1283 		{
1284 			y[i] = +1;
1285 		}
1286 		else
1287 		{
1288 			y[i] = -1;
1289 		}
1290 	}
1291 
1292 	// Initial alpha can be set here. Note that
1293 	// 0 < alpha[i] < upper_bound[GETI(i)]
1294 	// alpha[2*i] + alpha[2*i+1] = upper_bound[GETI(i)]
1295 	for(i=0; i<l; i++)
1296 	{
1297 		alpha[2*i] = min(0.001*upper_bound[GETI(i)], 1e-8);
1298 		alpha[2*i+1] = upper_bound[GETI(i)] - alpha[2*i];
1299 	}
1300 
1301 	for(i=0; i<w_size; i++)
1302 		w[i] = 0;
1303 	for(i=0; i<l; i++)
1304 	{
1305 		feature_node * const xi = prob->x[i];
1306 		xTx[i] = sparse_operator::nrm2_sq(xi);
1307 		sparse_operator::axpy(y[i]*alpha[2*i], xi, w);
1308 		index[i] = i;
1309 	}
1310 
1311 	while (iter < max_iter)
1312 	{
1313 		for (i=0; i<l; i++)
1314 		{
1315 			int j = i+rand()%(l-i);
1316 			swap(index[i], index[j]);
1317 		}
1318 		int newton_iter = 0;
1319 		double Gmax = 0;
1320 		for (s=0; s<l; s++)
1321 		{
1322 			i = index[s];
1323 			const schar yi = y[i];
1324 			double C = upper_bound[GETI(i)];
1325 			double ywTx = 0, xisq = xTx[i];
1326 			feature_node * const xi = prob->x[i];
1327 			ywTx = yi*sparse_operator::dot(w, xi);
1328 			double a = xisq, b = ywTx;
1329 
1330 			// Decide to minimize g_1(z) or g_2(z)
1331 			int ind1 = 2*i, ind2 = 2*i+1, sign = 1;
1332 			if(0.5*a*(alpha[ind2]-alpha[ind1])+b < 0)
1333 			{
1334 				ind1 = 2*i+1;
1335 				ind2 = 2*i;
1336 				sign = -1;
1337 			}
1338 
1339 			//  g_t(z) = z*log(z) + (C-z)*log(C-z) + 0.5a(z-alpha_old)^2 + sign*b(z-alpha_old)
1340 			double alpha_old = alpha[ind1];
1341 			double z = alpha_old;
1342 			if(C - z < 0.5 * C)
1343 				z = 0.1*z;
1344 			double gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
1345 			Gmax = max(Gmax, fabs(gp));
1346 
1347 			// Newton method on the sub-problem
1348 			const double eta = 0.1; // xi in the paper
1349 			int inner_iter = 0;
1350 			while (inner_iter <= max_inner_iter)
1351 			{
1352 				if(fabs(gp) < innereps)
1353 					break;
1354 				double gpp = a + C/(C-z)/z;
1355 				double tmpz = z - gp/gpp;
1356 				if(tmpz <= 0)
1357 					z *= eta;
1358 				else // tmpz in (0, C)
1359 					z = tmpz;
1360 				gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
1361 				newton_iter++;
1362 				inner_iter++;
1363 			}
1364 
1365 			if(inner_iter > 0) // update w
1366 			{
1367 				alpha[ind1] = z;
1368 				alpha[ind2] = C-z;
1369 				sparse_operator::axpy(sign*(z-alpha_old)*yi, xi, w);
1370 			}
1371 		}
1372 
1373 		iter++;
1374 		if(iter % 10 == 0)
1375 			info(".");
1376 
1377 		if(Gmax < eps)
1378 			break;
1379 
1380 		if(newton_iter <= l/10)
1381 			innereps = max(innereps_min, 0.1*innereps);
1382 
1383 	}
1384 
1385 	info("\noptimization finished, #iter = %d\n",iter);
1386 	if (iter >= max_iter)
1387 		info("\nWARNING: reaching max number of iterations\nUsing -s 0 may be faster (also see FAQ)\n\n");
1388 
1389 	// calculate objective value
1390 
1391 	double v = 0;
1392 	for(i=0; i<w_size; i++)
1393 		v += w[i] * w[i];
1394 	v *= 0.5;
1395 	for(i=0; i<l; i++)
1396 		v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1])
1397 			- upper_bound[GETI(i)] * log(upper_bound[GETI(i)]);
1398 	info("Objective value = %lf\n", v);
1399 
1400 	delete [] xTx;
1401 	delete [] alpha;
1402 	delete [] y;
1403 	delete [] index;
1404 }
1405 
1406 // A coordinate descent algorithm for
1407 // L1-regularized L2-loss support vector classification
1408 //
1409 //  min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2,
1410 //
1411 // Given:
1412 // x, y, Cp, Cn
1413 // eps is the stopping tolerance
1414 //
1415 // solution will be put in w
1416 //
1417 // See Yuan et al. (2010) and appendix of LIBLINEAR paper, Fan et al. (2008)
1418 
1419 #undef GETI
1420 #define GETI(i) (y[i]+1)
1421 // To support weights for instances, use GETI(i) (i)
1422 
1423 static void solve_l1r_l2_svc(
1424 	problem *prob_col, double *w, double eps,
1425 	double Cp, double Cn)
1426 {
1427 	int l = prob_col->l;
1428 	int w_size = prob_col->n;
1429 	int j, s, iter = 0;
1430 	int max_iter = 1000;
1431 	int active_size = w_size;
1432 	int max_num_linesearch = 20;
1433 
1434 	double sigma = 0.01;
1435 	double d, G_loss, G, H;
1436 	double Gmax_old = INF;
1437 	double Gmax_new, Gnorm1_new;
1438 	double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
1439 	double d_old, d_diff;
1440 	double loss_old, loss_new;
1441 	double appxcond, cond;
1442 
1443 	int *index = new int[w_size];
1444 	schar *y = new schar[l];
1445 	double *b = new double[l]; // b = 1-ywTx
1446 	double *xj_sq = new double[w_size];
1447 	feature_node *x;
1448 
1449 	double C[3] = {Cn,0,Cp};
1450 
1451 	// Initial w can be set here.
1452 	for(j=0; j<w_size; j++)
1453 		w[j] = 0;
1454 
1455 	for(j=0; j<l; j++)
1456 	{
1457 		b[j] = 1;
1458 		if(prob_col->y[j] > 0)
1459 			y[j] = 1;
1460 		else
1461 			y[j] = -1;
1462 	}
1463 	for(j=0; j<w_size; j++)
1464 	{
1465 		index[j] = j;
1466 		xj_sq[j] = 0;
1467 		x = prob_col->x[j];
1468 		while(x->index != -1)
1469 		{
1470 			int ind = x->index-1;
1471 			x->value *= y[ind]; // x->value stores yi*xij
1472 			double val = x->value;
1473 			b[ind] -= w[j]*val;
1474 			xj_sq[j] += C[GETI(ind)]*val*val;
1475 			x++;
1476 		}
1477 	}
1478 
1479 	while(iter < max_iter)
1480 	{
1481 		Gmax_new = 0;
1482 		Gnorm1_new = 0;
1483 
1484 		for(j=0; j<active_size; j++)
1485 		{
1486 			int i = j+rand()%(active_size-j);
1487 			swap(index[i], index[j]);
1488 		}
1489 
1490 		for(s=0; s<active_size; s++)
1491 		{
1492 			j = index[s];
1493 			G_loss = 0;
1494 			H = 0;
1495 
1496 			x = prob_col->x[j];
1497 			while(x->index != -1)
1498 			{
1499 				int ind = x->index-1;
1500 				if(b[ind] > 0)
1501 				{
1502 					double val = x->value;
1503 					double tmp = C[GETI(ind)]*val;
1504 					G_loss -= tmp*b[ind];
1505 					H += tmp*val;
1506 				}
1507 				x++;
1508 			}
1509 			G_loss *= 2;
1510 
1511 			G = G_loss;
1512 			H *= 2;
1513 			H = max(H, 1e-12);
1514 
1515 			double Gp = G+1;
1516 			double Gn = G-1;
1517 			double violation = 0;
1518 			if(w[j] == 0)
1519 			{
1520 				if(Gp < 0)
1521 					violation = -Gp;
1522 				else if(Gn > 0)
1523 					violation = Gn;
1524 				else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
1525 				{
1526 					active_size--;
1527 					swap(index[s], index[active_size]);
1528 					s--;
1529 					continue;
1530 				}
1531 			}
1532 			else if(w[j] > 0)
1533 				violation = fabs(Gp);
1534 			else
1535 				violation = fabs(Gn);
1536 
1537 			Gmax_new = max(Gmax_new, violation);
1538 			Gnorm1_new += violation;
1539 
1540 			// obtain Newton direction d
1541 			if(Gp < H*w[j])
1542 				d = -Gp/H;
1543 			else if(Gn > H*w[j])
1544 				d = -Gn/H;
1545 			else
1546 				d = -w[j];
1547 
1548 			if(fabs(d) < 1.0e-12)
1549 				continue;
1550 
1551 			double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
1552 			d_old = 0;
1553 			int num_linesearch;
1554 			for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
1555 			{
1556 				d_diff = d_old - d;
1557 				cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;
1558 
1559 				appxcond = xj_sq[j]*d*d + G_loss*d + cond;
1560 				if(appxcond <= 0)
1561 				{
1562 					x = prob_col->x[j];
1563 					sparse_operator::axpy(d_diff, x, b);
1564 					break;
1565 				}
1566 
1567 				if(num_linesearch == 0)
1568 				{
1569 					loss_old = 0;
1570 					loss_new = 0;
1571 					x = prob_col->x[j];
1572 					while(x->index != -1)
1573 					{
1574 						int ind = x->index-1;
1575 						if(b[ind] > 0)
1576 							loss_old += C[GETI(ind)]*b[ind]*b[ind];
1577 						double b_new = b[ind] + d_diff*x->value;
1578 						b[ind] = b_new;
1579 						if(b_new > 0)
1580 							loss_new += C[GETI(ind)]*b_new*b_new;
1581 						x++;
1582 					}
1583 				}
1584 				else
1585 				{
1586 					loss_new = 0;
1587 					x = prob_col->x[j];
1588 					while(x->index != -1)
1589 					{
1590 						int ind = x->index-1;
1591 						double b_new = b[ind] + d_diff*x->value;
1592 						b[ind] = b_new;
1593 						if(b_new > 0)
1594 							loss_new += C[GETI(ind)]*b_new*b_new;
1595 						x++;
1596 					}
1597 				}
1598 
1599 				cond = cond + loss_new - loss_old;
1600 				if(cond <= 0)
1601 					break;
1602 				else
1603 				{
1604 					d_old = d;
1605 					d *= 0.5;
1606 					delta *= 0.5;
1607 				}
1608 			}
1609 
1610 			w[j] += d;
1611 
1612 			// recompute b[] if line search takes too many steps
1613 			if(num_linesearch >= max_num_linesearch)
1614 			{
1615 				info("#");
1616 				for(int i=0; i<l; i++)
1617 					b[i] = 1;
1618 
1619 				for(int i=0; i<w_size; i++)
1620 				{
1621 					if(w[i]==0) continue;
1622 					x = prob_col->x[i];
1623 					sparse_operator::axpy(-w[i], x, b);
1624 				}
1625 			}
1626 		}
1627 
1628 		if(iter == 0)
1629 			Gnorm1_init = Gnorm1_new;
1630 		iter++;
1631 		if(iter % 10 == 0)
1632 			info(".");
1633 
1634 		if(Gnorm1_new <= eps*Gnorm1_init)
1635 		{
1636 			if(active_size == w_size)
1637 				break;
1638 			else
1639 			{
1640 				active_size = w_size;
1641 				info("*");
1642 				Gmax_old = INF;
1643 				continue;
1644 			}
1645 		}
1646 
1647 		Gmax_old = Gmax_new;
1648 	}
1649 
1650 	info("\noptimization finished, #iter = %d\n", iter);
1651 	if(iter >= max_iter)
1652 		info("\nWARNING: reaching max number of iterations\n");
1653 
1654 	// calculate objective value
1655 
1656 	double v = 0;
1657 	int nnz = 0;
1658 	for(j=0; j<w_size; j++)
1659 	{
1660 		x = prob_col->x[j];
1661 		while(x->index != -1)
1662 		{
1663 			x->value *= prob_col->y[x->index-1]; // restore x->value
1664 			x++;
1665 		}
1666 		if(w[j] != 0)
1667 		{
1668 			v += fabs(w[j]);
1669 			nnz++;
1670 		}
1671 	}
1672 	for(j=0; j<l; j++)
1673 		if(b[j] > 0)
1674 			v += C[GETI(j)]*b[j]*b[j];
1675 
1676 	info("Objective value = %lf\n", v);
1677 	info("#nonzeros/#features = %d/%d\n", nnz, w_size);
1678 
1679 	delete [] index;
1680 	delete [] y;
1681 	delete [] b;
1682 	delete [] xj_sq;
1683 }
1684 
1685 // A coordinate descent algorithm for
1686 // L1-regularized logistic regression problems
1687 //
1688 //  min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)),
1689 //
1690 // Given:
1691 // x, y, Cp, Cn
1692 // eps is the stopping tolerance
1693 //
1694 // solution will be put in w
1695 //
1696 // See Yuan et al. (2011) and appendix of LIBLINEAR paper, Fan et al. (2008)
1697 
1698 #undef GETI
1699 #define GETI(i) (y[i]+1)
1700 // To support weights for instances, use GETI(i) (i)
1701 
1702 static void solve_l1r_lr(
1703 	const problem *prob_col, double *w, double eps,
1704 	double Cp, double Cn)
1705 {
1706 	int l = prob_col->l;
1707 	int w_size = prob_col->n;
1708 	int j, s, newton_iter=0, iter=0;
1709 	int max_newton_iter = 100;
1710 	int max_iter = 1000;
1711 	int max_num_linesearch = 20;
1712 	int active_size;
1713 	int QP_active_size;
1714 
1715 	double nu = 1e-12;
1716 	double inner_eps = 1;
1717 	double sigma = 0.01;
1718 	double w_norm, w_norm_new;
1719 	double z, G, H;
1720 	double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
1721 	double Gmax_old = INF;
1722 	double Gmax_new, Gnorm1_new;
1723 	double QP_Gmax_old = INF;
1724 	double QP_Gmax_new, QP_Gnorm1_new;
1725 	double delta, negsum_xTd, cond;
1726 
1727 	int *index = new int[w_size];
1728 	schar *y = new schar[l];
1729 	double *Hdiag = new double[w_size];
1730 	double *Grad = new double[w_size];
1731 	double *wpd = new double[w_size];
1732 	double *xjneg_sum = new double[w_size];
1733 	double *xTd = new double[l];
1734 	double *exp_wTx = new double[l];
1735 	double *exp_wTx_new = new double[l];
1736 	double *tau = new double[l];
1737 	double *D = new double[l];
1738 	feature_node *x;
1739 
1740 	double C[3] = {Cn,0,Cp};
1741 
1742 	// Initial w can be set here.
1743 	for(j=0; j<w_size; j++)
1744 		w[j] = 0;
1745 
1746 	for(j=0; j<l; j++)
1747 	{
1748 		if(prob_col->y[j] > 0)
1749 			y[j] = 1;
1750 		else
1751 			y[j] = -1;
1752 
1753 		exp_wTx[j] = 0;
1754 	}
1755 
1756 	w_norm = 0;
1757 	for(j=0; j<w_size; j++)
1758 	{
1759 		w_norm += fabs(w[j]);
1760 		wpd[j] = w[j];
1761 		index[j] = j;
1762 		xjneg_sum[j] = 0;
1763 		x = prob_col->x[j];
1764 		while(x->index != -1)
1765 		{
1766 			int ind = x->index-1;
1767 			double val = x->value;
1768 			exp_wTx[ind] += w[j]*val;
1769 			if(y[ind] == -1)
1770 				xjneg_sum[j] += C[GETI(ind)]*val;
1771 			x++;
1772 		}
1773 	}
1774 	for(j=0; j<l; j++)
1775 	{
1776 		exp_wTx[j] = exp(exp_wTx[j]);
1777 		double tau_tmp = 1/(1+exp_wTx[j]);
1778 		tau[j] = C[GETI(j)]*tau_tmp;
1779 		D[j] = C[GETI(j)]*exp_wTx[j]*tau_tmp*tau_tmp;
1780 	}
1781 
1782 	while(newton_iter < max_newton_iter)
1783 	{
1784 		Gmax_new = 0;
1785 		Gnorm1_new = 0;
1786 		active_size = w_size;
1787 
1788 		for(s=0; s<active_size; s++)
1789 		{
1790 			j = index[s];
1791 			Hdiag[j] = nu;
1792 			Grad[j] = 0;
1793 
1794 			double tmp = 0;
1795 			x = prob_col->x[j];
1796 			while(x->index != -1)
1797 			{
1798 				int ind = x->index-1;
1799 				Hdiag[j] += x->value*x->value*D[ind];
1800 				tmp += x->value*tau[ind];
1801 				x++;
1802 			}
1803 			Grad[j] = -tmp + xjneg_sum[j];
1804 
1805 			double Gp = Grad[j]+1;
1806 			double Gn = Grad[j]-1;
1807 			double violation = 0;
1808 			if(w[j] == 0)
1809 			{
1810 				if(Gp < 0)
1811 					violation = -Gp;
1812 				else if(Gn > 0)
1813 					violation = Gn;
1814 				//outer-level shrinking
1815 				else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
1816 				{
1817 					active_size--;
1818 					swap(index[s], index[active_size]);
1819 					s--;
1820 					continue;
1821 				}
1822 			}
1823 			else if(w[j] > 0)
1824 				violation = fabs(Gp);
1825 			else
1826 				violation = fabs(Gn);
1827 
1828 			Gmax_new = max(Gmax_new, violation);
1829 			Gnorm1_new += violation;
1830 		}
1831 
1832 		if(newton_iter == 0)
1833 			Gnorm1_init = Gnorm1_new;
1834 
1835 		if(Gnorm1_new <= eps*Gnorm1_init)
1836 			break;
1837 
1838 		iter = 0;
1839 		QP_Gmax_old = INF;
1840 		QP_active_size = active_size;
1841 
1842 		for(int i=0; i<l; i++)
1843 			xTd[i] = 0;
1844 
1845 		// optimize QP over wpd
1846 		while(iter < max_iter)
1847 		{
1848 			QP_Gmax_new = 0;
1849 			QP_Gnorm1_new = 0;
1850 
1851 			for(j=0; j<QP_active_size; j++)
1852 			{
1853 				int i = j+rand()%(QP_active_size-j);
1854 				swap(index[i], index[j]);
1855 			}
1856 
1857 			for(s=0; s<QP_active_size; s++)
1858 			{
1859 				j = index[s];
1860 				H = Hdiag[j];
1861 
1862 				x = prob_col->x[j];
1863 				G = Grad[j] + (wpd[j]-w[j])*nu;
1864 				while(x->index != -1)
1865 				{
1866 					int ind = x->index-1;
1867 					G += x->value*D[ind]*xTd[ind];
1868 					x++;
1869 				}
1870 
1871 				double Gp = G+1;
1872 				double Gn = G-1;
1873 				double violation = 0;
1874 				if(wpd[j] == 0)
1875 				{
1876 					if(Gp < 0)
1877 						violation = -Gp;
1878 					else if(Gn > 0)
1879 						violation = Gn;
1880 					//inner-level shrinking
1881 					else if(Gp>QP_Gmax_old/l && Gn<-QP_Gmax_old/l)
1882 					{
1883 						QP_active_size--;
1884 						swap(index[s], index[QP_active_size]);
1885 						s--;
1886 						continue;
1887 					}
1888 				}
1889 				else if(wpd[j] > 0)
1890 					violation = fabs(Gp);
1891 				else
1892 					violation = fabs(Gn);
1893 
1894 				QP_Gmax_new = max(QP_Gmax_new, violation);
1895 				QP_Gnorm1_new += violation;
1896 
1897 				// obtain solution of one-variable problem
1898 				if(Gp < H*wpd[j])
1899 					z = -Gp/H;
1900 				else if(Gn > H*wpd[j])
1901 					z = -Gn/H;
1902 				else
1903 					z = -wpd[j];
1904 
1905 				if(fabs(z) < 1.0e-12)
1906 					continue;
1907 				z = min(max(z,-10.0),10.0);
1908 
1909 				wpd[j] += z;
1910 
1911 				x = prob_col->x[j];
1912 				sparse_operator::axpy(z, x, xTd);
1913 			}
1914 
1915 			iter++;
1916 
1917 			if(QP_Gnorm1_new <= inner_eps*Gnorm1_init)
1918 			{
1919 				//inner stopping
1920 				if(QP_active_size == active_size)
1921 					break;
1922 				//active set reactivation
1923 				else
1924 				{
1925 					QP_active_size = active_size;
1926 					QP_Gmax_old = INF;
1927 					continue;
1928 				}
1929 			}
1930 
1931 			QP_Gmax_old = QP_Gmax_new;
1932 		}
1933 
1934 		if(iter >= max_iter)
1935 			info("WARNING: reaching max number of inner iterations\n");
1936 
1937 		delta = 0;
1938 		w_norm_new = 0;
1939 		for(j=0; j<w_size; j++)
1940 		{
1941 			delta += Grad[j]*(wpd[j]-w[j]);
1942 			if(wpd[j] != 0)
1943 				w_norm_new += fabs(wpd[j]);
1944 		}
1945 		delta += (w_norm_new-w_norm);
1946 
1947 		negsum_xTd = 0;
1948 		for(int i=0; i<l; i++)
1949 			if(y[i] == -1)
1950 				negsum_xTd += C[GETI(i)]*xTd[i];
1951 
1952 		int num_linesearch;
1953 		for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
1954 		{
1955 			cond = w_norm_new - w_norm + negsum_xTd - sigma*delta;
1956 
1957 			for(int i=0; i<l; i++)
1958 			{
1959 				double exp_xTd = exp(xTd[i]);
1960 				exp_wTx_new[i] = exp_wTx[i]*exp_xTd;
1961 				cond += C[GETI(i)]*log((1+exp_wTx_new[i])/(exp_xTd+exp_wTx_new[i]));
1962 			}
1963 
1964 			if(cond <= 0)
1965 			{
1966 				w_norm = w_norm_new;
1967 				for(j=0; j<w_size; j++)
1968 					w[j] = wpd[j];
1969 				for(int i=0; i<l; i++)
1970 				{
1971 					exp_wTx[i] = exp_wTx_new[i];
1972 					double tau_tmp = 1/(1+exp_wTx[i]);
1973 					tau[i] = C[GETI(i)]*tau_tmp;
1974 					D[i] = C[GETI(i)]*exp_wTx[i]*tau_tmp*tau_tmp;
1975 				}
1976 				break;
1977 			}
1978 			else
1979 			{
1980 				w_norm_new = 0;
1981 				for(j=0; j<w_size; j++)
1982 				{
1983 					wpd[j] = (w[j]+wpd[j])*0.5;
1984 					if(wpd[j] != 0)
1985 						w_norm_new += fabs(wpd[j]);
1986 				}
1987 				delta *= 0.5;
1988 				negsum_xTd *= 0.5;
1989 				for(int i=0; i<l; i++)
1990 					xTd[i] *= 0.5;
1991 			}
1992 		}
1993 
1994 		// Recompute some info due to too many line search steps
1995 		if(num_linesearch >= max_num_linesearch)
1996 		{
1997 			for(int i=0; i<l; i++)
1998 				exp_wTx[i] = 0;
1999 
2000 			for(int i=0; i<w_size; i++)
2001 			{
2002 				if(w[i]==0) continue;
2003 				x = prob_col->x[i];
2004 				sparse_operator::axpy(w[i], x, exp_wTx);
2005 			}
2006 
2007 			for(int i=0; i<l; i++)
2008 				exp_wTx[i] = exp(exp_wTx[i]);
2009 		}
2010 
2011 		if(iter == 1)
2012 			inner_eps *= 0.25;
2013 
2014 		newton_iter++;
2015 		Gmax_old = Gmax_new;
2016 
2017 		info("iter %3d  #CD cycles %d\n", newton_iter, iter);
2018 	}
2019 
2020 	info("=========================\n");
2021 	info("optimization finished, #iter = %d\n", newton_iter);
2022 	if(newton_iter >= max_newton_iter)
2023 		info("WARNING: reaching max number of iterations\n");
2024 
2025 	// calculate objective value
2026 
2027 	double v = 0;
2028 	int nnz = 0;
2029 	for(j=0; j<w_size; j++)
2030 		if(w[j] != 0)
2031 		{
2032 			v += fabs(w[j]);
2033 			nnz++;
2034 		}
2035 	for(j=0; j<l; j++)
2036 		if(y[j] == 1)
2037 			v += C[GETI(j)]*log(1+1/exp_wTx[j]);
2038 		else
2039 			v += C[GETI(j)]*log(1+exp_wTx[j]);
2040 
2041 	info("Objective value = %lf\n", v);
2042 	info("#nonzeros/#features = %d/%d\n", nnz, w_size);
2043 
2044 	delete [] index;
2045 	delete [] y;
2046 	delete [] Hdiag;
2047 	delete [] Grad;
2048 	delete [] wpd;
2049 	delete [] xjneg_sum;
2050 	delete [] xTd;
2051 	delete [] exp_wTx;
2052 	delete [] exp_wTx_new;
2053 	delete [] tau;
2054 	delete [] D;
2055 }
2056 
2057 // transpose matrix X from row format to column format
2058 static void transpose(const problem *prob, feature_node **x_space_ret, problem *prob_col)
2059 {
2060 	int i;
2061 	int l = prob->l;
2062 	int n = prob->n;
2063 	size_t nnz = 0;
2064 	size_t *col_ptr = new size_t [n+1];
2065 	feature_node *x_space;
2066 	prob_col->l = l;
2067 	prob_col->n = n;
2068 	prob_col->y = new double[l];
2069 	prob_col->x = new feature_node*[n];
2070 
2071 	for(i=0; i<l; i++)
2072 		prob_col->y[i] = prob->y[i];
2073 
2074 	for(i=0; i<n+1; i++)
2075 		col_ptr[i] = 0;
2076 	for(i=0; i<l; i++)
2077 	{
2078 		feature_node *x = prob->x[i];
2079 		while(x->index != -1)
2080 		{
2081 			nnz++;
2082 			col_ptr[x->index]++;
2083 			x++;
2084 		}
2085 	}
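	// prefix sum of the per-column counts; the extra +1 per column (together
	// with the +n in the allocation below) leaves room for each column's
	// index == -1 terminator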
2086 	for(i=1; i<n+1; i++)
2087 		col_ptr[i] += col_ptr[i-1] + 1;
2088 
2089 	x_space = new feature_node[nnz+n];
2090 	for(i=0; i<n; i++)
2091 		prob_col->x[i] = &x_space[col_ptr[i]];
2092 
2093 	for(i=0; i<l; i++)
2094 	{
2095 		feature_node *x = prob->x[i];
2096 		while(x->index != -1)
2097 		{
2098 			int ind = x->index-1;
2099 			x_space[col_ptr[ind]].index = i+1; // starts from 1
2100 			x_space[col_ptr[ind]].value = x->value;
2101 			col_ptr[ind]++;
2102 			x++;
2103 		}
2104 	}
2105 	for(i=0; i<n; i++)
2106 		x_space[col_ptr[i]].index = -1;
2107 
2108 	*x_space_ret = x_space;
2109 
2110 	delete [] col_ptr;
2111 }
2112 
2113 // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
2114 // perm, length l, must be allocated before calling this subroutine
2115 static void group_classes(const problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
2116 {
2117 	int l = prob->l;
2118 	int max_nr_class = 16;
2119 	int nr_class = 0;
2120 	int *label = Malloc(int,max_nr_class);
2121 	int *count = Malloc(int,max_nr_class);
2122 	int *data_label = Malloc(int,l);
2123 	int i;
2124 
2125 	for(i=0;i<l;i++)
2126 	{
2127 		int this_label = (int)prob->y[i];
2128 		int j;
2129 		for(j=0;j<nr_class;j++)
2130 		{
2131 			if(this_label == label[j])
2132 			{
2133 				++count[j];
2134 				break;
2135 			}
2136 		}
2137 		data_label[i] = j;
2138 		if(j == nr_class)
2139 		{
2140 			if(nr_class == max_nr_class)
2141 			{
2142 				max_nr_class *= 2;
2143 				label = (int *)realloc(label,max_nr_class*sizeof(int));
2144 				count = (int *)realloc(count,max_nr_class*sizeof(int));
2145 			}
2146 			label[nr_class] = this_label;
2147 			count[nr_class] = 1;
2148 			++nr_class;
2149 		}
2150 	}
2151 
2152 	//
2153 	// Labels are ordered by their first occurrence in the training set.
2154 // However, for two-class sets with -1/+1 labels where -1 appears first,
2155 // we swap labels to ensure that internally the binary SVM has positive data corresponding to the +1 instances.
2156 	//
2157 	if (nr_class == 2 && label[0] == -1 && label[1] == 1)
2158 	{
2159 		swap(label[0],label[1]);
2160 		swap(count[0],count[1]);
2161 		for(i=0;i<l;i++)
2162 		{
2163 			if(data_label[i] == 0)
2164 				data_label[i] = 1;
2165 			else
2166 				data_label[i] = 0;
2167 		}
2168 	}
2169 
2170 	int *start = Malloc(int,nr_class);
2171 	start[0] = 0;
2172 	for(i=1;i<nr_class;i++)
2173 		start[i] = start[i-1]+count[i-1];
2174 	for(i=0;i<l;i++)
2175 	{
2176 		perm[start[data_label[i]]] = i;
2177 		++start[data_label[i]];
2178 	}
2179 	start[0] = 0;
2180 	for(i=1;i<nr_class;i++)
2181 		start[i] = start[i-1]+count[i-1];
2182 
2183 	*nr_class_ret = nr_class;
2184 	*label_ret = label;
2185 	*start_ret = start;
2186 	*count_ret = count;
2187 	free(data_label);
2188 }
2189 
2190 static void train_one(const problem *prob, const parameter *param, double *w, double Cp, double Cn)
2191 {
2192 	//inner and outer tolerances for TRON
2193 	double eps = param->eps;
2194 	double eps_cg = 0.1;
2195 	if(param->init_sol != NULL)
2196 		eps_cg = 0.5;
2197 
2198 	int pos = 0;
2199 	int neg = 0;
2200 	for(int i=0;i<prob->l;i++)
2201 		if(prob->y[i] > 0)
2202 			pos++;
2203 	neg = prob->l - pos;
2204 	double primal_solver_tol = eps*max(min(pos,neg), 1)/prob->l;
2205 
2206 	function *fun_obj=NULL;
2207 	switch(param->solver_type)
2208 	{
2209 		case L2R_LR:
2210 		{
2211 			double *C = new double[prob->l];
2212 			for(int i = 0; i < prob->l; i++)
2213 			{
2214 				if(prob->y[i] > 0)
2215 					C[i] = Cp;
2216 				else
2217 					C[i] = Cn;
2218 			}
2219 			fun_obj=new l2r_lr_fun(prob, C);
2220 			TRON tron_obj(fun_obj, primal_solver_tol, eps_cg);
2221 			tron_obj.set_print_string(liblinear_print_string);
2222 			tron_obj.tron(w);
2223 			delete fun_obj;
2224 			delete[] C;
2225 			break;
2226 		}
2227 		case L2R_L2LOSS_SVC:
2228 		{
2229 			double *C = new double[prob->l];
2230 			for(int i = 0; i < prob->l; i++)
2231 			{
2232 				if(prob->y[i] > 0)
2233 					C[i] = Cp;
2234 				else
2235 					C[i] = Cn;
2236 			}
2237 			fun_obj=new l2r_l2_svc_fun(prob, C);
2238 			TRON tron_obj(fun_obj, primal_solver_tol, eps_cg);
2239 			tron_obj.set_print_string(liblinear_print_string);
2240 			tron_obj.tron(w);
2241 			delete fun_obj;
2242 			delete[] C;
2243 			break;
2244 		}
2245 		case L2R_L2LOSS_SVC_DUAL:
2246 			solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
2247 			break;
2248 		case L2R_L1LOSS_SVC_DUAL:
2249 			solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
2250 			break;
2251 		case L1R_L2LOSS_SVC:
2252 		{
2253 			problem prob_col;
2254 			feature_node *x_space = NULL;
2255 			transpose(prob, &x_space ,&prob_col);
2256 			solve_l1r_l2_svc(&prob_col, w, primal_solver_tol, Cp, Cn);
2257 			delete [] prob_col.y;
2258 			delete [] prob_col.x;
2259 			delete [] x_space;
2260 			break;
2261 		}
2262 		case L1R_LR:
2263 		{
2264 			problem prob_col;
2265 			feature_node *x_space = NULL;
2266 			transpose(prob, &x_space ,&prob_col);
2267 			solve_l1r_lr(&prob_col, w, primal_solver_tol, Cp, Cn);
2268 			delete [] prob_col.y;
2269 			delete [] prob_col.x;
2270 			delete [] x_space;
2271 			break;
2272 		}
2273 		case L2R_LR_DUAL:
2274 			solve_l2r_lr_dual(prob, w, eps, Cp, Cn);
2275 			break;
2276 		case L2R_L2LOSS_SVR:
2277 		{
2278 			double *C = new double[prob->l];
2279 			for(int i = 0; i < prob->l; i++)
2280 				C[i] = param->C;
2281 
2282 			fun_obj=new l2r_l2_svr_fun(prob, C, param->p);
2283 			TRON tron_obj(fun_obj, param->eps);
2284 			tron_obj.set_print_string(liblinear_print_string);
2285 			tron_obj.tron(w);
2286 			delete fun_obj;
2287 			delete[] C;
2288 			break;
2289 
2290 		}
2291 		case L2R_L1LOSS_SVR_DUAL:
2292 			solve_l2r_l1l2_svr(prob, w, param, L2R_L1LOSS_SVR_DUAL);
2293 			break;
2294 		case L2R_L2LOSS_SVR_DUAL:
2295 			solve_l2r_l1l2_svr(prob, w, param, L2R_L2LOSS_SVR_DUAL);
2296 			break;
2297 		default:
2298 			fprintf(stderr, "ERROR: unknown solver_type\n");
2299 			break;
2300 	}
2301 }
2302 
2303 // Calculate the initial C for parameter selection
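// For the supported solvers the smallest useful C is estimated from the largest
// instance norm: min_C = 1/(l*max_i ||x_i||^2) for L2R_LR and
// min_C = 1/(2*l*max_i ||x_i||^2) for L2R_L2LOSS_SVC, rounded down to a power of two.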
2304 static double calc_start_C(const problem *prob, const parameter *param)
2305 {
2306 	int i;
2307 	double xTx,max_xTx;
2308 	max_xTx = 0;
2309 	for(i=0; i<prob->l; i++)
2310 	{
2311 		xTx = 0;
2312 		feature_node *xi=prob->x[i];
2313 		while(xi->index != -1)
2314 		{
2315 			double val = xi->value;
2316 			xTx += val*val;
2317 			xi++;
2318 		}
2319 		if(xTx > max_xTx)
2320 			max_xTx = xTx;
2321 	}
2322 
2323 	double min_C = 1.0;
2324 	if(param->solver_type == L2R_LR)
2325 		min_C = 1.0 / (prob->l * max_xTx);
2326 	else if(param->solver_type == L2R_L2LOSS_SVC)
2327 		min_C = 1.0 / (2 * prob->l * max_xTx);
2328 
2329 	return pow( 2, floor(log(min_C) / log(2.0)) );
2330 }
2331 
2332 
2333 //
2334 // Interface functions
2335 //
2336 model* train(const problem *prob, const parameter *param)
2337 {
2338 	int i,j;
2339 	int l = prob->l;
2340 	int n = prob->n;
2341 	int w_size = prob->n;
2342 	model *model_ = Malloc(model,1);
2343 
2344 	if(prob->bias>=0)
2345 		model_->nr_feature=n-1;
2346 	else
2347 		model_->nr_feature=n;
2348 	model_->param = *param;
2349 	model_->bias = prob->bias;
2350 
2351 	if(check_regression_model(model_))
2352 	{
2353 		model_->w = Malloc(double, w_size);
2354 		for(i=0; i<w_size; i++)
2355 			model_->w[i] = 0;
2356 		model_->nr_class = 2;
2357 		model_->label = NULL;
2358 		train_one(prob, param, model_->w, 0, 0);
2359 	}
2360 	else
2361 	{
2362 		int nr_class;
2363 		int *label = NULL;
2364 		int *start = NULL;
2365 		int *count = NULL;
2366 		int *perm = Malloc(int,l);
2367 
2368 		// group training data of the same class
2369 		group_classes(prob,&nr_class,&label,&start,&count,perm);
2370 
2371 		model_->nr_class=nr_class;
2372 		model_->label = Malloc(int,nr_class);
2373 		for(i=0;i<nr_class;i++)
2374 			model_->label[i] = label[i];
2375 
2376 		// calculate weighted C
2377 		double *weighted_C = Malloc(double, nr_class);
2378 		for(i=0;i<nr_class;i++)
2379 			weighted_C[i] = param->C;
2380 		for(i=0;i<param->nr_weight;i++)
2381 		{
2382 			for(j=0;j<nr_class;j++)
2383 				if(param->weight_label[i] == label[j])
2384 					break;
2385 			if(j == nr_class)
2386 				fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
2387 			else
2388 				weighted_C[j] *= param->weight[i];
2389 		}
2390 
2391 		// constructing the subproblem
2392 		feature_node **x = Malloc(feature_node *,l);
2393 		for(i=0;i<l;i++)
2394 			x[i] = prob->x[perm[i]];
2395 
2396 		int k;
2397 		problem sub_prob;
2398 		sub_prob.l = l;
2399 		sub_prob.n = n;
2400 		sub_prob.x = Malloc(feature_node *,sub_prob.l);
2401 		sub_prob.y = Malloc(double,sub_prob.l);
2402 
2403 		for(k=0; k<sub_prob.l; k++)
2404 			sub_prob.x[k] = x[k];
2405 
2406 		// multi-class svm by Crammer and Singer
2407 		if(param->solver_type == MCSVM_CS)
2408 		{
2409 			model_->w=Malloc(double, n*nr_class);
2410 			for(i=0;i<nr_class;i++)
2411 				for(j=start[i];j<start[i]+count[i];j++)
2412 					sub_prob.y[j] = i;
2413 			Solver_MCSVM_CS Solver(&sub_prob, nr_class, weighted_C, param->eps);
2414 			Solver.Solve(model_->w);
2415 		}
2416 		else
2417 		{
2418 			if(nr_class == 2)
2419 			{
2420 				model_->w=Malloc(double, w_size);
2421 
2422 				int e0 = start[0]+count[0];
2423 				k=0;
2424 				for(; k<e0; k++)
2425 					sub_prob.y[k] = +1;
2426 				for(; k<sub_prob.l; k++)
2427 					sub_prob.y[k] = -1;
2428 
2429 				if(param->init_sol != NULL)
2430 					for(i=0;i<w_size;i++)
2431 						model_->w[i] = param->init_sol[i];
2432 				else
2433 					for(i=0;i<w_size;i++)
2434 						model_->w[i] = 0;
2435 
2436 				train_one(&sub_prob, param, model_->w, weighted_C[0], weighted_C[1]);
2437 			}
2438 			else
2439 			{
2440 				model_->w=Malloc(double, w_size*nr_class);
2441 				double *w=Malloc(double, w_size);
2442 				for(i=0;i<nr_class;i++)
2443 				{
2444 					int si = start[i];
2445 					int ei = si+count[i];
2446 
2447 					k=0;
2448 					for(; k<si; k++)
2449 						sub_prob.y[k] = -1;
2450 					for(; k<ei; k++)
2451 						sub_prob.y[k] = +1;
2452 					for(; k<sub_prob.l; k++)
2453 						sub_prob.y[k] = -1;
2454 
2455 					if(param->init_sol != NULL)
2456 						for(j=0;j<w_size;j++)
2457 							w[j] = param->init_sol[j*nr_class+i];
2458 					else
2459 						for(j=0;j<w_size;j++)
2460 							w[j] = 0;
2461 
2462 					train_one(&sub_prob, param, w, weighted_C[i], param->C);
2463 
2464 					for(j=0;j<w_size;j++)
2465 						model_->w[j*nr_class+i] = w[j];
2466 				}
2467 				free(w);
2468 			}
2469 
2470 		}
2471 
2472 		free(x);
2473 		free(label);
2474 		free(start);
2475 		free(count);
2476 		free(perm);
2477 		free(sub_prob.x);
2478 		free(sub_prob.y);
2479 		free(weighted_C);
2480 	}
2481 	return model_;
2482 }
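
/*
  Example (illustration only, not part of the library): a minimal sketch of how
  train() can be called on a tiny two-class problem. It follows the conventions
  used above: each instance is an array of feature_node terminated by
  index == -1, feature indices start at 1, and bias < 0 means no bias term.

	feature_node x1[] = {{1, 1.0}, {2, 0.5}, {-1, 0.0}};
	feature_node x2[] = {{1, -1.0}, {2, -0.5}, {-1, 0.0}};
	feature_node *rows[2] = {x1, x2};
	double y[2] = {+1, -1};

	problem prob;
	prob.l = 2; prob.n = 2; prob.bias = -1;
	prob.x = rows; prob.y = y;

	parameter param;
	param.solver_type = L2R_L2LOSS_SVC;
	param.C = 1; param.eps = 0.1; param.p = 0.1;
	param.nr_weight = 0; param.weight_label = NULL; param.weight = NULL;
	param.init_sol = NULL;

	if(check_parameter(&prob, &param) == NULL)
	{
		model *m = train(&prob, &param);
		double label = predict(m, x1); // returns one of the training labels (+1 or -1)
		free_and_destroy_model(&m);
	}
*/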
2483 
2484 void cross_validation(const problem *prob, const parameter *param, int nr_fold, double *target)
2485 {
2486 	int i;
2487 	int *fold_start;
2488 	int l = prob->l;
2489 	int *perm = Malloc(int,l);
2490 	if (nr_fold > l)
2491 	{
2492 		nr_fold = l;
2493 		fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
2494 	}
2495 	fold_start = Malloc(int,nr_fold+1);
2496 	for(i=0;i<l;i++) perm[i]=i;
2497 	for(i=0;i<l;i++)
2498 	{
2499 		int j = i+rand()%(l-i);
2500 		swap(perm[i],perm[j]);
2501 	}
2502 	for(i=0;i<=nr_fold;i++)
2503 		fold_start[i]=i*l/nr_fold;
2504 
2505 	for(i=0;i<nr_fold;i++)
2506 	{
2507 		int begin = fold_start[i];
2508 		int end = fold_start[i+1];
2509 		int j,k;
2510 		struct problem subprob;
2511 
2512 		subprob.bias = prob->bias;
2513 		subprob.n = prob->n;
2514 		subprob.l = l-(end-begin);
2515 		subprob.x = Malloc(struct feature_node*,subprob.l);
2516 		subprob.y = Malloc(double,subprob.l);
2517 
2518 		k=0;
2519 		for(j=0;j<begin;j++)
2520 		{
2521 			subprob.x[k] = prob->x[perm[j]];
2522 			subprob.y[k] = prob->y[perm[j]];
2523 			++k;
2524 		}
2525 		for(j=end;j<l;j++)
2526 		{
2527 			subprob.x[k] = prob->x[perm[j]];
2528 			subprob.y[k] = prob->y[perm[j]];
2529 			++k;
2530 		}
2531 		struct model *submodel = train(&subprob,param);
2532 		for(j=begin;j<end;j++)
2533 			target[perm[j]] = predict(submodel,prob->x[perm[j]]);
2534 		free_and_destroy_model(&submodel);
2535 		free(subprob.x);
2536 		free(subprob.y);
2537 	}
2538 	free(fold_start);
2539 	free(perm);
2540 }
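//
// Note: target must point to an array of length prob->l; after the call,
// target[i] holds the cross-validated prediction for instance i. Computing the
// accuracy (or regression error) is left to the caller, e.g. by comparing
// target[i] with prob->y[i] as find_parameter_C() below does.
//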
2541 
2542 void find_parameter_C(const problem *prob, const parameter *param, int nr_fold, double start_C, double max_C, double *best_C, double *best_rate)
2543 {
2544 	// variables for CV
2545 	int i;
2546 	int *fold_start;
2547 	int l = prob->l;
2548 	int *perm = Malloc(int, l);
2549 	double *target = Malloc(double, prob->l);
2550 	struct problem *subprob = Malloc(problem,nr_fold);
2551 
2552 	// variables for warm start
2553 	double ratio = 2;
2554 	double **prev_w = Malloc(double*, nr_fold);
2555 	for(i = 0; i < nr_fold; i++)
2556 		prev_w[i] = NULL;
2557 	int num_unchanged_w = 0;
2558 	struct parameter param1 = *param;
2559 	void (*default_print_string) (const char *) = liblinear_print_string;
2560 
2561 	if (nr_fold > l)
2562 	{
2563 		nr_fold = l;
2564 		fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
2565 	}
2566 	fold_start = Malloc(int,nr_fold+1);
2567 	for(i=0;i<l;i++) perm[i]=i;
2568 	for(i=0;i<l;i++)
2569 	{
2570 		int j = i+rand()%(l-i);
2571 		swap(perm[i],perm[j]);
2572 	}
2573 	for(i=0;i<=nr_fold;i++)
2574 		fold_start[i]=i*l/nr_fold;
2575 
2576 	for(i=0;i<nr_fold;i++)
2577 	{
2578 		int begin = fold_start[i];
2579 		int end = fold_start[i+1];
2580 		int j,k;
2581 
2582 		subprob[i].bias = prob->bias;
2583 		subprob[i].n = prob->n;
2584 		subprob[i].l = l-(end-begin);
2585 		subprob[i].x = Malloc(struct feature_node*,subprob[i].l);
2586 		subprob[i].y = Malloc(double,subprob[i].l);
2587 
2588 		k=0;
2589 		for(j=0;j<begin;j++)
2590 		{
2591 			subprob[i].x[k] = prob->x[perm[j]];
2592 			subprob[i].y[k] = prob->y[perm[j]];
2593 			++k;
2594 		}
2595 		for(j=end;j<l;j++)
2596 		{
2597 			subprob[i].x[k] = prob->x[perm[j]];
2598 			subprob[i].y[k] = prob->y[perm[j]];
2599 			++k;
2600 		}
2601 
2602 	}
2603 
2604 	*best_rate = 0;
2605 	if(start_C <= 0)
2606 		start_C = calc_start_C(prob,param);
2607 	param1.C = start_C;
2608 
2609 	while(param1.C <= max_C)
2610 	{
2611 		// Disable output while running CV at a particular C
2612 		set_print_string_function(&print_null);
2613 
2614 		for(i=0; i<nr_fold; i++)
2615 		{
2616 			int j;
2617 			int begin = fold_start[i];
2618 			int end = fold_start[i+1];
2619 
2620 			param1.init_sol = prev_w[i];
2621 			struct model *submodel = train(&subprob[i],&param1);
2622 
2623 			int total_w_size;
2624 			if(submodel->nr_class == 2)
2625 				total_w_size = subprob[i].n;
2626 			else
2627 				total_w_size = subprob[i].n * submodel->nr_class;
2628 
2629 			if(prev_w[i] == NULL)
2630 			{
2631 				prev_w[i] = Malloc(double, total_w_size);
2632 				for(j=0; j<total_w_size; j++)
2633 					prev_w[i][j] = submodel->w[j];
2634 			}
2635 			else if(num_unchanged_w >= 0)
2636 			{
2637 				double norm_w_diff = 0;
2638 				for(j=0; j<total_w_size; j++)
2639 				{
2640 					norm_w_diff += (submodel->w[j] - prev_w[i][j])*(submodel->w[j] - prev_w[i][j]);
2641 					prev_w[i][j] = submodel->w[j];
2642 				}
2643 				norm_w_diff = sqrt(norm_w_diff);
2644 
2645 				if(norm_w_diff > 1e-15)
2646 					num_unchanged_w = -1;
2647 			}
2648 			else
2649 			{
2650 				for(j=0; j<total_w_size; j++)
2651 					prev_w[i][j] = submodel->w[j];
2652 			}
2653 
2654 			for(j=begin; j<end; j++)
2655 				target[perm[j]] = predict(submodel,prob->x[perm[j]]);
2656 
2657 			free_and_destroy_model(&submodel);
2658 		}
2659 		set_print_string_function(default_print_string);
2660 
2661 		int total_correct = 0;
2662 		for(i=0; i<prob->l; i++)
2663 			if(target[i] == prob->y[i])
2664 				++total_correct;
2665 		double current_rate = (double)total_correct/prob->l;
2666 		if(current_rate > *best_rate)
2667 		{
2668 			*best_C = param1.C;
2669 			*best_rate = current_rate;
2670 		}
2671 
2672 		info("log2c=%7.2f\trate=%g\n",log(param1.C)/log(2.0),100.0*current_rate);
2673 		num_unchanged_w++;
2674 		if(num_unchanged_w == 3)
2675 			break;
2676 		param1.C = param1.C*ratio;
2677 	}
2678 
2679 	if(param1.C > max_C && max_C > start_C)
2680 		info("warning: maximum C reached.\n");
2681 	free(fold_start);
2682 	free(perm);
2683 	free(target);
2684 	for(i=0; i<nr_fold; i++)
2685 	{
2686 		free(subprob[i].x);
2687 		free(subprob[i].y);
2688 		free(prev_w[i]);
2689 	}
2690 	free(prev_w);
2691 	free(subprob);
2692 }
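//
// find_parameter_C tries C = start_C, 2*start_C, 4*start_C, ... up to max_C
// (ratio = 2 above), reusing each fold's previous solution as a warm start via
// param1.init_sol. If start_C <= 0 the search begins at calc_start_C(). The
// loop also stops early once the weight vectors have stayed essentially
// unchanged for three consecutive C values, on the assumption that the CV
// accuracy has stabilized.
//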
2693 
2694 double predict_values(const struct model *model_, const struct feature_node *x, double *dec_values)
2695 {
2696 	int idx;
2697 	int n;
2698 	if(model_->bias>=0)
2699 		n=model_->nr_feature+1;
2700 	else
2701 		n=model_->nr_feature;
2702 	double *w=model_->w;
2703 	int nr_class=model_->nr_class;
2704 	int i;
2705 	int nr_w;
2706 	if(nr_class==2 && model_->param.solver_type != MCSVM_CS)
2707 		nr_w = 1;
2708 	else
2709 		nr_w = nr_class;
2710 
2711 	const feature_node *lx=x;
2712 	for(i=0;i<nr_w;i++)
2713 		dec_values[i] = 0;
2714 	for(; (idx=lx->index)!=-1; lx++)
2715 	{
2716 		// the dimension of testing data may exceed that of training
2717 		if(idx<=n)
2718 			for(i=0;i<nr_w;i++)
2719 				dec_values[i] += w[(idx-1)*nr_w+i]*lx->value;
2720 	}
2721 
2722 	if(nr_class==2)
2723 	{
2724 		if(check_regression_model(model_))
2725 			return dec_values[0];
2726 		else
2727 			return (dec_values[0]>0)?model_->label[0]:model_->label[1];
2728 	}
2729 	else
2730 	{
2731 		int dec_max_idx = 0;
2732 		for(i=1;i<nr_class;i++)
2733 		{
2734 			if(dec_values[i] > dec_values[dec_max_idx])
2735 				dec_max_idx = i;
2736 		}
2737 		return model_->label[dec_max_idx];
2738 	}
2739 }
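//
// For two-class models other than MCSVM_CS only dec_values[0] is filled: a
// positive value predicts label[0], otherwise label[1] (regression models
// simply return dec_values[0]). For multi-class models dec_values has nr_class
// entries and the label with the largest value is returned.
//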
2740 
2741 double predict(const model *model_, const feature_node *x)
2742 {
2743 	double *dec_values = Malloc(double, model_->nr_class);
2744 	double label=predict_values(model_, x, dec_values);
2745 	free(dec_values);
2746 	return label;
2747 }
2748 
2749 double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates)
2750 {
2751 	if(check_probability_model(model_))
2752 	{
2753 		int i;
2754 		int nr_class=model_->nr_class;
2755 		int nr_w;
2756 		if(nr_class==2)
2757 			nr_w = 1;
2758 		else
2759 			nr_w = nr_class;
2760 
2761 		double label=predict_values(model_, x, prob_estimates);
2762 		for(i=0;i<nr_w;i++)
2763 			prob_estimates[i]=1/(1+exp(-prob_estimates[i]));
2764 
2765 		if(nr_class==2) // for binary classification
2766 			prob_estimates[1]=1.-prob_estimates[0];
2767 		else
2768 		{
2769 			double sum=0;
2770 			for(i=0; i<nr_class; i++)
2771 				sum+=prob_estimates[i];
2772 
2773 			for(i=0; i<nr_class; i++)
2774 				prob_estimates[i]=prob_estimates[i]/sum;
2775 		}
2776 
2777 		return label;
2778 	}
2779 	else
2780 		return 0;
2781 }
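//
// Probability estimates are obtained by applying the logistic transform
// 1/(1+exp(-dec)) to each decision value; with more than two classes the
// one-vs-rest estimates are then normalized to sum to 1. This is only
// meaningful for the logistic regression solvers accepted by
// check_probability_model(); for other models 0 is returned and prob_estimates
// is left untouched.
//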
2782 
2783 static const char *solver_type_table[]=
2784 {
2785 	"L2R_LR", "L2R_L2LOSS_SVC_DUAL", "L2R_L2LOSS_SVC", "L2R_L1LOSS_SVC_DUAL", "MCSVM_CS",
2786 	"L1R_L2LOSS_SVC", "L1R_LR", "L2R_LR_DUAL",
2787 	"", "", "",
2788 	"L2R_L2LOSS_SVR", "L2R_L2LOSS_SVR_DUAL", "L2R_L1LOSS_SVR_DUAL", NULL
2789 };
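// The empty strings keep the table indexed directly by param.solver_type: the
// ids between L2R_LR_DUAL and L2R_L2LOSS_SVR are unused in this version, so
// the SVR entries stay aligned with their enum values.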
2790 
2791 int save_model(const char *model_file_name, const struct model *model_)
2792 {
2793 	int i;
2794 	int nr_feature=model_->nr_feature;
2795 	int n;
2796 	const parameter& param = model_->param;
2797 
2798 	if(model_->bias>=0)
2799 		n=nr_feature+1;
2800 	else
2801 		n=nr_feature;
2802 	int w_size = n;
2803 	FILE *fp = fopen(model_file_name,"w");
2804 	if(fp==NULL) return -1;
2805 
2806 	char *old_locale = setlocale(LC_ALL, NULL);
2807 	if (old_locale)
2808 	{
2809 		old_locale = strdup(old_locale);
2810 	}
2811 	setlocale(LC_ALL, "C");
2812 
2813 	int nr_w;
2814 	if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS)
2815 		nr_w=1;
2816 	else
2817 		nr_w=model_->nr_class;
2818 
2819 	fprintf(fp, "solver_type %s\n", solver_type_table[param.solver_type]);
2820 	fprintf(fp, "nr_class %d\n", model_->nr_class);
2821 
2822 	if(model_->label)
2823 	{
2824 		fprintf(fp, "label");
2825 		for(i=0; i<model_->nr_class; i++)
2826 			fprintf(fp, " %d", model_->label[i]);
2827 		fprintf(fp, "\n");
2828 	}
2829 
2830 	fprintf(fp, "nr_feature %d\n", nr_feature);
2831 
2832 	fprintf(fp, "bias %.17g\n", model_->bias);
2833 
2834 	fprintf(fp, "w\n");
2835 	for(i=0; i<w_size; i++)
2836 	{
2837 		int j;
2838 		for(j=0; j<nr_w; j++)
2839 			fprintf(fp, "%.17g ", model_->w[i*nr_w+j]);
2840 		fprintf(fp, "\n");
2841 	}
2842 
2843 	setlocale(LC_ALL, old_locale);
2844 	free(old_locale);
2845 
2846 	if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
2847 	else return 0;
2848 }
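
/*
  For reference, a model file written by save_model() looks like the following
  (the numbers are made up for illustration):

	solver_type L2R_L2LOSS_SVC_DUAL
	nr_class 2
	label 1 -1
	nr_feature 3
	bias -1
	w
	0.12
	-0.034
	0.5

  One line of weights is written per feature (per row of w), with nr_w values
  per line.
*/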
2849 
2850 //
2851 // FSCANF wraps fscanf and treats a failed read as a fatal model-loading error.
2852 // The do-while(0) block makes the macro act as a single statement, avoiding
2853 // ambiguity when it is used as
2854 // if (...)
2855 //    FSCANF();
2856 //
2857 #define FSCANF(_stream, _format, _var)do\
2858 {\
2859 	if (fscanf(_stream, _format, _var) != 1)\
2860 	{\
2861 		fprintf(stderr, "ERROR: fscanf failed to read the model\n");\
2862 		EXIT_LOAD_MODEL()\
2863 	}\
2864 }while(0)
2865 // EXIT_LOAD_MODEL should NOT end with a semicolon.
2866 #define EXIT_LOAD_MODEL()\
2867 {\
2868 	setlocale(LC_ALL, old_locale);\
2869 	free(model_->label);\
2870 	free(model_);\
2871 	free(old_locale);\
2872 	return NULL;\
2873 }
2874 struct model *load_model(const char *model_file_name)
2875 {
2876 	FILE *fp = fopen(model_file_name,"r");
2877 	if(fp==NULL) return NULL;
2878 
2879 	int i;
2880 	int nr_feature;
2881 	int n;
2882 	int nr_class;
2883 	double bias;
2884 	model *model_ = Malloc(model,1);
2885 	parameter& param = model_->param;
2886 	// parameters used only for training are not read from the model file, but the array members are set to NULL for safety
2887 	param.nr_weight = 0;
2888 	param.weight_label = NULL;
2889 	param.weight = NULL;
2890 	param.init_sol = NULL;
2891 
2892 	model_->label = NULL;
2893 
2894 	char *old_locale = setlocale(LC_ALL, NULL);
2895 	if (old_locale)
2896 	{
2897 		old_locale = strdup(old_locale);
2898 	}
2899 	setlocale(LC_ALL, "C");
2900 
2901 	char cmd[81];
2902 	while(1)
2903 	{
2904 		FSCANF(fp,"%80s",cmd);
2905 		if(strcmp(cmd,"solver_type")==0)
2906 		{
2907 			FSCANF(fp,"%80s",cmd);
2908 			int i;
2909 			for(i=0;solver_type_table[i];i++)
2910 			{
2911 				if(strcmp(solver_type_table[i],cmd)==0)
2912 				{
2913 					param.solver_type=i;
2914 					break;
2915 				}
2916 			}
2917 			if(solver_type_table[i] == NULL)
2918 			{
2919 				fprintf(stderr,"unknown solver type.\n");
2920 				EXIT_LOAD_MODEL()
2921 			}
2922 		}
2923 		else if(strcmp(cmd,"nr_class")==0)
2924 		{
2925 			FSCANF(fp,"%d",&nr_class);
2926 			model_->nr_class=nr_class;
2927 		}
2928 		else if(strcmp(cmd,"nr_feature")==0)
2929 		{
2930 			FSCANF(fp,"%d",&nr_feature);
2931 			model_->nr_feature=nr_feature;
2932 		}
2933 		else if(strcmp(cmd,"bias")==0)
2934 		{
2935 			FSCANF(fp,"%lf",&bias);
2936 			model_->bias=bias;
2937 		}
2938 		else if(strcmp(cmd,"w")==0)
2939 		{
2940 			break;
2941 		}
2942 		else if(strcmp(cmd,"label")==0)
2943 		{
2944 			int nr_class = model_->nr_class;
2945 			model_->label = Malloc(int,nr_class);
2946 			for(int i=0;i<nr_class;i++)
2947 				FSCANF(fp,"%d",&model_->label[i]);
2948 		}
2949 		else
2950 		{
2951 			fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
2952 			EXIT_LOAD_MODEL()
2953 		}
2954 	}
2955 
2956 	nr_feature=model_->nr_feature;
2957 	if(model_->bias>=0)
2958 		n=nr_feature+1;
2959 	else
2960 		n=nr_feature;
2961 	int w_size = n;
2962 	int nr_w;
2963 	if(nr_class==2 && param.solver_type != MCSVM_CS)
2964 		nr_w = 1;
2965 	else
2966 		nr_w = nr_class;
2967 
2968 	model_->w=Malloc(double, w_size*nr_w);
2969 	for(i=0; i<w_size; i++)
2970 	{
2971 		int j;
2972 		for(j=0; j<nr_w; j++)
2973 			FSCANF(fp, "%lf ", &model_->w[i*nr_w+j]);
2974 	}
2975 
2976 	setlocale(LC_ALL, old_locale);
2977 	free(old_locale);
2978 
2979 	if (ferror(fp) != 0 || fclose(fp) != 0) return NULL;
2980 
2981 	return model_;
2982 }
2983 
2984 int get_nr_feature(const model *model_)
2985 {
2986 	return model_->nr_feature;
2987 }
2988 
2989 int get_nr_class(const model *model_)
2990 {
2991 	return model_->nr_class;
2992 }
2993 
2994 void get_labels(const model *model_, int* label)
2995 {
2996 	if (model_->label != NULL)
2997 		for(int i=0;i<model_->nr_class;i++)
2998 			label[i] = model_->label[i];
2999 }
3000 
3001 // use inline here for better performance (around 20% faster than a non-inline version)
3002 static inline double get_w_value(const struct model *model_, int idx, int label_idx)
3003 {
3004 	int nr_class = model_->nr_class;
3005 	int solver_type = model_->param.solver_type;
3006 	const double *w = model_->w;
3007 
3008 	if(idx < 0 || idx > model_->nr_feature)
3009 		return 0;
3010 	if(check_regression_model(model_))
3011 		return w[idx];
3012 	else
3013 	{
3014 		if(label_idx < 0 || label_idx >= nr_class)
3015 			return 0;
3016 		if(nr_class == 2 && solver_type != MCSVM_CS)
3017 		{
3018 			if(label_idx == 0)
3019 				return w[idx];
3020 			else
3021 				return -w[idx];
3022 		}
3023 		else
3024 			return w[idx*nr_class+label_idx];
3025 	}
3026 }
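//
// Storage layout assumed above: two-class models (other than MCSVM_CS) keep a
// single weight vector in which label[0] is the class on the positive side, so
// the coefficient for label[1] is the negation. All other models store w
// feature-major, i.e. w[idx*nr_class + label_idx].
//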
3027 
3028 // feat_idx: ranges from 1 to nr_feature
3029 // label_idx: starting from 0 to nr_class-1 for classification models;
3030 //            for regression models, label_idx is ignored.
3031 double get_decfun_coef(const struct model *model_, int feat_idx, int label_idx)
3032 {
3033 	if(feat_idx > model_->nr_feature)
3034 		return 0;
3035 	return get_w_value(model_, feat_idx-1, label_idx);
3036 }
3037 
3038 double get_decfun_bias(const struct model *model_, int label_idx)
3039 {
3040 	int bias_idx = model_->nr_feature;
3041 	double bias = model_->bias;
3042 	if(bias <= 0)
3043 		return 0;
3044 	else
3045 		return bias*get_w_value(model_, bias_idx, label_idx);
3046 }
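//
// When the model was trained with a bias term (bias >= 0), the bias column is
// stored as the last entry of w (train() sets nr_feature = n-1 in that case),
// and get_decfun_bias() returns that coefficient scaled by the bias value,
// i.e. the constant term of the decision function.
//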
3047 
3048 void free_model_content(struct model *model_ptr)
3049 {
3050 	if(model_ptr->w != NULL)
3051 		free(model_ptr->w);
3052 	if(model_ptr->label != NULL)
3053 		free(model_ptr->label);
3054 }
3055 
3056 void free_and_destroy_model(struct model **model_ptr_ptr)
3057 {
3058 	struct model *model_ptr = *model_ptr_ptr;
3059 	if(model_ptr != NULL)
3060 	{
3061 		free_model_content(model_ptr);
3062 		free(model_ptr);
3063 	}
3064 }
3065 
3066 void destroy_param(parameter* param)
3067 {
3068 	if(param->weight_label != NULL)
3069 		free(param->weight_label);
3070 	if(param->weight != NULL)
3071 		free(param->weight);
3072 	if(param->init_sol != NULL)
3073 		free(param->init_sol);
3074 }
3075 
3076 const char *check_parameter(const problem*, const parameter *param)
3077 {
3078 	if(param->eps <= 0)
3079 		return "eps <= 0";
3080 
3081 	if(param->C <= 0)
3082 		return "C <= 0";
3083 
3084 	if(param->p < 0)
3085 		return "p < 0";
3086 
3087 	if(param->solver_type != L2R_LR
3088 		&& param->solver_type != L2R_L2LOSS_SVC_DUAL
3089 		&& param->solver_type != L2R_L2LOSS_SVC
3090 		&& param->solver_type != L2R_L1LOSS_SVC_DUAL
3091 		&& param->solver_type != MCSVM_CS
3092 		&& param->solver_type != L1R_L2LOSS_SVC
3093 		&& param->solver_type != L1R_LR
3094 		&& param->solver_type != L2R_LR_DUAL
3095 		&& param->solver_type != L2R_L2LOSS_SVR
3096 		&& param->solver_type != L2R_L2LOSS_SVR_DUAL
3097 		&& param->solver_type != L2R_L1LOSS_SVR_DUAL)
3098 		return "unknown solver type";
3099 
3100 	if(param->init_sol != NULL
3101 		&& param->solver_type != L2R_LR && param->solver_type != L2R_L2LOSS_SVC)
3102 		return "Initial-solution specification supported only for solver L2R_LR and L2R_L2LOSS_SVC";
3103 
3104 	return NULL;
3105 }
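//
// check_parameter returns NULL when the settings are acceptable, otherwise a
// short message describing the first problem found; callers typically check
// this before train() or cross_validation().
//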
3106 
3107 int check_probability_model(const struct model *model_)
3108 {
3109 	return (model_->param.solver_type==L2R_LR ||
3110 			model_->param.solver_type==L2R_LR_DUAL ||
3111 			model_->param.solver_type==L1R_LR);
3112 }
3113 
3114 int check_regression_model(const struct model *model_)
3115 {
3116 	return (model_->param.solver_type==L2R_L2LOSS_SVR ||
3117 			model_->param.solver_type==L2R_L1LOSS_SVR_DUAL ||
3118 			model_->param.solver_type==L2R_L2LOSS_SVR_DUAL);
3119 }
3120 
3121 void set_print_string_function(void (*print_func)(const char*))
3122 {
3123 	if (print_func == NULL)
3124 		liblinear_print_string = &print_string_stdout;
3125 	else
3126 		liblinear_print_string = print_func;
3127 }
3128 
3129